def tuple(data, field_name):
    """
    RETURN LIST OF TUPLES
    """
    if isinstance(data, Cube):
        Log.error("not supported yet")

    if isinstance(data, FlatList):
        Log.error("not supported yet")

    if isinstance(field_name, Mapping) and "value" in field_name:
        # SIMPLIFY {"value": value} AS STRING
        field_name = field_name["value"]

    # SIMPLE PYTHON ITERABLE ASSUMED
    if isinstance(field_name, basestring):
        if len(split_field(field_name)) == 1:
            return [(d[field_name],) for d in data]
        else:
            path = split_field(field_name)
            output = []
            flat_list._tuple1(data, path, 0, output)
            return output
    elif isinstance(field_name, list):
        paths = [_select_a_field(f) for f in field_name]
        output = DictList()
        _tuple((), unwrap(data), paths, 0, output)
        return output
    else:
        paths = [_select_a_field(field_name)]
        output = DictList()
        _tuple((), data, paths, 0, output)
        return output
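# USAGE SKETCH (illustrative only; the sample rows are hypothetical): the
# single-field branch above reduces to a list comprehension, so the expected
# result is one 1-tuple per record.
def _example_tuple():
    data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
    print tuple(data, "a")  # -> [(1,), (3,)]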
def right(self, num=None):
    """
    WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE RIGHT [-num:]
    """
    self._convert()
    if num == None:
        return DictList([self.list[-1]])
    if num <= 0:
        return Null
    return DictList(self.list[-num:])
def not_right(self, num):
    """
    WITH SLICES BEING FLAT, WE NEED A SIMPLE WAY TO SLICE FROM THE LEFT [:-num:]
    """
    self._convert()
    if num == None:
        return DictList(self.list[:-1:])
    if num <= 0:
        return Null
    return DictList(self.list[:-num:])
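# USAGE SKETCH (illustrative): `right` keeps the last num elements while
# `not_right` drops them; the expected values assume a DictList over plain ints.
def _example_right_not_right():
    d = DictList([1, 2, 3, 4])
    print d.right(2)      # -> [3, 4]
    print d.not_right(1)  # -> [1, 2, 3]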
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
    RETURN LIST OF (keys, values) PAIRS, WHERE
        keys IS THE SET OF GROUP-BY KEY VALUES
        values IS A LIST OF ALL data THAT HAS THOSE keys
    contiguous - MAINTAIN THE ORDER OF THE DATA, STARTING A NEW GROUP WHEN THE SELECTOR CHANGES
    """
    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    if isinstance(data, Cube):
        return data.groupby(keys)

    keys = listwrap(keys)

    def get_keys(d):
        output = Dict()
        for k in keys:
            output[k] = d[k]
        return output

    if contiguous:
        try:
            if not data:
                return wrap([])

            agg = DictList()
            acc = DictList()
            curr_key = value2key(keys, data[0])
            for d in data:
                key = value2key(keys, d)
                if key != curr_key:
                    agg.append((get_keys(acc[0]), acc))
                    curr_key = key
                    acc = [d]
                else:
                    acc.append(d)
            agg.append((get_keys(acc[0]), acc))
            return wrap(agg)
        except Exception, e:
            Log.error("Problem grouping contiguous values", e)
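# USAGE SKETCH (illustrative; rows are hypothetical): the contiguous branch
# starts a new group each time the key changes, preserving input order.
def _example_groupby():
    data = wrap([{"a": 1, "v": 10}, {"a": 1, "v": 20}, {"a": 2, "v": 30}])
    for key, group in groupby(data, keys="a", contiguous=True):
        print key.a, len(group)  # -> 1 2  THEN  2 1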
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE
    """
    if sort == None:
        return DictList.EMPTY

    output = DictList()
    for s in listwrap(sort):
        if isinstance(s, basestring):
            output.append({"value": jx_expression(s), "sort": 1})
        elif isinstance(s, Expression):
            output.append({"value": s, "sort": 1})
        elif Math.is_integer(s):
            output.append({"value": OffsetOp("offset", s), "sort": 1})
        elif all(d in sort_direction for d in s.values()) and not s.sort and not s.value:
            # ASSUME {field: direction} FORM
            for v, d in s.items():
                output.append({"value": jx_expression(v), "sort": sort_direction[d]})
        else:
            output.append({
                "value": jx_expression(coalesce(s.value, s.field)),
                "sort": coalesce(sort_direction[s.sort], 1)
            })
    return output
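# NORMALIZATION SKETCH (illustrative): every accepted form reduces to
# {"value": <expression>, "sort": <direction>}, e.g.
#     _normalize_sort("a")            # -> [{"value": jx_expression("a"), "sort": 1}]
#     _normalize_sort({"a": "desc"})  # -> [{"value": jx_expression("a"), "sort": -1}]
#     _normalize_sort({"value": "a", "sort": -1})  # same, via the final branch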
def es_fieldop(es, query):
    es_query = es14.util.es_query_template()
    select = listwrap(query.select)
    es_query.query = {
        "filtered": {
            "query": {"match_all": {}},
            "filter": simplify_esfilter(qb_expression_to_esfilter(query.where))
        }
    }
    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = qb_sort_to_es_sort(query.sort)
    es_query.fields = DictList()

    source = "fields"
    for s in select.value:
        if s == "*":
            es_query.fields = None
            source = "_source"
        elif s == ".":
            es_query.fields = None
            source = "_source"
        elif isinstance(s, basestring) and is_keyword(s):
            es_query.fields.append(s)
        elif isinstance(s, list) and es_query.fields is not None:
            es_query.fields.extend(s)
        elif isinstance(s, Mapping) and es_query.fields is not None:
            es_query.fields.extend(s.values())
        elif es_query.fields is not None:
            es_query.fields.append(s)
    es_query.sort = [{s.field: "asc" if s.sort >= 0 else "desc"} for s in query.sort]

    return extract_rows(es, es_query, source, select, query)
def es_setop(es, query):
    es_query = es14.util.es_query_template()
    select = listwrap(query.select)

    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.fields = DictList()
    es_query.sort = qb_sort_to_es_sort(query.sort)
    source = "fields"
    for s in select:
        if s.value == "*":
            es_query.fields = None
            es_query.script_fields = None
            source = "_source"
        elif s.value == ".":
            es_query.fields = None
            es_query.script_fields = None
            source = "_source"
        elif isinstance(s.value, basestring) and is_keyword(s.value):
            es_query.fields.append(s.value)
        elif isinstance(s.value, list) and es_query.fields is not None:
            es_query.fields.extend(s.value)
        else:
            es_query.script_fields[literal_field(s.name)] = {
                "script": qb_expression_to_ruby(s.value)
            }

    return extract_rows(es, es_query, source, select, query)
def groupby_size(data, size):
    if hasattr(data, "next"):
        iterator = data
    elif hasattr(data, "__iter__"):
        iterator = data.__iter__()
    else:
        Log.error("do not know how to handle this type")

    done = DictList()

    def more():
        output = DictList()
        for i in range(size):
            try:
                output.append(iterator.next())
            except StopIteration:
                done.append(True)
                break
        return output

    # THIS IS LAZY
    i = 0
    while True:
        output = more()
        yield (i, output)
        if len(done) > 0:
            break
        i += 1
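# USAGE SKETCH (illustrative): consume any iterator in fixed-size chunks; each
# chunk is materialized by more() before it is yielded. NOTE: when the data
# length is an exact multiple of size, the final yielded chunk is empty.
def _example_groupby_size():
    for i, chunk in groupby_size(iter(range(5)), 2):
        print i, list(chunk)  # -> 0 [0, 1]; 1 [2, 3]; 2 [4]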
def post(sql):
    # FIND OUT THE default DOMAIN SIZES
    result = self.db.column_query(sql)
    num_edges = len(edges)
    for e, edge in enumerate(edges):
        domain = edge.domain
        if domain.type == "default":
            domain.type = "set"
            parts = set(result[e])
            domain.partitions = [{"index": i, "value": p} for i, p in enumerate(parts)]
            domain.map = {p: i for i, p in enumerate(parts)}
        else:
            Log.error("Do not know what to do here, yet")

    # FILL THE DATA CUBE
    maps = [(unwrap(e.domain.map), result[i]) for i, e in enumerate(edges)]
    cubes = DictList()
    for c, s in enumerate(select):
        data = Matrix(*[len(e.domain.partitions) + (1 if e.allow_nulls else 0) for e in edges])
        for rownum, value in enumerate(result[c + num_edges]):
            coord = [m[r[rownum]] for m, r in maps]
            data[coord] = value
        cubes.append(data)

    if isinstance(query.select, list):
        return cubes
    else:
        return cubes[0]
def __init__(self, **desc):
    Domain.__init__(self, **desc)
    self.NULL = Null
    self.partitions = DictList()
    self.map = dict()
    self.map[None] = self.NULL
def _select(template, data, fields, depth):
    output = DictList()
    deep_path = []
    deep_fields = UniqueIndex(["name"])
    for d in data:
        if isinstance(d, Dict):
            Log.error("programmer error, _select can not handle Dict")

        record = template.copy()
        children = None
        for f in fields:
            index, c = _select_deep(d, f, depth, record)
            children = c if children is None else children
            if index:
                path = f.value[0:index:]
                if not deep_fields[f]:
                    deep_fields.add(f)  # KEEP TRACK OF WHICH FIELDS NEED DEEPER SELECT
                short = MIN([len(deep_path), len(path)])
                if path[:short:] != deep_path[:short:]:
                    Log.error("Dangerous to select into more than one branch at time")
                if len(deep_path) < len(path):
                    deep_path = path
        if not children:
            output.append(record)
        else:
            output.extend(_select(record, children, deep_fields, depth + 1))

    return output
def __init__(self, **desc):
    Domain.__init__(self, **desc)
    desc = wrap(desc)

    self.type = "set"
    self.order = {}
    self.NULL = Null
    self.partitions = DictList()

    if isinstance(self.key, set):
        Log.error("problem")

    if isinstance(desc.partitions[0], basestring):
        # ASSUME PARTS ARE STRINGS, CONVERT TO REAL PART OBJECTS
        self.key = ("value",)
        self.map = dict()
        self.map[None] = self.NULL
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            part = {"name": p, "value": p}
            self.partitions.append(part)
            self.map[p] = part
            self.order[p] = i
    elif desc.partitions and desc.dimension.fields and len(desc.dimension.fields) > 1:
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.dimension.fields)
    elif desc.partitions and isinstance(desc.key, (list, set)):
        # TODO: desc.key CAN BE MUCH LIKE A SELECT, WHICH UniqueIndex CAN NOT HANDLE
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
    elif desc.partitions and isinstance(desc.partitions[0][desc.key], Mapping):
        self.key = desc.key
        self.map = UniqueIndex(keys=desc.key)
        # self.key = UNION(set(d[desc.key].keys()) for d in desc.partitions)
        # self.map = UniqueIndex(keys=self.key)
    elif desc.key == None:
        Log.error("Domains must have keys")
    elif self.key:
        self.key = desc.key
        self.map = dict()
        self.map[None] = self.NULL
        self.order[None] = len(desc.partitions)
        for i, p in enumerate(desc.partitions):
            self.map[p[self.key]] = p
            self.order[p[self.key]] = i
    elif all(p.esfilter for p in self.partitions):
        # EVERY PART HAS AN esfilter DEFINED, SO USE THEM
        for i, p in enumerate(self.partitions):
            p.dataIndex = i
    else:
        Log.error("Can not handle")

    self.label = coalesce(self.label, "name")

    if isinstance(desc.partitions, list):
        self.partitions = desc.partitions.copy()
    else:
        Log.error("expecting a list of partitions")
def es_setop(es, query):
    es_query, filters = es14.util.es_query_template(query.frum.name)
    set_default(filters[0], simplify_esfilter(query.where.to_esfilter()))
    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort)
    es_query.fields = DictList()

    return extract_rows(es, es_query, query)
def _iter():
    g = 0
    out = DictList()
    try:
        for i, d in enumerate(data):
            out.append(d)
            if (i + 1) % max_size == 0:
                yield g, out
                g += 1
                out = DictList()
        if out:
            yield g, out
    except Exception, e:
        if out:
            # AT LEAST TRY TO RETURN WHAT HAS BEEN PROCESSED SO FAR
            yield g, out
        Log.error("Problem inside qb.groupby", e)
def es_fieldop(es, query):
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)
    FromES.query = {
        "filtered": {
            "query": {"match_all": {}},
            "filter": simplify_esfilter(jx_expression(query.where).to_esfilter())
        }
    }
    FromES.size = coalesce(query.limit, 200000)
    FromES.fields = DictList()
    for s in select.value:
        if s == "*":
            FromES.fields = None
        elif isinstance(s, list):
            FromES.fields.extend(s)
        elif isinstance(s, Mapping):
            FromES.fields.extend(s.values())
        else:
            FromES.fields.append(s)
    FromES.sort = [{s.field: "asc" if s.sort >= 0 else "desc"} for s in query.sort]

    data = es09.util.post(es, FromES, query.limit)

    T = data.hits.hits
    matricies = {}
    for s in select:
        if s.value == "*":
            matricies[s.name] = Matrix.wrap([t._source for t in T])
        elif isinstance(s.value, Mapping):
            # for k, v in s.value.items():
            #     matricies[join_field(split_field(s.name) + [k])] = Matrix.wrap([unwrap(t.fields)[v] for t in T])
            matricies[s.name] = Matrix.wrap([
                {k: unwrap(t.fields).get(v, None) for k, v in s.value.items()}
                for t in T
            ])
        elif isinstance(s.value, list):
            matricies[s.name] = Matrix.wrap([
                tuple(unwrap(t.fields).get(ss, None) for ss in s.value)
                for t in T
            ])
        elif not s.value:
            matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
        else:
            try:
                matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
            except Exception, e:
                Log.error("", e)
def groupby(self, edges):
    """
    SLICE THIS CUBE IN TO ONES WITH LESS DIMENSIONALITY
    simple==True WILL HAVE GROUPS BASED ON PARTITION VALUE, NOT PARTITION OBJECTS
    """
    edges = DictList([_normalize_edge(e) for e in edges])

    stacked = [e for e in self.edges if e.name in edges.name]
    remainder = [e for e in self.edges if e.name not in edges.name]
    selector = [1 if e.name in edges.name else 0 for e in self.edges]

    if len(stacked) + len(remainder) != len(self.edges):
        Log.error("can not find some edges to group by")

    # CACHE SOME RESULTS
    keys = edges.name
    getKey = [e.domain.getKey for e in self.edges]
    lookup = [
        [getKey[i](p) for p in e.domain.partitions + ([None] if e.allowNulls else [])]
        for i, e in enumerate(self.edges)
    ]

    def coord2term(coord):
        output = wrap_leaves({keys[i]: lookup[i][c] for i, c in enumerate(coord)})
        return output

    if isinstance(self.select, list):
        selects = listwrap(self.select)
        index, v = zip(*self.data[selects[0].name].groupby(selector))

        coord = wrap([coord2term(c) for c in index])

        values = [v]
        for s in selects[1::]:
            i, v = zip(*self.data[s.name].group_by(selector))
            values.append(v)

        output = zip(coord, [
            Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)})
            for v in zip(*values)
        ])
    elif not remainder:
        # v IS A VALUE, NO NEED TO WRAP IT IN A Cube
        output = (
            (coord2term(coord), v)
            for coord, v in self.data[self.select.name].groupby(selector)
        )
    else:
        output = (
            (coord2term(coord), Cube(self.select, remainder, v))
            for coord, v in self.data[self.select.name].groupby(selector)
        )

    return output
def select(data, field_name):
    """
    RETURN LIST WITH VALUES FROM field_name
    """
    if isinstance(data, Cube):
        return data._select(_normalize_selects(field_name))

    if isinstance(data, FlatList):
        return data.select(field_name)

    if isinstance(data, UniqueIndex):
        data = data._data.values()  # THE SELECT ROUTINE REQUIRES dicts, NOT Dict WHILE ITERATING

    if isinstance(data, Mapping):
        return select_one(data, field_name)

    if isinstance(field_name, Mapping):
        field_name = wrap(field_name)
        if field_name.value in ["*", "."]:
            return data

        if field_name.value:
            # SIMPLIFY {"value": value} AS STRING
            field_name = field_name.value

    # SIMPLE PYTHON ITERABLE ASSUMED
    if isinstance(field_name, basestring):
        path = split_field(field_name)
        if len(path) == 1:
            return DictList([d[field_name] for d in data])
        else:
            output = DictList()
            flat_list._select1(data, path, 0, output)
            return output
    elif isinstance(field_name, list):
        keys = [_select_a_field(wrap(f)) for f in field_name]
        return _select(Dict(), unwrap(data), keys, 0)
    else:
        keys = [_select_a_field(field_name)]
        return _select(Dict(), unwrap(data), keys, 0)
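# USAGE SKETCH (illustrative; rows are hypothetical): a single-segment field
# name is a plain column extraction.
def _example_select():
    data = [{"a": 1}, {"a": 2}]
    print select(data, "a")  # -> [1, 2]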
def _where_terms(master, where, schema):
    """
    USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
    master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)
    """
    if isinstance(where, Mapping):
        if where.term:
            # MAP TERM
            try:
                output = _map_term_using_schema(master, [], where.term, schema.edges)
                return output
            except Exception, e:
                Log.error("programmer problem?", e)
        elif where.terms:
            # MAP TERM
            output = DictList()
            for k, v in where.terms.items():
                if not isinstance(v, (list, set)):
                    Log.error("terms filter expects list of values")
                edge = schema.edges[k]
                if not edge:
                    output.append({"terms": {k: v}})
                else:
                    if isinstance(edge, basestring):
                        # DIRECT FIELD REFERENCE
                        return {"terms": {edge: v}}
                    try:
                        domain = edge.getDomain()
                    except Exception, e:
                        Log.error("programmer error", e)
                    fields = domain.dimension.fields
                    if isinstance(fields, Mapping):
                        or_agg = []
                        for vv in v:
                            and_agg = []
                            for local_field, es_field in fields.items():
                                vvv = vv[local_field]
                                if vvv != None:
                                    and_agg.append({"term": {es_field: vvv}})
                            or_agg.append({"and": and_agg})
                        output.append({"or": or_agg})
                    elif isinstance(fields, list) and len(fields) == 1 and is_keyword(fields[0]):
                        output.append({"terms": {fields[0]: v}})
                    elif domain.partitions:
                        output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]})
            return {"and": output}
def _aggop(self, query):
    """
    SINGLE ROW RETURNED WITH AGGREGATES
    """
    if isinstance(query.select, list):
        # RETURN SINGLE OBJECT WITH AGGREGATES
        for s in query.select:
            if s.aggregate not in aggregates:
                Log.error("Expecting all columns to have an aggregate: {{select}}", select=s)

        selects = DictList()
        for s in query.select:
            selects.append(aggregates[s.aggregate].replace("{{code}}", s.value) + " AS " + self.db.quote_column(s.name))

        sql = expand_template("""
            SELECT
                {{selects}}
            FROM
                {{table}}
            {{where}}
        """, {
            "selects": SQL(",\n".join(selects)),
            "table": self._subquery(query["from"])[0],
            "where": self._where2sql(query.filter)
        })

        return sql, lambda sql: self.db.column(sql)[0]  # RETURNING SINGLE OBJECT WITH AGGREGATE VALUES
    else:
        # RETURN SINGLE VALUE
        s0 = query.select
        if s0.aggregate not in aggregates:
            Log.error("Expecting all columns to have an aggregate: {{select}}", select=s0)

        select = aggregates[s0.aggregate].replace("{{code}}", s0.value) + " AS " + self.db.quote_column(s0.name)

        sql = expand_template("""
            SELECT
                {{selects}}
            FROM
                {{table}}
            {{where}}
        """, {
            "selects": SQL(select),
            "table": self._subquery(query["from"])[0],
            "where": self._where2sql(query.where)
        })

        def post(sql):
            result = self.db.column_query(sql)
            return result[0][0]

        return sql, post  # RETURN SINGLE VALUE
def _groupby(self, edges):
    """
    RETURNS LIST OF (coord, values) TUPLES, WHERE
        coord IS THE INDEX INTO self CUBE (-1 INDEX FOR COORDINATES NOT GROUPED BY)
        values ALL VALUES THAT BELONG TO THE SLICE
    """
    edges = DictList([_normalize_edge(e) for e in edges])

    stacked = [e for e in self.edges if e.name in edges.name]
    remainder = [e for e in self.edges if e.name not in edges.name]
    selector = [1 if e.name in edges.name else 0 for e in self.edges]

    if len(stacked) + len(remainder) != len(self.edges):
        Log.error("can not find some edges to group by")

    # CACHE SOME RESULTS
    keys = edges.name
    getKey = [e.domain.getKey for e in self.edges]
    lookup = [
        [getKey[i](p) for p in e.domain.partitions + ([None] if e.allowNulls else [])]
        for i, e in enumerate(self.edges)
    ]

    def coord2term(coord):
        return wrap_leaves({keys[i]: lookup[i][c] for i, c in enumerate(coord)})

    if isinstance(self.select, list):
        selects = listwrap(self.select)
        index, v = zip(*self.data[selects[0].name].groupby(selector))

        coord = wrap([coord2term(c) for c in index])

        values = [v]
        for s in selects[1::]:
            i, v = zip(*self.data[s.name].group_by(selector))
            values.append(v)

        output = zip(coord, [
            Cube(self.select, remainder, {s.name: v[i] for i, s in enumerate(selects)})
            for v in zip(*values)
        ])
    elif not remainder:
        # v IS A VALUE, NO NEED TO WRAP IT IN A Cube
        output = (
            (coord2term(coord), v)
            for coord, v in self.data[self.select.name].groupby(selector)
        )
    else:
        output = (
            (coord2term(coord), Cube(self.select, remainder, v))
            for coord, v in self.data[self.select.name].groupby(selector)
        )

    return output
def sort(data, fieldnames=None, already_normalized=False):
    """
    PASS A FIELD NAME, OR LIST OF FIELD NAMES, OR LIST OF STRUCTS WITH {"field": field_name, "sort": direction}
    """
    try:
        if data == None:
            return Null

        if not fieldnames:
            return wrap(sorted(data, value_compare))

        if already_normalized:
            formal = fieldnames
        else:
            formal = query._normalize_sort(fieldnames)

        funcs = [(jx_expression_to_function(f.value), f.sort) for f in formal]

        def comparer(left, right):
            for func, sort_ in funcs:
                try:
                    result = value_compare(func(left), func(right), sort_)
                    if result != 0:
                        return result
                except Exception, e:
                    Log.error("problem with compare", e)
            return 0

        if isinstance(data, list):
            output = DictList([unwrap(d) for d in sorted(data, cmp=comparer)])
        elif hasattr(data, "__iter__"):
            output = DictList([unwrap(d) for d in sorted(list(data), cmp=comparer)])
        else:
            Log.error("Do not know how to handle")
            output = None

        return output
    except Exception, e:
        Log.error("Problem sorting", e)
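# USAGE SKETCH (illustrative; rows are hypothetical): sort descending on one field.
def _example_sort():
    data = [{"a": 2}, {"a": 1}, {"a": 3}]
    print sort(data, {"field": "a", "sort": -1})  # -> rows with a = 3, 2, 1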
def wrap(v):
    type_ = _get(v, "__class__")

    if type_ is dict:
        m = Dict(v)
        return m
    elif type_ is NoneType:
        return Null
    elif type_ is list:
        return DictList(v)
    elif type_ is GeneratorType:
        return (wrap(vv) for vv in v)
    else:
        return v
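# USAGE SKETCH (illustrative): plain containers become their null-safe
# counterparts; everything else passes through unchanged.
def _example_wrap():
    print type(wrap({"a": 1}))  # -> Dict
    print type(wrap([1, 2]))    # -> DictList
    print wrap(None)            # -> Null
    print wrap(42)              # -> 42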
def _getAllEdges(facetEdges, edgeDepth):
    """
    RETURN ALL PARTITION COMBINATIONS: A LIST OF ORDERED TUPLES
    """
    if edgeDepth == len(facetEdges):
        return [()]
    edge = facetEdges[edgeDepth]

    deeper = _getAllEdges(facetEdges, edgeDepth + 1)

    output = DictList()
    partitions = edge.domain.partitions
    for part in partitions:
        for deep in deeper:
            output.append((part,) + deep)
    return output
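# SKETCH (illustrative; the edges below are hypothetical stand-ins exposing only
# domain.partitions): the output is the cross product of all edge partitions.
def _example_getAllEdges():
    color = wrap({"domain": {"partitions": ["red", "blue"]}})
    size = wrap({"domain": {"partitions": ["S", "M"]}})
    print _getAllEdges([color, size], 0)
    # -> [("red", "S"), ("red", "M"), ("blue", "S"), ("blue", "M")]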
def es_deepop(es, mvel, query):
    FromES = es09.util.build_es_query(query)

    select = query.edges

    temp_query = query.copy()
    temp_query.select = select
    temp_query.edges = DictList()
    FromES.facets.mvel = {
        "terms": {
            "script_field": mvel.code(temp_query),
            "size": query.limit
        },
        "facet_filter": simplify_esfilter(jx_expression(query.where).to_esfilter())
    }

    data = es09.util.post(es, FromES, query.limit)

    rows = unpack_terms(data.facets.mvel, query.edges)
    terms = zip(*rows)

    # NUMBER ALL EDGES FOR JSON EXPRESSION INDEXING
    edges = query.edges
    for f, e in enumerate(edges):
        for r in terms[f]:
            e.domain.getPartByKey(r)

        e.index = f
        for p, part in enumerate(e.domain.partitions):
            part.dataIndex = p
        e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    dims = [len(e.domain.partitions) for e in query.edges]
    output = Matrix(*dims)

    # FILL CUBE
    for r in rows:
        term_coord = [e.domain.getPartByKey(r[i]).dataIndex for i, e in enumerate(edges)]
        output[term_coord] = SUM(output[term_coord], r[-1])

    cube = Cube(query.select, query.edges, {query.select.name: output})
    cube.frum = query
    return cube
def _tuple(template, data, fields, depth, output):
    deep_path = None
    deep_fields = DictList()
    for d in data:
        record = template
        for f in fields:
            index, children, record = _tuple_deep(d, f, depth, record)
            if index:
                path = f.value[0:index:]
                deep_fields.append(f)
                if deep_path and path != deep_path:
                    Log.error("Dangerous to select into more than one branch at time")
                deep_path = path
        if not children:
            output.append(record)
        else:
            _tuple(record, children, deep_fields, depth + 1, output)

    return output
def update(self, command):
    """
    EXPECTING command == {"set": term, "where": where}
    THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
    THE where CLAUSE IS AN ES FILTER
    """
    command = wrap(command)

    # GET IDS OF DOCUMENTS
    results = self._es.search({
        "fields": [],
        "query": {
            "filtered": {
                "query": {"match_all": {}},
                "filter": _normalize_where(command.where, self)
            }
        },
        "size": 200000
    })

    # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
    scripts = DictList()
    for k, v in command.set.items():
        if not is_keyword(k):
            Log.error("Only support simple paths for now")
        scripts.append("ctx._source." + k + " = " + expressions.qb_expression_to_ruby(v) + ";\n")
    script = "".join(scripts)

    if results.hits.hits:
        command = []
        for id in results.hits.hits._id:
            command.append({"update": {"_id": id}})
            command.append({"script": script})
        content = ("\n".join(convert.value2json(c) for c in command) + "\n").encode('utf-8')
        self._es.cluster._post(
            self._es.path + "/_bulk",
            data=content,
            headers={"Content-Type": "application/json"}
        )
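# COMMAND SHAPE (illustrative; the index and values are hypothetical): assign a
# constant to one field on every document matching the filter.
#     index.update({
#         "set": {"status": "closed"},
#         "where": {"term": {"id": 42}}
#     })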
def addParts(parentPart, childPath, count, index):
    """
    BUILD A hierarchy BY REPEATEDLY CALLING THIS METHOD WITH VARIOUS childPaths
    count IS THE NUMBER FOUND FOR THIS PATH
    """
    if index == None:
        index = 0

    if index == len(childPath):
        return
    c = childPath[index]
    parentPart.count = coalesce(parentPart.count, 0) + count

    if parentPart.partitions == None:
        parentPart.partitions = DictList()
    for i, part in enumerate(parentPart.partitions):
        if part.name == c.name:
            addParts(part, childPath, count, index + 1)
            return

    parentPart.partitions.append(c)
    addParts(c, childPath, count, index + 1)
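# USAGE SKETCH (illustrative; parts are hypothetical): counts accumulate down
# the path, and missing intermediate parts are appended as they are first seen.
def _example_addParts():
    root = Dict(name="root")
    addParts(root, wrap([{"name": "2015"}, {"name": "Q1"}]), 3, 0)
    print root.count                             # -> 3
    print root.partitions[0].name                # -> 2015
    print root.partitions[0].partitions[0].name  # -> Q1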
def normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE
    """
    if not sort:
        return DictList.EMPTY

    output = DictList()
    for s in listwrap(sort):
        if isinstance(s, basestring) or Math.is_integer(s):
            output.append({"value": s, "sort": 1})
        elif not s.field and not s.value and s.sort == None:
            # ASSUME {name: sort} FORM
            for n, v in s.items():
                output.append({"value": n, "sort": sort_direction[v]})
        else:
            output.append({
                "value": coalesce(s.field, s.value),
                "sort": coalesce(sort_direction[s.sort], 1)
            })
    return wrap(output)
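# SKETCH (illustrative): unlike _normalize_sort above, this variant keeps raw
# field names instead of compiling them to expressions, e.g.
#     normalize_sort("a")            # -> [{"value": "a", "sort": 1}]
#     normalize_sort({"a": "desc"})  # -> [{"value": "a", "sort": -1}]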