コード例 #1
0
def format_cube(T, select, query=None):
    """
    Render the rows of T as a Cube with a single "rownum" edge.

    The rows are first laid out by format_table(); each header of that table
    becomes one Matrix in the cube, holding the values of that column.

    :param T: rows to format (handed to format_table)
    :param select: select clause (handed to format_table and to the Cube)
    :param query: handed to format_table, not otherwise used here
    :return: Cube whose "rownum" domain runs from 0 to the number of rows
    """
    table = format_table(T, select, query)
    num_rows = len(table.data)

    rownum_edge = {
        "name": "rownum",
        "domain": {
            "type": "rownum",
            "min": 0,
            "max": num_rows,
            "interval": 1
        }
    }

    columns = {}
    if num_rows == 0:
        # NOTHING TO TRANSPOSE, EVERY HEADER GETS ITS OWN EMPTY Matrix
        for header in table.header:
            columns[header] = Matrix(list=[])
    else:
        # TURN THE LIST OF ROWS INTO A LIST OF COLUMNS
        transposed = zip(*unwrap(table.data))
        for position, header in enumerate(table.header):
            columns[header] = Matrix(list=transposed[position])

    return Cube(select, edges=[rownum_edge], data=columns)
コード例 #2
0
ファイル: setop.py プロジェクト: davehunt/ActiveData
def es_fieldop(es, query):
    """
    Send a "fields" request for query and collect one Matrix per select
    clause from the returned hits.

    :param es: handed to es09.util.post as the target of the request
    :param query: supplies select, where, limit and sort
    :return: NOTE(review): the code visible here fills `matricies` and then
             ends without returning it - confirm whether the tail of this
             function is missing
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)
    FromES.query = {
        "filtered": {
            "query": {
                "match_all": {}
            },
            "filter":
            simplify_esfilter(jx_expression(query.where).to_esfilter())
        }
    }
    FromES.size = coalesce(query.limit, 200000)
    FromES.fields = DictList()
    for s in select.value:
        if s == "*":
            # EVERYTHING IS WANTED, SO NO FIELD LIST IS SENT
            FromES.fields = None
        elif isinstance(s, list):
            FromES.fields.extend(s)
        elif isinstance(s, Mapping):
            FromES.fields.extend(s.values())
        else:
            FromES.fields.append(s)
    FromES.sort = [{
        s.field: "asc" if s.sort >= 0 else "desc"
    } for s in query.sort]

    data = es09.util.post(es, FromES, query.limit)

    T = data.hits.hits
    matricies = {}
    for s in select:
        if s.value == "*":
            matricies[s.name] = Matrix.wrap([t._source for t in T])
        elif isinstance(s.value, Mapping):
            # for k, v in s.value.items():
            #     matricies[join_field(split_field(s.name)+[k])] = Matrix.wrap([unwrap(t.fields)[v] for t in T])
            matricies[s.name] = Matrix.wrap([{
                k: unwrap(t.fields).get(v, None)
                for k, v in s.value.items()
            } for t in T])
        elif isinstance(s.value, list):
            matricies[s.name] = Matrix.wrap([
                tuple(unwrap(t.fields).get(ss, None) for ss in s.value)
                for t in T
            ])
        elif not s.value:
            matricies[s.name] = Matrix.wrap(
                [unwrap(t.fields).get(s.value, None) for t in T])
        else:
            try:
                matricies[s.name] = Matrix.wrap(
                    [unwrap(t.fields).get(s.value, None) for t in T])
            except Exception as e:
                # "as" FORM IS VALID ON PYTHON 2.6+ AND PYTHON 3
                Log.error("", e)
コード例 #3
0
ファイル: cube.py プロジェクト: klahnakoski/MoTreeherder
    def __init__(self, select, edges, data, frum=None):
        """
        data IS EXPECTED TO BE A dict TO MATRICES, BUT OTHER COLLECTIONS ARE
        ALLOWED, USING THE select AND edges TO DESCRIBE THE data

        :param select: one select clause (value cube) or a list of them
        :param edges: edge definitions; when falsy they are inferred from data
        :param data: dict of name -> Matrix, or a list / Matrix / plain value
                     which is then stored under select.name
        :param frum: accepted, but not read by this constructor
        """

        self.is_value = False if isinstance(select, list) else True
        self.select = select
        self.meta = Dict(format="cube")       # PUT EXTRA MARKUP HERE
        self.is_none = False

        if not all(data.values()):
            # STORE THE FLAG ON THE INSTANCE; A BARE LOCAL WOULD BE DISCARDED
            self.is_none = True

        # ENSURE frum IS PROPER FORM
        if isinstance(select, list):
            if edges and OR(not isinstance(v, Matrix) for v in data.values()):
                Log.error("Expecting data to be a dict with Matrix values")

        if not edges:
            if not data:
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix.ZERO}
                self.edges = DictList.EMPTY
            elif isinstance(data, Mapping):
                # EXPECTING NO MORE THAN ONE rownum EDGE IN THE DATA
                length = MAX([len(v) for v in data.values()])
                if length >= 1:
                    self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum"}}])
                else:
                    self.edges = DictList.EMPTY
            elif isinstance(data, list):
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                # COUNT THE ROWS BEFORE data IS REBOUND TO A ONE-ENTRY dict,
                # OTHERWISE THE rownum DOMAIN WOULD ALWAYS END AT 1
                num_rows = len(data)
                data = {select.name: Matrix.wrap(data)}
                self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": num_rows, "interval": 1}}])
            elif isinstance(data, Matrix):
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                # NOTE(review): self.edges IS NOT ASSIGNED ON THIS PATH - CONFIRM THAT IS INTENDED
                data = {select.name: data}
            else:
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix(value=data)}
                self.edges = DictList.EMPTY
        else:
            self.edges = wrap(edges)

        self.data = data
コード例 #4
0
ファイル: cube.py プロジェクト: davehunt/ActiveData
    def window(self, window):
        """
        Add a column named window.name to this cube, computed cell by cell.

        For every coordinate of the first existing column a record is built
        from all existing columns plus the matching edge parts, and the
        function obtained from jx.get(window.value) is applied to it.

        :param window: window clause; edges and sort are not supported
        :return: this cube, with window appended to its select
        """
        if window.edges or window.sort:
            Log.error("not implemented")

        from pyLibrary.queries import jx

        # SET OP
        template = self.data.values()[0]
        compute = jx.get(window.value)
        existing_names = self.data.keys()

        # ANNOTATE EXISTING CUBE WITH NEW COLUMN
        new_column = Matrix(dims=template.dims)
        self.data[window.name] = new_column
        for coord in template._all_combos():
            # IT IS SAD WE MUST HAVE A Dict(), THERE ARE {"script": expression} USING THE DOT NOTATION
            record = Dict()
            for column_name in existing_names:
                record[column_name] = self.data[column_name][coord]
            for part_index, edge in zip(coord, self.edges):
                record[edge.name] = edge.domain.partitions[part_index]
            # DUMMY Null VALUES BECAUSE I DO NOT KNOW WHAT TO DO
            new_column[coord] = compute(record, Null, Null)

        self.select.append(window)
        return self
コード例 #5
0
ファイル: format.py プロジェクト: davehunt/ActiveData
    def data():
        """
        Yield one list per cell: the decoded edge values followed by the
        selected values.

        Cells reported by aggs_iterator come first.  Unless the query is a
        groupby, every cell that was never reported is then emitted with 0
        for "count" selects and None for all others.
        """
        sizes = []
        for edge in new_edges:
            extra = 0 if edge.allowNulls is False else 1
            sizes.append(len(edge.domain.partitions) + extra)
        is_sent = Matrix(dims=tuple(sizes), zeros=0)

        for _row, coord, agg in aggs_iterator(aggs, decoders):
            is_sent[coord] = 1

            found = [decoder.get_value(c) for c, decoder in zip(coord, decoders)]
            found.extend(_pull(s, agg) for s in select)
            yield found

        # EMIT THE MISSING CELLS IN THE CUBE
        if query.groupby:
            return
        for coord, sent in is_sent:
            if sent:
                continue
            missing = [
                decoder.get_value(coord[i]) for i, decoder in enumerate(decoders)
            ]
            missing.extend(
                0 if s.aggregate == "count" else None for s in select)
            yield missing
コード例 #6
0
ファイル: jx_usingMySQL.py プロジェクト: davehunt/ActiveData
        def post(sql):
            """
            Run sql as a column query and fold the result into one Matrix per
            select.

            The first len(edges) result columns are read as edge values, the
            following columns as the selected values.  Edges with a "default"
            domain are turned into "set" domains made of the distinct values
            found; any other domain type is reported through Log.error.

            :param sql: statement handed to self.db.column_query
            :return: list of matrices when query.select is a list, otherwise
                     the first matrix only
            """
            # FIND OUT THE default DOMAIN SIZES
            result = self.db.column_query(sql)
            num_edges = len(edges)
            for position, edge in enumerate(edges):
                domain = edge.domain
                if domain.type != "default":
                    Log.error("Do not know what to do here, yet")
                else:
                    domain.type = "set"
                    distinct = set(result[position])
                    partitions = []
                    lookup = {}
                    for index, part in enumerate(distinct):
                        partitions.append({"index": index, "value": part})
                        lookup[part] = index
                    domain.partitions = partitions
                    domain.map = lookup

            # FILL THE DATA CUBE
            maps = []
            for position, edge in enumerate(edges):
                maps.append((unwrap(edge.domain.map), result[position]))

            cubes = DictList()
            for column, _ in enumerate(select):
                dims = [
                    len(edge.domain.partitions) + (1 if edge.allow_nulls else 0)
                    for edge in edges
                ]
                matrix = Matrix(*dims)
                for rownum, value in enumerate(result[column + num_edges]):
                    # TRANSLATE EACH EDGE VALUE OF THIS ROW TO ITS PART INDEX
                    coord = [lookup[values[rownum]] for lookup, values in maps]
                    matrix[coord] = value
                cubes.append(matrix)

            return cubes if isinstance(query.select, list) else cubes[0]
コード例 #7
0
ファイル: format.py プロジェクト: davehunt/ActiveData
    def data():
        """
        Yield one Dict per cell, keyed by edge name and select name.

        Cells reported by aggs_iterator come first.  Unless the query is a
        groupby, every cell that was never reported is then emitted with its
        edge values and a 0 for each "count" select; other selects are left
        unset.
        """
        sizes = []
        for edge in new_edges:
            extra = 0 if edge.allowNulls is False else 1
            sizes.append(len(edge.domain.partitions) + extra)
        is_sent = Matrix(dims=tuple(sizes), zeros=0)

        for _row, coord, agg in aggs_iterator(aggs, decoders):
            is_sent[coord] = 1

            found = Dict()
            for edge, c, decoder in zip(query.edges, coord, decoders):
                found[edge.name] = decoder.get_value(c)

            for s in select:
                found[s.name] = _pull(s, agg)
            yield found

        # EMIT THE MISSING CELLS IN THE CUBE
        if query.groupby:
            return
        for coord, sent in is_sent:
            if sent:
                continue
            missing = Dict()
            for i, decoder in enumerate(decoders):
                missing[query.edges[i].name] = decoder.get_value(coord[i])

            for s in select:
                if s.aggregate == "count":
                    missing[s.name] = 0
            yield missing
コード例 #8
0
ファイル: format.py プロジェクト: klahnakoski/MoTreeherder
def format_cube_from_aggop(decoders, aggs, start, query, select):
    """
    Build a zero-dimensional Cube (no edges) holding one value per select.

    :param decoders: not used here
    :param aggs: aggregation result; drill() is applied to it before pulling
    :param start: not used here
    :param query: supplies the cube's select and is stored as cube.frum
    :param select: select clauses whose values are pulled with _pull
    :return: the Cube
    """
    agg = drill(aggs)

    pairs = []
    for s in select:
        pairs.append((s, Matrix(dims=[], zeros=s.default)))

    # A MATRIX WITHOUT DIMENSIONS HAS A SINGLE CELL AT THE EMPTY COORDINATE
    for s, cell in pairs:
        cell[()] = _pull(s, agg)

    by_name = dict((s.name, cell) for s, cell in pairs)
    cube = Cube(query.select, [], by_name)
    cube.frum = query
    return cube
コード例 #9
0
ファイル: cube.py プロジェクト: davehunt/ActiveData
    def __getitem__(self, item):
        """
        Slice the cube.

        * Mapping of edge name -> part value: fixes those edges.  Returns
          Null when a value matches no part, a wrapped dict of cell values
          when every edge is fixed, otherwise a Cube over the edges left.
        * string: returns the value cube for the select with that name.
        * anything else is reported through Log.error.
        """
        # TODO: SOLVE FUNDAMENTAL QUESTION OF IF SELECTING A PART OF AN
        # EDGE REMOVES THAT EDGE FROM THIS RESULT, OR ADDS THE PART
        # AS A select {"name":edge.name, "value":edge.domain.partitions[coord]}
        # PROBABLY NOT, THE value IS IDENTICAL OVER THE REMAINING
        if isinstance(item, Mapping):
            coordinates = [None] * len(self.edges)

            # MAP DICT TO NUMERIC INDICES
            for name, v in item.items():
                candidates = wrap([
                    (i, e.domain.partitions)
                    for i, e in enumerate(self.edges)
                    if e.name == name
                ])
                ei, parts = candidates[0]
                if not parts:
                    Log.error(
                        "Can not find {{name}}=={{value|quote}} in list of edges, maybe this feature is not implemented yet",
                        name=name,
                        value=v)
                matching = wrap([p for p in parts if p.value == v])
                part = matching[0]
                if not part:
                    return Null
                coordinates[ei] = part.dataIndex

            # EDGES THAT WERE NOT PINNED TO A PART
            remaining = [
                e for e, v in zip(self.edges, coordinates) if v is None
            ]
            if remaining:
                sliced = {}
                for k, c in self.data.items():
                    sliced[k] = Matrix(values=c.__getitem__(coordinates))
                return Cube(select=self.select,
                            edges=wrap(remaining),
                            data=sliced)

            # ZERO DIMENSIONAL VALUE
            return wrap({
                k: v.__getitem__(coordinates)
                for k, v in self.data.items()
            })

        if isinstance(item, basestring):
            # RETURN A VALUE CUBE
            if self.is_value:
                if item != self.select.name:
                    Log.error("{{name}} not found in cube", name=item)
                return self

            if item not in self.select.name:
                Log.error("{{name}} not found in cube", name=item)

            chosen = [s for s in self.select if s.name == item][0]
            return Cube(select=chosen,
                        edges=self.edges,
                        data={item: self.data[item]})

        Log.error("not implemented yet")
コード例 #10
0
ファイル: cube.py プロジェクト: klahnakoski/MoTreeherder
 def _select(self, select):
     """
     Return a new Cube with the requested columns.

     When any select asks for an aggregate (other than "none") every column
     is aggregated into a single-value Matrix and the result has no edges;
     otherwise the existing matrices are reused with this cube's edges.
     """
     selects = listwrap(select)
     is_aggregate = OR(s.aggregate != None and s.aggregate != "none" for s in selects)

     if not is_aggregate:
         passthrough = {s.name: self.data[s.value] for s in selects}
         return Cube(select, self.edges, passthrough)

     reduced = {}
     for s in selects:
         reduced[s.name] = Matrix(value=self.data[s.value].aggregate(s.aggregate))
     return Cube(select, [], reduced)
コード例 #11
0
ファイル: setop.py プロジェクト: klahnakoski/Activedata-ETL
def format_cube(T, select, source):
    """
    Collect one Matrix per select clause from the hits in T.

    :param T: hits; each one is indexed by `source` to reach its values
    :param select: select clauses (name, value)
    :param source: key to read on every hit; "_source" is indexed directly,
                   any other source is read with .get()
    :return: NOTE(review): the code visible here fills `matricies` and then
             ends without returning it - confirm whether the tail of this
             function is missing
    """
    matricies = {}
    for s in select:
        try:
            if s.value == ".":
                matricies[s.name] = Matrix.wrap(T.select(source))
            elif isinstance(s.value, list):
                matricies[s.name] = Matrix.wrap([tuple(unwraplist(t[source][ss]) for ss in s.value) for t in T])
            else:
                if source == "_source":
                    matricies[s.name] = Matrix.wrap([unwraplist(t[source][s.value]) for t in T])

                elif isinstance(s.value, basestring):  # fields
                    matricies[s.name] = Matrix.wrap([unwraplist(t[source].get(s.value)) for t in T])
                else:
                    matricies[s.name] = Matrix.wrap([unwraplist(t[source].get(s.name)) for t in T])
        except Exception as e:
            # "as" FORM IS VALID ON PYTHON 2.6+ AND PYTHON 3
            Log.error("", e)
コード例 #12
0
ファイル: setop.py プロジェクト: klahnakoski/Activedata-ETL
def es_fieldop(es, query):
    """
    Send a "fields" request for query and collect one Matrix per select
    clause from the returned hits.

    :param es: handed to es09.util.post as the target of the request
    :param query: supplies select, where, limit and sort
    :return: NOTE(review): the code visible here fills `matricies` and then
             ends without returning it - confirm whether the tail of this
             function is missing
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)
    FromES.query = {
        "filtered": {
            "query": {
                "match_all": {}
            },
            "filter": simplify_esfilter(query.where)
        }
    }
    FromES.size = coalesce(query.limit, 200000)
    FromES.fields = DictList()
    for s in select.value:
        if s == "*":
            # EVERYTHING IS WANTED, SO NO FIELD LIST IS SENT
            FromES.fields = None
        elif isinstance(s, list):
            FromES.fields.extend(s)
        elif isinstance(s, Mapping):
            FromES.fields.extend(s.values())
        else:
            FromES.fields.append(s)
    FromES.sort = [{s.field: "asc" if s.sort >= 0 else "desc"} for s in query.sort]

    data = es09.util.post(es, FromES, query.limit)

    T = data.hits.hits
    matricies = {}
    for s in select:
        if s.value == "*":
            matricies[s.name] = Matrix.wrap([t._source for t in T])
        elif isinstance(s.value, Mapping):
            # for k, v in s.value.items():
            #     matricies[join_field(split_field(s.name)+[k])] = Matrix.wrap([unwrap(t.fields)[v] for t in T])
            matricies[s.name] = Matrix.wrap([{k: unwrap(t.fields).get(v, None) for k, v in s.value.items()}for t in T])
        elif isinstance(s.value, list):
            matricies[s.name] = Matrix.wrap([tuple(unwrap(t.fields).get(ss, None) for ss in s.value) for t in T])
        elif not s.value:
            matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
        else:
            try:
                matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
            except Exception as e:
                # "as" FORM IS VALID ON PYTHON 2.6+ AND PYTHON 3
                Log.error("", e)
コード例 #13
0
ファイル: cube.py プロジェクト: klahnakoski/MoDataSubmission
    def __init__(self, select, edges, data, frum=None):
        """
        data IS EXPECTED TO BE A dict TO MATRICES, BUT OTHER COLLECTIONS ARE
        ALLOWED, USING THE select AND edges TO DESCRIBE THE data

        :param select: one select clause (value cube) or a list of them
        :param edges: edge definitions; when falsy they are inferred from data
        :param data: dict of name -> Matrix, or a list / Matrix / plain value
                     which is then stored under select.name
        :param frum: accepted, but not read by this constructor
        """

        self.is_value = False if isinstance(select, list) else True
        self.select = select
        self.meta = Dict(format="cube")  # PUT EXTRA MARKUP HERE
        self.is_none = False

        if not all(data.values()):
            # STORE THE FLAG ON THE INSTANCE; A BARE LOCAL WOULD BE DISCARDED
            self.is_none = True

        # ENSURE frum IS PROPER FORM
        if isinstance(select, list):
            if edges and OR(not isinstance(v, Matrix) for v in data.values()):
                Log.error("Expecting data to be a dict with Matrix values")

        if not edges:
            if not data:
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix.ZERO}
                self.edges = DictList.EMPTY
            elif isinstance(data, Mapping):
                # EXPECTING NO MORE THAN ONE rownum EDGE IN THE DATA
                length = MAX([len(v) for v in data.values()])
                if length >= 1:
                    self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum"}}])
                else:
                    self.edges = DictList.EMPTY
            elif isinstance(data, list):
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                # COUNT THE ROWS BEFORE data IS REBOUND TO A ONE-ENTRY dict,
                # OTHERWISE THE rownum DOMAIN WOULD ALWAYS END AT 1
                num_rows = len(data)
                data = {select.name: Matrix.wrap(data)}
                self.edges = wrap(
                    [{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": num_rows, "interval": 1}}]
                )
            elif isinstance(data, Matrix):
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                # NOTE(review): self.edges IS NOT ASSIGNED ON THIS PATH - CONFIRM THAT IS INTENDED
                data = {select.name: data}
            else:
                if isinstance(select, list):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix(value=data)}
                self.edges = DictList.EMPTY
        else:
            self.edges = wrap(edges)

        self.data = data
コード例 #14
0
ファイル: format.py プロジェクト: klahnakoski/MoTreeherder
    def data():
        """
        Yield one list per cell reported by aggs_iterator: the decoded edge
        values followed by the selected values.

        Every reported coordinate is also flagged in the is_sent matrix.
        """
        sizes = []
        for edge in new_edges:
            extra = 0 if edge.allowNulls is False else 1
            sizes.append(len(edge.domain.partitions) + extra)
        is_sent = Matrix(dims=tuple(sizes), zeros=0)

        for _row, coord, agg in aggs_iterator(aggs, decoders):
            is_sent[coord] = 1

            found = [decoder.get_value(c) for c, decoder in zip(coord, decoders)]
            found.extend(_pull(s, agg) for s in select)
            yield found
コード例 #15
0
ファイル: format.py プロジェクト: klahnakoski/MoTreeherder
def format_cube(decoders, aggs, start, query, select):
    """
    Fill one Matrix per select clause from the aggregation result.

    The matrices are sized from the edges returned by count_dim(); an edge
    contributes one extra slot unless its allowNulls is exactly False.

    :param decoders: handed to count_dim and aggs_iterator
    :param aggs: aggregation result to walk
    :param start: not used in the code visible here
    :param query: not used in the code visible here
    :param select: select clauses; each one gets a Matrix defaulting to
                   s.default
    :return: NOTE(review): the code visible here ends after the fill loop
             without returning anything - confirm whether the tail of this
             function is missing
    """
    new_edges = count_dim(aggs, decoders)
    dims = tuple(
        len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
        for e in new_edges)
    matricies = [(s, Matrix(dims=dims, zeros=s.default)) for s in select]
    for row, coord, agg in aggs_iterator(aggs, decoders):
        for s, m in matricies:
            try:
                v = _pull(s, agg)
                m[coord] = v
            except Exception as e:
                # "as" FORM IS VALID ON PYTHON 2.6+ AND PYTHON 3
                Log.error("", e)
コード例 #16
0
def format_cube_from_aggop(decoders, aggs, start, query, select):
    """
    Build a zero-dimensional Cube (no edges) holding one value per select.

    The aggregation result is first unwrapped: as long as the current level
    has a _filter or _nested member, that member becomes the current level.

    :param decoders: not used here
    :param aggs: aggregation result
    :param start: not used here
    :param query: supplies the cube's select and is stored as cube.frum
    :param select: select clauses; s.pull names the value to read
    :return: the Cube
    """
    agg = aggs
    while True:
        inner = coalesce(agg._filter, agg._nested)
        if not inner:
            break
        agg = inner

    pairs = []
    for s in select:
        pairs.append((s, Matrix(dims=[], zeros=(s.aggregate == "count"))))

    # A MATRIX WITHOUT DIMENSIONS HAS A SINGLE CELL AT THE EMPTY COORDINATE
    for s, cell in pairs:
        cell[()] = agg[s.pull]

    by_name = dict((s.name, cell) for s, cell in pairs)
    cube = Cube(query.select, [], by_name)
    cube.frum = query
    return cube
コード例 #17
0
def format_cube(T, select, source):
    """
    Collect one Matrix per select clause from the hits in T.

    :param T: hits; each one is indexed by `source` to reach its values
    :param select: select clauses (name, value)
    :param source: key to read on every hit; "_source" is indexed directly,
                   any other source is read with .get()
    :return: NOTE(review): the code visible here fills `matricies` and then
             ends without returning it - confirm whether the tail of this
             function is missing
    """
    matricies = {}
    for s in select:
        try:
            if s.value == ".":
                matricies[s.name] = Matrix.wrap(T.select(source))
            elif isinstance(s.value, list):
                matricies[s.name] = Matrix.wrap([
                    tuple(unwraplist(t[source][ss]) for ss in s.value)
                    for t in T
                ])
            else:
                if source == "_source":
                    matricies[s.name] = Matrix.wrap(
                        [unwraplist(t[source][s.value]) for t in T])

                elif isinstance(s.value, basestring):  # fields
                    matricies[s.name] = Matrix.wrap(
                        [unwraplist(t[source].get(s.value)) for t in T])
                else:
                    matricies[s.name] = Matrix.wrap(
                        [unwraplist(t[source].get(s.name)) for t in T])
        except Exception as e:
            # "as" FORM IS VALID ON PYTHON 2.6+ AND PYTHON 3
            Log.error("", e)
コード例 #18
0
ファイル: format.py プロジェクト: klahnakoski/MoTreeherder
    def data():
        """
        Yield one Dict per cell reported by aggs_iterator, keyed by edge name
        and select name.

        Every reported coordinate is also flagged in the is_sent matrix.
        """
        sizes = []
        for edge in new_edges:
            extra = 0 if edge.allowNulls is False else 1
            sizes.append(len(edge.domain.partitions) + extra)
        is_sent = Matrix(dims=tuple(sizes), zeros=0)

        for _row, coord, agg in aggs_iterator(aggs, decoders):
            is_sent[coord] = 1

            found = Dict()
            for edge, c, decoder in zip(query.edges, coord, decoders):
                found[edge.name] = decoder.get_value(c)

            for s in select:
                found[s.name] = _pull(s, agg)
            yield found
コード例 #19
0
ファイル: setop.py プロジェクト: davehunt/ActiveData
def es_deepop(es, mvel, query):
    """
    Run query as a single scripted terms facet and fold the unpacked rows
    into a Cube.

    The edges of the query are sent as the select of a temporary copy, so the
    script emits one packed term per edge combination; the last element of
    every unpacked row is added into the cell addressed by the row's edges.

    :param es: handed to es09.util.post as the target of the request
    :param mvel: compiler whose code() produces the facet script
    :param query: supplies edges, where, limit and select
    :return: Cube with query.edges and one matrix named query.select.name
    """
    FromES = es09.util.build_es_query(query)

    edge_select = query.edges

    script_query = query.copy()
    script_query.select = edge_select
    script_query.edges = DictList()
    FromES.facets.mvel = {
        "terms": {
            "script_field": mvel.code(script_query),
            "size": query.limit
        },
        "facet_filter":
        simplify_esfilter(jx_expression(query.where).to_esfilter())
    }

    data = es09.util.post(es, FromES, query.limit)

    rows = unpack_terms(data.facets.mvel, query.edges)
    columns = zip(*rows)

    # NUMBER ALL EDGES FOR JSON EXPRESSION INDEXING
    edges = query.edges
    for edge_index, edge in enumerate(edges):
        # presumably getPartByKey registers unseen keys on the domain - TODO confirm
        for key in columns[edge_index]:
            edge.domain.getPartByKey(key)

        edge.index = edge_index
        for part_index, part in enumerate(edge.domain.partitions):
            part.dataIndex = part_index
        edge.domain.NULL.dataIndex = len(edge.domain.partitions)

    # MAKE CUBE
    dims = []
    for edge in query.edges:
        dims.append(len(edge.domain.partitions))
    output = Matrix(*dims)

    # FILL CUBE
    for record in rows:
        term_coord = []
        for position, edge in enumerate(edges):
            term_coord.append(edge.domain.getPartByKey(record[position]).dataIndex)
        output[term_coord] = SUM(output[term_coord], record[-1])

    cube = Cube(query.select, query.edges, {query.select.name: output})
    cube.frum = query
    return cube
コード例 #20
0
def format_cube(decoders, aggs, start, query, select):
    """
    Fill one Matrix per select clause from the aggregation result.

    The matrices are sized from the edges returned by count_dim(); an edge
    contributes one extra slot unless its allowNulls is exactly False.  A
    cell that already holds a truthy value when it is about to be written is
    reported as an error.

    :param decoders: used for count_dim, aggs_iterator and to turn each row
                     into a coordinate via get_index
    :param aggs: aggregation result to walk
    :param start: not used in the code visible here
    :param query: not used in the code visible here
    :param select: select clauses; s.pull names the value to read
    :return: NOTE(review): the code visible here ends after the fill loop
             without returning anything - confirm whether the tail of this
             function is missing
    """
    new_edges = count_dim(aggs, decoders)
    dims = tuple(
        len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
        for e in new_edges)
    matricies = [(s, Matrix(dims=dims, zeros=(s.aggregate == "count")))
                 for s in select]
    for row, agg in aggs_iterator(aggs, decoders):
        coord = tuple(d.get_index(row) for d in decoders)
        for s, m in matricies:
            try:
                if m[coord]:
                    Log.error("Not expected")
                m[coord] = agg[s.pull]
            except Exception as e:
                # THE COORDINATE IS NOT RECOMPUTED HERE: THE RESULT WAS UNUSED
                # AND A FAILURE IN IT WOULD HIDE THE ORIGINAL ERROR
                Log.error("", e)
コード例 #21
0
ファイル: aggop.py プロジェクト: mozilla/ActiveData-ETL
def es_aggop(es, mvel, query):
    """
    Answer an edge-less aggregate query with "statistical" facets.

    When every select is a count the work is delegated to es_countop.
    Otherwise one statistical facet is requested per distinct select value
    (by field when the value is a keyword, by compiled script otherwise) and
    each select reads its aggregate from the facet that covers its value.

    :param es: handed to es09.util.post as the target of the request
    :param mvel: passed through to es_countop
    :param query: supplies select, where and limit
    :return: Cube without edges, one single-value Matrix per select
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        # es_countop IS DECLARED AS (es, mvel, query); mvel MUST BE PASSED
        return es_countop(es, mvel, query)  # SIMPLE, USE TERMS FACET INSTEAD

    value2facet = dict()  # ONLY ONE FACET NEEDED PER
    name2facet = dict()  # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if is_keyword(s.value):
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "field": s.value
                    },
                    "facet_filter": simplify_esfilter(query.where)
                }
            else:
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "script":
                        es09.expressions.compile_expression(s.value, query)
                    },
                    "facet_filter": simplify_esfilter(query.where)
                }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es09.util.post(es, FromES, query.limit)

    # A SELECT THAT SHARES ITS VALUE WITH AN EARLIER ONE HAS NO FACET OF ITS
    # OWN, SO THE FACET IS LOOKED UP THROUGH name2facet RATHER THAN s.name
    matricies = {
        s.name: Matrix(value=fix_es_stats(
            data.facets[literal_field(name2facet[s.name])])[
                aggregates[s.aggregate]])
        for s in select
    }
    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube
コード例 #22
0
ファイル: aggop.py プロジェクト: mozilla/ActiveData-ETL
def es_countop(es, mvel, query):
    """
    RETURN SINGLE COUNT

    One terms facet is requested per select: by field (restricted to
    documents where the field exists) when the value is a keyword, otherwise
    by compiled script.  The count is read from the facet's `total`.

    :param es: handed to es09.util.post as the target of the request
    :param mvel: not used here
    :param query: supplies select, edges and limit
    :return: Cube with one single-value Matrix per select
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)
    for s in select:

        if is_keyword(s.value):
            FromES.facets[s.name] = {
                "terms": {
                    "field": s.value,
                    "size": query.limit,
                },
                "facet_filter": {
                    "exists": {
                        "field": s.value
                    }
                }
            }
        else:
            # COMPLICATED value IS PROBABLY A SCRIPT, USE IT
            FromES.facets[s.name] = {
                "terms": {
                    "script_field":
                    es09.expressions.compile_expression(s.value, query),
                    "size":
                    200000
                }
            }

    data = es09.util.post(es, FromES, query.limit)

    # FACETS ARE READ FROM THE TOP LEVEL OF THE RESPONSE (data.facets), AS
    # es_aggop DOES, NOT FROM UNDER data.hits
    matricies = {}
    for s in select:
        matricies[s.name] = Matrix(value=data.facets[s.name].total)

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
コード例 #23
0
ファイル: format.py プロジェクト: davehunt/ActiveData
def format_cube(decoders, aggs, start, query, select):
    """
    Fill one Matrix per select clause from the aggregation result.

    The matrices are sized from the edges returned by count_dim().  Edges
    whose value is a TupleOp have allowNulls forced to False; an edge
    contributes one extra slot unless its allowNulls is exactly False.

    :param decoders: handed to count_dim and aggs_iterator
    :param aggs: aggregation result to walk
    :param start: not used in the code visible here
    :param query: not used in the code visible here
    :param select: select clauses; each one gets a Matrix defaulting to
                   s.default
    :return: NOTE(review): the code visible here ends after the fill loop
             without returning anything - confirm whether the tail of this
             function is missing
    """
    new_edges = count_dim(aggs, decoders)

    dims = []
    for e in new_edges:
        if isinstance(e.value, TupleOp):
            e.allowNulls = False

        if e.allowNulls is False:
            extra = 0
        else:
            extra = 1
        dims.append(len(e.domain.partitions) + extra)

    dims = tuple(dims)
    matricies = [(s, Matrix(dims=dims, zeros=s.default)) for s in select]
    for row, coord, agg in aggs_iterator(aggs, decoders):
        for s, m in matricies:
            try:
                v = _pull(s, agg)
                m[coord] = v
            except Exception as e:
                # "as" FORM IS VALID ON PYTHON 2.6+ AND PYTHON 3
                Log.error("", e)
コード例 #24
0
def list_aggs(frum, query):
    """
    Aggregate the records of frum into a Cube, in memory.

    Every select gets a Matrix with one dimension per edge of the query.
    Records that pass query.where are matched against each edge and then
    either counted ("count" aggregate) or fed to an accumulator looked up in
    windows.name2accumulator; at the end every accumulator is replaced by the
    result of its end().

    :param frum: the records; also handed to the compiled select functions
    :param query: supplies select, edges and where
    :return: Cube(select, query.edges, result)
    """
    select = listwrap(query.select)

    is_join = False  # True IF MANY TO MANY JOIN WITH AN EDGE
    # EDGES WITHOUT AN EXPLICIT DOMAIN GET ONE MADE OF THE SORTED DISTINCT VALUES
    for e in query.edges:
        if isinstance(e.domain, DefaultDomain):
            e.domain = SimpleSetDomain(
                partitions=list(sorted(set(frum.select(e.value)))))

    # COMPILE EACH SELECT VALUE ONCE AND KEEP THE FUNCTION ON THE SELECT
    for s in listwrap(query.select):
        s["exec"] = qb_expression_to_function(s.value)

    # ONE MATRIX PER SELECT; AN EDGE GETS AN EXTRA SLOT WHEN allowNulls IS TRUTHY
    result = {
        s.name: Matrix(dims=[
            len(e.domain.partitions) + (1 if e.allowNulls else 0)
            for e in query.edges
        ],
                       zeros=s.aggregate == "count")
        for s in select
    }
    where = qb_expression_to_function(query.where)
    for d in filter(where, frum):
        # WORK ON A COPY, EDGE VALUES ARE WRITTEN INTO d BELOW
        d = d.copy()
        coord = [
        ]  # LIST OF MATCHING COORDINATE FAMILIES, USUALLY ONLY ONE PER FAMILY BUT JOINS WITH EDGES CAN CAUSE MORE
        for e in query.edges:
            coord.append(get_matches(e, d))

        for s in select:
            mat = result[s.name]
            agg = s.aggregate
            var = s.value
            if agg == "count":
                for c in itertools.product(*coord):
                    # COUNTING WHOLE RECORDS NEEDS NO EVALUATION
                    if var == "." or var == None:
                        mat[c] += 1
                        continue

                    # NOTE(review): THE COORDINATE ITSELF IS STORED HERE, WHILE THE
                    # ACCUMULATOR BRANCH BELOW STORES THE PARTITION - CONFIRM INTENT
                    for e, cc in zip(query.edges, c):
                        d[e.name] = cc
                    val = s["exec"](d, c, frum)
                    if val != None:
                        mat[c] += 1
            else:
                for c in itertools.product(*coord):
                    acc = mat[c]
                    # FIRST VISIT OF THIS CELL: CREATE ITS ACCUMULATOR
                    if acc == None:
                        acc = windows.name2accumulator.get(agg)
                        if acc == None:
                            Log.error(
                                "select aggregate {{agg}} is not recognized",
                                agg=agg)
                        acc = acc(**s)
                        mat[c] = acc
                    for e, cc in zip(
                            query.edges, c
                    ):  # BECAUSE WE DO NOT KNOW IF s.exec NEEDS THESE EDGES, SO WE PASS THEM ANYWAY
                        d[e.name] = e.domain.partitions[cc]
                    val = s["exec"](d, c, frum)
                    acc.add(val)

    # REPLACE EVERY ACCUMULATOR WITH ITS FINAL VALUE; COUNTS ARE ALREADY NUMBERS
    for s in select:
        if s.aggregate == "count":
            continue
        m = result[s.name]
        for c, var in m.items():
            if var != None:
                m[c] = var.end()

    output = Cube(select, query.edges, result)
    return output
コード例 #25
0
ファイル: setop.py プロジェクト: mozilla/ActiveData-ETL
def es_setop(es, mvel, query):
    """
    Answer a set operation (no edges) and return the result as a Cube.

    Four request shapes are used:
      * single select without a value: plain filtered query, the answer is
        the total hit count
      * single select on a keyword: terms facet on that field
      * any other shallow query: terms facet driven by an mvel script, with
        the where clause moved to the facet filter
      * deep (nested) query: terms facet driven by an mvel script

    :param es: handed to es09.util.post as the target of the request
    :param mvel: compiler whose code() produces the facet script
    :param query: supplies frum, select, where, limit and sort
    :return: Cube without edges, one Matrix per select
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    isDeep = len(split_field(
        query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isComplex = OR([
        s.value == None and s.aggregate not in ("count", "none")
        for s in select
    ])  # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

    isSingle = len(select) == 1
    isPlain = isSingle and not isDeep and not isComplex

    if isPlain and not select[0].value:
        FromES.query = {
            "filtered": {
                "query": {
                    "match_all": {}
                },
                "filter": simplify_esfilter(query.where)
            }
        }
        FromES.size = 1  # PREVENT QUERY CHECKER FROM THROWING ERROR
    elif isPlain and isKeyword(select[0].value):
        FromES.facets.mvel = {
            "terms": {
                "field": select[0].value,
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }
        if query.sort:
            s = query.sort
            if len(s) > 1:
                Log.error("can not sort by more than one field")

            s0 = s[0]
            if s0.field != select[0].value:
                Log.error(
                    "can not sort by anything other than count, or term")

            # THE ORDER BELONGS TO THE terms BODY OF THE mvel FACET DEFINED ABOVE
            FromES.facets.mvel.terms.order = "term" if s0.sort >= 0 else "reverse_term"
    elif not isDeep:
        # ALSO REACHED BY A SINGLE SELECT THAT IS NEITHER EMPTY NOR A KEYWORD,
        # WHICH WOULD OTHERWISE BE SENT WITHOUT ANY QUERY BODY
        simple_query = query.copy()
        simple_query.where = TRUE_FILTER  # THE FACET FILTER IS FASTER
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": coalesce(simple_query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }
    else:
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }

    data = es09.util.post(es, FromES, query.limit)

    if isSingle and not select[0].value:
        # SPECIAL CASE FOR SINGLE COUNT
        output = Matrix(value=data.hits.total)
        cube = Cube(query.select, [], {select[0].name: output})
    elif isSingle and isKeyword(select[0].value):
        # SPECIAL CASE FOR SINGLE TERM
        # THE FACET WAS REQUESTED UNDER THE NAME mvel, SO IT IS READ FROM THERE
        T = data.facets.mvel.terms
        output = Matrix.wrap([t.term for t in T])
        cube = Cube(query.select, [], {select[0].name: output})
    else:
        # EVERYTHING ELSE WAS PACKED BY THE SCRIPT; THIS BRANCH ALSO ENSURES
        # cube IS ALWAYS BOUND BEFORE IT IS USED BELOW
        data_list = unpack_terms(data.facets.mvel, select)
        if not data_list:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
        else:
            output = zip(*data_list)
            cube = Cube(
                select, [],
                {s.name: Matrix(list=output[i])
                 for i, s in enumerate(select)})

    cube.frum = query
    return cube
コード例 #26
0
def _es_terms2(es, mvel, query):
    """
    BUILD A TWO-EDGE CUBE USING ONE TERMS FACET PER (select, FIRST-EDGE VALUE) PAIR

    WE ASSUME THERE ARE JUST TWO EDGES, AND EACH HAS A SIMPLE value

    :param es: PASSED THROUGH TO es_terms() AND es09.util.post()
    :param mvel: PASSED THROUGH TO es_terms()
    :param query: THE QUERY; query.edges[1].domain.partitions IS REPLACED WITH
                  THE TERMS FOUND IN THE RESPONSE
    :return: Cube OF query.select OVER query.edges
    """

    # REQUEST VALUES IN FIRST DIMENSION
    q1 = query.copy()
    q1.edges = query.edges[0:1:]
    values1 = es_terms(es, mvel, q1).edges[0].domain.partitions.value

    select = listwrap(query.select)
    FromES = build_es_query(query)
    for s in select:
        for i, v in enumerate(values1):
            # FACET NAME IS "<select.name>,<INDEX INTO values1>"
            FromES.facets[s.name + "," + str(i)] = {
                "terms": {
                    "field": query.edges[1].value,
                    "size": coalesce(query.limit, 200000)
                },
                "facet_filter":
                simplify_esfilter({
                    "and": [query.where, {
                        "term": {
                            query.edges[0].value: v
                        }
                    }]
                })
            }

    data = es09.util.post(es, FromES, query.limit)

    # UNION ALL TERMS FROM SECOND DIMENSION
    values2 = set()
    for _, f in data.facets.items():
        values2.update(f.terms.term)
    values2 = qb.sort(values2)
    term2index = {v: i for i, v in enumerate(values2)}
    query.edges[1].domain.partitions = DictList([{
        "name": v,
        "value": v
    } for v in values2])

    # MAKE CUBE
    dims = [len(values1), len(values2)]
    output = {s.name: Matrix(*dims) for s in select}

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        # SPLIT ON THE LAST COMMA ONLY: THE INDEX NEVER CONTAINS ONE,
        # BUT select.name MAY
        select_name, index1 = facetName.rsplit(",", 1)
        s = [c for c in select if c.name == select_name][0]
        i1 = int(index1)
        for term in facet.terms:
            i2 = term2index[term.term]
            output[s.name][(i1, i2)] = term[aggregates[s.aggregate]]

    cube = Cube(query.select, query.edges, output)
    cube.query = query
    return cube
コード例 #27
0
ファイル: setop.py プロジェクト: davehunt/ActiveData
def es_setop(es, mvel, query):
    """
    RUN A SET OPERATION (A QUERY WITHOUT EDGES) AGAINST ES

    THREE REQUEST SHAPES ARE USED:
    * PLAIN FILTERED QUERY RETURNING _source (NO select VALUE, OR "*")
    * PLAIN FILTERED QUERY RETURNING fields (ALL select VALUES ARE Variable)
    * A TERMS FACET NAMED mvel, DRIVEN BY A SCRIPT FROM mvel.code()

    :param es: PASSED TO es09.util.post()
    :param mvel: PROVIDES code() TO COMPILE THE QUERY INTO A SCRIPT
    :param query: THE QUERY (frum, select, where, sort, limit ARE READ)
    :return: Dict WITH meta.esquery (THE REQUEST SENT) AND data (THE RESULT)
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    def where_esfilter():
        # DEFERRED, SO ONLY THE BRANCHES THAT NEED IT PAY FOR (OR FAIL ON) IT
        return simplify_esfilter(jx_expression(query.where).to_esfilter())

    # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isDeep = len(split_field(query.frum.name)) > 1
    # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT
    isComplex = OR([
        s.value == None and s.aggregate not in ("count", "none")
        for s in select
    ])

    if isDeep:
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": where_esfilter()
        }
    elif isComplex:
        simple_query = query.copy()
        simple_query.where = TRUE_FILTER  # THE FACET FILTER IS FASTER
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": coalesce(simple_query.limit, 200000)
            },
            "facet_filter": where_esfilter()
        }
    elif (len(select) == 1 and not select[0].value) or select[0].value == "*":
        FromES = wrap({
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": where_esfilter()
            }},
            "sort": query.sort,
            "size": 1
        })
    elif all(isinstance(v, Variable) for v in select.value):
        FromES = wrap({
            "query": {"filtered": {
                "query": {"match_all": {}},
                # NOTE(review): UNLIKE THE OTHER BRANCHES, where IS NOT PASSED
                # THROUGH jx_expression() HERE - CONFIRM WHICH FORM IS INTENDED
                "filter": simplify_esfilter(query.where.to_esfilter())
            }},
            "fields": select.value,
            "sort": query.sort,
            "size": coalesce(query.limit, 200000)
        })

    data = es09.util.post(es, FromES, query.limit)

    if (len(select) == 1 and not select[0].value) or select[0].value == "*":
        # SPECIAL CASE FOR SINGLE COUNT
        cube = wrap(data).hits.hits._source
    elif isinstance(select[0].value, Variable):
        # SPECIAL CASE FOR SINGLE TERM
        cube = wrap(data).hits.hits.fields
    else:
        data_list = unpack_terms(data.facets.mvel, select)
        if data_list:
            # TRANSPOSE ROWS INTO ONE COLUMN PER select
            columns = zip(*data_list)
            cube = Cube(
                select,
                [],
                {s.name: Matrix(list=columns[i]) for i, s in enumerate(select)}
            )
        else:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})

    return Dict(meta={"esquery": FromES}, data=cube)
コード例 #28
0
def es_terms(es, mvel, query):
    """
    BUILD A CUBE OVER query.edges USING ONE TERMS FACET PER select

    TWO-EDGE QUERIES ARE DELEGATED TO _es_terms2().  OTHERWISE EACH FACET IS
    NAMED AFTER ITS select, AND ALL EDGES ARE PACKED INTO A SINGLE TERM
    (compileEdges2Term) TO MINIMIZE THE CROSS-PRODUCT EXPLOSION

    :param es: PASSED TO es09.util.post()
    :param mvel: PASSED TO compileEdges2Term()
    :param query: THE QUERY; ITS edges GET index, AND THEIR PARTITIONS GET
                  dataIndex, ASSIGNED IN PLACE
    :return: Cube OF query.select OVER query.edges
    """
    if len(query.edges) == 2:
        return _es_terms2(es, mvel, query)

    select = listwrap(query.select)
    FromES = build_es_query(query)
    packed_term = compileEdges2Term(mvel, query.edges, wrap([]))
    for s in select:
        FromES.facets[s.name] = {
            "terms": {
                "field": packed_term.field,
                "script_field": packed_term.expression,
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }

    term2Parts = packed_term.term2parts

    data = es09.util.post(es, FromES, query.limit)

    # GETTING ALL PARTS WILL EXPAND THE EDGES' DOMAINS
    # BUT HOW TO UNPACK IT FROM THE term FASTER IS UNKNOWN
    for _, f in data.facets.items():
        for t in f.terms:
            term2Parts(t.term)

    # NUMBER ALL EDGES FOR qb INDEXING
    for f, e in enumerate(query.edges):
        e.index = f
        if e.domain.type in ["uid", "default"]:
            # e.domain.partitions = qb.sort(e.domain.partitions, "value")
            for p, part in enumerate(e.domain.partitions):
                part.dataIndex = p
            e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    dims = [
        len(e.domain.partitions) + (1 if e.allowNulls else 0)
        for e in query.edges
    ]
    output = {}
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for _, facet in data.facets.items():
        for term in facet.terms:
            term_coord = term2Parts(term.term).dataIndex
            for s in select:
                try:
                    output[s.name][term_coord] = term[aggregates[s.aggregate]]
                except Exception:
                    # DELIBERATE BEST-EFFORT:
                    # USUALLY CAUSED BY output[s.name] NOT BEING BIG ENOUGH TO HANDLE NULL COUNTS
                    pass

    # _es_terms2() READS .edges FROM THIS RESULT, SO THE CUBE MUST BE RETURNED
    cube = Cube(query.select, query.edges, output)
    cube.query = query
    return cube
コード例 #29
0
ファイル: terms_stats.py プロジェクト: davehunt/ActiveData
def es_terms_stats(esq, mvel, query):
    """
    BUILD A CUBE USING terms_stats FACETS

    ONE EDGE (THE "SPECIAL" EDGE) IS SERVED BY THE TERMS OF THE FACET; EVERY
    OTHER EDGE REQUIRES ONE FACET PER COMBINATION OF ITS PARTS.  FACETS ARE
    NAMED "<select.name>,<dataIndex>,...,<dataIndex>" (ONE INDEX PER FACET EDGE)

    :param esq: PROVIDES query() (USED TO COUNT REAL COMBINATIONS) AND es
    :param mvel: PASSED TO compileEdges2Term(), buildCondition(); ALSO
                 COMPILES NON-KEYWORD select VALUES
    :param query: THE QUERY; PARTITIONS OF ITS EDGES GET dataIndex ASSIGNED, AND
                  AN OPEN-ENDED SPECIAL EDGE GETS ITS DOMAIN FILLED FROM THE RESPONSE
    :return: Cube OF query.select OVER query.edges, WITH frum SET TO query
    """
    select = listwrap(query.select)
    facetEdges = []  # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = DictList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM

            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error(
                    "There is more than one open-ended edge: self can not be handled"
                )
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            l = len(e.domain.partitions)
            if ((e.value and is_keyword(e.value))
                    or len(e.domain.dimension.fields) == 1) and l > num_parts:
                num_parts = l
                specialEdge = e
                special_index = i

        # NOTE(review): IF NO EDGE QUALIFIED, special_index IS STILL -1, SO THE
        # LAST FACET EDGE IS POPPED AND None IS APPENDED - CONFIRM INTENT
        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    total_facets = PRODUCT(len(f.domain.partitions)
                           for f in facetEdges) * len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {
                "aggregate": "count"
            },
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        def add_facet(value, parts, cube):
            # KEEP ONLY THE COMBINATIONS THAT ACTUALLY HAVE DATA
            if value:
                esFacets.append(parts)

        counts["count"].forall(add_facet)

        Log.note(
            "{{theory_count}} theoretical combinations, {{real_count}} actual combos found",
            real_count=len(esFacets),
            theory_count=total_facets)

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [
                len(e.domain.partitions) + (1 if e.allowNulls else 0)
                for e in query.edges
            ]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube

    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, DictList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        Log.error("not implemented yet")

    FromES = build_es_query(query)

    for s in select:
        for parts in esFacets:
            condition = DictList()
            constants = DictList()
            name = [literal_field(s.name)]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
                constants.append({
                    "name": fedge.domain.name,
                    "value": parts[f]
                })
            condition.append(query.where)
            name = ",".join(name)

            FromES.facets[name] = {
                "terms_stats": {
                    "key_field":
                    calcTerm.field,
                    "value_field":
                    s.value if is_keyword(s.value) else None,
                    "value_script":
                    mvel.compile_expression(s.value)
                    if not is_keyword(s.value) else None,
                    "size":
                    coalesce(query.limit, 200000)
                }
            }
            if condition:
                FromES.facets[name].facet_filter = simplify_esfilter(
                    {"and": condition})

    data = es09.util.post(esq.es, FromES, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        partitions = DictList()
        term2part = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                # A PLAIN dict RAISES KeyError ON A MISSING KEY, SO TEST MEMBERSHIP.
                # KEYED ON THE TERM SO EACH TERM GETS EXACTLY ONE PARTITION
                # NOTE(review): ASSUMES stats.term IS THE INTENDED PART VALUE, AS
                # IT IS WHEN FILLING THE CUBE BELOW - CONFIRM
                term = stats.term
                if term not in term2part:
                    part = {"value": term, "name": term}
                    partitions.append(part)
                    term2part[term] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = term2part
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [
        len(e.domain.partitions) + (1 if e.allowNulls else 0)
        for e in query.edges
    ]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                # INSERT THE SPECIAL EDGE'S INDEX AT ITS POSITION AMONG THE EDGES
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index] + (
                    special.dataIndex, ) + pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
コード例 #30
0
ファイル: aggs.py プロジェクト: mozilla/ActiveData-ETL
def cube_aggs(frum, query):
    """
    AGGREGATE THE ROWS OF frum INTO A CUBE OVER query.edges

    :param frum: SOURCE CONTAINER; ITS select, edges AND values() ARE READ.
                 ROWS MAY HAVE EDGE VALUES WRITTEN INTO THEM
    :param query: QUERY WITH select, edges AND where; THE EDGES ARE MODIFIED
                  IN PLACE (domain AND value ARE REWRITTEN TO POINT AT THE
                  MATCHING frum EDGE)
    :return: Cube(select, query.edges, {select name: Matrix})
    """
    select = listwrap(query.select)

    # MATCH EDGES IN QUERY TO ONES IN frum
    for e in query.edges:
        for fs in frum.select:
            if fs.name == e.value:
                Log.error("Not implemented yet")

        # DEFAULT DOMAINS CAN EASILY BE LOOKED UP FROM frum
        is_default = isinstance(e.domain, DefaultDomain)
        for fe in frum.edges:
            if fe.name == e.value:
                if is_default:
                    e.domain = SimpleSetDomain(**fe.domain.as_dict())
                e.value = e.value + "." + fe.domain.key
                break

    result = {
        s.name: Matrix(
            dims=[
                len(e.domain.partitions) + (1 if e.allowNulls else 0)
                for e in query.edges
            ],
            zeros=s.aggregate == "count"
        )
        for s in select
    }
    where = qb_expression_to_function(query.where)
    # COMPILE EVERY select EXPRESSION ONCE, INSTEAD OF ONCE PER ROW
    accessors = [qb_expression_to_function(s.value) for s in select]

    # `== None` / `!= None` ARE KEPT AS WRITTEN
    # NOTE(review): PRESUMABLY THEY MATCH NULL-LIKE WRAPPER VALUES TOO; CONFIRM
    # BEFORE CHANGING THEM TO `is None`
    for d in filter(where, frum.values()):
        # LIST OF MATCHING COORDINATE FAMILIES, USUALLY ONLY ONE PER FAMILY
        # BUT JOINS WITH EDGES CAN CAUSE MORE
        coord = []
        for e in query.edges:
            matches = get_matches(e, d)
            coord.append(matches)
            if len(matches) == 1 and d[e.name] == None:
                d[e.name] = e.domain.partitions[matches[0]]

        for s, expr in zip(select, accessors):
            mat = result[s.name]
            agg = s.aggregate
            var = s.value
            val = expr(d)
            if agg == "count":
                # COUNT EVERY ROW WHEN NO VALUE IS SELECTED, OTHERWISE ONLY ROWS WITH A VALUE
                if var == "." or var == None or val != None:
                    for c in itertools.product(*coord):
                        mat[c] += 1
            else:
                for c in itertools.product(*coord):
                    acc = mat[c]
                    if acc == None:
                        # FIRST VALUE FOR THIS CELL: MAKE ITS ACCUMULATOR
                        acc = windows.name2accumulator.get(agg)
                        if acc == None:
                            Log.error(
                                "select aggregate {{agg}} is not recognized",
                                agg=agg)
                        acc = acc(**s)
                        mat[c] = acc
                    acc.add(val)

    # REPLACE ACCUMULATORS WITH THEIR FINAL VALUES (COUNTS ARE ALREADY NUMBERS)
    for s in select:
        if s.aggregate == "count":
            continue
        m = result[s.name]
        for c, var in m.items():
            if var != None:
                m[c] = var.end()

    return Cube(select, query.edges, result)
コード例 #31
0
def list_aggs(frum, query):
    """
    AGGREGATE A LIST OF ROWS INTO A CUBE OVER query.edges

    :param frum: THE ROWS (WRAPPED BEFORE USE)
    :param query: QUERY WITH select, edges AND where; EDGES WITH A DefaultDomain
                  GET A SimpleSetDomain OF THE VALUES FOUND IN frum
    :return: Cube(select, query.edges, {select name: Matrix})
    """
    frum = wrap(frum)
    select = listwrap(query.select)

    # GIVE EVERY DEFAULT DOMAIN THE SORTED SET OF VALUES ACTUALLY SEEN
    for edge in query.edges:
        if not isinstance(edge.domain, DefaultDomain):
            continue
        accessor = jx_expression_to_function(edge.value)
        unique_values = set(map(accessor, frum))
        if None in unique_values:
            edge.allowNulls = coalesce(edge.allowNulls, True)
            unique_values -= {None}
        edge.domain = SimpleSetDomain(partitions=sorted(unique_values))

    s_accessors = [
        (ss.name, compile_expression(ss.value.to_python()))
        for ss in select
    ]

    def edge_dims():
        # ONE EXTRA SLOT PER EDGE THAT ALLOWS NULLS
        return [
            len(e.domain.partitions) + (1 if e.allowNulls else 0)
            for e in query.edges
        ]

    # EVERY CELL STARTS AS A FRESH ACCUMULATOR FOR THE select's AGGREGATE
    result = {
        s.name: Matrix(
            dims=edge_dims(),
            zeros=lambda: windows.name2accumulator.get(s.aggregate)(**s)
        )
        for s in select
    }
    where = jx_expression_to_function(query.where)
    coord = [None]*len(query.edges)
    edge_accessor = [(i, make_accessor(e)) for i, e in enumerate(query.edges)]

    net_new_edge_names = set(wrap(query.edges).name) - UNION(e.value.vars() for e in query.edges)
    # s_accessor NEEDS THESE EDGES, SO WE PASS THEM ANYWAY (ON A COPY OF THE ROW)
    inject_edge_values = bool(net_new_edge_names & UNION(ss.value.vars() for ss in select))

    for row in filter(where, frum):
        if inject_edge_values:
            row = row.copy()
        for index, get_matches in edge_accessor:
            coord[index] = get_matches(row)

        for s_name, s_accessor in s_accessors:
            mat = result[s_name]
            for c in itertools.product(*coord):
                acc = mat[c]
                if inject_edge_values:
                    for e, cc in zip(query.edges, c):
                        row[e.name] = e.domain.partitions[cc]
                acc.add(s_accessor(row, c, frum))

    # REPLACE ACCUMULATORS WITH THEIR FINAL VALUES
    for s in select:
        m = result[s.name]
        for c, var in m.items():
            if var != None:
                m[c] = var.end()

    from pyLibrary.queries.containers.cube import Cube

    return Cube(select, query.edges, result)
コード例 #32
0
ファイル: setop.py プロジェクト: klahnakoski/Activedata-ETL
def es_setop(es, mvel, query):
    """
    RUN A SET OPERATION (A QUERY WITHOUT EDGES) AGAINST ES AND RETURN A CUBE

    THREE REQUEST SHAPES ARE USED:
    * SINGLE select WITHOUT A VALUE: FILTERED QUERY, RESULT IS hits.total
    * SINGLE KEYWORD select: TERMS FACET (NAMED mvel) ON THAT FIELD
    * OTHERWISE: TERMS FACET (NAMED mvel) DRIVEN BY A SCRIPT FROM mvel.code()

    :param es: PASSED TO es09.util.post()
    :param mvel: PROVIDES code() TO COMPILE THE QUERY INTO A SCRIPT
    :param query: THE QUERY (frum, select, where, sort, limit ARE READ)
    :return: Cube WITH frum SET TO query
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isComplex = OR([s.value == None and s.aggregate not in ("count", "none") for s in select])   # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

    if not isDeep and not isComplex and len(select) == 1:
        if not select[0].value:
            FromES.query = {"filtered": {
                "query": {"match_all": {}},
                "filter": simplify_esfilter(query.where)
            }}
            FromES.size = 1  # PREVENT QUERY CHECKER FROM THROWING ERROR
        elif isKeyword(select[0].value):
            terms = {
                "field": select[0].value,
                "size": coalesce(query.limit, 200000)
            }
            if query.sort:
                s = query.sort
                if len(s) > 1:
                    Log.error("can not sort by more than one field")

                s0 = s[0]
                if s0.field != select[0].value:
                    Log.error("can not sort by anything other than count, or term")

                # THE ORDER BELONGS INSIDE THE BODY OF THE mvel TERMS FACET;
                # SETTING facets.terms.order WOULD DECLARE A SECOND, INVALID, FACET
                terms["order"] = "term" if s0.sort >= 0 else "reverse_term"
            FromES.facets.mvel = {
                "terms": terms,
                "facet_filter": simplify_esfilter(query.where)
            }
    elif not isDeep:
        simple_query = query.copy()
        simple_query.where = TRUE_FILTER  # THE FACET FILTER IS FASTER
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": coalesce(simple_query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }
    else:
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }

    data = es09.util.post(es, FromES, query.limit)

    # NOTE(review): cube IS NOT ASSIGNED WHEN THE SINGLE select VALUE IS NEITHER
    # EMPTY NOR A KEYWORD - CONFIRM HOW THAT CASE SHOULD BE HANDLED
    if len(select) == 1:
        if not select[0].value:
            # SPECIAL CASE FOR SINGLE COUNT
            output = Matrix(value=data.hits.total)
            cube = Cube(query.select, [], {select[0].name: output})
        elif isKeyword(select[0].value):
            # SPECIAL CASE FOR SINGLE TERM
            # THE FACET WAS REQUESTED UNDER THE NAME mvel, SO READ IT FROM THERE
            T = data.facets.mvel.terms
            output = Matrix.wrap([t.term for t in T])
            cube = Cube(query.select, [], {select[0].name: output})
    else:
        data_list = unpack_terms(data.facets.mvel, select)
        if not data_list:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
        else:
            # TRANSPOSE ROWS INTO ONE COLUMN PER select
            output = zip(*data_list)
            cube = Cube(select, [], {s.name: Matrix(list=output[i]) for i, s in enumerate(select)})

    cube.frum = query
    return cube
コード例 #33
0
def es_setop(es, mvel, query):
    """
    RUN A SET OPERATION (A QUERY WITHOUT EDGES) AGAINST ES

    THREE REQUEST SHAPES ARE USED:
    * PLAIN FILTERED QUERY RETURNING _source (NO select VALUE, OR "*")
    * PLAIN FILTERED QUERY RETURNING fields (ALL select VALUES ARE Variable)
    * A TERMS FACET NAMED mvel, DRIVEN BY A SCRIPT FROM mvel.code()

    :param es: PASSED TO es09.util.post()
    :param mvel: PROVIDES code() TO COMPILE THE QUERY INTO A SCRIPT
    :param query: THE QUERY (frum, select, where, sort, limit ARE READ)
    :return: Data WITH meta.esquery (THE REQUEST SENT) AND data (THE RESULT)
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    def where_esfilter():
        # DEFERRED, SO ONLY THE BRANCHES THAT NEED IT PAY FOR (OR FAIL ON) IT
        return simplify_esfilter(jx_expression(query.where).to_esfilter())

    # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isDeep = len(split_field(query.frum.name)) > 1
    # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT
    isComplex = OR([
        s.value == None and s.aggregate not in ("count", "none")
        for s in select
    ])

    if isDeep:
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": where_esfilter()
        }
    elif isComplex:
        simple_query = query.copy()
        simple_query.where = TRUE_FILTER  # THE FACET FILTER IS FASTER
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": coalesce(simple_query.limit, 200000)
            },
            "facet_filter": where_esfilter()
        }
    elif (len(select) == 1 and not select[0].value) or select[0].value == "*":
        FromES = wrap({
            "query": {
                "filtered": {
                    "query": {"match_all": {}},
                    "filter": where_esfilter()
                }
            },
            "sort": query.sort,
            "size": 1
        })
    elif all(isinstance(v, Variable) for v in select.value):
        FromES = wrap({
            "query": {
                "filtered": {
                    "query": {"match_all": {}},
                    # NOTE(review): UNLIKE THE OTHER BRANCHES, where IS NOT PASSED
                    # THROUGH jx_expression() HERE - CONFIRM WHICH FORM IS INTENDED
                    "filter": simplify_esfilter(query.where.to_esfilter())
                }
            },
            "fields": select.value,
            "sort": query.sort,
            "size": coalesce(query.limit, 200000)
        })

    data = es09.util.post(es, FromES, query.limit)

    if (len(select) == 1 and not select[0].value) or select[0].value == "*":
        # SPECIAL CASE FOR SINGLE COUNT
        cube = wrap(data).hits.hits._source
    elif isinstance(select[0].value, Variable):
        # SPECIAL CASE FOR SINGLE TERM
        cube = wrap(data).hits.hits.fields
    else:
        data_list = unpack_terms(data.facets.mvel, select)
        if data_list:
            # TRANSPOSE ROWS INTO ONE COLUMN PER select
            columns = zip(*data_list)
            cube = Cube(
                select,
                [],
                {s.name: Matrix(list=columns[i]) for i, s in enumerate(select)}
            )
        else:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})

    return Data(meta={"esquery": FromES}, data=cube)