Esempi in Python per simplify_esfilter (pyLibrary.queries.expressions.simplify_esfilter)

Esempio n. 1

0

Mostra file

File: setop.py Progetto: klahnakoski/Activedata-ETL

def es_fieldop(es, query):
    """
    BUILD A FILTERED ES QUERY THAT SELECTS PLAIN FIELDS, THEN EXTRACT THE ROWS

    :param es: HANDLE PASSED THROUGH TO extract_rows()
    :param query: QUERY; select, where, limit AND sort ARE READ
    :return: WHATEVER extract_rows() RETURNS
    """
    es_query = es14.util.es_query_template()
    select = listwrap(query.select)

    where_filter = simplify_esfilter(qb_expression_to_esfilter(query.where))
    es_query.query = {"filtered": {"query": {"match_all": {}}, "filter": where_filter}}
    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = qb_sort_to_es_sort(query.sort)
    es_query.fields = DictList()

    source = "fields"
    for column in select.value:
        if column == "*" or column == ".":
            # WHOLE DOCUMENT REQUESTED: DROP THE FIELD LIST, READ FROM _source
            es_query.fields = None
            source = "_source"
        elif isinstance(column, basestring) and is_keyword(column):
            es_query.fields.append(column)
        elif es_query.fields is None:
            # FIELD LIST WAS DROPPED, NOTHING TO COLLECT FOR THIS COLUMN
            continue
        elif isinstance(column, list):
            es_query.fields.extend(column)
        elif isinstance(column, Mapping):
            es_query.fields.extend(column.values())
        else:
            es_query.fields.append(column)

    # NOTE: THIS REPLACES THE SORT ASSIGNED FROM qb_sort_to_es_sort() ABOVE
    sort_clauses = []
    for srt in query.sort:
        direction = "asc" if srt.sort >= 0 else "desc"
        sort_clauses.append({srt.field: direction})
    es_query.sort = sort_clauses

    return extract_rows(es, es_query, source, select, query)

Esempio n. 2

0

Mostra file

File: test_expressions.py Progetto: davehunt/ActiveData

 def test_eq2(self):
     """AN eq OVER TWO FIELDS BECOMES TWO term FILTERS JOINED BY bool/must OR and"""
     terms = [{"term": {"a": 1}}, {"term": {"b": 2}}]
     result = simplify_esfilter(jx_expression({"eq": {"a": 1, "b": 2}}).to_esfilter())
     # THE WRAPPER DEPENDS ON THE USE_BOOL_MUST FLAG
     expected = {"bool": {"must": terms}} if USE_BOOL_MUST else {"and": terms}
     self.assertEqual(result, expected)

Esempio n. 3

0

Mostra file

def es_fieldop(es, query):
    """
    BUILD A FILTERED ES QUERY THAT SELECTS PLAIN FIELDS, THEN EXTRACT THE ROWS

    :param es: HANDLE PASSED THROUGH TO extract_rows()
    :param query: QUERY; select, where, limit AND sort ARE READ
    :return: WHATEVER extract_rows() RETURNS
    """
    es_query = es14.util.es_query_template()
    select = listwrap(query.select)

    where_filter = simplify_esfilter(qb_expression_to_esfilter(query.where))
    es_query.query = {"filtered": {"query": {"match_all": {}}, "filter": where_filter}}
    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = qb_sort_to_es_sort(query.sort)
    es_query.fields = DictList()

    source = "fields"
    for column in select.value:
        if column == "*" or column == ".":
            # WHOLE DOCUMENT REQUESTED: DROP THE FIELD LIST, READ FROM _source
            es_query.fields = None
            source = "_source"
        elif isinstance(column, basestring) and is_keyword(column):
            es_query.fields.append(column)
        elif es_query.fields is None:
            # FIELD LIST WAS DROPPED, NOTHING TO COLLECT FOR THIS COLUMN
            continue
        elif isinstance(column, list):
            es_query.fields.extend(column)
        elif isinstance(column, Mapping):
            es_query.fields.extend(column.values())
        else:
            es_query.fields.append(column)

    # NOTE: THIS REPLACES THE SORT ASSIGNED FROM qb_sort_to_es_sort() ABOVE
    sort_clauses = []
    for srt in query.sort:
        direction = "asc" if srt.sort >= 0 else "desc"
        sort_clauses.append({srt.field: direction})
    es_query.sort = sort_clauses

    return extract_rows(es, es_query, source, select, query)

Esempio n. 4

0

Mostra file

File: query.py Progetto: klahnakoski/intermittents

def _normalize_where(where, schema=None):
    if where == None:
        return TRUE_FILTER
    if schema == None:
        return where
    where = simplify_esfilter(_where_terms(where, where, schema))
    return where

Esempio n. 5

0

Mostra file

    def append_query(self, es_query, start):
        """
        WRAP es_query IN ONE terms AGGREGATION PER FIELD, THEN IN AN OPTIONAL
        DOMAIN FILTER

        :param es_query: THE INNER QUERY TO BE NESTED
        :param start: STORED ON self.start
        :return: THE NEW, OUTERMOST QUERY
        """
        # TODO: USE "reverse_nested" QUERY TO PULL THESE
        self.start = start

        for field in self.fields:
            terms_agg = {"terms": {"field": field, "size": self.domain.limit}}
            nest = wrap({"aggs": {"_match": set_default(terms_agg, es_query)}})
            if self.edge.allowNulls:
                # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
                missing_agg = {"missing": {"field": field}}
                nest.aggs._missing = set_default(missing_agg, es_query)
            es_query = nest

        if not self.domain.where:
            return es_query

        domain_filter = simplify_esfilter(self.domain.where)
        filtered = set_default({"filter": domain_filter}, es_query)
        return {"aggs": {"_filter": filtered}}

Esempio n. 6

0

Mostra file

def es_fieldop(es, query):
    """
    FETCH THE SELECTED FIELDS OF MATCHING DOCUMENTS AND PACK THEM INTO A Cube

    :param es: HANDLE PASSED TO es09.util.post()
    :param query: QUERY; select, where, limit, sort AND edges ARE READ
    :return: Cube WITH ONE Matrix PER SELECT CLAUSE
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    where_filter = simplify_esfilter(jx_expression(query.where).to_esfilter())
    FromES.query = {"filtered": {"query": {"match_all": {}}, "filter": where_filter}}
    FromES.size = coalesce(query.limit, 200000)
    FromES.fields = FlatList()
    for value in select.value:
        if value == "*":
            FromES.fields = None
        elif isinstance(value, list):
            FromES.fields.extend(value)
        elif isinstance(value, Mapping):
            FromES.fields.extend(value.values())
        else:
            FromES.fields.append(value)

    sort_clauses = []
    for srt in query.sort:
        sort_clauses.append({srt.field: "asc" if srt.sort >= 0 else "desc"})
    FromES.sort = sort_clauses

    data = es09.util.post(es, FromES, query.limit)
    hits = data.hits.hits

    def field_of(hit, name):
        # LOOK UP ONE RETURNED FIELD OF A HIT, DEFAULTING TO None
        return unwrap(hit.fields).get(name, None)

    matricies = {}
    for s in select:
        if s.value == "*":
            matricies[s.name] = Matrix.wrap([hit._source for hit in hits])
        elif isinstance(s.value, Mapping):
            # ONE DICT PER HIT, KEYED LIKE THE SELECT MAPPING
            records = []
            for hit in hits:
                records.append({k: field_of(hit, v) for k, v in s.value.items()})
            matricies[s.name] = Matrix.wrap(records)
        elif isinstance(s.value, list):
            # ONE TUPLE PER HIT, IN THE ORDER OF THE SELECT LIST
            tuples = [tuple(field_of(hit, ss) for ss in s.value) for hit in hits]
            matricies[s.name] = Matrix.wrap(tuples)
        elif not s.value:
            matricies[s.name] = Matrix.wrap([field_of(hit, s.value) for hit in hits])
        else:
            try:
                matricies[s.name] = Matrix.wrap([field_of(hit, s.value) for hit in hits])
            except Exception as e:
                Log.error("", e)

    cube = Cube(query.select, query.edges, matricies, frum=query)
    cube.frum = query
    return cube

Esempio n. 7

0

Mostra file

    def test_range_packing1(self):
        """TWO BOUNDS ON THE SAME FIELD ARE PACKED INTO A SINGLE range FILTER"""
        lower_bound = {"gt": {"a": 20}}
        upper_bound = {"lt": {"a": 40}}
        esfilter = jx_expression({"and": [lower_bound, upper_bound]}).to_esfilter()

        result = simplify_esfilter(esfilter)
        expected = {"range": {"a": {"gt": 20, "lt": 40}}}
        self.assertEqual(result, expected)

Esempio n. 8

0

Mostra file

def es_setop(es, query):
    """
    PREPARE AN ES QUERY FOR A SET OPERATION AND EXTRACT THE ROWS

    :param es: HANDLE PASSED THROUGH TO extract_rows()
    :param query: QUERY; frum.name, where, limit AND sort ARE READ
    :return: WHATEVER extract_rows() RETURNS
    """
    template = es14.util.es_query_template(query.frum.name)
    es_query, filters = template

    # MERGE THE SIMPLIFIED where INTO THE FIRST FILTER SLOT OF THE TEMPLATE
    first_filter = filters[0]
    where_filter = simplify_esfilter(query.where.to_esfilter())
    set_default(first_filter, where_filter)

    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort)
    es_query.fields = DictList()

    return extract_rows(es, es_query, query)

Esempio n. 9

0

Mostra file

File: util.py Progetto: klahnakoski/MoDevETL

def build_es_query(query):
    """
    RETURN THE SKELETON OF A FACET QUERY

    :param query: ONLY query.where IS READ, AND ONLY WHEN DEBUG IS SET
    :return: WRAPPED QUERY WITH EMPTY sort AND facets
    """
    size = 100 if DEBUG else 0
    output = wrap({
        "query": {"match_all": {}},
        "from": 0,
        "size": size,
        "sort": [],
        "facets": {}
    })
    if not DEBUG:
        return output

    # TO LIMIT RECORDS TO WHAT'S IN FACETS
    output.query = {
        "filtered": {
            "query": {"match_all": {}},
            "filter": simplify_esfilter(query.where)
        }
    }
    return output

Esempio n. 10

0

Mostra file

File: setop.py Progetto: klahnakoski/esReplicate

def es_setop(es, query):
    """
    PREPARE AN ES QUERY FOR A SET OPERATION AND EXTRACT THE ROWS

    :param es: HANDLE PASSED THROUGH TO extract_rows()
    :param query: QUERY; frum.name, where, limit AND sort ARE READ
    :return: WHATEVER extract_rows() RETURNS
    """
    template = es14.util.es_query_template(query.frum.name)
    es_query, filters = template

    # MERGE THE SIMPLIFIED where INTO THE FIRST FILTER SLOT OF THE TEMPLATE
    first_filter = filters[0]
    where_filter = simplify_esfilter(query.where.to_esfilter())
    set_default(first_filter, where_filter)

    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort)
    es_query.fields = DictList()

    return extract_rows(es, es_query, query)

Esempio n. 11

0

Mostra file

    def test_range_packing2(self):
        """gte/lt BOUNDS ON A DOTTED FIELD ARE PACKED INTO A SINGLE range FILTER"""
        lower_bound = {"gte": {"build.date": 1429747200}}
        upper_bound = {"lt": {"build.date": 1429920000}}
        esfilter = jx_expression({"and": [lower_bound, upper_bound]}).to_esfilter()

        result = simplify_esfilter(esfilter)
        # THE EXPECTED BOUNDS ARE SPELLED AS DATES TO CHECK THE UNIX TIMESTAMPS
        expected_bounds = {
            "gte": Date("23 APR 2015").unix,
            "lt": Date("25 APR 2015").unix
        }
        self.assertEqual(result, {"range": {"build.date": expected_bounds}})

Esempio n. 12

0

Mostra file

File: aggop.py Progetto: mozilla/ActiveData-ETL

def es_aggop(es, mvel, query):
    """
    COMPUTE AGGREGATES WITH ONE statistical FACET PER DISTINCT SELECT VALUE

    :param es: HANDLE PASSED TO es09.util.post() (OR es_countop())
    :param mvel: NOT USED IN THIS FUNCTION
    :param query: QUERY; select, where AND limit ARE READ
    :return: ZERO-DIMENSIONAL Cube, OR THE RESULT OF es_countop() WHEN EVERY
             SELECT IS A count
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)

    if AND(aggregates[s.aggregate] == "count" for s in select):
        return es_countop(es, query)  # SIMPLE, USE TERMS FACET INSTEAD

    value2facet = {}  # ONLY ONE FACET NEEDED PER DISTINCT VALUE
    name2facet = {}  # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if is_keyword(s.value):
                statistical = {"field": s.value}
            else:
                statistical = {
                    "script": es09.expressions.compile_expression(s.value, query)
                }
            unwrap(FromES.facets)[s.name] = {
                "statistical": statistical,
                "facet_filter": simplify_esfilter(query.where)
            }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es09.util.post(es, FromES, query.limit)

    matricies = {}
    for s in select:
        stats = fix_es_stats(data.facets[literal_field(s.name)])
        matricies[s.name] = Matrix(value=stats[aggregates[s.aggregate]])

    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube

Esempio n. 13

0

Mostra file

File: aggs.py Progetto: klahnakoski/intermittents

    def append_query(self, es_query, start):
        """
        WRAP es_query IN A terms AND A missing AGGREGATION PER FIELD, THEN IN
        AN OPTIONAL DOMAIN FILTER

        :param es_query: THE INNER QUERY TO BE NESTED
        :param start: STORED ON self.start
        :return: THE NEW, OUTERMOST QUERY
        """
        self.start = start

        # self.fields HOLDS PAIRS; ONLY THE SECOND MEMBER IS USED AS ES FIELD
        for _, field in self.fields:
            matched = set_default({"terms": {"field": field}}, es_query)
            missing = set_default({"missing": {"field": field}}, es_query)
            es_query = wrap({"aggs": {"_match": matched, "_missing": missing}})

        if not self.edge.domain.where:
            return es_query

        domain_filter = simplify_esfilter(self.edge.domain.where)
        filtered = set_default({"filter": domain_filter}, es_query)
        return {"aggs": {"_filter": filtered}}

Esempio n. 14

0

Mostra file

def es_aggop(es, mvel, query):
    """
    COMPUTE AGGREGATES WITH ONE statistical FACET PER DISTINCT SELECT VALUE

    :param es: HANDLE PASSED TO es09.util.post() (OR es_countop())
    :param mvel: NOT USED IN THIS FUNCTION
    :param query: QUERY; select, where AND limit ARE READ
    :return: ZERO-DIMENSIONAL Cube, OR THE RESULT OF es_countop() WHEN EVERY
             SELECT IS A count
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        return es_countop(es, query)  # SIMPLE, USE TERMS FACET INSTEAD

    value2facet = dict()  # ONLY ONE FACET NEEDED PER DISTINCT VALUE
    name2facet = dict()   # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if isinstance(s.value, Variable):
                statistical = {"field": s.value.var}
            else:
                statistical = {"script": jx_expression_to_function(s.value)}

            # BOTH KINDS OF FACET GET THE SAME FILTER: query.where IS AN
            # EXPRESSION AND MUST BE CONVERTED TO AN ES FILTER BEFORE IT CAN
            # BE SIMPLIFIED
            unwrap(FromES.facets)[s.name] = {
                "statistical": statistical,
                "facet_filter": simplify_esfilter(query.where.to_esfilter())
            }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es09.util.post(es, FromES, query.limit)

    matricies = {}
    for s in select:
        stats = fix_es_stats(data.facets[literal_field(s.name)])
        matricies[s.name] = Matrix(value=stats[aggregates[s.aggregate]])

    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube

Esempio n. 15

0

Mostra file

File: aggs.py Progetto: mozilla/ChangeDetector

    def append_query(self, es_query, start):
        """
        WRAP es_query IN ONE terms AGGREGATION PER FIELD, THEN IN AN OPTIONAL
        DOMAIN FILTER

        :param es_query: THE INNER QUERY TO BE NESTED
        :param start: STORED ON self.start
        :return: THE NEW, OUTERMOST QUERY
        """
        # TODO: USE "reverse_nested" QUERY TO PULL THESE
        self.start = start

        for field in self.fields:
            terms_agg = {"terms": {"field": field, "size": self.domain.limit}}
            nest = wrap({"aggs": {"_match": set_default(terms_agg, es_query)}})
            if self.edge.allowNulls:
                # ALSO COUNT THE DOCUMENTS THAT LACK THE FIELD
                missing_agg = {"missing": {"field": field}}
                nest.aggs._missing = set_default(missing_agg, es_query)
            es_query = nest

        if not self.domain.where:
            return es_query

        domain_filter = simplify_esfilter(self.domain.where)
        filtered = set_default({"filter": domain_filter}, es_query)
        return {"aggs": {"_filter": filtered}}

Esempio n. 16

0

Mostra file

File: setop.py Progetto: klahnakoski/SpotManager

def es_fieldop(es, query):
    """
    FETCH THE SELECTED FIELDS OF MATCHING DOCUMENTS AND PACK THEM INTO A Cube

    :param es: HANDLE PASSED TO es09.util.post()
    :param query: QUERY; select, where, limit, sort AND edges ARE READ
    :return: Cube WITH ONE Matrix PER SELECT CLAUSE
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    where_filter = simplify_esfilter(jx_expression(query.where).to_esfilter())
    FromES.query = {"filtered": {"query": {"match_all": {}}, "filter": where_filter}}
    FromES.size = coalesce(query.limit, 200000)
    FromES.fields = FlatList()
    for value in select.value:
        if value == "*":
            FromES.fields = None
        elif isinstance(value, list):
            FromES.fields.extend(value)
        elif isinstance(value, Mapping):
            FromES.fields.extend(value.values())
        else:
            FromES.fields.append(value)

    sort_clauses = []
    for srt in query.sort:
        sort_clauses.append({srt.field: "asc" if srt.sort >= 0 else "desc"})
    FromES.sort = sort_clauses

    data = es09.util.post(es, FromES, query.limit)
    hits = data.hits.hits

    def field_of(hit, name):
        # LOOK UP ONE RETURNED FIELD OF A HIT, DEFAULTING TO None
        return unwrap(hit.fields).get(name, None)

    matricies = {}
    for s in select:
        if s.value == "*":
            matricies[s.name] = Matrix.wrap([hit._source for hit in hits])
        elif isinstance(s.value, Mapping):
            # ONE DICT PER HIT, KEYED LIKE THE SELECT MAPPING
            records = []
            for hit in hits:
                records.append({k: field_of(hit, v) for k, v in s.value.items()})
            matricies[s.name] = Matrix.wrap(records)
        elif isinstance(s.value, list):
            # ONE TUPLE PER HIT, IN THE ORDER OF THE SELECT LIST
            tuples = [tuple(field_of(hit, ss) for ss in s.value) for hit in hits]
            matricies[s.name] = Matrix.wrap(tuples)
        elif not s.value:
            matricies[s.name] = Matrix.wrap([field_of(hit, s.value) for hit in hits])
        else:
            try:
                matricies[s.name] = Matrix.wrap([field_of(hit, s.value) for hit in hits])
            except Exception as e:
                Log.error("", e)

    cube = Cube(query.select, query.edges, matricies, frum=query)
    cube.frum = query
    return cube

Esempio n. 17

0

Mostra file

File: setop.py Progetto: davehunt/ActiveData

def es_deepop(es, mvel, query):
    """
    RUN AN MVEL terms FACET OVER THE QUERY EDGES AND SUM THE RESULTS INTO A Cube

    :param es: HANDLE PASSED TO es09.util.post()
    :param mvel: COMPILER; mvel.code() PROVIDES THE FACET SCRIPT
    :param query: QUERY; edges, where, limit AND select.name ARE READ
    :return: Cube WITH A SINGLE Matrix NAMED AFTER query.select
    """
    FromES = es09.util.build_es_query(query)

    select = query.edges

    # THE SCRIPT SELECTS THE EDGES THEMSELVES, SO THE COPY HAS NO EDGES
    temp_query = query.copy()
    temp_query.select = select
    temp_query.edges = DictList()
    FromES.facets.mvel = {
        "terms": {
            "script_field": mvel.code(temp_query),
            "size": query.limit
        },
        "facet_filter":
        simplify_esfilter(jx_expression(query.where).to_esfilter())
    }

    data = es09.util.post(es, FromES, query.limit)

    rows = unpack_terms(data.facets.mvel, query.edges)

    # NUMBER ALL EDGES FOR JSON EXPRESSION INDEXING
    edges = query.edges
    for f, e in enumerate(edges):
        # VISIT EVERY KEY FOUND IN COLUMN f (PRESUMABLY getPartByKey REGISTERS
        # UNSEEN KEYS IN THE DOMAIN). ROWS ARE READ DIRECTLY, RATHER THAN
        # THROUGH zip(*rows)[f], SO AN EMPTY RESULT DOES NOT RAISE IndexError
        for r in rows:
            e.domain.getPartByKey(r[f])

        e.index = f
        for p, part in enumerate(e.domain.partitions):
            part.dataIndex = p
        e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    dims = [len(e.domain.partitions) for e in query.edges]
    output = Matrix(*dims)

    # FILL CUBE (THE LAST ELEMENT OF EACH ROW IS THE VALUE TO ACCUMULATE)
    for r in rows:
        term_coord = [
            e.domain.getPartByKey(r[i]).dataIndex for i, e in enumerate(edges)
        ]
        output[term_coord] = SUM(output[term_coord], r[-1])

    cube = Cube(query.select, query.edges, {query.select.name: output})
    cube.frum = query
    return cube

Esempio n. 18

0

Mostra file

File: setop.py Progetto: klahnakoski/Activedata-ETL

def es_deepop(es, mvel, query):
    """
    RUN AN MVEL terms FACET OVER THE QUERY EDGES AND SUM THE RESULTS INTO A Cube

    :param es: HANDLE PASSED TO es09.util.post()
    :param mvel: COMPILER; mvel.code() PROVIDES THE FACET SCRIPT
    :param query: QUERY; edges, where, limit AND select.name ARE READ
    :return: Cube WITH A SINGLE Matrix NAMED AFTER query.select
    """
    FromES = es09.util.build_es_query(query)

    select = query.edges

    # THE SCRIPT SELECTS THE EDGES THEMSELVES, SO THE COPY HAS NO EDGES
    temp_query = query.copy()
    temp_query.select = select
    temp_query.edges = DictList()
    FromES.facets.mvel = {
        "terms": {
            "script_field": mvel.code(temp_query),
            "size": query.limit
        },
        "facet_filter": simplify_esfilter(query.where)
    }

    data = es09.util.post(es, FromES, query.limit)

    rows = unpack_terms(data.facets.mvel, query.edges)

    # NUMBER ALL EDGES FOR Qb INDEXING
    edges = query.edges
    for f, e in enumerate(edges):
        # VISIT EVERY KEY FOUND IN COLUMN f (PRESUMABLY getPartByKey REGISTERS
        # UNSEEN KEYS IN THE DOMAIN). ROWS ARE READ DIRECTLY, RATHER THAN
        # THROUGH zip(*rows)[f], SO AN EMPTY RESULT DOES NOT RAISE IndexError
        for r in rows:
            e.domain.getPartByKey(r[f])

        e.index = f
        for p, part in enumerate(e.domain.partitions):
            part.dataIndex = p
        e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    dims = [len(e.domain.partitions) for e in query.edges]
    output = Matrix(*dims)

    # FILL CUBE (THE LAST ELEMENT OF EACH ROW IS THE VALUE TO ACCUMULATE)
    for r in rows:
        term_coord = [e.domain.getPartByKey(r[i]).dataIndex for i, e in enumerate(edges)]
        output[term_coord] = SUM(output[term_coord], r[-1])

    cube = Cube(query.select, query.edges, {query.select.name: output})
    cube.frum = query
    return cube

Esempio n. 19

0

Mostra file

File: util.py Progetto: davehunt/ActiveData

def build_es_query(query):
    """
    RETURN THE SKELETON OF A FACET QUERY

    :param query: ONLY query.where IS READ, AND ONLY WHEN DEBUG IS SET
    :return: WRAPPED QUERY WITH EMPTY sort AND facets
    """
    size = 100 if DEBUG else 0
    output = wrap({"query": {"match_all": {}}, "from": 0, "size": size, "sort": [], "facets": {}})
    if not DEBUG:
        return output

    # TO LIMIT RECORDS TO WHAT'S IN FACETS
    where_filter = simplify_esfilter(query.where)
    output.query = {"filtered": {"query": {"match_all": {}}, "filter": where_filter}}
    return output

Esempio n. 20

0

Mostra file

    def append_query(self, es_query, start):
        """
        WRAP es_query IN A terms AND A missing AGGREGATION PER FIELD, THEN IN
        AN OPTIONAL DOMAIN FILTER

        :param es_query: THE INNER QUERY TO BE NESTED
        :param start: STORED ON self.start
        :return: THE NEW, OUTERMOST QUERY
        """
        self.start = start

        # self.fields HOLDS PAIRS; ONLY THE SECOND MEMBER IS USED AS ES FIELD
        for _, field in self.fields:
            matched = set_default({"terms": {"field": field}}, es_query)
            missing = set_default({"missing": {"field": field}}, es_query)
            es_query = wrap({"aggs": {"_match": matched, "_missing": missing}})

        if not self.edge.domain.where:
            return es_query

        domain_filter = simplify_esfilter(self.edge.domain.where)
        filtered = set_default({"filter": domain_filter}, es_query)
        return {"aggs": {"_filter": filtered}}

Esempio n. 21

0

Mostra file

def es_aggsop(es, frum, query):
    """
    BUILD AND RUN AN ES aggs QUERY FOR THE SELECT CLAUSES, THEN FORMAT THE
    RESPONSE

    :param es: HANDLE PASSED TO es09.util.post()
    :param frum: SOURCE OF THE QUERY; A MULTI-PART frum.name ADDS A nested
                 AGGREGATION
    :param query: QUERY; select, edges, groupby, where, limit AND format
                  ARE READ
    :return: THE FORMATTED OUTPUT, WITH TIMING AND THE ES QUERY IN output.meta
    """
    select = listwrap(query.select)

    es_query = Dict()
    new_select = Dict()
    formula = []
    # SORT THE SELECT CLAUSES: PLAIN COUNTS, SIMPLE FIELDS, AND FORMULAS
    for s in select:
        if s.aggregate == "count" and (s.value == None or s.value == "."):
            s.pull = "doc_count"
        elif is_keyword(s.value):
            new_select[literal_field(s.value)] += [s]
        else:
            formula.append(s)

    for many in new_select.values():
        if len(many) > 1:
            # SEVERAL AGGREGATES OF THE SAME FIELD SHARE ONE stats AGGREGATION
            canonical_name = literal_field(many[0].name)
            es_query.aggs[canonical_name].stats.field = many[0].value
            for s in many:
                if s.aggregate == "count":
                    s.pull = canonical_name + ".count"
                else:
                    s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
        else:
            s = many[0]
            s.pull = literal_field(s.value) + ".value"
            es_query.aggs[literal_field(
                s.value)][aggregates1_4[s.aggregate]].field = s.value

    for i, s in enumerate(formula):
        new_select[unicode(i)] = s
        s.pull = literal_field(s.name) + ".value"
        es_query.aggs[literal_field(s.name)][aggregates1_4[
            s.aggregate]].script = qb_expression_to_ruby(s.value)

    decoders = [
        AggsDecoder(e, query) for e in coalesce(query.edges, query.groupby, [])
    ]
    start = 0
    for d in decoders:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if query.where:
        where_filter = simplify_esfilter(query.where)
        es_query = Dict(
            aggs={"_filter": set_default({"filter": where_filter}, es_query)})

    if len(split_field(frum.name)) > 1:
        es_query = wrap({
            "size": 0,
            "aggs": {
                "_nested":
                set_default(
                    {
                        "nested": {
                            "path": join_field(split_field(frum.name)[1::])
                        }
                    }, es_query)
            }
        })

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[
            query.format]
        if query.edges:
            output = formatter(decoders, result.aggregations, start, query,
                               select)
        elif query.groupby:
            output = groupby_formatter(decoders, result.aggregations, start,
                                       query, select)
        else:
            output = aggop_formatter(decoders, result.aggregations, start,
                                     query, select)

        output.meta.es_response_time = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        # `as` FORM WORKS ON PYTHON 2.6+ AND 3
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet",
                      format=query.format,
                      cause=e)
        Log.error("Some problem", e)

Esempio n. 22

0

Mostra file

File: terms_stats.py Progetto: klahnakoski/MoDataSubmission

def es_terms_stats(esq, mvel, query):
    """
    ANSWER AN EDGE QUERY WITH ONE terms_stats FACET PER (SELECT, COMBINATION
    OF FACET-EDGE PARTS), USING A SINGLE "SPECIAL" EDGE AS THE FACET KEY

    :param esq: QUERY SERVICE; esq.query() COUNTS COMBINATIONS, esq.es IS
                PASSED TO es09.util.post()
    :param mvel: COMPILER USED FOR EDGE TERMS AND VALUE SCRIPTS
    :param query: QUERY; select, edges, where, frum AND limit ARE READ
    :return: Cube WITH ONE Matrix PER SELECT CLAUSE
    """
    select = listwrap(query.select)
    facetEdges = []    # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = DictList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM

            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error("There is more than one open-ended edge: self can not be handled")
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            l = len(e.domain.partitions)
            if ((e.value and is_keyword(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts:
                num_parts = l
                specialEdge = e
                special_index = i

        # NOTE(review): IF NO EDGE QUALIFIED ABOVE, special_index IS STILL -1 AND
        # specialEdge IS STILL None: THE LAST FACET EDGE IS POPPED AND None IS
        # APPENDED -- CONFIRM THIS IS INTENDED
        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges)*len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {"aggregate": "count"},
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        # COLLECT ONLY THE COMBINATIONS THAT HAVE A TRUTHY COUNT
        def add_facet(value, parts, cube):
            if value:
                esFacets.append(parts)

        counts["count"].forall(add_facet)

        Log.note("{{theory_count}} theoretical combinations, {{real_count}} actual combos found",  real_count= len(esFacets),  theory_count=total_facets)

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube

    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, DictList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        Log.error("not implemented yet")  # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        pass

    FromES = build_es_query(query)

    # ONE FACET PER (SELECT, COMBINATION); THE FACET NAME ENCODES THE SELECT NAME
    # FOLLOWED BY THE dataIndex OF EACH FACET-EDGE PART, COMMA SEPARATED
    for s in select:
        for parts in esFacets:
            condition = DictList()
            constants = DictList()
            name = [literal_field(s.name)]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
                constants.append({"name": fedge.domain.name, "value": parts[f]})
            condition.append(query.where)
            name = ",".join(name)

            FromES.facets[name] = {
                "terms_stats": {
                    "key_field": calcTerm.field,
                    "value_field": s.value if is_keyword(s.value) else None,
                    "value_script": mvel.compile_expression(s.value) if not is_keyword(s.value) else None,
                    "size": coalesce(query.limit, 200000)
                }
            }
            # NOTE(review): condition ALWAYS HOLDS AT LEAST query.where HERE
            if condition:
                FromES.facets[name].facet_filter = simplify_esfilter({"and": condition})

    data = es09.util.post(esq.es, FromES, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        partitions = DictList()
        # NOTE(review): map SHADOWS THE BUILTIN AND IS A PLAIN dict, SO map[stats]
        # BELOW RAISES KeyError FOR A KEY NOT SEEN BEFORE -- CONFIRM WHETHER A
        # NULL-SAFE MAPPING WAS INTENDED
        map = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                if not map[stats]:
                    part = {"value": stats, "name": stats}
                    partitions.append(part)
                    map[stats] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = map
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        # DECODE THE FACET NAME BUILT ABOVE: SELECT NAME, THEN PART INDEXES
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                # SPLICE THE SPECIAL EDGE'S PART INDEX INTO ITS POSITION
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index]+(special.dataIndex, )+pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube

Esempio n. 23

0

Mostra file

File: terms_stats.py Progetto: klahnakoski/MoDataSubmission

def buildCondition(mvel, edge, partition):
    """
    RETURN AN ES FILTER OBJECT

    THE FILTER SELECTS THE DOCUMENTS THAT BELONG TO partition OF edge

    :param mvel: COMPILER; mvel.compile_expression() IS USED FOR SCRIPT FILTERS
    :param edge: THE EDGE; domain, range AND value DECIDE THE KIND OF FILTER
    :param partition: THE PART OF THE EDGE'S DOMAIN TO MATCH
    :return: ES FILTER (dict), OR partition.esfilter WHEN THE EDGE HAS NO value
    """
    output = {}

    if edge.domain.isFacet:
        # MUST USE THIS' esFacet
        condition = wrap(coalesce(partition.where, {"and": []}))

        if partition.min and partition.max and is_keyword(edge.value):
            condition["and"].append({
                "range": {edge.value: {"gte": partition.min, "lt": partition.max}}
            })

        # ES WILL FREAK OUT IF WE SEND {"not":{"and":x}} (OR SOMETHING LIKE THAT)
        return simplify_esfilter(condition)
    elif edge.range:
        # THESE REALLY NEED FACETS TO PERFORM THE JOIN-TO-DOMAIN
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            output = {"and": []}

            if edge.range.mode and edge.range.mode == "inclusive":
                # IF THE range AND THE partition OVERLAP, THEN MATCH IS MADE
                if is_keyword(edge.range.min):
                    output["and"].append({"range": {edge.range.min: {"lt": es09.expressions.value2value(partition.max)}}})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + " < " + es09.expressions.value2MVEL(partition.max)
                    )}})

                if is_keyword(edge.range.max):
                    # THE range FILTER IS A DICT KEYED BY FIELD NAME
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {edge.range.max: {"gt": es09.expressions.value2value(partition.min)}}}
                    ]})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.max + " > " + es09.expressions.value2MVEL(partition.min))}})

            else:
                # SNAPSHOT - IF range INCLUDES partition.min, THEN MATCH IS MADE
                if is_keyword(edge.range.min):
                    output["and"].append({"range": {edge.range.min: {"lte": es09.expressions.value2value(partition.min)}}})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + "<=" + es09.expressions.value2MVEL(partition.min)
                    )}})

                if is_keyword(edge.range.max):
                    # THE range FILTER IS A DICT KEYED BY FIELD NAME
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {edge.range.max: {"gte": es09.expressions.value2value(partition.min)}}}
                    ]})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        es09.expressions.value2MVEL(partition.min) + " <= " + edge.range.max
                    )}})
            return output
        else:
            Log.error("Do not know how to handle range query on non-continuous domain")

    elif not edge.value:
        # MUST USE THIS' esFacet, AND NOT(ALL THOSE ABOVE)
        return partition.esfilter
    elif is_keyword(edge.value):
        # USE FAST ES SYNTAX
        # output IS A PLAIN dict, SO KEYS ARE SET WITH ITEM ASSIGNMENT
        if edge.domain.type in domains.ALGEBRAIC:
            output["range"] = {
                edge.value: {
                    "gte": es09.expressions.value2query(partition.min),
                    "lt": es09.expressions.value2query(partition.max)
                }
            }
        elif edge.domain.type == "set":
            if partition.value:
                if partition.value != edge.domain.getKey(partition):
                    Log.error("please ensure the key attribute of the domain matches the value attribute of all partitions, if only because we are now using the former")
                    # DEFAULT TO USING THE .value ATTRIBUTE, IF ONLY BECAUSE OF LEGACY REASONS
                output["term"] = {edge.value: partition.value}
            else:
                output["term"] = {edge.value: edge.domain.getKey(partition)}

        elif edge.domain.type == "default":
            output["term"] = {edge.value: partition.value}
        else:
            Log.error("Edge \"" + edge.name + "\" is not supported")

        return output
    else:
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            script = edge.value + ">=" + es09.expressions.value2MVEL(partition.min) + " and " + edge.value + "<" + es09.expressions.value2MVEL(partition.max)
        else:
            script = "( " + edge.value + " ) ==" + es09.expressions.value2MVEL(partition.value)

        # PREPEND WHATEVER HELPER FUNCTIONS addFunctions() REPORTS IN code.head
        code = es09.expressions.addFunctions(script)
        output["script"] = {"script": code.head + code.body}
        return output

Esempio n. 24

0

Mostra file

File: setop.py Progetto: klahnakoski/Activedata-ETL

def es_setop(es, mvel, query):
    """
    RUN A SET OPERATION (A QUERY WITHOUT EDGES) USING ES FACETS, AND RETURN A Cube

    THREE STRATEGIES ARE USED:
    * COUNT  - SINGLE SELECT WITH NO value: A FILTERED QUERY, ANSWER IS hits.total
    * TERM   - SINGLE SELECT ON A PLAIN FIELD: A terms FACET (NAMED "mvel") ON THAT FIELD
    * SCRIPT - EVERYTHING ELSE: A terms FACET (NAMED "mvel") ON A SCRIPT MADE BY
               mvel.code(), DECODED WITH unpack_terms()

    :param es: PASSED THROUGH TO es09.util.post()
    :param mvel: COMPILER, ONLY mvel.code() IS CALLED
    :param query: select, where, sort, limit AND frum.name ARE READ
    :return: Cube WITH ONE Matrix PER SELECT, cube.frum IS SET TO query
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isDeep = len(split_field(query.frum.name)) > 1
    # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT
    # NOTE(review): `== None` IS KEPT AS WRITTEN; PRESUMABLY THE WRAPPED NULL
    # VALUE RELIES ON EQUALITY, NOT IDENTITY - CONFIRM BEFORE CHANGING
    isComplex = OR([s.value == None and s.aggregate not in ("count", "none") for s in select])

    # THE FAST PATHS ONLY APPLY TO A SINGLE, SHALLOW, SIMPLE SELECT
    isSimple = not isDeep and not isComplex and len(select) == 1
    isCount = isSimple and not select[0].value
    isTerm = isSimple and not isCount and isKeyword(select[0].value)

    if isCount:
        FromES.query = {"filtered": {
            "query": {"match_all": {}},
            "filter": simplify_esfilter(query.where)
        }}
        FromES.size = 1  # PREVENT QUERY CHECKER FROM THROWING ERROR
    elif isTerm:
        terms = {
            "field": select[0].value,
            "size": coalesce(query.limit, 200000)
        }
        if query.sort:
            sort = query.sort
            if len(sort) > 1:
                Log.error("can not sort by more than one field")

            s0 = sort[0]
            if s0.field != select[0].value:
                Log.error("can not sort by anything other than count, or term")

            # THE ORDER BELONGS INSIDE THE terms CLAUSE OF THE "mvel" FACET;
            # IT WAS PREVIOUSLY WRITTEN TO A SEPARATE (BOGUS) FACET CALLED "terms"
            terms["order"] = "term" if s0.sort >= 0 else "reverse_term"

        FromES.facets.mvel = {
            "terms": terms,
            "facet_filter": simplify_esfilter(query.where)
        }
    elif not isDeep:
        # ALSO REACHED BY A SINGLE SELECT ON A NON-KEYWORD EXPRESSION, WHICH
        # PREVIOUSLY BUILT NO FACET AT ALL
        simple_query = query.copy()
        simple_query.where = TRUE_FILTER  # THE FACET FILTER IS FASTER
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": coalesce(simple_query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }
    else:
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }

    data = es09.util.post(es, FromES, query.limit)

    if len(select) == 1 and not select[0].value:
        # SPECIAL CASE FOR SINGLE COUNT
        output = Matrix(value=data.hits.total)
        cube = Cube(query.select, [], {select[0].name: output})
    elif isTerm:
        # SPECIAL CASE FOR SINGLE TERM: READ THE FACET THAT WAS ACTUALLY REQUESTED
        T = data.facets.mvel.terms
        output = Matrix.wrap([t.term for t in T])
        cube = Cube(query.select, [], {select[0].name: output})
    else:
        # SCRIPTED FACET: EVERY TERM MUST BE DECODED BACK INTO THE SELECTED VALUES
        data_list = unpack_terms(data.facets.mvel, select)
        if not data_list:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
        else:
            # list() SO THE COLUMNS CAN BE INDEXED EVEN WHEN zip() IS LAZY
            output = list(zip(*data_list))
            cube = Cube(select, [], {s.name: Matrix(list=output[i]) for i, s in enumerate(select)})

    cube.frum = query
    return cube

Esempio n. 25

0

Mostra file

def es_terms_stats(esq, mvel, query):
    """
    ANSWER AN EDGE QUERY WITH ONE terms_stats FACET PER (SELECT, COMBINATION OF
    FACET-EDGE PARTITIONS), AND ASSEMBLE THE RESPONSE INTO A Cube

    ONE EDGE (THE "SPECIAL" EDGE) IS HANDLED BY THE TERMS OF THE FACET; ALL OTHER
    EDGES ARE HANDLED BY ISSUING A FACET FOR EACH OF THEIR PARTITIONS

    :param esq: esq.query() IS USED TO COUNT REAL COMBINATIONS, esq.es IS POSTED TO
    :param mvel: COMPILER, PASSED TO compileEdges2Term() AND buildCondition()
    :param query: select, edges, where, limit AND frum ARE READ
    :return: Cube OVER query.edges WITH ONE Matrix PER SELECT, cube.frum IS SET TO query
    """
    select = listwrap(query.select)
    facetEdges = []    # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = FlatList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM

            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error("There is more than one open-ended edge: self can not be handled")
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            l = len(e.domain.partitions)
            if ((e.value and is_keyword(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts:
                num_parts = l
                specialEdge = e
                special_index = i

        if special_index < 0:
            # WITHOUT THIS GUARD pop(-1) WOULD REMOVE THE LAST (UNRELATED) EDGE
            # AND None WOULD BE USED AS THE SPECIAL EDGE
            Log.error("Can not find an edge that can be collapsed to a term")

        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges)*len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {"aggregate": "count"},
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        def add_facet(value, parts, cube):
            # KEEP ONLY THE COMBINATIONS THAT HAVE A (TRUTHY) COUNT
            if value:
                esFacets.append(parts)

        counts["count"].forall(add_facet)

        Log.note("{{theory_count}} theoretical combinations, {{real_count}} actual combos found",  real_count= len(esFacets),  theory_count=total_facets)

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube

    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, FlatList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        Log.error("not implemented yet")  # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES

    FromES = build_es_query(query)

    for s in select:
        for parts in esFacets:
            condition = FlatList()
            constants = FlatList()
            # FACET NAME IS "<select name>,<dataIndex>,<dataIndex>,..." SO THE
            # COORDINATES CAN BE RECOVERED FROM THE RESPONSE
            name = [literal_field(s.name)]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
                constants.append({"name": fedge.domain.name, "value": parts[f]})
            condition.append(query.where)
            name = ",".join(name)

            FromES.facets[name] = {
                "terms_stats": {
                    "key_field": calcTerm.field,
                    "value_field": s.value if is_keyword(s.value) else None,
                    "value_script": mvel.compile_expression(s.value) if not is_keyword(s.value) else None,
                    "size": coalesce(query.limit, 200000)
                }
            }
            if condition:
                FromES.facets[name].facet_filter = simplify_esfilter({"and": condition})

    data = es09.util.post(esq.es, FromES, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        partitions = FlatList()
        term2part = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                # KEY BY THE TERM (AS THE FILL LOOP BELOW DOES); A PLAIN dict
                # LOOKUP ON A MISSING KEY WOULD RAISE, SO TEST MEMBERSHIP
                term = stats.term
                if term not in term2part:
                    part = {"value": term, "name": term}
                    partitions.append(part)
                    term2part[term] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = term2part
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        # UNDO THE FACET NAMING: FIRST ITEM IS THE SELECT NAME, THE REST ARE COORDINATES
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                # INSERT THE SPECIAL EDGE'S COORDINATE AT ITS POSITION AMONG THE EDGES
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index]+(special.dataIndex, )+pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube

Esempio n. 26

0

Mostra file

File: aggs.py Progetto: davehunt/ActiveData

def es_aggsop(es, frum, query):
    """
    ANSWER AN AGGREGATE QUERY USING THE ES aggs API

    EACH SELECT CLAUSE IS GIVEN A pull PATH THAT TELLS THE FORMATTER WHERE IN THE
    ES RESPONSE ITS VALUE CAN BE FOUND.  SELECTS ON PLAIN VARIABLES AGGREGATE ON A
    field, ALL OTHERS AGGREGATE ON A script (value.to_ruby())

    :param es: PASSED THROUGH TO es09.util.post()
    :param frum: schema.all_columns, typed, name AND query_path ARE READ
    :param query: select, where, edges, groupby, format AND limit ARE READ
    :return: OUTPUT OF THE FORMATTER CHOSEN BY query.format, WITH meta FILLED IN
    """
    # COPIES, BECAUSE value AND pull ARE MODIFIED BELOW
    select = wrap([s.copy() for s in listwrap(query.select)])
    es_column_map = {c.name: unwraplist(c.es_column) for c in frum.schema.all_columns}

    es_query = Dict()
    new_select = Dict()  #MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
            s.pull = "doc_count"
        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                if frum.typed:
                    # STATISITCAL AGGS IMPLY $value, WHILE OTHERS CAN BE ANYTHING
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        #TODO: HANDLE BOTH $value AND $objects TO COUNT
                        Log.error("do not know how to handle")
                    else:
                        s.value.var = "$value"
                        new_select["$value"] += [s]
                else:
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        #TODO:  WE SHOULD BE ABLE TO COUNT, BUT WE MUST *OR* ALL LEAF VALUES TO DO IT
                        Log.error("do not know how to handle")
                    else:
                        Log.error('Not expecting ES to have a value at "." which {{agg}} can be applied', agg=s.aggregate)
            elif s.aggregate == "count":
                s.value = s.value.map(es_column_map)
                new_select["count_"+literal_field(s.value.var)] += [s]
            else:
                s.value = s.value.map(es_column_map)
                new_select[literal_field(s.value.var)] += [s]
        else:
            formula.append(s)

    # AGGREGATES OVER PLAIN FIELDS
    for canonical_name, many in new_select.items():
        representative = many[0]
        if representative.value.var == ".":
            Log.error("do not know how to handle")
        else:
            field_name = representative.value.var

        # canonical_name=literal_field(many[0].name)
        for s in many:
            if s.aggregate == "count":
                es_query.aggs[literal_field(canonical_name)].value_count.field = field_name
                s.pull = literal_field(canonical_name) + ".value"
            elif s.aggregate == "median":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = key + ".values.50\\.0"
            elif s.aggregate == "percentile":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")
                # BOTH BOUNDS ARE CHECKED; THE LOWER BOUND USED TO READ A MISSPELLED ATTRIBUTE
                if isinstance(s.percentile, basestring) or s.percentile < 0 or 1 < s.percentile:
                    Log.error("Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [percent]
                s.pull = key + ".values." + literal_field(unicode(percent))
            elif s.aggregate == "cardinality":
                # ES USES DIFFERENT METHOD FOR CARDINALITY
                key = literal_field(canonical_name + " cardinality")

                es_query.aggs[key].cardinality.field = field_name
                s.pull = key + ".value"
            elif s.aggregate == "stats":
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].extended_stats.field = field_name

                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + " percentile")
                es_query.aggs[median_name].percentiles.field = field_name
                es_query.aggs[median_name].percentiles.percents += [50]

                s.pull = {
                    "count": stats_name + ".count",
                    "sum": stats_name + ".sum",
                    "min": stats_name + ".min",
                    "max": stats_name + ".max",
                    "avg": stats_name + ".avg",
                    "sos": stats_name + ".sum_of_squares",
                    "std": stats_name + ".std_deviation",
                    "var": stats_name + ".variance",
                    "median": median_name + ".values.50\\.0"
                }
            elif s.aggregate == "union":
                # USE TERMS AGGREGATE TO SIMULATE union
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].terms.field = field_name
                es_query.aggs[stats_name].terms.size = Math.min(s.limit, MAX_LIMIT)
                s.pull = stats_name + ".buckets.key"
            else:
                # PULL VALUE OUT OF THE stats AGGREGATE
                es_query.aggs[literal_field(canonical_name)].extended_stats.field = field_name
                s.pull = literal_field(canonical_name) + "." + aggregates1_4[s.aggregate]

    # AGGREGATES OVER EXPRESSIONS, WHICH MUST BE SENT AS SCRIPTS
    for s in formula:
        canonical_name = literal_field(s.name)
        abs_value = s.value.map(es_column_map)

        if s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = abs_value.to_ruby()
            s.pull = literal_field(canonical_name) + ".value"
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = key + ".values.50\\.0"
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = key + ".values." + literal_field(unicode(percent))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = abs_value.to_ruby()
            s.pull = key + ".value"
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = abs_value.to_ruby()

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = abs_value.to_ruby()
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = {
                "count": stats_name + ".count",
                "sum": stats_name + ".sum",
                "min": stats_name + ".min",
                "max": stats_name + ".max",
                "avg": stats_name + ".avg",
                "sos": stats_name + ".sum_of_squares",
                "std": stats_name + ".std_deviation",
                "var": stats_name + ".variance",
                "median": median_name + ".values.50\\.0"
            }
        elif s.aggregate=="union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = abs_value.to_ruby()
            s.pull = stats_name + ".buckets.key"
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
            es_query.aggs[canonical_name].extended_stats.script = abs_value.to_ruby()

    decoders = get_decoders_by_depth(query)
    start = 0

    #<TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum, map_=es_column_map)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2::]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
            start += d.num_columns

        if split_where[1]:
            #TODO: INCLUDE FILTERS ON EDGES
            filter_ = simplify_esfilter(AndOp("and", split_where[1]).to_esfilter())
            es_query = Dict(
                aggs={"_filter": set_default({"filter": filter_}, es_query)}
            )

        es_query = wrap({
            "aggs": {"_nested": set_default(
                {
                    "nested": {
                        "path": frum.query_path
                    }
                },
                es_query
            )}
        })
    else:
        if any(split_where[1::]):
            Log.error("Where clause is too deep")

    for d in decoders[0]:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if split_where[0]:
        #TODO: INCLUDE FILTERS ON EDGES
        filter_ = simplify_esfilter(AndOp("and", split_where[0]).to_esfilter())
        es_query = Dict(
            aggs={"_filter": set_default({"filter": filter_}, es_query)}
        )
    # </TERRIBLE SECTION>

    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})

    es_query.size = 0

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting")
        with format_time:
            # FLATTEN THE PER-DEPTH LISTS OF DECODERS
            decoders = [d for ds in decoders for d in ds]
            result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
            if query.edges:
                output = formatter(decoders, result.aggregations, start, query, select)
            elif query.groupby:
                output = groupby_formatter(decoders, result.aggregations, start, query, select)
            else:
                output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        # AN UNKNOWN FORMAT SHOWS UP AS A FAILED format_dispatch LOOKUP; REPORT IT AS SUCH
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", e)

Esempio n. 27

0

Mostra file

def buildCondition(mvel, edge, partition):
    """
    RETURN AN ES FILTER OBJECT THAT SELECTS THE DOCUMENTS BELONGING TO ONE
    PARTITION OF AN EDGE

    :param mvel: COMPILER, ONLY mvel.compile_expression() IS CALLED (RANGE EDGES)
    :param edge: domain, range, value AND name ARE READ
    :param partition: where, esfilter, min, max AND value ARE READ
    :return: ES FILTER (A PLAIN dict, OR WHATEVER simplify_esfilter() /
             partition.esfilter PROVIDE)
    """
    if edge.domain.isFacet:
        # MUST USE THIS' esFacet
        condition = wrap(coalesce(partition.where, {"and": []}))

        if partition.min and partition.max and is_keyword(edge.value):
            condition["and"].append({
                "range": {edge.value: {"gte": partition.min, "lt": partition.max}}
            })

        # ES WILL FREAK OUT IF WE SEND {"not":{"and":x}} (OR SOMETHING LIKE THAT)
        return simplify_esfilter(condition)
    elif edge.range:
        # THESE REALLY NEED FACETS TO PERFORM THE JOIN-TO-DOMAIN
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            output = {"and": []}

            if edge.range.mode == "inclusive":
                # IF THE range AND THE partition OVERLAP, THEN MATCH IS MADE
                if is_keyword(edge.range.min):
                    output["and"].append({"range": {edge.range.min: {"lt": es09.expressions.value2value(partition.max)}}})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + " < " + es09.expressions.value2MVEL(partition.max)
                    )}})

                if is_keyword(edge.range.max):
                    # A MISSING max IS TREATED AS AN OPEN-ENDED RANGE
                    # (FIELD NAME MUST BE A dict KEY; IT WAS A SET LITERAL, WHICH CAN NOT HOLD A dict)
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {edge.range.max: {"gt": es09.expressions.value2value(partition.min)}}}
                    ]})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.max + " > " + es09.expressions.value2MVEL(partition.min))}})

            else:
                # SNAPSHOT - IF range INCLUDES partition.min, THEN MATCH IS MADE
                if is_keyword(edge.range.min):
                    output["and"].append({"range": {edge.range.min: {"lte": es09.expressions.value2value(partition.min)}}})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + "<=" + es09.expressions.value2MVEL(partition.min)
                    )}})

                if is_keyword(edge.range.max):
                    # A MISSING max IS TREATED AS AN OPEN-ENDED RANGE
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {edge.range.max: {"gte": es09.expressions.value2value(partition.min)}}}
                    ]})
                else:
                    # WHOA!! SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        es09.expressions.value2MVEL(partition.min) + " <= " + edge.range.max
                    )}})
            return output
        else:
            Log.error("Do not know how to handle range query on non-continuous domain")

    elif not edge.value:
        # MUST USE THIS' esFacet, AND NOT(ALL THOSE ABOVE)
        return partition.esfilter
    elif is_keyword(edge.value):
        # USE FAST ES SYNTAX
        # THE FILTER IS A PLAIN dict, SO IT IS BUILT WITH KEYS (NOT ATTRIBUTES)
        output = {}
        if edge.domain.type in domains.ALGEBRAIC:
            output["range"] = {
                edge.value: {
                    "gte": es09.expressions.value2query(partition.min),
                    "lt": es09.expressions.value2query(partition.max)
                }
            }
        elif edge.domain.type == "set":
            if partition.value:
                if partition.value != edge.domain.getKey(partition):
                    Log.error("please ensure the key attribute of the domain matches the value attribute of all partitions, if only because we are now using the former")
                # DEFAULT TO USING THE .value ATTRIBUTE, IF ONLY BECAUSE OF LEGACY REASONS
                output["term"] = {edge.value: partition.value}
            else:
                output["term"] = {edge.value: edge.domain.getKey(partition)}

        elif edge.domain.type == "default":
            output["term"] = {edge.value: partition.value}
        else:
            Log.error("Edge \"" + edge.name + "\" is not supported")

        return output
    else:
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            script = edge.value + ">=" + es09.expressions.value2MVEL(partition.min) + " and " + edge.value + "<" + es09.expressions.value2MVEL(partition.max)
        else:
            script = "( " + edge.value + " ) ==" + es09.expressions.value2MVEL(partition.value)

        # addFunctions() RETURNS THE SCRIPT IN TWO PIECES, WHICH ARE SENT AS ONE
        code = es09.expressions.addFunctions(script)
        return {"script": {"script": code.head + code.body}}

Esempio n. 28

0

Mostra file

File: setop.py Progetto: mozilla/ActiveData-ETL

def es_setop(es, mvel, query):
    """
    RUN A SET OPERATION (A QUERY WITHOUT EDGES) USING ES FACETS, AND RETURN A Cube

    THREE STRATEGIES ARE USED:
    * COUNT  - SINGLE SELECT WITH NO value: A FILTERED QUERY, ANSWER IS hits.total
    * TERM   - SINGLE SELECT ON A PLAIN FIELD: A terms FACET (NAMED "mvel") ON THAT FIELD
    * SCRIPT - EVERYTHING ELSE: A terms FACET (NAMED "mvel") ON A SCRIPT MADE BY
               mvel.code(), DECODED WITH unpack_terms()

    :param es: PASSED THROUGH TO es09.util.post()
    :param mvel: COMPILER, ONLY mvel.code() IS CALLED
    :param query: select, where, sort, limit AND frum.name ARE READ
    :return: Cube WITH ONE Matrix PER SELECT, cube.frum IS SET TO query
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isDeep = len(split_field(query.frum.name)) > 1
    # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT
    # NOTE(review): `== None` IS KEPT AS WRITTEN; PRESUMABLY THE WRAPPED NULL
    # VALUE RELIES ON EQUALITY, NOT IDENTITY - CONFIRM BEFORE CHANGING
    isComplex = OR([
        s.value == None and s.aggregate not in ("count", "none")
        for s in select
    ])

    # THE FAST PATHS ONLY APPLY TO A SINGLE, SHALLOW, SIMPLE SELECT
    isSimple = not isDeep and not isComplex and len(select) == 1
    isCount = isSimple and not select[0].value
    isTerm = isSimple and not isCount and isKeyword(select[0].value)

    if isCount:
        FromES.query = {
            "filtered": {
                "query": {
                    "match_all": {}
                },
                "filter": simplify_esfilter(query.where)
            }
        }
        FromES.size = 1  # PREVENT QUERY CHECKER FROM THROWING ERROR
    elif isTerm:
        terms = {
            "field": select[0].value,
            "size": coalesce(query.limit, 200000)
        }
        if query.sort:
            sort = query.sort
            if len(sort) > 1:
                Log.error("can not sort by more than one field")

            s0 = sort[0]
            if s0.field != select[0].value:
                Log.error(
                    "can not sort by anything other than count, or term")

            # THE ORDER BELONGS INSIDE THE terms CLAUSE OF THE "mvel" FACET;
            # IT WAS PREVIOUSLY WRITTEN TO A SEPARATE (BOGUS) FACET CALLED "terms"
            terms["order"] = "term" if s0.sort >= 0 else "reverse_term"

        FromES.facets.mvel = {
            "terms": terms,
            "facet_filter": simplify_esfilter(query.where)
        }
    elif not isDeep:
        # ALSO REACHED BY A SINGLE SELECT ON A NON-KEYWORD EXPRESSION, WHICH
        # PREVIOUSLY BUILT NO FACET AT ALL
        simple_query = query.copy()
        simple_query.where = TRUE_FILTER  # THE FACET FILTER IS FASTER
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": coalesce(simple_query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }
    else:
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }

    data = es09.util.post(es, FromES, query.limit)

    if len(select) == 1 and not select[0].value:
        # SPECIAL CASE FOR SINGLE COUNT
        output = Matrix(value=data.hits.total)
        cube = Cube(query.select, [], {select[0].name: output})
    elif isTerm:
        # SPECIAL CASE FOR SINGLE TERM: READ THE FACET THAT WAS ACTUALLY REQUESTED
        T = data.facets.mvel.terms
        output = Matrix.wrap([t.term for t in T])
        cube = Cube(query.select, [], {select[0].name: output})
    else:
        # SCRIPTED FACET: EVERY TERM MUST BE DECODED BACK INTO THE SELECTED VALUES
        data_list = unpack_terms(data.facets.mvel, select)
        if not data_list:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
        else:
            # list() SO THE COLUMNS CAN BE INDEXED EVEN WHEN zip() IS LAZY
            output = list(zip(*data_list))
            cube = Cube(
                select, [],
                {s.name: Matrix(list=output[i])
                 for i, s in enumerate(select)})

    cube.frum = query
    return cube

Esempio n. 29

0

Mostra file

File: aggs.py Progetto: klahnakoski/intermittents

def es_aggsop(es, frum, query):
    """
    ANSWER AN AGGREGATE QUERY USING THE ES aggs API

    EACH SELECT CLAUSE IS GIVEN A pull PATH THAT TELLS THE FORMATTER WHERE IN THE
    ES RESPONSE ITS VALUE CAN BE FOUND

    :param es: PASSED THROUGH TO es09.util.post()
    :param frum: ONLY frum.name IS READ (MORE THAN ONE PATH STEP MEANS NESTED)
    :param query: select, where, edges, groupby, format AND limit ARE READ
    :return: OUTPUT OF THE FORMATTER CHOSEN BY query.format, WITH meta FILLED IN
    """
    select = listwrap(query.select)

    es_query = Dict()
    new_select = Dict()
    formula = []
    for s in select:
        if s.aggregate == "count" and (s.value == None or s.value == "."):
            s.pull = "doc_count"
        elif is_keyword(s.value):
            # GROUP THE SELECTS BY THE FIELD THEY AGGREGATE
            new_select[literal_field(s.value)] += [s]
        else:
            formula.append(s)

    for _, many in new_select.items():
        if len(many)>1:
            # SEVERAL AGGREGATES ON THE SAME FIELD SHARE ONE stats AGGREGATE
            canonical_name=literal_field(many[0].name)
            es_query.aggs[canonical_name].stats.field = many[0].value
            for s in many:
                if s.aggregate == "count":
                    s.pull = canonical_name + ".count"
                else:
                    s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
        else:
            s = many[0]
            s.pull = literal_field(s.value) + ".value"
            es_query.aggs[literal_field(s.value)][aggregates1_4[s.aggregate]].field = s.value

    # SELECTS ON EXPRESSIONS MUST BE SENT AS SCRIPTS
    for i, s in enumerate(formula):
        new_select[unicode(i)] = s
        s.pull = literal_field(s.name) + ".value"
        es_query.aggs[literal_field(s.name)][aggregates1_4[s.aggregate]].script = qb_expression_to_ruby(s.value)

    decoders = [AggsDecoder(e, query) for e in coalesce(query.edges, query.groupby, [])]
    start = 0
    for d in decoders:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if query.where:
        # WRAP EVERYTHING BUILT SO FAR INSIDE A filter AGGREGATE
        filter_ = simplify_esfilter(query.where)
        es_query = Dict(
            aggs={"_filter": set_default({"filter": filter_}, es_query)}
        )

    if len(split_field(frum.name)) > 1:
        # WRAP EVERYTHING BUILT SO FAR INSIDE A nested AGGREGATE
        es_query = wrap({
            "size": 0,
            "aggs": {"_nested": set_default({
                "nested": {
                    "path": join_field(split_field(frum.name)[1::])
                }
            }, es_query)}
        })

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
        if query.edges:
            output = formatter(decoders, result.aggregations, start, query, select)
        elif query.groupby:
            output = groupby_formatter(decoders, result.aggregations, start, query, select)
        else:
            output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.es_response_time = es_duration.seconds
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        # AN UNKNOWN FORMAT SHOWS UP AS A FAILED format_dispatch LOOKUP; REPORT IT AS SUCH
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet",  format= query.format, cause=e)
        Log.error("Some problem", e)

Esempio n. 30

0

Mostra file

 def test_in(self):
     """AN "in" EXPRESSION ON ONE FIELD SIMPLIFIES TO A SINGLE ES terms FILTER"""
     expected = {"terms": {"a": [1, 2]}}
     esfilter = jx_expression({"in": {"a": [1, 2]}}).to_esfilter()
     self.assertEqual(simplify_esfilter(esfilter), expected)

Esempio n. 31

0

Mostra file

File: setop.py Progetto: davehunt/ActiveData

def es_setop(es, mvel, query):
    """
    Run a set-operation query against ES and return the selected values.

    The request starts as es09.util.build_es_query(query) and is then adjusted:
      * not deep and not complex, and the select is a single entry with no
        value (or the first value is "*"): replaced by a filtered query of size 1
      * not deep and not complex, and every select value is a Variable:
        replaced by a filtered query that lists the selected `fields`
      * complex but not deep: a terms facet scripted by mvel.code() over a copy
        of the query whose where is TRUE_FILTER; the real where clause is
        applied as the `facet_filter`
      * deep: a terms facet scripted by mvel.code(query), with the where clause
        as the `facet_filter`

    :param es: passed to es09.util.post() as the target of the request
    :param mvel: provides code(query), the script used by the terms facet
    :param query: the query; select, where, sort, limit and frum.name are read
    :return: Dict with meta.esquery (the request that was sent) and data (the
             hits' `_source`, the hits' `fields`, or a Cube built from the facet)
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isDeep = len(split_field(query.frum.name)) > 1
    # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT
    # NOTE(review): `== None` IS LEFT AS-IS; PRESUMABLY s.value CAN BE A NULL-LIKE
    # WRAPPER THAT EQUALS None WITHOUT BEING None - CONFIRM BEFORE USING `is None`
    isComplex = OR([
        s.value == None and s.aggregate not in ("count", "none")
        for s in select
    ])

    if not isDeep and not isComplex:
        # PARENTHESES ONLY SPELL OUT THE EXISTING and/or PRECEDENCE
        if (len(select) == 1 and not select[0].value) or select[0].value == "*":
            FromES = wrap({
                "query": {
                    "filtered": {
                        "query": {
                            "match_all": {}
                        },
                        "filter":
                        simplify_esfilter(
                            jx_expression(query.where).to_esfilter())
                    }
                },
                "sort": query.sort,
                "size": 1
            })
        elif all(isinstance(v, Variable) for v in select.value):
            FromES = wrap({
                "query": {
                    "filtered": {
                        "query": {
                            "match_all": {}
                        },
                        "filter": simplify_esfilter(query.where.to_esfilter())
                    }
                },
                "fields": select.value,
                "sort": query.sort,
                "size": coalesce(query.limit, 200000)
            })
    elif not isDeep:
        simple_query = query.copy()
        simple_query.where = TRUE_FILTER  # THE FACET FILTER IS FASTER
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": coalesce(simple_query.limit, 200000)
            },
            "facet_filter":
            simplify_esfilter(jx_expression(query.where).to_esfilter())
        }
    else:
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter":
            simplify_esfilter(jx_expression(query.where).to_esfilter())
        }

    data = es09.util.post(es, FromES, query.limit)

    if (len(select) == 1 and not select[0].value) or select[0].value == "*":
        # SPECIAL CASE FOR SINGLE COUNT
        cube = wrap(data).hits.hits._source
    elif isinstance(select[0].value, Variable):
        # SPECIAL CASE FOR SINGLE TERM
        cube = wrap(data).hits.hits.fields
    else:
        data_list = unpack_terms(data.facets.mvel, select)
        if not data_list:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
        else:
            # TRANSPOSE ROWS INTO COLUMNS; list() KEEPS THE RESULT INDEXABLE
            # ON PYTHON 3, WHERE zip() RETURNS A ONE-SHOT ITERATOR
            output = list(zip(*data_list))
            cube = Cube(
                select, [],
                {s.name: Matrix(list=output[i])
                 for i, s in enumerate(select)})

    return Dict(meta={"esquery": FromES}, data=cube)

Esempio n. 32

0

Mostra file

def es_setop(es, mvel, query):
    """
    Execute a set-operation query on ES and package the selected values.

    The request begins as es09.util.build_es_query(query); depending on the
    query it is replaced by a plain filtered query (shallow, simple selects) or
    extended with an mvel-scripted terms facet (complex or deep selects), with
    the where clause supplied as the filter / facet_filter.

    :param es: passed to es09.util.post() as the target of the request
    :param mvel: provides code(query), the script used by the terms facet
    :param query: the query; select, where, sort, limit and frum.name are read
    :return: Data with meta.esquery (the request that was sent) and data (the
             hits' `_source`, the hits' `fields`, or a Cube built from the facet)
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    # NOTE(review): `== None` IS LEFT AS-IS; PRESUMABLY s.value CAN BE A NULL-LIKE
    # WRAPPER THAT EQUALS None WITHOUT BEING None - CONFIRM BEFORE USING `is None`
    isComplex = OR([s.value == None and s.aggregate not in ("count", "none") for s in select])   # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

    if not isDeep and not isComplex:
        # PARENTHESES ONLY SPELL OUT THE EXISTING and/or PRECEDENCE
        if (len(select) == 1 and not select[0].value) or select[0].value == "*":
            # EVERYTHING IS SELECTED: NO FIELD LIST, ONE HIT REQUESTED
            FromES = wrap({
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": simplify_esfilter(jx_expression(query.where).to_esfilter())
                }},
                "sort": query.sort,
                "size": 1
            })
        elif all(isinstance(v, Variable) for v in select.value):
            # ONLY PLAIN VARIABLES ARE SELECTED: ASK ES FOR THOSE FIELDS DIRECTLY
            FromES = wrap({
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": simplify_esfilter(query.where.to_esfilter())
                }},
                "fields": select.value,
                "sort": query.sort,
                "size": coalesce(query.limit, 200000)
            })
    elif not isDeep:
        simple_query = query.copy()
        simple_query.where = TRUE_FILTER  # THE FACET FILTER IS FASTER
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": coalesce(simple_query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(jx_expression(query.where).to_esfilter())
        }
    else:
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(jx_expression(query.where).to_esfilter())
        }

    data = es09.util.post(es, FromES, query.limit)

    if (len(select) == 1 and not select[0].value) or select[0].value == "*":
        # SPECIAL CASE FOR SINGLE COUNT
        cube = wrap(data).hits.hits._source
    elif isinstance(select[0].value, Variable):
        # SPECIAL CASE FOR SINGLE TERM
        cube = wrap(data).hits.hits.fields
    else:
        data_list = unpack_terms(data.facets.mvel, select)
        if not data_list:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
        else:
            # zip() IS A ONE-SHOT ITERATOR ON PYTHON 3 AND CAN NOT BE INDEXED;
            # MATERIALIZE THE TRANSPOSED COLUMNS BEFORE LOOKING THEM UP BY POSITION
            output = list(zip(*data_list))
            cube = Cube(select, [], {s.name: Matrix(list=output[i]) for i, s in enumerate(select)})

    return Data(
        meta={"esquery": FromES},
        data=cube
    )

Esempio n. 33

0

Mostra file

 def test_eq1(self):
     """A single-field ``eq`` clause should reduce to one ``term`` filter."""
     expected = {"term": {"a": 20}}
     es_filter = jx_expression({"eq": {"a": 20}}).to_esfilter()
     self.assertEqual(simplify_esfilter(es_filter), expected)

Esempio n. 34

0

Mostra file

def es_deepop(es, query):
    """
    Run a set-operation query over a nested (deep) path and format the rows.

    The where clause is split by depth and written into the filters of the ES
    query template; the select clause is expanded into `new_select`, a flat
    list of {name, pull, nested_path, put} instructions handed to the formatter.
    When there are no conditions at depth 1, a second request is sent on
    another thread for documents that match the depth-0 conditions but do not
    match a `nested` `match_all` filter on the query path.

    :param es: passed to es09.util.post() for both requests
    :param query: the query; frum, where, sort, limit, select and format are read
    :return: the output of the formatter selected by query.format, with
             meta.timing.es, meta.content_type and meta.es_query filled in
    Any exception raised while formatting is reported through Log.error().
    """
    schema = query.frum.schema
    columns = schema.columns
    query_path = schema.query_path

    # NAME -> PULL PATH OF THE FIRST COLUMN FOUND FOR THAT NAME
    # USED BELOW TO REWRITE VARIABLES BEFORE COMPILING post_expressions
    map_to_local = {k: get_pull(c[0]) for k, c in schema.lookup.items()}

    # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
    # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER:  ES ALLOWS
    # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
    # LOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
    post_expressions = {}
    es_query, es_filters = es14.util.es_query_template(query.frum.name)

    # SPLIT WHERE CLAUSE BY DEPTH
    # NOTE(review): PRESUMABLY wheres[0] HOLDS THE TOP-LEVEL CONDITIONS AND
    # wheres[1] THE NESTED ONES - CONFIRM AGAINST split_expression_by_depth
    wheres = split_expression_by_depth(query.where, schema)
    for i, f in enumerate(es_filters):
        # PROBLEM IS {"match_all": {}} DOES NOT SURVIVE set_default()
        # SO THE SIMPLIFIED FILTER IS COPIED INTO THE TEMPLATE KEY BY KEY
        for k, v in unwrap(simplify_esfilter(AndOp("and", wheres[i]).to_esfilter())).items():
            f[k] = v

    if not wheres[1]:
        # NO CONDITIONS AT DEPTH 1: ALSO FETCH DOCUMENTS THAT MATCH wheres[0]
        # BUT DO NOT MATCH A nested match_all FILTER ON query_path
        more_filter = {
            "and": [
                simplify_esfilter(AndOp("and", wheres[0]).to_esfilter()),
                {"not": {
                    "nested": {
                        "path": query_path,
                        "filter": {
                            "match_all": {}
                        }
                    }
                }}
            ]
        }
    else:
        more_filter = None

    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort)
    es_query.fields = []

    is_list = isinstance(query.select, list)
    new_select = FlatList()

    # i IS THE RUNNING OUTPUT INDEX, WRITTEN INTO EACH put.index
    i = 0
    for s in listwrap(query.select):
        if isinstance(s.value, LeavesOp):
            if isinstance(s.value.term, Variable):
                if s.value.term.var == ".":
                    # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
                    # EVERY NON-STRUCT COLUMN (EXCEPT _id) GETS ITS OWN INDEX
                    for c in columns:
                        if c.type not in STRUCT and c.es_column != "_id":
                            if c.nested_path[0] == ".":
                                es_query.fields += [c.es_column]
                            new_select.append({
                                "name": c.names[query_path],
                                "pull": get_pull(c),
                                "nested_path": c.nested_path[0],
                                "put": {"name": literal_field(c.names[query_path]), "index": i, "child": "."}
                            })
                            i += 1

                    # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
                    col_names = set(c.names[query_path] for c in columns)
                    for n in new_select:
                        if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
                            n.name = n.name.lstrip(".")
                            n.put.name = literal_field(n.name)
                            # REMEMBER THE SHORTENED NAME SO A LATER COLUMN CAN NOT TAKE IT TOO
                            col_names.add(n.name)
                else:
                    # LEAVES OF A NAMED VARIABLE: EVERY NON-STRUCT COLUMN WHOSE NAME
                    # STARTS WITH "<variable name>." GETS ITS OWN INDEX
                    prefix = schema[s.value.term.var][0].names["."] + "."
                    prefix_length = len(prefix)
                    for c in columns:
                        cname = c.names["."]
                        if cname.startswith(prefix) and c.type not in STRUCT:
                            pull = get_pull(c)
                            if c.nested_path[0] == ".":
                                es_query.fields += [c.es_column]

                            new_select.append({
                                "name": s.name + "." + cname[prefix_length:],
                                "pull": pull,
                                "nested_path": c.nested_path[0],
                                "put": {
                                    "name": s.name + "." + literal_field(cname[prefix_length:]),
                                    "index": i,
                                    "child": "."
                                }
                            })
                            i += 1
        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                # SELECT THE WHOLE DOCUMENT: ALL COLUMNS SHARE ONE INDEX, EACH
                # IS PUT UNDER ITS OWN es_column AS THE child
                for c in columns:
                    if c.type not in STRUCT and c.es_column != "_id":
                        if len(c.nested_path) == 1:
                            es_query.fields += [c.es_column]
                        new_select.append({
                            "name": c.name,
                            "pull": get_pull(c),
                            "nested_path": c.nested_path[0],
                            "put": {"name": ".", "index": i, "child": c.es_column}
                        })
                # ADVANCED ONCE, AFTER THE LOOP
                i += 1
            elif s.value.var == "_id":
                new_select.append({
                    "name": s.name,
                    "value": s.value.var,
                    "pull": "_id",
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            else:
                prefix = schema[s.value.var][0]
                if not prefix:
                    net_columns = []
                else:
                    # NON-STRUCT COLUMNS FOUND UNDER "<es_column>."
                    parent = prefix.es_column+"."
                    prefix_length = len(parent)
                    net_columns = [c for c in columns if c.es_column.startswith(parent) and c.type not in STRUCT]

                if not net_columns:
                    # NOTHING UNDERNEATH: SELECT THE COLUMN ITSELF
                    # NOTE(review): THIS PATH IS ALSO TAKEN WHEN prefix IS FALSY - CONFIRM
                    # get_pull() AND prefix.nested_path BEHAVE FOR THAT VALUE
                    pull = get_pull(prefix)
                    if len(prefix.nested_path) == 1:
                        es_query.fields += [prefix.es_column]
                    new_select.append({
                        "name": s.name,
                        "pull": pull,
                        "nested_path": prefix.nested_path[0],
                        "put": {"name": s.name, "index": i, "child": "."}
                    })
                else:
                    done = set()
                    for n in net_columns:
                        # THE COLUMNS CAN HAVE DUPLICATE REFERENCES TO THE SAME ES_COLUMN
                        if n.es_column in done:
                            continue
                        done.add(n.es_column)

                        pull = get_pull(n)
                        if len(n.nested_path) == 1:
                            es_query.fields += [n.es_column]
                        new_select.append({
                            "name": s.name,
                            "pull": pull,
                            "nested_path": n.nested_path[0],
                            "put": {"name": s.name, "index": i, "child": n.es_column[prefix_length:]}
                        })
                # ONE INDEX FOR THE WHOLE SELECT, WHICHEVER PATH WAS TAKEN
                i += 1
        else:
            # ANY OTHER EXPRESSION: REQUEST THE TOP-LEVEL FIELDS IT USES, THEN
            # EVALUATE THE COMPILED EXPRESSION ON EACH HIT (SEE inners() BELOW)
            expr = s.value
            for v in expr.vars():
                for c in schema[v]:
                    if c.nested_path[0] == ".":
                        es_query.fields += [c.es_column]
                    # else:
                    #     Log.error("deep field not expected")

            pull = EXPRESSION_PREFIX + s.name
            post_expressions[pull] = compile_expression(expr.map(map_to_local).to_python())

            new_select.append({
                "name": s.name if is_list else ".",
                "pull": pull,
                "value": expr.__data__(),
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1

    # <COMPLICATED> ES needs two calls to get all documents
    more = []
    def get_more(please_stop):
        # SECOND REQUEST, USING more_filter; THE RESPONSE IS LEFT IN more[0]
        more.append(es09.util.post(
            es,
            Data(
                filter=more_filter,
                fields=es_query.fields
            ),
            query.limit
        ))
    if more_filter:
        # STARTED BEFORE THE MAIN REQUEST IS POSTED; JOINED INSIDE inners()
        need_more = Thread.run("get more", target=get_more)

    with Timer("call to ES") as call_timer:
        data = es09.util.post(es, es_query, query.limit)

    # EACH HIT IS RETURNED MULTIPLE TIMES FOR EACH INNER HIT, WITH INNER HIT INCLUDED
    def inners():
        for t in data.hits.hits:
            for i in t.inner_hits[literal_field(query_path)].hits.hits:
                # THE SAME HIT OBJECT IS YIELDED EACH TIME, WITH _inner REPLACED
                t._inner = i._source
                for k, e in post_expressions.items():
                    t[k] = e(t)
                yield t
        if more_filter:
            # HITS FROM THE SECOND REQUEST ARE YIELDED AS-IS (NO _inner, NO post_expressions)
            Thread.join(need_more)
            for t in more[0].hits.hits:
                yield t
    #</COMPLICATED>

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(inners(), new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)

Esempio n. 35

0

Mostra file

File: deep.py Progetto: klahnakoski/MoDataSubmission

def es_deepop(es, query):
    """
    Run a set-operation query over a nested (deep) path and format the rows.

    The where clause is split by depth and written into the filters of the ES
    query template; the select clause is expanded into `new_select`, a flat
    list of {name, pull, nested_path, put} instructions handed to the formatter.
    When there are no conditions at depth 1, a second request is sent on
    another thread for documents that match the depth-0 conditions but do not
    match a `nested` `match_all` filter on the query path.

    :param es: passed to es09.util.post() for both requests
    :param query: the query; frum, where, sort, limit, select and format are read
    :return: the output of the formatter selected by query.format, with
             meta.timing.es, meta.content_type and meta.es_query filled in
    Any exception raised while formatting is reported through Log.error().
    """
    def get_pull(column):
        # PATH USED TO READ THE COLUMN FROM A HIT: NESTED COLUMNS COME FROM THE
        # HIT'S `_inner` (SET IN inners() BELOW), ALL OTHERS FROM ITS `fields`
        if column.nested_path:
            return "_inner" + column.es_column[len(listwrap(column.nested_path)[0]):]
        else:
            return "fields." + literal_field(column.es_column)

    columns = query.frum.get_columns(query.frum.name)
    query_path = query.frum.query_path
    # DEEPEST nested_path FIRST.  A key= FUNCTION (INSTEAD OF A cmp FUNCTION) RUNS ON
    # PYTHON 2 AND 3; NEGATING THE LENGTH KEEPS THE SORT STABLE FOR EQUAL DEPTHS
    columns = UniqueIndex(
        keys=["name"],
        data=sorted(columns, key=lambda c: -len(listwrap(c.nested_path))),
        fail_on_dup=False
    )
    map_to_es_columns = {c.name: c.es_column for c in columns}
    # COLUMN NAME -> PULL PATH; USED TO REWRITE VARIABLES BEFORE COMPILING post_expressions
    map_to_local = {c.name: get_pull(c) for c in columns}

    # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
    # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER:  ES ALLOWS
    # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
    # LOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
    post_expressions = {}
    es_query, es_filters = es14.util.es_query_template(query.frum.name)

    # SPLIT WHERE CLAUSE BY DEPTH
    # NOTE(review): PRESUMABLY wheres[0] HOLDS THE TOP-LEVEL CONDITIONS AND
    # wheres[1] THE NESTED ONES - CONFIRM AGAINST split_expression_by_depth
    wheres = split_expression_by_depth(query.where, query.frum, map_to_es_columns)
    for i, f in enumerate(es_filters):
        # PROBLEM IS {"match_all": {}} DOES NOT SURVIVE set_default()
        # SO THE SIMPLIFIED FILTER IS COPIED INTO THE TEMPLATE KEY BY KEY
        for k, v in unwrap(simplify_esfilter(AndOp("and", wheres[i]).to_esfilter())).items():
            f[k] = v

    if not wheres[1]:
        # NO CONDITIONS AT DEPTH 1: ALSO FETCH DOCUMENTS THAT MATCH wheres[0]
        # BUT DO NOT MATCH A nested match_all FILTER ON query_path
        more_filter = {
            "and": [
                simplify_esfilter(AndOp("and", wheres[0]).to_esfilter()),
                {"not": {
                    "nested": {
                        "path": query_path,
                        "filter": {
                            "match_all": {}
                        }
                    }
                }}
            ]
        }
    else:
        more_filter = None

    es_query.size = coalesce(query.limit, queries.query.DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort)
    es_query.fields = []

    is_list = isinstance(query.select, list)
    new_select = DictList()

    # i IS THE RUNNING OUTPUT INDEX, WRITTEN INTO EACH put.index
    i = 0
    for s in listwrap(query.select):
        if isinstance(s.value, LeavesOp):
            if isinstance(s.value.term, Variable):
                if s.value.term.var == ".":
                    # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
                    for c in columns:
                        if c.relative and c.type not in ["nested", "object"]:
                            if not c.nested_path:
                                es_query.fields += [c.es_column]
                            new_select.append({
                                "name": c.name,
                                "pull": get_pull(c),
                                "nested_path": listwrap(c.nested_path)[0],
                                "put": {"name": literal_field(c.name), "index": i, "child": "."}
                            })
                            i += 1

                    # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
                    col_names = [c.name for c in columns if c.relative]
                    for n in new_select:
                        if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
                            n.name = n.put.name = n.name.lstrip(".")
                else:
                    # s.value IS THE LeavesOp AND s.value.term ITS Variable (BOTH CHECKED
                    # ABOVE), SO THE VARIABLE NAME IS s.value.term.var, NOT s.term.value.var
                    column = s.value.term.var + "."
                    prefix = len(column)
                    for c in columns:
                        if c.name.startswith(column) and c.type not in ["object", "nested"]:
                            pull = get_pull(c)
                            if len(listwrap(c.nested_path)) == 0:
                                es_query.fields += [c.es_column]

                            new_select.append({
                                "name": s.name + "." + c.name[prefix:],
                                "pull": pull,
                                "nested_path": listwrap(c.nested_path)[0],
                                "put": {"name": s.name + "." + literal_field(c.name[prefix:]), "index": i, "child": "."}
                            })
                            i += 1
        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                # SELECT THE WHOLE DOCUMENT: ALL COLUMNS SHARE ONE INDEX, EACH
                # IS PUT UNDER ITS OWN es_column AS THE child
                for c in columns:
                    if c.relative and c.type not in ["nested", "object"]:
                        if not c.nested_path:
                            es_query.fields += [c.es_column]
                        new_select.append({
                            "name": c.name,
                            "pull": get_pull(c),
                            "nested_path": listwrap(c.nested_path)[0],
                            "put": {"name": ".", "index": i, "child": c.es_column}
                        })
                # ADVANCED ONCE, AFTER THE LOOP
                i += 1
            elif s.value.var == "_id":
                new_select.append({
                    "name": s.name,
                    "value": s.value.var,
                    "pull": "_id",
                    "put": {"name": s.name, "index": i, "child": "."}
                })
                i += 1
            else:
                column = columns[(s.value.var,)]
                # NON-STRUCT COLUMNS FOUND UNDER "<es_column>."
                parent = column.es_column+"."
                prefix = len(parent)
                net_columns = [c for c in columns if c.es_column.startswith(parent) and c.type not in ["object", "nested"]]
                if not net_columns:
                    # NOTHING UNDERNEATH: SELECT THE COLUMN ITSELF
                    pull = get_pull(column)
                    if not column.nested_path:
                        es_query.fields += [column.es_column]
                    new_select.append({
                        "name": s.name,
                        "pull": pull,
                        "nested_path": listwrap(column.nested_path)[0],
                        "put": {"name": s.name, "index": i, "child": "."}
                    })
                else:
                    done = set()
                    for n in net_columns:
                        # THE COLUMNS CAN HAVE DUPLICATE REFERENCES TO THE SAME ES_COLUMN
                        if n.es_column in done:
                            continue
                        done.add(n.es_column)

                        pull = get_pull(n)
                        if not n.nested_path:
                            es_query.fields += [n.es_column]
                        new_select.append({
                            "name": s.name,
                            "pull": pull,
                            "nested_path": listwrap(n.nested_path)[0],
                            "put": {"name": s.name, "index": i, "child": n.es_column[prefix:]}
                        })
                # ONE INDEX FOR THE WHOLE SELECT, WHICHEVER PATH WAS TAKEN
                i += 1
        else:
            # ANY OTHER EXPRESSION: REQUEST THE TOP-LEVEL FIELDS IT USES, THEN
            # EVALUATE THE COMPILED EXPRESSION ON EACH HIT (SEE inners() BELOW)
            expr = s.value
            for v in expr.vars():
                for n in columns:
                    if n.name == v:
                        if not n.nested_path:
                            es_query.fields += [n.es_column]

            pull = EXPRESSION_PREFIX + s.name
            post_expressions[pull] = compile_expression(expr.map(map_to_local).to_python())

            new_select.append({
                "name": s.name if is_list else ".",
                "pull": pull,
                "value": expr.to_dict(),
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1

    # <COMPLICATED> ES needs two calls to get all documents
    more = []
    def get_more(please_stop):
        # SECOND REQUEST, USING more_filter; THE RESPONSE IS LEFT IN more[0]
        more.append(es09.util.post(
            es,
            Dict(
                filter=more_filter,
                fields=es_query.fields
            ),
            query.limit
        ))
    if more_filter:
        # STARTED BEFORE THE MAIN REQUEST IS POSTED; JOINED INSIDE inners()
        need_more = Thread.run("get more", target=get_more)

    with Timer("call to ES") as call_timer:
        data = es09.util.post(es, es_query, query.limit)

    # EACH HIT IS RETURNED MULTIPLE TIMES FOR EACH INNER HIT, WITH INNER HIT INCLUDED
    def inners():
        for t in data.hits.hits:
            for i in t.inner_hits[literal_field(query_path)].hits.hits:
                # THE SAME HIT OBJECT IS YIELDED EACH TIME, WITH _inner REPLACED
                t._inner = i._source
                for k, e in post_expressions.items():
                    t[k] = e(t)
                yield t
        if more_filter:
            # HITS FROM THE SECOND REQUEST ARE YIELDED AS-IS (NO _inner, NO post_expressions)
            Thread.join(need_more)
            for t in more[0].hits.hits:
                yield t
    #</COMPLICATED>

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(inners(), new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        # `as` FORM IS VALID ON PYTHON 2.6+ AND PYTHON 3
        Log.error("problem formatting", e)

Esempio n. 36

0

Mostra file

 def test_ne2(self):
     """A ``neq`` clause should reduce to a ``not`` wrapped around one ``term`` filter."""
     expected = {"not": {"term": {"a": 1}}}
     es_filter = jx_expression({"neq": {"a": 1}}).to_esfilter()
     result = simplify_esfilter(es_filter)
     self.assertEqual(result, expected)