Beispiel #1
0
def to_esfilter(self, schema):
    # OR(x) == NOT(AND(NOT(xi) for xi in x))
    output = es_not(es_and([
        NotOp("not", t).partial_eval().to_esfilter(schema)
        for t in self.terms
    ]))
    return output
def to_es14_filter(self, schema):
    # OR(x) == NOT(AND(NOT(xi) for xi in x))
    output = es_not(es_and([
        NotOp("not", t).partial_eval().to_es14_filter(schema)
        for t in self.terms
    ]))
    return output
Beispiel #3
0
def to_esfilter(self, schema):
    if isinstance(self.expr, Variable):
        cols = schema.leaves(self.expr.var)
        if not cols:
            return {"match_all": {}}
        elif len(cols) == 1:
            return es_missing(cols[0].es_column)
        else:
            return es_and([es_missing(c.es_column) for c in cols])
    else:
        return ScriptOp(
            "script",
            self.to_es_script(schema).script(schema)).to_esfilter(schema)
Beispiel #4
0
def to_esfilter(self, schema):
    if isinstance(self.expr, Variable):
        cols = schema.leaves(self.expr.var)
        if not cols:
            return {"match_all": {}}
        elif len(cols) == 1:
            return es_missing(cols[0].es_column)
        else:
            return es_and([
                es_missing(c.es_column) for c in cols
            ])
    else:
        return ScriptOp("script", self.to_es_script(schema).script(schema)).to_esfilter(schema)
Beispiel #5
0
def to_esfilter(self, schema):
    if not len(self.terms):
        return {"match_all": {}}
    else:
        return es_and([t.to_esfilter(schema) for t in self.terms])
Beispiel #6
0
def _normalize(esfilter):
    """
    TODO: DO NOT USE Data, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING
    REALLY, WE JUST COLLAPSE CASCADING `and` AND `or` FILTERS
    """
    if esfilter == MATCH_ALL or esfilter == MATCH_NONE or esfilter.isNormal:
        return esfilter

    # Log.note("from: " + convert.value2json(esfilter))
    isDiff = True

    while isDiff:
        isDiff = False

        if esfilter['and']:
            terms = esfilter['and']
            for (i0, t0), (i1,
                           t1) in itertools.product(enumerate(terms),
                                                    enumerate(terms)):
                if i0 == i1:
                    continue  # SAME, IGNORE
                # TERM FILTER ALREADY ASSUMES EXISTENCE
                with suppress_exception:
                    if t0.exists.field != None and t0.exists.field == t1.term.items(
                    )[0][0]:
                        terms[i0] = MATCH_ALL
                        continue

                # IDENTICAL CAN BE REMOVED
                with suppress_exception:
                    if t0 == t1:
                        terms[i0] = MATCH_ALL
                        continue

                # MERGE range FILTER WITH SAME FIELD
                if i0 > i1:
                    continue  # SAME, IGNORE
                with suppress_exception:
                    f0, tt0 = t0.range.items()[0]
                    f1, tt1 = t1.range.items()[0]
                    if f0 == f1:
                        set_default(terms[i0].range[literal_field(f1)], tt1)
                        terms[i1] = MATCH_ALL

            output = []
            for a in terms:
                if isinstance(a, (list, set)):
                    from mo_logs import Log

                    Log.error("and clause is not allowed a list inside a list")
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_
                if a == MATCH_ALL:
                    isDiff = True
                    continue
                if a == MATCH_NONE:
                    return MATCH_NONE
                if a['and']:
                    isDiff = True
                    a.isNormal = None
                    output.extend(a['and'])
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_ALL
            elif len(output) == 1:
                # output[0].isNormal = True
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = es_and(output)
            continue

        if esfilter.bool.should:
            output = []
            for a in esfilter.bool.should:
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_

                if a.bool.should:
                    a.isNormal = None
                    isDiff = True
                    output.extend(a.bool.should)
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_NONE
            elif len(output) == 1:
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap({"bool": {"should": output}})
            continue

        if esfilter.term != None:
            if esfilter.term.keys():
                esfilter.isNormal = True
                return esfilter
            else:
                return MATCH_ALL

        if esfilter.terms:
            for k, v in esfilter.terms.items():
                if len(v) > 0:
                    if OR(vv == None for vv in v):
                        rest = [vv for vv in v if vv != None]
                        if len(rest) > 0:
                            output = es_or(
                                [es_missing(k), {
                                    "terms": {
                                        k: rest
                                    }
                                }])
                        else:
                            output = es_missing(k)
                        output.isNormal = True
                        return output
                    else:
                        esfilter.isNormal = True
                        return esfilter
            return MATCH_NONE

        if esfilter['not']:
            _sub = esfilter['not']
            sub = _normalize(_sub)
            if sub == MATCH_NONE:
                return MATCH_ALL
            elif sub == MATCH_ALL:
                return MATCH_NONE
            elif sub is not _sub:
                sub.isNormal = None
                return wrap({"not": sub, "isNormal": True})
            else:
                sub.isNormal = None

    esfilter.isNormal = True
    return esfilter
Beispiel #7
0
def to_esfilter(self, schema):
    if not len(self.terms):
        return {"match_all": {}}
    else:
        return es_and([t.to_esfilter(schema) for t in self.terms])
Beispiel #8
0
def _normalize(esfilter):
    """
    TODO: DO NOT USE Data, WE ARE SPENDING TOO MUCH TIME WRAPPING/UNWRAPPING
    REALLY, WE JUST COLLAPSE CASCADING `and` AND `or` FILTERS
    """
    if esfilter == MATCH_ALL or esfilter == MATCH_NONE or esfilter.isNormal:
        return esfilter

    # Log.note("from: " + convert.value2json(esfilter))
    isDiff = True

    while isDiff:
        isDiff = False

        if esfilter['and']:
            terms = esfilter['and']
            for (i0, t0), (i1, t1) in itertools.product(enumerate(terms), enumerate(terms)):
                if i0 == i1:
                    continue  # SAME, IGNORE
                # TERM FILTER ALREADY ASSUMES EXISTENCE
                with suppress_exception:
                    if t0.exists.field != None and t0.exists.field == t1.term.items()[0][0]:
                        terms[i0] = MATCH_ALL
                        continue

                # IDENTICAL CAN BE REMOVED
                with suppress_exception:
                    if t0 == t1:
                        terms[i0] = MATCH_ALL
                        continue

                # MERGE range FILTER WITH SAME FIELD
                if i0 > i1:
                    continue  # SAME, IGNORE
                with suppress_exception:
                    f0, tt0 = t0.range.items()[0]
                    f1, tt1 = t1.range.items()[0]
                    if f0 == f1:
                        set_default(terms[i0].range[literal_field(f1)], tt1)
                        terms[i1] = MATCH_ALL

            output = []
            for a in terms:
                if isinstance(a, (list, set)):
                    from mo_logs import Log

                    Log.error("and clause is not allowed a list inside a list")
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_
                if a == MATCH_ALL:
                    isDiff = True
                    continue
                if a == MATCH_NONE:
                    return MATCH_NONE
                if a['and']:
                    isDiff = True
                    a.isNormal = None
                    output.extend(a['and'])
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_ALL
            elif len(output) == 1:
                # output[0].isNormal = True
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = es_and(output)
            continue

        if esfilter.bool.should:
            output = []
            for a in esfilter.bool.should:
                a_ = _normalize(a)
                if a_ is not a:
                    isDiff = True
                a = a_

                if a.bool.should:
                    a.isNormal = None
                    isDiff = True
                    output.extend(a.bool.should)
                else:
                    a.isNormal = None
                    output.append(a)
            if not output:
                return MATCH_NONE
            elif len(output) == 1:
                esfilter = output[0]
                break
            elif isDiff:
                esfilter = wrap({"bool": {"should": output}})
            continue

        if esfilter.term != None:
            if esfilter.term.keys():
                esfilter.isNormal = True
                return esfilter
            else:
                return MATCH_ALL

        if esfilter.terms:
            for k, v in esfilter.terms.items():
                if len(v) > 0:
                    if OR(vv == None for vv in v):
                        rest = [vv for vv in v if vv != None]
                        if len(rest) > 0:
                            output = es_or([
                                es_missing(k),
                                {"terms": {k: rest}}
                            ])
                        else:
                            output = es_missing(k)
                        output.isNormal = True
                        return output
                    else:
                        esfilter.isNormal = True
                        return esfilter
            return MATCH_NONE

        if esfilter['not']:
            _sub = esfilter['not']
            sub = _normalize(_sub)
            if sub == MATCH_NONE:
                return MATCH_ALL
            elif sub == MATCH_ALL:
                return MATCH_NONE
            elif sub is not _sub:
                sub.isNormal = None
                return wrap({"not": sub, "isNormal": True})
            else:
                sub.isNormal = None

    esfilter.isNormal = True
    return esfilter
Beispiel #9
0
def es_setop(es, query):
    schema = query.frum.schema

    es_query, filters = es_query_template(schema.query_path[0])
    nested_filter = None
    set_default(filters[0], query.where.partial_eval().to_es14_filter(schema))
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.fields = FlatList()

    selects = wrap([s.copy() for s in listwrap(query.select)])
    new_select = FlatList()
    schema = query.frum.schema
    # columns = schema.columns
    # nested_columns = set(c.name for c in columns if c.nested_path[0] != ".")

    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    put_index = 0
    for select in selects:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if isinstance(select.value, LeavesOp) and isinstance(
                select.value.term, Variable):
            term = select.value.term
            leaves = schema.leaves(term.var)
            for c in leaves:
                full_name = concat_field(
                    select.name, relative_field(untype_path(c.name), term.var))
                if c.jx_type == NESTED:
                    es_query.fields = ["_source"]
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {
                            "name": literal_field(full_name),
                            "index": put_index,
                            "child": "."
                        },
                        "pull": get_pull_source(c.es_column)
                    })
                    put_index += 1
                elif c.nested_path[0] != ".":
                    pass  # THE NESTED PARENT WILL CAPTURE THIS
                else:
                    es_query.fields += [c.es_column]
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {
                            "name": literal_field(full_name),
                            "index": put_index,
                            "child": "."
                        }
                    })
                    put_index += 1
        elif isinstance(select.value, Variable):
            s_column = select.value.var
            # LEAVES OF OBJECT
            leaves = schema.leaves(s_column)
            nested_selects = {}
            if leaves:
                if s_column == "." or any(c.jx_type == NESTED for c in leaves):
                    # PULL WHOLE NESTED ARRAYS
                    es_query.fields = ["_source"]
                    for c in leaves:
                        if len(c.nested_path) == 1:
                            jx_name = untype_path(c.name)
                            new_select.append({
                                "name":
                                select.name,
                                "value":
                                Variable(c.es_column),
                                "put": {
                                    "name": select.name,
                                    "index": put_index,
                                    "child": relative_field(jx_name, s_column)
                                },
                                "pull":
                                get_pull_source(c.es_column)
                            })
                else:
                    # PULL ONLY WHAT'S NEEDED
                    for c in leaves:
                        if len(c.nested_path) == 1:
                            jx_name = untype_path(c.name)
                            if c.jx_type == NESTED:
                                es_query.fields = ["_source"]
                                new_select.append({
                                    "name":
                                    select.name,
                                    "value":
                                    Variable(c.es_column),
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child":
                                        relative_field(jx_name, s_column)
                                    },
                                    "pull":
                                    get_pull_source(c.es_column)
                                })

                            else:
                                es_query.fields += [c.es_column]
                                new_select.append({
                                    "name":
                                    select.name,
                                    "value":
                                    Variable(c.es_column),
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child":
                                        relative_field(jx_name, s_column)
                                    }
                                })
                        else:
                            if not nested_filter:
                                where = filters[0].copy()
                                nested_filter = [where]
                                for k in filters[0].keys():
                                    filters[0][k] = None
                                set_default(
                                    filters[0],
                                    es_and([where, es_or(nested_filter)]))

                            nested_path = c.nested_path[0]
                            if nested_path not in nested_selects:
                                where = nested_selects[nested_path] = Data()
                                nested_filter += [where]
                                where.nested.path = nested_path
                                where.nested.query.match_all = {}
                                where.nested.inner_hits._source = False
                                where.nested.inner_hits.fields += [c.es_column]

                                child = relative_field(
                                    untype_path(
                                        relative_field(c.name,
                                                       schema.query_path[0])),
                                    s_column)
                                pull = accumulate_nested_doc(
                                    nested_path,
                                    Variable(
                                        relative_field(
                                            s_column,
                                            unnest_path(nested_path))))
                                new_select.append({
                                    "name": select.name,
                                    "value": select.value,
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child": child
                                    },
                                    "pull": pull
                                })
                            else:
                                nested_selects[
                                    nested_path].nested.inner_hits.fields += [
                                        c.es_column
                                    ]
            else:
                new_select.append({
                    "name": select.name,
                    "value": Variable("$dummy"),
                    "put": {
                        "name": select.name,
                        "index": put_index,
                        "child": "."
                    }
                })
            put_index += 1
        else:
            painless = select.value.partial_eval().to_es14_script(schema)
            es_query.script_fields[literal_field(select.name)] = es_script(
                painless.script(schema))
            new_select.append({
                "name":
                select.name,
                "pull":
                jx_expression_to_function("fields." +
                                          literal_field(select.name)),
                "put": {
                    "name": select.name,
                    "index": put_index,
                    "child": "."
                }
            })
            put_index += 1

    for n in new_select:
        if n.pull:
            continue
        elif isinstance(n.value, Variable):
            if es_query.fields[0] == "_source":
                es_query.fields = ["_source"]
                n.pull = get_pull_source(n.value.var)
            if n.value.var == "_id":
                n.pull = jx_expression_to_function("_id")
            else:
                n.pull = jx_expression_to_function(
                    concat_field("fields", literal_field(n.value.var)))
        else:
            Log.error("Do not know what to do")

    with Timer("call to ES", silent=not DEBUG) as call_timer:
        data = es_post(es, es_query, query.limit)

    T = data.hits.hits

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)