Beispiel #1
0
def es_setop(es, query):
    schema = query.frum.schema
    query_path = schema.query_path[0]

    new_select, split_select = get_selects(query)

    split_wheres = split_expression_by_path(query.where, schema, lang=ES52)
    es_query = es_query_proto(query_path, split_select, split_wheres, schema)
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    with Timer("call to ES", silent=DEBUG) as call_timer:
        result = es.search(es_query)

    # Log.note("{{result}}", result=result)

    T = result.hits.hits

    try:
        formatter, _, mime_type = set_formatters[query.format]

        with Timer("formatter", silent=True):
            output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
def extract_aggs(select, query_path, schema):
    """
    RETURN ES AGGREGATIONS
    """

    new_select = Data(
    )  # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if is_op(s.value, Variable_):
            s.query_path = query_path
            if s.aggregate == "count":
                new_select["count_" + literal_field(s.value.var)] += [s]
            else:
                new_select[literal_field(s.value.var)] += [s]
        elif s.aggregate:
            op, split_select = split_expression_by_path(s.value,
                                                        schema,
                                                        lang=Painless)
            for si_key, si_value in split_select.items():
                if si_value:
                    if s.query_path:
                        Log.error(
                            "can not handle more than one depth per select")
                    s.query_path = si_key
            formula.append(s)

    acc = Aggs()
    agg_field(acc, new_select, query_path, schema)
    agg_formula(acc, formula, query_path, schema)
    return acc
Beispiel #3
0
def build_es_query(select, query_path, schema, query):
    acc = extract_aggs(select, query_path, schema)
    acc = NestedAggs(query_path).add(acc)
    split_decoders = get_decoders_by_path(query)
    split_wheres = split_expression_by_path(query.where,
                                            schema=schema,
                                            lang=ES52)
    start = 0
    decoders = [None] * (len(query.edges) + len(query.groupby))
    paths = list(
        reversed(sorted(set(split_wheres.keys())
                        | set(split_decoders.keys()))))
    for path in paths:
        decoder = split_decoders.get(path, Null)
        where = split_wheres.get(path, Null)

        for d in decoder:
            decoders[d.edge.dim] = d
            acc = d.append_query(path, acc)
            start += d.num_columns

        if where:
            acc = FilterAggs("_filter", AndOp(where), None).add(acc)
        acc = NestedAggs(path).add(acc)
    acc = NestedAggs('.').add(acc)
    acc = simplify(acc)
    es_query = wrap(acc.to_es(schema))
    es_query.size = 0
    return acc, decoders, es_query
Beispiel #4
0
    def append_query(self, query_path, es_query):
        domain = self.domain
        domain_key = domain.key
        value = Painless[self.edge.value]
        cnv = pull_functions[value.type]
        include = tuple(cnv(p[domain_key]) for p in domain.partitions)

        schema = self.schema
        exists = Painless[AndOp([InOp([value,
                                       Literal(include)])])].partial_eval()

        limit = coalesce(self.limit, len(domain.partitions))

        if is_op(value, Variable):
            es_field = first(schema.leaves(
                value.var)).es_column  # ALREADY CHECKED THERE IS ONLY ONE
            match = TermsAggs(
                "_match", {
                    "field": es_field,
                    "size": limit,
                    "order": {
                        "_term": self.sorted
                    } if self.sorted else None
                }, self)
        else:
            match = TermsAggs("_match", {
                "script": text(value.to_es_script(schema)),
                "size": limit
            }, self)
        output = Aggs().add(
            FilterAggs("_filter", exists, None).add(match.add(es_query)))

        if self.edge.allowNulls:
            # IF ALL NESTED COLUMNS ARE NULL, DOES THE FILTER PASS?
            # MISSING AT THE QUERY DEPTH
            op, split = split_expression_by_path(NotOp(exists), schema)
            for i, p in enumerate(reversed(sorted(split.keys()))):
                e = split.get(p)
                if e:
                    not_match = NestedAggs(p).add(
                        FilterAggs("_missing" + text(i), e,
                                   self).add(es_query))
                    output.add(not_match)
        return output
Beispiel #5
0
def es_bulksetop(esq, frum, query):
    abs_limit = MIN([query.limit, MAX_DOCUMENTS])
    guid = Random.base64(32, extra="-_")

    schema = query.frum.schema
    query_path = schema.query_path[0]
    new_select, split_select = get_selects(query)
    split_wheres = split_expression_by_path(query.where, schema, lang=ES52)
    es_query = es_query_proto(query_path, split_select, split_wheres, schema)
    es_query.size = MIN([query.chunk_size, MAX_CHUNK_SIZE])
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)
    if not es_query.sort:
        es_query.sort = ["_doc"]

    formatter = formatters[query.format](abs_limit, new_select, query)

    Thread.run(
        "Download " + guid,
        extractor,
        guid,
        abs_limit,
        esq,
        es_query,
        formatter,
        parent_thread=Null,
    ).release()

    output = wrap({
        "url": URL_PREFIX / (guid + ".json"),
        "status": URL_PREFIX / (guid + ".status.json"),
        "meta": {
            "format": query.format,
            "es_query": es_query,
            "limit": abs_limit
        },
    })
    return output
Beispiel #6
0
def get_selects(query):
    schema = query.frum.schema
    split_select = {".": ESSelect(".")}

    def get_select(path):
        es_select = split_select.get(path)
        if not es_select:
            es_select = split_select[path] = ESSelect(path)
        return es_select

    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()
    put_index = 0
    for select in selects:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
            term = select.value.term
            leaves = schema.leaves(term.var)
            for c in leaves:
                full_name = concat_field(
                    select.name, relative_field(untype_path(c.name), term.var)
                )
                if c.jx_type == NESTED:
                    get_select(".").set_op = True
                    new_select.append(
                        {
                            "name": full_name,
                            "value": Variable(c.es_column),
                            "put": {
                                "name": literal_field(full_name),
                                "index": put_index,
                                "child": ".",
                            },
                            "pull": get_pull_source(c.es_column),
                        }
                    )
                    put_index += 1
                else:
                    get_select(c.nested_path[0]).fields.append(c.es_column)
                    new_select.append(
                        {
                            "name": full_name,
                            "value": Variable(c.es_column),
                            "put": {
                                "name": literal_field(full_name),
                                "index": put_index,
                                "child": ".",
                            },
                        }
                    )
                    put_index += 1
        elif is_op(select.value, Variable):
            s_column = select.value.var

            if s_column == ".":
                # PULL ALL SOURCE
                get_select(".").set_op = True
                new_select.append(
                    {
                        "name": select.name,
                        "value": select.value,
                        "put": {"name": select.name, "index": put_index, "child": "."},
                        "pull": get_pull_source("."),
                    }
                )
                continue

            leaves = schema.leaves(s_column)  # LEAVES OF OBJECT
            # nested_selects = {}
            if leaves:
                if any(c.jx_type == NESTED for c in leaves):
                    # PULL WHOLE NESTED ARRAYS
                    get_select(".").set_op = True
                    for c in leaves:
                        if (
                            len(c.nested_path) == 1
                        ):  # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRST LEVEL PROPERTIES
                            pre_child = join_field(
                                decode_property(n) for n in split_field(c.name)
                            )
                            new_select.append(
                                {
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child": untype_path(
                                            relative_field(pre_child, s_column)
                                        ),
                                    },
                                    "pull": get_pull_source(c.es_column),
                                }
                            )
                else:
                    # PULL ONLY WHAT'S NEEDED
                    for c in leaves:
                        c_nested_path = c.nested_path[0]
                        if c_nested_path == ".":
                            if c.es_column == "_id":
                                new_select.append(
                                    {
                                        "name": select.name,
                                        "value": Variable(c.es_column),
                                        "put": {
                                            "name": select.name,
                                            "index": put_index,
                                            "child": ".",
                                        },
                                        "pull": lambda row: row._id,
                                    }
                                )
                            elif c.jx_type == NESTED:
                                get_select(".").set_op = True
                                pre_child = join_field(
                                    decode_property(n) for n in split_field(c.name)
                                )
                                new_select.append(
                                    {
                                        "name": select.name,
                                        "value": Variable(c.es_column),
                                        "put": {
                                            "name": select.name,
                                            "index": put_index,
                                            "child": untype_path(
                                                relative_field(pre_child, s_column)
                                            ),
                                        },
                                        "pull": get_pull_source(c.es_column),
                                    }
                                )
                            else:
                                get_select(c_nested_path).fields.append(c.es_column)
                                pre_child = join_field(
                                    decode_property(n) for n in split_field(c.name)
                                )
                                new_select.append(
                                    {
                                        "name": select.name,
                                        "value": Variable(c.es_column),
                                        "put": {
                                            "name": select.name,
                                            "index": put_index,
                                            "child": untype_path(
                                                relative_field(pre_child, s_column)
                                            ),
                                        },
                                    }
                                )
                        else:
                            es_select = get_select(c_nested_path)
                            es_select.fields.append(c.es_column)

                            child = relative_field(
                                untype_path(
                                    relative_field(c.name, schema.query_path[0])
                                ),
                                s_column,
                            )
                            pull = accumulate_nested_doc(
                                c_nested_path,
                                Variable(
                                    relative_field(s_column, unnest_path(c_nested_path))
                                ),
                            )
                            new_select.append(
                                {
                                    "name": select.name,
                                    "value": select.value,
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child": child,
                                    },
                                    "pull": pull,
                                }
                            )
            else:
                new_select.append(
                    {
                        "name": select.name,
                        "value": Variable("$dummy"),
                        "put": {"name": select.name, "index": put_index, "child": "."},
                    }
                )
            put_index += 1
        else:
            split_scripts = split_expression_by_path(
                select.value, schema, lang=Painless
            )
            for p, script in split_scripts.items():
                es_select = get_select(p)
                es_select.scripts[select.name] = {
                    "script": text(
                        Painless[first(script)].partial_eval().to_es_script(schema)
                    )
                }
                new_select.append(
                    {
                        "name": select.name,
                        "pull": jx_expression_to_function(
                            "fields." + literal_field(select.name)
                        ),
                        "put": {"name": select.name, "index": put_index, "child": "."},
                    }
                )
                put_index += 1
    for n in new_select:
        if n.pull:
            continue
        elif is_op(n.value, Variable):
            if get_select(".").set_op:
                n.pull = get_pull_source(n.value.var)
            elif n.value == "_id":
                n.pull = jx_expression_to_function("_id")
            else:
                n.pull = jx_expression_to_function(
                    concat_field("fields", literal_field(n.value.var))
                )
        else:
            Log.error("Do not know what to do")
    return new_select, split_select
Beispiel #7
0
def es_setop(es, query):
    schema = query.frum.schema
    query_path = schema.query_path[0]

    split_select = {".": ESSelect('.')}

    def get_select(path):
        es_select = split_select.get(path)
        if not es_select:
            es_select = split_select[path] = ESSelect(path)
        return es_select


    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()

    put_index = 0
    for select in selects:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
            term = select.value.term
            leaves = schema.leaves(term.var)
            for c in leaves:
                full_name = concat_field(select.name, relative_field(untype_path(c.name), term.var))
                if c.jx_type == NESTED:
                    get_select('.').use_source = True
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {"name": literal_field(full_name), "index": put_index, "child": "."},
                        "pull": get_pull_source(c.es_column)
                    })
                    put_index += 1
                else:
                    get_select(c.nested_path[0]).fields.append(c.es_column)
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {"name": literal_field(full_name), "index": put_index, "child": "."}
                    })
                    put_index += 1
        elif is_op(select.value, Variable):
            s_column = select.value.var

            if s_column == ".":
                # PULL ALL SOURCE
                get_select('.').use_source = True
                new_select.append({
                    "name": select.name,
                    "value": select.value,
                    "put": {"name": select.name, "index": put_index, "child": "."},
                    "pull": get_pull_source(".")
                })
                continue

            leaves = schema.leaves(s_column)  # LEAVES OF OBJECT
            # nested_selects = {}
            if leaves:
                if any(c.jx_type == NESTED for c in leaves):
                    # PULL WHOLE NESTED ARRAYS
                    get_select('.').use_source = True
                    for c in leaves:
                        if len(c.nested_path) == 1:  # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRST LEVEL PROPERTIES
                            pre_child = join_field(decode_property(n) for n in split_field(c.name))
                            new_select.append({
                                "name": select.name,
                                "value": Variable(c.es_column),
                                "put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))},
                                "pull": get_pull_source(c.es_column)
                            })
                else:
                    # PULL ONLY WHAT'S NEEDED
                    for c in leaves:
                        c_nested_path = c.nested_path[0]
                        if c_nested_path == ".":
                            if c.es_column == "_id":
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {"name": select.name, "index": put_index, "child": "."},
                                    "pull": lambda row: row._id
                                })
                            elif c.jx_type == NESTED:
                                get_select('.').use_source = True
                                pre_child = join_field(decode_property(n) for n in split_field(c.name))
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))},
                                    "pull": get_pull_source(c.es_column)
                                })
                            else:
                                get_select(c_nested_path).fields.append(c.es_column)
                                pre_child = join_field(decode_property(n) for n in split_field(c.name))
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))}
                                })
                        else:
                            es_select = get_select(c_nested_path)
                            es_select.fields.append(c.es_column)

                            child = relative_field(untype_path(relative_field(c.name, schema.query_path[0])), s_column)
                            pull = accumulate_nested_doc(c_nested_path, Variable(relative_field(s_column, unnest_path(c_nested_path))))
                            new_select.append({
                                "name": select.name,
                                "value": select.value,
                                "put": {
                                    "name": select.name,
                                    "index": put_index,
                                    "child": child
                                },
                                "pull": pull
                            })
            else:
                new_select.append({
                    "name": select.name,
                    "value": Variable("$dummy"),
                    "put": {"name": select.name, "index": put_index, "child": "."}
                })
            put_index += 1
        else:
            split_scripts = split_expression_by_path(select.value, schema, lang=Painless)
            for p, script in split_scripts.items():
                es_select = get_select(p)
                es_select.scripts[select.name] = {"script": text_type(Painless[first(script)].partial_eval().to_es_script(schema))}
                new_select.append({
                    "name": select.name,
                    "pull": jx_expression_to_function("fields." + literal_field(select.name)),
                    "put": {"name": select.name, "index": put_index, "child": "."}
                })
                put_index += 1

    for n in new_select:
        if n.pull:
            continue
        elif is_op(n.value, Variable):
            if get_select('.').use_source:
                n.pull = get_pull_source(n.value.var)
            elif n.value == "_id":
                n.pull = jx_expression_to_function("_id")
            else:
                n.pull = jx_expression_to_function(concat_field("fields", literal_field(n.value.var)))
        else:
            Log.error("Do not know what to do")

    split_wheres = split_expression_by_path(query.where, schema, lang=ES52)
    es_query = es_query_proto(query_path, split_select, split_wheres, schema)
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    with Timer("call to ES", silent=True) as call_timer:
        data = es_post(es, es_query, query.limit)

    T = data.hits.hits

    # Log.note("{{output}}", output=T)

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        with Timer("formatter", silent=True):
            output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
Beispiel #8
0
def agg_formula(acc, formula, query_path, schema):
    # DUPLICATED FOR SCRIPTS, MAYBE THIS CAN BE PUT INTO A LANGUAGE?
    for i, s in enumerate(formula):
        canonical_name = s.name
        s_path = [
            k for k, v in split_expression_by_path(
                s.value, schema=schema, lang=Painless).items() if v
        ]
        if len(s_path) == 0:
            # FOR CONSTANTS
            nest = NestedAggs(query_path)
            acc.add(nest)
        elif len(s_path) == 1:
            nest = NestedAggs(first(s_path))
            acc.add(nest)
        else:
            raise Log.error("do not know how to handle")

        if is_op(s.value, TupleOp):
            if s.aggregate == "count":
                # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY
                s.pull = jx_expression_to_function("doc_count")
            elif s.aggregate in ('max', 'maximum', 'min', 'minimum'):
                if s.aggregate in ('max', 'maximum'):
                    dir = 1
                    op = "max"
                else:
                    dir = -1
                    op = 'min'

                nully = Painless[TupleOp(
                    [NULL] *
                    len(s.value.terms))].partial_eval().to_es_script(schema)
                selfy = text(
                    Painless[s.value].partial_eval().to_es_script(schema))

                script = {
                    "scripted_metric": {
                        'init_script':
                        'params._agg.best = ' + nully + '.toArray();',
                        'map_script':
                        'params._agg.best = ' + expand_template(
                            MAX_OF_TUPLE, {
                                "expr1": "params._agg.best",
                                "expr2": selfy,
                                "dir": dir,
                                "op": op
                            }) + ";",
                        'combine_script':
                        'return params._agg.best',
                        'reduce_script':
                        'return params._aggs.stream().' + op + '(' +
                        expand_template(COMPARE_TUPLE, {
                            "dir": dir,
                            "op": op
                        }) + ').get()',
                    }
                }
                nest.add(
                    NestedAggs(query_path).add(
                        ExprAggs(canonical_name, script, s)))
                s.pull = jx_expression_to_function("value")
            else:
                Log.error("{{agg}} is not a supported aggregate over a tuple",
                          agg=s.aggregate)
        elif s.aggregate == "count":
            nest.add(
                ExprAggs(
                    canonical_name, {
                        "value_count": {
                            "script":
                            text(Painless[s.value].partial_eval().to_es_script(
                                schema))
                        }
                    }, s))
            s.pull = jx_expression_to_function("value")
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            nest.add(
                ExprAggs(
                    key, {
                        "percentiles": {
                            "script": text(
                                Painless[s.value].to_es_script(schema)),
                            "percents": [50]
                        }
                    }, s))
            s.pull = jx_expression_to_function(join_field(["50.0"]))
        elif s.aggregate in ("and", "or"):
            key = literal_field(canonical_name + " " + s.aggregate)
            op = aggregates[s.aggregate]
            nest.add(
                ExprAggs(
                    key, {
                        op: {
                            "script":
                            text(Painless[NumberOp(
                                s.value)].to_es_script(schema))
                        }
                    }, s))
            # get_name = concat_field(canonical_name, "value")
            s.pull = jx_expression_to_function({
                "case": [{
                    "when": {
                        "eq": {
                            "value": 1
                        }
                    },
                    "then": True
                }, {
                    "when": {
                        "eq": {
                            "value": 0
                        }
                    },
                    "then": False
                }]
            })
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = mo_math.round(s.percentile * 100, decimal=6)
            nest.add(
                ExprAggs(
                    key, {
                        "percentiles": {
                            "script": text(
                                Painless[s.value].to_es_script(schema)),
                            "percents": [percent]
                        }
                    }, s))
            s.pull = jx_expression_to_function(
                join_field(["values", text(percent)]))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"
            nest.add(
                ExprAggs(
                    key, {
                        "cardinality": {
                            "script": text(
                                Painless[s.value].to_es_script(schema))
                        }
                    }, s))
            s.pull = jx_expression_to_function("value")
        elif s.aggregate == "stats":
            # REGULAR STATS
            nest.add(
                ExprAggs(
                    canonical_name, {
                        "extended_stats": {
                            "script": text(
                                Painless[s.value].to_es_script(schema))
                        }
                    }, s))
            s.pull = get_pull_stats()

            # GET MEDIAN TOO!
            select_median = s.copy()
            select_median.pull = jx_expression_to_function(
                {"select": [{
                    "name": "median",
                    "value": "values.50\\.0"
                }]})

            nest.add(
                ExprAggs(
                    canonical_name + "_percentile", {
                        "percentiles": {
                            "script": text(
                                Painless[s.value].to_es_script(schema)),
                            "percents": [50]
                        }
                    }, select_median))
            s.pull = get_pull_stats()
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            nest.add(
                TermsAggs(canonical_name, {
                    "script_field":
                    text(Painless[s.value].to_es_script(schema))
                }, s))
            s.pull = jx_expression_to_function("key")
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = jx_expression_to_function(aggregates[s.aggregate])
            nest.add(
                ExprAggs(
                    canonical_name, {
                        "extended_stats": {
                            "script":
                            text(
                                NumberOp(s.value).partial_eval().to_es_script(
                                    schema))
                        }
                    }, s))
Beispiel #9
0
def es_aggsop(es, frum, query):
    query = query.copy()  # WE WILL MARK UP THIS QUERY
    schema = frum.schema
    query_path = schema.query_path[0]
    select = listwrap(query.select)

    new_select = Data(
    )  # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if is_op(s.value, Variable_):
            s.query_path = query_path
            if s.aggregate == "count":
                new_select["count_" + literal_field(s.value.var)] += [s]
            else:
                new_select[literal_field(s.value.var)] += [s]
        elif s.aggregate:
            split_select = split_expression_by_path(s.value,
                                                    schema,
                                                    lang=Painless)
            for si_key, si_value in split_select.items():
                if si_value:
                    if s.query_path:
                        Log.error(
                            "can not handle more than one depth per select")
                    s.query_path = si_key
            formula.append(s)

    acc = Aggs()
    for _, many in new_select.items():
        for s in many:
            canonical_name = s.name
            if s.aggregate in ("value_count", "count"):
                columns = frum.schema.values(s.value.var,
                                             exclude_type=(OBJECT, NESTED))
            else:
                columns = frum.schema.values(s.value.var)

            if s.aggregate == "count":
                canonical_names = []
                for column in columns:
                    es_name = column.es_column + "_count"
                    if column.jx_type == EXISTS:
                        if column.nested_path[0] == query_path:
                            canonical_names.append("doc_count")
                            acc.add(
                                NestedAggs(column.nested_path[0]).add(
                                    CountAggs(s)))
                    else:
                        canonical_names.append("value")
                        acc.add(
                            NestedAggs(column.nested_path[0]).add(
                                ExprAggs(es_name, {
                                    "value_count": {
                                        "field": column.es_column
                                    }
                                }, s)))
                if len(canonical_names) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0])
                else:
                    s.pull = jx_expression_to_function(
                        {"add": canonical_names})
            elif s.aggregate == "median":
                columns = [
                    c for c in columns if c.jx_type in (NUMBER, INTEGER)
                ]
                if len(columns) != 1:
                    Log.error(
                        "Do not know how to perform median on columns with more than one type (script probably)"
                    )
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = canonical_name + " percentile"
                acc.add(
                    ExprAggs(
                        key, {
                            "percentiles": {
                                "field": first(columns).es_column,
                                "percents": [50]
                            }
                        }, s))
                s.pull = jx_expression_to_function("values.50\\.0")
            elif s.aggregate == "percentile":
                columns = [
                    c for c in columns if c.jx_type in (NUMBER, INTEGER)
                ]
                if len(columns) != 1:
                    Log.error(
                        "Do not know how to perform percentile on columns with more than one type (script probably)"
                    )
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = canonical_name + " percentile"
                if is_text(
                        s.percentile) or s.percetile < 0 or 1 < s.percentile:
                    Log.error(
                        "Expecting percentile to be a float from 0.0 to 1.0")
                percent = mo_math.round(s.percentile * 100, decimal=6)

                acc.add(
                    ExprAggs(
                        key, {
                            "percentiles": {
                                "field": first(columns).es_column,
                                "percents": [percent],
                                "tdigest": {
                                    "compression": 2
                                }
                            }
                        }, s))
                s.pull = jx_expression_to_function(
                    join_field(["values", text_type(percent)]))
            elif s.aggregate == "cardinality":
                for column in columns:
                    path = column.es_column + "_cardinality"
                    acc.add(
                        ExprAggs(path,
                                 {"cardinality": {
                                     "field": column.es_column
                                 }}, s))
                s.pull = jx_expression_to_function("value")
            elif s.aggregate == "stats":
                columns = [
                    c for c in columns if c.jx_type in (NUMBER, INTEGER)
                ]
                if len(columns) != 1:
                    Log.error(
                        "Do not know how to perform stats on columns with more than one type (script probably)"
                    )
                # REGULAR STATS
                acc.add(
                    ExprAggs(canonical_name, {
                        "extended_stats": {
                            "field": first(columns).es_column
                        }
                    }, s))
                s.pull = get_pull_stats()

                # GET MEDIAN TOO!
                select_median = s.copy()
                select_median.pull = jx_expression_to_function(
                    {"select": [{
                        "name": "median",
                        "value": "values.50\\.0"
                    }]})

                acc.add(
                    ExprAggs(
                        canonical_name + "_percentile", {
                            "percentiles": {
                                "field": first(columns).es_column,
                                "percents": [50]
                            }
                        }, select_median))

            elif s.aggregate == "union":
                for column in columns:
                    script = {
                        "scripted_metric": {
                            'init_script':
                            'params._agg.terms = new HashSet()',
                            'map_script':
                            'for (v in doc[' + quote(column.es_column) +
                            '].values) params._agg.terms.add(v);',
                            'combine_script':
                            'return params._agg.terms.toArray()',
                            'reduce_script':
                            'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()',
                        }
                    }
                    stats_name = column.es_column
                    acc.add(
                        NestedAggs(column.nested_path[0]).add(
                            ExprAggs(stats_name, script, s)))
                s.pull = jx_expression_to_function("value")
            elif s.aggregate == "count_values":
                # RETURN MAP FROM VALUE TO THE NUMBER OF TIMES FOUND IN THE DOCUMENTS
                # NOT A NESTED DOC, RATHER A MULTIVALUE FIELD
                for column in columns:
                    script = {
                        "scripted_metric": {
                            'params': {
                                "_agg": {}
                            },
                            'init_script':
                            'params._agg.terms = new HashMap()',
                            'map_script':
                            'for (v in doc[' + quote(column.es_column) +
                            '].values) params._agg.terms.put(v, Optional.ofNullable(params._agg.terms.get(v)).orElse(0)+1);',
                            'combine_script':
                            'return params._agg.terms',
                            'reduce_script':
                            '''
                            HashMap output = new HashMap(); 
                            for (agg in params._aggs) {
                                if (agg!=null){
                                    for (e in agg.entrySet()) {
                                        String key = String.valueOf(e.getKey());
                                        output.put(key, e.getValue() + Optional.ofNullable(output.get(key)).orElse(0));
                                    } 
                                }
                            } 
                            return output;
                        '''
                        }
                    }
                    stats_name = encode_property(column.es_column)
                    acc.add(
                        NestedAggs(column.nested_path[0]).add(
                            ExprAggs(stats_name, script, s)))
                s.pull = jx_expression_to_function("value")
            else:
                if not columns:
                    s.pull = jx_expression_to_function(NULL)
                else:
                    for c in columns:
                        acc.add(
                            NestedAggs(c.nested_path[0]).add(
                                ExprAggs(
                                    canonical_name,
                                    {"extended_stats": {
                                        "field": c.es_column
                                    }}, s)))
                    s.pull = jx_expression_to_function(aggregates[s.aggregate])

    for i, s in enumerate(formula):
        s_path = [
            k for k, v in split_expression_by_path(
                s.value, schema=schema, lang=Painless).items() if v
        ]
        if len(s_path) == 0:
            # FOR CONSTANTS
            nest = NestedAggs(query_path)
            acc.add(nest)
        elif len(s_path) == 1:
            nest = NestedAggs(first(s_path))
            acc.add(nest)
        else:
            Log.error("do not know how to handle")

        canonical_name = s.name
        if is_op(s.value, TupleOp):
            if s.aggregate == "count":
                # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY
                s.pull = jx_expression_to_function("doc_count")
            elif s.aggregate in ('max', 'maximum', 'min', 'minimum'):
                if s.aggregate in ('max', 'maximum'):
                    dir = 1
                    op = "max"
                else:
                    dir = -1
                    op = 'min'

                nully = Painless[TupleOp(
                    [NULL] *
                    len(s.value.terms))].partial_eval().to_es_script(schema)
                selfy = text_type(
                    Painless[s.value].partial_eval().to_es_script(schema))

                script = {
                    "scripted_metric": {
                        'init_script':
                        'params._agg.best = ' + nully + ';',
                        'map_script':
                        'params._agg.best = ' + expand_template(
                            MAX_OF_TUPLE, {
                                "expr1": "params._agg.best",
                                "expr2": selfy,
                                "dir": dir,
                                "op": op
                            }) + ";",
                        'combine_script':
                        'return params._agg.best',
                        'reduce_script':
                        'return params._aggs.stream().' + op + '(' +
                        expand_template(COMPARE_TUPLE, {
                            "dir": dir,
                            "op": op
                        }) + ').get()',
                    }
                }
                nest.add(
                    NestedAggs(query_path).add(
                        ExprAggs(canonical_name, script, s)))
                s.pull = jx_expression_to_function("value")
            else:
                Log.error("{{agg}} is not a supported aggregate over a tuple",
                          agg=s.aggregate)
        elif s.aggregate == "count":
            nest.add(
                ExprAggs(
                    canonical_name, {
                        "value_count": {
                            "script":
                            text_type(Painless[
                                s.value].partial_eval().to_es_script(schema))
                        }
                    }, s))
            s.pull = jx_expression_to_function("value")
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            nest.add(
                ExprAggs(
                    key, {
                        "percentiles": {
                            "script":
                            text_type(Painless[s.value].to_es_script(schema)),
                            "percents": [50]
                        }
                    }, s))
            s.pull = jx_expression_to_function(join_field(["50.0"]))
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = mo_math.round(s.percentile * 100, decimal=6)
            nest.add(
                ExprAggs(
                    key, {
                        "percentiles": {
                            "script":
                            text_type(Painless[s.value].to_es_script(schema)),
                            "percents": [percent]
                        }
                    }, s))
            s.pull = jx_expression_to_function(
                join_field(["values", text_type(percent)]))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"
            nest.add(
                ExprAggs(
                    key, {
                        "cardinality": {
                            "script":
                            text_type(Painless[s.value].to_es_script(schema))
                        }
                    }, s))
            s.pull = jx_expression_to_function("value")
        elif s.aggregate == "stats":
            # REGULAR STATS
            nest.add(
                ExprAggs(
                    canonical_name, {
                        "extended_stats": {
                            "script":
                            text_type(Painless[s.value].to_es_script(schema))
                        }
                    }, s))
            s.pull = get_pull_stats()

            # GET MEDIAN TOO!
            select_median = s.copy()
            select_median.pull = jx_expression_to_function(
                {"select": [{
                    "name": "median",
                    "value": "values.50\\.0"
                }]})

            nest.add(
                ExprAggs(
                    canonical_name + "_percentile", {
                        "percentiles": {
                            "script":
                            text_type(Painless[s.value].to_es_script(schema)),
                            "percents": [50]
                        }
                    }, select_median))
            s.pull = get_pull_stats()
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            nest.add(
                TermsAggs(
                    canonical_name, {
                        "script_field":
                        text_type(Painless[s.value].to_es_script(schema))
                    }, s))
            s.pull = jx_expression_to_function("key")
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = jx_expression_to_function(aggregates[s.aggregate])
            nest.add(
                ExprAggs(
                    canonical_name, {
                        "extended_stats": {
                            "script":
                            text_type(
                                NumberOp(s.value).partial_eval().to_es_script(
                                    schema))
                        }
                    }, s))

    acc = NestedAggs(query_path).add(acc)
    split_decoders = get_decoders_by_path(query)
    split_wheres = split_expression_by_path(query.where,
                                            schema=frum.schema,
                                            lang=ES52)

    start = 0
    decoders = [None] * (len(query.edges) + len(query.groupby))
    paths = list(reversed(sorted(split_wheres.keys() | split_decoders.keys())))
    for path in paths:
        literal_path = literal_field(path)
        decoder = split_decoders[literal_path]
        where = split_wheres[literal_path]

        for d in decoder:
            decoders[d.edge.dim] = d
            acc = d.append_query(path, acc)
            start += d.num_columns

        if where:
            acc = FilterAggs("_filter", AndOp(where), None).add(acc)
        acc = NestedAggs(path).add(acc)

    acc = NestedAggs('.').add(acc)
    acc = simplify(acc)
    es_query = wrap(acc.to_es(schema))

    es_query.size = 0

    with Timer("ES query time", silent=not DEBUG) as es_duration:
        result = es_post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting", silent=not DEBUG)
        with format_time:
            # result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE
            aggs = unwrap(result.aggregations)

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[
                query.format]
            if query.edges:
                output = formatter(aggs, acc, query, decoders, select)
            elif query.groupby:
                output = groupby_formatter(aggs, acc, query, decoders, select)
            else:
                output = aggop_formatter(aggs, acc, query, decoders, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet",
                      format=query.format,
                      cause=e)
        Log.error("Some problem", cause=e)
Beispiel #10
0
def es_setop(es, query):
    schema = query.frum.schema
    query_path = schema.query_path[0]

    split_select = {".": ESSelect('.')}

    def get_select(path):
        es_select = split_select.get(path)
        if not es_select:
            es_select = split_select[path] = ESSelect(path)
        return es_select

    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()

    put_index = 0
    for select in selects:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if is_op(select.value, LeavesOp) and is_op(select.value.term,
                                                   Variable):
            term = select.value.term
            leaves = schema.leaves(term.var)
            for c in leaves:
                full_name = concat_field(
                    select.name, relative_field(untype_path(c.name), term.var))
                if c.jx_type == NESTED:
                    get_select('.').use_source = True
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {
                            "name": literal_field(full_name),
                            "index": put_index,
                            "child": "."
                        },
                        "pull": get_pull_source(c.es_column)
                    })
                    put_index += 1
                else:
                    get_select(c.nested_path[0]).fields.append(c.es_column)
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {
                            "name": literal_field(full_name),
                            "index": put_index,
                            "child": "."
                        }
                    })
                    put_index += 1
        elif is_op(select.value, Variable):
            s_column = select.value.var

            if s_column == ".":
                # PULL ALL SOURCE
                get_select('.').use_source = True
                new_select.append({
                    "name": select.name,
                    "value": select.value,
                    "put": {
                        "name": select.name,
                        "index": put_index,
                        "child": "."
                    },
                    "pull": get_pull_source(".")
                })
                continue

            leaves = schema.leaves(s_column)  # LEAVES OF OBJECT
            # nested_selects = {}
            if leaves:
                if any(c.jx_type == NESTED for c in leaves):
                    # PULL WHOLE NESTED ARRAYS
                    get_select('.').use_source = True
                    for c in leaves:
                        if len(
                                c.nested_path
                        ) == 1:  # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRST LEVEL PROPERTIES
                            pre_child = join_field(
                                decode_property(n)
                                for n in split_field(c.name))
                            new_select.append({
                                "name":
                                select.name,
                                "value":
                                Variable(c.es_column),
                                "put": {
                                    "name":
                                    select.name,
                                    "index":
                                    put_index,
                                    "child":
                                    untype_path(
                                        relative_field(pre_child, s_column))
                                },
                                "pull":
                                get_pull_source(c.es_column)
                            })
                else:
                    # PULL ONLY WHAT'S NEEDED
                    for c in leaves:
                        c_nested_path = c.nested_path[0]
                        if c_nested_path == ".":
                            if c.es_column == "_id":
                                new_select.append({
                                    "name":
                                    select.name,
                                    "value":
                                    Variable(c.es_column),
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child": "."
                                    },
                                    "pull":
                                    lambda row: row._id
                                })
                            elif c.jx_type == NESTED:
                                get_select('.').use_source = True
                                pre_child = join_field(
                                    decode_property(n)
                                    for n in split_field(c.name))
                                new_select.append({
                                    "name":
                                    select.name,
                                    "value":
                                    Variable(c.es_column),
                                    "put": {
                                        "name":
                                        select.name,
                                        "index":
                                        put_index,
                                        "child":
                                        untype_path(
                                            relative_field(
                                                pre_child, s_column))
                                    },
                                    "pull":
                                    get_pull_source(c.es_column)
                                })
                            else:
                                get_select(c_nested_path).fields.append(
                                    c.es_column)
                                pre_child = join_field(
                                    decode_property(n)
                                    for n in split_field(c.name))
                                new_select.append({
                                    "name":
                                    select.name,
                                    "value":
                                    Variable(c.es_column),
                                    "put": {
                                        "name":
                                        select.name,
                                        "index":
                                        put_index,
                                        "child":
                                        untype_path(
                                            relative_field(
                                                pre_child, s_column))
                                    }
                                })
                        else:
                            es_select = get_select(c_nested_path)
                            es_select.fields.append(c.es_column)

                            child = relative_field(
                                untype_path(
                                    relative_field(c.name,
                                                   schema.query_path[0])),
                                s_column)
                            pull = accumulate_nested_doc(
                                c_nested_path,
                                Variable(
                                    relative_field(
                                        s_column, unnest_path(c_nested_path))))
                            new_select.append({
                                "name": select.name,
                                "value": select.value,
                                "put": {
                                    "name": select.name,
                                    "index": put_index,
                                    "child": child
                                },
                                "pull": pull
                            })
            else:
                new_select.append({
                    "name": select.name,
                    "value": Variable("$dummy"),
                    "put": {
                        "name": select.name,
                        "index": put_index,
                        "child": "."
                    }
                })
            put_index += 1
        else:
            split_scripts = split_expression_by_path(select.value,
                                                     schema,
                                                     lang=Painless)
            for p, script in split_scripts.items():
                es_select = get_select(p)
                es_select.scripts[select.name] = {
                    "script":
                    text_type(Painless[first(
                        script)].partial_eval().to_es_script(schema))
                }
                new_select.append({
                    "name":
                    select.name,
                    "pull":
                    jx_expression_to_function("fields." +
                                              literal_field(select.name)),
                    "put": {
                        "name": select.name,
                        "index": put_index,
                        "child": "."
                    }
                })
                put_index += 1

    for n in new_select:
        if n.pull:
            continue
        elif is_op(n.value, Variable):
            if get_select('.').use_source:
                n.pull = get_pull_source(n.value.var)
            elif n.value == "_id":
                n.pull = jx_expression_to_function("_id")
            else:
                n.pull = jx_expression_to_function(
                    concat_field("fields", literal_field(n.value.var)))
        else:
            Log.error("Do not know what to do")

    split_wheres = split_expression_by_path(query.where, schema, lang=ES52)
    es_query = es_query_proto(query_path, split_select, split_wheres, schema)
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    with Timer("call to ES", silent=DEBUG) as call_timer:
        data = es_post(es, es_query, query.limit)

    T = data.hits.hits

    # Log.note("{{output}}", output=T)

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        with Timer("formatter", silent=True):
            output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
Beispiel #11
0
def get_selects(query):
    schema = query.frum.schema
    query_level = len(schema.query_path)
    query_path = schema.query_path[0]
    # SPLIT select INTO ES_SELECT AND RESULTSET SELECT
    split_select = OrderedDict((p, ESSelectOp(p)) for p in schema.query_path)

    def expand_split_select(c_nested_path):
        es_select = split_select.get(c_nested_path)
        if not es_select:
            temp = [(k, v) for k, v in split_select.items()]
            split_select.clear()
            split_select.update({c_nested_path: ESSelectOp(c_nested_path)})
            split_select.update(temp)
        return split_select[c_nested_path]

    new_select = FlatList()
    post_expressions = {}

    selects = list_to_data([unwrap(s.copy()) for s in listwrap(query.select)])

    # WHAT PATH IS _source USED, IF ANY?
    for select in selects:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if is_op(select.value, LeavesOp) and is_op(select.value.term,
                                                   Variable):
            term = select.value.term
            leaves = schema.leaves(term.var)
            if any(c.jx_type == NESTED for c in leaves):
                split_select["."].source_path = "."
        elif is_op(select.value, Variable):
            for selected_column in schema.values(select.value.var,
                                                 exclude_type=(OBJECT,
                                                               EXISTS)):
                if selected_column.jx_type == NESTED:
                    expand_split_select(
                        selected_column.es_column
                    ).source_path = selected_column.es_column
                    continue
                leaves = schema.leaves(selected_column.es_column)
                for c in leaves:
                    if c.jx_type == NESTED:
                        split_select[c.es_column].source_path = c.es_column

    # IF WE GET THE SOURCE FOR PARENT, WE ASSUME WE GOT SOURCE FOR CHILD
    source_path = None
    source_level = 0
    for level, es_select in enumerate(reversed(list(split_select.values()))):
        if source_path:
            es_select.source_path = source_path
        elif es_select.source_path:
            source_level = level + 1
            source_path = es_select.source_path

    def get_pull_source(c):
        nested_path = c.nested_path
        nested_level = len(nested_path)
        pos = text(nested_level)

        if nested_level <= query_level:
            if not source_level or nested_level < source_level:
                field = join_field([pos, "fields", c.es_column])
                return jx_expression_to_function(field)
            elif nested_level == source_level:
                field = relative_field(c.es_column, nested_path[0])

                def pull_source(row):
                    return untyped(row.get(pos, Null)._source[field])

                return pull_source
            else:
                field = relative_field(c.es_column, nested_path[0])

                def pull_property(row):
                    return untyped(row.get(pos, Null)[field])

                return pull_property
        else:
            pos = text(query_level)

            if not source_level or nested_level < source_level:
                # PULL FIELDS AND THEN AGGREGATE THEM
                value = jx_expression_to_function(
                    join_field(["fields", c.es_column]))
                name = literal_field(nested_path[0])
                index = jx_expression_to_function("_nested.offset")

                def pull_nested_field(doc):
                    hits = doc.get(pos, Null).inner_hits[name].hits.hits
                    if not hits:
                        return []

                    temp = [(index(h), value(h)) for h in hits]
                    acc = [None] * len(temp)
                    for i, v in temp:
                        acc[i] = unwraplist(v)
                    return acc

                return pull_nested_field
            else:
                # PULL SOURCES
                value = jx_expression_to_function(
                    concat_field("_source",
                                 relative_field(c.es_column, nested_path[0])))
                name = literal_field(nested_path[0])
                index = jx_expression_to_function(
                    join_field(["_nested"] * (len(c.nested_path) - 1) +
                               ["offset"]))

                def pull_nested_source(doc):
                    hits = doc.get(pos, Null).inner_hits[name].hits.hits
                    if not hits:
                        return []

                    temp = [(index(h), value(h)) for h in hits]
                    acc = [None] * len(temp)
                    for i, v in temp:
                        acc[i] = untyped(v)
                    return acc

                return pull_nested_source

    put_index = 0
    for select in selects:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if is_op(select.value, LeavesOp) and is_op(select.value.term,
                                                   Variable):
            term = select.value.term
            leaves = schema.leaves(term.var)
            for c in leaves:
                c_nested_path = c.nested_path[0]
                simple_name = relative_field(c.es_column,
                                             query_path).lstrip(".")
                name = concat_field(select.name, untype_path(simple_name))
                put_name = concat_field(
                    select.name, literal_field(untype_path(simple_name)))
                split_select[c_nested_path].fields.append(c.es_column)
                new_select.append({
                    "name": name,
                    "value": Variable(c.es_column),
                    "put": {
                        "name": put_name,
                        "index": put_index,
                        "child": ".",
                    },
                    "pull": get_pull_source(c),
                })
                put_index += 1
        elif is_op(select.value, Variable):
            if select.value.var == ".":
                # PULL ALL SOURCE
                new_select.append({
                    "name":
                    select.name,
                    "value":
                    select.value,
                    "put": {
                        "name": select.name,
                        "index": put_index,
                        "child": "."
                    },
                    "pull":
                    get_pull_source(
                        Data(es_column=query_path,
                             nested_path=schema.query_path)),
                })
                continue

            for selected_column in schema.values(select.value.var,
                                                 exclude_type=(EXISTS,
                                                               OBJECT)):
                if selected_column.jx_type == NESTED:
                    new_select.append({
                        "name":
                        select.name,
                        "value":
                        select.value,
                        "put": {
                            "name": select.name,
                            "index": put_index,
                            "child": "."
                        },
                        "pull":
                        get_pull_source(
                            Data(
                                es_column=selected_column.es_column,
                                nested_path=(selected_column.es_column, ) +
                                selected_column.nested_path,
                            )),
                    })
                    continue

                leaves = schema.leaves(
                    selected_column.es_column,
                    exclude_type=INTERNAL)  # LEAVES OF OBJECT
                if leaves:
                    for c in leaves:
                        if c.es_column == "_id":
                            new_select.append({
                                "name": select.name,
                                "value": Variable(c.es_column),
                                "put": {
                                    "name": select.name,
                                    "index": put_index,
                                    "child": ".",
                                },
                                "pull": pull_id,
                            })
                            continue
                        c_nested_path = c.nested_path[0]
                        expand_split_select(c_nested_path).fields.append(
                            c.es_column)
                        child = untype_path(
                            relative_field(
                                c.es_column,
                                selected_column.es_column,
                            ))
                        new_select.append({
                            "name": select.name,
                            "value": Variable(c.es_column),
                            "put": {
                                "name": select.name,
                                "index": put_index,
                                "child": child,
                            },
                            "pull": get_pull_source(c),
                        })

                else:
                    new_select.append({
                        "name": select.name,
                        "value": NULL,
                        "put": {
                            "name": select.name,
                            "index": put_index,
                            "child": "."
                        },
                    })
                put_index += 1
        else:
            op, split_scripts = split_expression_by_path(select.value,
                                                         schema,
                                                         lang=Painless)
            for p, script in split_scripts.items():
                es_select = split_select[p]
                es_select.scripts[select.name] = {
                    "script":
                    text(Painless[script].partial_eval().to_es_script(schema))
                }
                new_select.append({
                    "name":
                    select.name,
                    "pull":
                    jx_expression_to_function(
                        join_field([
                            text(p),
                            "fields",
                            select.name,
                        ])),
                    "put": {
                        "name": select.name,
                        "index": put_index,
                        "child": "."
                    },
                })
                put_index += 1

    def inners(query_path, parent_pos):
        """
        :param query_path:
        :return:  ITERATOR OVER TUPLES ROWS AS TUPLES, WHERE  row[len(nested_path)] HAS INNER HITS
                  AND row[0] HAS post_expressions
        """
        pos = text(int(parent_pos) + 1)
        if not query_path:

            def base_case(row):
                extra = {}
                for k, e in post_expressions.items():
                    extra[k] = e(row)
                row["0"] = extra
                yield row

            return base_case

        if pos == "1":
            more = inners(query_path[:-1], "1")

            def first_case(results):
                for result in results:
                    for hit in result.hits.hits:
                        seed = {"0": None, pos: hit}
                        for row in more(seed):
                            yield row

            return first_case

        else:
            more = inners(query_path[:-1], pos)
            if source_path and source_path < query_path[-1]:
                rel_path = relative_field(query_path[-1], source_path)

                def source(acc):
                    for inner_row in acc[parent_pos][rel_path]:
                        acc[pos] = inner_row
                        for tt in more(acc):
                            yield tt

                return source
            else:
                path = literal_field(query_path[-1])

                def recurse(acc):
                    hits = acc[parent_pos].inner_hits[path].hits.hits
                    if hits:
                        for inner_row in hits:
                            acc[pos] = inner_row
                            for tt in more(acc):
                                yield tt
                    else:
                        for tt in more(acc):
                            yield tt

                return recurse

    return new_select, split_select, inners(schema.query_path, "0")