Exemple #1
0
    def new_leaves(self, column_name):
        """
        FIND THE LEAF COLUMNS UNDER column_name, ACROSS ALL QUERY PATHS

        :param column_name: PATH TO MATCH; NORMALIZED WITH unnest_path BEFORE COMPARING
        :return: set OF ALL COLUMNS THAT START WITH column_name, INCLUDING DEEP COLUMNS
        """
        column_name = unnest_path(column_name)
        columns = self.columns
        all_paths = self.snowflake.sorted_query_paths
        # SUBSTRINGS THAT MARK AN ELASTICSEARCH TYPED FIELD INSIDE AN es_column
        typed_markers = tuple(
            "." + t + "." for t in (STRING_TYPE, NUMBER_TYPE, BOOLEAN_TYPE)
        )

        output = {}  # MAP FROM QUERY PATH TO list OF MATCHING COLUMNS
        for c in columns:
            if c.name == "_id" and column_name != "_id":
                # _id IS ONLY RETURNED WHEN ASKED FOR BY NAME
                continue
            if c.jx_type in OBJECTS:
                continue
            if c.cardinality == 0:
                continue
            for path in all_paths:
                relative_name = unnest_path(relative_field(c.name, path))
                if not startswith_field(relative_name, column_name):
                    continue
                existing = output.get(path)
                if not existing:
                    # THE FIRST MATCH FOR A PATH IS ALWAYS KEPT
                    output[path] = [c]
                    continue
                if len(path) > len(c.nested_path[0]):
                    continue
                if any(marker in c.es_column for marker in typed_markers):
                    # ELASTICSEARCH field TYPES ARE NOT ALLOWED
                    continue
                output[path].append(c)

        # THE VALUES OF output ARE lists, WHICH ARE NOT HASHABLE; set(output.values())
        # WOULD RAISE TypeError, SO FLATTEN THEM INTO ONE set OF COLUMNS
        return {c for matched in output.values() for c in matched}
Exemple #2
0
    def new_leaves(self, column_name):
        """
        FIND THE LEAF COLUMNS UNDER column_name, ACROSS ALL QUERY PATHS

        :param column_name: PATH TO MATCH; NORMALIZED WITH unnest_path BEFORE COMPARING
        :return: set OF ALL COLUMNS THAT START WITH column_name, INCLUDING DEEP COLUMNS
        """
        column_name = unnest_path(column_name)
        columns = self.columns
        all_paths = self.snowflake.sorted_query_paths
        # SUBSTRINGS THAT MARK AN ELASTICSEARCH TYPED FIELD INSIDE AN es_column
        typed_markers = tuple(
            "." + t + "." for t in (STRING_TYPE, NUMBER_TYPE, BOOLEAN_TYPE)
        )

        output = {}  # MAP FROM QUERY PATH TO list OF MATCHING COLUMNS
        for c in columns:
            if c.name == "_id" and column_name != "_id":
                # _id IS ONLY RETURNED WHEN ASKED FOR BY NAME
                continue
            if c.jx_type in OBJECTS:
                continue
            if c.cardinality == 0:
                continue
            for path in all_paths:
                relative_name = unnest_path(relative_field(c.name, path))
                if not startswith_field(relative_name, column_name):
                    continue
                existing = output.get(path)
                if not existing:
                    # THE FIRST MATCH FOR A PATH IS ALWAYS KEPT
                    output[path] = [c]
                    continue
                if len(path) > len(c.nested_path[0]):
                    continue
                if any(marker in c.es_column for marker in typed_markers):
                    # ELASTICSEARCH field TYPES ARE NOT ALLOWED
                    continue
                output[path].append(c)

        # THE VALUES OF output ARE lists, WHICH ARE NOT HASHABLE; set(output.values())
        # WOULD RAISE TypeError, SO FLATTEN THEM INTO ONE set OF COLUMNS
        return {c for matched in output.values() for c in matched}
Exemple #3
0
def _indexer(columns, query_path):
    """
    BUILD THE THREE NAME LOOKUPS FOR columns, AS SEEN FROM query_path

    :param columns: COLUMNS TO INDEX; EACH IS READ FOR names, type, es_column AND nested_path
    :param query_path: KEY INTO EACH COLUMN'S names MAPPING
    :return: (relative_lookup, lookup_leaves, lookup_variables); EACH IS A dict
             FROM NAME TO set OF COLUMNS
    """

    def _register(lookup, key, column):
        # ADD column TO THE set STORED UNDER key, CREATING THE set WHEN MISSING
        lookup.setdefault(key, set()).add(column)

    names = set(
        unnest_path(alias) for column in columns for alias in column.names.values()
    ) | {"."}

    # ALL LEAF VARIABLES
    leaves_by_name = {}
    for name in names:
        for column in columns:
            unnested = unnest_path(column.names[query_path])
            if not startswith_field(unnested, name):
                continue
            if column.type in [EXISTS, OBJECT, NESTED]:
                continue
            if not (column.es_column != "_id" or name == "_id"):
                # _id IS ONLY LISTED UNDER ITS OWN NAME
                continue
            _register(leaves_by_name, name, column)
            _register(leaves_by_name, untype_path(name), column)

    # ALL NOT-NESTED VARIABLES
    variables_by_name = {}
    for name in names:
        for column in columns:
            unnested = unnest_path(column.names[query_path])
            if not startswith_field(unnested, name):
                continue
            if column.type in [EXISTS, OBJECT]:
                continue
            if not (column.es_column != "_id" or name == "_id"):
                continue
            if not startswith_field(column.nested_path[0], query_path):
                continue
            _register(variables_by_name, name, column)
            _register(variables_by_name, untype_path(name), column)

    # EVERY COLUMN, UNDER BOTH ITS RELATIVE NAME AND THE UNTYPED FORM OF THAT NAME
    by_relative_name = {}
    for column in columns:
        try:
            relative_name = column.names[query_path]
            _register(by_relative_name, relative_name, column)
            _register(by_relative_name, untype_path(relative_name), column)
        except Exception as e:
            Log.error("Should not happen", cause=e)

    if query_path != ".":
        # ADD ABSOLUTE NAMES TO THE NAMESPACE; NAMES ALREADY PRESENT ARE NOT REPLACED
        absolute_lookup, absolute_leaves, absolute_variables = _indexer(columns, ".")
        merges = (
            (by_relative_name, absolute_lookup),
            (leaves_by_name, absolute_leaves),
            (variables_by_name, absolute_variables),
        )
        for target, extra in merges:
            for key, found in extra.items():
                if key not in target:
                    target[key] = found

    return by_relative_name, leaves_by_name, variables_by_name
Exemple #4
0
 def values(self, name):
     """
     RETURN THE ENTRIES OF self.lookup_variables FOR THE GIVEN PATH NAME

     :param name: PATH NAME; NORMALIZED WITH unnest_path BEFORE THE LOOKUP
     :return: A NEW list OF THE MATCHES (BUILT FROM Null WHEN THE NAME IS NOT FOUND)
     """
     clean_name = unnest_path(name)
     found = self.lookup_variables.get(clean_name, Null)
     return list(found)
Exemple #5
0
    def denormalized(self):
        """
        THE INTERNAL STRUCTURE FOR THE COLUMN METADATA IS VERY DIFFERENT FROM
        THE DENORMALIZED PERSPECTIVE. THIS PROVIDES THAT PERSPECTIVE FOR QUERIES

        :return: ListContainer WITH ONE ROW PER COLUMN WHOSE jx_type IS NOT IN STRUCT
        """
        with self.locker:
            self._update_meta()
            output = []
            # self.data IS WALKED AS A TWO-LEVEL MAPPING; ONLY THE INNERMOST COLUMNS ARE USED
            for _, columns_by_name in self.data.items():
                for _, column_list in columns_by_name.items():
                    for c in column_list:
                        if c.jx_type in STRUCT:  # and c.es_column != "_id"
                            continue
                        row = {
                            "table": c.es_index,
                            "name": untype_path(c.name),
                            "cardinality": c.cardinality,
                            "es_column": c.es_column,
                            "es_index": c.es_index,
                            "last_updated": c.last_updated,
                            "count": c.count,
                            "nested_path": [unnest_path(n) for n in c.nested_path],
                            "es_type": c.es_type,
                            "type": c.jx_type,
                        }
                        output.append(row)

        from jx_python.containers.list_usingPythonList import ListContainer

        container_name = self.name
        schema = jx_base.Schema("meta.columns", SIMPLE_METADATA_COLUMNS)
        return ListContainer(container_name, data=output, schema=schema)
Exemple #6
0
    def denormalized(self):
        """
        THE INTERNAL STRUCTURE FOR THE COLUMN METADATA IS VERY DIFFERENT FROM
        THE DENORMALIZED PERSPECTIVE. THIS PROVIDES THAT PERSPECTIVE FOR QUERIES

        :return: ListContainer WITH ONE ROW PER (COLUMN, NAME) PAIR, FOR COLUMNS
                 WHOSE type IS NOT IN STRUCT
        """
        output = []
        # self.data IS WALKED AS A TWO-LEVEL MAPPING; ONLY THE INNERMOST COLUMNS ARE USED
        for _, columns_by_name in self.data.items():
            for _, column_list in columns_by_name.items():
                for c in column_list:
                    if c.type in STRUCT:  # and c.es_column != "_id"
                        continue
                    # A COLUMN CONTRIBUTES ONE ROW FOR EACH ENTRY OF ITS names MAPPING
                    for table, name in c.names.items():
                        row = {
                            "table": concat_field(c.es_index, untype_path(table)),
                            "name": untype_path(name),
                            "cardinality": c.cardinality,
                            "es_column": c.es_column,
                            "es_index": c.es_index,
                            "last_updated": c.last_updated,
                            "count": c.count,
                            "nested_path": [unnest_path(n) for n in c.nested_path],
                            "type": c.type
                        }
                        output.append(row)

        # THE SCHEMA IS DERIVED FROM THE ROWS ONCE, THEN KEPT ON self
        if not self.meta_schema:
            self.meta_schema = get_schema_from_list("meta\\.columns", output)

        from jx_python.containers.list_usingPythonList import ListContainer

        return ListContainer(
            "meta\\.columns",
            data=output,
            schema=self.meta_schema
        )
Exemple #7
0
 def values(self, name):
     """
     RETURN THE ENTRIES OF self.lookup_variables FOR THE GIVEN PATH NAME

     :param name: PATH NAME; NORMALIZED WITH unnest_path BEFORE THE LOOKUP
     :return: A NEW list OF THE MATCHES (BUILT FROM Null WHEN THE NAME IS NOT FOUND)
     """
     clean_name = unnest_path(name)
     found = self.lookup_variables.get(clean_name, Null)
     return list(found)
Exemple #8
0
    def leaves(self, column_name):
        """
        FIND THE LEAF COLUMNS UNDER column_name, USING THE FIRST QUERY PATH THAT HAS ANY

        :param column_name: PATH TO MATCH
        :return: set OF ALL COLUMNS THAT START WITH column_name, NOT INCLUDING DEEPER NESTED COLUMNS
        """
        unnested_name = unnest_path(column_name)
        if unnested_name != column_name:
            # unnest_path CHANGES THE GIVEN NAME: MATCH AGAINST THE NAME AS GIVEN,
            # AND DO NOT CLEAN THE COLUMN NAMES EITHER
            clean_name = column_name
            cleaner = lambda x: x
        else:
            clean_name = unnested_name
            cleaner = unnest_path

        columns = self.columns

        def _is_leaf(c, path):
            # _id IS ONLY RETURNED WHEN ASKED FOR BY NAME
            if not (c.name != "_id" or clean_name == "_id"):
                return False
            type_is_wanted = (
                (c.jx_type == EXISTS and column_name.endswith("." + EXISTS_TYPE))
                or c.jx_type not in OBJECTS
                or (clean_name == '.' and c.cardinality == 0)
            )
            if not type_is_wanted:
                return False
            return startswith_field(cleaner(relative_field(c.name, path)), clean_name)

        # TODO: '.' IMPLIES ALL FIELDS FROM ABSOLUTE PERSPECTIVE, ALL OTHERS ARE A RELATIVE PERSPECTIVE
        # TODO: HOW TO REFER TO FIELDS THAT MAY BE SHADOWED BY A RELATIVE NAME?
        if clean_name == '.':
            search_paths = reversed(self.query_path)
        else:
            search_paths = self.query_path

        for path in search_paths:
            found = {c for c in columns if _is_leaf(c, path)}
            if found:
                return found
        return set()
    def denormalized(self):
        """
        THE INTERNAL STRUCTURE FOR THE COLUMN METADATA IS VERY DIFFERENT FROM
        THE DENORMALIZED PERSPECTIVE. THIS PROVIDES THAT PERSPECTIVE FOR QUERIES

        :return: ListContainer WITH ONE ROW PER COLUMN WHOSE jx_type IS NOT IN STRUCT
        """
        with self.locker:
            self._update_meta()
            output = []
            # self.data IS WALKED AS A TWO-LEVEL MAPPING; ONLY THE INNERMOST COLUMNS ARE USED
            for _, columns_by_name in self.data.items():
                for _, column_list in columns_by_name.items():
                    for c in column_list:
                        if c.jx_type in STRUCT:  # and c.es_column != "_id"
                            continue
                        row = {
                            "table": c.es_index,
                            "name": untype_path(c.name),
                            "cardinality": c.cardinality,
                            "es_column": c.es_column,
                            "es_index": c.es_index,
                            "last_updated": c.last_updated,
                            "count": c.count,
                            "nested_path": [unnest_path(n) for n in c.nested_path],
                            "es_type": c.es_type,
                            "type": c.jx_type,
                        }
                        output.append(row)

        from jx_python.containers.list_usingPythonList import ListContainer

        container_name = self.name
        schema = jx_base.Schema(META_COLUMNS_NAME, SIMPLE_METADATA_COLUMNS)
        return ListContainer(container_name, data=output, schema=schema)
Exemple #10
0
    def leaves(self, column_name):
        """
        FIND THE LEAF COLUMNS UNDER column_name, USING THE FIRST QUERY PATH THAT HAS ANY

        :param column_name: PATH TO MATCH
        :return: set OF ALL COLUMNS THAT START WITH column_name, NOT INCLUDING DEEPER NESTED COLUMNS
        """
        unnested_name = unnest_path(column_name)
        if unnested_name != column_name:
            # unnest_path CHANGES THE GIVEN NAME: MATCH AGAINST THE NAME AS GIVEN,
            # AND DO NOT CLEAN THE COLUMN NAMES EITHER
            clean_name = column_name
            cleaner = lambda x: x
        else:
            clean_name = unnested_name
            cleaner = unnest_path

        columns = self.columns

        def _is_leaf(c, path):
            # _id IS ONLY RETURNED WHEN ASKED FOR BY NAME
            if not (c.name != "_id" or clean_name == "_id"):
                return False
            type_is_wanted = (
                (c.jx_type == EXISTS and column_name.endswith("." + EXISTS_TYPE))
                or c.jx_type not in OBJECTS
                or (clean_name == '.' and c.cardinality == 0)
            )
            if not type_is_wanted:
                return False
            return startswith_field(cleaner(relative_field(c.name, path)), clean_name)

        # TODO: '.' IMPLIES ALL FIELDS FROM ABSOLUTE PERSPECTIVE, ALL OTHERS ARE A RELATIVE PERSPECTIVE
        # TODO: HOW TO REFER TO FIELDS THAT MAY BE SHADOWED BY A RELATIVE NAME?
        if clean_name == '.':
            search_paths = reversed(self.query_path)
        else:
            search_paths = self.query_path

        for path in search_paths:
            found = {c for c in columns if _is_leaf(c, path)}
            if found:
                return found
        return set()
Exemple #11
0
 def leaves(self, column_name):
     """
     FIND THE LEAF COLUMNS UNDER column_name, USING THE FIRST QUERY PATH THAT HAS ANY

     :param column_name: PATH TO MATCH; NORMALIZED WITH unnest_path BEFORE COMPARING
     :return: list OF ALL COLUMNS THAT START WITH column_name, NOT INCLUDING DEEPER NESTED COLUMNS;
              EMPTY list WHEN NO QUERY PATH HAS A MATCH (OR THERE ARE NO QUERY PATHS)
     """
     column_name = unnest_path(column_name)
     columns = self.columns
     # THE PATHS ARE TRIED IN ORDER; THE FIRST ONE WITH ANY MATCH WINS
     for path in self.query_path:
         output = [
             c
             for c in columns
             if (
                 # _id IS ONLY RETURNED WHEN ASKED FOR BY NAME
                 (c.names['.'] != "_id" or column_name == "_id")
                 and c.jx_type not in OBJECTS
                 and startswith_field(unnest_path(c.names[path]), column_name)
             )
         ]
         if output:
             return output
     return []
Exemple #12
0
    def leaves(self, name):
        """
        RETURN THE ENTRIES OF self.lookup_leaves FOR THE GIVEN PATH NAME

        :param name: PATH NAME; NORMALIZED WITH unnest_path BEFORE THE LOOKUP
        :return: A NEW list OF THE MATCHES (BUILT FROM Null WHEN THE NAME IS NOT FOUND)
        """
        clean_name = unnest_path(name)
        found = self.lookup_leaves.get(clean_name, Null)
        return list(found)
Exemple #13
0
    def leaves(self, name, meta=False):
        """
        RETURN THE ENTRIES OF self.lookup_leaves FOR THE GIVEN PATH NAME

        :param name: PATH NAME; NORMALIZED WITH unnest_path BEFORE THE LOOKUP
        :param meta: NOT READ BY THIS METHOD
        :return: A NEW list OF THE MATCHES (BUILT FROM Null WHEN THE NAME IS NOT FOUND)
        """
        clean_name = unnest_path(name)
        found = self.lookup_leaves.get(clean_name, Null)
        return list(found)
Exemple #14
0
 def leaves(self, column_name):
     """
     FIND THE LEAF COLUMNS UNDER column_name, USING THE FIRST QUERY PATH THAT HAS ANY

     :param column_name: PATH TO MATCH; NORMALIZED WITH unnest_path BEFORE COMPARING
     :return: list OF ALL COLUMNS THAT START WITH column_name, NOT INCLUDING DEEPER NESTED COLUMNS;
              EMPTY list WHEN NO QUERY PATH HAS A MATCH (OR THERE ARE NO QUERY PATHS)
     """
     column_name = unnest_path(column_name)
     columns = self.columns
     # THE PATHS ARE TRIED IN ORDER; THE FIRST ONE WITH ANY MATCH WINS
     for path in self.query_path:
         output = [
             c
             for c in columns
             if (
                 # _id IS ONLY RETURNED WHEN ASKED FOR BY NAME
                 (c.names['.'] != "_id" or column_name == "_id")
                 and c.jx_type not in OBJECTS
                 and startswith_field(unnest_path(c.names[path]), column_name)
             )
         ]
         if output:
             return output
     return []
Exemple #15
0
 def values(self, name):
     """
     RETURN THE PRIMITIVE COLUMNS OF self.lookup FOR THE GIVEN PATH NAME

     :param name: PATH NAME; NORMALIZED WITH unnest_path BEFORE THE LOOKUP
     :return: list OF DISTINCT COLUMNS WHOSE type IS IN PRIMITIVE, EXCLUDING _id
     """
     full_name = unnest_path(name)
     candidates = self.lookup.get(full_name, Null)
     # MULTIVALUES ARE LEGIT, SO NESTED IS FINE: NO CHECK THAT
     # self.query_path == c.nested_path[0]
     distinct = {
         c
         for c in candidates
         if c.type in PRIMITIVE and c.es_column != "_id"
     }
     return list(distinct)
Exemple #16
0
 def map_to_es(self):
     """
     RETURN A MAP FROM THE NAMESPACE TO THE es_column NAME

     EACH NON-STRUCT COLUMN IS LISTED UNDER ITS NAME FOR THE PATH, PLUS THE
     untype_path AND unnest_path FORMS OF THAT NAME. THE PER-PATH MAPS ARE
     MERGED INTO THE RESULT WITH set_default, IN self.query_path ORDER
     """
     output = {}
     for path in self.query_path:
         es_names = {}
         for c in self.snowflake.columns:
             if c.jx_type in STRUCT:
                 continue
             rel_name = c.names[path]
             aliases = [rel_name, untype_path(rel_name), unnest_path(rel_name)]
             for alias in aliases:
                 es_names[alias] = c.es_column
         set_default(output, es_names)
     return output
Exemple #17
0
 def values(self, column_name):
     """
     RETURN ALL COLUMNS THAT column_name REFERS TO

     :param column_name: PATH TO MATCH; NORMALIZED WITH unnest_path BEFORE COMPARING
     :return: list OF NON-STRUCT COLUMNS WHOSE UNTYPED NAME, FOR THE FIRST QUERY PATH
              THAT HAS ANY, EQUALS column_name; EMPTY list WHEN NOTHING MATCHES
              (OR THERE ARE NO QUERY PATHS)
     """
     column_name = unnest_path(column_name)
     columns = self.columns
     # THE PATHS ARE TRIED IN ORDER; THE FIRST ONE WITH ANY MATCH WINS
     for path in self.query_path:
         output = [
             c
             for c in columns
             if (
                 c.jx_type not in STRUCT
                 and untype_path(c.names[path]) == column_name
             )
         ]
         if output:
             return output
     # EXPLICIT EMPTY RESULT; DOES NOT DEPEND ON THE LOOP HAVING RUN
     return []
Exemple #18
0
 def map_to_es(self):
     """
     RETURN A MAP FROM THE NAMESPACE TO THE es_column NAME

     EACH NON-STRUCT COLUMN IS LISTED UNDER ITS NAME FOR THE PATH, PLUS THE
     untype_path AND unnest_path FORMS OF THAT NAME. THE PER-PATH MAPS ARE
     MERGED INTO THE RESULT WITH set_default, IN self.query_path ORDER
     """
     output = {}
     for path in self.query_path:
         es_names = {}
         for c in self.snowflake.columns:
             if c.jx_type in STRUCT:
                 continue
             rel_name = c.names[path]
             aliases = [rel_name, untype_path(rel_name), unnest_path(rel_name)]
             for alias in aliases:
                 es_names[alias] = c.es_column
         set_default(output, es_names)
     return output
Exemple #19
0
 def values(self, column_name, exclude_type=STRUCT):
     """
     RETURN ALL COLUMNS THAT column_name REFERS TO

     :param column_name: PATH TO MATCH; NORMALIZED WITH unnest_path BEFORE COMPARING
     :param exclude_type: COLUMNS WHOSE jx_type IS IN THIS COLLECTION ARE SKIPPED
     :return: list OF MATCHING COLUMNS FOR THE FIRST QUERY PATH THAT HAS ANY, OR AN EMPTY list
     """
     column_name = unnest_path(column_name)
     columns = self.columns
     for path in self.query_path:
         full_path = untype_path(concat_field(path, column_name))
         # COLUMNS WITH ZERO cardinality ARE NOT FILTERED OUT HERE
         matches = [
             c
             for c in columns
             if c.jx_type not in exclude_type and untype_path(c.name) == full_path
         ]
         if matches:
             return matches
     return []
Exemple #20
0
 def values(self, column_name, exclude_type=STRUCT):
     """
     RETURN ALL COLUMNS THAT column_name REFERS TO

     :param column_name: PATH TO MATCH; NORMALIZED WITH unnest_path BEFORE COMPARING
     :param exclude_type: COLUMNS WHOSE jx_type IS IN THIS COLLECTION ARE SKIPPED
     :return: list OF MATCHING COLUMNS FOR THE FIRST QUERY PATH THAT HAS ANY, OR AN EMPTY list
     """
     column_name = unnest_path(column_name)
     columns = self.columns
     for path in self.query_path:
         full_path = untype_path(concat_field(path, column_name))
         # COLUMNS WITH ZERO cardinality ARE NOT FILTERED OUT HERE
         matches = [
             c
             for c in columns
             if c.jx_type not in exclude_type and untype_path(c.name) == full_path
         ]
         if matches:
             return matches
     return []
Exemple #21
0
 def values(self, column_name):
     """
     RETURN ALL COLUMNS THAT column_name REFERS TO

     :param column_name: PATH TO MATCH; NORMALIZED WITH unnest_path BEFORE COMPARING
     :return: list OF NON-STRUCT COLUMNS WHOSE UNTYPED NAME, FOR THE FIRST QUERY PATH
              THAT HAS ANY, EQUALS column_name; EMPTY list WHEN NOTHING MATCHES
              (OR THERE ARE NO QUERY PATHS)
     """
     column_name = unnest_path(column_name)
     columns = self.columns
     # THE PATHS ARE TRIED IN ORDER; THE FIRST ONE WITH ANY MATCH WINS
     for path in self.query_path:
         output = [
             c
             for c in columns
             if (
                 c.jx_type not in STRUCT
                 and untype_path(c.names[path]) == column_name
             )
         ]
         if output:
             return output
     # EXPLICIT EMPTY RESULT; DOES NOT DEPEND ON THE LOOP HAVING RUN
     return []
Exemple #22
0
def es_setop(es, query):
    """
    RUN A SET OPERATION (SELECT WITHOUT AGGREGATION) AGAINST ELASTICSEARCH

    THE SELECT CLAUSES ARE EXPANDED INTO new_select (HOW TO PULL EACH VALUE OUT
    OF A HIT, AND WHERE TO PUT IT IN THE OUTPUT) AND INTO split_select (WHAT TO
    ASK ES FOR, PER NESTED PATH). THE ES QUERY IS THEN BUILT, POSTED, AND THE
    HITS ARE HANDED TO THE FORMATTER CHOSEN BY query.format

    :param es: PASSED TO es_post
    :param query: READ FOR frum.schema, select, where, limit, sort AND format
    :return: THE FORMATTER'S OUTPUT, WITH meta.timing.es, meta.content_type AND
             meta.es_query FILLED IN
    """
    schema = query.frum.schema
    query_path = schema.query_path[0]

    # ONE ESSelect PER PATH, CREATED ON DEMAND; THE ROOT PATH ALWAYS EXISTS
    split_select = {".": ESSelect('.')}

    def get_select(path):
        # RETURN THE ESSelect FOR path, CREATING AND REGISTERING IT WHEN MISSING
        es_select = split_select.get(path)
        if not es_select:
            es_select = split_select[path] = ESSelect(path)
        return es_select


    # WORK ON COPIES OF THE SELECT CLAUSES
    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()

    # put_index IS THE "index" RECORDED IN EACH CLAUSE'S "put"
    put_index = 0
    for select in selects:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
            term = select.value.term
            leaves = schema.leaves(term.var)
            # EVERY LEAF GETS ITS OWN SELECT AND ITS OWN put_index
            for c in leaves:
                full_name = concat_field(select.name, relative_field(untype_path(c.name), term.var))
                if c.jx_type == NESTED:
                    get_select('.').use_source = True
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {"name": literal_field(full_name), "index": put_index, "child": "."},
                        "pull": get_pull_source(c.es_column)
                    })
                    put_index += 1
                else:
                    get_select(c.nested_path[0]).fields.append(c.es_column)
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {"name": literal_field(full_name), "index": put_index, "child": "."}
                    })
                    put_index += 1
        elif is_op(select.value, Variable):
            s_column = select.value.var

            if s_column == ".":
                # PULL ALL SOURCE
                get_select('.').use_source = True
                new_select.append({
                    "name": select.name,
                    "value": select.value,
                    "put": {"name": select.name, "index": put_index, "child": "."},
                    "pull": get_pull_source(".")
                })
                # NOTE(review): THIS continue SKIPS THE put_index += 1 AT THE END OF
                # THIS BRANCH, SO THE NEXT SELECT REUSES THE SAME INDEX - CONFIRM THIS IS INTENDED
                continue

            leaves = schema.leaves(s_column)  # LEAVES OF OBJECT
            # nested_selects = {}
            if leaves:
                # ALL LEAVES OF ONE SELECT SHARE THE SAME put_index; "child" TELLS THEM APART
                if any(c.jx_type == NESTED for c in leaves):
                    # PULL WHOLE NESTED ARRAYS
                    get_select('.').use_source = True
                    for c in leaves:
                        if len(c.nested_path) == 1:  # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRST LEVEL PROPERTIES
                            pre_child = join_field(decode_property(n) for n in split_field(c.name))
                            new_select.append({
                                "name": select.name,
                                "value": Variable(c.es_column),
                                "put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))},
                                "pull": get_pull_source(c.es_column)
                            })
                else:
                    # PULL ONLY WHAT'S NEEDED
                    for c in leaves:
                        c_nested_path = c.nested_path[0]
                        if c_nested_path == ".":
                            if c.es_column == "_id":
                                # _id IS READ DIRECTLY FROM THE ROW
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {"name": select.name, "index": put_index, "child": "."},
                                    "pull": lambda row: row._id
                                })
                            elif c.jx_type == NESTED:
                                # NOTE(review): THE any() CHECK ABOVE ALREADY RULED OUT NESTED
                                # LEAVES, SO THIS BRANCH LOOKS UNREACHABLE - CONFIRM
                                get_select('.').use_source = True
                                pre_child = join_field(decode_property(n) for n in split_field(c.name))
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))},
                                    "pull": get_pull_source(c.es_column)
                                })
                            else:
                                # NO "pull" HERE; IT IS ASSIGNED IN THE LOOP OVER new_select BELOW
                                get_select(c_nested_path).fields.append(c.es_column)
                                pre_child = join_field(decode_property(n) for n in split_field(c.name))
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))}
                                })
                        else:
                            # COLUMN LIVES UNDER A NESTED PATH: REQUEST IT ON THAT PATH'S ESSelect
                            es_select = get_select(c_nested_path)
                            es_select.fields.append(c.es_column)

                            child = relative_field(untype_path(relative_field(c.name, schema.query_path[0])), s_column)
                            pull = accumulate_nested_doc(c_nested_path, Variable(relative_field(s_column, unnest_path(c_nested_path))))
                            new_select.append({
                                "name": select.name,
                                "value": select.value,
                                "put": {
                                    "name": select.name,
                                    "index": put_index,
                                    "child": child
                                },
                                "pull": pull
                            })
            else:
                # NO SUCH COLUMN IN THE SCHEMA: SELECT A PLACEHOLDER VARIABLE INSTEAD
                new_select.append({
                    "name": select.name,
                    "value": Variable("$dummy"),
                    "put": {"name": select.name, "index": put_index, "child": "."}
                })
            put_index += 1
        else:
            # ANY OTHER EXPRESSION BECOMES A SCRIPT ON EACH PATH IT IS SPLIT INTO
            split_scripts = split_expression_by_path(select.value, schema, lang=Painless)
            for p, script in split_scripts.items():
                es_select = get_select(p)
                es_select.scripts[select.name] = {"script": text_type(Painless[first(script)].partial_eval().to_es_script(schema))}
                new_select.append({
                    "name": select.name,
                    "pull": jx_expression_to_function("fields." + literal_field(select.name)),
                    "put": {"name": select.name, "index": put_index, "child": "."}
                })
                put_index += 1

    # ASSIGN A "pull" TO EVERY SELECT THAT DOES NOT HAVE ONE YET
    for n in new_select:
        if n.pull:
            continue
        elif is_op(n.value, Variable):
            if get_select('.').use_source:
                n.pull = get_pull_source(n.value.var)
            # NOTE(review): n.value IS A Variable COMPARED TO A STRING HERE; WHETHER THIS
            # CAN EVER MATCH DEPENDS ON Variable's EQUALITY - CONFIRM
            elif n.value == "_id":
                n.pull = jx_expression_to_function("_id")
            else:
                n.pull = jx_expression_to_function(concat_field("fields", literal_field(n.value.var)))
        else:
            Log.error("Do not know what to do")

    # BUILD THE ES QUERY FROM THE PER-PATH SELECTS AND THE PER-PATH WHERE CLAUSES
    split_wheres = split_expression_by_path(query.where, schema, lang=ES52)
    es_query = es_query_proto(query_path, split_select, split_wheres, schema)
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    with Timer("call to ES", silent=True) as call_timer:
        data = es_post(es, es_query, query.limit)

    T = data.hits.hits

    # Log.note("{{output}}", output=T)

    try:
        # groupby_formatter IS UNPACKED BUT NOT USED HERE
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        with Timer("formatter", silent=True):
            output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
Exemple #23
0
def get_selects(query):
    """
    EXPAND THE select CLAUSE OF query INTO ONE DESCRIPTOR PER OUTPUT VALUE,
    AND COLLECT WHAT MUST BE REQUESTED FROM ES FOR EACH NESTED PATH

    EACH DESCRIPTOR APPENDED TO new_select HAS
      * name  - NAME OF THE SELECT (OR THE EXPANDED LEAF NAME)
      * value - THE EXPRESSION SELECTED (ABSENT FOR SCRIPTED SELECTS)
      * put   - {"name", "index", "child"}: index IS THE OUTPUT COLUMN,
                child IS THE PATH INSIDE THAT COLUMN
      * pull  - FUNCTION THAT EXTRACTS THE VALUE FROM A HIT

    :param query: QUERY WITH frum.schema AND select
    :return: (new_select, split_select) - THE FlatList OF DESCRIPTORS, AND THE
             dict OF ESSelect KEYED BY PATH ("." IS ALWAYS PRESENT)
    """
    schema = query.frum.schema
    split_select = {".": ESSelect(".")}

    def get_select(path):
        # ONE ESSelect PER PATH, CREATED THE FIRST TIME IT IS ASKED FOR
        es_select = split_select.get(path)
        if not es_select:
            es_select = split_select[path] = ESSelect(path)
        return es_select

    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()
    put_index = 0  # OUTPUT COLUMN FOR THE NEXT SELECT
    for select in selects:
        if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
            # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS: ONE OUTPUT COLUMN PER LEAF
            term = select.value.term
            for c in schema.leaves(term.var):
                full_name = concat_field(
                    select.name, relative_field(untype_path(c.name), term.var)
                )
                put = {"name": literal_field(full_name), "index": put_index, "child": "."}
                if c.jx_type == NESTED:
                    get_select(".").set_op = True
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": put,
                        "pull": get_pull_source(c.es_column),
                    })
                else:
                    # pull IS FILLED IN BY THE FINAL PASS BELOW
                    get_select(c.nested_path[0]).fields.append(c.es_column)
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": put,
                    })
                put_index += 1
        elif is_op(select.value, Variable):
            s_column = select.value.var

            if s_column == ".":
                # PULL ALL SOURCE
                get_select(".").set_op = True
                new_select.append({
                    "name": select.name,
                    "value": select.value,
                    "put": {"name": select.name, "index": put_index, "child": "."},
                    "pull": get_pull_source("."),
                })
                # THIS SELECT OCCUPIES AN OUTPUT COLUMN LIKE ANY OTHER; WITHOUT THE
                # INCREMENT THE NEXT SELECT WOULD BE GIVEN THE SAME put.index
                put_index += 1
                continue

            leaves = schema.leaves(s_column)  # LEAVES OF OBJECT
            if not leaves:
                # NOTHING KNOWN ABOUT THIS COLUMN: KEEP A PLACEHOLDER SO THE OUTPUT COLUMN EXISTS
                new_select.append({
                    "name": select.name,
                    "value": Variable("$dummy"),
                    "put": {"name": select.name, "index": put_index, "child": "."},
                })
            elif any(c.jx_type == NESTED for c in leaves):
                # PULL WHOLE NESTED ARRAYS
                get_select(".").set_op = True
                for c in leaves:
                    if len(c.nested_path) != 1:
                        # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THE FIRST LEVEL PROPERTIES
                        continue
                    pre_child = join_field(decode_property(n) for n in split_field(c.name))
                    new_select.append({
                        "name": select.name,
                        "value": Variable(c.es_column),
                        "put": {
                            "name": select.name,
                            "index": put_index,
                            "child": untype_path(relative_field(pre_child, s_column)),
                        },
                        "pull": get_pull_source(c.es_column),
                    })
            else:
                # PULL ONLY WHAT'S NEEDED
                # (NO LEAF IS NESTED IN THIS BRANCH, SO THERE IS NO NESTED CASE TO HANDLE)
                for c in leaves:
                    c_nested_path = c.nested_path[0]
                    if c_nested_path != ".":
                        # VALUE LIVES IN A NESTED DOCUMENT
                        get_select(c_nested_path).fields.append(c.es_column)
                        child = relative_field(
                            untype_path(relative_field(c.name, schema.query_path[0])),
                            s_column,
                        )
                        pull = accumulate_nested_doc(
                            c_nested_path,
                            Variable(relative_field(s_column, unnest_path(c_nested_path))),
                        )
                        new_select.append({
                            "name": select.name,
                            "value": select.value,
                            "put": {"name": select.name, "index": put_index, "child": child},
                            "pull": pull,
                        })
                    elif c.es_column == "_id":
                        new_select.append({
                            "name": select.name,
                            "value": Variable(c.es_column),
                            "put": {"name": select.name, "index": put_index, "child": "."},
                            "pull": lambda row: row._id,
                        })
                    else:
                        # pull IS FILLED IN BY THE FINAL PASS BELOW
                        get_select(c_nested_path).fields.append(c.es_column)
                        pre_child = join_field(decode_property(n) for n in split_field(c.name))
                        new_select.append({
                            "name": select.name,
                            "value": Variable(c.es_column),
                            "put": {
                                "name": select.name,
                                "index": put_index,
                                "child": untype_path(relative_field(pre_child, s_column)),
                            },
                        })
            put_index += 1
        else:
            # ANY OTHER EXPRESSION BECOMES A SCRIPT, ONE PER PATH IT IS SPLIT INTO
            split_scripts = split_expression_by_path(select.value, schema, lang=Painless)
            for p, script in split_scripts.items():
                es_select = get_select(p)
                es_select.scripts[select.name] = {
                    "script": text(Painless[first(script)].partial_eval().to_es_script(schema))
                }
                new_select.append({
                    "name": select.name,
                    "pull": jx_expression_to_function("fields." + literal_field(select.name)),
                    "put": {"name": select.name, "index": put_index, "child": "."},
                })
                put_index += 1

    # FINAL PASS: GIVE EVERY DESCRIPTOR A pull; THE CHOICE DEPENDS ON THE FINAL
    # VALUE OF THE set_op FLAG, SO IT CAN NOT BE MADE ANY EARLIER
    for n in new_select:
        if n.pull:
            continue
        elif is_op(n.value, Variable):
            if get_select(".").set_op:
                n.pull = get_pull_source(n.value.var)
            elif n.value == "_id":
                n.pull = jx_expression_to_function("_id")
            else:
                n.pull = jx_expression_to_function(
                    concat_field("fields", literal_field(n.value.var))
                )
        else:
            Log.error("Do not know what to do")
    return new_select, split_select
Exemple #24
0
    def __new__(cls, e=None, query=None, *args, **kwargs):
        """
        CHOOSE THE DECODER CLASS FOR EDGE e AND RETURN A BARE INSTANCE OF IT

        THE CHOICE IS MADE FROM e.value, e.domain AND e.range; FOR A PLAIN VariABLE
        WITH A "default" DOMAIN THE COLUMN METADATA IN THE SCHEMA (cardinality,
        multi, partitions) IS CONSULTED TOO.  e IS MUTATED ALONG THE WAY:
        allowNulls GETS A DEFAULT OF True, AND domain MAY BE REPLACED.

        :param e: THE EDGE TO DECODE
        :param query: SOURCE OF frum.schema AND limit
        :return: object.__new__() OF THE CHOSEN DECODER CLASS
        """
        e.allowNulls = coalesce(e.allowNulls, True)
        value = e.value

        if value and e.domain.type == "default":
            if is_text(value):
                Log.error("Expecting Variable or Expression, not plain string")

            if is_op(value, LeavesOp):
                return object.__new__(ObjectDecoder)

            if is_op(value, TupleOp):
                # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
                # JUST PULL THE FIELDS
                if not all(is_op(t, Variable) for t in value.terms):
                    Log.error("Can only handle variables in tuples")
                e.domain = Data(dimension={"fields": value.terms})
                return object.__new__(DimFieldListDecoder)

            if not is_op(value, Variable):
                return object.__new__(DefaultDecoder)

            # A PLAIN VARIABLE: LET THE COLUMN METADATA DECIDE
            leaves = query.frum.schema.leaves(value.var)
            if not leaves:
                return object.__new__(DefaultDecoder)
            if len(leaves) > 1:
                return object.__new__(ObjectDecoder)
            column = first(leaves)
            max_parts = coalesce(e.domain.limit, query.limit, DEFAULT_LIMIT)

            metadata_missing = column.cardinality == None
            if metadata_missing and DEBUG:
                Log.warning(
                    "metadata for column {{name|quote}} (id={{id}}) is not ready",
                    name=concat_field(column.es_index, column.es_column),
                    id=id(column))

            # multi AND partitions ARE ONLY LOOKED AT WHEN THE cardinality IS KNOWN
            if metadata_missing or (column.multi <= 1 and column.partitions == None):
                if unnest_path(value.var) in KNOWN_MULTITYPES:
                    Log.warning("{{var}} is not multivalued", var=value.var)
                    return object.__new__(MultivalueDecoder)
                e.domain = set_default(DefaultDomain(limit=max_parts),
                                       e.domain.__data__())
                return object.__new__(DefaultDecoder)

            if DEBUG:
                Log.note("id={{id}} has parts!!!", id=id(column))
            if column.multi > 1:
                return object.__new__(MultivalueDecoder)

            # KNOWN PARTITIONS: BUILD A SET DOMAIN AND LET THE CHECKS BELOW PICK THE DECODER
            parts = sorted(column.partitions[:max_parts:])
            if e.domain.sort == -1:
                parts = list(reversed(parts))
            e.domain = SimpleSetDomain(partitions=parts, limit=max_parts)

        if value and e.domain.type in PARTITION:
            return object.__new__(SetDecoder)
        if isinstance(e.domain.dimension, Dimension):
            e.domain = e.domain.dimension.getDomain()
            return object.__new__(SetDecoder)
        if value and e.domain.type == "time":
            return object.__new__(TimeDecoder)
        if e.range:
            return object.__new__(GeneralRangeDecoder)

        if value:
            if e.domain.type == "duration":
                return object.__new__(DurationDecoder)
            if e.domain.type == "range":
                return object.__new__(RangeDecoder)
        elif e.domain.dimension.fields:
            # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
            # JUST PULL THE FIELDS
            if not is_data(e.domain.dimension.fields):
                return object.__new__(DimFieldListDecoder)
            Log.error("No longer allowed: All objects are expressions")
            return None  # SAME RESULT AS BEFORE, SHOULD Log.error EVER RETURN
        elif all(e.domain.partitions.where):
            return object.__new__(GeneralSetDecoder)

        Log.error("domain type of {{type}} is not supported yet",
                  type=e.domain.type)
Exemple #25
0
    def _parse_properties(self, alias, mapping):
        """
        TURN THE PROPERTIES OF mapping INTO COLUMNS, RECORD THE QUERY PATHS
        FOR alias, AND REGISTER THE COLUMNS IN self.meta.columns

        :param alias: NAME GIVEN TO parse_properties, AND KEY INTO alias_to_query_paths
        :param mapping: OBJECT WHOSE properties ARE PARSED
        :return: LIST OF WHAT self.meta.columns.add() RETURNED, ONE PER KEPT COLUMN
                 (THE SAME LIST IS ALSO ADDED TO self.todo)
        """
        abs_columns = elasticsearch.parse_properties(alias, ".", ROOT_PATH, mapping.properties)
        if DEBUG:
            # _id ALONE DOES NOT TRIGGER THE WARNING, BUT IT IS LISTED WHEN ANOTHER COLUMN DOES
            unstored = [c for c in abs_columns if c.cardinality == 0]
            if any(c.name != '_id' for c in unstored):
                Log.warning(
                    "Some columns are not stored in {{url}} {{index|quote}} table:\n{{names}}",
                    url=self.es_cluster.url,
                    index=alias,
                    names=[".".join((c.es_index, c.name)) for c in unstored]
                )

        with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, silent=not DEBUG):
            # LIST OF EVERY NESTED PATH
            query_paths = []
            for c in abs_columns:
                if c.es_type == "nested":
                    query_paths.append([c.es_column])

            # NOTE(review): AT THIS POINT EVERY LIST HOLDS ONLY ITS OWN PATH ("." IS
            # APPENDED AFTERWARD), SO IT LOOKS LIKE THERE IS NEVER A SHORTER ENTRY TO
            # INSERT BEFORE - CONFIRM WHETHER PARENT PATHS WERE MEANT TO BE ADDED HERE
            for parent, child in itertools.product(query_paths, query_paths):
                shallow = parent[0]
                deep = child[0]
                if not shallow or not deep.startswith(shallow):
                    continue
                for i, step in enumerate(child):
                    if len(step) > len(shallow):
                        continue
                    if step != shallow:
                        # NOT ALREADY SPLIT AT THIS DEPTH
                        child.insert(i, shallow)
                    break

            for path in query_paths:
                path.append(".")
            query_paths.append(ROOT_PATH)

            # ENSURE ALL TABLES HAVE THE QUERY PATHS SET
            self.alias_to_query_paths[alias] = query_paths
            for index_name, index_alias in self.index_to_alias.items():
                if index_alias == alias:
                    self.alias_to_query_paths[index_name] = query_paths

            # ENSURE COLUMN HAS CORRECT jx_type
            # PICK DEEPEST NESTED PROPERTY AS REPRESENTATIVE
            kept = []
            deepest = {}
            for column in abs_columns:
                column.jx_type = jx_type(column)
                if column.jx_type in STRUCT:
                    # STRUCTURAL COLUMNS ARE ALWAYS KEPT
                    kept.append(column)
                    continue

                clean_name = unnest_path(column.name)
                rival = deepest.get(clean_name)
                if rival:
                    if len(rival.nested_path) >= len(column.nested_path):
                        continue  # THE ONE ALREADY SEEN IS AT LEAST AS DEEP
                    kept.remove(rival)
                    self.meta.columns.update({"clear": ".", "where": {"eq": {"es_column": rival.es_column, "es_index": rival.es_index}}})
                deepest[clean_name] = column
                kept.append(column)

            # REGISTER ALL COLUMNS
            canonicals = [self.meta.columns.add(column) for column in kept]

            self.todo.extend(canonicals)
            return canonicals
Exemple #26
0
def es_setop(es, query):
    """
    RUN A SET OPERATION (PLAIN SELECT, NO AGGREGATION) AGAINST ES AND FORMAT THE HITS

    THE select CLAUSE IS EXPANDED INTO ONE DESCRIPTOR PER OUTPUT VALUE, EACH WITH
      * name  - NAME OF THE SELECT (OR THE EXPANDED LEAF NAME)
      * value - THE EXPRESSION SELECTED (ABSENT FOR SCRIPTED SELECTS)
      * put   - {"name", "index", "child"}: index IS THE OUTPUT COLUMN,
                child IS THE PATH INSIDE THAT COLUMN
      * pull  - FUNCTION THAT EXTRACTS THE VALUE FROM A HIT

    :param es: HANDED TO es_post() AS THE TARGET OF THE REQUEST
    :param query: QUERY WITH frum.schema, select, where, sort, limit AND format
    :return: OUTPUT OF THE FORMATTER CHOSEN BY query.format, WITH meta.timing.es,
             meta.content_type AND meta.es_query FILLED IN
    """
    schema = query.frum.schema
    query_path = schema.query_path[0]

    split_select = {".": ESSelect('.')}

    def get_select(path):
        # ONE ESSelect PER PATH, CREATED THE FIRST TIME IT IS ASKED FOR
        es_select = split_select.get(path)
        if not es_select:
            es_select = split_select[path] = ESSelect(path)
        return es_select

    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()

    put_index = 0  # OUTPUT COLUMN FOR THE NEXT SELECT
    for select in selects:
        if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
            # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS: ONE OUTPUT COLUMN PER LEAF
            term = select.value.term
            for c in schema.leaves(term.var):
                full_name = concat_field(select.name, relative_field(untype_path(c.name), term.var))
                put = {"name": literal_field(full_name), "index": put_index, "child": "."}
                if c.jx_type == NESTED:
                    get_select('.').use_source = True
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": put,
                        "pull": get_pull_source(c.es_column)
                    })
                else:
                    # pull IS FILLED IN BY THE FINAL PASS BELOW
                    get_select(c.nested_path[0]).fields.append(c.es_column)
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": put
                    })
                put_index += 1
        elif is_op(select.value, Variable):
            s_column = select.value.var

            if s_column == ".":
                # PULL ALL SOURCE
                get_select('.').use_source = True
                new_select.append({
                    "name": select.name,
                    "value": select.value,
                    "put": {"name": select.name, "index": put_index, "child": "."},
                    "pull": get_pull_source(".")
                })
                # THIS SELECT OCCUPIES AN OUTPUT COLUMN LIKE ANY OTHER; WITHOUT THE
                # INCREMENT THE NEXT SELECT WOULD BE GIVEN THE SAME put.index
                put_index += 1
                continue

            leaves = schema.leaves(s_column)  # LEAVES OF OBJECT
            if not leaves:
                # NOTHING KNOWN ABOUT THIS COLUMN: KEEP A PLACEHOLDER SO THE OUTPUT COLUMN EXISTS
                new_select.append({
                    "name": select.name,
                    "value": Variable("$dummy"),
                    "put": {"name": select.name, "index": put_index, "child": "."}
                })
            elif any(c.jx_type == NESTED for c in leaves):
                # PULL WHOLE NESTED ARRAYS
                get_select('.').use_source = True
                for c in leaves:
                    if len(c.nested_path) != 1:
                        # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THE FIRST LEVEL PROPERTIES
                        continue
                    pre_child = join_field(decode_property(n) for n in split_field(c.name))
                    new_select.append({
                        "name": select.name,
                        "value": Variable(c.es_column),
                        "put": {
                            "name": select.name,
                            "index": put_index,
                            "child": untype_path(relative_field(pre_child, s_column))
                        },
                        "pull": get_pull_source(c.es_column)
                    })
            else:
                # PULL ONLY WHAT'S NEEDED
                # (NO LEAF IS NESTED IN THIS BRANCH, SO THERE IS NO NESTED CASE TO HANDLE)
                for c in leaves:
                    c_nested_path = c.nested_path[0]
                    if c_nested_path != ".":
                        # VALUE LIVES IN A NESTED DOCUMENT
                        get_select(c_nested_path).fields.append(c.es_column)
                        child = relative_field(
                            untype_path(relative_field(c.name, schema.query_path[0])),
                            s_column
                        )
                        pull = accumulate_nested_doc(
                            c_nested_path,
                            Variable(relative_field(s_column, unnest_path(c_nested_path)))
                        )
                        new_select.append({
                            "name": select.name,
                            "value": select.value,
                            "put": {"name": select.name, "index": put_index, "child": child},
                            "pull": pull
                        })
                    elif c.es_column == "_id":
                        new_select.append({
                            "name": select.name,
                            "value": Variable(c.es_column),
                            "put": {"name": select.name, "index": put_index, "child": "."},
                            "pull": lambda row: row._id
                        })
                    else:
                        # pull IS FILLED IN BY THE FINAL PASS BELOW
                        get_select(c_nested_path).fields.append(c.es_column)
                        pre_child = join_field(decode_property(n) for n in split_field(c.name))
                        new_select.append({
                            "name": select.name,
                            "value": Variable(c.es_column),
                            "put": {
                                "name": select.name,
                                "index": put_index,
                                "child": untype_path(relative_field(pre_child, s_column))
                            }
                        })
            put_index += 1
        else:
            # ANY OTHER EXPRESSION BECOMES A SCRIPT, ONE PER PATH IT IS SPLIT INTO
            split_scripts = split_expression_by_path(select.value, schema, lang=Painless)
            for p, script in split_scripts.items():
                es_select = get_select(p)
                es_select.scripts[select.name] = {
                    "script": text_type(Painless[first(script)].partial_eval().to_es_script(schema))
                }
                new_select.append({
                    "name": select.name,
                    "pull": jx_expression_to_function("fields." + literal_field(select.name)),
                    "put": {"name": select.name, "index": put_index, "child": "."}
                })
                put_index += 1

    # FINAL PASS: GIVE EVERY DESCRIPTOR A pull; THE CHOICE DEPENDS ON THE FINAL
    # VALUE OF THE use_source FLAG, SO IT CAN NOT BE MADE ANY EARLIER
    for n in new_select:
        if n.pull:
            continue
        elif is_op(n.value, Variable):
            if get_select('.').use_source:
                n.pull = get_pull_source(n.value.var)
            elif n.value == "_id":
                n.pull = jx_expression_to_function("_id")
            else:
                n.pull = jx_expression_to_function(
                    concat_field("fields", literal_field(n.value.var)))
        else:
            Log.error("Do not know what to do")

    split_wheres = split_expression_by_path(query.where, schema, lang=ES52)
    es_query = es_query_proto(query_path, split_select, split_wheres, schema)
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    # BE SILENT UNLESS DEBUGGING (silent=DEBUG HAD THE SENSE INVERTED)
    with Timer("call to ES", silent=not DEBUG) as call_timer:
        data = es_post(es, es_query, query.limit)

    T = data.hits.hits

    try:
        # THE GROUPBY FORMATTER IN THE DISPATCH ENTRY IS NOT USED FOR SET OPERATIONS
        formatter, _, mime_type = format_dispatch[query.format]

        with Timer("formatter", silent=True):
            output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
Exemple #27
0
    def _parse_properties(self, alias, mapping):
        """
        PARSE mapping.properties INTO COLUMNS, STORE THE QUERY PATHS FOR alias
        (AND FOR EVERY INDEX THAT MAPS TO alias), THEN REGISTER THE COLUMNS

        :param alias: NAME GIVEN TO parse_properties, AND KEY INTO alias_to_query_paths
        :param mapping: OBJECT WHOSE properties ARE PARSED
        :return: LIST OF WHAT self.meta.columns.add() RETURNED, ONE PER KEPT COLUMN
                 (THE SAME LIST IS ALSO ADDED TO self.todo)
        """
        abs_columns = elasticsearch.parse_properties(
            alias, ".", ROOT_PATH, mapping.properties)

        if DEBUG:
            # _id ALONE DOES NOT TRIGGER THE WARNING, BUT IT IS LISTED WHEN ANOTHER COLUMN DOES
            empty_columns = [c for c in abs_columns if c.cardinality == 0]
            if any(c.name != '_id' for c in empty_columns):
                Log.warning(
                    "Some columns are not stored in {{url}} {{index|quote}} table:\n{{names}}",
                    url=self.es_cluster.url,
                    index=alias,
                    names=[".".join((c.es_index, c.name)) for c in empty_columns])

        with Timer("upserting {{num}} columns", {"num": len(abs_columns)},
                   silent=not DEBUG):
            # LIST OF EVERY NESTED PATH
            query_paths = []
            for c in abs_columns:
                if c.es_type == "nested":
                    query_paths.append([c.es_column])

            # NOTE(review): AT THIS POINT EVERY LIST HOLDS ONLY ITS OWN PATH ("." IS
            # APPENDED AFTERWARD), SO IT LOOKS LIKE THERE IS NEVER A SHORTER ENTRY TO
            # INSERT BEFORE - CONFIRM WHETHER PARENT PATHS WERE MEANT TO BE ADDED HERE
            for outer, inner in itertools.product(query_paths, query_paths):
                prefix = outer[0]
                longest = inner[0]
                if not prefix or not longest.startswith(prefix):
                    continue
                for position, existing in enumerate(inner):
                    if len(existing) > len(prefix):
                        continue
                    if existing != prefix:
                        # NOT ALREADY SPLIT AT THIS DEPTH
                        inner.insert(position, prefix)
                    break

            for path in query_paths:
                path.append(".")
            query_paths.append(ROOT_PATH)

            # ENSURE ALL TABLES HAVE THE QUERY PATHS SET
            self.alias_to_query_paths[alias] = query_paths
            for index_name, index_alias in self.index_to_alias.items():
                if index_alias == alias:
                    self.alias_to_query_paths[index_name] = query_paths

            # ENSURE COLUMN HAS CORRECT jx_type
            # PICK DEEPEST NESTED PROPERTY AS REPRESENTATIVE
            chosen = []
            representative = {}
            for column in abs_columns:
                column.jx_type = jx_type(column)
                if column.jx_type in STRUCT:
                    # STRUCTURAL COLUMNS ARE ALWAYS KEPT
                    chosen.append(column)
                    continue

                clean_name = unnest_path(column.name)
                previous = representative.get(clean_name)
                if previous:
                    if len(previous.nested_path) >= len(column.nested_path):
                        continue  # THE ONE ALREADY SEEN IS AT LEAST AS DEEP
                    chosen.remove(previous)
                    self.meta.columns.update({
                        "clear": ".",
                        "where": {
                            "eq": {
                                "es_column": previous.es_column,
                                "es_index": previous.es_index
                            }
                        }
                    })
                representative[clean_name] = column
                chosen.append(column)

            # REGISTER ALL COLUMNS
            canonicals = [self.meta.columns.add(column) for column in chosen]

            self.todo.extend(canonicals)
            return canonicals
Exemple #28
0
def es_setop(es, query):
    """
    TRANSLATE query INTO AN ES REQUEST THAT PULLS DOCUMENT FIELDS, SEND IT,
    AND FORMAT THE RETURNED HITS

    :param es: PASSED UNCHANGED TO es_post() (PRESUMABLY THE ES INDEX/CLUSTER HANDLE - CONFIRM)
    :param query: THE QUERY; frum.schema, where, limit, select, sort AND format ARE READ
    :return: RESULT OF THE FORMATTER REGISTERED FOR query.format, WITH
             meta.timing.es, meta.content_type AND meta.es_query FILLED IN
    """
    schema = query.frum.schema

    # START FROM THE TEMPLATE FOR THE FIRST QUERY PATH; filters[0] RECEIVES THE where CLAUSE
    es_query, filters = es_query_template(schema.query_path[0])
    # nested_filter STAYS None UNTIL THE FIRST COLUMN INSIDE A NESTED PATH IS SELECTED
    nested_filter = None
    set_default(filters[0], query.where.partial_eval().to_esfilter(schema))
    # coalesce() PRESUMABLY FALLS BACK TO DEFAULT_LIMIT WHEN query.limit IS MISSING - CONFIRM
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.stored_fields = FlatList()

    # WORK ON COPIES OF THE SELECT CLAUSES
    selects = wrap([s.copy() for s in listwrap(query.select)])
    new_select = FlatList()
    # NOTE(review): schema WAS ALREADY ASSIGNED ABOVE; THIS RE-ASSIGNMENT LOOKS REDUNDANT
    schema = query.frum.schema
    # columns = schema.columns
    # nested_columns = set(c.names["."] for c in columns if c.nested_path[0] != ".")

    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    # put_index IS RECORDED AS put.index ON EVERY ENTRY ADDED TO new_select
    put_index = 0
    for select in selects:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if isinstance(select.value, LeavesOp) and isinstance(select.value.term, Variable):
            term = select.value.term
            leaves = schema.leaves(term.var)
            # EACH LEAF THAT IS SELECTED GETS ITS OWN put_index
            for c in leaves:
                full_name = concat_field(select.name, relative_field(untype_path(c.names["."]), term.var))
                if c.jx_type == NESTED:
                    # NESTED COLUMNS SWITCH THE REQUEST TO _source ONLY
                    es_query.stored_fields = ["_source"]
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {"name": literal_field(full_name), "index": put_index, "child": "."},
                        "pull": get_pull_source(c.es_column)
                    })
                    put_index += 1
                elif c.nested_path[0] != ".":
                    pass  # THE NESTED PARENT WILL CAPTURE THIS
                else:
                    # NO pull HERE; IT IS ASSIGNED IN THE LOOP OVER new_select BELOW
                    es_query.stored_fields += [c.es_column]
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {"name": literal_field(full_name), "index": put_index, "child": "."}
                    })
                    put_index += 1
        elif isinstance(select.value, Variable):
            # ALL ENTRIES MADE FOR THIS VARIABLE SHARE ONE put_index; THEY DIFFER BY put.child
            s_column = select.value.var
            # LEAVES OF OBJECT
            leaves = schema.leaves(s_column)
            # NESTED PATH -> THE nested QUERY CREATED FOR IT (FOR THIS SELECT ONLY)
            nested_selects = {}
            if leaves:
                if s_column == '.':
                    # PULL ALL SOURCE
                    es_query.stored_fields = ["_source"]
                    new_select.append({
                        "name": select.name,
                        "value": select.value,
                        "put": {"name": select.name, "index": put_index, "child": "."},
                        "pull": get_pull_source(".")
                    })
                elif any(c.jx_type == NESTED for c in leaves):
                    # PULL WHOLE NESTED ARRAYS
                    es_query.stored_fields = ["_source"]
                    for c in leaves:
                        if len(c.nested_path) == 1:  # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRST LEVEL PROPERTIES
                            jx_name = untype_path(c.names["."])
                            new_select.append({
                                "name": select.name,
                                "value": Variable(c.es_column),
                                "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)},
                                "pull": get_pull_source(c.es_column)
                            })
                else:
                    # PULL ONLY WHAT'S NEEDED
                    for c in leaves:
                        if len(c.nested_path) == 1:
                            jx_name = untype_path(c.names["."])
                            if c.jx_type == NESTED:
                                # NOTE(review): LOOKS UNREACHABLE, THE elif ABOVE ALREADY TAKES ANY NESTED LEAF - CONFIRM
                                es_query.stored_fields = ["_source"]
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)},
                                    "pull": get_pull_source(c.es_column)
                                })

                            else:
                                # NO pull HERE; IT IS ASSIGNED IN THE LOOP OVER new_select BELOW
                                es_query.stored_fields += [c.es_column]
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)}
                                })
                        else:
                            # COLUMN HAS MORE THAN ONE ENTRY IN nested_path
                            if not nested_filter:
                                # FIRST SUCH COLUMN: SET EVERY EXISTING KEY OF filters[0] TO None, THEN
                                # set_default() IN (COPY OF THE ORIGINAL FILTER) AND (OR OF nested_filter)
                                # NOTE(review): nested_filter IS EXTENDED BELOW, AFTER BEING GIVEN TO es_or();
                                # PRESUMABLY THIS RELIES ON THE SAME LIST BEING SHARED - CONFIRM
                                where = filters[0].copy()
                                nested_filter = [where]
                                for k in filters[0].keys():
                                    filters[0][k] = None
                                set_default(
                                    filters[0],
                                    es_and([where, es_or(nested_filter)])
                                )

                            nested_path = c.nested_path[0]
                            if nested_path not in nested_selects:
                                # ONE nested QUERY (WITH inner_hits) AND ONE new_select ENTRY PER NESTED PATH
                                where = nested_selects[nested_path] = Data()
                                nested_filter += [where]
                                where.nested.path = nested_path
                                where.nested.query.match_all = {}
                                where.nested.inner_hits._source = False
                                where.nested.inner_hits.stored_fields += [c.es_column]

                                child = relative_field(untype_path(c.names[schema.query_path[0]]), s_column)
                                pull = accumulate_nested_doc(nested_path, Variable(relative_field(s_column, unnest_path(nested_path))))
                                new_select.append({
                                    "name": select.name,
                                    "value": select.value,
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child": child
                                    },
                                    "pull": pull
                                })
                            else:
                                # PATH ALREADY HAS A nested QUERY: ONLY ADD THIS COLUMN TO ITS stored_fields
                                nested_selects[nested_path].nested.inner_hits.stored_fields += [c.es_column]
            else:
                # NO LEAVES FOUND FOR THIS VARIABLE: SELECT A PLACEHOLDER VARIABLE INSTEAD
                new_select.append({
                    "name": select.name,
                    "value": Variable("$dummy"),
                    "put": {"name": select.name, "index": put_index, "child": "."}
                })
            put_index += 1
        else:
            # ANY OTHER EXPRESSION IS SENT AS AN ES SCRIPT FIELD, AND READ BACK FROM fields.<name>
            painless = select.value.partial_eval().to_es_script(schema)
            es_query.script_fields[literal_field(select.name)] = es_script(painless.script(schema))
            new_select.append({
                "name": select.name,
                "pull": jx_expression_to_function("fields." + literal_field(select.name)),
                "put": {"name": select.name, "index": put_index, "child": "."}
            })
            put_index += 1

    # ASSIGN A pull FUNCTION TO EVERY ENTRY THAT DOES NOT HAVE ONE YET
    for n in new_select:
        if n.pull:
            continue
        elif isinstance(n.value, Variable):
            if es_query.stored_fields[0] == "_source":
                # _source IS FIRST IN stored_fields: REQUEST _source ALONE AND USE get_pull_source()
                es_query.stored_fields = ["_source"]
                n.pull = get_pull_source(n.value.var)
            elif n.value == "_id":
                # NOTE(review): COMPARES A Variable TO A STRING; RELIES ON Variable EQUALITY - CONFIRM
                n.pull = jx_expression_to_function("_id")
            else:
                n.pull = jx_expression_to_function(concat_field("fields", literal_field(n.value.var)))
        else:
            Log.error("Do not know what to do")

    with Timer("call to ES") as call_timer:
        data = es_post(es, es_query, query.limit)

    # THE HITS OF THE ES RESPONSE
    T = data.hits.hits

    try:
        # groupby_formatter IS UNPACKED BUT NOT USED HERE
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        with Timer("formatter"):
            output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
Exemple #29
0
def _indexer(columns, query_path):
    all_names = set(unnest_path(n) for c in columns for n in c.names.values()) | {"."}

    lookup_leaves = {}  # ALL LEAF VARIABLES
    for full_name in all_names:
        for c in columns:
            cname = c.names[query_path]
            nfp = unnest_path(cname)
            if (
                startswith_field(nfp, full_name) and
                c.es_type not in [EXISTS, OBJECT, NESTED] and
                (c.es_column != "_id" or full_name == "_id")
            ):
                cs = lookup_leaves.setdefault(full_name, set())
                cs.add(c)
                cs = lookup_leaves.setdefault(untype_path(full_name), set())
                cs.add(c)

    lookup_variables = {}  # ALL NOT-NESTED VARIABLES
    for full_name in all_names:
        for c in columns:
            cname = c.names[query_path]
            nfp = unnest_path(cname)
            if (
                startswith_field(nfp, full_name) and
                c.es_type not in [EXISTS, OBJECT] and
                (c.es_column != "_id" or full_name == "_id") and
                startswith_field(c.nested_path[0], query_path)
            ):
                cs = lookup_variables.setdefault(full_name, set())
                cs.add(c)
                cs = lookup_variables.setdefault(untype_path(full_name), set())
                cs.add(c)

    relative_lookup = {}
    for c in columns:
        try:
            cname = c.names[query_path]
            cs = relative_lookup.setdefault(cname, set())
            cs.add(c)

            ucname = untype_path(cname)
            cs = relative_lookup.setdefault(ucname, set())
            cs.add(c)
        except Exception as e:
            Log.error("Should not happen", cause=e)

    if query_path != ".":
        # ADD ABSOLUTE NAMES TO THE NAMESAPCE
        absolute_lookup, more_leaves, more_variables = _indexer(columns, ".")
        for k, cs in absolute_lookup.items():
            if k not in relative_lookup:
                relative_lookup[k] = cs
        for k, cs in more_leaves.items():
            if k not in lookup_leaves:
                lookup_leaves[k] = cs
        for k, cs in more_variables.items():
            if k not in lookup_variables:
                lookup_variables[k] = cs

    return relative_lookup, lookup_leaves, lookup_variables
Exemple #30
0
def es_setop(es, query):
    """
    TRANSLATE query INTO AN ES REQUEST THAT PULLS DOCUMENT FIELDS, SEND IT,
    AND FORMAT THE RETURNED HITS

    :param es: PASSED UNCHANGED TO es_post() (PRESUMABLY THE ES INDEX/CLUSTER HANDLE - CONFIRM)
    :param query: THE QUERY; frum.schema, where, limit, select, sort AND format ARE READ
    :return: RESULT OF THE FORMATTER REGISTERED FOR query.format, WITH
             meta.timing.es, meta.content_type AND meta.es_query FILLED IN
    """
    schema = query.frum.schema

    # START FROM THE TEMPLATE FOR THE FIRST QUERY PATH; filters[0] RECEIVES THE where CLAUSE
    es_query, filters = es_query_template(schema.query_path[0])
    # nested_filter STAYS None UNTIL THE FIRST COLUMN INSIDE A NESTED PATH IS SELECTED
    nested_filter = None
    set_default(filters[0], query.where.partial_eval().to_esfilter(schema))
    # coalesce() PRESUMABLY FALLS BACK TO DEFAULT_LIMIT WHEN query.limit IS MISSING - CONFIRM
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.stored_fields = FlatList()

    # WORK ON COPIES OF THE SELECT CLAUSES
    selects = wrap([s.copy() for s in listwrap(query.select)])
    new_select = FlatList()
    # NOTE(review): schema WAS ALREADY ASSIGNED ABOVE; THIS RE-ASSIGNMENT LOOKS REDUNDANT
    schema = query.frum.schema
    # columns = schema.columns
    # nested_columns = set(c.names["."] for c in columns if c.nested_path[0] != ".")

    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    # put_index IS RECORDED AS put.index ON EVERY ENTRY ADDED TO new_select
    put_index = 0
    for select in selects:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if isinstance(select.value, LeavesOp) and isinstance(
                select.value.term, Variable):
            term = select.value.term
            leaves = schema.leaves(term.var)
            # EACH LEAF THAT IS SELECTED GETS ITS OWN put_index
            for c in leaves:
                full_name = concat_field(
                    select.name,
                    relative_field(untype_path(c.names["."]), term.var))
                if c.jx_type == NESTED:
                    # NESTED COLUMNS SWITCH THE REQUEST TO _source ONLY
                    es_query.stored_fields = ["_source"]
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {
                            "name": literal_field(full_name),
                            "index": put_index,
                            "child": "."
                        },
                        "pull": get_pull_source(c.es_column)
                    })
                    put_index += 1
                elif c.nested_path[0] != ".":
                    pass  # THE NESTED PARENT WILL CAPTURE THIS
                else:
                    # NO pull HERE; IT IS ASSIGNED IN THE LOOP OVER new_select BELOW
                    es_query.stored_fields += [c.es_column]
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {
                            "name": literal_field(full_name),
                            "index": put_index,
                            "child": "."
                        }
                    })
                    put_index += 1
        elif isinstance(select.value, Variable):
            # ALL ENTRIES MADE FOR THIS VARIABLE SHARE ONE put_index; THEY DIFFER BY put.child
            s_column = select.value.var
            # LEAVES OF OBJECT
            leaves = schema.leaves(s_column)
            # NESTED PATH -> THE nested QUERY CREATED FOR IT (FOR THIS SELECT ONLY)
            nested_selects = {}
            if leaves:
                if s_column == '.':
                    # PULL ALL SOURCE
                    es_query.stored_fields = ["_source"]
                    new_select.append({
                        "name": select.name,
                        "value": select.value,
                        "put": {
                            "name": select.name,
                            "index": put_index,
                            "child": "."
                        },
                        "pull": get_pull_source(".")
                    })
                elif any(c.jx_type == NESTED for c in leaves):
                    # PULL WHOLE NESTED ARRAYS
                    es_query.stored_fields = ["_source"]
                    for c in leaves:
                        if len(
                                c.nested_path
                        ) == 1:  # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRST LEVEL PROPERTIES
                            jx_name = untype_path(c.names["."])
                            new_select.append({
                                "name":
                                select.name,
                                "value":
                                Variable(c.es_column),
                                "put": {
                                    "name": select.name,
                                    "index": put_index,
                                    "child": relative_field(jx_name, s_column)
                                },
                                "pull":
                                get_pull_source(c.es_column)
                            })
                else:
                    # PULL ONLY WHAT'S NEEDED
                    for c in leaves:
                        if len(c.nested_path) == 1:
                            jx_name = untype_path(c.names["."])
                            if c.jx_type == NESTED:
                                # NOTE(review): LOOKS UNREACHABLE, THE elif ABOVE ALREADY TAKES ANY NESTED LEAF - CONFIRM
                                es_query.stored_fields = ["_source"]
                                new_select.append({
                                    "name":
                                    select.name,
                                    "value":
                                    Variable(c.es_column),
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child":
                                        relative_field(jx_name, s_column)
                                    },
                                    "pull":
                                    get_pull_source(c.es_column)
                                })

                            else:
                                # NO pull HERE; IT IS ASSIGNED IN THE LOOP OVER new_select BELOW
                                es_query.stored_fields += [c.es_column]
                                new_select.append({
                                    "name":
                                    select.name,
                                    "value":
                                    Variable(c.es_column),
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child":
                                        relative_field(jx_name, s_column)
                                    }
                                })
                        else:
                            # COLUMN HAS MORE THAN ONE ENTRY IN nested_path
                            if not nested_filter:
                                # FIRST SUCH COLUMN: SET EVERY EXISTING KEY OF filters[0] TO None, THEN
                                # set_default() IN (COPY OF THE ORIGINAL FILTER) AND (OR OF nested_filter)
                                # NOTE(review): nested_filter IS EXTENDED BELOW, AFTER BEING GIVEN TO es_or();
                                # PRESUMABLY THIS RELIES ON THE SAME LIST BEING SHARED - CONFIRM
                                where = filters[0].copy()
                                nested_filter = [where]
                                for k in filters[0].keys():
                                    filters[0][k] = None
                                set_default(
                                    filters[0],
                                    es_and([where, es_or(nested_filter)]))

                            nested_path = c.nested_path[0]
                            if nested_path not in nested_selects:
                                # ONE nested QUERY (WITH inner_hits) AND ONE new_select ENTRY PER NESTED PATH
                                where = nested_selects[nested_path] = Data()
                                nested_filter += [where]
                                where.nested.path = nested_path
                                where.nested.query.match_all = {}
                                where.nested.inner_hits._source = False
                                where.nested.inner_hits.stored_fields += [
                                    c.es_column
                                ]

                                child = relative_field(
                                    untype_path(c.names[schema.query_path[0]]),
                                    s_column)
                                pull = accumulate_nested_doc(
                                    nested_path,
                                    Variable(
                                        relative_field(
                                            s_column,
                                            unnest_path(nested_path))))
                                new_select.append({
                                    "name": select.name,
                                    "value": select.value,
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child": child
                                    },
                                    "pull": pull
                                })
                            else:
                                # PATH ALREADY HAS A nested QUERY: ONLY ADD THIS COLUMN TO ITS stored_fields
                                nested_selects[
                                    nested_path].nested.inner_hits.stored_fields += [
                                        c.es_column
                                    ]
            else:
                # NO LEAVES FOUND FOR THIS VARIABLE: SELECT A PLACEHOLDER VARIABLE INSTEAD
                new_select.append({
                    "name": select.name,
                    "value": Variable("$dummy"),
                    "put": {
                        "name": select.name,
                        "index": put_index,
                        "child": "."
                    }
                })
            put_index += 1
        else:
            # ANY OTHER EXPRESSION IS SENT AS AN ES SCRIPT FIELD, AND READ BACK FROM fields.<name>
            painless = select.value.partial_eval().to_es_script(schema)
            es_query.script_fields[literal_field(select.name)] = es_script(
                painless.script(schema))
            new_select.append({
                "name":
                select.name,
                "pull":
                jx_expression_to_function("fields." +
                                          literal_field(select.name)),
                "put": {
                    "name": select.name,
                    "index": put_index,
                    "child": "."
                }
            })
            put_index += 1

    # ASSIGN A pull FUNCTION TO EVERY ENTRY THAT DOES NOT HAVE ONE YET
    for n in new_select:
        if n.pull:
            continue
        elif isinstance(n.value, Variable):
            if es_query.stored_fields[0] == "_source":
                # _source IS FIRST IN stored_fields: REQUEST _source ALONE AND USE get_pull_source()
                es_query.stored_fields = ["_source"]
                n.pull = get_pull_source(n.value.var)
            elif n.value == "_id":
                # NOTE(review): COMPARES A Variable TO A STRING; RELIES ON Variable EQUALITY - CONFIRM
                n.pull = jx_expression_to_function("_id")
            else:
                n.pull = jx_expression_to_function(
                    concat_field("fields", literal_field(n.value.var)))
        else:
            Log.error("Do not know what to do")

    with Timer("call to ES") as call_timer:
        data = es_post(es, es_query, query.limit)

    # THE HITS OF THE ES RESPONSE
    T = data.hits.hits

    try:
        # groupby_formatter IS UNPACKED BUT NOT USED HERE
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        with Timer("formatter"):
            output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)