Example #1
    def _parse_properties(self, alias, mapping, meta):
        abs_columns = elasticsearch.parse_properties(alias, None,
                                                     mapping.properties)
        with Timer("upserting {{num}} columns", {"num": len(abs_columns)},
                   debug=DEBUG):
            # LIST OF EVERY NESTED PATH
            query_paths = [[c.es_column] for c in abs_columns
                           if c.es_type == "nested"]
            for a, b in itertools.product(query_paths, query_paths):
                aa = a[0]
                bb = b[0]
                if aa and bb.startswith(aa):
                    for i, b_prefix in enumerate(b):
                        if len(b_prefix) > len(aa):
                            continue
                        if aa == b_prefix:
                            break  # SPLIT ALREADY FOUND
                        b.insert(i, aa)
                        break
            for q in query_paths:
                q.append(SELF_PATH)
            query_paths.append(ROOT_PATH)
            self.alias_to_query_paths[alias] = query_paths

            # ADD RELATIVE NAMES
            for abs_column in abs_columns:
                abs_column.last_updated = None
                abs_column.jx_type = es_type_to_json_type[abs_column.es_type]
                for query_path in query_paths:
                    abs_column.names[query_path[0]] = relative_field(
                        abs_column.names["."], query_path[0])
                self.todo.add(self.meta.columns.add(abs_column))
        pass
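
Example #1 collects one path list per nested property and then keys every column's relative name by the head of each path. Below is a minimal, standalone sketch that re-runs the same bookkeeping on hypothetical nested properties so the resulting query_paths shape is visible; Column, SELF_PATH, and ROOT_PATH are simplified stand-ins (assumptions), not the jx-base originals.

import itertools
from collections import namedtuple

Column = namedtuple("Column", ["es_column", "es_type"])
SELF_PATH = "."    # assumed value
ROOT_PATH = ["."]  # assumed value

abs_columns = [
    Column("build", "nested"),        # hypothetical nested property
    Column("build.times", "nested"),  # hypothetical deeper nested property
    Column("build.times.start", "long"),
]

# LIST OF EVERY NESTED PATH (same loop as in Example #1)
query_paths = [[c.es_column] for c in abs_columns if c.es_type == "nested"]
for a, b in itertools.product(query_paths, query_paths):
    aa = a[0]
    bb = b[0]
    if aa and bb.startswith(aa):
        for i, b_prefix in enumerate(b):
            if len(b_prefix) > len(aa):
                continue
            if aa == b_prefix:
                break  # SPLIT ALREADY FOUND
            b.insert(i, aa)
            break
for q in query_paths:
    q.append(SELF_PATH)
query_paths.append(ROOT_PATH)

print(query_paths)  # [['build', '.'], ['build.times', '.'], ['.']]

With these inputs, the relative-name pass at the end of the example then keys each column's names by "build", "build.times", and ".".
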
Example #2
    def _get_columns(self, table=None):
        # TODO: HANDLE MORE THAN ONE ES, MAP TABLE SHORT_NAME TO ES INSTANCE
        alias_done = set()
        index = split_field(table)[0]
        query_path = split_field(table)[1:]
        metadata = self.default_es.get_metadata(index=index)
        for index, meta in qb.sort(metadata.indices.items(), {"value": 0, "sort": -1}):
            for _, properties in meta.mappings.items():
                columns = _elasticsearch.parse_properties(index, None, properties.properties)
                columns = columns.filter(  # TODO: REMOVE WHEN jobs PROPERTY EXPLOSION IS CONTAINED
                    lambda r: not r.abs_name.startswith("other.") and
                              not r.abs_name.startswith("previous_values.cf_")
                )
                with Timer("upserting {{num}} columns", {"num": len(columns)}, debug=DEBUG):
                    with self.columns.locker:
                        for c in columns:
                            # ABSOLUTE
                            c.table = join_field([index]+query_path)
                            self.upsert_column(c)

                            for alias in meta.aliases:
                                # ONLY THE LATEST ALIAS IS CHOSEN TO GET COLUMNS
                                if alias in alias_done:
                                    continue
                                alias_done.add(alias)
                                c = copy(c)
                                c.table = join_field([alias]+query_path)
                                self.upsert_column(c)
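
Example #2 splits the table name into an index name and a query path with split_field, and later rebuilds per-alias table names with join_field. A rough sketch of that decomposition, using simplified stand-ins for the mo_dots helpers (plain dot-delimited names, no escaping) and a hypothetical table name:

def split_field(field):
    # simplified stand-in: "jobs.action.timings" -> ["jobs", "action", "timings"]
    return [] if field == "." else field.split(".")

def join_field(path):
    # simplified stand-in: ["jobs", "action", "timings"] -> "jobs.action.timings"
    return ".".join(path) if path else "."

table = "jobs.action.timings"        # hypothetical table name
index = split_field(table)[0]        # "jobs" -> the ES index
query_path = split_field(table)[1:]  # ["action", "timings"] -> nested query path

print(join_field([index] + query_path))         # jobs.action.timings (absolute table name)
print(join_field(["jobs-alias"] + query_path))  # jobs-alias.action.timings (same column under a hypothetical alias)
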
Example #3
    def __init__(self, es=None, id_column="_id"):
        self.es = es
        self.id_column = id_column
        self.remove_id = (id_column == "_id")

        if es:
            _schema = Data()
            for c in parse_properties(es.settings.alias, ".",
                                      es.get_properties()):
                if c.type not in (OBJECT, NESTED):
                    _schema[c.names["."]] = c
            self.schema = unwrap(_schema)
        else:
            self.schema = {}
Example #4
    def __init__(self, es=None, id_expression="_id"):
        self.es = es
        self.id_column = id_expression
        self.get_id = jx_expression_to_function(id_expression)
        self.remove_id = (id_expression == "_id")

        if es:
            _schema = Data()
            for c in parse_properties(es.settings.alias, ".", ROOT_PATH,
                                      es.get_properties()):
                if c.es_type not in (OBJECT, NESTED):
                    _schema[c.name] = c
            self.schema = unwrap(_schema)
        else:
            self.schema = {}
Example #5
    def _parse_properties(self, abs_index, properties, meta):
        # IT IS IMPORTANT THAT NESTED PROPERTIES NAME ALL COLUMNS, AND
        # ALL COLUMNS ARE GIVEN NAMES FOR ALL NESTED PROPERTIES
        abs_columns = _elasticsearch.parse_properties(abs_index, None,
                                                      properties.properties)
        abs_columns = abs_columns.filter(  # TODO: REMOVE WHEN jobs PROPERTY EXPLOSION IS CONTAINED
            lambda r: not r.es_column.startswith("other.") and
                      not r.es_column.startswith("previous_values.cf_") and
                      not r.es_index.startswith("debug") and
                      r.es_column.find("=") == -1 and
                      r.es_column.find(" ") == -1
        )

        def add_column(c, query_path):
            c.last_updated = Date.now()
            if query_path[0] != ".":
                c.names[query_path[0]] = relative_field(
                    c.names["."], query_path[0])

            with self.meta.columns.locker:
                self._upsert_column(c)
                for alias in meta.aliases:
                    c = copy(c)
                    c.es_index = alias
                    self._upsert_column(c)

        with Timer("upserting {{num}} columns", {"num": len(abs_columns)},
                   debug=DEBUG):
            # LIST OF EVERY NESTED PATH
            query_paths = [[c.es_column] for c in abs_columns
                           if c.type == "nested"]
            for a, b in itertools.product(query_paths, query_paths):
                aa = a[0]
                bb = b[0]
                if aa and bb.startswith(aa):
                    for i, b_prefix in enumerate(b):
                        if len(b_prefix) > len(aa):
                            continue
                        if aa == b_prefix:
                            break  # SPLIT ALREADY FOUND
                        b.insert(i, aa)
                        break
            for q in query_paths:
                q.append(".")
            query_paths.append(ROOT_PATH)

            # ADD RELATIVE COLUMNS
            for abs_column in abs_columns:
                for query_path in query_paths:
                    add_column(abs_column, query_path)
Example #6
    def __init__(self, es=None, id_info=None):
        self.es = es
        self.id_info = id_info
        self.get_id = jx.get(id_info.field)
        self.get_version = jx.get(id_info.version)

        if es:
            _schema = Data()
            for c in parse_properties(es.settings.alias, ".", ROOT_PATH, es.get_properties()):
                if c.es_type in (OBJECT, NESTED):
                    _schema[c.name] = {}
                else:
                    _schema[c.name] = c
            self.schema = unwrap(_schema)
        else:
            self.schema = {}
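
The constructor examples above (#3, #4, and #6) all build the schema the same way: parse the ES properties, then key the leaf columns by name, with container columns (OBJECT/NESTED) either skipped or, in the last variant, stored as empty placeholders. A toy sketch of the skip-containers variant, with hypothetical column records and a plain dict standing in for Data()/unwrap():

from collections import namedtuple

Column = namedtuple("Column", ["name", "es_type"])
OBJECT, NESTED = "object", "nested"  # assumed constant values

parsed_columns = [
    Column("run", OBJECT),             # container: skipped
    Column("run.machine", "keyword"),  # leaf: kept
    Column("results", NESTED),         # container: skipped
    Column("results.ok", "boolean"),   # leaf: kept
]

schema = {}
for c in parsed_columns:
    if c.es_type not in (OBJECT, NESTED):
        schema[c.name] = c

print(sorted(schema))  # ['results.ok', 'run.machine']
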
Example #7
    def _parse_properties(self, abs_index, properties, meta):
        # IT IS IMPORTANT THAT NESTED PROPERTIES NAME ALL COLUMNS, AND
        # ALL COLUMNS ARE GIVEN NAMES FOR ALL NESTED PROPERTIES
        def add_column(c, query_path):
            c.last_updated = Date.now() - TOO_OLD
            if query_path[0] != ".":
                c.names[query_path[0]] = relative_field(
                    c.names["."], query_path[0])

            with self.meta.columns.locker:
                for alias in meta.aliases:
                    c_ = copy(c)
                    c_.es_index = alias
                    self._upsert_column(c_)
                self._upsert_column(c)

        abs_columns = elasticsearch.parse_properties(abs_index, None,
                                                     properties.properties)
        self.abs_columns.update(abs_columns)
        with Timer("upserting {{num}} columns", {"num": len(abs_columns)},
                   debug=DEBUG):
            # LIST OF EVERY NESTED PATH
            query_paths = [[c.es_column] for c in abs_columns
                           if c.type == "nested"]
            for a, b in itertools.product(query_paths, query_paths):
                aa = a[0]
                bb = b[0]
                if aa and bb.startswith(aa):
                    for i, b_prefix in enumerate(b):
                        if len(b_prefix) > len(aa):
                            continue
                        if aa == b_prefix:
                            break  # SPLIT ALREADY FOUND
                        b.insert(i, aa)
                        break
            for q in query_paths:
                q.append(".")
            query_paths.append(SELF_PATH)

            # ADD RELATIVE COLUMNS
            for abs_column in abs_columns:
                abs_column = abs_column.__copy__()
                abs_column.type = es_type_to_json_type[abs_column.type]
                for query_path in query_paths:
                    add_column(abs_column, query_path)
        pass
Example #8
    def _parse_properties(self, abs_index, properties, meta):
        # IT IS IMPORTANT THAT NESTED PROPERTIES NAME ALL COLUMNS, AND
        # ALL COLUMNS ARE GIVEN NAMES FOR ALL NESTED PROPERTIES
        abs_columns = _elasticsearch.parse_properties(abs_index, None, properties.properties)
        abs_columns = abs_columns.filter(  # TODO: REMOVE WHEN jobs PROPERTY EXPLOSION IS CONTAINED
            lambda r: not r.es_column.startswith("other.") and
                      not r.es_column.startswith("previous_values.cf_") and
                      not r.es_index.startswith("debug") and
                      r.es_column.find("=") == -1 and
                      r.es_column.find(" ") == -1
        )

        def add_column(c, query_path):
            c.last_updated = Date.now()
            if query_path[0] != ".":
                c.names[query_path[0]] = relative_field(c.names["."], query_path[0])

            with self.meta.columns.locker:
                self._upsert_column(c)
                for alias in meta.aliases:
                    c = copy(c)
                    c.es_index = alias
                    self._upsert_column(c)

        with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, debug=DEBUG):
            # LIST OF EVERY NESTED PATH
            query_paths = [[c.es_column] for c in abs_columns if c.type == "nested"]
            for a, b in itertools.product(query_paths, query_paths):
                aa = a[0]
                bb = b[0]
                if aa and bb.startswith(aa):
                    for i, b_prefix in enumerate(b):
                        if len(b_prefix) > len(aa):
                            continue
                        if aa == b_prefix:
                            break  # SPLIT ALREADY FOUND
                        b.insert(i, aa)
                        break
            for q in query_paths:
                q.append(".")
            query_paths.append(ROOT_PATH)

            # ADD RELATIVE COLUMNS
            for abs_column in abs_columns:
                for query_path in query_paths:
                    add_column(abs_column, query_path)
Example #9
    def _parse_properties(self, alias, mapping, meta):
        abs_columns = elasticsearch.parse_properties(alias, None,
                                                     mapping.properties)
        if any(c.cardinality == 0 and c.names['.'] != '_id'
               for c in abs_columns):
            Log.warning("Some columns are not stored {{names}}",
                        names=[
                            ".".join((c.es_index, c.names['.']))
                            for c in abs_columns if c.cardinality == 0
                        ])

        with Timer("upserting {{num}} columns", {"num": len(abs_columns)},
                   silent=not DEBUG):
            # LIST OF EVERY NESTED PATH
            query_paths = [[c.es_column] for c in abs_columns
                           if c.es_type == "nested"]
            for a, b in itertools.product(query_paths, query_paths):
                aa = a[0]
                bb = b[0]
                if aa and bb.startswith(aa):
                    for i, b_prefix in enumerate(b):
                        if len(b_prefix) > len(aa):
                            continue
                        if aa == b_prefix:
                            break  # SPLIT ALREADY FOUND
                        b.insert(i, aa)
                        break
            for q in query_paths:
                q.append(SELF_PATH)
            query_paths.append(ROOT_PATH)
            self.alias_to_query_paths[alias] = query_paths
            for i in self.index_to_alias.get_domain(alias):
                self.alias_to_query_paths[i] = query_paths

            # ADD RELATIVE NAMES
            for abs_column in abs_columns:
                abs_column.last_updated = None
                abs_column.jx_type = jx_type(abs_column)
                for query_path in query_paths:
                    abs_column.names[query_path[0]] = relative_field(
                        abs_column.names["."], query_path[0])
                self.todo.add(self.meta.columns.add(abs_column))
        pass
Example #10
    def _parse_properties(self, alias, mapping, meta):
        abs_columns = elasticsearch.parse_properties(alias, None, mapping.properties)
        if any(c.cardinality == 0 and c.names['.'] != '_id' for c in abs_columns):
            Log.warning(
                "Some columns are not stored {{names}}",
                names=[
                    ".".join((c.es_index, c.names['.']))
                    for c in abs_columns
                    if c.cardinality == 0
                ]
            )

        with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, silent=not DEBUG):
            # LIST OF EVERY NESTED PATH
            query_paths = [[c.es_column] for c in abs_columns if c.es_type == "nested"]
            for a, b in itertools.product(query_paths, query_paths):
                aa = a[0]
                bb = b[0]
                if aa and bb.startswith(aa):
                    for i, b_prefix in enumerate(b):
                        if len(b_prefix) > len(aa):
                            continue
                        if aa == b_prefix:
                            break  # SPLIT ALREADY FOUND
                        b.insert(i, aa)
                        break
            for q in query_paths:
                q.append(SELF_PATH)
            query_paths.append(ROOT_PATH)
            self.alias_to_query_paths[alias] = query_paths
            for i in self.index_to_alias.get_domain(alias):
                self.alias_to_query_paths[i] = query_paths

            # ADD RELATIVE NAMES
            for abs_column in abs_columns:
                abs_column.last_updated = None
                abs_column.jx_type = jx_type(abs_column)
                for query_path in query_paths:
                    abs_column.names[query_path[0]] = relative_field(abs_column.names["."], query_path[0])
                self.todo.add(self.meta.columns.add(abs_column))
        pass
Example #11
    def _parse_properties(self, abs_index, properties, meta):
        abs_columns = _elasticsearch.parse_properties(abs_index, None,
                                                      properties.properties)
        abs_columns = abs_columns.filter(  # TODO: REMOVE WHEN jobs PROPERTY EXPLOSION IS CONTAINED
            lambda r: not r.es_column.startswith("other.") and
                      not r.es_column.startswith("previous_values.cf_") and
                      not r.es_index.startswith("debug")
        )
        with Timer("upserting {{num}} columns", {"num": len(abs_columns)},
                   debug=DEBUG):

            def add_column(c, query_path):
                c.last_updated = Date.now()
                if query_path:
                    c.table = c.es_index + "." + query_path.last()
                else:
                    c.table = c.es_index

                with self.meta.columns.locker:
                    self._upsert_column(c)
                    for alias in meta.aliases:
                        c = copy(c)
                        if query_path:
                            c.table = alias + "." + query_path.last()
                        else:
                            c.table = alias
                        self._upsert_column(c)

            # EACH query_path IS A LIST OF EVER-INCREASING PATHS THROUGH EACH NESTED LEVEL
            query_paths = wrap([[c.es_column] for c in abs_columns
                                if c.type == "nested"])
            for a, b in itertools.product(query_paths, query_paths):
                aa = a.last()
                bb = b.last()
                if aa and bb.startswith(aa):
                    for i, b_prefix in enumerate(b):
                        if len(b_prefix) < len(aa):
                            continue
                        if aa == b_prefix:
                            break  # SPLIT ALREADY FOUND
                        b.insert(0, aa)
                        break
            query_paths.append([])

            for c in abs_columns:
                # ADD RELATIVE COLUMNS
                full_path = listwrap(c.nested_path)
                abs_depth = len(full_path)
                abs_parent = coalesce(full_path.last(), "")
                for query_path in query_paths:
                    rel_depth = len(query_path)

                    # ABSOLUTE
                    add_column(copy(c), query_path)
                    cc = copy(c)
                    cc.relative = True

                    if not query_path:
                        add_column(cc, query_path)
                        continue

                    rel_parent = query_path.last()

                    if c.es_column.startswith(rel_parent + "."):
                        cc.name = c.es_column[len(rel_parent) + 1:]
                        add_column(cc, query_path)
                    elif c.es_column == rel_parent:
                        cc.name = "."
                        add_column(cc, query_path)
                    elif not abs_parent:
                        # THIS RELATIVE NAME (..o) ALSO NEEDS A RELATIVE NAME (o)
                        # AND THEN REMOVE THE SHADOWED
                        cc.name = "." + ("." *
                                         (rel_depth - abs_depth)) + c.es_column
                        add_column(cc, query_path)
                    elif rel_parent.startswith(abs_parent + "."):
                        cc.name = "." + ("." *
                                         (rel_depth - abs_depth)) + c.es_column
                        add_column(cc, query_path)
                    elif rel_parent != abs_parent:
                        # SIBLING NESTED PATHS ARE INVISIBLE
                        pass
                    else:
                        Log.error("logic error")
Example #12
    def _parse_properties(self, abs_index, properties, meta):
        abs_columns = _elasticsearch.parse_properties(abs_index, None, properties.properties)
        abs_columns = abs_columns.filter(  # TODO: REMOVE WHEN jobs PROPERTY EXPLOSION IS CONTAINED
            lambda r: not r.es_column.startswith("other.") and
                      not r.es_column.startswith("previous_values.cf_") and
                      not r.es_index.startswith("debug")
        )
        with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, debug=DEBUG):
            def add_column(c, query_path):
                c.last_updated = Date.now()
                if query_path:
                    c.table = c.es_index + "." + query_path.last()
                else:
                    c.table = c.es_index

                with self.meta.columns.locker:
                    self._upsert_column(c)
                    for alias in meta.aliases:
                        c = copy(c)
                        if query_path:
                            c.table = alias + "." + query_path.last()
                        else:
                            c.table = alias
                        self._upsert_column(c)

            # EACH query_path IS A LIST OF EVER-INCREASING PATHS THROUGH EACH NESTED LEVEL
            query_paths = wrap([[c.es_column] for c in abs_columns if c.type == "nested"])
            for a, b in itertools.product(query_paths, query_paths):
                aa = a.last()
                bb = b.last()
                if aa and bb.startswith(aa):
                    for i, b_prefix in enumerate(b):
                        if len(b_prefix) < len(aa):
                            continue
                        if aa == b_prefix:
                            break  # SPLIT ALREADY FOUND
                        b.insert(0, aa)
                        break
            query_paths.append([])

            for c in abs_columns:
                # ADD RELATIVE COLUMNS
                full_path = listwrap(c.nested_path)
                abs_depth = len(full_path)
                abs_parent = coalesce(full_path.last(), "")
                for query_path in query_paths:
                    rel_depth = len(query_path)

                    # ABSOLUTE
                    add_column(copy(c), query_path)
                    cc = copy(c)
                    cc.relative = True

                    if not query_path:
                        add_column(cc, query_path)
                        continue

                    rel_parent = query_path.last()

                    if c.es_column.startswith(rel_parent+"."):
                        cc.name = c.es_column[len(rel_parent)+1:]
                        add_column(cc, query_path)
                    elif c.es_column == rel_parent:
                        cc.name = "."
                        add_column(cc, query_path)
                    elif not abs_parent:
                        # THIS RELATIVE NAME (..o) ALSO NEEDS A RELATIVE NAME (o)
                        # AND THEN REMOVE THE SHADOWED
                        cc.name = "." + ("." * (rel_depth - abs_depth)) + c.es_column
                        add_column(cc, query_path)
                    elif rel_parent.startswith(abs_parent+"."):
                        cc.name = "." + ("." * (rel_depth - abs_depth)) + c.es_column
                        add_column(cc, query_path)
                    elif rel_parent != abs_parent:
                        # SIBLING NESTED PATHS ARE INVISIBLE
                        pass
                    else:
                        Log.error("logic error")
Example #13
    def _parse_properties(self, alias, mapping):
        abs_columns = elasticsearch.parse_properties(alias, ".", ROOT_PATH,
                                                     mapping.properties)
        if DEBUG and any(c.cardinality == 0 and c.name != '_id'
                         for c in abs_columns):
            Log.warning(
                "Some columns are not stored in {{url}} {{index|quote}} table:\n{{names}}",
                url=self.es_cluster.url,
                index=alias,
                names=[
                    ".".join((c.es_index, c.name)) for c in abs_columns
                    if c.cardinality == 0
                ])

        with Timer("upserting {{num}} columns", {"num": len(abs_columns)},
                   silent=not DEBUG):
            # LIST OF EVERY NESTED PATH
            query_paths = [[c.es_column] for c in abs_columns
                           if c.es_type == "nested"]
            for a, b in itertools.product(query_paths, query_paths):
                aa = a[0]
                bb = b[0]
                if aa and bb.startswith(aa):
                    for i, b_prefix in enumerate(b):
                        if len(b_prefix) > len(aa):
                            continue
                        if aa == b_prefix:
                            break  # SPLIT ALREADY FOUND
                        b.insert(i, aa)
                        break
            for q in query_paths:
                q.append(".")
            query_paths.append(ROOT_PATH)

            # ENSURE ALL TABLES HAVE THE QUERY PATHS SET
            self.alias_to_query_paths[alias] = query_paths
            for i, a in self.index_to_alias.items():
                if a == alias:
                    self.alias_to_query_paths[i] = query_paths

            # ENSURE COLUMN HAS CORRECT jx_type
            # PICK DEEPEST NESTED PROPERTY AS REPRESENTATIVE
            output = []
            best = {}
            for abs_column in abs_columns:
                abs_column.jx_type = jx_type(abs_column)
                if abs_column.jx_type not in STRUCT:
                    clean_name = unnest_path(abs_column.name)
                    other = best.get(clean_name)
                    if other:
                        if len(other.nested_path) < len(abs_column.nested_path):
                            output.remove(other)
                            self.meta.columns.update({
                                "clear": ".",
                                "where": {
                                    "eq": {
                                        "es_column": other.es_column,
                                        "es_index": other.es_index
                                    }
                                }
                            })
                        else:
                            continue
                    best[clean_name] = abs_column
                output.append(abs_column)

            # REGISTER ALL COLUMNS
            canonicals = []
            for abs_column in output:
                canonical = self.meta.columns.add(abs_column)
                canonicals.append(canonical)

            self.todo.extend(canonicals)
            return canonicals
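
Example #13 keeps a single representative column per un-nested name, preferring the one with the deepest nested_path and clearing the shallower duplicate from the metadata. A toy sketch of that selection with hypothetical columns; the clean name is supplied directly rather than computed with unnest_path, only the non-container branch is shown, and the meta.columns.update(...) clearing step is reduced to a comment.

from collections import namedtuple

Column = namedtuple("Column", ["clean_name", "name", "nested_path"])

abs_columns = [
    Column("result.ok", "result.ok", ["."]),            # shallow duplicate
    Column("result.ok", "result.ok", ["result", "."]),  # deeper: wins
    Column("build.date", "build.date", ["."]),
]

output = []
best = {}
for abs_column in abs_columns:
    other = best.get(abs_column.clean_name)
    if other:
        if len(other.nested_path) < len(abs_column.nested_path):
            output.remove(other)  # the original also clears the shallower column from meta.columns
        else:
            continue
    best[abs_column.clean_name] = abs_column
    output.append(abs_column)

print([(c.name, c.nested_path) for c in output])
# [('result.ok', ['result', '.']), ('build.date', ['.'])]
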
Example #14
    def _parse_properties(self, abs_index, properties, meta):
        abs_columns = _elasticsearch.parse_properties(abs_index, None, properties.properties)
        abs_columns = abs_columns.filter(  # TODO: REMOVE WHEN jobs PROPERTY EXPLOSION IS CONTAINED
            lambda r: not r.es_column.startswith("other.") and
                      not r.es_column.startswith("previous_values.cf_") and
                      not r.es_index.startswith("debug") and
                      r.es_column.find("=")==-1 and
                      r.es_column.find(" ")==-1
        )
        with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, debug=DEBUG):
            def add_column(c, query_path):
                c.last_updated = Date.now()
                c.table = join_field([c.es_index]+split_field(query_path[0]))

                with self.meta.columns.locker:
                    self._upsert_column(c)
                    for alias in meta.aliases:
                        c = copy(c)
                        c.table = join_field([alias]+split_field(query_path[0]))
                        self._upsert_column(c)

            # LIST OF EVERY NESTED PATH
            query_paths = [[c.es_column] for c in abs_columns if c.type == "nested"]
            for a, b in itertools.product(query_paths, query_paths):
                aa = a[0]
                bb = b[0]
                if aa and bb.startswith(aa):
                    for i, b_prefix in enumerate(b):
                        if len(b_prefix) > len(aa):
                            continue
                        if aa == b_prefix:
                            break  # SPLIT ALREADY FOUND
                        b.insert(i, aa)
                        break
            for q in query_paths:
                q.append(".")
            query_paths.append(ROOT_PATH)

            # ADD RELATIVE COLUMNS
            for abs_column in abs_columns:
                full_path = abs_column.nested_path
                abs_depth = len(full_path)-1
                abs_parent = full_path[1] if abs_depth else ""

                for query_path in query_paths:
                    rel_depth = len(query_path)-1
                    rel_parent = query_path[0]
                    rel_column = copy(abs_column)
                    rel_column.relative = True

                    add_column(copy(abs_column), query_path)

                    if rel_parent == ".":
                        add_column(rel_column, query_path)
                    elif abs_column.es_column.startswith(rel_parent+"."):
                        rel_column.name = abs_column.es_column[len(rel_parent)+1:]
                        add_column(rel_column, query_path)
                    elif abs_column.es_column == rel_parent:
                        rel_column.name = "."
                        add_column(rel_column, query_path)
                    elif not abs_parent:
                        # THIS RELATIVE NAME (..o) ALSO NEEDS A RELATIVE NAME (o)
                        # AND THEN REMOVE THE SHADOWED
                        rel_column.name = "." + ("." * (rel_depth - abs_depth)) + abs_column.es_column
                        add_column(rel_column, query_path)
                    elif rel_parent.startswith(abs_parent+"."):
                        rel_column.name = "." + ("." * (rel_depth - abs_depth)) + abs_column.es_column
                        add_column(rel_column, query_path)
                    elif rel_parent != abs_parent:
                        # SIBLING NESTED PATHS ARE INVISIBLE
                        pass
                    else:
                        Log.error("logic error")
Example #15
    def _parse_properties(self, abs_index, properties, meta):
        abs_columns = _elasticsearch.parse_properties(abs_index, None, properties.properties)
        abs_columns = abs_columns.filter(  # TODO: REMOVE WHEN jobs PROPERTY EXPLOSION IS CONTAINED
            lambda r: not r.es_column.startswith("other.") and
                      not r.es_column.startswith("previous_values.cf_") and
                      not r.es_index.startswith("debug") and
                      r.es_column.find("=")==-1 and
                      r.es_column.find(" ")==-1
        )
        with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, debug=DEBUG):
            def add_column(c, query_path):
                c.last_updated = Date.now()
                c.table = join_field([c.es_index]+split_field(query_path[0]))

                with self.meta.columns.locker:
                    self._upsert_column(c)
                    for alias in meta.aliases:
                        c = copy(c)
                        c.table = join_field([alias]+split_field(query_path[0]))
                        self._upsert_column(c)

            # LIST OF EVERY NESTED PATH
            query_paths = [[c.es_column] for c in abs_columns if c.type == "nested"]
            for a, b in itertools.product(query_paths, query_paths):
                aa = a[0]
                bb = b[0]
                if aa and bb.startswith(aa):
                    for i, b_prefix in enumerate(b):
                        if len(b_prefix) > len(aa):
                            continue
                        if aa == b_prefix:
                            break  # SPLIT ALREADY FOUND
                        b.insert(i, aa)
                        break
            for q in query_paths:
                q.append(".")
            query_paths.append(ROOT_PATH)

            # ADD RELATIVE COLUMNS
            for abs_column in abs_columns:
                full_path = abs_column.nested_path
                abs_depth = len(full_path)-1
                abs_parent = full_path[1] if abs_depth else ""

                for query_path in query_paths:
                    rel_depth = len(query_path)-1
                    rel_parent = query_path[0]
                    rel_column = copy(abs_column)
                    rel_column.relative = True

                    add_column(copy(abs_column), query_path)

                    if rel_parent == ".":
                        add_column(rel_column, query_path)
                    elif abs_column.es_column.startswith(rel_parent+"."):
                        rel_column.name = abs_column.es_column[len(rel_parent)+1:]
                        add_column(rel_column, query_path)
                    elif abs_column.es_column == rel_parent:
                        rel_column.name = "."
                        add_column(rel_column, query_path)
                    elif not abs_parent:
                        # THIS RELATIVE NAME (..o) ALSO NEEDS A RELATIVE NAME (o)
                        # AND THEN REMOVE THE SHADOWED
                        rel_column.name = "." + ("." * (rel_depth - abs_depth)) + abs_column.es_column
                        add_column(rel_column, query_path)
                    elif rel_parent.startswith(abs_parent+"."):
                        rel_column.name = "." + ("." * (rel_depth - abs_depth)) + abs_column.es_column
                        add_column(rel_column, query_path)
                    elif rel_parent != abs_parent:
                        # SIBLING NESTED PATHS ARE INVISIBLE
                        pass
                    else:
                        Log.error("logic error")
Example #16
    def _parse_properties(self, alias, mapping):
        abs_columns = elasticsearch.parse_properties(alias, ".", ROOT_PATH, mapping.properties)
        if DEBUG and any(c.cardinality == 0 and c.name != '_id' for c in abs_columns):
            Log.warning(
                "Some columns are not stored in {{url}} {{index|quote}} table:\n{{names}}",
                url=self.es_cluster.url,
                index=alias,
                names=[
                    ".".join((c.es_index, c.name))
                    for c in abs_columns
                    if c.cardinality == 0
                ]
            )

        with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, silent=not DEBUG):
            # LIST OF EVERY NESTED PATH
            query_paths = [[c.es_column] for c in abs_columns if c.es_type == "nested"]
            for a, b in itertools.product(query_paths, query_paths):
                aa = a[0]
                bb = b[0]
                if aa and bb.startswith(aa):
                    for i, b_prefix in enumerate(b):
                        if len(b_prefix) > len(aa):
                            continue
                        if aa == b_prefix:
                            break  # SPLIT ALREADY FOUND
                        b.insert(i, aa)
                        break
            for q in query_paths:
                q.append(".")
            query_paths.append(ROOT_PATH)

            # ENSURE ALL TABLES HAVE THE QUERY PATHS SET
            self.alias_to_query_paths[alias] = query_paths
            for i, a in self.index_to_alias.items():
                if a == alias:
                    self.alias_to_query_paths[i] = query_paths

            # ENSURE COLUMN HAS CORRECT jx_type
            # PICK DEEPEST NESTED PROPERTY AS REPRESENTATIVE
            output = []
            best = {}
            for abs_column in abs_columns:
                abs_column.jx_type = jx_type(abs_column)
                if abs_column.jx_type not in STRUCT:
                    clean_name = unnest_path(abs_column.name)
                    other = best.get(clean_name)
                    if other:
                        if len(other.nested_path) < len(abs_column.nested_path):
                            output.remove(other)
                            self.meta.columns.update({"clear": ".", "where": {"eq": {"es_column": other.es_column, "es_index": other.es_index}}})
                        else:
                            continue
                    best[clean_name] = abs_column
                output.append(abs_column)

            # REGISTER ALL COLUMNS
            canonicals = []
            for abs_column in output:
                canonical = self.meta.columns.add(abs_column)
                canonicals.append(canonical)

            self.todo.extend(canonicals)
            return canonicals