Python join_field Examples, mo_dots.join_field Python Examples

Example #1

0

Show file

File: test_generator.py Project: ubdussamad/mo-parquet

    def _run_test(self, config):
        """
        Build a row generator and a schema from `config`, convert the
        generated rows to columns, and compare the result with the values,
        repetition levels and definition levels the generator reports.

        :param config: list of {name: nature} objects; only the first
                       (name, nature) pair of each object is used
        :return: None; mismatches are reported through assertEqual
        """
        # dict.items() IS A VIEW IN PYTHON 3 AND CAN NOT BE SLICED, SO GO
        # THROUGH A LIST.  EACH OBJECT CONTRIBUTES AT MOST ONE PAIR, SO
        # REVERSING THE FLAT LIST IS THE SAME AS WALKING reversed(config)
        steps = [pair for c in config for pair in list(c.items())[:1]]

        # WRAP THE GENERATORS FROM THE INNERMOST (LAST) STEP OUTWARD
        generator = make_const
        for name, rep_type in reversed(steps):
            generator = rep_type_to_generator[rep_type](name, generator)

        # REGISTER EVERY PREFIX OF THE PATH IN THE SCHEMA
        schema = SchemaTree(locked=True)
        path = []
        for name, rep_type in steps:
            path.append(name)
            schema.add(join_field(path), rep_type, int)

        # THESE TESTS ASSUME ONLY ONE LEAF
        full_name = join_field([name for name, _ in steps])

        data, values, rep_level, def_level = zip(*list(generator()))
        expected_values = {full_name: sum(values, [])}
        expected_reps = {full_name: sum(rep_level, [])}
        expected_defs = {full_name: sum(def_level, [])}

        table = rows_to_columns(list(data), schema)
        self.assertEqual(table.values, expected_values)
        self.assertEqual(table.reps, expected_reps)
        self.assertEqual(table.defs, expected_defs)

Example #2

0

Show file

    def _decode_object(self, index, c, parent_path, query_path, expected_vars):
        """
        Generator that decodes one object from the input, yielding the
        current `index` as it goes.

        :param index: current position in the input
        :param c: the token/character most recently read -- presumably the
                  one that opened this object; confirm against the caller
        :param parent_path: list of property names leading to this object
        :param query_path: list of property names still to be followed;
                           the first element is compared with each
                           property name found in the object
        :param expected_vars: names of the variables to pick up; when it
                              contains "." the whole object is assigned
        :return: generator of index values
        """
        if "." in expected_vars:
            # THE WHOLE OBJECT IS WANTED; REFUSE IF self.done[0] IS A PREFIX
            # OF parent_path
            if len(self.done[0]) <= len(parent_path) and all(
                    d == p for d, p in zip(self.done[0], parent_path)):
                Log.error("Can not pick up more variables, iterator is done")

            if query_path:
                Log.error(
                    "Can not extract objects that contain the iteration",
                    var=join_field(query_path),
                )

            index = self._assign_token(index, c, expected_vars)
            # c, index = self.skip_whitespace(index)
            yield index
            return

        did_yield = False
        while True:
            c, index = self.skip_whitespace(index)
            if c == b",":
                continue
            elif c == b'"':
                # A PROPERTY NAME, WHICH MUST BE FOLLOWED BY A COLON
                name, index = self.simple_token(index, c)

                c, index = self.skip_whitespace(index)
                if c != b":":
                    Log.error("Expecting colon")
                c, index = self.skip_whitespace(index)

                child_expected = needed(name, expected_vars)
                child_path = parent_path + [name]
                if any(child_expected):
                    if not query_path:
                        index = self._assign_token(index, c, child_expected)
                    elif query_path[0] == name:
                        # THIS PROPERTY IS ON THE QUERY PATH: DESCEND, AND
                        # PASS ALONG EVERYTHING THE CHILD YIELDS
                        for index in self._decode_token(
                                index, c, child_path, query_path[1:],
                                child_expected):
                            did_yield = True
                            yield index
                    else:
                        if len(self.done[0]) <= len(child_path):
                            Log.error(
                                "Can not pick up more variables, iterator over {{path}}"
                                " is done",
                                path=join_field(self.done[0]),
                            )
                        index = self._assign_token(index, c, child_expected)
                elif query_path and query_path[0] == name:
                    # NOTHING EXPECTED FROM THIS PROPERTY, BUT IT IS ON THE
                    # QUERY PATH (NOTE: did_yield IS NOT SET IN THIS BRANCH)
                    for index in self._decode_token(index, c, child_path,
                                                    query_path[1:],
                                                    child_expected):
                        yield index
                else:
                    # PROPERTY IS NOT NEEDED, SKIP OVER ITS VALUE
                    index = self.jump_to_end(index, c)
            elif c == b"}":
                # END OF OBJECT; YIELD AT LEAST ONCE
                if not did_yield:
                    yield index
                break

Example #3

0

Show file

File: typed_encoder.py Project: yoyogias2011/TUID

def untype_path(encoded):
    """
    Remove the type steps from an encoded path.

    Steps that start with TYPE_PREFIX are dropped and every remaining step
    is passed through decode_property() before being re-joined.  For a path
    that starts with "..", the leading dots are stripped before splitting
    and one fewer dot than was stripped is put back in front of the result.

    :param encoded: the encoded (typed) path
    :return: the path without type steps
    """

    def _strip_types(path):
        # A GENERATOR (NOT A LIST) IS HANDED TO join_field
        return join_field(
            decode_property(step)
            for step in split_field(path)
            if not step.startswith(TYPE_PREFIX)
        )

    if not encoded.startswith(".."):
        return _strip_types(encoded)

    remainder = encoded.lstrip(".")
    num_dots = len(encoded) - len(remainder)
    return ("." * (num_dots - 1)) + _strip_types(remainder)

Example #4

0

Show file

File: typed_encoder.py Project: klahnakoski/pyLibrary

def untype_path(encoded):
    """
    Return `encoded` with every step that starts with TYPE_PREFIX removed
    and the remaining steps decoded with decode_property().

    A path starting with ".." keeps a dotted prefix: the leading dots are
    removed before the path is split, and one fewer dot than was removed is
    prepended to the joined result.

    :param encoded: the encoded (typed) path
    :return: the untyped path
    """
    has_leading_dots = encoded.startswith("..")
    path = encoded.lstrip(".") if has_leading_dots else encoded

    untyped = join_field(
        decode_property(s) for s in split_field(path) if not s.startswith(TYPE_PREFIX)
    )

    if has_leading_dots:
        back = len(encoded) - len(path) - 1
        return ("." * back) + untyped
    return untyped

Example #5

0

Show file

    def _edges_op(self, query, frum):
        """
        Start assembling the SQL pieces for a query that has edges.

        The visible code builds the FROM clause (base table plus LEFT JOINs
        to the nested tables that `frum` passes through), the main filter,
        an initial select clause, and then computes `edge_values` for each
        edge of the query.

        NOTE(review): this excerpt ends inside the edge loop; the remainder
        of the function (and its return value) is not visible here.

        :param query: the query; a copy is made before it is marked up
        :param frum: dotted name; its first step is the base table, the
                     rest is the nested path
        """
        query = query.copy()  # WE WILL BE MARKING UP THE QUERY
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        outer_selects = []  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        frum_path = split_field(frum)
        base_table = join_field(frum_path[0:1])
        path = join_field(frum_path[1:])
        # ONE ALIAS PER KNOWN TABLE: __a__, __b__, ...
        nest_to_alias = {
            nested_path: quote_column("__" + unichr(ord('a') + i) + "__")
            for i, (nested_path, sub_table) in enumerate(self.sf.tables.items())
        }

        schema = self.sf.tables[relative_field(frum, self.sf.fact)].schema

        # KEEP ONLY THE TABLES THAT ARE A PREFIX OF THE PATH, ORDERED BY LENGTH OF nest
        tables = []
        for n, a in nest_to_alias.items():
            if startswith_field(path, n):
                tables.append({"nest": n, "alias": a})
        tables = jx.sort(tables, {"value": {"length": "nest"}})

        # FIRST TABLE STARTS THE FROM CLAUSE; EACH LATER TABLE IS LEFT JOINED TO THE PREVIOUS ONE
        from_sql = quote_column(join_field([base_table] + split_field(tables[0].nest))) + tables[0].alias
        for previous, t in zip(tables, tables[1::]):
            from_sql += (
                SQL_LEFT_JOIN + quote_column(concat_field(base_table, t.nest)) + t.alias +
                SQL_ON + join_column(t.alias, quoted_PARENT) + " = " + join_column(previous.alias, quoted_UID)
            )

        main_filter = query.where.to_sql(schema, boolean=True)[0].sql.b

        # SHIFT THE COLUMN DEFINITIONS BASED ON THE NESTED QUERY DEPTH
        ons = []
        join_types = []
        wheres = []
        null_ons = [EXISTS_COLUMN + SQL_IS_NULL]
        groupby = []
        null_groupby = []
        orderby = []
        domains = []

        select_clause = [SQL_ONE + EXISTS_COLUMN] + [quote_column(c.es_column) for c in self.sf.tables['.'].columns]

        for edge_index, query_edge in enumerate(query.edges):
            edge_alias = quote_column("e" + text_type(edge_index))

            if query_edge.value:
                # EDGE IS DEFINED BY AN EXPRESSION
                edge_values = [p for c in query_edge.value.to_sql(schema).sql for p in c.items()]

            elif not query_edge.value and any(query_edge.domain.partitions.where):
                # EDGE IS DEFINED BY PARTITION FILTERS: BUILD A CASE THAT MAPS A ROW TO ITS PARTITION INDEX
                case = SQL_CASE
                for pp, p in enumerate(query_edge.domain.partitions):
                    w = p.where.to_sql(schema)[0].sql.b
                    t = quote_value(pp)
                    case += SQL_WHEN + w + SQL_THEN + t
                case += SQL_ELSE + SQL_NULL + SQL_END  # quote value with length of partitions
                edge_values = [("n", case)]

            elif query_edge.range:
                # EDGE IS DEFINED BY A (min, max) RANGE
                edge_values = query_edge.range.min.to_sql(schema)[0].sql.items() + query_edge.range.max.to_sql(schema)[
                    0].sql.items()

Example #6

0

Show file

def _get_schema_from_list(frum, table_name, prefix_path, nested_path, columns):
    """
    Scan the rows of a list and record, in `columns`, the type of every
    property found; recurses into objects and into lists of objects.

    :param frum: The list
    :param table_name: Name of the table this list holds records for
    :param prefix_path: parent path
    :param nested_path: each nested array, in reverse order
    :param columns: map from full name to column definition
    :return: None (`columns` is updated in place)
    """

    def _column_for(full_name):
        # FIND THE COLUMN, OR REGISTER A NEW "undefined" ONE
        found = columns[full_name]
        if not found:
            found = Column(names={table_name: full_name},
                           es_column=full_name,
                           es_index=".",
                           type="undefined",
                           nested_path=nested_path)
            columns.add(found)
        return found

    for row in frum:
        row_type = _type_to_name[row.__class__]
        if row_type != "object":
            # A PRIMITIVE ROW IS RECORDED UNDER THE PREFIX ITSELF
            column = _column_for(join_field(prefix_path))
            column.type = _merge_type[column.type][row_type]
            continue

        for name, value in row.items():
            child_path = prefix_path + [name]
            column = _column_for(join_field(child_path))

            if not isinstance(value, list):
                this_type = _type_to_name[value.__class__]
            elif len(value) == 0:
                this_type = "undefined"
            else:
                # THE FIRST ELEMENT DECIDES THE TYPE; ONLY A LIST OF MORE
                # THAN ONE OBJECT IS CONSIDERED "nested"
                this_type = _type_to_name[value[0].__class__]
                if len(value) > 1 and this_type == "object":
                    this_type = "nested"

            column.type = _merge_type[column.type][this_type]

            if this_type == "object":
                _get_schema_from_list([value], table_name, child_path,
                                      nested_path, columns)
            elif this_type == "nested":
                outer = listwrap(nested_path)
                deeper = unwraplist(
                    [join_field(split_field(outer[0]) + [name])] + outer)
                _get_schema_from_list(value, table_name, child_path, deeper,
                                      columns)

Example #7

0

Show file

    def _nest_column(self, column, new_path):
        """
        Move `column` (and every column whose name starts with it) into a
        new nested table, and copy the existing data over.

        :param column: the column that is becoming nested
        :param new_path: the nested path of the new table, relative to
                         self.name
        :return: None
        """
        destination_table = join_field([self.name] + split_field(new_path))
        existing_table = join_field([self.name] +
                                    split_field(column.nested_path[0]))

        # FIND THE INNER COLUMNS WE WILL BE MOVING
        new_columns = {}
        for cname, cols in self.columns.items():
            if startswith_field(cname, column.names[self.name]):
                new_columns[cname] = set()
                for col in cols:
                    new_columns[cname].add(col)
                    # THE NEW PATH BECOMES THE FIRST STEP OF THE COLUMN'S NESTED PATH
                    col.nested_path = [new_path] + col.nested_path

        # TODO: IF THERE ARE CHILD TABLES, WE MUST UPDATE THEIR RELATIONS TOO?

        # DEFINE A NEW TABLE?
        # LOAD THE COLUMNS
        command = "PRAGMA table_info(" + quote_table(destination_table) + ")"
        details = self.db.query(command)
        if details.data:
            # THE DESTINATION TABLE ALREADY HAS COLUMNS
            raise Log.error("not expected, new nesting!")
        from jx_sqlite.query_table import QueryTable
        self.nested_tables[new_path] = sub_table = QueryTable(
            destination_table, self.db, exists=False)

        # ADD THE PARENT AND ORDER COLUMNS, THEN THE COLUMNS BEING MOVED
        self.db.execute("ALTER TABLE " + quote_table(sub_table.name) +
                        " ADD COLUMN " + quoted_PARENT + " INTEGER")
        self.db.execute("ALTER TABLE " + quote_table(sub_table.name) +
                        " ADD COLUMN " + quote_table(ORDER) + " INTEGER")
        for cname, cols in new_columns.items():
            for c in cols:
                sub_table.add_column(c)

        # TEST IF THERE IS ANY DATA IN THE NEW NESTED ARRAY
        all_cols = [c for _, cols in sub_table.columns.items() for c in cols]
        if not all_cols:
            has_nested_data = "0"
        elif len(all_cols) == 1:
            has_nested_data = _quote_column(all_cols[0]) + " is NOT NULL"
        else:
            has_nested_data = "COALESCE(" + \
                              ",".join(_quote_column(c) for c in all_cols) + \
                              ") IS NOT NULL"

        # FILL TABLE WITH EXISTING COLUMN DATA
        # (THE EXISTING ROW'S UID IS SELECTED FOR BOTH THE UID AND THE PARENT, WITH ORDER 0)
        command = "INSERT INTO " + quote_table(destination_table) + "(\n" + \
                  ",\n".join(
                      [quoted_UID, quoted_PARENT, quote_table(ORDER)] +
                      [_quote_column(c) for _, cols in sub_table.columns.items() for c in cols]
                  ) + \
                  "\n)\n" + \
                  "\nSELECT\n" + ",".join(
            [quoted_UID, quoted_UID, "0"] +
            [_quote_column(c) for _, cols in sub_table.columns.items() for c in cols]
        ) + \
                  "\nFROM\n" + quote_table(existing_table) + \
                  "\nWHERE\n" + has_nested_data
        self.db.execute(command)

Example #8

0

Show file

File: meta.py Project: klahnakoski/SpotManager

def _get_schema_from_list(frum, table_name, prefix_path, nested_path, columns):
    """
    Walk every row of `frum`, merging the type of each property seen into
    the matching entry of `columns` (creating the entry when missing), and
    recursing into objects and lists of objects.

    :param frum: The list
    :param table_name: Name of the table this list holds records for
    :param prefix_path: parent path
    :param nested_path: each nested array, in reverse order
    :param columns: map from full name to column definition
    :return: None (`columns` is updated in place)
    """

    def _ensure_column(path):
        # RETURN THE COLUMN FOR path, ADDING AN "undefined" ONE IF NEEDED
        full_name = join_field(path)
        existing = columns[full_name]
        if existing:
            return existing
        created = Column(
            names={table_name: full_name},
            es_column=full_name,
            es_index=".",
            type="undefined",
            nested_path=nested_path
        )
        columns.add(created)
        return created

    for record in frum:
        record_type = _type_to_name[record.__class__]
        if record_type == "object":
            for name, value in record.items():
                property_path = prefix_path + [name]
                column = _ensure_column(property_path)

                if isinstance(value, list):
                    if len(value) == 0:
                        this_type = "undefined"
                    else:
                        # TYPE COMES FROM THE FIRST ELEMENT; MORE THAN ONE
                        # OBJECT MAKES IT "nested"
                        this_type = _type_to_name[value[0].__class__]
                        if len(value) != 1 and this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]

                column.type = _merge_type[column.type][this_type]

                if this_type == "object":
                    _get_schema_from_list([value], table_name, property_path, nested_path, columns)
                elif this_type == "nested":
                    paths = listwrap(nested_path)
                    new_nested = unwraplist([join_field(split_field(paths[0]) + [name])] + paths)
                    _get_schema_from_list(value, table_name, property_path, new_nested, columns)
        else:
            # A PRIMITIVE ROW IS RECORDED UNDER THE PREFIX ITSELF
            column = _ensure_column(prefix_path)
            column.type = _merge_type[column.type][record_type]

Example #9

0

Show file

    def getFrameVariables(self, body):
        """
        Produce the variable declarations a script `body` needs for the
        columns it mentions.

        For each column of self.fromData, the first occurrence of its name
        that is neither preceded by a double quote nor by "_source." causes
        a declaration to be emitted; all occurrences of that name are then
        masked in `body` so later columns do not match inside it.

        :param body: the script text
        :return: the declarations, concatenated into one string
        """
        declarations = []
        declared_parents = set()  # ALL PARENTS OF VARIABLES WITH "." IN NAME

        def declare_parent(name):
            # DO NOT MAKE THE SAME PARENT TWICE
            if name in declared_parents:
                return
            declared_parents.add(name)

            steps = split_field(name)
            if len(steps) == 1:
                declarations.append("Map " + name + " = new HashMap();\n")
                return
            # GRANDPARENTS MUST BE DECLARED BEFORE THE PARENT
            declare_parent(join_field(steps[0:-1]))
            declarations.append(name + " = new HashMap();\n")

        columns = self.fromData.columns
        body = body.replace(".?", ".")

        for column in columns:
            name = column.name
            found = body.find(name, 0)
            while found >= 0:
                start = found
                found = body.find(name, start + 1)
                end = start + len(name)

                # SKIP OCCURRENCES THAT FOLLOW A QUOTE OR "_source."
                if body[start - 1:end + 1:][:-1] == "\"" + name:
                    continue
                if body[start - 8:end:] == "_source." + name:
                    continue

                # MASK THE NAME SO IT IS NOT MATCHED AGAIN
                body = body.replace(name, "-" * len(name))

                is_nested = len(split_field(name)) > 1
                if is_nested:
                    declare_parent(join_field(split_field(name)[0:-1]))

                if not (self.isLean or column.useSource):
                    declarations.append(name + " = getDocValue(\"" +
                                        name + "\");\n")
                elif is_nested:
                    declarations.append(name + " = getSourceValue(\"" +
                                        name + "\");\n")
                else:
                    declarations.append(name + " = _source[\"" +
                                        name + "\"];\n")
                break

        return "".join(declarations)

Example #10

0

Show file

            def add_column(c, query_path):
                """
                Stamp column `c` as updated now and upsert it under its
                index name, then upsert a copy of it under every alias.

                :param c: the column
                :param query_path: sequence whose first element is the
                                   nested path appended to the table name
                """
                c.last_updated = Date.now()
                nested_steps = split_field(query_path[0])
                c.table = join_field([c.es_index] + nested_steps)

                with self.meta.columns.locker:
                    self._upsert_column(c)
                    # EACH ALIAS GETS A COPY OF THE PREVIOUSLY UPSERTED COLUMN
                    for alias in meta.aliases:
                        c = copy(c)
                        c.table = join_field([alias] + nested_steps)
                        self._upsert_column(c)

Example #11

0

Show file

File: constants.py Project: mars-f/ActiveData

def set(constants):
    """
    REACH INTO THE MODULES AND OBJECTS TO SET CONSTANTS.
    THINK OF THIS AS PRIMITIVE DEPENDENCY INJECTION FOR MODULES.
    USEFUL FOR SETTING DEBUG FLAGS.

    Each leaf is first applied through sys.modules.  When that fails, the
    calling module's globals are tried: the path is split at every
    position into (module prefix, attribute name) and the first prefix the
    caller's module name ends with is used.

    :param constants: nested structure; its leaves are the
                      (dotted path, new value) pairs to apply.  Nothing
                      is done when it is falsy.
    :return: None; Log.error() is called for a path that could not be set
    """
    if not constants:
        return
    constants = wrap(constants)

    for k, new_value in constants.leaves():
        errors = []
        try:
            mo_dots_set_attr(sys.modules, k, new_value)
            continue
        except Exception as e:
            errors.append(e)

        # ONE MODULE IS MISSING, THE CALLING MODULE
        assigned = False
        try:
            caller_globals = sys._getframe(1).f_globals
            caller_file = caller_globals["__file__"]
            if not caller_file.endswith(".py"):
                raise Exception("do not know how to handle non-python caller")
            caller_module = caller_file[:-3].replace("/", ".")

            path = split_field(k)
            for i in range(1, len(path)):
                # SPLIT AT i: EVERYTHING BEFORE IS THE MODULE, THE REST IS
                # THE ATTRIBUTE (WAS path[:1], WHICH IGNORED THE LOOP INDEX)
                prefix = join_field(path[:i])
                name = join_field(path[i:])
                if caller_module.endswith(prefix):
                    old_value = mo_dots_set_attr(caller_globals, name, new_value)
                    assigned = True
                    if DEBUG:
                        from mo_logs import Log

                        Log.note(
                            "Changed {{module}}[{{attribute}}] from {{old_value}} to {{new_value}}",
                            module=prefix,
                            attribute=name,
                            old_value=old_value,
                            new_value=new_value
                        )
                    break
        except Exception as e:
            errors.append(e)

        # ONLY COMPLAIN WHEN THE FALLBACK DID NOT SET THE VALUE; `errors`
        # ALWAYS HOLDS THE sys.modules FAILURE AT THIS POINT
        if not assigned:
            from mo_logs import Log

            Log.error("Can not set constant {{path}}", path=k, cause=errors)

Example #12

0

Show file

File: constants.py Project: rv404674/TUID

def set(constants):
    """
    REACH INTO THE MODULES AND OBJECTS TO SET CONSTANTS.
    THINK OF THIS AS PRIMITIVE DEPENDENCY INJECTION FOR MODULES.
    USEFUL FOR SETTING DEBUG FLAGS.

    Each leaf is first applied through sys.modules.  When that fails, the
    calling module's globals are tried: the path is split at every
    position into (module prefix, attribute name) and the first prefix the
    caller's module name ends with is used.

    :param constants: nested structure; its leaves are the
                      (dotted path, new value) pairs to apply.  Nothing
                      is done when it is falsy.
    :return: None; Log.error() is called for a path that could not be set
    """
    if not constants:
        return
    constants = wrap(constants)

    for k, new_value in constants.leaves():
        errors = []
        try:
            mo_dots_set_attr(sys.modules, k, new_value)
            continue
        except Exception as e:
            errors.append(e)

        # ONE MODULE IS MISSING, THE CALLING MODULE
        assigned = False
        try:
            caller_globals = sys._getframe(1).f_globals
            caller_file = caller_globals["__file__"]
            if not caller_file.endswith(".py"):
                raise Exception("do not know how to handle non-python caller")
            caller_module = caller_file[:-3].replace("/", ".")

            path = split_field(k)
            for i in range(1, len(path)):
                # SPLIT AT i: EVERYTHING BEFORE IS THE MODULE, THE REST IS
                # THE ATTRIBUTE (WAS path[:1], WHICH IGNORED THE LOOP INDEX)
                prefix = join_field(path[:i])
                name = join_field(path[i:])
                if caller_module.endswith(prefix):
                    old_value = mo_dots_set_attr(caller_globals, name, new_value)
                    assigned = True
                    if DEBUG:
                        from mo_logs import Log

                        Log.note(
                            "Changed {{module}}[{{attribute}}] from {{old_value}} to {{new_value}}",
                            module=prefix,
                            attribute=name,
                            old_value=old_value,
                            new_value=new_value
                        )
                    break
        except Exception as e:
            errors.append(e)

        # ONLY COMPLAIN WHEN THE FALLBACK DID NOT SET THE VALUE; `errors`
        # ALWAYS HOLDS THE sys.modules FAILURE AT THIS POINT
        if not assigned:
            from mo_logs import Log

            Log.error("Can not set constant {{path}}", path=k, cause=errors)

Example #13

0

Show file

File: expressions.py Project: rv404674/TUID

    def getFrameVariables(self, body):
        """
        Return the declarations required by the columns that `body` uses.

        Columns of self.fromData are checked in order.  A column counts as
        used at the first occurrence of its name that is not directly after
        a double quote and not directly after "_source.".  Once a column is
        handled, every occurrence of its name in `body` is replaced with
        dashes before the next column is examined.

        :param body: the script text
        :return: one string holding all the declarations
        """
        output = []
        known_parents = set()    # ALL PARENTS OF VARIABLES WITH "." IN NAME

        def define_parent(parent):
            # DO NOT MAKE THE SAME PARENT TWICE
            if parent in known_parents:
                return
            known_parents.add(parent)

            parts = split_field(parent)
            if len(parts) != 1:
                # DECLARE THE ANCESTORS FIRST
                define_parent(join_field(parts[0:-1]))
                output.append(parent + " = new HashMap();\n")
            else:
                output.append("Map " + parent + " = new HashMap();\n")

        columns = self.fromData.columns
        body = body.replace(".?", ".")

        for col in columns:
            var = col.name
            size = len(var)
            next_hit = body.find(var, 0)
            while next_hit >= 0:
                hit = next_hit
                next_hit = body.find(var, hit + 1)

                quoted = body[hit - 1: hit + size + 1:][:-1] == "\"" + var
                from_source = body[hit - 8: hit + size:] == "_source." + var
                if quoted or from_source:
                    continue

                # HIDE THE NAME FROM THE COLUMNS THAT FOLLOW
                body = body.replace(var, "-" * size)

                has_parent = len(split_field(var)) > 1
                if has_parent:
                    define_parent(join_field(split_field(var)[0:-1]))

                if self.isLean or col.useSource:
                    if has_parent:
                        output.append(var + " = getSourceValue(\"" + var + "\");\n")
                    else:
                        output.append(var + " = _source[\"" + var + "\"];\n")
                else:
                    output.append(var + " = getDocValue(\"" + var + "\");\n")
                break

        return "".join(output)

Example #14

0

Show file

def get_pull_stats(stats_name, median_name):
    """
    Build the function that pulls the statistics out of an aggregation
    result.

    :param stats_name: path of the stats aggregation
    :param median_name: path of the aggregation holding the median under
                        "values" / "50.0"
    :return: whatever jx_expression_to_function() returns for the select
    """
    # (OUTPUT NAME, PROPERTY UNDER stats_name), IN OUTPUT ORDER
    stat_properties = [
        ("count", "count"),
        ("sum", "sum"),
        ("min", "min"),
        ("max", "max"),
        ("avg", "avg"),
        ("sos", "sum_of_squares"),
        ("std", "std_deviation"),
        ("var", "variance"),
    ]
    select = [
        {"name": name, "value": join_field([stats_name, prop])}
        for name, prop in stat_properties
    ]
    select.append(
        {"name": "median", "value": join_field([median_name, "values", "50.0"])}
    )
    return jx_expression_to_function({"select": select})

Example #15

0

Show file

File: setop.py Project: klahnakoski/pyLibrary

def get_pull_stats(stats_name, median_name):
    """
    Return a function that selects count, sum, min, max, avg, sos, std and
    var from under `stats_name`, and median from under `median_name`.

    :param stats_name: path of the stats aggregation
    :param median_name: path of the aggregation whose "values"."50.0"
                        property is the median
    :return: the result of jx_expression_to_function() for that select
    """

    def _stat(name, prop):
        return {"name": name, "value": join_field([stats_name, prop])}

    clauses = []
    for name, prop in (
        ("count", "count"),
        ("sum", "sum"),
        ("min", "min"),
        ("max", "max"),
        ("avg", "avg"),
        ("sos", "sum_of_squares"),
        ("std", "std_deviation"),
        ("var", "variance"),
    ):
        clauses.append(_stat(name, prop))

    median = {"name": "median", "value": join_field([median_name, "values", "50.0"])}
    clauses.append(median)

    return jx_expression_to_function({"select": clauses})

Example #16

0

Show file

File: __init__.py Project: team-githubs/ActiveData

def find_container(frum, after):
    """
    Resolve a `from` clause to the container it refers to.

    :param frum: a table name (text), data with a `type`, data with a
                 `from` (a query), a container, or anything else (which is
                 returned unchanged)
    :param after: passed to namespace.get_columns() when forcing a reload
    :return: the container for `frum`
    """
    global namespace
    if not namespace:
        # LAZY SETUP OF THE SHARED METADATA NAMESPACE
        if not container.config.default.settings:
            Log.error(
                "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
            )
        namespace = ElasticsearchMetadata(container.config.default.settings)
    if not frum:
        Log.error("expecting json query expression with from clause")

    # FORCE A RELOAD
    namespace.get_columns(frum, after=after)

    if is_text(frum):
        if frum in container_cache:
            return container_cache[frum]

        path = split_field(frum)
        fact_table_name = path[0]
        if fact_table_name == "meta":
            # THE meta TABLES ARE SERVED FROM THE NAMESPACE
            sub_table = path[1]
            if sub_table == "columns":
                return namespace.meta.columns.denormalized()
            if sub_table == "tables":
                return namespace.meta.tables
            fact_table_name = join_field(path[:2])

        type_ = container.config.default.type

        overrides = {
            "alias": fact_table_name,
            "name": frum,
            "exists": True
        }
        settings = set_default(overrides, container.config.default.settings)
        settings.type = None
        output = container.type2container[type_](settings)
        container_cache[frum] = output
        return output

    if is_data(frum) and frum.type and container.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return container.type2container[frum.type](frum.settings)

    if is_data(frum) and (frum["from"] or is_container(frum["from"])):
        from jx_base.query import QueryOp

        return QueryOp.wrap(frum)

    if is_container(frum):
        return ListContainer("test_list", frum)

    return frum

Example #17

0

Show file

    def parse_field(fieldname, data, depth):
        """
        RETURN (first, rest) OF fieldname

        Walks `data` along the steps of `fieldname`, recording each step in
        primary_nested / primary_column / primary_branch.  At the first
        step whose value is a list (for a multi-step name) the walk stops
        and (that step, the remaining path) is returned; when there is no
        such step, (fieldname, None) is returned.
        """
        steps = split_field(fieldname)
        multi_step = len(steps) > 1
        value = data
        for offset, step in enumerate(steps):
            try:
                value = value[step]
            except Exception as e:
                Log.error("{{name}} does not exist", name=fieldname)

            level = depth + offset
            unseen_level = len(primary_column) <= level

            if not (is_list(value) and multi_step):
                # NOT A BRANCH; REMEMBER IT ONLY IF THIS LEVEL IS NEW
                if unseen_level:
                    primary_nested.append(False)
                    primary_column.append(step)
                    primary_branch.append([value])
                continue

            if unseen_level:
                primary_nested.append(True)
                primary_column.append(step)
                primary_branch.append(value)
            elif primary_nested[depth] and primary_column[level] != step:
                Log.error("only one branch of tree allowed")
            else:
                primary_nested[level] = True
                primary_column[level] = step
                primary_branch[level] = value

            return step, join_field(steps[offset + 1:])
        return fieldname, None

Example #18

0

Show file

File: deep.py Project: klahnakoski/SpotManager

def get_pull(column):
    """
    Return the path used to pull `column` out of a result.

    :param column: column with `nested_path` and `es_column`
    :return: "fields" concatenated with the literal column name when the
             column is at the top level ("."); otherwise "_inner" followed
             by the column path relative to its nested path
    """
    nested = column.nested_path[0]
    if nested == ".":
        return concat_field("fields", literal_field(column.es_column))

    # DROP AS MANY LEADING STEPS AS THE NESTED PATH HAS
    depth = len(split_field(nested))
    return join_field(["_inner"] + split_field(column.es_column)[depth:])

Example #19

0

Show file

    def get_columns(self, table_name, column_name=None, force=False):
        """
        RETURN METADATA COLUMNS

        The table's column metadata is (re)loaded when the table is not yet
        known, when `force` is set, or when the table's timestamp is missing
        or older than MAX_COLUMN_METADATA_AGE.

        :param table_name: name of the table; only its first step is used
                           to look up the table itself
        :param column_name: optional column name passed to the lookup
        :param force: reload the column metadata regardless of its age
        :return: the columns sorted by name, or None when none are found
        """
        try:
            # LAST TIME WE GOT INFO FOR THIS TABLE
            short_name = join_field(split_field(table_name)[0:1])
            table = self.get_table(short_name)[0]

            if not table:
                table = Table(
                    name=short_name,
                    url=None,
                    query_path=None,
                    timestamp=Date.now()
                )
                with self.meta.tables.locker:
                    self.meta.tables.add(table)
                self._get_columns(table=short_name)
            # NOTE(review): `== None` IS KEPT AS-IS; IT MAY BE RELIED ON FOR
            # NULL-LIKE PROJECT VALUES THAT ARE NOT THE None SINGLETON
            elif force or table.timestamp == None or table.timestamp < Date.now() - MAX_COLUMN_METADATA_AGE:
                table.timestamp = Date.now()
                self._get_columns(table=short_name)

            with self.meta.columns.locker:
                columns = self.meta.columns.find(table_name, column_name)
            if columns:
                columns = jx.sort(columns, "name")
                # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
                while len(self.todo) and not all(columns.get("last_updated")):
                    if DEBUG:
                        Log.note(
                            "waiting for columns to update {{columns|json}}",
                            columns=[c.table+"."+c.es_column for c in columns if not c.last_updated]
                        )
                    Till(seconds=1).wait()
                return columns
        # `except Exception, e` IS PYTHON-2-ONLY SYNTAX; `as` WORKS ON 2.6+ AND 3
        except Exception as e:
            Log.error("Not expected", cause=e)

Example #20

0

Show file

def typed_encode(value, flake):
    """
    RETURN (typed_value, flake_update, added_nested) TUPLES

    :param value: THE RECORD TO CONVERT TO STRICT TYPED FORM
    :param flake: LOOKUP SCHEMA, WILL BE UPDATED WITH CHANGES
    :return: (record, update, nested) TUPLE
    """
    _ = flake.columns  # ENSURE WE HAVE INTERNAL STRUCTURES FILLED
    record, changes, nested_added = _typed_encode(value, flake.schema)
    if changes:
        # REFRESH COLUMNS
        flake._columns = None
        _ = flake.columns

    doc = to_data(record)
    for source_path, target_field in flake._top_level_fields.items():
        # MOVE THE VALUE TO ITS TOP-LEVEL FIELD, AND CLEAR THE ORIGINAL LOCATION
        doc[target_field] = doc[source_path]
        doc[source_path] = None

        # DO NOT LEAVE ANY EMPTY OBJECT RESIDUE
        steps = split_field(source_path)
        for depth, _ in jx.reverse(enumerate(steps)):
            parent_path = join_field(steps[:depth])
            parent = doc[parent_path]
            if not (is_data(parent) and not doc[parent_path].keys()):
                # STOP AT THE FIRST ANCESTOR THAT IS NOT AN EMPTY OBJECT
                break
            doc[parent_path] = None

    return record, changes, nested_added

Example #21

0

Show file

File: setop.py Project: yoyogias2011/TUID

def get_pull(column):
    """
    BUILD THE FIELD PATH FOR column

    :param column: OBJECT WITH nested_path (SEQUENCE OF PATHS) AND es_column
    :return: "fields.<literal es_column>" WHEN THE FIRST nested_path IS ".",
             OTHERWISE "_inner" FOLLOWED BY THE es_column STEPS THAT REMAIN
             AFTER DROPPING AS MANY STEPS AS THE FIRST nested_path HAS
    """
    nearest = column.nested_path[0]
    if nearest == ".":
        return concat_field("fields", literal_field(column.es_column))

    # DROP THE LEADING STEPS COVERED BY THE NESTED PATH
    skip = len(split_field(nearest))
    remaining = split_field(column.es_column)[skip:]
    return join_field(["_inner"] + remaining)

Example #22

0

Show file

File: leaves_op.py Project: armenzg/smart-scheduling

 def to_bq(self, schema, not_null=False, boolean=False):
     """
     RETURN ONE {"name", "sql"} ENTRY FOR EACH SELECTED COLUMN UNDER self.term

     :param schema: PROVIDES columns, nested_path AND get_column_name()
     :param not_null: NOT REFERENCED IN THIS BODY
     :param boolean: NOT REFERENCED IN THIS BODY
     :return: wrap()ED LIST; EACH name IS THE COLUMN NAME WITH THE TERM'S STEPS REMOVED FROM THE FRONT
     """
     if not is_op(self.term, Variable):
         Log.error("Can only handle Variable")
     term = self.term.var
     prefix_length = len(split_field(term))

     selected = []
     for c in schema.columns:
         # ONLY COLUMNS UNDER THE TERM
         if not startswith_field(c.name, term):
             continue

         accepted = (
             c.jx_type not in (EXISTS, OBJECT, NESTED)
             and startswith_field(schema.nested_path[0], c.nested_path[0])
         ) or (
             c.jx_type not in (EXISTS, OBJECT)
             and schema.nested_path[0] == c.nested_path[0]
         )
         if not accepted:
             continue

         relative_steps = split_field(schema.get_column_name(c))[prefix_length:]
         selected.append({
             "name": join_field(relative_steps),
             "sql": Variable(schema.get_column_name(c)).to_bq(schema)[0].sql,
         })
     return wrap(selected)

Example #23

0

Show file

File: jx.py Project: klahnakoski/pyLibrary

    def parse_field(fieldname, data, depth):
        """
        RETURN (first, rest) OF fieldname

        WALKS data ONE STEP OF fieldname AT A TIME. WHEN A STEP LANDS ON A LIST
        (AND fieldname HAS MORE THAN ONE STEP) THE STEP IS RECORDED IN THE
        ENCLOSING primary_nested / primary_column / primary_branch LISTS AND
        (THAT STEP, REMAINING PATH) IS RETURNED. IF NO SUCH LIST IS FOUND,
        (fieldname, None) IS RETURNED.

        :param fieldname: DOT-DELIMITED PATH INTO data
        :param data: THE VALUE TO WALK
        :param depth: OFFSET ADDED TO THE STEP INDEX WHEN ADDRESSING THE primary_* LISTS
        """
        col = split_field(fieldname)
        d = data
        for i, c in enumerate(col):
            try:
                d = d[c]
            except Exception as e:
                # NOTE(review): e IS NOT PASSED ON TO Log.error, SO THE ORIGINAL CAUSE IS NOT REPORTED
                Log.error("{{name}} does not exist", name=fieldname)
            if is_list(d) and len(col) > 1:
                if len(primary_column) <= depth + i:
                    # NOTHING RECORDED AT THIS LEVEL YET
                    primary_nested.append(True)
                    primary_column.append(c)
                    primary_branch.append(d)
                elif primary_nested[depth] and primary_column[depth + i] != c:
                    # NOTE(review): TESTS primary_nested[depth], NOT [depth + i] - CONFIRM THIS IS INTENDED
                    Log.error("only one branch of tree allowed")
                else:
                    # OVERWRITE WHAT WAS RECORDED AT THIS LEVEL
                    primary_nested[depth + i] = True
                    primary_column[depth + i] = c
                    primary_branch[depth + i] = d

                return c, join_field(col[i + 1 :])
            else:
                if len(primary_column) <= depth + i:
                    # RECORD A NON-NESTED STEP; THE VALUE IS STORED AS A ONE-ELEMENT LIST
                    primary_nested.append(False)
                    primary_column.append(c)
                    primary_branch.append([d])
        return fieldname, None

Example #24

0

Show file

File: typed_encoder.py Project: yoyogias2011/TUID

def unnest_path(encoded):
    """
    DECODE EVERY STEP OF encoded, LEAVING OUT THE NESTED_TYPE STEPS

    :param encoded: DOT-DELIMITED PATH; WHEN IT STARTS WITH ".." ALL LEADING
                    DOTS ARE REMOVED FIRST (AN ALL-DOT PATH BECOMES ".")
    :return: PATH JOINED FROM THE DECODED STEPS
    """
    if encoded.startswith(".."):
        encoded = encoded.lstrip(".") or "."

    steps = split_field(encoded)
    return join_field(decode_property(step) for step in steps if step != NESTED_TYPE)

Example #25

0

Show file

def get_nested_path(typed_path):
    """
    CONSTRUCT THE nested_path FROM THE typed_path

    :param typed_path: DOT-DELIMITED PATH
    :return: TUPLE OF PATHS ENDING WITH "."; EACH ARRAY_KEY STEP (THE LAST STEP
             IS NOT EXAMINED) PUTS THE PATH UP TO AND INCLUDING IT AT THE FRONT
    """
    steps = split_field(typed_path)
    root = "."
    result = (root,)
    for end, step in enumerate(steps[:-1], 1):
        if step != ARRAY_KEY:
            continue
        # DEEPER PATHS GO TO THE FRONT OF THE TUPLE
        deeper = concat_field(root, join_field(steps[:end]))
        result = (deeper,) + result
    return result

Example #26

0

Show file

    def add_column_to_schema(self, nest_to_schema, column):
        """
        RECORD, IN column.names, THE NAME OF column RELATIVE TO EACH NESTED PATH

        :param nest_to_schema: MAPPING KEYED BY NESTED PATH (THE VALUES ARE NOT USED HERE)
        :param column: COLUMN WHOSE names MAPPING IS READ AND UPDATED
        """
        own_table = literal_field(self.name)
        absolute_name = column.names[own_table]

        for nest, _ in nest_to_schema.items():
            nested_table = join_field([self.name] + split_field(nest))
            table_key = literal_field(nested_table)
            relative_name = relative_field(absolute_name, nest)

            column.names[table_key] = relative_name

Example #27

0

Show file

def exists_variable(path):
    """
    RETURN THE VARIABLE THAT WILL INDICATE OBJECT (OR ARRAY) EXISTS (~e~)

    :param path: DOT-DELIMITED PATH
    :return: path WITH ANY TRAILING NESTED_TYPE STEP REMOVED AND EXISTS_TYPE
             APPENDED; JUST EXISTS_TYPE WHEN path HAS NO STEPS
    """
    parts = split_field(path)
    if parts:
        trimmed = parts[:-1] if parts[-1] == NESTED_TYPE else parts
        return join_field(trimmed + [EXISTS_TYPE])
    return EXISTS_TYPE

Example #28

0

Show file

File: __init__.py Project: nknick99/MySQL-to-S3

def untyped_column(column_name):
    """
    SPLIT A DATABASE COLUMN NAME INTO ITS NAME AND TYPE

    :param column_name:  DATABASE COLUMN NAME
    :return: (NAME, TYPE) PAIR; TYPE IS None WHEN column_name HAS NO "$",
             OTHERWISE IT IS THE LAST STEP WITH ITS FIRST CHARACTER REMOVED
    """
    if "$" not in column_name:
        return column_name, None

    steps = split_field(column_name)
    type_step = steps[-1]
    return join_field(steps[:-1]), type_step[1:]

Example #29

0

Show file

def get_schema_from_list(table_name, frum):
    """
    SCAN THE LIST FOR COLUMN TYPES

    :param table_name: NAME GIVEN TO THE RESULTING Schema
    :param frum: THE LIST TO SCAN
    :return: Schema HOLDING THE COLUMNS COLLECTED BY _get_schema_from_list()
    """
    index_key = join_field(["names", table_name])
    columns = UniqueIndex(keys=(index_key,))
    # FILLS columns IN PLACE
    _get_schema_from_list(
        frum,
        table_name,
        prefix_path=[],
        nested_path=ROOT_PATH,
        columns=columns,
    )
    return Schema(table_name=table_name, columns=columns)

Example #30

0

Show file

File: expressions.py Project: rv404674/TUID

                def defParent(name):
                    """
                    APPEND THE HashMap DECLARATION FOR name (ANCESTORS FIRST) TO contextVariables
                    """
                    # DO NOT MAKE THE SAME PARENT TWICE
                    if name in parentVarNames:
                        return
                    parentVarNames.add(name)

                    steps = split_field(name)
                    if len(steps) != 1:
                        # THE PARENT MUST BE DECLARED BEFORE THIS PROPERTY IS ASSIGNED
                        defParent(join_field(steps[0:-1]))
                        declaration = name + " = new HashMap();\n"
                    else:
                        # SINGLE-STEP NAMES ARE DECLARED AS NEW Map VARIABLES
                        declaration = "Map " + name + " = new HashMap();\n"
                    contextVariables.append(declaration)

Example #31

0

Show file

def to_sql(self, schema, not_null=False, boolean=False):
    """
    RETURN ONE {"name", "sql"} ENTRY FOR EACH COLUMN THAT schema MAPS self.term TO

    :param schema: PROVIDES map_to_sql() AND get_column_name()
    :param not_null: NOT REFERENCED IN THIS BODY
    :param boolean: NOT REFERENCED IN THIS BODY
    :return: wrap()ED LIST; EACH name IS THE COLUMN NAME WITH THE TERM'S STEPS REMOVED FROM THE FRONT
    """
    if not isinstance(self.term, Variable):
        Log.error("Can only handle Variable")
    term = self.term.var
    prefix_length = len(split_field(term))

    selects = []
    for _, cols in schema.map_to_sql(term).items():
        for c in cols:
            relative_steps = split_field(schema.get_column_name(c))[prefix_length:]
            selects.append({
                "name": join_field(relative_steps),
                "sql": Variable(schema.get_column_name(c)).to_sql(schema)[0].sql,
            })
    return wrap(selects)

Example #32

0

Show file

File: query.py Project: maggienj/ActiveData

def _test_mode_wait(query):
    """
    WAIT FOR METADATA TO ARRIVE ON INDEX

    MARKS THE COLUMNS OF query["from"] DIRTY, QUEUES THEM FOR RE-EVALUATION,
    THEN POLLS ONCE A SECOND (FOR AT MOST ONE MINUTE) UNTIL EVERY NON-STRUCT
    COLUMN HAS last_updated AND cardinality. ANY FAILURE IS LOGGED AS A
    WARNING RATHER THAN RAISED.

    :param query: dict() OF REQUEST BODY
    :return: nothing
    """
    try:
        m = meta.singlton
        now = Date.now()
        end_time = now + MINUTE

        # MARK COLUMNS DIRTY
        m.meta.columns.update({
            "clear": ["partitions", "count", "cardinality", "last_updated"],
            "where": {
                "eq": {
                    "es_index": join_field(split_field(query["from"])[0:1])
                }
            }
        })

        # BE SURE THEY ARE ON THE todo QUEUE FOR RE-EVALUATION
        cols = [
            c for c in m.get_columns(table_name=query["from"], force=True)
            if c.type not in STRUCT
        ]
        for c in cols:
            Log.note("Mark {{column}} dirty at {{time}}",
                     column=c.names["."],
                     time=now)
            c.last_updated = now - TOO_OLD
            m.todo.push(c)

        # READ THE CLOCK ON EVERY PASS; COMPARING AGAINST THE FIXED now WOULD
        # MEAN THE ONE MINUTE LIMIT NEVER EXPIRES
        while end_time > Date.now():
            # GET FRESH VERSIONS
            cols = [
                c for c in m.get_columns(table_name=query["from"])
                if c.type not in STRUCT
            ]
            for c in cols:
                if not c.last_updated or c.cardinality == None:
                    Log.note(
                        "wait for column (table={{col.es_index}}, name={{col.es_column}}) metadata to arrive",
                        col=c)
                    break
            else:
                # NO COLUMN IS STILL WAITING FOR METADATA
                break
            Till(seconds=1).wait()
        for c in cols:
            Log.note(
                "fresh column name={{column.name}} updated={{column.last_updated|date}} parts={{column.partitions}}",
                column=c)
    except Exception as e:
        # "as" FORM IS VALID ON PYTHON 2.6+ AND PYTHON 3; THE COMMA FORM IS PYTHON 2 ONLY
        Log.warning("could not pickup columns", cause=e)

Example #33

0

Show file

File: expressions.py Project: klahnakoski/tuid_experiment

                def defParent(name):
                    """
                    APPEND THE HashMap DECLARATION FOR name (ANCESTORS FIRST) TO contextVariables
                    """
                    # DO NOT MAKE THE SAME PARENT TWICE
                    if name in parentVarNames:
                        return
                    parentVarNames.add(name)

                    steps = split_field(name)
                    if len(steps) != 1:
                        # THE PARENT MUST BE DECLARED BEFORE THIS PROPERTY IS ASSIGNED
                        defParent(join_field(steps[0:-1]))
                        declaration = name + " = new HashMap();\n"
                    else:
                        # SINGLE-STEP NAMES ARE DECLARED AS NEW Map VARIABLES
                        declaration = "Map " + name + " = new HashMap();\n"
                    contextVariables.append(declaration)

Example #34

0

Show file

File: meta.py Project: klahnakoski/SpotManager

    def _get_columns(self, table=None):
        """
        PARSE THE MAPPINGS OF THE INDEX NAMED BY THE FIRST STEP OF table

        :param table: DOT-DELIMITED TABLE NAME; THE FIRST STEP IS THE ES INDEX
        """
        # TODO: HANDLE MORE THAN ONE ES, MAP TABLE SHORT_NAME TO ES INSTANCE
        steps = split_field(table)
        index_name = steps[0]
        query_path = join_field(steps[1:])  # COMPUTED, BUT NOT USED BELOW
        index_meta = self.es_metadata.indices[index_name]

        needs_refresh = not index_meta or self.last_es_metadata < Date.now() - OLD_METADATA
        if needs_refresh:
            # UNKNOWN INDEX, OR THE CACHED METADATA IS OLD: ASK ES AGAIN
            self.es_metadata = self.default_es.get_metadata(force=True)
            index_meta = self.es_metadata.indices[index_name]

        for _, mapping in index_meta.mappings.items():
            # _id IS ADDED TO EVERY MAPPING BEFORE IT IS PARSED
            mapping.properties["_id"] = {"type": "string", "index": "not_analyzed"}
            self._parse_properties(index_meta.index, mapping, index_meta)

Example #35

0

Show file

File: schema.py Project: ubdussamad/mo-parquet

        def _worker(start):
            """
            BUILD THE SchemaTree FOR THE ELEMENT AT THE SHARED CURSOR index[0]

            :param start: ADDED TO THE ELEMENT'S num_children TO GET THE CURSOR LIMIT
            :return: SchemaTree WITH element SET, AND ONE ENTRY IN more PER LOOP PASS
            """
            tree = SchemaTree()
            root = parquet_schema[index[0]]
            tree.element = root

            limit = start + root.num_children
            while index[0] < limit:
                # THE NAME COMES FROM THE ELEMENT UNDER THE CURSOR, BEFORE THE CURSOR ADVANCES
                current = parquet_schema[index[0]]
                leaf_name = join_field(split_field(current.name)[-1:])
                index[0] += 1
                tree.more[leaf_name] = _worker(index[0])
            return tree

Example #36

0

Show file

    def _get_columns(self, table=None):
        """
        PARSE THE MAPPINGS OF THE INDEX NAMED BY THE FIRST STEP OF table

        :param table: DOT-DELIMITED TABLE NAME; THE FIRST STEP IS THE ES INDEX
        """
        # TODO: HANDLE MORE THAN ONE ES, MAP TABLE SHORT_NAME TO ES INSTANCE
        steps = split_field(table)
        index_name = steps[0]
        query_path = join_field(steps[1:])  # COMPUTED, BUT NOT USED BELOW
        index_meta = self.es_metadata.indices[index_name]

        needs_refresh = not index_meta or self.last_es_metadata < Date.now() - OLD_METADATA
        if needs_refresh:
            # UNKNOWN INDEX, OR THE CACHED METADATA IS OLD: ASK ES AGAIN
            self.es_metadata = self.default_es.get_metadata(force=True)
            index_meta = self.es_metadata.indices[index_name]

        for _, mapping in index_meta.mappings.items():
            # _id IS ADDED TO EVERY MAPPING BEFORE IT IS PARSED
            mapping.properties["_id"] = {"type": "string", "index": "not_analyzed"}
            self._parse_properties(index_meta.index, mapping, index_meta)

Example #37

0

Show file

File: meta.py Project: nknick99/MySQL-to-S3

    def get_columns(self, table_name, column_name=None, force=False):
        """
        RETURN METADATA COLUMNS

        :param table_name: DOT-DELIMITED TABLE NAME; THE FIRST STEP IS THE ES INDEX, THE REST IS THE QUERY PATH
        :param column_name: OPTIONAL COLUMN NAME HANDED TO self.meta.columns.find()
        :param force: True TO RELOAD THE COLUMNS REGARDLESS OF THE TABLE'S timestamp
        :return: THE MATCHING COLUMNS, SORTED; Log.error IS CALLED WHEN NONE ARE FOUND
        """
        table_path = split_field(table_name)
        es_index_name = table_path[0]
        query_path = join_field(table_path[1:])
        table = self.get_table(es_index_name)[0]
        # ONLY USED TO REPORT A MISSING COLUMN
        abs_column_name = None if column_name == None else concat_field(
            query_path, column_name)

        try:
            # LAST TIME WE GOT INFO FOR THIS TABLE
            if not table:
                # FIRST TIME WE SEE THIS TABLE: REGISTER IT, THEN LOAD ITS COLUMNS
                table = Table(name=es_index_name,
                              url=None,
                              query_path=['.'],
                              timestamp=Date.now())
                with self.meta.tables.locker:
                    self.meta.tables.add(table)
                self._get_columns(table=es_index_name)
            elif force or table.timestamp == None or table.timestamp < Date.now(
            ) - MAX_COLUMN_METADATA_AGE:
                # KNOWN TABLE, BUT A REFRESH WAS REQUESTED OR THE METADATA IS TOO OLD
                table.timestamp = Date.now()
                self._get_columns(table=es_index_name)

            with self.meta.columns.locker:
                columns = self.meta.columns.find(es_index_name, column_name)
            if columns:
                # BACKSLASH IS DOUBLED SO THE LITERAL HOLDS NO INVALID ESCAPE; THE RUNTIME STRING IS UNCHANGED
                columns = jx.sort(columns, "names.\\.")
                # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
                while len(self.todo) and not all(columns.get("last_updated")):
                    if DEBUG:
                        Log.note(
                            "waiting for columns to update {{columns|json}}",
                            columns=[
                                c.es_index + "." + c.es_column for c in columns
                                if not c.last_updated
                            ])
                    Till(seconds=1).wait()
                return columns
        except Exception as e:
            Log.error("Not expected", cause=e)

        # ONLY REACHED WHEN NO COLUMNS WERE FOUND
        if abs_column_name:
            Log.error("no columns matching {{table}}.{{column}}",
                      table=table_name,
                      column=abs_column_name)
        else:
            self._get_columns(table=table_name)  # TO TEST WHAT HAPPENED
            Log.error("no columns for {{table}}?!", table=table_name)

Example #38

0

Show file

File: query_op.py Project: klahnakoski/jx-python

def _normalize_select_no_context(select, schema=None):
    """
    SAME NORMALIZE, BUT NO SOURCE OF COLUMNS

    :param select: TEXT (TAKEN AS THE value) OR A SELECT CLAUSE OBJECT
    :param schema: PASSED THROUGH TO jx_expression()
    :return: COPY OF select WITH name, value, aggregate AND default FILLED IN;
             Null WHEN select HAS NO value, NO name/aggregate AND NO OTHER PROPERTIES
    """
    if is_text(select):
        select = Data(value=select)
    else:
        select = to_data(select)

    output = select.copy()
    if not select.value:
        # NO VALUE GIVEN: SELECT "." UNDER THE GIVEN NAME (OR THE AGGREGATE'S NAME)
        output.name = coalesce(select.name, select.aggregate)
        if output.name:
            output.value = jx_expression(".", schema=schema)
        elif len(select):
            Log.error(BAD_SELECT, select=select)
        else:
            return Null
    elif is_text(select.value):
        if select.value.endswith("*"):
            # STAR SELECT: THE VARIABLE IS EVERYTHING BEFORE THE LAST STEP
            path = split_field(select.value)
            var = join_field(path[:-1])
            name = var.strip(".")
            if not name:
                name = "."
            output.name = coalesce(select.name, select.aggregate, name)
            output.value = LeavesOp(Variable(var), prefix=select.prefix)
        elif select.value == ".":
            output.name = coalesce(select.name, select.aggregate, ".")
            output.value = jx_expression(select.value, schema=schema)
        else:
            # PLAIN VARIABLE: DEFAULT NAME IS THE VALUE WITHOUT LEADING DOTS
            output.name = coalesce(select.name, select.value.lstrip("."),
                                   select.aggregate)
            output.value = jx_expression(select.value, schema=schema)
    elif is_number(output.value):
        # NUMERIC VALUE: DEFAULT NAME IS THE NUMBER AS TEXT
        if not output.name:
            output.name = text(output.value)
        output.value = jx_expression(select.value, schema=schema)
    else:
        output.value = jx_expression(select.value, schema=schema)

    # EVERY SELECT MUST END UP WITH A NAME, AND THAT NAME CAN NOT END WITH ".*"
    if not output.name:
        Log.error("expecting select to have a name: {{select}}", select=select)
    if output.name.endswith(".*"):
        Log.error("{{name|quote}} is invalid select", name=output.name)

    # AGGREGATE FALLS BACK TO "none"; DEFAULT FALLS BACK TO THE AGGREGATE'S OWN DEFAULT
    output.aggregate = coalesce(canonical_aggregates[select.aggregate].name,
                                select.aggregate, "none")
    output.default = coalesce(select.default,
                              canonical_aggregates[output.aggregate].default)
    return output

Example #39

0

Show file

 def __init__(self, table_name, columns):
     """
     :param table_name: THE FACT TABLE, OPTIONALLY FOLLOWED BY THE PATH TO AN ARM OF THE SNOWFLAKE
     :param columns: ALL COLUMNS IN SNOWFLAKE
     """
     self._columns = copy(columns)
     steps = split_field(table_name)
     # USED AS AN EXPLICIT STATEMENT OF PERSPECTIVE IN THE DATABASE
     self.table = join_field(steps[:1])
     # TODO: REPLACE WITH THE nested_path ARRAY
     arm = join_field(steps[1:])
     if arm != ".":
         # USE THE es_column OF THE FIRST NESTED COLUMN WITH THIS (TYPED) NAME
         arm += "." + NESTED_TYPE
         matches = [
             c for c in columns
             if c.type == NESTED and c.names["."] == arm
         ]
         arm = matches[0].es_column
     self.query_path = arm

     indexes = _indexer(columns, self.query_path)
     self.lookup, self.lookup_leaves, self.lookup_variables = indexes

Example #40

0

Show file

File: typed_encoder.py Project: klahnakoski/pyLibrary

def unnest_path(encoded):
    """
    DECODE encoded, DROPPING THE TYPE STEPS

    :param encoded: DOT-DELIMITED PATH; LEADING DOTS (TWO OR MORE) ARE KEPT AS A PREFIX
    :return: "." WHEN NO STEPS REMAIN; OTHERWISE THE DECODED STEPS, WHERE A
             TRAILING NESTED_TYPE STEP AND ANY NON-FINAL STEP STARTING WITH
             TYPE_PREFIX ARE LEFT OUT
    """
    if encoded.startswith(".."):
        # PUT THE LEADING DOTS ASIDE, AND HANDLE WHAT FOLLOWS THEM
        stripped = encoded.lstrip(".")
        dots = "." * (len(encoded) - len(stripped))
        return dots + unnest_path(stripped)

    steps = split_field(encoded)
    if steps and steps[-1] == NESTED_TYPE:
        steps = steps[:-1]
    if not steps:
        return "."

    # THE LAST STEP IS ALWAYS KEPT, EVEN IF IT STARTS WITH TYPE_PREFIX
    decoded = [decode_property(s) for s in steps[:-1] if not s.startswith(TYPE_PREFIX)]
    decoded.append(decode_property(steps[-1]))
    return join_field(decoded)

Example #41

0

Show file

File: stream.py Project: rv404674/TUID

    def _decode_object(index, c, parent_path, query_path, expected_vars):
        """
        GENERATOR THAT DECODES ONE JSON OBJECT, YIELDING THE index REACHED

        :param index: CURRENT POSITION IN THE INPUT
        :param c: THE CHARACTER LAST READ
        :param parent_path: LIST OF PROPERTY NAMES LEADING TO THIS OBJECT
        :param query_path: REMAINING STEPS OF THE PATH BEING ITERATED OVER
        :param expected_vars: THE VARIABLES TO PICK UP FROM THIS OBJECT
        """
        if "." in expected_vars:
            # THE WHOLE OBJECT IS WANTED
            # ERROR IF done[0] IS A PREFIX OF (OR EQUAL TO) parent_path
            if len(done[0]) <= len(parent_path) and all(d == p for d, p in zip(done[0], parent_path)):
                Log.error("Can not pick up more variables, iterator is done")

            if query_path:
                Log.error("Can not extract objects that contain the iteration", var=join_field(query_path))

            index = _assign_token(index, c, expected_vars)
            # c, index = skip_whitespace(index)
            yield index
            return

        did_yield = False
        while True:
            c, index = skip_whitespace(index)
            if c == b',':
                continue
            elif c == b'"':
                # PROPERTY NAME, WHICH MUST BE FOLLOWED BY A COLON
                name, index = simple_token(index, c)

                c, index = skip_whitespace(index)
                if c != b':':
                    Log.error("Expecting colon")
                c, index = skip_whitespace(index)

                child_expected = needed(name, expected_vars)
                child_path = parent_path + [name]
                if any(child_expected):
                    if not query_path:
                        index = _assign_token(index, c, child_expected)
                    elif query_path[0] == name:
                        # THIS PROPERTY IS ON THE QUERY PATH: YIELD ONCE PER INDEX THE SUB-DECODER PRODUCES
                        for index in _decode_token(index, c, child_path, query_path[1:], child_expected):
                            did_yield = True
                            yield index
                    else:
                        if len(done[0]) <= len(child_path):
                            Log.error("Can not pick up more variables, iterator over {{path}} is done", path=join_field(done[0]))
                        index = _assign_token(index, c, child_expected)
                elif query_path and query_path[0] == name:
                    # NOTE(review): UNLIKE THE BRANCH ABOVE, did_yield IS NOT SET HERE - CONFIRM THIS IS INTENDED
                    for index in _decode_token(index, c, child_path, query_path[1:], child_expected):
                        yield index
                else:
                    # NOTHING WANTED FROM THIS PROPERTY
                    index = jump_to_end(index, c)
            elif c == b"}":
                # END OF OBJECT: YIELD AT LEAST ONCE
                if not did_yield:
                    yield index
                break

Example #42

0

Show file

File: __init__.py Project: klahnakoski/SpotManager

    def __init__(self, table_name, columns):
        """
        :param table_name: THE FACT TABLE, OPTIONALLY FOLLOWED BY THE PATH TO AN ARM OF THE SNOWFLAKE
        :param columns: ALL COLUMNS IN SNOWFLAKE
        """
        steps = split_field(table_name)
        # USED AS AN EXPLICIT STATEMENT OF PERSPECTIVE IN THE DATABASE
        self.table = steps[0]
        self.query_path = join_field(steps[1:])
        self._columns = copy(columns)

        primary = _index(columns, self.query_path)
        self.lookup = primary
        if self.query_path == ".":
            return

        # FILL IN, FROM THE "." INDEX, ANY KEY THE QUERY PATH INDEX DOES NOT HAVE
        for key, found in _index(columns, ".").items():
            primary.setdefault(key, found)

Example #43

0

Show file

File: meta.py Project: klahnakoski/SpotManager

    def get_columns(self, table_name, column_name=None, force=False):
        """
        RETURN METADATA COLUMNS

        :param table_name: DOT-DELIMITED TABLE NAME; THE FIRST STEP IS THE ES INDEX, THE REST IS THE QUERY PATH
        :param column_name: OPTIONAL COLUMN NAME HANDED TO self.meta.columns.find()
        :param force: True TO RELOAD THE COLUMNS REGARDLESS OF THE TABLE'S timestamp
        :return: THE MATCHING COLUMNS, SORTED; Log.error IS CALLED WHEN NONE ARE FOUND
        """
        table_path = split_field(table_name)
        es_index_name = table_path[0]
        query_path = join_field(table_path[1:])
        table = self.get_table(es_index_name)[0]
        # ONLY USED TO REPORT A MISSING COLUMN
        abs_column_name = None if column_name == None else concat_field(query_path, column_name)

        try:
            # LAST TIME WE GOT INFO FOR THIS TABLE
            if not table:
                # FIRST TIME WE SEE THIS TABLE: REGISTER IT, THEN LOAD ITS COLUMNS
                table = Table(
                    name=es_index_name,
                    url=None,
                    query_path=None,
                    timestamp=Date.now()
                )
                with self.meta.tables.locker:
                    self.meta.tables.add(table)
                self._get_columns(table=es_index_name)
            elif force or table.timestamp == None or table.timestamp < Date.now() - MAX_COLUMN_METADATA_AGE:
                # KNOWN TABLE, BUT A REFRESH WAS REQUESTED OR THE METADATA IS TOO OLD
                table.timestamp = Date.now()
                self._get_columns(table=es_index_name)

            with self.meta.columns.locker:
                columns = self.meta.columns.find(es_index_name, column_name)
            if columns:
                # BACKSLASH IS DOUBLED SO THE LITERAL HOLDS NO INVALID ESCAPE; THE RUNTIME STRING IS UNCHANGED
                columns = jx.sort(columns, "names.\\.")
                # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
                while len(self.todo) and not all(columns.get("last_updated")):
                    if DEBUG:
                        Log.note("waiting for columns to update {{columns|json}}", columns=[c.es_index+"."+c.es_column for c in columns if not c.last_updated])
                    Till(seconds=1).wait()
                return columns
        except Exception as e:
            Log.error("Not expected", cause=e)

        # ONLY REACHED WHEN NO COLUMNS WERE FOUND
        if abs_column_name:
            Log.error("no columns matching {{table}}.{{column}}", table=table_name, column=abs_column_name)
        else:
            self._get_columns(table=table_name)  # TO TEST WHAT HAPPENED
            Log.error("no columns for {{table}}?!", table=table_name)

Example #44

0

Show file

File: container.py Project: klahnakoski/pyLibrary

    def new_instance(type, frum, schema=None):
        """
        Factory!

        :param type: NOT REFERENCED IN THIS BODY
        :param frum: AN EXISTING CONTAINER, A QUERY, A COLLECTION, A TABLE NAME (TEXT), OR A DESCRIPTION (DATA)
        :param schema: NOT REFERENCED IN THIS BODY
        :return: THE CONTAINER (OR QUERY / QUERY RESULT) THAT CORRESPONDS TO frum
        """
        if not type2container:
            _delayed_imports()

        # ALREADY A CONTAINER (OR CUBE): NOTHING TO DO
        if isinstance(frum, Container):
            return frum
        if isinstance(frum, _Cube):
            return frum
        if isinstance(frum, _Query):
            return _run(frum)
        if is_many(frum):
            return _ListContainer(frum)

        if is_text(frum):
            # USE DEFAULT STORAGE TO FIND Container
            if not config.default.settings:
                Log.error("expecting jx_base.container.config.default.settings to contain default elasticsearch connection info")

            overrides = {
                "index": join_field(split_field(frum)[:1:]),
                "name": frum,
            }
            settings = set_default(overrides, config.default.settings)
            settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
            return type2container["elasticsearch"](settings)

        if not is_data(frum):
            Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__)
            return

        frum = wrap(frum)
        if frum.type and type2container[frum.type]:
            return type2container[frum.type](frum.settings)
        if frum["from"]:
            # A QUERY DESCRIPTION: RESOLVE ITS SOURCE FIRST
            frum = copy(frum)
            frum["from"] = Container(frum["from"])
            return _Query.wrap(frum)
        Log.error("Do not know how to handle {{frum|json}}", frum=frum)

Example #45

0

Show file

File: expressions.py Project: rv404674/TUID

    def compile_expression(self, expression, constants=None):
        """
        WRAP expression IN A SCRIPT FUNCTION THAT IS CALLED WITH _source

        :param expression: THE EXPRESSION TEXT
        :param constants: VALUES SUBSTITUTED INTO expression BY setValues()
        :return: Compiled SCRIPT; WHEN THERE ARE NO FRAME VARIABLES, THE
                 FUNCTION HEAD FOLLOWED BY THE EXPRESSION IS RETURNED INSTEAD
        """
        # EXPAND EXPRESSION WITH ANY CONSTANTS
        expression = setValues(expression, constants)

        # FIRST NAME IS THE INDEX
        fromPath = self.fromData.name
        indexName = join_field(split_field(fromPath)[:1])

        context = self.getFrameVariables(expression)
        if context == "":
            return addFunctions(expression).head+expression

        func = UID()
        code = addFunctions(context+expression)

        # DECLARE THE FUNCTION, GIVE IT A BODY, THEN CALL IT
        script = code.head + 'var ' + func + ' = function(' + indexName + '){\n'
        script = script + context + expression + ";\n"
        script = script + '};\n' + func + '(_source)\n'

        return Compiled(script)

Example #46

0

Show file

File: util.py Project: klahnakoski/SpotManager

def es_query_template(path):
    """
    RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE

    :param path: DOT-DELIMITED PATH; THE STEPS AFTER THE FIRST ARE THE NESTED PATH
    :return: (QUERY TEMPLATE, LIST OF THE EMPTY FILTER OBJECTS PLACED IN THAT TEMPLATE)
    """
    sub_path = split_field(path)[1:]

    if not sub_path:
        # NOT NESTED: A SINGLE FILTER ON THE WHOLE QUERY
        f0 = {}
        output = wrap({
            "query": {"filtered": {"filter": f0}},
            "from": 0,
            "size": 0,
            "sort": [],
        })
        return output, wrap([f0])

    # NESTED: ONE FILTER FOR THE PARENT DOCUMENT, ONE FOR THE NESTED DOCUMENTS
    f0 = {}
    f1 = {}
    nested_clause = {"nested": {
        "path": join_field(sub_path),
        "filter": f1,
        "inner_hits": {"size": 100000},
    }}
    output = wrap({
        "filter": {"and": [f0, nested_clause]},
        "from": 0,
        "size": 0,
        "sort": [],
    })
    return output, wrap([f0, f1])

Example #47

0

Show file

File: table.py Project: klahnakoski/pyLibrary

def untype_path(path):
    """
    RETURN path WITHOUT THE STEPS THAT START WITH TYPE_PREFIX
    """
    untyped_steps = (
        step
        for step in split_field(path)
        if not step.startswith(TYPE_PREFIX)
    )
    return join_field(untyped_steps)

Example #48

0

Show file

File: setop.py Project: klahnakoski/pyLibrary

def es_setop(es, query):
    """
    Translate the select clauses of `query` into per-path ES selects, post the
    resulting ES query, and format the hits

    :param es: passed through to es_post()
    :param query: the query; this function reads its frum.schema, select,
                  where, limit, sort and format
    :return: the formatter's output, with meta.timing.es, meta.content_type
             and meta.es_query filled in
    """
    schema = query.frum.schema
    query_path = schema.query_path[0]

    # ONE ESSelect PER PATH; THE "." ENTRY ALWAYS EXISTS
    split_select = {".": ESSelect('.')}

    def get_select(path):
        # RETURN THE ESSelect FOR path, CREATING (AND REMEMBERING) IT ON FIRST USE
        es_select = split_select.get(path)
        if not es_select:
            es_select = split_select[path] = ESSelect(path)
        return es_select


    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()

    # put_index IS THE POSITION OF THE NEXT OUTPUT COLUMN
    put_index = 0
    for select in selects:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
            term = select.value.term
            leaves = schema.leaves(term.var)
            # EVERY LEAF GETS ITS OWN OUTPUT COLUMN (put_index ADVANCES PER LEAF)
            for c in leaves:
                full_name = concat_field(select.name, relative_field(untype_path(c.name), term.var))
                if c.jx_type == NESTED:
                    get_select('.').use_source = True
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {"name": literal_field(full_name), "index": put_index, "child": "."},
                        "pull": get_pull_source(c.es_column)
                    })
                    put_index += 1
                else:
                    get_select(c.nested_path[0]).fields.append(c.es_column)
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {"name": literal_field(full_name), "index": put_index, "child": "."}
                    })
                    put_index += 1
        elif is_op(select.value, Variable):
            # ALL LEAVES OF THIS SELECT SHARE ONE put_index (DIFFERENT "child");
            # THE INDEX ADVANCES ONCE, AT THE END OF THIS BRANCH
            s_column = select.value.var

            if s_column == ".":
                # PULL ALL SOURCE
                get_select('.').use_source = True
                new_select.append({
                    "name": select.name,
                    "value": select.value,
                    "put": {"name": select.name, "index": put_index, "child": "."},
                    "pull": get_pull_source(".")
                })
                # NOTE(review): THIS continue SKIPS THE put_index += 1 AT THE END
                # OF THE BRANCH, SO THE NEXT SELECT REUSES THIS INDEX - CONFIRM
                # THAT IS INTENDED
                continue

            leaves = schema.leaves(s_column)  # LEAVES OF OBJECT
            # nested_selects = {}
            if leaves:
                if any(c.jx_type == NESTED for c in leaves):
                    # PULL WHOLE NESTED ARRAYS
                    get_select('.').use_source = True
                    for c in leaves:
                        if len(c.nested_path) == 1:  # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRST LEVEL PROPERTIES
                            pre_child = join_field(decode_property(n) for n in split_field(c.name))
                            new_select.append({
                                "name": select.name,
                                "value": Variable(c.es_column),
                                "put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))},
                                "pull": get_pull_source(c.es_column)
                            })
                else:
                    # PULL ONLY WHAT'S NEEDED
                    for c in leaves:
                        c_nested_path = c.nested_path[0]
                        if c_nested_path == ".":
                            if c.es_column == "_id":
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {"name": select.name, "index": put_index, "child": "."},
                                    "pull": lambda row: row._id
                                })
                            elif c.jx_type == NESTED:
                                get_select('.').use_source = True
                                pre_child = join_field(decode_property(n) for n in split_field(c.name))
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))},
                                    "pull": get_pull_source(c.es_column)
                                })
                            else:
                                # NO "pull" HERE; IT IS ASSIGNED IN THE LOOP AFTER ALL SELECTS ARE PROCESSED
                                get_select(c_nested_path).fields.append(c.es_column)
                                pre_child = join_field(decode_property(n) for n in split_field(c.name))
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {"name": select.name, "index": put_index, "child": untype_path(relative_field(pre_child, s_column))}
                                })
                        else:
                            # COLUMN IS NOT AT THE "." PATH: REGISTER THE FIELD WITH THAT PATH'S SELECT
                            es_select = get_select(c_nested_path)
                            es_select.fields.append(c.es_column)

                            child = relative_field(untype_path(relative_field(c.name, schema.query_path[0])), s_column)
                            pull = accumulate_nested_doc(c_nested_path, Variable(relative_field(s_column, unnest_path(c_nested_path))))
                            new_select.append({
                                "name": select.name,
                                "value": select.value,
                                "put": {
                                    "name": select.name,
                                    "index": put_index,
                                    "child": child
                                },
                                "pull": pull
                            })
            else:
                # NO LEAVES FOUND FOR THE COLUMN: SELECT A PLACEHOLDER VARIABLE
                new_select.append({
                    "name": select.name,
                    "value": Variable("$dummy"),
                    "put": {"name": select.name, "index": put_index, "child": "."}
                })
            put_index += 1
        else:
            # ANY OTHER EXPRESSION BECOMES A SCRIPT, ONE PER PATH IT WAS SPLIT INTO
            split_scripts = split_expression_by_path(select.value, schema, lang=Painless)
            for p, script in split_scripts.items():
                es_select = get_select(p)
                es_select.scripts[select.name] = {"script": text_type(Painless[first(script)].partial_eval().to_es_script(schema))}
                new_select.append({
                    "name": select.name,
                    "pull": jx_expression_to_function("fields." + literal_field(select.name)),
                    "put": {"name": select.name, "index": put_index, "child": "."}
                })
                put_index += 1

    # GIVE A pull FUNCTION TO EVERY SELECT THAT DOES NOT HAVE ONE YET
    for n in new_select:
        if n.pull:
            continue
        elif is_op(n.value, Variable):
            if get_select('.').use_source:
                n.pull = get_pull_source(n.value.var)
            # NOTE(review): THIS COMPARES THE Variable ITSELF (NOT n.value.var)
            # TO "_id" - CONFIRM Variable SUPPORTS EQUALITY WITH A STRING
            elif n.value == "_id":
                n.pull = jx_expression_to_function("_id")
            else:
                n.pull = jx_expression_to_function(concat_field("fields", literal_field(n.value.var)))
        else:
            Log.error("Do not know what to do")

    # BUILD THE ES QUERY FROM THE PER-PATH SELECTS AND THE PER-PATH WHERE CLAUSES
    split_wheres = split_expression_by_path(query.where, schema, lang=ES52)
    es_query = es_query_proto(query_path, split_select, split_wheres, schema)
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    with Timer("call to ES", silent=True) as call_timer:
        data = es_post(es, es_query, query.limit)

    T = data.hits.hits

    # Log.note("{{output}}", output=T)

    try:
        # PICK THE FORMATTER FOR THE REQUESTED OUTPUT FORMAT
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        with Timer("formatter", silent=True):
            output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)

Example #49

0

Show file

File: dimensions.py Project: klahnakoski/pyLibrary

    def getDomain(self, **kwargs):
        """
        Build a Domain from the partitions (or edges) of this dimension

        :param kwargs: options; only `depth` is read here.  When it is not
                       given, it is len(self.fields)-1 for a list of fields,
                       otherwise None
        :return: Domain over the selected partitions
        """
        # kwargs.depth IS MEANT TO REACH INTO SUB-PARTITIONS
        kwargs = wrap(kwargs)
        kwargs.depth = coalesce(kwargs.depth, len(self.fields)-1 if is_list(self.fields) else None)

        # LOOP-INVARIANT: AT MOST THIS MANY (TOP-LEVEL) PARTITIONS ARE CONSIDERED
        limit = coalesce(self.limit, DEFAULT_QUERY_LIMIT)

        if not self.partitions and self.edges:
            # USE EACH EDGE AS A PARTITION, BUT isFacet==True SO IT ALLOWS THE OVERLAP
            partitions = [
                {
                    "name": v.name,
                    "value": v.name,
                    "where": v.where,
                    "style": v.style,
                    "weight": v.weight  # YO! WHAT DO WE *NOT* COPY?
                }
                for i, v in enumerate(self.edges)
                if i < limit and v.where
            ]
            self.isFacet = True
        # NOTE(review): `== None` (NOT `is None`) IS KEPT AS-IS; PRESUMABLY IT
        # MUST ALSO MATCH THE LIBRARY'S Null OBJECT - CONFIRM BEFORE CHANGING
        elif kwargs.depth == None:  # ASSUME self.fields IS A dict
            partitions = FlatList()
            for i, part in enumerate(self.partitions):
                if i >= limit:
                    break
                partitions.append({
                    "name":part.name,
                    "value":part.value,
                    "where":part.where,
                    "style":coalesce(part.style, part.parent.style),
                    "weight":part.weight   # YO!  WHAT DO WE *NOT* COPY?
                })
        elif kwargs.depth == 0:
            partitions = [
                {
                    "name":v.name,
                    "value":v.value,
                    "where":v.where,
                    "style":v.style,
                    "weight":v.weight   # YO!  WHAT DO WE *NOT* COPY?
                }
                for i, v in enumerate(self.partitions)
                if i < limit]
        elif kwargs.depth == 1:
            # FLATTEN ONE LEVEL: EACH SUB-PARTITION IS NAMED <parent name>.<name>
            partitions = FlatList()
            for i, part in enumerate(self.partitions):
                if i >= limit:
                    # NOTHING PAST THE LIMIT IS USED, SO STOP ITERATING
                    break
                try:
                    for subpart in part.partitions:
                        partitions.append({
                            "name":join_field(split_field(subpart.parent.name) + [subpart.name]),
                            "value":subpart.value,
                            "where":subpart.where,
                            "style":coalesce(subpart.style, subpart.parent.style),
                            "weight":subpart.weight   # YO!  WHAT DO WE *NOT* COPY?
                        })
                except Exception as e:
                    Log.error("", e)
        else:
            Log.error("deeper than 2 is not supported yet")

        return Domain(
            type=self.type,
            name=self.name,
            partitions=wrap(partitions),
            min=self.min,
            max=self.max,
            interval=self.interval,
            # THE COMPLICATION IS THAT SOMETIMES WE WANT SIMPLE PARTITIONS, LIKE
            # STRINGS, DATES, OR NUMBERS.  OTHER TIMES WE WANT PARTITION OBJECTS
            # WITH NAME, VALUE, AND OTHER MARKUP.
            # USUALLY A "set" IS MEANT TO BE SIMPLE, BUT THE end() FUNCTION IS
            # OVERRIDES EVERYTHING AND IS EXPLICIT.  - NOT A GOOD SOLUTION BECAUSE
            # end() IS USED BOTH TO INDICATE THE QUERY PARTITIONS *AND* DISPLAY
            # COORDINATES ON CHARTS

            # PLEASE SPLIT end() INTO value() (replacing the string value) AND
            # label() (for presentation)
            value="name" if not self.value and self.partitions else self.value,
            key="value",
            label=coalesce(self.label, (self.type == "set" and self.name)),
            end=coalesce(self.end, (self.type == "set" and self.name)),
            isFacet=self.isFacet,
            dimension=self
        )

Example #50

0

Show file

File: dimensions.py Project: klahnakoski/pyLibrary

    def __init__(self, dim, parent, jx):
        """
        Build a dimension (and, recursively, its child edges) from a definition

        When the definition does not list its partitions, and the type is in
        KNOWN - ALGEBRAIC, the parts are fetched with a count query on `jx`

        :param dim: the dimension definition; this method reads its name, type,
                    limit, index, where, edges, partitions, field/fields,
                    path and value
        :param parent: the enclosing dimension, if any
        :param jx: used for jx.settings.index, jx.get_columns() and jx.query()
        """
        dim = wrap(dim)

        self.name = dim.name
        self.parent = coalesce(parent)
        # NOTE(review): self.parent.full_name IS READ EVEN WHEN NO parent IS
        # GIVEN; PRESUMABLY coalesce() YIELDS A NULL-SAFE OBJECT - CONFIRM
        self.full_name = join_field(split_field(self.parent.full_name)+[self.name])
        self.edges = None  # FOR NOW
        dot.set_default(self, dim)
        self.where = dim.where
        self.type = coalesce(dim.type, "set")
        self.limit = coalesce(dim.limit, DEFAULT_QUERY_LIMIT)
        self.index = coalesce(dim.index, coalesce(parent, Null).index, jx.settings.index)

        if not self.index:
            Log.error("Expecting an index name")

        # ALLOW ACCESS TO SUB-PART BY NAME (IF ONLY THERE IS NO NAME COLLISION)
        self.edges = Data()
        for e in listwrap(dim.edges):
            new_e = Dimension(e, self, jx)
            self.edges[new_e.full_name] = new_e

        self.partitions = wrap(coalesce(dim.partitions, []))
        parse_partition(self)

        # THE FIELD(S) BECOME THE EDGES OF THE COUNT QUERY BELOW
        fields = coalesce(dim.field, dim.fields)
        if not fields:
            return  # NO FIELDS TO SEARCH
        elif is_data(fields):
            self.fields = wrap(fields)
            edges = wrap([{"name": k, "value": v, "allowNulls": False} for k, v in self.fields.items()])
        else:
            self.fields = listwrap(fields)
            edges = wrap([{"name": f, "value": f, "index": i, "allowNulls": False} for i, f in enumerate(self.fields)])

        if dim.partitions:
            return  # ALREADY HAVE PARTS
        if self.type not in KNOWN - ALGEBRAIC:
            return  # PARTS OR TOO FUZZY (OR TOO NUMEROUS) TO FETCH

        jx.get_columns()
        # COUNT THE RECORDS FOR EVERY COMBINATION OF EDGE VALUES
        with Timer("Get parts of {{name}}", {"name": self.name}):
            parts = jx.query({
                "from": self.index,
                "select": {"name": "count", "aggregate": "count"},
                "edges": edges,
                "where": self.where,
                "limit": self.limit
            })
            Log.note("{{name}} has {{num}} parts",  name= self.name,  num= len(parts))

        d = parts.edges[0].domain

        if dim.path:
            if len(edges) > 1:
                Log.error("Not supported yet")
            # EACH TERM RETURNED IS A PATH INTO A PARTITION TREE
            temp = Data(partitions=[])
            for i, count in enumerate(parts):
                a = dim.path(d.getEnd(d.partitions[i]))
                if not is_list(a):
                    Log.error("The path function on " + dim.name + " must return an ARRAY of parts")
                # NOTE(review): dim.path(...) IS CALLED A SECOND TIME BELOW WITH
                # THE SAME ARGUMENT, RATHER THAN REUSING `a`
                addParts(
                    temp,
                    dim.path(d.getEnd(d.partitions[i])),
                    count,
                    0
                )
            self.value = coalesce(dim.value, "name")
            self.partitions = temp.partitions
        elif is_data(fields):
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS

            # ONE PARTITION PER NON-EMPTY GROUP; THE FILTER MATCHES EVERY EDGE VALUE OF THE GROUP
            partitions = FlatList()
            for g, p in parts.groupby(edges):
                if p:
                    partitions.append({
                        "value": g,
                        "where": {"and": [
                            {"term": {e.value: g[e.name]}}
                            for e in edges
                        ]},
                        "count": int(p)
                    })
            self.partitions = partitions
        elif len(edges) == 1:
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS

            # SIMPLE LIST OF PARTS RETURNED, BE SURE TO INTERRELATE THEM
            self.partitions = wrap([
                {
                    "name": str(d.partitions[i].name),  # CONVERT TO STRING
                    "value": d.getEnd(d.partitions[i]),
                    "where": {"term": {edges[0].value: d.partitions[i].value}},
                    "count": count
                }
                for i, count in enumerate(parts)
            ])
            # REMEMBER THE POSITION OF EACH PART, KEYED BY ITS VALUE
            self.order = {p.value: i for i, p in enumerate(self.partitions)}
        elif len(edges) == 2:
            self.value = "name"  # USE THE "name" ATTRIBUTE OF PARTS
            d2 = parts.edges[1].domain

            # SIMPLE LIST OF PARTS RETURNED, BE SURE TO INTERRELATE THEM
            # NOTE(review): INDEXING THE RESULT OF .values() REQUIRES IT TO BE A
            # LIST (NOT A PYTHON 3 dict VIEW) - CONFIRM THE TYPE OF parts.data
            array = parts.data.values()[0].cube  # DIG DEEP INTO RESULT (ASSUME SINGLE VALUE CUBE, WITH NULL AT END)

            def edges2value(*values):
                # THE is_data(fields) CASE IS HANDLED BY AN EARLIER elif, SO ONLY
                # THE tuple BRANCH IS REACHED FROM HERE
                if is_data(fields):
                    output = Data()
                    for e, v in transpose(edges, values):
                        output[e.name] = v
                    return output
                else:
                    return tuple(values)

            # OUTER PARTS COME FROM THE FIRST EDGE, SUB-PARTS FROM THE SECOND
            self.partitions = wrap([
                {
                    "name": str(d.partitions[i].name),  # CONVERT TO STRING
                    "value": d.getEnd(d.partitions[i]),
                    "where": {"term": {edges[0].value: d.partitions[i].value}},
                    "count": SUM(subcube),
                    "partitions": [
                        {
                            "name": str(d2.partitions[j].name),  # CONVERT TO STRING
                            "value": edges2value(d.getEnd(d.partitions[i]), d2.getEnd(d2.partitions[j])),
                            "where": {"and": [
                                {"term": {edges[0].value: d.partitions[i].value}},
                                {"term": {edges[1].value: d2.partitions[j].value}}
                            ]},
                            "count": count2
                        }
                        for j, count2 in enumerate(subcube)
                        if count2 > 0  # ONLY INCLUDE PROPERTIES THAT EXIST
                    ]
                }
                for i, subcube in enumerate(array)
            ])
        else:
            Log.error("Not supported")

        parse_partition(self)  # RELATE THE PARTS TO THE PARENTS

Example #51

0

Show file

File: meta.py Project: klahnakoski/pyLibrary

def _get_schema_from_list(frum, table_name, parent, nested_path, columns):
    """
    Walk the rows of `frum`, registering (or refining the type of) a column
    in `columns` for every property seen; objects and lists are walked
    recursively

    :param frum: The list
    :param table_name: Name of the table this list holds records for
    :param parent: parent path
    :param nested_path: each nested array, in reverse order
    :param columns: map from full name to column definition
    :return:
    """

    def _column_for(full_name, sample):
        # RETURN THE KNOWN COLUMN; OTHERWISE REGISTER ONE TYPED AFTER sample
        existing = columns[full_name]
        if existing:
            return existing
        created = Column(
            name=concat_field(table_name, full_name),
            es_column=full_name,
            es_index=".",
            es_type=sample.__class__.__name__,
            jx_type=None,  # SET BY THE CALLER
            last_updated=Date.now(),
            nested_path=nested_path,
        )
        columns.add(created)
        return created

    for row in frum:
        if python_type_to_json_type[row.__class__] != "object":
            # EXPECTING PRIMITIVE VALUE, IT BELONGS TO THE PARENT PATH ITSELF
            col = _column_for(parent, row)
            col.es_type = _merge_python_type(col.es_type, row.__class__)
            col.jx_type = python_type_to_json_type[col.es_type]
            continue

        for name, value in row.items():
            full_name = concat_field(parent, name)
            col = _column_for(full_name, value)

            if not is_container(value):
                this_type = value.__class__.__name__
            else:
                # GET TYPE OF MULTIVALUE
                members = list(value)
                count = len(members)
                if count == 0:
                    this_type = none_type.__name__
                elif count == 1:
                    this_type = members[0].__class__.__name__
                else:
                    this_type = reduce(
                        _merge_python_type, (vi.__class__.__name__ for vi in value)
                    )

            col.es_type = _merge_python_type(col.es_type, this_type)
            col.jx_type = python_type_to_json_type[col.es_type]

            if this_type in {"object", "dict", "Mapping", "Data"}:
                # DESCEND INTO THE OBJECT, SAME NESTED PATH
                _get_schema_from_list(
                    [value], table_name, full_name, nested_path, columns
                )
            elif this_type in {"list", "FlatList"}:
                # DESCEND INTO THE ARRAY, WITH ITS PATH PUT IN FRONT OF THE NESTED PATH
                np = listwrap(nested_path)
                deeper = join_field(split_field(np[0]) + [name])
                newpath = unwraplist([deeper] + np)
                _get_schema_from_list(
                    value, table_name, full_name, newpath, columns
                )

Example #52

0

Show file

File: meta.py Project: rv404674/TUID

 def get_schema(self, name):
     """
     RETURN THE SCHEMA FOR THE GIVEN (DOTTED) NAME

     "meta.columns" IS ANSWERED FROM self.meta.columns; FOR ANY OTHER NAME
     THE FIRST STEP PICKS THE SNOWFLAKE AND THE REMAINDER IS THE PATH INSIDE IT
     """
     if name == "meta.columns":
         return self.meta.columns.schema

     steps = split_field(name)
     root = steps[0]
     rest = join_field(steps[1:])
     snowflake = self.get_snowflake(root)
     return snowflake.get_schema(rest)

Example #53

0

Show file

File: __init__.py Project: rv404674/TUID

 def query_path(self):
     """
     RETURN self.name WITHOUT ITS FIRST STEP
     """
     steps = split_field(self.name)
     return join_field(steps[1:])