Python untyped_column Examples, jx_sqlite.utils.untyped_column Python Examples

Example #1

0

Show file

    def denormalized(self):
        """
        THE INTERNAL STRUCTURE FOR THE COLUMN METADATA IS VERY DIFFERENT FROM
        THE DENORMALIZED PERSPECITVE. THIS PROVIDES THAT PERSPECTIVE FOR QUERIES
        """
        with self.locker:
            self._update_meta()
            output = [
                {
                    "table": c.es_index,
                    "name": untyped_column(c.name)[0],
                    "cardinality": c.cardinality,
                    "es_column": c.es_column,
                    "es_index": c.es_index,
                    "last_updated": c.last_updated,
                    "count": c.count,
                    "nested_path": [unnest_path(n) for n in c.nested_path],
                    "es_type": c.es_type,
                    "type": c.jx_type,
                } for tname, css in self.data.items()
                for cname, cs in css.items() for c in cs
                if c.jx_type not in STRUCT  # and c.es_column != "_id"
            ]

        from jx_python.containers.list_usingPythonList import ListContainer

        return ListContainer(
            self.name,
            data=output,
            schema=jx_base.Schema(META_COLUMNS_NAME, SIMPLE_METADATA_COLUMNS),
        )

Example #2

0

Show file

    def _nest_column(self, column):
        new_path, type_ = untyped_column(column.es_column)
        if type_ != SQL_NESTED_TYPE:
            Log.error("only nested types can be nested")
        destination_table = concat_field(self.fact_name, new_path)
        existing_table = concat_field(self.fact_name, column.nested_path[0])

        # FIND THE INNER COLUMNS WE WILL BE MOVING
        moving_columns = []
        for c in self.columns:
            if destination_table != column.es_index and column.es_column == c.es_column:
                moving_columns.append(c)
                c.nested_path = new_path

        # TODO: IF THERE ARE CHILD TABLES, WE MUST UPDATE THEIR RELATIONS TOO?

        # LOAD THE COLUMNS
        data = self.namespace.db.about(destination_table)
        if not data:
            # DEFINE A NEW TABLE
            command = (
                SQL_CREATE + quote_column(destination_table) + sql_iso(sql_list([
                    quoted_UID + "INTEGER",
                    quoted_PARENT + "INTEGER",
                    quoted_ORDER + "INTEGER",
                    "PRIMARY KEY " + sql_iso(quoted_UID),
                    "FOREIGN KEY " + sql_iso(quoted_PARENT) + " REFERENCES " + quote_column(existing_table) + sql_iso(quoted_UID)
                ]))
            )
            with self.namespace.db.transaction() as t:
                t.execute(command)
                self.add_table([new_path]+column.nested_path)

        # TEST IF THERE IS ANY DATA IN THE NEW NESTED ARRAY
        if not moving_columns:
            return

        column.es_index = destination_table
        with self.namespace.db.transaction() as t:
            t.execute(
                "ALTER TABLE " + quote_column(destination_table) +
                " ADD COLUMN " + quote_column(column.es_column) + " " + column.es_type
            )

            # Deleting parent columns
            for col in moving_columns:
                column = col.es_column
                tmp_table = "tmp_" + existing_table
                columns = list(map(text, t.query(SQL_SELECT + SQL_STAR + SQL_FROM + quote_column(existing_table) + SQL_LIMIT + SQL_ZERO).header))
                t.execute(
                    "ALTER TABLE " + quote_column(existing_table) +
                    " RENAME TO " + quote_column(tmp_table)
                )
                t.execute(
                    SQL_CREATE + quote_column(existing_table) + SQL_AS +
                    SQL_SELECT + sql_list([quote_column(c) for c in columns if c != column]) +
                    SQL_FROM + quote_column(tmp_table)
                )
                t.execute("DROP TABLE " + quote_column(tmp_table))

Example #3

0

Show file

File: meta_columns.py Project: armenzg/smart-scheduling

    def _load_from_database(self):
        # FIND ALL TABLES
        result = self.db.query(
            sql_query({
                "from": "sqlite_master",
                "where": {
                    "eq": {
                        "type": "table"
                    }
                },
                "orderby": "name"
            }))
        tables = wrap([{k: d
                        for k, d in zip(result.header, row)}
                       for row in result.data])
        last_nested_path = ["."]
        for table in tables:
            if table.name.startswith("__"):
                continue
            base_table, nested_path = tail_field(table.name)

            # FIND COMMON NESTED PATH SUFFIX
            if nested_path == ".":
                last_nested_path = []
            else:
                for i, p in enumerate(last_nested_path):
                    if startswith_field(nested_path, p):
                        last_nested_path = last_nested_path[i:]
                        break
                else:
                    last_nested_path = []

            full_nested_path = [nested_path] + last_nested_path
            self._snowflakes[literal_field(base_table)] += [full_nested_path]

            # LOAD THE COLUMNS
            details = self.db.about(table.name)

            for cid, name, dtype, notnull, dfft_value, pk in details:
                if name.startswith("__"):
                    continue
                cname, ctype = untyped_column(name)
                self.add(
                    Column(name=cname,
                           jx_type=coalesce(sql_type_to_json_type.get(ctype),
                                            IS_NULL),
                           nested_path=full_nested_path,
                           es_type=dtype,
                           es_column=name,
                           es_index=table.name,
                           last_updated=Date.now()))
            last_nested_path = full_nested_path

Example #4

0

Show file

        def _flatten(data,
                     uid,
                     parent_id,
                     order,
                     full_path,
                     nested_path,
                     row=None,
                     guid=None):
            """
            :param data: the data we are pulling apart
            :param uid: the uid we are giving this doc
            :param parent_id: the parent id of this (sub)doc
            :param order: the number of siblings before this one
            :param full_path: path to this (sub)doc
            :param nested_path: list of paths, deepest first
            :param row: we will be filling this
            :return:
            """
            table = concat_field(self.name, nested_path[0])
            insertion = doc_collection[nested_path[0]]
            if not row:
                row = {GUID: guid, UID: uid, PARENT: parent_id, ORDER: order}
                insertion.rows.append(row)

            if is_data(data):
                items = [(concat_field(full_path, k), v)
                         for k, v in wrap(data).leaves()]
            else:
                # PRIMITIVE VALUES
                items = [(full_path, data)]

            for cname, v in items:
                jx_type = get_jx_type(v)
                if jx_type is None:
                    continue

                insertion = doc_collection[nested_path[0]]
                if jx_type == NESTED:
                    c = first(cc for cc in insertion.active_columns +
                              snowflake.columns if cc.jx_type in STRUCT
                              and untyped_column(cc.name)[0] == cname)
                else:
                    c = first(cc for cc in insertion.active_columns +
                              snowflake.columns
                              if cc.jx_type == jx_type and cc.name == cname)

                if isinstance(c, list):
                    Log.error("confused")

                if not c:
                    # WHAT IS THE NESTING LEVEL FOR THIS PATH?
                    deeper_nested_path = "."
                    for path in snowflake.query_paths:
                        if startswith_field(cname, path[0]) and len(
                                deeper_nested_path) < len(path):
                            deeper_nested_path = path

                    c = Column(name=cname,
                               jx_type=jx_type,
                               es_type=json_type_to_sqlite_type.get(
                                   jx_type, jx_type),
                               es_column=typed_column(
                                   cname, json_type_to_sql_type.get(jx_type)),
                               es_index=table,
                               cardinality=0,
                               nested_path=nested_path,
                               last_updated=Date.now())
                    if jx_type == NESTED:
                        snowflake.query_paths.append(c.es_column)
                        required_changes.append({'nest': c})
                    else:
                        insertion.active_columns.add(c)
                        required_changes.append({"add": c})
                elif c.jx_type == NESTED and jx_type == OBJECT:
                    # ALWAYS PROMOTE OBJECTS TO NESTED
                    jx_type = NESTED
                    v = [v]
                elif len(c.nested_path) < len(nested_path):
                    from_doc = doc_collection.get(c.nested_path[0], None)
                    column = c.es_column
                    from_doc.active_columns.remove(c)
                    snowflake._remove_column(c)
                    required_changes.append({"nest": c})
                    deep_c = Column(name=cname,
                                    jx_type=jx_type,
                                    es_type=json_type_to_sqlite_type.get(
                                        jx_type, jx_type),
                                    es_column=typed_column(
                                        cname,
                                        json_type_to_sql_type.get(jx_type)),
                                    es_index=table,
                                    nested_path=nested_path,
                                    last_updated=Date.now())
                    snowflake._add_column(deep_c)
                    snowflake._drop_column(c)
                    from_doc.active_columns.remove(c)

                    for r in from_doc.rows:
                        r1 = unwrap(r)
                        if column in r1:
                            row1 = {
                                UID: self.container.next_uid(),
                                PARENT: r1["__id__"],
                                ORDER: 0,
                                column: r1[column]
                            }
                            insertion.rows.append(row1)
                elif len(c.nested_path) > len(nested_path):
                    insertion = doc_collection[c.nested_path[0]]
                    row = {
                        UID: self.container.next_uid(),
                        PARENT: uid,
                        ORDER: order
                    }
                    insertion.rows.append(row)

                # BE SURE TO NEST VALUES, IF NEEDED
                if jx_type == NESTED:
                    deeper_nested_path = [cname] + nested_path
                    if not doc_collection.get(cname):
                        doc_collection[cname] = Data(active_columns=Queue(),
                                                     rows=[])
                    for i, r in enumerate(v):
                        child_uid = self.container.next_uid()
                        _flatten(r, child_uid, uid, i, cname,
                                 deeper_nested_path)
                elif jx_type == OBJECT:
                    _flatten(v,
                             uid,
                             parent_id,
                             order,
                             cname,
                             nested_path,
                             row=row)
                elif c.jx_type:
                    row[c.es_column] = v

Example #5

0

Show file

File: edges_table.py Project: armenzg/smart-scheduling

                column_number = len(outer_selects)
                outer_selects.append(sql)
                index_to_column[column_number] = ColumnMapping(
                    push_name=s.name,
                    push_column_name=s.name,
                    push_column=si,
                    push_child=".",
                    pull=get_column(column_number),
                    sql=sql,
                    column_alias=quote_column(s.name),
                    type=sql_type_to_json_type["n"]
                )
            elif s.aggregate == "count" and (not query.edges and not query.groupby):
                value = s.value.var
                columns = [c.es_column for c in self.snowflake.columns if untyped_column(c.es_column)[0] == value]
                sql = SQL("+").join(sql_count(quote_column(col)) for col in columns)
                column_number = len(outer_selects)
                outer_selects.append(sql_alias(sql, _make_column_name(column_number)))
                index_to_column[column_number] = ColumnMapping(
                    push_name=s.name,
                    push_column_name=s.name,
                    push_column=si,
                    push_child=".",
                    pull=get_column(column_number),
                    sql=sql,
                    column_alias=_make_column_name(column_number),
                    type=sql_type_to_json_type["n"]
                )
            elif s.aggregate == "percentile":
                if not isinstance(s.percentile, (int, float)):

Example #6

0

Show file

File: query_table.py Project: armenzg/smart-scheduling

    def query_metadata(self, query):
        frum, query['from'] = query['from'], self
        schema = self.snowflake.tables["."].schema
        query = QueryOp.wrap(query, schema)
        columns = self.snowflake.columns
        where = query.where
        table_name = None
        column_name = None

        if query.edges or query.groupby:
            Log.error("Aggregates(groupby or edge) are not supported")

        if where.op == "eq" and where.lhs.var == "table":
            table_name = mo_json.json2value(where.rhs.json)
        elif where.op == "eq" and where.lhs.var == "name":
            column_name = mo_json.json2value(where.rhs.json)
        else:
            Log.error("Only simple filters are expected like: \"eq\" on table and column name")

        tables = [concat_field(self.snowflake.fact_name, i) for i in self.tables.keys()]

        metadata = []
        if columns[-1].es_column != GUID:
            columns.append(Column(
                name=GUID,
                jx_type=STRING,
                es_column=GUID,
                es_index=self.snowflake.fact_name,
                nested_path=["."]
            ))

        for tname, table in zip(t, tables):
            if table_name != None and table_name != table:
                continue

            for col in columns:
                cname, ctype = untyped_column(col.es_column)
                if column_name != None and column_name != cname:
                    continue

                metadata.append((table, relative_field(col.name, tname), col.type, unwraplist(col.nested_path)))

        if query.format == "cube":
            num_rows = len(metadata)
            header = ["table", "name", "type", "nested_path"]
            temp_data = dict(zip(header, zip(*metadata)))
            return Data(
                meta={"format": "cube"},
                data=temp_data,
                edges=[{
                    "name": "rownum",
                    "domain": {
                        "type": "rownum",
                        "min": 0,
                        "max": num_rows,
                        "interval": 1
                    }
                }]
            )
        elif query.format == "table":
            header = ["table", "name", "type", "nested_path"]
            return Data(
                meta={"format": "table"},
                header=header,
                data=metadata
            )
        else:
            header = ["table", "name", "type", "nested_path"]
            return Data(
                meta={"format": "list"},
                data=[dict(zip(header, r)) for r in metadata]
            )

Example #7

0

Show file

File: schema.py Project: armenzg/smart-scheduling

 def column(self, prefix):
     full_name = untyped_column(concat_field(self.nested_path, prefix))
     return set(c for c in self.snowflake.namespace.columns.find(
         self.snowflake.fact_name) for k, t in [untyped_column(c.name)]
                if k == full_name and k != GUID
                if c.jx_type not in [OBJECT, EXISTS])