Example 1
    def denormalized(self):
        """
        THE INTERNAL STRUCTURE FOR THE COLUMN METADATA IS VERY DIFFERENT FROM
        THE DENORMALIZED PERSPECTIVE. THIS PROVIDES THAT PERSPECTIVE FOR QUERIES
        """
        with self.locker:
            self._update_meta()
            output = [
                {
                    "table": c.es_index,
                    "name": untyped_column(c.name)[0],
                    "cardinality": c.cardinality,
                    "es_column": c.es_column,
                    "es_index": c.es_index,
                    "last_updated": c.last_updated,
                    "count": c.count,
                    "nested_path": [unnest_path(n) for n in c.nested_path],
                    "es_type": c.es_type,
                    "type": c.jx_type,
                }
                for tname, css in self.data.items()
                for cname, cs in css.items()
                for c in cs
                if c.jx_type not in STRUCT  # and c.es_column != "_id"
            ]

        from jx_python.containers.list_usingPythonList import ListContainer

        return ListContainer(
            self.name,
            data=output,
            schema=jx_base.Schema(META_COLUMNS_NAME, SIMPLE_METADATA_COLUMNS),
        )
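
For context, here is a minimal sketch of the same flattening pattern, assuming a toy in-memory store shaped like {table: {column_name: [columns]}}; the Col class and the sample data are hypothetical stand-ins, not the library's own types:

from dataclasses import dataclass
from threading import Lock


@dataclass
class Col:  # hypothetical stand-in for the library's Column
    name: str
    es_index: str
    es_column: str
    jx_type: str


store = {"task": {"run.ok": [Col("run.ok", "task", "run.ok.$boolean", "boolean")]}}
locker = Lock()

with locker:  # same locking discipline as the method above
    denormalized = [
        {"table": c.es_index, "name": c.name, "type": c.jx_type, "es_column": c.es_column}
        for columns_by_name in store.values()
        for cols in columns_by_name.values()
        for c in cols
        if c.jx_type not in ("object", "nested")  # skip structural columns
    ]

print(denormalized)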
Example 2
    def _nest_column(self, column):
        new_path, type_ = untyped_column(column.es_column)
        if type_ != SQL_NESTED_TYPE:
            Log.error("only nested types can be nested")
        destination_table = concat_field(self.fact_name, new_path)
        existing_table = concat_field(self.fact_name, column.nested_path[0])

        # FIND THE INNER COLUMNS WE WILL BE MOVING
        moving_columns = []
        for c in self.columns:
            if destination_table != column.es_index and column.es_column == c.es_column:
                moving_columns.append(c)
                c.nested_path = new_path

        # TODO: IF THERE ARE CHILD TABLES, WE MUST UPDATE THEIR RELATIONS TOO?

        # LOAD THE COLUMNS
        data = self.namespace.db.about(destination_table)
        if not data:
            # DEFINE A NEW TABLE
            command = (
                SQL_CREATE + quote_column(destination_table) + sql_iso(sql_list([
                    quoted_UID + "INTEGER",
                    quoted_PARENT + "INTEGER",
                    quoted_ORDER + "INTEGER",
                    "PRIMARY KEY " + sql_iso(quoted_UID),
                    "FOREIGN KEY " + sql_iso(quoted_PARENT) + " REFERENCES " + quote_column(existing_table) + sql_iso(quoted_UID)
                ]))
            )
            with self.namespace.db.transaction() as t:
                t.execute(command)
                self.add_table([new_path]+column.nested_path)

        # TEST IF THERE IS ANY DATA IN THE NEW NESTED ARRAY
        if not moving_columns:
            return

        column.es_index = destination_table
        with self.namespace.db.transaction() as t:
            t.execute(
                "ALTER TABLE " + quote_column(destination_table) +
                " ADD COLUMN " + quote_column(column.es_column) + " " + column.es_type
            )

            # REMOVE THE MOVED COLUMNS FROM THE PARENT TABLE
            for col in moving_columns:
                column = col.es_column
                tmp_table = "tmp_" + existing_table
                columns = list(map(text, t.query(
                    SQL_SELECT + SQL_STAR + SQL_FROM + quote_column(existing_table) + SQL_LIMIT + SQL_ZERO
                ).header))
                t.execute(
                    "ALTER TABLE " + quote_column(existing_table) +
                    " RENAME TO " + quote_column(tmp_table)
                )
                t.execute(
                    SQL_CREATE + quote_column(existing_table) + SQL_AS +
                    SQL_SELECT + sql_list([quote_column(c) for c in columns if c != column]) +
                    SQL_FROM + quote_column(tmp_table)
                )
                t.execute("DROP TABLE " + quote_column(tmp_table))
Example 3
    def __init__(self, db):
        self.db = db
        self._snowflakes = {}  # MAP FROM BASE TABLE TO LIST OF NESTED TABLES
        self._columns = ColumnList()

        # FIND ALL TABLES
        result = self.db.query(
            "SELECT * FROM sqlite_master WHERE type='table' ORDER BY name")
        tables = wrap([{k: d[i]
                        for i, k in enumerate(result.header)}
                       for d in result.data])
        last_nested_path = []
        for table in tables:
            if table.name.startswith("__"):
                continue
            base_table, nested_path = tail_field(table.name)

            # FIND COMMON NESTED PATH SUFFIX
            for i, p in enumerate(last_nested_path):
                if startswith_field(nested_path, p):
                    last_nested_path = last_nested_path[i:]
                    break
            else:
                last_nested_path = []

            full_nested_path = [nested_path] + last_nested_path
            nested_tables = self._snowflakes.setdefault(
                base_table, [nested_path] + last_nested_path)
            nested_tables.append(
                jx_base.TableDesc(name=table.name,
                                  nested_path=full_nested_path))

            # LOAD THE COLUMNS
            command = "PRAGMA table_info" + sql_iso(quote_column(table.name))
            details = self.db.query(command)

            for cid, name, dtype, notnull, dfft_value, pk in details.data:
                if name.startswith("__"):
                    continue
                cname, ctype = untyped_column(name)
                self._columns.add(
                    Column(
                        name=cname,  # I THINK COLUMNS HAVE THEIR FULL PATH
                        jx_type=coalesce(
                            ctype, {
                                "TEXT": "string",
                                "REAL": "number",
                                "INTEGER": "integer"
                            }.get(dtype)),
                        nested_path=full_nested_path,
                        es_type=dtype,
                        es_column=name,
                        es_index=table.name))
            last_nested_path = full_nested_path
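
The discovery loop can be reproduced with the plain stdlib sqlite3 module: list user tables from sqlite_master, then run PRAGMA table_info on each. A rough sketch with invented sample tables (only the two queries mirror the code above):

import sqlite3

db = sqlite3.connect(":memory:")
db.execute("CREATE TABLE fact (__id__ INTEGER PRIMARY KEY, name TEXT)")
db.execute('CREATE TABLE "fact.children" (__id__ INTEGER, __parent__ INTEGER, age REAL)')

tables = db.execute(
    "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
).fetchall()

for (table_name,) in tables:
    rows = db.execute('PRAGMA table_info("{}")'.format(table_name))
    for cid, name, dtype, notnull, dflt_value, pk in rows:
        if name.startswith("__"):
            continue  # skip bookkeeping columns, as the loader above does
        print(table_name, name, dtype)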
Example 4
    def read_db(self):
        """
        PULL SCHEMA FROM DATABASE, BUILD THE MODEL
        :return: None
        """

        # FIND ALL TABLES
        result = self.db.query(
            "SELECT * FROM sqlite_master WHERE type='table' ORDER BY name")
        tables = wrap([{k: d[i]
                        for i, k in enumerate(result.header)}
                       for d in result.data])
        tables_found = False
        for table in tables:
            if table.name.startswith("__"):
                continue
            tables_found = True
            nested_path = [
                join_field(split_field(tab.name)[1:])
                for tab in jx.reverse(tables)
                if startswith_field(table.name, tab.name)
            ]
            self.add_table_to_schema(nested_path)

            # LOAD THE COLUMNS
            command = "PRAGMA table_info" + sql_iso(quote_column(table.name))
            details = self.db.query(command)

            for cid, name, dtype, notnull, dfft_value, pk in details.data:
                if name.startswith("__"):
                    continue
                cname, ctype = untyped_column(name)
                column = Column(names={
                    np: relative_field(cname, np)
                    for np in nested_path
                },
                                type=coalesce(
                                    ctype, {
                                        "TEXT": "string",
                                        "REAL": "number",
                                        "INTEGER": "integer"
                                    }.get(dtype)),
                                nested_path=nested_path,
                                es_column=name,
                                es_index=table.name)

                self.add_column_to_schema(column)

        return tables_found
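
The nested_path list here is derived purely from the dotted table names: every table whose name is a prefix of the current one is an ancestor. A rough stand-alone version with plain string helpers standing in for split_field/join_field/startswith_field, and invented table names:

def startswith_field(field, prefix):  # simplified stand-in
    return field == prefix or field.startswith(prefix + ".")


def nested_path_for(table_name, all_tables):
    # every table whose name is a dotted prefix of table_name is an ancestor;
    # deepest ancestor first, with the fact-table segment stripped off
    return [
        ".".join(t.split(".")[1:]) or "."
        for t in sorted(all_tables, reverse=True)
        if startswith_field(table_name, t)
    ]


tables = ["fact", "fact.children", "fact.children.pets"]
print(nested_path_for("fact.children.pets", tables))
# ['children.pets', 'children', '.']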
Example 5
    def _load_from_database(self):
        # FIND ALL TABLES
        result = self.db.query(sql_query({
            "from": "sqlite_master",
            "where": {"eq": {"type": "table"}},
            "orderby": "name"
        }))
        tables = wrap([{k: d for k, d in zip(result.header, row)} for row in result.data])
        last_nested_path = ["."]
        for table in tables:
            if table.name.startswith("__"):
                continue
            base_table, nested_path = tail_field(table.name)

            # FIND COMMON NESTED PATH SUFFIX
            if nested_path == ".":
                last_nested_path = []
            else:
                for i, p in enumerate(last_nested_path):
                    if startswith_field(nested_path, p):
                        last_nested_path = last_nested_path[i:]
                        break
                else:
                    last_nested_path = []

            full_nested_path = [nested_path] + last_nested_path
            self._snowflakes[literal_field(base_table)] += [full_nested_path]

            # LOAD THE COLUMNS
            details = self.db.about(table.name)

            for cid, name, dtype, notnull, dfft_value, pk in details:
                if name.startswith("__"):
                    continue
                cname, ctype = untyped_column(name)
                self.add(Column(
                    name=cname,
                    jx_type=coalesce(sql_type_to_json_type.get(ctype), IS_NULL),
                    nested_path=full_nested_path,
                    es_type=dtype,
                    es_column=name,
                    es_index=table.name,
                    last_updated=Date.now()
                ))
            last_nested_path = full_nested_path
Example 6
    def _load_from_database(self):
        # FIND ALL TABLES
        result = self.db.query(
            "SELECT * FROM sqlite_master WHERE type='table' ORDER BY name")
        tables = wrap([{k: d
                        for k, d in zip(result.header, row)}
                       for row in result.data])
        last_nested_path = []
        for table in tables:
            if table.name.startswith("__"):
                continue
            base_table, nested_path = tail_field(table.name)

            # FIND COMMON NESTED PATH SUFFIX
            for i, p in enumerate(last_nested_path):
                if startswith_field(nested_path, p):
                    last_nested_path = last_nested_path[i:]
                    break
            else:
                last_nested_path = []

            full_nested_path = [nested_path] + last_nested_path
            self._snowflakes[literal_field(base_table)] += [full_nested_path]

            # LOAD THE COLUMNS
            command = "PRAGMA table_info" + sql_iso(quote_column(table.name))
            details = self.db.query(command)

            for cid, name, dtype, notnull, dfft_value, pk in details.data:
                if name.startswith("__"):
                    continue
                cname, ctype = untyped_column(name)
                self.add(
                    Column(name=cname,
                           jx_type=coalesce(sql_type_to_json_type.get(ctype),
                                            IS_NULL),
                           nested_path=full_nested_path,
                           es_type=dtype,
                           es_column=name,
                           es_index=table.name,
                           last_updated=Date.now()))
            last_nested_path = full_nested_path
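
The "FIND COMMON NESTED PATH SUFFIX" bookkeeping that repeats in the last few loaders can be pulled out on its own. A rough sketch with a simplified startswith_field and made-up paths:

def startswith_field(field, prefix):
    # simplified: "." is an ancestor of everything
    return prefix == "." or field == prefix or field.startswith(prefix + ".")


def extend_path(nested_path, last_nested_path):
    # keep only the previously seen ancestors that are still prefixes of the
    # current path, then prepend the current path (deepest first)
    for i, p in enumerate(last_nested_path):
        if startswith_field(nested_path, p):
            last_nested_path = last_nested_path[i:]
            break
    else:
        last_nested_path = []
    return [nested_path] + last_nested_path


last = []
for nested_path in [".", "children", "children.pets", "tags"]:
    last = extend_path(nested_path, last)
    print(nested_path, "->", last)
# .             -> ['.']
# children      -> ['children', '.']
# children.pets -> ['children.pets', 'children', '.']
# tags          -> ['tags', '.']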
Example 7
     outer_selects.append(sql)
     index_to_column[column_number] = ColumnMapping(
         push_name=s.name,
         push_column_name=s.name,
         push_column=si,
         push_child=".",
         pull=get_column(column_number),
         sql=sql,
         column_alias=quote_column(s.name),
         type=sql_type_to_json_type["n"])
 elif s.aggregate == "count" and (not query.edges
                                  and not query.groupby):
     value = s.value.var
     columns = [
         c.es_column for c in self.snowflake.columns
         if untyped_column(c.es_column)[0] == value
     ]
     sql = SQL("+").join(
         sql_count(quote_column(col)) for col in columns)
     column_number = len(outer_selects)
     outer_selects.append(
         sql_alias(sql, _make_column_name(column_number)))
     index_to_column[column_number] = ColumnMapping(
         push_name=s.name,
         push_column_name=s.name,
         push_column=si,
         push_child=".",
         pull=get_column(column_number),
         sql=sql,
         column_alias=_make_column_name(column_number),
         type=sql_type_to_json_type["n"])
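
The count branch sums COUNT() over every typed column that maps back to the same untyped name. A quick sqlite3 sketch of that composition; the table, the column names, and the ".$type" suffix convention are invented for the demo:

import sqlite3

db = sqlite3.connect(":memory:")
db.execute('CREATE TABLE fact ("v.$string" TEXT, "v.$number" REAL)')
db.executemany("INSERT INTO fact VALUES (?, ?)", [("a", None), (None, 3.0), (None, None)])

columns = ["v.$string", "v.$number"]
sql = " + ".join('COUNT("{}")'.format(c) for c in columns)  # COUNT ignores NULLs
print(db.execute("SELECT " + sql + " FROM fact").fetchone()[0])  # 2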
Example 8
    def query_metadata(self, query):
        frum, query['from'] = query['from'], self
        schema = self.sf.tables["."].schema
        query = QueryOp.wrap(query, schema)
        columns = self.sf.columns
        where = query.where
        table_name = None
        column_name = None

        if query.edges or query.groupby:
            Log.error("Aggregates(groupby or edge) are not supported")

        if where.op == "eq" and where.lhs.var == "table":
            table_name = mo_json.json2value(where.rhs.json)
        elif where.op == "eq" and where.lhs.var == "name":
            column_name = mo_json.json2value(where.rhs.json)
        else:
            Log.error("Only simple filters are expected like: \"eq\" on table and column name")

        t = [i for i in columns[0].names.keys()]
        tables = [concat_field(self.sf.fact, i) for i in t]

        metadata = []
        if columns[-1].es_column != GUID:
            columns.append(Column(
                names={i: relative_field(GUID, i) for i in t},
                type="string",
                es_column=GUID,
                es_index=self.sf.fact,
                nested_path=["."]
            ))

        for tname, table in zip(t, tables):
            if table_name is not None and table_name != table:
                continue

            for col in columns:
                cname, ctype = untyped_column(col.es_column)
                if column_name is not None and column_name != cname:
                    continue

                metadata.append((table, col.names[tname], col.type, unwraplist(col.nested_path)))

        if query.format == "cube":
            num_rows = len(metadata)
            header = ["table", "name", "type", "nested_path"]
            temp_data = dict(zip(header, zip(*metadata)))
            return Data(
                meta={"format": "cube"},
                data=temp_data,
                edges=[{
                    "name": "rownum",
                    "domain": {
                        "type": "rownum",
                        "min": 0,
                        "max": num_rows,
                        "interval": 1
                    }
                }]
            )
        elif query.format == "table":
            header = ["table", "name", "type", "nested_path"]
            return Data(
                meta={"format": "table"},
                header=header,
                data=metadata
            )
        else:
            header = ["table", "name", "type", "nested_path"]
            return Data(
                meta={"format": "list"},
                data=[dict(zip(header, r)) for r in metadata]
            )
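
For reference, the three response shapes produced above ("cube", "table", "list") differ only in how the same rows are arranged; a plain-Python sketch with a tiny made-up metadata set:

metadata = [
    ("fact", "name", "string", "."),
    ("fact.children", "age", "number", ["children", "."]),
]
header = ["table", "name", "type", "nested_path"]

as_table = {"meta": {"format": "table"}, "header": header, "data": metadata}
as_list = {"meta": {"format": "list"}, "data": [dict(zip(header, r)) for r in metadata]}
as_cube = {
    "meta": {"format": "cube"},
    "data": dict(zip(header, zip(*metadata))),  # one column-array per header name
    "edges": [{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": len(metadata), "interval": 1}}],
}

print(as_cube["data"]["name"])  # ('name', 'age')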
Example 9
                column_number = len(outer_selects)
                outer_selects.append(sql)
                index_to_column[column_number] = ColumnMapping(
                    push_name=s.name,
                    push_column_name=s.name,
                    push_column=si,
                    push_child=".",
                    pull=get_column(column_number),
                    sql=sql,
                    column_alias=quote_column(s.name),
                    type=sql_type_to_json_type["n"]
                )
            elif s.aggregate == "count" and (not query.edges and not query.groupby):
                value = s.value.var
                columns = [c.es_column for c in self.snowflake.columns if untyped_column(c.es_column)[0] == value]
                sql = SQL("+").join(sql_count(quote_column(col)) for col in columns)
                column_number = len(outer_selects)
                outer_selects.append(sql_alias(sql, _make_column_name(column_number)))
                index_to_column[column_number] = ColumnMapping(
                    push_name=s.name,
                    push_column_name=s.name,
                    push_column=si,
                    push_child=".",
                    pull=get_column(column_number),
                    sql=sql,
                    column_alias=_make_column_name(column_number),
                    type=sql_type_to_json_type["n"]
                )
            elif s.aggregate == "percentile":
                if not isinstance(s.percentile, (int, float)):
Example 10
        def _flatten(data,
                     uid,
                     parent_id,
                     order,
                     full_path,
                     nested_path,
                     row=None,
                     guid=None):
            """
            :param data: the data we are pulling apart
            :param uid: the uid we are giving this doc
            :param parent_id: the parent id of this (sub)doc
            :param order: the number of siblings before this one
            :param full_path: path to this (sub)doc
            :param nested_path: list of paths, deepest first
            :param row: we will be filling this
            :param guid: the value to store under GUID on the root row
            :return:
            """
            table = concat_field(self.name, nested_path[0])
            insertion = doc_collection[nested_path[0]]
            if not row:
                row = {GUID: guid, UID: uid, PARENT: parent_id, ORDER: order}
                insertion.rows.append(row)

            if isinstance(data, Mapping):
                items = ((concat_field(full_path, k), v)
                         for k, v in wrap(data).leaves())
            else:
                # PRIMITIVE VALUES
                items = [(full_path, data)]

            for cname, v in items:
                value_type = get_type(v)
                if value_type is None:
                    continue

                if value_type == NESTED:
                    c = unwraplist([
                        cc for cc in snowflake.columns if cc.jx_type in STRUCT
                        and untyped_column(cc.name) == cname
                    ])
                else:
                    c = unwraplist([
                        cc for cc in snowflake.columns
                        if cc.jx_type == value_type and cc.name == cname
                    ])

                insertion = doc_collection[nested_path[0]]
                if not c:
                    # WHAT IS THE NESTING LEVEL FOR THIS PATH?
                    deeper_nested_path = "."
                    for path in snowflake.query_paths:
                        if startswith_field(cname, path[0]) and len(
                                deeper_nested_path) < len(path):
                            deeper_nested_path = path

                    c = Column(name=cname,
                               jx_type=value_type,
                               es_type=json_type_to_sqlite_type.get(
                                   value_type, value_type),
                               es_column=typed_column(
                                   cname,
                                   json_type_to_sql_type.get(value_type)),
                               es_index=table,
                               nested_path=nested_path,
                               last_updated=Date.now())
                    if value_type == "nested":
                        snowflake.query_paths.append(c.es_column)
                        required_changes.append({'nest': (c, nested_path)})
                    else:
                        snowflake.columns.append(c)
                        required_changes.append({"add": c})

                        # INSIDE THE if BLOCK SO WE DO NOT RE-ADD A COLUMN WE ALREADY HAVE
                        insertion.active_columns.add(c)
                elif c.jx_type == "nested" and value_type == "object":
                    value_type = "nested"
                    v = [v]
                elif len(c.nested_path) < len(nested_path):
                    from_doc = doc_collection.get(c.nested_path[0], None)
                    column = c.es_column
                    from_doc.active_columns.remove(c)
                    snowflake._remove_column(c)
                    required_changes.append({"nest": (c, nested_path)})
                    deep_c = Column(name=cname,
                                    jx_type=value_type,
                                    es_type=json_type_to_sqlite_type.get(
                                        value_type, value_type),
                                    es_column=typed_column(
                                        cname,
                                        json_type_to_sql_type.get(value_type)),
                                    es_index=table,
                                    nested_path=nested_path,
                                    last_updated=Date.now())
                    snowflake._add_column(deep_c)
                    snowflake._drop_column(c)
                    from_doc.active_columns.remove(c)

                    for r in from_doc.rows:
                        r1 = unwrap(r)
                        if column in r1:
                            row1 = {
                                UID: self.container.next_uid(),
                                PARENT: r1["__id__"],
                                ORDER: 0,
                                column: r1[column]
                            }
                            insertion.rows.append(row1)
                elif len(c.nested_path) > len(nested_path):
                    insertion = doc_collection[c.nested_path[0]]
                    row = {
                        UID: self.container.next_uid(),
                        PARENT: uid,
                        ORDER: order
                    }
                    insertion.rows.append(row)

                # BE SURE TO NEST VALUES, IF NEEDED
                if value_type == "nested":
                    row[c.es_column] = "."
                    deeper_nested_path = [cname] + nested_path
                    insertion = doc_collection.get(cname, None)
                    if not insertion:
                        insertion = doc_collection[cname] = Data(
                            active_columns=set(), rows=[])
                    for i, r in enumerate(v):
                        child_uid = self.container.next_uid()
                        _flatten(r, child_uid, uid, i, cname,
                                 deeper_nested_path)
                elif value_type == "object":
                    row[c.es_column] = "."
                    _flatten(v,
                             uid,
                             parent_id,
                             order,
                             cname,
                             nested_path,
                             row=row)
                elif c.jx_type:
                    insertion.active_columns.add(c)
                    row[c.es_column] = v
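
The core of _flatten is that each nested array becomes a child table whose rows carry (uid, parent, order) pointers back to their parent row. A compact sketch of just that idea; the helper names and the uid counter are invented, and the real code above also tracks typed columns and schema changes:

from itertools import count

next_uid = count(1)


def flatten(doc, table, parent_id=None, order=0, collection=None):
    collection = collection if collection is not None else {}
    rows = collection.setdefault(table, [])
    uid = next(next_uid)
    row = {"__id__": uid, "__parent__": parent_id, "__order__": order}
    rows.append(row)
    for key, value in doc.items():
        if isinstance(value, list):  # nested array -> rows in a child table
            for i, child in enumerate(value):
                flatten(child, table + "." + key, parent_id=uid, order=i, collection=collection)
        elif isinstance(value, dict):  # inner object -> dotted columns on the same row
            for k, v in value.items():
                row[key + "." + k] = v
        else:
            row[key] = value
    return collection


doc = {"name": "a", "children": [{"age": 1}, {"age": 2, "pet": {"kind": "cat"}}]}
print(flatten(doc, "fact"))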
Example 11
    def __init__(self, name, db=None, uid=GUID, exists=False, kwargs=None):
        """
        :param name: NAME FOR THIS TABLE
        :param db: THE DB TO USE
        :param uid: THE UNIQUE INDEX FOR THIS TABLE
        :return: HANDLE FOR TABLE IN db
        """
        global _config
        Container.__init__(self, frum=None)
        if db:
            self.db = db
        else:
            self.db = db = Sqlite()

        if not _config:
            from pyLibrary.queries.containers import config as _config
            if not _config.default:
                _config.default = {"type": "sqlite", "settings": {"db": db}}

        self.name = name
        self.uid = listwrap(uid)
        self._next_uid = 1
        self._make_digits_table()

        self.uid_accessor = jx.get(self.uid)
        # MAP FROM NESTED PATH TO Table OBJECT, PARENTS PRECEDE CHILDREN
        self.nested_tables = OrderedDict()
        self.nested_tables["."] = self
        # MAP FROM DOCUMENT ABS PROPERTY NAME TO THE SET OF SQL COLUMNS IT
        # REPRESENTS (ONE FOR EACH REALIZED DATATYPE)
        self.columns = Index(keys=[join_field(["names", self.name])])

        if not exists:
            for u in self.uid:
                if u != GUID:
                    c = Column(names={name: u},
                               type="string",
                               es_column=typed_column(u, "string"),
                               es_index=name)
                    self.add_column_to_schema(self.nested_tables, c)

            command = ("CREATE TABLE " + quote_table(name) + "(" +
                       (",".join([quoted_UID + " INTEGER"] + [
                           _quote_column(c) + " " + sql_types[c.type]
                           for u, cs in self.columns.items() for c in cs
                       ])) + ", PRIMARY KEY (" + (", ".join([quoted_UID] + [
                           _quote_column(c) for u in self.uid
                           for c in self.columns[u]
                       ])) + "))")

            self.db.execute(command)
        else:
            # LOAD THE COLUMNS
            command = "PRAGMA table_info(" + quote_table(name) + ")"
            details = self.db.query(command)

            for r in details:
                cname = untyped_column(r[1])
                ctype = r[2].lower()
                column = Column(names={name: cname},
                                type=ctype,
                                nested_path=['.'],
                                es_column=typed_column(cname, ctype),
                                es_index=name)

                self.add_column_to_schema(self.columns, column)
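
The two branches above either create the table (with a composite primary key over the uid columns) or read its columns back. A stdlib sqlite3 sketch of both steps, with invented names and no typed-column handling:

import sqlite3

db = sqlite3.connect(":memory:")
name = "example_table"
uid_columns = ["_id"]

command = (
    "CREATE TABLE " + name + " (__id__ INTEGER, "
    + ", ".join('"{}" TEXT'.format(u) for u in uid_columns)
    + ", PRIMARY KEY (__id__, " + ", ".join('"{}"'.format(u) for u in uid_columns) + "))"
)
db.execute(command)

for cid, cname, ctype, notnull, dflt, pk in db.execute("PRAGMA table_info({})".format(name)):
    print(cname, ctype.lower(), "pk" if pk else "")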
Example 12
    def column(self, prefix):
        # untyped_column RETURNS (name, type); KEEP ONLY THE NAME FOR THE COMPARISON
        full_name, _ = untyped_column(concat_field(self.nested_path, prefix))
        return set(
            c
            for c in self.snowflake.namespace.columns.find(self.snowflake.fact_name)
            for k, t in [untyped_column(c.name)]
            if k == full_name and k != GUID
            if c.jx_type not in [OBJECT, EXISTS]
        )
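
The `for k, t in [untyped_column(c.name)]` clause is a common trick for binding a local name inside a comprehension. A generic sketch of the idiom, with a made-up splitter in place of untyped_column:

def split_typed(name):  # made-up splitter standing in for untyped_column
    base, _, suffix = name.partition(".$")
    return base, suffix or None


names = ["a.$string", "a.$number", "b.$string"]
matches = {
    name
    for name in names
    for base, suffix in [split_typed(name)]  # single-element list = one local binding
    if base == "a"
}
print(matches)  # {'a.$string', 'a.$number'} (order may vary)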