def _put_cluster(self, *, database_uri: str, cluster: str, executor: ExecuteQuery) -> None:
    """
    Write the Cluster vertex for ``cluster`` and connect it to its database.

    The vertex is keyed by the URI that ``make_cluster_uri`` builds from ``database_uri`` and
    ``cluster``.  A Cluster edge is then created via ``_link`` between the Database vertex keyed
    by ``database_uri`` and the vertex id returned by ``_upsert``.

    :param database_uri: key of the Database vertex the cluster belongs to
    :param cluster: cluster name; stored as the vertex's ``name`` property
    :param executor: callable used by the helpers to run the Gremlin queries
    """
    uri: str = make_cluster_uri(database_uri=database_uri, cluster_name=cluster)

    cluster_vertex_id: Any = _upsert(
        executor=executor,
        g=self.g,
        label=VertexTypes.Cluster,
        key=uri,
        key_property_name=self.key_property_name,
        name=cluster,
    )

    _link(
        executor=executor,
        g=self.g,
        edge_label=EdgeTypes.Cluster,
        key_property_name=self.key_property_name,
        vertex1_label=VertexTypes.Database,
        vertex1_key=database_uri,
        vertex2_id=cluster_vertex_id,
    )
def add_read_count(self, *, table_uri: str, user_id: str, read_count: int) -> None:
    """
    Record how many times a user has read a table.

    Opens a query executor and calls ``_link`` to create a Read edge between the User vertex
    keyed by ``user_id`` and the Table vertex keyed by ``table_uri``, passing ``read_count``
    through as an extra keyword argument.

    :param table_uri: key of the Table vertex
    :param user_id: key of the User vertex
    :param read_count: read count forwarded to ``_link``
    """
    # TODO: use READ_BY instead of READ edges
    with self.query_executor() as query_runner:
        _link(
            executor=query_runner,
            g=self.g,
            edge_label=EdgeTypes.Read,
            key_property_name=self.key_property_name,
            vertex1_label=VertexTypes.User,
            vertex1_key=user_id,
            vertex2_label=VertexTypes.Table,
            vertex2_key=table_uri,
            read_count=read_count,
        )
def _put_app_table_relation(self, *, app_key: str, table_uri: str, executor: ExecuteQuery) -> None:
    """
    Attach the application (or, failing that, the user) identified by ``app_key`` to a table.

    Two Application keys are tried in order: ``app_key`` itself, then ``app-<app_key>``.  For the
    first key with a matching Application vertex, a Generates edge is linked to the table,
    ``_expire_other_links`` is called for the table's incoming Generates edges, and the method
    returns.  If neither key matches and ``app_key`` resolves to a user, that user is added as an
    owner of the table instead.  Otherwise only a debug message is logged.

    :param app_key: application id as supplied by the caller (may in fact be a user id)
    :param table_uri: key of the Table vertex
    :param executor: callable used to run the Gremlin queries
    """
    # the plain key first, then the non-standard spelling carrying an "app-" prefix
    for candidate_key in (app_key, f'app-{app_key}'):
        count_query = _V(g=self.g, label=VertexTypes.Application, key=candidate_key).count()
        matches = executor(query=count_query, get=FromResultSet.getOnly)
        if matches > 0:
            _link(
                executor=executor,
                g=self.g,
                edge_label=EdgeTypes.Generates,
                key_property_name=self.key_property_name,
                vertex1_label=VertexTypes.Application,
                vertex1_key=candidate_key,
                vertex2_label=VertexTypes.Table,
                vertex2_key=table_uri,
            )
            # NOTE(review): presumably retires Generates edges from any other application into
            # this table -- confirm against _expire_other_links
            _expire_other_links(
                executor=executor,
                g=self.g,
                edge_label=EdgeTypes.Generates,
                key_property_name=self.key_property_name,
                vertex1_label=VertexTypes.Table,
                vertex1_key=table_uri,
                vertex2_label=VertexTypes.Application,
                vertex2_key=candidate_key,
                edge_direction=Direction.IN,
            )
            return

    # no Application vertex under either key: the "app" may really be a user
    if self._get_user(executor=executor, id=app_key):
        LOGGER.debug(f'{app_key} is not a real app but it was marked as owner: {table_uri}')
        self._add_owner(executor=executor, table_uri=table_uri, owner=app_key)
    else:
        LOGGER.debug(f'{app_key} is not a real app nor an owner: {table_uri}')
def _put_schema(self, *, cluster_uri: str, schema: str, executor: ExecuteQuery) -> None:
    """
    Write the Schema vertex for ``schema`` and connect it to its cluster.

    The vertex is keyed by the URI that ``make_schema_uri`` builds from ``cluster_uri`` and
    ``schema``.  A Schema edge is then created via ``_link`` between the Cluster vertex keyed by
    ``cluster_uri`` and the vertex id returned by ``_upsert``.

    :param cluster_uri: key of the Cluster vertex the schema belongs to
    :param schema: schema name; stored as the vertex's ``name`` property
    :param executor: callable used by the helpers to run the Gremlin queries
    """
    uri: str = make_schema_uri(cluster_uri=cluster_uri, schema_name=schema)

    schema_vertex_id: Any = _upsert(
        executor=executor,
        g=self.g,
        label=VertexTypes.Schema,
        key=uri,
        key_property_name=self.key_property_name,
        name=schema,
    )

    _link(
        executor=executor,
        g=self.g,
        edge_label=EdgeTypes.Schema,
        key_property_name=self.key_property_name,
        vertex1_label=VertexTypes.Cluster,
        vertex1_key=cluster_uri,
        vertex2_id=schema_vertex_id,
    )
def _put_programmatic_table_description(self, *, table_uri: str, description: ProgrammaticDescription,
                                        executor: ExecuteQuery) -> None:
    """
    Store a programmatic description for a table that already exists.

    The Table vertex id is looked up first; when the lookup yields nothing (or a falsy id) the
    method returns without writing.  Otherwise a Description vertex, keyed by
    ``make_description_uri(table_uri, description.source)``, is upserted with the description's
    text and source, and a Description edge is linked between the table and that vertex.

    :param table_uri: key of the Table vertex being described
    :param description: supplies ``text`` and ``source``
    :param executor: callable used to run the Gremlin queries
    """
    table_id_query = _V(g=self.g, label=VertexTypes.Table, key=table_uri).id()
    table_vertex_id = executor(query=table_id_query, get=FromResultSet.getOptional)
    if not table_vertex_id:
        # if the table doesn't exist, don't try to import a description
        return

    description_key = make_description_uri(subject_uri=table_uri, source=description.source)
    description_vertex_id: Any = _upsert(
        executor=executor,
        g=self.g,
        label=VertexTypes.Description,
        key=description_key,
        key_property_name=self.key_property_name,
        description=description.text,
        source=description.source,
    )

    _link(
        executor=executor,
        g=self.g,
        edge_label=EdgeTypes.Description,
        key_property_name=self.key_property_name,
        vertex1_id=table_vertex_id,
        vertex2_id=description_vertex_id,
    )
def _link(self, **kwargs: Any) -> None:
    """
    Call the module-level ``_link`` helper against the proxy returned by ``self.get_proxy()``.

    The executor, graph (``g``) and key property name are taken from the proxy, the result
    handling is fixed to ``FromResultSet.iterate``, and every other keyword argument is forwarded
    unchanged.  Whatever the helper returns is passed back to the caller.
    """
    with self.get_proxy().query_executor() as query_runner:
        result_handler = FromResultSet.iterate
        graph = self.get_proxy().g
        key_name = self.get_proxy().key_property_name
        return _link(
            executor=query_runner,
            execute=result_handler,
            g=graph,
            key_property_name=key_name,
            **kwargs,
        )
def _put_column(self, *, table_uri: str, column: Column, executor: ExecuteQuery) -> None:
    """
    Write one column of a table: its vertex, the edge to the table, and its description.

    The Column vertex is keyed by ``make_column_uri(table_uri, column.name)`` and carries the
    properties ``_properties_of`` extracts for ``name``, ``col_type`` and ``sort_order``.  A
    Column edge is linked between the Table vertex keyed by ``table_uri`` and the new vertex.
    When ``column.description`` is not None it is written via ``_put_column_description``.

    :param table_uri: key of the Table vertex that owns the column
    :param column: column to write; must not carry stats
    :param executor: callable used by the helpers to run the Gremlin queries
    :raises RuntimeError: if ``column.stats`` is truthy.  Raised before anything is written, so
        a rejected column leaves no partial vertex or edge behind.
    """
    # stats are handled elsewhere but it would be weird to get them here; reject the column up
    # front instead of after the vertex, edge and description have already been written
    if column.stats:
        raise RuntimeError('stats, data_subject_type, data_storage_security are handled elsewhere!')

    # TODO: could do these async
    column_uri: str = make_column_uri(table_uri=table_uri, column_name=column.name)

    vertex_id: Any = _upsert(
        executor=executor,
        g=self.g,
        label=VertexTypes.Column,
        key=column_uri,
        key_property_name=self.key_property_name,
        **_properties_of(column, 'name', 'col_type', 'sort_order'),
    )

    _link(
        executor=executor,
        g=self.g,
        edge_label=EdgeTypes.Column,
        key_property_name=self.key_property_name,
        vertex1_label=VertexTypes.Table,
        vertex1_key=table_uri,
        vertex2_id=vertex_id,
    )

    # Add the description if present
    if column.description is not None:
        self._put_column_description(
            executor=executor,
            table_uri=table_uri,
            column_name=column.name,
            description=column.description,
        )
def _put_table(self, *, table: Table, executor: ExecuteQuery) -> None:
    """
    Write ``table`` together with the hierarchy above it and the details attached to it.

    In order: the database, cluster and schema are written; the Table vertex is upserted (with
    ``is_view`` and ``name``) and linked to its schema; then, when present, the table writer
    relation, the table description, each programmatic description and each tag are written;
    the updated timestamp is recorded; finally every column is written.

    :param table: table to write
    :param executor: callable used by the helpers to run the Gremlin queries
    """
    # The _put_* helpers take a name and return nothing, so the URI of each level has to be
    # rebuilt here before the next level down can be written.
    self._put_database(database=table.database, executor=executor)
    db_uri: str = make_database_uri(database_name=table.database)

    self._put_cluster(cluster=table.cluster, database_uri=db_uri, executor=executor)
    cluster_key: str = make_cluster_uri(database_uri=db_uri, cluster_name=table.cluster)

    self._put_schema(schema=table.schema, cluster_uri=cluster_key, executor=executor)
    schema_key: str = make_schema_uri(cluster_uri=cluster_key, schema_name=table.schema)

    table_key: str = make_table_uri(schema_uri=schema_key, table_name=table.name)
    table_id: Any = _upsert(
        executor=executor,
        g=self.g,
        label=VertexTypes.Table,
        key=table_key,
        key_property_name=self.key_property_name,
        is_view=table.is_view,
        name=table.name,
    )
    _link(
        executor=executor,
        g=self.g,
        edge_label=EdgeTypes.Table,
        key_property_name=self.key_property_name,
        vertex1_label=VertexTypes.Schema,
        vertex1_key=schema_key,
        vertex2_id=table_id,
    )

    # relate the table to whatever writes it
    if table.table_writer:
        self._put_app_table_relation(executor=executor, app_key=table.table_writer.id, table_uri=table_key)

    # descriptions: the table's own one first, then the programmatic ones
    if table.description is not None:
        self._put_table_description(executor=executor, table_uri=table_key, description=table.description)
    for programmatic_description in table.programmatic_descriptions:
        self._put_programmatic_table_description(
            executor=executor, table_uri=table_key, description=programmatic_description)

    # tags
    for table_tag in table.tags:
        self._add_tag(executor=executor, id=table_key, tag=table_tag.tag_name)

    self._put_updated_timestamp(executor=executor)

    # columns
    for table_column in table.columns:
        self._put_column(executor=executor, table_uri=table_key, column=table_column)