def _put_cluster(self, *, database_uri: str, cluster: str, executor: ExecuteQuery) -> None:
    cluster_uri: str = make_cluster_uri(database_uri=database_uri, cluster_name=cluster)
    node_id: Any = _upsert(executor=executor, g=self.g, label=VertexTypes.Cluster, key=cluster_uri,
                           key_property_name=self.key_property_name, name=cluster)
    _link(executor=executor, g=self.g, edge_label=EdgeTypes.Cluster, key_property_name=self.key_property_name,
          vertex1_label=VertexTypes.Database, vertex1_key=database_uri, vertex2_id=node_id)

def _put_schema(self, *, cluster_uri: str, schema: str, executor: ExecuteQuery) -> None:
    schema_uri: str = make_schema_uri(cluster_uri=cluster_uri, schema_name=schema)
    node_id: Any = _upsert(executor=executor, g=self.g, label=VertexTypes.Schema, key=schema_uri,
                           key_property_name=self.key_property_name, name=schema)
    _link(executor=executor, g=self.g, edge_label=EdgeTypes.Schema, key_property_name=self.key_property_name,
          vertex1_label=VertexTypes.Cluster, vertex1_key=cluster_uri, vertex2_id=node_id)

def _upsert(self, **kwargs: Any) -> None:
    with self.get_proxy().query_executor() as executor:
        return _upsert(executor=executor, execute=FromResultSet.iterate, g=self.get_proxy().g,
                       key_property_name=self.get_proxy().key_property_name, **kwargs)

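# Illustrative only (not part of the source): the helper above forwards to the
# module-level _upsert with the proxy's traversal source and key property, so a
# caller supplies just the vertex label, key, and properties. A hedged sketch
# mirroring _put_database below ('hive' is a made-up value, not a fixture):
#
#     self._upsert(label=VertexTypes.Database,
#                  key=make_database_uri(database_name='hive'),
#                  name='hive')
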
def _put_programmatic_table_description(self, *, table_uri: str, description: ProgrammaticDescription,
                                        executor: ExecuteQuery) -> None:
    g = _V(g=self.g, label=VertexTypes.Table, key=table_uri).id()
    table_vertex_id = executor(query=g, get=FromResultSet.getOptional)
    if not table_vertex_id:
        # if the table doesn't exist, don't try to import a description
        return None
    desc_key = make_description_uri(subject_uri=table_uri, source=description.source)
    vertex_id: Any = _upsert(executor=executor, g=self.g, label=VertexTypes.Description, key=desc_key,
                             key_property_name=self.key_property_name, description=description.text,
                             source=description.source)
    _link(executor=executor, g=self.g, edge_label=EdgeTypes.Description,
          key_property_name=self.key_property_name, vertex1_id=table_vertex_id, vertex2_id=vertex_id)

def _put_column(self, *, table_uri: str, column: Column, executor: ExecuteQuery) -> None:
    # TODO: could do these async
    column_uri: str = make_column_uri(table_uri=table_uri, column_name=column.name)
    vertex_id: Any = _upsert(executor=executor, g=self.g, label=VertexTypes.Column, key=column_uri,
                             key_property_name=self.key_property_name,
                             **_properties_of(column, 'name', 'col_type', 'sort_order'))
    _link(executor=executor, g=self.g, edge_label=EdgeTypes.Column, key_property_name=self.key_property_name,
          vertex1_label=VertexTypes.Table, vertex1_key=table_uri, vertex2_id=vertex_id)

    # Add the description if present
    if column.description is not None:
        self._put_column_description(executor=executor, table_uri=table_uri, column_name=column.name,
                                     description=column.description)

    # stats are handled elsewhere, but it would be weird to get them here
    if column.stats:
        raise RuntimeError('stats, data_subject_type, data_storage_security are handled elsewhere!')

def _put_table(self, *, table: Table, executor: ExecuteQuery) -> None:
    # note: I hate this API where we pass a name, get back nothing and then recapitulate the key logic.
    self._put_database(database=table.database, executor=executor)
    database_uri: str = make_database_uri(database_name=table.database)
    self._put_cluster(cluster=table.cluster, database_uri=database_uri, executor=executor)
    cluster_uri: str = make_cluster_uri(database_uri=database_uri, cluster_name=table.cluster)
    self._put_schema(schema=table.schema, cluster_uri=cluster_uri, executor=executor)
    schema_uri: str = make_schema_uri(cluster_uri=cluster_uri, schema_name=table.schema)

    table_uri: str = make_table_uri(schema_uri=schema_uri, table_name=table.name)
    table_vertex_id: Any = _upsert(executor=executor, g=self.g, label=VertexTypes.Table, key=table_uri,
                                   key_property_name=self.key_property_name, is_view=table.is_view,
                                   name=table.name)
    _link(executor=executor, g=self.g, edge_label=EdgeTypes.Table, key_property_name=self.key_property_name,
          vertex1_label=VertexTypes.Schema, vertex1_key=schema_uri, vertex2_id=table_vertex_id)

    if table.table_writer:
        self._put_app_table_relation(executor=executor, app_key=table.table_writer.id, table_uri=table_uri)

    # Attach table description
    if table.description is not None:
        self._put_table_description(executor=executor, table_uri=table_uri, description=table.description)
    for description in table.programmatic_descriptions:
        self._put_programmatic_table_description(executor=executor, table_uri=table_uri, description=description)

    # create tags
    for tag in table.tags:
        self._add_tag(executor=executor, id=table_uri, tag=tag.tag_name)

    self._put_updated_timestamp(executor=executor)

    # create columns
    for column in table.columns:
        self._put_column(executor=executor, table_uri=table_uri, column=column)

def _put_database(self, *, database: str, executor: ExecuteQuery) -> None:
    database_uri = make_database_uri(database_name=database)
    _upsert(executor=executor, g=self.g, label=VertexTypes.Database, key=database_uri,
            key_property_name=self.key_property_name, name=database)

def _put_app(self, *, data: Application, executor: ExecuteQuery) -> None:
    _upsert(executor=executor, g=self.g, label=VertexTypes.Application, key=data.id,
            key_property_name=self.key_property_name, **_properties_except(data))

def _put_user(self, *, data: User, executor: ExecuteQuery) -> None:
    if data.user_id is None:
        raise NotImplementedError('Must pass some user_id to derive vertex key')
    _upsert(executor=executor, g=self.g, label=VertexTypes.User, key=data.user_id,
            key_property_name=self.key_property_name, **_properties_except(data))

def _put_updated_timestamp(self, executor: ExecuteQuery) -> datetime:
    t = timestamp()
    _upsert(executor=executor, g=self.g, label=VertexTypes.Updatedtimestamp, key=AMUNDSEN_TIMESTAMP_KEY,
            key_property_name=self.key_property_name, latest_timestamp=t)
    return t

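# Hedged sketch (illustrative, not part of the proxy): the _put_* methods above
# share the same two-step pattern -- _upsert the vertex by its URI key, then
# _link it to its parent vertex by parent label + key. A hypothetical new child
# type would be wired up the same way; the key scheme and the reuse of the
# Column vertex/edge labels below are stand-ins, not real Amundsen API:
#
#     def _put_example_child(self, *, parent_uri: str, name: str,
#                            executor: ExecuteQuery) -> None:
#         child_uri = f'{parent_uri}/example/{name}'  # hypothetical key format
#         child_id = _upsert(executor=executor, g=self.g,
#                            label=VertexTypes.Column,        # stand-in label
#                            key=child_uri,
#                            key_property_name=self.key_property_name,
#                            name=name)
#         _link(executor=executor, g=self.g,
#               edge_label=EdgeTypes.Column,                  # stand-in edge label
#               key_property_name=self.key_property_name,
#               vertex1_label=VertexTypes.Table, vertex1_key=parent_uri,
#               vertex2_id=child_id)
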
def test_upsert_thrice(self) -> None:
    executor = mock.Mock(wraps=self.get_proxy().query_executor())

    # test that we will insert
    db_name = Fixtures.next_database()
    database_uri = f'database://{db_name}'
    vertex_type = VertexType(
        label=VertexTypes.Database.value.label,
        properties=VertexTypes.Database.value.properties + tuple([Property(name='foo', type=GremlinType.String)]))
    exists = self._get(label=vertex_type, key=database_uri, extra_traversal=__.count())
    self.assertEqual(exists, 0)
    _upsert(executor=executor, g=self.get_proxy().g, key_property_name=self.get_proxy().key_property_name,
            label=vertex_type, key=database_uri, name='test', foo='bar')
    exists = self._get(label=vertex_type, key=database_uri, extra_traversal=__.count())
    self.assertEqual(exists, 1)
    id = self._get(label=vertex_type, key=database_uri, extra_traversal=__.id())

    executor.reset_mock()
    _upsert(executor=executor, g=self.get_proxy().g, key_property_name=self.get_proxy().key_property_name,
            label=vertex_type, key=database_uri, name='test')
    exists = self._get(label=vertex_type, key=database_uri, extra_traversal=__.count())
    self.assertEqual(exists, 1)
    self.assertEqual(executor.call_count, 2)
    # the first call is the get:
    self.assertEqual(executor.call_args_list[0][1]['query'].bytecode, __.V(id).valueMap(True).bytecode)
    # the second call is the write; nothing changed, so it reduces to fetching the id:
    self.assertEqual(executor.call_args_list[1][1]['query'].bytecode, __.V(id).id().bytecode)

    executor.reset_mock()
    _upsert(executor=executor, g=self.get_proxy().g, key_property_name=self.get_proxy().key_property_name,
            label=vertex_type, key=database_uri, name='test2', foo=None)
    exists = self._get(label=vertex_type, key=database_uri, extra_traversal=__.count())
    self.assertEqual(exists, 1)
    self.assertEqual(executor.call_count, 2)
    # the first call is the get:
    self.assertEqual(executor.call_args_list[0][1]['query'].bytecode, __.V(id).valueMap(True).bytecode)
    # the second call is the write: drop 'foo' and set the new name:
    self.assertEqual(
        executor.call_args_list[1][1]['query'].bytecode,
        __.V(id).sideEffect(__.properties('foo').drop()).property(
            Cardinality.single, 'name', 'test2').id().bytecode)
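
# Note (a hedged reading of the assertions above, not documented behavior):
# _upsert appears to first read the existing vertex with valueMap(True), then
# issue a second traversal containing only the delta -- a bare .id() when
# nothing differs, property(Cardinality.single, ...) steps for changed values,
# and sideEffect(properties(...).drop()) for properties passed as None.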