def _create_next_node(self) -> Iterator[GraphNode]:
    """
    Lazily yield every graph node that describes this table.

    Emission order:
      1. the table node itself,
      2. the table description node (only when a description is set),
      3. one tag node per entry in ``self.tags``,
      4. for each column: the column node, then its description node and
         its badge nodes when present,
      5. the database, cluster and schema nodes, each emitted only if its
         key is not yet in ``TableMetadata.serialized_nodes_keys`` (the key
         is recorded there right before the node is yielded).

    :return: iterator over ``GraphNode`` objects
    """
    yield self._create_table_node()

    if self.description:
        table_description_key = self._get_table_description_key(self.description)
        yield self.description.get_node(table_description_key)

    # Table-level tag nodes
    if self.tags:
        yield from (TagMetadata.create_tag_node(tag) for tag in self.tags)

    for column in self.columns:
        column_key = self._get_col_key(column)
        column_attributes = {
            ColumnMetadata.COLUMN_NAME: column.name,
            ColumnMetadata.COLUMN_TYPE: column.type,
            ColumnMetadata.COLUMN_ORDER: column.sort_order,
        }
        yield GraphNode(
            key=column_key,
            label=ColumnMetadata.COLUMN_NODE_LABEL,
            attributes=column_attributes,
        )

        if column.description:
            column_description_key = self._get_col_description_key(column, column.description)
            yield column.description.get_node(column_description_key)

        if not column.badges:
            continue

        # Badge nodes are produced by BadgeMetadata, anchored on this column's key
        column_badges = BadgeMetadata(
            start_label=ColumnMetadata.COLUMN_NODE_LABEL,
            start_key=self._get_col_key(column),
            badges=column.badges,
        )
        yield from column_badges.create_nodes()

    # Database, cluster, schema: built eagerly, emitted at most once per key
    container_nodes = [
        GraphNode(
            key=self._get_database_key(),
            label=TableMetadata.DATABASE_NODE_LABEL,
            attributes={'name': self.database},
        ),
        GraphNode(
            key=self._get_cluster_key(),
            label=TableMetadata.CLUSTER_NODE_LABEL,
            attributes={'name': self.cluster},
        ),
        GraphNode(
            key=self._get_schema_key(),
            label=TableMetadata.SCHEMA_NODE_LABEL,
            attributes={'name': self.schema},
        ),
    ]
    for container_node in container_nodes:
        if container_node.key in TableMetadata.serialized_nodes_keys:
            continue
        TableMetadata.serialized_nodes_keys.add(container_node.key)
        yield container_node
class TestBadge(unittest.TestCase):
    """
    Tests for ``BadgeMetadata`` node and relation creation.

    Each creation test serializes the produced objects (through the neo4j
    serializer or the Neptune serializer) and checks that the expected
    dictionary for every badge appears in the serialized output.
    """

    def setUp(self) -> None:
        super().setUp()
        # Shared fixture: two badges attached to a column key.
        self.badge_metadata = BadgeMetadata(
            start_label='Column',
            start_key='hive://default.base/test/ds',
            badges=[badge1, badge2],
        )

    def test_get_badge_key(self) -> None:
        """The badge key is expected to equal the badge name."""
        badge_key = self.badge_metadata.get_badge_key(badge1.name)
        self.assertEqual(badge_key, badge1.name)

    def test_create_nodes(self) -> None:
        """One node per badge, each carrying key, label and category (neo4j format)."""
        nodes = self.badge_metadata.create_nodes()
        self.assertEqual(len(nodes), 2)

        serialized_nodes = [neo4_serializer.serialize_node(node) for node in nodes]

        for badge in (badge1, badge2):
            expected_node = {
                NODE_KEY: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name),
                NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
                BadgeMetadata.BADGE_CATEGORY: badge.category,
            }
            # assertIn reports both operands on failure, unlike assertTrue(x in y)
            self.assertIn(expected_node, serialized_nodes)

    def test_create_nodes_neptune(self) -> None:
        """Every badge node appears in the Neptune bulk-loader serialization."""
        nodes = self.badge_metadata.create_nodes()
        serialized_nodes = [neptune_serializer.convert_node(node) for node in nodes]

        for badge in (badge1, badge2):
            expected_node = {
                NEPTUNE_HEADER_ID: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name),
                NEPTUNE_HEADER_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
                # The extraction timestamp varies per run, so any value is accepted.
                NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
                NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
                BadgeMetadata.BADGE_CATEGORY + ':String(single)': badge.category,
            }
            self.assertIn(expected_node, serialized_nodes)

    def test_bad_key_entity_match(self) -> None:
        """Constructing with the 'Column' label and a table-style key must raise."""
        column_label = 'Column'
        table_key = 'hive://default.base/test'

        with self.assertRaises(Exception):
            BadgeMetadata(
                start_label=column_label,
                start_key=table_key,
                badges=[badge1, badge2],
            )

    def test_bad_entity_label(self) -> None:
        """Constructing with the 'User' start label must raise."""
        user_label = 'User'
        table_key = 'hive://default.base/test'

        with self.assertRaises(Exception):
            BadgeMetadata(
                start_label=user_label,
                start_key=table_key,
                badges=[badge1, badge2],
            )

    def test_create_relation(self) -> None:
        """One relation per badge, linking the start entity to the badge node (neo4j format)."""
        relations = self.badge_metadata.create_relation()
        serialized_relations = [
            neo4_serializer.serialize_relationship(relation) for relation in relations
        ]
        self.assertEqual(len(relations), 2)

        for badge in (badge1, badge2):
            expected_relation = {
                RELATION_START_LABEL: self.badge_metadata.start_label,
                RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
                RELATION_START_KEY: self.badge_metadata.start_key,
                RELATION_END_KEY: BadgeMetadata.get_badge_key(badge.name),
                RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
                RELATION_REVERSE_TYPE: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
            }
            self.assertIn(expected_relation, serialized_relations)

    def test_create_relation_neptune(self) -> None:
        """Each badge yields a forward and a reversed edge in the Neptune serialization."""
        relations = self.badge_metadata.create_relation()
        # Flatten the per-relation row lists without the quadratic sum(lists, []) pattern.
        serialized_relations: List[Dict] = [
            row
            for rel in relations
            for row in neptune_serializer.convert_relationship(rel)
        ]

        edge_id_format = "{from_vertex_id}_{to_vertex_id}_{label}"
        start_key = self.badge_metadata.start_key

        for badge in (badge1, badge2):
            badge_key = BadgeMetadata.get_badge_key(badge.name)

            neptune_forward_expected = {
                NEPTUNE_HEADER_ID: edge_id_format.format(
                    from_vertex_id=start_key,
                    to_vertex_id=badge_key,
                    label=BadgeMetadata.BADGE_RELATION_TYPE,
                ),
                NEPTUNE_RELATIONSHIP_HEADER_FROM: start_key,
                NEPTUNE_RELATIONSHIP_HEADER_TO: badge_key,
                NEPTUNE_HEADER_LABEL: BadgeMetadata.BADGE_RELATION_TYPE,
                NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
                NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
            }
            neptune_reversed_expected = {
                NEPTUNE_HEADER_ID: edge_id_format.format(
                    from_vertex_id=badge_key,
                    to_vertex_id=start_key,
                    label=BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
                ),
                NEPTUNE_RELATIONSHIP_HEADER_FROM: badge_key,
                NEPTUNE_RELATIONSHIP_HEADER_TO: start_key,
                NEPTUNE_HEADER_LABEL: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
                NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
                NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
            }

            self.assertIn(neptune_forward_expected, serialized_relations)
            self.assertIn(neptune_reversed_expected, serialized_relations)
class TestBadge(unittest.TestCase):
    """
    Tests for ``BadgeMetadata`` node and relation creation.

    Creation tests serialize the produced objects with the neo4j serializer
    and check that the expected dictionary for every badge is present.
    """

    def setUp(self) -> None:
        super().setUp()
        # Shared fixture: two badges attached to a column key.
        self.badge_metadata = BadgeMetadata(
            start_label='Column',
            start_key='hive://default.base/test/ds',
            badges=[badge1, badge2],
        )

    def test_get_badge_key(self) -> None:
        """The badge key is expected to equal the badge name."""
        badge_key = self.badge_metadata.get_badge_key(badge1.name)
        self.assertEqual(badge_key, badge1.name)

    def test_create_nodes(self) -> None:
        """One node per badge, each carrying key, label and category."""
        nodes = self.badge_metadata.create_nodes()
        self.assertEqual(len(nodes), 2)

        serialized_nodes = [neo4_serializer.serialize_node(node) for node in nodes]

        for badge in (badge1, badge2):
            expected_node = {
                NODE_KEY: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name),
                NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
                BadgeMetadata.BADGE_CATEGORY: badge.category,
            }
            # assertIn reports both operands on failure, unlike assertTrue(x in y)
            self.assertIn(expected_node, serialized_nodes)

    def test_bad_key_entity_match(self) -> None:
        """Constructing with the 'Column' label and a table-style key must raise."""
        column_label = 'Column'
        table_key = 'hive://default.base/test'

        with self.assertRaises(Exception):
            BadgeMetadata(
                start_label=column_label,
                start_key=table_key,
                badges=[badge1, badge2],
            )

    def test_bad_entity_label(self) -> None:
        """Constructing with the 'User' start label must raise."""
        user_label = 'User'
        table_key = 'hive://default.base/test'

        with self.assertRaises(Exception):
            BadgeMetadata(
                start_label=user_label,
                start_key=table_key,
                badges=[badge1, badge2],
            )

    def test_create_relation(self) -> None:
        """One relation per badge, linking the start entity to the badge node."""
        relations = self.badge_metadata.create_relation()
        serialized_relations = [
            neo4_serializer.serialize_relationship(relation) for relation in relations
        ]
        self.assertEqual(len(relations), 2)

        for badge in (badge1, badge2):
            expected_relation = {
                RELATION_START_LABEL: self.badge_metadata.start_label,
                RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
                RELATION_START_KEY: self.badge_metadata.start_key,
                RELATION_END_KEY: BadgeMetadata.get_badge_key(badge.name),
                RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
                RELATION_REVERSE_TYPE: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
            }
            self.assertIn(expected_relation, serialized_relations)
def _create_next_node(self) -> Iterator[Any]:  # noqa: C901
    """
    Lazily yield every node (as a dictionary) that describes this table.

    Emission order:
      1. the table node; entries of ``self.attrs`` are merged in without
         overriding the keys already set on the node,
      2. the table description node (only when a description is set),
      3. one tag node per entry in ``self.tags``,
      4. for each column: the column node, then its description node and
         its badge nodes when present,
      5. the database, cluster and schema nodes, each emitted only if its
         ``NodeTuple`` is not yet in ``TableMetadata.serialized_nodes`` (the
         tuple is recorded there right before the node is yielded).

    :return: iterator over node dictionaries (and whatever the tag / badge
        helpers produce)
    """
    table_node = {
        NODE_LABEL: TableMetadata.TABLE_NODE_LABEL,
        NODE_KEY: self._get_table_key(),
        TableMetadata.TABLE_NAME: self.name,
        TableMetadata.IS_VIEW: self.is_view,
    }
    if self.attrs:
        # Extra attributes never overwrite the core table fields
        for attr_name, attr_value in self.attrs.items():
            table_node.setdefault(attr_name, attr_value)
    yield table_node

    if self.description:
        table_description_key = self._get_table_description_key(self.description)
        yield self.description.get_node_dict(table_description_key)

    # Table-level tag nodes
    if self.tags:
        yield from (TagMetadata.create_tag_node(tag) for tag in self.tags)

    for column in self.columns:
        column_node = {
            NODE_LABEL: ColumnMetadata.COLUMN_NODE_LABEL,
            NODE_KEY: self._get_col_key(column),
            ColumnMetadata.COLUMN_NAME: column.name,
            ColumnMetadata.COLUMN_TYPE: column.type,
            ColumnMetadata.COLUMN_ORDER: column.sort_order,
        }
        yield column_node

        if column.description:
            column_description_key = self._get_col_description_key(column, column.description)
            yield column.description.get_node_dict(column_description_key)

        if not column.badges:
            continue

        # Badge nodes are produced by BadgeMetadata, anchored on this column's key
        column_badges = BadgeMetadata(
            db_name=self._get_database_key(),
            schema=self._get_schema_key(),
            start_label=ColumnMetadata.COLUMN_NODE_LABEL,
            start_key=self._get_col_key(column),
            badges=column.badges,
            cluster=self._get_cluster_key(),
        )
        yield from column_badges.create_nodes()

    # Database, cluster, schema: emitted at most once per distinct NodeTuple
    container_tuples = [
        NodeTuple(
            key=self._get_database_key(),
            name=self.database,
            label=TableMetadata.DATABASE_NODE_LABEL,
        ),
        NodeTuple(
            key=self._get_cluster_key(),
            name=self.cluster,
            label=TableMetadata.CLUSTER_NODE_LABEL,
        ),
        NodeTuple(
            key=self._get_schema_key(),
            name=self.schema,
            label=TableMetadata.SCHEMA_NODE_LABEL,
        ),
    ]
    for container in container_tuples:
        if container in TableMetadata.serialized_nodes:
            continue
        TableMetadata.serialized_nodes.add(container)
        yield {
            NODE_LABEL: container.label,
            NODE_KEY: container.key,
            'name': container.name,
        }