Beispiel #1
0
    def _create_next_node(self) -> Iterator[GraphNode]:
        yield self._create_table_node()

        if self.description:
            node_key = self._get_table_description_key(self.description)
            yield self.description.get_node(node_key)

        # Create the table tag nodes
        if self.tags:
            for tag in self.tags:
                yield TagMetadata.create_tag_node(tag)

        for col in self.columns:
            column_node = GraphNode(key=self._get_col_key(col),
                                    label=ColumnMetadata.COLUMN_NODE_LABEL,
                                    attributes={
                                        ColumnMetadata.COLUMN_NAME: col.name,
                                        ColumnMetadata.COLUMN_TYPE: col.type,
                                        ColumnMetadata.COLUMN_ORDER:
                                        col.sort_order
                                    })
            yield column_node

            if col.description:
                node_key = self._get_col_description_key(col, col.description)
                yield col.description.get_node(node_key)

            if col.badges:
                col_badge_metadata = BadgeMetadata(
                    start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                    start_key=self._get_col_key(col),
                    badges=col.badges)
                for node in col_badge_metadata.create_nodes():
                    yield node

        # Database, cluster, schema
        others = [
            GraphNode(key=self._get_database_key(),
                      label=TableMetadata.DATABASE_NODE_LABEL,
                      attributes={'name': self.database}),
            GraphNode(key=self._get_cluster_key(),
                      label=TableMetadata.CLUSTER_NODE_LABEL,
                      attributes={'name': self.cluster}),
            GraphNode(key=self._get_schema_key(),
                      label=TableMetadata.SCHEMA_NODE_LABEL,
                      attributes={'name': self.schema})
        ]

        for node_tuple in others:
            if node_tuple.key not in TableMetadata.serialized_nodes_keys:
                TableMetadata.serialized_nodes_keys.add(node_tuple.key)
                yield node_tuple
Beispiel #2
0
class TestBadge(unittest.TestCase):
    def setUp(self) -> None:
        super(TestBadge, self).setUp()
        self.badge_metada = BadgeMetadata(
            start_label='Column',
            start_key='hive://default.base/test/ds',
            badges=[badge1, badge2])

    def test_get_badge_key(self) -> None:
        badge_key = self.badge_metada.get_badge_key(badge1.name)
        self.assertEqual(badge_key, badge1.name)

    def test_create_nodes(self) -> None:
        nodes = self.badge_metada.create_nodes()
        self.assertEqual(len(nodes), 2)

        node1 = {
            NODE_KEY: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge1.name),
            NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            BadgeMetadata.BADGE_CATEGORY: badge1.category
        }
        node2 = {
            NODE_KEY: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge2.name),
            NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            BadgeMetadata.BADGE_CATEGORY: badge2.category
        }
        serialized_nodes = [
            neo4_serializer.serialize_node(node) for node in nodes
        ]

        self.assertTrue(node1 in serialized_nodes)
        self.assertTrue(node2 in serialized_nodes)

    def test_create_nodes_neptune(self) -> None:
        nodes = self.badge_metada.create_nodes()
        serialized_nodes = [
            neptune_serializer.convert_node(node) for node in nodes
        ]

        expected_node1 = {
            NEPTUNE_HEADER_ID:
            BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge1.name),
            NEPTUNE_HEADER_LABEL:
            BadgeMetadata.BADGE_NODE_LABEL,
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            ANY,
            NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT:
            NEPTUNE_CREATION_TYPE_JOB,
            BadgeMetadata.BADGE_CATEGORY + ':String(single)':
            badge1.category
        }

        expected_node2 = {
            NEPTUNE_HEADER_ID:
            BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge2.name),
            NEPTUNE_HEADER_LABEL:
            BadgeMetadata.BADGE_NODE_LABEL,
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            ANY,
            NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT:
            NEPTUNE_CREATION_TYPE_JOB,
            BadgeMetadata.BADGE_CATEGORY + ':String(single)':
            badge2.category
        }

        self.assertTrue(expected_node1 in serialized_nodes)
        self.assertTrue(expected_node2 in serialized_nodes)

    def test_bad_key_entity_match(self) -> None:
        column_label = 'Column'
        table_key = 'hive://default.base/test'

        self.assertRaises(Exception,
                          BadgeMetadata,
                          start_label=column_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_bad_entity_label(self) -> None:
        user_label = 'User'
        table_key = 'hive://default.base/test'
        self.assertRaises(Exception,
                          BadgeMetadata,
                          start_label=user_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_create_relation(self) -> None:
        relations = self.badge_metada.create_relation()
        serialized_relations = [
            neo4_serializer.serialize_relationship(relation)
            for relation in relations
        ]
        self.assertEqual(len(relations), 2)

        relation1 = {
            RELATION_START_LABEL: self.badge_metada.start_label,
            RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            RELATION_START_KEY: self.badge_metada.start_key,
            RELATION_END_KEY: BadgeMetadata.get_badge_key(badge1.name),
            RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
            RELATION_REVERSE_TYPE: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
        }
        relation2 = {
            RELATION_START_LABEL: self.badge_metada.start_label,
            RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            RELATION_START_KEY: self.badge_metada.start_key,
            RELATION_END_KEY: BadgeMetadata.get_badge_key(badge2.name),
            RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
            RELATION_REVERSE_TYPE: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
        }

        self.assertTrue(relation1 in serialized_relations)
        self.assertTrue(relation2 in serialized_relations)

    def test_create_relation_neptune(self) -> None:
        relations = self.badge_metada.create_relation()
        serialized_relations: List[Dict] = sum([
            neptune_serializer.convert_relationship(rel) for rel in relations
        ], [])

        neptune_forward_expected_1 = {
            NEPTUNE_HEADER_ID:
            "{from_vertex_id}_{to_vertex_id}_{label}".format(
                from_vertex_id=self.badge_metada.start_key,
                to_vertex_id=BadgeMetadata.get_badge_key(badge1.name),
                label=BadgeMetadata.BADGE_RELATION_TYPE,
            ),
            NEPTUNE_RELATIONSHIP_HEADER_FROM:
            self.badge_metada.start_key,
            NEPTUNE_RELATIONSHIP_HEADER_TO:
            BadgeMetadata.get_badge_key(badge1.name),
            NEPTUNE_HEADER_LABEL:
            BadgeMetadata.BADGE_RELATION_TYPE,
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            NEPTUNE_CREATION_TYPE_JOB
        }

        neptune_reversed_expected_1 = {
            NEPTUNE_HEADER_ID:
            "{from_vertex_id}_{to_vertex_id}_{label}".format(
                from_vertex_id=BadgeMetadata.get_badge_key(badge1.name),
                to_vertex_id=self.badge_metada.start_key,
                label=BadgeMetadata.INVERSE_BADGE_RELATION_TYPE),
            NEPTUNE_RELATIONSHIP_HEADER_FROM:
            BadgeMetadata.get_badge_key(badge1.name),
            NEPTUNE_RELATIONSHIP_HEADER_TO:
            self.badge_metada.start_key,
            NEPTUNE_HEADER_LABEL:
            BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            NEPTUNE_CREATION_TYPE_JOB
        }

        neptune_forward_expected_2 = {
            NEPTUNE_HEADER_ID:
            "{from_vertex_id}_{to_vertex_id}_{label}".format(
                from_vertex_id=self.badge_metada.start_key,
                to_vertex_id=BadgeMetadata.get_badge_key(badge2.name),
                label=BadgeMetadata.BADGE_RELATION_TYPE,
            ),
            NEPTUNE_RELATIONSHIP_HEADER_FROM:
            self.badge_metada.start_key,
            NEPTUNE_RELATIONSHIP_HEADER_TO:
            BadgeMetadata.get_badge_key(badge2.name),
            NEPTUNE_HEADER_LABEL:
            BadgeMetadata.BADGE_RELATION_TYPE,
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            NEPTUNE_CREATION_TYPE_JOB
        }

        neptune_reversed_expected_2 = {
            NEPTUNE_HEADER_ID:
            "{from_vertex_id}_{to_vertex_id}_{label}".format(
                from_vertex_id=BadgeMetadata.get_badge_key(badge2.name),
                to_vertex_id=self.badge_metada.start_key,
                label=BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
            ),
            NEPTUNE_RELATIONSHIP_HEADER_FROM:
            BadgeMetadata.get_badge_key(badge2.name),
            NEPTUNE_RELATIONSHIP_HEADER_TO:
            self.badge_metada.start_key,
            NEPTUNE_HEADER_LABEL:
            BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            NEPTUNE_CREATION_TYPE_JOB
        }

        self.assertTrue(neptune_forward_expected_1 in serialized_relations)
        self.assertTrue(neptune_reversed_expected_1 in serialized_relations)
        self.assertTrue(neptune_forward_expected_2 in serialized_relations)
        self.assertTrue(neptune_reversed_expected_2 in serialized_relations)
Beispiel #3
0
class TestBadge(unittest.TestCase):
    def setUp(self) -> None:
        super(TestBadge, self).setUp()
        self.badge_metada = BadgeMetadata(
            start_label='Column',
            start_key='hive://default.base/test/ds',
            badges=[badge1, badge2])

    def test_get_badge_key(self) -> None:
        badge_key = self.badge_metada.get_badge_key(badge1.name)
        self.assertEqual(badge_key, badge1.name)

    def test_create_nodes(self) -> None:
        nodes = self.badge_metada.create_nodes()
        self.assertEqual(len(nodes), 2)

        node1 = {
            NODE_KEY: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge1.name),
            NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            BadgeMetadata.BADGE_CATEGORY: badge1.category
        }
        node2 = {
            NODE_KEY: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge2.name),
            NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            BadgeMetadata.BADGE_CATEGORY: badge2.category
        }
        serialized_nodes = [
            neo4_serializer.serialize_node(node) for node in nodes
        ]

        self.assertTrue(node1 in serialized_nodes)
        self.assertTrue(node2 in serialized_nodes)

    def test_bad_key_entity_match(self) -> None:
        column_label = 'Column'
        table_key = 'hive://default.base/test'

        self.assertRaises(Exception,
                          BadgeMetadata,
                          start_label=column_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_bad_entity_label(self) -> None:
        user_label = 'User'
        table_key = 'hive://default.base/test'
        self.assertRaises(Exception,
                          BadgeMetadata,
                          start_label=user_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_create_relation(self) -> None:
        relations = self.badge_metada.create_relation()
        serialized_relations = [
            neo4_serializer.serialize_relationship(relation)
            for relation in relations
        ]
        self.assertEqual(len(relations), 2)

        relation1 = {
            RELATION_START_LABEL: self.badge_metada.start_label,
            RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            RELATION_START_KEY: self.badge_metada.start_key,
            RELATION_END_KEY: BadgeMetadata.get_badge_key(badge1.name),
            RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
            RELATION_REVERSE_TYPE: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
        }
        relation2 = {
            RELATION_START_LABEL: self.badge_metada.start_label,
            RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            RELATION_START_KEY: self.badge_metada.start_key,
            RELATION_END_KEY: BadgeMetadata.get_badge_key(badge2.name),
            RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
            RELATION_REVERSE_TYPE: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
        }

        self.assertTrue(relation1 in serialized_relations)
        self.assertTrue(relation2 in serialized_relations)
Beispiel #4
0
    def _create_next_node(self) -> Iterator[Any]:  # noqa: C901

        table_node = {NODE_LABEL: TableMetadata.TABLE_NODE_LABEL,
                      NODE_KEY: self._get_table_key(),
                      TableMetadata.TABLE_NAME: self.name,
                      TableMetadata.IS_VIEW: self.is_view}
        if self.attrs:
            for k, v in self.attrs.items():
                if k not in table_node:
                    table_node[k] = v
        yield table_node

        if self.description:
            node_key = self._get_table_description_key(self.description)
            yield self.description.get_node_dict(node_key)

        # Create the table tag node
        if self.tags:
            for tag in self.tags:
                yield TagMetadata.create_tag_node(tag)

        for col in self.columns:
            yield {
                NODE_LABEL: ColumnMetadata.COLUMN_NODE_LABEL,
                NODE_KEY: self._get_col_key(col),
                ColumnMetadata.COLUMN_NAME: col.name,
                ColumnMetadata.COLUMN_TYPE: col.type,
                ColumnMetadata.COLUMN_ORDER: col.sort_order}

            if col.description:
                node_key = self._get_col_description_key(col, col.description)
                yield col.description.get_node_dict(node_key)

            if col.badges:
                badge_metadata = BadgeMetadata(db_name=self._get_database_key(),
                                               schema=self._get_schema_key(),
                                               start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                                               start_key=self._get_col_key(col),
                                               badges=col.badges,
                                               cluster=self._get_cluster_key())
                badge_nodes = badge_metadata.create_nodes()
                for node in badge_nodes:
                    yield node

        # Database, cluster, schema
        others = [NodeTuple(key=self._get_database_key(),
                            name=self.database,
                            label=TableMetadata.DATABASE_NODE_LABEL),
                  NodeTuple(key=self._get_cluster_key(),
                            name=self.cluster,
                            label=TableMetadata.CLUSTER_NODE_LABEL),
                  NodeTuple(key=self._get_schema_key(),
                            name=self.schema,
                            label=TableMetadata.SCHEMA_NODE_LABEL)
                  ]

        for node_tuple in others:
            if node_tuple not in TableMetadata.serialized_nodes:
                TableMetadata.serialized_nodes.add(node_tuple)
                yield {
                    NODE_LABEL: node_tuple.label,
                    NODE_KEY: node_tuple.key,
                    'name': node_tuple.name
                }