Exemple #1
0
class TestBadge(unittest.TestCase):
    """Tests for the nodes and relations produced by ``BadgeMetadata``.

    The fixture attaches ``badge1`` and ``badge2`` to a ``Column`` entity.
    The tests then check the records produced by ``neo4_serializer`` and by
    ``neptune_serializer`` for that fixture.
    """

    def setUp(self) -> None:
        super().setUp()
        self.badge_metada = BadgeMetadata(
            start_label='Column',
            start_key='hive://default.base/test/ds',
            badges=[badge1, badge2])

    # ------------------------------------------------------------------
    # Builders for the expected serialized records.
    # ------------------------------------------------------------------

    @staticmethod
    def _expected_node(badge) -> dict:
        """Build the record expected from ``neo4_serializer.serialize_node``."""
        return {
            NODE_KEY: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name),
            NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            BadgeMetadata.BADGE_CATEGORY: badge.category,
        }

    @staticmethod
    def _expected_neptune_node(badge) -> dict:
        """Build the record expected from ``neptune_serializer.convert_node``."""
        return {
            NEPTUNE_HEADER_ID:
            BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name),
            NEPTUNE_HEADER_LABEL:
            BadgeMetadata.BADGE_NODE_LABEL,
            # The extraction timestamp is not predictable, so match anything.
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            ANY,
            NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT:
            NEPTUNE_CREATION_TYPE_JOB,
            BadgeMetadata.BADGE_CATEGORY + ':String(single)':
            badge.category,
        }

    def _expected_relation(self, badge) -> dict:
        """Build the record expected from ``neo4_serializer.serialize_relationship``."""
        return {
            RELATION_START_LABEL: self.badge_metada.start_label,
            RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            RELATION_START_KEY: self.badge_metada.start_key,
            RELATION_END_KEY: BadgeMetadata.get_badge_key(badge.name),
            RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
            RELATION_REVERSE_TYPE: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
        }

    @staticmethod
    def _expected_neptune_relation(from_key, to_key, label) -> dict:
        """Build one expected Neptune edge row going from *from_key* to *to_key*."""
        return {
            NEPTUNE_HEADER_ID:
            "{from_vertex_id}_{to_vertex_id}_{label}".format(
                from_vertex_id=from_key,
                to_vertex_id=to_key,
                label=label,
            ),
            NEPTUNE_RELATIONSHIP_HEADER_FROM:
            from_key,
            NEPTUNE_RELATIONSHIP_HEADER_TO:
            to_key,
            NEPTUNE_HEADER_LABEL:
            label,
            # The extraction timestamp is not predictable, so match anything.
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            NEPTUNE_CREATION_TYPE_JOB,
        }

    # ------------------------------------------------------------------
    # Tests.
    # ------------------------------------------------------------------

    def test_get_badge_key(self) -> None:
        badge_key = self.badge_metada.get_badge_key(badge1.name)
        self.assertEqual(badge_key, badge1.name)

    def test_create_nodes(self) -> None:
        nodes = self.badge_metada.create_nodes()
        self.assertEqual(len(nodes), 2)

        serialized_nodes = [
            neo4_serializer.serialize_node(node) for node in nodes
        ]

        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        for badge in (badge1, badge2):
            self.assertIn(self._expected_node(badge), serialized_nodes)

    def test_create_nodes_neptune(self) -> None:
        nodes = self.badge_metada.create_nodes()
        serialized_nodes = [
            neptune_serializer.convert_node(node) for node in nodes
        ]

        for badge in (badge1, badge2):
            self.assertIn(self._expected_neptune_node(badge),
                          serialized_nodes)

    def test_bad_key_entity_match(self) -> None:
        # A table-shaped key combined with the 'Column' label must be rejected.
        column_label = 'Column'
        table_key = 'hive://default.base/test'

        with self.assertRaises(Exception):
            BadgeMetadata(start_label=column_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_bad_entity_label(self) -> None:
        # Constructing with the 'User' label is expected to raise.
        user_label = 'User'
        table_key = 'hive://default.base/test'

        with self.assertRaises(Exception):
            BadgeMetadata(start_label=user_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_create_relation(self) -> None:
        relations = self.badge_metada.create_relation()
        serialized_relations = [
            neo4_serializer.serialize_relationship(relation)
            for relation in relations
        ]
        self.assertEqual(len(relations), 2)

        for badge in (badge1, badge2):
            self.assertIn(self._expected_relation(badge),
                          serialized_relations)

    def test_create_relation_neptune(self) -> None:
        relations = self.badge_metada.create_relation()
        # convert_relationship yields several rows per relationship; flatten
        # them in one pass instead of repeated list concatenation via sum().
        serialized_relations: List[Dict] = [
            row
            for rel in relations
            for row in neptune_serializer.convert_relationship(rel)
        ]

        start_key = self.badge_metada.start_key
        for badge in (badge1, badge2):
            badge_key = BadgeMetadata.get_badge_key(badge.name)

            # Forward edge: entity -> badge.
            self.assertIn(
                self._expected_neptune_relation(
                    start_key, badge_key,
                    BadgeMetadata.BADGE_RELATION_TYPE),
                serialized_relations)
            # Reverse edge: badge -> entity.
            self.assertIn(
                self._expected_neptune_relation(
                    badge_key, start_key,
                    BadgeMetadata.INVERSE_BADGE_RELATION_TYPE),
                serialized_relations)
Exemple #2
0
class TestBadge(unittest.TestCase):
    """Tests for the nodes and relations produced by ``BadgeMetadata``.

    The fixture attaches ``badge1`` and ``badge2`` to a ``Column`` entity.
    The tests then check the records produced by ``neo4_serializer`` for
    that fixture.
    """

    def setUp(self) -> None:
        super().setUp()
        self.badge_metada = BadgeMetadata(
            start_label='Column',
            start_key='hive://default.base/test/ds',
            badges=[badge1, badge2])

    @staticmethod
    def _expected_node(badge) -> dict:
        """Build the record expected from ``neo4_serializer.serialize_node``."""
        return {
            NODE_KEY: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name),
            NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            BadgeMetadata.BADGE_CATEGORY: badge.category,
        }

    def _expected_relation(self, badge) -> dict:
        """Build the record expected from ``neo4_serializer.serialize_relationship``."""
        return {
            RELATION_START_LABEL: self.badge_metada.start_label,
            RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
            RELATION_START_KEY: self.badge_metada.start_key,
            RELATION_END_KEY: BadgeMetadata.get_badge_key(badge.name),
            RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
            RELATION_REVERSE_TYPE: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
        }

    def test_get_badge_key(self) -> None:
        badge_key = self.badge_metada.get_badge_key(badge1.name)
        self.assertEqual(badge_key, badge1.name)

    def test_create_nodes(self) -> None:
        nodes = self.badge_metada.create_nodes()
        self.assertEqual(len(nodes), 2)

        serialized_nodes = [
            neo4_serializer.serialize_node(node) for node in nodes
        ]

        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        for badge in (badge1, badge2):
            self.assertIn(self._expected_node(badge), serialized_nodes)

    def test_bad_key_entity_match(self) -> None:
        # A table-shaped key combined with the 'Column' label must be rejected.
        column_label = 'Column'
        table_key = 'hive://default.base/test'

        with self.assertRaises(Exception):
            BadgeMetadata(start_label=column_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_bad_entity_label(self) -> None:
        # Constructing with the 'User' label is expected to raise.
        user_label = 'User'
        table_key = 'hive://default.base/test'

        with self.assertRaises(Exception):
            BadgeMetadata(start_label=user_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_create_relation(self) -> None:
        relations = self.badge_metada.create_relation()
        serialized_relations = [
            neo4_serializer.serialize_relationship(relation)
            for relation in relations
        ]
        self.assertEqual(len(relations), 2)

        for badge in (badge1, badge2):
            self.assertIn(self._expected_relation(badge),
                          serialized_relations)
Exemple #3
0
    def _create_next_relation(self) -> Iterator[GraphRelationship]:
        """
        Lazily produce the relationships for this table, one per ``next()``.

        Emission order:
          1. schema -> table
          2. table -> description (only when a description is set)
          3. table -> tag, once per tag
          4. for each column: table -> column, then column -> description
             (when set), then every relation created by the column's
             ``BadgeMetadata`` (when the column has badges)
          5. database -> cluster and cluster -> schema, each only if its
             (start_key, end_key, type) triple is not yet present in
             ``TableMetadata.serialized_rels_keys``; the triple is recorded
             there before the relationship is yielded.

        :return: iterator over the relationships
        """
        # 1. schema -> table
        yield GraphRelationship(
            start_key=self._get_schema_key(),
            start_label=TableMetadata.SCHEMA_NODE_LABEL,
            end_key=self._get_table_key(),
            end_label=TableMetadata.TABLE_NODE_LABEL,
            type=TableMetadata.SCHEMA_TABLE_RELATION_TYPE,
            reverse_type=TableMetadata.TABLE_SCHEMA_RELATION_TYPE,
            attributes={},
        )

        # 2. table -> description
        if self.description:
            yield self.description.get_relation(
                TableMetadata.TABLE_NODE_LABEL,
                self._get_table_key(),
                self._get_table_description_key(self.description),
            )

        # 3. table -> tag (a falsy ``tags`` means there is nothing to emit)
        for tag_name in self.tags or ():
            yield GraphRelationship(
                start_label=TableMetadata.TABLE_NODE_LABEL,
                start_key=self._get_table_key(),
                end_label=TagMetadata.TAG_NODE_LABEL,
                end_key=TagMetadata.get_tag_key(tag_name),
                type=TableMetadata.TABLE_TAG_RELATION_TYPE,
                reverse_type=TableMetadata.TAG_TABLE_RELATION_TYPE,
                attributes={},
            )

        # 4. per-column relationships
        for column in self.columns:
            yield GraphRelationship(
                start_label=TableMetadata.TABLE_NODE_LABEL,
                start_key=self._get_table_key(),
                end_label=ColumnMetadata.COLUMN_NODE_LABEL,
                end_key=self._get_col_key(column),
                type=TableMetadata.TABLE_COL_RELATION_TYPE,
                reverse_type=TableMetadata.COL_TABLE_RELATION_TYPE,
                attributes={},
            )

            if column.description:
                yield column.description.get_relation(
                    ColumnMetadata.COLUMN_NODE_LABEL,
                    self._get_col_key(column),
                    self._get_col_description_key(column, column.description),
                )

            if column.badges:
                column_badges = BadgeMetadata(
                    start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                    start_key=self._get_col_key(column),
                    badges=column.badges,
                )
                yield from column_badges.create_relation()

        # 5. relationships shared between tables; both candidates are built
        # up front, then filtered through the class-level set of seen keys.
        database_to_cluster = GraphRelationship(
            start_label=TableMetadata.DATABASE_NODE_LABEL,
            end_label=TableMetadata.CLUSTER_NODE_LABEL,
            start_key=self._get_database_key(),
            end_key=self._get_cluster_key(),
            type=TableMetadata.DATABASE_CLUSTER_RELATION_TYPE,
            reverse_type=TableMetadata.CLUSTER_DATABASE_RELATION_TYPE,
            attributes={},
        )
        cluster_to_schema = GraphRelationship(
            start_label=TableMetadata.CLUSTER_NODE_LABEL,
            end_label=TableMetadata.SCHEMA_NODE_LABEL,
            start_key=self._get_cluster_key(),
            end_key=self._get_schema_key(),
            type=TableMetadata.CLUSTER_SCHEMA_RELATION_TYPE,
            reverse_type=TableMetadata.SCHEMA_CLUSTER_RELATION_TYPE,
            attributes={},
        )

        for candidate in (database_to_cluster, cluster_to_schema):
            seen_key = (candidate.start_key, candidate.end_key, candidate.type)
            if seen_key in TableMetadata.serialized_rels_keys:
                continue
            TableMetadata.serialized_rels_keys.add(seen_key)
            yield candidate
Exemple #4
0
    def _create_next_relation(self) -> Iterator[Any]:
        """
        Lazily produce the relation records for this table, one per ``next()``.

        Emission order:
          1. schema -> table
          2. table -> description (only when a description is set)
          3. table -> tag, once per tag
          4. for each column: table -> column, then column -> description
             (when set), then every relation created by the column's
             ``BadgeMetadata`` (when the column has badges)
          5. database -> cluster and cluster -> schema, each only if an equal
             ``RelTuple`` is not yet present in
             ``TableMetadata.serialized_rels``; the tuple is recorded there
             before its dict form is yielded.

        :return: iterator over the relation records
        """
        # 1. schema -> table
        schema_to_table = {
            RELATION_START_LABEL: TableMetadata.SCHEMA_NODE_LABEL,
            RELATION_END_LABEL: TableMetadata.TABLE_NODE_LABEL,
            RELATION_START_KEY: self._get_schema_key(),
            RELATION_END_KEY: self._get_table_key(),
            RELATION_TYPE: TableMetadata.SCHEMA_TABLE_RELATION_TYPE,
            RELATION_REVERSE_TYPE: TableMetadata.TABLE_SCHEMA_RELATION_TYPE,
        }
        yield schema_to_table

        # 2. table -> description
        if self.description:
            yield self.description.get_relation(
                TableMetadata.TABLE_NODE_LABEL,
                self._get_table_key(),
                self._get_table_description_key(self.description),
            )

        # 3. table -> tag (a falsy ``tags`` means there is nothing to emit)
        for tag_name in self.tags or ():
            table_to_tag = {
                RELATION_START_LABEL: TableMetadata.TABLE_NODE_LABEL,
                RELATION_END_LABEL: TagMetadata.TAG_NODE_LABEL,
                RELATION_START_KEY: self._get_table_key(),
                RELATION_END_KEY: TagMetadata.get_tag_key(tag_name),
                RELATION_TYPE: TableMetadata.TABLE_TAG_RELATION_TYPE,
                RELATION_REVERSE_TYPE: TableMetadata.TAG_TABLE_RELATION_TYPE,
            }
            yield table_to_tag

        # 4. per-column relations
        for column in self.columns:
            table_to_column = {
                RELATION_START_LABEL: TableMetadata.TABLE_NODE_LABEL,
                RELATION_END_LABEL: ColumnMetadata.COLUMN_NODE_LABEL,
                RELATION_START_KEY: self._get_table_key(),
                RELATION_END_KEY: self._get_col_key(column),
                RELATION_TYPE: TableMetadata.TABLE_COL_RELATION_TYPE,
                RELATION_REVERSE_TYPE: TableMetadata.COL_TABLE_RELATION_TYPE,
            }
            yield table_to_column

            if column.description:
                yield column.description.get_relation(
                    ColumnMetadata.COLUMN_NODE_LABEL,
                    self._get_col_key(column),
                    self._get_col_description_key(column, column.description),
                )

            if column.badges:
                column_badges = BadgeMetadata(
                    db_name=self._get_database_key(),
                    schema=self._get_schema_key(),
                    start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                    start_key=self._get_col_key(column),
                    badges=column.badges,
                    cluster=self._get_cluster_key(),
                )
                yield from column_badges.create_relation()

        # 5. relations shared between tables; both candidates are built up
        # front, then filtered through the class-level collection of seen ones.
        database_to_cluster = RelTuple(
            start_label=TableMetadata.DATABASE_NODE_LABEL,
            end_label=TableMetadata.CLUSTER_NODE_LABEL,
            start_key=self._get_database_key(),
            end_key=self._get_cluster_key(),
            type=TableMetadata.DATABASE_CLUSTER_RELATION_TYPE,
            reverse_type=TableMetadata.CLUSTER_DATABASE_RELATION_TYPE,
        )
        cluster_to_schema = RelTuple(
            start_label=TableMetadata.CLUSTER_NODE_LABEL,
            end_label=TableMetadata.SCHEMA_NODE_LABEL,
            start_key=self._get_cluster_key(),
            end_key=self._get_schema_key(),
            type=TableMetadata.CLUSTER_SCHEMA_RELATION_TYPE,
            reverse_type=TableMetadata.SCHEMA_CLUSTER_RELATION_TYPE,
        )

        for candidate in (database_to_cluster, cluster_to_schema):
            if candidate in TableMetadata.serialized_rels:
                continue
            TableMetadata.serialized_rels.add(candidate)
            yield {
                RELATION_START_LABEL: candidate.start_label,
                RELATION_END_LABEL: candidate.end_label,
                RELATION_START_KEY: candidate.start_key,
                RELATION_END_KEY: candidate.end_key,
                RELATION_TYPE: candidate.type,
                RELATION_REVERSE_TYPE: candidate.reverse_type,
            }