Esempio n. 1
0
    def _create_column_nodes(self, col: ColumnMetadata) -> Iterator[GraphNode]:
        """Yield the graph nodes that belong to a single column.

        Order of emission: the column node itself, the description node
        (only when ``col.description`` is truthy), the nodes returned by
        ``BadgeMetadata.get_badge_nodes()`` (only when ``col.badges`` is
        truthy), and finally the nodes produced by the column's type
        metadata (only when ``col.get_type_metadata()`` is truthy).

        :param col: column whose nodes should be generated
        :return: iterator over the generated ``GraphNode`` objects
        """
        column_key = self._get_col_key(col)
        column_attributes = {
            ColumnMetadata.COLUMN_NAME: col.name,
            ColumnMetadata.COLUMN_TYPE: col.type,
            ColumnMetadata.COLUMN_ORDER: col.sort_order,
        }
        yield GraphNode(
            key=column_key,
            label=ColumnMetadata.COLUMN_NODE_LABEL,
            attributes=column_attributes,
        )

        description = col.description
        if description:
            description_key = self._get_col_description_key(col, description)
            yield description.get_node(description_key)

        if col.badges:
            # Badges hang off the column, so the column is the start vertex.
            badges_for_column = BadgeMetadata(
                start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                start_key=self._get_col_key(col),
                badges=col.badges,
            )
            yield from badges_for_column.get_badge_nodes()

        column_type_metadata = col.get_type_metadata()
        if column_type_metadata:
            yield from column_type_metadata.create_node_iterator()
Esempio n. 2
0
    def test_create_relation(self) -> None:
        """Verify ``create_relation`` output for the two fixture badges.

        The relations returned by ``self.badge_metada.create_relation()`` are
        serialized with ``neo4_serializer`` and must contain, for each of
        ``badge1`` and ``badge2``, a relation from the fixture's start vertex
        to that badge. Exactly two relations are expected.
        """
        relations = self.badge_metada.create_relation()
        serialized_relations = [
            neo4_serializer.serialize_relationship(relation)
            for relation in relations
        ]
        self.assertEqual(len(relations), 2)

        for badge in (badge1, badge2):
            expected_relation = {
                RELATION_START_LABEL: self.badge_metada.start_label,
                RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
                RELATION_START_KEY: self.badge_metada.start_key,
                RELATION_END_KEY: BadgeMetadata.get_badge_key(badge.name),
                RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
                RELATION_REVERSE_TYPE:
                BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
            }
            # assertIn reports both the missing dict and the container on
            # failure, unlike assertTrue(x in y) which only says "False".
            self.assertIn(expected_relation, serialized_relations)
Esempio n. 3
0
    def test_create_relation(self) -> None:
        """Drain ``create_next_relation`` and compare against both badges.

        Relations are pulled one at a time until a falsy value is returned,
        serialized with ``neo4_serializer`` and compared, in order, with the
        expected relation for ``badge1`` followed by the one for ``badge2``.
        """
        actual = []
        while True:
            next_relation = self.badge_metada.create_next_relation()
            if not next_relation:
                break
            actual.append(
                neo4_serializer.serialize_relationship(next_relation))

        expected = [
            {
                RELATION_START_LABEL: self.badge_metada.start_label,
                RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
                RELATION_START_KEY: self.badge_metada.start_key,
                RELATION_END_KEY: BadgeMetadata.get_badge_key(badge.name),
                RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
                RELATION_REVERSE_TYPE:
                BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
            }
            for badge in (badge1, badge2)
        ]

        self.assertEqual(expected, actual)
Esempio n. 4
0
    def _create_column_relations(
            self, col: ColumnMetadata) -> Iterator[GraphRelationship]:
        """Yield the graph relationships that belong to a single column.

        Order of emission: the table -> column relationship, the column ->
        description relationship (only when ``col.description`` is truthy),
        the relations returned by ``BadgeMetadata.get_badge_relations()``
        (only when ``col.badges`` is truthy), and finally the relations
        produced by the column's type metadata (only when
        ``col.get_type_metadata()`` is truthy).

        :param col: column whose relationships should be generated
        :return: iterator over the generated ``GraphRelationship`` objects
        """
        table_key = self._get_table_key()
        column_key = self._get_col_key(col)
        yield GraphRelationship(
            start_label=TableMetadata.TABLE_NODE_LABEL,
            start_key=table_key,
            end_label=ColumnMetadata.COLUMN_NODE_LABEL,
            end_key=column_key,
            type=TableMetadata.TABLE_COL_RELATION_TYPE,
            reverse_type=TableMetadata.COL_TABLE_RELATION_TYPE,
            attributes={},
        )

        description = col.description
        if description:
            owner_key = self._get_col_key(col)
            description_key = self._get_col_description_key(col, description)
            yield description.get_relation(
                ColumnMetadata.COLUMN_NODE_LABEL, owner_key, description_key)

        if col.badges:
            # Badges hang off the column, so the column is the start vertex.
            badges_for_column = BadgeMetadata(
                start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                start_key=self._get_col_key(col),
                badges=col.badges,
            )
            yield from badges_for_column.get_badge_relations()

        column_type_metadata = col.get_type_metadata()
        if column_type_metadata:
            yield from column_type_metadata.create_relation_iterator()
Esempio n. 5
0
 def setUp(self) -> None:
     """Create the ``BadgeMetadata`` fixture used by the tests."""
     super().setUp()
     fixture_kwargs = dict(
         db_name='hive',
         schema=SCHEMA,
         start_label='Column',
         start_key='hive://default.base/test/ds',
         cluster=CLUSTER,
         badges=[badge1, badge2],
     )
     self.badge_metada = BadgeMetadata(**fixture_kwargs)
Esempio n. 6
0
    def _create_next_node(self) -> Iterator[GraphNode]:
        """Yield every graph node needed to represent this table.

        Order of emission: the table node, the table description node (when
        a description is set), one node per tag, then for each column its
        node, its description node and its badge nodes. The database,
        cluster and schema nodes come last and are yielded only if their key
        is not yet recorded in ``TableMetadata.serialized_nodes_keys``; a
        yielded key is added to that set.
        """
        yield self._create_table_node()

        if self.description:
            table_description_key = self._get_table_description_key(
                self.description)
            yield self.description.get_node(table_description_key)

        # Tag nodes of the table; a falsy ``self.tags`` yields nothing.
        for tag in self.tags or ():
            yield TagMetadata(tag).get_node()

        for col in self.columns:
            column_attributes = {
                ColumnMetadata.COLUMN_NAME: col.name,
                ColumnMetadata.COLUMN_TYPE: col.type,
                ColumnMetadata.COLUMN_ORDER: col.sort_order,
            }
            column_key = self._get_col_key(col)
            yield GraphNode(
                key=column_key,
                label=ColumnMetadata.COLUMN_NODE_LABEL,
                attributes=column_attributes,
            )

            if col.description:
                column_description_key = self._get_col_description_key(
                    col, col.description)
                yield col.description.get_node(column_description_key)

            if col.badges:
                badges_for_column = BadgeMetadata(
                    start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                    start_key=self._get_col_key(col),
                    badges=col.badges,
                )
                yield from badges_for_column.get_badge_nodes()

        # Database, cluster and schema nodes, built before any is yielded.
        database_node = GraphNode(key=self._get_database_key(),
                                  label=TableMetadata.DATABASE_NODE_LABEL,
                                  attributes={'name': self.database})
        cluster_node = GraphNode(key=self._get_cluster_key(),
                                 label=TableMetadata.CLUSTER_NODE_LABEL,
                                 attributes={'name': self.cluster})
        schema_node = GraphNode(key=self._get_schema_key(),
                                label=TableMetadata.SCHEMA_NODE_LABEL,
                                attributes={'name': self.schema})

        for shared_node in (database_node, cluster_node, schema_node):
            # Skip nodes whose key was already recorded on the class.
            if shared_node.key in TableMetadata.serialized_nodes_keys:
                continue
            TableMetadata.serialized_nodes_keys.add(shared_node.key)
            yield shared_node
Esempio n. 7
0
    def _load_csv(self) -> None:
        """Load badges and tables from CSV and prepare the result iterator.

        Rows of ``self.badge_file_location`` are stored on ``self.badges``
        and grouped into ``Badge`` objects by the key
        ``self._get_key(database, cluster, schema, table_name)``. Then one
        ``BadgeMetadata`` is built for every row of
        ``self.table_file_location`` (keyed the same way, using the row's
        ``name`` column as the table name); tables without any badge get an
        empty badge list. The results are exposed through ``self._iter``.

        :raises KeyError: if a CSV row lacks one of the columns read here
        """
        with open(self.badge_file_location, 'r') as fin:
            self.badges = [dict(row) for row in csv.DictReader(fin)]

        badges_by_table = defaultdict(list)
        for badge_row in self.badges:
            table_key = self._get_key(badge_row['database'],
                                      badge_row['cluster'],
                                      badge_row['schema'],
                                      badge_row['table_name'])
            badges_by_table[table_key].append(
                Badge(name=badge_row['name'],
                      category=badge_row['category']))

        with open(self.table_file_location, 'r') as fin:
            tables = [dict(row) for row in csv.DictReader(fin)]

        results = []
        for table_row in tables:
            table_key = self._get_key(table_row['database'],
                                      table_row['cluster'],
                                      table_row['schema'],
                                      table_row['name'])
            # Indexing a defaultdict never returns None and would insert an
            # empty entry for every badge-less table; .get() gives the same
            # empty-list fallback without mutating the mapping.
            table_badges = badges_by_table.get(table_key, [])
            results.append(
                BadgeMetadata(start_label=TableMetadata.TABLE_NODE_LABEL,
                              start_key=table_key,
                              badges=table_badges))
        self._iter = iter(results)
Esempio n. 8
0
    def test_create_relation_neptune(self) -> None:
        """Verify the Neptune serialization of the fixture's badge relations.

        Every relation from ``self.badge_metada.create_relation()`` is
        converted with ``neptune_serializer.convert_relationship`` and the
        converted rows are flattened into one list. For each of ``badge1``
        and ``badge2`` the list must contain a forward edge (start vertex ->
        badge) and a reversed edge (badge -> start vertex).
        """
        def expected_edge(from_vertex_id: str, to_vertex_id: str,
                          label: str) -> Dict:
            # Expected bulk-loader row for one directed edge.
            return {
                NEPTUNE_HEADER_ID:
                "{from_vertex_id}_{to_vertex_id}_{label}".format(
                    from_vertex_id=from_vertex_id,
                    to_vertex_id=to_vertex_id,
                    label=label,
                ),
                NEPTUNE_RELATIONSHIP_HEADER_FROM:
                from_vertex_id,
                NEPTUNE_RELATIONSHIP_HEADER_TO:
                to_vertex_id,
                NEPTUNE_HEADER_LABEL:
                label,
                NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
                ANY,
                NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
                NEPTUNE_CREATION_TYPE_JOB
            }

        relations = self.badge_metada.create_relation()
        # Flatten in linear time; sum(lists, []) copies the accumulator on
        # every step and only works when each item is a list.
        serialized_relations: List[Dict] = [
            row
            for rel in relations
            for row in neptune_serializer.convert_relationship(rel)
        ]

        start_key = self.badge_metada.start_key
        for badge in (badge1, badge2):
            badge_key = BadgeMetadata.get_badge_key(badge.name)
            forward_expected = expected_edge(
                start_key, badge_key, BadgeMetadata.BADGE_RELATION_TYPE)
            reversed_expected = expected_edge(
                badge_key, start_key,
                BadgeMetadata.INVERSE_BADGE_RELATION_TYPE)

            # assertIn shows the missing row and the container on failure.
            self.assertIn(forward_expected, serialized_relations)
            self.assertIn(reversed_expected, serialized_relations)
Esempio n. 9
0
    def _create_next_relation(self) -> Iterator[GraphRelationship]:
        """Yield every graph relationship needed to represent this table.

        Order of emission: schema -> table, table -> description (when a
        description is set), table -> tag for each tag, then for each column
        the table -> column relationship, the column -> description
        relationship and the column's badge relations. The database ->
        cluster and cluster -> schema relationships come last and are yielded
        only if their ``(start_key, end_key, type)`` triple is not yet
        recorded in ``TableMetadata.serialized_rels_keys``; a yielded triple
        is added to that set.
        """
        yield GraphRelationship(
            start_key=self._get_schema_key(),
            start_label=TableMetadata.SCHEMA_NODE_LABEL,
            end_key=self._get_table_key(),
            end_label=TableMetadata.TABLE_NODE_LABEL,
            type=TableMetadata.SCHEMA_TABLE_RELATION_TYPE,
            reverse_type=TableMetadata.TABLE_SCHEMA_RELATION_TYPE,
            attributes={},
        )

        if self.description:
            table_key = self._get_table_key()
            table_description_key = self._get_table_description_key(
                self.description)
            yield self.description.get_relation(
                TableMetadata.TABLE_NODE_LABEL, table_key,
                table_description_key)

        # A falsy ``self.tags`` yields no tag relationships.
        for tag in self.tags or ():
            yield GraphRelationship(
                start_label=TableMetadata.TABLE_NODE_LABEL,
                start_key=self._get_table_key(),
                end_label=TagMetadata.TAG_NODE_LABEL,
                end_key=TagMetadata.get_tag_key(tag),
                type=TableMetadata.TABLE_TAG_RELATION_TYPE,
                reverse_type=TableMetadata.TAG_TABLE_RELATION_TYPE,
                attributes={},
            )

        for col in self.columns:
            yield GraphRelationship(
                start_label=TableMetadata.TABLE_NODE_LABEL,
                start_key=self._get_table_key(),
                end_label=ColumnMetadata.COLUMN_NODE_LABEL,
                end_key=self._get_col_key(col),
                type=TableMetadata.TABLE_COL_RELATION_TYPE,
                reverse_type=TableMetadata.COL_TABLE_RELATION_TYPE,
                attributes={},
            )

            if col.description:
                column_key = self._get_col_key(col)
                column_description_key = self._get_col_description_key(
                    col, col.description)
                yield col.description.get_relation(
                    ColumnMetadata.COLUMN_NODE_LABEL, column_key,
                    column_description_key)

            if col.badges:
                badges_for_column = BadgeMetadata(
                    start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                    start_key=self._get_col_key(col),
                    badges=col.badges,
                )
                yield from badges_for_column.create_relation()

        # Both shared relationships are built before any is yielded.
        database_to_cluster = GraphRelationship(
            start_label=TableMetadata.DATABASE_NODE_LABEL,
            end_label=TableMetadata.CLUSTER_NODE_LABEL,
            start_key=self._get_database_key(),
            end_key=self._get_cluster_key(),
            type=TableMetadata.DATABASE_CLUSTER_RELATION_TYPE,
            reverse_type=TableMetadata.CLUSTER_DATABASE_RELATION_TYPE,
            attributes={})
        cluster_to_schema = GraphRelationship(
            start_label=TableMetadata.CLUSTER_NODE_LABEL,
            end_label=TableMetadata.SCHEMA_NODE_LABEL,
            start_key=self._get_cluster_key(),
            end_key=self._get_schema_key(),
            type=TableMetadata.CLUSTER_SCHEMA_RELATION_TYPE,
            reverse_type=TableMetadata.SCHEMA_CLUSTER_RELATION_TYPE,
            attributes={})

        for shared_rel in (database_to_cluster, cluster_to_schema):
            dedup_key = (shared_rel.start_key, shared_rel.end_key,
                         shared_rel.type)
            # Skip relationships whose triple was already recorded.
            if dedup_key in TableMetadata.serialized_rels_keys:
                continue
            TableMetadata.serialized_rels_keys.add(dedup_key)
            yield shared_rel
Esempio n. 10
0
class TestBadge(unittest.TestCase):
    """Tests for ``BadgeMetadata`` node, relation and record generation.

    The fixture is a ``BadgeMetadata`` attached to a column key and carrying
    the module-level ``badge1`` and ``badge2``. Most tests drain one of the
    ``create_next_*`` methods, serialize each item and compare the result
    with hand-built expectations.
    """

    def setUp(self) -> None:
        """Create the ``BadgeMetadata`` fixture used by every test."""
        super().setUp()
        self.badge_metada = BadgeMetadata(
            start_label='Column',
            start_key='hive://default.base/test/ds',
            badges=[badge1, badge2],
        )

    @staticmethod
    def _collect_serialized(fetch_next, serialize) -> list:
        """Call ``fetch_next`` until it returns a falsy value.

        Each truthy item is passed through ``serialize``; the serialized
        items are returned in the order they were fetched.
        """
        collected = []
        while True:
            item = fetch_next()
            if not item:
                break
            collected.append(serialize(item))
        return collected

    def test_get_badge_key(self) -> None:
        """The badge key of a badge name is the name itself."""
        self.assertEqual(self.badge_metada.get_badge_key(badge1.name),
                         badge1.name)

    def test_create_nodes(self) -> None:
        """Neo4j-serialized nodes match one expected node per badge."""
        expected = [
            {
                NODE_KEY:
                BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name),
                NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
                BadgeMetadata.BADGE_CATEGORY: badge.category,
            }
            for badge in (badge1, badge2)
        ]

        actual = self._collect_serialized(
            self.badge_metada.create_next_node,
            neo4_serializer.serialize_node)

        self.assertEqual(expected, actual)

    def test_create_nodes_neptune(self) -> None:
        """Neptune-converted nodes match one expected node per badge."""
        actual = self._collect_serialized(
            self.badge_metada.create_next_node,
            neptune_serializer.convert_node)

        def expected_node(badge):
            # Expected bulk-loader row for the node of one badge.
            node_key = BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name)
            node_id = BadgeMetadata.BADGE_NODE_LABEL + ":" + node_key
            return {
                NEPTUNE_HEADER_ID:
                node_id,
                METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT:
                node_key,
                NEPTUNE_HEADER_LABEL:
                BadgeMetadata.BADGE_NODE_LABEL,
                NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
                ANY,
                NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT:
                NEPTUNE_CREATION_TYPE_JOB,
                BadgeMetadata.BADGE_CATEGORY + ':String(single)':
                badge.category
            }

        expected = [expected_node(badge1), expected_node(badge2)]

        self.assertEqual(expected, actual)

    def test_bad_key_entity_match(self) -> None:
        """A 'Column' label combined with a table key is rejected."""
        with self.assertRaises(Exception):
            BadgeMetadata(start_label='Column',
                          start_key='hive://default.base/test',
                          badges=[badge1, badge2])

    def test_bad_entity_label(self) -> None:
        """A 'User' start label is rejected."""
        with self.assertRaises(Exception):
            BadgeMetadata(start_label='User',
                          start_key='hive://default.base/test',
                          badges=[badge1, badge2])

    def test_create_relation(self) -> None:
        """Neo4j-serialized relations match one expected relation per badge."""
        actual = self._collect_serialized(
            self.badge_metada.create_next_relation,
            neo4_serializer.serialize_relationship)

        expected = [
            {
                RELATION_START_LABEL: self.badge_metada.start_label,
                RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
                RELATION_START_KEY: self.badge_metada.start_key,
                RELATION_END_KEY: BadgeMetadata.get_badge_key(badge.name),
                RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
                RELATION_REVERSE_TYPE:
                BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
            }
            for badge in (badge1, badge2)
        ]

        self.assertEqual(expected, actual)

    def test_create_relation_neptune(self) -> None:
        """Neptune-converted relations are a forward/reversed pair per badge."""
        actual = self._collect_serialized(
            self.badge_metada.create_next_relation,
            neptune_serializer.convert_relationship)

        def expected_edge(from_vertex_id, to_vertex_id, label):
            # Expected bulk-loader row for one directed edge; the id and the
            # metadata key carry the same formatted value.
            edge_id = "{label}:{from_vertex_id}_{to_vertex_id}".format(
                from_vertex_id=from_vertex_id,
                to_vertex_id=to_vertex_id,
                label=label,
            )
            return {
                NEPTUNE_HEADER_ID:
                edge_id,
                METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT:
                edge_id,
                NEPTUNE_RELATIONSHIP_HEADER_FROM:
                from_vertex_id,
                NEPTUNE_RELATIONSHIP_HEADER_TO:
                to_vertex_id,
                NEPTUNE_HEADER_LABEL:
                label,
                NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
                ANY,
                NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
                NEPTUNE_CREATION_TYPE_JOB
            }

        start_key = (self.badge_metada.start_label + ':' +
                     self.badge_metada.start_key)
        expected = []
        for badge in (badge1, badge2):
            badge_id = (BadgeMetadata.BADGE_NODE_LABEL + ':' +
                        BadgeMetadata.get_badge_key(badge.name))
            expected.append([
                expected_edge(start_key, badge_id,
                              BadgeMetadata.BADGE_RELATION_TYPE),
                expected_edge(badge_id, start_key,
                              BadgeMetadata.INVERSE_BADGE_RELATION_TYPE),
            ])

        self.assertEqual(expected, actual)

    def test_create_records(self) -> None:
        """MySQL-serialized records match one expected record per badge."""
        expected = [
            {
                'rk': BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name),
                'category': badge.category,
            }
            for badge in (badge1, badge2)
        ]

        actual = self._collect_serialized(
            self.badge_metada.create_next_record,
            mysql_serializer.serialize_record)

        self.assertEqual(expected, actual)
Esempio n. 11
0
    def test_create_relation_neptune(self) -> None:
        """Drain ``create_next_relation`` and check the Neptune conversion.

        Each relation is converted with
        ``neptune_serializer.convert_relationship``; the collected results
        must equal, in order, a ``[forward, reversed]`` pair for ``badge1``
        followed by one for ``badge2``.
        """
        actual = []
        while True:
            next_relation = self.badge_metada.create_next_relation()
            if not next_relation:
                break
            actual.append(
                neptune_serializer.convert_relationship(next_relation))

        def expected_edge(from_vertex_id, to_vertex_id, label):
            # Expected bulk-loader row for one directed edge; the id and the
            # metadata key carry the same formatted value.
            edge_id = "{label}:{from_vertex_id}_{to_vertex_id}".format(
                from_vertex_id=from_vertex_id,
                to_vertex_id=to_vertex_id,
                label=label,
            )
            return {
                NEPTUNE_HEADER_ID:
                edge_id,
                METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT:
                edge_id,
                NEPTUNE_RELATIONSHIP_HEADER_FROM:
                from_vertex_id,
                NEPTUNE_RELATIONSHIP_HEADER_TO:
                to_vertex_id,
                NEPTUNE_HEADER_LABEL:
                label,
                NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
                ANY,
                NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
                NEPTUNE_CREATION_TYPE_JOB
            }

        start_key = (self.badge_metada.start_label + ':' +
                     self.badge_metada.start_key)
        expected = []
        for badge in (badge1, badge2):
            badge_id = (BadgeMetadata.BADGE_NODE_LABEL + ':' +
                        BadgeMetadata.get_badge_key(badge.name))
            expected.append([
                expected_edge(start_key, badge_id,
                              BadgeMetadata.BADGE_RELATION_TYPE),
                expected_edge(badge_id, start_key,
                              BadgeMetadata.INVERSE_BADGE_RELATION_TYPE),
            ])

        self.assertEqual(expected, actual)
Esempio n. 12
0
    def _create_next_relation(self) -> Iterator[Any]:
        """
        Generate the relation dicts for this table, one per iteration step.

        Emitted in order: schema -> table; table -> description (when a
        description is set); table -> tag for each tag; for each column,
        table -> column, then the column's description relation and badge
        relations when present; finally database -> cluster and
        cluster -> schema, each skipped when its tuple is already recorded in
        ``TableMetadata.serialized_rels``.

        :return: iterator over relations (description and badge relations are
            whatever their own factories return)
        """

        def build_relation(*, start_label: Any, end_label: Any, start_key: Any,
                           end_key: Any, rel_type: Any, reverse_type: Any) -> dict:
            # Single place that knows the key layout of a relation dict.
            return {
                RELATION_START_LABEL: start_label,
                RELATION_END_LABEL: end_label,
                RELATION_START_KEY: start_key,
                RELATION_END_KEY: end_key,
                RELATION_TYPE: rel_type,
                RELATION_REVERSE_TYPE: reverse_type,
            }

        # Schema -> table
        yield build_relation(
            start_label=TableMetadata.SCHEMA_NODE_LABEL,
            end_label=TableMetadata.TABLE_NODE_LABEL,
            start_key=self._get_schema_key(),
            end_key=self._get_table_key(),
            rel_type=TableMetadata.SCHEMA_TABLE_RELATION_TYPE,
            reverse_type=TableMetadata.TABLE_SCHEMA_RELATION_TYPE)

        # Table -> description
        if self.description:
            yield self.description.get_relation(
                TableMetadata.TABLE_NODE_LABEL,
                self._get_table_key(),
                self._get_table_description_key(self.description))

        # Table -> tag (a falsy tag collection produces nothing)
        for table_tag in self.tags or ():
            yield build_relation(
                start_label=TableMetadata.TABLE_NODE_LABEL,
                end_label=TagMetadata.TAG_NODE_LABEL,
                start_key=self._get_table_key(),
                end_key=TagMetadata.get_tag_key(table_tag),
                rel_type=TableMetadata.TABLE_TAG_RELATION_TYPE,
                reverse_type=TableMetadata.TAG_TABLE_RELATION_TYPE)

        for column in self.columns:
            # Table -> column
            yield build_relation(
                start_label=TableMetadata.TABLE_NODE_LABEL,
                end_label=ColumnMetadata.COLUMN_NODE_LABEL,
                start_key=self._get_table_key(),
                end_key=self._get_col_key(column),
                rel_type=TableMetadata.TABLE_COL_RELATION_TYPE,
                reverse_type=TableMetadata.COL_TABLE_RELATION_TYPE)

            # Column -> description
            if column.description:
                yield column.description.get_relation(
                    ColumnMetadata.COLUMN_NODE_LABEL,
                    self._get_col_key(column),
                    self._get_col_description_key(column, column.description))

            # Column -> badges
            if column.badges:
                column_badges = BadgeMetadata(
                    db_name=self._get_database_key(),
                    schema=self._get_schema_key(),
                    start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                    start_key=self._get_col_key(column),
                    badges=column.badges,
                    cluster=self._get_cluster_key())
                yield from column_badges.create_relation()

        # Database -> cluster and cluster -> schema
        hierarchy_links = (
            RelTuple(start_label=TableMetadata.DATABASE_NODE_LABEL,
                     end_label=TableMetadata.CLUSTER_NODE_LABEL,
                     start_key=self._get_database_key(),
                     end_key=self._get_cluster_key(),
                     type=TableMetadata.DATABASE_CLUSTER_RELATION_TYPE,
                     reverse_type=TableMetadata.CLUSTER_DATABASE_RELATION_TYPE),
            RelTuple(start_label=TableMetadata.CLUSTER_NODE_LABEL,
                     end_label=TableMetadata.SCHEMA_NODE_LABEL,
                     start_key=self._get_cluster_key(),
                     end_key=self._get_schema_key(),
                     type=TableMetadata.CLUSTER_SCHEMA_RELATION_TYPE,
                     reverse_type=TableMetadata.SCHEMA_CLUSTER_RELATION_TYPE),
        )

        for link in hierarchy_links:
            # The class-level set remembers tuples already emitted.
            if link in TableMetadata.serialized_rels:
                continue
            TableMetadata.serialized_rels.add(link)
            yield build_relation(
                start_label=link.start_label,
                end_label=link.end_label,
                start_key=link.start_key,
                end_key=link.end_key,
                rel_type=link.type,
                reverse_type=link.reverse_type)
Esempio n. 13
0
    def _create_record_iterator(self) -> Iterator[RDSModel]:
        """
        Generate the RDS records describing this table.

        Order of emission: database, cluster and schema records (each only if
        its ``rk`` is not yet in ``TableMetadata.serialized_records_keys``),
        the table record, the table description record (when a description is
        set), a tag record plus a table-tag record per tag, and for every
        column its record, its description record and its badge records, each
        badge record followed by the matching column-badge record.

        :return: iterator over RDS model records
        """
        # Database, Cluster, Schema
        hierarchy_records: List[RDSModel] = [
            RDSDatabase(rk=self._get_database_key(), name=self.database),
            RDSCluster(rk=self._get_cluster_key(),
                       name=self.cluster,
                       database_rk=self._get_database_key()),
            RDSSchema(rk=self._get_schema_key(),
                      name=self.schema,
                      cluster_rk=self._get_cluster_key()),
        ]
        for hierarchy_record in hierarchy_records:
            # Skip records whose key was already emitted.
            if hierarchy_record.rk in TableMetadata.serialized_records_keys:
                continue
            TableMetadata.serialized_records_keys.add(hierarchy_record.rk)
            yield hierarchy_record

        # Table
        yield RDSTable(
            rk=self._get_table_key(),
            name=self.name,
            is_view=self.is_view,
            schema_rk=self._get_schema_key(),
        )

        # Table description
        if self.description:
            table_description_rk = self._get_table_description_key(self.description)
            # The description's label decides which record class is used.
            is_plain_description = (
                self.description.label == DescriptionMetadata.DESCRIPTION_NODE_LABEL
            )
            description_model = (
                RDSTableDescription if is_plain_description
                else RDSTableProgrammaticDescription
            )
            yield description_model(
                rk=table_description_rk,
                description_source=self.description.source,
                description=self.description.text,
                table_rk=self._get_table_key(),
            )

        # Tag
        for table_tag in self.tags:
            yield TagMetadata(table_tag).get_record()
            yield RDSTableTag(table_rk=self._get_table_key(),
                              tag_rk=TagMetadata.get_tag_key(table_tag))

        # Column
        for column in self.columns:
            yield RDSTableColumn(
                rk=self._get_col_key(column),
                name=column.name,
                type=column.type,
                sort_order=column.sort_order,
                table_rk=self._get_table_key(),
            )

            if column.description:
                column_description_rk = self._get_col_description_key(
                    column, column.description)
                yield RDSColumnDescription(
                    rk=column_description_rk,
                    description_source=column.description.source,
                    description=column.description.text,
                    column_rk=self._get_col_key(column),
                )

            if not column.badges:
                continue

            column_badges = BadgeMetadata(
                start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                start_key=self._get_col_key(column),
                badges=column.badges,
            )
            for badge_record in column_badges.get_badge_records():
                # Badge record first, then the record attaching it to the column.
                yield badge_record
                yield RDSColumnBadge(column_rk=self._get_col_key(column),
                                     badge_rk=badge_record.rk)
Esempio n. 14
0
class TestBadge(unittest.TestCase):
    """Tests BadgeMetadata node/relation creation through the neo4j serializer."""

    def setUp(self) -> None:
        super().setUp()
        # Fixture: two badges attached to a 'Column' start node.
        self.badge_metada = BadgeMetadata(
            start_label='Column',
            start_key='hive://default.base/test/ds',
            badges=[badge1, badge2])

    def test_get_badge_key(self) -> None:
        """get_badge_key returns the badge name unchanged."""
        badge_key = self.badge_metada.get_badge_key(badge1.name)
        self.assertEqual(badge_key, badge1.name)

    def test_create_nodes(self) -> None:
        """One node is created per badge and serializes to the expected dict."""
        nodes = self.badge_metada.create_nodes()
        self.assertEqual(len(nodes), 2)

        serialized_nodes = [
            neo4_serializer.serialize_node(node) for node in nodes
        ]

        for badge in (badge1, badge2):
            expected_node = {
                NODE_KEY: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name),
                NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
                BadgeMetadata.BADGE_CATEGORY: badge.category
            }
            # assertIn reports the container contents on failure,
            # unlike assertTrue(x in y).
            self.assertIn(expected_node, serialized_nodes)

    def test_bad_key_entity_match(self) -> None:
        """Constructing with a 'Column' label and this start key must raise."""
        column_label = 'Column'
        table_key = 'hive://default.base/test'

        self.assertRaises(Exception,
                          BadgeMetadata,
                          start_label=column_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_bad_entity_label(self) -> None:
        """Constructing with the 'User' start label must raise."""
        user_label = 'User'
        table_key = 'hive://default.base/test'
        self.assertRaises(Exception,
                          BadgeMetadata,
                          start_label=user_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_create_relation(self) -> None:
        """One relation is created per badge and serializes to the expected dict."""
        relations = self.badge_metada.create_relation()
        serialized_relations = [
            neo4_serializer.serialize_relationship(relation)
            for relation in relations
        ]
        self.assertEqual(len(relations), 2)

        for badge in (badge1, badge2):
            expected_relation = {
                RELATION_START_LABEL: self.badge_metada.start_label,
                RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
                RELATION_START_KEY: self.badge_metada.start_key,
                RELATION_END_KEY: BadgeMetadata.get_badge_key(badge.name),
                RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
                RELATION_REVERSE_TYPE: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
            }
            self.assertIn(expected_relation, serialized_relations)
Esempio n. 15
0
class TestBadge(unittest.TestCase):
    """Tests BadgeMetadata node/relation creation for the neo4j and neptune serializers."""

    def setUp(self) -> None:
        super().setUp()
        # Fixture: two badges attached to a 'Column' start node.
        self.badge_metada = BadgeMetadata(
            start_label='Column',
            start_key='hive://default.base/test/ds',
            badges=[badge1, badge2])

    @staticmethod
    def _expected_neptune_node(name: str, category: str) -> Dict:
        """Build the neptune bulk-loader dict expected for one badge node."""
        return {
            NEPTUNE_HEADER_ID:
            BadgeMetadata.BADGE_KEY_FORMAT.format(badge=name),
            NEPTUNE_HEADER_LABEL:
            BadgeMetadata.BADGE_NODE_LABEL,
            # NOTE(review): a RELATIONSHIP-named constant is used for a node
            # property here, exactly as the test was originally written;
            # confirm this is intended.
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            ANY,
            NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT:
            NEPTUNE_CREATION_TYPE_JOB,
            BadgeMetadata.BADGE_CATEGORY + ':String(single)':
            category
        }

    @staticmethod
    def _expected_neptune_relation(from_key: str, to_key: str, label: str) -> Dict:
        """Build the neptune bulk-loader dict expected for one directed relationship."""
        return {
            NEPTUNE_HEADER_ID:
            "{from_vertex_id}_{to_vertex_id}_{label}".format(
                from_vertex_id=from_key,
                to_vertex_id=to_key,
                label=label,
            ),
            NEPTUNE_RELATIONSHIP_HEADER_FROM:
            from_key,
            NEPTUNE_RELATIONSHIP_HEADER_TO:
            to_key,
            NEPTUNE_HEADER_LABEL:
            label,
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            NEPTUNE_CREATION_TYPE_JOB
        }

    def test_get_badge_key(self) -> None:
        """get_badge_key returns the badge name unchanged."""
        badge_key = self.badge_metada.get_badge_key(badge1.name)
        self.assertEqual(badge_key, badge1.name)

    def test_create_nodes(self) -> None:
        """One node is created per badge and serializes to the expected neo4j dict."""
        nodes = self.badge_metada.create_nodes()
        self.assertEqual(len(nodes), 2)

        serialized_nodes = [
            neo4_serializer.serialize_node(node) for node in nodes
        ]

        for badge in (badge1, badge2):
            expected_node = {
                NODE_KEY: BadgeMetadata.BADGE_KEY_FORMAT.format(badge=badge.name),
                NODE_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
                BadgeMetadata.BADGE_CATEGORY: badge.category
            }
            # assertIn reports the container contents on failure,
            # unlike assertTrue(x in y).
            self.assertIn(expected_node, serialized_nodes)

    def test_create_nodes_neptune(self) -> None:
        """Each badge node converts to the expected neptune bulk-loader dict."""
        nodes = self.badge_metada.create_nodes()
        serialized_nodes = [
            neptune_serializer.convert_node(node) for node in nodes
        ]

        for badge in (badge1, badge2):
            self.assertIn(
                self._expected_neptune_node(badge.name, badge.category),
                serialized_nodes)

    def test_bad_key_entity_match(self) -> None:
        """Constructing with a 'Column' label and this start key must raise."""
        column_label = 'Column'
        table_key = 'hive://default.base/test'

        self.assertRaises(Exception,
                          BadgeMetadata,
                          start_label=column_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_bad_entity_label(self) -> None:
        """Constructing with the 'User' start label must raise."""
        user_label = 'User'
        table_key = 'hive://default.base/test'
        self.assertRaises(Exception,
                          BadgeMetadata,
                          start_label=user_label,
                          start_key=table_key,
                          badges=[badge1, badge2])

    def test_create_relation(self) -> None:
        """One relation is created per badge and serializes to the expected neo4j dict."""
        relations = self.badge_metada.create_relation()
        serialized_relations = [
            neo4_serializer.serialize_relationship(relation)
            for relation in relations
        ]
        self.assertEqual(len(relations), 2)

        for badge in (badge1, badge2):
            expected_relation = {
                RELATION_START_LABEL: self.badge_metada.start_label,
                RELATION_END_LABEL: BadgeMetadata.BADGE_NODE_LABEL,
                RELATION_START_KEY: self.badge_metada.start_key,
                RELATION_END_KEY: BadgeMetadata.get_badge_key(badge.name),
                RELATION_TYPE: BadgeMetadata.BADGE_RELATION_TYPE,
                RELATION_REVERSE_TYPE: BadgeMetadata.INVERSE_BADGE_RELATION_TYPE,
            }
            self.assertIn(expected_relation, serialized_relations)

    def test_create_relation_neptune(self) -> None:
        """Each relation converts to a forward and a reversed neptune dict."""
        relations = self.badge_metada.create_relation()
        # Flatten the per-relation results without the quadratic sum(..., []).
        serialized_relations: List[Dict] = [
            converted
            for rel in relations
            for converted in neptune_serializer.convert_relationship(rel)
        ]

        start_key = self.badge_metada.start_key
        for badge in (badge1, badge2):
            badge_key = BadgeMetadata.get_badge_key(badge.name)

            forward_expected = self._expected_neptune_relation(
                start_key, badge_key, BadgeMetadata.BADGE_RELATION_TYPE)
            reversed_expected = self._expected_neptune_relation(
                badge_key, start_key, BadgeMetadata.INVERSE_BADGE_RELATION_TYPE)

            self.assertIn(forward_expected, serialized_relations)
            self.assertIn(reversed_expected, serialized_relations)
Esempio n. 16
0
    def _get_extract_iter(self) -> Iterator[Union[TableMetadata, BadgeMetadata, TableSource, TableLineage]]:
        """
        Walk the dbt manifest and emit the model objects derived from it.

        For every manifest node that is a dbt model and also appears in the
        catalog, a TableMetadata is built; depending on the extractor flags
        and the schema filter it is yielded together with an optional
        BadgeMetadata and TableSource. When lineage extraction is enabled,
        one TableLineage is then yielded per upstream model that has at least
        one known downstream model.
        """
        table_key_by_dbt_id = {}

        for dbt_id, manifest_entry in self._dbt_manifest['nodes'].items():
            # Only dbt models that the catalog also knows about are extracted.
            is_model = manifest_entry['resource_type'] == DBT_MODEL_TYPE
            if not is_model or dbt_id not in self._dbt_catalog['nodes']:
                continue

            schema_name = manifest_entry['schema']
            model_name = manifest_entry[self._model_name_key]
            LOGGER.info('Extracting dbt {}.{}'.format(schema_name, model_name))

            catalog_entry = self._dbt_catalog['nodes'][dbt_id]

            columns: List[ColumnMetadata] = self._get_column_values(
                manifest_columns=manifest_entry['columns'],
                catalog_columns=catalog_entry['columns'],
            )
            description, description_source = self._get_table_descriptions(manifest_entry)
            tags, badge_names = self._get_table_tags_badges(manifest_entry)

            table = TableMetadata(
                database=self._default_sanitize(self._database_name),
                # The dbt "database" is the cluster here
                cluster=self._default_sanitize(manifest_entry['database']),
                schema=self._default_sanitize(schema_name),
                name=self._default_sanitize(model_name),
                is_view=catalog_entry['metadata']['type'] == 'view',
                columns=columns,
                tags=tags,
                description=description,
                description_source=description_source,
            )

            # Remembered for every model, even filtered ones, so lineage can resolve it.
            table_key_by_dbt_id[dbt_id] = table._get_table_key()

            # Optionally filter schemas in the output
            schema_allowed = self._can_yield_schema(schema_name)

            if self._extract_tables and schema_allowed:
                yield table

            if self._extract_tags and badge_names and schema_allowed:
                table_badges = [Badge(badge_name, 'table') for badge_name in badge_names]
                yield BadgeMetadata(start_label=TableMetadata.TABLE_NODE_LABEL,
                                    start_key=table._get_table_key(),
                                    badges=table_badges)

            if self._source_url and schema_allowed:
                source_location = os.path.join(self._source_url,
                                               manifest_entry.get('original_file_path'))
                yield TableSource(db_name=table.database,
                                  cluster=table.cluster,
                                  schema=table.schema,
                                  table_name=table.name,
                                  source=source_location)

        if not self._extract_lineage:
            return

        for upstream_id, child_ids in self._dbt_manifest['child_map'].items():
            if upstream_id not in table_key_by_dbt_id:
                continue

            # Keep only children that are dbt models with a recorded table key.
            downstream_keys = []
            for child_id in child_ids:
                if child_id.startswith(DBT_MODEL_PREFIX) and table_key_by_dbt_id.get(child_id):
                    downstream_keys.append(table_key_by_dbt_id[child_id])

            if not downstream_keys:
                continue

            yield TableLineage(
                table_key=table_key_by_dbt_id[upstream_id],
                downstream_deps=downstream_keys,
            )
Esempio n. 17
0
 def setUp(self) -> None:
     """Build the BadgeMetadata fixture shared by the test methods."""
     super(TestBadge, self).setUp()
     # Two badges attached to a 'Column' start node.
     fixture = BadgeMetadata(start_label='Column',
                             start_key='hive://default.base/test/ds',
                             badges=[badge1, badge2])
     self.badge_metada = fixture
Esempio n. 18
0
    def _create_next_node(self) -> Iterator[Any]:  # noqa: C901
        """
        Generate the nodes for this table, one per iteration step.

        Emitted in order: the table node (extended with any extra ``attrs``
        that do not clash with its built-in keys), the table description node,
        one node per tag, then for every column its node, description node and
        badge nodes, and finally the database, cluster and schema nodes, each
        skipped when its tuple is already in ``TableMetadata.serialized_nodes``.

        :return: iterator over nodes (description, tag and badge nodes are
            whatever their own factories return)
        """
        table_node = {
            NODE_LABEL: TableMetadata.TABLE_NODE_LABEL,
            NODE_KEY: self._get_table_key(),
            TableMetadata.TABLE_NAME: self.name,
            TableMetadata.IS_VIEW: self.is_view,
        }
        # Extra attributes never override the keys set above.
        for attr_name, attr_value in (self.attrs or {}).items():
            table_node.setdefault(attr_name, attr_value)
        yield table_node

        if self.description:
            description_key = self._get_table_description_key(self.description)
            yield self.description.get_node_dict(description_key)

        # Create the table tag node
        for table_tag in self.tags or ():
            yield TagMetadata.create_tag_node(table_tag)

        for column in self.columns:
            column_node = {
                NODE_LABEL: ColumnMetadata.COLUMN_NODE_LABEL,
                NODE_KEY: self._get_col_key(column),
                ColumnMetadata.COLUMN_NAME: column.name,
                ColumnMetadata.COLUMN_TYPE: column.type,
                ColumnMetadata.COLUMN_ORDER: column.sort_order,
            }
            yield column_node

            if column.description:
                description_key = self._get_col_description_key(column, column.description)
                yield column.description.get_node_dict(description_key)

            if column.badges:
                column_badges = BadgeMetadata(
                    db_name=self._get_database_key(),
                    schema=self._get_schema_key(),
                    start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                    start_key=self._get_col_key(column),
                    badges=column.badges,
                    cluster=self._get_cluster_key())
                yield from column_badges.create_nodes()

        # Database, cluster, schema
        hierarchy_nodes = (
            NodeTuple(key=self._get_database_key(),
                      name=self.database,
                      label=TableMetadata.DATABASE_NODE_LABEL),
            NodeTuple(key=self._get_cluster_key(),
                      name=self.cluster,
                      label=TableMetadata.CLUSTER_NODE_LABEL),
            NodeTuple(key=self._get_schema_key(),
                      name=self.schema,
                      label=TableMetadata.SCHEMA_NODE_LABEL),
        )

        for hierarchy_node in hierarchy_nodes:
            # The class-level set remembers tuples already emitted.
            if hierarchy_node in TableMetadata.serialized_nodes:
                continue
            TableMetadata.serialized_nodes.add(hierarchy_node)
            yield {
                NODE_LABEL: hierarchy_node.label,
                NODE_KEY: hierarchy_node.key,
                'name': hierarchy_node.name
            }