def test_dashboard_table_records(self) -> None:
        """Serialize the two records produced for two table ids, then expect exhaustion."""
        dashboard_key = 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id'
        first_table = 'hive://gold.schema/table1'
        second_table = 'hive://gold.schema/table2'

        dashboard_table = DashboardTable(table_ids=[first_table, second_table],
                                         cluster='cluster_id',
                                         product='product_id',
                                         dashboard_id='dashboard_id',
                                         dashboard_group_id='dashboard_group_id')

        # Pull and serialize both records first; the checks follow below.
        first_record = dashboard_table.create_next_record()
        first_serialized = mysql_serializer.serialize_record(first_record)

        second_record = dashboard_table.create_next_record()
        second_serialized = mysql_serializer.serialize_record(second_record)

        assert first_record is not None
        self.assertDictEqual({'dashboard_rk': dashboard_key, 'table_rk': first_table},
                             first_serialized)

        assert second_record is not None
        self.assertDictEqual({'dashboard_rk': dashboard_key, 'table_rk': second_table},
                             second_serialized)

        # A third call must signal that no records are left.
        self.assertIsNone(dashboard_table.create_next_record())
Beispiel #2
0
    def test_dashboard_table_nodes(self) -> None:
        """The first ``create_next_node`` call on a DashboardTable must return ``None``."""
        dashboard_table = DashboardTable(
            table_ids=['hive://gold.schema/table1', 'hive://gold.schema/table2'],
            cluster='cluster_id',
            product='product_id',
            dashboard_id='dashboard_id',
            dashboard_group_id='dashboard_group_id',
        )

        self.assertIsNone(dashboard_table.create_next_node())
 def test_dashboard_table_with_slash_as_name(self) -> None:
     """A table id with slashes inside its name parts must produce no relation."""
     table_id_with_slashes = 'bq/name://project/id.schema/name/table/name'
     dashboard_table = DashboardTable(table_ids=[table_id_with_slashes],
                                      cluster='cluster_id', product='product_id',
                                      dashboard_id='dashboard_id',
                                      dashboard_group_id='dashboard_group_id')

     self.assertIsNone(dashboard_table.create_next_relation())
Beispiel #4
0
    def test_dashboard_table_relations(self) -> None:
        """Check the neo4_serializer and neptune_serializer output for one dashboard/table relation."""
        table_key = 'hive://gold.schema/table1'
        dashboard_key = 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id'

        # Vertex ids and edge labels shared by the forward and reversed Neptune rows.
        dashboard_vertex = 'Dashboard:' + dashboard_key
        table_vertex = 'Table:' + table_key
        forward_label = 'DASHBOARD_WITH_TABLE'
        reverse_label = 'TABLE_OF_DASHBOARD'
        forward_edge_id = f'{forward_label}:{dashboard_vertex}_{table_vertex}'
        reverse_edge_id = f'{reverse_label}:{table_vertex}_{dashboard_vertex}'

        expected = {
            RELATION_END_KEY: table_key,
            RELATION_START_LABEL: 'Dashboard',
            RELATION_END_LABEL: 'Table',
            RELATION_START_KEY: dashboard_key,
            RELATION_TYPE: forward_label,
            RELATION_REVERSE_TYPE: reverse_label,
        }
        neptune_forward_expected = {
            NEPTUNE_HEADER_ID: forward_edge_id,
            METADATA_KEY_PROPERTY_NAME: forward_edge_id,
            NEPTUNE_RELATIONSHIP_HEADER_FROM: dashboard_vertex,
            NEPTUNE_RELATIONSHIP_HEADER_TO: table_vertex,
            NEPTUNE_HEADER_LABEL: forward_label,
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
        }
        neptune_reversed_expected = {
            NEPTUNE_HEADER_ID: reverse_edge_id,
            METADATA_KEY_PROPERTY_NAME: reverse_edge_id,
            NEPTUNE_RELATIONSHIP_HEADER_FROM: table_vertex,
            NEPTUNE_RELATIONSHIP_HEADER_TO: dashboard_vertex,
            NEPTUNE_HEADER_LABEL: reverse_label,
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
        }

        dashboard_table = DashboardTable(
            table_ids=[table_key],
            cluster='cluster_id',
            product='product_id',
            dashboard_id='dashboard_id',
            dashboard_group_id='dashboard_group_id',
        )
        relation = dashboard_table.create_next_relation()
        neo4j_row = neo4_serializer.serialize_relationship(relation)
        neptune_rows = neptune_serializer.convert_relationship(relation)

        assert relation is not None
        self.assertDictEqual(neo4j_row, expected)
        # Index 0 is compared with the forward edge, index 1 with the reversed one.
        forward_row, reversed_row = neptune_rows[0], neptune_rows[1]
        self.assertDictEqual(forward_row, neptune_forward_expected)
        self.assertDictEqual(reversed_row, neptune_reversed_expected)
Beispiel #5
0
 def test_dashboard_table_without_dot_as_name(self) -> None:
     """A table id whose name parts use dashes must still serialize to a full relation."""
     table_key = 'bq-name://project-id.schema-name/table-name'
     wanted = {
         RELATION_END_KEY: table_key,
         RELATION_START_LABEL: 'Dashboard',
         RELATION_END_LABEL: 'Table',
         RELATION_START_KEY: 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id',
         RELATION_TYPE: 'DASHBOARD_WITH_TABLE',
         RELATION_REVERSE_TYPE: 'TABLE_OF_DASHBOARD',
     }

     dashboard_table = DashboardTable(
         table_ids=[table_key],
         cluster='cluster_id',
         product='product_id',
         dashboard_id='dashboard_id',
         dashboard_group_id='dashboard_group_id',
     )
     relation = dashboard_table.create_next_relation()
     serialized = neo4_serializer.serialize_relationship(relation)

     assert relation is not None
     self.assertDictEqual(serialized, wanted)
    def test_dashboard_table_relations(self) -> None:
        """Compare the raw ``create_next_relation`` result with the expected relation dict."""
        table_key = 'hive://gold.schema/table1'
        wanted = {
            RELATION_END_KEY: table_key,
            RELATION_START_LABEL: 'Dashboard',
            RELATION_END_LABEL: 'Table',
            RELATION_START_KEY: 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id',
            RELATION_TYPE: 'DASHBOARD_WITH_TABLE',
            RELATION_REVERSE_TYPE: 'TABLE_OF_DASHBOARD',
        }

        dashboard_table = DashboardTable(
            table_ids=[table_key],
            cluster='cluster_id',
            product='product_id',
            dashboard_id='dashboard_id',
            dashboard_group_id='dashboard_group_id',
        )

        # No serializer here: the relation itself is compared with the dict.
        self.assertDictEqual(dashboard_table.create_next_relation(), wanted)
Beispiel #7
0
    def test_create_next_atlas_relation(self) -> None:
        """Drain ``create_next_atlas_relation`` and compare every serialized relationship."""
        dashboard_qn = 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id'

        dashboard_table = DashboardTable(
            table_ids=['hive://gold.schema/table1', 'hive_table://gold.schema/table2'],
            cluster='cluster_id',
            product='product_id',
            dashboard_id='dashboard_id',
            dashboard_group_id='dashboard_group_id',
        )

        # 'hive' is db name compatible with Amundsen Databuilder sourced data. in such case qn = amundsen key
        # 'hive_table' is db name compatible with data sources from Atlas Hive Hook. in such case custom qn is used
        table_qualified_names = ['hive://gold.schema/table1', 'schema.table2@gold']
        expected = [
            {
                "relationshipType": "Table__Dashboard",
                "entityType1": "Table",
                "entityQualifiedName1": table_qn,
                "entityType2": "Dashboard",
                "entityQualifiedName2": dashboard_qn,
            }
            for table_qn in table_qualified_names
        ]

        actual = []
        while True:
            relationship = dashboard_table.create_next_atlas_relation()  # type: ignore
            if not relationship:
                break  # a falsy result ends the stream
            actual.append(atlas_serializer.serialize_relationship(relationship))

        self.assertEqual(expected, actual)
    def _get_extract_iter(self) -> Iterator[Any]:
        """
        Yield the dashboard model objects built from each extracted record.

        Records are pulled from ``self._extractor`` until it returns a falsy
        value. Each one is passed through ``self._transformer``; records the
        transformer yields nothing for, and records rejected by
        ``self._is_published_dashboard``, are skipped. For every remaining
        record the generator emits, in order: a DashboardMetadata, a
        DashboardLastModifiedTimestamp, a DashboardOwner, then a DashboardQuery
        followed by a DashboardChart per visualization widget, and finally one
        DashboardTable if the optional table parser returned any table keys.

        :return: iterator over the model instances described above
        """
        while True:
            raw_record = self._extractor.extract()
            if not raw_record:
                break  # the end.

            # next(..., None) gives None when the transformer yields nothing for
            # this record; skip it instead of subscripting None further down.
            record = next(self._transformer.transform(record=raw_record), None)
            if record is None:
                continue

            if not self._is_published_dashboard(record):
                continue  # filter this one out

            # Fields shared by every model emitted for this dashboard.
            identity_data = {
                'cluster': self._cluster,
                'product': RedashDashboardExtractor.PRODUCT,
                'dashboard_group_id': str(RedashDashboardExtractor.DASHBOARD_GROUP_ID),
                'dashboard_id': str(record['dashboard_id'])
            }

            dash_data = {
                'dashboard_group': RedashDashboardExtractor.DASHBOARD_GROUP_NAME,
                'dashboard_group_url': self._redash_base_url,
                'dashboard_name': record['dashboard_name'],
                'dashboard_url': f'{self._redash_base_url}/dashboards/{record["dashboard_id"]}',
                'created_timestamp': record['created_timestamp']
            }
            dash_data.update(identity_data)

            widgets = sort_widgets(record['widgets'])
            text_widgets = get_text_widgets(widgets)
            viz_widgets = get_visualization_widgets(widgets)

            # generate a description for this dashboard, since Redash does not have descriptions
            dash_data['description'] = generate_dashboard_description(text_widgets, viz_widgets)

            yield DashboardMetadata(**dash_data)

            yield DashboardLastModifiedTimestamp(
                last_modified_timestamp=record['last_modified_timestamp'],
                **identity_data
            )

            yield DashboardOwner(email=record['user']['email'], **identity_data)

            # A dict used as an insertion-ordered set: duplicates collapse, but the
            # first-seen order is kept so the emitted table_ids list is the same
            # on every run (plain set order varies with string hash randomization).
            table_keys = {}

            for viz in viz_widgets:
                yield DashboardQuery(
                    query_id=str(viz.query_id),
                    query_name=viz.query_name,
                    url=self._redash_base_url + viz.query_relative_url,
                    query_text=viz.raw_query,
                    **identity_data
                )

                yield DashboardChart(
                    query_id=str(viz.query_id),
                    chart_id=str(viz.visualization_id),
                    chart_name=viz.visualization_name,
                    chart_type=viz.visualization_type,
                    **identity_data
                )

                # if a table parser is provided, retrieve tables from this viz
                if self._parse_tables:
                    for tbl in self._parse_tables(viz):
                        table_keys[tbl.key] = None

            if table_keys:
                yield DashboardTable(table_ids=list(table_keys), **identity_data)
Beispiel #9
0
    def test_with_one_dashboard(self) -> None:
        """Run the extractor against a mocked API with one dashboard and check each record in turn."""
        def mock_api_get(url: str, *args: Any, **kwargs: Any) -> MockApiResponse:
            # URLs without the dashboard slug get the paginated listing;
            # URLs containing it get the single-dashboard detail payload.
            if 'test-dash' not in url:
                return MockApiResponse({
                    'page': 1,
                    'count': 1,
                    'page_size': 50,
                    'results': [
                        {
                            'id': 123,
                            'name': 'Test Dash',
                            'slug': 'test-dash',
                            'created_at': '2020-01-01T00:00:00.000Z',
                            'updated_at': '2020-01-02T00:00:00.000Z',
                            'is_archived': False,
                            'is_draft': False,
                            'user': {'email': '*****@*****.**'}
                        }
                    ]
                })

            return MockApiResponse({
                'id': 123,
                'widgets': [
                    {
                        'visualization': {
                            'query': {
                                'data_source_id': 1,
                                'id': '1234',
                                'name': 'Test Query',
                                'query': 'SELECT id FROM users'
                            }
                        },
                        'options': {}
                    }
                ]
            })

        redash_base_url = 'https://redash.example.com'
        scope_prefix = 'extractor.redash_dashboard'
        config = ConfigFactory.from_dict({
            f'{scope_prefix}.redash_base_url': redash_base_url,
            f'{scope_prefix}.api_base_url': redash_base_url,  # probably not but doesn't matter
            f'{scope_prefix}.api_key': 'abc123',
            f'{scope_prefix}.table_parser':
                'tests.unit.extractor.dashboard.redash.test_redash_dashboard_extractor.dummy_tables'
        })

        with patch('databuilder.rest_api.rest_api_query.requests.get') as mock_get:
            mock_get.side_effect = mock_api_get

            extractor = RedashDashboardExtractor()
            scoped_conf = Scoped.get_scoped_conf(conf=config, scope=extractor.get_scope())
            extractor.init(scoped_conf)

            # DashboardMetadata
            metadata = extractor.extract()
            self.assertEqual(metadata.dashboard_id, 123)
            self.assertEqual(metadata.dashboard_name, 'Test Dash')
            self.assertEqual(metadata.dashboard_group_id, RedashDashboardExtractor.DASHBOARD_GROUP_ID)
            self.assertEqual(metadata.dashboard_group, RedashDashboardExtractor.DASHBOARD_GROUP_NAME)
            self.assertEqual(metadata.product, RedashDashboardExtractor.PRODUCT)
            self.assertEqual(metadata.cluster, RedashDashboardExtractor.DEFAULT_CLUSTER)
            self.assertEqual(metadata.created_timestamp, 1577836800)
            self.assertIn(redash_base_url, metadata.dashboard_url)
            self.assertIn('test-dash', metadata.dashboard_url)

            # Identity fields reused to build every expected model below.
            identity: Dict[str, Any] = {
                'dashboard_id': 123,
                'dashboard_group_id': RedashDashboardExtractor.DASHBOARD_GROUP_ID,
                'product': RedashDashboardExtractor.PRODUCT,
                'cluster': 'prod'
            }

            # DashboardLastModified
            last_modified = extractor.extract()
            expected_timestamp = DashboardLastModifiedTimestamp(last_modified_timestamp=1577923200,
                                                                **identity)
            self.assertEqual(repr(last_modified), repr(expected_timestamp))

            # DashboardOwner
            owner = extractor.extract()
            expected_owner = DashboardOwner(email='*****@*****.**', **identity)
            self.assertEqual(repr(owner), repr(expected_owner))

            # DashboardQuery
            query = extractor.extract()
            expected_query = DashboardQuery(query_id='1234',
                                            query_name='Test Query',
                                            url=f'{redash_base_url}/queries/1234',
                                            query_text='SELECT id FROM users',
                                            **identity)
            self.assertEqual(repr(query), repr(expected_query))

            # DashboardTable
            table = extractor.extract()
            users_table_key = TableRelationData('some_db', 'prod', 'public', 'users').key
            expected_table = DashboardTable(table_ids=[users_table_key], **identity)
            self.assertEqual(repr(table), repr(expected_table))
Beispiel #10
0
    def _get_extract_iter(self) -> Iterator[Union[DashboardTable, None]]:
        """
        Yield one DashboardTable per dashboard that references a physical dataset table.

        For every 'dataset' resource id the dataset details and its related
        objects are fetched. Datasets without a database id, or with a non-empty
        ``result.sql``, are skipped. For the rest a table key is built from the
        database's SQLAlchemy URI and the dataset's table name, and recorded for
        every dashboard listed under the dataset's related objects.

        :return: iterator of DashboardTable instances whose ``table_ids`` is a
            de-duplicated list in first-seen order
        """
        # dashboard id -> table keys. The inner dict acts as an insertion-ordered
        # set, so the table_ids list handed to DashboardTable has a stable order.
        dashboards: Dict[str, Dict[str, None]] = {}

        ids = self._get_resource_ids('dataset')

        # Fetch every dataset's payloads up front, before any database lookup.
        data = [(self._get_dataset_details(i),
                 self._get_dataset_related_objects(i)) for i in ids]

        for dataset_details, dataset_objects in data:
            database_id = self.get_nested_field(dataset_details, 'result.database.id')
            if not database_id:
                continue

            database_details = self._get_database_details(database_id)

            # if sql exists then table_name cannot be associated with physical table in db
            sql = self.get_nested_field(dataset_details, 'result.sql') or ''
            if sql:
                continue

            uri = self.get_nested_field(database_details, 'result.sqlalchemy_uri')
            database_spec = make_url(uri)

            # Fall back to the raw driver name when no mapping is configured for it.
            db = self.driver_mapping.get(database_spec.drivername,
                                         database_spec.drivername)
            schema = database_spec.database
            cluster = self.cluster_mapping.get(str(database_id), self.cluster)
            tbl = self.get_nested_field(dataset_details, 'result.table_name')

            table_key = TableMetadata.TABLE_KEY_FORMAT.format(
                db=db, cluster=cluster, schema=schema, tbl=tbl)

            related_dashboards = dataset_objects.get('dashboards', {}).get('result', [])
            for dashboard in related_dashboards:
                dashboard_id = str(dashboard.get('id'))
                dashboards.setdefault(dashboard_id, {})[table_key] = None

        for dashboard_id, table_keys in dashboards.items():
            table_metadata: Dict[str, Any] = {
                'dashboard_id': dashboard_id,
                # A list, as at the other DashboardTable call sites.
                'table_ids': list(table_keys)
            }
            table_metadata.update(**self.common_params)

            yield DashboardTable(**table_metadata)