def _get_extract_iter(self):
        # type: () -> Iterator[Any]
        """
        Generate the dashboard records for every dashboard the extractor returns.

        Records are pulled from ``self._extractor`` until it returns a falsy
        value. Each one is passed through ``self._transformer`` and dropped if
        ``self._is_published_dashboard`` rejects it. For every remaining
        dashboard the generator yields, in this order: a DashboardMetadata, a
        DashboardLastModifiedTimestamp, a DashboardOwner, one DashboardQuery per
        visualization widget and, when ``self._parse_tables`` produced at least
        one table key, a single DashboardTable.
        """
        while True:
            raw_record = self._extractor.extract()
            if not raw_record:
                return  # nothing left to extract

            dashboard = self._transformer.transform(record=raw_record)
            if not self._is_published_dashboard(dashboard):
                continue  # skip dashboards rejected by the published check

            # fields shared by every record emitted for this dashboard
            identity = dict(
                cluster=self._cluster,
                product=RedashDashboardExtractor.PRODUCT,
                dashboard_group_id=RedashDashboardExtractor.DASHBOARD_GROUP_ID,
                dashboard_id=dashboard['dashboard_id']
            )

            metadata = dict(
                dashboard_group=RedashDashboardExtractor.DASHBOARD_GROUP_NAME,
                dashboard_group_url=self._redash_base_url,
                dashboard_name=dashboard['dashboard_name'],
                dashboard_url='{redash}/dashboard/{slug}'.format(
                    redash=self._redash_base_url, slug=dashboard['slug']),
                created_timestamp=dashboard['created_timestamp'],
                **identity
            )

            ordered_widgets = sort_widgets(dashboard['widgets'])
            text_widgets = get_text_widgets(ordered_widgets)
            viz_widgets = get_visualization_widgets(ordered_widgets)

            # Redash has no dashboard descriptions, so one is generated from the widgets
            metadata['description'] = generate_dashboard_description(text_widgets, viz_widgets)

            yield DashboardMetadata(**metadata)

            yield DashboardLastModifiedTimestamp(
                last_modified_timestamp=dashboard['last_modified_timestamp'],
                **identity
            )

            yield DashboardOwner(email=dashboard['user']['email'], **identity)

            referenced_tables = set()

            for viz in viz_widgets:
                yield DashboardQuery(
                    query_id=viz.query_id,
                    query_name=viz.query_name,
                    url=self._redash_base_url + viz.query_relative_url,
                    query_text=viz.raw_query,
                    **identity
                )

                # only collect tables when a table parser was configured
                if self._parse_tables:
                    referenced_tables.update(
                        tbl.key for tbl in self._parse_tables(viz))

            if referenced_tables:
                yield DashboardTable(table_ids=list(referenced_tables), **identity)
Beispiel #2
0
class TestDashboardQuery(unittest.TestCase):
    """
    Serialization checks for a DashboardQuery built from fixed sample values.

    The node, relation and record produced by the model are passed through the
    neo4_serializer, neptune_serializer and mysql_serializer helpers and
    compared against hand-written expectations.
    """

    # keys expected for the sample query and the dashboard it belongs to
    _QUERY_KEY = '_dashboard://gold.dg_id/d_id/query/q_id'
    _DASHBOARD_KEY = '_dashboard://gold.dg_id/d_id'
    # Neptune vertex ids expected for the same two entities
    _QUERY_VERTEX = 'Query:_dashboard://gold.dg_id/d_id/query/q_id'
    _DASHBOARD_VERTEX = 'Dashboard:_dashboard://gold.dg_id/d_id'

    def setUp(self) -> None:
        sample = dict(
            dashboard_group_id='dg_id',
            dashboard_id='d_id',
            query_id='q_id',
            query_name='q_name',
            url='http://foo.bar/query/baz',
            query_text='SELECT * FROM foo.bar',
        )
        self.dashboard_query = DashboardQuery(**sample)

    @staticmethod
    def _expected_neptune_edge(label: str, from_vertex_id: str, to_vertex_id: str) -> dict:
        """Build the expected Neptune row for one direction of the relation."""
        edge_id = "{label}:{from_vertex_id}_{to_vertex_id}".format(
            from_vertex_id=from_vertex_id,
            to_vertex_id=to_vertex_id,
            label=label)
        return {
            NEPTUNE_HEADER_ID: edge_id,
            METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT: edge_id,
            NEPTUNE_RELATIONSHIP_HEADER_FROM: from_vertex_id,
            NEPTUNE_RELATIONSHIP_HEADER_TO: to_vertex_id,
            NEPTUNE_HEADER_LABEL: label,
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
                NEPTUNE_CREATION_TYPE_JOB
        }

    def test_create_nodes(self) -> None:
        node = self.dashboard_query.create_next_node()
        serialized = neo4_serializer.serialize_node(node)

        self.assertEqual(
            {
                'url': 'http://foo.bar/query/baz',
                'name': 'q_name',
                'id': 'q_id',
                'query_text': 'SELECT * FROM foo.bar',
                NODE_KEY: self._QUERY_KEY,
                NODE_LABEL: DashboardQuery.DASHBOARD_QUERY_LABEL
            },
            serialized)

    def test_create_nodes_neptune(self) -> None:
        node = self.dashboard_query.create_next_node()
        serialized = neptune_serializer.convert_node(node)

        self.assertEqual(
            {
                NEPTUNE_HEADER_ID: self._QUERY_VERTEX,
                METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT: self._QUERY_KEY,
                NEPTUNE_HEADER_LABEL: DashboardQuery.DASHBOARD_QUERY_LABEL,
                NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
                NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT:
                    NEPTUNE_CREATION_TYPE_JOB,
                'id:String(single)': 'q_id',
                'query_text:String(single)': 'SELECT * FROM foo.bar',
                'name:String(single)': 'q_name',
                'url:String(single)': 'http://foo.bar/query/baz'
            },
            serialized)

    def test_create_relation(self) -> None:
        relation = self.dashboard_query.create_next_relation()
        serialized = neo4_serializer.serialize_relationship(relation)

        self.assertEqual(
            {
                RELATION_END_KEY: self._QUERY_KEY,
                RELATION_START_LABEL: 'Dashboard',
                RELATION_END_LABEL: DashboardQuery.DASHBOARD_QUERY_LABEL,
                RELATION_START_KEY: self._DASHBOARD_KEY,
                RELATION_TYPE: 'HAS_QUERY',
                RELATION_REVERSE_TYPE: 'QUERY_OF'
            },
            serialized)

    def test_create_relation_neptune(self) -> None:
        relation = self.dashboard_query.create_next_relation()
        serialized = neptune_serializer.convert_relationship(relation)

        # dashboard -> query edge, followed by its reverse
        forward = self._expected_neptune_edge(
            'HAS_QUERY', self._DASHBOARD_VERTEX, self._QUERY_VERTEX)
        backward = self._expected_neptune_edge(
            'QUERY_OF', self._QUERY_VERTEX, self._DASHBOARD_VERTEX)

        assert relation is not None
        self.assertDictEqual(serialized[0], forward)
        self.assertDictEqual(serialized[1], backward)

    def test_create_records(self) -> None:
        row = self.dashboard_query.create_next_record()
        serialized = mysql_serializer.serialize_record(row)
        expected_row = {
            'rk': self._QUERY_KEY,
            'name': 'q_name',
            'id': 'q_id',
            'dashboard_rk': self._DASHBOARD_KEY,
            'url': 'http://foo.bar/query/baz',
            'query_text': 'SELECT * FROM foo.bar'
        }

        assert row is not None
        self.assertDictEqual(expected_row, serialized)
        # only one record is expected from the model
        self.assertIsNone(self.dashboard_query.create_next_record())
Beispiel #3
0
    def test_with_one_dashboard(self) -> None:
        """
        Run the extractor against a mocked Redash API that exposes one dashboard.

        The patched ``requests.get`` answers any URL containing '1000' with the
        dashboard detail payload and every other URL with the one-item listing.
        The records returned by successive ``extract()`` calls are then checked
        in order: metadata, last modified timestamp, owner, query, chart, table.
        """
        def mock_api_get(url: str, *args: Any,
                         **kwargs: Any) -> MockApiResponse:
            # detail request for the single dashboard
            if '1000' in url:
                return MockApiResponse({
                    'id': 1000,
                    'widgets': [{
                        'visualization': {
                            'query': {
                                'data_source_id': 1,
                                'id': 1234,
                                'name': 'Test Query',
                                'query': 'SELECT id FROM users'
                            },
                            'id': 12345,
                            'name': 'test_widget',
                            'type': 'CHART',
                        },
                        'options': {}
                    }]
                })

            # anything else is treated as the paginated dashboard listing
            return MockApiResponse({
                'page': 1,
                'count': 1,
                'page_size': 50,
                'results': [{
                    'id': 1000,
                    'name': 'Test Dash',
                    'slug': 'test-dash',
                    'created_at': '2020-01-01T00:00:00.000Z',
                    'updated_at': '2020-01-02T00:00:00.000Z',
                    'is_archived': False,
                    'is_draft': False,
                    'user': {
                        'email': '*****@*****.**'
                    }
                }]
            })

        redash_base_url = 'https://redash.example.com'
        config = ConfigFactory.from_dict({
            'extractor.redash_dashboard.redash_base_url':
            redash_base_url,
            'extractor.redash_dashboard.api_base_url':
            redash_base_url,  # probably not but doesn't matter
            'extractor.redash_dashboard.api_key':
            'abc123',
            'extractor.redash_dashboard.table_parser':
            'tests.unit.extractor.dashboard.redash.test_redash_dashboard_extractor.dummy_tables'
        })

        with patch('databuilder.rest_api.rest_api_query.requests.get'
                   ) as mock_get:
            mock_get.side_effect = mock_api_get

            extractor = RedashDashboardExtractor()
            extractor.init(
                Scoped.get_scoped_conf(conf=config,
                                       scope=extractor.get_scope()))

            # DashboardMetadata
            record = extractor.extract()
            self.assertEqual(record.dashboard_id, '1000')
            self.assertEqual(record.dashboard_name, 'Test Dash')
            self.assertEqual(record.dashboard_group_id,
                             RedashDashboardExtractor.DASHBOARD_GROUP_ID)
            self.assertEqual(record.dashboard_group,
                             RedashDashboardExtractor.DASHBOARD_GROUP_NAME)
            self.assertEqual(record.product, RedashDashboardExtractor.PRODUCT)
            self.assertEqual(record.cluster,
                             RedashDashboardExtractor.DEFAULT_CLUSTER)
            self.assertEqual(record.created_timestamp, 1577836800)
            # assertIn reports both operands on failure, unlike assertTrue(a in b)
            self.assertIn(redash_base_url, record.dashboard_url)
            self.assertIn('1000', record.dashboard_url)

            # DashboardLastModified
            record = extractor.extract()
            identity: Dict[str, Any] = {
                'dashboard_id': '1000',
                'dashboard_group_id':
                RedashDashboardExtractor.DASHBOARD_GROUP_ID,
                'product': RedashDashboardExtractor.PRODUCT,
                'cluster': 'prod'
            }
            expected_timestamp = DashboardLastModifiedTimestamp(
                last_modified_timestamp=1577923200, **identity)
            self.assertEqual(repr(record), repr(expected_timestamp))

            # DashboardOwner
            record = extractor.extract()
            expected_owner = DashboardOwner(email='*****@*****.**',
                                            **identity)
            self.assertEqual(repr(record), repr(expected_owner))

            # DashboardQuery
            record = extractor.extract()
            expected_query = DashboardQuery(
                query_id='1234',
                query_name='Test Query',
                url='{base}/queries/1234'.format(base=redash_base_url),
                query_text='SELECT id FROM users',
                **identity)
            self.assertEqual(repr(record), repr(expected_query))

            # DashboardChart
            record = extractor.extract()
            expected_chart = DashboardChart(query_id='1234',
                                            chart_id='12345',
                                            chart_name='test_widget',
                                            chart_type='CHART',
                                            **identity)
            self.assertEqual(repr(record), repr(expected_chart))

            # DashboardTable
            record = extractor.extract()
            expected_table = DashboardTable(
                table_ids=[
                    TableRelationData('some_db', 'prod', 'public', 'users').key
                ],
                **identity)
            self.assertEqual(repr(record), repr(expected_table))
Beispiel #4
0
    def _get_extract_iter(self) -> Iterator[Any]:
        """
        Generate the dashboard records for every dashboard the extractor returns.

        Records are pulled from ``self._extractor`` until it returns a falsy
        value. For each one, the first item produced by
        ``self._transformer.transform`` is used; if the transformer produces
        nothing, the record is skipped. For every remaining dashboard the
        generator yields, in this order: a DashboardOwner, a
        DashboardLastModifiedTimestamp, then (only when the record has a
        "widgets" entry) a DashboardQuery followed by a DashboardChart for each
        visualization widget, and finally a DashboardMetadata. The metadata is
        yielded last, after the optional description has been filled in from
        the widgets.
        """
        while True:
            raw_record = self._extractor.extract()
            if not raw_record:
                break

            record = next(self._transformer.transform(record=raw_record), None)
            if record is None:
                # The transformer yielded nothing for this record. Skip it
                # rather than failing with a TypeError on the lookups below.
                continue

            # fields shared by every record emitted for this dashboard
            dashboard_identity_data = {
                "dashboard_group_id":
                DatabricksSQLDashboardExtractor.DASHBOARD_GROUP_ID,
                "dashboard_id": record["dashboard_id"],
                "product": "databricks-sql",
            }

            dashboard_data = {
                "dashboard_group":
                DatabricksSQLDashboardExtractor.DASHBOARD_GROUP_NAME,
                "dashboard_name": record["dashboard_name"],
                "dashboard_url":
                f"{self._databricks_host}/sql/dashboards/{record['dashboard_id']}",
                "dashboard_group_url": self._databricks_host,
                "created_timestamp": record["created_timestamp"],
                "tags": record["tags"],
            }

            dashboard_owner_data = {"email": record["user"]["email"]}
            dashboard_owner_data.update(dashboard_identity_data)
            yield DashboardOwner(**dashboard_owner_data)

            dashboard_last_modified_data = {
                "last_modified_timestamp": record["last_modified_timestamp"],
            }
            dashboard_last_modified_data.update(dashboard_identity_data)
            yield DashboardLastModifiedTimestamp(
                **dashboard_last_modified_data)

            if "widgets" in record:
                widgets = sort_widgets(record["widgets"])
                text_widgets = get_text_widgets(widgets)
                viz_widgets = get_visualization_widgets(widgets)
                # the description is generated from the dashboard's widgets
                dashboard_data["description"] = generate_dashboard_description(
                    text_widgets, viz_widgets)

                for viz in viz_widgets:
                    dashboard_query_data = {
                        "query_id": str(viz.query_id),
                        "query_name": viz.query_name,
                        "url": self._databricks_host + viz.query_relative_url,
                        "query_text": viz.raw_query,
                    }
                    dashboard_query_data.update(dashboard_identity_data)
                    yield DashboardQuery(**dashboard_query_data)

                    dashboard_chart_data = {
                        "query_id": str(viz.query_id),
                        "chart_id": str(viz.visualization_id),
                        "chart_name": viz.visualization_name,
                        "chart_type": viz.visualization_type,
                    }
                    dashboard_chart_data.update(dashboard_identity_data)
                    yield DashboardChart(**dashboard_chart_data)

            dashboard_data.update(dashboard_identity_data)
            yield DashboardMetadata(**dashboard_data)