Ejemplo n.º 1
0
    def test_create_atlas_entity(self) -> None:
        """Verify the serialized Atlas entity built from a superset chart.

        The test also asserts that a second call to
        ``create_next_atlas_entity`` returns ``None``.
        """
        chart = DashboardChart(
            dashboard_group_id='dg_id',
            dashboard_id='d_id',
            query_id='q_id',
            chart_id='c_id',
            chart_name='c_name',
            chart_type='bar',
            chart_url='http://gold.foo/chart',
            product='superset',
        )

        entity = chart.create_next_atlas_entity()
        serialized_entity = atlas_serializer.serialize_entity(entity)

        expected_entity = {
            'typeName': 'DashboardChart',
            'operation': 'CREATE',
            'relationships': 'query#DashboardQuery#superset_dashboard://gold.dg_id/d_id/query/q_id',
            'qualifiedName': 'superset_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
            'name': 'c_name',
            'type': 'bar',
            'url': 'http://gold.foo/chart',
        }

        assert entity is not None
        self.assertDictEqual(expected_entity, serialized_entity)

        # A follow-up request is expected to come back empty.
        self.assertIsNone(chart.create_next_atlas_entity())
    def test_create_relation(self) -> None:
        """Verify the Query -> Chart relation in Neo4j and Neptune formats.

        The Neptune conversion is checked at index 0 (``HAS_CHART`` edge) and
        index 1 (``CHART_OF`` edge). The test also asserts that a second call
        to ``create_next_relation`` returns ``None``.
        """
        chart = DashboardChart(
            dashboard_group_id='dg_id',
            dashboard_id='d_id',
            query_id='q_id',
            chart_id='c_id',
            chart_name='c_name',
            chart_type='bar',
        )

        relation = chart.create_next_relation()
        neo4j_row = neo4_serializer.serialize_relationship(relation)
        neptune_rows = neptune_serializer.convert_relationship(relation)

        query_key = '_dashboard://gold.dg_id/d_id/query/q_id'
        chart_key = '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id'
        # Shared layout of the Neptune edge identifier used by both directions.
        edge_id_template = "{from_vertex_id}_{to_vertex_id}_{label}"

        expected_neo4j: Dict[str, Any] = {
            RELATION_END_KEY: chart_key,
            RELATION_START_LABEL: 'Query',
            RELATION_END_LABEL: 'Chart',
            RELATION_START_KEY: query_key,
            RELATION_TYPE: 'HAS_CHART',
            RELATION_REVERSE_TYPE: 'CHART_OF',
        }

        # Query -> Chart edge.
        expected_forward_edge = {
            NEPTUNE_HEADER_ID: edge_id_template.format(
                from_vertex_id=query_key,
                to_vertex_id=chart_key,
                label='HAS_CHART',
            ),
            NEPTUNE_RELATIONSHIP_HEADER_FROM: query_key,
            NEPTUNE_RELATIONSHIP_HEADER_TO: chart_key,
            NEPTUNE_HEADER_LABEL: 'HAS_CHART',
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
        }

        # Chart -> Query edge (same vertices, swapped, with the reverse label).
        expected_reverse_edge = {
            NEPTUNE_HEADER_ID: edge_id_template.format(
                from_vertex_id=chart_key,
                to_vertex_id=query_key,
                label='CHART_OF',
            ),
            NEPTUNE_RELATIONSHIP_HEADER_FROM: chart_key,
            NEPTUNE_RELATIONSHIP_HEADER_TO: query_key,
            NEPTUNE_HEADER_LABEL: 'CHART_OF',
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
        }

        assert relation is not None
        self.assertEqual(expected_neo4j, neo4j_row)
        self.assertEqual(expected_forward_edge, neptune_rows[0])
        self.assertEqual(expected_reverse_edge, neptune_rows[1])
        self.assertIsNone(chart.create_next_relation())
Ejemplo n.º 3
0
    def test_create_relation(self) -> None:
        """Verify the Neo4j-serialized Query -> Chart relation of a chart.

        The test also asserts that a second call to ``create_next_relation``
        returns ``None``.
        """
        chart = DashboardChart(
            dashboard_group_id='dg_id',
            dashboard_id='d_id',
            query_id='q_id',
            chart_id='c_id',
            chart_name='c_name',
            chart_type='bar',
        )

        relation = chart.create_next_relation()
        serialized_relation = neo4_serializer.serialize_relationship(relation)

        query_key = '_dashboard://gold.dg_id/d_id/query/q_id'
        chart_key = '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id'
        expected_relation: Dict[str, Any] = {
            RELATION_END_KEY: chart_key,
            RELATION_START_LABEL: 'Query',
            RELATION_END_LABEL: 'Chart',
            RELATION_START_KEY: query_key,
            RELATION_TYPE: 'HAS_CHART',
            RELATION_REVERSE_TYPE: 'CHART_OF',
        }

        assert relation is not None
        self.assertEqual(expected_relation, serialized_relation)
        self.assertIsNone(chart.create_next_relation())
    def test_create_relation(self):
        # type: () -> None
        """Verify the raw relation returned by ``create_next_relation``.

        The returned value is compared directly with the expected dict, with
        no serializer in between. The test also asserts that a second call
        returns ``None``.
        """
        chart = DashboardChart(dashboard_group_id='dg_id',
                               dashboard_id='d_id',
                               query_id='q_id',
                               chart_id='c_id',
                               chart_name='c_name',
                               chart_type='bar')

        query_key = '_dashboard://gold.dg_id/d_id/query/q_id'
        chart_key = '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id'
        expected_relation = {
            RELATION_END_KEY: chart_key,
            RELATION_START_LABEL: 'Query',
            RELATION_END_LABEL: 'Chart',
            RELATION_START_KEY: query_key,
            RELATION_TYPE: 'HAS_CHART',
            RELATION_REVERSE_TYPE: 'CHART_OF',
        }

        relation = chart.create_next_relation()

        self.assertEqual(expected_relation, relation)
        self.assertIsNone(chart.create_next_relation())
Ejemplo n.º 5
0
    def test_create_nodes(self) -> None:
        """Verify the Neo4j node for a full chart and for a URL-only chart.

        The first chart sets name, type and URL; the second omits name and
        type, and its expected node carries neither key.
        """
        # Case 1: every optional chart attribute is supplied.
        full_chart = DashboardChart(
            dashboard_group_id='dg_id',
            dashboard_id='d_id',
            query_id='q_id',
            chart_id='c_id',
            chart_name='c_name',
            chart_type='bar',
            chart_url='http://gold.foo/chart',
        )

        full_node = full_chart.create_next_node()
        full_node_serialized = neo4_serializer.serialize_node(full_node)
        expected_full_node: Dict[str, Any] = {
            'name': 'c_name',
            'type': 'bar',
            'id': 'c_id',
            'url': 'http://gold.foo/chart',
            'KEY': '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
            'LABEL': 'Chart',
        }

        assert full_node is not None
        self.assertDictEqual(expected_full_node, full_node_serialized)
        self.assertIsNone(full_chart.create_next_node())

        # Case 2: only the URL is supplied besides the identifiers.
        url_only_chart = DashboardChart(
            dashboard_group_id='dg_id',
            dashboard_id='d_id',
            query_id='q_id',
            chart_id='c_id',
            chart_url='http://gold.foo.bar/',
        )

        url_only_node = url_only_chart.create_next_node()
        url_only_node_serialized = neo4_serializer.serialize_node(url_only_node)
        expected_url_only_node: Dict[str, Any] = {
            'id': 'c_id',
            'KEY': '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
            'LABEL': 'Chart',
            'url': 'http://gold.foo.bar/',
        }

        assert url_only_node is not None
        self.assertDictEqual(expected_url_only_node, url_only_node_serialized)
    def test_create_records(self) -> None:
        """Verify the MySQL-serialized record for a full and a URL-only chart.

        The first chart sets name, type and URL; the second omits name and
        type, and its expected record carries neither key.
        """
        # Case 1: every optional chart attribute is supplied.
        full_chart = DashboardChart(
            dashboard_group_id='dg_id',
            dashboard_id='d_id',
            query_id='q_id',
            chart_id='c_id',
            chart_name='c_name',
            chart_type='bar',
            chart_url='http://gold.foo/chart',
        )

        full_record = full_chart.create_next_record()
        full_record_serialized = mysql_serializer.serialize_record(full_record)
        expected_full_record = {
            'rk': '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
            'id': 'c_id',
            'query_rk': '_dashboard://gold.dg_id/d_id/query/q_id',
            'name': 'c_name',
            'type': 'bar',
            'url': 'http://gold.foo/chart',
        }

        assert full_record is not None
        self.assertDictEqual(expected_full_record, full_record_serialized)
        self.assertIsNone(full_chart.create_next_record())

        # Case 2: only the URL is supplied besides the identifiers.
        url_only_chart = DashboardChart(
            dashboard_group_id='dg_id',
            dashboard_id='d_id',
            query_id='q_id',
            chart_id='c_id',
            chart_url='http://gold.foo.bar/',
        )

        url_only_record = url_only_chart.create_next_record()
        url_only_record_serialized = mysql_serializer.serialize_record(url_only_record)
        expected_url_only_record = {
            'rk': '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
            'id': 'c_id',
            'query_rk': '_dashboard://gold.dg_id/d_id/query/q_id',
            'url': 'http://gold.foo.bar/',
        }

        assert url_only_record is not None
        self.assertDictEqual(expected_url_only_record, url_only_record_serialized)
    def test_create_nodes(self):
        # type: () -> None
        """Verify the raw node dict for a full chart and for a URL-only chart.

        The value returned by ``create_next_node`` is compared directly with
        the expected dict, with no serializer in between.
        """
        # Case 1: every optional chart attribute is supplied.
        full_chart = DashboardChart(
            dashboard_group_id='dg_id',
            dashboard_id='d_id',
            query_id='q_id',
            chart_id='c_id',
            chart_name='c_name',
            chart_type='bar',
            chart_url='http://gold.foo/chart',
        )

        full_node = full_chart.create_next_node()
        expected_full_node = {
            'name': 'c_name',
            'type': 'bar',
            'id': 'c_id',
            'url': 'http://gold.foo/chart',
            'KEY': '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
            'LABEL': 'Chart',
        }

        self.assertDictEqual(expected_full_node, full_node)
        self.assertIsNone(full_chart.create_next_node())

        # Case 2: only the URL is supplied besides the identifiers.
        url_only_chart = DashboardChart(
            dashboard_group_id='dg_id',
            dashboard_id='d_id',
            query_id='q_id',
            chart_id='c_id',
            chart_url='http://gold.foo.bar/',
        )

        url_only_node = url_only_chart.create_next_node()
        expected_url_only_node = {
            'id': 'c_id',
            'KEY': '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
            'LABEL': 'Chart',
            'url': 'http://gold.foo.bar/',
        }
        self.assertDictEqual(expected_url_only_node, url_only_node)
    def _get_extract_iter(self) -> Iterator[Any]:
        """Yield dashboard model records for each published Redash dashboard.

        Records are pulled from ``self._extractor`` until it returns a falsy
        value. Each one is passed through ``self._transformer`` and skipped
        when ``self._is_published_dashboard`` rejects it.

        For every remaining dashboard the generator yields, in order:

        1. ``DashboardMetadata`` (with a description generated from widgets),
        2. ``DashboardLastModifiedTimestamp``,
        3. ``DashboardOwner``,
        4. a ``DashboardQuery`` followed by a ``DashboardChart`` for each
           visualization widget,
        5. one ``DashboardTable`` if ``self._parse_tables`` is set and
           produced at least one table key.
        """
        while True:
            record = self._extractor.extract()
            if not record:
                break  # the end.

            record = next(self._transformer.transform(record=record), None)

            if not self._is_published_dashboard(record):
                continue  # filter this one out

            # Fields shared by every model emitted for this dashboard.
            identity_data = {
                'cluster': self._cluster,
                'product': RedashDashboardExtractor.PRODUCT,
                'dashboard_group_id': str(RedashDashboardExtractor.DASHBOARD_GROUP_ID),
                'dashboard_id': str(record['dashboard_id']),
            }

            dash_data = {
                'dashboard_group': RedashDashboardExtractor.DASHBOARD_GROUP_NAME,
                'dashboard_group_url': self._redash_base_url,
                'dashboard_name': record['dashboard_name'],
                'dashboard_url': f'{self._redash_base_url}/dashboards/{record["dashboard_id"]}',
                'created_timestamp': record['created_timestamp'],
                **identity_data,
            }

            widgets = sort_widgets(record['widgets'])
            text_widgets = get_text_widgets(widgets)
            viz_widgets = get_visualization_widgets(widgets)

            # generate a description for this dashboard, since Redash does not have descriptions
            dash_data['description'] = generate_dashboard_description(
                text_widgets, viz_widgets)

            yield DashboardMetadata(**dash_data)

            yield DashboardLastModifiedTimestamp(
                last_modified_timestamp=record['last_modified_timestamp'],
                **identity_data)

            yield DashboardOwner(email=record['user']['email'], **identity_data)

            # Table keys are collected across all visualizations; the set
            # drops duplicates when several widgets reference the same table.
            table_keys = set()

            for viz in viz_widgets:
                query_data = {
                    'query_id': str(viz.query_id),
                    'query_name': viz.query_name,
                    'url': self._redash_base_url + viz.query_relative_url,
                    'query_text': viz.raw_query,
                    **identity_data,
                }
                yield DashboardQuery(**query_data)

                chart_data = {
                    'query_id': str(viz.query_id),
                    'chart_id': str(viz.visualization_id),
                    'chart_name': viz.visualization_name,
                    'chart_type': viz.visualization_type,
                    **identity_data,
                }
                yield DashboardChart(**chart_data)

                # if a table parser is provided, retrieve tables from this viz
                if self._parse_tables:
                    table_keys.update(tbl.key for tbl in self._parse_tables(viz))

            # Emit a table record only when at least one table was found.
            if table_keys:
                yield DashboardTable(table_ids=list(table_keys),
                                     **identity_data)
    def test_create_nodes(self) -> None:
        """Verify Neo4j and Neptune node output for a full and a URL-only chart.

        The first chart sets name, type and URL; the second omits name and
        type, and neither expected output carries those properties for it.
        """
        # Case 1: every optional chart attribute is supplied.
        full_chart = DashboardChart(
            dashboard_group_id='dg_id',
            dashboard_id='d_id',
            query_id='q_id',
            chart_id='c_id',
            chart_name='c_name',
            chart_type='bar',
            chart_url='http://gold.foo/chart',
        )

        full_node = full_chart.create_next_node()
        full_neo4j = neo4_serializer.serialize_node(full_node)
        full_neptune = neptune_serializer.convert_node(full_node)

        expected_full_neo4j: Dict[str, Any] = {
            'name': 'c_name',
            'type': 'bar',
            'id': 'c_id',
            'url': 'http://gold.foo/chart',
            'KEY': '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
            'LABEL': 'Chart',
        }
        expected_full_neptune = {
            '~id': 'Chart:_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
            METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT: '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
            '~label': 'Chart',
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
            'type:String(single)': 'bar',
            'name:String(single)': 'c_name',
            'id:String(single)': 'c_id',
            'url:String(single)': 'http://gold.foo/chart',
        }

        assert full_node is not None
        self.assertDictEqual(expected_full_neo4j, full_neo4j)
        self.assertDictEqual(expected_full_neptune, full_neptune)
        self.assertIsNone(full_chart.create_next_node())

        # Case 2: only the URL is supplied besides the identifiers.
        url_only_chart = DashboardChart(
            dashboard_group_id='dg_id',
            dashboard_id='d_id',
            query_id='q_id',
            chart_id='c_id',
            chart_url='http://gold.foo.bar/',
        )

        url_only_node = url_only_chart.create_next_node()
        url_only_neo4j = neo4_serializer.serialize_node(url_only_node)
        url_only_neptune = neptune_serializer.convert_node(url_only_node)

        expected_url_only_neo4j: Dict[str, Any] = {
            'id': 'c_id',
            'KEY': '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
            'LABEL': 'Chart',
            'url': 'http://gold.foo.bar/',
        }
        expected_url_only_neptune = {
            '~id': 'Chart:_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
            METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT: '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
            '~label': 'Chart',
            'id:String(single)': 'c_id',
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
            'url:String(single)': 'http://gold.foo.bar/',
        }

        assert url_only_node is not None
        self.assertDictEqual(expected_url_only_neo4j, url_only_neo4j)
        self.assertDictEqual(expected_url_only_neptune, url_only_neptune)
Ejemplo n.º 10
0
    def test_with_one_dashboard(self) -> None:
        """Run the Redash extractor against a mocked API serving one dashboard.

        ``requests.get`` (as used by ``databuilder.rest_api.rest_api_query``)
        is patched. URLs containing ``1000`` return the dashboard detail with
        a single visualization widget; any other URL returns a one-item
        dashboard listing.

        Successive ``extract()`` calls are then checked in order:
        DashboardMetadata, DashboardLastModifiedTimestamp, DashboardOwner,
        DashboardQuery, DashboardChart and DashboardTable.
        """
        def mock_api_get(url: str, *args: Any,
                         **kwargs: Any) -> MockApiResponse:
            # Detail endpoint for dashboard 1000.
            if '1000' in url:
                return MockApiResponse({
                    'id': 1000,
                    'widgets': [{
                        'visualization': {
                            'query': {
                                'data_source_id': 1,
                                'id': 1234,
                                'name': 'Test Query',
                                'query': 'SELECT id FROM users'
                            },
                            'id': 12345,
                            'name': 'test_widget',
                            'type': 'CHART',
                        },
                        'options': {}
                    }]
                })

            # Everything else is treated as the paginated dashboard listing.
            return MockApiResponse({
                'page': 1,
                'count': 1,
                'page_size': 50,
                'results': [{
                    'id': 1000,
                    'name': 'Test Dash',
                    'slug': 'test-dash',
                    'created_at': '2020-01-01T00:00:00.000Z',
                    'updated_at': '2020-01-02T00:00:00.000Z',
                    'is_archived': False,
                    'is_draft': False,
                    'user': {
                        'email': '*****@*****.**'
                    }
                }]
            })

        redash_base_url = 'https://redash.example.com'
        config = ConfigFactory.from_dict({
            'extractor.redash_dashboard.redash_base_url':
            redash_base_url,
            'extractor.redash_dashboard.api_base_url':
            redash_base_url,  # probably not but doesn't matter
            'extractor.redash_dashboard.api_key':
            'abc123',
            'extractor.redash_dashboard.table_parser':
            'tests.unit.extractor.dashboard.redash.test_redash_dashboard_extractor.dummy_tables'
        })

        with patch('databuilder.rest_api.rest_api_query.requests.get'
                   ) as mock_get:
            mock_get.side_effect = mock_api_get

            extractor = RedashDashboardExtractor()
            extractor.init(
                Scoped.get_scoped_conf(conf=config,
                                       scope=extractor.get_scope()))

            # DashboardMetadata
            record = extractor.extract()
            self.assertEqual(record.dashboard_id, '1000')
            self.assertEqual(record.dashboard_name, 'Test Dash')
            self.assertEqual(record.dashboard_group_id,
                             RedashDashboardExtractor.DASHBOARD_GROUP_ID)
            self.assertEqual(record.dashboard_group,
                             RedashDashboardExtractor.DASHBOARD_GROUP_NAME)
            self.assertEqual(record.product, RedashDashboardExtractor.PRODUCT)
            self.assertEqual(record.cluster,
                             RedashDashboardExtractor.DEFAULT_CLUSTER)
            self.assertEqual(record.created_timestamp, 1577836800)
            # assertIn reports both operands on failure, unlike assertTrue.
            self.assertIn(redash_base_url, record.dashboard_url)
            self.assertIn('1000', record.dashboard_url)

            # Identity fields shared by all remaining expected records.
            identity: Dict[str, Any] = {
                'dashboard_id': '1000',
                'dashboard_group_id':
                RedashDashboardExtractor.DASHBOARD_GROUP_ID,
                'product': RedashDashboardExtractor.PRODUCT,
                'cluster': 'prod'
            }

            # The remaining records are compared through their repr.

            # DashboardLastModified
            record = extractor.extract()
            expected_timestamp = DashboardLastModifiedTimestamp(
                last_modified_timestamp=1577923200, **identity)
            self.assertEqual(repr(record), repr(expected_timestamp))

            # DashboardOwner
            record = extractor.extract()
            expected_owner = DashboardOwner(email='*****@*****.**',
                                            **identity)
            self.assertEqual(repr(record), repr(expected_owner))

            # DashboardQuery
            record = extractor.extract()
            expected_query = DashboardQuery(
                query_id='1234',
                query_name='Test Query',
                url='{base}/queries/1234'.format(base=redash_base_url),
                query_text='SELECT id FROM users',
                **identity)
            self.assertEqual(repr(record), repr(expected_query))

            # DashboardChart
            record = extractor.extract()
            expected_chart = DashboardChart(query_id='1234',
                                            chart_id='12345',
                                            chart_name='test_widget',
                                            chart_type='CHART',
                                            **identity)
            self.assertEqual(repr(record), repr(expected_chart))

            # DashboardTable
            record = extractor.extract()
            expected_table = DashboardTable(
                table_ids=[
                    TableRelationData('some_db', 'prod', 'public', 'users').key
                ],
                **identity)
            self.assertEqual(repr(record), repr(expected_table))
Ejemplo n.º 11
0
    def _get_extract_iter(self) -> Iterator[Any]:
        """Yield dashboard model records for each Databricks SQL dashboard.

        Records are pulled from ``self._extractor`` until it returns a falsy
        value, and each one is passed through ``self._transformer``.

        For every dashboard the generator yields, in order:
        ``DashboardOwner``, ``DashboardLastModifiedTimestamp``, then (only if
        the record has a ``"widgets"`` entry) a ``DashboardQuery`` followed by
        a ``DashboardChart`` per visualization widget, and finally
        ``DashboardMetadata``.
        """
        while True:
            raw = self._extractor.extract()
            if not raw:
                break

            record = next(self._transformer.transform(record=raw), None)

            # Fields shared by every model emitted for this dashboard.
            identity = {
                "dashboard_group_id":
                DatabricksSQLDashboardExtractor.DASHBOARD_GROUP_ID,
                "dashboard_id": record["dashboard_id"],
                "product": "databricks-sql",
            }

            # Built up front, but emitted last because the description is
            # only known after the widgets have been processed.
            metadata_fields = {
                "dashboard_group":
                DatabricksSQLDashboardExtractor.DASHBOARD_GROUP_NAME,
                "dashboard_name": record["dashboard_name"],
                "dashboard_url":
                f"{self._databricks_host}/sql/dashboards/{record['dashboard_id']}",
                "dashboard_group_url": self._databricks_host,
                "created_timestamp": record["created_timestamp"],
                "tags": record["tags"],
            }

            owner_fields = {"email": record["user"]["email"], **identity}
            yield DashboardOwner(**owner_fields)

            last_modified_fields = {
                "last_modified_timestamp": record["last_modified_timestamp"],
                **identity,
            }
            yield DashboardLastModifiedTimestamp(**last_modified_fields)

            if "widgets" in record:
                ordered_widgets = sort_widgets(record["widgets"])
                text_widgets = get_text_widgets(ordered_widgets)
                viz_widgets = get_visualization_widgets(ordered_widgets)
                metadata_fields["description"] = generate_dashboard_description(
                    text_widgets, viz_widgets)

                for viz in viz_widgets:
                    query_fields = {
                        "query_id": str(viz.query_id),
                        "query_name": viz.query_name,
                        "url": self._databricks_host + viz.query_relative_url,
                        "query_text": viz.raw_query,
                        **identity,
                    }
                    yield DashboardQuery(**query_fields)

                    chart_fields = {
                        "query_id": str(viz.query_id),
                        "chart_id": str(viz.visualization_id),
                        "chart_name": viz.visualization_name,
                        "chart_type": viz.visualization_type,
                        **identity,
                    }
                    yield DashboardChart(**chart_fields)

            # Identity values are merged last, so they win on any key clash.
            yield DashboardMetadata(**{**metadata_fields, **identity})