Beispiel #1
0
    def test_dashboard_owner_relations(self) -> None:
        """Verify the Dashboard -> Timestamp relation of a last-modified record.

        The first ``create_next_relation()`` call must yield the
        LAST_UPDATED_AT / LAST_UPDATED_TIME_OF relation between the dashboard
        key and its timestamp key; the following call must return ``None``.
        """
        dashboard_last_modified = DashboardLastModifiedTimestamp(
            last_modified_timestamp=123456789,
            cluster='cluster_id',
            product='product_id',
            dashboard_id='dashboard_id',
            dashboard_group_id='dashboard_group_id')

        actual = dashboard_last_modified.create_next_relation()
        # Guard before the value is consumed so a missing relation fails here
        # instead of somewhere inside the serializer or the dict comparison.
        assert actual is not None
        actual_serialized = neo4_serializer.serialize_relationship(actual)

        dashboard_key = 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id'
        expected: Dict[str, Any] = {
            RELATION_END_KEY: dashboard_key + '/_last_modified_timestamp',
            RELATION_START_LABEL: 'Dashboard',
            RELATION_END_LABEL: 'Timestamp',
            RELATION_START_KEY: dashboard_key,
            RELATION_TYPE: 'LAST_UPDATED_AT',
            RELATION_REVERSE_TYPE: 'LAST_UPDATED_TIME_OF',
        }

        self.assertDictEqual(actual_serialized, expected)
        self.assertIsNone(dashboard_last_modified.create_next_relation())
    def test_dashboard_owner_relations(self):
        # type: () -> None
        """Verify the Dashboard -> Timestamp relation of a last-modified record.

        The first ``create_next_relation()`` call is compared directly against
        the expected relation dict; the following call must return ``None``.
        """
        dashboard_last_modified = DashboardLastModifiedTimestamp(
            last_modified_timestamp=123456789,
            cluster='cluster_id',
            product='product_id',
            dashboard_id='dashboard_id',
            dashboard_group_id='dashboard_group_id')

        # NOTE: the stray debugging print of the relation was removed; on a
        # mismatch assertDictEqual already reports both dicts.
        actual = dashboard_last_modified.create_next_relation()
        expected = {
            RELATION_END_KEY:
            'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id'
            '/_last_modified_timestamp',
            RELATION_START_LABEL:
            'Dashboard',
            RELATION_END_LABEL:
            'Timestamp',
            RELATION_START_KEY:
            'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id',
            RELATION_TYPE:
            'LAST_UPDATED_AT',
            RELATION_REVERSE_TYPE:
            'LAST_UPDATED_TIME_OF'
        }

        self.assertDictEqual(actual, expected)
        self.assertIsNone(dashboard_last_modified.create_next_relation())
Beispiel #3
0
    def setUp(self) -> None:
        """Build the shared last-modified fixture and the keys tests expect from it."""
        # Key of the dashboard itself; the timestamp key is derived from it.
        dashboard_key = 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id'
        self.expected_dashboard_key = dashboard_key
        self.expected_ts_key = dashboard_key + '/_last_modified_timestamp'

        self.dashboard_last_modified = DashboardLastModifiedTimestamp(
            last_modified_timestamp=123456789,
            cluster='cluster_id',
            product='product_id',
            dashboard_id='dashboard_id',
            dashboard_group_id='dashboard_group_id',
        )
Beispiel #4
0
    def _get_extract_iter(
        self
    ) -> Iterator[Union[DashboardMetadata, DashboardLastModifiedTimestamp,
                        None]]:
        """Yield a metadata record and a last-modified record per dashboard.

        Details are fetched for every id returned by
        ``_get_resource_ids('dashboard')`` before anything is yielded. When
        ``extract_published_only`` is set, only entries whose
        ``result.published`` field is truthy are kept.
        """
        details = [
            self._get_dashboard_details(resource_id)
            for resource_id in self._get_resource_ids('dashboard')
        ]

        if self.extract_published_only:
            details = [
                item for item in details
                if self.get_nested_field(item, 'result.published')
            ]

        for item in details:
            metadata_kwargs = self.map_fields(
                item, self.dashboard_metadata_field_mappings())
            metadata_kwargs.update(**self.common_params)
            yield DashboardMetadata(**metadata_kwargs)

            # Mapped only after the metadata record has been consumed.
            last_modified_kwargs = self.map_fields(
                item, self.last_modified_field_mappings())
            last_modified_kwargs.update(**self.common_params)
            yield DashboardLastModifiedTimestamp(**last_modified_kwargs)
    def test_dashboard_timestamp_nodes(self):
        # type: () -> None
        """Check the single Timestamp node produced by a last-modified record."""
        timestamp_model = DashboardLastModifiedTimestamp(
            last_modified_timestamp=123456789,
            cluster='cluster_id',
            product='product_id',
            dashboard_id='dashboard_id',
            dashboard_group_id='dashboard_group_id',
        )

        expected_node = {
            'timestamp': 123456789,
            'name': 'last_updated_timestamp',
            'KEY': 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id/_last_modified_timestamp',
            'LABEL': 'Timestamp',
        }
        first_node = timestamp_model.create_next_node()

        self.assertDictEqual(first_node, expected_node)
        # Only one node is expected; the next call must report exhaustion.
        self.assertIsNone(timestamp_model.create_next_node())
Beispiel #6
0
    def test_dashboard_timestamp_nodes(self) -> None:
        """Verify the serialized Timestamp node of a last-modified record.

        The first ``create_next_node()`` result is serialized with
        ``neo4_serializer.serialize_node`` and compared against the expected
        dict; the following call must return ``None``.
        """
        dashboard_last_modified = DashboardLastModifiedTimestamp(
            last_modified_timestamp=123456789,
            cluster='cluster_id',
            product='product_id',
            dashboard_id='dashboard_id',
            dashboard_group_id='dashboard_group_id')

        actual = dashboard_last_modified.create_next_node()
        # Guard before the value is consumed so a missing node fails here
        # instead of somewhere inside the serializer or the dict comparison.
        assert actual is not None
        actual_serialized = neo4_serializer.serialize_node(actual)

        expected: Dict[str, Any] = {
            'timestamp:UNQUOTED': 123456789,
            'name': 'last_updated_timestamp',
            'KEY':
            'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id/_last_modified_timestamp',
            'LABEL': 'Timestamp'
        }

        self.assertDictEqual(actual_serialized, expected)
        self.assertIsNone(dashboard_last_modified.create_next_node())
    def _get_extract_iter(self) -> Iterator[Any]:
        """Yield every model derived from each published dashboard.

        Per dashboard, in order: ``DashboardMetadata``,
        ``DashboardLastModifiedTimestamp``, ``DashboardOwner``, then a
        ``DashboardQuery`` / ``DashboardChart`` pair for each visualization
        widget, and finally one ``DashboardTable`` when the optional table
        parser found at least one table key.
        """
        while True:
            raw_record = self._extractor.extract()
            if not raw_record:
                # Underlying extractor is exhausted.
                break

            record = next(self._transformer.transform(record=raw_record), None)

            if not self._is_published_dashboard(record):
                # Skip dashboards that are not published.
                continue

            # Fields shared by every model emitted for this dashboard.
            identity = dict(
                cluster=self._cluster,
                product=RedashDashboardExtractor.PRODUCT,
                dashboard_group_id=str(RedashDashboardExtractor.DASHBOARD_GROUP_ID),
                dashboard_id=str(record['dashboard_id']),
            )

            metadata_kwargs = {
                'dashboard_group': RedashDashboardExtractor.DASHBOARD_GROUP_NAME,
                'dashboard_group_url': self._redash_base_url,
                'dashboard_name': record['dashboard_name'],
                'dashboard_url': f'{self._redash_base_url}/dashboards/{record["dashboard_id"]}',
                'created_timestamp': record['created_timestamp'],
                **identity,
            }

            ordered_widgets = sort_widgets(record['widgets'])
            text_widgets = get_text_widgets(ordered_widgets)
            viz_widgets = get_visualization_widgets(ordered_widgets)

            # Redash has no dashboard descriptions, so one is generated from the widgets.
            metadata_kwargs['description'] = generate_dashboard_description(
                text_widgets, viz_widgets)

            yield DashboardMetadata(**metadata_kwargs)

            yield DashboardLastModifiedTimestamp(
                last_modified_timestamp=record['last_modified_timestamp'],
                **identity)

            yield DashboardOwner(email=record['user']['email'], **identity)

            table_keys = set()

            for viz in viz_widgets:
                yield DashboardQuery(
                    query_id=str(viz.query_id),
                    query_name=viz.query_name,
                    url=self._redash_base_url + viz.query_relative_url,
                    query_text=viz.raw_query,
                    **identity)

                yield DashboardChart(
                    query_id=str(viz.query_id),
                    chart_id=str(viz.visualization_id),
                    chart_name=viz.visualization_name,
                    chart_type=viz.visualization_type,
                    **identity)

                # Table extraction is optional: only done when a parser is configured.
                if self._parse_tables:
                    table_keys.update(
                        tbl.key for tbl in self._parse_tables(viz))

            if table_keys:
                yield DashboardTable(table_ids=list(table_keys), **identity)
Beispiel #8
0
    def test_with_one_dashboard(self) -> None:
        """Run the Redash extractor against a mocked API exposing one dashboard.

        ``requests.get`` (as used by ``databuilder.rest_api.rest_api_query``)
        is patched: URLs containing ``test-dash`` return the dashboard detail
        payload, every other URL returns a one-item listing page. The records
        pulled from the extractor are then checked in the order this test
        expects them: metadata, last-modified timestamp, owner, query, table.
        """
        def mock_api_get(url: str, *args: Any, **kwargs: Any) -> MockApiResponse:
            # Detail endpoint for the single dashboard.
            if 'test-dash' in url:
                return MockApiResponse({
                    'id': 123,
                    'widgets': [
                        {
                            'visualization': {
                                'query': {
                                    'data_source_id': 1,
                                    'id': '1234',
                                    'name': 'Test Query',
                                    'query': 'SELECT id FROM users'
                                }
                            },
                            'options': {}
                        }
                    ]
                })

            # Anything else is treated as the dashboard listing endpoint.
            return MockApiResponse({
                'page': 1,
                'count': 1,
                'page_size': 50,
                'results': [
                    {
                        'id': 123,
                        'name': 'Test Dash',
                        'slug': 'test-dash',
                        'created_at': '2020-01-01T00:00:00.000Z',
                        'updated_at': '2020-01-02T00:00:00.000Z',
                        'is_archived': False,
                        'is_draft': False,
                        'user': {'email': '*****@*****.**'}
                    }
                ]
            })

        redash_base_url = 'https://redash.example.com'
        config = ConfigFactory.from_dict({
            'extractor.redash_dashboard.redash_base_url': redash_base_url,
            'extractor.redash_dashboard.api_base_url': redash_base_url,  # probably not but doesn't matter
            'extractor.redash_dashboard.api_key': 'abc123',
            'extractor.redash_dashboard.table_parser':
                'tests.unit.extractor.dashboard.redash.test_redash_dashboard_extractor.dummy_tables'
        })

        with patch('databuilder.rest_api.rest_api_query.requests.get') as mock_get:
            mock_get.side_effect = mock_api_get

            extractor = RedashDashboardExtractor()
            extractor.init(Scoped.get_scoped_conf(conf=config, scope=extractor.get_scope()))

            # DashboardMetadata
            record = extractor.extract()
            self.assertEqual(record.dashboard_id, 123)
            self.assertEqual(record.dashboard_name, 'Test Dash')
            self.assertEqual(record.dashboard_group_id, RedashDashboardExtractor.DASHBOARD_GROUP_ID)
            self.assertEqual(record.dashboard_group, RedashDashboardExtractor.DASHBOARD_GROUP_NAME)
            self.assertEqual(record.product, RedashDashboardExtractor.PRODUCT)
            self.assertEqual(record.cluster, RedashDashboardExtractor.DEFAULT_CLUSTER)
            # Epoch seconds for the mocked created_at, 2020-01-01T00:00:00Z.
            self.assertEqual(record.created_timestamp, 1577836800)
            # assertIn reports both operands on failure, unlike assertTrue(x in y).
            self.assertIn(redash_base_url, record.dashboard_url)
            self.assertIn('test-dash', record.dashboard_url)

            # DashboardLastModified
            record = extractor.extract()
            identity: Dict[str, Any] = {
                'dashboard_id': 123,
                'dashboard_group_id': RedashDashboardExtractor.DASHBOARD_GROUP_ID,
                'product': RedashDashboardExtractor.PRODUCT,
                'cluster': 'prod'
            }
            expected_timestamp = DashboardLastModifiedTimestamp(
                # Epoch seconds for the mocked updated_at, 2020-01-02T00:00:00Z.
                last_modified_timestamp=1577923200,
                **identity
            )
            # Models are compared through their repr throughout this test.
            self.assertEqual(repr(record), repr(expected_timestamp))

            # DashboardOwner
            record = extractor.extract()
            expected_owner = DashboardOwner(email='*****@*****.**', **identity)
            self.assertEqual(repr(record), repr(expected_owner))

            # DashboardQuery
            record = extractor.extract()
            expected_query = DashboardQuery(
                query_id='1234',
                query_name='Test Query',
                url=f'{redash_base_url}/queries/1234',
                query_text='SELECT id FROM users',
                **identity
            )
            self.assertEqual(repr(record), repr(expected_query))

            # DashboardTable
            record = extractor.extract()
            expected_table = DashboardTable(
                table_ids=[TableRelationData('some_db', 'prod', 'public', 'users').key],
                **identity
            )
            self.assertEqual(repr(record), repr(expected_table))
Beispiel #9
0
class TestDashboardLastModifiedTimestamp(unittest.TestCase):
    """Tests for the outputs of ``DashboardLastModifiedTimestamp``.

    One fixture instance is created per test in ``setUp``; each test pulls the
    first node / relation / record / Atlas entity from it, serializes it with
    the matching serializer and compares against the expected dict.
    """

    def setUp(self) -> None:
        """Create the fixture model and the keys the tests expect from it."""
        self.dashboard_last_modified = DashboardLastModifiedTimestamp(
            last_modified_timestamp=123456789,
            cluster='cluster_id',
            product='product_id',
            dashboard_id='dashboard_id',
            dashboard_group_id='dashboard_group_id')

        self.expected_ts_key = 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id/' \
                               '_last_modified_timestamp'
        self.expected_dashboard_key = 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id'

    def test_dashboard_timestamp_nodes(self) -> None:
        """The neo4j-serialized Timestamp node matches; no second node follows."""
        actual = self.dashboard_last_modified.create_next_node()
        # Guard before the value is consumed by the serializer.
        assert actual is not None
        actual_serialized = neo4_serializer.serialize_node(actual)

        expected: Dict[str, Any] = {
            'timestamp:UNQUOTED': 123456789,
            'name': 'last_updated_timestamp',
            'KEY': self.expected_ts_key,
            'LABEL': 'Timestamp'
        }

        self.assertDictEqual(actual_serialized, expected)
        self.assertIsNone(self.dashboard_last_modified.create_next_node())

    def test_neptune_dashboard_timestamp_nodes(self) -> None:
        """The Neptune-converted Timestamp node matches the bulk-loader layout."""
        actual = self.dashboard_last_modified.create_next_node()
        actual_neptune_serialized = neptune_serializer.convert_node(actual)
        neptune_expected = {
            NEPTUNE_HEADER_ID:
            'Timestamp:' + self.expected_ts_key,
            METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT:
            self.expected_ts_key,
            NEPTUNE_HEADER_LABEL:
            'Timestamp',
            # The extraction time is not pinned by this test, hence ANY.
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            ANY,
            NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT:
            NEPTUNE_CREATION_TYPE_JOB,
            'name:String(single)':
            'last_updated_timestamp',
            'timestamp:Long(single)':
            123456789,
        }

        self.assertDictEqual(actual_neptune_serialized, neptune_expected)

    def test_dashboard_owner_relations(self) -> None:
        """The neo4j-serialized Dashboard -> Timestamp relation matches; no second one follows."""
        actual = self.dashboard_last_modified.create_next_relation()
        # Guard before the value is consumed by the serializer.
        assert actual is not None
        actual_serialized = neo4_serializer.serialize_relationship(actual)

        expected: Dict[str, Any] = {
            RELATION_END_KEY: self.expected_ts_key,
            RELATION_START_LABEL: 'Dashboard',
            RELATION_END_LABEL: 'Timestamp',
            RELATION_START_KEY: self.expected_dashboard_key,
            RELATION_TYPE: 'LAST_UPDATED_AT',
            RELATION_REVERSE_TYPE: 'LAST_UPDATED_TIME_OF'
        }

        self.assertDictEqual(actual_serialized, expected)
        self.assertIsNone(self.dashboard_last_modified.create_next_relation())

    def test_dashboard_owner_relations_neptune(self) -> None:
        """The Neptune conversion yields the forward and the reversed relation, in that order."""
        # Show full dict diffs on failure; the expected dicts are large.
        self.maxDiff = None

        actual = self.dashboard_last_modified.create_next_relation()
        # Guard before the value is consumed by the serializer.
        assert actual is not None
        actual_serialized = neptune_serializer.convert_relationship(actual)

        dashboard_vertex = 'Dashboard:' + self.expected_dashboard_key
        timestamp_vertex = 'Timestamp:' + self.expected_ts_key
        # Relation ids have the shape "<label>:<from vertex id>_<to vertex id>".
        forward_id = f'LAST_UPDATED_AT:{dashboard_vertex}_{timestamp_vertex}'
        reversed_id = f'LAST_UPDATED_TIME_OF:{timestamp_vertex}_{dashboard_vertex}'

        neptune_forward_expected = {
            NEPTUNE_HEADER_ID: forward_id,
            METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT: forward_id,
            NEPTUNE_RELATIONSHIP_HEADER_FROM: dashboard_vertex,
            NEPTUNE_RELATIONSHIP_HEADER_TO: timestamp_vertex,
            NEPTUNE_HEADER_LABEL: 'LAST_UPDATED_AT',
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            NEPTUNE_CREATION_TYPE_JOB
        }

        neptune_reversed_expected = {
            NEPTUNE_HEADER_ID: reversed_id,
            METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT: reversed_id,
            NEPTUNE_RELATIONSHIP_HEADER_FROM: timestamp_vertex,
            NEPTUNE_RELATIONSHIP_HEADER_TO: dashboard_vertex,
            NEPTUNE_HEADER_LABEL: 'LAST_UPDATED_TIME_OF',
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT:
            NEPTUNE_CREATION_TYPE_JOB
        }

        self.assertDictEqual(actual_serialized[0], neptune_forward_expected)
        self.assertDictEqual(actual_serialized[1], neptune_reversed_expected)
        self.assertIsNone(self.dashboard_last_modified.create_next_relation())

    def test_dashboard_timestamp_records(self) -> None:
        """The MySQL-serialized record matches; no second record follows."""
        actual = self.dashboard_last_modified.create_next_record()
        # Guard before the value is consumed by the serializer.
        assert actual is not None
        actual_serialized = mysql_serializer.serialize_record(actual)

        # Reuse the keys from setUp rather than repeating the literals.
        expected = {
            'rk': self.expected_ts_key,
            'timestamp': 123456789,
            'name': 'last_updated_timestamp',
            'dashboard_rk': self.expected_dashboard_key
        }

        self.assertDictEqual(actual_serialized, expected)
        self.assertIsNone(self.dashboard_last_modified.create_next_record())

    def test_dashboard_last_modified_relation_atlas(self) -> None:
        """The Atlas-serialized Dashboard UPDATE entity matches; no second entity follows."""
        actual = self.dashboard_last_modified.create_next_atlas_entity()
        # Guard before the value is consumed by the serializer.
        assert actual is not None
        actual_serialized = atlas_serializer.serialize_entity(actual)

        expected = {
            "typeName": "Dashboard",
            "operation": "UPDATE",
            "relationships": None,
            "qualifiedName": self.expected_dashboard_key,
            "lastModifiedTimestamp": 123456789
        }

        self.assertDictEqual(actual_serialized, expected)
        self.assertIsNone(
            self.dashboard_last_modified.create_next_atlas_entity())
Beispiel #10
0
    def _get_extract_iter(self) -> Iterator[Any]:
        """Yield every model derived from each extracted dashboard record.

        Per dashboard, in order: ``DashboardOwner``,
        ``DashboardLastModifiedTimestamp``, a ``DashboardQuery`` /
        ``DashboardChart`` pair per visualization widget (only when the record
        has a ``widgets`` entry), and finally ``DashboardMetadata``.
        """
        while True:
            raw_record = self._extractor.extract()
            if not raw_record:
                break

            record = next(self._transformer.transform(record=raw_record), None)

            # Fields shared by every model emitted for this dashboard.
            identity = {
                "dashboard_group_id": DatabricksSQLDashboardExtractor.DASHBOARD_GROUP_ID,
                "dashboard_id": record["dashboard_id"],
                "product": "databricks-sql",
            }

            # Assembled up front; it is emitted last, once the optional
            # description has been added.
            metadata_kwargs = {
                "dashboard_group": DatabricksSQLDashboardExtractor.DASHBOARD_GROUP_NAME,
                "dashboard_name": record["dashboard_name"],
                "dashboard_url": f"{self._databricks_host}/sql/dashboards/{record['dashboard_id']}",
                "dashboard_group_url": self._databricks_host,
                "created_timestamp": record["created_timestamp"],
                "tags": record["tags"],
            }

            yield DashboardOwner(email=record["user"]["email"], **identity)

            yield DashboardLastModifiedTimestamp(
                last_modified_timestamp=record["last_modified_timestamp"],
                **identity)

            if "widgets" in record:
                ordered_widgets = sort_widgets(record["widgets"])
                text_widgets = get_text_widgets(ordered_widgets)
                viz_widgets = get_visualization_widgets(ordered_widgets)
                metadata_kwargs["description"] = generate_dashboard_description(
                    text_widgets, viz_widgets)

                for viz in viz_widgets:
                    yield DashboardQuery(
                        query_id=str(viz.query_id),
                        query_name=viz.query_name,
                        url=self._databricks_host + viz.query_relative_url,
                        query_text=viz.raw_query,
                        **identity)

                    yield DashboardChart(
                        query_id=str(viz.query_id),
                        chart_id=str(viz.visualization_id),
                        chart_name=viz.visualization_name,
                        chart_type=viz.visualization_type,
                        **identity)

            yield DashboardMetadata(**{**metadata_kwargs, **identity})