def test_create_atlas_entity(self) -> None:
    """A DashboardChart yields exactly one Atlas entity with the expected serialized form."""
    chart = DashboardChart(
        dashboard_group_id='dg_id',
        dashboard_id='d_id',
        query_id='q_id',
        chart_id='c_id',
        chart_name='c_name',
        chart_type='bar',
        chart_url='http://gold.foo/chart',
        product='superset',
    )

    entity = chart.create_next_atlas_entity()
    serialized_entity = atlas_serializer.serialize_entity(entity)

    expected = {
        'operation': 'CREATE',
        'typeName': 'DashboardChart',
        'qualifiedName': 'superset_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
        'relationships': 'query#DashboardQuery#superset_dashboard://gold.dg_id/d_id/query/q_id',
        'name': 'c_name',
        'type': 'bar',
        'url': 'http://gold.foo/chart',
    }

    assert entity is not None
    self.assertDictEqual(expected, serialized_entity)

    # The second call must report that no further entities are available.
    self.assertIsNone(chart.create_next_atlas_entity())
def test_create_relation(self) -> None:
    """The single Query->Chart relation serializes correctly for both Neo4j and Neptune."""
    chart = DashboardChart(
        dashboard_group_id='dg_id',
        dashboard_id='d_id',
        query_id='q_id',
        chart_id='c_id',
        chart_name='c_name',
        chart_type='bar',
    )

    relation = chart.create_next_relation()
    neo4j_serialized = neo4_serializer.serialize_relationship(relation)
    neptune_serialized = neptune_serializer.convert_relationship(relation)

    query_key = '_dashboard://gold.dg_id/d_id/query/q_id'
    chart_key = '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id'
    edge_id_template = "{from_vertex_id}_{to_vertex_id}_{label}"

    expected: Dict[str, Any] = {
        RELATION_START_LABEL: 'Query',
        RELATION_START_KEY: query_key,
        RELATION_END_LABEL: 'Chart',
        RELATION_END_KEY: chart_key,
        RELATION_TYPE: 'HAS_CHART',
        RELATION_REVERSE_TYPE: 'CHART_OF',
    }

    # Forward edge: query -> chart.
    neptune_forward_expected = {
        NEPTUNE_HEADER_ID: edge_id_template.format(
            from_vertex_id=query_key,
            to_vertex_id=chart_key,
            label='HAS_CHART',
        ),
        NEPTUNE_RELATIONSHIP_HEADER_FROM: query_key,
        NEPTUNE_RELATIONSHIP_HEADER_TO: chart_key,
        NEPTUNE_HEADER_LABEL: 'HAS_CHART',
        NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
        NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
    }

    # Reverse edge: chart -> query.
    neptune_reversed_expected = {
        NEPTUNE_HEADER_ID: edge_id_template.format(
            from_vertex_id=chart_key,
            to_vertex_id=query_key,
            label='CHART_OF',
        ),
        NEPTUNE_RELATIONSHIP_HEADER_FROM: chart_key,
        NEPTUNE_RELATIONSHIP_HEADER_TO: query_key,
        NEPTUNE_HEADER_LABEL: 'CHART_OF',
        NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
        NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
    }

    assert relation is not None
    self.assertEqual(expected, neo4j_serialized)
    self.assertEqual(neptune_forward_expected, neptune_serialized[0])
    self.assertEqual(neptune_reversed_expected, neptune_serialized[1])

    # Only one relation is produced; the next call must return None.
    self.assertIsNone(chart.create_next_relation())
def test_create_relation(self) -> None:
    """The single Query->Chart relation serializes to the expected Neo4j mapping."""
    chart = DashboardChart(
        dashboard_group_id='dg_id',
        dashboard_id='d_id',
        query_id='q_id',
        chart_id='c_id',
        chart_name='c_name',
        chart_type='bar',
    )

    relation = chart.create_next_relation()
    serialized_relation = neo4_serializer.serialize_relationship(relation)

    expected: Dict[str, Any] = {
        RELATION_START_LABEL: 'Query',
        RELATION_START_KEY: '_dashboard://gold.dg_id/d_id/query/q_id',
        RELATION_END_LABEL: 'Chart',
        RELATION_END_KEY: '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
        RELATION_TYPE: 'HAS_CHART',
        RELATION_REVERSE_TYPE: 'CHART_OF',
    }

    assert relation is not None
    self.assertEqual(expected, serialized_relation)

    # Only one relation is produced; the next call must return None.
    self.assertIsNone(chart.create_next_relation())
def test_create_relation(self):
    # type: () -> None
    """The relation returned by create_next_relation equals the expected Query->Chart mapping."""
    chart = DashboardChart(
        dashboard_group_id='dg_id',
        dashboard_id='d_id',
        query_id='q_id',
        chart_id='c_id',
        chart_name='c_name',
        chart_type='bar',
    )

    relation = chart.create_next_relation()

    expected = {
        RELATION_START_LABEL: 'Query',
        RELATION_START_KEY: '_dashboard://gold.dg_id/d_id/query/q_id',
        RELATION_END_LABEL: 'Chart',
        RELATION_END_KEY: '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
        RELATION_TYPE: 'HAS_CHART',
        RELATION_REVERSE_TYPE: 'CHART_OF',
    }

    # Here the relation is compared directly, without a serializer step.
    self.assertEqual(expected, relation)

    # Only one relation is produced; the next call must return None.
    self.assertIsNone(chart.create_next_relation())
def test_create_nodes(self) -> None:
    """Chart nodes serialize for Neo4j both with and without the optional name/type fields."""
    chart_key = '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id'

    # Case 1: every optional attribute is supplied.
    full_chart = DashboardChart(
        dashboard_group_id='dg_id',
        dashboard_id='d_id',
        query_id='q_id',
        chart_id='c_id',
        chart_name='c_name',
        chart_type='bar',
        chart_url='http://gold.foo/chart',
    )
    node = full_chart.create_next_node()
    serialized_node = neo4_serializer.serialize_node(node)
    expected: Dict[str, Any] = {
        'KEY': chart_key,
        'LABEL': 'Chart',
        'id': 'c_id',
        'name': 'c_name',
        'type': 'bar',
        'url': 'http://gold.foo/chart',
    }
    assert node is not None
    self.assertDictEqual(expected, serialized_node)
    self.assertIsNone(full_chart.create_next_node())

    # Case 2: chart_name and chart_type are omitted, so the expected node carries no name/type.
    minimal_chart = DashboardChart(
        dashboard_group_id='dg_id',
        dashboard_id='d_id',
        query_id='q_id',
        chart_id='c_id',
        chart_url='http://gold.foo.bar/',
    )
    node2 = minimal_chart.create_next_node()
    serialized_node2 = neo4_serializer.serialize_node(node2)
    expected2: Dict[str, Any] = {
        'KEY': chart_key,
        'LABEL': 'Chart',
        'id': 'c_id',
        'url': 'http://gold.foo.bar/',
    }
    assert node2 is not None
    self.assertDictEqual(expected2, serialized_node2)
def test_create_records(self) -> None:
    """Chart records serialize for MySQL both with and without the optional name/type fields."""
    chart_rk = '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id'
    query_rk = '_dashboard://gold.dg_id/d_id/query/q_id'

    # Case 1: every optional attribute is supplied.
    full_chart = DashboardChart(
        dashboard_group_id='dg_id',
        dashboard_id='d_id',
        query_id='q_id',
        chart_id='c_id',
        chart_name='c_name',
        chart_type='bar',
        chart_url='http://gold.foo/chart',
    )
    record = full_chart.create_next_record()
    serialized_record = mysql_serializer.serialize_record(record)
    expected = {
        'rk': chart_rk,
        'query_rk': query_rk,
        'id': 'c_id',
        'name': 'c_name',
        'type': 'bar',
        'url': 'http://gold.foo/chart',
    }
    assert record is not None
    self.assertDictEqual(expected, serialized_record)
    self.assertIsNone(full_chart.create_next_record())

    # Case 2: chart_name and chart_type are omitted, so the expected record carries no name/type.
    minimal_chart = DashboardChart(
        dashboard_group_id='dg_id',
        dashboard_id='d_id',
        query_id='q_id',
        chart_id='c_id',
        chart_url='http://gold.foo.bar/',
    )
    record2 = minimal_chart.create_next_record()
    serialized_record2 = mysql_serializer.serialize_record(record2)
    expected2 = {
        'rk': chart_rk,
        'query_rk': query_rk,
        'id': 'c_id',
        'url': 'http://gold.foo.bar/',
    }
    assert record2 is not None
    self.assertDictEqual(expected2, serialized_record2)
def test_create_nodes(self):
    # type: () -> None
    """Nodes returned by create_next_node equal the expected mappings, with and without name/type."""
    chart_key = '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id'

    # Case 1: every optional attribute is supplied.
    full_chart = DashboardChart(
        dashboard_group_id='dg_id',
        dashboard_id='d_id',
        query_id='q_id',
        chart_id='c_id',
        chart_name='c_name',
        chart_type='bar',
        chart_url='http://gold.foo/chart',
    )
    node = full_chart.create_next_node()
    expected = {
        'KEY': chart_key,
        'LABEL': 'Chart',
        'id': 'c_id',
        'name': 'c_name',
        'type': 'bar',
        'url': 'http://gold.foo/chart',
    }
    # Here the node is compared directly, without a serializer step.
    self.assertDictEqual(expected, node)
    self.assertIsNone(full_chart.create_next_node())

    # Case 2: chart_name and chart_type are omitted, so the expected node carries no name/type.
    minimal_chart = DashboardChart(
        dashboard_group_id='dg_id',
        dashboard_id='d_id',
        query_id='q_id',
        chart_id='c_id',
        chart_url='http://gold.foo.bar/',
    )
    node2 = minimal_chart.create_next_node()
    expected2 = {
        'KEY': chart_key,
        'LABEL': 'Chart',
        'id': 'c_id',
        'url': 'http://gold.foo.bar/',
    }
    self.assertDictEqual(expected2, node2)
def _get_extract_iter(self) -> Iterator[Any]:
    """Yield dashboard model objects for every published dashboard from the wrapped extractor.

    For each dashboard that passes ``_is_published_dashboard`` the generator
    emits, in order: ``DashboardMetadata``, ``DashboardLastModifiedTimestamp``,
    ``DashboardOwner``, then a ``DashboardQuery``/``DashboardChart`` pair per
    visualization widget, and finally one ``DashboardTable`` when the optional
    table parser found at least one table.
    """
    while True:
        raw_record = self._extractor.extract()
        if not raw_record:
            return  # upstream extractor is exhausted

        record = next(self._transformer.transform(record=raw_record), None)
        if not self._is_published_dashboard(record):
            continue  # filter this one out

        # Keys shared by every model object emitted for this dashboard.
        identity = {
            'cluster': self._cluster,
            'product': RedashDashboardExtractor.PRODUCT,
            'dashboard_group_id': str(RedashDashboardExtractor.DASHBOARD_GROUP_ID),
            'dashboard_id': str(record['dashboard_id']),
        }

        metadata_kwargs = {
            'dashboard_group': RedashDashboardExtractor.DASHBOARD_GROUP_NAME,
            'dashboard_group_url': self._redash_base_url,
            'dashboard_name': record['dashboard_name'],
            'dashboard_url': f'{self._redash_base_url}/dashboards/{record["dashboard_id"]}',
            'created_timestamp': record['created_timestamp'],
            **identity,
        }

        ordered_widgets = sort_widgets(record['widgets'])
        text_widgets = get_text_widgets(ordered_widgets)
        viz_widgets = get_visualization_widgets(ordered_widgets)

        # generate a description for this dashboard, since Redash does not have descriptions
        metadata_kwargs['description'] = generate_dashboard_description(
            text_widgets, viz_widgets)
        yield DashboardMetadata(**metadata_kwargs)

        yield DashboardLastModifiedTimestamp(
            last_modified_timestamp=record['last_modified_timestamp'],
            **identity)

        yield DashboardOwner(email=record['user']['email'], **identity)

        referenced_table_keys = set()
        for viz in viz_widgets:
            query_id = str(viz.query_id)

            yield DashboardQuery(
                query_id=query_id,
                query_name=viz.query_name,
                url=self._redash_base_url + viz.query_relative_url,
                query_text=viz.raw_query,
                **identity)

            yield DashboardChart(
                query_id=query_id,
                chart_id=str(viz.visualization_id),
                chart_name=viz.visualization_name,
                chart_type=viz.visualization_type,
                **identity)

            # if a table parser is provided, retrieve tables from this viz
            if self._parse_tables:
                referenced_table_keys.update(
                    tbl.key for tbl in self._parse_tables(viz))

        if referenced_table_keys:
            yield DashboardTable(table_ids=list(referenced_table_keys), **identity)
def test_create_nodes(self) -> None:
    """Chart nodes serialize for Neo4j and Neptune, with and without the optional name/type fields."""
    chart_key = '_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id'

    # Case 1: every optional attribute is supplied.
    full_chart = DashboardChart(
        dashboard_group_id='dg_id',
        dashboard_id='d_id',
        query_id='q_id',
        chart_id='c_id',
        chart_name='c_name',
        chart_type='bar',
        chart_url='http://gold.foo/chart',
    )
    node = full_chart.create_next_node()
    neo4j_node = neo4_serializer.serialize_node(node)
    neptune_node = neptune_serializer.convert_node(node)

    expected: Dict[str, Any] = {
        'KEY': chart_key,
        'LABEL': 'Chart',
        'id': 'c_id',
        'name': 'c_name',
        'type': 'bar',
        'url': 'http://gold.foo/chart',
    }
    neptune_expected = {
        '~id': 'Chart:_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
        '~label': 'Chart',
        METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT: chart_key,
        NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
        NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
        'id:String(single)': 'c_id',
        'name:String(single)': 'c_name',
        'type:String(single)': 'bar',
        'url:String(single)': 'http://gold.foo/chart',
    }

    assert node is not None
    self.assertDictEqual(expected, neo4j_node)
    self.assertDictEqual(neptune_expected, neptune_node)
    self.assertIsNone(full_chart.create_next_node())

    # Case 2: chart_name and chart_type are omitted, so the expected nodes carry no name/type.
    minimal_chart = DashboardChart(
        dashboard_group_id='dg_id',
        dashboard_id='d_id',
        query_id='q_id',
        chart_id='c_id',
        chart_url='http://gold.foo.bar/',
    )
    node2 = minimal_chart.create_next_node()
    neo4j_node2 = neo4_serializer.serialize_node(node2)
    neptune_node2 = neptune_serializer.convert_node(node2)

    expected2: Dict[str, Any] = {
        'KEY': chart_key,
        'LABEL': 'Chart',
        'id': 'c_id',
        'url': 'http://gold.foo.bar/',
    }
    neptune_expected2 = {
        '~id': 'Chart:_dashboard://gold.dg_id/d_id/query/q_id/chart/c_id',
        '~label': 'Chart',
        METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT: chart_key,
        NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
        NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
        'id:String(single)': 'c_id',
        'url:String(single)': 'http://gold.foo.bar/',
    }

    assert node2 is not None
    self.assertDictEqual(expected2, neo4j_node2)
    self.assertDictEqual(neptune_expected2, neptune_node2)
def test_with_one_dashboard(self) -> None:
    """Extract a single mocked dashboard and verify every record the extractor emits.

    ``requests.get`` is patched so that a URL containing ``'1000'`` returns a
    dashboard payload with one visualization widget, and any other URL
    returns a one-item paginated dashboard listing. The extractor is then
    expected to yield, in order: DashboardMetadata,
    DashboardLastModifiedTimestamp, DashboardOwner, DashboardQuery,
    DashboardChart and DashboardTable.
    """

    def mock_api_get(url: str, *args: Any, **kwargs: Any) -> MockApiResponse:
        # A URL mentioning the dashboard id gets the widget payload.
        if '1000' in url:
            return MockApiResponse({
                'id': 1000,
                'widgets': [{
                    'visualization': {
                        'query': {
                            'data_source_id': 1,
                            'id': 1234,
                            'name': 'Test Query',
                            'query': 'SELECT id FROM users'
                        },
                        'id': 12345,
                        'name': 'test_widget',
                        'type': 'CHART',
                    },
                    'options': {}
                }]
            })

        # Everything else gets the paginated dashboard listing.
        return MockApiResponse({
            'page': 1,
            'count': 1,
            'page_size': 50,
            'results': [{
                'id': 1000,
                'name': 'Test Dash',
                'slug': 'test-dash',
                'created_at': '2020-01-01T00:00:00.000Z',
                'updated_at': '2020-01-02T00:00:00.000Z',
                'is_archived': False,
                'is_draft': False,
                'user': {
                    'email': '*****@*****.**'
                }
            }]
        })

    redash_base_url = 'https://redash.example.com'
    config = ConfigFactory.from_dict({
        'extractor.redash_dashboard.redash_base_url': redash_base_url,
        'extractor.redash_dashboard.api_base_url': redash_base_url,  # probably not but doesn't matter
        'extractor.redash_dashboard.api_key': 'abc123',
        'extractor.redash_dashboard.table_parser':
            'tests.unit.extractor.dashboard.redash.test_redash_dashboard_extractor.dummy_tables'
    })

    with patch('databuilder.rest_api.rest_api_query.requests.get') as mock_get:
        mock_get.side_effect = mock_api_get

        extractor = RedashDashboardExtractor()
        extractor.init(
            Scoped.get_scoped_conf(conf=config, scope=extractor.get_scope()))

        # DashboardMetadata
        record = extractor.extract()
        self.assertEqual(record.dashboard_id, '1000')
        self.assertEqual(record.dashboard_name, 'Test Dash')
        self.assertEqual(record.dashboard_group_id,
                         RedashDashboardExtractor.DASHBOARD_GROUP_ID)
        self.assertEqual(record.dashboard_group,
                         RedashDashboardExtractor.DASHBOARD_GROUP_NAME)
        self.assertEqual(record.product, RedashDashboardExtractor.PRODUCT)
        self.assertEqual(record.cluster,
                         RedashDashboardExtractor.DEFAULT_CLUSTER)
        self.assertEqual(record.created_timestamp, 1577836800)
        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn(redash_base_url, record.dashboard_url)
        self.assertIn('1000', record.dashboard_url)

        # Identity keys shared by all remaining expected records.
        identity: Dict[str, Any] = {
            'dashboard_id': '1000',
            'dashboard_group_id': RedashDashboardExtractor.DASHBOARD_GROUP_ID,
            'product': RedashDashboardExtractor.PRODUCT,
            'cluster': 'prod'
        }

        # DashboardLastModified
        record = extractor.extract()
        expected_timestamp = DashboardLastModifiedTimestamp(
            last_modified_timestamp=1577923200, **identity)
        self.assertEqual(repr(record), repr(expected_timestamp))

        # DashboardOwner
        record = extractor.extract()
        expected_owner = DashboardOwner(email='*****@*****.**', **identity)
        self.assertEqual(repr(record), repr(expected_owner))

        # DashboardQuery
        record = extractor.extract()
        expected_query = DashboardQuery(
            query_id='1234',
            query_name='Test Query',
            url=f'{redash_base_url}/queries/1234',
            query_text='SELECT id FROM users',
            **identity)
        self.assertEqual(repr(record), repr(expected_query))

        # DashboardChart
        record = extractor.extract()
        expected_chart = DashboardChart(
            query_id='1234',
            chart_id='12345',
            chart_name='test_widget',
            chart_type='CHART',
            **identity)
        self.assertEqual(repr(record), repr(expected_chart))

        # DashboardTable
        record = extractor.extract()
        expected_table = DashboardTable(
            table_ids=[TableRelationData('some_db', 'prod', 'public', 'users').key],
            **identity)
        self.assertEqual(repr(record), repr(expected_table))
def _get_extract_iter(self) -> Iterator[Any]:
    """Yield dashboard model objects for every record produced by the wrapped extractor.

    For each record the generator emits, in order: ``DashboardOwner``,
    ``DashboardLastModifiedTimestamp``, a ``DashboardQuery``/``DashboardChart``
    pair per visualization widget (only when the record has a ``"widgets"``
    key), and finally ``DashboardMetadata``. The metadata carries a generated
    description only when widgets were present.
    """
    while True:
        raw_record = self._extractor.extract()
        if not raw_record:
            return  # upstream extractor is exhausted

        record = next(self._transformer.transform(record=raw_record), None)

        # Keys shared by every model object emitted for this dashboard.
        identity = {
            "dashboard_group_id": DatabricksSQLDashboardExtractor.DASHBOARD_GROUP_ID,
            "dashboard_id": record["dashboard_id"],
            "product": "databricks-sql",
        }

        # Built up front, but only yielded after the widgets have been processed.
        metadata_kwargs = {
            "dashboard_group": DatabricksSQLDashboardExtractor.DASHBOARD_GROUP_NAME,
            "dashboard_name": record["dashboard_name"],
            "dashboard_url": f"{self._databricks_host}/sql/dashboards/{record['dashboard_id']}",
            "dashboard_group_url": self._databricks_host,
            "created_timestamp": record["created_timestamp"],
            "tags": record["tags"],
        }

        yield DashboardOwner(email=record["user"]["email"], **identity)

        yield DashboardLastModifiedTimestamp(
            last_modified_timestamp=record["last_modified_timestamp"],
            **identity)

        if "widgets" in record:
            ordered_widgets = sort_widgets(record["widgets"])
            text_widgets = get_text_widgets(ordered_widgets)
            viz_widgets = get_visualization_widgets(ordered_widgets)
            metadata_kwargs["description"] = generate_dashboard_description(
                text_widgets, viz_widgets)

            for viz in viz_widgets:
                query_id = str(viz.query_id)

                yield DashboardQuery(
                    query_id=query_id,
                    query_name=viz.query_name,
                    url=self._databricks_host + viz.query_relative_url,
                    query_text=viz.raw_query,
                    **identity)

                yield DashboardChart(
                    query_id=query_id,
                    chart_id=str(viz.visualization_id),
                    chart_name=viz.visualization_name,
                    chart_type=viz.visualization_type,
                    **identity)

        yield DashboardMetadata(**{**metadata_kwargs, **identity})