def _get_extract_iter(self): # type: () -> Iterator[Any] while True: record = self._extractor.extract() if not record: break # the end. record = self._transformer.transform(record=record) if not self._is_published_dashboard(record): continue # filter this one out identity_data = { 'cluster': self._cluster, 'product': RedashDashboardExtractor.PRODUCT, 'dashboard_group_id': RedashDashboardExtractor.DASHBOARD_GROUP_ID, 'dashboard_id': record['dashboard_id'] } dash_data = { 'dashboard_group': RedashDashboardExtractor.DASHBOARD_GROUP_NAME, 'dashboard_group_url': self._redash_base_url, 'dashboard_name': record['dashboard_name'], 'dashboard_url': '{redash}/dashboard/{slug}' .format(redash=self._redash_base_url, slug=record['slug']), 'created_timestamp': record['created_timestamp'] } dash_data.update(identity_data) widgets = sort_widgets(record['widgets']) text_widgets = get_text_widgets(widgets) viz_widgets = get_visualization_widgets(widgets) # generate a description for this dashboard, since Redash does not have descriptions dash_data['description'] = generate_dashboard_description(text_widgets, viz_widgets) yield DashboardMetadata(**dash_data) last_mod_data = {'last_modified_timestamp': record['last_modified_timestamp']} last_mod_data.update(identity_data) yield DashboardLastModifiedTimestamp(**last_mod_data) owner_data = {'email': record['user']['email']} owner_data.update(identity_data) yield DashboardOwner(**owner_data) table_keys = set() for viz in viz_widgets: query_data = { 'query_id': viz.query_id, 'query_name': viz.query_name, 'url': self._redash_base_url + viz.query_relative_url, 'query_text': viz.raw_query } query_data.update(identity_data) yield DashboardQuery(**query_data) # if a table parser is provided, retrieve tables from this viz if self._parse_tables: for tbl in self._parse_tables(viz): table_keys.add(tbl.key) if len(table_keys) > 0: yield DashboardTable(table_ids=list(table_keys), **identity_data)
class TestDashboardQuery(unittest.TestCase):
    """
    Checks how one DashboardQuery is rendered as a node, a relation and a
    record by the Neo4j, Neptune and MySQL serializers.
    """

    # Literal values shared by the fixture and the expectations below.
    DASHBOARD_KEY = '_dashboard://gold.dg_id/d_id'
    QUERY_KEY = '_dashboard://gold.dg_id/d_id/query/q_id'
    QUERY_URL = 'http://foo.bar/query/baz'
    QUERY_TEXT = 'SELECT * FROM foo.bar'
    NEPTUNE_DASHBOARD_VERTEX = 'Dashboard:_dashboard://gold.dg_id/d_id'
    NEPTUNE_QUERY_VERTEX = 'Query:_dashboard://gold.dg_id/d_id/query/q_id'

    def setUp(self) -> None:
        self.dashboard_query = DashboardQuery(
            dashboard_group_id='dg_id',
            dashboard_id='d_id',
            query_id='q_id',
            query_name='q_name',
            url=self.QUERY_URL,
            query_text=self.QUERY_TEXT)

    def _expected_neptune_edge(self, label: str, from_vertex: str, to_vertex: str) -> dict:
        """Build the expected Neptune bulk-loader row for one edge direction."""
        edge_id = "{label}:{from_vertex_id}_{to_vertex_id}".format(
            from_vertex_id=from_vertex,
            to_vertex_id=to_vertex,
            label=label)
        return {
            NEPTUNE_HEADER_ID: edge_id,
            METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT: edge_id,
            NEPTUNE_RELATIONSHIP_HEADER_FROM: from_vertex,
            NEPTUNE_RELATIONSHIP_HEADER_TO: to_vertex,
            NEPTUNE_HEADER_LABEL: label,
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB
        }

    def test_create_nodes(self) -> None:
        node = self.dashboard_query.create_next_node()
        serialized = neo4_serializer.serialize_node(node)

        expected = {
            'url': self.QUERY_URL,
            'name': 'q_name',
            'id': 'q_id',
            'query_text': self.QUERY_TEXT,
            NODE_KEY: self.QUERY_KEY,
            NODE_LABEL: DashboardQuery.DASHBOARD_QUERY_LABEL
        }
        self.assertEqual(expected, serialized)

    def test_create_nodes_neptune(self) -> None:
        node = self.dashboard_query.create_next_node()
        serialized = neptune_serializer.convert_node(node)

        neptune_expected = {
            NEPTUNE_HEADER_ID: self.NEPTUNE_QUERY_VERTEX,
            METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT: self.QUERY_KEY,
            NEPTUNE_HEADER_LABEL: DashboardQuery.DASHBOARD_QUERY_LABEL,
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
            'id:String(single)': 'q_id',
            'query_text:String(single)': self.QUERY_TEXT,
            'name:String(single)': 'q_name',
            'url:String(single)': self.QUERY_URL
        }
        self.assertEqual(neptune_expected, serialized)

    def test_create_relation(self) -> None:
        relation = self.dashboard_query.create_next_relation()
        serialized = neo4_serializer.serialize_relationship(relation)

        expected = {
            RELATION_END_KEY: self.QUERY_KEY,
            RELATION_START_LABEL: 'Dashboard',
            RELATION_END_LABEL: DashboardQuery.DASHBOARD_QUERY_LABEL,
            RELATION_START_KEY: self.DASHBOARD_KEY,
            RELATION_TYPE: 'HAS_QUERY',
            RELATION_REVERSE_TYPE: 'QUERY_OF'
        }
        self.assertEqual(expected, serialized)

    def test_create_relation_neptune(self) -> None:
        actual = self.dashboard_query.create_next_relation()
        actual_serialized = neptune_serializer.convert_relationship(actual)

        # Dashboard -> Query edge, followed by its reverse.
        neptune_forward_expected = self._expected_neptune_edge(
            'HAS_QUERY',
            self.NEPTUNE_DASHBOARD_VERTEX,
            self.NEPTUNE_QUERY_VERTEX)
        neptune_reversed_expected = self._expected_neptune_edge(
            'QUERY_OF',
            self.NEPTUNE_QUERY_VERTEX,
            self.NEPTUNE_DASHBOARD_VERTEX)

        assert actual is not None
        self.assertDictEqual(actual_serialized[0], neptune_forward_expected)
        self.assertDictEqual(actual_serialized[1], neptune_reversed_expected)

    def test_create_records(self) -> None:
        actual = self.dashboard_query.create_next_record()
        actual_serialized = mysql_serializer.serialize_record(actual)

        expected = {
            'rk': self.QUERY_KEY,
            'name': 'q_name',
            'id': 'q_id',
            'dashboard_rk': self.DASHBOARD_KEY,
            'url': self.QUERY_URL,
            'query_text': self.QUERY_TEXT
        }

        assert actual is not None
        self.assertDictEqual(expected, actual_serialized)
        # a DashboardQuery produces exactly one record
        self.assertIsNone(self.dashboard_query.create_next_record())
def test_with_one_dashboard(self) -> None:
    """
    Run the Redash dashboard extractor against a mocked API that serves one
    dashboard with one chart widget, and check each extracted record in the
    order it is produced.
    """

    def mock_api_get(url: str, *args: Any, **kwargs: Any) -> MockApiResponse:
        # Any URL without the dashboard id is answered with the listing page.
        if '1000' not in url:
            return MockApiResponse({
                'page': 1,
                'count': 1,
                'page_size': 50,
                'results': [{
                    'id': 1000,
                    'name': 'Test Dash',
                    'slug': 'test-dash',
                    'created_at': '2020-01-01T00:00:00.000Z',
                    'updated_at': '2020-01-02T00:00:00.000Z',
                    'is_archived': False,
                    'is_draft': False,
                    'user': {
                        'email': '*****@*****.**'
                    }
                }]
            })

        # Otherwise serve the detail payload for dashboard 1000.
        return MockApiResponse({
            'id': 1000,
            'widgets': [{
                'visualization': {
                    'query': {
                        'data_source_id': 1,
                        'id': 1234,
                        'name': 'Test Query',
                        'query': 'SELECT id FROM users'
                    },
                    'id': 12345,
                    'name': 'test_widget',
                    'type': 'CHART',
                },
                'options': {}
            }]
        })

    redash_base_url = 'https://redash.example.com'
    config = ConfigFactory.from_dict({
        'extractor.redash_dashboard.redash_base_url': redash_base_url,
        'extractor.redash_dashboard.api_base_url': redash_base_url,  # probably not but doesn't matter
        'extractor.redash_dashboard.api_key': 'abc123',
        'extractor.redash_dashboard.table_parser':
            'tests.unit.extractor.dashboard.redash.test_redash_dashboard_extractor.dummy_tables'
    })

    with patch('databuilder.rest_api.rest_api_query.requests.get') as mock_get:
        mock_get.side_effect = mock_api_get

        extractor = RedashDashboardExtractor()
        extractor.init(
            Scoped.get_scoped_conf(conf=config, scope=extractor.get_scope()))

        def assert_next_record(expected: Any) -> None:
            # Records are compared through their repr.
            self.assertEqual(repr(extractor.extract()), repr(expected))

        # DashboardMetadata
        metadata = extractor.extract()
        self.assertEqual(metadata.dashboard_id, '1000')
        self.assertEqual(metadata.dashboard_name, 'Test Dash')
        self.assertEqual(metadata.dashboard_group_id,
                         RedashDashboardExtractor.DASHBOARD_GROUP_ID)
        self.assertEqual(metadata.dashboard_group,
                         RedashDashboardExtractor.DASHBOARD_GROUP_NAME)
        self.assertEqual(metadata.product, RedashDashboardExtractor.PRODUCT)
        self.assertEqual(metadata.cluster,
                         RedashDashboardExtractor.DEFAULT_CLUSTER)
        self.assertEqual(metadata.created_timestamp, 1577836800)
        self.assertIn(redash_base_url, metadata.dashboard_url)
        self.assertIn('1000', metadata.dashboard_url)

        identity: Dict[str, Any] = {
            'dashboard_id': '1000',
            'dashboard_group_id': RedashDashboardExtractor.DASHBOARD_GROUP_ID,
            'product': RedashDashboardExtractor.PRODUCT,
            'cluster': 'prod'
        }

        # DashboardLastModified
        assert_next_record(DashboardLastModifiedTimestamp(
            last_modified_timestamp=1577923200, **identity))

        # DashboardOwner
        assert_next_record(DashboardOwner(email='*****@*****.**', **identity))

        # DashboardQuery
        assert_next_record(DashboardQuery(
            query_id='1234',
            query_name='Test Query',
            url='{base}/queries/1234'.format(base=redash_base_url),
            query_text='SELECT id FROM users',
            **identity))

        # DashboardChart
        assert_next_record(DashboardChart(
            query_id='1234',
            chart_id='12345',
            chart_name='test_widget',
            chart_type='CHART',
            **identity))

        # DashboardTable
        assert_next_record(DashboardTable(
            table_ids=[
                TableRelationData('some_db', 'prod', 'public', 'users').key
            ],
            **identity))
def _get_extract_iter(self) -> Iterator[Any]: while True: record = self._extractor.extract() if not record: break record = next(self._transformer.transform(record=record), None) dashboard_identity_data = { "dashboard_group_id": DatabricksSQLDashboardExtractor.DASHBOARD_GROUP_ID, "dashboard_id": record["dashboard_id"], "product": "databricks-sql", } dashboard_data = { "dashboard_group": DatabricksSQLDashboardExtractor.DASHBOARD_GROUP_NAME, "dashboard_name": record["dashboard_name"], "dashboard_url": f"{self._databricks_host}/sql/dashboards/{record['dashboard_id']}", "dashboard_group_url": self._databricks_host, "created_timestamp": record["created_timestamp"], "tags": record["tags"], } dashboard_owner_data = {"email": record["user"]["email"]} dashboard_owner_data.update(dashboard_identity_data) yield DashboardOwner(**dashboard_owner_data) dashboard_last_modified_data = { "last_modified_timestamp": record["last_modified_timestamp"], } dashboard_last_modified_data.update(dashboard_identity_data) yield DashboardLastModifiedTimestamp( **dashboard_last_modified_data) if "widgets" in record: widgets = sort_widgets(record["widgets"]) text_widgets = get_text_widgets(widgets) viz_widgets = get_visualization_widgets(widgets) dashboard_data["description"] = generate_dashboard_description( text_widgets, viz_widgets) for viz in viz_widgets: dashboard_query_data = { "query_id": str(viz.query_id), "query_name": viz.query_name, "url": self._databricks_host + viz.query_relative_url, "query_text": viz.raw_query, } dashboard_query_data.update(dashboard_identity_data) yield DashboardQuery(**dashboard_query_data) dashboard_chart_data = { "query_id": str(viz.query_id), "chart_id": str(viz.visualization_id), "chart_name": viz.visualization_name, "chart_type": viz.visualization_type, } dashboard_chart_data.update(dashboard_identity_data) yield DashboardChart(**dashboard_chart_data) dashboard_data.update(dashboard_identity_data) yield DashboardMetadata(**dashboard_data)