def test_dashboard_owner_relations(self) -> None:
    """Verify the Dashboard -> Timestamp relation of a last-modified record.

    The first ``create_next_relation()`` call must produce a relation whose
    neo4j serialization equals ``expected``; the second call must return
    ``None``.
    """
    dashboard_last_modified = DashboardLastModifiedTimestamp(
        last_modified_timestamp=123456789,
        cluster='cluster_id',
        product='product_id',
        dashboard_id='dashboard_id',
        dashboard_group_id='dashboard_group_id')

    actual = dashboard_last_modified.create_next_relation()
    # Check for None *before* handing the value to the serializer, so a
    # missing relation is reported here rather than by a later step.
    assert actual is not None
    actual_serialized = neo4_serializer.serialize_relationship(actual)

    expected: Dict[str, Any] = {
        RELATION_END_KEY: 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id'
                          '/_last_modified_timestamp',
        RELATION_START_LABEL: 'Dashboard',
        RELATION_END_LABEL: 'Timestamp',
        RELATION_START_KEY: 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id',
        RELATION_TYPE: 'LAST_UPDATED_AT',
        RELATION_REVERSE_TYPE: 'LAST_UPDATED_TIME_OF'
    }
    self.assertDictEqual(actual_serialized, expected)

    # The record is expected to be exhausted after its first relation.
    self.assertIsNone(dashboard_last_modified.create_next_relation())
def test_dashboard_owner_relations(self):
    # type: () -> None
    """Verify the Dashboard -> Timestamp relation of a last-modified record.

    The first ``create_next_relation()`` call must equal ``expected``; the
    second call must return ``None``.
    """
    dashboard_last_modified = DashboardLastModifiedTimestamp(
        last_modified_timestamp=123456789,
        cluster='cluster_id',
        product='product_id',
        dashboard_id='dashboard_id',
        dashboard_group_id='dashboard_group_id')

    # No debug print here: assertDictEqual already reports the full
    # difference when the comparison fails.
    actual = dashboard_last_modified.create_next_relation()

    expected = {
        RELATION_END_KEY: 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id'
                          '/_last_modified_timestamp',
        RELATION_START_LABEL: 'Dashboard',
        RELATION_END_LABEL: 'Timestamp',
        RELATION_START_KEY: 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id',
        RELATION_TYPE: 'LAST_UPDATED_AT',
        RELATION_REVERSE_TYPE: 'LAST_UPDATED_TIME_OF'
    }
    self.assertDictEqual(actual, expected)

    # The record is expected to be exhausted after its first relation.
    self.assertIsNone(dashboard_last_modified.create_next_relation())
def setUp(self) -> None:
    """Create the record under test and the keys the assertions compare against."""
    # Key of the dashboard node the timestamp is attached to.
    self.expected_dashboard_key = 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id'
    # Key of the timestamp node itself.
    self.expected_ts_key = (
        'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id/'
        '_last_modified_timestamp'
    )

    record_kwargs = dict(
        last_modified_timestamp=123456789,
        cluster='cluster_id',
        product='product_id',
        dashboard_id='dashboard_id',
        dashboard_group_id='dashboard_group_id',
    )
    self.dashboard_last_modified = DashboardLastModifiedTimestamp(**record_kwargs)
def _get_extract_iter(
    self
) -> Iterator[Union[DashboardMetadata, DashboardLastModifiedTimestamp, None]]:
    """Yield, per dashboard, a DashboardMetadata then a DashboardLastModifiedTimestamp.

    Details for every dashboard id are fetched before the first record is
    yielded. When ``self.extract_published_only`` is truthy, only entries
    whose ``result.published`` field is truthy are kept.
    """
    dashboard_ids = self._get_resource_ids('dashboard')
    details = [
        self._get_dashboard_details(dashboard_id)
        for dashboard_id in dashboard_ids
    ]

    if self.extract_published_only:
        details = list(filter(
            lambda detail: self.get_nested_field(detail, 'result.published'),
            details))

    for detail in details:
        metadata_kwargs = self.map_fields(
            detail, self.dashboard_metadata_field_mappings())
        metadata_kwargs.update(**self.common_params)
        yield DashboardMetadata(**metadata_kwargs)

        last_modified_kwargs = self.map_fields(
            detail, self.last_modified_field_mappings())
        last_modified_kwargs.update(**self.common_params)
        yield DashboardLastModifiedTimestamp(**last_modified_kwargs)
def test_dashboard_timestamp_nodes(self):
    # type: () -> None
    """Verify the single Timestamp node produced by a last-modified record."""
    record_kwargs = dict(
        last_modified_timestamp=123456789,
        cluster='cluster_id',
        product='product_id',
        dashboard_id='dashboard_id',
        dashboard_group_id='dashboard_group_id',
    )
    timestamp_record = DashboardLastModifiedTimestamp(**record_kwargs)

    first_node = timestamp_record.create_next_node()
    expected_node = {
        'timestamp': 123456789,
        'name': 'last_updated_timestamp',
        'KEY': 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id/_last_modified_timestamp',
        'LABEL': 'Timestamp',
    }
    self.assertDictEqual(first_node, expected_node)

    # A second call is expected to signal exhaustion with None.
    second_node = timestamp_record.create_next_node()
    self.assertIsNone(second_node)
def test_dashboard_timestamp_nodes(self) -> None:
    """Verify the Timestamp node of a last-modified record in neo4j form.

    The first ``create_next_node()`` call must produce a node whose neo4j
    serialization equals ``expected``; the second call must return ``None``.
    """
    dashboard_last_modified = DashboardLastModifiedTimestamp(
        last_modified_timestamp=123456789,
        cluster='cluster_id',
        product='product_id',
        dashboard_id='dashboard_id',
        dashboard_group_id='dashboard_group_id')

    actual = dashboard_last_modified.create_next_node()
    # Check for None *before* handing the value to the serializer, so a
    # missing node is reported here rather than by a later step.
    assert actual is not None
    actual_serialized = neo4_serializer.serialize_node(actual)

    expected: Dict[str, Any] = {
        'timestamp:UNQUOTED': 123456789,
        'name': 'last_updated_timestamp',
        'KEY': 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id/_last_modified_timestamp',
        'LABEL': 'Timestamp'
    }
    self.assertDictEqual(actual_serialized, expected)

    # The record is expected to be exhausted after its first node.
    self.assertIsNone(dashboard_last_modified.create_next_node())
def _get_extract_iter(self) -> Iterator[Any]:
    """Yield the model records derived from each published Redash dashboard.

    Pulls records from ``self._extractor`` until it returns a falsy value.
    Each record goes through ``self._transformer``; records rejected by
    ``self._is_published_dashboard`` are skipped. For every kept dashboard
    the generator yields, in order: DashboardMetadata,
    DashboardLastModifiedTimestamp, DashboardOwner, a DashboardQuery and
    DashboardChart per visualization widget, and finally one DashboardTable
    if any table keys were collected.
    """
    while True:
        raw_record = self._extractor.extract()
        if not raw_record:
            # The wrapped extractor has nothing more to give.
            break

        record = next(self._transformer.transform(record=raw_record), None)
        if not self._is_published_dashboard(record):
            continue

        # Fields shared by every model emitted for this dashboard.
        identity = {
            'cluster': self._cluster,
            'product': RedashDashboardExtractor.PRODUCT,
            'dashboard_group_id': str(RedashDashboardExtractor.DASHBOARD_GROUP_ID),
            'dashboard_id': str(record['dashboard_id'])
        }

        metadata_kwargs = {
            'dashboard_group': RedashDashboardExtractor.DASHBOARD_GROUP_NAME,
            'dashboard_group_url': self._redash_base_url,
            'dashboard_name': record['dashboard_name'],
            'dashboard_url': f'{self._redash_base_url}/dashboards/{record["dashboard_id"]}',
            'created_timestamp': record['created_timestamp'],
            **identity,
        }

        ordered_widgets = sort_widgets(record['widgets'])
        text_widgets = get_text_widgets(ordered_widgets)
        viz_widgets = get_visualization_widgets(ordered_widgets)

        # Redash has no dashboard descriptions, so one is synthesized from the widgets.
        metadata_kwargs['description'] = generate_dashboard_description(
            text_widgets, viz_widgets)
        yield DashboardMetadata(**metadata_kwargs)

        yield DashboardLastModifiedTimestamp(**{
            'last_modified_timestamp': record['last_modified_timestamp'],
            **identity,
        })

        yield DashboardOwner(**{
            'email': record['user']['email'],
            **identity,
        })

        table_keys = set()
        for viz in viz_widgets:
            yield DashboardQuery(**{
                'query_id': str(viz.query_id),
                'query_name': viz.query_name,
                'url': self._redash_base_url + viz.query_relative_url,
                'query_text': viz.raw_query,
                **identity,
            })

            yield DashboardChart(**{
                'query_id': str(viz.query_id),
                'chart_id': str(viz.visualization_id),
                'chart_name': viz.visualization_name,
                'chart_type': viz.visualization_type,
                **identity,
            })

            # Table keys are only collected when a table parser was configured.
            if self._parse_tables:
                table_keys.update(tbl.key for tbl in self._parse_tables(viz))

        if table_keys:
            yield DashboardTable(table_ids=list(table_keys), **identity)
def test_with_one_dashboard(self) -> None:
    """Run the Redash extractor against one mocked dashboard and check each record.

    ``requests.get`` is patched so that URLs containing ``test-dash`` return
    the dashboard detail payload and every other URL returns the one-item
    dashboard listing. Records are then pulled one at a time and compared
    with the expected models.
    """

    def mock_api_get(url: str, *args: Any, **kwargs: Any) -> MockApiResponse:
        if 'test-dash' not in url:
            # Any other URL is treated as the paginated dashboard listing.
            return MockApiResponse({
                'page': 1,
                'count': 1,
                'page_size': 50,
                'results': [
                    {
                        'id': 123,
                        'name': 'Test Dash',
                        'slug': 'test-dash',
                        'created_at': '2020-01-01T00:00:00.000Z',
                        'updated_at': '2020-01-02T00:00:00.000Z',
                        'is_archived': False,
                        'is_draft': False,
                        'user': {'email': '*****@*****.**'}
                    }
                ]
            })

        # Detail payload for the single dashboard.
        return MockApiResponse({
            'id': 123,
            'widgets': [
                {
                    'visualization': {
                        'query': {
                            'data_source_id': 1,
                            'id': '1234',
                            'name': 'Test Query',
                            'query': 'SELECT id FROM users'
                        }
                    },
                    'options': {}
                }
            ]
        })

    redash_base_url = 'https://redash.example.com'
    config = ConfigFactory.from_dict({
        'extractor.redash_dashboard.redash_base_url': redash_base_url,
        # Reusing the base URL as the API URL is unrealistic but irrelevant here.
        'extractor.redash_dashboard.api_base_url': redash_base_url,
        'extractor.redash_dashboard.api_key': 'abc123',
        'extractor.redash_dashboard.table_parser':
            'tests.unit.extractor.dashboard.redash.test_redash_dashboard_extractor.dummy_tables'
    })

    with patch('databuilder.rest_api.rest_api_query.requests.get') as mock_get:
        mock_get.side_effect = mock_api_get

        extractor = RedashDashboardExtractor()
        extractor.init(Scoped.get_scoped_conf(conf=config, scope=extractor.get_scope()))

        # First record: the dashboard metadata.
        metadata = extractor.extract()
        self.assertEqual(metadata.dashboard_id, 123)
        self.assertEqual(metadata.dashboard_name, 'Test Dash')
        self.assertEqual(metadata.dashboard_group_id, RedashDashboardExtractor.DASHBOARD_GROUP_ID)
        self.assertEqual(metadata.dashboard_group, RedashDashboardExtractor.DASHBOARD_GROUP_NAME)
        self.assertEqual(metadata.product, RedashDashboardExtractor.PRODUCT)
        self.assertEqual(metadata.cluster, RedashDashboardExtractor.DEFAULT_CLUSTER)
        self.assertEqual(metadata.created_timestamp, 1577836800)
        self.assertIn(redash_base_url, metadata.dashboard_url)
        self.assertIn('test-dash', metadata.dashboard_url)

        # Identity fields shared by all remaining expected models.
        identity: Dict[str, Any] = {
            'dashboard_id': 123,
            'dashboard_group_id': RedashDashboardExtractor.DASHBOARD_GROUP_ID,
            'product': RedashDashboardExtractor.PRODUCT,
            'cluster': u'prod'
        }

        # Second record: the last-modified timestamp.
        last_modified = extractor.extract()
        expected_timestamp = DashboardLastModifiedTimestamp(
            last_modified_timestamp=1577923200,
            **identity
        )
        self.assertEqual(repr(last_modified), repr(expected_timestamp))

        # Third record: the dashboard owner.
        owner = extractor.extract()
        expected_owner = DashboardOwner(email='*****@*****.**', **identity)
        self.assertEqual(repr(owner), repr(expected_owner))

        # Fourth record: the query behind the visualization.
        query = extractor.extract()
        expected_query = DashboardQuery(
            query_id='1234',
            query_name='Test Query',
            url=u'{base}/queries/1234'.format(base=redash_base_url),
            query_text='SELECT id FROM users',
            **identity
        )
        self.assertEqual(repr(query), repr(expected_query))

        # Fifth record: the tables reported by the configured table parser.
        table = extractor.extract()
        expected_table = DashboardTable(
            table_ids=[TableRelationData('some_db', 'prod', 'public', 'users').key],
            **identity
        )
        self.assertEqual(repr(table), repr(expected_table))
class TestDashboardLastModifiedTimestamp(unittest.TestCase):
    """Serialization tests for DashboardLastModifiedTimestamp.

    Each test takes the first node, relation, record or Atlas entity produced
    by the model, serializes it with one backend serializer (neo4j, Neptune,
    MySQL or Atlas) and compares the result with a literal expectation.
    """

    def setUp(self) -> None:
        """Create the record under test and the keys the assertions compare against."""
        self.dashboard_last_modified = DashboardLastModifiedTimestamp(
            last_modified_timestamp=123456789,
            cluster='cluster_id',
            product='product_id',
            dashboard_id='dashboard_id',
            dashboard_group_id='dashboard_group_id')

        self.expected_ts_key = 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id/' \
                               '_last_modified_timestamp'
        self.expected_dashboard_key = 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id'

    def test_dashboard_timestamp_nodes(self) -> None:
        """The neo4j-serialized Timestamp node matches; a second call returns None."""
        actual = self.dashboard_last_modified.create_next_node()
        # Check for None before the value is handed to the serializer.
        assert actual is not None
        actual_serialized = neo4_serializer.serialize_node(actual)

        expected: Dict[str, Any] = {
            'timestamp:UNQUOTED': 123456789,
            'name': 'last_updated_timestamp',
            'KEY': self.expected_ts_key,
            'LABEL': 'Timestamp'
        }
        self.assertDictEqual(actual_serialized, expected)
        self.assertIsNone(self.dashboard_last_modified.create_next_node())

    def test_neptune_dashboard_timestamp_nodes(self) -> None:
        """The Neptune-converted Timestamp node matches the expected row."""
        actual = self.dashboard_last_modified.create_next_node()
        actual_neptune_serialized = neptune_serializer.convert_node(actual)

        neptune_expected = {
            NEPTUNE_HEADER_ID: 'Timestamp:' + self.expected_ts_key,
            METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT: self.expected_ts_key,
            NEPTUNE_HEADER_LABEL: 'Timestamp',
            # The extraction time varies per run, so any value is accepted.
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_NODE_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB,
            'name:String(single)': 'last_updated_timestamp',
            'timestamp:Long(single)': 123456789,
        }
        self.assertDictEqual(actual_neptune_serialized, neptune_expected)

    def test_dashboard_owner_relations(self) -> None:
        """The neo4j-serialized relation matches; a second call returns None."""
        actual = self.dashboard_last_modified.create_next_relation()
        # Check for None before the value is handed to the serializer.
        assert actual is not None
        actual_serialized = neo4_serializer.serialize_relationship(actual)

        expected: Dict[str, Any] = {
            RELATION_END_KEY: self.expected_ts_key,
            RELATION_START_LABEL: 'Dashboard',
            RELATION_END_LABEL: 'Timestamp',
            RELATION_START_KEY: self.expected_dashboard_key,
            RELATION_TYPE: 'LAST_UPDATED_AT',
            RELATION_REVERSE_TYPE: 'LAST_UPDATED_TIME_OF'
        }
        self.assertDictEqual(actual_serialized, expected)
        self.assertIsNone(self.dashboard_last_modified.create_next_relation())

    def test_dashboard_owner_relations_neptune(self) -> None:
        """The Neptune conversion yields the expected forward and reversed edges."""
        actual = self.dashboard_last_modified.create_next_relation()
        # Check for None before the value is handed to the serializer.
        assert actual is not None
        actual_serialized = neptune_serializer.convert_relationship(actual)

        # Vertex ids and edge ids are each computed once and reused below.
        dashboard_vertex_id = 'Dashboard:' + self.expected_dashboard_key
        timestamp_vertex_id = 'Timestamp:' + self.expected_ts_key
        forward_edge_id = "{label}:{from_vertex_id}_{to_vertex_id}".format(
            from_vertex_id=dashboard_vertex_id,
            to_vertex_id=timestamp_vertex_id,
            label='LAST_UPDATED_AT')
        reversed_edge_id = "{label}:{from_vertex_id}_{to_vertex_id}".format(
            from_vertex_id=timestamp_vertex_id,
            to_vertex_id=dashboard_vertex_id,
            label='LAST_UPDATED_TIME_OF')

        neptune_forward_expected = {
            NEPTUNE_HEADER_ID: forward_edge_id,
            METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT: forward_edge_id,
            NEPTUNE_RELATIONSHIP_HEADER_FROM: dashboard_vertex_id,
            NEPTUNE_RELATIONSHIP_HEADER_TO: timestamp_vertex_id,
            NEPTUNE_HEADER_LABEL: 'LAST_UPDATED_AT',
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB
        }

        neptune_reversed_expected = {
            NEPTUNE_HEADER_ID: reversed_edge_id,
            METADATA_KEY_PROPERTY_NAME_BULK_LOADER_FORMAT: reversed_edge_id,
            NEPTUNE_RELATIONSHIP_HEADER_FROM: timestamp_vertex_id,
            NEPTUNE_RELATIONSHIP_HEADER_TO: dashboard_vertex_id,
            NEPTUNE_HEADER_LABEL: 'LAST_UPDATED_TIME_OF',
            NEPTUNE_LAST_EXTRACTED_AT_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: ANY,
            NEPTUNE_CREATION_TYPE_RELATIONSHIP_PROPERTY_NAME_BULK_LOADER_FORMAT: NEPTUNE_CREATION_TYPE_JOB
        }

        # Lift unittest's diff-length cap so a mismatch shows the full dicts.
        self.maxDiff = None
        self.assertDictEqual(actual_serialized[0], neptune_forward_expected)
        self.assertDictEqual(actual_serialized[1], neptune_reversed_expected)
        self.assertIsNone(self.dashboard_last_modified.create_next_relation())

    def test_dashboard_timestamp_records(self) -> None:
        """The MySQL-serialized record matches; a second call returns None."""
        actual = self.dashboard_last_modified.create_next_record()
        # Check for None before the value is handed to the serializer.
        assert actual is not None
        actual_serialized = mysql_serializer.serialize_record(actual)

        # Keys come from setUp so all tests share one definition of them.
        expected = {
            'rk': self.expected_ts_key,
            'timestamp': 123456789,
            'name': 'last_updated_timestamp',
            'dashboard_rk': self.expected_dashboard_key
        }
        self.assertDictEqual(actual_serialized, expected)
        self.assertIsNone(self.dashboard_last_modified.create_next_record())

    def test_dashboard_last_modified_relation_atlas(self) -> None:
        """The Atlas-serialized entity matches; a second call returns None."""
        actual = self.dashboard_last_modified.create_next_atlas_entity()
        # Check for None before the value is handed to the serializer.
        assert actual is not None
        actual_serialized = atlas_serializer.serialize_entity(actual)

        expected = {
            "typeName": "Dashboard",
            "operation": "UPDATE",
            "relationships": None,
            "qualifiedName": self.expected_dashboard_key,
            "lastModifiedTimestamp": 123456789
        }
        self.assertDictEqual(actual_serialized, expected)
        self.assertIsNone(
            self.dashboard_last_modified.create_next_atlas_entity())
def _get_extract_iter(self) -> Iterator[Any]:
    """Yield the model records derived from each Databricks SQL dashboard.

    Pulls records from ``self._extractor`` until it returns a falsy value and
    runs each through ``self._transformer``. Per dashboard the generator
    yields, in order: DashboardOwner, DashboardLastModifiedTimestamp, then
    (only when the record has a ``"widgets"`` entry) a DashboardQuery and
    DashboardChart per visualization widget, and finally DashboardMetadata.
    """
    while True:
        raw_record = self._extractor.extract()
        if not raw_record:
            break

        record = next(self._transformer.transform(record=raw_record), None)

        # Fields shared by every model emitted for this dashboard.
        identity = {
            "dashboard_group_id": DatabricksSQLDashboardExtractor.DASHBOARD_GROUP_ID,
            "dashboard_id": record["dashboard_id"],
            "product": "databricks-sql",
        }

        # Metadata is assembled now but yielded last, because the
        # description is only added once the widgets have been inspected.
        metadata_kwargs = {
            "dashboard_group": DatabricksSQLDashboardExtractor.DASHBOARD_GROUP_NAME,
            "dashboard_name": record["dashboard_name"],
            "dashboard_url": f"{self._databricks_host}/sql/dashboards/{record['dashboard_id']}",
            "dashboard_group_url": self._databricks_host,
            "created_timestamp": record["created_timestamp"],
            "tags": record["tags"],
        }

        yield DashboardOwner(**{
            "email": record["user"]["email"],
            **identity,
        })

        yield DashboardLastModifiedTimestamp(**{
            "last_modified_timestamp": record["last_modified_timestamp"],
            **identity,
        })

        if "widgets" in record:
            ordered_widgets = sort_widgets(record["widgets"])
            text_widgets = get_text_widgets(ordered_widgets)
            viz_widgets = get_visualization_widgets(ordered_widgets)
            metadata_kwargs["description"] = generate_dashboard_description(
                text_widgets, viz_widgets)

            for viz in viz_widgets:
                yield DashboardQuery(**{
                    "query_id": str(viz.query_id),
                    "query_name": viz.query_name,
                    "url": self._databricks_host + viz.query_relative_url,
                    "query_text": viz.raw_query,
                    **identity,
                })

                yield DashboardChart(**{
                    "query_id": str(viz.query_id),
                    "chart_id": str(viz.visualization_id),
                    "chart_name": viz.visualization_name,
                    "chart_type": viz.visualization_type,
                    **identity,
                })

        metadata_kwargs.update(identity)
        yield DashboardMetadata(**metadata_kwargs)