def _make_chart_mce(
    self, dashboard_element: LookerDashboardElement
) -> MetadataChangeEvent:
    """Wrap a dashboard element in a chart ``MetadataChangeEvent``.

    The chart URN combines the configured platform name with the element's
    URN id. Both audit stamps use the configured actor and the value
    returned by ``get_sys_time()``. Missing title/description become "".
    """
    actor = self.source_config.actor
    sys_time = get_sys_time()

    platform_name = self.source_config.platform_name
    element_id = dashboard_element.get_urn_element_id()
    snapshot = ChartSnapshot(
        urn=f"urn:li:chart:({platform_name},{element_id})",
        aspects=[],
    )

    audit_stamps = ChangeAuditStamps(
        created=AuditStamp(time=sys_time, actor=actor),
        lastModified=AuditStamp(time=sys_time, actor=actor),
    )

    chart_type = self._get_chart_type(dashboard_element)

    # Only ``None`` is replaced; any other value is passed through untouched.
    description = dashboard_element.description
    if description is None:
        description = ""
    title = dashboard_element.title
    if title is None:
        title = ""

    snapshot.aspects.append(
        ChartInfoClass(
            type=chart_type,
            description=description,
            title=title,
            lastModified=audit_stamps,
            chartUrl=dashboard_element.url(self.source_config.base_url),
            inputs=dashboard_element.get_view_urns(self.source_config.platform_name),
        )
    )
    return MetadataChangeEvent(proposedSnapshot=snapshot)
def test_get_unknown_viz_chart_snapshot(mocked_data_source):
    """
    An unmapped visualization type (SANKEY) still produces a chart snapshot;
    the expected chart type for it is "TABLE".
    """
    mocked_data_source.return_value = mock_mysql_data_source_response

    unknown_actor = "urn:li:corpuser:unknown"
    stamp_millis = 1628882009571
    expected_info = ChartInfoClass(
        customProperties={},
        externalUrl=None,
        title="My Query Sankey",
        description="",
        lastModified=ChangeAuditStamps(
            created=AuditStamp(time=stamp_millis, actor=unknown_actor),
            lastModified=AuditStamp(time=stamp_millis, actor=unknown_actor),
        ),
        chartUrl="http://localhost:5000/queries/4#9",
        inputs=["urn:li:dataset:(urn:li:dataPlatform:mysql,Rfam,PROD)"],
        type="TABLE",
    )
    expected_snapshot = ChartSnapshot(
        urn="urn:li:chart:(redash,9)",
        aspects=[expected_info],
    )

    # Second visualization of the mocked query response.
    visualizations = mock_chart_response.get("visualizations", [])
    actual_snapshot = redash_source()._get_chart_snapshot(
        mock_chart_response, visualizations[1]
    )

    assert actual_snapshot == expected_snapshot
def test_get_chart_snapshot_parse_table_names_from_sql(mocked_data_source):
    """
    With the SQL-parsing source, the chart inputs are the three table-level
    dataset URNs rather than a single database-level URN.
    """
    mocked_data_source.return_value = mock_mysql_data_source_response

    unknown_actor = "urn:li:corpuser:unknown"
    stamp_millis = 1628882022544
    expected_inputs = [
        "urn:li:dataset:(urn:li:dataPlatform:mysql,Rfam.order_items,PROD)",
        "urn:li:dataset:(urn:li:dataPlatform:mysql,Rfam.orders,PROD)",
        "urn:li:dataset:(urn:li:dataPlatform:mysql,Rfam.staffs,PROD)",
    ]
    expected_info = ChartInfoClass(
        customProperties={},
        externalUrl=None,
        title="My Query Chart",
        description="",
        lastModified=ChangeAuditStamps(
            created=AuditStamp(time=stamp_millis, actor=unknown_actor),
            lastModified=AuditStamp(time=stamp_millis, actor=unknown_actor),
        ),
        chartUrl="http://localhost:5000/queries/4#10",
        inputs=expected_inputs,
        type="PIE",
    )
    expected_snapshot = ChartSnapshot(
        urn="urn:li:chart:(redash,10)",
        aspects=[expected_info],
    )

    # Third visualization of the mocked query response.
    visualizations = mock_chart_response.get("visualizations", [])
    source = redash_source_parse_table_names_from_sql()
    actual_snapshot = source._get_chart_snapshot(mock_chart_response, visualizations[2])

    assert actual_snapshot == expected_snapshot
def _make_chart_mce(
    self, dashboard_element: LookerDashboardElement, dashboard: LookerDashboard
) -> MetadataChangeEvent:
    """Wrap a dashboard element in a chart ``MetadataChangeEvent``.

    The snapshot always carries a ``ChartInfoClass`` aspect; an ownership
    aspect derived from ``dashboard`` is appended when ``get_ownership``
    returns one. The element's upstream fields are exposed as a sorted,
    de-duplicated, comma-separated ``upstream_fields`` custom property.
    """
    snapshot = ChartSnapshot(
        urn=builder.make_chart_urn(
            self.source_config.platform_name,
            dashboard_element.get_urn_element_id(),
        ),
        aspects=[],
    )

    chart_type = self._get_chart_type(dashboard_element)

    if dashboard_element.upstream_fields:
        unique_fields = sorted(set(dashboard_element.upstream_fields))
        upstream_fields_csv = ",".join(unique_fields)
    else:
        upstream_fields_csv = ""

    snapshot.aspects.append(
        ChartInfoClass(
            type=chart_type,
            description=dashboard_element.description or "",
            title=dashboard_element.title or "",
            lastModified=ChangeAuditStamps(),
            chartUrl=dashboard_element.url(self.source_config.base_url),
            inputs=dashboard_element.get_view_urns(self.source_config),
            customProperties={"upstream_fields": upstream_fields_csv},
        )
    )

    owner_aspect = self.get_ownership(dashboard)
    if owner_aspect is not None:
        snapshot.aspects.append(owner_aspect)

    return MetadataChangeEvent(proposedSnapshot=snapshot)
def construct_chart_from_chart_data(self, chart_data):
    """Build a ``ChartSnapshot`` from one chart record returned by the API.

    Args:
        chart_data: Mapping describing the chart. Reads ``id`` (required),
            ``changed_by``, ``changed_on_utc``, ``slice_name``, ``url`` and
            ``datasource_id``.

    Returns:
        A ``ChartSnapshot`` holding a single ``ChartInfoClass`` aspect.

    Raises:
        KeyError: if ``chart_data`` has no ``id``.
    """
    # Function-scope import: only needed for the missing-timestamp fallback.
    import time

    chart_urn = f"urn:li:chart:({self.platform},{chart_data['id']})"
    chart_snapshot = ChartSnapshot(
        urn=chart_urn,
        aspects=[],
    )

    modified_actor = f"urn:li:corpuser:{(chart_data.get('changed_by') or {}).get('username', 'unknown')}"

    # Audit stamps are expressed in epoch milliseconds, so the parsed value is
    # scaled by 1000. A missing/empty timestamp falls back to the current
    # time: the former "now" placeholder is not a date string dateutil can
    # parse and would raise instead.
    changed_on = chart_data.get("changed_on_utc")
    if changed_on:
        modified_ts = int(dp.parse(changed_on).timestamp() * 1000)
    else:
        modified_ts = int(time.time() * 1000)

    title = chart_data.get("slice_name", "")

    # note: the API does not currently supply created_by usernames due to a bug, but we are required to
    # provide a created AuditStamp to comply with ChangeAuditStamp model. For now, I sub in the last
    # modified actor urn
    last_modified = ChangeAuditStamps(
        created=AuditStamp(time=modified_ts, actor=modified_actor),
        lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
    )

    # Drop only trailing slashes; slicing off the last character would
    # corrupt a base URI that has no trailing slash.
    base_uri = self.config.connect_uri.rstrip("/")
    chart_url = f"{base_uri}{chart_data.get('url', '')}"

    datasource_id = chart_data.get("datasource_id")
    datasource_urn = self.get_datasource_urn_from_id(datasource_id)

    chart_info = ChartInfoClass(
        description="",
        title=title,
        lastModified=last_modified,
        chartUrl=chart_url,
        inputs=[datasource_urn] if datasource_urn else None,
    )
    chart_snapshot.aspects.append(chart_info)
    return chart_snapshot
def _make_chart_mce(
    self, dashboard_element: LookerDashboardElement
) -> MetadataChangeEvent:
    """Wrap a dashboard element in a chart ``MetadataChangeEvent``.

    The snapshot carries one ``ChartInfoClass`` aspect with empty audit
    stamps; its inputs are the element's view URNs for the configured
    platform name and env. Falsy title/description become "".
    """
    element_id = dashboard_element.get_urn_element_id()
    snapshot = ChartSnapshot(
        urn=builder.make_chart_urn(self.source_config.platform_name, element_id),
        aspects=[],
    )

    chart_type = self._get_chart_type(dashboard_element)

    info_aspect = ChartInfoClass(
        type=chart_type,
        description=dashboard_element.description or "",
        title=dashboard_element.title or "",
        lastModified=ChangeAuditStamps(),
        chartUrl=dashboard_element.url(self.source_config.base_url),
        inputs=dashboard_element.get_view_urns(
            self.source_config.platform_name,
            self.source_config.env,
        ),
    )
    snapshot.aspects.append(info_aspect)

    return MetadataChangeEvent(proposedSnapshot=snapshot)
def construct_chart_from_chart_data(self, chart_data):
    """Build a ``ChartSnapshot`` from one chart record returned by the API.

    Args:
        chart_data: Mapping describing the chart. Reads ``id`` (required),
            ``changed_by``, ``changed_on_utc``, ``slice_name``, ``viz_type``,
            ``url``, ``datasource_id`` and ``params`` (a JSON string).

    Returns:
        A ``ChartSnapshot`` holding a single ``ChartInfoClass`` aspect whose
        custom properties list the chart's "Metrics", "Filters" and
        "Dimensions".

    Raises:
        KeyError: if ``chart_data`` has no ``id``.
    """
    # Function-scope import: only needed for the missing-timestamp fallback.
    import time

    chart_urn = f"urn:li:chart:({self.platform},{chart_data['id']})"
    chart_snapshot = ChartSnapshot(
        urn=chart_urn,
        aspects=[],
    )

    modified_actor = f"urn:li:corpuser:{(chart_data.get('changed_by') or {}).get('username', 'unknown')}"

    # A missing/empty timestamp falls back to the current time: the former
    # "now" placeholder is not a date string dateutil can parse.
    changed_on = chart_data.get("changed_on_utc")
    if changed_on:
        modified_ts = int(dp.parse(changed_on).timestamp() * 1000)
    else:
        modified_ts = int(time.time() * 1000)

    title = chart_data.get("slice_name", "")

    # note: the API does not currently supply created_by usernames due to a bug, but we are required to
    # provide a created AuditStamp to comply with ChangeAuditStamp model. For now, I sub in the last
    # modified actor urn
    last_modified = ChangeAuditStamps(
        created=AuditStamp(time=modified_ts, actor=modified_actor),
        lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
    )
    chart_type = chart_type_from_viz_type.get(chart_data.get("viz_type", ""))
    chart_url = f"{self.config.connect_uri}{chart_data.get('url', '')}"

    datasource_id = chart_data.get("datasource_id")
    datasource_urn = self.get_datasource_urn_from_id(datasource_id)

    # ``params`` may be absent, null or empty; treat all of those as "no
    # parameters" rather than letting json.loads raise.
    params = json.loads(chart_data.get("params") or "{}")

    # Charts declare either a ``metrics`` list or a single ``metric``.
    metrics = [
        get_metric_name(metric)
        for metric in (params.get("metrics", []) or [params.get("metric")])
    ]
    # ``adhoc_filters`` can be an explicit null, hence the ``or []``.
    filters = [
        get_filter_name(filter_obj)
        for filter_obj in (params.get("adhoc_filters") or [])
    ]
    group_bys = params.get("groupby", []) or []
    if isinstance(group_bys, str):
        # A lone dimension may be given as a bare string.
        group_bys = [group_bys]

    custom_properties = {
        "Metrics": ", ".join(metrics),
        "Filters": ", ".join(filters),
        "Dimensions": ", ".join(group_bys),
    }

    chart_info = ChartInfoClass(
        type=chart_type,
        description="",
        title=title,
        lastModified=last_modified,
        chartUrl=chart_url,
        inputs=[datasource_urn] if datasource_urn else None,
        customProperties=custom_properties,
    )
    chart_snapshot.aspects.append(chart_info)
    return chart_snapshot
def _get_chart_snapshot(self, query_data: Dict, viz_data: Dict) -> ChartSnapshot:
    """Build a ``ChartSnapshot`` for one visualization of a query.

    Args:
        query_data: The query record; reads ``name``, ``id`` and
            ``data_source_id``.
        viz_data: The visualization record; reads ``id`` (required),
            ``changed_by``, ``updated_at``, ``name`` and ``description``.

    Returns:
        A ``ChartSnapshot`` holding a single ``ChartInfoClass`` aspect. Its
        ``inputs`` is ``None`` (and a warning is reported) when no dataset
        URN can be derived from the query's data source.

    Raises:
        KeyError: if ``viz_data`` has no ``id``.
    """
    # Function-scope import: only needed for the missing-timestamp fallback.
    import time

    viz_id = viz_data["id"]
    chart_urn = f"urn:li:chart:({self.platform},{viz_id})"
    chart_snapshot = ChartSnapshot(
        urn=chart_urn,
        aspects=[],
    )

    # ``changed_by`` may be present but null, so guard with ``or {}``.
    changed_by = viz_data.get("changed_by") or {}
    modified_actor = f"urn:li:corpuser:{changed_by.get('username', 'unknown')}"

    # A missing/empty timestamp falls back to the current time: the former
    # "now" placeholder is not a date string dateutil can parse.
    updated_at = viz_data.get("updated_at")
    if updated_at:
        modified_ts = int(dp.parse(updated_at).timestamp() * 1000)
    else:
        modified_ts = int(time.time() * 1000)

    title = f"{query_data.get('name')} {viz_data.get('name', '')}"

    last_modified = ChangeAuditStamps(
        created=AuditStamp(time=modified_ts, actor=modified_actor),
        lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
    )

    # Getting chart type
    chart_type = self._get_chart_type_from_viz_data(viz_data)
    chart_url = f"{self.config.connect_uri}/queries/{query_data.get('id')}#{viz_id}"
    description = viz_data.get("description") or ""

    data_source_id = query_data.get("data_source_id")
    data_source = self._get_chart_data_source(data_source_id)
    data_source_type = data_source.get("type")

    # TODO: Getting table lineage from SQL parsing
    # Currently we only get database level source from `data_source_id` which returns database name or Bigquery's projectId
    # query = query_data.get("query", "")
    datasource_urn = self._get_datasource_urn_from_data_source(data_source)

    if not datasource_urn:
        self.report.report_warning(
            key=f"redash-chart-{viz_id}",
            reason=f"data_source_type={data_source_type} not yet implemented. Setting inputs to None",
        )

    chart_info = ChartInfoClass(
        type=chart_type,
        description=description,
        title=title,
        lastModified=last_modified,
        chartUrl=chart_url,
        inputs=[datasource_urn] if datasource_urn else None,
    )
    chart_snapshot.aspects.append(chart_info)

    return chart_snapshot
def construct_card_from_api_data(self, card_data: dict) -> Optional[ChartSnapshot]:
    """Fetch a card's details and turn them into a ``ChartSnapshot``.

    The card is fetched from ``{connect_uri}/api/card/{id}``. On an
    ``HTTPError`` a failure is reported and ``None`` is returned.

    The snapshot always contains a ``ChartInfoClass`` aspect. A
    ``ChartQueryClass`` aspect is added for cards whose ``query_type`` is
    "native", and an ownership aspect when ``_get_ownership`` yields one.
    """
    card_id = card_data.get("id", "")
    card_url = f"{self.config.connect_uri}/api/card/{card_id}"

    try:
        response = self.session.get(card_url)
        response.raise_for_status()
        card_details = response.json()
    except HTTPError as http_error:
        self.report.report_failure(
            key=f"metabase-card-{card_id}",
            reason=f"Unable to retrieve Card info. " f"Reason: {str(http_error)}",
        )
        return None

    snapshot = ChartSnapshot(
        urn=builder.make_chart_urn(self.platform, card_id),
        aspects=[],
    )

    # The last editor stands in for both the creator and the modifier stamps.
    edit_info = card_details.get("last-edit-info") or {}
    editor_urn = builder.make_user_urn(edit_info.get("email", "unknown"))
    edited_ts = self.get_timestamp_millis_from_ts_string(
        f"{edit_info.get('timestamp')}"
    )
    audit_stamps = ChangeAuditStamps(
        created=AuditStamp(time=edited_ts, actor=editor_urn),
        lastModified=AuditStamp(time=edited_ts, actor=editor_urn),
    )

    chart_type = self._get_chart_type(
        card_details.get("id", ""), card_details.get("display")
    )
    description = card_details.get("description") or ""
    title = card_details.get("name") or ""
    datasource_urn = self.get_datasource_urn(card_details)
    custom_properties = self.construct_card_custom_properties(card_details)

    snapshot.aspects.append(
        ChartInfoClass(
            type=chart_type,
            description=description,
            title=title,
            lastModified=audit_stamps,
            chartUrl=f"{self.config.connect_uri}/card/{card_id}",
            inputs=datasource_urn,
            customProperties=custom_properties,
        )
    )

    if card_details.get("query_type", "") == "native":
        native_section = card_details.get("dataset_query", {}).get("native", {})
        snapshot.aspects.append(
            ChartQueryClass(
                rawQuery=native_section.get("query", ""),
                type=ChartQueryTypeClass.SQL,
            )
        )

    # Ownership
    owner_aspect = self._get_ownership(card_details.get("creator_id", ""))
    if owner_aspect is not None:
        snapshot.aspects.append(owner_aspect)

    return snapshot
def construct_chart_from_api_data(
    self, chart_data: dict, query: dict, path: str
) -> ChartSnapshot:
    """Build a ``ChartSnapshot`` for one chart of a query.

    Args:
        chart_data: The chart record; reads ``token``, ``_links``,
            ``created_at``, ``updated_at``, ``view`` and ``view_vegas``.
        query: The query record; reads ``data_source_id`` and ``raw_query``.
        path: Browse path to register for the chart.

    Returns:
        A ``ChartSnapshot`` with chart-info, browse-path and chart-query
        aspects, plus an ownership aspect when ``_get_ownership`` yields one.
    """
    # Function-scope import: only needed for the missing-timestamp fallback.
    import time

    def _to_millis(raw_ts) -> int:
        # A missing/empty timestamp falls back to the current time: the
        # former "now" placeholder is not a date string dateutil can parse.
        if raw_ts:
            return int(dp.parse(raw_ts).timestamp() * 1000)
        return int(time.time() * 1000)

    chart_token = chart_data.get("token", "")
    chart_urn = builder.make_chart_urn(self.platform, chart_token)
    chart_snapshot = ChartSnapshot(
        urn=chart_urn,
        aspects=[],
    )

    # Resolve the creator once; it feeds both the audit stamps and ownership.
    creator = self._get_creator(
        chart_data.get("_links", {}).get("creator", {}).get("href", "")
    )

    last_modified = ChangeAuditStamps()
    if creator is not None:
        modified_actor = builder.make_user_urn(creator)
        created_ts = _to_millis(chart_data.get("created_at"))
        modified_ts = _to_millis(chart_data.get("updated_at"))
        last_modified = ChangeAuditStamps(
            created=AuditStamp(time=created_ts, actor=modified_actor),
            lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
        )

    # Prefer ``view``; fall back to ``view_vegas`` when it is empty, absent
    # or null (``len()`` on a null ``view`` used to raise).
    chart_detail = chart_data.get("view") or chart_data.get("view_vegas") or {}

    mode_chart_type = chart_detail.get("chartType", "") or chart_detail.get(
        "selectedChart", ""
    )
    chart_type = self._get_chart_type(chart_token, mode_chart_type)
    description = (
        chart_detail.get("description")
        or chart_detail.get("chartDescription")
        or ""
    )
    title = chart_detail.get("title") or chart_detail.get("chartTitle") or ""

    # create datasource urn
    platform, db_name = self._get_platform_and_dbname(query.get("data_source_id"))
    source_tables = self._get_source_from_query(query.get("raw_query"))
    datasource_urn = self._get_datasource_urn(platform, db_name, source_tables)
    custom_properties = self.construct_chart_custom_properties(
        chart_detail, mode_chart_type
    )

    # Chart Info
    chart_info = ChartInfoClass(
        type=chart_type,
        description=description,
        title=title,
        lastModified=last_modified,
        chartUrl=f"{self.config.connect_uri}"
        f"{chart_data.get('_links', {}).get('report_viz_web', {}).get('href', '')}",
        inputs=datasource_urn,
        customProperties=custom_properties,
    )
    chart_snapshot.aspects.append(chart_info)

    # Browse Path
    browse_path = BrowsePathsClass(paths=[path])
    chart_snapshot.aspects.append(browse_path)

    # Query
    chart_query = ChartQueryClass(
        rawQuery=query.get("raw_query", ""),
        type=ChartQueryTypeClass.SQL,
    )
    chart_snapshot.aspects.append(chart_query)

    # Ownership
    ownership = self._get_ownership(creator)
    if ownership is not None:
        chart_snapshot.aspects.append(ownership)

    return chart_snapshot
def emit_sheets_as_charts(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
    """Yield work units describing every sheet of ``workbook`` as a chart.

    For each sheet this yields the chart's metadata change event followed by
    the work units that attach the chart to the workbook container. Every
    datasource id referenced by a sheet is also recorded in
    ``self.datasource_ids_being_used``.
    """
    upstream_by_sheet = self.get_sheetwise_upstream_datasources(workbook)

    for sheet in workbook.get("sheets", []):
        chart_snapshot = ChartSnapshot(
            urn=builder.make_chart_urn(self.platform, sheet.get("id")),
            aspects=[],
        )

        creator = workbook.get("owner", {}).get("username", "")
        created_at = sheet.get("createdAt", datetime.now())
        updated_at = sheet.get("updatedAt", datetime.now())
        last_modified = self.get_last_modified(creator, created_at, updated_at)

        if sheet.get("path"):
            if self.config.site:
                site_part = f"/site/{self.config.site}"
            else:
                site_part = ""
            sheet_external_url = (
                f"{self.config.connect_uri}/#{site_part}/views/{sheet.get('path')}"
            )
        elif sheet.get("containedInDashboards"):
            # sheet contained in dashboard
            if self.config.site:
                site_part = f"/t/{self.config.site}"
            else:
                site_part = ""
            first_dashboard = sheet.get("containedInDashboards")[0]
            dashboard_path = first_dashboard.get("path", "")
            sheet_external_url = f"{self.config.connect_uri}{site_part}/authoring/{dashboard_path}/{sheet.get('name', '')}"
        else:
            # hidden or viz-in-tooltip sheet
            sheet_external_url = None

        # Field name -> description built from the field's description/formula.
        field_descriptions = {}
        for field in sheet.get("datasourceFields", ""):
            field_description = make_description_from_params(
                get_field_value_in_sheet(field, "description"),
                get_field_value_in_sheet(field, "formula"),
            )
            field_descriptions[get_field_value_in_sheet(field, "name")] = field_description

        # datasource urn
        input_urns = []
        for ds_id in upstream_by_sheet.get(sheet.get("id"), set()):
            if not ds_id:
                continue
            input_urns.append(
                builder.make_dataset_urn(self.platform, ds_id, self.config.env)
            )
            if ds_id not in self.datasource_ids_being_used:
                self.datasource_ids_being_used.append(ds_id)

        # Chart Info
        chart_snapshot.aspects.append(
            ChartInfoClass(
                description="",
                title=sheet.get("name", ""),
                lastModified=last_modified,
                externalUrl=sheet_external_url,
                inputs=sorted(input_urns),
                customProperties=field_descriptions,
            )
        )

        # Browse path
        project_part = workbook.get("projectName", "").replace("/", REPLACE_SLASH_CHAR)
        workbook_part = workbook.get("name", "")
        sheet_part = sheet.get("name", "").replace("/", REPLACE_SLASH_CHAR)
        chart_snapshot.aspects.append(
            BrowsePathsClass(
                paths=[f"/{self.platform}/{project_part}/{workbook_part}/{sheet_part}"]
            )
        )

        # Ownership
        owner_aspect = self._get_ownership(creator)
        if owner_aspect is not None:
            chart_snapshot.aspects.append(owner_aspect)

        # Tags
        sheet_tags = sheet.get("tags", [])
        if sheet_tags and self.config.ingest_tags:
            tag_names = [
                tag.get("name", "").upper() for tag in sheet_tags if tag is not None
            ]
            chart_snapshot.aspects.append(
                builder.make_global_tag_aspect_with_tag_list(tag_names)
            )

        yield self.get_metadata_change_event(chart_snapshot)

        yield from add_entity_to_container(
            self.gen_workbook_key(workbook), "chart", chart_snapshot.urn
        )
def __to_datahub_chart(
    self, tile: PowerBiAPI.Tile, ds_mcps: List[MetadataChangeProposalWrapper]
) -> List[MetadataChangeProposalWrapper]:
    """
    Map PowerBi tile to datahub chart

    Returns three proposals for the chart URN, in this order: chart info,
    status (``removed=False``) and chart key.
    """
    LOGGER.info("Converting tile {}(id={}) to chart".format(tile.title, tile.id))

    # Create an URN for chart
    chart_urn = builder.make_chart_urn(
        self.__config.platform_name, tile.get_urn_part()
    )
    LOGGER.info("{}={}".format(Constant.CHART_URN, chart_urn))

    ds_input: List[str] = self.to_urn_set(ds_mcps)

    # Dataset/report identifiers default to "" when the tile has none.
    custom_properties = {
        "datasetId": tile.dataset.id if tile.dataset else "",
        "reportId": tile.report.id if tile.report else "",
        "datasetWebUrl": tile.dataset.webUrl if tile.dataset is not None else "",
        "createdFrom": tile.createdFrom.value,
    }

    # The external URL is only set when the tile has a report.
    chart_info = ChartInfoClass(
        title=tile.title or "",
        description=tile.title or "",
        lastModified=ChangeAuditStamps(),
        inputs=ds_input,
        externalUrl=tile.report.webUrl if tile.report else None,
        customProperties=custom_properties,
    )
    chart_status = StatusClass(removed=False)
    chart_key = ChartKeyClass(
        dashboardTool=self.__config.platform_name,
        chartId=Constant.CHART_ID.format(tile.id),
    )

    # One proposal per aspect, all targeting the same chart entity.
    aspects_by_name = [
        (Constant.CHART_INFO, chart_info),
        (Constant.STATUS, chart_status),
        (Constant.CHART_KEY, chart_key),
    ]
    return [
        self.new_mcp(
            entity_type=Constant.CHART,
            entity_urn=chart_urn,
            aspect_name=aspect_name,
            aspect=aspect,
        )
        for aspect_name, aspect in aspects_by_name
    ]