def emit_dashboards(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
    """Yield work units for every entry in ``workbook["dashboards"]``.

    For each dashboard a ``DashboardSnapshot`` is assembled from:

    * a ``DashboardInfoClass`` aspect (title, chart urns, last-modified
      stamps and external URL),
    * a ``BrowsePathsClass`` aspect, only when the workbook has both a
      truthy ``projectName`` and a truthy ``name``,
    * the aspect returned by ``self._get_ownership`` when it is not ``None``.

    Args:
        workbook: Workbook payload. Keys read here are ``dashboards``,
            ``owner``, ``projectName`` and ``name``.

    Yields:
        The metadata change event for each dashboard snapshot, followed by
        everything ``add_entity_to_container`` yields for that dashboard.
    """
    for dashboard in workbook.get("dashboards", []):
        snapshot = DashboardSnapshot(
            urn=builder.make_dashboard_urn(self.platform, dashboard["id"]),
            aspects=[],
        )

        # The workbook owner stands in as creator of each dashboard.
        owner_username = workbook.get("owner", {}).get("username", "")
        audit_stamps = self.get_last_modified(
            owner_username,
            dashboard.get("createdAt", datetime.now()),
            dashboard.get("updatedAt", datetime.now()),
        )

        # External URL; the site segment is only present for a configured site.
        if self.config.site:
            site_part = f"/site/{self.config.site}"
        else:
            site_part = ""
        external_url = f"{self.config.connect_uri}/#{site_part}/views/{dashboard.get('path', '')}"

        # "/" in the name is substituted so it cannot split a browse path.
        raw_name = dashboard.get("name")
        title = raw_name.replace("/", REPLACE_SLASH_CHAR) if raw_name else ""

        chart_urns = []
        for sheet in dashboard.get("sheets", []):
            chart_urns.append(builder.make_chart_urn(self.platform, sheet.get("id")))

        snapshot.aspects.append(
            DashboardInfoClass(
                description="",
                title=title,
                charts=chart_urns,
                lastModified=audit_stamps,
                dashboardUrl=external_url,
                customProperties={},
            )
        )

        # Browse path: needs both the project name and the workbook name.
        if not (workbook.get("projectName") and workbook.get("name")):
            logger.debug(f"Browse path not set for dashboard {dashboard['id']}")
        else:
            # Fall back to the dashboard id when there is no usable title.
            dashboard_name = title or dashboard["id"]
            snapshot.aspects.append(
                BrowsePathsClass(
                    paths=[
                        f"/{self.platform}/{workbook['projectName'].replace('/', REPLACE_SLASH_CHAR)}"
                        f"/{workbook['name'].replace('/', REPLACE_SLASH_CHAR)}"
                        f"/{dashboard_name}"
                    ]
                )
            )

        # Ownership
        ownership = self._get_ownership(owner_username)
        if ownership is not None:
            snapshot.aspects.append(ownership)

        yield self.get_metadata_change_event(snapshot)
        yield from add_entity_to_container(
            self.gen_workbook_key(workbook), "dashboard", snapshot.urn
        )
def emit_sheets_as_charts(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
    """Yield work units for every entry in ``workbook["sheets"]``, as charts.

    For each sheet a ``ChartSnapshot`` is assembled from:

    * a ``ChartInfoClass`` aspect (title, last-modified stamps, external
      URL, sorted upstream dataset urns, and one custom property per
      datasource field),
    * a ``BrowsePathsClass`` aspect, only when the workbook has both a
      truthy ``projectName`` and a truthy ``name`` (same rule as
      ``emit_dashboards``; otherwise a debug message is logged),
    * the aspect returned by ``self._get_ownership`` when it is not ``None``,
    * a global-tags aspect when the sheet has tags and
      ``self.config.ingest_tags`` is truthy.

    Side effect: every upstream datasource id seen here is appended to
    ``self.datasource_ids_being_used`` if it is not already present.

    Args:
        workbook: Workbook payload. Keys read here are ``sheets``,
            ``owner``, ``projectName`` and ``name``.

    Yields:
        The metadata change event for each chart snapshot, followed by
        everything ``add_entity_to_container`` yields for that chart.
    """
    sheet_upstream_datasources = self.get_sheetwise_upstream_datasources(workbook)

    for sheet in workbook.get("sheets", []):
        chart_snapshot = ChartSnapshot(
            urn=builder.make_chart_urn(self.platform, sheet.get("id")),
            aspects=[],
        )

        # The workbook owner stands in as creator of each sheet.
        creator = workbook.get("owner", {}).get("username", "")
        created_at = sheet.get("createdAt", datetime.now())
        updated_at = sheet.get("updatedAt", datetime.now())
        last_modified = self.get_last_modified(creator, created_at, updated_at)

        if sheet.get("path"):
            site_part = f"/site/{self.config.site}" if self.config.site else ""
            sheet_external_url = (
                f"{self.config.connect_uri}/#{site_part}/views/{sheet.get('path')}"
            )
        elif sheet.get("containedInDashboards"):
            # sheet contained in dashboard
            site_part = f"/t/{self.config.site}" if self.config.site else ""
            dashboard_path = sheet.get("containedInDashboards")[0].get("path", "")
            sheet_external_url = f"{self.config.connect_uri}{site_part}/authoring/{dashboard_path}/{sheet.get('name', '')}"
        else:
            # hidden or viz-in-tooltip sheet
            sheet_external_url = None

        # One custom property per datasource field: name -> description.
        # `or []` also tolerates an explicit None value for the key.
        fields = {}
        for field in sheet.get("datasourceFields") or []:
            description = make_description_from_params(
                get_field_value_in_sheet(field, "description"),
                get_field_value_in_sheet(field, "formula"),
            )
            fields[get_field_value_in_sheet(field, "name")] = description

        # datasource urn
        datasource_urn = []
        data_sources = sheet_upstream_datasources.get(sheet.get("id"), set())
        for ds_id in data_sources:
            if not ds_id:
                # Skips None as well as empty ids.
                continue
            ds_urn = builder.make_dataset_urn(self.platform, ds_id, self.config.env)
            datasource_urn.append(ds_urn)
            if ds_id not in self.datasource_ids_being_used:
                self.datasource_ids_being_used.append(ds_id)

        # Chart Info
        chart_info = ChartInfoClass(
            description="",
            title=sheet.get("name", ""),
            lastModified=last_modified,
            externalUrl=sheet_external_url,
            inputs=sorted(datasource_urn),
            customProperties=fields,
        )
        chart_snapshot.aspects.append(chart_info)

        # Browse path: only set when both project and workbook names are
        # usable. A present-but-None value would otherwise crash on
        # `.replace`. Every segment is slash-escaped so charts land in the
        # same folder as the dashboards of the same workbook.
        project_name = workbook.get("projectName")
        workbook_name = workbook.get("name")
        if project_name and workbook_name:
            # Fall back to the sheet id when the sheet has no usable name.
            sheet_name = sheet.get("name") or sheet.get("id") or ""
            browse_path = BrowsePathsClass(
                paths=[
                    f"/{self.platform}/{project_name.replace('/', REPLACE_SLASH_CHAR)}"
                    f"/{workbook_name.replace('/', REPLACE_SLASH_CHAR)}"
                    f"/{sheet_name.replace('/', REPLACE_SLASH_CHAR)}"
                ]
            )
            chart_snapshot.aspects.append(browse_path)
        else:
            logger.debug(f"Browse path not set for chart {sheet.get('id')}")

        # Ownership
        owner = self._get_ownership(creator)
        if owner is not None:
            chart_snapshot.aspects.append(owner)

        # Tags
        tag_list = sheet.get("tags", [])
        if tag_list and self.config.ingest_tags:
            tag_list_str = [
                t.get("name", "").upper() for t in tag_list if t is not None
            ]
            chart_snapshot.aspects.append(
                builder.make_global_tag_aspect_with_tag_list(tag_list_str)
            )

        yield self.get_metadata_change_event(chart_snapshot)
        yield from add_entity_to_container(
            self.gen_workbook_key(workbook), "chart", chart_snapshot.urn
        )
def emit_datasource(self, datasource: dict, workbook: dict = None) -> Iterable[MetadataWorkUnit]:
    """Yield work units describing one Tableau data source as a dataset.

    The dataset snapshot carries a browse path, an ownership aspect (when
    ``self._get_ownership`` returns one), dataset properties, and schema
    metadata (when ``_get_schema_metadata_for_embedded_datasource`` returns
    one).

    Side effect: the datasource id is appended to
    ``self.datasource_ids_being_used`` if it is not already present.

    Args:
        datasource: Data source payload. Keys read here include ``id``,
            ``name``, ``description``, ``fields``, ``upstreamTables``,
            ``__typename`` and the ``extract*`` / ``hasExtracts`` values.
        workbook: Workbook that embeds the data source. When ``None``, the
            project name and owner are read from ``datasource`` itself and
            the dataset is not attached to any workbook container.

    Yields:
        In order: an ``upstreamLineage`` change proposal (only when upstream
        tables were resolved), the snapshot's metadata change event, a
        ``subTypes`` change proposal, and -- for an ``EmbeddedDatasource``
        with a workbook -- whatever ``add_entity_to_container`` yields.
    """
    datasource_info = workbook if workbook is not None else datasource

    # `or ""` tolerates a key that is present with a None value.
    project = (
        (datasource_info.get("projectName") or "").replace("/", REPLACE_SLASH_CHAR)
        if datasource_info
        else ""
    )
    datasource_id = datasource.get("id", "")
    datasource_name = f"{datasource.get('name')}.{datasource_id}"
    datasource_urn = builder.make_dataset_urn(
        self.platform, datasource_id, self.config.env
    )
    if datasource_id not in self.datasource_ids_being_used:
        self.datasource_ids_being_used.append(datasource_id)

    dataset_snapshot = DatasetSnapshot(
        urn=datasource_urn,
        aspects=[],
    )

    # Browse path
    browse_paths = BrowsePathsClass(
        paths=[
            f"/{self.config.env.lower()}/{self.platform}/{project}/{datasource.get('name', '')}/{datasource_name}"
        ]
    )
    dataset_snapshot.aspects.append(browse_paths)

    # Ownership
    owner = (
        self._get_ownership(datasource_info.get("owner", {}).get("username", ""))
        if datasource_info
        else None
    )
    if owner is not None:
        dataset_snapshot.aspects.append(owner)

    # Dataset properties
    dataset_props = DatasetPropertiesClass(
        name=datasource.get("name"),
        description=datasource.get("description"),
        customProperties={
            "hasExtracts": str(datasource.get("hasExtracts", "")),
            "extractLastRefreshTime": datasource.get("extractLastRefreshTime", "")
            or "",
            "extractLastIncrementalUpdateTime": datasource.get(
                "extractLastIncrementalUpdateTime", ""
            )
            or "",
            "extractLastUpdateTime": datasource.get("extractLastUpdateTime", "")
            or "",
            "type": datasource.get("__typename", ""),
        },
    )
    dataset_snapshot.aspects.append(dataset_props)

    # Upstream Tables
    if datasource.get("upstreamTables") is not None:
        # datasource -> db table relations
        upstream_tables = self._create_upstream_table_lineage(datasource, project)

        if upstream_tables:
            upstream_lineage = UpstreamLineage(upstreams=upstream_tables)
            yield self.get_metadata_change_proposal(
                datasource_urn,
                aspect_name="upstreamLineage",
                aspect=upstream_lineage,
            )

    # Datasource Fields
    schema_metadata = self._get_schema_metadata_for_embedded_datasource(
        datasource.get("fields", [])
    )
    if schema_metadata is not None:
        dataset_snapshot.aspects.append(schema_metadata)

    yield self.get_metadata_change_event(dataset_snapshot)
    yield self.get_metadata_change_proposal(
        dataset_snapshot.urn,
        aspect_name="subTypes",
        aspect=SubTypesClass(typeNames=["Data Source"]),
    )

    # Only attach to a workbook container when a workbook was supplied;
    # `workbook` defaults to None and no container key is built from it.
    if datasource.get("__typename") == "EmbeddedDatasource" and workbook is not None:
        yield from add_entity_to_container(
            self.gen_workbook_key(workbook), "dataset", dataset_snapshot.urn
        )
def emit_custom_sql_datasources(self) -> Iterable[MetadataWorkUnit]:
    """Yield work units for the custom SQL tables in ``self.custom_sql_ids_being_used``.

    The ``customSQLTablesConnection`` is queried with an ``idWithin`` filter.
    An initial call supplies the total count and the has-next-page flag;
    its nodes are not processed. Pages are then fetched inside the loop
    until ``has_next_page`` is falsy, and every unique custom SQL table of
    each page is emitted as a dataset.

    Per table, in order, this yields:

    * lineage work units from ``_create_lineage_from_csql_datasource`` and,
      for an embedded data source with a workbook, container work units
      (both only when the table references at least one data source),
    * lineage work units from ``_create_lineage_to_upstream_tables``,
    * the metadata change event of the dataset snapshot (schema metadata
      when available, browse path when project and datasource name are
      known, dataset properties, view properties),
    * a ``subTypes`` change proposal with ``["View", "Custom SQL"]``.
    """
    page_size = len(self.custom_sql_ids_being_used)
    custom_sql_filter = "idWithin: {}".format(
        json.dumps(self.custom_sql_ids_being_used)
    )

    # Initial call: only total_count / has_next_page are consumed from it.
    connection, total_count, has_next_page = self.get_connection_object(
        custom_sql_graphql_query, "customSQLTablesConnection", custom_sql_filter
    )

    offset = 0
    while has_next_page:
        # Fetch a full page unless fewer items than that remain.
        fetch_count = min(total_count - offset, page_size)
        connection, total_count, has_next_page = self.get_connection_object(
            custom_sql_graphql_query,
            "customSQLTablesConnection",
            custom_sql_filter,
            fetch_count,
            offset,
        )
        offset += fetch_count

        for csql in get_unique_custom_sql(connection.get("nodes", [])):
            csql_id: str = csql["id"]
            csql_urn = builder.make_dataset_urn(
                self.platform, csql_id, self.config.env
            )
            snapshot = DatasetSnapshot(
                urn=csql_urn,
                aspects=[],
            )

            # Both stay None when the table references no data source.
            datasource_name = None
            project = None
            if len(csql["datasources"]) > 0:
                yield from self._create_lineage_from_csql_datasource(
                    csql_urn, csql["datasources"]
                )

                # CustomSQLTable id owned by exactly one tableau data source
                logger.debug(
                    f"Number of datasources referencing CustomSQLTable: {len(csql['datasources'])}"
                )

                datasource = csql["datasources"][0]
                datasource_name = datasource.get("name")
                is_embedded = datasource.get("__typename") == "EmbeddedDatasource"
                if is_embedded and datasource.get("workbook"):
                    # Prefix with the workbook name; drop the name entirely
                    # when either part is missing.
                    if datasource_name and datasource.get("workbook").get("name"):
                        datasource_name = f"{datasource.get('workbook').get('name')}/{datasource_name}"
                    else:
                        datasource_name = None
                    yield from add_entity_to_container(
                        self.gen_workbook_key(datasource["workbook"]),
                        "dataset",
                        snapshot.urn,
                    )
                project = self._get_project(datasource)

            # lineage from custom sql -> datasets/tables #
            columns = csql.get("columns", [])
            yield from self._create_lineage_to_upstream_tables(csql_urn, columns)

            # Schema Metadata
            schema_aspect = self.get_schema_metadata_for_custom_sql(columns)
            if schema_aspect is not None:
                snapshot.aspects.append(schema_aspect)

            # Browse path
            csql_name = csql.get("name") or csql_id
            if not (project and datasource_name):
                logger.debug(f"Browse path not set for Custom SQL table {csql_id}")
            else:
                snapshot.aspects.append(
                    BrowsePathsClass(
                        paths=[
                            f"/{self.config.env.lower()}/{self.platform}/{project}/{datasource['name']}/{csql_name}"
                        ]
                    )
                )

            snapshot.aspects.append(
                DatasetPropertiesClass(
                    name=csql.get("name"), description=csql.get("description")
                )
            )
            snapshot.aspects.append(
                ViewPropertiesClass(
                    materialized=False,
                    viewLanguage="SQL",
                    viewLogic=clean_query(csql.get("query", "")),
                )
            )

            yield self.get_metadata_change_event(snapshot)
            yield self.get_metadata_change_proposal(
                snapshot.urn,
                aspect_name="subTypes",
                aspect=SubTypesClass(typeNames=["View", "Custom SQL"]),
            )