def execute(self) -> Iterator[Dict[str, Any]]:
    """
    Yield one table record for every external table in the query response.

    Each record is a dict with the keys ``cluster``, ``database``, ``schema``,
    ``name`` and ``description``.

    Entries of ``response['databases']`` whose ``connectionType`` is
    ``google-sheets``, ``salesforce`` or ``excel-direct`` produce one record per
    item in their ``tables`` list; the entry's own name becomes the schema.
    Every other entry produces a single record named after the entry itself,
    with the schema read from the ``EXTERNAL_SCHEMA_NAME`` config value.
    """
    # Connection types whose entries are expanded into their downstream tables.
    multi_table_connections = ('google-sheets', 'salesforce', 'excel-direct')

    for database in self.execute_query()['databases']:
        if database['connectionType'] not in multi_table_connections:
            # The entry itself is reported as the table.
            cluster = self._conf.get_string(
                TableauGraphQLExternalTableExtractor.EXTERNAL_CLUSTER_NAME)
            database_name = TableauDashboardUtils.sanitize_database_name(
                database['connectionType'])
            schema_name = self._conf.get_string(
                TableauGraphQLExternalTableExtractor.EXTERNAL_SCHEMA_NAME)
            table_name = TableauDashboardUtils.sanitize_table_name(database['name'])
            yield {
                'cluster': cluster,
                'database': database_name,
                'schema': schema_name,
                'name': table_name,
                'description': database['description'],
            }
            continue

        for downstream_table in database['tables']:
            cluster = self._conf.get_string(
                TableauGraphQLExternalTableExtractor.EXTERNAL_CLUSTER_NAME)
            database_name = TableauDashboardUtils.sanitize_database_name(
                database['connectionType'])
            schema_name = TableauDashboardUtils.sanitize_schema_name(database['name'])
            table_name = TableauDashboardUtils.sanitize_table_name(downstream_table['name'])
            yield {
                'cluster': cluster,
                'database': database_name,
                'schema': schema_name,
                'name': table_name,
                # The description comes from the parent entry, not the downstream table.
                'description': database['description'],
            }
def execute(self) -> Iterator[Dict[str, Any]]:
    """
    Yield, for each workbook outside the excluded projects, the keys of its upstream tables.

    Each yielded dict has the keys ``dashboard_group_id`` (the workbook's project
    name), ``dashboard_id`` (the sanitized workbook name), ``cluster`` and
    ``table_ids`` (one key per upstream table, built with
    ``TableMetadata.TABLE_KEY_FORMAT``).
    """
    response = self.execute_query()

    # The exclusion list is the same for every workbook, so it is read once
    # instead of once per workbook inside the filter.
    excluded_projects = self._conf.get_list(TableauGraphQLDashboardTableExtractor.EXCLUDED_PROJECTS)
    workbooks_data = [
        workbook for workbook in response['workbooks']
        if workbook['projectName'] not in excluded_projects
    ]

    for workbook in workbooks_data:
        dashboard_cluster = self._conf.get_string(TableauGraphQLDashboardTableExtractor.CLUSTER)
        data = {
            'dashboard_group_id': workbook['projectName'],
            'dashboard_id': TableauDashboardUtils.sanitize_workbook_name(workbook['name']),
            'cluster': dashboard_cluster,
            'table_ids': []
        }

        for table in workbook['upstreamTables']:
            # external tables have no schema, so they must be parsed differently
            # see TableauExternalTableExtractor for more specifics
            if table['schema'] != '':
                # Same config key as the dashboard's cluster, so reuse the value.
                cluster = dashboard_cluster
                database = self._conf.get_string(TableauGraphQLDashboardTableExtractor.DATABASE)

                # Tableau sometimes incorrectly assigns the "schema" value
                # based on how the datasource connection is used in a workbook.
                # It will hide the real schema inside the table name, like "real_schema.real_table",
                # and set the "schema" value to "wrong_schema". In every case discovered so far, the schema
                # key is incorrect, so the "inner" schema from the table name is used instead.
                if '.' in table['name']:
                    # Keep only the last two dot-separated components so that a name with
                    # extra leading qualifiers (e.g. "db.schema.table") no longer fails the
                    # two-value unpacking; a single-dot name behaves exactly as before.
                    schema, name = table['name'].split('.')[-2:]
                else:
                    schema, name = table['schema'], table['name']
                schema = TableauDashboardUtils.sanitize_schema_name(schema)
                name = TableauDashboardUtils.sanitize_table_name(name)
            else:
                cluster = self._conf.get_string(TableauGraphQLDashboardTableExtractor.EXTERNAL_CLUSTER_NAME)
                database = TableauDashboardUtils.sanitize_database_name(
                    table['database']['connectionType']
                )
                schema = TableauDashboardUtils.sanitize_schema_name(table['database']['name'])
                name = TableauDashboardUtils.sanitize_table_name(table['name'])

            table_id = TableMetadata.TABLE_KEY_FORMAT.format(
                db=database,
                cluster=cluster,
                schema=schema,
                tbl=name,
            )
            data['table_ids'].append(table_id)

        yield data