Example #1
0
    def execute(self) -> Iterator[Dict[str, Any]]:
        """
        Yield one record per workbook describing the tables that workbook depends on.

        The response of ``self.execute_query()`` is read from its 'workbooks' key. Workbooks whose
        'projectName' appears in the configured EXCLUDED_PROJECTS list are skipped. Every remaining
        workbook produces a dict with the keys:

        * ``dashboard_group_id`` - the workbook's 'projectName'
        * ``dashboard_id`` - the workbook's 'name' passed through ``sanitize_workbook_name``
        * ``cluster`` - the configured CLUSTER value
        * ``table_ids`` - one ``TableMetadata.TABLE_KEY_FORMAT`` key per entry in 'upstreamTables'

        :return: an iterator over the per-workbook dicts described above
        """
        response = self.execute_query()

        # The exclusion list is the same for every workbook, so read it from the config only once.
        excluded_projects = self._conf.get_list(TableauGraphQLDashboardTableExtractor.EXCLUDED_PROJECTS)

        workbooks_data = [workbook for workbook in response['workbooks']
                          if workbook['projectName'] not in excluded_projects]

        for workbook in workbooks_data:
            data = {
                'dashboard_group_id': workbook['projectName'],
                'dashboard_id': TableauDashboardUtils.sanitize_workbook_name(workbook['name']),
                'cluster': self._conf.get_string(TableauGraphQLDashboardTableExtractor.CLUSTER),
                'table_ids': []
            }
            # Tables with a schema use the same CLUSTER value as the workbook itself; reuse it
            # instead of asking the config again for every table.
            workbook_cluster = data['cluster']

            for table in workbook['upstreamTables']:

                # external tables have no schema, so they must be parsed differently
                # see TableauExternalTableExtractor for more specifics
                if table['schema'] != '':
                    cluster = workbook_cluster
                    database = self._conf.get_string(TableauGraphQLDashboardTableExtractor.DATABASE)

                    # Tableau sometimes incorrectly assigns the "schema" value
                    # based on how the datasource connection is used in a workbook.
                    # It will hide the real schema inside the table name, like "real_schema.real_table",
                    # and set the "schema" value to "wrong_schema". In every case discovered so far, the schema
                    # key is incorrect, so the "inner" schema from the table name is used instead.
                    if '.' in table['name']:
                        # Keep only the last two dot-separated components. A plain two-way unpack of
                        # split('.') raises ValueError for names with more than one dot
                        # (e.g. "db.real_schema.real_table") and would abort the whole extraction.
                        # NOTE(review): any leading qualifier is discarded, not used as the database - confirm.
                        schema, name = table['name'].split('.')[-2:]
                    else:
                        schema, name = table['schema'], table['name']
                    schema = TableauDashboardUtils.sanitize_schema_name(schema)
                    name = TableauDashboardUtils.sanitize_table_name(name)
                else:
                    cluster = self._conf.get_string(TableauGraphQLDashboardTableExtractor.EXTERNAL_CLUSTER_NAME)
                    database = TableauDashboardUtils.sanitize_database_name(
                        table['database']['connectionType']
                    )
                    schema = TableauDashboardUtils.sanitize_schema_name(table['database']['name'])
                    name = TableauDashboardUtils.sanitize_table_name(table['name'])

                table_id = TableMetadata.TABLE_KEY_FORMAT.format(
                    db=database,
                    cluster=cluster,
                    schema=schema,
                    tbl=name,
                )
                data['table_ids'].append(table_id)

            yield data
    def execute(self) -> Iterator[Dict[str, Any]]:
        """
        Yield table records built from the 'databases' entries of the query response.

        Each yielded dict has the keys 'cluster', 'database', 'schema', 'name' and 'description'.

        * For the connection types 'google-sheets', 'salesforce' and 'excel-direct', one record is
          yielded per item of the entry's 'tables' list: the entry's 'name' supplies the schema and
          the nested item's 'name' supplies the table name.
        * For every other connection type a single record is yielded: the schema comes from the
          configured EXTERNAL_SCHEMA_NAME and the entry's 'name' supplies the table name.

        In both cases the cluster is the configured EXTERNAL_CLUSTER_NAME, the database is derived from
        the entry's 'connectionType', and the description is the entry's 'description'.

        :return: an iterator over the record dicts described above
        """
        response = self.execute_query()

        extractor = TableauGraphQLExternalTableExtractor
        # Connection types whose nested 'tables' are each emitted as their own record.
        nested_connection_types = ('google-sheets', 'salesforce', 'excel-direct')

        for entry in response['databases']:
            connection_type = entry['connectionType']

            if connection_type not in nested_connection_types:
                # The entry itself is reported as the table.
                cluster_name = self._conf.get_string(extractor.EXTERNAL_CLUSTER_NAME)
                database_name = TableauDashboardUtils.sanitize_database_name(connection_type)
                schema_name = self._conf.get_string(extractor.EXTERNAL_SCHEMA_NAME)
                table_name = TableauDashboardUtils.sanitize_table_name(entry['name'])
                yield {
                    'cluster': cluster_name,
                    'database': database_name,
                    'schema': schema_name,
                    'name': table_name,
                    'description': entry['description'],
                }
                continue

            for nested_table in entry['tables']:
                # Values are resolved per nested table so nothing is read from the config or the
                # entry when its 'tables' list is empty.
                cluster_name = self._conf.get_string(extractor.EXTERNAL_CLUSTER_NAME)
                database_name = TableauDashboardUtils.sanitize_database_name(connection_type)
                schema_name = TableauDashboardUtils.sanitize_schema_name(entry['name'])
                table_name = TableauDashboardUtils.sanitize_table_name(nested_table['name'])
                yield {
                    'cluster': cluster_name,
                    'database': database_name,
                    'schema': schema_name,
                    'name': table_name,
                    'description': entry['description'],
                }