Ejemplo n.º 1
0
    def execute(self) -> Iterator[Dict[str, Any]]:
        """
        Yield one dashboard metadata record per workbook in the query response.

        Workbooks whose ``projectName`` appears in the ``EXCLUDED_PROJECTS``
        config list are dropped. Workbooks whose ``projectName`` or ``name``
        is ``None`` are skipped with a warning instead of being emitted.

        :return: iterator of dicts with the keys ``dashboard_group``,
            ``dashboard_name``, ``description``, ``created_timestamp``,
            ``dashboard_group_url``, ``dashboard_url`` and ``cluster``.
        """
        # Function-local import: the file's module-level imports are not
        # visible from this block, so do not rely on a module logger existing.
        import logging

        response = self.execute_query()

        # Read the exclusion list once instead of once per workbook, and pass
        # [] as the default so an unset key means "exclude nothing".
        excluded_projects = self._conf.get_list(
            TableauGraphQLApiMetadataExtractor.EXCLUDED_PROJECTS, [])
        workbooks_data = [
            workbook for workbook in response['workbooks']
            if workbook['projectName'] not in excluded_projects
        ]

        # The base URL is the same for every record; look it up once.
        base_url = self._conf.get(
            TableauGraphQLApiMetadataExtractor.TABLEAU_BASE_URL)

        for workbook in workbooks_data:
            # A record cannot be keyed without both a project name and a
            # workbook name, so such entries are reported and skipped.
            if None in (workbook['projectName'], workbook['name']):
                logging.getLogger(__name__).warning(
                    'Ignoring workbook (ID:%s) in project (ID:%s) because of a lack of permission',
                    workbook['vizportalUrlId'],
                    workbook['projectVizportalUrlId'],
                )
                continue
            data = {
                'dashboard_group':
                workbook['projectName'],
                'dashboard_name':
                TableauDashboardUtils.sanitize_workbook_name(workbook['name']),
                'description':
                workbook.get('description', ''),
                'created_timestamp':
                workbook['createdAt'],
                'dashboard_group_url':
                '{}/#/projects/{}'.format(
                    base_url, workbook['projectVizportalUrlId']),
                'dashboard_url':
                '{}/#/workbooks/{}/views'.format(
                    base_url, workbook['vizportalUrlId']),
                'cluster':
                self._conf.get_string(
                    TableauGraphQLApiMetadataExtractor.CLUSTER)
            }
            yield data
    def execute(self) -> Iterator[Dict[str, Any]]:
        """
        Yield one last-modified record per workbook in the query response.

        Workbooks whose ``projectName`` appears in the ``EXCLUDED_PROJECTS``
        config list (default: empty) are dropped. Workbooks whose
        ``projectName`` or ``name`` is ``None`` are skipped with a warning.

        :return: iterator of dicts with the keys ``dashboard_group_id``,
            ``dashboard_id``, ``last_modified_timestamp`` and ``cluster``.
        """
        response = self.execute_query()

        # The exclusion list does not change between workbooks; read it once
        # rather than on every iteration of the filter below.
        excluded_projects = self._conf.get_list(
            TableauGraphQLApiLastModifiedExtractor.EXCLUDED_PROJECTS, [])
        workbooks_data = [
            workbook for workbook in response['workbooks']
            if workbook['projectName'] not in excluded_projects
        ]

        for workbook in workbooks_data:
            # Entries without a project name or workbook name are reported
            # and skipped rather than emitted.
            if None in (workbook['projectName'], workbook['name']):
                LOGGER.warning(
                    f'Ignoring workbook (ID:{workbook["vizportalUrlId"]}) '
                    f'in project (ID:{workbook["projectVizportalUrlId"]}) because of a lack of permission'
                )
                continue
            data = {
                'dashboard_group_id':
                workbook['projectName'],
                'dashboard_id':
                TableauDashboardUtils.sanitize_workbook_name(workbook['name']),
                'last_modified_timestamp':
                workbook['updatedAt'],
                'cluster':
                self._conf.get_string(
                    TableauGraphQLApiLastModifiedExtractor.CLUSTER)
            }
            yield data
    def execute(self) -> Iterator[Dict[str, Any]]:
        """
        Yield one dashboard metadata record per workbook in the query response.

        Workbooks whose ``projectName`` appears in the ``EXCLUDED_PROJECTS``
        config list (default: empty) are dropped. Workbooks whose
        ``projectName`` or ``name`` is ``None`` are skipped with a warning.

        :return: iterator of dicts with the keys ``dashboard_group``,
            ``dashboard_name``, ``description``, ``created_timestamp``,
            ``dashboard_group_url``, ``dashboard_url`` and ``cluster``.
        """
        response = self.execute_query()

        # The exclusion list does not change between workbooks; read it once
        # rather than on every iteration of the filter below.
        excluded_projects = self._conf.get_list(
            TableauGraphQLApiMetadataExtractor.EXCLUDED_PROJECTS, [])
        workbooks_data = [
            workbook for workbook in response['workbooks']
            if workbook['projectName'] not in excluded_projects
        ]
        base_url = self._conf.get(
            TableauGraphQLApiMetadataExtractor.TABLEAU_BASE_URL)
        for workbook in workbooks_data:
            # Entries without a project name or workbook name are reported
            # and skipped rather than emitted.
            if None in (workbook['projectName'], workbook['name']):
                LOGGER.warning(
                    f'Ignoring workbook (ID:{workbook["vizportalUrlId"]}) '
                    f'in project (ID:{workbook["projectVizportalUrlId"]}) because of a lack of permission'
                )
                continue
            data = {
                'dashboard_group':
                workbook['projectName'],
                'dashboard_name':
                TableauDashboardUtils.sanitize_workbook_name(workbook['name']),
                'description':
                workbook.get('description', ''),
                'created_timestamp':
                workbook['createdAt'],
                'dashboard_group_url':
                f'{base_url}/#/projects/{workbook["projectVizportalUrlId"]}',
                'dashboard_url':
                f'{base_url}/#/workbooks/{workbook["vizportalUrlId"]}/views',
                'cluster':
                self._conf.get_string(
                    TableauGraphQLApiMetadataExtractor.CLUSTER)
            }
            yield data
    def execute(self) -> Iterator[Dict[str, Any]]:
        """
        Yield external-table records built from ``response['databases']``.

        For entries whose ``connectionType`` is ``google-sheets``,
        ``salesforce`` or ``excel-direct``, one record is yielded per item in
        the entry's ``tables`` list, with the entry's own name used as the
        schema. For every other connection type a single record is yielded:
        the schema comes from the ``EXTERNAL_SCHEMA_NAME`` config value and
        the entry's name is used as the table name.

        :return: iterator of dicts with the keys ``cluster``, ``database``,
            ``schema``, ``name`` and ``description``.
        """
        response = self.execute_query()

        # Connection types that are expanded into one record per nested table.
        expanded_connection_types = ('google-sheets', 'salesforce', 'excel-direct')

        for database_entry in response['databases']:
            if database_entry['connectionType'] not in expanded_connection_types:
                # The entry itself is emitted as the table.
                yield dict(
                    cluster=self._conf.get_string(
                        TableauGraphQLExternalTableExtractor.EXTERNAL_CLUSTER_NAME),
                    database=TableauDashboardUtils.sanitize_database_name(
                        database_entry['connectionType']),
                    schema=self._conf.get_string(
                        TableauGraphQLExternalTableExtractor.EXTERNAL_SCHEMA_NAME),
                    name=TableauDashboardUtils.sanitize_table_name(
                        database_entry['name']),
                    description=database_entry['description'],
                )
                continue

            for downstream_table in database_entry['tables']:
                # The entry's name becomes the schema; each nested table
                # shares the entry's description.
                yield dict(
                    cluster=self._conf.get_string(
                        TableauGraphQLExternalTableExtractor.EXTERNAL_CLUSTER_NAME),
                    database=TableauDashboardUtils.sanitize_database_name(
                        database_entry['connectionType']),
                    schema=TableauDashboardUtils.sanitize_schema_name(
                        database_entry['name']),
                    name=TableauDashboardUtils.sanitize_table_name(
                        downstream_table['name']),
                    description=database_entry['description'],
                )
    def execute(self) -> Iterator[Dict[str, Any]]:
        """
        Yield one last-modified record per workbook in the query response.

        Workbooks whose ``projectName`` appears in the ``EXCLUDED_PROJECTS``
        config list are dropped. Workbooks whose ``projectName`` or ``name``
        is ``None`` are skipped with a warning instead of being emitted.

        :return: iterator of dicts with the keys ``dashboard_group_id``,
            ``dashboard_id``, ``last_modified_timestamp`` and ``cluster``.
        """
        # Function-local import: the file's module-level imports are not
        # visible from this block, so do not rely on a module logger existing.
        import logging

        response = self.execute_query()

        # Read the exclusion list once instead of once per workbook, and pass
        # [] as the default so an unset key means "exclude nothing".
        excluded_projects = self._conf.get_list(
            TableauGraphQLApiLastModifiedExtractor.EXCLUDED_PROJECTS, [])
        workbooks_data = [workbook for workbook in response['workbooks']
                          if workbook['projectName'] not in excluded_projects]

        for workbook in workbooks_data:
            # A record cannot be keyed without both a project name and a
            # workbook name, so such entries are reported and skipped.
            if None in (workbook['projectName'], workbook['name']):
                # .get() is used for the IDs because this method does not
                # otherwise read them, so their presence is not guaranteed here.
                logging.getLogger(__name__).warning(
                    'Ignoring workbook (ID:%s) in project (ID:%s) because of a lack of permission',
                    workbook.get('vizportalUrlId'),
                    workbook.get('projectVizportalUrlId'),
                )
                continue
            data = {
                'dashboard_group_id': workbook['projectName'],
                'dashboard_id': TableauDashboardUtils.sanitize_workbook_name(workbook['name']),
                'last_modified_timestamp': workbook['updatedAt'],
                'cluster': self._conf.get_string(TableauGraphQLApiLastModifiedExtractor.CLUSTER)
            }
            yield data
Ejemplo n.º 6
0
    def execute(self) -> Iterator[Dict[str, Any]]:
        """
        Yield one query record per (custom SQL table, downstream workbook) pair.

        Iterates ``response['customSQLTables']`` and, for each entry, its
        ``downstreamWorkbooks``. Pairs whose workbook ``projectName`` appears
        in the ``EXCLUDED_PROJECTS`` config list are skipped.

        :return: iterator of dicts with the keys ``dashboard_group_id``,
            ``dashboard_id``, ``query_name``, ``query_id``, ``query_text``
            and ``cluster``.
        """
        response = self.execute_query()

        # Read the exclusion list once instead of inside the innermost loop,
        # and pass [] as the default so an unset key means "exclude nothing".
        excluded_projects = self._conf.get_list(
            TableauGraphQLApiQueryExtractor.EXCLUDED_PROJECTS, [])

        for query in response['customSQLTables']:
            for workbook in query['downstreamWorkbooks']:
                if workbook['projectName'] in excluded_projects:
                    continue
                data = {
                    'dashboard_group_id': workbook['projectName'],
                    'dashboard_id': TableauDashboardUtils.sanitize_workbook_name(workbook['name']),
                    'query_name': query['name'],
                    'query_id': query['id'],
                    'query_text': query['query'],
                    'cluster': self._conf.get_string(TableauGraphQLApiQueryExtractor.CLUSTER)
                }
                yield data
Ejemplo n.º 7
0
    def execute(self) -> Iterator[Dict[str, Any]]:
        """
        Yield one record per workbook listing the keys of its upstream tables.

        Workbooks whose ``projectName`` appears in the ``EXCLUDED_PROJECTS``
        config list are dropped. For every remaining workbook, each entry in
        ``upstreamTables`` is turned into a table key via
        ``TableMetadata.TABLE_KEY_FORMAT`` and collected under ``table_ids``.

        :return: iterator of dicts with the keys ``dashboard_group_id``,
            ``dashboard_id``, ``cluster`` and ``table_ids``.
        """
        response = self.execute_query()

        # Read the exclusion list once instead of once per workbook, and pass
        # [] as the default so an unset key means "exclude nothing".
        excluded_projects = self._conf.get_list(
            TableauGraphQLDashboardTableExtractor.EXCLUDED_PROJECTS, [])
        workbooks_data = [workbook for workbook in response['workbooks']
                          if workbook['projectName'] not in excluded_projects]

        for workbook in workbooks_data:
            data = {
                'dashboard_group_id': workbook['projectName'],
                'dashboard_id': TableauDashboardUtils.sanitize_workbook_name(workbook['name']),
                'cluster': self._conf.get_string(TableauGraphQLDashboardTableExtractor.CLUSTER),
                'table_ids': []
            }

            for table in workbook['upstreamTables']:

                # external tables have no schema, so they must be parsed differently
                # see TableauExternalTableExtractor for more specifics
                if table['schema'] != '':
                    cluster = self._conf.get_string(TableauGraphQLDashboardTableExtractor.CLUSTER)
                    database = self._conf.get_string(TableauGraphQLDashboardTableExtractor.DATABASE)

                    # Tableau sometimes incorrectly assigns the "schema" value
                    # based on how the datasource connection is used in a workbook.
                    # It will hide the real schema inside the table name, like "real_schema.real_table",
                    # and set the "schema" value to "wrong_schema". In every case discovered so far, the schema
                    # key is incorrect, so the "inner" schema from the table name is used instead.
                    if '.' in table['name']:
                        # Unpacking a plain split('.') into two names raises ValueError
                        # when the name contains more than one dot. Taking the last two
                        # components is identical for "schema.table" and cannot fail.
                        # NOTE(review): for names with extra leading qualifiers the
                        # prefix is discarded - confirm this matches how Tableau
                        # qualifies such names.
                        schema, name = table['name'].split('.')[-2:]
                    else:
                        schema, name = table['schema'], table['name']
                    schema = TableauDashboardUtils.sanitize_schema_name(schema)
                    name = TableauDashboardUtils.sanitize_table_name(name)
                else:
                    cluster = self._conf.get_string(TableauGraphQLDashboardTableExtractor.EXTERNAL_CLUSTER_NAME)
                    database = TableauDashboardUtils.sanitize_database_name(
                        table['database']['connectionType']
                    )
                    schema = TableauDashboardUtils.sanitize_schema_name(table['database']['name'])
                    name = TableauDashboardUtils.sanitize_table_name(table['name'])

                table_id = TableMetadata.TABLE_KEY_FORMAT.format(
                    db=database,
                    cluster=cluster,
                    schema=schema,
                    tbl=name,
                )
                data['table_ids'].append(table_id)

            yield data