def test_conversion(self):
        # type: () -> None

        transformer = DictToModel()
        config = ConfigFactory.from_dict({
            MODEL_CLASS:
            'databuilder.models.dashboard.dashboard_execution.DashboardExecution',
        })
        transformer.init(conf=config)

        actual = transformer.transform({
            'dashboard_group_id': 'foo',
            'dashboard_id': 'bar',
            'execution_timestamp': 123456789,
            'execution_state': 'succeed',
            'product': 'mode',
            'cluster': 'gold'
        })

        self.assertTrue(isinstance(actual, DashboardExecution))
        self.assertEqual(
            actual.__repr__(),
            DashboardExecution(dashboard_group_id='foo',
                               dashboard_id='bar',
                               execution_timestamp=123456789,
                               execution_state='succeed',
                               product='mode',
                               cluster='gold').__repr__())
    def init(self, conf: ConfigTree) -> None:
        self._conf = conf
        self.query = """query {
            workbooks {
                id
                name
                projectName
                updatedAt
            }
        }"""

        self._extractor = self._build_extractor()

        transformers: List[Transformer] = []
        timestamp_str_to_epoch_transformer = TimestampStringToEpoch()
        timestamp_str_to_epoch_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf,
            timestamp_str_to_epoch_transformer.get_scope()).with_fallback(
                ConfigFactory.from_dict({
                    FIELD_NAME: 'last_modified_timestamp',
                })))
        transformers.append(timestamp_str_to_epoch_transformer)

        dict_to_model_transformer = DictToModel()
        dict_to_model_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf, dict_to_model_transformer.get_scope()
        ).with_fallback(
            ConfigFactory.from_dict({
                MODEL_CLASS:
                'databuilder.models.dashboard.dashboard_last_modified.DashboardLastModifiedTimestamp'
            })))
        transformers.append(dict_to_model_transformer)

        self._transformer = ChainedTransformer(transformers=transformers)
Exemple #3
0
    def init(self, conf: ConfigTree) -> None:
        self._conf = conf
        self.query = """query {
          workbooks {
            name
            projectName
            upstreamTables {
              name
              schema
              database {
                name
                connectionType
              }
            }
          }
        }"""
        self._extractor = self._build_extractor()

        transformers = []
        dict_to_model_transformer = DictToModel()
        dict_to_model_transformer.init(
            conf=Scoped.get_scoped_conf(self._conf, dict_to_model_transformer.get_scope()).with_fallback(
                ConfigFactory.from_dict(
                    {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_table.DashboardTable'})))
        transformers.append(dict_to_model_transformer)
        self._transformer = ChainedTransformer(transformers=transformers)
Exemple #4
0
    def init(self, conf: ConfigTree) -> None:
        self._conf = conf

        restapi_query = self._build_restapi_query()
        self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
            restapi_query=restapi_query, conf=self._conf)

        # Remove all unnecessary fields because User model accepts all attributes and push it to Neo4j.
        transformers: List[Transformer] = []

        remove_fields_transformer = RemoveFieldTransformer()
        remove_fields_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf, remove_fields_transformer.get_scope()).with_fallback(
                ConfigFactory.from_dict({
                    FIELD_NAMES:
                    ['organization', 'mode_user_resource_path', 'product']
                })))
        transformers.append(remove_fields_transformer)

        dict_to_model_transformer = DictToModel()
        dict_to_model_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf, dict_to_model_transformer.get_scope()).with_fallback(
                ConfigFactory.from_dict(
                    {MODEL_CLASS: 'databuilder.models.user.User'})))
        transformers.append(dict_to_model_transformer)

        self._transformer = ChainedTransformer(transformers=transformers)
    def init(self, conf: ConfigTree) -> None:
        self._conf = conf
        self.query = """query externalTables($externalTableTypes: [String]) {
          databases (filter: {connectionTypeWithin: $externalTableTypes}) {
            name
            connectionType
            description
            tables {
                name
            }
          }
        }"""
        self.query_variables = {
            'externalTableTypes':
            self._conf.get_list(
                TableauDashboardExternalTableExtractor.EXTERNAL_TABLE_TYPES)
        }
        self._extractor = self._build_extractor()

        transformers = []
        dict_to_model_transformer = DictToModel()
        dict_to_model_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf, dict_to_model_transformer.get_scope()).with_fallback(
                ConfigFactory.from_dict({
                    MODEL_CLASS:
                    'databuilder.models.table_metadata.TableMetadata'
                })))
        transformers.append(dict_to_model_transformer)
        self._transformer = ChainedTransformer(transformers=transformers)
    def init(self, conf):
        # type: (ConfigTree) -> None
        self._conf = conf

        restapi_query = self._build_restapi_query()
        self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
            restapi_query=restapi_query, conf=self._conf)

        # Constructing URL using resource path via TemplateVariableSubstitutionTransformer
        transformers = []
        chart_url_transformer = TemplateVariableSubstitutionTransformer()
        chart_url_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf, chart_url_transformer.get_scope()).with_fallback(
                ConfigFactory.from_dict(
                    {
                        FIELD_NAME: 'chart_url',
                        TEMPLATE: 'https://app.mode.com{chart_url}'
                    })))

        transformers.append(chart_url_transformer)

        dict_to_model_transformer = DictToModel()
        dict_to_model_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf, dict_to_model_transformer.get_scope()
        ).with_fallback(
            ConfigFactory.from_dict({
                MODEL_CLASS:
                'databuilder.models.dashboard.dashboard_chart.DashboardChart'
            })))
        transformers.append(dict_to_model_transformer)

        self._transformer = ChainedTransformer(transformers=transformers)
    def init(self, conf: ConfigTree) -> None:
        self._conf = conf

        restapi_query = self._build_restapi_query()
        self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
            restapi_query=restapi_query, conf=self._conf)

        # Payload from RestApiQuery has timestamp which is ISO8601. Here we are using TimestampStringToEpoch to
        # transform into epoch and then using DictToModel to convert Dictionary to Model
        transformers: List[Transformer] = []
        timestamp_str_to_epoch_transformer = TimestampStringToEpoch()
        timestamp_str_to_epoch_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf,
            timestamp_str_to_epoch_transformer.get_scope()).with_fallback(
                ConfigFactory.from_dict({
                    FIELD_NAME: 'execution_timestamp',
                })))

        transformers.append(timestamp_str_to_epoch_transformer)

        dict_to_model_transformer = DictToModel()
        dict_to_model_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf, dict_to_model_transformer.get_scope()
        ).with_fallback(
            ConfigFactory.from_dict({
                MODEL_CLASS:
                'databuilder.models.dashboard.dashboard_execution.DashboardExecution'
            })))
        transformers.append(dict_to_model_transformer)

        self._transformer = ChainedTransformer(transformers=transformers)
    def init(self, conf: ConfigTree) -> None:
        self._conf = conf
        self.query = """query {
          customSQLTables {
            id
            name
            query
            downstreamWorkbooks {
              name
              projectName
            }
          }
        }"""

        self._extractor = self._build_extractor()

        transformers = []
        dict_to_model_transformer = DictToModel()
        dict_to_model_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf, dict_to_model_transformer.get_scope()
        ).with_fallback(
            ConfigFactory.from_dict({
                MODEL_CLASS:
                'databuilder.models.dashboard.dashboard_query.DashboardQuery'
            })))
        transformers.append(dict_to_model_transformer)
        self._transformer = ChainedTransformer(transformers=transformers)
Exemple #9
0
    def init(self, conf: ConfigTree) -> None:
        self._conf = conf

        self.dashboard_group_ids_to_skip = self._conf.get_list(
            DASHBOARD_GROUP_IDS_TO_SKIP, [])

        restapi_query = self._build_restapi_query()
        self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
            restapi_query=restapi_query, conf=self._conf)

        # Payload from RestApiQuery has timestamp which is ISO8601. Here we are using TimestampStringToEpoch to
        # transform into epoch and then using DictToModel to convert Dictionary to Model
        transformers: List[Transformer] = []
        timestamp_str_to_epoch_transformer = TimestampStringToEpoch()
        timestamp_str_to_epoch_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf,
            timestamp_str_to_epoch_transformer.get_scope()).with_fallback(
                ConfigFactory.from_dict({
                    FIELD_NAME: 'created_timestamp',
                })))

        transformers.append(timestamp_str_to_epoch_transformer)

        dashboard_group_url_transformer = TemplateVariableSubstitutionTransformer(
        )
        dashboard_group_url_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf, dashboard_group_url_transformer.get_scope()
        ).with_fallback(
            ConfigFactory.from_dict({
                VAR_FIELD_NAME:
                'dashboard_group_url',
                TEMPLATE:
                'https://app.mode.com/{organization}/spaces/{dashboard_group_id}'
            })))

        transformers.append(dashboard_group_url_transformer)

        dashboard_url_transformer = TemplateVariableSubstitutionTransformer()
        dashboard_url_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf, dashboard_url_transformer.get_scope()
        ).with_fallback(
            ConfigFactory.from_dict({
                VAR_FIELD_NAME:
                'dashboard_url',
                TEMPLATE:
                'https://app.mode.com/{organization}/reports/{dashboard_id}'
            })))
        transformers.append(dashboard_url_transformer)

        dict_to_model_transformer = DictToModel()
        dict_to_model_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf, dict_to_model_transformer.get_scope()
        ).with_fallback(
            ConfigFactory.from_dict({
                MODEL_CLASS:
                'databuilder.models.dashboard.dashboard_metadata.DashboardMetadata'
            })))
        transformers.append(dict_to_model_transformer)

        self._transformer = ChainedTransformer(transformers=transformers)
Exemple #10
0
def create_dashboard_tables_job():
    # loader saves data to these folders and publisher reads it from here
    tmp_folder = '/var/tmp/amundsen/dashboard_table'
    node_files_folder = '{tmp_folder}/nodes'.format(tmp_folder=tmp_folder)
    relationship_files_folder = '{tmp_folder}/relationships'.format(
        tmp_folder=tmp_folder)

    csv_extractor = CsvExtractor()
    loader = FSNeptuneCSVLoader()
    publisher = NeptuneCSVPublisher()

    generic_transformer = GenericTransformer()
    dict_to_model_transformer = DictToModel()
    transformer = ChainedTransformer(
        transformers=[generic_transformer, dict_to_model_transformer],
        is_init_transformers=True)

    task = DefaultTask(extractor=csv_extractor,
                       loader=loader,
                       transformer=transformer)

    job_config = ConfigFactory.from_dict({
        csv_extractor.get_scope(): {
            CsvExtractor.FILE_LOCATION:
            'example/sample_data/sample_dashboard_table.csv'
        },
        transformer.get_scope(): {
            generic_transformer.get_scope(): {
                FIELD_NAME: 'table_ids',
                CALLBACK_FUNCTION: _str_to_list
            },
            dict_to_model_transformer.get_scope(): {
                MODEL_CLASS:
                'databuilder.models.dashboard.dashboard_table.DashboardTable',
            }
        },
        loader.get_scope(): {
            FSNeptuneCSVLoader.NODE_DIR_PATH: node_files_folder,
            FSNeptuneCSVLoader.RELATION_DIR_PATH: relationship_files_folder,
            FSNeptuneCSVLoader.SHOULD_DELETE_CREATED_DIR: True,
            FSNeptuneCSVLoader.JOB_PUBLISHER_TAG: 'unique_tag'
        },
        publisher.get_scope(): {
            NeptuneCSVPublisher.NODE_FILES_DIR: node_files_folder,
            NeptuneCSVPublisher.RELATION_FILES_DIR: relationship_files_folder,
            NeptuneCSVPublisher.AWS_S3_BUCKET_NAME: S3_BUCKET_NAME,
            NeptuneCSVPublisher.AWS_BASE_S3_DATA_PATH: S3_DATA_PATH,
            NeptuneCSVPublisher.NEPTUNE_HOST: NEPTUNE_ENDPOINT,
            NeptuneCSVPublisher.AWS_IAM_ROLE_NAME: neptune_iam_role_name,
            NeptuneCSVPublisher.AWS_REGION: AWS_REGION,
            NeptuneCSVPublisher.AWS_ACCESS_KEY: aws_access_key,
            NeptuneCSVPublisher.AWS_SECRET_ACCESS_KEY: aws_access_secret,
            NeptuneCSVPublisher.AWS_SESSION_TOKEN: aws_token
        }
    })

    return DefaultJob(conf=job_config, task=task, publisher=publisher)
def create_dashboard_tables_job():
    # loader saves data to these folders and publisher reads it from here
    tmp_folder = '/var/tmp/amundsen/dashboard_table'
    node_files_folder = '{tmp_folder}/nodes'.format(tmp_folder=tmp_folder)
    relationship_files_folder = '{tmp_folder}/relationships'.format(
        tmp_folder=tmp_folder)

    csv_extractor = CsvExtractor()
    csv_loader = FsNeo4jCSVLoader()

    generic_transformer = GenericTransformer()
    dict_to_model_transformer = DictToModel()
    transformer = ChainedTransformer(
        transformers=[generic_transformer, dict_to_model_transformer],
        is_init_transformers=True)

    task = DefaultTask(extractor=csv_extractor,
                       loader=csv_loader,
                       transformer=transformer)
    publisher = Neo4jCsvPublisher()

    job_config = ConfigFactory.from_dict({
        '{}.file_location'.format(csv_extractor.get_scope()):
        'example/sample_data/sample_dashboard_table.csv',
        '{}.{}.{}'.format(transformer.get_scope(),
                          generic_transformer.get_scope(), FIELD_NAME):
        'table_ids',
        '{}.{}.{}'.format(transformer.get_scope(),
                          generic_transformer.get_scope(), CALLBACK_FUNCTION):
        _str_to_list,
        '{}.{}.{}'.format(transformer.get_scope(),
                          dict_to_model_transformer.get_scope(), MODEL_CLASS):
        'databuilder.models.dashboard.dashboard_table.DashboardTable',
        '{}.node_dir_path'.format(csv_loader.get_scope()):
        node_files_folder,
        '{}.relationship_dir_path'.format(csv_loader.get_scope()):
        relationship_files_folder,
        '{}.delete_created_directories'.format(csv_loader.get_scope()):
        True,
        '{}.node_files_directory'.format(publisher.get_scope()):
        node_files_folder,
        '{}.relation_files_directory'.format(publisher.get_scope()):
        relationship_files_folder,
        '{}.neo4j_endpoint'.format(publisher.get_scope()):
        neo4j_endpoint,
        '{}.neo4j_user'.format(publisher.get_scope()):
        neo4j_user,
        '{}.neo4j_password'.format(publisher.get_scope()):
        neo4j_password,
        '{}.neo4j_encrypted'.format(publisher.get_scope()):
        False,
        '{}.job_publish_tag'.format(publisher.get_scope()):
        'unique_tag',  # should use unique tag here like {ds}
    })

    return DefaultJob(conf=job_config, task=task, publisher=publisher)
Exemple #12
0
    def init(self, conf: ConfigTree) -> None:
        self._conf = conf
        restapi_query = self._build_restapi_query()
        self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
            restapi_query=restapi_query,
            conf=self._conf
        )

        dict_to_model_transformer = DictToModel()
        dict_to_model_transformer.init(
            conf=Scoped.get_scoped_conf(self._conf, dict_to_model_transformer.get_scope()).with_fallback(
                ConfigFactory.from_dict(
                    {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_chart.DashboardChart'})))
        self._transformer = dict_to_model_transformer
    def init(self, conf):
        # type: (ConfigTree) -> None
        self._conf = conf

        restapi_query = self._build_restapi_query()
        self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
            restapi_query=restapi_query, conf=self._conf)

        # Constructing URL using several ID via TemplateVariableSubstitutionTransformer
        transformers = []
        variable_substitution_transformer = TemplateVariableSubstitutionTransformer(
        )
        variable_substitution_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf,
            variable_substitution_transformer.get_scope()).with_fallback(
                ConfigFactory.from_dict({
                    FIELD_NAME:
                    'url',
                    TEMPLATE:
                    'https://app.mode.com/{organization}'
                    '/reports/{dashboard_id}/queries/{query_id}'
                })))

        transformers.append(variable_substitution_transformer)

        # Escape backslash as it breaks Cypher statement.
        replace_transformer = RegexStrReplaceTransformer()
        replace_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf, replace_transformer.get_scope()).with_fallback(
                ConfigFactory.from_dict({
                    REGEX_REPLACE_TUPLE_LIST: [('\\', '\\\\')],
                    ATTRIBUTE_NAME:
                    'query_text'
                })))
        transformers.append(replace_transformer)

        dict_to_model_transformer = DictToModel()
        dict_to_model_transformer.init(conf=Scoped.get_scoped_conf(
            self._conf, dict_to_model_transformer.get_scope()
        ).with_fallback(
            ConfigFactory.from_dict({
                MODEL_CLASS:
                'databuilder.models.dashboard.dashboard_query.DashboardQuery'
            })))
        transformers.append(dict_to_model_transformer)

        self._transformer = ChainedTransformer(transformers=transformers)
Exemple #14
0
 def init(self, conf: ConfigTree) -> None:
     conf = conf.with_fallback(
         ConfigFactory.from_dict({
             STATIC_RECORD_DICT: {
                 'product': 'mode'
             },
             '{}.{}'.format(DictToModel().get_scope(), MODEL_CLASS):
             'databuilder.models.dashboard.dashboard_last_modified.DashboardLastModifiedTimestamp',
             '{}.{}'.format(TimestampStringToEpoch().get_scope(), FIELD_NAME):
             'last_modified_timestamp'
         }))
     super(ModeDashboardLastModifiedTimestampExtractor, self).init(conf)
Exemple #15
0
def create_dashboard_tables_job():
    # loader saves data to these folders and publisher reads it from here
    tmp_folder = '/var/tmp/amundsen/dashboard_table'
    node_files_folder = f'{tmp_folder}/nodes'
    relationship_files_folder = f'{tmp_folder}/relationships'

    csv_extractor = CsvExtractor()
    csv_loader = FsAtlasCSVLoader()

    generic_transformer = GenericTransformer()
    dict_to_model_transformer = DictToModel()
    transformer = ChainedTransformer(
        transformers=[generic_transformer, dict_to_model_transformer],
        is_init_transformers=True)

    task = DefaultTask(extractor=csv_extractor,
                       loader=csv_loader,
                       transformer=transformer)
    publisher = AtlasCSVPublisher()

    job_config = ConfigFactory.from_dict({
        f'{csv_extractor.get_scope()}.file_location':
        'example/sample_data/sample_dashboard_table.csv',
        f'{transformer.get_scope()}.{generic_transformer.get_scope()}.{FIELD_NAME}':
        'table_ids',
        f'{transformer.get_scope()}.{generic_transformer.get_scope()}.{CALLBACK_FUNCTION}':
        _str_to_list,
        f'{transformer.get_scope()}.{dict_to_model_transformer.get_scope()}.{MODEL_CLASS}':
        'databuilder.models.dashboard.dashboard_table.DashboardTable',
        f'loader.filesystem_csv_atlas.{FsAtlasCSVLoader.ENTITY_DIR_PATH}':
        node_files_folder,
        f'loader.filesystem_csv_atlas.{FsAtlasCSVLoader.RELATIONSHIP_DIR_PATH}':
        relationship_files_folder,
        f'loader.filesystem_csv_atlas.{FsAtlasCSVLoader.SHOULD_DELETE_CREATED_DIR}':
        True,
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.ATLAS_CLIENT}':
        AtlasClient(atlas_endpoint, (atlas_user, atlas_password)),
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.ENTITY_DIR_PATH}':
        node_files_folder,
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.RELATIONSHIP_DIR_PATH}':
        relationship_files_folder,
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.ATLAS_ENTITY_CREATE_BATCH_SIZE}':
        ATLAS_CREATE_BATCH_SIZE,
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.REGISTER_ENTITY_TYPES}':
        False
    })

    return DefaultJob(conf=job_config, task=task, publisher=publisher)
Exemple #16
0
def create_dashboard_tables_job():
    # loader saves data to these folders and publisher reads it from here
    tmp_folder = '/var/tmp/amundsen/dashboard_table'
    record_files_folder = f'{tmp_folder}/records'
    model_class = 'databuilder.models.dashboard.dashboard_table.DashboardTable'

    csv_extractor = CsvExtractor()
    csv_loader = FSMySQLCSVLoader()

    generic_transformer = GenericTransformer()
    dict_to_model_transformer = DictToModel()
    transformer = ChainedTransformer(
        transformers=[generic_transformer, dict_to_model_transformer],
        is_init_transformers=True)

    task = DefaultTask(extractor=csv_extractor,
                       loader=csv_loader,
                       transformer=transformer)
    publisher = MySQLCSVPublisher()

    job_config = ConfigFactory.from_dict({
        'extractor.csv.file_location':
        'example/sample_data/sample_dashboard_table.csv',
        'transformer.chained.transformer.generic.field_name':
        'table_ids',
        'transformer.chained.transformer.generic.callback_function':
        _str_to_list,
        'transformer.chained.transformer.dict_to_model.model_class':
        model_class,
        'loader.mysql_filesystem_csv.record_dir_path':
        record_files_folder,
        'loader.mysql_filesystem_csv.delete_created_directories':
        True,
        'publisher.mysql.record_files_directory':
        record_files_folder,
        'publisher.mysql.conn_string':
        mysql_conn_string,
        'publisher.mysql.job_publish_tag':
        'unique_tag',
    })

    return DefaultJob(conf=job_config, task=task, publisher=publisher)