def test_conversion(self) -> None:
    """DictToModel should turn a raw dict payload into a DashboardExecution model."""
    transformer = DictToModel()
    config = ConfigFactory.from_dict({
        MODEL_CLASS: 'databuilder.models.dashboard.dashboard_execution.DashboardExecution',
    })
    transformer.init(conf=config)

    actual = transformer.transform({
        'dashboard_group_id': 'foo',
        'dashboard_id': 'bar',
        'execution_timestamp': 123456789,
        'execution_state': 'succeed',
        'product': 'mode',
        'cluster': 'gold'
    })

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)) which only prints "False is not true".
    self.assertIsInstance(actual, DashboardExecution)
    # Models are compared via their repr since they do not define __eq__ here.
    self.assertEqual(
        repr(actual),
        repr(DashboardExecution(dashboard_group_id='foo',
                                dashboard_id='bar',
                                execution_timestamp=123456789,
                                execution_state='succeed',
                                product='mode',
                                cluster='gold')))
def init(self, conf: ConfigTree) -> None:
    """Set up the GraphQL extractor and the transformer chain for workbook updates."""
    self._conf = conf
    self.query = """query { workbooks { id name projectName updatedAt } }"""
    self._extractor = self._build_extractor()

    # First convert the timestamp string to epoch, then build the model.
    epoch_transformer = TimestampStringToEpoch()
    epoch_conf = Scoped.get_scoped_conf(
        self._conf, epoch_transformer.get_scope()
    ).with_fallback(ConfigFactory.from_dict({
        FIELD_NAME: 'last_modified_timestamp',
    }))
    epoch_transformer.init(conf=epoch_conf)

    model_transformer = DictToModel()
    model_conf = Scoped.get_scoped_conf(
        self._conf, model_transformer.get_scope()
    ).with_fallback(ConfigFactory.from_dict({
        MODEL_CLASS: 'databuilder.models.dashboard.dashboard_last_modified.DashboardLastModifiedTimestamp'
    }))
    model_transformer.init(conf=model_conf)

    self._transformer = ChainedTransformer(
        transformers=[epoch_transformer, model_transformer])
def init(self, conf: ConfigTree) -> None:
    """Set up the GraphQL extractor and a DictToModel transformer for dashboard tables."""
    self._conf = conf
    self.query = """query { workbooks { name projectName upstreamTables { name schema database { name connectionType } } } }"""
    self._extractor = self._build_extractor()

    model_transformer = DictToModel()
    scoped_conf = Scoped.get_scoped_conf(self._conf, model_transformer.get_scope())
    model_transformer.init(conf=scoped_conf.with_fallback(ConfigFactory.from_dict(
        {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_table.DashboardTable'})))

    self._transformer = ChainedTransformer(transformers=[model_transformer])
def init(self, conf: ConfigTree) -> None:
    """Wire up the Mode REST API extractor and the transformer chain for user records."""
    self._conf = conf
    restapi_query = self._build_restapi_query()
    self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
        restapi_query=restapi_query, conf=self._conf)

    # The User model accepts all attributes, so strip the fields that should
    # not be pushed to Neo4j before building the model.
    field_remover = RemoveFieldTransformer()
    field_remover.init(conf=Scoped.get_scoped_conf(
        self._conf, field_remover.get_scope()
    ).with_fallback(ConfigFactory.from_dict({
        FIELD_NAMES: ['organization', 'mode_user_resource_path', 'product']
    })))

    model_builder = DictToModel()
    model_builder.init(conf=Scoped.get_scoped_conf(
        self._conf, model_builder.get_scope()
    ).with_fallback(ConfigFactory.from_dict(
        {MODEL_CLASS: 'databuilder.models.user.User'})))

    self._transformer = ChainedTransformer(
        transformers=[field_remover, model_builder])
def init(self, conf: ConfigTree) -> None:
    """Set up the external-tables GraphQL extractor and a TableMetadata transformer."""
    self._conf = conf
    self.query = """query externalTables($externalTableTypes: [String]) { databases (filter: {connectionTypeWithin: $externalTableTypes}) { name connectionType description tables { name } } }"""
    # The table types to query for come from configuration.
    self.query_variables = {
        'externalTableTypes': self._conf.get_list(
            TableauDashboardExternalTableExtractor.EXTERNAL_TABLE_TYPES)
    }
    self._extractor = self._build_extractor()

    model_transformer = DictToModel()
    scoped_conf = Scoped.get_scoped_conf(self._conf, model_transformer.get_scope())
    model_transformer.init(conf=scoped_conf.with_fallback(ConfigFactory.from_dict({
        MODEL_CLASS: 'databuilder.models.table_metadata.TableMetadata'
    })))

    self._transformer = ChainedTransformer(transformers=[model_transformer])
def init(self, conf: ConfigTree) -> None:
    """Set up the Mode REST API extractor and the chart transformer chain.

    The chart URL is built from the resource path via
    TemplateVariableSubstitutionTransformer, then the record dict is
    converted into a DashboardChart model.
    """
    self._conf = conf
    restapi_query = self._build_restapi_query()
    self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
        restapi_query=restapi_query, conf=self._conf)

    transformers = []

    # Construct the full chart URL from the resource path.
    chart_url_transformer = TemplateVariableSubstitutionTransformer()
    chart_url_transformer.init(conf=Scoped.get_scoped_conf(
        self._conf, chart_url_transformer.get_scope()).with_fallback(
        ConfigFactory.from_dict({
            FIELD_NAME: 'chart_url',
            TEMPLATE: 'https://app.mode.com{chart_url}'
        })))
    transformers.append(chart_url_transformer)

    dict_to_model_transformer = DictToModel()
    dict_to_model_transformer.init(conf=Scoped.get_scoped_conf(
        self._conf, dict_to_model_transformer.get_scope()).with_fallback(
        ConfigFactory.from_dict({
            MODEL_CLASS: 'databuilder.models.dashboard.dashboard_chart.DashboardChart'
        })))
    transformers.append(dict_to_model_transformer)

    self._transformer = ChainedTransformer(transformers=transformers)
def init(self, conf: ConfigTree) -> None:
    """Build the Mode REST API extractor and the execution transformer chain."""
    self._conf = conf
    self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
        restapi_query=self._build_restapi_query(), conf=self._conf)

    # The REST payload carries an ISO8601 timestamp: convert it to epoch
    # first, then materialize the dict as a DashboardExecution model.
    epoch_transformer = TimestampStringToEpoch()
    epoch_transformer.init(conf=Scoped.get_scoped_conf(
        self._conf, epoch_transformer.get_scope()
    ).with_fallback(ConfigFactory.from_dict({
        FIELD_NAME: 'execution_timestamp',
    })))

    model_transformer = DictToModel()
    model_transformer.init(conf=Scoped.get_scoped_conf(
        self._conf, model_transformer.get_scope()
    ).with_fallback(ConfigFactory.from_dict({
        MODEL_CLASS: 'databuilder.models.dashboard.dashboard_execution.DashboardExecution'
    })))

    self._transformer = ChainedTransformer(
        transformers=[epoch_transformer, model_transformer])
def init(self, conf: ConfigTree) -> None:
    """Set up the custom-SQL GraphQL extractor and a DashboardQuery transformer."""
    self._conf = conf
    self.query = """query { customSQLTables { id name query downstreamWorkbooks { name projectName } } }"""
    self._extractor = self._build_extractor()

    model_transformer = DictToModel()
    scoped_conf = Scoped.get_scoped_conf(self._conf, model_transformer.get_scope())
    model_transformer.init(conf=scoped_conf.with_fallback(ConfigFactory.from_dict({
        MODEL_CLASS: 'databuilder.models.dashboard.dashboard_query.DashboardQuery'
    })))

    self._transformer = ChainedTransformer(transformers=[model_transformer])
def init(self, conf: ConfigTree) -> None:
    """Set up the Mode REST API extractor and the dashboard-metadata transformer chain."""
    self._conf = conf
    self.dashboard_group_ids_to_skip = self._conf.get_list(
        DASHBOARD_GROUP_IDS_TO_SKIP, [])
    self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
        restapi_query=self._build_restapi_query(), conf=self._conf)

    def _scoped_conf(transformer, fallback):
        # Scope the transformer's configuration under its own namespace,
        # falling back to the given defaults.
        return Scoped.get_scoped_conf(
            self._conf, transformer.get_scope()
        ).with_fallback(ConfigFactory.from_dict(fallback))

    # The REST payload's timestamp is ISO8601; convert it to epoch first.
    epoch_transformer = TimestampStringToEpoch()
    epoch_transformer.init(conf=_scoped_conf(
        epoch_transformer, {FIELD_NAME: 'created_timestamp'}))

    group_url_transformer = TemplateVariableSubstitutionTransformer()
    group_url_transformer.init(conf=_scoped_conf(group_url_transformer, {
        VAR_FIELD_NAME: 'dashboard_group_url',
        TEMPLATE: 'https://app.mode.com/{organization}/spaces/{dashboard_group_id}'
    }))

    dashboard_url_transformer = TemplateVariableSubstitutionTransformer()
    dashboard_url_transformer.init(conf=_scoped_conf(dashboard_url_transformer, {
        VAR_FIELD_NAME: 'dashboard_url',
        TEMPLATE: 'https://app.mode.com/{organization}/reports/{dashboard_id}'
    }))

    model_transformer = DictToModel()
    model_transformer.init(conf=_scoped_conf(model_transformer, {
        MODEL_CLASS: 'databuilder.models.dashboard.dashboard_metadata.DashboardMetadata'
    }))

    self._transformer = ChainedTransformer(transformers=[
        epoch_transformer,
        group_url_transformer,
        dashboard_url_transformer,
        model_transformer,
    ])
def create_dashboard_tables_job():
    """Build a job that loads dashboard-table relations from CSV into Neptune.

    Returns:
        DefaultJob wiring a CSV extractor, a chained
        GenericTransformer -> DictToModel transformer, an FS Neptune CSV
        loader, and a Neptune CSV publisher.
    """
    # Loader saves data to these folders and publisher reads it from here.
    tmp_folder = '/var/tmp/amundsen/dashboard_table'
    # f-strings instead of str.format, consistent with the other job builders.
    node_files_folder = f'{tmp_folder}/nodes'
    relationship_files_folder = f'{tmp_folder}/relationships'

    csv_extractor = CsvExtractor()
    loader = FSNeptuneCSVLoader()
    publisher = NeptuneCSVPublisher()

    generic_transformer = GenericTransformer()
    dict_to_model_transformer = DictToModel()
    transformer = ChainedTransformer(
        transformers=[generic_transformer, dict_to_model_transformer],
        is_init_transformers=True)

    task = DefaultTask(extractor=csv_extractor,
                       loader=loader,
                       transformer=transformer)

    job_config = ConfigFactory.from_dict({
        csv_extractor.get_scope(): {
            CsvExtractor.FILE_LOCATION: 'example/sample_data/sample_dashboard_table.csv'
        },
        transformer.get_scope(): {
            generic_transformer.get_scope(): {
                FIELD_NAME: 'table_ids',
                # Parse the serialized list of table ids in the CSV column.
                CALLBACK_FUNCTION: _str_to_list
            },
            dict_to_model_transformer.get_scope(): {
                MODEL_CLASS: 'databuilder.models.dashboard.dashboard_table.DashboardTable',
            }
        },
        loader.get_scope(): {
            FSNeptuneCSVLoader.NODE_DIR_PATH: node_files_folder,
            FSNeptuneCSVLoader.RELATION_DIR_PATH: relationship_files_folder,
            FSNeptuneCSVLoader.SHOULD_DELETE_CREATED_DIR: True,
            FSNeptuneCSVLoader.JOB_PUBLISHER_TAG: 'unique_tag'
        },
        publisher.get_scope(): {
            NeptuneCSVPublisher.NODE_FILES_DIR: node_files_folder,
            NeptuneCSVPublisher.RELATION_FILES_DIR: relationship_files_folder,
            NeptuneCSVPublisher.AWS_S3_BUCKET_NAME: S3_BUCKET_NAME,
            NeptuneCSVPublisher.AWS_BASE_S3_DATA_PATH: S3_DATA_PATH,
            NeptuneCSVPublisher.NEPTUNE_HOST: NEPTUNE_ENDPOINT,
            NeptuneCSVPublisher.AWS_IAM_ROLE_NAME: neptune_iam_role_name,
            NeptuneCSVPublisher.AWS_REGION: AWS_REGION,
            NeptuneCSVPublisher.AWS_ACCESS_KEY: aws_access_key,
            NeptuneCSVPublisher.AWS_SECRET_ACCESS_KEY: aws_access_secret,
            NeptuneCSVPublisher.AWS_SESSION_TOKEN: aws_token
        }
    })
    return DefaultJob(conf=job_config, task=task, publisher=publisher)
def create_dashboard_tables_job():
    """Build a job that publishes dashboard-table relations from CSV to Neo4j.

    Returns:
        DefaultJob wiring a CSV extractor, a chained
        GenericTransformer -> DictToModel transformer, an FS Neo4j CSV
        loader, and a Neo4j CSV publisher.
    """
    # Loader saves data to these folders and publisher reads it from here.
    tmp_folder = '/var/tmp/amundsen/dashboard_table'
    # f-strings instead of chained str.format, consistent with the other
    # job builders in this project.
    node_files_folder = f'{tmp_folder}/nodes'
    relationship_files_folder = f'{tmp_folder}/relationships'

    csv_extractor = CsvExtractor()
    csv_loader = FsNeo4jCSVLoader()

    generic_transformer = GenericTransformer()
    dict_to_model_transformer = DictToModel()
    transformer = ChainedTransformer(
        transformers=[generic_transformer, dict_to_model_transformer],
        is_init_transformers=True)

    task = DefaultTask(extractor=csv_extractor,
                       loader=csv_loader,
                       transformer=transformer)
    publisher = Neo4jCsvPublisher()

    job_config = ConfigFactory.from_dict({
        f'{csv_extractor.get_scope()}.file_location':
            'example/sample_data/sample_dashboard_table.csv',
        f'{transformer.get_scope()}.{generic_transformer.get_scope()}.{FIELD_NAME}':
            'table_ids',
        # Parse the serialized list of table ids in the CSV column.
        f'{transformer.get_scope()}.{generic_transformer.get_scope()}.{CALLBACK_FUNCTION}':
            _str_to_list,
        f'{transformer.get_scope()}.{dict_to_model_transformer.get_scope()}.{MODEL_CLASS}':
            'databuilder.models.dashboard.dashboard_table.DashboardTable',
        f'{csv_loader.get_scope()}.node_dir_path': node_files_folder,
        f'{csv_loader.get_scope()}.relationship_dir_path': relationship_files_folder,
        f'{csv_loader.get_scope()}.delete_created_directories': True,
        f'{publisher.get_scope()}.node_files_directory': node_files_folder,
        f'{publisher.get_scope()}.relation_files_directory': relationship_files_folder,
        f'{publisher.get_scope()}.neo4j_endpoint': neo4j_endpoint,
        f'{publisher.get_scope()}.neo4j_user': neo4j_user,
        f'{publisher.get_scope()}.neo4j_password': neo4j_password,
        f'{publisher.get_scope()}.neo4j_encrypted': False,
        f'{publisher.get_scope()}.job_publish_tag': 'unique_tag',  # should use unique tag here like {ds}
    })
    return DefaultJob(conf=job_config, task=task, publisher=publisher)
def init(self, conf: ConfigTree) -> None:
    """Set up the Mode REST API extractor and a DashboardChart model transformer."""
    self._conf = conf
    self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
        restapi_query=self._build_restapi_query(),
        conf=self._conf
    )

    model_transformer = DictToModel()
    scoped_conf = Scoped.get_scoped_conf(self._conf, model_transformer.get_scope())
    model_transformer.init(conf=scoped_conf.with_fallback(ConfigFactory.from_dict(
        {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_chart.DashboardChart'})))
    self._transformer = model_transformer
def init(self, conf: ConfigTree) -> None:
    """Set up the Mode REST API extractor and the query transformer chain.

    The chain builds the query URL from several IDs, escapes backslashes in
    the query text, and converts the record into a DashboardQuery model.
    """
    self._conf = conf
    restapi_query = self._build_restapi_query()
    self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
        restapi_query=restapi_query, conf=self._conf)

    transformers = []

    # Construct the query URL from several IDs via TemplateVariableSubstitutionTransformer.
    variable_substitution_transformer = TemplateVariableSubstitutionTransformer()
    variable_substitution_transformer.init(conf=Scoped.get_scoped_conf(
        self._conf, variable_substitution_transformer.get_scope()).with_fallback(
        ConfigFactory.from_dict({
            FIELD_NAME: 'url',
            TEMPLATE: 'https://app.mode.com/{organization}'
                      '/reports/{dashboard_id}/queries/{query_id}'
        })))
    transformers.append(variable_substitution_transformer)

    # Escape backslash as it breaks Cypher statement.
    replace_transformer = RegexStrReplaceTransformer()
    replace_transformer.init(conf=Scoped.get_scoped_conf(
        self._conf, replace_transformer.get_scope()).with_fallback(
        ConfigFactory.from_dict({
            REGEX_REPLACE_TUPLE_LIST: [('\\', '\\\\')],
            ATTRIBUTE_NAME: 'query_text'
        })))
    transformers.append(replace_transformer)

    dict_to_model_transformer = DictToModel()
    dict_to_model_transformer.init(conf=Scoped.get_scoped_conf(
        self._conf, dict_to_model_transformer.get_scope()).with_fallback(
        ConfigFactory.from_dict({
            MODEL_CLASS: 'databuilder.models.dashboard.dashboard_query.DashboardQuery'
        })))
    transformers.append(dict_to_model_transformer)

    self._transformer = ChainedTransformer(transformers=transformers)
def init(self, conf: ConfigTree) -> None:
    """Apply default transformer settings, then delegate to the parent init.

    Defaults: tag every record with product 'mode', convert the dict into a
    DashboardLastModifiedTimestamp model, and parse the
    'last_modified_timestamp' field from string to epoch.
    """
    conf = conf.with_fallback(
        ConfigFactory.from_dict({
            STATIC_RECORD_DICT: {
                'product': 'mode'
            },
            '{}.{}'.format(DictToModel().get_scope(), MODEL_CLASS):
                'databuilder.models.dashboard.dashboard_last_modified.DashboardLastModifiedTimestamp',
            '{}.{}'.format(TimestampStringToEpoch().get_scope(), FIELD_NAME):
                'last_modified_timestamp'
        }))
    # Zero-argument super() — Python 3 idiom, equivalent to the verbose
    # two-argument form used elsewhere in legacy code.
    super().init(conf)
def create_dashboard_tables_job():
    """Build a job that publishes dashboard-table relations from CSV to Atlas."""
    # Loader writes entity/relationship CSVs here; the publisher reads them back.
    tmp_folder = '/var/tmp/amundsen/dashboard_table'
    node_files_folder = f'{tmp_folder}/nodes'
    relationship_files_folder = f'{tmp_folder}/relationships'

    csv_extractor = CsvExtractor()
    csv_loader = FsAtlasCSVLoader()
    publisher = AtlasCSVPublisher()

    generic_transformer = GenericTransformer()
    dict_to_model_transformer = DictToModel()
    transformer = ChainedTransformer(
        transformers=[generic_transformer, dict_to_model_transformer],
        is_init_transformers=True)

    task = DefaultTask(extractor=csv_extractor,
                       loader=csv_loader,
                       transformer=transformer)

    # Precompute scope prefixes to keep the config keys readable.
    generic_scope = f'{transformer.get_scope()}.{generic_transformer.get_scope()}'
    model_scope = f'{transformer.get_scope()}.{dict_to_model_transformer.get_scope()}'
    loader_scope = 'loader.filesystem_csv_atlas'
    publisher_scope = 'publisher.atlas_csv_publisher'

    job_config = ConfigFactory.from_dict({
        f'{csv_extractor.get_scope()}.file_location':
            'example/sample_data/sample_dashboard_table.csv',
        f'{generic_scope}.{FIELD_NAME}': 'table_ids',
        f'{generic_scope}.{CALLBACK_FUNCTION}': _str_to_list,
        f'{model_scope}.{MODEL_CLASS}':
            'databuilder.models.dashboard.dashboard_table.DashboardTable',
        f'{loader_scope}.{FsAtlasCSVLoader.ENTITY_DIR_PATH}': node_files_folder,
        f'{loader_scope}.{FsAtlasCSVLoader.RELATIONSHIP_DIR_PATH}': relationship_files_folder,
        f'{loader_scope}.{FsAtlasCSVLoader.SHOULD_DELETE_CREATED_DIR}': True,
        f'{publisher_scope}.{AtlasCSVPublisher.ATLAS_CLIENT}':
            AtlasClient(atlas_endpoint, (atlas_user, atlas_password)),
        f'{publisher_scope}.{AtlasCSVPublisher.ENTITY_DIR_PATH}': node_files_folder,
        f'{publisher_scope}.{AtlasCSVPublisher.RELATIONSHIP_DIR_PATH}': relationship_files_folder,
        f'{publisher_scope}.{AtlasCSVPublisher.ATLAS_ENTITY_CREATE_BATCH_SIZE}': ATLAS_CREATE_BATCH_SIZE,
        f'{publisher_scope}.{AtlasCSVPublisher.REGISTER_ENTITY_TYPES}': False
    })
    return DefaultJob(conf=job_config, task=task, publisher=publisher)
def create_dashboard_tables_job():
    """Build a job that publishes dashboard-table relations from CSV to MySQL."""
    # The loader stages CSV records in this folder; the publisher reads them back.
    tmp_folder = '/var/tmp/amundsen/dashboard_table'
    record_files_folder = f'{tmp_folder}/records'
    model_class = 'databuilder.models.dashboard.dashboard_table.DashboardTable'

    extractor = CsvExtractor()
    loader = FSMySQLCSVLoader()
    publisher = MySQLCSVPublisher()

    chained_transformer = ChainedTransformer(
        transformers=[GenericTransformer(), DictToModel()],
        is_init_transformers=True)

    task = DefaultTask(extractor=extractor,
                       loader=loader,
                       transformer=chained_transformer)

    job_config = ConfigFactory.from_dict({
        'extractor.csv.file_location': 'example/sample_data/sample_dashboard_table.csv',
        'transformer.chained.transformer.generic.field_name': 'table_ids',
        'transformer.chained.transformer.generic.callback_function': _str_to_list,
        'transformer.chained.transformer.dict_to_model.model_class': model_class,
        'loader.mysql_filesystem_csv.record_dir_path': record_files_folder,
        'loader.mysql_filesystem_csv.delete_created_directories': True,
        'publisher.mysql.record_files_directory': record_files_folder,
        'publisher.mysql.conn_string': mysql_conn_string,
        'publisher.mysql.job_publish_tag': 'unique_tag',
    })
    return DefaultJob(conf=job_config, task=task, publisher=publisher)