def create_dashboard_tables_job():
    """Assemble the job that publishes the sample dashboard-table CSV to Neo4j.

    A ``CsvExtractor`` is pointed at
    ``example/sample_data/sample_dashboard_table.csv``. Records pass through
    a ``ChainedTransformer`` made of a ``GenericTransformer`` (configured
    with the ``table_ids`` field and the ``_str_to_list`` callback) and a
    ``DictToModel`` (configured with the ``DashboardTable`` model class).
    A ``FsNeo4jCSVLoader`` and a ``Neo4jCsvPublisher`` are configured to
    share the same node/relationship folders.

    The Neo4j connection settings are read from ``neo4j_endpoint``,
    ``neo4j_user`` and ``neo4j_password``, which are defined outside this
    function.

    Returns:
        A ``DefaultJob`` built from the config, task and publisher. The job
        is only constructed here, not launched.
    """
    # The loader writes into these folders and the publisher reads them back.
    base_dir = '/var/tmp/amundsen/dashboard_table'
    nodes_dir = base_dir + '/nodes'
    relations_dir = base_dir + '/relationships'

    extractor = CsvExtractor()
    loader = FsNeo4jCSVLoader()

    field_transformer = GenericTransformer()
    model_transformer = DictToModel()
    chain = ChainedTransformer(
        transformers=[field_transformer, model_transformer],
        is_init_transformers=True)

    task = DefaultTask(extractor=extractor, loader=loader, transformer=chain)
    neo4j_publisher = Neo4jCsvPublisher()

    # Every config key is namespaced by the scope of the component it
    # configures; nested transformers sit under the chain's scope.
    extractor_scope = extractor.get_scope()
    chain_scope = chain.get_scope()
    field_prefix = f'{chain_scope}.{field_transformer.get_scope()}'
    model_prefix = f'{chain_scope}.{model_transformer.get_scope()}'
    loader_scope = loader.get_scope()
    publisher_scope = neo4j_publisher.get_scope()

    settings = {
        # Extractor
        f'{extractor_scope}.file_location':
            'example/sample_data/sample_dashboard_table.csv',
        # Transformer chain
        f'{field_prefix}.{FIELD_NAME}': 'table_ids',
        f'{field_prefix}.{CALLBACK_FUNCTION}': _str_to_list,
        f'{model_prefix}.{MODEL_CLASS}':
            'databuilder.models.dashboard.dashboard_table.DashboardTable',
        # Loader
        f'{loader_scope}.node_dir_path': nodes_dir,
        f'{loader_scope}.relationship_dir_path': relations_dir,
        f'{loader_scope}.delete_created_directories': True,
        # Publisher
        f'{publisher_scope}.node_files_directory': nodes_dir,
        f'{publisher_scope}.relation_files_directory': relations_dir,
        f'{publisher_scope}.neo4j_endpoint': neo4j_endpoint,
        f'{publisher_scope}.neo4j_user': neo4j_user,
        f'{publisher_scope}.neo4j_password': neo4j_password,
        f'{publisher_scope}.neo4j_encrypted': False,
        # Placeholder value; a real run should use a unique tag such as {ds}.
        f'{publisher_scope}.job_publish_tag': 'unique_tag',
    }
    job_config = ConfigFactory.from_dict(settings)

    return DefaultJob(conf=job_config, task=task, publisher=neo4j_publisher)
Ejemplo n.º 2
0
def create_dashboard_tables_job():
    """Assemble the job that publishes the sample dashboard-table CSV to Neptune.

    A ``CsvExtractor`` is pointed at
    ``example/sample_data/sample_dashboard_table.csv``. Records pass through
    a ``ChainedTransformer`` made of a ``GenericTransformer`` (configured
    with the ``table_ids`` field and the ``_str_to_list`` callback) and a
    ``DictToModel`` (configured with the ``DashboardTable`` model class).
    A ``FSNeptuneCSVLoader`` and a ``NeptuneCSVPublisher`` are configured to
    share the same node/relationship folders.

    The S3, Neptune and AWS credential settings are read from names defined
    outside this function (``S3_BUCKET_NAME``, ``S3_DATA_PATH``,
    ``NEPTUNE_ENDPOINT``, ``neptune_iam_role_name``, ``AWS_REGION``,
    ``aws_access_key``, ``aws_access_secret``, ``aws_token``).

    Returns:
        A ``DefaultJob`` built from the config, task and publisher. The job
        is only constructed here, not launched.
    """
    # The loader writes into these folders and the publisher reads them back.
    base_dir = '/var/tmp/amundsen/dashboard_table'
    nodes_dir = base_dir + '/nodes'
    relations_dir = base_dir + '/relationships'

    extractor = CsvExtractor()
    neptune_loader = FSNeptuneCSVLoader()
    neptune_publisher = NeptuneCSVPublisher()

    field_transformer = GenericTransformer()
    model_transformer = DictToModel()
    chain = ChainedTransformer(
        transformers=[field_transformer, model_transformer],
        is_init_transformers=True)

    task = DefaultTask(extractor=extractor,
                       loader=neptune_loader,
                       transformer=chain)

    # One config section per component; each is placed under that
    # component's scope when the final config is assembled below.
    extractor_conf = {
        CsvExtractor.FILE_LOCATION:
            'example/sample_data/sample_dashboard_table.csv',
    }

    # Nested transformers are configured under the chain's own section.
    transformer_conf = {
        field_transformer.get_scope(): {
            FIELD_NAME: 'table_ids',
            CALLBACK_FUNCTION: _str_to_list,
        },
        model_transformer.get_scope(): {
            MODEL_CLASS:
                'databuilder.models.dashboard.dashboard_table.DashboardTable',
        },
    }

    loader_conf = {
        FSNeptuneCSVLoader.NODE_DIR_PATH: nodes_dir,
        FSNeptuneCSVLoader.RELATION_DIR_PATH: relations_dir,
        FSNeptuneCSVLoader.SHOULD_DELETE_CREATED_DIR: True,
        FSNeptuneCSVLoader.JOB_PUBLISHER_TAG: 'unique_tag',
    }

    publisher_conf = {
        NeptuneCSVPublisher.NODE_FILES_DIR: nodes_dir,
        NeptuneCSVPublisher.RELATION_FILES_DIR: relations_dir,
        NeptuneCSVPublisher.AWS_S3_BUCKET_NAME: S3_BUCKET_NAME,
        NeptuneCSVPublisher.AWS_BASE_S3_DATA_PATH: S3_DATA_PATH,
        NeptuneCSVPublisher.NEPTUNE_HOST: NEPTUNE_ENDPOINT,
        NeptuneCSVPublisher.AWS_IAM_ROLE_NAME: neptune_iam_role_name,
        NeptuneCSVPublisher.AWS_REGION: AWS_REGION,
        NeptuneCSVPublisher.AWS_ACCESS_KEY: aws_access_key,
        NeptuneCSVPublisher.AWS_SECRET_ACCESS_KEY: aws_access_secret,
        NeptuneCSVPublisher.AWS_SESSION_TOKEN: aws_token,
    }

    job_config = ConfigFactory.from_dict({
        extractor.get_scope(): extractor_conf,
        chain.get_scope(): transformer_conf,
        neptune_loader.get_scope(): loader_conf,
        neptune_publisher.get_scope(): publisher_conf,
    })

    return DefaultJob(conf=job_config, task=task, publisher=neptune_publisher)