def run_table_badge_job(table_path, badge_path):
    tmp_folder = '/var/tmp/amundsen/table_badge'
    node_files_folder = f'{tmp_folder}/nodes'
    relationship_files_folder = f'{tmp_folder}/relationships'
    extractor = CsvTableBadgeExtractor()
    csv_loader = FsNeo4jCSVLoader()
    task = DefaultTask(extractor=extractor,
                       loader=csv_loader,
                       transformer=NoopTransformer())
    job_config = ConfigFactory.from_dict({
        'extractor.csvtablebadge.table_file_location': table_path,
        'extractor.csvtablebadge.badge_file_location': badge_path,
        'loader.filesystem_csv_neo4j.node_dir_path': node_files_folder,
        'loader.filesystem_csv_neo4j.relationship_dir_path': relationship_files_folder,
        'loader.filesystem_csv_neo4j.delete_created_directories': True,
        'publisher.neo4j.node_files_directory': node_files_folder,
        'publisher.neo4j.relation_files_directory': relationship_files_folder,
        'publisher.neo4j.neo4j_endpoint': neo4j_endpoint,
        'publisher.neo4j.neo4j_user': neo4j_user,
        'publisher.neo4j.neo4j_password': neo4j_password,
        'publisher.neo4j.neo4j_encrypted': False,
        'publisher.neo4j.job_publish_tag': 'unique_tag_b',  # should use unique tag here like {ds}
    })
    job = DefaultJob(conf=job_config,
                     task=task,
                     publisher=Neo4jCsvPublisher())
    job.launch()
Example #2
0
def run_table_badge_job(table_path, badge_path):
    tmp_folder = '/var/tmp/amundsen/table_badge'
    node_files_folder = f'{tmp_folder}/nodes'
    relationship_files_folder = f'{tmp_folder}/relationships'
    extractor = CsvTableBadgeExtractor()
    csv_loader = FsAtlasCSVLoader()
    task = DefaultTask(extractor=extractor,
                       loader=csv_loader,
                       transformer=NoopTransformer())
    job_config = ConfigFactory.from_dict({
        'extractor.csvtablebadge.table_file_location':
        table_path,
        'extractor.csvtablebadge.badge_file_location':
        badge_path,
        f'loader.filesystem_csv_atlas.{FsAtlasCSVLoader.ENTITY_DIR_PATH}':
        node_files_folder,
        f'loader.filesystem_csv_atlas.{FsAtlasCSVLoader.RELATIONSHIP_DIR_PATH}':
        relationship_files_folder,
        f'loader.filesystem_csv_atlas.{FsAtlasCSVLoader.SHOULD_DELETE_CREATED_DIR}':
        True,
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.ATLAS_CLIENT}':
        AtlasClient(atlas_endpoint, (atlas_user, atlas_password)),
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.ENTITY_DIR_PATH}':
        node_files_folder,
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.RELATIONSHIP_DIR_PATH}':
        relationship_files_folder,
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.ATLAS_ENTITY_CREATE_BATCH_SIZE}':
        ATLAS_CREATE_BATCH_SIZE,
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.REGISTER_ENTITY_TYPES}':
        False
    })
    job = DefaultJob(conf=job_config, task=task, publisher=AtlasCSVPublisher())
    job.launch()
Example #3
0
    def test_extraction_table_badges(self) -> None:
        """
        Tests that badges are properly parsed from a CSV file and assigned to a table.
        """
        config_dict = {
            f'extractor.csvtablebadge.{CsvTableBadgeExtractor.TABLE_FILE_LOCATION}':
            'example/sample_data/sample_table.csv',
            f'extractor.csvtablebadge.{CsvTableBadgeExtractor.BADGE_FILE_LOCATION}':
            'example/sample_data/sample_badges.csv',
        }
        self.conf = ConfigFactory.from_dict(config_dict)
        extractor = CsvTableBadgeExtractor()
        extractor.init(
            Scoped.get_scoped_conf(conf=self.conf,
                                   scope=extractor.get_scope()))
        result_1 = extractor.extract()
        self.assertEqual([b.name for b in result_1.badges], ['beta'])

        result_2 = extractor.extract()
        self.assertEqual([b.name for b in result_2.badges], ['json', 'npi'])