def run_table_badge_job(table_path, badge_path):
    """Configure and launch the table-badge job that publishes through Neo4jCsvPublisher.

    A DefaultTask is assembled from a CsvTableBadgeExtractor, an
    FsNeo4jCSVLoader and a NoopTransformer, then handed to a DefaultJob
    together with the configuration built below.

    The Neo4j connection values (``neo4j_endpoint``, ``neo4j_user``,
    ``neo4j_password``) are not parameters; they are read from the
    enclosing scope.

    :param table_path: value stored under
        ``extractor.csvtablebadge.table_file_location``.
    :param badge_path: value stored under
        ``extractor.csvtablebadge.badge_file_location``.
    """
    # The loader and the publisher are pointed at the same two directories.
    tmp_folder = '/var/tmp/amundsen/table_badge'
    node_dir = f'{tmp_folder}/nodes'
    relationship_dir = f'{tmp_folder}/relationships'

    badge_task = DefaultTask(
        extractor=CsvTableBadgeExtractor(),
        loader=FsNeo4jCSVLoader(),
        transformer=NoopTransformer(),
    )

    # Settings are grouped per component and merged in extractor, loader,
    # publisher order.
    extractor_settings = {
        'extractor.csvtablebadge.table_file_location': table_path,
        'extractor.csvtablebadge.badge_file_location': badge_path,
    }
    loader_settings = {
        'loader.filesystem_csv_neo4j.node_dir_path': node_dir,
        'loader.filesystem_csv_neo4j.relationship_dir_path': relationship_dir,
        'loader.filesystem_csv_neo4j.delete_created_directories': True,
    }
    publisher_settings = {
        'publisher.neo4j.node_files_directory': node_dir,
        'publisher.neo4j.relation_files_directory': relationship_dir,
        'publisher.neo4j.neo4j_endpoint': neo4j_endpoint,
        'publisher.neo4j.neo4j_user': neo4j_user,
        'publisher.neo4j.neo4j_password': neo4j_password,
        'publisher.neo4j.neo4j_encrypted': False,
        # should use a unique tag here, like {ds}
        'publisher.neo4j.job_publish_tag': 'unique_tag_b',
    }
    job_config = ConfigFactory.from_dict({
        **extractor_settings,
        **loader_settings,
        **publisher_settings,
    })

    DefaultJob(
        conf=job_config,
        task=badge_task,
        publisher=Neo4jCsvPublisher(),
    ).launch()
def run_table_badge_job(table_path, badge_path):
    """Configure and launch the table-badge job that publishes through AtlasCSVPublisher.

    A DefaultTask is assembled from a CsvTableBadgeExtractor, an
    FsAtlasCSVLoader and a NoopTransformer, then handed to a DefaultJob
    together with the configuration built below.

    The Atlas connection values (``atlas_endpoint``, ``atlas_user``,
    ``atlas_password``) and ``ATLAS_CREATE_BATCH_SIZE`` are not parameters;
    they are read from the enclosing scope.

    :param table_path: value stored under
        ``extractor.csvtablebadge.table_file_location``.
    :param badge_path: value stored under
        ``extractor.csvtablebadge.badge_file_location``.
    """
    # The loader and the publisher are pointed at the same two directories.
    tmp_folder = '/var/tmp/amundsen/table_badge'
    entity_dir = f'{tmp_folder}/nodes'
    relationship_dir = f'{tmp_folder}/relationships'

    badge_task = DefaultTask(
        extractor=CsvTableBadgeExtractor(),
        loader=FsAtlasCSVLoader(),
        transformer=NoopTransformer(),
    )

    # Settings are grouped per component and merged in extractor, loader,
    # publisher order.
    extractor_settings = {
        'extractor.csvtablebadge.table_file_location': table_path,
        'extractor.csvtablebadge.badge_file_location': badge_path,
    }
    loader_settings = {
        f'loader.filesystem_csv_atlas.{FsAtlasCSVLoader.ENTITY_DIR_PATH}': entity_dir,
        f'loader.filesystem_csv_atlas.{FsAtlasCSVLoader.RELATIONSHIP_DIR_PATH}': relationship_dir,
        f'loader.filesystem_csv_atlas.{FsAtlasCSVLoader.SHOULD_DELETE_CREATED_DIR}': True,
    }
    publisher_settings = {
        # The client is built from the endpoint and a (user, password) tuple.
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.ATLAS_CLIENT}':
            AtlasClient(atlas_endpoint, (atlas_user, atlas_password)),
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.ENTITY_DIR_PATH}': entity_dir,
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.RELATIONSHIP_DIR_PATH}': relationship_dir,
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.ATLAS_ENTITY_CREATE_BATCH_SIZE}':
            ATLAS_CREATE_BATCH_SIZE,
        f'publisher.atlas_csv_publisher.{AtlasCSVPublisher.REGISTER_ENTITY_TYPES}': False,
    }
    job_config = ConfigFactory.from_dict({
        **extractor_settings,
        **loader_settings,
        **publisher_settings,
    })

    DefaultJob(
        conf=job_config,
        task=badge_task,
        publisher=AtlasCSVPublisher(),
    ).launch()
def test_extraction_table_badges(self) -> None:
    """
    Checks the badge names on the first two records produced by
    CsvTableBadgeExtractor for the sample table and badge CSV files.
    """
    settings = {
        f'extractor.csvtablebadge.{CsvTableBadgeExtractor.TABLE_FILE_LOCATION}':
            'example/sample_data/sample_table.csv',
        f'extractor.csvtablebadge.{CsvTableBadgeExtractor.BADGE_FILE_LOCATION}':
            'example/sample_data/sample_badges.csv',
    }
    self.conf = ConfigFactory.from_dict(settings)

    extractor = CsvTableBadgeExtractor()
    scoped_conf = Scoped.get_scoped_conf(conf=self.conf, scope=extractor.get_scope())
    extractor.init(scoped_conf)

    # Successive extract() calls are compared, in order, against the badge
    # names expected for each record.
    expected_badge_names = (
        ['beta'],
        ['json', 'npi'],
    )
    for expected in expected_badge_names:
        record = extractor.extract()
        self.assertEqual([badge.name for badge in record.badges], expected)