def context(request):
    """Prepare the working directory and task id for one test parameter.

    Loads ``test1.json`` from the directory named by ``request.param``
    (resolved relative to this file) and registers a finalizer that removes
    the task directory and the project's temporary directory.

    :param request: object exposing ``param`` and ``addfinalizer``
        (presumably the pytest ``request`` fixture -- the fixture decorator
        is not visible here, TODO confirm).
    :return: tuple ``(work_path, task_uuid)``.
    """
    base_dir = os.path.dirname(__file__)
    work_path = os.path.join(base_dir, request.param)
    config_file = os.path.join(work_path, 'test1.json')
    project = Utils.load_json(config_file)
    task_uuid = project['task_uuid']

    def teardown():
        """Delete the directories left behind by the test, if present."""
        task_dir = os.path.join(work_path, task_uuid)
        if os.path.isdir(task_dir):
            shutil.rmtree(task_dir)

        temp_dir = project['temp_path']
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)

    request.addfinalizer(teardown)
    return work_path, task_uuid
def test_extract_linked_records(project, linker):
    """Check the files produced by ``extract_linked_records`` after linking.

    Runs the first project step through pairing, matching and linking, then
    extracts the linked records with the ``LEFT_`` prefix and verifies that
    both the output data file and the step-linked file exist with the
    expected ``Utils.file_len`` values.
    """
    first_step = project['steps'][0]
    temp_dir = project['temp_path']
    step_linked = temp_dir + LinkFiles.TEMP_STEP_LINKED_FILE
    data_filename = project['output_root'] + 'left_file.csv'
    matched_file = temp_dir + LinkFiles.MATCHED_RECORDS

    # Create (or truncate) the matched-records file so the run starts empty.
    with open(matched_file, 'w'):
        pass

    linker.load_data()
    match_args = {
        'step': first_step['seq'],
        'link_method': first_step['linking_method'],
        'blocking': first_step['blocking_schema'],
        'linking': first_step['linking_schema'],
        'matched_file': matched_file,
    }
    linker.pair_n_match(**match_args)
    linker.link(project['relationship_type'])

    # Linking must have produced the step-linked file before extraction.
    assert os.path.isfile(step_linked)

    linker.extract_linked_records(linked_filename=step_linked, prefix='LEFT_')

    assert os.path.isfile(data_filename)
    assert Utils.file_len(data_filename) == 929

    # The step-linked file is still present after extraction.
    assert os.path.isfile(step_linked)
    assert Utils.file_len(step_linked) == 73
def test_extract_rows(project, ddp):
    """Check the state of the files after ``extract_rows`` on the left input.

    Runs the second project step through pairing, matching and pair linking,
    calls ``extract_rows`` against the entities file, and verifies that no
    step-remained file exists and that the left input file is present with
    the expected ``Utils.file_len`` value.
    """
    second_step = project['steps'][1]
    matched_file = project['temp_path'] + LinkFiles.MATCHED_RECORDS

    # Create (or truncate) the matched-records file so the run starts empty.
    with open(matched_file, 'w'):
        pass

    linked_file = project['temp_path'] + LinkFiles.TEMP_ENTITIES_FILE

    ddp.load_data()
    match_args = {
        'step': second_step['seq'],
        'link_method': second_step['linking_method'],
        'blocking': second_step['blocking_schema'],
        'linking': second_step['linking_schema'],
        'matched_file': matched_file,
    }
    ddp.pair_n_match(**match_args)
    ddp.link_pairs()

    ddp.extract_rows(
        data_filename=ddp.left_file,
        data_id=ddp.left_index,
        index_filename=linked_file,
        index_id='REC_ID',
        index_cols=['ENTITY_ID'],
    )

    remained_file = project['temp_path'] + LinkFiles.TEMP_STEP_REMAINED
    assert not os.path.isfile(remained_file)
    assert os.path.isfile(ddp.left_file)
    assert Utils.file_len(ddp.left_file) == 1000
def project():
    """Load and return the project data stored in ``test_jtst_dedup.json``."""
    config_name = 'test_jtst_dedup.json'
    return Utils.load_project_data(config_name)
def project():
    """Load and return the project data in ``test_jtst_educ_linking.json``."""
    # NOTE(review): another function named ``project`` appears earlier in
    # this source; if both live in one module this one shadows it -- confirm
    # they belong to separate test modules.
    config_name = 'test_jtst_educ_linking.json'
    return Utils.load_project_data(config_name)