def context(request):
    """Resolve the working directory for a test run and schedule its cleanup.

    The working directory is ``request.param`` joined onto the directory of
    this file, and the project description is read from ``test1.json``
    inside it. A finalizer is registered on ``request`` that removes the
    ``<work_path>/<task_uuid>`` directory and the project's ``temp_path``
    when they exist.

    NOTE(review): presumably a parametrized pytest fixture (it reads
    ``request.param``); the decorator is not visible here -- confirm.

    :param request: object providing ``param`` and ``addfinalizer``.
    :return: tuple ``(work_path, task_uuid)``.
    """
    here = os.path.dirname(__file__)
    work_path = os.path.join(here, request.param)
    config_file = os.path.join(work_path, 'test1.json')
    project = Utils.load_json(config_file)
    task_uuid = project['task_uuid']

    def teardown():
        # Directory named after the task UUID inside the working directory.
        task_dir = os.path.join(work_path, task_uuid)
        if os.path.isdir(task_dir):
            shutil.rmtree(task_dir)

        # Looked up at teardown time, exactly as the finalizer always did.
        temp_dir = project['temp_path']
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)

    request.addfinalizer(teardown)
    return work_path, task_uuid
Exemple #2
0
def test_extract_linked_records(project, linker):
    """Check the files left behind after extracting linked records.

    Runs the first project step through ``load_data``, ``pair_n_match`` and
    ``link``, then calls ``extract_linked_records`` with the ``LEFT_`` prefix
    and verifies that both the left data file and the step-linked file exist
    with the expected lengths.
    """
    first_step = project['steps'][0]
    linked_path = project['temp_path'] + LinkFiles.TEMP_STEP_LINKED_FILE
    left_data_path = project['output_root'] + 'left_file.csv'
    matched_path = project['temp_path'] + LinkFiles.MATCHED_RECORDS

    # Create (or truncate) the matched-records file before matching starts.
    with open(matched_path, 'w'):
        pass

    linker.load_data()
    match_options = {
        'step': first_step['seq'],
        'link_method': first_step['linking_method'],
        'blocking': first_step['blocking_schema'],
        'linking': first_step['linking_schema'],
        'matched_file': matched_path,
    }
    linker.pair_n_match(**match_options)
    linker.link(project['relationship_type'])

    # Linking must have produced the step-linked file before extraction.
    assert os.path.isfile(linked_path)
    linker.extract_linked_records(linked_filename=linked_path, prefix='LEFT_')

    assert os.path.isfile(left_data_path)
    assert Utils.file_len(left_data_path) == 929
    assert os.path.isfile(linked_path)
    assert Utils.file_len(linked_path) == 73
Exemple #3
0
def test_extract_rows(project, ddp):
    """Check the state of the input data after ``extract_rows``.

    Runs the second project step through ``load_data``, ``pair_n_match`` and
    ``link_pairs``, then calls ``extract_rows`` on the left file using the
    temporary entities file as the index. Afterwards the step-remained
    temporary file must not exist and the left file must still exist with
    the expected length.
    """
    second_step = project['steps'][1]
    matched_path = project['temp_path'] + LinkFiles.MATCHED_RECORDS

    # Create (or truncate) the matched-records file before matching starts.
    with open(matched_path, 'w'):
        pass

    entities_path = project['temp_path'] + LinkFiles.TEMP_ENTITIES_FILE

    ddp.load_data()
    match_options = {
        'step': second_step['seq'],
        'link_method': second_step['linking_method'],
        'blocking': second_step['blocking_schema'],
        'linking': second_step['linking_schema'],
        'matched_file': matched_path,
    }
    ddp.pair_n_match(**match_options)
    ddp.link_pairs()
    ddp.extract_rows(
        data_filename=ddp.left_file,
        data_id=ddp.left_index,
        index_filename=entities_path,
        index_id='REC_ID',
        index_cols=['ENTITY_ID'],
    )

    remained_path = project['temp_path'] + LinkFiles.TEMP_STEP_REMAINED
    assert not os.path.isfile(remained_path)
    assert os.path.isfile(ddp.left_file)
    assert Utils.file_len(ddp.left_file) == 1000
Exemple #4
0
def project():
    """Load and return the project data stored in test_jtst_dedup.json."""
    config_name = 'test_jtst_dedup.json'
    return Utils.load_project_data(config_name)
Exemple #5
0
def project():
    """Load and return the project data stored in test_jtst_educ_linking.json."""
    config_name = 'test_jtst_educ_linking.json'
    return Utils.load_project_data(config_name)