예제 #1
0
def get_correct_ids(correct_fh, diff_fh, match=.5):
    """Rewrite the first column of ``diff_fh`` using the IDs from ``correct_fh``.

    Each line of ``diff_fh`` is stripped and split on tabs. Lines whose first
    field starts with ``#``, and lines whose first field is already one of the
    correct IDs, are kept as they are. For any other line the first field is
    replaced by its closest match among the correct IDs, as chosen by
    ``get_close_matches``. When no candidate reaches the ``match`` cutoff the
    line keeps its original ID instead of raising ``IndexError``.

    Parameters
    ----------
    correct_fh
        Passed to ``get_ids`` to obtain the reference IDs.
    diff_fh
        Iterable of tab-delimited text lines whose first field is an ID.
    match : float
        Cutoff forwarded to ``get_close_matches``.

    Returns
    -------
    str
        The processed lines joined with newline characters.
    """
    correct_ids = get_ids(correct_fh)
    # Hash-based lookup for the exact-match test; the original sequence is
    # still what gets handed to get_close_matches.
    known_ids = set(correct_ids)
    fixed_ids = []
    for line in diff_fh:
        fields = line.strip().split('\t')
        if not fields[0].startswith('#') and fields[0] not in known_ids:
            candidates = get_close_matches(fields[0], correct_ids, 1, match)
            # get_close_matches returns [] when nothing scores >= match;
            # in that case leave the ID untouched rather than crash.
            if candidates:
                fields[0] = candidates[0]
        fixed_ids.append('\t'.join(fields))
    return '\n'.join(fixed_ids)
예제 #2
0
    def test_get_ids(self):
        # get_ids applied to the fixture in self.id_file must yield exactly
        # these two IDs, in this order.
        expected = ['Test_ID1', 'Test_ID2']
        observed = get_ids(self.id_file)
        self.assertEqual(observed, expected)
예제 #3
0
    def test_get_ids(self):
        # get_ids applied to the fixture in self.id_file must yield exactly
        # these two IDs, in this order.
        expected = ['Test_ID1', 'Test_ID2']
        observed = get_ids(self.id_file)
        self.assertEqual(observed, expected)