def align_from_files(self, reffile, targetfile,
                     ref_indexes=None, target_indexes=None,
                     ref_encoding=None, target_encoding=None,
                     ref_separator='\t', target_separator='\t',
                     get_matrix=True):
    """ Parse a reference file and a target file, then align their content

    Parameters
    ----------

    reffile: name of the reference file

    targetfile: name of the target file

    ref_indexes: forwarded to ``parsefile`` as ``indexes`` when reading
                 the reference file

    target_indexes: forwarded to ``parsefile`` as ``indexes`` when reading
                    the target file

    ref_encoding: if given (e.g. 'utf-8' or 'latin-1'), it will
                  be used to read the reference file.

    target_encoding: if given (e.g. 'utf-8' or 'latin-1'), it will
                     be used to read the target file.

    ref_separator: separator of the reference file

    target_separator: separator of the target file

    get_matrix: forwarded unchanged to ``self.align``

    Returns
    -------

    Whatever ``self.align`` returns for the two parsed datasets.
    """
    # The reference file is always parsed first, then the target file.
    reference_data = parsefile(
        reffile,
        indexes=ref_indexes,
        encoding=ref_encoding,
        delimiter=ref_separator,
    )
    target_data = parsefile(
        targetfile,
        indexes=target_indexes,
        encoding=target_encoding,
        delimiter=target_separator,
    )
    return self.align(reference_data, target_data, get_matrix=get_matrix)
def get_aligned_pairs_from_files(self, reffile, targetfile,
                                 ref_indexes=None, target_indexes=None,
                                 ref_encoding=None, target_encoding=None,
                                 ref_separator='\t', target_separator='\t',
                                 unique=True):
    """ Yield the pairs of aligned elements found between two files

    This is a generator: the files are only parsed and aligned once the
    caller starts iterating over the result.

    Parameters
    ----------

    reffile: name of the reference file

    targetfile: name of the target file

    ref_indexes: forwarded to ``parsefile`` as ``indexes`` when reading
                 the reference file

    target_indexes: forwarded to ``parsefile`` as ``indexes`` when reading
                    the target file

    ref_encoding: if given (e.g. 'utf-8' or 'latin-1'), it will
                  be used to read the reference file.

    target_encoding: if given (e.g. 'utf-8' or 'latin-1'), it will
                     be used to read the target file.

    ref_separator: separator of the reference file

    target_separator: separator of the target file

    unique: forwarded unchanged to ``iter_aligned_pairs``
    """
    reference_data = parsefile(
        reffile,
        indexes=ref_indexes,
        encoding=ref_encoding,
        delimiter=ref_separator,
    )
    target_data = parsefile(
        targetfile,
        indexes=target_indexes,
        encoding=target_encoding,
        delimiter=target_separator,
    )
    # The alignment is always requested with get_matrix=False here.
    matrix, matched = self.align(reference_data, target_data,
                                 get_matrix=False)
    aligned_pairs = iter_aligned_pairs(reference_data, target_data,
                                       matrix, matched, unique)
    for aligned in aligned_pairs:
        yield aligned
def test_parser(self):
    """ Check ``parsefile`` on the comma-separated ``file2parse`` fixture

    Three scenarios are covered with the same column selection:
    default parsing, a ``formatopt`` override keeping column 2 as ``str``,
    and ``autocast_data=False`` where every value stays a string.
    """
    fixture = path.join(TESTDIR, 'data', 'file2parse')

    # Default behaviour: values are cast automatically.
    expected_autocast = [
        [1, (12, 19), u'apple', u'house'],
        [2, (21.9, 19), u'stramberry', u'horse'],
        [3, (23, 2.17), u'cherry', u'flower'],
    ]
    parsed_autocast = parsefile(fixture, [0, (2, 3), 4, 1], delimiter=',')
    self.assertEqual(expected_autocast, parsed_autocast)

    # Column 2 is forced to ``str``; the other columns are still cast.
    expected_formatted = [
        [1, ('12', 19), u'apple', u'house'],
        [2, ('21.9', 19), u'stramberry', u'horse'],
        [3, ('23', 2.17), u'cherry', u'flower'],
    ]
    parsed_formatted = parsefile(fixture, [0, (2, 3), 4, 1], delimiter=',',
                                 formatopt={2: str})
    self.assertEqual(expected_formatted, parsed_formatted)

    # Autocasting disabled: every value is returned as a string.
    expected_raw = [
        ['1', ('12', '19'), 'apple', 'house'],
        ['2', ('21.9', '19'), 'stramberry', 'horse'],
        ['3', ('23', '2.17'), 'cherry', 'flower'],
    ]
    parsed_raw = parsefile(fixture, [0, (2, 3), 4, 1], delimiter=',',
                           autocast_data=False)
    self.assertEqual(expected_raw, parsed_raw)