コード例 #1
0
    def align_from_files(self, reffile, targetfile,
                         ref_indexes=None, target_indexes=None,
                         ref_encoding=None, target_encoding=None,
                         ref_separator='\t', target_separator='\t',
                         get_matrix=True):
        """ Parse a reference file and a target file, then align them

        Both files are read with `parsefile`; the two resulting datasets
        are then handed over to `self.align`.

        Parameters
        ----------

        reffile: name of the reference file

        targetfile: name of the target file

        ref_indexes: forwarded to `parsefile` as `indexes` when reading
                     the reference file (presumably the columns to
                     keep -- confirm against `parsefile`)

        target_indexes: forwarded to `parsefile` as `indexes` when
                        reading the target file

        ref_encoding: if given (e.g. 'utf-8' or 'latin-1'), forwarded
                      to `parsefile` as `encoding` for the reference file

        target_encoding: if given (e.g. 'utf-8' or 'latin-1'), forwarded
                         to `parsefile` as `encoding` for the target file

        ref_separator: separator of the reference file (tab by default)

        target_separator: separator of the target file (tab by default)

        get_matrix: forwarded unchanged to `self.align`

        Returns
        -------

        Whatever `self.align` returns for the two parsed datasets.
        """
        # Gather the per-file parsing options first, so that the two
        # `parsefile` calls below read symmetrically.
        ref_options = dict(indexes=ref_indexes,
                           encoding=ref_encoding,
                           delimiter=ref_separator)
        target_options = dict(indexes=target_indexes,
                              encoding=target_encoding,
                              delimiter=target_separator)

        reference_data = parsefile(reffile, **ref_options)
        target_data = parsefile(targetfile, **target_options)
        return self.align(reference_data, target_data, get_matrix=get_matrix)
コード例 #2
0
 def get_aligned_pairs_from_files(self, reffile, targetfile,
                       ref_indexes=None, target_indexes=None,
                       ref_encoding=None, target_encoding=None,
                       ref_separator='\t', target_separator='\t',
                       unique=True):
     """ Yield the pairs of aligned elements found between two files

     This is a generator: nothing is parsed or aligned until the first
     item is requested.

     Parameters
     ----------

     reffile: name of the reference file

     targetfile: name of the target file

     ref_indexes, target_indexes: forwarded to `parsefile` as `indexes`

     ref_encoding, target_encoding: forwarded to `parsefile` as `encoding`

     ref_separator, target_separator: forwarded to `parsefile` as
                                      `delimiter` (tab by default)

     unique: forwarded unchanged to `iter_aligned_pairs`

     Yields
     ------

     Each item produced by `iter_aligned_pairs`, in the same order.
     """
     reference_data = parsefile(reffile,
                                indexes=ref_indexes,
                                encoding=ref_encoding,
                                delimiter=ref_separator)
     target_data = parsefile(targetfile,
                             indexes=target_indexes,
                             encoding=target_encoding,
                             delimiter=target_separator)

     # The alignment is always requested with get_matrix=False here; its
     # result is expected to unpack into exactly two values.
     matrix, matched = self.align(reference_data, target_data,
                                  get_matrix=False)

     pairs = iter_aligned_pairs(reference_data, target_data,
                                matrix, matched, unique)
     for aligned in pairs:
         yield aligned
コード例 #3
0
    def test_parser(self):
        """ Check `parsefile` on the comma-separated 'file2parse' fixture

        The same file and the same index selection are parsed three
        times: with default options, with a `formatopt` override, and
        with `autocast_data` disabled.
        """
        filename = path.join(TESTDIR, 'data', 'file2parse')

        # Default options: numeric-looking values are expected as numbers.
        parsed = parsefile(filename, [0, (2, 3), 4, 1], delimiter=',')
        expected = [
            [1, (12, 19), u'apple', u'house'],
            [2, (21.9, 19), u'stramberry', u'horse'],
            [3, (23, 2.17), u'cherry', u'flower'],
        ]
        self.assertEqual(expected, parsed)

        # formatopt={2: str}: the value taken from column 2 is expected
        # as a string, the other values are unchanged.
        parsed = parsefile(filename, [0, (2, 3), 4, 1], delimiter=',',
                           formatopt={2: str})
        expected = [
            [1, ('12', 19), u'apple', u'house'],
            [2, ('21.9', 19), u'stramberry', u'horse'],
            [3, ('23', 2.17), u'cherry', u'flower'],
        ]
        self.assertEqual(expected, parsed)

        # autocast_data=False: every value is expected as a string.
        parsed = parsefile(filename, [0, (2, 3), 4, 1], delimiter=',',
                           autocast_data=False)
        expected = [
            ['1', ('12', '19'), 'apple', 'house'],
            ['2', ('21.9', '19'), 'stramberry', 'horse'],
            ['3', ('23', '2.17'), 'cherry', 'flower'],
        ]
        self.assertEqual(expected, parsed)