Esempio n. 1
0
def column_filter(seq_iter, character_list=['?','-'], max_frequency=1.0):
    """
    Yield a copy of every sequence with some alignment columns removed.

    Per-column frequencies for `character_list` come from
    `seqstats.column_frequencies`, which also hands back the sequences to
    iterate over. A column survives only when its frequency is strictly
    below `max_frequency`.
    (Presumably the frequency is the proportion of characters in the column
    that appear in `character_list` -- confirm against `seqstats`.)
    """
    frequencies, records = seqstats.column_frequencies(
        seq_iter, character_list=character_list)

    # Boolean selector, one entry per column; built once as a list so it can
    # be reused for every record.
    keep_mask = []
    for frequency in frequencies:
        keep_mask.append(frequency < max_frequency)

    for record in records:
        kept = ''.join(itertools.compress(str(record.seq), keep_mask))
        yield sequtils.copy_seq_metadata(record, new_seq=kept)
Esempio n. 2
0
def align_pair(seq_record1, seq_record2, tools=['mafft', 'muscle']):
    """
    Align two `SeqRecord` objects against each other and return the aligned
    copies as a `(first, second)` tuple.

    `tools` is a priority-ordered list of external alignment programs and is
    handed to `get_aligner`. With the default `['mafft', 'muscle']`, mafft is
    preferred when its executable is on PATH and muscle is the second choice.
    When none of the listed programs is available, or `tools` is an empty
    list or None, the (much slower) built-in `global_align` is used instead.
    """
    aligner = get_aligner(tools=tools)
    if aligner:
        aligned = list(aligner.align([seq_record1, seq_record2]))
        assert len(aligned) == 2
        # Results are looked up by record id rather than by position
        # (presumably because the aligner's output order is not guaranteed).
        by_id = dict((record.id, record) for record in aligned)
        return by_id[seq_record1.id], by_id[seq_record2.id]

    # No external aligner is usable. Only warn when the caller actually
    # asked for one; an empty/None `tools` selects the fallback on purpose.
    if tools:
        _LOG.warning('WARNING: external alignment tools not found; '
                     'using (slow) built-in alignment function.')
    aligned1, aligned2 = global_align(seq_record1, seq_record2)
    first = sequtils.copy_seq_metadata(seq_record1, aligned1)
    second = sequtils.copy_seq_metadata(seq_record2, aligned2)
    return first, second
Esempio n. 3
0
def longest_reading_frames(seq_iter,
        gap_characters=['-'],
        table = 1,
        allow_partial = True,
        require_start_after_stop = True):
    """
    Yield one record per degapped input sequence, reduced to a reading frame.

    Every sequence first has the characters in `gap_characters` removed via
    `remove_gaps`. The degapped record is then passed to
    `sequtils.get_longest_reading_frames` together with `table`,
    `allow_partial` and `require_start_after_stop`, which are forwarded
    unchanged.

    The first item of that result is yielded. When the result is empty, a
    copy of the record's metadata with an empty sequence is yielded instead,
    so a record without any reading frame still produces an output item.
    (Presumably the helper returns its candidates best-first -- confirm
    against `sequtils`.)
    """
    degapped = remove_gaps(seq_iter, gap_characters=gap_characters)
    # The position of each record is not needed, so iterate directly
    # instead of through enumerate().
    for s in degapped:
        lrf = sequtils.get_longest_reading_frames(s,
                table = table,
                allow_partial = allow_partial,
                require_start_after_stop = require_start_after_stop)
        if lrf:
            yield lrf[0]
        else:
            yield sequtils.copy_seq_metadata(s, '')
Esempio n. 4
0
def align_pair(seq_record1, seq_record2, tools = ['mafft', 'muscle']):
    """
    Return aligned copies of the `SeqRecord`s `seq_record1` and
    `seq_record2`, in that order.

    `tools` lists external alignment programs in order of preference and is
    passed on to `get_aligner`. The default `['mafft', 'muscle']` means:
    use mafft if its executable is found in PATH, otherwise try muscle. If
    no listed program can be found (or `tools` is an empty list or None),
    the built-in, much slower `global_align` function does the work.
    """
    external = get_aligner(tools = tools)

    if external:
        results = list(external.align([seq_record1, seq_record2]))
        assert len(results) == 2
        # Match each result to its input through the record id.
        lookup = dict((rec.id, rec) for rec in results)
        return lookup[seq_record1.id], lookup[seq_record2.id]

    # Fallback path: warn only if external tools were requested at all.
    if tools:
        _LOG.warning('WARNING: external alignment tools not found; '
                'using (slow) built-in alignment function.')
    aligned_a, aligned_b = global_align(seq_record1, seq_record2)
    copy_a = sequtils.copy_seq_metadata(seq_record1, aligned_a)
    copy_b = sequtils.copy_seq_metadata(seq_record2, aligned_b)
    return copy_a, copy_b
Esempio n. 5
0
def seq_mod(seq_iter, from_chars='', to_chars='', del_chars=''):
    '''
    Rewrite the characters of every sequence in `seq_iter`.

    Characters in `from_chars` are replaced by the character at the same
    position in `to_chars`; characters in `del_chars` are dropped. Each
    result is yielded as a copy carrying the original record's metadata.

    Raises ValueError when `from_chars` and `to_chars` differ in length.
    Because this is a generator, the check runs when iteration starts.
    '''
    if len(from_chars) != len(to_chars):
        raise ValueError('from and to characters must have same length')

    # A table of None means "no substitution, deletions only".
    table = maketrans(from_chars, to_chars) if len(from_chars) > 0 else None

    for record in seq_iter:
        # NOTE(review): translate(table, deletechars) is the Python 2 `str`
        # signature; Python 3 `str.translate` accepts only a table.
        modified = str(record.seq).translate(table, del_chars)
        yield sequtils.copy_seq_metadata(record, new_seq=modified)
Esempio n. 6
0
def longest_reading_frames(seq_iter,
                           gap_characters=['-'],
                           table=1,
                           allow_partial=True,
                           require_start_after_stop=True):
    """
    Yield, for each degapped input sequence, a record for a reading frame.

    `remove_gaps` strips the characters in `gap_characters` from every
    sequence. Each degapped record then goes to
    `sequtils.get_longest_reading_frames`, with `table`, `allow_partial`
    and `require_start_after_stop` forwarded as given.

    The first element of the helper's result is yielded. An empty result
    yields a copy of the record's metadata with an empty sequence, so such
    records are kept in the output rather than dropped.
    (Presumably the first element is the preferred frame -- confirm against
    `sequtils`.)
    """
    # The loop index was never used, so enumerate() is not needed here.
    for s in remove_gaps(seq_iter, gap_characters=gap_characters):
        lrf = sequtils.get_longest_reading_frames(
            s,
            table=table,
            allow_partial=allow_partial,
            require_start_after_stop=require_start_after_stop)
        if lrf:
            yield lrf[0]
        else:
            yield sequtils.copy_seq_metadata(s, '')
Esempio n. 7
0
def seq_mod(seq_iter,
        from_chars='',
        to_chars='',
        del_chars=''):
    '''
    Apply a character mapping and/or deletion to each sequence.

    For every sequence in `seq_iter`, each character found in `from_chars`
    is mapped to the character at the same index in `to_chars`, and every
    character listed in `del_chars` is removed. Copies with the original
    metadata are yielded.

    A ValueError is raised (on first iteration, since this is a generator)
    if `from_chars` and `to_chars` are not the same length.
    '''
    if len(from_chars) != len(to_chars):
        raise ValueError('from and to characters must have same length')

    if len(from_chars) > 0:
        mapping = maketrans(from_chars, to_chars)
    else:
        # No substitutions requested: translate() then only deletes.
        mapping = None

    for entry in seq_iter:
        # NOTE(review): the two-argument translate() call is the Python 2
        # `str` API; Python 3 `str.translate` takes a single table.
        text = str(entry.seq)
        yield sequtils.copy_seq_metadata(entry,
                new_seq=text.translate(mapping, del_chars))
Esempio n. 8
0
def constant_column_filter(seq_iter):
    """
    Yield a copy of every sequence restricted to the selected columns.

    `seqstats.variable_columns` supplies a per-column selector and the
    sequences to iterate over. Positions whose selector entry is truthy are
    kept; all others are dropped.
    (Presumably the selector marks columns that vary between sequences, so
    constant columns are removed -- confirm against `seqstats`.)
    """
    column_mask, records = seqstats.variable_columns(seq_iter)
    for record in records:
        retained = ''.join(itertools.compress(str(record.seq), column_mask))
        yield sequtils.copy_seq_metadata(record, new_seq=retained)
Esempio n. 9
0
def dice(seq_iter, slices_to_keep):
    """
    Yield a copy of each sequence containing only the requested slices.

    `slices_to_keep` is an iterable of `(start, stop)` pairs. For every
    record, the pieces `seq.seq[start:stop]` are concatenated in the order
    given and become the sequence of the yielded copy; metadata is carried
    over by `sequtils.copy_seq_metadata`.
    """
    # The slices are re-read for every record. Materialize them once so a
    # one-shot iterator (e.g. a generator) is not exhausted after the first
    # record, which would leave every later record empty.
    slices = list(slices_to_keep)
    for seq in seq_iter:
        pieces = [str(seq.seq[start:stop]) for start, stop in slices]
        yield sequtils.copy_seq_metadata(seq, new_seq=''.join(pieces))
Esempio n. 10
0
 def test_seq_record(self):
     # Passing a SeqRecord as the new sequence: the copy must carry
     # self.seq's metadata, the new residues, and be a distinct object.
     replacement = SeqRecord(Seq('AGCT'), id='1')
     copied = sequtils.copy_seq_metadata(self.seq, replacement)
     self.assertSameMetadata(self.seq, copied)
     self.assertEqual(str(copied.seq), 'AGCT')
     self.assertFalse(copied is self.seq)
Esempio n. 11
0
 def test_string(self):
     # Passing a plain string as the new sequence: the copy must carry
     # self.seq's metadata, the new residues, and be a distinct object.
     residues = 'AGCT'
     copied = sequtils.copy_seq_metadata(self.seq, residues)
     self.assertSameMetadata(self.seq, copied)
     self.assertEqual(str(copied.seq), residues)
     self.assertFalse(copied is self.seq)
Esempio n. 12
0
 def test_empty_seq(self):
     # Omitting the new sequence: the copy must carry self.seq's metadata,
     # have an empty sequence, and be a distinct object.
     copied = sequtils.copy_seq_metadata(self.seq)
     self.assertSameMetadata(self.seq, copied)
     copied_text = str(copied.seq)
     self.assertEqual(copied_text, '')
     self.assertFalse(copied is self.seq)
Esempio n. 13
0
 def test_seq_record(self):
     # Passing a SeqRecord as the new sequence: the copy must carry
     # self.seq's metadata and the new residues.
     s = sequtils.copy_seq_metadata(self.seq, SeqRecord(Seq('AGCT'), id='1'))
     self.assertSameMetadata(self.seq, s)
     self.assertEqual(str(s.seq), 'AGCT')
     # The point is that a new object is returned, so check identity, not
     # equality. Assuming self.seq is a Biopython SeqRecord (fixture not
     # visible here), ==/!= on it raise NotImplementedError.
     self.assertIsNot(s, self.seq)
Esempio n. 14
0
 def test_string(self):
     # Passing a plain string as the new sequence: the copy must carry
     # self.seq's metadata and the new residues.
     s = sequtils.copy_seq_metadata(self.seq, 'AGCT')
     self.assertSameMetadata(self.seq, s)
     self.assertEqual(str(s.seq), 'AGCT')
     # The point is that a new object is returned, so check identity, not
     # equality. Assuming self.seq is a Biopython SeqRecord (fixture not
     # visible here), ==/!= on it raise NotImplementedError.
     self.assertIsNot(s, self.seq)
Esempio n. 15
0
 def test_empty_seq(self):
     # Omitting the new sequence: the copy must carry self.seq's metadata
     # and have an empty sequence.
     s = sequtils.copy_seq_metadata(self.seq)
     self.assertSameMetadata(self.seq, s)
     self.assertEqual(str(s.seq), '')
     # The point is that a new object is returned, so check identity, not
     # equality. Assuming self.seq is a Biopython SeqRecord (fixture not
     # visible here), ==/!= on it raise NotImplementedError.
     self.assertIsNot(s, self.seq)
Esempio n. 16
0
 def test_seq(self):
     # Passing a bare Seq as the new sequence: the copy must carry
     # self.seq's metadata, the new residues, and be a distinct object.
     replacement = Seq('AGCT')
     copied = sequtils.copy_seq_metadata(self.seq, replacement)
     self.assertSameMetadata(self.seq, copied)
     self.assertEqual(str(copied.seq), 'AGCT')
     self.assertFalse(copied is self.seq)