Exemple #1
0
def _SeqIO_to_alignment_iterator(handle,
                                 format,
                                 alphabet=None,
                                 seq_count=None):
    """Build an Alignment iterator on top of Bio.SeqIO (PRIVATE).

    Arguments:
    handle    - handle to the file.
    format    - string describing the file format; it must be one of the
                keys of SeqIO._FormatToIterator.
    alphabet  - optional Alphabet object, passed straight to SeqIO.parse.
                Useful when the sequence type cannot be automatically
                inferred from the file itself (e.g. fasta).
    seq_count - optional integer, number of sequences expected in each
                alignment.  Recommended for fasta format files.

    This is a generator.  When seq_count is given, the records are read
    in order and every seq_count of them are turned into one alignment;
    a ValueError is raised if records are left over once the input is
    exhausted.  When seq_count is omitted (default), all the records in
    the file are combined into a single alignment, and nothing at all is
    yielded if the file held no records.
    """
    from Bio import SeqIO
    assert format in SeqIO._FormatToIterator

    if not seq_count:
        #No batch size given, so every record in the file is taken to
        #belong to one and the same alignment.
        everything = list(SeqIO.parse(handle, format, alphabet))
        if everything:
            yield SeqIO.to_alignment(everything)
        #An empty file means no alignment, so there is nothing to yield.
        return

    #A batch size was given: collect records until the batch is full,
    #emit it as an alignment, then start a new batch.
    batch = []
    for seq_record in SeqIO.parse(handle, format, alphabet):
        batch.append(seq_record)
        if len(batch) == seq_count:
            yield SeqIO.to_alignment(batch)
            batch = []
    if batch:
        #The input ran out in the middle of a batch.
        raise ValueError("Check seq_count argument, not enough sequences?")
Exemple #2
0
def _SeqIO_to_alignment_iterator(handle, format, alphabet=None, seq_count=None) :
    """Wrap Bio.SeqIO parsing as an iterator over Alignments (PRIVATE).

    Arguments:
     - handle    - handle to the file.
     - format    - string describing the file format; it must be one of
                   the keys of SeqIO._FormatToIterator.
     - alphabet  - optional Alphabet object, handed on to SeqIO.parse.
                   Useful when the sequence type cannot be automatically
                   inferred from the file itself (e.g. fasta, phylip,
                   clustal)
     - seq_count - Optional integer, number of sequences expected in each
                   alignment.  Recommended for fasta format files.

    If seq_count is given, each consecutive run of seq_count records is
    yielded as one alignment, and a ValueError is raised when the final
    run is incomplete.

    If seq_count is omitted (default) then all the sequences in the file
    are combined into a single Alignment; a file with no records yields
    nothing.
    """
    from Bio import SeqIO
    assert format in SeqIO._FormatToIterator

    #Both modes read the same stream of SeqRecord objects.
    record_stream = SeqIO.parse(handle, format, alphabet)

    if seq_count :
        #Cut the stream into alignments of seq_count records each.
        pending = []
        for seq_record in record_stream :
            pending.append(seq_record)
            if len(pending) != seq_count :
                continue
            yield SeqIO.to_alignment(pending)
            pending = []
        if pending :
            #Records remain that do not fill a whole alignment.
            raise ValueError("Check seq_count argument, not enough sequences?")
    else :
        #No count to go on, so treat the whole file as one alignment
        #(or as no alignment at all if it holds no records).
        whole_file = list(record_stream)
        if whole_file :
            yield SeqIO.to_alignment(whole_file)
Exemple #3
0
    for given_alpha in bad:
        #These should all fail...
        try:
            print SeqIO.parse(open(t_filename),t_format,given_alpha).next()
            assert False, "Forcing wrong alphabet, %s, should fail (%s)" \
                   % (repr(given_alpha), t_filename)
        except ValueError:
            pass
    del good, bad, given_alpha, base_alpha

    if t_alignment:
        print "Testing reading %s format file %s as an alignment" \
              % (t_format, t_filename)

        #Using SeqIO.to_alignment(SeqIO.parse(...))
        alignment = SeqIO.to_alignment(SeqIO.parse( \
                    handle=open(t_filename,"r"), format=t_format))
        assert len(alignment.get_all_seqs()) == t_count

        alignment_len = alignment.get_alignment_length()

        #Check the record order agrees, and double check the
        #sequence lengths all agree too.
        for i in range(t_count):
            assert compare_record(records[i], alignment.get_all_seqs()[i])
            assert len(records[i].seq) == alignment_len

        print alignment_summary(alignment)

    #Some alignment file formats have magic characters which mean
    #use the letter in this position in the first sequence.
    #They should all have been converted by the parser, but if
Exemple #4
0
    for given_alpha in bad :
        #These should all fail...
        try :
            print SeqIO.parse(open(t_filename),t_format,given_alpha).next()
            assert False, "Forcing wrong alphabet, %s, should fail (%s)" \
                   % (repr(given_alpha), t_filename)
        except ValueError :
            pass
    del good, bad, given_alpha, base_alpha

    if t_alignment :
        print "Testing reading %s format file %s as an alignment" \
              % (t_format, t_filename)

        #Using SeqIO.to_alignment(SeqIO.parse(...))
        alignment = SeqIO.to_alignment(SeqIO.parse( \
                    handle=open(t_filename,"r"), format=t_format))
        assert len(alignment.get_all_seqs()) == t_count

        alignment_len = alignment.get_alignment_length()

        #Check the record order agrees, and double check the
        #sequence lengths all agree too.
        for i in range(t_count) :
            assert records_match(records[i], alignment.get_all_seqs()[i])
            assert len(records[i].seq) == alignment_len

        print alignment_summary(alignment)

    #Some alignment file formats have magic characters which mean
    #use the letter in this position in the first sequence.
    #They should all have been converted by the parser, but if