def _SeqIO_to_alignment_iterator(handle, format, alphabet=None, seq_count=None):
    """Wrap Bio.SeqIO parsing as a generator of Alignment objects (PRIVATE).

    handle    - handle to the file, passed straight to SeqIO.parse.
    format    - string naming the file format; it must be one of the keys
                of SeqIO._FormatToIterator (checked with an assert).
    alphabet  - optional Alphabet object, useful when the sequence type
                cannot be automatically inferred from the file itself
                (e.g. fasta).
    seq_count - optional integer, number of sequences expected in each
                alignment. Recommended for fasta format files.

    When seq_count is omitted (or otherwise false) every record in the
    file is gathered into one single alignment, and nothing at all is
    yielded if the file holds no records.

    When seq_count is given, the records are grouped in order into batches
    of exactly seq_count, each batch being yielded as one alignment. If
    records are left over once the input is exhausted a ValueError is
    raised.

    This is a generator function, so nothing here (including the format
    check) runs until the first item is requested.
    """
    from Bio import SeqIO
    assert format in SeqIO._FormatToIterator

    if not seq_count:
        #No batch size to go on, so treat the whole file as one alignment.
        everything = list(SeqIO.parse(handle, format, alphabet))
        if everything:
            yield SeqIO.to_alignment(everything)
        #An empty file simply gives no alignments.
        return

    #Collect records until a batch is full, then hand it over.
    batch = []
    for seq_record in SeqIO.parse(handle, format, alphabet):
        batch.append(seq_record)
        if len(batch) != seq_count:
            continue
        yield SeqIO.to_alignment(batch)
        #Start a fresh list rather than emptying the one just handed over.
        batch = []

    if batch:
        #The final batch never filled up.
        raise ValueError("Check seq_count argument, not enough sequences?")
def _SeqIO_to_alignment_iterator(handle, format, alphabet=None, seq_count=None) :
    """Wrap Bio.SeqIO parsing as a generator of Alignment objects (PRIVATE).

    Arguments:
     - handle    - handle to the file, passed straight to SeqIO.parse.
     - format    - string naming the file format; it must be one of the
                   keys of SeqIO._FormatToIterator (checked with an assert).
     - alphabet  - optional Alphabet object, useful when the sequence type
                   cannot be automatically inferred from the file itself
                   (e.g. fasta, phylip, clustal)
     - seq_count - optional integer, number of sequences expected in each
                   alignment. Recommended for fasta format files.

    With seq_count given, the records are grouped in order into batches of
    exactly seq_count and each batch is yielded as one alignment; records
    left over at the end of the input trigger a ValueError.

    With seq_count omitted (or otherwise false) all the records in the
    file are combined into one single alignment, and nothing is yielded
    if the file holds no records.

    This is a generator function, so nothing here (including the format
    check) runs until the first item is requested.
    """
    from Bio import SeqIO
    assert format in SeqIO._FormatToIterator

    if seq_count :
        #Batch mode: emit one alignment per seq_count records.
        pending = []
        for seq_record in SeqIO.parse(handle, format, alphabet) :
            pending.append(seq_record)
            if len(pending) == seq_count :
                yield SeqIO.to_alignment(pending)
                #Rebind to a new list; the old one was handed over above.
                pending = []
        if pending :
            #Ran out of records part way through a batch.
            raise ValueError("Check seq_count argument, not enough sequences?")
        return

    #No batch size, so every SeqRecord belongs to one single alignment.
    whole_file = list(SeqIO.parse(handle, format, alphabet))
    if whole_file :
        yield SeqIO.to_alignment(whole_file)
    #Otherwise no alignment was found and the generator just finishes.
for given_alpha in bad: #These should all fail... try: print SeqIO.parse(open(t_filename),t_format,given_alpha).next() assert False, "Forcing wrong alphabet, %s, should fail (%s)" \ % (repr(given_alpha), t_filename) except ValueError: pass del good, bad, given_alpha, base_alpha if t_alignment: print "Testing reading %s format file %s as an alignment" \ % (t_format, t_filename) #Using SeqIO.to_alignment(SeqIO.parse(...)) alignment = SeqIO.to_alignment(SeqIO.parse( \ handle=open(t_filename,"r"), format=t_format)) assert len(alignment.get_all_seqs()) == t_count alignment_len = alignment.get_alignment_length() #Check the record order agrees, and double check the #sequence lengths all agree too. for i in range(t_count): assert compare_record(records[i], alignment.get_all_seqs()[i]) assert len(records[i].seq) == alignment_len print alignment_summary(alignment) #Some alignment file formats have magic characters which mean #use the letter in this position in the first sequence. #They should all have been converted by the parser, but if
for given_alpha in bad : #These should all fail... try : print SeqIO.parse(open(t_filename),t_format,given_alpha).next() assert False, "Forcing wrong alphabet, %s, should fail (%s)" \ % (repr(given_alpha), t_filename) except ValueError : pass del good, bad, given_alpha, base_alpha if t_alignment : print "Testing reading %s format file %s as an alignment" \ % (t_format, t_filename) #Using SeqIO.to_alignment(SeqIO.parse(...)) alignment = SeqIO.to_alignment(SeqIO.parse( \ handle=open(t_filename,"r"), format=t_format)) assert len(alignment.get_all_seqs()) == t_count alignment_len = alignment.get_alignment_length() #Check the record order agrees, and double check the #sequence lengths all agree too. for i in range(t_count) : assert records_match(records[i], alignment.get_all_seqs()[i]) assert len(records[i].seq) == alignment_len print alignment_summary(alignment) #Some alignment file formats have magic characters which mean #use the letter in this position in the first sequence. #They should all have been converted by the parser, but if