def to_alignment(sequences, alphabet=None, strict=True):
    """Returns a multiple sequence alignment (OBSOLETE).

     - sequences -An iterator that returns SeqRecord objects,
                  or simply a list of SeqRecord objects.
                  All the record sequences must be the same length.
     - alphabet - Optional alphabet.  Strongly recommended.  If omitted,
                  a consensus alphabet is computed from the records that
                  have a sequence (the iterator is read into a list first).
     - strict   - Optional, defaults to True.  Should error checking
                  be done?

    Raises TypeError if any record has None for its sequence, and
    ValueError if the alphabet is not an Alphabet or AlphabetEncoder.
    With strict=True it also raises ValueError when the sequence lengths
    differ or a record's alphabet is incompatible with the alignment's
    alphabet, and AssertionError when a record's alphabet is neither an
    Alphabet nor an AlphabetEncoder.

    Using this function is now discouraged.  Rather than doing this:

    >>> from Bio import SeqIO
    >>> handle = open("Clustalw/protein.aln")
    >>> alignment = SeqIO.to_alignment(SeqIO.parse(handle, "clustal"))
    >>> handle.close()

    You are now encouraged to use Bio.AlignIO instead, e.g.

    >>> from Bio import AlignIO
    >>> handle = open("Clustalw/protein.aln")
    >>> alignment = AlignIO.read(handle, "clustal")
    >>> handle.close()
    """
    #NOTE(review) - to_alignment is defined twice in this file and the
    #later definition wins at import time; confirm which one should remain.
    #TODO - Move this functionality into the Alignment class instead?
    from Bio.Alphabet import _consensus_alphabet
    if alphabet is None:
        #The records are needed twice (here and in the loop below), so a
        #one-shot iterator has to be turned into a list first.
        sequences = list(sequences)
        alphabet = _consensus_alphabet([rec.seq.alphabet for rec in sequences
                                        if rec.seq is not None])

    if not isinstance(alphabet, (Alphabet, AlphabetEncoder)):
        raise ValueError("Invalid alphabet")

    alignment_length = None
    alignment = Alignment(alphabet)
    for record in sequences:
        #Check for a missing sequence before anything below dereferences
        #record.seq; otherwise strict mode fails on len(None) with an
        #unhelpful message and this explicit error is never reached.
        if record.seq is None:
            raise TypeError("SeqRecord (id=%s) has None for its sequence."
                            % record.id)

        if strict:
            if alignment_length is None:
                #The first record fixes the expected alignment width.
                alignment_length = len(record.seq)
            elif alignment_length != len(record.seq):
                raise ValueError("Sequences must all be the same length")

            seq_alphabet = record.seq.alphabet
            #Kept as an assert so the exception type seen by callers is
            #unchanged; note that it is skipped when Python runs with -O.
            assert isinstance(seq_alphabet, (Alphabet, AlphabetEncoder)), \
                "Sequence does not have a valid alphabet"

            #TODO - Move this alphabet comparison code into the Alphabet
            #module/class?
            #TODO - Is a normal alphabet "ungapped" by default, or does it
            #just mean undecided?
            if isinstance(seq_alphabet, Alphabet) \
            and isinstance(alphabet, Alphabet):
                #Comparing two non-gapped alphabets
                if not isinstance(seq_alphabet, alphabet.__class__):
                    raise ValueError(
                        "Incompatible sequence alphabet %s for %s alignment"
                        % (seq_alphabet, alphabet))
            elif isinstance(seq_alphabet, AlphabetEncoder) \
            and isinstance(alphabet, Alphabet):
                raise ValueError(
                    "Sequence has a gapped alphabet, alignment does not")
            elif isinstance(seq_alphabet, Alphabet) \
            and isinstance(alphabet, Gapped):
                #Sequence isn't gapped, alignment is; compare against the
                #alphabet wrapped by the alignment's Gapped encoder.
                if not isinstance(seq_alphabet, alphabet.alphabet.__class__):
                    raise ValueError(
                        "Incompatible sequence alphabet %s for %s alignment"
                        % (seq_alphabet, alphabet))
            else:
                #Comparing two gapped alphabets
                if not isinstance(seq_alphabet, alphabet.__class__):
                    raise ValueError(
                        "Incompatible sequence alphabet %s for %s alignment"
                        % (seq_alphabet, alphabet))
                if seq_alphabet.gap_char != alphabet.gap_char:
                    raise ValueError(
                        "Sequence gap characters != alignment gap char")
            #ToDo, additional checks on the specified alignment...
            #Should we look at the alphabet.contains() method?

        #This is abusing the "private" records list,
        #we should really have a method like add_sequence
        #but which takes SeqRecord objects.  See also Bug 1944
        alignment._records.append(record)
    return alignment
def to_alignment(sequences, alphabet=None, strict=True):
    """Returns a multiple sequence alignment (OBSOLETE).

     - sequences -An iterator that returns SeqRecord objects,
                  or simply a list of SeqRecord objects.
                  All the record sequences must be the same length.
     - alphabet - Optional alphabet.  Strongly recommended.  If omitted,
                  a consensus alphabet is computed from the records that
                  have a sequence (the iterator is read into a list first).
     - strict   - Optional, defaults to True.  Should error checking
                  be done?

    Raises TypeError if any record has None for its sequence, and
    ValueError if the alphabet is not an Alphabet or AlphabetEncoder.
    With strict=True it also raises ValueError when the sequence lengths
    differ or a record's alphabet is incompatible with the alignment's
    alphabet, and AssertionError when a record's alphabet is neither an
    Alphabet nor an AlphabetEncoder.

    Using this function is now discouraged.  Rather than doing this:

    >>> from Bio import SeqIO
    >>> handle = open("Clustalw/protein.aln")
    >>> alignment = SeqIO.to_alignment(SeqIO.parse(handle, "clustal"))
    >>> handle.close()

    You are now encouraged to use Bio.AlignIO instead, e.g.

    >>> from Bio import AlignIO
    >>> handle = open("Clustalw/protein.aln")
    >>> alignment = AlignIO.read(handle, "clustal")
    >>> handle.close()
    """
    #NOTE(review) - this redefines a to_alignment that already appears
    #earlier in this file; confirm which definition should remain.
    #TODO - Move this functionality into the Alignment class instead?
    from Bio.Alphabet import _consensus_alphabet
    if alphabet is None:
        #The records are needed twice (here and in the loop below), so a
        #one-shot iterator has to be turned into a list first.
        sequences = list(sequences)
        alphabet = _consensus_alphabet([rec.seq.alphabet for rec in sequences
                                        if rec.seq is not None])

    if not isinstance(alphabet, (Alphabet, AlphabetEncoder)):
        raise ValueError("Invalid alphabet")

    alignment_length = None
    alignment = Alignment(alphabet)
    for record in sequences:
        #Check for a missing sequence before anything below dereferences
        #record.seq; otherwise strict mode fails on len(None) with an
        #unhelpful message and this explicit error is never reached.
        if record.seq is None:
            raise TypeError("SeqRecord (id=%s) has None for its sequence."
                            % record.id)

        if strict:
            if alignment_length is None:
                #The first record fixes the expected alignment width.
                alignment_length = len(record.seq)
            elif alignment_length != len(record.seq):
                raise ValueError("Sequences must all be the same length")

            seq_alphabet = record.seq.alphabet
            #Kept as an assert so the exception type seen by callers is
            #unchanged; note that it is skipped when Python runs with -O.
            assert isinstance(seq_alphabet, (Alphabet, AlphabetEncoder)), \
                "Sequence does not have a valid alphabet"

            #TODO - Move this alphabet comparison code into the Alphabet
            #module/class?
            #TODO - Is a normal alphabet "ungapped" by default, or does it
            #just mean undecided?
            if isinstance(seq_alphabet, Alphabet) \
            and isinstance(alphabet, Alphabet):
                #Comparing two non-gapped alphabets
                if not isinstance(seq_alphabet, alphabet.__class__):
                    raise ValueError(
                        "Incompatible sequence alphabet %s for %s alignment"
                        % (seq_alphabet, alphabet))
            elif isinstance(seq_alphabet, AlphabetEncoder) \
            and isinstance(alphabet, Alphabet):
                raise ValueError(
                    "Sequence has a gapped alphabet, alignment does not")
            elif isinstance(seq_alphabet, Alphabet) \
            and isinstance(alphabet, Gapped):
                #Sequence isn't gapped, alignment is; compare against the
                #alphabet wrapped by the alignment's Gapped encoder.
                if not isinstance(seq_alphabet, alphabet.alphabet.__class__):
                    raise ValueError(
                        "Incompatible sequence alphabet %s for %s alignment"
                        % (seq_alphabet, alphabet))
            else:
                #Comparing two gapped alphabets
                if not isinstance(seq_alphabet, alphabet.__class__):
                    raise ValueError(
                        "Incompatible sequence alphabet %s for %s alignment"
                        % (seq_alphabet, alphabet))
                if seq_alphabet.gap_char != alphabet.gap_char:
                    raise ValueError(
                        "Sequence gap characters != alignment gap char")
            #ToDo, additional checks on the specified alignment...
            #Should we look at the alphabet.contains() method?

        #This is abusing the "private" records list,
        #we should really have a method like add_sequence
        #but which takes SeqRecord objects.  See also Bug 1944
        alignment._records.append(record)
    return alignment