def loadAlignment( self, path ):
    """Load a Clustalw alignment file and cache its contents on this object.

    path -- location of an alignment file in .aln format.

    The parsed alignment is used to fill three attributes:
      self.allseq  -- the result of the alignment's get_all_seqs()
      self.summary -- an AlignInfo.SummaryInfo built from the alignment
      self.l       -- the alignment length
    Afterwards self.insertLoadedBioAlignment() is called.
    """
    parsed = Clustalw.parse_file(path)

    self.allseq = parsed.get_all_seqs()
    self.summary = AlignInfo.SummaryInfo(parsed)
    self.l = parsed.get_alignment_length()

    # Hand over to the object's own post-load hook.
    self.insertLoadedBioAlignment()
def loadAlignment( self, alignmentFile ):
    """Populate this object with the alignment data from a CLUSTAL .aln file.

    alignmentFile -- the .aln file handed to Clustalw.parse_file().

    Sets self.alignmentLength and, for every record in the alignment,
    appends two fasta.Record objects titled with the record description:
    the sequence as aligned (gaps kept) goes to self.alignments, and the
    same sequence with every "-" removed goes to self.sequences.
    """
    # NOTE: the CLUSTAL parser does not handle Windows line breaks well.
    parsed = Clustalw.parse_file(alignmentFile)
    records = parsed.get_all_seqs()
    self.alignmentLength = parsed.get_alignment_length()

    for record in records:
        gapped_text = record.seq.tostring()

        ungapped = fasta.Record()
        gapped = fasta.Record()

        ungapped.title = record.description
        gapped.title = record.description

        gapped.sequence = gapped_text
        ungapped.sequence = gapped_text.replace("-","")

        self.alignments.append(gapped)
        self.sequences.append(ungapped)
#!/usr/bin/env python
"""Example: derive a substitution matrix from a Clustalw protein alignment.

Reads 'protein.aln', builds a replacement dictionary from it, turns that
into an accepted replacement matrix and finally a log odds matrix, printing
the intermediate and final results.
"""
# standard library
import sys

# Biopython
from Bio import SubsMat
from Bio import Clustalw
from Bio.Alphabet import IUPAC
from Bio.Align import AlignInfo

# Residues handed to replacement_dictionary() so that they are left out:
# this excludes all amino acids that aren't charged polar.
skipped_residues = ["G", "A", "V", "L", "I", "M", "P", "F", "W",
                    "S", "T", "N", "Q", "Y", "C"]

# get an alignment object from a Clustalw alignment output
c_align = Clustalw.parse_file('protein.aln', IUPAC.protein)
summary_align = AlignInfo.SummaryInfo(c_align)

# get a replacement dictionary and accepted replacement matrix
replace_info = summary_align.replacement_dictionary(skipped_residues)
my_arm = SubsMat.SeqMat(replace_info)

print(replace_info)

my_lom = SubsMat.make_log_odds_matrix(my_arm)

# Label and matrix are joined by a single space, as a two-item print would.
print("%s %s" % ('log_odds_mat:', my_lom))
my_lom.print_mat()
# Last check of the preceding section, then drop its working objects.
assert alignment[::-1][2].id == "mixed"
del alignment
del letters

print("testing reading and writing clustal format...")
test_dir = os.path.join(os.getcwd(), 'Clustalw')
test_names = ['opuntia.aln', 'cw02.aln']

# Full path of each sample file inside the Clustalw directory.
test_files = [os.path.join(test_dir, name) for name in test_names]

for test_file in test_files:
    # parse the alignment file and get an alignment object
    alignment = Clustalw.parse_file(test_file)

    # print the alignment back out
    print(alignment)

# Re-parse the first sample; the remaining checks all work on it.
alignment = Clustalw.parse_file(os.path.join(test_dir, test_names[0]))

# test the base alignment stuff
print('all_seqs...')
for seq_record in alignment:
    print("%s %s" % ('description:', seq_record.description))
    print("%s %s" % ('seq:', repr(seq_record.seq)))
print("%s %s" % ('length:', alignment.get_alignment_length()))

print('Calculating summary information...')
align_info = AlignInfo.SummaryInfo(alignment)
# Check Bio.AlignIO.read(...)
# The file handle is kept in a variable and closed explicitly so the test
# does not leave an open file behind for every input it processes.
read_handle = open(t_filename)
try:
    alignment = AlignIO.read(handle=read_handle, format="clustal")
finally:
    read_handle.close()
assert isinstance(alignment, Alignment)
assert compare(alignment, alignments[0])

print("Using Bio.AlignIO.read(...)")
#print "~" * 75
#handle = StringIO()
#AlignIO.write([alignment], handle, "clustal")
#handle.seek(0)
#print handle.read()
#print "~" * 75

print("Using Bio.Clustalw.parse_file(...)")
c_alignment = Clustalw.parse_file(t_filename)
assert isinstance(c_alignment, Alignment)
assert isinstance(c_alignment, Clustalw.ClustalAlignment)
#print "  Using Bio.Clustalw.parse_file(...)"
#print "~" * 75
#print c_alignment
#print "~" * 75
#print

# Compare the two...
assert compare(alignment, c_alignment)

# Check Bio.AlignIO can read the Bio.Clustalw's string output
n_alignment = AlignIO.read(StringIO(str(c_alignment)), "clustal")
# Type-check the alignment that was just re-read; `alignment` itself was
# already checked above.
assert isinstance(n_alignment, Alignment)
# Final assertion of the previous section, followed by its clean-up.
assert alignment[::-1][2].id == "mixed"
del alignment
del letters

print("testing reading and writing clustal format...")
test_dir = os.path.join(os.getcwd(), 'Clustalw')
test_names = ['opuntia.aln', 'cw02.aln']

# Collect the full path of every sample file.
test_files = []
for name in test_names:
    full_path = os.path.join(test_dir, name)
    test_files.append(full_path)

for test_file in test_files:
    # parse the alignment file and get an alignment object
    alignment = Clustalw.parse_file(test_file)
    # print the alignment back out
    print(alignment)

# The first sample file is parsed again for the checks that follow.
alignment = Clustalw.parse_file(os.path.join(test_dir, test_names[0]))

# test the base alignment stuff
print('all_seqs...')
all_seqs = alignment.get_all_seqs()
for seq_record in all_seqs:
    print(" ".join(['description:', str(seq_record.description)]))
    print(" ".join(['seq:', repr(seq_record.seq)]))
print(" ".join(['length:', str(alignment.get_alignment_length())]))

print('Calculating summary information...')
#!/usr/bin/env python
"""Example of turning a Clustalw alignment into a substitution matrix.

The script parses "protein.aln", summarises it, builds a replacement
dictionary and accepted replacement matrix, and prints the resulting
log odds matrix.
"""
# standard library
import sys

# Biopython
from Bio import SubsMat
from Bio import Clustalw
from Bio.Alphabet import IUPAC
from Bio.Align import AlignInfo

# get an alignment object from a Clustalw alignment output
c_align = Clustalw.parse_file("protein.aln", IUPAC.protein)
summary_align = AlignInfo.SummaryInfo(c_align)

# get a replacement dictionary and accepted replacement matrix
# exclude all amino acids that aren't charged polar
replace_info = summary_align.replacement_dictionary(
    [
        "G", "A", "V", "L", "I",
        "M", "P", "F", "W", "S",
        "T", "N", "Q", "Y", "C",
    ]
)
my_arm = SubsMat.SeqMat(replace_info)
print(replace_info)

# Convert the accepted replacement matrix into log odds form and show it.
my_lom = SubsMat.make_log_odds_matrix(my_arm)
print(" ".join(["log_odds_mat:", str(my_lom)]))
my_lom.print_mat()