def clustal_align_protein(rec_1, rec_2, work_dir):
    """Align the two given proteins with clustalw.

    The two records are written to ``prot-start.fasta`` inside *work_dir*,
    clustalw (``CLUSTALW_BIN``) is run on that file as a PROTEIN alignment
    with INPUT output ordering, and the resulting ``prot.aln`` file is
    parsed back in.

    Args:
        rec_1: first sequence record, in a form ``SeqIO.write`` accepts.
        rec_2: second sequence record.
        work_dir: directory in which the intermediate files are created.

    Returns:
        The parsed alignment rendered with ``alignment.format("fasta")``.
    """
    fasta_file = op.join(work_dir, "prot-start.fasta")
    align_file = op.join(work_dir, "prot.aln")

    # Close the handle explicitly so the FASTA data is flushed to disk
    # before clustalw reads the file, and so the handle is never leaked.
    fasta_handle = open(fasta_file, "w")
    try:
        SeqIO.write((rec_1, rec_2), fasta_handle, "fasta")
    finally:
        fasta_handle.close()

    clustal_cl = Clustalw.MultipleAlignCL(fasta_file, command=CLUSTALW_BIN)
    clustal_cl.set_output(align_file, output_order="INPUT")
    clustal_cl.set_type("PROTEIN")

    # Log before running so the message is visible even if clustalw fails.
    sys.stderr.write("\tDoing clustalw alignment: %s\n" % clustal_cl)
    Clustalw.do_alignment(clustal_cl)

    aln_handle = open(clustal_cl.output_file)
    try:
        alignment = AlignIO.read(aln_handle, "clustal")
    finally:
        aln_handle.close()
    return alignment.format("fasta")
def Align_Results(OutputFileName, ResultsDir=None):
    """Run clustalw on a consensus FASTA file and return the alignment.

    The input is ``<ResultsDir>/<OutputFileName>.FASTA`` and clustalw is told
    to write its alignment to ``<ResultsDir>/<OutputFileName>.ALN``. Both
    paths are printed before the alignment is run.

    Args:
        OutputFileName: base name (without extension) of the FASTA/ALN pair.
        ResultsDir: directory holding the files. Defaults to the original
            hard-coded ``Consensus_Results`` directory, so existing callers
            behave as before.

    Returns:
        The object returned by ``Clustalw.do_alignment``.
    """
    import os
    from Bio import Clustalw
    from Bio.Clustalw import MultipleAlignCL

    if ResultsDir is None:
        ResultsDir = ("/users/rwbarrettemac/bioinformatics/pythonfolders/"
                      "FMDanalysisScript/FMDserotypingARRAY/Consensus_Results")

    FileIN_Name = os.path.join(ResultsDir, "%s.FASTA" % (OutputFileName))
    FileOUT_ALN = os.path.join(ResultsDir, "%s.ALN" % (OutputFileName))
    print(FileIN_Name)
    print(FileOUT_ALN)

    cline = MultipleAlignCL(FileIN_Name)
    cline.set_output(FileOUT_ALN)

    # NOTE: the command-line object is a plain option holder with no
    # close() method, so there is nothing to release here; the previous
    # cline.close() call could only raise AttributeError.
    alignment = Clustalw.do_alignment(cline)
    return alignment
def align(self):
    """Align the sequences using CLUSTAL, storing the results.

    Writes the sequences to ``self.tmpFileName`` via ``sequencesToFile``,
    runs clustalw (``self.clustalPath``) on that file, then stores one
    ``fasta.Record`` per aligned sequence in ``self.alignments`` and the
    alignment length in ``self.alignmentLength``.

    Returns immediately, without touching any state, when there are no
    sequences. The temporary file is removed whether or not the alignment
    succeeds; any exception raised while aligning is propagated.
    """
    if len(self.sequences) == 0:
        return

    try:
        self.sequencesToFile(self.tmpFileName)
        commandLine = MultipleAlignCL(
            os.path.join(os.curdir, self.tmpFileName), self.clustalPath)
        alignment = Clustalw.do_alignment(commandLine)

        alignmentStrings = []
        for record in alignment.get_all_seqs():
            f = fasta.Record()
            f.title = record.description.strip()
            f.sequence = record.seq.tostring()
            alignmentStrings.append(f)

        self.alignments = alignmentStrings
        self.alignmentLength = alignment.get_alignment_length()
    finally:
        # Clean up even on failure; the existence check avoids masking the
        # original error if the file was never written.
        if os.path.exists(self.tmpFileName):
            os.remove(self.tmpFileName)
# biopython
from Bio.Alphabet import IUPAC
from Bio import Clustalw
from Bio.Clustalw import MultipleAlignCL
from Bio.Align import AlignInfo
from Bio.SubsMat import FreqTable

# Build the clustalw command line for the example input. clustalw is
# expected to be somewhere on the path; if it is not, give its complete
# path as a second argument to MultipleAlignCL.
cline = MultipleAlignCL(os.path.join(os.curdir, 'opuntia.fasta'))
cline.set_output('test.aln')

# Run the alignment and get back an alignment object.
alignment = Clustalw.do_alignment(cline)

# Show the description and sequence of the first aligned record.
all_records = alignment.get_all_seqs()
print('%s %s' % ('description:', all_records[0].description))
print('%s %s' % ('sequence:', all_records[0].seq))

# Report the alignment length, followed by the alignment itself.
print('%s %s' % ('length', alignment.get_alignment_length()))
print(alignment)

# Summary object used to report interesting information about the alignment.
summary_align = AlignInfo.SummaryInfo(alignment)
if not clustalw_exe: raise MissingExternalDependencyError(\ "Install clustalw or clustalw2 if you want to use Bio.Clustalw.") ################################################################# print "Checking error conditions" print "=========================" print "Empty file" input_file = "does_not_exist.fasta" assert not os.path.isfile(input_file) cline = MultipleAlignCL(input_file, command=clustalw_exe) try: align = Clustalw.do_alignment(cline) assert False, "Should have failed, returned %s" % repr(align) except IOError, err: print "Failed (good)" #Python 2.3 on Windows gave (0, 'Error') #Python 2.5 on Windows gives [Errno 0] Error assert "Cannot open sequence file" in str(err) \ or "not produced" in str(err) \ or str(err) == "[Errno 0] Error" \ or str(err) == "(0, 'Error')", str(err) print print "Single sequence" input_file = "Fasta/f001" assert os.path.isfile(input_file) assert len(list(SeqIO.parse(input_file, "fasta"))) == 1