def test_Probcons_alignment_clustalw(self):
    """Run ProbCons with ClustalW output and parse the alignment from stdout.

    Checks the generated command line, that ``repr`` of the command line
    evaluates back to an equivalent object, that stderr begins with the
    ``PROBCONS`` banner, and that every aligned record matches the input
    record once gap characters are removed.
    """
    cline = ProbconsCommandline(probcons_exe)
    cline.set_parameter("input", "Fasta/fa01")
    cline.clustalw = True

    expected_command = probcons_exe + " -clustalw Fasta/fa01"
    self.assertEqual(str(cline), expected_command)

    # repr() must produce an expression that rebuilds the same command line.
    rebuilt = eval(repr(cline))
    self.assertEqual(str(rebuilt), str(cline))

    out_text, err_text = cline()
    banner = err_text.strip()
    self.assertTrue(banner.startswith("PROBCONS"))

    alignment = AlignIO.read(StringIO(out_text), "clustal")
    originals = list(SeqIO.parse(self.infile1, "fasta"))
    self.assertEqual(len(originals), len(alignment))

    # Aligning may only insert gaps: ids and ungapped sequences must agree.
    for source_rec, aligned_rec in zip(originals, alignment):
        self.assertEqual(source_rec.id, aligned_rec.id)
        aligned_ungapped = str(aligned_rec.seq).replace("-", "")
        source_ungapped = str(source_rec.seq).replace("-", "")
        self.assertEqual(aligned_ungapped, source_ungapped)
def test_Probcons_complex_commandline(self):
    """Run ProbCons with several options set and an annotation output file.

    Options are set through the constructor, ``set_parameter`` (by name and
    by switch) and attribute assignment; the test then checks the rendered
    command line and the start of stdout and stderr.
    """
    cline = ProbconsCommandline(probcons_exe, pre=1)
    cline.set_parameter("input", "Fasta/fa01")
    cline.consistency = 4
    cline.set_parameter("--iterative-refinement", 222)
    cline.set_parameter("a", True)
    cline.annot = self.annotation_outfile

    expected_command = (
        probcons_exe
        + " -c 4 -ir 222 -pre 1 -annot Fasta/probcons_annot.out"
        " -a Fasta/fa01"
    )
    self.assertEqual(str(cline), expected_command)

    out_text, err_text = cline()
    self.assertTrue(err_text.startswith("\nPROBCONS"))
    self.assertTrue(out_text.startswith(">AK1H_ECOLI/1-378"))
def test_Probcons_alignment_clustalw(self):
    """Round-trip through ProbCons and read the ClustalW alignment from stdout."""
    command = ProbconsCommandline(probcons_exe)
    command.set_parameter("input", "Fasta/fa01")
    command.clustalw = True

    rendered = str(command)
    self.assertEqual(rendered, probcons_exe + " -clustalw Fasta/fa01")
    # The repr of the command line must evaluate back to an equal command.
    self.assertEqual(str(eval(repr(command))), str(command))

    captured_out, captured_err = command()
    self.assertTrue(captured_err.strip().startswith("PROBCONS"))

    aligned = AlignIO.read(StringIO(captured_out), "clustal")
    inputs = list(SeqIO.parse(self.infile1, "fasta"))
    self.assertEqual(len(inputs), len(aligned))

    # Compare record by record, ignoring the gap characters the aligner added.
    for before, after in zip(inputs, aligned):
        self.assertEqual(before.id, after.id)
        self.assertEqual(
            str(after.seq).replace("-", ""),
            str(before.seq).replace("-", ""),
        )
def pbcn(inf, pbcn_exe="/Users/tianyilu/Tools/MSA/probcons/probcons"):
    """Align the sequences in *inf* with ProbCons and save ClustalW output.

    The command line is printed, the program is run, and its stdout is
    written next to the input file.

    Args:
        inf: Path of the input sequence file.
        pbcn_exe: Path of the ProbCons executable. Defaults to the location
            that was previously hard-coded, so existing callers are unaffected.

    The output name is *inf* with ``.fasta`` replaced by ``_probcons.aln``.
    When *inf* does not contain ``.fasta`` the suffix is appended instead;
    otherwise the output path would equal the input path and opening it for
    writing would destroy the input file.
    """
    from Bio.Align.Applications import ProbconsCommandline

    if ".fasta" in inf:
        out_path = inf.replace(".fasta", "_probcons.aln")
    else:
        # Never write the alignment over the input sequences.
        out_path = inf + "_probcons.aln"

    probcons_cline = ProbconsCommandline(pbcn_exe, input=inf, clustalw=True)
    print(probcons_cline)
    stdout, _stderr = probcons_cline()
    with open(out_path, "w") as handle:
        handle.write(stdout)
def test_Probcons_complex_commandline(self):
    """Round-trip through ProbCons with a complex command line and output file."""
    command = ProbconsCommandline(probcons_exe, pre=1)

    # Mix the three ways of setting options: set_parameter by name,
    # set_parameter by switch, and plain attribute assignment.
    command.set_parameter("input", "Fasta/fa01")
    command.consistency = 4
    command.set_parameter("--iterative-refinement", 222)
    command.set_parameter("a", True)
    command.annot = self.annotation_outfile

    rendered = str(command)
    wanted = (
        probcons_exe
        + " -c 4 -ir 222 -pre 1 -annot Fasta/probcons_annot.out"
        " -a Fasta/fa01"
    )
    self.assertEqual(rendered, wanted)

    captured_out, captured_err = command()
    self.assertTrue(captured_err.startswith("\nPROBCONS"))
    self.assertTrue(captured_out.startswith(">AK1H_ECOLI/1-378"))
def test_Probcons_complex_commandline(self):
    """Round-trip through app with complex command line and output file.

    Builds a ProbCons command line with several options, runs it through
    ``Application.generic_run`` and checks the return code and the start of
    stdout and stderr.
    """
    cmdline = ProbconsCommandline(probcons_exe, pre=1)
    cmdline.set_parameter("input", "Fasta/fa01")
    cmdline.consistency = 4
    cmdline.set_parameter("--iterative-refinement", 222)
    cmdline.set_parameter("a", True)
    cmdline.annot = self.annotation_outfile
    self.assertEqual(
        str(cmdline),
        probcons_exe
        + " -c 4 -ir 222 -pre 1 -annot Fasta/probcons_annot.out "
        "-a Fasta/fa01")
    # NOTE(review): Application.generic_run is an old Biopython API and may
    # not exist in newer releases -- confirm against the targeted version.
    result, stdout, stderr = Application.generic_run(cmdline)
    self.assertEqual(str(cmdline), str(result._cl))
    # assertEquals/assert_ are unittest aliases removed in Python 3.12;
    # assertEqual/assertTrue are the supported spellings.
    self.assertEqual(result.return_code, 0)
    self.assertTrue(stderr.read().startswith("\nPROBCONS"))
    self.assertTrue(stdout.read().startswith(">AK1H_ECOLI/1-378"))
def test_Probcons_complex_commandline(self):
    """Round-trip through app with complex command line and output file.

    The command line is assembled from constructor keywords,
    ``set_parameter`` calls and attribute assignment, executed via
    ``Application.generic_run``, and the captured streams are checked.
    """
    cmdline = ProbconsCommandline(probcons_exe, pre=1)
    cmdline.set_parameter("input", "Fasta/fa01")
    cmdline.consistency = 4
    cmdline.set_parameter("--iterative-refinement", 222)
    cmdline.set_parameter("a", True)
    cmdline.annot = self.annotation_outfile
    self.assertEqual(str(cmdline), probcons_exe +
                     " -c 4 -ir 222 -pre 1 -annot Fasta/probcons_annot.out "
                     "-a Fasta/fa01")
    # NOTE(review): Application.generic_run is an old Biopython API and may
    # not exist in newer releases -- confirm against the targeted version.
    result, stdout, stderr = Application.generic_run(cmdline)
    self.assertEqual(str(cmdline), str(result._cl))
    # Use assertEqual/assertTrue: the assertEquals/assert_ aliases were
    # removed from unittest in Python 3.12.
    self.assertEqual(result.return_code, 0)
    self.assertTrue(stderr.read().startswith("\nPROBCONS"))
    self.assertTrue(stdout.read().startswith(">AK1H_ECOLI/1-378"))
def test_Probcons_alignment_clustalw(self):
    """Round-trip through app and read clustalw alignment from stdout.

    Runs ProbCons as a child process with ClustalW output, then checks the
    exit status, the stderr banner, and that each aligned record matches
    the corresponding input record once gaps are removed.
    """
    cmdline = ProbconsCommandline(probcons_exe)
    cmdline.set_parameter("input", "Fasta/fa01")
    cmdline.clustalw = True
    self.assertEqual(str(cmdline), probcons_exe + " -clustalw Fasta/fa01")
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    child = subprocess.Popen(str(cmdline),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             # Text-mode pipes so the output is str (usable
                             # with StringIO/startswith) on Python 2 and 3.
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    # communicate() drains both pipes while waiting; calling wait() first
    # can deadlock once the child fills an OS pipe buffer.
    stdout, stderr = child.communicate()
    self.assertEqual(child.returncode, 0)
    self.assertTrue(stderr.strip().startswith("PROBCONS"))
    align = AlignIO.read(StringIO(stdout), "clustal")
    # Close the input file deterministically instead of leaking the handle.
    with open(self.infile1) as handle:
        records = list(SeqIO.parse(handle, "fasta"))
    self.assertEqual(len(records), len(align))
    for old, new in zip(records, align):
        self.assertEqual(old.id, new.id)
        self.assertEqual(str(new.seq).replace("-", ""),
                         str(old.seq).replace("-", ""))
def test_Probcons_alignment_fasta(self):
    """Run ProbCons with default output and parse the FASTA alignment from stdout.

    Verifies the rendered command line, the ``repr`` round trip, the stderr
    banner, and that the aligned records match the input records when gap
    characters are ignored.
    """
    cline = ProbconsCommandline(probcons_exe, input=self.infile1)

    self.assertEqual(str(cline), probcons_exe + " Fasta/fa01")
    rebuilt = eval(repr(cline))
    self.assertEqual(str(rebuilt), str(cline))

    out_text, err_text = cline()
    self.assertTrue(err_text.startswith("\nPROBCONS"))

    alignment = AlignIO.read(StringIO(out_text), "fasta")
    originals = list(SeqIO.parse(self.infile1, "fasta"))
    self.assertEqual(len(originals), len(alignment))

    # Only gaps may differ between an input record and its aligned form.
    for source_rec, aligned_rec in zip(originals, alignment):
        self.assertEqual(source_rec.id, aligned_rec.id)
        aligned_ungapped = str(aligned_rec.seq).replace("-", "")
        source_ungapped = str(source_rec.seq).replace("-", "")
        self.assertEqual(aligned_ungapped, source_ungapped)
def test_Probcons_complex_commandline(self):
    """Round-trip through app with complex command line and output file.

    Builds a ProbCons command line with several options, runs it as a child
    process and checks the exit status and the start of stdout and stderr.
    """
    cmdline = ProbconsCommandline(probcons_exe, pre=1)
    cmdline.set_parameter("input", "Fasta/fa01")
    cmdline.consistency = 4
    cmdline.set_parameter("--iterative-refinement", 222)
    cmdline.set_parameter("a", True)
    cmdline.annot = self.annotation_outfile
    self.assertEqual(str(cmdline), probcons_exe +
                     " -c 4 -ir 222 -pre 1 -annot Fasta/probcons_annot.out "
                     "-a Fasta/fa01")
    child = subprocess.Popen(str(cmdline),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             # Text-mode pipes so startswith() receives str
                             # on both Python 2 and Python 3.
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    # Read both pipes while waiting: wait() followed by read() can deadlock
    # when the child writes more than the OS pipe buffer holds.
    stdout, stderr = child.communicate()
    self.assertEqual(child.returncode, 0)
    self.assertTrue(stderr.startswith("\nPROBCONS"))
    self.assertTrue(stdout.startswith(">AK1H_ECOLI/1-378"))
def test_Probcons_alignment_fasta(self):
    """Round-trip through app and read fasta alignment from stdout.

    Runs ProbCons through ``Application.generic_run``, parses stdout as a
    FASTA alignment and compares it record by record with the input file,
    ignoring gap characters.
    """
    cmdline = ProbconsCommandline(probcons_exe, input=self.infile1)
    self.assertEqual(str(cmdline), probcons_exe + " Fasta/fa01")
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
    # NOTE(review): Application.generic_run is an old Biopython API and may
    # not exist in newer releases -- confirm against the targeted version.
    result, stdout, stderr = Application.generic_run(cmdline)
    # assertEquals/assert_ are unittest aliases removed in Python 3.12.
    self.assertEqual(result.return_code, 0)
    self.assertEqual(str(cmdline), str(result._cl))
    self.assertTrue(stderr.read().startswith("\nPROBCONS"))
    align = AlignIO.read(StringIO(stdout.read()), "fasta")
    # Close the input file deterministically instead of leaking the handle.
    with open(self.infile1) as handle:
        records = list(SeqIO.parse(handle, "fasta"))
    self.assertEqual(len(records), len(align))
    for old, new in zip(records, align):
        self.assertEqual(old.id, new.id)
        self.assertEqual(str(new.seq).replace("-", ""),
                         str(old.seq).replace("-", ""))
def run_multiple_sequence_alignment(records, workdir, msa):
    """Run a multiple sequence alignment of *records* with the chosen program.

    The records are written to ``<workdir>/msa.fasta`` and the selected
    aligner is run on that file. The alignment path is ``<workdir>/msa.aln``
    and the guide-tree path is ``<workdir>/msa.dnd`` for the programs that
    accept those options; for MAFFT and ProbCons the captured stdout is
    written to ``msa.aln`` instead.

    Author's notes on the choices: clustalw is old; Clustal Omega is
    recommended for proteins (and also uses HMM); MUSCLE or MAFFT are
    recommended for nucleotide data (MUSCLE should be pretty fast);
    T-Coffee is good for distantly related sequences.

    FUTURE: Add more iterative methods to improve runtime? Add HMMER?
    HHpred is also quite fast.

    Args:
        records: Sequence records, passed to ``SeqIO.write`` as FASTA.
        workdir: Directory that receives the input and output files.
        msa: Aligner name, case-insensitive. Accepted: emma, clustalo /
            clustal_omega / clustal-omega, t-coffee / t_coffee, muscle,
            mafft, clustalw / clustalw2, prank, msaprobs, probcons, dialign.

    Raises:
        ValueError: If *msa* is not one of the accepted names. This is
            raised before any file is written. (ValueError replaces the
            bare BaseException raised previously; it is still a
            BaseException subclass.)
    """
    # Map every accepted spelling onto one canonical aligner name.
    aliases = {
        "emma": "emma",
        "clustalo": "clustalo",
        "clustal_omega": "clustalo",
        "clustal-omega": "clustalo",
        "t-coffee": "t_coffee",
        "t_coffee": "t_coffee",
        "muscle": "muscle",
        "mafft": "mafft",
        "clustalw": "clustalw",
        "clustalw2": "clustalw",
        "prank": "prank",
        "msaprobs": "msaprobs",
        "probcons": "probcons",
        "dialign": "dialign",
    }
    algorithm = aliases.get(msa.lower())
    if algorithm is None:
        # Fail fast, before touching the file system.
        raise ValueError(
            "Only Multiple Sequence Alignment algorithms currently supported "
            "are emma, clustalo, t_coffee, muscle, mafft, clustalw, prank, "
            "msaprobs, probcons and dialign (got %r)" % (msa,)
        )

    # Write the sequences to a FASTA file for the aligner to read.
    sequence_list_file = os.path.join(workdir, "msa.fasta")
    SeqIO.write(records, sequence_list_file, "fasta")

    # Prepare filenames for MSA output.
    outfile = os.path.join(workdir, "msa.aln")
    treefile = os.path.join(workdir, "msa.dnd")

    # Prepare the command line according to the chosen algorithm.
    if algorithm == "emma":
        # Output is fasta.
        print("Aligning by emma")
        cmd = EmmaCommandline(sequence=sequence_list_file,
                              outseq=outfile,
                              dendoutfile=treefile)
    elif algorithm == "clustalo":
        print("Aligning by Clustal Omega")
        cmd = ClustalOmegaCommandline(infile=sequence_list_file,
                                      outfile=outfile,
                                      verbose=True,
                                      auto=True,
                                      guidetree_out=treefile,
                                      outfmt="clu",
                                      force=True)
    elif algorithm == "t_coffee":
        # Should output tree file automatically.
        print("Aligning by T-Coffeee")
        cmd = TCoffeeCommandline(infile=sequence_list_file,
                                 output="clustalw",
                                 outfile=outfile)
    elif algorithm == "muscle":
        print("Aligning by MUSCLE")
        # No clw=True here: the ClustalW-output variant was disabled in the
        # original code, so MUSCLE writes its default output format.
        cmd = MuscleCommandline(input=sequence_list_file,
                                out=outfile,
                                tree2=treefile)
    elif algorithm == "mafft":
        # Probably saves the tree as input.tree.
        print("Aligning by MAFFT")
        cmd = MafftCommandline(input=sequence_list_file,
                               clustalout=True,
                               treeout=True)
    elif algorithm == "clustalw":
        print("Aligning by ClustalW2")
        cmd = ClustalwCommandline("clustalw",
                                  infile=sequence_list_file,
                                  outfile=outfile,
                                  tree=True,
                                  newtree=treefile)
    elif algorithm == "prank":
        # Output is fasta, tree will be outputted to .dnd file?
        print("Aligning by PRANK")
        cmd = PrankCommandline(d=sequence_list_file,
                               o=outfile,
                               f=8,
                               showtree=True,
                               noxml=True)
    elif algorithm == "msaprobs":
        # Doesn't use a guide tree.
        print("Aligning by MSAprobs")
        cmd = MSAProbsCommandline(infile=sequence_list_file,
                                  outfile=outfile,
                                  clustalw=True)
    elif algorithm == "probcons":
        print("Aligning by ProbCons")
        cmd = ProbconsCommandline(input=sequence_list_file, clustalw=True)
    else:
        # "dialign" is the only canonical name left.
        # Phylip tree should be created automatically, names are a mystery?
        print("Aligning by Dialign")
        cmd = DialignCommandline(input=sequence_list_file,
                                 cw=True,
                                 fn=outfile)

    # Execute the command.
    stdout, stderr = cmd()

    # For algorithms that don't have an option to save output to a file,
    # capture the stdout.
    if algorithm in ("mafft", "probcons"):
        with open(outfile, "w") as handle:
            handle.write(stdout)