def reads2clones_align():
    """Align forward and reverse reads to the reference with needle.

    Two needle runs are made against 'tmp.fas' (gap open 6.0, gap extend
    3.0, markx10 output): 'tmp_reads_f.fas' -> 'tmp_align_f.needle' and
    'tmp_reads_r.fas' -> 'tmp_align_r.needle'.  The result entries of
    both runs are written to stdout; when the module-level ``Verbose``
    flag is true, the lines of the message and error handles returned by
    ``generic_run`` are echoed to stderr.  Returns None.
    """
    from Bio.Application import generic_run
    from cmline import NeedleCommandline

    needle_run = NeedleCommandline()
    for option, value in (('-asequence', 'tmp.fas'),
                          ('-gapopen', 6.0),
                          ('-gapextend', 3.0),
                          ('-aformat', 'markx10')):
        needle_run.set_parameter(option, value)

    # Both alignments are launched before anything is reported.
    finished = []
    for reads_file, align_file in (('tmp_reads_f.fas', 'tmp_align_f.needle'),
                                   ('tmp_reads_r.fas', 'tmp_align_r.needle')):
        needle_run.set_parameter('-bsequence', reads_file)
        needle_run.set_parameter('-outfile', align_file)
        finished.append(generic_run(needle_run))

    for result, messages, errors in finished:
        for name in result.available_results():
            sys.stdout.write('%s %s\n' % (name, result.get_result(name)))
        if Verbose:
            for line in messages.readlines():
                sys.stderr.write('%s\n' % line)
            for line in errors.readlines():
                sys.stderr.write('%s\n' % line)
    return
def reads2clones_align(ref_genome):
    """Align forward and reverse reads to ``ref_genome`` with needle.

    needle is run twice with gap open 6.0, gap extend 3.0, markx10
    output and the '-usa' and '-auto' switches set: once for
    'tmp_reads_f.fas' and once for 'tmp_reads_r.fas'.

    Parameters:
        ref_genome: value passed as '-asequence'; it is also the prefix
            of both output file names.

    Returns:
        (forward_outfile, reverse_outfile), i.e. ``ref_genome`` followed
        by '_f.needle' and '_r.needle' respectively.

    Result entries are logged through ``logfun.info``; every line of the
    message and error handles returned by ``generic_run`` goes to
    ``logfun.debug``.
    """
    from Bio.Application import generic_run
    from pythonlib.cmline import NeedleCommandline

    needle_run = NeedleCommandline()
    needle_run.set_parameter('-asequence', ref_genome)
    needle_run.set_parameter('-gapopen', 6.0)
    needle_run.set_parameter('-gapextend', 3.0)
    needle_run.set_parameter('-aformat', 'markx10')
    needle_run.set_parameter('-usa', True)
    needle_run.set_parameter('-auto', True)

    # Launch both runs first (forward, then reverse); report afterwards.
    outfiles = []
    finished = []
    for reads_file, suffix in (('tmp_reads_f.fas', '_f.needle'),
                               ('tmp_reads_r.fas', '_r.needle')):
        outfile = ref_genome + suffix
        needle_run.set_parameter('-bsequence', reads_file)
        needle_run.set_parameter('-outfile', outfile)
        finished.append(generic_run(needle_run))
        outfiles.append(outfile)

    for result, messages, errors in finished:
        for ar in result.available_results():
            logfun.info(ar + result.get_result(ar))
        for line in messages.readlines():
            logfun.debug(line)
        for line in errors.readlines():
            logfun.debug(line)

    return outfiles[0], outfiles[1]
def test_water_file(self):
    """water with the asis trick, output to a file."""
    expected_path = "Emboss/temp with space.water"
    # The command is deliberately built three ways: constructor keywords,
    # set_parameter() with and without the leading dash, and a property.
    cline = WaterCommandline(cmd=exes["water"], gapopen="10", gapextend="0.5")
    cline.set_parameter("asequence", "asis:ACCCGGGCGCGGT")
    cline.set_parameter("-bsequence", "asis:ACCCGAGCGCGGT")
    cline.outfile = expected_path
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cline))), str(cline))

    # Run the tool.
    result, out, err = generic_run(cline)

    # The test expects the tool banner on err and nothing on out.
    banner = err.read().strip()
    self.assert_(banner.startswith("Smith-Waterman local alignment"), banner)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # Show the failing command line before the assertion fires.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    produced = result.get_result("outfile")
    self.assertEqual(produced, expected_path)
    assert os.path.isfile(produced)

    # Check we can parse the output...
    align = AlignIO.read(open(produced), "emboss")
    self.assertEqual(len(align), 2)
    self.assertEqual(str(align[0].seq), "ACCCGGGCGCGGT")
    self.assertEqual(str(align[1].seq), "ACCCGAGCGCGGT")

    # Clean up.
    os.remove(produced)
def haps2clones_align(est_hap):
    """Align ``est_hap`` against 'tmp_ref.fas' with needle.

    needle is run with gap open 6.0, gap extend 3.0, markx10 output and
    the '-auto' switch set.

    Parameters:
        est_hap: value passed as '-asequence'; it is also the prefix of
            the output file name.

    Returns:
        The output file name, ``est_hap + '_hap.needle'``.

    Result entries are logged through ``logfun.info``; every line of the
    message and error handles returned by ``generic_run`` goes to
    ``logfun.debug``.
    """
    from Bio.Application import generic_run
    from pythonlib.cmline import NeedleCommandline

    outfile = est_hap + '_hap.needle'

    needle_run = NeedleCommandline()
    needle_run.set_parameter('-asequence', est_hap)
    needle_run.set_parameter('-gapopen', 6.0)
    needle_run.set_parameter('-gapextend', 3.0)
    needle_run.set_parameter('-aformat', 'markx10')
    needle_run.set_parameter('-auto', True)
    needle_run.set_parameter('-bsequence', 'tmp_ref.fas')
    needle_run.set_parameter('-outfile', outfile)

    result, messages, errors = generic_run(needle_run)

    for ar in result.available_results():
        logfun.info(ar + result.get_result(ar))
    for line in messages.readlines():
        logfun.debug(line)
    for line in errors.readlines():
        logfun.debug(line)

    return outfile
def test_water_file2(self):
    """water with the asis trick and nucleotide FASTA file, output to a file."""
    # Setup.
    query = "ACACACTCACACACACTTGGTCAGAGATGCTGTGCTTCTTGGAAGCAAGGNCTCAAAGGCAAGGTGCACGCAGAGGGACGTTTGAGTCTGGGATGAAGCATGTNCGTATTATTTATATGATGGAATTTCACGTTTTTATG"
    out_file = "Emboss/temp_test2.water"
    in_file = "Fasta/f002"
    self.assert_(os.path.isfile(in_file))
    # Start from a clean slate so a stale output cannot satisfy the test.
    if os.path.isfile(out_file):
        os.remove(out_file)

    cline = WaterCommandline(cmd=exes["water"])
    for option, value in (("-asequence", "asis:%s" % query),
                          ("-bsequence", in_file),
                          ("-gapopen", "10"),
                          ("-gapextend", "0.5"),
                          ("-outfile", out_file)):
        cline.set_parameter(option, value)
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cline))), str(cline))

    # Run the tool.
    result, out, err = generic_run(cline)

    # The test expects the tool banner on err and nothing on out.
    banner = err.read().strip()
    self.assert_(banner.startswith("Smith-Waterman local alignment"), banner)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # Show the failing command line before the assertion fires.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    self.assertEqual(result.get_result("outfile"), out_file)
    assert os.path.isfile(out_file)

    # Check we can parse the output and it is sensible...
    targets = SeqIO.parse(open(in_file), "fasta")
    alignments = AlignIO.parse(open(out_file), "emboss")
    self.pairwise_alignment_check(query, targets, alignments, local=True)

    # Clean up.
    os.remove(out_file)
def test_needle_file(self):
    """needle with the asis trick, output to a file."""
    expected_path = "Emboss/temp with space.needle"
    # Setup.  "-snucleotide" is set explicitly rather than left for the
    # tool to guess.
    cline = NeedleCommandline(cmd=exes["needle"])
    for option, value in (("-asequence", "asis:ACCCGGGCGCGGT"),
                          ("-bsequence", "asis:ACCCGAGCGCGGT"),
                          ("-gapopen", "10"),
                          ("-gapextend", "0.5"),
                          ("-snucleotide", "True"),
                          ("-outfile", expected_path)):
        cline.set_parameter(option, value)
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cline))), str(cline))

    # Run the tool.
    result, out, err = generic_run(cline)

    # The test expects the tool banner on err and nothing on out.
    banner = err.read().strip()
    self.assert_(banner.startswith("Needleman-Wunsch global alignment"), banner)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # Show the failing command line before the assertion fires.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    produced = result.get_result("outfile")
    self.assertEqual(produced, expected_path)
    assert os.path.isfile(produced)

    # Check we can parse the output...
    align = AlignIO.read(open(produced), "emboss")
    self.assertEqual(len(align), 2)
    self.assertEqual(str(align[0].seq), "ACCCGGGCGCGGT")
    self.assertEqual(str(align[1].seq), "ACCCGAGCGCGGT")

    # Clean up.
    os.remove(produced)
def test_water_file(self):
    """water with the asis trick, output to a file."""
    expected_path = "Emboss/temp with space.water"
    # The command is deliberately built three ways: constructor keywords,
    # set_parameter() with and without the leading dash, and a property.
    cline = WaterCommandline(cmd=exes["water"], gapopen="10", gapextend="0.5")
    cline.set_parameter("asequence", "asis:ACCCGGGCGCGGT")
    cline.set_parameter("-bsequence", "asis:ACCCGAGCGCGGT")
    cline.outfile = expected_path
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cline))), str(cline))

    # Run the tool.
    result, out, err = generic_run(cline)

    # The test expects the tool banner on err and nothing on out.
    banner = err.read().strip()
    self.assert_(banner.startswith("Smith-Waterman local alignment"), banner)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # Show the failing command line before the assertion fires.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    produced = result.get_result("outfile")
    self.assertEqual(produced, expected_path)
    assert os.path.isfile(produced)

    # Check we can parse the output...
    align = AlignIO.read(open(produced), "emboss")
    self.assertEqual(len(align), 2)
    self.assertEqual(str(align[0].seq), "ACCCGGGCGCGGT")
    self.assertEqual(str(align[1].seq), "ACCCGAGCGCGGT")

    # Clean up.
    os.remove(produced)
def test_needle_file(self):
    """needle with the asis trick, output to a file."""
    expected_path = "Emboss/temp with space.needle"
    # Setup.  "-snucleotide" is set explicitly rather than left for the
    # tool to guess.
    cline = NeedleCommandline(cmd=exes["needle"])
    for option, value in (("-asequence", "asis:ACCCGGGCGCGGT"),
                          ("-bsequence", "asis:ACCCGAGCGCGGT"),
                          ("-gapopen", "10"),
                          ("-gapextend", "0.5"),
                          ("-snucleotide", "True"),
                          ("-outfile", expected_path)):
        cline.set_parameter(option, value)
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cline))), str(cline))

    # Run the tool.
    result, out, err = generic_run(cline)

    # The test expects the tool banner on err and nothing on out.
    banner = err.read().strip()
    self.assert_(banner.startswith("Needleman-Wunsch global alignment"), banner)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # Show the failing command line before the assertion fires.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    produced = result.get_result("outfile")
    self.assertEqual(produced, expected_path)
    assert os.path.isfile(produced)

    # Check we can parse the output...
    align = AlignIO.read(open(produced), "emboss")
    self.assertEqual(len(align), 2)
    self.assertEqual(str(align[0].seq), "ACCCGGGCGCGGT")
    self.assertEqual(str(align[1].seq), "ACCCGAGCGCGGT")

    # Clean up.
    os.remove(produced)
def test_water_file2(self):
    """water with the asis trick and nucleotide FASTA file, output to a file."""
    # Setup.
    query = "ACACACTCACACACACTTGGTCAGAGATGCTGTGCTTCTTGGAAGCAAGGNCTCAAAGGCAAGGTGCACGCAGAGGGACGTTTGAGTCTGGGATGAAGCATGTNCGTATTATTTATATGATGGAATTTCACGTTTTTATG"
    out_file = "Emboss/temp_test2.water"
    in_file = "Fasta/f002"
    self.assert_(os.path.isfile(in_file))
    # Start from a clean slate so a stale output cannot satisfy the test.
    if os.path.isfile(out_file):
        os.remove(out_file)

    cline = WaterCommandline(cmd=exes["water"])
    for option, value in (("-asequence", "asis:%s" % query),
                          ("-bsequence", in_file),
                          ("-gapopen", "10"),
                          ("-gapextend", "0.5"),
                          ("-outfile", out_file)):
        cline.set_parameter(option, value)
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cline))), str(cline))

    # Run the tool.
    result, out, err = generic_run(cline)

    # The test expects the tool banner on err and nothing on out.
    banner = err.read().strip()
    self.assert_(banner.startswith("Smith-Waterman local alignment"), banner)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # Show the failing command line before the assertion fires.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    self.assertEqual(result.get_result("outfile"), out_file)
    assert os.path.isfile(out_file)

    # Check we can parse the output and it is sensible...
    targets = SeqIO.parse(open(in_file), "fasta")
    alignments = AlignIO.parse(open(out_file), "emboss")
    self.pairwise_alignment_check(query, targets, alignments, local=True)

    # Clean up.
    os.remove(out_file)
def test_water_file3(self):
    """water with the asis trick and GenBank file, output to a file."""
    # Setup.
    query = "TGTTGTAATGTTTTAATGTTTCTTCTCCCTTTAGATGTACTACGTTTGGA"
    out_file = "Emboss/temp_test3.water"
    in_file = "GenBank/cor6_6.gb"
    self.assert_(os.path.isfile(in_file))
    # Start from a clean slate so a stale output cannot satisfy the test.
    if os.path.isfile(out_file):
        os.remove(out_file)

    # Parameter names are given without the leading dash here.
    # TODO - Tell water this is a GenBank file!
    cline = WaterCommandline(cmd=exes["water"])
    for option, value in (("asequence", "asis:%s" % query),
                          ("bsequence", in_file),
                          ("gapopen", "1"),
                          ("gapextend", "0.5"),
                          ("outfile", out_file)):
        cline.set_parameter(option, value)
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cline))), str(cline))

    # Run the tool.
    result, out, err = generic_run(cline)

    # The test expects the tool banner on err and nothing on out.
    banner = err.read().strip()
    self.assert_(banner.startswith("Smith-Waterman local alignment"), banner)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # Show the failing command line before the assertion fires.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    self.assertEqual(result.get_result("outfile"), out_file)
    assert os.path.isfile(out_file)

    # Check we can parse the output and it is sensible...
    targets = SeqIO.parse(open(in_file), "genbank")
    alignments = AlignIO.parse(open(out_file), "emboss")
    self.pairwise_alignment_check(query, targets, alignments, local=True)

    # Clean up.
    os.remove(out_file)
def needle_align(a_file, b_file, out_file):
    """Run needle on ``a_file`` versus ``b_file``, writing to ``out_file``.

    The run uses gap open 6.0, gap extend 3.0 and markx10 output.  When
    the module-level ``Pedantic`` flag is true, the result entries are
    written to stdout and the lines of the message and error handles
    returned by ``generic_run`` are echoed to stderr.  Returns None.
    """
    from Bio.Application import generic_run
    from cmline import NeedleCommandline

    needle_run = NeedleCommandline()
    for option, value in (('-asequence', a_file),
                          ('-bsequence', b_file),
                          ('-gapopen', 6.0),
                          ('-gapextend', 3.0),
                          ('-outfile', out_file),
                          ('-aformat', 'markx10')):
        needle_run.set_parameter(option, value)

    result, messages, errors = generic_run(needle_run)

    # Nothing is reported unless Pedantic is set.
    if not Pedantic:
        return

    for name in result.available_results():
        sys.stdout.write('%s %s\n' % (name, result.get_result(name)))
    for line in messages.readlines():
        sys.stderr.write('%s\n' % line)
    for line in errors.readlines():
        sys.stderr.write('%s\n' % line)
    return
def test_long(self):
    """Simple muscle call using long file."""
    # Create a large input file by converting the first 40 records of
    # another example file to FASTA.
    temp_large_fasta_file = "temp_cw_prot.fasta"
    handle = open(temp_large_fasta_file, "w")
    records = list(SeqIO.parse(open("NBRF/Cw_prot.pir", "rU"), "pir"))[:40]
    SeqIO.write(records, handle, "fasta")
    handle.close()

    # Prepare the command.  Switches must be set to True explicitly,
    # because the default None is treated as False.
    cmdline = MuscleCommandline(muscle_exe)
    for name, value in (("in", temp_large_fasta_file),
                        ("stable", True),      # preserve input record order
                        ("maxiters", 1),       # fast options
                        ("diags", True),
                        ("clwstrict", True),   # clustal output
                        ("maxhours", 0.1),     # shouldn't be needed; checks it is accepted
                        ("quiet", True)):      # no progress reports to stderr
        cmdline.set_parameter(name, value)

    expected = (muscle_exe
                + " -in temp_cw_prot.fasta -diags -maxhours 0.1"
                + " -maxiters 1 -clwstrict -stable -quiet")
    self.assertEqual(str(cmdline).rstrip(), expected)
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))

    result, out_handle, err_handle = generic_run(cmdline)
    align = AlignIO.read(out_handle, "clustal")
    self.assertEqual(len(records), len(align))
    for original, aligned in zip(records, align):
        self.assertEqual(original.id, aligned.id)
        # Removing the gaps must give back the input sequence.
        self.assertEqual(str(aligned.seq).replace("-", ""), str(original.seq))
    os.remove(temp_large_fasta_file)

    # See if quiet worked:
    self.assertEqual("", err_handle.read().strip())
def test_water_file3(self):
    """water with the asis trick and GenBank file, output to a file."""
    # Setup.
    query = "TGTTGTAATGTTTTAATGTTTCTTCTCCCTTTAGATGTACTACGTTTGGA"
    out_file = "Emboss/temp_test3.water"
    in_file = "GenBank/cor6_6.gb"
    self.assert_(os.path.isfile(in_file))
    # Start from a clean slate so a stale output cannot satisfy the test.
    if os.path.isfile(out_file):
        os.remove(out_file)

    # Parameter names are given without the leading dash here.
    # TODO - Tell water this is a GenBank file!
    cline = WaterCommandline(cmd=exes["water"])
    for option, value in (("asequence", "asis:%s" % query),
                          ("bsequence", in_file),
                          ("gapopen", "1"),
                          ("gapextend", "0.5"),
                          ("outfile", out_file)):
        cline.set_parameter(option, value)
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cline))), str(cline))

    # Run the tool.
    result, out, err = generic_run(cline)

    # The test expects the tool banner on err and nothing on out.
    banner = err.read().strip()
    self.assert_(banner.startswith("Smith-Waterman local alignment"), banner)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # Show the failing command line before the assertion fires.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    self.assertEqual(result.get_result("outfile"), out_file)
    assert os.path.isfile(out_file)

    # Check we can parse the output and it is sensible...
    targets = SeqIO.parse(open(in_file), "genbank")
    alignments = AlignIO.parse(open(out_file), "emboss")
    self.pairwise_alignment_check(query, targets, alignments, local=True)

    # Clean up.
    os.remove(out_file)
def _run_genepop(self, extensions, option, fname, opts=None):
    """Configure ``self.controller`` and run it through ``generic_run``.

    Parameters:
        extensions: suffixes; ``fname + suffix`` is passed to
            ``self._remove_garbage`` for each one before the run.
        option: value handed to ``self.controller.set_menu``.
        fname: input name handed to ``self.controller.set_input``.
        opts: optional mapping of extra parameters.  Each entry is set
            on the controller as ``name`` -> ``"name=value"``.  Defaults
            to no extra parameters.

    Returns:
        The ``(ret, out, err)`` triple produced by ``generic_run``.

    Raises:
        IOError: if the run finishes with a non-zero return code.
    """
    # None replaces the old mutable ``{}`` default, which was one dict
    # object shared by every call.
    if opts is None:
        opts = {}

    for extension in extensions:
        self._remove_garbage(fname + extension)

    self.controller.set_menu(option)
    self.controller.set_input(fname)
    for name, value in opts.items():
        self.controller.set_parameter(name, name + "=" + str(value))

    ret, out, err = generic_run(self.controller)
    self._remove_garbage(None)
    if ret.return_code != 0:
        raise IOError("GenePop not found")
    return ret, out, err
def test_Muscle_simple(self):
    """Simple round-trip through app just infile and outfile."""
    cmdline = MuscleCommandline(muscle_exe, input=self.infile1,
                                out=self.outfile1)
    expected = muscle_exe + " -in Fasta/f002 -out Fasta/temp_align_out1.fa"
    self.assertEqual(str(cmdline), expected)
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))

    result, out_handle, err_handle = generic_run(cmdline)
    self.assertEqual(result.return_code, 0)
    self.assertEqual(out_handle.read(), "")
    self.assert_("ERROR" not in err_handle.read())
    # The result object must carry the command line it was run with.
    self.assertEqual(str(result._cl), str(cmdline))
def test_Muscle_profile_simple(self):
    """Simple round-trip through app doing a profile alignment."""
    cmdline = MuscleCommandline(muscle_exe)
    for name, value in (("out", self.outfile3),
                        ("profile", True),
                        ("in1", self.infile2),
                        ("in2", self.infile3)):
        cmdline.set_parameter(name, value)

    expected = (muscle_exe
                + " -out Fasta/temp_align_out3.fa"
                + " -profile -in1 Fasta/fa01 -in2 Fasta/f001")
    self.assertEqual(str(cmdline), expected)
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))

    result, out_handle, err_handle = generic_run(cmdline)
    self.assertEqual(result.return_code, 0)
    self.assertEqual(out_handle.read(), "")
    self.assert_("ERROR" not in err_handle.read())
    # The result object must carry the command line it was run with.
    self.assertEqual(str(result._cl), str(cmdline))
def mutations(hap, ref):
    '''List the mismatches between ``hap`` and ``ref`` after a needle alignment.

    Both sequences are passed to needle as 'asis:' strings (gap open 10,
    gap extend 1, markx10 output, '-auto' set).  The alignment is written
    to the file 'tmpout' in the current directory and read back with
    ``Markx10Iterator``.

    Parameters:
        hap: sequence passed as '-asequence'.
        ref: sequence passed as '-bsequence'.

    Returns:
        A list of strings '<hap symbol><column><ref symbol>', one per
        aligned column where the two symbols differ and neither is '-'
        nor 'N'.  Columns are counted from 1 and include gap columns.

    Result entries are logged through ``logfun.info``; every line of the
    message and error handles returned by ``generic_run`` goes to
    ``logfun.debug``.
    '''
    from Bio.Application import generic_run
    from pythonlib.cmline import NeedleCommandline
    from pythonlib.MarkxIO import Markx10Iterator

    needle_run = NeedleCommandline()
    needle_run.set_parameter('-asequence', 'asis:%s' % hap)
    needle_run.set_parameter('-bsequence', 'asis:%s' % ref)
    needle_run.set_parameter('-aformat', 'markx10')
    needle_run.set_parameter('-gapopen', 10)
    needle_run.set_parameter('-gapextend', 1)
    needle_run.set_parameter('-auto', True)
    needle_run.set_parameter('-outfile', 'tmpout')

    result_1, messages_1, errors_1 = generic_run(needle_run)

    for ar in result_1.available_results():
        logfun.info(ar + result_1.get_result(ar))
    for m in messages_1.readlines():
        logfun.debug(m)
    for e in errors_1.readlines():
        logfun.debug(e)

    mt = []
    oh = open('tmpout')
    # try/finally so the handle is released even if parsing fails.
    try:
        aligniter = Markx10Iterator(oh)
        while True:
            try:
                al = aligniter.next()
            except StopIteration:
                break
            except Exception:
                # As before, a parser error ends the scan and keeps what
                # was collected, but it is now logged, and
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                logfun.debug('stopped reading tmpout after a parser error')
                break
            if al is None:
                break

            seqs = al.get_all_seqs()
            column = 0
            for sp in zip(seqs[0].seq, seqs[1].seq):
                column += 1
                if '-' not in sp and 'N' not in sp and sp[0] != sp[1]:
                    mt.append('%s%d%s' % (sp[0], column, sp[1]))
    finally:
        oh.close()

    return mt
def test_Muscle_with_options(self):
    """Round-trip through app with a switch and valued option."""
    cmdline = MuscleCommandline(muscle_exe)
    # "input" is an alias for "in".
    cmdline.set_parameter("input", self.infile1)
    cmdline.set_parameter("out", self.outfile2)
    # The remaining options are set through properties.
    cmdline.objscore = "sp"
    cmdline.noanchors = True

    expected = (muscle_exe
                + " -in Fasta/f002"
                + " -out Fasta/temp_align_out2.fa"
                + " -objscore sp -noanchors")
    self.assertEqual(str(cmdline), expected)
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))

    result, out_handle, err_handle = generic_run(cmdline)
    self.assertEqual(result.return_code, 0)
    self.assertEqual(out_handle.read(), "")
    self.assert_("ERROR" not in err_handle.read())
    # The result object must carry the command line it was run with.
    self.assertEqual(str(result._cl), str(cmdline))
def test_simple_clustal(self):
    """Simple muscle call using Clustal output with a MUSCLE header."""
    input_file = "Fasta/f002"
    self.assert_(os.path.isfile(input_file))
    records = list(SeqIO.parse(open(input_file), "fasta"))

    # Prepare the command... use Clustal output (with a MUSCLE header).
    cmdline = MuscleCommandline(muscle_exe, input=input_file,
                                stable=True, clw=True)
    expected = muscle_exe + " -in Fasta/f002 -clw -stable"
    self.assertEqual(str(cmdline).rstrip(), expected)
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))

    result, out_handle, err_handle = generic_run(cmdline)
    align = AlignIO.read(out_handle, "clustal")
    self.assertEqual(len(records), len(align))
    for original, aligned in zip(records, align):
        self.assertEqual(original.id, aligned.id)
        # Removing the gaps must give back the input sequence.
        self.assertEqual(str(aligned.seq).replace("-", ""), str(original.seq))

    # Didn't use -quiet so there should be progress reports on stderr.
    self.assert_(err_handle.read().strip().startswith("MUSCLE"))
def test_water_file4(self):
    """water with the asis trick and SwissProt file, output to a file."""
    # Setup.
    query = "DVCTGKALCDPVTQNIKTYPVKIENLRVMI"
    out_file = "Emboss/temp_test4.water"
    in_file = "SwissProt/sp004"
    self.assert_(os.path.isfile(in_file))
    # Start from a clean slate so a stale output cannot satisfy the test.
    if os.path.isfile(out_file):
        os.remove(out_file)

    # "-sprotein" is set explicitly rather than left for the tool to guess.
    # TODO - Tell water this is a SwissProt file!
    cline = WaterCommandline(cmd=exes["water"])
    for option, value in (("-asequence", "asis:%s" % query),
                          ("-bsequence", in_file),
                          ("-sprotein", True),
                          ("-gapopen", "20"),
                          ("-gapextend", "5"),
                          ("-outfile", out_file)):
        cline.set_parameter(option, value)
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cline))), str(cline))

    # Run the tool.
    result, out, err = generic_run(cline)

    # The test expects the tool banner on err and nothing on out.
    banner = err.read().strip()
    self.assert_(banner.startswith("Smith-Waterman local alignment"), banner)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # Show the failing command line before the assertion fires.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    # Should be able to access this via any alias:
    self.assertEqual(result.get_result("-outfile"), out_file)
    assert os.path.isfile(out_file)

    # Check we can parse the output and it is sensible...
    targets = SeqIO.parse(open(in_file), "swiss")
    alignments = AlignIO.parse(open(out_file), "emboss")
    self.pairwise_alignment_check(query, targets, alignments, local=True)

    # Clean up.
    os.remove(out_file)
def test_simple_clustal_strict(self):
    """Simple muscle call using strict Clustal output."""
    input_file = "Fasta/f002"
    self.assert_(os.path.isfile(input_file))
    records = list(SeqIO.parse(open(input_file), "fasta"))

    # Prepare the command.  Switches must be set to True explicitly,
    # because the default None is treated as False.
    cmdline = MuscleCommandline(muscle_exe)
    for name, value in (("in", input_file),
                        ("stable", True),       # input order, easier to check
                        ("clwstrict", True)):   # clustal output, CLUSTAL header
        cmdline.set_parameter(name, value)

    expected = muscle_exe + " -in Fasta/f002 -clwstrict -stable"
    self.assertEqual(str(cmdline).rstrip(), expected)
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cmdline))), str(cmdline))

    result, out_handle, err_handle = generic_run(cmdline)
    align = AlignIO.read(out_handle, "clustal")
    self.assertEqual(len(records), len(align))
    for original, aligned in zip(records, align):
        self.assertEqual(original.id, aligned.id)
        # Removing the gaps must give back the input sequence.
        self.assertEqual(str(aligned.seq).replace("-", ""), str(original.seq))

    # Didn't use -quiet so there should be progress reports on stderr.
    self.assert_(err_handle.read().strip().startswith("MUSCLE"))
def test_water_file4(self):
    """water with the asis trick and SwissProt file, output to a file."""
    # Setup.
    query = "DVCTGKALCDPVTQNIKTYPVKIENLRVMI"
    out_file = "Emboss/temp_test4.water"
    in_file = "SwissProt/sp004"
    self.assert_(os.path.isfile(in_file))
    # Start from a clean slate so a stale output cannot satisfy the test.
    if os.path.isfile(out_file):
        os.remove(out_file)

    # "-sprotein" is set explicitly rather than left for the tool to guess.
    # TODO - Tell water this is a SwissProt file!
    cline = WaterCommandline(cmd=exes["water"])
    for option, value in (("-asequence", "asis:%s" % query),
                          ("-bsequence", in_file),
                          ("-sprotein", True),
                          ("-gapopen", "20"),
                          ("-gapextend", "5"),
                          ("-outfile", out_file)):
        cline.set_parameter(option, value)
    # repr() has to round-trip to the same command line.
    self.assertEqual(str(eval(repr(cline))), str(cline))

    # Run the tool.
    result, out, err = generic_run(cline)

    # The test expects the tool banner on err and nothing on out.
    banner = err.read().strip()
    self.assert_(banner.startswith("Smith-Waterman local alignment"), banner)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # Show the failing command line before the assertion fires.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    # Should be able to access this via any alias:
    self.assertEqual(result.get_result("-outfile"), out_file)
    assert os.path.isfile(out_file)

    # Check we can parse the output and it is sensible...
    targets = SeqIO.parse(open(in_file), "swiss")
    alignments = AlignIO.parse(open(out_file), "emboss")
    self.pairwise_alignment_check(query, targets, alignments, local=True)

    # Clean up.
    os.remove(out_file)
#And again, but this time using Bio.Align.Applications wrapper #Any filesnames with spaces should get escaped with quotes automatically. #Using keyword arguments here. cline = ClustalwCommandline(clustalw_exe, infile=input_file, outfile=output_file) assert str(eval(repr(cline))) == str(cline) if newtree_file is not None: #Test using a property: cline.newtree = newtree_file #I don't just want the tree, also want the alignment: cline.align = True assert str(eval(repr(cline))) == str(cline) #print cline return_code, out_handle, err_handle = generic_run(cline) assert out_handle.read().strip().startswith("CLUSTAL") assert err_handle.read().strip() == "" align = AlignIO.read(open(output_file), "clustal") assert set(input_records.keys()) == set(output_records.keys()) for record in align: assert str(record.seq) == str(output_records[record.id].seq) assert str(record.seq).replace("-","") == \ str(input_records[record.id].seq) #Clean up... os.remove(output_file) #Check the DND file was created. #TODO - Try and parse this with Bio.Nexus? if newtree_file is not None:
def reads2clones_align(sample_dir, max_freq=2):
    """Align raw and corrected reads in ``sample_dir`` to reference clones.

    For each of the first ``max_freq`` entries of the module-level
    ``ref_genomes`` sequence, needle is run (gap open 6.0, gap extend
    3.0, markx10 output) against './raw/Fastas/clone<entry>.fsta'.  All
    runs on '<sample_dir>/reads.fas' are done first, writing
    '<sample_dir>/reads-<entry>.needle'; then all runs on
    '<sample_dir>/reads.cor.fas', writing
    '<sample_dir>/reads-cor-<entry>.needle'.

    The result entries of every run are written to stdout; when the
    module-level ``Verbose`` flag is true, the lines of the message and
    error handles returned by ``generic_run`` are echoed to stderr.

    Exits through ``sys.exit`` when ``max_freq`` is larger than 10.
    Returns None.
    """
    from Bio.Application import generic_run
    from cmline import NeedleCommandline

    gap_open = 6.0
    gap_extend = 3.0
    if max_freq > 10:
        sys.exit('10 reference genomes maximum')

    # (reads file template, output file template): uncorrected reads
    # first, then corrected reads.
    passes = (('%s/reads.fas', '%s/reads-%s.needle'),
              ('%s/reads.cor.fas', '%s/reads-cor-%s.needle'))

    for reads_template, out_template in passes:
        for idx in range(max_freq):
            clone = ref_genomes[idx]
            ref_genome = './raw/Fastas/clone' + clone + '.fsta'
            outfile = out_template % (sample_dir, clone)

            needle_run = NeedleCommandline()
            needle_run.set_parameter('-asequence', ref_genome)
            needle_run.set_parameter('-bsequence', reads_template % sample_dir)
            needle_run.set_parameter('-gapopen', gap_open)
            needle_run.set_parameter('-gapextend', gap_extend)
            needle_run.set_parameter('-outfile', outfile)
            needle_run.set_parameter('-aformat', 'markx10')

            result, messages, errors = generic_run(needle_run)

            for name in result.available_results():
                sys.stdout.write('%s %s\n' % (name, result.get_result(name)))
            if Verbose:
                for line in messages.readlines():
                    sys.stderr.write('%s\n' % line)
                for line in errors.readlines():
                    sys.stderr.write('%s\n' % line)
    return
def run(self):
    """Pass this object to ``generic_run`` and return whatever it returns."""
    outcome = generic_run(self)
    return outcome
SeqIO.write(rec, 'in.fas', 'fasta') fw2.close() primer_cl = Primer3Commandline(sequence="in.fas", auto=True) primer_cl.outfile = "out.pr3" primer_cl.numreturn = 3 # primer_cl.target = str(overgo1s)+","+str(overgo1e/3+overgo2s*2/3) # can specify here the region that requires inclusion in the product primer_cl.osize = 20 primer_cl.maxsize = 26 primer_cl.otm = 58 primer_cl.mintm = 52 primer_cl.mingc = 35 primer_cl.maxgc = 75 primer_cl.psizeopt = 200 primer_cl.prange = "100-400" result, messages, errors = generic_run(primer_cl) print result try: open_outfile = file("out.pr3", "r") except: pass else: primer_record = read(open_outfile) for primer in primer_record.primers: product_len = -primer.forward_start + primer.reverse_start + primer.reverse_length fw.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % \ (rec.id,overgos[rec.id][0][0],primer.forward_seq,primer.forward_start,primer.forward_length,primer.forward_tm,primer.forward_gc,primer.reverse_seq,primer.reverse_start,primer.reverse_length,primer.reverse_tm,primer.reverse_gc,len(rec.seq),product_len)) open_outfile.close() try: os.system("rm out.pr3")
SeqIO.write(rec,'in.fas','fasta') fw2.close() primer_cl = Primer3Commandline(sequence="in.fas",auto=True) primer_cl.outfile = "out.pr3" primer_cl.numreturn = 3 # primer_cl.target = str(overgo1s)+","+str(overgo1e/3+overgo2s*2/3) # can specify here the region that requires inclusion in the product primer_cl.osize = 20 primer_cl.maxsize = 26 primer_cl.otm = 58 primer_cl.mintm = 52 primer_cl.mingc = 35 primer_cl.maxgc = 75 primer_cl.psizeopt = 200 primer_cl.prange = "100-400" result, messages, errors = generic_run(primer_cl) print result try: open_outfile = file("out.pr3", "r") except: pass else: primer_record = read(open_outfile) for primer in primer_record.primers: product_len = -primer.forward_start+primer.reverse_start+primer.reverse_length fw.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % \ (rec.id,overgos[rec.id][0][0],primer.forward_seq,primer.forward_start,primer.forward_length,primer.forward_tm,primer.forward_gc,primer.reverse_seq,primer.reverse_start,primer.reverse_length,primer.reverse_tm,primer.reverse_gc,len(rec.seq),product_len)) open_outfile.close() try: os.system("rm out.pr3") except: pass
#And again, but this time using Bio.Align.Applications wrapper #Any filesnames with spaces should get escaped with quotes automatically. #Using keyword arguments here. cline = ClustalwCommandline(clustalw_exe, infile=input_file, outfile=output_file) assert str(eval(repr(cline)))==str(cline) if newtree_file is not None : #Test using a property: cline.newtree = newtree_file #I don't just want the tree, also want the alignment: cline.align = True assert str(eval(repr(cline)))==str(cline) #print cline return_code, out_handle, err_handle = generic_run(cline) assert out_handle.read().strip().startswith("CLUSTAL") assert err_handle.read().strip() == "" align = AlignIO.read(open(output_file), "clustal") assert set(input_records.keys()) == set(output_records.keys()) for record in align : assert str(record.seq) == str(output_records[record.id].seq) assert str(record.seq).replace("-","") == \ str(input_records[record.id].seq) #Clean up... os.remove(output_file) #Check the DND file was created. #TODO - Try and parse this with Bio.Nexus? if newtree_file is not None :