def test_water_file(self):
    """Run water with the asis trick, writing the alignment to a file.

    The command line is assembled from constructor keywords,
    ``set_parameter`` calls and a property assignment, checked for a
    ``repr``/``eval`` round trip, executed through ``generic_run``, and
    the resulting file is parsed and compared with the two inputs.
    """
    # Setup, try a mixture of keyword arguments and later additions:
    cline = WaterCommandline(cmd=exes["water"],
                             gapopen="10", gapextend="0.5")
    # Try using both human readable names, and the literal ones:
    cline.set_parameter("asequence", "asis:ACCCGGGCGCGGT")
    cline.set_parameter("-bsequence", "asis:ACCCGAGCGCGGT")
    # Try using a property set here:
    cline.outfile = "Emboss/temp with space.water"
    self.assertEqual(str(eval(repr(cline))), str(cline))
    # Run the tool,
    result, out, err = generic_run(cline)
    # Check it worked,
    errors = err.read().strip()
    self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                    errors)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # Show the failing command; write() behaves the same on
        # Python 2 and Python 3, unlike the ``print >>`` statement.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    filename = result.get_result("outfile")
    self.assertEqual(filename, "Emboss/temp with space.water")
    self.assertTrue(os.path.isfile(filename))
    # Check we can parse the output.  The handle is closed before the
    # file is deleted, since an open handle blocks os.remove on Windows.
    with open(filename) as handle:
        align = AlignIO.read(handle, "emboss")
    self.assertEqual(len(align), 2)
    self.assertEqual(str(align[0].seq), "ACCCGGGCGCGGT")
    self.assertEqual(str(align[1].seq), "ACCCGAGCGCGGT")
    # Clean up,
    os.remove(filename)
def generate_water_cmd(macierz, pliki_fasta_rodzina):
    """
    Build EMBOSS water command lines for every pair of sequences read
    from the given FASTA files.

    :param macierz: location/name of the substitution matrix file
        (PAM/BLOSUM), passed to water as ``datafile``
    :param pliki_fasta_rodzina: list of locations/names of FASTA files
        holding the protein sequences that belong to one family
    :return: list of water command-line strings, one per unordered pair
        of records (empty when fewer than two records were read)
    """
    from itertools import combinations

    from Bio.Emboss.Applications import WaterCommandline

    records = []
    for path in pliki_fasta_rodzina:
        # Plain text mode: the legacy "rU" flag was removed in
        # Python 3.11 and text mode already normalises line endings on
        # Python 3.
        with open(path) as handle:
            records.extend(SeqIO.parse(handle, "fasta"))

    all_water_cmd = []
    # combinations() yields the same (i < j) pairs, in the same order,
    # as a nested index loop over the record list.
    for first, second in combinations(records, 2):
        # Maximal gap penalties so that the alignments come out gapless.
        water_cmd = WaterCommandline(gapopen=100, gapextend=10)
        water_cmd.asequence = "asis:" + str(first.seq)
        water_cmd.bsequence = "asis:" + str(second.seq)
        water_cmd.stdout = True
        water_cmd.sprotein = True
        water_cmd.datafile = macierz
        all_water_cmd.append(str(water_cmd))
    return all_water_cmd
def test_water_file(self):
    """Align two asis sequences with water and parse the output file."""
    # Start from constructor keywords, then add the rest afterwards.
    water = WaterCommandline(cmd=exes["water"], gapopen="10", gapextend="0.5")
    # One sequence goes in by its readable name, the other by its
    # literal command-line switch.
    water.set_parameter("asequence", "asis:ACCCGGGCGCGGT")
    water.set_parameter("-bsequence", "asis:ACCCGAGCGCGGT")
    # The output file is set through the property interface.
    water.outfile = "Emboss/temp with space.water"
    rebuilt = eval(repr(water))
    self.assertEqual(str(rebuilt), str(water))
    # Run the tool,
    self.run_water(water)
    # Check we can parse the output...
    alignment = AlignIO.read(water.outfile, "emboss")
    self.assertEqual(len(alignment), 2)
    for row, expected in ((0, "ACCCGGGCGCGGT"), (1, "ACCCGAGCGCGGT")):
        self.assertEqual(str(alignment[row].seq), expected)
    # Clean up,
    os.remove(water.outfile)
def test_water_piped(self):
    """Run water on two asis sequences and read the alignment from stdout."""
    water = WaterCommandline(
        cmd=exes["water"],
        asequence="asis:ACCCGGGCGCGGT",
        bsequence="asis:ACCCGAGCGCGGT",
        gapopen=10,
        gapextend=0.5,
        auto=True,
        filter=True,
    )
    expected_cmd = (
        exes["water"]
        + " -auto -filter"
        + " -asequence=asis:ACCCGGGCGCGGT"
        + " -bsequence=asis:ACCCGAGCGCGGT"
        + " -gapopen=10 -gapextend=0.5"
    )
    self.assertEqual(str(water), expected_cmd)
    # Launch the tool; a shell is used everywhere except on Windows.
    use_shell = sys.platform != "win32"
    proc = subprocess.Popen(
        str(water),
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=use_shell,
    )
    proc.stdin.close()
    # The alignment should be readable straight from the pipe.
    alignment = AlignIO.read(proc.stdout, "emboss")
    self.assertEqual(len(alignment), 2)
    for row, expected in ((0, "ACCCGGGCGCGGT"), (1, "ACCCGAGCGCGGT")):
        self.assertEqual(str(alignment[row].seq), expected)
    # Nothing may arrive on stderr and the exit status must be zero.
    self.assertEqual(proc.stderr.read(), "")
    self.assertEqual(0, proc.wait())
    proc.stdout.close()
    proc.stderr.close()
def doWater(contig, seq):
    """Align *contig* against *seq* with water and return getStartEnd().

    Both sequences are written to FASTA files in the working directory
    ("contig.faa" and "seq.faa"), water writes its report to
    "water.txt", and the tool's combined stdout/stderr is printed.
    """
    with open("contig.faa", "w") as contig_fasta:
        contig_fasta.write(">contig\n")
        contig_fasta.write(contig)
    with open("seq.faa", "w") as seq_fasta:
        seq_fasta.write(">seq\n")
        seq_fasta.write(str(seq))

    water_cline = WaterCommandline(
        asequence="contig.faa",
        bsequence="seq.faa",
        gapopen=10,
        gapextend=0.5,
        outfile="water.txt",
    )
    tool_out, tool_err = water_cline()
    print(tool_out + tool_err)
    return getStartEnd()
def getCellBarcodeAlignment(read, fil):
    """
    Align one read against a barcode FASTA file with EMBOSS water,
    feeding the read through stdin and parsing the report from stdout.

    asequence: one SMRT read (sent to water as a FASTA record on stdin)
    bsequence: {index}_CBC-list.fasta
    return: tuple ``(entry, score)`` for the highest-scoring alignment,
        where ``entry`` is the text after the first ':' on the matching
        report line that has '2:' in its first eight characters
    raises: ValueError when the report contains no score lines
    """
    water_cline = WaterCommandline(asequence='stdin', filter=True,
                                   bsequence=fil, gapopen=10.0,
                                   gapextend=.5, stdout=True)
    child = subprocess.Popen(str(water_cline),
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    seqs, scores = [], []
    try:
        rec = SeqRecord(Seq(read), id="temp")
        SeqIO.write(rec, child.stdin, "fasta")
        # Closing stdin signals end of input so water starts aligning.
        child.stdin.close()
        for line in child.stdout:
            if '2:' in line[:8]:
                seqs.append(line.strip().split(':')[1])
            elif 'Score' in line:
                scores.append(float(line.split(':')[1]))
    finally:
        # Always release the pipes and reap the child, otherwise every
        # call leaves open descriptors and a zombie process behind.
        child.stdin.close()
        child.stdout.close()
        child.stderr.close()
        child.wait()

    assert len(seqs) == len(
        scores), "ERROR: incorrect alignment file line counting."
    if not scores:
        raise ValueError("water produced no alignment scores for this read")
    best_score = max(scores)
    return seqs[scores.index(best_score)], best_score
def main():
    """Align this MPI rank's FASTA chunk against four reference fragments.

    The rank number selects ``00-data/chunk-NNN.fasta`` (NNN = rank + 1,
    zero padded).  For each of the fragments ``inside``, ``inside_rc``,
    ``outside`` and ``outside_rc`` the output directory
    ``01-fix_orientation/<fragment>`` is created and, when the chunk
    file exists, water is run with ``00-external/<fragment>.txt`` as
    asequence and the chunk as bsequence.
    """
    def _ensure_dir(path):
        """Create *path*, tolerating another rank creating it first."""
        try:
            os.makedirs(path)
        except OSError:
            # Only swallow the error if the directory now exists; a
            # check-then-mkdir sequence races between MPI ranks.
            if not os.path.isdir(path):
                raise

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    loc = '01-fix_orientation'
    chunk = 'chunk-' + '{:03}'.format(rank + 1)
    _ensure_dir(loc)
    data = os.path.join('00-data', chunk + '.fasta')
    for fragment in ('inside', 'inside_rc', 'outside', 'outside_rc'):
        subloc = loc + '/' + fragment
        _ensure_dir(subloc)
        infile = os.path.join('00-external', fragment + '.txt')
        if not os.path.isfile(data):
            continue
        outfile = os.path.join(subloc, chunk + '.txt')
        water_cline = WaterCommandline(asequence=infile, bsequence=data,
                                       gapopen=10.0, gapextend=.5,
                                       outfile=outfile)
        # The alignment goes to *outfile*; the captured console output
        # is not used.
        water_cline()
def water(*id, gop=10, gex=0.5, out='emb.aln'):
    """Local alignment with EMBOSS water (Smith-Waterman).

    The first identifier is written to 'seqa.fas' and the remaining
    ones to 'seqb.fas' via ``mkfasx``; water then aligns the two files
    and writes its report to *out*.

    id: sequence identifiers, passed through to ``mkfasx``
    gop: gap opening penalty
    gex: gap extension penalty
    out: name of the alignment file written by water
    return: the alignment parsed from *out* when fewer than three
        identifiers were given, otherwise None
    """
    temp_files = ('seqa.fas', 'seqb.fas')
    try:
        mkfasx('seqa.fas', id[0])
        mkfasx('seqb.fas', *id[1:])
        water_cline = WaterCommandline(asequence='seqa.fas',
                                       bsequence='seqb.fas',
                                       gapopen=gop, gapextend=gex,
                                       outfile=out)
        water_cline()
    finally:
        # Remove the scratch files even when water fails.
        for temp in temp_files:
            if os.path.exists(temp):
                os.remove(temp)
    if len(id) < 3:
        return AlignIO.read(out, "emboss")
    return None
def emboss_local_pairwise_alignment(query_dir, seq_type):
    """Run water on pairs of ``*.fa`` files found in *query_dir*.

    The files are taken in sorted order; each file is aligned against
    itself and every file after it, and the result is written to
    ``<query_dir>/pairwise_<a>_<b>.aln`` with 1-based positions.

    query_dir: directory holding the ``*.fa`` query files
    seq_type: 'fg' or 'ssu'; only selects the banner that is printed
    return: the glob pattern ``<query_dir>/*.aln`` as a string
    """
    # Single-argument print() calls give the same output on Python 2
    # and Python 3.
    if seq_type == 'fg':
        print('\n ...pairwise comparison of functional gene sequences...\n')
    elif seq_type == 'ssu':
        print('\n ...pairwise comparison of SSU rRNA sequences...\n')
    water_cline = WaterCommandline()
    water_cline.gapopen = 10
    water_cline.gapextend = 0.5
    query_list = sorted(glob.glob(query_dir + "/*.fa"))
    for i, a_seq in enumerate(query_list):
        water_cline.asequence = str(a_seq)
        # The slice starts at i, so the first partner is the file itself.
        for j, b_seq in enumerate(query_list[i:]):
            water_cline.bsequence = str(b_seq)
            align_out = (query_dir + "/pairwise_" + str(i + 1) + "_"
                         + str(i + j + 1) + ".aln")
            water_cline.outfile = str(align_out)
            water_cline()
    print('Done\n')
    return query_dir + "/*.aln"
def emboss_water(seq_a_file: str, seq_b_file: str, out_file: str):
    """
    Do a local pairwise alignment using EMBOSS water, writing MSF output.

    Args:
        seq_a_file: First sequence
        seq_b_file: second sequence
        out_file: Output file

    Returns:
        The ``subprocess.CompletedProcess`` of the water invocation.

    Raises:
        subprocess.CalledProcessError: if water exits with a non-zero
            status (``check=True``).
    """
    import os
    import shlex

    water_cline = WaterCommandline(asequence=seq_a_file,
                                   bsequence=seq_b_file,
                                   outfile=out_file,
                                   verbose=True,
                                   gapextend=1,
                                   gapopen=10)
    # Tokenise with shell rules rather than splitting on every blank,
    # so a quoted path containing spaces stays one argument.  POSIX
    # mode is switched off on Windows to leave backslashes untouched.
    cmd = shlex.split(str(water_cline), posix=(os.name != "nt"))
    cmd.append("-aformat=msf")
    return subprocess.run(cmd, check=True)
def test_water_file(self):
    """Run water with the asis trick, output to a file.

    Builds the command from constructor keywords, ``set_parameter``
    calls and a property assignment, runs it, then parses the output
    file and compares both aligned sequences with the inputs.
    """
    # Setup, try a mixture of keyword arguments and later additions:
    cline = WaterCommandline(cmd=exes["water"],
                             gapopen="10", gapextend="0.5")
    # Try using both human readable names, and the literal ones:
    cline.set_parameter("asequence", "asis:ACCCGGGCGCGGT")
    cline.set_parameter("-bsequence", "asis:ACCCGAGCGCGGT")
    # Try using a property set here:
    cline.outfile = "Emboss/temp with space.water"
    self.assertEqual(str(eval(repr(cline))), str(cline))
    # Run the tool,
    self.run_water(cline)
    # Check we can parse the output.  The handle is closed before the
    # file is removed, since an open handle blocks os.remove on Windows.
    with open(cline.outfile) as handle:
        align = AlignIO.read(handle, "emboss")
    self.assertEqual(len(align), 2)
    self.assertEqual(str(align[0].seq), "ACCCGGGCGCGGT")
    self.assertEqual(str(align[1].seq), "ACCCGAGCGCGGT")
    # Clean up,
    os.remove(cline.outfile)
def test_water_file3(self):
    """Run water with the asis trick and GenBank file, output to a file.

    The query is passed inline (asis), the GenBank file is the second
    input, and the alignments written by water are checked against the
    records of that file with ``pairwise_alignment_check``.
    """
    # Setup,
    query = "TGTTGTAATGTTTTAATGTTTCTTCTCCCTTTAGATGTACTACGTTTGGA"
    out_file = "Emboss/temp_test3.water"
    in_file = "GenBank/cor6_6.gb"
    self.assertTrue(os.path.isfile(in_file))
    if os.path.isfile(out_file):
        os.remove(out_file)
    cline = WaterCommandline(cmd=exes["water"])
    cline.set_parameter("asequence", "asis:%s" % query)
    cline.set_parameter("bsequence", in_file)
    # TODO - Tell water this is a GenBank file!
    cline.set_parameter("gapopen", "1")
    cline.set_parameter("gapextend", "0.5")
    cline.set_parameter("outfile", out_file)
    self.assertEqual(str(eval(repr(cline))), str(cline))
    # Run the tool,
    result, out, err = generic_run(cline)
    # Check it worked,
    errors = err.read().strip()
    self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                    errors)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # write() behaves the same on Python 2 and Python 3.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    self.assertEqual(result.get_result("outfile"), out_file)
    self.assertTrue(os.path.isfile(out_file))
    # Check we can parse the output and it is sensible.  Both handles
    # are closed before the output file is deleted.
    with open(in_file) as in_handle:
        with open(out_file) as out_handle:
            self.pairwise_alignment_check(query,
                                          SeqIO.parse(in_handle, "genbank"),
                                          AlignIO.parse(out_handle, "emboss"),
                                          local=True)
    # Clean up,
    os.remove(out_file)
def test_water_file2(self):
    """Run water with the asis trick and nucleotide FASTA file, output to a file.

    The query is passed inline (asis), the FASTA file is the second
    input, and the alignments written by water are checked against the
    records of that file with ``pairwise_alignment_check``.
    """
    # Setup,
    query = "ACACACTCACACACACTTGGTCAGAGATGCTGTGCTTCTTGGAAGCAAGGNCTCAAAGGCAAGGTGCACGCAGAGGGACGTTTGAGTCTGGGATGAAGCATGTNCGTATTATTTATATGATGGAATTTCACGTTTTTATG"
    out_file = "Emboss/temp_test2.water"
    in_file = "Fasta/f002"
    self.assertTrue(os.path.isfile(in_file))
    if os.path.isfile(out_file):
        os.remove(out_file)
    cline = WaterCommandline(cmd=exes["water"])
    cline.set_parameter("-asequence", "asis:%s" % query)
    cline.set_parameter("-bsequence", in_file)
    cline.set_parameter("-gapopen", "10")
    cline.set_parameter("-gapextend", "0.5")
    cline.set_parameter("-outfile", out_file)
    self.assertEqual(str(eval(repr(cline))), str(cline))
    # Run the tool,
    result, out, err = generic_run(cline)
    # Check it worked,
    errors = err.read().strip()
    self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                    errors)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # write() behaves the same on Python 2 and Python 3.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    self.assertEqual(result.get_result("outfile"), out_file)
    self.assertTrue(os.path.isfile(out_file))
    # Check we can parse the output and it is sensible.  Both handles
    # are closed before the output file is deleted.
    with open(in_file) as in_handle:
        with open(out_file) as out_handle:
            self.pairwise_alignment_check(query,
                                          SeqIO.parse(in_handle, "fasta"),
                                          AlignIO.parse(out_handle, "emboss"),
                                          local=True)
    # Clean up,
    os.remove(out_file)
# http://rosalind.info/problems/swat/
from Bio.Emboss.Applications import WaterCommandline
from Bio import ExPASy, SeqIO

if __name__ == "__main__":
    # Read the whitespace-separated identifiers.  split() with no
    # argument also drops the trailing newline, which split(' ') would
    # leave glued to the last identifier.
    with open('rosalind_swat.txt') as dataset:
        ids = dataset.read().split()

    # Fetch each entry and store it as FASTA in a file named after its
    # identifier.
    for i in ids:
        handle = ExPASy.get_sprot_raw(i)
        r = SeqIO.read(handle, "swiss")
        handle.close()
        with open(i, 'w') as f:
            SeqIO.write(r, f, 'fasta')

    # Align the first two entries with water.
    water_cline = WaterCommandline()
    water_cline.asequence = ids[0]
    water_cline.bsequence = ids[1]
    water_cline.outfile = "rosalind_swat_output.txt"
    water_cline.gapopen = 10
    water_cline.gapextend = 1
    water_cline()

    # Print the score of every 'Score:' line as an integer.
    with open('rosalind_swat_output.txt') as report:
        for line in report:
            if 'Score:' in line:
                print(int(float(line[:-1].split(':')[-1].strip())))
import os

from Bio.Emboss.Applications import WaterCommandline
from Bio import ExPASy
from Bio import SeqIO

if __name__ == "__main__":
    # Read the whitespace-separated identifiers from the dataset.
    with open(os.path.join('data', 'rosalind_swat.txt')) as dataset:
        ids = dataset.read().split()

    # Fetch each entry and store it as FASTA in a file named after its
    # identifier.
    for i in ids:
        handle = ExPASy.get_sprot_raw(i)
        r = SeqIO.read(handle, "swiss")
        handle.close()
        with open(i, 'w') as f:
            SeqIO.write(r, f, 'fasta')

    # Align the first two entries with water.
    water_cline = WaterCommandline()
    water_cline.asequence = ids[0]
    water_cline.bsequence = ids[1]
    water_cline.outfile = "water.txt"
    water_cline.gapopen = 10
    water_cline.gapextend = 1
    water_cline()

    with open('water.txt') as f:
        output = f.readlines()

    # Print the score of every 'Score:' line as an integer.
    for line in output:
        if 'Score:' in line:
            print(int(float(line[:-1].split(':')[-1].strip())))
# Get telomere ref sequence ref_length = int(math.ceil(float(size / float(6)))) if strand == "+": telo_ref = "TTAGGC" * ref_length elif strand == "-": telo_ref = "GCCTAA" * ref_length else: print("ERROR: strand must be + or -") sys.exit(1) # Perform alignment with water with open("its_seq.temp", "w") as fi: fi.write(str(seq)) with open("telo.temp", "w") as ft: ft.write(telo_ref) water_cmd = WaterCommandline(gapopen=10, gapextend=0.5, asequence="its_seq.temp", bsequence="telo.temp", stdout=True, auto=True) stdout, stderr = water_cmd() identity = re.findall("# Identity:.*\((.+)\%\)", stdout)[0] outfile.write(line.strip() + "\t" + str(identity) + "\n") outfile.close() os.remove("its_seq.temp") os.remove("telo.temp")
def water_aligner(TR_frame, seqrec_array, m, go, ge, args):
    """
    Performs TR alignment using the provided EMBOSS-water aligner executable.

    For every TR three water runs are started: the full region
    (``<id>.water``) and the two flanks (``<id>_F1.water``,
    ``<id>_F2.water``).  The exit status of each run is appended to
    ``./TR_aln.log``.

    TR_frame: A data frame containing TR instances
    seqrec_array: An array containing indexed seqrecord instances from the query feature array
    m: match score (only echoed in the banner here)
    go: gap open penalty
    ge: gap extension penalty
    args: options object; this function reads ``args.w`` (first
        positional argument of WaterCommandline), ``args.o`` (output
        directory) and ``args.m`` (forwarded to ``get_homologs_fasta``)
    """
    tr_count = len(TR_frame)
    missing_features = 0  ## counter for instances of missing features in the query lib

    def run_alignment(water_cline, a_prefix, tr_id, log):
        """
        Runs water alignment using a Biopython water commandline object and
        a prefix to identify which sequence is being aligned
        """
        # NOTE(review): the command is passed to the shell as one
        # string; confirm the paths it contains are trusted.
        p = Popen(str(water_cline), stdin=PIPE, stdout=PIPE, stderr=PIPE,
                  shell=True)
        _, err = p.communicate()
        rc = p.returncode
        if rc == 0:
            print(f"\nAlignment of {a_prefix}:{tr_id} exited with 0",
                  file=log)
        else:
            print(
                f"\nAlignment of {a_prefix}:{tr_id} exited with {rc} and warning:\n{str(err)}",
                file=log)

    ## file to dump water subprocess output; closed even on errors
    with open("./TR_aln.log", "w") as water_log:
        vprint(subprocessID, "Starting alignments...", "prYellow")
        print(
            f"\n\t\t\tEMBOSS-water Smith-Waterman Aligner.\n\t\t\tmatch={m}\n\t\t\tgap_open={go}\n\t\t\tgap_extend={ge}\n",
            flush=True)
        for i, tr in enumerate(TR_frame):
            time = strftime("%H:%M:%S", localtime())
            # NOTE(review): the `end` terminator is broken across two
            # lines in the reviewed copy; it is rendered here as a
            # newline - confirm against the original file.
            print("\r{time} {subprocess} :: Aligning TR {i}/{tr_count}".format(
                time=time, subprocess=prYellow(subprocessID), i=i + 1,
                tr_count=tr_count),
                end="... \n", file=sys.stdout, flush=True)
            ## generate a homolog dict from the TR and track the number of missing features from the query library
            rfa_out, qfa_out, missing = tr.get_homologs_fasta(seqrec_array,
                                                              args.m)
            missing_features += missing
            aln_out = path.join(args.o, f"{tr.id}.water")
            flank1_out = path.join(args.o, f"{tr.id}_F1.water")
            flank2_out = path.join(args.o, f"{tr.id}_F2.water")
            ## align full TR region
            water_cline = WaterCommandline(args.w, asequence=rfa_out,
                                           bsequence=qfa_out, gapopen=go,
                                           gapextend=ge, outfile=aln_out)
            run_alignment(water_cline, "FULL", tr.id, water_log)
            ## align flank 1
            water_cline = WaterCommandline(args.w,
                                           asequence=f"asis:{tr.flank1}",
                                           bsequence=qfa_out, gapopen=go,
                                           gapextend=ge, outfile=flank1_out)
            run_alignment(water_cline, "flank1", tr.id, water_log)
            ## align flank 2
            water_cline = WaterCommandline(args.w,
                                           asequence=f"asis:{tr.flank2}",
                                           bsequence=qfa_out, gapopen=go,
                                           gapextend=ge, outfile=flank2_out)
            run_alignment(water_cline, "flank2", tr.id, water_log)
            # NOTE(review): stops after the twelfth TR (index 11);
            # looks like a debugging limit - confirm it is intended.
            if i > 10:
                break
        print(f"Done with {missing_features} missing seqeuences.\n",
              flush=True)
def test_water_file2(self):
    """Align an asis query against a nucleotide FASTA file, writing to a file."""
    # Setup,
    fasta_path = "Fasta/f002"
    result_path = "Emboss/temp_test2.water"
    probe = "ACACACTCACACACACTTGGTCAGAGATGCTGTGCTTCTTGGAAGCAAGGNCTCAAAGGCAAGGTGCACGCAGAGGGACGTTTGAGTCTGGGATGAAGCATGTNCGTATTATTTATATGATGGAATTTCACGTTTTTATG"
    self.assertTrue(os.path.isfile(fasta_path))
    # Drop any output left behind by an earlier run.
    if os.path.isfile(result_path):
        os.remove(result_path)
    water = WaterCommandline(cmd=exes["water"])
    # Every option is given by its literal command-line switch.
    for switch, value in (
        ("-asequence", "asis:%s" % probe),
        ("-bsequence", fasta_path),
        ("-gapopen", "10"),
        ("-gapextend", "0.5"),
        ("-outfile", result_path),
    ):
        water.set_parameter(switch, value)
    rebuilt = eval(repr(water))
    self.assertEqual(str(rebuilt), str(water))
    # Run the tool,
    self.run_water(water)
    # Check we can parse the output and it is sensible...
    targets = SeqIO.parse(fasta_path, "fasta")
    alignments = AlignIO.parse(result_path, "emboss")
    self.pairwise_alignment_check(probe, targets, alignments, local=True)
    # Clean up,
    os.remove(result_path)
def test_water_file4(self):
    """Run water with the asis trick and SwissProt file, output to a file.

    The query is passed inline (asis), the SwissProt file is the second
    input, and the alignments written by water are checked against the
    records of that file with ``pairwise_alignment_check``.
    """
    # Setup,
    query = "DVCTGKALCDPVTQNIKTYPVKIENLRVMI"
    out_file = "Emboss/temp_test4.water"
    in_file = "SwissProt/sp004"
    self.assertTrue(os.path.isfile(in_file))
    if os.path.isfile(out_file):
        os.remove(out_file)
    cline = WaterCommandline(cmd=exes["water"])
    cline.set_parameter("-asequence", "asis:%s" % query)
    cline.set_parameter("-bsequence", in_file)
    # EMBOSS should work this out, but let's be explicit:
    cline.set_parameter("-sprotein", True)
    # TODO - Tell water this is a SwissProt file!
    cline.set_parameter("-gapopen", "20")
    cline.set_parameter("-gapextend", "5")
    cline.set_parameter("-outfile", out_file)
    self.assertEqual(str(eval(repr(cline))), str(cline))
    # Run the tool,
    result, out, err = generic_run(cline)
    # Check it worked,
    errors = err.read().strip()
    self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                    errors)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # write() behaves the same on Python 2 and Python 3.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    # Should be able to access this via any alias:
    self.assertEqual(result.get_result("-outfile"), out_file)
    self.assertTrue(os.path.isfile(out_file))
    # Check we can parse the output and it is sensible.  Both handles
    # are closed before the output file is deleted.
    with open(in_file) as in_handle:
        with open(out_file) as out_handle:
            self.pairwise_alignment_check(query,
                                          SeqIO.parse(in_handle, "swiss"),
                                          AlignIO.parse(out_handle, "emboss"),
                                          local=True)
    # Clean up,
    os.remove(out_file)
def GetExec(self, optList, frame):
    """Build the water command line for the "embossn" type command.

    Stores *frame*, the output file path and the output type on the
    instance, reads the two sequences and the gap/similarity settings
    from the GUI controls, and returns the command as a string.

    optList: not used by this method
    frame: GUI frame providing ``paramBoxes``, ``abet`` and ``options``
    return: the water command line as a string
    """
    # Respond to the "embossn" type command.
    self.frame = frame
    plugin_exe = r"C:/mEMBOSS/water.exe"
    self.outfile = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\water.txt"
    self.outtype = "fasta"
    cline = WaterCommandline(
        plugin_exe,
        asequence=str(self.frame.paramBoxes[1].GetValue()),
        bsequence=str(self.frame.paramBoxes[3].GetValue()))
    cline.outfile = self.outfile
    cline.gapopen = self.param[7].GetValue()
    cline.gapextend = self.param[9].GetValue()
    cline.similarity = bool(self.param[10].GetValue())
    if self.frame.abet == "AA":
        # Amino-acid alphabet: declare the input as protein.  The
        # branch used to repeat the nucleotide flags of the DNA/RNA
        # case.
        cline.sprotein = True
        cline.snucleotide = False
    elif self.frame.abet in ("DNA", "RNA"):
        cline.snucleotide = True
        cline.sprotein = False
    if self.frame.options:
        # Optional scoring-matrix file taken from the options box.
        t = self.boxList[3].GetValue()
        if t != '':
            cline.datafile = str(t)
    return str(cline)
def test_water_file3(self):
    """Align an asis query against a GenBank file with water, writing to a file."""
    # Setup,
    genbank_path = "GenBank/cor6_6.gb"
    result_path = "Emboss/temp_test3.water"
    probe = "TGTTGTAATGTTTTAATGTTTCTTCTCCCTTTAGATGTACTACGTTTGGA"
    self.assertTrue(os.path.isfile(genbank_path))
    # Drop any output left behind by an earlier run.
    if os.path.isfile(result_path):
        os.remove(result_path)
    water = WaterCommandline(cmd=exes["water"])
    # Every option is given by its human readable name.
    # TODO - Tell water this is a GenBank file!
    for option, value in (
        ("asequence", "asis:%s" % probe),
        ("bsequence", genbank_path),
        ("gapopen", "1"),
        ("gapextend", "0.5"),
        ("outfile", result_path),
    ):
        water.set_parameter(option, value)
    rebuilt = eval(repr(water))
    self.assertEqual(str(rebuilt), str(water))
    # Run the tool,
    self.run_water(water)
    # Check we can parse the output and it is sensible...
    targets = SeqIO.parse(genbank_path, "genbank")
    alignments = AlignIO.parse(result_path, "emboss")
    self.pairwise_alignment_check(probe, targets, alignments, local=True)
    # Clean up,
    os.remove(result_path)
def test_water_file4(self):
    """Align an asis query against a SwissProt file with water, writing to a file."""
    # Setup,
    swiss_path = "SwissProt/sp004"
    result_path = "Emboss/temp_test4.water"
    probe = "DVCTGKALCDPVTQNIKTYPVKIENLRVMI"
    self.assertTrue(os.path.isfile(swiss_path))
    # Drop any output left behind by an earlier run.
    if os.path.isfile(result_path):
        os.remove(result_path)
    water = WaterCommandline(cmd=exes["water"])
    # EMBOSS should detect protein input itself; -sprotein makes it
    # explicit.
    # TODO - Tell water this is a SwissProt file!
    for switch, value in (
        ("-asequence", "asis:%s" % probe),
        ("-bsequence", swiss_path),
        ("-sprotein", True),
        ("-gapopen", "20"),
        ("-gapextend", "5"),
        ("-outfile", result_path),
    ):
        water.set_parameter(switch, value)
    rebuilt = eval(repr(water))
    self.assertEqual(str(rebuilt), str(water))
    # Run the tool,
    self.run_water(water)
    # Check we can parse the output and it is sensible...
    targets = SeqIO.parse(swiss_path, "swiss")
    alignments = AlignIO.parse(result_path, "emboss")
    self.pairwise_alignment_check(probe, targets, alignments, local=True)
    # Clean up,
    os.remove(result_path)
def GetExec(inF, outF):
    """Run water on *inF*, writing to *outF*, and wait for it to finish.

    inF: input file, passed to WaterCommandline as ``infile``
    outF: output file, passed to WaterCommandline as ``outfile``
    """
    plugin_exe = r"C:/mEMBOSS/water.exe"
    # NOTE(review): other callers configure water through
    # asequence/bsequence; confirm that `infile` is an option this
    # wrapper accepts.
    cline = WaterCommandline(plugin_exe, infile=inF, outfile=outF)
    # Use the local command object: this is a plain function, so the
    # former `self.cline` lookup raised NameError.
    p = subprocess.Popen(str(cline))
    p.wait()
import Bio.Seq
import os
from Bio.Emboss.Applications import WaterCommandline
from Bio.Align.Applications import ClustalwCommandline

# Walk the tab-separated input line by line.  The text between the
# first and the last tab is taken as the first sequence, and the text
# after the last tab (minus the final character) as the second.
# Both files are closed by the `with` block even if a line fails.
with open('/home/nastia/fasta_end.txt', 'r') as fasta, \
        open('/home/nastia/Desktop/output.txt', 'w') as outfileput:
    for string in fasta:
        m = string.find('\t')
        n = string.rfind('\t')
        my_seq_1 = Bio.Seq.Seq(string[m + 1:n])
        # [:-1] drops the last character - presumably the newline;
        # TODO confirm the final line of the input also ends with one.
        my_seg_2 = Bio.Seq.Seq(string[n + 1:-1])
        # NOTE(review): the command is only constructed here; nothing
        # runs it or writes to the output file yet.
        cline = WaterCommandline(gapopen=10, gapextend=0.5,
                                 asequence=my_seq_1, bsequence=my_seg_2,
                                 outfile='/home/nastia/Desktop/Water.txt')
def test_water_file4(self):
    """Run water with the asis trick and SwissProt file, output to a file.

    The query is passed inline (asis), the SwissProt file is the second
    input, and the alignments written by water are checked against the
    records of that file with ``pairwise_alignment_check``.
    """
    # Setup,
    query = "DVCTGKALCDPVTQNIKTYPVKIENLRVMI"
    out_file = "Emboss/temp_test4.water"
    in_file = "SwissProt/sp004"
    self.assertTrue(os.path.isfile(in_file))
    if os.path.isfile(out_file):
        os.remove(out_file)
    cline = WaterCommandline(cmd=exes["water"])
    cline.set_parameter("-asequence", "asis:%s" % query)
    cline.set_parameter("-bsequence", in_file)
    # EMBOSS should work this out, but let's be explicit:
    cline.set_parameter("-sprotein", True)
    # TODO - Tell water this is a SwissProt file!
    cline.set_parameter("-gapopen", "20")
    cline.set_parameter("-gapextend", "5")
    cline.set_parameter("-outfile", out_file)
    self.assertEqual(str(eval(repr(cline))), str(cline))
    # Run the tool,
    result, out, err = generic_run(cline)
    # Check it worked,
    errors = err.read().strip()
    self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                    errors)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # write() behaves the same on Python 2 and Python 3.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    # Should be able to access this via any alias:
    self.assertEqual(result.get_result("-outfile"), out_file)
    self.assertTrue(os.path.isfile(out_file))
    # Check we can parse the output and it is sensible.  Both handles
    # are closed before the output file is deleted.
    with open(in_file) as in_handle:
        with open(out_file) as out_handle:
            self.pairwise_alignment_check(query,
                                          SeqIO.parse(in_handle, "swiss"),
                                          AlignIO.parse(out_handle, "emboss"),
                                          local=True)
    # Clean up,
    os.remove(out_file)
""" This is the first example of Python script. """
from Bio.Emboss.Applications import WaterCommandline
from Bio.Seq import Seq

a = 10  # variable a
b = 33  # variable b
c = a / b  # variable c holds the ratio

# Let's print the result to screen.
print("a:", a, " b:", b, " a/b=", c)

# Build a sequence object and show it.  The earlier `a.alphabet` and
# `a.sequence()` lines were dropped: `sequence()` is not a Seq method
# and the `alphabet` attribute is gone from current Biopython.
a = Seq("ATATATACG")
print(a)

# Assemble a water command line and print it (it is not executed).
cline = WaterCommandline(gapopen=10, gapextend=0.5)
cline.asequence = "asis:ACCCGGGCGCGGT"
cline.bsequence = "asis:ACCCGAGCGCGGT"
cline.outfile = "temp_water.txt"
print(cline)
def GetExec(self, optList, frame):
    """Build the water command line for the "embossn" type command.

    Stores *frame*, the output file path and the output type on the
    instance, reads the two sequences and the gap/similarity settings
    from the GUI controls, and returns the command as a string.

    optList: not used by this method
    frame: GUI frame providing ``paramBoxes``, ``abet`` and ``options``
    return: the water command line as a string
    """
    # Respond to the "embossn" type command.
    self.frame = frame
    plugin_exe = r"C:/mEMBOSS/water.exe"
    self.outfile = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\water.txt"
    self.outtype = "fasta"
    cline = WaterCommandline(
        plugin_exe,
        asequence=str(self.frame.paramBoxes[1].GetValue()),
        bsequence=str(self.frame.paramBoxes[3].GetValue()))
    cline.outfile = self.outfile
    cline.gapopen = self.param[7].GetValue()
    cline.gapextend = self.param[9].GetValue()
    cline.similarity = bool(self.param[10].GetValue())
    if self.frame.abet == "AA":
        # Amino-acid alphabet: declare the input as protein.  The
        # branch used to repeat the nucleotide flags of the DNA/RNA
        # case.
        cline.sprotein = True
        cline.snucleotide = False
    elif self.frame.abet in ("DNA", "RNA"):
        cline.snucleotide = True
        cline.sprotein = False
    if self.frame.options:
        # Optional scoring-matrix file taken from the options box.
        t = self.boxList[3].GetValue()
        if t != '':
            cline.datafile = str(t)
    return str(cline)
def test_water_file3(self):
    """Align an asis query against a GenBank file with water, writing to a file."""
    # Setup,
    genbank_path = "GenBank/cor6_6.gb"
    result_path = "Emboss/temp_test3.water"
    probe = "TGTTGTAATGTTTTAATGTTTCTTCTCCCTTTAGATGTACTACGTTTGGA"
    self.assertTrue(os.path.isfile(genbank_path))
    # Drop any output left behind by an earlier run.
    if os.path.isfile(result_path):
        os.remove(result_path)
    water = WaterCommandline(cmd=exes["water"])
    # Every option is given by its human readable name.
    # TODO - Tell water this is a GenBank file!
    for option, value in (
        ("asequence", "asis:%s" % probe),
        ("bsequence", genbank_path),
        ("gapopen", "1"),
        ("gapextend", "0.5"),
        ("outfile", result_path),
    ):
        water.set_parameter(option, value)
    rebuilt = eval(repr(water))
    self.assertEqual(str(rebuilt), str(water))
    # Run the tool,
    self.run_water(water)
    # Check we can parse the output and it is sensible...
    targets = SeqIO.parse(genbank_path, "genbank")
    alignments = AlignIO.parse(result_path, "emboss")
    self.pairwise_alignment_check(probe, targets, alignments, local=True)
    # Clean up,
    os.remove(result_path)
import os

from Bio import ExPASy
from Bio import SeqIO
from Bio.Emboss.Applications import WaterCommandline

if __name__ == "__main__":
    # Read the whitespace-separated identifiers from the dataset.
    with open(os.path.join('data', 'rosalind_swat.txt')) as dataset:
        ids = dataset.read().split()

    # Fetch each entry and store it as FASTA in a file named after its
    # identifier.
    for i in ids:
        handle = ExPASy.get_sprot_raw(i)
        r = SeqIO.read(handle, "swiss")
        handle.close()
        with open(i, 'w') as f:
            SeqIO.write(r, f, 'fasta')

    # Align the first two entries with water.
    water_cline = WaterCommandline()
    water_cline.asequence = ids[0]
    water_cline.bsequence = ids[1]
    water_cline.outfile = "water.txt"
    water_cline.gapopen = 10
    water_cline.gapextend = 1
    water_cline()

    with open('water.txt') as f:
        output = f.readlines()

    # Print the score of every 'Score:' line as an integer.
    for line in output:
        if 'Score:' in line:
            print(int(float(line[:-1].split(':')[-1].strip())))
def test_water_file2(self):
    """Align an asis query against a nucleotide FASTA file, writing to a file."""
    # Setup,
    fasta_path = "Fasta/f002"
    result_path = "Emboss/temp_test2.water"
    probe = "ACACACTCACACACACTTGGTCAGAGATGCTGTGCTTCTTGGAAGCAAGGNCTCAAAGGCAAGGTGCACGCAGAGGGACGTTTGAGTCTGGGATGAAGCATGTNCGTATTATTTATATGATGGAATTTCACGTTTTTATG"
    self.assertTrue(os.path.isfile(fasta_path))
    # Drop any output left behind by an earlier run.
    if os.path.isfile(result_path):
        os.remove(result_path)
    water = WaterCommandline(cmd=exes["water"])
    # Every option is given by its literal command-line switch.
    for switch, value in (
        ("-asequence", "asis:%s" % probe),
        ("-bsequence", fasta_path),
        ("-gapopen", "10"),
        ("-gapextend", "0.5"),
        ("-outfile", result_path),
    ):
        water.set_parameter(switch, value)
    rebuilt = eval(repr(water))
    self.assertEqual(str(rebuilt), str(water))
    # Run the tool,
    self.run_water(water)
    # Check we can parse the output and it is sensible...
    targets = SeqIO.parse(fasta_path, "fasta")
    alignments = AlignIO.parse(result_path, "emboss")
    self.pairwise_alignment_check(probe, targets, alignments, local=True)
    # Clean up,
    os.remove(result_path)
def test_water_file2(self):
    """Run water with the asis trick and nucleotide FASTA file, output to a file.

    The query is passed inline (asis), the FASTA file is the second
    input, and the alignments written by water are checked against the
    records of that file with ``pairwise_alignment_check``.
    """
    # Setup,
    query = "ACACACTCACACACACTTGGTCAGAGATGCTGTGCTTCTTGGAAGCAAGGNCTCAAAGGCAAGGTGCACGCAGAGGGACGTTTGAGTCTGGGATGAAGCATGTNCGTATTATTTATATGATGGAATTTCACGTTTTTATG"
    out_file = "Emboss/temp_test2.water"
    in_file = "Fasta/f002"
    self.assertTrue(os.path.isfile(in_file))
    if os.path.isfile(out_file):
        os.remove(out_file)
    cline = WaterCommandline(cmd=exes["water"])
    cline.set_parameter("-asequence", "asis:%s" % query)
    cline.set_parameter("-bsequence", in_file)
    cline.set_parameter("-gapopen", "10")
    cline.set_parameter("-gapextend", "0.5")
    cline.set_parameter("-outfile", out_file)
    self.assertEqual(str(eval(repr(cline))), str(cline))
    # Run the tool,
    result, out, err = generic_run(cline)
    # Check it worked,
    errors = err.read().strip()
    self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                    errors)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # write() behaves the same on Python 2 and Python 3.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    self.assertEqual(result.get_result("outfile"), out_file)
    self.assertTrue(os.path.isfile(out_file))
    # Check we can parse the output and it is sensible.  Both handles
    # are closed before the output file is deleted.
    with open(in_file) as in_handle:
        with open(out_file) as out_handle:
            self.pairwise_alignment_check(query,
                                          SeqIO.parse(in_handle, "fasta"),
                                          AlignIO.parse(out_handle, "emboss"),
                                          local=True)
    # Clean up,
    os.remove(out_file)
def test_water_file4(self):
    """Align an asis query against a SwissProt file with water, writing to a file."""
    # Setup,
    swiss_path = "SwissProt/P0A186.txt"
    result_path = "Emboss/temp_test4.water"
    probe = "DVCTGKALCDPVTQNIKTYPVKIENLRVMI"
    self.assertTrue(os.path.isfile(swiss_path))
    # Drop any output left behind by an earlier run.
    if os.path.isfile(result_path):
        os.remove(result_path)
    water = WaterCommandline(cmd=exes["water"])
    # EMBOSS should detect protein input itself; -sprotein makes it
    # explicit.
    # TODO - Tell water this is a SwissProt file!
    for switch, value in (
        ("-asequence", "asis:%s" % probe),
        ("-bsequence", swiss_path),
        ("-sprotein", True),
        ("-gapopen", "20"),
        ("-gapextend", "5"),
        ("-outfile", result_path),
    ):
        water.set_parameter(switch, value)
    rebuilt = eval(repr(water))
    self.assertEqual(str(rebuilt), str(water))
    # Run the tool,
    self.run_water(water)
    # Check we can parse the output and it is sensible...
    targets = SeqIO.parse(swiss_path, "swiss")
    alignments = AlignIO.parse(result_path, "emboss")
    self.pairwise_alignment_check(probe, targets, alignments, local=True)
    # Clean up,
    os.remove(result_path)
def test_water_file3(self):
    """Run water with the asis trick and GenBank file, output to a file.

    The query is passed inline (asis), the GenBank file is the second
    input, and the alignments written by water are checked against the
    records of that file with ``pairwise_alignment_check``.
    """
    # Setup,
    query = "TGTTGTAATGTTTTAATGTTTCTTCTCCCTTTAGATGTACTACGTTTGGA"
    out_file = "Emboss/temp_test3.water"
    in_file = "GenBank/cor6_6.gb"
    self.assertTrue(os.path.isfile(in_file))
    if os.path.isfile(out_file):
        os.remove(out_file)
    cline = WaterCommandline(cmd=exes["water"])
    cline.set_parameter("asequence", "asis:%s" % query)
    cline.set_parameter("bsequence", in_file)
    # TODO - Tell water this is a GenBank file!
    cline.set_parameter("gapopen", "1")
    cline.set_parameter("gapextend", "0.5")
    cline.set_parameter("outfile", out_file)
    self.assertEqual(str(eval(repr(cline))), str(cline))
    # Run the tool,
    result, out, err = generic_run(cline)
    # Check it worked,
    errors = err.read().strip()
    self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                    errors)
    self.assertEqual(out.read().strip(), "")
    if result.return_code != 0:
        # write() behaves the same on Python 2 and Python 3.
        sys.stderr.write("\n%s\n" % cline)
    self.assertEqual(result.return_code, 0)
    self.assertEqual(result.get_result("outfile"), out_file)
    self.assertTrue(os.path.isfile(out_file))
    # Check we can parse the output and it is sensible.  Both handles
    # are closed before the output file is deleted.
    with open(in_file) as in_handle:
        with open(out_file) as out_handle:
            self.pairwise_alignment_check(query,
                                          SeqIO.parse(in_handle, "genbank"),
                                          AlignIO.parse(out_handle, "emboss"),
                                          local=True)
    # Clean up,
    os.remove(out_file)