def Align_Results(OutputFileName, results_dir=None):
    """Run ClustalW on a consensus FASTA file and write the ``.ALN`` result.

    The input is ``<results_dir>/<OutputFileName>.FASTA`` and the alignment
    is written to ``<results_dir>/<OutputFileName>.ALN``.  Both paths are
    printed before the alignment is started.

    Args:
        OutputFileName: Base name (no directory, no extension) shared by
            the FASTA input and the ALN output.
        results_dir: Directory holding the consensus files.  Defaults to
            the project's original hard-coded ``Consensus_Results`` folder,
            so existing callers are unaffected.

    Returns:
        Whatever ``Bio.Clustalw.do_alignment`` returns for the command line
        (previously the value was computed and discarded).
    """
    import os

    from Bio import Clustalw
    from Bio.Clustalw import MultipleAlignCL

    if results_dir is None:
        results_dir = ("/users/rwbarrettemac/bioinformatics/pythonfolders/"
                       "FMDanalysisScript/FMDserotypingARRAY/Consensus_Results")

    FileIN_Name = os.path.join(results_dir, "%s.FASTA" % OutputFileName)
    FileOUT_ALN = os.path.join(results_dir, "%s.ALN" % OutputFileName)
    # Single-argument print() behaves the same with and without
    # ``from __future__ import print_function``.
    print(FileIN_Name)
    print(FileOUT_ALN)

    # The old code wrapped the input in os.path.join(os.curdir, ...), which
    # is a no-op for the absolute default path, so the path is passed as is.
    cline = MultipleAlignCL(FileIN_Name)
    cline.set_output(FileOUT_ALN)

    # NOTE(review): the former trailing ``cline.close()`` was dropped.  The
    # command-line builder is not a file-like object, so that call is
    # expected to raise AttributeError after the alignment has already run;
    # confirm against the installed Bio.Clustalw version.
    return Clustalw.do_alignment(cline)
def Align(self, s1, s2, result):
    """Align two sequences with ClustalW and record the matched positions.

    Both sequences are written as FASTA records (``s1`` and ``s2``) to a
    temporary input file, ClustalW is run with a second temporary file as
    its output, and every alignment column in which neither sequence has
    ``N``, ``n`` or a gap is reported through ``result.addPair``.

    Args:
        s1: First sequence, formatted with ``%s`` into the FASTA record.
        s2: Second sequence, formatted the same way.
        result: Object providing ``clear()`` and ``addPair(x1, x2, 0)``;
            it is cleared first and then filled in place.

    Returns:
        ``result``.  It is returned empty when the alignment does not
        contain exactly two sequences.
    """
    result.clear()

    handle_in, filename_in = tempfile.mkstemp()
    handle_out, filename_out = tempfile.mkstemp()
    try:
        try:
            os.write(handle_in, ">s1\n%s\n" % (s1))
            os.write(handle_in, ">s2\n%s\n" % (s2))
        finally:
            # ClustalW reopens both files by name, so the descriptors are
            # only needed for writing the input.
            os.close(handle_in)
            os.close(handle_out)

        cline = MultipleAlignCL(filename_in)
        cline.set_output(filename_out)
        align = do_alignment(cline)

        seqs = align.get_all_seqs()
        if len(seqs) != 2:
            return result

        a1 = seqs[0].seq.tostring()
        a2 = seqs[1].seq.tostring()

        # x1 / x2 are the ungapped positions reached in each sequence.
        x1 = 0
        x2 = 0
        for c1, c2 in zip(a1, a2):
            if c1 not in "Nn-" and c2 not in "Nn-":
                result.addPair(x1, x2, 0)
            # A column advances a sequence's position unless it is a gap
            # there; N/n still occupy a position.
            if c1 != "-":
                x1 += 1
            if c2 != "-":
                x2 += 1
        return result
    finally:
        # Runs on the early return and on failures as well, so the
        # temporary files are no longer left behind on those paths.
        os.remove(filename_in)
        os.remove(filename_out)
and "Multiple Sequence Alignments" in output: clustalw_exe = "clustalw" if not clustalw_exe: raise MissingExternalDependencyError(\ "Install clustalw or clustalw2 if you want to use Bio.Clustalw.") ################################################################# print "Checking error conditions" print "=========================" print "Empty file" input_file = "does_not_exist.fasta" assert not os.path.isfile(input_file) cline = MultipleAlignCL(input_file, command=clustalw_exe) try: align = Clustalw.do_alignment(cline) assert False, "Should have failed, returned %s" % repr(align) except IOError, err: print "Failed (good)" #Python 2.3 on Windows gave (0, 'Error') #Python 2.5 on Windows gives [Errno 0] Error assert "Cannot open sequence file" in str(err) \ or "not produced" in str(err) \ or str(err) == "[Errno 0] Error" \ or str(err) == "(0, 'Error')", str(err) print print "Single sequence" input_file = "Fasta/f001"
def align(self):
    """Fetch ClustalW work units from the Pham server and align them, forever.

    Each pass of the loop:

    1. requests a work unit with ``self.phamServer.request_seqs``; when the
       request fails, or the unit has no ``query_id``, it sleeps 30 seconds
       and tries again;
    2. writes one two-record FASTA file per database entry under
       ``self.rootdir`` and submits one alignment job per file to a local
       ``pp`` job server;
    3. hands jobs, accumulated results and the list of temporary files to
       ``self.process_jobs`` whenever more than 50 files are pending, and
       once more after the last record;
    4. reports all results with ``self.phamServer.report_scores``; a failure
       there prints the Pyro traceback and ends the process.

    The method does not return normally.
    """
    logo = os.path.join(os.path.dirname(__file__), "pixmaps/phamerator.png")

    def notify(message):
        """Show a desktop notification if pynotify is usable."""
        try:
            import pynotify
            if pynotify.init("Phamerator"):
                pynotify.Notification(
                    "Phamerator Update", message, "file:///%s" % logo).show()
        except Exception:
            # Notifications are optional: a missing pynotify or a failing
            # notification daemon must not stop the alignment work.
            pass

    ppservers = ()
    job_server = pp.Server(ppservers=ppservers, secret="secret")

    while True:
        print('getting work unit')
        try:
            clustalw_work_unit = self.phamServer.request_seqs(self.client)
            query_id = clustalw_work_unit.query_id
        except Exception as x:
            print(''.join(Pyro.util.getPyroTraceback(x)))
            # Without a fresh work unit there is nothing valid to align.
            # Falling through would use an undefined (first pass) or stale
            # (later passes) work unit, so wait and ask again.
            time.sleep(30)
            continue

        if not query_id:
            print('no work units available...sleeping')
            notify("No Clustalw alignments left to do...sleeping")
            time.sleep(30)
            continue

        # The returned values are not used below; the call is kept because
        # it is a request to the server.
        server, db = self.phamServer.request_db_info()
        print('got it')
        notify("Clustalw alignments in progress for id %s" % query_id)
        self._logger.log('aligning sequences')

        results = []
        open_files = []
        jobs = []

        numcpus = job_server.get_ncpus()
        print("numcpus = %s" % (numcpus,))

        query_translation = clustalw_work_unit.query_translation
        # Loop-invariant: decides which worker function the jobs use.
        use_new_runner = float(Bio.__version__) >= 1.56

        for record in clustalw_work_unit.database:
            subject_id, subject_translation = record.id, record.translation

            # One stem for all three file names.  str() keeps non-string
            # ids working, and building from the stem avoids rewriting a
            # '.fasta' that happens to occur inside self.rootdir.
            stem = os.path.join(
                self.rootdir,
                'temp' + str(query_id) + '_' + str(subject_id))
            clustalw_infile = stem + '.fasta'
            clustalw_outfile = stem + '.aln'

            with open(clustalw_infile, 'w') as f:
                f.write('>%s\n%s\n>%s\n%s\n' % (
                    query_id, query_translation,
                    subject_id, subject_translation))
            open_files.extend(
                [clustalw_infile, stem + '.dnd', clustalw_outfile])

            if use_new_runner:
                # pass the query id and the subject id to run_clustalw
                jobs.append(job_server.submit(
                    clustalwAligner.run_clustalw,
                    (clustalw_infile, query_id, subject_id), (), ()))
            else:
                cline = MultipleAlignCL(clustalw_infile)
                cline.set_output(clustalw_outfile)
                # pass the query id and the subject id to run_clustalw_old
                jobs.append(job_server.submit(
                    clustalwAligner.run_clustalw_old,
                    (query_id, subject_id, cline), (), ("Bio.Clustalw", )))

            # Three files are tracked per job; flush once more than 50 are
            # pending (same threshold as the former separate counter).
            if len(open_files) > 50:
                results = self.process_jobs(jobs, results, open_files)
                jobs = []
                open_files = []

        results = self.process_jobs(jobs, results, open_files)

        # must report everything back in atomic transaction
        print('reporting scores back to server')
        try:
            self.phamServer.report_scores(
                clustalw_work_unit, results, self.client)
        except Exception as x:
            print(''.join(Pyro.util.getPyroTraceback(x)))
            print('exiting on pyro traceback')
            sys.exit()