def _psipred(self):
    """
    Produce a psipred prediction for this object's sequence.

    The sequence record is written as fasta into the prediction's scratch
    directory, blastpgp (npasses=3, database "nr") is run on it to create a
    checkpoint file, Psipred is run with that checkpoint as its profile, and
    the object built by PsipredFactory is added to self.session and committed.

    Returns self.psipred, the value that was passed to Psipred as the
    ``horiz`` output.
    """
    debug("Running psipred prediction")

    import subprocess
    from Bio import SeqIO
    from Bio.Blast.NCBIStandalone import blastpgp
    # NOTE(review): TemporaryRecordFile is not referenced in this method; the
    # import is kept only so import-time behaviour stays the same.
    from hpf.seq import TemporaryRecordFile
    from hpf.pdb.psipred import Psipred, PsipredOptions

    # Everything for this prediction lives under <scratch>/<prediction_code>.
    work_dir = os.path.join(self.scratch, self.prediction_code)
    fasta_path = os.path.join(work_dir, self.prediction_code + ".fasta")
    checkpoint_path = fasta_path + ".chk"

    with open(fasta_path, "w") as fasta_handle:
        SeqIO.write([self.sequence.record], fasta_handle, "fasta")

    horiz_path = self.psipred

    # Resolve the blastpgp executable through the shell's PATH lookup.
    which_proc = subprocess.Popen(["which", "blastpgp"],
                                  stdout=subprocess.PIPE)
    which_stdout = which_proc.communicate()[0]
    blastpgp_exe = which_stdout.strip()
    debug("Using", blastpgp_exe)

    blast_out, blast_err = blastpgp(
        blastpgp_exe,
        "nr",
        fasta_path,
        npasses=3,
        checkpoint_outfile=checkpoint_path,
        expectation=1e-4,
        model_threshold=1e-4,
        align_outfile="/dev/null",
    )
    # Draining both streams also waits for blastpgp to finish, which is
    # required before the checkpoint file is handed to Psipred.
    debug(blast_out.readlines())
    debug(blast_err.readlines())

    psipred_opts = PsipredOptions(
        fasta_path,
        profile=checkpoint_path,
        output=horiz_path + ".1",
        output2=horiz_path + ".2",
        horiz=horiz_path,
        cwd=work_dir,
    )
    prediction = Psipred(psipred_opts).run()

    # Persist the prediction, keyed on the sequence id.
    record = PsipredFactory().create(prediction,
                                     sequence_key=self.sequence.id)
    self.session.add(record)
    self.session.commit()

    return horiz_path
def run(self):
    """
    Cobbles together elements for running Psipred (writes fasta, runs blast
    to get checkpoint, runs psipred and psipass2, and then parses results and
    uploads to DB if set to).

    Steps, in order:
      1. Write self.sequence.record to self.fasta_file in fasta format.
      2. Run blastpgp (npasses=3) against self.nr_db, producing the
         checkpoint file self.chkpt_file.
      3. Run Psipred32 with that checkpoint as profile; the result is kept
         on self.prediction.
      4. IF self.dbstore is true, add a Psipred ORM object to the hpf
         database and set self.dbo.

    If the commit raises IntegrityError, the session is rolled back and the
    row already stored for (self.sequence.id, self.ginzu_version) is loaded
    into self.dbo instead. If no such row exists, the IntegrityError was not
    caused by a duplicate entry and is re-raised.

    RETURNS hpf.pdb.psipred.PsipredPrediction object
    """
    import subprocess
    from Bio import SeqIO
    from Bio.Blast.NCBIStandalone import blastpgp
    from hpf.hddb.db import Psipred as PsipredORM, PsipredFactory
    from sqlalchemy.exc import IntegrityError

    # Write fasta file
    if self.debug:
        print("Psipred: writing fasta file")
    with open(self.fasta_file, 'w') as handle:
        SeqIO.write([self.sequence.record], handle, "fasta")

    # Get exe path and run blast
    if self.debug:
        print("Psipred: running blastpgp against '{0}' DB to create checkpoint file".format(self.nr_db))
    blast_cmd = subprocess.Popen(["which", "blastpgp"],
                                 stdout=subprocess.PIPE).communicate()[0].strip()
    result, error = blastpgp(blastcmd=blast_cmd,
                             program='blastpgp',
                             database=self.nr_db,
                             infile=self.fasta_file,
                             npasses=3,
                             checkpoint_outfile=self.chkpt_file,
                             expectation=1e-4,
                             model_threshold=1e-4,
                             align_outfile="/dev/null")

    # Note: must call something that blocks on blastpgp results (need to wait
    # for cmd to finish) before the checkpoint file is used below.
    res = result.read()
    err = error.read()
    if self.debug:
        # The double space reproduces the output of the former
        # two-argument print statements exactly.
        print("Result:  {0}".format(res))
        print("Error/Warning:  {0}".format(err))

    # Create Psipred options object and run Psipred 3.2 on them
    if self.debug:
        print("Psipred: running Psipred 3.2")
    options = PsipredOptions(self.fasta_file,
                             profile=self.chkpt_file,
                             output=self.psipred_file + ".1",
                             output2=self.psipred_file + ".2",
                             horiz=self.psipred_file,
                             cwd=self.dir)
    self.prediction = Psipred32(options).run()

    # Add to database (optional) and return
    if self.dbstore:
        if self.debug:
            print("Psipred: adding psipred DBO to hpf database")
        psipred_dbo = PsipredFactory().create(self.prediction,
                                              sequence_key=self.sequence.id,
                                              ginzu_version=self.ginzu_version)
        self.session.add(psipred_dbo)
        try:
            self.session.commit()
        except IntegrityError:
            # The session must be rolled back before it can be queried again.
            self.session.rollback()
            psipred_dbo = self.session.query(PsipredORM).filter_by(
                sequence_key=self.sequence.id,
                ginzu_version=self.ginzu_version).first()
            if psipred_dbo is None:
                # No existing row: the constraint failure was something other
                # than a duplicate entry, so surface the original error
                # rather than failing later on refresh(None).
                raise
            # Report the same key that was used for the insert and lookup.
            print("Psipred entry for <seq {0}, ginzu_version {1}> already exists in DB. Returning existing object".format(self.sequence.id, self.ginzu_version))
            self.session.refresh(psipred_dbo)
        self.dbo = psipred_dbo
    return self.prediction