def _run(self):
    """Prepare the temporary input files, call ``self.pagan.run`` and return its result.

    The names of the temporary files are stored on the instance as
    ``query_fname``, ``out_fname``, ``alignment_fname`` and ``tree_fname``
    (``tree_fname`` is ``None`` when ``self._tree`` is falsy).

    When the module-level ``DEBUG`` flag is truthy, one line of statistics
    (gene family, result, size statistics of the query and alignment files,
    elapsed wall-clock time of the run) is handed to ``threadsafe_io`` for
    ``pagan_stats.txt``.

    Returns:
        Whatever ``self.pagan.run`` returned.
    """
    # Queries are written with tmpfasta_orfs(..., strand=True); the plain
    # alternative used previously was tmpfasta(self._queries).
    self.query_fname = tmpfasta_orfs(self._queries, strand=True)
    self.out_fname = tmpfile()
    # Alternative kept from the original: tmpfasta_kill_n(self._alignment)
    self.alignment_fname = tmpfasta(self._alignment)
    self.tree_fname = tmpfile(self._tree) if self._tree else None

    start_time = time.time()
    result = self.pagan.run(
        self.query_fname,
        self.out_fname,
        self.alignment_fname,
        self.tree_fname,
        self.identity,
        self.overlap,
    )
    elapsed_time = time.time() - start_time

    if DEBUG:
        # The file statistics are only needed for the debug line, so they are
        # computed here instead of on every run.
        q_count, q_sum, q_min, q_max, q_mean, q_sd = fasta_stats(self.query_fname)
        a_count, a_sum, a_min, a_max, a_mean, a_sd = fasta_stats(self.alignment_fname)

        threadsafe_io(
            'pagan_stats.txt',
            "%s %d %d %d %d %d %.3f %.3f %d %d %d %d %.3f %.3f %d" % (
                self._genefamily, result,
                q_count, q_sum, q_min, q_max, q_mean, q_sd,
                a_count, a_sum, a_min, a_max, a_mean, a_sd,
                elapsed_time,
            ),
        )

    return result
def _run(self):
    """Write the queries to a temporary FASTA file, call ``self.blastx.run`` and return its result.

    The temporary file names are stored on the instance as ``query_fname``
    and ``out_fname``.

    When the module-level ``DEBUG`` flag is truthy, one line per entry of
    ``self.results`` is handed to ``threadsafe_io`` for ``blastx_stats.txt``.
    The last field of each line is
    ``(pident / 100) * |qstart - qend| / query_length``.

    Returns:
        Whatever ``self.blastx.run`` returned.
    """
    self.query_fname = tmpfasta(self.queries)
    self.out_fname = tmpfile()

    result = self.blastx.run(self.query_fname, self.database, self.out_fname)

    if DEBUG:
        # Length lookup keyed by sequence id; only needed for the debug lines.
        # NOTE(review): the query file is written from self.queries while the
        # lengths come from self.input - presumably the same sequences; confirm.
        query_lengths = dict((seq.id, len(seq)) for seq in self.input)

        for br in self.results:
            query_length = query_lengths[br.qseqid]
            query_span = max(br.qstart, br.qend) - min(br.qstart, br.qend)
            threadsafe_io(
                'blastx_stats.txt',
                "%s %s %.3f %d %d %d %d %d %.3e %d %.3f" % (
                    br.qseqid, br.sseqid, br.pident, br.length,
                    br.qstart, br.qend, br.sstart, br.send,
                    br.evalue,
                    query_length,
                    ((br.pident / 100.0) * query_span) / float(query_length),
                ),
            )

    return result
def job_callback(self, job):
    """Log a finished job, call ``self._progress()`` and store the job's files.

    For a successful job (``job.success()`` is truthy) a new ``.protein``
    temporary file is requested in ``self.directory`` and the job's protein
    alignment, queries, reference alignment, optional tree and (when
    ``self.db.nucleotide`` is truthy) nucleotide alignment are copied next to
    it, all sharing the same path stem. The stem's base name is then
    registered with ``self.info.put_genefamily2filename``.

    For an unsuccessful job only the gene family is registered, without a
    file name.
    """
    summary = (str(job), job.genefamily, ','.join(seq.id for seq in job.input))
    self.log.debug("callback from %s: %s + %s" % summary)
    self.log.debug("protein alignment file = %s" % job.protein_alignment)
    if self.db.nucleotide:
        self.log.debug("nucleotide alignment file = %s" % job.nucleotide_alignment)

    self._progress()

    # Failure path first: nothing to copy, just record the gene family.
    if not job.success():
        self.info.put_genefamily2filename(job.genefamily)
        return

    suffix = '.protein'
    dst = tmpfile(directory=self.directory, suffix=suffix)
    # Strip the suffix to get the stem shared by all sibling files.
    dst_base = basename(dst)[:-len(suffix)]
    stem = dst[:-len(suffix)]

    self.log.debug("cp %s %s" % (job.protein_alignment, dst))
    shutil.copyfile(job.protein_alignment, dst)
    shutil.copyfile(job.query_fname, stem + '.queries')
    shutil.copyfile(job.alignment_fname, stem + '.reference')

    if job.tree_fname:
        shutil.copyfile(job.tree_fname, stem + '.tree')

    if self.db.nucleotide:
        nucleotide_dst = stem + '.nucleotide'
        self.log.debug("cp %s %s" % (job.nucleotide_alignment, nucleotide_dst))
        shutil.copyfile(job.nucleotide_alignment, nucleotide_dst)

    self.info.put_genefamily2filename(job.genefamily, dst_base)
def extract_all(self):
    """Write every entry of ``self.data`` to a new temporary file and return its name.

    ``self.data`` is iterated by key; each item under a key is rendered with
    its ``format`` method, using ``'protein'`` when ``self.nucleotide`` is
    truthy and ``'fasta'`` otherwise. Trailing whitespace is stripped from
    the rendered text and it is printed as one record followed by a newline.
    """
    out_fname = tmpfile()

    with open(out_fname, 'w') as handle:
        for family in self.data:
            for gene in self.data[family]:
                if self.nucleotide:
                    format_name = 'protein'
                else:
                    format_name = 'fasta'
                record_text = gene.format(format_name).rstrip()
                print >> handle, record_text

    return out_fname
def _write_to_archive(self, data, zfile, zname):
    """Serialise *data* as JSON and add it to *zfile* under the name *zname*.

    The JSON text is written to a temporary file, which is passed to
    ``zfile.write(..., arcname=zname)`` and then deleted.

    Args:
        data: Object accepted by ``json.dumps``.
        zfile: Archive object with a ``write(filename, arcname=...)`` method
            (presumably a ``zipfile.ZipFile`` - confirm against callers).
        zname: Name the entry gets inside the archive.

    Raises:
        Whatever ``json.dumps``, ``open`` or ``zfile.write`` raise; the
        temporary file is removed in that case as well.
    """
    # Serialise first so a non-serialisable object fails before any
    # temporary file is requested.
    payload = json.dumps(data)

    fname = tmpfile()
    try:
        with open(fname, 'w') as f:
            f.write(payload)
        zfile.write(fname, arcname=zname)
    finally:
        # Always clean up, but do not mask the original error if the file
        # was never created.
        if os.path.exists(fname):
            os.remove(fname)