def __init__(self, organisms, ncpus=1, evalue=1e-10, recover=False,
             prefix='', matrix='BLOSUM80', queue=None):
    """Prepare an orthology (pangenome) run over several organisms.

    organisms -- iterable of organisms; copied into a list
                 (presumably paths to protein FASTA files -- confirm
                 against the callers)
    ncpus     -- forwarded to CommonMultiProcess.__init__
    evalue    -- BLAST e-value threshold, coerced to float
    recover   -- stored only; recovery is not implemented yet
    prefix    -- prefix for ortholog names; trailing underscores are
                 stripped
    matrix    -- name of the substitution matrix, stored as given
    queue     -- queue forwarded to CommonMultiProcess.__init__; when
                 None (the default) a new Queue.Queue() is created for
                 this instance
    """
    # A Queue.Queue() used directly as the default value would be built
    # once, at definition time, and shared by every instance that does
    # not pass its own queue.
    if queue is None:
        queue = Queue.Queue()
    CommonMultiProcess.__init__(self, ncpus, queue)

    # Blast
    self.organisms = list(organisms)
    self.dbs = {}
    self._prot2orgs = {}
    self.out = []
    self.evalue = float(evalue)
    # TODO: implement recovery
    self.recover = recover
    # self.results = {}
    self._blast = Blaster()
    self._pangenomeroom = None
    self.prefix = prefix.rstrip('_')
    self.matrix = matrix
    # Identifiers of the proteins already assigned to an ortholog group
    self._already = set()

    # Results
    self.orthologs = {}
    self.core = []
    self.accessory = []
    self.unique = []
def __init__(self, query, target, ncpus=1, evalue=1e-50, buildDB=True,
             bbh=True, recover=False, queue=None):
    """Prepare a BLAST search of ``query`` against ``target``.

    query   -- stored as given
    target  -- with buildDB true it is stored as ``self.target`` and
               ``self.db`` stays None (presumably the database is built
               from it later -- confirm); otherwise it is stored as
               ``self.db`` and ``self.target`` is None
    ncpus   -- forwarded to CommonMultiProcess.__init__ and also kept
               as an int in ``self.ncpus``
    evalue  -- BLAST e-value threshold, coerced to float
    buildDB -- selects how ``target`` is interpreted (see above)
    bbh     -- coerced to bool and stored
    recover -- stored as given
    queue   -- queue forwarded to CommonMultiProcess.__init__; when
               None (the default) a new Queue.Queue() is created for
               this instance
    """
    # A Queue.Queue() used directly as the default value would be built
    # once, at definition time, and shared by every instance that does
    # not pass its own queue.
    if queue is None:
        queue = Queue.Queue()
    CommonMultiProcess.__init__(self, ncpus, queue)

    # Blast
    self.query = query
    if buildDB:
        self.target = target
        self.db = None
    else:
        self.target = None
        self.db = target
    self.out = []
    self.evalue = float(evalue)
    self.bbh = bool(bbh)
    self.recover = recover
    self.ncpus = int(ncpus)
    self._kohits = []
    self.results = {}
    self._keggroom = None
    self._blast = Blaster()
def serialBBH(self):
    """Build the ortholog groups, one seed protein at a time.

    Every organism in ``self.organisms`` is opened as a FASTA file. Each
    protein not yet in ``self._already`` seeds a new group named
    ``self.prefix + <counter>``; one RunBBH task per other organism is
    queued and the hits are collected into ``self.orthologs``. When some
    organisms are still missing from the group, the proteins already in
    the group are used as queries against the missing organisms.

    Returns True on success, False when a BBH task reports a failure
    and None when ``self.killed`` is set while waiting for results.
    """
    orthindex = 1
    self._maxsubstatus = len(self._prot2orgs)

    for org in self.organisms:
        # Parse inside a context manager so the FASTA file is closed
        # as soon as its records have been read.
        with open(org) as handle:
            seqs = list(SeqIO.parse(handle, 'fasta'))

        # Iterate over each protein
        for seq in seqs:
            self._substatus += 1
            self.updateStatus(sub=True)

            # Log some info, might be useful for long running jobs
            logger.debug('Running orthology prediction for protein %d/%d',
                         self._substatus, self._maxsubstatus)
            logger.debug('Organism: %s, Protein: %s', org, seq.id)

            if seq.id in self._already:
                continue

            orthname = self.prefix + str(orthindex)
            orgsincluded = [org]
            self.orthologs[orthname] = [seq.id]

            query = '>%s\n%s\n' % (seq.id, str(seq.seq))
            # Same for every task spawned from this seed protein
            short = len(seq) < 30

            self.initiateParallel()

            # One BBH task for each other organism
            for otherorg in self.organisms:
                if org == otherorg:
                    continue

                uniqueid = self.getUniqueID()

                # Multi process
                obj = RunBBH(query, seq.id,
                             self.dbs[org], self.dbs[otherorg],
                             otherorg, self.evalue, self.matrix,
                             short=short, uniqueid=uniqueid,
                             useDisk=False)
                self._paralleltasks.put(obj)

            # Poison pill to stop the workers
            self.addPoison()

            outcome = self._waitBBHResults(seq.id, orthname, orgsincluded)
            if outcome is not True:
                return outcome

            self.killParallel()

            if len(orgsincluded) < len(self.organisms):
                logger.debug('Additional search on missing organisms for'
                             ' ortholog %s', orthname)

                # The group list grows while it is being iterated (hits
                # are appended by the result collection), so proteins
                # found during this pass are used as queries as well.
                for otherprotein in self.orthologs[orthname]:
                    if otherprotein == seq.id:
                        continue

                    neworg = self._prot2orgs[otherprotein]
                    if neworg == org:
                        continue

                    searcher = Blaster(useDisk=False)
                    searcher.retrieveFromDB(self.dbs[neworg], otherprotein)
                    query = searcher.retrieved

                    self.initiateParallel()

                    for evenneworg in self.organisms:
                        if evenneworg in orgsincluded:
                            continue

                        uniqueid = self.getUniqueID()

                        # NOTE(review): ``short`` is still derived from
                        # the seed protein, not from ``otherprotein``.
                        # Multi process
                        obj = RunBBH(query, otherprotein,
                                     self.dbs[neworg],
                                     self.dbs[evenneworg],
                                     evenneworg, self.evalue, self.matrix,
                                     short=short, uniqueid=uniqueid,
                                     useDisk=False)
                        self._paralleltasks.put(obj)

                    # Poison pill to stop the workers
                    self.addPoison()

                    # NOTE(review): failures are reported against the
                    # seed protein id, not ``otherprotein``.
                    outcome = self._waitBBHResults(seq.id, orthname,
                                                   orgsincluded)
                    if outcome is not True:
                        return outcome

                    self.killParallel()

            orthindex += 1

    return True


def _waitBBHResults(self, queryid, orthname, orgsincluded):
    """Collect BBH results until ``self.isTerminated()`` is true.

    Returns True when every result has been consumed, False when a
    task reported a failure and None when ``self.killed`` is set.
    """
    while True:
        if self.killed:
            logger.debug('Exiting for a kill signal')
            return None

        outcome = self._drainBBHResults(queryid, orthname, orgsincluded)
        if outcome is not True:
            return outcome

        if self.isTerminated():
            break
        self.sleeper.sleep(0.01)

    # Pick up whatever was queued between the last drain and the
    # termination check.
    return self._drainBBHResults(queryid, orthname, orgsincluded)


def _drainBBHResults(self, queryid, orthname, orgsincluded):
    """Consume the results currently waiting in ``self._parallelresults``.

    Each result is indexed as (hit, target, success flag). A non-empty
    hit not yet in ``self._already`` is appended to the ``orthname``
    group, its target is appended to ``orgsincluded`` and the hit is
    recorded in ``self._already``.

    Returns True once the queue is empty, False on the first result
    whose success flag is false and None when ``self.killed`` is set.
    """
    while not self._parallelresults.empty():
        if self.killed:
            logger.debug('Exiting for a kill signal')
            return None

        result = self._parallelresults.get()
        if not result[2]:
            logger.error('An error occurred for BBH on query %s' % queryid +
                         ' and target %s' % result[1])
            return False

        if result[0] and result[0] not in self._already:
            self.orthologs[orthname].append(result[0])
            orgsincluded.append(result[1])
            self._already.add(result[0])

    return True