def runBlast(self, short=False):
    """Split the query FASTA file into slices and BLAST each slice.

    The sequences read from ``self.query`` are handed to ``slice_it`` and
    every non-empty slice is written to its own FASTA file inside
    ``self._room`` and scanned against ``self.db``. The path of every
    expected XML output is appended to ``self.out``.

    short -- when True only the sequences whose length is at most 30 are
             scanned, the files are named ``KEGGshort_*`` and BLAST is
             run with ``task='blastp-short'``; otherwise every sequence
             is scanned and the files are named ``KEGG_*``

    Returns False when ``self.killed`` is set or when a BLAST run
    reports a failure, True otherwise.
    """
    # Close the query handle as soon as the records have been collected
    with open(self.query) as handle:
        lS = [s for s in SeqIO.parse(handle, 'fasta')
              if not short or len(s) <= 30]

    self._maxsubstatus = len(lS)

    prefix = 'KEGGshort' if short else 'KEGG'

    # NOTE(review): slice_it is defined elsewhere; the empty-slice check
    # below suggests it can yield empty chunks - verify against its code
    for seqs in slice_it(lS, 10):
        if self.killed:
            logger.debug('Exiting for a kill signal')
            return False

        if len(seqs) == 0:
            continue

        self._substatus += len(seqs)
        self.updateStatus(sub=True)

        # File names carry the running count of the sequences handled so far
        query = os.path.join(self._room,
                             '%s_%d.faa' % (prefix, self._substatus))
        out = os.path.join(self._room,
                           '%s_%d.xml' % (prefix, self._substatus))
        self.out.append(out)

        # If recovery, skip the unnecessary scans
        if self.recover and os.path.exists(query) and os.path.exists(out):
            # Last test: can it be parsed?
            try:
                self._blast.parseBlast(out)
                for hits in self._blast.getHits(self.evalue):
                    pass
            except Exception:
                # Best effort: an unreadable output simply means that the
                # slice has to be scanned again
                logger.debug('Slice %s could not be recovered, '
                             'running it again', query)
            else:
                logger.debug(
                    'Skipping slice %s because has already been done',
                    query)
                continue

        # The handle must be closed (and flushed) before BLAST reads the file
        with open(query, 'w') as handle:
            oseqs = SeqIO.write(seqs, handle, 'fasta')

        if oseqs != len(seqs):
            # Arguments are passed to the logger: the previous string
            # concatenation applied both values to the second fragment only
            # and raised a TypeError instead of emitting the warning
            logger.warning('Query splitting error! Expected %d, Printed %d',
                           len(seqs), oseqs)

        if short:
            res = self._blast.runBlast(query, self.db, out,
                                       evalue=self.evalue,
                                       ncpus=self.ncpus,
                                       task='blastp-short')
        else:
            res = self._blast.runBlast(query, self.db, out,
                                       evalue=self.evalue,
                                       ncpus=self.ncpus)
        if not res:
            return False

    return True
def plotClusters(X, labels, params=None, method='', prefix='clusters'):
    """Save a grid of pairwise scatter plots coloured by cluster.

    One subplot is drawn for every ordered pair (x, y) of columns of X,
    plotting column y on the horizontal axis against column x on the
    vertical one. The figure is saved as ``<prefix>_<method>.png``
    at 300 dpi.

    X -- 2D array-like (converted with np.array); presumably one row per
         sample and one column per parameter
    labels -- array-like of cluster labels, used as a row mask on X
    params -- optional sequence of names indexed by column; when it is
              empty or None the column indexes are used as axis labels
    method -- string used in the figure title and in the file name
    prefix -- string used in the figure title and in the file name
    """
    from ductape.common.utils import slice_it
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm

    X = np.array(X)
    labels = np.array(labels)

    # NOTE(review): only the number of distinct labels is used; members are
    # selected below with labels == 0 .. n_clusters_ - 1, so labels outside
    # that range (e.g. negative ones) are never drawn - confirm it is wanted
    n_clusters_ = len(np.unique(labels))

    # The colour table depends only on the number of clusters, so it is
    # built once instead of once per subplot
    color = {}
    for j, chunk in enumerate(slice_it(range(255), cols=n_clusters_)):
        color[j] = cm.RdYlGn(chunk[0])

    fig = plt.figure(1)
    fig.clf()

    n_dims = len(X[0])

    # Subplot indexes are 1-based
    for figidx, (x, y) in enumerate(product(range(n_dims), repeat=2), 1):
        ax = fig.add_subplot(n_dims, n_dims, figidx)

        # First column of the grid: label the vertical axis
        if figidx % n_dims == 1:
            if not params:
                ax.set_ylabel(x, rotation='horizontal')
            else:
                ax.set_ylabel(params[x], rotation='horizontal')

        # NOTE(review): this condition also holds for the first
        # 2 * n_dims + 1 subplots, not only for the last ones - confirm
        # whether just the bottom row was meant to be labelled
        if abs((n_dims * n_dims) % figidx - n_dims) <= n_dims:
            if not params:
                ax.set_xlabel(y)
            else:
                ax.set_xlabel(params[y])

        for k in range(n_clusters_):
            my_members = labels == k
            ax.plot(X[my_members, y], X[my_members, x], '.',
                    color=color[k])

    fig.subplots_adjust(wspace=0, hspace=0)

    for ax in fig.axes:
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])

    fig.suptitle('Clusters (%s, %s): %d' % (prefix, method, n_clusters_))
    fig.savefig('%s_%s.png' % (prefix, method), dpi=300)
def plotClusters(X, labels, params=None, method='', prefix='clusters'):
    """Save a grid of pairwise scatter plots coloured by cluster.

    One subplot is drawn for every ordered pair (x, y) of columns of X,
    plotting column y on the horizontal axis against column x on the
    vertical one. The figure is saved as ``<prefix>_<method>.png``
    at 300 dpi.

    X -- 2D array-like (converted with np.array); presumably one row per
         sample and one column per parameter
    labels -- array-like of cluster labels, used as a row mask on X
    params -- optional sequence of names indexed by column; when it is
              empty or None the column indexes are used as axis labels
    method -- string used in the figure title and in the file name
    prefix -- string used in the figure title and in the file name
    """
    from ductape.common.utils import slice_it
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm

    X = np.array(X)
    labels = np.array(labels)

    # NOTE(review): only the number of distinct labels is used; members are
    # selected below with labels == 0 .. n_clusters_ - 1, so labels outside
    # that range (e.g. negative ones) are never drawn - confirm it is wanted
    n_clusters_ = len(np.unique(labels))

    # The colour table depends only on the number of clusters, so it is
    # built once instead of once per subplot
    color = {}
    for j, chunk in enumerate(slice_it(range(255), cols=n_clusters_)):
        color[j] = cm.RdYlGn(chunk[0])

    fig = plt.figure(1)
    fig.clf()

    n_dims = len(X[0])

    # Subplot indexes are 1-based
    for figidx, (x, y) in enumerate(product(range(n_dims), repeat=2), 1):
        ax = fig.add_subplot(n_dims, n_dims, figidx)

        # First column of the grid: label the vertical axis
        if figidx % n_dims == 1:
            if not params:
                ax.set_ylabel(x, rotation='horizontal')
            else:
                ax.set_ylabel(params[x], rotation='horizontal')

        # NOTE(review): this condition also holds for the first
        # 2 * n_dims + 1 subplots, not only for the last ones - confirm
        # whether just the bottom row was meant to be labelled
        if abs((n_dims * n_dims) % figidx - n_dims) <= n_dims:
            if not params:
                ax.set_xlabel(y)
            else:
                ax.set_xlabel(params[y])

        for k in range(n_clusters_):
            my_members = labels == k
            ax.plot(X[my_members, y], X[my_members, x], '.',
                    color=color[k])

    fig.subplots_adjust(wspace=0, hspace=0)

    for ax in fig.axes:
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])

    fig.suptitle('Clusters (%s, %s): %d' % (prefix, method, n_clusters_))
    fig.savefig('%s_%s.png' % (prefix, method), dpi=300)
def prepareColors(dReacts, colorrange):
    """Map the integers 1..N to upper-case hex colour strings.

    dReacts -- mapping whose largest value is passed to slice_it as the
               number of slices to cut colorrange into
    colorrange -- sequence of colours; the first three components of the
                  last colour of each slice are scaled by 255, rounded and
                  converted with rgb_to_hex

    Returns a dictionary slice position (starting from 1) -> hex string;
    an empty slice reuses the colour of the previous position ('#FFFFFF'
    when there is none). An empty dReacts yields an empty dictionary.
    """
    if not dReacts:
        return {}

    top = max(dReacts[key] for key in dReacts)

    hexs = {}
    last_hex = '#FFFFFF'
    for position, chunk in enumerate(slice_it(colorrange, cols=top), 1):
        if len(chunk) != 0:
            # The representative colour of a slice is its last entry
            rgb = tuple(int(round(channel * 255))
                        for channel in chunk[-1][:3])
            last_hex = rgb_to_hex(rgb).upper()
        hexs[position] = last_hex

    return hexs
def runBlast(self, short=False):
    """Split the query FASTA file into slices and BLAST each slice.

    The sequences read from ``self.query`` are handed to ``slice_it`` and
    every non-empty slice is written to its own FASTA file inside
    ``self._room`` and scanned against ``self.db``. The path of every
    expected XML output is appended to ``self.out``.

    short -- when True only the sequences whose length is at most 30 are
             scanned, the files are named ``KEGGshort_*`` and BLAST is
             run with ``task='blastp-short'``; otherwise every sequence
             is scanned and the files are named ``KEGG_*``

    Returns False when ``self.killed`` is set or when a BLAST run
    reports a failure, True otherwise.
    """
    # Close the query handle as soon as the records have been collected
    with open(self.query) as handle:
        lS = [s for s in SeqIO.parse(handle, 'fasta')
              if not short or len(s) <= 30]

    self._maxsubstatus = len(lS)

    prefix = 'KEGGshort' if short else 'KEGG'

    # NOTE(review): slice_it is defined elsewhere; the empty-slice check
    # below suggests it can yield empty chunks - verify against its code
    for seqs in slice_it(lS, 10):
        if self.killed:
            logger.debug('Exiting for a kill signal')
            return False

        if len(seqs) == 0:
            continue

        self._substatus += len(seqs)
        self.updateStatus(sub=True)

        # File names carry the running count of the sequences handled so far
        query = os.path.join(self._room,
                             '%s_%d.faa' % (prefix, self._substatus))
        out = os.path.join(self._room,
                           '%s_%d.xml' % (prefix, self._substatus))
        self.out.append(out)

        # If recovery, skip the unnecessary scans
        if self.recover and os.path.exists(query) and os.path.exists(out):
            # Last test: can it be parsed?
            try:
                self._blast.parseBlast(out)
                for hits in self._blast.getHits(self.evalue):
                    pass
            except Exception:
                # Best effort: an unreadable output simply means that the
                # slice has to be scanned again
                logger.debug('Slice %s could not be recovered, '
                             'running it again', query)
            else:
                logger.debug(
                    'Skipping slice %s because has already been done',
                    query)
                continue

        # The handle must be closed (and flushed) before BLAST reads the file
        with open(query, 'w') as handle:
            oseqs = SeqIO.write(seqs, handle, 'fasta')

        if oseqs != len(seqs):
            # Arguments are passed to the logger: the previous string
            # concatenation applied both values to the second fragment only
            # and raised a TypeError instead of emitting the warning
            logger.warning('Query splitting error! Expected %d, Printed %d',
                           len(seqs), oseqs)

        if short:
            res = self._blast.runBlast(query, self.db, out,
                                       evalue=self.evalue,
                                       ncpus=self.ncpus,
                                       task='blastp-short')
        else:
            res = self._blast.runBlast(query, self.db, out,
                                       evalue=self.evalue,
                                       ncpus=self.ncpus)
        if not res:
            return False

    return True