Example #1
0
 def runBlast(self,short=False):
     """Split the query FASTA file into slices and BLAST each one.

     Sequences are read from ``self.query``. When ``short`` is true only
     the sequences whose length is <= 30 are kept and BLAST is invoked
     with ``task='blastp-short'``. Every non-empty slice is written to
     ``KEGG[short]_<n>.faa`` inside ``self._room`` (``<n>`` being the
     running ``self._substatus`` count) and its result path
     ``KEGG[short]_<n>.xml`` is appended to ``self.out``.

     When ``self.recover`` is set, a slice whose query and output files
     both exist, and whose output can still be parsed, is not run again.

     Returns False if ``self.killed`` is set or if
     ``self._blast.runBlast`` returns a false value, True otherwise.
     """
     # Close the input handle deterministically instead of leaking it.
     with open(self.query) as handle:
         lS = [s for s in SeqIO.parse(handle, 'fasta')
               if not short or len(s) <= 30]
     self._maxsubstatus = len(lS)

     prefix = 'KEGGshort' if short else 'KEGG'
     blast_opts = {'evalue': self.evalue, 'ncpus': self.ncpus}
     if short:
         blast_opts['task'] = 'blastp-short'

     # NOTE(review): slice_it is defined elsewhere; the meaning of the
     # second argument (slice count vs. slice size) should be confirmed.
     for seqs in slice_it(lS,10):
         if self.killed:
             logger.debug('Exiting for a kill signal')
             return False

         if len(seqs) == 0:
             continue
         self._substatus += len(seqs)
         self.updateStatus(sub=True)

         query = os.path.join(self._room,
                              '%s_%d.faa' % (prefix, self._substatus))
         out = os.path.join(self._room,
                            '%s_%d.xml' % (prefix, self._substatus))
         self.out.append(out)

         # If recovery, skip the unnecessary scans
         if self.recover and os.path.exists(query) and os.path.exists(out):
             # Last test: can it be parsed?
             try:
                 self._blast.parseBlast(out)
                 # Exhaust the hits so that a broken file fails here
                 for _ in self._blast.getHits(self.evalue):
                     pass
             except Exception:
                 # Unreadable output: fall through and run the slice again
                 logger.debug('Could not parse %s, running the slice again'
                              % out)
             else:
                 logger.debug('Skipping slice %s because has already been done'
                              % query)
                 continue

         # The file must be flushed and closed before BLAST reads it.
         with open(query, 'w') as handle:
             oseqs = SeqIO.write(seqs, handle, 'fasta')
         if oseqs != len(seqs):
             # Format the whole message at once: '%' binds tighter than '+',
             # so formatting only the second half raised a TypeError.
             logger.warning('Query splitting error! Expected %d, Printed %d'
                            % (len(seqs), oseqs))

         res = self._blast.runBlast(query, self.db, out, **blast_opts)
         if not res:
             return False
     return True
Example #2
0
def plotClusters(X, labels, params=None, method='', prefix='clusters'):
    """Save a scatter-plot matrix of clustered data as a PNG file.

    Every pair of columns of ``X`` gets its own subplot; the points are
    coloured according to their entry in ``labels``. The figure is written
    to ``'<prefix>_<method>.png'`` at 300 dpi.

    X      -- sequence of observations, converted with ``np.array``; the
              number of columns is taken from ``len(X[0])``
    labels -- cluster label of each observation, same order as ``X``
    params -- optional column names used as axis labels; when missing or
              empty the column index is used
    method -- clustering method name, used in the title and file name
    prefix -- file name prefix, also used in the title
    """
    from ductape.common.utils import slice_it
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm

    X = np.array(X)
    labels = np.array(labels)
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    n_dims = len(X[0])

    # One colour per cluster, taken from the first index of each slice of
    # the colormap range. Loop-invariant, so it is built only once.
    color = dict()
    for j, i in enumerate(slice_it(range(255), cols=n_clusters_)):
        color[j] = cm.RdYlGn(i[0])

    # Select members by the labels actually present, so that label sets
    # that are not exactly 0..n-1 (e.g. -1) are drawn as well.
    members = [labels == label for label in labels_unique]

    fig = plt.figure(1)
    fig.clf()
    # Subplots are filled row by row: x is the grid row, y the grid column.
    for figidx, (x, y) in enumerate(product(range(n_dims), repeat=2), 1):
        ax = fig.add_subplot(n_dims, n_dims, figidx)

        # Y labels only on the first column (also valid for a 1x1 grid)
        if y == 0:
            ax.set_ylabel(params[x] if params else x,
                          rotation='horizontal')
        # X labels only on the bottom row; the subplots have no vertical
        # gap, so labels on the other rows would sit on the next subplot.
        if x == n_dims - 1:
            ax.set_xlabel(params[y] if params else y)

        for k, my_members in enumerate(members):
            ax.plot(X[my_members, y], X[my_members, x], '.', color=color[k])

    fig.subplots_adjust(wspace=0, hspace=0)
    for ax in fig.axes:
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])
    fig.suptitle('Clusters (%s, %s): %d' % (prefix, method, n_clusters_))
    fig.savefig('%s_%s.png'%(prefix,method),dpi=300)
Example #3
0
def plotClusters(X, labels, params=None, method='', prefix='clusters'):
    """Save a scatter-plot matrix of clustered data as a PNG file.

    Every pair of columns of ``X`` gets its own subplot; the points are
    coloured according to their entry in ``labels``. The figure is written
    to ``'<prefix>_<method>.png'`` at 300 dpi.

    X      -- sequence of observations, converted with ``np.array``; the
              number of columns is taken from ``len(X[0])``
    labels -- cluster label of each observation, same order as ``X``
    params -- optional column names used as axis labels; when missing or
              empty the column index is used
    method -- clustering method name, used in the title and file name
    prefix -- file name prefix, also used in the title
    """
    from ductape.common.utils import slice_it
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm

    X = np.array(X)
    labels = np.array(labels)
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    n_dims = len(X[0])

    # One colour per cluster, taken from the first index of each slice of
    # the colormap range. Loop-invariant, so it is built only once.
    color = dict()
    for j, i in enumerate(slice_it(range(255), cols=n_clusters_)):
        color[j] = cm.RdYlGn(i[0])

    # Select members by the labels actually present, so that label sets
    # that are not exactly 0..n-1 (e.g. -1) are drawn as well.
    members = [labels == label for label in labels_unique]

    fig = plt.figure(1)
    fig.clf()
    # Subplots are filled row by row: x is the grid row, y the grid column.
    for figidx, (x, y) in enumerate(product(range(n_dims), repeat=2), 1):
        ax = fig.add_subplot(n_dims, n_dims, figidx)

        # Y labels only on the first column (also valid for a 1x1 grid)
        if y == 0:
            ax.set_ylabel(params[x] if params else x,
                          rotation='horizontal')
        # X labels only on the bottom row; the subplots have no vertical
        # gap, so labels on the other rows would sit on the next subplot.
        if x == n_dims - 1:
            ax.set_xlabel(params[y] if params else y)

        for k, my_members in enumerate(members):
            ax.plot(X[my_members, y], X[my_members, x], '.', color=color[k])

    fig.subplots_adjust(wspace=0, hspace=0)
    for ax in fig.axes:
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])
    fig.suptitle('Clusters (%s, %s): %d' % (prefix, method, n_clusters_))
    fig.savefig('%s_%s.png' % (prefix, method), dpi=300)
Example #4
0
def prepareColors(dReacts, colorrange):
    """Map the integers 1, 2, ... to upper-case hex colour strings.

    The largest value found in ``dReacts`` is passed to ``slice_it`` as the
    ``cols`` argument used to slice ``colorrange``. For every slice, the
    first three components of its last item are scaled by 255, rounded and
    converted with ``rgb_to_hex``. An empty slice reuses the colour of the
    previous slice ('#FFFFFF' when there is none yet).

    Returns an empty dict when ``dReacts`` is empty.
    """
    if not len(dReacts):
        return {}

    top = max(dReacts[key] for key in dReacts)

    palette = {}
    last_hex = '#FFFFFF'
    # NOTE(review): presumably slice_it yields `top` slices -- confirm
    # against its definition.
    for position, chunk in enumerate(slice_it(colorrange, cols=top), 1):
        if len(chunk) != 0:
            rgb = tuple(int(round(channel * 255))
                        for channel in chunk[-1][:3])
            last_hex = rgb_to_hex(rgb).upper()
        palette[position] = last_hex

    return palette
Example #5
0
    def runBlast(self, short=False):
        """Split the query FASTA file into slices and BLAST each one.

        Sequences are read from ``self.query``. When ``short`` is true only
        the sequences whose length is <= 30 are kept and BLAST is invoked
        with ``task='blastp-short'``. Every non-empty slice is written to
        ``KEGG[short]_<n>.faa`` inside ``self._room`` (``<n>`` being the
        running ``self._substatus`` count) and its result path
        ``KEGG[short]_<n>.xml`` is appended to ``self.out``.

        When ``self.recover`` is set, a slice whose query and output files
        both exist, and whose output can still be parsed, is not run again.

        Returns False if ``self.killed`` is set or if
        ``self._blast.runBlast`` returns a false value, True otherwise.
        """
        # Close the input handle deterministically instead of leaking it.
        with open(self.query) as handle:
            lS = [
                s for s in SeqIO.parse(handle, 'fasta')
                if not short or len(s) <= 30
            ]
        self._maxsubstatus = len(lS)

        prefix = 'KEGGshort' if short else 'KEGG'
        blast_opts = {'evalue': self.evalue, 'ncpus': self.ncpus}
        if short:
            blast_opts['task'] = 'blastp-short'

        # NOTE(review): slice_it is defined elsewhere; the meaning of the
        # second argument (slice count vs. slice size) should be confirmed.
        for seqs in slice_it(lS, 10):
            if self.killed:
                logger.debug('Exiting for a kill signal')
                return False

            if len(seqs) == 0:
                continue
            self._substatus += len(seqs)
            self.updateStatus(sub=True)

            query = os.path.join(self._room,
                                 '%s_%d.faa' % (prefix, self._substatus))
            out = os.path.join(self._room,
                               '%s_%d.xml' % (prefix, self._substatus))
            self.out.append(out)

            # If recovery, skip the unnecessary scans
            if (self.recover and os.path.exists(query)
                    and os.path.exists(out)):
                # Last test: can it be parsed?
                try:
                    self._blast.parseBlast(out)
                    # Exhaust the hits so that a broken file fails here
                    for _ in self._blast.getHits(self.evalue):
                        pass
                except Exception:
                    # Unreadable output: fall through and run the slice again
                    logger.debug(
                        'Could not parse %s, running the slice again' % out)
                else:
                    logger.debug(
                        'Skipping slice %s because has already been done' %
                        query)
                    continue

            # The file must be flushed and closed before BLAST reads it.
            with open(query, 'w') as handle:
                oseqs = SeqIO.write(seqs, handle, 'fasta')
            if oseqs != len(seqs):
                # Format the whole message at once: '%' binds tighter than
                # '+', so formatting only the second half raised a TypeError.
                logger.warning(
                    'Query splitting error! Expected %d, Printed %d' %
                    (len(seqs), oseqs))

            res = self._blast.runBlast(query, self.db, out, **blast_opts)
            if not res:
                return False
        return True