Code example #1
    def getNchanged(self):
        """Return the number of annotations changed by the model
        (sum of included and excluded genes).
        """
        i_use = SP.setxor1d(SP.arange(self.Pi.shape[1]),
                            SP.hstack([self.iLatentSparse, self.iLatent]))
        nChanged = SP.sum((self.Pi > .5) !=
                          (self.W.C[:, :, 0] > .5), 0)[i_use] * 1.0
        nChangedRel = nChanged / SP.sum((self.Pi > .5), 0)[i_use]
        return (nChanged, nChangedRel)
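A minimal usage sketch, assuming FA is a hypothetical, already-fitted factor-analysis
object exposing this method; the names below are illustrative only.

# Hypothetical usage (FA is an assumed fitted model, not defined in this snippet).
# nChanged[i]    -> number of gene annotations flipped by the model for annotated factor i
# nChangedRel[i] -> the same count relative to the factor's prior gene-set size
nChanged, nChangedRel = FA.getNchanged()
print(nChanged[:5], nChangedRel[:5])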
Code example #2
File: core.py    Project: huangyh09/slalom
    def regressOut(self,
                   idx=None,
                   terms=None,
                   use_latent=False,
                   use_lm=False,
                   Yraw=None):
        """Regress out unwanted variation

        Args:
            idx          (vector_like): Indices of factors to be regressed out
            terms        (list):        Names of annotated terms to be regressed out
                                        (alternative to idx)
            use_latent   (bool):        Boolean variable indicating whether to regress out
                                        the unwanted variation on the low-dimensional latent
                                        space or the high-dimensional gene expression space.
            use_lm       (bool):        Regress out the factors by fitting a linear model for each gene
            Yraw         (array_like):  Optionally a gene expression array can be passed from which the factors are regressed out
        Returns:
            A matrix containing the corrected expression values.
        """

        #if (idx is None) and (terms is None):
        #    raise Exception('Provide either indices or terms to regress out')

        if terms is None:
            idx = SP.array(idx)
        else:
            idx = self.getTermIndex(terms)

        if not use_lm and Yraw is None:
            isOn = (self.W.C[:, :, 0] > .5) * 1.0
            if not use_latent:
                Ycorr = self.Z.E1 - SP.dot(
                    self.S.E1[:, idx], (isOn[:, idx] * self.W.E1[:, idx]).T)
            else:
                idx_use = SP.setxor1d(SP.arange(self.S.E1.shape[1]), idx)
                Ycorr = SP.dot(self.S.E1[:, idx_use],
                               (isOn[:, idx_use] * self.W.E1[:, idx_use]).T)
        else:
            if Yraw is None:
                Y = self.Z.E1
            else:
                Y = Yraw.copy()
            Ycorr = SP.zeros(Y.shape)

            if terms is None:
                X = self.S.E1[:, idx]
            else:
                X = self.getX(terms=terms)

            for ig in SP.arange(Y.shape[1]):
                lm = LinearRegression()
                lm.fit(X, Y[:, ig])
                Ycorr[:, ig] = Y[:, ig] - lm.predict(X)

        return Ycorr
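A short usage sketch, assuming FA is a fitted model from huangyh09/slalom exposing
regressOut as above; the term name 'cell cycle' is purely illustrative.

# Hypothetical usage: remove the variation explained by one annotated factor.
Ycorr = FA.regressOut(terms=['cell cycle'])                    # corrected expression matrix
Ycorr_lm = FA.regressOut(terms=['cell cycle'], use_lm=True)    # per-gene linear-model variant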
Code example #3
    def getTerms(self,
                 annotated=True,
                 unannotated=True,
                 unannotated_sparse=True):
        """Get terms

        """
        terms = list()
        if unannotated_sparse:
            terms.extend(self.terms[self.iLatentSparse])

        if unannotated:
            terms.extend(self.terms[self.iLatent])

        if annotated:
            terms.extend(self.terms[SP.setxor1d(
                SP.hstack([
                    SP.where(self.terms == 'bias')[0], self.iLatentSparse,
                    self.iLatent
                ]), SP.arange(len(self.terms)))])

        return terms
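A brief usage sketch, again assuming a hypothetical fitted model FA; it simply restricts
the returned list to annotated terms.

# Hypothetical usage: list only annotated terms, excluding unannotated (sparse) factors.
annotated_terms = FA.getTerms(annotated=True, unannotated=False, unannotated_sparse=False)
print(len(annotated_terms), annotated_terms[:3])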
Code example #4
def findDuplicateVectors(vec, tol=vTol, equivPM=False):
    """
    Find vectors in an array that are equivalent to within
    a specified tolerance

      USAGE:

          eqv, uid = findDuplicateVectors(vec, *tol)

      INPUT:

          1) vec is n x m, a double array of m horizontally concatenated
                           n-dimensional vectors.
         *2) tol is 1 x 1, a scalar tolerance.  If not specified, the default
                           tolerance is 1e-14.
         *3) set equivPM to True if vec and -vec are to be treated as equivalent

      OUTPUT:

          1) eqv is 1 x p, a list of p equivalence relationships.
          2) uid is a list of column indices of the unique vectors
             (one representative per duplicate group plus all singletons).

      NOTES:

          Each equivalence relationship is a 1 x q vector of indices that
          represent the locations of duplicate columns/entries in the array
          vec.  For example:

                | 1     2     2     2     1     2     7 |
          vec = |                                       |
                | 2     3     5     3     2     3     3 |

          eqv = [[1x2 double]    [1x3 double]], where

          eqv[0] = [0  4]
          eqv[1] = [1  3  5]
    """

    vlen = vec.shape[1]
    vlen0 = vlen
    orid = asarray(range(vlen), dtype="int")

    torid = orid.copy()
    tvec = vec.copy()

    eqv = []
    eqvTot = 0
    uid = 0

    ii = 1
    while vlen > 1 and ii < vlen0:
        dupl = tile(tvec[:, 0], (vlen, 1))

        if not equivPM:
            diff = abs(tvec - dupl.T).sum(0)
            match = abs(diff[1:]) <= tol  # logical to find duplicates
        else:
            diffn = abs(tvec - dupl.T).sum(0)
            matchn = abs(diffn[1:]) <= tol
            diffp = abs(tvec + dupl.T).sum(0)
            matchp = abs(diffp[1:]) <= tol
            match = matchn + matchp

        kick = hstack([True, match])  # pick self too

        if kick.sum() > 1:
            eqv += [torid[kick].tolist()]
            eqvTot = hstack([eqvTot, torid[kick]])
            uid = hstack([uid, torid[kick][0]])

        cmask = ones((vlen, ))
        cmask[kick] = 0
        cmask = cmask != 0

        tvec = tvec[:, cmask]

        torid = torid[cmask]

        vlen = tvec.shape[1]

        ii += 1

    if len(eqv) == 0:
        eqvTot = []
        uid = []
    else:
        eqvTot = eqvTot[1:].tolist()
        uid = uid[1:].tolist()

    # find all single-instance vectors
    singles = sort(setxor1d(eqvTot, range(vlen0)))

    # now construct list of unique vector column indices
    uid = int_(sort(union1d(uid, singles))).tolist()
    # make sure is a 1D list
    if not hasattr(uid, '__len__'):
        uid = [uid]

    return eqv, uid
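A runnable sketch reproducing the docstring example. It assumes findDuplicateVectors is
importable and that the numpy names it uses internally (asarray, tile, hstack, ones, sort,
setxor1d, union1d, int_) are available in its module, as in the hexrd source.

import numpy as np

# 2 x 7 example from the docstring: columns 0 and 4 match, as do columns 1, 3 and 5.
vec = np.array([[1., 2., 2., 2., 1., 2., 7.],
                [2., 3., 5., 3., 2., 3., 3.]])

eqv, uid = findDuplicateVectors(vec, tol=1e-14)
print(eqv)   # expected: [[0, 4], [1, 3, 5]]
print(uid)   # expected: [0, 1, 2, 6]  (one index per unique column)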
Code example #5
File: matrixutil.py    Project: donald-e-boyce/hexrd
def findDuplicateVectors(vec, tol=vTol, equivPM=False):
    """
    Find vectors in an array that are equivalent to within
    a specified tolerance

      USAGE:

          eqv, uid = findDuplicateVectors(vec, *tol)

      INPUT:

          1) vec is n x m, a double array of m horizontally concatenated
                           n-dimensional vectors.
         *2) tol is 1 x 1, a scalar tolerance.  If not specified, the default
                           tolerance is 1e-14.
         *3) set equivPM to True if vec and -vec are to be treated as equivalent

      OUTPUT:

          1) eqv is 1 x p, a list of p equivalence relationships.
          2) uid is a list of column indices of the unique vectors
             (one representative per duplicate group plus all singletons).

      NOTES:

          Each equivalence relationship is a 1 x q vector of indices that
          represent the locations of duplicate columns/entries in the array
          vec.  For example:

                | 1     2     2     2     1     2     7 |
          vec = |                                       |
                | 2     3     5     3     2     3     3 |

          eqv = [[1x2 double]    [1x3 double]], where

          eqv[0] = [0  4]
          eqv[1] = [1  3  5]
    """

    vlen  = vec.shape[1]
    vlen0 = vlen
    orid  = asarray(range(vlen), dtype="int")

    torid = orid.copy()
    tvec  = vec.copy()

    eqv    = []
    eqvTot = 0
    uid    = 0

    ii = 1
    while vlen > 1 and ii < vlen0:
        dupl = tile(tvec[:, 0], (vlen, 1))

        if not equivPM:
            diff  = abs(tvec - dupl.T).sum(0)
            match = abs(diff[1:]) <= tol    # logical to find duplicates
        else:
            diffn  = abs(tvec - dupl.T).sum(0)
            matchn = abs(diffn[1:]) <= tol
            diffp  = abs(tvec + dupl.T).sum(0)
            matchp = abs(diffp[1:]) <= tol
            match = matchn + matchp

        kick = hstack([True, match])    # pick self too

        if kick.sum() > 1:
            eqv    += [torid[kick].tolist()]
            eqvTot  = hstack( [ eqvTot, torid[kick] ] )
            uid     = hstack( [ uid, torid[kick][0] ] )

        cmask       = ones((vlen,))
        cmask[kick] = 0
        cmask       = cmask != 0

        tvec  = tvec[:, cmask]

        torid = torid[cmask]

        vlen = tvec.shape[1]

        ii += 1

    if len(eqv) == 0:
        eqvTot = []
        uid    = []
    else:
        eqvTot = eqvTot[1:].tolist()
        uid    = uid[1:].tolist()

    # find all single-instance vectors
    singles = sort( setxor1d( eqvTot, range(vlen0) ) )

    # now construct list of unique vector column indices
    uid = int_( sort( union1d( uid, singles ) ) ).tolist()
    # make sure is a 1D list
    if not hasattr(uid,'__len__'):
        uid = [uid]

    return eqv, uid
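A small sketch of the equivPM flag, which treats a column and its negation as equivalent;
the array is made up for illustration.

import numpy as np

# Columns 0 and 2 are negatives of each other, so equivPM=True groups them together.
vec = np.array([[1., 0., -1.],
                [0., 2.,  0.]])

eqv, uid = findDuplicateVectors(vec, equivPM=True)
print(eqv)   # expected: [[0, 2]]
print(uid)   # expected: [0, 1]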
Code example #6
        print(i)
spike_samples_clean = pl.delete(spike_samples_clean, 0)
pl.save(os.path.join(memap_folder, 'spike_samples_clean.npy'), spike_samples_clean)

channels = np.empty(0)
for i in pl.arange(0, pl.size(spike_samples_clean)):
    data = np.array(data_probe_hp[:, spike_samples_clean[i]].tolist())
    channels = np.append(channels, np.argmax(data))
    if i % 100 == 0:
        print(i)
channels_spikes_df = pd.DataFrame([(channels, spike_samples_clean)], columns=['Channels', 'Samples'])

spike_times_shaftA = channels_spikes_df.Samples[0][channels_spikes_df.Channels[0]>7][channels_spikes_df.Channels[0]<16]
spike_times_shaftB = channels_spikes_df.Samples[0][channels_spikes_df.Channels[0]>23]
spike_times_shaftD = channels_spikes_df.Samples[0][channels_spikes_df.Channels[0]<8]
spike_times_shaftC = sp.setxor1d(spike_samples_clean, sp.union1d(spike_times_shaftA, sp.union1d(spike_times_shaftB, spike_times_shaftD)))

pl.save(os.path.join(memap_folder, 'spike_times_shaftA.npy'), spike_times_shaftA)
pl.save(os.path.join(memap_folder, 'spike_times_shaftC.npy'), spike_times_shaftC)


#----------Analysis---------------------
f_ecog = f_sampling/(int(f_sampling/f_subsample))
spike_times_shaftA_ecog = np.array(spike_times_shaftA * f_ecog / f_sampling, dtype='int')
spike_times_shaftC_ecog = np.array(spike_times_shaftC * f_ecog / f_sampling, dtype='int')
data_ecog_lp_ss_clean = np.delete(data_ecog_lp_ss, ecog_bad_channels, axis=0)



#Generate eMUA for each Shaft
time_around_spike = 2
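The shaft C assignment above uses setxor1d of all clean spikes against the union of the
other shafts, which amounts to a set complement when those subsets are contained in the
full set. A self-contained sketch with made-up sample indices:

import numpy as np

samples = np.array([10, 20, 30, 40, 50, 60])   # illustrative spike sample indices
shaft_a = np.array([10, 40])
shaft_b = np.array([60])
shaft_d = np.array([20])

# Everything not claimed by shafts A, B or D falls to shaft C.
shaft_c = np.setxor1d(samples, np.union1d(shaft_a, np.union1d(shaft_b, shaft_d)))
print(shaft_c)   # [30 50]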
Code example #7
File: utils.py    Project: afrahshafquat/f-scLVM
def plotRelevance(FA,
                  Nactive=20,
                  stacked=True,
                  madFilter=0.4,
                  annotated=True,
                  unannotated=False,
                  unannotated_sparse=False):
    """Plot results of f-scLVM

    Identified factors and corresponding gene set size ordered by relevance (white = low relevance; black = high relevance). 
    Top panel: Gene set augmentation, showing the number of genes added (red) and removed (blue) by the model for each factor.

    Args:
        FA                 (:class:`fscLVM.CSparseFA`): Factor analysis object, usually generated using `initFA` function
        Nactive                                  (int): Number of terms to be plotted
        stacked                                 (bool): Boolean variable indicating whether bars should be stacked
        madFilter                              (float): Filter factors by this mean absolute deviation to exclude outliers.
                                                        For large datasets this can be set to 0.
        annotated                               (bool): Indicates whether annotated factors should be plotted. Defaults to True.
        unannotated                             (bool): Indicates whether unannotated factors should be plotted. Defaults to False.
        unannotated_sparse                      (bool): Indicates whether unannotated sparse factors should be plotted. Defaults to False.


    """

    pltparams = {
        'backend': 'pdf',
        'axes.labelsize': 12,
        'font.size': 12,
        'legend.fontsize': 13,
        'xtick.labelsize': 14,
        'ytick.labelsize': 12,
        'text.usetex': False
    }

    plt.rcParams.update(pltparams)

    pattern_hidden = re.compile('hidden*')
    pattern_bias = re.compile('bias')

    terms = FA.getTerms(annotated=annotated,
                        unannotated=unannotated,
                        unannotated_sparse=unannotated_sparse)

    i_use = list()
    if unannotated_sparse:
        i_use.extend(FA.iLatentSparse)
    if unannotated:
        i_use.extend(FA.iLatent)
    if annotated:
        i_use.extend(
            SP.setxor1d(
                SP.hstack([
                    SP.where(FA.terms == 'bias')[0], FA.iLatentSparse,
                    FA.iLatent
                ]), SP.arange(len(FA.terms))))
    i_use = SP.array(i_use)

    X = FA.getX()[:, i_use]
    Iprior = FA.getAnnotations()[:, i_use]
    Iposterior = FA.getZ()[:, i_use] > .5
    rel = FA.getRelevance()[i_use]

    MAD = mad(X)
    R = (MAD > madFilter) * (rel)
    terms = SP.array(terms)

    Nactive = min(SP.sum(R > 0), Nactive)

    # term changes etc.
    Nprior = Iprior.sum(axis=0)
    #gains
    Ngain = (Iposterior & (~Iprior)).sum(axis=0)
    #loss
    Nloss = ((~Iposterior & (Iprior))).sum(axis=0)

    #sort terms by relevance
    Iactive = R.argsort()[::-1][0:Nactive]
    RM = R[Iactive, SP.newaxis]

    xticks_range = SP.arange(Nactive)
    terms[terms == 'hidden'] = 'Unannotated'
    terms[terms == 'hiddenSparse'] = 'Unannotated-sparse'
    xticks_text = list(terms[Iactive])

    n_gain = []
    n_loss = []
    n_prior = []
    for i in range(Nactive):
        n_gain += [Ngain[Iactive[i]]]
        n_loss += [-1.0 * Nloss[Iactive[i]]]
        n_prior += [Nprior[Iactive[i]]]

    width = 0.6
    left = SP.arange(Nactive) - 0.5 + (1. - width) / 2.

    fig = plt.figure(2, figsize=(10, 6))
    fig.subplots_adjust(bottom=0.3)

    gs = mpl.gridspec.GridSpec(2,
                               2,
                               height_ratios=[2., 1.],
                               width_ratios=[1., 0.05])
    gs.update(hspace=0.1)

    #fig.text(0.06, 0.6, 'Number of annotated genes', ha='center', va='center', rotation='vertical', fontsize=17)

    #################################################################################
    ax1 = plt.subplot(gs[1, 0])
    simpleaxis(ax1)
    ax1.set_xlabel('Active pathways', fontsize=15)
    ax1.set_ylabel('Gene set size', fontsize=13.5)
    #im = ax1.imshow(SP.append(RM.T,[[0]],axis=1),origin=[0,0],interpolation='nearest',cmap='Greys',aspect='auto')

    minima = 0
    maxima = max(RM)

    norm = mpl.colors.Normalize(vmin=minima, vmax=maxima, clip=True)

    mapper = mpl.cm.ScalarMappable(norm=norm, cmap='Greys')

    colors = []
    for v in RM.flatten():
        colors += [mapper.to_rgba(v)]

    #colors = []
    #for i in xrange(RM.shape[0]):
    #    colors += [im.cmap(im.norm(RM[i]))[0,:-1]]

    y_max = Nprior[Iactive].max() + 100.

    bar_rel_importance = ax1.bar(left=SP.arange(Nactive) - 0.5,
                                 width=1.05,
                                 height=[y_max] * len(n_prior),
                                 bottom=0,
                                 color=colors,
                                 log=True,
                                 edgecolor='none')
    bar_annotated = ax1.bar(left=left,
                            width=width,
                            height=n_prior,
                            bottom=0,
                            color='w',
                            log=True,
                            alpha=0.6,
                            edgecolor='k')

    ax1.set_ylim([10, y_max])
    ax1.set_xlim([0, Nactive])
    #ax1.set_yticks([])
    #ax1.set_yscale('log')
    plt.xticks(xticks_range, xticks_text, rotation=45, fontsize=14, ha='right')

    color_bar_ax = plt.subplot(gs[1, 1])
    mpl.colorbar.ColorbarBase(color_bar_ax,
                              cmap='Greys',
                              norm=norm,
                              orientation='vertical',
                              ticks=[minima, maxima])

    #color_bar = fig.colorbar(im, cax=color_bar_ax,ticks=[0., RM.max()])
    color_bar_ax.set_yticklabels([0, 1])
    #color_bar_ax.set_yticklabels([0,round(RM.max(),3)])
    #color_bar_ax.set_ylabel('Rel. importance')
    #color_bar.outline.set_visible(False)
    #################################################################################

    ax0 = plt.subplot(gs[0, 0], sharex=ax1)
    simpleaxis(ax0)

    if stacked:
        bar_gain = ax0.bar(left=left,
                           width=width,
                           height=n_gain,
                           bottom=0,
                           color='#861608')
        bar_loss = ax0.bar(left=left,
                           width=width,
                           height=n_loss,
                           bottom=0,
                           color='#0c09a0')
    else:
        bar_gain = ax0.bar(left=SP.arange(Nactive) - 0.5,
                           width=0.5,
                           height=n_gain,
                           bottom=0,
                           color='#861608')
        bar_loss = ax0.bar(left=SP.arange(Nactive),
                           width=0.5,
                           height=n_loss,
                           bottom=0,
                           color='#0c09a0')

    # figure out range to make ylim symmetric
    ax0.axhline(y=0, linestyle='-', color='gray')

    #ax0.set_yscale('symlog')
    gap = SP.ceil(max(max(n_gain), abs(min(n_loss))) / 4.)
    y_max = SP.ceil(max(n_gain) / gap)
    y_min = SP.floor(min(n_loss) / gap)
    yticks = SP.arange(y_min * gap, y_max * gap, gap)
    ax0.set_yticks(yticks)
    ax0.set_ylabel('Gene set augmentation', fontsize=13.5)
    ax0.legend((bar_gain[0], bar_loss[0]), ('Gain', 'Loss'),
               ncol=1,
               loc='center left',
               bbox_to_anchor=(1, 0.5),
               frameon=False,
               fontsize=15)
    plt.setp(ax0.get_xticklabels(), visible=False)
    plt.show()

    return fig
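A usage sketch, assuming FA is a fscLVM.CSparseFA model that has already been initialised
(e.g. with initFA) and trained; the output filename is illustrative.

# Hypothetical usage: plot the 20 most relevant annotated factors and save the figure.
fig = plotRelevance(FA, Nactive=20, annotated=True)
fig.savefig('relevance.pdf')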
Code example #8
        print(i)
spike_samples_clean = pl.delete(spike_samples_clean, 0)
pl.save(os.path.join(memap_folder, 'spike_samples_clean.npy'), spike_samples_clean)

channels = np.empty(0)
for i in pl.arange(0, pl.size(spike_samples_clean)):
    data = np.array(data_probe_hp[:, spike_samples_clean[i]].tolist())
    channels = np.append(channels, np.argmax(data))
    if i % 100 == 0:
        print(i)
channels_spikes_df = pd.DataFrame([(channels, spike_samples_clean)], columns=['Channels', 'Samples'])

spike_times_shaftA = channels_spikes_df.Samples[0][channels_spikes_df.Channels[0]>7][channels_spikes_df.Channels[0]<16]
spike_times_shaftB = channels_spikes_df.Samples[0][channels_spikes_df.Channels[0]>23]
spike_times_shaftD = channels_spikes_df.Samples[0][channels_spikes_df.Channels[0]<8]
spike_times_shaftC = sp.setxor1d(spike_samples_clean, sp.union1d(spike_times_shaftA, sp.union1d(spike_times_shaftB, spike_times_shaftD)))

pl.save(os.path.join(memap_folder, 'spike_times_shaftA.npy'), spike_times_shaftA)
pl.save(os.path.join(memap_folder, 'spike_times_shaftC.npy'), spike_times_shaftC)


# ----------Analysis---------------------
f_ecog = f_sampling/(int(f_sampling/f_subsample))
spike_times_shaftA_ecog = np.array(spike_times_shaftA * f_ecog / f_sampling, dtype='int')
spike_times_shaftC_ecog = np.array(spike_times_shaftC * f_ecog / f_sampling, dtype='int')
data_ecog_lp_ss_clean = np.delete(data_ecog_lp_ss, ecog_bad_channels, axis=0)


# Generate eMUA for each Shaft
time_around_spike = 2
time_points_around_spike = int(time_around_spike * f_sampling)
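A numeric sketch of the probe-to-ECoG index conversion used above; the sampling rates are
assumed values, not taken from the recording.

import numpy as np

f_sampling = 30000.0    # assumed probe sampling rate (Hz)
f_subsample = 2000.0    # assumed ECoG target rate (Hz)
f_ecog = f_sampling / int(f_sampling / f_subsample)   # effective rate after integer decimation

spike_times_probe = np.array([30000, 90000, 150000])  # sample indices at f_sampling
spike_times_ecog = np.array(spike_times_probe * f_ecog / f_sampling, dtype='int')
print(f_ecog, spike_times_ecog)   # 2000.0 [ 2000  6000 10000]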