def split3D(data, nHist):
    '''
    Convert a 3D dataset into past (x) and future (y) values for autoregression.

    :param data:    Input array of shape 'rps' (trial, channel, timestep)
    :param nHist:   number of timesteps of history to consider
    :return:        x - past values, shape (nTrial*nSample, nChannel*nHist);
                    y - future values, shape (nTrial*nSample, nChannel)
    :raises ValueError: if there are fewer than nHist + 1 timesteps

    Future is always for 1 timestep, past can be as long as nHist.
    NOTE: currently, past dimensions are sorted from oldest to newest, so data[t-1] = x[-1]
    '''

    nTrial, nChannel, nTime = data.shape

    if nTime < nHist + 1:
        # f-string instead of comma-separated args: the latter renders the message as a tuple
        raise ValueError(f"Autoregression of history {nHist} requires {nHist + 1} timesteps. Got {nTime}")

    # sample windows over time (rps) -> (srpw)
    x = np.array([data[:, :, i:i + nHist] for i in range(nTime - nHist)])

    # shape transform for x :: (srpw) -> (r*s, p*w)
    x = x.transpose((1, 0, 2, 3))        # (srpw) -> (rspw)
    x = numpy_merge_dimensions(x, 2, 4)  # p*w
    x = numpy_merge_dimensions(x, 0, 2)  # r*s

    # shape transform for y :: (rps) -> (r*s, p)
    y = data.transpose((0, 2, 1))[:, nHist:]
    y = numpy_merge_dimensions(y, 0, 2)

    return x, y
def cross_mi_3D(data, settings):
    '''
    Pairwise mutual information between processes, optionally at a time lag.

    :param data:      3D array of shape 'rps' (trial, process, sample)
    :param settings:  dict with key 'lag' - timestep shift between source and target
    :return:          (nProcess, nProcess) matrix with rez[i, j] = MI(x_i(t), x_j(t+lag));
                      an all-NaN matrix if there are too few samples to estimate anything
    :raises ValueError: if the number of samples does not exceed the lag
    '''
    nTrial, nProcess, nSample = data.shape
    if nTrial * nSample < 2 * nProcess:
        # If there are too few samples, there is no point to calculate anything
        return np.full((nProcess, nProcess), np.nan)

    lag = settings['lag']

    # Check that number of timesteps is sufficient to estimate the lag
    if nSample <= lag:
        # f-string instead of comma-separated args: the latter renders the message as a tuple
        raise ValueError(f"lag {lag} cannot be estimated for number of timesteps {nSample}")

    # (rps) -> (psr): each process becomes a pool of (sample, trial) observations
    dataOrd = numpy_transpose_byorder(data, 'rps', 'psr')
    xx = numpy_merge_dimensions(dataOrd[:, :nSample - lag], 1, 3)
    yy = numpy_merge_dimensions(dataOrd[:, lag:], 1, 3)

    rez = np.zeros((nProcess, nProcess))
    if lag > 0:
        # Lagged MI is not symmetric: compute all pairs
        for i in range(nProcess):
            for j in range(nProcess):
                rez[i, j] = ee.mi(xx[i], yy[j])
    else:
        # Optimization - zero-lag MI is symmetric, compute upper triangle and mirror
        for i in range(nProcess):
            for j in range(i, nProcess):
                rez[i, j] = ee.mi(xx[i], yy[j])
                rez[j, i] = rez[i, j]

    return rez
def split3D_non_uniform(dataLst, nHist):
    '''
    As split3D, but for a list of 2D trials of possibly different durations.

    :param dataLst: list of 2D arrays (channel, time); channel count must be uniform
    :param nHist:   number of timesteps of history to consider
    :return:        x - past values, shape (nWindowTotal, nChannel*nHist);
                    y - future values, shape (nWindowTotal, nChannel)
    :raises ValueError: if any trial has fewer than nHist + 1 timesteps
    '''
    # Test that the number of channels is uniform
    nChannel = list_assert_get_uniform_shape(dataLst, axis=1)

    # sample windows over time: each trial (p, s) -> (s', p, w)
    rez = []
    for data2D in dataLst:
        nTime = data2D.shape[1]
        if nTime < nHist + 1:
            # f-string instead of comma-separated args: the latter renders the message as a tuple
            raise ValueError(f"Autoregression of history {nHist} requires {nHist + 1} timesteps. Got {nTime}")

        rez.append(np.array([data2D[:, i:i + nHist] for i in range(nTime - nHist)]))

    # shape transform for x :: concatenate windows over trials, then merge (p, w) -> p*w
    x = np.concatenate(rez, axis=0)      # -> (nWindowTotal, p, w)
    x = numpy_merge_dimensions(x, 1, 3)  # p*w

    # shape transform for y :: (ps) -> (s, p), concatenated over trials
    y = [data2D[:, nHist:].T for data2D in dataLst]
    y = np.concatenate(y, axis=0)

    return x, y
# Example #4
def fit_predict_multivar_bychannel(xCh, yCh, alpha=1.0):
    '''
    Ridge-fit and predict each target channel separately from the shared predictors.

    :param xCh:    predictor array; 2D (sample, feature), or 3D whose trailing two axes are flattened
    :param yCh:    target array of shape (sample, channel)
    :param alpha:  ridge regularization strength
    :return:       predictions, shape (sample, channel)
    '''
    # Flatten trailing predictor axes if a 3D array was given
    if xCh.ndim == 2:
        xFlat = xCh
    else:
        xFlat = numpy_merge_dimensions(xCh, 1, 3)

    predLst = []
    for iCh in range(xCh.shape[1]):
        predLst.append(ridge_fit_predict(xFlat, yCh[:, iCh], alpha=alpha))
    return np.array(predLst).T
def polyfit_data_3D(times, dataRSP, ord, alpha):
    '''
    Smooth each channel of a 3D dataset with a regularized natural cubic spline over time.

    :param times:    timestamps; flattened shape must match dataRSP's flattened (trial, sample) axes
    :param dataRSP:  data of shape (trial, sample, channel)
    :param ord:      degrees of freedom of the spline fit
    :param alpha:    regularization strength
    :return:         fitted values, same shape as dataRSP
    '''
    tFlat = times.flatten()
    dFlat = numpy_merge_dimensions(dataRSP, 0, 2)

    nChannel = dataRSP.shape[2]
    rez = np.zeros(dataRSP.shape)
    for iCh in range(nChannel):
        channelData = dFlat[:, iCh]
        nanMask = np.isnan(channelData)
        if np.any(nanMask):
            # Diagnostic: report channels that still contain NaN values
            print(iCh, len(dFlat), np.sum(nanMask))

        fitted = natural_cubic_spline_fit_reg(tFlat, channelData, dof=ord, alpha=alpha)
        rez[:, :, iCh] = fitted.reshape(times.shape)
    return rez
def plot_pca1_mouse(dataDB, trialTypesSelected=('Hit', 'CR'), skipReward=None):
    '''
    For each datatype ('bn_trial', 'bn_session') and each mouse, plot the
    trial-average activity and the 1st PCA projection of trial-average
    activity over trial time, one curve per selected trial type.

    :param dataDB:             data source providing mice, neuronal data, times and plot labels
    :param trialTypesSelected: trial types to overlay in each panel
    :param skipReward:         unused in this function - NOTE(review): confirm intent
    '''
    haveDelay = 'DEL' in dataDB.get_interval_names()
    nMice = len(dataDB.mice)

    for iDataType, datatype in enumerate(['bn_trial', 'bn_session']):
        # One figure per datatype: row 0 = trial-average, row 1 = 1st PCA, one column per mouse
        fig, ax = plt.subplots(nrows=2, ncols=nMice, figsize=(4 * nMice, 8))
        fig.suptitle(datatype)

        for iMouse, mousename in enumerate(sorted(dataDB.mice)):
            # Train PCA on whole dataset, but only trial based timesteps
            dataLst = get_data_list(dataDB, haveDelay, mousename,
                                    **{'datatype': datatype})
            dataRSP = np.concatenate(dataLst, axis=0)

            timesTrial = dataDB.get_times(dataRSP.shape[1])

            # Flatten (trial, sample) into rows; drop rows with NaN before fitting PCA
            dataSP = numpy_merge_dimensions(dataRSP, 0, 2)
            dataSP = dataSP[~np.any(np.isnan(dataSP), axis=1)]

            pca = PCA(n_components=1)
            pca.fit(dataSP)
            # Fix the sign of the component so that curves are comparable across mice
            pcaSig = np.sign(np.mean(pca.components_))
            pcaTransform = lambda x: pca.transform(x)[:, 0] * pcaSig

            for trialType in trialTypesSelected:
                # Evaluate on individual trials
                kwargs = {'datatype': datatype, 'trialType': trialType}
                dataLst = get_data_list(dataDB, haveDelay, mousename, **kwargs)
                dataRSP = np.concatenate(dataLst, axis=0)
                dataSP = np.nanmean(dataRSP, axis=0)
                dataPCA = pcaTransform(dataSP)
                dataAvg = np.mean(dataSP, axis=1)
                # Standard error over trials of the channel-wise std
                dataStd = np.mean(np.nanstd(dataRSP, axis=2),
                                  axis=0) / np.sqrt(dataRSP.shape[0])

                ax[0, iMouse].plot(timesTrial, dataAvg, label=trialType)
                ax[1, iMouse].plot(timesTrial, dataPCA, label=trialType)
                ax[0, iMouse].fill_between(timesTrial,
                                           dataAvg - dataStd,
                                           dataAvg + dataStd,
                                           alpha=0.2)

            dataDB.label_plot_timestamps(ax[0, iMouse])
            dataDB.label_plot_timestamps(ax[1, iMouse])
            ax[0, iMouse].legend()
            ax[1, iMouse].legend()
            ax[0, iMouse].set_title(mousename)

        ax[0, 0].set_ylabel('Trial-average activity')
        ax[1, 0].set_ylabel('1st PCA trial-average')
        plt.show()
# Example #7
def decorrelate(data, nComponents=48):
    '''
    Decorrelate channels using PCA, then normalize each component to unit variance.

    :param data:        array whose leading dimension is channels; trailing dimensions
                        are flattened into samples
    :param nComponents: number of principal components to keep (default 48, the
                        previously hard-coded value; now parameterized)
    :return:            decorrelated, variance-normalized data reshaped to data.shape

    NOTE(review): the final reshape back to data.shape only succeeds when the number
    of channels equals nComponents - confirm against callers.
    '''
    # Leading dimension must be channels; flatten all trailing dimensions
    if data.ndim > 2:
        dataEff = numpy_merge_dimensions(data, 1, data.ndim + 1)
    else:
        dataEff = data

    # PCA operates on (samples, features), hence the transpose
    pca = PCA(n_components=nComponents)
    rez = pca.fit_transform(dataEff.T)

    # Normalize each component to unit variance (leftover debug print removed)
    rez /= np.std(rez, axis=0)
    return rez.T.reshape(data.shape)
# Example #8
def autocorr_3D(data, settings):
    '''
    :param data:        3D data of shape "rps"
    :param settings:    Extra settings (unused)
    :return:            Autocorrelation; length equals the number of timesteps
    :raises ValueError: if there are fewer than 2 timesteps

    TODO: Currently autocorrelation is averaged over other provided dimensions.
    Check if there is a more rigorous way.
    '''
    if data.shape[2] <= 1:
        raise ValueError("Autocorrelation requires more than 1 timestep")

    # Flatten trials and processes into rows, then z-score
    zscored = zscore(numpy_merge_dimensions(data, 0, 2))

    # Average per-row autocorrelations, ignoring NaN entries
    acorrLst = [autocorr_1D(row) for row in zscored]
    return np.nanmean(np.array(acorrLst), axis=0)
def _preprocess_mar_inp(data, inp, nHist):
    '''
    Split data into past/future pairs and align the exogenous input with the future values.

    :param data:   3D data of shape 'rps'
    :param inp:    3D exogenous input of shape 'rps'; trials and timesteps must match data
    :param nHist:  number of history timesteps
    :return:       [x, y, u] with NaN rows dropped
    '''
    x, y = splitter.split3D(data, nHist)

    # Input must be a non-degenerate 3D array matching data in trials and timesteps
    assert inp.ndim == 3, "Input matrix must be a 3D matrix"
    assert np.prod(inp.shape) != 0, "Input matrix is degenerate"
    assert data.shape[0] == inp.shape[0], "Input shape must be consistent with data shape"
    assert data.shape[2] == inp.shape[2], "Input shape must be consistent with data shape"

    # Convert input (rps) -> (r*s, p), dropping the first nHist timesteps to align with y
    inpCanon = numpy_transpose_byorder(inp, 'rps', 'rsp')
    u = numpy_merge_dimensions(inpCanon[:, nHist:], 0, 2)

    # Drop any nan rows that are present in the data or input
    return drop_nan_rows([x, y, u])
def example_poly_fit(times, dataRSP, iCh=0, ord=2, alpha=0.01):
    '''
    Plot single-channel trial traces together with a spline fit over all trials.

    :param times:    timestamps of shape (trial, sample)
    :param dataRSP:  data of shape (trial, sample, channel)
    :param iCh:      channel to plot
    :param ord:      degrees of freedom of the spline fit
    :param alpha:    regularization strength
    '''
    tFlat = times.flatten()
    dFlat = numpy_merge_dimensions(dataRSP, 0, 2)

    print(times.shape, dataRSP.shape)

    nTrial = dataRSP.shape[0]
    plt.figure(figsize=(8, 4))
    # Fit a single smooth curve through all trials of the selected channel
    fitted = natural_cubic_spline_fit_reg(tFlat, dFlat[:, iCh], dof=ord, alpha=alpha)

    # Raw per-trial traces in orange, fit on top
    for iTrial in range(nTrial):
        plt.plot(times[iTrial], dataRSP[iTrial, :, iCh], color='orange')
    plt.plot(tFlat, fitted)
    plt.ylabel(str(iCh))
    plt.show()
# Example #11
def preprocess_data(dataRSPLst, nDropPCA=None, nBin=None, timeAvg=False):
    '''
    Concatenate sessions, then optionally time-average, drop leading PCA components and bin.

    :param dataRSPLst: list of 3D arrays of shape (trial, sample, channel)
    :param nDropPCA:   if given, number of leading PCA components to drop
    :param nBin:       if given, number of bins for per-channel discretization
    :param timeAvg:    if True, average over time; otherwise fold timesteps into trials
    :return:           2D array (trial, channel) if timeAvg, else 3D array of concatenated shape
    '''
    # Concatenate trials and sessions
    dataRSP = np.concatenate(dataRSPLst, axis=0)

    # Average out time, or treat each timestep as an extra trial
    dataRP = np.mean(dataRSP, axis=1) if timeAvg else numpy_merge_dimensions(dataRSP, 0, 2)

    if nDropPCA is not None:
        dataRP = drop_PCA(dataRP, nDropPCA)

    if nBin is not None:
        # Bin data separately for each channel
        dataRP = bin_data(dataRP, nBin, axis=1)

    if timeAvg:
        return dataRP
    return dataRP.reshape(dataRSP.shape)
def split2D(data2D, nHist):
    '''
    :param data2D:  Input array of shape [nTrial, nTime]
    :param nHist:   number of timesteps of history to consider
    :return:        x - past values, shape (nTrial*nSample, nHist);
                    y - future values, shape (nTrial*nSample,)
    :raises ValueError: if there are fewer than nHist + 1 timesteps

    Convert dataset into past and future values. Future is always for 1 timestep,
    past can be as long as nHist.
    NOTE: currently, past dimensions are sorted from oldest to newest, so data[t-1] = x[-1]
    '''
    nTrial, nTime = data2D.shape
    if nTime < nHist + 1:
        # f-string instead of comma-separated args: the latter renders the message as a tuple
        raise ValueError(f"Autoregression of history {nHist} requires {nHist + 1} timesteps. Got {nTime}")

    x = np.array([data2D[:, i:i + nHist] for i in range(nTime - nHist)])    # (rs) -> (srw)
    x = numpy_merge_dimensions(x.transpose((1, 0, 2)), 0, 2)                # (srw) -> (r*s, w)
    y = data2D[:, nHist:].flatten()                                         # (rs) -> (r*s)
    return x, y
def _preprocess_ar(data, settings):
    '''
    Flatten processes and repetitions into rows, then z-score.

    :param data:     3D data of shape 'rps'
    :param settings: Extra settings (unused)
    :return:         z-scored 2D array
    '''
    return zscore(numpy_merge_dimensions(data, 0, 2))
def varmean(data, settings):
    '''
    Variance of the mean: average of all entries of the covariance matrix.

    :param data:     3D data; the trailing two axes are flattened into variables,
                     trials form the observation axis
    :param settings: Extra settings (unused)
    :return:         scalar, sum of the covariance matrix divided by nDim^2
    '''
    # (r, p, s) -> (p*s, r): rows are variables, columns are observations
    flatT = numpy_merge_dimensions(data, 1, 3).T
    nDimEff = len(flatT)
    return np.sum(np.cov(flatT)) / nDimEff ** 2
def plot_pca1_session(dataDB,
                      mousename,
                      session,
                      trialTypesSelected=('Hit', 'CR')):
    '''
    For one session, plot per datatype: the 1st PCA projection over the whole
    session, the trial-average activity, and the 1st PCA of trial-average
    activity, one curve per selected trial type. Saves the figure as SVG.

    :param dataDB:             data source providing neuronal data, times and plot labels
    :param mousename:          mouse identifier
    :param session:            session identifier
    :param trialTypesSelected: trial types to overlay
    '''
    plotColors = pylab.cm.gist_heat([0.2, 0.4, 0.6, 0.8])
    plotColorMap = dict(
        zip(trialTypesSelected, plotColors[:len(trialTypesSelected)]))

    fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(12, 8))

    for iDataType, datatype in enumerate(['bn_trial', 'bn_session']):
        timesRS = dataDB.get_absolute_times(mousename, session)
        dataRSP = dataDB.get_neuro_data({'session': session},
                                        datatype=datatype)[0]

        # Train PCA on whole session, but only trial based timesteps
        dataSP = numpy_merge_dimensions(dataRSP, 0, 2)
        timesS = numpy_merge_dimensions(timesRS, 0, 2)
        pca = PCA(n_components=1)
        dataPCA1 = pca.fit_transform(dataSP)[:, 0]
        # Fix the sign of the component so curves are comparable across datatypes
        pcaSig = np.sign(np.mean(pca.components_))
        pcaTransform = lambda x: pca.transform(x)[:, 0] * pcaSig

        # Compute 1st PCA during trial-time
        # Note: it is irrelevant whether averaging or PCA-transform comes first
        trialTypes = dataDB.get_trial_types(session, mousename)
        timesTrial = dataDB.get_times(dataRSP.shape[1])

        for tt in trialTypesSelected:
            trialIdxs = trialTypes == tt
            # Skip trial types absent from this session
            if np.sum(trialIdxs) > 0:
                dataAvgTTSP = np.mean(dataRSP[trialIdxs], axis=0)
                dataAvgTTPCA = pcaTransform(dataAvgTTSP)
                dataAvgTTAvg = np.mean(dataAvgTTSP, axis=1)

                ax[iDataType, 1].plot(timesTrial,
                                      dataAvgTTAvg,
                                      label=tt,
                                      color=plotColorMap[tt])
                ax[iDataType, 2].plot(timesTrial,
                                      dataAvgTTPCA,
                                      label=tt,
                                      color=plotColorMap[tt])

        ax[iDataType, 0].set_ylabel(datatype)
        ax[iDataType, 0].plot(timesS, dataPCA1)
        ax[iDataType, 0].set_title('1st PCA during session')

        ax[iDataType, 1].set_title('Trial-average activity')
        ax[iDataType, 1].legend()

        ax[iDataType, 2].set_title('1st PCA trial-average')
        ax[iDataType, 2].legend()

        dataDB.label_plot_timestamps(ax[iDataType, 1], mousename, session)
        dataDB.label_plot_timestamps(ax[iDataType, 2], mousename, session)
        dataDB.label_plot_intervals(ax[iDataType, 1], mousename, session)
        dataDB.label_plot_intervals(ax[iDataType, 2], mousename, session)

    # NOTE(review): 'datatype' here is the last value from the loop above, so the
    # filename reflects only 'bn_session' - confirm this is intended
    prefixPath = 'pics/bulk/traces/bymouse/'
    suffixPath = '_'.join([datatype, mousename, session])
    make_path(prefixPath)
    fig.savefig(prefixPath + 'traces_' + suffixPath + '.svg')
    plt.close()
def preprocess_3D(dataCanon, settings):
    '''
    Reduce 3D canonical data to 2D: either average over time or fold timesteps into trials.

    :param dataCanon: 3D array; axis 1 is time
    :param settings:  dict; if settings['timeAvg'] is truthy, average over time
    :return:          2D array
    '''
    # Compute time-average if requested
    if settings.get('timeAvg'):
        return np.mean(dataCanon, axis=1)
    # Otherwise consider samples as extra trials
    return numpy_merge_dimensions(dataCanon, 1, 3)