Example #1
def get_Xc_yc(fname, p_smooth, unit_num, binsize):
    varlist = ['M', 'F', 'TH', 'PHIE']
    blk = neoUtils.get_blk(fname)
    blk_smooth = GLM.get_blk_smooth(fname, p_smooth)

    cbool = neoUtils.get_Cbool(blk)
    X = GLM.create_design_matrix(blk, varlist)
    # get_deriv returns (Xdot, Xsmooth); keep only the derivative term
    Xdot = GLM.get_deriv(blk, blk_smooth, varlist, [0, 5, 9])[0]  # maybe only want one derivative?

    X = np.concatenate([X, Xdot], axis=1)
    # two NaN-replacement passes: pchip first, then linear interp for any
    # NaNs pchip leaves behind (e.g., at the edges)
    X = neoUtils.replace_NaNs(X, 'pchip')
    X = neoUtils.replace_NaNs(X, 'interp')

    Xbin = GLM.bin_design_matrix(X, binsize=binsize)
    scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
    Xbin = scaler.fit_transform(Xbin)
    cbool_bin = GLM.bin_design_matrix(cbool[:, np.newaxis], binsize=binsize).ravel()

    y = neoUtils.concatenate_sp(blk)['cell_{}'.format(unit_num)]
    ybin = elephant.conversion.BinnedSpikeTrain(y, binsize=binsize * pq.ms).to_array().T.astype('f8')
    # trim the design matrix and contact mask to the length of the binned spiketrain
    Xbin = Xbin[:ybin.shape[0], :]
    cbool_bin = cbool_bin[:ybin.shape[0]]
    yhat = np.zeros(ybin.shape[0])

    Xc = Xbin[cbool_bin, :]
    yc = ybin[cbool_bin, :]
    return (Xc, yc, cbool_bin, yhat)
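
A minimal usage sketch for get_Xc_yc; the filename and smoothing directory are hypothetical placeholders, and the helper modules (neoUtils, GLM) are assumed to be imported as elsewhere in these examples:

# hypothetical inputs -- substitute a real NEO file and smoothing directory
fname = 'rat2017_08_NEO.h5'
p_smooth = '/path/to/smoothed'
Xc, yc, cbool_bin, yhat = get_Xc_yc(fname, p_smooth, unit_num=0, binsize=10)
print(Xc.shape, yc.shape)  # contact-only design matrix and binned spike counts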
Example #2
def bin_model(X, y, cbool, binsize):
    """
    This model will attempt to predict the number of spikes in a given bin that is larger than 1ms
    :param X: The full design matrix, binning is done here.
                - Assumes you have all covariates in X
                - Assumes you have NOT included spike history in X
    :param y: A neo spiketrain. Eventually will want to extend to allow for boolean
    :return:
    """
    # time zero will inform us about the proceeding spikes between time 0 and 1, which is why there is an extra index
    # in Xbin

    Xbin = GLM.bin_design_matrix(X, binsize=binsize)[:-1]
    scaler = StandardScaler(with_mean=False)
    Xbin = scaler.fit_transform(Xbin)
    cbool = GLM.bin_design_matrix(cbool[:, np.newaxis],
                                  binsize=binsize)[:-1].ravel()
    ybin = elephant.conversion.BinnedSpikeTrain(
        y, binsize=binsize * pq.ms).to_array().T.astype('f8')

    # add history
    Xbin = GLM.add_spike_history(Xbin, ybin, -1)  # no basis
    # prep model
    yhat = np.zeros(ybin.shape[0])
    # TODO run stm
    yhat_out, mdl = GLM.run_GLM(Xbin[cbool, :],
                                ybin[cbool, :],
                                family=sm.families.Poisson,
                                link=sm.genmod.families.links.log)
    yhat[cbool] = yhat_out.ravel()
    return (yhat, mdl)
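
A hedged usage sketch for bin_model, reusing the data-loading helpers from the neighboring examples; the filename is a placeholder:

blk = neoUtils.get_blk('rat2017_08_NEO.h5')  # hypothetical NEO file
X = GLM.create_design_matrix(blk, ['M', 'F', 'TH', 'PHIE'])
y = neoUtils.concatenate_sp(blk)['cell_0']
cbool = neoUtils.get_Cbool(blk)
yhat, mdl = bin_model(X, y, cbool, binsize=10)  # predict counts in 10 ms bins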
Example #3
def simulate(X, y, p_model, cbool, n_sims=50):
    sess = tf.Session()
    new_saver = tf.train.import_meta_graph(p_model + '.meta')
    new_saver.restore(sess, p_model)
    K = tf.get_collection('K')[0]
    H = tf.get_collection('H')[0]
    b = tf.get_collection('b')[0]

    K = sess.run(K)
    H = sess.run(H)
    b = sess.run(b)

    stim_curr = np.dot(X, K)
    stim_curr = np.sum(stim_curr, axis=1) + b

    # Warning! The spike history basis is hard coded.
    B = GLM.make_bases(3, [0, 3], 1)
    H = GLM.map_bases(H, B)[0].ravel()
    g = np.zeros([X.shape[0] + len(H), n_sims])  # total input current: stimulus plus spike-history current
    ysim = np.zeros([X.shape[0],
                     n_sims])  # response vector (simulated spiketrains)
    hcurr = np.zeros([X.shape[0] + len(H), n_sims])  # history current
    rsim = np.zeros_like(g)

    for runNum in range(n_sims):
        print('Simulation number {} of {}'.format(runNum + 1, n_sims))
        g[:, runNum] = np.concatenate([stim_curr, np.zeros([len(H)])])
        for t in range(stim_curr.shape[0]):
            rsim[t, runNum] = np.exp(g[t, runNum])
            if not cbool[t]:
                continue
            # Bernoulli draw with P(spike) = 1 - exp(-rate): the probability of
            # at least one event from a Poisson process in one time bin
            if np.random.rand() < (1 - np.exp(-rsim[t, runNum])):
                ysim[t, runNum] = 1
                g[t + 1:t + len(H) + 1, runNum] += H
                hcurr[t + 1:t + len(H) + 1, runNum] += H
    hcurr = hcurr[:X.shape[0], :]
    rsim = rsim[:X.shape[0], :]

    output = {}
    output['K'] = K
    output['H'] = H
    output['ysim'] = ysim
    output['rsim'] = rsim
    output['b'] = b
    output['Basis'] = B
    output['stim_curr'] = stim_curr
    output['hcurr'] = hcurr
    sess.close()
    return (output)
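
The spike generator in simulate uses the Bernoulli approximation to Poisson spiking: with conditional intensity r per bin, the probability of at least one spike is 1 - exp(-r). A self-contained numpy check of that identity:

import numpy as np

rate = 0.2  # conditional intensity per bin
n = 100000
bernoulli = np.random.rand(n) < (1 - np.exp(-rate))  # rule used in simulate()
poisson = np.random.poisson(rate, n) > 0             # 'at least one event' per bin
print(bernoulli.mean(), poisson.mean())  # both approach 1 - exp(-0.2) ~= 0.181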
Example #4
def get_X_y(fname, unit_num=0):
    varlist = ['M', 'FX', 'FY', 'TH']
    blk = neoUtils.get_blk(fname)
    cbool = neoUtils.get_Cbool(blk)
    X = GLM.create_design_matrix(blk, varlist)
    # note: the raw block is passed twice here, so the derivatives come from the unsmoothed data
    Xdot, Xsmooth = GLM.get_deriv(blk, blk, varlist, [0, 5, 9])

    X = np.concatenate([X, Xdot], axis=1)
    X = neoUtils.replace_NaNs(X, 'pchip')
    X = neoUtils.replace_NaNs(X, 'interp')
    scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
    X = scaler.fit_transform(X)
    y = neoUtils.get_rate_b(blk, unit_num)[1][:, np.newaxis]
    return (X, y, cbool)
Example #5
def calc_MSE(fname, p_smooth, unit_num):
    blk = neoUtils.get_blk(fname)
    blk_smooth = GLM.get_blk_smooth(fname, p_smooth)
    varlist = ['M', 'F', 'TH', 'PHIE']
    root = neoUtils.get_root(blk, unit_num)
    print('Working on {}'.format(root))
    Xdot = GLM.get_deriv(blk, blk_smooth, varlist)[0]
    # 8 components (Mx, My, Mz, Fx, Fy, Fz, TH, PHIE) x 10 smoothing windows
    Xdot = np.reshape(Xdot, [-1, 8, 10])

    sp = neoUtils.concatenate_sp(blk)['cell_{}'.format(unit_num)]
    cbool = neoUtils.get_Cbool(blk)
    mse = []
    for ii in range(Xdot.shape[1]):
        var_in = Xdot[:, ii, :].copy()
        mse.append(tuning_curve_MSE(var_in, sp, cbool, bins=50))
    return (mse)
Example #6
def smoothed_best():
    df = pd.read_csv(min_entropy, index_col='id')
    smooth_vals = np.arange(5, 100, 10).tolist()
    best_smooth = df.mode(axis=1)[0]
    best_idx = [smooth_vals.index(x) for x in best_smooth]
    best_idx = pd.DataFrame({'idx': best_idx}, index=best_smooth.index)

    for f in glob.glob(os.path.join(p_load, '*NEO.h5')):
        try:
            blk = neoUtils.get_blk(f)
            blk_smooth = GLM.get_blk_smooth(f, p_smooth)
            num_units = len(blk.channel_indexes[-1].units)
            for unit_num in range(num_units):
                varlist = ['M', 'F', 'TH', 'PHIE']
                root = neoUtils.get_root(blk, unit_num)
                print('Working on {}'.format(root))
                if root not in best_idx.index:
                    print('{} not found in best smoothing derivative data'.
                          format(root))
                    continue
                outname = os.path.join(
                    p_save, 'best_smoothing_deriv',
                    '{}_best_smooth_pillowX.mat'.format(root))
                X = GLM.create_design_matrix(blk, varlist)
                smoothing_to_use = best_idx.loc[root][0]

                Xdot = GLM.get_deriv(blk,
                                     blk_smooth,
                                     varlist,
                                     smoothing=[smoothing_to_use])[0]
                X = np.concatenate([X, Xdot], axis=1)
                y = neoUtils.get_rate_b(blk, unit_num)[1]
                cbool = neoUtils.get_Cbool(blk)
                arclengths = get_arclength_bool(blk, unit_num)

                sio.savemat(
                    outname, {
                        'X': X,
                        'y': y,
                        'cbool': cbool,
                        'smooth': best_smooth.loc[root],
                        'arclengths': arclengths
                    })
        except Exception as ex:
            print('Problem with {}:{}'.format(os.path.basename(f), ex))
Example #7
def get_X_y(fname, p_smooth, unit_num, pca_tgl=False, n_pcs=3):
    varlist = ['M', 'F', 'TH', 'PHIE']
    blk = neoUtils.get_blk(fname)
    blk_smooth = get_blk_smooth(fname, p_smooth)

    cbool = neoUtils.get_Cbool(blk)
    X = GLM.create_design_matrix(blk, varlist)
    Xdot, Xsmooth = GLM.get_deriv(blk, blk_smooth, varlist, [0, 5, 9])
    # if using the PCA decomposition of the inputs:
    if pca_tgl:

        X = neoUtils.replace_NaNs(X, 'pchip')
        X = neoUtils.replace_NaNs(X, 'interp')

        Xsmooth = neoUtils.replace_NaNs(Xsmooth, 'pchip')
        Xsmooth = neoUtils.replace_NaNs(Xsmooth, 'interp')

        scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
        X = scaler.fit_transform(X)

        scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
        Xsmooth = scaler.fit_transform(Xsmooth)

        pca = sklearn.decomposition.PCA()
        X_pc = pca.fit_transform(X)[:, :n_pcs]
        pca = sklearn.decomposition.PCA()
        Xs_pc = pca.fit_transform(Xsmooth)[:, :n_pcs]
        zero_pad = np.zeros([1, n_pcs])
        Xd_pc = np.diff(np.concatenate([zero_pad, Xs_pc], axis=0), axis=0)
        X = np.concatenate([X_pc, Xd_pc], axis=1)

        scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
        X = scaler.fit_transform(X)
    else:
        X = np.concatenate([X, Xdot], axis=1)
        X = neoUtils.replace_NaNs(X, 'pchip')
        X = neoUtils.replace_NaNs(X, 'interp')
        scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
        X = scaler.fit_transform(X)

    y = neoUtils.get_rate_b(blk, unit_num)[1][:, np.newaxis]
    return (X, y, cbool)
Example #8
def plot_pca_spaces(fname,
                    unit_num,
                    p_smooth=None,
                    deriv_smooth=[9],
                    n_dims=3):
    """
    Plot the PCA tuning spaces
    :param fname: Filename of the neo data
    :param unit_num: unit number to use
    :param p_smooth: [optional] If using derivative, this is where the smooth data live
    :param deriv_smooth: If using derivative, tells us what derivative smoothing to use
    :return:
    """

    # Get the standard data, from which the PCA will be computed
    blk = neoUtils.get_blk(fname)
    cbool = neoUtils.get_Cbool(blk)
    varlist = ['M', 'F', 'TH', 'PHIE']
    X = GLM.create_design_matrix(blk, varlist)
    sp = neoUtils.concatenate_sp(blk)['cell_{}'.format(unit_num)]
    r = neoUtils.get_rate_b(blk, unit_num)[0]

    # If smoothing directory is given, then add the derivative data
    if p_smooth is not None:
        blk_smooth = GLM.get_blk_smooth(fname, p_smooth)
        Xdot = GLM.get_deriv(blk, blk_smooth, varlist, deriv_smooth)[0]
        X = np.concatenate([X, Xdot], axis=1)
        X[np.isnan(X)] = 0
    else:
        print('\tNot using derivative information')
    scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
    X_scale = np.zeros_like(X)
    X_scale[cbool, :] = scaler.fit_transform(X[cbool, :])
    pca = sklearn.decomposition.PCA()
    X_pcs = np.zeros_like(X)
    X_pcs[cbool, :] = pca.fit_transform(X_scale[cbool, :])
    # the returned histograms are overwritten each iteration; varTuning
    # presumably renders the plots as a side effect
    for ii in range(n_dims):
        var = X_pcs[:, ii]
        response, edges = varTuning.stim_response_hist(var, sp, cbool)

    response, edges1, edges2 = varTuning.joint_response_hist(
        X_pcs[:, 0], X_pcs[:, 1], sp, cbool, 40)

    response, edges1, edges2 = varTuning.joint_response_hist(
        X_pcs[:, -1], X_pcs[:, -2], sp, cbool, 40)
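
A hedged invocation sketch; the file path and smoothing directory are placeholders:

plot_pca_spaces('rat2017_08_NEO.h5',           # hypothetical NEO file
                unit_num=0,
                p_smooth='/path/to/smoothed',  # assumed smoothed-data directory
                deriv_smooth=[9],
                n_dims=3)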
Example #9
def get_X_y(fname, p_smooth, unit_num=0):
    varlist = ['M', 'F', 'TH', 'PHIE']
    blk = neoUtils.get_blk(fname)
    blk_smooth = GLM.get_blk_smooth(fname, p_smooth)

    cbool = neoUtils.get_Cbool(blk)
    X = GLM.create_design_matrix(blk, varlist)
    Xdot, Xsmooth = GLM.get_deriv(blk, blk_smooth, varlist, [9])

    X = np.concatenate([X, Xdot], axis=1)
    X = neoUtils.replace_NaNs(X, 'pchip')
    X = neoUtils.replace_NaNs(X, 'interp')
    scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
    X = scaler.fit_transform(X)

    y = neoUtils.get_rate_b(blk, unit_num)[1][:, np.newaxis]
    # zero out spiking outside of contact
    y[np.invert(cbool)] = 0
    return (X, y, cbool)
Example #10
def get_first_spike_vals(fname, p_smooth, unit_num):
    """
    Return a dataframe with length Ncontacts and the value of
    relevant stimulus features at that time

    :param blk:         neo block
    :param unit_num:    int
    :return: pandas dataframe
    """
    # get the blocks
    blk = neoUtils.get_blk(fname)
    blk_smooth = GLM.get_blk_smooth(fname, p_smooth)
    # get the trains and times of first spikes
    _, _, trains = spikeAnalysis.get_contact_sliced_trains(blk, unit_num)
    t_idx = [
        train[0].magnitude if len(train) > 0 else np.nan for train in trains
    ]
    t_idx = np.array(t_idx)
    t_idx = t_idx[np.isfinite(t_idx)].astype('int')
    # get the stimuli
    varlist = ['M', 'F', 'TH', 'PHIE']
    X = GLM.create_design_matrix(blk, varlist)
    Xsmooth = GLM.get_deriv(blk, blk_smooth, varlist, smoothing=[9])[1]
    # design-matrix columns are [Mx, My, Mz, Fx, Fy, Fz, TH, PHIE]
    MB = np.sqrt(X[:, 1]**2 + X[:, 2]**2)[:, np.newaxis]
    FB = np.sqrt(X[:, 4]**2 + X[:, 5]**2)[:, np.newaxis]
    RB = np.sqrt(X[:, 6]**2 + X[:, 7]**2)[:, np.newaxis]
    # use the smoothed signals to calculate the derivative
    MBsmooth = np.sqrt(Xsmooth[:, 1]**2 + Xsmooth[:, 2]**2)[:, np.newaxis]
    FBsmooth = np.sqrt(Xsmooth[:, 4]**2 + Xsmooth[:, 5]**2)[:, np.newaxis]
    RBsmooth = np.sqrt(Xsmooth[:, 6]**2 + Xsmooth[:, 7]**2)[:, np.newaxis]

    X = np.concatenate([MB, FB, RB], axis=1)
    Xsmooth = np.concatenate([MBsmooth, FBsmooth, RBsmooth], axis=1)
    Xdot = np.diff(np.concatenate([np.zeros([1, 3]), Xsmooth]), axis=0)
    X = np.concatenate([X, Xdot], axis=1)

    # extract stimulus at the time of first spike and output to a dataframe
    vals = X[t_idx]
    vallist = ['MB', 'FB', 'RB', 'MBdot', 'FBdot', 'RBdot']
    df = pd.DataFrame()
    for ii in range(len(vallist)):
        # vals is (Ncontacts, 6): index columns, not rows
        df[vallist[ii]] = vals[:, ii]
    df['id'] = neoUtils.get_root(blk, unit_num)
    return (df)
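
A hedged usage sketch; inputs are placeholders as in the other examples:

df = get_first_spike_vals('rat2017_08_NEO.h5',  # hypothetical NEO file
                          '/path/to/smoothed',  # assumed smoothing directory
                          unit_num=0)
print(df[['MB', 'FB', 'RB']].describe())  # stimulus magnitudes at first spike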
Example #11
def smoothed_mechanics():
    """
    use this function to grab the data from the smoothed mechanics and the
    derivative of the same
    """

    f_arclength = '/projects/p30144/_VG3D/deflections/direction_arclength_FR_group_data.csv'
    f_list = glob.glob(os.path.join(p_load, '*NEO.h5'))
    f_list.sort()

    for f in f_list:
        try:
            blk = neoUtils.get_blk(f)
            blk_smooth = GLM.get_blk_smooth(f, p_smooth)
            num_units = len(blk.channel_indexes[-1].units)
            for unit_num in range(num_units):
                varlist = ['M', 'F', 'TH', 'PHIE']
                root = neoUtils.get_root(blk, unit_num)
                print('Working on {}'.format(root))
                outname = os.path.join(p_save,
                                       '{}_smooth_mechanicsX.mat'.format(root))

                # get_deriv returns (Xdot, Xsmooth): the smoothed mechanics
                # and their derivative are used together here
                Xdot, X = GLM.get_deriv(blk,
                                        blk_smooth,
                                        varlist,
                                        smoothing=[5])
                X = np.concatenate([X, Xdot], axis=1)
                y = neoUtils.get_rate_b(blk, unit_num)[1]
                cbool = neoUtils.get_Cbool(blk)
                arclengths = get_arclength_bool(blk,
                                                unit_num,
                                                fname=f_arclength)

                sio.savemat(
                    outname, {
                        'X': X,
                        'y': y,
                        'cbool': cbool,
                        'smooth': 55,  # smoothing index 5 corresponds to np.arange(5, 100, 10)[5] = 55 ms
                        'arclengths': arclengths
                    })
        except Exception as ex:
            print('Problem with {}:{}'.format(os.path.basename(f), ex))
Example #12
def get_components(fname, p_smooth=None, smooth_idx=9):
    ''' Get the PCA components given a filename'''
    varlist = ['M', 'F', 'TH', 'PHIE']
    blk = neoUtils.get_blk(fname)
    cbool = neoUtils.get_Cbool(blk)
    root = neoUtils.get_root(blk, 0)[:-2]
    X = GLM.create_design_matrix(blk, varlist)
    if p_smooth is not None:
        blk_smooth = GLM.get_blk_smooth(fname, p_smooth)
        Xdot = GLM.get_deriv(blk, blk_smooth, varlist, smoothing=[smooth_idx])[0]
        X = np.concatenate([X, Xdot], axis=1)
    X[np.invert(cbool), :] = 0
    X = neoUtils.replace_NaNs(X, 'pchip')
    X = neoUtils.replace_NaNs(X, 'interp')

    scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
    X[cbool, :] = scaler.fit_transform(X[cbool, :])

    pca = sklearn.decomposition.PCA()
    pca.fit(X[cbool, :])

    return (pca, root)
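
A hedged usage sketch; the returned object is a fitted sklearn PCA, so its standard attributes apply:

pca, root = get_components('rat2017_08_NEO.h5',           # hypothetical NEO file
                           p_smooth='/path/to/smoothed')  # assumed smoothing directory
print(root)
print(pca.explained_variance_ratio_[:3])  # variance captured by the first 3 PCs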
Example #13
def calc_corr(fname, p_smooth, unit_num):
    blk = neoUtils.get_blk(fname)
    blk_smooth = GLM.get_blk_smooth(fname, p_smooth)
    varlist = ['M', 'F', 'TH', 'PHIE']
    component_list = [
        '{}_dot'.format(x)
        for x in ['Mx', 'My', 'Mz', 'Fx', 'Fy', 'Fz', 'TH', 'PHI']
    ]
    root = neoUtils.get_root(blk, unit_num)
    Xdot = GLM.get_deriv(blk, blk_smooth, varlist)[0]
    # 8 components x 10 smoothing windows
    Xdot = np.reshape(Xdot, [-1, 8, 10])
    windows = np.arange(5, 100, 10)

    sp = neoUtils.concatenate_sp(blk)['cell_{}'.format(unit_num)]
    cbool = neoUtils.get_Cbool(blk)
    R = []
    # loop over variables
    for ii in range(Xdot.shape[1]):
        var_in = Xdot[:, ii, :].copy()
        # loop over smoothing
        r = []
        for jj in range(var_in.shape[1]):
            kernel = elephant.kernels.GaussianKernel(pq.ms * windows[jj])
            FR = elephant.statistics.instantaneous_rate(sp,
                                                        pq.ms,
                                                        kernel=kernel)
            idx = np.isfinite(var_in[:, jj])
            r.append(
                np.corrcoef(var_in[:, jj].ravel()[idx],
                            FR.magnitude.ravel()[idx])[0, 1])
        R.append(r)
    R = np.array(R)
    df = pd.DataFrame(data=R, columns=['{}ms'.format(x) for x in windows])
    df.index = component_list
    return (df)
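
Since the returned dataframe is components x smoothing windows, the best window per component can be read off with standard pandas; the inputs below are placeholders:

df = calc_corr('rat2017_08_NEO.h5', '/path/to/smoothed', unit_num=0)
print(df.idxmax(axis=1))  # smoothing window with the highest correlation per component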
Example #14
def smoothed(smooth_idx=9):
    smooth_vals = np.arange(5, 100, 10)
    sub_p_save = os.path.join(
        p_save, '{}ms_smoothing_deriv'.format(smooth_vals[smooth_idx]))
    if not os.path.isdir(sub_p_save):
        os.mkdir(sub_p_save)
    for f in glob.glob(os.path.join(p_load, '*NEO.h5')):
        try:
            blk = neoUtils.get_blk(f)
            blk_smooth = GLM.get_blk_smooth(f, p_smooth)
            num_units = len(blk.channel_indexes[-1].units)
            for unit_num in range(num_units):
                varlist = ['M', 'F', 'TH', 'PHIE']
                root = neoUtils.get_root(blk, unit_num)
                print('Working on {}'.format(root))
                outname = os.path.join(
                    sub_p_save,
                    '{}ms_{}_pillowX.mat'.format(smooth_vals[smooth_idx],
                                                 root))

                X = GLM.create_design_matrix(blk, varlist)
                Xdot = GLM.get_deriv(blk, blk_smooth, varlist, [smooth_idx])[0]
                X = np.concatenate([X, Xdot], axis=1)
                y = neoUtils.get_rate_b(blk, unit_num)[1]
                cbool = neoUtils.get_Cbool(blk)
                arclengths = get_arclength_bool(blk, unit_num)

                sio.savemat(outname, {
                    'X': X,
                    'y': y,
                    'cbool': cbool,
                    'arclengths': arclengths
                })
        except Exception as ex:
            print('Problem with {}:{}'.format(os.path.basename(f), ex))
Example #15
def calc_contribs(best_model, model_dict, spike_train, center_ego_params,
                  center_dist_params, allo_params, speed_params, Xe, Xd, Xa,
                  Xs):
    ''' calculate the effect on different measures of goodness-of-fit when adding
    or subtracting each variable '''

    #start a dict for contribs
    contribs = {}
    #note which variables we're looking at
    variables = [('allo', ), ('center_ego', ), ('center_dist', ), ('speed', )]
    #goodness-of-fit measures we report contributions for
    measures = ['ll', 'llps', 'explained_var', 'corr_r', 'pseudo_r2']

    #if the best model is a single-variable model... and not null...
    if len(best_model) == 1 and 'uniform' not in best_model:
        #calculate goodness-of-fit measures for the null model
        uniform_model_dict = full_classify.run_final(
            'uniform', 1., center_ego_params, center_dist_params, allo_params,
            speed_params, Xe, Xd, Xa, Xs, spike_train)
        #calculate difference in goodness-of-fit measures between the best model and the null
        #model -- these are the contributions for the single encoded variable
        contribs[best_model] = {
            m: model_dict[m] - uniform_model_dict[m]
            for m in measures
        }

        #for each variable...
        for var in variables:
            #not including the one we just looked at...
            if frozenset(var) != best_model:
                #create a new model which contains the single encoded variable as well as
                #this new variable
                new_model = frozenset(chain(list(best_model), list(var)))

                #calc a new scale factor
                new_scale_factor = full_classify.calc_scale_factor(
                    new_model, center_ego_params, center_dist_params,
                    allo_params, speed_params, Xe, Xd, Xa, Xs, spike_train)
                #run the new model and collect the result
                new_model_dict = full_classify.run_final(
                    new_model, new_scale_factor, center_ego_params,
                    center_dist_params, allo_params, speed_params, Xe, Xd, Xa,
                    Xs, spike_train)

                #calculate difference in goodness-of-fit measures between the new model and
                #the best model -- these are the contributions from the added variable
                contribs[frozenset(var)] = {
                    m: new_model_dict[m] - model_dict[m]
                    for m in measures
                }

    #otherwise, if there are multiple variables in the best model...
    elif len(best_model) > 1:
        #for each variable in the whole list...
        for var in variables:
            #if this variable is included in the best model...
            if var[0] in best_model:
                #create a new model that includes all the variables in the best
                #model EXCEPT this one
                new_model = frozenset(i for i in best_model if i != var[0])

                #calculate the new scale factor
                new_scale_factor = full_classify.calc_scale_factor(
                    new_model, center_ego_params, center_dist_params,
                    allo_params, speed_params, Xe, Xd, Xa, Xs, spike_train)
                #run the new model and collect the result
                new_model_dict = full_classify.run_final(
                    new_model, new_scale_factor, center_ego_params,
                    center_dist_params, allo_params, speed_params, Xe, Xd, Xa,
                    Xs, spike_train)
                #calculate difference in goodness-of-fit measures between the best model and
                #the new model -- these are the contributions from the subtracted variable
                contribs[frozenset(var)] = {
                    m: model_dict[m] - new_model_dict[m]
                    for m in measures
                }

            #otherwise...
            else:
                #make a new model that adds the current variable to the best model
                new_model = frozenset(chain(list(best_model), list(var)))

                #calc the new scale factor
                new_scale_factor = full_classify.calc_scale_factor(
                    new_model, center_ego_params, center_dist_params,
                    allo_params, speed_params, Xe, Xd, Xa, Xs, spike_train)
                #run the new model and collect the result
                new_model_dict = full_classify.run_final(
                    new_model, new_scale_factor, center_ego_params,
                    center_dist_params, allo_params, speed_params, Xe, Xd, Xa,
                    Xs, spike_train)
                #calculate difference in goodness-of-fit measures between the new model and
                #the best model -- these are the contributions from the added variable
                contribs[frozenset(var)] = {
                    m: new_model_dict[m] - model_dict[m]
                    for m in measures
                }

    #add the new stuff to the model dict
    model_dict['contribs'] = contribs
    model_dict['best_model'] = best_model

    #return the model dict
    return model_dict
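
A hedged sketch of how the returned structure might be read; the parameter objects are assumed to exist as in the signature above, and the variable names follow the variables list:

# hypothetical: best model encodes allocentric direction and speed
best = frozenset(['allo', 'speed'])
out = calc_contribs(best, model_dict, spike_train, center_ego_params,
                    center_dist_params, allo_params, speed_params,
                    Xe, Xd, Xa, Xs)
# contribution of 'allo' = change in log-likelihood when 'allo' is removed
print(out['contribs'][frozenset(('allo', ))]['ll'])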
Example #16
def X_to_pillow(X):
    # map the design matrix onto a temporal ('pillow') basis before scaling
    B = GLM.make_bases(5, [0, 10])
    Xb = GLM.apply_bases(X, B[0])
    scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
    return (scaler.fit_transform(Xb))
Example #17
def init_model_params():
    sigma_vals = np.arange(2, 200, 4) * pq.ms
    B = GLM.make_bases(5, [0, 15], b=2)
    winsize = int(B[0].shape[0])
    return sigma_vals, B, winsize
Example #18
# print(a)  # note: 'a' is not defined in this snippet
'''
from sklearn import datasets
iris = datasets.load_iris()
x = iris.data
y = iris.target
y[y>1]=1


import linear
md = linear.logit()
md.fit(x,y)
# md.trainProc()
# print(md.result)
print(md.predict(x))
'''

from sklearn import datasets
import numpy as np
import pandas as pd

bos = datasets.load_boston()  # note: load_boston was removed in scikit-learn 1.2
x = bos.data
y = bos.target
x = pd.DataFrame(x)
x = np.array((x - x.mean()) / x.std())  # without standardization, float64 overflow occurs easily
y = (y - y.mean()) / y.std()
import GLM
md = GLM.linearRegression()
md.fit(x, y, alpha=0.01, iter=500)
md.trainProc()
print(md.coef)
Example #19
def main():
    # ================================ #
    # SET UP OPTION PARSER
    # ================================ #
    usage = "usage: %prog filename [options]"
    parser = OptionParser(usage)
    parser.add_option('-p',
                      '--prefix',
                      dest='prefix',
                      default='model_results',
                      type=str,
                      help='prefix to append to the results filename')
    parser.add_option(
        '-v',
        '--varlist',
        dest='varlist',
        default='M',
        type=str,
        help=
        'list of strings which indicate which variables to include in the model'
    )
    parser.add_option('-b',
                      '--binsize',
                      dest='binsize',
                      default=1,
                      type=int,
                      help='number of milliseconds to bin the spikes.')
    parser.add_option(
        '-D',
        '--deriv_tgl',
        action='store_true',
        dest='deriv_tgl',
        default=False,
        help=
        'Derivative toggle, set to true to include the derivative in the model'
    )
    parser.add_option(
        '-P',
        '--pillow_tgl',
        action='store_true',
        dest='pillow_tgl',
        default=False,
        help=
        'Basis toggle, set to true to map the inputs to a pillow basis \nfor use in the GLM only at this time'
    )
    parser.add_option(
        '--GLM',
        action='store_true',
        dest='glm_tgl',
        default=False,
        help=
        'Toggles a GLM model. \nIf pillow toggle is false, takes an input where each point in the windowsize is its own dimension '
    )
    parser.add_option('--GAM',
                      action='store_true',
                      dest='gam_tgl',
                      default=False,
                      help='GAM toggle, call the flag to run a GAM')
    parser.add_option(
        '-C',
        '--conv_tgl',
        action='store_true',
        dest='conv_tgl',
        default=False,
        help=
        'Convolutional network toggle. Call the flag to run a convolutional network'
    )
    parser.add_option(
        '--plot_tgl',
        action='store_true',
        dest='plot_tgl',
        default=False,
        help=
        'Plot toggle, call to plot the results during the run. This should never be called on quest.'
    )
    parser.add_option(
        '-w',
        '--window',
        dest='window',
        default=1,
        type=int,
        help=
        'Window into the past to set the convolutional window to look in ms')
    parser.add_option('-n',
                      '--num_conv',
                      dest='max_num_conv',
                      default=4,
                      type=int,
                      help='Max number of convolutional nodes to use')
    parser.add_option('--l2',
                      dest='l2_penalty',
                      default=1e-6,
                      type=float,
                      help='l2 penalty')
    parser.add_option(
        '-k',
        '--kernel',
        dest='kernel_mode',
        default='gaussian',
        type=str,
        help='Kernel Mode (\'box\',\'gaussian\',\'exp\',\'alpha\',\'epan\')')
    parser.add_option(
        '--STM',
        '--STM_tgl',
        action='store_true',
        dest='stm_tgl',
        default=False,
        help='STM toggle. Call the flag to run a STM network (Theis 2013)')
    parser.add_option('--num_stm_components',
                      action='store',
                      dest='num_stm_components',
                      default=3,
                      type=int,
                      help='Number of components to use in the STM model')
    parser.add_option('--num_stm_features',
                      action='store',
                      dest='num_stm_features',
                      default=20,
                      type=int,
                      help='Number of features to use in the STM model')
    parser.add_option(
        '--silence_noncontact',
        action='store_true',
        dest='silence_noncontact',
        default=False,
        help=
        'If called, sets all spiking that occurs during non_contact to zero')

    (options, args) = parser.parse_args()
    if len(args) < 1:
        parser.error('Need to pass a filename first')

    # map options
    plot_tgl = options.plot_tgl
    pillow_tgl = options.pillow_tgl
    varlist = options.varlist.split(',')
    conv_tgl = options.conv_tgl
    gam_tgl = options.gam_tgl
    binsize = options.binsize
    deriv_tgl = options.deriv_tgl
    prefix = options.prefix
    max_num_conv = options.max_num_conv
    l2_penalty = options.l2_penalty
    kernel_mode = options.kernel_mode

    # Get desired filenames
    fname = args[0]
    p_save = os.path.join(os.path.split(fname)[0], 'results')
    print(os.path.basename(fname))

    # read data in
    fid = neo.io.NixIO(fname)
    blk = fid.read_block()

    # set binsize to a quantity
    binsize = binsize * pq.ms

    # initialize parameters
    sigma_vals, B, winsize = init_model_params()

    # calculate the design matrices based on input toggles
    X = create_design_matrix(blk,
                             varlist,
                             window=options.window,
                             binsize=options.binsize,
                             deriv_tgl=deriv_tgl,
                             bases=None)

    # calculate pillow bases if desired.
    if pillow_tgl:
        B = GLM.make_bases(5, [0, 15], 2)
        bases = B[0]
        X_pillow = create_design_matrix(blk,
                                        varlist,
                                        deriv_tgl=options.deriv_tgl,
                                        bases=bases)
    else:
        B = None
        bases = None
        X_pillow = X

    for unit in blk.channel_indexes[-1].units:
        # ===================================== #
        # INIT OUTPUTS
        # ===================================== #
        yhat = {}
        mdl = {}
        corrs = {}
        weights = {}

        id = get_root(blk, int(unit.name[-1]))
        f_save = os.path.join(p_save, '{}_{}.npz'.format(prefix, id))
        if os.path.isfile(f_save):
            # skip units whose output already exists
            print('Output file found. Skipping {}'.format(id))
            continue

        # ===================================== #
        # GET SPIKE TIMES
        # CONVERT TO BINNED SPIKE TRAIN
        # ===================================== #
        sp = concatenate_sp(blk)[unit.name]
        b = elephant.conversion.BinnedSpikeTrain(sp, binsize=binsize)
        Cbool = get_Cbool(blk, -1)

        spike_isbool = binsize == pq.ms
        if spike_isbool:
            y = b.to_bool_array().ravel().astype('float32')
        else:
            y = b.to_array().ravel().astype('float32')

        if options.silence_noncontact:
            y[np.invert(Cbool)] = 0
        # ===================================== #
        # MAKE TENSOR FOR CONV NETS
        # ===================================== #
        Xt = create_design_matrix(blk,
                                  varlist,
                                  window=1,
                                  deriv_tgl=deriv_tgl,
                                  bases=None)
        Xt = make_binned_tensor(Xt, b, window_size=options.window)

        # ===================================== #
        # RUN ALL THE MODELS REQUESTED
        # ===================================== #
        if options.glm_tgl:
            if pillow_tgl:
                yhat['glm'], mdl['glm'] = run_GLM(X_pillow, y)
                weights['glm'] = mdl['glm'].params
            else:
                yhat['glm'], mdl['glm'] = run_GLM(X, y)
                weights['glm'] = mdl['glm'].params
        if gam_tgl:
            yhat['gam'], mdl['gam'] = run_GAM(X, y)

        if conv_tgl:
            for num_filters in range(1, max_num_conv + 1):
                mdl_name = 'conv_{}_node'.format(num_filters)
                yhat[mdl_name], mdl[mdl_name] = conv_model(
                    Xt,
                    y[:, np.newaxis, np.newaxis],
                    num_filters=num_filters,
                    winsize=options.window,
                    is_bool=spike_isbool,
                    l2_penalty=l2_penalty)
                weights[mdl_name] = mdl[mdl_name].get_weights()[0]

        if options.stm_tgl:
            yhat['stm'], mdl['stm'] = run_STM(
                X,
                y,
                num_components=options.num_stm_components,
                num_features=options.num_stm_features)

        # ===================================== #
        # EVALUATE ALL THE MODELS -- THIS MAY NEED TO BE ALTERED
        # ===================================== #

        for model in yhat:
            corrs[model] = evaluate_correlation(yhat[model],
                                                sp,
                                                kernel_mode=kernel_mode,
                                                Cbool=Cbool,
                                                sigma_vals=sigma_vals)
        # ===================================== #
        # PLOT IF REQUESTED
        # ===================================== #
        if plot_tgl:
            for model in yhat:
                plt.plot(sigma_vals, corrs[model])

            ax = plt.gca()
            ax.set_ylim(-0.1, 1)
            ax.legend(corrs.keys())
            ax.set_xlabel('{} Rate Kernel Sigma'.format(options.kernel_mode))
            ax.set_ylabel('Pearson Correlation')
            ax.set_title(id)
            plt.savefig(
                os.path.join(
                    p_save, 'performance_{}_{}.svg'.format(options.prefix,
                                                           id)))
            plt.close('all')

        # ===================================== #
        # SAVE THE MODEL OUTPUTS
        # ===================================== #

        np.savez(f_save,
                 corrs=corrs,
                 yhat=yhat,
                 sigma_vals=sigma_vals,
                 mdl=mdl,
                 y=y,
                 X=X,
                 Cbool=Cbool,
                 options=options,
                 B=B)
Example #20
def build_GLM_model(Xraw,
                    yraw,
                    savefile,
                    nfilts=4,
                    hist=False,
                    learning_rate=1e-5,
                    epochs=100,
                    batch_size=256,
                    family='p',
                    min_delta=0.1,
                    patience=8):
    tf.reset_default_graph()
    if batch_size is None:
        batch_size = Xraw.shape[0]
    if hist:
        B = GLM.make_bases(3, [0, 3], 1)
        yhistraw = GLM.add_spike_history(Xraw, yraw, B)[:, Xraw.shape[1]:]

    # make data a multiple of batchsize and batch it
    n_del = Xraw.shape[0] % batch_size
    X = Xraw[n_del:, :]
    y = yraw[n_del:]
    n_batches = X.shape[0] // batch_size  # integer division so np.split gets an int
    batched_x = np.split(X, n_batches)
    batched_y = np.split(y, n_batches)
    if hist:
        yhist = yhistraw[n_del:, :]
        batched_yhist = np.split(yhist, n_batches)

    # init vars
    mdl_input = tf.placeholder(tf.float32, [None, X.shape[1]])
    mdl_output = tf.placeholder(tf.float32, [None, 1])

    if hist:
        mdl_yhist = tf.placeholder(tf.float32, [None, yhist.shape[1]])
    # init weights
    if hist:
        H = tf.Variable(tf.zeros([yhist.shape[1], 1]), name='HistoryFilters')
        tf.add_to_collection('H', H)

    K = tf.Variable(tf.random_normal([X.shape[1], nfilts], stddev=0.003),
                    name='StimFilters')
    tf.add_to_collection('K', K)

    b = tf.Variable(tf.random_normal([1]), name='bias')
    tf.add_to_collection('b', b)

    #### The model ###
    # Hidden Layer
    hidden_out = tf.matmul(mdl_input, K)
    # hidden_out = tf.nn.relu(hidden_out)

    Ksum = tf.reduce_sum(hidden_out, axis=1)
    if hist:
        H = tf.clip_by_value(H, -np.inf, 0.)
        Ksum = tf.add(tf.squeeze(tf.matmul(mdl_yhist, H)), Ksum)
    Ksum = tf.add(Ksum, b)

    # define cost function as negative log likelihood of Poisson spiking
    if family == 'p':
        conditional_intensity = tf.exp(Ksum)
        cost = neglogliklihood(conditional_intensity, mdl_output)
    elif family == 'b':
        conditional_intensity = tf.sigmoid(Ksum)
        #cost = tf.reduce_mean(-tf.reduce_sum(mdl_output*conditional_intensity), reduction_indices=1)
        cost = neglogliklihood_bernoulli(conditional_intensity, mdl_output)

    # optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    # optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(cost)
    optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
    init = tf.global_variables_initializer()
    sess = tf.Session()

    sess.run(init)
    # loop over entire dataset multiple times
    all_cost = [np.inf]

    patience_cnt = 0
    for epoch in range(epochs):
        # loop over sub_batches
        avg_cost = 0.
        for ii in range(n_batches):
            if hist:
                _, c = sess.run(
                    [optimizer, cost],
                    feed_dict={
                        mdl_input: batched_x[ii],
                        mdl_output: batched_y[ii],
                        mdl_yhist: batched_yhist[ii]
                    })
            else:
                _, c = sess.run([optimizer, cost],
                                feed_dict={
                                    mdl_input: batched_x[ii],
                                    mdl_output: batched_y[ii]
                                })
            avg_cost += c / n_batches

            # Early Stopping
        # print('AVG:{}, Most Recent:{}'.format(avg_cost, all_cost[-1]))

        if epoch > 0 and ((all_cost[-1] - avg_cost) > min_delta):
            patience_cnt = 0
        else:
            patience_cnt += 1

        if patience_cnt >= patience:
            print('Early Stopping...')
            break
        all_cost.append(avg_cost)

        print('Epoch:{}\t, Cost={}'.format(epoch, avg_cost))
    print('Done!')
    # plt.plot(all_cost)
    # plt.show()
    print('saving to {}'.format(savefile))
    saver = tf.train.Saver()
    saver.save(sess, savefile)
    sess.close()
    print('Saved session to {}'.format(savefile))
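
neglogliklihood and neglogliklihood_bernoulli are called above but defined elsewhere in the module. A minimal sketch consistent with those call sites (TF1-style; the squeeze and epsilon guards are my own assumptions, not the original implementation):

import tensorflow as tf

def neglogliklihood(conditional_intensity, y):
    # Poisson negative log-likelihood up to the constant log(y!) term:
    # NLL = mean(lambda - y * log(lambda))
    lam = tf.squeeze(conditional_intensity)
    yf = tf.squeeze(y)
    return tf.reduce_mean(lam - yf * tf.log(lam + 1e-8))

def neglogliklihood_bernoulli(conditional_intensity, y):
    # Bernoulli negative log-likelihood (binary cross-entropy)
    p = tf.clip_by_value(tf.squeeze(conditional_intensity), 1e-8, 1. - 1e-8)
    yf = tf.squeeze(y)
    return tf.reduce_mean(-(yf * tf.log(p) + (1. - yf) * tf.log(1. - p)))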