def get_Xc_yc(fname, p_smooth, unit_num, binsize):
    varlist = ['M', 'F', 'TH', 'PHIE']
    blk = neoUtils.get_blk(fname)
    blk_smooth = GLM.get_blk_smooth(fname, p_smooth)
    cbool = neoUtils.get_Cbool(blk)
    X = GLM.create_design_matrix(blk, varlist)
    # get_deriv returns (Xdot, Xsmooth); maybe only want one derivative?
    Xdot = GLM.get_deriv(blk, blk_smooth, varlist, [0, 5, 9])[0]
    X = np.concatenate([X, Xdot], axis=1)
    X = neoUtils.replace_NaNs(X, 'pchip')
    X = neoUtils.replace_NaNs(X, 'interp')

    Xbin = GLM.bin_design_matrix(X, binsize=binsize)
    scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
    Xbin = scaler.fit_transform(Xbin)
    cbool_bin = GLM.bin_design_matrix(cbool[:, np.newaxis], binsize=binsize).ravel()
    cbool_bin = cbool_bin.astype(bool)  # ensure a boolean mask after binning

    y = neoUtils.concatenate_sp(blk)['cell_{}'.format(unit_num)]
    ybin = elephant.conversion.BinnedSpikeTrain(y, binsize=binsize * pq.ms).to_array().T.astype('f8')
    Xbin = Xbin[:ybin.shape[0], :]
    cbool_bin = cbool_bin[:ybin.shape[0]]
    yhat = np.zeros(ybin.shape[0])

    Xc = Xbin[cbool_bin, :]
    yc = ybin[cbool_bin, :]
    return (Xc, yc, cbool_bin, yhat)
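
# Example usage of get_Xc_yc (sketch): the file paths, unit number, and the 10 ms bin
# size are hypothetical placeholders; relies on the same module-level imports as above.
def _example_get_Xc_yc():
    fname = '/path/to/rat_NEO.h5'        # hypothetical NEO data file
    p_smooth = '/path/to/smoothed_data'  # hypothetical directory of smoothed blocks
    Xc, yc, cbool_bin, yhat = get_Xc_yc(fname, p_smooth, unit_num=0, binsize=10)
    print('contact-only design matrix: {}, binned spikes: {}'.format(Xc.shape, yc.shape))
    return Xc, yc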
def bin_model(X, y, cbool, binsize):
    """
    Predict the number of spikes in a given bin that is larger than 1 ms.

    :param X: the full design matrix; binning is done here.
                - assumes all covariates are already in X
                - assumes spike history has NOT been included in X
    :param y: a neo SpikeTrain. May eventually be extended to accept a boolean array.
    :return: yhat, mdl -- the predicted rate (nonzero only during contact) and the fitted GLM
    """
    # The sample at time zero informs us about the spikes that occur between time 0 and 1,
    # which is why there is an extra index in Xbin
    Xbin = GLM.bin_design_matrix(X, binsize=binsize)[:-1]
    scaler = StandardScaler(with_mean=False)
    Xbin = scaler.fit_transform(Xbin)
    cbool = GLM.bin_design_matrix(cbool[:, np.newaxis], binsize=binsize)[:-1].ravel()
    ybin = elephant.conversion.BinnedSpikeTrain(y, binsize=binsize * pq.ms).to_array().T.astype('f8')

    # add history
    Xbin = GLM.add_spike_history(Xbin, ybin, -1)  # no basis

    # prep model
    yhat = np.zeros(ybin.shape[0])

    # TODO run stm
    yhat_out, mdl = GLM.run_GLM(Xbin[cbool, :],
                                ybin[cbool, :],
                                family=sm.families.Poisson,
                                link=sm.genmod.families.links.log)
    yhat[cbool] = yhat_out.ravel()
    return yhat, mdl
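
# Example usage of bin_model (sketch): the file path and 10 ms bin are hypothetical,
# and the unpacking assumes the yhat/mdl return added above.
def _example_bin_model():
    fname = '/path/to/rat_NEO.h5'  # hypothetical NEO file
    blk = neoUtils.get_blk(fname)
    cbool = neoUtils.get_Cbool(blk)
    X = GLM.create_design_matrix(blk, ['M', 'F', 'TH', 'PHIE'])
    X = neoUtils.replace_NaNs(X, 'pchip')
    y = neoUtils.concatenate_sp(blk)['cell_0']  # a neo SpikeTrain
    yhat, mdl = bin_model(X, y, cbool, binsize=10)
    return yhat, mdl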
def simulate(X, y, p_model, cbool, n_sims=50):
    sess = tf.Session()
    new_saver = tf.train.import_meta_graph(p_model + '.meta')
    new_saver.restore(sess, p_model)
    K = tf.get_collection('K')[0]
    H = tf.get_collection('H')[0]
    b = tf.get_collection('b')[0]
    K = sess.run(K)
    H = sess.run(H)
    b = sess.run(b)

    stim_curr = np.dot(X, K)
    stim_curr = np.sum(stim_curr, axis=1) + b

    # Warning! The spike history basis is hard coded.
    B = GLM.make_bases(3, [0, 3], 1)
    H = GLM.map_bases(H, B)[0].ravel()

    g = np.zeros([X.shape[0] + len(H), n_sims])      # total current?
    ysim = np.zeros([X.shape[0], n_sims])            # response vector (simulated spiketrains)
    hcurr = np.zeros([X.shape[0] + len(H), n_sims])  # history current
    rsim = np.zeros_like(g)

    for runNum in range(n_sims):
        print('Simulation number {} of {}'.format(runNum + 1, n_sims))
        g[:, runNum] = np.concatenate([stim_curr, np.zeros([len(H)])])
        for t in range(stim_curr.shape[0]):
            rsim[t, runNum] = np.exp(g[t, runNum])
            if not cbool[t]:
                continue
            if np.random.rand() < (1 - np.exp(-rsim[t, runNum])):
                # a spike occurred; add the history filter to the upcoming current
                ysim[t, runNum] = 1
                g[t + 1:t + len(H) + 1, runNum] += H
                hcurr[t + 1:t + len(H) + 1, runNum] += H
    hcurr = hcurr[:X.shape[0], :]
    rsim = rsim[:X.shape[0], :]

    output = {}
    output['K'] = K
    output['H'] = H
    output['ysim'] = ysim
    output['rsim'] = rsim
    output['b'] = b
    output['Basis'] = B
    output['stim_curr'] = stim_curr
    output['hcurr'] = hcurr
    sess.close()
    return (output)
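
# Example usage of simulate (sketch): assumes a TensorFlow checkpoint written by
# build_GLM_model (defined later in this file) exists at p_model and was trained on the
# same design matrix; all paths are hypothetical.
def _example_simulate():
    fname = '/path/to/rat_NEO.h5'
    p_smooth = '/path/to/smoothed_data'
    p_model = '/path/to/models/rat_unit0_glm'  # checkpoint prefix, i.e. p_model + '.meta' exists
    X, y, cbool = get_X_y(fname, p_smooth, unit_num=0)
    output = simulate(X, y, p_model, cbool, n_sims=10)
    psth_sim = output['ysim'].mean(axis=1)  # average the simulated spike trains across runs
    return psth_sim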
def get_X_y(fname, unit_num=0):
    varlist = ['M', 'FX', 'FY', 'TH']
    blk = neoUtils.get_blk(fname)
    cbool = neoUtils.get_Cbool(blk)
    X = GLM.create_design_matrix(blk, varlist)
    Xdot, Xsmooth = GLM.get_deriv(blk, blk, varlist, [0, 5, 9])
    X = np.concatenate([X, Xdot], axis=1)
    X = neoUtils.replace_NaNs(X, 'pchip')
    X = neoUtils.replace_NaNs(X, 'interp')
    scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
    X = scaler.fit_transform(X)
    y = neoUtils.get_rate_b(blk, unit_num)[1][:, np.newaxis]
    yhat = np.zeros_like(y)
    return (X, y, cbool)
def calc_MSE(fname, p_smooth, unit_num):
    blk = neoUtils.get_blk(fname)
    blk_smooth = GLM.get_blk_smooth(fname, p_smooth)
    varlist = ['M', 'F', 'TH', 'PHIE']
    root = neoUtils.get_root(blk, unit_num)
    print('Working on {}'.format(root))

    Xdot = GLM.get_deriv(blk, blk_smooth, varlist)[0]
    Xdot = np.reshape(Xdot, [-1, 8, 10])  # [time, derivative component, smoothing window]
    sp = neoUtils.concatenate_sp(blk)['cell_{}'.format(0)]  # NOTE: spike train is taken from cell_0, not unit_num
    cbool = neoUtils.get_Cbool(blk)

    mse = []
    # one tuning-curve MSE entry per derivative component
    for ii in range(Xdot.shape[1]):
        var_in = Xdot[:, ii, :].copy()
        mse.append(tuning_curve_MSE(var_in, sp, cbool, bins=50))
    return (mse)
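
# Example usage of calc_MSE (sketch): hypothetical paths; the returned list holds one
# tuning-curve MSE entry per derivative component (8 in total).
def _example_calc_MSE():
    fname = '/path/to/rat_NEO.h5'
    p_smooth = '/path/to/smoothed_data'
    mse = calc_MSE(fname, p_smooth, unit_num=0)
    for ii, m in enumerate(mse):
        print('derivative component {}: MSE = {}'.format(ii, m))
    return mse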
def smoothed_best():
    df = pd.read_csv(min_entropy, index_col='id')
    smooth_vals = np.arange(5, 100, 10).tolist()
    best_smooth = df.mode(axis=1)[0]
    best_idx = [smooth_vals.index(x) for x in best_smooth]
    best_idx = pd.DataFrame({'idx': best_idx}, index=best_smooth.index)
    for f in glob.glob(os.path.join(p_load, '*NEO.h5')):
        try:
            blk = neoUtils.get_blk(f)
            blk_smooth = GLM.get_blk_smooth(f, p_smooth)
            num_units = len(blk.channel_indexes[-1].units)
            for unit_num in range(num_units):
                varlist = ['M', 'F', 'TH', 'PHIE']
                root = neoUtils.get_root(blk, unit_num)
                print('Working on {}'.format(root))
                if root not in best_idx.index:
                    print('{} not found in best smoothing derivative data'.format(root))
                    continue
                outname = os.path.join(
                    p_save,
                    'best_smoothing_deriv\\{}_best_smooth_pillowX.mat'.format(root))
                X = GLM.create_design_matrix(blk, varlist)
                smoothing_to_use = best_idx.loc[root][0]
                Xdot = GLM.get_deriv(blk, blk_smooth, varlist,
                                     smoothing=[smoothing_to_use])[0]
                X = np.concatenate([X, Xdot], axis=1)
                y = neoUtils.get_rate_b(blk, unit_num)[1]
                cbool = neoUtils.get_Cbool(blk)
                arclengths = get_arclength_bool(blk, unit_num)
                sio.savemat(outname, {'X': X,
                                      'y': y,
                                      'cbool': cbool,
                                      'smooth': best_smooth.loc[root],
                                      'arclengths': arclengths})
        except Exception as ex:
            print('Problem with {}:{}'.format(os.path.basename(f), ex))
def get_X_y(fname, p_smooth, unit_num, pca_tgl=False, n_pcs=3):
    varlist = ['M', 'F', 'TH', 'PHIE']
    blk = neoUtils.get_blk(fname)
    blk_smooth = get_blk_smooth(fname, p_smooth)
    cbool = neoUtils.get_Cbool(blk)
    X = GLM.create_design_matrix(blk, varlist)
    Xdot, Xsmooth = GLM.get_deriv(blk, blk_smooth, varlist, [0, 5, 9])

    # if using the PCA decomposition of the inputs:
    if pca_tgl:
        X = neoUtils.replace_NaNs(X, 'pchip')
        X = neoUtils.replace_NaNs(X, 'interp')

        Xsmooth = neoUtils.replace_NaNs(Xsmooth, 'pchip')
        Xsmooth = neoUtils.replace_NaNs(Xsmooth, 'interp')

        scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
        X = scaler.fit_transform(X)
        scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
        Xsmooth = scaler.fit_transform(Xsmooth)

        pca = sklearn.decomposition.PCA()
        X_pc = pca.fit_transform(X)[:, :n_pcs]
        pca = sklearn.decomposition.PCA()
        Xs_pc = pca.fit_transform(Xsmooth)[:, :n_pcs]
        zero_pad = np.zeros([1, n_pcs])
        Xd_pc = np.diff(np.concatenate([zero_pad, Xs_pc], axis=0), axis=0)

        X = np.concatenate([X_pc, Xd_pc], axis=1)
        scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
        X = scaler.fit_transform(X)
    else:
        X = np.concatenate([X, Xdot], axis=1)
        X = neoUtils.replace_NaNs(X, 'pchip')
        X = neoUtils.replace_NaNs(X, 'interp')
        scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
        X = scaler.fit_transform(X)

    y = neoUtils.get_rate_b(blk, unit_num)[1][:, np.newaxis]
    # Xc = X[cbool,:]
    # yc = y[cbool]
    yhat = np.zeros_like(y)
    return (X, y, cbool)
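
# Example usage of the PCA-aware get_X_y variant above (sketch): hypothetical paths; shows
# both the full covariate design matrix and the PCA-reduced version selected with pca_tgl.
def _example_get_X_y():
    fname = '/path/to/rat_NEO.h5'
    p_smooth = '/path/to/smoothed_data'
    X, y, cbool = get_X_y(fname, p_smooth, unit_num=0)  # full design matrix
    X_pc, y_pc, _ = get_X_y(fname, p_smooth, unit_num=0, pca_tgl=True, n_pcs=3)  # 3 PCs + their derivatives
    print('full: {}, PCA-reduced: {}'.format(X.shape, X_pc.shape))
    return X, y, cbool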
def plot_pca_spaces(fname, unit_num, p_smooth=None, deriv_smooth=[9], n_dims=3):
    """
    Plot the PCA tuning spaces.

    :param fname: filename of the neo data
    :param unit_num: unit number to use
    :param p_smooth: [optional] if using the derivative, the directory where the smoothed data live
    :param deriv_smooth: if using the derivative, which derivative smoothing to use
    :return:
    """
    # Get the standard data, from which the PCA will be computed
    blk = neoUtils.get_blk(fname)
    cbool = neoUtils.get_Cbool(blk)
    varlist = ['M', 'F', 'TH', 'PHIE']
    X = GLM.create_design_matrix(blk, varlist)
    sp = neoUtils.concatenate_sp(blk)['cell_{}'.format(unit_num)]
    r = neoUtils.get_rate_b(blk, unit_num)[0]

    # If smoothing directory is given, then add the derivative data
    if p_smooth is not None:
        blk_smooth = GLM.get_blk_smooth(fname, p_smooth)
        Xdot = GLM.get_deriv(blk, blk_smooth, varlist, deriv_smooth)[0]
        X = np.concatenate([X, Xdot], axis=1)
        X[np.isnan(X)] = 0
    else:
        print('\tNot using derivative information')

    # Scale and project onto principal components (contact samples only)
    scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
    X_scale = np.zeros_like(X)
    X_scale[cbool, :] = scaler.fit_transform(X[cbool, :])
    pca = sklearn.decomposition.PCA()
    X_pcs = np.zeros_like(X)
    X_pcs[cbool, :] = pca.fit_transform(X_scale[cbool, :])

    # Single-dimension tuning for each of the leading PCs
    for ii in range(n_dims):
        var = X_pcs[:, ii]
        response, edges = varTuning.stim_response_hist(var, sp, cbool)

    # Joint tuning in the first two and the last two PCs
    response, edges1, edges2 = varTuning.joint_response_hist(X_pcs[:, 0], X_pcs[:, 1], sp, cbool, 40)
    response, edges1, edges2 = varTuning.joint_response_hist(X_pcs[:, -1], X_pcs[:, -2], sp, cbool, 40)
def get_X_y(fname, p_smooth, unit_num=0):
    varlist = ['M', 'F', 'TH', 'PHIE']
    blk = neoUtils.get_blk(fname)
    blk_smooth = GLM.get_blk_smooth(fname, p_smooth)
    cbool = neoUtils.get_Cbool(blk)
    X = GLM.create_design_matrix(blk, varlist)
    Xdot, Xsmooth = GLM.get_deriv(blk, blk_smooth, varlist, [9])
    X = np.concatenate([X, Xdot], axis=1)
    X = neoUtils.replace_NaNs(X, 'pchip')
    X = neoUtils.replace_NaNs(X, 'interp')
    scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
    X = scaler.fit_transform(X)
    y = neoUtils.get_rate_b(blk, unit_num)[1][:, np.newaxis]
    y[np.invert(cbool)] = 0
    return (X, y, cbool)
def get_first_spike_vals(fname, p_smooth, unit_num):
    """
    Return a dataframe with one row per contact that evoked a spike and the value of
    the relevant stimulus features at the time of the first spike.

    :param fname: filename of the neo data
    :param p_smooth: directory where the smoothed data live
    :param unit_num: int
    :return: pandas dataframe
    """
    # get the blocks
    blk = neoUtils.get_blk(fname)
    blk_smooth = GLM.get_blk_smooth(fname, p_smooth)

    # get the trains and times of first spikes
    _, _, trains = spikeAnalysis.get_contact_sliced_trains(blk, unit_num)
    t_idx = [train[0].magnitude if len(train) > 0 else np.nan for train in trains]
    t_idx = np.array(t_idx)
    t_idx = t_idx[np.isfinite(t_idx)].astype('int')

    # get the stimuli
    varlist = ['M', 'F', 'TH', 'PHIE']
    X = GLM.create_design_matrix(blk, varlist)
    Xsmooth = GLM.get_deriv(blk, blk_smooth, varlist, smoothing=[9])[1]
    MB = np.sqrt(X[:, 1]**2 + X[:, 2]**2)[:, np.newaxis]
    FB = np.sqrt(X[:, 4]**2 + X[:, 5]**2)[:, np.newaxis]
    RB = np.sqrt(X[:, 6]**2 + X[:, 7]**2)[:, np.newaxis]

    # use smooth to calculate derivative
    MBsmooth = np.sqrt(Xsmooth[:, 1]**2 + Xsmooth[:, 2]**2)[:, np.newaxis]
    FBsmooth = np.sqrt(Xsmooth[:, 4]**2 + Xsmooth[:, 5]**2)[:, np.newaxis]
    RBsmooth = np.sqrt(Xsmooth[:, 6]**2 + Xsmooth[:, 7]**2)[:, np.newaxis]

    X = np.concatenate([MB, FB, RB], axis=1)
    Xsmooth = np.concatenate([MBsmooth, FBsmooth, RBsmooth], axis=1)
    Xdot = np.diff(np.concatenate([np.zeros([1, 3]), Xsmooth]), axis=0)
    X = np.concatenate([X, Xdot], axis=1)

    # extract stimulus at time of first spike and output to a dataframe
    vals = X[t_idx]
    vallist = ['MB', 'FB', 'RB', 'MBdot', 'FBdot', 'RBdot']
    df = pd.DataFrame()
    for ii in range(len(vallist)):
        df[vallist[ii]] = vals[:, ii]  # vals is [n_contacts_with_spikes, n_features]
    df['id'] = neoUtils.get_root(blk, unit_num)
    return (df)
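
# Example usage of get_first_spike_vals (sketch): the glob pattern and paths are
# hypothetical; stacks the per-contact first-spike stimulus values from several cells
# into one dataframe.
def _example_first_spike_vals():
    p_smooth = '/path/to/smoothed_data'
    dfs = []
    for f in glob.glob('/path/to/data/*NEO.h5'):
        dfs.append(get_first_spike_vals(f, p_smooth, unit_num=0))
    return pd.concat(dfs, ignore_index=True)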
def smoothed_mechanics():
    """
    Use this function to grab the smoothed mechanics data and the derivative of the same.
    """
    f_arclength = '/projects/p30144/_VG3D/deflections/direction_arclength_FR_group_data.csv'
    f_list = glob.glob(os.path.join(p_load, '*NEO.h5'))
    f_list.sort()
    for f in f_list:
        try:
            blk = neoUtils.get_blk(f)
            blk_smooth = GLM.get_blk_smooth(f, p_smooth)
            num_units = len(blk.channel_indexes[-1].units)
            for unit_num in range(num_units):
                varlist = ['M', 'F', 'TH', 'PHIE']
                root = neoUtils.get_root(blk, unit_num)
                print('Working on {}'.format(root))
                outname = os.path.join(p_save, '{}_smooth_mechanicsX.mat'.format(root))
                Xdot, X = GLM.get_deriv(blk, blk_smooth, varlist, smoothing=[5])
                X = np.concatenate([X, Xdot], axis=1)
                y = neoUtils.get_rate_b(blk, unit_num)[1]
                cbool = neoUtils.get_Cbool(blk)
                arclengths = get_arclength_bool(blk, unit_num, fname=f_arclength)
                sio.savemat(outname, {'X': X,
                                      'y': y,
                                      'cbool': cbool,
                                      'smooth': 55,
                                      'arclengths': arclengths})
        except Exception as ex:
            print('Problem with {}:{}'.format(os.path.basename(f), ex))
def get_components(fname, p_smooth=None, smooth_idx=9):
    ''' Get the PCA components given a filename '''
    varlist = ['M', 'F', 'TH', 'PHIE']
    blk = neoUtils.get_blk(fname)
    cbool = neoUtils.get_Cbool(blk)
    root = neoUtils.get_root(blk, 0)[:-2]
    X = GLM.create_design_matrix(blk, varlist)
    if p_smooth is not None:
        blk_smooth = GLM.get_blk_smooth(fname, p_smooth)
        Xdot = GLM.get_deriv(blk, blk_smooth, varlist, smoothing=[smooth_idx])[0]
        X = np.concatenate([X, Xdot], axis=1)

    X[np.invert(cbool), :] = 0
    X = neoUtils.replace_NaNs(X, 'pchip')
    X = neoUtils.replace_NaNs(X, 'interp')

    scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
    X[cbool, :] = scaler.fit_transform(X[cbool, :])

    pca = sklearn.decomposition.PCA()
    pca.fit_transform(X[cbool, :])
    return (pca, root)
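
# Example usage of get_components (sketch): hypothetical paths; reports how much variance
# the leading principal components capture for one cell.
def _example_get_components():
    fname = '/path/to/rat_NEO.h5'
    p_smooth = '/path/to/smoothed_data'
    pca, root = get_components(fname, p_smooth=p_smooth, smooth_idx=9)
    cum_var = np.cumsum(pca.explained_variance_ratio_)
    print('{}: first 3 PCs explain {:0.1%} of the variance'.format(root, cum_var[2]))
    return cum_var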
def calc_corr(fname, p_smooth, unit_num):
    blk = neoUtils.get_blk(fname)
    blk_smooth = GLM.get_blk_smooth(fname, p_smooth)
    varlist = ['M', 'F', 'TH', 'PHIE']
    component_list = ['{}_dot'.format(x)
                      for x in ['Mx', 'My', 'Mz', 'Fx', 'Fy', 'Fz', 'TH', 'PHI']]
    root = neoUtils.get_root(blk, unit_num)

    Xdot = GLM.get_deriv(blk, blk_smooth, varlist)[0]
    Xdot = np.reshape(Xdot, [-1, 8, 10])  # [time, derivative component, smoothing window]
    windows = np.arange(5, 100, 10)
    sp = neoUtils.concatenate_sp(blk)['cell_{}'.format(0)]  # NOTE: spike train is taken from cell_0, not unit_num
    cbool = neoUtils.get_Cbool(blk)
    corr = []
    R = []
    # loop over variables
    for ii in range(Xdot.shape[1]):
        var_in = Xdot[:, ii, :].copy()
        # loop over smoothing
        r = []
        for jj in range(var_in.shape[1]):
            kernel = elephant.kernels.GaussianKernel(pq.ms * windows[jj])
            FR = elephant.statistics.instantaneous_rate(sp, pq.ms, kernel=kernel)
            idx = np.isfinite(var_in[:, jj])
            r.append(np.corrcoef(var_in[:, jj].ravel()[idx],
                                 FR.magnitude.ravel()[idx])[0, 1])
        R.append(r)
    R = np.array(R)
    df = pd.DataFrame(data=R, columns=['{}ms'.format(x) for x in windows])
    df.index = component_list
    return (df)
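
# Example usage of calc_corr (sketch): hypothetical paths; picks, for each derivative
# component, the smoothing window whose derivative correlates best with the firing rate.
def _example_calc_corr():
    fname = '/path/to/rat_NEO.h5'
    p_smooth = '/path/to/smoothed_data'
    df = calc_corr(fname, p_smooth, unit_num=0)
    best_window = df.abs().idxmax(axis=1)  # column labels look like '55ms'
    print(best_window)
    return best_window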
def smoothed(smooth_idx=9):
    smooth_vals = np.arange(5, 100, 10)
    sub_p_save = os.path.join(p_save, '{}ms_smoothing_deriv'.format(smooth_vals[smooth_idx]))
    if not os.path.isdir(sub_p_save):
        os.mkdir(sub_p_save)
    for f in glob.glob(os.path.join(p_load, '*NEO.h5')):
        try:
            blk = neoUtils.get_blk(f)
            blk_smooth = GLM.get_blk_smooth(f, p_smooth)
            num_units = len(blk.channel_indexes[-1].units)
            for unit_num in range(num_units):
                varlist = ['M', 'F', 'TH', 'PHIE']
                root = neoUtils.get_root(blk, unit_num)
                print('Working on {}'.format(root))
                outname = os.path.join(
                    sub_p_save,
                    '{}ms_{}_pillowX.mat'.format(smooth_vals[smooth_idx], root))
                X = GLM.create_design_matrix(blk, varlist)
                Xdot = GLM.get_deriv(blk, blk_smooth, varlist, [smooth_idx])[0]
                X = np.concatenate([X, Xdot], axis=1)
                sp = neoUtils.concatenate_sp(blk)['cell_{}'.format(unit_num)]
                y = neoUtils.get_rate_b(blk, unit_num)[1]
                cbool = neoUtils.get_Cbool(blk)
                arclengths = get_arclength_bool(blk, unit_num)
                sio.savemat(outname, {'X': X,
                                      'y': y,
                                      'cbool': cbool,
                                      'arclengths': arclengths})
        except Exception as ex:
            print('Problem with {}:{}'.format(os.path.basename(f), ex))
def calc_contribs(best_model, model_dict, spike_train, center_ego_params,
                  center_dist_params, allo_params, speed_params, Xe, Xd, Xa, Xs):
    ''' calculate the effect on different measures of goodness-of-fit when
    adding or subtracting each variable '''

    # start a dict for contribs
    contribs = {}
    # note which variables we're looking at
    variables = [('allo',), ('center_ego',), ('center_dist',), ('speed',)]

    # if the best model is a single-variable model... and not null...
    if len(best_model) == 1 and 'uniform' not in best_model:
        # calculate goodness-of-fit measures for the null model
        uniform_model_dict = full_classify.run_final(
            'uniform', 1., center_ego_params, center_dist_params, allo_params,
            speed_params, Xe, Xd, Xa, Xs, spike_train)

        # calculate difference in goodness-of-fit measures between the best model and the null
        # model -- these are the contributions for the single encoded variable
        contribs[best_model] = {}
        contribs[best_model]['ll'] = model_dict['ll'] - uniform_model_dict['ll']
        contribs[best_model]['llps'] = model_dict['llps'] - uniform_model_dict['llps']
        contribs[best_model]['explained_var'] = model_dict['explained_var'] - uniform_model_dict['explained_var']
        contribs[best_model]['corr_r'] = model_dict['corr_r'] - uniform_model_dict['corr_r']
        contribs[best_model]['pseudo_r2'] = model_dict['pseudo_r2'] - uniform_model_dict['pseudo_r2']

        # for each variable...
        for var in variables:
            # not including the one we just looked at...
            if frozenset(var) != best_model:
                # create a new model which contains the single encoded variable as well as
                # this new variable
                new_model = frozenset(chain(list(best_model), list(var)))
                # calc a new scale factor
                new_scale_factor = full_classify.calc_scale_factor(
                    new_model, center_ego_params, center_dist_params, allo_params,
                    speed_params, Xe, Xd, Xa, Xs, spike_train)
                # run the new model and collect the result
                new_model_dict = full_classify.run_final(
                    new_model, new_scale_factor, center_ego_params, center_dist_params,
                    allo_params, speed_params, Xe, Xd, Xa, Xs, spike_train)

                # calculate difference in goodness-of-fit measures between the new model and
                # the best model -- these are the contributions from the added variable
                contribs[frozenset(var)] = {}
                contribs[frozenset(var)]['ll'] = new_model_dict['ll'] - model_dict['ll']
                contribs[frozenset(var)]['llps'] = new_model_dict['llps'] - model_dict['llps']
                contribs[frozenset(var)]['explained_var'] = new_model_dict['explained_var'] - model_dict['explained_var']
                contribs[frozenset(var)]['corr_r'] = new_model_dict['corr_r'] - model_dict['corr_r']
                contribs[frozenset(var)]['pseudo_r2'] = new_model_dict['pseudo_r2'] - model_dict['pseudo_r2']

    # otherwise, if there are multiple variables in the best model...
    elif len(best_model) > 1:
        # for each variable in the whole list...
        for var in variables:
            # if this variable is included in the best model...
            if var[0] in best_model:
                # create a new model that includes all the variables in the best
                # model EXCEPT this one
                new_model = []
                for i in best_model:
                    if i != var[0]:
                        new_model.append(i)
                new_model = frozenset(new_model)

                # calculate the new scale factor
                new_scale_factor = full_classify.calc_scale_factor(
                    new_model, center_ego_params, center_dist_params, allo_params,
                    speed_params, Xe, Xd, Xa, Xs, spike_train)
                # run the new model and collect the result
                new_model_dict = full_classify.run_final(
                    new_model, new_scale_factor, center_ego_params, center_dist_params,
                    allo_params, speed_params, Xe, Xd, Xa, Xs, spike_train)

                # calculate difference in goodness-of-fit measures between the best model and
                # the new model -- these are the contributions from the subtracted variable
                contribs[frozenset(var)] = {}
                contribs[frozenset(var)]['ll'] = model_dict['ll'] - new_model_dict['ll']
                contribs[frozenset(var)]['llps'] = model_dict['llps'] - new_model_dict['llps']
                contribs[frozenset(var)]['explained_var'] = model_dict['explained_var'] - new_model_dict['explained_var']
                contribs[frozenset(var)]['corr_r'] = model_dict['corr_r'] - new_model_dict['corr_r']
                contribs[frozenset(var)]['pseudo_r2'] = model_dict['pseudo_r2'] - new_model_dict['pseudo_r2']

            # otherwise...
            else:
                # make a new model that adds the current variable to the best model
                new_model = frozenset(chain(list(best_model), list(var)))

                # calc the new scale factor
                new_scale_factor = full_classify.calc_scale_factor(
                    new_model, center_ego_params, center_dist_params, allo_params,
                    speed_params, Xe, Xd, Xa, Xs, spike_train)
                # run the new model and collect the result
                new_model_dict = full_classify.run_final(
                    new_model, new_scale_factor, center_ego_params, center_dist_params,
                    allo_params, speed_params, Xe, Xd, Xa, Xs, spike_train)

                # calculate difference in goodness-of-fit measures between the new model and
                # the best model -- these are the contributions from the added variable
                contribs[frozenset(var)] = {}
                contribs[frozenset(var)]['ll'] = new_model_dict['ll'] - model_dict['ll']
                contribs[frozenset(var)]['llps'] = new_model_dict['llps'] - model_dict['llps']
                contribs[frozenset(var)]['explained_var'] = new_model_dict['explained_var'] - model_dict['explained_var']
                contribs[frozenset(var)]['corr_r'] = new_model_dict['corr_r'] - model_dict['corr_r']
                contribs[frozenset(var)]['pseudo_r2'] = new_model_dict['pseudo_r2'] - model_dict['pseudo_r2']

    # add the new stuff to the model dict
    model_dict['contribs'] = contribs
    model_dict['best_model'] = best_model

    # return the model dict
    return model_dict
def X_to_pillow(X):
    B = GLM.make_bases(5, [0, 10])
    Xb = GLM.apply_bases(X, B[0])
    scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
    return (scaler.fit_transform(Xb))
def init_model_params():
    sigma_vals = np.arange(2, 200, 4) * pq.ms
    B = GLM.make_bases(5, [0, 15], b=2)
    winsize = int(B[0].shape[0])
    return sigma_vals, B, winsize
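
# Example usage of init_model_params (sketch): shows the rate-kernel sigmas, the
# Pillow-style basis matrix returned by GLM.make_bases, and the implied window size.
def _example_init_model_params():
    sigma_vals, B, winsize = init_model_params()
    print('{} rate-kernel sigmas, basis matrix {}, window = {} samples'.format(
        len(sigma_vals), B[0].shape, winsize))
    return winsize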
print(a)
'''
'''
from sklearn import datasets
iris = datasets.load_iris()
x = iris.data
y = iris.target
y[y > 1] = 1
import linear
md = linear.logit()
md.fit(x, y)
# md.trainProc()
# print(md.result)
print(md.predict(x))
'''
import numpy as np
import pandas as pd
from sklearn import datasets

bos = datasets.load_boston()
x = bos.data
y = bos.target
x = pd.DataFrame(x)
x = np.array((x - x.mean()) / x.std())  # without normalization, float64 overflow happens easily
y = (y - y.mean()) / y.std()
import GLM
md = GLM.linearRegression()
md.fit(x, y, alpha=0.01, iter=500)
md.trainProc()
print(md.coef)
def main():
    # ================================ #
    # SET UP OPTION PARSER
    # ================================ #
    usage = "usage: %prog filename [options]"
    parser = OptionParser(usage)
    parser.add_option('-p', '--prefix',
                      dest='prefix',
                      default='model_results',
                      type=str,
                      help='prefix to append to the results filename')
    parser.add_option('-v', '--varlist',
                      dest='varlist',
                      default='M',
                      type=str,
                      help='list of strings which indicate which variables to include in the model')
    parser.add_option('-b', '--binsize',
                      dest='binsize',
                      default=1,
                      type=int,
                      help='number of milliseconds to bin the spikes')
    parser.add_option('-D', '--deriv_tgl',
                      action='store_true',
                      dest='deriv_tgl',
                      default=False,
                      help='derivative toggle, set to true to include the derivative in the model')
    parser.add_option('-P', '--pillow_tgl',
                      action='store_true',
                      dest='pillow_tgl',
                      default=False,
                      help='basis toggle, set to true to map the inputs to a pillow basis (GLM only at this time)')
    parser.add_option('--GLM',
                      action='store_true',
                      dest='glm_tgl',
                      default=False,
                      help='toggles a GLM model. If the pillow toggle is false, takes an input where each point in the window is its own dimension')
    parser.add_option('--GAM',
                      action='store_true',
                      dest='gam_tgl',
                      default=False,
                      help='GAM toggle, call the flag to run a GAM')
    parser.add_option('-C', '--conv_tgl',
                      action='store_true',
                      dest='conv_tgl',
                      default=False,
                      help='convolutional network toggle. Call the flag to run a convolutional network')
    parser.add_option('--plot_tgl',
                      action='store_true',
                      dest='plot_tgl',
                      default=False,
                      help='plot toggle, call to plot the results during the run. This should never be called on quest.')
    parser.add_option('-w', '--window',
                      dest='window',
                      default=1,
                      type=int,
                      help='window into the past (in ms) for the convolutional models')
    parser.add_option('-n', '--num_conv',
                      dest='max_num_conv',
                      default=4,
                      type=int,
                      help='max number of convolutional nodes to use')
    parser.add_option('--l2',
                      dest='l2_penalty',
                      default=1e-6,
                      type=float,
                      help='l2 penalty')
    parser.add_option('-k', '--kernel',
                      dest='kernel_mode',
                      default='gaussian',
                      type=str,
                      help="Kernel Mode ('box','gaussian','exp','alpha','epan')")
    parser.add_option('--STM', '--STM_tgl',
                      action='store_true',
                      dest='stm_tgl',
                      default=False,
                      help='STM toggle. Call the flag to run an STM network (Theis 2013)')
    parser.add_option('--num_stm_components',
                      action='store',
                      dest='num_stm_components',
                      default=3,
                      type=int,
                      help='number of components to use in the STM model')
    parser.add_option('--num_stm_features',
                      action='store',
                      dest='num_stm_features',
                      default=20,
                      type=int,
                      help='number of features to use in the STM model')
    parser.add_option('--silence_noncontact',
                      action='store_true',
                      dest='silence_noncontact',
                      default=False,
                      help='if called, sets all spiking that occurs during non-contact to zero')

    (options, args) = parser.parse_args()
    if len(args) < 1:
        parser.error('Need to pass a filename first')

    # map options
    plot_tgl = options.plot_tgl
    pillow_tgl = options.pillow_tgl
    varlist = options.varlist.split(',')
    conv_tgl = options.conv_tgl
    gam_tgl = options.gam_tgl
    binsize = options.binsize
    deriv_tgl = options.deriv_tgl
    prefix = options.prefix
    max_num_conv = options.max_num_conv
    l2_penalty = options.l2_penalty
    kernel_mode = options.kernel_mode

    # Get desired filenames
    fname = args[0]
    p_save = os.path.join(os.path.split(fname)[0], 'results')
    print(os.path.basename(fname))

    # read data in
    fid = neo.io.NixIO(fname)
    blk = fid.read_block()

    # set binsize to a quantity
    binsize = binsize * pq.ms

    # initialize parameters
    sigma_vals, B, winsize = init_model_params()

    # calculate the design matrices based on input toggles
    X = create_design_matrix(blk, varlist,
                             window=options.window,
                             binsize=options.binsize,
                             deriv_tgl=deriv_tgl,
                             bases=None)

    # calculate pillow bases if desired.
    if pillow_tgl:
        B = GLM.make_bases(5, [0, 15], 2)
        bases = B[0]
        X_pillow = create_design_matrix(blk, varlist,
                                        deriv_tgl=options.deriv_tgl,
                                        bases=bases)
    else:
        B = None
        bases = None
        X_pillow = X

    for unit in blk.channel_indexes[-1].units:
        # ===================================== #
        # INIT OUTPUTS
        # ===================================== #
        yhat = {}
        mdl = {}
        corrs = {}
        weights = {}
        id = get_root(blk, int(unit.name[-1]))
        f_save = os.path.join(p_save, '{}_{}.npz'.format(prefix, id))
        if os.path.isfile(f_save):
            # warn and skip units that already have results on disk
            print('Output file found. Skipping {}'.format(id))
            continue

        # ===================================== #
        # GET SPIKE TIMES
        # CONVERT TO BINNED SPIKE TRAIN
        # ===================================== #
        sp = concatenate_sp(blk)[unit.name]
        b = elephant.conversion.BinnedSpikeTrain(sp, binsize=binsize)
        Cbool = get_Cbool(blk, -1)
        spike_isbool = binsize == pq.ms

        if spike_isbool:
            y = b.to_bool_array().ravel().astype('float32')
        else:
            y = b.to_array().ravel().astype('float32')

        if options.silence_noncontact:
            y[np.invert(Cbool)] = 0

        # ===================================== #
        # MAKE TENSOR FOR CONV NETS
        # ===================================== #
        Xt = create_design_matrix(blk, varlist, window=1, deriv_tgl=deriv_tgl, bases=None)
        Xt = make_binned_tensor(Xt, b, window_size=options.window)

        # ===================================== #
        # RUN ALL THE MODELS REQUESTED
        # ===================================== #
        if options.glm_tgl:
            if pillow_tgl:
                yhat['glm'], mdl['glm'] = run_GLM(X_pillow, y)
                weights['glm'] = mdl['glm'].params
            else:
                yhat['glm'], mdl['glm'] = run_GLM(X, y)
                weights['glm'] = mdl['glm'].params

        if gam_tgl:
            yhat['gam'], mdl['gam'] = run_GAM(X, y)

        if conv_tgl:
            for num_filters in range(1, max_num_conv + 1):
                mdl_name = 'conv_{}_node'.format(num_filters)
                yhat[mdl_name], mdl[mdl_name] = conv_model(Xt,
                                                           y[:, np.newaxis, np.newaxis],
                                                           num_filters=num_filters,
                                                           winsize=options.window,
                                                           is_bool=spike_isbool,
                                                           l2_penalty=l2_penalty)
                weights[mdl_name] = mdl[mdl_name].get_weights()[0]

        if options.stm_tgl:
            yhat['stm'], mdl['stm'] = run_STM(X, y,
                                              num_components=options.num_stm_components,
                                              num_features=options.num_stm_features)

        # ===================================== #
        # EVALUATE ALL THE MODELS -- THIS MAY NEED TO BE ALTERED
        # ===================================== #
        for model in yhat:
            corrs[model] = evaluate_correlation(yhat[model], sp,
                                                kernel_mode=kernel_mode,
                                                Cbool=Cbool,
                                                sigma_vals=sigma_vals)

        # ===================================== #
        # PLOT IF REQUESTED
        # ===================================== #
        if plot_tgl:
            for model in yhat:
                plt.plot(sigma_vals, corrs[model])
            ax = plt.gca()
            ax.set_ylim(-0.1, 1)
            ax.legend(corrs.keys())
            ax.set_xlabel('{} Rate Kernel Sigma'.format(options.kernel_mode))
            ax.set_ylabel('Pearson Correlation')
            ax.set_title(id)
            plt.savefig(os.path.join(p_save, 'performance_{}_{}.svg'.format(options.prefix, id)))
            plt.close('all')

        # ===================================== #
        # SAVE THE MODEL OUTPUTS
        # ===================================== #
        np.savez(f_save,
                 corrs=corrs,
                 yhat=yhat,
                 sigma_vals=sigma_vals,
                 mdl=mdl,
                 y=y,
                 X=X,
                 Cbool=Cbool,
                 options=options,
                 B=B)
def build_GLM_model(Xraw, yraw, savefile, nfilts=4, hist=False,
                    learning_rate=1e-5, epochs=100, batch_size=256,
                    family='p', min_delta=0.1, patience=8):
    tf.reset_default_graph()
    if batch_size is None:
        batch_size = Xraw.shape[0]
    if hist:
        B = GLM.make_bases(3, [0, 3], 1)
        yhistraw = GLM.add_spike_history(Xraw, yraw, B)[:, Xraw.shape[1]:]

    # make data a multiple of batchsize and batch it
    n_del = Xraw.shape[0] % batch_size
    X = Xraw[n_del:, :]
    y = yraw[n_del:]
    n_batches = X.shape[0] // batch_size
    batched_x = np.split(X, n_batches)
    batched_y = np.split(y, n_batches)
    if hist:
        yhist = yhistraw[n_del:, :]
        batched_yhist = np.split(yhist, n_batches)

    # init vars
    mdl_input = tf.placeholder(tf.float32, [None, X.shape[1]])
    mdl_output = tf.placeholder(tf.float32, [None, 1])
    if hist:
        mdl_yhist = tf.placeholder(tf.float32, [None, yhist.shape[1]])

    # init weights
    if hist:
        H = tf.Variable(tf.zeros([yhist.shape[1], 1]), name='HistoryFilters')
        tf.add_to_collection('H', H)
    K = tf.Variable(tf.random_normal([X.shape[1], nfilts], stddev=0.003), name='StimFilters')
    tf.add_to_collection('K', K)
    b = tf.Variable(tf.random_normal([1]), name='bias')
    tf.add_to_collection('b', b)

    #### The model ####
    # Hidden Layer
    hidden_out = tf.matmul(mdl_input, K)
    # hidden_out = tf.nn.relu(hidden_out)
    Ksum = tf.reduce_sum(hidden_out, axis=1)
    if hist:
        H = tf.clip_by_value(H, -np.inf, 0.)
        Ksum = tf.add(tf.squeeze(tf.matmul(mdl_yhist, H)), Ksum)
    Ksum = tf.add(Ksum, b)

    # define cost function as negative log likelihood of Poisson spiking
    if family == 'p':
        conditional_intensity = tf.exp(Ksum)
        cost = neglogliklihood(conditional_intensity, mdl_output)
    elif family == 'b':
        conditional_intensity = tf.sigmoid(Ksum)
        # cost = tf.reduce_mean(-tf.reduce_sum(mdl_output*conditional_intensity), reduction_indices=1)
        cost = neglogliklihood_bernoulli(conditional_intensity, mdl_output)

    # optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    # optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(cost)
    optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    # loop over entire dataset multiple times
    all_cost = [np.Inf]
    patience_cnt = 0
    for epoch in range(epochs):
        # loop over sub_batches
        avg_cost = 0.
        for ii in range(n_batches):
            if hist:
                _, c = sess.run([optimizer, cost],
                                feed_dict={mdl_input: batched_x[ii],
                                           mdl_output: batched_y[ii],
                                           mdl_yhist: batched_yhist[ii]})
            else:
                _, c = sess.run([optimizer, cost],
                                feed_dict={mdl_input: batched_x[ii],
                                           mdl_output: batched_y[ii]})
            avg_cost += c / n_batches

        # Early Stopping
        # print('AVG:{}, Most Recent:{}'.format(avg_cost, all_cost[-1]))
        if epoch > 0 and ((all_cost[-1] - avg_cost) > min_delta):
            patience_cnt = 0
        else:
            patience_cnt += 1
        if patience_cnt >= patience:
            print('Early Stopping...')
            break
        all_cost.append(avg_cost)
        print('Epoch:{}\t, Cost={}'.format(epoch, avg_cost))

    print('Done!')
    # plt.plot(all_cost)
    # plt.show()
    print('saving to {}'.format(savefile))
    saver = tf.train.Saver()
    saver.save(sess, savefile)
    sess.close()
    print('Saved session to {}'.format(savefile))
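
# Example end-to-end sketch (hypothetical paths): fit the TensorFlow GLM on contact samples
# with spike history, then drive the simulator defined earlier from the saved checkpoint.
# Note that simulate() hard-codes the same GLM.make_bases(3, [0, 3], 1) history basis used here.
def _example_build_and_simulate():
    fname = '/path/to/rat_NEO.h5'
    p_smooth = '/path/to/smoothed_data'
    savefile = '/path/to/models/rat_unit0_glm'  # checkpoint prefix passed to tf.train.Saver
    X, y, cbool = get_X_y(fname, p_smooth, unit_num=0)
    build_GLM_model(X[cbool, :], y[cbool], savefile,
                    nfilts=4, hist=True, learning_rate=1e-5,
                    epochs=100, batch_size=256, family='p')
    return simulate(X, y, savefile, cbool, n_sims=50)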