def load_data(batch_size, is_training=True):
    if is_training:
        data_file = os.path.join(cfg.affnist_data_dir, 'peppered_training_and_validation_batches',
                                 cfg.centered + '_percent_centered_' + cfg.peppered + '_percent_transformed.mat')

        images_per_transformation = int((TOTAL_TRAINING_IMAGES * int(cfg.peppered) / 100) / 32)
        num_base_img = int(TOTAL_TRAINING_IMAGES * int(cfg.centered) / 100)
        num_inputs = images_per_transformation * 32 + num_base_img
        num_training = num_inputs * 84 // 100  # integer count so it can be used for slicing
        num_training_eval = num_inputs - num_training

        # NOTE: Assert we have the correct number of total inputs, as expected
        data = loadmat(data_file)
        images = data['affNISTdata']['image'].transpose().reshape(num_inputs, 40, 40, 1).astype(np.float32)
        labels = data['affNISTdata']['label_int'].astype(np.uint8)
        assert images.shape == (num_inputs, 40, 40, 1)
        assert labels.shape == (num_inputs,)

        trX = images[:num_training] / 255.
        trY = labels[:num_training]
        # validation set is the remainder after the training split
        valX = images[num_training:] / 255.
        valY = labels[num_training:]

        num_tr_batch = num_training // cfg.batch_size
        num_val_batch = num_training_eval // cfg.batch_size

        return trX, trY, num_tr_batch, valX, valY, num_val_batch
    else:
        # NOTE: Swap the two branches below to get some basic transformed test data
        if cfg.peppered == '0':
            data_file = os.path.join(cfg.affnist_data_dir, 'just_centered', 'test.mat')
        else:
            data_file = os.path.join(cfg.affnist_data_dir, 'transformed', 'test_batches', '15.mat')

        data = loadmat(data_file)
        images = data['affNISTdata']['image'].transpose().reshape(10000, 40, 40, 1).astype(np.float32)
        labels = data['affNISTdata']['label_int'].astype(np.float32)
        assert images.shape == (10000, 40, 40, 1)
        assert labels.shape == (10000,)

        imgs = images / 255.
        labs = labels
        num_te_batch = 10000 // cfg.batch_size
        return imgs, labs, num_te_batch
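# --- Usage sketch (illustrative, not from the source): the loader above relies on a global
# `cfg` with `affnist_data_dir`, `centered`, `peppered`, and `batch_size`, plus the module
# constant TOTAL_TRAINING_IMAGES. The values below are hypothetical placeholders.
#
#   cfg.affnist_data_dir = './data/affnist'
#   cfg.centered = '50'    # percent of plain centered images
#   cfg.peppered = '50'    # percent of affine-transformed images
#   cfg.batch_size = 128
#
#   trX, trY, num_tr_batch, valX, valY, num_val_batch = load_data(cfg.batch_size, is_training=True)
#   teX, teY, num_te_batch = load_data(cfg.batch_size, is_training=False)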
def __init__(self, scanNum, xlen, ylen, basePath='/Users/alec/UCSB/scan_data/'):
    self.scanNum = scanNum
    self.dataFiles = self.get_data_files(basePath)
    self.param = loadmat.loadmat(self.dataFiles[0])['scan']['param']
    self.param['xlen'] = xlen
    self.param['ylen'] = ylen
    self.dataForward, self.dataReverse = self.load_data()
def load_data(self):
    data = dict()
    dataForward = dict()
    dataReverse = dict()
    xlen = self.param['xlen']

    for key in loadmat.loadmat(self.dataFiles[0])['scan']['data'].keys():
        data[key] = []

    for file in self.dataFiles:
        fileData = loadmat.loadmat(file)['scan']['data']
        for key in data.keys():
            data[key].append(fileData[key])

    for key in data.keys():
        dataForward[key] = []
        dataReverse[key] = []
        for i in range(0, len(self.dataFiles), 2 * xlen):
            dataForward[key].append(data[key][i: i + xlen])
            dataReverse[key].append(np.flip(data[key][i + xlen: i + 2 * xlen], axis=0))

    return dataForward, dataReverse
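# --- Usage sketch (illustrative, not from the source): the two methods above appear to belong
# to a scan-container class; the class name `Scan` and the scan geometry below are hypothetical.
#
#   scan = Scan(scanNum=12, xlen=50, ylen=50, basePath='/Users/alec/UCSB/scan_data/')
#   # dataForward/dataReverse group each channel into per-row forward and (flipped) reverse sweeps
#   for channel, rows in scan.dataForward.items():
#       print(channel, len(rows), 'forward rows')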
def HG_regression_surr_random_SGE(DATASET, numiter=1000):
    '''
    creates random surrogate data numiter times
    calculates regression on each surrogate data set
    saves out distribution of regression parameters for surrogate data
    only runs on duration electrodes
    '''
    SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta'
    subj, task = DATASET.split('_')
    print(DATASET)

    folder = 'maxes_medians_stds_lats'
    features = ['maxes_rel', 'medians', 'stds', 'lats']

    filename = os.path.join(SJdir, 'Subjs', subj, task, 'subj_globals.mat')
    data_dict = loadmat.loadmat(filename)
    srate = float(data_dict.get('srate'))

    filename = os.path.join(SJdir, 'PCA', 'Stats', 'single_electrode_windows_csvs',
                            'single_electrode_windows_withdesignation_EDITED.csv')
    df_pattern = pd.read_csv(filename)

    bad_df = pd.DataFrame({'GP44_DecisionAud': 233, 'GP15_SelfVis': 1,
                           'JH2_FaceEmo': 113, 'GP35_FaceEmo': 60}, index=range(1)).T
    bad_df = bad_df.reset_index()
    bad_df.columns = ['subj_task', 'elec']

    # get data
    print('get data')
    data_dict, start_idx, end_idx, start_idx_resp, end_idx_resp = stats_static250(subj, task, df_pattern)

    # reject outliers
    print('\nreject outliers')
    data_dict_clean = reject_outliers(DATASET, data_dict, start_idx, end_idx, start_idx_resp, end_idx_resp)

    # run regression for stim and resp
    scores, coefs, alphas, pvals = [[] for i in range(4)]
    for lock in ['resp', 'stim']:
        print('run regression on %s\n' % (lock))
        coef, score, alpha, pval, nulls = run_regression(DATASET, data_dict_clean[lock], numiter=numiter)

        # save out dataframes
        saveDir = os.path.join(SJdir, 'PCA', 'Stats', 'Regression', 'unsmoothed', folder,
                               'static_250windows', lock)
        if not os.path.exists(saveDir):
            os.makedirs(saveDir)

        df = pd.DataFrame({'score': score, 'coef': coef, 'pval': pval, 'alpha': alpha})
        df = df[['score', 'pval', 'alpha', 'coef']]
        filename = os.path.join(saveDir, '_'.join([DATASET, 'regression_values_%s.csv' % (lock)]))
        df.to_csv(filename)
        print('saving %s\n' % (filename))
        sys.stdout.flush()
def shadeplots_faces_stats(subj, task, elecs_list, SJdir='/home/knight/matar/MATLAB/DATA/Avgusta', baseline=-500):
    # get data
    filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_eleclist.mat')
    data_dict = loadmat.loadmat(filename)
    srate, elecs, data, RTs, onsets_stim, onsets_resp, data_resp = [data_dict.get(k) for k in
        ['srate', 'elecs', 'data_percent', 'RTs', 'onsets_stim', 'onsets_resp', 'data_percent_resp']]

    bl_st = int(baseline / 1000. * srate)  # baseline length in samples (float division so the ms-to-sample ratio is not truncated)
    win_300 = int(300 / 1000. * srate)     # 300 ms in samples
    win_500 = int(500 / 1000. * srate)     # 500 ms in samples

    filename = os.path.join(SJdir, 'Anat', 'ShadePlots_Faces', '_'.join([subj, task, 'maxes']) + '.csv')

    peaks, lats, peaks_resp, lats_resp, peaks_maxRT, lats_maxRT, peaks_mean, lats_mean, peaks_mean_resp, lats_mean_resp = [dict() for x in range(10)]

    for i, e in enumerate(elecs_list):
        edata = data[i, :, :].squeeze()
        edata_resp = data_resp[i, :, :].squeeze()

        # get maxes from stim onset to resp + 300 ms
        p, l = [list() for x in range(2)]
        for m in range(edata.shape[0]):  # per trial
            p.append(edata[m, abs(bl_st): abs(bl_st) + RTs[m] + win_300].max())
            l.append(edata[m, abs(bl_st): abs(bl_st) + RTs[m] + win_300].argmax())
        peaks[e] = p
        lats[e] = l
        peaks_resp[e] = edata_resp.max(axis=1)
        lats_resp[e] = edata_resp.argmax(axis=1)

        # get maxes in a single window (stim onset to max RT + 500)
        peaks_maxRT[e] = edata[:, abs(bl_st): abs(bl_st) + RTs.max() + win_500].max(axis=1)
        lats_maxRT[e] = edata[:, abs(bl_st): abs(bl_st) + RTs.max() + win_500].argmax(axis=1)

        # get maxes and latencies on the mean trace
        peaks_mean[e] = edata[:, abs(bl_st): abs(bl_st) + RTs.max() + win_500].mean(axis=0).max()
        lats_mean[e] = edata[:, abs(bl_st): abs(bl_st) + RTs.max() + win_500].mean(axis=0).argmax()
        peaks_mean_resp[e] = edata_resp.mean(axis=0).max()
        lats_mean_resp[e] = edata_resp.mean(axis=0).argmax()

    # save stats (single trials)
    filename = os.path.join(SJdir, 'Anat', 'ShadePlots_Faces', 'SingleTrials', 'data',
                            'RT_300ms_pertrial' + ''.join([subj, '_', task, '.p']))
    data_dict = {'peaks': peaks, 'lats': lats, 'peaks_resp': peaks_resp, 'lats_resp': lats_resp,
                 'peaks_maxRT': peaks_maxRT, 'lats_maxRT': lats_maxRT, 'peaks_mean': peaks_mean,
                 'lats_mean': lats_mean, 'lats_mean_resp': lats_mean_resp, 'peaks_mean_resp': peaks_mean_resp}
    with open(filename, 'wb') as f:
        pickle.dump(data_dict, f)

    return data_dict
def run_pipeline(iF):
    try:
        print('Now working on ' + iF)
        dataset = lm.loadmat(iF)
        dataset = preprocess(dataset)
        if 'anatomy' not in dataset.keys():
            return
        else:
            anatomy = dataset['anatomy']

        if 'parent_shifted' in anatomy:
            group = anatomy['parent_shifted']
        else:
            group = anatomy['cluster_parent']

        region = 'MEC'
        idx = [region in ss for ss in group]
        idx = np.array(idx)
        idx = idx[dataset['sp']['cgs'] == 2]
        if idx.sum() == 0:
            return

        dataset['spikecount'] = dataset['spikecount'][:, idx]
        (model, bl_scores) = eval_and_train(dataset)
        (Ypred, Ytrue, speed, trial, c_matrix) = score_gain_model(model, dataset)

        plt.plot(Ytrue)
        plt.plot(dataset['posx_centers'][Ypred - 1])
        name = os.path.basename(iF)[0:-4]
        plt.savefig('F:\\temp\\classifier_out\\' + region + '_' + name + '.png')
        plt.close()

        tmp_array = np.array([Ypred, Ytrue, speed, trial, dataset['posx_edges']])
        np.save('F:\\temp\\classifier_out\\' + region + '_' + name + '_scores.npy', tmp_array)
        #np.save('/oak/stanford/groups/giocomo/attialex/processed_data/classifier_output1/'+region +'_'+ name + '_scores.npy',tmp_array)
        #np.save('/oak/stanford/groups/giocomo/attialex/processed_data/classifier_output1/'+region +'_'+ name + '_confMatrix.npy',conf_matrix)
    except Exception as e:
        print(str(e))
        print('not working')
        pass
def runForFile(good_cells, sn_this, labels, umap_save_path, good_cells_orig, xcorr=None):
    data = lm.loadmat(sn_this)
    summary = []
    ds_factor = 5

    for iClu, cluID in enumerate(np.unique(labels)):
        n = np.sum(labels == cluID)
        pwd_this = mean_pwd[iClu]  # note: mean_pwd (and fi below) come from the enclosing scope, not the argument list
        if n >= 10 and pwd_this > 0.88:
            if xcorr is not None:
                xcorr_this = xcorr[labels == cluID]
            else:
                xcorr_this = None
            good_cells_this = good_cells[labels == cluID]
            (Xu, X_pca) = runUMAPForCluster(good_cells_this, data, ds_factor=ds_factor)
            #fig = plotResults(Xu, data['trial'], data['posx'], speed)
            _, sn = os.path.split(fi)
            sn_new = sn.replace('.mat', '_clu{}.png'.format(cluID))
            savepath = os.path.join('/Volumes/T7/attialex/umap_dark', sn_new)
            #fig.savefig(savepath)
            #plt.close(fig)
            summary.append((Xu, cluID, X_pca, xcorr_this))
        else:
            print('skipping clu {}, n: {}, pwd: {:.2f}'.format(cluID, n, pwd_this))

    # run once for all cells as a sanity check
    (Xu, X_pca) = runUMAPForCluster(good_cells_orig, data, ds_factor=ds_factor)
    summary.append((Xu, cluID, X_pca, None))

    if len(summary) > 0:
        fig = plotSummary(summary, data, ds_factor)
        _, sn = os.path.split(sn_this)
        sn = sn.replace('.mat', '_UMAPSummary.png')
        fig.savefig(os.path.join(umap_save_path, sn))
    return
def plot_average_overlap(subj, task, resplocked=False, SJdir='/home/knight/matar/MATLAB/DATA/Avgusta'):
    """
    plots of traces (not a shade plot bc no significance window calculated)
    average for
        1. easy task overlap elecs
        2. diff task overlap elecs
        3. diff task unique elecs
    """
    filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat')
    data = loadmat.loadmat(filename)
    srate = data['srate']
    elecs = data['active_elecs']
    RTs = data['RTs']
    bl_st = int(data['Params']['bl_st'] / 1000. * srate)  # baseline in samples
    data = data['data_percent']
    RTs = RTs + abs(bl_st)

    overlapfile = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows',
                               'smoothed', 'mean_traces', 'csv_files', subj + '_ovelapped_dur_elecs.csv')
    df = pd.read_csv(overlapfile)
    easy_overlap = df.easy.dropna()[np.in1d(df.overlapped_elecs.dropna(), df.easy.dropna())]
    diff_overlap = df.difficult.dropna()[np.in1d(df.overlapped_elecs.dropna(), df.difficult.dropna())]
    diff_unique = df.unique_to_diff.dropna()

    elec_dict = {'easy_overlap': easy_overlap, 'diff_overlap': diff_overlap, 'diff_unique': diff_unique}
    data_dict = dict()

    # average data per grouping
    for k in elec_dict.keys():
        elec_list = elec_dict[k]
        eidx = np.in1d(elecs, elec_list)
        if resplocked:
            tmp = np.empty((eidx.sum(), data.shape[1], len(np.arange(bl_st, abs(bl_st)))))  # selected elecs x trials x time
            for j, e in enumerate(np.where(eidx)[0]):  # row index of each selected electrode
                tmp2 = np.empty((data.shape[1], len(np.arange(bl_st, abs(bl_st)))))  # per elec, trials x time
                for i, r in enumerate(RTs):  # trials
                    tmp2[i, :] = data[e, i, (r - abs(bl_st)):(r + abs(bl_st))]
                tmp[j, :, :] = tmp2
            data_dict[k] = tmp.mean(axis=1).mean(axis=0)
        else:
            data_dict[k] = data[eidx, :, :].mean(axis=1).mean(axis=0)

    # plot
    f, ax = plt.subplots(1, 1, figsize=(30, 10))
    scale_min = min([min(data_dict[x]) for x in data_dict.keys()])
    scale_max = max([max(data_dict[x]) for x in data_dict.keys()])

    for i, k in enumerate(data_dict.keys()):
        data = data_dict[k]
        ax.plot(np.arange(bl_st, data.shape[0] + bl_st), data, zorder=1, linewidth=3, label=k)

    ax.set_ylim([scale_min, scale_max])
    ax.axhline(y=0, color='k', lw=3, label=None)  # xaxis
    ax.axvline(x=0, color='k', lw=3, label=None)
    ax.set_ylabel('% change HG')
    ax.set_xlabel('time (ms)')
    ax.autoscale(tight=True)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()
    legend1 = ax.legend(loc='best')
    ax.set_title(' '.join([subj, task]))

    filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows',
                            'smoothed', 'mean_traces', 'images', 'median_split',
                            '_'.join([subj, task, 'easy_diff_overlap_unique']))
    if resplocked:
        filename = filename + '_resplocked'
    plt.savefig(filename + '.png')
    plt.close()
def test_mlp(learning_rate, L1_reg, L2_reg, n_epochs, hidden_layers_sizes, trainpath, trainlist, validset, batch_size, datasel, shuffle, scaling, dropout, earlystop, dumppath): """ Demonstrate stochastic gradient descent optimization for a multilayer perceptron :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient :type L1_reg: float :param L1_reg: L1-norm's weight when added to the cost (see regularization) :type L2_reg: float :param L2_reg: L2-norm's weight when added to the cost (see regularization) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: the path of the dataset """ print locals() datasets = loadmat(trainpath=trainpath,trainlist=trainlist,validset=validset,shuffle=shuffle,datasel=datasel, scaling=scaling,robust=robust) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels rng = numpy.random.RandomState(1234) nclass = max(train_set_y.eval()) + 1 print "n_in = %d"%train_set_x.get_value(borrow=True).shape[1] print "n_out = %d"%nclass # construct the MLP class classifier = MLP( rng=rng, input=x, n_in=train_set_x.get_value(borrow=True).shape[1], hidden_layers_sizes=hidden_layers_sizes, n_out=nclass ) # dropout the hidden layers trng = RandomStreams(1234) use_noise = theano.shared(numpy.asarray(0., dtype=theano.config.floatX)) if dropout: # classifier.input = dropout_layer(use_noise, classifier.input, trng, 0.8) for i in range(classifier.n_layers): classifier.hiddenlayers[i].output = dropout_layer(use_noise, classifier.hiddenlayers[i].output, trng, 0.5) # start-snippet-4 # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically cost = ( classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr ) # end-snippet-4 validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] } ) train_score = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] } ) pred_probs = theano.function( inputs=[index], outputs=classifier.predprobs, givens={ x: train_set_x[index:1000], # y: train_set_y[index * batch_size:(index + 1) * batch_size] } ) # start-snippet-5 # compute the gradient of cost with respect to theta (sotred in params) # the resulting gradients will be stored in a list gparams gparams = [T.grad(cost, param) for param in classifier.params] # specify how to update the parameters of the model as a list of # (variable, update expression) pairs # given two lists of the same length, A = [a1, a2, a3, a4] and # B = [b1, b2, b3, b4], zip generates a list C of same size, where each # element is a pair 
formed from the two lists : # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)] updates = [ (param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams) ] # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) # end-snippet-5 ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 10 * n_train_batches # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.996 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_iter = 0 training_history=[] start_time = timeit.default_timer() epoch = 0 done_looping = False while (epoch < n_epochs): if earlystop and done_looping: print 'early-stopping' break epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): use_noise.set_value(1.) # use dropout minibatch_avg_cost = train_model(minibatch_index) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set use_noise.set_value(0.) # at validation/testing time, no dropout validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] #training_losses = [train_score(i) for i in xrange(n_train_batches)] this_validation_loss = numpy.mean(validation_losses) #this_training_loss = numpy.mean(training_losses) #training_history.append([iter,this_training_loss,this_validation_loss]) training_history.append([iter,this_validation_loss]) # print('epoch %i, minibatch %i/%i, training error %f %%' % # (epoch, minibatch_index + 1, n_train_batches, # this_training_loss * 100.)) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) print('iter = %d' % iter) print('patience = %d' % patience) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if ( this_validation_loss < best_validation_loss * improvement_threshold ): patience = max(patience, iter * patience_increase) numpy.savez(dumppath, model=classifier.params, training_history=training_history, best_validation_loss=best_validation_loss) best_validation_loss = this_validation_loss best_iter = iter print('best_validation_loss %f' % best_validation_loss) if patience <= iter: done_looping = True if earlystop: break end_time = timeit.default_timer() # final save numpy.savez(dumppath, model=classifier.params, training_history=training_history, best_validation_loss=best_validation_loss) print(('Optimization complete. Best validation score of %f %% ' 'obtained at iteration %i') % (best_validation_loss * 100., best_iter + 1)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
def shadeplots_elecs_stats(): """ calculates mean, peak, latency, and std per trial for all electrodes in an active cluster - added medians and coefficient of variation uses windows for individual electrodes from PCA/Stats/single_electrode_windows_withdesignation.csv saves pickle file with numbers per trial in ShadePlots_hclust/elecs/significance_windows/static FOR DURATION - does not have RT-dependent window per trial. Uses max RT. """ SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/' filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_withdesignation_EDITED.csv') df = pd.read_csv(filename) for s_t in df.groupby(['subj','task']): subj, task = s_t[0] #load data filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat') data_dict = loadmat.loadmat(filename) active_elecs, Params, srate, RT, data_all = [data_dict.get(k) for k in ['active_elecs','Params','srate','RTs','data_percent']] bl_st = Params['bl_st'] bl_st = bl_st/1000*srate #sys.stdout.flush() cofvar, maxes_rel, medians, means, stds, maxes, lats, sums, lats_pro, RTs, num_dropped = [dict() for i in range(11)] RT = RT + abs(bl_st) #RTs are calculated from stim onset, need to account for bl in HG_elecMTX_percent for row in s_t[1].itertuples(): _, _, subj, task, cluster, pattern, elec, start_idx, end_idx, start_idx_resp, end_idx_resp, _, _ = row eidx = np.in1d(active_elecs, elec) data = data_all[eidx,:,:].squeeze() st_resp = 0 #define start and end indices based on electrode type if any([(pattern == 'S'), (pattern == 'sustained'), (pattern == 'S+sustained'), (pattern == 'SR')]): start_idx = start_idx + abs(bl_st) end_idx = end_idx + abs(bl_st) if start_idx == end_idx: continue #for SR elecs that dont' have stimlocked (CP9, e91) #num_to_drop = 0 #calculate stats (single trials) means[elec] = data[:,start_idx:end_idx].mean(axis = 1) stds[elec] = data[:,start_idx:end_idx].std(axis = 1) maxes[elec] = data[:,start_idx:end_idx].max(axis = 1) lats[elec] = data[:,start_idx:end_idx].argmax(axis = 1) sums[elec] = data[:, start_idx:end_idx].sum(axis = 1) lats_pro[elec] = lats[elec] / len(np.arange(start_idx, end_idx)) RTs[elec] = RT #num_dropped[elec] = num_to_drop medians[elec] = stats.nanmedian(data[:,start_idx:end_idx], axis = 1) maxes_rel[elec] = maxes[elec]-means[elec] cofvar[elec] = stds[elec]/means[elec] #update dataframe #ix = np.where([(df.subj == subj) & (df.task == task) & (df.elec == elec)])[1][0] #df.ix[ix,'dropped'] = num_to_drop if pattern == 'R': start_idx_resp = start_idx_resp + abs(st_resp) end_idx_resp = end_idx_resp + abs(st_resp) if start_idx_resp == end_idx_resp: continue #for inactive R elecs (not clear why on spreadsheet) #create data matrix data_resp = np.empty(data.shape) for j, r in enumerate(RT): tmp = data[j, r + start_idx_resp : r + end_idx_resp] tmp = np.pad(tmp, (0, data.shape[1]-len(tmp)), 'constant', constant_values = -999) data_resp[j,:] = tmp data_resp[data_resp == -999] = np.nan #nanidx = np.isnan(np.nanmean(data_resp, axis = 1)) #if start > end ''' if np.any(nanidx): #drop equivalent number of long RTs num_to_drop = np.sum(nanidx) i = np.argpartition(RT, -num_to_drop)[-num_to_drop :] #find the indices of the longest RTs nanidx[i] = True #mark the long trials as bad too num_dropped[elec] = num_to_drop * 2 #dropping both ends of RT distribution #calculate params for (single trials) data_resp[nanidx,:] = np.nan means[elec] = np.nanmean(data_resp, axis = 1) stds[elec] = np.nanstd(data_resp, axis = 1) maxes[elec] = np.nanmax(data_resp, axis = 1) sums[elec] = np.nansum(data_resp, 
axis = 1) medians[elec] = stats.nanmedian(data_resp, axis = 1) maxes_rel[elec] = maxes[elec]-means[elec] cofvar[elec] = stds[elec]/means[elec] data_resp[nanidx,0] = -999 tmp_lat = np.nanargmax(data_resp, axis = 1) tmp_lat = np.ndarray.astype(tmp_lat, dtype = float) tmp_lat[nanidx] = np.nan lats[elec] = tmp_lat lats_pro[elec] = tmp_lat / np.sum(~np.isnan(data_resp), axis = 1) tmp_RT = np.ndarray.astype(RT, dtype = float) tmp_RT[nanidx] = np.nan RTs[elec] = tmp_RT else: num_to_drop = 0 num_dropped[elec] = num_to_drop ''' lats[elec] = np.nanargmax(data_resp, axis = 1) lats_pro[elec] = np.nanargmax(data_resp, axis = 1) / np.sum(~np.isnan(data_resp), axis = 1) RTs[elec] = RT means[elec] = np.nanmean(data_resp, axis = 1) stds[elec] = np.nanstd(data_resp, axis = 1) maxes[elec] = np.nanmax(data_resp, axis = 1) sums[elec] = np.nansum(data_resp, axis = 1) medians[elec] = stats.nanmedian(data_resp, axis = 1) maxes_rel[elec] = maxes[elec] - means[elec] cofvar[elec] = stds[elec]/means[elec] #update dataframe #ix = np.where([(df.subj == subj) & (df.task == task) & (df.elec == elec)])[1][0] #df.ix[ix,'dropped'] = num_to_drop * 2 #dropping both ends of RT distribution if pattern == 'D': start_idx = start_idx + abs(bl_st) #end_idx_resp = end_idx_resp + abs(st_resp) end_idx_resp = end_idx_resp + max(RT) #RT already has baseline in it #num_to_drop = 0 #calculate stats (single trials) means[elec] = data[:,start_idx:end_idx_resp].mean(axis = 1) stds[elec] = data[:,start_idx:end_idx_resp].std(axis = 1) maxes[elec] = data[:,start_idx:end_idx_resp].max(axis = 1) lats[elec] = data[:,start_idx:end_idx_resp].argmax(axis = 1) sums[elec] = data[:, start_idx:end_idx_resp].sum(axis = 1) lats_pro[elec] = lats[elec] / len(np.arange(start_idx, end_idx_resp)) RTs[elec] = RT #num_dropped[elec] = num_to_drop medians[elec] = np.median(data[:,start_idx:end_idx_resp], axis = 1) maxes_rel[elec] = maxes[elec]-means[elec] cofvar[elec] = stds[elec]/means[elec] #update dataframe #ix = np.where([(df.subj == subj) & (df.task == task) & (df.elec == elec)])[1][0] #df.ix[ix,'dropped'] = num_to_drop #save stats (single trials) filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'static', 'data', ''.join([subj, '_', task, '.p'])) data_dict = {'active_elecs': active_elecs, 'lats_pro': lats_pro, 'sums':sums, 'means':means, 'stds':stds, 'maxes':maxes, 'lats':lats, 'srate': srate, 'bl_st':bl_st,'RTs':RTs, 'dropped':num_dropped, 'maxes_rel' : maxes_rel, 'medians' : medians, 'variations': cofvar} with open(filename, 'w') as f: pickle.dump(data_dict, f) f.close() #save csv file (without dropping trials) for k in data_dict.keys(): if k in ['bl_st', 'srate','active_elecs', 'dropped']: continue data = pd.DataFrame(data_dict[k]) filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'static', 'csv_files', 'orig', '_'.join([subj, task, k]) + '.csv') data.to_csv(filename, index = False)
def HG_regression_surr_random_SGE(DATASET, numiter=1000):
    '''
    creates random surrogate data numiter times
    calculates regression on each surrogate data set
    saves out distribution of regression parameters for surrogate data
    only runs on duration electrodes
    '''
    SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta'
    subj, task = DATASET.split('_')
    print(DATASET)

    all_coefs, all_scores, all_alphas = [[] for i in range(3)]
    folder = 'maxes_medians_stds'
    features = ['maxes_rel', 'medians', 'stds']

    filename = os.path.join(SJdir, 'Subjs', subj, task, 'subj_globals.mat')
    data_dict = loadmat.loadmat(filename)
    srate = float(data_dict.get('srate'))

    filename = os.path.join(SJdir, 'PCA', 'Stats', 'single_electrode_windows_csvs',
                            'single_electrode_windows_withdesignation_EDITED.csv')
    df_pattern = pd.read_csv(filename)

    bad_df = pd.DataFrame({'GP44_DecisionAud': 233, 'GP15_SelfVis': 1,
                           'JH2_FaceEmo': 113, 'GP35_FaceEmo': 60}, index=range(1)).T
    bad_df = bad_df.reset_index()
    bad_df.columns = ['subj_task', 'elec']

    for i in range(numiter):
        print('iteration %i out of %i' % (i, numiter))

        # get surrogate data
        print('get surrogate data')
        data_dict = shadeplots_elecs_stats_surr_random(subj, task, df_pattern, id_num=i)

        if len(data_dict['RTs']) == 0:
            print('skipping %s %s - no duration elecs\n' % (subj, task))
            sys.stdout.flush()
            return

        # reject outliers
        print('\nreject outliers')
        data_dict_clean = reject_outliers(DATASET, data_dict, srate, df_pattern, bad_df=bad_df)

        # run regression (without pvalue)
        print('run regression')
        coefs, score, alpha = run_regression(DATASET, data_dict_clean)

        # accumulate
        all_coefs.append(coefs)
        all_scores.append(score)
        all_alphas.append(alpha)

    # save out dataframes
    scores = pd.DataFrame(all_scores)
    coefs = pd.DataFrame(all_coefs)
    alphas = pd.DataFrame(all_alphas)

    saveDir = os.path.join(SJdir, 'PCA', 'Stats', 'Regression', 'unsmoothed', folder, 'surr_distributions')
    if not os.path.exists(saveDir):
        os.makedirs(saveDir)

    filename = os.path.join(saveDir, '_'.join([DATASET, 'coefs_surr_dist.csv']))
    coefs.to_csv(filename)
    filename = os.path.join(saveDir, '_'.join([DATASET, 'alphas_surr_dist.csv']))
    alphas.to_csv(filename)
    filename = os.path.join(saveDir, '_'.join([DATASET, 'scores_surr_dist.csv']))
    scores.to_csv(filename)
    print('saving %s\n' % (filename))
    sys.stdout.flush()
def test_mlp(learning_rate, L1_reg, L2_reg, n_epochs, hidden_layers_sizes, dataset, batch_size, datasel, shuffle, scaling, dropout, earlystop, dumppath): """ Demonstrate stochastic gradient descent optimization for a multilayer perceptron :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient :type L1_reg: float :param L1_reg: L1-norm's weight when added to the cost (see regularization) :type L2_reg: float :param L2_reg: L2-norm's weight when added to the cost (see regularization) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: the path of the dataset """ print locals() datasets = loadmat(dataset=dataset, shuffle=shuffle, datasel=datasel, scaling=scaling, robust=robust) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels rng = numpy.random.RandomState(1234) nclass = max(train_set_y.eval()) + 1 print "n_in = %d" % train_set_x.get_value(borrow=True).shape[1] print "n_out = %d" % nclass # construct the MLP class classifier = MLP(rng=rng, input=x, n_in=train_set_x.get_value(borrow=True).shape[1], hidden_layers_sizes=hidden_layers_sizes, n_out=nclass) # dropout the hidden layers trng = RandomStreams(1234) use_noise = theano.shared(numpy.asarray(0., dtype=theano.config.floatX)) if dropout: # classifier.input = dropout_layer(use_noise, classifier.input, trng, 0.8) for i in range(classifier.n_layers): classifier.hiddenlayers[i].output = dropout_layer( use_noise, classifier.hiddenlayers[i].output, trng, 0.5) # start-snippet-4 # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr) # end-snippet-4 # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) train_score = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) pred_probs = theano.function( inputs=[index], outputs=classifier.predprobs, givens={ x: train_set_x[index:1000], # y: train_set_y[index * batch_size:(index + 1) * batch_size] }) # start-snippet-5 # compute the gradient of cost with respect to theta (sotred in params) # the resulting gradients 
will be stored in a list gparams gparams = [T.grad(cost, param) for param in classifier.params] # specify how to update the parameters of the model as a list of # (variable, update expression) pairs # given two lists of the same length, A = [a1, a2, a3, a4] and # B = [b1, b2, b3, b4], zip generates a list C of same size, where each # element is a pair formed from the two lists : # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)] updates = [(param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams)] # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) # end-snippet-5 ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 100 * n_train_batches # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.999 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_iter = 0 test_score = 0. start_time = timeit.default_timer() epoch = 0 done_looping = False while (epoch < n_epochs): if earlystop and done_looping: print 'early-stopping' break # while (epoch < n_epochs): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): use_noise.set_value(1.) # use dropout minibatch_avg_cost = train_model(minibatch_index) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set use_noise.set_value( 0.) # at validation/testing time, no dropout validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] training_losses = [ train_score(i) for i in xrange(n_train_batches) ] this_validation_loss = numpy.mean(validation_losses) this_training_loss = numpy.mean(training_losses) probs = [pred_probs(i) for i in xrange(n_train_batches)] print('epoch %i, minibatch %i/%i, training error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_training_loss * 100.)) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) # save model with open(dumppath, "wb") as f: cPickle.dump(classifier.params, f) best_validation_loss = this_validation_loss best_iter = iter ''' # test it on the test set test_losses = [test_model(i) for i in xrange(n_test_batches)] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) ''' if patience <= iter: done_looping = True if earlystop: break end_time = timeit.default_timer() print(('Optimization complete. 
Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
# neuropix_folder = ...  (assignment truncated in the source; it ends with 'NP_DATA_corrected')
files = glob.glob(os.path.join(neuropix_folder, '*.mat'))
#files = glob.glob('/oak/stanford/groups/giocomo/attialex/NP_DATA/np*_gain*.mat'
path = '/Volumes/Samsung_T5/attialex/python_circular_gain_' + gain
TRIALS = np.arange(5, 21)
if not os.path.exists(path):
    os.makedirs(path)

for iF in files:
    session_name = os.path.split(iF)[-1]
    print(session_name)
    if 'mismatch' in session_name or 'playback' in session_name or 'dark' in session_name:
        print('skipping {}'.format(session_name))
        continue
    data = lm.loadmat(iF)
    try:
        ons = get_gain_onsets(data, float(gain), 100)
    except:
        ons = []
    try:
        for nbr, iO in enumerate(ons):
            trials = iO + np.arange(-5, 4)
            output = run_for_file_gain(data, trials)
            sn = session_name[0:-4]
            session_name = '{}_{}.mat'.format(sn, nbr + 1)
            if output is not None:
                plt.subplot(211)
def shadeplots_elecs_stats():
    """
    calculates mean, max, min, latency, median, and std on the mean trace for all electrodes in an active cluster
    OLD - uses electrodes and windows from PCA/Stats/single_electrode_windows_withdesignation_EDITED.csv
    NOW - uses electrodes and windows from PCA/csvs_FINAL/final_windows.csv (after going through and editing them)
    calculates both stimulus and response locked parameters
    """
    SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/'

    #filename = os.path.join(SJdir, 'PCA', 'Stats', 'single_electrode_windows_csvs', 'single_electrode_windows_withdesignation_EDITED.csv')
    filename = os.path.join(SJdir, 'PCA', 'csvs_FINAL', 'final_windows.csv')
    df = pd.read_csv(filename)

    for s_t in df.groupby(['subj', 'task']):
        subj, task = s_t[0]

        # load data
        filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat')
        data_dict = loadmat.loadmat(filename)
        active_elecs, Params, srate, RT, data_trials = [data_dict.get(k) for k in
            ['active_elecs', 'Params', 'srate', 'RTs', 'data_percent']]
        srate = float(srate)

        data_all = data_trials.mean(axis=1)  # mean across trials (new shape is elecs x time)
        bl_st = int(-500 / 1000. * srate)  # in data points

        maxes, lats, RTs, RTs_median, RTs_min, lats_static, lats_min_static, lats_semi_static = [dict() for i in range(8)]
        RT = RT + abs(bl_st)  # RTs are calculated from stim/cue onset, need to account for bl in HG_elecMTX_percent

        for row in s_t[1].itertuples():
            _, subj, task, elec, pattern, cluster, start_idx, end_idx, start_idx_resp, end_idx_resp = row  # in datapoints
            eidx = np.in1d(active_elecs, elec)
            data = data_all[eidx, :].squeeze()  # mean trace

            # define start and end indices based on electrode type
            if any([(pattern == 'S'), (pattern == 'sustained'), (pattern == 'S+sustained'), (pattern == 'SR')]):
                start_idx = start_idx + abs(bl_st)
                end_idx = end_idx + abs(bl_st)
            if pattern == 'R':
                start_idx = start_idx + abs(bl_st)
                end_idx = end_idx + abs(bl_st)
            if pattern == 'D':
                start_idx = start_idx + abs(bl_st)
                end_idx = int(np.median(RT) + end_idx_resp)

            if start_idx == end_idx:
                continue  # for SR elecs that only have response activity - don't calculate a mean value

            # calculate stats (mean trace)
            maxes[elec] = data[start_idx:end_idx].max()
            lats[elec] = (data[start_idx:end_idx].argmax() + 1) / srate * 1000  # within HG window
            RTs[elec] = (RT + bl_st).mean() / srate * 1000  # from stimulus onset (adjusted for all subjects)
            RTs_median[elec] = np.median(RT + bl_st) / srate * 1000  # from stimulus onset (adjusted for all subjects)
            RTs_min[elec] = np.min(RT + bl_st) / srate * 1000  # from stimulus onset (adjusted for all subjects)
            lats_static[elec] = (data[abs(bl_st)::].argmax() + 1) / srate * 1000  # from stimulus onset to end (adjusted for all subjects)
            lats_semi_static[elec] = (data[start_idx::].argmax() + 1) / srate * 1000  # from HG onset

        data_dict = {'maxes': maxes, 'lats': lats, 'RTs': RTs, 'RTs_median': RTs_median, 'RTs_min': RTs_min,
                     'lats_static': lats_static, 'lats_semi_static': lats_semi_static}

        # update csv file (iterate over a copy of the keys so popping is safe)
        for k in list(data_dict.keys()):
            if k in ['bl_st', 'srate', 'active_elecs']:
                data_dict.pop(k, None)

        df_values = pd.DataFrame(data_dict)

        # save dataframe with values for all elecs for subject/task - later combined into mean_traces_all_elecs.csv in elec_values.ipynb
        filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows',
                                'smoothed', 'mean_traces', 'csv_files', '_'.join([subj, task]) + '.csv')
        df_values.to_csv(filename)
if __name__ == '__main__':
    root = '/Users/attialex/distance_tuning'
    umap_version = 'Cosine_PCAUMAP'
    files = glob.glob(os.path.join('/Users/attialex/distance_tuning', '*.mat'))
    umap_save_path = os.path.join(root, umap_version)
    if not os.path.isdir(umap_save_path):
        os.makedirs(umap_save_path)
    shutil.copy2('/Users/attialex/code/AlexA_Library/NP_python/distance_tuning_clustering.py', umap_save_path)

    for fi in files:
        print(fi)
        data_out = lm.loadmat(fi)
        data_out = data_out['data_out']
        idx = data_out['pvals'] < 0.05
        if sum(idx) < 30:
            continue
        _, sn_darkData = os.path.split(fi)
        data_path = '/Volumes/T7/attialex/NP_DATA_corrected'
        data = lm.loadmat(os.path.join(data_path, sn_darkData))
        xcorrs = data_out['xcorrs'][idx]
        reducer = umap.UMAP(n_components=2)
        #reducer = PCA(n_components=2)
        X_new = reducer.fit_transform(xcorrs)
def stats_static250(subj, task, df_pattern, start=0, end=250, start_idx_resp=-250, end_idx_resp=0):
    """
    calculates params per electrode for the stim:stim+250 and resp-250:resp windows.
    drops trials that are <250 ms
    uses windows for individual electrodes from df_pattern (PCA/Stats/single_electrode_windows_csvs/single_electrode_windows_withdesignation_EDITED.csv)
    Uses unsmoothed data
    hardcoded params - medians, maxes_rel, stds, latencies, maxes, means
    returns dictionary with features. each feature is a dictionary of elecs
    """
    SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/'

    # load data
    filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_unsmoothed.mat')
    data_dict = loadmat.loadmat(filename)
    active_elecs, Params, srate, RT, data_all = [data_dict.get(k) for k in
        ['active_elecs', 'Params', 'srate', 'RTs', 'data_percent']]

    bl_st = int(500 / 1000. * srate)  # for my data, remove cue from baseline - start/end_idx are relative to cue onset) - change 12/24 - okay with RT 12/25
    RT = RT + abs(bl_st)  # RTs are calculated from stim (my data cue) onset, need to account for bl in HG_elecMTX_percent (for 500, not 1000 baseline 12/25)

    # define start and end windows (stim locked), in samples
    start = int(np.round((start / 1000. * srate) + abs(bl_st)))
    end = int(np.round(end / 1000. * srate))
    start_idx_resp = int(np.round(start_idx_resp / 1000. * srate))
    end_idx_resp = int(np.round(end_idx_resp / 1000. * srate))

    RTs, medians, maxes_rel, means, stds, maxes, lats = [{'stim': dict(), 'resp': dict()} for i in range(7)]

    s_t = df_pattern[((df_pattern.subj == subj) & (df_pattern.task == task))]
    for e in s_t.elec.values:
        _, subj, task, cluster, pattern, elec, start_idx, end_idx, _, _, _, _ = s_t[s_t.elec == e].values[0]

        if (end_idx - start_idx) < (end - start):  # HG duration is less than window size (250 or 500)
            print('skipping %s %s %i' % (subj, task, e))
            sys.stdout.flush()
            continue

        print('%i...' % (elec), end="")
        sys.stdout.flush()

        eidx = np.in1d(active_elecs, elec)
        data = data_all[eidx, :, :].squeeze()

        start_idx = start_idx + start  # start and end relative to HG onset
        end_idx = start_idx + end

        # calculate values (single trials)
        means['stim'][elec] = np.nanmean(data[:, start_idx:end_idx], axis=1)
        stds['stim'][elec] = np.nanstd(data[:, start_idx:end_idx], axis=1)
        maxes['stim'][elec] = np.nanmax(data[:, start_idx:end_idx], axis=1)
        medians['stim'][elec] = stats.nanmedian(data[:, start_idx:end_idx], axis=1)
        maxes_rel['stim'][elec] = maxes['stim'][elec] - means['stim'][elec]
        lats['stim'][elec] = np.argmax(data[:, start_idx:end_idx], axis=1)

        data_resp = np.empty((len(RT), len(np.arange(start_idx_resp, end_idx_resp))))
        for j, r in enumerate(RT):
            data_resp[j, :] = data[j, int(r) + start_idx_resp: int(r) + end_idx_resp]

        means['resp'][elec] = np.nanmean(data_resp, axis=1)
        stds['resp'][elec] = np.nanstd(data_resp, axis=1)
        maxes['resp'][elec] = np.nanmax(data_resp, axis=1)
        medians['resp'][elec] = stats.nanmedian(data_resp, axis=1)
        maxes_rel['resp'][elec] = maxes['resp'][elec] - means['resp'][elec]
        lats['resp'][elec] = np.argmax(data_resp, axis=1)

        RTs['stim'][elec] = RT
        RTs['resp'][elec] = RT

    # output dictionary of params per elec
    data_dict = {'RTs': RTs, 'maxes_rel': maxes_rel, 'medians': medians, 'stds': stds,
                 'lats': lats, 'means': means, 'maxes': maxes}
    return data_dict, start_idx, end_idx, start_idx_resp, end_idx_resp
def shadeplots_elecs_stats(): """ calculates mean, peak, latency, and std per trial for all electrodes in an active cluster - added medians and coefficient of variation and mins uses windows for individual electrodes from PCA/Stats/single_electrode_windows_withdesignation.csv saves pickle file with numbers per trial in ShadePlots_hclust/elecs/significance_windows *** runs on unsmoothed data (12/11/14)*** """ SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/' #filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_csvs', 'single_electrode_windows_withdesignation_EDITED.csv') filename = os.path.join(SJdir, 'PCA', 'csvs_FINAL', 'mean_traces_all_subjs_dropSR.csv') df = pd.read_csv(filename) for s_t in df.groupby(['subj','task']): subj, task = s_t[0] #load data #filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_unsmoothed.mat') filename = os.path.join(SJdir, 'Subjs',subj, task, 'HG_elecMTX_zscore.mat') data_dict = loadmat.loadmat(filename) active_elecs, Params, srate, RT, data_all = [data_dict.get(k) for k in ['active_elecs','Params','srate','RTs','data_zscore']] bl_st = Params['bl_st'] bl_st = bl_st/1000*srate if task in ['DecisionAud', 'DecisionVis']: bl_st = 500/1000*srate #remove cue from baseline - start/end_idx are relative to cue onset) - change 12/24 - okay with RT 12/25 cofvar, maxes_rel, medians, means, stds, maxes, lats, sums, lats_pro, RTs, num_dropped, mins, lats_min = [dict() for i in range(13)] RT = RT + abs(bl_st) #RTs are calculated from stim/cue onset, need to account for bl in HG_elecMTX_percent (for 500, not 1000 baseline 12/25) for row in s_t[1].itertuples(): _, subj, task, elec, pattern, cluster, start_idx, end_idx, start_idx_resp, end_idx_resp, RTs_values, RTs_median, RTs_min, lats_values, lats_semi_static, lats_static, max_vals, ROI = row eidx = np.in1d(active_elecs, elec) data = data_all[eidx,:,:].squeeze() st_resp = 0 #define start and end indices based on electrode type if any([(pattern == 'S'), (pattern == 'sustained'), (pattern == 'S+sustained'), (pattern == 'SR')]): start_idx = start_idx + abs(bl_st) end_idx = end_idx + abs(bl_st) if start_idx == end_idx: continue #for SR elecs that dont' have stimlocked (CP9, e91) num_to_drop = 0 #calculate stats (single trials) means[elec] = data[:,start_idx:end_idx].mean(axis = 1) stds[elec] = data[:,start_idx:end_idx].std(axis = 1) maxes[elec] = data[:,start_idx:end_idx].max(axis = 1) lats[elec] = data[:,start_idx:end_idx].argmax(axis = 1) lats_min[elec] = data[:, start_idx:end_idx].argmin(axis = 1) sums[elec] = data[:, start_idx:end_idx].sum(axis = 1) lats_pro[elec] = lats[elec] / len(np.arange(start_idx, end_idx)) RTs[elec] = RT num_dropped[elec] = num_to_drop medians[elec] = stats.nanmedian(data[:,start_idx:end_idx], axis = 1) maxes_rel[elec] = maxes[elec]-means[elec] cofvar[elec] = stds[elec]/means[elec] mins[elec] = data[:,start_idx:end_idx].min(axis = 1) #update dataframe #ix = np.where([(df.subj == subj) & (df.task == task) & (df.elec == elec)])[1][0] #df.ix[ix,'dropped'] = num_to_drop if pattern == 'R': start_idx_resp = start_idx_resp + abs(st_resp) end_idx_resp = end_idx_resp + abs(st_resp) if start_idx_resp == end_idx_resp: continue #for inactive R elecs (not clear why on spreadsheet) #create data matrix data_resp = np.empty(data.shape) for j, r in enumerate(RT): tmp = data[j, r + start_idx_resp : r + end_idx_resp] tmp = np.pad(tmp, (0, data.shape[1]-len(tmp)), 'constant', constant_values = -999) data_resp[j,:] = tmp data_resp[data_resp == -999] = np.nan nanidx = 
np.isnan(np.nanmean(data_resp, axis = 1)) #if start > end if np.any(nanidx): #drop equivalent number of long RTs num_to_drop = np.sum(nanidx) i = np.argpartition(RT, -num_to_drop)[-num_to_drop :] #find the indices of the longest RTs nanidx[i] = True #mark the long trials as bad too num_dropped[elec] = num_to_drop * 2 #dropping both ends of RT distribution #calculate params for (single trials) data_resp[nanidx,:] = np.nan means[elec] = np.nanmean(data_resp, axis = 1) stds[elec] = np.nanstd(data_resp, axis = 1) maxes[elec] = np.nanmax(data_resp, axis = 1) sums[elec] = np.nansum(data_resp, axis = 1) medians[elec] = stats.nanmedian(data_resp, axis = 1) maxes_rel[elec] = maxes[elec]-means[elec] cofvar[elec] = stds[elec]/means[elec] mins[elec] = np.nanmin(data_resp, axis = 1) data_resp[nanidx,0] = -999 tmp_lat = np.nanargmax(data_resp, axis = 1) tmp_lat = np.ndarray.astype(tmp_lat, dtype = float) tmp_lat[nanidx] = np.nan lats[elec] = tmp_lat lats_pro[elec] = tmp_lat / np.sum(~np.isnan(data_resp), axis = 1) data_resp[nanidx,0] = 9999 tmp_lat = np.nanargmin(data_resp, axis = 1) tmp_lat = np.ndarray.astype(tmp_lat, dtype = float) tmp_lat[nanidx] = np.nan lats_min[elec] = tmp_lat tmp_RT = np.ndarray.astype(RT, dtype = float) tmp_RT[nanidx] = np.nan RTs[elec] = tmp_RT else: num_to_drop = 0 num_dropped[elec] = num_to_drop lats[elec] = np.nanargmax(data_resp, axis = 1) lats_min[elec] = np.nanargmin(data_resp, axis = 1) lats_pro[elec] = np.nanargmax(data_resp, axis = 1) / np.sum(~np.isnan(data_resp), axis = 1) RTs[elec] = RT means[elec] = np.nanmean(data_resp, axis = 1) stds[elec] = np.nanstd(data_resp, axis = 1) maxes[elec] = np.nanmax(data_resp, axis = 1) sums[elec] = np.nansum(data_resp, axis = 1) mins[elec] = np.nanmin(data_resp, axis = 1) medians[elec] = stats.nanmedian(data_resp, axis = 1) maxes_rel[elec] = maxes[elec] - means[elec] cofvar[elec] = stds[elec]/means[elec] #update dataframe #ix = np.where([(df.subj == subj) & (df.task == task) & (df.elec == elec)])[1][0] #df.ix[ix,'dropped'] = num_to_drop * 2 #dropping both ends of RT distribution if pattern == 'D': start_idx = start_idx + abs(bl_st) end_idx_resp = end_idx_resp + abs(st_resp) #create data matrices data_dur = np.empty(data.shape) for j, r in enumerate(RT): tmp = data[j, start_idx : r + end_idx_resp] tmp = np.pad(tmp, (0, data.shape[1]-len(tmp)), 'constant', constant_values = -999) data_dur[j,:] = tmp data_dur[data_dur == -999] = np.nan nanidx = np.isnan(np.nanmean(data_dur, axis = 1)) #if start > end if np.any(nanidx): #drop equivalent number of long RTs num_to_drop = np.sum(nanidx) i = np.argpartition(RT, -num_to_drop)[-num_to_drop :] #find the indices of the longest RTs nanidx[i] = True #mark the long trials as bad too num_dropped[elec] = num_to_drop * 2 #dropping both ends of RT distribution #calculate params for single trials data_dur[nanidx, :] = np.nan means[elec] = np.nanmean(data_dur, axis = 1) stds[elec] = np.nanstd(data_dur, axis = 1) maxes[elec] = np.nanmax(data_dur, axis = 1) sums[elec] = np.nansum(data_dur, axis = 1) medians[elec] = stats.nanmedian(data_dur, axis = 1) maxes_rel[elec] = maxes[elec] - means[elec] cofvar[elec] = stds[elec]/means[elec] mins[elec] = np.nanmin(data_dur, axis = 1) data_dur[nanidx,0] = -999 tmp_lat = np.nanargmax(data_dur, axis = 1) tmp_lat = np.ndarray.astype(tmp_lat, dtype = float) tmp_lat[nanidx] = np.nan lats[elec] = tmp_lat lats_pro[elec] = tmp_lat / np.sum(~np.isnan(data_dur), axis = 1) data_dur[nanidx, 0] = 9999 tmp_lat = np.nanargmin(data_dur, axis = 1) tmp_lat = 
np.ndarray.astype(tmp_lat, dtype = float) tmp_lat[nanidx] = np.nan lats_min[elec] = tmp_lat tmp_RT = np.ndarray.astype(RT, dtype = float) tmp_RT[nanidx] = np.nan RTs[elec] = tmp_RT else: num_to_drop = 0 num_dropped[elec] = num_to_drop means[elec] = np.nanmean(data_dur, axis = 1) stds[elec] = np.nanstd(data_dur, axis = 1) maxes[elec] = np.nanmax(data_dur, axis = 1) sums[elec] = np.nansum(data_dur, axis = 1) medians[elec] = stats.nanmedian(data_dur, axis = 1) maxes_rel[elec] = maxes[elec] - means[elec] cofvar[elec] = stds[elec]/means[elec] mins[elec] = np.nanmin(data_dur, axis = 1) lats[elec] = np.nanargmax(data_dur, axis = 1) lats_min[elec] = np.nanargmin(data_dur, axis = 1) lats_pro[elec] = np.nanargmax(data_dur, axis = 1) / np.sum(~np.isnan(data_dur), axis = 1) RTs[elec] = RT #update dataframe #ix = np.where([(df.subj == subj) & (df.task == task) & (df.elec == elec)])[1][0] #df.ix[ix,'dropped'] = num_to_drop * 2 #dropping both ends of RT distribution #save stats (single trials) filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'unsmoothed', 'data', ''.join([subj, '_', task, '.p'])) data_dict = {'active_elecs': active_elecs, 'lats_pro': lats_pro, 'sums':sums, 'means':means, 'stds':stds, 'maxes':maxes, 'lats':lats, 'srate': srate, 'bl_st':bl_st,'RTs':RTs, 'dropped':num_dropped, 'maxes_rel' : maxes_rel, 'medians' : medians, 'variations': cofvar, 'mins': mins, 'lats_min':lats_min} with open(filename, 'w') as f: pickle.dump(data_dict, f) f.close() #save csv file (without dropping trials) for k in data_dict.keys(): if k in ['bl_st', 'srate','active_elecs', 'dropped']: continue data = pd.DataFrame(data_dict[k]) filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'zscore', 'csv_files', '_'.join([subj, task, k]) + '.csv') data.to_csv(filename, index = False)
import numpy as np
import os.path
import scipy.io
from loadmat import loadmat
import matplotlib.pyplot as plt
import matplotlib as mpl

%matplotlib inline
default_dpi = mpl.rcParamsDefault['figure.dpi']
mpl.rcParams['figure.dpi'] = default_dpi * 2

# load gulfport campus image
img_fname = 'muufl_gulfport_campus_w_lidar_1.mat'
spectra_fname = 'tgt_img_spectra.mat'
dataset = loadmat(img_fname)['hsi']
hsi = dataset['Data']

# check out the shape of the data
n_r, n_c, n_b = hsi.shape
hsi.shape

# pull a 'random' pixel/spectrum
rr, cc = 150, 150
spectrum = hsi[rr, cc, :]
spectrum

# plot a spectrum
plt.plot(spectrum)
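# A small follow-on cell (illustrative, not from the source): reuse the `hsi` cube and its shape
# from above to compare spectra at a few arbitrary pixel locations; the axis labels are assumptions.
for rr, cc in [(n_r // 4, n_c // 4), (n_r // 2, n_c // 2), (3 * n_r // 4, 3 * n_c // 4)]:
    plt.plot(hsi[rr, cc, :], label='pixel ({}, {})'.format(rr, cc))
plt.xlabel('band index')
plt.ylabel('reflectance (a.u.)')
plt.legend()
plt.show()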
import numpy as np
import scipy.io as sio
from SOCfromOCVtemp import SOCfromOCVtemp
from OCVfromSOCtemp import OCVfromSOCtemp
from InitializeSPKF import initSPKF
from IterationSPKF import iterSPKF
from loadmat import loadmat
from RetrieveParamESCmodel import getParamESC
from matplotlib import pyplot as plt

# Load ESC battery model file
E2model = loadmat('E2model.mat')
model = E2model['model']

# Load cell test data
E2_DYN_15_P05 = loadmat('E2_DYN_15_P05')
DYNData = E2_DYN_15_P05['DYNData']
T = 5  # Temperature = 5 degrees

time = DYNData['script1']['time'].flatten()
deltat = time[1] - time[0]
time = time - time[0]
current = DYNData['script1']['current'].flatten()
voltage = DYNData['script1']['voltage'].flatten()
soc = DYNData['script1']['soc'].flatten()

# Reserve space for predicted SOC and its bounds
sochat = np.zeros(soc.size)
socbound = np.zeros(soc.size)

# Define covariance matrices
def RT_median_split(DATASET, SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/', numiter = 1000): filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_withdesignation_EDITED_dropped_withROI.csv') df = pd.read_csv(filename) subj, task = DATASET.split('_') #load data filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_unsmoothed.mat') data_dict = loadmat.loadmat(filename) Params, srate, data_percent, active_elecs, RT = [data_dict.get(k) for k in ['Params', 'srate', 'data_percent', 'active_elecs', 'RTs']] bl_st = Params['bl_st'] bl_st = bl_st/1000*srate #load RTs csv file filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'csv_files', '_'.join([subj, task, 'RTs']) + '.csv') data = pd.read_csv(filename) RTs = np.round(np.array(data)[:,0]) #don't remove baseline. want RT to include baseline so can index properly (here they already include baseline from Shadeplots_elecs_stats.py) #sort trials by RTs idx = np.argsort(RTs) data_percent = data_percent[:, idx, :] RTs = RTs[idx] median_idx = np.floor(data_percent.shape[1]/2) #index of median split for this subject df_subj = df[(df.subj == subj) & (df.task == task)][['elec','start_idx','end_idx','start_idx_resp','end_idx_resp', 'pattern']] #iterate on electrodes for row in df_subj.itertuples(): _, elec, start_idx, end_idx, start_idx_resp, end_idx_resp, pattern = row print('%s %s e%i, %s' %(subj, task, elec, pattern)) eidx = np.where(elec == active_elecs)[0][0] skews, kurts, means, medians, means_l, means_s, medians_s, medians_l, skews_s, skews_l, kurts_s, kurts_l = [[] for i in range(12)] skews_surr, kurts_surr, means_surr, medians_surr, means_l_surr, means_s_surr = [[] for i in range(6)] if (pattern == 'S') | (pattern == 'SR'): start_idx = start_idx + abs(bl_st) end_idx = end_idx + abs(bl_st) shorttrials, longtrials, trial_lengths = [[] for i in range(3)] for i, r in enumerate(RTs): if i < median_idx: shorttrials.extend(data_percent[eidx, i, start_idx:end_idx]) trial_lengths.append(int(end_idx-start_idx)) #length of each short trial so can use for long trial indexing elif i > median_idx: #might only work with odd num of trials longtrials.extend(data_percent[eidx, i, start_idx:end_idx]) if (pattern == 'R'): start_idx = start_idx_resp end_idx = end_idx_resp shorttrials, longtrials, trial_lengths = [[] for i in range(3)] for i, r in enumerate(RTs): if i < median_idx: shorttrials.extend(data_percent[eidx, i, int(r)+start_idx:int(r)+end_idx]) trial_lengths.append(int(end_idx-start_idx+1)) #length of each short trial so can use for long trial indexing elif i > median_idx: #might only work with odd num of trials longtrials.extend(data_percent[eidx, i, int(r)+start_idx:int(r)+end_idx]) if pattern == 'D': start_idx = start_idx + abs(bl_st) end_idx = end_idx_resp #create data vectors for long and short trials shorttrials, longtrials, trial_lengths = [[] for i in range(3)] for i, r in enumerate(RTs): if i < median_idx: shorttrials.extend(data_percent[eidx, i, start_idx:int(r)+end_idx]) trial_lengths.append(int(r+end_idx-start_idx+1)) #length of each short trial so can use for long trial indexing elif i > median_idx: #might only work with odd num of trials longtrials.extend(data_percent[eidx, i, start_idx:int(r)+end_idx]) #bootstrap from long distribution print('\tbootstrapping from long distribution') for j in range(numiter): randidx = np.random.permutation(len(longtrials))[0:len(shorttrials)] longsample = np.array(longtrials)[randidx] #calculate stats for duration sample 
skews.append(stats.skew(longsample) - stats.skew(shorttrials)) kurts.append(stats.kurtosis(longsample) - stats.kurtosis(shorttrials)) means.append(np.mean(longsample) - np.mean(shorttrials)) medians.append(np.median(longsample) - np.median(shorttrials)) means_l.append(np.mean(longsample)) skews_l.append(stats.skew(longsample)) kurts_l.append(stats.kurtosis(longsample)) medians_l.append(np.median(longsample)) else: #calculate stats for for nonduration no need to subsample long sample longsample = longtrials skews.append(stats.skew(longsample) - stats.skew(shorttrials)) kurts.append(stats.kurtosis(longsample) - stats.kurtosis(shorttrials)) means.append(np.mean(longsample) - np.mean(shorttrials)) medians.append(np.median(longsample) - np.median(shorttrials)) means_l.append(np.mean(longsample)) skews_l.append(stats.skew(longsample)) kurts_l.append(stats.kurtosis(longsample)) medians_l.append(np.median(longsample)) #calculate values for short trials (same for duration and nonduration) medians_s.append(np.median(shorttrials)) means_s.append(np.mean(shorttrials)) kurts_s.append(stats.kurtosis(shorttrials)) skews_s.append(stats.skew(shorttrials)) #create permuted difference distribution print ('\tcalculating surrogate stats...') for j in range(numiter): randidx = np.random.permutation(len(shorttrials)*2) #no overlap between 'short' and 'long' datapoints randidx_short = randidx[0:len(randidx)/2] randidx_long = randidx[len(randidx)/2+1::] shorttrials_surr = data_percent[eidx,:,:].flatten()[randidx_short] longsample_surr = data_percent[eidx,:,:].flatten()[randidx_long] #calculate stats skews_surr.append(stats.skew(longsample_surr) - stats.skew(shorttrials_surr)) kurts_surr.append(stats.kurtosis(longsample_surr) - stats.kurtosis(shorttrials_surr)) means_surr.append(np.mean(longsample_surr) - np.mean(shorttrials_surr)) medians_surr.append(np.median(longsample_surr) - np.median(shorttrials_surr)) means_l_surr.append(np.mean(longsample_surr)) means_s_surr.append(np.mean(shorttrials_surr)) #calculate pvalue if np.mean(means) <= np.mean(means_surr): p_mean = sum(means_surr<np.mean(means))/len(means_surr) else: p_mean = sum(means_surr>np.mean(means))/len(means_surr) if np.mean(medians) <= np.mean(medians_surr): p_median = sum(medians_surr<np.mean(medians))/len(medians_surr) else: p_median = sum(medians_surr>np.mean(medians))/len(medians_surr) if np.mean(skews) <= np.mean(skews_surr): p_skew = sum(skews_surr<np.mean(skews))/len(skews_surr) else: p_skew = sum(skews_surr>np.mean(skews))/len(skews_surr) if np.mean(kurts) <= np.mean(kurts_surr): p_kurt = sum(kurts_surr<np.mean(kurts))/len(kurts_surr) else: p_kurt = sum(kurts_surr>np.mean(kurts))/len(kurts_surr) #save print('\tsaving') data_dict = {'p_mean' : p_mean, 'p_median' : p_median, 'p_skew' : p_skew, 'p_kurt' : p_kurt, 'pattern':pattern, 'skews':skews, 'kurts':kurts, 'means':means, 'medians':medians, 'means_s':means_s, 'means_l':means_l, 'medians_l':medians_l, 'medians_s':medians_s, 'skews_l':skews_l, 'skews_s':skews_s,'kurts_l':kurts_l, 'kurts_s':kurts_s, 'shorttrials':shorttrials, 'longtrials':longtrials, 'longsample':longsample, 'skew_surr':skews_surr, 'kurtosis_surr':kurts_surr, 'mean_surr':means_surr, 'median_surr':medians_surr} filename = os.path.join(SJdir, 'PCA', 'Stats', 'RT_median_split', '%s_%s_e%i_distributions.p' %(subj, task, elec)) pickle.dump(data_dict, open(filename, "wb"))
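# The p-value block above depends on Python 2 behaviour in two places: list
# comparisons such as means_surr < np.mean(means), and slicing with
# len(randidx)/2, which is a float under Python 3. A small self-contained
# sketch of the same one-tailed permutation p-value, written with NumPy arrays
# and integer division, is given below (the function and variable names here
# are illustrative only, not part of the original code).
import numpy as np

def permutation_pvalue(observed_diffs, surrogate_diffs):
    """One-tailed p-value: fraction of surrogate differences lying beyond the
    observed mean difference, on the side the observed effect falls on."""
    obs = np.mean(observed_diffs)
    surr = np.asarray(surrogate_diffs, dtype=float)
    if obs <= surr.mean():
        return np.sum(surr < obs) / float(len(surr))
    return np.sum(surr > obs) / float(len(surr))

# Splitting a shuffled index vector in half without float indices:
# half = len(randidx) // 2
# randidx_short, randidx_long = randidx[:half], randidx[half:]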
import glob
import os
import shutil

import umap
import loadmat as lm  # local module; alias inferred from usage below
# from sklearn.decomposition import PCA  # only needed for the PCA alternative below
# cluster_plotXCorrs is assumed to come from a local plotting module of this project

root = r'C:\Users\attialex\Documents\distance_tuning'
data_dir = r'F:\attialex\NP_DATA_corrected'
umap_version = 'Vanilla_otherData_pcaumap'

files = glob.glob(os.path.join(root, '*.mat'))
umap_save_path = os.path.join(root, umap_version)
if not os.path.isdir(umap_save_path):
    os.makedirs(umap_save_path)

# import pdb
# pdb.set_trace()

# keep a copy of this script next to its outputs
shutil.copy2(os.path.abspath(__file__), umap_save_path)

for fi in files:
    print(fi)
    data_out = lm.loadmat(fi)
    data_out = data_out['data_out']

    idx = data_out['pvals'] < 0.05
    if sum(idx) < 30:  # skip sessions with too few significant cells
        continue

    _, sn_darkData = os.path.split(fi)
    # data = lm.loadmat(os.path.join(data_path, sn_darkData))

    xcorrs = data_out['xcorrs'][idx]
    reducer = umap.UMAP(n_components=2)
    # reducer = PCA(n_components=2)
    X_new = reducer.fit_transform(xcorrs)
    labels, fig, mean_pwd = cluster_plotXCorrs(
        X_new, data_out['peak_loc_all'][idx] / 5, xcorrs)
import glob
import os
import shutil
import traceback

import numpy as np
import loadmat as lm  # local module; alias inferred from usage below

if __name__ == '__main__':
    # files = glob.glob('/Volumes/T7/attialex/NP_DATA_corrected/*.mat')
    # im_path = '/Volumes/T7/attialex/umap_baseline'
    im_path = r'F:\attialex\umap_BLAverageSpatialMap_MEC_v2'
    files = glob.glob(r'F:\attialex\NP_DATA_corrected\np*.mat')
    if not os.path.isdir(im_path):
        os.makedirs(im_path)
    shutil.copy2(os.path.abspath(__file__), im_path)

    ds_factor = 5
    for fi in files:
        try:
            data = lm.loadmat(fi)
            gain_val = 0.8

            # trials in the gain-manipulation condition at full contrast
            values = (data['trial_gain'] == gain_val) & (data['trial_contrast'] == 100)
            # onsets: first trial of each block where the condition switches on
            matches = np.logical_not(values[:-1]) & values[1:]
            onsets = np.where(matches)[0] + 1
            if len(onsets) == 0:
                continue
            trial_range = onsets[0] + np.arange(-5, 11)
            trial_range = np.arange(2, 21)  # overrides the line above with a fixed trial range
            try:
                anatomy = data['anatomy']
            except:
                print('no anatomy')
def shadeplots_allelecs(DATASET, SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta', thresh = 10, chunk_size = 100, baseline = -500, black_chunk_size = 0): """ calculate onset and offset window for every active electrode (ignoring clusters) saves csv for each sub/task for easy plotting later includes real vs empty - 2 conditions difference """ subj, task = DATASET.split('_') #filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_empty.mat') filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat') data = loadmat.loadmat(filename) srate = data['srate'] active_elecs = data['active_elecs'] data = data['data_percent'] #convert to srate bl_st = baseline/1000*srate chunksize = chunk_size/1000*srate black_chunksize = black_chunk_size/1000*srate if task in ['DecisionAud']: st_tp = 600/1000*srate elif task in ['DecisionVis']: st_tp = 500/1000*srate else: st_tp = 0 #filename = os.path.join(SJdir, 'PCA', 'ShadePlots_allelecs', ''.join([subj, '_', task, '_empty.csv'])) filename = os.path.join(SJdir, 'PCA', 'ShadePlots_allelecs', ''.join([subj, '_', task, '.csv'])) subjs = list(); tasks = list(); pthr = list(); elecs = list(); starts = list(); ends = list(); for i, e in enumerate(active_elecs): pvals = list(); edata = data[i,:] nozero = np.copy(edata) nozero[:,nozero.mean(axis=0)<0] = 0 #zero out negative values in mean for j in np.arange(abs(bl_st)+st_tp, edata.shape[1]): (t, p) = stats.ttest_1samp(nozero[:,j], 0) pvals.append(p) thr = fdr_correct.fdr2(pvals, q = 0.05) H = np.array(np.array(pvals<thr)).astype('int') if (thr>0): #find elecs with window that > chunksize and > threshold (10%) passed_thresh = edata[:, abs(bl_st)+st_tp::].mean(axis=0)>thresh sig_and_thresh = H * passed_thresh difference = np.diff(sig_and_thresh, n = 1, axis = 0) start_idx = np.where(difference==1)[0]+1 end_idx = np.where(difference == -1)[0] if start_idx.size > end_idx.size: #last chunk goes until end end_idx = np.append(end_idx, int(edata.shape[1]-abs(bl_st)-st_tp)) elif start_idx.size < end_idx.size: start_idx = np.append(0, start_idx) #starts immediately significant if (start_idx.size!=0): if (start_idx[0] > end_idx[0]): #starts immediately significant start_idx = np.append(0, start_idx) if (start_idx.size!=0): if (end_idx[-1] < start_idx[-1]):#significant until end end_idx = np.append(end_idx, int(edata.shape[1]-abs(bl_st)-st_tp)) start_idx = start_idx + st_tp #shift by st_tp end_idx = end_idx + st_tp chunk = (end_idx - start_idx) >= chunksize if sum(chunk) > 0: #significant windows on elecs that passed threshold (10%) (ignoring threshold and chunksize) difference = np.diff(H, n = 1, axis = 0) start_idx = np.where(difference==1)[0]+1 end_idx = np.where(difference == -1)[0] if start_idx.size > end_idx.size: #last chunk goes until end end_idx = np.append(end_idx, int(edata.shape[1]-abs(bl_st)-st_tp)) elif start_idx.size < end_idx.size: start_idx = np.append(0, start_idx) #starts immediately significant if (start_idx.size!=0): if (start_idx[0] > end_idx[0]): #starts immediately significant start_idx = np.append(0, start_idx) if (start_idx.size!=0): if (end_idx[-1] < start_idx[-1]):#significant until end end_idx = np.append(end_idx, int(edata.shape[1]-abs(bl_st)-st_tp)) start_idx = start_idx + st_tp #shift by st_tp end_idx = end_idx + st_tp black_chunk = (start_idx[1:] - end_idx[:-1])> black_chunksize #combine window separated by <200ms tmp = np.append(1,black_chunk).astype('bool') end_idx = end_idx[np.append(np.where(np.in1d(start_idx, start_idx[tmp]))[0][1:]-1, -1)] start_idx = 
start_idx[tmp] #drop chunks that <100ms chunk = (end_idx - start_idx) >= chunksize start_idx = start_idx[chunk] end_idx = end_idx[chunk] else: #no chunks start_idx = np.zeros((1,)) end_idx = np.zeros((1,)) else: #thr<0 start_idx = np.zeros((1,)) end_idx = np.zeros((1,)) subjs.extend([subj] * len(start_idx)) tasks.extend([task] * len(end_idx)) elecs.extend([e] * len(start_idx)) pthr.extend([thr] * len(end_idx)) starts.extend(start_idx) ends.extend(end_idx) data_dict = {'edata':edata, 'bl_st':bl_st, 'start_idx':start_idx, 'end_idx':end_idx, 'srate':srate,'thresh':thresh, 'chunksize':chunksize, 'black_chunksize':black_chunksize} #data_path = os.path.join(SJdir, 'PCA','ShadePlots_allelecs', 'data',''.join([subj, '_', task, '_e', str(e), '_empty.p']) data_path = os.path.join(SJdir, 'PCA','ShadePlots_allelecs', 'data',''.join([subj, '_', task, '_e', str(e), '.p'])) with open(data_path, 'w') as f: pickle.dump(data_dict, f) f.close() sig_windows = pd.DataFrame({'subj':subjs, 'task':tasks, 'elec':elecs, 'pthreshold':pthr, 'start_idx':starts, 'end_idx':ends}) sig_windows = sig_windows[['subj','task','elec', 'start_idx','end_idx','pthreshold']] sig_windows.to_csv(filename)
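# The function above (and several later ones in this file) relies on the local
# helper fdr_correct.fdr2 and repeats the same diff-based bookkeeping to turn a
# binary significance vector into (start, end) windows. The sketch below is a
# self-contained version of both steps; it assumes fdr2 implements a standard
# Benjamini-Hochberg threshold, which has not been verified against the original
# module. Note also that where the original writes np.array(pvals < thr) with
# pvals a plain Python list, the comparison only behaves as intended after
# converting to an array, e.g. np.asarray(pvals) < thr.
import numpy as np

def bh_threshold(pvals, q=0.05):
    """Largest p-value passing the Benjamini-Hochberg criterion (0 if none pass)."""
    p = np.sort(np.asarray(pvals, dtype=float))
    n = len(p)
    passed = p <= q * np.arange(1, n + 1) / n
    return p[passed].max() if passed.any() else 0.0

def significance_windows(sig):
    """Convert a 0/1 significance vector into arrays of window starts and ends."""
    sig = np.asarray(sig).astype(int)
    diff = np.diff(sig)
    starts = np.where(diff == 1)[0] + 1
    ends = np.where(diff == -1)[0]
    if sig[0]:                    # significant from the first sample
        starts = np.append(0, starts)
    if sig[-1]:                   # significant through the last sample
        ends = np.append(ends, len(sig) - 1)
    return starts, ends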
def shadeplots_elecs_stats_surr_random(id_num = 99): """ calculates params per electrode on surrogate data. Surrogate data is HG windows concatenated and circshifted. Only active HG included. calculates mean, peak, latency, and std per trial for all electrodes in an active cluster - added medians and coefficient of variation and mins uses windows for individual electrodes from PCA/Stats/single_electrode_windows_csvs/single_electrode_windows_withdesignation_EDITED.csv saves pickle file with numbers per trial in ShadePlots_hclust/elecs/significance_windows/unsmoothed Uses unsmoothed data No latencies for duration elecs Added fake data with trial index 12/18/14 """ SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/' saveDir_csv = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'unsmoothed', 'csv_files', 'orig', 'surr_rand_' + str(id_num)) saveDir_data= os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'unsmoothed', 'data', 'surr_rand_' + str(id_num)) if not(os.path.exists(saveDir_csv)) and not(os.path.exists(saveDir_data)): os.mkdir(saveDir_csv) os.mkdir(saveDir_data) print('making:\n%s\n%s' %(saveDir_csv, saveDir_data)) else: print('either %s\n or %s\n already exists!\nterminating...' %(saveDir_csv, saveDir_data)) #return filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_csvs', 'single_electrode_windows_withdesignation_EDITED.csv') df = pd.read_csv(filename) for s_t in df.groupby(['subj','task']): subj, task = s_t[0] #load data filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_unsmoothed.mat') data_dict = loadmat.loadmat(filename) active_elecs, Params, srate, RT, data_all = [data_dict.get(k) for k in ['active_elecs','Params','srate','RTs','data_percent']] #bl_st = Params['bl_st'] #bl_st = bl_st/1000*srate #if task in ['DecisionAud', 'DecisionVis']: bl_st = 500/1000*srate #for my data, remove cue from baseline - start/end_idx are relative to cue onset) - change 12/24 - okay with RT 12/25 RT = RT + abs(bl_st) #RTs are calculated from stim (my data cue) onset, need to account for bl in HG_elecMTX_percent (for 500, not 1000 baseline 12/25) maxes_idx, medians_idx, cofvar, maxes_rel, medians, means, stds, maxes, lats, sums, lats_pro, RTs, num_dropped, mins, lats_min = [dict() for i in range(15)] for row in s_t[1].itertuples(): _, _, subj, task, cluster, pattern, elec, start_idx, end_idx, start_idx_resp, end_idx_resp, _, _ = row eidx = np.in1d(active_elecs, elec) data = data_all[eidx,:,:].squeeze() #define start and end indices based on electrode type if any([(pattern == 'S'), (pattern == 'sustained'), (pattern == 'S+sustained'), (pattern == 'SR')]): start_idx = start_idx + abs(bl_st) end_idx = end_idx + abs(bl_st) if start_idx == end_idx: continue #for SR elecs that dont' have stimlocked (CP9, e91) print('%s %s %i %s\n' %(subj, task, elec, pattern)) #make surrogate dataset based on activity window data_surr = data[:, start_idx:end_idx].flatten() #take HG windows randidx = np.random.permutation(len(data_surr)) data_surr = data_surr.flatten() data_surr = data_surr[randidx] #shuffle data_surr = data_surr.reshape((data.shape[0], -1)) #reshape data into matrix data_idx = np.ones_like(data[:, start_idx:end_idx]) data_idx = (data_idx.transpose() * range(data_idx.shape[0])).transpose() #each trial labeled by trial number data_idx = data_idx.flatten() data_idx = data_idx[randidx] data_idx = data_idx.reshape((data.shape[0], -1)) #calculate stats (single trials) means[elec] = data_surr.mean(axis = 1) 
stds[elec] = data_surr.std(axis = 1) maxes[elec] = data_surr.max(axis = 1) lats[elec] = data_surr.argmax(axis = 1) lats_min[elec] = data_surr.argmin(axis = 1) sums[elec] = data_surr.sum(axis = 1) lats_pro[elec] = lats[elec] / len(np.arange(start_idx, end_idx)) RTs[elec] = RT medians[elec] = stats.nanmedian(data_surr, axis = 1) maxes_rel[elec] = maxes[elec]-means[elec] cofvar[elec] = stds[elec]/means[elec] mins[elec] = data_surr.min(axis = 1) medians_idx[elec] = stats.nanmedian(data_idx, axis = 1) maxes_idx[elec] = data_idx.max(axis = 1) if pattern == 'R': start_idx_resp = start_idx_resp end_idx_resp = end_idx_resp if start_idx_resp == end_idx_resp: continue #for inactive R elecs (not clear why on spreadsheet) print('%s %s %i %s\n' %(subj, task, elec, pattern)) #create data matrix data_resp = np.empty(data.shape) for j, r in enumerate(RT): tmp = data[j, r + start_idx_resp : r + end_idx_resp] tmp = np.pad(tmp, (0, data.shape[1]-len(tmp)), 'constant', constant_values = -999) data_resp[j,:] = tmp data_resp[data_resp == -999] = np.nan nanidx = np.isnan(np.nanmean(data_resp, axis = 1)) #if start > end for a trial (short RTs) if np.any(nanidx): #drop equivalent number of long RTs num_to_drop = np.sum(nanidx) i = np.argpartition(RT, -num_to_drop)[-num_to_drop :] #find the indices of the longest RTs nanidx[i] = True #mark the long trials as bad too data_resp[nanidx,:] = np.nan #drop nan from RTs tmp_RT = np.ndarray.astype(RT, dtype = float) tmp_RT[nanidx] = np.nan RTs[elec] = tmp_RT #make surrogate data data_surr = data_resp.flatten() #take HG window data_surr_drop = np.isnan(data_surr) #for dropping trials from data_idx data_surr = data_surr[~np.isnan(data_surr)] #remove nan (also drops trials that are completely nan) randidx = np.random.permutation(len(data_surr)) #shuffle data_surr = data_surr[randidx] data_surr = data_surr.reshape((data_resp.shape[0],-1)) #reshape trials x time (no nan buffer) data_surr = np.insert(data_surr, nanidx, np.empty((1, data_surr.shape[1])) * np.nan, axis = 0) #insert nan rows (numtrials of _surr == _resp) #make index matrix data_idx = np.ones_like(data_resp) data_idx = (data_idx.transpose() * range(data_idx.shape[0])).transpose() data_idx = data_idx.flatten() data_idx = data_idx[~data_surr_drop] #drop nan trials data_idx = data_idx[randidx] data_idx = data_idx.reshape((data_resp.shape[0], -1)) #reshape data_idx = np.insert(data_idx, nanidx, np.empty((1, data_resp.shape[1])) * np.nan, axis = 0) #insert nan rows (numtrials of _idx == _resp) else: #make surrogate data data_surr = data_resp.flatten() #take HG window data_surr_drop = np.isnan(data_surr) #for dropping trials from data_idx data_surr = data_surr[~np.isnan(data_surr)] #remove nan randidx = np.random.permutation(len(data_surr)) #shuffle data_surr = data_surr[randidx] data_surr = data_surr.reshape((data_resp.shape[0],-1)) #reshape RTs[elec] = RT data_idx = np.ones_like(data_resp) data_idx = (data_idx.transpose() * range(data_idx.shape[0])).transpose() #each trial labeled by trial number data_idx = data_idx.flatten() data_idx = data_idx[~data_surr_drop] #drop nan trials based on data_surr data_idx = data_idx[randidx] data_idx = data_idx.reshape((data_resp.shape[0],-1)) #reshape #reshape data_surr with nan buffer at end data_resp_surr = np.empty_like(data_resp) for j in range(data_surr.shape[0]): tmp = data_surr[j,:] tmp = np.pad(tmp, (0, data_resp.shape[1]-len(tmp)), 'constant', constant_values = -999) data_resp_surr[j,:] = tmp data_resp_surr[data_resp_surr == -999] = np.nan #reshape data_idx with nan 
data_idx_surr = np.empty_like(data_resp) for j in range(data_idx.shape[0]): tmp = data_idx[j,:] tmp = np.pad(tmp, (0, data_resp.shape[1]-len(tmp)), 'constant', constant_values = -999) data_idx_surr[j,:] = tmp data_idx_surr[data_idx_surr == -999] = np.nan #calculate params for (single trials) means[elec] = np.nanmean(data_resp_surr, axis = 1) stds[elec] = np.nanstd(data_resp_surr, axis = 1) maxes[elec] = np.nanmax(data_resp_surr, axis = 1) sums[elec] = np.nansum(data_resp_surr, axis = 1) medians[elec] = stats.nanmedian(data_resp_surr, axis = 1) maxes_rel[elec] = maxes[elec]-means[elec] cofvar[elec] = stds[elec]/means[elec] mins[elec] = np.nanmin(data_resp_surr, axis = 1) medians_idx[elec] = stats.nanmedian(data_idx_surr, axis = 1) maxes_idx[elec] = np.nanmax(data_idx_surr, axis = 1) if pattern == 'D': start_idx = start_idx + abs(bl_st) end_idx_resp = end_idx_resp print('%s %s %i %s\n' %(subj, task, elec, pattern)) #create data matrices data_dur = np.empty(data.shape) for j, r in enumerate(RT): tmp = data[j, start_idx : r + end_idx_resp] tmp = np.pad(tmp, (0, data.shape[1]-len(tmp)), 'constant', constant_values = -999) data_dur[j,:] = tmp data_dur[data_dur == -999] = np.nan nanidx = np.isnan(np.nanmean(data_dur, axis = 1)) #if start > end if np.any(nanidx): #drop equivalent number of long RTs num_to_drop = np.sum(nanidx) i = np.argpartition(RT, -num_to_drop)[-num_to_drop :] #find the indices of the longest RTs nanidx[i] = True #mark the long trials as bad too data_dur[nanidx, :] = np.nan #drop nan from RTs tmp_RT = np.ndarray.astype(RT, dtype = float) tmp_RT[nanidx] = np.nan RTs[elec] = tmp_RT else: RTs[elec] = RT #make surrogate data data_surr = data_dur.flatten() #take HG window data_surr_drop = np.isnan(data_surr) #for data_idx dropping points based on data_surr data_surr = data_surr[~np.isnan(data_surr)] #drop nan datapoints (pull out only HG) randidx = np.random.permutation(len(data_surr)) #shuffle data_surr = data_surr[randidx] #reshape data_surr with nan data_dur_surr = np.empty_like(data_dur) start = 0 for j in range(data_dur.shape[0]): trial_length = sum(~np.isnan(data_dur[j,:])) if j>0: start = end end = start + trial_length if trial_length>0: #not a nan trial tmp = data_surr[start:end] tmp = np.pad(tmp, (0, data_dur.shape[1]-len(tmp)), 'constant', constant_values = -999) data_dur_surr[j,:] = tmp else: #nan trial data_dur_surr[j,:] = -999 data_dur_surr[data_dur_surr == -999] = np.nan #make surrogate data for idx data_idx = np.ones_like(data_dur) data_idx = (data_idx.transpose() * range(data_idx.shape[0])).transpose() #trials x time with index for trial data data_idx = data_idx.flatten() data_idx = data_idx[~data_surr_drop] #remove datapoints that are missing in data_surr (to get same number of points) (pull out HG) data_idx = data_idx[randidx] #shuffle #reshape data_idx with nan data_dur_idx = np.empty_like(data_dur) start = 0 for j in range(data_dur.shape[0]): trial_length = sum(~np.isnan(data_dur[j,:])) if j>0: start = end end = start + trial_length if trial_length>0: #not a nan trial tmp = data_idx[start:end] tmp = np.pad(tmp, (0, data_dur.shape[1]-len(tmp)), 'constant', constant_values = -999) data_dur_idx[j,:] = tmp else: #nan trial data_dur_idx[j,:] = -999 data_dur_idx[data_dur_idx == -999] = np.nan #calculate params for single trials means[elec] = np.nanmean(data_dur_surr, axis = 1) stds[elec] = np.nanstd(data_dur_surr, axis = 1) maxes[elec] = np.nanmax(data_dur_surr, axis = 1) sums[elec] = np.nansum(data_dur_surr, axis = 1) medians[elec] = stats.nanmedian(data_dur_surr, 
axis = 1) maxes_rel[elec] = maxes[elec] - means[elec] cofvar[elec] = stds[elec]/means[elec] mins[elec] = np.nanmin(data_dur_surr, axis = 1) medians_idx[elec] = stats.nanmedian(data_dur_idx, axis = 1) maxes_idx[elec] = np.nanmax(data_dur_idx, axis = 1) #save stats (single trials) filename = os.path.join(saveDir_data, ''.join([subj, '_', task, '_surr_rand.p'])) data_dict = {'active_elecs': active_elecs, 'lats_pro': lats_pro, 'sums':sums, 'means':means, 'stds':stds, 'maxes':maxes, 'lats':lats, 'srate': srate, 'bl_st':bl_st,'RTs':RTs, 'dropped':num_dropped, 'maxes_rel' : maxes_rel, 'medians' : medians, 'variations': cofvar, 'mins': mins, 'lats_min':lats_min, 'medians_idx':medians_idx, 'maxes_idx':maxes_idx} with open(filename, 'w') as f: pickle.dump(data_dict, f) f.close() #save csv file for k in data_dict.keys(): if k in ['bl_st', 'srate','active_elecs', 'dropped']: continue data = pd.DataFrame(data_dict[k]) filename = os.path.join(saveDir_csv, '_'.join([subj, task, k]) + '_surr_rand.csv') #has nans for specific electrodes data.to_csv(filename, index = False) #save dataframe with dropped trials filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_withdesignation_EDITED_dropped_surr_rand_' + str(id_num) + '.csv') df.to_csv(filename)
def shadeplots_allelecs_2conditions(DATASET, SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta', chunk_size = 100, baseline = -500): """ calculate onset and offset window for difference between 2 conditions (real and empty) saves csv for each sub/task for easy plotting later #only relevant for EmoGen (not adjusted for my data start times) """ subj, task = DATASET.split('_') filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat') data = loadmat.loadmat(filename) srate = data['srate'] active_elecs = data['active_elecs'] data = data['data_percent'] filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_empty.mat') data_empty = loadmat.loadmat(filename) data_empty = data_empty['data_percent'] #convert to srate bl_st = baseline/1000*srate chunksize = chunk_size/1000*srate st_tp = 0 filename = os.path.join(SJdir, 'PCA', 'ShadePlots_allelecs', ''.join([subj, '_', task, '_real_vs_empty.csv'])) subjs = list(); tasks = list(); pthr = list(); elecs = list(); starts = list(); ends = list(); for i, e in enumerate(active_elecs): pvals = list(); edata = data[i,:] edata_empty = data_empty[i,:] #ttest between conditions for every time point for j in np.arange(abs(bl_st)+st_tp, edata.shape[1]): (t, p) = stats.ttest_ind(edata[:,j], edata_empty[:,j], equal_var = True) pvals.append(p) thr = fdr_correct.fdr2(pvals, q = 0.05) H = np.array(np.array(pvals)<thr).astype('int') #significance windows difference = np.diff(H, n = 1, axis = 0) start_idx = np.where(difference==1)[0]+1 end_idx = np.where(difference == -1)[0] if start_idx.size > end_idx.size: #last chunk goes until end end_idx = np.append(end_idx, int(edata.shape[1]-abs(bl_st)-st_tp)) elif start_idx.size < end_idx.size: start_idx = np.append(0, start_idx) #starts immediately significant if (start_idx.size!=0): if (start_idx[0] > end_idx[0]): #starts immediately significant start_idx = np.append(0, start_idx) if (start_idx.size!=0): if (end_idx[-1] < start_idx[-1]):#significant until end end_idx = np.append(end_idx, int(edata.shape[1]-abs(bl_st)-st_tp)) #drop chunks that < chunk_size chunk = (end_idx - start_idx) >= chunksize start_idx = start_idx[chunk] end_idx = end_idx[chunk] subjs.extend([subj] * len(start_idx)) tasks.extend([task] * len(end_idx)) elecs.extend([e] * len(start_idx)) pthr.extend([thr] * len(end_idx)) starts.extend(start_idx) ends.extend(end_idx) data_dict = {'edata':edata, 'edata_empty':edata_empty, 'bl_st':bl_st, 'start_idx':start_idx, 'end_idx':end_idx, 'srate':srate,'chunksize':chunksize} data_path = os.path.join(SJdir, 'PCA','ShadePlots_allelecs', 'data',''.join([subj, '_', task, '_e', str(e), '_real_vs_empty.p'])) with open(data_path, 'w') as f: pickle.dump(data_dict, f) f.close() sig_windows = pd.DataFrame({'subj':subjs, 'task':tasks, 'elec':elecs, 'pthreshold':pthr, 'start_idx':starts, 'end_idx':ends}) sig_windows = sig_windows[['subj','task','elec', 'start_idx','end_idx','pthreshold']] sig_windows.to_csv(filename)
def evaluate_lenet5(learning_rate=0.01, n_epochs=200, dataset='../testnn.mat', nkerns=[20, 20], batch_size=100): """ Demonstrates lenet on MNIST dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: path to the dataset used for training /testing (MNIST here) :type nkerns: list of ints :param nkerns: number of kernels on each layer """ rng = numpy.random.RandomState(123) # datasets = load_data(dataset) datasets = loadmat(dataset=dataset, shuffle=shuffle, datasel=datasel, scaling=scaling, robust=robust) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # the below comments are examples of using this cnn to deal with MNIST with input feature size 784 = 28*28 # Reshape matrix of rasterized images of shape (batch_size, 28 * 28) # to a 4D tensor, compatible with our LeNetConvPoolLayer # (28, 28) is the size of MNIST images. layer0_input = x.reshape((batch_size, 1, idim0_H, idim0_W)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24) # maxpooling reduces this further to (24/2, 24/2) = (12, 12) # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12) layer0 = LeNetConvPoolLayer( rng, input=layer0_input, image_shape=(batch_size, 1, idim0_H, idim0_W), filter_shape=(nkerns[0], 1, fdim0_H, fdim0_W), poolsize=(pdim0_H, pdim0_W) ) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8) # maxpooling reduces this further to (8/2, 8/2) = (4, 4) # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4) layer1 = LeNetConvPoolLayer( rng, input=layer0.output, image_shape=(batch_size, nkerns[0], idim1_H, idim1_W), filter_shape=(nkerns[1], nkerns[0], fdim1_H, fdim1_W), poolsize=(pdim1_H, pdim1_W) ) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4), # or (500, 50 * 4 * 4) = (500, 800) with the default values. 
layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer( rng, input=layer2_input, n_in=nkerns[1] * idim2_H * idim2_W, n_out=fdim2, activation=T.tanh ) # classify the values of the fully-connected sigmoidal layer nclass = max(train_set_y.eval()) + 1 layer3 = LogisticRegression(input=layer2.output, n_in=fdim2, n_out=nclass) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) train_score = theano.function( [index], layer3.errors(y), givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [ (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) # end-snippet-1 ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = timeit.default_timer() epoch = 0 done_looping = False while (epoch < n_epochs): if earlystop and done_looping: print 'early-stopping' break epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print 'training @ iter = ', iter cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] training_losses = [train_score(i) for i in xrange(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) this_training_loss = numpy.mean(training_losses) print('epoch %i, minibatch %i/%i, training error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_training_loss * 100.)) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = [ test_model(i) for i in xrange(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True if earlystop: break end_time = timeit.default_timer() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
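# evaluate_lenet5 above refers to module-level globals (idim0_H, fdim0_H,
# pdim0_H, idim1_H, idim2_H, fdim2, earlystop, shuffle, datasel, ...) that are
# not defined in this snippet. A short sketch of the dimension arithmetic the
# in-code comments describe (28 -> 24 -> 12 -> 8 -> 4 for MNIST) is shown below;
# the concrete values are illustrative, not taken from the original.
def conv_pool_output(size, filter_size, pool_size):
    """Size after a valid convolution followed by non-overlapping max pooling."""
    return (size - filter_size + 1) // pool_size

idim0_H = idim0_W = 28  # input image height/width (MNIST example)
fdim0_H = fdim0_W = 5   # first-layer filter size
pdim0_H = pdim0_W = 2   # first-layer pooling size
idim1_H = conv_pool_output(idim0_H, fdim0_H, pdim0_H)  # (28-5+1)/2 = 12
idim1_W = conv_pool_output(idim0_W, fdim0_W, pdim0_W)  # 12
fdim1_H = fdim1_W = 5   # second-layer filter size
pdim1_H = pdim1_W = 2   # second-layer pooling size
idim2_H = conv_pool_output(idim1_H, fdim1_H, pdim1_H)  # (12-5+1)/2 = 4
idim2_W = conv_pool_output(idim1_W, fdim1_W, pdim1_W)  # 4
fdim2 = 500             # hidden-layer width (assumed default)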
def stack_by_azimuth(ax,path2rf,lowT,highT,SNR_min=0.0,bin_by=1,nbins=10,bazi0=-180.0,bazi1=180.0): """ """ from matplotlib import pylab as plt from sys import path path.append('/Users/mancinelli/PROG/SUBS/PYTHON/') from loadmat import loadmat from numpy import zeros,isnan,std,mean from numpy.random import rand RFs_all=[] RPs_all=[] BAZIs_all=[] path=path if lowT<1.0: file_name='%s/RF_Depth_%.1fs_%ds.mat' % (path2rf,lowT,highT) else: file_name='%s/RF_Depth_%ds_%ds.mat' % (path2rf,lowT,highT) print '...loading %s' % (file_name) snr_limit=True; if snr_limit: snrfile='%s/SNR_%ds.txt' % (path2rf,lowT) file=open(snrfile) SNR=[] for line in file.readlines(): nfo=line.strip('\n').split() SNR.append(float(nfo[1])) file.close() matfile = loadmat(file_name) RFs = matfile["rfs"][:,:] BAZIs= matfile["BAZIsave"][:] RPs= matfile["RPsave"][:] depths = matfile["RF_Depth"][:,0] if len(SNR) != len(RFs): print '***Warning: len(SNR) != len(RFs) , %d , %d ' % (len(SNR), len(RFs)) dum=raw_input('Press enter to continue') tmp1,tmp2,tmp3=[],[],[] for ii in range(len(RFs)): if SNR[ii]>SNR_min: tmp1.append(RFs[ii]) tmp2.append(RPs[ii]) tmp3.append(BAZIs[ii]) RFs=tmp1 RPs=tmp2 BAZIs=tmp3 if bin_by == 1: x1=bazi1 x0=bazi0 xlist=BAZIs else: x0=min(RPs)*0.98 x1=max(RPs)*1.02 xlist=RPs stack=zeros(nbins*len(RFs[0])).reshape(nbins,len(RFs[0])) Nstack=zeros(nbins*len(RFs[0])).reshape(nbins,len(RFs[0])) for iRF,RF in enumerate(RFs): x=xlist[iRF] ibin=int( (x-x0) / (x1-x0) *nbins) if ibin < 0 or ibin > (nbins-1): print '***Warning ibin out of range, skipping...' continue for jj in range(len(RFs[0])): if isnan(RF[jj]) == False: stack[ibin,jj]=stack[ibin,jj]+RF[jj] Nstack[ibin,jj]=Nstack[ibin,jj]+1 for ibin in range(nbins): for jj in range(len(RFs[0])): if Nstack[ibin,jj]>0.: stack[ibin,jj]=stack[ibin,jj]/Nstack[ibin,jj] #demean and renorm for ibin in range(nbins): #stack[ibin,:]=stack[ibin,:]-mean(stack[ibin,:]) norm = max(abs(stack[ibin,:])) if norm>0.0: stack[ibin,:]=stack[ibin,:]/norm y2=min(depths) y1=max(depths) ax.imshow(stack.T,aspect='auto',cmap='RdBu_r',origin='upper',interpolation='nearest',extent=[x0,x1,y1,y2]) if bin_by==1: plt.xlabel('Back Azimuth (degrees)') else: plt.xlabel('Ray Parameter (s/km)') plt.ylabel('Depth (km)') return stack,y2,y1
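# A minimal usage sketch for stack_by_azimuth. The receiver-function directory
# below is a placeholder (it must contain the RF_Depth_*.mat files and the
# SNR_*.txt file the function expects), not a path from the original code.
if __name__ == '__main__':
    from matplotlib import pylab as plt
    fig, ax = plt.subplots()
    stack, depth_min, depth_max = stack_by_azimuth(
        ax, path2rf='/path/to/receiver_functions',  # hypothetical directory
        lowT=2, highT=10, SNR_min=1.5, bin_by=1, nbins=18)
    plt.show()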
def shadeplots_elecs_stats_surr_random(subj, task, df_pattern, id_num = 99): """ calculates params per electrode on surrogate data. Surrogate data is HG windows timepoints randomly shuffled. uses windows for individual electrodes from df_pattern (PCA/Stats/single_electrode_windows_csvs/single_electrode_windows_withdesignation_EDITED.csv) Uses unsmoothed data hardcoded - medians and maxes_rel and stds returns dictionary with features. each feature is dictionary of elecs """ SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/' #load data filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_unsmoothed.mat') data_dict = loadmat.loadmat(filename) active_elecs, Params, srate, RT, data_all = [data_dict.get(k) for k in ['active_elecs','Params','srate','RTs','data_percent']] bl_st = 500/1000*srate #for my data, remove cue from baseline - start/end_idx are relative to cue onset) - change 12/24 - okay with RT 12/25 RT = RT + abs(bl_st) #RTs are calculated from stim (my data cue) onset, need to account for bl in HG_elecMTX_percent (for 500, not 1000 baseline 12/25) RTs, medians, maxes_rel, means, stds, maxes = [dict() for i in range(6)] s_t = df_pattern[((df_pattern.subj == subj) & (df_pattern.task == task))] for e in s_t.elec.values: _, subj, task, cluster, pattern, elec, start_idx, end_idx, start_idx_resp, end_idx_resp, _, _ = s_t[s_t.elec == e].values[0] #if elec != 52: #HARDCODED # continue if pattern != 'D': #only run on duration electrodes continue print('%i...' %(elec), end = "") sys.stdout.flush() eidx = np.in1d(active_elecs, elec) data = data_all[eidx,:,:].squeeze() #define start and end indices based on electrode type if any([(pattern == 'S'), (pattern == 'sustained'), (pattern == 'S+sustained'), (pattern == 'SR')]): start_idx = start_idx + abs(bl_st) end_idx = end_idx + abs(bl_st) if start_idx == end_idx: continue #for SR elecs that dont' have stimlocked (CP9, e91) #make surrogate dataset based on activity window data_surr = data[:, start_idx:end_idx].flatten() #take HG windows randidx = np.random.permutation(len(data_surr)) data_surr = data_surr.flatten() data_surr = data_surr[randidx] #shuffle data_surr = data_surr.reshape((data.shape[0], -1)) #reshape data into matrix #calculate stats (single trials) means[elec] = data_surr.mean(axis = 1) stds[elec] = data_surr.std(axis = 1) maxes[elec] = data_surr.max(axis = 1) RTs[elec] = RT medians[elec] = stats.nanmedian(data_surr, axis = 1) maxes_rel[elec] = maxes[elec]-means[elec] if pattern == 'R': start_idx_resp = start_idx_resp end_idx_resp = end_idx_resp if start_idx_resp == end_idx_resp: continue #for inactive R elecs (not clear why on spreadsheet) #create data matrix data_resp = np.empty(data.shape) for j, r in enumerate(RT): tmp = data[j, r + start_idx_resp : r + end_idx_resp] tmp = np.pad(tmp, (0, data.shape[1]-len(tmp)), 'constant', constant_values = -999) data_resp[j,:] = tmp data_resp[data_resp == -999] = np.nan nanidx = np.isnan(np.nanmean(data_resp, axis = 1)) #if start > end for a trial (short RTs) if np.any(nanidx): #drop equivalent number of long RTs num_to_drop = np.sum(nanidx) i = np.argpartition(RT, -num_to_drop)[-num_to_drop :] #find the indices of the longest RTs nanidx[i] = True #mark the long trials as bad too data_resp[nanidx,:] = np.nan #drop nan from RTs tmp_RT = np.ndarray.astype(RT, dtype = float) tmp_RT[nanidx] = np.nan RTs[elec] = tmp_RT #make surrogate data data_surr = data_resp.flatten() #take HG window data_surr_drop = np.isnan(data_surr) #for dropping trials from data_idx data_surr = 
data_surr[~np.isnan(data_surr)] #remove nan (also drops trials that are completely nan) randidx = np.random.permutation(len(data_surr)) #shuffle data_surr = data_surr[randidx] data_surr = data_surr.reshape((data_resp.shape[0],-1)) #reshape trials x time (no nan buffer) data_surr = np.insert(data_surr, nanidx, np.empty((1, data_surr.shape[1])) * np.nan, axis = 0) #insert nan rows (numtrials of _surr == _resp) else: #make surrogate data data_surr = data_resp.flatten() #take HG window data_surr_drop = np.isnan(data_surr) #for dropping trials from data_idx data_surr = data_surr[~np.isnan(data_surr)] #remove nan randidx = np.random.permutation(len(data_surr)) #shuffle data_surr = data_surr[randidx] data_surr = data_surr.reshape((data_resp.shape[0],-1)) #reshape RTs[elec] = RT #reshape data_surr with nan buffer at end data_resp_surr = np.empty_like(data_resp) for j in range(data_surr.shape[0]): tmp = data_surr[j,:] tmp = np.pad(tmp, (0, data_resp.shape[1]-len(tmp)), 'constant', constant_values = -999) data_resp_surr[j,:] = tmp data_resp_surr[data_resp_surr == -999] = np.nan #calculate params for (single trials) means[elec] = np.nanmean(data_resp_surr, axis = 1) stds[elec] = np.nanstd(data_resp_surr, axis = 1) maxes[elec] = np.nanmax(data_resp_surr, axis = 1) medians[elec] = stats.nanmedian(data_resp_surr, axis = 1) maxes_rel[elec] = maxes[elec]-means[elec] if pattern == 'D': start_idx = start_idx + abs(bl_st) end_idx_resp = end_idx_resp #create data matrices data_dur = np.empty(data.shape) for j, r in enumerate(RT): tmp = data[j, start_idx : r + end_idx_resp] tmp = np.pad(tmp, (0, data.shape[1]-len(tmp)), 'constant', constant_values = -999) data_dur[j,:] = tmp data_dur[data_dur == -999] = np.nan nanidx = np.isnan(np.nanmean(data_dur, axis = 1)) #if start > end if np.any(nanidx): #drop equivalent number of long RTs num_to_drop = np.sum(nanidx) i = np.argpartition(RT, -num_to_drop)[-num_to_drop :] #find the indices of the longest RTs nanidx[i] = True #mark the long trials as bad too data_dur[nanidx, :] = np.nan #drop nan from RTs tmp_RT = np.ndarray.astype(RT, dtype = float) tmp_RT[nanidx] = np.nan RTs[elec] = tmp_RT else: RTs[elec] = RT #make surrogate data data_surr = data_dur.flatten() #take HG window data_surr_drop = np.isnan(data_surr) #for data_idx dropping points based on data_surr data_surr = data_surr[~np.isnan(data_surr)] #drop nan datapoints (pull out only HG) randidx = np.random.permutation(len(data_surr)) #shuffle data_surr = data_surr[randidx] #reshape data_surr with nan data_dur_surr = np.empty_like(data_dur) start = 0 for j in range(data_dur.shape[0]): trial_length = sum(~np.isnan(data_dur[j,:])) if j>0: start = end end = start + trial_length if trial_length>0: #not a nan trial tmp = data_surr[start:end] tmp = np.pad(tmp, (0, data_dur.shape[1]-len(tmp)), 'constant', constant_values = -999) data_dur_surr[j,:] = tmp else: #nan trial data_dur_surr[j,:] = -999 data_dur_surr[data_dur_surr == -999] = np.nan #calculate params for single trials means[elec] = np.nanmean(data_dur_surr, axis = 1) stds[elec] = np.nanstd(data_dur_surr, axis = 1) maxes[elec] = np.nanmax(data_dur_surr, axis = 1) medians[elec] = stats.nanmedian(data_dur_surr, axis = 1) maxes_rel[elec] = maxes[elec] - means[elec] #output dictionary of params per elec data_dict = {'RTs':RTs, 'maxes_rel' : maxes_rel, 'medians' : medians, 'stds': stds} return data_dict
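# A sketch of how shadeplots_elecs_stats_surr_random might be driven to build a
# null distribution of per-trial features across repeated shuffles. The
# subject/task names below are placeholders, not values from the original code.
import os
import pandas as pd

SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/'
filename = os.path.join(SJdir, 'PCA', 'Stats', 'single_electrode_windows_csvs',
                        'single_electrode_windows_withdesignation_EDITED.csv')
df_pattern = pd.read_csv(filename)

null_medians = []
for it in range(1000):
    feats = shadeplots_elecs_stats_surr_random('SUBJ', 'TASK', df_pattern, id_num=it)
    null_medians.append(feats['medians'])  # dict mapping elec -> per-trial medians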
F1 = Filter(
    den=[1, -4.989216395071318, 9.956976990745105, -9.935631971923312,
         4.957198554483321, -0.9893271782278296],
    num=[0.0001878726842913545, -0.0005635670357698394, 0.0003756943544614141,
         0.0003756943544614141, -0.0005635670357698394, 0.0001878726842913545],
    name="Xilinx")
S1 = State_Space(F1)

# -------------------------
# Damien Lefebvre's example
# This large system ($n=10$) comes from control theory: the filter is used as a
# controller for active control of vehicle longitudinal oscillation~\cite{Lefe03}
d = loadmat('exDL.mat')['DL_Cor']
A, B, C, D = [mat(d[x]) for x in ('a', 'b', 'c', 'd')]
S2 = State_Space(
    Filter(A=A, B=B.transpose(), C=C, D=D, name='longitudinal'))
# 'longitudinal oscillation controller DL'

# SDR example (from Fig. 15 of "Software-Defined Radio FPGA Cores: Building
# towards a Domain-Specific Language")
# Fs=10kHz, Fstop1=2.190, Fpass1=2.1972, Fpass2=2.1974, Fstop2=2.210,
# Astop1=200dB, Apass=0.1dB, Astop2=200dB
# This filter comes from a testbench in a Software-Defined Radio system~\cite[Fig. 15]{}.
# It is a 6th-order Butterworth filter designed with the following parameters:
# sampling frequency = 10 kHz, lower cutoff frequency = 2.190 kHz, higher cutoff
# frequency = 2.210 kHz, passband ripple = 0.1 dB, stopband attenuation = 200 dB.
d = loadmat('SDR.mat')
# WARNING: this filter is designed with an SOS structure. When converted to
# state-space (by MATLAB), I am not sure whether the spectral radius is below 1
# (when fdatool makes the single-structure conversion, the filter is no longer stable).
A, B, C, D = [mat(d[x]) for x in ('A', 'B', 'C', 'D')]
S3 = State_Space(Filter(A=A, B=B.transpose(), C=C, D=D, name='SDR'))  # Software-Defined Radio
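# The WARNING above asks whether the converted SDR state-space realization is
# stable. A quick sketch of checking the spectral radius of A (which must be
# strictly below 1 for a stable discrete-time filter):
from numpy.linalg import eigvals

rho = max(abs(eigvals(A)))
print('spectral radius of the SDR state-space A: %.6f (%s)'
      % (rho, 'stable' if rho < 1.0 else 'NOT stable'))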
def shadeplots_elecs_stats(resplocked = False): """ calculates mean, max, min, latency, median, and std on the mean trace for trial for all electrodes in an active cluster OLD - uses electrodes and windows from PCA/Stats/single_electrode_windows_withdesignation_EDITED.csv NOW - uses electrodes and windows from PCA/csvs_FINAL/final_windows.csv (after going through and editing them) calculates both stimulus and response locked parameters """ SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/' #filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_csvs', 'single_electrode_windows_withdesignation_EDITED.csv') filename = os.path.join(SJdir, 'PCA', 'csvs_FINAL', 'final_windows.csv') df = pd.read_csv(filename) #df = df.query("subj not in ['GP27', 'GP44', 'ST28']") #drop unused subjects if resplocked: for s_t in df.groupby(['subj','task']): subj, task = s_t[0] #load data filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat') data_dict = loadmat.loadmat(filename) active_elecs, Params, srate, RT, data_trials = [data_dict.get(k) for k in ['active_elecs','Params','srate','RTs','data_percent']] srate = float(srate) data_all = data_trials.mean(axis = 1) #mean across trials, (new shape is elecs x time) bl_st = -500/1000*srate medians, means, stds, maxes, lats, lats_pro, RTs, mins, lats_min, RTs_median, RTs_min = [dict() for i in range(11)] RT = RT + abs(bl_st) #RTs are calculated from stim onset, need to account for bl in HG_elecMTX_percent for row in s_t[1].itertuples(): _, subj, task, cluster, pattern, elec, start_idx, end_idx, start_idx_resp, end_idx_resp = row eidx = np.in1d(active_elecs, elec) data = data_trials[eidx,:].squeeze() #only do response electrodes if pattern == 'R': start_idx_resp = start_idx_resp end_idx_resp = end_idx_resp if start_idx_resp == end_idx_resp: continue #for inactive R elecs (not clear why on spreadsheet) #create data matrix data_resp = np.empty((data_trials.shape[1], end_idx_resp-start_idx_resp)) for j, r in enumerate(RT): tmp = data[j, r + start_idx_resp : r + end_idx_resp] data_resp[j,:] = tmp data_resp = data_resp.mean(axis = 1) #mean acros trials, new shape is elecs x time #calculate stats (mean trace) means[elec] = data_resp.mean() stds[elec] = data_resp.std() maxes[elec] = data_resp.max() lats[elec] = (data_resp.argmax()+1)/srate*1000 lats_min[elec] = (data_resp.argmin()+1)/srate*1000 #convert to ms medians[elec] = stats.nanmedian(data_resp) mins[elec] = data_resp.min() RTs[elec] = (RT+Params['bl_st']/1000*srate).mean()/srate*1000 #from stimulus onset (adjusted for all subjects) RTs_median[elec] = np.median(RT+Params['bl_st']/1000*srate)/srate*1000 #from stimulus onset (adjusted for all subjects) RTs_min[elec] = np.min(RT+Params['bl_st']/1000*srate)/srate*1000 #from stimulus onset (adjusted for all subjects) #save stats (mean traces) filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed', 'mean_traces', 'data', ''.join([subj, '_', task, '_resplocked.p'])) data_dict = {'means':means, 'stds':stds, 'maxes':maxes, 'lats':lats, 'srate': srate, 'bl_st':bl_st, 'RTs':RTs, 'medians' : medians, 'mins': mins, 'lats_min':lats_min, 'RTs_median': RTs_median, 'RTs_min': RTs_min} with open(filename, 'w') as f: pickle.dump(data_dict, f) f.close() #update csv file for k in data_dict.keys(): if k in ['bl_st', 'srate','active_elecs']: data_dict.pop(k, None) df_values = pd.DataFrame(data_dict) #save dataframe with values for all elecs for subject/task filename = os.path.join(SJdir, 'PCA', 
'ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed', 'mean_traces', 'csv_files', '_'.join([subj, task, 'resplocked']) + '.csv') df_values.to_csv(filename) else: #not response locked for s_t in df.groupby(['subj','task']): subj, task = s_t[0] #if ((subj == 'ST1') and (task == 'SelfAud') and (cluster == 2)): #drop bc garbage cluster # continue #load data filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat') data_dict = loadmat.loadmat(filename) active_elecs, Params, srate, RT, data_trials = [data_dict.get(k) for k in ['active_elecs','Params','srate','RTs','data_percent']] srate = float(srate) data_all = data_trials.mean(axis = 1) #mean across trials, (new shape is elecs x time) bl_st = -500/1000*srate #in data points filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'unsmoothed', 'data', ''.join([subj, '_', task, '.p'])) #for medians and means data_dict = pickle.load(open(filename, 'rb')) #keys are medians, means, for single trial values medians, means, stds, maxes, lats, RTs, mins, lats_min, RTs_median, RTs_min, lats_static, lats_min_static, lats_semi_static = [dict() for i in range(13)] RT = RT + abs(bl_st) #RTs are calculated from stim/cue onset, need to account for bl in HG_elecMTX_percent for row in s_t[1].itertuples(): _, subj, task, elec, pattern, cluster, start_idx, end_idx, start_idx_resp, end_idx_resp = row #in datapoints eidx = np.in1d(active_elecs, elec) data = data_all[eidx,:].squeeze() #mean trace #define start and end indices based on electrode type if any([(pattern == 'S'), (pattern == 'sustained'), (pattern == 'S+sustained'), (pattern == 'SR')]): start_idx = start_idx + abs(bl_st) end_idx = end_idx + abs(bl_st) if pattern == 'R': #fixed so can use stim locked onsets/offsets start_idx = start_idx + abs(bl_st) end_idx = end_idx + abs(bl_st) if pattern == 'D': start_idx = start_idx + abs(bl_st) end_idx = np.median(RT) + end_idx_resp if start_idx == end_idx: continue #for inactive R elecs (not clear why on spreadsheet) #calculate stats (mean trace) means[elec] = np.nanmean(data_dict['means'][elec]) #from single trials medians[elec] = np.nanmean(data_dict['medians'][elec]) #from single trials maxes[elec] = data[start_idx:end_idx].max() lats[elec] = (data[start_idx:end_idx].argmax()+1)/srate*1000 lats_min[elec] = (data[start_idx:end_idx].argmin()+1)/srate*1000 stds[elec] = data[start_idx:end_idx].std() mins[elec] = data[start_idx:end_idx].min() RTs[elec] = (RT+bl_st).mean()/srate*1000 #from stimulus onset (adjusted for all subjects) RTs_median[elec] = np.median(RT+bl_st)/srate*1000 #from stimulus onset (adjusted for all subjects) RTs_min[elec] = np.min(RT+bl_st)/srate*1000 #from stimulus onset (adjusted for all subjects) lats_static[elec] = (data[abs(bl_st)::].argmax()+1)/srate*1000 #from stimulus onset to end (adjusted for all subjects) lats_min_static[elec] = (data[abs(bl_st)::].argmin()+1)/srate*1000 #from stimulus onset to end (adjusted for all subjects) lats_semi_static[elec] = (data[start_idx::].argmax()+1)/srate*1000 #save stats (mean traces) #filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed', 'mean_traces', 'data', ''.join([subj, '_', task, '.p'])) data_dict = {'means':means, 'stds':stds, 'maxes':maxes, 'lats':lats, 'srate': srate, 'bl_st':bl_st, 'RTs':RTs, 'medians' : medians, 'mins': mins, 'lats_min':lats_min, 'RTs_median': RTs_median, 'RTs_min' : RTs_min, 'lats_static' : lats_static, 'lats_min_static' : lats_min_static, 'lats_semi_static' : 
lats_semi_static} #with open(filename, 'w') as f: # pickle.dump(data_dict, f) # f.close() #update csv file for k in data_dict.keys(): if k in ['bl_st', 'srate','active_elecs']: data_dict.pop(k, None) df_values = pd.DataFrame(data_dict) #save dataframe with values for all elecs for subject/task - later combined into mean_traces_all_elecs.csv in elec_values.ipynb filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed', 'mean_traces', 'csv_files', '_'.join([subj, task]) + '.csv') df_values.to_csv(filename)
import os

import numpy as np
import scipy.ndimage as spi
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize
import tensortools as tt
import loadmat as lm  # local module; alias inferred from usage below
# The spi/tt aliases above are inferred from the calls below (gaussian_filter,
# ncp_bcd, kruskal_align, plot_factors); prepareData, _fast_bin and _fast_occ
# are assumed to be local helpers of this project.


def run_pipeline(filename):
    data = lm.loadmat(filename)
    session_name = os.path.basename(filename)[0:-4]
    (good_cells, pos_edges, trial_idx, spikelocations, spike_idx, location_vec) = prepareData(data)

    n_trials = 30
    n_cells = len(good_cells)

    # spike counts per cell x position bin x trial
    shape = (n_cells, len(pos_edges) - 1, n_trials)
    counts = np.zeros(shape, dtype=float)
    _fast_bin(counts, trial_idx, spikelocations, spike_idx)

    # occupancy per position bin x trial, with empty bins filled by interpolation
    occupancy = np.zeros((len(pos_edges) - 1, n_trials), dtype=float)
    _fast_occ(occupancy, data['trial'] - 1, location_vec)
    for iT in range(n_trials):
        tmp = occupancy[:, iT]
        idx_v = np.flatnonzero(tmp)
        idx_n = np.flatnonzero(tmp == 0)
        tmp[idx_n] = np.interp(idx_n, idx_v, tmp[idx_v])
        occupancy[:, iT] = tmp

    # occupancy-normalized, smoothed spatial firing-rate maps
    spMapN = np.zeros(counts.shape)
    for iC in range(n_cells):
        spMapN[iC, :, :] = np.divide(counts[iC, :, :], occupancy)
    spMapN = spi.gaussian_filter(spMapN, (0, 2, 0))

    n_cells = len(good_cells)
    n_bins = len(pos_edges) - 1
    spFlat = np.zeros((n_cells, n_trials * n_bins))
    for iC in range(n_cells):
        spFlat[iC, :] = spMapN[iC, :, :].ravel(order='F')
    # spFlat = spFlat-spFlat.mean(axis=1)[:,np.newaxis]
    spFlat = normalize(spFlat, axis=0, norm='l2')
    for iC in range(n_cells):
        for iT in range(n_trials):
            start = iT * n_bins
            stop = (iT + 1) * n_bins
            trial_idx = np.arange(start, stop)
            tmp = spFlat[iC, trial_idx]
            spMapN[iC, :, iT] = tmp

    R = 5
    # Fit CP tensor decomposition (two times).
    U = tt.ncp_bcd(spMapN, rank=R, verbose=False)
    V = tt.ncp_bcd(spMapN, rank=R, verbose=False)

    # Align the two fits and print a similarity score.
    sim = tt.kruskal_align(U.factors, V.factors, permute_U=True, permute_V=True)
    # print(sim)

    # Plot the results again to see alignment.
    fig, ax, po = tt.plot_factors(U.factors)
    tt.plot_factors(V.factors, fig=fig)
    fig.suptitle("aligned models")
    fig.tight_layout()
    fig.savefig('C:\\temp\\try3\\' + session_name + '_tca.png')

    # trial-by-trial similarity of the flattened, normalized maps
    ff = np.matmul(np.transpose(spFlat), spFlat)
    plt.figure()
    ax = plt.imshow(ff)
    plt.colorbar()
    plt.axvline(x=n_bins * 20, color='red', ls='--', linewidth=1)
    plt.axvline(x=n_bins * 21, color='green', ls='--', linewidth=1)
    plt.axhline(y=n_bins * 20, color='red', ls='--', linewidth=1)
    plt.axhline(y=n_bins * 21, color='green', ls='--', linewidth=1)
    plt.savefig('C:\\temp\\try3\\' + session_name + '_cov.png')
    plt.close('all')
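# A sketch of how run_pipeline might be driven over a directory of session
# files; the glob pattern below is a placeholder, not a path from the original
# code.
if __name__ == '__main__':
    import glob
    for session_file in glob.glob(r'C:\temp\np_data\*.mat'):  # hypothetical directory
        try:
            run_pipeline(session_file)
        except Exception as err:
            print('%s failed: %s' % (session_file, err))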
def shadeplots_median_split(subj, task, SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta', thresh = 0, chunk_size = 0, baseline = -500, black_chunk_size = 0): """ takes median split of RTs and calculates difference between them (short vs long RT trials) only runs on elecs that are easy/difficult from overlap csv calculate onset and offset window for given electrode. Compares short vs long RT trials for each electrode for the unique/overlap tasks saves csv for each sub/task for easy plotting later """ filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat') data = loadmat.loadmat(filename) srate = data['srate'] elecs = data['active_elecs'] RTs = data['RTs'] data = data['data_percent'] median_value = np.median(RTs) shortdata = data[:, RTs<median_value, :] longdata = data[:, RTs>median_value, :] #convert to srate bl_st = baseline/1000*srate chunksize = chunk_size/1000*srate black_chunksize = black_chunk_size/1000*srate subjs = list(); pthr = list(); elecs = list(); starts = list(); ends = list(); overlapfile = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed', 'mean_traces', 'csv_files', subj+'_ovelapped_dur_elecs.csv') df = pd.read_csv(overlapfile) elecs_list = np.unique((df.easy.fillna(0) + df.difficult.fillna(0)).values) for i, e in enumerate(elecs_list): idx = np.in1d(elecs_list, e) edataShort = shortdata[idx,:,:].squeeze() edataLong = longdata[idx,:,:].squeeze() pvals = list(); for j in np.arange(abs(bl_st), edataShort.shape[1]): (t, p) = stats.ttest_ind(edataShort[:,j], edataLong[:,j]) pvals.append(p) thr = fdr_correct.fdr2(pvals, q = 0.05) H = np.array(np.array(pvals<thr)).astype('int') if (thr>0): #find elecs with window that > chunksize and > threshold (10%) passed_thresh = abs(edataShort[:, abs(bl_st)::].mean(axis=0) - edataLong[:, abs(bl_st)::].mean(axis = 0)) >thresh #difference between blocks is > 10% threshold sig_and_thresh = H * passed_thresh difference = np.diff(sig_and_thresh, n = 1, axis = 0) start_idx = np.where(difference==1)[0]+1 end_idx = np.where(difference == -1)[0] if start_idx.size > end_idx.size: #last chunk goes until end end_idx = np.append(end_idx, int(edataShort.shape[1]-abs(bl_st))) elif start_idx.size < end_idx.size: start_idx = np.append(0, start_idx) #starts immediately significant if (start_idx.size!=0): if (start_idx[0] > end_idx[0]): #starts immediately significant start_idx = np.append(0, start_idx) if (start_idx.size!=0): if (end_idx[-1] < start_idx[-1]):#significant until end end_idx = np.append(end_idx, int(edataShort.shape[1]-abs(bl_st))) chunk = (end_idx - start_idx) >= chunksize if sum(chunk) > 0: #significant windows on elecs that passed threshold (10%) (ignoring threshold and chunksize) difference = np.diff(H, n = 1, axis = 0) start_idx = np.where(difference==1)[0]+1 end_idx = np.where(difference == -1)[0] if start_idx.size > end_idx.size: #last chunk goes until end end_idx = np.append(end_idx, int(edataShort.shape[1]-abs(bl_st))) elif start_idx.size < end_idx.size: start_idx = np.append(0, start_idx) #starts immediately significant if (start_idx.size!=0): if (start_idx[0] > end_idx[0]): #starts immediately significant start_idx = np.append(0, start_idx) if (start_idx.size!=0): if (end_idx[-1] < start_idx[-1]):#significant until end end_idx = np.append(end_idx, int(edataShort.shape[1]-abs(bl_st))) black_chunk = (start_idx[1:] - end_idx[:-1]) > black_chunksize #combine window separated by <200ms tmp = np.append(1,black_chunk).astype('bool') end_idx = 
end_idx[np.append(np.where(np.in1d(start_idx, start_idx[tmp]))[0][1:]-1, -1)] start_idx = start_idx[tmp] #drop chunks that <100ms chunk = (end_idx - start_idx) >= chunksize start_idx = start_idx[chunk] end_idx = end_idx[chunk] else: #no chunks start_idx = np.zeros((1,)) end_idx = np.zeros((1,)) else: #thr<0 start_idx = np.zeros((1,)) end_idx = np.zeros((1,)) subjs.extend([subj] * len(start_idx)) elecs.extend([e] * len(start_idx)) pthr.extend([thr] * len(end_idx)) starts.extend(start_idx) ends.extend(end_idx) data_dict = {'edataShort':edataShort, 'edataLong':edataLong, 'bl_st':bl_st, 'start_idx':start_idx, 'end_idx':end_idx, 'srate':srate,'thresh':thresh, 'chunksize':chunksize, 'black_chunksize':black_chunksize} data_path = os.path.join(SJdir, 'PCA','ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed', 'mean_traces', 'csv_files', ''.join([subj,task, '_', 'Long_vs_Short', '_e', str(int(e)), '.p'])) with open(data_path, 'w') as f: pickle.dump(data_dict, f) f.close() filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed','mean_traces', 'csv_files', '_'.join([subj, task, 'long_vs_short_RTs']) +'.csv') sig_windows = pd.DataFrame({'subj':subjs, 'elec':elecs, 'pthreshold':pthr, 'start_idx':starts, 'end_idx':ends}) sig_windows = sig_windows[['subj', 'elec', 'start_idx','end_idx','pthreshold']] sig_windows.to_csv(filename) return sig_windows
def shadeplots_clusters_resp(DATASET, SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta', thresh = 15, chunk_size = 100, start_resp = -500, end_resp = 500, baseline = -500, black_chunk_size = 0): """ calculate onset and offset window for every active electrode (ignoring clusters) saves csv for each sub/task for easy plotting later """ subj, task = DATASET.split('_') filenames = glob.glob(os.path.join(SJdir, 'PCA', 'SingleTrials_hclust', '_'.join([subj, task, 'c*mat']))) subjs = list(); tasks = list(); pthr = list(); clusts = list(); starts = list(); ends = list(); for filename in filenames: data = loadmat.loadmat(filename) srate = data['srate'] cdata = data['cdata'] RTs = data['RTs_all'] cluster = int(filename.split('_')[-1].split('.')[0][1:]) #convert to srate bl_st = baseline/1000*srate chunksize = chunk_size/1000*srate black_chunksize = black_chunk_size/1000*srate st_resp = int(start_resp/1000*srate) en_resp = int(end_resp/1000*srate) #shift RTs by baseline #if task in ['DecisionAud']: # st_tp = 600/1000*srate #elif task in ['DecisionVis']: # st_tp = 500/1000*srate #else: # st_tp = 0 #RTs = RTs+abs(bl_st)+st_tp RTs = RTs+abs(bl_st) #make resplocked cluster data cdata_resp = np.zeros((len(RTs), len(np.arange(st_resp, en_resp)))) RTs = RTs[RTs+st_resp>=0] #drop RTs that are too short for j, r in enumerate(RTs): cdata_resp[j,:] = cdata[j, r+st_resp:r+en_resp] nozero = np.copy(cdata_resp) nozero[:,nozero.mean(axis=0)<0] = 0 #zero out negative values pvals = list(); for t in np.arange(0, cdata_resp.shape[1]): (t, p) = stats.ttest_1samp(nozero[:,t], 0) pvals.append(p) thr = fdr_correct.fdr2(pvals, q = 0.05) H = np.array((pvals<thr)).astype('int') if (thr>0): #find elecs with window that > chunksize and > threshold (10%) passed_thresh = cdata_resp.mean(axis = 0) > thresh sig_and_thresh = H * passed_thresh difference = np.diff(sig_and_thresh, n = 1, axis = 0) start_idx = np.where(difference==1)[0]+1 end_idx = np.where(difference == -1)[0] start_idx = start_idx+st_resp #shift by 500 end_idx = end_idx+st_resp if start_idx.size > end_idx.size: #last chunk goes until end end_idx = np.append(end_idx, en_resp) elif start_idx.size < end_idx.size: start_idx = np.append(st_resp, start_idx) #starts immediately significant if (start_idx.size!=0): if (start_idx[0] > end_idx[0]): #starts immediately significant start_idx = np.append(st_resp, start_idx) if (start_idx.size!=0): if (end_idx[-1] < start_idx[-1]):#significant until end end_idx = np.append(end_idx, en_resp) chunk = (end_idx - start_idx) >= chunksize if sum(chunk) > 0: #significant windows on those that passed threshold (10%) (ignoring threshold and chunksize) difference = np.diff(H, n = 1, axis = 0) start_idx = np.where(difference==1)[0]+1 end_idx = np.where(difference == -1)[0] start_idx = start_idx+st_resp #shift by 500 end_idx = end_idx+st_resp if start_idx.size > end_idx.size: #last chunk goes until end end_idx = np.append(end_idx, en_resp) elif start_idx.size < end_idx.size: start_idx = np.append(st_resp, start_idx) #starts immediately significant if (start_idx.size!=0): if (start_idx[0] > end_idx[0]): #starts immediately significant start_idx = np.append(st_resp, start_idx) if (start_idx.size!=0): if (end_idx[-1] < start_idx[-1]):#significant until end end_idx = np.append(end_idx, en_resp) black_chunk = (start_idx[1:] - end_idx[:-1])> black_chunksize #combine window separated by <200ms tmp = np.append(1,black_chunk).astype('bool') end_idx = end_idx[np.append(np.where(np.in1d(start_idx, start_idx[tmp]))[0][1:]-1, -1)] start_idx = 
start_idx[tmp] #drop chunks that <100ms chunk = (end_idx - start_idx) >= chunksize start_idx = start_idx[chunk] end_idx = end_idx[chunk] else: #no chunks start_idx = np.zeros((1,)) end_idx = np.zeros((1,)) else: #thr<0 start_idx = np.zeros((1,)) end_idx = np.zeros((1,)) subjs.extend([subj] * len(start_idx)) tasks.extend([task] * len(end_idx)) clusts.extend([cluster] * len(start_idx)) pthr.extend([thr] * len(end_idx)) starts.extend(start_idx) ends.extend(end_idx) data_dict = {'cdata_resp':cdata_resp, 'bl_st':bl_st, 'start_idx':start_idx, 'end_idx':end_idx, 'srate':srate, 'chunksize': chunksize, 'black_chunksize':black_chunksize, 'cluster':cluster, 'thresh': thresh, 'st_resp':st_resp, 'en_resp':en_resp, 'RTs':RTs} data_path = os.path.join(SJdir, 'PCA','ShadePlots_hclust', 'resplocked_all', 'data',''.join([subj, '_', task, '_c', str(cluster), '.p'])) with open(data_path, 'w') as f: pickle.dump(data_dict, f) f.close() fname = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'resplocked_all', ''.join([subj, '_', task, '.csv'])) sig_windows = pd.DataFrame({'subj':subjs, 'task':tasks, 'cluster':clusts, 'pthreshold':pthr, 'start_idx':starts, 'end_idx':ends}) sig_windows = sig_windows[['subj','task','cluster', 'start_idx','end_idx','pthreshold']] sig_windows.to_csv(fname)
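The response-locking step above rebuilds each trial around its RT. A compact sketch of that re-epoching (illustration only, with the too-short trials dropped from both the data and the RT vector so the output rows stay aligned) is:

import numpy as np

def resp_lock(cdata, RTs, st_resp, en_resp):
    """Cut response-locked epochs out of stimulus-locked single-trial data.
    cdata is trials x time (in samples); RTs are response times in samples
    relative to the start of cdata. Illustrative sketch only."""
    RTs = np.asarray(RTs).astype(int)
    keep = (RTs + st_resp >= 0) & (RTs + en_resp <= cdata.shape[1])
    cdata, RTs = cdata[keep], RTs[keep]      # drop trials together so rows stay aligned
    cdata_resp = np.zeros((len(RTs), en_resp - st_resp))
    for j, r in enumerate(RTs):
        cdata_resp[j, :] = cdata[j, r + st_resp:r + en_resp]
    return cdata_resp, RTs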
import scipy.io
from loadmat import loadmat
import matplotlib as mpl
%matplotlib inline
default_dpi = mpl.rcParamsDefault['figure.dpi']
mpl.rcParams['figure.dpi'] = default_dpi * 2
import matplotlib.pyplot as plt
from hsi_detectors import smf_detector, ace_detector

# load gulfport campus image
img_fname = 'muufl_gulfport_campus_w_lidar_1.mat'
spectra_fname = 'tgt_img_spectra.mat'
dataset = loadmat(img_fname)['hsi']
hsi = dataset['Data']
n_r, n_c, n_b = hsi.shape
wvl = dataset['info']['wavelength']
rgb = dataset['RGB']

# load the target signatures
spectra_dataset = loadmat(spectra_fname)
tgts = spectra_dataset['tgt_img_spectra']['spectra']
tgt_names = spectra_dataset['tgt_img_spectra']['names']

# check out the shape of the targets array
tgts.shape
# check out the target values
tgts
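The cell above only loads the cube and the target spectra. A possible next step is a sketch like the following, assuming smf_detector/ace_detector accept the (n_r, n_c, n_b) cube plus a single (n_b,) signature and return a per-pixel confidence map (check the hsi_detectors docstrings for the exact signature and return values):

tgt_sig = tgts[:, 0]                      # first target signature
ace_out = ace_detector(hsi, tgt_sig)      # some versions may return (conf_map, extras)
smf_out = smf_detector(hsi, tgt_sig)

fig, axes = plt.subplots(1, 3, figsize=(12, 4))
axes[0].imshow(rgb)
axes[0].set_title('RGB')
axes[1].imshow(ace_out)
axes[1].set_title('ACE: %s' % tgt_names[0])
axes[2].imshow(smf_out)
axes[2].set_title('SMF: %s' % tgt_names[0])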
def test_cnn(trainpath, trainlist, validset, dumppath, learning_rate=0.01, n_epochs=200, batch_size=100, earlystop=True): """ :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: path to the dataset used for training /testing (MNIST here) :type nkerns: list of ints :param nkerns: number of kernels on each layer """ rng = numpy.random.RandomState(123) # datasets = load_data(dataset) datasets = loadmat(trainpath=trainpath, trainlist=trainlist, validset=validset, shuffle=shuffle, datasel=datasel, scaling=scaling, robust=robust) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # H - height; W - width # when the input is note salience matrix # idim0_H = 42 # idim0_W = 36 # fdim0_H = 6 # fdim0_W = 6 # when the input is chromagram idim0_H = 12 idim0_W = 12 fdim0_H = 2 fdim0_W = 2 pdim0_H = 2 pdim0_W = 2 idim1_H = (idim0_H - fdim0_H + 1) / pdim0_H idim1_W = (idim0_W - fdim0_W + 1) / pdim0_W fdim1_H = 2 fdim1_W = 2 pdim1_H = 2 pdim1_W = 2 idim2_H = (idim1_H - fdim1_H + 1) / pdim1_H idim2_W = (idim1_W - fdim1_W + 1) / pdim1_W fdim2 = 800 nkerns = [20, 20] # the below comments are examples of using this cnn to deal with chromagram with input feature size 144 = 12*12 # Reshape matrix of rasterized images of shape (batch_size, 12 * 12) # to a 4D tensor, compatible with our ConvPoolLayer # (12, 12) is the size of MNIST images. layer0_input = x.reshape((batch_size, 1, idim0_H, idim0_W)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (12-2+1 , 12-2+1) = (11, 11) # maxpooling reduces this further to (11/2, 11/2) = (5, 5) # 4D output tensor is thus of shape (batch_size, nkerns[0], 5, 5) layer0 = ConvPoolLayer(rng, input=layer0_input, input_shape=(batch_size, 1, idim0_H, idim0_W), filter_shape=(nkerns[0], 1, fdim0_H, fdim0_W), poolsize=(pdim0_H, pdim0_W)) # Construct the second convolutional pooling layer # filtering reduces the image size to (5-2+1, 5-2+1) = (4, 4) # maxpooling reduces this further to (4/2, 4/2) = (2, 2) # 4D output tensor is thus of shape (batch_size, nkerns[1], 2, 2) layer1 = ConvPoolLayer(rng, input=layer0.output, input_shape=(batch_size, nkerns[0], idim1_H, idim1_W), filter_shape=(nkerns[1], nkerns[0], fdim1_H, fdim1_W), poolsize=(pdim1_H, pdim1_W)) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[1] * 2 * 2), # or (500, 50 * 4 * 4) = (500, 800) with the default values. 
layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * idim2_H * idim2_W, n_out=fdim2, activation=T.nnet.relu) # classify the values of the fully-connected sigmoidal layer nclass = max(train_set_y.eval()) + 1 layer3 = LogisticRegression(input=layer2.output, n_in=fdim2, n_out=nclass) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) train_score = theano.function( [index], layer3.errors(y), givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) # end-snippet-1 ############### # TRAIN MODEL # ############### print '... 
training' # early-stopping parameters patience = 10 * n_train_batches # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.996 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_iter = 0 training_history = [] start_time = timeit.default_timer() epoch = 0 done_looping = False while (epoch < n_epochs): if earlystop and done_looping: print 'early-stopping' break epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] #training_losses = [train_score(i) for i in xrange(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) #this_training_loss = numpy.mean(training_losses) #training_history.append([iter,this_training_loss,this_validation_loss]) training_history.append([iter, this_validation_loss]) # print('epoch %i, minibatch %i/%i, training error %f %%' % # (epoch, minibatch_index + 1, n_train_batches, # this_training_loss * 100.)) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) print('iter = %d' % iter) print('patience = %d' % patience) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * improvement_threshold: patience = max(patience, iter * patience_increase) numpy.savez(dumppath, model=params, training_history=training_history, best_validation_loss=best_validation_loss) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter print('best_validation_loss %f' % best_validation_loss) if patience <= iter: done_looping = True if earlystop: break end_time = timeit.default_timer() # final save numpy.savez(dumppath, model=params, training_history=training_history, best_validation_loss=best_validation_loss) print(('Optimization complete with best validation score of %f %%, ' 'obtained at iteration %i, ') % (best_validation_loss * 100., best_iter + 1)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
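As a quick check on the layer-size arithmetic in the comments above (a 'valid' convolution followed by non-overlapping max pooling; the original relies on Python 2 integer division, written here explicitly with //), a small sketch:

def conv_pool_out_dim(in_dim, filt_dim, pool_dim):
    """Output size of a 'valid' convolution followed by non-overlapping max pooling."""
    return (in_dim - filt_dim + 1) // pool_dim

# chromagram configuration used above: 12 -> 5 -> 2 along both height and width
h1 = conv_pool_out_dim(12, 2, 2)   # (12 - 2 + 1) // 2 = 5
h2 = conv_pool_out_dim(h1, 2, 2)   # (5 - 2 + 1) // 2 = 2
assert (h1, h2) == (5, 2)
# so layer2's flattened input has nkerns[1] * 2 * 2 = 20 * 4 = 80 features per example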
def test_DBN(finetune_lr, pretraining_epochs, pretrain_lr, cdk, usepersistent, training_epochs, L1_reg, L2_reg, hidden_layers_sizes, dataset, batch_size, output_folder, shuffle, scaling, dropout, first_layer, dumppath): """ Demonstrates how to train and test a Deep Belief Network. :type finetune_lr: float :param finetune_lr: learning rate used in the finetune stage :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type cdk: int :param cdk: number of Gibbs steps in CD/PCD :type training_epochs: int :param training_epochs: maximal number of iterations ot run the optimizer :type dataset: string :param dataset: path the the pickled dataset :type batch_size: int :param batch_size: the size of a minibatch """ print locals() datasets = loadmat(dataset=dataset, shuffle=shuffle, datasel=datasel, scaling=scaling, robust=robust) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size print "%d training examples" % train_set_x.get_value(borrow=True).shape[0] print "%d feature dimensions" % train_set_x.get_value(borrow=True).shape[1] # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... building the model' # construct the Deep Belief Network nclass = max(train_set_y.eval())+1 dbn = DBN(numpy_rng=numpy_rng, n_ins=train_set_x.get_value(borrow=True).shape[1], hidden_layers_sizes=hidden_layers_sizes, n_outs=nclass, L1_reg=L1_reg, L2_reg=L2_reg, first_layer=first_layer) print 'n_ins:%d'% train_set_x.get_value(borrow=True).shape[1] print 'n_outs:%d'% nclass # getting pre-training and fine-tuning functions # save images of the weights(receptive fields) in this output folder # if not os.path.isdir(output_folder): # os.makedirs(output_folder) # os.chdir(output_folder) print '... getting the pretraining functions' pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, cdk=cdk, usepersistent=usepersistent) # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn, train_model, validate_model, test_model = dbn.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr ) trng = MRG_RandomStreams(1234) use_noise = theano.shared(numpy.asarray(0., dtype=theano.config.floatX)) if dropout: # dbn.x = dropout_layer(use_noise, dbn.x, trng, 0.8) for i in range(dbn.n_layers): dbn.sigmoid_layers[i].output = dropout_layer(use_noise, dbn.sigmoid_layers[i].output, trng, 0.5) # start-snippet-2 ######################### # PRETRAINING THE MODEL # ######################### print '... pre-training the model' plotting_time = 0. start_time = timeit.default_timer() ## Pre-train layer-wise for i in xrange(dbn.n_layers): # go through pretraining epochs for epoch in xrange(pretraining_epochs): if pretrain_dropout: use_noise.set_value(1.) 
# use dropout at pre-training # go through the training set c = [] for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr)) print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), print numpy.mean(c) ''' for j in range(dbn.n_layers): if j == 0: # Plot filters after each training epoch plotting_start = timeit.default_timer() # Construct image from the weight matrix this_layer = dbn.rbm_layers[j] this_field = this_layer.W.get_value(borrow=True).T print "field shape (%d,%d)"%this_field.shape image = Image.fromarray( tile_raster_images( X=this_field[0:100], # take only the first 100 fields (100 * n_visible) #the img_shape and tile_shape depends on n_visible and n_hidden of this_layer # if n_visible = 144 (12,12), if n_visible = 1512 (36,42) img_shape=(12, 12), tile_shape=(10, 10), tile_spacing=(1, 1) ) ) image.save('filters_at_epoch_%i.png' % epoch) plotting_stop = timeit.default_timer() plotting_time += (plotting_stop - plotting_start) ''' end_time = timeit.default_timer() # end-snippet-2 print >> sys.stderr, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ######################## # FINETUNING THE MODEL # ######################## print '... finetuning the model' # early-stopping parameters patience = 10 * n_train_batches # look as this many examples regardless patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.999 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatches before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf test_score = 0. start_time = timeit.default_timer() done_looping = False epoch = 0 # while (epoch < training_epochs) and (not done_looping): while (epoch < training_epochs): if earlystop and done_looping: print 'early-stopping' break epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): use_noise.set_value(1.) # use dropout at training time minibatch_avg_cost = train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: use_noise.set_value(0.) # stop dropout at validation/test time validation_losses = validate_model() training_losses = train_model() this_validation_loss = numpy.mean(validation_losses) this_training_loss = numpy.mean(training_losses) # also monitor the training losses print( 'epoch %i, minibatch %i/%i, training error %f %%' % ( epoch, minibatch_index + 1, n_train_batches, this_training_loss * 100. ) ) print( 'epoch %i, minibatch %i/%i, validation error %f %%' % ( epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100. 
) ) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if ( this_validation_loss < best_validation_loss * improvement_threshold ): patience = max(patience, iter * patience_increase) with open(dumppath, "wb") as f: cPickle.dump(dbn.params, f) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter ''' # test it on the test set test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) ''' if patience <= iter: done_looping = True if earlystop: break end_time = timeit.default_timer() print( ( 'Optimization complete with best validation score of %f %%, ' 'obtained at iteration %i, ' 'with test performance %f %%' ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.) ) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
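The validation loop above follows the standard Theano-tutorial early-stopping recipe. Stripped of the Theano machinery and run on a made-up validation curve, the patience mechanism reduces to roughly this sketch:

validation_curve = [0.52, 0.31, 0.24, 0.21, 0.20, 0.20, 0.21, 0.20, 0.20, 0.21]
patience, patience_increase, improvement_threshold = 4, 2, 0.999
best_validation_loss = float('inf')
for it, this_validation_loss in enumerate(validation_curve):
    if this_validation_loss < best_validation_loss:
        if this_validation_loss < best_validation_loss * improvement_threshold:
            # a significant improvement extends the patience budget
            patience = max(patience, it * patience_increase)
        best_validation_loss = this_validation_loss
    if patience <= it:
        # budget exhausted without a significant improvement: stop early
        break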
def shadeplots_faces_resp(subj, elecs_list, SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta', thresh = 0, chunk_size = 0, baseline = -500, black_chunk_size = 0): """ calculate onset and offset window for given electrode. compares face emo to face gen saves csv for each sub/task for easy plotting later """ filename = os.path.join(SJdir, 'Subjs', subj, 'FaceEmo', 'HG_elecMTX_percent_eleclist.mat') data = loadmat.loadmat(filename) srate = data['srate'] elecsEmo = data['elec_list'] dataEmo = data['data_percent_resp'] filename = os.path.join(SJdir, 'Subjs', subj, 'FaceGen', 'HG_elecMTX_percent_eleclist.mat') data = loadmat.loadmat(filename) srate = data['srate'] elecsGen = data['elec_list'] dataGen = data['data_percent_resp'] #convert to srate chunksize = chunk_size/1000*srate black_chunksize = black_chunk_size/1000*srate filename = os.path.join(SJdir, 'Anat', 'ShadePlots_Faces', '_'.join([subj, 'Emo', 'vs', 'Gen']) +'_resp.csv') subjs = list(); pthr = list(); elecs = list(); starts = list(); ends = list(); for i, e in enumerate(elecs_list): idx_Emo, idx_Gen = (i, i) edataEmo = dataEmo[idx_Emo,:].squeeze() edataGen = dataGen[idx_Gen,:].squeeze() if edataEmo.shape[1]>edataGen.shape[1]: edataEmo = edataEmo[:,:edataGen.shape[1]] else: edataGen = edataGen[:,:edataEmo.shape[1]] pvals = list(); for j in np.arange(0, edataEmo.shape[1]): (t, p) = stats.ttest_ind(edataEmo[:,j], edataGen[:,j]) pvals.append(p) thr = fdr_correct.fdr2(pvals, q = 0.05) H = np.array(np.array(pvals<thr)).astype('int') if (thr>0): #find elecs with window that > chunksize and > threshold (10%) passed_thresh = abs(edataEmo[:, 0::].mean(axis=0) - edataGen[:, 0::].mean(axis = 0)) >thresh #difference between blocks is > 10% threshold sig_and_thresh = H * passed_thresh difference = np.diff(sig_and_thresh, n = 1, axis = 0) start_idx = np.where(difference==1)[0]+1 end_idx = np.where(difference == -1)[0] if start_idx.size > end_idx.size: #last chunk goes until end end_idx = np.append(end_idx, int(edataEmo.shape[1])) elif start_idx.size < end_idx.size: start_idx = np.append(0, start_idx) #starts immediately significant if (start_idx.size!=0): if (start_idx[0] > end_idx[0]): #starts immediately significant start_idx = np.append(0, start_idx) if (start_idx.size!=0): if (end_idx[-1] < start_idx[-1]):#significant until end end_idx = np.append(end_idx, int(edataEmo.shape[1])) chunk = (end_idx - start_idx) >= chunksize if sum(chunk) > 0: #significant windows on elecs that passed threshold (10%) (ignoring threshold and chunksize) difference = np.diff(H, n = 1, axis = 0) start_idx = np.where(difference==1)[0]+1 end_idx = np.where(difference == -1)[0] if start_idx.size > end_idx.size: #last chunk goes until end end_idx = np.append(end_idx, int(edataEmo.shape[1])) elif start_idx.size < end_idx.size: start_idx = np.append(0, start_idx) #starts immediately significant if (start_idx.size!=0): if (start_idx[0] > end_idx[0]): #starts immediately significant start_idx = np.append(0, start_idx) if (start_idx.size!=0): if (end_idx[-1] < start_idx[-1]):#significant until end end_idx = np.append(end_idx, int(edataEmo.shape[1])) black_chunk = (start_idx[1:] - end_idx[:-1]) > black_chunksize #combine window separated by <200ms tmp = np.append(1,black_chunk).astype('bool') end_idx = end_idx[np.append(np.where(np.in1d(start_idx, start_idx[tmp]))[0][1:]-1, -1)] start_idx = start_idx[tmp] #drop chunks that <100ms chunk = (end_idx - start_idx) >= chunksize start_idx = start_idx[chunk] end_idx = end_idx[chunk] else: #no chunks start_idx = np.zeros((1,)) end_idx = 
np.zeros((1,)) else: #thr<0 start_idx = np.zeros((1,)) end_idx = np.zeros((1,)) start_idx = start_idx - np.round(500/1000*srate) #check, should shift it back to be -500 to 500 window end_idx = end_idx - np.round(500/1000*srate) subjs.extend([subj] * len(start_idx)) elecs.extend([e] * len(start_idx)) pthr.extend([thr] * len(end_idx)) starts.extend(start_idx) ends.extend(end_idx) data_dict = {'edataEmo':edataEmo, 'edataGen':edataGen, 'start_idx':start_idx, 'end_idx':end_idx, 'srate':srate,'thresh':thresh, 'chunksize':chunksize, 'black_chunksize':black_chunksize} data_path = os.path.join(SJdir, 'Anat','ShadePlots_Faces', 'data',''.join([subj, '_', 'Emo_vs_Gen', '_e', str(e), '_resp.p'])) with open(data_path, 'w') as f: pickle.dump(data_dict, f) f.close() sig_windows = pd.DataFrame({'subj':subjs, 'elec':elecs, 'pthreshold':pthr, 'start_idx':starts, 'end_idx':ends}) sig_windows = sig_windows[['subj', 'elec', 'start_idx','end_idx','pthreshold']] sig_windows.to_csv(filename) return sig_windows
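fdr_correct.fdr2 is not shown in this file; assuming it returns a Benjamini-Hochberg style p-value threshold at level q (and 0 when nothing survives), a minimal sketch of what it is taken to compute would be:

import numpy as np

def bh_threshold(pvals, q=0.05):
    """Benjamini-Hochberg p-value threshold: the largest p-value that
    survives FDR control at level q, or 0 if none survive. A sketch of
    the behaviour assumed of fdr_correct.fdr2."""
    p = np.sort(np.asarray(pvals))
    m = len(p)
    below = p <= q * np.arange(1, m + 1) / float(m)
    return p[below].max() if below.any() else 0.0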
def test_DBN(finetune_lr, pretraining_epochs, pretrain_lr, cdk, usepersistent, training_epochs, L1_reg, L2_reg, hidden_layers_sizes, dataset, batch_size, output_folder, shuffle, scaling, dropout, first_layer, dumppath): """ Demonstrates how to train and test a Deep Belief Network. :type finetune_lr: float :param finetune_lr: learning rate used in the finetune stage :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type cdk: int :param cdk: number of Gibbs steps in CD/PCD :type training_epochs: int :param training_epochs: maximal number of iterations ot run the optimizer :type dataset: string :param dataset: path the the pickled dataset :type batch_size: int :param batch_size: the size of a minibatch """ print locals() datasets = loadmat(dataset=dataset, shuffle=shuffle, datasel=datasel, scaling=scaling, robust=robust) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size print "%d training examples" % train_set_x.get_value(borrow=True).shape[0] print "%d feature dimensions" % train_set_x.get_value(borrow=True).shape[1] # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... building the model' # construct the Deep Belief Network nclass = max(train_set_y.eval()) + 1 dbn = DBN(numpy_rng=numpy_rng, n_ins=train_set_x.get_value(borrow=True).shape[1], hidden_layers_sizes=hidden_layers_sizes, n_outs=nclass, L1_reg=L1_reg, L2_reg=L2_reg, first_layer=first_layer) print 'n_ins:%d' % train_set_x.get_value(borrow=True).shape[1] print 'n_outs:%d' % nclass # SP contains an ordered list of (pos), ordered by chord class number [0,ydim-1] SP = balanced_seg.balanced(nclass, train_set_y) # getting pre-training and fine-tuning functions # save images of the weights(receptive fields) in this output folder # if not os.path.isdir(output_folder): # os.makedirs(output_folder) # os.chdir(output_folder) print '... getting the pretraining functions' pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, cdk=cdk, usepersistent=usepersistent) # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn, train_model, validate_model, test_model = dbn.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr) trng = MRG_RandomStreams(1234) use_noise = theano.shared(numpy.asarray(0., dtype=theano.config.floatX)) if dropout: # dbn.x = dropout_layer(use_noise, dbn.x, trng, 0.8) for i in range(dbn.n_layers): dbn.sigmoid_layers[i].output = dropout_layer( use_noise, dbn.sigmoid_layers[i].output, trng, 0.5) # start-snippet-2 ######################### # PRETRAINING THE MODEL # ######################### print '... pre-training the model' plotting_time = 0. start_time = timeit.default_timer() ## Pre-train layer-wise for i in xrange(dbn.n_layers): # go through pretraining epochs for epoch in xrange(pretraining_epochs): if pretrain_dropout: use_noise.set_value(1.) 
# use dropout at pre-training # go through the training set c = [] for batch_index in xrange(n_train_batches): # FIXME: n_train_batches is a fake item bc_idx = balanced_seg.get_bc_idx(SP, nclass) c.append(pretraining_fns[i](bc_idx=bc_idx, lr=pretrain_lr)) print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), print numpy.mean(c) ''' for j in range(dbn.n_layers): if j == 0: # Plot filters after each training epoch plotting_start = timeit.default_timer() # Construct image from the weight matrix this_layer = dbn.rbm_layers[j] this_field = this_layer.W.get_value(borrow=True).T print "field shape (%d,%d)"%this_field.shape image = Image.fromarray( tile_raster_images( X=this_field[0:100], # take only the first 100 fields (100 * n_visible) #the img_shape and tile_shape depends on n_visible and n_hidden of this_layer # if n_visible = 144 (12,12), if n_visible = 1512 (36,42) img_shape=(12, 12), tile_shape=(10, 10), tile_spacing=(1, 1) ) ) image.save('filters_at_epoch_%i.png' % epoch) plotting_stop = timeit.default_timer() plotting_time += (plotting_stop - plotting_start) ''' end_time = timeit.default_timer() # end-snippet-2 print >> sys.stderr, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ######################## # FINETUNING THE MODEL # ######################## print '... finetuning the model' # early-stopping parameters patience = 10 * n_train_batches # look as this many examples regardless patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.999 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatches before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf test_score = 0. start_time = timeit.default_timer() done_looping = False epoch = 0 # while (epoch < training_epochs) and (not done_looping): while (epoch < training_epochs): if earlystop and done_looping: print 'early-stopping' break epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): use_noise.set_value(1.) # use dropout at training time # FIXME: n_train_batches is a fake item bc_idx = balanced_seg.get_bc_idx(SP, nclass) minibatch_avg_cost = train_fn(bc_idx) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: use_noise.set_value(0.) 
# stop dropout at validation/test time validation_losses = validate_model() training_losses = train_model() this_validation_loss = numpy.mean(validation_losses) this_training_loss = numpy.mean(training_losses) # also monitor the training losses print('epoch %i, minibatch %i/%i, training error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_training_loss * 100.)) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) with open(dumppath, "wb") as f: cPickle.dump(dbn.params, f) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter ''' # test it on the test set test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) ''' if patience <= iter: done_looping = True if earlystop: break end_time = timeit.default_timer() print(('Optimization complete with best validation score of %f %%, ' 'obtained at iteration %i, ' 'with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
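balanced_seg itself is not shown here. A hypothetical stand-in that matches how SP and get_bc_idx are used above (one index drawn per chord class, so every minibatch is class-balanced regardless of the class frequencies) could look like this, assuming the labels are available as a plain integer array (e.g. train_set_y.eval()):

import numpy as np

def balanced(nclass, y):
    """Group example indices by class label, ordered by class number.
    Hypothetical stand-in for balanced_seg.balanced."""
    y = np.asarray(y)
    return [np.where(y == c)[0] for c in range(nclass)]

def get_bc_idx(SP, nclass, rng=np.random):
    """Draw one random example index per class: a class-balanced minibatch
    of (at most) nclass examples."""
    return np.array([rng.choice(SP[c]) for c in range(nclass) if len(SP[c]) > 0])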
def main():
    # load the data
    hsi_image = loadmat('danforth_plant_ds551.mat')['plant']
    img_shape = hsi_image.shape
    n_r, n_c, n_b = hsi_image.shape

    # reshape the data because SPICE takes an MxN array, not a full HSI cube
    hsi_image = np.reshape(hsi_image, (img_shape[0] * img_shape[1], img_shape[2]))
    M = hsi_image

    # down sample the data (every 20th pixel) for the sake of time in this demo
    input_data = M.T.astype(float)
    ds_data = input_data[:, ::20]

    # get the default parameters from the SPICE.py file
    params = SPICEParameters()

    # run the SPICE algorithm on the down-sampled data
    endmembers, ds_proportions = SPICE(ds_data, params)

    # prompt the user to see if they would like to graph the output
    if input('Would you like to plot the output? (Y/n): ') == 'n':
        return

    # plot the wavelength versus the reflectance
    n_em = endmembers.shape[1]
    plt.plot(endmembers)
    plt.legend([str(i + 1) for i in range(n_em)])
    plt.title('SPICE Endmembers')

    # unmix the full (non-downsampled) data using the endmembers that SPICE discovered
    s = input_data.max()
    P = unmix_qpp(input_data / s, endmembers / s)

    # re-ravel abundance maps back into image shape
    P_imgs = []
    for i in range(n_em):
        map_lin = P[:, i]
        P_imgs.append(np.reshape(map_lin, (n_r, n_c)))

    # display abundance maps in the form of a subplot
    fig, axes = plt.subplots(2, int(n_em / 2) + 1, squeeze=True)
    for i in range(n_em):
        im = axes.flat[i].imshow(P_imgs[i], vmin=0, vmax=1)
        axes.flat[i].set_title('SPICE Abundance Map %d' % (i + 1))

    # add the original RGB image to the subplot
    # im = axes.flat[n_em].imshow(hsi['RGB'])
    # axes.flat[n_em].set_title('RGB Image')
    # fig.colorbar(im, ax=axes.ravel().tolist())
    # # delete any empty subplots
    # if (n_em % 2 == 0):
    #     fig.delaxes(axes.flatten()[(2*(int(n_em/2)+1)) -1])

    plt.show()
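Since unmix_qpp is expected to return non-negative proportions that sum to one per pixel, a small hypothetical sanity check on P could be dropped into main() after the unmixing step (this assumes the usual sum-to-one / non-negativity constraints are enforced):

def check_abundances(P, tol=1e-3):
    """Hypothetical check on a proportion matrix P (pixels x endmembers):
    entries should be non-negative and each row should sum to ~1."""
    assert (P >= -tol).all(), 'negative abundances beyond tolerance'
    assert np.allclose(P.sum(axis=1), 1.0, atol=1e-2), 'abundances do not sum to one'
    return True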