Example #1
def load_data(batch_size, is_training=True):
    if is_training:
        data_file = os.path.join(
            cfg.affnist_data_dir, 'peppered_training_and_validation_batches',
            cfg.centered + '_percent_centered_' + cfg.peppered +
            '_percent_transformed.mat')

        images_per_transformation = int(
            (TOTAL_TRAINING_IMAGES * int(cfg.peppered) / 100) / 32)
        num_base_img = int(TOTAL_TRAINING_IMAGES * int(cfg.centered) / 100)
        num_inputs = images_per_transformation * 32 + num_base_img

        num_training = num_inputs * 84 // 100  # integer counts so they can be used as slice indices
        num_training_eval = num_inputs - num_training

        # NOTE: Assert we have the correct number of total inputs, as expected
        data = loadmat(data_file)
        images = data['affNISTdata']['image'].transpose().reshape(
            num_inputs, 40, 40, 1).astype(np.float32)
        labels = data['affNISTdata']['label_int'].astype(np.uint8)
        assert images.shape == (num_inputs, 40, 40, 1)
        assert labels.shape == (num_inputs, )

        trX = images[:num_training] / 255.
        trY = labels[:num_training]

        valX = images[num_training:] / 255.  # remaining num_training_eval samples form the validation set
        valY = labels[num_training:]

        num_tr_batch = num_training // cfg.batch_size
        num_val_batch = num_training_eval // cfg.batch_size

        return trX, trY, num_tr_batch, valX, valY, num_val_batch

    else:
        # NOTE: Swap those two lines below to get some basic transformed test
        if cfg.peppered == '0':
            data_file = os.path.join(cfg.affnist_data_dir, 'just_centered',
                                     'test.mat')
        else:
            data_file = os.path.join(cfg.affnist_data_dir, 'transformed',
                                     'test_batches', '15.mat')

        data = loadmat(data_file)
        images = data['affNISTdata']['image'].transpose().reshape(
            10000, 40, 40, 1).astype(np.float32)
        labels = data['affNISTdata']['label_int'].astype(np.float32)
        assert images.shape == (10000, 40, 40, 1)
        assert labels.shape == (10000, )

        imgs = images / 255.
        labs = labels
        num_te_batch = 10000 // cfg.batch_size

        return imgs, labs, num_te_batch
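A minimal usage sketch for the loader above, assuming a cfg object that provides batch_size and the affNIST paths (the training loop shown here is hypothetical, not part of the original code):

trX, trY, num_tr_batch, valX, valY, num_val_batch = load_data(cfg.batch_size, is_training=True)
for step in range(num_tr_batch):
    batch_x = trX[step * cfg.batch_size:(step + 1) * cfg.batch_size]  # (batch, 40, 40, 1)
    batch_y = trY[step * cfg.batch_size:(step + 1) * cfg.batch_size]
    # ... feed batch_x / batch_y to the model ...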
Example #2
 def __init__(self, scanNum, xlen, ylen, basePath='/Users/alec/UCSB/scan_data/'):
     self.scanNum = scanNum
     self.dataFiles = self.get_data_files(basePath)
     self.param = loadmat.loadmat(self.dataFiles[0])['scan']['param']
     self.param['xlen'] = xlen
     self.param['ylen'] = ylen
     self.dataForward, self.dataReverse = self.load_data()
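These examples index the result of loadmat.loadmat like nested Python dicts (e.g. ['scan']['param']), which plain scipy.io.loadmat does not return. Below is a common wrapper sketch that produces such nested dicts; it is an assumption about the helper, not its actual source, and the import location of mat_struct differs across SciPy versions.

import scipy.io as spio

def loadmat(filename):
    # Sketch: load a .mat file and convert MATLAB structs to nested Python dicts.
    def _to_dict(matobj):
        return {name: _check(getattr(matobj, name)) for name in matobj._fieldnames}
    def _check(elem):
        if isinstance(elem, spio.matlab.mio5_params.mat_struct):  # exact path varies by SciPy version
            return _to_dict(elem)
        return elem
    raw = spio.loadmat(filename, struct_as_record=False, squeeze_me=True)
    return {k: _check(v) for k, v in raw.items() if not k.startswith('__')}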
Example #3
 def load_data(self):
     data = dict()
     dataForward = dict()
     dataReverse = dict()
     xlen = self.param['xlen']
     for key in loadmat.loadmat(self.dataFiles[0])['scan']['data'].keys():
         data[key] = []
     for file in self.dataFiles:
         fileData = loadmat.loadmat(file)['scan']['data']
         for key in data.keys():
             data[key].append(fileData[key])
     for key in data.keys():
         dataForward[key] = []
         dataReverse[key] = []
         for i in range(0, len(self.dataFiles), 2*xlen):
             dataForward[key].append(data[key][i : i + xlen])
             dataReverse[key].append(np.flip(data[key][i + xlen : i + 2*xlen], axis=0))
     return dataForward, dataReverse
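A toy sketch of the interleaving that load_data above assumes: each scan row is stored as xlen forward-sweep files followed by xlen reverse-sweep files, and the reverse sweep is flipped so both rasters share the same x orientation (data values here are made up):

import numpy as np

xlen = 3
values = list(range(12))          # 12 files -> 2 scan rows of 3 forward + 3 reverse points
forward, reverse = [], []
for i in range(0, len(values), 2 * xlen):
    forward.append(values[i:i + xlen])                               # e.g. [0, 1, 2]
    reverse.append(np.flip(values[i + xlen:i + 2 * xlen], axis=0))   # e.g. [5, 4, 3]
print(forward, reverse)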
Example #4
def HG_regression_surr_random_SGE(DATASET, numiter = 1000):
    '''
    creates random surrogate data numiter times
    calculates regression on each surrogate data set
    saves out distribution of regression parameters for surrogate data
    only runs on duration electrodes
    '''
    
    SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta'
    subj, task = DATASET.split('_')
    print (DATASET)
    
    folder = 'maxes_medians_stds_lats'
    features = ['maxes_rel','medians', 'stds', 'lats']

    filename = os.path.join(SJdir, 'Subjs', subj, task, 'subj_globals.mat')
    data_dict = loadmat.loadmat(filename)
    srate = float(data_dict.get('srate'))
    
    filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_csvs', 'single_electrode_windows_withdesignation_EDITED.csv')
    df_pattern = pd.read_csv(filename)

    bad_df = pd.DataFrame({'GP44_DecisionAud':233, 'GP15_SelfVis':1, 'JH2_FaceEmo':113, 'GP35_FaceEmo':60}, index = range(1)).T
    bad_df = bad_df.reset_index()
    bad_df.columns = ['subj_task','elec']

    #get data
    print ('get data')
    data_dict, start_idx, end_idx, start_idx_resp, end_idx_resp = stats_static250(subj, task, df_pattern)

    ##reject outliers
    print ('\nreject outliers')
    data_dict_clean = reject_outliers(DATASET, data_dict, start_idx, end_idx, start_idx_resp, end_idx_resp)

    #run regression for stim and resp
    scores, coefs, alphas, pvals = [[] for i in range(4)]
    for lock in ['resp', 'stim']:
        print ('run regression on %s\n' %(lock))
        coef, score, alpha, pval, nulls = run_regression(DATASET,  data_dict_clean[lock], numiter = numiter)

        #save out dataframes
        saveDir = os.path.join(SJdir, 'PCA', 'Stats', 'Regression', 'unsmoothed', folder, 'static_250windows', lock)
        if not(os.path.exists(saveDir)):
            os.makedirs(saveDir)
 
        df = pd.DataFrame({'score':score, 'coef':coef, 'pval':pval, 'alpha':alpha})
        df = df[['score','pval','alpha','coef']]
        
        filename = os.path.join(saveDir, '_'.join([DATASET, 'regression_values_%s.csv' %(lock)]))
        df.to_csv(filename)
        print('saving %s\n' %(filename))
        sys.stdout.flush()
Example #5
def shadeplots_faces_stats(subj, task, elecs_list, SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta', baseline = -500):

    #get data
    filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_eleclist.mat')
    data_dict = loadmat.loadmat(filename)
    srate, elecs, data, RTs, onsets_stim, onsets_resp, data_resp = [data_dict.get(k) for k in ['srate','elecs','data_percent', 'RTs', 'onsets_stim', 'onsets_resp', 'data_percent_resp']]

    bl_st = baseline/1000*srate

    filename = os.path.join(SJdir, 'Anat', 'ShadePlots_Faces', '_'.join([subj, task, 'maxes']) +'.csv')
    peaks, lats, peaks_resp, lats_resp, peaks_maxRT, lats_maxRT, peaks_mean, lats_mean, peaks_mean_resp, lats_mean_resp = [ dict() for x in range(10)]

    for i, e in enumerate(elecs_list):
        edata = data[i, :, :].squeeze()
        edata_resp = data_resp[i,:,:].squeeze()

        #get maxes from stim onset to resp + 300ms
        p, l = [list() for x in range(2)]
        for m in range(edata.shape[0]): #per trial
            
            p.append(edata[m,abs(bl_st) : abs(bl_st) + RTs[m] + (300/1000*srate)].max())
            l.append(edata[m,abs(bl_st) : abs(bl_st) + RTs[m] + (300/1000*srate)].argmax())
        
        peaks[e] = p
        lats[e] = l
        
        peaks_resp[e] = edata_resp.max(axis = 1)
        lats_resp[e] = edata_resp.argmax(axis = 1)
        

        #get maxes in a single window (stim onset to max RT + 500)
        peaks_maxRT[e] = edata[:, abs(bl_st) : abs(bl_st) + RTs.max() + (500/1000*srate)].max(axis = 1)
        lats_maxRT[e] = edata[:, abs(bl_st) : abs(bl_st) + RTs.max() + (500/1000*srate)].argmax(axis = 1)

        #get maxes and latencies on the mean trace
        peaks_mean[e] = edata[:, abs(bl_st) : abs(bl_st) + RTs.max() + (500/1000*srate)].mean(axis = 0).max()
        lats_mean[e] = edata[:, abs(bl_st) : abs(bl_st) + RTs.max() + (500/1000*srate)].mean(axis = 0).argmax()

        peaks_mean_resp[e] = edata_resp.mean(axis = 0).max()
        lats_mean_resp[e] = edata_resp.mean(axis = 0).argmax()

    #save stats (single trials)
    filename = os.path.join(SJdir, 'Anat', 'ShadePlots_Faces', 'SingleTrials', 'data', 'RT_300ms_pertrial' + ''.join([subj, '_', task, '.p']))
    data_dict = {'peaks':peaks, 'lats':lats, 'peaks_resp' : peaks_resp, 'lats_resp' : lats_resp, 'peaks_maxRT' : peaks_maxRT, 'lats_maxRT' : lats_maxRT, 'peaks_mean' : peaks_mean, 'lats_mean' : lats_mean, 'lats_mean_resp' : lats_mean_resp, 'peaks_mean_resp' : peaks_mean_resp}

    with open(filename, 'w') as f:
        pickle.dump(data_dict, f)
        f.close()
    return data_dict
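The per-electrode stats pickled above can be read back as in this sketch; the path is hypothetical, and under Python 3 the dump/load calls would need binary mode ('wb'/'rb') rather than the text mode used above.

import pickle

stats_path = 'RT_300ms_pertrial_subj_task.p'   # hypothetical; the actual path is built inside the function above
with open(stats_path, 'rb') as f:
    stats = pickle.load(f)
peaks = stats['peaks']   # dict: electrode -> per-trial peak amplitudes
lats = stats['lats']     # dict: electrode -> per-trial peak latencies (samples)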
Example #6
def run_pipeline(iF):
    try:
        
        print('Now working on '+ iF)
        dataset = lm.loadmat(iF)
        dataset = preprocess(dataset)
        if 'anatomy' not in dataset.keys():
            return
        else:
            anatomy = dataset['anatomy']
            if 'parent_shifted' in anatomy:
                group = anatomy['parent_shifted']
            else:
                group = anatomy['cluster_parent']
        region = 'MEC'
        idx = [region in ss for ss in group]
        idx = np.array(idx)
        idx = idx[dataset['sp']['cgs']==2]

        if idx.sum()==0:
            return
        
        dataset['spikecount']=dataset['spikecount'][:,idx]

        (model, bl_scores) = eval_and_train(dataset)
        (Ypred,Ytrue,speed,trial,c_matrix) = score_gain_model(model,dataset)
        plt.plot(Ytrue)

        plt.plot(dataset['posx_centers'][Ypred-1])
        name = os.path.basename(iF)[0:-4]
        plt.savefig('F:\\temp\\classifier_out\\'+region +'_'+ name + '.png')
        plt.close()
        tmp_array = np.array([Ypred,Ytrue,speed,trial,dataset['posx_edges']])
        np.save('F:\\temp\\classifier_out\\'+region +'_'+ name + '_scores.npy',tmp_array)
        #np.save('/oak/stanford/groups/giocomo/attialex/processed_data/classifier_output1/'+region +'_'+ name + '_scores.npy',tmp_array)
        #np.save('/oak/stanford/groups/giocomo/attialex/processed_data/classifier_output1/'+region +'_'+ name + '_confMatrix.npy',conf_matrix)
    except Exception as e:
        print(str(e))
        print('not working')
        pass
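A toy illustration of the region/quality filtering step in run_pipeline above (the arrays are made up; in Kilosort/phy output, cgs == 2 conventionally marks units curated as 'good'):

import numpy as np

group = np.array(['MEC_L2', 'VISp', 'MEC_L3', 'MEC_L2'])   # anatomical parent per unit
cgs = np.array([2, 2, 1, 2])                               # curation label per unit (2 = good)

idx = np.array(['MEC' in ss for ss in group])              # units whose parent contains 'MEC'
idx = idx[cgs == 2]                                        # keep the flag only for good units
print(idx)                                                 # -> [ True False  True]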
Example #7
def runForFile(good_cells,sn_this,labels,umap_save_path,good_cells_orig,xcorr=None):
    data = lm.loadmat(sn_this)
    summary = []
    ds_factor = 5
    for iClu,cluID in enumerate(np.unique(labels)):
        n=np.sum(labels==cluID)
        pwd_this = mean_pwd[iClu]  # mean_pwd is assumed to be defined in the enclosing scope
        if n>=10 and pwd_this>0.88:
            if xcorr is not None:
                xcorr_this = xcorr[labels==cluID]
            else:
                xcorr_this = None
            good_cells_this = good_cells[labels==cluID]
            (Xu,X_pca)=runUMAPForCluster(good_cells_this,data,ds_factor=ds_factor)
            #fig=plotResults(Xu,data['trial'],data['posx'],speed)
            _, sn = os.path.split(sn_this)  # use the current file path (fi is not defined in this scope)
            sn_new = sn.replace('.mat','_clu{}.png'.format(cluID))
            savepath = os.path.join('/Volumes/T7/attialex/umap_dark',sn_new)
            #fig.savefig(savepath)
            #plt.close(fig)
            summary.append((Xu,cluID,X_pca,xcorr_this))
        else:
            print('skipping clu {}, n: {},pwd: {:.2f}'.format(cluID,n,pwd_this))
    # import pdb
    # pdb.set_trace()
    (Xu,X_pca)=runUMAPForCluster(good_cells_orig,data,ds_factor=ds_factor) # run once for all cells as sanity check
    summary.append((Xu,cluID,X_pca,None))
    # import pdb
    # pdb.set_trace()
    if len(summary)>0:        
        fig = plotSummary(summary,data,ds_factor)
        _,sn = os.path.split(sn_this)
        sn = sn.replace('.mat','_UMAPSummary.png')
#         import pdb
#         pdb.set_trace()
        fig.savefig(os.path.join(umap_save_path,sn))
    return    
Example #8
def plot_average_overlap(subj, task, resplocked = False, SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta'):
        """
        plots of traces (not a shade plot bc no significance window calculated)
        average for
        1. easy task overlap elecs
        2. diff task overlap elecs
        3. diff task unique elcs
        """
        
        filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat')
        data = loadmat.loadmat(filename)
        srate = data['srate']
        elecs = data['active_elecs']
        RTs = data['RTs']
        bl_st = data['Params']['bl_st']/1000*srate
        data = data['data_percent']
        
        bl_st = int(bl_st)
        RTs = RTs + abs(bl_st)

        overlapfile = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed', 'mean_traces', 'csv_files', subj+'_ovelapped_dur_elecs.csv')
        df = pd.read_csv(overlapfile)
        easy_overlap = df.easy.dropna()[np.in1d(df.overlapped_elecs.dropna(), df.easy.dropna())]
        diff_overlap = df.difficult.dropna()[np.in1d(df.overlapped_elecs.dropna(), df.difficult.dropna())]
        diff_unique = df.unique_to_diff.dropna()

        elec_dict = {'easy_overlap':easy_overlap, 'diff_overlap':diff_overlap, 'diff_unique':diff_unique}
        data_dict = dict()

        #average data per grouping
        for k in elec_dict.keys():
            elec_list = elec_dict[k]
            eidx = np.in1d(elecs, elec_list)

            if resplocked:
                sel = np.where(eidx)[0]  # integer indices of the electrodes in this grouping
                tmp = np.empty((len(sel), data.shape[1], len(np.arange(bl_st, abs(bl_st))))) #elecs x trials x time
                for j, e in enumerate(sel): #elecs
                    tmp2 = np.empty((data.shape[1], len(np.arange(bl_st, abs(bl_st))))) #per elec, trials x time
                    for i, r in enumerate(RTs): #trials
                        tmp2[i,:] = data[e,i,(r-abs(bl_st)):(r+abs(bl_st))]
                    tmp[j, :, :] = tmp2
                data_dict[k] = tmp.mean(axis = 1).mean(axis = 0)    
            else:
                data_dict[k] = data[eidx,:,:].mean(axis = 1).mean(axis = 0)

        #plot
        f, ax = plt.subplots(1, 1, figsize = (30,10))
        scale_min = min([min(data_dict[x]) for x in data_dict.keys()])
        scale_max = max([max(data_dict[x]) for x in data_dict.keys()])

        tmp = (np.arange(scale_min, scale_max))

        for i, k in enumerate(data_dict.keys()):
            data = data_dict[k]
            ax.plot(np.arange(bl_st, data.shape[0]+bl_st), data, zorder = 1, linewidth = 3, label = k)

        ax.set_ylim([scale_min, scale_max])

        ax.axhline(y = 0, color = 'k', lw = 3, label = None) #xaxis
        ax.axvline(x = 0, color = 'k', lw = 3, label = None)

        ax.set_ylabel('% change HG')
        ax.set_xlabel('time (ms)')
        ax.autoscale(tight=True)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.get_xaxis().tick_bottom()
        ax.get_yaxis().tick_left()
        legend1 = ax.legend(loc = 'best')

        ax.set_title(' '.join([subj, task]))

        filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust','elecs','significance_windows', 'smoothed', 'mean_traces', 'images', 'median_split', '_'.join([subj,task, 'easy_diff_overlap_unique']))
        if resplocked:
            filename = filename + '_resplocked'
 
        plt.savefig(filename+'.png')
        plt.close()
Example #9
def test_mlp(learning_rate, L1_reg, L2_reg, n_epochs,
             hidden_layers_sizes, trainpath, trainlist, validset, batch_size, datasel,
             shuffle, scaling, dropout, earlystop, dumppath):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the dataset


   """
    print locals()
    
    datasets = loadmat(trainpath=trainpath,trainlist=trainlist,validset=validset,shuffle=shuffle,datasel=datasel,
                       scaling=scaling,robust=robust)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)
    
    nclass = max(train_set_y.eval()) + 1
    print "n_in = %d"%train_set_x.get_value(borrow=True).shape[1]
    print "n_out = %d"%nclass

    # construct the MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=train_set_x.get_value(borrow=True).shape[1],
        hidden_layers_sizes=hidden_layers_sizes,
        n_out=nclass
    )
    
    # dropout the hidden layers
    trng = RandomStreams(1234)
    use_noise = theano.shared(numpy.asarray(0., dtype=theano.config.floatX))
    if dropout:
        # classifier.input = dropout_layer(use_noise, classifier.input, trng, 0.8)
        for i in range(classifier.n_layers):
            classifier.hiddenlayers[i].output = dropout_layer(use_noise, classifier.hiddenlayers[i].output, trng, 0.5)
            
    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )
    # end-snippet-4

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    
    train_score = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    
    pred_probs = theano.function(
        inputs=[index],
        outputs=classifier.predprobs,
        givens={
            x: train_set_x[index:1000],
            # y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # start-snippet-5
    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.996   # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    training_history=[]
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs):
        if earlystop and done_looping:
            print 'early-stopping'
            break
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            use_noise.set_value(1.) # use dropout
            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                use_noise.set_value(0.) # at validation/testing time, no dropout
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                #training_losses = [train_score(i) for i in xrange(n_train_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                #this_training_loss = numpy.mean(training_losses)
                #training_history.append([iter,this_training_loss,this_validation_loss])
                training_history.append([iter,this_validation_loss])
                
#                print('epoch %i, minibatch %i/%i, training error %f %%' %
#                      (epoch, minibatch_index + 1, n_train_batches,
#                       this_training_loss * 100.))
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                print('iter = %d' % iter)
                print('patience = %d' % patience)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)
                        
                    numpy.savez(dumppath, model=classifier.params, training_history=training_history,
                                best_validation_loss=best_validation_loss)
                        
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    print('best_validation_loss %f' % best_validation_loss)

            if patience <= iter:
                done_looping = True
                if earlystop:
                    break

    end_time = timeit.default_timer()
    
    # final save
    numpy.savez(dumppath, model=classifier.params, training_history=training_history,
                                best_validation_loss=best_validation_loss)
    
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i') %
          (best_validation_loss * 100., best_iter + 1))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
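The updates list above encodes plain stochastic gradient descent, param <- param - learning_rate * gradient. A tiny NumPy sketch of the same rule outside Theano, on a toy quadratic loss (everything here is illustrative):

import numpy as np

w = np.array([2.0, -3.0])
learning_rate = 0.1
for _ in range(100):
    grad = 2.0 * w                    # gradient of the toy loss ||w||^2
    w = w - learning_rate * grad      # same rule as the (param, param - lr * gparam) pairs
print(w)                              # -> close to [0, 0]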
Example #10
def shadeplots_elecs_stats():
    """ 
    calculates mean, peak, latency, and std per trial for all electrodes in an active cluster - added medians and coefficient of variation
    uses windows for individual electrodes from PCA/Stats/single_electrode_windows_withdesignation.csv
    saves pickle file with numbers per trial in ShadePlots_hclust/elecs/significance_windows/static
    FOR DURATION - does not have RT-dependent window per trial. Uses max RT.
    """

    SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/'

    filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_withdesignation_EDITED.csv')
    df = pd.read_csv(filename)

    for s_t in df.groupby(['subj','task']):

        subj, task = s_t[0]
        #load data
        filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat')
        data_dict = loadmat.loadmat(filename)

        active_elecs, Params, srate, RT, data_all = [data_dict.get(k) for k in ['active_elecs','Params','srate','RTs','data_percent']]
        bl_st = Params['bl_st']
        bl_st = bl_st/1000*srate
        #sys.stdout.flush()

        cofvar, maxes_rel, medians, means, stds, maxes, lats, sums, lats_pro, RTs, num_dropped = [dict() for i in range(11)]
        
        RT = RT + abs(bl_st) #RTs are calculated from stim onset, need to account for bl in HG_elecMTX_percent

        for row in s_t[1].itertuples():
            _, _, subj, task, cluster, pattern, elec, start_idx, end_idx, start_idx_resp, end_idx_resp, _, _ = row
            eidx = np.in1d(active_elecs, elec)
            data = data_all[eidx,:,:].squeeze()

            st_resp = 0

            #define start and end indices based on electrode type
            if any([(pattern == 'S'), (pattern == 'sustained'), (pattern == 'S+sustained'), (pattern == 'SR')]):
                start_idx = start_idx + abs(bl_st)
                end_idx = end_idx + abs(bl_st)
                if start_idx == end_idx:
                    continue #for SR elecs that don't have stimlocked (CP9, e91)

                #num_to_drop = 0

                #calculate stats (single trials)
                means[elec] = data[:,start_idx:end_idx].mean(axis = 1)
                stds[elec] = data[:,start_idx:end_idx].std(axis = 1)
                maxes[elec] = data[:,start_idx:end_idx].max(axis = 1)
                lats[elec] = data[:,start_idx:end_idx].argmax(axis = 1)
                sums[elec] = data[:, start_idx:end_idx].sum(axis = 1)
                lats_pro[elec] = lats[elec] / len(np.arange(start_idx, end_idx))
                RTs[elec] = RT
                #num_dropped[elec] = num_to_drop

                medians[elec] = stats.nanmedian(data[:,start_idx:end_idx], axis = 1)
                maxes_rel[elec] = maxes[elec]-means[elec]
                cofvar[elec] = stds[elec]/means[elec]

                #update dataframe
                #ix = np.where([(df.subj == subj) & (df.task == task) & (df.elec == elec)])[1][0]
                #df.ix[ix,'dropped'] = num_to_drop


            if pattern == 'R':
                start_idx_resp = start_idx_resp + abs(st_resp)
                end_idx_resp = end_idx_resp + abs(st_resp)

                if start_idx_resp == end_idx_resp:
                    continue  #for inactive R elecs (not clear why on spreadsheet)

                #create data matrix
                data_resp = np.empty(data.shape)
                for j, r in enumerate(RT):
                    tmp = data[j, r + start_idx_resp : r + end_idx_resp]
                    tmp = np.pad(tmp, (0, data.shape[1]-len(tmp)), 'constant', constant_values = -999)
                    data_resp[j,:] = tmp
                data_resp[data_resp == -999] = np.nan

                #nanidx = np.isnan(np.nanmean(data_resp, axis = 1)) #if start > end
                '''
                if np.any(nanidx):

                    #drop equivalent number of long RTs
                    num_to_drop = np.sum(nanidx)
                    i = np.argpartition(RT, -num_to_drop)[-num_to_drop :] #find the indices of the longest RTs
                    nanidx[i] = True #mark the long trials as bad too
                    num_dropped[elec] = num_to_drop * 2 #dropping both ends of RT distribution

                    #calculate params for (single trials)
                    data_resp[nanidx,:] = np.nan
                    means[elec] = np.nanmean(data_resp, axis = 1)
                    stds[elec] = np.nanstd(data_resp, axis = 1)
                    maxes[elec] = np.nanmax(data_resp, axis = 1)
                    sums[elec] = np.nansum(data_resp, axis = 1)

                    medians[elec] = stats.nanmedian(data_resp, axis = 1)
                    maxes_rel[elec] = maxes[elec]-means[elec]

                    cofvar[elec] = stds[elec]/means[elec]

                    data_resp[nanidx,0] = -999
                    tmp_lat = np.nanargmax(data_resp, axis = 1)
                    tmp_lat = np.ndarray.astype(tmp_lat, dtype = float)
                    tmp_lat[nanidx] = np.nan
                    lats[elec] = tmp_lat
                    lats_pro[elec] = tmp_lat / np.sum(~np.isnan(data_resp), axis = 1)

                    tmp_RT = np.ndarray.astype(RT, dtype = float)
                    tmp_RT[nanidx] = np.nan
                    RTs[elec] = tmp_RT

                else:
                    num_to_drop = 0
                    num_dropped[elec] = num_to_drop
                '''

                lats[elec] = np.nanargmax(data_resp, axis = 1)
                lats_pro[elec] = np.nanargmax(data_resp, axis = 1) / np.sum(~np.isnan(data_resp), axis = 1)
                RTs[elec] = RT
                means[elec] = np.nanmean(data_resp, axis = 1)
                stds[elec] = np.nanstd(data_resp, axis = 1)
                maxes[elec] = np.nanmax(data_resp, axis = 1)
                sums[elec] = np.nansum(data_resp, axis = 1)

                medians[elec] = stats.nanmedian(data_resp, axis = 1)
                maxes_rel[elec] = maxes[elec] - means[elec]
                cofvar[elec] = stds[elec]/means[elec]

                #update dataframe
                #ix = np.where([(df.subj == subj) & (df.task == task) & (df.elec == elec)])[1][0]
                #df.ix[ix,'dropped'] = num_to_drop * 2 #dropping both ends of RT distribution

            if pattern == 'D':
                start_idx = start_idx + abs(bl_st)
                #end_idx_resp = end_idx_resp + abs(st_resp)
                end_idx_resp = end_idx_resp + max(RT) #RT already has baseline in it

                #num_to_drop = 0

                #calculate stats (single trials)
                means[elec] = data[:,start_idx:end_idx_resp].mean(axis = 1)
                stds[elec] = data[:,start_idx:end_idx_resp].std(axis = 1)
                maxes[elec] = data[:,start_idx:end_idx_resp].max(axis = 1)
                lats[elec] = data[:,start_idx:end_idx_resp].argmax(axis = 1)
                sums[elec] = data[:, start_idx:end_idx_resp].sum(axis = 1)
                lats_pro[elec] = lats[elec] / len(np.arange(start_idx, end_idx_resp))
                RTs[elec] = RT
                #num_dropped[elec] = num_to_drop

                medians[elec] = np.median(data[:,start_idx:end_idx_resp], axis = 1)
                maxes_rel[elec] = maxes[elec]-means[elec]
                cofvar[elec] = stds[elec]/means[elec]

                #update dataframe
                #ix = np.where([(df.subj == subj) & (df.task == task) & (df.elec == elec)])[1][0]
                #df.ix[ix,'dropped'] = num_to_drop


        #save stats (single trials)
        filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'static', 'data', ''.join([subj, '_', task, '.p']))
        data_dict = {'active_elecs': active_elecs, 'lats_pro': lats_pro, 'sums':sums, 'means':means, 'stds':stds, 'maxes':maxes, 'lats':lats, 'srate': srate, 'bl_st':bl_st,'RTs':RTs, 'dropped':num_dropped, 'maxes_rel' : maxes_rel, 'medians' : medians, 'variations': cofvar}

        with open(filename, 'w') as f:
            pickle.dump(data_dict, f)
            f.close()

        #save csv file (without dropping trials)
        for k in data_dict.keys():
            if k in ['bl_st', 'srate','active_elecs', 'dropped']:
                continue
            data = pd.DataFrame(data_dict[k])
        
            filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'static', 'csv_files', 'orig', '_'.join([subj, task, k]) + '.csv')
            data.to_csv(filename, index = False)
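A compact sketch of the per-trial window statistics computed above for stimulus-locked electrodes, on a toy trials x time array (the window bounds are hypothetical):

import numpy as np

data = np.random.rand(10, 1000) * 100     # trials x time for one electrode, e.g. % signal change
start_idx, end_idx = 200, 600             # HG window in samples (hypothetical)

win = data[:, start_idx:end_idx]
means = win.mean(axis=1)                  # one value per trial
stds = win.std(axis=1)
maxes = win.max(axis=1)
lats = win.argmax(axis=1)                 # peak latency in samples from window start
maxes_rel = maxes - means
cofvar = stds / means                     # coefficient of variation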
Example #11
def HG_regression_surr_random_SGE(DATASET, numiter = 1000):
    '''
    creates random surrogate data numiter times
    calculates regression on each surrogate data set
    saves out distribution of regression parameters for surrogate data
    only runs on duration electrodes
    '''
    
    SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta'
    subj, task = DATASET.split('_')
    print (DATASET)

    all_coefs, all_scores, all_alphas = [[] for i in range(3)]
    
    folder = 'maxes_medians_stds'
    features = ['maxes_rel','medians', 'stds']

    filename = os.path.join(SJdir, 'Subjs', subj, task, 'subj_globals.mat')
    data_dict = loadmat.loadmat(filename)
    srate = float(data_dict.get('srate'))
    
    filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_csvs', 'single_electrode_windows_withdesignation_EDITED.csv')
    df_pattern = pd.read_csv(filename)

    bad_df = pd.DataFrame({'GP44_DecisionAud':233, 'GP15_SelfVis':1, 'JH2_FaceEmo':113, 'GP35_FaceEmo':60}, index = range(1)).T
    bad_df = bad_df.reset_index()
    bad_df.columns = ['subj_task','elec']

    for i in range(numiter):
        print ('iteration %i out of %i' %(i, numiter))
        #get surrogate data
        print ('get surrogate data')
        data_dict = shadeplots_elecs_stats_surr_random(subj, task, df_pattern, id_num = i)
        
        if len(data_dict['RTs'])==0:
            print('skipping %s %s - no duration elecs\n' %(subj, task))
            sys.stdout.flush()
            return

        ##reject outliers
        print ('\nreject outliers')
        data_dict_clean = reject_outliers(DATASET, data_dict, srate, df_pattern, bad_df = bad_df)

        #run regression (without pvalue)
        print ('run regression')
        coefs, score, alpha = run_regression(DATASET, data_dict_clean)

        #accumulate
        all_coefs.append(coefs)
        all_scores.append(score)
        all_alphas.append(alpha)

    #save out dataframes
    scores = pd.DataFrame(all_scores)
    coefs = pd.DataFrame(all_coefs)
    alphas = pd.DataFrame(all_alphas)

    saveDir = os.path.join(SJdir, 'PCA', 'Stats', 'Regression', 'unsmoothed', folder, 'surr_distributions')
    if not(os.path.exists(saveDir)):
        os.makedirs(saveDir)
    filename = os.path.join(saveDir, '_'.join([DATASET, 'coefs_surr_dist.csv']))
    coefs.to_csv(filename)
    filename = os.path.join(saveDir, '_'.join([DATASET, 'alphas_surr_dist.csv']))
    alphas.to_csv(filename)
    filename = os.path.join(saveDir, '_'.join([DATASET, 'scores_surr_dist.csv']))
    scores.to_csv(filename)
    print('saving %s\n' %(filename))
    sys.stdout.flush()
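The surrogate distributions saved above are typically compared with the regression score obtained on the real data to form an empirical p-value. A hedged sketch of that comparison (the file name and observed score are hypothetical):

import numpy as np
import pandas as pd

surr_scores = pd.read_csv('GP15_SelfVis_scores_surr_dist.csv', index_col=0).values.ravel()  # hypothetical file
observed_score = 0.12                                                                       # hypothetical observed score
pval = (np.sum(surr_scores >= observed_score) + 1.0) / (len(surr_scores) + 1.0)             # one-sided empirical p-value
print(pval)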
Example #12
def test_mlp(learning_rate, L1_reg, L2_reg, n_epochs, hidden_layers_sizes,
             dataset, batch_size, datasel, shuffle, scaling, dropout,
             earlystop, dumppath):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the dataset


   """
    print locals()

    datasets = loadmat(dataset=dataset,
                       shuffle=shuffle,
                       datasel=datasel,
                       scaling=scaling,
                       robust=robust)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    nclass = max(train_set_y.eval()) + 1
    print "n_in = %d" % train_set_x.get_value(borrow=True).shape[1]
    print "n_out = %d" % nclass

    # construct the MLP class
    classifier = MLP(rng=rng,
                     input=x,
                     n_in=train_set_x.get_value(borrow=True).shape[1],
                     hidden_layers_sizes=hidden_layers_sizes,
                     n_out=nclass)

    # dropout the hidden layers
    trng = RandomStreams(1234)
    use_noise = theano.shared(numpy.asarray(0., dtype=theano.config.floatX))
    if dropout:
        # classifier.input = dropout_layer(use_noise, classifier.input, trng, 0.8)
        for i in range(classifier.n_layers):
            classifier.hiddenlayers[i].output = dropout_layer(
                use_noise, classifier.hiddenlayers[i].output, trng, 0.5)

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)
    # end-snippet-4

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    train_score = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    pred_probs = theano.function(
        inputs=[index],
        outputs=classifier.predprobs,
        givens={
            x: train_set_x[index:1000],
            # y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # start-snippet-5
    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 100 * n_train_batches  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.999  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs):
        if earlystop and done_looping:
            print 'early-stopping'
            break
    # while (epoch < n_epochs):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            use_noise.set_value(1.)  # use dropout
            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                use_noise.set_value(
                    0.)  # at validation/testing time, no dropout
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                training_losses = [
                    train_score(i) for i in xrange(n_train_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                this_training_loss = numpy.mean(training_losses)
                probs = [pred_probs(i) for i in xrange(n_train_batches)]

                print('epoch %i, minibatch %i/%i, training error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_training_loss * 100.))

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)
                    # save model
                    with open(dumppath, "wb") as f:
                        cPickle.dump(classifier.params, f)

                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    '''
                    # test it on the test set
                    test_losses = [test_model(i) for i
                                   in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    '''

            if patience <= iter:
                done_looping = True
                if earlystop:
                    break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Example #13
                                   'NP_DATA_corrected')
    files = glob.glob(os.path.join(neuropix_folder, '*.mat'))
    #files = glob.glob('/oak/stanford/groups/giocomo/attialex/NP_DATA/np*_gain*.mat'
    path = '/Volumes/Samsung_T5/attialex/python_circular_gain_' + gain
    TRIALS = np.arange(5, 21)

    if not os.path.exists(path):
        os.makedirs(path)

    for iF in files:
        session_name = os.path.split(iF)[-1]
        print(session_name)
        if 'mismatch' in session_name or 'playback' in session_name or 'dark' in session_name:
            print('skipping {}'.format(session_name))
            continue
        data = lm.loadmat(iF)
        try:
            ons = get_gain_onsets(data, float(gain), 100)
        except:
            ons = []

        try:

            for nbr, iO in enumerate(ons):
                trials = iO + np.arange(-5, 4)
                output = run_for_file_gain(data, trials)
                sn = session_name[0:-4]
                session_name = '{}_{}.mat'.format(sn, nbr + 1)

                if output is not None:
                    plt.subplot(211)
Example #14
def shadeplots_elecs_stats():
    """ 
    calculates mean, max, min, latency, median, and std on the mean trace for trial for all electrodes in an active cluster
    OLD - uses electrodes and windows from PCA/Stats/single_electrode_windows_withdesignation_EDITED.csv
    NOW - uses electrodes and windows from PCA/csvs_FINAL/final_windows.csv (after going through and editing them)
    calculates both stimulus and response locked parameters
    """

    SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/'

    #filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_csvs', 'single_electrode_windows_withdesignation_EDITED.csv')
    filename = os.path.join(SJdir, 'PCA', 'csvs_FINAL', 'final_windows.csv')
    df = pd.read_csv(filename)
                

    for s_t in df.groupby(['subj','task']):

        subj, task = s_t[0]

        #load data
        filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat') 
        data_dict = loadmat.loadmat(filename)

        active_elecs, Params, srate, RT, data_trials = [data_dict.get(k) for k in ['active_elecs','Params','srate','RTs','data_percent']]
        srate = float(srate)
        data_all = data_trials.mean(axis = 1) #mean across trials, (new shape is elecs x time)
        bl_st = -500/1000*srate #in data points

        maxes, lats, RTs, RTs_median, RTs_min, lats_static, lats_min_static, lats_semi_static = [dict() for i in range(8)]

        RT = RT + abs(bl_st) #RTs are calculated from stim/cue onset, need to account for bl in HG_elecMTX_percent 

        for row in s_t[1].itertuples():
            _, subj, task, elec, pattern, cluster, start_idx, end_idx, start_idx_resp, end_idx_resp = row #in datapoints
            eidx = np.in1d(active_elecs, elec)
            data = data_all[eidx,:].squeeze() #mean trace


            #define start and end indices based on electrode type
            if any([(pattern == 'S'), (pattern == 'sustained'), (pattern == 'S+sustained'), (pattern == 'SR')]):
                start_idx = start_idx + abs(bl_st)
                end_idx = end_idx + abs(bl_st)

            if pattern == 'R': 
                start_idx = start_idx + abs(bl_st)
                end_idx = end_idx + abs(bl_st)

            if pattern == 'D':
                start_idx = start_idx + abs(bl_st)
                end_idx = np.median(RT) + end_idx_resp

            if start_idx == end_idx:
                continue  #for SR elecs that only have response activity - don't calculate a mean value

            #calculate stats (mean trace)

            maxes[elec] = data[start_idx:end_idx].max()
            lats[elec] = (data[start_idx:end_idx].argmax()+1)/srate*1000 #within HG window

            RTs[elec] = (RT+bl_st).mean()/srate*1000 #from stimulus onset (adjusted for all subjects)
            RTs_median[elec] = np.median(RT+bl_st)/srate*1000 #from stimulus onset (adjusted for all subjects)
            RTs_min[elec] = np.min(RT+bl_st)/srate*1000 #from stimulus onset (adjusted for all subjects)

            lats_static[elec] = (data[abs(bl_st)::].argmax()+1)/srate*1000 #from stimulus onset to end (adjusted for all subjects)
            lats_semi_static[elec] = (data[start_idx::].argmax()+1)/srate*1000 #from HG onset


        data_dict = {'maxes':maxes, 'lats':lats, 'RTs':RTs, 'RTs_median': RTs_median, 'RTs_min' : RTs_min, 'lats_static' : lats_static, 'lats_semi_static' : lats_semi_static}

        #update csv file        
        for k in list(data_dict.keys()):
            if k in ['bl_st', 'srate','active_elecs']:
                data_dict.pop(k, None)

        df_values = pd.DataFrame(data_dict)

        #save dataframe with values for all elecs for subject/task - later combined into mean_traces_all_elecs.csv in elec_values.ipynb
        filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed', 'mean_traces', 'csv_files', '_'.join([subj, task]) + '.csv')
        df_values.to_csv(filename)
Example #15
if __name__ == '__main__':
    root = '/Users/attialex/distance_tuning'
    umap_version = 'Cosine_PCAUMAP'
    files = glob.glob(os.path.join('/Users/attialex/distance_tuning', '*.mat'))
    umap_save_path = os.path.join(root, umap_version)

    if not os.path.isdir(umap_save_path):
        os.makedirs(umap_save_path)
    shutil.copy2(
        '/Users/attialex/code/AlexA_Library/NP_python/distance_tuning_clustering.py',
        umap_save_path)

    for fi in files:
        print(fi)
        data_out = lm.loadmat(fi)
        data_out = data_out['data_out']
        idx = data_out['pvals'] < 0.05
        if sum(idx) < 30:
            continue

        _, sn_darkData = os.path.split(fi)
        data_path = '/Volumes/T7/attialex/NP_DATA_corrected'
        data = lm.loadmat(os.path.join(data_path, sn_darkData))

        xcorrs = data_out['xcorrs'][idx]

        reducer = umap.UMAP(n_components=2)
        #reducer = PCA(n_components=2)
        X_new = reducer.fit_transform(xcorrs)
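The loop above ends (truncated) right after computing the 2-D embedding; a minimal continuation sketch for visualizing and saving it, reusing the names defined above (hypothetical, not part of the original script):

import os
import matplotlib.pyplot as plt

plt.figure()
plt.scatter(X_new[:, 0], X_new[:, 1], s=5)                 # 2-D embedding of the significant cells
plt.title(sn_darkData.replace('.mat', ''))
plt.savefig(os.path.join(umap_save_path, sn_darkData.replace('.mat', '_embedding.png')))
plt.close()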
Example #16
def stats_static250(subj, task, df_pattern, start = 0, end = 250, start_idx_resp = -250, end_idx_resp = 0):

    """ 
    calculates params per electrode on for stim:stim+250 and resp-250:resp windows.

    drops trials that are <250 ms

    uses windows for individual electrodes from df_pattern (PCA/Stats/single_electrode_windows_csvs/single_electrode_windows_withdesignation_EDITED.csv)
    
    Uses unsmoothed data

    hardcoded params - medians, maxes_rel, stds, latencies, maxes, means

    returns dictionary with features. each feature is dictionary of elecs
    """

    SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/'

    #load data
    filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_unsmoothed.mat')
    data_dict = loadmat.loadmat(filename)

    active_elecs, Params, srate, RT, data_all = [data_dict.get(k) for k in ['active_elecs','Params','srate','RTs','data_percent']]

    bl_st = 500/1000*srate #for my data, remove cue from baseline - start/end_idx are relative to cue onset) - change 12/24 - okay with RT 12/25

    RT = RT + abs(bl_st) #RTs are calculated from stim (my data cue) onset, need to account for bl in HG_elecMTX_percent (for 500, not 1000 baseline 12/25)
    
    #define start and end windows (stim locked)
    start = np.round((start / 1000 * srate) + abs(bl_st))
    end = np.round((end / 1000 * srate))

    start_idx_resp = np.round(start_idx_resp / 1000 * srate)
    end_idx_resp = np.round(end_idx_resp / 1000 * srate)

    RTs, medians, maxes_rel, means, stds, maxes, lats = [{'stim':dict(), 'resp':dict()} for i in range(7)]
    
    s_t = df_pattern[((df_pattern.subj == subj) & (df_pattern.task == task))]

    for e in s_t.elec.values:

        _, subj, task, cluster, pattern, elec, start_idx, end_idx, _, _, _, _ = s_t[s_t.elec == e].values[0]
        
        if (end_idx - start_idx) < (end- start): #HG duration is less than window size (250 or 500)
            print ('skipping %s %s %i' %(subj, task, e))
            sys.stdout.flush()
            continue

        print('%i...' %(elec), end = "")
        sys.stdout.flush()

        eidx = np.in1d(active_elecs, elec)
        data = data_all[eidx,:,:].squeeze()
    
        start_idx = start_idx + start #start and end relative to HG onset
        end_idx = start_idx + end
        
        #calculate values (single trials)
        means['stim'][elec] = np.nanmean(data[:,start_idx:end_idx], axis =1)
        stds['stim'][elec] = np.nanstd(data[:,start_idx:end_idx], axis = 1)
        maxes['stim'][elec] = np.nanmax(data[:,start_idx:end_idx], axis = 1)
        medians['stim'][elec] = stats.nanmedian(data[:,start_idx:end_idx], axis = 1)
        maxes_rel['stim'][elec] = maxes['stim'][elec] - means['stim'][elec]
        lats['stim'][elec] = np.argmax(data[:,start_idx:end_idx], axis = 1)

        data_resp = np.empty((len(RT), len(np.arange(start_idx_resp, end_idx_resp))))
        for j, r in enumerate(RT):
            data_resp[j,:] = data[j, r + start_idx_resp : r + end_idx_resp]

        means['resp'][elec] = np.nanmean(data_resp, axis = 1)
        stds['resp'][elec] = np.nanstd(data_resp, axis = 1)
        maxes['resp'][elec] = np.nanmax(data_resp, axis = 1)
        medians['resp'][elec] = stats.nanmedian(data_resp, axis = 1)
        maxes_rel['resp'][elec] = maxes['resp'][elec]-means['resp'][elec]
        lats['resp'][elec] = np.argmax(data_resp, axis = 1)
        
        RTs['stim'][elec] = RT
        RTs['resp'][elec] = RT

    #output dictionary of params per elec
    data_dict = {'RTs' : RTs, 'maxes_rel' : maxes_rel, 'medians' : medians, 'stds': stds, 'lats' : lats, 'means' : means, 'maxes' : maxes}

    return data_dict, start_idx, end_idx, start_idx_resp, end_idx_resp
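A self-contained sketch of the response-locked extraction used above: for each trial, a fixed-length window is cut out ending at that trial's reaction time (toy sizes, names hypothetical):

import numpy as np

data = np.random.randn(5, 3000)                      # trials x time, stimulus-locked
RT = np.array([900, 1100, 1000, 1300, 950])          # reaction time per trial, in samples from trace start
start_idx_resp, end_idx_resp = -250, 0               # 250-sample window ending at the response

win_len = end_idx_resp - start_idx_resp
data_resp = np.empty((len(RT), win_len))
for j, r in enumerate(RT):
    data_resp[j, :] = data[j, r + start_idx_resp: r + end_idx_resp]

resp_means = np.nanmean(data_resp, axis=1)           # one value per trial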
Example #17
def shadeplots_elecs_stats():
    """ 
    calculates mean, peak, latency, and std per trial for all electrodes in an active cluster - added medians and coefficient of variation and mins
    uses windows for individual electrodes from PCA/Stats/single_electrode_windows_withdesignation.csv
    saves pickle file with numbers per trial in ShadePlots_hclust/elecs/significance_windows
    *** runs on unsmoothed data (12/11/14)***
    """

    SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/'

    #filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_csvs', 'single_electrode_windows_withdesignation_EDITED.csv')
    filename = os.path.join(SJdir, 'PCA', 'csvs_FINAL', 'mean_traces_all_subjs_dropSR.csv')
    df = pd.read_csv(filename)

    for s_t in df.groupby(['subj','task']):

        subj, task = s_t[0]
        #load data
        #filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_unsmoothed.mat')
        filename = os.path.join(SJdir, 'Subjs',subj, task, 'HG_elecMTX_zscore.mat')
        data_dict = loadmat.loadmat(filename)

        active_elecs, Params, srate, RT, data_all = [data_dict.get(k) for k in ['active_elecs','Params','srate','RTs','data_zscore']]
        bl_st = Params['bl_st']
        bl_st = bl_st/1000*srate
        
        if task in ['DecisionAud', 'DecisionVis']:
            bl_st = 500/1000*srate #remove cue from baseline - start/end_idx are relative to cue onset) - change 12/24 - okay with RT 12/25

        cofvar, maxes_rel, medians, means, stds, maxes, lats, sums, lats_pro, RTs, num_dropped, mins, lats_min = [dict() for i in range(13)]
        
        RT = RT + abs(bl_st) #RTs are calculated from stim/cue onset, need to account for bl in HG_elecMTX_percent (for 500, not 1000 baseline 12/25)

        for row in s_t[1].itertuples():
            _, subj, task, elec, pattern, cluster, start_idx, end_idx, start_idx_resp, end_idx_resp, RTs_values, RTs_median, RTs_min, lats_values, lats_semi_static, lats_static, max_vals, ROI = row
            eidx = np.in1d(active_elecs, elec)
            data = data_all[eidx,:,:].squeeze()

            st_resp = 0

            #define start and end indices based on electrode type
            if any([(pattern == 'S'), (pattern == 'sustained'), (pattern == 'S+sustained'), (pattern == 'SR')]):
                start_idx = start_idx + abs(bl_st)
                end_idx = end_idx + abs(bl_st)
                if start_idx == end_idx:
                    continue #for SR elecs that don't have stimlocked (CP9, e91)

                num_to_drop = 0

                #calculate stats (single trials)
                means[elec] = data[:,start_idx:end_idx].mean(axis = 1)
                stds[elec] = data[:,start_idx:end_idx].std(axis = 1)
                maxes[elec] = data[:,start_idx:end_idx].max(axis = 1)
                lats[elec] = data[:,start_idx:end_idx].argmax(axis = 1)
                lats_min[elec] = data[:, start_idx:end_idx].argmin(axis = 1)

                sums[elec] = data[:, start_idx:end_idx].sum(axis = 1)
                lats_pro[elec] = lats[elec] / len(np.arange(start_idx, end_idx))
                RTs[elec] = RT
                num_dropped[elec] = num_to_drop

                medians[elec] = stats.nanmedian(data[:,start_idx:end_idx], axis = 1)
                maxes_rel[elec] = maxes[elec]-means[elec]
                cofvar[elec] = stds[elec]/means[elec]
                mins[elec] = data[:,start_idx:end_idx].min(axis = 1)

                #update dataframe
                #ix = np.where([(df.subj == subj) & (df.task == task) & (df.elec == elec)])[1][0]
                #df.ix[ix,'dropped'] = num_to_drop


            if pattern == 'R':
                start_idx_resp = start_idx_resp + abs(st_resp)
                end_idx_resp = end_idx_resp + abs(st_resp)

                if start_idx_resp == end_idx_resp:
                    continue  #for inactive R elecs (not clear why on spreadsheet)

                #create data matrix
                data_resp = np.empty(data.shape)
                for j, r in enumerate(RT):
                    tmp = data[j, r + start_idx_resp : r + end_idx_resp]
                    tmp = np.pad(tmp, (0, data.shape[1]-len(tmp)), 'constant', constant_values = -999)
                    data_resp[j,:] = tmp
                data_resp[data_resp == -999] = np.nan

                nanidx = np.isnan(np.nanmean(data_resp, axis = 1)) #if start > end
                if np.any(nanidx):

                    #drop equivalent number of long RTs
                    num_to_drop = np.sum(nanidx)
                    i = np.argpartition(RT, -num_to_drop)[-num_to_drop :] #find the indices of the longest RTs
                    nanidx[i] = True #mark the long trials as bad too
                    num_dropped[elec] = num_to_drop * 2 #dropping both ends of RT distribution

                    #calculate params for (single trials)
                    data_resp[nanidx,:] = np.nan
                    means[elec] = np.nanmean(data_resp, axis = 1)
                    stds[elec] = np.nanstd(data_resp, axis = 1)
                    maxes[elec] = np.nanmax(data_resp, axis = 1)
                    sums[elec] = np.nansum(data_resp, axis = 1)

                    medians[elec] = stats.nanmedian(data_resp, axis = 1)
                    maxes_rel[elec] = maxes[elec]-means[elec]
                    cofvar[elec] = stds[elec]/means[elec]
                    mins[elec] = np.nanmin(data_resp, axis = 1)

                    data_resp[nanidx,0] = -999
                    tmp_lat = np.nanargmax(data_resp, axis = 1)
                    tmp_lat = np.ndarray.astype(tmp_lat, dtype = float)
                    tmp_lat[nanidx] = np.nan
                    lats[elec] = tmp_lat
                    lats_pro[elec] = tmp_lat / np.sum(~np.isnan(data_resp), axis = 1)

                    data_resp[nanidx,0] = 9999
                    tmp_lat = np.nanargmin(data_resp, axis = 1)
                    tmp_lat = np.ndarray.astype(tmp_lat, dtype = float)
                    tmp_lat[nanidx] = np.nan
                    lats_min[elec] = tmp_lat

                    tmp_RT = np.ndarray.astype(RT, dtype = float)
                    tmp_RT[nanidx] = np.nan
                    RTs[elec] = tmp_RT

                else:
                    num_to_drop = 0
                    num_dropped[elec] = num_to_drop
                    lats[elec] = np.nanargmax(data_resp, axis = 1)
                    lats_min[elec] = np.nanargmin(data_resp, axis = 1)

                    lats_pro[elec] = np.nanargmax(data_resp, axis = 1) / np.sum(~np.isnan(data_resp), axis = 1)
                    RTs[elec] = RT
                    means[elec] = np.nanmean(data_resp, axis = 1)
                    stds[elec] = np.nanstd(data_resp, axis = 1)
                    maxes[elec] = np.nanmax(data_resp, axis = 1)
                    sums[elec] = np.nansum(data_resp, axis = 1)
                    mins[elec] = np.nanmin(data_resp, axis = 1)

                    medians[elec] = stats.nanmedian(data_resp, axis = 1)
                    maxes_rel[elec] = maxes[elec] - means[elec]
                    cofvar[elec] = stds[elec]/means[elec]

                #update dataframe
                #ix = np.where([(df.subj == subj) & (df.task == task) & (df.elec == elec)])[1][0]
                #df.ix[ix,'dropped'] = num_to_drop * 2 #dropping both ends of RT distribution

            if pattern == 'D':
                start_idx = start_idx + abs(bl_st)
                end_idx_resp = end_idx_resp + abs(st_resp)

                #create data matrices
                data_dur = np.empty(data.shape)
                for j, r in enumerate(RT):
                    tmp = data[j, start_idx : r + end_idx_resp]
                    tmp = np.pad(tmp, (0, data.shape[1]-len(tmp)), 'constant', constant_values = -999)
                    data_dur[j,:] = tmp
                data_dur[data_dur == -999] = np.nan

                nanidx = np.isnan(np.nanmean(data_dur, axis = 1)) #if start > end
                if np.any(nanidx):

                    #drop equivalent number of long RTs
                    num_to_drop = np.sum(nanidx)
                    i = np.argpartition(RT, -num_to_drop)[-num_to_drop :] #find the indices of the longest RTs
                    nanidx[i] = True #mark the long trials as bad too
                    num_dropped[elec] = num_to_drop * 2 #dropping both ends of RT distribution

                    #calculate params for single trials
                    data_dur[nanidx, :] = np.nan
                    means[elec] = np.nanmean(data_dur, axis = 1)
                    stds[elec] = np.nanstd(data_dur, axis = 1)
                    maxes[elec] = np.nanmax(data_dur, axis = 1)
                    sums[elec] = np.nansum(data_dur, axis = 1)

                    medians[elec] = stats.nanmedian(data_dur, axis = 1)
                    maxes_rel[elec] = maxes[elec] - means[elec]
                    cofvar[elec] = stds[elec]/means[elec]
                    mins[elec] = np.nanmin(data_dur, axis = 1)

                    data_dur[nanidx,0] = -999
                    tmp_lat = np.nanargmax(data_dur, axis = 1)
                    tmp_lat = np.ndarray.astype(tmp_lat, dtype = float)
                    tmp_lat[nanidx] = np.nan
                    lats[elec] = tmp_lat
                    lats_pro[elec] = tmp_lat / np.sum(~np.isnan(data_dur), axis = 1)

                    data_dur[nanidx, 0] = 9999
                    tmp_lat = np.nanargmin(data_dur, axis = 1)
                    tmp_lat = np.ndarray.astype(tmp_lat, dtype = float)
                    tmp_lat[nanidx] = np.nan
                    lats_min[elec] = tmp_lat

                    tmp_RT = np.ndarray.astype(RT, dtype = float)
                    tmp_RT[nanidx] = np.nan
                    RTs[elec] = tmp_RT
                else:
                    num_to_drop = 0
                    num_dropped[elec] = num_to_drop
                    means[elec] = np.nanmean(data_dur, axis = 1)
                    stds[elec] = np.nanstd(data_dur, axis = 1)
                    maxes[elec] = np.nanmax(data_dur, axis = 1)
                    sums[elec] = np.nansum(data_dur, axis = 1)

                    medians[elec] = stats.nanmedian(data_dur, axis = 1)
                    maxes_rel[elec] = maxes[elec] - means[elec]
                    cofvar[elec] = stds[elec]/means[elec]
                    mins[elec] = np.nanmin(data_dur, axis = 1)

                    lats[elec] = np.nanargmax(data_dur, axis = 1)
                    lats_min[elec] = np.nanargmin(data_dur, axis = 1)
                    lats_pro[elec] = np.nanargmax(data_dur, axis = 1) / np.sum(~np.isnan(data_dur), axis = 1)
                    RTs[elec] = RT

                #update dataframe
                #ix = np.where([(df.subj == subj) & (df.task == task) & (df.elec == elec)])[1][0]
                #df.ix[ix,'dropped'] = num_to_drop * 2 #dropping both ends of RT distribution

        #save stats (single trials)
        filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'unsmoothed', 'data', ''.join([subj, '_', task, '.p']))
        data_dict = {'active_elecs': active_elecs, 'lats_pro': lats_pro, 'sums':sums, 'means':means, 'stds':stds, 'maxes':maxes, 'lats':lats, 'srate': srate, 'bl_st':bl_st,'RTs':RTs, 'dropped':num_dropped, 'maxes_rel' : maxes_rel, 'medians' : medians, 'variations': cofvar, 'mins': mins, 'lats_min':lats_min}
        
        with open(filename, 'wb') as f:
            pickle.dump(data_dict, f)

        #save csv file (without dropping trials)
        for k in data_dict.keys():
            if k in ['bl_st', 'srate','active_elecs', 'dropped']:
                continue
            data = pd.DataFrame(data_dict[k])
        
            filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'zscore', 'csv_files', '_'.join([subj, task, k]) + '.csv')
            data.to_csv(filename, index = False)
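The trial-dropping rule used above for response- and duration-locked electrodes is easy to miss: trials whose window falls outside the epoch become all-NaN, and an equal number of the longest-RT trials is then discarded so both tails of the RT distribution are removed. A toy illustration (the numbers are invented for the example):

import numpy as np

RT = np.array([210, 450, 980, 120, 760])                   #toy RTs in samples
nanidx = np.array([False, False, False, True, False])      #trial 3: window fell outside the epoch

num_to_drop = np.sum(nanidx)                                #short-RT trials already lost
longest = np.argpartition(RT, -num_to_drop)[-num_to_drop:]  #indices of the longest RTs
nanidx[longest] = True                                      #drop those too
print(nanidx)            # [False False  True  True False]
print(num_to_drop * 2)   # total dropped, as stored in num_dropped[elec]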
예제 #18
0
import numpy as np
import os.path
import scipy.io
from loadmat import loadmat

import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
default_dpi = mpl.rcParamsDefault['figure.dpi']
mpl.rcParams['figure.dpi'] = default_dpi*2

# load gulfport campus image
img_fname = 'muufl_gulfport_campus_w_lidar_1.mat'
spectra_fname = 'tgt_img_spectra.mat'

dataset = loadmat(img_fname)['hsi']

hsi = dataset['Data']

# check out the shape of the data
n_r,n_c,n_b = hsi.shape
hsi.shape

# pull a 'random' pixel/spectrum
rr,cc = 150,150
spectrum = hsi[rr,cc,:]
spectrum

# plot a spectrum
plt.plot(spectrum)
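A quick follow-up sketch (not from the original notebook; the band indices are arbitrary choices): build a false-color composite from three bands of the cube loaded above.

import numpy as np

bands = [10, 30, 50]                      #assumed R/G/B band indices
rgb = hsi[:, :, bands].astype(np.float64)
rgb -= rgb.min(axis=(0, 1), keepdims=True)
rgb /= rgb.max(axis=(0, 1), keepdims=True)

plt.figure()
plt.imshow(rgb)
plt.title('false-color composite, bands %s' % bands)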
예제 #19
0
import numpy as np
import scipy.io as sio
from SOCfromOCVtemp import SOCfromOCVtemp
from OCVfromSOCtemp import OCVfromSOCtemp
from InitializeSPKF import initSPKF
from IterationSPKF import iterSPKF
from loadmat import loadmat
from RetrieveParamESCmodel import getParamESC
from matplotlib import pyplot as plt

"Load ESC battery model file"
E2model = loadmat('E2model.mat')
model = E2model['model']

"Load cell test data"
E2_DYN_15_P05 = loadmat('E2_DYN_15_P05')
DYNData = E2_DYN_15_P05['DYNData']
T = 5  ##Temperature = 5 Degree

time = DYNData['script1']['time'].flatten()
deltat = time[1] - time[0]
time = time - time[0]
current = DYNData['script1']['current'].flatten()
voltage = DYNData['script1']['voltage'].flatten()
soc = DYNData['script1']['soc'].flatten()

"Reserve space for predicted SOC its bounds"
sochat = np.zeros(soc.size)
socbound = np.zeros(soc.size)

"Define Covariance matrices"
예제 #20
0
def RT_median_split(DATASET, SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/', numiter = 1000):
    
    filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_withdesignation_EDITED_dropped_withROI.csv')
    df = pd.read_csv(filename)

    subj, task = DATASET.split('_')

    #load data
    filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_unsmoothed.mat')
    data_dict = loadmat.loadmat(filename)
    Params, srate, data_percent, active_elecs, RT = [data_dict.get(k) for k in ['Params', 'srate', 'data_percent', 'active_elecs', 'RTs']]
    bl_st = Params['bl_st']
    bl_st = bl_st/1000*srate

    #load RTs csv file   
    filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'csv_files', '_'.join([subj, task, 'RTs']) + '.csv')
    data = pd.read_csv(filename)
    RTs = np.round(np.array(data)[:,0])
    #don't remove baseline. want RT to include baseline so can index properly (here they already include baseline from Shadeplots_elecs_stats.py)        

    #sort trials by RTs
    idx = np.argsort(RTs)
    data_percent = data_percent[:, idx, :]
    RTs = RTs[idx]

    median_idx = np.floor(data_percent.shape[1]/2) #index of median split for this subject
    df_subj = df[(df.subj == subj) & (df.task == task)][['elec','start_idx','end_idx','start_idx_resp','end_idx_resp', 'pattern']]

    #iterate on electrodes
    for row in df_subj.itertuples():
        
        _, elec, start_idx, end_idx, start_idx_resp, end_idx_resp, pattern = row
        
        print('%s %s e%i, %s' %(subj, task, elec, pattern))

        eidx = np.where(elec == active_elecs)[0][0]

        skews, kurts, means, medians, means_l, means_s, medians_s, medians_l, skews_s, skews_l, kurts_s, kurts_l = [[] for i in range(12)]    
        skews_surr, kurts_surr, means_surr, medians_surr, means_l_surr, means_s_surr = [[] for i in range(6)]

        if (pattern == 'S') | (pattern == 'SR'):
            start_idx = start_idx + abs(bl_st)
            end_idx = end_idx + abs(bl_st)

            shorttrials, longtrials, trial_lengths = [[] for i in range(3)]
            for i, r in enumerate(RTs):
                if i < median_idx:
                    shorttrials.extend(data_percent[eidx, i, start_idx:end_idx])
                    trial_lengths.append(int(end_idx-start_idx)) #length of each short trial so can use for long trial indexing
                elif i > median_idx: #might only work with odd num of trials
                    longtrials.extend(data_percent[eidx, i, start_idx:end_idx])

        if (pattern == 'R'):
            start_idx = start_idx_resp
            end_idx = end_idx_resp

            shorttrials, longtrials, trial_lengths = [[] for i in range(3)]
            for i, r in enumerate(RTs):
                if i < median_idx:
                    shorttrials.extend(data_percent[eidx, i, int(r)+start_idx:int(r)+end_idx])
                    trial_lengths.append(int(end_idx-start_idx+1)) #length of each short trial so can use for long trial indexing
                elif i > median_idx: #might only work with odd num of trials
                    longtrials.extend(data_percent[eidx, i, int(r)+start_idx:int(r)+end_idx])

        if pattern == 'D':
            start_idx = start_idx + abs(bl_st)
            end_idx = end_idx_resp

            #create data vectors for long and short trials
            shorttrials, longtrials, trial_lengths = [[] for i in range(3)]
            for i, r in enumerate(RTs):
                if i < median_idx:
                    shorttrials.extend(data_percent[eidx, i, start_idx:int(r)+end_idx])
                    trial_lengths.append(int(r+end_idx-start_idx+1)) #length of each short trial so can use for long trial indexing
                elif i > median_idx: #might only work with odd num of trials
                    longtrials.extend(data_percent[eidx, i, start_idx:int(r)+end_idx])

            #bootstrap from long distribution
            print('\tbootstrapping from long distribution')
            for j in range(numiter):
                randidx = np.random.permutation(len(longtrials))[0:len(shorttrials)]
                longsample = np.array(longtrials)[randidx]

                #calculate stats for duration sample
                skews.append(stats.skew(longsample) - stats.skew(shorttrials))
                kurts.append(stats.kurtosis(longsample) - stats.kurtosis(shorttrials))
                means.append(np.mean(longsample) - np.mean(shorttrials))
                medians.append(np.median(longsample) - np.median(shorttrials))
                means_l.append(np.mean(longsample))
                skews_l.append(stats.skew(longsample))
                kurts_l.append(stats.kurtosis(longsample))
                medians_l.append(np.median(longsample))

        else: #calculate stats for non-duration electrodes; no need to subsample the long trials
            longsample = longtrials
            skews.append(stats.skew(longsample) - stats.skew(shorttrials))
            kurts.append(stats.kurtosis(longsample) - stats.kurtosis(shorttrials))
            means.append(np.mean(longsample) - np.mean(shorttrials))
            medians.append(np.median(longsample) - np.median(shorttrials)) 
            means_l.append(np.mean(longsample))
            skews_l.append(stats.skew(longsample))
            kurts_l.append(stats.kurtosis(longsample))
            medians_l.append(np.median(longsample))

        #calculate values for short trials (same for duration and nonduration)
        medians_s.append(np.median(shorttrials))
        means_s.append(np.mean(shorttrials))
        kurts_s.append(stats.kurtosis(shorttrials))
        skews_s.append(stats.skew(shorttrials))

        #create permuted difference distribution
        print ('\tcalculating surrogate stats...')
        for j in range(numiter):
            randidx = np.random.permutation(len(shorttrials)*2) #no overlap between 'short' and 'long' datapoints
            randidx_short = randidx[:len(randidx)//2]
            randidx_long = randidx[len(randidx)//2:]
            shorttrials_surr = data_percent[eidx,:,:].flatten()[randidx_short]
            longsample_surr = data_percent[eidx,:,:].flatten()[randidx_long]

            #calculate stats
            skews_surr.append(stats.skew(longsample_surr) - stats.skew(shorttrials_surr))
            kurts_surr.append(stats.kurtosis(longsample_surr) - stats.kurtosis(shorttrials_surr))
            means_surr.append(np.mean(longsample_surr) - np.mean(shorttrials_surr))
            medians_surr.append(np.median(longsample_surr) - np.median(shorttrials_surr))
            means_l_surr.append(np.mean(longsample_surr))
            means_s_surr.append(np.mean(shorttrials_surr)) 

        #calculate pvalue (one-sided, against the surrogate distribution)
        if np.mean(means) <= np.mean(means_surr):
            p_mean = np.mean(np.array(means_surr) < np.mean(means))
        else:
            p_mean = np.mean(np.array(means_surr) > np.mean(means))

        if np.mean(medians) <= np.mean(medians_surr):
            p_median = np.mean(np.array(medians_surr) < np.mean(medians))
        else:
            p_median = np.mean(np.array(medians_surr) > np.mean(medians))

        if np.mean(skews) <= np.mean(skews_surr):
            p_skew = np.mean(np.array(skews_surr) < np.mean(skews))
        else:
            p_skew = np.mean(np.array(skews_surr) > np.mean(skews))

        if np.mean(kurts) <= np.mean(kurts_surr):
            p_kurt = np.mean(np.array(kurts_surr) < np.mean(kurts))
        else:
            p_kurt = np.mean(np.array(kurts_surr) > np.mean(kurts))
            
        #save
        print('\tsaving')
        data_dict = {'p_mean' : p_mean, 'p_median' : p_median, 'p_skew' : p_skew, 'p_kurt' : p_kurt, 'pattern':pattern, 'skews':skews, 'kurts':kurts, 'means':means, 'medians':medians, 'means_s':means_s, 'means_l':means_l, 'medians_l':medians_l, 'medians_s':medians_s, 'skews_l':skews_l, 'skews_s':skews_s,'kurts_l':kurts_l, 'kurts_s':kurts_s, 'shorttrials':shorttrials, 'longtrials':longtrials, 'longsample':longsample, 'skew_surr':skews_surr, 'kurtosis_surr':kurts_surr, 'mean_surr':means_surr, 'median_surr':medians_surr}
        filename = os.path.join(SJdir, 'PCA', 'Stats', 'RT_median_split', '%s_%s_e%i_distributions.p' %(subj, task, elec))
        pickle.dump(data_dict, open(filename, "wb"))
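A hedged usage sketch (the subject, task, and electrode below are placeholders): DATASET is '<subj>_<task>', and each electrode's results are pickled into PCA/Stats/RT_median_split, so the p-values can be read back like this.

import os
import pickle

RT_median_split('GP15_DecisionAud', numiter = 1000)

SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/'
fname = os.path.join(SJdir, 'PCA', 'Stats', 'RT_median_split', 'GP15_DecisionAud_e1_distributions.p')
with open(fname, 'rb') as f:
    res = pickle.load(f)
print(res['pattern'], res['p_mean'], res['p_median'], res['p_skew'], res['p_kurt'])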
예제 #21
0
    root = r'C:\Users\attialex\Documents\distance_tuning'
    data_dir = r'F:\attialex\NP_DATA_corrected'
    umap_version = 'Vanilla_otherData_pcaumap'
    files = glob.glob(os.path.join(root,'*.mat'))
    umap_save_path = os.path.join(root,umap_version)
    

    if not os.path.isdir(umap_save_path):
        os.makedirs(umap_save_path)
    # import pdb
    # pdb.set_trace()
    shutil.copy2(os.path.abspath(__file__),umap_save_path)

    for fi in files:
        print(fi)
        data_out = lm.loadmat(fi)
        data_out = data_out['data_out']
        idx = data_out['pvals']<0.05
        if sum(idx)<30:
            continue

        _,sn_darkData=os.path.split(fi)
    #     data = lm.loadmat(os.path.join(data_path,sn_darkData))

        xcorrs = data_out['xcorrs'][idx]

        reducer = umap.UMAP(n_components=2)
        #reducer = PCA(n_components=2)
        X_new = reducer.fit_transform(xcorrs)

        labels,fig,mean_pwd = cluster_plotXCorrs(X_new,data_out['peak_loc_all'][idx]/5,xcorrs)
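A small sketch that is not part of the original script: scatter the 2-D embedding computed above, colored by each cell's autocorrelation peak location, and save it next to the copied script.

import os
import matplotlib.pyplot as plt

peak_loc = data_out['peak_loc_all'][idx] / 5
plt.figure()
sc = plt.scatter(X_new[:, 0], X_new[:, 1], c = peak_loc, s = 8, cmap = 'viridis')
plt.colorbar(sc, label = 'peak location')
plt.xlabel('UMAP-1')
plt.ylabel('UMAP-2')
plt.title(sn_darkData)
plt.savefig(os.path.join(umap_save_path, sn_darkData.replace('.mat', '_umap.png')))
plt.close()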
예제 #22
0
import traceback

if __name__ == '__main__':

    #files = glob.glob('/Volumes/T7/attialex/NP_DATA_corrected/*.mat')
    #im_path ='/Volumes/T7/attialex/umap_baseline'
    im_path = r'F:\attialex\umap_BLAverageSpatialMap_MEC_v2'
    files = glob.glob(r'F:\attialex\NP_DATA_corrected\np*.mat')
    if not os.path.isdir(im_path):
        os.makedirs(im_path)

    shutil.copy2(os.path.abspath(__file__), im_path)
    ds_factor = 5
    for fi in files:
        try:
            data = lm.loadmat(fi)
            gain_val = 0.8
            values = (data['trial_gain'] == gain_val) & (data['trial_contrast']
                                                         == 100)
            matches = (np.logical_not(values[:-1])) & (values[1:])
            onsets = np.where(matches)[0] + 1
            if len(onsets) == 0:
                continue

            trial_range = onsets[0] + np.arange(-5, 11)
            trial_range = np.arange(2, 21)

            try:
                anatomy = data['anatomy']
            except:
                print('no anatomy')
예제 #23
0
def shadeplots_allelecs(DATASET, SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta', thresh = 10, chunk_size = 100, baseline = -500, black_chunk_size = 0):
    """ 
    calculate onset and offset window for every active electrode (ignoring clusters)
    saves csv for each sub/task for easy plotting later
    includes real vs empty - 2 conditions difference

    """

    subj, task = DATASET.split('_')

    #filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_empty.mat')
    filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat')    
    data = loadmat.loadmat(filename)
    srate = data['srate']
    active_elecs = data['active_elecs']
    data = data['data_percent']

    #convert to srate
    bl_st = baseline/1000*srate
    chunksize = chunk_size/1000*srate
    black_chunksize = black_chunk_size/1000*srate

    if task in ['DecisionAud']:
        st_tp = 600/1000*srate
    elif task in ['DecisionVis']:
        st_tp = 500/1000*srate
    else:
        st_tp = 0

    #filename = os.path.join(SJdir, 'PCA', 'ShadePlots_allelecs', ''.join([subj, '_', task, '_empty.csv']))
    filename = os.path.join(SJdir, 'PCA', 'ShadePlots_allelecs', ''.join([subj, '_', task, '.csv']))
    subjs = list(); tasks = list(); pthr = list(); elecs = list(); starts = list(); ends = list(); 

    for i, e in enumerate(active_elecs):

        pvals = list();
        edata = data[i,:]
        nozero = np.copy(edata)
        nozero[:,nozero.mean(axis=0)<0] = 0 #zero out negative values in mean

        for j in np.arange(abs(bl_st)+st_tp, edata.shape[1]):
            (t, p) = stats.ttest_1samp(nozero[:,j], 0)
            pvals.append(p)
        thr = fdr_correct.fdr2(pvals, q = 0.05)
        H = (np.array(pvals) < thr).astype('int')

        if (thr>0):

            #find elecs with window that > chunksize and > threshold (10%)
            passed_thresh = edata[:, abs(bl_st)+st_tp::].mean(axis=0)>thresh
            sig_and_thresh = H * passed_thresh
            difference = np.diff(sig_and_thresh, n = 1, axis = 0)
            start_idx = np.where(difference==1)[0]+1
            end_idx = np.where(difference == -1)[0]

            if start_idx.size > end_idx.size: #last chunk goes until end
                end_idx = np.append(end_idx, int(edata.shape[1]-abs(bl_st)-st_tp))

            elif start_idx.size < end_idx.size:
                start_idx = np.append(0, start_idx) #starts immediately significant

            if (start_idx.size!=0):
                if (start_idx[0] > end_idx[0]): #starts immediately significant
                    start_idx = np.append(0, start_idx)
            if (start_idx.size!=0):
                if (end_idx[-1] < start_idx[-1]):#significant until end
                    end_idx = np.append(end_idx, int(edata.shape[1]-abs(bl_st)-st_tp))

            start_idx = start_idx + st_tp #shift by st_tp
            end_idx = end_idx + st_tp
            chunk = (end_idx - start_idx) >= chunksize
            if sum(chunk) > 0:
                #significant windows on elecs that passed threshold (10%) (ignoring threshold and chunksize)
                difference = np.diff(H, n = 1, axis = 0)
                start_idx = np.where(difference==1)[0]+1
                end_idx = np.where(difference == -1)[0]

                if start_idx.size > end_idx.size: #last chunk goes until end
                    end_idx = np.append(end_idx, int(edata.shape[1]-abs(bl_st)-st_tp))

                elif start_idx.size < end_idx.size:
                    start_idx = np.append(0, start_idx) #starts immediately significant

                if (start_idx.size!=0):
                    if (start_idx[0] > end_idx[0]): #starts immediately significant
                        start_idx = np.append(0, start_idx)
                if (start_idx.size!=0):
                    if (end_idx[-1] < start_idx[-1]):#significant until end
                        end_idx = np.append(end_idx, int(edata.shape[1]-abs(bl_st)-st_tp))

                start_idx = start_idx + st_tp #shift by st_tp
                end_idx = end_idx + st_tp

                black_chunk = (start_idx[1:] - end_idx[:-1])> black_chunksize #combine window separated by <200ms

                tmp = np.append(1,black_chunk).astype('bool')
                end_idx = end_idx[np.append(np.where(np.in1d(start_idx, start_idx[tmp]))[0][1:]-1, -1)]
                start_idx = start_idx[tmp]           

                #drop chunks that <100ms
                chunk = (end_idx - start_idx) >= chunksize
                start_idx = start_idx[chunk]
                end_idx = end_idx[chunk]
            else: #no chunks
                start_idx = np.zeros((1,))
                end_idx = np.zeros((1,))
                
        else: #thr<0
            start_idx = np.zeros((1,))
            end_idx = np.zeros((1,))

        subjs.extend([subj] * len(start_idx))
        tasks.extend([task] * len(end_idx))
        elecs.extend([e] * len(start_idx))
        pthr.extend([thr] * len(end_idx))
        starts.extend(start_idx)
        ends.extend(end_idx)

        data_dict = {'edata':edata, 'bl_st':bl_st, 'start_idx':start_idx, 'end_idx':end_idx, 'srate':srate,'thresh':thresh, 'chunksize':chunksize, 'black_chunksize':black_chunksize}
        #data_path = os.path.join(SJdir, 'PCA','ShadePlots_allelecs', 'data',''.join([subj, '_', task, '_e', str(e), '_empty.p'])
        data_path = os.path.join(SJdir, 'PCA','ShadePlots_allelecs', 'data',''.join([subj, '_', task, '_e', str(e), '.p']))

        with open(data_path, 'wb') as f:
            pickle.dump(data_dict, f)

    sig_windows = pd.DataFrame({'subj':subjs, 'task':tasks, 'elec':elecs, 'pthreshold':pthr, 'start_idx':starts, 'end_idx':ends})
    sig_windows = sig_windows[['subj','task','elec', 'start_idx','end_idx','pthreshold']]
    sig_windows.to_csv(filename)
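A hedged usage sketch (the subject/task names are placeholders): the csv written above has one row per significance window, with start_idx == end_idx == 0 marking electrodes where no window survived the chunk-size criterion.

import os
import pandas as pd

SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta'
sig = pd.read_csv(os.path.join(SJdir, 'PCA', 'ShadePlots_allelecs', 'GP15_DecisionAud.csv'))
with_windows = sig[(sig.start_idx != 0) | (sig.end_idx != 0)]
print(with_windows[['elec', 'start_idx', 'end_idx', 'pthreshold']])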
예제 #24
0
def shadeplots_elecs_stats_surr_random(id_num = 99):

    """ 
    calculates params per electrode on surrogate data. Surrogate data is the HG windows concatenated and randomly shuffled. Only active HG included.
    calculates mean, peak, latency, and std per trial for all electrodes in an active cluster - added medians and coefficient of variation and mins
    uses windows for individual electrodes from PCA/Stats/single_electrode_windows_csvs/single_electrode_windows_withdesignation_EDITED.csv
    saves pickle file with numbers per trial in ShadePlots_hclust/elecs/significance_windows/unsmoothed
    Uses unsmoothed data
    No latencies for duration elecs
    Added fake data with trial index 12/18/14
    """

    SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/'

    saveDir_csv = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'unsmoothed', 'csv_files', 'orig', 'surr_rand_' + str(id_num))
    saveDir_data= os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'unsmoothed', 'data', 'surr_rand_' + str(id_num))

    if not(os.path.exists(saveDir_csv)) and not(os.path.exists(saveDir_data)):
        os.mkdir(saveDir_csv)
        os.mkdir(saveDir_data)
        print('making:\n%s\n%s' %(saveDir_csv, saveDir_data))

    else:
        print('either %s\n or %s\n already exists!\nterminating...' %(saveDir_csv, saveDir_data))
        #return

    filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_csvs', 'single_electrode_windows_withdesignation_EDITED.csv')
    df = pd.read_csv(filename)

    for s_t in df.groupby(['subj','task']):

        subj, task = s_t[0]
        
        #load data
        filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_unsmoothed.mat')
        data_dict = loadmat.loadmat(filename)

        active_elecs, Params, srate, RT, data_all = [data_dict.get(k) for k in ['active_elecs','Params','srate','RTs','data_percent']]
        #bl_st = Params['bl_st']
        #bl_st = bl_st/1000*srate
        #if task in ['DecisionAud', 'DecisionVis']:
        
        bl_st = 500/1000*srate #for my data, remove cue from baseline (start/end_idx are relative to cue onset) - change 12/24 - okay with RT 12/25
        
        RT = RT + abs(bl_st) #RTs are calculated from stim (my data cue) onset, need to account for bl in HG_elecMTX_percent (for 500, not 1000 baseline 12/25)

        maxes_idx, medians_idx, cofvar, maxes_rel, medians, means, stds, maxes, lats, sums, lats_pro, RTs, num_dropped, mins, lats_min = [dict() for i in range(15)]

        for row in s_t[1].itertuples():
            _, _, subj, task, cluster, pattern, elec, start_idx, end_idx, start_idx_resp, end_idx_resp, _, _ = row
            eidx = np.in1d(active_elecs, elec)
            data = data_all[eidx,:,:].squeeze()


            #define start and end indices based on electrode type
            if any([(pattern == 'S'), (pattern == 'sustained'), (pattern == 'S+sustained'), (pattern == 'SR')]):
                start_idx = start_idx + abs(bl_st)
                end_idx = end_idx + abs(bl_st)
                if start_idx == end_idx:
                    continue #for SR elecs that don't have a stim-locked window (CP9, e91)
                
                print('%s %s %i %s\n' %(subj, task, elec, pattern))
                
                #make surrogate dataset based on activity window
                data_surr = data[:, start_idx:end_idx].flatten() #take HG windows
                randidx = np.random.permutation(len(data_surr))
                data_surr = data_surr.flatten()
                data_surr = data_surr[randidx] #shuffle
                data_surr = data_surr.reshape((data.shape[0], -1)) #reshape data into matrix                

                data_idx = np.ones_like(data[:, start_idx:end_idx])
                data_idx = (data_idx.transpose() * range(data_idx.shape[0])).transpose() #each trial labeled by trial number
                data_idx = data_idx.flatten()
                data_idx = data_idx[randidx]
                data_idx = data_idx.reshape((data.shape[0], -1))

                #calculate stats (single trials)
                means[elec] = data_surr.mean(axis = 1)
                stds[elec] = data_surr.std(axis = 1)
                maxes[elec] = data_surr.max(axis = 1)
                lats[elec] = data_surr.argmax(axis = 1)
                lats_min[elec] = data_surr.argmin(axis = 1)

                sums[elec] = data_surr.sum(axis = 1)
                lats_pro[elec] = lats[elec] / len(np.arange(start_idx, end_idx))
                RTs[elec] = RT
                
                medians[elec] = stats.nanmedian(data_surr, axis = 1)
                maxes_rel[elec] = maxes[elec]-means[elec]
                cofvar[elec] = stds[elec]/means[elec]
                mins[elec] = data_surr.min(axis = 1)
        
                medians_idx[elec] = stats.nanmedian(data_idx, axis = 1)
                maxes_idx[elec] = data_idx.max(axis = 1)

            if pattern == 'R':
                start_idx_resp = start_idx_resp
                end_idx_resp = end_idx_resp

                if start_idx_resp == end_idx_resp:
                    continue  #for inactive R elecs (not clear why on spreadsheet)

                print('%s %s %i %s\n' %(subj, task, elec, pattern))

                #create data matrix
                data_resp = np.empty(data.shape)
                for j, r in enumerate(RT):
                    tmp = data[j, r + start_idx_resp : r + end_idx_resp]
                    tmp = np.pad(tmp, (0, data.shape[1]-len(tmp)), 'constant', constant_values = -999)
                    data_resp[j,:] = tmp
                data_resp[data_resp == -999] = np.nan

                nanidx = np.isnan(np.nanmean(data_resp, axis = 1)) #if start > end for a trial (short RTs)
                if np.any(nanidx):

                    #drop equivalent number of long RTs
                    num_to_drop = np.sum(nanidx)
                    i = np.argpartition(RT, -num_to_drop)[-num_to_drop :] #find the indices of the longest RTs
                    nanidx[i] = True #mark the long trials as bad too
                    data_resp[nanidx,:] = np.nan

                    #drop nan from RTs
                    tmp_RT = np.ndarray.astype(RT, dtype = float)
                    tmp_RT[nanidx] = np.nan
                    RTs[elec] = tmp_RT
                    
                    #make surrogate data
                    data_surr = data_resp.flatten() #take HG window
                    data_surr_drop = np.isnan(data_surr) #for dropping trials from data_idx
                    data_surr = data_surr[~np.isnan(data_surr)] #remove nan (also drops trials that are completely nan)
                    randidx = np.random.permutation(len(data_surr)) #shuffle
                    data_surr = data_surr[randidx]
                    data_surr = data_surr.reshape((data_resp.shape[0],-1)) #reshape trials x time (no nan buffer)
                    data_surr = np.insert(data_surr, nanidx, np.empty((1, data_surr.shape[1])) * np.nan, axis = 0) #insert nan rows (numtrials of _surr == _resp)

                    #make index matrix
                    data_idx = np.ones_like(data_resp)
                    data_idx = (data_idx.transpose() * range(data_idx.shape[0])).transpose()
                    data_idx = data_idx.flatten() 
                    data_idx = data_idx[~data_surr_drop] #drop nan trials
                    data_idx = data_idx[randidx]
                    data_idx = data_idx.reshape((data_resp.shape[0], -1)) #reshape
                    data_idx = np.insert(data_idx, nanidx, np.empty((1, data_resp.shape[1])) * np.nan, axis = 0) #insert nan rows (numtrials of _idx == _resp)

                else: 
                    #make surrogate data
                    data_surr = data_resp.flatten() #take HG window
                    data_surr_drop = np.isnan(data_surr) #for dropping trials from data_idx
                    data_surr = data_surr[~np.isnan(data_surr)] #remove nan 

                    randidx = np.random.permutation(len(data_surr)) #shuffle
                    data_surr = data_surr[randidx]
                    data_surr = data_surr.reshape((data_resp.shape[0],-1)) #reshape                    

                    RTs[elec] = RT

                    data_idx = np.ones_like(data_resp)
                    data_idx = (data_idx.transpose() * range(data_idx.shape[0])).transpose() #each trial labeled by trial number
                    data_idx = data_idx.flatten()
                    data_idx = data_idx[~data_surr_drop] #drop nan trials based on data_surr
                    data_idx = data_idx[randidx]
                    data_idx = data_idx.reshape((data_resp.shape[0],-1)) #reshape                    
                    
                #reshape data_surr with nan buffer at end
                data_resp_surr = np.empty_like(data_resp)
                for j in range(data_surr.shape[0]):
                    tmp = data_surr[j,:]
                    tmp = np.pad(tmp, (0, data_resp.shape[1]-len(tmp)), 'constant', constant_values = -999)
                    data_resp_surr[j,:] = tmp
                data_resp_surr[data_resp_surr == -999] = np.nan

                #reshape data_idx with nan
                data_idx_surr = np.empty_like(data_resp)
                for j in range(data_idx.shape[0]):
                    tmp = data_idx[j,:]
                    tmp = np.pad(tmp, (0, data_resp.shape[1]-len(tmp)), 'constant', constant_values = -999)
                    data_idx_surr[j,:] = tmp
                data_idx_surr[data_idx_surr == -999] = np.nan
                                
                #calculate params for (single trials)
                means[elec] = np.nanmean(data_resp_surr, axis = 1)
                stds[elec] = np.nanstd(data_resp_surr, axis = 1)
                maxes[elec] = np.nanmax(data_resp_surr, axis = 1)
                sums[elec] = np.nansum(data_resp_surr, axis = 1)

                medians[elec] = stats.nanmedian(data_resp_surr, axis = 1)
                maxes_rel[elec] = maxes[elec]-means[elec]
                cofvar[elec] = stds[elec]/means[elec]
                mins[elec] = np.nanmin(data_resp_surr, axis = 1)

                medians_idx[elec] = stats.nanmedian(data_idx_surr, axis = 1)
                maxes_idx[elec] = np.nanmax(data_idx_surr, axis = 1)

            if pattern == 'D':
                start_idx = start_idx + abs(bl_st)
                end_idx_resp = end_idx_resp
                
                print('%s %s %i %s\n' %(subj, task, elec, pattern))

                #create data matrices
                data_dur = np.empty(data.shape)
                for j, r in enumerate(RT):
                    tmp = data[j, start_idx : r + end_idx_resp]
                    tmp = np.pad(tmp, (0, data.shape[1]-len(tmp)), 'constant', constant_values = -999)
                    data_dur[j,:] = tmp
                data_dur[data_dur == -999] = np.nan
    
                nanidx = np.isnan(np.nanmean(data_dur, axis = 1)) #if start > end
                if np.any(nanidx):
                    #drop equivalent number of long RTs
                    num_to_drop = np.sum(nanidx)
                    i = np.argpartition(RT, -num_to_drop)[-num_to_drop :] #find the indices of the longest RTs
                    nanidx[i] = True #mark the long trials as bad too
                    data_dur[nanidx, :] = np.nan

                    #drop nan from RTs
                    tmp_RT = np.ndarray.astype(RT, dtype = float)
                    tmp_RT[nanidx] = np.nan
                    RTs[elec] = tmp_RT
                else:
                    RTs[elec] = RT     

                #make surrogate data
                data_surr = data_dur.flatten() #take HG window
                data_surr_drop = np.isnan(data_surr) #for data_idx dropping points based on data_surr
                data_surr = data_surr[~np.isnan(data_surr)] #drop nan datapoints (pull out only HG)
                randidx = np.random.permutation(len(data_surr)) #shuffle
                data_surr = data_surr[randidx]

                #reshape data_surr with nan
                data_dur_surr = np.empty_like(data_dur)
                start = 0
                for j in range(data_dur.shape[0]):
                    trial_length = sum(~np.isnan(data_dur[j,:]))
                    if j>0:
                        start = end
                    end = start + trial_length
                    if trial_length>0: #not a nan trial
                        tmp = data_surr[start:end]
                        tmp = np.pad(tmp, (0, data_dur.shape[1]-len(tmp)), 'constant', constant_values = -999)
                        data_dur_surr[j,:] = tmp
                    else: #nan trial
                        data_dur_surr[j,:] = -999
                data_dur_surr[data_dur_surr == -999] = np.nan

                #make surrogate data for idx
                data_idx = np.ones_like(data_dur)
                data_idx = (data_idx.transpose() * range(data_idx.shape[0])).transpose() #trials x time with index for trial data
                data_idx = data_idx.flatten() 
                data_idx = data_idx[~data_surr_drop] #remove datapoints that are missing in data_surr (to get same number of points) (pull out HG)
                data_idx = data_idx[randidx] #shuffle

                #reshape data_idx with nan
                data_dur_idx = np.empty_like(data_dur)
                start = 0
                for j in range(data_dur.shape[0]):
                    trial_length = sum(~np.isnan(data_dur[j,:]))
                    if j>0:
                        start = end
                    end = start + trial_length
                    if trial_length>0: #not a nan trial
                        tmp = data_idx[start:end]
                        tmp = np.pad(tmp, (0, data_dur.shape[1]-len(tmp)), 'constant', constant_values = -999)
                        data_dur_idx[j,:] = tmp
                    else: #nan trial
                        data_dur_idx[j,:] = -999
                data_dur_idx[data_dur_idx == -999] = np.nan

                #calculate params for single trials
                means[elec] = np.nanmean(data_dur_surr, axis = 1)
                stds[elec] = np.nanstd(data_dur_surr, axis = 1)
                maxes[elec] = np.nanmax(data_dur_surr, axis = 1)
                sums[elec] = np.nansum(data_dur_surr, axis = 1)

                medians[elec] = stats.nanmedian(data_dur_surr, axis = 1)
                maxes_rel[elec] = maxes[elec] - means[elec]
                cofvar[elec] = stds[elec]/means[elec]
                mins[elec] = np.nanmin(data_dur_surr, axis = 1)

                medians_idx[elec] = stats.nanmedian(data_dur_idx, axis = 1)
                maxes_idx[elec] = np.nanmax(data_dur_idx, axis = 1)

        #save stats (single trials)
        filename = os.path.join(saveDir_data, ''.join([subj, '_', task, '_surr_rand.p']))
        data_dict = {'active_elecs': active_elecs, 'lats_pro': lats_pro, 'sums':sums, 'means':means, 'stds':stds, 'maxes':maxes, 'lats':lats, 'srate': srate, 'bl_st':bl_st,'RTs':RTs, 'dropped':num_dropped, 'maxes_rel' : maxes_rel, 'medians' : medians, 'variations': cofvar, 'mins': mins, 'lats_min':lats_min, 'medians_idx':medians_idx, 'maxes_idx':maxes_idx}

        with open(filename, 'wb') as f:
            pickle.dump(data_dict, f)

        #save csv file 
        for k in data_dict.keys():
            if k in ['bl_st', 'srate','active_elecs', 'dropped']:
                continue
            data = pd.DataFrame(data_dict[k])
        
            filename = os.path.join(saveDir_csv, '_'.join([subj, task, k]) + '_surr_rand.csv') #has nans for specific electrodes
            data.to_csv(filename, index = False)

    #save dataframe with dropped trials
    filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_withdesignation_EDITED_dropped_surr_rand_' + str(id_num) + '.csv')
    df.to_csv(filename)    
예제 #25
0
def shadeplots_allelecs_2conditions(DATASET, SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta', chunk_size = 100, baseline = -500):
    """ 
    calculate onset and offset window for difference between 2 conditions (real and empty)
    saves csv for each sub/task for easy plotting later
    #only relevant for EmoGen (not adjusted for my data start times)

    """

    subj, task = DATASET.split('_')

    filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat')
    data = loadmat.loadmat(filename)
    srate = data['srate']
    active_elecs = data['active_elecs']
    data = data['data_percent']

    filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_empty.mat')
    data_empty = loadmat.loadmat(filename)
    data_empty = data_empty['data_percent']

    #convert to srate
    bl_st = baseline/1000*srate
    chunksize = chunk_size/1000*srate
    st_tp = 0

    filename = os.path.join(SJdir, 'PCA', 'ShadePlots_allelecs', ''.join([subj, '_', task, '_real_vs_empty.csv']))
    subjs = list(); tasks = list(); pthr = list(); elecs = list(); starts = list(); ends = list(); 

    for i, e in enumerate(active_elecs):

        pvals = list();
        edata = data[i,:]
        edata_empty = data_empty[i,:]

        #ttest between conditions for every time point
        for j in np.arange(abs(bl_st)+st_tp, edata.shape[1]):
            (t, p) = stats.ttest_ind(edata[:,j], edata_empty[:,j], equal_var = True)
            pvals.append(p)
        thr = fdr_correct.fdr2(pvals, q = 0.05)
        H = np.array(np.array(pvals)<thr).astype('int')

        #significance windows
        difference = np.diff(H, n = 1, axis = 0)
        start_idx = np.where(difference==1)[0]+1
        end_idx = np.where(difference == -1)[0]

        if start_idx.size > end_idx.size: #last chunk goes until end
            end_idx = np.append(end_idx, int(edata.shape[1]-abs(bl_st)-st_tp))

        elif start_idx.size < end_idx.size:
            start_idx = np.append(0, start_idx) #starts immediately significant

        if (start_idx.size!=0):
            if (start_idx[0] > end_idx[0]): #starts immediately significant
                start_idx = np.append(0, start_idx)
        if (start_idx.size!=0):
            if (end_idx[-1] < start_idx[-1]):#significant until end
                end_idx = np.append(end_idx, int(edata.shape[1]-abs(bl_st)-st_tp))

        #drop chunks that < chunk_size
        chunk = (end_idx - start_idx) >= chunksize
        start_idx = start_idx[chunk]
        end_idx = end_idx[chunk]
 
        
        subjs.extend([subj] * len(start_idx))
        tasks.extend([task] * len(end_idx))
        elecs.extend([e] * len(start_idx))
        pthr.extend([thr] * len(end_idx))
        starts.extend(start_idx)
        ends.extend(end_idx)

        data_dict = {'edata':edata, 'edata_empty':edata_empty, 'bl_st':bl_st, 'start_idx':start_idx, 'end_idx':end_idx, 'srate':srate,'chunksize':chunksize}
        data_path = os.path.join(SJdir, 'PCA','ShadePlots_allelecs', 'data',''.join([subj, '_', task, '_e', str(e), '_real_vs_empty.p']))
        with open(data_path, 'wb') as f:
            pickle.dump(data_dict, f)

    sig_windows = pd.DataFrame({'subj':subjs, 'task':tasks, 'elec':elecs, 'pthreshold':pthr, 'start_idx':starts, 'end_idx':ends})
    sig_windows = sig_windows[['subj','task','elec', 'start_idx','end_idx','pthreshold']]
    sig_windows.to_csv(filename)
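fdr_correct.fdr2, used in both shade-plot functions above, is a project-local helper that is not included in this collection. As an illustration only of what such a threshold typically looks like (an assumption, not the project's actual implementation), a Benjamini-Hochberg step-up threshold over a vector of p-values can be computed as:

import numpy as np

def bh_threshold(pvals, q = 0.05):
    #largest p-value passing the Benjamini-Hochberg step-up rule, 0 if none pass
    #illustration only; the project's fdr_correct.fdr2 may differ
    p = np.sort(np.asarray(pvals))
    m = len(p)
    passed = p <= q * np.arange(1, m + 1) / m
    return p[passed].max() if passed.any() else 0.0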
예제 #26
0
def evaluate_lenet5(learning_rate=0.01, n_epochs=200,
                    dataset='../testnn.mat',
                    nkerns=[20, 20], batch_size=100):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(123)

    # datasets = load_data(dataset)
    datasets = loadmat(dataset=dataset, shuffle=shuffle, datasel=datasel, scaling=scaling, robust=robust)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    
    # the below comments are examples of using this cnn to deal with MNIST with input feature size 784 = 28*28
    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, idim0_H, idim0_W))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, idim0_H, idim0_W),
        filter_shape=(nkerns[0], 1, fdim0_H, fdim0_W),
        poolsize=(pdim0_H, pdim0_W)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], idim1_H, idim1_W),
        filter_shape=(nkerns[1], nkerns[0], fdim1_H, fdim1_W),
        poolsize=(pdim1_H, pdim1_W)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)
    
    
    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * idim2_H * idim2_W,
        n_out=fdim2,
        activation=T.tanh
    )

    # classify the values of the fully-connected sigmoidal layer
    nclass = max(train_set_y.eval()) + 1
    layer3 = LogisticRegression(input=layer2.output, n_in=fdim2, n_out=nclass)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    
    train_score = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs):
        if earlystop and done_looping:
            print 'early-stopping'
            break
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                training_losses = [train_score(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                this_training_loss = numpy.mean(training_losses)
                print('epoch %i, minibatch %i/%i, training error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_training_loss * 100.))
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                if earlystop:
                    break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
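The idim*/fdim*/pdim* names used above are module-level globals that are not included in this excerpt. A hedged sketch of how they are presumably derived (valid convolution followed by non-overlapping max pooling, using the 28x28 MNIST numbers from the comments):

idim0_H = idim0_W = 28                          # input image size (MNIST example)
fdim0_H = fdim0_W = 5                           # first conv filter size
pdim0_H = pdim0_W = 2                           # first pooling factor

idim1_H = (idim0_H - fdim0_H + 1) // pdim0_H    # (28-5+1)/2 = 12
idim1_W = (idim0_W - fdim0_W + 1) // pdim0_W

fdim1_H = fdim1_W = 5
pdim1_H = pdim1_W = 2
idim2_H = (idim1_H - fdim1_H + 1) // pdim1_H    # (12-5+1)/2 = 4
idim2_W = (idim1_W - fdim1_W + 1) // pdim1_W

fdim2 = 500                                     # hidden-layer width (assumed from the comments)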
예제 #27
0
def stack_by_azimuth(ax,path2rf,lowT,highT,SNR_min=0.0,bin_by=1,nbins=10,bazi0=-180.0,bazi1=180.0):
	"""
	"""
	from matplotlib import pylab as plt
	from sys import path
	path.append('/Users/mancinelli/PROG/SUBS/PYTHON/')
	from loadmat import loadmat
	from numpy import zeros,isnan,std,mean
	from numpy.random import rand
	
	RFs_all=[]
	RPs_all=[]
	BAZIs_all=[]

	path=path

	if lowT<1.0:
		file_name='%s/RF_Depth_%.1fs_%ds.mat' % (path2rf,lowT,highT)
	else:
		file_name='%s/RF_Depth_%ds_%ds.mat' % (path2rf,lowT,highT)

	print '...loading %s' % (file_name)
	
	snr_limit=True;
	if snr_limit:
		snrfile='%s/SNR_%ds.txt' % (path2rf,lowT)
		file=open(snrfile)
		SNR=[]
		for line in file.readlines():
			nfo=line.strip('\n').split()
			SNR.append(float(nfo[1]))
		file.close()

	matfile = loadmat(file_name)

	RFs = matfile["rfs"][:,:]
	
	BAZIs= matfile["BAZIsave"][:]
	RPs= matfile["RPsave"][:]
	depths = matfile["RF_Depth"][:,0]
	
	if len(SNR) != len(RFs):
		print '***Warning: len(SNR) != len(RFs) , %d , %d ' % (len(SNR), len(RFs))
		dum=raw_input('Press enter to continue')
		
	tmp1,tmp2,tmp3=[],[],[]
	for ii in range(len(RFs)):
		if SNR[ii]>SNR_min:
			
			tmp1.append(RFs[ii])
			tmp2.append(RPs[ii])
			tmp3.append(BAZIs[ii])
			
	RFs=tmp1
	RPs=tmp2
	BAZIs=tmp3
	
	if bin_by == 1:
		x1=bazi1
		x0=bazi0
		xlist=BAZIs
	else:
		x0=min(RPs)*0.98
		x1=max(RPs)*1.02
		xlist=RPs
	
	stack=zeros(nbins*len(RFs[0])).reshape(nbins,len(RFs[0]))
	Nstack=zeros(nbins*len(RFs[0])).reshape(nbins,len(RFs[0]))
	
	for iRF,RF in enumerate(RFs):
		x=xlist[iRF]
		ibin=int( (x-x0) / (x1-x0) *nbins)
		if ibin < 0 or ibin > (nbins-1):
			print '***Warning ibin out of range, skipping...'
			continue
		for jj in range(len(RFs[0])):
			if isnan(RF[jj]) == False:
				stack[ibin,jj]=stack[ibin,jj]+RF[jj]
				Nstack[ibin,jj]=Nstack[ibin,jj]+1
		
	for ibin in range(nbins):
		for jj in range(len(RFs[0])):
			if Nstack[ibin,jj]>0.:
				stack[ibin,jj]=stack[ibin,jj]/Nstack[ibin,jj]
				
	#demean and renorm
	for ibin in range(nbins):
		#stack[ibin,:]=stack[ibin,:]-mean(stack[ibin,:])
		norm = max(abs(stack[ibin,:]))
		if norm>0.0:
			stack[ibin,:]=stack[ibin,:]/norm
	
	
	y2=min(depths)
	y1=max(depths)
	
	ax.imshow(stack.T,aspect='auto',cmap='RdBu_r',origin='upper',interpolation='nearest',extent=[x0,x1,y1,y2])
	
	if bin_by==1:
		plt.xlabel('Back Azimuth (degrees)')
	else:
		plt.xlabel('Ray Parameter (s/km)')
	
	plt.ylabel('Depth (km)')
		
	return stack,y2,y1
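# A runnable toy sketch (not from the original code) of the binning logic in stack_by_azimuth:
# each trace is assigned to a bin with int((x - x0) / (x1 - x0) * nbins), traces are summed
# per bin while counting non-NaN samples, and the sums are divided by the counts to get
# bin-averaged stacks.
import numpy as np

rng = np.random.RandomState(0)
ntr, npts, nbins = 50, 20, 6
traces = rng.randn(ntr, npts)
bazis = rng.uniform(-180.0, 180.0, ntr)      # back azimuth of each trace
x0, x1 = -180.0, 180.0

stack = np.zeros((nbins, npts))
count = np.zeros((nbins, npts))
for trace, x in zip(traces, bazis):
    ibin = int((x - x0) / (x1 - x0) * nbins)
    if 0 <= ibin < nbins:
        good = ~np.isnan(trace)
        stack[ibin, good] += trace[good]
        count[ibin, good] += 1
stack[count > 0] /= count[count > 0]         # bin-wise mean with NaNs ignored
print(stack.shape)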
def shadeplots_elecs_stats_surr_random(subj, task, df_pattern, id_num = 99):

    """ 
    calculates params per electrode on surrogate data. Surrogate data is HG windows timepoints randomly shuffled.

    uses windows for individual electrodes from df_pattern (PCA/Stats/single_electrode_windows_csvs/single_electrode_windows_withdesignation_EDITED.csv)
    
    Uses unsmoothed data

    hardcoded - medians and maxes_rel and stds

    returns dictionary with features. each feature is dictionary of elecs
    """

    SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/'

    #load data
    filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent_unsmoothed.mat')
    data_dict = loadmat.loadmat(filename)

    active_elecs, Params, srate, RT, data_all = [data_dict.get(k) for k in ['active_elecs','Params','srate','RTs','data_percent']]

    bl_st = 500/1000*srate #for my data, remove cue from baseline - start/end_idx are relative to cue onset) - change 12/24 - okay with RT 12/25

    RT = RT + abs(bl_st) #RTs are calculated from stim (my data cue) onset, need to account for bl in HG_elecMTX_percent (for 500, not 1000 baseline 12/25)

    RTs, medians, maxes_rel, means, stds, maxes = [dict() for i in range(6)]
    
    s_t = df_pattern[((df_pattern.subj == subj) & (df_pattern.task == task))]
    for e in s_t.elec.values:

        _, subj, task, cluster, pattern, elec, start_idx, end_idx, start_idx_resp, end_idx_resp, _, _ = s_t[s_t.elec == e].values[0]
        
        #if elec != 52: #HARDCODED
        #    continue
        
        if pattern != 'D': #only run on duration electrodes
            continue


        print('%i...' %(elec), end = "")
        sys.stdout.flush()

        eidx = np.in1d(active_elecs, elec)
        data = data_all[eidx,:,:].squeeze()

        #define start and end indices based on electrode type
        if any([(pattern == 'S'), (pattern == 'sustained'), (pattern == 'S+sustained'), (pattern == 'SR')]):
            start_idx = start_idx + abs(bl_st)
            end_idx = end_idx + abs(bl_st)
            if start_idx == end_idx:
                continue #for SR elecs that dont' have stimlocked (CP9, e91)

            #make surrogate dataset based on activity window
            data_surr = data[:, start_idx:end_idx].flatten() #take HG windows
            randidx = np.random.permutation(len(data_surr))
            data_surr = data_surr[randidx] #shuffle
            data_surr = data_surr.reshape((data.shape[0], -1)) #reshape data into matrix

            #calculate stats (single trials)
            means[elec] = data_surr.mean(axis = 1)
            stds[elec] = data_surr.std(axis = 1)
            maxes[elec] = data_surr.max(axis = 1)
            RTs[elec] = RT

            medians[elec] = stats.nanmedian(data_surr, axis = 1)
            maxes_rel[elec] = maxes[elec]-means[elec]

        if pattern == 'R':
            start_idx_resp = start_idx_resp
            end_idx_resp = end_idx_resp

            if start_idx_resp == end_idx_resp:
                continue  #for inactive R elecs (not clear why on spreadsheet)

            #create data matrix
            data_resp = np.empty(data.shape)
            for j, r in enumerate(RT):
                tmp = data[j, r + start_idx_resp : r + end_idx_resp]
                tmp = np.pad(tmp, (0, data.shape[1]-len(tmp)), 'constant', constant_values = -999)
                data_resp[j,:] = tmp
            data_resp[data_resp == -999] = np.nan

            nanidx = np.isnan(np.nanmean(data_resp, axis = 1)) #if start > end for a trial (short RTs)
            if np.any(nanidx):

                #drop equivalent number of long RTs
                num_to_drop = np.sum(nanidx)
                i = np.argpartition(RT, -num_to_drop)[-num_to_drop :] #find the indices of the longest RTs
                nanidx[i] = True #mark the long trials as bad too
                data_resp[nanidx,:] = np.nan

                #drop nan from RTs
                tmp_RT = np.ndarray.astype(RT, dtype = float)
                tmp_RT[nanidx] = np.nan
                RTs[elec] = tmp_RT

                #make surrogate data
                data_surr = data_resp.flatten() #take HG window
                data_surr_drop = np.isnan(data_surr) #for dropping trials from data_idx
                data_surr = data_surr[~np.isnan(data_surr)] #remove nan (also drops trials that are completely nan)
                randidx = np.random.permutation(len(data_surr)) #shuffle
                data_surr = data_surr[randidx]
                data_surr = data_surr.reshape((data_resp.shape[0],-1)) #reshape trials x time (no nan buffer)
                data_surr = np.insert(data_surr, nanidx, np.empty((1, data_surr.shape[1])) * np.nan, axis = 0) #insert nan rows (numtrials of _surr == _resp)

            else: 
                #make surrogate data
                data_surr = data_resp.flatten() #take HG window
                data_surr_drop = np.isnan(data_surr) #for dropping trials from data_idx
                data_surr = data_surr[~np.isnan(data_surr)] #remove nan 

                randidx = np.random.permutation(len(data_surr)) #shuffle
                data_surr = data_surr[randidx]
                data_surr = data_surr.reshape((data_resp.shape[0],-1)) #reshape                    

                RTs[elec] = RT

            #reshape data_surr with nan buffer at end
            data_resp_surr = np.empty_like(data_resp)
            for j in range(data_surr.shape[0]):
                tmp = data_surr[j,:]
                tmp = np.pad(tmp, (0, data_resp.shape[1]-len(tmp)), 'constant', constant_values = -999)
                data_resp_surr[j,:] = tmp
            data_resp_surr[data_resp_surr == -999] = np.nan

            #calculate params for (single trials)
            means[elec] = np.nanmean(data_resp_surr, axis = 1)
            stds[elec] = np.nanstd(data_resp_surr, axis = 1)
            maxes[elec] = np.nanmax(data_resp_surr, axis = 1)

            medians[elec] = stats.nanmedian(data_resp_surr, axis = 1)
            maxes_rel[elec] = maxes[elec]-means[elec]

        if pattern == 'D':
            start_idx = start_idx + abs(bl_st)
            end_idx_resp = end_idx_resp

            #create data matrices
            data_dur = np.empty(data.shape)
            for j, r in enumerate(RT):
                tmp = data[j, start_idx : r + end_idx_resp]
                tmp = np.pad(tmp, (0, data.shape[1]-len(tmp)), 'constant', constant_values = -999)
                data_dur[j,:] = tmp
            data_dur[data_dur == -999] = np.nan

            nanidx = np.isnan(np.nanmean(data_dur, axis = 1)) #if start > end
            if np.any(nanidx):
                #drop equivalent number of long RTs
                num_to_drop = np.sum(nanidx)
                i = np.argpartition(RT, -num_to_drop)[-num_to_drop :] #find the indices of the longest RTs
                nanidx[i] = True #mark the long trials as bad too
                data_dur[nanidx, :] = np.nan

                #drop nan from RTs
                tmp_RT = np.ndarray.astype(RT, dtype = float)
                tmp_RT[nanidx] = np.nan
                RTs[elec] = tmp_RT
            else:
                RTs[elec] = RT     

            #make surrogate data
            data_surr = data_dur.flatten() #take HG window
            data_surr_drop = np.isnan(data_surr) #for data_idx dropping points based on data_surr
            data_surr = data_surr[~np.isnan(data_surr)] #drop nan datapoints (pull out only HG)
            randidx = np.random.permutation(len(data_surr)) #shuffle
            data_surr = data_surr[randidx]

            #reshape data_surr with nan
            data_dur_surr = np.empty_like(data_dur)
            start = 0
            for j in range(data_dur.shape[0]):
                trial_length = sum(~np.isnan(data_dur[j,:]))
                if j>0:
                    start = end
                end = start + trial_length
                if trial_length>0: #not a nan trial
                    tmp = data_surr[start:end]
                    tmp = np.pad(tmp, (0, data_dur.shape[1]-len(tmp)), 'constant', constant_values = -999)
                    data_dur_surr[j,:] = tmp
                else: #nan trial
                    data_dur_surr[j,:] = -999
            data_dur_surr[data_dur_surr == -999] = np.nan

            #calculate params for single trials
            means[elec] = np.nanmean(data_dur_surr, axis = 1)
            stds[elec] = np.nanstd(data_dur_surr, axis = 1)
            maxes[elec] = np.nanmax(data_dur_surr, axis = 1)

            medians[elec] = stats.nanmedian(data_dur_surr, axis = 1)
            maxes_rel[elec] = maxes[elec] - means[elec]

    #output dictionary of params per elec
    data_dict = {'RTs':RTs, 'maxes_rel' : maxes_rel, 'medians' : medians, 'stds': stds}
    return data_dict
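# A small standalone sketch (illustration only, not the original pipeline) of the surrogate
# construction above: pool all timepoints inside the HG activity window across trials,
# shuffle them, and reshape back to trials x time so the same per-trial statistics can be
# recomputed on temporally scrambled data.
import numpy as np

rng = np.random.RandomState(0)
data = rng.randn(40, 300)                    # trials x time
start_idx, end_idx = 100, 220                # activity window in samples

window = data[:, start_idx:end_idx].flatten()
window = window[rng.permutation(window.size)]         # shuffle timepoints
data_surr = window.reshape((data.shape[0], -1))       # back to trials x window

means = data_surr.mean(axis=1)
maxes_rel = data_surr.max(axis=1) - means             # same features as above
print(means.shape, maxes_rel.shape)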
Example #29
0
File: SOP_coq.py Project: fixif/examples
F1 = Filter(den=[
    1, -4.989216395071318, 9.956976990745105, -9.935631971923312,
    4.957198554483321, -0.9893271782278296
],
            num=[
                0.0001878726842913545, -0.0005635670357698394,
                0.0003756943544614141, 0.0003756943544614141,
                -0.0005635670357698394, 0.0001878726842913545
            ],
            name="Xilinx")
S1 = State_Space(F1)

# -------------------------
# Damien Lefebvre's example
# This large system ($n=10$) comes from control theory: the filter is used as a controller for active control of vehicle longitudinal oscillation~\cite{Lefe03}
d = loadmat('exDL.mat')['DL_Cor']
A, B, C, D = [mat(d[x]) for x in ('a', 'b', 'c', 'd')]
S2 = State_Space(
    Filter(A=A, B=B.transpose(), C=C, D=D,
           name='longitudinal'))  # 'longitudinal oscillation controller DL'

# SDR example (from Fig. 15 "Software-Defined Radio FPGA Cores: Building towards a Domain-Specific Language")
# Fs=10kHz, Fstop1=2.190, Fpass1=2.1972, Fpass2=2.1974, Fstop2=2.210, Astop1=200dB, Apass=0.1dB, Astop2=200dB
# This filter comes from a testbench in a Software-Defined Radio system~\cite[Fig. 15]{}. It is a 6th-order Butterworth filter designed with the following parameters: sampling frequency = 10 kHz, lower cutoff frequency = 2.190 kHz, higher cutoff frequency = 2.210 kHz, passband ripple = 0.1 dB, stopband attenuation = 200 dB.
d = loadmat('SDR.mat')
#WARNING: this filter is designed with SOS-structure. When converted to state-space (by matlab), I am not sure if the spectral radius is lower than 1
#(when fdatool makes the single-structure conversion, the filter is not stable anymore)
A, B, C, D = [mat(d[x]) for x in ('A', 'B', 'C', 'D')]
S3 = State_Space(Filter(A=A, B=B.transpose(), C=C, D=D,
                        name='SDR'))  # Software-Defined Radio
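# A quick, hedged check of the stability concern noted above (generic numpy, not part of the
# fixif example): the state-space filter is stable only if the spectral radius of A, i.e. the
# largest eigenvalue magnitude, is strictly below 1.
import numpy
rho = max(abs(numpy.linalg.eigvals(numpy.asarray(A, dtype=float))))
print('spectral radius of the SDR A matrix: %g (stable if < 1)' % rho)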
def shadeplots_elecs_stats(resplocked = False):
    """ 
    calculates mean, max, min, latency, median, and std on the mean trace for trial for all electrodes in an active cluster
    OLD - uses electrodes and windows from PCA/Stats/single_electrode_windows_withdesignation_EDITED.csv
    NOW - uses electrodes and windows from PCA/csvs_FINAL/final_windows.csv (after going through and editing them)
    calculates both stimulus and response locked parameters
    """

    SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta/'

    #filename = os.path.join(SJdir,'PCA', 'Stats', 'single_electrode_windows_csvs', 'single_electrode_windows_withdesignation_EDITED.csv')
    filename = os.path.join(SJdir, 'PCA', 'csvs_FINAL', 'final_windows.csv')
    df = pd.read_csv(filename)

    #df = df.query("subj not in ['GP27', 'GP44', 'ST28']") #drop unused subjects

    if resplocked:
        for s_t in df.groupby(['subj','task']):

            subj, task = s_t[0]
            #load data
            filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat') 
            data_dict = loadmat.loadmat(filename)

            active_elecs, Params, srate, RT, data_trials = [data_dict.get(k) for k in ['active_elecs','Params','srate','RTs','data_percent']]
            srate = float(srate)
            data_all = data_trials.mean(axis = 1) #mean across trials, (new shape is elecs x time)
            
            bl_st = -500/1000*srate

            medians, means, stds, maxes, lats, lats_pro, RTs, mins, lats_min, RTs_median, RTs_min = [dict() for i in range(11)]

            RT = RT  + abs(bl_st) #RTs are calculated from stim onset, need to account for bl in HG_elecMTX_percent 

            for row in s_t[1].itertuples():
                _, subj, task, cluster, pattern, elec, start_idx, end_idx, start_idx_resp, end_idx_resp = row
                eidx = np.in1d(active_elecs, elec)
                data = data_trials[eidx,:].squeeze()

                #only do response electrodes
                if pattern == 'R': 
                    start_idx_resp = start_idx_resp
                    end_idx_resp = end_idx_resp

                    if start_idx_resp == end_idx_resp:
                        continue  #for inactive R elecs (not clear why on spreadsheet)

                    #create data matrix
                    data_resp = np.empty((data_trials.shape[1], end_idx_resp-start_idx_resp))
                    for j, r in enumerate(RT):
                        tmp = data[j, r + start_idx_resp : r + end_idx_resp]
                        data_resp[j,:] = tmp
                    data_resp = data_resp.mean(axis = 0) #mean across trials, new shape is time (mean trace in the response window)

                    #calculate stats (mean trace)
                    means[elec] = data_resp.mean()
                    stds[elec] = data_resp.std()
                    maxes[elec] = data_resp.max()
                    lats[elec] = (data_resp.argmax()+1)/srate*1000
                    lats_min[elec] = (data_resp.argmin()+1)/srate*1000 #convert to ms
                    medians[elec] = stats.nanmedian(data_resp)
                    mins[elec] = data_resp.min()
                    RTs[elec] = (RT+Params['bl_st']/1000*srate).mean()/srate*1000 #from stimulus onset (adjusted for all subjects)
                    RTs_median[elec] = np.median(RT+Params['bl_st']/1000*srate)/srate*1000 #from stimulus onset (adjusted for all subjects)
                    RTs_min[elec] = np.min(RT+Params['bl_st']/1000*srate)/srate*1000 #from stimulus onset (adjusted for all subjects)


                #save stats (mean traces)
                filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed', 'mean_traces', 'data', ''.join([subj, '_', task, '_resplocked.p']))
                data_dict = {'means':means, 'stds':stds, 'maxes':maxes, 'lats':lats, 'srate': srate, 'bl_st':bl_st, 'RTs':RTs, 'medians' : medians, 'mins': mins, 'lats_min':lats_min, 'RTs_median': RTs_median, 'RTs_min': RTs_min}

                with open(filename, 'w') as f:
                    pickle.dump(data_dict, f)
                    f.close()

                #update csv file        
                for k in data_dict.keys():
                    if k in ['bl_st', 'srate','active_elecs']:
                        data_dict.pop(k, None)

                df_values = pd.DataFrame(data_dict)

                #save dataframe with values for all elecs for subject/task
                filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed', 'mean_traces', 'csv_files', '_'.join([subj, task, 'resplocked']) + '.csv')
                df_values.to_csv(filename)

                
    else: #not response locked 
        for s_t in df.groupby(['subj','task']):

            subj, task = s_t[0]

            #if ((subj == 'ST1') and (task == 'SelfAud') and (cluster == 2)): #drop bc garbage cluster
            #    continue

            #load data
            filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat') 
            data_dict = loadmat.loadmat(filename)

            active_elecs, Params, srate, RT, data_trials = [data_dict.get(k) for k in ['active_elecs','Params','srate','RTs','data_percent']]
            srate = float(srate)
            data_all = data_trials.mean(axis = 1) #mean across trials, (new shape is elecs x time)
            bl_st = -500/1000*srate #in data points

            filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'unsmoothed', 'data', ''.join([subj, '_', task, '.p'])) #for medians and means
            data_dict = pickle.load(open(filename, 'rb')) #keys are medians, means, for single trial values

            medians, means, stds, maxes, lats, RTs, mins, lats_min, RTs_median, RTs_min, lats_static, lats_min_static, lats_semi_static = [dict() for i in range(13)]

            RT = RT + abs(bl_st) #RTs are calculated from stim/cue onset, need to account for bl in HG_elecMTX_percent 

            for row in s_t[1].itertuples():
                _, subj, task, elec, pattern, cluster, start_idx, end_idx, start_idx_resp, end_idx_resp = row #in datapoints
                eidx = np.in1d(active_elecs, elec)
                data = data_all[eidx,:].squeeze() #mean trace

               
                #define start and end indices based on electrode type
                if any([(pattern == 'S'), (pattern == 'sustained'), (pattern == 'S+sustained'), (pattern == 'SR')]):
                    start_idx = start_idx + abs(bl_st)
                    end_idx = end_idx + abs(bl_st)

                if pattern == 'R': #fixed so can use stim locked onsets/offsets
                    start_idx = start_idx + abs(bl_st)
                    end_idx = end_idx + abs(bl_st)
                    
                if pattern == 'D':
                    start_idx = start_idx + abs(bl_st)
                    end_idx = np.median(RT) + end_idx_resp

                if start_idx == end_idx:
                    continue  #for inactive R elecs (not clear why on spreadsheet)

                #calculate stats (mean trace)
                means[elec] = np.nanmean(data_dict['means'][elec]) #from single trials
                medians[elec] = np.nanmean(data_dict['medians'][elec]) #from single trials
                maxes[elec] = data[start_idx:end_idx].max()
                lats[elec] = (data[start_idx:end_idx].argmax()+1)/srate*1000
                lats_min[elec] = (data[start_idx:end_idx].argmin()+1)/srate*1000
                stds[elec] = data[start_idx:end_idx].std()
                mins[elec] = data[start_idx:end_idx].min()
                RTs[elec] = (RT+bl_st).mean()/srate*1000 #from stimulus onset (adjusted for all subjects)
                RTs_median[elec] = np.median(RT+bl_st)/srate*1000 #from stimulus onset (adjusted for all subjects)
                RTs_min[elec] = np.min(RT+bl_st)/srate*1000 #from stimulus onset (adjusted for all subjects)
                lats_static[elec] = (data[abs(bl_st)::].argmax()+1)/srate*1000 #from stimulus onset to end (adjusted for all subjects)
                lats_min_static[elec] = (data[abs(bl_st)::].argmin()+1)/srate*1000 #from stimulus onset to end (adjusted for all subjects)
                lats_semi_static[elec] = (data[start_idx::].argmax()+1)/srate*1000

            #save stats (mean traces)
            #filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed', 'mean_traces', 'data', ''.join([subj, '_', task, '.p']))
            data_dict = {'means':means, 'stds':stds, 'maxes':maxes, 'lats':lats, 'srate': srate, 'bl_st':bl_st, 'RTs':RTs, 'medians' : medians, 'mins': mins, 'lats_min':lats_min, 'RTs_median': RTs_median, 'RTs_min' : RTs_min, 'lats_static' : lats_static, 'lats_min_static' : lats_min_static, 'lats_semi_static' : lats_semi_static}

            #with open(filename, 'w') as f:
            #    pickle.dump(data_dict, f)
            #    f.close()

            #update csv file        
            for k in data_dict.keys():
                if k in ['bl_st', 'srate','active_elecs']:
                    data_dict.pop(k, None)

            df_values = pd.DataFrame(data_dict)

            #save dataframe with values for all elecs for subject/task - later combined into mean_traces_all_elecs.csv in elec_values.ipynb
            filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed', 'mean_traces', 'csv_files', '_'.join([subj, task]) + '.csv')
            df_values.to_csv(filename)
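# A tiny worked example (numbers made up, illustration only) of the index bookkeeping used
# above: with a 500 ms pre-stimulus baseline, sample 0 of the data matrix is -500 ms, so
# window edges and RTs given relative to stimulus onset are shifted by abs(bl_st) samples,
# and a peak index found inside the window is converted back to milliseconds via srate.
srate = 1000.0
bl_st = -500.0 / 1000 * srate              # baseline length in samples (-500.0)
start_idx, end_idx = 120, 480              # window relative to stimulus onset, in samples
start_col = int(start_idx + abs(bl_st))    # column 620 of the data matrix
end_col = int(end_idx + abs(bl_st))        # column 980
peak_sample_in_window = 37                 # e.g. data[start_col:end_col].argmax()
latency_ms = (peak_sample_in_window + 1) / srate * 1000
print(start_col, end_col, latency_ms)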
Example #31
0
def run_pipeline(filename):

    data = lm.loadmat(filename)
    session_name = os.path.basename(filename)[0:-4]
    (good_cells, pos_edges, trial_idx, spikelocations, spike_idx,
     location_vec) = prepareData(data)
    n_trials = 30
    n_cells = len(good_cells)
    shape = (n_cells, len(pos_edges) - 1, n_trials)
    counts = np.zeros(shape, dtype=float)
    _fast_bin(counts, trial_idx, spikelocations, spike_idx)

    occupancy = np.zeros((len(pos_edges) - 1, n_trials), dtype=float)
    _fast_occ(occupancy, data['trial'] - 1, location_vec)

    for iT in range(n_trials):
        tmp = occupancy[:, iT]
        idx_v = np.flatnonzero(tmp)
        idx_n = np.flatnonzero(tmp == 0)
        tmp[idx_n] = np.interp(idx_n, idx_v, tmp[idx_v])
        occupancy[:, iT] = tmp

    spMapN = np.zeros(counts.shape)
    for iC in range(n_cells):
        spMapN[iC, :, :] = np.divide(counts[iC, :, :], occupancy)

    spMapN = spi.gaussian_filter(spMapN, (0, 2, 0))

    n_cells = len(good_cells)
    n_bins = len(pos_edges) - 1
    spFlat = np.zeros((n_cells, n_trials * n_bins))

    for iC in range(n_cells):
        spFlat[iC, :] = spMapN[iC, :, :].ravel(order='F')
    #spFlat = spFlat-spFlat.mean(axis=1)[:,np.newaxis]
    spFlat = normalize(spFlat, axis=0, norm='l2')
    for iC in range(n_cells):
        for iT in range(n_trials):
            start = iT * n_bins
            stop = (iT + 1) * n_bins
            trial_idx = np.arange(start, stop)
            tmp = spFlat[iC, trial_idx]
            spMapN[iC, :, iT] = tmp

    R = 5
    # Fit CP tensor decomposition (two times).
    U = tt.ncp_bcd(spMapN, rank=R, verbose=False)
    V = tt.ncp_bcd(spMapN, rank=R, verbose=False)

    # Align the two fits and print a similarity score.
    sim = tt.kruskal_align(U.factors,
                           V.factors,
                           permute_U=True,
                           permute_V=True)
    #print(sim)

    # Plot the results again to see alignment.
    fig, ax, po = tt.plot_factors(U.factors)
    tt.plot_factors(V.factors, fig=fig)
    fig.suptitle("aligned models")
    fig.tight_layout()
    fig.savefig('C:\\temp\\try3\\' + session_name + '_tca.png')

    ff = np.matmul(np.transpose(spFlat), spFlat)
    plt.figure()
    ax = plt.imshow(ff)
    plt.colorbar()
    plt.axvline(x=n_bins * 20, color='red', ls='--', linewidth=1)
    plt.axvline(x=n_bins * 21, color='green', ls='--', linewidth=1)
    plt.axhline(y=n_bins * 20, color='red', ls='--', linewidth=1)
    plt.axhline(y=n_bins * 21, color='green', ls='--', linewidth=1)
    plt.savefig('C:\\temp\\try3\\' + session_name + '_cov.png')
    plt.close('all')
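# A standalone illustration (not from the original pipeline) of the occupancy fix-up used
# above: bins with zero occupancy would make counts/occupancy divide by zero, so empty bins
# are filled by linear interpolation from the surrounding non-zero bins before normalizing.
import numpy as np

occ = np.array([2.0, 3.0, 0.0, 0.0, 5.0, 4.0, 0.0, 6.0])
idx_v = np.flatnonzero(occ)                # occupied bins
idx_n = np.flatnonzero(occ == 0)           # empty bins
occ[idx_n] = np.interp(idx_n, idx_v, occ[idx_v])
print(occ)                                 # [2. 3. 3.667 4.333 5. 4. 5. 6.]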
Example #32
0
def shadeplots_median_split(subj, task, SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta', thresh = 0, chunk_size = 0, baseline = -500, black_chunk_size = 0):
    """ 
    takes median split of RTs and calculates difference between them (short vs long RT trials)
    only runs on elecs that are easy/difficult from overlap csv
    calculate onset and offset window for given electrode.
    Compares short vs long RT trials for each electrode for the unique/overlap tasks
    saves csv for each sub/task for easy plotting later

    """

    filename = os.path.join(SJdir, 'Subjs', subj, task, 'HG_elecMTX_percent.mat')
    data = loadmat.loadmat(filename)
    srate = data['srate']
    elecs = data['active_elecs']
    RTs = data['RTs']
    data = data['data_percent']

    median_value = np.median(RTs)
    shortdata = data[:, RTs<median_value, :]
    longdata = data[:, RTs>median_value, :]
   

    #convert to srate
    bl_st = baseline/1000*srate
    chunksize = chunk_size/1000*srate
    black_chunksize = black_chunk_size/1000*srate

    subjs = list();  pthr = list(); elecs = list(); starts = list(); ends = list(); 

    overlapfile = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed', 'mean_traces', 'csv_files', subj+'_ovelapped_dur_elecs.csv')
    df = pd.read_csv(overlapfile)
    elecs_list = np.unique((df.easy.fillna(0) + df.difficult.fillna(0)).values)

    for i, e in enumerate(elecs_list):

        idx = np.in1d(elecs_list, e)
    
        edataShort = shortdata[idx,:,:].squeeze()
        edataLong = longdata[idx,:,:].squeeze()

        pvals = list();

        for j in np.arange(abs(bl_st), edataShort.shape[1]):
            (t, p) = stats.ttest_ind(edataShort[:,j], edataLong[:,j])
            pvals.append(p)
        thr = fdr_correct.fdr2(pvals, q = 0.05)
        H = (np.array(pvals) < thr).astype('int') #convert list to array before elementwise comparison

        if (thr>0):

            #find elecs with window that > chunksize and > threshold (10%)
            passed_thresh = abs(edataShort[:, abs(bl_st)::].mean(axis=0) - edataLong[:, abs(bl_st)::].mean(axis = 0)) >thresh #difference between blocks is > 10% threshold
            sig_and_thresh = H * passed_thresh
            difference = np.diff(sig_and_thresh, n = 1, axis = 0)
            start_idx = np.where(difference==1)[0]+1
            end_idx = np.where(difference == -1)[0]

            if start_idx.size > end_idx.size: #last chunk goes until end
                end_idx = np.append(end_idx, int(edataShort.shape[1]-abs(bl_st)))

            elif start_idx.size < end_idx.size:
                start_idx = np.append(0, start_idx) #starts immediately significant

            if (start_idx.size!=0):
                if (start_idx[0] > end_idx[0]): #starts immediately significant
                    start_idx = np.append(0, start_idx)

            if (start_idx.size!=0):
                if (end_idx[-1] < start_idx[-1]):#significant until end
                    end_idx = np.append(end_idx, int(edataShort.shape[1]-abs(bl_st)))

            chunk = (end_idx - start_idx) >= chunksize

            if sum(chunk) > 0:
                #significant windows on elecs that passed threshold (10%) (ignoring threshold and chunksize)
                difference = np.diff(H, n = 1, axis = 0)
                start_idx = np.where(difference==1)[0]+1
                end_idx = np.where(difference == -1)[0]

                if start_idx.size > end_idx.size: #last chunk goes until end
                    end_idx = np.append(end_idx, int(edataShort.shape[1]-abs(bl_st)))

                elif start_idx.size < end_idx.size:
                    start_idx = np.append(0, start_idx) #starts immediately significant

                if (start_idx.size!=0):
                    if (start_idx[0] > end_idx[0]): #starts immediately significant
                        start_idx = np.append(0, start_idx)

                if (start_idx.size!=0):
                    if (end_idx[-1] < start_idx[-1]):#significant until end
                        end_idx = np.append(end_idx, int(edataShort.shape[1]-abs(bl_st)))


                black_chunk = (start_idx[1:] - end_idx[:-1]) > black_chunksize #combine window separated by <200ms

                tmp = np.append(1,black_chunk).astype('bool')
                end_idx = end_idx[np.append(np.where(np.in1d(start_idx, start_idx[tmp]))[0][1:]-1, -1)]
                start_idx = start_idx[tmp]           

                #drop chunks that <100ms
                chunk = (end_idx - start_idx) >= chunksize
                start_idx = start_idx[chunk]
                end_idx = end_idx[chunk]

            else: #no chunks
                start_idx = np.zeros((1,))
                end_idx = np.zeros((1,))
                
        else: #thr<0
            start_idx = np.zeros((1,))
            end_idx = np.zeros((1,))

        subjs.extend([subj] * len(start_idx))
        elecs.extend([e] * len(start_idx))
        pthr.extend([thr] * len(end_idx))
        starts.extend(start_idx)
        ends.extend(end_idx)

        data_dict = {'edataShort':edataShort, 'edataLong':edataLong, 'bl_st':bl_st, 'start_idx':start_idx, 'end_idx':end_idx, 'srate':srate,'thresh':thresh, 'chunksize':chunksize, 'black_chunksize':black_chunksize}
        data_path = os.path.join(SJdir, 'PCA','ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed', 'mean_traces', 'csv_files', ''.join([subj,task, '_', 'Long_vs_Short', '_e', str(int(e)), '.p']))
       
        with open(data_path, 'w') as f:
            pickle.dump(data_dict, f)
            f.close()

    filename = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'elecs', 'significance_windows', 'smoothed','mean_traces', 'csv_files', '_'.join([subj, task, 'long_vs_short_RTs']) +'.csv')
    sig_windows = pd.DataFrame({'subj':subjs, 'elec':elecs, 'pthreshold':pthr, 'start_idx':starts, 'end_idx':ends})
    sig_windows = sig_windows[['subj', 'elec', 'start_idx','end_idx','pthreshold']]
    sig_windows.to_csv(filename)
    
    return sig_windows
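# A compact, self-contained sketch (illustration only) of the window-extraction idiom used
# above: for a 0/1 significance vector H, np.diff marks onsets (+1) and offsets (-1); the
# edge cases cover windows already significant at the start or still significant at the end,
# and windows shorter than chunksize are then dropped.
import numpy as np

H = np.array([0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1])
d = np.diff(H)
start_idx = np.where(d == 1)[0] + 1
end_idx = np.where(d == -1)[0]
if H[0] == 1:
    start_idx = np.append(0, start_idx)            # significant from the very start
if H[-1] == 1:
    end_idx = np.append(end_idx, len(H) - 1)       # significant until the end
chunksize = 3
keep = (end_idx - start_idx) >= chunksize          # same minimum-length rule as above
print(list(zip(start_idx[keep], end_idx[keep])))   # only the window long enough survives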
Example #33
0
def shadeplots_clusters_resp(DATASET, SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta', thresh = 15, chunk_size = 100, start_resp = -500, end_resp = 500, baseline = -500, black_chunk_size = 0):
    """ 
    calculate onset and offset window for every active electrode (ignoring clusters)
    saves csv for each sub/task for easy plotting later
    """

    subj, task = DATASET.split('_')

    filenames = glob.glob(os.path.join(SJdir, 'PCA', 'SingleTrials_hclust', '_'.join([subj, task, 'c*mat'])))

    subjs = list(); tasks = list(); pthr = list(); clusts = list(); starts = list(); ends = list(); 
    for filename in filenames:

        data = loadmat.loadmat(filename)
        srate = data['srate']
        cdata = data['cdata']
        RTs = data['RTs_all']

        cluster = int(filename.split('_')[-1].split('.')[0][1:])

        #convert to srate
        bl_st = baseline/1000*srate
        chunksize = chunk_size/1000*srate
        black_chunksize = black_chunk_size/1000*srate
        st_resp = int(start_resp/1000*srate)
        en_resp = int(end_resp/1000*srate)

        #shift RTs by baseline
        #if task in ['DecisionAud']:
        #    st_tp = 600/1000*srate
        #elif task in ['DecisionVis']:
        #    st_tp = 500/1000*srate
        #else:
        #    st_tp = 0
        #RTs = RTs+abs(bl_st)+st_tp
        
        RTs = RTs+abs(bl_st)
            
        #make resplocked cluster data
        RTs = RTs[RTs+st_resp>=0] #drop RTs that are too short for a full response window
        cdata_resp = np.zeros((len(RTs), len(np.arange(st_resp, en_resp))))

        for j, r in enumerate(RTs):
            cdata_resp[j,:] = cdata[j, r+st_resp:r+en_resp]

        nozero = np.copy(cdata_resp)
        nozero[:,nozero.mean(axis=0)<0] = 0 #zero out timepoints whose mean across trials is negative

        pvals = list();
        for j in np.arange(0, cdata_resp.shape[1]):
            (t, p) = stats.ttest_1samp(nozero[:,j], 0)
            pvals.append(p)

        thr = fdr_correct.fdr2(pvals, q = 0.05)
        H = (np.array(pvals) < thr).astype('int') #convert list to array before elementwise comparison

        if (thr>0):

            #find elecs with window that > chunksize and > threshold (10%)
            passed_thresh = cdata_resp.mean(axis = 0) > thresh
            sig_and_thresh = H * passed_thresh
            difference = np.diff(sig_and_thresh, n = 1, axis = 0)
            start_idx = np.where(difference==1)[0]+1
            end_idx = np.where(difference == -1)[0]

            start_idx = start_idx+st_resp #shift by 500
            end_idx = end_idx+st_resp

            if start_idx.size > end_idx.size: #last chunk goes until end
                end_idx = np.append(end_idx, en_resp)

            elif start_idx.size < end_idx.size:
                start_idx = np.append(st_resp, start_idx) #starts immediately significant

            if (start_idx.size!=0):
                if (start_idx[0] > end_idx[0]): #starts immediately significant
                    start_idx = np.append(st_resp, start_idx)
            if (start_idx.size!=0):
                if (end_idx[-1] < start_idx[-1]):#significant until end
                    end_idx = np.append(end_idx, en_resp)

            chunk = (end_idx - start_idx) >= chunksize

            if sum(chunk) > 0:
                #significant windows on those that passed threshold (10%) (ignoring threshold and chunksize)
                difference = np.diff(H, n = 1, axis = 0)
                start_idx = np.where(difference==1)[0]+1
                end_idx = np.where(difference == -1)[0]

                start_idx = start_idx+st_resp #shift by 500
                end_idx = end_idx+st_resp

                if start_idx.size > end_idx.size: #last chunk goes until end
                    end_idx = np.append(end_idx, en_resp)

                elif start_idx.size < end_idx.size:
                    start_idx = np.append(st_resp, start_idx) #starts immediately significant

                if (start_idx.size!=0):
                    if (start_idx[0] > end_idx[0]): #starts immediately significant
                        start_idx = np.append(st_resp, start_idx)
                if (start_idx.size!=0):
                    if (end_idx[-1] < start_idx[-1]):#significant until end
                        end_idx = np.append(end_idx, en_resp)

                black_chunk = (start_idx[1:] - end_idx[:-1])> black_chunksize #combine window separated by <200ms

                tmp = np.append(1,black_chunk).astype('bool')
                end_idx = end_idx[np.append(np.where(np.in1d(start_idx, start_idx[tmp]))[0][1:]-1, -1)]
                start_idx = start_idx[tmp]           

                #drop chunks that <100ms
                chunk = (end_idx - start_idx) >= chunksize
                start_idx = start_idx[chunk]
                end_idx = end_idx[chunk]
            else: #no chunks
                start_idx = np.zeros((1,))
                end_idx = np.zeros((1,))
                
        else: #thr<0
            start_idx = np.zeros((1,))
            end_idx = np.zeros((1,))

        subjs.extend([subj] * len(start_idx))
        tasks.extend([task] * len(end_idx))
        clusts.extend([cluster] * len(start_idx))
        pthr.extend([thr] * len(end_idx))
        starts.extend(start_idx)
        ends.extend(end_idx)
        
        data_dict = {'cdata_resp':cdata_resp, 'bl_st':bl_st, 'start_idx':start_idx, 'end_idx':end_idx, 'srate':srate, 'chunksize': chunksize, 'black_chunksize':black_chunksize, 'cluster':cluster, 'thresh': thresh, 'st_resp':st_resp, 'en_resp':en_resp, 'RTs':RTs}
        data_path = os.path.join(SJdir, 'PCA','ShadePlots_hclust', 'resplocked_all', 'data',''.join([subj, '_', task, '_c', str(cluster), '.p']))
        
        with open(data_path, 'w') as f:
            pickle.dump(data_dict, f)
            f.close()
    
    fname = os.path.join(SJdir, 'PCA', 'ShadePlots_hclust', 'resplocked_all', ''.join([subj, '_', task, '.csv']))
    sig_windows = pd.DataFrame({'subj':subjs, 'task':tasks, 'cluster':clusts, 'pthreshold':pthr, 'start_idx':starts, 'end_idx':ends})
    sig_windows = sig_windows[['subj','task','cluster', 'start_idx','end_idx','pthreshold']]
    sig_windows.to_csv(fname)
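# A minimal sketch (not from the original) of the response-locking step used above: shift
# the RTs by the baseline length so they index into the data matrix, drop trials whose
# window would start before sample 0, and cut a fixed window around each remaining RT.
import numpy as np

rng = np.random.RandomState(0)
data = rng.randn(20, 2500)                   # trials x time, first 500 samples = baseline
RTs = rng.randint(300, 1500, size=20)        # RTs in samples from stimulus onset
bl_st, st_resp, en_resp = -500, -500, 500

RTs = RTs + abs(bl_st)                       # shift into data-matrix coordinates
keep = RTs + st_resp >= 0                    # trials long enough for a full window
trials = np.flatnonzero(keep)
RTs = RTs[keep]
resp_locked = np.zeros((len(RTs), en_resp - st_resp))
for j, (tr, r) in enumerate(zip(trials, RTs)):
    resp_locked[j, :] = data[tr, r + st_resp : r + en_resp]
print(resp_locked.shape)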
import scipy.io
from loadmat import loadmat

import matplotlib as mpl
%matplotlib inline
default_dpi = mpl.rcParamsDefault['figure.dpi']
mpl.rcParams['figure.dpi'] = default_dpi*2
import matplotlib.pyplot as plt

from hsi_detectors import smf_detector,ace_detector

# load gulfport campus image
img_fname = 'muufl_gulfport_campus_w_lidar_1.mat'
spectra_fname = 'tgt_img_spectra.mat'

dataset = loadmat(img_fname)['hsi']

hsi = dataset['Data']
n_r,n_c,n_b = hsi.shape
wvl = dataset['info']['wavelength']
rgb = dataset['RGB']

# load the target signatures
spectra_dataset = loadmat(spectra_fname)
tgts = spectra_dataset['tgt_img_spectra']['spectra']
tgt_names = spectra_dataset['tgt_img_spectra']['names']

# check out the shape of the targets array
tgts.shape
# check out the target values
tgts
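# A hedged, from-scratch sketch of a spectral matched filter on this cube. This is NOT the
# hsi_detectors implementation (smf_detector/ace_detector are what the real analysis uses);
# it just spells out the textbook SMF formula, and it assumes tgts is laid out as
# bands x targets (transpose if the shape check above says otherwise).
import numpy as np

X = np.asarray(hsi, dtype=float).reshape(-1, n_b)        # pixels x bands
s = np.asarray(tgts, dtype=float)[:, 0]                  # first target signature
mu = X.mean(axis=0)
cov = np.cov(X, rowvar=False) + 1e-6 * np.eye(n_b)       # regularized background covariance
w = np.linalg.solve(cov, s - mu)                         # Sigma^-1 (s - mu)
scores = (X - mu).dot(w) / (s - mu).dot(w)
smf_map = scores.reshape(n_r, n_c)
plt.imshow(smf_map); plt.colorbar(); plt.title('hand-rolled SMF score (sketch)')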
Example #35
0
def test_cnn(trainpath,
             trainlist,
             validset,
             dumppath,
             learning_rate=0.01,
             n_epochs=200,
             batch_size=100,
             earlystop=True):
    """

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(123)

    # datasets = load_data(dataset)
    # NOTE: shuffle, datasel, scaling and robust are not parameters of this function;
    # they are expected to be module-level configuration variables.
    datasets = loadmat(trainpath=trainpath,
                       trainlist=trainlist,
                       validset=validset,
                       shuffle=shuffle,
                       datasel=datasel,
                       scaling=scaling,
                       robust=robust)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]

    n_train_batches /= batch_size
    n_valid_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # H - height; W - width
    # when the input is note salience matrix
    # idim0_H = 42
    # idim0_W = 36
    # fdim0_H = 6
    # fdim0_W = 6

    # when the input is chromagram
    idim0_H = 12
    idim0_W = 12

    fdim0_H = 2
    fdim0_W = 2
    pdim0_H = 2
    pdim0_W = 2

    idim1_H = (idim0_H - fdim0_H + 1) / pdim0_H
    idim1_W = (idim0_W - fdim0_W + 1) / pdim0_W

    fdim1_H = 2
    fdim1_W = 2
    pdim1_H = 2
    pdim1_W = 2

    idim2_H = (idim1_H - fdim1_H + 1) / pdim1_H
    idim2_W = (idim1_W - fdim1_W + 1) / pdim1_W

    fdim2 = 800

    nkerns = [20, 20]

    # the comments below walk through this CNN for a chromagram input with feature size 144 = 12*12
    # Reshape matrix of rasterized inputs of shape (batch_size, 12 * 12)
    # to a 4D tensor, compatible with our ConvPoolLayer
    # (12, 12) is the size of each chromagram patch
    layer0_input = x.reshape((batch_size, 1, idim0_H, idim0_W))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (12-2+1 , 12-2+1) = (11, 11)
    # maxpooling reduces this further to (11/2, 11/2) = (5, 5)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 5, 5)
    layer0 = ConvPoolLayer(rng,
                           input=layer0_input,
                           input_shape=(batch_size, 1, idim0_H, idim0_W),
                           filter_shape=(nkerns[0], 1, fdim0_H, fdim0_W),
                           poolsize=(pdim0_H, pdim0_W))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (5-2+1, 5-2+1) = (4, 4)
    # maxpooling reduces this further to (4/2, 4/2) = (2, 2)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 2, 2)

    layer1 = ConvPoolLayer(rng,
                           input=layer0.output,
                           input_shape=(batch_size, nkerns[0], idim1_H,
                                        idim1_W),
                           filter_shape=(nkerns[1], nkerns[0], fdim1_H,
                                         fdim1_W),
                           poolsize=(pdim1_H, pdim1_W))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * idim2_H * idim2_W),
    # i.e. (batch_size, 20 * 2 * 2) = (batch_size, 80) with the settings above.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer with ReLU activation
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * idim2_H * idim2_W,
                         n_out=fdim2,
                         activation=T.nnet.relu)

    # classify the values of the fully-connected layer
    nclass = max(train_set_y.eval()) + 1
    layer3 = LogisticRegression(input=layer2.output, n_in=fdim2, n_out=nclass)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    train_score = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.996  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    training_history = []
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs):
        if earlystop and done_looping:
            print 'early-stopping'
            break
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                #training_losses = [train_score(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                #this_training_loss = numpy.mean(training_losses)

                #training_history.append([iter,this_training_loss,this_validation_loss])
                training_history.append([iter, this_validation_loss])

                #                print('epoch %i, minibatch %i/%i, training error %f %%' %
                #                      (epoch, minibatch_index + 1, n_train_batches,
                #                       this_training_loss * 100.))
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                print('iter = %d' % iter)
                print('patience = %d' % patience)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    numpy.savez(dumppath,
                                model=params,
                                training_history=training_history,
                                best_validation_loss=best_validation_loss)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    print('best_validation_loss %f' % best_validation_loss)

            if patience <= iter:
                done_looping = True
                if earlystop:
                    break

    end_time = timeit.default_timer()
    # final save
    numpy.savez(dumppath,
                model=params,
                training_history=training_history,
                best_validation_loss=best_validation_loss)

    print(('Optimization complete with best validation score of %f %%, '
           'obtained at iteration %i, ') %
          (best_validation_loss * 100., best_iter + 1))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))
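# A short standalone check (not part of the original) of the layer-size arithmetic used in
# test_cnn for the 12x12 chromagram input: each ConvPoolLayer applies a "valid" convolution
# followed by max pooling, so the spatial size goes 12 -> (12-2+1)//2 = 5 -> (5-2+1)//2 = 2,
# and the flattened input to the hidden layer has nkerns[1] * 2 * 2 = 80 features.
def conv_pool_size(i, f, p):
    return (i - f + 1) // p

idim1 = conv_pool_size(12, 2, 2)             # 5
idim2 = conv_pool_size(idim1, 2, 2)          # 2
print(idim1, idim2, 20 * idim2 * idim2)      # 5 2 80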
Example #36
0
def test_DBN(finetune_lr, pretraining_epochs,
             pretrain_lr, cdk, usepersistent, training_epochs,
             L1_reg, L2_reg,
             hidden_layers_sizes,
             dataset, batch_size, output_folder, shuffle, scaling, dropout, first_layer, dumppath):
    """
    Demonstrates how to train and test a Deep Belief Network.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type cdk: int
    :param cdk: number of Gibbs steps in CD/PCD
    :type training_epochs: int
    :param training_epochs: maximal number of iterations ot run the optimizer
    :type dataset: string
    :param dataset: path the the pickled dataset
    :type batch_size: int
    :param batch_size: the size of a minibatch
    """
    print locals()
    
    # NOTE: datasel, robust, pretrain_dropout and earlystop are not parameters of this
    # function; they are expected to be module-level configuration variables.
    datasets = loadmat(dataset=dataset, shuffle=shuffle, datasel=datasel, scaling=scaling, robust=robust)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    
    print "%d training examples" % train_set_x.get_value(borrow=True).shape[0]
    print "%d feature dimensions" % train_set_x.get_value(borrow=True).shape[1]

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    nclass = max(train_set_y.eval())+1
    dbn = DBN(numpy_rng=numpy_rng, n_ins=train_set_x.get_value(borrow=True).shape[1],
              hidden_layers_sizes=hidden_layers_sizes,
              n_outs=nclass, L1_reg=L1_reg, L2_reg=L2_reg, first_layer=first_layer)
    print 'n_ins:%d'% train_set_x.get_value(borrow=True).shape[1]
    print 'n_outs:%d'% nclass
    
    # getting pre-training and fine-tuning functions
    # save images of the weights(receptive fields) in this output folder
    # if not os.path.isdir(output_folder):
        # os.makedirs(output_folder)
    # os.chdir(output_folder)
    
    print '... getting the pretraining functions'
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                cdk=cdk, usepersistent=usepersistent)
    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'

    train_fn, train_model, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )
    
    trng = MRG_RandomStreams(1234)
    use_noise = theano.shared(numpy.asarray(0., dtype=theano.config.floatX))
    if dropout:
        # dbn.x = dropout_layer(use_noise, dbn.x, trng, 0.8)
        for i in range(dbn.n_layers):
            dbn.sigmoid_layers[i].output = dropout_layer(use_noise, dbn.sigmoid_layers[i].output, trng, 0.5)

    # start-snippet-2
    #########################
    # PRETRAINING THE MODEL #
    #########################

    print '... pre-training the model'
    plotting_time = 0.
    start_time = timeit.default_timer()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            if pretrain_dropout:
                use_noise.set_value(1.) # use dropout at pre-training
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            lr=pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)
            
            '''
            for j in range(dbn.n_layers):
                if j == 0:
                    # Plot filters after each training epoch
                    plotting_start = timeit.default_timer()
                    # Construct image from the weight matrix
                    this_layer = dbn.rbm_layers[j]
                    this_field = this_layer.W.get_value(borrow=True).T
                    print "field shape (%d,%d)"%this_field.shape
                    image = Image.fromarray(
                        tile_raster_images(
                            X=this_field[0:100], # take only the first 100 fields (100 * n_visible)
                            #the img_shape and tile_shape depends on n_visible and n_hidden of this_layer
                            # if n_visible = 144 (12,12), if n_visible = 1512 (36,42)
                            img_shape=(12, 12),
                            tile_shape=(10, 10),
                            tile_spacing=(1, 1)
                        )
                    )
                    image.save('filters_at_epoch_%i.png' % epoch)
                    plotting_stop = timeit.default_timer()
                    plotting_time += (plotting_stop - plotting_start)
            '''

    end_time = timeit.default_timer()
    # end-snippet-2
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    ########################
    # FINETUNING THE MODEL #
    ########################

    print '... finetuning the model'
    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.    # wait this much longer when a new best is
                              # found
    improvement_threshold = 0.999  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0

    # while (epoch < training_epochs) and (not done_looping):
    while (epoch < training_epochs):
        if earlystop and done_looping:
            print 'early-stopping'
            break
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            use_noise.set_value(1.) # use dropout at training time
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                use_noise.set_value(0.) # stop dropout at validation/test time
                validation_losses = validate_model()
                training_losses = train_model()
                this_validation_loss = numpy.mean(validation_losses)
                this_training_loss = numpy.mean(training_losses)
                
                # also monitor the training losses
                print(
                    'epoch %i, minibatch %i/%i, training error %f %%'
                    % (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_training_loss * 100.
                    )
                )
                
                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%'
                    % (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)
                        
                    with open(dumppath, "wb") as f:
                        cPickle.dump(dbn.params, f)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    
                    '''
                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    '''

            if patience <= iter:
                done_looping = True
                if earlystop:
                    break

    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'obtained at iteration %i, '
            'with test performance %f %%'
        ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    )
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time)
                                              / 60.))
예제 #37
0
def shadeplots_faces_resp(subj, elecs_list, SJdir = '/home/knight/matar/MATLAB/DATA/Avgusta', thresh = 0, chunk_size = 0, baseline = -500, black_chunk_size = 0):
    """ 
    calculate onset and offset window for given electrode.
    compares face emo to face gen
    saves csv for each sub/task for easy plotting later

    """

    filename = os.path.join(SJdir, 'Subjs', subj, 'FaceEmo', 'HG_elecMTX_percent_eleclist.mat')
    data = loadmat.loadmat(filename)
    srate = data['srate']
    elecsEmo = data['elec_list']
    dataEmo = data['data_percent_resp']

    filename = os.path.join(SJdir, 'Subjs', subj, 'FaceGen', 'HG_elecMTX_percent_eleclist.mat')
    data = loadmat.loadmat(filename)
    srate = data['srate']
    elecsGen = data['elec_list']
    dataGen = data['data_percent_resp']

    # convert window sizes from ms to samples (float division avoids
    # Python 2 integer truncation)
    chunksize = chunk_size / 1000. * srate
    black_chunksize = black_chunk_size / 1000. * srate

    filename = os.path.join(SJdir, 'Anat', 'ShadePlots_Faces', '_'.join([subj, 'Emo', 'vs', 'Gen']) +'_resp.csv')
    subjs, pthr, elecs, starts, ends = [], [], [], [], []

    for i, e in enumerate(elecs_list):

        idx_Emo, idx_Gen = (i, i)
        edataEmo = dataEmo[idx_Emo,:].squeeze()
        edataGen = dataGen[idx_Gen,:].squeeze()

        if edataEmo.shape[1]>edataGen.shape[1]:
            edataEmo = edataEmo[:,:edataGen.shape[1]]
        else:
            edataGen = edataGen[:,:edataEmo.shape[1]]

        pvals = []

        for j in np.arange(0, edataEmo.shape[1]):
            (t, p) = stats.ttest_ind(edataEmo[:,j], edataGen[:,j])
            pvals.append(p)
        thr = fdr_correct.fdr2(pvals, q = 0.05)
        H = (np.array(pvals) < thr).astype('int')  # elementwise significance mask

        if (thr>0):

            # keep timepoints where the mean difference between conditions exceeds thresh
            passed_thresh = abs(edataEmo.mean(axis=0) - edataGen.mean(axis=0)) > thresh
            sig_and_thresh = H * passed_thresh
            difference = np.diff(sig_and_thresh, n = 1, axis = 0)
            start_idx = np.where(difference==1)[0]+1
            end_idx = np.where(difference == -1)[0]

            if start_idx.size > end_idx.size: #last chunk goes until end
                end_idx = np.append(end_idx, int(edataEmo.shape[1]))

            elif start_idx.size < end_idx.size:
                start_idx = np.append(0, start_idx) #starts immediately significant

            if (start_idx.size!=0):
                if (start_idx[0] > end_idx[0]): #starts immediately significant
                    start_idx = np.append(0, start_idx)

            if (start_idx.size!=0):
                if (end_idx[-1] < start_idx[-1]):#significant until end
                    end_idx = np.append(end_idx, int(edataEmo.shape[1]))

            chunk = (end_idx - start_idx) >= chunksize

            if sum(chunk) > 0:
                # recompute windows from the FDR mask alone (ignoring thresh and chunksize) for this electrode
                difference = np.diff(H, n = 1, axis = 0)
                start_idx = np.where(difference==1)[0]+1
                end_idx = np.where(difference == -1)[0]

                if start_idx.size > end_idx.size: #last chunk goes until end
                    end_idx = np.append(end_idx, int(edataEmo.shape[1]))

                elif start_idx.size < end_idx.size:
                    start_idx = np.append(0, start_idx) #starts immediately significant

                if (start_idx.size!=0):
                    if (start_idx[0] > end_idx[0]): #starts immediately significant
                        start_idx = np.append(0, start_idx)

                if (start_idx.size!=0):
                    if (end_idx[-1] < start_idx[-1]):#significant until end
                        end_idx = np.append(end_idx, int(edataEmo.shape[1]))


                black_chunk = (start_idx[1:] - end_idx[:-1]) > black_chunksize  # merge windows separated by less than black_chunksize

                tmp = np.append(1,black_chunk).astype('bool')
                end_idx = end_idx[np.append(np.where(np.in1d(start_idx, start_idx[tmp]))[0][1:]-1, -1)]
                start_idx = start_idx[tmp]           

                # drop chunks shorter than chunksize
                chunk = (end_idx - start_idx) >= chunksize
                start_idx = start_idx[chunk]
                end_idx = end_idx[chunk]

            else: #no chunks
                start_idx = np.zeros((1,))
                end_idx = np.zeros((1,))
                
        else:  # thr <= 0: no FDR-significant timepoints
            start_idx = np.zeros((1,))
            end_idx = np.zeros((1,))

        # shift indices so the window is expressed relative to the response (-500 to 500 ms)
        start_idx = start_idx - np.round(500. / 1000 * srate)
        end_idx = end_idx - np.round(500. / 1000 * srate)

        subjs.extend([subj] * len(start_idx))
        elecs.extend([e] * len(start_idx))
        pthr.extend([thr] * len(end_idx))
        starts.extend(start_idx)
        ends.extend(end_idx)

        data_dict = {'edataEmo':edataEmo, 'edataGen':edataGen, 'start_idx':start_idx, 'end_idx':end_idx, 'srate':srate,'thresh':thresh, 'chunksize':chunksize, 'black_chunksize':black_chunksize}
        data_path = os.path.join(SJdir, 'Anat','ShadePlots_Faces', 'data',''.join([subj, '_', 'Emo_vs_Gen', '_e', str(e), '_resp.p']))
       
        with open(data_path, 'wb') as f:
            pickle.dump(data_dict, f)

    sig_windows = pd.DataFrame({'subj':subjs, 'elec':elecs, 'pthreshold':pthr, 'start_idx':starts, 'end_idx':ends})
    sig_windows = sig_windows[['subj', 'elec', 'start_idx','end_idx','pthreshold']]
    sig_windows.to_csv(filename)
    
    return sig_windows
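# --- Added sketch (not part of the original example) ---
# The window bookkeeping above (np.diff on the 0/1 significance vector, then
# patching start/end indices when a run begins at sample 0 or lasts until the
# end) is the trickiest part of shadeplots_faces_resp.  A minimal illustration
# of the same idea, assuming a 1-D boolean array `sig` and a minimum run
# length in samples, is sketched below; it is not the original helper.
import numpy as np

def significant_windows(sig, min_len):
    """Return (starts, ends) of runs of True that are at least min_len samples long."""
    sig = np.asarray(sig).astype(int)
    # pad with zeros so every run has an explicit rising and falling edge
    edges = np.diff(np.concatenate(([0], sig, [0])))
    starts = np.where(edges == 1)[0]
    ends = np.where(edges == -1)[0]
    keep = (ends - starts) >= min_len
    return starts[keep], ends[keep]

# Example: of the two runs below, only the 4-sample run survives min_len=3.
# significant_windows([0, 1, 1, 0, 1, 1, 1, 1, 0], min_len=3)  # -> ([4], [8])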
예제 #38
0
def test_DBN(finetune_lr, pretraining_epochs, pretrain_lr, cdk, usepersistent,
             training_epochs, L1_reg, L2_reg, hidden_layers_sizes, dataset,
             batch_size, output_folder, shuffle, scaling, dropout, first_layer,
             dumppath):
    """
    Demonstrates how to train and test a Deep Belief Network.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type cdk: int
    :param cdk: number of Gibbs steps in CD/PCD
    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer
    :type dataset: string
    :param dataset: path to the pickled dataset
    :type batch_size: int
    :param batch_size: the size of a minibatch
    """
    print locals()

    datasets = loadmat(dataset=dataset,
                       shuffle=shuffle,
                       datasel=datasel,
                       scaling=scaling,
                       robust=robust)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    print "%d training examples" % train_set_x.get_value(borrow=True).shape[0]
    print "%d feature dimensions" % train_set_x.get_value(borrow=True).shape[1]

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    nclass = max(train_set_y.eval()) + 1
    dbn = DBN(numpy_rng=numpy_rng,
              n_ins=train_set_x.get_value(borrow=True).shape[1],
              hidden_layers_sizes=hidden_layers_sizes,
              n_outs=nclass,
              L1_reg=L1_reg,
              L2_reg=L2_reg,
              first_layer=first_layer)
    print 'n_ins:%d' % train_set_x.get_value(borrow=True).shape[1]
    print 'n_outs:%d' % nclass

    # SP contains an ordered list of (pos), ordered by chord class number [0,ydim-1]
    SP = balanced_seg.balanced(nclass, train_set_y)

    # getting pre-training and fine-tuning functions
    # save images of the weights(receptive fields) in this output folder
    # if not os.path.isdir(output_folder):
    # os.makedirs(output_folder)
    # os.chdir(output_folder)

    print '... getting the pretraining functions'
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                cdk=cdk,
                                                usepersistent=usepersistent)
    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'

    train_fn, train_model, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)

    trng = MRG_RandomStreams(1234)
    use_noise = theano.shared(numpy.asarray(0., dtype=theano.config.floatX))
    if dropout:
        # dbn.x = dropout_layer(use_noise, dbn.x, trng, 0.8)
        for i in range(dbn.n_layers):
            dbn.sigmoid_layers[i].output = dropout_layer(
                use_noise, dbn.sigmoid_layers[i].output, trng, 0.5)

    # start-snippet-2
    #########################
    # PRETRAINING THE MODEL #
    #########################

    print '... pre-training the model'
    plotting_time = 0.
    start_time = timeit.default_timer()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            if pretrain_dropout:
                use_noise.set_value(1.)  # use dropout at pre-training
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                # FIXME: n_train_batches is a fake item
                bc_idx = balanced_seg.get_bc_idx(SP, nclass)
                c.append(pretraining_fns[i](bc_idx=bc_idx, lr=pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)
            '''
            for j in range(dbn.n_layers):
                if j == 0:
                    # Plot filters after each training epoch
                    plotting_start = timeit.default_timer()
                    # Construct image from the weight matrix
                    this_layer = dbn.rbm_layers[j]
                    this_field = this_layer.W.get_value(borrow=True).T
                    print "field shape (%d,%d)"%this_field.shape
                    image = Image.fromarray(
                        tile_raster_images(
                            X=this_field[0:100], # take only the first 100 fields (100 * n_visible)
                            #the img_shape and tile_shape depends on n_visible and n_hidden of this_layer
                            # if n_visible = 144 (12,12), if n_visible = 1512 (36,42)
                            img_shape=(12, 12),
                            tile_shape=(10, 10),
                            tile_spacing=(1, 1)
                        )
                    )
                    image.save('filters_at_epoch_%i.png' % epoch)
                    plotting_stop = timeit.default_timer()
                    plotting_time += (plotting_stop - plotting_start)
            '''

    end_time = timeit.default_timer()
    # end-snippet-2
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))
    ########################
    # FINETUNING THE MODEL #
    ########################

    print '... finetuning the model'
    # early-stopping parameters
    patience = 10 * n_train_batches  # look at this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.999  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many minibatches before checking the network
    # on the validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0

    # while (epoch < training_epochs) and (not done_looping):
    while (epoch < training_epochs):
        if earlystop and done_looping:
            print 'early-stopping'
            break
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            use_noise.set_value(1.)  # use dropout at training time
            # FIXME: n_train_batches is a fake item
            bc_idx = balanced_seg.get_bc_idx(SP, nclass)
            minibatch_avg_cost = train_fn(bc_idx)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                use_noise.set_value(0.)  # stop dropout at validation/test time
                validation_losses = validate_model()
                training_losses = train_model()
                this_validation_loss = numpy.mean(validation_losses)
                this_training_loss = numpy.mean(training_losses)

                # also monitor the training losses
                print('epoch %i, minibatch %i/%i, training error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_training_loss * 100.))

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    with open(dumppath, "wb") as f:
                        cPickle.dump(dbn.params, f)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    '''
                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    '''

            if patience <= iter:
                done_looping = True
                if earlystop:
                    break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%, '
           'obtained at iteration %i, '
           'with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))
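# --- Added sketch (not part of the original example) ---
# The call dropout_layer(use_noise, layer_output, trng, 0.5) above toggles
# dropout via the shared variable `use_noise` (set to 1. during training and
# 0. at validation/test time).  A typical Theano implementation of such a
# helper, assuming `trng` is an MRG_RandomStreams instance and `p` is the keep
# probability, is sketched below; the project's actual helper may differ.
import theano.tensor as T

def dropout_layer(use_noise, state_before, trng, p=0.5):
    # training (use_noise == 1): multiply by a Bernoulli(p) mask
    # inference (use_noise == 0): scale by p so expected activations match
    return T.switch(use_noise,
                    state_before * trng.binomial(state_before.shape,
                                                 p=p, n=1,
                                                 dtype=state_before.dtype),
                    state_before * p)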
예제 #39
0
def main():

    # load the data
    hsi_image = loadmat('danforth_plant_ds551.mat')['plant']

    # record the image dimensions (rows, columns, bands)
    img_shape = hsi_image.shape
    n_r, n_c, n_b = hsi_image.shape

    # reshape the data because SPICE takes an MxN array, not a full HSI cube
    hsi_image = np.reshape(hsi_image,
                           (img_shape[0] * img_shape[1], img_shape[2]))
    # keep the full set of pixels (no masking of invalid points in this demo)
    M = hsi_image

    # down sample the data for the sake of time in this demo
    input_data = M.T.astype(float)
    ds_data = input_data[:, ::20]

    # get the default parameters from the SPICE.py file
    params = SPICEParameters()

    # run the spice algorithm on the down sampled data
    [endmembers, ds_proportions] = SPICE(ds_data, params)

    # prompt the user to see if they would like to graph the output
    if input('Would you like to plot the output? (Y/n): ') == 'n':
        return

    # plot the endmember spectra (band index versus value)
    n_em = endmembers.shape[1]
    plt.plot(endmembers)
    plt.legend([str(i + 1) for i in range(n_em)])
    plt.title('SPICE Endmembers')

    # unmix the data using the non-downsampled array and the endmembers that SPICE discovered
    s = input_data.max()
    P = unmix_qpp(input_data / s, endmembers / s)

    # re-ravel abundance maps
    P_imgs = []
    for i in range(n_em):
        map_lin = P[:, i]
        P_imgs.append(np.reshape(map_lin, (n_r, n_c)))

    # display abundance maps in the form of a subplot
    fig, axes = plt.subplots(2, int(n_em / 2) + 1, squeeze=True)
    for i in range(n_em):
        im = axes.flat[i].imshow(P_imgs[i], vmin=0, vmax=1)
        axes.flat[i].set_title('SPICE Abundance Map %d' % (i + 1))

    # add the original RGB image to the subplot
    # im = axes.flat[n_em].imshow(hsi['RGB'])
    # axes.flat[n_em].set_title('RGB Image')
    # fig.colorbar(im, ax=axes.ravel().tolist())

    # # delete any empty subplots
    # if (n_em % 2 == 0):
    #     fig.delaxes(axes.flatten()[(2*(int(n_em/2)+1)) -1])
    plt.show()
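# --- Added sketch (not part of the original example) ---
# unmix_qpp above solves, per pixel, a constrained least-squares problem that
# expresses the pixel spectrum as a combination of the SPICE endmembers.  As a
# rough (and much slower) illustration of that step, non-negative least
# squares from scipy can produce comparable abundance maps, minus the
# sum-to-one constraint a quadratic-programming unmixer typically enforces;
# this is an assumption about unmix_qpp's behaviour, not its implementation.
import numpy as np
from scipy.optimize import nnls

def unmix_nnls(data, endmembers):
    """data: bands x pixels, endmembers: bands x n_em -> pixels x n_em abundances."""
    n_pix = data.shape[1]
    n_em = endmembers.shape[1]
    P = np.zeros((n_pix, n_em))
    for j in range(n_pix):
        P[j], _ = nnls(endmembers, data[:, j])
    return P

# Usage with the variables from main():  P = unmix_nnls(input_data / s, endmembers / s)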