def load_recording(batch=None, cellid=None, reformat=True, by_sequence=True,
                   recording_uri=None, **context):
    options = {
        'cellid': cellid,
        'batch': batch,
        'rasterfs': 100,
        'includeprestim': 1,
        'stimfmt': 'ozgf',
        'chancount': 18,
        'pupil': 0,
        'stim': 1,
        'pertrial': 1,
        'runclass': 'RDT',
        'recache': False,
    }
    if recording_uri is None:
        recording_uri = baphy_data_path(**options)

    log.info('Loading recording from %s', recording_uri)
    rec = nems.recording.load_recording(recording_uri)

    if reformat:
        rec = reformat_RDT_recording(rec)

    if by_sequence:
        rec = average_away_epoch_occurrences(rec, '^SEQUENCE')

    return {'rec': rec}
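# Usage sketch for the xforms-style loader above. The batch/cellid values are
# hypothetical placeholders, not ids taken from this codebase; note that the
# loader returns a context-dict fragment rather than a bare recording.
ctx = load_recording(batch=269, cellid='chn020f-b1')  # hypothetical ids
rec = ctx['rec']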
def average_away_stim_occurrences(est, val, epoch_regex='^STIM_', **context):
    est = preproc.average_away_epoch_occurrences(est, epoch_regex=epoch_regex)
    val = preproc.average_away_epoch_occurrences(val, epoch_regex=epoch_regex)

    # mask out nan periods
    # d = np.isfinite(est['resp'].as_continuous()[[0], :])
    # log.info('found %d non-nans in est', np.sum(d))
    # est = est.create_mask()
    # est['mask'] = est['mask']._modified_copy(d)
    #
    # d = np.isfinite(val['resp'].as_continuous()[[0], :])
    # log.info('found %d non-nans in val', np.sum(d))
    # val = val.create_mask()
    # val['mask'] = val['mask']._modified_copy(d)

    return {'est': est, 'val': val}
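# If the disabled nan-masking above were re-enabled, the shared logic could be
# factored into a helper like this sketch. It uses the same calls as the
# commented block; `mask_finite` itself is a name introduced here, not part of
# the codebase.
def mask_finite(rec, signame='resp'):
    """Mask only the bins where the first channel of `signame` is finite."""
    d = np.isfinite(rec[signame].as_continuous()[[0], :])
    log.info('found %d non-nans', np.sum(d))
    rec = rec.create_mask()
    rec['mask'] = rec['mask']._modified_copy(d)
    return rec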
def test_average_away_epoch_occurrences(recording):
    averaged_recording = average_away_epoch_occurrences(recording, '^stim')

    as1 = averaged_recording['stim'].extract_epoch('stim1')
    as2 = averaged_recording['stim'].extract_epoch('stim2')
    s1 = recording['stim'].extract_epoch('stim1')
    s2 = recording['stim'].extract_epoch('stim2')

    # each epoch collapses from 2 occurrences to 1 averaged occurrence;
    # arrays are (occurrences x channels x time bins)
    assert as1.shape == (1, 3, 49)
    assert s1.shape == (2, 3, 49)
    assert np.all(as1[0] == np.mean(s1, axis=0))
    assert np.all(as2[0] == np.mean(s2, axis=0))

    # averaged epochs are laid out back-to-back: 49 bins at fs=50 -> 0.98 s each
    epochs = averaged_recording['stim'].epochs[['name', 'start', 'end']]
    assert epochs.iat[0, 0] == 'stim1'
    assert epochs.iat[1, 0] == 'stim2'
    assert epochs.iat[0, 2] == 0.98
    assert epochs.iat[1, 2] == 1.96
def plot_linear_and_weighted_psths_(batch, cellid, weights=None, subset=None):
    rec_file = nw.generate_recording_uri(cellid, batch, loadkey='env.fs100')
    rec = recording.load_recording(rec_file)
    rec['resp'] = rec['resp'].extract_channels([cellid])
    rec['resp'].fs = 200

    SR = get_SR(rec)

    # COMPUTE ALL FOLLOWING metrics using smoothed driven rate
    est, val = rec.split_using_epoch_occurrence_counts(epoch_regex='^STIM_')
    val = preproc.average_away_epoch_occurrences(val, epoch_regex='^STIM_')
    val['resp'] = add_condition_epochs(val['resp'])

    fh, w_corrs, l_corrs = plot_linear_and_weighted_psths(
        val, SR, signame='resp', weights=weights, subset=subset, addsig='resp')
    return fh, w_corrs, l_corrs
}

# get the name of the cached recording
uri = nb.baphy_load_recording_uri(cellid=cellid, batch=batch, **options)
rec = load_recording(uri)

# convert to rasterized signals from PointProcess and TiledSignal
rec['resp'] = rec['resp'].rasterize()
rec['stim'] = rec['stim'].rasterize()
rec['resp'] = rec['resp'].extract_channels([cellid])
rec.meta["cellid"] = cellid

# est, val = estimation, validation data sets
est, val = rec.split_using_epoch_occurrence_counts(epoch_regex="^STIM_")
est = average_away_epoch_occurrences(est, epoch_regex="^STIM_")
val = average_away_epoch_occurrences(val, epoch_regex="^STIM_")

# get matrices for fitting:
X_est = est['stim'].apply_mask().as_continuous()  # frequency x time
Y_est = est['resp'].apply_mask().as_continuous()  # neuron x time

# get matrices for testing model predictions:
X_val = val['stim'].apply_mask().as_continuous()
Y_val = val['resp'].apply_mask().as_continuous()

# find a stimulus to display
epoch_regex = '^STIM_'
epochs_to_extract = ep.epoch_names_matching(val.epochs, epoch_regex)
epoch = epochs_to_extract[0]
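# With (X_est, Y_est) and (X_val, Y_val) in hand, any generic regressor can be
# fit outside NEMS. A minimal sketch using scikit-learn ridge regression (an
# assumption for illustration, not part of this pipeline), assuming the masked
# matrices contain only finite values:
from sklearn.linear_model import Ridge

ridge = Ridge(alpha=1.0)
ridge.fit(X_est.T, Y_est.T)        # sklearn expects samples x features
Y_pred = ridge.predict(X_val.T).T  # transpose back to neuron x time
print('val cc:', np.corrcoef(Y_pred[0], Y_val[0])[0, 1])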
def load_TwoStim(batch, cellid, fit_epochs, modelspec_name, loader='env100',
                 modelspecs_dir='/auto/users/luke/Code/nems/modelspecs',
                 fs=100, get_est=True, get_stim=True, paths=None):
    # load into a recording object
    if not get_stim:
        loadkey = 'ns.fs100'
    else:
        raise RuntimeError('Put stimuli in batch')
    manager = BAPHYExperiment(cellid=cellid, batch=batch)
    options = {'rasterfs': 100, 'stim': False, 'resp': True}
    rec = manager.get_recording(**options)

    rec['resp'].fs = fs
    rec['resp'] = rec['resp'].extract_channels([cellid])

    # ------------------------------------------------------------------------
    # DATA PREPROCESSING
    #
    # GOAL: Split your data into estimation and validation sets so that you can
    # know when your model exhibits overfitting.

    # Method #1: Find which stimuli have the most reps, use those for val
    # if not get_stim:
    #     del rec.signals['stim']

    # Added Greg 9/22/21 for
    stim_epochs = ep.epoch_names_matching(rec['resp'].epochs, 'STIM_')

    val = rec.copy()
    val['resp'] = val['resp'].rasterize()
    val = preproc.average_away_epoch_occurrences(val, epoch_regex='^STIM_')

    if get_est:
        raise RuntimeError('Fix me')
        df0 = est['resp'].epochs.copy()
        df2 = est['resp'].epochs.copy()
        df0['name'] = df0['name'].apply(parse_stim_type)
        df0 = df0.loc[df0['name'].notnull()]
        df3 = pd.concat([df0, df2])
        est['resp'].epochs = df3
        est_sub = copy.deepcopy(est)
        est_sub['resp'] = est_sub['resp'].select_epochs(fit_epochs)
    else:
        est_sub = None

    df0 = val['resp'].epochs.copy()
    df2 = val['resp'].epochs.copy()
    # df0['name'] = df0['name'].apply(ts.parse_stim_type)
    df0['name'] = df0['name'].apply(ohel.label_ep_type)
    df0 = df0.loc[df0['name'].notnull()]
    df3 = pd.concat([df0, df2])
    val['resp'].epochs = df3
    val_sub = copy.deepcopy(val)
    val_sub['resp'] = val_sub['resp'].select_epochs(fit_epochs)

    # ------------------------------------------------------------------------
    # GENERATE SUMMARY STATISTICS
    if modelspec_name is None:
        return None, [est_sub], [val_sub]
    else:
        fit_epochs_str = "+".join([str(x) for x in fit_epochs])
        mn = loader + '_subset_' + fit_epochs_str + '.' + modelspec_name
        an_ = modelspecs_dir + '/' + cellid + '/' + mn
        an = glob.glob(an_ + '*')
        if len(an) > 1:
            warnings.warn('{} models found, loading an[0]:{}'.format(
                len(an), an[0]))
            an = [an[0]]
        if len(an) == 1:
            filepath = an[0]
            modelspecs = [ms.load_modelspec(filepath)]
            modelspecs[0][0]['meta']['modelname'] = mn
            modelspecs[0][0]['meta']['cellid'] = cellid
        else:
            raise RuntimeError('not fit')

        # generate predictions
        est_sub, val_sub = nems.analysis.api.generate_prediction(
            est_sub, val_sub, modelspecs)

        return modelspecs, est_sub, val_sub
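# Usage sketch for load_TwoStim (hypothetical cellid/batch and epoch labels):
# with get_est=False and modelspec_name=None, only the trial-averaged val set
# is returned and no model is loaded.
_, _, [val_sub] = load_TwoStim(batch=339, cellid='CLT007a-009-2',  # hypothetical ids
                               fit_epochs=['10', '01', '11'],
                               modelspec_name=None,
                               get_est=False, get_stim=False)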
def calc_psth_metrics(batch, cellid, parmfile=None, paths=None):
    # Time (in sec) to offset the start of the window used to calculate
    # threshold, excitatory percentage, and inhibitory percentage
    start_win_offset = 0

    if parmfile:
        manager = BAPHYExperiment(parmfile)
    else:
        manager = BAPHYExperiment(cellid=cellid, batch=batch)

    options = ohel.get_load_options(batch)  # gets options that will include gtgram if batch=339
    rec = manager.get_recording(**options)

    area_df = db.pd_query(
        f"SELECT DISTINCT area FROM sCellFile where cellid like '{manager.siteid}%%'")
    area = area_df.area.iloc[0]

    if rec['resp'].epochs[rec['resp'].epochs['name'] == 'PASSIVE_EXPERIMENT'].shape[0] >= 2:
        rec = ohel.remove_olp_test(rec)

    rec['resp'] = rec['resp'].extract_channels([cellid])
    resp = copy.copy(rec['resp'].rasterize())
    rec['resp'].fs = 100

    norm_spont, SR, STD = ohel.remove_spont_rate_std(resp)
    params = ohel.get_expt_params(resp, manager, cellid)

    epcs = rec['resp'].epochs[rec['resp'].epochs['name'] == 'PreStimSilence'].copy()
    ep2 = rec['resp'].epochs[rec['resp'].epochs['name'] == 'PostStimSilence'].iloc[0].copy()
    params['prestim'], params['poststim'] = epcs.iloc[0]['end'], ep2['end'] - ep2['start']
    params['lenstim'] = ep2['end']

    stim_epochs = ep.epoch_names_matching(resp.epochs, 'STIM_')

    if paths and cellid[:3] == 'TBR':
        print(f"Deprecated, run on {cellid} though...")
        stim_epochs, rec, resp = ohel.path_tabor_get_epochs(stim_epochs, rec, resp, params)

    epoch_repetitions = [resp.count_epoch(cc) for cc in stim_epochs]
    full_resp = np.empty((max(epoch_repetitions), len(stim_epochs),
                          (int(params['lenstim']) * rec['resp'].fs)))
    full_resp[:] = np.nan
    for cnt, epo in enumerate(stim_epochs):
        resps_list = resp.extract_epoch(epo)
        full_resp[:resps_list.shape[0], cnt, :] = resps_list[:, 0, :]

    # Calculate a few metrics
    corcoef = ohel.calc_base_reliability(full_resp)
    avg_resp = ohel.calc_average_response(full_resp, params)
    snr = compute_snr(resp)

    # Grab and label epochs that have two sounds in them (no null)
    presil, postsil = int(params['prestim'] * rec['resp'].fs), int(params['poststim'] * rec['resp'].fs)
    twostims = resp.epochs[resp.epochs['name'].str.count('-0-1') == 2].copy()
    ep_twostim = twostims.name.unique().tolist()
    ep_twostim.sort()

    ep_names = resp.epochs[resp.epochs['name'].str.contains('STIM_')].copy()
    ep_names = ep_names.name.unique().tolist()
    ep_types = list(map(ohel.label_ep_type, ep_names))
    ep_df = pd.DataFrame({'name': ep_names, 'type': ep_types})

    cell_df = []
    for cnt, stimmy in enumerate(ep_twostim):
        kind = ohel.label_pair_type(stimmy)
        seps = (stimmy.split('_')[1], stimmy.split('_')[2])
        BG, FG = seps[0].split('-')[0][2:], seps[1].split('-')[0][2:]

        Aepo, Bepo = 'STIM_' + seps[0] + '_null', 'STIM_null_' + seps[1]

        rAB = resp.extract_epoch(stimmy)
        rA, rB = resp.extract_epoch(Aepo), resp.extract_epoch(Bepo)

        fn = lambda x: np.atleast_2d(sp.smooth(x.squeeze(), 3, 2) - SR)
        rAsm = np.squeeze(np.apply_along_axis(fn, 2, rA))
        rBsm = np.squeeze(np.apply_along_axis(fn, 2, rB))
        rABsm = np.squeeze(np.apply_along_axis(fn, 2, rAB))

        rA_st, rB_st = rAsm[:, presil:-postsil], rBsm[:, presil:-postsil]
        rAB_st = rABsm[:, presil:-postsil]

        rAm, rBm = np.nanmean(rAsm, axis=0), np.nanmean(rBsm, axis=0)
        rABm = np.nanmean(rABsm, axis=0)

        AcorAB = np.corrcoef(rAm, rABm)[0, 1]  # Corr between resp to A and resp to dual
        BcorAB = np.corrcoef(rBm, rABm)[0, 1]  # Corr between resp to B and resp to dual

        A_FR, B_FR, AB_FR = np.nanmean(rA_st), np.nanmean(rB_st), np.nanmean(rAB_st)

        min_rep = np.min((rA.shape[0], rB.shape[0]))  # only will do something if SoundRepeats==Yes
        lin_resp = np.nanmean(rAsm[:min_rep, :] + rBsm[:min_rep, :], axis=0)
        supp = np.nanmean(lin_resp - AB_FR)

        AcorLin = np.corrcoef(rAm, lin_resp)[0, 1]  # Corr between resp to A and resp to lin
        BcorLin = np.corrcoef(rBm, lin_resp)[0, 1]  # Corr between resp to B and resp to lin

        Apref, Bpref = AcorAB - AcorLin, BcorAB - BcorLin
        pref = Apref - Bpref

        # if params['Binaural'] == 'Yes':
        #     dA, dB = ohel.get_binaural_adjacent_epochs(stimmy)
        #
        #     rdA, rdB = resp.extract_epoch(dA), resp.extract_epoch(dB)
        #     rdAm = np.nanmean(np.squeeze(np.apply_along_axis(fn, 2, rdA))[:, presil:-postsil], axis=0)
        #     rdBm = np.nanmean(np.squeeze(np.apply_along_axis(fn, 2, rdB))[:, presil:-postsil], axis=0)
        #
        #     ABcordA = np.corrcoef(rABm, rdAm)[0, 1]  # Corr between resp to AB and resp to BG swap
        #     ABcordB = np.corrcoef(rABm, rdBm)[0, 1]  # Corr between resp to AB and resp to FG swap

        cell_df.append({'epoch': stimmy,
                        'kind': kind,
                        'BG': BG,
                        'FG': FG,
                        'AcorAB': AcorAB,
                        'BcorAB': BcorAB,
                        'AcorLin': AcorLin,
                        'BcorLin': BcorLin,
                        'Apref': Apref,
                        'Bpref': Bpref,
                        'pref': pref,
                        'combo_FR': AB_FR,
                        'bg_FR': A_FR,
                        'fg_FR': B_FR,
                        'supp': supp})

    cell_df = pd.DataFrame(cell_df)
    cell_df['SR'], cell_df['STD'] = SR, STD
    # cell_df['corcoef'], cell_df['avg_resp'], cell_df['snr'] = corcoef, avg_resp, snr
    cell_df.insert(loc=0, column='area', value=area)

    return cell_df

    # NOTE: everything below never executes; it follows the return statement above.

    # COMPUTE ALL FOLLOWING metrics using smoothed driven rate
    # est, val = rec.split_using_epoch_occurrence_counts(rec, epoch_regex='^STIM_')
    val = rec.copy()
    val['resp'] = val['resp'].rasterize()
    val['stim'] = val['stim'].rasterize()
    val = preproc.average_away_epoch_occurrences(val, epoch_regex='^STIM_')

    # smooth and subtract SR
    fn = lambda x: np.atleast_2d(sp.smooth(x.squeeze(), 3, 2) - SR)
    val['resp'] = val['resp'].transform(fn)
    val['resp'] = ohel.add_stimtype_epochs(val['resp'])

    if val['resp'].count_epoch('REFERENCE'):
        epochname = 'REFERENCE'
    else:
        epochname = 'TRIAL'
    sts = val['resp'].epochs['start'].copy()
    nds = val['resp'].epochs['end'].copy()
    sts_rec = rec['resp'].epochs['start'].copy()
    val['resp'].epochs['end'] = val['resp'].epochs['start'] + params['prestim']
    ps = val['resp'].select_epochs([epochname]).as_continuous()
    ff = np.isfinite(ps)
    SR_av = ps[ff].mean() * resp.fs
    SR_av_std = ps[ff].std() * resp.fs

    # Compute max over single-voice trials
    val['resp'].epochs['end'] = nds
    val['resp'].epochs['start'] = sts
    val['resp'].epochs['start'] = val['resp'].epochs['start'] + params['prestim']
    TotalMax = np.nanmax(val['resp'].as_continuous())
    ps = np.hstack((val['resp'].extract_epoch('10').flatten(),
                    val['resp'].extract_epoch('01').flatten()))
    SinglesMax = np.nanmax(ps)

    # Compute threshold, excitatory percentage, and inhibitory percentage
    prestim, poststim = params['prestim'], params['poststim']
    val['resp'].epochs['end'] = nds
    val['resp'].epochs['start'] = sts
    val['resp'].epochs['start'] = val['resp'].epochs['start'] + prestim + start_win_offset
    val['resp'].epochs['end'] = val['resp'].epochs['end'] - poststim
    thresh = np.array(((SR + SR_av_std) / resp.fs,
                       (SR - SR_av_std) / resp.fs))
    thresh = np.array((SR / resp.fs + 0.1 * (SinglesMax - SR / resp.fs),
                       (SR - SR_av_std) / resp.fs))
    # SR/resp.fs - 0.5 * (np.nanmax(val['resp'].as_continuous()) - SR/resp.fs)]

    types = ['10', '01', '20', '02', '11', '12', '21', '22']
    excitatory_percentage = {}
    inhibitory_percentage = {}
    Max = {}
    Mean = {}
    for _type in types:
        if _type in val['resp'].epochs.name.values:
            ps = val['resp'].extract_epoch(_type).flatten()
            ff = np.isfinite(ps)
            excitatory_percentage[_type] = (ps[ff] > thresh[0]).sum() / ff.sum()
            inhibitory_percentage[_type] = (ps[ff] < thresh[1]).sum() / ff.sum()
            Max[_type] = ps[ff].max() / SinglesMax
            Mean[_type] = ps[ff].mean()

    # Compute threshold, excitatory percentage, and inhibitory percentage just
    # over onset time
    # restore times
    val['resp'].epochs['end'] = nds
    val['resp'].epochs['start'] = sts
    # Change epochs to stimulus onset times
    val['resp'].epochs['start'] = val['resp'].epochs['start'] + prestim
    val['resp'].epochs['end'] = val['resp'].epochs['start'] + prestim + .5
    excitatory_percentage_onset = {}
    inhibitory_percentage_onset = {}
    Max_onset = {}
    for _type in types:
        ps = val['resp'].extract_epoch(_type).flatten()
        ff = np.isfinite(ps)
        excitatory_percentage_onset[_type] = (ps[ff] > thresh[0]).sum() / ff.sum()
        inhibitory_percentage_onset[_type] = (ps[ff] < thresh[1]).sum() / ff.sum()
        Max_onset[_type] = ps[ff].max() / SinglesMax

    # find correlations between double and single-voice responses
    val['resp'].epochs['end'] = nds
    val['resp'].epochs['start'] = sts
    val['resp'].epochs['start'] = val['resp'].epochs['start'] + prestim
    rec['resp'].epochs['start'] = rec['resp'].epochs['start'] + prestim
    # over stim on time to end + 0.5
    val['linmodel'] = val['resp'].copy()
    val['linmodel']._data = np.full(val['linmodel']._data.shape, np.nan)
    types = ['11', '12', '21', '22']
    epcs = val['resp'].epochs[val['resp'].epochs['name'].str.contains('STIM')].copy()
    epcs['type'] = epcs['name'].apply(ohel.label_ep_type)
    names = [[n.split('_')[1], n.split('_')[2]] for n in epcs['name']]
    EA = np.array([n[0] for n in names])
    EB = np.array([n[1] for n in names])

    r_dual_B, r_dual_A, r_dual_B_nc, r_dual_A_nc = {}, {}, {}, {}
    r_dual_B_bal, r_dual_A_bal = {}, {}
    r_lin_B, r_lin_A, r_lin_B_nc, r_lin_A_nc = {}, {}, {}, {}
    r_lin_B_bal, r_lin_A_bal = {}, {}

    N_ac = 200
    full_resp = rec['resp'].rasterize()
    full_resp = full_resp.transform(fn)
    for _type in types:
        inds = np.nonzero(epcs['type'].values == _type)[0]
        rA_st, rB_st, r_st, rA_rB_st = [], [], [], []
        init = True
        for ind in inds:
            # for each dual-voice response
            r = val['resp'].extract_epoch(epcs.iloc[ind]['name'])
            if np.any(np.isfinite(r)):
                print(epcs.iloc[ind]['name'])
                # Find the indices of single-voice responses that match this
                # dual-voice response
                indA = np.where((EA[ind] == EA) & (EB == 'null'))[0]
                indB = np.where((EB[ind] == EB) & (EA == 'null'))[0]
                if (len(indA) > 0) & (len(indB) > 0):
                    # from pdb import set_trace
                    # set_trace()
                    rA = val['resp'].extract_epoch(epcs.iloc[indA[0]]['name'])
                    rB = val['resp'].extract_epoch(epcs.iloc[indB[0]]['name'])
                    r_st.append(full_resp.extract_epoch(epcs.iloc[ind]['name'])[:, 0, :])
                    rA_st_ = full_resp.extract_epoch(epcs.iloc[indA[0]]['name'])[:, 0, :]
                    rB_st_ = full_resp.extract_epoch(epcs.iloc[indB[0]]['name'])[:, 0, :]
                    rA_st.append(rA_st_)
                    rB_st.append(rB_st_)
                    minreps = np.min((rA_st_.shape[0], rB_st_.shape[0]))
                    rA_rB_st.append(rA_st_[:minreps, :] + rB_st_[:minreps, :])
                    if init:
                        rA_ = rA.squeeze()
                        rB_ = rB.squeeze()
                        r_ = r.squeeze()
                        rA_rB_ = rA.squeeze() + rB.squeeze()
                        init = False
                    else:
                        rA_ = np.hstack((rA_, rA.squeeze()))
                        rB_ = np.hstack((rB_, rB.squeeze()))
                        r_ = np.hstack((r_, r.squeeze()))
                        rA_rB_ = np.hstack((rA_rB_, rA.squeeze() + rB.squeeze()))
                    val['linmodel'] = val['linmodel'].replace_epoch(
                        epcs.iloc[ind]['name'], rA + rB, preserve_nan=False)
        ff = np.isfinite(r_) & np.isfinite(rA_) & np.isfinite(rB_)  # find places with data
        r_dual_A[_type] = np.corrcoef(rA_[ff], r_[ff])[0, 1]  # Correlation between response to A and response to dual
        r_dual_B[_type] = np.corrcoef(rB_[ff], r_[ff])[0, 1]  # Correlation between response to B and response to dual
        r_lin_A[_type] = np.corrcoef(rA_[ff], rA_rB_[ff])[0, 1]  # Correlation between response to A and response to linear 'model'
        r_lin_B[_type] = np.corrcoef(rB_[ff], rA_rB_[ff])[0, 1]  # Correlation between response to B and response to linear 'model'

        # correlations over single-trial data
        minreps = np.min([x.shape[0] for x in r_st])
        r_st = [x[:minreps, :] for x in r_st]
        r_st = np.concatenate(r_st, axis=1)
        rA_st = [x[:minreps, :] for x in rA_st]
        rA_st = np.concatenate(rA_st, axis=1)
        rB_st = [x[:minreps, :] for x in rB_st]
        rB_st = np.concatenate(rB_st, axis=1)
        rA_rB_st = [x[:minreps, :] for x in rA_rB_st]
        rA_rB_st = np.concatenate(rA_rB_st, axis=1)

        r_lin_A_bal[_type] = np.corrcoef(rA_st[0::2, ff].mean(axis=0),
                                         rA_rB_st[1::2, ff].mean(axis=0))[0, 1]
        r_lin_B_bal[_type] = np.corrcoef(rB_st[0::2, ff].mean(axis=0),
                                         rA_rB_st[1::2, ff].mean(axis=0))[0, 1]
        r_dual_A_bal[_type] = np.corrcoef(rA_st[0::2, ff].mean(axis=0),
                                          r_st[:, ff].mean(axis=0))[0, 1]
        r_dual_B_bal[_type] = np.corrcoef(rB_st[0::2, ff].mean(axis=0),
                                          r_st[:, ff].mean(axis=0))[0, 1]

        r_dual_A_nc[_type] = ohel.r_noise_corrected(rA_st, r_st)
        r_dual_B_nc[_type] = ohel.r_noise_corrected(rB_st, r_st)
        r_lin_A_nc[_type] = ohel.r_noise_corrected(rA_st, rA_rB_st)
        r_lin_B_nc[_type] = ohel.r_noise_corrected(rB_st, rA_rB_st)

        if _type == '11':
            r11 = nems.metrics.corrcoef._r_single(r_st, 200, 0)
        elif _type == '12':
            r12 = nems.metrics.corrcoef._r_single(r_st, 200, 0)
        elif _type == '21':
            r21 = nems.metrics.corrcoef._r_single(r_st, 200, 0)
        elif _type == '22':
            r22 = nems.metrics.corrcoef._r_single(r_st, 200, 0)
        # rac = _r_single(X, N)
        # r_ceiling = [nmet.r_ceiling(p, rec, 'pred', 'resp') for p in val_copy]

    # Things that used to happen only for _type == 'C' but still seem valid
    r_A_B = np.corrcoef(rA_[ff], rB_[ff])[0, 1]
    r_A_B_nc = r_noise_corrected(rA_st, rB_st)
    rAA = nems.metrics.corrcoef._r_single(rA_st, 200, 0)
    rBB = nems.metrics.corrcoef._r_single(rB_st, 200, 0)
    Np = 0
    rAA_nc = np.zeros(Np)
    rBB_nc = np.zeros(Np)
    hv = int(minreps / 2)
    for i in range(Np):
        inds = np.random.permutation(minreps)
        rAA_nc[i] = sp.r_noise_corrected(rA_st[inds[:hv]], rA_st[inds[hv:]])
        rBB_nc[i] = sp.r_noise_corrected(rB_st[inds[:hv]], rB_st[inds[hv:]])
    ffA = np.isfinite(rAA_nc)
    ffB = np.isfinite(rBB_nc)
    rAAm = rAA_nc[ffA].mean()
    rBBm = rBB_nc[ffB].mean()
    mean_nsA = rA_st.sum(axis=1).mean()
    mean_nsB = rB_st.sum(axis=1).mean()
    min_nsA = rA_st.sum(axis=1).min()
    min_nsB = rB_st.sum(axis=1).min()

    # Calculate correlation between linear 'model' and dual-voice response, and
    # mean amount of suppression/enhancement relative to linear 'model'
    r_fit_linmodel = {}
    r_fit_linmodel_NM = {}
    r_ceil_linmodel = {}
    mean_enh = {}
    mean_supp = {}
    EnhP = {}
    SuppP = {}
    DualAboveZeroP = {}
    resp_ = copy.deepcopy(rec['resp'].rasterize())
    resp_.epochs['start'] = sts_rec
    fn = lambda x: np.atleast_2d(sp.smooth(x.squeeze(), 3, 2) - SR / val['resp'].fs)
    resp_ = resp_.transform(fn)
    for _type in types:
        val_copy = copy.deepcopy(val)
        # from pdb import set_trace
        # set_trace()
        val_copy['resp'] = val_copy['resp'].select_epochs([_type])
        # Correlation between linear 'model' (response to A plus response to B)
        # and dual-voice response
        r_fit_linmodel_NM[_type] = nmet.corrcoef(val_copy, 'linmodel', 'resp')
        # r_ceil_linmodel[_type] = nems.metrics.corrcoef.r_ceiling(val_copy, rec, 'linmodel', 'resp', exclude_neg_pred=False)[0]
        # Noise-corrected correlation between linear 'model' (response to A plus
        # response to B) and dual-voice response
        r_ceil_linmodel[_type] = nems.metrics.corrcoef.r_ceiling(
            val_copy, rec, 'linmodel', 'resp')[0]
        pred = val_copy['linmodel'].as_continuous()
        resp = val_copy['resp'].as_continuous()
        ff = np.isfinite(pred) & np.isfinite(resp)
        # cc = np.corrcoef(sp.smooth(pred[ff], 3, 2), sp.smooth(resp[ff], 3, 2))
        cc = np.corrcoef(pred[ff], resp[ff])
        r_fit_linmodel[_type] = cc[0, 1]
        prdiff = resp[ff] - pred[ff]
        mean_enh[_type] = prdiff[prdiff > 0].mean() * val['resp'].fs
        mean_supp[_type] = prdiff[prdiff < 0].mean() * val['resp'].fs

        # Find percent of time response is suppressed vs enhanced relative to
        # what would be expected by a linear sum of single-voice responses
        # First, jackknife to find...
        # Njk = 10
        # if _type is 'C':
        #     stims = ['STIM_T+si464+si464', 'STIM_T+si516+si516']
        # else:
        #     stims = ['STIM_T+si464+si516', 'STIM_T+si516+si464']
        # T = int(700 + prestim * val['resp'].fs)
        # Tps = int(prestim * val['resp'].fs)
        # jns = np.zeros((Njk, T, len(stims)))
        # for ns in range(len(stims)):
        #     for njk in range(Njk):
        #         resp_jn = resp_.jackknife_by_epoch(Njk, njk, stims[ns])
        #         jns[njk, :, ns] = np.nanmean(resp_jn.extract_epoch(stims[ns]), axis=0)
        # jns = np.reshape(jns[:, Tps:, :], (Njk, 700 * len(stims)), order='F')
        #
        # lim_models = np.zeros((700, len(stims)))
        # for ns in range(len(stims)):
        #     lim_models[:, ns] = val_copy['linmodel'].extract_epoch(stims[ns])
        # lim_models = lim_models.reshape(700 * len(stims), order='F')
        #
        # ff = np.isfinite(lim_models)
        # mean_diff = (jns[:, ff] - lim_models[ff]).mean(axis=0)
        # std_diff = (jns[:, ff] - lim_models[ff]).std(axis=0)
        # serr_diff = np.sqrt(Njk / (Njk - 1)) * std_diff
        #
        # thresh = 3
        # dual_above_zero = (jns[:, ff].mean(axis=0) > std_diff)
        # sig_enh = ((mean_diff / serr_diff) > thresh) & dual_above_zero
        # sig_supp = ((mean_diff / serr_diff) < -thresh)
        # DualAboveZeroP[_type] = (dual_above_zero).sum() / len(mean_diff)
        # EnhP[_type] = (sig_enh).sum() / len(mean_diff)
        # SuppP[_type] = (sig_supp).sum() / len(mean_diff)

        # time = np.arange(0, lim_models.shape[0]) / val['resp'].fs
        # plt.figure()
        # plt.plot(time, jns.mean(axis=0), '.-k')
        # plt.plot(time, lim_models, '.-g')
        # plt.plot(time[sig_enh], lim_models[sig_enh], '.r')
        # plt.plot(time[sig_supp], lim_models[sig_supp], '.b')
        # plt.title('Type:{:s}, Enh:{:.2f}, Sup:{:.2f}, Resp_above_zero:{:.2f}'.format(
        #     _type, EnhP[_type], SuppP[_type], DualAboveZeroP[_type]))
        # from pdb import set_trace
        # set_trace()
        # a = 2
        # thrsh = 5
        # EnhP[_type] = ((prdiff * val['resp'].fs) > thresh).sum() / len(prdiff)
        # SuppP[_type] = ((prdiff * val['resp'].fs) < -thresh).sum() / len(prdiff)

    # return val
    # return {'excitatory_percentage': excitatory_percentage,
    #         'inhibitory_percentage': inhibitory_percentage,
    #         'r_fit_linmodel': r_fit_linmodel,
    #         'SR': SR, 'SR_std': SR_std, 'SR_av_std': SR_av_std}
    return {'thresh': thresh * val['resp'].fs,
            'EP_A': excitatory_percentage['A'],
            'EP_B': excitatory_percentage['B'],
            # 'EP_C': excitatory_percentage['C'],
            'EP_I': excitatory_percentage['I'],
            'IP_A': inhibitory_percentage['A'],
            'IP_B': inhibitory_percentage['B'],
            # 'IP_C': inhibitory_percentage['C'],
            'IP_I': inhibitory_percentage['I'],
            'OEP_A': excitatory_percentage_onset['A'],
            'OEP_B': excitatory_percentage_onset['B'],
            # 'OEP_C': excitatory_percentage_onset['C'],
            'OEP_I': excitatory_percentage_onset['I'],
            'OIP_A': inhibitory_percentage_onset['A'],
            'OIP_B': inhibitory_percentage_onset['B'],
            # 'OIP_C': inhibitory_percentage_onset['C'],
            'OIP_I': inhibitory_percentage_onset['I'],
            'Max_A': Max['A'],
            'Max_B': Max['B'],
            # 'Max_C': Max['C'],
            'Max_I': Max['I'],
            'Mean_A': Mean['A'],
            'Mean_B': Mean['B'],
            # 'Mean_C': Mean['C'],
            'Mean_I': Mean['I'],
            'OMax_A': Max_onset['A'],
            'OMax_B': Max_onset['B'],
            # 'OMax_C': Max_onset['C'],
            'OMax_I': Max_onset['I'],
            'TotalMax': TotalMax * val['resp'].fs,
            'SinglesMax': SinglesMax * val['resp'].fs,
            # 'r_lin_C': r_fit_linmodel['C'],
            'r_lin_I': r_fit_linmodel['I'],
            # 'r_lin_C_NM': r_fit_linmodel_NM['C'],
            'r_lin_I_NM': r_fit_linmodel_NM['I'],
            # 'r_ceil_C': r_ceil_linmodel['C'],
            'r_ceil_I': r_ceil_linmodel['I'],
            # 'MEnh_C': mean_enh['C'],
            'MEnh_I': mean_enh['I'],
            # 'MSupp_C': mean_supp['C'],
            'MSupp_I': mean_supp['I'],
            # 'EnhP_C': EnhP['C'],
            # 'EnhP_I': EnhP['I'],
            # 'SuppP_C': SuppP['C'],
            # 'SuppP_I': SuppP['I'],
            # 'DualAboveZeroP_C': DualAboveZeroP['C'],
            # 'DualAboveZeroP_I': DualAboveZeroP['I'],
            # 'r_dual_A_C': r_dual_A['C'],
            'r_dual_A_I': r_dual_A['I'],
            # 'r_dual_B_C': r_dual_B['C'],
            'r_dual_B_I': r_dual_B['I'],
            # 'r_dual_A_C_nc': r_dual_A_nc['C'],
            'r_dual_A_I_nc': r_dual_A_nc['I'],
            # 'r_dual_B_C_nc': r_dual_B_nc['C'],
            'r_dual_B_I_nc': r_dual_B_nc['I'],
            # 'r_dual_A_C_bal': r_dual_A_bal['C'],
            'r_dual_A_I_bal': r_dual_A_bal['I'],
            # 'r_dual_B_C_bal': r_dual_B_bal['C'],
            'r_dual_B_I_bal': r_dual_B_bal['I'],
            # 'r_lin_A_C': r_lin_A['C'],
            'r_lin_A_I': r_lin_A['I'],
            # 'r_lin_B_C': r_lin_B['C'],
            'r_lin_B_I': r_lin_B['I'],
            # 'r_lin_A_C_nc': r_lin_A_nc['C'],
            'r_lin_A_I_nc': r_lin_A_nc['I'],
            # 'r_lin_B_C_nc': r_lin_B_nc['C'],
            'r_lin_B_I_nc': r_lin_B_nc['I'],
            # 'r_lin_A_C_bal': r_lin_A_bal['C'],
            'r_lin_A_I_bal': r_lin_A_bal['I'],
            # 'r_lin_B_C_bal': r_lin_B_bal['C'],
            'r_lin_B_I_bal': r_lin_B_bal['I'],
            'r_A_B': r_A_B,
            'r_A_B_nc': r_A_B_nc,
            'rAAm': rAAm, 'rBBm': rBBm,
            'rAA': rAA, 'rBB': rBB, 'rII': rII,
            # 'rCC': rCC,
            'rAA_nc': rAA_nc, 'rBB_nc': rBB_nc,
            'mean_nsA': mean_nsA, 'mean_nsB': mean_nsB,
            'min_nsA': min_nsA, 'min_nsB': min_nsB,
            'SR': SR, 'SR_std': SR_std, 'SR_av_std': SR_av_std,
            'norm_spont': norm_spont,
            'spont_rate': spont_rate,
            'params': params,
            'corcoef': corcoef,
            'avg_resp': avg_resp,
            'snr': snr,
            'pair_names': twostims,
            'suppression': supp_array,
            'FR': FR_array,
            'rec': rec,
            'animal': cellid[:3]}
            site_name, modelname))

        recons_args = {'batch': 310, 'cellid_list': cells, 'modelname': modelname}
        recons_cache = ccache.make_cache(crec.reconsitute_rec,
                                         func_args=recons_args,
                                         classobj_name='reconstitution',
                                         recache=False,
                                         cache_folder='/home/mateo/mycache/reconstitute_recs',
                                         use_hash=True)
        reconstituted_recording = ccache.get_cache(recons_cache)

        # rasterize and take the PSTH of full-length signals (containing
        # multiple context probes)
        reconstituted_recording.signals = {key: val.rasterize() for key, val in
                                           reconstituted_recording.signals.items()}
        reconstituted_recording = npre.average_away_epoch_occurrences(reconstituted_recording)

        pop_recs[site_name][modelname] = reconstituted_recording

# removes the flat response of BRT057b. TODO: solve the bug
del pop_recs['BRT057b']

# reorders the dictionary of signals, including only the response and the
# prediction of each model; reformats the epochs
pop_sigs = col.defaultdict(dict)
for site_key, model_recs in pop_recs.items():
    for modelname, rec in model_recs.items():
        rec = cep.set_recording_subepochs(rec, set_pairs=False)
        pop_sigs[site_key][modelname] = rec['pred'].rasterize()
        pop_sigs[site_key]['resp'] = rec['resp'].rasterize()
epoch = stim_epochs[11]

stim = rec['stim']
resp = rec['resp']

# get big stimulus and response matrices. Note that "time" here is real
# experiment time, so repeats of the test stimulus show up as single-trial
# stimuli.
X = stim.rasterize().as_continuous()
Y = resp.as_continuous()
print('Single-trial stim/resp extracted to matrices X=({},{}) / Y=({},{})'.format(
    X.shape[0], X.shape[1], Y.shape[0], Y.shape[1]))

# average responses across repetitions to get shorter time series
rec_avg = preproc.average_away_epoch_occurrences(rec, epoch_regex='^STIM_')
Xa = rec_avg['stim'].as_continuous()
Ya = rec_avg['resp'].as_continuous()
print('Trial-averaged stim/resp extracted to matrices Xa=({},{}) / Ya=({},{})'.format(
    Xa.shape[0], Xa.shape[1], Ya.shape[0], Ya.shape[1]))

# here's how you get rasters aligned to each stimulus:
# define regex for stimulus epochs
epoch_regex = '^STIM_'
epochs_to_extract = ep.epoch_names_matching(rec.epochs, epoch_regex)
folded_resp = resp.extract_epochs(epochs_to_extract)

print('Response to each stimulus extracted to folded_resp dictionary')
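# Example (a sketch): each entry of folded_resp is an array of dimensions
# (repetitions x channels x time), so averaging over axis 0 gives a PSTH per
# stimulus, roughly what average_away_epoch_occurrences computes internally.
psths = {name: np.nanmean(arr, axis=0) for name, arr in folded_resp.items()}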
def split_val_and_average_reps(rec, epoch_regex='^STIM_', **context):
    est, val = rec.split_using_epoch_occurrence_counts(epoch_regex=epoch_regex)
    est = preproc.average_away_epoch_occurrences(est, epoch_regex=epoch_regex)
    val = preproc.average_away_epoch_occurrences(val, epoch_regex=epoch_regex)

    return {'est': est, 'val': val}
def average_away_stim_occurrences_rec(rec, epoch_regex='^STIM_', **context):
    rec = preproc.average_away_epoch_occurrences(rec, epoch_regex=epoch_regex)
    return {'rec': rec}
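# Usage sketch for the two xforms wrappers above: each takes the growing
# context dict as keyword arguments and returns only the keys it updates
# (assumes a recording `rec` loaded as in the surrounding examples).
ctx = {'rec': rec}
ctx.update(split_val_and_average_reps(**ctx))         # adds 'est' and 'val'
ctx.update(average_away_stim_occurrences_rec(**ctx))  # replaces 'rec'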
ctrans1 = context_transitions.index(ct1)
ctrans2 = context_transitions.index(ct2)
CT_pool.append(diff_arr[:, ctrans1, ctrans2, cell, :, 1])

CT_pool = np.stack(CT_pool, axis=0)
toplot = np.sum(np.sum(CT_pool, axis=1), axis=0)  # cumulative sum across
ax.plot(toplot)

########################################################################################################################
# there are a lot of non-responsive cells; since we are using PSTHs and single
# cells, it is reasonable to do the analysis over the PCs

# averages away repetitions of STIM
loaded_rec['resp'] = loaded_rec['resp'].rasterize()
psth_rec = preproc.average_away_epoch_occurrences(loaded_rec, epoch_regex='^STIM_')

# defines subepochs
psth_rec = cpe.set_recording_subepochs(psth_rec, set_pairs=True)

# signal PCA
sig_pcs, stats = cpca.signal_PCA(psth_rec['resp'])

# check explained variance
fig, ax = plt.subplots()
toplot = np.cumsum(stats.explained_variance_ratio_)
ax.plot(toplot, '.-', color='black')
ax.set_xlabel('Principal Component')
ax.set_ylabel('cumulative variance explained')
ax.legend()
ax.set_title('PSTH PCA site {}'.format(site))

# check how the PCs look
# response to sound. there is something in PC3
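# Cross-check sketch: the cumulative variance curve from cpca.signal_PCA can
# be reproduced directly with scikit-learn (an assumption for illustration;
# non-finite time bins are dropped first).
from sklearn.decomposition import PCA

d = psth_rec['resp'].as_continuous()  # cells x time
d = d[:, np.isfinite(d).all(axis=0)]  # keep only fully finite time bins
pca = PCA().fit(d.T)                  # treat time bins as samples
print(np.cumsum(pca.explained_variance_ratio_)[:5])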
# load into a recording object
rec = recording.load_recording(signals_dir + "/TAR010c-18-1.tgz")

# ----------------------------------------------------------------------------
# DATA PREPROCESSING
#
# GOAL: Split your data into estimation and validation sets so that you can
#       know when your model exhibits overfitting.

logging.info('Splitting into estimation and validation data sets...')

# Method #1: Find which stimuli have the most reps, use those for val
est, val = rec.split_using_epoch_occurrence_counts(epoch_regex='^STIM_')

# Optional: Take nanmean of ALL occurrences of all signals
est = preproc.average_away_epoch_occurrences(est, epoch_regex='^STIM_')
val = preproc.average_away_epoch_occurrences(val, epoch_regex='^STIM_')

# Method #2: Split based on time, where the first 80% is estimation data and
#            the last 20% is validation data.
# est, val = rec.split_at_time(0.8)

# Method #3: Split based on repetition number, rounded to the nearest rep.
# est, val = rec.split_at_rep(0.8)

# Method #4: Use the whole data set! (Usually for doing n-fold cross-val)
# est = rec
# val = rec

# ----------------------------------------------------------------------------
# INITIALIZE MODELSPEC
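# Sanity-check sketch: after averaging away occurrences, each STIM epoch
# appears exactly once, so the combined time axis should be no longer than
# before the split (signal.shape is channels x time).
n_before = rec['resp'].shape[1]
n_after = est['resp'].shape[1] + val['resp'].shape[1]
assert n_after <= n_before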
def average_away_stim_occurrences(est, val, **context):
    est = preproc.average_away_epoch_occurrences(est, epoch_regex='^STIM_')
    val = preproc.average_away_epoch_occurrences(val, epoch_regex='^STIM_')
    return {'est': est, 'val': val}
def fit_model(cellid, batch=289, spec=[4, 8]):
    loadkey = "ozgf.fs100.ch18"
    recording_uri = nw.generate_recording_uri(cellid, batch, loadkey)
    rec = load_recording(recording_uri)
    rec['resp'] = rec['resp'].rasterize()

    est, val = rec.split_using_epoch_occurrence_counts(epoch_regex='^STIM_')

    # Optional: Take nanmean of ALL occurrences of all signals
    est = preproc.average_away_epoch_occurrences(est, epoch_regex='^STIM_')
    val = preproc.average_away_epoch_occurrences(val, epoch_regex='^STIM_')

    X_tr = est['stim'].as_continuous()
    Y_tr = est['resp'].as_continuous()
    X_te = val['stim'].as_continuous()
    Y_te = val['resp'].as_continuous()

    # keep only time bins with a finite response
    goodbins = np.isfinite(Y_tr[0, :])
    Y_tr = Y_tr[:, goodbins]
    X_tr = X_tr[:, goodbins]
    goodbins = np.isfinite(Y_te[0, :])
    Y_te = Y_te[:, goodbins]
    X_te = X_te[:, goodbins]

    # maxx = np.nanmax(X_tr)
    # maxy = np.nanmax(Y_tr)
    # X_tr /= maxx
    # X_te /= maxx
    # Y_tr /= maxy
    # Y_te /= maxy

    X_te = windowed(X_te)
    X_tr = windowed(X_tr)
    Y_te = Y_te.T
    Y_tr = Y_tr.T
    print('X_te :=', X_te.shape)
    print('Y_te :=', Y_te.shape)
    print('X_tr :=', X_tr.shape)
    print('Y_tr :=', Y_tr.shape)

    num_channels = Y_tr.shape[1]

    model = Sequential()
    model.add(Conv2D(spec[0], (3, 4), activation='relu', padding='valid',
                     kernel_initializer='he_normal', kernel_regularizer='l2',
                     input_shape=X_tr.shape[1:]))
    model.add(Dropout(.3))
    # model.add(Conv2D(4, 3, activation='relu', padding='valid',
    #                  kernel_initializer='he_normal', kernel_regularizer='l2'))
    # model.add(Dropout(.3))
    # model.add(Conv2D(4, 1, activation='relu', padding='valid',
    #                  kernel_initializer='he_normal', kernel_regularizer='l2'))
    # model.add(Dropout(.3))
    model.add(Flatten())
    # model.add(Dense(spec[0], activation='relu', kernel_initializer='he_normal',
    #                 kernel_regularizer='l2'))
    model.add(Dense(spec[1], activation='relu', kernel_initializer='he_normal',
                    kernel_regularizer='l2'))
    model.add(Dense(1, activation='relu', kernel_initializer='he_normal',
                    kernel_regularizer='l2', use_bias=False))
    # model.add(Dense(1, activation='linear', kernel_initializer='he_normal',
    #                 kernel_regularizer='l2', use_bias=False))

    # Save model configuration
    model.summary()
    model_json = model.to_json()

    print('Training model for cell %s ----------------' % cellid)

    # Training options
    modelstring = "{}_{}".format(spec[0], spec[1])
    filepath = 'models2/model_%s_%s.hdf5' % (modelstring, cellid)
    checkpoint = ModelCheckpoint(filepath, mode='min', monitor='val_loss',
                                 save_best_only=True, save_weights_only=True)
    early_stop = EarlyStopping(mode='min', monitor='val_loss', patience=5)
    callbacks = [checkpoint, early_stop]

    # Train model
    model = model_from_json(model_json)
    model.compile(loss='mse', optimizer='rmsprop')

    # retry random initializations until the first few epochs produce a
    # finite, positive training correlation
    good_start = False
    c = 0
    while (c < 10) and not good_start:
        print('Initialization {}'.format(c))
        Z_tr = model.predict(X_tr)
        tr_corr = np.corrcoef(Z_tr.T, Y_tr.T)[0, 1]
        print("Initial cc: {}".format(tr_corr))
        model.fit(X_tr, Y_tr, batch_size=64, epochs=5, validation_split=.2,
                  verbose=2, callbacks=callbacks)
        model.load_weights(filepath)
        Z_tr = model.predict(X_tr)
        tr_corr = np.corrcoef(Z_tr.T, Y_tr.T)[0, 1]
        print("Training cc after 5 epochs: {}".format(tr_corr))
        if np.isfinite(tr_corr) and (tr_corr > 0):
            good_start = True
        else:
            reinitLayers(model)
        c += 1

    model.fit(X_tr, Y_tr, batch_size=64, initial_epoch=5, epochs=20,
              validation_split=.2, verbose=2, callbacks=callbacks)
    model.load_weights(filepath)

    # Check test set prediction
    Z_te = model.predict(X_te)
    corr = np.corrcoef(Z_te.T, Y_te.T)[0, 1]

    return corr
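# `windowed` is not defined in this snippet. A plausible sketch of what it does
# (an assumption, not the original helper): slice the (channels x time)
# spectrogram into one short history window per time bin, shaped
# (time, lags, channels, 1) to match the Conv2D input used above.
def windowed(X, n_lags=20):  # hypothetical signature
    n_chans, n_times = X.shape
    Xp = np.pad(X, ((0, 0), (n_lags - 1, 0)))  # zero-pad the past
    out = np.stack([Xp[:, t:t + n_lags].T for t in range(n_times)], axis=0)
    return out[..., np.newaxis]  # add a trailing "image channel" axis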
# LOAD AND FORMAT RECORDING DATA
with open(datafile, 'rb') as f:
    cellid, recname, fs, X, Y, epochs = pickle.load(f)

stimchans = [str(x) for x in range(X.shape[0])]
# borrowed from recording.load_recording_from_arrays
resp = RasterizedSignal(fs, Y, 'resp', recname, epochs=epochs, chans=[cellid])
stim = RasterizedSignal(fs, X, 'stim', recname, epochs=epochs, chans=stimchans)
signals = {'resp': resp, 'stim': stim}
rec = recording.Recording(signals)

# est, val = rec.split_at_time(0.2)
est, val = rec.split_using_epoch_occurrence_counts(epoch_regex="^STIM_")
est = preproc.average_away_epoch_occurrences(est, epoch_regex="^STIM_").apply_mask()
val = preproc.average_away_epoch_occurrences(val, epoch_regex="^STIM_").apply_mask()

sr_Hz = est['resp'].fs
time_win_sec = 0.1

n_feats = est['stim'].shape[0]
n_tps_per_stim = 550
n_stim = int(est['stim'].shape[1] / n_tps_per_stim)
n_resp = 1
feat_dims = [n_stim, n_tps_per_stim, n_feats]
data_dims = [n_stim, n_tps_per_stim, n_resp]
v_feat_dims = [3, n_tps_per_stim, n_feats]
v_data_dims = [3, n_tps_per_stim, n_resp]
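# Sketch: reshape the continuous matrices into the per-stimulus tensors implied
# by feat_dims/data_dims above (assumes the averaged signals concatenate the
# n_stim stimuli back-to-back, each exactly n_tps_per_stim bins long).
F = est['stim'].as_continuous()   # n_feats x time
R = est['resp'].as_continuous()   # n_resp x time
feats = F.T.reshape(n_stim, n_tps_per_stim, n_feats)
data = R.T.reshape(n_stim, n_tps_per_stim, n_resp)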