def mean_sd_per_stim_by_batch(batch, loadkey='ozgf.fs100.ch18',
                              max_db_scale=65, pre_log_floor=1,
                              test_limit=None, save_path=None,
                              load_path=None, manual_lims=False):
    """Scatter per-stimulus sound statistics for every stimulus in a batch.

    The statistics are either computed cell by cell (``load_path is None``)
    or read back from a DataFrame pickle written by an earlier call.

    Parameters
    ----------
    batch : int
        Batch number; also printed on the second (text) figure.
    loadkey, max_db_scale, pre_log_floor
        Forwarded unchanged to ``mean_sd_per_stim_by_cellid``.
    test_limit : int or None
        Only the first ``test_limit`` cells are processed (None = all).
    save_path : str or None
        If given, the freshly computed table is pickled here. Ignored when
        ``load_path`` is used.
    load_path : str or None
        If given, skip the computation and load the pickled table instead.
    manual_lims : bool
        Forwarded to ``scatter_soundstats``.

    Returns
    -------
    fig, fig2
        The figure returned by ``scatter_soundstats`` and a second figure
        holding a short text legend.
    """
    if load_path is not None:
        cached = pd.read_pickle(load_path)
        stim_names = cached.index.values.tolist()
        stim_stats = cached['stats'].values.tolist()
        batch_results = dict(zip(stim_names, stim_stats))
    else:
        batch_results = {}
        already_done = []
        for cellid in nd.get_batch_cells(batch, as_list=True)[:test_limit]:
            # wastes some time calculating repeat stims, but oh well...
            # doesn't take that long anyway
            per_cell = mean_sd_per_stim_by_cellid(
                cellid, batch, loadkey, max_db_scale, pre_log_floor,
                already_done)
            batch_results.update(per_cell)
            already_done = list(batch_results.keys())

        table = pd.DataFrame.from_dict({
            'stim': list(batch_results.keys()),
            'stats': list(batch_results.values()),
        })
        table.set_index('stim', inplace=True)
        if save_path is not None:
            table.to_pickle(save_path)

    fig = scatter_soundstats(batch_results, manual_lims=manual_lims)

    fig2 = plt.figure()
    text = ("x: mean level (db SPL)\n"
            "y: std (dB SPL)\n"
            "batch: %d\n" % batch)
    plt.text(0.1, 0.5, text)

    return fig, fig2
def main():
    """Queue one fit per cell for every shuffle variant in batches 269 and 273.

    Command-line options ``--wcg_n`` and ``--fir_n`` select the model size.
    For each of the four (shuffle_phase, shuffle_stream) combinations the
    model name from ``generate_modelname`` is SHA-1 hashed and every cell in
    the two batches is enqueued with the matching command-line flags.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Fit cell from batch to model')
    parser.add_argument('--wcg_n', type=int, help='wcg rank', default=2)
    parser.add_argument('--fir_n', type=int, help='FIR ntaps', default=15)
    args = parser.parse_args()

    executable_path = '/auto/users/bburan/bin/miniconda3/envs/nems-intel/bin/python'
    # The base command only depends on the CLI options, so build it once.
    base_command = '-m lbhb.analysis.rdt.do_fit --wcg_n {wcg_n} --fir_n {fir_n}'.format(
        wcg_n=args.wcg_n, fir_n=args.fir_n)

    for shuffle_phase in (True, False):
        for shuffle_stream in (True, False):
            readable_name = generate_modelname(args.wcg_n, args.fir_n,
                                               shuffle_phase, shuffle_stream)

            script_path = base_command
            if shuffle_phase:
                script_path += ' --shuffle-phase'
            if shuffle_stream:
                script_path += ' --shuffle-stream'

            # The queue stores the SHA-1 digest rather than the readable name.
            modelname = hashlib.sha1(readable_name.encode('ascii')).hexdigest()

            for batch in (269, 273):
                for cell in db.get_batch_cells(batch=batch)['cellid']:
                    db.enqueue_single_model(cell, batch, modelname,
                                            force_rerun=True,
                                            user='******',
                                            executable_path=executable_path,
                                            script_path=script_path)
                    print(f'Queued {cell}')
def cf_batch(batch, modelname, save_path=None, load_path=None, f_low=0.2,
             f_high=20, nf=18, method='gaussian', test_limit=None):
    """Collect the characteristic frequency (CF) of every fitted cell in a batch.

    Parameters
    ----------
    batch : int
        Batch whose cells are analysed.
    modelname : str
        Model whose fit is passed to ``cf_from_LN_strf``.
    save_path : str or None
        If given, the computed table is pickled here. Ignored when
        ``load_path`` is used.
    load_path : str or None
        If given, return the pickled table stored here without touching the
        database or any model fits.
    f_low, f_high, nf, method
        Forwarded unchanged to ``cf_from_LN_strf``.
    test_limit : int or None
        Only the first ``test_limit`` cells are processed (None = all).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``cellid`` with columns ``cf`` and ``cf_bin``. Cells for
        which ``cf_from_LN_strf`` raised are left out.
    """
    if load_path is not None:
        return pd.read_pickle(load_path)

    cells = nd.get_batch_cells(batch, as_list=True)
    fitted_cellids = []
    cfs = []
    cf_bins = []
    for cellid in cells[:test_limit]:
        try:
            cf, cf_bin = cf_from_LN_strf(cellid, batch, modelname,
                                         f_low, f_high, nf, method)
        except Exception:
            # cell probably not fit for this model. ``Exception`` (not a bare
            # except) so Ctrl-C / SystemExit still stop the batch.
            continue
        # Record the id together with its values so the three columns can
        # never get out of step.
        fitted_cellids.append(cellid)
        cfs.append(cf)
        cf_bins.append(cf_bin)

    results = {'cellid': fitted_cellids, 'cf': cfs, 'cf_bin': cf_bins}
    df = pd.DataFrame.from_dict(results)
    df.set_index('cellid', inplace=True)
    if save_path is not None:
        df.to_pickle(save_path)

    return df
def pup_pred_sum(batch=294, fs=4, jkn=20):
    """Summarise pupil state-gain fits against their shuffled-pupil controls.

    Two models are loaded per batch: the pupil model and its ``pup0``
    control. For every cell one row is built holding the pupil
    channel's modulation index, gain, offset and test-set prediction scores.

    Parameters
    ----------
    batch : int
        294 (VOC + pupil) or 289 (NAT + pupil). Any other value raises
        ``ValueError`` because no model names are defined for it.
    fs : int
        Sampling rate inserted into the model name (e.g. 4 or 20 Hz).
    jkn : int
        Jackknife count inserted into the model name.

    Returns
    -------
    pandas.DataFrame
        Columns ``cellid, state_chan, MI, MI_pup0, g, d, r, r_pup0, r_se,
        r_se_pup0`` plus a boolean ``goodcells`` column that is True where
        ``r - r_se > r_pup0 + r_se_pup0``.
    """
    if batch == 294:
        modelnames = [
            "psth.fs{}.pup-ld-st.pup_stategain.S_jk.nf{}-psthfr.j-basic".format(fs, jkn),
            "psth.fs{}.pup-ld-st.pup0_stategain.S_jk.nf{}-psthfr.j-basic".format(fs, jkn)
        ]
    elif batch == 289:
        modelnames = [
            "psth.fs{}.pup-ld-st.pup-hrc_stategain.S_jk.nf{}-psthfr.j-basic".format(fs, jkn),
            "psth.fs{}.pup-ld-st.pup0-hrc_stategain.S_jk.nf{}-psthfr.j-basic".format(fs, jkn)
        ]
    else:
        # Previously this fell through to a NameError further down.
        raise ValueError("pup_pred_sum only supports batch 294 or 289, got %r" % (batch,))

    celldata = nd.get_batch_cells(batch=batch)
    cellids = celldata['cellid'].tolist()

    d = pd.DataFrame(columns=['cellid', 'state_chan', 'MI', 'MI_pup0', 'g', 'd',
                              'r', 'r_pup0', 'r_se', 'r_se_pup0'])

    # Only the pupil channel is summarised; it is read from index 1 of the
    # state arrays.
    sc = 'pupil'
    j = 1

    for mod_i, m in enumerate(modelnames):
        print('Loading ', m)
        modelspecs = nems_db.params._get_modelspecs(cellids, batch, m, multi='mean')

        for modelspec in modelspecs:
            c = modelspec[0]['meta']['cellid']
            dc = modelspec[0]['phi']['d']
            gain = modelspec[0]['phi']['g']
            meta = ms.get_modelspec_metadata(modelspec)
            state_mod = meta['state_mod']

            ii = ((d['cellid'] == c) & (d['state_chan'] == sc))
            if not ii.any():
                # DataFrame.append was removed in pandas 2.0; concat is the
                # supported way to add a row.
                new_row = pd.DataFrame([{'cellid': c, 'state_chan': sc}])
                d = pd.concat([d, new_row], ignore_index=True)
                ii = ((d['cellid'] == c) & (d['state_chan'] == sc))

            if mod_i == 0:
                d.loc[ii, 'MI'] = (state_mod[j])
                d.loc[ii, 'g'] = (gain[0, j])
                d.loc[ii, 'd'] = (dc[0, j])
                d.loc[ii, 'r'] = (meta['r_test'][0])
                d.loc[ii, 'r_se'] = (meta['se_test'][0])
            elif mod_i == 1:
                d.loc[ii, 'MI_pup0'] = (state_mod[j])
                d.loc[ii, 'r_pup0'] = (meta['r_test'][0])
                d.loc[ii, 'r_se_pup0'] = (meta['se_test'][0])

    # A cell counts as "good" when the pupil model beats the control by more
    # than the two standard errors combined.
    d['goodcells'] = ((d['r'] - d['r_se']) > (d['r_pup0'] + d['r_se_pup0']))

    #ax = None
    #stateplots.beta_comp(d['r_pup0'], d['r'], n1="r pup0", n2="r pup",
    #                     title='stategain', hist_range=[-0.05, 0.95],
    #                     ax=ax, highlight=d['goodcells'])

    return d
def plot_filtered_batch(batch, models, measure, plot_type, only_fair=True,
                        include_outliers=False, display=True, extra_cols=[],
                        snr=0.0, iso=0.0, snr_idx=0.0):
    """Build and render a plot for the cells of ``batch`` that pass the filters.

    The batch's cell ids are narrowed by ``get_filtered_cells`` using the
    ``snr``, ``iso`` and ``snr_idx`` thresholds, handed to ``get_plot`` and
    the resulting plot object is generated before being returned.

    ``extra_cols`` is accepted but not used by this function.
    """
    batch_cells = get_batch_cells(batch)['cellid'].tolist()
    kept_cells = get_filtered_cells(batch_cells, snr, iso, snr_idx)

    plot = get_plot(kept_cells, models, batch, measure, plot_type,
                    only_fair, include_outliers, display)
    plot.generate_plot()
    return plot
def dynamic_sigmoid_differences(batch, modelname, hist_bins=60,
                                test_limit=None, save_path=None,
                                load_path=None, use_quartiles=False,
                                avg_bin_count=20):
    """Histogram the low- vs high-contrast output difference across a batch.

    For each cell the model prediction is split by the contrast prediction
    (``ctpred``) into a low and a high group, averaged within bins of the
    prediction taken with ``stop=-1`` (i.e. before the final module), and
    summarised as ``mean((low - high) / (|low| + |high|))``.

    Parameters
    ----------
    batch, modelname
        Identify the fits to load with ``xhelp.load_model_xform``.
    hist_bins : int
        Number of histogram bins.
    test_limit : int or None
        Only the first ``test_limit`` cells are processed (None = all).
    save_path : str or None
        If given, the ratio array is written with ``np.save``. Ignored when
        ``load_path`` is used.
    load_path : str or None
        If given, ratios are read with ``np.load`` instead of recomputed.
    use_quartiles : bool
        If True, "low" is the range from the 25th percentile up to the
        median and "high" is everything at or above the 75th percentile;
        otherwise the split is at the median.
    avg_bin_count : int
        Forwarded to ``_binned_xvar``.

    Returns
    -------
    None
        The result is the matplotlib figure created as a side effect.
    """
    if load_path is not None:
        ratios = np.load(load_path)
    else:
        per_cell = []
        for cellid in nd.get_batch_cells(batch, as_list=True)[:test_limit]:
            xfspec, ctx = xhelp.load_model_xform(cellid, batch, modelname)
            val = ctx['val'].apply_mask()
            ctpred = val['ctpred'].as_continuous().flatten()
            pred_after = val['pred'].as_continuous().flatten()
            val_before = ms.evaluate(val, ctx['modelspec'], stop=-1)
            pred_before = val_before['pred'].as_continuous().flatten()

            median_ct = np.nanmedian(ctpred)
            if use_quartiles:
                # NOTE(review): percentile (not nanpercentile) is used here
                # while the median above ignores NaN -- confirm intended.
                q25 = np.percentile(ctpred, 25)
                q75 = np.percentile(ctpred, 75)
                low_mask = (ctpred >= q25) & (ctpred < median_ct)
                high_mask = ctpred >= q75
            else:
                low_mask = ctpred < median_ct
                high_mask = ctpred >= median_ct

            # TODO: do some kind of binning here since the two vectors
            # don't actually overlap in x axis
            mean_before, bin_masks = _binned_xvar(pred_before, avg_bin_count)
            low_avg = _binned_yavg(pred_after, low_mask, bin_masks)
            high_avg = _binned_yavg(pred_after, high_mask, bin_masks)

            per_cell.append(
                np.nanmean((low_avg - high_avg)
                           / (np.abs(low_avg) + np.abs(high_avg))))

        ratios = np.array(per_cell)
        if save_path is not None:
            np.save(save_path, ratios)

    plt.figure()
    plt.hist(ratios, bins=hist_bins, color=[wsu_gray_light], edgecolor='black',
             linewidth=1)
    #plt.rc('text', usetex=True)
    #plt.xlabel(r'\texit{\frac{low-high}{\left|high\right|+\left|low\right|}}')
    plt.xlabel('(low - high)/(|low| + |high|)')
    plt.ylabel('cell count')
    plt.title("difference of low-contrast output and high-contrast output\n"
              "positive means low-contrast has higher firing rate on average")
def get_batch_modelspecs(batch, modelname, mod_key='id', limit=None,
                         multi='mean'):
    """Return the modelspecs of ``modelname`` for the cells in ``batch``.

    Only the first ``limit`` cells are used when ``limit`` is given.
    ``multi`` is forwarded to ``_get_modelspecs``; ``mod_key`` is accepted
    but not used here.
    """
    cellids = nd.get_batch_cells(batch=batch)['cellid'].tolist()
    selected = cellids if limit is None else cellids[:limit]
    return _get_modelspecs(selected, batch, modelname, multi=multi)
def snr_by_batch(batch, loadkey, save_path=None, load_path=None,
                 frac_total=True, rec=None, siteids=None):
    """Compute (or load) the response SNR of every cell in a batch.

    Parameters
    ----------
    batch, loadkey
        Used to build each site's recording URI when ``rec`` is None.
    save_path : str or None
        If given, the computed table is written as CSV. Ignored when
        ``load_path`` is used.
    load_path : str or None
        If given, the table is read from this CSV and nothing is computed.
    frac_total : bool
        Forwarded to ``compute_snr``.
    rec : recording, str or None
        A recording (or a path/URI to load one from) to use instead of
        loading one recording per site.
    siteids : list of str or None
        Sites to load when ``rec`` is None; derived from the batch's cell ids
        (text before the first ``-``) when omitted.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``cellid`` with one ``snr`` column; rows with NaN dropped
        when freshly computed.
    """
    if load_path is not None:
        return pd.read_csv(load_path, index_col=0)

    snrs = []
    cells = []

    if rec is not None:
        # A single recording was supplied (possibly as a path).
        if isinstance(rec, str):
            rec = nems.recording.load_recording(rec)
        cellids = rec['resp'].chans
        est, val = rec.split_using_epoch_occurrence_counts('^STIM_')
        val = val.apply_mask()
        for cellid in cellids:
            log.info("computing SNR for cell: %s" % cellid)
            resp = val['resp'].extract_channels([cellid])
            snrs.append(compute_snr(resp, frac_total=frac_total))
        cells = cellids
    else:
        if siteids is None:
            cellids = nd.get_batch_cells(batch, as_list=True)
            siteids = list(set([c.split('-')[0] for c in cellids]))

        for site in siteids:
            rec_path = xwrap.generate_recording_uri(site, batch,
                                                    loadkey=loadkey)
            rec = nems.recording.load_recording(rec_path)
            est, val = rec.split_using_epoch_occurrence_counts('^STIM_')
            val = val.apply_mask()
            for cellid in rec['resp'].chans:
                resp = val['resp'].extract_channels([cellid])
                snr = compute_snr(resp, frac_total=frac_total)
                snrs.append(snr)
                cells.append(cellid)
                print(f"{cellid}: {snr:.3f}")

    df = pd.DataFrame.from_dict({'cellid': cells, 'snr': snrs})
    df.dropna(inplace=True)
    df.set_index('cellid', inplace=True)
    if save_path is not None:
        df.to_csv(save_path)

    return df
def save_pred_matched_batch(batch, modelname, save_path, test_limit=None):
    """Save one ``dynamic_sigmoid_pred_matched`` PDF per cell in a batch.

    Figures are written to ``<save_path>/<batch>/<cellid>`` in PDF format.
    Processing is best effort: a cell whose figure cannot be produced or
    saved is reported on stdout and skipped.

    Parameters
    ----------
    batch, modelname
        Forwarded to ``dynamic_sigmoid_pred_matched``.
    save_path : str
        Root output directory; the per-batch sub-directory is created if
        needed.
    test_limit : int or None
        Only the first ``test_limit`` cells are processed (None = all).
    """
    cells = nd.get_batch_cells(batch, as_list=True)

    # Without this every savefig failed when the directory was missing, and
    # the bare except below hid the failure completely.
    out_dir = os.path.join(save_path, str(batch))
    os.makedirs(out_dir, exist_ok=True)

    for cellid in cells[:test_limit]:
        fig = None
        try:
            fig = dynamic_sigmoid_pred_matched(cellid, batch, modelname)
            fig.savefig(os.path.join(out_dir, cellid), format='pdf')
        except Exception:
            # model probably not fit for that cell
            print('error for cell: %s' % cellid)
        finally:
            # Close even when saving failed so figures do not pile up.
            if fig is not None:
                plt.close(fig)
def cf_batch_rank1(batch, modelname, save_path=None, load_path=None,
                   f_low=0.2, f_high=20, nf=18, test_limit=None):
    """Read each cell's CF from the fitted ``mean`` of the model's module 1.

    Parameters
    ----------
    batch, modelname
        Identify the fits to load with ``xhelp.load_model_xform``.
    save_path : str or None
        If given, the computed table is pickled here.
    load_path : str or None
        If given, return the pickled table stored here without loading fits.
    f_low, f_high : float
        Bounds of the frequency axis; bin centres are spaced
        logarithmically between them.
    nf : int
        Number of frequency bins.
    test_limit : int or None
        Only the first ``test_limit`` cells are processed (None = all).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``cellid`` with columns ``cf`` (same units as
        ``f_low``/``f_high``) and ``cf_bin``. Cells whose fit could not be
        loaded are left out.
    """
    if load_path is not None:
        return pd.read_pickle(load_path)

    cells = nd.get_batch_cells(batch, as_list=True)

    # The frequency axis does not depend on the cell; build it once.
    khz_freqs = np.logspace(np.log(f_low), np.log(f_high), num=nf, base=np.e)

    fitted_cellids = []
    cfs = []
    cf_bins = []
    for cellid in cells[:test_limit]:
        try:
            xfspec, ctx = xhelp.load_model_xform(cellid, batch, modelname,
                                                 eval_model=False)
        except Exception:
            # cell probably not fit for this model
            continue

        modelspec = ctx['modelspec']
        # mult by nf b/c x vals in gaussian coeffs module are divided by
        # number of channels
        # np.asscalar was removed from NumPy; .item() is its replacement.
        scaled_mean = np.asarray(modelspec.phi[1]['mean']).item() * nf
        # max and min bounds b/c means outside of bin range are allowed
        mean = min(max(0, scaled_mean), nf - 1)
        cf_bin = int(round(mean))

        fitted_cellids.append(cellid)
        cfs.append(khz_freqs[cf_bin])
        cf_bins.append(cf_bin)

    results = {'cellid': fitted_cellids, 'cf': cfs, 'cf_bin': cf_bins}
    df = pd.DataFrame.from_dict(results)
    df.set_index('cellid', inplace=True)
    if save_path is not None:
        df.to_pickle(save_path)

    return df
def relative_gain_by_batch(batch, loadkey='ozgf.fs100.ch18'):
    """Compute per-cell stimulus mean/SD statistics for a whole batch.

    Parameters
    ----------
    batch : int
        Batch whose cells are loaded.
    loadkey : str
        Loader key used to build each cell's recording URI.

    Returns
    -------
    dict
        Maps each cellid to the result of ``spectrogram_mean_sd`` applied to
        the first element returned by ``stim_resp_per_epoch`` for that cell.
        (Earlier versions computed this and returned None.)
    """
    # get cellids list. as_list=True is required: iterating the default
    # DataFrame return value yields column names, not cell ids.
    cellids = nd.get_batch_cells(batch, as_list=True)

    # load stim/resp for full batch
    recs = {
        c: nems.recording.load_recording(
            xwrap.generate_recording_uri(c, batch, loadkey))
        for c in cellids
    }

    # break up into epochs by stim, remove pre/post silence
    sigs = {c: stim_resp_per_epoch(r) for c, r in recs.items()}

    # calc. stim means and sds for all cells, stims
    stim_m_sd = {c: spectrogram_mean_sd(s[0]) for c, s in sigs.items()}

    return stim_m_sd
def get_site_ids(batch):
    '''
    Group the cells of a batch by recording site.

    The site id is the part of a cell id before the first '-'; it identifies
    all the cells of a population that were recorded simultaneously.

    :param batch: batch number passed to nd.get_batch_cells
    :return: dict mapping each site id to the list of its cell ids
    '''
    cells_by_site = {}
    for cellid in nd.get_batch_cells(batch).cellid.unique().tolist():
        site_id, _, _ = cellid.partition('-')
        cells_by_site.setdefault(site_id, []).append(cellid)

    return cells_by_site
def param_scatter_batch(batch, model1, model2, param, multi='mean',
                        limit=None, mod_key='id'):
    """Run ``param_scatter`` on the cells of ``batch``.

    Only the first ``limit`` cells are used when ``limit`` is given; all
    other arguments are forwarded unchanged. Returns None.
    """
    cellids = nd.get_batch_cells(batch=batch)['cellid'].tolist()
    selected = cellids if limit is None else cellids[:limit]

    param_scatter(selected, batch, model1, model2, param,
                  multi=multi, mod_key=mod_key)
def fitted_params_per_batch(batch, modelname, mod_key='id', limit=None,
                            multi='mean',
                            meta=['r_test', 'r_fit', 'se_test'],
                            stats_keys=['mean', 'std', 'sem', 'max', 'min']):
    """Return ``fitted_params_per_cell`` for the cells of ``batch``.

    Only the first ``limit`` cells are used when ``limit`` is given; every
    other argument is forwarded unchanged.
    """
    cellids = nd.get_batch_cells(batch=batch)['cellid'].tolist()
    selected = cellids if limit is None else cellids[:limit]

    return fitted_params_per_cell(selected, batch, modelname,
                                  mod_key=mod_key, stats_keys=stats_keys,
                                  meta=meta, multi=multi)
def get_valid_improvements(batch, model1, model2, threshold=2.5):
    """List cells whose r_test ratio between two models is within bounds.

    The ratio ``r_test(model1) / r_test(model2)`` is computed per cell and a
    cell is kept when ``1 / threshold < ratio < threshold``. Cells missing
    from either model's table are filled with NaN and therefore dropped.

    Returns
    -------
    list
        Cell ids that pass the ratio test.
    """
    # TODO: threshold 2.5 works for removing outliers in correlation scatter
    #       and maximizes r, but need an unbiased way to pick this number.
    #       Otherwise basically just cherrypicked the cutoff to make
    #       correlation better.

    # NOTE: Also helps to do this for both gc and stp, then
    #       list(set(gc_cells) & set(stp_cells)) to get the intersection.

    params1 = fitted_params_per_batch(batch, model1, stats_keys=[])
    params2 = fitted_params_per_batch(batch, model2, stats_keys=[])

    # fill in missing cellids w/ nan
    cellids = nd.get_batch_cells(batch=batch)['cellid'].tolist()
    n_rows = len(params1.index)
    # NOTE(review): the first five column labels are skipped here even though
    # stats_keys=[] was requested above -- confirm they are not cell ids.
    known1 = params1.loc['meta--r_test'].index.values.tolist()[5:]
    known2 = params2.loc['meta--r_test'].index.values.tolist()[5:]
    filler = pd.Series(np.full(n_rows, np.nan))

    missing1 = 0
    missing2 = 0
    for cellid in cellids:
        if cellid not in known1:
            params1[cellid] = filler
            missing1 += 1
        if cellid not in known2:
            params2[cellid] = filler
            missing2 += 1

    print("# missing cells: %d, %d" % (missing1, missing2))

    # Force same cellid order now that cols are filled in
    params1 = params1[cellids]
    params2 = params2[cellids]

    ratio = params1.loc['meta--r_test'] / params2.loc['meta--r_test']
    below_upper = ratio.loc[ratio < threshold]
    within_bounds = below_upper.loc[ratio > 1 / threshold]

    return within_bounds.index.values.tolist()
def second_fit_pop_models(batch, start_from=None, test_count=None):
    """Enqueue population-model fits for the cells at the selected sites.

    Cells are kept when their id starts with the part before the first '.'
    of any entry in ``NAT4_A1_SITES`` (batch 322) or ``NAT4_PEG_SITES`` (any
    other batch). Model names come from every ``MODELGROUPS`` entry whose key
    is not ``'LN'`` and contains neither ``'_single'`` nor
    ``'_exploration'``. Unless ``FORCE_RERUN`` is set, a cell/model pair that
    already has a row in ``Results`` is skipped.

    Parameters
    ----------
    batch : int
        Batch to fit.
    start_from, test_count : int or None
        Slice bounds applied to the model-name list
        (``modelnames[start_from:test_count]``).

    Returns
    -------
    list of str
        The full (unsliced) list of candidate model names.
    """
    all_cellids = nd.get_batch_cells(batch, as_list=True)

    sites = NAT4_A1_SITES if batch == 322 else NAT4_PEG_SITES
    site_prefixes = [s.split('.')[0] for s in sites]
    cellids = [
        c for c in all_cellids
        if any(c.startswith(prefix) for prefix in site_prefixes)
    ]

    modelnames = []
    for group_name, group_models in MODELGROUPS.items():
        if group_name == 'LN':
            continue
        if '_single' in group_name or '_exploration' in group_name:
            continue
        modelnames.extend(group_models)

    # The loop variable holds a cell id; the name is kept because it appears
    # in the printed message.
    for siteid in cellids:
        for modelname in modelnames[start_from:test_count]:
            if not FORCE_RERUN:
                d = nd.pd_query(
                    "SELECT * FROM Results WHERE cellid like %s and modelname=%s and batch=%s",
                    params=(siteid + "%", modelname, batch))
                if len(d) > 0:
                    print(f'Fit exists for {siteid} {batch} {modelname}')
                    continue

            nd.enqueue_models(
                celllist=[siteid],
                batch=batch,
                modellist=[modelname],
                user="******",
                #executable_path='/auto/users/jacob/bin/anaconda3/envs/jacob_nems/bin/python',
                executable_path='/auto/users/svd/bin/miniconda3/envs/tf/bin/python',
                script_path='/auto/users/jacob/bin/anaconda3/envs/jacob_nems/nems/scripts/fit_single.py'
            )

    return modelnames
def strf_vs_resp_batch(batch, modelname, save_path, test_limit=None,
                       continuous=False):
    """Save one ``strf_vs_resp_by_contrast`` PDF per cell in a batch.

    Figures are written to ``<save_path>/<batch>/<cellid>`` in PDF format.
    Processing is best effort: a cell whose figure cannot be produced or
    saved is reported on stdout and skipped.

    Parameters
    ----------
    batch, modelname
        Forwarded to ``strf_vs_resp_by_contrast``.
    save_path : str
        Root output directory; the per-batch sub-directory is created if
        needed.
    test_limit : int or None
        Only the first ``test_limit`` cells are processed (None = all).
    continuous : bool
        Forwarded to ``strf_vs_resp_by_contrast``.
    """
    cells = nd.get_batch_cells(batch, as_list=True)

    # Without this every savefig failed when the directory was missing and
    # each cell was merely reported as an error.
    out_dir = os.path.join(save_path, str(batch))
    os.makedirs(out_dir, exist_ok=True)

    #plot_kwargs = {'alpha': 0.2, 's': 2}
    for cellid in cells[:test_limit]:
        fig = None
        try:
            fig = strf_vs_resp_by_contrast(cellid, batch, modelname,
                                           plot_stim=False,
                                           plot_contrast=False,
                                           continuous=continuous)
            fig.savefig(os.path.join(out_dir, cellid), format='pdf')
        except Exception:
            # cell probably not fit for this model or batch
            print('error for cell: %s' % cellid)
        finally:
            # Close even when saving failed so figures do not pile up.
            if fig is not None:
                plt.close(fig)
def snr_by_batch(batch, loadkey, save_path=None, load_path=None,
                 frac_total=True):
    """Compute (or load) the response SNR of every cell in a batch.

    Parameters
    ----------
    batch, loadkey
        Used to list the batch's cells and build each site's recording URI.
    save_path : str or None
        If given, the computed table is pickled here. Ignored when
        ``load_path`` is used.
    load_path : str or None
        If given, the pickled table is returned and nothing else is done --
        in particular the database is not queried.
    frac_total : bool
        Forwarded to ``compute_snr``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``cellid`` with one ``snr`` column; rows with NaN dropped
        when freshly computed.
    """
    if load_path is not None:
        return pd.read_pickle(load_path)

    cellids = nd.get_batch_cells(batch, as_list=True)
    batch_cells = set(cellids)  # O(1) membership test in the inner loop
    siteids = list(set([c.split('-')[0] for c in cellids]))

    snrs = []
    cells = []
    for site in siteids:
        rec_path = xwrap.generate_recording_uri(site, batch, loadkey=loadkey)
        rec = nems.recording.load_recording(rec_path)
        est, val = rec.split_using_epoch_occurrence_counts('^STIM_')
        # The masked validation set is the same for every cell of the site.
        masked_val = val.apply_mask()
        for cellid in rec['resp'].chans:
            # A site recording can hold channels that are not in this batch.
            if cellid not in batch_cells:
                continue
            resp = masked_val['resp'].extract_channels([cellid])
            snrs.append(compute_snr(resp, frac_total=frac_total))
            cells.append(cellid)

    results = {'cellid': cells, 'snr': snrs}
    df = pd.DataFrame.from_dict(results)
    df.dropna(inplace=True)
    df.set_index('cellid', inplace=True)
    if save_path is not None:
        df.to_pickle(save_path)

    return df
def mean_prior_used(batch, modelname, start=400, stop=500):
    """Print how often index 0 was the best initial condition.

    For every cell in ``cells[start:stop]`` whose fit can be loaded, the
    ``best_random_idx`` entry of the modelspec metadata is collected
    (defaulting to 0 when absent) and the proportion of zeros is printed.
    The printed label calls index 0 the "mean prior".

    Parameters
    ----------
    batch, modelname
        Identify the fits to load with ``xhelp.load_model_xform``.
    start, stop : int or None
        Slice of the batch's cell list to examine. The defaults reproduce
        the previously hard-coded ``cells[400:500]``; pass ``None`` for an
        open bound.

    Returns
    -------
    None
    """
    choices = []
    cells = nd.get_batch_cells(batch, as_list=True)
    subset = cells[start:stop]

    for i, c in enumerate(subset):
        # Progress line every 25 cells. The earlier test, 25 % (i + 1) == 0,
        # had its operands swapped and only fired for i + 1 in {1, 5, 25}.
        if i % 25 == 0:
            print('cell %d/%d\n' % (i, len(subset)))
        try:
            xfspec, ctx = xhelp.load_model_xform(c, batch, modelname,
                                                 eval_model=False)
            modelspec = ctx['modelspec']
            choices.append(modelspec.meta.get('best_random_idx', 0))
        except ValueError:
            # no result
            continue

    if choices:
        choices = np.array(choices).flatten()
        mean_count = np.sum(choices == 0)
        proportion = mean_count / len(choices)
        print('proportion mean prior used: %.4f' % proportion)
    else:
        print('no results found')
# When deflating, make the user confirm that the cached LV results are
# current; otherwise proceed as if the answer were 'y'.
yesno = 'y'
if deflate:
    yesno = input("Are drsc_axes.pickle results up-to-data?? (y/n)")
    resExt = '_deflate'
else:
    resExt = ''

if yesno=='y':
    pass
elif yesno=='n':
    raise ValueError("If wanting to deflate out noise corr. effects, first update LV results by running and/or updating cache_delta_rsc_axis.py!")
else:
    raise ValueError("Unknown response. Respond with y/n")

dfs = []
for batch in batches:
    # Site id = first seven characters of each cell id.
    sites = np.unique([c[:7] for c in nd.get_batch_cells(batch).cellid])
    sites = [s for s in sites if s!='CRD013b']
    options = Aoptions[batch]
    time_bins = twin[batch]
    # NOTE(review): this repeats the CRD013b exclusion above and additionally
    # drops any site whose name contains 'gus'.
    sites = [s for s in sites if (s!='CRD013b') & ('gus' not in s)]
    if batch == 302:
        # Batch 302 sites are split into two electrode ranges.
        sites1 = [s+'.e1:64' for s in sites]
        sites2 = [s+'.e65:128' for s in sites]
        sites = sites1 + sites2
    for site in sites:
        # Only batch 307 restricts the recording to specific raw ids.
        if batch == 307:
            rawid = which_rawids(site)
        else:
            rawid = None
        # site[:7] strips any electrode-range suffix added above.
        manager = BAPHYExperiment(batch=batch, siteid=site[:7], rawid=rawid)
        rec = manager.get_recording(recache=recache, **options)
120539, 120542, 120544 ] # rawids for sessions with TONEinTORCs DS style straddlesetrawids = [ 120110, 120111, 120163, 120165, 120184, 120185, 120188, 120190, 120199, 120201, 120207, 120209, 120211, 120214, 120234, 120234, 120254, 120256, 120258, 120260, 120272, 120273, 120274, 120275, 120293, 120283, 120285, 120286, 120289, 120290, 120293, 120310, 120311, 120312, 120313, 120314, 120316, 120317, 120435, 120436, 120437 ] # ================================= batch 307 ==================================== perfile_df = pd.read_csv(os.path.join(fpath, str(307), 'd_pup_fil_sdexp.csv'), index_col=0) df_307 = pd.DataFrame() cells_307 = nd.get_batch_cells(307).cellid for cellid in cells_307: _, rawid = nd.get_stable_batch_cells(batch=307, cellid=cellid) sql = "SELECT value, svalue, rawid from gData where name='Trial_TargetIdxFreq' and rawid in {}".format( tuple(rawid)) d = nd.pd_query(sql, params=()) sql = "SELECT value, svalue, rawid from gData where name='Trial_RelativeTarRefdB' and rawid in {}".format( tuple(rawid)) d2 = nd.pd_query(sql, params=()) sql = "SELECT behavior, id from gDataRaw where id in {0}".format( tuple(rawid)) da = nd.pd_query(sql) d = d[d.rawid.isin( [r for r in da.id if da[da.id == r]['behavior'].values == 'active'])] d2 = d2[d2.rawid.isin(
def fit_bgfg_model(batch, site):
    """Fit a linear weighting of background/foreground PSTHs for one site.

    The recording of the first cell found at ``site`` is loaded, PSTH
    signals are generated with ``ts.generate_psth_from_resp_bgfg``, and a
    ``wc.<2N>x<N>-lvl.<N>`` model (N = number of channels in ``psth``) is
    fit from the concatenated ``psth_bg``/``psth_fg`` channels to
    ``psth_sp``. The weights start as two side-by-side identity matrices
    scaled by 0.5. Two figures are created as a side effect.

    Parameters
    ----------
    batch : int
        Batch used to look up cells and to load the recording.
    site : str
        Seven-character site id matched against the start of each cell id.

    Returns
    -------
    modelspec, val, r
        The fitted modelspec, the recording with predictions added, and
        ``val`` with its mask applied.

    Raises
    ------
    IndexError
        If no cell in ``batch`` belongs to ``site``.
    """
    cell_df = nd.get_batch_cells(batch)
    site_cells = [cell for cell in cell_df['cellid'].tolist()
                  if cell[:7] == site]
    if not site_cells:
        # Same exception type as the bare [0] lookup, with a usable message.
        raise IndexError('no cells found for site %r in batch %r'
                         % (site, batch))
    cellid = site_cells[0]

    fs = 100
    manager = BAPHYExperiment(cellid=cellid, batch=batch)
    options = {'rasterfs': fs, 'stim': False, 'resp': True}
    rec = manager.get_recording(**options)

    newrec = ts.generate_psth_from_resp_bgfg(rec, manager)

    rec = newrec.copy()
    rec['resp'] = rec['resp'].rasterize()

    # Model input: background and foreground PSTH channels stacked together.
    bgfg_psth_signal = rec['psth'].concatenate_channels(
        (rec['psth_bg'], rec['psth_fg']))
    bgfg_psth_signal.name = 'psth_bgfg'
    rec.add_signal(bgfg_psth_signal)

    epoch_regex = '^STIM'
    rec = nems.preprocessing.average_away_epoch_occurrences(
        rec, epoch_regex=epoch_regex)

    # Add to the mask every STIM epoch whose name contains neither "null"
    # nor "0.5".
    ep = nems.epoch.epoch_names_matching(rec['psth'].epochs, '^STIM')
    for e in ep:
        if ('null' not in e) and ('0.5' not in e):
            print(e)
            rec = rec.or_mask(e)

    # Estimation and validation sets are the same data here.
    est = rec.copy()
    val = rec.copy()

    outputcount = rec['psth'].shape[0]
    inputcount = outputcount * 2

    insignal = 'psth_bgfg'
    outsignal = 'psth_sp'

    modelspec_name = f'wc.{inputcount}x{outputcount}-lvl.{outputcount}'

    # record some meta data for display and saving
    # NOTE(review): 'batch' is stored as the literal 1 rather than the
    # ``batch`` argument -- confirm this is intended.
    meta = {
        'cellid': site,
        'batch': 1,
        'modelname': modelspec_name,
        'recording': est.name
    }
    modelspec = initializers.from_keywords(modelspec_name, meta=meta,
                                           input_name=insignal,
                                           output_name=outsignal)

    init_weights = np.eye(outputcount, outputcount)
    init_weights = np.concatenate((init_weights, init_weights), axis=1)
    modelspec[0]['phi']['coefficients'] = init_weights / 2

    # RUN AN ANALYSIS

    # GOAL: Fit your model to your data, producing the improved modelspecs.
    #       Note that: nems.analysis.* will return a list of modelspecs, sorted
    #       in descending order of how they performed on the fitter's metric.

    # then fit full nonlinear model
    fit_kwargs = {'tolerance': 1e-5, 'max_iter': 100000}
    modelspec = nems.analysis.api.fit_basic(est, modelspec,
                                            fitter=scipy_minimize,
                                            fit_kwargs=fit_kwargs)

    # GENERATE SUMMARY STATISTICS
    print('Generating summary statistics ...')

    # generate predictions
    est, val = nems.analysis.api.generate_prediction(est, val, modelspec)

    # evaluate prediction accuracy
    modelspec = nems.analysis.api.standard_correlation(est, val, modelspec)

    print("Performance: r_fit={0:.3f} r_test={1:.3f}".format(
        modelspec.meta['r_fit'][0][0],
        modelspec.meta['r_test'][0][0]))

    #import nems.gui.editors as gui
    #gui.browse_xform_fit(ctx, xfspec)

    f, ax = plt.subplots(4, 1, figsize=(12, 6))
    # Plot channel 6 when it exists; fall back to the last channel so sites
    # with fewer than seven channels no longer fail on the row index.
    # (assumes the plotted signals have ``outputcount`` rows -- TODO confirm)
    cellnumber = min(6, outputcount - 1)
    dur = 2000
    r = val.apply_mask()
    ax[0].plot(r['pred'].as_continuous()[cellnumber, :dur])
    ax[0].plot(r['psth_sp'].as_continuous()[cellnumber, :dur])
    ax[1].plot(r['psth_fg'].as_continuous()[cellnumber, :dur])
    ax[2].plot(r['psth_bg'].as_continuous()[cellnumber, :dur])
    ax[3].plot(r['mask'].as_continuous()[0, :dur])

    #plt.legend(('pred','actual','mask'))

    plt.figure()
    plt.imshow(modelspec.phi[0]['coefficients'])
    plt.colorbar()

    return modelspec, val, r

# aw = browse_recording(val, ['psth_sp','pred', 'psth_bg', 'psth_fg'], cellid='ARM017a-01-10')
#
# batch=329
# cell_df=nd.get_batch_cells(batch)
# cell_list=cell_df['cellid'].tolist()
# fs=100
#
# cell_list = [cell for cell in cell_list if cell[:3] != 'HOD']
# # cell_list = [cell for cell in cell_list if cell[:7] == 'ARM026b']
# cell_dict = {cell[0:7]:cell for cell in cell_list}
#
# rec_dict = dict()
# for site, cell in cell_dict.items():
#     manager = BAPHYExperiment(cellid=cell, batch=batch)
#     options = {'rasterfs': 100,
#                'stim': False,
#                'resp': True}
#     rec = manager.get_recording(**options)
#     rec_dict[site] = ts.generate_psth_from_resp_bgfg(rec, manager)
#
# cellid='ARM026b'
# rec=rec_dict[cellid].copy()
# rec['resp']=rec['resp'].rasterize()
#
#
bgfg_psth_signal = rec['psth'].concatenate_channels((rec['psth_bg'], rec['psth_fg'])) # bgfg_psth_signal.name = 'psth_bgfg' # rec.add_signal(bgfg_psth_signal) # # epoch_regex = '^STIM' # rec = nems.preprocessing.average_away_epoch_occurrences(rec, epoch_regex=epoch_regex) # # mask out epochs with "null" in the name # ep = nems.epoch.epoch_names_matching(rec['psth'].epochs, '^STIM') # for e in ep: # if ('null' not in e) and ('0.5' not in e): # print(e) # rec = rec.or_mask(e) # # est=rec.copy() # val=rec.copy() # # outputcount=rec['psth'].shape[0] # inputcount=outputcount*2 # # insignal='psth_bgfg' # outsignal='psth_sp' # # modelspec_name = f'wc.{inputcount}x{outputcount}-lvl.{outputcount}' # # # record some meta data for display and saving # meta = {'cellid': cellid, # 'batch': 1, # 'modelname': modelspec_name, # 'recording': est.name # } # modelspec = initializers.from_keywords(modelspec_name, meta=meta, input_name=insignal, output_name=outsignal) # # init_weights = np.eye(outputcount,outputcount) # init_weights = np.concatenate((init_weights,init_weights), axis=1) # modelspec[0]['phi']['coefficients'] = init_weights/2 # # # RUN AN ANALYSIS # # # GOAL: Fit your model to your data, producing the improved modelspecs. # # Note that: nems.analysis.* will return a list of modelspecs, sorted # # in descending order of how they performed on the fitter's metric. 
# # # then fit full nonlinear model # fit_kwargs={'tolerance': 1e-5, 'max_iter': 100000} # modelspec = nems.analysis.api.fit_basic(est, modelspec, fitter=scipy_minimize, # fit_kwargs=fit_kwargs) # # # GENERATE SUMMARY STATISTICS # print('Generating summary statistics ...') # # # generate predictions # est, val = nems.analysis.api.generate_prediction(est, val, modelspec) # # # evaluate prediction accuracy # modelspec = nems.analysis.api.standard_correlation(est, val, modelspec) # # print("Performance: r_fit={0:.3f} r_test={1:.3f}".format( # modelspec.meta['r_fit'][0][0], # modelspec.meta['r_test'][0][0])) # # ctx = {'modelspec': modelspec, 'rec': rec, 'val': val, 'est': est} # xfspec=[] # # #import nems.gui.editors as gui # #gui.browse_xform_fit(ctx, xfspec) # # # f,ax=plt.subplots(4,1, figsize=(12,6)) # cellnumber=3 # dur=2000 # r=val.apply_mask() # ax[0].plot(r['pred'].as_continuous()[cellnumber,:dur]) # ax[0].plot(r['psth_sp'].as_continuous()[cellnumber,:dur]) # ax[1].plot(r['psth_fg'].as_continuous()[cellnumber,:dur]) # ax[2].plot(r['psth_bg'].as_continuous()[cellnumber,:dur]) # ax[3].plot(r['mask'].as_continuous()[0,:dur]) # # #plt.legend(('pred','actual','mask')) # # plt.figure() # plt.imshow(modelspec.phi[0]['coefficients']) # plt.colorbar() # # # # aw = browse_recording(val, ['psth_sp','pred', 'psth_bg', 'psth_fg'], cellid='ARM017a-01-10')
import nems.utilities as nu
import copy
import matplotlib.pyplot as plt
import nems.modules.metrics as mt
import numpy as np
import pandas as pd

''' Single cell test run of SI calculation in batch 259 replaces data with oddball paradigm, evaluate to generate predicted data, uses predicted data as a proxy for SI calculation. '''

# Define batch, cellids, modelname
# NOTE(review): ``ndb`` is not imported in the lines visible here -- confirm
# it is brought into scope elsewhere.
batch = 259
cells_in_batch = ndb.get_batch_cells(batch=batch)
cellids = cells_in_batch['cellid'].tolist()
modelname1 = 'env100_dlog_fir15_dexp_fit01'
modelname2 = 'env100_dlog_stp1pc_fir15_dexp_fit01'
example_cell = 'chn008b-c2'

# Load an SSA stack to extract its stimulus, i.e. the oddball paradigm.
# 'gus019d-b1' has a reasonable stimulation example to use.
ssa_stack = nu.io.load_single_model(cellid='gus019d-b1', batch=296,
                                    modelname='env100e_stp1pc_fir20_fit01_ssa')
# Deep copy so later edits do not alter the loaded stack.
oddball = copy.deepcopy(ssa_stack.data[1][0])  # Jittered input data

# Load the example cell's stack from the database.
original_stack = nu.io.load_single_model(example_cell, batch, modelname1)
def single_scatter(batch, gc, stp, LN, combined, compare,
                   plot_stat='r_ceiling', legend=False):
    """Scatter one model's scores against another's, marking improved cells.

    Parameters
    ----------
    batch : int
        Batch to analyse.
    gc, stp, LN, combined : str
        Model names, in that order.
    compare : sequence of two ints
        Indices into ``[gc, stp, LN, combined]`` selecting the x and y model.
    plot_stat : str
        ``'r_ceiling'`` plots the second table returned by
        ``get_dataframes``; any other value plots the first.
    legend : bool
        Whether to draw a legend.

    Returns
    -------
    fig, fig2
        The scatter figure and a text figure summarising the cell counts.
    """
    all_batch_cells = nd.get_batch_cells(batch, as_list=True)
    df_r, df_c, df_e = get_dataframes(batch, gc, stp, LN, combined)
    e, a, g, s, c = improved_cells_to_list(batch, gc, stp, LN, combined,
                                           se_filter=True, LN_filter=False,
                                           as_lists=True)

    if plot_stat == 'r_ceiling':
        plot_df = df_c
    else:
        plot_df = df_r

    # ``c`` is the list labelled "sig. imp." below; ``a`` is the list
    # counted as "auditory" in the summary text.
    improved = c
    not_improved = list(set(a) - set(c))
    models = [gc, stp, LN, combined]
    names = ['gc', 'stp', 'LN', 'combined']
    m1 = models[compare[0]]
    m2 = models[compare[1]]
    name1 = names[compare[0]]
    name2 = names[compare[1]]
    n_batch = len(all_batch_cells)
    n_all = len(a)
    n_imp = len(improved)
    n_not_imp = len(not_improved)

    m1_scores = plot_df[m1][not_improved]
    m1_scores_improved = plot_df[m1][improved]
    m2_scores = plot_df[m2][not_improved]
    m2_scores_improved = plot_df[m2][improved]

    fig = plt.figure()
    # Unity line for reference.
    plt.plot([0, 1], [0, 1], color='black', linewidth=1, linestyle='dashed',
             dashes=dash_spacing)
    plt.scatter(m1_scores, m2_scores, s=small_scatter, label='no imp.',
                color=model_colors['LN'])
                #color='none',
                #edgecolors='black', linewidth=0.35)
    plt.scatter(m1_scores_improved, m2_scores_improved, s=big_scatter,
                label='sig. imp.', color=model_colors['max'])
                #color='none',
                #edgecolors='black', linewidth=0.35)
    ax_remove_box()

    if legend:
        plt.legend()
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.tight_layout()
    # gca() returns the axes drawn on above; a bare plt.axes() adds a new
    # empty Axes in current Matplotlib, so the aspect was set on the wrong one.
    plt.gca().set_aspect('equal')

    fig2 = plt.figure(figsize=text_fig)
    plt.text(
        0.1, 0.5,
        "batch %d\n"
        "%d/%d  auditory/total cells\n"
        "%d no improvements\n"
        "%d at least one improvement\n"
        "stat: %s, x: %s, y: %s"
        % (batch, n_all, n_batch, n_not_imp, n_imp, plot_stat, name1, name2))

    return fig, fig2
# The table of per-site best alphas is the same for every site, so read it
# once instead of re-reading the CSV on each iteration.
best_alpha = pd.read_csv(
    '/auto/users/hellerc/code/projects/nat_pupil_ms_final/dprime/best_alpha.csv',
    index_col=0)

for site in sites:
    # First column of the site's row holds a string such as "(0.4, 0.4)".
    # .iloc[0] replaces the deprecated positional ``Series[0]`` lookup.
    alpha_text = best_alpha.loc[site].iloc[0]
    alpha_parts = alpha_text.split(',')
    alpha = (float(alpha_parts[0].replace('(', '')),
             float(alpha_parts[1].replace(')', '')))
    # Model-name fragment: decimal points are written as ':'.
    # (The unconditional default 'af0:4.as0:4.sc.rb10' that used to precede
    # this was always overwritten and has been dropped.)
    a = 'af{0}.as{1}.sc.rb10'.format(
        str(alpha[0]).replace('.', ':'), str(alpha[1]).replace('.', ':'))
    modelname = 'ns.fs4.pup-ld-hrc-apm-pbal-psthfr-ev-residual-addmeta_lv.2xR.f.s-lvlogsig.3xR.ipsth_jk.nf5.p-pupLVbasic.constrLVonly.{}'.format(
        a)

    # Any cell of the site is used to locate the saved fit.
    cellid = [c for c in nd.get_batch_cells(batch).cellid if site in c][0]
    mp = nd.get_results_file(batch, [modelname], [cellid]).modelpath[0]
    xfspec, ctx = xforms.load_analysis(mp)

    r = ctx['val'].apply_mask()
    fs = r['resp'].fs
    fast = r['lv'].extract_channels(['lv_fast'])._data.squeeze()
    slow = r['lv'].extract_channels(['lv_slow'])._data.squeeze()
    pupil = r['pupil']._data.squeeze()

    # Power spectrum of the fast LV and the frequency at its peak.
    o = ss.periodogram(fast, fs=fs)
    F.append(o[1].squeeze())
    Fm.append(o[0][np.argmax(o[1].squeeze())])

    o = ss.periodogram(slow, fs=fs)
batch = 310 results_file = nd.get_results_file(batch) all_models = results_file.modelname.unique().tolist() result_paths = results_file.modelpath.tolist() mod_modelnames = [ss.replace('-', '_') for ss in all_models] models_shortname = { 'wc.2x2.c-fir.2x15-lvl.1-dexp.1': 'LN', 'wc.2x2.c-stp.2-fir.2x15-lvl.1-dexp.1': 'STP', 'wc.2x2.c-fir.2x15-lvl.1-stategain.18-dexp.1': 'pop', 'wc.2x2.c-stp.2-fir.2x15-lvl.1-stategain.18-dexp.1': 'STP_pop' } all_cells = nd.get_batch_cells(batch=310).cellid.tolist() goodcell = 'BRT037b-39-1' best_model = 'wc.2x2.c-stp.2-fir.2x15-lvl.1-stategain.18-dexp.1' test_path = '/auto/data/nems_db/results/310/BRT037b-39-1/BRT037b-39-1.wc.2x2.c_stp.2_fir.2x15_lvl.1_stategain.18_dexp.1.fit_basic.2018-11-14T093820/' rerun = False # compare goodness of fit between models # iteratively go trough file if rerun == True: population_metas = list() for filepath in result_paths: _, ctx = xforms.load_analysis(filepath=filepath, eval_model=True,
the shuffled and simulated are pooled across probes, and the probe dimension is collapsed into the repetitions afterword: depending on the duration of the segments used, the number of available repetitions changes, thus with longer segments and therefore fewer repetitions (as low as 3) the LDA and dprime analyses start to break. For the LDA this happens as the algorithm can more easily find a projection that minimizes the variance within category to values close to 0. This in turn propagates to the dprime, leading to artificially super high values. Not only this, given the parametric nature of the dprime analysis, 3 repetitions are by no means adequate. Some sites might work much better given that they have a greater number of repetitions ''' batch = 319 # NTI batch, Sam paradigm load_fs = 100 # sampling freq of loaded signal # check sites in batch batch_cells = nd.get_batch_cells(batch) cell_ids = batch_cells.cellid.unique().tolist() site_ids = set([cellid.split('-')[0] for cellid in cell_ids]) meta = { 'reliability': 0.1, # r value 'smoothing_window': 0, # ms 'raster_fs': 100, 'transitions': ['silence', 'continuous', 'similar', 'sharp'], 'significance': False, 'montecarlo': 1000, 'zscore': False } code_to_name = {'t': 'Probe', 'ct': 'Context'}
def load_population_stack(modelname, batch):
    """Stack response, prediction and pupil data across all cells of a batch.

    One model stack is loaded per cell. Every second entry of each stack's
    final data list (indices 1, 3, 5, ...) is read, placed along a trailing
    cell axis, and the entries are concatenated along axis 1.

    Parameters
    ----------
    modelname : str
        Model whose saved stacks are loaded.
    batch : int
        Batch used both to list the cells and to load their stacks.
        (The cell list used to be queried for a hard-coded batch 301
        regardless of this argument.)

    Returns
    -------
    resp, pred, pupil : numpy.ndarray
        ``resp`` and ``pred`` carry a trailing cell axis; ``pupil`` holds the
        pupil trace of the last cell processed for each entry.
    """
    meta = ndb.get_batch_cells(batch=batch)
    cellids = meta['cellid']
    cell1 = meta['cellid'][0]

    # The first cell's stack is only used to find out how many entries exist.
    stack = ut.io.load_single_model(cell1, batch, modelname)
    nfiles = len(stack.data[-1])

    # NOTE(review): a_p is filled below but never returned or otherwise used.
    a_p = []
    stacks = []
    for j in tqdm(range(1, nfiles, 2)):
        for i, cellid in (enumerate(cellids)):
            if j == 1:
                # Load each cell's stack once, on the first pass.
                stack = ut.io.load_single_model(cellid, batch, modelname)
                stacks.append(stack)

            p = stacks[i].data[-1][j]['pred'].copy()
            r = stacks[i].data[-1][j]['resp_raw'].copy()
            if len(r.shape) == 2:
                r = r[np.newaxis, :, :]
            if r.shape[0] > 1:
                # Tile the prediction so it can be stored next to the
                # multi-row response.
                p = np.transpose(np.tile(p, (r.shape[1], 1, 1)).T, (0, 2, 1))
            pup = stacks[i].data[-1][j]['pupil'].copy()

            if i == 0:
                # NOTE(review): this reads ``stack`` (the most recently
                # loaded stack), not ``stacks[i]`` -- confirm intended.
                n_reps = stack.data[-1][j]['resp_raw'].shape[1]
                if stack.data[-1][j]['stimparam'][0].find('_a_') > 0:
                    a_p.extend([1] * n_reps)
                else:
                    a_p.extend([0] * n_reps)

            if j == 1:
                if i == 0:
                    pred = np.empty((p.shape + (len(cellids), )))
                    resp = np.empty((r.shape + (len(cellids), )))
                    pupil = np.empty((pup.shape))
                    ptemp = np.empty((r.shape + (len(cellids), )))
                    rtemp = np.empty((r.shape + (len(cellids), )))
                    pupTemp = np.empty((pup.shape))
                pred[:, :, :, i] = p
                resp[:, :, :, i] = r
                pupil[:, :, :] = pup
                ptemp[:, :, :, i] = p
                rtemp[:, :, :, i] = r
                pupTemp[:, :, :] = pup

            if j > 1:
                if i == 0:
                    # Fresh buffers for this entry.
                    ptemp = np.empty((r.shape + (len(cellids), )))
                    rtemp = np.empty((r.shape + (len(cellids), )))
                    pupTemp = np.empty((pup.shape))
                ptemp[:, :, :, i] = p
                rtemp[:, :, :, i] = r
                pupTemp[:, :, :] = pup

        if j > 1:
            # Append this entry to the running arrays.
            pred = np.concatenate((pred, ptemp), axis=1)
            resp = np.concatenate((resp, rtemp), axis=1)
            pupil = np.concatenate((pupil, pupTemp), axis=1)

    return resp, pred, pupil
_SUMMARY_COLUMNS = [
    'cellid',
    'r_p0b0', 'r_p0b', 'r_pb0', 'r_pb',
    'e_p0b0', 'e_p0b', 'e_pb0', 'e_pb',
    'rf_p0b0', 'rf_p0b', 'rf_pb0', 'rf_pb',
    'r_pup', 'r_beh', 'r_beh_pup0',
    'pup_mod', 'beh_mod', 'pup_mod_n', 'beh_mod_n',
    'pup_mod_beh0', 'beh_mod_pup0', 'pup_mod_beh0_n', 'beh_mod_pup0_n',
    'd_pup', 'd_beh', 'g_pup', 'g_beh',
    'ref_all_resp', 'ref_common_resp', 'tar_max_resp', 'tar_probe_resp',
]


def _summary_row(stats):
    """Flatten one per-cell ``stats`` dict into a row ordered like
    ``_SUMMARY_COLUMNS``."""
    r_test = stats['r_test']
    se_test = stats['se_test']
    r_floor = stats['r_floor']
    return [
        stats['cellid'],
        r_test[0], r_test[1], r_test[2], r_test[3],
        se_test[0], se_test[1], se_test[2], se_test[3],
        r_floor[0], r_floor[1], r_floor[2], r_floor[3],
        # r_pup: full model minus index-1 model
        r_test[3] - r_test[1],
        # r_beh (unique behavior): full model minus behavior shuffled
        r_test[3] - r_test[2],
        # r_beh_pup0 (naive behavior, ignorant of pupil): behavior alone
        # (pupil shuffled) minus all shuffled
        r_test[1] - r_test[0],
        stats['pred_mod'][0, 1], stats['pred_mod'][1, 2],
        stats['pred_mod_norm'][0, 1], stats['pred_mod_norm'][1, 2],
        stats['pred_mod_full'][0, 1], stats['pred_mod_full'][1, 2],
        stats['pred_mod_full_norm'][0, 1], stats['pred_mod_full_norm'][1, 2],
        stats['b'][3, 1], stats['b'][3, 2],
        stats['g'][3, 1], stats['g'][3, 2],
        stats['ref_all_resp'], stats['ref_common_resp'],
        stats['tar_max_resp'], stats['tar_probe_resp'],
    ]


def plot_save_examples(batch, compare, loader, basemodel, fitter,
                       RELOAD=False):
    """Plot per-cell state-model fits for a batch and summarize them.

    When ``RELOAD`` is true, or no cached ``results.csv`` exists yet, one
    figure is generated per cell with the ``stateplots`` function selected by
    ``compare``, saved as PDF and PNG, and the returned statistics are
    collected into a table indexed by cell id. Otherwise the cached table is
    read back. Four summary scatter/histogram figures are then produced from
    the table.

    All files go to ``<root>/<compare>_<area>_<batch>_<loader>_<basemodel>_
    <fitter>/`` and are only written when that location is writable.

    Parameters
    ----------
    batch : int
        Batch to analyze. Batches 301 and 307 are labelled area "AC", any
        other batch "IC".
    compare : str
        ``"pb"`` uses ``pb_model_plot``, ``"ppas"`` uses ``ppas_model_plot``,
        anything else uses ``pp_model_plot``.
    loader, basemodel, fitter : str
        Passed through to the plotting function and used in the output
        directory name.
    RELOAD : bool, optional
        Force regeneration of per-cell plots and the cached table.

    Returns
    -------
    None
    """
    area = "AC" if batch in [301, 307] else "IC"

    d = nd.get_batch_cells(batch)
    cellids = list(d['cellid'])

    root_path = '/auto/users/svd/projects/pupil-behavior'
    modelset = '{}_{}_{}_{}_{}_{}'.format(compare, area, batch, loader,
                                          basemodel, fitter)
    out_path = '{}/{}/'.format(root_path, modelset)

    if os.access(root_path, os.W_OK) and not os.path.exists(out_path):
        os.makedirs(out_path)

    datafile = out_path + 'results.csv'
    plt.close('all')

    if (not RELOAD) and (not os.path.isfile(datafile)):
        RELOAD = True
        print('datafile not found, reloading')

    if RELOAD:
        if compare == "pb":
            plot_fn = stateplots.pb_model_plot
        elif compare == "ppas":
            plot_fn = stateplots.ppas_model_plot
        else:
            plot_fn = stateplots.pp_model_plot

        # Collect plain rows and build the frame once: DataFrame.append was
        # removed in pandas 2.0 and re-copied the whole table on every call.
        rows = []
        for cellid in cellids:
            fh, stats = plot_fn(cellid, batch, loader=loader,
                                basemodel=basemodel, fitter=fitter)
            rows.append(_summary_row(stats))
            if os.access(out_path, os.W_OK):
                fh.savefig(out_path + cellid + '.pdf')
                fh.savefig(out_path + cellid + '.png')
            plt.close(fh)

        df = pd.DataFrame(rows, columns=_SUMMARY_COLUMNS)
        df.set_index(['cellid'], inplace=True)
        if os.access(out_path, os.W_OK):
            df.to_csv(datafile)
    else:
        # load cached dataframe
        df = pd.read_csv(datafile, index_col=0)

    # Highlight cells whose full-model score minus its error exceeds the
    # all-shuffled score plus its error.
    sig_mod = list(df['r_pb'] - df['e_pb'] > df['r_p0b0'] + df['e_p0b0'])

    if compare == "pb":
        alabel = "active"
    elif compare == "ppas":
        alabel = "each passive"
    else:
        alabel = "pre/post"

    mi_bounds = [-0.4, 0.4]

    fh1 = stateplots.beta_comp(df['r_pup'], df['r_beh'],
                               n1='pupil', n2=alabel,
                               title=modelset + ' unique pred',
                               hist_range=[-0.02, 0.15],
                               highlight=sig_mod)
    fh2 = stateplots.beta_comp(df['pup_mod_n'], df['beh_mod'],
                               n1='pupil', n2=alabel,
                               title=modelset + ' mod index',
                               hist_range=mi_bounds,
                               highlight=sig_mod)
    fh3 = stateplots.beta_comp(df['beh_mod_pup0'], df['beh_mod'],
                               n1=alabel + '-nopup', n2=alabel,
                               title=modelset + ' unique mod',
                               hist_range=mi_bounds,
                               highlight=sig_mod)

    # unique behavior (r_beh): performance of full model minus performance
    # with behavior shuffled, i.e. r_pb - r_pb0
    # naive behavior, ignorant of pupil (r_beh_pup0): performance of behavior
    # alone (pupil shuffled) minus all shuffled, i.e. r_p0b - r_p0b0
    fh4 = stateplots.beta_comp(df['r_beh_pup0'], df['r_beh'],
                               n1=alabel + '-nopup', n2=alabel,
                               title=modelset + ' unique r',
                               hist_range=[-0.02, .15],
                               highlight=sig_mod)

    if os.access(out_path, os.W_OK):
        fh1.savefig(out_path + 'summary_pred.pdf')
        fh2.savefig(out_path + 'summary_mod.pdf')
        fh3.savefig(out_path + 'summary_mod_ctl.pdf')
        fh4.savefig(out_path + 'summary_r_ctl.pdf')
'psth.fs20.pup-ld-st.pup0.afl0-ref-psthfr.s_stategain.S_jk.nf20-basic',
'psth.fs20.pup-ld-st.pup.afl.pxf-ref-psthfr.s_stategain.S_jk.nf20-basic',
'psth.fs20.pup-ld-st.pup0.afl.pxf-ref-psthfr.s_stategain.S_jk.nf20-basic',
'psth.fs20.pup-ld-st.pup.afl0.pxf-ref-psthfr.s_stategain.S_jk.nf20-basic',
'psth.fs20.pup-ld-st.pup.afl.pxf0-ref-psthfr.s_stategain.S_jk.nf20-basic',
'psth.fs20.pup-ld-st.pup0.afl0.pxf-ref-psthfr.s_stategain.S_jk.nf20-basic',
'psth.fs20.pup-ld-st.pup.afl0.pxf0-ref-psthfr.s_stategain.S_jk.nf20-basic',
'psth.fs20.pup-ld-st.pup0.afl.pxf0-ref-psthfr.s_stategain.S_jk.nf20-basic',
'psth.fs20.pup-ld-st.pup0.afl0.pxf0-ref-psthfr.s_stategain.S_jk.nf20-basic',
'psth.fs20.pup-ld-st.pup.fil-ref-psthfr.s_sdexp.S_jk.nf20-basic',
'psth.fs20.pup-ld-st.pup0.fil-ref-psthfr.s_sdexp.S_jk.nf20-basic',
'psth.fs20.pup-ld-st.pup.fil0-ref-psthfr.s_sdexp.S_jk.nf20-basic',
'psth.fs20.pup-ld-st.pup0.fil0-ref-psthfr.s_sdexp.S_jk.nf20-basic',
]

# Batch whose cells are fitted, and whether force_rerun is requested when
# the jobs are submitted.
batch = 309
force_rerun = True

# Every cell id registered for the batch.
cells = nd.get_batch_cells(batch).cellid.tolist()

# Fit script and the Python interpreter handed to the queue.
script = '/auto/users/hellerc/code/NEMS/scripts/fit_single.py'
executable = '/auto/users/hellerc/anaconda3/envs/crh_nems/bin/python'

# Submit the cell list and model list to the queue.
# NOTE(review): presumably one job per (cell, model) pair, each reserving
# 1 GB -- confirm against nd.enqueue_models.
nd.enqueue_models(celllist=cells, modellist=modellist, batch=batch,
                  force_rerun=force_rerun, script_path=script,
                  executable_path=executable, reserve_gb=1, user='******')