def bad_trials(label, threshold=20, only_indexes=True):
    """Identify trials in which any unit's binned firing exceeds `threshold`."""
    data = io.load(label, 'no_smoothing')
    if 'quality' in data.columns:
        bad = select(data, _min_quality=0).full.apply(
            lambda x: np.max(x) > threshold).unstack().any(axis=1)
    else:
        bad = select(data, is_selected=True).full.apply(
            lambda x: np.max(x) > threshold).unstack().any(axis=1)
    if only_indexes:
        return bad[bad.values].index.values
    else:
        return bad
def select_ramping_neurons(label, tmin=1.5, min_quality=MIN_QUALITY,
                           P_VALUE_RAMP=.05, return_ramping=True):
    """
    Returns the indices of ramping neurons for a given session.

    If `return_ramping` is False, returns the indices of non-ramping
    neurons instead.
    """
    data = io.load(label, 'no_smoothing')
    data = select(data, _min_duration=tmin, _mineq_quality=min_quality)
    fr = to_feature_array(data, subset='cropped')
    rp = lambda df: ramping_p(df.value, df.time)
    p_ramp = fr.reset_index().drop(
        'trial', axis=1).melt(id_vars=['time']).groupby('unit').apply(rp)
    is_ramp = p_ramp < P_VALUE_RAMP
    return is_ramp[is_ramp == return_ramping].index.values
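# Usage sketch (illustrative, not part of the original pipeline): collect the
# ramping-unit ids of every DRRD session defined in SHORTCUTS.
# ramping_units = {label: select_ramping_neurons(label, return_ramping=True)
#                  for label in SHORTCUTS['groups']['DRRD']}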
    data = loadmat(filename)
    spikes = data['dados'][0, 0][1]
    behavior = data['dados'][0, 0][0]

    spikes = pd.DataFrame([[spikes[0, i][0][:, 0], spikes[0, i][0][:, 1]]
                           for i in range(spikes.shape[1])
                           if spikes[0, i][0].shape[1] == 2],
                          columns=['times', 'trial'])

    behavior = pd.DataFrame(np.transpose(behavior[0, 0][0]),
                            columns=['one', 'onset', 'offset', 'zero',
                                     'duration', 'sortIdx', 'sortLabel']
                            ).drop(['one', 'zero', 'sortIdx', 'sortLabel'],
                                   axis=1)
    behavior['trial'] = np.arange(1, behavior.shape[0] + 1)
    behavior = behavior.set_index('trial')

    # Calculate spike times relative to trial onset
    spikes['trial_time'] = pd.DataFrame(np.transpose(
        [spikes.times[i] - behavior.iloc[spikes.trial[i] - 1].onset.values
         for i in range(spikes.shape[0])]))
    return spikes, behavior


# Load each session into DataFrames
for rat in SHORTCUTS['groups']['ALL']:
    filepath = io.load(rat, 'spikesorted', getpath=True)
    if rat in SHORTCUTS['groups']['GB']:
        spikes, behavior = spikes_behavior_from_mat(filepath)
    elif rat in SHORTCUTS['groups']['EZ']:
        print(filepath)
        spikes, behavior = spikes_behavior_from_ez(filepath)
    else:
        raise NotImplementedError('This dataset is not included as a special case')

    identifiers = dict(session=rat.split()[0],
                       rat_number=rat.split()[1])
    io.save(spikes, rat, 'spikes', 'interim', **identifiers)
    io.save(behavior, rat, 'behavior', 'interim', **identifiers)
conditions = product(DSETS, CLFs, BLINE, SUBSETS, LABELS)
for dset, (clf, clfname), bline, subset, label in conditions:
    savedir = '{}/{}/{}/{}'.format(basedir, clfname, dset, subset)
    print(savedir, '\n', label)
    if not os.path.exists(savedir):
        os.makedirs(savedir)

    alldata = {}
    for ramping in [True, False]:
        if label == 'all':
            print([lab for lab in SHORTCUTS['groups']['EZ']])
            dsets = [select(io.load(lab, dset).reset_index(),
                            _mineq_quality=MIN_QUALITY,
                            _in_unit=select_ramping_neurons(lab,
                                                            return_ramping=ramping),
                            _min_duration=tmin,
                            _max_duration=tmax).set_index(['trial', 'unit'])
                     for lab in SHORTCUTS['groups']['EZ']]
            print([ds.reset_index().trial.nunique() for ds in dsets])
            dsets = [ds for ds in dsets
                     if ds.reset_index().trial.nunique() > 0]
            n_trials = np.min([ds.reset_index().trial.nunique() for ds in dsets])
            n_bins = dsets[0][subset].apply(len).min()
            alldata[ramping] = []
# Identifier variables
id_vars = ['logC', 'penalty']
id_combs = lambda df: product(*(df[id_].unique() for id_ in id_vars))

# Create visualizations
for label, dset in product(SHORTCUTS['groups']['DRRD'], DSETS):
    print(label, dset)
    savedir = fsavedir(label, dset)
    if not os.path.exists(savedir):
        os.makedirs(savedir)

    # Loading data
    similarities = pd.read_csv(sim_filename(label, dset)).set_index(['unit', 'trial'])
    units = similarities.reset_index().unit.unique()
    behav = io.load(label, 'behav_stats').reset_index()
    behav = select(behav, trial_in_=similarities.reset_index()['trial'])

    # One image for each neuron
    for unit in units:
        fig = plt.figure()
        sns.heatmap(similarities.loc[unit])
        plt.title('Unit {}, {}'.format(unit, label))
        plt.savefig('{}/sim_evo_unit_{}.png'.format(savedir, unit), dpi=500)
        plt.close(fig)

    # Plus one image for the mean across units
    fig = plt.figure()
    sns.heatmap(similarities.reset_index().groupby('trial').mean().drop(
        'unit', axis=1))
    plt.title('Similarity with mean across units')
        cross_validate(clf, df.values, df.reset_index().time,
                       df.reset_index().trial, cv=GroupShuffleSplit(10),
                       scoring=scoring, return_train_score=False))

# dag = DAG_analysis()
# dag.add_step(io.load, dset='wide_smoothed')
# dag.add_step(frankenstein, _min_duration=1.5, is_selected=True,
#              is_tired=False, subset='full')
# dag.add_step_branching_by_parameter(analysis, param_name='clf',
#                                     branch_names=['LDA', 'Bayesian Ridge'],
#                                     param_values=[LinearDiscriminantAnalysis(),
#                                                   BayesianRidge()])
# (a minimal sketch of such a pipeline helper is given after this block)

# Merging rats
DR = [io.load(label, 'wide_smoothed') for label in SHORTCUTS['groups']['DRRD']]
EZ = [io.load(label, 'wide_smoothed') for label in SHORTCUTS['groups']['EZ']]

sp_pfc = frankenstein(DR, _min_duration=1.5, is_selected=True,
                      is_tired=False, subset='full')
sp_pfc = sp_pfc[(sp_pfc.reset_index('time').time >= 200).values &
                (sp_pfc.reset_index('time').time < 1300).values]

ez_pfc = frankenstein(EZ, _min_duration=1.5, _min_quality=0,
                      area='PFC', subset='full')
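# The commented-out DAG_analysis above is only an API idea. A minimal sketch of
# what such a pipeline helper could look like is given below; the class and
# method names are hypothetical, not an existing spikelearn interface.
class DAGAnalysisSketch:
    """Chain analysis steps, feeding each step the previous step's output."""

    def __init__(self):
        self.steps = []

    def add_step(self, func, **params):
        # Store the callable together with its fixed keyword arguments.
        self.steps.append((func, params))

    def run(self, first_input):
        # Apply the steps in order; a branching step (as sketched in the
        # comments above) would fan out into one run() per parameter value.
        out = first_input
        for func, params in self.steps:
            out = func(out, **params)
        return out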
ntrials_init_vs_after = 70
n_splits = 50
ntrials_total = 400
SUBSETS = ['cropped', 'full']

conditions = product(DSETS, CLFs, BLINE, SHORTCUTS['groups']['EZ'], SUBSETS)
for dset, (clf, clfname), bline, label, subset in conditions:
    savedir = '{}/{}/{}/{}/{}/{}'.format(basedir, clfname, dset, subset,
                                         label, bline)
    print(savedir)
    if not os.path.exists(savedir):
        os.makedirs(savedir)

    data = io.load(label, dset)
    data = select(data, _mineq_quality=MINQUALITY,
                  _min_duration=tmin, _max_duration=tmax)
    dataPFC = select(data, area='PFC')
    dataSTR = select(data, area='STR')
    print(data.shape, dataPFC.shape, dataSTR.shape)

    baseline = io.load(label, 'baseline')
    if bline:
        data = remove_baseline(to_feature_array(data, subset=subset),
                               baseline, .5)
        dataPFC = remove_baseline(to_feature_array(dataPFC, subset=subset),
                                  baseline, .5)
        dataSTR = remove_baseline(to_feature_array(dataSTR, subset=subset),
                                  baseline, .5)
""" """ import numpy as np import pandas as pd import sys sys.path.append('.') from spikelearn.data import io, SHORTCUTS # Load into DataFrames each data for rat in SHORTCUTS['groups']['ALL']: behav = io.load(rat, 'behavior') behav['intertrial_interval'] = np.hstack( (0, behav.onset.values[1:] - behav.offset.values[:-1])) tiredness = io.load(rat, 'tiredness').values[0, 0] if io.dataset_exist( rat, 'tiredness') else float('inf') behav['is_tired'] = behav.index > tiredness io.save(behav, rat, 'behav_stats', 'interim')
from itertools import product

from spikelearn.data import io, select, to_feature_array, SHORTCUTS
from spikelearn.models.shuffle_decoding import shuffle_cross_predict
from catboost import CatBoostClassifier
from sklearn.linear_model import BayesianRidge
import pickle

allres = {}
# Elapsed-time regressor used for the short- vs long-trial comparison
reg = BayesianRidge()
for rat, dset in product(SHORTCUTS['groups']['eletro'], DSETS):
    data = select(io.load(rat, dset), _min_duration=.5, is_tired=False)
    terciles = [data.duration.quantile(q) for q in [1/3, 2/3]]
    t1 = to_feature_array(select(data, _max_duration=terciles[0]), subset='full')
    t3 = to_feature_array(select(data, _min_duration=terciles[1]), subset='full')

    res = shuffle_cross_predict(reg, [t1, t3], ['short', 'long'],
                                n_splits=5, problem='regression',
                                feature_scaling='robust')
    allres[(rat, dset)] = res

pickle.dump(allres, open('data/results/warping.pickle', 'wb'))
# TODO calculate bias and mean bias direction
DATASETS = ['medium_smoothed']
# ['wide_smoothed', 'medium_smoothed', 'narrow_smoothed']
WHENS = ['init', 'end']
NUM_TRIALS = np.arange(10, 100, 5)
LOGCS = np.linspace(-1.5, 4, 20)

ANALYSIS_NTRIALS = product(DRRD_RATS, DATASETS, WHENS, NUM_TRIALS, [0])
ANALYSIS_REGUL = product(DRRD_RATS, DATASETS, WHENS, [50], LOGCS)
ANALYSIS = chain(ANALYSIS_NTRIALS, ANALYSIS_REGUL)

results = pd.DataFrame()
preds = pd.DataFrame()
acts = pd.DataFrame()
for rat, dataset, when, num_trials, logC in ANALYSIS:
    clf = LogisticRegression(C=10**logC, penalty='l1')
    data = io.load(rat, dataset)
    units = data.groupby('is_selected').get_group(True).reset_index().unit.unique()
    data = select(data.reset_index(), maxlen=num_trials * units.shape[0],
                  takefrom=when, is_selected=True, _min_duration=1.5,
                  is_tired=False).set_index(['trial', 'unit'])

    X, y, trial = to_feature_array(data)
    local_preds, local_results = shuffle_val_predict(clf, X, y, trial, cv='sh',
# Prepare output folders
[os.makedirs(folder + dset) for dset in DSETS]


# Code cleaners
def clean(df):
    return df.drop(['trial', 'init'], axis=1, level=0)


# Run
for label, dset in product(SHORTCUTS['groups']['DRRD'], DSETS):
    subset = 'full' if 'norm' in dset else 'cropped'
    # viz = lambda dset: dset + '_viz' if 'norm' not in dset or 'narrow' in dset else dset
    viz = lambda dset: dset
    data_ = select(io.load(label, viz(dset)),
                   _min_duration=TMIN, is_selected=True)
    data = to_feature_array(data_, False, subset)
    print(label, dset)

    # Pearson similarity
    res_sim = pd.DataFrame()
    for unit in data.columns:
        sim_mat = unit_similarity_evolution(data[unit], WSIZE)
        sim_mat['unit'] = unit
        res_sim = res_sim.append(sim_mat)

    # ML prediction comparison
    res_pred = pd.DataFrame()
    res_weights = pd.DataFrame()
import sys
sys.path.append('.')
from spikelearn.data import io, SHORTCUTS

# Extract the per-unit baseline activity of each rat from its epoched spikes
for label in SHORTCUTS['groups']['eletro']:
    baseline = io.load(label, 'epoched_spikes').baseline.unstack('unit')
    io.save(baseline, label, 'baseline')
            lambda x: np.max(x) > threshold).unstack().any(axis=1)
    else:
        bad = select(data, is_selected=True).full.apply(
            lambda x: np.max(x) > threshold).unstack().any(axis=1)
    if only_indexes:
        return bad[bad.values].index.values
    else:
        return bad


remove_baseline_flag = True
for rat, dset in product(SHORTCUTS['groups']['eletro'], DSETS):
    print(rat, dset)
    data = select(io.load(rat, dset), _min_duration=1.5,
                  _max_duration=4.5, is_tired=False)
    data = data[~data.reset_index().trial.isin(bad_trials(rat)).values]
    if rat in SHORTCUTS['groups']['DRRD']:
        data = select(data, is_selected=True)
    else:
        data = select(data, _min_quality=0)
    data = to_feature_array(data, Xyt=False, subset='full')
    if remove_baseline_flag:
        data = remove_baseline(data, io.load(rat, 'baseline'), .5)

    X, y, trial = (data.values, data.reset_index().time,
                   data.reset_index().trial)
    uniquetrials = np.unique(trial)
    for i, tr_ in enumerate(uniquetrials[30:]):
TMIN = 1.5
folder = 'data/results/across_trials/edge_crop_decoding/'
if not os.path.exists(folder):
    os.makedirs(folder)

DSETS = ['narrow_smoothed', 'narrow_smoothed_norm']
# ['medium_smoothed', 'medium_smoothed_norm',
#  'narrow_smoothed', 'narrow_smoothed_norm',
#  'wide_smoothed']
NSPLITS = 30
subset = 'cropped'
clf = LogisticRegression()

for label, dset in product(SHORTCUTS['groups']['DRRD'], DSETS):
    data = select(io.load(label, dset), _min_duration=1.5,
                  is_selected=True, is_tired=False)
    data = to_feature_array(data)
    times = data.reset_index().time.unique()

    res = []
    # Progressively crop time bins from both edges of the trial
    for crop in range((len(times) - 1) // 2):
        if crop > 0:
            to_use_times = times[crop:-crop]
        else:
            to_use_times = times
        df = select(data.reset_index(),
                    time_in_=to_use_times).set_index(['trial', 'time'])
import os
from spikelearn.data import io, to_feature_array, select, SHORTCUTS

# Directory
savedir = 'data/results/duration/d_prime2'
if not os.path.exists(savedir):
    os.makedirs(savedir)

# Parameters
T_SHORT_MAX, T_LONG_MIN = 1.5, 1.5

for label in SHORTCUTS['groups']['DRRD']:
    # Load necessary data
    data = io.load(label, 'epoched_spikes')
    data = select(data, is_tired=False)    # Remove unwanted trials
    data = select(data, is_selected=True)  # Remove unwanted units

    # Separate long and short trials
    data['is_short'] = data.duration < T_SHORT_MAX
    data['is_long'] = data.duration > T_LONG_MIN
    data = data[data.is_short | data.is_long].reset_index()

    # Separate along session moment (first vs second half)
    data['is_init'] = data.trial < data.trial.quantile(.5)

    # Number of spikes during baseline
    data['baseline'] = data.baseline.apply(len)

    # Calculate D'
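    # Hedged sketch of the D' step (not necessarily the original implementation):
    # the usual discriminability index,
    #     d' = (mean_long - mean_short) / sqrt((var_long + var_short) / 2),
    # applied per unit to the baseline spike counts built above. The original
    # analysis may additionally split by `is_init`.
    def d_prime(short_counts, long_counts):
        # Pooled-variance discriminability between short and long trials
        pooled_sd = ((short_counts.var() + long_counts.var()) / 2) ** 0.5
        return (long_counts.mean() - short_counts.mean()) / pooled_sd

    dprimes = data.groupby('unit').apply(
        lambda df: d_prime(df.loc[df.is_short, 'baseline'],
                           df.loc[df.is_long, 'baseline']))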
    'narrow_smoothed_norm': {
        'sigma': 20,
        'bin_size': 50
    },
    'narrow_smoothed_norm_viz': {
        'sigma': 20,
        'bin_size': 10
    },
    'no_smoothing_norm': {
        'sigma': None,
        'bin_size': 100
    }
}

for rat_label in SHORTCUTS['groups']['eletro']:  # SHORTCUTS:
    epoched = io.load(rat_label, 'epoched_spikes')
    for dset_name, params in DSET_PARAMS.items():
        # Create dataset and add identifiers
        smoothed_dataset = pd.DataFrame(index=epoched.index)
        if 'norm' in dset_name:
            cnames = ['normalized_time', 'normalized_without_edges']
            edges_for_each = [lambda x: (0, 1000), lambda x: (0, 1000)]
        else:
            cnames = ['with_baseline', 'time']
            edges_for_each = [
                lambda x: (BASELINE, 1000 * x.duration),
                lambda x: (MA_CUT[0], 1000 * x.duration - MA_CUT[1])
            ]
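        # Hedged sketch of how `sigma` and `bin_size` (presumably in ms) are
        # typically applied: bin the spike times and smooth the counts with a
        # Gaussian kernel. The pipeline's actual smoothing routine lives
        # elsewhere; the helper name and arguments below are illustrative only.
        # from scipy.ndimage import gaussian_filter1d
        # def bin_and_smooth(spike_times_ms, edges, bin_size, sigma):
        #     counts, _ = np.histogram(
        #         spike_times_ms,
        #         bins=np.arange(edges[0], edges[1] + bin_size, bin_size))
        #     if sigma is None:
        #         return counts
        #     return gaussian_filter1d(counts.astype(float), sigma / bin_size)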