def get_dataset_mlab(f_behavior, f_ephys, conditions, smooth_method='both',
                     smooth_width=[80, 40], pad=[400, 400], z_score=True,
                     trial_duration=None, max_duration=5000, min_rate=0.1):
    global condition_pairs
    ##get the spike dataset and the trial info
    X, trial_data = ptr.get_trial_spikes(f_behavior=f_behavior, f_ephys=f_ephys,
                                         smooth_method=smooth_method, smooth_width=smooth_width,
                                         pad=pad, z_score=z_score, trial_duration=trial_duration,
                                         max_duration=max_duration, min_rate=min_rate)
    ##if uncertainty level is one of the requested marginalizations, add that data to trial_data
    if 'u_level' in conditions:
        trial_data = mf.append_uncertainty(trial_data, condition_pairs['u_level'])
    ##get some metadata about this session
    n_units = X.shape[1]
    n_bins = X.shape[2]
    ##sort out the different trial types
    trial_index, n_trials = split_trials(trial_data, conditions)
    trial_types = list(trial_index)
    ##allocate space for the dataset
    if n_trials > 0:
        X_c = np.empty((n_trials, n_units, len(condition_pairs[conditions[0]]),
                        len(condition_pairs[conditions[1]]), n_bins))
        X_c[:] = np.nan
        ##an array recording how many trials we have for each trial type
        trialNum = np.empty((n_units, len(condition_pairs[conditions[0]]),
                             len(condition_pairs[conditions[1]])))
        trialNum[:] = np.nan
        for t in trial_index.keys():
            ##based on the key, figure out where these trials belong in the dataset;
            ##the conditions[0] trial type is expected to be the first part of the string
            c1_type = t[:t.index('+')]
            c2_type = t[t.index('+') + 1:]
            c1_idx = condition_pairs[conditions[0]].index(c1_type)
            c2_idx = condition_pairs[conditions[1]].index(c2_type)
            ##now add the data to the dataset using these indices
            for i, j in enumerate(trial_index[t]):
                X_c[i, :, c1_idx, c2_idx, :] = X[j, :, :]
            ##record how many trials of this type we have
            trialNum[:, c1_idx, c2_idx] = len(trial_index[t])
    else:
        print("One marginalization has no trials.")
        X_c = None
        trialNum = None
    return X_c, trialNum
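##A minimal, hypothetical sketch of how the 'c1+c2' trial-type keys above map onto
##condition_pairs indices. The condition names and pair lists here are illustrative
##only; the real values live in the module-level condition_pairs dictionary.
def _example_key_to_indices():
    condition_pairs = {'action': ['upper_lever', 'lower_lever'],
                       'outcome': ['rewarded_poke', 'unrewarded_poke']}
    conditions = ['action', 'outcome']
    key = 'upper_lever+rewarded_poke'
    c1_type = key[:key.index('+')]          ##'upper_lever'
    c2_type = key[key.index('+') + 1:]      ##'rewarded_poke'
    c1_idx = condition_pairs[conditions[0]].index(c1_type)  ##0
    c2_idx = condition_pairs[conditions[1]].index(c2_type)  ##0
    return c1_idx, c2_idx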
def decision_variables(f_behavior, f_ephys, pad, smooth_method='both',
                       smooth_width=[80, 40], min_rate=0.1, z_score=True,
                       trial_duration=None, max_duration=4000):
    ##get the raw data
    X, trial_data = ptr.get_trial_spikes(f_behavior, f_ephys, smooth_method=smooth_method,
                                         smooth_width=smooth_width, pad=pad, z_score=z_score,
                                         trial_duration=trial_duration, max_duration=max_duration,
                                         min_rate=min_rate)
    ##only take the data from the pre-action epoch
    epoch = 'action'
    if epoch == 'action':
        if smooth_method == 'bins':
            X = X[:, :, :int(pad[0] / smooth_width)]
        elif smooth_method == 'both':
            X = X[:, :, :int(pad[0] / smooth_width[1])]
        else:
            X = X[:, :, :int(pad[0])]
    if epoch == 'outcome':
        if smooth_method == 'bins':
            X = X[:, :, int(-pad[1] / smooth_width):]
        elif smooth_method == 'both':
            X = X[:, :, int(-pad[1] / smooth_width[1]):]
        else:
            X = X[:, :, int(-pad[1]):]
    ##add a constant term
    intercept = np.ones((X.shape[0], 1, X.shape[2]))
    X = np.concatenate([X, intercept], axis=1)
    ##construct the labels
    labels = (np.asarray(trial_data['action']) == 'upper_lever').astype(int)
    ##compute the beta weights across the whole trial interval
    betas = lr2.get_betas(X, labels)
    ##average the weights over the last few time bins (this assumes they are
    ##relatively constant across the interval)
    betas = np.mean(betas[:, -4:], axis=1)
    ##now compute the log odds for all of the trials
    odds = np.zeros((X.shape[0], X.shape[2]))
    for t in range(X.shape[2]):
        odds[:, t] = np.dot(X[:, :, t], betas)
    ##get the duration of this session to return
    session_duration = pe.get_session_duration(f_ephys)
    return odds, trial_data, session_duration
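##Usage sketch for decision_variables (the file paths and the [400, 400] ms pad are
##hypothetical choices, not values prescribed by this module):
##  odds, trial_data, session_duration = decision_variables(
##      'R11_behavior.hdf5', 'R11_ephys.hdf5', pad=[400, 400])
##Each row of odds is the logistic-regression log odds of an upper-lever choice for
##one trial, evaluated at every time bin of the pre-action window.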
def get_datasets(f_behavior, f_ephys, smooth_method='both', smooth_width=[80, 40],
                 pad=[1200, 800], z_score=True, trial_duration=None, min_rate=0.1,
                 max_duration=5000):
    global to_regress
    ##start by getting the spike data and trial data
    spike_data, trial_data = ptr.get_trial_spikes(f_behavior, f_ephys,
                                                  smooth_width=smooth_width,
                                                  smooth_method=smooth_method, pad=pad,
                                                  z_score=z_score, trial_duration=trial_duration,
                                                  max_duration=max_duration, min_rate=min_rate)
    n_trials = spike_data.shape[0]
    ##get the uncertainty from the trial data
    uncertainty = mf.uncertainty_from_trial_data(trial_data)
    ##compute the regressors from the HMM and trial data,
    ##keeping them in a pandas DataFrame for clarity
    regressors = pd.DataFrame(columns=to_regress, index=np.arange(n_trials))
    ##now fill out the regressors
    regressors['action'] = np.asarray(trial_data['action'] == 'upper_lever').astype(int) + 1
    regressors['outcome'] = np.asarray(trial_data['outcome'] == 'rewarded_poke').astype(int)
    regressors['state'] = np.asarray(trial_data['context'] == 'upper_rewarded').astype(int) + 1
    regressors['uncertainty'] = uncertainty
    ##now do the interactions
    regressors['action x\noutcome'] = regressors['action'] * regressors['outcome']
    regressors['action x\nstate'] = regressors['action'] * regressors['state']
    regressors['action x\nuncertainty'] = regressors['action'] * regressors['uncertainty']
    regressors['outcome x\nstate'] = regressors['outcome'] * regressors['state']
    regressors['outcome x\nuncertainty'] = regressors['outcome'] * regressors['uncertainty']
    regressors['state x\nuncertainty'] = regressors['state'] * regressors['uncertainty']
    ##return the data arrays
    return spike_data, regressors
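##Coding example for the regressors above: on a trial where the animal pressed the
##upper lever in the upper_rewarded context and earned a rewarded_poke, the main
##effects are coded action=2, outcome=1, state=2, so the interaction terms come out
##to 'action x\noutcome'=2, 'action x\nstate'=4, and 'outcome x\nstate'=2.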
def get_dataset(f_behavior, f_ephys, conditions, smooth_method='both',
                smooth_width=[80, 40], pad=[400, 400], z_score=True,
                trial_duration=None, max_duration=5000, min_rate=0.1, balance=True):
    global condition_pairs
    ##get the spike dataset and the trial info
    X, trial_data = ptr.get_trial_spikes(f_behavior=f_behavior, f_ephys=f_ephys,
                                         smooth_method=smooth_method, smooth_width=smooth_width,
                                         pad=pad, z_score=z_score, trial_duration=trial_duration,
                                         max_duration=max_duration, min_rate=min_rate)
    ##get some metadata about this session
    n_units = X.shape[1]
    n_bins = X.shape[2]
    if balance:
        trial_index, n_trials = balance_trials(trial_data, conditions)
    else:
        trial_index, n_trials = unbalance_trials(trial_data, conditions)
    trial_types = list(trial_index)
    ##allocate space for the dataset
    if n_trials > 0:
        X_c = np.empty((n_trials, n_units, len(condition_pairs[conditions[0]]),
                        len(condition_pairs[conditions[1]]), n_bins))
        X_c[:] = np.nan
        for t in trial_index.keys():
            ##based on the key, figure out where these trials belong in the dataset;
            ##the conditions[0] trial type is expected to be the first part of the string
            c1_type = t[:t.index('+')]
            c2_type = t[t.index('+') + 1:]
            c1_idx = condition_pairs[conditions[0]].index(c1_type)
            c2_idx = condition_pairs[conditions[1]].index(c2_type)
            ##now add the data to the dataset using these indices
            for i, j in enumerate(trial_index[t]):
                X_c[i, :, c1_idx, c2_idx, :] = X[j, :, :]
    else:
        ##no trials for at least one marginalization; return None rather than
        ##passing None to np.nan_to_num
        return None
    return np.nan_to_num(X_c)
def lin_regress_belief(f_behavior, f_ephys, pad, smooth_method='both',
                       smooth_width=[100, 50], min_rate=0.1, z_score=True,
                       trial_duration=None, max_duration=4000):
    ##get the raw data
    X, trial_data = ptr.get_trial_spikes(f_behavior, f_ephys, smooth_method=smooth_method,
                                         smooth_width=smooth_width, pad=pad, z_score=z_score,
                                         trial_duration=trial_duration, max_duration=max_duration,
                                         min_rate=min_rate)
    ##only take the data from the pre-action epoch
    epoch = 'action'
    if epoch == 'action':
        if smooth_method == 'bins':
            X = X[:, :, :int(pad[0] / smooth_width)]
        elif smooth_method == 'both':
            X = X[:, :, :int(pad[0] / smooth_width[1])]
        else:
            X = X[:, :, :int(pad[0])]
    if epoch == 'outcome':
        if smooth_method == 'bins':
            X = X[:, :, int(-pad[1] / smooth_width):]
        elif smooth_method == 'both':
            X = X[:, :, int(-pad[1] / smooth_width[1]):]
        else:
            X = X[:, :, int(-pad[1]):]
    ##now get the belief strength using the HMM model
    model_data = mf.fit_models_from_trial_data(trial_data)
    confidence = np.abs(model_data['state_vals'][0] - model_data['state_vals'][1])
    ##now regress the confidence against the spike data
    predicted, r2, r2_adj, mse = linr.fit_timecourse(X, confidence, add_constant=True, n_iter=10)
    return predicted, r2, mse, trial_data, confidence
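##Usage sketch for lin_regress_belief (hypothetical file paths):
##  predicted, r2, mse, trial_data, confidence = lin_regress_belief(
##      'R11_behavior.hdf5', 'R11_ephys.hdf5', pad=[400, 400])
##r2 and mse quantify how well the pre-action population rates predict the HMM
##belief strength (confidence) across trials, as returned by linr.fit_timecourse.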
def run_tensor(f_behavior, f_ephys, smooth_method='both', smooth_width=[80, 40],
               pad=[400, 400], z_score=True, trial_duration=None, max_duration=5000,
               min_rate=0.1, n_components=12, epoch=None):
    ##get the data
    X, trial_data = ptr.get_trial_spikes(f_behavior, f_ephys, smooth_method=smooth_method,
                                         smooth_width=smooth_width, pad=pad, z_score=z_score,
                                         trial_duration=trial_duration, max_duration=max_duration,
                                         min_rate=min_rate)
    ##reshape X to be neurons x timepoints x trials
    X = X.transpose(1, 2, 0)
    ##take a specific window, if requested
    if epoch == 'action':
        if smooth_method == 'bins':
            X = X[:, :int(pad[0] / smooth_width), :]
        elif smooth_method == 'both':
            X = X[:, :int(pad[0] / smooth_width[1]), :]
        else:
            X = X[:, :int(pad[0]), :]
    if epoch == 'outcome':
        if smooth_method == 'bins':
            X = X[:, int(-pad[1] / smooth_width):, :]
        elif smooth_method == 'both':
            X = X[:, int(-pad[1] / smooth_width[1]):, :]
        else:
            X = X[:, int(-pad[1]):, :]
    ##now fit the model
    model, info = tt.cp_als(X, n_components, nonneg=False, tol=1e-5)
    print('Final reconstruction error: {}'.format(info['err_hist'][-1]))
    return model, info, trial_data
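##Usage sketch for run_tensor (hypothetical file paths): fit a rank-12 CP
##decomposition to the pre-action epoch only.
##  model, info, trial_data = run_tensor('R11_behavior.hdf5', 'R11_ephys.hdf5',
##                                       n_components=12, epoch='action')
##With the defaults (pad=[400, 400], smooth_method='both', smooth_width=[80, 40]),
##epoch='action' keeps the first int(400 / 40) = 10 time bins.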
def log_pop_action(f_behavior, f_ephys, smooth_method='both', smooth_width=[100, 50],
                   pad=[2000, 100], z_score=True, trial_duration=None, max_duration=5000,
                   min_rate=0.1, n_iter=10):
    ##start by getting the spike data and trial data
    spike_data, trial_data = ptr.get_trial_spikes(f_behavior, f_ephys,
                                                  smooth_width=smooth_width,
                                                  smooth_method=smooth_method, pad=pad,
                                                  z_score=z_score, trial_duration=trial_duration,
                                                  max_duration=max_duration, min_rate=min_rate)
    ##convert the action data to binary
    actions = (np.asarray(trial_data['action']) == 'upper_lever').astype(int)
    ##get the pre-action part of the spike data
    if smooth_method == 'both':
        bin_size = smooth_width[1]
    elif smooth_method == 'bins':
        bin_size = smooth_width
    elif smooth_method == 'gauss':
        bin_size = 1
    action_bins = int(pad[0] / bin_size)
    X = spike_data[:, :, :action_bins]
    ##now run the logistic regression
    accuracy = lr3.pop_logit(X, actions, add_constant=True, n_iter=n_iter)
    return accuracy
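##Worked example of the binning arithmetic in log_pop_action: with the defaults
##pad=[2000, 100], smooth_method='both', and smooth_width=[100, 50], the bin size is
##smooth_width[1] = 50 ms, so the pre-action window spans int(2000 / 50) = 40 bins
##and the X passed to lr3.pop_logit has shape (trials, units, 40).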
def get_dataset_hmm(f_behavior, f_ephys, conditions, smooth_method='both',
                    smooth_width=[80, 40], pad=[400, 400], trial_duration=None,
                    max_duration=5000, min_rate=0.1, belief_range=(0, 0.1)):
    global condition_pairs
    ##the first step is to fit a hidden markov model to the behavioral data
    fit_results = mf.fit_models(f_behavior)
    ##now get the info about belief states
    b_a = fit_results['e_HMM'][0, :]  ##belief in the state corresponding to the lower lever
    b_b = fit_results['e_HMM'][1, :]  ##belief in the state corresponding to the upper lever
    ##the belief strength is basically the magnitude of the difference between
    ##the beliefs in the two possible states
    belief = abs(b_a - b_b)
    belief_idx = np.where(np.logical_and(belief >= belief_range[0],
                                         belief <= belief_range[1]))[0]
    ##get the spike dataset and the trial info
    X, trial_data = ptr.get_trial_spikes(f_behavior=f_behavior, f_ephys=f_ephys,
                                         smooth_method=smooth_method, smooth_width=smooth_width,
                                         pad=pad, z_score=True, trial_duration=trial_duration,
                                         max_duration=max_duration, min_rate=min_rate)
    ##get some metadata about this session
    n_units = X.shape[1]
    n_bins = X.shape[2]
    ##because of the limited trial numbers, use unbalanced trials here
    trial_index, n_trials = unbalance_trials(trial_data, conditions)
    trial_types = list(trial_index)
    ##allocate space for the dataset
    if n_trials > 0:
        X_c = np.empty((n_trials, n_units, len(condition_pairs[conditions[0]]),
                        len(condition_pairs[conditions[1]]), n_bins))
        X_c[:] = np.nan
        for t in trial_index.keys():
            ##based on the key, figure out where these trials belong in the dataset;
            ##the conditions[0] trial type is expected to be the first part of the string
            c1_type = t[:t.index('+')]
            c2_type = t[t.index('+') + 1:]
            c1_idx = condition_pairs[conditions[0]].index(c1_type)
            c2_idx = condition_pairs[conditions[1]].index(c2_type)
            ##now add the data to the dataset using these indices
            for i, j in enumerate(trial_index[t]):
                X_c[i, :, c1_idx, c2_idx, :] = X[j, :, :]
        X_c = np.nan_to_num(X_c)
    else:
        X_c = np.empty([])
    ##now repeat for the HMM-defined trials
    X = X[belief_idx]
    trial_data = trial_data.loc[belief_idx].reset_index(drop=True)
    ##get some metadata about this subset
    n_units = X.shape[1]
    n_bins = X.shape[2]
    ##again use unbalanced trials because of the limited trial numbers
    trial_index, n_trials = unbalance_trials(trial_data, conditions)
    trial_types = list(trial_index)
    ##allocate space for the dataset
    if n_trials > 0:
        X_b = np.empty((n_trials, n_units, len(condition_pairs[conditions[0]]),
                        len(condition_pairs[conditions[1]]), n_bins))
        X_b[:] = np.nan
        for t in trial_index.keys():
            ##based on the key, figure out where these trials belong in the dataset;
            ##the conditions[0] trial type is expected to be the first part of the string
            c1_type = t[:t.index('+')]
            c2_type = t[t.index('+') + 1:]
            c1_idx = condition_pairs[conditions[0]].index(c1_type)
            c2_idx = condition_pairs[conditions[1]].index(c2_type)
            ##now add the data to the dataset using these indices
            for i, j in enumerate(trial_index[t]):
                X_b[i, :, c1_idx, c2_idx, :] = X[j, :, :]
        X_b = np.nan_to_num(X_b)
    else:
        X_b = np.empty([])
    return X_c, X_b
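##Note on belief_range in get_dataset_hmm: belief is |b_a - b_b|, so the default
##range of (0, 0.1) keeps only trials where the HMM posterior is nearly split
##between the two states, i.e. the most uncertain trials; raising the upper bound
##toward 1 admits progressively more certain trials.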
def concat_data(animal_id, smooth_method='both', smooth_width=[80, 40],
                pad=[800, 1200], max_duration=5000, epoch=None):
    behavior_files = flu.split_behavior_by_animal(match_ephys=True)[animal_id]  ##first 6 days have only one lever
    ephys_files = flu.split_ephys_by_animal()[animal_id]
    ##determine the median trial duration across all trials
    med_duration = []
    for f_behavior in behavior_files:
        med_duration.append(sa.session_trial_durations(f_behavior, max_duration=max_duration))
    med_duration = np.median(np.concatenate(med_duration))
    ##containers
    X = []
    trial_data = pd.DataFrame()
    ##run through each session's data,
    ##keeping track of the total time elapsed across all sessions
    clock = 0
    for f_behavior, f_ephys in zip(behavior_files, ephys_files):
        print("Processing {}".format(f_behavior[-11:-5]))
        ##start by getting the session duration
        duration = sa.get_session_duration(f_ephys)
        ##now get the spike and trial data
        x, td = ptr.get_trial_spikes(f_behavior, f_ephys, smooth_method=smooth_method,
                                     smooth_width=smooth_width, pad=pad, z_score=True,
                                     trial_duration=med_duration, max_duration=max_duration,
                                     min_rate=-np.inf)
        ##adjust the timestamps so they are relative to the start of the first session
        td['start_ts'] = td['start_ts'] + clock
        td['action_ts'] = td['action_ts'] + clock
        td['outcome_ts'] = td['outcome_ts'] + clock
        td['end_ts'] = td['end_ts'] + clock
        ##add to the master containers
        trial_data = pd.concat([trial_data, td])
        print('n_neurons={}'.format(x.shape[1]))
        X.append(x)
        clock += duration
    ##reset the trial indices
    trial_data = trial_data.reset_index()
    ##concatenate all of the spike data
    X = np.concatenate(X, axis=0)
    ##reshape X to be neurons x timepoints x trials
    X = X.transpose(1, 2, 0)
    ##take a specific window, if requested
    if epoch == 'action':
        if smooth_method == 'bins':
            X = X[:, :int(pad[0] / smooth_width), :]
        elif smooth_method == 'both':
            X = X[:, :int(pad[0] / smooth_width[1]), :]
        else:
            X = X[:, :int(pad[0]), :]
    if epoch == 'outcome':
        if smooth_method == 'bins':
            X = X[:, int(-pad[1] / smooth_width):, :]
        elif smooth_method == 'both':
            X = X[:, int(-pad[1] / smooth_width[1]):, :]
        else:
            X = X[:, int(-pad[1]):, :]
    return X, trial_data
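##Usage sketch for concat_data (the animal ID is hypothetical; real IDs come from
##flu.split_behavior_by_animal):
##  X, trial_data = concat_data('R11', epoch='action')
##X is then neurons x timepoints x trials pooled over every session for that animal,
##with trial_duration fixed to the animal's median trial duration so trials line up.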