def test_fitting_of_multiple_models_to_single_training_set_and_seeing_how_well_they_predict_test_set(self):
    """Fit several GLMs to one training set and plot how well each predicts a test set.

    One ``fitting.GLMFitter`` is built for every (input set, output, input
    basis) combination drawn from INPUT_SETS, OUTPUTS and the basis functions
    returned by ``igfh.make_exponential_basis_functions``. Each model is fit
    to the training trajectories and scored on the test trajectories by the
    root-mean-square difference between prediction and ground truth.

    Two figures are written to SAVE_DIR ('filters.png' and
    'example_predictions.png') and then shown with ``plt.show()``. The method
    makes no assertions; it is a visual check.
    """
    # make basis sets for each model
    print('Making filter basis functions...')
    # the third return value (max filter length) is not needed here
    basis_ins, basis_outs, _ = igfh.make_exponential_basis_functions(
        INPUT_TAUS, OUTPUT_TAUS, DOMAIN_FACTOR
    )
    n_models = len(basis_outs)

    print('Getting trajectories...')
    trajs = igfh.get_trajs_with_integrated_odor_above_threshold(
        experiment_id=EXPT_ID,
        odor_state=ODOR_STATE,
        integrated_odor_threshold=INTEGRATED_ODOR_THRESHOLD,
    )

    # split these into training and test trajectories
    # NOTE: indices are drawn from the first N_TRAIN + N_TEST trajectories only
    perm = np.random.permutation(N_TRAIN + N_TEST)
    train_idxs = perm[:N_TRAIN]
    test_idxs = perm[N_TRAIN:N_TRAIN + N_TEST]
    trajs = np.array(trajs)
    trajs_train = list(trajs[train_idxs])
    trajs_test = list(trajs[test_idxs])

    # fit each of N models to training data and predict test data
    print('Fitting models...')
    fitted_models = []
    residuals = []
    # basis_outs still takes part in the zip so the number of models is
    # bounded by its length, but the output basis itself is not used
    for input_set, output, basis_in, _ in zip(INPUT_SETS, OUTPUTS, basis_ins, basis_outs):
        # get relevant time-series data from each trajectory set
        data_train = igfh.time_series_from_trajs(
            trajs_train, inputs=input_set, output=output
        )
        data_test = igfh.time_series_from_trajs(
            trajs_test, inputs=input_set, output=output
        )

        model = fitting.GLMFitter(link=LINK_NAME, family=FAMILY_NAME)
        model.set_params(DELAY, basis_in=basis_in, basis_out=False)
        # remember the configuration so the plotting loop can rebuild the data
        model.input_set = input_set
        model.output = output

        # fit to training data
        model.fit(data=data_train, start=START_TIMEPOINT)

        # predict test data
        prediction = model.predict(data=data_test, start=START_TIMEPOINT)
        _, ground_truth = model.make_feature_matrix_and_response_vector(
            data_test, START_TIMEPOINT
        )

        # calculate residual (root-mean-square prediction error)
        residual = np.sqrt(((prediction - ground_truth)**2).mean())

        # store things
        fitted_models.append(model)
        residuals.append(residual)

    print('Generating plots...')
    # plot basis and filters for each model, as well as example time-series with prediction
    # squeeze=False keeps both axes arrays 2-D, so the indexing below also
    # works when there is only a single model
    fig_filt, axs_filt = plt.subplots(
        n_models, 2, facecolor='white', figsize=(10, 10), tight_layout=True,
        squeeze=False
    )
    fig_ts, axs_ts = plt.subplots(
        n_models, 1, facecolor='white', figsize=(10, 10), tight_layout=True,
        squeeze=False
    )
    axs_ts = axs_ts[:, 0]

    for model, res, ax_filt_row, ax_ts in zip(fitted_models, residuals, axs_filt, axs_ts):
        data_test = igfh.time_series_from_trajs(
            trajs_test, inputs=model.input_set, output=model.output
        )

        model.plot_filters(ax_filt_row[0], x_lim=(0, 100))
        model.plot_basis(ax_filt_row[1], x_lim=(0, 100))

        # example prediction: first test trajectory only
        prediction_0 = model.predict(data=data_test[0:1], start=START_TIMEPOINT)
        _, ground_truth_0 = model.make_feature_matrix_and_response_vector(
            data_test[0:1], START_TIMEPOINT
        )

        # align the time axis with the tail of the series that was predicted
        t = np.arange(len(data_test[0][1]))[-len(prediction_0):]
        ax_ts.plot(t, ground_truth_0, color='k', ls='-')
        ax_ts.plot(t, prediction_0, color='r', ls='--', lw=2)

        # overlay the odor signal of the first test trajectory on a second y-axis
        odor = igfh.time_series_from_trajs(
            trajs_test, inputs=('odor',), output=model.output
        )[0][0][0]
        ax_ts_odor = ax_ts.twinx()
        t_odor = np.arange(START_TIMEPOINT + DELAY, len(odor))
        ax_ts_odor.plot(t_odor, odor[START_TIMEPOINT + DELAY:], color='b', ls='-')

        ax_filt_row[0].set_ylabel('filter\nstrength')
        ax_ts.set_title('Residual = {}'.format(res))

    axs_filt[-1][0].set_xlabel('timestep')
    axs_filt[-1][1].set_xlabel('timestep')
    axs_ts[-1].set_xlabel('timestep')

    fig_filt.savefig(os.path.join(SAVE_DIR, 'filters.png'))
    fig_ts.savefig(os.path.join(SAVE_DIR, 'example_predictions.png'))

    plt.show()
def main(n_trials, n_train_max, n_test_max, root_dir_env_var):
    """Fit GLMs on repeated random train/test splits and save one fit set per condition.

    For every experiment in EXPERIMENT_IDS and every odor state in
    ODOR_STATES, the trajectories returned by
    ``igfh.get_trajs_with_integrated_odor_above_threshold`` are randomly split
    ``n_trials`` times. On each trial one ``fitting.GLMFitter`` per
    (input set, output, input basis) combination is fit to the training
    trajectories and scored on the test trajectories by root-mean-square
    error. The results are written with ``models.GlmFitSet.save_to_file`` and
    the fit set is added to the database session and committed.

    Args:
        n_trials: number of random train/test splits per condition.
        n_train_max: upper bound on the number of training trajectories.
        n_test_max: upper bound on the number of test trajectories.
        root_dir_env_var: value stored on the fit set's ``root_dir_env_var``
            attribute.
    """
    # make basis functions (the returned max filter length is not needed here)
    basis_ins, basis_outs, _ = igfh.make_exponential_basis_functions(
        INPUT_TAUS, OUTPUT_TAUS, DOMAIN_FACTOR
    )

    for expt_id in EXPERIMENT_IDS:
        for odor_state in ODOR_STATES:

            trajs = igfh.get_trajs_with_integrated_odor_above_threshold(
                expt_id, odor_state, INTEGRATED_ODOR_THRESHOLD
            )
            n_trajs = len(trajs)
            # array form allows index-array selection; build it once per condition
            trajs_array = np.array(trajs)

            # when too few trajectories are available, scale both set sizes
            # down while keeping the requested train:test proportion
            train_test_ratio = (n_train_max / (n_train_max + n_test_max))
            test_train_ratio = (n_test_max / (n_train_max + n_test_max))
            # np.floor returns a float; slice bounds and stored counts must be ints
            n_train = int(min(n_train_max, np.floor(n_trajs * train_test_ratio)))
            n_test = int(min(n_test_max, np.floor(n_trajs * test_train_ratio)))

            trajs_trains = []
            trajs_tests = []
            glmss = []
            residualss = []

            for trial_ctr in range(n_trials):
                print('{}: odor {} (trial number: {})'.format(expt_id, odor_state, trial_ctr))

                # get random set of training and test trajectories
                perm = np.random.permutation(n_trajs)
                train_idxs = perm[:n_train]
                # explicit start index: perm[-n_test:] would return the whole
                # permutation when n_test == 0
                test_idxs = perm[n_trajs - n_test:]

                trajs_train = list(trajs_array[train_idxs])
                trajs_test = list(trajs_array[test_idxs])

                # fit one glm per model configuration
                glms = []
                residuals = []
                # basis_outs still bounds the zip length, but the output
                # basis itself is not used
                for input_set, output, basis_in, _ in zip(INPUT_SETS, OUTPUTS, basis_ins, basis_outs):

                    # get relevant time-series data from each trajectory set
                    data_train = igfh.time_series_from_trajs(
                        trajs_train, inputs=input_set, output=output
                    )
                    data_test = igfh.time_series_from_trajs(
                        trajs_test, inputs=input_set, output=output
                    )

                    glm = fitting.GLMFitter(link=LINK, family=FAMILY)
                    glm.set_params(DELAY, basis_in=basis_in, basis_out=False)
                    glm.input_set = input_set
                    glm.output = output

                    # fit to training data
                    glm.fit(data=data_train, start=START_TIMEPOINT)

                    # predict test data
                    prediction = glm.predict(data=data_test, start=START_TIMEPOINT)
                    _, ground_truth = glm.make_feature_matrix_and_response_vector(
                        data_test, START_TIMEPOINT
                    )

                    # calculate residual (root-mean-square prediction error)
                    residual = np.sqrt(((prediction - ground_truth)**2).mean())

                    # clear out feature matrix and response from glm for efficient storage
                    glm.feature_matrix = None
                    glm.response_vector = None
                    glm.results.remove_data()

                    # store things
                    glms.append(glm)
                    residuals.append(residual)

                # only trajectory ids are kept, not the trajectory objects
                trajs_train_ids = [traj.id for traj in trajs_train]
                trajs_test_ids = [traj.id for traj in trajs_test]

                trajs_trains.append(trajs_train_ids)
                trajs_tests.append(trajs_test_ids)
                glmss.append(glms)
                residualss.append(residuals)

            # save a glm fit set
            glm_fit_set = models.GlmFitSet()

            # add data to it
            glm_fit_set.root_dir_env_var = root_dir_env_var
            glm_fit_set.path_relative = 'glm_fit'
            glm_fit_set.file_name = '{}_{}_odor_{}.pickle'.format(FIT_NAME, expt_id, odor_state)
            glm_fit_set.experiment = session.query(models.Experiment).get(expt_id)
            glm_fit_set.odor_state = odor_state
            glm_fit_set.name = FIT_NAME
            glm_fit_set.link = LINK
            glm_fit_set.family = FAMILY
            glm_fit_set.integrated_odor_threshold = INTEGRATED_ODOR_THRESHOLD
            glm_fit_set.predicted = PREDICTED
            glm_fit_set.delay = DELAY
            glm_fit_set.start_time_point = START_TIMEPOINT
            glm_fit_set.n_glms = len(glms)
            glm_fit_set.n_train = n_train
            glm_fit_set.n_test = n_test
            glm_fit_set.n_trials = n_trials

            # save data file
            glm_fit_set.save_to_file(
                input_sets=INPUT_SETS,
                outputs=OUTPUTS,
                basis_in=basis_ins,
                basis_out=basis_outs,
                trajs_train=trajs_trains,
                trajs_test=trajs_tests,
                glms=glmss,
                residuals=residualss
            )

            # save everything else (+ link to data file) in database
            session.add(glm_fit_set)
            commit(session)
def test_fitting_of_multiple_models_to_single_training_set_and_seeing_how_well_they_predict_test_set(
        self):
    """Fit several GLMs to one training set and plot how well each predicts a test set.

    A ``fitting.GLMFitter`` is created for each (input set, output, input
    basis) combination taken from INPUT_SETS, OUTPUTS and the basis functions
    produced by ``igfh.make_exponential_basis_functions``. Every model is fit
    on the training trajectories; its residual is the root-mean-square
    difference between its prediction and the ground truth on the test
    trajectories.

    The filter/basis plots and an example prediction per model are saved to
    SAVE_DIR as 'filters.png' and 'example_predictions.png', then displayed
    with ``plt.show()``. Nothing is asserted; the output is inspected by eye.
    """
    # make basis sets for each model
    print('Making filter basis functions...')
    # third return value (max filter length) is unused in this test
    basis_ins, basis_outs, _ = igfh.make_exponential_basis_functions(
        INPUT_TAUS, OUTPUT_TAUS, DOMAIN_FACTOR)
    n_models = len(basis_outs)

    print('Getting trajectories...')
    trajs = igfh.get_trajs_with_integrated_odor_above_threshold(
        experiment_id=EXPT_ID,
        odor_state=ODOR_STATE,
        integrated_odor_threshold=INTEGRATED_ODOR_THRESHOLD)

    # split these into training and test trajectories
    # NOTE: indices are drawn from the first N_TRAIN + N_TEST trajectories only
    perm = np.random.permutation(N_TRAIN + N_TEST)
    train_idxs = perm[:N_TRAIN]
    test_idxs = perm[N_TRAIN:N_TRAIN + N_TEST]
    trajs = np.array(trajs)
    trajs_train = list(trajs[train_idxs])
    trajs_test = list(trajs[test_idxs])

    # fit each of N models to training data and predict test data
    print('Fitting models...')
    fitted_models = []
    residuals = []
    # basis_outs is zipped only to bound the number of models; the output
    # basis itself is never passed to the fitter
    for input_set, output, basis_in, _ in zip(
            INPUT_SETS, OUTPUTS, basis_ins, basis_outs):
        # get relevant time-series data from each trajectory set
        data_train = igfh.time_series_from_trajs(trajs_train,
                                                 inputs=input_set,
                                                 output=output)
        data_test = igfh.time_series_from_trajs(trajs_test,
                                                inputs=input_set,
                                                output=output)

        model = fitting.GLMFitter(link=LINK_NAME, family=FAMILY_NAME)
        model.set_params(DELAY, basis_in=basis_in, basis_out=False)
        # keep the configuration on the model for the plotting loop below
        model.input_set = input_set
        model.output = output

        # fit to training data
        model.fit(data=data_train, start=START_TIMEPOINT)

        # predict test data
        prediction = model.predict(data=data_test, start=START_TIMEPOINT)
        _, ground_truth = model.make_feature_matrix_and_response_vector(
            data_test, START_TIMEPOINT)

        # calculate residual (root-mean-square prediction error)
        residual = np.sqrt(((prediction - ground_truth)**2).mean())

        # store things
        fitted_models.append(model)
        residuals.append(residual)

    print('Generating plots...')
    # plot basis and filters for each model, as well as example time-series with prediction
    # squeeze=False guarantees 2-D axes arrays, so row iteration and the
    # [-1] lookups below also work for a single model
    fig_filt, axs_filt = plt.subplots(n_models,
                                      2,
                                      facecolor='white',
                                      figsize=(10, 10),
                                      tight_layout=True,
                                      squeeze=False)
    fig_ts, axs_ts = plt.subplots(n_models,
                                  1,
                                  facecolor='white',
                                  figsize=(10, 10),
                                  tight_layout=True,
                                  squeeze=False)
    axs_ts = axs_ts[:, 0]

    for model, res, ax_filt_row, ax_ts in zip(fitted_models, residuals,
                                              axs_filt, axs_ts):
        data_test = igfh.time_series_from_trajs(trajs_test,
                                                inputs=model.input_set,
                                                output=model.output)

        model.plot_filters(ax_filt_row[0], x_lim=(0, 100))
        model.plot_basis(ax_filt_row[1], x_lim=(0, 100))

        # example prediction: first test trajectory only
        prediction_0 = model.predict(data=data_test[0:1],
                                     start=START_TIMEPOINT)
        _, ground_truth_0 = model.make_feature_matrix_and_response_vector(
            data_test[0:1], START_TIMEPOINT)

        # align the time axis with the tail of the series that was predicted
        t = np.arange(len(data_test[0][1]))[-len(prediction_0):]
        ax_ts.plot(t, ground_truth_0, color='k', ls='-')
        ax_ts.plot(t, prediction_0, color='r', ls='--', lw=2)

        # overlay the odor signal of the first test trajectory on a second y-axis
        odor = igfh.time_series_from_trajs(trajs_test,
                                           inputs=('odor', ),
                                           output=model.output)[0][0][0]
        ax_ts_odor = ax_ts.twinx()
        t_odor = np.arange(START_TIMEPOINT + DELAY, len(odor))
        ax_ts_odor.plot(t_odor,
                        odor[START_TIMEPOINT + DELAY:],
                        color='b',
                        ls='-')

        ax_filt_row[0].set_ylabel('filter\nstrength')
        ax_ts.set_title('Residual = {}'.format(res))

    axs_filt[-1][0].set_xlabel('timestep')
    axs_filt[-1][1].set_xlabel('timestep')
    axs_ts[-1].set_xlabel('timestep')

    fig_filt.savefig(os.path.join(SAVE_DIR, 'filters.png'))
    fig_ts.savefig(os.path.join(SAVE_DIR, 'example_predictions.png'))

    plt.show()