def test_that_residual_calculations_were_done_correctly(self):
    """Check that each stored residual matches one recomputed from its stored GLM.

    For every experiment id / odor state combination listed in ``script``,
    the first matching ``GlmFitSet`` is loaded. The last GLM of the last
    entry in ``glms`` is used to predict the last entry of ``trajs_test``,
    and the root-mean-square difference between that prediction and the
    response vector is compared with the last stored residual.
    """
    for expt_id in script.EXPERIMENT_IDS:
        for odor_state in script.ODOR_STATES:
            # included in every failure message so the failing condition is obvious
            label = 'experiment {}, odor state {}'.format(expt_id, odor_state)

            glm_fit_set = (
                script.session.query(models.GlmFitSet)
                .filter_by(experiment_id=expt_id, odor_state=odor_state)
                .first()
            )
            # .first() yields None when nothing matches; fail clearly instead of
            # hitting an AttributeError on the attribute accesses below
            self.assertIsNotNone(glm_fit_set, 'no GlmFitSet stored for ' + label)

            # make sure predictions residuals add up to true residuals
            glm = glm_fit_set.glms[-1][-1]
            start = glm_fit_set.start_time_point
            residual_stored = glm_fit_set.residuals[-1][-1]

            # test trajectories are stored as a list of string ids
            trajs_test_ids = glm_fit_set.trajs_test[-1]
            self.assertIsInstance(trajs_test_ids, list, label)
            self.assertTrue(trajs_test_ids, 'no test trajectory ids stored for ' + label)
            self.assertIsInstance(trajs_test_ids[0], str, label)

            trajs_test = [
                script.session.query(models.Trajectory).get(traj_id)
                for traj_id in trajs_test_ids
            ]
            data_test = igfh.time_series_from_trajs(
                trajs_test, inputs=glm.input_set, output=glm.output
            )

            prediction = glm.predict(data=data_test, start=start)
            _, ground_truth = glm.make_feature_matrix_and_response_vector(data_test, start)

            # root-mean-square error, the same formula used when the residual was stored
            residual_recalculated = np.sqrt(((prediction - ground_truth) ** 2).mean())

            self.assertAlmostEqual(residual_stored, residual_recalculated, msg=label)
Beispiel #2
0
    def test_that_residual_calculations_were_done_correctly(self):
        """Recompute the test residual of a stored GLM and compare it with the stored value.

        Runs once per (experiment id, odor state) pair taken from ``script``.
        The first ``GlmFitSet`` matching the pair is fetched; the final GLM of
        its final ``glms`` entry predicts the final ``trajs_test`` entry, and
        the root-mean-square prediction error must agree with the final stored
        residual.
        """
        for expt_id in script.EXPERIMENT_IDS:
            for odor_state in script.ODOR_STATES:
                # identifies the condition in assertion messages
                where = 'experiment {}, odor state {}'.format(
                    expt_id, odor_state)

                glm_fit_set = script.session.query(models.GlmFitSet).filter_by(
                    experiment_id=expt_id,
                    odor_state=odor_state,
                ).first()
                # a missing row would otherwise surface as an AttributeError on None
                self.assertIsNotNone(
                    glm_fit_set, 'no GlmFitSet stored for ' + where)

                # make sure predictions residuals add up to true residuals
                glm = glm_fit_set.glms[-1][-1]
                start = glm_fit_set.start_time_point
                residual_stored = glm_fit_set.residuals[-1][-1]

                # ids of the test trajectories: expected to be a non-empty list of str
                trajs_test_ids = glm_fit_set.trajs_test[-1]
                self.assertIsInstance(trajs_test_ids, list, where)
                self.assertTrue(
                    trajs_test_ids,
                    'no test trajectory ids stored for ' + where)
                self.assertIsInstance(trajs_test_ids[0], str, where)

                trajs_test = [
                    script.session.query(models.Trajectory).get(traj_id)
                    for traj_id in trajs_test_ids
                ]
                data_test = igfh.time_series_from_trajs(trajs_test,
                                                        inputs=glm.input_set,
                                                        output=glm.output)

                prediction = glm.predict(data=data_test, start=start)
                _, ground_truth = glm.make_feature_matrix_and_response_vector(
                    data_test, start)

                # root-mean-square difference between prediction and response
                residual_recalculated = np.sqrt(
                    ((prediction - ground_truth)**2).mean())

                self.assertAlmostEqual(residual_stored,
                                       residual_recalculated,
                                       msg=where)
def main(n_trials, n_train_max, n_test_max, root_dir_env_var):
    """Fit GLMs on random train/test splits and store one GlmFitSet per condition.

    For every (experiment id, odor state) pair in EXPERIMENT_IDS x ODOR_STATES
    the trajectories returned by
    ``igfh.get_trajs_with_integrated_odor_above_threshold`` are split
    ``n_trials`` times at random into disjoint training and test sets. For
    each split, one GLM per zipped (input set, output, input basis) is fit to
    the training set and its root-mean-square residual on the test set is
    computed. The trajectory ids, GLMs and residuals of all trials are written
    with ``GlmFitSet.save_to_file``; the fit set is then added to ``session``
    and committed.

    Args:
        n_trials: number of random train/test splits per condition.
        n_train_max: upper bound on the number of training trajectories.
        n_test_max: upper bound on the number of test trajectories.
        root_dir_env_var: stored on each fit set as ``root_dir_env_var``.
    """

    # make basis functions (the third return value is not used here)
    basis_ins, basis_outs, _ = igfh.make_exponential_basis_functions(
        INPUT_TAUS, OUTPUT_TAUS, DOMAIN_FACTOR
    )

    for expt_id in EXPERIMENT_IDS:
        for odor_state in ODOR_STATES:

            trajs = igfh.get_trajs_with_integrated_odor_above_threshold(
                expt_id, odor_state, INTEGRATED_ODOR_THRESHOLD
            )
            # array form allows selecting trajectories with an index array
            trajs_array = np.array(trajs)

            # Split sizes keep the requested train:test proportion when fewer
            # trajectories are available than requested. np.floor returns a
            # float, which cannot be used as a slice bound, so cast to int.
            n_total_max = n_train_max + n_test_max
            train_test_ratio = n_train_max / n_total_max
            test_train_ratio = n_test_max / n_total_max
            n_train = min(n_train_max, int(np.floor(len(trajs) * train_test_ratio)))
            n_test = min(n_test_max, int(np.floor(len(trajs) * test_train_ratio)))

            trajs_trains = []
            trajs_tests = []
            glmss = []
            residualss = []

            for trial_ctr in range(n_trials):
                print('{}: odor {} (trial number: {})'.format(expt_id, odor_state, trial_ctr))

                # get random set of training and test trajectories
                perm = np.random.permutation(len(trajs))
                train_idxs = perm[:n_train]
                # explicit start index: perm[-n_test:] would return the whole
                # permutation when n_test == 0
                test_idxs = perm[len(perm) - n_test:]

                trajs_train = list(trajs_array[train_idxs])
                trajs_test = list(trajs_array[test_idxs])

                # fit one GLM per (input set, output, input basis) on this split
                glms = []
                residuals = []
                # basis_outs still takes part in the zip (it can bound the number
                # of models) but its entries are not passed to the GLM
                for input_set, output, basis_in, _basis_out in zip(
                        INPUT_SETS, OUTPUTS, basis_ins, basis_outs):

                    # get relevant time-series data from each trajectory set
                    data_train = igfh.time_series_from_trajs(
                        trajs_train,
                        inputs=input_set,
                        output=output
                    )
                    data_test = igfh.time_series_from_trajs(
                        trajs_test,
                        inputs=input_set,
                        output=output
                    )

                    glm = fitting.GLMFitter(link=LINK, family=FAMILY)
                    glm.set_params(DELAY, basis_in=basis_in, basis_out=False)

                    # remember which signals this GLM was built for
                    glm.input_set = input_set
                    glm.output = output

                    # fit to training data
                    glm.fit(data=data_train, start=START_TIMEPOINT)

                    # predict test data
                    prediction = glm.predict(data=data_test, start=START_TIMEPOINT)
                    _, ground_truth = glm.make_feature_matrix_and_response_vector(
                        data_test, START_TIMEPOINT
                    )

                    # calculate residual (root-mean-square prediction error)
                    residual = np.sqrt(((prediction - ground_truth)**2).mean())

                    # clear out feature matrix and response from glm for efficient storage
                    glm.feature_matrix = None
                    glm.response_vector = None
                    glm.results.remove_data()

                    # store things
                    glms.append(glm)
                    residuals.append(residual)

                # only the trajectory ids are stored, not the trajectory objects
                trajs_trains.append([traj.id for traj in trajs_train])
                trajs_tests.append([traj.id for traj in trajs_test])
                glmss.append(glms)
                residualss.append(residuals)

            # save a glm fit set
            glm_fit_set = models.GlmFitSet()

            # add data to it
            glm_fit_set.root_dir_env_var = root_dir_env_var
            glm_fit_set.path_relative = 'glm_fit'
            glm_fit_set.file_name = '{}_{}_odor_{}.pickle'.format(FIT_NAME, expt_id, odor_state)
            glm_fit_set.experiment = session.query(models.Experiment).get(expt_id)
            glm_fit_set.odor_state = odor_state
            glm_fit_set.name = FIT_NAME
            glm_fit_set.link = LINK
            glm_fit_set.family = FAMILY
            glm_fit_set.integrated_odor_threshold = INTEGRATED_ODOR_THRESHOLD
            glm_fit_set.predicted = PREDICTED
            glm_fit_set.delay = DELAY
            glm_fit_set.start_time_point = START_TIMEPOINT
            # every trial fits the same number of GLMs; read it from the
            # collected results so that n_trials == 0 does not touch an
            # undefined (or stale) per-trial list
            glm_fit_set.n_glms = len(glmss[-1]) if glmss else 0
            glm_fit_set.n_train = n_train
            glm_fit_set.n_test = n_test
            glm_fit_set.n_trials = n_trials

            # save data file
            glm_fit_set.save_to_file(
                input_sets=INPUT_SETS,
                outputs=OUTPUTS,
                basis_in=basis_ins,
                basis_out=basis_outs,
                trajs_train=trajs_trains,
                trajs_test=trajs_tests,
                glms=glmss,
                residuals=residualss
            )

            # save everything else (+ link to data file) in database
            session.add(glm_fit_set)

            commit(session)
Beispiel #4
0
# Build the example figure: one row of axes per GLM and two columns, which are
# indexed below by t_ctr (0 for traj_train, 1 for traj_test).
fig, axs = plt.subplots(n_glms,
                        2,
                        figsize=FIG_SIZE_EXAMPLES,
                        facecolor=FACE_COLOR,
                        tight_layout=True)

# One [None, None] placeholder pair per row of axs; nothing in this fragment
# fills it in.
axs_odor = [[None, None] for _ in range(len(axs))]

for t_ctr, traj in enumerate([traj_train, traj_test]):
    for g_ctr, glm in enumerate(glms):

        ax = axs[g_ctr, t_ctr]
        # second y-axis sharing ax's x-axis
        ax_odor = ax.twinx()

        # time-series for this single trajectory, using the GLM's own
        # input set and output
        data = igfh.time_series_from_trajs([traj],
                                           inputs=glm.input_set,
                                           output=glm.output)

        # presumably the number of time points in this trajectory's output
        # series -- TODO confirm the layout returned by time_series_from_trajs
        full_len = len(data[0][1])

        prediction = glm.predict(data=data, start=start_time_point)
        _, ground_truth = glm.make_feature_matrix_and_response_vector(
            data, start_time_point)

        # odor series of the same trajectory, with the first
        # start_time_point + delay samples dropped
        odor = igfh.time_series_from_trajs(
            [traj], inputs=('odor', ),
            output=glm.output)[0][0][0][start_time_point + delay:]

        # the last len(prediction) indices of 0..full_len-1, so t has the same
        # length as prediction
        t = np.arange(full_len)[-len(prediction):]
        # indices start_time_point + delay .. full_len-1; matches the odor slice
        # above assuming the odor series has full_len samples -- TODO confirm
        t_odor = np.arange(start_time_point + delay, full_len)
    def test_fitting_of_multiple_models_to_single_training_set_and_seeing_how_well_they_predict_test_set(self):
        """Fit several GLMs to one random training set and plot their test predictions.

        Trajectories for EXPT_ID / ODOR_STATE are split at random into N_TRAIN
        training and N_TEST test trajectories. One GLM per zipped
        (input set, output, input basis) is fit to the training data and its
        root-mean-square residual on the test data is computed. Two figures are
        saved to SAVE_DIR and then shown: the filters and basis functions of
        each model ('filters.png'), and the prediction for the first test
        trajectory overlaid with its odor signal ('example_predictions.png').
        """

        # make basis sets for each model (third return value is not used here)
        print('Making filter basis functions...')
        basis_ins, basis_outs, _ = igfh.make_exponential_basis_functions(
            INPUT_TAUS, OUTPUT_TAUS, DOMAIN_FACTOR
        )
        n_models = len(basis_outs)

        print('Getting trajectories...')
        trajs = igfh.get_trajs_with_integrated_odor_above_threshold(
            experiment_id=EXPT_ID,
            odor_state=ODOR_STATE,
            integrated_odor_threshold=INTEGRATED_ODOR_THRESHOLD
        )

        # split these into training and test trajectories
        n_needed = N_TRAIN + N_TEST
        # fail with a clear message rather than an IndexError further down
        self.assertGreaterEqual(
            len(trajs), n_needed,
            'need at least {} trajectories, got {}'.format(n_needed, len(trajs))
        )
        # permute over all available trajectories so that the split is drawn
        # from the whole set, not only from the first n_needed entries
        perm = np.random.permutation(len(trajs))
        train_idxs = perm[:N_TRAIN]
        test_idxs = perm[N_TRAIN:n_needed]

        trajs = np.array(trajs)
        trajs_train = list(trajs[train_idxs])
        trajs_test = list(trajs[test_idxs])

        # fit each of N models to training data and predict test data
        print('Fitting models...')
        fitted_models = []
        residuals = []
        # basis_outs still takes part in the zip but is not passed to the model
        for input_set, output, basis_in, _basis_out in zip(INPUT_SETS, OUTPUTS, basis_ins, basis_outs):

            # get relevant time-series data from each trajectory set
            data_train = igfh.time_series_from_trajs(
                trajs_train,
                inputs=input_set,
                output=output
            )
            data_test = igfh.time_series_from_trajs(
                trajs_test,
                inputs=input_set,
                output=output
            )

            model = fitting.GLMFitter(link=LINK_NAME, family=FAMILY_NAME)
            model.set_params(DELAY, basis_in=basis_in, basis_out=False)

            # remember which signals this model was built for (used when plotting)
            model.input_set = input_set
            model.output = output

            # fit to training data
            model.fit(data=data_train, start=START_TIMEPOINT)

            # predict test data
            prediction = model.predict(data=data_test, start=START_TIMEPOINT)
            _, ground_truth = model.make_feature_matrix_and_response_vector(data_test, START_TIMEPOINT)

            # calculate residual (root-mean-square prediction error)
            residual = np.sqrt(((prediction - ground_truth)**2).mean())

            # store things
            fitted_models.append(model)
            residuals.append(residual)

        print('Generating plots...')
        # plot basis and filters for each model, as well as example time-series with prediction
        # squeeze=False keeps both axes arrays 2-D even when n_models == 1, so the
        # row-wise iteration and indexing below work for any number of models
        fig_filt, axs_filt = plt.subplots(
            n_models, 2, facecolor='white', figsize=(10, 10), tight_layout=True,
            squeeze=False
        )
        fig_ts, axs_ts = plt.subplots(
            n_models, 1, facecolor='white', figsize=(10, 10), tight_layout=True,
            squeeze=False
        )
        # single column: flatten to one axes object per model
        axs_ts = axs_ts[:, 0]

        for model, res, ax_filt_row, ax_ts in zip(fitted_models, residuals, axs_filt, axs_ts):

            data_test = igfh.time_series_from_trajs(
                trajs_test,
                inputs=model.input_set,
                output=model.output
            )

            model.plot_filters(ax_filt_row[0], x_lim=(0, 100))
            model.plot_basis(ax_filt_row[1], x_lim=(0, 100))

            # example time-series: only the first test trajectory is plotted
            prediction_0 = model.predict(data=data_test[0:1], start=START_TIMEPOINT)
            _, ground_truth_0 = model.make_feature_matrix_and_response_vector(data_test[0:1], START_TIMEPOINT)

            # time axis covering the last len(prediction_0) time points
            t = np.arange(len(data_test[0][1]))[-len(prediction_0):]

            ax_ts.plot(t, ground_truth_0, color='k', ls='-')
            ax_ts.plot(t, prediction_0, color='r', ls='--', lw=2)

            odor = igfh.time_series_from_trajs(
                trajs_test,
                inputs=('odor',),
                output=model.output
            )[0][0][0]

            # odor goes on a second y-axis sharing the time axis
            ax_ts_odor = ax_ts.twinx()
            t_odor = np.arange(START_TIMEPOINT + DELAY, len(odor))
            ax_ts_odor.plot(t_odor, odor[START_TIMEPOINT + DELAY:], color='b', ls='-')

            ax_filt_row[0].set_ylabel('filter\nstrength')
            ax_ts.set_title('Residual = {}'.format(res))

        axs_filt[-1][0].set_xlabel('timestep')
        axs_filt[-1][1].set_xlabel('timestep')
        axs_ts[-1].set_xlabel('timestep')

        fig_filt.savefig(os.path.join(SAVE_DIR, 'filters.png'))
        fig_ts.savefig(os.path.join(SAVE_DIR, 'example_predictions.png'))

        plt.show()
    figsize=FIG_SIZE_EXAMPLES,
    facecolor=FACE_COLOR,
    tight_layout=True
)

# One [None, None] placeholder pair per row of axs; nothing in this fragment
# fills it in.
axs_odor = [[None, None] for _ in range(len(axs))]

# Columns of axs are indexed by t_ctr (0 for traj_train, 1 for traj_test),
# rows by g_ctr (one per GLM).
for t_ctr, traj in enumerate([traj_train, traj_test]):
    for g_ctr, glm in enumerate(glms):

        ax = axs[g_ctr, t_ctr]
        # second y-axis sharing ax's x-axis
        ax_odor = ax.twinx()

        # time-series for this single trajectory, using the GLM's own
        # input set and output
        data = igfh.time_series_from_trajs(
            [traj],
            inputs=glm.input_set,
            output=glm.output
        )

        # presumably the number of time points in this trajectory's output
        # series -- TODO confirm the layout returned by time_series_from_trajs
        full_len = len(data[0][1])

        prediction = glm.predict(data=data, start=start_time_point)
        _, ground_truth = glm.make_feature_matrix_and_response_vector(
            data, start_time_point
        )

        # odor series of the same trajectory, with the first
        # start_time_point + delay samples dropped
        odor = igfh.time_series_from_trajs(
            [traj],
            inputs=('odor',),
            output=glm.output
        )[0][0][0][start_time_point + delay:]
Beispiel #7
0
    def test_fitting_of_multiple_models_to_single_training_set_and_seeing_how_well_they_predict_test_set(
            self):
        """Fit a family of GLMs to one random split and visualise how well each predicts.

        The trajectories selected for EXPT_ID / ODOR_STATE are randomly divided
        into N_TRAIN training and N_TEST test trajectories. Each zipped
        (input set, output, input basis) yields one GLM, which is fit to the
        training data and scored by its root-mean-square residual on the test
        data. The filters and basis functions of every model are written to
        'filters.png' and the prediction for the first test trajectory (with
        its odor trace) to 'example_predictions.png', both inside SAVE_DIR;
        the figures are then shown.
        """

        # make basis sets for each model (third return value is not used here)
        print('Making filter basis functions...')
        basis_ins, basis_outs, _ = igfh.make_exponential_basis_functions(
            INPUT_TAUS, OUTPUT_TAUS, DOMAIN_FACTOR)
        n_models = len(basis_outs)

        print('Getting trajectories...')
        trajs = igfh.get_trajs_with_integrated_odor_above_threshold(
            experiment_id=EXPT_ID,
            odor_state=ODOR_STATE,
            integrated_odor_threshold=INTEGRATED_ODOR_THRESHOLD)

        # split these into training and test trajectories
        n_split = N_TRAIN + N_TEST
        # too few trajectories would otherwise surface as an IndexError below
        self.assertGreaterEqual(
            len(trajs), n_split,
            'need at least {} trajectories, got {}'.format(
                n_split, len(trajs)))
        # draw the split from every available trajectory rather than only
        # from the first n_split of them
        perm = np.random.permutation(len(trajs))
        train_idxs = perm[:N_TRAIN]
        test_idxs = perm[N_TRAIN:n_split]

        trajs = np.array(trajs)
        trajs_train = list(trajs[train_idxs])
        trajs_test = list(trajs[test_idxs])

        # fit each of N models to training data and predict test data
        print('Fitting models...')
        fitted_models = []
        residuals = []
        # basis_outs still takes part in the zip but is not passed to the model
        for input_set, output, basis_in, _basis_out in zip(
                INPUT_SETS, OUTPUTS, basis_ins, basis_outs):

            # get relevant time-series data from each trajectory set
            data_train = igfh.time_series_from_trajs(trajs_train,
                                                     inputs=input_set,
                                                     output=output)
            data_test = igfh.time_series_from_trajs(trajs_test,
                                                    inputs=input_set,
                                                    output=output)

            model = fitting.GLMFitter(link=LINK_NAME, family=FAMILY_NAME)
            model.set_params(DELAY, basis_in=basis_in, basis_out=False)

            # keep the signal names on the model; the plotting loop reads them
            model.input_set = input_set
            model.output = output

            # fit to training data
            model.fit(data=data_train, start=START_TIMEPOINT)

            # predict test data
            prediction = model.predict(data=data_test, start=START_TIMEPOINT)
            _, ground_truth = model.make_feature_matrix_and_response_vector(
                data_test, START_TIMEPOINT)

            # calculate residual (root-mean-square prediction error)
            residual = np.sqrt(((prediction - ground_truth)**2).mean())

            # store things
            fitted_models.append(model)
            residuals.append(residual)

        print('Generating plots...')
        # plot basis and filters for each model, as well as example time-series with prediction
        # squeeze=False: always get 2-D axes arrays, so that a single model
        # (n_models == 1) is handled the same way as several
        fig_filt, axs_filt = plt.subplots(n_models,
                                          2,
                                          facecolor='white',
                                          figsize=(10, 10),
                                          tight_layout=True,
                                          squeeze=False)
        fig_ts, axs_ts = plt.subplots(n_models,
                                      1,
                                      facecolor='white',
                                      figsize=(10, 10),
                                      tight_layout=True,
                                      squeeze=False)
        # only one column: reduce to one axes object per model
        axs_ts = axs_ts[:, 0]

        for model, res, ax_filt_row, ax_ts in zip(fitted_models, residuals,
                                                  axs_filt, axs_ts):

            data_test = igfh.time_series_from_trajs(trajs_test,
                                                    inputs=model.input_set,
                                                    output=model.output)

            model.plot_filters(ax_filt_row[0], x_lim=(0, 100))
            model.plot_basis(ax_filt_row[1], x_lim=(0, 100))

            # the example plot uses the first test trajectory only
            prediction_0 = model.predict(data=data_test[0:1],
                                         start=START_TIMEPOINT)
            _, ground_truth_0 = model.make_feature_matrix_and_response_vector(
                data_test[0:1], START_TIMEPOINT)

            # time axis covering the last len(prediction_0) time points
            t = np.arange(len(data_test[0][1]))[-len(prediction_0):]

            ax_ts.plot(t, ground_truth_0, color='k', ls='-')
            ax_ts.plot(t, prediction_0, color='r', ls='--', lw=2)

            odor = igfh.time_series_from_trajs(trajs_test,
                                               inputs=('odor', ),
                                               output=model.output)[0][0][0]

            # odor is drawn against a second y-axis sharing the time axis
            ax_ts_odor = ax_ts.twinx()
            t_odor = np.arange(START_TIMEPOINT + DELAY, len(odor))
            ax_ts_odor.plot(t_odor,
                            odor[START_TIMEPOINT + DELAY:],
                            color='b',
                            ls='-')

            ax_filt_row[0].set_ylabel('filter\nstrength')
            ax_ts.set_title('Residual = {}'.format(res))

        axs_filt[-1][0].set_xlabel('timestep')
        axs_filt[-1][1].set_xlabel('timestep')
        axs_ts[-1].set_xlabel('timestep')

        fig_filt.savefig(os.path.join(SAVE_DIR, 'filters.png'))
        fig_ts.savefig(os.path.join(SAVE_DIR, 'example_predictions.png'))

        plt.show()