def main():
    """Compare binary trajectory classifiers under two conditions and plot results.

    For each condition ('experimental' and 'control') two sets of trajectories
    are fetched, labelled 'on' and 'none'. In the experimental condition they
    are queried with odor states 'on' and 'none' respectively; in the control
    condition both sets are queried with odor state 'on'. For each of
    N_TRIALS trials the sets are reshuffled, split into N_TRAIN training and
    N_TEST test trajectories per label, and every classifier in METHODS is
    trained and scored (percent correct) on both the training and test sets.

    One figure per method is saved, showing histograms of test accuracy (top)
    and training accuracy (bottom) for both conditions.

    Raises:
        AssertionError: if fewer than N_TRAIN + N_TEST trajectories are
            available for either label.
        ValueError: if METHODS contains a name with no known classifier.
    """
    train_prediction_accuracy = {}
    test_prediction_accuracy = {}

    for condition in ['experimental', 'control']:

        print('Condition: {}'.format(condition))

        train_prediction_accuracy[condition] = {
            method: [] for method in METHODS
        }
        test_prediction_accuracy[condition] = {
            method: [] for method in METHODS
        }

        # (class label, odor state to query) pairs; the control condition
        # draws both classes from odor state 'on'
        if condition == 'control':
            label_sources = (('on', 'on'), ('none', 'on'))
        else:
            label_sources = (('on', 'on'), ('none', 'none'))

        trajs = {}
        for label, queried_odor_state in label_sources:
            trajs[label] = igfh.get_trajs_with_integrated_odor_above_threshold(
                EXPERIMENT_ID,
                queried_odor_state,
                integrated_odor_threshold=INTEGRATED_ODOR_THRESHOLDS[label],
                max_trajs=N_TRAIN + N_TEST,
            )

        print('{} trajectories with odor on'.format(len(trajs['on'])))
        print('{} trajectories with odor off'.format(len(trajs['none'])))

        assert len(trajs['on']) >= N_TRAIN + N_TEST, \
            'not enough trajectories labelled "on"'
        assert len(trajs['none']) >= N_TRAIN + N_TEST, \
            'not enough trajectories labelled "none"'

        print('Sufficient trajectories for classification analysis')

        for tr_ctr in range(N_TRIALS):

            if tr_ctr % 20 == 19:
                print('Trial # {}'.format(tr_ctr + 1))

            # reshuffle and split the velocity data for this trial
            vels = {}
            for odor_state in ODOR_STATES:

                shuffle(trajs[odor_state])

                vels[odor_state] = {
                    'train': [
                        traj.velocities(session)
                        for traj in trajs[odor_state][:N_TRAIN]
                    ],
                    'test': [
                        traj.velocities(session)
                        for traj in trajs[odor_state][N_TRAIN:N_TRAIN + N_TEST]
                    ],
                }

            # loop through all classifiers
            for method in METHODS:
                if method == 'var':
                    clf = tsc.VarClassifierBinary(dim=3, order=2)
                elif method == 'mean_speed':
                    clf = tsc.MeanSpeedClassifierBinary()
                elif method == 'mean_heading':
                    clf = tsc.MeanHeadingClassifierBinary()
                elif method == 'std_heading':
                    clf = tsc.StdHeadingClassifierBinary()
                else:
                    # fail loudly rather than silently reusing the classifier
                    # left over from the previous iteration
                    raise ValueError(
                        'Unknown classification method: {}'.format(method))

                clf.train(positives=vels['on']['train'],
                          negatives=vels['none']['train'])

                # make predictions on training set
                train_predictions = np.array(
                    clf.predict(vels['on']['train'] + vels['none']['train']))
                train_ground_truth = np.concatenate([[1] * N_TRAIN,
                                                     [-1] * N_TRAIN])
                train_accuracy = 100 * np.mean(
                    train_predictions == train_ground_truth)

                # make predictions on test set
                test_trajs = vels['on']['test'] + vels['none']['test']
                test_ground_truth = np.concatenate([[1] * N_TEST,
                                                    [-1] * N_TEST])

                # shuffle trajectories and ground truths together; the
                # trajectories stay in a plain list so that velocity arrays
                # of different lengths never get packed into one ndarray
                rand_idx = np.random.permutation(len(test_trajs))
                test_trajs = [test_trajs[idx] for idx in rand_idx]
                test_ground_truth = test_ground_truth[rand_idx]

                test_predictions = np.array(clf.predict(test_trajs))
                test_accuracy = 100 * np.mean(
                    test_predictions == test_ground_truth)

                # store values for later plotting
                train_prediction_accuracy[condition][method].append(
                    train_accuracy)
                test_prediction_accuracy[condition][method].append(
                    test_accuracy)

    # make one figure per method: test accuracy on top, training below
    for method in METHODS:
        fig, axs = plt.subplots(2,
                                1,
                                facecolor=FACE_COLOR,
                                figsize=FIG_SIZE,
                                sharex=True,
                                tight_layout=True)

        accuracy_by_axis = (
            (axs[0], test_prediction_accuracy),
            (axs[1], train_prediction_accuracy),
        )
        for ax, accuracy in accuracy_by_axis:
            # density=True replaces the `normed` keyword, which was removed
            # in Matplotlib 3.1
            ax.hist(accuracy['control'][method],
                    density=True,
                    color='b',
                    lw=0)
            ax.hist(accuracy['experimental'][method],
                    density=True,
                    color='g',
                    lw=0)

        # legend entries follow plotting order: control first, then
        # experimental
        axs[0].legend(
            [
                'Training examples from same class',
                'Training examples from different classes'
            ],
            loc='best',
            fontsize=FONT_SIZE,
        )

        axs[0].set_xlabel('Test set prediction accuracy (%)')
        axs[0].set_ylabel('Probability')

        axs[1].set_xlabel('Training set prediction accuracy (%)')
        axs[1].set_ylabel('Probability')

        axs[0].set_title(
            'Experiment: {}\n {} training, {} test\n{} classifier'.format(
                EXPERIMENT_ID, N_TRAIN, N_TEST, method))

        for ax in axs:
            axis_tools.set_fontsize(ax, FONT_SIZE)

        fig.savefig('/Users/rkp/Desktop/classifier_{}_method_{}.png'.format(
            EXPERIMENT_ID, method))
def main(n_trials, n_train_max, n_test_max, root_dir_env_var):
    """Repeatedly fit GLMs to random train/test splits and store the results.

    For every experiment in EXPERIMENT_IDS and odor state in ODOR_STATES, the
    matching trajectories are fetched and, for each of ``n_trials`` trials,
    randomly split into disjoint training and test sets. One GLM per
    (input set, output, input basis) combination is fit to the training set
    and its RMS residual on the test set is recorded. The fits are written to
    a data file through a ``models.GlmFitSet`` record, which is then added to
    the database session and committed.

    Args:
        n_trials: number of random train/test splits per experiment/odor state.
        n_train_max: upper bound on the number of training trajectories.
        n_test_max: upper bound on the number of test trajectories.
        root_dir_env_var: stored on the fit set as ``root_dir_env_var``.
    """
    # make basis functions (the returned maximum filter length is not needed)
    basis_ins, basis_outs, _ = igfh.make_exponential_basis_functions(
        INPUT_TAUS, OUTPUT_TAUS, DOMAIN_FACTOR
    )

    # one GLM is fit per spec; materialized once so the count is known even
    # when no trials are run
    model_specs = list(zip(INPUT_SETS, OUTPUTS, basis_ins, basis_outs))
    n_total_max = n_train_max + n_test_max

    for expt_id in EXPERIMENT_IDS:
        for odor_state in ODOR_STATES:

            trajs = list(igfh.get_trajs_with_integrated_odor_above_threshold(
                expt_id, odor_state, INTEGRATED_ODOR_THRESHOLD
            ))
            n_trajs = len(trajs)

            # split available trajectories in the n_train_max:n_test_max
            # ratio, capped at the requested maxima; integer arithmetic keeps
            # the sizes usable as slice bounds
            n_train = int(min(n_train_max,
                              (n_trajs * n_train_max) // n_total_max))
            n_test = int(min(n_test_max,
                             (n_trajs * n_test_max) // n_total_max))

            trajs_trains = []
            trajs_tests = []
            glmss = []
            residualss = []

            for trial_ctr in range(n_trials):
                print('{}: odor {} (trial number: {})'.format(expt_id, odor_state, trial_ctr))

                # get random, non-overlapping training and test trajectories;
                # slicing forward from n_train stays correct when n_test is 0
                # (a negative-index slice `perm[-0:]` would return everything)
                perm = np.random.permutation(n_trajs)
                train_idxs = perm[:n_train]
                test_idxs = perm[n_train:n_train + n_test]

                trajs_train = [trajs[idx] for idx in train_idxs]
                trajs_test = [trajs[idx] for idx in test_idxs]

                glms = []
                residuals = []
                for input_set, output, basis_in, _ in model_specs:

                    # get relevant time-series data from each trajectory set
                    data_train = igfh.time_series_from_trajs(
                        trajs_train,
                        inputs=input_set,
                        output=output
                    )
                    data_test = igfh.time_series_from_trajs(
                        trajs_test,
                        inputs=input_set,
                        output=output
                    )

                    glm = fitting.GLMFitter(link=LINK, family=FAMILY)
                    # output basis is deliberately not used (basis_out=False)
                    glm.set_params(DELAY, basis_in=basis_in, basis_out=False)

                    glm.input_set = input_set
                    glm.output = output

                    # fit to training data
                    glm.fit(data=data_train, start=START_TIMEPOINT)

                    # predict test data
                    prediction = glm.predict(data=data_test, start=START_TIMEPOINT)
                    _, ground_truth = glm.make_feature_matrix_and_response_vector(
                        data_test, START_TIMEPOINT
                    )

                    # root-mean-square prediction error on the test set
                    residual = np.sqrt(((prediction - ground_truth)**2).mean())

                    # clear out feature matrix and response from glm for efficient storage
                    glm.feature_matrix = None
                    glm.response_vector = None
                    glm.results.remove_data()

                    glms.append(glm)
                    residuals.append(residual)

                # only trajectory ids are stored, not the trajectory objects
                trajs_trains.append([traj.id for traj in trajs_train])
                trajs_tests.append([traj.id for traj in trajs_test])
                glmss.append(glms)
                residualss.append(residuals)

            # save a glm fit set
            glm_fit_set = models.GlmFitSet()

            # add data to it
            glm_fit_set.root_dir_env_var = root_dir_env_var
            glm_fit_set.path_relative = 'glm_fit'
            glm_fit_set.file_name = '{}_{}_odor_{}.pickle'.format(FIT_NAME, expt_id, odor_state)
            glm_fit_set.experiment = session.query(models.Experiment).get(expt_id)
            glm_fit_set.odor_state = odor_state
            glm_fit_set.name = FIT_NAME
            glm_fit_set.link = LINK
            glm_fit_set.family = FAMILY
            glm_fit_set.integrated_odor_threshold = INTEGRATED_ODOR_THRESHOLD
            glm_fit_set.predicted = PREDICTED
            glm_fit_set.delay = DELAY
            glm_fit_set.start_time_point = START_TIMEPOINT
            glm_fit_set.n_glms = len(model_specs)
            glm_fit_set.n_train = n_train
            glm_fit_set.n_test = n_test
            glm_fit_set.n_trials = n_trials

            # save data file
            glm_fit_set.save_to_file(
                input_sets=INPUT_SETS,
                outputs=OUTPUTS,
                basis_in=basis_ins,
                basis_out=basis_outs,
                trajs_train=trajs_trains,
                trajs_test=trajs_tests,
                glms=glmss,
                residuals=residualss
            )

            # save everything else (+ link to data file) in database
            session.add(glm_fit_set)

            commit(session)
# Example #3
# 0
    def test_fitting_of_multiple_models_to_single_training_set_and_seeing_how_well_they_predict_test_set(
            self):
        """Fit several GLMs to one training set and plot how they predict a test set.

        One GLM is fit per (input set, output, input basis) combination. Each
        model's RMS residual on the test set is computed, and two figures are
        saved to SAVE_DIR: the filters and bases of every model, and an
        example test time-series overlaid with each model's prediction.
        """
        # make basis sets for each model
        print('Making filter basis functions...')
        basis_ins, basis_outs, _ = igfh.make_exponential_basis_functions(
            INPUT_TAUS, OUTPUT_TAUS, DOMAIN_FACTOR)
        n_models = len(basis_outs)

        print('Getting trajectories...')
        trajs = igfh.get_trajs_with_integrated_odor_above_threshold(
            experiment_id=EXPT_ID,
            odor_state=ODOR_STATE,
            integrated_odor_threshold=INTEGRATED_ODOR_THRESHOLD)

        # split these into training and test trajectories; the permutation
        # only covers the first N_TRAIN + N_TEST trajectories returned
        perm = np.random.permutation(N_TRAIN + N_TEST)
        train_idxs = perm[:N_TRAIN]
        test_idxs = perm[N_TRAIN:N_TRAIN + N_TEST]

        trajs_train = [trajs[idx] for idx in train_idxs]
        trajs_test = [trajs[idx] for idx in test_idxs]

        # fit each of N models to training data and predict test data
        print('Fitting models...')
        fitted_models = []
        residuals = []
        for input_set, output, basis_in, _ in zip(
                INPUT_SETS, OUTPUTS, basis_ins, basis_outs):

            # get relevant time-series data from each trajectory set
            data_train = igfh.time_series_from_trajs(trajs_train,
                                                     inputs=input_set,
                                                     output=output)
            data_test = igfh.time_series_from_trajs(trajs_test,
                                                    inputs=input_set,
                                                    output=output)

            model = fitting.GLMFitter(link=LINK_NAME, family=FAMILY_NAME)
            # output basis is deliberately not used (basis_out=False)
            model.set_params(DELAY, basis_in=basis_in, basis_out=False)

            model.input_set = input_set
            model.output = output

            # fit to training data
            model.fit(data=data_train, start=START_TIMEPOINT)

            # predict test data
            prediction = model.predict(data=data_test, start=START_TIMEPOINT)
            _, ground_truth = model.make_feature_matrix_and_response_vector(
                data_test, START_TIMEPOINT)

            # root-mean-square prediction error on the test set
            residual = np.sqrt(((prediction - ground_truth)**2).mean())

            fitted_models.append(model)
            residuals.append(residual)

        print('Generating plots...')
        # plot basis and filters for each model, as well as example
        # time-series with prediction; squeeze=False keeps the axes arrays 2-D
        # so the row-wise iteration below also works for a single model
        fig_filt, axs_filt = plt.subplots(n_models,
                                          2,
                                          squeeze=False,
                                          facecolor='white',
                                          figsize=(10, 10),
                                          tight_layout=True)
        fig_ts, axs_ts = plt.subplots(n_models,
                                      1,
                                      squeeze=False,
                                      facecolor='white',
                                      figsize=(10, 10),
                                      tight_layout=True)
        axs_ts = axs_ts[:, 0]

        for model, res, ax_filt_row, ax_ts in zip(fitted_models, residuals,
                                                  axs_filt, axs_ts):

            data_test = igfh.time_series_from_trajs(trajs_test,
                                                    inputs=model.input_set,
                                                    output=model.output)

            model.plot_filters(ax_filt_row[0], x_lim=(0, 100))
            model.plot_basis(ax_filt_row[1], x_lim=(0, 100))

            # prediction and ground truth for the first test trajectory only
            prediction_0 = model.predict(data=data_test[0:1],
                                         start=START_TIMEPOINT)
            _, ground_truth_0 = model.make_feature_matrix_and_response_vector(
                data_test[0:1], START_TIMEPOINT)

            # time axis covering the trailing samples that have a prediction
            t = np.arange(len(data_test[0][1]))[-len(prediction_0):]

            ax_ts.plot(t, ground_truth_0, color='k', ls='-')
            ax_ts.plot(t, prediction_0, color='r', ls='--', lw=2)

            odor = igfh.time_series_from_trajs(trajs_test,
                                               inputs=('odor', ),
                                               output=model.output)[0][0][0]

            # odor trace goes on a secondary y-axis sharing the same time axis
            ax_ts_odor = ax_ts.twinx()
            t_odor = np.arange(START_TIMEPOINT + DELAY, len(odor))
            ax_ts_odor.plot(t_odor,
                            odor[START_TIMEPOINT + DELAY:],
                            color='b',
                            ls='-')

            ax_filt_row[0].set_ylabel('filter\nstrength')
            ax_ts.set_title('Residual = {}'.format(res))

        axs_filt[-1][0].set_xlabel('timestep')
        axs_filt[-1][1].set_xlabel('timestep')
        axs_ts[-1].set_xlabel('timestep')

        fig_filt.savefig(os.path.join(SAVE_DIR, 'filters.png'))
        fig_ts.savefig(os.path.join(SAVE_DIR, 'example_predictions.png'))

        plt.show()
    def test_fitting_of_multiple_models_to_single_training_set_and_seeing_how_well_they_predict_test_set(self):
        """Fit several GLMs to one training set and plot how they predict a test set.

        One GLM is fit per (input set, output, input basis) combination. Each
        model's RMS residual on the test set is computed, and two figures are
        saved to SAVE_DIR: the filters and bases of every model, and an
        example test time-series overlaid with each model's prediction.
        """
        # make basis sets for each model
        print('Making filter basis functions...')
        basis_ins, basis_outs, _ = igfh.make_exponential_basis_functions(
            INPUT_TAUS, OUTPUT_TAUS, DOMAIN_FACTOR
        )
        n_models = len(basis_outs)

        print('Getting trajectories...')
        trajs = igfh.get_trajs_with_integrated_odor_above_threshold(
            experiment_id=EXPT_ID,
            odor_state=ODOR_STATE,
            integrated_odor_threshold=INTEGRATED_ODOR_THRESHOLD
        )

        # split these into training and test trajectories; the permutation
        # only covers the first N_TRAIN + N_TEST trajectories returned
        perm = np.random.permutation(N_TRAIN + N_TEST)
        train_idxs = perm[:N_TRAIN]
        test_idxs = perm[N_TRAIN:N_TRAIN + N_TEST]

        trajs_train = [trajs[idx] for idx in train_idxs]
        trajs_test = [trajs[idx] for idx in test_idxs]

        # fit each of N models to training data and predict test data
        print('Fitting models...')
        fitted_models = []
        residuals = []
        for input_set, output, basis_in, _ in zip(INPUT_SETS, OUTPUTS, basis_ins, basis_outs):

            # get relevant time-series data from each trajectory set
            data_train = igfh.time_series_from_trajs(
                trajs_train,
                inputs=input_set,
                output=output
            )
            data_test = igfh.time_series_from_trajs(
                trajs_test,
                inputs=input_set,
                output=output
            )

            model = fitting.GLMFitter(link=LINK_NAME, family=FAMILY_NAME)
            # output basis is deliberately not used (basis_out=False)
            model.set_params(DELAY, basis_in=basis_in, basis_out=False)

            model.input_set = input_set
            model.output = output

            # fit to training data
            model.fit(data=data_train, start=START_TIMEPOINT)

            # predict test data
            prediction = model.predict(data=data_test, start=START_TIMEPOINT)
            _, ground_truth = model.make_feature_matrix_and_response_vector(data_test, START_TIMEPOINT)

            # root-mean-square prediction error on the test set
            residual = np.sqrt(((prediction - ground_truth)**2).mean())

            fitted_models.append(model)
            residuals.append(residual)

        print('Generating plots...')
        # plot basis and filters for each model, as well as example
        # time-series with prediction; squeeze=False keeps the axes arrays 2-D
        # so the row-wise iteration below also works for a single model
        fig_filt, axs_filt = plt.subplots(
            n_models, 2, squeeze=False, facecolor='white', figsize=(10, 10), tight_layout=True
        )
        fig_ts, axs_ts = plt.subplots(
            n_models, 1, squeeze=False, facecolor='white', figsize=(10, 10), tight_layout=True
        )
        axs_ts = axs_ts[:, 0]

        for model, res, ax_filt_row, ax_ts in zip(fitted_models, residuals, axs_filt, axs_ts):

            data_test = igfh.time_series_from_trajs(
                trajs_test,
                inputs=model.input_set,
                output=model.output
            )

            model.plot_filters(ax_filt_row[0], x_lim=(0, 100))
            model.plot_basis(ax_filt_row[1], x_lim=(0, 100))

            # prediction and ground truth for the first test trajectory only
            prediction_0 = model.predict(data=data_test[0:1], start=START_TIMEPOINT)
            _, ground_truth_0 = model.make_feature_matrix_and_response_vector(data_test[0:1], START_TIMEPOINT)

            # time axis covering the trailing samples that have a prediction
            t = np.arange(len(data_test[0][1]))[-len(prediction_0):]

            ax_ts.plot(t, ground_truth_0, color='k', ls='-')
            ax_ts.plot(t, prediction_0, color='r', ls='--', lw=2)

            odor = igfh.time_series_from_trajs(
                trajs_test,
                inputs=('odor',),
                output=model.output
            )[0][0][0]

            # odor trace goes on a secondary y-axis sharing the same time axis
            ax_ts_odor = ax_ts.twinx()
            t_odor = np.arange(START_TIMEPOINT + DELAY, len(odor))
            ax_ts_odor.plot(t_odor, odor[START_TIMEPOINT + DELAY:], color='b', ls='-')

            ax_filt_row[0].set_ylabel('filter\nstrength')
            ax_ts.set_title('Residual = {}'.format(res))

        axs_filt[-1][0].set_xlabel('timestep')
        axs_filt[-1][1].set_xlabel('timestep')
        axs_ts[-1].set_xlabel('timestep')

        fig_filt.savefig(os.path.join(SAVE_DIR, 'filters.png'))
        fig_ts.savefig(os.path.join(SAVE_DIR, 'example_predictions.png'))

        plt.show()
def main():
    """Compare binary trajectory classifiers under two conditions and plot results.

    For each condition ('experimental' and 'control') two sets of trajectories
    are fetched, labelled 'on' and 'none'. In the experimental condition they
    are queried with odor states 'on' and 'none' respectively; in the control
    condition both sets are queried with odor state 'on'. For each of
    N_TRIALS trials the sets are reshuffled, split into N_TRAIN training and
    N_TEST test trajectories per label, and every classifier in METHODS is
    trained and scored (percent correct) on both the training and test sets.

    One figure per method is saved, showing histograms of test accuracy (top)
    and training accuracy (bottom) for both conditions.

    Raises:
        AssertionError: if fewer than N_TRAIN + N_TEST trajectories are
            available for either label.
        ValueError: if METHODS contains a name with no known classifier.
    """
    train_prediction_accuracy = {}
    test_prediction_accuracy = {}

    for condition in ['experimental', 'control']:

        print('Condition: {}'.format(condition))

        train_prediction_accuracy[condition] = {method: [] for method in METHODS}
        test_prediction_accuracy[condition] = {method: [] for method in METHODS}

        # (class label, odor state to query) pairs; the control condition
        # draws both classes from odor state 'on'
        if condition == 'control':
            label_sources = (('on', 'on'), ('none', 'on'))
        else:
            label_sources = (('on', 'on'), ('none', 'none'))

        trajs = {}
        for label, queried_odor_state in label_sources:
            trajs[label] = igfh.get_trajs_with_integrated_odor_above_threshold(
                EXPERIMENT_ID, queried_odor_state,
                integrated_odor_threshold=INTEGRATED_ODOR_THRESHOLDS[label],
                max_trajs=N_TRAIN + N_TEST,
            )

        print('{} trajectories with odor on'.format(len(trajs['on'])))
        print('{} trajectories with odor off'.format(len(trajs['none'])))

        assert len(trajs['on']) >= N_TRAIN + N_TEST, 'not enough trajectories labelled "on"'
        assert len(trajs['none']) >= N_TRAIN + N_TEST, 'not enough trajectories labelled "none"'

        print('Sufficient trajectories for classification analysis')

        for tr_ctr in range(N_TRIALS):

            if tr_ctr % 20 == 19:
                print('Trial # {}'.format(tr_ctr + 1))

            # reshuffle and split the velocity data for this trial
            vels = {}
            for odor_state in ODOR_STATES:

                shuffle(trajs[odor_state])

                vels[odor_state] = {
                    'train': [traj.velocities(session) for traj in trajs[odor_state][:N_TRAIN]],
                    'test': [traj.velocities(session) for traj in trajs[odor_state][N_TRAIN:N_TRAIN + N_TEST]],
                }

            # loop through all classifiers
            for method in METHODS:
                if method == 'var':
                    clf = tsc.VarClassifierBinary(dim=3, order=2)
                elif method == 'mean_speed':
                    clf = tsc.MeanSpeedClassifierBinary()
                elif method == 'mean_heading':
                    clf = tsc.MeanHeadingClassifierBinary()
                elif method == 'std_heading':
                    clf = tsc.StdHeadingClassifierBinary()
                else:
                    # fail loudly rather than silently reusing the classifier
                    # left over from the previous iteration
                    raise ValueError('Unknown classification method: {}'.format(method))

                clf.train(positives=vels['on']['train'], negatives=vels['none']['train'])

                # make predictions on training set
                train_predictions = np.array(clf.predict(vels['on']['train'] + vels['none']['train']))
                train_ground_truth = np.concatenate([[1] * N_TRAIN, [-1] * N_TRAIN])
                train_accuracy = 100 * np.mean(train_predictions == train_ground_truth)

                # make predictions on test set
                test_trajs = vels['on']['test'] + vels['none']['test']
                test_ground_truth = np.concatenate([[1] * N_TEST, [-1] * N_TEST])

                # shuffle trajectories and ground truths together; the
                # trajectories stay in a plain list so that velocity arrays
                # of different lengths never get packed into one ndarray
                rand_idx = np.random.permutation(len(test_trajs))
                test_trajs = [test_trajs[idx] for idx in rand_idx]
                test_ground_truth = test_ground_truth[rand_idx]

                test_predictions = np.array(clf.predict(test_trajs))
                test_accuracy = 100 * np.mean(test_predictions == test_ground_truth)

                # store values for later plotting
                train_prediction_accuracy[condition][method].append(train_accuracy)
                test_prediction_accuracy[condition][method].append(test_accuracy)

    # make one figure per method: test accuracy on top, training below
    for method in METHODS:
        fig, axs = plt.subplots(2, 1, facecolor=FACE_COLOR, figsize=FIG_SIZE, sharex=True, tight_layout=True)

        for ax, accuracy in ((axs[0], test_prediction_accuracy), (axs[1], train_prediction_accuracy)):
            # density=True replaces the `normed` keyword, which was removed
            # in Matplotlib 3.1
            ax.hist(accuracy['control'][method], density=True, color='b', lw=0)
            ax.hist(accuracy['experimental'][method], density=True, color='g', lw=0)

        # legend entries follow plotting order: control first, then
        # experimental
        axs[0].legend(
            ['Training examples from same class', 'Training examples from different classes'],
            loc='best',
            fontsize=FONT_SIZE,
        )

        axs[0].set_xlabel('Test set prediction accuracy (%)')
        axs[0].set_ylabel('Probability')

        axs[1].set_xlabel('Training set prediction accuracy (%)')
        axs[1].set_ylabel('Probability')

        axs[0].set_title(
            'Experiment: {}\n {} training, {} test\n{} classifier'.format(EXPERIMENT_ID, N_TRAIN, N_TEST, method))

        for ax in axs:
            axis_tools.set_fontsize(ax, FONT_SIZE)

        fig.savefig('/Users/rkp/Desktop/classifier_{}_method_{}.png'.format(EXPERIMENT_ID, method))