Example #1
File: demo.py Project: zphilip/mogp
 def onpress(self, event):
     if event.key == 'd':
         mtgp = MultiOutputGP(n_channels=3, n_latent_gps=2)
         t1 = time.time()
         raw_ts = [list(zip(self.x, self.y))]
         train_ts = mtgp.gen_collection(raw_ts)
         mtgp.train(train_ts, maxiter=50)
         t2 = time.time()
         print('time:', t2 - t1)
         test_x = np.linspace(self.min_x, self.max_x, 100)
         post_mean, post_cov = mtgp.predictive_gaussian(train_ts[0], test_x)
         for i in range(self.n_ts):
             std = np.sqrt(np.diag(post_cov[i]))
             ax = self.ax[i]
             self.clear_ax(ax)
             ax.fill_between(test_x,
                             post_mean[i] - std,
                             post_mean[i] + std,
                             edgecolor='none', alpha=.3, color='g')
             ax.plot(test_x, post_mean[i], 'g-')
             ax.plot(self.x[i], self.y[i], 'ko')
         self.fig.canvas.draw()
     elif event.key == 'm':
         pickle_save('input.pkl', self.x, self.y)
     elif event.key == 'c':
         self.reset_data()
         for ax in self.ax:
             self.clear_ax(ax)
         self.fig.canvas.draw()
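
The pickle_save and pickle_load helpers (from the project's pickle_io module) recur throughout these examples but their bodies are not shown in this listing. A minimal sketch of what they might look like, assuming pickle_save takes a filename plus any number of objects and pickle_load returns what was saved:

import pickle

def pickle_save(filename, *objects):
    # Save any number of objects into a single pickle file.
    with open(filename, 'wb') as f:
        pickle.dump(objects, f, protocol=pickle.HIGHEST_PROTOCOL)

def pickle_load(filename):
    # Return the objects stored by pickle_save; unwrap a single object.
    with open(filename, 'rb') as f:
        objects = pickle.load(f)
    return objects[0] if len(objects) == 1 else objects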
Example #2
 def save_params(self, params_file):
     # TODO: save t_test instead of idx_test and w_test
     model_params = (self.net_arch, self.n_classes, self.inducing_pts,
                     self.idx_test, self.w_test, self.gp_output_len)
     network_params = get_all_param_values(self.train_network)
     gp_params = [p.get_value() for p in self.post_gp.params]
     pickle_save(params_file, model_params, network_params, gp_params)
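
A hypothetical load counterpart to save_params, assuming the pickle_io conventions sketched above and a Lasagne-style set_all_param_values (the snippet uses Lasagne's get_all_param_values, so its inverse is a reasonable guess, not the project's confirmed API):

 def load_params(self, params_file):
     # Inverse of save_params: restore the three parameter groups in order.
     model_params, network_params, gp_params = pickle_load(params_file)
     (self.net_arch, self.n_classes, self.inducing_pts,
      self.idx_test, self.w_test, self.gp_output_len) = model_params
     set_all_param_values(self.train_network, network_params)
     for p, value in zip(self.post_gp.params, gp_params):
         p.set_value(value)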
Example #3
def main():
    for sparsity in [5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
        #data = 'data/UWaveGestureLibraryAll-%d.pkl' % sparsity
        data = 'data/PhalangesOutlinesCorrect-%d.pkl' % sparsity
        print(data)
        t1 = time.time()
        gp_params, indep_noise = train_gp(data)
        t2 = time.time()
        print('time', t2 - t1)
        data_id = data.rsplit('/', 1)[-1]
        pickle_save('params/%s' % data_id, gp_params, indep_noise)
Example #4
def cmp_n_train_test(save_as=None, n_inducing_pts=512):
    np.random.seed(0)
    batch_size = 1

    results = []

    for n_data in range(500, 3000 + 1, 500):
        gp_params = np.array([.1, 100])
        indep_noise = .001
        x, y = gen_data(n_data, gp_params, indep_noise)

        x_min, x_max = x.min(), x.max()
        #len_u = 2048 + 1

        print('# data', n_data)

        n_test = n_data
        x_test = np.linspace(x_min, x_max, n_test)

        proj_1d = np.random.uniform(-1, 1, size=len(x_test))
        proj_1d /= np.linalg.norm(proj_1d)

        proj_2d = np.random.uniform(-1, 1, size=(batch_size, len(x_test)))
        proj_2d /= np.linalg.norm(proj_2d, axis=1)[:, np.newaxis]

        t_exact = time_exact(x, y, x_test, gp_params, indep_noise, proj_1d,
                             proj_2d)

        # parameters for approximation algorithm
        len_u = n_inducing_pts

        proj_1d = np.random.uniform(-1, 1, size=len(x_test))
        proj_1d /= np.linalg.norm(proj_1d)

        proj_2d = np.random.uniform(-1, 1, size=(batch_size, len(x_test)))
        proj_2d /= np.linalg.norm(proj_2d, axis=1)[:, np.newaxis]

        t_approx = time_approx(x, y, x_test, gp_params, indep_noise, len_u,
                               proj_1d, proj_2d)
        print()
        results.append(np.concatenate((t_exact, t_approx)))

    if save_as:
        pickle_save(save_as, np.vstack(results))
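
The gen_data, time_exact, and time_approx helpers are defined elsewhere in the project. As a rough idea of what gen_data might do, here is a minimal sketch assuming a squared-exponential kernel; the kernel form and the reading of gp_params = [.1, 100] as (lengthscale, signal variance) are assumptions, not the project's actual definitions:

import numpy as np

def gen_data(n_data, gp_params, indep_noise):
    # Draw one sample path from a GP with a squared-exponential kernel,
    # observed under i.i.d. Gaussian noise.
    lengthscale, signal_var = gp_params
    x = np.sort(np.random.uniform(0, 10, n_data))
    dx = x[:, np.newaxis] - x[np.newaxis, :]
    K = signal_var * np.exp(-0.5 * (dx / lengthscale) ** 2)
    y = np.random.multivariate_normal(
        np.zeros(n_data), K + indep_noise * np.eye(n_data))
    return x, y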
Example #5
File: elbo_pqn.py Project: mlds-lab/mogp
 def save_model(self, pickle_name):
     pickle_save(pickle_name, self.gp_parms)
Example #6
File: elbo_pqn.py Project: mlds-lab/mogp
def main():
    np.random.seed(0)

    dat_id = 1
    ts_all, l_all = pickle_load('../chla-data/chla_ts_min0_%d.pkl' % dat_id)

    # randomly shuffle training examples
    idx = np.arange(len(ts_all))
    np.random.shuffle(idx)
    ts_all = ts_all[idx]
    #l_all = l_all[idx]

    parser = argparse.ArgumentParser()
    parser.add_argument('-q', dest='n_latent_gp', type=int, default=5)
    parser.add_argument('-g', dest='w_reg_group', default='none')
    parser.add_argument('-r', dest='w_reg', type=float, default=1)
    args = parser.parse_args()

    Q = args.n_latent_gp
    w_reg_group = args.w_reg_group
    w_reg = args.w_reg

    P = len(ts_all[0])
    #Q = 10
    M = 20

    mogp = MultiOutputGP(P, Q, M, w_reg_group=w_reg_group, w_reg=w_reg)

    n_train = int(len(ts_all) * 0.5)
    #n_train = 8
    print('n_train', n_train)
    train_raw = ts_all[:n_train]
    train_ts = mogp.gen_collection(train_raw)
    mogp.train(train_ts, maxiter=50)

    w_reg_group_name = {
        'none': 'non',
        'row': 'row',
        'column': 'col',
        'individual': 'ind',
    }

    mogp_str = 'model-pqn-%s-%g-%d-%d' % (w_reg_group_name[mogp.w_reg_group],
                                          mogp.w_reg, Q, n_train)
    print(mogp_str)

    mogp_pickle = 'model/%s.pkl' % mogp_str
    mogp.load_model(mogp_pickle)
    test_raw = ts_all[n_train:]

    indep_gp = IndependentMultiOutputGP(P)
    indep_gp.train(train_raw)
    gp_parms = indep_gp.gp_parms

    for channel in range(P):
        loglike = []
        loglike_baseline = []
        for each_test in test_raw:
            x = [xy[0] for xy in each_test]
            y = [xy[1] for xy in each_test]
            channel_len = len(x[channel])
            if channel_len < 3:
                continue
            one_third = channel_len // 3
            x_held_out = x[channel][one_third:-one_third]
            y_held_out = y[channel][one_third:-one_third]
            # Hold out the middle third of the target channel; all other
            # channels keep their full observations. (The original condition
            # was inverted, which would leak the held-out points into the
            # conditioning data.)
            x_remain = [np.concatenate((each_x[:one_third],
                                        each_x[-one_third:]))
                        if i == channel else each_x
                        for i, each_x in enumerate(x)]
            y_remain = [np.concatenate((each_y[:one_third],
                                        each_y[-one_third:]))
                        if i == channel else each_y
                        for i, each_y in enumerate(y)]
            ts = TimeSeries(x_remain, y_remain, mogp.shared)
            mu, cov = mogp.predictive_gaussian(ts, x_held_out)

            mean, var = gp.pointwise_posterior_mean_var(
                    x_remain[channel], y_remain[channel], x_held_out,
                    gp_parms[channel])

            for i, each_y in enumerate(y_held_out):
                loglike.append(
                    norm.logpdf(each_y, mu[channel, i],
                                np.sqrt(cov[channel, i, i])))
                loglike_baseline.append(
                    norm.logpdf(each_y, mean[i], np.sqrt(var[i])))

        print('%2d %10.2f %10.2f %10.2f %10.2f %6d' % (
            channel, np.mean(loglike),
            np.std(loglike) / np.sqrt(len(loglike)),
            np.min(loglike), np.max(loglike), len(loglike)))

        print('%2d %10.2f %10.2f %10.2f %10.2f %6d' % (
            channel, np.mean(loglike_baseline),
            np.std(loglike_baseline) / np.sqrt(len(loglike_baseline)),
            np.min(loglike_baseline), np.max(loglike_baseline),
            len(loglike_baseline)))
        print('-' * 50)
        pickle_save('loglike-cmp/%02d.pkl' % channel, loglike, loglike_baseline)
        pl.figure()
        pl.axis('equal')
        pl.scatter(loglike, loglike_baseline, alpha=.5)
        pl.savefig('loglike-cmp/loglike-%02d.pdf' % channel)
        pl.close()
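
The loop above scores predictions on a held-out middle third of one channel: the first and last thirds stay observed, and the model must interpolate the gap. A tiny standalone illustration of that split, on hypothetical toy data:

import numpy as np

x = np.arange(9)                      # one channel's time stamps
one_third = len(x) // 3
x_held_out = x[one_third:-one_third]                        # [3 4 5]
x_remain = np.concatenate((x[:one_third], x[-one_third:]))  # [0 1 2 6 7 8]
print(x_held_out, x_remain)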
Example #7
File: elbo_pqn.py Project: mlds-lab/mogp
 def save_model(self, pickle_name):
     shared = self.shared
     pickle_save(pickle_name,
                 shared.w, shared.beta, shared.g_gp_b,
                 shared.h_gp_a, shared.h_gp_b)
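
Example #6 calls mogp.load_model, whose body is not shown in this listing. A hypothetical inverse of save_model above, assuming pickle_load returns the saved objects in order:

 def load_model(self, pickle_name):
     # Restore the shared parameters in the order save_model stored them.
     shared = self.shared
     (shared.w, shared.beta, shared.g_gp_b,
      shared.h_gp_a, shared.h_gp_b) = pickle_load(pickle_name)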
Example #8
        t2 = time.time()
        print(t2 - t1)

        print()


if __name__ == '__main__':
    np.random.seed(0)
    #cmp_time()
    #errors = cmp_lanczos_basis(500)

    for i in range(10):
        results = []
        for n_data in range(500, 3000 + 1, 500):
            errors = cmp_lanczos_basis(n_data)
            results.append(errors)
        pickle_save('results/lanczos-%02d.pkl' % i, np.vstack(results))
    '''
    for i in range(10):
        results = []
        for n_data in range(200, 3000 + 1, 200):
            errors = cmp_n_inducing_points(n_data)
            results.append(errors)
        pickle_save('results/error-%02d.pkl' % i, np.vstack(results))
    '''

    #norm_diff()
Example #9

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

import pickle_io

if __name__ == '__main__':
    descriptions = np.array(
        pd.read_csv("../data/descriptions.csv")['description'])
    scores = np.array(pd.read_csv("../data/scores.csv")['points'])

    tfidf = TfidfVectorizer(ngram_range=(1, 2))
    X = tfidf.fit_transform(descriptions)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        scores,
                                                        test_size=0.2,
                                                        random_state=42)

    rf_regression = RandomForestRegressor().fit(X_train, y_train)

    y_train_pred = rf_regression.predict(X_train)
    y_test_pred = rf_regression.predict(X_test)

    print(metrics.mean_squared_error(y_train, y_train_pred))
    # 0.506535834654

    print(metrics.mean_squared_error(y_test, y_test_pred))
    # 2.79865706702
    pickle_io.pickle_save("../models/random_forest_model.pkl", rf_regression)
    pickle_io.pickle_save("../vectorizers/random_forest_vectorizer.pkl", tfidf)

    # wine_regressor = load_regressor()
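
A hypothetical sketch of the commented-out load_regressor above, assuming pickle_io.pickle_load returns what pickle_save stored (the paths mirror the saves in this example):

def load_regressor(model_path="../models/random_forest_model.pkl",
                   vectorizer_path="../vectorizers/random_forest_vectorizer.pkl"):
    # Restore the fitted regressor and its matching TF-IDF vectorizer.
    model = pickle_io.pickle_load(model_path)
    vectorizer = pickle_io.pickle_load(vectorizer_path)
    return model, vectorizer

# Usage: vectorize a new description with the same vocabulary, then predict.
# model, vectorizer = load_regressor()
# print(model.predict(vectorizer.transform(["bright cherry aromas"])))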