def onpress(self, event):
    if event.key == 'd':
        # fit a multi-output GP to the current points and plot the posterior
        mtgp = MultiOutputGP(n_channels=3, n_latent_gps=2)
        t1 = time.time()
        raw_ts = [zip(self.x, self.y)]
        train_ts = mtgp.gen_collection(raw_ts)
        mtgp.train(train_ts, maxiter=50)
        t2 = time.time()
        print 'time:', t2 - t1

        test_x = np.linspace(self.min_x, self.max_x, 100)
        post_mean, post_cov = mtgp.predictive_gaussian(train_ts[0], test_x)
        for i in xrange(self.n_ts):
            std = np.sqrt(np.diag(post_cov[i]))
            ax = self.ax[i]
            self.clear_ax(ax)
            # one-standard-deviation band around the posterior mean
            ax.fill_between(test_x, post_mean[i] - std, post_mean[i] + std,
                            edgecolor='none', alpha=.3, color='g')
            ax.plot(test_x, post_mean[i], 'g-')
            ax.plot(self.x[i], self.y[i], 'ko')
        self.fig.canvas.draw()
    elif event.key == 'm':
        # save the current input points
        pickle_save('input.pkl', self.x, self.y)
    elif event.key == 'c':
        # clear all data and axes
        self.reset_data()
        for ax in self.ax:
            self.clear_ax(ax)
        self.fig.canvas.draw()
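# pickle_save/pickle_load are used throughout but defined elsewhere. A minimal
# sketch of what they are assumed to do (pack positional arguments into a
# tuple, collapsing the single-argument case); the real helpers may differ:

import cPickle

def pickle_save(filename, *args):
    # store one object, or a tuple of objects when given several
    obj = args[0] if len(args) == 1 else args
    with open(filename, 'wb') as f:
        cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL)

def pickle_load(filename):
    # inverse of pickle_save: return whatever was stored
    with open(filename, 'rb') as f:
        return cPickle.load(f)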
def save_params(self, params_file):
    # TODO: save t_test instead of idx_test and w_test
    model_params = (self.net_arch, self.n_classes, self.inducing_pts,
                    self.idx_test, self.w_test, self.gp_output_len)
    network_params = get_all_param_values(self.train_network)
    gp_params = [p.get_value() for p in self.post_gp.params]
    pickle_save(params_file, model_params, network_params, gp_params)
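# A hypothetical load_params counterpart (not shown in the source): unpack the
# three pickled groups in the same order, push the network weights back with
# Lasagne's set_all_param_values, and restore the GP parameters. Sketch only;
# in practice the network would have to be rebuilt from net_arch first.

def load_params(self, params_file):
    model_params, network_params, gp_params = pickle_load(params_file)
    (self.net_arch, self.n_classes, self.inducing_pts,
     self.idx_test, self.w_test, self.gp_output_len) = model_params
    set_all_param_values(self.train_network, network_params)
    for p, value in zip(self.post_gp.params, gp_params):
        p.set_value(value)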
def main():
    for sparsity in [5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
        #data = 'data/UWaveGestureLibraryAll-%d.pkl' % sparsity
        data = 'data/PhalangesOutlinesCorrect-%d.pkl' % sparsity
        print data
        t1 = time.time()
        gp_params, indep_noise = train_gp(data)
        t2 = time.time()
        print 'time', t2 - t1
        data_id = data.rsplit('/', 1)[-1]
        pickle_save('params/%s' % data_id, gp_params, indep_noise)
def cmp_n_train_test(save_as=None, n_inducing_pts=512):
    np.random.seed(0)
    batch_size = 1
    results = []
    for n_data in xrange(500, 3000 + 1, 500):
        gp_params = np.array([.1, 100])
        indep_noise = .001
        x, y = gen_data(n_data, gp_params, indep_noise)
        x_min, x_max = x.min(), x.max()
        print '# data', n_data

        n_test = n_data
        x_test = np.linspace(x_min, x_max, n_test)

        # unit-norm random projection vectors for the exact solver
        proj_1d = np.random.uniform(-1, 1, size=len(x_test))
        proj_1d /= np.linalg.norm(proj_1d)
        proj_2d = np.random.uniform(-1, 1, size=(batch_size, len(x_test)))
        proj_2d /= np.linalg.norm(proj_2d, axis=1)[:, np.newaxis]
        t_exact = time_exact(x, y, x_test, gp_params, indep_noise,
                             proj_1d, proj_2d)

        # fresh projections for the approximation algorithm
        len_u = n_inducing_pts
        proj_1d = np.random.uniform(-1, 1, size=len(x_test))
        proj_1d /= np.linalg.norm(proj_1d)
        proj_2d = np.random.uniform(-1, 1, size=(batch_size, len(x_test)))
        proj_2d /= np.linalg.norm(proj_2d, axis=1)[:, np.newaxis]
        t_approx = time_approx(x, y, x_test, gp_params, indep_noise, len_u,
                               proj_1d, proj_2d)
        print

        results.append(np.concatenate((t_exact, t_approx)))

    if save_as:
        pickle_save(save_as, np.vstack(results))
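# gen_data is defined elsewhere; a plausible sketch, assuming gp_params holds
# (inverse squared lengthscale, signal variance) of a squared-exponential
# kernel. The actual parameterization may well differ:

def gen_data_sketch(n, gp_params, indep_noise):
    a, b = gp_params
    x = np.sort(np.random.uniform(0, 10, n))
    sq_dist = (x[:, np.newaxis] - x[np.newaxis, :]) ** 2
    K = b * np.exp(-a * sq_dist) + indep_noise * np.eye(n)
    y = np.random.multivariate_normal(np.zeros(n), K)
    return x, y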
def save_model(self, pickle_name):
    pickle_save(pickle_name, self.gp_parms)
def main():
    np.random.seed(0)
    dat_id = 1
    ts_all, l_all = pickle_load('../chla-data/chla_ts_min0_%d.pkl' % dat_id)

    # randomly shuffle training examples
    idx = np.arange(len(ts_all))
    np.random.shuffle(idx)
    ts_all = ts_all[idx]

    parser = argparse.ArgumentParser()
    parser.add_argument('-q', dest='n_latent_gp', type=int, default=5)
    parser.add_argument('-g', dest='w_reg_group', default='none')
    parser.add_argument('-r', dest='w_reg', type=float, default=1)
    args = parser.parse_args()
    Q = args.n_latent_gp
    w_reg_group = args.w_reg_group
    w_reg = args.w_reg

    P = len(ts_all[0])  # number of output channels
    M = 20              # number of inducing points
    mogp = MultiOutputGP(P, Q, M, w_reg_group=w_reg_group, w_reg=w_reg)

    n_train = int(len(ts_all) * 0.5)
    print 'n_train', n_train
    train_raw = ts_all[:n_train]
    train_ts = mogp.gen_collection(train_raw)
    mogp.train(train_ts, maxiter=50)

    w_reg_group_name = {
        'none': 'non',
        'row': 'row',
        'column': 'col',
        'individual': 'ind',
    }
    mogp_str = 'model-pqn-%s-%g-%d-%d' % (
        w_reg_group_name[mogp.w_reg_group], mogp.w_reg, Q, n_train)
    print mogp_str
    mogp_pickle = 'model/%s.pkl' % mogp_str
    mogp.load_model(mogp_pickle)

    test_raw = ts_all[n_train:]

    # baseline: independent single-output GPs, one per channel
    indep_gp = IndependentMultiOutputGP(P)
    indep_gp.train(train_raw)
    gp_parms = indep_gp.gp_parms

    for channel in xrange(P):
        loglike = []
        loglike_baseline = []
        for each_test in test_raw:
            x = [xy[0] for xy in each_test]
            y = [xy[1] for xy in each_test]
            channel_len = len(x[channel])
            if channel_len < 3:
                continue
            # hold out the middle third of the target channel; condition on
            # its first and last thirds plus the other channels in full
            one_third = channel_len // 3
            x_held_out = x[channel][one_third:-one_third]
            y_held_out = y[channel][one_third:-one_third]
            x_remain = [each_x if i != channel else
                        np.concatenate((each_x[:one_third],
                                        each_x[-one_third:]))
                        for i, each_x in enumerate(x)]
            y_remain = [each_y if i != channel else
                        np.concatenate((each_y[:one_third],
                                        each_y[-one_third:]))
                        for i, each_y in enumerate(y)]

            ts = TimeSeries(x_remain, y_remain, mogp.shared)
            mu, cov = mogp.predictive_gaussian(ts, x_held_out)
            mean, var = gp.pointwise_posterior_mean_var(
                x_remain[channel], y_remain[channel], x_held_out,
                gp_parms[channel])
            for i, each_y in enumerate(y_held_out):
                loglike.append(norm.logpdf(
                    each_y, mu[channel, i], np.sqrt(cov[channel, i, i])))
                loglike_baseline.append(norm.logpdf(
                    each_y, mean[i], np.sqrt(var[i])))

        # per-channel summary: mean, standard error, min, max, count
        print '%2d %10.2f %10.2f %10.2f %10.2f %6d' % (
            channel, np.mean(loglike),
            np.std(loglike) / np.sqrt(len(loglike)),
            np.min(loglike), np.max(loglike), len(loglike))
        print '%2d %10.2f %10.2f %10.2f %10.2f %6d' % (
            channel, np.mean(loglike_baseline),
            np.std(loglike_baseline) / np.sqrt(len(loglike_baseline)),
            np.min(loglike_baseline), np.max(loglike_baseline),
            len(loglike_baseline))
        print '-' * 50

        pickle_save('loglike-cmp/%02d.pkl' % channel,
                    loglike, loglike_baseline)

        pl.figure()
        pl.axis('equal')
        pl.scatter(loglike, loglike_baseline, alpha=.5)
        pl.savefig('loglike-cmp/loglike-%02d.pdf' % channel)
        pl.close()
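# The middle-third hold-out in main(), worked on a toy array:
#   x = np.arange(7); one_third = 7 // 3 = 2
#   x[one_third:-one_third]                          -> [2, 3, 4]    (held out)
#   np.concatenate((x[:one_third], x[-one_third:]))  -> [0, 1, 5, 6] (kept)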
def save_model(self, pickle_name):
    shared = self.shared
    pickle_save(pickle_name, shared.w, shared.beta,
                shared.g_gp_b, shared.h_gp_a, shared.h_gp_b)
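# main() above calls mogp.load_model(...), whose body is not shown here. A
# sketch assuming it simply mirrors save_model's argument order:

def load_model(self, pickle_name):
    shared = self.shared
    (shared.w, shared.beta, shared.g_gp_b,
     shared.h_gp_a, shared.h_gp_b) = pickle_load(pickle_name)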
    t2 = time.time()
    print t2 - t1
    print


if __name__ == '__main__':
    np.random.seed(0)
    # repeat the Lanczos-basis error sweep 10 times over growing data sizes
    for i in xrange(10):
        results = []
        for n_data in xrange(500, 3000 + 1, 500):
            errors = cmp_lanczos_basis(n_data)
            results.append(errors)
        pickle_save('results/lanczos-%02d.pkl' % i, np.vstack(results))

    # disabled alternative sweep: error vs. number of inducing points
    '''
    for i in xrange(10):
        results = []
        for n_data in xrange(200, 3000 + 1, 200):
            errors = cmp_n_inducing_points(n_data)
            results.append(errors)
        pickle_save('results/error-%02d.pkl' % i, np.vstack(results))
    '''
if __name__ == '__main__':
    descriptions = np.array(
        pd.read_csv("../data/descriptions.csv")['description'])
    scores = np.array(pd.read_csv("../data/scores.csv")['points'])

    tfidf = TfidfVectorizer(ngram_range=(1, 2))
    X = tfidf.fit_transform(descriptions)
    X_train, X_test, y_train, y_test = train_test_split(
        X, scores, test_size=0.2, random_state=42)

    rf_regression = RandomForestRegressor().fit(X_train, y_train)
    y_train_pred = rf_regression.predict(X_train)
    y_test_pred = rf_regression.predict(X_test)
    print metrics.mean_squared_error(y_train, y_train_pred)  # 0.506535834654
    print metrics.mean_squared_error(y_test, y_test_pred)    # 2.79865706702

    # pickle_io.pickle_save("../models/random_forest_model.pkl", rf_regression)
    pickle_io.pickle_save("../vectorizers/random_forest_vectorizer.pkl", tfidf)
    # wine_regressor = load_regressor()
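# load_regressor (referenced in the trailing comment) is not shown; a sketch
# assuming pickle_io also provides a matching pickle_load and that the model
# was saved to the commented-out path above:

def load_regressor(model_pkl="../models/random_forest_model.pkl",
                   vectorizer_pkl="../vectorizers/random_forest_vectorizer.pkl"):
    model = pickle_io.pickle_load(model_pkl)
    vectorizer = pickle_io.pickle_load(vectorizer_pkl)
    return model, vectorizer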