def F_information_calculation_kernel(args, latent_size, model_latent_size):
    # Since H(u) is a constant, we can calculate it at the end of all experiments.
    best_F = 100
    vae = VAE_x(z_dim=10).cuda()  # inputs below are moved to the GPU
    model_path = 'vaex_model_relu_health_latent_500200.pth.tar'
    vae.load_state_dict(torch.load(model_path))
    train_loader, test_loader = create_torch_dataloader(batch=64)
    for epoch in range(args.epochs):
        bw = epoch + 1  # sweep the kernel bandwidth, one value per epoch
        train_loss_F = 0.0
        correct = 0.0
        u_collect = []
        z_collect = []
        with torch.no_grad():  # evaluation only; avoid building autograd graphs
            # Fit the kernel regressor on the training latents.
            for iteration, (x, u, y) in enumerate(train_loader):
                x, u, y = x.cuda(), u.cuda(), y.cuda()
                recon_x, mean, log_var, z = vae(x, u)
                z_collect.append(mean)
                u_collect.append(u)
            z = torch.cat(z_collect, dim=0)
            u = torch.cat(u_collect, dim=0)
            K = KernelRegression(bandwidth=bw, X=z, Y=u)
            # Measure how well u can be recovered from the test latents.
            u_collect = []
            recon_u_collect = []
            for iteration, (x, u, y) in enumerate(test_loader):
                x, u, y = x.cuda(), u.cuda().long(), y.cuda()
                recon_x, mean, log_var, z = vae(x, u)
                recon_u = K.predict_u(mean)
                loss_F = nn.functional.nll_loss(recon_u, u.long())
                train_loss_F += loss_F.item() * x.size(0)
                pred = torch.max(recon_u, 1)[1]
                correct += (pred == u).float().sum()
                recon_u_collect.append(recon_u.detach().cpu())
                y_onehot = torch.FloatTensor(u.size(0), 18).zero_()
                y_onehot.scatter_(1, u.detach().cpu().unsqueeze(1), 1)
                u_collect.append(y_onehot.detach().cpu())
        u = torch.cat(u_collect, dim=0).numpy()
        recon_u = torch.cat(recon_u_collect, dim=0).numpy()
        test_auc = roc_auc_score(u, recon_u)
        # Track the best (lowest) test NLL, i.e. the highest F-information.
        if train_loss_F / len(test_loader.dataset) < best_F:
            best_F = train_loss_F / len(test_loader.dataset)
        # 0.631475 is the precomputed constant H(u).
        print("epoch: {}, F loss : {}, acc: {}, auc: {}".format(
            epoch, 0.631475 - train_loss_F / len(test_loader.dataset),
            correct / len(test_loader.dataset), test_auc))
    print("Model F={}, Latent size: {}, F information (best): {}".format(
        model_latent_size, latent_size, best_F))
def F_information_calculation_kernel(args, latent_size, model_latent_size):
    # Since H(u) is a constant, we can calculate it at the end of all experiments.
    best_F = 100
    vae = VAE_x(z_dim=10).cuda()  # inputs below are moved to the GPU
    # model_path = 'vae_model_adult_latent_' + str(model_latent_size) + '.pth.tar'
    model_path = 'vaex_model_adult_kernel.pth.tar'
    vae.load_state_dict(torch.load(model_path))
    train_loader, test_loader = create_torch_dataloader(batch=64)
    best_auc = 0.0
    for epoch in range(args.epochs):
        bw = epoch + 1  # sweep the kernel bandwidth, one value per epoch
        train_loss_F = 0.0
        correct = 0.0
        u_collect = []
        z_collect = []
        with torch.no_grad():  # evaluation only; avoid building autograd graphs
            # Fit the kernel regressor on the training latents.
            for iteration, (x, u, y) in enumerate(train_loader):
                x, u, y = x.cuda(), u.cuda(), y.cuda()
                recon_x, mean, log_var, z = vae(x, u)
                z_collect.append(mean)
                u_collect.append(u)
            z = torch.cat(z_collect, dim=0)
            u = torch.cat(u_collect, dim=0)
            K = KernelRegression(bandwidth=bw, X=z, Y=u)
            # Measure how well the binary u can be recovered from the test latents.
            u_collect = []
            recon_u_collect = []
            for iteration, (x, u, y) in enumerate(test_loader):
                x, u, y = x.cuda(), u.cuda(), y.cuda()
                recon_x, mean, log_var, z = vae(x, u)
                recon_u = K.predict_u(mean)
                loss_F = loss_BCE(recon_u, u)
                train_loss_F += loss_F.item() * x.size(0)
                pred = (recon_u > 0.5).float()
                correct += (pred == u).float().sum()
                u_collect.append(u.detach().cpu())
                recon_u_collect.append(recon_u.detach().cpu())
        u = torch.cat(u_collect, dim=0).numpy()
        recon_u = torch.cat(recon_u_collect, dim=0).numpy()
        test_auc = roc_auc_score(u, recon_u)
        if test_auc > best_auc:
            best_auc = test_auc
        # 0.631475 is the precomputed constant H(u).
        print("epoch: {}, F loss : {}, acc: {}, auc: {}".format(
            epoch, 0.631475 - train_loss_F / len(test_loader.dataset),
            correct / len(test_loader.dataset), test_auc))
        if train_loss_F / len(test_loader.dataset) < best_F:
            best_F = train_loss_F / len(test_loader.dataset)
    print("Model F={}, Latent size: {}, F information (best): {}".format(
        model_latent_size, latent_size, best_F))
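# ---------------------------------------------------------------------------
# The KernelRegression(bandwidth=..., X=..., Y=...) helper with predict_u()
# used above (and in train_nearest_neighbor below) is not defined in this
# section. What follows is a minimal hypothetical sketch of such a
# Nadaraya-Watson classifier, not the authors' implementation: Gaussian
# kernel weights over the training latents, averaged one-hot labels,
# returned as log-probabilities so the output can be fed to
# nn.functional.nll_loss. For a binary u (the adult scripts), predict_u
# would instead return the kernel-weighted average of the 0/1 labels,
# i.e. an estimate of P(u = 1).
# ---------------------------------------------------------------------------
import torch


class KernelRegression:
    """Hypothetical sketch of a Nadaraya-Watson kernel classifier."""

    def __init__(self, bandwidth, X, Y):
        self.bandwidth = bandwidth
        self.X = X  # training latents, shape (n, d)
        self.Y = Y  # training labels, shape (n,)

    def predict_u(self, x, train=False):
        # Squared Euclidean distances between queries and training latents.
        X = self.X if train else self.X.detach()
        d2 = torch.cdist(x, X) ** 2
        # softmax(-d2 / (2 h^2)) equals normalized Gaussian kernel weights.
        w = torch.softmax(-d2 / (2 * self.bandwidth ** 2), dim=1)
        y_onehot = torch.nn.functional.one_hot(self.Y.long()).float()
        p = w @ y_onehot  # (m, n_classes); each row sums to 1
        return torch.log(p + 1e-8)  # log-probabilities for nll_loss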
def _kernel_regression_and_rolling_mean(self, X, y, window):
    """Apply Nadaraya-Watson regression, a rolling mean and a rolling std
    to the time series.

    :param X: array-like of shape = [n_samples, n_features]
        The training input samples.
    :param y: array-like, shape = [n_samples]
        The target values.
    :param window: int
        Size of the moving window, i.e. the number of observations used
        for calculating each statistic.
    :return: tuple of 3 array-like, shape = [n_samples]
        Predicted target values, rolling mean of the predictions and
        rolling std of the target values.
    """
    kr = KernelRegression(kernel="rbf", gamma=np.logspace(-2, 2, 10))
    y_kr = kr.fit(X, y).predict(X)
    # pd.rolling_mean / pd.rolling_std were removed in pandas 0.18+;
    # use the .rolling() accessor instead.
    y_rm = pd.Series(y_kr).rolling(window=window).mean().to_numpy()
    y_std = pd.Series(y).rolling(window=window).std().to_numpy()
    return y_kr, y_rm, y_std
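# A quick stand-alone check of the .rolling() replacement used above, on
# hypothetical data, just to illustrate that the modern API matches the
# removed pd.rolling_mean / pd.rolling_std helpers:
import numpy as np
import pandas as pd

y = pd.Series(np.sin(np.linspace(0, 6, 50)))
print(y.rolling(window=5).mean().tail(3))  # replaces pd.rolling_mean(y, 5)
print(y.rolling(window=5).std().tail(3))   # replaces pd.rolling_std(y, 5)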
def figures_6_1_and_6_3():
    """Reproduces figure 6.1 in ESLii showing different local regression fits
    to Y = sin(4X) + eps, where X~U[0,1] and eps~N(0, 1/3), and overlays
    figure 6.3 highlighting the bias of constant fits near the boundaries.
    """
    # Produce the data and the true function
    X = np.sort(np.random.rand(100, 1), axis=0)
    Y = np.sin(np.multiply(X, 4)).ravel()
    Y += np.random.normal(scale=.333, size=100)
    lin_x = np.linspace(0.0, 1.0, 100)
    f_x = np.sin(4 * lin_x)

    # Plot the data and the true function f
    plt.figure(1)
    plt.subplot(221)
    plt.scatter(X, Y)
    plt.plot(lin_x, f_x, label='f')

    # Plot a nearest-neighbor fit
    nbrs = KNeighborsRegressor(n_neighbors=30).fit(X, Y)
    fhat_x = nbrs.predict(lin_x.reshape((100, 1)))
    plt.plot(lin_x, fhat_x, label='nn')
    plt.legend(loc='best')

    # Plot a Nadaraya-Watson kernel-weighted fit using an Epanechnikov kernel
    plt.subplot(222)
    plt.scatter(X, Y)
    plt.plot(lin_x, f_x, label='f')

    def epanechnikov_kernel(x0, x, gamma=1.0):
        d = abs(x0 - x) / gamma
        # np.where keeps the kernel usable on both scalars and arrays
        return np.where(d < 1, .75 * (1 - d ** 2), 0.0)

    kr = KernelRegression(kernel=epanechnikov_kernel, gamma=0.2).fit(X, Y)
    fhat_x = kr.predict(lin_x.reshape((100, 1)))
    plt.plot(lin_x, fhat_x, label='nw')
    plt.legend(loc='best')

    # Plot a locally weighted linear regression (lwlr) fit
    plt.subplot(223)
    plt.scatter(X, Y)
    plt.plot(lin_x, f_x, label='f')
    k1 = smooth.NonParamRegression(
        X.reshape(100,), Y, bandwidth=0.1,
        method=npr_methods.LocalPolynomialKernel(q=1))
    k1.fit()
    fhat_x = k1(lin_x)
    plt.plot(lin_x, fhat_x, label='lwlr')
    plt.legend(loc='best')

    # Plot a locally weighted quadratic regression (lwqr) fit
    plt.subplot(224)
    plt.scatter(X, Y)
    plt.plot(lin_x, f_x, label='f')
    k2 = smooth.NonParamRegression(
        X.reshape(100,), Y, bandwidth=0.3,
        method=npr_methods.LocalPolynomialKernel(q=2))
    k2.fit()
    fhat_x = k2(lin_x)
    plt.plot(lin_x, fhat_x, label='lwqr')
    plt.legend(loc='best')

    plt.show()
def get_beta_params(mean, variance):
    # Method of moments for a Beta distribution with the given mean/variance.
    alpha = mean * (mean * (1 - mean) / variance - 1)
    beta = (1 - mean) * (mean * (1 - mean) / variance - 1)
    return alpha, beta


# CATEGORY = 'wind'  # immediately overridden below; kept for reference
CATEGORY = 'solar'
FCNAME = 'fc'
OBSNAME = 'ts'
TESTNODE = 1069
TESTDELTA = '2d 3h'
MIN_VARIANCE = 0.0001
TSVAULTFILE = 'data/TSVault.h5'
AUTOSCALE = True

kreg = KernelRegression()
testx = np.linspace(0, 1, 101)
# Possible gammas
gammas = np.logspace(1, 4, 31)
TEST_GAMMA = 100  # Gaussian width ~ 1/100 = 1% of production
kreg.gamma = TEST_GAMMA

store = pd.HDFStore(TSVAULTFILE)
nodes = store['nodes']
store.close()

outmeandict = {}
outvardict = {}
scalefactors = {}
for node in nodes:
    ...  # loop body truncated in the source
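# A quick sanity check of the moment matching in get_beta_params above,
# using hypothetical mean/variance values (scipy is assumed available):
# alpha = 0.3 * 20 = 6, beta = 0.7 * 20 = 14, and Beta(6, 14) should
# reproduce the inputs.
from scipy.stats import beta as beta_dist

a, b = get_beta_params(mean=0.3, variance=0.01)
dist = beta_dist(a, b)
print(dist.mean(), dist.var())  # should recover ~0.3 and ~0.01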
###############################################################################
# Generate sample data
X = np.sort(5 * np.random.rand(100, 1), axis=0)
y = np.sin(X).ravel()

###############################################################################
# Add noise to targets
y += 0.5 * (0.5 - np.random.rand(y.size))

###############################################################################
# Fit regression models
svr = GridSearchCV(SVR(kernel='rbf'), cv=5,
                   param_grid={"C": [1e-1, 1e0, 1e1, 1e2],
                               "gamma": np.logspace(-2, 2, 10)})
kr = KernelRegression(kernel="rbf", gamma=np.logspace(-2, 2, 10))

t0 = time.time()
y_svr = svr.fit(X, y).predict(X)
print("SVR complexity and bandwidth selected and model fitted in %.3f s"
      % (time.time() - t0))

t0 = time.time()
y_kr = kr.fit(X, y).predict(X)
print("KR including bandwidth fitted in %.3f s" % (time.time() - t0))

###############################################################################
# Visualize models
# Note: plt.hold() was removed from matplotlib; holding is now the default.
plt.scatter(X, y, c='k', label='data')
plt.plot(X, y_kr, c='g', label='Kernel Regression')
plt.plot(X, y_svr, c='r', label='SVR')
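# ---------------------------------------------------------------------------
# The sklearn-compatible KernelRegression used in several scripts here
# (fit/predict, accepting an array of candidate gammas) comes from the
# standalone kernel_regression package. Below is a minimal sketch of such
# an estimator under the assumption that the bandwidth is chosen by
# leave-one-out MSE on the training set; NWKernelRegression is a
# hypothetical name, not the package's class.
# ---------------------------------------------------------------------------
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.metrics.pairwise import pairwise_kernels


class NWKernelRegression(BaseEstimator, RegressorMixin):
    def __init__(self, kernel="rbf", gamma=None):
        self.kernel = kernel
        self.gamma = gamma

    def fit(self, X, y):
        self.X_, self.y_ = X, y
        if hasattr(self.gamma, "__iter__"):
            # Pick the candidate gamma with the lowest leave-one-out MSE.
            self.gamma = min(self.gamma, key=self._loo_mse)
        return self

    def _loo_mse(self, gamma):
        K = pairwise_kernels(self.X_, metric=self.kernel, gamma=gamma)
        np.fill_diagonal(K, 0.0)  # exclude each point from its own prediction
        pred = K @ self.y_ / K.sum(axis=1)
        return np.mean((pred - self.y_) ** 2)

    def predict(self, X):
        # Nadaraya-Watson: kernel-weighted average of the training targets.
        K = pairwise_kernels(X, self.X_, metric=self.kernel, gamma=self.gamma)
        return K @ self.y_ / K.sum(axis=1)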
import numpy as np
import pandas as pd
from kernel_regression import KernelRegression
# sklearn.cross_validation was removed; train_test_split now lives in
# sklearn.model_selection.
from sklearn.model_selection import train_test_split as sk_split

df = pd.read_csv("abalone.data", header=None)
print(df.shape)
X = df.loc[:, 1:7].to_numpy()  # .as_matrix() was removed from pandas
y = df.loc[:, 8].to_numpy().reshape(-1, 1)
print(X.shape)
print(y.shape)

X_train, X_test, y_train, y_test = sk_split(X, y, test_size=0.20)

kr = KernelRegression(kernel="rbf")
kr.fit(X_train, y_train)
print(len(X_test))

# Memory issues: split X_test, just did two chunks here
X_test_1 = X_test[0:100, :]
X_test_2 = X_test[100:200, :]  # was 101:200, which silently skipped row 100
pred_y_1 = kr.predict(X_test_1)
pred_y_2 = kr.predict(X_test_2)  # was overwriting the first prediction
def train_nearest_neighbor(vae_model, F, train_loader, test_loader, args,
                           latent_size):
    e1 = 1
    e2 = 1
    e3 = 100
    bw = 10
    optimizer_vae = torch.optim.Adam(vae_model.parameters(),
                                     lr=args.learning_rate,
                                     betas=(0.5, 0.999))
    for epoch in range(args.epochs):
        train_loss_v = 0.0
        z_collect = []
        u_collect = []
        for iteration, (x, u, y) in enumerate(train_loader):
            x, u, y = x.cuda(), u.cuda(), y.cuda()
            recon_x, mean, log_var, z = vae_model(x, u)
            K = KernelRegression(bandwidth=bw, X=z, Y=u)
            recon_u = K.predict_u(mean, train=True)
            # print(recon_u.size())
            # Subtracting the NLL term pushes the latents towards encoding
            # as little information about u as possible.
            loss = e1 * loss_BCE(recon_x, x) + e2 * loss_KLD(mean, log_var) \
                - e3 * nn.functional.nll_loss(recon_u, u.long())
            # loss = -e3 * nn.functional.nll_loss(recon_u, u)
            train_loss_v += loss.item()
            optimizer_vae.zero_grad()
            loss.backward()
            optimizer_vae.step()
            z_collect.append(mean.detach())  # detach to avoid holding graphs
            u_collect.append(u)
        z = torch.cat(z_collect, dim=0)
        u = torch.cat(u_collect, dim=0)
        K = KernelRegression(bandwidth=bw, X=z, Y=u)
        print("latent size : {}, epoch: {}, F loss : {}".format(
            latent_size, epoch, train_loss_v / len(train_loader.dataset)))

        # Evaluate how recoverable u is from the test latents.
        train_loss_F = 0.0
        correct = 0.0
        u_collect = []
        recon_u_collect = []
        with torch.no_grad():
            for iteration, (x, u, y) in enumerate(test_loader):
                x, u, y = x.cuda(), u.cuda().long(), y.cuda()
                recon_x, mean, log_var, z = vae_model(x, u)
                recon_u = K.predict_u(mean)
                loss_F = nn.functional.nll_loss(recon_u, u)
                train_loss_F += loss_F.item() * x.size(0)
                pred = torch.max(recon_u, 1)[1]
                correct += (pred == u).float().sum()
                recon_u_collect.append(recon_u.detach().cpu())
                y_onehot = torch.FloatTensor(u.size(0), 18).zero_()
                y_onehot.scatter_(1, u.detach().cpu().unsqueeze(1), 1)
                u_collect.append(y_onehot.detach().cpu())
        u = torch.cat(u_collect, dim=0).numpy()
        recon_u = torch.cat(recon_u_collect, dim=0).numpy()
        test_auc = roc_auc_score(u, recon_u)
        print("Test: latent size : {}, F information : {}, Acc : {}, Auc: {}"
              .format(latent_size,
                      0.631475 - train_loss_F / len(test_loader.dataset),
                      correct / len(test_loader.dataset), test_auc))
    torch.save(vae_model.state_dict(), 'vaex_model_health_kernel.pth.tar')
def plot_regression():
    np.random.seed(12345)
    fig, axes = plt.subplots(4, 4)
    for i, ax in enumerate(axes.flatten()):
        n_in = 1
        n_out = 1
        d = np.random.randint(1, 5)
        n_ex = np.random.randint(5, 500)
        std = np.random.randint(0, 1000)
        intercept = np.random.rand() * np.random.randint(-300, 300)
        X_train, y_train, X_test, y_test, coefs = random_regression_problem(
            n_ex, n_in, n_out, d=d, intercept=intercept, std=std, seed=i)

        # Ordinary least squares baseline
        LR = LinearRegression(fit_intercept=True)
        LR.fit(X_train, y_train)
        y_pred = LR.predict(X_test)
        loss = np.mean((y_test.flatten() - y_pred.flatten()) ** 2)

        # Brute-force grid search over the polynomial kernel hyperparameters
        d = 3
        best_loss = np.inf
        for gamma in np.linspace(1e-10, 1, 100):
            for c0 in np.linspace(-1, 1000, 100):
                kernel = "PolynomialKernel(d={}, gamma={}, c0={})".format(
                    d, gamma, c0)
                KR_poly = KernelRegression(kernel=kernel)
                KR_poly.fit(X_train, y_train)
                y_pred_poly = KR_poly.predict(X_test)
                loss_poly = np.mean(
                    (y_test.flatten() - y_pred_poly.flatten()) ** 2)
                if loss_poly <= best_loss:
                    KR_poly_best = kernel
                    best_loss = loss_poly
        print("Best kernel: {} || loss: {:.4f}".format(KR_poly_best, best_loss))
        KR_poly = KernelRegression(kernel=KR_poly_best)
        KR_poly.fit(X_train, y_train)

        KR_rbf = KernelRegression(kernel="RBFKernel(gamma=0.01)")
        KR_rbf.fit(X_train, y_train)
        y_pred_rbf = KR_rbf.predict(X_test)
        loss_rbf = np.mean((y_test.flatten() - y_pred_rbf.flatten()) ** 2)

        # Evaluate all three models on a grid padded 10% beyond the test range
        xmin = min(X_test) - 0.1 * (max(X_test) - min(X_test))
        xmax = max(X_test) + 0.1 * (max(X_test) - min(X_test))
        X_plot = np.linspace(xmin, xmax, 100)
        y_plot = LR.predict(X_plot)
        y_plot_poly = KR_poly.predict(X_plot)
        y_plot_rbf = KR_rbf.predict(X_plot)

        ax.scatter(X_test, y_test, alpha=0.5)
        ax.plot(X_plot, y_plot, label="OLS", alpha=0.5)
        ax.plot(X_plot, y_plot_poly,
                label="KR (poly kernel, d={})".format(d), alpha=0.5)
        ax.plot(X_plot, y_plot_rbf, label="KR (rbf kernel)", alpha=0.5)
        ax.legend()
        # ax.set_title(
        #     "MSE\nLR: {:.2f} KR (poly): {:.2f}\nKR (rbf): {:.2f}".format(
        #         loss, loss_poly, loss_rbf))
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
    plt.tight_layout()
    plt.savefig("img/kr_plots.png", dpi=300)
    plt.close("all")
def multivar_regress(self):
    # X, y = self.regression_data()
    X, y = self.regression_data_split()
    X = np.array(X)
    y = np.array(y)

    # Sort by each feature so the 1-D fits plot as smooth curves
    pb = X[:, 0].argsort()
    Xb = X[pb]
    yb = y[pb]
    X1 = np.delete(X, 1, 1)
    p1 = X1[:, 0].argsort()
    X1 = X1[p1]
    y1 = y[p1]
    X2 = np.delete(X, 0, 1)
    p2 = X2[:, 0].argsort()
    X2 = X2[p2]
    y2 = y[p2]

    x_range = np.arange(0, 0.025, 0.001)  # generate a mesh
    y_range = np.arange(0, 1.3, 0.02)
    x_surf, y_surf = np.meshgrid(x_range, y_range)
    Xpred = np.stack((x_surf.flatten(), y_surf.flatten()), axis=1)

    svr = GridSearchCV(SVR(kernel='rbf'), cv=5,
                       param_grid={"C": [1e-1, 1e0, 1e1, 1e2],
                                   "gamma": np.logspace(-2, 2, 10)})
    kr = KernelRegression(kernel="rbf", gamma=np.logspace(-2, 2, 10))

    t0 = time.time()
    y_svrb = svr.fit(Xb, yb).predict(Xpred)
    print("SVR complexity and bandwidth selected and model fitted in %.3f s"
          % (time.time() - t0))
    score_svr = svr.score(Xb, yb)
    y_svr1 = svr.fit(X1, y1).predict(np.expand_dims(x_range, 1))
    score_svr1 = svr.score(X1, y1)
    y_svr2 = svr.fit(X2, y2).predict(np.expand_dims(y_range, 1))
    score_svr2 = svr.score(X2, y2)

    t0 = time.time()
    y_krb = kr.fit(Xb, yb).predict(Xpred)
    print("KR including bandwidth fitted in %.3f s" % (time.time() - t0))
    score_kr = kr.score(Xb, yb)
    y_kr1 = kr.fit(X1, y1).predict(np.expand_dims(x_range, 1))
    score_kr1 = kr.score(X1, y1)
    y_kr2 = kr.fit(X2, y2).predict(np.expand_dims(y_range, 1))
    score_kr2 = kr.score(X2, y2)

    print('R^2 / coeff determination:')
    print(' SVR model: cls_score=%0.3f bbox_pred=%0.3f both=%0.3f'
          % (score_svr1, score_svr2, score_svr))
    print(' KR model: cls_score=%0.3f bbox_pred=%0.3f both=%0.3f'
          % (score_kr1, score_kr2, score_kr))
    # R^2 / coeff determination:
    #  SVR model: cls_score=0.675 bbox_pred=0.518 both=0.512
    #  KR model:  cls_score=0.848 bbox_pred=0.320 both=0.881

    ###########################################################################
    # Visualize models
    # fig = plt.figure()
    # ax = fig.gca(projection='3d')  # to work in 3d
    #
    # z_surf = np.reshape(y_krb, x_surf.shape)
    # surf = ax.plot_surface(x_surf, y_surf, z_surf, cmap=cm.coolwarm,
    #                        alpha=0.5, rstride=1, cstride=1)  # 3d surface
    # fig.colorbar(surf, shrink=0.5, aspect=5)
    #
    # ax.scatter(X[:, 0], X[:, 1], y, s=1, c='k')  # 3d scatter plot
    #
    # ax.set_xlabel('cls_score', fontsize=16)
    # ax.set_ylabel('bbox_pred', fontsize=16)
    # ax.set_zlabel('mAP', fontsize=16)
    # plt.show()

    plt.figure()
    plt.scatter(X1[:, 0], y1, c='k', s=1, label='data')
    # plt.plot(x_range, y_kr1, c='g', label='Kernel Regression')
    # plt.plot(x_range, y_svr1, c='r', label='SVR')
    plt.xlabel('cls_score')
    plt.ylabel('mAP')
    plt.ylim(0, 0.85)
    # plt.title('Classification score difference as proxy for model performance')
    plt.legend()
    plt.show()

    plt.figure()
    plt.scatter(X2[:, 0], y2, c='k', s=1, label='data')
    # plt.plot(y_range, y_kr2, c='g', label='Kernel Regression')
    # plt.plot(y_range, y_svr2, c='r', label='SVR')
    plt.xlabel('bbox_pred')
    plt.ylabel('mAP')
    plt.ylim(0, 0.85)
    # plt.title('Kernel regression versus SVR')
    plt.legend()
    plt.show()

    # Visualize learning curves
    plt.figure()
    train_sizes, train_scores_svr, test_scores_svr = \
        learning_curve(svr, X, y, train_sizes=np.linspace(0.1, 1, 10),
                       scoring="neg_mean_squared_error", cv=10)
    train_sizes_abs, train_scores_kr, test_scores_kr = \
        learning_curve(kr, X, y, train_sizes=np.linspace(0.1, 1, 10),
                       scoring="neg_mean_squared_error", cv=10)
    plt.plot(train_sizes, test_scores_svr.mean(1), 'o-', color="r",
             label="SVR")
    plt.plot(train_sizes, test_scores_kr.mean(1), 'o-', color="g",
             label="Kernel Regression")
    # 'linthreshy' was renamed to 'linthresh' in matplotlib 3.3
    plt.yscale("symlog", linthresh=1e-7)
    plt.ylim(-10, -0.01)
    plt.xlabel("Training size")
    plt.ylabel("Mean Squared Error")
    plt.title('Learning curves')
    plt.legend(loc="best")
    plt.show()