def get_globals():
    """Build a small 2-D regression fixture and fit a Gaussian process to it.

    Returns:
        dict with keys ``'x'`` (the nine training points), ``'y'`` (their
        target values), ``'gp'`` (the fitted ``GaussianProcessRegressor``)
        and ``'mesh'`` (a 100 x 100 grid over [0, 1) x [0, 1), flattened to
        rows of two coordinates).
    """
    # Four corners, the centre, and four points around the centre.
    points = np.array([
        [0.00, 0.00],
        [0.99, 0.99],
        [0.00, 0.99],
        [0.99, 0.00],
        [0.50, 0.50],
        [0.25, 0.50],
        [0.50, 0.25],
        [0.75, 0.50],
        [0.50, 0.75],
    ])

    # Concave quadratic target whose maximum (value 2) sits at (0.3, 0.6).
    targets = -(points[:, 0] - 0.3) ** 2 - 0.5 * (points[:, 1] - 0.6)**2 + 2

    first_axis = np.arange(0, 1, 0.01)
    second_axis = np.arange(0, 1, 0.01)
    grid = np.dstack(np.meshgrid(first_axis, second_axis)).reshape(-1, 2)

    model = GaussianProcessRegressor(kernel=Matern(), n_restarts_optimizer=25)
    model.fit(points, targets)

    return {'x': points, 'y': targets, 'gp': model, 'mesh': grid}
def test_y_normalization():
    """Check that ``normalize_y=True`` matches centring the targets by hand.

    For every kernel in the module-level ``kernels``, a plain GP fitted on
    mean-centred targets (with the mean added back to its predictions) must
    give the same predictive mean, standard deviation and covariance on
    ``X2`` as a GP fitted on the raw targets with ``normalize_y=True``.
    """
    offset = y.mean(0)
    centred_targets = y - offset

    for kern in kernels:
        # Reference model: normalisation done manually, outside the estimator.
        manual = GaussianProcessRegressor(kernel=kern)
        manual.fit(X, centred_targets)

        # Model under test: the estimator normalises internally.
        builtin = GaussianProcessRegressor(kernel=kern, normalize_y=True)
        builtin.fit(X, y)

        # Predictive mean and standard deviation.
        mean_manual, std_manual = manual.predict(X2, return_std=True)
        mean_manual = offset + mean_manual
        mean_builtin, std_builtin = builtin.predict(X2, return_std=True)
        assert_almost_equal(mean_manual, mean_builtin)
        assert_almost_equal(std_manual, std_builtin)

        # Full predictive covariance.
        cov_manual = manual.predict(X2, return_cov=True)[1]
        cov_builtin = builtin.predict(X2, return_cov=True)[1]
        assert_almost_equal(cov_manual, cov_builtin)
def bo_(x_obs, y_obs):
    """Run one Bayesian-optimisation step on a 2-D grid, then recurse.

    Fits a GP (Matern + white-noise kernel) to the observations, evaluates
    the acquisition function built by ``a_EI`` on a 128 x 128 grid over
    [0, 10] x [0, 10], evaluates ``f2d`` at the grid point with the largest
    acquisition value, plots the current state, and calls itself with the
    new observation appended.

    NOTE(review): there is no stopping condition, so this never returns
    normally; it recurses until interrupted or until the interpreter's
    recursion limit is reached.
    """
    model = GaussianProcessRegressor(
        kernel=kernels.Matern() + kernels.WhiteKernel(),
        n_restarts_optimizer=16,
    )
    model.fit(x_obs, y_obs)

    # The same column of 128 values is used for both grid axes.
    axis_values = np.atleast_2d(np.linspace(0, 10, 128)).T
    xs = [axis_values, axis_values]
    grid = cartesian_product(*xs)

    acquisition = a_EI(model, x_obs=x_obs, y_obs=y_obs)
    best_point = grid[np.argmax(acquisition(grid))]

    # heavy evaluation
    print("f({})".format(best_point))
    best_value = f2d(np.atleast_2d(best_point))

    plot_2d(model, x_obs, y_obs, best_point, acquisition, xs)
    plt.show()

    bo_(
        x_obs=np.vstack((x_obs, best_point)),
        y_obs=np.hstack((y_obs, best_value)),
    )
def test_acquisition_api():
    """Smoke-test the call signature shared by the acquisition functions.

    Each of ``gaussian_ei``, ``gaussian_lcb`` and ``gaussian_pi`` must return
    one value per row when given a 2-D input, and must raise ``ValueError``
    when given a 1-D input.
    """
    prng = np.random.RandomState(0)
    features = prng.randn(10, 2)
    targets = prng.randn(10)

    model = GaussianProcessRegressor()
    model.fit(features, targets)

    for acquisition in (gaussian_ei, gaussian_lcb, gaussian_pi):
        values = acquisition(features, model)
        assert_array_equal(values.shape, 10)
        # A flat vector is not an accepted batch of points.
        assert_raises(ValueError, acquisition, prng.rand(10), model)
class SmoothFunctionCreator():
    """Generate smooth random curves and a nonlinear transform of them.

    A default ``GaussianProcessRegressor`` is fitted to four fixed anchor
    points on [0, 10]; curves are then drawn from that fitted process.
    """

    def __init__(self, seed=42):
        """Fit the underlying GP and seed the sampler.

        Args:
            seed: seed for the ``RandomState`` used by every ``sample`` call.
        """
        anchor_x = np.array([0.0, 2.0, 6.0, 10.0]).reshape(-1, 1)
        anchor_y = np.array([0.0, 1.0, -1.0, 0.0])

        self._gp = GaussianProcessRegressor()
        self._gp.fit(anchor_x, anchor_y)
        self._random_state = np.random.RandomState(seed)

    def sample(self, n_samples):
        """Draw ``n_samples`` curves on 100 evenly spaced points in [0, 10].

        Returns:
            ``(source, target)``: ``source`` holds the sampled curves and
            ``target`` is ``tanh(10 * d)``, where ``d`` is the first-order
            Gaussian-derivative filter (sigma 1) of ``source`` along axis 0.
        """
        grid = np.linspace(0.0, 10.0, 100).reshape(-1, 1)
        source = self._gp.sample_y(
            grid, n_samples, random_state=self._random_state)

        slope = gaussian_filter1d(source, 1, order=1, axis=0)
        target = np.tanh(10.0 * slope)
        return source, target
def fit_GP(x_train):
    """Fit a GP to the module-level Gaussian curve sampled at ``x_train``.

    The targets are ``gaussian(x_train, mu, sig)`` flattened; ``mu``, ``sig``
    and the prediction grid ``x`` are read from module scope.

    Returns:
        ``(y_train, y_pred, sigma)``: the training targets, the predictive
        mean on ``x`` and the predictive standard deviation on ``x``.
    """
    targets = gaussian(x_train, mu, sig).ravel()

    # Constant * RBF kernel; hyperparameters are tuned during fit, using
    # 9 optimizer restarts.
    amplitude = C(1.0, (1e-3, 1e3))
    smoothness = RBF(1, (1e-2, 1e2))
    model = GaussianProcessRegressor(kernel=amplitude * smoothness,
                                     n_restarts_optimizer=9)
    model.fit(x_train, targets)

    mean, std = model.predict(x, return_std=True)
    return targets, mean, std
def plot_gp(x_min, x_max, x, y, train_features, train_labels, kappa=5):
    """Plot a GP fit and its upper-confidence-bound utility in two panels.

    The top panel shows the target curve, the observations, the GP mean and
    a 95% band (mean +/- 1.96 * std). The bottom panel shows the utility
    ``mean + kappa * std`` and marks its maximum as the next best guess.

    Args:
        x_min, x_max: x-axis limits applied to both panels.
        x: grid on which the GP is evaluated and everything is drawn.
        y: target values on ``x`` (drawn as the reference curve).
        train_features: observed inputs; must support ``.flatten()``.
        train_labels: observed outputs.
        kappa: exploration weight of the utility (default 5, the value that
            used to be hard-coded).
    """
    fig = plt.figure(figsize=(16, 10))
    fig.suptitle(
        'Gaussian Process and Utility Function After {} Steps'.format(
            len(train_features)),
        fontdict={'size': 30})

    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1])
    axis = plt.subplot(gs[0])
    acq = plt.subplot(gs[1])

    gp = GaussianProcessRegressor(
        kernel=Matern(nu=2.5),
        n_restarts_optimizer=25,
    )
    gp.fit(train_features, train_labels)

    # One prediction serves both panels; the original predicted twice on
    # the same grid and got the same numbers back.
    mu, sigma = gp.predict(x, return_std=True)

    axis.plot(x, y, linewidth=3, label='Target')
    axis.plot(train_features.flatten(), train_labels, 'D', markersize=8,
              label=u'Observations', color='r')
    axis.plot(x, mu, '--', color='k', label='Prediction')
    # Closed polygon: lower bound left-to-right, upper bound right-to-left.
    axis.fill(np.concatenate([x, x[::-1]]),
              np.concatenate([mu - 1.9600 * sigma,
                              (mu + 1.9600 * sigma)[::-1]]),
              alpha=.6, fc='c', ec='None', label='95% confidence interval')
    axis.set_xlim((x_min, x_max))
    axis.set_ylim((None, None))
    axis.set_ylabel('f(x)', fontdict={'size': 20})
    axis.set_xlabel('x', fontdict={'size': 20})

    utility = mu + kappa * sigma
    acq.plot(x, utility, label='Utility Function', color='purple')
    acq.plot(x[np.argmax(utility)], np.max(utility), '*', markersize=15,
             label=u'Next Best Guess', markerfacecolor='gold',
             markeredgecolor='k', markeredgewidth=1)
    acq.set_xlim((x_min, x_max))
    acq.set_ylim((0, np.max(utility) + 0.5))
    acq.set_ylabel('Utility', fontdict={'size': 20})
    acq.set_xlabel('x', fontdict={'size': 20})

    axis.legend(loc=2, bbox_to_anchor=(1.01, 1), borderaxespad=0.)
    acq.legend(loc=2, bbox_to_anchor=(1.01, 1), borderaxespad=0.)
def test_K_inv_reset(kernel):
    """Check that the cached ``_K_inv`` is cleared by every new ``fit``.

    ``_K_inv`` must be ``None`` right after fitting, populated after a
    ``predict(..., return_std=True)``, ``None`` again after refitting, and
    after refitting must equal what a never-before-fitted estimator yields.
    """
    second_targets = f(X2).ravel()

    refitted = GaussianProcessRegressor(kernel=kernel).fit(X, y)
    assert hasattr(refitted, '_K_inv')
    assert refitted._K_inv is None

    # Asking for the standard deviation populates the cache.
    refitted.predict(X, return_std=True)
    assert refitted._K_inv is not None

    # A new fit must drop the stale cache.
    refitted.fit(X2, second_targets)
    assert refitted._K_inv is None
    refitted.predict(X2, return_std=True)

    fresh = GaussianProcessRegressor(kernel=kernel).fit(X2, second_targets)
    fresh.predict(X2, return_std=True)

    # The cached inverse must not depend on the first fit.
    assert_array_equal(refitted._K_inv, fresh._K_inv)
def test_GP_brownian_motion(self):
    """Fit a GP on a time grid and plot the prediction with a 95% band.

    NOTE(review): ``y`` and ``yerr`` are not defined inside this method, so
    they are presumably module-level data - confirm before relying on this.
    """
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

    t = np.linspace(0, 10, 100)

    # Brownian-motion covariance k(s, t) = min(s, t).
    # NOTE(review): scikit-learn documents ``kernel`` as a kernel instance;
    # a bare lambda is not one, so fitting is expected to fail here. A
    # previously tried alternative was
    # ``C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))``.
    kernel = lambda x, y: 1. * min(x, y)
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

    # scikit-learn expects a 2-D (n_samples, n_features) input.
    X = np.atleast_2d(t).T
    gp.fit(X, y)

    # Predict on the same 2-D column the model was fitted on; the original
    # passed the 1-D ``t``, which scikit-learn rejects.
    y_pred, sigma = gp.predict(X, return_std=True)

    fig = plt.figure()
    ax = fig.add_axes((0.1, 0.3, 0.8, 0.65))
    ax.invert_yaxis()

    ax.plot(t, y, color='blue', label='L bol', lw=2.5)
    ax.errorbar(t, y, yerr=yerr, fmt='o', color='blue', label='%s obs.')
    ax.plot(t, y_pred, '-', color='gray')
    # Closed polygon: lower bound forwards, upper bound backwards.
    ax.fill(np.concatenate([t, t[::-1]]),
            np.concatenate([y_pred - 1.9600 * sigma,
                            (y_pred + 1.9600 * sigma)[::-1]]),
            alpha=.5, fc='b', ec='None', label='95% confidence interval')
    plt.show()
def plot_gaussian(data, col):
    '''
    Plots a Gaussian process regression of the yearly means, highlighting
    the 'slow trend' in the data.

    The RBF kernel starts from a length scale of 10 (years). The regressor
    is built with its default optimizer, so that value is an initial guess
    that may be tuned during the fit.

    Parameters
    ----------
    data: dataframe
        pandas dataframe containing 'date', 'linMean' which is the average
        runtime and 'linSD' which is the standard deviation.
    col: string
        the color in which to plot the data
    '''
    # extract the results from the dataframe
    Year = np.array(data[u'date'].tolist())
    Mean = np.array(data[u'linMean'].tolist())
    SD = np.array(data[u'linSD'].tolist())

    length_scale = 10.
    kernel = 1. * RBF(length_scale)
    # Per-point noise variance goes in through ``alpha``. The original
    # keyword ``sigma_squared_n`` is not accepted by the released
    # GaussianProcessRegressor.
    gp = GaussianProcessRegressor(kernel=kernel, alpha=SD ** 2,
                                  normalize_y=True)

    # The regressor needs 2-D inputs, so the 1-D series are turned into
    # column vectors for fitting and flattened again for plotting.
    gp.fit(np.atleast_2d(Year).T, np.atleast_2d(Mean).T)

    Year_array = np.atleast_2d(
        np.linspace(min(Year) - 2, max(Year) + 2, 100)).T
    # Fixed: the original predicted on the undefined name ``Year_pred``.
    Mean_prediction, SD_prediction = gp.predict(Year_array, return_std=True)

    Year_array = Year_array.ravel()
    Mean_prediction = Mean_prediction.ravel()
    SD_prediction = np.ravel(SD_prediction)

    # plot the predicted best fit
    plt.plot(Year_array, Mean_prediction, col, alpha=1)

    # plot the 95% confidence interval
    plt.fill_between(Year_array,
                     (Mean_prediction - 1.9600 * SD_prediction),
                     y2=(Mean_prediction + 1.9600 * SD_prediction),
                     alpha=0.5, color=col)
    plt.draw()
def test_custom_optimizer():
    """Check that GPR accepts a user-supplied hyperparameter optimizer.

    A random-search optimizer is plugged in for every non-fixed kernel, and
    the fitted hyperparameters must score a higher log-marginal-likelihood
    than the kernel's initial ones.
    """

    def optimizer(obj_func, initial_theta, bounds):
        """Random search: best of the start point and 50 uniform draws."""
        prng = np.random.RandomState(0)
        # Search box: the supplied bounds clipped to [-2, 1].
        lower = np.maximum(-2, bounds[:, 0])
        upper = np.minimum(1, bounds[:, 1])

        best_theta = initial_theta
        best_value = obj_func(initial_theta, eval_gradient=False)
        for _ in range(50):
            candidate = np.atleast_1d(prng.uniform(lower, upper))
            value = obj_func(candidate, eval_gradient=False)
            if value < best_value:
                best_theta, best_value = candidate, value
        return best_theta, best_value

    for kern in kernels:
        # The fixed kernel has nothing to optimise.
        if kern == fixed_kernel:
            continue

        model = GaussianProcessRegressor(kernel=kern, optimizer=optimizer)
        model.fit(X, y)

        tuned_lml = model.log_marginal_likelihood(model.kernel_.theta)
        initial_lml = model.log_marginal_likelihood(model.kernel.theta)
        # Checks that optimizer improved marginal likelihood
        assert_greater(tuned_lml, initial_lml)
def test_duplicate_input():
    """Check that GPR copes with two different targets for the same input.

    One model sees an exact duplicate of the first training point, another
    sees that point shifted by 1e-15; both extra points carry the target
    ``y[0] + 1``. Their predictions on [0, 10] must agree.
    """
    # Neither the augmented targets nor the test grid depend on the kernel.
    targets_aug = np.hstack((y, y[0] + 1))
    X_exact = np.vstack((X, X[0]))
    X_nearby = np.vstack((X, X[0] + 1e-15))
    X_test = np.linspace(0, 10, 100)[:, None]

    for kern in kernels:
        exact_model = GaussianProcessRegressor(kernel=kern, alpha=1e-2)
        nearby_model = GaussianProcessRegressor(kernel=kern, alpha=1e-2)

        exact_model.fit(X_exact, targets_aug)
        nearby_model.fit(X_nearby, targets_aug)

        mean_exact, std_exact = exact_model.predict(X_test, return_std=True)
        mean_nearby, std_nearby = nearby_model.predict(X_test,
                                                       return_std=True)

        assert_almost_equal(mean_exact, mean_nearby)
        assert_almost_equal(std_exact, std_nearby)
class GP(BaseTuner):
    """Tuner that scores candidates with a Gaussian process regressor."""

    def __init__(self, tunables, gridding=0, r_minimum=2):
        """
        Extra args:
            r_minimum: the minimum number of past results this selector needs
                in order to use gaussian process for prediction. If not
                enough results are present during a fit(), subsequent calls
                to propose() will revert to uniform selection.
        """
        super(GP, self).__init__(tunables, gridding=gridding)
        self.r_minimum = r_minimum

    def fit(self, X, y):
        """
        Use X and y to train a Gaussian process.

        With fewer than ``r_minimum`` rows in ``X`` no process is trained and
        ``self.gp`` is left untouched.
        """
        super(GP, self).fit(X, y)

        # skip training the process if there aren't enough samples
        if X.shape[0] < self.r_minimum:
            return

        self.gp = GaussianProcessRegressor(normalize_y=True)
        self.gp.fit(X, y)

    def predict(self, X):
        """
        Return an array of ``(prediction, stdev)`` rows, one per row of X.

        Falls back to ``Uniform(self.tunables).predict(X)`` when fewer than
        ``r_minimum`` results are stored in ``self.X`` (presumably set by
        ``BaseTuner.fit`` - confirm against the base class).
        """
        if self.X.shape[0] < self.r_minimum:
            # we probably don't have enough
            # Logger.warn is a deprecated alias; warning() is the supported
            # spelling.
            logger.warning(
                'GP: not enough data, falling back to uniform sampler')
            return Uniform(self.tunables).predict(X)

        y, stdev = self.gp.predict(X, return_std=True)
        return np.array(list(zip(y, stdev)))

    def _acquire(self, predictions):
        """
        Predictions from the GP will be in the form (prediction, error).
        The default acquisition function returns the index with the highest
        predicted value, not factoring in error.
        """
        return np.argmax(predictions[:, 0])
def setTrainMC(self, MCHisto):
    """Train a GP on a histogram to get hyperparameters.

    Use a high stats MC sample to optimize the hyperparameters, then return
    a kernel object to be used in the data fit.

    Args:
        MCHisto: histogram wrapped in ``GPHisto`` to obtain the x values,
            y values and errors.

    Returns:
        The fitted kernel (``gp.kernel_``) with optimized hyperparameters.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("===== Optimizing hyperparamters on the training sample.")

    GPh = GPHisto(MCHisto)
    X_t = GPh.getXArr()
    Y_t = GPh.getYArr()
    dy_t = GPh.getErrArr()

    # Squared bin errors are passed as the per-point noise term.
    gp = GaussianProcessRegressor(kernel=self.kernel,
                                  alpha=dy_t**2,
                                  n_restarts_optimizer=10)

    # Fit for the hyperparameters.
    gp.fit(X_t, Y_t)
    print("Optimized hyperparameters:")
    print(gp.kernel_)

    # return a kernel object with hyperparameters optimized
    return gp.kernel_
def bo_(x_obs, y_obs, n_iter):
    """Run ``n_iter`` Bayesian-optimisation steps recursively.

    Each step fits a GP to ``1 - y_obs``, takes the candidate in the
    module-level ``x`` with the largest ``a_EI`` acquisition value, runs
    ``cross_validation`` on it and appends the resulting score to the
    observations.

    Returns:
        The ``cross_validation`` results of all steps concatenated with
        ``+``; an empty list once ``n_iter`` is no longer positive.
    """
    if not n_iter > 0:
        return []

    # The GP is fitted on 1 - score.
    flipped = 1 - y_obs
    model = GaussianProcessRegressor(
        kernel=kernels.Matern() + kernels.WhiteKernel(),
        n_restarts_optimizer=16,
    )
    model.fit(x_obs, flipped)

    acquisition = a_EI(model, x_obs=x_obs, y_obs=1 - y_obs)
    candidate = x[np.argmax(acquisition(x))]

    # heavy evaluation
    result = cross_validation(candidate)
    score = np.atleast_2d(mean_mean_validation_scores(result)).T

    remaining = bo_(
        x_obs=np.vstack((x_obs, candidate)),
        y_obs=np.vstack((y_obs, score)),
        n_iter=n_iter - 1,
    )
    return result + remaining
def integrated_sigma(alpha, n_samples, n_restarts_optimizer=16, f=f):
    """Integrate the GP predictive standard deviation over [0, 10].

    A GP is fitted to ``f`` sampled at ``n_samples`` evenly spaced points in
    [1, 9]; its predictive standard deviation on a 16384-point grid over
    [0, 10] is integrated with ``simps``.

    Args:
        alpha: noise level of the added ``WhiteKernel``; ``None`` adds the
            constant ``0.0`` to the Matern kernel instead.
        n_samples: number of training points.
        n_restarts_optimizer: passed through to the regressor.
        f: function being modelled (defaults to the module-level ``f``).

    Returns:
        The integral of the predictive standard deviation.
    """
    print("integrated_sigma(n_samples={n_samples}, alpha={alpha})".format(
        n_samples=n_samples,
        alpha=alpha,
    ))

    train_x = np.atleast_2d(np.linspace(1, 9, n_samples)).T
    train_y = f(train_x).ravel()
    grid = np.atleast_2d(np.linspace(0, 10, 16 * 1024)).T

    if alpha is not None:
        noise_term = kernels.WhiteKernel(noise_level=alpha)
    else:
        noise_term = 0.0

    model = GaussianProcessRegressor(
        kernel=kernels.Matern() + noise_term,
        n_restarts_optimizer=n_restarts_optimizer,
    )
    model.fit(train_x, train_y)

    # Only the standard deviation is needed; the mean is discarded.
    _, std = model.predict(grid, return_std=True)
    return simps(x=grid.ravel(), y=std)
def test_y_multioutput():
    """Check that GPR handles targets with more than one column.

    With a fixed RBF kernel, a model fitted on ``[y, 2 * y]`` must reproduce
    the single-output mean in its first column and twice that mean in its
    second, with unchanged standard deviation and covariance. With optimised
    kernels, duplicating ``y`` must leave the fitted hyperparameters equal
    to 4 decimals.
    """
    stacked_targets = np.vstack((y, y*2)).T

    # Fixed kernel: no hyperparameter optimisation, no normalisation.
    fixed = RBF(length_scale=1.0)
    single = GaussianProcessRegressor(kernel=fixed, optimizer=None,
                                      normalize_y=False)
    single.fit(X, y)
    double = GaussianProcessRegressor(kernel=fixed, optimizer=None,
                                      normalize_y=False)
    double.fit(X, stacked_targets)

    mean_1d, std_1d = single.predict(X2, return_std=True)
    mean_2d, std_2d = double.predict(X2, return_std=True)
    cov_1d = single.predict(X2, return_cov=True)[1]
    cov_2d = double.predict(X2, return_cov=True)[1]

    assert_almost_equal(mean_1d, mean_2d[:, 0])
    assert_almost_equal(mean_1d, mean_2d[:, 1] / 2)

    # Standard deviation and covariance do not depend on output
    assert_almost_equal(std_1d, std_2d)
    assert_almost_equal(cov_1d, cov_2d)

    draws_1d = single.sample_y(X2, n_samples=10)
    draws_2d = double.sample_y(X2, n_samples=10)
    assert_almost_equal(draws_1d, draws_2d[:, 0])

    # Test hyperparameter optimization
    duplicated_targets = np.vstack((y, y)).T
    for kern in kernels:
        single = GaussianProcessRegressor(kernel=kern, normalize_y=True)
        single.fit(X, y)

        double = GaussianProcessRegressor(kernel=kern, normalize_y=True)
        double.fit(X, duplicated_targets)

        assert_almost_equal(single.kernel_.theta, double.kernel_.theta, 4)
period = row['pf'] source_id = row['source_id'] lc_time = np.array(lc4id[source_id]['time']) lc_mag = np.array(lc4id[source_id]['mag']) x = (lc_time % period) / period y = lc_mag sorted_indexes = np.argsort(x) x = x[sorted_indexes] y = y[sorted_indexes] try: gaussian_process.fit(np.atleast_2d(x).T, y) predictedY, sigma = gaussian_process.predict(np.atleast_2d(np.linspace(0, 1)).T, return_std=True) except: print(source_id) continue if nb_processed<5: plt.errorbar(np.linspace(0, 1), predictedY, yerr=sigma*3) plt.scatter(x, y, color='r') plt.show() with open('results/%d.json' % (source_id), 'w') as h: h.write(json.dumps({'phase':x.tolist(), 'mag': y.tolist(), 'phase_estimate': np.linspace(0, 1).tolist(), 'mag_estimate': predictedY.tolist()})) nb_processed += 1 print(nb_processed)
# Fit KernelRidge with parameter selection based on 5-fold cross validation
candidate_kernels = [
    ExpSineSquared(length_scale, periodicity)
    for length_scale in np.logspace(-2, 2, 10)
    for periodicity in np.logspace(0, 2, 10)
]
param_grid = {"alpha": [1e0, 1e-1, 1e-2, 1e-3],
              "kernel": candidate_kernels}
kr = GridSearchCV(KernelRidge(), cv=5, param_grid=param_grid)
stime = time.time()
kr.fit(X, y)
print("Time for KRR fitting: %.3f" % (time.time() - stime))

# Fit the Gaussian process: periodic kernel plus a white-noise term
gp_kernel = (ExpSineSquared(1.0, 5.0, periodicity_bounds=(1e-2, 1e1))
             + WhiteKernel(1e-1))
gpr = GaussianProcessRegressor(kernel=gp_kernel)
stime = time.time()
gpr.fit(X, y)
print("Time for GPR fitting: %.3f" % (time.time() - stime))

# Predict using kernel ridge
X_plot = np.linspace(0, 20, 10000)[:, None]
stime = time.time()
y_kr = kr.predict(X_plot)
print("Time for KRR prediction: %.3f" % (time.time() - stime))

# Predict using the Gaussian process (mean only)
stime = time.time()
y_gpr = gpr.predict(X_plot, return_std=False)
print("Time for GPR prediction: %.3f" % (time.time() - stime))

# Predict using the Gaussian process (mean and standard deviation)
stime = time.time()
y_gpr, y_std = gpr.predict(X_plot, return_std=True)
def test_scikit_GP_SNRefsdal(self):
    """Fit a default GP to the S1 / F160W light curve and plot it.

    The curve is read with ``snrefsdal.read_curves``; the plot shows the
    observations with error bars, the GP mean and a 95% band
    (mean +/- 1.96 * std).
    """
    from sklearn.gaussian_process import GaussianProcessRegressor

    # add data
    image = "S1"
    bname = 'F160W'
    curves = snrefsdal.read_curves(snrefsdal.path_data, image)
    lc = curves.get(bname)

    t = lc.Time
    y = lc.Mag
    yerr = lc.MagErr

    # Default kernel. A previously tried alternative was
    # C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) with 9 optimizer restarts.
    gp = GaussianProcessRegressor()

    # scikit-learn expects a 2-D (n_samples, n_features) input.
    X = np.atleast_2d(t).T
    gp.fit(X, y)

    # Predict on the same 2-D column the model was fitted on; the original
    # passed the 1-D ``t``, which scikit-learn rejects.
    y_pred, sigma = gp.predict(X, return_std=True)

    fig = plt.figure()
    ax = fig.add_axes((0.1, 0.3, 0.8, 0.65))
    ax.invert_yaxis()

    ax.plot(t, y, color='blue', label='L bol', lw=2.5)
    ax.errorbar(t, y, yerr=yerr, fmt='o', color='blue', label='%s obs.')
    ax.plot(t, y_pred, '-', color='gray')
    # Closed polygon: lower bound forwards, upper bound backwards.
    ax.fill(np.concatenate([t, t[::-1]]),
            np.concatenate([y_pred - 1.9600 * sigma,
                            (y_pred + 1.9600 * sigma)[::-1]]),
            alpha=.5, fc='b', ec='None', label='95% confidence interval')
    plt.show()
#plt.ylabel(r"$T_e \ $"+"(eV)")
#plt.xlim(-0.04, 0.01)
#plt.ylim(0, 2000)
#plt.legend(loc='upper right')
#plt.show()

# Prediction grids (1000 points each, as column vectors).
x_T = np.atleast_2d(np.linspace(-0.07, 0.01, 1000)).T
x_n = np.atleast_2d(np.linspace(-0.075, 0.01, 1000)).T

# Constant * RBF kernels; kernel_T is passed to every temperature GP below.
kernel_T = C(10.0, (1e-5, 1e5)) * RBF(10.0, (1e-5, 1e5))
kernel_n = C(0.01, (1e-5, 1e-1)) * RBF(0.01, (1e-5, 1e5))

# --- L-mode temperature profile ---
gp_L_mode_T = GaussianProcessRegressor(
    kernel=kernel_T,
    alpha=(200.*noise(L_mode_T_x))**2.,
    n_restarts_optimizer=10,
)
L_mode_T_X = np.atleast_2d(L_mode_T_x).T
# Fit the GP, then predict mean and standard deviation on the T grid
gp_L_mode_T.fit(L_mode_T_X, L_mode_T_y)
L_mode_T_yp, L_mode_T_sigma = gp_L_mode_T.predict(x_T, return_std=True)

# --- H-mode temperature profile ---
gp_H_mode_T = GaussianProcessRegressor(
    kernel=kernel_T,
    alpha=(200.*noise(H_mode_T_x))**2.,
    n_restarts_optimizer=10,
)
H_mode_T_X = np.atleast_2d(H_mode_T_x).T
# Fit the GP, then predict mean and standard deviation on the T grid
gp_H_mode_T.fit(H_mode_T_X, H_mode_T_y)
H_mode_T_yp, H_mode_T_sigma = gp_H_mode_T.predict(x_T, return_std=True)

# --- I-mode temperature profile ---
gp_I_mode_T = GaussianProcessRegressor(
    kernel=kernel_T,
    alpha=(200.*noise(I_mode_T_x))**2.,
    n_restarts_optimizer=10,
)
I_mode_T_X = np.atleast_2d(I_mode_T_x).T
# Fit to data using Maximum Likelihood Estimation of the parameters
#dy1 = np.random.random(y.shape)
#noise = np.random.normal(0, dy)
#dy1 += noise
#print 'test y', y
#print 'test dy', dyf
#print 'test x', X

# Instantiate a Gaussian Process model; the squared relative error
# (dyf / y)**2 is passed as the per-point noise term.
gp = GaussianProcessRegressor(
    kernel=kernel,
    alpha=(dyf / y)**2,
    n_restarts_optimizer=500)

# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X, y)

# %-formatting inside a single-argument print(...) gives the same output on
# Python 2 and Python 3, replacing the Python 2-only print statements.
print('Parameters: %s' % (gp.get_params(deep=True),))
print('Score: %s' % (gp.score(X, y),))

# Predict the mean and standard deviation on the meshed x-axis
y_pred, sigma = gp.predict(x, return_std=True)
#print 'Sigma:', sigma

# One-sigma lower curve, used below for the magnitude error.
likel = y_pred - sigma

# Indices of every grid point attaining the maximum predicted value.
m = max(y_pred)
mp = [i for i, j in enumerate(y_pred) if j == m]

# Convert the peak value (scaled by fm) into a magnitude; the error is the
# difference between the magnitude at the peak and at the lower curve.
Mag400p = -((np.log10(y_pred[mp] * fm) - 31.4) / 0.4)
Mag400p_err = (-(np.log10(y_pred[mp] * fm) - 31.4) / 0.4) - (-(np.log10(likel[mp] * fm) - 31.4) / 0.4)
X = GPh.getXArr()
y = GPh.getYArr()
dy = GPh.getErrArr()

# Prediction grid: 1000 points on [0, 10] as a column vector
x = np.atleast_2d(np.linspace(start=0., stop=10, num=1000)).T

#kernel = C(1.0, (sigmaMin, 1e5)) * RBF(2.0, (lMin, 1e3)) #squared exponential kernel
# The bounds must be ONE (low, high) tuple, as in the commented-out kernel
# above; the original passed them as two separate positional arguments.
kernel = C(1.0, (1e-3, 1e5)) * FallExp() * Gibbs()

# Squared errors are passed as the per-point noise term.
gp = GaussianProcessRegressor(kernel=kernel
                              #,optimizer=None
                              ,alpha=(dy**2)
                              ,n_restarts_optimizer=15
                              )
gp.fit(X, y)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(gp.kernel_)

y_pred, sigma = gp.predict(x, return_std=True)

# Copy the prediction into a histogram with one bin per grid point
# (bin numbering is offset by one); the bin error is 1.96 * sigma.
outhist = TH1F('GP', 'GP', 1000, 0, 10)
for index, cont in enumerate(y_pred):
    outhist.SetBinContent(index+1, cont)
    outhist.SetBinError(index+1, 1.96*sigma[index])

canv = TCanvas('plot')
canv.cd()

# Rescale the function histogram so that integral * bin width matches hist.
funcHist = func.GetHistogram()
funcHist.Scale(hist.Integral()*hist.GetBinWidth(2)/(funcHist.Integral()*funcHist.GetBinWidth(2)))
from sklearn.gaussian_process.kernels import RBF

if __name__ == "__main__":
    # ---- Experiment 1: sine samples ----
    x, y = sample_generators.generate_osband_sin_samples()

    # Evaluate on a range 10% of max(x) wider than the data on each side.
    additional_range = 0.1 * np.max(x)
    x_eval = np.linspace(np.min(x) - additional_range,
                         np.max(x) + additional_range,
                         100).reshape([-1, 1])
    x = x.reshape((-1, 1))
    y = y.reshape((-1, 1))

    fig, axs = plt.subplots(2, 1, figsize=(30, 10))

    kernel = 0.5 * RBF(length_scale=0.01)
    gp = GaussianProcessRegressor(kernel=kernel, alpha=0.03,
                                  n_restarts_optimizer=10)

    # Top panel: five draws taken before the model is fitted.
    y_prior = gp.sample_y(x_eval, 5)
    axs[0].plot(x_eval, y_prior)

    # Bottom panel: fitted model's mean and standard deviation.
    gp.fit(x, y)
    y_eval, sigma = gp.predict(x_eval, return_std=True)
    y_eval = y_eval.flatten()
    plotting.plot_mean_vs_truth(x, y, x_eval, y_eval, sigma, axs[1])
    axs[1].set_title(
        "Posterior (kernel: %s)\n Log-Likelihood: %.3f"
        % (gp.kernel_, gp.log_marginal_likelihood(gp.kernel_.theta)))

    plt.show()
    fig.savefig("GP_Sinus.pdf")
    plt.close()

    # ---- Experiment 2: nonlinear samples ----
    x, y = sample_generators.generate_osband_nonlinear_samples()

    # Here the evaluation range is 20% of max(x) wider on each side.
    additional_range = 0.2 * np.max(x)
    x_eval = np.linspace(np.min(x) - additional_range,
                         np.max(x) + additional_range,
                         100).reshape([-1, 1])
    x = x.reshape((-1, 1))
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel, RBF
import pandas as pd
import numpy as np
import os

# NOTE(review): relative path - only resolves when the script is started
# from the directory that contains 'Desktop'.
os.chdir('Desktop/PythonClassNotes/KocPython2020/in-classMaterial/day17')

# Load the survey: the regression target and the text to vectorise.
tt = pd.read_csv('immSurvey.csv')
alphas = tt.stanMeansNewSysPooled
sample = tt.textToSend

# TF-IDF features over unigrams and bigrams.
from sklearn.feature_extraction.text import TfidfVectorizer
vec = TfidfVectorizer(ngram_range=(1, 2),
                      token_pattern=r'\b\w+\b',
                      min_df=1)
X = vec.fit_transform(sample)
# Result is not stored: this only displays when run interactively.
pd.DataFrame(X.toarray(), columns=vec.get_feature_names())

from sklearn.model_selection import train_test_split
Xtrain, Xtest, ytrain, ytest = train_test_split(X, alphas, random_state=1)

# Constant * RBF kernel; the GP needs dense arrays, hence .toarray().
rbf = ConstantKernel(1.0) * RBF(length_scale=1.0)
gpr = GaussianProcessRegressor(kernel=rbf, alpha=1e-8)
gpr.fit(Xtrain.toarray(), ytrain)

mu_s, cov_s = gpr.predict(Xtest.toarray(), return_cov=True)

# Correlation between held-out targets and predictions (the value is not
# stored, so it only displays when run interactively).
np.corrcoef(ytest, mu_s)
# With bigrams, this correlation dropped from 0.683 to 0.646.
class GPUCB(UCB):
    """Perform GPUCB algorithm on environment

    Attributes
    --------------------------------

        environment: instance of DummyEnvironment, generate samples using x

        x, t: list of all index and values of samples

        maxlabel: max label of t

        beta: parameter for gaussian process
            beta can be fixed as specified by arguments
            beta also can be changed along with epochs

        alpha, mu, sigma: parameter for gaussian process

        X, T: list of observed samples

        cumu_regret: cumulative regret

        regret_list: list for regret for each epoch

    """

    def __init__(self, env, x, alpha=1., beta=1.):
        """
        Arguments
        ---------------------------------
            env
                environment, forwarded to the base class
            x
                list of all index of samples
            alpha
                parameter for gaussian process (stored only; the regressor
                below is built without it)
            beta
                parameter for upper bound
        """
        super().__init__(env, x)

        self.alpha = alpha
        self.beta = beta

        # Prior belief before any fit: zero mean, 0.5 standard deviation.
        self.mu = np.zeros_like(x)
        self.sigma = 0.5 * np.ones_like(x)
        # self.gp = GaussianProcessRegressor(alpha = self.alpha)
        self.gp = GaussianProcessRegressor(kernel=DotProduct())

    def argmax_ucb(self):
        """Return the candidate with the largest upper confidence bound.

        ``mu`` and ``sigma`` hold one entry per candidate in ``self.x``, so
        the winning position is looked up in ``self.x``. The original
        indexed the observation list ``self.X`` with it, which only gives
        the right candidate while ``self.X`` starts with a full copy of
        ``self.x`` (i.e. after ``init_reward``) and fails on an empty list.
        """
        return self.x[np.argmax(self.mu + self.sigma * self.beta)]

    def learn(self, epoch):
        """sample index with maximum upper bound and update gaussian process parameters
        """
        # Refit only when there are observations to fit on.
        if len(self.X) > 0:
            self.beta = 2.0 * np.log(len(self.X) * (epoch + 1.0)) / 20
            self.gp.fit(self.X, self.T)
            self.mu, self.sigma = self.gp.predict(
                self.x.reshape((self.x.shape[0], 1)), return_std=True)

        idx = self.argmax_ucb()
        self.sample(idx)
        self.regret(epoch)

    def init_reward(self):
        """Observe every candidate in ``self.x`` once."""
        for i in self.x:
            self.X.append([i])
            self.T.append(self.env.sample(i))

    def sample(self, idx):
        """sample idx according to the ground truth
        """
        self.X.append([idx])
        self.T.append(self.env.sample(idx))

    def plot(self):
        """Plot the GP prediction band and the observed samples."""
        plt.figure()
        ax = plt.axes()

        min_val = min(self.x)
        max_val = max(self.x)
        test_range = np.arange(min_val - 1, max_val + 1, 0.1)
        num_test = len(test_range)

        # return_std=True yields a standard deviation.
        preds, pred_std = self.gp.predict(
            test_range.reshape(num_test, 1), return_std=True)
        ax.plot(test_range, preds, alpha=0.5, color='g', label='predict')
        ax.fill_between(test_range, preds - pred_std, preds + pred_std,
                        facecolor='k', alpha=0.2)

        # The first len(self.x) observations are drawn as the initial
        # samples (matches what init_reward appends); the rest as selected.
        init_len = len(self.x)
        ax.scatter(self.X[:init_len], self.T[:init_len], c='b', marker='o',
                   alpha=1.0, label='init sample')
        ax.scatter(self.X[init_len:], self.T[init_len:], c='r', marker='o',
                   alpha=1.0, label='selected sample')

        plt.legend()
        plt.xlabel('X')
        plt.ylabel('Y')
        plt.title('GPUCB')
        plt.show()
mu_bin += signal(m) toy[i_bin] = R.Poisson(l*mu_bin) h_1.SetBinContent(i_bin,toy[i_bin]) #print(toy) background_fit.SetParameter(1,2) background_fit.SetParameter(2,-0.1) background_fit.SetParameter(3,1e-9) fitresults = h_1.Fit(background_fit,"SNQ") if fitresults.Status() != 0: print("Error in fit") Error += 1 print(t+1) h_chi2_param.Fill(fitresults.Chi2()/fitresults.Ndf()) #print(fitresults.Chi2()/fitresults.Ndf()) gp = GaussianProcessRegressor(kernel=kernel, alpha=np.sqrt(toy)) gp.fit(mass.reshape(-1,1),toy) y_mean = gp.predict(mass.reshape(-1,1)) chi2_gp = np.sum((toy-y_mean)**2/toy) h_chi2_gp.Fill(chi2_gp/(len(toy) - 1 - gp.kernel_.n_dims)) #print(chi2_gp/(len(toy) - 1 - len(gp.kernel_.theta))) #h_loglik.Fill(gp.log_marginal_likelihood()) #h_1.Draw("PE") #canvas1.Update() #input("Press enter to exit!") foobar
def test_bayes_optimizaion(self):
    """Run a short Bayesian-optimisation loop on ``self.test_function``.

    Three random points seed a Matern(nu=2.5) GP; the loop then runs three
    iterations. Each step proposes the best of 100000 uniformly sampled
    candidates under the confidence-bound criterion ``mean + kappa * std``
    with kappa = 5.
    """
    print("Start bayesian optimization")

    # 1. Initialize parameters
    acquisition_function_kappa = 5
    init_point_number = 3
    iteration_number = 3

    gp = GaussianProcessRegressor(
        kernel=Matern(nu=2.5),
        n_restarts_optimizer=25,
    )

    bound_dict = {'x': (-4, 4), 'y': (-3, 3)}
    # Example: ndarray([[-4, 4], [-3, 3]])
    bounds = np.asarray(list(bound_dict.values()))

    def propose_next():
        """Return the best random candidate under the confidence bound."""
        # Example shape: [100000, 2]
        x_tries = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                    size=(100000, bounds.shape[0]))
        mean, std = gp.predict(x_tries, return_std=True)
        # Confidence bound criteria
        scores = mean + acquisition_function_kappa * std
        best = x_tries[scores.argmax()]
        return np.clip(best, bounds[:, 0], bounds[:, 1])

    # 2. Get init random samples
    # Example: array([-3.66909025, -1.93270006, 1.36095631])
    init_xs = np.random.uniform(-4, 4, size=init_point_number)
    # Example: array([-0.84486644, -0.95367483, 0.61358525])
    init_ys = np.random.uniform(-3, 3, size=init_point_number)
    init_points = [[px, py] for px, py in zip(init_xs, init_ys)]
    init_labels = [self.test_function(px, py) for px, py in init_points]

    # 3. GP compute the prior
    train_features = np.asarray(init_points)
    train_labels = np.asarray(init_labels)
    current_max_label = train_labels.max()
    gp.fit(train_features, train_labels)

    # 4. Acquisition function computes the max value
    x_max = propose_next()
    print("Current max acquision function choose: {}".format(x_max))

    for _ in range(iteration_number):
        # 5. Choose the best and compute to add in train dataset
        train_features = np.vstack((train_features, x_max.reshape((1, -1))))
        train_labels = np.append(train_labels,
                                 self.test_function(x_max[0], x_max[1]))

        # 6. Re-compute gaussian process and acquisition function
        gp.fit(train_features, train_labels)

        # Update maximum value
        if train_labels[-1] > current_max_label:
            current_max_label = train_labels[-1]
            print("Get the better parameters!")

        x_max = propose_next()
        print(
            "Max label: {}, current label: {}, acquision function choose: {}"
            .format(current_max_label, train_labels[-1], x_max))
def get_new_suggestions(self, study_id, trials=None, number=1):
    """Suggest the next trial for a study using Bayesian optimization.

    While the study has fewer completed trials than ``randomInitTrials``
    (default 3) the call is delegated to ``RandomSearchAlgorithm``.
    Otherwise a Gaussian process is fitted on the completed trials and the
    best of 100000 uniformly sampled candidates is chosen by a confidence
    bound: ``mean + kappa * std`` (highest) for ``MAXIMIZE`` and
    ``mean - kappa * std`` (lowest) for ``MINIMIZE``.

    DOUBLE and INTEGER parameters each occupy one feature column; DISCRETE
    and CATEGORICAL parameters are one-hot encoded, one column per feasible
    point, and decoded by taking the column with the largest value.

    Args:
        study_id: Identifier used to look up the ``Study``.
        trials: Forwarded to the random-search fallback; defaults to an
            empty list.
        number: Ignored; exactly one trial is always returned.

    Returns:
        A list holding the single newly created and saved ``Trial`` (or
        whatever the random-search fallback returns).

    Raises:
        ValueError: If the study goal is neither ``MAXIMIZE`` nor
            ``MINIMIZE``.
    """
    if trials is None:
        trials = []

    # TODO: Only support returning one trial
    number = 1

    # Get study and completed data
    study = Study.objects.get(id=study_id)
    # NOTE(review): trials are filtered by study_name=study_id while the new
    # trial below is created with study.name -- confirm these agree.
    completed_trials = Trial.objects.filter(study_name=study_id,
                                            status="Completed")
    study_configuration_json = json.loads(study.study_configuration)
    random_init_trial_number = study_configuration_json.get(
        "randomInitTrials", 3)
    params = study_configuration_json["params"]
    study_goal = study_configuration_json["goal"]

    # Use random search if it has less dataset
    if len(completed_trials) < random_init_trial_number:
        randomSearchAlgorithm = RandomSearchAlgorithm()
        return randomSearchAlgorithm.get_new_suggestions(
            study_id, trials, number)

    # Fail fast, before the expensive GP fit, instead of crashing later on
    # an empty candidate.
    if study_goal not in ("MAXIMIZE", "MINIMIZE"):
        raise ValueError(
            "Unsupported study goal: {!r}".format(study_goal))

    acquisition_function_kappa = 5

    def feasible_points_of(param):
        """Parse the comma-separated feasible points of a one-hot param."""
        values = [
            value.strip() for value in param["feasiblePoints"].split(",")
        ]
        if param["type"] == "DISCRETE":
            return [float(value) for value in values]
        return values

    def encode(parameter_values):
        """Turn one trial's parameter dict into a flat feature list."""
        features = []
        for param in params:
            if param["type"] in ("DOUBLE", "INTEGER"):
                features.append(parameter_values[param["parameterName"]])
            elif param["type"] in ("DISCRETE", "CATEGORICAL"):
                chosen = parameter_values[param["parameterName"]]
                features.extend(
                    1 if point == chosen else 0
                    for point in feasible_points_of(param))
        return features

    # Construct the list with only scope, Example: [(40, 400)]
    bounds = []
    for param in params:
        if param["type"] in ("DOUBLE", "INTEGER"):
            bounds.append((param["minValue"], param["maxValue"]))
        elif param["type"] in ("DISCRETE", "CATEGORICAL"):
            bounds.extend([(0, 1)] * len(feasible_points_of(param)))
    # Make sure it is numpy ndarray
    bounds = np.asarray(bounds)

    # Construct train data with completed trials
    # Example features: [[50], [150], [250]], labels: [0.6, 0.8, 0.6]
    init_points = []
    init_labels = []
    for trial in completed_trials:
        # Example: {"learning_rate": 0.01, "optimizer": "ftrl"}
        init_points.append(encode(json.loads(trial.parameter_values)))
        init_labels.append(trial.objective_value)
    train_features = np.asarray(init_points)
    train_labels = np.asarray(init_labels)

    # Train with gaussian process
    gp = GaussianProcessRegressor(
        kernel=Matern(nu=2.5),
        n_restarts_optimizer=25,
    )
    gp.fit(train_features, train_labels)

    # Example: [[-3.66, -0.84], [-1.93, -0.95], ...], shape is [100000, 2]
    x_tries = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                size=(100000, bounds.shape[0]))
    mean, std = gp.predict(x_tries, return_std=True)

    # Confidence bound criteria.  Minimisation must use the LOWER bound:
    # taking argmin of mean + kappa * std would favour low uncertainty and
    # keep proposing points that were already evaluated.
    if study_goal == "MAXIMIZE":
        x_max = x_tries[(mean + acquisition_function_kappa * std).argmax()]
    else:
        x_max = x_tries[(mean - acquisition_function_kappa * std).argmin()]

    # Example: [3993.864683994805, 44.15441513231316]
    x_max = np.clip(x_max, bounds[:, 0], bounds[:, 1])
    print("Current max acquision function choose: {}".format(x_max))

    # Decode the chosen feature vector back into named parameter values.
    # Example: {"hidden2": 3993.864683994805, "hidden1": 44.15441513231316}
    suggested_parameter_values_json = {}
    index = 0
    for param in params:
        name = param["parameterName"]
        if param["type"] == "DOUBLE":
            suggested_parameter_values_json[name] = x_max[index]
            index += 1
        elif param["type"] == "INTEGER":
            suggested_parameter_values_json[name] = int(x_max[index])
            index += 1
        elif param["type"] in ("DISCRETE", "CATEGORICAL"):
            # Example: ["sgd", "adagrad", "adam", "ftrl"]
            points = feasible_points_of(param)
            scores = x_max[index:index + len(points)]
            # First largest column wins, as with a strict ">" scan.
            suggested_parameter_values_json[name] = points[int(
                np.argmax(scores))]
            index += len(points)

    return_trial = Trial.create(study.name, "BayesianOptimizationTrial")
    return_trial.parameter_values = json.dumps(
        suggested_parameter_values_json)
    return_trial.save()

    return [return_trial]
# Keep only the last 350 target values, then hold out the final 14 samples
# of X and Y as the test split.
Y = readfile_oneshop_Y(fr2)[-350:]
x_train = X[:-14]
y_train = Y[:-14]
x_test = X[-14:]
y_test = Y[-14:]
###
# NOTE(review): params_rf is not used anywhere in this fragment -- confirm
# whether it is consumed elsewhere.
params_rf = {
    'n_estimators': 1000,
    'max_depth': 10,
    'min_samples_split': 2,
    'warm_start': True,
    'n_jobs': 4
}
# Fit a Gaussian process with a scaled RBF kernel and predict the hold-out.
gpr = GaussianProcessRegressor(kernel=2.0 * RBF(1.0))
gpr.fit(x_train, y_train)
y_pre_gpr = gpr.predict(x_test)
###
# The string below is a disabled gradient-boosting variant kept as a bare
# string statement; it has no effect at runtime.
''' params_gbrt = {'loss':'huber','n_estimators': 800,'max_depth':12,'learning_rate': 0.01, 'random_state': 3} gbrt = GradientBoostingRegressor(**params_gbrt) gbrt.fit(x_train,y_train) y_pre_gbrt = gbrt.predict(x_test) gbrt_pre.append(y_pre_gbrt)'''
###
# Accumulate predictions and ground truth in the outer collections.
gpr_pre.append(y_pre_gpr)
Y_test.append(y_test)
#y_pre_diff = mean_normal_weekend_diff(Y,xday,xweekend,-21,-14)
###
# Score this split's predictions against the held-out targets.
loss_gpr = Evaluation([y_pre_gpr], [y_test])
class System_Model(object):
    """Gaussian-process models of mean response time per service level.

    One ``GaussianProcessRegressor`` is kept per service level (1 to 4) in
    ``gp_system1`` .. ``gp_system4``.  Each maps a workload value to a mean
    response time.  ``get_next_states`` predicts the response time for an
    action (a service level) and stores a reward in ``self.reward``.

    The code runs unchanged on Python 2 and Python 3.
    """

    # Upper clamp applied to the workload before prediction, per level.
    _MAX_WORKLOAD = {1: 9865, 2: 20845, 3: 29878, 4: 41643}

    # (step over the first third, step over the rest) used to pick the
    # training samples, per level.
    _TRAINING_STEPS = {1: (1, 30), 2: (1, 30), 3: (1, 20), 4: (2, 40)}

    def __init__(self):
        """Seed NumPy, close open figures and build the four regressors."""
        np.random.seed(31)  #31
        plt.close('all')
        kernel1 = 1.0 * RBF(length_scale=1000,
                            length_scale_bounds=(10000, 10000))
        kernel2 = 1.0 * RBF(length_scale=1000,
                            length_scale_bounds=(10000, 10000))
        kernel3 = 1.0 * RBF(length_scale=4000,
                            length_scale_bounds=(4000, 4000))
        kernel4 = 1.0 * RBF(length_scale=1000,
                            length_scale_bounds=(10000, 10000))
        self.gp_system1 = GaussianProcessRegressor(kernel1)
        self.gp_system2 = GaussianProcessRegressor(kernel2)
        self.gp_system3 = GaussianProcessRegressor(kernel3)
        self.gp_system4 = GaussianProcessRegressor(kernel4)
        self.reward = 0
        self.sl = 0

    # ------------------------------------------------------------------
    # Model construction
    # ------------------------------------------------------------------
    def create_model_sl1(self):
        """Load, plot, train and validate the model for service level 1."""
        self.__create_model(1)

    def create_model_sl2(self):
        """Load, plot, train and validate the model for service level 2."""
        self.__create_model(2)

    def create_model_sl3(self):
        """Load, plot, train and validate the model for service level 3."""
        self.__create_model(3)

    def create_model_sl4(self):
        """Load, plot, train and validate the model for service level 4."""
        self.__create_model(4)

    def __create_model(self, level):
        """Run the full pipeline for ``level`` from ``sys-data-<level>sl.txt``."""
        t, workload, tr = self.__load_data('sys-data-%dsl.txt' % level)
        self.__plot_data(t, workload, tr)
        t_training, w_training, tr_training = self.__get_data_training(
            t, workload, tr, level)
        self.__plot_data_training(t_training, w_training, tr_training)
        t_validation, w_validation, tr_validation = \
            self.__get_data_validation(t, workload, tr, 2)
        self.__train(level, w_training, tr_training)
        self.__validate(level, t_validation, w_validation, tr_validation)

    def __gp(self, level):
        """Return the regressor stored for service level ``level``."""
        return getattr(self, 'gp_system%d' % level)

    # ------------------------------------------------------------------
    # Data loading
    # ------------------------------------------------------------------
    def __find_valor(self, linea):
        """Split off the first tab-separated field of ``linea``.

        Returns the field as a float and the remainder of the line after
        the tab.
        """
        indice = linea.find("\t")
        valor = linea[0:indice]
        linea = linea[indice + 1:len(linea)]
        return float(valor), linea

    def __load_data(self, nombre_file):
        """Read a three-column, tab-separated file into three arrays.

        Columns are time, workload and response time.  The file is closed
        even if a line fails to parse.
        """
        t = []
        workload = []
        tr1 = []
        with open(nombre_file, 'r') as arch:
            for linea in arch:
                tiempo, linea = self.__find_valor(linea)
                t.append(tiempo)
                carga, linea = self.__find_valor(linea)
                workload.append(carga)
                tr1.append(float(linea))
        return np.array(t), np.array(workload), np.array(tr1)

    def __get_data_training(self, t, workload, tr, model):
        """Sub-sample the data used to train the model for level ``model``.

        The first third of the series is sampled with one step and the rest
        with a coarser step (see ``_TRAINING_STEPS``).

        Raises:
            ValueError: If ``model`` is not one of 1, 2, 3, 4.
        """
        if model not in self._TRAINING_STEPS:
            raise ValueError('unknown model: %r' % (model,))
        first_step, rest_step = self._TRAINING_STEPS[model]
        # Floor division keeps the indices integral on Python 3 as well.
        third = len(t) // 3
        points_training = np.concatenate(
            (np.arange(0, third, first_step),
             np.arange(third, len(t), rest_step)),
            axis=0)
        t_training = t[points_training]
        w_training = workload[points_training]
        tr_training = tr[points_training]
        return t_training, w_training, tr_training

    def __get_data_validation(self, t, workload, tr, step_length):
        """Return the data used for validation (currently every sample).

        NOTE(review): ``step_length`` is accepted but not used; all points
        are selected with a step of 1 -- confirm whether that is intended.
        """
        points_validation = np.arange(0, len(t), 1)
        t_validation = t[points_validation]
        w_validation = workload[points_validation]
        tr_validation = tr[points_validation]
        return t_validation, w_validation, tr_validation

    # ------------------------------------------------------------------
    # Plotting
    # ------------------------------------------------------------------
    def __plot_data(self, t, workload, tr):
        """Plot workload (left axis) and response time (right, log axis)."""
        plt.figure()
        plt.plot(t, workload, 'g.-', linewidth=0.9)
        plt.xlabel('Time of the day (hours)')
        plt.ylabel('Total arriving rate (control messages per second)')
        plt.grid()
        plt.twinx()
        plt.semilogy(t, tr, 'b+-', linewidth=0.9)
        plt.ylabel('Mean response time (seconds)')
        plt.grid()

    def __plot_data_training(self, t, workload, tr):
        """Plot the training workload and response time, then show it."""
        plt.figure()
        f1, = plt.semilogy(t, workload, '.-', label='W training')
        f2, = plt.semilogy(t, tr, '+--', label='Tr training')
        plt.title('Training data')
        plt.xlabel('t (min)')
        plt.ylabel('Training data')
        plt.grid()
        plt.legend(handles=[f1, f2])
        plt.show()

    # ------------------------------------------------------------------
    # Training / validation
    # ------------------------------------------------------------------
    def __train(self, level, w_training, tr_training):
        """Fit the regressor of ``level`` on workload -> response time."""
        self.__gp(level).fit(w_training[:, np.newaxis], tr_training)
        print('model %d construido' % level)

    def __validate(self, level, t_validation, w_validation, tr_validation):
        """Plot actual vs. predicted response time and print the MSE."""
        tr_predicted, y_std = self.__gp(level).predict(
            w_validation[:, np.newaxis], return_std=True)
        plt.figure()
        f1, = plt.semilogy(t_validation, tr_validation, 'b.-',
                           label='Actual MRT')
        f2, = plt.semilogy(t_validation, tr_predicted, 'r+--',
                           label='Predicted MRT')
        plt.xlabel('Time of the day (hours)')
        plt.ylabel('Mean response time (seconds)')
        plt.grid()
        plt.legend(handles=[f1, f2])
        plt.show()
        mse = (1.0 / len(tr_validation)) * sum(
            np.power((tr_validation - tr_predicted), 2))
        print('mse %s' % (mse,))

    # ------------------------------------------------------------------
    # Prediction and reward
    # ------------------------------------------------------------------
    def predict_tr(self, sl, lmbda):
        """Predict the mean response time for workload ``lmbda`` at ``sl``.

        The workload is clamped to the level's maximum first.  Returns a
        Python scalar, or ``None`` when ``sl`` is not one of 1, 2, 3, 4.
        """
        if sl not in self._MAX_WORKLOAD:
            return None
        cap = self._MAX_WORKLOAD[sl]
        if lmbda > cap:
            lmbda = cap
        # The regressor expects a 2-D (n_samples, n_features) array.
        query = np.asarray(lmbda, dtype=float).reshape(1, -1)
        tr_predicted, y_std = self.__gp(sl).predict(query, return_std=True)
        # ndarray.item() replaces np.asscalar, which newer NumPy removed.
        return np.asarray(tr_predicted).item()

    def get_next_states(self, action, lmbda):
        """Apply ``action`` and return ``(action, predicted response time)``.

        As a side effect the reward for this transition is stored in
        ``self.reward``.
        """
        sl_prima = action
        tr_prima = self.predict_tr(action, lmbda)
        self.__calculate_reward(sl_prima, tr_prima, lmbda)
        return action, tr_prima

    def __calculate_reward2(self, sl_prima, tr_prima):
        """Alternative +1/-1 reward based only on response-time bands."""
        if sl_prima == 1:
            if tr_prima < 0.001:
                self.reward = 1
            else:
                self.reward = -1
        elif sl_prima == 2:
            if tr_prima >= 0.00014 and tr_prima < 0.001:  #0.14
                self.reward = 1
            else:
                self.reward = -1
        elif sl_prima == 3:
            if tr_prima >= 0.00014 and tr_prima < 0.001:  #0.15
                self.reward = 1
            else:
                self.reward = -1
        elif sl_prima == 4:
            if tr_prima >= 0.00014:
                self.reward = 1
            else:
                self.reward = -1

    def __calculate_reward3_banded(self, sl_prima, tr_prima):
        """Alternative +1/0/-1 reward with a neutral band.

        This variant used to share its name with the sigmoid variant below
        and was therefore silently shadowed; it is kept under its own name.
        """
        reward = 0
        if sl_prima == 1:
            if tr_prima < 0.001:
                reward = 1
            else:
                reward = -1
        elif sl_prima == 2:
            if tr_prima >= 0.00016 and tr_prima < 0.001:
                reward = 1
            elif tr_prima < 0.00014 or tr_prima >= 0.001:
                reward = -1
        elif sl_prima == 3:
            if tr_prima >= 0.00016 and tr_prima < 0.001:
                reward = 1
            elif tr_prima < 0.00014 or tr_prima >= 0.001:
                reward = -1
        elif sl_prima == 4:
            if tr_prima >= 0.00016:
                reward = 1
            elif tr_prima < 0.00014:
                reward = -1
        self.reward = reward

    def __calculate_reward3(self, sl_prima, tr_prima):
        """Alternative smooth reward in (-1, 1) built from sigmoids."""
        reward = 0
        tr_prima = 1000 * tr_prima
        if sl_prima == 1:
            reward = -2 / (1 + mt.exp(-20 * (tr_prima - 0.9))) + 1
        elif sl_prima == 4:
            reward = 2 / (1 + mt.exp(-20 * (tr_prima - 0.3))) - 1
        else:
            if tr_prima < 0.4:
                reward = 2 / (1 + mt.exp(-20 * (tr_prima - 0.3))) - 1
            else:
                reward = -2 / (1 + mt.exp(-20 * (tr_prima - 0.9))) + 1
        print('reward: %s' % (reward,))
        self.reward = reward

    def __calculate_reward7(self, sl_prima, tr_prima, workload):
        """Alternative +1/-1 reward using response time and utilisation."""
        mu = 10167.0
        rho = workload / (sl_prima * mu)
        if sl_prima == 1:
            if tr_prima < 0.001:
                self.reward = 1
            else:
                self.reward = -1
        elif sl_prima == 2:
            if tr_prima >= 0.001:
                self.reward = -1
            else:
                if rho >= 0.45:
                    self.reward = 1
                else:
                    self.reward = -1
        elif sl_prima == 3:
            if tr_prima >= 0.001:
                self.reward = -1
            else:
                if rho >= 0.65:
                    self.reward = 1
                else:
                    self.reward = -1
        elif sl_prima == 4:
            if rho >= 0.75:
                self.reward = 1
            else:
                self.reward = -1

    def __calculate_reward(self, sl_prima, tr_prima, workload):
        """Store the +1/-1 reward used by ``get_next_states``.

        ``rho`` is ``workload / (sl_prima * mu)`` with ``mu = 10167.0``.
        Level 1 is rewarded for a response time below 0.001; levels 2 and 3
        additionally need ``rho`` in ``[0.45, 1)`` / ``[0.65, 1)``; level 4
        only needs ``rho >= 0.7``.  Other levels leave the reward untouched.
        """
        mu = 10167.0
        rho = workload / (sl_prima * mu)
        if sl_prima == 1:
            if tr_prima < 0.001:
                self.reward = 1
            else:
                self.reward = -1
        elif sl_prima == 2:
            if tr_prima >= 0.001:
                self.reward = -1
            else:
                if rho >= 0.45 and rho < 1:
                    self.reward = 1
                else:
                    self.reward = -1
        elif sl_prima == 3:
            if tr_prima >= 0.001:
                self.reward = -1
            else:
                if rho >= 0.65 and rho < 1:
                    self.reward = 1
                else:
                    self.reward = -1
        elif sl_prima == 4:
            if rho >= 0.7:
                self.reward = 1
            else:
                self.reward = -1

    def get_reward(self):
        """Print and return the most recently stored reward."""
        print('reward:  %s' % (self.reward,))
        return self.reward
class GaussianProcess:
    """Thin wrapper around ``GaussianProcessRegressor`` for 1-D inputs.

    Inputs are reshaped to a single column, the target can optionally be
    standardized, and helpers are provided to pick a kernel by cross
    validation and to plot the posterior mean with a 95 % band.
    """

    def __init__(self):
        # Silence the warnings NumPy emits on division by zero / invalid
        # operations.  Note that this changes NumPy's global error state.
        np.seterr(divide='ignore', invalid='ignore')

    def _scale(self, x):
        """Standardize ``x`` as a column; return the result and the scaler."""
        x = np.array(x).reshape(-1, 1)
        scaler = StandardScaler().fit(x)
        scaled_x = scaler.transform(x)
        return scaled_x, scaler

    def _inverse_scale(self, scaler_y, mu, sigma):
        """Map a standardized mean and std back to the original target scale."""
        mu = np.array(mu).reshape(-1, 1)
        inversed_mu = scaler_y.inverse_transform(mu)
        inversed_sigma = sigma.reshape(-1, 1) * scaler_y.scale_
        return inversed_mu, inversed_sigma

    def _store_and_scale(self, X, y, scale):
        """Store ``X``/``y`` as columns and return the (optionally scaled) target.

        Sets ``self.X``, ``self.y`` and ``self.scaler_y`` (``None`` when
        ``scale`` is false).
        """
        self.X = np.array(X).reshape(-1, 1)
        self.y = np.array(y).reshape(-1, 1)
        if scale:
            scaled_y, self.scaler_y = self._scale(self.y)
        else:
            scaled_y = self.y
            self.scaler_y = None
        return scaled_y

    def fit(self, X, y, kernel = None, alpha = 1E-10, scale = True):
        """Fit a Gaussian process with a given kernel.

        Parameters
        ----------
        X : array-like
            Inputs; reshaped to a single column.
        y : array-like
            Targets; reshaped to a single column.
        kernel : kernel object, optional
            Passed to ``GaussianProcessRegressor``, by default None
        alpha : float, optional
            Passed to ``GaussianProcessRegressor``, by default 1E-10
        scale : bool, optional
            Standardize ``y`` before fitting, by default True
        """
        scaled_y = self._store_and_scale(X, y, scale)

        self.best_estimator = GaussianProcessRegressor(kernel = kernel, alpha=alpha)

        # Keep for later access
        self.best_kernel_ = kernel

        self.best_estimator.fit(self.X, scaled_y)

    def cross_validation(self, X, y, cv = 5, scoring = 'neg_mean_squared_error', scale = True, fit = True):
        """Choose a kernel by grid-search cross validation.

        You don't have to call this if you have already decided which
        kernel to use; call ``fit`` instead.

        Parameters
        ----------
        X : array-like
            Inputs; reshaped to a single column.
        y : array-like
            Targets; reshaped to a single column.
        cv : int, optional
            Number of folds, capped at the number of samples, by default 5
        scoring : str, optional
            Scoring passed to ``GridSearchCV``, by default 'neg_mean_squared_error'
        scale : bool, optional
            Standardize ``y`` before fitting, by default True
        fit : bool, optional
            Fit the final estimator on all data, by default True
        """
        scaled_y = self._store_and_scale(X, y, scale)
        cv = min(cv, self.y.shape[0])

        # Candidate kernels
        kernels = [ConstantKernel() * DotProduct() + WhiteKernel(),
                   ConstantKernel() * RBF() + WhiteKernel(),
                   ConstantKernel() * RBF() + WhiteKernel() + ConstantKernel() * DotProduct(),
                   ConstantKernel() * RBF(np.ones(self.X.shape[1])) + WhiteKernel(),
                   ConstantKernel() * RBF(np.ones(self.X.shape[1])) + WhiteKernel() + ConstantKernel() * DotProduct(),
                   ConstantKernel() * Matern(nu=1.5) + WhiteKernel(),
                   ConstantKernel() * Matern(nu=1.5) + WhiteKernel() + ConstantKernel() * DotProduct(),
                   ConstantKernel() * Matern(nu=0.5) + WhiteKernel(),
                   ConstantKernel() * Matern(nu=0.5) + WhiteKernel() + ConstantKernel() * DotProduct(),
                   ConstantKernel() * Matern(nu=2.5) + WhiteKernel(),
                   ConstantKernel() * Matern(nu=2.5) + WhiteKernel() + ConstantKernel() * DotProduct()
                   ]

        params = {
            'kernel':kernels
        }

        # Regressor template; every candidate kernel has a WhiteKernel term.
        gpr = GaussianProcessRegressor(alpha=0)

        # Cross-validate to decide the kernel
        gscv = GridSearchCV(gpr, params, cv = cv, scoring = scoring)
        gscv.fit(self.X, scaled_y)

        # Keep for later access
        self.best_score_ = gscv.best_score_
        self.results = pd.DataFrame.from_dict(gscv.cv_results_)

        # Use the best kernel
        self.best_kernel_ = gscv.best_params_['kernel']
        self.best_estimator = GaussianProcessRegressor(kernel = self.best_kernel_, alpha = 0)

        if fit:
            self.best_estimator.fit(self.X, scaled_y)

    def predict(self, plot_X):
        """Predict mean and standard deviation at ``plot_X``.

        The results are also stored in ``self.mu`` / ``self.sigma``.  When
        the target was standardized they are mapped back to the original
        scale and both returned as columns.
        """
        self.plot_X = plot_X

        self.mu, self.sigma = self.best_estimator.predict(self.plot_X, return_std = True)

        # Undo the scaling (if it was applied)
        if self.scaler_y is None:
            self.sigma = self.sigma.reshape(-1, 1)
        else:
            self.mu, self.sigma = self._inverse_scale(self.scaler_y, self.mu, self.sigma)

        return self.mu, self.sigma

    # ---- plotting -------------------------------------------------------
    def _formatting(self):
        """Set the global matplotlib font family and size."""
        rcParams["font.family"] = "Helvetica"
        rcParams["font.size"] = 13

    @staticmethod
    def _resolve_figsize(figsize):
        """Return ``figsize``, or matplotlib's default if it is unspecified.

        ``None`` itself is checked first so it can never be iterated; a
        tuple containing ``None`` also falls back to the default.
        """
        if figsize is None or any(s is None for s in figsize):
            return rcParams["figure.figsize"]
        return figsize

    def _plot_gp_results(self, offset = 0.05, ylabel = None):
        """Draw the samples, the mean and the 95 % band on ``self.ax``."""
        # Widen the x-range by ``offset`` on each side
        Xmin = min(self.plot_X)
        Xmax = max(self.plot_X)
        diff = Xmax - Xmin
        xlim = [Xmin - diff * offset, Xmax + diff * offset]

        self.ax.set_xlim(xlim)
        self.ax.set_ylabel(ylabel)

        self.ax.plot(self.plot_X, self.mu, color = '#022C5E', label = 'mean', zorder = 1)
        self.ax.scatter(self.X, self.y, color = 'black', label = 'sample', zorder = 2)
        self.ax.fill_between(self.plot_X.squeeze(), (self.mu - 1.9600 * self.sigma).squeeze(), (self.mu + 1.9600 * self.sigma).squeeze(), zorder = 0, color = '#0572F7', label = '95 % confidence interval')

        return self.fig, self.ax

    def plot(self, offset = 0.05, xlabel = None, ylabel = None, figsize = (None, None)):
        """Plot the result of the last ``predict`` call.

        Parameters
        ----------
        offset : float, optional
            Fraction of the x-range added as margin, by default 0.05
        xlabel : str, optional
            Label of the x axis, by default None
        ylabel : str, optional
            Label of the y axis, by default None
        figsize : tuple, optional
            Figure size; matplotlib's default when None or when it contains
            None, by default (None, None)

        Returns
        -------
        matplotlib figure, matplotlib axes
        """
        self._formatting()

        figsize = self._resolve_figsize(figsize)

        # Create the figure
        self.fig = plt.figure(facecolor = 'white', figsize = figsize)
        self.ax = self.fig.add_subplot(111)

        self._plot_gp_results(offset, ylabel)

        self.ax.set_xlabel(xlabel)

        plt.tight_layout()

        return self.fig, self.ax

    def plot_with_acq(self, acquisition_function, offset = 0.05, xlabel = None, ylabel = None, figsize = (None, None)):
        """Plot the last prediction with an acquisition function below it.

        Parameters
        ----------
        acquisition_function : callable
            Object returning ``(values, name)`` when called and exposing
            ``plot_X``.
        offset : float, optional
            Fraction of the x-range added as margin, by default 0.05
        xlabel : str, optional
            Label of the x axis, by default None
        ylabel : str, optional
            Label of the upper y axis, by default None
        figsize : tuple, optional
            Figure size; matplotlib's default when None or when it contains
            None, by default (None, None)

        Returns
        -------
        matplotlib figure, matplotlib axes, matplotlib axes
        """
        self._formatting()

        af = acquisition_function

        figsize = self._resolve_figsize(figsize)

        # Two stacked subplots of different heights
        gridspec_master = GridSpec(2, 1, height_ratios = [3, 1])

        # Create the figure
        self.fig = plt.figure(facecolor = 'white', figsize = figsize)
        self.ax = self.fig.add_subplot(gridspec_master[0])

        self._plot_gp_results(offset, ylabel)

        # Axes for the acquisition function
        self.ax2 = self.fig.add_subplot(gridspec_master[1])

        # Values of the acquisition function and its display name
        acq, acq_name = af()

        self.ax2.plot(af.plot_X, acq)

        self.ax2.set_xlabel(xlabel)
        self.ax2.set_ylabel(acq_name)

        plt.tight_layout()

        return self.fig, self.ax, self.ax2


# ---- acquisition functions ----------------------------------------------
class _acquisition_function:
    """Base class: calling an instance returns ``(values, name)``."""

    def __init__(self):
        pass

    def __call__(self):
        # Subclasses return an np.ndarray and the acquisition function's name.
        return np.ones(1), 'name'

    def get_optimum(self):
        """Return index, input, mean and std at the acquisition maximum."""
        index_opt = np.argmax(self.__call__()[0])
        X_opt = self.plot_X[index_opt][0]
        mu_opt = self.mu[index_opt]
        sigma_opt = self.sigma[index_opt]
        dict_opt = {
            'i':index_opt,
            'X':X_opt,
            'mu':mu_opt,
            'sigma':sigma_opt
        }
        return dict_opt


class upper_confidence_bound(_acquisition_function):
    """Confidence-bound acquisition evaluated on ``plot_X``.

    NOTE(review): the value is the best sample mean plus a multiple of the
    predictive std; the predictive mean at ``plot_X`` is not added, so the
    maximum coincides with the maximum of sigma -- confirm this is intended.
    """

    def __init__(self, gpr, plot_X, X, y):
        # Display name
        self.name = 'Upper Confidence Bound'

        self.plot_X = plot_X
        self.mu, self.sigma = gpr.predict(self.plot_X, return_std=True)
        self.mu_sample = gpr.predict(X)

        n_sample = X.shape[0]
        mu_sample_opt = np.max(self.mu_sample)

        self.ucb = mu_sample_opt + np.sqrt(np.log(n_sample) / n_sample) * self.sigma

    def __call__(self):
        return self.ucb, self.name


class expected_improvement(_acquisition_function):
    """Expected-improvement acquisition evaluated on ``plot_X``."""

    def __init__(self, gpr, plot_X, X, y, xi=0.01):
        # Display name
        self.name = 'Expected Improvement'

        self.plot_X = plot_X
        mu, sigma = gpr.predict(self.plot_X, return_std=True)
        self.mu_sample = gpr.predict(X)

        # Force both into columns so they combine element-wise; a 1-D mean
        # against a column std would otherwise broadcast to an (n, n) array.
        self.mu = np.asarray(mu).reshape(-1, 1)
        self.sigma = np.asarray(sigma).reshape(-1, 1)

        mu_sample_opt = np.max(self.mu_sample)

        with np.errstate(divide='warn'):
            imp = self.mu - mu_sample_opt - xi
            Z = imp / self.sigma
            self.ei = imp * norm.cdf(Z) + self.sigma * norm.pdf(Z)
            # No improvement can be expected where there is no uncertainty.
            self.ei[self.sigma == 0.0] = 0.0

    def __call__(self):
        return self.ei, self.name
"alpha": [1e0, 1e-1, 1e-2, 1e-3], "kernel": [ ExpSineSquared(l, p) for l in np.logspace(-2, 2, 10) for p in np.logspace(0, 2, 10) ] } kr = GridSearchCV(KernelRidge(), cv=5, param_grid=param_grid) stime = time.time() kr.fit(X, y) print("Time for KRR fitting: %.3f" % (time.time() - stime)) gp_kernel = ExpSineSquared(1.0, 5.0, periodicity_bounds=(1e-2, 1e1)) \ + WhiteKernel(1e-1) gpr = GaussianProcessRegressor(kernel=gp_kernel) stime = time.time() gpr.fit(X, y) print("Time for GPR fitting: %.3f" % (time.time() - stime)) # Predict using kernel ridge X_plot = np.linspace(0, 20, 10000)[:, None] stime = time.time() y_kr = kr.predict(X_plot) print("Time for KRR prediction: %.3f" % (time.time() - stime)) # Predict using kernel ridge stime = time.time() y_gpr = gpr.predict(X_plot, return_std=False) print("Time for GPR prediction: %.3f" % (time.time() - stime)) stime = time.time() y_gpr, y_std = gpr.predict(X_plot, return_std=True)
# Report the MAE of the predictions already held in y_pred.
f.write('\nMAE for AdaBoost Regressor: ' +
        str(mean_absolute_error(y_test, y_pred)))

# Linear regression on the scaled features.
# NOTE(review): the `normalize` argument is not accepted by recent
# scikit-learn releases -- confirm the targeted version.
best_linear_regressor = LinearRegression(copy_X=True,
                                         fit_intercept=True,
                                         normalize=False)
best_linear_regressor.fit(x_train_scaled, y_train)
y_pred = best_linear_regressor.predict(x_test_scaled)
f.write('\nMAE for Linear Regressor: ' +
        str(mean_absolute_error(y_test, y_pred)))

# Multi-layer perceptron with a logistic activation and a fixed seed.
best_neural_network_regressor = MLPRegressor(activation='logistic',
                                             alpha=0.002,
                                             hidden_layer_sizes=10,
                                             learning_rate='constant',
                                             learning_rate_init=0.01,
                                             random_state=0)
best_neural_network_regressor.fit(x_train_scaled, y_train)
y_pred = best_neural_network_regressor.predict(x_test_scaled)
f.write('\nMAE for Neural Network Regressor: ' +
        str(mean_absolute_error(y_test, y_pred)))

# Gaussian process with a RationalQuadratic kernel scaled by 1**2.
best_gaussian_regressor = GaussianProcessRegressor(
    kernel=1**2 * RationalQuadratic(alpha=0.1, length_scale=1))
best_gaussian_regressor.fit(x_train_scaled, y_train)
y_pred = best_gaussian_regressor.predict(x_test_scaled)
f.write('\nMAE for Gaussian Regressor: ' +
        str(mean_absolute_error(y_test, y_pred)))

# In[ ]:
# Select the target, the location columns and two feature groups by
# column label.
Y = data[[0]]
locs = data[[26,27]]
locs  # bare expression; only has a visible effect in an interactive session
environment = data[[1,2,3,4,5,6,7,26,27]]
coocs = data[[8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27]]

# Constant * RBF kernel; with optimizer=None the hyper-parameters stay at
# these initial values.
kernel = C(10.0, (1e-3, 1e3)) * RBF(19, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, alpha=0.01, optimizer=None, normalize_y=True)
gp.fit(environment.values,Y)

# Drop the first raster model, then build and load raster data for `polygon`.
# NOTE(review): the bare map(...) calls only run eagerly on Python 2; on
# Python 3 map is lazy and getRaster() would never be called -- confirm
# the interpreter this targets.
rm.raster_models.pop(0)
rdata = map(lambda r : tls.RasterData(r,polygon),rm.raster_models)
map(lambda r : r.getRaster(),rdata)

# Concatenate the exact cells of all neighbours and combine their polygons.
cells = reduce(lambda a,b : a+b , map(lambda l : l.getExactCells(),n.neighbours))
polys = reduce(lambda a,b : a + b , map(lambda l : l.polygon,cells))
rdata2 = map(lambda r : tls.RasterData(r,polys),rm.raster_models)
map(lambda r : r.getRaster(),rdata2)
# Remove null cells:
#ocs_free = filter(lambda l : l, ocs)
#trees_f = map(lambda l : TreeNeo(l),ocs_free)
1*ExpSineSquared(length_scale=30,periodicity=365, \ length_scale_bounds=(0.1,3650),periodicity_bounds=(30,3650)) \ + WhiteKernel(noise_level=1,noise_level_bounds=(0.1,50)), \ 1*Matern(length_scale=30, length_scale_bounds=(0.1, 3650), \ nu=1.5) \ + WhiteKernel(noise_level=1,noise_level_bounds=(0.1,50))] # Plot data and fitted GPR predictions # Display posterior kernel details and Log Marginal-Likelihood # Display training time of each GPR kernel for i in range(0, 2): for j in range(0, 2): t1 = time() gp = GaussianProcessRegressor(kernel=kern[i + 1 + j * 2 - 1], normalize_y=True) gp.fit(x_train, y_train) y_mean, y_std = gp.predict(X_[:, np.newaxis], return_std=True) t2 = time() print('@@@@@@@@@@@@@@@@@@@@@@@@@') print('Kernel(prior): %s\nFit+Predict runtime: %.2f' % \ (kern[i+1+j*2-1],t2-t1)) print('@@@@@@@@@@@@@@@@@@@@@@@@@') ax[i,j].scatter(x_train[:,0],y_train[:,0],s=5,c='green',marker='o', \ edgecolors='k',alpha=0.6) ax[i, j].plot(X_, y_mean, c='b', lw=1.5) ax[i,j].fill_between(X_,y_mean[:,0]-y_std,y_mean[:,0]+y_std,alpha=0.4, \ color='b') ax[i,j].set_title('%s\nLog-Marginal-Likelihood: %.2f' % (gp.kernel_, \ gp.log_marginal_likelihood(gp.kernel_.theta)),size=8) ax[i, j].xaxis.set_tick_params(labelsize=8) ax[i, j].xaxis.set_label_text("Days from 1/1/1960")
def bo(X, y, n_iter=None):
    """Run a 1-D Bayesian-optimisation loop on the module-level objective `f`.

    Each round fits a Gaussian process (Matern + white-noise kernel) to the
    observations, builds the expected-improvement acquisition function with
    `a_EI`, picks the point of a fixed 1024-point grid on [0, 10] that
    maximises it, prints that point, evaluates `f` there and appends the new
    observation.

    :param X: observed inputs, one row per observation.
    :param y: observed objective values, aligned with `X`.
    :param n_iter: number of rounds to run. ``None`` (the default) keeps
        iterating until interrupted, which matches the historical behaviour
        but no longer grows the call stack on every round.
    :return: the extended ``(X, y)`` arrays once `n_iter` rounds are done.
    """
    # Candidate grid over the search interval.
    x = np.atleast_2d(np.linspace(0, 10, 1024)).T

    iteration = 0
    while n_iter is None or iteration < n_iter:
        kernel = kernels.Matern() + kernels.WhiteKernel()
        gp = GaussianProcessRegressor(
            kernel=kernel,
            n_restarts_optimizer=16,
        )  # normalize_y=True)
        gp.fit(X, y)
        # FIXME is it possible for mu(x) < min over the observed values?
        # Is that because the GP prior states mu(x) = 0?  If so, unobserved
        # regions get an extra boost wherever the prior dominates (the loss
        # that is ultimately optimised lies in [0, 1]).

        # TODO getting the gradient of the Gaussian process would unlock all
        # gradient-based optimisation methods (including L-BFGS).
        a = a_EI(gp, x_obs=X, y_obs=y, theta=0.01)

        # TODO use a reasonable optimiser; an exhaustive grid scan does not
        # scale.  scipy.optimize.brute(..., finish=fmin) was tried, but the
        # finishing step can return a point outside the requested range
        # (e.g. 10.22 for ranges=((0, 10),)), apparently when the function
        # is nearly flat.
        (x_min_,) = max(x, key=a)
        print(x_min_)

        # Evaluate the objective at the suggestion and record the result.
        fx_min_ = f(x_min_)
        X = np.vstack((X, [x_min_]))
        y = np.hstack((y, [fx_min_]))
        iteration += 1

    return X, y
class BayesianOptimization(object):
    """Maximise a target function with a Gaussian-process surrogate.

    Observations are stored in a `TargetSpace`; the next point to probe is
    obtained by maximising an acquisition function with `acq_max`.
    """

    def __init__(self, f, pbounds, steps, constraints, constraintParams,
                 extraParam=None, random_state=None, verbose=1):
        """
        :param f:
            Function to be maximized.

        :param pbounds:
            Dictionary with parameters names as keys and a tuple with minimum
            and maximum values.

        :param steps:
            Stored and forwarded to the target space.

        :param constraints:
            Stored and forwarded to the target space.

        :param constraintParams:
            Stored and forwarded to the target space.

        :param extraParam:
            Optional value stored and forwarded to the target space.

        :param random_state:
            Seed or random state; normalised with `ensure_rng`.

        :param verbose:
            Whether or not to print progress.
        """
        # Store the original dictionary
        self.pbounds = pbounds
        self.steps = steps
        self.constraints = constraints
        self.constraintParams = constraintParams
        self.extraParam = extraParam

        self.random_state = ensure_rng(random_state)

        # Data structure containing the function to be optimized, the bounds
        # of its domain, and a record of the evaluations we have done so far
        self.space = TargetSpace(f, pbounds, steps, constraints,
                                 constraintParams, extraParam, random_state)

        # Initialization flag
        self.initialized = False

        # Initialization lists --- stores starting points before process
        # begins
        self.init_points = []
        self.x_init = []
        self.y_init = []

        # Counter of iterations
        self.i = 0

        # Internal GP regressor
        self.gp = GaussianProcessRegressor(
            kernel=Matern(nu=2.5),
            n_restarts_optimizer=25,
            random_state=self.random_state
        )

        # Utility Function placeholder
        self.util = None

        # PrintLog object
        self.plog = PrintLog(self.space.keys)

        # Output dictionary
        self.res = {}
        self.res['max'] = {'max_val': None,
                           'max_params': None}
        self.res['all'] = {'values': [], 'params': []}

        # non-public config for maximizing the aquisition function
        # (used to speedup tests, but generally leave these as is)
        self._acqkw = {'n_warmup': 100000, 'n_iter': 250}

        # Verbose
        self.verbose = verbose

    def init(self, init_points):
        """
        Initialization method to kick start the optimization process. It is a
        combination of points passed by the user, and randomly sampled ones.

        :param init_points:
            Number of random points to probe.
        """
        # Concatenate new random points to possible existing
        # points from self.explore method.
        rand_points = self.space.random_points(init_points)
        self.init_points.extend(rand_points)

        # Evaluate target function at all initialization points
        for x in self.init_points:
            self._observe_point(x)

        # Add the points from `self.initialize` to the observations
        if self.x_init:
            x_init = np.vstack(self.x_init)
            y_init = np.hstack(self.y_init)
            for x, y in zip(x_init, y_init):
                self.space.add_observation(x, y)
                if self.verbose:
                    self.plog.print_step(x, y)

        # Updates the flag
        self.initialized = True

    def _observe_point(self, x):
        """Evaluate the target at `x`, log the step if verbose, return y."""
        y = self.space.observe_point(x)
        if self.verbose:
            self.plog.print_step(x, y)
        return y

    def _suggest(self, y_max):
        """Return the argmax of the acquisition function for the fitted GP."""
        return acq_max(ac=self.util.utility,
                       gp=self.gp,
                       y_max=y_max,
                       bounds=self.space.bounds,
                       steps=self.space.steps,
                       keys=self.space.keys,
                       constraints=self.space.constraints,
                       constraintParams=self.space.constraintParams,
                       random_state=self.random_state,
                       **self._acqkw)

    def explore(self, points_dict, eager=False):
        """Method to explore user defined points.

        :param points_dict:
            Dictionary of points, converted with the target space's
            `_dict_to_points`.
        :param eager: if True, these points are evaluated immediately;
            otherwise they replace the list of pending initialization points.
        """
        if eager:
            self.plog.reset_timer()
            if self.verbose:
                self.plog.print_header(initialization=True)

            points = self.space._dict_to_points(points_dict)
            for x in points:
                self._observe_point(x)
        else:
            points = self.space._dict_to_points(points_dict)
            self.init_points = points

    def initialize(self, points_dict):
        """
        Method to introduce points for which the target function value is
        known

        :param points_dict:
            dictionary with self.keys and 'target' as keys, and list of
            corresponding values as values.

        ex:
            {
                'target': [-1166.19102, -1142.71370, -1138.68293],
                'alpha': [7.0034, 6.6186, 6.0798],
                'colsample_bytree': [0.6849, 0.7314, 0.9540],
                'gamma': [8.3673, 3.5455, 2.3281],
            }

        :return:
        """
        targets = points_dict['target']
        self.y_init.extend(targets)
        # One coordinate row per target, ordered like self.space.keys.
        for i in range(len(targets)):
            self.x_init.append(
                [points_dict[key][i] for key in self.space.keys])

    def initialize_df(self, points_df):
        """
        Method to introduce point for which the target function
        value is known from pandas dataframe file

        :param points_df:
            pandas dataframe with columns (target, {list of columns matching
            self.keys})

        ex:
              target        alpha      colsample_bytree        gamma
        -1166.19102       7.0034                0.6849       8.3673
        -1142.71370       6.6186                0.7314       3.5455
        -1138.68293       6.0798                0.9540       2.3281
        -1146.65974       2.4566                0.9290       0.3456
        -1160.32854       1.9821                0.5298       8.7863

        :return:
        """
        for i in points_df.index:
            self.y_init.append(points_df.loc[i, 'target'])
            self.x_init.append(
                [points_df.loc[i, key] for key in self.space.keys])

    def set_bounds(self, new_bounds):
        """
        A method that allows changing the lower and upper searching bounds

        :param new_bounds:
            A dictionary with the parameter name and its new bounds
        """
        # Update the internal object stored dict
        self.pbounds.update(new_bounds)
        self.space.set_bounds(new_bounds)

    def maximize(self,
                 init_points=5,
                 n_iter=25,
                 acq='ucb',
                 kappa=2.576,
                 xi=0.0,
                 **gp_params):
        """
        Main optimization method.

        Parameters
        ----------
        :param init_points:
            Number of randomly chosen points to sample the
            target function before fitting the gp.

        :param n_iter:
            Total number of times the process is to repeated. Note that
            currently this methods does not have stopping criteria (due to a
            number of reasons), therefore the total number of points to be
            sampled must be specified.

        :param acq:
            Acquisition function to be used, defaults to Upper Confidence
            Bound.

        :param kappa:
            Passed to `UtilityFunction` together with `acq` and `xi`.

        :param xi:
            Passed to `UtilityFunction` together with `acq` and `kappa`.

        :param gp_params:
            Parameters to be passed to the Scikit-learn Gaussian Process
            object

        Returns
        -------
        :return: Nothing

        Example:
        >>> xs = np.linspace(-2, 10, 10000)
        >>> f = np.exp(-(xs - 2)**2) + np.exp(-(xs - 6)**2/10) + 1/ (xs**2 + 1)
        >>> bo = BayesianOptimization(f=lambda x: f[int(x)],
        >>>                           pbounds={"x": (0, len(f)-1)})
        >>> bo.maximize(init_points=2, n_iter=25, acq="ucb", kappa=1)
        """
        # Reset timer
        self.plog.reset_timer()

        # Set acquisition function
        self.util = UtilityFunction(kind=acq, kappa=kappa, xi=xi)

        # Initialize x, y and find current y_max
        if not self.initialized:
            if self.verbose:
                self.plog.print_header()
            self.init(init_points)

        y_max = self.space.Y.max()

        # Set parameters if any was passed
        self.gp.set_params(**gp_params)

        # Fit the GP on everything observed so far
        self.gp.fit(self.space.X, self.space.Y)

        # Finding argmax of the acquisition function.
        x_max = self._suggest(y_max)

        # Print new header
        if self.verbose:
            self.plog.print_header(initialization=False)

        # Iterative process of searching for the maximum. At each round the
        # most recent x and y values probed are added to the X and Y arrays
        # used to train the Gaussian Process. Next the maximum known value
        # of the target function is found and passed to the acq_max function.
        # The arg_max of the acquisition function is found and this will be
        # the next probed value of the target function in the next round.
        for _ in range(n_iter):
            # Test if x_max is repeated, if it is, draw another one at random
            # If it is repeated, print a warning
            pwarning = False
            while x_max in self.space:
                x_max = self.space.random_points(1)[0]
                pwarning = True

            # Append most recently generated values to X and Y arrays
            y = self.space.observe_point(x_max)
            if self.verbose:
                self.plog.print_step(x_max, y, pwarning)

            # Updating the GP.
            self.gp.fit(self.space.X, self.space.Y)

            # Update the best params seen so far
            self.res['max'] = self.space.max_point()
            self.res['all']['values'].append(y)
            self.res['all']['params'].append(dict(zip(self.space.keys,
                                                      x_max)))

            # Update maximum value to search for next probe point.
            if self.space.Y[-1] > y_max:
                y_max = self.space.Y[-1]

            # Maximize acquisition function to find next probing point
            x_max = self._suggest(y_max)

            # Keep track of total number of iterations
            self.i += 1

        # Print a final report if verbose active.
        if self.verbose:
            self.plog.print_summary()

    def points_to_csv(self, file_name):
        """
        After training all points for which we know target variable
        (both from initialization and optimization) are saved

        :param file_name: name of the file where points will be saved in the
            csv format

        :return: None
        """
        points = np.hstack((self.space.X,
                            np.expand_dims(self.space.Y, axis=1)))
        header = ','.join(self.space.keys + ['target'])
        np.savetxt(file_name, points, header=header, delimiter=',',
                   comments='')

    # --- API compatibility ---

    @property
    def X(self):
        warnings.warn("use self.space.X instead", DeprecationWarning)
        return self.space.X

    @property
    def Y(self):
        warnings.warn("use self.space.Y instead", DeprecationWarning)
        return self.space.Y

    @property
    def keys(self):
        warnings.warn("use self.space.keys instead", DeprecationWarning)
        return self.space.keys

    @property
    def f(self):
        warnings.warn("use self.space.target_func instead",
                      DeprecationWarning)
        return self.space.target_func

    @property
    def bounds(self):
        # The replacement attribute is `bounds`, which is what is returned.
        warnings.warn("use self.space.bounds instead", DeprecationWarning)
        return self.space.bounds

    @property
    def dim(self):
        warnings.warn("use self.space.dim instead", DeprecationWarning)
        return self.space.dim
def de_optimizer(obj_func, initial_theta, bounds):
    """Kernel-hyperparameter optimizer based on differential evolution.

    Has the ``(obj_func, initial_theta, bounds)`` signature and the
    ``(theta_opt, func_min)`` return value that `GaussianProcessRegressor`
    expects of a custom ``optimizer`` callable.

    :param obj_func: objective to minimise; always called here with
        ``eval_gradient=False``.
    :param initial_theta: unused; differential evolution samples its own
        starting population from `bounds`.
    :param bounds: search bounds handed to `differential_evolution`.
    :return: tuple ``(best theta found, objective value at that theta)``.
    """
    res = differential_evolution(
        lambda theta: obj_func(theta, eval_gradient=False),
        bounds, maxiter=20, disp=False, polish=False)
    return res.x, obj_func(res.x, eval_gradient=False)


# Specify stationary and non-stationary kernel
kernel_matern = C(1.0, (1e-10, 1000)) \
    * Matern(length_scale_bounds=(1e-1, 1e3), nu=1.5)
gp_matern = GaussianProcessRegressor(kernel=kernel_matern)

kernel_lls = C(1.0, (1e-10, 1000)) \
    * LocalLengthScalesKernel.construct(X, l_L=0.1, l_U=2.0, l_samples=5)
gp_lls = GaussianProcessRegressor(kernel=kernel_lls, optimizer=de_optimizer)

# Fit GPs
gp_matern.fit(X, y)
gp_lls.fit(X, y)

# Single-argument print(...) calls produce the same output on Python 2 and 3.
print("Learned kernel Matern: %s" % gp_matern.kernel_)
print("Log-marginal-likelihood Matern: %s"
      % gp_matern.log_marginal_likelihood(gp_matern.kernel_.theta))

print("Learned kernel LLS: %s" % gp_lls.kernel_)
print("Log-marginal-likelihood LLS: %s"
      % gp_lls.log_marginal_likelihood(gp_lls.kernel_.theta))

# Compute GP mean and standard deviation on test data
X_ = np.linspace(-1, 1, 500)

y_mean_lls, y_std_lls = gp_lls.predict(X_[:, np.newaxis], return_std=True)
return res.x, obj_func(res.x, eval_gradient=False) kernel_lls = C(1.0, (1e-10, 1000)) \ * LocalLengthScalesKernel.construct(X_n, l_L=lower_l, l_U=upper_l, l_samples=l_samples_input)\ + HeteroscedasticKernel.construct(prototypes, 1e-1, (1e-5, 50.0), gamma=1.0, gamma_bounds="fixed") finish_training_n = 0 while finish_training_n == 0: print("Start GPR training - density of length ="+str(len(X_n))) start = timeit.default_timer() #density fitting gp = GaussianProcessRegressor(kernel=kernel_lls, optimizer=de_optimizer, alpha = (y_n_TS_err)**2.) try: gp.fit(X_n, y_n_TS.reshape(-1,1)) finish_training_n = 1 except MemoryError: print('Memory error') except: print('Non-memory error') stop = timeit.default_timer() print('Time: ', stop - start) prototypes = KMeans(n_clusters=N_clusters).fit(X_T).cluster_centers_ kernel_lls_T = C(1.0, (1e-10, 1000)) \ * LocalLengthScalesKernel.construct(X_T, l_L=lower_l, l_U=upper_l, l_samples=l_samples_input)\ + HeteroscedasticKernel.construct(prototypes, 1e-1, (1e-5, 50.0), gamma=1.0, gamma_bounds="fixed")
def test_GP_example(self):
    """Fit a GP to x*sin(x), first without noise and then with noise, and plot.

    Both fits are drawn with the true function, the observations, the
    prediction and a 95% confidence band; the figure is shown at the end.
    There are no assertions: the test passes if everything runs.
    """
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

    np.random.seed(1)

    def f(x):
        """The function to predict."""
        return x * np.sin(x)

    # Raw string: '\,' and '\s' are LaTeX commands, not Python escapes.
    true_label = r'$f(x) = x\,\sin(x)$'

    # ----------------------------------------------------------------------
    #  First the noiseless case
    X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T

    # Observations
    y = f(X).ravel()

    # Mesh the input space for evaluations of the real function, the
    # prediction and its MSE
    x = np.atleast_2d(np.linspace(0, 10, 1000)).T

    # Instantiate a Gaussian Process model
    kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, y)

    # Make the prediction on the meshed x-axis (ask for MSE as well)
    y_pred, sigma = gp.predict(x, return_std=True)

    # Plot the function, the prediction and the 95% confidence interval
    # based on the MSE
    plt.figure()
    plt.plot(x, f(x), 'r:', label=true_label)
    plt.plot(X, y, 'r.', markersize=10, label=u'Observations')
    plt.plot(x, y_pred, 'b-', label=u'Prediction')
    plt.fill(np.concatenate([x, x[::-1]]),
             np.concatenate([y_pred - 1.9600 * sigma,
                             (y_pred + 1.9600 * sigma)[::-1]]),
             alpha=.5, fc='b', ec='None', label='95% confidence interval')
    plt.xlabel('$x$')
    plt.ylabel('$f(x)$')
    plt.ylim(-10, 20)
    plt.legend(loc='upper left')

    # ----------------------------------------------------------------------
    # now the noisy case
    X = np.linspace(0.1, 9.9, 20)
    X = np.atleast_2d(X).T

    # Observations and noise
    y = f(X).ravel()
    dy = 0.5 + 1.0 * np.random.random(y.shape)
    noise = np.random.normal(0, dy)
    y += noise

    # Instantiate a Gaussian Process model.  `alpha` is added to the diagonal
    # of the kernel matrix, i.e. it is the per-sample noise variance, so it
    # must be dy ** 2 (not the relative error (dy / y) ** 2, which also blows
    # up wherever y is close to zero).
    gp = GaussianProcessRegressor(kernel=kernel, alpha=dy ** 2,
                                  n_restarts_optimizer=10)

    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, y)

    # Make the prediction on the meshed x-axis (ask for MSE as well)
    y_pred, sigma = gp.predict(x, return_std=True)

    # Plot the function, the prediction and the 95% confidence interval
    # based on the MSE
    plt.plot(x, f(x), 'r:', label=true_label)
    plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10,
                 label=u'Observations')
    plt.plot(x, y_pred, 'b-', label=u'Prediction')
    plt.fill(np.concatenate([x, x[::-1]]),
             np.concatenate([y_pred - 1.9600 * sigma,
                             (y_pred + 1.9600 * sigma)[::-1]]),
             alpha=.5, fc='b', ec='None', label='95% confidence interval')
    plt.xlabel('$x$')
    plt.ylabel('$f(x)$')
    plt.ylim(-10, 20)
    plt.legend(loc='upper left')

    plt.show()
def bayesianoptimization(X, y, candidates_of_X, acquisition_function_flag,
                         cumulative_variance=None):
    """
    Bayesian optimization

    Gaussian process regression model is constructed between X and y.
    A candidate of X with the highest acquisition function is selected using
    the model from candidates of X.

    Parameters
    ----------
    X: numpy.array or pandas.DataFrame
        m x n matrix of X-variables of training dataset (m is the number of
        samples and n is the number of X-variables)
    y: numpy.array or pandas.DataFrame
        m x 1 vector of a y-variable of training dataset
    candidates_of_X: numpy.array or pandas.DataFrame
        Candidates of X
    acquisition_function_flag: int
        1: Mutual information (MI), 2: Expected improvement(EI),
        3: Probability of improvement (PI) [0: Estimated y-values]
    cumulative_variance: numpy.array or pandas.DataFrame
        cumulative variance in mutual information (MI)
        [acquisition_function_flag=1]; one value per candidate.
        Defaults to zeros.

    Returns
    -------
    selected_candidate_number : int
        selected number of candidates_of_X
    selected_X_candidate : numpy.array
        selected X candidate
    cumulative_variance: numpy.array
        cumulative variance in mutual information (MI)
        [acquisition_function_flag=1]

    Raises
    ------
    ValueError
        If acquisition_function_flag is not one of 0, 1, 2, 3.
    """
    if acquisition_function_flag not in (0, 1, 2, 3):
        raise ValueError(
            'acquisition_function_flag must be 0, 1, 2 or 3, got {!r}'.format(
                acquisition_function_flag))

    X = np.array(X)
    y = np.array(y)
    # Converted as well so the positional row lookup at the end also works
    # when the candidates arrive as a DataFrame.
    candidates_of_X = np.array(candidates_of_X)
    if cumulative_variance is None:
        # The accumulator is combined element-wise with the predictive
        # variance of the candidates, so it needs one well-defined (zero)
        # entry per candidate.
        cumulative_variance = np.zeros(len(candidates_of_X))
    else:
        cumulative_variance = np.array(cumulative_variance)

    relaxation_value = 0.01
    delta = 10**-6
    alpha = np.log(2 / delta)

    # Autoscale with the training statistics (candidates use X's mean/std).
    x_mean = X.mean(axis=0)
    x_std = X.std(axis=0, ddof=1)
    autoscaled_X = (X - x_mean) / x_std
    autoscaled_candidates_of_X = (candidates_of_X - x_mean) / x_std
    autoscaled_y = (y - y.mean(axis=0)) / y.std(axis=0, ddof=1)

    gaussian_process_model = GaussianProcessRegressor(
        ConstantKernel() * RBF() + WhiteKernel(), alpha=0)
    gaussian_process_model.fit(autoscaled_X, autoscaled_y)
    autoscaled_estimated_y_test, autoscaled_std_of_estimated_y_test = \
        gaussian_process_model.predict(autoscaled_candidates_of_X,
                                       return_std=True)

    if acquisition_function_flag == 1:
        # Mutual information
        acquisition_function_values = autoscaled_estimated_y_test + alpha**0.5 * (
            (autoscaled_std_of_estimated_y_test**2 + cumulative_variance)**0.5
            - cumulative_variance**0.5)
        cumulative_variance = (cumulative_variance
                               + autoscaled_std_of_estimated_y_test**2)
    elif acquisition_function_flag in (2, 3):
        # Predicted gain over the best training value, less a small margin.
        improvement = (autoscaled_estimated_y_test - max(autoscaled_y)
                       - relaxation_value)
        standardized_improvement = (improvement
                                    / autoscaled_std_of_estimated_y_test)
        if acquisition_function_flag == 2:
            # Expected improvement
            acquisition_function_values = (
                improvement * norm.cdf(standardized_improvement)
                + autoscaled_std_of_estimated_y_test
                * norm.pdf(standardized_improvement))
        else:
            # Probability of improvement
            acquisition_function_values = norm.cdf(standardized_improvement)
    else:
        # Flag 0: rank the candidates by the predicted value itself.
        acquisition_function_values = autoscaled_estimated_y_test

    selected_candidate_number = np.where(
        acquisition_function_values == max(acquisition_function_values))[0][0]
    selected_X_candidate = candidates_of_X[selected_candidate_number, :]

    return selected_candidate_number, selected_X_candidate, cumulative_variance
def nrlmfb_cv_eval(method, dataset, cv_data, intMat, Kd, Kt, cvs, para, logger,
                   scoring='auc', gpmi=None, params=None):
    """Tune NRLMFb hyperparameters by cross-validation and log the best one.

    A parameter grid is built (from `params` when given, otherwise from a
    built-in grid of powers of two) and searched either with GP-MI Bayesian
    optimisation (`gpmi` given) or exhaustively (`gpmi` is None).  The best
    setting and its scores are written to `logger`; nothing is returned.

    :param method: unused; kept for interface compatibility.
    :param dataset: dataset name, used in log messages only.
    :param cv_data: mapping forwarded to `train`; its first key seeds NumPy's
        random generator for the GP-MI initial sample.
    :param intMat: forwarded to `train`.
    :param Kd: forwarded to `train`.
    :param Kt: forwarded to `train`.
    :param cvs: cross-validation setting, used in log messages only.
    :param para: dict providing 'c', 'K1' and 'K2' for the built-in grid.
    :param logger: object with an ``info`` method.
    :param scoring: 'auc' to optimise the mean AUC, anything else for the
        mean AUPR (GP-MI only; the grid search always ranks by AUC).
    :param gpmi: None for grid search, or a dict with keys 'n_init',
        'delta' and 'max_iter' configuring GP-MI.
    :param params: optional iterable of dicts with keys 'c', 'K1', 'K2', 'r',
        'lambda_d', 'lambda_t', 'alpha', 'beta', 'theta', 'max_iter', 'eta1',
        'eta2'.  Entries with lambda_d != lambda_t are skipped.
    :raises ValueError: if the resulting parameter grid is empty.
    """
    from itertools import product

    def run_cv(model):
        """Cross-validate `model`; return (auc, aupr, auc_conf, aupr_conf)."""
        aupr_vec, auc_vec = train(model, cv_data, intMat, Kd, Kt)
        aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
        auc_avg, auc_conf = mean_confidence_interval(auc_vec)
        return auc_avg, aupr_avg, auc_conf, aupr_conf

    def describe(model):
        """Header line identifying the dataset, CV setting and model."""
        return "Dataset:" + dataset + " CVS: " + str(cvs) + "\n" + str(model)

    # Generate parameters
    params_grid, x_grid = [], []
    if params is not None:
        for param in params:
            if param['lambda_d'] != param['lambda_t']:
                continue
            params_grid.append({
                'cfix': param['c'],
                'K1': param['K1'],
                'K2': param['K2'],
                'num_factors': param['r'],
                'lambda_d': param['lambda_d'],
                'lambda_t': param['lambda_t'],
                'alpha': param['alpha'],
                'beta': param['beta'],
                'theta': param['theta'],
                'max_iter': param['max_iter'],
                'eta1': param['eta1'],
                'eta2': param['eta2']
            })
            x_grid.append([
                param['c'], param['K1'], param['K2'], param['r'],
                param['lambda_d'], param['lambda_t'], param['alpha'],
                param['beta'], param['theta'], param['max_iter'],
                param['eta1'], param['eta2']
            ])
    else:
        # Plain Python ranges rather than np.arange: raising an integer to a
        # negative NumPy integer power is an error, while 2 ** -5 with
        # Python ints yields the intended float.
        exponent_grid = product([50, 100], range(-5, 2), range(-5, 3),
                                range(-5, 1), range(-3, 1), range(5, 10),
                                range(1, 5))
        for r, e_lambda, e_alpha, e_beta, e_theta, e_eta1, e_eta2 in exponent_grid:
            params_grid.append({
                'cfix': para['c'],
                'K1': para['K1'],
                'K2': para['K2'],
                'num_factors': r,
                'lambda_d': 2**e_lambda,
                'lambda_t': 2**e_lambda,
                'alpha': 2**e_alpha,
                'beta': 2**e_beta,
                'eta1': 2**e_eta1,
                'eta2': 2**e_eta2,
                'theta': 2**e_theta,
                'max_iter': 100
            })
            # NOTE(review): the last three GP features (2a-1, 2b-1, 100) do
            # not follow the theta/max_iter/eta1/eta2 layout used when
            # `params` is given -- confirm this encoding is intended.
            x_grid.append([
                para['c'], para['K1'], para['K2'], r, 2**e_lambda,
                2**e_lambda, 2**e_alpha, 2**e_beta, 2**e_theta,
                2 * e_eta1 - 1, 2 * e_eta2 - 1, 100
            ])

    if not params_grid:
        raise ValueError("no parameter combinations to evaluate")

    # GP-MI (Bayesian optimization)
    if gpmi is not None:
        # Initialization
        start = time.time()
        # Always draw at least one initial sample (a fractional or
        # non-positive n_init would otherwise leave nothing to fit the GP on).
        n_init = max(int(gpmi['n_init']), 1)
        best_score = 0
        count = 1

        np.random.seed(list(cv_data.keys())[0])
        i_init = np.random.permutation(range(len(params_grid)))[0:n_init]
        X = np.array([x_grid[i] for i in i_init])
        y = np.array([])
        for i in i_init:
            tic = time.time()
            params_next = params_grid[i]
            model = NRLMFb(**params_next)
            auc_avg, aupr_avg, auc_conf, aupr_conf = run_cv(model)
            y_next = auc_avg if scoring == 'auc' else aupr_avg
            y = np.concatenate((y, [y_next]), axis=0)
            logger.info("%s %s cvs=%s (sample= %s) %.6f[sec]" %
                        (params_next, scoring, str(cvs), str(count),
                         time.time() - tic))
            logger.info(str(y_next))
            # The first sample always becomes the incumbent; afterwards only
            # a strictly better score replaces it.
            if count == 1 or best_score < y_next:
                best_score = y_next
                auc_opt = [describe(model), auc_avg, aupr_avg, auc_conf,
                           aupr_conf]
            count += 1

        # GP-MI algorithm
        alpha = np.log(2 / gpmi['delta'])
        gamma = 0
        for i in range(int(gpmi['max_iter'])):
            tic = time.time()
            gp = GaussianProcessRegressor()
            gp.fit(X, y)
            mean, sig = gp.predict(x_grid, return_std=True)
            phi = np.sqrt(alpha) * (np.sqrt(sig**2 + gamma) - np.sqrt(gamma))
            idx = np.argmax(mean + phi)
            params_next = params_grid[idx]
            x_next = x_grid[idx]
            gamma = gamma + sig[idx]**2
            model = NRLMFb(**params_next)
            auc_avg, aupr_avg, auc_conf, aupr_conf = run_cv(model)
            y_next = auc_avg if scoring == 'auc' else aupr_avg
            # Log the parameters that were actually evaluated this round.
            logger.info("%s %s cvs=%s (sample= %s) %.6f[sec]" %
                        (params_next, scoring, str(cvs), str(i + n_init + 1),
                         time.time() - tic))
            logger.info(str(y_next))
            if best_score < y_next:
                best_score = y_next
                auc_opt = [describe(model), auc_avg, aupr_avg, auc_conf,
                           aupr_conf]
            # Stop once the acquisition proposes the same point twice in a row.
            if np.array_equal(x_next, X[-1]):
                break
            X = np.concatenate((X, [x_next]), axis=0)
            y = np.concatenate((y, [y_next]), axis=0)
        end = time.time()
        cmd = "Optimal parameter setting:\n%s\n" % auc_opt[0]
        cmd += "auc:%.6f, aupr:%.6f, auc_conf:%.6f, aupr_conf:%.6f, time:%.6f\n" % (
            auc_opt[1], auc_opt[2], auc_opt[3], auc_opt[4], end - start)

    # Grid search
    else:
        start = time.time()
        # -inf so that the first evaluated setting is always recorded.
        max_auc, auc_opt = float('-inf'), []
        for param in params_grid:
            tic = time.time()
            model = NRLMFb(**param)
            cmd = describe(model)
            logger.info(cmd)
            auc_avg, aupr_avg, auc_conf, aupr_conf = run_cv(model)
            logger.info(
                "auc:%.6f,aupr:%.6f, auc_conf:%.6f, aupr_conf:%.6f, Time:%.6f\n"
                % (auc_avg, aupr_avg, auc_conf, aupr_conf, time.time() - tic))
            if auc_avg > max_auc:
                max_auc = auc_avg
                auc_opt = [cmd, auc_avg, aupr_avg, auc_conf, aupr_conf]
        end = time.time()
        cmd = "Optimal parameter setting:\n%s\n" % auc_opt[0]
        cmd += "auc:%.6f, aupr:%.6f, auc_conf:%.6f, aupr_conf:%.6f, time:%.6f\n" % (
            auc_opt[1], auc_opt[2], auc_opt[3], auc_opt[4], end - start)

    logger.info('')
    logger.info(cmd)
class SklearnGPSurrogate(GaussianProcess):
    """Surrogate for https://github.com/scikit-learn/scikit-learn Gaussian process.

    Attributes:
        model (sklearn.gaussian_process.GaussianProcessRegressor): Model object of Sklearn.
    """

    def __init__(self):
        super().__init__()
        self.model = None

    def train(self, X, y, kernel=None, hyperparameters=None,
              fixed_sigma_n=False, return_hess_inv=False, multi_output=False):
        """Fit a scikit-learn GP regressor to the prepared training data.

        Parameters:
            X (ndarray): Training input points.
            y (ndarray): Observed output data.
            kernel: Forwarded to the base class `prepare_train`.
            hyperparameters: Forwarded to the base class `prepare_train`.
            fixed_sigma_n (bool): Forwarded to the base class `prepare_train`.
            return_hess_inv (bool): Is not considered currently.
            multi_output (bool): Not read here; the check below uses
                `self.multi_output`.

        Raises:
            NotImplementedError: If `self.multi_output` is set.
        """
        from sklearn.gaussian_process import GaussianProcessRegressor
        super().prepare_train(X, y, kernel, hyperparameters, fixed_sigma_n)

        # NOTE(review): the `multi_output` argument itself is never consulted
        # -- confirm that the base class is what sets self.multi_output.
        if self.multi_output:
            raise NotImplementedError(
                "Multi-Output is not implemented for this surrogate.")

        # With a fixed sigma_n the noise enters through `alpha`; otherwise
        # only a small jitter is added to the kernel diagonal.
        if self.fixed_sigma_n:
            numeric_noise = self.hyperparameters['sigma_n'].item()**2
        else:
            numeric_noise = 1e-5

        # Instantiate the model
        self.model = GaussianProcessRegressor(kernel=self.kernel,
                                              alpha=numeric_noise)

        # Train the model
        self.model.fit(self.Xtrain, self.ytrain)
        self.kernel = self.model.kernel_

        # Set hyperparameters from model
        self._set_hyperparameters_from_model()
        self.print_hyperparameters("Optimized")
        self.trained = True
        self.decode_training_data()

    def add_training_data(self, X, y):
        """Add training points to existing data.

        Parameters:
            X (ndarray): Input points to add.
            y (ndarray): Observed output to add.
        """
        self.Xtrain = np.concatenate([self.Xtrain, X], axis=0)
        self.ytrain = np.concatenate([self.ytrain, y], axis=0)

    def predict(self, Xpred, add_data_variance=True):
        """Predict mean and variance at the given points.

        Parameters:
            Xpred (ndarray): Prediction input points.
            add_data_variance (bool): Whether sigma_n**2 is added to the
                predictive variance.

        Returns:
            tuple: Mean and variance as returned by `decode_predict_data`.
        """
        Xpred = super().prepare_predict(Xpred)

        ymean, ystd = self.model.predict(Xpred, return_std=True)
        yvar = ystd.reshape(-1, 1)**2
        if add_data_variance:
            yvar = yvar + self.hyperparameters['sigma_n']**2
        ymean, yvar = self.decode_predict_data(ymean, yvar)
        return ymean, yvar

    def get_marginal_variance(self, Xpred):
        """Calculates the marginal variance to infer the next point in active learning.

        Currently only the predictive variance is taken into account.

        Parameters:
            Xpred (ndarray): Possible prediction input points.
        Returns:
            ndarray: Currently only predictive variance.
        """
        _, vhat = self.predict(Xpred)
        return vhat.reshape(-1, 1)

    def save_model(self, path):
        """Save the SklGPSurrogate model to a pickle file. All attributes of the surrogate are loaded directly from the
        model.

        Parameters:
            path (str): Path including the file name, where the model should be saved.
        """
        from pickle import dump
        # Context manager so the file is flushed and closed deterministically.
        with open(path, 'wb') as handle:
            dump(self.model, handle)

    @classmethod
    def load_model(cls, path):
        """Load a saved SklGPSurrogate model from a pickle file and update its attributes.

        Parameters:
            path (str): Path including the file name, from where the model should be loaded.

        Returns:
            profit.sur.gaussian_process.SklearnGPSurrogate: Instantiated surrogate model.
        """
        from pickle import load

        self = cls()
        # NOTE: unpickling can execute arbitrary code; only load model files
        # that come from a trusted source.
        with open(path, 'rb') as handle:
            self.model = load(handle)
        self.Xtrain = self.model.X_train_
        self.ytrain = self.model.y_train_
        self.kernel = self.model.kernel_
        self.ndim = self.Xtrain.shape[-1]
        # `train` uses alpha == 1e-5 exactly when sigma_n is not fixed.
        self.fixed_sigma_n = self.model.alpha != 1e-5
        self.trained = True
        self._set_hyperparameters_from_model()
        self.print_hyperparameters("Loaded")
        return self

    def optimize(self, return_hess_inv=False, **opt_kwargs):
        """For hyperparameter optimization the Sklearn base optimization is used.

        Currently, the inverse Hessian can not be retrieved, which limits the active learning effectivity.

        Parameters:
            return_hess_inv (bool): Is not considered currently.
            opt_kwargs: Keyword arguments used directly in the Sklearn base optimization.
        """
        self.model.fit(self.Xtrain, self.ytrain, **opt_kwargs)
        self._set_hyperparameters_from_model()

    def select_kernel(self, kernel):
        """Get the sklearn.gaussian_process.kernels kernel by matching the given kernel identifier.

        Parameters:
            kernel (str): Kernel string such as 'RBF' or depending on the surrogate also product and sum kernels such as
                'RBF+Matern52'.

        Returns:
            sklearn.gaussian_process.kernels: Scikit-learn kernel object. Currently, for sum and product kernels,
            the initial hyperparameters are the same for all kernels.

        Raises:
            RuntimeError: If a kernel name is not found in sklearn.gaussian_process.kernels.
        """
        from re import split
        from sklearn.gaussian_process import kernels as sklearn_kernels

        # Alternating kernel names and '+' / '*' operators.
        tokens = split('([+*])', kernel)
        try:
            parts = [
                token if token in ('+', '*') else getattr(
                    sklearn_kernels, token)(
                        length_scale=self.hyperparameters['length_scale'])
                for token in tokens
            ]
        except AttributeError:
            raise RuntimeError("Kernel {} is not implemented.".format(kernel))

        # Combine the kernel objects directly instead of eval()-ing their
        # string form: products bind tighter than sums, both left to right.
        summands = [parts[0]]
        for operator_token, operand in zip(parts[1::2], parts[2::2]):
            if operator_token == '*':
                summands[-1] = summands[-1] * operand
            else:
                summands.append(operand)
        combined = summands[0]
        for summand in summands[1:]:
            combined = combined + summand

        # Add scale and noise to kernel
        combined = combined * sklearn_kernels.ConstantKernel(
            constant_value=1 / self.hyperparameters['sigma_f'].item()**2)
        if not self.fixed_sigma_n:
            combined = combined + sklearn_kernels.WhiteKernel(
                noise_level=self.hyperparameters['sigma_n'].item()**2)

        return combined

    def _set_hyperparameters_from_model(self):
        r"""Helper function to set the hyperparameter dict from the model.

        It depends on whether $\sigma_n$ is fixed.
        Currently this is only stable for single kernels and not for Sum and Prod kernels.
        """
        fitted = self.model.kernel_
        if self.fixed_sigma_n:
            # Kernel is (base * constant); the noise is carried by alpha.
            scaled_kernel = fitted
            noise_variance = self.model.alpha
        else:
            # Kernel is (base * constant) + white noise.
            scaled_kernel = fitted.k1
            noise_variance = fitted.k2.noise_level

        self.hyperparameters['length_scale'] = np.atleast_1d(
            scaled_kernel.k1.length_scale)
        self.hyperparameters['sigma_f'] = np.sqrt(
            np.atleast_1d(1 / scaled_kernel.k2.constant_value))
        self.hyperparameters['sigma_n'] = np.sqrt(
            np.atleast_1d(noise_variance))
def run(args, bkghist, trainHisto, optKernel):
    """Fit a fixed-kernel GP to the background histogram and a RooFit PDF.

    The GP is fitted to the bin arrays extracted from `bkghist` with the
    kernel hyperparameters held at `optKernel`'s values.  A background-only
    `RooGPBkg` PDF is then fitted to the same histogram and the result is
    printed to ``<args.outDir>/test_GP.pdf``.

    :param args: options object; reads `noWindow` and `outDir`.
    :param bkghist: background histogram to fit.
    :param trainHisto: training histogram handed to `RooGPBkg`.
    :param optKernel: kernel used as-is (``optimizer=None``).
    """
    GPh = GPHisto(bkghist)

    # The distributions with no window removed.
    X_t = GPh.getXArr()
    y_t = GPh.getYArr()
    dy_t = GPh.getErrArr()

    # NOTE(review): only the no-window case assigns the fit inputs here;
    # with args.noWindow false, X / y / dy / x are not set in this function.
    if args.noWindow:
        X = X_t
        y = y_t
        dy = dy_t
        x = GPh.getXArr()

    kernel = optKernel

    # Per-bin variances go on the kernel diagonal through `alpha`.
    gp = GaussianProcessRegressor(kernel=kernel,
                                  optimizer=None,
                                  alpha=dy**2)
    gp.fit(X, y)
    print(gp.kernel_)

    y_pred, sigma = gp.predict(x, return_std=True)

    outhist = GPh.getHisto(y_pred, sigma, 'GP Fit')
    #bkg = GPh.getHisto(y, dy, 'Full Background')

    ### RooFit part
    myy = RooRealVar('myy', 'myy', 105, 160)
    #nSig = RooRealVar('nSig','nSig',-200,1000)
    sigMass = 125
    #pdf = RooGP.RooGP("mypdf", "CustomPDF",myy ,nSig, sigMass, trainHisto, dataHisto)
    pdf = RooGPBkg.RooGPBkg("bkgPDF", "bkg only PDF", myy, trainHisto, bkghist)

    data = RooDataHist("dh", "dh", RooArgList(myy), bkghist)

    c1 = TCanvas('c1', 'c1')
    frame = myy.frame()
    data.plotOn(frame, RooFit.MarkerColor(kRed))
    fitResult = pdf.fitTo(data, RooFit.Save())
    # pdf.gpHisto.Draw()
    # data.Draw('same')
    # outhist.Draw('samehist')
    # outhist.SetFillColorAlpha(kWhite, 0)
    # bkghist.Draw('same')
    # bkghist.SetMarkerColor(kBlack)
    # outhist.Divide(bkghist)
    # outhist.Draw()
    pdf.plotOn(frame)
    #fitResult.plotOn(frame)
    frame.Draw()
    #nSig.Print()
    c1.Print(args.outDir + '/test_GP.pdf')
class Learner(Loader):
    """Active-learning loop: pick configurations, label them, refit, log.

    The attributes ``train_set``, ``desc_file``, ``output``, ``calculations``,
    ``test_set``, ``fit_fold``, ``first_batch``, ``batch``, ``cluster_sz``,
    ``std_w``, ``trainerr_w``, ``restart``, ``restart_file``, ``train_out``,
    ``delta_e``, ``e_min``, ``t`` and ``tmax`` are read here but never assigned
    in this class; presumably ``Loader`` provides them -- verify there.
    """

    def __init__(self):
        """Run the loader set-up, then prepare state and start learning."""
        super().__init__()
        self.prepare()
        self.learn()

    def prepare(self):
        """Build the GP and model, load descriptors, create output files."""
        self.kernel = C(1.0, (1e-5, 1e5)) * RBF(15, (1e-5, 1e5))
        self.gp = GPR(kernel=self.kernel, n_restarts_optimizer=9, alpha=1e-6)
        self.model = FittingModel()
        self.coords, _ = utils.read_data(self.train_set)

        # NOTE: pickle.load can execute arbitrary code from the file, so
        # desc_file must come from a trusted source.
        with open(self.desc_file, 'rb') as pickled:
            self.X_train = pickle.load(pickled)

        n_samples = self.X_train.shape[0]
        self.Y_train = np.zeros(n_samples)
        self.idx_all = np.arange(n_samples)
        self.idx_left = copy.deepcopy(self.idx_all)
        # Everything is still "left", so this starts out as an empty array.
        self.idx_now = self.idx_all[~np.isin(self.idx_all, self.idx_left)]
        self.err_train = np.zeros_like(self.idx_all, dtype=float)

        os.makedirs(self.output, exist_ok=True)
        os.makedirs(self.calculations, exist_ok=True)
        utils.write_energy_file(self.test_set,
                                os.path.join(self.output, 'val_refer.dat'),
                                col_to_write=1)
        SubmitFit(self.fit_fold)
        SubmitMolpro(self.calculations)

        # Log file header.  The comma after 'Iteration' was missing, which
        # fused the first two names and left the header one column short of
        # the eight values written per iteration in learn().
        self.logfile = os.path.join(self.output, '_PickSet.log')
        to_log = [
            'Iteration',
            'TrainSet_Size',
            'Leftover_Size',
            'Train_MSE',
            'Train_wMSE',
            'Test_MSE',
            'Test_wMSE',
            'Fitting[s]',
        ]
        with open(self.logfile, 'w') as f:
            # Same layout as before: every name is followed by a tab.
            print(*to_log, sep='\t', end='\t\n', file=f)

        # Record the settings of this run.
        to_record = {
            'Nr of samples in first iteration': self.first_batch,
            'Nr of samples picked in other iterations': self.batch,
            'cluster size': self.cluster_sz,
            'STD weight': self.std_w,
            'TRAIN ERROR weight': self.trainerr_w,
            'Used Guassian Process model': self.gp,
        }
        with open(os.path.join(self.output, '_setting.ini'), 'w') as f:
            for key, value in to_record.items():
                try:
                    print(key + '\t' + str(value), end='\n', file=f)
                except Exception:
                    # Best effort: fall back to writing key and value on
                    # separate lines, without swallowing KeyboardInterrupt
                    # or SystemExit as the bare except did.
                    print(key, end='\n', file=f)
                    print(value, end='\n', file=f)

    def _pick_by_uncertainty(self):
        """Sample candidates from ``idx_left`` weighted by error and GP std.

        The current training set is clustered with KMeans; for each cluster
        label found among the candidates a GP is fitted on the training
        points of that cluster, and every candidate gets the weight
        ``trainerr_w * mean(train error in cluster) + std_w * GP std``.

        Returns:
            Array of picked indices (at most ``self.batch`` of them).
        """
        # At least one cluster, even when the training set is smaller than
        # cluster_sz (int(...) would give 0 and KMeans would fail).
        n_clusters = max(1, int(self.idx_now.shape[0] / self.cluster_sz))
        # NOTE(review): precompute_distances is believed to be deprecated in
        # newer scikit-learn releases -- confirm against the pinned version.
        clusterer = KMeans(n_clusters=n_clusters,
                           init='k-means++',
                           precompute_distances=True,
                           copy_x=True)
        lab_now = clusterer.fit_predict(self.X_train[self.idx_now])

        idx_cand = self.idx_left
        lab_cand = clusterer.predict(self.X_train[idx_cand])
        weights = np.zeros((idx_cand.shape[0], ), dtype=float)

        for label in set(lab_cand):
            idx_now_in_cluster = self.idx_now[lab_now == label]
            cand_mask = lab_cand == label
            self.gp.fit(self.X_train[idx_now_in_cluster, :],
                        self.Y_train[idx_now_in_cluster])
            _, cand_std = self.gp.predict(self.X_train[idx_cand[cand_mask]],
                                          return_std=True)
            p_err = np.average(self.err_train[idx_now_in_cluster])
            weights[cand_mask] = self.trainerr_w * p_err + self.std_w * cand_std

        nr_pick = min(weights.shape[0], self.batch)
        total = np.sum(weights)
        # A zero or non-finite total would turn every probability into NaN;
        # fall back to uniform sampling in that case.
        probabilities = weights / total if np.isfinite(total) and total != 0 else None
        return np.random.choice(idx_cand, nr_pick, replace=False, p=probabilities)

    def learn(self):
        """Iterate pick -> label -> fit -> evaluate until ``t`` reaches ``tmax``.

        Each iteration writes a ``trainset_<t>.RESTART`` file and appends one
        line to the log file created in :meth:`prepare`.
        """
        if self.restart:
            restart = pd.read_csv(self.restart_file, sep='\t')
            self.idx_now = restart['idx'].to_numpy()
            self.Y_train[self.idx_now] = restart['energy'].to_numpy()
            self.err_train[self.idx_now] = restart['error'].to_numpy()
            self.idx_left = self.idx_left[~np.isin(self.idx_left, self.idx_now)]

        while self.t < self.tmax:
            print('Start iteration: ', self.t)
            tic = time()

            if (self.idx_now is None) or (self.idx_now.shape[0] == 0):
                # First iteration: uniform draw of the initial batch.
                idx_pick = np.random.choice(self.idx_left,
                                            self.first_batch,
                                            replace=False)
            else:
                # Later iterations: pool selection by probability.
                idx_pick = self._pick_by_uncertainty()

            # Picked samples leave the candidate pool before labelling.
            self.idx_left = self.idx_left[~np.isin(self.idx_left, idx_pick)]
            print("Calculating the energy...")
            calc_energy = Energy(self.coords, idx_pick, self.t)
            # Energy reports back which of the requested indices it holds.
            idx_pick = calc_energy.idx_pick
            print(
                F'Number of selected configurations in this iteration: {len(idx_pick)}'
            )

            if (self.idx_now is None) or (self.idx_now.shape[0] == 0):
                self.idx_now = idx_pick
            else:
                self.idx_now = np.hstack((self.idx_now, idx_pick))
            self.Y_train[idx_pick] = calc_energy.energy

            # Append this iteration's labelled set to the cumulative file.
            new_train = os.path.join(self.calculations, 'it_' + str(self.t),
                                     'tr_set.xyz')
            with open(new_train, 'r+') as labeled_file:
                newxyz = labeled_file.read()
            with open(self.train_out, 'a+') as oldxyz:
                oldxyz.write(newxyz)

            train_weights = utils.get_weights(self.Y_train[self.idx_now],
                                              self.delta_e, self.e_min)
            print("Fitting the model...")
            train_err = self.model.fit(ite=self.t)
            self.err_train[self.idx_now] = np.abs(train_err) * np.sqrt(train_weights)

            print("Creating restart file...")
            restart_path = os.path.join(self.output,
                                        'trainset_' + str(self.t) + '.RESTART')
            restart_file = pd.DataFrame({
                'idx': self.idx_now,
                'energy': self.Y_train[self.idx_now],
                'error': self.err_train[self.idx_now],
            })
            restart_file.to_csv(restart_path, sep='\t', index=False)

            # Evaluate the freshly fitted model.  The values are root mean
            # squares even though the log columns are labelled "MSE".
            test_err, test_weights = self.model.evaluate(ite=self.t)
            train_mse = np.sqrt(np.mean(np.square(train_err)))
            train_wmse = np.sqrt(np.mean(np.square(train_err) * train_weights))
            test_mse = np.sqrt(np.mean(np.square(test_err)))
            test_wmse = np.sqrt(np.mean(np.square(test_err) * test_weights))

            toc = time()
            print('time consumed this iteration [s]: ', toc - tic)
            with open(self.logfile, 'a') as f:
                print(
                    '{0:d}\t{1:d}\t{2:d}\t{3:.8f}\t{4:.8f}\t{5:.8f}\t{6:.8f}\t{7:.2f}'
                    .format(self.t, self.idx_now.shape[0],
                            self.idx_left.shape[0], train_mse, train_wmse,
                            test_mse, test_wmse, toc - tic),
                    file=f,
                    end='\n')
            self.t += 1
class SurrogateACESOptimizer(ACESOptimizer):
    """ACES optimizer that queries a GP surrogate of the entropy-search ensemble.

    Args:
        context_boundaries: forwarded to ``ACESOptimizer``.
        n_context_samples: number of contexts in the entropy-search ensemble.
        kappa: weight of the surrogate's uncertainty term in the objective.
        active: forwarded to ``ACESOptimizer``.
        **kwargs: forwarded to ``ACESOptimizer``.
    """

    def __init__(self, context_boundaries, n_context_samples, kappa,
                 active=True, **kwargs):
        super(SurrogateACESOptimizer, self).__init__(
            context_boundaries=context_boundaries, active=active, **kwargs)
        self.n_context_samples = n_context_samples
        self.kappa = kappa

    def init(self, n_params, n_context_dims):
        """Delegate initialisation to the parent class."""
        super(SurrogateACESOptimizer, self).init(n_params, n_context_dims)

    def _determine_contextparams(self, optimizer):
        """Select context and params jointly using ACES."""
        # Until enough samples exist, draw the query uniformly at random.
        if len(optimizer.X_) < optimizer.initial_random_samples:
            random_cx = np.random.uniform(self.cx_boundaries[:, 0],
                                          self.cx_boundaries[:, 1])
            return random_cx[:self.context_dims], random_cx[self.context_dims:]

        # Prepare the entropy search objective.
        self._init_es_ensemble()

        # Training data for the surrogate, which maps
        # (query context, query parameters, evaluation-context offset)
        # to the entropy-search value.
        n_query_points = 500
        n_data_dims = 2 * self.context_dims + self.dimension
        features = np.empty((n_query_points, n_data_dims))
        targets = np.empty(n_query_points)
        for row in range(n_query_points):
            # One random query point, then one random ensemble member.
            query = np.random.uniform(self.cx_boundaries[:, 0],
                                      self.cx_boundaries[:, 1])
            member = np.random.choice(self.n_context_samples)

            features[row, :self.context_dims + self.dimension] = query
            features[row, self.context_dims + self.dimension:] = \
                self.context_samples[member] - query[:self.context_dims]
            targets[row] = self.entropy_search_ensemble[member](query)[0]

        # GP surrogate: scaled anisotropic RBF plus white noise.
        amplitude = C(1.0, (1e-10, 100.0))
        smooth = RBF(length_scale=(1.0,)*n_data_dims,
                     length_scale_bounds=[(0.01, 10.0),]*n_data_dims)
        noise = WhiteKernel(1.0, (1e-10, 100.0))
        self.es_surrogate = GaussianProcessRegressor(
            kernel=amplitude * smooth + noise)
        self.es_surrogate.fit(features, targets)

        # Contexts over which the surrogate prediction is averaged.
        contexts = np.random.uniform(self.context_boundaries[:, 0],
                                     self.context_boundaries[:, 1],
                                     (250, self.context_dims))

        def objective_function(cx):
            # Pair the candidate with every evaluation context.
            probe = np.empty((250, n_data_dims))
            probe[:, :self.context_dims + self.dimension] = cx
            probe[:, self.context_dims + self.dimension:] = \
                contexts - cx[:self.context_dims]
            es_pred, es_cov = \
                self.es_surrogate.predict(probe, return_cov=True)
            mean_part = es_pred.mean()
            spread_part = np.sqrt(es_cov.mean())
            return mean_part + self.kappa * spread_part

        best_cx = global_optimization(
            objective_function, boundaries=self.cx_boundaries,
            optimizer=self.optimizer, maxf=optimizer.maxf)
        return best_cx[:self.context_dims], best_cx[self.context_dims:]

    def _init_es_ensemble(self):
        """Create one fixed-context acquisition function per sampled context."""
        # 1. Oversample contexts uniformly at random ...
        oversampled = np.random.uniform(
            self.context_boundaries[:, 0], self.context_boundaries[:, 1],
            (self.n_context_samples*25, self.context_dims))
        self.context_samples = oversampled
        # 2. ... and reduce them to n_context_samples k-means centres.
        kmeans = KMeans(n_clusters=self.n_context_samples, n_jobs=1)
        self.context_samples = \
            kmeans.fit(self.context_samples).cluster_centers_

        # 3. One deep copy of the acquisition function per centre, with the
        #    context rows of the boundaries set to that centre.
        self.entropy_search_ensemble = []
        for member in range(self.n_context_samples):
            pinned_boundaries = np.copy(self.cx_boundaries)
            pinned_boundaries[:self.context_dims] = \
                self.context_samples[member][:, np.newaxis]
            fixed_context_es = deepcopy(self.acquisition_function)
            fixed_context_es.set_boundaries(pinned_boundaries)
            self.entropy_search_ensemble.append(fixed_context_es)

    def _create_acquisition_function(self, name, model, **kwargs):
        """Create the acquisition function; only two names are accepted."""
        if name not in ("EntropySearch", "MinimalRegretSearch"):
            raise ValueError("%s acquisition function not supported." % name)
        return create_acquisition_function(name, model, **kwargs)
def gaussian_regfit(lc_tab, flt):
    """Fit a Gaussian-process regression to one light curve.

    Rows with ``SNR <= 1`` are dropped.  The kernel is
    ``1.0 * RBF + 1.0 * (RBF * ExpSineSquared) + WhiteKernel`` and the
    prediction is evaluated on 100 evenly spaced points from 10 below the
    first to 10 above the last ``NORM_T`` value.

    Args:
        lc_tab: table supporting ``.loc`` with the columns ``SNR``,
            ``NORM_T``, ``FLUXCAL`` and ``FLUXCALERR``.
        flt: unused; kept so existing callers keep working.

    Returns:
        A one-element list ``[[x, y_pred, y_pred_sigma]]`` holding the
        flattened prediction grid, the predicted mean and its standard
        deviation.

    Raises:
        ValueError: if no row has ``SNR > 1``.
    """
    # Use only data with SNR > 1.
    lc = lc_tab.loc[lc_tab['SNR'] > 1]

    # Plain float arrays, so scikit-learn never sees pandas objects.
    t = np.asarray(lc['NORM_T'], dtype=float)
    flux = np.asarray(lc['FLUXCAL'], dtype=float)
    flux_err = np.asarray(lc['FLUXCALERR'], dtype=float)
    if t.size == 0:
        raise ValueError("gaussian_regfit: no observations with SNR > 1")

    # Prediction grid and training inputs as column vectors.
    x_space = np.atleast_2d(np.linspace(t.min() - 10, t.max() + 10, 100)).T
    x_fit = np.atleast_2d(t).T

    k_rbf = RBF(length_scale=10, length_scale_bounds=(5., 1e2))
    k_sine = ExpSineSquared(length_scale=1e2,
                            length_scale_bounds=(5., 1e2),
                            periodicity=1e2,
                            periodicity_bounds=(1e2, 1e4))
    k_noise = WhiteKernel(noise_level=0.1, noise_level_bounds=(1e-3, 1e3))
    kernel = 1.0 * k_rbf + 1.0 * (k_rbf * k_sine) + k_noise

    # alpha is added to the diagonal of the kernel matrix, i.e. it is a
    # noise *variance*, so the per-point flux error has to be squared.
    gpr = GaussianProcessRegressor(kernel=kernel,
                                   alpha=flux_err ** 2,
                                   n_restarts_optimizer=10)
    # Hyperparameters are chosen by maximising the marginal likelihood.
    gpr.fit(x_fit, flux)

    # Mean and standard deviation on the prediction grid.
    y_pred, y_pred_sigma = gpr.predict(x_space, return_std=True)
    return [[x_space.flatten(), y_pred, y_pred_sigma]]
class GCP(BaseTuner):
    """Tuner that fits a Gaussian process on copula-transformed data.

    ``y`` and every column of ``X`` are modelled with a Gaussian kernel
    density estimate, pushed through that model's CDF and then through the
    standard normal quantile function; the GP is fitted on those scores.
    """

    def __init__(self, tunables, gridding=0, r_minimum=2):
        """
        Extra args:
            r_minimum: the minimum number of past results this selector needs
                in order to use gaussian process for prediction. If not enough
                results are present during a fit(), subsequent calls to
                propose() will revert to uniform selection.
        """
        super(GCP, self).__init__(tunables, gridding=gridding)
        self.r_minimum = r_minimum

    def fit(self, X, y):
        """Fit the density models and the GP on ``(X, y)``.

        Nothing beyond the parent ``fit`` happens when ``X`` has fewer than
        ``r_minimum`` rows.
        """

        def jitter(x, value_range):
            """Return ``x`` plus uniform noise scaled by the range's magnitude."""
            x = np.asarray(x)
            scale_exp_min = np.abs(np.ceil(np.log10(value_range[0])))
            scale_exp_max = np.abs(np.ceil(np.log10(value_range[1])))
            scale_exp = (scale_exp_max + scale_exp_min) / 2.
            return x + np.random.rand(x.size) / (10**scale_exp)

        logger.debug("rows in X = %d, r_minimum = %d", X.shape[0], self.r_minimum)

        # The parent class does its own bookkeeping; predict() reads self.X,
        # which is presumably stored there -- verify against BaseTuner.
        super(GCP, self).fit(X, y)

        # Skip training the process if there aren't enough samples.
        if X.shape[0] < self.r_minimum:
            return

        # -- Non-parametric model of 'y', estimated with kernel density
        kernel_pdf = st.gaussian_kde(y)
        self.y_kernel_model = {
            'pdf': kernel_pdf,
            'cdf': make_cdf(kernel_pdf),
            'ppf': make_ppf(kernel_pdf),
        }

        # -- Transform y --> F --> vF --> norm.ppf --> v
        v = st.norm.ppf(self.y_kernel_model['cdf'](y))

        # -- Non-parametric model of each feature in 'X'
        X_kernel_model = []
        for ki in range(X.shape[1]):
            columnX = X[:, ki]
            if self.tunables[ki][1].is_integer:
                # Integer tunables are jittered before the density estimate.
                columnX = jitter(columnX, self.tunables[ki][1].range)
            kernel_pdf = st.gaussian_kde(columnX)
            X_kernel_model.append({
                'pdf': kernel_pdf,
                'cdf': make_cdf(kernel_pdf),
                'ppf': make_ppf(kernel_pdf),
            })
        self.X_kernel_model = X_kernel_model

        # -- Transform X --> F --> uF --> norm.ppf --> U
        # Explicit float array: empty_like(X) would inherit an integer dtype
        # from X and truncate the normal scores.
        U = np.empty(X.shape, dtype=float)
        for ki in range(X.shape[1]):
            U[:, ki] = st.norm.ppf(X_kernel_model[ki]['cdf'](X[:, ki]))

        # -- Instantiate a GP and fit it with (U, v)
        self.gcp = GaussianProcessRegressor(normalize_y=True)
        self.gcp.fit(U, v)

    def predict(self, X):
        """Return ``(mean, stdev)`` pairs for the rows of ``X``.

        Rows whose transformed features contain an infinity are dropped, so
        the result can have fewer rows than ``X``.  Falls back to
        ``Uniform(self.tunables).predict(X)`` when fewer than ``r_minimum``
        rows were seen by ``fit``.
        """
        if self.X.shape[0] < self.r_minimum:
            # we probably don't have enough
            logger.warning('GP: not enough data, falling back to uniform sampler')
            return Uniform(self.tunables).predict(X)

        x_kernel_model = self.X_kernel_model
        y_kernel_model = self.y_kernel_model

        # -- Transform X into U before using the GP learned (float array for
        # the same reason as in fit()).
        U = np.empty(X.shape, dtype=float)
        for ki in range(X.shape[1]):
            U[:, ki] = st.norm.ppf(x_kernel_model[ki]['cdf'](X[:, ki]))

        # -- Keep only rows without infinite scores.
        ind_OK = ~np.isinf(U).any(axis=1)
        U_safe = U[ind_OK, :]
        logger.debug("Num. of valid rows in X = %d", np.sum(ind_OK))

        # -- Use GP to estimate mean and stdev only of safe U's
        mu_v, stdev_v = self.gcp.predict(U_safe, return_std=True)

        # -- Transform back: normal CDF, then the y model's quantile function.
        # NOTE(review): the standard deviation goes through the same
        # CDF/quantile chain as the mean -- confirm this is intended.
        mu_vF = st.norm.cdf(mu_v)
        stdev_vF = st.norm.cdf(stdev_v)

        # VERSION 1: mu_y and stdev_y may be shorter than X.  The alternative
        # (VERSION 2) would allocate zero arrays of length U.shape[0] and fill
        # only the ind_OK entries, keeping the output aligned with X.
        mu_y = y_kernel_model['ppf'](mu_vF)
        stdev_y = y_kernel_model['ppf'](stdev_vF)

        return np.array(list(zip(mu_y, stdev_y)))

    def _acquire(self, predictions):
        """
        Predictions from the GCP will be in the form (prediction, error).
        The default acquisition function returns the index with the highest
        predicted value, not factoring in error.
        """
        return np.argmax(predictions[:, 0])
################################################################################ print 'Initializing the GP Regressor...' kernel = sig_n(1.)**2 * SE(length_scale=1., length_scale_bounds=(1.e-2, 1)) GPR = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10) # Information regarding optimization: increase to increase accuracy of surragate ################################################################################ # NOTE: As it founds optimum it stays there which causes some convergence issues # you can avoid it by playing with xi values to let the model explore more or # keep your data numbers low... ################################################################################ PI = 10 Euc_dis = 0 max_iter = 10 for iter in range(1, max_iter): print 'Imposing sampling points for iteration...', iter GPR.fit(X, y) print 'Proposing new point for iteration... ', iter # Proposal of new point by minimizing acquisation function X_next = prop(Expected_Improvement, X, GPR, bound) y_next = -np.sin(3 * X_next) - np.power(X_next, 2) + 0.7 * X_next + np.cos( 3 * X_next) Euc_dis = X[-1] - X_next #print Euc_dis; # Plotting... GP_2D_Opt(X, y, x, GPR, Expected_Improvement(x, X, GPR), X_next, y_next) X = np.vstack((X, X_next)) y = np.vstack((y, y_next)) EI = Expected_Improvement(x, X, GPR).max() PI = Probability_Improvement(x, X, GPR).max() print PI if (PI < PI_eps and abs(Euc_dis) > X_eps and iter > 3 or EI == 0):
param_array = np.vstack((mej, vej, Xlan)).T param_array_postprocess = np.array(param_array) param_mins, param_maxs = np.min(param_array_postprocess, axis=0), np.max(param_array_postprocess, axis=0) for i in range(len(param_mins)): param_array_postprocess[:, i] = (param_array_postprocess[:, i] - param_mins[i]) / (param_maxs[i] - param_mins[i]) nsvds, nparams = param_array_postprocess.shape kernel = 1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1) gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=0, alpha=1.0) gp.fit(param_array_postprocess, Mag) M, sigma2_pred = gp.predict(np.atleast_2d(param_array_postprocess), return_std=True) sigma_best = np.median(np.sqrt(sigma2_pred)) sigma = sigma_best * np.ones(M.shape) elif opts.fit_type == "linear": if opts.analysis_type == "combined": parameters = ["K", "alpha", "beta", "gamma", "delta", "zeta", "sigma"] labels = [ r'K', r'$\alpha$', r'$\beta$', r'$\gamma$', r"$\delta$", r"$\zeta$", r'$\sigma$' ] n_params = len(parameters)
def gp(self, true_graph, training_idx):
    """Fit a GP on neighbour features and predict ``ig_`` over the grid.

    Training inputs are ``self.adjacent_p(idx, true_graph)`` and targets are
    ``true_graph.vertex_[idx].ig_`` for every ``idx`` in ``training_idx``.
    For each grid cell the prediction and its standard deviation are stored,
    ``ig_``, ``ig_ub_``, ``ig_lb_`` and ``ig_pred_`` are written on
    ``self.vertex_`` and ``L2_error_`` on ``true_graph.vertex_``, and the
    results are plotted with matplotlib.

    Args:
        true_graph: graph whose ``vertex_[i].ig_`` values are the targets.
        training_idx: iterable of vertex indices used for training.

    Returns:
        None.
    """
    training_X = []
    training_Y = []
    for idx in training_idx:
        training_Y.append(true_graph.vertex_[idx].ig_)
        nbs_p = self.adjacent_p(idx, true_graph)
        print("The vert {}".format(idx))
        print("The training X is {}".format(nbs_p))
        print("The training Y is {}".format(training_Y[-1]))
        print("===============================================")
        training_X.append(nbs_p)
    rbf = 1.0 * RBF(length_scale=1.0)
    # `matern` is built but never used below; only the RBF kernel is fitted.
    matern = 1.0 * Matern(
        length_scale=1.0, length_scale_bounds=(1e-1, 10.0), nu=1.5)
    gp_opt = GaussianProcessRegressor(kernel=rbf)
    gp_opt.fit(training_X, training_Y)
    print("The trained hyperparameter are {}".format(
        (gp_opt.kernel_.theta)))
    print("Log Marginal Likelihood (optimized): %.3f" %
          gp_opt.log_marginal_likelihood(gp_opt.kernel_.theta))
    # Grid of (row, column) coordinates, one larger than the graph in each
    # direction.
    # NOTE(review): the result arrays are allocated as (rows_+1, cols_+1) but
    # indexed [X2_, X1_] (column first), and the meshgrid outputs are indexed
    # [i][j] with i over rows_+1 -- this looks like it assumes
    # rows_ == cols_; confirm.
    x1_ = [i for i in range(self.rows_ + 1)]
    x2_ = [j for j in range(self.cols_ + 1)]
    X1_, X2_ = np.meshgrid(x1_, x2_)
    y_mean = np.empty([self.rows_ + 1, self.cols_ + 1])
    y_true = np.empty([self.rows_ + 1, self.cols_ + 1])
    y_std = np.empty([self.rows_ + 1, self.cols_ + 1])
    y_mean_u = np.empty([self.rows_ + 1, self.cols_ + 1])
    y_mean_d = np.empty([self.rows_ + 1, self.cols_ + 1])
    for i in range(self.rows_ + 1):
        for j in range(self.cols_ + 1):
            # Vertex index of the cell, counting rows from the bottom.
            cur_idx = (self.rows_ - X1_[i][j] - 1) * self.cols_ + X2_[i][j]
            print("X: {}, Y: {}, idx: {}".format(X2_[i][j], X1_[i][j],
                                                 cur_idx))
            # `nz_ig` is not defined in this method; presumably a
            # module-level collection of vertex indices -- verify.
            if X1_[i][j] < self.rows_ and X1_[i][j] >= 0 and X2_[i][
                    j] < self.cols_ and X2_[i][j] >= 0 and cur_idx in nz_ig:
                neigh_p = self.adjacent_p(cur_idx, true_graph)
                y_mean[X2_[i][j], X1_[i][j]], y_std[X2_[i][j], X1_[i][j]] = gp_opt.predict(
                    [neigh_p], return_std=True)
                print("Prediction ========================")
                print("Vertex {}, X:{}, Y:{}".format(
                    cur_idx, X2_[i][j], X1_[i][j]))
                print("Testing data is {}".format(neigh_p))
                print("Predicted IG {}".format(y_mean[X2_[i][j],
                                                      X1_[i][j]]))
            else:
                # Cells outside the graph or not in nz_ig get zero mean/std.
                y_mean[X2_[i][j], X1_[i][j]] = 0
                y_std[X2_[i][j], X1_[i][j]] = 0.0
            # One-standard-deviation band around the mean.
            y_mean_u[X2_[i][j], X1_[i][j]] = y_mean[X2_[i][j], X1_[i][j]] + y_std[X2_[i][j], X1_[i][j]]
            y_mean_d[X2_[i][j], X1_[i][j]] = y_mean[X2_[i][j], X1_[i][j]] - y_std[X2_[i][j], X1_[i][j]]
            if X2_[i][j] < self.cols_ and X1_[i][j] < self.rows_ and X1_[
                    i][j] >= 0 and X2_[i][j] >= 0:
                idx_ = (self.rows_ - X1_[i][j] - 1) * self.cols_ + X2_[i][j]
                y_true[X2_[i][j], X1_[i][j]] = true_graph.vertex_[idx_].ig_
                self.vertex_[idx_].ig_ = y_mean[X2_[i][j], X1_[i][j]]
                true_graph.vertex_[idx_].L2_error_ = (
                    true_graph.vertex_[idx_].ig_ - self.vertex_[idx_].ig_)**2
                self.vertex_[idx_].ig_ub_ = y_mean[
                    X2_[i][j], X1_[i][j]] + y_std[X2_[i][j], X1_[i][j]]
                self.vertex_[idx_].ig_lb_ = y_mean[
                    X2_[i][j], X1_[i][j]] - y_std[X2_[i][j], X1_[i][j]]
                # Flag vertices whose true value (rounded to 3 decimals)
                # lies inside the predicted band.
                if round(true_graph.vertex_[idx_].ig_, 3) <= round(
                        self.vertex_[idx_].ig_ub_, 3) and round(
                            true_graph.vertex_[idx_].ig_, 3) >= round(
                                self.vertex_[idx_].ig_lb_, 3):
                    self.vertex_[idx_].ig_pred_ = True
                # print("Idx {}, Coordinate {}, {}, IG is {}".format(idx_,X2_[i][j],X1_[i][j],self.vertex_[idx_].ig_)
    # print("The final L2 error is {}".format(np.square(L2_error_sum_)))
    # print("The value is {}".format(plus_sum))
    self.graph_vis()
    self.graph_vis("MeanUpBound")
    self.graph_vis("MeanLowBound")
    self.graph_vis("InRange")
    # Left panel: predicted mean; right panel: true values.
    plt.figure()
    plt.subplots_adjust(left=0.03, right=1.0, wspace=0.02)
    plt.subplot(1, 2, 1)
    plt.title("IG prediction -- mean")
    CMAP = plt.get_cmap('coolwarm')
    plt.pcolormesh(X2_, X1_, y_mean, cmap=CMAP)
    cb = plt.colorbar(shrink=1.0)
    plt.subplot(1, 2, 2)
    plt.title("True IG")
    CMAP = plt.get_cmap('coolwarm')
    plt.pcolormesh(X2_, X1_, y_true, cmap=CMAP)
    cb = plt.colorbar(shrink=1.0)
    # Separate figure for the predicted standard deviation.
    plt.figure()
    plt.title("Diviation")
    plt.pcolormesh(X2_, X1_, y_std, cmap=CMAP)
    cb = plt.colorbar(shrink=1.0)
# * ManifoldKernel.construct(base_kernel=RBF(0.1), architecture=((1, 2),), # transfer_fct="tanh", max_nn_weight=1) # heteroscedastic prototypes = KMeans(n_clusters=8).fit(X).cluster_centers_ kernel = C(1.0, (1e-10, 1000)) * RBF(length_scale = [10., 100.], length_scale_bounds=[(1e-3, 1e3),(1e-4, 1e4)]) \ + HeteroscedasticKernel.construct(prototypes, 1e-3, (1e-10, 50.0), gamma=1.0, gamma_bounds="fixed") #gp.fit(X[:, np.newaxis], y) gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=100) # Fit to data using Maximum Likelihood Estimation of the parameters gp.fit(X, y.reshape(-1,1)) #removing reshape results in a different error # Make the prediction on the meshed x-axis (ask for MSE as well) y_pred, sigma = gp.predict(inputs_x_array, return_std=True) fig = plt.figure() ax = fig.add_subplot(111, projection='3d') ax.scatter(inputs_x_array[:,0],inputs_x_array[:,1],y_pred) ax.scatter(X[:,0],X[:,1],y,color='orange') ax.set_xlabel('X Label (radius)') ax.set_ylabel('Y Label (time)') ax.set_zlabel('Z Label (density)') plt.show() index_y1 = 3 #only valid until len_x2 print("Time is "+str(inputs_x_array[:,1][index_y1::len_x2][0])+"s")
autoscaled_y, cv=cross_validation)) estimated_y_in_cv = estimated_y_in_cv * y.std(ddof=1) + y.mean() r2cvs.append(r2_score(y, estimated_y_in_cv)) optimal_kernel_number = np.where( r2cvs == np.max(r2cvs))[0][0] # クロスバリデーション後の r2 が最も大きいカーネル関数の番号 optimal_kernel = kernels[ optimal_kernel_number] # クロスバリデーション後の r2 が最も大きいカーネル関数 print('クロスバリデーションで選択されたカーネル関数の番号 :', optimal_kernel_number) print('クロスバリデーションで選択されたカーネル関数 :', optimal_kernel) # モデル構築 model = GaussianProcessRegressor(alpha=0, kernel=optimal_kernel) # GPR モデルの宣言 model.fit(autoscaled_x, autoscaled_y) # モデル構築 # トレーニングデータの推定 autoscaled_estimated_y, autoscaled_estimated_y_std = model.predict( autoscaled_x, return_std=True) # y の推定 estimated_y = autoscaled_estimated_y * y.std() + y.mean() # スケールをもとに戻す estimated_y_std = autoscaled_estimated_y_std * y.std() # スケールをもとに戻す estimated_y = pd.DataFrame(estimated_y, index=x.index, columns=['estimated_y']) estimated_y_std = pd.DataFrame(estimated_y_std, index=x.index, columns=['std_of_estimated_y']) # トレーニングデータの実測値 vs. 推定値のプロット plt.rcParams['font.size'] = 18 plt.scatter(y, estimated_y.iloc[:, 0], c='blue') # 実測値 vs. 推定値プロット y_max = max(y.max(),
def run(args, mass, winLow, winHigh):
    """Fit a GP to a toy background histogram and plot it against the data.

    A toy histogram is generated from the rebinned template ``hmgg_c0`` read
    from ``args.input`` (optionally with an injected signal), a Gaussian
    process is fitted to it, and the result is drawn either with matplotlib
    (``args.mplot``) or with ROOT, where it is also compared with an
    exponential-polynomial fit.

    Args:
        args: options object; ``input``, ``doSig``, ``noWindow``, ``mplot``
            and ``tag`` are read here.
        mass: only referenced inside a disabled string-literal block.
        winLow: lower edge passed to ``GPh.setWindow``.
        winHigh: upper edge passed to ``GPh.setWindow``.

    Returns:
        None.
    """
    f = TFile(args.input)
    bkghist_template = f.Get('hmgg_c0')
    bkghist_template.Rebin(8)
    stats = 100000
    seed = 10
    bkghist = toyModel(bkghist_template, stats, seed)
    if args.doSig:
        # Build the signal histogram ...
        sighist = buildSignal(125,1000, bkghist.GetNbinsX())
        # ... and inject it into the background.
        bkghist.Add(sighist)
    GPh = GPHisto(bkghist)
    GPh.setWindow(winLow,winHigh)
    # Training arrays from the windowed accessors ...
    X = GPh.getXWindowArr()
    y = GPh.getYWindowArr()
    dy = GPh.getErrWindowArr()
    # ... and the full-range arrays.
    X_t = GPh.getXArr()
    y_t = GPh.getYArr()
    dy_t = GPh.getErrArr()
    if args.noWindow:
        # Train on the full range instead.
        X = X_t
        y = y_t
        dy = dy_t
    #X, y, dy = histoToArrayTest(bkghist, 120, 140)
    #X, y, dy = histoToArrayCut(bkghist, 120, 125)
    #X_t, y_t, dy_t = histoToArray(bkghist)
    #X = np.atleast_2d(X).T
    #y = y.ravel()
    #dy = dy.ravel()
    #x = np.atleast_2d(np.linspace(start=105, stop=160, num=1000)).T # Predict a relatively smooth function
    #x = np.atleast_2d(np.linspace(start=105, stop=160, num=219)).T # Predict at each data point
    # Prediction points: the histogram wrapper's full x array.
    x = GPh.getXArr()
    #kernel = C(800.0, (1e-3, 1e3)) * RBF(100.0, (1e-3, 1e3)) #squared exponential kernel
    #kernel = C(10.0, (1e-3, 1e15)) * RBF(np.sqrt(2)*(7**2), (1e-3,1e5 )) #squared exponential kernel
    kernel = C(1000.0, (1e-3, 1e15)) * FallExp(1.0, (1e-5, 1e2), 1.0, (1e-3,1e15)) * Gibbs(1.0, (1e-3, 1e5), 1.0, (1e-3,1e5))
    #kernel = C(10.0, (1e-3, 1e6)) * Gibbs(1.0, (1e-3, 1e5), 1.0, (1e-3,1e5))
    print "dy[5] =",dy[5]
    print "err =", bkghist.GetBinError(5), "Original =", bkghist_template.GetBinError(5)
    # alpha is the per-point value added to the kernel diagonal; here it is
    # the squared bin error.
    # NOTE(review): optimizer='fmin' is not an optimizer name documented by
    # stock scikit-learn; presumably a patched or forked regressor is in
    # use -- confirm.
    gp = GaussianProcessRegressor(kernel=kernel
                                  ,optimizer='fmin'
                                  ,alpha=dy**2
                                  ,n_restarts_optimizer=15
                                  )
    gp.fit(X,y)
    print gp.kernel_
    y_pred, sigma = gp.predict(x, return_std=True)
    if args.mplot:
        # matplotlib output: training points, full background, prediction
        # and a +/- 1.96 sigma band, saved to <tag>GPFit.pdf.
        fig = plt.figure()
        #plt.plot(X, y, 'r.', markersize=10, label=u'Background')
        plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=8, label=u'Training Points', zorder=2)
        plt.errorbar(X_t.ravel(), y_t, dy_t, fmt='k.', markersize=7, label=u'Background', zorder=1)
        plt.plot(x, y_pred, 'b-', label=u'Prediction', zorder=3)
        plt.fill(np.concatenate([x, x[::-1]]),
                 np.concatenate([y_pred - 1.9600 * sigma,
                                 (y_pred + 1.9600 * sigma)[::-1]]),
                 alpha=.5, fc='b', ec='None', label='95% confidence interval', zorder=3)
        plt.xlabel('$M_{\gamma \gamma}$')
        plt.ylabel('$events$')
        plt.title('Optimized Kernel: {}'.format(gp.kernel_))
        #plt.yscale('log')
        #plt.ylim(-10, 20)
        plt.legend(loc='upper right')
        plt.savefig(args.tag+'GPFit.pdf')
        #plt.show()
    else:
        # ROOT output: everything below is drawn into out.root and PDFs.
        outfile = TFile('out.root','RECREATE')
        #outhist = arrayToHisto('GP Fit', 105, 160, y_pred, sigma)
        # GP prediction as a histogram, with 1.96*sigma as the bin error.
        outhist = GPh.getHisto(y_pred, 1.96*sigma, 'GP Fit')
        if args.noWindow:
            bkgWindow = GPh.getHisto(y, dy, 'Full Background')
        else:
            bkgWindow = GPh.getWinHisto(y, dy, 'Full Background')
        bkgSubtracted = bkghist.Clone('bkgSubtracted')
        bkgSubtracted.Add(outhist,-1) # Subtract background prediction from background with injected signal.
        canv = TCanvas('canv', 'canv')
        # Upper pad: distributions.
        pad1 = TPad("pad1", "pad1", 0, 0.3, 1, 1.0)
        pad1.SetBottomMargin(0)
        pad1.SetGridx()
        pad1.Draw()
        pad1.cd()
        outhist.SetStats(0)
        bkghist_template.SetStats(0)
        bkgWindow.SetStats(0)
        bkgWindow.SetMarkerColor(kBlue)
        bkgWindow.SetLineColor(kBlue)
        outhist.SetMarkerColor(kBlack)
        outhist.SetLineColor(kBlack)
        print outhist.GetBinError(10)
        #bkgNorm = bkgWindow.Integral(1, bkgWindow.FindBin(winLow))
        #tmpNorm = bkghist_template.Integral(1,bkghist_template.FindBin(winLow))
        # Scale the template to the integral of the training histogram.
        bkgNorm = bkgWindow.Integral()
        tmpNorm = bkghist_template.Integral()
        bkghist_template.Scale(bkgNorm/tmpNorm)
        bkghist_template.SetTitle(str(gp.kernel_)+" nToys: "+str(stats))
        print "Bin 24: {0} : {1} : {2}".format((outhist.GetBinContent(24)-outhist.GetBinError(24)), bkghist_template.GetBinContent(24), (outhist.GetBinContent(24)+outhist.GetBinError(24)) )
        ####### Poly2 fit
        #canv4 = TCanvas('c4','c4')
        expPol_func = TF1("expPol","[0]*exp((x-100)/100 * ([1] + [2]*(x-100)/100))",105,160)
        expPol_func.SetParameters(0,0,0)
        expPol_func.SetParLimits(1,-10.,10.)
        expPol_func.SetParLimits(2,-10.,10.)
        bkgWindow.Fit("expPol","","",105,160)
        expFitResult = bkgWindow.GetFunction("expPol")
        expPolHist = expFitResult.GetHistogram()
        print expPolHist.GetNbinsX()
        #expPolHist.Divide(outhist)
        #expPolHist.Draw()
        #canv4.Print(args.tag+'/expPol_GP_ratio.pdf')
        bkghist_template.Draw('')
        bkgWindow.Draw('same')
        outhist.Draw('histsame')
        #outhist.GetYaxis().SetLabelSize(0.)
        axis = TGaxis( -5, 20, -5, 220, 20,220,510,"")
        axis.SetLabelFont(43)
        axis.SetLabelSize(15)
        axis.Draw()
        canv.cd()
        # Lower pad: ratios of the template to the GP fit (h3, black) and to
        # the exponential-polynomial fit (h4, red).
        pad2 = TPad("pad2", "pad2", 0, 0.02, 1, 0.3)
        pad2.SetTopMargin(0)
        pad2.SetBottomMargin(0.28)
        pad2.SetGridx()
        pad2.Draw()
        pad2.cd()
        h3 = bkghist_template.Clone("h3")
        h3.SetLineColor(kBlack)
        h3.SetMinimum(0.95)
        h3.SetMaximum(1.05)
        h3.Sumw2()
        h3.SetStats(0)
        h3.Divide(outhist)
        h3.SetMarkerColor(kBlack)
        h3.SetMarkerStyle(20)
        h3.SetMarkerSize(0.5)
        h3.Draw("ep")
        h4 = bkghist_template.Clone("h4")
        h4.SetLineColor(kRed)
        h4.SetMinimum(0.95)
        h4.SetMaximum(1.05)
        h4.Sumw2()
        h4.SetStats(0)
        h4.Divide(expFitResult)
        h4.SetMarkerColor(kRed)
        h4.SetMarkerStyle(20)
        h4.SetMarkerSize(0.5)
        h4.Draw("epsame")
        line = TLine(105,1,160,1)
        line.Draw('same')
        # outhist settings
        outhist.SetLineColor(kBlack);
        outhist.SetFillColorAlpha(33, 0.5)
        outhist.SetLineWidth(2);
        # Y axis outhist plot settings
        outhist.GetYaxis().SetTitleSize(20);
        outhist.GetYaxis().SetTitleFont(43);
        outhist.GetYaxis().SetTitleOffset(1.55);
        # bkghist settings
        bkghist.SetLineColor(kBlack);
        bkghist.SetMarkerSize(0.7)
        bkghist.SetLineWidth(2);
        # Ratio plot (h3) settings
        h3.SetTitle(""); # Remove the ratio title
        # Y axis ratio plot settings
        h3.GetYaxis().SetTitle("data/fit ");
        h3.GetYaxis().SetNdivisions(505);
        h3.GetYaxis().SetTitleSize(20);
        h3.GetYaxis().SetTitleFont(43);
        h3.GetYaxis().SetTitleOffset(1.);
        h3.GetYaxis().SetLabelFont(43); # Absolute font size in pixel (precision 3)
        h3.GetYaxis().SetLabelSize(15);
        # X axis ratio plot settings
        h3.GetXaxis().SetTitleSize(20);
        h3.GetXaxis().SetTitleFont(43);
        h3.GetXaxis().SetTitleOffset(3.);
        h3.GetXaxis().SetLabelFont(43); # Absolute font size in pixel (precision 3)
        h3.GetXaxis().SetLabelSize(15)
        canv.SetBottomMargin(0)
        canv.Write()
        #canv.Print(winLow+'_'+winHigh+'_GPFit.pdf')
        #canv.Print(args.tag+'/GPFit_'+str(winLow)+'_'+str(winHigh)+'.pdf')
        canv.Print(args.tag+'/GPFit_'+str(seed)+'.pdf')
        if args.doSig:
            ### Plot signal stuff
            # Background-subtracted histogram overlaid with the injected
            # signal, then their ratio on a second canvas.
            canv2 = TCanvas('c2','c2')
            canv2.cd()
            sighist.SetMarkerColor(kBlack)
            sighist.SetMarkerStyle(20)
            bkgSubtracted.GetXaxis().SetRangeUser(105,158)
            bkgSubtracted.Draw('hist')
            sighist.Draw('samep')
            #canv2.Write()
            canv2.Print(args.tag+'/SigYield_root.pdf')
            canv3 = TCanvas('c3', 'c3')
            canv3.cd()
            ratio = sighist.Clone('ratio')
            ratio.Divide(bkgSubtracted)
            ratio.GetYaxis().SetRangeUser(-5,5)
            ratio.Draw()
            #canv3.Write()
            canv3.Print(args.tag+'/SigYield_Ratio_root.pdf')
            # The string literal below is a disabled DSCB signal fit; it is
            # an expression statement with no effect.
            """ dscb_func = TF1("dscb", DSCB, 105, 160, 7) dscb_func.SetParameters(1 # Normalization ,mass # mu ,1.475 # alpha_low ,1.902 # alpha_high ,12.1 # n_low ,11.6 # n_high ,1.86 ) # sigma #dscb_func.FixParameter(0,1) #Normalization Dont want to fix this dscb_func.FixParameter(1,mass) #Mass Fixed to middle of window dscb_func.FixParameter(2, 1.475) #alpha_low dscb_func.FixParameter(3, 1.902) #alpha_high dscb_func.FixParameter(4, 12.1) #n_low dscb_func.FixParameter(5, 11.6) #n_high dscb_func.FixParameter(6, 1.68) # sigma bkgSubtracted.Fit("dscb","","", winLow, winHigh) fitResult = bkgSubtracted.GetFunction("dscb") norm = fitResult.GetParameter(0) ss = fitResult.Integral(winLow,winHigh) """
            #canv.cd()
            #bkgSubtracted.GetXaxis().SetRangeUser(120,130)
            #bkgSubtracted.Draw()
            #fitResult.Draw('same')
            #canv.Print(args.tag+'/fitResult.pdf')
            #print fitResult.Integral(120,130)
            # Second disabled block (full-window ratio plot), also a no-op
            # string literal.
            """ canv4 = TCanvas('c4','c4') gp_pred_full = GPh.getHisto(y_pred_full, sigma_full, 'GP Fit full') gp_pred_full.Divide(outhist) gp_pred_full.GetYaxis().SetRangeUser(0.95,1.05) gp_pred_full.Draw() canv4.Print(args.tag+'/Full_window_ratio.pdf') """
    f.Close()
class GaussProcess:
    """Minimise a blended two-part objective with a Gaussian-process surrogate.

    Construction reads the search domain from ``Config.LSTM_CONFIG['domain']``,
    draws one random normalised sample and evaluates the objective on it, so
    ``self.x`` / ``self.y`` always start with one observation.  ``optimize``
    then repeatedly picks a candidate with the acquisition function, evaluates
    it, and refits the surrogate.
    """

    # Attributes that receive two independent draws per sample in ``gen_sample``.
    _PAIRED_ATTRS = ("sliding", "network_size", "layer_size")
    # Attributes that ``decode_sample`` keeps as-is instead of casting to int.
    _REAL_VALUED_ATTRS = ("learning_rate", "dropout")

    def __init__(self, objective_function):
        """Store the objective and evaluate one initial random sample.

        Args:
            objective_function: Callable that receives the decoded
                hyper-parameter dict (positionally here, as ``item=`` in
                ``optimize``) plus an optional ``cloud_metrics`` keyword.
                Element ``[0]`` of its return value is used as the fitness.
        """
        self.objective_function = objective_function
        self.x = []  # normalised samples that have been evaluated
        self.y = []  # blended objective value for each entry of self.x
        self.name = []
        self.estimate = [0]  # surrogate estimates; slot 0 pairs with the initial sample
        self.cloud_metrics = {'train_data_type': 'cpu', 'predict_data': 'cpu'}
        self.alpha = Config.ALPHA
        # Must run after x, y, alpha and cloud_metrics exist: it evaluates the
        # first sample and appends to self.x / self.y.
        self._parse_domain()
        self.population_size = Config.POPULATION_SIZE
        self.max_iteration = Config.MAX_ITER

    def _draw_normalized(self, index, kind):
        """Draw one value for attribute ``index``; ``None`` for an unknown kind.

        Discrete attributes pick a member of ``range_val[index]`` and rescale it
        by the attribute's min/max; continuous attributes draw from
        ``np.random.rand()``.
        """
        if kind == 'discrete':
            return (np.random.choice(self.range_val[index]) - self.min_val[index]) / (
                self.max_val[index] - self.min_val[index])
        if kind == 'continuous':
            # Kept in the original multiply-then-divide form so results stay
            # bit-identical (including the degenerate max == min case).
            return (np.random.rand() * (self.max_val[index] - self.min_val[index])) / (
                self.max_val[index] - self.min_val[index])
        return None

    def gen_sample(self):
        """Return one random sample as a flat list of normalised values.

        Every attribute contributes one draw; attributes named in
        ``_PAIRED_ATTRS`` contribute a second, independent draw right after the
        first, so the list is longer than ``self.name``.
        """
        x_sample = []
        for index, kind in enumerate(self.type_attr):
            draws = 2 if self.name[index] in self._PAIRED_ATTRS else 1
            for _ in range(draws):
                value = self._draw_normalized(index, kind)
                if value is not None:
                    x_sample.append(value)
        return x_sample

    def _parse_domain(self):
        """Load the search domain from ``Config`` and seed ``self.x`` / ``self.y``.

        Populates ``name``, ``type_attr``, ``min_val``, ``max_val`` and
        ``range_val`` (all index-aligned with the domain list), then draws,
        prints and evaluates one initial sample.
        """
        names = []
        type_attr = []
        max_val = []
        min_val = []
        range_val = []
        for attr in Config.LSTM_CONFIG['domain']:
            names.append(attr['name'])
            type_attr.append(attr['type'])
            if attr['type'] == 'discrete':
                # Discrete domains list their values; first/last are the bounds.
                min_val.append(attr['domain'][0])
                max_val.append(attr['domain'][-1])
            elif attr['type'] == 'continuous':
                min_val.append(attr['domain'][0])
                max_val.append(attr['domain'][1])
            range_val.append(attr['domain'])

        self.name = names
        self.type_attr = type_attr
        self.max_val = np.array(max_val)
        self.min_val = np.array(min_val)
        self.range_val = range_val

        x_sample = self.gen_sample()
        print(x_sample)
        self.x.append(x_sample)
        x_cpu, x_mem = self.split_sample(x_sample)
        # @TODO thangbk2209 need to add fitness_type and cloud_metrics into objective_function
        cpu_fitness = self.objective_function(
            self.decode_sample(x_cpu), cloud_metrics=self.cloud_metrics)[0]
        mem_fitness = self.objective_function(self.decode_sample(x_mem))[0]
        self.y.append(self.alpha * cpu_fitness + (1 - self.alpha) * mem_fitness)

    def split_sample(self, sample):
        """Turn a normalised sample into two de-normalised parameter vectors.

        Positions are hard-coded: 0-1 go to both outputs, 2/4/6 only to
        ``x_cpu``, 3/5/7 only to ``x_mem``, 8-9 go to both without integer
        truncation, and anything beyond is truncated without adding the
        minimum back.

        NOTE(review): for positions >= 2 the value is read from ``sample[j]``
        where ``j`` is the *domain* index (2, 3, 4, then ``i - 3``), not from
        ``sample[i]``.  As written ``x_cpu`` and ``x_mem`` therefore come out
        identical -- confirm whether ``sample[i]`` was intended before changing.

        Returns:
            Tuple ``(x_cpu, x_mem)`` of lists.
        """
        def scaled(j):
            return sample[j] * (self.max_val[j] - self.min_val[j])

        x_cpu = []
        x_mem = []
        for i in range(len(sample)):
            if i in (0, 1):
                value = int(scaled(i)) + self.min_val[i]
                x_cpu.append(value)
                x_mem.append(value)
            elif i in (2, 4, 6):
                j = 2 + (i - 2) // 2  # 2, 4, 6 -> 2, 3, 4
                x_cpu.append(int(scaled(j)) + self.min_val[j])
            elif i in (3, 5, 7):
                j = 2 + (i - 3) // 2  # 3, 5, 7 -> 2, 3, 4
                x_mem.append(int(scaled(j)) + self.min_val[j])
            elif i in (8, 9):
                j = i - 3
                value = scaled(j) + self.min_val[j]
                x_cpu.append(value)
                x_mem.append(value)
            else:
                value = int(scaled(i - 3))
                x_cpu.append(value)
                x_mem.append(value)
        return x_cpu, x_mem

    def decode_sample(self, sample):
        """Map ``sample`` onto ``self.name`` as a dict.

        Values for ``learning_rate`` and ``dropout`` are passed through; every
        other value is cast with ``int``.
        """
        return {
            name: sample[i] if name in self._REAL_VALUED_ATTRS else int(sample[i])
            for i, name in enumerate(self.name)
        }

    def surrogate(self, x):
        """Return ``(mean, std)`` predicted by the Gaussian-process model for ``x``.

        Warnings raised while predicting are suppressed.
        """
        with catch_warnings():
            simplefilter('ignore')
            return self.gaussian_process_model.predict(x, return_std=True)

    def acquisition(self, x, x_samples):
        """Score candidates by probability of improvement.

        Args:
            x: Samples evaluated so far; their lowest surrogate prediction is
                the reference value.
            x_samples: Candidate samples to score.

        Returns:
            ``norm.cdf((mu - best) / (std + 1e-9))`` for each candidate.
        """
        yhat, _ = self.surrogate(x)
        best = min(yhat)
        mu, std = self.surrogate(x_samples)
        try:
            # Predictions may come back as a column; flatten to match ``std``.
            mu = mu[:, 0]
        except (IndexError, TypeError):
            # Already one-dimensional (or not 2-D indexable): use as returned.
            pass
        return norm.cdf((mu - best) / (std + 1E-9))

    def opt_acquisition(self, x):
        """Random search: draw ``population_size`` candidates, return the lowest-scoring one."""
        x_samples = [self.gen_sample() for _ in range(self.population_size)]
        scores = self.acquisition(x, x_samples)
        return x_samples[argmin(scores)]

    def optimize(self):
        """Run the optimisation loop and return the best sample found.

        Each of ``max_iteration`` rounds selects a candidate, evaluates the
        blended objective, records it unless the result is NaN, and refits the
        surrogate.  Afterwards every observation is written to
        ``optimization_result.csv`` in the current working directory.

        Returns:
            The entry of ``self.x`` with the smallest ``self.y``.
        """
        self.gaussian_process_model = GaussianProcessRegressor()
        for _ in range(self.max_iteration):
            # select the next point to sample
            x = self.opt_acquisition(self.x)
            # sample the point
            x_cpu, x_mem = self.split_sample(x)
            cpu_fitness = self.objective_function(
                item=self.decode_sample(x_cpu), cloud_metrics=self.cloud_metrics)[0]
            mem_fitness = self.objective_function(item=self.decode_sample(x_mem))[0]
            actual = self.alpha * cpu_fitness + (1 - self.alpha) * mem_fitness
            # summarize the finding
            est, _ = self.surrogate([x])
            print(est)
            print('>x={}, f()={}, actual={}'.format(x, est, actual))
            # add the data to the dataset; x, y and estimate grow together so
            # the CSV writer below can index them in lockstep
            if not math.isnan(actual):
                self.x = vstack((self.x, [x]))
                self.y = vstack((self.y, [actual]))
                self.estimate.append(est)
            # update the gaussian model
            self.gaussian_process_model.fit(self.x, self.y)

        optimal_sample_idx = argmin(self.y)
        print(
            f'Best Result: x1={self.x[optimal_sample_idx][0]},x2={self.x[optimal_sample_idx][1]}, y={self.y[optimal_sample_idx]}'
        )
        print(len(self.x))
        print(len(self.y))
        print(len(self.estimate))
        # The context manager guarantees the file is flushed and closed even
        # if formatting a row raises.
        with open("optimization_result.csv", "w") as files:
            # NOTE(review): the third column holds the surrogate estimate, not
            # the "actual" value the header suggests; header kept unchanged
            # for existing consumers.
            files.write("x,y,actual\n")
            for i in range(len(self.y)):
                print(i)
                files.write("{},{},{}\n".format(self.x[i], self.y[i], self.estimate[i]))
        return self.x[optimal_sample_idx]
# The Object Inspector can be used to look up the Imputer's parameters.
# Build a mean imputer over axis 0 and fit it on the whole training matrix.
imputer = Imputer(missing_values='NaN', strategy='mean', axis=0).fit(X_train[:, :])
# Overwrite the training matrix in place with the imputed values.
X_train[:, :] = imputer.transform(X_train[:, :])

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

# Instantiate a Gaussian Process model: constant amplitude times an RBF kernel.
kernel = (
    C(constant_value=1.0, constant_value_bounds=(1e-3, 1e3))
    * RBF(length_scale=10, length_scale_bounds=(1e-2, 1e2))
)
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X_train, y_train)

# Load the testing data and discard the columns that are not used.
data_test = pd.read_csv('NEW-DATA-2.T15.csv').drop(columns=[
    '1:Date',
    '2:Time',
    '19:Exterior_Entalpic_1',
    '20:Exterior_Entalpic_2',
    '21:Exterior_Entalpic_turbo',
    '24:Day_Of_Week',
])

# Number of columns, then their names
print(len(data_test.columns))
print(data_test.columns)
def parameterized_inference(algorithm='carl', morphing_aware=False, training_sample='baseline', # 'baseline', 'basis', 'random' use_smearing=False, denominator=0, alpha=None, training_sample_size=None, do_neyman=False, options=''): # all other options in a string """ Likelihood ratio estimation through parameterized or morphing-aware versions of CARL, CASCAL, ROLR, and RASCAL. :param algorithm: Inference strategy. 'carl' for CARL, 'score' for an unnamed strategy that just uses the score, 'combined' for CASCAL, 'regression' for ROLR, 'combinedregression' for RASCAL, 'mxe' for modified cross entropy, or 'combinedmxe' for modified cross entropy + score. :param morphing_aware: bool that decides whether a morphing-aware or morphing-agnostic parameterized architecture is used. :param training_sample: Training sample. Can be 'baseline', 'basis', or 'random'. :param use_smearing: Whether to use the training and evaluation sample with (simplified) detector simulation. :param denominator: Which of five predefined denominator (reference) hypotheses to use. :param alpha: Hyperparameter that multiplies score term in loss function for RASCAL and CASCAL. If None, default values are used. :param training_sample_size: If not None, limits the training sample size to the given value. :param do_neyman: Switches on the evaluation of toy experiments for the Neyman construction. :param options: Further options in a list of strings or string. 'learns' changes the architecture such that the fully connected networks represent s rather than log r. 'new' changes the samples. 'short' and 'long' change the number of epochs. 'deep' and 'shallow' use more or less hidden layers. 'factorsm' changes the architecture in the morphing-aware mode such that the SM and the deviation from it are modelled independently. 'slowlearning' and 'fastlearning' change the learning rate, while 'constantlr' turns off the learning rate decay. 
'neyman2' and 'neyman3' change the Neyman construction sample, and 'recalibration' activates the calculation of E[r] on a separate sample for the expectation calibration step. 'debug' activates a debug mode with much smaller samples. """ logging.info('Starting parameterized inference') ################################################################################ # Settings ################################################################################ assert algorithm in ['carl', 'score', 'combined', 'regression', 'combinedregression', 'mxe', 'combinedmxe'] assert training_sample in ['baseline', 'basis', 'random'] random_theta_mode = training_sample == 'random' basis_theta_mode = training_sample == 'basis' learn_logr_mode = ('learns' not in options) new_sample_mode = ('new' in options) short_mode = ('short' in options) long_mode = ('long' in options) deep_mode = ('deep' in options) shallow_mode = ('shallow' in options) debug_mode = ('debug' in options) factor_out_sm_in_aware_mode = morphing_aware and ('factorsm' in options) small_lr_mode = ('slowlearning' in options) large_lr_mode = ('fastlearning' in options) large_batch_mode = ('largebatch' in options) small_batch_mode = ('smallbatch' in options) constant_lr_mode = ('constantlr' in options) neyman2_mode = ('neyman2' in options) neyman3_mode = ('neyman3' in options) recalibration_mode = ('recalibration' in options) filename_addition = '' if morphing_aware: filename_addition = '_aware' if random_theta_mode: filename_addition += '_random' elif basis_theta_mode: filename_addition += '_basis' if not learn_logr_mode: filename_addition += '_learns' if factor_out_sm_in_aware_mode: filename_addition += '_factorsm' learning_rate = settings.learning_rate_default if small_lr_mode: filename_addition += '_slowlearning' learning_rate = settings.learning_rate_small elif large_lr_mode: filename_addition += '_fastlearning' learning_rate = settings.learning_rate_large lr_decay = settings.learning_rate_decay if constant_lr_mode: 
lr_decay = 0. filename_addition += '_constantlr' batch_size = settings.batch_size_default if large_batch_mode: filename_addition += '_largebatch' batch_size = settings.batch_size_large elif small_batch_mode: filename_addition += '_smallbatch' batch_size = settings.batch_size_small settings.batch_size = batch_size alpha_regression = settings.alpha_regression_default alpha_carl = settings.alpha_carl_default alpha_mxe = settings.alpha_mxe_default if alpha is not None: alpha_regression = alpha alpha_carl = alpha alpha_mxe = alpha precision = int(max(- math.floor(np.log10(alpha)) + 1, 0)) filename_addition += '_alpha_' + format_number(alpha, precision) n_hidden_layers = settings.n_hidden_layers_default if shallow_mode: n_hidden_layers = settings.n_hidden_layers_shallow filename_addition += '_shallow' elif deep_mode: n_hidden_layers = settings.n_hidden_layers_deep filename_addition += '_deep' n_epochs = settings.n_epochs_default early_stopping = True early_stopping_patience = settings.early_stopping_patience if debug_mode: n_epochs = settings.n_epochs_short early_stopping = False filename_addition += '_debug' elif long_mode: n_epochs = settings.n_epochs_long filename_addition += '_long' elif short_mode: n_epochs = settings.n_epochs_short early_stopping = False filename_addition += '_short' if training_sample_size is not None: filename_addition += '_trainingsamplesize_' + str(training_sample_size) n_epoch_factor = int(len(settings.thetas_train) * (settings.n_events_baseline_num + settings.n_events_baseline_den) / training_sample_size) n_epochs *= n_epoch_factor lr_decay /= float(n_epoch_factor) early_stopping_patience *= n_epoch_factor input_X_prefix = '' if use_smearing: input_X_prefix = 'smeared_' filename_addition += '_smeared' theta1 = settings.theta1_default input_filename_addition = '' if denominator > 0: input_filename_addition = '_denom' + str(denominator) filename_addition += '_denom' + str(denominator) theta1 = settings.theta1_alternatives[denominator - 1] if 
new_sample_mode: filename_addition += '_new' input_filename_addition += '_new' n_expected_events_neyman = settings.n_expected_events_neyman n_neyman_null_experiments = settings.n_neyman_null_experiments n_neyman_alternate_experiments = settings.n_neyman_alternate_experiments neyman_filename = 'neyman' if neyman2_mode: neyman_filename = 'neyman2' n_expected_events_neyman = settings.n_expected_events_neyman2 n_neyman_null_experiments = settings.n_neyman2_null_experiments n_neyman_alternate_experiments = settings.n_neyman2_alternate_experiments if neyman3_mode: neyman_filename = 'neyman3' n_expected_events_neyman = settings.n_expected_events_neyman3 n_neyman_null_experiments = settings.n_neyman3_null_experiments n_neyman_alternate_experiments = settings.n_neyman3_alternate_experiments results_dir = settings.base_dir + '/results/parameterized' neyman_dir = settings.neyman_dir + '/parameterized' logging.info('Main settings:') logging.info(' Algorithm: %s', algorithm) logging.info(' Morphing-aware: %s', morphing_aware) logging.info(' Training sample: %s', training_sample) logging.info(' Denominator theta: denominator %s = theta %s = %s', denominator, theta1, settings.thetas[theta1]) logging.info('Options:') logging.info(' Number of hidden layers: %s', n_hidden_layers) if algorithm == 'combined': logging.info(' alpha: %s', alpha_carl) elif algorithm == 'combinedregression': logging.info(' alpha: %s', alpha_regression) elif algorithm == 'combinedmxe': logging.info(' alpha: %s', alpha_mxe) logging.info(' Batch size: %s', batch_size) logging.info(' Learning rate: %s', learning_rate) logging.info(' Learning rate decay: %s', lr_decay) logging.info(' Number of epochs: %s', n_epochs) logging.info(' Training samples: %s', 'all' if training_sample_size is None else training_sample_size) if do_neyman: logging.info(' NC experiments: (%s alternate + %s null) experiments with %s alternate events each', n_neyman_alternate_experiments, n_neyman_null_experiments, 
n_expected_events_neyman) else: logging.info(' NC experiments: False') logging.info(' Debug mode: %s', debug_mode) ################################################################################ # Data ################################################################################ # Load data train_filename = '_train' if random_theta_mode: train_filename += '_random' elif basis_theta_mode: train_filename += '_basis' train_filename += input_filename_addition X_train = np.load(settings.unweighted_events_dir + '/' + input_X_prefix + 'X' + train_filename + '.npy') X_train_unshuffled = np.load(settings.unweighted_events_dir + '/' + input_X_prefix + 'X' + train_filename + '.npy') y_train = np.load(settings.unweighted_events_dir + '/y' + train_filename + '.npy') scores_train = np.load(settings.unweighted_events_dir + '/scores' + train_filename + '.npy') r_train = np.load(settings.unweighted_events_dir + '/r' + train_filename + '.npy') theta0_train = np.load(settings.unweighted_events_dir + '/theta0' + train_filename + '.npy') theta0_train_unshuffled = np.load(settings.unweighted_events_dir + '/theta0' + train_filename + '.npy') X_calibration = np.load( settings.unweighted_events_dir + '/' + input_X_prefix + 'X_calibration' + input_filename_addition + '.npy') weights_calibration = np.load( settings.unweighted_events_dir + '/weights_calibration' + input_filename_addition + '.npy') if recalibration_mode: X_recalibration = np.load( settings.unweighted_events_dir + '/' + input_X_prefix + 'X_recalibration' + '.npy') X_test = np.load( settings.unweighted_events_dir + '/' + input_X_prefix + 'X_test' + input_filename_addition + '.npy') r_test = np.load(settings.unweighted_events_dir + '/r_test' + input_filename_addition + '.npy') X_roam = np.load( settings.unweighted_events_dir + '/' + input_X_prefix + 'X_roam' + input_filename_addition + '.npy') n_roaming = len(X_roam) X_illustration = np.load( settings.unweighted_events_dir + '/' + input_X_prefix + 'X_illustration' + 
input_filename_addition + '.npy') if do_neyman: X_neyman_alternate = np.load( settings.unweighted_events_dir + '/neyman/' + input_X_prefix + 'X_' + neyman_filename + '_alternate.npy') n_events_test = X_test.shape[0] assert settings.n_thetas == r_test.shape[0] # Shuffle training data X_train, y_train, scores_train, r_train, theta0_train = shuffle(X_train, y_train, scores_train, r_train, theta0_train, random_state=44) # Limit training sample size keras_verbosity = 2 if training_sample_size is not None: keras_verbosity = 0 original_training_sample_size = X_train.shape[0] X_train = X_train[:training_sample_size] y_train = y_train[:training_sample_size] scores_train = scores_train[:training_sample_size] r_train = r_train[:training_sample_size] theta0_train = theta0_train[:training_sample_size] logging.info('Reduced training sample size from %s to %s (factor %s)', original_training_sample_size, X_train.shape[0], n_epoch_factor) # Normalize data scaler = StandardScaler() scaler.fit(np.array(X_train, dtype=np.float64)) X_train_transformed = scaler.transform(X_train) X_train_transformed_unshuffled = scaler.transform(X_train_unshuffled) X_test_transformed = scaler.transform(X_test) X_roam_transformed = scaler.transform(X_roam) X_calibration_transformed = scaler.transform(X_calibration) X_illustration_transformed = scaler.transform(X_illustration) if recalibration_mode: X_recalibration_transformed = scaler.transform(X_recalibration) if do_neyman: X_neyman_alternate_transformed = scaler.transform( X_neyman_alternate.reshape((-1, X_neyman_alternate.shape[2]))) # Roaming data X_thetas_train = np.hstack((X_train_transformed, theta0_train)) X_thetas_train_unshuffled = np.hstack((X_train_transformed_unshuffled, theta0_train_unshuffled)) y_logr_score_train = np.hstack((y_train.reshape(-1, 1), np.log(r_train).reshape((-1, 1)), scores_train)) xi = np.linspace(-1.0, 1.0, settings.n_thetas_roam) yi = np.linspace(-1.0, 1.0, settings.n_thetas_roam) xx, yy = np.meshgrid(xi, yi) thetas_roam 
= np.asarray((xx.flatten(), yy.flatten())).T X_thetas_roam = [] for i in range(n_roaming): X_thetas_roam.append(np.zeros((settings.n_thetas_roam ** 2, X_roam_transformed.shape[1] + 2))) X_thetas_roam[-1][:, :-2] = X_roam_transformed[i, :] X_thetas_roam[-1][:, -2:] = thetas_roam if debug_mode: X_thetas_train = X_thetas_train[::100] y_logr_score_train = y_logr_score_train[::100] X_test_transformed = X_test[::100] r_test = r_test[:, ::100] X_calibration_transformed = X_calibration_transformed[::100] weights_calibration = weights_calibration[:, ::100] X_illustration_transformed = X_illustration_transformed[::100] if recalibration_mode: X_recalibration_transformed = X_recalibration_transformed[::100] n_events_test = len(X_test_transformed) ################################################################################ # Training ################################################################################ if algorithm == 'carl': if morphing_aware: regr = KerasRegressor(lambda: make_classifier_carl_morphingaware(n_hidden_layers=n_hidden_layers, learn_log_r=learn_logr_mode, learning_rate=learning_rate), epochs=n_epochs, validation_split=settings.validation_split, verbose=keras_verbosity) else: regr = KerasRegressor(lambda: make_classifier_carl(n_hidden_layers=n_hidden_layers, learn_log_r=learn_logr_mode, learning_rate=learning_rate), epochs=n_epochs, validation_split=settings.validation_split, verbose=keras_verbosity) elif algorithm == 'score': if morphing_aware: regr = KerasRegressor(lambda: make_classifier_score_morphingaware(n_hidden_layers=n_hidden_layers, learn_log_r=learn_logr_mode, learning_rate=learning_rate), epochs=n_epochs, validation_split=settings.validation_split, verbose=keras_verbosity) else: regr = KerasRegressor(lambda: make_classifier_score(n_hidden_layers=n_hidden_layers, learn_log_r=learn_logr_mode, learning_rate=learning_rate), epochs=n_epochs, validation_split=settings.validation_split, verbose=keras_verbosity) elif algorithm == 'combined': if 
morphing_aware: regr = KerasRegressor( lambda: make_classifier_combined_morphingaware(n_hidden_layers=n_hidden_layers, learn_log_r=learn_logr_mode, alpha=alpha_carl, learning_rate=learning_rate), epochs=n_epochs, validation_split=settings.validation_split, verbose=keras_verbosity) else: regr = KerasRegressor(lambda: make_classifier_combined(n_hidden_layers=n_hidden_layers, learn_log_r=learn_logr_mode, alpha=alpha_carl, learning_rate=learning_rate), epochs=n_epochs, validation_split=settings.validation_split, verbose=keras_verbosity) elif algorithm == 'regression': if morphing_aware: regr = KerasRegressor(lambda: make_regressor_morphingaware(n_hidden_layers=n_hidden_layers, factor_out_sm=factor_out_sm_in_aware_mode, learning_rate=learning_rate), epochs=n_epochs, validation_split=settings.validation_split, verbose=keras_verbosity) else: regr = KerasRegressor(lambda: make_regressor(n_hidden_layers=n_hidden_layers), epochs=n_epochs, validation_split=settings.validation_split, verbose=keras_verbosity) elif algorithm == 'combinedregression': if morphing_aware: regr = KerasRegressor( lambda: make_combined_regressor_morphingaware(n_hidden_layers=n_hidden_layers, factor_out_sm=factor_out_sm_in_aware_mode, alpha=alpha_regression, learning_rate=learning_rate), epochs=n_epochs, validation_split=settings.validation_split, verbose=keras_verbosity) else: regr = KerasRegressor(lambda: make_combined_regressor(n_hidden_layers=n_hidden_layers, alpha=alpha_regression, learning_rate=learning_rate), epochs=n_epochs, validation_split=settings.validation_split, verbose=keras_verbosity) elif algorithm == 'mxe': if morphing_aware: raise NotImplementedError() else: regr = KerasRegressor(lambda: make_modified_xe_model(n_hidden_layers=n_hidden_layers, learning_rate=learning_rate), epochs=n_epochs, validation_split=settings.validation_split, verbose=keras_verbosity) elif algorithm == 'combinedmxe': if morphing_aware: raise NotImplementedError() else: regr = KerasRegressor(lambda: 
make_combined_modified_xe_model(n_hidden_layers=n_hidden_layers, alpha=alpha_mxe, learning_rate=learning_rate), epochs=n_epochs, validation_split=settings.validation_split, verbose=keras_verbosity) else: raise ValueError() # Callbacks callbacks = [] detailed_history = {} callbacks.append(DetailedHistory(detailed_history)) if not constant_lr_mode: def lr_scheduler(epoch): return learning_rate * np.exp(- epoch * lr_decay) callbacks.append(LearningRateScheduler(lr_scheduler)) if early_stopping: callbacks.append(EarlyStopping(verbose=1, patience=early_stopping_patience)) # Training logging.info('Starting training') history = regr.fit(X_thetas_train[::], y_logr_score_train[::], callbacks=callbacks, batch_size=batch_size) # Save metrics def _save_metrics(key, filename): try: metrics = np.asarray([history.history[key], history.history['val_' + key]]) np.save(results_dir + '/traininghistory_' + filename + '_' + algorithm + filename_addition + '.npy', metrics) except KeyError: logging.warning('Key %s not found in per-epoch history. Available keys: %s', key, list(history.history.keys())) try: detailed_metrics = np.asarray(detailed_history[key]) np.save( results_dir + '/traininghistory_100batches_' + filename + '_' + algorithm + filename_addition + '.npy', detailed_metrics) except KeyError: logging.warning('Key %s not found in per-batch history. 
Available keys: %s', key, list(detailed_history.keys())) _save_metrics('loss', 'loss') _save_metrics('full_cross_entropy', 'ce') _save_metrics('full_modified_cross_entropy', 'mce') _save_metrics('full_mse_log_r', 'mse_logr') _save_metrics('full_mse_score', 'mse_scores') # Evaluate rhat on training sample # r_hat_train = np.exp(regr.predict(X_thetas_train_unshuffled)[:, 1]) # np.save(results_dir + '/r_train_' + algorithm + filename_addition + '.npy', r_hat_train) ################################################################################ # Raw evaluation loop ################################################################################ # carl wrapper # ratio = ClassifierScoreRatio(regr, prefit=True) logging.info('Starting evaluation') expected_llr = [] mse_log_r = [] trimmed_mse_log_r = [] eval_times = [] expected_r_vs_sm = [] if recalibration_mode: recalibration_expected_r = [] for t, theta in enumerate(settings.thetas): if (t + 1) % 100 == 0: logging.info('Starting theta %s / %s', t + 1, settings.n_thetas) ################################################################################ # Evaluation ################################################################################ # Prepare test data thetas0_array = np.zeros((X_test_transformed.shape[0], 2), dtype=X_test_transformed.dtype) thetas0_array[:, :] = theta X_thetas_test = np.hstack((X_test_transformed, thetas0_array)) # Evaluation time_before = time.time() prediction = regr.predict(X_thetas_test) eval_times.append(time.time() - time_before) this_log_r = prediction[:, 1] this_score = prediction[:, 2:4] if morphing_aware: this_wi = prediction[:, 4:19] this_ri = prediction[:, 19:] logging.debug('Morphing weights for theta %s (%s): %s', t, theta, this_wi[0]) # Extract numbers of interest expected_llr.append(- 2. 
* settings.n_expected_events / n_events_test * np.sum(this_log_r)) mse_log_r.append(calculate_mean_squared_error(np.log(r_test[t]), this_log_r, 0.)) trimmed_mse_log_r.append(calculate_mean_squared_error(np.log(r_test[t]), this_log_r, 'auto')) if t == settings.theta_observed: r_sm = np.exp(this_log_r) expected_r_vs_sm.append(np.mean(np.exp(this_log_r) / r_sm)) # For benchmark thetas, save more info if t == settings.theta_benchmark_nottrained: np.save(results_dir + '/r_nottrained_' + algorithm + filename_addition + '.npy', np.exp(this_log_r)) np.save(results_dir + '/scores_nottrained_' + algorithm + filename_addition + '.npy', this_score) np.save(results_dir + '/r_vs_sm_nottrained_' + algorithm + filename_addition + '.npy', np.exp(this_log_r) / r_sm) if morphing_aware: np.save(results_dir + '/morphing_ri_nottrained_' + algorithm + filename_addition + '.npy', this_ri) np.save(results_dir + '/morphing_wi_nottrained_' + algorithm + filename_addition + '.npy', this_wi) elif t == settings.theta_benchmark_trained: np.save(results_dir + '/r_trained_' + algorithm + filename_addition + '.npy', np.exp(this_log_r)) np.save(results_dir + '/scores_trained_' + algorithm + filename_addition + '.npy', this_score) np.save(results_dir + '/r_vs_sm_trained_' + algorithm + filename_addition + '.npy', np.exp(this_log_r) / r_sm) if morphing_aware: np.save(results_dir + '/morphing_ri_trained_' + algorithm + filename_addition + '.npy', this_ri) np.save(results_dir + '/morphing_wi_trained_' + algorithm + filename_addition + '.npy', this_wi) ################################################################################ # Recalibration ################################################################################ if recalibration_mode: # Prepare data for recalibration thetas0_array = np.zeros((X_recalibration_transformed.shape[0], 2), dtype=X_recalibration_transformed.dtype) thetas0_array[:, :] = settings.thetas[t] X_thetas_recalibration = np.hstack((X_recalibration_transformed, 
thetas0_array)) # Evaluate recalibration data prediction = regr.predict(X_thetas_recalibration) this_r = np.exp(prediction[:, 1]) if t == settings.theta_observed: r_recalibration_sm = this_r recalibration_expected_r.append(np.mean(this_r / r_recalibration_sm)) ################################################################################ # Illustration ################################################################################ if t == settings.theta_benchmark_illustration: # Prepare data for illustration thetas0_array = np.zeros((X_illustration_transformed.shape[0], 2), dtype=X_illustration_transformed.dtype) thetas0_array[:, :] = settings.thetas[t] X_thetas_illustration = np.hstack((X_illustration_transformed, thetas0_array)) # Evaluate illustration data prediction = regr.predict(X_thetas_illustration) r_hat_illustration = np.exp(prediction[:, 1]) np.save(results_dir + '/r_illustration_' + algorithm + filename_addition + '.npy', r_hat_illustration) ################################################################################ # Neyman construction toys ################################################################################ if do_neyman: # Prepare alternate data for Neyman construction thetas0_array = np.zeros((X_neyman_alternate_transformed.shape[0], 2), dtype=X_neyman_alternate_transformed.dtype) thetas0_array[:, :] = theta X_thetas_neyman_alternate = np.hstack((X_neyman_alternate_transformed, thetas0_array)) # Neyman construction: evaluate alternate sample (raw) log_r_neyman_alternate = regr.predict(X_thetas_neyman_alternate)[:, 1] llr_neyman_alternate = -2. 
* np.sum(log_r_neyman_alternate.reshape((-1, n_expected_events_neyman)), axis=1) np.save(neyman_dir + '/' + neyman_filename + '_llr_alternate_' + str( t) + '_' + algorithm + filename_addition + '.npy', llr_neyman_alternate) # NC: null X_neyman_null = np.load( settings.unweighted_events_dir + '/neyman/' + input_X_prefix + 'X_' + neyman_filename + '_null_' + str( t) + '.npy') X_neyman_null_transformed = scaler.transform( X_neyman_null.reshape((-1, X_neyman_null.shape[2]))) # Prepare null data for Neyman construction thetas0_array = np.zeros((X_neyman_null_transformed.shape[0], 2), dtype=X_neyman_null_transformed.dtype) thetas0_array[:, :] = settings.thetas[t] X_thetas_neyman_null = np.hstack((X_neyman_null_transformed, thetas0_array)) # Neyman construction: evaluate null sample (raw) log_r_neyman_null = regr.predict(X_thetas_neyman_null)[:, 1] llr_neyman_null = -2. * np.sum(log_r_neyman_null.reshape((-1, n_expected_events_neyman)), axis=1) np.save(neyman_dir + '/' + neyman_filename + '_llr_null_' + str( t) + '_' + algorithm + filename_addition + '.npy', llr_neyman_null) # NC: null evaluated at alternate if t == settings.theta_observed: for tt in range(settings.n_thetas): X_neyman_null = np.load( settings.unweighted_events_dir + '/neyman/' + input_X_prefix + 'X_' + neyman_filename + '_null_' + str( tt) + '.npy') X_neyman_null_transformed = scaler.transform( X_neyman_null.reshape((-1, X_neyman_null.shape[2]))) X_thetas_neyman_null = np.hstack((X_neyman_null_transformed, thetas0_array)) # Neyman construction: evaluate null sample (raw) log_r_neyman_null = regr.predict(X_thetas_neyman_null)[:, 1] llr_neyman_null = -2. 
* np.sum(log_r_neyman_null.reshape((-1, n_expected_events_neyman)), axis=1) np.save(neyman_dir + '/' + neyman_filename + '_llr_nullatalternate_' + str( tt) + '_' + algorithm + filename_addition + '.npy', llr_neyman_null) # Save evaluation results expected_llr = np.asarray(expected_llr) mse_log_r = np.asarray(mse_log_r) trimmed_mse_log_r = np.asarray(trimmed_mse_log_r) expected_r_vs_sm = np.asarray(expected_r_vs_sm) np.save(results_dir + '/llr_' + algorithm + filename_addition + '.npy', expected_llr) np.save(results_dir + '/mse_logr_' + algorithm + filename_addition + '.npy', mse_log_r) np.save(results_dir + '/trimmed_mse_logr_' + algorithm + filename_addition + '.npy', trimmed_mse_log_r) np.save(results_dir + '/expected_r_vs_sm_' + algorithm + filename_addition + '.npy', expected_r_vs_sm) if recalibration_mode: recalibration_expected_r = np.asarray(recalibration_expected_r) np.save(results_dir + '/recalibration_expected_r_vs_sm_' + algorithm + filename_addition + '.npy', recalibration_expected_r) # Evaluation times logging.info('Evaluation timing: median %s s, mean %s s', np.median(eval_times), np.mean(eval_times)) logging.info('Starting roaming') r_roam = [] for i in range(n_roaming): prediction = regr.predict(X_thetas_roam[i]) r_roam.append(np.exp(prediction[:, 1])) r_roam = np.asarray(r_roam) np.save(results_dir + '/r_roam_' + algorithm + filename_addition + '.npy', r_roam) ################################################################################ # Calibrated evaluation loop ################################################################################ logging.info('Starting calibrated evaluation and roaming') expected_llr_calibrated = [] mse_log_r_calibrated = [] trimmed_mse_log_r_calibrated = [] r_roam_temp = np.zeros((settings.n_thetas, n_roaming)) eval_times = [] expected_r_vs_sm = [] if recalibration_mode: recalibration_expected_r = [] for t, theta in enumerate(settings.thetas): if (t + 1) % 100 == 0: logging.info('Starting theta %s / %s', t + 1, 
settings.n_thetas) ################################################################################ # Calibration ################################################################################ # Prepare data for calibration n_calibration_each = X_calibration_transformed.shape[0] thetas0_array = np.zeros((n_calibration_each, 2), dtype=X_calibration_transformed.dtype) thetas0_array[:, :] = settings.thetas[t] X_thetas_calibration = np.hstack((X_calibration_transformed, thetas0_array)) X_thetas_calibration = np.vstack((X_thetas_calibration, X_thetas_calibration)) y_calibration = np.zeros(2 * n_calibration_each) y_calibration[n_calibration_each:] = 1. w_calibration = np.zeros(2 * n_calibration_each) w_calibration[:n_calibration_each] = weights_calibration[t] w_calibration[n_calibration_each:] = weights_calibration[theta1] # Calibration ratio_calibrated = ClassifierScoreRatio( CalibratedClassifierScoreCV(regr, cv='prefit', method='isotonic') ) ratio_calibrated.fit(X_thetas_calibration, y_calibration, sample_weight=w_calibration) ################################################################################ # Evaluation ################################################################################ # Prepare data thetas0_array = np.zeros((X_test_transformed.shape[0], 2), dtype=X_test_transformed.dtype) thetas0_array[:, :] = settings.thetas[t] X_thetas_test = np.hstack((X_test_transformed, thetas0_array)) time_before = time.time() this_r, this_other = ratio_calibrated.predict(X_thetas_test) eval_times.append(time.time() - time_before) this_score = this_other[:, 1:3] # Extract numbers of interest expected_llr_calibrated.append(- 2. 
* settings.n_expected_events / n_events_test * np.sum(np.log(this_r))) mse_log_r_calibrated.append(calculate_mean_squared_error(np.log(r_test[t]), np.log(this_r), 0.)) trimmed_mse_log_r_calibrated.append(calculate_mean_squared_error(np.log(r_test[t]), np.log(this_r), 'auto')) if t == settings.theta_observed: r_sm = this_r expected_r_vs_sm.append(np.mean(this_r / r_sm)) # For benchmark theta, save more data if t == settings.theta_benchmark_nottrained: np.save(results_dir + '/scores_nottrained_' + algorithm + '_calibrated' + filename_addition + '.npy', this_score) np.save(results_dir + '/r_nottrained_' + algorithm + '_calibrated' + filename_addition + '.npy', this_r) np.save(results_dir + '/r_vs_sm_nottrained_' + algorithm + '_calibrated' + filename_addition + '.npy', this_r / r_sm) np.save(results_dir + '/calvalues_nottrained_' + algorithm + filename_addition + '.npy', ratio_calibrated.classifier_.calibration_sample[:n_calibration_each]) elif t == settings.theta_benchmark_trained: np.save(results_dir + '/scores_trained_' + algorithm + '_calibrated' + filename_addition + '.npy', this_score) np.save(results_dir + '/r_trained_' + algorithm + '_calibrated' + filename_addition + '.npy', this_r) np.save(results_dir + '/r_vs_sm_trained_' + algorithm + '_calibrated' + filename_addition + '.npy', this_r / r_sm) np.save(results_dir + '/calvalues_trained_' + algorithm + filename_addition + '.npy', ratio_calibrated.classifier_.calibration_sample[:n_calibration_each]) ################################################################################ # Recalibration ################################################################################ if recalibration_mode: # Prepare data for recalibration thetas0_array = np.zeros((X_recalibration_transformed.shape[0], 2), dtype=X_recalibration_transformed.dtype) thetas0_array[:, :] = settings.thetas[t] X_thetas_recalibration = np.hstack((X_recalibration_transformed, thetas0_array)) # Evaluate recalibration data this_r, _ = 
ratio_calibrated.predict(X_thetas_recalibration) if t == settings.theta_observed: r_recalibration_sm = this_r recalibration_expected_r.append(np.mean(this_r / r_recalibration_sm)) ################################################################################ # Illustration ################################################################################ if t == settings.theta_benchmark_illustration: # Prepare data for illustration thetas0_array = np.zeros((X_illustration_transformed.shape[0], 2), dtype=X_illustration_transformed.dtype) thetas0_array[:, :] = settings.thetas[t] X_thetas_illustration = np.hstack((X_illustration_transformed, thetas0_array)) # Evaluate illustration data r_hat_illustration, _ = ratio_calibrated.predict(X_thetas_illustration) np.save(results_dir + '/r_illustration_' + algorithm + '_calibrated' + filename_addition + '.npy', r_hat_illustration) ################################################################################ # Neyman construction toys ################################################################################ # Neyman construction if do_neyman: # Prepare alternate data for Neyman construction thetas0_array = np.zeros((X_neyman_alternate_transformed.shape[0], 2), dtype=X_neyman_alternate_transformed.dtype) thetas0_array[:, :] = settings.thetas[t] X_thetas_neyman_alternate = np.hstack((X_neyman_alternate_transformed, thetas0_array)) # Neyman construction: alternate (calibrated) r_neyman_alternate, _ = ratio_calibrated.predict(X_thetas_neyman_alternate) llr_neyman_alternate = -2. 
* np.sum(np.log(r_neyman_alternate).reshape((-1, n_expected_events_neyman)), axis=1) np.save(neyman_dir + '/' + neyman_filename + '_llr_alternate_' + str( t) + '_' + algorithm + '_calibrated' + filename_addition + '.npy', llr_neyman_alternate) # Neyman construction: null X_neyman_null = np.load( settings.unweighted_events_dir + '/neyman/' + input_X_prefix + 'X_' + neyman_filename + '_null_' + str( t) + '.npy') X_neyman_null_transformed = scaler.transform( X_neyman_null.reshape((-1, X_neyman_null.shape[2]))) # Prepare null data for Neyman construction thetas0_array = np.zeros((X_neyman_null_transformed.shape[0], 2), dtype=X_neyman_null_transformed.dtype) thetas0_array[:, :] = settings.thetas[t] X_thetas_neyman_null = np.hstack((X_neyman_null_transformed, thetas0_array)) # Neyman construction: evaluate null (calibrated) r_neyman_null, _ = ratio_calibrated.predict(X_thetas_neyman_null) llr_neyman_null = -2. * np.sum( np.log(r_neyman_null).reshape((-1, n_expected_events_neyman)), axis=1) np.save(neyman_dir + '/' + neyman_filename + '_llr_null_' + str( t) + '_' + algorithm + '_calibrated' + filename_addition + '.npy', llr_neyman_null) # NC: null evaluated at alternate if t == settings.theta_observed: for tt in range(settings.n_thetas): X_neyman_null = np.load( settings.unweighted_events_dir + '/neyman/' + input_X_prefix + 'X_' + neyman_filename + '_null_' + str( tt) + '.npy') X_neyman_null_transformed = scaler.transform( X_neyman_null.reshape((-1, X_neyman_null.shape[2]))) X_thetas_neyman_null = np.hstack((X_neyman_null_transformed, thetas0_array)) # Neyman construction: evaluate null sample (calibrated) r_neyman_null, _ = ratio_calibrated.predict(X_thetas_neyman_null) llr_neyman_null = -2. 
* np.sum( np.log(r_neyman_null).reshape((-1, n_expected_events_neyman)), axis=1) np.save(neyman_dir + '/' + neyman_filename + '_llr_nullatalternate_' + str( tt) + '_' + algorithm + '_calibrated' + filename_addition + '.npy', llr_neyman_null) # Roaming thetas0_array = np.zeros((n_roaming, 2), dtype=X_roam_transformed.dtype) thetas0_array[:, :] = settings.thetas[t] X_thetas_roaming_temp = np.hstack((X_roam_transformed, thetas0_array)) r_roam_temp[t, :], _ = ratio_calibrated.predict(X_thetas_roaming_temp) # Save evaluation results expected_llr_calibrated = np.asarray(expected_llr_calibrated) mse_log_r_calibrated = np.asarray(mse_log_r_calibrated) trimmed_mse_log_r_calibrated = np.asarray(trimmed_mse_log_r_calibrated) expected_r_vs_sm = np.asarray(expected_r_vs_sm) if recalibration_mode: recalibration_expected_r = np.asarray(recalibration_expected_r) np.save(results_dir + '/llr_' + algorithm + '_calibrated' + filename_addition + '.npy', expected_llr_calibrated) np.save(results_dir + '/mse_logr_' + algorithm + '_calibrated' + filename_addition + '.npy', mse_log_r_calibrated) np.save(results_dir + '/trimmed_mse_logr_' + algorithm + '_calibrated' + filename_addition + '.npy', trimmed_mse_log_r_calibrated) np.save(results_dir + '/expected_r_vs_sm_' + algorithm + '_calibrated' + filename_addition + '.npy', expected_r_vs_sm) if recalibration_mode: recalibration_expected_r = np.asarray(recalibration_expected_r) np.save( results_dir + '/recalibration_expected_r_vs_sm_' + algorithm + '_calibrated' + filename_addition + '.npy', recalibration_expected_r) # Evaluation times logging.info('Calibrated evaluation timing: median %s s, mean %s s', np.median(eval_times), np.mean(eval_times)) logging.info('Interpolating calibrated roaming') gp = GaussianProcessRegressor(normalize_y=True, kernel=C(1.0) * Matern(1.0, nu=0.5), n_restarts_optimizer=10) gp.fit(settings.thetas[:], np.log(r_roam_temp)) r_roam_calibrated = np.exp(gp.predict(np.c_[xx.ravel(), yy.ravel()])).T np.save(results_dir + 
'/r_roam_' + algorithm + '_calibrated' + filename_addition + '.npy', r_roam_calibrated)
def get_new_suggestions(self, study_id, trials, number=1):
    """Suggest the next trial for a study with a Gaussian-process surrogate.

    While the study has fewer completed trials than its configured
    ``randomInitTrials`` (default 3), the request is delegated to
    ``RandomSearchAlgorithm``.  Otherwise a Gaussian process is fitted to
    the completed trials, 100000 uniformly sampled candidates are scored
    with an upper-confidence-bound criterion (``mean + kappa * std``), and
    the best candidate is decoded into parameter values.

    Args:
        study_id: Id of the study to generate a suggestion for.
        trials: Passed through to the random-search fallback; not used by
            the Gaussian-process path.
        number: Currently ignored; a single trial is produced by the
            Gaussian-process path (see TODO below).

    Returns:
        The result of ``RandomSearchAlgorithm.get_new_suggestions`` during
        the random-initialisation phase, otherwise a one-element list
        holding the newly created and saved trial.
    """
    # TODO: Only support retuning one trial
    study = Study.objects.get(id=study_id)
    completed_trials = Trial.objects.filter(
        study_id=study_id, status="Completed")
    study_configuration_json = json.loads(study.study_configuration)
    random_init_trials = study_configuration_json.get("randomInitTrials", 3)
    params = study_configuration_json["params"]

    # Use random search until there are enough observations to fit a model.
    if len(completed_trials) < random_init_trials:
        randomSearchAlgorithm = RandomSearchAlgorithm()
        return randomSearchAlgorithm.get_new_suggestions(study_id, trials)

    def _split_feasible_points(param):
        # "sgd, adagrad , adam" -> ["sgd", "adagrad", "adam"]
        return [value.strip() for value in param["feasiblePoints"].split(",")]

    def _sorted_discrete_points(param):
        # "8, 2, 4" -> [2.0, 4.0, 8.0]
        return sorted(float(value) for value in _split_feasible_points(param))

    return_trial = Trial.create(study.id, "BayesianOptimizationTrial")
    acquisition_function_kappa = 5

    # One (low, high) pair per feature column, built in `params` order so the
    # columns line up with the training features and with the decoding loop
    # below.  Example: [(-4, 4), (-3, 3)]
    bounds = []
    for param in params:
        param_type = param["type"]
        if param_type in ("DOUBLE", "INTEGER"):
            bounds.append((param["minValue"], param["maxValue"]))
        elif param_type == "DISCRETE":
            feasible_points = _sorted_discrete_points(param)
            bounds.append((feasible_points[0], feasible_points[-1]))
        elif param_type == "CATEGORICAL":
            # One-hot encoding: one [0, 1] column per feasible point.
            bounds.extend((0, 1) for _ in _split_feasible_points(param))
    bounds = np.asarray(bounds)

    # Encode every completed trial as a feature row plus its objective value.
    init_points = []
    init_labels = []
    for trial in completed_trials:
        # Example: {"learning_rate": 0.01, "optimizer": "ftrl"}
        parameter_values_json = json.loads(trial.parameter_values)
        instance_features = []
        for param in params:
            param_type = param["type"]
            if param_type in ("DOUBLE", "INTEGER", "DISCRETE"):
                instance_features.append(
                    parameter_values_json[param["parameterName"]])
            elif param_type == "CATEGORICAL":
                # Example: "ftrl" out of ["sgd", "adagrad", "adam", "ftrl"]
                parameter_value = parameter_values_json[
                    param["parameterName"]]
                instance_features.extend(
                    1 if feasible_point == parameter_value else 0
                    for feasible_point in _split_feasible_points(param))
        init_points.append(instance_features)
        init_labels.append(trial.objective_value)

    train_features = np.asarray(init_points)
    train_labels = np.asarray(init_labels)

    gp = GaussianProcessRegressor(
        kernel=Matern(nu=2.5),
        n_restarts_optimizer=25,
    )
    gp.fit(train_features, train_labels)

    # Random candidates inside the bounds, shape [100000, n_features].
    x_tries = np.random.uniform(
        bounds[:, 0], bounds[:, 1], size=(100000, bounds.shape[0]))
    mean, std = gp.predict(x_tries, return_std=True)

    # Confidence bound criteria
    acquisition_function_values = mean + acquisition_function_kappa * std
    x_max = x_tries[acquisition_function_values.argmax()]
    x_max = np.clip(x_max, bounds[:, 0], bounds[:, 1])
    print("Current max acquision function choose: {}".format(x_max))

    # Decode the winning candidate back into named parameter values.
    # Example: {"hidden2": 3993.864683994805, "hidden1": 44.15441513231316}
    suggested_parameter_values_json = {}
    index = 0
    for param in params:
        param_name = param["parameterName"]
        param_type = param["type"]
        if param_type == "DOUBLE":
            suggested_parameter_values_json[param_name] = x_max[index]
            index += 1
        elif param_type == "INTEGER":
            suggested_parameter_values_json[param_name] = int(
                round(x_max[index]))
            index += 1
        elif param_type == "DISCRETE":
            # Snap the continuous candidate to a feasible point.  This branch
            # used to be unreachable because DISCRETE was also matched by the
            # DOUBLE case above it.
            suggested_parameter_values_json[param_name] = (
                self.find_closest_value_in_list(
                    _sorted_discrete_points(param), x_max[index]))
            index += 1
        elif param_type == "CATEGORICAL":
            # Example: ["sgd", "adagrad", "adam", "ftrl"]
            feasible_points = _split_feasible_points(param)
            # Pick the category whose one-hot column scored highest; on ties
            # the first such category wins.
            one_hot_scores = x_max[index:index + len(feasible_points)]
            suggested_parameter_values_json[param_name] = feasible_points[
                int(np.argmax(one_hot_scores))]
            index += len(feasible_points)

    return_trial.parameter_values = json.dumps(suggested_parameter_values_json)
    return_trial.save()

    return [return_trial]
# Transpose the sequence of records into one tuple per field; judging by the
# names below the fields are (timestamp, latitude, longitude) -- TODO confirm
# against the code that builds coordinates_list.
coordinates_list = list(zip(*coordinates_list))
timestamp_list = coordinates_list[0]
latitude_list = coordinates_list[1]
longitude_list = coordinates_list[2]
# Turn the latitudes into a column (via atleast_2d + transpose), the 2-D
# input layout handed to gp.fit / gp.predict below.
latitude_list = np.atleast_2d(latitude_list).T
# Instantiate a Gaussian Process model
kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
# Debug output of the regression inputs (latitude) and targets (longitude).
print("x2:", latitude_list)
print("y2:", longitude_list)
# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(latitude_list, longitude_list)
# Predict at the training latitudes themselves (no separate mesh is built
# here); return_std=True additionally returns the predictive standard
# deviation as sigma.
y_pred, sigma = gp.predict(latitude_list, return_std=True)
# Plot the observations and the prediction. sigma is not used in the lines
# visible here; NOTE(review): a confidence band may be drawn further down --
# confirm.
plt.figure()
plt.plot(latitude_list, longitude_list, 'r.', markersize=10, label='Observations')
plt.plot(latitude_list, y_pred, 'b-', label='Prediction')
plt.xlabel('$x$')