def test_export_sklearn_kernel_neg(self):
    x = numpy.array([[1, 2], [3, 4]], dtype=float)
    kernel = ExpSineSquared(length_scale=1.2, periodicity=1.1)

    def kernel_call_ynone(X, length_scale=1.2, periodicity=1.1,
                          pi=3.141592653589793):
        dists = squareform_pdist(X, metric='euclidean')
        arg = dists / periodicity * pi
        sin_of_arg = numpy.sin(arg)
        K = numpy.exp((sin_of_arg / length_scale) ** 2 * (-2))
        return K

    exp = kernel(x, None)
    got = kernel_call_ynone(x)
    self.assertEqualArray(exp, got)
    context = {'numpy.sin': numpy.sin, 'numpy.exp': numpy.exp,
               'numpy_pi': numpy.pi,
               'squareform_pdist': 'squareform_pdist'}
    onnx_code = translate_fct2onnx(kernel_call_ynone, context=context,
                                   output_names=['Z'])
    self.assertIn(
        "X, length_scale=1.2, periodicity=1.1, pi=3.14159", onnx_code)
    self.assertIn("-2", onnx_code)
    self.assertIn('metric="euclidean"', onnx_code)
def gp(train, test, t=132):
    X_train, X_test = sklearn_formatting(train, test)
    gp_kernel = 2**2 \
        + ExpSineSquared(1, 60000.0) \
        + ExpSineSquared(2, 120000.0) \
        + WhiteKernel(2.5)
    gpr = GaussianProcessRegressor(kernel=gp_kernel)
    gpr.fit(X_train, train.values)
    y_fit = gpr.predict(X_train, return_std=False)
    # predict a cycle
    y_pred = gpr.predict(X_test, return_std=False)
    rmse = error(test.values, y_pred)
    return y_fit, y_pred, rmse
def test_gpr_rbf_fitted_return_std_exp_sine_squared_double(self):
    gp = GaussianProcessRegressor(kernel=ExpSineSquared(),
                                  alpha=1e-7,
                                  n_restarts_optimizer=15,
                                  normalize_y=True)
    gp.fit(Xtrain_, Ytrain_)

    # return_cov=False, return_std=False
    options = {GaussianProcessRegressor: {"return_std": True}}
    gp.predict(Xtrain_, return_std=True)
    model_onnx = to_onnx(
        gp, initial_types=[('X', DoubleTensorType([None, None]))],
        options=options, dtype=np.float64)
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        Xtest_.astype(np.float64), gp, model_onnx,
        verbose=False,
        basename="SklearnGaussianProcessExpSineSquaredStdDouble-Out0-Dec4")
    self.check_outputs(gp, model_onnx, Xtest_.astype(np.float64),
                       predict_attributes=options[GaussianProcessRegressor],
                       decimal=4)
def test_kernel_ker2_exp_sine_squared(self):
    ker = ExpSineSquared()
    onx = convert_kernel(ker, 'X', output_names=['Y'],
                         dtype=np.float32,
                         op_version=onnx_opset_version())
    model_onnx = onx.to_onnx(
        inputs=[('X', FloatTensorType([None, None]))],
        dtype=np.float32)
    sess = InferenceSession(model_onnx.SerializeToString())
    res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
    m1 = res
    m2 = ker(Xtest_)
    assert_almost_equal(m1, m2, decimal=4)

    onx = convert_kernel(ker, 'X', output_names=['Z'],
                         x_train=Xtest_ * 2,
                         dtype=np.float32,
                         op_version=onnx_opset_version())
    model_onnx = onx.to_onnx(
        inputs=[('X', FloatTensorType([None, None]))],
        dtype=np.float32)
    sess = InferenceSession(model_onnx.SerializeToString())
    res = sess.run(None, {'X': Xtest_.astype(np.float32)})[0]
    m1 = res
    m2 = ker(Xtest_, Xtest_ * 2)
    assert_almost_equal(m1, m2, decimal=4)
def test_export_sklearn_kernel_exp_sine_squared(self):
    x = numpy.array([[1, 2], [3, 4]], dtype=float)
    kernel = ExpSineSquared(length_scale=1.2, periodicity=1.1)

    def kernel_call_ynone(X, length_scale=1.2, periodicity=1.1,
                          pi=3.141592653589793):
        dists = squareform_pdist(X, metric='euclidean')
        t_pi = py_make_float_array(pi)
        t_periodicity = py_make_float_array(periodicity)
        arg = dists / t_periodicity * t_pi
        sin_of_arg = numpy.sin(arg)
        t_2 = py_make_float_array(2)
        t__2 = py_make_float_array(-2)
        t_length_scale = py_make_float_array(length_scale)
        K = numpy.exp((sin_of_arg / t_length_scale) ** t_2 * t__2)
        return K

    exp = kernel(x, None)
    got = kernel_call_ynone(x)
    self.assertEqualArray(exp, got)
    context = {'numpy.sin': numpy.sin, 'numpy.exp': numpy.exp,
               'numpy_pi': numpy.pi,
               'squareform_pdist': 'squareform_pdist',
               'py_make_float_array': py_make_float_array}
    onnx_code = translate_fct2onnx(kernel_call_ynone, context=context,
                                   output_names=['Z'])
    self.assertIn(
        "X, length_scale=1.2, periodicity=1.1, pi=3.14159", onnx_code)
    self.assertIn("-2", onnx_code)
    self.assertIn('metric="euclidean"', onnx_code)

    from skl2onnx.algebra.onnx_ops import (  # pylint: disable=E0611,E0401
        OnnxAdd, OnnxSin, OnnxMul, OnnxPow, OnnxDiv, OnnxExp)
    from skl2onnx.algebra.complex_functions import onnx_squareform_pdist
    ctx = {'OnnxAdd': OnnxAdd, 'OnnxPow': OnnxPow,
           'OnnxSin': OnnxSin, 'OnnxDiv': OnnxDiv,
           'OnnxMul': OnnxMul, 'OnnxIdentity': OnnxIdentity,
           'OnnxExp': OnnxExp, 'numpy': numpy,
           'onnx_squareform_pdist': onnx_squareform_pdist,
           'py_make_float_array': py_make_float_array}
    fct = translate_fct2onnx(kernel_call_ynone, context=context,
                             cpl=True, context_cpl=ctx,
                             output_names=['Z'], dtype=numpy.float32)
    r = fct('X')
    self.assertIsInstance(r, OnnxIdentity)
    inputs = {'X': x.astype(numpy.float32)}
    onnx_g = r.to_onnx(inputs)
    oinf = OnnxInference(onnx_g)
    res = oinf.run(inputs)
    self.assertEqualArray(exp, res['Z'])
def plot_cloud_top_series():
    # NB: `cid`, `t_` and `h_` come from module scope in the original script
    loc = '/scratchSSD/loh/tracking'
    case_name = 'BOMEX'
    f_list = sorted(glob.glob(f'{loc}/{case_name}/clouds_*.pq'))

    with open('unique_clouds.json', 'r') as f:
        c_dict = ujson.load(f)
    c_list = [f_list[i] for i in c_dict[f'{cid}']]

    with Parallel(n_jobs=16) as Pr:
        result = Pr(delayed(get_cloud_top_height)(f) for f in c_list)

    # ---- Plotting
    fig = plt.figure(1, figsize=(6, 3))
    fig.clf()
    sns.set_context('paper')
    sns.set_style('ticks', {
        'axes.grid': False,
        'axes.linewidth': '0.75',
        'grid.color': '0.75',
        'grid.linestyle': u':',
        'legend.frameon': True,
    })
    plt.rc('text', usetex=True)
    plt.rc('font', family='Helvetica')

    ax = plt.subplot(1, 1, 1)
    plt.ylabel(r'$d \hat{\mathcal{H}} / dt$')
    plt.xlabel('Time [min]')

    # xf, yf = fft_.get_fft_matrix(h_)
    # plt.plot(xf, yf, '--o')
    plt.plot(t_, h_, '--o', lw=0.75)

    # Plot GP regression result
    kernel = 1.0 * RBF(length_scale=1e5, length_scale_bounds=(1e3, 1e5)) \
        + 1.0 * WhiteKernel(noise_level=1e-2) \
        + 1.0 * ExpSineSquared(periodicity=15.8, periodicity_bounds=(5, 25))
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=0)
    gp.fit(t_[:, None], h_[:])

    X = np.linspace(t_[0], t_[-1], 180)
    y_mean, y_std = gp.predict(X[:, None], return_std=True)
    plt.plot(X, y_mean, 'k', lw=1, zorder=9)
    plt.fill_between(X, y_mean - y_std, y_mean + y_std,
                     alpha=0.2, color='k')
    print(gp.kernel_)
    print(gp.log_marginal_likelihood_value_)

    y_samples = gp.sample_y(X[:, None], 10)
    plt.plot(X, y_samples, lw=0.5, alpha=0.3)

    plt.tight_layout(pad=0.5)
    figfile = 'png/{}.png'.format(os.path.splitext(__file__)[0])
    print('\t Writing figure to {}...'.format(figfile))
    plt.savefig(figfile, bbox_inches='tight', dpi=180,
                facecolor='w', transparent=True)
def knr(X, y):
    param_grid = {"alpha": [1e0, 1e-1, 1e-2, 1e-3],
                  "kernel": [ExpSineSquared(l, p)
                             for l in np.logspace(-2, 2, 10)
                             for p in np.logspace(0, 2, 10)]}
    kr = GridSearchCV(KernelRidge(), cv=5, param_grid=param_grid)
    kr.fit(X, y)
    return kr
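# --- Usage sketch for knr() above on a noisy sine wave. The data below is a
# --- hypothetical illustration (it assumes the same module-level imports
# --- that knr() itself relies on: np, ExpSineSquared, GridSearchCV,
# --- KernelRidge), not part of the original snippet.
def _demo_knr():
    rng = np.random.RandomState(0)
    X_demo = np.linspace(0, 10, 200)[:, None]
    y_demo = np.sin(2 * np.pi * X_demo.ravel()) + 0.1 * rng.randn(200)
    search = knr(X_demo, y_demo)    # fitted GridSearchCV
    print(search.best_params_)      # best alpha and ExpSineSquared(l, p)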
class GaussianProcessRegressor_Fit(SkLearnNode):
    icon = "icons/divide.png"
    op_code = GAUSSIANPROCESSREGRESSOR_FIT
    op_title = "GaussianProcessRegressor_fit"
    content_label = "/"
    content_label_objname = "GaussianProcessRegressor_node_fit"
    kernel = 1.0 * ExpSineSquared(length_scale=1.0, periodicity=3.0,
                                  length_scale_bounds=(0.2, 10.0),
                                  periodicity_bounds=(3.0, 6.0))
    model = GaussianProcessRegressor(kernel=kernel)

    def __init__(self, scene):
        super().__init__(scene, inputs=[1, 1, 1], outputs=[2])

    def initInnerClasses(self):
        self.content = GaussianProcessRegressorContent(self)
        self.grNode = SkLearnGraphicsNode(self)

    def train(self, x_data, y_data):
        # Rebuild the model on every fit; the original guarded this with a
        # check on an undefined `n_neighbors`, left over from another node.
        self.model = GaussianProcessRegressor(kernel=self.kernel)
        self.model.fit(x_data, y_data)
        return self.model

    def evalImplementation(self, param):
        x_Input = self.getInput(0)
        y_Input = self.getInput(1)
        socket1 = self.getInputSocket(0)
        socket2 = self.getInputSocket(1)
        if x_Input is None or y_Input is None:
            self.markInvalid()
            self.markDescendantsDirty()
            self.grNode.setToolTip("Connect all inputs")
            return None
        val = self.train(x_Input.eval({"a": socket1}),
                         y_Input.eval({"a": socket2}))
        self.value = val
        self.markDirty(False)
        self.markInvalid(False)
        self.grNode.setToolTip("")
        self.markDescendantsDirty()
        self.evalChildren()
        return val
def test():
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    dataset = read_lake_erie_data()
    v = np.array(dataset.index).reshape(-1, 1)
    bootstrapped_dataset = gp_bootstrap_noise(
        v,
        dataset["Level"].values.reshape(-1, 1),  # .values: Series has no reshape
        3.27 ** 2 * ExpSineSquared(length_scale=180, periodicity=10)
        * ExpSineSquared(length_scale=1.44),
        n_samples=50, alpha=.09)

    co2_dataset = read_mauna_loa_co2_data()
    samps = gp_bootstrap_noise(
        co2_dataset['Time'].values.reshape(-1, 1),
        co2_dataset['CO2Concentration'].values.reshape(-1, 1),
        34.4 ** 2 * RBF(length_scale=41.8)
        + 3.27 ** 2 * RBF(length_scale=180)
        * ExpSineSquared(length_scale=1.44, periodicity=1)
        + 0.446 ** 2 * RationalQuadratic(alpha=17.7, length_scale=0.957)
        + 0.197 ** 2 * RBF(length_scale=0.138)
        + WhiteKernel(noise_level=0.0336),
        n_samples=50, alpha=30.0)

    f, (ax1, ax2) = plt.subplots(1, 2)
    for dat in bootstrapped_dataset:
        ax1.plot(dat[1], alpha=.1, color="gray")
    ax1.plot(dataset["Level"], color="blue")
    ax1.set_xlabel("Month")
    ax1.set_ylabel("Level (M)")
    for dat in samps:
        ax2.plot(dat[1], alpha=.1, color="gray")
    ax2.plot(co2_dataset['CO2Concentration'], color="blue")
    ax2.set_xlabel("Month")
    ax2.set_ylabel("CO2 Concentration (PPM)")
    plt.suptitle("Example of Samples from GP Bootstrap")
    plt.show()
def __kernel_ridge(fn, X, y, x):
    param_grid = {"alpha": [1e0, 1e-1, 1e-2, 1e-3],
                  "kernel": [ExpSineSquared(l, p)
                             for l in np.logspace(-2, 2, 10)
                             for p in np.logspace(0, 2, 10)]}
    kr = GridSearchCV(KernelRidge(), cv=5, param_grid=param_grid)
    kr.fit(X, y)
    y_kr = kr.predict(x)
    return y_kr
def test_bound_check_fixed_hyperparameter():
    # Regression test for issue #17943
    # Check that having a hyperparameter with fixed bounds doesn't cause an
    # error
    k1 = 50.0**2 * RBF(length_scale=50.0)  # long term smooth rising trend
    k2 = ExpSineSquared(length_scale=1.0, periodicity=1.0,
                        periodicity_bounds="fixed")  # seasonal component
    kernel = k1 + k2
    GaussianProcessRegressor(kernel=kernel).fit(X, y)
def get_gaussian_process():
    kernel = ExpSineSquared(length_scale_bounds=(0.5, 100),
                            periodicity=52,
                            periodicity_bounds=(10, 100))
    gaussian_process = GaussianProcessRegressor(kernel=kernel,
                                                alpha=5,
                                                normalize_y=False,
                                                n_restarts_optimizer=10)
    return gaussian_process
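# --- Usage sketch for get_gaussian_process() above, assuming weekly samples
# --- with an annual cycle (hence periodicity=52). The synthetic series is a
# --- hypothetical illustration, not part of the original code.
def _demo_get_gaussian_process():
    import numpy as np
    rng = np.random.RandomState(0)
    t = np.arange(3 * 52, dtype=float)[:, None]   # three years of weekly data
    y = 10 * np.sin(2 * np.pi * t.ravel() / 52) + rng.randn(len(t))
    gp = get_gaussian_process()
    gp.fit(t, y)
    y_mean, y_std = gp.predict(t, return_std=True)  # smoothed annual cycle
    print(gp.kernel_)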
def generate_erie_samples(n_samples):
    erie_dataset = read_lake_erie_data()
    train_data = erie_dataset.iloc[:int(.7 * len(erie_dataset)), ]
    samps = gp_bootstrap_noise(
        np.array(train_data.index).reshape(-1, 1),
        train_data['Level'].values.reshape(-1, 1),  # .values: Series has no reshape
        3.27 ** 2 * ExpSineSquared(length_scale=180, periodicity=10)
        * ExpSineSquared(length_scale=1.44),
        n_samples=n_samples, alpha=.09)
    for osf in glob.glob("./gp_samples/erie/*.pkl"):
        os.remove(osf)
    for idx, samp in enumerate(samps):
        with open("./gp_samples/erie/" + str(idx) + ".pkl", "wb") as sf:
            pickle.dump(samp, sf)
def __init__(self, t, y, selected_kernel="RatQuad",
             interpolation_factor=None):
    super().__init__(t, y)
    self.kernels = None
    self.selected_kernel = selected_kernel
    self.interpolation_factor = interpolation_factor
    # TODO: fix this to comply with python standards
    self.A_mean = None
    self.A_std = None

    # Create different kernels that will be explored
    self.kernels = dict()
    self.kernels["RBF"] = 1.0 * RBF(length_scale=0.5)
    self.kernels["RatQuad"] = 1.0 * RationalQuadratic(length_scale=1.0,
                                                      alpha=0.2)
    self.kernels["ExpSineSquared"] = 1.0 * ExpSineSquared(length_scale=1.0,
                                                          periodicity=3)
    self.kernels["Matern"] = 1.0 * Matern(length_scale=1.0, nu=1.5)
    self.kernels["Matern*ExpSineSquared"] = (
        1.0 * Matern(length_scale=1.0, nu=1.5)
        * ExpSineSquared(length_scale=1, periodicity=3))
    self.kernels["RBF*ExpSineSquared"] = (
        1.0 * RBF(length_scale=1.0)
        * ExpSineSquared(length_scale=1, periodicity=3))
    self.kernels["RatQuad*ExpSineSquared"] = (
        1.0 * RationalQuadratic(length_scale=1.0, alpha=0.2)
        * ExpSineSquared(length_scale=1, periodicity=3))
    self.kernels["Matern*RBF"] = (1.0 * Matern(length_scale=1.0, nu=1.5)
                                  * RBF(length_scale=1))
    self.kernels["Matern+ExpSineSquared"] = (
        1.0 * Matern(length_scale=1.0, nu=1.5)
        + ExpSineSquared(length_scale=1, periodicity=3))
    self.kernels["RBF+ExpSineSquared"] = (
        1.0 * RBF(length_scale=1.0)
        + ExpSineSquared(length_scale=1, periodicity=3))
    self.kernels["RatQuad+ExpSineSquared"] = (
        1.0 * RationalQuadratic(length_scale=1.0)
        + ExpSineSquared(length_scale=1, periodicity=3))

    if selected_kernel not in self.kernels.keys():
        raise KeyError(
            f"Unknown kernel: {selected_kernel}, "
            f"available kernels: {self.kernels.keys()}")

    # Generate the noisy kernels
    self.noisy_kernels = dict()
    for key, kernel in self.kernels.items():
        self.noisy_kernels[key] = kernel + WhiteKernel(
            noise_level=1, noise_level_bounds=(1e-7, 1e7))
def build_gp(arguments):
    """
    Build Gaussian Process using scikit-learn, print hyperparams and
    return model

    :return: gp model
    """
    kernel_dict = {
        'c_rbf': C(arguments.const_val,
                   (arguments.const_val_min, arguments.const_val_max))
        * RBF(length_scale=arguments.length_scale,
              length_scale_bounds=(arguments.length_scale_min,
                                   arguments.length_scale_max)),
        'rbf': RBF(length_scale=arguments.length_scale,
                   length_scale_bounds=(arguments.length_scale_min,
                                        arguments.length_scale_max)),
        'matern': Matern(length_scale=arguments.length_scale,
                         length_scale_bounds=(arguments.length_scale_min,
                                              arguments.length_scale_max),
                         nu=arguments.nu),
        'expsinesquared': ExpSineSquared(
            length_scale=arguments.length_scale,
            periodicity=arguments.periodicity,
            length_scale_bounds=(arguments.length_scale_min,
                                 arguments.length_scale_max),
            periodicity_bounds=(arguments.periodicity_min,
                                arguments.periodicity_max)),
    }
    kernel = kernel_dict[arguments.kernel]

    if arguments.verbosity:
        print("\n================================================================")
        print("\nKernel: {}".format(kernel))
        print("\nHyperparameters:")
        for hyperparameter in kernel.hyperparameters:
            print(hyperparameter)
        print("\nParameters:")
        params = kernel.get_params()
        for key in sorted(params):
            print("%s : %s" % (key, params[key]))
        print("\n================================================================")

    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=20,
                                  normalize_y=True)
    return gp
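# --- Usage sketch for build_gp() above: `arguments` is an argparse-style
# --- namespace; all field names and values below are hypothetical
# --- placeholders chosen to satisfy every entry of kernel_dict.
def _demo_build_gp():
    from types import SimpleNamespace
    args = SimpleNamespace(
        kernel='expsinesquared', length_scale=1.0, periodicity=1.0,
        length_scale_min=1e-2, length_scale_max=1e2,
        periodicity_min=1e-1, periodicity_max=1e1, nu=1.5,
        const_val=1.0, const_val_min=1e-3, const_val_max=1e3,
        verbosity=1)
    gp = build_gp(args)  # prints kernel hyperparameters, returns regressor
    return gp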
def define_model():
    k0 = WhiteKernel(noise_level=0.3**2,
                     noise_level_bounds=(0.1**2, 0.5**2))
    k1 = ConstantKernel(constant_value=2) * \
        ExpSineSquared(length_scale=1.0, periodicity=40,
                       periodicity_bounds=(35, 45))
    k2 = ConstantKernel(constant_value=10,
                        constant_value_bounds=(1e-2, 1e3)) * \
        RBF(length_scale=100.0, length_scale_bounds=(1, 1e4))
    kernel_1 = k0 + k1 + k2
    linear_model = gaussian_process.GaussianProcessRegressor(
        kernel=kernel_1, n_restarts_optimizer=10,
        normalize_y=True, alpha=0.0)
    return linear_model
def gaussian_regressor_param_selection(X, y, nfolds):
    kernel_rbf = ConstantKernel(1.0, constant_value_bounds="fixed") \
        * RBF(1.0, length_scale_bounds="fixed")
    kernel_rq = ConstantKernel(1.0, constant_value_bounds="fixed") \
        * RationalQuadratic(alpha=0.1, length_scale=1)
    kernel_expsine = ConstantKernel(1.0, constant_value_bounds="fixed") \
        * ExpSineSquared(1.0, 5.0, periodicity_bounds=(1e-2, 1e1))
    # include the periodic kernel in the search; the original defined it
    # but left it out of the candidate list
    Kernels = [kernel_rbf, kernel_rq, kernel_expsine]
    param_grid = {'kernel': Kernels}
    grid_search = GridSearchCV(GaussianProcessRegressor(random_state=0),
                               param_grid, cv=nfolds, n_jobs=-1)
    grid_search.fit(X, y)
    # best_score_ is the highest CV score (R^2 by default), not an MSE
    print('GaussianRegressor Best CV Score: ' + str(grid_search.best_score_))
    print('GaussianRegressor With Parameters: ' + str(grid_search.best_params_))
    return grid_search.best_params_
def test_duck_typing_nested_estimator():
    # Test duck typing metaestimators with random search
    kernel_ridge = KernelRidge(kernel=ExpSineSquared())
    param_distributions = {"alpha": [1, 2]}
    kernel_ridge_tuned = RandomizedSearchCV(
        kernel_ridge,
        param_distributions=param_distributions,
    )
    html_output = estimator_html_repr(kernel_ridge_tuned)
    assert "estimator: KernelRidge</label>" in html_output
def test_kernel_exp_sine_squared(self):
    from skl2onnx.operator_converters.gaussian_process import convert_kernel
    ker = ExpSineSquared()
    onx = convert_kernel(ker, 'X', output_names=['Y'],
                         dtype=numpy.float32)
    model_onnx = onx.to_onnx(inputs=[('X', FloatTensorType([None, None]))])
    sess = OnnxInference(model_onnx)
    Xtest_ = numpy.arange(6).reshape((3, 2))
    res = sess.run({'X': Xtest_.astype(numpy.float32)})
    m1 = res['Y']
    m2 = ker(Xtest_)
    self.assertEqualArray(m1, m2, decimal=5)
def set_estimators(self):
    # Gaussian radial kernel
    params = {
        'kernel': [
            C(1.0, (1e-3, 1e3)),
            C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)),
            ExpSineSquared()
        ],
        'n_restarts_optimizer': [9]
    }
    self.estimators['gaussian_process'] = (GaussianProcessRegressor, params)
def _test_scikit_gaussian_process_one(self):
    problem = MyProblemSin()
    problem.surrogate = SurrogateModelScikit(problem)
    problem.surrogate.sigma_threshold = 0.1
    kernel = 1.0 * ExpSineSquared(length_scale=1.0, periodicity=3.0,
                                  length_scale_bounds=(0.1, 10.0),
                                  periodicity_bounds=(1.0, 10.0))
    problem.surrogate.regressor = GaussianProcessRegressor(kernel=kernel)
    self._check_scikit_one(problem, 5.0)
def special_conversions(self, params):
    """
    TODO: replace this logic with something better
    """
    # create list parameters
    lists = defaultdict(list)
    element_regex = re.compile(r'(.*)\[(\d)\]')
    for name, param in list(params.items()):
        # look for variables of the form "param_name[1]"
        match = element_regex.match(name)
        if match:
            # name of the list parameter
            lname = match.groups()[0]
            # index of the list item
            index = int(match.groups()[1])
            lists[lname].append((index, param))
            # drop the element parameter from our list
            del params[name]

    for lname, items in list(lists.items()):
        # drop the list size parameter
        del params['len(%s)' % lname]
        # sort the list by index
        params[lname] = [val for idx, val in sorted(items)]

    # Gaussian process classifier
    if self.method == "gp":
        if params["kernel"] == "constant":
            params["kernel"] = ConstantKernel()
        elif params["kernel"] == "rbf":
            params["kernel"] = RBF()
        elif params["kernel"] == "matern":
            params["kernel"] = Matern(nu=params["nu"])
            del params["nu"]
        elif params["kernel"] == "rational_quadratic":
            params["kernel"] = RationalQuadratic(
                length_scale=params["length_scale"],
                alpha=params["alpha"])
            del params["length_scale"]
            del params["alpha"]
        elif params["kernel"] == "exp_sine_squared":
            params["kernel"] = ExpSineSquared(
                length_scale=params["length_scale"],
                periodicity=params["periodicity"])
            del params["length_scale"]
            del params["periodicity"]

    # return the updated parameter vector
    return params
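# --- Usage sketch for special_conversions() above: a hypothetical flattened
# --- parameter vector with indexed list entries and a GP kernel name.
# --- `node` stands in for the enclosing object (with method == "gp"); the
# --- parameter names are made up for illustration.
def _demo_special_conversions(node):
    params = {'kernel': 'exp_sine_squared', 'length_scale': 1.0,
              'periodicity': 2.0, 'layer[0]': 32, 'layer[1]': 16,
              'len(layer)': 2}
    return node.special_conversions(params)
    # -> {'kernel': ExpSineSquared(length_scale=1.0, periodicity=2.0),
    #     'layer': [32, 16]}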
def str2ker(s):
    # NB: the parameter was originally named `str` and the lookup table
    # `map`; both are renamed here to avoid shadowing builtins.
    k1 = C(1.0) * RBF(length_scale=1)
    k2 = C(1.0) * RationalQuadratic(length_scale=1)
    k4 = DotProduct(sigma_0=1)
    k3 = C(1.0) * ExpSineSquared(length_scale=1, periodicity=1)
    k5 = WhiteKernel(1.0)
    kernel_map = {"s": k1, "r": k2, "p": k3, "l": k4}

    # if basic kernel
    if len(s) == 1:
        ker = kernel_map[s]
    else:
        # if composite kernel
        ker = []
        factor = kernel_map[s[0]]
        op = s[1]
        for i in range(2, len(s), 2):
            if op == '*':
                # if the operator is *, keep multiplying to extend the
                # current product factor
                factor = factor * kernel_map[s[i]]
                # the end?
                if i == len(s) - 1:
                    if not ker:
                        ker = factor
                    else:
                        ker = ker + factor
                else:
                    op = s[i + 1]
            else:
                # if the operator is +, combine the current factor with ker,
                # then start a new factor
                if not ker:
                    ker = factor
                else:
                    ker = ker + factor
                factor = kernel_map[s[i]]
                # the end?
                if i == len(s) - 1:
                    ker = ker + factor
                else:
                    op = s[i + 1]
        ker = ker + k5
    return ker
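# --- Usage sketch for str2ker() above: the string grammar maps "s" -> RBF,
# --- "r" -> RationalQuadratic, "p" -> ExpSineSquared, "l" -> DotProduct,
# --- with '*' and '+' at the odd positions; composite kernels get a
# --- WhiteKernel appended. The example string is a hypothetical choice.
def _demo_str2ker():
    k = str2ker("s*p+r")  # (RBF * ExpSineSquared) + RationalQuadratic + White
    print(k)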
def fig_sin_kernel(only_trace: bool = True):
    import numpy as np
    import plotly.graph_objects as go
    from sklearn.gaussian_process.kernels import ExpSineSquared

    kernel = ExpSineSquared()
    x = np.linspace(-1, 1, 100)
    x_exp = np.expand_dims(x, axis=1)
    surf_data = kernel(x_exp, x_exp)
    trace = go.Surface(x=x, y=x, z=surf_data, showscale=False)
    if only_trace:
        return trace
    # build the figure before styling it (the original referenced an
    # undefined `fig` here and mislabeled the z axis as a linear kernel)
    fig = go.Figure(data=[trace])
    fig.update_layout(scene=dict(xaxis_title='xi',
                                 yaxis_title='xj',
                                 zaxis_title='ExpSineSquared Kernel Value'),
                      margin=dict(r=10, b=10, l=10, t=10))
    return fig
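# --- Usage sketch for fig_sin_kernel() above (assumes plotly is installed):
# --- wrap the returned Surface trace in a Figure and render it.
def _demo_fig_sin_kernel():
    import plotly.graph_objects as go
    fig = go.Figure(data=[fig_sin_kernel(only_trace=True)])
    fig.show()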
def run(self):
    import numpy as np
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.gaussian_process.kernels \
        import RBF, WhiteKernel, RationalQuadratic, ExpSineSquared

    X = np.array(self.train.data)
    Y = np.array(self.train.occupancy).flatten()
    kernel = RBF() + RBF() * ExpSineSquared() + RationalQuadratic() \
        + WhiteKernel()
    gp = GaussianProcessClassifier(kernel=kernel,
                                   optimizer='fmin_l_bfgs_b').fit(X, Y)
    predict_occupancy = gp.predict(np.array(self.test.data))
    return np.reshape(predict_occupancy, (-1, 1))
def test_onnxt_gpr_iris(self):
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, _, y_train, __ = train_test_split(X, y, random_state=11)
    clr = GaussianProcessRegressor(ExpSineSquared(), alpha=20.)
    clr.fit(X_train, y_train)

    model_def = to_onnx(clr, X_train)
    oinf = OnnxInference(model_def)
    res1 = oinf.run({'X': X_train})

    new_model = onnx_optimisations(model_def)
    oinf = OnnxInference(new_model)
    res2 = oinf.run({'X': X_train})
    self.assertEqualArray(res1['GPmean'], res2['GPmean'])
    self.assertNotIn('op_type: "CDist"', str(new_model))
def __init__(self, f, domain=None, kernel=None, alpha=1e-10,
             n_restarts_optimizer=2, random_inits=2, **kwargs):
    if kernel is None:
        # set default kernel (the original had a doubled `kernel = kernel =`)
        kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) \
            + C(1e-2, (1e-3, 1e3)) * ExpSineSquared(1.0, 1.0,
                                                    (1e-5, 1e5),
                                                    (1e-5, 1e5))
        # cov_amplitude = C(1.0, (1e-3, 1e3))
        # matern = Matern(length_scale=)
    if domain is None:
        domain = np.atleast_2d(np.linspace(0.01, 10, 1000)).T
    self.domain = domain
    self.f = f
    self.n_restarts_optimizer = n_restarts_optimizer
    nrs = n_restarts_optimizer
    self.gp = GaussianProcessRegressor(kernel=kernel, alpha=alpha,
                                       n_restarts_optimizer=nrs, **kwargs)
    self.X_ind = np.random.randint(0, len(self.domain), random_inits)
    self.X = np.atleast_2d(self.domain[self.X_ind])
    # self.y = f(self.X).ravel()
    self.y = np.asarray([f(x) for x in self.X])
    self.fig_stocker = []
    self.fig_info_stocker = []
    self.current_argmax = self.domain[np.argmax(self.y)]
    self.current_max = max(self.y)
    self.current_step = random_inits
    self.other_info_to_stock = {
        'theta_after_fit_log': [],
        'kernel_hyperparams_after_fit': []
    }
    self.gp.fit(self.X, self.y)
    self.other_info_to_stock['theta_after_fit_log'].append(
        self.gp.kernel_.theta)
    self.other_info_to_stock['kernel_hyperparams_after_fit'].append(
        self.gp.kernel_.get_params())
    self.y_pred, self.sigma = self.gp.predict(self.domain, return_std=True)
    self.current_argmax = self.domain[self.X_ind[np.argmax(
        self.y_pred[self.X_ind])]]
    self.current_max = max(self.y_pred[self.X_ind])
def fit(self, long_term_length_scale=None,
        pre_periodic_term_length_scale=None,
        periodic_term_length_scale=None, periodicity=None,
        noise_level=None, do_plot=False, fig=None):
    data = self.data[['mjd', 'mag', 'err']]
    data = np.atleast_2d(data)
    time = data[:, 0] - data[0, 0]
    time = np.atleast_2d(time).T

    if self._gp is None:
        time_scale = data[-1, 0] - data[0, 0]
        data_scale = np.max(data[:, 1]) - np.min(data[:, 1])
        noise_std = np.median(data[:, 2])
        if long_term_length_scale is None:
            long_term_length_scale = 0.5 * time_scale
        if pre_periodic_term_length_scale is None:
            pre_periodic_term_length_scale = 0.5 * time_scale
        if periodic_term_length_scale is None:
            periodic_term_length_scale = 0.1 * time_scale
        if periodicity is None:
            periodicity = 0.1 * time_scale
        if noise_level is None:
            noise_level = noise_std

        k1 = data_scale ** 2 * RBF(length_scale=long_term_length_scale)
        k2 = 0.1 * data_scale \
            * RBF(length_scale=pre_periodic_term_length_scale) \
            * ExpSineSquared(length_scale=periodic_term_length_scale,
                             periodicity=periodicity)
        k3 = WhiteKernel(noise_level=noise_level ** 2,
                         noise_level_bounds=(1e-3, 1.))
        kernel = k1 + k2 + k3

        gp = GaussianProcessRegressor(
            kernel=kernel,
            alpha=(data[:, 2] / data[:, 1]) ** 2,
            normalize_y=True, n_restarts_optimizer=10)
        gp.fit(time, data[:, 1])
        self._gp = gp

    if do_plot:
        self.plot_fitted(fig=fig)
def _cast(self, n, X, y):
    """
    Evaluates and optimizes all legitimate combinations of length `n`

    :param n: The length of pipelines
    :param X: Training data
    :param y: Observed values
    :return: None
    """
    from .structsearch import SurrogateRandomCV, BoxSample, CompactSample
    from importlib import import_module
    if self.couldBfirst == []:
        from sklearn.pipeline import Pipeline
    else:
        from imblearn.pipeline import Pipeline
    from sklearn.model_selection import RandomizedSearchCV

    if self.surrogates is None:
        from numpy import logspace
        from sklearn.gaussian_process import GaussianProcessRegressor
        from sklearn.kernel_ridge import KernelRidge
        from sklearn.gaussian_process.kernels import (
            Matern, Sum, ExpSineSquared, WhiteKernel)
        param_grid_gpr = {
            "alpha": logspace(-8, 1, 20),
            "kernel": [Sum(Matern(length_scale=l, nu=p),
                           WhiteKernel(noise_level=q))
                       for l in logspace(-3, 3, 20)
                       for p in [0.5, 1.5, 2.5]
                       for q in logspace(-3, 1.5, 20)]}
        GPR = RandomizedSearchCV(GaussianProcessRegressor(),
                                 param_distributions=param_grid_gpr,
                                 n_iter=20, cv=2)
        param_grid_krr = {
            "alpha": logspace(-4, 0, 10),
            "kernel": [Sum(Matern(), ExpSineSquared(l, p))
                       for l in logspace(-2, 2, 20)
                       for p in logspace(0, 2, 20)]}
        KRR = RandomizedSearchCV(KernelRidge(),
                                 param_distributions=param_grid_krr,
                                 n_iter=30, cv=2)
        self.surrogates = [(KRR, 35, CompactSample, 'L-BFGS-B'),
                           (GPR, 50, BoxSample, 'L-BFGS-B')]
        self.min_random_evals = 10

    Pop = []
    candidates = self.words.Generate(n)
    for cnddt in candidates:
        if self._validate_sequence(cnddt):
            Pop.append(cnddt)

    for seq in Pop:
        if not self._validate_sequence(seq):
            continue
        best_mdl, best_scr = self.optimize_pipeline(seq, X, y)
        self.models[seq] = (best_mdl, best_scr)
        if self.verbose > 0:
            print("score:%f" % best_scr)
            print(best_mdl)
def GPR(X, Y, Z):
    k1 = 50.0**2 * RBF(length_scale=50.0)  # long term smooth rising trend
    k2 = 2.0**2 * RBF(length_scale=100.0) \
        * ExpSineSquared(length_scale=1.0, periodicity=1.0,
                         periodicity_bounds="fixed")  # seasonal component
    # medium term irregularities
    k3 = 0.5**2 * RationalQuadratic(length_scale=1.0, alpha=1.0)
    k4 = 0.1**2 * RBF(length_scale=0.1) \
        + WhiteKernel(noise_level=0.1**2,
                      noise_level_bounds=(1e-3, np.inf))  # noise terms
    # NOTE: as written, only the noise term k4 is used; the full composite
    # would be k1 + k2 + k3 + k4
    kernel = k4
    gp = GaussianProcessRegressor(kernel=kernel, alpha=0, normalize_y=True)
    gp.fit(X, Y)
    # return_std=True yields the standard deviation, not the variance
    mu, std = gp.predict(Z, return_std=True)
    return mu, std
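# --- Usage sketch for GPR() above on a hypothetical monthly series with a
# --- trend and an annual cycle; the synthetic data is an illustration, not
# --- part of the original code.
def _demo_gpr():
    import numpy as np
    rng = np.random.RandomState(0)
    t = np.arange(120, dtype=float)[:, None]  # ten years of monthly points
    y = 0.05 * t.ravel() + np.sin(2 * np.pi * t.ravel() / 12) \
        + 0.1 * rng.randn(len(t))
    mu, std = GPR(t, y, t)                    # predict back on the grid
    print(mu[:5], std[:5])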