from sklearn.gaussian_process import GaussianProcessRegressor as SKLModel  # assumed import; the original relies on an external alias


class GaussianProcessRegressorImpl():
    def __init__(self, kernel=None, alpha=1e-10, optimizer='fmin_l_bfgs_b',
                 n_restarts_optimizer=0, normalize_y=False, copy_X_train=True,
                 random_state=None):
        self._hyperparams = {
            'kernel': kernel,
            'alpha': alpha,
            'optimizer': optimizer,
            'n_restarts_optimizer': n_restarts_optimizer,
            'normalize_y': normalize_y,
            'copy_X_train': copy_X_train,
            'random_state': random_state}
        # Delegate all work to the wrapped scikit-learn estimator.
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
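# A minimal usage sketch for the wrapper above. The toy arrays and the RBF
# kernel choice are illustrative assumptions, not part of the original snippet.
import numpy as np
from sklearn.gaussian_process.kernels import RBF

X_demo = np.array([[0.0], [1.0], [2.0], [3.0]])
y_demo = np.array([0.0, 0.8, 0.9, 0.1])

model = GaussianProcessRegressorImpl(kernel=RBF(length_scale=1.0))
model.fit(X_demo, y_demo)     # delegates to the wrapped sklearn estimator
print(model.predict(X_demo))  # near-interpolates the training targets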
from sklearn.gaussian_process import GaussianProcessRegressor


def _fit_gp(X, y, kernel, max_gp):
    # Fit on at most the `max_gp` most recent samples.
    n_samples = X.shape[0]
    n_gp = min(n_samples, max_gp)
    X = X[(n_samples - n_gp):n_samples, :]
    y = y[(n_samples - n_gp):n_samples]
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10)
    gp.fit(X, y)
    return gp
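# A quick sketch of calling _fit_gp, assuming 1-D data and a Matern kernel;
# the sample size and max_gp cap below are illustrative only. Only the most
# recent max_gp rows are retained, per the slicing above.
import numpy as np
from sklearn.gaussian_process.kernels import Matern

rng = np.random.default_rng(0)
X_demo = np.linspace(0, 10, 200).reshape(-1, 1)
y_demo = np.sin(X_demo).ravel() + 0.1 * rng.standard_normal(200)

gp = _fit_gp(X_demo, y_demo, kernel=Matern(length_scale=1.0), max_gp=50)
print(gp.predict(X_demo[-5:]))  # predictions inside the retained window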
from sklearn.gaussian_process import GaussianProcessRegressor as GPR


def fit(kernel, sample_indices, X, y, n_restarts_optimizer, normalize_y):
    """Fit a Gaussian process regression model on a subset of X and y using
    the provided covariance kernel and subset indices.

    This is used as a single worker payload in the parallel fitting process
    of the rBCM.

    TODO: take the sample_indices argument out of this function and keep it
    in the logic of the rBCM class alone; just pass the X and y we'll
    actually use. For now keep it to avoid too many changes during the
    refactor.

    Args:
        kernel : sklearn kernel object
            The kernel specifying the covariance function of the Gaussian
            process.
        sample_indices : list of integers
            The indices of the subset of X and y to fit.
        X : np.ndarray
            The locations of the points. Must match y in length.
        y : np.ndarray
            The values of the points at the X locations. Must match X in
            length.
        n_restarts_optimizer : non-negative integer
            The number of restarts to permit in the GPR. See scikit-learn's
            GPR implementation for more detail, as it is passed through.
        normalize_y : boolean
            Whether to normalize the scale of y to improve fitting quality.
            See scikit-learn's GPR implementation for more detail.
    """
    gpr = GPR(kernel, n_restarts_optimizer=n_restarts_optimizer,
              copy_X_train=False, normalize_y=normalize_y)
    gpr.fit(X[sample_indices, :], y[sample_indices, :])
    return gpr
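# Illustrative direct call of the worker, assuming a random subset of
# indices; in the original project this payload would be dispatched by the
# rBCM's parallel fitting logic instead. Note y is kept 2-D to match the
# y[sample_indices, :] indexing.
import numpy as np
from sklearn.gaussian_process.kernels import RBF

rng = np.random.default_rng(1)
X_demo = rng.uniform(0, 5, size=(100, 1))
y_demo = np.cos(X_demo)  # shape (100, 1)

subset = rng.choice(100, size=25, replace=False)
expert = fit(RBF(length_scale=1.0), subset, X_demo, y_demo,
             n_restarts_optimizer=0, normalize_y=True)
print(expert.predict(X_demo[:3]))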
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF


def fix_patch(c, w, h, patch_size, corr_img, recover_img):
    width = recover_img.shape[1]
    height = recover_img.shape[2]
    g = GaussianProcessRegressor(kernel=RBF(patch_size))
    # Clamp the window so the patch stays inside the image bounds.
    if w + patch_size >= width:
        w = width - patch_size - 1
    if h + patch_size >= height:
        h = height - patch_size
    channel_img = corr_img[c, w:w + patch_size, h:h + patch_size]
    # Zero-valued pixels are treated as corrupted.
    noise_mask = channel_img == 0
    sample_coordinates = np.nonzero(~noise_mask)
    if len(sample_coordinates[0]) == 0:
        # No clean pixels to train on; retry with a larger patch.
        return fix_patch(c, w, h, patch_size * 2, corr_img, recover_img)
    # if len(sample_coordinates[0]) < 2 * patch_size:
    #     return fix_patch(c, w, h, patch_size * 2, recover_img)
    train_x = np.array(sample_coordinates).transpose()
    train_y = channel_img[sample_coordinates]
    g.fit(train_x, train_y)
    # Predict the corrupted pixels from the clean ones and write them back.
    predict_coordinates = np.nonzero(noise_mask)
    predict_x = np.array(predict_coordinates).transpose()
    predict_y = g.predict(predict_x)
    recover_img[c, w:w + patch_size, h:h + patch_size][predict_coordinates] = predict_y
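# Hedged demo of fix_patch on a synthetic (channel, width, height) image
# where zeros mark corrupted pixels; the 3x32x32 shape, patch placement, and
# patch size are illustrative assumptions.
import numpy as np

rng = np.random.default_rng(2)
corr = rng.uniform(0.1, 1.0, size=(3, 32, 32))
corr[:, 10:14, 10:14] = 0.0  # inject a block of corrupted pixels
recov = corr.copy()

fix_patch(c=0, w=8, h=8, patch_size=8, corr_img=corr, recover_img=recov)
print(np.count_nonzero(recov[0] == 0))  # the zero block in channel 0 is filled in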
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ExpSineSquared, WhiteKernel

# generate_data, show_data, show_krig, and myfunc are helpers defined
# elsewhere in the original script.

if __name__ == '__main__':
    # initial parameters
    # sklearn will adjust these parameters based on data; see gpr.kernel_
    length = 1.5
    period = 2 * np.pi
    noise_level = .6

    # define problem
    x, y = generate_data()                   # available data
    finex = np.linspace(min(x), max(x), 77)  # prediction locations

    # krige
    # step 1: define correlation function
    kernel = ExpSineSquared(length_scale=length, periodicity=period) + \
        WhiteKernel(noise_level)

    # step 2: incorporate available data
    gpr = GaussianProcessRegressor(kernel=kernel)
    gpr.fit(x[:, None], y)
    print(gpr.kernel_)

    # step 3: make predictions
    fineym, fineye = gpr.predict(finex[:, None], return_std=True)

    # show results
    fig, ax = plt.subplots(1, 1)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    show_data(ax, x, y, finex, myfunc)
    show_krig(ax, finex, fineym, fineye)
    ax.legend()
    plt.show()
# end __main__
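# generate_data is not shown in the snippet. A minimal stand-in, assuming a
# noisy periodic target consistent with the ExpSineSquared + WhiteKernel
# choice above (the sine form, noise scale, and range are guesses):
import numpy as np

def generate_data(n=30, seed=3):
    rng = np.random.default_rng(seed)
    x = rng.uniform(0.0, 4.0 * np.pi, n)
    y = np.sin(x) + 0.3 * rng.standard_normal(n)
    return x, y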
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.metrics import mean_absolute_error

# ModelHolder, load_model, most_dependent_columns, and model_name come from
# elsewhere in the original project; `data` stands in for the full feature
# DataFrame, whose name is not shown in the source snippet.
train_set, test_set = train_test_split(
    data, test_size=0.2, random_state=np.random.randint(1, 1000))

# Separate output from inputs
y_train = train_set['time_to_failure']
x_train_seg = train_set['segment_id']
x_train = train_set.drop(['time_to_failure', 'segment_id'], axis=1)
y_test = test_set['time_to_failure']
x_test_seg = test_set['segment_id']
x_test = test_set.drop(['time_to_failure', 'segment_id'], axis=1)
# y_train = np.around(y_train.values, decimals=2)
# mlpReg = MLPRegressor(verbose=True, tol=0.0001, max_iter=200000,
#                       n_iter_no_change=10000, hidden_layer_sizes=(200,))

# Instantiate a Gaussian Process model
kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
gp.fit(x_train, y_train)

# Create a variable to pickle and open it in write mode
mh = ModelHolder(gp, most_dependent_columns)
mh.save(model_name)
gp = None

mh_new = load_model(model_name)
gp, most_dependent_columns = mh_new.get()

y_pred = gp.predict(x_test)
# y_pred = pd.Series(y_pred).apply(lambda x: float(x / 10))
print('MAE for Gaussian Process Regressor', mean_absolute_error(y_test, y_pred))
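# ModelHolder and load_model are project-specific and not shown. A plausible
# minimal sketch using pickle, matching only the interface the snippet above
# relies on (an assumption, not the original implementation):
import pickle

class ModelHolder:
    def __init__(self, model, columns):
        self.model = model
        self.columns = columns

    def save(self, path):
        # Pickle the holder (model plus its column list) to disk.
        with open(path, 'wb') as f:
            pickle.dump(self, f)

    def get(self):
        return self.model, self.columns

def load_model(path):
    # Inverse of ModelHolder.save: restore the pickled holder.
    with open(path, 'rb') as f:
        return pickle.load(f)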