from sklearn.gaussian_process import GaussianProcessRegressor as SKLModel


class GaussianProcessRegressorImpl:
    def __init__(self,
                 kernel=None,
                 alpha=1e-10,
                 optimizer='fmin_l_bfgs_b',
                 n_restarts_optimizer=0,
                 normalize_y=False,
                 copy_X_train=True,
                 random_state=None):
        self._hyperparams = {
            'kernel': kernel,
            'alpha': alpha,
            'optimizer': optimizer,
            'n_restarts_optimizer': n_restarts_optimizer,
            'normalize_y': normalize_y,
            'copy_X_train': copy_X_train,
            'random_state': random_state
        }
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
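A minimal usage sketch for the wrapper above; the random data and the alpha value are illustrative, not from the original:

import numpy as np

X = np.random.rand(20, 3)
y = X @ np.array([1.0, -2.0, 0.5])          # synthetic linear target
model = GaussianProcessRegressorImpl(alpha=1e-8).fit(X, y)
print(model.predict(X[:5]))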
Example #2
from sklearn.gaussian_process import GaussianProcessRegressor


def _fit_gp(X, y, kernel, max_gp):
    # Cap the training set at the most recent max_gp samples to bound
    # the cubic cost of exact GP fitting.
    n_samples = X.shape[0]
    n_gp = min(n_samples, max_gp)
    X = X[n_samples - n_gp:, :]
    y = y[n_samples - n_gp:]

    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10)
    gp.fit(X, y)
    return gp
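Because exact GP fitting scales cubically in the number of samples, _fit_gp keeps only the most recent max_gp rows. A usage sketch with synthetic 1-D data and an RBF kernel (both illustrative assumptions):

import numpy as np
from sklearn.gaussian_process.kernels import RBF

X = np.linspace(0, 10, 500)[:, None]
y = np.sin(X).ravel() + 0.1 * np.random.randn(500)
gp = _fit_gp(X, y, kernel=RBF(length_scale=1.0), max_gp=200)  # fits on the last 200 rows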
Example #3
from sklearn.gaussian_process import GaussianProcessRegressor as GPR


def fit(kernel, sample_indices, X, y, n_restarts_optimizer, normalize_y):
    """Fits a Gaussian Process Regression model on a subset of X and y using
    the provided covariance kernel and subset indices. This is used as a single
    worker payload in the parallel fitting process of the rBCM.

    TODO: take the sample_indices argument out of this function and keep it
          in the logic of the rBCM class alone. Just pass the X and y we'll
          actually use. For now keep it to avoid too many changes during the
          refactor, however.

    Args:
        kernel : sklearn kernel object
            The kernel specifying the covariance function of the Gaussian
            Process.

        sample_indices : list of integers
            The indices of the subset of X and y to fit

        X : np.ndarray
            The locations of the points.
            Must match y in length.

        y : np.ndarray
            The values of the points at the X locations.
            Must match X in length.

        n_restarts_optimizer : non-negative integer
            The number of restarts to permit in the GPR. Look to scikit-learn's
            GPR implementation for more detail as it is passed through.

        normalize_y : boolean
            Whether to normalize the scale of y to improve fitting quality.
            See scikit-learn's GPR implementation for more detail.
    """
    gpr = GPR(kernel,
              n_restarts_optimizer=n_restarts_optimizer,
              copy_X_train=False,
              normalize_y=normalize_y)
    gpr.fit(X[sample_indices, :], y[sample_indices, :])
    return gpr
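The docstring frames this function as a worker payload for the rBCM's parallel fit; a sketch of how it might be dispatched with joblib (the joblib usage and the random partitioning are assumptions, not part of the original):

import numpy as np
from joblib import Parallel, delayed
from sklearn.gaussian_process.kernels import RBF

X = np.random.rand(1000, 2)
y = np.sin(X.sum(axis=1))[:, None]  # 2-D y, matching y[sample_indices, :]
partitions = np.array_split(np.random.permutation(len(X)), 8)
experts = Parallel(n_jobs=-1)(
    delayed(fit)(RBF(), indices, X, y, n_restarts_optimizer=0, normalize_y=True)
    for indices in partitions)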
Example #5
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF


def fix_patch(c, w, h, patch_size, corr_img, recover_img):
    width = recover_img.shape[1]
    height = recover_img.shape[2]
    g = GaussianProcessRegressor(kernel=RBF(patch_size))
    # Clamp the patch origin so the patch stays inside the image bounds.
    if w + patch_size >= width:
        w = width - patch_size - 1
    if h + patch_size >= height:
        h = height - patch_size
    channel_img = corr_img[c, w:w + patch_size, h:h + patch_size]
    noise_mask = channel_img == 0  # zero-valued pixels are treated as corrupted
    sample_coordinates = np.nonzero(~noise_mask)
    if len(sample_coordinates[0]) == 0:
        # No clean pixels in this patch: retry with a larger one.
        return fix_patch(c, w, h, patch_size * 2, corr_img, recover_img)
    # if len(sample_coordinates[0]) < 2 * patch_size:
    #     return fix_patch(c, w, h, patch_size * 2, recover_img)
    train_x = np.array(sample_coordinates).transpose()
    train_y = channel_img[sample_coordinates]
    g.fit(train_x, train_y)
    # Predict the corrupted pixels from the clean ones and write them back.
    predict_coordinates = np.nonzero(noise_mask)
    predict_x = np.array(predict_coordinates).transpose()
    predict_y = g.predict(predict_x)
    recover_img[c, w:w + patch_size,
                h:h + patch_size][predict_coordinates] = predict_y
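A usage sketch for fix_patch, assuming a (channels, width, height) array in which zero-valued pixels mark corruption (the 0-as-noise convention comes from the function body; the shapes are illustrative):

import numpy as np

corrupted = np.random.rand(3, 64, 64)
corrupted[:, 20:28, 20:28] = 0  # simulate a damaged block
recovered = corrupted.copy()
for c in range(corrupted.shape[0]):
    fix_patch(c, 16, 16, 16, corrupted, recovered)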
Example #6
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ExpSineSquared, WhiteKernel

# generate_data, show_data, show_krig and myfunc are defined elsewhere in
# this script (a sketch of them follows the snippet).
if __name__ == '__main__':
    # initial parameters
    #  sklearn will adjust these parameters based on data
    #  see gpr.kernel_
    length = 1.5
    period = 2 * np.pi
    noise_level = .6
    # define problem
    x, y = generate_data()  # available data
    finex = np.linspace(min(x), max(x), 77)  # prediction locations
    # krige
    #  step 1: define correlation function
    kernel = (ExpSineSquared(length_scale=length, periodicity=period)
              + WhiteKernel(noise_level))
    #  step 2: incorporate available data
    gpr = GaussianProcessRegressor(kernel=kernel)
    gpr.fit(x[:, None], y)
    print(gpr.kernel_)
    #  step 3: make predictions
    fineym, fineye = gpr.predict(finex[:, None], return_std=True)

    # show results
    fig, ax = plt.subplots(1, 1)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    show_data(ax, x, y, finex, myfunc)
    show_krig(ax, finex, fineym, fineye)
    ax.legend()
    plt.show()
# end __main__
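The kriging demo relies on four helpers defined elsewhere in its module. A minimal sketch of plausible implementations (names kept from the snippet; the bodies are assumptions):

def myfunc(x):
    # Hypothetical ground truth; any smooth periodic signal fits the kernel choice.
    return np.sin(x)


def generate_data(n=25, noise=0.3, seed=0):
    rng = np.random.default_rng(seed)
    x = rng.uniform(0, 4 * np.pi, n)
    return x, myfunc(x) + noise * rng.standard_normal(n)


def show_data(ax, x, y, finex, func):
    ax.plot(finex, func(finex), 'k--', label='truth')
    ax.plot(x, y, 'ko', label='data')


def show_krig(ax, finex, mean, std):
    ax.plot(finex, mean, label='GP mean')
    ax.fill_between(finex, mean - 2 * std, mean + 2 * std,
                    alpha=0.3, label='mean ± 2 std')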
Example #7
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.metrics import mean_absolute_error

# The snippet starts mid-call; `data` stands in for the upstream feature
# DataFrame, which is not shown in the original.
train_set, test_set = train_test_split(data,
                                       test_size=0.2,
                                       random_state=np.random.randint(1, 1000))

# Separate output from inputs
y_train = train_set['time_to_failure']
x_train_seg = train_set['segment_id']
x_train = train_set.drop(['time_to_failure', 'segment_id'], axis=1)

y_test = test_set['time_to_failure']
x_test_seg = test_set['segment_id']
x_test = test_set.drop(['time_to_failure', 'segment_id'], axis=1)

# y_train = np.around(y_train.values, decimals=2)

# mlpReg = MLPRegressor(verbose=True, tol=0.0001, max_iter=200000, n_iter_no_change=10000, hidden_layer_sizes=(200,))
# Instantiate a Gaussian Process model
kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
gp.fit(x_train, y_train)

# Create a variable to pickle and open it in write mode
mh = ModelHolder(gp, most_dependent_columns)
mh.save(model_name)
gp = None
mh_new = load_model(model_name)
gp, most_dependent_columns = mh_new.get()
y_pred = gp.predict(x_test)
# y_pred = pd.Series(y_pred).apply(lambda x: float(x / 10))

print('MAE for Gaussian Process', mean_absolute_error(y_test, y_pred))
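ModelHolder and load_model are project-specific persistence helpers that are not shown; a minimal pickle-based sketch consistent with how they are called above (the class layout is an assumption):

import pickle


class ModelHolder:
    """Bundles a fitted model with the feature columns it was trained on."""
    def __init__(self, model, columns):
        self._model = model
        self._columns = columns

    def get(self):
        return self._model, self._columns

    def save(self, path):
        with open(path, 'wb') as f:
            pickle.dump(self, f)


def load_model(path):
    with open(path, 'rb') as f:
        return pickle.load(f)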