Example #1
def test_random_starts():
    # Test that an increasing number of random-starts of GP fitting only
    # increases the log marginal likelihood of the chosen theta.
    n_samples, n_features = 25, 2
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features) * 2 - 1
    y = (np.sin(X).sum(axis=1) + np.sin(3 * X).sum(axis=1)) > 0

    kernel = C(1.0, (1e-2, 1e2)) \
        * RBF(length_scale=[1e-3] * n_features,
              length_scale_bounds=[(1e-4, 1e+2)] * n_features)
    last_lml = -np.inf
    for n_restarts_optimizer in range(5):
        gp = GaussianProcessClassifier(
            kernel=kernel,
            n_restarts_optimizer=n_restarts_optimizer,
            random_state=0).fit(X, y)
        lml = gp.log_marginal_likelihood(gp.kernel_.theta)
        assert lml > last_lml - np.finfo(np.float32).eps
        last_lml = lml
Example #2
 def __init__(self,
              arms,
              kernel=C(1.0, (1e-3, 1e3)) * RBF(1.0, (1e-3, 1e3)),
              alpha=10.0,
              prior=None):
     self.n_arms = len(arms)
     self.predicted_arms = np.zeros(self.n_arms)
     self.sigmas = np.ones(self.n_arms) * 10
     self.arms = arms
     self.collected_rewards = np.array([])
     self.pulled_arms = []
     self.gaussian_process = GaussianProcessRegressor(
         kernel=kernel,
         alpha=alpha**2,
         normalize_y=True,
         n_restarts_optimizer=9)
     if prior is None:
         self.prior = lambda x: 0
     else:
         self.prior = prior
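A hypothetical companion method, not part of the excerpt above, sketching how such a learner is typically refreshed after each pull: refit the regressor on the observed (arm, reward) pairs and update the per-arm estimates.
 def update_model(self):
     # Refit the GP on all (pulled arm, reward) pairs observed so far and
     # refresh the per-arm mean and uncertainty estimates.
     x = np.atleast_2d(self.pulled_arms).T
     y = self.collected_rewards
     self.gaussian_process.fit(x, y)
     self.predicted_arms, self.sigmas = self.gaussian_process.predict(
         np.atleast_2d(self.arms).T, return_std=True)
     self.sigmas = np.maximum(self.sigmas, 1e-2)  # keep a small uncertainty floor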
Example #3
    def fit(self, X, y):
        """
		Fit Gaussian process regression model.

		Parameters
		----------
		X : array-like, shape = (n_samples, n_features)
			Training data

		y : array-like, shape = (n_samples, [n_output_dims])
			Target values

		Returns
		-------
		self : returns an instance of self.
		"""

        if self.kernel_generator is None:
            if self.standardize_before_fit:
                kernel_generator = lambda dims: RBF([1.0] * dims)
            else:
                kernel_generator = lambda dims: C() * RBF([1.0] * dims)
        else:
            kernel_generator = self.kernel_generator
        self.kernel = kernel_generator(X.shape[1])

        self._pre_fit(X, y)

        if self.standardize_before_fit:
            y = numpy.copy(y)
            self.standardize_Y = y.std(axis=0, ddof=0)
            if isinstance(self.standardize_Y, numpy.floating):
                if self.standardize_Y == 0:
                    self.standardize_Y = 1
            else:
                self.standardize_Y[self.standardize_Y == 0] = 1
            y /= self.standardize_Y
        else:
            self.standardize_Y = None

        return super().fit(X, y)
Example #4
File: gp_sklearn.py  Project: ssoudan/pyGP
def evalMLENoisy(X,
                 Y,
                 x,
                 DY=0.):  # type: (Any, Any, Any, float) -> Tuple[Any, Any]

    # Define the kernel: constant * RBF plus a white-noise term
    kernel = C(1.0, (1e-4, 1e4)) * RBF(length_scale=100.0, length_scale_bounds=(1e-2, 1e3)) \
             + WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+1))

    # Instantiate a Gaussian Process model
    gp = GaussianProcessRegressor(kernel=kernel,
                                  alpha=DY**2,
                                  n_restarts_optimizer=10)

    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, Y)

    # Make the prediction on the meshed x-axis (ask for the standard deviation as well)
    y, sigma = gp.predict(x, return_std=True)

    return y, sigma
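A minimal usage sketch for evalMLENoisy with made-up toy data (assumes numpy is imported as np, as in the rest of the snippet; DY is the per-point noise level):
X = np.linspace(0, 10, 20).reshape(-1, 1)            # training inputs
Y = np.sin(X).ravel() + 0.1 * np.random.randn(20)    # noisy observations
x = np.linspace(0, 10, 200).reshape(-1, 1)           # prediction grid
y_mean, y_std = evalMLENoisy(X, Y, x, DY=0.1)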
Example #5
	def __init__(
			self,
			keep_other_features=3,
			step2_cv_folds=5,
	):
		"""

		Parameters
		----------
		keep_other_features : int
			The number of other (derived) feature columns to keep. Keeping this
			number small helps prevent overfitting problems if the number of
			output features is large.
		step2_cv_folds : int
			Number of cross-validation folds used to generate the step-1 predictions
			that feed into step 2.
		"""

		self.keep_other_features = keep_other_features
		self.step2_cv_folds = step2_cv_folds
		self._kernel_generator = lambda dims: C() * RBF([1.0] * dims)
Example #6
def GPtrain(x, y):
    y = y.reshape(-1, 1)
    x = x.reshape(-1, 1)
    scaler = StandardScaler().fit(y)
    y = scaler.transform(y)
    #kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-3, 1e3)) + W(1.0, (1e-5, 1e5))
    kernel = C(0.03, (1e-3, 1e1)) * RBF(0.01, (1e-2, 1e2))
    gpr = GaussianProcessRegressor(kernel=kernel,
                                   n_restarts_optimizer=20,
                                   normalize_y=False,
                                   alpha=np.var(y))
    gpr.fit(x, y)
    #print("Optimised kernel: %s" % gpr.kernel_)
    ystar, sigma = gpr.predict(x, return_std=True)
    sigma = np.reshape(sigma, (np.size(sigma), 1))
    sigma = (sigma**2 + 1)**0.5
    ystarp = ystar + sigma
    ystari = scaler.inverse_transform(ystar)
    ystarpi = scaler.inverse_transform(ystarp)
    sigmai = np.mean(ystarpi - ystari)
    return ystari, sigmai
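A short, hypothetical call to GPtrain; inputs are 1-D arrays, and the helper returns the de-standardized prediction plus a single averaged uncertainty. Assumes numpy, StandardScaler, and the GP imports used in the snippet.
x = np.linspace(0, 1, 50)
y = np.sin(2 * np.pi * x) + 0.05 * np.random.randn(50)
ystari, sigmai = GPtrain(x, y)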
Example #7
def smooth_prof_gpr(data, data_sigma, data_ps, sample_ps):
    # Format the inputs
    X = np.atleast_2d(data_ps).T
    y = np.maximum(data, 0)
    alpha = data_sigma**2

    # Set up the Gaussian Process Regressor
    kernel = C(1000, (1, 1e4)) * RBF(0.05, (1e-3, 1))
    gp = GaussianProcessRegressor(kernel=kernel,
                                  n_restarts_optimizer=9,
                                  alpha=alpha)
    gp.fit(X, y)

    # Predict, then convert back to normal dimensions
    xs = np.atleast_2d(sample_ps).T
    y_pred, y_sigma = gp.predict(xs, return_std=True)
    xs = np.squeeze(xs)
    y_pred = np.squeeze(y_pred)
    y_sigma = np.squeeze(y_sigma)

    return y_pred, y_sigma
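A hypothetical usage sketch: smooth a noisy profile onto a finer grid. The data below is made up for illustration; numpy and the GP imports from the snippet are assumed.
data_ps = np.linspace(0.05, 1.0, 20)             # measurement positions
data = np.exp(-data_ps / 0.3)                    # made-up profile values
data_sigma = np.full(20, 0.01)                   # per-point 1-sigma uncertainties
sample_ps = np.linspace(0.05, 1.0, 200)          # finer grid to smooth onto
y_pred, y_sigma = smooth_prof_gpr(data, data_sigma, data_ps, sample_ps)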
Example #8
def GP_fit_periodic(time, mag, err, T, T_1='same', x_pred=None, plot=True):
    if T_1 == 'same':
        T_1 = T
    phase = np.mod(time, T) / T
    sort_idx = np.argsort(phase)
    PHASE = phase[sort_idx]
    MAG = mag[sort_idx]
    ERR = err[sort_idx]
    k=1
    while k < 2:
        MAG = np.concatenate([MAG,MAG])
        PHASE = np.concatenate([PHASE,PHASE+k])
        ERR = np.concatenate([ERR,ERR])
        k += 1
    #PHASE *= T
    kernel = C(constant_value=1., constant_value_bounds=(.1, .5)) * \
             RBF(length_scale=.3, length_scale_bounds=(0.2, .5))
    #kernel = C(constant_value=1., constant_value_bounds=(0.1,10.)) * \
    #         Matern(length_scale=4., length_scale_bounds=(.1,10.))
    gp = GaussianProcessRegressor(kernel=kernel, alpha=(ERR)**2, optimizer='fmin_l_bfgs_b',
                                  n_restarts_optimizer=10, normalize_y=True)
    gp.fit(PHASE[:,None], MAG)
    x_test=np.linspace(0,2,1000)
    y_pred, sigma = gp.predict(x_test[:,None], return_std=True)

    phase_pred = np.mod(x_pred, T_1) / T_1
    mag_pred = gp.predict(phase_pred[:,None], return_std=False)

    if plot:
        plt.figure(figsize=(9,4))
        plt.errorbar(PHASE, MAG, yerr=ERR, fmt='k.', ms=7, lw=1,alpha=1)
        plt.plot(x_test, y_pred, 'r-', lw= 1)
        plt.fill(np.concatenate([x_test, x_test[::-1]]),
                 np.concatenate([y_pred - 1.9600 * sigma,
                                (y_pred + 1.9600 * sigma)[::-1]]),
                 alpha=.5, fc='b', ec='None')
        plt.plot(phase_pred, mag_pred, 'b.')
        plt.gca().invert_yaxis()
        plt.show()

    return mag_pred
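A hypothetical usage sketch: fold a sparsely sampled light curve on a known period T and interpolate magnitudes at new epochs. The data is synthetic; numpy, matplotlib, and the GP imports used in the snippet are assumed.
rng = np.random.RandomState(0)
time = np.sort(rng.uniform(0, 50, 80))
T = 3.7                                           # assumed known period
mag = 15 + 0.4 * np.sin(2 * np.pi * time / T) + 0.02 * rng.randn(80)
err = np.full_like(time, 0.02)
x_pred = np.linspace(0, 10, 200)                  # epochs at which to interpolate
mag_pred = GP_fit_periodic(time, mag, err, T, x_pred=x_pred, plot=False)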
Example #9
File: multitarget.py  Project: jpn--/pines
    def __init__(self,
                 core_features=None,
                 keep_other_features=3,
                 detrend=True,
                 expected_features=None):
        """

		Parameters
		----------
		core_features
			feature columns to definitely keep for both LR and GPR

		"""

        self.core_features = core_features
        self.keep_other_features = keep_other_features
        self.lr = LinearRegression()
        self.gpr = GaussianProcessRegressor_(n_restarts_optimizer=9)
        self.y_residual = None
        self.kernel_generator = lambda dims: C() * RBF([1.0] * dims)
        self.use_linear = detrend
        self.expected_features = expected_features
Example #10
 def update_model(self):
     """
       Update the Gaussian process model with the collected training samples.
       Uses KFold cross-validation to avoid overfitting the length scale of the Gaussian process.
       :return: None
     """
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         x = []
         y = []
         for f, r in self.memory:
             x.append(f)
             y.append(r)
         x = np.asarray(x)
         y = np.asarray(y)
         total_train = x.shape[0]
         if total_train <= 20:
             kf = KFold(n_splits=min([x.shape[0], 5]))
             for train_index, test_index in kf.split(x):
                 gpr = self.fitting_gaussian_process(
                     x[train_index], y[train_index])
                 self.gprs.append(gpr)
         else:
             kf = KFold(n_splits=min([x.shape[0], 5]))
             for train_index, test_index in kf.split(x):
                 gpr = self.fitting_gaussian_process(
                     x[test_index], y[test_index])
                 self.gprs.append(gpr)
         loss = [item.score(x, y) for item in self.gprs]
         best_gpr = self.gprs[np.argmax(loss)]
         kernel = C(best_gpr.kernel_.k1.constant_value, (1e-3, 1e3)) * \
                  Matern(length_scale=best_gpr.kernel_.k2.length_scale,
                         length_scale_bounds=(0.01, 10.0e20), nu=1.5)
         self.gp_reward = GaussianProcessRegressor(kernel=kernel,
                                                   optimizer=None,
                                                   n_restarts_optimizer=10)
         self.gp_reward.fit(x, y)
         print(np.sqrt(self.gp_reward.kernel_.k1.constant_value))
         print(self.gp_reward.kernel_.k2.length_scale)
Example #11
    def __init__(self, arms, sigma=5, window_length=0):
        self.arms = arms
        self.n_arms = len(arms)
        self.means = np.zeros(self.n_arms)
        self.sigmas = np.ones(self.n_arms) * sigma

        self.pulled_arms = np.array([])
        self.collected_rewards = np.array([])

        self.window_length = window_length

        alpha = 1.5

        theta = 1
        l = 1

        kernel = C(theta, (1e-5, 1e5)) * RBF(l, (1e-10, 1e10))

        self.gp = GaussianProcessRegressor(kernel=kernel,
                                           alpha=alpha**2,
                                           normalize_y=True,
                                           n_restarts_optimizer=10)
Example #12
def test_gpr_consistency_std_cov_non_invertible_kernel():
    """Check the consistency between the returned std. dev. and the covariance.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/19936
    Inconsistencies were observed when the kernel cannot be inverted (or is not
    numerically stable).
    """
    kernel = C(8.98576054e05,
               (1e-12, 1e12)) * RBF([5.91326520e02, 1.32584051e03],
                                    (1e-12, 1e12)) + WhiteKernel(
                                        noise_level=1e-5)
    gpr = GaussianProcessRegressor(kernel=kernel, alpha=0, optimizer=None)
    X_train = np.array([
        [0.0, 0.0],
        [1.54919334, -0.77459667],
        [-1.54919334, 0.0],
        [0.0, -1.54919334],
        [0.77459667, 0.77459667],
        [-0.77459667, 1.54919334],
    ])
    y_train = np.array([
        [-2.14882017e-10],
        [-4.66975823e00],
        [4.01823986e00],
        [-1.30303674e00],
        [-1.35760156e00],
        [3.31215668e00],
    ])
    gpr.fit(X_train, y_train)
    X_test = np.array([
        [-1.93649167, -1.93649167],
        [1.93649167, -1.93649167],
        [-1.93649167, 1.93649167],
        [1.93649167, 1.93649167],
    ])
    pred1, std = gpr.predict(X_test, return_std=True)
    pred2, cov = gpr.predict(X_test, return_cov=True)
    assert_allclose(std, np.sqrt(np.diagonal(cov)), rtol=1e-5)
Example #13
    def __init__(self,
                 dim,
                 amplitude=1,
                 length_scale=1,
                 noise=0.1,
                 kernel=None):
        super().__init__()

        self.amplitude = amplitude
        self.length_scale = length_scale
        self.noise = noise

        self.X = []
        self.y = []

        self.kernel = kernel
        if self.kernel is None:
            self.kernel = C(self.amplitude,
                            (1e-5, 1e3)) * RBF([self.length_scale] * dim,
                                               (1e-2, 1e3)) + WhiteKernel(
                                                   self.noise, (1e-9, 1))
        self.model = GaussianProcessRegressor(kernel=self.kernel,
                                              n_restarts_optimizer=10)
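A hypothetical companion method (not in the excerpt) showing how the surrogate above might be updated as observations arrive:
    def observe(self, x, y):
        # Append the new sample and refit the GP surrogate on everything seen so far.
        self.X.append(x)
        self.y.append(y)
        self.model.fit(np.asarray(self.X), np.asarray(self.y))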
Example #14
def test_random_starts():
    # Test that an increasing number of random-starts of GP fitting only
    # increases the log marginal likelihood of the chosen theta.
    n_samples, n_features = 25, 2
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features) * 2 - 1
    y = np.sin(X).sum(axis=1) + np.sin(3 * X).sum(axis=1) \
        + rng.normal(scale=0.1, size=n_samples)

    kernel = C(1.0, (1e-2, 1e2)) \
        * RBF(length_scale=[1.0] * n_features,
              length_scale_bounds=[(1e-4, 1e+2)] * n_features) \
        + WhiteKernel(noise_level=1e-5, noise_level_bounds=(1e-5, 1e1))
    last_lml = -np.inf
    for n_restarts_optimizer in range(5):
        gp = GaussianProcessRegressor(
            kernel=kernel,
            n_restarts_optimizer=n_restarts_optimizer,
            random_state=0,
        ).fit(X, y)
        lml = gp.log_marginal_likelihood(gp.kernel_.theta)
        assert_greater(lml, last_lml - np.finfo(np.float32).eps)
        last_lml = lml
Example #15
    def __init__(self, arms):

        # max_arm = np.max(arms)
        # min_arm = np.min(arms)
        # normalize_arms = (arms-min_arm)/(max_arm - min_arm)
        self.arms = arms

        self.n_arms = len(arms)

        # self.t = 0
        # self.rewards_per_arm = [[] for i in range(self.n_arms)]
        self.collected_rewards = np.array([])
        self.pulled_arms = []

        self.means = np.zeros(self.n_arms)
        self.sigmas = np.ones(self.n_arms) * 10

        alpha = 10.0
        kernel = C(1.0, (1e-3, 1e3)) * RBF(1.0, (1e-3, 1e3))
        self.gp = sklearn.gaussian_process.GaussianProcessRegressor(
            kernel=kernel,
            alpha=alpha**2,
            normalize_y=True,
            n_restarts_optimizer=9)
Example #16
def kriging(array_in, x, y, theta0=0.1, thetaL=.001, thetaU=1., nugget=0.01):
    """

    Read a numpy array with NaNs and return an interpolated and extrapolated 2-D array using Gaussian Process Regression (kriging).

    Parameters
    ----------
    array_in : 2-D numpy array with NaNs to fill

    Returns
    -------
    array_out : interpolated and extrapolated 2-D array

    """

    import numpy as np
    #    from sklearn.gaussian_process import GaussianProcess
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

    kernel = C(1.0, (1e-3, 1e3)) * RBF([5, 5], (1e-2, 1e2))
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=1)
    #    gp = GaussianProcess(theta0=theta0, thetaL=thetaL, thetaU=thetaU, nugget=nugget)

    xx, yy = np.meshgrid(x, y)
    vals = ~np.isnan(array_in)

    gp.fit(X=np.column_stack([xx[vals], yy[vals]]), y=array_in[vals])

    xx_yy_as_cols = np.column_stack([xx.flatten(), yy.flatten()])

    array_out = gp.predict(xx_yy_as_cols).reshape(array_in.shape)

    #    plt.imshow(GD1,interpolation='nearest')

    return (array_out)
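A hypothetical call that fills a NaN gap in a small 2-D field (numpy assumed imported as np; kriging imports its own scikit-learn dependencies internally):
x = np.arange(10)
y = np.arange(8)
field = np.sin(x[None, :] / 3.0) + np.cos(y[:, None] / 2.0)   # shape (len(y), len(x))
field[2:4, 5:7] = np.nan                                      # gap to interpolate over
filled = kriging(field, x, y)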
Example #17
    def __init__(self, params={}):
        super().__init__()
        self.params = {}
        self.params['Amplitude'] = Parameter(name= 'Kernel amplitude',
                                            value = 1,
                                            min = 0,
                                            max = 10,
                                            description = 'Amplitude of modeled cost landscape')
        self.params['Length scale'] = Parameter(name= 'Kernel length scale',
                                            value = 1,
                                            min = 0,
                                            max = 10,
                                            description = 'Characteristic size of cost landscape')
        self.params['Noise'] = Parameter(name= 'Kernel noise',
                                            value = 0.1,
                                            min = 0,
                                            max = 10,
                                            description = 'Amplitude of modeled white noise process')

        for p in params:
            self.params[p].value = params[p]

        kernel = C(self.params['Amplitude'].value, (1e-3, 1e3)) * RBF(self.params['Length scale'].value, (1e-2, 1e2)) + WhiteKernel(self.params['Noise'].value)
        self.model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10)
Example #18
def EI_GP(x_train, y_train, x_test, scores, param_choices):
    # param_choices and x_test are same.
    # y_train and scores are same

    kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
    gp = GaussianProcessRegressor(kernel,
                                  n_restarts_optimizer=100,
                                  normalize_y=True)
    gp.fit(x_train, y_train)

    # Get mean and standard deviation for each possible
    # number of hidden units
    y_mean, y_std = gp.predict(x_test, return_std=True)
    y_std = vector_2d(y_std)

    y_min = min(scores)  # np.min(scores, axis=1)

    # Calculate expected improvement from 95% confidence interval
    expected_improvement = y_min - (y_mean - 1.96 * y_std)
    expected_improvement[expected_improvement < 0] = 0

    max_index = expected_improvement.argmax()
    # Select the next parameter choice based on expected improvement
    new_param = param_choices[max_index]
    return new_param
Example #19
    def __init__(self, search_space, list_of_parameters_names, maximize):

        self.dict_of_means = {}
        self.list_of_parameters_names = list_of_parameters_names
        self.search_space = search_space
        for key in list_of_parameters_names:

            self.dict_of_means[key] = [
                float(search_space[key][0][1] + search_space[key][0][0]) / 2.0,
                float(search_space[key][0][1] - search_space[key][0][0]) / 2.0
            ]

        # Instantiate a Gaussian Process model
        self.kernel = RBF(5, (1e-2, 1e2)) * C(1, (1e-2, 1e2)) + WhiteKernel(
            noise_level=0.2)

        self.maximize = maximize

        self.generate_meshes()

        self.parameters_and_loss_dict = {}
        for item in self.list_of_parameters_names:
            self.parameters_and_loss_dict[item] = []
        self.parameters_and_loss_dict['loss'] = []
Example #20
File: dia_hrl.py  Project: zxsted/gpsarsa
    def __init__(self,
                 s,
                 a,
                 r,
                 regularization=1e-5,
                 kernal=None,
                 gamma=GAMMA,
                 memory=None):

        self.sigma = regularization  # noise for Q(s,a)
        self.kernal = kernal
        self.gamma = gamma
        self.B_t = memory
        self.s = s
        self.a = a
        self.r = r

        if self.kernal is None:
            self.kernal_ = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-4, 1e4))
        else:
            self.kernal_ = clone(self.kernal)

        if self.B_t is None:
            self.B_t = Memory()
Example #21
def gp_fit(x, y, mc_index=0):

    print("Fitting", len(y), "points: ")

    #length scale tuning taken from util_ConstructIntrinsicPosterior_GenericCoordinates.py
    length_scale_est = []
    length_scale_bounds_est = []

    for indx in np.arange(len(x[0])):
        # These length scales have been tuned by experience
        length_scale_est.append(
            2 *
            np.std(x[:, indx]))  # auto-select range based on sampling retained
        length_scale_min_here = np.max(
            [1e-3, 0.2 * np.std(x[:, indx] / np.sqrt(len(x)))])
        if indx == mc_index:
            length_scale_min_here = 0.2 * np.std(x[:, indx] / np.sqrt(len(x)))
            print(" Setting mc range: retained point range is ",
                  np.std(x[:, indx]), " and target min is ",
                  length_scale_min_here)
        length_scale_bounds_est.append(
            (length_scale_min_here, 5 * np.std(x[:, indx])))

    #set up kernel
    kernel = WhiteKernel(noise_level=0.1, noise_level_bounds=(1e-2, 1)) + C(
        0.5, (1e-3, 1e1)) * RBF(length_scale=length_scale_est,
                                length_scale_bounds=length_scale_bounds_est)
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=8)

    #fit and estimate
    gp.fit(x, y)

    def fit_func(coord):
        return gp.predict(coord)[0]

    return fit_func
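A hypothetical usage sketch: fit a smooth surrogate to scattered 2-D samples and query it at a new point. The data is synthetic; numpy and the scikit-learn GP imports from the snippet are assumed.
rng = np.random.RandomState(0)
x = rng.uniform(-1, 1, size=(200, 2))
y = np.exp(-np.sum(x**2, axis=1))        # made-up smooth target surface
fit_func = gp_fit(x, y, mc_index=0)
print(fit_func(np.array([[0.1, -0.2]])))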
Example #22
    def gaussian(self, lats, lons, temps):
        coords = np.zeros((len(lats), 2))
        for ii in range(len(lats)):
            coords[ii, 0] = lats[ii]  #column 1 is lats
            coords[ii, 1] = lons[ii]  #column 2 is lons
        res = 100
        # Generates a bunch of lats and lons
        lat_sample = np.linspace(float(min(lats)), float(max(lats)), res)
        lon_sample = np.linspace(float(min(lons)), float(max(lons)), res)
        kernel = C(1.0, (1e-3, 1e4)) * RBF(
            [5, 5], (1e-2, 1e2))  #constant kernel just scales it
        gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=20)
        gp.fit(coords, temps)

        latlon = np.array(list(product(
            lat_sample, lon_sample)))  #these are the sample points
        y_pred, MSE = gp.predict(
            latlon, return_std=True)  #returns mean and the std dev

        latp, lonp = latlon[:, 0].reshape(res,
                                          res), latlon[:, 1].reshape(res, res)
        tempp = np.reshape(y_pred, (res, res))
        tempp = np.clip(tempp, 0, max(temps))
        return latp, lonp, tempp
Example #23
def kernel_RBF(k1=1e6,
               l1_t=5e3,
               l1_l=6e3,
               k2=1e5,
               l2_t=1e2,
               l2_l=3e2,
               k3=1e2,
               l3_t=5e0,
               l3_l=3e1,
               bounds=False):

    if bounds:

        kernel = C(k1, (1e-2,1e3)) * RBF((l1_t,l1_l), ((1e0,1e6),(1e0,1e6))) +\
        C(k2, (1e-2,1e3)) * RBF((l2_t,l2_l), ((1e0,1e5),(1e0,1e5))) +\
        C(k3, (1e-2,1e3)) * RBF((l3_t,l3_l), ((1e0,1e4),(1e0,1e4)))

    else:

        kernel = C(k1) * RBF((l1_t,l1_l)) +\
        C(k2) * RBF((l2_t,l2_l)) +\
        C(k3) * RBF((l3_t,l3_l))

    return kernel
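A hypothetical usage sketch: build the three-component RBF kernel with bounds and hand it to a regressor (GaussianProcessRegressor, C, and RBF are assumed imported as in the snippet; the alpha value is illustrative only).
kernel = kernel_RBF(bounds=True)
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=5, alpha=1e-6)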
Example #24
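The fixture below references a helper f that is defined earlier in the original test module and is not included in this excerpt; a plausible stand-in, assumed here for completeness, is:
def f(x):
    # Assumed toy target function; any smooth function crossing the y_mc thresholds works.
    return np.sin(x)
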
X = np.atleast_2d(np.linspace(0, 10, 30)).T
X2 = np.atleast_2d([2.0, 4.0, 5.5, 6.5, 7.5]).T
y = np.array(f(X).ravel() > 0, dtype=int)
fX = f(X).ravel()
y_mc = np.empty(y.shape, dtype=int)  # multi-class
y_mc[fX < -0.35] = 0
y_mc[(fX >= -0.35) & (fX < 0.35)] = 1
y_mc[fX > 0.35] = 2


fixed_kernel = RBF(length_scale=1.0, length_scale_bounds="fixed")
kernels = [
    RBF(length_scale=0.1),
    fixed_kernel,
    RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
    C(1.0, (1e-2, 1e2)) * RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
]
non_fixed_kernels = [kernel for kernel in kernels if kernel != fixed_kernel]


@pytest.mark.parametrize("kernel", kernels)
def test_predict_consistent(kernel):
    # Check binary predict decision has also predicted probability above 0.5.
    gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
    assert_array_equal(gpc.predict(X), gpc.predict_proba(X)[:, 1] >= 0.5)


def test_predict_consistent_structured():
    # Check binary predict decision has also predicted probability above 0.5.
    X = ["A", "AB", "B"]
    y = np.array([True, False, True])
Example #25
def create_space(list_name):
    spaces = []
    if 'svm' in list_name:
        space_svm = {
            'model_name': 'svm',
            'C': hp.uniform('C', 0, 10.0),
            'kernel': hp.choice('kernel', ['linear', 'rbf']),
            'gamma': hp.uniform('gamma', 0, 20.0)
        }
        spaces.append(space_svm)
    if 'knn' in list_name:
        space_knn = {
            'model_name':
            'knn',
            'n_neighbors':
            hp.choice('n_neighbors', range(1, 14)),
            'algorithm':
            hp.choice('algorithm', ['auto', 'ball_tree', 'kd_tree', 'brute'])
        }
        spaces.append(space_knn)
    if 'xgboost' in list_name:
        space_xgboost = {
            'model_name': 'xgboost',
            'n_estimators': hp.choice('n_estimators', range(50, 501, 2)),
            'learning_rate': hp.uniform('learning_rate', 0.01, 0.3),
            'max_depth': hp.choice('max_depth', range(2, 11, 1)),
            'min_child_weight': hp.choice('min_child_weight', range(1, 7, 1)),
            'reg_alpha': hp.uniform('reg_alpha', 0, 1.0),
            'subsample': hp.uniform('subsample', 0.5, 1.0),
            'colsample_bytree': hp.uniform('colsample_bytree', 0.6, 1.0)
        }
        spaces.append(space_xgboost)
    if 'lightgbm' in list_name:
        space_lightgbm = {
            'model_name': 'lightgbm',
            'n_estimators': hp.choice('n_estimators', range(50, 501, 2)),
            'learning_rate': hp.uniform('learning_rate', 0.01, 0.3),
            'max_depth': hp.choice('max_depth', range(2, 11, 1)),
            'num_leaves': hp.choice('num_leaves', range(20, 61, 1)),
            'min_child_weight': hp.uniform('min_child_weight', 0.001, 0.2),
            'min_child_samples': hp.choice('min_child_samples',
                                           range(5, 51, 2)),
            'subsample': hp.uniform('subsample', 0.5, 1.0),
            'colsample_bytree': hp.uniform('lgb_colsample_bytree', 0.6, 1.0),
            'reg_alpha': hp.uniform('reg_alpha', 0, 1.0)
        }
        spaces.append(space_lightgbm)
    if 'randomforest' in list_name:
        space_randomforest = {
            'model_name': 'randomforest',
            'n_estimators': hp.choice('n_estimators', range(50, 501, 2)),
            'max_depth': hp.choice('max_depth', range(1, 11, 1)),
            'min_samples_split': hp.choice('min_samples_split',
                                           range(2, 21, 1)),
            'min_samples_leaf': hp.choice('min_samples_leaf', range(1, 21, 1)),
            'max_features': hp.uniform('max_features', 0.4, 1.0)
        }
        spaces.append(space_randomforest)
    if 'linear' in list_name:
        space_linear = {'model_name': 'linear'}
        spaces.append(space_linear)
    if 'gpr' in list_name:
        space_gpr = {
            'model_name': 'gpr',
            'kernel': C(0.1, (0.001, 0.1)) * RBF(0.5, (1e-4, 10)),
            'alpha': hp.uniform('alpha', 0.05, 1.0),
            'normalize_y': True
        }
        spaces.append(space_gpr)
    return spaces
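A hypothetical call building search spaces for a few model families (assumes hyperopt's hp and the scikit-learn kernels are imported as in the snippet):
spaces = create_space(['svm', 'gpr', 'randomforest'])
for space in spaces:
    print(space['model_name'])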
Example #26
def map_bayesian_gpr_sparse_pymc3_ex(amountTraining, amount_Inducing,
                                     amountTest, trainingValues,
                                     trainingParameters, testValues,
                                     testParameters):

    inducing_jitter = 0.0001

    valuesFFTCallsTraining = np.squeeze(np.asarray(trainingValues))
    X = np.array(np.asarray(trainingParameters), dtype=np.float64)

    valuesFFTCallsTest = np.squeeze(np.asarray(testValues))
    X_new = np.array(np.asarray(testParameters), dtype=np.float64)

    dimension = X.shape[1]

    startFittingTimer = timer()
    with pm.Model() as model2:

        # Set priors on the hyperparameters of the covariance
        ls1 = pm.Gamma("lengthscale", alpha=2, beta=2)

        eta = pm.HalfNormal("signal variance", sigma=2)

        cov = eta * pm.gp.cov.ExpQuad(
            dimension, ls1)  # cov_x1 must accept X1 without error

        # Specify the GP.  The default mean function is `Zero`.
        gp = pm.gp.MarginalSparse(cov_func=cov, approx="VFE")

        Xu = pm.gp.util.kmeans_inducing_points(amount_Inducing, X)

        sigma = pm.HalfNormal("sigma", sigma=0.01)  #halfnormal always positive

        # Place a GP prior over the function f.
        gp.marginal_likelihood("y",
                               X=X,
                               Xu=Xu,
                               y=valuesFFTCallsTraining,
                               noise=sigma)

    with model2:
        mp = pm.find_MAP()

    # The package does these calculations at prediction time, although they are really
    # part of training. This makes prediction very slow in use and not suitable for our
    # data study, so we write it ourselves.

    d = [*mp.values()]
    kernel = C(d[4]) * RBF((d[3]))
    noise = d[5]**2
    K_uu = kernel(Xu, Xu)
    K_uu[np.diag_indices_from(K_uu)] += inducing_jitter
    K_xu = kernel(X, Xu)
    K_ux = kernel(Xu, X)

    init = np.ones((np.shape(X)[0]))
    inverse_noise = 1 / noise
    inv_lambd_vec = init * inverse_noise
    Lambd_inv = np.diag(inv_lambd_vec)

    sigma = K_uu + np.dot(K_ux, np.dot(Lambd_inv, K_xu))
    L_sigma = cholesky_dec(sigma)
    y_l = np.dot(Lambd_inv, valuesFFTCallsTraining)
    a = np.dot(K_ux, y_l)
    alpha = cho_solve((L_sigma, True), a)

    endFittingTimer = timer()
    print('Timer of fitting in sample ' +
          str(endFittingTimer - startFittingTimer))

    # The prediction code in the package is very slow since it does not
    # fully optimise the training and it calculates the variance (or its trace)
    # We write the prediction ourselves.

    # startPredictingInSampleTimerGPR = timer()
    # mu, var = gp.predict(X, point=mp, diag=True)
    # endPredictingInSampleTimerGPR = timer()

    startPredictingInSampleTimerGPR = timer()
    for i in range(10):
        K_xastu = kernel(X, Xu)
        pred = np.dot(K_xastu, alpha)
    endPredictinginSampleTimerGPR = timer()

    print('Timer of predicting in sample GPR ' +
          str((endPredictinginSampleTimerGPR -
               startPredictingInSampleTimerGPR) / 10))
    mu = np.squeeze(pred)

    mu = np.maximum(mu, 0)

    AEE = np.sum(np.abs((valuesFFTCallsTraining - mu))) / amountTraining
    MAE = np.max(np.abs((valuesFFTCallsTraining - mu)))

    print('In sample MAE ' + str(MAE))
    print('In sample AEE ' + str(AEE))

    # The prediction code in the package is very slow since it does not
    # fully optimise the training and it calculates the variance (or its trace)
    # We write the prediction ourselves.

    # startPredictingOutSampleTimerGPR = timer()
    # mu, var = gp.predict(X_new, point=mp, diag=True)
    # endPredictingOutSampleTimerGPR = timer()
    # print('Timer of predicting out sample GPR ' + str(
    #     endPredictingOutSampleTimerGPR - startPredictingOutSampleTimerGPR))

    startPredictingOutSampleTimerGPR = timer()
    for i in range(10):
        K_xastu = kernel(X_new, Xu)
        pred = np.dot(K_xastu, alpha)
    endPredictingOutSampleTimerGPR = timer()

    print('Timer of predicting out sample GPR ' +
          str((endPredictingOutSampleTimerGPR -
               startPredictingOutSampleTimerGPR) / 10))

    mu = np.squeeze(pred)

    mu = np.maximum(mu, 0)

    AEE = np.sum(np.abs((valuesFFTCallsTest - mu))) / amountTest
    MAE = np.max(np.abs((valuesFFTCallsTest - mu)))

    print('Out of sample MAE ' + str(MAE))
    print('Out of sample AEE ' + str(AEE))
Example #27
    def test_gpr_rbf_unfitted(self):

        se = (C(1.0, (1e-3, 1e3)) *
              RBF(length_scale=10, length_scale_bounds=(1e-3, 1e3)))
        kernel = (Sum(
            se,
            C(0.1, (1e-3, 1e3)) *
            RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3))))

        gp = GaussianProcessRegressor(alpha=1e-7,
                                      kernel=kernel,
                                      n_restarts_optimizer=15,
                                      normalize_y=True)

        # return_cov=False, return_std=False
        model_onnx = to_onnx(gp,
                             initial_types=[('X', FloatTensorType([]))],
                             dtype=np.float32)
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(Xtest_.astype(np.float32),
                            gp,
                            model_onnx,
                            verbose=False,
                            basename="SklearnGaussianProcessRBFUnfitted")

        # return_cov=True, return_std=True
        options = {
            GaussianProcessRegressor: {
                "return_std": True,
                "return_cov": True
            }
        }
        try:
            to_onnx(gp, Xtrain_.astype(np.float32), options=options)
        except RuntimeError as e:
            assert "Not returning standard deviation" in str(e)

        # return_std=True
        options = {GaussianProcessRegressor: {"return_std": True}}
        model_onnx = to_onnx(gp,
                             options=options,
                             initial_types=[('X', FloatTensorType([None,
                                                                   None]))],
                             dtype=np.float32)
        self.assertTrue(model_onnx is not None)
        self.check_outputs(
            gp,
            model_onnx,
            Xtest_.astype(np.float32),
            predict_attributes=options[GaussianProcessRegressor])

        # return_cov=True
        options = {GaussianProcessRegressor: {"return_cov": True}}
        # model_onnx = to_onnx(gp, Xtrain_.astype(np.float32), options=options)
        model_onnx = to_onnx(gp,
                             options=options,
                             initial_types=[('X', FloatTensorType([None,
                                                                   None]))],
                             dtype=np.float32)
        self.assertTrue(model_onnx is not None)
        self.check_outputs(
            gp,
            model_onnx,
            Xtest_.astype(np.float32),
            predict_attributes=options[GaussianProcessRegressor])
Example #28
while (n_out > 0 or final_fit==0) and num_finite >= 2:

    beta1, beta0, incert_slope, _, _ = ft.wls_matrix(time_vals[good_vals], data_vals[good_vals],
                                                     1. / err_vals[good_vals], conf_slope=0.99)

    # standardized dispersion from linearity
    res_stdized = np.sqrt(np.mean(
        (data_vals[good_vals] - (beta0 + beta1 * time_vals[good_vals])) ** 2 / err_vals[good_vals]))
    res = np.sqrt(np.mean((data_vals[good_vals] - (beta0 + beta1 * time_vals[good_vals])) ** 2))
    if perc_nonlin[min(niter, len(perc_nonlin) - 1)] == 0:
        opt_var = 0
    else:
        opt_var = (res / res_stdized ** 2) ** 2 * 100. / (5 * perc_nonlin[min(niter, len(perc_nonlin) - 1)])

    k1 = PairwiseKernel(1, metric='linear') + PairwiseKernel(1, metric='linear') * C(opt_var) * RQ(10, 3)  # linear kernel
    k2 = C(30) * ESS(length_scale=1, periodicity=1)  # periodic kernel
    k3 = C(50) * RBF(0.75)
    kernel = k1 + k2 + k3

    mu_x = np.nanmean(time_vals[good_vals])
    detr_t_pred = t_pred - mu_x
    detr_time_vals = time_vals - mu_x
    mu_y = np.nanmean(data_vals)
    detr_data_vals = data_vals - mu_y

    # if we remove a linear trend, normalize_y should be false...
    gp = GaussianProcessRegressor(kernel=kernel, optimizer=optimizer, n_restarts_optimizer=n_restarts_optimizer,
                                  alpha=err_vals[good_vals], normalize_y=False)
    gp.fit(detr_time_vals[good_vals].reshape(-1, 1), detr_data_vals[good_vals].reshape(-1, 1))
    y_pred, sigma = gp.predict(detr_t_pred.reshape(-1, 1), return_std=True)
Example #29
# print(featureNum)
# print(dimension)

# Linear (brute-force) search regression
alg = 'brute'
begin_time = time()
print('{} search scheme:'.format(alg))
print('        prediction      error')
err = []
for m in range(queryBase.shape[0]):
    query = np.array([queryBase[m, :]])
    data = ordinarySearch(dataBaseInitial, query, alg, 100)  # get the training set
    # print(data.shape)

    # Gaussian process regression
    kernel = C(0.1, (0.001, 0.1)) * RBF(0.5, (1e-4, 10))
    reg = GaussianProcessRegressor(kernel=kernel,
                                   n_restarts_optimizer=10,
                                   alpha=0.01)
    reg.fit(data[:, 1:], data[:, 0])
    # print(query.shape)
    output = reg.predict(query)
    print('test{} '.format(m), end=' ')
    print('{0:.3f}℃ {1:.3f}℃'.format(
        output[0], abs(queryBaseInitial.iloc[m, 0] - output[0])))
    err.append(abs(queryBaseInitial.iloc[m, 0] - output[0]))

print('MAE:', sum(err) / len(err))
print('RMSE', sqrt(sum([num**2 for num in err]) / len(err)))
end_time = time()
print('Elapsed time:', end_time - begin_time)
Example #30
scaler = StandardScaler()
train_data = scaler.fit_transform(train_data)
#train_data = np.log(train_data)
X_train, Y_train = reshape_dataset(train_data, lags, steps_ahead)
X_test, Y_test = reshape_dataset(test_data, lags, steps_ahead)

week_data = [' ' for i in range(len(train_data))]
for i in range(len(train_data)):
    if i % 8 == 0:
        week_data[i] = str(int(data[i, 0])) + '-' + str(int(data[i, 1]))

X_train_weeks, Y_train_weeks = reshape_dataset(week_data, lags, steps_ahead)

#kernel = C()*RBF() + WhiteKernel()
kernel = C() * Matern() + WhiteKernel()
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
#gp.fit(X_train, Y_train)

training_size = int(X_train.shape[0] * 0.5)
validation_size = X_train.shape[0] - training_size
j = training_size
validation_predictions = np.zeros(validation_size)
for i in range(validation_size):
    gp.fit(X_train[i:j], Y_train[i:j])
    validation_predictions[i] = gp.predict(np.array([X_train[j]]))
    j += 1

validation_predictions = scaler.inverse_transform(validation_predictions)
Y_train = scaler.inverse_transform(Y_train)
#mape = np.mean(np.abs((Y_train[training_size:] - validation_predictions) / Y_train[training_size:])) * 100