Example #1
 def _generate_gaussian_process_sample(self):
     if 'length_scale' in self.params:
         if isinstance(self.params['length_scale'], tuple):
             min_ls = self.params['length_scale'][0]
             max_ls = self.params['length_scale'][1]
             ls = min_ls + (max_ls - min_ls) * np.random.rand()
         else:
             ls = self.params['length_scale']
     else:
         ls = 1.0
     if 'kernel' in self.params:
         if self.params['kernel'] == 'RBF':
             kernel = kernels.RBF(length_scale=ls)
         elif self.params['kernel'] == 'Matern':
             kernel = kernels.Matern(length_scale=ls)
         else:
             raise ValueError('unknown kernel: %r' % self.params['kernel'])
     else:
         kernel = kernels.RBF(length_scale=ls)
     gpr = GaussianProcessRegressor(kernel=kernel)
     X = np.zeros((1, self.domain_dimension))
     y = np.zeros(1)
     gpr.fit(X, y)
     points = np.random.rand(self.sampling_points_count,
                             self.domain_dimension)
     values = gpr.sample_y(points, random_state=np.random.randint(100000))
     return points, values
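A note on Example #1: in scikit-learn, an unfitted GaussianProcessRegressor draws from the GP prior, so the single-point fit on zeros is avoidable. A minimal standalone sketch (synthetic dimensions assumed, not part of the original class):

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor, kernels

# Draw one function sample from the GP prior over 100 points in a 2-D unit cube
gpr = GaussianProcessRegressor(kernel=kernels.RBF(length_scale=0.5))
points = np.random.rand(100, 2)
values = gpr.sample_y(points, random_state=0)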
Example #2
def predictiveGP(function, sigma_square_f, Omega, N, x, x_star,
                 lengthscale_gt):
    ## Compute K from the known lengthscale and the current sigma^2_f to get
    ## the full covariance function of the GP at the different inputs
    x_star = x_star[:, np.newaxis]
    x = x[:, np.newaxis]
    k_xstar_x = sigma_square_f * kern.RBF(lengthscale_gt)(x_star, x)
    k_xstar_xstar = sigma_square_f * kern.RBF(lengthscale_gt)(x_star, x_star)
    k_xx = sigma_square_f * Omega + np.eye(N) * 0.001
    k_x_xstar = sigma_square_f * kern.RBF(lengthscale_gt)(x, x_star)

    f_star = np.dot(k_xstar_x, np.dot(np.linalg.inv(k_xx), function))
    cov_f_star = k_xstar_xstar - np.dot(np.dot(k_xstar_x, np.linalg.inv(k_xx)),
                                        k_x_xstar)
    return f_star, cov_f_star
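The two lines above are the standard GP predictive equations, f* = K(x*, x) K(x, x)^-1 f and cov(f*) = K(x*, x*) - K(x*, x) K(x, x)^-1 K(x, x*). Computing np.linalg.inv(k_xx) twice is wasteful and less numerically stable than solving the linear systems directly; a minimal drop-in sketch reusing the names above:

import numpy as np

def predictiveGP_solve(k_xstar_x, k_xstar_xstar, k_xx, k_x_xstar, function):
    # Same predictive mean/covariance, via linear solves instead of inverses
    f_star = k_xstar_x @ np.linalg.solve(k_xx, function)
    cov_f_star = k_xstar_xstar - k_xstar_x @ np.linalg.solve(k_xx, k_x_xstar)
    return f_star, cov_f_star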
Example #3
    def setup_latentforces(self, kernels=None):
        """Initalises the latent force GPs

        Parameters
        ----------

        kernels : list, optional
            Kernels of the latent force Gaussian process objects

        """
        if kernels is None:
            # Default is for kernels 1 * exp(-0.5 * (s-t)**2 )
            kernels = [
                sklearn_kernels.ConstantKernel(1.) * sklearn_kernels.RBF(1.)
                for r in range(self.dim.R)
            ]

        if len(kernels) != self.dim.R or \
           not all(isinstance(k, sklearn_kernels.Kernel) for k in kernels):
            _msg = "kernels should be a list of {} kernel objects".format(
                self.dim.R)
            raise ValueError(_msg)

        self.latentforces = [
            GaussianProcessRegressor(kern) for kern in kernels
        ]
Example #4
def gp_fit_sklearn(x_input, x_tar, y_input, y_tar, params=None, title=''):
    k1 = kernels.DotProduct(sigma_0=1, sigma_0_bounds=(1e-05, 5))
    k2 = kernels.RBF(length_scale=10, length_scale_bounds=(1e-3, x_tar[-1]))
    k3 = kernels.RationalQuadratic(alpha=1,
                                   length_scale=10,
                                   length_scale_bounds=(1e-3, x_tar[-1]))

    kernel = k1 * k2 * k3

    gp1 = GaussianProcessRegressor(kernel=kernel,
                                   n_restarts_optimizer=10,
                                   normalize_y=True,
                                   alpha=0)
    if params:
        gp1.set_params(**params)
    gp1.fit(x_input.reshape(-1, 1), y_input)
    pred, std = gp1.predict(x_tar.reshape(-1, 1), return_std=True)

    plt.plot(x_input, y_input, 'bo', label='Input', alpha=0.4)
    plt.plot(x_tar, y_tar, 'go', label='Target', alpha=0.4)
    plt.plot(x_tar, pred, 'ro', label='Prediction', alpha=0.4)
    plt.gca().fill_between(x_tar,
                           pred.reshape(-1) - 2 * std,
                           pred.reshape(-1) + 2 * std,
                           color='lightblue',
                           alpha=0.5,
                           label=r"$2\sigma$")
    plt.title(title)
    plt.legend()
    plt.show()
    return gp1, pred
Example #5
def activity_3_3():
    iris = datasets.load_iris()
    X = iris.data[:, :2]
    y = np.array(iris.target, dtype=int)

    h = .02

    # create a mesh grid for plotting
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    kernel = 1.0 * kernels.RBF([1.0])
    gpc_rbf_isotropic = GaussianProcessClassifier(kernel=kernel).fit(X, y)

    Z = gpc_rbf_isotropic.predict_proba(np.c_[xx.ravel(), yy.ravel()])

    # map the result to colors
    Z = Z.reshape((xx.shape[0], xx.shape[1], 3))
    plt.imshow(Z, extent=(x_min, x_max, y_min, y_max), origin="lower")

    # Plot the data points
    plt.scatter(X[:, 0],
                X[:, 1],
                c=np.array(["r", "g", "b"])[y],
                edgecolors=(0, 0, 0))
    plt.xlabel('Sepal length')
    plt.ylabel('Sepal width')
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.show()
Example #6
def cal_scale_factor(scale, scale_err, lamda, lamdas, tmp_var):
    # Calculate the scale factor for all wavelengths
    # scale/scale_err is the output from cal_ratio
    # lamdas is the effective wavelength for DES bands
    # tmp_var is the output from cal_flux
    scale = np.array(scale)
    scale_err = np.array(scale_err)
    # fit a quadratic polynomial to construct the scale factor for OzDES spectra
    fn_raw = np.polyfit(lamdas, scale, 2)
    fn = np.poly1d(fn_raw)
    flux_calib = fn(lamda)

    # Calculate the variance for the calibrated spectra
    # use a Gaussian process to estimate uncertainties; /10**-17 because the
    # fit becomes numerically unstable with very small numbers
    stddev = (scale_err**0.5) / 10**-17
    scale_v = scale / 10**-17

    kernel = kernels.RBF(length_scale=300, length_scale_bounds=(.01, 2000.0))
    gp = GaussianProcessRegressor(kernel=kernel, alpha=stddev**2)

    xprime = np.atleast_2d(lamdas).T
    yprime = np.atleast_2d(scale_v).T
    gp.fit(xprime, yprime)
    xplot_prime = np.atleast_2d(lamda).T
    y_pred, sigma = gp.predict(xplot_prime, return_std=True)
    y_pred = y_pred[:, 0]
    sigModel = (sigma / y_pred) * flux_calib

    # now scale the original variance and combine with scale factor uncertainty
    varScale = tmp_var * pow(flux_calib, 2) + sigModel**2
    return flux_calib, varScale
Example #7
 def fit_length_scale(self, dataset, grid):
     best_err = np.inf
     best_h = None
     count = 0
     for h in grid:
         kernel = kernels.RBF(length_scale=h)
         k = pairwise_kernels(
             self.embed(dataset.x),
             self.embed(self.x),
             metric=kernel,
             filter_params=False,
         )
         mu0 = self.mu0(k)
         mu1 = self.mu1(k)
         y = dataset.y.reshape(-1, 1)
         t = dataset.t.reshape(-1, 1)
         err0 = mean_squared_error(y[t == 0], mu0[t == 0])
         err1 = mean_squared_error(y[t == 1], mu1[t == 1])
         err = err0 + err1
         if err < best_err:
             best_err = err
             best_h = h
             count = 0
         elif count < 20:
             count += 1
         else:
             break
         if self.verbose:
             print(f"h-{h:.03f}_err-{err:.03f}")
     self.kernel.length_scale = best_h
Example #8
    def build_model(self, specie, members):
        """
        Build the model using GP
        """

        # Get the data
        decisions_df = self.build_member_decision_score_df(specie, members)

        # Extract the choices as the explanatory variables
        choices = ComponentState.get(specie).list_choices()
        choice_names = [ c.get_name() for c in choices ]
        x = decisions_df.loc[:, choice_names]

        # Extract the scores as the dependent variable
        y = decisions_df.loc[:, "score"]

        # Preprocess using one hot encoding
        categories = [ c.get_component_names() for c in choices ]
        encoder = OneHotEncoder(sparse = False, categories = categories)

        # Define the isotropic kernel
        kernel = 1.0 * kernels.RBF([5]) + kernels.WhiteKernel()

        # Define the regressor
        regressor = GaussianProcessRegressor(kernel=kernel, normalize_y = True)

        # Build the pipeline and fit it
        pipeline = Pipeline(steps = [
            ('encoder', encoder),
            ('regressor', regressor)
        ])
        pipeline.fit(x, y)
        return pipeline
Example #9
def gp_noise_estimation(
    chunk: Type[DataChunk], rbf_params={}, noise_params={}, verbose=False
) -> np.ndarray:
    """
    Uses a simple Gaussian Process model to perform noise estimation on spectral
    data. A given chunk of the full spectrum is fit with a GP model comprising
    RBF and white noise kernels, where the former explains covariance in intensities
    between channels and the latter models variation in the signal as i.i.d.
    white noise.
    
    The GP model is conditioned to provide a maximum likelihood estimate
    of the data, and depends heavily on the initial parameters. The arguments
    `rbf_params` and `noise_params` allow the user to override defaults for the kernels,
    and may require some tweaking to get the desired behavior.
    
    The objective of this function is to estimate the noise at every point of the
    spectrum; it returns a NumPy 1D array of noise values with the same shape as
    the frequency bins.

    Parameters
    ----------
    chunk : Type[DataChunk]
        Chunk of the spectrum providing `frequency` and `intensity` arrays
    rbf_params : dict, optional
        Overrides for the RBF kernel defaults, by default {}
    noise_params : dict, optional
        Overrides for the WhiteKernel defaults, by default {}
    verbose : bool, optional
        If True, log diagnostic information about the GP fit, by default False

    Returns
    -------
    np.ndarray
        NumPy 1D array containing the noise at every channel
    """
    freq, intensity = chunk.frequency, chunk.intensity
    # RBF parameters affect how correlated each channel is
    # noise parameters affect the variance in signal explained as normally
    # distributed noise
    rbf_kern = {"length_scale": 5e-1, "length_scale_bounds": (1e-1, 10.0)}
    noise_kern = {"noise_level": 1e-1, "noise_level_bounds": (1e-3, 1.0)}
    rbf_kern.update(**rbf_params)
    noise_kern.update(**noise_params)
    # instantiate the model
    kernel = kernels.RBF(**rbf_kern) + kernels.WhiteKernel(**noise_kern)
    gp_model = GaussianProcessRegressor(kernel, normalize_y=True)
    gp_result = gp_model.fit(freq[:, None], intensity[:, None])
    # reproduce the spectrum with uncertainties
    pred_y, pred_std = gp_result.predict(freq[:, None], return_std=True)
    # log some information about the GP result
    if verbose:
        logger.info(f"GP results for catalog index {chunk.catalog_index}.")
        logger.info(
            f"MSE from GP fit: {mean_squared_error(pred_y.flatten(), intensity):.4f}"
        )
        logger.info(
            f"Marginal log likelihood: {gp_result.log_marginal_likelihood_value_:.4f}"
        )
        logger.info(f"Kernel parameters: {gp_result.kernel_}")
    return pred_std
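A hypothetical usage sketch for Example #9: DataChunk is not defined in this snippet, so a SimpleNamespace stands in, and the synthetic spectrum is a Gaussian line plus white noise:

from types import SimpleNamespace
import numpy as np

freq = np.linspace(0.0, 10.0, 500)
intensity = np.exp(-0.5 * ((freq - 5.0) / 0.3) ** 2) + 0.05 * np.random.randn(500)
chunk = SimpleNamespace(frequency=freq, intensity=intensity, catalog_index=0)

noise = gp_noise_estimation(chunk)  # 1-D array with the same shape as freq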
Example #10
 def estimate_depth(self):
     kernel = 1.5 * kernels.RBF(length_scale=1.0,
                                length_scale_bounds=(0, 3.0))
     clf = GaussianProcessClassifier(optimizer=None,
                                     n_restarts_optimizer=9,
                                     kernel=kernel)
     input_data = np.hstack((self.le_centers, self.re_centers))
     clf.fit(input_data, self.ids.ravel())
     self.regressor = clf
Example #11
 def __init__(self, alpha=1):
     mu = np.array([1, 1])
     super().__init__(alpha=alpha,
                      copy_X_train=True,
                      kernel=kernels.RBF(mu),
                      n_restarts_optimizer=0,
                      normalize_y=False,
                      optimizer='fmin_l_bfgs_b',
                      random_state=None)
Example #12
    def run(self):
        """Connects to the Redis queue with the results and pulls them"""

        # Make a random guess to start
        for i in range(self.batch_size):
            self.queues.send_inputs(
                np.random.uniform(-32.768, 32.768, size=(self.dim, )).tolist())
        self.logger.info('Submitted initial random guesses to queue')
        train_X = []
        train_y = []

        # Use the initial guess to train a GPR
        gpr = Pipeline([('scale', MinMaxScaler(feature_range=(-1, 1))),
                        ('gpr',
                         GaussianProcessRegressor(normalize_y=True,
                                                  kernel=kernels.RBF() *
                                                  kernels.ConstantKernel()))])

        with open(self.output_path, 'a') as fp:
            for _ in range(self.batch_size):
                result = self.queues.get_result()
                print(result.json(), file=fp)
                train_X.append(result.args)
                train_y.append(result.value)

        # Make guesses based on expected improvement
        for _ in range(self.n_guesses // self.batch_size - 1):
            # Update the GPR with the available training data
            gpr.fit(np.vstack(train_X), train_y)

            # Generate a random assortment of potential next points to sample
            sample_X = np.random.uniform(size=(self.batch_size * 1024,
                                               self.dim),
                                         low=-32.768,
                                         high=32.768)

            # Compute the expected improvement for each point
            pred_y, pred_std = gpr.predict(sample_X, return_std=True)
            best_so_far = np.min(train_y)
            ei = (best_so_far - pred_y) / pred_std

            # Run the samples with the highest EI
            best_inds = np.argsort(ei)[-self.batch_size:]
            self.logger.info(
                f'Selected {len(best_inds)} best samples. EI: {ei[best_inds]}')
            for i in best_inds:
                best_ei = sample_X[i, :]
                self.queues.send_inputs(best_ei.tolist())
            self.logger.info('Sent all of the inputs')

            # Wait for the value to complete
            with open(self.output_path, 'a') as fp:
                for _ in range(self.batch_size):
                    result = self.queues.get_result()
                    print(result.json(), file=fp)
                    train_X.append(result.args)
                    train_y.append(result.value)
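Note that the quantity labelled ei in Example #12 (and again in Example #25 below) is the standardized improvement (best - mu) / sigma rather than the full expected-improvement integral. If closed-form EI is wanted instead, a sketch for this minimization setting (an assumed substitute, not the author's code):

import numpy as np
from scipy.stats import norm

def expected_improvement(pred_y, pred_std, best_so_far):
    # EI for minimization: E[max(best - f, 0)] under a Gaussian posterior
    z = (best_so_far - pred_y) / pred_std
    return (best_so_far - pred_y) * norm.cdf(z) + pred_std * norm.pdf(z)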
Example #13
 def custom_RBF(params):
     kernel = params[0][0]**2 * kernels.RBF(length_scale=params[0][1])
     gpc_rbf_isotropic = GaussianProcessClassifier(kernel=kernel,
                                                   optimizer=None).fit(
                                                       X_train, y_train)
     y_pred = gpc_rbf_isotropic.predict(X_test)
     acc = accuracy_score(y_test, y_pred)
     print(params, acc)
     return acc
Example #14
def GPR_fit(x_train, y_train, x_test):
    kernel = (sk_kern.RBF(1.0, (1e-3, 1e3))
              + sk_kern.ConstantKernel(1.0, (1e-3, 1e3))
              + sk_kern.WhiteKernel())
    clf = GaussianProcessRegressor(
        kernel=kernel,
        alpha=1e-10,
        optimizer="fmin_l_bfgs_b",
        n_restarts_optimizer=20,
        normalize_y=True)
    clf.fit(x_train, y_train)
    pred_mean, pred_std = clf.predict(x_test, return_std=True)
    return pred_mean, pred_std
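A hedged usage sketch for GPR_fit with synthetic data (shapes assumed: 2-D inputs, 1-D targets):

import numpy as np

x_train = np.linspace(0, 10, 50).reshape(-1, 1)
y_train = np.sin(x_train).ravel() + 0.1 * np.random.randn(50)
x_test = np.linspace(0, 10, 200).reshape(-1, 1)
pred_mean, pred_std = GPR_fit(x_train, y_train, x_test)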
Example #15
def get_gpr(kernel_type, X, y):
    mean, _, std = get_distribution_measures(y)
    if kernel_type == 'rbf':
        kernel = kernels.ConstantKernel(mean) * kernels.RBF(std)
    elif kernel_type == 'dot':
        kernel = kernels.ConstantKernel(mean) * kernels.DotProduct(std)
    else:
        raise ValueError('unknown kernel_type: %r' % kernel_type)

    gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.05, optimizer=None)
    gpr.fit(X, y)

    return gpr
Example #16
 def estimate_gaze(self):
     kernel = 1.5 * kernels.RBF(length_scale=1.0,
                                length_scale_bounds=(0, 3.0))
     clf = GaussianProcessRegressor(alpha=1e-5,
                                    optimizer=None,
                                    n_restarts_optimizer=9,
                                    kernel=kernel)
     if self.binocular:
         input_data = np.hstack((self.l_centers, self.r_centers))
         clf.fit(input_data, self.targets)
     else:
         clf.fit(self.l_centers, self.targets)
     self.regressor = clf
Example #17
def make_RBF(n_dim_obs=5, n_dim_lat=0, T=1, **kwargs):
    """Make precision matrices using a temporal RBF kernel."""
    from regain.bayesian.gaussian_process_ import sample as samplegp
    from sklearn.gaussian_process import kernels

    length_scale = kwargs.get("length_scale", 1.0)
    length_scale_bounds = kwargs.get("length_scale_bounds", (1e-05, 100000.0))
    epsilon = kwargs.get("epsilon", 0.8)
    sparse = kwargs.get("sparse", True)
    temporal_kernel = kernels.RBF(length_scale=length_scale,
                                  length_scale_bounds=length_scale_bounds)(
                                      np.arange(T)[:, None])

    n = n_dim_obs + n_dim_lat
    u = samplegp(temporal_kernel, p=n * (n - 1) // 2)[0]
    K = []
    for i, uu in enumerate(u.T):
        theta = squareform(uu)
        if sparse:
            theta_obs = theta[n_dim_lat:, n_dim_lat:]
            theta_lat = theta[:n_dim_lat, :n_dim_lat]
            theta_OH = theta[n_dim_lat:, :n_dim_lat]

            # sparsify
            theta_obs[np.abs(theta_obs) < epsilon] = 0
            theta_lat[np.abs(theta_lat) < epsilon / 3] = 0
            theta_OH[np.abs(theta_OH) < epsilon / 3] = 0
            theta[n_dim_lat:, n_dim_lat:] = theta_obs
            theta[:n_dim_lat, :n_dim_lat] = theta_lat
            theta[n_dim_lat:, :n_dim_lat] = theta_OH
            theta[:n_dim_lat, n_dim_lat:] = theta_OH.T
        if i == 0:
            inter_links = theta[n_dim_lat:, :n_dim_lat]
        theta[n_dim_lat:, :n_dim_lat] = inter_links
        theta[:n_dim_lat, n_dim_lat:] = inter_links.T
        theta += np.diag(np.sum(np.abs(theta), axis=1) + 0.01)
        K.append(theta)

        assert is_pos_def(theta)

    thetas = np.array(K)

    theta_obs = []
    ells = []
    for t in thetas:
        # use the loop variable t, not the leftover theta from the loop above
        L = (t[n_dim_lat:, :n_dim_lat].dot(
            linalg.pinv(t[:n_dim_lat, :n_dim_lat])).dot(t[:n_dim_lat,
                                                          n_dim_lat:]))
        theta_obs.append(t[n_dim_lat:, n_dim_lat:] - L)
        ells.append(L)
    return thetas, theta_obs, ells
Example #18
def fit_gaussian(x, y, err):

    #    kernel = kr.RBF(length_scale=10.0)
    #kernel = kr.ConstantKernel(1) * kr.Matern(length_scale=0.3, nu=2.5)
    #    kernel = kr.Matern(length_scale=0.3, nu=2.5)
    kernel = kr.RBF(length_scale=0.3)
    gp = GaussianProcessRegressor(kernel=kernel,
                                  alpha=err**2,
                                  n_restarts_optimizer=10)

    gp.fit(x, y)
    print(gp.kernel_)

    return gp
Example #19
    def fit(self, X, Y, alpha=0.0, verbose=False):
        X = np.array(X)
        kernel = 1.0 * GPKernels.RBF(
            length_scale=100.0,
            length_scale_bounds=(1e-2, 1e3)) + GPKernels.WhiteKernel(
                noise_level=1, noise_level_bounds=(1e-10, 1e+1))
        gp = GaussianProcessRegressor(kernel=kernel, alpha=alpha)
        self.model = gp.fit(X, Y)
        self.fitted = True

        if verbose:
            print("Parameters of " + self.name + " :")
            print("-" * 30)
            print("-" * 30)
Example #20
    def gaussian_interpolation(self, params, m=200):
        a = np.linspace(0, 100, self.nums_params)
        a = np.concatenate([np.array([0]), a, np.array([0])])
        x_train = np.atleast_2d(a).T
        y_train = params
        x_test = np.atleast_2d(np.linspace(0, 100, m+1)).T

        kernel = skk.RBF()
        model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=100, alpha=1e-4, normalize_y=True)

        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)

        return y_pred
Example #21
    def run(self):
        """Connects to the Redis queue with the results and pulls them"""

        # Make a random guess to start
        self.queues.send_inputs(uniform(0, 10), method='target_fun')
        self.logger.info('Submitted initial random guess')
        train_X = []
        train_y = []

        # Initialize the GPR and generator
        gpr = GaussianProcessRegressor(normalize_y=True,
                                       kernel=kernels.RBF() *
                                       kernels.ConstantKernel())
        generator = Generator()

        # Make guesses based on expected improvement
        for _ in range(self.n_guesses - 1):
            # Wait for the result
            result = self.queues.get_result()
            self.logger.info(f'Received result: {(result.args, result.value)}')
            train_X.append(result.args)
            train_y.append(result.value)

            # Update the generator and the GPR with the new result
            generator.partial_fit(*result.args, result.value)
            gpr.fit(train_X, train_y)

            # Generate a random assortment of potential next points to sample
            self.queues.send_inputs(generator, 64, method='generate')
            result = self.queues.get_result()
            sample_X = result.value

            # Compute the expected improvement for each point
            self.queues.send_inputs(gpr, sample_X, method='score')
            result = self.queues.get_result()
            pred_y, pred_std = result.value

            # Select the best point
            best_y = np.min(train_y)
            self.queues.send_inputs(best_y, pred_y, pred_std, method='select')
            result = self.queues.get_result()
            chosen_ix = result.value

            # Run the sample with the highest EI
            self.queues.send_inputs(*sample_X[chosen_ix], method='target_fun')

        # Write the best answer to disk
        with open('answer.out', 'w') as fp:
            print(np.min(train_y), file=fp)
Example #22
 def fit(self, x, y):
     self.kernel = 1 * kernels.RBF(length_scale=1.0)
     parameters = {
         'kernel': [self.kernel],
         'alpha': [1, 1e-1, 1e-2, 1e-3, 1e-4]
     }
     model = GaussianProcessRegressor(kernel=self.kernel,
                                      alpha=5e-4,
                                      random_state=0)
     self.cv = GridSearchCV(model, parameters, cv=5)
     self.norm = np.mean(y)
     self.cv.fit(x, y / self.norm)
     self.model = self.cv.best_estimator_
     self.x_fit = x
     self.y_fit = y
Example #23
 def __build_model_one_eye(self, eyeball, centers):
     kernel = 1.5 * kernels.RBF(length_scale=1.0,
                                length_scale_bounds=(0, 3.0))
     clf = GaussianProcessRegressor(alpha=1e-5,
                                    optimizer=None,
                                    n_restarts_optimizer=9,
                                    kernel=kernel)
     surface = np.empty((0, 3), float)
     for t in self.targets:
         t = t - eyeball
         t_norm = t / np.linalg.norm(t)
         s = t_norm * self.radius
         surface = np.vstack((surface, s))
     print('centers length:', len(centers), "surface length:", len(surface))
     clf.fit(centers, surface)
     return clf
Example #24
 def __init__(self, n_dims, mean_cutoff=None, *,
         kernel=None, sample_scale=1, maximise_effort=100, **kwargs):
     self.n_dims = n_dims
     if kernel is None:
         kernel = 1.0 * kernels.RBF(1.0)
     self.model = AugmentedGaussianProcess(kernel, **kwargs)
     self.x_samples = []
     self.y_samples = []
     self.y_err_samples = []
     self.mean_cutoff = mean_cutoff
     self.sample_scale = sample_scale
     self.maximise_effort = maximise_effort
     self.dirty = False
     
     self.Thresh = AcquisitionFunctionUCB(self.model, 2, invert=True)
Example #25
    def run(self):
        """Connects to the Redis queue with the results and pulls them"""

        # Make a random guess to start
        self.queues.send_inputs(uniform(0, 10))
        self.logger.info('Submitted initial random guess')
        train_X = []
        train_y = []

        # Use the initial guess to train a GPR
        gpr = GaussianProcessRegressor(normalize_y=True,
                                       kernel=kernels.RBF() *
                                       kernels.ConstantKernel())
        result = self.queues.get_result()
        train_X.append(result.args)
        train_y.append(result.value)

        # Make guesses based on expected improvement
        for _ in range(self.n_guesses - 1):
            # Update the GPR with the available training data
            gpr.fit(train_X, train_y)

            # Generate a random assortment of potential next points to sample
            sample_X = np.random.uniform(size=(64, 1), low=0, high=10)

            # Compute the expected improvement for each point
            pred_y, pred_std = gpr.predict(sample_X, return_std=True)
            best_so_far = np.min(train_y)
            ei = (best_so_far - pred_y) / pred_std

            # Run the sample with the highest EI
            best_ei = sample_X[np.argmax(ei), 0]
            self.queues.send_inputs(best_ei)
            self.logger.info(f'Sent new guess based on EI: {best_ei}')

            # Wait for the value to complete
            result = self.queues.get_result()
            self.logger.info('Received value')

            # Add the value to the training set for the GPR
            train_X.append([best_ei])
            train_y.append(result.value)

        # Write the best answer to disk
        with open('answer.out', 'w') as fp:
            print(np.min(train_y), file=fp)
Example #26
def fit_model(opt_spec: OptimizationProblem, train_x: np.ndarray,
              train_y: np.ndarray) -> Pipeline:
    """Fit and test a model using the latest data

    Args:
        opt_spec: Configuration file for the optimization
        train_x: Input columns
        train_y: Output column
    """
    # Create an initial RBF kernel, using the training set mean as a scaling parameter
    kernel = train_y.mean()**2 * kernels.RBF(length_scale=1)

    # TODO (wardlt): Make it clear where featurization would appear, as we are soon to introduce additives
    #  This will yield chemical degrees of freedom better captured using features of the additives rather
    #  than a new variable per additive
    #  Notes for now: Mol. Weight, Side Chain Length, and ... are the likely candidates

    # Add a noise parameter based on user settings
    noise = opt_spec.planner_options.get('noise_level', 0)
    if noise < 0:
        # Use the standard deviation of train_y as the estimate of the initial noise
        # TODO (wardlt): Document where 3, 4, and 11 come from
        noise_estimated = np.std(train_y) / 3
        noise_lb = noise_estimated / 4
        noise_ub = noise_estimated * 11

        kernel_noise = kernels.WhiteKernel(noise_level=noise_estimated**2,
                                           noise_level_bounds=(noise_lb**2,
                                                               noise_ub**2))
        kernel = kernel + kernel_noise
    elif noise > 0:
        kernel = kernel + kernels.WhiteKernel(
            noise**2, noise_level_bounds=(noise**2, ) * 2)

    # Train a GPR model
    model = Pipeline([('variance', VarianceThreshold()),
                      ('scale', StandardScaler()),
                      ('gpr', GaussianProcessRegressor(kernel))])

    # Train and save the model
    model.fit(train_x, train_y)
    print(f'Finished fitting the model on {len(train_x)} data points')
    print(f'Optimized model: {model["gpr"].kernel_}')
    return model
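The noise_level_bounds=(noise**2,) * 2 trick above collapses the lower and upper bounds to the same value, so the optimizer cannot move the noise level. scikit-learn also supports this directly with the string "fixed"; a minimal sketch of both forms:

from sklearn.gaussian_process import kernels

noise = 0.1
pinned = kernels.WhiteKernel(noise**2, noise_level_bounds=(noise**2,) * 2)
fixed = kernels.WhiteKernel(noise_level=noise**2, noise_level_bounds="fixed")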
Example #27
def Which_Surrogate_Model_Single(_x_train, _y_train, func):
    _cov = 0
    if func.__name__ == 'GPR':
        s_model = func()
        _w = s_model.fit(_x_train.reshape(-1, 1),
                         np.asarray(_y_train).reshape(-1, 1))
        # list_predict_results = s_model.predict(_x_pre.reshape(-1, 1))
    elif func.__name__ == 'GaussianProcessRegressor':
        kernel = ConstantKernel(1.0, (1e-4, 1e4)) * kns.RBF(5, (1e-2, 1e2))
        s_model = func(kernel=kernel, n_restarts_optimizer=20)
        _w = s_model.fit(_x_train.reshape(-1, 1),
                         np.asarray(_y_train).reshape(-1, 1))
        # list_predict_results, _cov = s_model.predict(_x_pre.reshape(-1, 1), return_cov=True)
    else:
        s_model = func()
        _w = s_model.fit(_x_train, _y_train)
        # list_predict_results = s_model.predict(_x_pre)
    return _w, _cov
Example #28
    def __init__(
        self,
        dataset,
        initial_length_scale=1.0,
        feature_extractor=None,
        propensity_model=None,
        verbose=False,
    ):
        super().__init__()
        self.feature_extractor = feature_extractor
        self.device = propensity_model.device if propensity_model is not None else None
        self.kernel = kernels.RBF(length_scale=initial_length_scale)
        idx = np.argsort(dataset.y.ravel())
        self.x = dataset.x[idx]
        self.t = dataset.t[idx].reshape(-1, 1)
        self.y = dataset.y[idx].reshape(-1, 1)
        self.s = self.y.std()
        self.m = self.y.mean()

        if propensity_model is None:
            propensity_model = LogisticRegression()
            propensity_model = propensity_model.fit(self.x, self.t.ravel())
            self.e = propensity_model.predict_proba(self.x)[:, -1:]
        else:
            with torch.no_grad():
                e = []
                for _ in range(50):
                    e.append(
                        propensity_model.network(
                            torch.tensor(np.hstack([self.x, self.t])).to(
                                propensity_model.device))[1].probs.to("cpu"))
                self.e = torch.cat(e, dim=-1).mean(1, keepdim=True).numpy()
        self.e = np.clip(self.e, 1e-7, 1 - 1e-7)

        self._gamma = None

        self.alpha_0 = None
        self.alpha_1 = None

        self.beta_0 = None
        self.beta_1 = None

        self.verbose = verbose
Example #29
def Which_Surrogate_Model_Mutiple(x_train, x_pre, func, list_train):
    list_predict_results = []
    for train_data in list_train:
        if func.__name__ == 'GPR':
            s_model = func()
            _w = s_model.fit(x_train.reshape(-1, 1),
                             np.asarray(train_data).reshape(-1, 1))
            list_predict_results.append(s_model.predict(x_pre.reshape(-1, 1)))
        elif func.__name__ == 'GaussianProcessRegressor':
            kernel = ConstantKernel(1.0, (1e-4, 1e4)) * kns.RBF(5, (1e-2, 1e2))
            s_model = func(kernel=kernel, n_restarts_optimizer=0)
            _w = s_model.fit(x_train.reshape(-1, 1),
                             np.asarray(train_data).reshape(-1, 1))
            list_predict_results.append(
                s_model.predict(x_pre.reshape(-1, 1), return_cov=True))
        else:
            s_model = func()
            _w = s_model.fit(x_train, train_data)
            list_predict_results.append(s_model.predict(x_pre))
    return list_predict_results
Example #30
def gaussian_interpolation(params, a=0, b=100, m=200, show_plot=True):
    x_train = np.atleast_2d(np.linspace(a, b, len(params))).T
    y_train = params
    x_test = np.atleast_2d(np.linspace(0, 100, m)).T

    kernel = skk.RBF()
    model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=100, normalize_y=True)

    model.fit(x_train, y_train)
    y_pred, sigma_pred = model.predict(x_test, return_std=True)

    plt.figure(figsize=(16, 9))
    plt.plot(x_train, y_train, linewidth=0.5, color='#000000')
    plt.scatter(x_test, y_pred)
    plt.scatter(x_test, y_pred + sigma_pred, c='#00CED1')
    plt.scatter(x_test, y_pred - sigma_pred, c='#DC143C')
    if show_plot:
        plt.show()

    return y_pred[1:m-1], sigma_pred[1:m-1]
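A hedged usage sketch for gaussian_interpolation: interpolate 10 control values onto a finer grid (show_plot=False skips the figure; the function itself trims the endpoints of the prediction):

import numpy as np

params = np.sin(np.linspace(0, np.pi, 10))
y_interp, sigma_interp = gaussian_interpolation(params, m=200, show_plot=False)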