Example #1
    def __init__(self, sample_kernel, error_kernel=None, *, fit_white_noise=False):
        if fit_white_noise:
            sample_kernel = sample_kernel + kernels.WhiteKernel()

        if error_kernel is None:
            if fit_white_noise:
                error_kernel = sample_kernel
            else:
                error_kernel = sample_kernel + kernels.WhiteKernel()

        self.fit_white_noise = fit_white_noise
        self.sample_kernel = sample_kernel
        self.submodel_samples = GaussianProcessRegressor(self.sample_kernel)
        self.submodel_errors = GaussianProcessRegressor(error_kernel)
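
For context, here is a small self-contained sketch of the pattern used above (a sample kernel, plus an error kernel that adds WhiteKernel, each fit by its own regressor); the toy data and variable names are illustrative and not part of the original class.

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor, kernels

# Illustrative stand-in for the pattern above: the error model gets an extra
# WhiteKernel so it can absorb observation noise.
rng = np.random.default_rng(0)
X = rng.uniform(0, 10, size=(30, 1))
y = np.sin(X).ravel() + 0.1 * rng.normal(size=30)

sample_kernel = 1.0 * kernels.RBF(length_scale=1.0)
error_kernel = sample_kernel + kernels.WhiteKernel()

submodel_samples = GaussianProcessRegressor(sample_kernel).fit(X, y)
submodel_errors = GaussianProcessRegressor(error_kernel).fit(X, y)
print(submodel_errors.kernel_)  # fitted hyperparameters, including the noise level
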
Example #2
def bo_(x_obs, y_obs):
    kernel = kernels.Matern() + kernels.WhiteKernel()
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=16)
    gp.fit(x_obs, y_obs)

    xs = list(repeat(np.atleast_2d(np.linspace(0, 10, 128)).T, 2))
    x = cartesian_product(*xs)

    a = a_EI(gp, x_obs=x_obs, y_obs=y_obs)

    argmin_a_x = x[np.argmax(a(x))]

    # heavy evaluation
    print("f({})".format(argmin_a_x))
    f_argmin_a_x = f2d(np.atleast_2d(argmin_a_x))


    plot_2d(gp, x_obs, y_obs, argmin_a_x, a, xs)
    plt.show()


    bo_(
        x_obs=np.vstack((x_obs, argmin_a_x)),
        y_obs=np.hstack((y_obs, f_argmin_a_x)),
    )
Example #3
    def build_model(self, specie, members):
        """
        Build the model using GP
        """

        # Get the data
        decisions_df = self.build_member_decision_score_df(specie, members)

        # Extract the choices as the explanatory variables
        choices = ComponentState.get(specie).list_choices()
        choice_names = [ c.get_name() for c in choices ]
        x = decisions_df.loc[:, choice_names]

        # Extract the scores as the dependent variable
        y = decisions_df.loc[:, "score"]

        # Preprocess using one hot encoding
        categories = [ c.get_component_names() for c in choices ]
        encoder = OneHotEncoder(sparse = False, categories = categories)

        # Define the isotropic kernel
        kernel = 1.0 * kernels.RBF([5]) + kernels.WhiteKernel()

        # Define the regressor
        regressor = GaussianProcessRegressor(kernel=kernel, normalize_y = True)

        # Build the pipeline and fit it
        pipeline = Pipeline(steps = [
            ('encoder', encoder),
            ('regressor', regressor)
        ])
        pipeline.fit(x, y)
        return pipeline
Example #4
    def __init__(self, sample_kernel, error_kernel=None):
        if error_kernel is None:
            error_kernel = sample_kernel + kernels.WhiteKernel()

        self.sample_kernel = sample_kernel
        self.submodel_samples = GaussianProcessRegressor(self.sample_kernel)
        self.submodel_errors = GaussianProcessRegressor(error_kernel)
Example #5
def cov_function_sklearn(params, nu=5 / 2):
    """Generates a default covariance function.

  Args:
    params: A dictionary with GP hyperparameters.
    nu: Degree of the Matern kernel.

  Returns:
    cov_fun: an ARD Matern covariance function with diagonal noise for
    numerical stability.

  """
    amplitude = params['amplitude']
    noise = params['noise']
    lengthscale = params['lengthscale'].flatten()

    amplitude_bounds = PARAMS_BOUNDS['amplitude']
    lengthscale_bounds = PARAMS_BOUNDS['lengthscale']
    noise_bounds = PARAMS_BOUNDS['noise']

    cov_fun = kernels.ConstantKernel(
        amplitude, constant_value_bounds=amplitude_bounds) * kernels.Matern(
            lengthscale, nu=nu,
            length_scale_bounds=lengthscale_bounds) + kernels.WhiteKernel(
                noise, noise_level_bounds=noise_bounds)
    return cov_fun
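
For reference, a self-contained sketch of the kernel this helper builds, with placeholder hyperparameter values and bounds (the originals come from `params` and the module-level `PARAMS_BOUNDS`):

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor, kernels

# Placeholder values; the real ones are read from `params` / PARAMS_BOUNDS.
amplitude, noise = 1.0, 1e-3
lengthscale = np.ones(3)  # one length scale per input dimension (ARD)

cov_fun = kernels.ConstantKernel(amplitude, constant_value_bounds=(1e-3, 1e3)) \
    * kernels.Matern(lengthscale, nu=5 / 2, length_scale_bounds=(1e-2, 1e2)) \
    + kernels.WhiteKernel(noise, noise_level_bounds=(1e-6, 1.0))

gpr = GaussianProcessRegressor(kernel=cov_fun, normalize_y=True)
print(cov_fun)
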
Example #6
def gp_noise_estimation(
    chunk: Type[DataChunk], rbf_params={}, noise_params={}, verbose=False
) -> np.ndarray:
    """
    Uses a simple Gaussian Process model to perform noise estimation on spectral
    data. A given chunk of the full spectrum is fit with a GP model comprising
    RBF and white noise kernels, where the former explains covariance in intensities
    between channels and the latter models variation in the signal as i.i.d. white
    noise.
    
    The GP model is conditioned to provide a maximum likelihood estimate
    of the data, and depends heavily on the initial parameters. The arguments
    `rbf_params` and `noise_params` allow the user to override defaults for the kernels,
    and may require some tweaking to get the desired behavior.
    
    The objective of this function is to estimate the noise at every point of the
    spectrum; it returns a NumPy 1D array of noise values with the same shape as
    the frequency bins.

    Parameters
    ----------
    chunk : Type[DataChunk]
        [description]
    rbf_params : dict, optional
        [description], by default {}
    noise_params : dict, optional
        [description], by default {}

    Returns
    -------
    np.ndarray
        NumPy 1D array containing the noise at every channel
    """
    freq, intensity = chunk.frequency, chunk.intensity
    # RBF parameters affect how correlated each channel is
    # noise parameters affect the variance in signal explained as normally
    # distributed noise
    rbf_kern = {"length_scale": 5e-1, "length_scale_bounds": (1e-1, 10.0)}
    noise_kern = {"noise_level": 1e-1, "noise_level_bounds": (1e-3, 1.0)}
    rbf_kern.update(**rbf_params)
    noise_kern.update(**noise_params)
    # instantiate the model
    kernel = kernels.RBF(**rbf_kern) + kernels.WhiteKernel(**noise_kern)
    gp_model = GaussianProcessRegressor(kernel, normalize_y=True)
    gp_result = gp_model.fit(freq[:, None], intensity[:, None])
    # reproduce the spectrum with uncertainties
    pred_y, pred_std = gp_result.predict(freq[:, None], return_std=True)
    # log some information about the GP result
    if verbose:
        logger.info(f"GP results for catalog index {chunk.catalog_index}.")
        logger.info(
            f"MSE from GP fit: {mean_squared_error(pred_y.flatten(), intensity):.4f}"
        )
        logger.info(
            f"Marginal log likelihood: {gp_result.log_marginal_likelihood_value_:.4f}"
        )
        logger.info(f"Kernel parameters: {gp_result.kernel_}")
    return pred_std
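
As a self-contained illustration of the same idea on synthetic data (plain arrays standing in for the `DataChunk` input above):

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor, kernels

# Synthetic spectrum: a smooth signal plus i.i.d. noise.
freq = np.linspace(0.0, 10.0, 200)
intensity = np.sin(freq) + 0.05 * np.random.default_rng(1).normal(size=freq.size)

kernel = kernels.RBF(length_scale=0.5, length_scale_bounds=(1e-1, 10.0)) \
    + kernels.WhiteKernel(noise_level=1e-1, noise_level_bounds=(1e-3, 1.0))
gp_model = GaussianProcessRegressor(kernel, normalize_y=True)
gp_model.fit(freq[:, None], intensity)

# One noise estimate per channel, analogous to the array returned above.
pred_y, pred_std = gp_model.predict(freq[:, None], return_std=True)
print(pred_std.shape)
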
Example #7
def fit_model(opt_spec: OptimizationProblem, train_x: np.ndarray,
              train_y: np.ndarray) -> Pipeline:
    """Fit and test a model using the latest data

    Args:
        opt_spec: Configuration file for the optimization
        train_x: Input columns
        train_y: Output column
        out_dir: Location to store the data
    """
    # Create an initial RBF kernel, using the training set mean as a scaling parameter
    kernel = train_y.mean()**2 * kernels.RBF(length_scale=1)

    # TODO (wardlt): Make it clear where featurization would appear, as we are soon to introduce additives
    #  This will yield chemical degrees of freedom better captured using features of the additives rather
    #  than a new variable per additive
    #  Notes for now: Mol. Weight, Side Chain Length, and ... are the likely candidates

    # Add a noise parameter based on user settings
    noise = opt_spec.planner_options.get('noise_level', 0)
    if noise < 0:
        # Use the standard deviation of train_y as the estimate of the initial noise
        # TODO (wardlt): Document where 3, 4, and 11 come from
        noise_estimated = np.std(train_y) / 3
        noise_lb = noise_estimated / 4
        noise_ub = noise_estimated * 11

        kernel_noise = kernels.WhiteKernel(noise_level=noise_estimated**2,
                                           noise_level_bounds=(noise_lb**2,
                                                               noise_ub**2))
        kernel = kernel + kernel_noise
    elif noise > 0:
        kernel = kernel + kernels.WhiteKernel(
            noise**2, noise_level_bounds=(noise**2, ) * 2)

    # Train a GPR model
    model = Pipeline([('variance', VarianceThreshold()),
                      ('scale', StandardScaler()),
                      ('gpr', GaussianProcessRegressor(kernel))])

    # Train and save the model
    model.fit(train_x, train_y)
    print(f'Finished fitting the model on {len(train_x)} data points')
    print(f'Optimized model: {model["gpr"].kernel_}')
    return model
Example #8
def GPR_fit(x_train, y_train, x_test):
    kernel = sk_kern.RBF(1.0, (1e-3, 1e3)) + sk_kern.ConstantKernel(
        1.0, (1e-3, 1e3)) + sk_kern.WhiteKernel()
    clf = GaussianProcessRegressor(kernel=kernel,
                                   alpha=1e-10,
                                   optimizer="fmin_l_bfgs_b",
                                   n_restarts_optimizer=20,
                                   normalize_y=True)
    clf.fit(x_train, y_train)
    pred_mean, pred_std = clf.predict(x_test, return_std=True)
    return pred_mean, pred_std
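
A hypothetical call of GPR_fit on toy 1D data; the imports shown are the aliases the snippet appears to assume (sk_kern for sklearn.gaussian_process.kernels):

import numpy as np
import sklearn.gaussian_process.kernels as sk_kern
from sklearn.gaussian_process import GaussianProcessRegressor

# Toy 1D regression problem; GPR_fit is the function defined above.
x_train = np.linspace(0, 10, 25)[:, None]
y_train = np.sin(x_train).ravel()
x_test = np.linspace(0, 10, 200)[:, None]

pred_mean, pred_std = GPR_fit(x_train, y_train, x_test)
print(pred_mean.shape, pred_std.shape)
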
Example #9
def main():
    # Specify type_ and region of data
    type_ = 'organic'
    region = 'WestTexNewMexico'

    # Specify the kernel functions; please see the paper for the rationale behind the choices
    kernel = Kernels.ExpSineSquared(length_scale=20., periodicity=365.) \
        + 0.8 * Kernels.RationalQuadratic(alpha=20., length_scale=80.) \
        + Kernels.WhiteKernel(.2)

    # Fit gp model and plot
    run_gp(kernel, n_restarts_optimizer=10, type_=type_, region=region)
Example #10
    def fit(self, X, Y, alpha=0.0, verbose=False):
        X = np.array(X)
        kernel = 1.0 * GPKernels.RBF(
            length_scale=100.0,
            length_scale_bounds=(1e-2, 1e3)) + GPKernels.WhiteKernel(
                noise_level=1, noise_level_bounds=(1e-10, 1e+1))
        gp = GaussianProcessRegressor(kernel=kernel, alpha=alpha)
        self.model = gp.fit(X, Y)
        self.fitted = True

        if verbose:
            print("Parameters of " + self.name + " :")
            print("-" * 30)
            print("-" * 30)
Example #11
    def select_kernel(self, kernel):
        """Get the sklearn.gaussian_process.kernels kernel by matching the given kernel identifier.

        Parameters:
            kernel (str): Kernel string such as 'RBF' or depending on the surrogate also product and sum kernels
                such as 'RBF+Matern52'.

        Returns:
            sklearn.gaussian_process.kernels: Scikit-learn kernel object. Currently, for sum and product kernels,
            the initial hyperparameters are the same for all kernels.
        """

        from re import split
        from sklearn.gaussian_process import kernels as sklearn_kernels
        full_str = split('([+*])', kernel)
        try:
            kernel = []
            for key in full_str:
                kernel += [
                    key if key in ('+', '*') else getattr(
                        sklearn_kernels, key)(
                            length_scale=self.hyperparameters['length_scale'])
                ]
        except AttributeError:
            raise RuntimeError("Kernel {} is not implemented.".format(kernel))

        if len(kernel) == 1:
            kernel = kernel[0]
        else:
            kernel = [
                str(key) if not isinstance(key, str) else key for key in kernel
            ]
            kernel = eval(''.join(kernel))

        # Add scale and noise to kernel
        kernel *= sklearn_kernels.ConstantKernel(
            constant_value=1 / self.hyperparameters['sigma_f'].item()**2)
        if not self.fixed_sigma_n:
            kernel += sklearn_kernels.WhiteKernel(
                noise_level=self.hyperparameters['sigma_n'].item()**2)

        return kernel
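
As an illustration of what the parsing above produces, a 'RBF+Matern' request corresponds roughly to the composition below; the length scale, sigma_f and sigma_n values are placeholders for the instance's hyperparameters:

from sklearn.gaussian_process import kernels as sklearn_kernels

# Placeholder hyperparameters; the originals live in self.hyperparameters.
length_scale, sigma_f, sigma_n = 1.0, 1.0, 0.1

kernel = sklearn_kernels.RBF(length_scale=length_scale) \
    + sklearn_kernels.Matern(length_scale=length_scale)
kernel *= sklearn_kernels.ConstantKernel(constant_value=1 / sigma_f**2)
kernel += sklearn_kernels.WhiteKernel(noise_level=sigma_n**2)
print(kernel)
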
Example #12
        def bo_(x_obs, y_obs, n_iter):
            if n_iter > 0:
                kernel = kernels.Matern() + kernels.WhiteKernel()
                gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=16)
                gp.fit(x_obs, 1-y_obs)

                a = a_EI(gp, x_obs=x_obs, y_obs=1-y_obs)

                argmax_f_x_ = x[np.argmax(a(x))]

                # heavy evaluation
                f_argmax_f_x_ = cross_validation(argmax_f_x_)

                y_ob = np.atleast_2d(mean_mean_validation_scores(f_argmax_f_x_)).T

                return f_argmax_f_x_ + bo_(
                    x_obs=np.vstack((x_obs, argmax_f_x_)),
                    y_obs=np.vstack((y_obs, y_ob)),
                    n_iter=n_iter-1,
                )

            else:
                return []
Example #13
def integrated_sigma(alpha, n_samples, n_restarts_optimizer=16, f=f):
    print("integrated_sigma(n_samples={n_samples}, alpha={alpha})".format(
        n_samples=n_samples,
        alpha=alpha,
    ))
    X = np.atleast_2d(np.linspace(1, 9, n_samples)).T
    y = f(X).ravel()
    x = np.atleast_2d(np.linspace(0, 10, 16 * 1024)).T

    kernel = kernels.Matern() + (kernels.WhiteKernel(
        noise_level=alpha) if alpha is not None else 0.0)
    gp = GaussianProcessRegressor(
        kernel=kernel,
        n_restarts_optimizer=n_restarts_optimizer,
    )
    gp.fit(X, y)

    y_pred, sigma = gp.predict(x, return_std=True)

    return simps(
        x=x.ravel(),
        y=sigma,
    )
Example #14
def gp(xdata, ydata):
    kernel = [
        kernels.RBF(),
        kernels.Matern(),
        kernels.ConstantKernel(),
        kernels.WhiteKernel(),
        kernels.RationalQuadratic()
    ]
    max_iter_predict = [10, 50, 100, 500, 1000]
    warm_start = [False, True]
    multi_class = ['one_vs_rest', 'one_vs_one']

    with open('gaussianprocess.csv', mode='w', newline='') as file:
        writer = csv.writer(file, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow([
            'kernel', 'max_iter_predict', 'warm_start', 'multi_class',
            'accuracy'
        ])
        for k in kernel:
            for m in max_iter_predict:
                for w in warm_start:
                    for mc in multi_class:
                        accuracy = 0
                        model = GaussianProcessClassifier(kernel=k,
                                                          max_iter_predict=m,
                                                          warm_start=w,
                                                          multi_class=mc,
                                                          random_state=1)
                        kf = StratifiedKFold(n_splits=5, shuffle=True)
                        for i, j in kf.split(xdata, ydata):
                            X_ktrain, X_ktest = xdata[i], xdata[j]
                            y_ktrain, y_ktest = ydata[i], ydata[j]
                            model.fit(X_ktrain, y_ktrain)
                            ypred = model.predict(X_ktest)
                            accuracy += np.mean(ypred == y_ktest)
                        accuracy /= 5
                        writer.writerow([k, m, w, mc, accuracy])
Example #15
    def fit_model(self, train_x, train_y):
        """
            Fit a Gaussian process regressor with noisy Matern kernel to the given data
        """

        train_x, train_y = self.preprocess(train_x, train_y, 1500)

        k = ker.Matern(length_scale=0.01, nu=2.5) + \
            ker.WhiteKernel(noise_level=1e-05)

        gpr = gp.GaussianProcessRegressor(kernel=k,
                                          alpha=0.01,
                                          n_restarts_optimizer=20,
                                          random_state=42,
                                          normalize_y=True)
        noisyMat_gpr = pipeline.Pipeline([("scaler", self.scaler),
                                          ("gpr", gpr)])

        print("Fitting noisy Matern GPR")
        start = time()
        noisyMat_gpr.fit(train_x, train_y)
        print("Took {} seconds".format(time() - start))

        self.model = noisyMat_gpr
Example #16
# a        = 10.0
a        = 2.0
f0       = 3.0
T        = 1000
# N        = 10
N        = 10
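noiseStd = 0.1  # observation-noise standard deviation (assumed value; not set in this excerpt)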

def process(x):
    return a*np.sin(2.0*np.pi*f0*x)

locations        = np.linspace(0,1.0,T)
trueValues       = process(locations)
noise            = noiseStd*np.random.randn(len(trueValues))
observableValues = trueValues + noise

kernel = ((a**2) * gpk.RBF(length_scale= 0.25 / f0)) + gpk.WhiteKernel(noiseStd**2)

def do_update():

    # update values
    # indexes          = np.random.randint(0,T-1, N)
    indexes          = np.random.randint(int(T/4.0), int(3.0*T/4.0), N)
    obsLocations     = np.array([locations[i]        for i in indexes])
    obsValues        = np.array([observableValues[i] for i in indexes])
    gprProcessor = GaussianProcessRegressor(kernel,
                                            alpha=0.0,
                                            optimizer=None,
                                            copy_X_train=False)
    gprProcessor.fit(obsLocations.reshape(-1,1), obsValues.reshape(-1,1))
    prediction = gprProcessor.predict(locations.reshape(-1,1), return_std=True)
Example #17
    calc_d[obs_name] = {
        'x_list': x_list,
        'y_list': y_list,
        'mean': calculation_mean_list,
        'uncert': calculation_uncert_list
    }

#########################################
# Make interpolator for each observable #
#########################################

kernel = (
    1. * kernels.RBF(length_scale=.2, length_scale_bounds=(.05, .5))
    #    + kernels.ConstantKernel()
    + kernels.WhiteKernel(noise_level=1., noise_level_bounds=(1e-5, 1e5)))

gp = GPR(kernel=kernel, n_restarts_optimizer=5, copy_X_train=False)
meshmesh = np.zeros((nlenp * nlenx, 2))
z_list_new = np.zeros((nlenp * nlenx, 1))
for ii in range(nlenp * nlenx):
    meshmesh[ii][0] = x_mesh[math.floor(ii / nlenx)][0]
    meshmesh[ii][1] = y_mesh[0][int(ii % nlenx)]
    z_list_new[ii] = z_list[math.floor(ii / nlenx)][int(ii % nlenx)]
gp.fit(np.atleast_2d(meshmesh), z_list_new)
print("C^2 = ", gp.kernel_.get_params()['k1'])
print(gp.kernel_.get_params()['k2'])


def predictM(x, gpx):
    mean2 = gpx.predict(return_cov=False, X=np.atleast_2d(x).T)
Example #18
    def __init__(self, system_str, npc, nrestarts=2):
        print("Emulators for system " + system_str)
        print("with viscous correction type {:d}".format(idf))
        print("NPC : " + str(npc))
        print("Nrestart : " + str(nrestarts))

        #list of observables is defined in calculations_file_format_event_average
        #here we get their names and sum all the centrality bins to find the total number of observables nobs
        self.nobs = 0
        self.observables = []
        self._slices = {}

        for obs, cent_list in obs_cent_list[system_str].items():
            #for obs, cent_list in calibration_obs_cent_list[system_str].items():
            self.observables.append(obs)
            n = np.array(cent_list).shape[0]
            self._slices[obs] = slice(self.nobs, self.nobs + n)
            self.nobs += n

        print("self.nobs = " + str(self.nobs))
        #read in the model data from file
        print("Loading model calculations from " \
               + SystemsInfo[system_str]['main_obs_file'])

        # things to drop
        delete = []
        # build a matrix of dimension (num design pts) x (number of observables)
        Y = []
        for ipt, data in enumerate(trimmed_model_data[system_str]):
            row = np.array([])
            for obs in self.observables:
                #n_bins_bayes = len(calibration_obs_cent_list[system_str][obs]) # only using these bins for calibration
                #values = np.array(trimmed_model_data[system_str][pt, idf][obs]['mean'][:n_bins_bayes] )
                values = np.array(data[idf][obs]['mean'])
                if np.isnan(values).sum() > 0:
                    print(
                        "WARNING! FOUND NAN IN MODEL DATA WHILE BUILDING EMULATOR!"
                    )
                    print("Design pt = " + str(pt) + "; Obs = " + obs)
                row = np.append(row, values)
            Y.append(row)
        Y = np.array(Y)
        print("Y_Obs shape[Ndesign, Nobs] = " + str(Y.shape))

        #Principal Components
        self.npc = npc
        self.scaler = StandardScaler(copy=False)
        #whiten to ensure uncorrelated outputs with unit variances
        self.pca = PCA(copy=False, whiten=True, svd_solver='full')
        # Standardize observables and transform through PCA.  Use the first
        # `npc` components but save the full PC transformation for later.
        # Save all rows (design points) but keep only the first npc columns.
        Z = self.pca.fit_transform(self.scaler.fit_transform(Y))[:, :npc]

        design, design_max, design_min, labels = prepare_emu_design(system_str)

        #delete undesirable data
        if len(delete_design_pts_set) > 0:
            print("Warning! Deleting " + str(len(delete_design_pts_set)) +
                  " points from data")
        design = np.delete(design, list(delete_design_pts_set), 0)

        ptp = design_max - design_min
        print("Design shape[Ndesign, Nparams] = " + str(design.shape))
        # Define kernel (covariance function):
        # Gaussian correlation (RBF) plus a noise term.
        # noise term is necessary since model calculations contain statistical noise
        k0 = 1. * kernels.RBF(
            length_scale=ptp,
            length_scale_bounds=np.outer(ptp, (4e-1, 1e2)),
            #nu = 3.5
        )
        k1 = kernels.ConstantKernel()
        k2 = kernels.WhiteKernel(noise_level=.1,
                                 noise_level_bounds=(1e-2, 1e2))

        #kernel = (k0 + k1 + k2) #this includes a constant kernel
        kernel = (k0 + k2)  # this does not

        # Fit a GP (optimize the kernel hyperparameters) to each PC.

        self.gps = []
        for i, z in enumerate(Z.T):
            print("Fitting PC #", i)
            self.gps.append(
                GPR(kernel=kernel,
                    alpha=0.1,
                    n_restarts_optimizer=nrestarts,
                    copy_X_train=False).fit(design, z))

        for n, (z, gp) in enumerate(zip(Z.T, self.gps)):
            print("GP " + str(n) + " score : " + str(gp.score(design, z)))

        print("Constructing full linear transformation matrix")
        # Construct the full linear transformation matrix, which is just the PC
        # matrix with the first axis multiplied by the explained standard
        # deviation of each PC and the second axis multiplied by the
        # standardization scale factor of each observable.
        self._trans_matrix = (self.pca.components_ * np.sqrt(
            self.pca.explained_variance_[:, np.newaxis]) * self.scaler.scale_)

        # Pre-calculate some arrays for inverse transforming the predictive
        # variance (from PC space to physical space).

        # Assuming the PCs are uncorrelated, the transformation is
        #
        #   cov_ij = sum_k A_ki var_k A_kj
        #
        # where A is the trans matrix and var_k is the variance of the kth PC.
        # https://en.wikipedia.org/wiki/Propagation_of_uncertainty

        print("Computing partial transformation for first npc components")
        # Compute the partial transformation for the first `npc` components
        # that are actually emulated.
        A = self._trans_matrix[:npc]
        self._var_trans = np.einsum('ki,kj->kij', A, A,
                                    optimize=False).reshape(npc, self.nobs**2)

        # Compute the covariance matrix for the remaining neglected PCs
        # (truncation error).  These components always have variance == 1.
        B = self._trans_matrix[npc:]
        self._cov_trunc = np.dot(B.T, B)

        # Add small term to diagonal for numerical stability.
        self._cov_trunc.flat[::self.nobs + 1] += 1e-4 * self.scaler.var_
Example #19
    param1_paramspace_length = param1_max - param1_min
    param2_paramspace_length = param2_max - param2_min

    # Kernels
    k0 = 1. * kernels.RBF(length_scale=(param1_paramspace_length / 2.,
                                        param2_paramspace_length / 2.),
                          length_scale_bounds=(
                              (param1_paramspace_length / param1_nb_design_pts,
                               3. * param1_paramspace_length),
                              (param2_paramspace_length / param2_nb_design_pts,
                               3. * param2_paramspace_length)))

    relative_uncertainty = info_d['theoretical_relative_uncertainty']
    k2 = 1. * kernels.WhiteKernel(
        noise_level=relative_uncertainty,
        # noise_level_bounds='fixed'
        noise_level_bounds=(relative_uncertainty / 4.,
                            4 * relative_uncertainty))

    kernel = (k0 + k2)

    nrestarts = 10

    emulator_design_pts_value = np.transpose([
        np.ravel(calc_d[obs_name]['param1_mesh']),
        np.ravel(calc_d[obs_name]['param2_mesh'])
    ])
    # Should have format [[param1, param2],[],[],...]

    emulator_obs_mean_value = np.ravel(calc_d[obs_name]['mean_plus_noise'])
    # Should have format [ob1, obs1, ...]
Example #20
    def to_sklearn(self):
        """Convert it to a sklearn kernel, if there is one"""
        return sklearn_kern.WhiteKernel(noise_level=self.variance)
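
For reference, the conversion above simply maps the wrapper's variance onto scikit-learn's noise_level; a variance of 0.25 (an arbitrary example value) becomes:

import sklearn.gaussian_process.kernels as sklearn_kern

print(sklearn_kern.WhiteKernel(noise_level=0.25))  # WhiteKernel(noise_level=0.25)
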
Example #21
def integrate_EI(x,
                 sample_theta_list,
                 evaluated_loss,
                 mode,
                 greater_is_better=False,
                 n_params=1):
    """ expected_improvement

    Expected improvement acquisition function.

    Arguments:
    ----------
        x: array-like, shape = [n_samples, n_hyperparams]
            The point for which the expected improvement needs to be computed.
        sample_theta_list: hyperparameter samples of the GP model, which will be used to
            calculate the integrated acquisition function
        evaluated_loss: Numpy array.
            Numpy array that contains the values of the loss function for the previously
            evaluated hyperparameters.
        greater_is_better: Boolean.
            Boolean flag that indicates whether the loss function is to be maximised or minimised.
        n_params: int.
            Dimension of the hyperparameter space.

    """
    # sample_theta_list contains all samples of hyperparameters
    ei_list = list()
    input_dimension = n_params
    init_length_scale = np.ones((input_dimension, ))
    kernel = kernels.Sum(
        kernels.WhiteKernel(),
        kernels.Product(
            kernels.ConstantKernel(),
            kernels.Matern(length_scale=init_length_scale, nu=5. / 2.)))
    for theta_set in sample_theta_list:
        model = Gaussian_Process(kernel, mode)
        '''
        model = gp.GaussianProcessRegressor(kernel=kernel, alpha=1e-5, optimizer = None, normalize_y=True)
        model.set_params(**{"kernel__k1__noise_level": np.abs(theta_set[0]),
                            "kernel__k2__k1__constant_value": np.abs(theta_set[1]),
                            "kernel__k2__k2__length_scale": theta_set[2:]})
        '''
        model.set_params(theta_set)
        x_to_predict = x.reshape(-1, n_params)

        mu, sigma = model.predict(x_to_predict)
        #mu, sigma = model.predict(x_to_predict, return_std=True)

        if greater_is_better:
            loss_optimum = np.max(evaluated_loss)
        else:
            loss_optimum = np.min(evaluated_loss)

        scaling_factor = (-1)**(not greater_is_better)

        # In case sigma equals zero
        with np.errstate(divide='ignore'):
            Z = scaling_factor * (mu - loss_optimum) / sigma
            expected_improvement = scaling_factor * (
                mu - loss_optimum) * norm.cdf(Z) + sigma * norm.pdf(Z)
            expected_improvement[sigma == 0.0] = 0.0
        ei_list.append(expected_improvement[0])
    res_ei = np.mean(ei_list)
    result = np.array([res_ei])
    return -1 * result
Example #22
def scikit_prior(filename0,
                 varname='v',
                 dt=0,
                 tlim=6,
                 radar='',
                 xlim=[0, 0],
                 ylim=[0, 0],
                 dx=0,
                 ind=0,
                 xrange=3):
    startTime = datetime.now()
    dir0, a = filename0.split("res")
    b, fname0 = a.split("/")
    fname0 = dir0 + fname0
    fm = sio.loadmat(fname0 + '.mat')
    print('Longitude limits:', xlim)
    print('Latitude limits :', ylim)

    # get radar data grid, if that is the case:
    if radar != '':
        inFile = Dataset(radar, 'r')
        lon0, lat0 = inFile.variables['imageOriginPosition'][:]
        x0, y0 = NAD83(lon0, lat0)
        x0 = (x0 - x_ori) / 1000.  # in km
        y0 = (y0 - y_ori) / 1000.  # in km
        xg = x0 + inFile.variables['xCoords'][:] / 1000.
        yg = y0 + inFile.variables['yCoords'][:] / 1000.
        tr = inFile.variables['time'][:]
        ur = inFile.variables['ux'][:]
        vr = inFile.variables['uy'][:]
        t0 = datetime(2016, 1, 1)  # radar data counts from here, in hours
        t0D = datetime(2016, 2, 7, 2,
                       15)  # first time from Filtered_2016_2_7.pkl'
        tg = np.array([(t0 + timedelta(tr[0]) - t0D).total_seconds() / 3600])
        it = 0
        filename = filename0 + '_radar'
        Yg, Tg, Xg = np.meshgrid(yg, tg, xg)
        Tg = np.reshape(Tg, [Tg.size, 1])
        Yg = np.reshape(Yg, [Yg.size, 1])
        Xg = np.reshape(Xg, [Xg.size, 1])
        X = np.concatenate([Tg, Yg, Xg], axis=1)
    else:  # DEFINE GRID
        if (xlim[1] > xlim[0]) & (ylim[1] > ylim[0]):  # should focus here
            X, tcenter, yg, xg = getGrid([dt, dt + 1], ylim, xlim, 1, dx)
            filename = filename0 + '_cyc'
        else:  # this is for preexisting grids
            f = Dataset(filename0 + '.nc', 'r')
            #         HPU = f.variables['hyperparam_u'][:]
            #         HPV = f.variables['hyperparam_v'][:]
            xg = f.variables['x'][:]
            yg = f.variables['y'][:]
            tg = f.variables['time'][:]
            it = dt  #tg.size/2 + dt
            tcenter = np.array([tg[it]])
            Yg, Tg, Xg = np.meshgrid(yg, tg, xg)
            Tg = np.reshape(Tg, [Tg.size, 1])
            Yg = np.reshape(Yg, [Yg.size, 1])
            Xg = np.reshape(Xg, [Xg.size, 1])
            X = np.concatenate([Tg, Yg, Xg], axis=1)
            filename = filename0
            inc = yg.size * xg.size
            i2 = inc * it
            X = X[i2:i2 + inc, :]

    filename = filename + '_' + str(np.round(tcenter[0],
                                             decimals=2)) + 'h_scikit_'
    outFile = filename + str(ind) + '.nc'

    # LOAD Observations
    to = fm['Xo'][:, 0]
    tt = fm['Xt'][:, 0]
    xo = fm['Xo'][:, 2]
    xt = fm['Xt'][:, 2]
    ito = np.where((to >= tcenter - tlim) & (to <= tcenter + tlim)
                   & (xo >= xlim[0] - xrange) & (xo <= xlim[1] + xrange))
    itt = np.where((tt >= tcenter - tlim) & (tt <= tcenter + tlim)
                   & (xt >= xlim[0] - xrange) & (xt <= xlim[1] + xrange))
    Xo = fm['Xo'][ito, :].squeeze()
    Xt = fm['Xt'][itt, :].squeeze()
    XT = np.concatenate([Xo, Xt], axis=0)
    print('Number of observation points: ', np.size(XT, 0))
    obs = fm['obs'][ito, :].squeeze()
    obst = fm['test_points'][itt, :].squeeze()
    # LOAD Hyper-Parameters
    cheatPickle = GPy.load('cheatPickle.pkl')
    model = GPy.load(fname0 + '_' + varname + '.pkl')
    HP = model.param_array
    covarname = varname + 'var'
    modelName = filename + varname + '.pkl'
    if varname == 'u':
        u = np.concatenate([obs[:, 1], obst[:, 1]])[:, None]
    else:
        u = np.concatenate([obs[:, 0], obst[:, 0]])[:, None]
    N = HP.size - 1
    noise = HP[-1]
    print('noise = ' + str(HP[-1]))
    # Build Model
    print(modelName)
    #   if not os.path.isfile(modelName):
    k = HP[0] * kernels.RBF(length_scale=[HP[1], HP[2], HP[3]])
    print('var1 = ' + str(HP[0]))
    if N > 5:
        i = 4
        k = k + HP[i] * kernels.RBF(
            length_scale=[HP[i + 1], HP[i + 2], HP[i + 3]])
        print('var2 = ' + str(HP[i]))
    k = k + kernels.WhiteKernel(noise_level=noise)
    print(k)
    model_u = GaussianProcessRegressor(kernel=k, optimizer=None)
    print(np.size(XT, 0), np.size(XT, 1))
    print(np.size(u, 0), np.size(u, 1))
    model_u.fit(XT, u)
    # file might be to large to save
    #      with open(modelName,'wb') as output:
    #      pickle.dump(model_u,open(modelName,'wb'))
    #   else:
    #      with open(modelName,'rb') as input:
    #           model_u = pickle.load(input)

    # REGRESSION
    U, Ustd = model_u.predict(X, return_std=True)
    U = np.reshape(U, [tcenter.size, yg.size, xg.size])
    Ustd = np.reshape(Ustd, [tcenter.size, yg.size, xg.size])
    # SAVE NETCDF
    if not os.path.isfile(outFile):
        createNC(outFile, tcenter, yg, xg, HP)
    print(np.ndim(U), np.size(U, 0), np.size(U, 1))
    print(np.ndim(Ustd), np.size(Ustd, 0), np.size(Ustd, 1))
    fi = Dataset(outFile, 'a')
    fi = writeNC(fi, varname, U)
    fi = writeNC(fi, covarname, Ustd**2)
    fi = writeNC(fi, 'hyperparam_' + varname, HP)
    fi.close()
    print('End of script, time : ' + str(datetime.now() - startTime))
Example #23
def bayesian_optimisation(slice_sample_num,
                          coor_sigma,
                          burn_in,
                          input_dimension,
                          n_iters,
                          sample_loss,
                          bounds,
                          x0=None,
                          n_pre_samples=5,
                          acqui_eva_num=10,
                          random_search=False,
                          epsilon=1e-7,
                          greater_is_better=False,
                          mode='OPT',
                          acqui_mode='MCMC',
                          acqui_sample_num=3):
    """ bayesian_optimisation

    Uses Gaussian Processes to optimise the loss function `sample_loss`.

    Arguments:
    ----------
        slice_sample_num: integer.
            how many samples we draw in each round of slice sampling
        coor_sigma: numpy array
            step size for slice sampling of each coordinate; the dimension is equal to the number of
            hyperparameters contained in the kernel
        burn_in: integer.
            how many iterations we want to wait before drawing samples from slice sampling
        input_dimension: integer.
            dimension of input data
        n_iters: integer.
            Number of iterations to run the search algorithm.
        sample_loss: function.
            Function to be optimised.
        bounds: array-like, shape = [n_params, 2].
            Lower and upper bounds on the parameters of the function `sample_loss`.
        x0: array-like, shape = [n_pre_samples, n_params].
            Array of initial points to sample the loss function for. If None, randomly
            samples from the loss function.
        n_pre_samples: integer.
            If x0 is None, samples `n_pre_samples` initial points from the loss function.
        acqui_eva_num:
            when evaluating the acquisition function, how many points we want to look into
        gp_params: dictionary.
            Dictionary of parameters to pass on to the underlying Gaussian Process.
        random_search: integer.
            Flag that indicates whether to perform random search or L-BFGS-B optimisation
            over the acquisition function.
        alpha: double.
            Variance of the error term of the GP.
        epsilon: double.
            Precision tolerance for floats.
        greater_is_better: boolean
            True: maximize the sample_loss function,
            False: minimize the sample_loss function
        mode: OPT means using optimizer to optimize the hyperparameters of GP
              MAP means using sample posterior mean to optimize the hyperparameters of GP
        acqui_mode: mode controlling the acquisition
            'OPT': using one prediction based on previously optimized model
            'MCMC': using several samples to sample the expected acquisition function
        acqui_sample_num:
            the number of hyperparameter samples we want to use for integrated acquisition function
    """

    # call slice sampler
    slice_sampler = Slice_sampler(slice_sample_num, coor_sigma, burn_in)
    acqui_slice_sampler = Slice_sampler(
        1, coor_sigma, burn_in)  # only sample one sample a time

    x_list = []
    y_list = []
    y_dur_list = []

    n_params = bounds.shape[0]

    if x0 is None:
        # random draw several points as GP prior
        for params in np.random.uniform(bounds[:, 0], bounds[:, 1],
                                        (n_pre_samples, bounds.shape[0])):
            x_list.append(params)
            start = time.clock()
            y_list.append(sample_loss(params))
            elapsed = (time.clock() - start)
            y_dur_list.append(elapsed)
    else:
        for params in x0:
            x_list.append(params)
            start = time.clock()
            y_list.append(sample_loss(params))
            elapsed = (time.clock() - start)
            y_dur_list.append(elapsed)

    xp = np.array(x_list)
    yp = np.array(y_list)
    yp_logdur = np.log(np.array(y_dur_list))
    #print (xp,yp)

    # Create the GP

    #kernel = gp.kernels.Matern()
    init_length_scale = np.ones((input_dimension, ))
    kernel = kernels.Sum(
        kernels.WhiteKernel(),
        kernels.Product(
            kernels.ConstantKernel(),
            kernels.Matern(length_scale=init_length_scale, nu=5. / 2.)))
    if mode == 'OPT':
        model = Gaussian_Process(kernel, mode)

    elif mode == 'MAP':
        model = Gaussian_Process(kernel, mode)

    else:
        raise Exception('Wrong GP model initialization mode!!!')

    dur = Gaussian_Process(kernel, 'OPT')

    iter_num = 0
    for n in range(n_iters):
        iter_num += 1
        if iter_num % int(n_iters / 2) == 0:
            print('%d iterations have been run' % iter_num)
        else:
            pass

        # for each iteration, one sample will be drawn and used to train GP
        model.fit(xp, yp)
        dur.fit(xp, yp_logdur)
        # Sample next hyperparameter
        #print ('One sample start')

        if random_search:
            x_random = np.random.uniform(bounds[:, 0],
                                         bounds[:, 1],
                                         size=(random_search, n_params))
            ei = -1 * expected_improvement(x_random,
                                           model,
                                           yp,
                                           greater_is_better=greater_is_better,
                                           n_params=n_params)
            next_sample = x_random[np.argmax(ei), :]
        else:
            if acqui_mode == 'OPT':
                next_sample = sample_next_hyperparameter(
                    expected_improvement,
                    model,
                    yp,
                    greater_is_better=greater_is_better,
                    bounds=bounds,
                    n_restarts=acqui_eva_num)

            elif acqui_mode == 'MCMC':
                sample_theta_list = list()
                for sample_acqui_time in range(acqui_sample_num):
                    initial_log_theta = np.ones((input_dimension + 2, ))
                    initial_theta = np.exp(1.0 + initial_log_theta)
                    one_log_theta = acqui_slice_sampler.sample(
                        init=initial_theta, gp=model)
                    one_theta = np.exp(1.0 + one_log_theta)
                    sample_theta_list.append(one_theta)

                next_sample = integrate_sample(
                    integrate_EI,
                    sample_theta_list,
                    yp,
                    mode,
                    greater_is_better=greater_is_better,
                    bounds=bounds,
                    n_restarts=acqui_eva_num)

            elif acqui_mode == 'PERSEC':
                sample_theta_list = list()
                for sample_acqui_time in range(acqui_sample_num):
                    initial_log_theta = np.ones((input_dimension + 2, ))
                    initial_theta = np.exp(1.0 + initial_log_theta)
                    one_log_theta = acqui_slice_sampler.sample(
                        init=initial_theta, gp=model)
                    one_theta = np.exp(1.0 + one_log_theta)
                    sample_theta_list.append(one_theta)

                next_sample = integrate_sample_perSec(
                    integrate_EI_perSec,
                    sample_theta_list,
                    dur,
                    yp,
                    mode,
                    greater_is_better=greater_is_better,
                    bounds=bounds,
                    n_restarts=acqui_eva_num)

            else:
                raise Exception('Wrong acquisition mode!!!')

        #print ('One sample finished')

        # Duplicates will break the GP. In case of a duplicate, we will randomly sample a next query point.
        if np.any(np.abs(next_sample - xp) <= epsilon):
            next_sample = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                            bounds.shape[0])

        # Sample loss for new set of parameters
        start = time.clock()
        func_value = sample_loss(next_sample)
        elapsed = (time.clock() - start)

        # Update lists
        x_list.append(next_sample)
        y_list.append(func_value)
        y_dur_list.append(elapsed)

        # Update xp and yp
        xp = np.array(x_list)
        yp = np.array(y_list)
        yp_logdur = np.log(np.array(y_dur_list))

    return xp, yp, yp_logdur
Example #24
    def __init__(self, system, npc=10, nrestarts=0):
        logging.info('training emulator for system %s (%d PC, %d restarts)',
                     system, npc, nrestarts)

        Y = []
        self._slices = {}

        # Build an array of all observables to emulate.
        nobs = 0
        for obs, subobslist in self.observables:
            self._slices[obs] = {}
            for subobs in subobslist:
                Y.append(model.data[system][obs][subobs]['Y'])
                n = Y[-1].shape[1]
                self._slices[obs][subobs] = slice(nobs, nobs + n)
                nobs += n

        Y = np.concatenate(Y, axis=1)

        self.npc = npc
        self.nobs = nobs
        self.scaler = StandardScaler(copy=False)
        self.pca = PCA(copy=False, whiten=True, svd_solver='full')

        # Standardize observables and transform through PCA.  Use the first
        # `npc` components but save the full PC transformation for later.
        Z = self.pca.fit_transform(self.scaler.fit_transform(Y))[:, :npc]

        # Define kernel (covariance function):
        # Gaussian correlation (RBF) plus a noise term.
        design = Design(system)
        ptp = design.max - design.min
        kernel = (1. * kernels.RBF(length_scale=ptp,
                                   length_scale_bounds=np.outer(ptp,
                                                                (.1, 10))) +
                  kernels.WhiteKernel(noise_level=.1**2,
                                      noise_level_bounds=(.01**2, 1)))

        # Fit a GP (optimize the kernel hyperparameters) to each PC.
        self.gps = [
            GPR(kernel=kernel,
                alpha=0,
                n_restarts_optimizer=nrestarts,
                copy_X_train=False).fit(design, z) for z in Z.T
        ]

        # Construct the full linear transformation matrix, which is just the PC
        # matrix with the first axis multiplied by the explained standard
        # deviation of each PC and the second axis multiplied by the
        # standardization scale factor of each observable.
        self._trans_matrix = (self.pca.components_ * np.sqrt(
            self.pca.explained_variance_[:, np.newaxis]) * self.scaler.scale_)

        # Pre-calculate some arrays for inverse transforming the predictive
        # variance (from PC space to physical space).

        # Assuming the PCs are uncorrelated, the transformation is
        #
        #   cov_ij = sum_k A_ki var_k A_kj
        #
        # where A is the trans matrix and var_k is the variance of the kth PC.
        # https://en.wikipedia.org/wiki/Propagation_of_uncertainty

        # Compute the partial transformation for the first `npc` components
        # that are actually emulated.
        A = self._trans_matrix[:npc]
        self._var_trans = np.einsum('ki,kj->kij', A, A,
                                    optimize=False).reshape(npc, nobs**2)

        # Compute the covariance matrix for the remaining neglected PCs
        # (truncation error).  These components always have variance == 1.
        B = self._trans_matrix[npc:]
        self._cov_trunc = np.dot(B.T, B)

        # Add small term to diagonal for numerical stability.
        self._cov_trunc.flat[::nobs + 1] += 1e-4 * self.scaler.var_
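
As a small numeric illustration of the propagation step described in the comments above (cov_ij = sum_k A_ki var_k A_kj); the dimensions and values here are arbitrary:

import numpy as np

A = np.random.default_rng(0).normal(size=(3, 5))   # (npc, nobs) transformation matrix
var = np.array([2.0, 1.5, 1.0])                    # variance of each PC
cov = np.einsum('ki,k,kj->ij', A, var, A)          # (nobs, nobs) covariance
print(np.allclose(cov, (A * var[:, None]).T @ A))  # same result computed directly
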
Example #25
    df = df.drop('Unnamed: 0', axis=0)

    X, y = pre_processing(df.astype(str))
    X = X.astype(float)

    skf = StratifiedKFold(n_splits=metrics.folds, shuffle=True)
    scorer = make_scorer(accuracy_score)

    #modelos a serem treinados
    nmodels = {
        'gauss': [
            GaussianProcessClassifier(n_jobs=2), {
                'kernel': [
                    1 * kernels.RBF(), 1 * kernels.DotProduct(),
                    1 * kernels.Matern(), 1 * kernels.RationalQuadratic(),
                    1 * kernels.WhiteKernel()
                ]
            }
        ],
        'nb': [GaussianNB()],
        'rf': [
            RandomForestClassifier(), {
                'n_estimators': [10, 50, 100, 200, 500],
                'criterion': ["gini", "entropy"]
            }
        ],
        'dt': [
            DecisionTreeClassifier(), {
                "criterion": ["gini", "entropy"],
                "splitter": ["best", "random"]
            }
Example #26
def bayesian_optimisation(coor_sigma,
                          burn_in,
                          input_dimension,
                          n_iters,
                          sample_loss,
                          bounds,
                          x0=None,
                          n_pre_samples=5,
                          acqui_eva_num=10,
                          alpha=1e-5,
                          epsilon=1e-7,
                          greater_is_better=False,
                          mode='OPT',
                          acqui_mode='MCMC',
                          acqui_sample_num=3,
                          process_sample_mode='normal',
                          prior_mode='normal_prior',
                          likelihood_mode='normal_likelihood'):
    """ bayesian_optimisation
    Uses Gaussian Processes to optimise the loss function `sample_loss`.

    Arguments:
    ----------
        slice_sample_num: integer.
            how many samples we draw in each round of slice sampling
        coor_sigma: numpy array
            step size for slice sampling of each coordinate; the dimension is equal to the number of
            hyperparameters contained in the kernel
        burn_in: integer.
            how many iterations we want to wait before drawing samples from slice sampling
        input_dimension: integer.
            dimension of input data
        n_iters: integer.
            Number of iterations to run the search algorithm.
        sample_loss: function.
            Function to be optimised.
        bounds: array-like, shape = [n_params, 2].
            Lower and upper bounds on the parameters of the function `sample_loss`.
        x0: array-like, shape = [n_pre_samples, n_params].
            Array of initial points to sample the loss function for. If None, randomly
            samples from the loss function.
        n_pre_samples: integer.
            If x0 is None, samples `n_pre_samples` initial points from the loss function.
        acqui_eva_num:
            when evaluating the acquisition function, how many points we want to look into (number of restarts)
        alpha: double.
            Variance of the error term of the GP.
        epsilon: double.
            Precision tolerance for floats.
        greater_is_better: boolean
            True: maximize the sample_loss function,
            False: minimize the sample_loss function
        mode: OPT means using optimizer to optimize the hyperparameters of GP
              MAP means using sample posterior mean to optimize the hyperparameters of GP
        acqui_mode: mode controlling the acquisition
            'OPT': using one prediction based on previously optimized model
            'MCMC': using several samples to sample the expected acquisition function
        acqui_sample_num:
            the number of hyperparameter samples we want to use for integrated acquisition function
        process_sample_mode:
            after getting a sample, how to process it
            'normal': only accept positive samples and reject negative ones
            'abs': accept all samples after taking the absolute value
            'rho': the reparameterization trick is used, the samples are rho
        prior_mode:
            the prior distribution we want to use
            'normal_prior': normal distribution
            'exp_prior': exponential distribution
        likelihood_mode: how to calculate the likelihood
            'normal_likelihood': directly using the input hyperparameters to calculate the likelihood
            'rho_likelihood': using the reparameterization trick (theta = np.log(1.0 + np.exp(rho)))
    """

    # call slice sampler
    acqui_slice_sampler = Slice_sampler(
        1, coor_sigma, burn_in, prior_mode,
        likelihood_mode)  # only sample one sample a time

    x_list = []
    y_list = []
    y_dur_list = []
    time_list = []

    n_params = bounds.shape[0]

    print('Start presampling...')
    if x0 is None:
        # random draw several points as GP prior
        for params in np.random.uniform(bounds[:, 0], bounds[:, 1],
                                        (n_pre_samples, bounds.shape[0])):
            x_list.append(params)
            start = time.clock()
            y_list.append(sample_loss(params))
            elapsed = (time.clock() - start)
            y_dur_list.append(elapsed)
    else:
        for params in x0:
            x_list.append(params)
            start = time.clock()
            y_list.append(sample_loss(params))
            elapsed = (time.clock() - start)
            y_dur_list.append(elapsed)
    print('Presampling finished.')

    xp = np.array(x_list)
    yp = np.array(y_list)
    yp_logdur = np.log(np.array(y_dur_list))

    # Create the GP
    init_length_scale = np.ones((input_dimension, ))
    kernel = kernels.Sum(
        kernels.WhiteKernel(),
        kernels.Product(
            kernels.ConstantKernel(),
            kernels.Matern(length_scale=init_length_scale, nu=5. / 2.)))
    if mode == 'OPT':
        model = gp.GaussianProcessRegressor(kernel=kernel,
                                            alpha=alpha,
                                            n_restarts_optimizer=10,
                                            normalize_y=True)
    elif mode == 'MAP':
        model = gp.GaussianProcessRegressor(kernel=kernel,
                                            alpha=alpha,
                                            optimizer=None,
                                            n_restarts_optimizer=0,
                                            normalize_y=True)
    else:
        raise Exception('Wrong GP model initialization mode!!!')

    dur = gp.GaussianProcessRegressor(kernel=kernel,
                                      alpha=alpha,
                                      n_restarts_optimizer=10,
                                      normalize_y=True)

    iter_num = 0
    for n in range(n_iters):
        # Start the clock for recording total running time per iteration
        ite_start = time.clock()
        iter_num += 1
        if iter_num % int(n_iters / 2) == 0:
            print('%d iterations have been run' % iter_num)
        else:
            pass
        # for each iteration, one sample will be drawn and used to train GP

        dur.fit(xp, yp_logdur)

        if mode == 'OPT':
            # for optimization mode, the hyperparameters are optimized during the process of fitting
            model.fit(xp, yp)
        elif mode == 'MAP':
            # for MAP mode, we use slice sampling to sample the posterior of hyperparameters and use the mean to update GP's hyperparameters
            model.fit(xp, yp)
            initial_theta = 10 * np.ones(
                (input_dimension + 2, )
            )  # input_dimension + 2 = number of length_scale + amplitude + noise_sigma
        else:
            raise Exception('Wrong GP model initialization mode!!!')
        # Sample next hyperparameter

        if acqui_mode == 'OPT':
            next_sample = sample_next_hyperparameter(
                expected_improvement,
                model,
                yp,
                greater_is_better=greater_is_better,
                bounds=bounds,
                n_restarts=acqui_eva_num)
        elif acqui_mode == 'MCMC':
            sample_theta_list = list()
            while (len(sample_theta_list) <
                   acqui_sample_num):  # all samples of theta must be valid
                one_sample = acqui_slice_sampler.sample(init=initial_theta,
                                                        gp=model)
                if process_sample_mode == 'normal':
                    if np.all(one_sample[:, 0] > 0):
                        one_theta = [
                            np.mean(samples_k) for samples_k in one_sample
                        ]
                        sample_theta_list.append(one_theta)
                    else:
                        continue
                elif process_sample_mode == 'abs':
                    one_theta = [
                        np.abs(np.mean(samples_k)) for samples_k in one_sample
                    ]
                    sample_theta_list.append(one_theta)
                elif process_sample_mode == 'rho':
                    one_theta = [
                        np.log(1.0 + np.exp((np.mean(samples_k))))
                        for samples_k in one_sample
                    ]
                    sample_theta_list.append(one_theta)
                else:
                    raise Exception('Wrong process sample mode!!!')

            next_sample = integrate_sample(integrate_EI,
                                           sample_theta_list,
                                           yp,
                                           greater_is_better=greater_is_better,
                                           bounds=bounds,
                                           n_restarts=acqui_eva_num)

        elif acqui_mode == 'PERSEC':
            sample_theta_list = list()
            while (len(sample_theta_list) <
                   acqui_sample_num):  # all samples of theta must be valid
                one_sample = acqui_slice_sampler.sample(init=initial_theta,
                                                        gp=model)
                if process_sample_mode == 'normal':
                    if np.all(one_sample[:, 0] > 0):
                        one_theta = [
                            np.mean(samples_k) for samples_k in one_sample
                        ]
                        sample_theta_list.append(one_theta)
                    else:
                        continue
                elif process_sample_mode == 'abs':
                    one_theta = [
                        np.abs(np.mean(samples_k)) for samples_k in one_sample
                    ]
                    sample_theta_list.append(one_theta)
                elif process_sample_mode == 'rho':
                    one_theta = [
                        np.log(1.0 + np.exp((np.mean(samples_k))))
                        for samples_k in one_sample
                    ]
                    sample_theta_list.append(one_theta)
                else:
                    raise Exception('Wrong process sample mode!!!')
            next_sample = integrate_sample_perSec(
                integrate_EI_perSec,
                sample_theta_list,
                dur,
                yp,
                greater_is_better=greater_is_better,
                bounds=bounds,
                n_restarts=acqui_eva_num)

        elif acqui_mode == 'RANDOM':
            x_random = np.random.uniform(bounds[:, 0],
                                         bounds[:, 1],
                                         size=(5, n_params))
            ei = -1 * expected_improvement(x_random,
                                           model,
                                           yp,
                                           greater_is_better=greater_is_better,
                                           n_params=n_params)
            next_sample = x_random[np.argmax(ei), :]
        else:
            raise Exception('Wrong acquisition mode!!!')

        # Duplicates will break the GP. In case of a duplicate, we will randomly sample a next query point.
        if np.any(np.abs(next_sample - xp) <= epsilon):
            next_sample = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                            bounds.shape[0])

        # Sample loss for new set of parameters
        start = time.clock()
        func_value = sample_loss(next_sample)
        elapsed = (time.clock() - start)

        # Update lists
        x_list.append(next_sample)
        y_list.append(func_value)
        y_dur_list.append(elapsed)

        # Update xp and yp
        xp = np.array(x_list)
        yp = np.array(y_list)
        yp_logdur = np.log(np.array(y_dur_list))

        ite_elapsed = (time.clock() - ite_start)
        time_list.append(ite_elapsed)
        timep = np.array(time_list)

    return xp, yp, timep
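# The 'normal', 'abs', and 'rho' processing branches above are repeated verbatim in both
# sampling-based acquisition modes. A minimal refactoring sketch (the helper name is my
# own, not part of the original code) that collapses the duplication:

import numpy as np  # already imported by the surrounding code


def process_theta_sample(one_sample, mode):
    """Reduce one slice-sampling draw to a theta vector, or None if the draw is invalid."""
    means = np.array([np.mean(samples_k) for samples_k in one_sample])
    if mode == 'normal':
        # keep the draw only if every entry in its first column is strictly positive
        return list(means) if np.all(one_sample[:, 0] > 0) else None
    if mode == 'abs':
        return list(np.abs(means))
    if mode == 'rho':
        # softplus transform log(1 + exp(x)), as in the 'rho' branch above
        return list(np.log1p(np.exp(means)))
    raise ValueError('Wrong process sample mode: {}'.format(mode))

# Each while-loop would then simply append non-None results from process_theta_sample
# until acqui_sample_num valid draws have been collected.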
Beispiel #27
0
def bo(X, y):

    data = list(zip(X, y))

    x = np.atleast_2d(np.linspace(0, 10, 1024)).T
    x_ = np.atleast_2d(np.linspace(0, 10, 1024)).T


    kernel = kernels.Matern() + kernels.WhiteKernel()

    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=16)  # normalize_y=True

    gp.fit(X, y)
    # FIXME is it possible for mu(x) < min{x \in observed_x}?
    # Is this because the GaussianProcess's prior states that mu(x) = 0?
    # Will this affect the performance of GO, since everything not yet observed automatically
    # gets an extra boost where the prior dominates (look it up)? We know that the loss we are
    # ultimately optimizing is in [0, 1].
    y_pred, sigma = gp.predict(x, return_std=True)


    #http://www.scipy-lectures.org/advanced/mathematical_optimization/

    # x_min = fmin(negate(silly_f), 5)  # TODO better maximizer
    # Strong points: it is robust to noise, as it does not rely on computing gradients.
    # Thus it can work on functions that are not locally smooth, such as experimental
    # data points, as long as they display a large-scale bell-shape behavior. However,
    # it is slower than gradient-based methods on smooth, non-noisy functions.


    #opt_result = minimize(negate(silly_f), 5, bounds=[(0, 10)])  # TODO better maximizer
    #print(opt_result)
    #assert(opt_result.success)


    #x_min = opt_result.x


    # x_min = brent(negate(silly_f), brack=(0, 10))  # NOTE 1D only, NOTE not guaranteed to be within range brack=(0, 10) (see documentation)

    # TODO getting the gradient the gaussian would unlock all gradient based optimization methods!! (including L_BFGS)


    a = a_EI(gp, x_obs=X, y_obs=y, theta=0.01)
    a_x = np.apply_along_axis(a, 1, x)

    (x_min_,) = max(x, key=a)

    # TODO have a reasonable optimization (this doesn't scale well)
    #(x_min_,) = brute(
    #    negate(a),
    #    ranges=((0, 10),),
    #    Ns=64,
    #    finish=fmin,
    #)
    # FIXME brute can return numbers outside of the range!
    #   e.g. X = np.linspace(0, 10, 32), Ns=64, ranges=((0, 10),) gave x_min_ = 10.22...
    # I think it occurs when the function is pretty flat (but not constant)
    # TODO verify that finish function gets the same range as brute and don't wonder off (perhaps this is intended behaviour?)
    # TODO check https://github.com/scipy/scipy/blob/master/scipy/optimize/optimize.py#L2614 to see if it's possible for x_min to end up outside of the range (and if then when)

    print(x_min_)


    #plot_2d(x=x, x_=x_, y_pred=y_pred, sigma = sigma, a_x=a_x)
    #plot(x=x, y_pred=y_pred, x_obs=X, y_obs=y, x_min_=x_min_, sigma=sigma, a_x=a_x)
    #plt.show()

    # evaluate
    fx_min_ = f(x_min_)
    bo(
        X=np.vstack(
            (X,[x_min_,])
        ),
        y=np.hstack(
            (y,[fx_min_,])
        ),
    )
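# The acquisition a_EI used above is defined elsewhere in this file; for reference, a
# textbook expected-improvement acquisition (minimisation form) for a fitted
# GaussianProcessRegressor could look like the sketch below. This is an illustration
# only and not necessarily the same as the original a_EI.

import numpy as np
from scipy.stats import norm


def make_expected_improvement(gp, y_obs, xi=0.01):
    """Return a callable EI(x) that is large where improvement over min(y_obs) is likely."""
    y_best = np.min(y_obs)

    def ei(x):
        x = np.atleast_2d(x)
        mu, sigma = gp.predict(x, return_std=True)
        sigma = np.maximum(sigma, 1e-12)  # guard against zero predictive std
        z = (y_best - mu - xi) / sigma
        return (y_best - mu - xi) * norm.cdf(z) + sigma * norm.pdf(z)

    return ei

# Usage would mirror the call above: a = make_expected_improvement(gp, y_obs=y, xi=0.01)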
Beispiel #28
0
# 1) Put in initial length scales for param A and B
# 2) Put in reasonable length scale bounds for optimization
# 3) Fit a separate emulator to each principal component.
#    Take a look at the optimized hyper-parameters.
#    What do they mean?
"""
kernel = (
    1. * kernels.RBF(
        length_scale=[1, 1],
        length_scale_bounds=[(.1,10), (.1, 10)]
    )  
    + kernels.WhiteKernel(.1)
)
"""
kernel = (1. * kernels.RBF(length_scale=1, length_scale_bounds=(.1, 10)) +
          kernels.WhiteKernel(.1))
# Build and train each GP
gps = [GPR(kernel=kernel, n_restarts_optimizer=10) for i in range(npc)]
for i, gp in enumerate(gps):
    gp.fit(design, Z[:, i])
    print('RBF: ', gp.kernel_.get_params()['k1'])
    print('White: ', gp.kernel_.get_params()['k2'])

### Step 4-2: Validating the emulators #######################
# It is important to validate the performance of emulators to
# make sure they behave as expected.
# 1) Pick 6 random combinations of A and B. Compare the
#    emulators' predictions and the model calculations.
#    Do they agree? (A validation sketch follows the listing below.)
fig, (ax1, ax2) = plt.subplots(ncols=2, sharex=True)
for a in [
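# The listing above is truncated ("for a in [" is where the source cuts off). As an
# illustration only, a validation pass might predict at a handful of random design
# points and compare against fresh model runs; `design_min`, `design_max`, and
# `run_model` below are hypothetical placeholders, not part of the original code.

import numpy as np

rng = np.random.default_rng(0)
design_min, design_max = np.array([0.1, 0.1]), np.array([10.0, 10.0])  # placeholder (A, B) ranges
test_points = rng.uniform(design_min, design_max, size=(6, 2))

for i, gp_i in enumerate(gps):
    pred, std = gp_i.predict(test_points, return_std=True)
    # true_pc_i = project_onto_pc(run_model(test_points), i)  # placeholder for the model calculation
    print('PC %d prediction: %s +/- %s' % (i, np.round(pred, 3), np.round(std, 3)))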
Beispiel #29
0

def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), 1)[:, np.newaxis]


def logit(x):
    logx = np.log(x)
    return logx - logx[:, -1][:, np.newaxis]


depths_train = np.linspace(0, 3000, 3000)[:, np.newaxis]
depths_ts = depths_train[::30, :]

matk_fabric = k.Matern(length_scale=300.0, nu=0.5)
kern_p = k.WhiteKernel(noise_level=5.0) + matk_fabric
kern_sh = k.WhiteKernel(noise_level=5.0) + matk_fabric
kern_sv = k.WhiteKernel(noise_level=5.0) + matk_fabric

matk_vel_error = 100 * k.RBF(length_scale=600)
kern_a11 = k.WhiteKernel(noise_level=0.0001) + matk_fabric
kern_aii_noise = k.WhiteKernel(noise_level=0.2) + matk_fabric
kern_a22 = k.WhiteKernel(noise_level=10.0) + matk_fabric
kern_a33 = k.WhiteKernel(noise_level=1.0) + matk_fabric

gpr = gp.GaussianProcessRegressor(matk_fabric)
gpr_noise = gp.GaussianProcessRegressor(k.WhiteKernel(0.05))
aii = gpr.sample_y(depths_train, 3)
aii += np.array([-1, 0, 2])
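# For reference, gpr.sample_y above draws functions from the (zero-mean) Matern prior:
# the result has one column per draw, and the added offsets shift each draw to a
# different mean level. A minimal standalone check of the shapes:

import numpy as np
from sklearn import gaussian_process as gp
from sklearn.gaussian_process import kernels as k

depths = np.linspace(0, 3000, 3000)[:, np.newaxis]
prior = gp.GaussianProcessRegressor(k.Matern(length_scale=300.0, nu=0.5))
draws = prior.sample_y(depths, 3, random_state=0)  # shape (3000, 3): one column per prior draw
draws += np.array([-1, 0, 2])                      # per-column offsets, as above
print(draws.shape, draws.mean(axis=0))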
Beispiel #30
0
    def _fit_model(self, train_x: np.ndarray, train_y: np.ndarray,
                   out_dir: Path) -> Pipeline:
        """Fit and test a model using the latest data

        Args:
            train_x: Input columns
            train_y: Output column
            out_dir: Location to store the data
        """
        # Min-max scaling
        scale_factor = (train_y.max() - train_y.min())
        train_y = (train_y - train_y.min()) / scale_factor

        # Create an initial RBF kernel, using the training set mean as a scaling parameter
        kernel = train_y.mean()**2 * kernels.RBF(length_scale=1)

        # TODO (wardlt): Make it clear where featurization would appear, as we are soon to introduce additives
        #  This will yield chemical degrees of freedom better captured using features of the additives rather
        #  than a new variable per additive
        #  Notes for now: Mol. Weight, Side Chain Length, and ... are the likely candidates

        # Add a noise parameter based on user settings
        noise = self.opt_spec.planner_options.get('noise_level', 0)
        self.logger.debug(f'Using a noise level of {noise}')
        if noise < 0:
            # Use the standard deviation of train_y as the estimate of the initial noise level
            # TODO (wardlt): Document where 3, 4, and 11 come from
            noise_estimated = np.std(train_y) / 3
            noise_lb = noise_estimated / 4
            noise_ub = noise_estimated * 11

            kernel_noise = kernels.WhiteKernel(
                noise_level=noise_estimated**2,
                noise_level_bounds=(noise_lb**2, noise_ub**2))
            kernel = kernel + kernel_noise
        elif noise > 0:
            kernel = kernel + kernels.WhiteKernel(
                noise**2, noise_level_bounds=(noise**2, ) * 2)

        # Train a GPR model
        self.logger.debug('Starting kernel')
        model = Pipeline([('variance', VarianceThreshold()),
                          ('scale', StandardScaler()),
                          ('gpr', GaussianProcessRegressor(kernel))])

        # Perform k-Fold cross-validation to estimate model performance
        if len(train_x) > 5:
            cv_results = cross_validate(model,
                                        train_x,
                                        train_y,
                                        cv=RepeatedKFold(),
                                        return_train_score=True,
                                        scoring='neg_mean_squared_error')
            with out_dir.joinpath('cross-val-results.pkl').open('wb') as fp:
                pkl.dump(cv_results, fp)

            # Get the RMSE in the unscaled units
            rmse = np.sqrt(-1 * np.mean(cv_results["test_score"]))
            rmse *= scale_factor

            # Print out to screen
            self.logger.info(f'Performed cross-validation. RMSE: {rmse:.2e}')
        else:
            self.logger.info('Insufficient data for cross-validation')

        # Train and save the model
        model.fit(train_x, train_y)
        self.logger.info(
            f'Finished fitting the model on {len(train_x)} data points')
        self.logger.info(f'Optimized model: {model["gpr"].kernel_}')
        with out_dir.joinpath('model.pkl').open('wb') as fp:
            pkl.dump(model, fp)
        return model
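# To make the noise handling in _fit_model concrete, here is a standalone sketch of the
# kernel composed by the `noise < 0` branch (the /3, /4 and *11 factors are the ones used
# above; the training data below is made up purely for illustration):

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor, kernels

train_y = np.random.default_rng(1).normal(size=50)
train_y = (train_y - train_y.min()) / (train_y.max() - train_y.min())  # same min-max scaling as above

noise_estimated = np.std(train_y) / 3
noise_lb, noise_ub = noise_estimated / 4, noise_estimated * 11

kernel = train_y.mean() ** 2 * kernels.RBF(length_scale=1)
kernel = kernel + kernels.WhiteKernel(
    noise_level=noise_estimated ** 2,
    noise_level_bounds=(noise_lb ** 2, noise_ub ** 2))

print(kernel)
gpr = GaussianProcessRegressor(kernel)  # would then be fit inside the Pipeline above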