Code Example #1
import numpy as np
from typing import Callable

from sklearn.gaussian_process.kernels import Matern
from sklearn.gaussian_process.kernels import Kernel

dim = 100
sigma = .1
#Make Data
####################
X = np.linspace(0, 2 * np.pi, dim)
y = np.sin(X) + sigma * np.random.standard_normal(dim)
##################

length_scale = 2

#define required functions
phi = lambda x, y: np.linalg.norm(x - y)
mat = Matern(length_scale=length_scale, nu=2.5)


def calc_cov_matrix(X: np.ndarray, ker: Kernel):
    '''uses kernels from sklearn.gaussian_process.kernels to calculate a covariance matrix'''
    return ker(X)


def w_pCN_step(xi: np.ndarray,
               y: np.ndarray,
               beta: float,
               T: np.ndarray,
               phi: Callable,
               dim: int = -1):
    '''defines one step of the w-pCN algorithm as found in https://arxiv.org/pdf/1803.03344.pdf Section 3.1
Code Example #2
import numpy as np
from sklearn.gaussian_process.kernels import (ConstantKernel, DotProduct,
                                              Matern, RBF, WhiteKernel)

n_features = x.shape[1]

kernel = {
    1: ConstantKernel() * DotProduct() + WhiteKernel(),
    2: ConstantKernel() * RBF() + WhiteKernel(),
    3: ConstantKernel() * RBF() + WhiteKernel() +
       ConstantKernel() * DotProduct(),
    4: ConstantKernel() * RBF(np.ones(n_features)) + WhiteKernel(),
    5: ConstantKernel() * RBF(np.ones(n_features)) + WhiteKernel() +
       ConstantKernel() * DotProduct(),
    6: ConstantKernel() * Matern(nu=1.5) + WhiteKernel(),
    7: ConstantKernel() * Matern(nu=1.5) + WhiteKernel() +
       ConstantKernel() * DotProduct(),
    8: ConstantKernel() * Matern(nu=0.5) + WhiteKernel(),
    9: ConstantKernel() * Matern(nu=0.5) + WhiteKernel() +
       ConstantKernel() * DotProduct(),
    10: ConstantKernel() * Matern(nu=2.5) + WhiteKernel(),
    11: ConstantKernel() * Matern(nu=2.5) + WhiteKernel() +
        ConstantKernel() * DotProduct(),
    12: RBF()
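The mapping above (cut off before its closing brace in this excerpt) enumerates candidate kernels keyed by an integer. A minimal, hypothetical model-selection sketch over such a mapping, assuming training arrays x and y are available:

from sklearn.gaussian_process import GaussianProcessRegressor

# Illustrative only: fit each candidate kernel and keep the one with the
# highest fitted log marginal likelihood.
scores = {}
for key, k in kernel.items():
    gpr = GaussianProcessRegressor(kernel=k, normalize_y=True).fit(x, y)
    scores[key] = gpr.log_marginal_likelihood_value_
best_key = max(scores, key=scores.get)
print("best kernel id:", best_key, kernel[best_key])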
Code Example #3
from sklearn.gaussian_process.kernels import Matern, WhiteKernel
import numpy as np
import matplotlib.pyplot as plt


def dummy_environment(context, action):
    return np.sum(np.abs(action - (1 - context))) / len(context)


discvars = {'a1': np.linspace(0, 1, 100), 'a2': np.linspace(0, 1, 100)}
action_dim = len(discvars)
contexts = {'c1': '', 'c2': ''}
context_dim = len(contexts)

length_scale = np.ones(context_dim + action_dim)
kernel = WhiteKernel(noise_level=1) + Matern(nu=1.5, length_scale=length_scale)
noise = 1e-6

beta_function = 'const'
beta_const_val = 2.5

# In this example, we do not provide an initial dataset for an initial optimization
# of the kernel hyperparameters. This may cause the algorithm to get stuck in local optima.
optimizer = ContextualBayesianOptimization(all_actions_dict=discvars,
                                           contexts=contexts,
                                           kernel=kernel)

utility = UtilityFunction(kind="ucb",
                          beta_kind=beta_function,
                          beta_const=beta_const_val)

nIters = 150
Code Example #4
    xtrain.shape, ytrain.shape, xtest.shape, ytest.shape))

print("\nGAUSSIAN PROCESS CALIBRATION (Matern kernel)")
print("-" * 40, "\n")
print("Training observation samples: ", xtrain.shape)
print("Training target samples: ", ytrain.shape)
print("Starting...")

param_grid = {"n_restarts_optimizer": [3], "alpha": [0.25, 0.5, 0.75, 1]}

list_length_scale = np.arange(0.05, 1.0, 0.05)  # length_scale must be strictly positive
list_length_scale_bounds = [(1e-1, 1.0), (1e-1, 5.0)]
list_nu = [1.5, 2.5]

for l in list_length_scale:
    for b in list_length_scale_bounds:
        for n in list_nu:
            print("")
            print("GPR Matern with parameters: ", l, b, n)
            gp_kernel = 1.0 * Matern(
                length_scale=l, length_scale_bounds=b, nu=n)
            gpr = GaussianProcessRegressor(kernel=gp_kernel)
            # run grid search
            grid_search = GridSearchCV(gpr, param_grid=param_grid)
            start = time()
            grid_search.fit(xtrain, ytrain)
            print(
                "\tGridSearchCV took %.2f seconds for %d candidate parameter settings."
                % (time() - start, len(grid_search.cv_results_['params'])))
            report(grid_search.cv_results_)
Code Example #5
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MaxAbsScaler, RobustScaler
from tpot.export_utils import set_param_recursive

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'], random_state=123)

# Average CV score on the training set was: 0.9689994406236551
exported_pipeline = make_pipeline(
    RobustScaler(),
    MaxAbsScaler(),
    GaussianProcessRegressor(kernel=Matern(length_scale=4.0, nu=2.5), n_restarts_optimizer=20, normalize_y=False)
)
# Fix random state for all the steps in exported pipeline
set_param_recursive(exported_pipeline.steps, 'random_state', 123)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
Code Example #6
    def __init__(self,
                 target_func,
                 pbounds,
                 is_int,
                 invariant=None,
                 random_state=None,
                 verbose=True):
        """
        :param target_func:
            Function to be maximized.

        :param pbounds:
            Dictionary with parameter names as keys and a tuple of the
            minimum and maximum value of each parameter.

        :param is_int:
            List of booleans indicating whether each parameter is an integer.

        :param invariant:
            Dictionary with parameter names as keys and fixed values that are
            passed unchanged to target_func.

        :param verbose:
            Whether or not to print progress.

        """
        # Store the original dictionary
        self.pbounds = pbounds

        self.random_state = ensure_rng(random_state)

        # Data structure containing the function to be optimized, the bounds of
        # its domain, and a record of the evaluations we have done so far
        self.space = TargetSpace(target_func,
                                 pbounds,
                                 is_int,
                                 invariant=invariant,
                                 random_state=random_state)
        self.is_int = is_int

        # Initialization flag
        self.initialized = False

        # Initialization lists --- stores starting points before process begins
        self.init_points = []
        self.x_init = []
        self.y_init = []

        # Counter of iterations
        self.i = 0

        # Internal GP regressor
        self.gp = GaussianProcessRegressor(kernel=Matern(nu=2.5),
                                           n_restarts_optimizer=25,
                                           random_state=self.random_state)

        # Utility Function placeholder
        self.util = None

        # PrintLog object
        self.plog = PrintLog(self.space.keys)

        # Output dictionary
        self.res = {}
        # Output dictionary
        self.res['max'] = {'max_val': None, 'max_params': None}
        self.res['all'] = {'values': [], 'params': []}

        # non-public config for maximizing the acquisition function
        # (used to speed up tests, but generally leave these as is)
        self._acqkw = {'n_warmup': 100000, 'n_iter': 250}

        # Verbose
        self.verbose = verbose
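A hypothetical instantiation sketch based on the docstring above; the enclosing class name is not shown in this excerpt, so BayesianOptimization (and the toy objective) are assumptions, not part of the original project:

# Toy objective with a continuous and an integer parameter plus one fixed argument.
def toy_target(x, n_trees, scale):
    return -(x - 0.3) ** 2 - 0.01 * abs(n_trees - 120) + scale

optimizer = BayesianOptimization(target_func=toy_target,
                                 pbounds={'x': (0.0, 1.0), 'n_trees': (10, 300)},
                                 is_int=[False, True],
                                 invariant={'scale': 1.0},
                                 random_state=42,
                                 verbose=True)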
Code Example #7
File: l_frac_fit.py  Project: lgalbany/sniilocal
        X is the known age values: the actual 23 data points
        """

        # x Values
        x = np.atleast_2d(np.linspace(6, 10.255, 1000)).T

        # X Values
        X = np.log10(final_ages)
        X = np.reshape(X, (23, 1))

        # y Values
        y = np.asarray(xj_totals)
        y = np.reshape(y, 23)

        # Instantiate a Gaussian Process model
        kernel = 1.0 * Matern(
            length_scale=1.0, length_scale_bounds=(1e-1, 10.0), nu=1.5)
        gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

        # Fit to data using Maximum Likelihood Estimation of the parameters
        gp.fit(X, y)

        # Make the prediction on the meshed x-axis
        y_pred, sigma = gp.predict(x, return_std=True)
        """New Tasks
        --Get rid of any negative values: default to zero
        --Do the integral of the fit and normalize
            (array of 1000 values / integral * 100)
        """

        print(np.shape(y_pred))
Code Example #8
    reg_alpha=0.9,
    reg_lambda=0.6,
    subsample=0.2,
    random_state=SEED,
    silent=1)

best_alpha = 0.00099
regr2 = Lasso(alpha=best_alpha, max_iter=50000)

regr3 = ElasticNet(alpha=0.001)

regr4 = KernelRidge(alpha=0.3, kernel='polynomial', degree=2, coef0=1.85)

# regr5 = svm.SVR(kernel='rbf')

kernel = 1.0**2 * Matern(
    length_scale=1.0, length_scale_bounds=(1e-05, 100000.0), nu=0.5)
regr5 = GaussianProcessRegressor(kernel=kernel,
                                 alpha=5e-9,
                                 optimizer='fmin_l_bfgs_b',
                                 n_restarts_optimizer=0,
                                 normalize_y=False,
                                 copy_X_train=True,
                                 random_state=SEED)

en_regr = RandomForestRegressor(n_estimators=200,
                                max_features='auto',
                                max_depth=12,
                                min_samples_leaf=2)

# regr6 = ExtraTreesRegressor(n_estimators=200, max_features=24,
#                            max_depth=13, min_samples_leaf=2)
Code Example #9
def choose_location(dataset_loc1,
                    dataset_loc2,
                    gp_params=None,
                    alpha=1e-3,
                    epsilon=1e-3,
                    sig_level=0.05):
    """
     Return a decision variable indicating which location to choose
        decision = 1, loc1 > loc2
                = 0 , inconclusive
                = -1, loc1 < loc2

    :param dataset_loc1: PS dataset for loc1
    :param dataset_loc2: PS dataset for loc2
    :param gp_params: params for gaussian process regressor (set to sklearn defaults)
    :param alpha:
    :param epsilon:
    :param sig_level: significant level for t-test

    """

    # set up kernels
    kernel_matern = Matern() + WhiteKernel(noise_level=1)
    model = gp.GaussianProcessRegressor(kernel=kernel_matern,
                                        alpha=alpha,
                                        n_restarts_optimizer=5,
                                        normalize_y=True)

    # get predicted values for loc1
    xp_loc1 = np.array(map(lambda x: [x], dataset_loc1[:, 0]))
    yp_loc1 = np.array(map(lambda y: [y], dataset_loc1[:, 1]))

    x_max_loc1, y_max_loc1, se_1 = find_max(xp_loc1, yp_loc1, model)

    # get predicted values for loc2
    xp_loc2 = np.array(map(lambda x: [x], dataset_loc2[:, 0]))
    yp_loc2 = np.array(map(lambda y: [y], dataset_loc2[:, 1]))

    x_max_loc2, y_max_loc2, se_2 = find_max(xp_loc2, yp_loc2, model)

    t_stat = (y_max_loc1 - y_max_loc2) / np.sqrt(se_1**2 + se_2**2)
    p_val = norm.cdf(-np.abs(t_stat))

    if p_val < sig_level:
        if t_stat > 0:
            decision = {
                'Amplitude': x_max_loc1,
                'Max Delta Classifier': y_max_loc1,
                'Location': 'Loc1',
                'decision': 1,
                'tie': 1
            }
            print "Loc 1 is better than Loc 2"
        else:
            print "Loc 2 is better than  Loc 1"
            decision = {
                'Amplitude': x_max_loc2,
                'Max Delta Classifier': y_max_loc2,
                'Location': 'Loc2',
                'decision': -1,
                'tie': 1
            }

    else:
        if se_1 < se_2:
            print "Same but Loc1 is more reliable"
            decision = {
                'Amplitude': x_max_loc1,
                'Max Delta Classifier': y_max_loc1,
                'Location': 'Loc1',
                'decision': 1,
                'tie': 1
            }
        else:
            print "Same but Loc2 is more reliable"
            decision = {
                'Amplitude': x_max_loc2,
                'Max Delta Classifier': y_max_loc2,
                'Location': 'Loc2',
                'decision': -1,
                'tie': 1
            }

    result_dict = OrderedDict()
    result_dict['decision'] = decision
    result_dict['p_val'] = p_val
    result_dict['loc1'] = {
        'Amplitude': x_max_loc1,
        'Maximum Delta Classifier ': y_max_loc1[0],
        'se': se_1[0]
    }
    result_dict['loc2'] = {
        'Amplitude': x_max_loc2,
        'Maximum Delta Classifier': y_max_loc2[0],
        'se': se_2[0]
    }

    return result_dict
Code Example #10
    def test_bayes_optimization(self):
        print("Start bayesian optimization")

        # 1. Initialize parameters
        acquisition_function_kappa = 5
        init_point_number = 3
        iteration_number = 3
        iteration_index = 0
        train_features = []
        train_labels = []

        gp = GaussianProcessRegressor(
            kernel=Matern(nu=2.5),
            n_restarts_optimizer=25,
        )

        bound_dict = {'x': (-4, 4), 'y': (-3, 3)}
        # Example: [[-4,  4], [-3,  3]]
        bounds = []
        for key in bound_dict.keys():
            bounds.append(bound_dict[key])
        # Example: ndarray([[-4,  4], [-3,  3]])
        bounds = np.asarray(bounds)

        # 2. Get init random samples
        # Example: array([-3.66909025, -1.93270006, 1.36095631])
        init_xs = np.random.uniform(-4, 4, size=init_point_number)
        # Example: array([-0.84486644, -0.95367483, 0.61358525])
        init_ys = np.random.uniform(-3, 3, size=init_point_number)

        # Example: [[-3.66909025, -0.84486644], [-1.93270006, -0.95367483], [1.36095631, 0.61358525]]
        init_points = []
        for i in range(init_point_number):
            init_points.append([init_xs[i], init_ys[i]])

        # Example: [-4.4555402320291684, -7.9016857176523114]
        init_labels = []
        for point in init_points:
            init_labels.append(self.test_function(point[0], point[1]))

        # 3. GP compute the prior
        train_features = np.asarray(init_points)
        train_labels = np.asarray(init_labels)
        current_max_label = train_labels.max()

        gp.fit(train_features, train_labels)

        # 4. Acquisition function computes the max value
        # Example: [[-3.66909025, -0.84486644], [-1.93270006, -0.95367483], [1.36095631, 0.61358525], ...], shape is [100000, 2]
        x_tries = np.random.uniform(bounds[:, 0],
                                    bounds[:, 1],
                                    size=(100000, bounds.shape[0]))

        mean, std = gp.predict(x_tries, return_std=True)
        # Confidence bound criteria
        acquisition_function_values = mean + acquisition_function_kappa * std
        x_max = x_tries[acquisition_function_values.argmax()]
        max_acquisition_function_value = acquisition_function_values.max()

        x_max = np.clip(x_max, bounds[:, 0], bounds[:, 1])
        print("Current max acquision function choose: {}".format(x_max))

        for i in range(iteration_number):
            iteration_index += 1

            # 5. Choose the best and compute to add in train dataset
            train_features = np.vstack((train_features, x_max.reshape(
                (1, -1))))
            train_labels = np.append(train_labels,
                                     self.test_function(x_max[0], x_max[1]))

            # 6. Re-compute the gaussian process and acquisition function
            gp.fit(train_features, train_labels)

            # Update maximum value
            if train_labels[-1] > current_max_label:
                current_max_label = train_labels[-1]
                print("Get the better parameters!")

            x_tries = np.random.uniform(bounds[:, 0],
                                        bounds[:, 1],
                                        size=(100000, bounds.shape[0]))

            mean, std = gp.predict(x_tries, return_std=True)
            acquisition_function_values = mean + acquisition_function_kappa * std
            x_max = x_tries[acquisition_function_values.argmax()]
            max_acquisition_function_value = acquisition_function_values.max()

            x_max = np.clip(x_max, bounds[:, 0], bounds[:, 1])
            print(
                "Max label: {}, current label: {}, acquisition function choice: {}"
                .format(current_max_label, train_labels[-1], x_max))
Code Example #11
File: gp_opt.py  Project: yunjie-yang/scikit-optimize
def gp_minimize(func,
                dimensions,
                base_estimator=None,
                alpha=10e-10,
                acq="EI",
                xi=0.01,
                kappa=1.96,
                search="auto",
                n_calls=100,
                n_points=500,
                n_random_starts=10,
                n_restarts_optimizer=5,
                x0=None,
                y0=None,
                random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take forever!

    The idea is to approximate the function using a Gaussian process.
    In other words, the function values are assumed to follow a multivariate
    Gaussian. The covariance of the function values is given by a
    GP kernel between the parameters. The next parameter to evaluate can then
    be chosen by optimizing an acquisition function over this Gaussian prior,
    which is much cheaper to evaluate.

    The total number of evaluations, `n_calls`, are performed like the
    following. If `x0` is provided but not `y0`, then the elements of `x0`
    are first evaluated, followed by `n_random_starts` evaluations.
    Finally, `n_calls - len(x0) - n_random_starts` evaluations are
    made guided by the surrogate model. If `x0` and `y0` are both
    provided then `n_random_starts` evaluations are first made then
    `n_calls - n_random_starts` subsequent evaluations are made
    guided by the surrogate model.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization.

    * `alpha` [float, default=1e-10]:
        Value added to the diagonal of the kernel matrix during fitting.
        Larger values correspond to an increased noise level in the
        observations and reduce potential numerical issues during fitting.

    * `acq` [string, default=`"EI"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"auto"`, `"sampling"` or `"lbfgs"`, default=`"auto"`]:
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to `"auto"`, then it is set to `"lbfgs"` if
        all the search dimensions are Real(continuous). It defaults to
        `"sampling"` for all other cases.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then a point is sampled randomly, and
        lbfgs is run for 10 iterations optimizing the acquisition function
        over the Gaussian prior.

    * `n_calls` [int, default=100]:
        Number of calls to `func`.

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Useless if search is set to `"lbfgs"`.

    * `n_random_starts` [int, default=10]:
        Number of evaluations of `func` with random initialization points
        before approximating the `func` with `base_estimator`.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None`, no initial input points are used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the function
          at each element of `x0` : the i-th element of `y0` corresponds
          to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [list of lists]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimization space.
        - `specs` [dict]: the call specifications.
        - `rng` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html
    """
    # Save call args
    specs = {
        "args": copy.copy(inspect.currentframe().f_locals),
        "function": inspect.currentframe().f_code.co_name
    }

    # Check params
    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Default GP
    if base_estimator is None:
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) * Matern(
                length_scale=np.ones(space.transformed_n_dims),
                length_scale_bounds=[(0.01, 100)] * space.transformed_n_dims,
                nu=2.5)),
            normalize_y=True,
            alpha=alpha,
            random_state=random_state)

    # Initialize with provided points (x0 and y0) and/or random points
    if x0 is None:
        x0 = []
    elif not isinstance(x0[0], list):
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, but got %s" % type(x0))

    n_init_func_calls = len(x0) if y0 is not None else 0
    n_total_init_calls = n_random_starts + n_init_func_calls

    if n_total_init_calls <= 0:
        # if x0 is not provided and n_random_starts is 0 then
        # it will ask for n_random_starts to be > 0.
        raise ValueError("Expected `n_random_starts` > 0, got %d" %
                         n_random_starts)

    if n_calls < n_total_init_calls:
        raise ValueError("Expected `n_calls` >= %d, got %d" %
                         (n_total_init_calls, n_calls))

    if y0 is None and x0:
        y0 = [func(x) for x in x0]
    elif x0:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError("`y0` should be an iterable or a scalar, got %s" %
                             type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")
    else:
        y0 = []

    Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = y0 + [func(x) for x in Xi[len(x0):]]
    if np.ndim(yi) != 1:
        raise ValueError("`func` should return a scalar")

    if search == "auto":
        if space.is_real:
            search = "lbfgs"
        else:
            search = "sampling"
    elif search not in ["lbfgs", "sampling"]:
        raise ValueError(
            "Expected search to be 'lbfgs', 'sampling' or 'auto', "
            "got %s" % search)

    # Bayesian optimization loop
    models = []
    n_model_iter = n_calls - n_total_init_calls
    for i in range(n_model_iter):
        gp = clone(base_estimator)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(space.transform(Xi), yi)

        models.append(gp)

        if search == "sampling":
            X = space.transform(space.rvs(n_samples=n_points,
                                          random_state=rng))
            values = _gaussian_acquisition(X=X,
                                           model=gp,
                                           y_opt=np.min(yi),
                                           method=acq,
                                           xi=xi,
                                           kappa=kappa)
            next_x = X[np.argmin(values)]

        elif search == "lbfgs":
            best = np.inf

            for j in range(n_restarts_optimizer):
                x0 = space.transform(space.rvs(n_samples=1,
                                               random_state=rng))[0]

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    x, a, _ = fmin_l_bfgs_b(_acquisition,
                                            x0,
                                            args=(gp, np.min(yi), acq, xi,
                                                  kappa),
                                            bounds=space.transformed_bounds,
                                            approx_grad=True,
                                            maxiter=20)

                if a < best:
                    next_x, best = x, a

        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi.append(next_x)
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res
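A minimal usage sketch of the gp_minimize defined above, on a toy one-dimensional objective (the objective and bounds are illustrative, not from the original project):

def toy_objective(params):
    # single Real dimension; minimum at x = 2
    x = params[0]
    return (x - 2.0) ** 2

result = gp_minimize(toy_objective,
                     dimensions=[(-5.0, 5.0)],
                     acq="EI",
                     n_calls=30,
                     n_random_starts=10,
                     random_state=0)
print(result.x, result.fun)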
Code Example #12
    def get_new_suggestions(self, study_id, trials=[], number=1):
        # TODO: Only support returning one trial
        number = 1

        # Get study and completed data
        study = Study.objects.get(id=study_id)
        completed_trials = Trial.objects.filter(study_id=study_id,
                                                status="Completed")
        study_configuration_json = json.loads(study.study_configuration)
        random_init_trial_number = study_configuration_json.get(
            "randomInitTrials", 3)
        params = study_configuration_json["params"]
        study_goal = study_configuration_json["goal"]

        # Fall back to random search if there are not enough completed trials
        if len(completed_trials) < random_init_trial_number:
            randomSearchAlgorithm = RandomSearchAlgorithm()
            return_trials = randomSearchAlgorithm.get_new_suggestions(
                study_id, trials, number)
            return return_trials

        # Construct the map of name and scope to compute gaussian process
        acquisition_function_kappa = 5

        # Example: {'x': (-4, 4), 'y': (-3, 3)}
        # name_scope_map = {}
        # Construct the list with only scope, Example: [(40, 400)]
        bounds = []

        for param in params:

            if param["type"] == "DOUBLE" or param["type"] == "INTEGER":
                min_value = param["minValue"]
                max_value = param["maxValue"]
                # name_scope_map[param["parameterName"]] = (min_value, max_value)
                bounds.append((min_value, max_value))

            elif param["type"] == "DISCRETE":
                feasible_points_string = param["feasiblePoints"]
                feasible_points = [
                    float(value.strip())
                    for value in feasible_points_string.split(",")
                ]
                for feasible_point in feasible_points:
                    parameter_name = "{}_{}".format(param["parameterName"],
                                                    feasible_point)
                    # name_scope_map[parameter_name] = (0, 1)
                    bounds.append((0, 1))

            elif param["type"] == "CATEGORICAL":
                feasible_points_string = param["feasiblePoints"]
                feasible_points = [
                    value.strip()
                    for value in feasible_points_string.split(",")
                ]
                for feasible_point in feasible_points:
                    parameter_name = "{}_{}".format(param["parameterName"],
                                                    feasible_point)
                    # name_scope_map[parameter_name] = (0, 1)
                    bounds.append((0, 1))

        # Make sure it is a numpy ndarray
        bounds = np.asarray(bounds)

        # Construct data to train gaussian process, Example: [[50], [150], [250]]
        init_points = []
        # Example: [0.6, 0.8, 0.6]
        init_labels = []

        # Construct train data with completed trials
        for trial in completed_trials:
            # Example: {"learning_rate": 0.01, "optimizer": "ftrl"}
            parameter_values_json = json.loads(trial.parameter_values)

            # Example: [0.01, "ftrl"]
            instance_features = []
            instance_label = trial.objective_value

            for param in params:

                if param["type"] == "DOUBLE" or param["type"] == "INTEGER":
                    instance_feature = parameter_values_json[
                        param["parameterName"]]
                    instance_features.append(instance_feature)

                elif param["type"] == "DISCRETE":
                    feasible_points_string = param["feasiblePoints"]
                    feasible_points = [
                        float(value.strip())
                        for value in feasible_points_string.split(",")
                    ]
                    parameter_value = parameter_values_json[
                        param["parameterName"]]
                    for feasible_point in feasible_points:
                        if feasible_point == parameter_value:
                            instance_features.append(1)
                        else:
                            instance_features.append(0)

                elif param["type"] == "CATEGORICAL":
                    feasible_points_string = param["feasiblePoints"]
                    # Example: ["sgd", "adagrad", "adam", "ftrl"]
                    feasible_points = [
                        value.strip()
                        for value in feasible_points_string.split(",")
                    ]
                    # Example: "ftrl"
                    parameter_value = parameter_values_json[
                        param["parameterName"]]
                    for feasible_point in feasible_points:
                        if feasible_point == parameter_value:
                            instance_features.append(1)
                        else:
                            instance_features.append(0)

            init_points.append(instance_features)
            init_labels.append(instance_label)

        # Example: ndarray([[ 50], [150], [250]])
        train_features = np.asarray(init_points)
        # Example: ndarray([0.6, 0.8, 0.6])
        train_labels = np.asarray(init_labels)
        # current_max_label = train_labels.max()

        # Train with gaussian process
        gp = GaussianProcessRegressor(
            kernel=Matern(nu=2.5),
            n_restarts_optimizer=25,
        )

        gp.fit(train_features, train_labels)

        # Example: [[-3.66909025, -0.84486644], [-1.93270006, -0.95367483], [1.36095631, 0.61358525], ...], shape is [100000, 2]
        x_tries = np.random.uniform(bounds[:, 0],
                                    bounds[:, 1],
                                    size=(100000, bounds.shape[0]))

        mean, std = gp.predict(x_tries, return_std=True)

        # Confidence bound criteria
        acquisition_function_values = mean + acquisition_function_kappa * std

        #x_max = x_tries[acquisition_function_values.argmax()]
        # to be chosen based on the study goal below
        #x_max = x_tries[acquisition_function_values.argmin()]

        if study_goal == "MAXIMIZE":
            x_max = x_tries[acquisition_function_values.argmax()]
            #max_acquisition_function_value = acquisition_function_values.max()
        elif study_goal == "MINIMIZE":
            x_max = x_tries[acquisition_function_values.argmin()]
            #max_acquisition_function_value = acquisition_function_values.min()
        else:
            # TODO: Throw the error
            x_max = []

        # Example: [3993.864683994805, 44.15441513231316]
        x_max = np.clip(x_max, bounds[:, 0], bounds[:, 1])
        print("Current max acquision function choose: {}".format(x_max))

        # Example: {"hidden2": 3993.864683994805, "hidden1": 44.15441513231316}
        suggested_parameter_values_json = {}

        index = 0
        """
    Construct the suggested params according to the result of gaussian process
    # Example prior result: [0.1, 0.5, 0.3, 0.9]
    # Example param scope: {"learning_rate": (0.01, 0.5), "hidden1": (40, 400), "optimizer_sgd": (0, 1), "optimizer_ftrl": (0, 1)}
    for key in bound_dict.keys():
      parameter_values_json[key] = x_max[index]
      index += 1
    """

        for param in params:

            if param["type"] == "DOUBLE":
                suggested_parameter_values_json[
                    param["parameterName"]] = x_max[index]
                index += 1

            elif param["type"] == "INTEGER":
                suggested_parameter_values_json[param["parameterName"]] = int(
                    x_max[index])
                index += 1

            elif param["type"] == "DISCRETE":
                feasible_points_string = param["feasiblePoints"]
                feasible_points = [
                    float(value.strip())
                    for value in feasible_points_string.split(",")
                ]

                # Find the max value of these and get its string
                current_max = x_max[index]
                suggested_parameter_value = feasible_points[0]

                for feasible_point in feasible_points:
                    if x_max[index] > current_max:
                        current_max = x_max[index]
                        suggested_parameter_value = feasible_point
                    index += 1

                suggested_parameter_values_json[
                    param["parameterName"]] = suggested_parameter_value

            elif param["type"] == "CATEGORICAL":
                feasible_points_string = param["feasiblePoints"]
                # Example: ["sgd", "adagrad", "adam", "ftrl"]
                feasible_points = [
                    value.strip()
                    for value in feasible_points_string.split(",")
                ]

                # Find the max value of these and get its string
                current_max = x_max[index]
                suggested_parameter_value = feasible_points[0]

                for feasible_point in feasible_points:
                    if x_max[index] > current_max:
                        current_max = x_max[index]
                        suggested_parameter_value = feasible_point
                    index += 1

                suggested_parameter_values_json[
                    param["parameterName"]] = suggested_parameter_value

        return_trial = Trial.create(study.id, "BayesianOptimizationTrial")
        return_trial.parameter_values = json.dumps(
            suggested_parameter_values_json)
        return_trial.save()

        return [return_trial]
Code Example #13
    def __init__(self, nu):
        self.nu = nu
        self.matern = Matern(nu=nu)  # pass nu by keyword; the first positional argument is length_scale
Code Example #14
File: bayesian.py  Project: mihkelKR/masin
# Write the function to be optimized.
# Pass in a 2-D XY array.
def f(XY, a=1, b=100):
    x = XY[:, 0]
    y = XY[:, 1]
    Z = (a - x)**2 + b * (y - x**2)**2
    return Z.reshape(-1, 1)


# Generate the starting points

XY = np.random.uniform(bounds[0, 0], bounds[0, 1], [no_startingPoints, 2])
Z = f(XY)

# Choose the model and kernel
customKernel = C(1.0) * Matern()
model = GaussianProcessRegressor(kernel=customKernel)

# Compute the expected improvement


def expected_improvement(sample, XY, Z, model, exploration):
    '''
    Computes the EI at points X based on existing samples XY
    and Z using a Gaussian process surrogate model.
    
    Args:
        sample: Points at which EI shall be computed (m x d).
        XY: Sample locations (n x d).
        Z: Sample values (n x 1).
        model: A GaussianProcessRegressor fitted to samples.
Code Example #15
# Noise-free objective function values at X 
Y = f(X,0)

# Plot optimization objective with noise level 
plt.plot(X, Y, 'y--', lw=2, label='Noise-free objective')
plt.plot(X, f(X), 'bx', lw=1, alpha=0.1, label='Noisy samples')
plt.plot(X_init, Y_init, 'kx', mew=3, label='Initial samples')
plt.legend();
if save_figures: save_fig('bayes-opt-init.pdf')
plt.show()


################


kernel = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5)
gpr = GaussianProcessRegressor(kernel=kernel, alpha=noise**2)


"""
https://github.com/scikit-learn/scikit-learn/blob/7b136e9/sklearn/gaussian_process/kernels.py#L1287
The parameter nu controlling the smoothness of the learned function.
        The smaller nu, the less smooth the approximated function is.
        For nu=inf, the kernel becomes equivalent to the RBF kernel and for
        nu=0.5 to the absolute exponential kernel. Important intermediate
        values are nu=1.5 (once differentiable functions) and nu=2.5
        (twice differentiable functions). Note that values of nu not in
        [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost
        (appr. 10 times higher) since they require to evaluate the modified
        Bessel function. Furthermore, in contrast to l, nu is kept fixed to
        its initial value and not optimized.
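The scikit-learn note quoted above (the excerpt cuts off before the closing quotes) motivates sticking to the standard nu values. A small, self-contained illustrative sketch, separate from the snippet above:

import numpy as np
from sklearn.gaussian_process.kernels import ConstantKernel, Matern

# The standard choices have cheap closed forms; nu=np.inf reproduces the RBF kernel.
kernels_by_nu = {nu: ConstantKernel(1.0) * Matern(length_scale=1.0, nu=nu)
                 for nu in (0.5, 1.5, 2.5, np.inf)}
for nu, k in kernels_by_nu.items():
    print(nu, k)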
Code Example #16
def main():
    global evalcnt

    if len(sys.argv) != 4:
        print "Error: exactly 3 arguments are required"
        sys.exit(1)

    ref_dir = sys.argv[1]
    out_dir = sys.argv[2]
    lumi = float(sys.argv[3])

    print ref_dir
    print out_dir
    print lumi

    def punzi_target(WP_VBF2j, WP_VBF1j, WP_WHh, WP_ZHh):
        global evalcnt

        bin_dir = "/home/llr/cms/wind/cmssw/CMSSW_9_4_2/bin/slc6_amd64_gcc630/"
        cost_function_evaluator = "run_WP_evaluator"

        output = check_output([
            bin_dir + cost_function_evaluator, ref_dir, out_dir,
            str(lumi),
            str(WP_VBF2j),
            str(WP_VBF1j),
            str(WP_WHh),
            str(WP_ZHh)
        ])

        costval = 0.0

        for line in output.split('\n'):
            if "cost = " in line:
                costval = float(line.replace("cost = ", ""))
                break

        if math.isnan(costval):
            costval = -8.75

        # save the sampled point so that it can later be reused as an exploration point (if the need arises)
        confhandler = ConfigFileHandler()
        evaluations_path = out_dir + 'evaluations.txt'

        if os.path.exists(evaluations_path):
            confhandler.load_configuration(evaluations_path)

        print "saving evaluation for iteration " + str(evalcnt)

        section_name = 'evaluation_' + str(evalcnt)
        confhandler.new_section(section_name)
        confhandler.set_field(section_name, 'cost', str(costval))
        confhandler.set_field(section_name, 'WP_VBF2j', str(WP_VBF2j))
        confhandler.set_field(section_name, 'WP_VBF1j', str(WP_VBF1j))
        confhandler.set_field(section_name, 'WP_WHh', str(WP_WHh))
        confhandler.set_field(section_name, 'WP_ZHh', str(WP_ZHh))

        confhandler.save_configuration(evaluations_path)

        evalcnt += 1

        return costval

    eps = 1e-3
    delta = 0.2
    bo = BayesianOptimization(
        punzi_target, {
            'WP_VBF2j': (eps, 1.0 - eps),
            'WP_VBF1j': (eps, 1.0 - eps),
            'WP_WHh': (eps, 1.0 - eps),
            'WP_ZHh': (eps, 1.0 - eps)
        })

    # check if a file with previously evaluated points exists, if so, use them for initialization
    confhandler = ConfigFileHandler()
    evaluations_path = out_dir + 'evaluations.txt'

    if os.path.exists(evaluations_path):
        confhandler.load_configuration(evaluations_path)

        targets_init = []
        WP_VBF2j_init = []
        WP_VBF1j_init = []
        WP_WHh_init = []
        WP_ZHh_init = []

        for section_name in confhandler.get_sections():
            cur_section = confhandler.get_section(section_name)

            targets_init.append(float(cur_section['cost']))
            WP_VBF2j_init.append(float(cur_section['WP_VBF2j']))
            WP_VBF1j_init.append(float(cur_section['WP_VBF1j']))
            WP_WHh_init.append(float(cur_section['WP_WHh']))
            WP_ZHh_init.append(float(cur_section['WP_ZHh']))

        init_dict = {
            'target': targets_init,
            'WP_VBF2j': WP_VBF2j_init,
            'WP_VBF1j': WP_VBF1j_init,
            'WP_WHh': WP_WHh_init,
            'WP_ZHh': WP_ZHh_init
        }

        evalcnt = int(re.sub('evaluation_', '',
                             confhandler.get_sections()[-1])) + 1

        print "resuming at evaluation " + str(evalcnt)

        bo.initialize(init_dict)
        initialized = True
    else:
        initialized = False

    # change the kernel to have a length scale more appropriate to this function
    gp_params = {
        'kernel':
        1.0 *
        Matern(length_scale=0.05, length_scale_bounds=(1e-5, 1e5), nu=1.5),
        'alpha':
        1e-5
    }

    # perform the standard initialization and setup
    if initialized:
        bo.maximize(init_points=0,
                    n_iter=0,
                    acq='poi',
                    kappa=3,
                    xi=xi_scheduler(0.0),
                    **gp_params)
    else:
        bo.maximize(init_points=6,
                    n_iter=0,
                    acq='poi',
                    kappa=3,
                    xi=xi_scheduler(0.0),
                    **gp_params)

    cur_iteration = 1
    for it in range(1000):
        cur_xi = xi_scheduler(cur_iteration)
        cur_iteration += 1
        print "using xi = " + str(cur_xi)

        bo.maximize(init_points=6,
                    n_iter=1,
                    acq='poi',
                    kappa=3,
                    xi=cur_xi,
                    **gp_params)

        # evaluate the current maximum
        curval = bo.res['max']
        cost = curval['max_val']
        WPs = curval['max_params']

        confhandler = ConfigFileHandler()
        confhandler.config.optionxform = str
        confhandler.new_section('WPs')
        confhandler.set_field('WPs', 'cost', str(cost))

        for key, val in WPs.iteritems():
            confhandler.set_field('WPs', key, str(val))

        confhandler.save_configuration(out_dir + 'WPs.txt')
Code Example #17
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from sklearn.kernel_approximation import Nystroem
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from tpot.builtins import StackingEstimator
from tpot.export_utils import set_param_recursive
from sklearn.preprocessing import FunctionTransformer
from copy import copy

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE',
                        sep='COLUMN_SEPARATOR',
                        dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'], random_state=123)

# Average CV score on the training set was: 0.9917144075724378
exported_pipeline = make_pipeline(
    make_union(FunctionTransformer(copy),
               Nystroem(gamma=0.4, kernel="chi2", n_components=6)),
    GaussianProcessRegressor(kernel=Matern(length_scale=2.9000000000000004,
                                           nu=1.5),
                             n_restarts_optimizer=155,
                             normalize_y=True))
# Fix random state for all the steps in exported pipeline
set_param_recursive(exported_pipeline.steps, 'random_state', 123)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
Code Example #18
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import PowerTransformer
from tpot.builtins import StackingEstimator
from tpot.export_utils import set_param_recursive
from sklearn.preprocessing import FunctionTransformer
from copy import copy

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'], random_state=123)

# Average CV score on the training set was: 0.9943988338287667
exported_pipeline = make_pipeline(
    make_union(
        FunctionTransformer(copy),
        FunctionTransformer(copy)
    ),
    PowerTransformer(),
    GaussianProcessRegressor(kernel=Matern(length_scale=4.3999999999999995, nu=1.5), n_restarts_optimizer=20, normalize_y=True)
)
# Fix random state for all the steps in exported pipeline
set_param_recursive(exported_pipeline.steps, 'random_state', 123)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
Code Example #19
def update_b_to_default(n, slider_choice, form, noise, lower, upper,
                        iterations):
    if not n:
        raise PreventUpdate

    global run_count, plot_list, conv_plot

    if n == run_count:

        marks = {i: '{}'.format(i) for i in range(iterations + 1)}

        # Iterations
        ## Iterative plots
        bounds = np.array([[lower, upper]])
        noise = noise
        # Number of iterations
        n_iter = iterations

        def f(X, noise=noise):
            return eval(form + ' + noise * np.random.randn(*X.shape)')

        X_init = np.array(
            [np.random.uniform(low=bounds[:, 0], high=bounds[:, 1])])
        Y_init = f(X_init)

        ## General Plot
        # Dense grid of points within bounds
        X = np.arange(bounds[:, 0], bounds[:, 1], 0.01).reshape(-1, 1)
        # Noise-free objective function values at X
        Y = f(X, 0)

        ## GRP
        # Gaussian process with Matérn kernel as surrogate model
        m52 = Matern(length_scale=1.0, nu=2.5)
        gpr = GaussianProcessRegressor(kernel=m52, alpha=noise**1.6)

        # Initialize samples
        X_sample = X_init
        Y_sample = Y_init

        plot_list = []
        for i in range(n_iter):
            # Update Gaussian process with existing samples
            gpr.fit(X_sample, Y_sample)

            # Obtain next sampling point from the acquisition function (expected_improvement)
            X_next = propose_location(expected_improvement, X_sample, Y_sample,
                                      gpr, bounds)

            # Obtain next noisy sample from the objective function
            Y_next = f(X_next, noise)

            xx = create_plots(f, gpr, X, Y, X_sample, Y_sample,
                              expected_improvement(X, X_sample, Y_sample, gpr),
                              X_next, bounds)

            plot_list.append(xx)

            # Add sample to previous samples
            X_sample = np.vstack((X_sample, X_next))
            Y_sample = np.vstack((Y_sample, Y_next))

        conv_plot = plot_convergence(X_sample, Y_sample)
        run_count += 1

        return [iterations, 1, marks, plot_list[0], conv_plot]

    else:
        marks = {i: '{}'.format(i) for i in range(iterations + 1)}
        return [
            iterations, slider_choice, marks, plot_list[slider_choice - 1],
            conv_plot
        ]
Code Example #20
def bayesian_optimization2D(n_iters,
                            target_func,
                            bounds,
                            x0=None,
                            n_pre_samples=10,
                            gp_params=None,
                            alpha=1e-4,
                            epsilon=1e-4):

    # using matern kernel with white noise
    n_params = bounds.shape[0]
    xy_list = []
    z_list = []

    xy_pre_samples = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                       (n_pre_samples, bounds.shape[0]))

    for x in xy_pre_samples:

        xy_list.append(x)
        z_list.append(target_func(x[0], x[1]))

    xyp = np.array(xy_list)
    zp = np.array(z_list)

    kernel_matern = Matern() + WhiteKernel(noise_level=1)
    model = gp.GaussianProcessRegressor(kernel=kernel_matern,
                                        alpha=alpha,
                                        n_restarts_optimizer=10,
                                        normalize_y=True)

    times = np.zeros(n_iters)

    delta0 = 0.02  # jump control

    for n in range(n_iters):
        #print 'iter: ', n

        start_time = time.time()

        delta = delta0
        model.fit(xyp, zp)
        evaluated_loss = model.predict(xyp)
        next_sample = sample_next_value(expected_improvement, model,
                                        evaluated_loss, n_params, bounds,
                                        delta)

        epsilon_vec = np.asarray(map(lambda x: np.sum((next_sample - x)**2), xyp))

        if (np.any(epsilon_vec <= epsilon)):
            next_sample = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                            bounds.shape[0])

        xy_list.append(next_sample)
        z_list.append(target_func(next_sample[0], next_sample[1]))

        xyp = np.array(xy_list)
        zp = np.array(z_list)

        duration = time.time() - start_time
        times[n] = duration
        print 'duration: ', duration

    evaluated_loss = model.predict(xyp)

    model = model

    return xyp, zp, evaluated_loss, times, model
Code Example #21
        ['Offset', 'Schedule', 'A.Azimuth', 'A.Elevation', 'A.Sonic Temp'],
        axis=1)  # drop columns that are not needed
    data.rename(columns={'A.3D Wind Speed u': '3Dspeed'}, inplace=True)
    data['ux'] = ux
    data['uy'] = uy
    data['uz'] = uz

    # Compute the mean wind speed in the x, y, z directions (Gaussian process regression)
    #     time_stamp = []; ux_mean = []; uy_mean = []; uz_mean = []
    start_time = np.datetime64(start_time_main)
    end_time = np.datetime64(end_time_main)
    data = data[start_time:end_time]
    time_series = data.index
    data_sample = data.sample(1000)
    time_stamp_sample = data_sample.index[:, np.newaxis]
    gp_kernel = 1.0 * Matern(
        length_scale=1, length_scale_bounds=(1e-2, 1e2), nu=1.5)
    gpr = GaussianProcessRegressor(alpha=1, kernel = gp_kernel, random_state=None)\
          .fit(time_stamp_sample,data_sample['ux'])
    y_pred, sigma = gpr.predict(time_series[:, np.newaxis], return_std=True)

    plt.figure(figsize=(20, 5))
    plt.rcParams['savefig.dpi'] = 600  # resolution of saved figures
    plt.rcParams['figure.dpi'] = 600  # display resolution
    plt.plot(time_series,
             data['ux'],
             'o',
             markersize=0.5,
             label=u'Observations')
    plt.plot(time_series, y_pred, '-', label=u'Prediction')
    plt.fill_between(time_series,
                     y_pred - 1.9600 * sigma,
Code Example #22
def choose_location(dataset_loc1,
                    dataset_loc2,
                    bounds,
                    gp_params=None,
                    alpha=1e-3,
                    epsilon=1e-3,
                    sig_level=0.05):
    """
     Return a decision variable indicating which location to choose
        decision = 1, loc1 > loc2
                = 0 , inconclusive
                = -1, loc1 < loc2
        Tie = 1 if the two-sample t-test is insignificant
            = 0 if the two-sample t-test is significant

    :param dataset_loc1: PS dataset for loc1, Nx2 numpy array
    :param dataset_loc2: PS dataset for loc2, Nx2 numpy array
    :param gp_params: params for gaussian process regressor (set to sklearn defaults)
    :param alpha:
    :param epsilon:
    :param sig_level: significant level for t-test

    """
    # set up kernels
    kernel_matern = Matern() + WhiteKernel(noise_level=1)
    model = gp.GaussianProcessRegressor(kernel=kernel_matern,
                                        alpha=alpha,
                                        n_restarts_optimizer=5,
                                        normalize_y=True)

    # get predicted values for loc1
    xp_loc1 = np.array(map(lambda x: [x], dataset_loc1[:, 0]))
    yp_loc1 = np.array(map(lambda y: [y], dataset_loc1[:, 1]))

    x_max_loc1, y_max_loc1, se_1, SNR_1 = find_max(xp_loc1, yp_loc1, model,
                                                   bounds)

    # get predicted values for loc2
    xp_loc2 = np.array(map(lambda x: [x], dataset_loc2[:, 0]))
    yp_loc2 = np.array(map(lambda y: [y], dataset_loc2[:, 1]))

    x_max_loc2, y_max_loc2, se_2, SNR_2 = find_max(xp_loc2, yp_loc2, model,
                                                   bounds)
    t_stat = (y_max_loc1 - y_max_loc2) / np.sqrt(se_1**2 + se_2**2)
    p_val = norm.cdf(-np.abs(t_stat))

    decision = collections.OrderedDict()  # store decision
    loc1 = collections.OrderedDict()  # best amplitude, delta-classifier and se for loc1
    loc2 = collections.OrderedDict()  # info for loc2

    print x_max_loc1

    loc1['best_amplitude'] = x_max_loc1
    loc1['best_delta_classifier'] = y_max_loc1
    loc1['se'] = se_1
    loc1['SNR'] = SNR_1

    loc2['best_amplitude'] = x_max_loc2
    loc2['best_delta_classifier'] = y_max_loc2
    loc2['se'] = se_2
    loc2['SNR'] = SNR_2

    if p_val < sig_level:
        if t_stat > 0:
            decision['Best_Amplitude'] = x_max_loc1
            decision['Best_Location'] = 'Loc1'
            decision['Tie'] = 0
            decision['p_val'] = p_val
            decision['t_stat'] = t_stat
            print "Loc 1 is better than Loc 2"
        else:
            decision['Best_Amplitude'] = x_max_loc2
            decision['Best_Location'] = 'Loc2'
            decision['Tie'] = 0
            decision['p_val'] = p_val
            decision['t_stat'] = t_stat

            print "Loc 2 is better than  Loc 1"
    else:
        if se_1 < se_2:
            print "Same but Loc1 is more reliable"
            decision['Best_Amplitude'] = x_max_loc1
            decision['Best_Location'] = 'Loc1'
            decision['Tie'] = 1
            decision['p_val'] = p_val
            decision['t_stat'] = t_stat

        else:
            decision['Best_Amplitude'] = x_max_loc2
            decision['Best_Location'] = 'Loc2'
            decision['Tie'] = 1
            decision['p_val'] = p_val
            decision['t_stat'] = t_stat

            print "Same but Loc2 is more reliable"

    return {'decision': decision, 'loc1_info': loc1, 'loc2_info': loc2}
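The decision above boils down to comparing the two GP-estimated maxima with a normal approximation to a two-sample test. A minimal, self-contained sketch of just that comparison step (the numbers are illustrative, not outputs of find_max):

import numpy as np
from scipy.stats import norm

y_max_loc1, se_1 = 0.82, 0.04  # assumed best delta-classifier and its standard error for loc1
y_max_loc2, se_2 = 0.74, 0.05  # assumed values for loc2

t_stat = (y_max_loc1 - y_max_loc2) / np.sqrt(se_1**2 + se_2**2)
p_val = norm.cdf(-np.abs(t_stat))  # one-sided tail probability
if p_val < 0.05:
    print("Loc1 is better" if t_stat > 0 else "Loc2 is better")
else:
    print("Inconclusive; prefer the location with the smaller standard error")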
コード例 #23
0
label = shuffle(label, random_state=41)[:5000]
Kfold = StratifiedKFold(n_splits=n_splits)

accuracy_rbf_training = np.zeros(n_splits)
accuracy_rbf_testing = np.zeros(n_splits)
accuracy_matern_training = np.zeros(n_splits)
accuracy_matern_testing = np.zeros(n_splits)
nlpd_matern_t = np.zeros(n_splits)
nlpd_matern_v = np.zeros(n_splits)
best_kernel = None
best_nlpd = np.inf

for i, (train_index, test_index) in enumerate(Kfold.split(features, label)):
    X_train, X_test = features[train_index], features[test_index]
    y_train, y_test = label[train_index], label[test_index]
    gp_matern_fix = GaussianProcessClassifier(kernel=3.7**2 * Matern(length_scale=9.4, nu=1.5),
                                              optimizer=None)  # keep kernel hyperparameters fixed
    gp_matern_fix.fit(X_train, y_train)
    accuracy_matern_training[i] = accuracy_score(y_train, gp_matern_fix.predict(X_train))
    accuracy_matern_testing[i] = accuracy_score(y_test, gp_matern_fix.predict(X_test))
    neg_lpd_matern_t = -np.mean(np.log(gp_matern_fix.predict_proba(X_train)[np.arange(len(X_train)), y_train]))
    neg_lpd_matern_v = -np.mean(np.log(gp_matern_fix.predict_proba(X_test)[np.arange(len(X_test)), y_test]))
    nlpd_matern_t[i] = neg_lpd_matern_t
    nlpd_matern_v[i] = neg_lpd_matern_v
print("Average training accuracy with matern kernel: %.5f" % np.mean(accuracy_matern_traing))
print("Average testing accuracy with matern kernel: %.5f" % np.mean(accuracy_matern_testing))
print("Average negative log predictive density of training set with matern kernel: %.5f"
      % np.mean(nlpd_matern_t))
print("Average negative log predictive density of validation set with matern kernel: %.5f"
      % np.mean(nlpd_matern_v))
print("Total elapsed time: %.5f" % (time.time()-elapsed))
コード例 #24
0
def bayesian_optimization(n_iters,
                          target_func,
                          bounds,
                          x0=None,
                          n_pre_samples=10,
                          gp_params=None,
                          alpha=1e-4,
                          epsilon=1e-4):

    # using matern kernel with white noise
    n_params = bounds.shape[0]
    x_list = []
    y_list = []

    x_pre_samples = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                      (n_pre_samples, bounds.shape[0]))

    for x in x_pre_samples:
        print(type(x))
        x_list.append(x)
        y_list.append(target_func(x))

    print(x_list)
    print(y_list)

    xp = np.array(x_list)
    yp = np.array(y_list)

    print(xp)
    print(yp)

    kernel_matern = Matern() + WhiteKernel(noise_level=1)
    model = gp.GaussianProcessRegressor(kernel=kernel_matern,
                                        alpha=alpha,
                                        n_restarts_optimizer=1,
                                        normalize_y=True)

    times = np.zeros(n_iters)

    delta0 = 0.02  # jump control

    for n in range(n_iters):
        #print 'iter: ', n
        start_time = time.time()
        delta = delta0
        model.fit(xp, yp)
        evaluated_loss = model.predict(xp)
        next_sample = sample_next_value(expected_improvement, model,
                                        evaluated_loss, n_params, bounds,
                                        delta)

        if (np.any(np.abs(next_sample - xp) <= epsilon)):
            next_sample = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                            bounds.shape[0])

        x_list.append(next_sample)
        y_list.append(target_func(next_sample))

        xp = np.array(x_list)
        yp = np.array(y_list)

        duration = time.time() - start_time
        times[n] = duration
        print('duration: ', duration)

    evaluated_loss = model.predict(xp)
    print(type(model))
    print(model.kernel_)

    noise_level = model.kernel_.k2.noise_level

    return xp, yp, evaluated_loss, times, noise_level
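The helpers expected_improvement and sample_next_value are not shown in this snippet. A common formulation of the expected-improvement acquisition is sketched below; it assumes the objective is being maximized, and the signature is only a guess at how the author's helper might look:

import numpy as np
from scipy.stats import norm

def expected_improvement(x, model, evaluated_loss, n_params):
    """Expected improvement of candidate x under the fitted GP, relative to the best value seen so far."""
    x = np.asarray(x).reshape(-1, n_params)
    mu, sigma = model.predict(x, return_std=True)
    best = np.max(evaluated_loss)  # best observed value (maximization convention)
    with np.errstate(divide='ignore', invalid='ignore'):
        z = (mu - best) / sigma
        ei = (mu - best) * norm.cdf(z) + sigma * norm.pdf(z)
        ei[sigma == 0.0] = 0.0  # no improvement possible where the GP is certain
    return ei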
コード例 #25
0
    return eng.code2(1, param)


#Xi=np.concatenate((nprand.uniform(10.0,30.0,[2,6]),nprand.uniform(-0.3,0.3,[2,42]),nprand.uniform(0.0,30.0,[2,1]),nprand.uniform(-30.0,30.0,[2,1]),nprand.uniform(0.0,0.3,[2,6]),nprand.uniform(5.0,20.0,[2,6])),axis=1)
#yi = np.asarray([f1(Xi[0]),f1(Xi[1])])
l2 = 2
#gp = gaussian_process.GaussianProcess(corr='cubic',theta0=1e-2, thetaL=1e-4, thetaU=1e-1)
lenSc = np.asarray([0.0] * 62)
lenSc[0:6] = 30.0 - 10.0
lenSc[6:48] = 0.3 - (-0.3)
lenSc[48:49] = 30.0 - 0.0
lenSc[49:50] = 30.0 - (-30.0)
lenSc[50:56] = 0.3 - 0.0
lenSc[56:62] = 20.0 - 5.0

kernel = 34.4**2 * Matern(length_scale=lenSc)
gp = GaussianProcessRegressor(kernel=kernel)
Xi = np.concatenate(
    (nprand.uniform(10.0, 30.0, [l2, 6]), nprand.uniform(-0.3, 0.3, [l2, 42]),
     nprand.uniform(0.0, 30.0, [l2, 1]), nprand.uniform(-30.0, 30.0, [l2, 1]),
     nprand.uniform(0.0, 0.3, [l2, 6]), nprand.uniform(5.0, 20.0, [l2, 6])),
    axis=1)
#Xi[0]= np.asarray([  1.96099393e+01,   1.58550894e+01,   1.09609545e+01,   1.22583452e+01, 1.13845861e+01,   2.32301699e+01,  -2.60330376e-01,  -9.59519233e-02, -3.92556934e-02,  -1.26138915e-03,  -3.75292346e-02,  -1.99628860e-01, 1.04272141e-01,  -2.98902972e-02,  -2.62179470e-02,  -3.41731807e-02, -2.90053980e-01,  -9.25959548e-02,   1.31797861e-01,  -8.80510331e-02, 4.03836475e-02,  -2.42839690e-01,  -1.26907174e-01,  -2.77364914e-01, 1.97789667e-01,  -2.65836378e-01,  -2.99287642e-01,  -8.78932771e-02, -3.57962043e-02,  -2.64148367e-01,   9.12228069e-02,  -5.01370722e-02, -3.07490603e-02,  -1.70573523e-01,   2.19795808e-02,  -2.28975755e-01, -2.36748681e-01,  -1.88787172e-01,  -2.50019760e-01,   5.18324371e-03, 2.37976187e-01,  -6.05562408e-02,  -5.76364679e-02,   2.95182555e-01, 2.03315798e-01,  -2.72488934e-01,   2.10069505e-02,  -2.50743497e-01, 2.66326081e+01,  -2.73519136e+01,   6.57654619e-02,   2.89214259e-01, 2.92997646e-01,   3.04660132e-01,   1.77162416e-01,   3.00612232e-01, 1.49886262e+01,   1.18078788e+01,   1.43817508e+01,   1.81800275e+01, 1.79311669e+01, 1.88175811e+01])
yi = [f1(x) for x in Xi]
#gp = gaussian_process.GaussianProcess(corr='linear', theta0=1e-2, thetaL=1e-4, thetaU=1e-1)
gp.fit(Xi, yi)
randomIter = nprand.randn() + 2.0
paramB = np.asarray([
    1.96099393e+01, 1.58550894e+01, 1.09609545e+01, 1.22583452e+01,
    1.13845861e+01, 2.32301699e+01, -2.60330376e-01, -9.59519233e-02,
    -3.92556934e-02, -1.26138915e-03, -3.75292346e-02, -1.99628860e-01,
コード例 #26
0
def make_plot(days_ago, dates, mag):
    #mpl.rcParams['font.sans-serif']=['Times New Roman']   # set the default font; SimHei is a CJK sans-serif face
    mpl.rcParams['font.sans-serif'] = ['SimHei']
    mpl.rcParams['axes.unicode_minus'] = False  # render minus signs correctly with a CJK font
    #fontcn = {'family': 'Droid Sans Fallback'} # 1pt = 4/3px
    fontcn = {'family': 'SimHei'}  # 1pt = 4/3px
    fonten = {'family': 'Times New Roman'}
    print('Making plot...')
    time_span = np.max(dates) - np.min(dates)
    min_plot = 0.0
    max_plot = 2
    x_days = -120

    # Make daily bins
    nights = np.arange(0, 120, 1)
    daily_mags = []
    errors = []
    for night in nights:
        selector = np.where((days_ago < night + 1) & (days_ago > night))
        n_obs = np.size(mag[selector])
        flux = biweight_location(mag[selector])
        error = np.std(mag[selector]) / np.sqrt(n_obs)
        if error > 0.75:
            error = 0
        daily_mags.append(flux)
        errors.append(error)
        print(night, flux, error, n_obs, np.std(mag[selector]))
    nights_all = nights.copy()
    daily_mags_all = daily_mags.copy()
    errors_all = errors.copy()

    lookback = np.arange(1, 20, 1)

    for missing_days in lookback:
        nights = nights_all.copy()[missing_days:]
        daily_mags = daily_mags_all.copy()[missing_days:]
        errors = errors_all.copy()[missing_days:]
        plt.errorbar(-(nights + 0.5),
                     daily_mags,
                     yerr=errors,
                     fmt='.k',
                     alpha=0.5)
        plt.xlabel(u'从今天算起的天数', fontdict=fontcn)
        plt.ylabel(u'视星等', fontdict=fontcn)
        #plt.ylabel('视星等')
        mid = biweight_location(mag)
        plt.ylim(min_plot, max_plot)
        plt.xlim(-100, 100)
        plt.gca().invert_yaxis()
        date_text = datetime.datetime.now().strftime("%Y-%m-%d")
        plt.text(95, min_plot + 0.1, u'每日观测数据合成', ha='right', fontdict=fontcn)
        plt.text(60, min_plot + 0.2, u'由 天文通 译制于', ha='right', fontdict=fontcn)
        plt.text(95, min_plot + 0.2, date_text, ha='right', fontdict=fonten)
        use_days = 60 - missing_days
        X = np.array(nights + 0.5)
        X = X[:use_days]
        y = np.array(daily_mags)
        y = y[:use_days]
        X, y = cleaned_array(X, y)
        length_scale = 1
        kernel = ConstantKernel() + Matern(
            length_scale=length_scale, nu=3 / 2) + WhiteKernel(noise_level=1)
        X = X.reshape(-1, 1)
        gp = gaussian_process.GaussianProcessRegressor(kernel=kernel)
        gp.fit(X, y)
        # repr of the fitted regressor (the bare constructor call here is a no-op and
        # only documents the configuration):
        # GaussianProcessRegressor(alpha=1e-10, copy_X_train=True,
        #                          kernel=1**2 + Matern(length_scale=length_scale, nu=1.5) +
        #                          WhiteKernel(noise_level=1),
        #                          n_restarts_optimizer=0, normalize_y=False,
        #                          optimizer='fmin_l_bfgs_b', random_state=None)
        x_pred = np.linspace(60, -120, 250).reshape(-1, 1)
        y_pred, sigma = gp.predict(x_pred, return_std=True)
        plt.plot(-x_pred, y_pred, linestyle='dashed', color='blue')
        plt.fill_between(-x_pred.ravel(),
                         y_pred + sigma,
                         y_pred - sigma,
                         alpha=0.5)
        idx = 20 - missing_days
        if idx < 10:
            filename = "0" + str(idx) + '.png'
        else:
            filename = str(idx) + '.png'

        plt.savefig(filename, bbox_inches='tight', dpi=100)
        print('Plot made', filename)
        plt.clf()
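The GP-extrapolation step buried inside the plotting loop above can be isolated. A minimal, self-contained sketch on synthetic daily magnitudes (the data, seed, and length scale are made up for illustration):

import numpy as np
from sklearn import gaussian_process
from sklearn.gaussian_process.kernels import ConstantKernel, Matern, WhiteKernel

rng = np.random.default_rng(0)
nights = np.arange(0.5, 60.5, 1.0)  # bin centres, in days ago
mags = 0.8 + 0.3 * np.sin(nights / 15.0) + 0.05 * rng.standard_normal(len(nights))

kernel = ConstantKernel() + Matern(length_scale=1, nu=1.5) + WhiteKernel(noise_level=1)
gp = gaussian_process.GaussianProcessRegressor(kernel=kernel)
gp.fit(nights.reshape(-1, 1), mags)

x_pred = np.linspace(60, -120, 250).reshape(-1, 1)  # negative values extrapolate into the future
y_pred, sigma = gp.predict(x_pred, return_std=True)
print(y_pred[:3], sigma[:3])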
コード例 #27
0
    def setUp(self):
        self.XY = numpy.array([[3, 4], [5, 7]])
        self.model = GaussianProcessRegressor(kernel=Matern(), alpha=1e-10)
コード例 #28
0
def gp(
    genius_ufuns=True,
    sigma_generator=0.0,
    n_trials=100,  # only used with random ufuns
    n_outcomes=100,  # interpreted as max_n_outcomes for genius ufuns
    n_comparisons_per_outcome=2,
    compress_only=True,
    kernel=None,
    learn_sigma=False,
    use_their_map=True,
    use_our_map=False,
    sigma_learner=0.0,
    tol=1e-3,
):
    if kernel is None:
        kernel = Matern()
    if use_their_map:
        compress_only = True

    testing_error, training_error, comparison_error = 0.0, 0.0, 0.0
    n_failed_optimization = 0

    if genius_ufuns:
        ufuns = generate_genius_ufuns_1d(max_n_outcomes=n_outcomes)
    else:
        ufuns = generate_random_ufuns(n_outcomes, n_trials)

    n_trials = len(ufuns)

    comparisons = [
        generate_comparisons(
            ufun=ufun, n=n_comparisons_per_outcome * len(ufun), sigma=sigma_generator
        )
        for ufun in ufuns
    ]

    for trial, (gt_ufun, (noisy_comparisons, gt_comparisons)) in enumerate(
        zip(ufuns, comparisons)
    ):
        n_real_outcomes = len(gt_ufun)
        ws = extract_ws(noisy_comparisons)
        errors = find_errors(
            create_table(ws, [gt_ufun[w] for w in ws]), noisy_comparisons, tolerance=0.0
        )
        if len(errors) > 0 and sigma_generator == 0.0:
            print(
                f"failed on GT :{gt_ufun}\n\n{[(e[1], noisy_comparisons[e[0], :]) for e in errors]}"
            )
            return
        errors = find_errors(
            create_table(ws, [gt_ufun[w] for w in ws]), gt_comparisons, tolerance=0.0
        )
        if len(errors) > 0:
            print(
                f"failed on GT with GT comparisons: {gt_ufun}\n\n{[(e[1], gt_comparisons[e[0], :]) for e in errors]}"
            )
            return

        if use_their_map:
            ufun = ChuIndexGPUfun(
                outcomes=[(_,) for _ in range(n_real_outcomes)],
                comparisons=noisy_comparisons,
                sigma=sigma_learner,
                kernel=kernel,
            )
        else:
            ufun = CompressRegressIndexUfun(
                outcomes=[(_,) for _ in range(n_real_outcomes)],
                comparisons=noisy_comparisons,
                sigma=sigma_learner,
                kernel=kernel,
                use_map=use_our_map,
                tol=tol,
                compress_only=compress_only,
                optimize_sigma=learn_sigma,
            )
        if ufun.fitted:
            comparison_error += ufun.evaluate(comparisons=gt_comparisons)
            training_error += ufun.evaluate(comparisons=noisy_comparisons)
            testing_error += ufun.evaluate(gt=gt_ufun)
        else:
            # print(
            #     f'\t{"Optimization" if not use_their_map else "Root finding"} failed!! :-('
            # )
            n_failed_optimization += 1
        print(
            f"{trial + 1:06} of {n_trials:06} completed [n={n_real_outcomes}]",
            flush=True,
            end="",
        )
        n_t = trial + 1 - n_failed_optimization
        if n_t == 0:
            continue
        print(
            f" Training Error: {training_error / n_t:0.03%}"
            f" GT Comparisons Error: {comparison_error / n_t:0.03%}"
            f" Testing Error: {testing_error / n_t:0.03%}"
            f", OP Failures: {n_failed_optimization / (trial + 1):0.03%} ",
            end="",
        )
        print("", end="\n", flush=True)
コード例 #29
0
from sklearn.utils.testing import (assert_almost_equal, assert_array_equal,
                                   assert_array_almost_equal,
                                   assert_raise_message)

X = np.random.RandomState(0).normal(0, 1, (5, 2))
Y = np.random.RandomState(0).normal(0, 1, (6, 2))

kernel_white = RBF(length_scale=2.0) + WhiteKernel(noise_level=3.0)
kernels = [
    RBF(length_scale=2.0),
    RBF(length_scale_bounds=(0.5, 2.0)),
    ConstantKernel(constant_value=10.0),
    2.0 * RBF(length_scale=0.33, length_scale_bounds="fixed"),
    2.0 * RBF(length_scale=0.5),
    kernel_white,
    2.0 * RBF(length_scale=[0.5, 2.0]),
    2.0 * Matern(length_scale=0.33, length_scale_bounds="fixed"),
    2.0 * Matern(length_scale=0.5, nu=0.5),
    2.0 * Matern(length_scale=1.5, nu=1.5),
    2.0 * Matern(length_scale=2.5, nu=2.5),
    2.0 * Matern(length_scale=[0.5, 2.0], nu=0.5),
    3.0 * Matern(length_scale=[2.0, 0.5], nu=1.5),
    4.0 * Matern(length_scale=[0.5, 0.5], nu=2.5),
    RationalQuadratic(length_scale=0.5, alpha=1.5),
    ExpSineSquared(length_scale=0.5, periodicity=1.5),
    DotProduct(sigma_0=2.0),
    DotProduct(sigma_0=2.0)**2,
    RBF(length_scale=[2.0]),
    Matern(length_scale=[2.0])
]
for metric in PAIRWISE_KERNEL_FUNCTIONS:
    if metric in ["additive_chi2", "chi2"]:
コード例 #30
0
Rf_Amp_thresh = 1e-7
index = data[25] > Rf_Amp_thresh

data = data[index]  # keep only rows above the Rf amplitude threshold

X = np.asarray(
    data[[1, 10, 7, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]])
Y = np.asarray(data[[25]])[:, np.newaxis]
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

##############################################################################
# Instantiate a Gaussian Process model
# Choose Kernel [Tricky]

kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))  # superseded by the composite kernel below
kernel = Matern(length_scale=0.2, nu=0.5) + WhiteKernel(
    noise_level=0.1) + C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))

##############################################################################

gp = GaussianProcessRegressor(alpha=1e-3,
                              copy_X_train=True,
                              kernel=kernel,
                              n_restarts_optimizer=10,
                              normalize_y=False,
                              optimizer='fmin_l_bfgs_b',
                              random_state=None)
'''
OKish Parameter Values 
gp = GaussianProcessRegressor(alpha=1e-7, copy_X_train=True,
kernel=1**2 + Matern(length_scale=0.2, nu=0.5) + WhiteKernel(noise_level=0.1),