Example #1
def test_gp_regression_with_warping():
    def f(x):
        return np.sin(3 * np.log(x))

    np.random.seed(7)

    L, U = -5., 12.
    input_range = (2.**L, 2.**U)

    x_train = np.sort(2.**np.random.uniform(L, U, 250))
    x_test = np.sort(2.**np.random.uniform(L, U, 500))
    y_train = f(x_train)
    y_test = f(x_test)

    # to mx.nd
    y_train_mx_nd = mx.nd.array(y_train)
    x_train_mx_nd = mx.nd.array(x_train)
    x_test_mx_nd = mx.nd.array(x_test)

    kernels = [
        Matern52(dimension=1),
        WarpedKernel(kernel=Matern52(dimension=1),
                     warping=Warping(dimension=1,
                                     index_to_range={0: input_range}))
    ]

    models = [
        GaussianProcessRegression(kernel=k, random_seed=0) for k in kernels
    ]
    train_errors, test_errors = [], []

    for model in models:

        model.fit(x_train_mx_nd, y_train_mx_nd)

        mu_train, _ = model.predict(x_train_mx_nd)[0]
        mu_test, _ = model.predict(x_test_mx_nd)[0]

        # back to np.array
        mu_train = mu_train.asnumpy()
        mu_test = mu_test.asnumpy()

        train_errors.append(np.mean(np.abs(mu_train - y_train)))
        test_errors.append(np.mean(np.abs(mu_test - y_test)))

    # The two models have similar performance on training points
    np.testing.assert_almost_equal(train_errors[0], train_errors[1], decimal=4)

    # As expected, the model with warping largely outperforms the model without
    assert test_errors[1] < 0.1 * test_errors[0]
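
The same pattern applies whenever inputs live in a known positive range: wrap the base kernel in a WarpedKernel. A minimal sketch, reusing only the API exercised by the test above (the range (1e-3, 1e3) and the arrays x_train, y_train, x_test are illustrative placeholders):

warped_kernel = WarpedKernel(
    kernel=Matern52(dimension=1),
    warping=Warping(dimension=1, index_to_range={0: (1e-3, 1e3)}))
model = GaussianProcessRegression(kernel=warped_kernel, random_seed=0)
model.fit(mx.nd.array(x_train), mx.nd.array(y_train))
mu_test, var_test = model.predict(mx.nd.array(x_test))[0]
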
def test_likelihood_encoding():
    mean = ScalarMeanFunction()
    kernel = Matern52(dimension=1)
    likelihood = MarginalLikelihood(mean=mean, kernel=kernel)
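    # With no encoding_type given, the logarithmic encoding is the default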
    assert isinstance(likelihood.encoding, LogarithmScalarEncoding)
    likelihood = MarginalLikelihood(mean=mean, kernel=kernel, encoding_type="positive")
    assert isinstance(likelihood.encoding, PositiveScalarEncoding)
Example #3
def build_kernel(state: TuningJobState,
                 do_warping: bool = False) -> KernelFunction:
    dims, warping_ranges = dimensionality_and_warping_ranges(state.hp_ranges)
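    # ARD: one inverse bandwidth per input dimension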
    kernel = Matern52(dims, ARD=True)
    if do_warping:
        return WarpedKernel(kernel=kernel,
                            warping=Warping(dims, warping_ranges))
    else:
        return kernel
def resource_kernel_factory(
        name: str, kernel_x: KernelFunction, mean_x: gluon.HybridBlock,
        max_metric_value: float) -> (KernelFunction, gluon.HybridBlock):
    """
    Given kernel function kernel_x and mean function mean_x over config x,
    create kernel and mean functions over (x, r), where r is the resource
    attribute (nonnegative scalar, usually in [0, 1]).

    :param name: Selects resource kernel type
    :param kernel_x: Kernel function over configs x
    :param mean_x: Mean function over configs x
    :return: res_kernel, res_mean, both over (x, r)

    """
    if name == 'matern52':
        res_kernel = Matern52(dimension=kernel_x.dimension + 1, ARD=True)
        res_mean = mean_x
    elif name == 'matern52-res-warp':
        # Warping on resource dimension (last one)
        dim_x = kernel_x.dimension
        res_warping = Warping(dimension=dim_x + 1,
                              index_to_range={dim_x: (0., 1.)})
        res_kernel = WarpedKernel(kernel=Matern52(dimension=dim_x + 1,
                                                  ARD=True),
                                  warping=res_warping)
        res_mean = mean_x
    else:
        if name == 'exp-decay-sum':
            delta_fixed_value = 0.0
        elif name == 'exp-decay-combined':
            delta_fixed_value = None
        elif name == 'exp-decay-delta1':
            delta_fixed_value = 1.0
        else:
            raise AssertionError("name = '{}' not supported".format(name))
        res_kernel = ExponentialDecayResourcesKernelFunction(
            kernel_x,
            mean_x,
            gamma_init=0.5 * max_metric_value,
            delta_fixed_value=delta_fixed_value,
            max_metric_value=max_metric_value)
        res_mean = ExponentialDecayResourcesMeanFunction(kernel=res_kernel)

    return res_kernel, res_mean
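
A hedged usage sketch for this factory (the three-dimensional config kernel and max_metric_value=1.0 are illustrative assumptions; names follow the code above):

kernel_x = Matern52(dimension=3, ARD=True)
mean_x = ScalarMeanFunction()
res_kernel, res_mean = resource_kernel_factory(
    'matern52-res-warp', kernel_x=kernel_x, mean_x=mean_x,
    max_metric_value=1.0)
# For this kernel type, res_kernel is a WarpedKernel over (x, r) with warping
# on the resource dimension, and res_mean is mean_x unchanged
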
def test_set_gp_hps():
    mean = ScalarMeanFunction()
    kernel = Matern52(dimension=1)
    warping = Warping(dimension=1, index_to_range={0: (-4., 4.)})
    warped_kernel = WarpedKernel(kernel=kernel, warping=warping)
    likelihood = MarginalLikelihood(kernel=warped_kernel,
                                    mean=mean,
                                    initial_noise_variance=1e-6)
    likelihood.initialize(ctx=mx.cpu(), force_reinit=True)
    likelihood.hybridize()
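    # HP order: noise, mean, covariance scale, bandwidth, warping a, warping b
    # (compare test_get_gp_hps below)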
    hp_values = np.array([1e-2, 1.0, 0.5, 0.3, 0.2, 1.1])
    _set_gp_hps(hp_values, likelihood)
    np.testing.assert_array_almost_equal(hp_values, _get_gp_hps(likelihood))
Example #6
def test_incremental_update():
    def f(x):
        return np.sin(x) / x

    np.random.seed(298424)
    std_noise = 0.01

    for rep in range(10):
        model = GaussianProcessRegression(kernel=Matern52(dimension=1))
        # Sample data
        num_train = np.random.randint(low=5, high=15)
        num_incr = np.random.randint(low=1, high=7)
        sizes = [num_train, num_incr]
        features = []
        targets = []
        for sz in sizes:
            feats = np.random.uniform(low=-1.0, high=1.0, size=sz).reshape((-1, 1))
            features.append(feats)
            targs = f(feats)
            targs += np.random.normal(0.0, std_noise, size=targs.shape)
            targets.append(targs)
        # Posterior state by incremental updating
        train_features = to_nd(features[0])
        train_targets = to_nd(targets[0])
        model.fit(train_features, train_targets)
        noise_variance_1 = model.likelihood.get_noise_variance()
        state_incr = IncrementalUpdateGPPosteriorState(
            features=train_features, targets=train_targets,
            mean=model.likelihood.mean, kernel=model.likelihood.kernel,
            noise_variance=model.likelihood.get_noise_variance(as_ndarray=True))
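        # Absorb the additional points one at a time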
        for i in range(num_incr):
            state_incr = state_incr.update(
                to_nd(features[1][i].reshape((1, -1))),
                to_nd(targets[1][i].reshape((1, -1))))
        noise_variance_2 = state_incr.noise_variance.asscalar()
        # Posterior state by direct computation
        state_comp = GaussProcPosteriorState(
            features=to_nd(np.concatenate(features, axis=0)),
            targets=to_nd(np.concatenate(targets, axis=0)),
            mean=model.likelihood.mean, kernel=model.likelihood.kernel,
            noise_variance=state_incr.noise_variance)
        # Compare them
        assert noise_variance_1 == noise_variance_2, \
            "noise_variance_1 = {} != {} = noise_variance_2".format(
                noise_variance_1, noise_variance_2)
        chol_fact_incr = state_incr.chol_fact.asnumpy()
        chol_fact_comp = state_comp.chol_fact.asnumpy()
        np.testing.assert_almost_equal(chol_fact_incr, chol_fact_comp, decimal=2)
        pred_mat_incr = state_incr.pred_mat.asnumpy()
        pred_mat_comp = state_comp.pred_mat.asnumpy()
        np.testing.assert_almost_equal(pred_mat_incr, pred_mat_comp, decimal=2)
def test_get_gp_hps():
    mean = ScalarMeanFunction()
    kernel = Matern52(dimension=1)
    warping = Warping(dimension=1, index_to_range={0: (-4., 4.)})
    warped_kernel = WarpedKernel(kernel=kernel, warping=warping)
    likelihood = MarginalLikelihood(kernel=warped_kernel,
                                    mean=mean,
                                    initial_noise_variance=1e-6)
    likelihood.initialize(ctx=mx.cpu(), force_reinit=True)
    likelihood.hybridize()
    hp_values = _get_gp_hps(likelihood)
    # The order of the HPs is: noise, mean, covariance scale, bandwidth,
    # warping a, warping b
    np.testing.assert_array_almost_equal(
        hp_values, np.array([1e-6, 0.0, 1.0, 1.0, 1.0, 1.0]))
def test_gp_regression_2d_with_ard():

    def f(x):
        # Only dependent on the first column of x
        return np.sin(x[:, 0]) / x[:, 0]

    np.random.seed(7)

    dimension = 3

    # 30 train and test points in R^3
    x_train = np.random.uniform(-5, 5, size=(30, dimension))
    x_test = np.random.uniform(-5, 5, size=(30, dimension))
    y_train = f(x_train)
    y_test = f(x_test)

    # to mx.nd
    y_train_mx_nd = mx.nd.array(y_train)
    x_train_mx_nd = mx.nd.array(x_train)
    x_test_mx_nd = mx.nd.array(x_test)

    model = GaussianProcessRegression(kernel=Matern52(dimension=dimension, ARD=True))
    model.fit(x_train_mx_nd, y_train_mx_nd)

    # Check that the residual noise variance learned by empirical Bayes is of
    # the same order as the smallest allowed value (since there is no noise)
    noise_variance = model.likelihood.get_noise_variance()
    np.testing.assert_almost_equal(noise_variance, NOISE_VARIANCE_LOWER_BOUND)

    # Check that the bandwidths learned by empirical Bayes reflect the fact
    # that only the first column is useful. In particular, for the useless
    # dimensions indexed by {1, 2}, the inverse bandwidths should be close to
    # INVERSE_BANDWIDTHS_LOWER_BOUND (or conversely, the bandwidths should be
    # close to their highest allowed values)
    sqd = model.likelihood.kernel.squared_distance
    inverse_bandwidths = sqd.encoding.get(
        mx.nd, sqd.inverse_bandwidths_internal.data()).asnumpy()

    assert inverse_bandwidths[0] > inverse_bandwidths[1] and \
           inverse_bandwidths[0] > inverse_bandwidths[2]
    np.testing.assert_almost_equal(
        inverse_bandwidths[1], INVERSE_BANDWIDTHS_LOWER_BOUND)
    np.testing.assert_almost_equal(
        inverse_bandwidths[2], INVERSE_BANDWIDTHS_LOWER_BOUND)

    mu_train, _ = model.predict(x_train_mx_nd)[0]
    mu_test, _ = model.predict(x_test_mx_nd)[0]

    # back to np.array
    mu_train = mu_train.asnumpy()
    mu_test = mu_test.asnumpy()

    np.testing.assert_almost_equal(mu_train, y_train, decimal=2)
    # Fewer decimals imposed for the test points
    np.testing.assert_almost_equal(mu_test, y_test, decimal=1)
def fit_predict_ours(data: dict,
                     random_seed: int,
                     optimization_config: OptimizationConfig,
                     test_intermediates: Optional[dict] = None) -> dict:
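    # data must provide 'ss_limits' (its length sets the input dimensionality),
    # 'state' (the tuning job state) and 'test_inputs' (inputs to predict on)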
    # Create surrogate model
    num_dims = len(data['ss_limits'])
    _gpmodel = GaussianProcessRegression(
        kernel=Matern52(num_dims, ARD=True),
        mean=ZeroMeanFunction(),  # Instead of ScalarMeanFunction
        optimization_config=optimization_config,
        random_seed=random_seed,
        test_intermediates=test_intermediates)
    model = GPMXNetModel(data['state'],
                         DEFAULT_METRIC,
                         random_seed,
                         _gpmodel,
                         fit_parameters=True,
                         num_fantasy_samples=20)
    model_params = model.get_params()
    print('Hyperparameters: {}'.format(model_params))
    # Prediction
    means, stddevs = model.predict(data['test_inputs'])[0]
    return {'means': means, 'stddevs': stddevs}
def test_gp_regression_with_noise():

    def f(x):
        return np.sin(x)/x

    np.random.seed(7)

    x_train = np.arange(-5, 5, 0.2)  # [-5, -4.8, -4.6, ..., 4.8]
    # [-4.9, -4.7, -4.5, ..., 4.9]; train and test points do not overlap
    x_test = np.arange(-4.9, 5, 0.2)
    y_train = f(x_train)
    y_test = f(x_test)

    std_noise = 0.01
    noise_train = np.random.normal(0.0, std_noise, size=y_train.shape)

    # to mx.nd
    y_train_mx_nd = mx.nd.array(y_train)
    noise_train_mx_nd = mx.nd.array(noise_train)
    x_train_mx_nd = mx.nd.array(x_train)
    x_test_mx_nd = mx.nd.array(x_test)

    model = GaussianProcessRegression(kernel=Matern52(dimension=1))
    model.fit(x_train_mx_nd, y_train_mx_nd + noise_train_mx_nd)

    # Check that the residual noise variance learned by empirical Bayes is of
    # the same order as std_noise**2
    noise_variance = model.likelihood.get_noise_variance()
    np.testing.assert_almost_equal(noise_variance, std_noise**2, decimal=4)

    mu_train, _ = model.predict(x_train_mx_nd)[0]
    mu_test, _ = model.predict(x_test_mx_nd)[0]

    # back to np.array
    mu_train = mu_train.asnumpy()
    mu_test = mu_test.asnumpy()

    np.testing.assert_almost_equal(mu_train, y_train, decimal=2)
    np.testing.assert_almost_equal(mu_test, y_test, decimal=2)
def test_gp_regression_no_noise():

    def f(x):
        return np.sin(x)/x

    x_train = np.arange(-5, 5, 0.2)  # [-5, -4.8, -4.6, ..., 4.8]
    # [-4.9, -4.7, -4.5, ..., 4.9]; train and test points do not overlap
    x_test = np.arange(-4.9, 5, 0.2)
    y_train = f(x_train)
    y_test = f(x_test)

    # to mx.nd
    y_train_mx_nd = mx.nd.array(y_train)
    x_train_mx_nd = mx.nd.array(x_train)
    x_test_mx_nd = mx.nd.array(x_test)

    model = GaussianProcessRegression(kernel=Matern52(dimension=1))
    model.fit(x_train_mx_nd, y_train_mx_nd)

    # Check that the residual noise variance learned by empirical Bayes is of
    # the same order as the smallest allowed value (since there is no noise)
    noise_variance = model.likelihood.get_noise_variance()
    np.testing.assert_almost_equal(noise_variance, NOISE_VARIANCE_LOWER_BOUND)

    mu_train, var_train = model.predict(x_train_mx_nd)[0]
    mu_test, var_test = model.predict(x_test_mx_nd)[0]

    # back to np.array
    mu_train = mu_train.asnumpy()
    mu_test = mu_test.asnumpy()
    var_train = var_train.asnumpy()
    var_test = var_test.asnumpy()

    np.testing.assert_almost_equal(mu_train, y_train, decimal=4)
    np.testing.assert_almost_equal(var_train, [0.0] * len(var_train), decimal=4)
    # Fewer decimals imposed for the test points
    np.testing.assert_almost_equal(mu_test, y_test, decimal=3)
def _create_common_objects(**kwargs):
    # TODO: Validity checks on kwargs arguments
    scheduler = kwargs['scheduler']
    config_space = kwargs['configspace']
    is_hyperband = scheduler.startswith('hyperband')
    if kwargs.get('debug_use_hyperparameter_ranges', False):
        assert isinstance(config_space, HyperparameterRanges)
        assert not is_hyperband, \
            "Cannot use debug_use_hyperparameter_ranges with Hyperband scheduling"
        hp_ranges_cs = config_space
    else:
        import ConfigSpace as CS
        assert isinstance(config_space, CS.ConfigurationSpace)
        hp_ranges_cs = HyperparameterRanges_CS(config_space)
    # Note: This base random seed is used to create different random seeds for
    # each BO get_config call internally
    random_seed = kwargs.get('random_seed', 31415927)
    # Skip optimization predicate for GP surrogate model
    if kwargs.get('opt_skip_num_max_resource', False) and is_hyperband:
        skip_optimization = SkipNoMaxResourcePredicate(
            init_length=kwargs['opt_skip_init_length'],
            resource_attr_name=kwargs['resource_attribute'],
            max_resource=kwargs['max_epochs'])
    elif kwargs.get('opt_skip_period', 1) > 1:
        skip_optimization = SkipPeriodicallyPredicate(
            init_length=kwargs['opt_skip_init_length'],
            period=kwargs['opt_skip_period'])
    else:
        skip_optimization = None
    # Profiler
    if kwargs.get('profiler', False):
        profiler = GPMXNetSimpleProfiler()
    else:
        profiler = None
    # Conversion from reward to metric (strictly decreasing) and back
    _map_reward = kwargs.get('map_reward', '1_minus_x')
    if isinstance(_map_reward, str):
        _map_reward_name = _map_reward
        supp_map_reward = {'1_minus_x', 'minus_x'}
        assert _map_reward_name in supp_map_reward, \
            "This factory needs map_reward in {}".format(supp_map_reward)
        _map_reward: MapReward = map_reward(
            const=1.0 if _map_reward_name == '1_minus_x' else 0.0)
    else:
        assert isinstance(_map_reward, MapReward), \
            "map_reward must either be string or of MapReward type"
    if is_hyperband:
        # Note: 'min_reward' is needed only to support the exp-decay
        # surrogate model. If not given, it is assumed to be 0.
        min_reward = kwargs.get('min_reward', 0)
        max_metric_value = _map_reward(min_reward)
    else:
        max_metric_value = None
    opt_warmstart = kwargs.get('opt_warmstart', False)

    # Underlying GP regression model
    kernel = Matern52(dimension=hp_ranges_cs.ndarray_size(), ARD=True)
    mean = ScalarMeanFunction()
    if is_hyperband:
        kernel, mean = resource_kernel_factory(
            kwargs['gp_resource_kernel'],
            kernel_x=kernel, mean_x=mean,
            max_metric_value=max_metric_value)
    optimization_config = OptimizationConfig(
        lbfgs_tol=DEFAULT_OPTIMIZATION_CONFIG.lbfgs_tol,
        lbfgs_maxiter=kwargs['opt_maxiter'],
        verbose=kwargs['opt_verbose'],
        n_starts=kwargs['opt_nstarts'])
    debug_writer = None
    if kwargs.get('opt_debug_writer', False):
        fname_msk = kwargs.get('opt_debug_writer_fmask', 'debug_gpr_{}')
        debug_writer = DebugGPRegression(
            fname_msk=fname_msk, rolling_size=5)
    gpmodel = GaussianProcessRegression(
        kernel=kernel, mean=mean,
        optimization_config=optimization_config,
        fit_reset_params=not opt_warmstart,
        debug_writer=debug_writer)
    model_args = GPMXNetModelArgs(
        num_fantasy_samples=kwargs['num_fantasy_samples'],
        random_seed=random_seed,
        active_metric=DEFAULT_METRIC,
        normalize_targets=True)
    debug_log = DebugLogPrinter() if kwargs.get('debug_log', False) else None

    return hp_ranges_cs, random_seed, gpmodel, model_args, profiler, \
           _map_reward, skip_optimization, debug_log
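
A hedged sketch of the kwargs this factory reads (keys taken from the accesses above; values are purely illustrative):

kwargs = dict(
    scheduler='hyperband_stopping',  # any name starting with 'hyperband' triggers the Hyperband path
    configspace=config_space,        # a CS.ConfigurationSpace (or HyperparameterRanges in debug mode)
    random_seed=31415927,
    opt_maxiter=50,
    opt_verbose=False,
    opt_nstarts=2,
    num_fantasy_samples=20,
    gp_resource_kernel='matern52-res-warp',  # only read on the Hyperband path
    min_reward=0.0)
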
def build_kernel():
    return WarpedKernel(kernel=Matern52(dimension=1),
                        warping=Warping(dimension=1,
                                        index_to_range={0: (-4., 4.)}))