Example #1
 def test_normalize_unnormalize(self):
     for dtype in (torch.float, torch.double):
         X = torch.tensor([0.0, 0.25, 0.5], device=self.device, dtype=dtype).view(
             -1, 1
         )
         expected_X_normalized = torch.tensor(
             [0.0, 0.5, 1.0], device=self.device, dtype=dtype
         ).view(-1, 1)
         bounds = torch.tensor([0.0, 0.5], device=self.device, dtype=dtype).view(
             -1, 1
         )
         X_normalized = normalize(X, bounds=bounds)
         self.assertTrue(torch.equal(expected_X_normalized, X_normalized))
         self.assertTrue(torch.equal(X, unnormalize(X_normalized, bounds=bounds)))
         X2 = torch.tensor(
             [[0.25, 0.125, 0.0], [0.25, 0.0, 0.5]], device=self.device, dtype=dtype
         ).transpose(1, 0)
         expected_X2_normalized = torch.tensor(
             [[1.0, 0.5, 0.0], [0.5, 0.0, 1.0]], device=self.device, dtype=dtype
         ).transpose(1, 0)
         bounds2 = torch.tensor(
             [[0.0, 0.0], [0.25, 0.5]], device=self.device, dtype=dtype
         )
         X2_normalized = normalize(X2, bounds=bounds2)
         self.assertTrue(torch.equal(X2_normalized, expected_X2_normalized))
         self.assertTrue(torch.equal(X2, unnormalize(X2_normalized, bounds=bounds2)))
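The round trip that this test checks can be reproduced outside the test harness. Below is a minimal standalone sketch (an illustration, not part of the test suite), assuming only torch and botorch.utils.transforms; bounds is a `2 x d` tensor whose first row holds lower bounds and second row holds upper bounds:

import torch
from botorch.utils.transforms import normalize, unnormalize

bounds = torch.tensor([[0.0, 0.0], [0.25, 0.5]])                           # 2 x d
X = torch.tensor([[0.25, 0.125, 0.0], [0.25, 0.0, 0.5]]).transpose(1, 0)   # n x d

X_unit = normalize(X, bounds=bounds)         # (X - lb) / (ub - lb), lands in [0, 1]^d
X_back = unnormalize(X_unit, bounds=bounds)  # lb + X_unit * (ub - lb)
assert torch.allclose(X, X_back)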
Example #2
def optimize_qehvi_and_get_observation(model, train_obj, sampler):
    """Optimizes the qEHVI acquisition function, and returns a new candidate and observation."""
    # partition non-dominated space into disjoint rectangles
    partitioning = NondominatedPartitioning(ref_point=problem.ref_point,
                                            Y=train_obj)
    acq_func = qExpectedHypervolumeImprovement(
        model=model,
        ref_point=problem.ref_point.tolist(),  # use known reference point
        partitioning=partitioning,
        sampler=sampler,
    )
    # optimize
    candidates, _ = optimize_acqf(
        acq_function=acq_func,
        bounds=standard_bounds,
        q=BATCH_SIZE,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,  # used for initialization heuristic
        options={
            "batch_limit": 5,
            "maxiter": 200,
            "nonnegative": True
        },
        sequential=True,
    )
    # observe new values
    new_x = unnormalize(candidates.detach(), bounds=problem.bounds)
    new_obj = problem(new_x)
    return new_x, new_obj
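For context, this helper is meant to be called from an outer Bayesian optimization loop. A rough sketch of that wiring is shown below; names such as generate_initial_data, initialize_model, N_BATCH, and MC_SAMPLES are assumptions borrowed from the BoTorch multi-objective tutorial this function is adapted from, not definitions made in the snippet itself:

# hypothetical driver loop around optimize_qehvi_and_get_observation
train_x, train_obj = generate_initial_data(n=6)           # assumed helper
mll, model = initialize_model(train_x, train_obj)         # assumed helper

for _ in range(N_BATCH):
    fit_gpytorch_model(mll)                               # fit GP hyperparameters
    sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
    new_x, new_obj = optimize_qehvi_and_get_observation(model, train_obj, sampler)
    train_x = torch.cat([train_x, new_x])
    train_obj = torch.cat([train_obj, new_obj])
    mll, model = initialize_model(train_x, train_obj)     # re-initialize for next round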
    def _torch_optimize_qehvi_and_get_observation(self):
        torch_anti_ideal_point = torch.tensor(
            self._transformed_anti_ideal_point, dtype=torch.double)
        qehvi_partitioning = NondominatedPartitioning(
            ref_point=torch_anti_ideal_point,
            Y=torch.stack(self._torch_model.train_targets, dim=1))
        qehvi_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
        self._acquisition = qExpectedHypervolumeImprovement(
            model=self._torch_model,
            ref_point=self._transformed_anti_ideal_point,
            partitioning=qehvi_partitioning,
            sampler=qehvi_sampler)

        # these options all come from the tutorial
        # and likely need a serious review
        candidates, _ = optimize_acqf(
            acq_function=self._acquisition,
            bounds=self._botorch_domain,
            q=BATCH_SIZE,
            num_restarts=NUM_RESTARTS,
            raw_samples=RAW_SAMPLES,  # used for initialization heuristic
            options={
                "batch_limit": 5,
                "maxiter": 200,
                "nonnegative": True
            },
            sequential=True,
        )

        # is unnormalize necessary here?
        # we are providing the same bounds here and in optimizer
        new_x = unnormalize(candidates.detach(), bounds=self._botorch_domain)
        transformed_eps, transformed_err = self._optimization_handler(new_x)
        return new_x, transformed_eps, transformed_err
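On the question in the comment above: unnormalize(x, bounds) computes lb + x * (ub - lb), so it is only a no-op when the bounds describe the unit cube. If optimize_acqf already searched over the real self._botorch_domain, applying unnormalize afterwards rescales the candidates a second time. A two-line check of that behaviour (illustrative values only):

import torch
from botorch.utils.transforms import unnormalize

bounds = torch.tensor([[0.0], [10.0]])   # 2 x 1: lower and upper bound
x = torch.tensor([[4.0]])                # a point already inside [0, 10]
print(unnormalize(x, bounds))            # tensor([[40.]]) -- not a no-op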
    def get_bayes_pop(self, n_suggestions):
        """
        Parameters
        ----------
        n_suggestions: Number of new suggestions/trial solutions to generate using BO
        Returns
        The new set of trial solutions obtained by optimizing the acquisition function
        -------
        """

        try:
            candidates, _ = optimize_acqf(
                acq_function=self.acquisition,
                bounds=self.min_max_bounds,
                q=n_suggestions,
                num_restarts=10,
                raw_samples=512,  # used for initialization heuristic
                sequential=True)

            bayes_pop = unnormalize(candidates, self.torch_bounds).numpy()
        except Exception as e:
            print('Error in get_bayes_pop(): {}'.format(e))
            raise  # re-raise: `bayes_pop` would otherwise be undefined below

        population = self.search_space.unwarp(
            bayes_pop)  # Translate the solution back to the original space

        return population
Example #5
def optimize_qparego_and_get_observation(model, train_obj, sampler):
    """Samples a set of random weights for each candidate in the batch, performs sequential greedy optimization
    of the qParEGO acquisition function, and returns a new candidate and observation."""
    acq_func_list = []
    for _ in range(BATCH_SIZE):
        weights = sample_simplex(problem.num_objectives, **tkwargs).squeeze()
        objective = GenericMCObjective(
            get_chebyshev_scalarization(weights=weights, Y=train_obj))
        acq_func = qExpectedImprovement(  # pyre-ignore: [28]
            model=model,
            objective=objective,
            best_f=objective(train_obj).max(),
            sampler=sampler,
        )
        acq_func_list.append(acq_func)
    # optimize
    candidates, _ = optimize_acqf_list(
        acq_function_list=acq_func_list,
        bounds=standard_bounds,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,  # used for initialization heuristic
        options={
            "batch_limit": 5,
            "maxiter": 200
        },
    )
    # observe new values
    new_x = unnormalize(candidates.detach(), bounds=problem.bounds)
    new_obj = problem(new_x)
    return new_x, new_obj
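The piece doing the multi-objective-to-single-objective reduction above is the random Chebyshev scalarization. A small isolated sketch of just that step (a two-objective train_obj is assumed for illustration):

import torch
from botorch.utils.sampling import sample_simplex
from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization

train_obj = torch.rand(20, 2)                         # hypothetical observed objectives, n x m
weights = sample_simplex(2).squeeze()                 # random weights on the 2-simplex
scalarization = get_chebyshev_scalarization(weights=weights, Y=train_obj)
scalar_obj = scalarization(train_obj)                 # n-dim tensor of scalarized values
best_f = scalar_obj.max()                             # what qExpectedImprovement above uses as best_f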
def get_calibrated_params(*,
                          country,
                          area,
                          multi_beta_calibration,
                          maxiters=None):
    """
    Returns calibrated parameters for a `country` and an `area`
    """

    if maxiters:
        param_dict = get_calibrated_params_limited_iters(
            country,
            area,
            multi_beta_calibration=multi_beta_calibration,
            maxiters=maxiters,
        )
        return param_dict

    state = load_state(calibration_states[country][area])
    theta = state['train_theta']
    best_observed_idx = state['best_observed_idx']
    norm_params = theta[best_observed_idx]
    param_bounds = (calibration_model_param_bounds_multi
                    if multi_beta_calibration else
                    calibration_model_param_bounds_single)
    sim_bounds = pdict_to_parr(pdict=param_bounds,
                               multi_beta_calibration=multi_beta_calibration).T
    params = transforms.unnormalize(norm_params, sim_bounds)
    param_dict = parr_to_pdict(parr=params,
                               multi_beta_calibration=multi_beta_calibration)
    return param_dict
Example #7
 def test_evaluate_slack_true(self):
     for dtype in (torch.float, torch.double):
         for f in self.functions:
             f.to(device=self.device, dtype=dtype)
             X = unnormalize(torch.rand(1, f.dim), bounds=f.bounds)
             slack = f.evaluate_slack_true(X)
             self.assertEqual(slack.shape, torch.Size([1, f.num_constraints]))
 def objective_function(x: torch.Tensor):
     # Caution: unnormalize and maximize
     x = unnormalize(x, bounds=problem_bounds)
     x = x.cpu().numpy().astype(np.float64)  # caution
     res = problem.evaluate(x)
     res['objs'] = [-y for y in res['objs']]
     return res  # Caution: negative values imply feasibility in botorch
def get_calibrated_params_from_path(path,
                                    estimate_mobility_reduction=False,
                                    multi_beta_calibration=False):
    """
    Returns calibrated parameters for a `country` and an `area`
    """

    state = load_state(path)
    theta = state['train_theta']
    best_observed_idx = state['best_observed_idx']
    norm_params = theta[best_observed_idx]
    param_bounds = (calibration_model_param_bounds_multi
                    if multi_beta_calibration else
                    calibration_model_param_bounds_single)

    if estimate_mobility_reduction:
        param_bounds['p_stay_home'] = [0.0, 1.0]

    sim_bounds = pdict_to_parr(
        pdict=param_bounds,
        multi_beta_calibration=multi_beta_calibration,
        estimate_mobility_reduction=estimate_mobility_reduction).T

    params = transforms.unnormalize(norm_params, sim_bounds)
    param_dict = parr_to_pdict(
        parr=params,
        multi_beta_calibration=False,
        estimate_mobility_reduction=estimate_mobility_reduction)
    return param_dict
 def objective_function(x: torch.Tensor):
     # Caution: unnormalize and maximize
     x = unnormalize(x, bounds=problem_bounds)
     x = x.cpu().numpy().astype(np.float64)  # caution
     res = problem.evaluate(x)
     objs = [-y for y in res['objs']]
     return objs
Example #11
def eval_objective(x, dim=10):
    """This is a helper function we use to unnormalize and evalaute a point"""

    fun = Ackley(dim=dim, negate=True).to(dtype=dtype, device=device)
    fun.bounds[0, :].fill_(low)
    fun.bounds[1, :].fill_(up)
    dim = fun.dim
    lb, ub = fun.bounds
    return fun(unnormalize(x, fun.bounds))
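A brief usage sketch: eval_objective expects points in the unit cube and maps them to [low, up]^d internally, so candidates are typically drawn in [0, 1]^d first. The dtype, device, low, and up globals are assumed here exactly as in the helper itself:

from torch.quasirandom import SobolEngine

X_unit = SobolEngine(dimension=10, scramble=True).draw(16).to(dtype=dtype, device=device)
Y = torch.tensor([eval_objective(x) for x in X_unit], dtype=dtype, device=device)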
Example #12
 def test_normalize_unnormalize(self, cuda=False):
     tkwargs = {"device": torch.device("cuda" if cuda else "cpu")}
     for dtype in (torch.float, torch.double):
         tkwargs["dtype"] = dtype
         X = torch.tensor([0.0, 0.25, 0.5], **tkwargs).view(-1, 1)
         expected_X_normalized = torch.tensor([0.0, 0.5, 1.0], **tkwargs).view(-1, 1)
         bounds = torch.tensor([0.0, 0.5], **tkwargs).view(-1, 1)
         X_normalized = normalize(X, bounds=bounds)
         self.assertTrue(torch.equal(expected_X_normalized, X_normalized))
         self.assertTrue(torch.equal(X, unnormalize(X_normalized, bounds=bounds)))
         X2 = torch.tensor(
             [[0.25, 0.125, 0.0], [0.25, 0.0, 0.5]], **tkwargs
         ).transpose(1, 0)
         expected_X2_normalized = torch.tensor(
             [[1.0, 0.5, 0.0], [0.5, 0.0, 1.0]], **tkwargs
         ).transpose(1, 0)
         bounds2 = torch.tensor([[0.0, 0.0], [0.25, 0.5]], **tkwargs)
         X2_normalized = normalize(X2, bounds=bounds2)
         self.assertTrue(torch.equal(X2_normalized, expected_X2_normalized))
         self.assertTrue(torch.equal(X2, unnormalize(X2_normalized, bounds=bounds2)))
Example #13
 def f_2(self, X: Tensor) -> Tensor:
     X = torch.cat(
         [X, torch.zeros_like(X)],
         dim=-1,
     )
     # Cut out the first part of the function.
     X = X * 0.95 + 0.03
     X = unnormalize(X, self.levy.bounds.to(X))
     Y = self.levy(X).unsqueeze(-1)
     Y -= X[..., :1].pow(2) * 0.75
     return Y
Example #14
 def test_normalize_unnormalize(self, cuda=False):
     tkwargs = {"device": torch.device("cuda" if cuda else "cpu")}
     for dtype in (torch.float, torch.double):
         tkwargs["dtype"] = dtype
         X = torch.tensor([0.0, 0.25, 0.5], **tkwargs).view(-1, 1)
         expected_X_normalized = torch.tensor([0.0, 0.5, 1.0],
                                              **tkwargs).view(-1, 1)
         bounds = torch.tensor([0.0, 0.5], **tkwargs).view(-1, 1)
         X_normalized = normalize(X, bounds=bounds)
         self.assertTrue(torch.equal(expected_X_normalized, X_normalized))
         self.assertTrue(
             torch.equal(X, unnormalize(X_normalized, bounds=bounds)))
         X2 = torch.tensor([[0.25, 0.125, 0.0], [0.25, 0.0, 0.5]],
                           **tkwargs).transpose(1, 0)
         expected_X2_normalized = torch.tensor(
             [[1.0, 0.5, 0.0], [0.5, 0.0, 1.0]], **tkwargs).transpose(1, 0)
         bounds2 = torch.tensor([[0.0, 0.0], [0.25, 0.5]], **tkwargs)
         X2_normalized = normalize(X2, bounds=bounds2)
         self.assertTrue(torch.equal(X2_normalized, expected_X2_normalized))
         self.assertTrue(
             torch.equal(X2, unnormalize(X2_normalized, bounds=bounds2)))
def get_calibrated_params_limited_iters(country, area, multi_beta_calibration,
                                        maxiters):
    """
    Returns calibrated parameters using only the first `maxiters` iterations of BO.
    """

    state = load_state(calibration_states[country][area])
    train_G = state['train_G']
    train_G = train_G[:min(maxiters, len(train_G))]
    train_theta = state['train_theta']

    mob_settings = calibration_mob_paths[country][area][0]
    with open(mob_settings, 'rb') as fp:
        mob_kwargs = pickle.load(fp)
    mob = MobilitySimulator(**mob_kwargs)

    data_start_date = calibration_start_dates[country][area]
    data_end_date = calibration_lockdown_dates[country]['end']

    unscaled_area_cases = collect_data_from_df(
        country=country,
        area=area,
        datatype='new',
        start_date_string=data_start_date,
        end_date_string=data_end_date)
    assert (len(unscaled_area_cases.shape) == 2)

    # Scale down cases based on number of people in town and region
    sim_cases = downsample_cases(unscaled_area_cases, mob_kwargs)
    n_days, n_age = sim_cases.shape

    G_obs = torch.tensor(sim_cases).reshape(1, n_days * n_age)
    G_obs_aggregate = torch.tensor(sim_cases).sum(dim=-1)

    def objective(G):
        return -(G - G_obs_aggregate).pow(2).sum(dim=-1) / n_days

    train_G_objectives = objective(train_G)
    best_observed_idx = train_G_objectives.argmax()
    best_observed_obj = train_G_objectives[best_observed_idx].item()

    param_bounds = (calibration_model_param_bounds_multi
                    if multi_beta_calibration else
                    calibration_model_param_bounds_single)
    sim_bounds = pdict_to_parr(pdict=param_bounds,
                               multi_beta_calibration=multi_beta_calibration).T

    normalized_calibrated_params = train_theta[best_observed_idx]
    calibrated_params = transforms.unnormalize(normalized_calibrated_params,
                                               sim_bounds)
    calibrated_params = parr_to_pdict(
        parr=calibrated_params, multi_beta_calibration=multi_beta_calibration)
    return calibrated_params
Example #16
def sample_truncated_normal_perturbations(
    X: Tensor,
    n_discrete_points: int,
    sigma: float,
    bounds: Tensor,
    qmc: bool = True,
) -> Tensor:
    r"""Sample points around `X`.

    Sample perturbed points around `X` such that the added perturbations
    are sampled from N(0, sigma^2 I) and truncated to be within [0,1]^d.

    Args:
        X: A `n x d`-dim tensor starting points.
        n_discrete_points: The number of points to sample.
        sigma: The standard deviation of the additive gaussian noise for
            perturbing the points.
        bounds: A `2 x d`-dim tensor containing the bounds.
        qmc: A boolean indicating whether to use qmc.

    Returns:
        A `n_discrete_points x d`-dim tensor containing the sampled points.
    """
    X = normalize(X, bounds=bounds)
    d = X.shape[1]
    # sample points from N(X_center, sigma^2 I), truncated to be within
    # [0, 1]^d.
    if X.shape[0] > 1:
        rand_indices = torch.randint(X.shape[0], (n_discrete_points, ),
                                     device=X.device)
        X = X[rand_indices]
    if qmc:
        std_bounds = torch.zeros(2, d, dtype=X.dtype, device=X.device)
        std_bounds[1] = 1
        u = draw_sobol_samples(bounds=std_bounds, n=n_discrete_points,
                               q=1).squeeze(1)
    else:
        u = torch.rand((n_discrete_points, d), dtype=X.dtype, device=X.device)
    # compute bounds to sample from
    a = -X
    b = 1 - X
    # compute z-score of bounds
    alpha = a / sigma
    beta = b / sigma
    normal = Normal(0, 1)
    cdf_alpha = normal.cdf(alpha)
    # use inverse transform
    perturbation = normal.icdf(cdf_alpha + u *
                               (normal.cdf(beta) - cdf_alpha)) * sigma
    # add perturbation and clip points that are still outside
    perturbed_X = (X + perturbation).clamp(0.0, 1.0)
    return unnormalize(perturbed_X, bounds=bounds)
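A short usage sketch (values are purely illustrative): perturb two incumbent points inside their bounds to build a discrete candidate set.

import torch

X = torch.tensor([[0.1, 2.0], [0.4, 3.0]])        # 2 x d incumbents in the original space
bounds = torch.tensor([[0.0, 0.0], [1.0, 5.0]])   # 2 x d bounds
X_pert = sample_truncated_normal_perturbations(
    X, n_discrete_points=128, sigma=0.05, bounds=bounds
)
# X_pert is 128 x d; each row lies near one of the incumbents and within the bounds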
Example #17
        def optimize_acqf_and_get_observation(acq_func):
            """Optimizes the acquisition function,
            and returns a new candidate and a noisy observation"""
            candidates, _ = optimize_acqf(
                acq_function=acq_func,
                bounds=torch.stack([
                    torch.zeros(d, dtype=dt),
                    torch.ones(d, dtype=dt),
                ]),
                q=1,
                num_restarts=10,
                raw_samples=200,
            )

            x = unnormalize(candidates.detach(), bounds=bounds)
            print('Hyper-parameter: ' + str(x))
            obj = self.train(x.view(-1)).unsqueeze(-1)
            print('Error: ' + str(obj))
            return x, obj
Example #18
def qparego_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization.

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is larger than three.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)

    weights = sample_simplex(n_objectives).squeeze()
    scalarization = get_chebyshev_scalarization(weights=weights, Y=train_obj)

    if train_con is not None:
        train_y = torch.cat([train_obj, train_con], dim=-1)

        constraints = []
        n_constraints = train_con.size(1)

        for i in range(n_constraints):
            constraints.append(lambda Z, i=i: Z[..., -n_constraints + i])

        objective = ConstrainedMCObjective(
            objective=lambda Z: scalarization(Z[..., :n_objectives]),
            constraints=constraints,
        )
    else:
        train_y = train_obj

        objective = GenericMCObjective(scalarization)

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x,
                         train_y,
                         outcome_transform=Standardize(m=train_y.size(-1)))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    acqf = qExpectedImprovement(
        model=model,
        best_f=objective(train_y).max(),
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={
            "batch_limit": 5,
            "maxiter": 200
        },
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
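This function is designed to be handed to Optuna rather than called by hand. A hedged wiring sketch, using a toy two-objective study as a placeholder; the same wiring applies to the qEHVI and qEI variants in the other examples:

import optuna
from optuna.integration import BoTorchSampler

def objective(trial):
    x = trial.suggest_float("x", 0.0, 1.0)
    y = trial.suggest_float("y", 0.0, 1.0)
    return x ** 2 + y, (x - 1.0) ** 2 + y      # two objectives, both minimized

sampler = BoTorchSampler(candidates_func=qparego_candidates_func, n_startup_trials=10)
study = optuna.create_study(directions=["minimize", "minimize"], sampler=sampler)
study.optimize(objective, n_trials=30)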
Example #19
def qehvi_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Hypervolume Improvement (qEHVI).

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is three or less.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)

    if train_con is not None:
        train_y = torch.cat([train_obj, train_con], dim=-1)

        is_feas = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[is_feas]

        constraints = []
        n_constraints = train_con.size(1)

        for i in range(n_constraints):
            constraints.append(lambda Z, i=i: Z[..., -n_constraints + i])
        additional_qehvi_kwargs = {
            "objective":
            IdentityMCMultiOutputObjective(outcomes=list(range(n_objectives))),
            "constraints":
            constraints,
        }
    else:
        train_y = train_obj

        train_obj_feas = train_obj

        additional_qehvi_kwargs = {}

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x,
                         train_y,
                         outcome_transform=Standardize(m=train_y.shape[-1]))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    # Approximate box decomposition similar to Ax when the number of objectives is large.
    # https://github.com/facebook/Ax/blob/master/ax/models/torch/botorch_moo_defaults
    if n_objectives > 2:
        alpha = 10**(-8 + n_objectives)
    else:
        alpha = 0.0
    partitioning = NondominatedPartitioning(num_outcomes=n_objectives,
                                            Y=train_obj_feas,
                                            alpha=alpha)

    ref_point = train_obj.min(dim=0).values - 1e-8
    ref_point_list = ref_point.tolist()

    acqf = qExpectedHypervolumeImprovement(
        model=model,
        ref_point=ref_point_list,
        partitioning=partitioning,
        sampler=SobolQMCNormalSampler(num_samples=256),
        **additional_qehvi_kwargs,
    )

    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={
            "batch_limit": 5,
            "maxiter": 200,
            "nonnegative": True
        },
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
Example #20
    def generate_initial_observations(n, logger):
        """
        Takes an integer `n` and generates `n` initial observations
        from the black box function using Sobol random parameter settings
        in the unit cube. Returns parameter setting and black box function outputs
        """

        if n <= 0:
            raise ValueError(
                'qKnowledgeGradient and GP need at least one observation to be defined properly.')

        # sobol sequence
        # new_thetas: [n, n_params]
        new_thetas = torch.tensor(
            sobol_seq.i4_sobol_generate(n_params, n), dtype=torch.float)

        # simulator observations
        # new_G, new_G_sem: [n, n_days * n_age] (flattened outputs)
        new_G = torch.zeros((n, n_days * n_age), dtype=torch.float)
        new_G_sem = torch.zeros((n, n_days * n_age), dtype=torch.float)

        for i in range(n):

            t0 = time.time()

            # get mean and standard error of mean (sem) of every simulation output
            G, G_sem = composite_simulation(new_thetas[i, :])
            new_G[i, :] = G
            new_G_sem[i, :] = G_sem

            # log
            G_objectives = objective(new_G[:i+1])
            best_idx = G_objectives.argmax()
            best = G_objectives[best_idx].item()
            current = objective(G).item()
            case_diff = (
                G.reshape(n_days, n_age)[-1].sum()
                - G_obs.reshape(n_days, n_age)[-1].sum())

            t1 = time.time()
            logger.log(
                i=i - n,
                time=t1 - t0,
                best=best,
                objective=current,
                case_diff=case_diff,
                theta=transforms.unnormalize(new_thetas[i, :].detach().squeeze(), sim_bounds)
            )

            # save state
            state = {
                'train_theta': new_thetas[:i+1],
                'train_G': new_G[:i+1],
                'train_G_sem': new_G_sem[:i+1],
                'best_observed_obj': best,
                'best_observed_idx': best_idx,
            }
            save_state(state, logger.filename + '_init')

        # compute best objective from simulations
        f = objective(new_G)
        best_f_idx = f.argmax()
        best_f = f[best_f_idx].item()

        return new_thetas, new_G, new_G_sem, best_f, best_f_idx
Example #21
    def composite_simulation(norm_params):
        """
        Takes a set of normalized (unit cube) BO parameters
        and returns simulator output means and standard errors based on multiple
        random restarts. This corresponds to the black-box function.
        """

        # un-normalize normalized params to obtain simulation parameters
        params = transforms.unnormalize(norm_params, sim_bounds)

        # finalize settings based which parameters are calibrated
        kwargs = copy.deepcopy(launch_kwargs)
        if args.measures_optimized:

            '''
            Measures are calibrated
            '''

            measure_params = parr_to_pdict(params, measures_optimized=args.measures_optimized)

            # social distancing measures: calibration is only done for `SocialDistancingForAllMeasure` for now
            measure_list_ = [
                SocialDistancingForPositiveMeasure(
                    t_window=Interval(0.0, max_time), p_stay_home=1.0),
                SocialDistancingForPositiveMeasureHousehold(
                    t_window=Interval(0.0, max_time), p_isolate=1.0),
                SocialDistancingForAllMeasure(
                    t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                      TO_HOURS * days_until_lockdown_end),
                    p_stay_home=measure_params['p_stay_home']),
            ]
            
            # close sites if specified
            if args.measures_close:
                beta_multipliers = {'education': 1.0, 'social': 1.0,
                                'bus_stop': 1.0, 'office': 1.0, 'supermarket': 1.0}
                for category in args.measures_close:
                    if category in beta_multipliers.keys():
                        beta_multipliers[category] = 0.0
                    else:
                        raise ValueError(f'Site type `{category}` passed in `--measures_close` is invalid.\n'
                                         f'Available are {str(list(beta_multipliers.keys()))}')
                
                measure_list_.append(BetaMultiplierMeasureByType(
                    t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                      TO_HOURS * days_until_lockdown_end),
                    beta_multiplier=beta_multipliers
                ))
            
            kwargs['measure_list'] = MeasureList(measure_list_)

            # get optimized model parameters for this country and area
            calibrated_model_params = settings_optimized_town_params[args.country][args.area]
            if calibrated_model_params is None:
                raise ValueError(f'Cannot optimize measures for {args.country}-{args.area} because model parameters ' 
                                  'have not been fitted yet. Set values in `calibration_settings.py`')
            kwargs['params'] = calibrated_model_params

        else:

            '''
            Model parameters calibrated
            '''
            
            kwargs['measure_list'] = MeasureList([
                SocialDistancingForPositiveMeasure(
                    t_window=Interval(0.0, max_time), p_stay_home=1.0),
                SocialDistancingForPositiveMeasureHousehold(
                    t_window=Interval(0.0, max_time), p_isolate=1.0),
            ])

            kwargs['params'] = parr_to_pdict(params, measures_optimized=args.measures_optimized)


        # run simulations in parallel
        summary = launch_parallel_simulations(**kwargs)

        # (random_repeats, n_people)
        posi_started = torch.tensor(summary.state_started_at['posi'])
        posi_started -= test_lag_days * TO_HOURS # account for test lag

        # (random_repeats, n_days)
        age_groups = torch.tensor(summary.people_age)
        posi_cumulative = convert_timings_to_cumulative_daily(
            timings=posi_started, age_groups=age_groups, time_horizon=n_days * TO_HOURS)

        if posi_cumulative.shape[0] <= 1:
            raise ValueError('Must run at least 2 random restarts per setting to get estimate of noise in observation.')

        # compute mean and standard error of means        
        G = torch.mean(posi_cumulative, dim=0)
        G_sem = torch.std(posi_cumulative, dim=0) / math.sqrt(posi_cumulative.shape[0])

        # make sure noise is not zero, for non-degeneracy
        G_sem = torch.max(G_sem, MIN_NOISE)

        # flatten
        G = G.reshape(1, n_days * n_age)
        G_sem = G_sem.reshape(1, n_days * n_age)

        return G, G_sem
Example #22
 def unnormalize_theta(theta):
     '''
     Computes unnormalized parameters
     '''
     return transforms.unnormalize(theta, sim_bounds)
Example #23
def run_RGPE(test_task: int, objective, bounds, base_model_list):
    input_dim = bounds.shape[1]
    best_rgpe_all = []
    best_argmax_rgpe_all = []
    # Average over multiple trials
    for trial in range(N_TRIALS):
        print(f"Trial {trial + 1} of {N_TRIALS}")
        best_BMs = []
        best_rgpe = []
        # Initial random observations
        raw_x = draw_sobol_samples(bounds=bounds,
                                   n=RANDOM_INITIALIZATION_SIZE,
                                   q=1,
                                   seed=trial).squeeze(1)
        train_x = normalize(raw_x, bounds=bounds)
        train_y_noiseless = objective(raw_x, shift=test_task)
        train_y = train_y_noiseless + noise_std * torch.randn_like(
            train_y_noiseless)
        train_yvar = torch.full_like(train_y, noise_std**2)
        # keep track of the best observed point at each iteration
        best_value = train_y.max().item()
        best_rgpe.append(best_value)

        # Run N_BATCH rounds of BayesOpt after the initial random batch
        for iteration in range(N_BATCH):
            target_model = get_fitted_model(train_x, train_y, train_yvar)
            model_list = base_model_list + [target_model]
            rank_weights = compute_rank_weights(
                train_x,
                train_y,
                base_model_list,
                target_model,
                NUM_POSTERIOR_SAMPLES,
            )

            # create model and acquisition function
            rgpe_model = RGPE(model_list, rank_weights)
            sampler_qnei = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            qNEI = qNoisyExpectedImprovement(
                model=rgpe_model,
                X_baseline=train_x,
                sampler=sampler_qnei,
            )

            # optimize
            candidate, _ = optimize_acqf(
                acq_function=qNEI,
                bounds=bounds,
                q=Q_BATCH_SIZE,
                num_restarts=N_RESTARTS,
                raw_samples=N_RESTART_CANDIDATES,
            )

            # fetch the new values
            new_x = candidate.detach()
            new_y_noiseless = objective(unnormalize(new_x, bounds=bounds),
                                        shift=test_task)
            new_y = new_y_noiseless + noise_std * torch.randn_like(
                new_y_noiseless)
            new_yvar = torch.full_like(new_y, noise_std**2)

            # update training points
            train_x = torch.cat((train_x, new_x))
            train_y = torch.cat((train_y, new_y))
            train_yvar = torch.cat((train_yvar, new_yvar))

            # get the new best observed value
            best_value = train_y.max().item()
            best_idx = torch.argmax(train_y).item()
            best_candidate = train_x[best_idx].view(1, -1)
            _, best_BM = objective(unnormalize(best_candidate, bounds=bounds),
                                   shift=test_task,
                                   include_BMs=True)
            best_rgpe.append(best_value)
            best_BMs.append(best_BM)

        best_rgpe_all.append(best_rgpe)
        best_argmax_rgpe_all.append(best_BMs)
    BM_winner_idx = np.argmax(np.array(best_rgpe_all)[:, -1], axis=0)
    BM_winner = np.reshape(np.array(best_argmax_rgpe_all[BM_winner_idx][-1]),
                           (2, input_dim))
    return BM_winner
 def mixin_tree(T: Tensor, bounds: Tensor, alpha: float) -> Tensor:
     return (1 - alpha) * T + alpha * unnormalize(torch.rand_like(T),
                                                  bounds)
 def mixin_layer(X: Tensor, bounds: Tensor, eta: float) -> Tensor:
     perturbations = unnormalize(B.sample(X.shape).squeeze(-1), bounds)
     return (1 - eta) * X + eta * perturbations
Example #26
 def evaluate_slack_true(self, X: Tensor) -> Tensor:
     X_tf = unnormalize(X, self.con_bounds)
     return 50 - (X_tf[..., 0:1] - 2.5).pow(2) - (X_tf[..., 1:2] -
                                                  7.5).pow(2)
Example #27
    def generate_initial_observations(n, logger, loaded_init_theta=None, loaded_init_G=None, loaded_init_G_sem=None):
        """
        Takes an integer `n` and generates `n` initial observations
        from the black box function using Sobol random parameter settings
        in the unit cube. Returns parameter setting and black box function outputs.
        If `loaded_init_theta/G/G_sem` are specified, initialization is loaded (possibly partially, in which
        case the initialization using the Sobol random sequence is continued where left off).
        """

        if n <= 0:
            raise ValueError(
                'qKnowledgeGradient and GP need at least one observation to be defined properly.')

        # sobol sequence proposal points
        # new_thetas: [n, n_params]
        new_thetas = torch.tensor(
            sobol_seq.i4_sobol_generate(n_params, n), dtype=torch.float)

        # check whether initial observations are loaded
        loaded = (loaded_init_theta is not None
              and loaded_init_G is not None 
              and loaded_init_G_sem is not None)
        if loaded:
            n_loaded = loaded_init_theta.shape[0] # loaded no. of observations total
            n_loaded_init = min(n_loaded, n)      # loaded no. of quasi-random initialization observations
            n_init = max(n_loaded, n)             # final no. of observations returned, at least quasi-random initializations

            # check whether loaded proposal points are same as without loading observations
            try:
                assert(np.allclose(loaded_init_theta[:n_loaded_init], new_thetas[:n_loaded_init]))
            except AssertionError:
                print(
                    '\n\n\n===> Warning: parameters of loaded initial observations '
                    'do not coincide with initialization that would have been done. '
                    'Double check simulation, ninit, and parameter bounds, which could change '
                    'the initial random Sobol sequence. \nThe loaded parameter settings are used. \n\n\n'
                )
            
            if n_init > n:
                new_thetas = loaded_init_theta # size of tensor increased to `n_init`, as more than Sobol init points loaded

        else:
            n_loaded = 0       # loaded no. of observations total
            n_loaded_init = 0  # loaded no. of quasi-random initialization observations
            n_init = n         # final no. of observations returned, at least quasi-random initializations

        # instantiate simulator observation tensors
        if per_age_group_objective:
            # new_G, new_G_sem: [n_init, n_days * n_age] (flattened outputs)
            new_G = torch.zeros((n_init, n_days * n_age), dtype=torch.float)
            new_G_sem = torch.zeros((n_init, n_days * n_age), dtype=torch.float)
        else:
            # new_G, new_G_sem: [n_init, n_days]
            new_G = torch.zeros((n_init, n_days), dtype=torch.float)
            new_G_sem = torch.zeros((n_init, n_days), dtype=torch.float)

        # generate `n` initial evaluations at quasi random settings; if applicable, skip and load expensive evaluation result
        for i in range(n_init):
            
            # if loaded, use the initial observation for this parameter setting
            if loaded and i <= n_loaded - 1:
                new_thetas[i] = loaded_init_theta[i]
                G, G_sem = loaded_init_G[i], loaded_init_G_sem[i]
                walltime = 0.0

            # if not loaded, evaluate as usual
            else:
                t0 = time.time()
                G, G_sem = composite_simulation(new_thetas[i])
                walltime = time.time() - t0

            new_G[i] = G
            new_G_sem[i] = G_sem

            # log
            G_objectives = objective(new_G[:i+1])
            best_idx = G_objectives.argmax()
            best = G_objectives[best_idx].item()
            current = objective(G).item()

            if per_age_group_objective:
                case_diff = G.reshape(n_days, n_age)[-1].sum() - G_obs_aggregate[-1]
            else:
                case_diff = G[-1] - G_obs_aggregate[-1]
            
            logger.log(
                i=i - n,
                time=walltime,
                best=best,
                objective=current,
                case_diff=case_diff,
                theta=transforms.unnormalize(new_thetas[i, :].detach().squeeze(), sim_bounds)
            )

            # save state
            state = {
                'train_theta': new_thetas[:i+1],
                'train_G': new_G[:i+1],
                'train_G_sem': new_G_sem[:i+1],
                'best_observed_obj': best,
                'best_observed_idx': best_idx,
            }
            save_state(state, logger.filename)

        # compute best objective from simulations
        f = objective(new_G)
        best_f_idx = f.argmax()
        best_f = f[best_f_idx].item()

        return new_thetas, new_G, new_G_sem, best_f, best_f_idx
Example #28
def qei_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Improvement (qEI).

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with single-objective optimization.

    Args:
        train_x:
            Previous parameter configurations. A ``torch.Tensor`` of shape
            ``(n_trials, n_params)``. ``n_trials`` is the number of already observed trials
            and ``n_params`` is the number of parameters. ``n_params`` may be larger than the
            actual number of parameters if categorical parameters are included in the search
            space, since these parameters are one-hot encoded.
            Values are not normalized.
        train_obj:
            Previously observed objectives. A ``torch.Tensor`` of shape
            ``(n_trials, n_objectives)``. ``n_trials`` is identical to that of ``train_x``.
            ``n_objectives`` is the number of objectives. Observations are not normalized.
        train_con:
            Objective constraints. A ``torch.Tensor`` of shape ``(n_trials, n_constraints)``.
            ``n_trials`` is identical to that of ``train_x``. ``n_constraints`` is the number of
            constraints. A constraint is violated if strictly larger than 0. If no constraints are
            involved in the optimization, this argument will be :obj:`None`.
        bounds:
            Search space bounds. A ``torch.Tensor`` of shape ``(n_params, 2)``. ``n_params`` is
            identical to that of ``train_x``. The first and the second column correspond to the
            lower and upper bounds for each parameter respectively.

    Returns:
        Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``.

    """

    if train_obj.size(-1) != 1:
        raise ValueError("Objective may only contain single values with qEI.")
    if train_con is not None:
        train_y = torch.cat([train_obj, train_con], dim=-1)

        is_feas = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[is_feas]

        if train_obj_feas.numel() == 0:
            # TODO(hvy): Do not use 0 as the best observation.
            _logger.warning(
                "No objective values are feasible. Using 0 as the best objective in qEI."
            )
            best_f = torch.zeros(())
        else:
            best_f = train_obj_feas.max()

        constraints = []
        n_constraints = train_con.size(1)
        for i in range(n_constraints):
            constraints.append(lambda Z, i=i: Z[..., -n_constraints + i])
        objective = ConstrainedMCObjective(
            objective=lambda Z: Z[..., 0],
            constraints=constraints,
        )
    else:
        train_y = train_obj

        best_f = train_obj.max()

        objective = None  # Using the default identity objective.

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x,
                         train_y,
                         outcome_transform=Standardize(m=train_y.size(-1)))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    acqf = qExpectedImprovement(
        model=model,
        best_f=best_f,
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=10,
        raw_samples=512,
        options={
            "batch_limit": 5,
            "maxiter": 200
        },
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
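One detail worth spelling out: the GP is fit on train_y = [train_obj, train_con], so each constraint callable indexes one of the last n_constraints outputs of a posterior sample Z. A toy check of that indexing (shapes and values are illustrative):

import torch

n_constraints = 2
Z = torch.randn(5, 3)   # samples over 1 objective column + 2 constraint columns
constraints = [lambda Z, i=i: Z[..., -n_constraints + i] for i in range(n_constraints)]
assert torch.equal(constraints[0](Z), Z[..., 1])   # first constraint column
assert torch.equal(constraints[1](Z), Z[..., 2])   # second constraint column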
def evaluate(mth, run_i, seed):
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(x: torch.Tensor):
        # Caution: unnormalize and maximize
        x = unnormalize(x, bounds=problem_bounds)
        x = x.cpu().numpy().astype(np.float64)  # caution
        res = problem.evaluate(x)
        res['objs'] = [-y for y in res['objs']]
        return res  # Caution: negative values imply feasibility in botorch

    hv_diffs = []
    time_list = []
    global_start_time = time.time()

    # random seed
    np.random.seed(seed)
    torch.manual_seed(seed)

    # call helper functions to generate initial training data and initialize model
    train_x, train_obj, train_con = generate_initial_data(
        initial_runs, objective_function, time_list, global_start_time)
    # fix bug: find feasible
    real_initial_runs = initial_runs
    while real_initial_runs < max_runs:
        # compute feasible observations
        is_feas = (train_con <= 0).all(dim=-1)
        # compute points that are better than the known reference point
        better_than_ref = (train_obj > problem.ref_point).all(dim=-1)
        if (is_feas & better_than_ref).any():
            break
        train_x, train_obj, train_con = expand_initial_data(
            train_x, train_obj, train_con, objective_function, time_list,
            global_start_time)
        real_initial_runs += 1
        print('=== Expand initial data to find feasible. Iter =',
              real_initial_runs)
    mll, model = initialize_model(train_x, train_obj, train_con)

    # for plot
    X_init = train_x.cpu().numpy().astype(np.float64)
    Y_init = -1 * train_obj.cpu().numpy().astype(np.float64)
    # calculate hypervolume of init data
    for i in range(real_initial_runs):
        train_obj_i = train_obj[:i + 1]
        train_con_i = train_con[:i + 1]
        # compute pareto front
        is_feas_i = (train_con_i <= 0).all(dim=-1)
        feas_train_obj_i = train_obj_i[is_feas_i]
        pareto_mask = is_non_dominated(feas_train_obj_i)
        pareto_y = feas_train_obj_i[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)

    # run (max_runs - real_initial_runs) rounds of BayesOpt after the initial random batch
    for iteration in range(real_initial_runs + 1, max_runs + 1):
        t0 = time.time()
        try:
            # fit the models
            fit_gpytorch_model(mll)

            # define the qEHVI acquisition modules using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # compute feasible observations
            is_feas = (train_con <= 0).all(dim=-1)
            # compute points that are better than the known reference point
            better_than_ref = (train_obj > problem.ref_point).all(dim=-1)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs,
                # use observations that are better than the specified reference point and feasible
                Y=train_obj[better_than_ref & is_feas],
            )
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(
                ),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
                # define an objective that specifies which outcomes are the objectives
                objective=IdentityMCMultiOutputObjective(
                    outcomes=list(range(problem.num_objs))),
                # specify that the constraint is on the last outcome
                constraints=constraint_callable_list(
                    problem.num_constraints, num_objs=problem.num_objs),
            )
            # optimize and get new observation
            new_x, new_obj, new_con = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)
        except Exception as e:  # handle numeric problem
            step = 2
            print(
                '===== Exception in optimization loop, restart with 1/%d of training data: %s'
                % (step, str(e)))
            if refit == 1:
                mll, model = initialize_model(train_x[::step],
                                              train_obj[::step],
                                              train_con[::step])
            else:
                mll, model = initialize_model(
                    train_x[::step],
                    train_obj[::step],
                    train_con[::step],
                    model.state_dict(),
                )
            # fit the models
            fit_gpytorch_model(mll)

            # define the qEHVI acquisition modules using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # compute feasible observations
            is_feas = (train_con[::step] <= 0).all(dim=-1)
            # compute points that are better than the known reference point
            better_than_ref = (train_obj[::step] > problem.ref_point).all(
                dim=-1)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs,
                # use observations that are better than the specified reference point and feasible
                Y=train_obj[::step][better_than_ref & is_feas],
            )
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(
                ),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
                # define an objective that specifies which outcomes are the objectives
                objective=IdentityMCMultiOutputObjective(
                    outcomes=list(range(problem.num_objs))),
                # specify that the constraint is on the last outcome
                constraints=constraint_callable_list(
                    problem.num_constraints, num_objs=problem.num_objs),
            )
            # optimize and get new observation
            new_x, new_obj, new_con = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)
            assert len(time_list) == iteration

        # update training points
        train_x = torch.cat([train_x, new_x])
        train_obj = torch.cat([train_obj, new_obj])
        train_con = torch.cat([train_con, new_con])

        # update progress
        # compute pareto front
        is_feas = (train_con <= 0).all(dim=-1)
        feas_train_obj = train_obj[is_feas]
        pareto_mask = is_non_dominated(feas_train_obj)
        pareto_y = feas_train_obj[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)

        # reinitialize the models so they are ready for fitting on next iteration
        # use the current state dict to speed up fitting
        # Note: they find improved performance from not warm starting the model hyperparameters
        # using the hyperparameters from the previous iteration
        if refit == 1:
            mll, model = initialize_model(train_x, train_obj, train_con)
        else:
            mll, model = initialize_model(
                train_x,
                train_obj,
                train_con,
                model.state_dict(),
            )

        t1 = time.time()
        print(
            "Iter %d: x=%s, perf=%s, con=%s, hv_diff=%f, time=%.2f, global_time=%.2f"
            % (iteration, unnormalize(new_x, bounds=problem_bounds), -new_obj,
               new_con, hv_diff, t1 - t0, time_list[-1]),
            flush=True)

    # compute pareto front
    is_feas = (train_con <= 0).all(dim=-1)
    feas_train_obj = train_obj[is_feas]
    pareto_mask = is_non_dominated(feas_train_obj)
    pareto_y = feas_train_obj[pareto_mask]
    pf = -1 * pareto_y.cpu().numpy().astype(np.float64)
    # Save result
    X = unnormalize(train_x, bounds=problem_bounds).cpu().numpy().astype(
        np.float64)  # caution
    train_obj[~is_feas] = -INFEASIBLE_OBJ_VALUE  # set infeasible
    Y = -1 * train_obj.cpu().numpy().astype(np.float64)

    # plot for debugging
    if plot_mode == 1:
        plot_pf(problem, problem_str, mth, pf, Y_init)

    return hv_diffs, pf, X, Y, time_list
Example #30
                # ========================================================================
                with torch.no_grad():
                    pred_r_train_mean = gp_recon_model.posterior(x_train).mean
                    pred_r_train_std = gp_recon_model.posterior(x_train).variance.sqrt()
                    pred_r_test_mean = gp_recon_model.posterior(x_val).mean
                    pred_r_test_std = gp_recon_model.posterior(x_val).variance.sqrt()

                    pred_r_train_mean = pred_r_train_mean.clamp_min(
                        1e-4).log() if transfo == "exp" else pred_r_train_mean
                    pred_r_train_std = pred_r_train_std.clamp_min(
                        1e-4).log() if transfo == "exp" else pred_r_train_std
                    pred_r_test_mean = pred_r_test_mean.clamp_min(
                        1e-4).log() if transfo == "exp" else pred_r_test_mean
                    pred_r_test_std = pred_r_test_std.clamp_min(1e-4).log() if transfo == "exp" else pred_r_test_std

                    pred_r_train_mean = unnormalize(pred_r_train_mean,
                                                    rbounds) if transfo == "normalize" else pred_r_train_mean
                    pred_r_train_std = unnormalize(pred_r_train_std,
                                                   rbounds) if transfo == "normalize" else pred_r_train_std
                    pred_r_test_mean = unnormalize(pred_r_test_mean,
                                                   rbounds) if transfo == "normalize" else pred_r_test_mean
                    pred_r_test_std = unnormalize(pred_r_test_std,
                                                  rbounds) if transfo == "normalize" else pred_r_test_std

                    gp_recon_model_fit_train = (pred_r_train_mean - r_train).pow(2).div(len(r_train))
                    gp_recon_model_fit_test = (pred_r_test_mean - r_val).pow(2).div(len(r_train))
                    torch.save(gp_recon_model_fit_train, os.path.join(save_folder, "gp_recon_model_fit_train.pt"))
                    torch.save(gp_recon_model_fit_test, os.path.join(save_folder, "gp_recon_model_fit_test.pt"))
                    print(f'\tMSE on r train set     : {gp_recon_model_fit_train.sum().item():.3f}')
                    print(f'\tMSE on r validation set: {gp_recon_model_fit_test.sum().item():.3f}')

                    df_ = pd.DataFrame(
def evaluate(mth, run_i, seed):
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(x: torch.Tensor):
        # Caution: unnormalize and maximize
        x = unnormalize(x, bounds=problem_bounds)
        x = x.cpu().numpy().astype(np.float64)  # caution
        res = problem.evaluate(x)
        objs = [-y for y in res['objs']]
        return objs

    hv_diffs = []
    time_list = []
    global_start_time = time.time()

    # random seed
    np.random.seed(seed)
    torch.manual_seed(seed)

    # call helper functions to generate initial training data and initialize model
    train_x, train_obj = generate_initial_data(initial_runs,
                                               objective_function, time_list,
                                               global_start_time)
    mll, model = initialize_model(train_x, train_obj)

    # for plot
    X_init = train_x.cpu().numpy().astype(np.float64)
    Y_init = -1 * train_obj.cpu().numpy().astype(np.float64)
    # calculate hypervolume of init data
    for i in range(initial_runs):
        train_obj_i = train_obj[:i + 1]
        # compute pareto front
        pareto_mask = is_non_dominated(train_obj_i)
        pareto_y = train_obj_i[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)

    # run (max_runs - initial_runs) rounds of BayesOpt after the initial random batch
    for iteration in range(initial_runs + 1, max_runs + 1):
        t0 = time.time()
        try:
            # fit the models
            fit_gpytorch_model(mll)

            # define the qEHVI acquisition modules using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs, Y=train_obj)
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(
                ),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
            )
            # optimize and get new observation
            new_x, new_obj = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)
        except Exception as e:
            step = 2
            print(
                '===== Exception in optimization loop, restart with 1/%d of training data: %s'
                % (step, str(e)))
            if refit == 1:
                mll, model = initialize_model(train_x[::step],
                                              train_obj[::step])
            else:
                mll, model = initialize_model(
                    train_x[::step],
                    train_obj[::step],
                    model.state_dict(),
                )
            # fit the models
            fit_gpytorch_model(mll)

            # define the qEHVI acquisition modules using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs, Y=train_obj[::step])
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(
                ),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
            )
            # optimize and get new observation
            new_x, new_obj = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)
            assert len(time_list) == iteration

        # update training points
        train_x = torch.cat([train_x, new_x])
        train_obj = torch.cat([train_obj, new_obj])

        # update progress
        # compute pareto front
        pareto_mask = is_non_dominated(train_obj)
        pareto_y = train_obj[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)

        # reinitialize the models so they are ready for fitting on next iteration
        # use the current state dict to speed up fitting
        # Note: they find improved performance from not warm starting the model hyperparameters
        # using the hyperparameters from the previous iteration
        if refit == 1:
            mll, model = initialize_model(train_x, train_obj)
        else:
            mll, model = initialize_model(
                train_x,
                train_obj,
                model.state_dict(),
            )

        t1 = time.time()
        print(
            "Iter %d: x=%s, perf=%s, hv_diff=%f, time=%.2f, global_time=%.2f" %
            (iteration, unnormalize(new_x, bounds=problem_bounds), -new_obj,
             hv_diff, t1 - t0, time_list[-1]),
            flush=True)

    # Save result
    X = unnormalize(train_x, bounds=problem_bounds).cpu().numpy().astype(
        np.float64)  # caution
    Y = -1 * train_obj.cpu().numpy().astype(np.float64)
    # compute pareto front
    pareto_mask = is_non_dominated(train_obj)
    pareto_y = train_obj[pareto_mask]
    pf = -1 * pareto_y.cpu().numpy().astype(np.float64)

    # plot for debugging
    if plot_mode == 1:
        plot_pf(problem, problem_str, mth, pf, Y_init)

    return hv_diffs, pf, X, Y, time_list