Example 1
def mins(config):
    """Optimize a design problem score using the algorithm MINS
    otherwise known as Model Inversion Networks

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    if config['normalize_ys']:
        task.map_normalize_y()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    def map_to_probs(x, *rest):
        x = task.to_logits(x)
        x = tf.pad(x, [[0, 0]] * (len(x.shape) - 1) + [[1, 0]])
        return (tf.math.softmax(x / 1e-5), *rest)

    input_shape = x.shape[1:]
    if task.is_discrete:
        input_shape = list(x.shape[1:]) + [task.num_classes]

    base_temp = config.get('base_temp', None)

    if config['offline']:

        # make several keras neural networks with two hidden layers
        forward_models = [ForwardModel(
            input_shape,
            hidden_size=config['hidden_size'],
            num_layers=config['num_layers'],
            initial_max_std=config['initial_max_std'],
            initial_min_std=config['initial_min_std'])
            for _ in range(config['bootstraps'])]

        # create a trainer for the bootstrapped ensemble of forward models
        oracle = Ensemble(forward_models,
                          forward_model_optim=tf.keras.optimizers.Adam,
                          forward_model_lr=config['oracle_lr'],
                          is_discrete=task.is_discrete,
                          noise_std=config.get('noise_std', 0.0),
                          keep=config.get('keep', 1.0),
                          temp=config.get('temp', 0.001))

        # build a bootstrapped data set
        train_data, val_data = build_pipeline(
            x=x, y=y, bootstraps=config['bootstraps'],
            batch_size=config['oracle_batch_size'],
            val_size=config['val_size'], buffer=1)

        train_data = train_data.map(
            map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        val_data = val_data.map(
            map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)

        # train the model for an additional number of epochs
        oracle.launch(train_data,
                      val_data,
                      logger,
                      config['oracle_epochs'])

    # create replay buffers for both GANs
    explore_pool = ReplayBuffer(config['pool_size'], input_shape)
    exploit_pool = ReplayBuffer(config['pool_size'], input_shape)

    if task.is_discrete:

        # build a Gumbel-Softmax GAN to sample discrete outputs
        explore_gen = DiscreteGenerator(
            input_shape, config['latent_size'],
            hidden=config['hidden_size'])
        exploit_gen = DiscreteGenerator(
            input_shape, config['latent_size'],
            hidden=config['hidden_size'])

    else:

        # build an LS-GAN to sample continuous outputs
        explore_gen = ContinuousGenerator(
            input_shape, config['latent_size'],
            hidden=config['hidden_size'])
        exploit_gen = ContinuousGenerator(
            input_shape, config['latent_size'],
            hidden=config['hidden_size'])

    # build the neural network GAN components
    explore_discriminator = Discriminator(
        input_shape,
        hidden=config['hidden_size'],
        method=config['method'])
    explore_gan = WeightedGAN(
        explore_gen, explore_discriminator, explore_pool,
        critic_frequency=config['critic_frequency'],
        flip_frac=config['flip_frac'],
        pool_frac=config['pool_frac'],
        pool_save=config['pool_save'],
        fake_pair_frac=config['fake_pair_frac'],
        penalty_weight=config['penalty_weight'],
        generator_lr=config['generator_lr'],
        generator_beta_1=config['generator_beta_1'],
        generator_beta_2=config['generator_beta_2'],
        discriminator_lr=config['discriminator_lr'],
        discriminator_beta_1=config['discriminator_beta_1'],
        discriminator_beta_2=config['discriminator_beta_2'],
        is_discrete=task.is_discrete,
        noise_std=config.get('noise_std', 0.0),
        keep=config.get('keep', 1.0),
        start_temp=config.get('start_temp', 5.0),
        final_temp=config.get('final_temp', 1.0))

    # build the neural network GAN components
    exploit_discriminator = Discriminator(
        input_shape,
        hidden=config['hidden_size'],
        method=config['method'])
    exploit_gan = WeightedGAN(
        exploit_gen, exploit_discriminator, exploit_pool,
        critic_frequency=config['critic_frequency'],
        flip_frac=config['flip_frac'],
        pool_frac=config['pool_frac'],
        pool_save=config['pool_save'],
        fake_pair_frac=config['fake_pair_frac'],
        penalty_weight=config['penalty_weight'],
        generator_lr=config['generator_lr'],
        generator_beta_1=config['generator_beta_1'],
        generator_beta_2=config['generator_beta_2'],
        discriminator_lr=config['discriminator_lr'],
        discriminator_beta_1=config['discriminator_beta_1'],
        discriminator_beta_2=config['discriminator_beta_2'],
        is_discrete=task.is_discrete,
        noise_std=config.get('noise_std', 0.0),
        keep=config.get('keep', 1.0),
        start_temp=config.get('start_temp', 5.0),
        final_temp=config.get('final_temp', 1.0))

    # build a weighted data set using newly collected samples
    train_data, val_data = build_pipeline(
        x=x, y=y, w=get_weights(y, base_temp=base_temp),
        batch_size=config['gan_batch_size'],
        val_size=config['val_size'], buffer=1)

    train_data = train_data.map(
        map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    val_data = val_data.map(
        map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    # train the gan for several epochs
    explore_gan.launch(
        train_data, val_data, logger, config['initial_epochs'],
        header="exploration/")

    # sample designs from the GAN and evaluate them
    condition_ys = tf.tile(tf.reduce_max(
        y, keepdims=True), [config['solver_samples'], 1])

    # record score percentiles
    logger.record("exploration/condition_ys",
                  task.denormalize_y(condition_ys)
                  if task.is_normalized_y else condition_ys,
                  0,
                  percentile=True)

    # train the gan for several epochs
    exploit_gan.launch(
        train_data, val_data, logger, config['initial_epochs'],
        header="exploitation/")

    # record score percentiles
    logger.record("exploitation/condition_ys",
                  task.denormalize_y(condition_ys)
                  if task.is_normalized_y else condition_ys,
                  0,
                  percentile=True)

    # prevent the temperature from being annealed further
    if task.is_discrete:
        explore_gan.start_temp = explore_gan.final_temp
        exploit_gan.start_temp = exploit_gan.final_temp

    # train the gan using an importance sampled data set
    for iteration in range(config['iterations']):

        # generate synthetic x paired with high performing scores
        tilde_x, tilde_y = get_synthetic_data(
            x, y,
            exploration_samples=config['exploration_samples'],
            exploration_rate=config['exploration_rate'],
            base_temp=base_temp)

        # build a weighted data set using newly collected samples
        train_data, val_data = build_pipeline(
            x=tilde_x.numpy(), y=tilde_y.numpy(),
            w=get_weights(tilde_y.numpy(), base_temp=base_temp),
            batch_size=config['gan_batch_size'],
            val_size=config['val_size'], buffer=1)

        train_data = train_data.map(
            map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        val_data = val_data.map(
            map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)

        # train the gan for several epochs
        explore_gan.launch(
            train_data, val_data, logger, config['epochs_per_iteration'],
            start_epoch=config['epochs_per_iteration'] * iteration +
                        config['initial_epochs'],
            header="exploration/")

        # sample designs from the GAN and evaluate them
        condition_ys = tf.tile(tf.reduce_max(
            tilde_y, keepdims=True), [config['thompson_samples'], 1])

        # generate samples for exploration
        solver_xs = explore_gen.sample(condition_ys, temp=0.001)
        if task.is_discrete:
            solver_xs = tf.argmax(
                solver_xs, axis=-1, output_type=tf.int32)
        actual_ys = oracle.get_distribution(solver_xs).mean() \
            if config['offline'] else task.predict(solver_xs)

        # record score percentiles
        logger.record("exploration/condition_ys",
                      task.denormalize_y(condition_ys)
                      if task.is_normalized_y else condition_ys,
                      0,
                      percentile=True)
        logger.record("exploration/actual_ys",
                      task.denormalize_y(actual_ys)
                      if task.is_normalized_y else actual_ys,
                      0,
                      percentile=True)

        # concatenate newly paired samples with the existing data set
        x = tf.concat([x, solver_xs], 0)
        y = tf.concat([y, actual_ys], 0)

        # build a weighted data set using newly collected samples
        train_data, val_data = build_pipeline(
            x=x.numpy(), y=y.numpy(),
            w=get_weights(y.numpy(), base_temp=base_temp),
            batch_size=config['gan_batch_size'],
            val_size=config['val_size'], buffer=1)

        train_data = train_data.map(
            map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        val_data = val_data.map(
            map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)

        # train the gan for several epochs
        exploit_gan.launch(
            train_data, val_data, logger, config['epochs_per_iteration'],
            start_epoch=config['epochs_per_iteration'] * iteration +
                        config['initial_epochs'],
            header="exploitation/")

        # sample designs from the GAN and evaluate them
        condition_ys = tf.tile(tf.reduce_max(
            y, keepdims=True), [config['solver_samples'], 1])

        # record score percentiles
        logger.record("exploitation/condition_ys",
                      task.denormalize_y(condition_ys)
                      if task.is_normalized_y else condition_ys,
                      0,
                      percentile=True)

    # sample the final solution designs from the exploitation GAN
    solver_xs = exploit_gen.sample(condition_ys, temp=0.001)
    solution = tf.argmax(solver_xs, axis=-1, output_type=tf.int32) \
               if task.is_discrete else solver_xs

    # save the current solution to the disk
    np.save(os.path.join(config["logging_dir"],
                         f"solution.npy"), solution.numpy())

    # evaluate the found solution and record the score
    score = task.predict(solution)
    if task.is_normalized_y:
        score = task.denormalize_y(score)
    logger.record("score", score, config['iterations'], percentile=True)
Example 2
def coms_original(config):
    """Train a forward model and perform offline model-based
    optimization using a conservative objective model

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    # save the initial dataset statistics for safe keeping
    x = task.x
    y = task.y

    # if the task is discrete then use a continuous relaxation
    if config['is_discrete']:
        p = np.full_like(x, 1 / float(x.shape[-1]))
        x = config.get('discrete_clip',
                       5.0) * x + (1.0 - config.get('discrete_clip', 5.0)) * p
        x = np.log(x)
        x = x[:, :, 1:] - x[:, :, :1]

    if config['normalize_ys']:
        # compute normalization statistics for the score
        mu_y = np.mean(y, axis=0, keepdims=True).astype(np.float32)
        y = y - mu_y
        st_y = np.std(y, axis=0,
                      keepdims=True).astype(np.float32).clip(1e-6, 1e9)
        y = y / st_y
    else:
        # create placeholder normalization statistics for the score
        mu_y = np.zeros_like(y[:1])
        st_y = np.ones_like(y[:1])

    if config['normalize_xs']:
        # compute normalization statistics for the data vectors
        mu_x = np.mean(x, axis=0, keepdims=True).astype(np.float32)
        x = x - mu_x
        st_x = np.std(x, axis=0,
                      keepdims=True).astype(np.float32).clip(1e-6, 1e9)
        x = x / st_x
    else:
        # create placeholder normalization statistics for the data vectors
        mu_x = np.zeros_like(x[:1])
        st_x = np.ones_like(x[:1])

    input_shape = list(task.input_shape)
    if config['is_discrete']:
        input_shape[-1] = input_shape[-1] - 1

    solver_lr = config['solver_lr'] * np.sqrt(np.prod(input_shape))
    solver_interval = int(config['solver_interval'] *
                          (x.shape[0] - config['val_size']) /
                          config['batch_size'])
    solver_warmup = int(config['solver_warmup'] *
                        (x.shape[0] - config['val_size']) /
                        config['batch_size'])

    # make a neural network to predict scores
    forward_model = ForwardModel(input_shape,
                                 activations=config['activations'],
                                 hidden=config['hidden_size'],
                                 final_tanh=config['final_tanh'])

    # create a trainer for a forward model with a conservative objective
    trainer = ConservativeMaximumLikelihood(
        forward_model,
        forward_model_opt=tf.keras.optimizers.Adam,
        forward_model_lr=config['forward_model_lr'],
        initial_alpha=config['initial_alpha'],
        alpha_opt=tf.keras.optimizers.Adam,
        alpha_lr=config['alpha_lr'],
        target_conservatism=config['target_conservatism'],
        negatives_fraction=config['negatives_fraction'],
        lookahead_steps=config['lookahead_steps'],
        lookahead_backprop=config['lookahead_backprop'],
        solver_beta=config['solver_beta'],
        solver_lr=solver_lr,
        solver_interval=solver_interval,
        solver_warmup=solver_warmup,
        solver_steps=config['solver_steps'],
        constraint_type=config['constraint_type'],
        entropy_coefficient=config['entropy_coefficient'],
        continuous_noise_std=config.get('continuous_noise_std', 0.0))

    # make a neural network to predict scores
    validation_models = [
        ForwardModel(input_shape,
                     activations=config['activations'],
                     hidden=config['hidden_size'],
                     final_tanh=config['final_tanh'])
    ]

    # create maximum likelihood trainers for the validation models
    validation_trainers = [
        TransformedMaximumLikelihood(
            model,
            forward_model_optim=tf.keras.optimizers.Adam,
            forward_model_lr=config['forward_model_lr'],
            continuous_noise_std=config.get('continuous_noise_std', 0.0),
            logger_prefix=f"validation_model_{i}")
        for i, model in enumerate(validation_models)
    ]

    # create a data set
    train_data, validate_data = task.build(x=x,
                                           y=y,
                                           batch_size=config['batch_size'],
                                           val_size=config['val_size'])

    # train the validation models
    for t in validation_trainers:
        t.launch(train_data, validate_data, logger, 100)

    # select the top k initial designs from the dataset
    indices = tf.math.top_k(y[:, 0], k=config['batch_size'])[1]
    initial_x = tf.gather(x, indices, axis=0)

    # create the starting point for the optimizer
    evaluations = 0
    score = None
    trainer.solution = tf.Variable(initial_x)
    trainer.done = tf.Variable(
        tf.fill([config['batch_size']] + [1 for _ in x.shape[1:]], False))

    def evaluate_solution(xt):
        nonlocal evaluations, score

        # evaluate the design using the oracle and the forward model
        with tf.GradientTape() as tape:
            tape.watch(xt)
            model = forward_model(xt)

        # evaluate the predictions and gradient norm
        evaluations += 1
        grads = tape.gradient(model, xt)
        model = model * st_y + mu_y

        for i, val in enumerate(validation_models):
            prediction = val(xt)
            logger.record(f"validation_model_{i}/prediction",
                          prediction * st_y + mu_y, evaluations)

        # record the prediction and score to the logger
        logger.record("distance/travelled", tf.linalg.norm(xt - initial_x),
                      evaluations)
        logger.record(f"train/prediction", model, evaluations)
        logger.record(
            f"train/grad_norm",
            tf.linalg.norm(tf.reshape(grads, [grads.shape[0], -1]), axis=-1),
            evaluations)

        if evaluations in config['evaluate_steps'] \
                or len(config['evaluate_steps']) == 0 or score is None:
            solution = xt * st_x + mu_x
            if config['is_discrete']:
                solution = tf.math.softmax(
                    tf.pad(solution, [[0, 0], [0, 0], [1, 0]]) / 0.001)
            score = task.score(solution)
            logger.record("score", score, evaluations, percentile=True)
            logger.record(f"rank_corr/model_to_real",
                          spearman(model[:, 0], score[:, 0]), evaluations)

        return score, model

    # keep track of when to record performance
    interval = trainer.solver_interval
    warmup = trainer.solver_warmup

    scores = []
    predictions = []

    # train model for many epochs with conservatism
    for e in range(config['epochs']):

        statistics = defaultdict(list)
        for x, y in train_data:
            for name, tensor in trainer.train_step(x, y).items():
                statistics[name].append(tensor)

            # evaluate the current solution
            if tf.logical_and(tf.equal(tf.math.mod(trainer.step, interval), 0),
                              tf.math.greater_equal(trainer.step, warmup)):
                score, model = evaluate_solution(trainer.solution)
                scores.append(score)
                predictions.append(model.numpy())

        for name in statistics.keys():
            logger.record(name, tf.concat(statistics[name], axis=0), e)

        statistics = defaultdict(list)
        for x, y in validate_data:
            for name, tensor in trainer.validate_step(x, y).items():
                statistics[name].append(tensor)

        for name in statistics.keys():
            logger.record(name, tf.concat(statistics[name], axis=0), e)

        if tf.reduce_all(trainer.done):
            break

    # save the model predictions and scores to be aggregated later
    np.save(os.path.join(config['logging_dir'], "scores.npy"),
            np.concatenate(scores, axis=1))
    np.save(os.path.join(config['logging_dir'], "predictions.npy"),
            np.stack(predictions, axis=1))
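
For discrete tasks the code above maps class probabilities to logits relative to the first class and, when scoring a solution, inverts that map by padding a zero logit and applying a softmax. A small numpy sketch (with a made-up distribution, ignoring the smoothing and low-temperature sharpening) showing that the round trip recovers the original probabilities:

import numpy as np

# a made-up categorical distribution over four classes at one position
p = np.array([[0.1, 0.2, 0.3, 0.4]], dtype=np.float32)

# forward transform: logits expressed relative to class 0
logits = np.log(p)
z = logits[:, 1:] - logits[:, :1]

# inverse transform: pad a zero logit for class 0 and renormalize
padded = np.concatenate([np.zeros_like(z[:, :1]), z], axis=1)
recovered = np.exp(padded) / np.exp(padded).sum(axis=1, keepdims=True)

print(np.allclose(recovered, p))  # True, since softmax is shift-invariant
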
Example 3
def bo_qei(config):
    """Optimizes over designs x in an offline optimization problem
    using Bayesian optimization with the q-Expected Improvement (qEI) acquisition function

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    if config['normalize_ys']:
        task.map_normalize_y()
    if task.is_discrete and not config["use_vae"]:
        task.map_to_logits()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    if task.is_discrete and config["use_vae"]:

        vae_model = SequentialVAE(task,
                                  hidden_size=config['vae_hidden_size'],
                                  latent_size=config['vae_latent_size'],
                                  activation=config['vae_activation'],
                                  kernel_size=config['vae_kernel_size'],
                                  num_blocks=config['vae_num_blocks'])

        vae_trainer = VAETrainer(vae_model,
                                 vae_optim=tf.keras.optimizers.Adam,
                                 vae_lr=config['vae_lr'],
                                 beta=config['vae_beta'])

        # build a data pipeline for training the VAE
        train_data, val_data = build_pipeline(
            x=x,
            y=y,
            batch_size=config['vae_batch_size'],
            val_size=config['val_size'])

        # train the VAE on the design dataset
        vae_trainer.launch(train_data, val_data, logger, config['vae_epochs'])

        # map the x values to latent space
        x = vae_model.encoder_cnn.predict(x)[0]

        mean = np.mean(x, axis=0, keepdims=True)
        standard_dev = np.std(x - mean, axis=0, keepdims=True)
        x = (x - mean) / standard_dev

    input_shape = x.shape[1:]
    input_size = np.prod(input_shape)

    # build a bootstrapped data pipeline for the ensemble
    train_data, val_data = build_pipeline(
        x=x,
        y=y,
        bootstraps=config['bootstraps'],
        batch_size=config['ensemble_batch_size'],
        val_size=config['val_size'])

    # make several keras neural networks with two hidden layers
    forward_models = [
        ForwardModel(input_shape,
                     hidden_size=config['hidden_size'],
                     num_layers=config['num_layers'],
                     initial_max_std=config['initial_max_std'],
                     initial_min_std=config['initial_min_std'])
        for b in range(config['bootstraps'])
    ]

    # create a trainer for the bootstrapped ensemble of forward models
    ensemble = Ensemble(forward_models,
                        forward_model_optim=tf.keras.optimizers.Adam,
                        forward_model_lr=config['ensemble_lr'])

    # train the model for an additional number of epochs
    ensemble.launch(train_data, val_data, logger, config['ensemble_epochs'])

    # select the top k initial designs from the dataset
    indices = tf.math.top_k(y[:, 0], k=config['bo_gp_samples'])[1]
    initial_x = tf.gather(x, indices, axis=0)
    initial_y = tf.gather(y, indices, axis=0)

    from botorch.models import FixedNoiseGP, ModelListGP
    from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood
    from botorch.acquisition.objective import GenericMCObjective
    from botorch.optim import optimize_acqf
    from botorch import fit_gpytorch_model
    from botorch.acquisition.monte_carlo import qExpectedImprovement
    from botorch.sampling.samplers import SobolQMCNormalSampler
    from botorch.exceptions import BadInitialCandidatesWarning

    import torch
    import time
    import warnings

    warnings.filterwarnings('ignore', category=BadInitialCandidatesWarning)
    warnings.filterwarnings('ignore', category=RuntimeWarning)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.float32

    def objective(input_x):
        original_x = input_x
        # convert the tensor into numpy before using a TF model
        if torch.cuda.is_available():
            input_x = input_x.detach().cpu().numpy()
        else:
            input_x = input_x.detach().numpy()
        batch_shape = input_x.shape[:-1]
        # pass the input into a TF model
        input_x = tf.reshape(input_x, [-1, *input_shape])

        # optimize the ground truth or the learned model
        if config["optimize_ground_truth"]:
            if task.is_discrete and config["use_vae"]:
                input_x = tf.argmax(
                    vae_model.decoder_cnn.predict(input_x * standard_dev +
                                                  mean),
                    axis=2,
                    output_type=tf.int32)
            value = task.predict(input_x)
        else:
            value = ensemble.get_distribution(input_x).mean()

        ys = value.numpy()

        ys = ys.reshape(list(batch_shape) + [1])
        # convert the scores back to pytorch tensors
        return torch.tensor(ys).type_as(original_x).to(device, dtype=dtype)

    NOISE_SE = config['bo_noise_se']
    train_yvar = torch.tensor(NOISE_SE**2, device=device, dtype=dtype)

    def initialize_model(train_x, train_obj, state_dict=None):
        # define models for objective
        model_obj = FixedNoiseGP(train_x, train_obj,
                                 train_yvar.expand_as(train_obj)).to(train_x)
        # combine into a multi-output GP model
        model = ModelListGP(model_obj)
        mll = SumMarginalLogLikelihood(model.likelihood, model)
        # load state dict if it is passed
        if state_dict is not None:
            model.load_state_dict(state_dict)
        return mll, model

    def obj_callable(Z):
        return Z[..., 0]

    # define a feasibility-weighted objective for optimization
    obj = GenericMCObjective(obj_callable)

    BATCH_SIZE = config['bo_batch_size']
    bounds = torch.tensor([
        np.min(x, axis=0).reshape([input_size]).tolist(),
        np.max(x, axis=0).reshape([input_size]).tolist()
    ],
                          device=device,
                          dtype=dtype)

    def optimize_acqf_and_get_observation(acq_func):
        """Optimizes the acquisition function, and returns
        a new candidate and a noisy observation."""
        # optimize
        try:
            candidates, _ = optimize_acqf(
                acq_function=acq_func,
                bounds=bounds,
                q=BATCH_SIZE,
                num_restarts=config['bo_num_restarts'],
                raw_samples=config[
                    'bo_raw_samples'],  # used for initialization heuristic
                options={
                    "batch_limit": config['bo_batch_limit'],
                    "maxiter": config['bo_maxiter']
                })
        except RuntimeError:
            return
        # observe new values
        new_x = candidates.detach()
        exact_obj = objective(candidates)
        new_obj = exact_obj + NOISE_SE * torch.randn_like(exact_obj)
        return new_x, new_obj

    N_BATCH = config['bo_iterations']
    MC_SAMPLES = config['bo_mc_samples']

    best_observed_ei = []

    # call helper functions to generate initial training data and initialize model
    train_x_ei = initial_x.numpy().reshape([initial_x.shape[0], input_size])
    train_x_ei = torch.tensor(train_x_ei).to(device, dtype=dtype)

    train_obj_ei = initial_y.numpy().reshape([initial_y.shape[0], 1])
    train_obj_ei = torch.tensor(train_obj_ei).to(device, dtype=dtype)

    best_observed_value_ei = train_obj_ei.max().item()
    mll_ei, model_ei = initialize_model(train_x_ei, train_obj_ei)
    best_observed_ei.append(best_observed_value_ei)

    # run N_BATCH rounds of BayesOpt after the initial random batch
    for iteration in range(1, N_BATCH + 1):

        t0 = time.time()

        # fit the models
        fit_gpytorch_model(mll_ei)

        # define the qEI acquisition module using a QMC sampler
        qmc_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)

        # for best_f, we use the best observed noisy values as an approximation
        qEI = qExpectedImprovement(model=model_ei,
                                   best_f=train_obj_ei.max(),
                                   sampler=qmc_sampler,
                                   objective=obj)

        # optimize and get new observation
        result = optimize_acqf_and_get_observation(qEI)
        if result is None:
            print("RuntimeError was encountered, most likely a "
                  "'symeig_cpu: the algorithm failed to converge'")
            break
        new_x_ei, new_obj_ei = result

        # update training points
        train_x_ei = torch.cat([train_x_ei, new_x_ei])
        train_obj_ei = torch.cat([train_obj_ei, new_obj_ei])

        # update progress
        best_value_ei = obj(train_x_ei).max().item()
        best_observed_ei.append(best_value_ei)

        # reinitialize the models so they are ready for fitting on next iteration
        # use the current state dict to speed up fitting
        mll_ei, model_ei = initialize_model(train_x_ei, train_obj_ei,
                                            model_ei.state_dict())

        t1 = time.time()
        print(
            f"Batch {iteration:>2}: best_value = "
            f"({best_value_ei:>4.2f}), "
            f"time = {t1 - t0:>4.2f}.",
            end="")

    if torch.cuda.is_available():
        x_sol = train_x_ei.detach().cpu().numpy()
        y_sol = train_obj_ei.detach().cpu().numpy()

    else:
        x_sol = train_x_ei.detach().numpy()
        y_sol = train_obj_ei.detach().numpy()

    # select the top k designs from the data collected during optimization
    indices = tf.math.top_k(y_sol[:, 0], k=config['solver_samples'])[1]
    solution = tf.gather(x_sol, indices, axis=0)
    solution = tf.reshape(solution, [-1, *input_shape])

    if task.is_discrete and config["use_vae"]:
        solution = solution * standard_dev + mean
        logits = vae_model.decoder_cnn.predict(solution)
        solution = tf.argmax(logits, axis=2, output_type=tf.int32)

    # save the current solution to the disk
    np.save(os.path.join(config["logging_dir"], f"solution.npy"),
            solution.numpy())

    # evaluate the found solution and record the score
    score = task.predict(solution)
    if task.is_normalized_y:
        score = task.denormalize_y(score)
    logger.record("score", score, N_BATCH, percentile=True)
Example 4
def reinforce(config):
    """Optimizes over designs x in an offline optimization problem
    using the REINFORCE policy gradient method

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])
    if task.is_discrete:
        task.map_to_integers()

    if config['normalize_ys']:
        task.map_normalize_y()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    # build a bootstrapped data pipeline for the ensemble
    train_data, val_data = build_pipeline(
        x=x, y=y, bootstraps=config['bootstraps'],
        batch_size=config['ensemble_batch_size'],
        val_size=config['val_size'])

    # make several keras neural networks with two hidden layers
    forward_models = [ForwardModel(
        task,
        embedding_size=config['embedding_size'],
        hidden_size=config['hidden_size'],
        num_layers=config['num_layers'],
        initial_max_std=config['initial_max_std'],
        initial_min_std=config['initial_min_std'])
        for b in range(config['bootstraps'])]

    # create a trainer for the bootstrapped ensemble of forward models
    ensemble = Ensemble(
        forward_models,
        forward_model_optim=tf.keras.optimizers.Adam,
        forward_model_lr=config['ensemble_lr'])

    # train the model for an additional number of epochs
    ensemble.launch(train_data,
                    val_data,
                    logger,
                    config['ensemble_epochs'])

    rl_opt = tf.keras.optimizers.Adam(
        learning_rate=config['reinforce_lr'])

    # select the top k initial designs from the dataset
    indices = tf.math.top_k(y[:, 0], k=config['solver_samples'])[1]
    initial_x = tf.gather(x, indices, axis=0)

    if task.is_discrete:
        logits = tf.pad(task.to_logits(initial_x), [[0, 0], [0, 0], [1, 0]])
        probs = tf.math.softmax(logits / 1e-5)
        logits = tf.math.log(tf.reduce_mean(probs, axis=0))
        sampler = DiscreteMarginal(logits)

    else:
        mean = tf.reduce_mean(initial_x, axis=0)
        logstd = tf.math.log(tf.ones_like(mean) * config['exploration_std'])
        sampler = ContinuousMarginal(mean, logstd)

    for iteration in range(config['iterations']):

        with tf.GradientTape() as tape:
            td = sampler.get_distribution()
            tx = td.sample(sample_shape=config['reinforce_batch_size'])
            if config['optimize_ground_truth']:
                ty = task.predict(tx)
            else:  # use the surrogate model for optimization
                ty = ensemble.get_distribution(tx).mean()

            mean_y = tf.reduce_mean(ty)
            standard_dev_y = tf.math.reduce_std(ty - mean_y)
            log_probs = td.log_prob(tf.stop_gradient(tx))
            loss = tf.reduce_mean(-log_probs[:, tf.newaxis] *
                                  tf.stop_gradient(
                                      (ty - mean_y) / standard_dev_y))

        print(f"[Iteration {iteration}] "
              f"Average Prediction = {tf.reduce_mean(ty)}")

        logger.record("reinforce/prediction",
                      ty, iteration, percentile=True)
        logger.record("reinforce/loss",
                      loss, iteration, percentile=True)

        grads = tape.gradient(
            loss, sampler.trainable_variables)

        rl_opt.apply_gradients(zip(
            grads, sampler.trainable_variables))

    td = sampler.get_distribution()
    solution = td.sample(sample_shape=config['solver_samples'])

    # save the current solution to the disk
    np.save(os.path.join(config["logging_dir"],
                         f"solution.npy"), solution.numpy())

    # evaluate the found solution and record the score
    score = task.predict(solution)
    if config['normalize_ys']:
        score = task.denormalize_y(score)
    logger.record(
        "score", score, config['iterations'], percentile=True)
Example 5
def cma_es(config):
    """Optimizes over designs x in an offline optimization problem
    using the CMA Evolution Strategy

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    if config['normalize_ys']:
        task.map_normalize_y()
    if task.is_discrete and not config["use_vae"]:
        task.map_to_logits()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    if task.is_discrete and config["use_vae"]:

        vae_model = SequentialVAE(task,
                                  hidden_size=config['vae_hidden_size'],
                                  latent_size=config['vae_latent_size'],
                                  activation=config['vae_activation'],
                                  kernel_size=config['vae_kernel_size'],
                                  num_blocks=config['vae_num_blocks'])

        vae_trainer = VAETrainer(vae_model,
                                 vae_optim=tf.keras.optimizers.Adam,
                                 vae_lr=config['vae_lr'],
                                 beta=config['vae_beta'])

        # build a data pipeline for training the VAE
        train_data, val_data = build_pipeline(
            x=x,
            y=y,
            batch_size=config['vae_batch_size'],
            val_size=config['val_size'])

        # train the VAE on the design dataset
        vae_trainer.launch(train_data, val_data, logger, config['vae_epochs'])

        # map the x values to latent space
        x = vae_model.encoder_cnn.predict(x)[0]

        mean = np.mean(x, axis=0, keepdims=True)
        standard_dev = np.std(x - mean, axis=0, keepdims=True)
        x = (x - mean) / standard_dev

    input_shape = x.shape[1:]
    input_size = np.prod(input_shape)

    # make several keras neural networks with two hidden layers
    forward_models = [
        ForwardModel(input_shape,
                     hidden_size=config['hidden_size'],
                     num_layers=config['num_layers'],
                     initial_max_std=config['initial_max_std'],
                     initial_min_std=config['initial_min_std'])
        for b in range(config['bootstraps'])
    ]

    # create a trainer for the bootstrapped ensemble of forward models
    ensemble = Ensemble(forward_models,
                        forward_model_optim=tf.keras.optimizers.Adam,
                        forward_model_lr=config['ensemble_lr'])

    # build a bootstrapped data pipeline for the ensemble
    train_data, val_data = build_pipeline(
        x=x,
        y=y,
        bootstraps=config['bootstraps'],
        batch_size=config['ensemble_batch_size'],
        val_size=config['val_size'])

    # train the model for an additional number of epochs
    ensemble.launch(train_data, val_data, logger, config['ensemble_epochs'])

    # select the top k initial designs from the dataset
    indices = tf.math.top_k(y[:, 0], k=config['solver_samples'])[1]
    initial_x = tf.gather(x, indices, axis=0)
    x = initial_x

    # create a fitness function for optimizing the expected task score
    def fitness(input_x):
        input_x = tf.reshape(input_x, input_shape)[tf.newaxis]
        if config["optimize_ground_truth"]:
            if task.is_discrete and config["use_vae"]:
                input_x = tf.argmax(
                    vae_model.decoder_cnn.predict(input_x * standard_dev +
                                                  mean),
                    axis=2,
                    output_type=tf.int32)
            value = task.predict(input_x)
        else:
            value = ensemble.get_distribution(input_x).mean()
        return (-value[0].numpy()).tolist()[0]

    import cma
    result = []
    for i in range(config['solver_samples']):
        xi = x[i].numpy().flatten().tolist()
        es = cma.CMAEvolutionStrategy(xi, config['cma_sigma'])
        step = 0
        while not es.stop() and step < config['cma_max_iterations']:
            solutions = es.ask()
            es.tell(solutions, [fitness(x) for x in solutions])
            step += 1
        result.append(tf.reshape(es.result.xbest, input_shape))
        print(f"CMA: {i + 1} / {config['solver_samples']}")

    # convert the solution found by CMA-ES to a tensor
    x = tf.stack(result, axis=0)
    solution = x

    if task.is_discrete and config["use_vae"]:
        solution = solution * standard_dev + mean
        logits = vae_model.decoder_cnn.predict(solution)
        solution = tf.argmax(logits, axis=2, output_type=tf.int32)

    # save the current solution to the disk
    np.save(os.path.join(config["logging_dir"], f"solution.npy"),
            solution.numpy())

    # evaluate the found solution
    score = task.predict(solution)
    if task.is_normalized_y:
        score = task.denormalize_y(score)
    logger.record("score", score, 0, percentile=True)
Example 6
def gradient_ascent(config):
    """Train a Score Function to solve a Model-Based Optimization
    using gradient ascent on the input design

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    if config['normalize_ys']:
        task.map_normalize_y()
    if task.is_discrete and not config["use_vae"]:
        task.map_to_logits()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    if task.is_discrete and config["use_vae"]:

        vae_model = SequentialVAE(task,
                                  hidden_size=config['vae_hidden_size'],
                                  latent_size=config['vae_latent_size'],
                                  activation=config['vae_activation'],
                                  kernel_size=config['vae_kernel_size'],
                                  num_blocks=config['vae_num_blocks'])

        vae_trainer = VAETrainer(vae_model,
                                 vae_optim=tf.keras.optimizers.Adam,
                                 vae_lr=config['vae_lr'],
                                 beta=config['vae_beta'])

        # build a data pipeline for training the VAE
        train_data, val_data = build_pipeline(
            x=x,
            y=y,
            batch_size=config['vae_batch_size'],
            val_size=config['val_size'])

        # train the VAE on the design dataset
        vae_trainer.launch(train_data, val_data, logger, config['vae_epochs'])

        # map the x values to latent space
        x = vae_model.encoder_cnn.predict(x)[0]

        mean = np.mean(x, axis=0, keepdims=True)
        standard_dev = np.std(x - mean, axis=0, keepdims=True)
        x = (x - mean) / standard_dev

    input_shape = x.shape[1:]
    input_size = np.prod(input_shape)

    # make several keras neural networks with different architectures
    forward_models = [
        ForwardModel(input_shape,
                     activations=activations,
                     hidden_size=config['hidden_size'],
                     initial_max_std=config['initial_max_std'],
                     initial_min_std=config['initial_min_std'])
        for activations in config['activations']
    ]

    # scale the learning rate based on the number of channels in x
    config['solver_lr'] *= np.sqrt(np.prod(x.shape[1:]))

    trs = []
    for i, fm in enumerate(forward_models):

        # create a bootstrapped data set
        train_data, validate_data = build_pipeline(
            x=x,
            y=y,
            batch_size=config['batch_size'],
            val_size=config['val_size'],
            bootstraps=1)

        # create a maximum likelihood trainer for this forward model
        trainer = MaximumLikelihood(
            fm,
            forward_model_optim=tf.keras.optimizers.Adam,
            forward_model_lr=config['forward_model_lr'],
            noise_std=config.get('model_noise_std', 0.0))

        # train the model for an additional number of epochs
        trs.append(trainer)
        trainer.launch(train_data,
                       validate_data,
                       logger,
                       config['epochs'],
                       header=f'oracle_{i}/')

    # select the top k initial designs from the dataset
    mean_x = tf.reduce_mean(x, axis=0, keepdims=True)
    indices = tf.math.top_k(y[:, 0], k=config['solver_samples'])[1]
    initial_x = tf.gather(x, indices, axis=0)
    x = initial_x

    # evaluate the starting point
    solution = x
    if task.is_normalized_y:
        preds = [
            task.denormalize_y(fm.get_distribution(solution).mean())
            for fm in forward_models
        ]
    else:
        preds = [fm.get_distribution(solution).mean() for fm in forward_models]

    # record the prediction and score to the logger
    logger.record("distance/travelled", tf.linalg.norm(solution - initial_x),
                  0)
    logger.record("distance/from_mean", tf.linalg.norm(solution - mean_x), 0)
    for n, prediction_i in enumerate(preds):
        logger.record(f"oracle_{n}/prediction", prediction_i, 0)
        if n > 0:
            logger.record(f"rank_corr/0_to_{n}",
                          spearman(preds[0][:, 0], prediction_i[:, 0]), 0)

    # perform gradient ascent on the score through the forward model
    for i in range(1, config['solver_steps'] + 1):
        # back propagate through the forward model
        with tf.GradientTape() as tape:
            tape.watch(x)
            predictions = []
            for fm in forward_models:
                solution = x
                predictions.append(fm.get_distribution(solution).mean())
            if config['aggregation_method'] == 'mean':
                score = tf.reduce_mean(predictions, axis=0)
            if config['aggregation_method'] == 'min':
                score = tf.reduce_min(predictions, axis=0)
            if config['aggregation_method'] == 'random':
                score = predictions[np.random.randint(len(predictions))]
        grads = tape.gradient(score, x)

        # use the conservative optimizer to update the solution
        x = x + config['solver_lr'] * grads
        solution = x

        # evaluate the design using the oracle and the forward model
        if task.is_normalized_y:
            preds = [
                task.denormalize_y(fm.get_distribution(solution).mean())
                for fm in forward_models
            ]
        else:
            preds = [
                fm.get_distribution(solution).mean() for fm in forward_models
            ]

        # record the prediction and score to the logger
        logger.record("distance/travelled",
                      tf.linalg.norm(solution - initial_x), i)
        logger.record("distance/from_mean", tf.linalg.norm(solution - mean_x),
                      i)
        for n, prediction_i in enumerate(preds):
            logger.record(f"oracle_{n}/prediction", prediction_i, i)
            logger.record(
                f"oracle_{n}/grad_norm",
                tf.linalg.norm(tf.reshape(grads[n], [-1, input_size]),
                               axis=-1), i)
            if n > 0:
                logger.record(f"rank_corr/0_to_{n}",
                              spearman(preds[0][:, 0], prediction_i[:, 0]), i)
                logger.record(
                    f"grad_corr/0_to_{n}",
                    tfp.stats.correlation(grads[0],
                                          grads[n],
                                          sample_axis=0,
                                          event_axis=None), i)

    if task.is_discrete and config["use_vae"]:
        solution = solution * standard_dev + mean
        logits = vae_model.decoder_cnn.predict(solution)
        solution = tf.argmax(logits, axis=2, output_type=tf.int32)

    # save the current solution to the disk
    np.save(os.path.join(config["logging_dir"], f"solution.npy"),
            solution.numpy())

    # evaluate the found solution and record the score
    score = task.predict(solution)
    if task.is_normalized_y:
        score = task.denormalize_y(score)
    logger.record("score", score, config['solver_steps'], percentile=True)
Example 7
def coms_cleaned(config):
    """Train a forward model and perform model based optimization
    using a conservative objective function

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    # save the initial dataset statistics for safe keeping
    x = task.x
    y = task.y

    if config['is_discrete']:

        # clip the distribution probabilities to a max of discrete_clip
        p = np.full_like(x, 1 / float(x.shape[-1]))
        discrete_clip = config.get('discrete_clip', 5.0)
        x = discrete_clip * x + (1.0 - discrete_clip) * p

        # map the distribution probabilities to logits
        x = np.log(x)
        x = x[:, :, 1:] - x[:, :, :1]

    if config['normalize_ys']:

        # remove the mean from the score values
        mu_y = np.mean(y, axis=0,
                       keepdims=True).astype(np.float32)
        y = y - mu_y

        # standardize the variance of the score values
        st_y = np.std(y, axis=0,
                      keepdims=True).astype(np.float32).clip(1e-6, 1e9)
        y = y / st_y

    else:

        # create placeholder normalization statistics
        mu_y = 0.0
        st_y = 1.0

    if config['normalize_xs']:

        # remove the mean from the data vectors
        mu_x = np.mean(x, axis=0,
                       keepdims=True).astype(np.float32)
        x = x - mu_x

        # standardize the variance of the data vectors
        st_x = np.std(x, axis=0,
                      keepdims=True).astype(np.float32).clip(1e-6, 1e9)
        x = x / st_x

    else:

        # create placeholder normalization statistics
        mu_x = 0.0
        st_x = 1.0

    # record the inputs shape of the forward model
    input_shape = list(task.input_shape)
    if config['is_discrete']:
        input_shape[-1] = input_shape[-1] - 1

    # compute the normalized learning rate of the model
    inner_lr = config['inner_lr'] * np.sqrt(np.prod(input_shape))
    outer_lr = config['outer_lr'] * np.sqrt(np.prod(input_shape))

    # make a neural network to predict scores
    forward_model = ForwardModel(
        input_shape, activations=config['activations'],
        hidden=config['hidden'], final_tanh=config['final_tanh'])

    # make a trainer for the forward model
    trainer = ConservativeObjectiveModel(
        forward_model, forward_model_opt=tf.keras.optimizers.Adam,
        forward_model_lr=config['forward_model_lr'],
        initial_alpha=config['initial_alpha'],
        alpha_opt=tf.keras.optimizers.Adam, alpha_lr=config['alpha_lr'],
        target_conservatism=config['target_conservatism'],
        inner_lr=inner_lr, outer_lr=outer_lr,
        inner_gradient_steps=config['inner_gradient_steps'],
        outer_gradient_steps=config['outer_gradient_steps'],
        beta=config['train_beta'],
        entropy_coefficient=config['entropy_coefficient'],
        continuous_noise_std=config['continuous_noise_std'])

    # create a data set
    train_data, validate_data = task.build(
        x=x, y=y, batch_size=config['batch_size'],
        val_size=config['val_size'])

    # train the forward model
    trainer.launch(train_data,
                   validate_data,
                   logger,
                   config["epochs"])

    # select the top k initial designs from the dataset
    indices = tf.math.top_k(y[:, 0], k=config['batch_size'])[1]
    initial_x = tf.gather(x, indices, axis=0)
    xt = initial_x

    scores = []
    predictions = []
    eval_beta = config['eval_beta']

    for step in range(config['outer_gradient_steps']):

        xt = trainer.outer_optimize(xt, eval_beta, 1, training=False)
        prediction = forward_model(
            xt, training=False).numpy() * st_y + mu_y

        next_xt = trainer.inner_optimize(xt, training=False)
        next_prediction = forward_model(
            next_xt, training=False).numpy() * st_y + mu_y

        final_xt = trainer.outer_optimize(
            xt, eval_beta, config['outer_gradient_steps'], training=False)
        final_prediction = forward_model(
            final_xt, training=False).numpy() * st_y + mu_y

        solution = xt * st_x + mu_x
        if config['is_discrete']:
            solution = tf.math.softmax(tf.pad(
                solution, [[0, 0], [0, 0], [1, 0]]) / 0.001)

        score = task.score(solution)

        # record the prediction and score to the logger
        logger.record(f"score", score, step, percentile=True)
        logger.record(f"solver/model_to_real",
                      spearman(prediction[:, 0], score[:, 0]), step)
        logger.record(f"solver/distance",
                      tf.linalg.norm(xt - initial_x), step)
        logger.record(f"solver/prediction",
                      prediction, step)
        logger.record(f"solver/beta_conservatism",
                      prediction - eval_beta * next_prediction, step)
        logger.record(f"solver/conservatism",
                      prediction - final_prediction, step)
        logger.record(f"solver/overestimation",
                      prediction - score, step)

        scores.append(score)
        predictions.append(prediction)

        # save the model predictions and scores to be aggregated later
        np.save(os.path.join(config['logging_dir'], "scores.npy"),
                np.concatenate(scores, axis=1))
        np.save(os.path.join(config['logging_dir'], "predictions.npy"),
                np.stack(predictions, axis=1))
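
Every optimizer in these examples starts from the k highest-scoring designs in the offline dataset, selected with tf.math.top_k and tf.gather as above. A tiny self-contained illustration with made-up data:

import tensorflow as tf

# made-up dataset of six designs with scalar scores
x = tf.constant([[0.0], [1.0], [2.0], [3.0], [4.0], [5.0]])
y = tf.constant([[0.1], [0.9], [0.3], [0.7], [0.2], [0.8]])

# pick the k highest-scoring designs as the optimizer's starting points
indices = tf.math.top_k(y[:, 0], k=3)[1]
initial_x = tf.gather(x, indices, axis=0)
print(initial_x.numpy().ravel())  # designs whose scores are 0.9, 0.8, 0.7
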
Example 8
def autofocused_cbas(config):
    """Optimize a design problem score using the algorithm CBAS
    otherwise known as Conditioning by Adaptive Sampling

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])
    if task.is_discrete:
        task.map_to_integers()

    if config['normalize_ys']:
        task.map_normalize_y()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    # build a bootstrapped data pipeline for the ensemble
    train_data, val_data = build_pipeline(
        x=x, y=y, w=np.ones_like(y),
        val_size=config['val_size'],
        batch_size=config['ensemble_batch_size'],
        bootstraps=config['bootstraps'])

    # make several keras neural networks with two hidden layers
    forward_models = [ForwardModel(
        task,
        embedding_size=config['embedding_size'],
        hidden_size=config['hidden_size'],
        num_layers=config['num_layers'],
        initial_max_std=config['initial_max_std'],
        initial_min_std=config['initial_min_std'])
        for b in range(config['bootstraps'])]

    # create a trainer for the bootstrapped ensemble of forward models
    ensemble = Ensemble(
        forward_models,
        forward_model_optim=tf.keras.optimizers.Adam,
        forward_model_lr=config['ensemble_lr'])

    # train the model for an additional number of epochs
    ensemble.launch(train_data,
                    val_data,
                    logger,
                    config['ensemble_epochs'])

    # determine which architecture to use for the decoder
    decoder = DiscreteDecoder \
        if task.is_discrete else ContinuousDecoder

    # build the encoder and decoder distribution and the p model
    p_encoder = Encoder(task, config['latent_size'],
                        embedding_size=config['embedding_size'],
                        hidden_size=config['hidden_size'],
                        num_layers=config['num_layers'],
                        initial_max_std=config['initial_max_std'],
                        initial_min_std=config['initial_min_std'])
    p_decoder = decoder(task, config['latent_size'],
                        hidden_size=config['hidden_size'],
                        num_layers=config['num_layers'],
                        initial_max_std=config['initial_max_std'],
                        initial_min_std=config['initial_min_std'])
    p_vae = WeightedVAE(p_encoder, p_decoder,
                        vae_optim=tf.keras.optimizers.Adam,
                        vae_lr=config['vae_lr'],
                        vae_beta=config['vae_beta'])

    # build a weighted data set
    train_data, val_data = build_pipeline(
        x=x, y=y, w=np.ones_like(task.y),
        batch_size=config['vae_batch_size'],
        val_size=config['val_size'])

    # train the initial vae fit to the original data distribution
    p_vae.launch(train_data,
                 val_data,
                 logger,
                 config['offline_epochs'])

    # build the encoder and decoder distribution and the q model
    q_encoder = Encoder(task, config['latent_size'],
                        embedding_size=config['embedding_size'],
                        hidden_size=config['hidden_size'],
                        num_layers=config['num_layers'],
                        initial_max_std=config['initial_max_std'],
                        initial_min_std=config['initial_min_std'])
    q_decoder = decoder(task, config['latent_size'],
                        hidden_size=config['hidden_size'],
                        num_layers=config['num_layers'],
                        initial_max_std=config['initial_max_std'],
                        initial_min_std=config['initial_min_std'])
    q_vae = WeightedVAE(q_encoder, q_decoder,
                        vae_optim=tf.keras.optimizers.Adam,
                        vae_lr=config['vae_lr'],
                        vae_beta=config['vae_beta'])

    # create the cbas importance weight generator
    cbas = CBAS(ensemble,
                p_vae,
                q_vae,
                latent_size=config['latent_size'])

    # train and validate the q_vae using online samples
    q_encoder.set_weights(p_encoder.get_weights())
    q_decoder.set_weights(p_decoder.get_weights())
    for i in range(config['iterations']):

        # generate an importance weighted dataset
        x_t, y_t, w = cbas.generate_data(
            config['online_batches'],
            config['vae_batch_size'],
            config['percentile'])

        # build a weighted data set
        train_data, val_data = build_pipeline(
            x=x_t.numpy(),
            y=y_t.numpy(),
            w=w.numpy(),
            batch_size=config['vae_batch_size'],
            val_size=config['val_size'])

        # train a vae fit using weighted maximum likelihood
        start_epoch = config['online_epochs'] * i + \
                      config['offline_epochs']
        q_vae.launch(train_data,
                     val_data,
                     logger,
                     config['online_epochs'],
                     start_epoch=start_epoch)

        # autofocus the forward model using importance weights
        v = cbas.autofocus_weights(
            x, batch_size=config['ensemble_batch_size'])
        train_data, val_data = build_pipeline(
            x=x, y=y, w=v.numpy(),
            bootstraps=config['bootstraps'],
            batch_size=config['ensemble_batch_size'],
            val_size=config['val_size'])

        # retrain the ensemble using the autofocus importance weights
        start_epoch = config['autofocus_epochs'] * i + \
            config['ensemble_epochs']
        ensemble.launch(train_data,
                        val_data,
                        logger,
                        config['autofocus_epochs'],
                        start_epoch=start_epoch)

    # sample designs from the prior
    z = tf.random.normal([config['solver_samples'], config['latent_size']])
    q_dx = q_decoder.get_distribution(z, training=False)
    x_t = q_dx.sample()
    np.save(os.path.join(config["logging_dir"],
                         f"solution.npy"), x_t.numpy())
    score = task.predict(x_t)
    if task.is_normalized_y:
        score = task.denormalize_y(score)
    logger.record("score",
                  score,
                  config['iterations'],
                  percentile=True)
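
The final designs above come from decoding latent codes drawn from a standard normal prior. A hedged sketch of that sampling step with a stand-in decoder head (the real q_decoder is a trained network; the Dense layer here is only illustrative):

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

latent_size, solver_samples, design_dim = 8, 5, 3   # illustrative sizes

# stand-in decoder head: maps latents to a Gaussian over design features
head = tf.keras.layers.Dense(2 * design_dim)

def get_distribution(z):
    mean, log_std = tf.split(head(z), 2, axis=-1)
    return tfd.Normal(loc=mean, scale=tf.exp(log_std))

# sample latent codes from the prior and decode candidate designs
z = tf.random.normal([solver_samples, latent_size])
x_t = get_distribution(z).sample()
print(x_t.shape)  # (5, 3)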