def mins(config):
    """Optimize a design problem score using the algorithm MINS,
    otherwise known as Model Inversion Networks

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    if config['normalize_ys']:
        task.map_normalize_y()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    def map_to_probs(x, *rest):
        x = task.to_logits(x)
        x = tf.pad(x, [[0, 0]] * (len(x.shape) - 1) + [[1, 0]])
        return (tf.math.softmax(x / 1e-5), *rest)

    input_shape = x.shape[1:]
    if task.is_discrete:
        input_shape = list(x.shape[1:]) + [task.num_classes]

    base_temp = config.get('base_temp', None)

    if config['offline']:

        # make several keras neural networks with two hidden layers
        forward_models = [ForwardModel(
            input_shape,
            hidden_size=config['hidden_size'],
            num_layers=config['num_layers'],
            initial_max_std=config['initial_max_std'],
            initial_min_std=config['initial_min_std'])
            for _ in range(config['bootstraps'])]

        # create an ensemble trainer for the bootstrapped forward models
        oracle = Ensemble(forward_models,
                          forward_model_optim=tf.keras.optimizers.Adam,
                          forward_model_lr=config['oracle_lr'],
                          is_discrete=task.is_discrete,
                          noise_std=config.get('noise_std', 0.0),
                          keep=config.get('keep', 1.0),
                          temp=config.get('temp', 0.001))

        # build a bootstrapped data set
        train_data, val_data = build_pipeline(
            x=x, y=y, bootstraps=config['bootstraps'],
            batch_size=config['oracle_batch_size'],
            val_size=config['val_size'], buffer=1)

        train_data = train_data.map(
            map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        val_data = val_data.map(
            map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)

        # train the oracle ensemble for several epochs
        oracle.launch(train_data, val_data, logger, config['oracle_epochs'])

    # create replay buffers for both GANs
    explore_pool = ReplayBuffer(config['pool_size'], input_shape)
    exploit_pool = ReplayBuffer(config['pool_size'], input_shape)

    if task.is_discrete:

        # build a Gumbel-Softmax GAN to sample discrete outputs
        explore_gen = DiscreteGenerator(
            input_shape, config['latent_size'], hidden=config['hidden_size'])
        exploit_gen = DiscreteGenerator(
            input_shape, config['latent_size'], hidden=config['hidden_size'])

    else:

        # build an LS-GAN to sample continuous outputs
        explore_gen = ContinuousGenerator(
            input_shape, config['latent_size'], hidden=config['hidden_size'])
        exploit_gen = ContinuousGenerator(
            input_shape, config['latent_size'], hidden=config['hidden_size'])

    # build the neural network GAN components for exploration
    explore_discriminator = Discriminator(
        input_shape, hidden=config['hidden_size'], method=config['method'])
    explore_gan = WeightedGAN(
        explore_gen, explore_discriminator, explore_pool,
        critic_frequency=config['critic_frequency'],
        flip_frac=config['flip_frac'],
        pool_frac=config['pool_frac'], pool_save=config['pool_save'],
        fake_pair_frac=config['fake_pair_frac'],
        penalty_weight=config['penalty_weight'],
        generator_lr=config['generator_lr'],
        generator_beta_1=config['generator_beta_1'],
        generator_beta_2=config['generator_beta_2'],
        discriminator_lr=config['discriminator_lr'],
        discriminator_beta_1=config['discriminator_beta_1'],
        discriminator_beta_2=config['discriminator_beta_2'],
        is_discrete=task.is_discrete,
        noise_std=config.get('noise_std', 0.0),
        keep=config.get('keep', 1.0),
        start_temp=config.get('start_temp', 5.0),
        final_temp=config.get('final_temp', 1.0))

    # build the neural network GAN components for exploitation
    exploit_discriminator = Discriminator(
        input_shape, hidden=config['hidden_size'], method=config['method'])
    exploit_gan = WeightedGAN(
        exploit_gen, exploit_discriminator, exploit_pool,
        critic_frequency=config['critic_frequency'],
        flip_frac=config['flip_frac'],
        pool_frac=config['pool_frac'], pool_save=config['pool_save'],
        fake_pair_frac=config['fake_pair_frac'],
        penalty_weight=config['penalty_weight'],
        generator_lr=config['generator_lr'],
        generator_beta_1=config['generator_beta_1'],
        generator_beta_2=config['generator_beta_2'],
        discriminator_lr=config['discriminator_lr'],
        discriminator_beta_1=config['discriminator_beta_1'],
        discriminator_beta_2=config['discriminator_beta_2'],
        is_discrete=task.is_discrete,
        noise_std=config.get('noise_std', 0.0),
        keep=config.get('keep', 1.0),
        start_temp=config.get('start_temp', 5.0),
        final_temp=config.get('final_temp', 1.0))

    # build a weighted data set using newly collected samples
    train_data, val_data = build_pipeline(
        x=x, y=y, w=get_weights(y, base_temp=base_temp),
        batch_size=config['gan_batch_size'],
        val_size=config['val_size'], buffer=1)

    train_data = train_data.map(
        map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    val_data = val_data.map(
        map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    # train the exploration gan for several epochs
    explore_gan.launch(train_data, val_data, logger,
                       config['initial_epochs'], header="exploration/")

    # sample designs from the GAN and evaluate them
    condition_ys = tf.tile(tf.reduce_max(
        y, keepdims=True), [config['solver_samples'], 1])

    # record score percentiles
    logger.record("exploration/condition_ys",
                  task.denormalize_y(condition_ys)
                  if task.is_normalized_y else condition_ys,
                  0, percentile=True)

    # train the exploitation gan for several epochs
    exploit_gan.launch(train_data, val_data, logger,
                       config['initial_epochs'], header="exploitation/")

    # record score percentiles
    logger.record("exploitation/condition_ys",
                  task.denormalize_y(condition_ys)
                  if task.is_normalized_y else condition_ys,
                  0, percentile=True)

    # prevent the temperature from being annealed further
    if task.is_discrete:
        explore_gan.start_temp = explore_gan.final_temp
        exploit_gan.start_temp = exploit_gan.final_temp

    # train the gan using an importance sampled data set
    for iteration in range(config['iterations']):

        # generate synthetic x paired with high performing scores
        tilde_x, tilde_y = get_synthetic_data(
            x, y,
            exploration_samples=config['exploration_samples'],
            exploration_rate=config['exploration_rate'],
            base_temp=base_temp)

        # build a weighted data set using newly collected samples
        train_data, val_data = build_pipeline(
            x=tilde_x.numpy(), y=tilde_y.numpy(),
            w=get_weights(tilde_y.numpy(), base_temp=base_temp),
            batch_size=config['gan_batch_size'],
            val_size=config['val_size'], buffer=1)

        train_data = train_data.map(
            map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        val_data = val_data.map(
            map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)

        # train the exploration gan for several epochs
        explore_gan.launch(
            train_data, val_data, logger, config['epochs_per_iteration'],
            start_epoch=config['epochs_per_iteration'] * iteration +
                        config['initial_epochs'],
            header="exploration/")

        # sample designs from the GAN and evaluate them
        condition_ys = tf.tile(tf.reduce_max(
            tilde_y, keepdims=True), [config['thompson_samples'], 1])

        # generate samples for exploration
        solver_xs = explore_gen.sample(condition_ys, temp=0.001)
        if task.is_discrete:
            solver_xs = tf.argmax(
                solver_xs, axis=-1, output_type=tf.int32)
        actual_ys = oracle.get_distribution(solver_xs).mean() \
            if config['offline'] else task.predict(solver_xs)

        # record score percentiles
        logger.record("exploration/condition_ys",
                      task.denormalize_y(condition_ys)
                      if task.is_normalized_y else condition_ys,
                      0, percentile=True)
        logger.record("exploration/actual_ys",
                      task.denormalize_y(actual_ys)
                      if task.is_normalized_y else actual_ys,
                      0, percentile=True)

        # concatenate newly paired samples with the existing data set
        x = tf.concat([x, solver_xs], 0)
        y = tf.concat([y, actual_ys], 0)

        # build a weighted data set using newly collected samples
        train_data, val_data = build_pipeline(
            x=x.numpy(), y=y.numpy(),
            w=get_weights(y.numpy(), base_temp=base_temp),
            batch_size=config['gan_batch_size'],
            val_size=config['val_size'], buffer=1)

        train_data = train_data.map(
            map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        val_data = val_data.map(
            map_to_probs, num_parallel_calls=tf.data.experimental.AUTOTUNE)

        # train the exploitation gan for several epochs
        exploit_gan.launch(
            train_data, val_data, logger, config['epochs_per_iteration'],
            start_epoch=config['epochs_per_iteration'] * iteration +
                        config['initial_epochs'],
            header="exploitation/")

        # sample designs from the GAN and evaluate them
        condition_ys = tf.tile(tf.reduce_max(
            y, keepdims=True), [config['solver_samples'], 1])

        # record score percentiles
        logger.record("exploitation/condition_ys",
                      task.denormalize_y(condition_ys)
                      if task.is_normalized_y else condition_ys,
                      0, percentile=True)

    # generate samples for exploitation
    solver_xs = exploit_gen.sample(condition_ys, temp=0.001)
    solution = tf.argmax(solver_xs, axis=-1, output_type=tf.int32) \
        if task.is_discrete else solver_xs

    # save the current solution to the disk
    np.save(os.path.join(config["logging_dir"],
                         "solution.npy"), solution.numpy())

    # evaluate the found solution and record a video
    score = task.predict(solution)
    if task.is_normalized_y:
        score = task.denormalize_y(score)
    logger.record("score", score, config['iterations'], percentile=True)
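
# Example usage of mins() (a sketch, not part of the original repository):
# the dictionary below enumerates the hyperparameter keys that mins() reads
# above; every numeric value, the task name, and the GAN `method` string are
# illustrative assumptions rather than tuned or verified settings.
EXAMPLE_MINS_CONFIG = dict(
    logging_dir="mins-example", task="HopperController-Exact-v0",
    task_kwargs=dict(), normalize_ys=True, normalize_xs=True,
    offline=True, base_temp=0.1,
    # oracle ensemble hyper parameters
    bootstraps=5, hidden_size=256, num_layers=2,
    initial_max_std=0.2, initial_min_std=0.1,
    oracle_lr=0.001, oracle_batch_size=128, oracle_epochs=50,
    noise_std=0.0, keep=1.0, temp=0.001,
    # GAN hyper parameters
    pool_size=5000, latent_size=32, method="wasserstein",
    critic_frequency=5, flip_frac=0.0, pool_frac=0.0, pool_save=0.0,
    fake_pair_frac=0.0, penalty_weight=10.0,
    generator_lr=2e-4, generator_beta_1=0.0, generator_beta_2=0.9,
    discriminator_lr=2e-4, discriminator_beta_1=0.0, discriminator_beta_2=0.9,
    start_temp=5.0, final_temp=1.0,
    gan_batch_size=128, initial_epochs=200, epochs_per_iteration=10,
    # solver hyper parameters
    iterations=20, exploration_samples=32, exploration_rate=10.0,
    thompson_samples=32, solver_samples=128, val_size=200)
# mins(EXAMPLE_MINS_CONFIG)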
def coms_original(config):
    """Train a forward model and perform offline model-based
    optimization using a conservative objective model

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    # save the initial dataset statistics for safe keeping
    x = task.x
    y = task.y

    # if the task is discrete then use a continuous relaxation
    if config['is_discrete']:
        p = np.full_like(x, 1 / float(x.shape[-1]))
        x = config.get('discrete_clip', 5.0) * x \
            + (1.0 - config.get('discrete_clip', 5.0)) * p
        x = np.log(x)
        x = x[:, :, 1:] - x[:, :, :1]

    if config['normalize_ys']:

        # compute normalization statistics for the score
        mu_y = np.mean(y, axis=0, keepdims=True).astype(np.float32)
        y = y - mu_y
        st_y = np.std(y, axis=0,
                      keepdims=True).astype(np.float32).clip(1e-6, 1e9)
        y = y / st_y

    else:

        # create placeholder normalization statistics for the score
        mu_y = np.zeros_like(y[:1])
        st_y = np.ones_like(y[:1])

    if config['normalize_xs']:

        # compute normalization statistics for the data vectors
        mu_x = np.mean(x, axis=0, keepdims=True).astype(np.float32)
        x = x - mu_x
        st_x = np.std(x, axis=0,
                      keepdims=True).astype(np.float32).clip(1e-6, 1e9)
        x = x / st_x

    else:

        # create placeholder normalization statistics for the data vectors
        mu_x = np.zeros_like(x[:1])
        st_x = np.ones_like(x[:1])

    input_shape = list(task.input_shape)
    if config['is_discrete']:
        input_shape[-1] = input_shape[-1] - 1

    solver_lr = config['solver_lr'] * np.sqrt(np.prod(input_shape))
    solver_interval = int(config['solver_interval'] * (
        x.shape[0] - config['val_size']) / config['batch_size'])
    solver_warmup = int(config['solver_warmup'] * (
        x.shape[0] - config['val_size']) / config['batch_size'])

    # make a neural network to predict scores
    forward_model = ForwardModel(
        input_shape, activations=config['activations'],
        hidden=config['hidden_size'], final_tanh=config['final_tanh'])

    # create a trainer for a forward model with a conservative objective
    trainer = ConservativeMaximumLikelihood(
        forward_model,
        forward_model_opt=tf.keras.optimizers.Adam,
        forward_model_lr=config['forward_model_lr'],
        initial_alpha=config['initial_alpha'],
        alpha_opt=tf.keras.optimizers.Adam,
        alpha_lr=config['alpha_lr'],
        target_conservatism=config['target_conservatism'],
        negatives_fraction=config['negatives_fraction'],
        lookahead_steps=config['lookahead_steps'],
        lookahead_backprop=config['lookahead_backprop'],
        solver_beta=config['solver_beta'],
        solver_lr=solver_lr,
        solver_interval=solver_interval,
        solver_warmup=solver_warmup,
        solver_steps=config['solver_steps'],
        constraint_type=config['constraint_type'],
        entropy_coefficient=config['entropy_coefficient'],
        continuous_noise_std=config.get('continuous_noise_std', 0.0))

    # make additional neural networks for validating the predictions
    validation_models = [ForwardModel(
        input_shape, activations=config['activations'],
        hidden=config['hidden_size'], final_tanh=config['final_tanh'])]

    # create trainers for the validation models
    validation_trainers = [TransformedMaximumLikelihood(
        model,
        forward_model_optim=tf.keras.optimizers.Adam,
        forward_model_lr=config['forward_model_lr'],
        continuous_noise_std=config.get('continuous_noise_std', 0.0),
        logger_prefix=f"validation_model_{i}")
        for i, model in enumerate(validation_models)]

    # create a data set
    train_data, validate_data = task.build(
        x=x, y=y, batch_size=config['batch_size'],
        val_size=config['val_size'])

    # train the validation models
    for t in validation_trainers:
        t.launch(train_data, validate_data, logger, 100)

    # select the top k initial designs from the dataset
    indices = tf.math.top_k(y[:, 0], k=config['batch_size'])[1]
    initial_x = tf.gather(x, indices, axis=0)

    # create the starting point for the optimizer
    evaluations = 0
    score = None
    trainer.solution = tf.Variable(initial_x)
    trainer.done = tf.Variable(tf.fill(
        [config['batch_size']] + [1 for _ in x.shape[1:]], False))

    def evaluate_solution(xt):
        nonlocal evaluations, score

        # evaluate the design using the oracle and the forward model
        with tf.GradientTape() as tape:
            tape.watch(xt)
            model = forward_model(xt)

        # evaluate the predictions and gradient norm
        evaluations += 1
        grads = tape.gradient(model, xt)
        model = model * st_y + mu_y

        for i, val in enumerate(validation_models):
            prediction = val(xt)
            logger.record(f"validation_model_{i}/prediction",
                          prediction * st_y + mu_y, evaluations)

        # record the prediction and score to the logger
        logger.record("distance/travelled",
                      tf.linalg.norm(xt - initial_x), evaluations)
        logger.record("train/prediction", model, evaluations)
        logger.record("train/grad_norm", tf.linalg.norm(
            tf.reshape(grads, [grads.shape[0], -1]), axis=-1), evaluations)

        if evaluations in config['evaluate_steps'] \
                or len(config['evaluate_steps']) == 0 or score is None:
            solution = xt * st_x + mu_x
            if config['is_discrete']:
                solution = tf.math.softmax(
                    tf.pad(solution, [[0, 0], [0, 0], [1, 0]]) / 0.001)
            score = task.score(solution)
            logger.record("score", score, evaluations, percentile=True)
            logger.record("rank_corr/model_to_real",
                          spearman(model[:, 0], score[:, 0]), evaluations)

        return score, model

    # keep track of when to record performance
    interval = trainer.solver_interval
    warmup = trainer.solver_warmup

    scores = []
    predictions = []

    # train model for many epochs with conservatism
    for e in range(config['epochs']):

        statistics = defaultdict(list)
        for x, y in train_data:
            for name, tensor in trainer.train_step(x, y).items():
                statistics[name].append(tensor)

            # evaluate the current solution
            if tf.logical_and(
                    tf.equal(tf.math.mod(trainer.step, interval), 0),
                    tf.math.greater_equal(trainer.step, warmup)):
                score, model = evaluate_solution(trainer.solution)
                scores.append(score)
                predictions.append(model.numpy())

        for name in statistics.keys():
            logger.record(name, tf.concat(statistics[name], axis=0), e)

        statistics = defaultdict(list)
        for x, y in validate_data:
            for name, tensor in trainer.validate_step(x, y).items():
                statistics[name].append(tensor)

        for name in statistics.keys():
            logger.record(name, tf.concat(statistics[name], axis=0), e)

        if tf.reduce_all(trainer.done):
            break

    # save the model predictions and scores to be aggregated later
    np.save(os.path.join(config['logging_dir'], "scores.npy"),
            np.concatenate(scores, axis=1))
    np.save(os.path.join(config['logging_dir'], "predictions.npy"),
            np.stack(predictions, axis=1))
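
# Example usage of coms_original() (a sketch, not part of the original
# repository): the keys below are those read by coms_original() above; the
# values, the task name, and the `constraint_type` / `evaluate_steps`
# entries are illustrative assumptions only.
EXAMPLE_COMS_ORIGINAL_CONFIG = dict(
    logging_dir="coms-original-example", task="TFBind8-Exact-v0",
    task_kwargs=dict(), is_discrete=True, discrete_clip=0.6,
    normalize_ys=True, normalize_xs=True,
    # forward model hyper parameters
    activations=["leaky_relu", "leaky_relu"], hidden_size=2048,
    final_tanh=False, forward_model_lr=0.0003,
    initial_alpha=1.0, alpha_lr=0.01, target_conservatism=0.05,
    negatives_fraction=1.0, lookahead_steps=1, lookahead_backprop=True,
    constraint_type="mix", entropy_coefficient=0.0, continuous_noise_std=0.0,
    # solver hyper parameters
    solver_lr=0.01, solver_interval=1.0, solver_warmup=20.0,
    solver_steps=1, solver_beta=0.0,
    # training hyper parameters
    batch_size=128, val_size=200, epochs=50, evaluate_steps=[50])
# coms_original(EXAMPLE_COMS_ORIGINAL_CONFIG)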
def bo_qei(config):
    """Optimizes over designs x in an offline optimization problem
    using Bayesian Optimization with a quasi-Expected Improvement
    (qEI) acquisition function

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    if config['normalize_ys']:
        task.map_normalize_y()
    if task.is_discrete and not config["use_vae"]:
        task.map_to_logits()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    if task.is_discrete and config["use_vae"]:

        vae_model = SequentialVAE(
            task, hidden_size=config['vae_hidden_size'],
            latent_size=config['vae_latent_size'],
            activation=config['vae_activation'],
            kernel_size=config['vae_kernel_size'],
            num_blocks=config['vae_num_blocks'])

        vae_trainer = VAETrainer(
            vae_model, vae_optim=tf.keras.optimizers.Adam,
            vae_lr=config['vae_lr'], beta=config['vae_beta'])

        # create the data pipelines for training the vae
        train_data, val_data = build_pipeline(
            x=x, y=y, batch_size=config['vae_batch_size'],
            val_size=config['val_size'])

        # train the vae for several epochs
        vae_trainer.launch(train_data, val_data,
                           logger, config['vae_epochs'])

        # map the x values to latent space
        x = vae_model.encoder_cnn.predict(x)[0]

        mean = np.mean(x, axis=0, keepdims=True)
        standard_dev = np.std(x - mean, axis=0, keepdims=True)
        x = (x - mean) / standard_dev

    input_shape = x.shape[1:]
    input_size = np.prod(input_shape)

    # create the data pipelines for training the ensemble
    train_data, val_data = build_pipeline(
        x=x, y=y, bootstraps=config['bootstraps'],
        batch_size=config['ensemble_batch_size'],
        val_size=config['val_size'])

    # make several keras neural networks with two hidden layers
    forward_models = [ForwardModel(
        input_shape,
        hidden_size=config['hidden_size'],
        num_layers=config['num_layers'],
        initial_max_std=config['initial_max_std'],
        initial_min_std=config['initial_min_std'])
        for b in range(config['bootstraps'])]

    # create an ensemble trainer for the bootstrapped forward models
    ensemble = Ensemble(
        forward_models,
        forward_model_optim=tf.keras.optimizers.Adam,
        forward_model_lr=config['ensemble_lr'])

    # train the ensemble for several epochs
    ensemble.launch(train_data, val_data,
                    logger, config['ensemble_epochs'])

    # select the top k initial designs from the dataset
    indices = tf.math.top_k(y[:, 0], k=config['bo_gp_samples'])[1]
    initial_x = tf.gather(x, indices, axis=0)
    initial_y = tf.gather(y, indices, axis=0)

    from botorch.models import FixedNoiseGP, ModelListGP
    from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood
    from botorch.acquisition.objective import GenericMCObjective
    from botorch.optim import optimize_acqf
    from botorch import fit_gpytorch_model
    from botorch.acquisition.monte_carlo import qExpectedImprovement
    from botorch.sampling.samplers import SobolQMCNormalSampler
    from botorch.exceptions import BadInitialCandidatesWarning

    import torch
    import time
    import warnings

    warnings.filterwarnings('ignore', category=BadInitialCandidatesWarning)
    warnings.filterwarnings('ignore', category=RuntimeWarning)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.float32

    def objective(input_x):
        original_x = input_x

        # convert the tensor into numpy before using a TF model
        if torch.cuda.is_available():
            input_x = input_x.detach().cpu().numpy()
        else:
            input_x = input_x.detach().numpy()

        batch_shape = input_x.shape[:-1]

        # pass the input into a TF model
        input_x = tf.reshape(input_x, [-1, *input_shape])

        # optimize the ground truth or the learned model
        if config["optimize_ground_truth"]:
            if task.is_discrete and config["use_vae"]:
                input_x = tf.argmax(vae_model.decoder_cnn.predict(
                    input_x * standard_dev + mean),
                    axis=2, output_type=tf.int32)
            value = task.predict(input_x)
        else:
            value = ensemble.get_distribution(input_x).mean()

        ys = value.numpy()
        ys = ys.reshape(list(batch_shape) + [1])

        # convert the scores back to pytorch tensors
        return torch.tensor(ys).type_as(original_x).to(device, dtype=dtype)

    NOISE_SE = config['bo_noise_se']
    train_yvar = torch.tensor(NOISE_SE ** 2, device=device, dtype=dtype)

    def initialize_model(train_x, train_obj, state_dict=None):

        # define models for objective
        model_obj = FixedNoiseGP(train_x, train_obj,
                                 train_yvar.expand_as(train_obj)).to(train_x)

        # combine into a multi-output GP model
        model = ModelListGP(model_obj)
        mll = SumMarginalLogLikelihood(model.likelihood, model)

        # load state dict if it is passed
        if state_dict is not None:
            model.load_state_dict(state_dict)
        return mll, model

    def obj_callable(Z):
        return Z[..., 0]

    # define a feasibility-weighted objective for optimization
    obj = GenericMCObjective(obj_callable)

    BATCH_SIZE = config['bo_batch_size']
    bounds = torch.tensor([
        np.min(x, axis=0).reshape([input_size]).tolist(),
        np.max(x, axis=0).reshape([input_size]).tolist()],
        device=device, dtype=dtype)

    def optimize_acqf_and_get_observation(acq_func):
        """Optimizes the acquisition function, and returns
        a new candidate and a noisy observation."""

        # optimize
        try:
            candidates, _ = optimize_acqf(
                acq_function=acq_func,
                bounds=bounds,
                q=BATCH_SIZE,
                num_restarts=config['bo_num_restarts'],
                raw_samples=config['bo_raw_samples'],  # used for initialization heuristic
                options={"batch_limit": config['bo_batch_limit'],
                         "maxiter": config['bo_maxiter']})
        except RuntimeError:
            return

        # observe new values
        new_x = candidates.detach()
        exact_obj = objective(candidates)
        new_obj = exact_obj + NOISE_SE * torch.randn_like(exact_obj)
        return new_x, new_obj

    N_BATCH = config['bo_iterations']
    MC_SAMPLES = config['bo_mc_samples']

    best_observed_ei = []

    # call helper functions to generate initial training data and initialize model
    train_x_ei = initial_x.numpy().reshape([initial_x.shape[0], input_size])
    train_x_ei = torch.tensor(train_x_ei).to(device, dtype=dtype)

    train_obj_ei = initial_y.numpy().reshape([initial_y.shape[0], 1])
    train_obj_ei = torch.tensor(train_obj_ei).to(device, dtype=dtype)

    best_observed_value_ei = train_obj_ei.max().item()
    mll_ei, model_ei = initialize_model(train_x_ei, train_obj_ei)
    best_observed_ei.append(best_observed_value_ei)

    # run N_BATCH rounds of BayesOpt after the initial random batch
    for iteration in range(1, N_BATCH + 1):

        t0 = time.time()

        # fit the models
        fit_gpytorch_model(mll_ei)

        # define the qEI acquisition module using a QMC sampler
        qmc_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)

        # for best_f, we use the best observed noisy values as an approximation
        qEI = qExpectedImprovement(
            model=model_ei, best_f=train_obj_ei.max(),
            sampler=qmc_sampler, objective=obj)

        # optimize and get new observation
        result = optimize_acqf_and_get_observation(qEI)
        if result is None:
            print("RuntimeError was encountered, most likely a "
                  "'symeig_cpu: the algorithm failed to converge'")
            break
        new_x_ei, new_obj_ei = result

        # update training points
        train_x_ei = torch.cat([train_x_ei, new_x_ei])
        train_obj_ei = torch.cat([train_obj_ei, new_obj_ei])

        # update progress
        best_value_ei = obj(train_x_ei).max().item()
        best_observed_ei.append(best_value_ei)

        # reinitialize the models so they are ready for fitting on next iteration
        # use the current state dict to speed up fitting
        mll_ei, model_ei = initialize_model(
            train_x_ei, train_obj_ei, model_ei.state_dict())

        t1 = time.time()
        print(f"Batch {iteration:>2}: best_value = "
              f"({best_value_ei:>4.2f}), "
              f"time = {t1 - t0:>4.2f}.", end="")

    if torch.cuda.is_available():
        x_sol = train_x_ei.detach().cpu().numpy()
        y_sol = train_obj_ei.detach().cpu().numpy()
    else:
        x_sol = train_x_ei.detach().numpy()
        y_sol = train_obj_ei.detach().numpy()

    # select the top k designs found by the optimizer
    indices = tf.math.top_k(y_sol[:, 0], k=config['solver_samples'])[1]
    solution = tf.gather(x_sol, indices, axis=0)
    solution = tf.reshape(solution, [-1, *input_shape])

    if task.is_discrete and config["use_vae"]:
        solution = solution * standard_dev + mean
        logits = vae_model.decoder_cnn.predict(solution)
        solution = tf.argmax(logits, axis=2, output_type=tf.int32)

    # save the current solution to the disk
    np.save(os.path.join(config["logging_dir"],
                         "solution.npy"), solution.numpy())

    # evaluate the found solution and record a video
    score = task.predict(solution)
    if task.is_normalized_y:
        score = task.denormalize_y(score)
    logger.record("score", score, N_BATCH, percentile=True)
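
# Example usage of bo_qei() (a sketch, not part of the original repository):
# the keys below are those read by bo_qei() above; the values and the task
# name are illustrative assumptions only.
EXAMPLE_BO_QEI_CONFIG = dict(
    logging_dir="bo-qei-example", task="Superconductor-RandomForest-v0",
    task_kwargs=dict(), normalize_ys=True, normalize_xs=True,
    use_vae=False, optimize_ground_truth=False,
    # vae hyper parameters (only used when use_vae=True on discrete tasks)
    vae_hidden_size=64, vae_latent_size=256, vae_activation="relu",
    vae_kernel_size=3, vae_num_blocks=4, vae_lr=0.0003, vae_beta=1.0,
    vae_batch_size=128, vae_epochs=10,
    # surrogate ensemble hyper parameters
    bootstraps=5, hidden_size=256, num_layers=2,
    initial_max_std=0.2, initial_min_std=0.1,
    ensemble_lr=0.001, ensemble_batch_size=100, ensemble_epochs=50,
    # Bayesian optimization hyper parameters
    bo_gp_samples=500, bo_noise_se=0.1, bo_batch_size=32,
    bo_num_restarts=10, bo_raw_samples=128, bo_batch_limit=5,
    bo_maxiter=200, bo_iterations=10, bo_mc_samples=128,
    solver_samples=128, val_size=200)
# bo_qei(EXAMPLE_BO_QEI_CONFIG)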
def reinforce(config):
    """Optimizes over designs x in an offline optimization problem
    using the REINFORCE policy gradient method

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    if task.is_discrete:
        task.map_to_integers()
    if config['normalize_ys']:
        task.map_normalize_y()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    # create a bootstrapped data set
    train_data, val_data = build_pipeline(
        x=x, y=y, bootstraps=config['bootstraps'],
        batch_size=config['ensemble_batch_size'],
        val_size=config['val_size'])

    # make several keras neural networks with two hidden layers
    forward_models = [ForwardModel(
        task,
        embedding_size=config['embedding_size'],
        hidden_size=config['hidden_size'],
        num_layers=config['num_layers'],
        initial_max_std=config['initial_max_std'],
        initial_min_std=config['initial_min_std'])
        for b in range(config['bootstraps'])]

    # create an ensemble trainer for the bootstrapped forward models
    ensemble = Ensemble(
        forward_models,
        forward_model_optim=tf.keras.optimizers.Adam,
        forward_model_lr=config['ensemble_lr'])

    # train the ensemble for several epochs
    ensemble.launch(train_data, val_data,
                    logger, config['ensemble_epochs'])

    rl_opt = tf.keras.optimizers.Adam(
        learning_rate=config['reinforce_lr'])

    # select the top k initial designs from the dataset
    indices = tf.math.top_k(y[:, 0], k=config['solver_samples'])[1]
    initial_x = tf.gather(x, indices, axis=0)

    if task.is_discrete:
        logits = tf.pad(task.to_logits(initial_x), [[0, 0], [0, 0], [1, 0]])
        probs = tf.math.softmax(logits / 1e-5)
        logits = tf.math.log(tf.reduce_mean(probs, axis=0))
        sampler = DiscreteMarginal(logits)

    else:
        mean = tf.reduce_mean(initial_x, axis=0)
        logstd = tf.math.log(tf.ones_like(mean) * config['exploration_std'])
        sampler = ContinuousMarginal(mean, logstd)

    for iteration in range(config['iterations']):

        with tf.GradientTape() as tape:
            td = sampler.get_distribution()
            tx = td.sample(sample_shape=config['reinforce_batch_size'])

            if config['optimize_ground_truth']:
                ty = task.predict(tx)
            else:
                # use the surrogate model for optimization
                ty = ensemble.get_distribution(tx).mean()

            mean_y = tf.reduce_mean(ty)
            standard_dev_y = tf.math.reduce_std(ty - mean_y)
            log_probs = td.log_prob(tf.stop_gradient(tx))
            loss = tf.reduce_mean(
                -log_probs[:, tf.newaxis] *
                tf.stop_gradient((ty - mean_y) / standard_dev_y))

        print(f"[Iteration {iteration}] "
              f"Average Prediction = {tf.reduce_mean(ty)}")

        logger.record("reinforce/prediction",
                      ty, iteration, percentile=True)
        logger.record("reinforce/loss",
                      loss, iteration, percentile=True)

        grads = tape.gradient(loss, sampler.trainable_variables)
        rl_opt.apply_gradients(zip(grads, sampler.trainable_variables))

    td = sampler.get_distribution()
    solution = td.sample(sample_shape=config['solver_samples'])

    # save the current solution to the disk
    np.save(os.path.join(config["logging_dir"],
                         "solution.npy"), solution.numpy())

    # evaluate the found solution and record a video
    score = task.predict(solution)
    if config['normalize_ys']:
        score = task.denormalize_y(score)
    logger.record("score", score,
                  config['iterations'], percentile=True)
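
# Example usage of reinforce() (a sketch, not part of the original
# repository): the keys below are those read by reinforce() above; the
# values and the task name are illustrative assumptions only.
EXAMPLE_REINFORCE_CONFIG = dict(
    logging_dir="reinforce-example", task="AntMorphology-Exact-v0",
    task_kwargs=dict(), normalize_ys=True, normalize_xs=True,
    optimize_ground_truth=False,
    # surrogate ensemble hyper parameters
    bootstraps=5, embedding_size=64, hidden_size=256, num_layers=2,
    initial_max_std=0.2, initial_min_std=0.1,
    ensemble_lr=0.001, ensemble_batch_size=100, ensemble_epochs=50,
    # policy gradient hyper parameters
    reinforce_lr=0.01, reinforce_batch_size=256, exploration_std=0.1,
    iterations=200, solver_samples=128, val_size=200)
# reinforce(EXAMPLE_REINFORCE_CONFIG)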
def cma_es(config):
    """Optimizes over designs x in an offline optimization problem
    using the CMA Evolution Strategy

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    if config['normalize_ys']:
        task.map_normalize_y()
    if task.is_discrete and not config["use_vae"]:
        task.map_to_logits()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    if task.is_discrete and config["use_vae"]:

        vae_model = SequentialVAE(
            task, hidden_size=config['vae_hidden_size'],
            latent_size=config['vae_latent_size'],
            activation=config['vae_activation'],
            kernel_size=config['vae_kernel_size'],
            num_blocks=config['vae_num_blocks'])

        vae_trainer = VAETrainer(
            vae_model, vae_optim=tf.keras.optimizers.Adam,
            vae_lr=config['vae_lr'], beta=config['vae_beta'])

        # create the data pipelines for training the vae
        train_data, val_data = build_pipeline(
            x=x, y=y, batch_size=config['vae_batch_size'],
            val_size=config['val_size'])

        # train the vae for several epochs
        vae_trainer.launch(train_data, val_data,
                           logger, config['vae_epochs'])

        # map the x values to latent space
        x = vae_model.encoder_cnn.predict(x)[0]

        mean = np.mean(x, axis=0, keepdims=True)
        standard_dev = np.std(x - mean, axis=0, keepdims=True)
        x = (x - mean) / standard_dev

    input_shape = x.shape[1:]
    input_size = np.prod(input_shape)

    # make several keras neural networks with two hidden layers
    forward_models = [ForwardModel(
        input_shape,
        hidden_size=config['hidden_size'],
        num_layers=config['num_layers'],
        initial_max_std=config['initial_max_std'],
        initial_min_std=config['initial_min_std'])
        for b in range(config['bootstraps'])]

    # create an ensemble trainer for the bootstrapped forward models
    ensemble = Ensemble(
        forward_models,
        forward_model_optim=tf.keras.optimizers.Adam,
        forward_model_lr=config['ensemble_lr'])

    # create the data pipelines for training the ensemble
    train_data, val_data = build_pipeline(
        x=x, y=y, bootstraps=config['bootstraps'],
        batch_size=config['ensemble_batch_size'],
        val_size=config['val_size'])

    # train the ensemble for several epochs
    ensemble.launch(train_data, val_data,
                    logger, config['ensemble_epochs'])

    # select the top k initial designs from the dataset
    indices = tf.math.top_k(y[:, 0], k=config['solver_samples'])[1]
    initial_x = tf.gather(x, indices, axis=0)
    x = initial_x

    # create a fitness function for optimizing the expected task score
    def fitness(input_x):
        input_x = tf.reshape(input_x, input_shape)[tf.newaxis]
        if config["optimize_ground_truth"]:
            if task.is_discrete and config["use_vae"]:
                input_x = tf.argmax(vae_model.decoder_cnn.predict(
                    input_x * standard_dev + mean),
                    axis=2, output_type=tf.int32)
            value = task.predict(input_x)
        else:
            value = ensemble.get_distribution(input_x).mean()
        return (-value[0].numpy()).tolist()[0]

    import cma
    result = []
    for i in range(config['solver_samples']):

        # run CMA-ES starting from the i-th best design in the dataset
        xi = x[i].numpy().flatten().tolist()
        es = cma.CMAEvolutionStrategy(xi, config['cma_sigma'])
        step = 0
        while not es.stop() and step < config['cma_max_iterations']:
            solutions = es.ask()
            es.tell(solutions, [fitness(x) for x in solutions])
            step += 1
        result.append(tf.reshape(es.result.xbest, input_shape))
        print(f"CMA: {i + 1} / {config['solver_samples']}")

    # convert the solutions found by CMA-ES to a tensor
    x = tf.stack(result, axis=0)
    solution = x

    if task.is_discrete and config["use_vae"]:
        solution = solution * standard_dev + mean
        logits = vae_model.decoder_cnn.predict(solution)
        solution = tf.argmax(logits, axis=2, output_type=tf.int32)

    # save the current solution to the disk
    np.save(os.path.join(config["logging_dir"],
                         "solution.npy"), solution.numpy())

    # evaluate the found solution
    score = task.predict(solution)
    if task.is_normalized_y:
        score = task.denormalize_y(score)
    logger.record("score", score, 0, percentile=True)
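
# Example usage of cma_es() (a sketch, not part of the original repository):
# the keys below are those read by cma_es() above; the values and the task
# name are illustrative assumptions only.
EXAMPLE_CMA_ES_CONFIG = dict(
    logging_dir="cma-es-example", task="DKittyMorphology-Exact-v0",
    task_kwargs=dict(), normalize_ys=True, normalize_xs=True,
    use_vae=False, optimize_ground_truth=False,
    # vae hyper parameters (only used when use_vae=True on discrete tasks)
    vae_hidden_size=64, vae_latent_size=256, vae_activation="relu",
    vae_kernel_size=3, vae_num_blocks=4, vae_lr=0.0003, vae_beta=1.0,
    vae_batch_size=128, vae_epochs=10,
    # surrogate ensemble hyper parameters
    bootstraps=5, hidden_size=256, num_layers=2,
    initial_max_std=0.2, initial_min_std=0.1,
    ensemble_lr=0.001, ensemble_batch_size=100, ensemble_epochs=50,
    # CMA-ES hyper parameters
    cma_sigma=0.5, cma_max_iterations=100,
    solver_samples=128, val_size=200)
# cma_es(EXAMPLE_CMA_ES_CONFIG)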
def gradient_ascent(config):
    """Train a Score Function to solve a Model-Based Optimization
    using gradient ascent on the input design

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    if config['normalize_ys']:
        task.map_normalize_y()
    if task.is_discrete and not config["use_vae"]:
        task.map_to_logits()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    if task.is_discrete and config["use_vae"]:

        vae_model = SequentialVAE(
            task, hidden_size=config['vae_hidden_size'],
            latent_size=config['vae_latent_size'],
            activation=config['vae_activation'],
            kernel_size=config['vae_kernel_size'],
            num_blocks=config['vae_num_blocks'])

        vae_trainer = VAETrainer(
            vae_model, vae_optim=tf.keras.optimizers.Adam,
            vae_lr=config['vae_lr'], beta=config['vae_beta'])

        # create the data pipelines for training the vae
        train_data, val_data = build_pipeline(
            x=x, y=y, batch_size=config['vae_batch_size'],
            val_size=config['val_size'])

        # train the vae for several epochs
        vae_trainer.launch(train_data, val_data,
                           logger, config['vae_epochs'])

        # map the x values to latent space
        x = vae_model.encoder_cnn.predict(x)[0]

        mean = np.mean(x, axis=0, keepdims=True)
        standard_dev = np.std(x - mean, axis=0, keepdims=True)
        x = (x - mean) / standard_dev

    input_shape = x.shape[1:]
    input_size = np.prod(input_shape)

    # make several keras neural networks with different architectures
    forward_models = [ForwardModel(
        input_shape,
        activations=activations,
        hidden_size=config['hidden_size'],
        initial_max_std=config['initial_max_std'],
        initial_min_std=config['initial_min_std'])
        for activations in config['activations']]

    # scale the learning rate based on the number of channels in x
    config['solver_lr'] *= np.sqrt(np.prod(x.shape[1:]))

    trs = []
    for i, fm in enumerate(forward_models):

        # create a bootstrapped data set
        train_data, validate_data = build_pipeline(
            x=x, y=y, batch_size=config['batch_size'],
            val_size=config['val_size'], bootstraps=1)

        # create a trainer for the forward model
        trainer = MaximumLikelihood(
            fm,
            forward_model_optim=tf.keras.optimizers.Adam,
            forward_model_lr=config['forward_model_lr'],
            noise_std=config.get('model_noise_std', 0.0))

        # train the model for several epochs
        trs.append(trainer)
        trainer.launch(train_data, validate_data, logger,
                       config['epochs'], header=f'oracle_{i}/')

    # select the top k initial designs from the dataset
    mean_x = tf.reduce_mean(x, axis=0, keepdims=True)
    indices = tf.math.top_k(y[:, 0], k=config['solver_samples'])[1]
    initial_x = tf.gather(x, indices, axis=0)
    x = initial_x

    # evaluate the starting point
    solution = x
    if task.is_normalized_y:
        preds = [task.denormalize_y(fm.get_distribution(solution).mean())
                 for fm in forward_models]
    else:
        preds = [fm.get_distribution(solution).mean()
                 for fm in forward_models]

    # record the prediction and score to the logger
    logger.record("distance/travelled",
                  tf.linalg.norm(solution - initial_x), 0)
    logger.record("distance/from_mean",
                  tf.linalg.norm(solution - mean_x), 0)
    for n, prediction_i in enumerate(preds):
        logger.record(f"oracle_{n}/prediction", prediction_i, 0)
        if n > 0:
            logger.record(f"rank_corr/0_to_{n}",
                          spearman(preds[0][:, 0], prediction_i[:, 0]), 0)

    # perform gradient ascent on the score through the forward model
    for i in range(1, config['solver_steps'] + 1):

        # back propagate through the forward model
        with tf.GradientTape() as tape:
            tape.watch(x)
            predictions = []
            for fm in forward_models:
                solution = x
                predictions.append(fm.get_distribution(solution).mean())

            if config['aggregation_method'] == 'mean':
                score = tf.reduce_mean(predictions, axis=0)
            if config['aggregation_method'] == 'min':
                score = tf.reduce_min(predictions, axis=0)
            if config['aggregation_method'] == 'random':
                score = predictions[np.random.randint(len(predictions))]

        grads = tape.gradient(score, x)

        # take a gradient ascent step on the solution
        x = x + config['solver_lr'] * grads
        solution = x

        # evaluate the design using the oracle and the forward model
        if task.is_normalized_y:
            preds = [task.denormalize_y(fm.get_distribution(solution).mean())
                     for fm in forward_models]
        else:
            preds = [fm.get_distribution(solution).mean()
                     for fm in forward_models]

        # record the prediction and score to the logger
        logger.record("distance/travelled",
                      tf.linalg.norm(solution - initial_x), i)
        logger.record("distance/from_mean",
                      tf.linalg.norm(solution - mean_x), i)
        for n, prediction_i in enumerate(preds):
            logger.record(f"oracle_{n}/prediction", prediction_i, i)
            logger.record(f"oracle_{n}/grad_norm", tf.linalg.norm(
                tf.reshape(grads[n], [-1, input_size]), axis=-1), i)
            if n > 0:
                logger.record(f"rank_corr/0_to_{n}",
                              spearman(preds[0][:, 0], prediction_i[:, 0]), i)
                logger.record(f"grad_corr/0_to_{n}", tfp.stats.correlation(
                    grads[0], grads[n], sample_axis=0, event_axis=None), i)

    if task.is_discrete and config["use_vae"]:
        solution = solution * standard_dev + mean
        logits = vae_model.decoder_cnn.predict(solution)
        solution = tf.argmax(logits, axis=2, output_type=tf.int32)

    # save the current solution to the disk
    np.save(os.path.join(config["logging_dir"],
                         "solution.npy"), solution.numpy())

    # evaluate the found solution and record a video
    score = task.predict(solution)
    if task.is_normalized_y:
        score = task.denormalize_y(score)
    logger.record("score", score,
                  config['solver_steps'], percentile=True)
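
# Example usage of gradient_ascent() (a sketch, not part of the original
# repository): the keys below are those read by gradient_ascent() above;
# the values, the task name, and the per-model activation lists are
# illustrative assumptions only.
EXAMPLE_GRADIENT_ASCENT_CONFIG = dict(
    logging_dir="gradient-ascent-example", task="Superconductor-RandomForest-v0",
    task_kwargs=dict(), normalize_ys=True, normalize_xs=True, use_vae=False,
    # vae hyper parameters (only used when use_vae=True on discrete tasks)
    vae_hidden_size=64, vae_latent_size=256, vae_activation="relu",
    vae_kernel_size=3, vae_num_blocks=4, vae_lr=0.0003, vae_beta=1.0,
    vae_batch_size=128, vae_epochs=10,
    # one forward model is built per entry in `activations`
    activations=[["leaky_relu", "leaky_relu"], ["tanh", "tanh"]],
    hidden_size=256, initial_max_std=0.2, initial_min_std=0.1,
    forward_model_lr=0.001, model_noise_std=0.0,
    batch_size=128, val_size=200, epochs=50,
    # solver hyper parameters
    aggregation_method="mean", solver_lr=0.01,
    solver_steps=200, solver_samples=128)
# gradient_ascent(EXAMPLE_GRADIENT_ASCENT_CONFIG)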
def coms_cleaned(config):
    """Train a forward model and perform model based optimization
    using a conservative objective function

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    # save the initial dataset statistics for safe keeping
    x = task.x
    y = task.y

    if config['is_discrete']:

        # clip the distribution probabilities to a max of discrete_clip
        p = np.full_like(x, 1 / float(x.shape[-1]))
        discrete_clip = config.get('discrete_clip', 5.0)
        x = discrete_clip * x + (1.0 - discrete_clip) * p

        # map the distribution probabilities to logits
        x = np.log(x)
        x = x[:, :, 1:] - x[:, :, :1]

    if config['normalize_ys']:

        # remove the mean from the score values
        mu_y = np.mean(y, axis=0, keepdims=True).astype(np.float32)
        y = y - mu_y

        # standardize the variance of the score values
        st_y = np.std(y, axis=0,
                      keepdims=True).astype(np.float32).clip(1e-6, 1e9)
        y = y / st_y

    else:

        # create placeholder normalization statistics
        mu_y = 0.0
        st_y = 1.0

    if config['normalize_xs']:

        # remove the mean from the data vectors
        mu_x = np.mean(x, axis=0, keepdims=True).astype(np.float32)
        x = x - mu_x

        # standardize the variance of the data vectors
        st_x = np.std(x, axis=0,
                      keepdims=True).astype(np.float32).clip(1e-6, 1e9)
        x = x / st_x

    else:

        # create placeholder normalization statistics
        mu_x = 0.0
        st_x = 1.0

    # record the inputs shape of the forward model
    input_shape = list(task.input_shape)
    if config['is_discrete']:
        input_shape[-1] = input_shape[-1] - 1

    # compute the normalized learning rate of the model
    inner_lr = config['inner_lr'] * np.sqrt(np.prod(input_shape))
    outer_lr = config['outer_lr'] * np.sqrt(np.prod(input_shape))

    # make a neural network to predict scores
    forward_model = ForwardModel(
        input_shape, activations=config['activations'],
        hidden=config['hidden'], final_tanh=config['final_tanh'])

    # make a trainer for the forward model
    trainer = ConservativeObjectiveModel(
        forward_model,
        forward_model_opt=tf.keras.optimizers.Adam,
        forward_model_lr=config['forward_model_lr'],
        initial_alpha=config['initial_alpha'],
        alpha_opt=tf.keras.optimizers.Adam,
        alpha_lr=config['alpha_lr'],
        target_conservatism=config['target_conservatism'],
        inner_lr=inner_lr,
        outer_lr=outer_lr,
        inner_gradient_steps=config['inner_gradient_steps'],
        outer_gradient_steps=config['outer_gradient_steps'],
        beta=config['train_beta'],
        entropy_coefficient=config['entropy_coefficient'],
        continuous_noise_std=config['continuous_noise_std'])

    # create a data set
    train_data, validate_data = task.build(
        x=x, y=y, batch_size=config['batch_size'],
        val_size=config['val_size'])

    # train the forward model
    trainer.launch(train_data, validate_data,
                   logger, config["epochs"])

    # select the top k initial designs from the dataset
    indices = tf.math.top_k(y[:, 0], k=config['batch_size'])[1]
    initial_x = tf.gather(x, indices, axis=0)
    xt = initial_x

    scores = []
    predictions = []

    eval_beta = config['eval_beta']
    for step in range(config['outer_gradient_steps']):

        # take a single outer optimization step on the solution
        xt = trainer.outer_optimize(xt, eval_beta, 1, training=False)
        prediction = forward_model(
            xt, training=False).numpy() * st_y + mu_y

        # measure the conservatism of the current solution
        next_xt = trainer.inner_optimize(xt, training=False)
        next_prediction = forward_model(
            next_xt, training=False).numpy() * st_y + mu_y

        final_xt = trainer.outer_optimize(
            xt, eval_beta, config['outer_gradient_steps'], training=False)
        final_prediction = forward_model(
            final_xt, training=False).numpy() * st_y + mu_y

        solution = xt * st_x + mu_x
        if config['is_discrete']:
            solution = tf.math.softmax(tf.pad(
                solution, [[0, 0], [0, 0], [1, 0]]) / 0.001)
        score = task.score(solution)

        # record the prediction and score to the logger
        logger.record("score", score, step, percentile=True)
        logger.record("solver/model_to_real",
                      spearman(prediction[:, 0], score[:, 0]), step)
        logger.record("solver/distance",
                      tf.linalg.norm(xt - initial_x), step)
        logger.record("solver/prediction", prediction, step)
        logger.record("solver/beta_conservatism",
                      prediction - eval_beta * next_prediction, step)
        logger.record("solver/conservatism",
                      prediction - final_prediction, step)
        logger.record("solver/overestimation",
                      prediction - score, step)

        scores.append(score)
        predictions.append(prediction)

    # save the model predictions and scores to be aggregated later
    np.save(os.path.join(config['logging_dir'], "scores.npy"),
            np.concatenate(scores, axis=1))
    np.save(os.path.join(config['logging_dir'], "predictions.npy"),
            np.stack(predictions, axis=1))
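
# Example usage of coms_cleaned() (a sketch, not part of the original
# repository): the keys below are those read by coms_cleaned() above; the
# values and the task name are illustrative assumptions only.
EXAMPLE_COMS_CLEANED_CONFIG = dict(
    logging_dir="coms-cleaned-example", task="TFBind8-Exact-v0",
    task_kwargs=dict(), is_discrete=True, discrete_clip=0.6,
    normalize_ys=True, normalize_xs=True,
    # forward model hyper parameters
    activations=["leaky_relu", "leaky_relu"], hidden=2048, final_tanh=False,
    forward_model_lr=0.0003, initial_alpha=1.0, alpha_lr=0.01,
    target_conservatism=0.05, entropy_coefficient=0.0,
    continuous_noise_std=0.0,
    # inner / outer optimization hyper parameters
    inner_lr=0.01, outer_lr=0.01, inner_gradient_steps=1,
    outer_gradient_steps=50, train_beta=0.0, eval_beta=0.0,
    # training hyper parameters
    batch_size=128, val_size=200, epochs=50)
# coms_cleaned(EXAMPLE_COMS_CLEANED_CONFIG)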
def autofocused_cbas(config):
    """Optimize a design problem score using the algorithm CbAS,
    otherwise known as Conditioning by Adaptive Sampling

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    if task.is_discrete:
        task.map_to_integers()
    if config['normalize_ys']:
        task.map_normalize_y()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    # create a bootstrapped data set
    train_data, val_data = build_pipeline(
        x=x, y=y, w=np.ones_like(y),
        val_size=config['val_size'],
        batch_size=config['ensemble_batch_size'],
        bootstraps=config['bootstraps'])

    # make several keras neural networks with two hidden layers
    forward_models = [ForwardModel(
        task,
        embedding_size=config['embedding_size'],
        hidden_size=config['hidden_size'],
        num_layers=config['num_layers'],
        initial_max_std=config['initial_max_std'],
        initial_min_std=config['initial_min_std'])
        for b in range(config['bootstraps'])]

    # create an ensemble trainer for the bootstrapped forward models
    ensemble = Ensemble(
        forward_models,
        forward_model_optim=tf.keras.optimizers.Adam,
        forward_model_lr=config['ensemble_lr'])

    # train the ensemble for several epochs
    ensemble.launch(train_data, val_data,
                    logger, config['ensemble_epochs'])

    # determine which architecture to use for the decoder
    decoder = DiscreteDecoder \
        if task.is_discrete else ContinuousDecoder

    # build the encoder and decoder distribution and the p model
    p_encoder = Encoder(task, config['latent_size'],
                        embedding_size=config['embedding_size'],
                        hidden_size=config['hidden_size'],
                        num_layers=config['num_layers'],
                        initial_max_std=config['initial_max_std'],
                        initial_min_std=config['initial_min_std'])
    p_decoder = decoder(task, config['latent_size'],
                        hidden_size=config['hidden_size'],
                        num_layers=config['num_layers'],
                        initial_max_std=config['initial_max_std'],
                        initial_min_std=config['initial_min_std'])
    p_vae = WeightedVAE(p_encoder, p_decoder,
                        vae_optim=tf.keras.optimizers.Adam,
                        vae_lr=config['vae_lr'],
                        vae_beta=config['vae_beta'])

    # build a weighted data set
    train_data, val_data = build_pipeline(
        x=x, y=y, w=np.ones_like(task.y),
        batch_size=config['vae_batch_size'],
        val_size=config['val_size'])

    # train the initial vae fit to the original data distribution
    p_vae.launch(train_data, val_data,
                 logger, config['offline_epochs'])

    # build the encoder and decoder distribution and the q model
    q_encoder = Encoder(task, config['latent_size'],
                        embedding_size=config['embedding_size'],
                        hidden_size=config['hidden_size'],
                        num_layers=config['num_layers'],
                        initial_max_std=config['initial_max_std'],
                        initial_min_std=config['initial_min_std'])
    q_decoder = decoder(task, config['latent_size'],
                        hidden_size=config['hidden_size'],
                        num_layers=config['num_layers'],
                        initial_max_std=config['initial_max_std'],
                        initial_min_std=config['initial_min_std'])
    q_vae = WeightedVAE(q_encoder, q_decoder,
                        vae_optim=tf.keras.optimizers.Adam,
                        vae_lr=config['vae_lr'],
                        vae_beta=config['vae_beta'])

    # create the cbas importance weight generator
    cbas = CBAS(ensemble, p_vae, q_vae,
                latent_size=config['latent_size'])

    # train and validate the q_vae using online samples
    q_encoder.set_weights(p_encoder.get_weights())
    q_decoder.set_weights(p_decoder.get_weights())

    for i in range(config['iterations']):

        # generate an importance weighted dataset
        x_t, y_t, w = cbas.generate_data(
            config['online_batches'],
            config['vae_batch_size'],
            config['percentile'])

        # build a weighted data set
        train_data, val_data = build_pipeline(
            x=x_t.numpy(), y=y_t.numpy(), w=w.numpy(),
            batch_size=config['vae_batch_size'],
            val_size=config['val_size'])

        # train a vae fit using weighted maximum likelihood
        start_epoch = config['online_epochs'] * i + \
            config['offline_epochs']
        q_vae.launch(train_data, val_data,
                     logger, config['online_epochs'],
                     start_epoch=start_epoch)

        # autofocus the forward model using importance weights
        v = cbas.autofocus_weights(
            x, batch_size=config['ensemble_batch_size'])
        train_data, val_data = build_pipeline(
            x=x, y=y, w=v.numpy(),
            bootstraps=config['bootstraps'],
            batch_size=config['ensemble_batch_size'],
            val_size=config['val_size'])

        # retrain the forward model ensemble using the autofocus weights
        start_epoch = config['autofocus_epochs'] * i + \
            config['ensemble_epochs']
        ensemble.launch(train_data, val_data,
                        logger, config['autofocus_epochs'],
                        start_epoch=start_epoch)

    # sample designs from the prior
    z = tf.random.normal([config['solver_samples'], config['latent_size']])
    q_dx = q_decoder.get_distribution(z, training=False)
    x_t = q_dx.sample()
    np.save(os.path.join(config["logging_dir"],
                         "solution.npy"), x_t.numpy())

    score = task.predict(x_t)
    if task.is_normalized_y:
        score = task.denormalize_y(score)
    logger.record("score", score,
                  config['iterations'], percentile=True)
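
# Example usage of autofocused_cbas() (a sketch, not part of the original
# repository): the keys below are those read by autofocused_cbas() above;
# the values and the task name are illustrative assumptions only.
EXAMPLE_AUTOFOCUSED_CBAS_CONFIG = dict(
    logging_dir="autofocused-cbas-example", task="GFP-Transformer-v0",
    task_kwargs=dict(), normalize_ys=True, normalize_xs=True,
    # oracle ensemble hyper parameters
    bootstraps=5, embedding_size=64, hidden_size=256, num_layers=2,
    initial_max_std=0.2, initial_min_std=0.1,
    ensemble_lr=0.001, ensemble_batch_size=100, ensemble_epochs=50,
    autofocus_epochs=10,
    # weighted vae hyper parameters
    latent_size=32, vae_lr=0.001, vae_beta=1.0, vae_batch_size=128,
    offline_epochs=100, online_epochs=10,
    # adaptive sampling hyper parameters
    iterations=20, online_batches=10, percentile=80.0,
    solver_samples=128, val_size=200)
# autofocused_cbas(EXAMPLE_AUTOFOCUSED_CBAS_CONFIG)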