import os
import pickle

def get_player(current_time, version, file_model, solver_version=None, sequence=1):
    """ Load the models of a specific player """

    path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        '..', 'saved_models', str(current_time))
    try:
        mod = os.listdir(path)
        models = [model for model in mod
                  if model.split('-')[0] == str(version) and file_model in model]
        models.sort()
        if len(models) == 0:
            return False, version
    except FileNotFoundError:
        return False, version

    if file_model == "vae":
        model = ConvVAE((HEIGHT, WIDTH, 3), LATENT_VEC).to(DEVICE)
    elif file_model == "lstm":
        model = LSTM(sequence, HIDDEN_UNITS, LATENT_VEC,
                     NUM_LAYERS, GAUSSIANS, HIDDEN_DIM).to(DEVICE)
    elif file_model == "controller":
        model = Controller(PARAMS_CONTROLLER, ACTION_SPACE).to(DEVICE)

    checkpoint = load_torch_models(path, model, models[0])
    if file_model == "controller":
        file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 '..', 'saved_models', str(current_time),
                                 "{}-solver.pkl".format(solver_version))
        with open(file_path, 'rb') as f:
            solver = pickle.load(f)
        return checkpoint, model, solver
    return model, checkpoint
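# A minimal usage sketch for get_player. The timestamp "1524650005" and
# version 2 below are illustrative placeholders, not values from this repo.
# Note the asymmetric returns: "controller" yields (checkpoint, model, solver),
# "vae"/"lstm" yield (model, checkpoint), and (False, version) means no
# matching checkpoint file was found.
def _example_load_controller():
    res = get_player("1524650005", 2, "controller", solver_version=2)
    if res[0] is False:
        print("no controller checkpoint found for version", res[1])
        return None
    checkpoint, controller, solver = res
    print("loaded controller, checkpoint version:", checkpoint['version'])
    return controller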
def init_models(current_time, load_vae=False, load_lstm=False,
                load_controller=True, sequence=SEQUENCE):
    vae = lstm = best_controller = solver = None
    checkpoint = None  # stays None if no load_* flag is set

    if load_vae:
        vae, checkpoint = load_model(current_time, -1, model="vae")
        if not vae:
            vae = ConvVAE((HEIGHT, WIDTH, 3), LATENT_VEC).to(DEVICE)

    if load_lstm:
        lstm, checkpoint = load_model(current_time, -1, model="lstm",
                                      sequence=sequence)
        if not lstm:
            lstm = LSTM(sequence, HIDDEN_UNITS, LATENT_VEC,
                        NUM_LAYERS, GAUSSIANS, HIDDEN_DIM).to(DEVICE)

    if load_controller:
        res = load_model(current_time, -1, model="controller")
        checkpoint = res[0]
        if len(res) > 2:
            best_controller = res[1]
            solver = res[2]
            current_ctrl_version = checkpoint['version']
        else:
            best_controller = Controller(LATENT_VEC, PARAMS_FC1,
                                         ACTION_SPACE).to(DEVICE)
            solver = CMAES(PARAMS_FC1 + LATENT_VEC + 512,
                           sigma_init=SIGMA_INIT,
                           popsize=POPULATION)

    return vae, lstm, best_controller, solver, checkpoint
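# Usage sketch: load (or freshly build) every component of the world model
# in one call. The timestamp string is an illustrative placeholder.
#
#   vae, lstm, best_controller, solver, checkpoint = init_models(
#       "1524650005", load_vae=True, load_lstm=True, load_controller=True)
#
# Components whose load_* flag is False come back as None, so callers should
# only rely on the parts they asked for.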
def build_vaes(n_tasks, na, z_size, seq_len, vrec_lr, kl_tolerance):
    vaes = []
    vcomps = []
    for i in range(n_tasks):
        vae = ConvVAE(name="vae%i" % i, z_size=z_size)
        vcomp = build_vae("vae%i" % i, vae, na, z_size, seq_len,
                          vrec_lr, kl_tolerance)
        vaes.append(vae)
        vcomps.append(vcomp)
    return vaes, vcomps
def learn(sess, n_tasks, z_size, data_dir, num_steps, max_seq_len,
          batch_size_per_task=16, rnn_size=256,
          grad_clip=1.0, v_lr=0.0001, vr_lr=0.0001,
          min_v_lr=0.00001, v_decay=0.999, kl_tolerance=0.5,
          lr=0.001, min_lr=0.00001, decay=0.999,
          view="transposed",
          model_dir="tf_rnn", layer_norm=False,
          rnn_mmd=False, no_cor=False,
          w_mmd=1.0, alpha=1.0, beta=0.1,
          recurrent_dp=1.0, input_dp=1.0, output_dp=1.0):
    batch_size = batch_size_per_task * n_tasks

    wrapper = WrapperFactory.get_wrapper(view)
    if wrapper is None:
        raise Exception("Such a view is not available")

    print("Batch size for each task is", batch_size_per_task)
    print("The total batch size is", batch_size)

    check_dir(model_dir)
    lf = open(model_dir + '/log_%s' % datetime.now().isoformat(), "w")

    # Define the environment.
    na = make_env(config.env_name).action_space.n
    input_size = z_size + na
    output_size = z_size
    print("the environment", config.env_name, "has %i actions" % na)

    seq_len = max_seq_len

    fns = os.listdir(data_dir)
    fns = [fn for fn in fns if '.npz' in fn]
    random.shuffle(fns)
    dm = get_dm(wrapper, seq_len, na, data_dir, fns, not no_cor)

    tf_vrct_lr = tf.placeholder(tf.float32, shape=[])

    # Learn from reconstruction: one VAE per task.
    vaes, vcomps = build_vaes(n_tasks, na, z_size, seq_len, tf_vrct_lr,
                              kl_tolerance)
    vae_losses = [vcomp.loss for vcomp in vcomps]
    transform_loss = get_transform_loss(vcomps[0], vaes[1], wrapper)

    # A frozen copy of vae0, periodically synced below, serves as the MMD target.
    old_vae0 = ConvVAE(name="old_vae0", z_size=z_size)
    old_vcomp0 = build_vae("old_vae0", old_vae0, na, z_size, seq_len,
                           tf_vrct_lr, kl_tolerance)
    assign_old_eq_new = tf.group([tf.assign(oldv, newv)
                                  for (oldv, newv) in zip(old_vcomp0.var_list,
                                                          vcomps[0].var_list)])

    vmmd_losses = get_vmmd_losses(n_tasks, old_vcomp0, vcomps, alpha, beta)
    vrec_ops = get_vae_rec_ops(n_tasks, vcomps, vmmd_losses, w_mmd)
    vrec_all_op = tf.group(vrec_ops)

    # Meta RNN, shared across tasks.
    rnn = VRNN("rnn", max_seq_len, input_size, output_size,
               batch_size_per_task, rnn_size,
               layer_norm, recurrent_dp, input_dp, output_dp)

    global_step = tf.Variable(0, name='global_step', trainable=False)
    tf_rpred_lr = tf.placeholder(tf.float32, shape=[])

    rcomp0 = build_rnn("rnn", rnn, na, z_size, batch_size_per_task, seq_len)
    print("The basic rnn has been built")

    rcomps = build_rnns(n_tasks, rnn, vaes, vcomps, kl_tolerance)
    rnn_losses = [rcomp.loss for rcomp in rcomps]

    if rnn_mmd:
        rmmd_losses = get_rmmd_losses(n_tasks, old_vcomp0, vcomps, alpha, beta)
        for i in range(n_tasks):
            rnn_losses[i] += 0.1 * rmmd_losses[i]

    ptransform_loss = get_predicted_transform_loss(vcomps[0], rcomps[0],
                                                   vaes[1], wrapper,
                                                   batch_size_per_task, seq_len)
    print("RNN has been connected to each VAE")

    rnn_total_loss = tf.reduce_mean(rnn_losses)
    rpred_opt = tf.train.AdamOptimizer(tf_rpred_lr, name="rpred_opt")
    gvs = rpred_opt.compute_gradients(rnn_total_loss, rcomp0.var_list)
    clip_gvs = [(tf.clip_by_value(grad, -grad_clip, grad_clip), var)
                for grad, var in gvs if grad is not None]
    rpred_op = rpred_opt.apply_gradients(clip_gvs, global_step=global_step,
                                         name='rpred_op')

    # VAE in the prediction phase.
    vpred_ops, tf_vpred_lrs = get_vae_pred_ops(n_tasks, vcomps, rnn_losses)
    vpred_all_op = tf.group(vpred_ops)

    rpred_lr = lr
    vrct_lr = v_lr
    vpred_lr = vr_lr
    sess.run(tf.global_variables_initializer())

    for i in range(num_steps):
        step = sess.run(global_step)

        # Exponential decay of each learning rate toward its floor.
        rpred_lr = (rpred_lr - min_lr) * decay + min_lr
        vrct_lr = (vrct_lr - min_v_lr) * v_decay + min_v_lr
        vpred_lr = (vpred_lr - min_v_lr) * v_decay + min_v_lr

        ratio = 1.0
        data_buffer = []
        # Prediction phase: train the RNN (and VAEs via vpred ops).
        # NOTE: the feed below hard-codes two tasks (indices 0 and 1).
        for it in range(config.psteps_per_it):
            raw_obs_list, raw_a_list = dm.random_batch(batch_size_per_task)
            data_buffer.append((raw_obs_list, raw_a_list))

            feed = {tf_rpred_lr: rpred_lr,
                    tf_vrct_lr: vrct_lr,
                    tf_vpred_lrs[0]: vpred_lr,
                    tf_vpred_lrs[1]: vpred_lr * ratio}
            feed[old_vcomp0.x] = raw_obs_list[0]
            for j in range(n_tasks):
                vcomp = vcomps[j]
                feed[vcomp.x] = raw_obs_list[j]
                feed[vcomp.a] = raw_a_list[j][:, :-1, :]

            (rnn_cost, rnn_cost2, vae_cost, vae_cost2,
             transform_cost, ptransform_cost, _, _) = sess.run(
                [rnn_losses[0], rnn_losses[1], vae_losses[0], vae_losses[1],
                 transform_loss, ptransform_loss, rpred_op, vpred_all_op],
                feed)
            ratio = rnn_cost2 / rnn_cost

            if i % config.log_interval == 0:
                output_log = get_output_log(step, rpred_lr, [vae_cost],
                                            [rnn_cost], [transform_cost],
                                            [ptransform_cost])
                lf.write(output_log)

        # Reconstruction phase: replay the buffered batches in random order.
        data_order = np.arange(len(data_buffer))
        nd = len(data_order)
        np.random.shuffle(data_order)
        for it in range(config.rsteps_per_it):
            if (it + 1) % nd == 0:
                np.random.shuffle(data_order)
            rid = data_order[it % nd]
            raw_obs_list, raw_a_list = data_buffer[rid]

            feed = {tf_rpred_lr: rpred_lr, tf_vrct_lr: vrct_lr}
            feed[old_vcomp0.x] = raw_obs_list[0]
            for j in range(n_tasks):
                vcomp = vcomps[j]
                feed[vcomp.x] = raw_obs_list[j]
                feed[vcomp.a] = raw_a_list[j][:, :-1, :]

            (rnn_cost, rnn_cost2, vae_cost, vae_cost2,
             transform_cost, ptransform_cost, _) = sess.run(
                [rnn_losses[0], rnn_losses[1], vae_losses[0], vae_losses[1],
                 transform_loss, ptransform_loss, vrec_all_op],
                feed)

            if i % config.log_interval == 0:
                output_log = get_output_log(step, rpred_lr, [vae_cost],
                                            [rnn_cost], [transform_cost],
                                            [ptransform_cost])
                lf.write(output_log)

        lf.flush()

        if (i + 1) % config.target_update_interval == 0:
            sess.run(assign_old_eq_new)

        if i % config.model_save_interval == 0:
            tmp_dir = model_dir + '/it_%i' % i
            check_dir(tmp_dir)
            saveToFlat(rcomp0.var_list, tmp_dir + '/rnn.p')
            for j in range(n_tasks):
                vcomp = vcomps[j]
                saveToFlat(vcomp.var_list, tmp_dir + '/vae%i.p' % j)

    saveToFlat(rcomp0.var_list, model_dir + '/final_rnn.p')
    for i in range(n_tasks):
        vcomp = vcomps[i]
        saveToFlat(vcomp.var_list, model_dir + '/final_vae%i.p' % i)
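# The update `lr = (lr - min_lr) * decay + min_lr` used in learn() is an
# exponential decay toward a floor: lr_t = min_lr + (lr_0 - min_lr) * decay**t.
# A standalone sanity check of that identity (pure Python, no project
# dependencies; the numbers match learn()'s defaults):
def _check_lr_schedule(lr0=0.001, min_lr=0.00001, decay=0.999, steps=5):
    lr = lr0
    for t in range(1, steps + 1):
        lr = (lr - min_lr) * decay + min_lr            # the in-loop update
        closed_form = min_lr + (lr0 - min_lr) * decay ** t
        assert abs(lr - closed_form) < 1e-12
    return lr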
    # (This fragment continues an `if` branch, not shown here, that computes
    # normalisation statistics from a buffer of sample observations.)
    obs_norm = np.asarray(obs_norm, dtype=np.float32)
    obs_mean = np.expand_dims(np.mean(obs_norm, axis=0), axis=0)
    obs_std = np.mean(np.std(obs_norm, axis=0)) + 1e-8
    del obs_norm
else:
    # No sample buffer: fall back to raw pixel statistics.
    obs_mean = np.zeros([1, 64, 64, 3], dtype=np.float32)
    obs_std = 255.0

ppo = PPO(env.action_space[0], hyperparams.EPSILON, hyperparams.ENTROPY_REG,
          hyperparams.VALUE_COEFFICIENT, hyperparams.INITIAL_LAYER,
          hyperparams.LEARNING_RATE, hyperparams.MAX_GRAD_NORM,
          hyperparams.RECURRENT, hyperparams.RECURRENT_SIZE)

if hyperparams.INTRINSIC:
    vae = ConvVAE(hyperparams.VAE_Z_SIZE, hyperparams.VAE_LEARNING_RATE,
                  hyperparams.VAE_KL_TOLERANCE, 64, hyperparams.OBS_STD)
    prediction = NextStatePrediction(hyperparams.PREDICTION_LEARNING_RATE,
                                     hyperparams.PREDICTION_SIZE,
                                     n_steps, n_envs, hyperparams.USE_RNN)
    vae_states = ReplayMemory(max_size=hyperparams.MEMORY_LENGTH)
    # prediction.set_hidden_states(n_envs)

current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = ('custom/INTRINSIC/' + hyperparams.ENVIRONMENT_NAME
                 + '_' + current_time + '/train')
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
train_summary_writer.set_as_default()
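# Sketch of how the statistics above are presumably consumed (the actual
# normalisation call lives elsewhere in this script; this helper is
# hypothetical): observations are standardised before entering the networks,
# so the fallback branch (obs_mean = 0, obs_std = 255.0) simply rescales raw
# uint8 pixels into [0, 1].
def _normalize_obs(obs, obs_mean, obs_std):
    # obs: float32 array broadcastable against obs_mean, e.g. [N, 64, 64, 3]
    return (obs - obs_mean) / obs_std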
def train_vae(current_time):
    """ Train a VAE to create a latent representation of the Sonic levels
    by trying to encode and decode each frame. """

    dataset = VAEDataset()
    last_id = 0
    lr = LR
    version = 1
    total_ite = 1
    client = MongoClient()
    db = client.retro_contest
    collection = db[current_time]
    fs = gridfs.GridFS(db)

    ## Load or create models
    vae, checkpoint = load_model(current_time, -1, model="vae")
    if not vae:
        vae = ConvVAE((WIDTH, HEIGHT, 3), LATENT_VEC).to(DEVICE)
        optimizer = create_optimizer(vae, lr)
        state = create_state(version, lr, total_ite, optimizer)
        save_checkpoint(vae, "vae", state, current_time)
    else:
        optimizer = create_optimizer(vae, lr, param=checkpoint['optimizer'])
        total_ite = checkpoint['total_ite'] + 1
        lr = checkpoint['lr']
        version = checkpoint['version']
        last_id = 0

    ## Fill the dataset (or wait for the database to be filled)
    while len(dataset) < SIZE:
        last_id = fetch_new_run(collection, fs, dataset, last_id,
                                loaded_version=current_time)
        time.sleep(5)

    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE_VAE, shuffle=True)
    while True:
        batch_loss = []
        running_loss = []
        for batch_idx, frames in enumerate(dataloader):
            frames = torch.tensor(frames, dtype=torch.float, device=DEVICE) / 255
            frames = frames.view(-1, 3, WIDTH, HEIGHT)

            ## Save the models
            if total_ite % SAVE_TICK == 0:
                version += 1
                state = create_state(version, lr, total_ite, optimizer)
                save_checkpoint(vae, "vae", state, current_time)

            if total_ite % SAVE_PIC_TICK == 0:
                traverse_latent_space(vae, frames[0], frames[-1], total_ite)
                create_img_recons(vae, frames[0:40], total_ite)

            loss = train_epoch(vae, optimizer, frames)
            running_loss.append(loss)

            ## Print running loss
            if total_ite % LOSS_TICK == 0:
                print("[TRAIN] current iteration: %d, averaged loss: %.3f"
                      % (total_ite, loss))
                batch_loss.append(np.mean(running_loss))
                running_loss = []

            ## Fetch new games
            if total_ite % REFRESH_TICK == 0:
                new_last_id = fetch_new_run(collection, fs, dataset, last_id)
                if new_last_id == last_id:
                    last_id = 0
                else:
                    last_id = new_last_id

            total_ite += 1

        if len(batch_loss) > 0:
            print("[TRAIN] Average backward pass loss : %.3f, current lr: %f"
                  % (np.mean(batch_loss), lr))
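# train_epoch is defined elsewhere in the project. For reference, a minimal
# sketch of a standard ConvVAE training step (MSE reconstruction plus KL
# divergence); the forward signature and the loss weighting are assumptions,
# and the project's actual implementation may differ:
def _vae_step_sketch(vae, optimizer, frames):
    recon, mu, logvar = vae(frames)          # assumed forward signature
    recon_loss = torch.nn.functional.mse_loss(recon, frames, reduction='sum')
    kl = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    loss = recon_loss + kl
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()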
def build_ae_for_size(ae_class, sizes, filters, latent_size, lamb=None,
                      bn=False, info=False):
    # Number of stride-2 (downsampling) layers: at most 3, and never more
    # than log2 of the smallest spatial dimension. All earlier layers are
    # size-preserving.
    l = math.floor(math.log(min(sizes[1:]), 2))
    l = min(l, 3)
    size_preserving_l = len(filters) - l
    kernels = [3] * size_preserving_l + [4] * l
    strides = [1] * size_preserving_l + [2] * l
    paddings = [1] * size_preserving_l + [1] * l

    if ae_class == 'vae':
        ae = ConvVAE(sizes, (filters, list(reversed(filters[:-1]))),
                     (kernels, kernels), strides=(strides, strides),
                     paddings=(paddings, paddings), latent_size=latent_size,
                     lamb=lamb, bn=bn).cuda()
    elif ae_class == 'ae':
        assert lamb is None
        ae = ConvAE(sizes, (filters, list(reversed(filters[:-1]))),
                    (kernels, kernels), strides=(strides, strides),
                    paddings=(paddings, paddings), latent_size=latent_size,
                    bn=bn).cuda()
    elif ae_class == 'wae':
        ae = ConvWAE(sizes, (filters, list(reversed(filters[:-1]))),
                     (kernels, kernels), strides=(strides, strides),
                     paddings=(paddings, paddings), latent_size=latent_size,
                     bn=bn, lamb=lamb, z_var=2.0).cuda()
    elif ae_class == 'waegan':
        encoder = ConvWAEGAN_Encoder(sizes, (filters, list(reversed(filters[:-1]))),
                                     (kernels, kernels),
                                     strides=(strides, strides),
                                     paddings=(paddings, paddings),
                                     latent_size=latent_size, lamb=lamb,
                                     bn=bn).cuda()
        decoder = ConvWAEGAN_Decoder(sizes, (filters, list(reversed(filters[:-1]))),
                                     (kernels, kernels),
                                     strides=(strides, strides),
                                     paddings=(paddings, paddings),
                                     latent_size=latent_size, lamb=lamb,
                                     bn=bn).cuda()
        discriminator = ConvWAEGAN_Discriminator(latent_size=latent_size,
                                                 lamb=lamb).cuda()
        ae = (encoder, decoder, discriminator)
    else:
        raise ValueError('wrong ae_class parameter value')

    if info:
        print('------------------------------')
        print(f'Built {ae_class} with parameters:')
        print(f'input/output size: {tuple(sizes)}')
        print(f'layers: {len(filters)} - {filters}')
        if ae_class == 'waegan':
            print(f'pre-latent: {ae[0].pre_latent_size}')
        else:
            print(f'pre-latent: {ae.pre_latent_size}')
        print(f'latent size: {latent_size}')
    return ae
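# Worked example of the kernel/stride arithmetic above (illustrative numbers:
# a 64x64 input with a five-layer filter stack, not a configuration taken from
# this repo). With sizes = (3, 64, 64), l = min(floor(log2(64)), 3) = 3, so the
# first two layers preserve spatial size (kernel 3, stride 1, padding 1) and
# the last three halve it (kernel 4, stride 2, padding 1): 64 -> 64 -> 64 ->
# 32 -> 16 -> 8.
def _conv_out(size, kernel, stride, padding):
    # Standard convolution output-size formula.
    return (size + 2 * padding - kernel) // stride + 1

def _trace_encoder_sizes(size=64, n_layers=5, n_downsample=3):
    sizes = [size]
    for i in range(n_layers):
        if i < n_layers - n_downsample:
            sizes.append(_conv_out(sizes[-1], 3, 1, 1))  # size-preserving
        else:
            sizes.append(_conv_out(sizes[-1], 4, 2, 1))  # halves the size
    return sizes  # [64, 64, 64, 32, 16, 8]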