Example #1
def get_player(current_time,
               version,
               file_model,
               solver_version=None,
               sequence=1):
    """ Load the models of a specific player """

    path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        '..', 'saved_models', str(current_time))
    try:
        mod = os.listdir(path)
        models = [m for m in mod
                  if m.split('-')[0] == str(version) and file_model in m]
        models.sort()
        if not models:
            return False, version
    except FileNotFoundError:
        return False, version

    if file_model == "vae":
        model = ConvVAE((HEIGHT, WIDTH, 3), LATENT_VEC).to(DEVICE)
    elif file_model == "lstm":
        model = LSTM(sequence, HIDDEN_UNITS, LATENT_VEC,
                     NUM_LAYERS, GAUSSIANS, HIDDEN_DIM).to(DEVICE)
    elif file_model == "controller":
        model = Controller(PARAMS_CONTROLLER, ACTION_SPACE).to(DEVICE)
    else:
        raise ValueError("Unknown file_model: %s" % file_model)

    checkpoint = load_torch_models(path, model, models[0])
    if file_model == "controller":
        file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 '..', 'saved_models', str(current_time),
                                 "{}-solver.pkl".format(solver_version))
        with open(file_path, 'rb') as f:
            solver = pickle.load(f)
        return checkpoint, model, solver
    return model, checkpoint
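A minimal usage sketch for get_player, assuming the module-level constants (HEIGHT, WIDTH, LATENT_VEC, DEVICE, ...) and model classes referenced above are defined; the timestamp and version values are hypothetical:

# On failure the function returns (False, version); on success the tuple
# order depends on file_model, as in the two return statements above.
res = get_player("20190101-120000", version=5, file_model="vae")
if res[0] is not False:
    vae, checkpoint = res

# The controller case also unpickles a CMA-ES solver and returns
# (checkpoint, model, solver) instead of (model, checkpoint).
res = get_player("20190101-120000", version=5,
                 file_model="controller", solver_version=5)
if res[0] is not False:
    checkpoint, controller, solver = res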
Example #2
def init_models(current_time, load_vae=False, load_lstm=False, load_controller=True, sequence=SEQUENCE):

    vae = lstm = best_controller = solver = checkpoint = None
    if load_vae:
        vae, checkpoint = load_model(current_time, -1, model="vae")
        if not vae:
            vae = ConvVAE((HEIGHT, WIDTH, 3), LATENT_VEC).to(DEVICE)
    
    if load_lstm:
        lstm, checkpoint = load_model(current_time, -1, model="lstm", sequence=sequence)
        if not lstm:
            lstm = LSTM(sequence, HIDDEN_UNITS, LATENT_VEC,
                        NUM_LAYERS, GAUSSIANS, HIDDEN_DIM).to(DEVICE)

    if load_controller:    
        res = load_model(current_time, -1, model="controller")
        checkpoint = res[0]
        if len(res) > 2:
            best_controller = res[1]
            solver = res[2]
            current_ctrl_version = checkpoint['version']
        else:
            best_controller = Controller(LATENT_VEC, PARAMS_FC1, ACTION_SPACE).to(DEVICE)
            solver = CMAES(PARAMS_FC1 + LATENT_VEC + 512,
                        sigma_init=SIGMA_INIT,
                        popsize=POPULATION)

    return vae, lstm, best_controller, solver, checkpoint
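A hedged call sketch for init_models (the timestamp is hypothetical; SEQUENCE and the model/solver classes come from the snippet's module):

# Components whose load_* flag is False come back as None.
vae, lstm, best_controller, solver, checkpoint = init_models(
    "20190101-120000", load_vae=True, load_lstm=True, load_controller=True)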
Example #3
def build_vaes(n_tasks, na, z_size, seq_len, vrec_lr,
               kl_tolerance):
  vaes = []
  vcomps = []
  for i in range(n_tasks):
    vae = ConvVAE(name="vae%i" % i, z_size=z_size)
    vcomp = build_vae("vae%i" % i, vae, na, z_size, seq_len, vrec_lr,
                      kl_tolerance)
    vaes.append(vae)
    vcomps.append(vcomp)
  return vaes, vcomps
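A short usage sketch, assuming build_vae returns a component object exposing a .loss attribute, as Example #4 relies on; the sizes are hypothetical:

# One VAE (and its training component) per task.
vaes, vcomps = build_vaes(n_tasks=2, na=6, z_size=32, seq_len=100,
                          vrec_lr=1e-4, kl_tolerance=0.5)
vae_losses = [vcomp.loss for vcomp in vcomps]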
Example #4
def learn(sess, n_tasks, z_size, data_dir, num_steps, max_seq_len,
          batch_size_per_task=16, rnn_size=256,
          grad_clip=1.0, v_lr=0.0001, vr_lr=0.0001,
          min_v_lr=0.00001, v_decay=0.999, kl_tolerance=0.5,
          lr=0.001, min_lr=0.00001, decay=0.999,
          view="transposed",
          model_dir="tf_rnn", layer_norm=False,
          rnn_mmd=False, no_cor=False,
          w_mmd=1.0,
          alpha=1.0, beta=0.1,
          recurrent_dp=1.0,
          input_dp=1.0,
          output_dp=1.0):
  batch_size = batch_size_per_task * n_tasks

  wrapper = WrapperFactory.get_wrapper(view)
  if wrapper is None:
    raise ValueError("view '%s' is not available" % view)

  print("Batch size for each taks is", batch_size_per_task)
  print("The total batch size is", batch_size)

  check_dir(model_dir)
  lf = open(model_dir + '/log_%s' % datetime.now().isoformat(), "w")
  # define env
  na = make_env(config.env_name).action_space.n
  input_size = z_size + na
  output_size = z_size
  print("the environment", config.env_name, "has %i actions" % na)

  seq_len = max_seq_len

  fns = os.listdir(data_dir)
  fns = [fn for fn in fns if '.npz' in fn]
  random.shuffle(fns)
  dm = get_dm(wrapper, seq_len, na, data_dir, fns, not no_cor)
  tf_vrct_lr = tf.placeholder(tf.float32,
                              shape=[])  # learn from reconstruction.
  vaes, vcomps = build_vaes(n_tasks, na, z_size, seq_len, tf_vrct_lr,
                            kl_tolerance)
  vae_losses = [vcomp.loss for vcomp in vcomps]
  transform_loss = get_transform_loss(vcomps[0], vaes[1], wrapper)

  old_vae0 = ConvVAE(name="old_vae0", z_size=z_size)
  old_vcomp0 = build_vae("old_vae0", old_vae0, na, z_size, seq_len,
                         tf_vrct_lr, kl_tolerance)
  assign_old_eq_new = tf.group([tf.assign(oldv, newv)
                                for (oldv, newv) in
                                zip(old_vcomp0.var_list, vcomps[0].var_list)])

  vmmd_losses = get_vmmd_losses(n_tasks, old_vcomp0, vcomps, alpha, beta)
  vrec_ops = get_vae_rec_ops(n_tasks, vcomps, vmmd_losses, w_mmd)
  vrec_all_op = tf.group(vrec_ops)

  # Meta RNN.
  rnn = VRNN("rnn", max_seq_len, input_size, output_size, batch_size_per_task,
             rnn_size, layer_norm, recurrent_dp, input_dp, output_dp)

  global_step = tf.Variable(0, name='global_step', trainable=False)
  tf_rpred_lr = tf.placeholder(tf.float32, shape=[])
  rcomp0 = build_rnn("rnn", rnn, na, z_size, batch_size_per_task, seq_len)

  print("The basic rnn has been built")

  rcomps = build_rnns(n_tasks, rnn, vaes, vcomps, kl_tolerance)
  rnn_losses = [rcomp.loss for rcomp in rcomps]

  if rnn_mmd:
    rmmd_losses = get_rmmd_losses(n_tasks, old_vcomp0, vcomps, alpha, beta)
    for i in range(n_tasks):
      rnn_losses[i] += 0.1 * rmmd_losses[i]

  ptransform_loss = get_predicted_transform_loss(vcomps[0], rcomps[0],
                                                 vaes[1],
                                                 wrapper, batch_size_per_task,
                                                 seq_len)
  print("RNN has been connected to each VAE")

  rnn_total_loss = tf.reduce_mean(rnn_losses)
  rpred_opt = tf.train.AdamOptimizer(tf_rpred_lr, name="rpred_opt")
  gvs = rpred_opt.compute_gradients(rnn_total_loss, rcomp0.var_list)
  clip_gvs = [(tf.clip_by_value(grad, -grad_clip, grad_clip), var) for
              grad, var in gvs if grad is not None]
  rpred_op = rpred_opt.apply_gradients(clip_gvs, global_step=global_step,
                                       name='rpred_op')

  # VAE in prediction phase
  vpred_ops, tf_vpred_lrs = get_vae_pred_ops(n_tasks, vcomps, rnn_losses)
  vpred_all_op = tf.group(vpred_ops)

  rpred_lr = lr
  vrct_lr = v_lr
  vpred_lr = vr_lr
  sess.run(tf.global_variables_initializer())

  for i in range(num_steps):

    step = sess.run(global_step)
    rpred_lr = (rpred_lr - min_lr) * decay + min_lr
    vrct_lr = (vrct_lr - min_v_lr) * v_decay + min_v_lr
    vpred_lr = (vpred_lr - min_v_lr) * v_decay + min_v_lr

    ratio = 1.0

    data_buffer = []

    for it in range(config.psteps_per_it):
      raw_obs_list, raw_a_list = dm.random_batch(batch_size_per_task)
      data_buffer.append((raw_obs_list, raw_a_list))

      feed = {tf_rpred_lr: rpred_lr, tf_vrct_lr: vrct_lr,
              tf_vpred_lrs[0]: vpred_lr,
              tf_vpred_lrs[1]: vpred_lr * ratio}
      feed[old_vcomp0.x] = raw_obs_list[0]
      for j in range(n_tasks):
        vcomp = vcomps[j]
        feed[vcomp.x] = raw_obs_list[j]
        feed[vcomp.a] = raw_a_list[j][:, :-1, :]

      (rnn_cost, rnn_cost2, vae_cost, vae_cost2,
       transform_cost, ptransform_cost, _, _) = sess.run(
        [rnn_losses[0], rnn_losses[1],
         vae_losses[0], vae_losses[1],
         transform_loss, ptransform_loss,
         rpred_op, vpred_all_op], feed)
      ratio = rnn_cost2 / rnn_cost

    if i % config.log_interval == 0:
      output_log = get_output_log(step, rpred_lr, [vae_cost], [rnn_cost], [transform_cost], [ptransform_cost])
      lf.write(output_log)

    data_order = np.arange(len(data_buffer))
    nd = len(data_order)
    np.random.shuffle(data_order)

    for it in range(config.rsteps_per_it):
      if (it + 1) % nd == 0:
        np.random.shuffle(data_order)
      rid = data_order[it % nd]

      raw_obs_list, raw_a_list = data_buffer[rid]
      # raw_obs_list, raw_a_list = dm.random_batch(batch_size_per_task)

      feed = {tf_rpred_lr: rpred_lr, tf_vrct_lr: vrct_lr}
      feed[old_vcomp0.x] = raw_obs_list[0]
      for j in range(n_tasks):
        vcomp = vcomps[j]
        feed[vcomp.x] = raw_obs_list[j]
        feed[vcomp.a] = raw_a_list[j][:, :-1, :]

      (rnn_cost, rnn_cost2, vae_cost, vae_cost2, transform_cost,
       ptransform_cost, _) = sess.run([
        rnn_losses[0], rnn_losses[1],
        vae_losses[0], vae_losses[1],
        transform_loss, ptransform_loss,
        vrec_all_op], feed)

    if i % config.log_interval == 0:
      output_log = get_output_log(step, rpred_lr, [vae_cost], [rnn_cost], [transform_cost], [ptransform_cost])
      lf.write(output_log)

    lf.flush()

    if (i + 1) % config.target_update_interval == 0:
      sess.run(assign_old_eq_new)

    if i % config.model_save_interval == 0:
      tmp_dir = model_dir + '/it_%i' % i
      check_dir(tmp_dir)
      saveToFlat(rcomp0.var_list, tmp_dir + '/rnn.p')
      for j in range(n_tasks):
        vcomp = vcomps[j]
        saveToFlat(vcomp.var_list, tmp_dir + '/vae%i.p' % j)

  saveToFlat(rcomp0.var_list, model_dir + '/final_rnn.p')
  for i in range(n_tasks):
    vcomp = vcomps[i]
    saveToFlat(vcomp.var_list, model_dir + '/final_vae%i.p' % i)
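A side note on the schedules at the top of the training loop: the update lr = (lr - min_lr) * decay + min_lr decays each learning rate geometrically toward its floor. A minimal check of the closed form:

# After t steps: lr_t = min_lr + (lr_0 - min_lr) * decay ** t
def decayed_lr(lr0, min_lr, decay, t):
    return min_lr + (lr0 - min_lr) * decay ** t

# One step of the closed form matches one application of the update rule.
assert abs(decayed_lr(0.001, 0.00001, 0.999, 1)
           - ((0.001 - 0.00001) * 0.999 + 0.00001)) < 1e-12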
Example #5
            obs_norm = np.asarray(obs_norm, dtype=np.float32)
            obs_mean = np.expand_dims(np.mean(obs_norm, axis=0), axis=0)
            obs_std = np.mean(np.std(obs_norm, axis=0)) + 1e-8
            del obs_norm
        else:
            obs_mean = np.zeros([1, 64, 64, 3], dtype=np.float32)
            obs_std = 255.0

        ppo = PPO(env.action_space[0], hyperparams.EPSILON,
                  hyperparams.ENTROPY_REG, hyperparams.VALUE_COEFFICIENT,
                  hyperparams.INITIAL_LAYER, hyperparams.LEARNING_RATE,
                  hyperparams.MAX_GRAD_NORM, hyperparams.RECURRENT,
                  hyperparams.RECURRENT_SIZE)
        if hyperparams.INTRINSIC:
            vae = ConvVAE(hyperparams.VAE_Z_SIZE,
                          hyperparams.VAE_LEARNING_RATE,
                          hyperparams.VAE_KL_TOLERANCE, 64,
                          hyperparams.OBS_STD)

            prediction = NextStatePrediction(
                hyperparams.PREDICTION_LEARNING_RATE,
                hyperparams.PREDICTION_SIZE, n_steps, n_envs,
                hyperparams.USE_RNN)

            vae_states = ReplayMemory(max_size=hyperparams.MEMORY_LENGTH)
        # prediction.set_hidden_states(n_envs)

        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        train_log_dir = 'custom/INTRINSIC/' + hyperparams.ENVIRONMENT_NAME + '_' + current_time + '/train'
        train_summary_writer = tf.summary.create_file_writer(train_log_dir)
        train_summary_writer.set_as_default()
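The excerpt above only computes the normalization statistics (a per-pixel mean and a scalar std, or fixed defaults); applying them happens outside the excerpt. A minimal sketch of the usual whitening step, with a hypothetical helper name:

# Hypothetical helper: whiten uint8 frames with the stats computed above.
def normalize_obs(obs, obs_mean, obs_std):
    return (np.asarray(obs, dtype=np.float32) - obs_mean) / obs_std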
Example #6
def train_vae(current_time):
    """
    Train a VAE to create a latent representation of the Sonic levels by
    trying to encode and decode each frame.
    """

    dataset = VAEDataset()
    last_id = 0
    lr = LR
    version = 1
    total_ite = 1

    client = MongoClient()
    db = client.retro_contest
    collection = db[current_time]
    fs = gridfs.GridFS(db)

    ## Load or create models
    vae, checkpoint = load_model(current_time, -1, model="vae")
    if not vae:
        vae = ConvVAE((WIDTH, HEIGHT, 3), LATENT_VEC).to(DEVICE)
        optimizer = create_optimizer(vae, lr)
        state = create_state(version, lr, total_ite, optimizer)
        save_checkpoint(vae, "vae", state, current_time)
    else:
        optimizer = create_optimizer(vae, lr, param=checkpoint['optimizer'])
        total_ite = checkpoint['total_ite'] + 1
        lr = checkpoint['lr']
        version = checkpoint['version']
        last_id = 0

    ## Fill the dataset (or wait for the database to be filled)
    while len(dataset) < SIZE:
        last_id = fetch_new_run(collection, fs, dataset, last_id, loaded_version=current_time)
        time.sleep(5)
    
    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE_VAE, shuffle=True)
    while True:
        batch_loss = []
        running_loss = []

        for batch_idx, frames in enumerate(dataloader):
            ## DataLoader already yields tensors, so move/cast in place
            ## instead of re-wrapping with torch.tensor()
            frames = frames.to(DEVICE, dtype=torch.float) / 255
            frames = frames.view(-1, 3, WIDTH, HEIGHT)

            ## Save the models
            if total_ite % SAVE_TICK == 0:
                version += 1
                state = create_state(version, lr, total_ite, optimizer)
                save_checkpoint(vae, "vae", state, current_time)
            
            if total_ite % SAVE_PIC_TICK == 0:
                traverse_latent_space(vae, frames[0], frames[-1], total_ite)
                create_img_recons(vae, frames[0:40], total_ite)

            loss = train_epoch(vae, optimizer, frames)
            running_loss.append(loss)

            ## Print running loss
            if total_ite % LOSS_TICK == 0:
                mean_loss = np.mean(running_loss)
                print("[TRAIN] current iteration: %d, averaged loss: %.3f"
                      % (total_ite, mean_loss))
                batch_loss.append(mean_loss)
                running_loss = []
            
            ## Fetch new games
            if total_ite % REFRESH_TICK == 0:
                new_last_id = fetch_new_run(collection, fs, dataset, last_id)
                if new_last_id == last_id:
                    last_id = 0
                else:
                    last_id = new_last_id
            
            total_ite += 1
    
        if len(batch_loss) > 0:
            print("[TRAIN] Average backward pass loss : %.3f, current lr: %f" % (np.mean(batch_loss), lr))
Example #7
def build_ae_for_size(ae_class,
                      sizes,
                      filters,
                      latent_size,
                      lamb=None,
                      bn=False,
                      info=False):
    # Number of stride-2 (downsampling) layers: at most log2 of the smallest
    # spatial dimension, capped at 3; the remaining layers preserve size.
    n_down = min(math.floor(math.log(min(sizes[1:]), 2)), 3)
    size_preserving = len(filters) - n_down
    kernels = [3] * size_preserving + [4] * n_down
    strides = [1] * size_preserving + [2] * n_down
    paddings = [1] * size_preserving + [1] * n_down
    if ae_class == 'vae':
        ae = ConvVAE(sizes, (filters, list(reversed(filters[:-1]))),
                     (kernels, kernels),
                     strides=(strides, strides),
                     paddings=(paddings, paddings),
                     latent_size=latent_size,
                     lamb=lamb,
                     bn=bn).cuda()
    elif ae_class == 'ae':
        assert lamb is None
        ae = ConvAE(sizes, (filters, list(reversed(filters[:-1]))),
                    (kernels, kernels),
                    strides=(strides, strides),
                    paddings=(paddings, paddings),
                    latent_size=latent_size,
                    bn=bn).cuda()
    elif ae_class == 'wae':
        ae = ConvWAE(sizes, (filters, list(reversed(filters[:-1]))),
                     (kernels, kernels),
                     strides=(strides, strides),
                     paddings=(paddings, paddings),
                     latent_size=latent_size,
                     bn=bn,
                     lamb=lamb,
                     z_var=2.0).cuda()
    elif ae_class == 'waegan':
        encoder = ConvWAEGAN_Encoder(sizes,
                                     (filters, list(reversed(filters[:-1]))),
                                     (kernels, kernels),
                                     strides=(strides, strides),
                                     paddings=(paddings, paddings),
                                     latent_size=latent_size,
                                     lamb=lamb,
                                     bn=bn).cuda()
        decoder = ConvWAEGAN_Decoder(sizes,
                                     (filters, list(reversed(filters[:-1]))),
                                     (kernels, kernels),
                                     strides=(strides, strides),
                                     paddings=(paddings, paddings),
                                     latent_size=latent_size,
                                     lamb=lamb,
                                     bn=bn).cuda()
        discriminator = ConvWAEGAN_Discriminator(latent_size=latent_size,
                                                 lamb=lamb).cuda()
        ae = (encoder, decoder, discriminator)
    else:
        raise ValueError('wrong ae_class parameter value')
    if info:
        print(f'------------------------------')
        print(f'Built {ae_class} with parameters:')
        print(f'input/output size: {tuple(sizes)}')
        print(f'layers: {len(filters)} - {filters}')
        if ae_class == 'waegan':
            print(f'pre-latent: {ae[0].pre_latent_size}')
        else:
            print(f'pre-latent: {ae.pre_latent_size}')
        print(f'latent size: {latent_size}')
    return ae
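A worked example of the kernel/stride layout computed above: with sizes = (3, 64, 64) and filters = [32, 64, 128, 256, 512], the number of stride-2 layers is min(floor(log2(64)), 3) = 3, leaving two size-preserving layers, so kernels = [3, 3, 4, 4, 4] and strides = [1, 1, 2, 2, 2]. A hedged call sketch (the filter counts are hypothetical, and the builders call .cuda(), so a GPU is assumed):

# Builds a VAE for 64x64 RGB inputs with a 3-stage downsampling tail.
ae = build_ae_for_size('vae', sizes=(3, 64, 64),
                       filters=[32, 64, 128, 256, 512],
                       latent_size=64, lamb=1e-3, info=True)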