Example 1
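This example is the constructor of a gym-style DoomTakeCoverWrapper: it instantiates a trained ConvVAE and MDN-RNN, optionally restores their weights from JSON, and defines the environment's action and observation spaces.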
    def __init__(self, render_mode=False, load_model=True):
        super(DoomTakeCoverWrapper, self).__init__()

        self.no_render = not render_mode
        self.current_obs = None

        reset_graph()

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=tf.AUTO_REUSE)

        self.rnn = Model(hps_sample, gpu_mode=False)

        if load_model:
            self.vae.load_json(os.path.join(model_path_name, 'vae.json'))
            self.rnn.load_json(os.path.join(model_path_name, 'rnn.json'))

        # Continuous scalar action in [-1, 1].
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=())
        # Observation fed to the controller: latent z plus the flattened RNN state.
        self.outwidth = self.rnn.hps.seq_width
        self.obs_size = self.outwidth + model_rnn_size * model_state_space

        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(SCREEN_Y, SCREEN_X, 3))
        self.actual_observation_space = spaces.Box(low=-50.,
                                                   high=50.,
                                                   shape=(self.obs_size,))

        self.zero_state = self.rnn.sess.run(self.rnn.zero_state)

        self._seed()

        self.rnn_state = None
        self.z = None
        self.restart = None
        self.frame_count = None
        self.viewer = None
        self._reset()
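
A minimal sketch (not from the source) of how wrappers like this typically assemble the actual observation: the VAE latent z concatenated with the flattened LSTM state, which is where obs_size = seq_width + model_rnn_size * model_state_space comes from (assuming model_state_space == 2 for the cell and hidden halves). The helper name below is hypothetical:

import numpy as np

def build_obs(z, rnn_state):
    # rnn_state is an LSTMStateTuple-like (c, h) pair; append both halves after z.
    return np.concatenate([z, rnn_state.c.flatten(), rnn_state.h.flatten()]).astype(np.float32)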
Example 2
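This example restores a trained ConvVAE from JSON and encodes a dataset of recorded frames into per-frame latent means (mu) and log-variances (logvar).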
    return img


# Hyperparameters for ConvVAE
z_size = 64
batch_size = 1
learning_rate = 0.0001
kl_tolerance = 0.5

filelist = os.listdir(DATA_DIR)
filelist.sort()
filelist = filelist[0:10000]

dataset, action_dataset = load_raw_data_list(filelist)

reset_graph()

vae = ConvVAE(z_size=z_size,
              batch_size=batch_size,
              learning_rate=learning_rate,
              kl_tolerance=kl_tolerance,
              is_training=False,
              reuse=False,
              gpu_mode=False)

vae.load_json(os.path.join(model_path_name, 'vae.json'))

mu_dataset = []
logvar_dataset = []
for i in range(len(dataset)):
    data = dataset[i]
    # (Source snippet truncated here; presumably each episode is encoded with
    # the VAE, e.g. via vae.encode_mu_logvar, and the resulting mu/logvar
    # arrays are appended to mu_dataset and logvar_dataset.)
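
Example 3 below loads "series.npz" with keys "mu", "logvar" and "action", so an encoding script like this one presumably finishes by saving those arrays; a minimal sketch, assuming mu_dataset, logvar_dataset and action_dataset are lists of per-episode arrays:

np.savez_compressed(os.path.join(DATA_DIR, "series.npz"),
                    action=np.array(action_dataset),
                    mu=np.array(mu_dataset),
                    logvar=np.array(logvar_dataset))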
Example 3
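This example trains the MDN-RNN world model on the pre-encoded series data: each epoch it repacks the episodes into batches, samples z from (mu, logvar) via the reparameterization trick, and feeds (z, action, restart) sequences to the model.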
def main(args):
    print("Train RNN begin")
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    np.set_printoptions(precision=4, edgeitems=6, linewidth=100, suppress=True)

    model_save_path = args.output_file_name
    model_rnn_size = 512
    model_restart_factor = 10.
    Z_VECTOR_SIZE = 64 #KOEChange
    DATA_DIR = "series"

    initial_z_save_path = "tf_initial_z"
    if not os.path.exists(initial_z_save_path):
        os.makedirs(initial_z_save_path)
    model_num_mixture = args.num_mixtures
    epochs = args.epochs

    model_save_path += "_" + str(model_num_mixture) + "mixtures"

    if not os.path.exists(model_save_path):
        os.makedirs(model_save_path)

    def default_hps():
      return HyperParams(max_seq_len=100, # KOEChange. Was 500
                         seq_width=Z_VECTOR_SIZE,    # KOEChange. Was 64.
                         rnn_size=model_rnn_size,    # number of rnn cells
                         batch_size=100,   # minibatch size
                         grad_clip=1.0,
                         num_mixture=int(model_num_mixture),   # number of mixtures in MDN
                         restart_factor=model_restart_factor, # factor of importance for restart=1 rare case for loss.
                         learning_rate=0.001,
                         decay_rate=0.99999,
                         min_learning_rate=0.00001,
                         use_layer_norm=0, # set this to 1 to get more stable results (less chance of NaN), but slower
                         use_recurrent_dropout=0,
                         recurrent_dropout_prob=0.90,
                         use_input_dropout=0,
                         input_dropout_prob=0.90,
                         use_output_dropout=0,
                         output_dropout_prob=0.90,
                         is_training=1)

    hps_model = default_hps()
    # Inference-time variant (batch of 1, short sequences, no dropout); this is
    # the hps_sample that Example 1 builds its sampling Model with.
    hps_sample = hps_model._replace(batch_size=1, max_seq_len=2, use_recurrent_dropout=0, is_training=0)

    # load preprocessed data
    raw_data = np.load(os.path.join(DATA_DIR, "series.npz"))
    raw_data_mu = raw_data["mu"]
    raw_data_logvar = raw_data["logvar"]
    raw_data_action = raw_data["action"]

    def load_series_data():
      all_data = []
      for i in range(len(raw_data_mu)):
        action = raw_data_action[i]
        mu = raw_data_mu[i]
        logvar = raw_data_logvar[i]
        all_data.append([mu, logvar, action])
      return all_data

    def get_frame_count(all_data):
      frame_count = []
      for data in all_data:
        frame_count.append(len(data[0]))
      return np.sum(frame_count)

    def create_batches(all_data, batch_size=100, seq_length=100):
      num_frames = get_frame_count(all_data)
      num_batches = int(num_frames/(batch_size*seq_length))
      num_frames_adjusted = num_batches*batch_size*seq_length
      # Shuffle episode order before packing frames into batches.
      random.shuffle(all_data)
      data_mu = np.zeros((num_frames, N_z), dtype=np.float16)
      data_logvar = np.zeros((num_frames, N_z), dtype=np.float16)
      data_action = np.zeros(num_frames, dtype=np.float16)
      data_restart = np.zeros(num_frames, dtype=np.uint8)
      idx = 0
      for data in all_data:
        mu, logvar, action = data
        N = len(action)
        data_mu[idx:idx+N] = mu.reshape(N, Z_VECTOR_SIZE)
        data_logvar[idx:idx+N] = logvar.reshape(N, Z_VECTOR_SIZE)
        data_action[idx:idx+N] = action.reshape(N)
        data_restart[idx] = 1  # mark the first frame of each episode as a restart
        idx += N

      data_mu = data_mu[0:num_frames_adjusted]
      data_logvar = data_logvar[0:num_frames_adjusted]
      data_action = data_action[0:num_frames_adjusted]
      data_restart = data_restart[0:num_frames_adjusted]

      data_mu = np.split(data_mu.reshape(batch_size, -1, Z_VECTOR_SIZE), num_batches, 1)
      data_logvar = np.split(data_logvar.reshape(batch_size, -1, Z_VECTOR_SIZE), num_batches, 1)
      data_action = np.split(data_action.reshape(batch_size, -1), num_batches, 1)
      data_restart = np.split(data_restart.reshape(batch_size, -1), num_batches, 1)

      return data_mu, data_logvar, data_action, data_restart

    def get_batch(batch_idx, data_mu, data_logvar, data_action, data_restart):
      batch_mu = data_mu[batch_idx]
      batch_logvar = data_logvar[batch_idx]
      batch_action = data_action[batch_idx]
      batch_restart = data_restart[batch_idx]
      batch_s = batch_logvar.shape
      # Reparameterization trick: z = mu + sigma * eps, with sigma = exp(logvar / 2).
      batch_z = batch_mu + np.exp(batch_logvar/2.0) * np.random.randn(*batch_s)
      return batch_z, batch_action, batch_restart

    # process data
    all_data = load_series_data()

    max_seq_len = hps_model.max_seq_len
    N_z = hps_model.seq_width

    # Save 1000 initial mu and logvar vectors, quantized to integers
    # (scaled by 10000) to keep the JSON compact:
    initial_mu = []
    initial_logvar = []
    for i in range(1000):
      mu = np.copy(raw_data_mu[i][0, :]*10000).astype(int).tolist()
      logvar = np.copy(raw_data_logvar[i][0, :]*10000).astype(int).tolist()
      initial_mu.append(mu)
      initial_logvar.append(logvar)
    with open(os.path.join(initial_z_save_path, "initial_z.json"), 'wt') as outfile:
      json.dump([initial_mu, initial_logvar], outfile, sort_keys=True, indent=0, separators=(',', ': '))
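
    # A hedged decoding sketch (not in the source): consumers of initial_z.json
    # presumably scale the stored integers back down, e.g.:
    #   with open(os.path.join(initial_z_save_path, "initial_z.json")) as f:
    #     initial_mu, initial_logvar = json.load(f)
    #   mu0 = np.array(initial_mu[0], dtype=np.float32) / 10000.0
    #   logvar0 = np.array(initial_logvar[0], dtype=np.float32) / 10000.0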

    reset_graph()
    model = Model(hps_model)

    hps = hps_model
    start = time.time()
    print("Starting first epoch of total ", epochs)

    for epoch in range(1, epochs + 1):
      print('preparing data for epoch', epoch)
      data_mu, data_logvar, data_action, data_restart = create_batches(all_data)
      num_batches = len(data_mu)
      print('number of batches', num_batches)
      end = time.time()
      time_taken = end-start
      print('time taken to create batches', time_taken)

      batch_state = model.sess.run(model.initial_state)

      for local_step in range(num_batches):

        batch_z, batch_action, batch_restart = get_batch(local_step, data_mu, data_logvar, data_action, data_restart)
        step = model.sess.run(model.global_step)
        # Exponential decay of the learning rate toward min_learning_rate.
        curr_learning_rate = (hps.learning_rate-hps.min_learning_rate) * (hps.decay_rate) ** step + hps.min_learning_rate

        feed = {model.batch_z: batch_z,
                model.batch_action: batch_action,
                model.batch_restart: batch_restart,
                model.initial_state: batch_state,
                model.lr: curr_learning_rate}

        (train_cost, z_cost, r_cost, batch_state, train_step, _) = model.sess.run(
            [model.cost, model.z_cost, model.r_cost, model.final_state,
             model.global_step, model.train_op], feed)
        if (step%20==0 and step > 0):
          end = time.time()
          time_taken = end-start
          start = time.time()
          output_log = "step: %d, lr: %.6f, cost: %.4f, z_cost: %.4f, r_cost: %.4f, train_time_taken: %.4f" % (step, curr_learning_rate, train_cost, z_cost, r_cost, time_taken)
          print(output_log)

    # save the model (don't bother with tf checkpoints json all the way ...)
    model.save_json(os.path.join(model_save_path, "rnn.json"))
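
The exported weights can be restored with load_json, which is how Example 1 loads the trained RNN when building the environment wrapper; an illustrative round trip:

rnn = Model(hps_sample, gpu_mode=False)
rnn.load_json(os.path.join(model_save_path, "rnn.json"))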