Example #1
    def __init__(self, render_mode=False, load_model=True):
        super(DoomTakeCoverWrapper, self).__init__()

        self.no_render = True
        if render_mode:
            self.no_render = False
        self.current_obs = None

        reset_graph()

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=tf.AUTO_REUSE)

        self.rnn = Model(hps_sample, gpu_mode=False)

        if load_model:
            self.vae.load_json(os.path.join(model_path_name, 'vae.json'))
            self.rnn.load_json(os.path.join(model_path_name, 'rnn.json'))

        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=())
        self.outwidth = self.rnn.hps.seq_width
        self.obs_size = self.outwidth + model_rnn_size * model_state_space

        self.observation_space = Box(low=0,
                                     high=255,
                                     shape=(SCREEN_Y, SCREEN_X, 3))
        self.actual_observation_space = spaces.Box(low=-50.,
                                                   high=50.,
                                                   shape=(self.obs_size,))

        self.zero_state = self.rnn.sess.run(self.rnn.zero_state)

        self._seed()

        self.rnn_state = None
        self.z = None
        self.restart = None
        self.frame_count = None
        self.viewer = None
        self._reset()
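This fragment references several module-level names that are defined elsewhere in the source file. A minimal sketch of the assumed constants (the names match the usage above; the values follow the World Models Doom setup and are assumptions):

SCREEN_X = 64
SCREEN_Y = 64
model_rnn_size = 512           # LSTM units in the MDN-RNN
model_state_space = 2          # 2 exposes both c and h states; 1 exposes h only
model_path_name = 'tf_models'  # directory containing vae.json and rnn.json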
Example #2

    def __init__(self, rnn_load_path, num_mixtures, temperature):
        #RNN parameters - modelled after hps_sample in doomrnn.py
        self.vae = VAE(z_size=LATENT_SPACE_DIMENSIONALITY,
                       batch_size=1,
                       is_training=False,
                       reuse=False,
                       gpu_mode=False)

        self.vae.load_json(os.path.join(VAE_PATH, 'vae.json'))
        hps = default_prediction_hps(num_mixtures)
        self.rnn = RNN(hps, gpu_mode=False)

        self.rnn.load_json(os.path.join(rnn_load_path, 'rnn.json'))
        self.frame_count = 0
        self.temperature = temperature
        self.zero_state = self.rnn.sess.run(self.rnn.zero_state)
        self.outwidth = self.rnn.hps.seq_width
        self.restart = 1
        self.rnn_state = self.zero_state
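As in Example #1, this constructor leans on module-level configuration. A hedged sketch of the assumed constants (the names match the usage above; the values are assumptions):

LATENT_SPACE_DIMENSIONALITY = 64   # z-vector size; matches the RNN's seq_width
VAE_PATH = 'tf_vae'                # directory containing vae.json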
Example #3
class DoomTakeCoverWrapper(DoomTakeCoverEnv):
    def __init__(self, render_mode=False, load_model=True):
        super(DoomTakeCoverWrapper, self).__init__()

        self.no_render = True
        if render_mode:
            self.no_render = False
        self.current_obs = None

        reset_graph()

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=tf.AUTO_REUSE)

        self.rnn = Model(hps_sample, gpu_mode=False)

        if load_model:
            self.vae.load_json(os.path.join(model_path_name, 'vae.json'))
            self.rnn.load_json(os.path.join(model_path_name, 'rnn.json'))

        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=())
        self.outwidth = self.rnn.hps.seq_width
        self.obs_size = self.outwidth + model_rnn_size * model_state_space

        self.observation_space = Box(low=0,
                                     high=255,
                                     shape=(SCREEN_Y, SCREEN_X, 3))
        self.actual_observation_space = spaces.Box(low=-50.,
                                                   high=50.,
                                                   shape=(self.obs_size,))

        self.zero_state = self.rnn.sess.run(self.rnn.zero_state)

        self._seed()

        self.rnn_state = None
        self.z = None
        self.restart = None
        self.frame_count = None
        self.viewer = None
        self._reset()

    def _step(self, action):

        # update states of rnn
        self.frame_count += 1

        prev_z = np.zeros((1, 1, self.outwidth))
        prev_z[0][0] = self.z

        prev_action = np.zeros((1, 1))
        prev_action[0] = action

        prev_restart = np.ones((1, 1))
        prev_restart[0] = self.restart

        s_model = self.rnn

        feed = {
            s_model.input_z: prev_z,
            s_model.input_action: prev_action,
            s_model.input_restart: prev_restart,
            s_model.initial_state: self.rnn_state
        }

        self.rnn_state = s_model.sess.run(s_model.final_state, feed)

        # actual action in wrapped env:

        # map the scalar action in [-1, 1] onto the wrapped env's discrete
        # button array: strongly negative values press one movement button,
        # strongly positive values press the other, and values in between
        # press neither.
        threshold = 0.3333
        full_action = [0] * 43

        if action < -threshold:
            full_action[11] = 1

        if action > threshold:
            full_action[10] = 1

        obs, reward, done, _ = super(DoomTakeCoverWrapper,
                                     self)._step(full_action)
        small_obs = _process_frame(obs)
        self.current_obs = small_obs
        self.z = self._encode(small_obs)

        if done:
            self.restart = 1
        else:
            self.restart = 0

        return self._current_state(), reward, done, {}

    def _encode(self, img):
        # encode a frame and sample z via the reparameterization trick
        simple_obs = np.copy(img).astype(np.float64) / 255.0
        simple_obs = simple_obs.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(simple_obs)
        return (mu +
                np.exp(logvar / 2.0) * self.np_random.randn(*logvar.shape))[0]

    def _decode(self, z):
        # decode the latent vector
        img = self.vae.decode(z.reshape(1, 64)) * 255.
        img = np.round(img).astype(np.uint8)
        img = img.reshape(64, 64, 3)
        return img

    def _reset(self):
        obs = super(DoomTakeCoverWrapper, self)._reset()
        small_obs = _process_frame(obs)
        self.current_obs = small_obs
        self.rnn_state = self.zero_state
        self.z = self._encode(small_obs)
        self.restart = 1
        self.frame_count = 0
        return self._current_state()

    def _current_state(self):
        if model_state_space == 2:
            return np.concatenate([
                self.z,
                self.rnn_state.c.flatten(),
                self.rnn_state.h.flatten()
            ], axis=0)
        return np.concatenate([self.z, self.rnn_state.h.flatten()], axis=0)

    def _seed(self, seed=None):
        if seed is not None:
            tf.set_random_seed(seed)
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None  # If we don't None out this reference pyglet becomes unhappy
            return
        try:
            state = self.game.get_state()
            img = state.image_buffer
            small_img = self.current_obs
            if img is None:
                img = np.zeros(shape=(480, 640, 3), dtype=np.uint8)
            if small_img is None:
                small_img = np.zeros(shape=(SCREEN_Y, SCREEN_X, 3),
                                     dtype=np.uint8)
            small_img = resize(small_img, (img.shape[0], img.shape[0]))
            vae_img = self._decode(self.z)
            vae_img = resize(vae_img, (img.shape[0], img.shape[0]))
            all_img = np.concatenate((img, small_img, vae_img), axis=1)
            img = all_img
            if mode == 'rgb_array':
                return img
            elif mode == 'human':
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
        except doom_py.vizdoom.ViZDoomIsNotRunningException:
            pass  # Doom has been closed
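Note that _process_frame is called in _step and _reset but not defined in this example. A minimal sketch consistent with its usage (it must return a 64x64x3 uint8 frame for the ConvVAE), assuming the same resize helper used in _render; the exact cropping and color handling in the original may differ:

def _process_frame(frame):
    # downscale the raw Doom frame to the 64x64x3 uint8 input the VAE expects
    obs = frame.astype(np.float64) / 255.0
    obs = resize(obs, (64, 64))
    return np.round(obs * 255.0).astype(np.uint8)

And a hypothetical rollout with a random policy, assuming the old gym API in which reset() and step() dispatch to _reset() and _step():

env = DoomTakeCoverWrapper(render_mode=True)
obs = env.reset()
total_reward, done = 0.0, False
while not done:
    action = env.action_space.sample()      # scalar in [-1, 1]
    obs, reward, done, _ = env.step(action)
    total_reward += reward
    env.render()
print('episode reward:', total_reward)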
Example #4
def main(args):
    print("Train RNN begin")
    os.environ["CUDA_VISIBLE_DEVICES"]="0"
    np.set_printoptions(precision=4, edgeitems=6, linewidth=100, suppress=True)

    model_save_path = args.output_file_name
    model_rnn_size = 512
    model_restart_factor = 10.
    Z_VECTOR_SIZE = 64 #KOEChange
    DATA_DIR = "series"

  
    initial_z_save_path = "tf_initial_z"
    if not os.path.exists(initial_z_save_path):
        os.makedirs(initial_z_save_path)
    model_num_mixture = args.num_mixtures
    epochs = args.epochs

    model_save_path += "_" + str(model_num_mixture) + "mixtures"

    if not os.path.exists(model_save_path):
        os.makedirs(model_save_path)

    def default_hps():
      return HyperParams(max_seq_len=100, # KOEChange. Was 500
                         seq_width=Z_VECTOR_SIZE,    # KOEChange. Was 64.
                         rnn_size=model_rnn_size,    # number of rnn cells
                         batch_size=100,   # minibatch sizes
                         grad_clip=1.0,
                         num_mixture=int(model_num_mixture),   # number of mixtures in MDN
                         restart_factor=model_restart_factor, # factor of importance for restart=1 rare case for loss.
                         learning_rate=0.001,
                         decay_rate=0.99999,
                         min_learning_rate=0.00001,
                         use_layer_norm=0, # set this to 1 to get more stable results (less chance of NaN), but slower
                         use_recurrent_dropout=0,
                         recurrent_dropout_prob=0.90,
                         use_input_dropout=0,
                         input_dropout_prob=0.90,
                         use_output_dropout=0,
                         output_dropout_prob=0.90,
                         is_training=1)

    hps_model = default_hps()
    hps_sample = hps_model._replace(batch_size=1, max_seq_len=2, use_recurrent_dropout=0, is_training=0)
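    # Note: ._replace above implies HyperParams is a collections.namedtuple.
    # A minimal sketch of the assumed definition (field order mirrors the
    # keyword arguments passed in default_hps):
    #
    # HyperParams = namedtuple('HyperParams', [
    #     'max_seq_len', 'seq_width', 'rnn_size', 'batch_size', 'grad_clip',
    #     'num_mixture', 'restart_factor', 'learning_rate', 'decay_rate',
    #     'min_learning_rate', 'use_layer_norm', 'use_recurrent_dropout',
    #     'recurrent_dropout_prob', 'use_input_dropout', 'input_dropout_prob',
    #     'use_output_dropout', 'output_dropout_prob', 'is_training'])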

    # load preprocessed data
    raw_data = np.load(os.path.join(DATA_DIR, "series.npz"))
    raw_data_mu = raw_data["mu"]
    raw_data_logvar = raw_data["logvar"]
    raw_data_action = raw_data["action"]

    def load_series_data():
      all_data = []
      for i in range(len(raw_data_mu)):
        action = raw_data_action[i]
        mu = raw_data_mu[i]
        logvar = raw_data_logvar[i]
        all_data.append([mu, logvar, action])
      return all_data

    def get_frame_count(all_data):
      frame_count = []
      for data in all_data:
        frame_count.append(len(data[0]))
      return np.sum(frame_count)

    def create_batches(all_data, batch_size=100, seq_length=100):
      num_frames = get_frame_count(all_data)
      num_batches = int(num_frames/(batch_size*seq_length))
      num_frames_adjusted = num_batches*batch_size*seq_length
      random.shuffle(all_data)
      num_frames = get_frame_count(all_data)
      data_mu = np.zeros((num_frames, N_z), dtype=np.float16)
      data_logvar = np.zeros((num_frames, N_z), dtype=np.float16)
      data_action = np.zeros(num_frames, dtype=np.float16)
      data_restart = np.zeros(num_frames, dtype=np.uint8)
      idx = 0
      for data in all_data:
        mu, logvar, action=data
        N = len(action)
        data_mu[idx:idx+N] = mu.reshape(N, Z_VECTOR_SIZE)
        data_logvar[idx:idx+N] = logvar.reshape(N, Z_VECTOR_SIZE)
        data_action[idx:idx+N] = action.reshape(N)
        data_restart[idx]=1
        idx += N

      data_mu = data_mu[0:num_frames_adjusted]
      data_logvar = data_logvar[0:num_frames_adjusted]
      data_action = data_action[0:num_frames_adjusted]
      data_restart = data_restart[0:num_frames_adjusted]

      data_mu = np.split(data_mu.reshape(batch_size, -1, Z_VECTOR_SIZE), num_batches, 1)
      data_logvar = np.split(data_logvar.reshape(batch_size, -1, Z_VECTOR_SIZE), num_batches, 1)
      data_action = np.split(data_action.reshape(batch_size, -1), num_batches, 1)
      data_restart = np.split(data_restart.reshape(batch_size, -1), num_batches, 1)

      return data_mu, data_logvar, data_action, data_restart
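    # How create_batches works: all episodes are laid end-to-end into one
    # long tape (data_restart marks episode starts), the tape is truncated
    # to a whole number of batches, reshaped to
    # (batch_size, num_batches * seq_length, ...), and split along the time
    # axis, so batch i holds batch_size parallel streams of seq_length
    # consecutive frames and batches can be fed in order with a carried
    # RNN state.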

    def get_batch(batch_idx, data_mu, data_logvar, data_action, data_restart):
      batch_mu = data_mu[batch_idx]
      batch_logvar = data_logvar[batch_idx]
      batch_action = data_action[batch_idx]
      batch_restart = data_restart[batch_idx]
      batch_s = batch_logvar.shape
      # reparameterization trick: sample z = mu + exp(logvar / 2) * eps
      batch_z = batch_mu + np.exp(batch_logvar/2.0) * np.random.randn(*batch_s)
      return batch_z, batch_action, batch_restart

    # process data
    all_data = load_series_data()

    max_seq_len = hps_model.max_seq_len
    N_z = hps_model.seq_width

    # save 1000 initial mu and logvars:
    initial_mu = []
    initial_logvar = []
    for i in range(1000):
      mu = np.copy(raw_data_mu[i][0, :]*10000).astype(int).tolist()
      logvar = np.copy(raw_data_logvar[i][0, :]*10000).astype(int).tolist()
      initial_mu.append(mu)
      initial_logvar.append(logvar)
    with open(os.path.join("tf_initial_z", "initial_z.json"), 'wt') as outfile:
      json.dump([initial_mu, initial_logvar], outfile, sort_keys=True, indent=0, separators=(',', ': '))

    reset_graph()
    model = Model(hps_model)

    hps = hps_model
    start = time.time()
    print("Starting first epoch of total ", epochs)

    for epoch in range(1, epochs + 1):
      print('preparing data for epoch', epoch)
      data_mu, data_logvar, data_action, data_restart = 0, 0, 0, 0  # free last epoch's arrays before rebuilding
      data_mu, data_logvar, data_action, data_restart = create_batches(all_data)
      num_batches = len(data_mu)
      print('number of batches', num_batches)
      end = time.time()
      time_taken = end-start
      print('time taken to create batches', time_taken)

      batch_state = model.sess.run(model.initial_state)

      for local_step in range(num_batches):

        batch_z, batch_action, batch_restart = get_batch(local_step, data_mu, data_logvar, data_action, data_restart)
        step = model.sess.run(model.global_step)
        curr_learning_rate = (hps.learning_rate-hps.min_learning_rate) * (hps.decay_rate) ** step + hps.min_learning_rate

        feed = {model.batch_z: batch_z,
                model.batch_action: batch_action,
                model.batch_restart: batch_restart,
                model.initial_state: batch_state,
                model.lr: curr_learning_rate}

        (train_cost, z_cost, r_cost, batch_state, train_step, _) = model.sess.run(
            [model.cost, model.z_cost, model.r_cost, model.final_state,
             model.global_step, model.train_op], feed)
        if (step%20==0 and step > 0):
          end = time.time()
          time_taken = end-start
          start = time.time()
          output_log = "step: %d, lr: %.6f, cost: %.4f, z_cost: %.4f, r_cost: %.4f, train_time_taken: %.4f" % (step, curr_learning_rate, train_cost, z_cost, r_cost, time_taken)
          print(output_log)

    # save the model (don't bother with tf checkpoints json all the way ...)
    model.save_json(os.path.join(model_save_path, "rnn.json"))
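A hypothetical entry point for main(); the argument names mirror the attributes accessed above (output_file_name, num_mixtures, epochs):

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--output_file_name", default="tf_rnn")
    parser.add_argument("--num_mixtures", type=int, default=5)
    parser.add_argument("--epochs", type=int, default=400)
    main(parser.parse_args())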
Example #5
max_seq_len = hps_model.max_seq_len
N_z = hps_model.seq_width

# save 258 initial mu and logvars:
initial_mu = []
initial_logvar = []
for i in range(258):
  mu = np.copy(raw_data_mu[i][0, :]*10000).astype(int).tolist()
  logvar = np.copy(raw_data_logvar[i][0, :]*10000).astype(int).tolist()
  initial_mu.append(mu)
  initial_logvar.append(logvar)
with open(os.path.join("tf_initial_z", "initial_z.json"), 'wt') as outfile:
  json.dump([initial_mu, initial_logvar], outfile, sort_keys=True, indent=0, separators=(',', ': '))

reset_graph()
model = Model(hps_model)

hps = hps_model
start = time.time()

for epoch in range(1, 401):
  print('preparing data for epoch', epoch)
  data_mu, data_logvar, data_action, data_restart = 0, 0, 0, 0
  data_mu, data_logvar, data_action, data_restart = create_batches(all_data)
  num_batches = len(data_mu)
  print('number of batches', num_batches)
  end = time.time()
  time_taken = end-start
  print('time taken to create batches', time_taken)

  batch_state = model.sess.run(model.initial_state)
Example #6
initial_mu = []
initial_logvar = []
for i in range(1000):
    mu = np.copy(raw_data_mu[i][0, :] * 10000).astype(int).tolist()
    logvar = np.copy(raw_data_logvar[i][0, :] * 10000).astype(int).tolist()
    initial_mu.append(mu)
    initial_logvar.append(logvar)
with open(os.path.join(initial_z_save_path, "{}.json".format(initial_z_name)),
          'wt') as outfile:
    json.dump([initial_mu, initial_logvar],
              outfile,
              sort_keys=True,
              indent=0,
              separators=(',', ': '))

reset_graph()
model = Model(hps_model)

hps = hps_model
start = time.time()

for epoch in range(1, 401):
    print('preparing data for epoch', epoch)
    data_mu, data_logvar, data_action, data_restart = 0, 0, 0, 0
    data_mu, data_logvar, data_action, data_restart = create_batches(all_data)
    num_batches = len(data_mu)
    print('number of batches', num_batches)
    end = time.time()
    time_taken = end - start
    print('time taken to create batches', time_taken)

    batch_state = model.sess.run(model.initial_state)

class RNNAnalyzer:
    def __init__(self, rnn_load_path, num_mixtures, temperature):
        #RNN parameters - modelled after hps_sample in doomrnn.py
        self.vae = VAE(z_size=LATENT_SPACE_DIMENSIONALITY,
                       batch_size=1,
                       is_training=False,
                       reuse=False,
                       gpu_mode=False)

        self.vae.load_json(os.path.join(VAE_PATH, 'vae.json'))
        hps = default_prediction_hps(num_mixtures)
        self.rnn = RNN(hps, gpu_mode=False)

        self.rnn.load_json(os.path.join(rnn_load_path, 'rnn.json'))
        self.frame_count = 0
        self.temperature = temperature
        self.zero_state = self.rnn.sess.run(self.rnn.zero_state)
        self.outwidth = self.rnn.hps.seq_width
        self.restart = 1
        self.rnn_state = self.zero_state

    def _reset(self, initial_z):
        #Resets RNN, with an initial z.
        self.rnn_state = self.zero_state
        self.z = initial_z
        self.restart = 1
        self.frame_count = 0

    def decode_with_vae(self, latent_vector_sequence):
        reconstructions = self.vae.decode(np.array(latent_vector_sequence))
        return reconstructions

    def predict_one_step(self, action, previous_z=None):
        #Predicts one step ahead from the previous state.
        #If previous_z is given, we predict with that as input. Otherwise, we dream from the previous output we generated.
        self.frame_count += 1
        prev_z = np.zeros((1, 1, self.outwidth))
        if previous_z is not None and len(previous_z) > 0:
            prev_z[0][0] = previous_z
        else:
            prev_z[0][0] = self.z

        prev_action = np.zeros((1, 1))
        prev_action[0] = action

        prev_restart = np.ones((1, 1))
        prev_restart[0] = self.restart

        s_model = self.rnn

        feed = {
            s_model.input_z: prev_z,
            s_model.input_action: prev_action,
            s_model.input_restart: prev_restart,
            s_model.initial_state: self.rnn_state
        }

        [logmix, mean, logstd, logrestart, next_state] = s_model.sess.run([
            s_model.out_logmix, s_model.out_mean, s_model.out_logstd,
            s_model.out_restart_logits, s_model.final_state
        ], feed)

        OUTWIDTH = self.outwidth
        # adjust temperatures
        logmix2 = np.copy(logmix) / self.temperature
        logmix2 -= logmix2.max()
        logmix2 = np.exp(logmix2)
        logmix2 /= logmix2.sum(axis=1).reshape(OUTWIDTH, 1)

        mixture_idx = np.zeros(OUTWIDTH)
        chosen_mean = np.zeros(OUTWIDTH)
        chosen_logstd = np.zeros(OUTWIDTH)
        for j in range(OUTWIDTH):
            idx = get_pi_idx(np_random.rand(), logmix2[j])
            mixture_idx[j] = idx
            chosen_mean[j] = mean[j][idx]
            chosen_logstd[j] = logstd[j][idx]

        rand_gaussian = np_random.randn(OUTWIDTH) * np.sqrt(self.temperature)
        next_z = chosen_mean + np.exp(chosen_logstd) * rand_gaussian
        next_restart = 0  #Never telling the RNN that we got a restart.
        #if (logrestart[0] > 0):
        #    next_restart = 1

        self.z = next_z
        self.restart = next_restart
        self.rnn_state = next_state

        return next_z, logmix2
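Both get_pi_idx and np_random are used above but not defined in this example. A minimal sketch consistent with their usage (inverse-CDF sampling of a mixture index from one row of logmix2); the module-level RNG is an assumption:

np_random = np.random.RandomState(0)  # assumed module-level RNG

def get_pi_idx(x, pdf):
    # return the first index whose cumulative probability exceeds x
    accumulate = 0.0
    for i in range(pdf.size):
        accumulate += pdf[i]
        if accumulate >= x:
            return i
    return pdf.size - 1

And a hypothetical dreaming loop that seeds the analyzer with a latent vector and then predicts forward open-loop (paths and values are assumptions):

analyzer = RNNAnalyzer('tf_rnn', num_mixtures=5, temperature=1.0)
analyzer._reset(initial_z=np.zeros(analyzer.outwidth))
frames = []
for _ in range(100):
    z, _ = analyzer.predict_one_step(action=0.0)
    frames.append(analyzer.decode_with_vae([z])[0])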