Ejemplo n.º 1
0
def sample_vae2(args):
    """ For vae from https://github.com/hardmaru/WorldModelsExperiments.git
    """
    z_size = 32
    batch_size = args.count
    learning_rate = 0.0001
    kl_tolerance = 0.5
    model_path_name = "tf_vae"

    reset_graph()
    vae = ConvVAE(
        z_size=z_size,
        batch_size=batch_size,
        learning_rate=learning_rate,
        kl_tolerance=kl_tolerance,
        is_training=False,
        reuse=False,
        gpu_mode=False)  # use GPU on batchsize of 1000 -> much faster

    vae.load_json(os.path.join(model_path_name, 'vae.json'))

    z = np.random.normal(size=(args.count, z_size))
    samples = vae.decode(z)
    input_dim = samples.shape[1:]

    n = args.count
    plt.figure(figsize=(20, 4))
    plt.title('VAE samples')
    for i in range(n):
        ax = plt.subplot(2, n, i + 1)
        plt.imshow(samples[i].reshape(input_dim[0], input_dim[1],
                                      input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    #plt.savefig( image_path )
    plt.show()
Ejemplo n.º 2
0
class Model:
  ''' simple one layer model for car racing '''
  def __init__(self):
    self.env_name = "carracing"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae.load_json('vae/vae.json')
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.rnn.load_json('rnn/rnn.json')
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True

    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32

    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      self.hidden_size = 40
      self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
      self.bias_hidden = np.random.randn(self.hidden_size)
      self.weight_output = np.random.randn(self.hidden_size, 3)
      self.bias_output = np.random.randn(3)
      self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
    else:
      self.weight = np.random.randn(self.input_size, 3)
      self.bias = np.random.randn(3)
      self.param_count = (self.input_size)*3+3

    self.render_mode = False

  def make_env(self, seed=-1, render_mode=False):
    self.render_mode = render_mode
    self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)

  def reset(self):
    self.state = rnn_init_state(self.rnn)

  def encode_obs(self, obs):
    # convert raw obs to z, mu, logvar
    result = np.copy(obs).astype(np.float)/255.0
    result = result.reshape(1, 64, 64, 3)
    mu, logvar = self.vae.encode_mu_logvar(result)
    mu = mu[0]
    logvar = logvar[0]
    s = logvar.shape
    z = mu + np.exp(logvar/2.0) * np.random.randn(*s)
    return z, mu, logvar

  def decode_obs(self, z):
    # decode the latent vector
    img = self.vae.decode(z.reshape(1, self.z_size)) * 255.
    img = np.round(img).astype(np.uint8)
    img = img.reshape(64, 64, 3)
    return img

  def get_action(self, z):
    h = rnn_output(self.state, z, EXP_MODE)

    '''
    action = np.dot(h, self.weight) + self.bias
    action[0] = np.tanh(action[0])
    action[1] = sigmoid(action[1])
    action[2] = clip(np.tanh(action[2]))
    '''
    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
      action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
    else:
      action = np.tanh(np.dot(h, self.weight) + self.bias)
    
    action[1] = (action[1]+1.0) / 2.0
    action[2] = clip(action[2])

    self.state = rnn_next_state(self.rnn, z, action, self.state)

    return action

  def set_model_params(self, model_params):
    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      params = np.array(model_params)
      cut_off = (self.input_size+1)*self.hidden_size
      params_1 = params[:cut_off]
      params_2 = params[cut_off:]
      self.bias_hidden = params_1[:self.hidden_size]
      self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
      self.bias_output = params_2[:3]
      self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
    else:
      self.bias = np.array(model_params[:3])
      self.weight = np.array(model_params[3:]).reshape(self.input_size, 3)

  def load_model(self, filename):
    with open(filename) as f:    
      data = json.load(f)
    print('loading file %s' % (filename))
    self.data = data
    model_params = np.array(data[0]) # assuming other stuff is in data
    self.set_model_params(model_params)

  def get_random_model_params(self, stdev=0.1):
    return np.random.randn(self.param_count)*stdev
Ejemplo n.º 3
0
output_dir = "vae_test_result"

z_size=32

filelist = os.listdir(DATA_DIR)
filelist = [f for f in filelist if '.npz' in f]

obs = np.load(os.path.join(DATA_DIR, random.choice(filelist)))["obs"]
obs = np.expand_dims(obs, axis=-1)
obs = obs.astype(np.float32)/255.0

n = len(obs)

vae = ConvVAE(z_size=z_size,
              batch_size=1,
              is_training=False,
              reuse=False,
              gpu_mode=False)

vae.load_json(os.path.join(model_path_name, 'vae.json'))

if not os.path.exists(output_dir):
    os.mkdir(output_dir)

print(n, "images loaded")
for i in range(n):
    frame = obs[i].reshape(1, 64, 64, 1)
    batch_z = vae.encode(frame)
    reconstruct = vae.decode(batch_z)
    imsave(output_dir+'/%s.png' % pad_num(i), 255.*frame[0].reshape(64, 64))
    imsave(output_dir+'/%s_vae.png' % pad_num(i), 255.*reconstruct[0].reshape(64, 64))
Ejemplo n.º 4
0
def sample_vae2(args):
    """ For vae from https://github.com/hardmaru/WorldModelsExperiments.git
    """
    z_size = 64  # This needs to match the size of the trained vae
    batch_size = args.count
    learning_rate = 0.0001
    kl_tolerance = 0.5
    model_path_name = "tf_vae"

    reset_graph()
    vae = ConvVAE(
        z_size=z_size,
        batch_size=batch_size,
        learning_rate=learning_rate,
        kl_tolerance=kl_tolerance,
        is_training=False,
        reuse=False,
        gpu_mode=False)  # use GPU on batchsize of 1000 -> much faster

    vae.load_json(os.path.join(model_path_name, 'vae.json'))

    z = np.random.normal(size=(args.count, z_size))
    samples = vae.decode(z)
    input_dim = samples.shape[1:]

    gen = DriveDataGenerator(args.dirs,
                             image_size=(64, 64),
                             batch_size=args.count,
                             shuffle=True,
                             max_load=10000,
                             images_only=True)
    orig = gen[0].astype(np.float) / 255.0
    #mu, logvar = vae.encode_mu_logvar(orig)
    #recon = vae.decode( mu )
    recon = vae.decode(vae.encode(orig))

    n = args.count
    plt.figure(figsize=(20, 6), tight_layout=False)
    plt.title('VAE samples')
    for i in range(n):
        ax = plt.subplot(3, n, i + 1)
        plt.imshow(samples[i].reshape(input_dim[0], input_dim[1],
                                      input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if 0 == i:
            ax.set_title("Random")

    for i in range(n):
        ax = plt.subplot(3, n, n + i + 1)
        plt.imshow(orig[i].reshape(input_dim[0], input_dim[1], input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if 0 == i:
            ax.set_title("Real")

        ax = plt.subplot(3, n, (2 * n) + i + 1)
        plt.imshow(recon[i].reshape(input_dim[0], input_dim[1], input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if 0 == i:
            ax.set_title("Reconstructed")

    plt.savefig("samples_vae.png")
    plt.show()
Ejemplo n.º 5
0
class Model:
    ''' simple one layer model for car racing '''
    def __init__(self, arglist):
        self.env_name = arglist.game
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.vae.load_json(arglist.vae_file)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json(arglist.rnn_file)
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 2)
            self.bias_output = np.random.randn(2)
            self.param_count = ((self.input_size + 1) *
                                self.hidden_size) + (self.hidden_size * 2 + 2)
        else:
            self.weight = np.random.randn(self.input_size, 2)
            self.bias = np.random.randn(2)
            self.param_count = (self.input_size) * 2 + 2

        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False):
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)

    def reset(self):
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        result = np.copy(obs).astype(np.float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def decode_obs(self, z):
        # decode the latent vector
        img = self.vae.decode(z.reshape(1, self.z_size)) * 255.
        img = np.round(img).astype(np.uint8)
        img = img.reshape(64, 64, 3)
        return img

    def get_action(self, z, arglist):
        h = rnn_output(self.state, z, EXP_MODE)
        '''
    action = np.dot(h, self.weight) + self.bias
    action[0] = np.tanh(action[0])
    action[1] = sigmoid(action[1])
    action[2] = clip(np.tanh(action[2]))
    '''
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)

        if arglist.competitive:
            obs, rewards, done, win = self.env.step([action[0], 'script'])
        else:
            obs, rewards, done, win = self.env.step(action)

        extra_reward = 0.0  # penalize for turning too frequently
        if arglist.competitive:
            if arglist.train_mode and penalize_turning:
                extra_reward -= np.abs(action[0]) / 10.0
                rewards[0] += extra_reward
            reward = rewards[0]
        else:
            if arglist.train_mode and penalize_turning:
                reward = np.sum(rewards)
                extra_reward -= np.abs(action[0]) / 10.0
                reward += extra_reward

        # recording_reward.append(reward)
        # total_reward += reward

        self.state = rnn_next_state(self.rnn, z, action, self.state)

        return action

    def set_model_params(self, model_params):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:2]
            self.weight_output = params_2[2:].reshape(self.hidden_size, 2)
        else:
            self.bias = np.array(model_params[:2])
            self.weight = np.array(model_params[2:]).reshape(
                self.input_size, 2)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        return np.random.randn(self.param_count) * stdev
Ejemplo n.º 6
0
class VAERacingStack(CarRacing):
    def __init__(self, full_episode=False, discrete_mode=False):
        super(VAERacingStack, self).__init__()
        self._internal_counter = 0
        self.z_size = games['vae_racing_stack'].input_size
        self.vae = ConvVAE(batch_size=1,
                           z_size=self.z_size,
                           num_channel=FRAME_STACK,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.vae.load_json('vae/vae_stack_' + str(FRAME_STACK) + '.json')
        self.full_episode = full_episode
        high = np.array([np.inf] * self.z_size)
        self.observation_space = Box(-high, high)
        self.cumulative_frames = None
        self._has_rendered = False
        self.discrete_mode = discrete_mode

    def _get_image(self, z, cumulative_frames):
        large_img = np.zeros((64 * 2, 64 * FRAME_STACK))

        # decode the latent vector
        if z is not None:
            img = self.vae.decode(z.reshape(1, self.z_size)) * 255.0
            img = np.round(img).astype(np.uint8)
            img = img.reshape(64, 64, FRAME_STACK)
            for i in range(FRAME_STACK):
                large_img[64:, i * 64:(i + 1) * 64] = img[:, :, i]

        if len(cumulative_frames) == FRAME_STACK:
            for i in range(FRAME_STACK):
                large_img[:64, i * 64:(i + 1) * 64] = cumulative_frames[i]

        large_img = large_img.astype(np.uint8)

        return large_img

    def _reset(self):
        self._internal_counter = 0
        self.cumulative_frames = None
        self._has_rendered = False
        return super(VAERacingStack, self)._reset()

    def _render(self, mode='human', close=False):
        if mode == 'human' or mode == 'rgb_array':
            self._has_rendered = True
        return super(VAERacingStack, self)._render(mode=mode, close=close)

    def _step(self, action):

        if not self._has_rendered:
            self._render("rgb_array")
            self._has_rendered = False

        if action is not None:
            if not self.discrete_mode:
                action[0] = _clip(action[0], lo=-1.0, hi=+1.0)
                action[1] = _clip(action[1], lo=-1.0, hi=+1.0)
                action[1] = (action[1] + 1.0) / 2.0
                action[2] = _clip(action[2])
            else:
                '''
        in discrete setting:
        if action[0] is the highest, then agent does nothing
        if action[1] is the highest, then agent hits the pedal
        if -action[1] is the highest, then agent hits the brakes
        if action[2] is the highest, then agent turns left
        if action[3] is the highest, then agent turns right
        '''
                logits = [
                    _clip((action[0] + 1.0), hi=+2.0),
                    _clip(action[1]),
                    _clip(-action[1]),
                    _clip(action[2]),
                    _clip(-action[2])
                ]
                probs = softmax(logits)
                #chosen_action = np.argmax(logits)
                chosen_action = sample(probs)

                a = np.array([0.0, 0.0, 0.0])

                if chosen_action == 1: a[1] = +1.0  # up
                if chosen_action == 2:
                    a[2] = +0.8  # down: 0.8 as recommended by the environment's built-in demo
                if chosen_action == 3: a[0] = -1.0  # left
                if chosen_action == 4: a[0] = +1.0  # right

                action = a
                #print("chosen_action", chosen_action, action)

        obs, reward, done, _ = super(VAERacingStack, self)._step(action)

        if self.cumulative_frames is not None:
            self.cumulative_frames.pop(0)
            self.cumulative_frames.append(_process_frame_green(obs))
        else:
            self.cumulative_frames = [_process_frame_green(obs)] * FRAME_STACK

        self.z = z = _compress_frames(self.cumulative_frames, self.vae)

        if self.full_episode:
            return z, reward, False, {}

        self._internal_counter += 1
        if self._internal_counter > TIME_LIMIT:
            done = True

        #img = self._get_image(self.z, self.cumulative_frames)
        #imageio.imwrite('dump/'+('%0*d' % (4, self._internal_counter))+'.png', img)

        return z, reward, done, {}
Ejemplo n.º 7
0
for i in range(steps):

  ob = obs[i:i+1] # (1, 64, 64, 1)
  action = oh_actions[i:i+1] # (1, n)

  z = vae.encode(ob) # (1, 32) VAE done!
  rnn_z = np.expand_dims(z, axis=0) # (1, 1, 32)
  action = np.expand_dims(action, axis=0) # (1, 1, n)


  input_x = np.concatenate([rnn_z, action], axis=2) # (1, 1, 32+n)
  feed = {rnn.input_x: input_x, rnn.initial_state: state} # predict the next state and next z.

  if pz is not None: # decode from the z
    frame = vae.decode(pz[None])
    frame2 = vae.decode(z)
    #neglogp = neg_likelihood(logmix, mean, logstd, z.reshape(32,1))
    #imsave(output_dir + '/%s_origin_%.2f.png' % (pad_num(i), np.exp(-neglogp)), 255.*ob.reshape(64, 64))
    #imsave(output_dir + '/%s_reconstruct.png' % pad_num(i), 255. * frame[0].reshape(64, 64))
    img = concat_img(255.*ob, 255*frame2, 255.*frame)
    imsave(output_dir + '/%s.png' % pad_num(i), img)

  (logmix, mean, logstd, state) = rnn.sess.run([rnn.out_logmix, rnn.out_mean,
                                                rnn.out_logstd, rnn.final_state], feed)



  # Sample the next frame's state.
  pz = sample_z(logmix, mean, logstd, OUTWIDTH, T)