예제 #1
0
    def __init__(self):
        self.env_name = "carracing"
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.vae.load_json('vae/vae.json')
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) *
                                self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size) * 3 + 3

        self.render_mode = False
예제 #2
0
class Model:
    ''' simple one layer model for car racing '''
    def __init__(self):
        self.env_name = "carracing"
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.vae.load_json('vae/vae.json')
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) *
                                self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size) * 3 + 3

        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False):
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)

    def reset(self):
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        result = np.copy(obs).astype(np.float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def decode_obs(self, z):
        # decode the latent vector
        img = self.vae.decode(z.reshape(1, self.z_size)) * 255.
        img = np.round(img).astype(np.uint8)
        img = img.reshape(64, 64, 3)
        return img

    def get_action(self, z):
        h = rnn_output(self.state, z, EXP_MODE)
        '''
    action = np.dot(h, self.weight) + self.bias
    action[0] = np.tanh(action[0])
    action[1] = sigmoid(action[1])
    action[2] = clip(np.tanh(action[2]))
    '''
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)

        action[1] = (action[1] + 1.0) / 2.0
        action[2] = clip(action[2])

        self.state = rnn_next_state(self.rnn, z, action, self.state)

        return action

    def set_model_params(self, model_params):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:3]
            self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
        else:
            self.bias = np.array(model_params[:3])
            self.weight = np.array(model_params[3:]).reshape(
                self.input_size, 3)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        return np.random.randn(self.param_count) * stdev
N_data = len(data_mu)  # should be 10k
batch_size = hps_model.batch_size

# save 1000 initial mu and logvars:
initial_mu = np.copy(data_mu[:1000, 0, :] * 10000).astype(np.int).tolist()
initial_logvar = np.copy(data_logvar[:1000, 0, :] * 10000).astype(
    np.int).tolist()
with open(os.path.join("tf_initial_z", "initial_z.json"), 'wt') as outfile:
    json.dump([initial_mu, initial_logvar],
              outfile,
              sort_keys=True,
              indent=0,
              separators=(',', ': '))

reset_graph()
rnn = MDNRNN(hps_model)

# train loop:
hps = hps_model
start = time.time()
for local_step in range(hps.num_steps):

    step = rnn.sess.run(rnn.global_step)
    curr_learning_rate = (hps.learning_rate - hps.min_learning_rate) * (
        hps.decay_rate)**step + hps.min_learning_rate

    raw_z, raw_a = random_batch()
    inputs = np.concatenate((raw_z[:, :-1, :], raw_a[:, :-1, :]), axis=2)
    outputs = raw_z[:, 1:, :]  # teacher forcing (shift by one predictions)

    feed = {