def __init__(self, load_model=True): self.env_name = './VisualPushBlock_withBlock_z_info.x86_64' #'./VisualPushBlock.x86_64' self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True) self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True) if load_model: self.vae.load_json('vae/vae.json') self.rnn.load_json('rnn/rnn.json') self.state = rnn_init_state(self.rnn) self.rnn_mode = True self.input_size = rnn_output_size(EXP_MODE) self.z_size = z_size if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer ###CHANGE is made here self.hidden_size = 40 self.weight_hidden = np.random.randn(self.input_size, self.hidden_size) self.bias_hidden = np.random.randn(self.hidden_size) self.weight_output = np.random.randn(self.hidden_size, ACTION_SIZE) self.bias_output = np.random.randn(ACTION_SIZE) self.param_count = ((self.input_size + 1) * self.hidden_size) + ( self.hidden_size * ACTION_SIZE + ACTION_SIZE) else: self.weight = np.random.randn(self.input_size, ACTION_SIZE) self.bias = np.random.randn(ACTION_SIZE) self.param_count = (self.input_size) * ACTION_SIZE + ACTION_SIZE self.render_mode = False
def __init__(self, args, render_mode=False, load_model=True):
    """Initialise a 'dream' environment driven by a pre-trained VAE + MDN-RNN.

    Reads the stored initial-z statistics and restores model weights from
    results/<exp_name>/<env_name>/.

    Args:
        args: experiment config with exp_name and env_name.
        render_mode: whether rendering was requested.
        load_model: if True, restore VAE/RNN weights from saved keras models.
    """
    self.render_mode = render_mode
    model_path_name = 'results/{}/{}'.format(args.exp_name, args.env_name)
    with open(os.path.join(model_path_name, 'tf_initial_z/initial_z.json'), 'r') as f:
        [initial_mu, initial_logvar] = json.load(f)
    # pair each mu with its logvar so one row holds both stats for a start state
    self.initial_mu_logvar = np.array(
        [list(elem) for elem in zip(initial_mu, initial_logvar)])
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)
    if load_model:
        self.vae.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_vae'.format(
                args.exp_name, args.env_name), compile=False).get_weights())
        self.rnn.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(
                args.exp_name, args.env_name), compile=False).get_weights())
    # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
    self.action_space = Box(low=-1.0, high=1.0, shape=())
    obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
    # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
    self.observation_space = Box(low=-50., high=50., shape=(obs_size, ))
    self.rnn_states = None
    self.o = None  # current latent observation z
    self.seed()
    self.reset()
def __init__(self, arglist):
    """World-model controller with a 2-dim action output.

    Loads VAE/RNN weights from the paths given in arglist and builds a random
    linear (or one-hidden-layer, under MODE_Z_HIDDEN) controller.

    Args:
        arglist: parsed args providing game, vae_file and rnn_file.
    """
    self.env_name = arglist.game
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae.load_json(arglist.vae_file)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.rnn.load_json(arglist.rnn_file)
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, 2)
        self.bias_output = np.random.randn(2)
        # hidden layer (weights + biases) plus output layer (weights + biases)
        self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 2 + 2)
    else:
        self.weight = np.random.randn(self.input_size, 2)
        self.bias = np.random.randn(2)
        self.param_count = (self.input_size) * 2 + 2
    self.render_mode = False
def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
    """CarRacing wrapper whose observations are VAE latents + MDN-RNN state.

    Args:
        args: experiment config (env_name, z_size, rnn_size, state_space, ...).
        load_model: restore pre-trained VAE/RNN weights from tf.saved_model dirs.
        full_episode: forwarded to the base wrapper.
        with_obs: whether to also return the raw frame with the encodings.
    """
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.with_obs = with_obs  # whether or not to return the frame with the encodings
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)
    if load_model:
        self.vae.set_weights([
            param_i.numpy() for param_i in tf.saved_model.load(
                'results/{}/tf_vae'.format(args.env_name)).variables
        ])
        self.rnn.set_weights([
            param_i.numpy() for param_i in tf.saved_model.load(
                'results/{}/tf_rnn'.format(args.env_name)).variables
        ])
    self.rnn_states = rnn_init_state(self.rnn)
    self.full_episode = False
    # BUG FIX: shape must be a tuple; the original passed a bare int
    # (missing trailing comma), which gym's Box constructor rejects.
    self.observation_space = Box(
        low=np.NINF, high=np.Inf,
        shape=(args.z_size + args.rnn_size * args.state_space, ))
def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
    """CarRacing wrapper (keras-weights variant) exposing z + RNN state.

    Args:
        args: experiment config (exp_name, env_name, ...).
        load_model: restore pre-trained VAE/RNN weights from saved keras models.
        full_episode: forwarded to the base wrapper.
        with_obs: whether to also return the raw frame with the encodings.
    """
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.with_obs = with_obs  # whether or not to return the frame with the encodings
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)
    if load_model:
        self.vae.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_vae'.format(
                args.exp_name, args.env_name), compile=False).get_weights())
        self.rnn.set_weights(
            tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(
                args.exp_name, args.env_name), compile=False).get_weights())
    self.rnn_states = rnn_init_state(self.rnn)
    self.full_episode = False
    # BUG FIX: shape must be a 1-tuple; (32+256) is just the int 288
    # (missing trailing comma), which gym's Box constructor rejects.
    self.observation_space = Box(low=np.NINF, high=np.Inf, shape=(32 + 256, ))
def __init__(self, load_model=True):
    """PushBlock world-model controller with a 3-dim action output.

    Args:
        load_model: if True, restore VAE/RNN weights from json files.
    """
    # For Mac
    # self.env_name = "/Users/intuinno/codegit/pushBlock/app/mac/VisualPushBlockContinuous"
    # For linux
    self.env_name = "/home/intuinno/codegit/pushblock/app/linux/pushblock.x86_64"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    if load_model:
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, 3)
        self.bias_output = np.random.randn(3)
        # hidden layer (weights + biases) plus output layer (weights + biases)
        self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
    else:
        self.weight = np.random.randn(self.input_size, 3)
        self.bias = np.random.randn(3)
        self.param_count = (self.input_size)*3+3
    self.render_mode = False
def __init__(self):
    """Car-racing world-model controller (3-dim action output).

    Always loads pre-trained VAE/RNN weights from vae/vae.json and
    rnn/rnn.json, then builds a randomly-initialised controller.
    """
    self.env_name = "carracing"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae.load_json('vae/vae.json')
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.rnn.load_json('rnn/rnn.json')
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, 3)
        self.bias_output = np.random.randn(3)
        # hidden layer (weights + biases) plus output layer (weights + biases)
        self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
    else:
        self.weight = np.random.randn(self.input_size, 3)
        self.bias = np.random.randn(3)
        self.param_count = (self.input_size)*3+3
    self.render_mode = False
def __init__(self, arglist, action_space, scope, load_model=True):
    """Multi-agent world-model controller with optional opponent inference.

    Args:
        arglist: experiment config (model dirs, agent_num, timestep, ...).
        action_space: size of this agent's action vector.
        scope: name suffix for the opponent model's variable scope.
        load_model: if True, restore VAE/RNN weights from json files.
    """
    self.action_space = action_space
    self.arglist = arglist
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    # RNN input = z (32) + own action + all opponents' action histories
    hps_sample = hps_model._replace(
        batch_size=1,
        input_seq_width=32 + arglist.action_space +
        (arglist.agent_num - 1) * arglist.action_space * arglist.timestep,
        max_seq_len=1,
        use_recurrent_dropout=0,
        is_training=0)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    if load_model:
        self.vae.load_json(arglist.vae_model_dir)
        self.rnn.load_json(arglist.rnn_model_dir)
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    if arglist.inference:
        # controller also receives the inferred opponent actions
        self.input_size = rnn_output_size(
            EXP_MODE) + (arglist.agent_num - 1) * arglist.action_space
    else:
        self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    # Action-trajectory buffer, one deque per opponent.
    # BUG FIX: the original built this with list multiplication
    # ([collections.deque(...)] * (agent_num - 1)), which aliases ONE deque
    # object across every slot, so recording one opponent's actions mutated
    # all of them. Build independent deques with a comprehension instead.
    self.act_traj = [
        collections.deque(np.zeros((arglist.timestep, arglist.action_space)),
                          maxlen=arglist.timestep)
        for _ in range(arglist.agent_num - 1)
    ]
    self.oppo_model = Oppo_Model(arglist.agent_num, arglist.timestep,
                                 arglist.action_space, arglist.action_space,
                                 "oppo_model_{}".format(scope))
    self.inference = arglist.inference
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, self.action_space)
        self.bias_output = np.random.randn(self.action_space)
        self.param_count = ((self.input_size + 1) * self.hidden_size) + (
            self.hidden_size * self.action_space + self.action_space)
    else:
        self.weight = np.random.randn(self.input_size, self.action_space)
        self.bias = np.random.randn(self.action_space)
        self.param_count = (
            self.input_size) * self.action_space + self.action_space
class ModelMCTS(Model):
    """Car-racing world-model agent that picks actions via MCTS rollouts."""

    def __init__(self, load_model=True):
        """Load VAE/RNN weights and build the controller plus an empty MCTS tree.

        Args:
            load_model: if True, restore VAE/RNN weights from ../vae and ../rnn.
        """
        self.env_name = "carracing"
        # NOTE(review): SEED and render_mode are read as module-level globals
        # here — confirm they are defined at import time.
        self.env = make_env(self.env_name, seed=SEED, render_mode=render_mode, full_episode=False)
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('../vae/vae.json')
            self.rnn.load_json('../rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size) * 3 + 3
        self.render_mode = False
        self.mct = None  # persisted MCTS tree, reused between get_action calls

    def get_action(self, z):
        """Run MCTS from latent state z and advance the RNN with the chosen action.

        Args:
            z: current VAE latent vector.

        Returns:
            The action selected by the tree search.
        """
        a = random_linear_sample(-1, 1)
        b = random_linear_sample(0, 1)
        c = random_linear_sample(0, 1)
        actions = dp(a, b, c)
        # reuse the previous tree (old_tree) so search effort accumulates
        action, self.mct = mcts.mcts(z, self.env, actions, old_tree=self.mct,
                                     tree_depth=6, simulate_depth=200)
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action
def train_rnn(args, train_dataset, validation_dataset):
    """Train the MDN-RNN with Keras fit() and save it to the tf_rnn directory.

    Logs to TensorBoard under tf_rnn/tensorboard/<timestamp> and writes a
    checkpoint after every epoch.

    Args:
        args: experiment config (rnn_num_steps, rnn_epoch_steps, ...).
        train_dataset: tf.data-style dataset used for training.
        validation_dataset: dataset used for validation during fit().
    """
    model_save_path = get_path(args, "tf_rnn", create=True)
    rnn = MDNRNN(args=args)
    rnn.compile(optimizer=rnn.optimizer, loss=rnn.loss_fn, metrics=rnn.get_metrics())
    print("Start training")
    current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_dir = model_save_path / "tensorboard" / current_time
    # epochs = total steps / steps per epoch, so the overall step budget holds
    rnn.fit(train_dataset,
            validation_data=validation_dataset,
            steps_per_epoch=args.rnn_epoch_steps,
            epochs=args.rnn_num_steps // args.rnn_epoch_steps,
            callbacks=[
                tf.keras.callbacks.TensorBoard(log_dir=str(tensorboard_dir),
                                               update_freq=20,
                                               histogram_freq=1,
                                               profile_batch=0),
                tf.keras.callbacks.ModelCheckpoint(str(model_save_path / "ckpt-e{epoch:03d}"),
                                                   verbose=1),
            ])
    rnn.save(str(model_save_path))
    print(f"Model saved to {model_save_path}")
def __init__(self, load_model=True, full_episode=False):
    """CarRacing wrapper exposing z (32) + RNN hidden state (256) observations.

    Args:
        load_model: if True, restore VAE/RNN weights from json files.
        full_episode: forwarded to the base wrapper.
    """
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.vae = CVAE(batch_size=1)
    self.rnn = MDNRNN(hps_sample)
    if load_model:
        self.vae.load_json('tf_vae/vae.json')
        self.rnn.load_json('tf_rnn/rnn.json')
    self.rnn_states = rnn_init_state(self.rnn)
    self.full_episode = False
    # BUG FIX: shape must be a 1-tuple; (32+256) is just the int 288
    # (missing trailing comma), which gym's Box constructor rejects.
    self.observation_space = Box(low=np.NINF, high=np.Inf, shape=(32 + 256, ))
class CarRacingMDNRNN(CarRacingWrapper):
    """CarRacing env whose observations are VAE latents concatenated with the
    MDN-RNN state (and optionally the raw frame when with_obs=True)."""

    def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
        """Load the VAE/RNN and set up the latent observation space.

        Args:
            args: experiment config (exp_name, env_name, z_size, rnn_size,
                state_space, ...).
            load_model: restore pre-trained weights from tf.saved_model dirs.
            full_episode: forwarded to the base wrapper.
            with_obs: whether to also return the frame with the encodings.
        """
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs  # whether or not to return the frame with the encodings
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)
        if load_model:
            self.vae.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name)).variables
            ])
        self.rnn_states = rnn_init_state(self.rnn)
        self.full_episode = False
        # BUG FIX: shape must be a tuple; the original passed a bare int
        # (missing trailing comma), which gym's Box constructor rejects.
        self.observation_space = Box(
            low=np.NINF, high=np.Inf,
            shape=(args.z_size + args.rnn_size * args.state_space, ))

    def encode_obs(self, obs):
        """Encode a raw 64x64x3 frame into the VAE latent z."""
        # BUG FIX: np.float was removed in NumPy 1.24; use the builtin float.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        z = self.vae.encode(result)[0]
        return z

    def reset(self):
        """Reset the RNN state and the underlying env; return the latent state."""
        self.rnn_states = rnn_init_state(self.rnn)
        if self.with_obs:
            [z_state, obs] = super(CarRacingMDNRNN, self).reset()  # calls step
            self.N_tiles = len(self.track)
            return [z_state, obs]
        else:
            z_state = super(CarRacingMDNRNN, self).reset()  # calls step
            self.N_tiles = len(self.track)
            return z_state

    def _step(self, action):
        """Step the env, encode the frame and append the RNN state to z."""
        obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
        z = tf.squeeze(self.encode_obs(obs))
        h = tf.squeeze(self.rnn_states[0])
        c = tf.squeeze(self.rnn_states[1])
        # state_space == 2 means both cell and hidden state are exposed
        if self.rnn.args.state_space == 2:
            z_state = tf.concat([z, c, h], axis=-1)
        else:
            z_state = tf.concat([z, h], axis=-1)
        if action is not None:  # don't compute state on reset
            self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)
        if self.with_obs:
            return [z_state, obs], reward, done, {}
        else:
            return z_state, reward, done, {}

    def close(self):
        """Close the wrapped env and free TF graph memory."""
        super(CarRacingMDNRNN, self).close()
        tf.keras.backend.clear_session()
        gc.collect()
def __init__(self, args, render_mode=False, load_model=True, with_obs=False):
    """DoomTakeCover wrapper exposing raw frames plus a z + RNN-state space.

    Args:
        args: experiment config (exp_name, env_name, z_size, rnn_size, ...).
        render_mode: enable rendering (sets no_render=False).
        load_model: restore pre-trained VAE/RNN weights from tf.saved_model dirs.
        with_obs: whether to also keep/return the raw observation.
    """
    super(DoomTakeCoverMDNRNN, self).__init__()
    self.with_obs = with_obs
    self.no_render = True
    if render_mode:
        self.no_render = False
    self.current_obs = None
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)
    if load_model:
        self.vae.set_weights([
            param_i.numpy() for param_i in
            tf.saved_model.load('results/{}/{}/tf_vae'.format(
                args.exp_name, args.env_name)).variables
        ])
        self.rnn.set_weights([
            param_i.numpy() for param_i in
            tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                args.exp_name, args.env_name)).variables
        ])
    self.action_space = Box(low=-1.0, high=1.0, shape=())
    self.obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
    self.observation_space = Box(low=0, high=255, shape=(64, 64, 3))
    # BUG FIX: shape must be a tuple; (self.obs_size) is just an int
    # (missing trailing comma), which gym's Box constructor rejects.
    self.actual_observation_space = Box(low=-50., high=50., shape=(self.obs_size, ))
    self._seed()
    self.rnn_states = None
    self.z = None
    self.restart = None
    self.frame_count = None
    self.viewer = None
    self._reset()
def __init__(self, model_name='', load_model=True, load_full_model=False, full_model_path=''):
    """Car-racing controller whose VAE/RNN weights come from a named model or
    an explicit directory.

    Args:
        model_name: prefix used to find <name>_vae.json / <name>_rnn.json
            under vae_path / rnn_path.
        load_model: load the named model when load_full_model is False.
        load_full_model: load vae.json / rnn.json from full_model_path instead.
        full_model_path: directory used when load_full_model is True.
    """
    self.model_name = model_name
    self.env_name = "carracing"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    if load_full_model:
        self.vae.load_json(os.path.join(full_model_path, 'vae.json'))
        self.rnn.load_json(os.path.join(full_model_path, 'rnn.json'))
    elif load_model:
        self.vae.load_json(
            os.path.join(vae_path, self.model_name + '_vae.json'))
        self.rnn.load_json(
            os.path.join(rnn_path, self.model_name + '_rnn.json'))
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, 3)
        self.bias_output = np.random.randn(3)
        self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 3 + 3)
    else:
        self.weight = np.random.randn(self.input_size, 3)
        self.bias = np.random.randn(3)
        self.param_count = (self.input_size) * 3 + 3
    self.render_mode = False
def __init__(self, type="CarRacing", history_pick=4, seed=None, detect_edges=False, detect_grass=False, flip=False):
    """Frame-history CarRacing wrapper with a pre-trained VAE/RNN attached.

    Args:
        type: name prefix used for this instance (timestamp is appended).
        history_pick: number of past frames stacked into the state.
        seed: stored on the instance (note: shadows any seed() method).
        detect_edges, detect_grass, flip: preprocessing/augmentation flags.
    """
    self.name = type + str(time.time())
    # NOTE(review): random.seed(30) makes the env-seed draw below
    # deterministic, so every instance gets the same env seed — confirm
    # this is intended.
    random.seed(30)
    self.env = make_env('CarRacing-v0', random.randint(1,10000000), render_mode = False, full_episode = True)
    self.image_dimension = [64,64]
    self.history_pick = history_pick
    # flattened size of the stacked frame history
    self.state_space_size = history_pick * np.prod(self.image_dimension)
    self.action_space_size = 5
    self.state_shape = [None, self.history_pick] + list(self.image_dimension)
    self.history = []
    # discrete action id -> [steer, gas, brake]
    self.action_dict = {0: [-1, 0, 0], 1: [1, 0, 0], 2: [0, 1, 0], 3: [0, 0, 0.8], 4: [0, 0, 0]}
    self.seed = seed
    self.detect_edges = detect_edges
    self.detect_grass = detect_grass
    self.flip = flip
    self.flip_episode = False
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.vae.load_json('vae/vae.json')
    self.rnn.load_json('rnn/rnn.json')
def __init__(self, sess=None, summary_writer=None, rl_training=False,
             reuse=False, cluster=None, index=0, device='/gpu:0',
             ppo_load_path=None, ppo_save_path=None, load_worldmodel=True,
             ntype='worldmodel'):
    """RL policy built on top of a pre-trained VAE/MDN-RNN world model.

    Args:
        sess: TF session to run in.
        summary_writer: tf.summary.FileWriter for logging; a writer on
            "logs/" is created when None (preserves the old default).
        rl_training: whether the policy is being trained with RL.
        reuse: TF variable-scope reuse flag.
        cluster, index, device: distributed-TF placement settings.
        ppo_load_path / ppo_save_path: path prefixes for the policy model.
        load_worldmodel: if True, restore VAE/RNN weights from json files.
        ntype: suffix appended to the load/save path prefixes.
    """
    # BUG FIX: the original default was summary_writer=tf.summary.FileWriter("logs/"),
    # which is evaluated once at import time — it opens the log directory even
    # when a writer is passed in, and every instance using the default silently
    # shares the same writer object. Create the writer lazily instead.
    if summary_writer is None:
        summary_writer = tf.summary.FileWriter("logs/")
    self.policy_model_path_load = ppo_load_path + ntype
    self.policy_model_path_save = ppo_save_path + ntype
    self.rl_training = rl_training
    self.use_norm = True
    self.reuse = reuse
    self.sess = sess
    self.cluster = cluster
    self.index = index
    self.device = device
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    if load_worldmodel:
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')
    self.input_size = rnn_output_size(EXP_MODE)
    self._create_graph()
    self.rl_saver = tf.train.Saver()
    self.summary_writer = summary_writer
class CarRacingMDNRNN(CarRacingWrapper):
    """CarRacing env returning z + RNN hidden state h as the observation."""

    def __init__(self, load_model=True, full_episode=False):
        """Load the VAE/RNN from json files and set up the latent space.

        Args:
            load_model: if True, restore VAE/RNN weights from json files.
            full_episode: forwarded to the base wrapper.
        """
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.vae = CVAE(batch_size=1)
        self.rnn = MDNRNN(hps_sample)
        if load_model:
            self.vae.load_json('tf_vae/vae.json')
            self.rnn.load_json('tf_rnn/rnn.json')
        self.rnn_states = rnn_init_state(self.rnn)
        self.full_episode = False
        # BUG FIX: shape must be a 1-tuple; (32+256) is just the int 288
        # (missing trailing comma), which gym's Box constructor rejects.
        self.observation_space = Box(low=np.NINF, high=np.Inf, shape=(32 + 256, ))

    def encode_obs(self, obs):
        """Encode a raw frame; return sampled z plus the mu/logvar stats."""
        # BUG FIX: np.float was removed in NumPy 1.24; use the builtin float.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # reparameterisation: z = mu + sigma * eps
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def reset(self):
        """Reset the RNN state and the env; return the latent observation."""
        self.rnn_states = rnn_init_state(self.rnn)
        # NOTE(review): super(CarRacingWrapper, self) skips CarRacingWrapper's
        # own reset and calls its parent's — confirm this is intentional.
        z_h = super(CarRacingWrapper, self).reset()  # calls step
        return z_h

    def _step(self, action):
        """Step the env; return [z, h] as the observation."""
        obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
        z, _, _ = self.encode_obs(obs)
        h = tf.squeeze(self.rnn_states[0])
        z_h = tf.concat([z, h], axis=-1)
        if action is not None:  # don't compute state on reset
            self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)
        return z_h, reward, done, {}
def __init__(self, load_model=True):
    """Pong world-model agent; the RNN hps are widened with the env's action count.

    Args:
        load_model: if True, restore VAE/RNN weights from json files.
    """
    self.env_name = "Pong"
    self._make_env()  # sets self.num_actions from the env
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    hps_sample_dynamic = hps_sample._replace(num_actions=self.num_actions)
    self.rnn = MDNRNN(hps_sample_dynamic, gpu_mode=False, reuse=True)
    if load_model:
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        raise Exception("not ported for atari")
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, self.num_actions)
        self.bias_output = np.random.randn(self.num_actions)
        self.param_count = ((self.input_size + 1) * self.hidden_size) + (
            (self.hidden_size + 1) * self.num_actions)
    else:
        # TODO: Not known until env.action_space is queried...
        self.weight = np.random.randn(self.input_size, self.num_actions)
        self.bias = np.random.randn(self.num_actions)
        self.param_count = (self.input_size + 1) * self.num_actions
    self.render_mode = False
def __init__(self, load_model=True, env_name="Pong-v0", render_mode=False):
    """Atari world-model agent whose RNN input width is z_size + action count.

    Args:
        load_model: if True, restore VAE/RNN weights from json files.
        env_name: gym environment id to wrap.
        render_mode: accepted for API symmetry; rendering is off by default.
    """
    self.env_name = env_name
    self.make_env()  # sets self.na (number of actions) for the RNN input width
    self.z_size = 32
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    # widen the RNN input to hold z plus the one-hot action vector
    hps_atari = hps_sample._replace(input_seq_width=self.z_size + self.na)
    self.rnn = MDNRNN(hps_atari, gpu_mode=False, reuse=True)
    if load_model:
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.init_controller()
    self.render_mode = False
class Model:
    ''' simple one layer model for car racing '''

    def __init__(self):
        """Load pre-trained VAE/RNN weights and build a random controller."""
        self.env_name = "carracing"
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.vae.load_json('vae/vae.json')
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            # hidden layer (weights + biases) plus output layer (weights + biases)
            self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size)*3+3
        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False):
        """Create the underlying gym environment."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)

    def reset(self):
        """Reset only the RNN state (the env is reset separately)."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        # NOTE(review): np.float is removed in NumPy >= 1.24 — this line
        # needs builtin float there.
        result = np.copy(obs).astype(np.float)/255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # reparameterisation: z = mu + sigma * eps
        z = mu + np.exp(logvar/2.0) * np.random.randn(*s)
        return z, mu, logvar

    def decode_obs(self, z):
        # decode the latent vector back into a 64x64x3 uint8 image
        img = self.vae.decode(z.reshape(1, self.z_size)) * 255.
        img = np.round(img).astype(np.uint8)
        img = img.reshape(64, 64, 3)
        return img

    def get_action(self, z):
        """Map latent z (via RNN features) to a 3-dim action, advancing the RNN."""
        h = rnn_output(self.state, z, EXP_MODE)
        '''
        action = np.dot(h, self.weight) + self.bias
        action[0] = np.tanh(action[0])
        action[1] = sigmoid(action[1])
        action[2] = clip(np.tanh(action[2]))
        '''
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        # rescale gas from [-1, 1] to [0, 1]; clip the brake component
        action[1] = (action[1]+1.0) / 2.0
        action[2] = clip(action[2])
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action

    def set_model_params(self, model_params):
        """Unpack a flat parameter vector into the controller's weights/biases."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size+1)*self.hidden_size
            params_1 = params[:cut_off]   # hidden layer: biases then weights
            params_2 = params[cut_off:]   # output layer: biases then weights
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
            self.bias_output = params_2[:3]
            self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
        else:
            self.bias = np.array(model_params[:3])
            self.weight = np.array(model_params[3:]).reshape(self.input_size, 3)

    def load_model(self, filename):
        """Load a flat controller parameter vector from a json file."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Return a Gaussian random parameter vector for the controller."""
        return np.random.randn(self.param_count)*stdev
N_data = len(data_mu) # should be 10k batch_size = hps_model.batch_size # save 1000 initial mu and logvars: initial_mu = np.copy(data_mu[:1000, 0, :] * 10000).astype(np.int).tolist() initial_logvar = np.copy(data_logvar[:1000, 0, :] * 10000).astype( np.int).tolist() with open(os.path.join("tf_initial_z", "initial_z.json"), 'wt') as outfile: json.dump([initial_mu, initial_logvar], outfile, sort_keys=True, indent=0, separators=(',', ': ')) reset_graph() rnn = MDNRNN(hps_model) # train loop: hps = hps_model start = time.time() for local_step in range(hps.num_steps): step = rnn.sess.run(rnn.global_step) curr_learning_rate = (hps.learning_rate - hps.min_learning_rate) * ( hps.decay_rate)**step + hps.min_learning_rate raw_z, raw_a = random_batch() inputs = np.concatenate((raw_z[:, :-1, :], raw_a[:, :-1, :]), axis=2) outputs = raw_z[:, 1:, :] # teacher forcing (shift by one predictions) feed = {
from baselines.ddpg.models import Actor, Critic from baselines.ddpg.memory import Memory from baselines.ddpg.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise from baselines.common import set_global_seeds import baselines.common.tf_util as U from baselines import logger import numpy as np try: from mpi4py import MPI except ImportError: MPI = None vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True) rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True) vae.load_json('vae/vae.json') rnn.load_json('rnn/rnn.json') def learn(network, env, seed=None, total_timesteps=None, nb_epochs=None, # with default settings, perform 1M steps total nb_epoch_cycles=20, nb_rollout_steps=100, reward_scale=1.0, render=False, render_eval=False, noise_type='adaptive-param_0.2', normalize_returns=False,
class Model:
    ''' simple one layer model for translating game state to actions'''

    def __init__(self, load_model=True):
        """Pong world-model agent with a linear (greedy-argmax) controller.

        Args:
            load_model: if True, restore VAE/RNN weights from json files.
        """
        self.env_name = "Pong"
        self._make_env()  # sets self.num_actions from the env
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        hps_sample_dynamic = hps_sample._replace(num_actions=self.num_actions)
        self.rnn = MDNRNN(hps_sample_dynamic, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            raise Exception("not ported for atari")
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, self.num_actions)
            self.bias_output = np.random.randn(self.num_actions)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                (self.hidden_size + 1) * self.num_actions)
        else:
            # TODO: Not known until env.action_space is queried...
            self.weight = np.random.randn(self.input_size, self.num_actions)
            self.bias = np.random.randn(self.num_actions)
            self.param_count = (self.input_size + 1) * self.num_actions
        self.render_mode = False

    def _make_env(self, render_mode=False):
        """Create the gym env and cache its discrete action count.

        BUG FIX: the original read an undefined name `render_mode` (no
        parameter and no visible global), raising NameError on every call.
        It is now a keyword parameter defaulting to False, so the existing
        no-argument call in __init__ keeps working.
        """
        self.render_mode = render_mode
        self.env = make_env(self.env_name)
        self.num_actions = self.env.action_space.n

    def make_env(self):
        pass  #TODO (Chazzz): eventually remove

    def reset(self):
        """Reset only the RNN state (the env is reset separately)."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        # BUG FIX: np.float was removed in NumPy 1.24; use the builtin float.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 1)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # reparameterisation: z = mu + sigma * eps
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        """Pick the greedy discrete action from RNN features; advance the RNN
        with the corresponding one-hot action vector."""
        h = rnn_output(self.state, z, EXP_MODE)
        # print(len(h), " h:", h)
        #TODO: 256+32 (the 32 comes first)
        # So we could have 288*2*18 params, or 288*2*environment.action_space.n (6 for Pong)
        '''
        action = np.dot(h, self.weight) + self.bias
        action[0] = np.tanh(action[0])
        action[1] = sigmoid(action[1])
        action[2] = clip(np.tanh(action[2]))
        '''
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            raise Exception("Not ported to atari")
            # h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            # action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            # could probabilistically sample from softmax, but greedy
            action = np.argmax(np.matmul(h, self.weight) + self.bias)
        action_one_hot = np.zeros(self.num_actions)
        action_one_hot[action] = 1
        self.state = rnn_next_state(self.rnn, z, action_one_hot, self.state)
        return action

    def set_model_params(self, model_params):
        """Unpack a flat parameter vector into the controller's weights/biases."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]   # hidden layer: biases then weights
            params_2 = params[cut_off:]   # output layer: biases then weights
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.num_actions]
            self.weight_output = params_2[self.num_actions:].reshape(
                self.hidden_size, self.num_actions)
        else:
            self.bias = np.array(model_params[:self.num_actions])
            self.weight = np.array(model_params[self.num_actions:]).reshape(
                self.input_size, self.num_actions)

    def load_model(self, filename):
        """Load a flat controller parameter vector from a json file."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Return a heavy-tailed random parameter vector for the controller."""
        #return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        """Randomise the controller AND the VAE/RNN weights."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
class DreamDoomTakeCoverMDNRNN:
    """Doom TakeCover 'dream' environment: episodes are rolled out entirely
    inside the MDN-RNN instead of the real simulator."""

    def __init__(self, args, render_mode=False, load_model=True):
        """Load initial-z statistics plus VAE/RNN weights and reset the dream.

        Args:
            args: experiment config with exp_name and env_name.
            render_mode: stored flag; rendering is a no-op in this env.
            load_model: restore VAE/RNN weights from tf.saved_model dirs.
        """
        self.render_mode = render_mode
        model_path_name = 'results/{}/{}'.format(args.exp_name, args.env_name)
        with open(os.path.join(model_path_name, 'tf_initial_z/initial_z.json'), 'r') as f:
            [initial_mu, initial_logvar] = json.load(f)
        # pair each mu with its logvar so one row holds both stats for a start state
        self.initial_mu_logvar = np.array(
            [list(elem) for elem in zip(initial_mu, initial_logvar)])
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)
        if load_model:
            self.vae.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name)).variables
            ])
        # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
        self.action_space = Box(low=-1.0, high=1.0, shape=())
        obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
        # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
        self.observation_space = Box(low=-50., high=50., shape=(obs_size, ))
        self.rnn_states = None
        self.o = None  # current latent observation z
        self._training = True
        self.seed()
        self.reset()

    def _sample_init_z(self):
        """Sample an initial latent z from the stored (mu, logvar) statistics."""
        idx = self.np_random.randint(low=0, high=self.initial_mu_logvar.shape[0])
        init_mu, init_logvar = self.initial_mu_logvar[idx]
        # stats were stored as ints scaled by 10000 — undo the scaling
        init_mu = init_mu / 10000.0
        init_logvar = init_logvar / 10000.0
        init_z = init_mu + np.exp(
            init_logvar / 2.0) * self.np_random.randn(*init_logvar.shape)
        return init_z

    def reset(self):
        """Start a new dream episode; return the initial [z, c, h] observation."""
        self.rnn_states = rnn_init_state(self.rnn)
        z = np.expand_dims(self._sample_init_z(), axis=0)
        self.o = z
        z_ch = tf.concat([z, self.rnn_states[1], self.rnn_states[0]], axis=-1)
        return tf.squeeze(z_ch)

    def seed(self, seed=None):
        """Seed TF (when given) and the numpy RNG; return the seed gym-style."""
        if seed:
            tf.random.set_seed(seed)
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Advance the dream one step: the RNN predicts next z, reward and done."""
        rnn_states_p1, z_tp1, r_tp1, d_tp1 = rnn_sim(self.rnn,
                                                     self.o,
                                                     self.rnn_states,
                                                     action,
                                                     training=self._training)
        self.rnn_states = rnn_states_p1
        self.o = z_tp1
        z_ch = tf.squeeze(
            tf.concat([z_tp1, self.rnn_states[1], self.rnn_states[0]], axis=-1))
        return z_ch.numpy(), tf.squeeze(r_tp1), d_tp1.numpy(), {}

    def close(self):
        """Free TF graph memory; there is no real simulator to close."""
        tf.keras.backend.clear_session()
        gc.collect()

    def render(self, mode):
        # dreams have no frames to draw
        pass
class Model:
    '''Simple one-layer controller for (competitive) car racing.

    Wraps a pretrained ConvVAE (observation -> latent z) and an MDN-RNN
    world model; a linear (or one-hidden-layer) policy maps the RNN output
    features to a 2-dimensional action.
    '''

    def __init__(self, arglist):
        self.env_name = arglist.game
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.vae.load_json(arglist.vae_file)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json(arglist.rnn_file)
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 2)
            self.bias_output = np.random.randn(2)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 2 + 2)
        else:
            # plain linear policy: RNN features -> 2 actions
            self.weight = np.random.randn(self.input_size, 2)
            self.bias = np.random.randn(2)
            self.param_count = (self.input_size) * 2 + 2
        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False):
        """Create the wrapped environment (stored on self.env)."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)

    def reset(self):
        """Reset the MDN-RNN hidden state at episode start."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Convert a raw 64x64x3 observation to (z, mu, logvar) via the VAE."""
        # FIX: np.float was removed in NumPy >= 1.24; builtin float is the
        # documented equivalent of the old alias.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # reparameterization-trick sample
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def decode_obs(self, z):
        """Decode a latent vector z back to a uint8 64x64x3 image."""
        img = self.vae.decode(z.reshape(1, self.z_size)) * 255.
        img = np.round(img).astype(np.uint8)
        img = img.reshape(64, 64, 3)
        return img

    def get_action(self, z, arglist):
        """Compute an action from latent z, step the env, advance RNN state.

        Returns only the action; the stepped observation/reward are consumed
        internally.
        """
        h = rnn_output(self.state, z, EXP_MODE)
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        if arglist.competitive:
            # second player is scripted
            obs, rewards, done, win = self.env.step([action[0], 'script'])
        else:
            obs, rewards, done, win = self.env.step(action)
        extra_reward = 0.0  # penalize for turning too frequently
        if arglist.competitive:
            if arglist.train_mode and penalize_turning:
                extra_reward -= np.abs(action[0]) / 10.0
                rewards[0] += extra_reward
            reward = rewards[0]
        else:
            if arglist.train_mode and penalize_turning:
                reward = np.sum(rewards)
                extra_reward -= np.abs(action[0]) / 10.0
                reward += extra_reward
        # NOTE(review): `reward` is computed but neither returned nor stored;
        # callers only receive the action — confirm this is intentional.
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action

    def set_model_params(self, model_params):
        """Load a flat parameter vector into the controller weights."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:2]
            self.weight_output = params_2[2:].reshape(self.hidden_size, 2)
        else:
            self.bias = np.array(model_params[:2])
            self.weight = np.array(model_params[2:]).reshape(
                self.input_size, 2)

    def load_model(self, filename):
        """Load controller parameters from a JSON file (first element)."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Return a Gaussian random parameter vector scaled by stdev."""
        return np.random.randn(self.param_count) * stdev
class DoomTakeCoverMDNRNN(DoomTakeCoverEnv):
    """DoomTakeCover wrapper augmented with the world model: observations
    handed to the agent are the VAE latent z concatenated with the MDN-RNN
    state (optionally alongside the raw 64x64x3 frame when with_obs=True)."""

    def __init__(self, args, render_mode=False, load_model=True, with_obs=False):
        super(DoomTakeCoverMDNRNN, self).__init__()
        self.with_obs = with_obs  # also return the raw small frame if True
        self.no_render = True
        if render_mode:
            self.no_render = False
        self.current_obs = None
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)
        if load_model:
            # Restore pretrained VAE/RNN weights from SavedModel checkpoints.
            self.vae.set_weights([
                param_i.numpy()
                for param_i in tf.saved_model.load('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy()
                for param_i in tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name)).variables
            ])
        self.action_space = Box(low=-1.0, high=1.0, shape=())
        self.obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
        self.observation_space = Box(low=0, high=255, shape=(64, 64, 3))
        # FIX: shape must be a tuple — the original `(self.obs_size)` is just
        # a parenthesized int (missing comma); matches the sibling class above.
        self.actual_observation_space = Box(low=-50., high=50., shape=(self.obs_size, ))
        self._seed()
        self.rnn_states = None
        self.z = None
        self.restart = None
        self.frame_count = None
        self.viewer = None
        self._reset()

    def close(self):
        """Close the wrapped env and free TF resources."""
        super(DoomTakeCoverMDNRNN, self).close()
        tf.keras.backend.clear_session()
        gc.collect()

    def _step(self, action):
        """Map the scalar action in [-1, 1] to discrete Doom controls, step
        the real game, then re-encode the frame through the VAE."""
        # update states of rnn with the latent and the action just taken
        self.frame_count += 1
        self.rnn_states = rnn_next_state(self.rnn, self.z, action, self.rnn_states)
        # actual action in wrapped env: left / right / no-op via threshold
        threshold = 0.3333
        full_action = [0] * 43
        if action < -threshold:
            full_action[11] = 1
        if action > threshold:
            full_action[10] = 1
        obs, reward, done, _ = super(DoomTakeCoverMDNRNN, self)._step(full_action)
        small_obs = self._process_frame(obs)
        self.current_obs = small_obs
        self.z = self._encode(small_obs)
        if done:
            self.restart = 1
        else:
            self.restart = 0
        if self.with_obs:
            return [self._current_state(), self.current_obs], reward, done, {}
        else:
            return self._current_state(), reward, done, {}

    def _encode(self, img):
        """Encode a 64x64x3 uint8 frame into a VAE latent vector."""
        # FIX: np.float was removed in NumPy >= 1.24; builtin float is equivalent
        simple_obs = np.copy(img).astype(float) / 255.0
        simple_obs = simple_obs.reshape(1, 64, 64, 3)
        z = self.vae.encode(simple_obs)[0]
        return z

    def _reset(self):
        """Reset the real game and the world-model state; return initial obs."""
        obs = super(DoomTakeCoverMDNRNN, self)._reset()
        small_obs = self._process_frame(obs)
        self.current_obs = small_obs
        self.rnn_states = rnn_init_state(self.rnn)
        self.z = self._encode(small_obs)
        self.restart = 1
        self.frame_count = 0
        if self.with_obs:
            return [self._current_state(), self.current_obs]
        else:
            return self._current_state()

    def _process_frame(self, frame):
        """Crop the bottom HUD off and resize the frame to 64x64 RGB."""
        obs = frame[0:400, :, :]
        obs = Image.fromarray(obs, mode='RGB').resize((64, 64))
        obs = np.array(obs)
        return obs

    def _current_state(self):
        """Concatenate latent z with the flattened RNN state(s)."""
        if self.rnn.args.state_space == 2:
            return np.concatenate([
                self.z,
                tf.keras.backend.flatten(self.rnn_states[1]),
                tf.keras.backend.flatten(self.rnn_states[0])
            ], axis=0)  # cell then hidden for some reason
        return np.concatenate(
            [self.z, tf.keras.backend.flatten(self.rnn_states[0])],
            axis=0)  # only the hidden state

    def _seed(self, seed=None):
        """Seed TF (only when a seed is given) and the gym RNG; returns [seed]."""
        if seed:
            tf.random.set_seed(seed)
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
class Model:
    """Controller over a multi-agent MDN-RNN world model.

    Builds an MDN-RNN whose input width covers the agent's observation, its
    own action, and the other agents' action trajectories, plus a small
    linear (or one-hidden-layer) policy on top. In inference mode, opponent
    intent features are appended to the RNN output before the policy runs.
    """

    def __init__(self, arglist, action_space, scope, load_model=True):
        self.action_space = action_space
        self.arglist = arglist
        hps_sample = hps_model._replace(
            batch_size=1,
            input_seq_width=arglist.obs_size + arglist.action_space +
            (arglist.agent_num - 1) * arglist.action_space * arglist.timestep,
            max_seq_len=1,
            use_recurrent_dropout=0,
            is_training=0,
            obs_size=arglist.obs_size)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_model:
            self.rnn.load_json(arglist.rnn_model_dir)
        self.state = self.rnn.rnn_init_state()
        self.rnn_mode = True
        # Controller input: RNN features plus either inferred opponent
        # intents (inference mode) or full recorded opponent trajectories.
        # (Replaces the original `== True / elif == False` chain, which left
        # input_size unset for any non-bool truthy value.)
        if arglist.inference:
            self.input_size = self.rnn.rnn_output_size(arglist.exp_mode) + \
                (arglist.agent_num - 1) * arglist.action_space
        else:
            self.input_size = self.rnn.rnn_output_size(arglist.exp_mode) + \
                arglist.timestep * (arglist.agent_num - 1) * arglist.action_space
        # One action-trajectory deque per opponent.
        # FIX: build independent deques; the original `[deque(...)] * n`
        # aliased a single deque object shared by every opponent slot.
        self.act_traj = [
            collections.deque(np.zeros((arglist.timestep, arglist.action_space)),
                              maxlen=arglist.timestep)
            for _ in range(arglist.agent_num - 1)
        ]
        self.inference = arglist.inference
        if arglist.exp_mode == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, self.action_space)
            self.bias_output = np.random.randn(self.action_space)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + \
                (self.hidden_size * self.action_space + self.action_space)
        else:
            self.weight = np.random.randn(self.input_size, self.action_space)
            self.bias = np.random.randn(self.action_space)
            self.param_count = (self.input_size) * self.action_space + self.action_space

    def reset(self):
        """Reset the MDN-RNN hidden state at episode start."""
        self.state = self.rnn.rnn_init_state()

    def get_action(self, obs, act_traj):
        """Compute the clipped action for `obs` and advance the RNN state.

        With probability 0.2 a random exploration action is returned instead.
        """
        h = self.rnn.rnn_output(self.state, obs, act_traj, self.arglist.exp_mode)
        if self.arglist.inference:
            oppo_intents = []
            for i in range(self.arglist.agent_num - 1):
                # placeholder intent; a learned opponent model would go here
                intent = [0, 0]
                oppo_intents.append(intent)
            # flatten to ((agent_num - 1) * action_space,)
            oppo_intents = np.reshape(
                oppo_intents,
                ((self.arglist.agent_num - 1) * self.arglist.action_space))
            controller_input = np.concatenate((h, oppo_intents))
        else:
            controller_input = h
        if self.arglist.exp_mode == MODE_Z_HIDDEN:  # one hidden layer
            x = np.tanh(np.dot(controller_input, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(x, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(controller_input, self.weight) + self.bias)
        for i in range(self.action_space):
            action[i] = clip(action[i])
        self.state = self.rnn.rnn_next_state(obs, action, self.act_traj, self.state)
        # epsilon exploration
        # NOTE(review): the same uniform sample is repeated for every action
        # dim, and [-3, 3] exceeds the tanh/clip range above — confirm intent.
        if np.random.uniform(0, 1) < 0.2:
            action = [np.random.uniform(-3, 3)] * len(action)
        return action

    def set_model_params(self, model_params):
        """Load a flat parameter vector into the controller weights."""
        if self.arglist.exp_mode == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.action_space]
            self.weight_output = params_2[self.action_space:].reshape(
                self.hidden_size, self.action_space)
        else:
            self.bias = np.array(model_params[:self.action_space])
            self.weight = np.array(model_params[self.action_space:]).reshape(
                self.input_size, self.action_space)

    def load_model(self, filename):
        """Load controller parameters (data[0]) from a JSON file."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Heavy-tailed (Cauchy) random parameter vector, to spice things up."""
        return np.random.standard_cauchy(self.param_count) * stdev

    def init_random_model_params(self, stdev=0.1):
        """Randomize both the controller and the RNN parameters."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
class Model:
    '''Simple one-layer controller for the VisualPushBlock Unity env.'''

    def __init__(self, load_model=True):
        self.env_name = './VisualPushBlock_withBlock_z_info.x86_64'  #'./VisualPushBlock.x86_64'
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = z_size
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, ACTION_SIZE)
            self.bias_output = np.random.randn(ACTION_SIZE)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                self.hidden_size * ACTION_SIZE + ACTION_SIZE)
        else:
            # plain linear policy: RNN features -> ACTION_SIZE actions
            self.weight = np.random.randn(self.input_size, ACTION_SIZE)
            self.bias = np.random.randn(ACTION_SIZE)
            self.param_count = (self.input_size) * ACTION_SIZE + ACTION_SIZE
        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False, full_episode=False, worker_id=0):
        """Create the Unity environment (stored on self.env)."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode,
                            full_episode=full_episode, worker_id=worker_id)

    def reset(self):
        """Reset the MDN-RNN hidden state at episode start."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Convert raw obs to (z, mu, logvar) via the VAE.

        NOTE(review): the observation is deliberately NOT divided by 255
        (the normalized version is kept commented out) — confirm the Unity
        env already returns values in the range the VAE expects.
        """
        # result = np.copy(obs).astype(float) / 255.0
        # FIX: np.float was removed in NumPy >= 1.24; use builtin float
        result = np.copy(obs).astype(float)
        result = result.reshape(1, IMAGE_W, IMAGE_H, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # reparameterization-trick sample
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        """Compute a tanh-squashed action from the RNN features and advance
        the MDN-RNN hidden state with (z, action)."""
        h = rnn_output(self.state, z, EXP_MODE)
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        self.state = rnn_next_state(self.rnn, z, action, self.state)  # advance MDN-RNN hidden state
        return action

    def set_model_params(self, model_params):
        """Load a flat parameter vector into the controller weights."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:ACTION_SIZE]
            self.weight_output = params_2[ACTION_SIZE:].reshape(
                self.hidden_size, ACTION_SIZE)
        else:
            self.bias = np.array(model_params[:ACTION_SIZE])
            self.weight = np.array(model_params[ACTION_SIZE:]).reshape(
                self.input_size, ACTION_SIZE)

    def load_model(self, filename):
        """Load controller parameters from a JSON file (first element)."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Heavy-tailed (Cauchy) random parameter vector, to spice things up."""
        return np.random.standard_cauchy(self.param_count) * stdev

    def init_random_model_params(self, stdev=0.1):
        """Randomize the controller, VAE, and RNN parameters."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
class Model:
    '''Simple one-layer controller for discrete-action Atari (e.g. Pong).

    The controller is initialized after env creation because the number of
    discrete actions (self.na) comes from the environment.
    '''

    def __init__(self, load_model=True, env_name="Pong-v0", render_mode=False):
        self.env_name = env_name
        self.make_env()
        self.z_size = 32
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        # RNN input is latent z plus the one-hot action vector
        hps_atari = hps_sample._replace(input_seq_width=self.z_size + self.na)
        self.rnn = MDNRNN(hps_atari, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.init_controller()
        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False):
        """Create the wrapped env and record its discrete action count."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)
        self.na = self.env.action_space.n  # discrete by default

    def init_controller(self):
        """Initialize controller weights once self.na is known."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, self.na)
            self.bias_output = np.random.randn(self.na)
            self.param_count = (self.input_size + 1) * self.hidden_size + (
                self.hidden_size + 1) * self.na
        else:
            self.weight = np.random.randn(self.input_size, self.na)
            self.bias = np.random.randn(self.na)
            self.param_count = (self.input_size + 1) * self.na

    def reset(self):
        """Reset the MDN-RNN hidden state at episode start."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Convert a raw 64x64x1 observation to (z, mu, logvar) via the VAE."""
        # FIX: np.float was removed in NumPy >= 1.24; use builtin float
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 1)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # reparameterization-trick sample
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z, epsilon=0.0):
        """Epsilon-greedy discrete action from RNN features; the RNN state is
        advanced with the one-hot encoding of the chosen action."""
        h = rnn_output(self.state, z, EXP_MODE)
        if np.random.rand() < epsilon:
            action = np.random.randint(0, self.na)
        else:
            if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer (ReLU)
                h = np.maximum(np.dot(h, self.weight_hidden) + self.bias_hidden, 0)
                action = np.argmax(np.dot(h, self.weight_output) + self.bias_output)
            else:
                action = np.argmax(np.dot(h, self.weight) + self.bias)
        oh_action = np.zeros(self.na)
        oh_action[action] = 1
        self.state = rnn_next_state(self.rnn, z, oh_action, self.state)
        return action

    def set_model_params(self, model_params):
        """Load a flat parameter vector into the controller weights."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.na]
            self.weight_output = params_2[self.na:].reshape(
                self.hidden_size, self.na)
        else:
            self.bias = np.array(model_params[:self.na])
            self.weight = np.array(model_params[self.na:]).reshape(
                self.input_size, self.na)

    def load_model(self, filename):
        """Load controller parameters from a JSON file (first element)."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Heavy-tailed (Cauchy) random parameter vector, to spice things up."""
        return np.random.standard_cauchy(self.param_count) * stdev

    def init_random_model_params(self, stdev=0.1):
        """Randomize the controller, VAE, and RNN parameters."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
class CarRacing:
    """CarRacing wrapper that feeds frames through a pretrained VAE + MDN-RNN
    and exposes a small discrete action space.

    Parameters:
    - type: name of environment (perturbed variants can be introduced)
    - history_pick: size of frame history
    - seed: list of seeds to sample from during training (None = random games)
    """

    def __init__(self, type="CarRacing", history_pick=4, seed=None,
                 detect_edges=False, detect_grass=False, flip=False):
        self.name = type + str(time.time())
        random.seed(30)  # fixed seed so the env's initial draw is reproducible
        self.env = make_env('CarRacing-v0', random.randint(1, 10000000),
                            render_mode=False, full_episode=True)
        self.image_dimension = [64, 64]
        self.history_pick = history_pick
        self.state_space_size = history_pick * np.prod(self.image_dimension)
        self.action_space_size = 5
        self.state_shape = [None, self.history_pick] + list(self.image_dimension)
        self.history = []
        # discrete action id -> [steer, gas, brake]
        self.action_dict = {0: [-1, 0, 0], 1: [1, 0, 0], 2: [0, 1, 0],
                            3: [0, 0, 0.8], 4: [0, 0, 0]}
        self.seed = seed
        self.detect_edges = detect_edges
        self.detect_grass = detect_grass
        self.flip = flip
        self.flip_episode = False
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')

    def sample_action_space(self):
        """Return a random discrete action id."""
        return np.random.randint(self.action_space_size)

    def map_action(self, action):
        """Map a discrete action id to [steer, gas, brake]; the two steering
        actions (0/1) are mirrored on flipped episodes."""
        if self.flip_episode and action <= 1:
            action = 1 - action
        return self.action_dict[action]

    def reset(self, test=False):
        """Reset the env and RNN state; returns (initial_state, 1)."""
        self.state_rnn = rnn_init_state(self.rnn)
        if self.seed:
            self.env.seed(random.choice(self.seed))
        self.flip_episode = random.random() > 0.5 and not test and self.flip
        state, self.state_rnn = self.encode_obs(self.env.reset(), self.state_rnn,
                                                np.array([0.5, 0.2, 0.8]))
        return state, 1

    def step(self, action, test=False):
        """Apply the action with random frame-skip (2-4 during training, 1 in
        test); returns (next_state, total_reward, done, info, 1)."""
        action = self.map_action(action)
        total_reward = 0
        n = 1 if test else random.choice([2, 3, 4])
        for i in range(n):
            next_state, reward, done, info = self.env.step(action)
            next_state, self.state_rnn = self.encode_obs(next_state, self.state_rnn, action)
            total_reward += reward
            info = {'true_done': done}
            if done:
                break
        return next_state, total_reward, done, info, 1

    def render(self):
        self.env.render()

    def process(self, state):
        """Return (stacked history, in_grass flag); zero-pads when the
        history is not yet full."""
        self.add_history(state)
        in_grass = utils.in_grass(state)
        if len(self.history) < self.history_pick:
            zeros = np.zeros(self.image_dimension)
            result = np.tile(zeros, ((self.history_pick - len(self.history)), 1, 1))
            result = np.concatenate((result, np.array(self.history)))
        else:
            result = np.array(self.history)
        return result, in_grass

    def add_history(self, state):
        """Append a frame to the rolling history (bounded by history_pick)."""
        if len(self.history) >= self.history_pick:
            self.history.pop(0)
        self.history.append(state)

    def __str__(self):
        return self.name + '\nseed: {0}\nactions: {1}'.format(self.seed, self.action_dict)

    def encode_obs(self, obs, prev_state, action):
        """Encode a raw frame to the VAE latent z; returns
        (concat([h, z]), next_rnn_state)."""
        # FIX: np.float was removed in NumPy >= 1.24; use builtin float
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # reparameterization-trick sample
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        h = rnn_output(prev_state, z, 4)
        next_state = rnn_next_state(self.rnn, z, np.array(action), prev_state)
        return np.concatenate([h, z]), next_state