class Model:
    ''' simple one layer model for car racing '''

    def __init__(self, load_model=True):
        self.env_name = "carracing"
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = self.input_size * 3 + 3

        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False, full_episode=False):
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode,
                            full_episode=full_episode)

    def reset(self):
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        # print(obs.shape)  # debug
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        h = rnn_output(self.state, z, EXP_MODE)
        '''
        action = np.dot(h, self.weight) + self.bias
        action[0] = np.tanh(action[0])
        action[1] = sigmoid(action[1])
        action[2] = clip(np.tanh(action[2]))
        '''
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)

        action[1] = (action[1] + 1.0) / 2.0  # gas: map tanh output to [0, 1]
        action[2] = clip(action[2])          # brake: clip to valid range

        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action

    def set_model_params(self, model_params):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
            self.bias_output = params_2[:3]
            self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
        else:
            self.bias = np.array(model_params[:3])
            self.weight = np.array(model_params[3:]).reshape(self.input_size, 3)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        # return np.random.randn(self.param_count) * stdev
        return np.random.standard_cauchy(self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
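# A minimal rollout sketch (not part of the class above): it assumes the
# helpers defined here (make_env, reset, encode_obs, get_action) and an
# old-style Gym env whose step() returns (obs, reward, done, info); the step
# cap is arbitrary.
def rollout_sketch(model, max_steps=1000):
    model.make_env(render_mode=False)
    model.reset()
    obs = model.env.reset()
    total_reward = 0.0
    for _ in range(max_steps):
        z, mu, logvar = model.encode_obs(obs)   # V: 64x64x3 frame -> latent z
        action = model.get_action(z)            # M + C: [steering, gas, brake]
        obs, reward, done, _ = model.env.step(action)
        total_reward += reward
        if done:
            break
    return total_reward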
class Model:
    ''' simple one layer model for translating game state to actions '''

    def __init__(self, load_model=True):
        self.env_name = "Pong"
        self._make_env()
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        hps_sample_dynamic = hps_sample._replace(num_actions=self.num_actions)
        self.rnn = MDNRNN(hps_sample_dynamic, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            raise Exception("not ported for atari")
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, self.num_actions)
            self.bias_output = np.random.randn(self.num_actions)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                (self.hidden_size + 1) * self.num_actions)
        else:
            # num_actions is not known until env.action_space is queried in _make_env()
            self.weight = np.random.randn(self.input_size, self.num_actions)
            self.bias = np.random.randn(self.num_actions)
            self.param_count = (self.input_size + 1) * self.num_actions

        self.render_mode = False

    def _make_env(self, render_mode=False):
        # render_mode is now a parameter; it was previously read from an undefined name
        self.render_mode = render_mode
        self.env = make_env(self.env_name)
        self.num_actions = self.env.action_space.n

    def make_env(self):
        pass  # TODO (Chazzz): eventually remove

    def reset(self):
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 1)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        h = rnn_output(self.state, z, EXP_MODE)
        # print(len(h), " h:", h)  # TODO: 256+32 (the 32 comes first)
        # So we could have 288*2*18 params, or 288*2*environment.action_space.n (6 for Pong)
        '''
        action = np.dot(h, self.weight) + self.bias
        action[0] = np.tanh(action[0])
        action[1] = sigmoid(action[1])
        action[2] = clip(np.tanh(action[2]))
        '''
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            raise Exception("Not ported to atari")
            # h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            # action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            # could probabilistically sample from softmax, but greedy
            action = np.argmax(np.matmul(h, self.weight) + self.bias)
        # action[1] = (action[1] + 1.0) / 2.0
        # action[2] = clip(action[2])
        # print("Action:", action)
        action_one_hot = np.zeros(self.num_actions)
        action_one_hot[action] = 1
        # print("Action hot:", action_one_hot)
        self.state = rnn_next_state(self.rnn, z, action_one_hot, self.state)
        return action

    def set_model_params(self, model_params):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.num_actions]
            self.weight_output = params_2[self.num_actions:].reshape(self.hidden_size, self.num_actions)
        else:
            self.bias = np.array(model_params[:self.num_actions])
            self.weight = np.array(model_params[self.num_actions:]).reshape(self.input_size, self.num_actions)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        # return np.random.randn(self.param_count) * stdev
        return np.random.standard_cauchy(self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
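# The Atari controller above always takes the argmax logit. As its own comment
# notes, one could instead sample from a softmax; a small numpy-only sketch of
# that alternative (function name and temperature are illustrative, not from
# the original code), where `logits` would be np.matmul(h, self.weight) + self.bias.
import numpy as np

def sample_action_from_logits(logits, temperature=1.0):
    logits = np.asarray(logits, dtype=np.float64) / temperature
    logits -= logits.max()                        # subtract max for numerical stability
    probs = np.exp(logits)
    probs /= probs.sum()
    return np.random.choice(len(probs), p=probs)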
class Model:
    ''' simple one layer model for Atari Pong '''

    def __init__(self, load_model=True, env_name="Pong-v0", render_mode=False):
        self.env_name = env_name
        self.make_env()
        self.z_size = 32

        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        hps_atari = hps_sample._replace(input_seq_width=self.z_size + self.na)
        self.rnn = MDNRNN(hps_atari, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        # init the controller after the environment creation, once the action count is known
        self.init_controller()
        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False):
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)
        self.na = self.env.action_space.n  # discrete by default

    def init_controller(self):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, self.na)  # Pong; modify later
            self.bias_output = np.random.randn(self.na)
            self.param_count = (self.input_size + 1) * self.hidden_size + (
                self.hidden_size + 1) * self.na
        else:
            self.weight = np.random.randn(self.input_size, self.na)
            self.bias = np.random.randn(self.na)
            self.param_count = (self.input_size + 1) * self.na

    def reset(self):
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 1)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z, epsilon=0.0):
        h = rnn_output(self.state, z, EXP_MODE)
        '''
        action = np.dot(h, self.weight) + self.bias
        action[0] = np.tanh(action[0])
        action[1] = sigmoid(action[1])
        action[2] = clip(np.tanh(action[2]))
        '''
        if np.random.rand() < epsilon:
            action = np.random.randint(0, self.na)
        else:
            if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
                h = np.maximum(np.dot(h, self.weight_hidden) + self.bias_hidden, 0)
                action = np.argmax(np.dot(h, self.weight_output) + self.bias_output)
            else:
                action = np.argmax(np.dot(h, self.weight) + self.bias)
        oh_action = np.zeros(self.na)
        oh_action[action] = 1
        # action[1] = (action[1] + 1.0) / 2.0
        # action[2] = clip(action[2])
        # TODO: check this function
        self.state = rnn_next_state(self.rnn, z, oh_action, self.state)
        return action

    def set_model_params(self, model_params):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.na]
            self.weight_output = params_2[self.na:].reshape(self.hidden_size, self.na)
        else:
            self.bias = np.array(model_params[:self.na])
            self.weight = np.array(model_params[self.na:]).reshape(self.input_size, self.na)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        # return np.random.randn(self.param_count) * stdev
        return np.random.standard_cauchy(self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
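# A quick numpy check of the flat parameter layout that set_model_params above
# expects in the single-layer case: bias first, then the weight matrix in
# row-major order. The sizes are illustrative (288 = 256 LSTM units + 32-dim z,
# 6 actions for Pong), not read from a live environment.
import numpy as np

input_size, na = 288, 6
param_count = (input_size + 1) * na            # same count init_controller computes
flat = np.random.standard_cauchy(param_count) * 0.1

bias = flat[:na]
weight = flat[na:].reshape(input_size, na)
assert bias.shape == (na,) and weight.shape == (input_size, na)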
class Model:
    ''' simple one layer model for VisualPushBlock '''

    def __init__(self, load_model=True):
        self.env_name = './VisualPushBlock_withBlock_z_info.x86_64'  # './VisualPushBlock.x86_64'
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = z_size

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer  ### CHANGE is made here
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, ACTION_SIZE)
            self.bias_output = np.random.randn(ACTION_SIZE)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                self.hidden_size * ACTION_SIZE + ACTION_SIZE)
        else:
            self.weight = np.random.randn(self.input_size, ACTION_SIZE)
            self.bias = np.random.randn(ACTION_SIZE)
            self.param_count = self.input_size * ACTION_SIZE + ACTION_SIZE

        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False, full_episode=False, worker_id=0):
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode,
                            full_episode=full_episode, worker_id=worker_id)

    def reset(self):
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        # result = np.copy(obs).astype(float) / 255.0
        result = np.copy(obs).astype(float)
        result = result.reshape(1, IMAGE_W, IMAGE_H, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        h = rnn_output(self.state, z, EXP_MODE)
        # print('h', h.shape, h)
        '''
        action = np.dot(h, self.weight) + self.bias
        action[0] = np.tanh(action[0])
        action[1] = sigmoid(action[1])
        action[2] = clip(np.tanh(action[2]))
        '''
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            # print(h.shape, self.weight.shape, self.bias.shape)
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        # for i in range(ACTION_SIZE):
        #     action[i] = (action[i] + 1.0) / 2.0  # all action values are in range 0 to 1
        # action[2] = clip(action[2])
        self.state = rnn_next_state(self.rnn, z, action, self.state)  # advance the MDN-RNN hidden state
        return action

    def set_model_params(self, model_params):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
            self.bias_output = params_2[:ACTION_SIZE]
            self.weight_output = params_2[ACTION_SIZE:].reshape(self.hidden_size, ACTION_SIZE)
        else:
            self.bias = np.array(model_params[:ACTION_SIZE])
            self.weight = np.array(model_params[ACTION_SIZE:]).reshape(self.input_size, ACTION_SIZE)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        # return np.random.randn(self.param_count) * stdev
        return np.random.standard_cauchy(self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
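# Hypothetical helper, not present in the class above: flatten the controller
# back into the layout set_model_params expects (bias first, then the weight
# matrix), e.g. for saving a solution found by the evolution strategy. Only the
# single-layer (non-MODE_Z_HIDDEN) case is sketched.
import numpy as np

def get_model_params_sketch(model):
    return np.concatenate([np.asarray(model.bias).ravel(),
                           np.asarray(model.weight).ravel()])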
class Model:

    def __init__(self, arglist, action_space, scope, load_model=True):
        self.action_space = action_space
        self.arglist = arglist
        # self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        hps_sample = hps_model._replace(
            batch_size=1,
            input_seq_width=arglist.obs_size + arglist.action_space +
            (arglist.agent_num - 1) * arglist.action_space * arglist.timestep,
            max_seq_len=1,
            use_recurrent_dropout=0,
            is_training=0,
            obs_size=arglist.obs_size)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            # self.vae.load_json(arglist.vae_model_dir)
            self.rnn.load_json(arglist.rnn_model_dir)

        self.state = self.rnn.rnn_init_state()
        self.rnn_mode = True

        print(arglist.inference)
        if arglist.inference:
            self.input_size = self.rnn.rnn_output_size(arglist.exp_mode) + \
                (arglist.agent_num - 1) * arglist.action_space
        else:
            self.input_size = self.rnn.rnn_output_size(arglist.exp_mode) + \
                arglist.timestep * (arglist.agent_num - 1) * arglist.action_space
        # self.z_size = 32

        # action-trajectory buffers, one rolling window per opponent
        # (note: list multiplication reuses the same deque object for every opponent)
        self.act_traj = [collections.deque(np.zeros((arglist.timestep, arglist.action_space)),
                                           maxlen=arglist.timestep)] * (arglist.agent_num - 1)
        # self.oppo_model = Oppo_Model(arglist.agent_num, arglist.timestep, arglist.action_space,
        #                              arglist.action_space, "oppo_model_{}".format(scope))
        self.inference = arglist.inference

        if arglist.exp_mode == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, self.action_space)
            self.bias_output = np.random.randn(self.action_space)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                self.hidden_size * self.action_space + self.action_space)
        else:
            self.weight = np.random.randn(self.input_size, self.action_space)
            self.bias = np.random.randn(self.action_space)
            self.param_count = self.input_size * self.action_space + self.action_space

    def reset(self):
        self.state = self.rnn.rnn_init_state()
        # self.oppo_state = lstm_init_state(self.oppo_model)

    # def encode_obs(self, obs):
    #     # convert raw obs to z, mu, logvar
    #     result = np.copy(obs).astype(float) / 255.0
    #     result = result.reshape(1, 64, 64, 3)
    #     mu, logvar = self.vae.encode_mu_logvar(result)
    #     mu = mu[0]
    #     logvar = logvar[0]
    #     s = logvar.shape
    #     z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
    #     return z, mu, logvar

    def get_action(self, obs, act_traj):
        h = self.rnn.rnn_output(self.state, obs, act_traj, self.arglist.exp_mode)

        if self.arglist.inference:
            oppo_intents = []
            for i in range(self.arglist.agent_num - 1):
                act_traj = self.act_traj[i]
                # intent = self.oppo_model.get_inference(act_traj)
                intent = [0, 0]
                oppo_intents.append(intent)
            # oppo_intents has shape (agent_num - 1, action_space); flatten before concatenation
            oppo_intents = np.reshape(oppo_intents,
                                      ((self.arglist.agent_num - 1) * self.arglist.action_space))
            controller_input = np.concatenate((h, oppo_intents))
        else:
            controller_input = h

        if self.arglist.exp_mode == MODE_Z_HIDDEN:  # one hidden layer
            x = np.tanh(np.dot(controller_input, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(x, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(controller_input, self.weight) + self.bias)

        for i in range(self.action_space):
            action[i] = clip(action[i])

        self.state = self.rnn.rnn_next_state(obs, action, self.act_traj, self.state)
        # self.oppo_state = oppo_next_state(self.oppo_model, action, self.act_traj, self.oppo_state)

        # epsilon exploration
        if np.random.uniform(0, 1) < 0.2:
            action = [np.random.uniform(-3, 3)] * len(action)
        return action

    def set_model_params(self, model_params):
        if self.arglist.exp_mode == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.action_space]
            self.weight_output = params_2[self.action_space:].reshape(self.hidden_size, self.action_space)
        else:
            self.bias = np.array(model_params[:self.action_space])
            self.weight = np.array(model_params[self.action_space:]).reshape(self.input_size, self.action_space)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        # self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        # return np.random.randn(self.param_count) * stdev
        return np.random.standard_cauchy(self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        # vae_params = self.vae.get_random_model_params(stdev=stdev)
        # self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
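# Hypothetical bookkeeping step, not shown in the class above: after each
# environment step, push every opponent's latest action into its trajectory
# deque so the inference branch of get_action sees a rolling window of length
# arglist.timestep. Keep in mind the list-multiplication caveat flagged in
# __init__ (all entries may point at the same deque object).
import numpy as np

def update_act_traj_sketch(model, opponent_actions):
    # opponent_actions: one action vector per opponent, length agent_num - 1
    for buf, act in zip(model.act_traj, opponent_actions):
        buf.append(np.asarray(act))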