def __init__(self, hparams):
    """Build an A2C-style agent from a hyper-parameter dict.

    Copies training hyper-parameters onto the instance, constructs the CNN
    actor-critic and the rollout buffer, and creates an Adam optimizer.

    Args:
        hparams: dict with keys 'obs_shape', 'n_actions', 'use_gae',
            'gamma', 'tau', 'num_steps', 'num_processes',
            'value_loss_coef', 'entropy_coef', 'cuda', 'opt',
            'grad_clip', 'lr', 'eps'.
    """
    self.obs_shape = hparams['obs_shape']
    self.n_actions = hparams['n_actions']
    self.use_gae = hparams['use_gae']
    self.gamma = hparams['gamma']
    self.tau = hparams['tau']
    self.num_steps = hparams['num_steps']
    self.num_processes = hparams['num_processes']
    self.value_loss_coef = hparams['value_loss_coef']
    self.entropy_coef = hparams['entropy_coef']
    self.cuda = hparams['cuda']
    self.opt = hparams['opt']
    self.grad_clip = hparams['grad_clip']

    # Policy/value network; obs_shape[0] is the input channel count.
    self.actor_critic = CNNPolicy(self.obs_shape[0], self.n_actions)  #.cuda()

    # Storing rollouts
    self.rollouts = RolloutStorage(self.num_steps, self.num_processes,
                                   self.obs_shape, self.n_actions)

    # if self.cuda:
    # NOTE(review): the guard above is commented out, so the .cuda() calls
    # run unconditionally even though hparams['cuda'] is stored — confirm
    # this matches the intended original layout.
    self.actor_critic.cuda()
    self.rollouts.cuda()

    self.optimizer = optim.Adam(params=self.actor_critic.parameters(),
                                lr=hparams['lr'], eps=hparams['eps'])

    # Keep the full dict for later reference.
    self.hparams = hparams
def __init__(self, hparams):
    """Build an A2C-style agent from a hyper-parameter dict (extended variant).

    Compared to the sibling constructors, this one additionally reads
    'next_state_pred_', supports a trajectory-action-mask policy, selects
    among RMSprop/Adam/SGD optimizers via hparams['opt'], and optionally
    creates a list-based rollout store for analysis modes.

    Args:
        hparams: dict of hyper-parameters; required keys include
            'use_gae', 'gamma', 'tau', 'obs_shape', 'num_steps',
            'num_processes', 'value_loss_coef', 'entropy_coef', 'cuda',
            'opt', 'grad_clip', 'next_state_pred_', 'action_space',
            'lr', 'eps', plus 'alpha' (rms) or 'mom' (sgd), and the
            flags 'gif_', 'ls_', 'vae_', 'grad_var_'.
    """
    self.use_gae = hparams['use_gae']
    self.gamma = hparams['gamma']
    self.tau = hparams['tau']
    self.obs_shape = hparams['obs_shape']
    self.num_steps = hparams['num_steps']
    self.num_processes = hparams['num_processes']
    self.value_loss_coef = hparams['value_loss_coef']
    self.entropy_coef = hparams['entropy_coef']
    self.cuda = hparams['cuda']
    self.opt = hparams['opt']
    self.grad_clip = hparams['grad_clip']
    self.next_state_pred_ = hparams['next_state_pred_']

    # Policy and Value network
    if 'traj_action_mask' in hparams and hparams['traj_action_mask']:
        self.actor_critic = CNNPolicy_trajectory_action_mask(
            self.obs_shape[0], hparams['action_space'])
    else:
        self.actor_critic = CNNPolicy(self.obs_shape[0],
                                      hparams['action_space'])

    # Storing rollouts
    self.rollouts = RolloutStorage(self.num_steps, self.num_processes,
                                   self.obs_shape, hparams['action_space'])

    if self.cuda:
        self.actor_critic.cuda()
        self.rollouts.cuda()

    # Optimizer — selected by the 'opt' hyper-parameter.
    if self.opt == 'rms':
        self.optimizer = optim.RMSprop(
            params=self.actor_critic.parameters(), lr=hparams['lr'],
            eps=hparams['eps'], alpha=hparams['alpha'])
    elif self.opt == 'adam':
        self.optimizer = optim.Adam(params=self.actor_critic.parameters(),
                                    lr=hparams['lr'], eps=hparams['eps'])
    elif self.opt == 'sgd':
        self.optimizer = optim.SGD(params=self.actor_critic.parameters(),
                                   lr=hparams['lr'], momentum=hparams['mom'])
    else:
        # NOTE(review): unknown opt leaves self.optimizer unset — training
        # would fail later; confirm that is acceptable here.
        print('no opt specified')

    self.action_shape = 1

    # Extra list-based rollout storage for gif/landscape/vae/grad-variance
    # analysis modes.
    if hparams['gif_'] or hparams['ls_'] or hparams['vae_'] or hparams[
            'grad_var_']:
        self.rollouts_list = RolloutStorage_list()

    self.hparams = hparams
def __init__(self, envs, hparams):
    """Build an A2C-style agent from an envs object plus hyper-parameters.

    Like the hparams-only variant, but reads the action space from
    ``envs.action_space`` instead of the dict.

    Args:
        envs: vectorized environment object; only ``envs.action_space``
            is used here.
        hparams: dict of hyper-parameters (see keys read below).
    """
    self.use_gae = hparams['use_gae']
    self.gamma = hparams['gamma']
    self.tau = hparams['tau']
    self.obs_shape = hparams['obs_shape']
    self.num_steps = hparams['num_steps']
    self.num_processes = hparams['num_processes']
    self.value_loss_coef = hparams['value_loss_coef']
    self.entropy_coef = hparams['entropy_coef']
    self.cuda = hparams['cuda']
    self.opt = hparams['opt']
    self.grad_clip = hparams['grad_clip']

    # Policy and Value network
    # (older policy-selection variants, kept for reference)
    # if hparams['dropout'] == True:
    #     print ('CNNPolicy_dropout2')
    #     self.actor_critic = CNNPolicy_dropout2(self.obs_shape[0], envs.action_space)
    # elif len(envs.observation_space.shape) == 3:
    #     print ('CNNPolicy2')
    #     self.actor_critic = CNNPolicy2(self.obs_shape[0], envs.action_space)
    # else:
    #     self.actor_critic = MLPPolicy(self.obs_shape[0], envs.action_space)
    if 'traj_action_mask' in hparams and hparams['traj_action_mask']:
        self.actor_critic = CNNPolicy_trajectory_action_mask(
            self.obs_shape[0], envs.action_space)
    else:
        self.actor_critic = CNNPolicy(self.obs_shape[0], envs.action_space)

    # Storing rollouts
    self.rollouts = RolloutStorage(self.num_steps, self.num_processes,
                                   self.obs_shape, envs.action_space)

    if self.cuda:
        self.actor_critic.cuda()
        self.rollouts.cuda()

    # Optimizer — selected by the 'opt' hyper-parameter.
    if self.opt == 'rms':
        self.optimizer = optim.RMSprop(
            params=self.actor_critic.parameters(), lr=hparams['lr'],
            eps=hparams['eps'], alpha=hparams['alpha'])
    elif self.opt == 'adam':
        self.optimizer = optim.Adam(params=self.actor_critic.parameters(),
                                    lr=hparams['lr'], eps=hparams['eps'])
    elif self.opt == 'sgd':
        self.optimizer = optim.SGD(params=self.actor_critic.parameters(),
                                   lr=hparams['lr'], momentum=hparams['mom'])
    else:
        # NOTE(review): unknown opt leaves self.optimizer unset.
        print('no opt specified')

    # if envs.action_space.__class__.__name__ == "Discrete":
    #     action_shape = 1
    # else:
    #     action_shape = envs.action_space.shape[0]
    # self.action_shape = action_shape
    self.action_shape = 1

    # if __:
    #     self.deterministic_action = 0
    # else:
    #     self.deterministic_action = 0

    # Extra list-based rollout storage for gif/landscape analysis modes.
    if hparams['gif_'] or hparams['ls_']:
        self.rollouts_list = RolloutStorage_list()

    self.hparams = hparams
state_dataset.append(dataset[i][t][1]) # /255.) print(len(state_dataset)) print('\nInit Policies') # agent = a2c(model_dict) # param_file = home+'/Documents/tmp/breakout_2frames/BreakoutNoFrameskip-v4/A2C/seed0/model_params/model_params9999360.pt' # load_policy = 1 policies = [] policies_dir = home + '/Documents/tmp/multiple_seeds_of_policies/BreakoutNoFrameskip-v4/A2C/' for f in os.listdir(policies_dir): print(f) policy = CNNPolicy(2, 4) #.cuda() param_file = home + '/Documents/tmp/multiple_seeds_of_policies/BreakoutNoFrameskip-v4/A2C/' + f + '/model_params3/model_params9999360.pt' param_dict = torch.load(param_file) policy.load_state_dict(param_dict) # policy = torch.load(param_file).cuda() print('loaded params', param_file) policy.cuda() policies.append(policy) #just one for now break # if load_policy: # param_file = home+'/Documents/tmp/breakout_2frames_leakyrelu2/BreakoutNoFrameskip-v4/A2C/seed0/model_params3/model_params3999840.pt'
print(len(state_dataset)) # fdsfds print('\nInit Policies') # agent = a2c(model_dict) # param_file = home+'/Documents/tmp/breakout_2frames/BreakoutNoFrameskip-v4/A2C/seed0/model_params/model_params9999360.pt' # load_policy = 1 policies = [] # policies_dir = home+'/Documents/tmp/multiple_seeds_of_policies/BreakoutNoFrameskip-v4/A2C/' policies_dir = home + '/Documents/tmp/RoadRunner/RoadRunnerNoFrameskip-v4/A2C/' for f in os.listdir(policies_dir): print(f) # policy = CNNPolicy(2, 4) #.cuda() policy = CNNPolicy(2, 18) #.cuda() #num-frames, nyum-actions # param_file = home+'/Documents/tmp/multiple_seeds_of_policies/BreakoutNoFrameskip-v4/A2C/'+f+'/model_params3/model_params9999360.pt' param_file = home + '/Documents/tmp/RoadRunner/RoadRunnerNoFrameskip-v4/A2C/' + f + '/model_params3/model_params9999360.pt' param_dict = torch.load(param_file) policy.load_state_dict(param_dict) # policy = torch.load(param_file).cuda() print('loaded params', param_file) policy.cuda() policies.append(policy) #just one for now break
#load experiemetn dict print ('load experiment dict') dict_location = exp_dir + '/' +env_name+ 'NoFrameskip-v4/A2C/seed0/model_dict.json' with open(dict_location, 'r') as outfile: exp_dict = json.load(outfile) #Init policy , not agent print ('init policy') policy = CNNPolicy(2*3, 18) #frames*channels, action size #load params # param_file = exp_dir + '/' +env_name+ 'NoFrameskip-v4/A2C/seed0/model_params3/model_params2000000.pt' param_file = exp_dir + '/' +env_name+ 'NoFrameskip-v4/A2C/seed0/model_params3/model_params3999840.pt' param_dict = torch.load(param_file) policy.load_state_dict(param_dict) # policy = torch.load(param_file).cuda() print ('loaded params', param_file) policy.cuda()
def __init__(self, hparams):
    """Minimal agent setup: store shapes and build a CUDA actor-critic.

    Args:
        hparams: dict with keys 'obs_shape' (obs_shape[0] is the input
            channel count) and 'n_actions'.
    """
    obs_shape = hparams['obs_shape']
    n_actions = hparams['n_actions']
    self.obs_shape = obs_shape
    self.n_actions = n_actions
    # Policy/value network, moved to the GPU immediately.
    self.actor_critic = CNNPolicy(obs_shape[0], n_actions).cuda()
# dataset: trajectories: timesteps: (action,state) state: [2,84,84] print(len(dataset)) print(len(dataset[ii][0])) # single timepoint print(dataset[ii][0][0].shape) #action [1] a_t+1 print(dataset[ii][0][1].shape) #state [2,84,84] s_t state_dataset = [] for i in range(len(dataset)): for t in range(len(dataset[i])): state_dataset.append(dataset[i][t][1]) # /255.) print(len(state_dataset)) print('Init Expert Policy') expert_policy = CNNPolicy(2, 4) #.cuda() # agent = a2c(model_dict) # param_file = home+'/Documents/tmp/breakout_2frames/BreakoutNoFrameskip-v4/A2C/seed0/model_params/model_params9999360.pt' load_policy = 1 if load_policy: # param_file = home+'/Documents/tmp/breakout_2frames_leakyrelu2/BreakoutNoFrameskip-v4/A2C/seed0/model_params3/model_params3999840.pt' param_file = home + '/Documents/tmp/breakout_2frames_leakyrelu2/BreakoutNoFrameskip-v4/A2C/seed0/model_params3/model_params9999360.pt' param_dict = torch.load(param_file) # print (param_dict.keys()) # for key in param_dict.keys(): # print (param_dict[key].size()) # print (policy.state_dict().keys()) # for key in policy.state_dict().keys():
# param_file = home+'/Documents/tmp/RoadRunner/RoadRunnerNoFrameskip-v4/A2C/'+f+'/model_params3/model_params9999360.pt' # param_dict = torch.load(param_file) # policy.load_state_dict(param_dict) # # policy = torch.load(param_file).cuda() # print ('loaded params', param_file) # policy.cuda() # policies.append(policy) # #just one for now # break # policy = policies[0] policy = CNNPolicy(2 * 3, 18) #.cuda() #num-frames* channels, num-actions # param_file = home+'/Documents/tmp/multiple_seeds_of_policies/BreakoutNoFrameskip-v4/A2C/'+f+'/model_params3/model_params9999360.pt' # param_file = home+'/Documents/tmp/RoadRunner/RoadRunnerNoFrameskip-v4/A2C/'+f+'/model_params3/model_params9999360.pt' param_file = home + '/Documents/tmp/' + exp_name + '/' + env_name + 'NoFrameskip-v4/A2C/seed0/model_params3/model_params3999840.pt' param_dict = torch.load(param_file) policy.load_state_dict(param_dict) # policy = torch.load(param_file).cuda() print('loaded params', param_file) policy.cuda() class MASK_PREDICTOR(nn.Module): def __init__(self): super(MASK_PREDICTOR, self).__init__()