def __init__(self, hparams):

    self.use_gae = hparams['use_gae']
    self.gamma = hparams['gamma']
    self.tau = hparams['tau']
    self.obs_shape = hparams['obs_shape']
    self.num_steps = hparams['num_steps']
    self.num_processes = hparams['num_processes']
    self.value_loss_coef = hparams['value_loss_coef']
    self.entropy_coef = hparams['entropy_coef']
    self.cuda = hparams['cuda']
    self.opt = hparams['opt']
    self.grad_clip = hparams['grad_clip']
    # self.next_state_pred_ = hparams['next_state_pred_']

    # Policy and value network
    # if 'traj_action_mask' in hparams and hparams['traj_action_mask']:
    #     self.actor_critic = CNNPolicy_trajectory_action_mask(self.obs_shape[0], hparams['action_space'])
    # else:
    self.actor_critic = CNNPolicy(self.obs_shape[0], hparams['action_space'], hparams['n_contexts'])

    # Storage for rollouts: [num_steps, num_processes, obs_shape]
    self.rollouts = RolloutStorage(self.num_steps, self.num_processes,
                                   self.obs_shape, hparams['action_space'])

    if self.cuda:
        self.actor_critic.cuda()
        self.rollouts.cuda()

    # Optimizer
    if self.opt == 'rms':
        self.optimizer = optim.RMSprop(params=self.actor_critic.parameters(), lr=hparams['lr'],
                                       eps=hparams['eps'], alpha=hparams['alpha'])
    elif self.opt == 'adam':
        self.optimizer = optim.Adam(params=self.actor_critic.parameters(), lr=hparams['lr'],
                                    eps=hparams['eps'])
    elif self.opt == 'sgd':
        self.optimizer = optim.SGD(params=self.actor_critic.parameters(), lr=hparams['lr'],
                                   momentum=hparams['mom'])
    else:
        print('no opt specified')

    # Actions are stored as a single index (discrete action space)
    self.action_shape = 1

    # Keep an additional list-based rollout buffer when extra logging/analysis is enabled
    if hparams['gif_'] or hparams['ls_'] or hparams['vae_'] or hparams['grad_var_']:
        self.rollouts_list = RolloutStorage_list()

    self.hparams = hparams
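# --- Usage sketch (illustrative, not from the repo) ---
# A minimal instantiation of the constructor above, assuming it belongs to a class
# here called A2CAgent (hypothetical name). The dict keys are the ones the method
# reads; all values are placeholders.
from gym.spaces import Discrete

example_hparams = {
    'use_gae': True, 'gamma': 0.99, 'tau': 0.95,
    'obs_shape': (4, 84, 84),            # stacked frames, height, width
    'num_steps': 5, 'num_processes': 16,
    'value_loss_coef': 0.5, 'entropy_coef': 0.01,
    'cuda': False, 'opt': 'rms', 'grad_clip': 0.5,
    'lr': 7e-4, 'eps': 1e-5, 'alpha': 0.99, 'mom': 0.9,
    'action_space': Discrete(6), 'n_contexts': 2,
    'gif_': False, 'ls_': False, 'vae_': False, 'grad_var_': False,
}
# agent = A2CAgent(example_hparams)      # assumed class name, shown for illustration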
def __init__(self, envs, hparams):

    self.use_gae = hparams['use_gae']
    self.gamma = hparams['gamma']
    self.tau = hparams['tau']
    self.obs_shape = hparams['obs_shape']
    self.num_steps = hparams['num_steps']
    self.num_processes = hparams['num_processes']
    self.value_loss_coef = hparams['value_loss_coef']
    self.entropy_coef = hparams['entropy_coef']
    self.cuda = hparams['cuda']
    self.opt = hparams['opt']
    self.grad_clip = hparams['grad_clip']

    # Policy and value network: pick the architecture that matches the observation space
    if hparams['dropout']:
        print('CNNPolicy_dropout2')
        actor_critic = CNNPolicy_dropout2(self.obs_shape[0], envs.action_space)
        # actor_critic = CNNPolicy_dropout(self.obs_shape[0], envs.action_space)
    elif len(envs.observation_space.shape) == 3:
        print('CNNPolicy2')
        actor_critic = CNNPolicy2(self.obs_shape[0], envs.action_space)
        # actor_critic = CNNPolicy(self.obs_shape[0], envs.action_space)
    else:
        actor_critic = MLPPolicy(self.obs_shape[0], envs.action_space)

    # Discrete actions are stored as a single index; continuous actions keep their dimensionality
    if envs.action_space.__class__.__name__ == "Discrete":
        action_shape = 1
    else:
        action_shape = envs.action_space.shape[0]
    self.action_shape = action_shape

    # Rollout storage: has a self.state of shape [steps, processes, obs];
    # steps is used to compute the expected reward
    rollouts = RolloutStorage(self.num_steps, self.num_processes,
                              self.obs_shape, envs.action_space)

    if self.cuda:
        actor_critic.cuda()
        rollouts.cuda()

    # Optimizer
    if self.opt == 'rms':
        self.optimizer = optim.RMSprop(params=actor_critic.parameters(), lr=hparams['lr'],
                                       eps=hparams['eps'], alpha=hparams['alpha'])
    elif self.opt == 'adam':
        self.optimizer = optim.Adam(params=actor_critic.parameters(), lr=hparams['lr'],
                                    eps=hparams['eps'])
    elif self.opt == 'sgd':
        self.optimizer = optim.SGD(params=actor_critic.parameters(), lr=hparams['lr'],
                                   momentum=hparams['mom'])
    else:
        print('no opt specified')

    self.actor_critic = actor_critic
    self.rollouts = rollouts
    self.rollouts_list = RolloutStorage_list()
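# --- Side note (illustrative) ---
# The action_shape branch above distinguishes discrete from continuous control.
# A standalone sketch of the same check, using gym spaces; the helper name is hypothetical.
import numpy as np
from gym.spaces import Discrete, Box

def action_shape_for(action_space):
    # Discrete actions are stored as a single integer index;
    # continuous (Box) actions keep their vector dimensionality.
    if action_space.__class__.__name__ == "Discrete":
        return 1
    return action_space.shape[0]

assert action_shape_for(Discrete(6)) == 1
assert action_shape_for(Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32)) == 3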
def __init__(self, envs, hparams):

    self.use_gae = hparams['use_gae']
    self.gamma = hparams['gamma']
    self.tau = hparams['tau']
    self.obs_shape = hparams['obs_shape']
    self.num_steps = hparams['num_steps']
    self.num_processes = hparams['num_processes']
    self.value_loss_coef = hparams['value_loss_coef']
    self.entropy_coef = hparams['entropy_coef']
    self.cuda = hparams['cuda']
    self.opt = hparams['opt']
    self.grad_clip = hparams['grad_clip']

    # Policy and value network
    # if hparams['dropout']:
    #     print('CNNPolicy_dropout2')
    #     self.actor_critic = CNNPolicy_dropout2(self.obs_shape[0], envs.action_space)
    # elif len(envs.observation_space.shape) == 3:
    #     print('CNNPolicy2')
    #     self.actor_critic = CNNPolicy2(self.obs_shape[0], envs.action_space)
    # else:
    #     self.actor_critic = MLPPolicy(self.obs_shape[0], envs.action_space)
    if 'traj_action_mask' in hparams and hparams['traj_action_mask']:
        self.actor_critic = CNNPolicy_trajectory_action_mask(self.obs_shape[0], envs.action_space)
    else:
        self.actor_critic = CNNPolicy(self.obs_shape[0], envs.action_space)

    # Storage for rollouts
    self.rollouts = RolloutStorage(self.num_steps, self.num_processes,
                                   self.obs_shape, envs.action_space)

    if self.cuda:
        self.actor_critic.cuda()
        self.rollouts.cuda()

    # Optimizer
    if self.opt == 'rms':
        self.optimizer = optim.RMSprop(params=self.actor_critic.parameters(), lr=hparams['lr'],
                                       eps=hparams['eps'], alpha=hparams['alpha'])
    elif self.opt == 'adam':
        self.optimizer = optim.Adam(params=self.actor_critic.parameters(), lr=hparams['lr'],
                                    eps=hparams['eps'])
    elif self.opt == 'sgd':
        self.optimizer = optim.SGD(params=self.actor_critic.parameters(), lr=hparams['lr'],
                                   momentum=hparams['mom'])
    else:
        print('no opt specified')

    # Actions are stored as a single index (discrete action space)
    # if envs.action_space.__class__.__name__ == "Discrete":
    #     action_shape = 1
    # else:
    #     action_shape = envs.action_space.shape[0]
    # self.action_shape = action_shape
    self.action_shape = 1

    # Keep an additional list-based rollout buffer when gif or landscape logging is enabled
    if hparams['gif_'] or hparams['ls_']:
        self.rollouts_list = RolloutStorage_list()

    self.hparams = hparams
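# --- Refactoring sketch (illustrative, not from the repo) ---
# The optimizer selection is repeated across these constructors; a hypothetical helper
# mirroring the same if/elif chain could look like this (it raises instead of printing
# when no optimizer is specified).
import torch.optim as optim

def build_optimizer(parameters, hparams):
    # Return the optimizer named by hparams['opt'], matching the branches above.
    if hparams['opt'] == 'rms':
        return optim.RMSprop(parameters, lr=hparams['lr'], eps=hparams['eps'],
                             alpha=hparams['alpha'])
    if hparams['opt'] == 'adam':
        return optim.Adam(parameters, lr=hparams['lr'], eps=hparams['eps'])
    if hparams['opt'] == 'sgd':
        return optim.SGD(parameters, lr=hparams['lr'], momentum=hparams['mom'])
    raise ValueError('no opt specified: {}'.format(hparams['opt']))

# self.optimizer = build_optimizer(self.actor_critic.parameters(), hparams)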