Example #1
    def __init__(self, hparams):
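        # Assumes module-level imports: optim is torch.optim; CNNPolicy,
        # RolloutStorage, and RolloutStorage_list are defined elsewhere in this repo.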

        self.use_gae = hparams['use_gae']
        self.gamma = hparams['gamma']
        self.tau = hparams['tau']

        self.obs_shape = hparams['obs_shape']
        self.num_steps = hparams['num_steps']
        self.num_processes = hparams['num_processes']
        self.value_loss_coef = hparams['value_loss_coef']
        self.entropy_coef = hparams['entropy_coef']
        self.cuda = hparams['cuda']
        self.opt = hparams['opt']
        self.grad_clip = hparams['grad_clip']

        # self.next_state_pred_ = hparams['next_state_pred_']

        # # Policy and Value network
        # if 'traj_action_mask' in hparams and hparams['traj_action_mask']:
        #     self.actor_critic = CNNPolicy_trajectory_action_mask(self.obs_shape[0], hparams['action_space'])
        # else:

        self.actor_critic = CNNPolicy(self.obs_shape[0],
                                      hparams['action_space'],
                                      hparams['n_contexts'])

        # Storing rollouts
        self.rollouts = RolloutStorage(self.num_steps, self.num_processes,
                                       self.obs_shape, hparams['action_space'])

        if self.cuda:
            self.actor_critic.cuda()
            self.rollouts.cuda()

        # Optimizer
        if self.opt == 'rms':
            self.optimizer = optim.RMSprop(
                params=self.actor_critic.parameters(),
                lr=hparams['lr'],
                eps=hparams['eps'],
                alpha=hparams['alpha'])
        elif self.opt == 'adam':
            self.optimizer = optim.Adam(params=self.actor_critic.parameters(),
                                        lr=hparams['lr'],
                                        eps=hparams['eps'])
        elif self.opt == 'sgd':
            self.optimizer = optim.SGD(params=self.actor_critic.parameters(),
                                       lr=hparams['lr'],
                                       momentum=hparams['mom'])
        else:
            raise ValueError('unknown optimizer: {}'.format(self.opt))

        self.action_shape = 1

        if (hparams['gif_'] or hparams['ls_'] or hparams['vae_']
                or hparams['grad_var_']):
            self.rollouts_list = RolloutStorage_list()

        self.hparams = hparams
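
For reference, here is a minimal sketch of the hparams dict this constructor reads. Every key below appears in the code above; the values (and the A2CAgent class name) are illustrative assumptions, not the repo's defaults.

hparams = {
    'use_gae': True, 'gamma': 0.99, 'tau': 0.95,
    'obs_shape': (4, 84, 84),            # stacked frames, assumed
    'num_steps': 5, 'num_processes': 16,
    'value_loss_coef': 0.5, 'entropy_coef': 0.01,
    'cuda': False, 'opt': 'adam',
    'lr': 7e-4, 'eps': 1e-5,             # 'alpha' / 'mom' only for rms / sgd
    'grad_clip': 0.5,
    'action_space': None,                # e.g. a gym Discrete space
    'n_contexts': 2,                     # illustrative value
    'gif_': False, 'ls_': False, 'vae_': False, 'grad_var_': False,
}
agent = A2CAgent(hparams)                # hypothetical owner of this __init__
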
Example #2
    def __init__(self, envs, hparams):
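        # Assumes module-level imports: optim is torch.optim; the policy classes
        # (CNNPolicy_dropout2, CNNPolicy2, MLPPolicy) and the rollout classes are
        # defined elsewhere in this repo.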

        self.use_gae = hparams['use_gae']
        self.gamma = hparams['gamma']
        self.tau = hparams['tau']

        self.obs_shape = hparams['obs_shape']
        self.num_steps = hparams['num_steps']
        self.num_processes = hparams['num_processes']
        self.value_loss_coef = hparams['value_loss_coef']
        self.entropy_coef = hparams['entropy_coef']
        self.cuda = hparams['cuda']
        self.opt = hparams['opt']
        self.grad_clip = hparams['grad_clip']



        if hparams['dropout']:
            print('CNNPolicy_dropout2')
            actor_critic = CNNPolicy_dropout2(self.obs_shape[0], envs.action_space)
            # actor_critic = CNNPolicy_dropout(self.obs_shape[0], envs.action_space)
        elif len(envs.observation_space.shape) == 3:
            print('CNNPolicy2')
            actor_critic = CNNPolicy2(self.obs_shape[0], envs.action_space)
            # actor_critic = CNNPolicy(self.obs_shape[0], envs.action_space)
        else:
            actor_critic = MLPPolicy(self.obs_shape[0], envs.action_space)

        if envs.action_space.__class__.__name__ == "Discrete":
            action_shape = 1
        else:
            action_shape = envs.action_space.shape[0]
        self.action_shape = action_shape

        rollouts = RolloutStorage(self.num_steps, self.num_processes, self.obs_shape, envs.action_space)
        # RolloutStorage keeps a state tensor of shape [steps, processes, obs];
        # the steps dimension is used to compute expected returns.

        if self.cuda:
            actor_critic.cuda()
            rollouts.cuda()

        # Optimizer
        if self.opt == 'rms':
            self.optimizer = optim.RMSprop(
                params=actor_critic.parameters(),
                lr=hparams['lr'],
                eps=hparams['eps'],
                alpha=hparams['alpha'])
        elif self.opt == 'adam':
            self.optimizer = optim.Adam(params=actor_critic.parameters(),
                                        lr=hparams['lr'],
                                        eps=hparams['eps'])
        elif self.opt == 'sgd':
            self.optimizer = optim.SGD(params=actor_critic.parameters(),
                                       lr=hparams['lr'],
                                       momentum=hparams['mom'])
        else:
            raise ValueError('unknown optimizer: {}'.format(self.opt))

        self.actor_critic = actor_critic
        self.rollouts = rollouts


        self.rollouts_list = RolloutStorage_list()
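
The Discrete-vs-continuous branch above decides how many columns an action occupies in the rollout buffer: a Discrete action is stored as a single integer index, while a continuous (Box) action needs one column per action dimension. A standalone sketch of the same check using gym spaces (the gym import is my assumption; the snippet itself never shows it):

from gym import spaces

def action_shape_of(action_space):
    # Matches the check in the example: Discrete -> 1 column,
    # otherwise one column per action dimension.
    if action_space.__class__.__name__ == "Discrete":
        return 1
    return action_space.shape[0]

assert action_shape_of(spaces.Discrete(6)) == 1
assert action_shape_of(spaces.Box(low=-1.0, high=1.0, shape=(3,))) == 3
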
Example #3
    def __init__(self, envs, hparams):
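        # Assumes module-level imports: optim is torch.optim; CNNPolicy,
        # CNNPolicy_trajectory_action_mask, RolloutStorage, and RolloutStorage_list
        # are defined elsewhere in this repo.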

        self.use_gae = hparams['use_gae']
        self.gamma = hparams['gamma']
        self.tau = hparams['tau']

        self.obs_shape = hparams['obs_shape']
        self.num_steps = hparams['num_steps']
        self.num_processes = hparams['num_processes']
        self.value_loss_coef = hparams['value_loss_coef']
        self.entropy_coef = hparams['entropy_coef']
        self.cuda = hparams['cuda']
        self.opt = hparams['opt']
        self.grad_clip = hparams['grad_clip']

        # Policy and Value network

        # if hparams['dropout'] == True:
        #     print ('CNNPolicy_dropout2')
        #     self.actor_critic = CNNPolicy_dropout2(self.obs_shape[0], envs.action_space)
        # elif len(envs.observation_space.shape) == 3:
        #     print ('CNNPolicy2')
        #     self.actor_critic = CNNPolicy2(self.obs_shape[0], envs.action_space)
        # else:
        #     self.actor_critic = MLPPolicy(self.obs_shape[0], envs.action_space)

        if 'traj_action_mask' in hparams and hparams['traj_action_mask']:
            self.actor_critic = CNNPolicy_trajectory_action_mask(
                self.obs_shape[0], envs.action_space)
        else:
            self.actor_critic = CNNPolicy(self.obs_shape[0], envs.action_space)

        # Storing rollouts
        self.rollouts = RolloutStorage(self.num_steps, self.num_processes,
                                       self.obs_shape, envs.action_space)

        if self.cuda:
            self.actor_critic.cuda()
            self.rollouts.cuda()

        # Optimizer
        if self.opt == 'rms':
            self.optimizer = optim.RMSprop(
                params=self.actor_critic.parameters(),
                lr=hparams['lr'],
                eps=hparams['eps'],
                alpha=hparams['alpha'])
        elif self.opt == 'adam':
            self.optimizer = optim.Adam(params=self.actor_critic.parameters(),
                                        lr=hparams['lr'],
                                        eps=hparams['eps'])
        elif self.opt == 'sgd':
            self.optimizer = optim.SGD(params=self.actor_critic.parameters(),
                                       lr=hparams['lr'],
                                       momentum=hparams['mom'])
        else:
            raise ValueError('unknown optimizer: {}'.format(self.opt))

        # if envs.action_space.__class__.__name__ == "Discrete":
        #     action_shape = 1
        # else:
        #     action_shape = envs.action_space.shape[0]
        # self.action_shape = action_shape
        self.action_shape = 1
        if hparams['gif_'] or hparams['ls_']:
            self.rollouts_list = RolloutStorage_list()

        self.hparams = hparams
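
All three examples share the same optimizer dispatch on hparams['opt']. Here is a self-contained sketch of that pattern; the nn.Linear stand-in model and the hyperparameter values are assumptions for illustration only.

import torch.nn as nn
import torch.optim as optim

def build_optimizer(params, hparams):
    # Mirrors the rms / adam / sgd dispatch used in the examples above.
    if hparams['opt'] == 'rms':
        return optim.RMSprop(params, lr=hparams['lr'],
                             eps=hparams['eps'], alpha=hparams['alpha'])
    elif hparams['opt'] == 'adam':
        return optim.Adam(params, lr=hparams['lr'], eps=hparams['eps'])
    elif hparams['opt'] == 'sgd':
        return optim.SGD(params, lr=hparams['lr'], momentum=hparams['mom'])
    raise ValueError('unknown optimizer: {}'.format(hparams['opt']))

model = nn.Linear(4, 2)    # stand-in for the actor-critic network
optimizer = build_optimizer(model.parameters(),
                            {'opt': 'adam', 'lr': 7e-4, 'eps': 1e-5})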