Example #1: constructor of a continuous-action A2C trainer (ContinuousA2CBase)
    def __init__(self, base_name, observation_space, action_space, config):
        common.a2c_common.ContinuousA2CBase.__init__(self, base_name,
                                                     observation_space,
                                                     action_space, config)
        # Convert the observation shape from (W, H, C) to PyTorch's (C, W, H)
        obs_shape = algos_torch.torch_ext.shape_whc_to_cwh(self.state_shape)
        config = {
            'actions_num': self.actions_num,
            'input_shape': obs_shape,
            'games_num': 1,
            'batch_num': 1,
        }
        self.model = self.network.build(config)
        self.model.cuda()
        self.last_lr = float(self.last_lr)
        self.optimizer = optim.Adam(self.model.parameters(), self.last_lr)
        #self.optimizer = algos_torch.torch_ext.RangerQH(self.model.parameters(), float(self.last_lr))

        if self.normalize_input:
            self.running_mean_std = RunningMeanStd(obs_shape).cuda()
        if self.has_curiosity:
            self.rnd_curiosity = rnd_curiosity.RNDCurisityTrain(
                algos_torch.torch_ext.shape_whc_to_cwh(self.state_shape),
                self.curiosity_config['network'], self.curiosity_config,
                self.writer, lambda obs: self._preproc_obs(obs))
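
Every example on this page passes the observation shape through algos_torch.torch_ext.shape_whc_to_cwh before building the network. A minimal sketch of what the helper is assumed to do (an illustration, not the library source): move the channel axis of a 3-D image shape from last to first, since PyTorch convolutions expect channels-first input, and leave other shapes untouched.

def shape_whc_to_cwh(shape):
    # (W, H, C) -> (C, W, H) for image observations; other shapes unchanged
    if len(shape) == 3:
        return (shape[2], shape[0], shape[1])
    return shape
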
Example #2: constructor of an inference-time player (BasePlayer)
    def __init__(self, config):
        BasePlayer.__init__(self, config)
        self.network = config['network']
        self.actions_num = self.action_space.shape[0]
        self.actions_low = self.action_space.low
        self.actions_high = self.action_space.high
        self.mask = [False]

        observation_shape = algos_torch.torch_ext.shape_whc_to_cwh(
            self.state_shape)

        self.normalize_input = self.config['normalize_input']
        config = {
            'actions_num': self.actions_num,
            'input_shape': observation_shape,
            'games_num': 1,
            'batch_num': 1,
        }
        self.model = self.network.build(config)
        self.model.cuda()
        self.model.eval()

        if self.normalize_input:
            self.running_mean_std = RunningMeanStd(observation_shape).cuda()
            self.running_mean_std.eval()  # freeze statistics at inference time
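
Examples #1 and #2 both rely on a RunningMeanStd module for input normalization. The sketch below shows one common way such a module works (modeled on the OpenAI Baselines recipe; the actual algos_torch class may differ): running mean and variance buffers are updated in train mode, while eval mode, as set by the player above, freezes the statistics and only normalizes.

import torch
import torch.nn as nn

class RunningMeanStdSketch(nn.Module):
    def __init__(self, shape, epsilon=1e-5):
        super().__init__()
        self.register_buffer('mean', torch.zeros(shape))
        self.register_buffer('var', torch.ones(shape))
        self.register_buffer('count', torch.tensor(epsilon))

    def forward(self, x):
        if self.training:
            # Chan et al. parallel update of running mean and variance
            batch_mean = x.mean(dim=0)
            batch_var = x.var(dim=0, unbiased=False)
            batch_count = x.shape[0]
            delta = batch_mean - self.mean
            total = self.count + batch_count
            m_a = self.var * self.count
            m_b = batch_var * batch_count
            self.mean = self.mean + delta * batch_count / total
            self.var = (m_a + m_b +
                        delta ** 2 * self.count * batch_count / total) / total
            self.count = total
        return (x - self.mean) / torch.sqrt(self.var + 1e-8)
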
Example #3: constructor of the RND curiosity training module
    def __init__(self, state_shape, model, config, writer, _preproc_obs):
        nn.Module.__init__(self)
        rnd_config = {
            'input_shape': state_shape,
        }
        self.model = RNDCuriosityNetwork(model.build('rnd',
                                                     **rnd_config)).cuda()
        self.config = config
        self.lr = config['lr']
        self.writer = writer
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          float(self.lr))
        self._preproc_obs = _preproc_obs
        # Normalizes the scalar prediction error used as intrinsic reward
        self.output_normalization = RunningMeanStd((1, ),
                                                   norm_only=True).cuda()
        self.frame = 0
        self.exp_percent = config.get('exp_percent', 1.0)
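
For context, the class above is the training side of Random Network Distillation (Burda et al., 2018). Its core computation is sketched below under the assumption that the wrapped model pairs a frozen random "target" network with a trained "predictor"; the function and argument names are illustrative, not the library API.

import torch

def rnd_intrinsic_reward(target_net, predictor_net, obs):
    # The target is a fixed random embedding and is never trained
    with torch.no_grad():
        target = target_net(obs)
    # The predictor is trained to match the target, so its residual error
    # is large on rarely visited states and shrinks with experience
    pred = predictor_net(obs)
    return ((pred - target) ** 2).mean(dim=-1)
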
Example #4: constructor of a discrete-action A2C trainer (DiscreteA2CBase)
    def __init__(self, base_name, observation_space, action_space, config):
        common.a2c_common.DiscreteA2CBase.__init__(self, base_name,
                                                   observation_space,
                                                   action_space, config)
        config = {
            'actions_num': self.actions_num,
            'input_shape':
            algos_torch.torch_ext.shape_whc_to_cwh(self.state_shape),
            'games_num': 1,
            'batch_num': 1,
        }
        self.model = self.network.build(config)
        self.model.cuda()
        self.last_lr = float(self.last_lr)
        self.optimizer = optim.Adam(self.model.parameters(), self.last_lr)
        #self.optimizer = algos_torch.torch_ext.RangerQH(self.model.parameters(), float(self.last_lr))
        if self.normalize_input:
            # Note: unlike Example #1, the raw observation_space.shape is
            # used here, without the WHC -> CWH conversion
            self.running_mean_std = RunningMeanStd(
                observation_space.shape).cuda()
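
All four constructors hand network.build the same four-key dict. As a hypothetical illustration of how such a config might be consumed (the real algos_torch builders are considerably more elaborate), a minimal builder could flatten the input shape and size the output head from actions_num:

import torch.nn as nn

def build(config):
    # Total input features after flattening the observation shape
    in_features = 1
    for d in config['input_shape']:
        in_features *= d
    return nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features, 256),
        nn.ReLU(),
        nn.Linear(256, config['actions_num']),
    )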