コード例 #1
0
def create_bc_module(mock_behavior_specs, bc_settings, use_rnn, tanhresample):
    """Build a BCModule wrapped around a freshly constructed TorchPolicy.

    A default ``TrainerSettings`` is created; its network memory settings are
    set to a default ``NetworkSettings.MemorySettings()`` when ``use_rnn`` is
    truthy and to ``None`` otherwise.

    Args:
        mock_behavior_specs: Behavior specs handed to ``TorchPolicy``.
        bc_settings: Settings object forwarded to ``BCModule`` as ``settings``.
        use_rnn: Whether the policy's network settings should carry memory
            settings.
        tanhresample: Value passed for both of the last two positional
            arguments of ``TorchPolicy``.

    Returns:
        The constructed ``BCModule``.
    """
    settings = TrainerSettings()
    if use_rnn:
        settings.network_settings.memory = NetworkSettings.MemorySettings()
    else:
        settings.network_settings.memory = None

    # The same flag is deliberately supplied twice, as in the policy call
    # signature used by this helper.
    policy = TorchPolicy(
        0, mock_behavior_specs, settings, tanhresample, tanhresample
    )

    hyperparameters = settings.hyperparameters
    return BCModule(
        policy,
        settings=bc_settings,
        policy_learning_rate=hyperparameters.learning_rate,
        default_batch_size=hyperparameters.batch_size,
        default_num_epoch=3,
    )
コード例 #2
0
ファイル: torch_optimizer.py プロジェクト: tyohanan/ml-agents
 def __init__(self, policy: TorchPolicy, trainer_settings: TrainerSettings):
     """Initialize optimizer state for ``policy``.

     Stores the policy and trainer settings, sets up empty bookkeeping
     containers, creates the reward signals described by
     ``trainer_settings.reward_signals`` and, when
     ``trainer_settings.behavioral_cloning`` is not ``None``, builds a
     ``BCModule`` for the policy.

     Args:
         policy: The policy this optimizer operates on.
         trainer_settings: Trainer configuration; its ``reward_signals``,
             ``behavioral_cloning`` and ``hyperparameters`` fields are read
             here.
     """
     super().__init__()
     self.policy = policy
     self.trainer_settings = trainer_settings
     self.update_dict: Dict[str, torch.Tensor] = {}
     self.value_heads: Dict[str, torch.Tensor] = {}
     # Memory tensors start unset, so they are Optional rather than plain
     # Tensor (the previous annotation contradicted the None default).
     self.memory_in: Optional[torch.Tensor] = None
     self.memory_out: Optional[torch.Tensor] = None
     self.m_size: int = 0
     self.global_step = torch.tensor(0)
     # Stays None unless behavioral cloning is configured below.
     self.bc_module: Optional[BCModule] = None
     self.create_reward_signals(trainer_settings.reward_signals)
     if trainer_settings.behavioral_cloning is not None:
         self.bc_module = BCModule(
             self.policy,
             trainer_settings.behavioral_cloning,
             policy_learning_rate=trainer_settings.hyperparameters.learning_rate,
             default_batch_size=trainer_settings.hyperparameters.batch_size,
             default_num_epoch=3,
         )