def create_bc_module(mock_behavior_specs, bc_settings, use_rnn, tanhresample):
    """Build a ``BCModule`` wrapped around a freshly constructed ``TorchPolicy``.

    Args:
        mock_behavior_specs: Behavior specs handed to ``TorchPolicy`` as its
            second positional argument.
        bc_settings: Forwarded to ``BCModule`` as its ``settings`` keyword.
        use_rnn: When truthy, the trainer's network settings get a default
            ``NetworkSettings.MemorySettings()``; otherwise memory is ``None``.
        tanhresample: Passed to ``TorchPolicy`` as both its fourth and fifth
            positional arguments.
            NOTE(review): presumably these are the tanh-squash and
            reparameterize switches -- confirm against ``TorchPolicy``.

    Returns:
        The newly created ``BCModule``.
    """
    settings = TrainerSettings()

    # Memory settings are present only for the recurrent configuration.
    if use_rnn:
        settings.network_settings.memory = NetworkSettings.MemorySettings()
    else:
        settings.network_settings.memory = None

    torch_policy = TorchPolicy(
        0, mock_behavior_specs, settings, tanhresample, tanhresample
    )

    # Read the hyperparameters only after the policy exists, matching the
    # original evaluation order.
    hyperparameters = settings.hyperparameters
    return BCModule(
        torch_policy,
        settings=bc_settings,
        policy_learning_rate=hyperparameters.learning_rate,
        default_batch_size=hyperparameters.batch_size,
        default_num_epoch=3,
    )
def __init__(self, policy: TorchPolicy, trainer_settings: TrainerSettings):
    """Set up optimizer state and, if configured, a behavioral cloning module.

    Args:
        policy: The policy stored on ``self.policy`` and handed to
            ``BCModule`` when behavioral cloning is enabled.
        trainer_settings: Trainer configuration. Its ``reward_signals`` are
            passed to ``self.create_reward_signals``. Its
            ``behavioral_cloning`` entry, when not ``None``, triggers creation
            of ``self.bc_module``, using ``hyperparameters.learning_rate`` and
            ``hyperparameters.batch_size`` as defaults.
    """
    super().__init__()
    self.policy = policy
    self.trainer_settings = trainer_settings

    # Containers start empty; this constructor does not populate them.
    self.update_dict: Dict[str, torch.Tensor] = {}
    self.value_heads: Dict[str, torch.Tensor] = {}

    # Memory tensors are placeholders (None) until assigned elsewhere.
    self.memory_in: torch.Tensor = None
    self.memory_out: torch.Tensor = None
    self.m_size: int = 0

    self.global_step = torch.tensor(0)
    self.bc_module: Optional[BCModule] = None

    # Reward signals are created after the attributes above exist, in case
    # create_reward_signals relies on them (kept in the original order).
    self.create_reward_signals(trainer_settings.reward_signals)

    cloning_settings = trainer_settings.behavioral_cloning
    if cloning_settings is None:
        # No behavioral cloning configured: bc_module stays None.
        return

    hyperparameters = trainer_settings.hyperparameters
    self.bc_module = BCModule(
        self.policy,
        cloning_settings,
        policy_learning_rate=hyperparameters.learning_rate,
        default_batch_size=hyperparameters.batch_size,
        default_num_epoch=3,
    )