Code Example #1
    def __init__(
        self,
        seed: int,
        behavior_spec: BehaviorSpec,
        trainer_settings: TrainerSettings,
        model_path: str,
        load: bool = False,
    ):
        """
        Initializes the policy.
        :param seed: Random seed to use for TensorFlow.
        :param behavior_spec: The corresponding BehaviorSpec for this policy.
        :param trainer_settings: The trainer parameters.
        :param model_path: Where to load/save the model.
        :param load: If True, load model from model_path. Otherwise, create new model.
        """

        self.m_size = 0
        self.trainer_settings = trainer_settings
        self.network_settings: NetworkSettings = trainer_settings.network_settings
        # for ghost trainer save/load snapshots
        self.assign_phs: List[tf.Tensor] = []
        self.assign_ops: List[tf.Operation] = []

        self.inference_dict: Dict[str, tf.Tensor] = {}
        self.update_dict: Dict[str, tf.Tensor] = {}
        self.sequence_length = 1
        self.seed = seed
        self.behavior_spec = behavior_spec

        self.act_size = (
            list(behavior_spec.discrete_action_branches)
            if behavior_spec.is_action_discrete()
            else [behavior_spec.action_size]
        )
        self.vec_obs_size = sum(
            shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1
        )
        self.vis_obs_size = sum(
            1 for shape in behavior_spec.observation_shapes if len(shape) == 3
        )

        self.use_recurrent = self.network_settings.memory is not None
        self.memory_dict: Dict[str, np.ndarray] = {}
        self.num_branches = self.behavior_spec.action_size
        self.previous_action_dict: Dict[str, np.ndarray] = {}
        self.normalize = self.network_settings.normalize
        self.use_continuous_act = behavior_spec.is_action_continuous()
        self.model_path = model_path
        self.initialize_path = self.trainer_settings.init_path
        self.keep_checkpoints = self.trainer_settings.keep_checkpoints
        self.graph = tf.Graph()
        self.sess = tf.Session(config=tf_utils.generate_session_config(),
                               graph=self.graph)
        self.saver: Optional[tf.Operation] = None
        self.seed = seed
        if self.network_settings.memory is not None:
            self.m_size = self.network_settings.memory.memory_size
            self.sequence_length = self.network_settings.memory.sequence_length
        self._initialize_tensorflow_references()
        self.load = load
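
The observation bookkeeping above splits behavior_spec.observation_shapes into flat vector observations (rank-1 shapes, whose lengths are summed into vec_obs_size) and visual observations (rank-3 shapes, which are only counted into vis_obs_size). A minimal standalone sketch of that derivation, using made-up shapes rather than a real BehaviorSpec:

# Illustrative only: these shapes are invented, not taken from the example above.
observation_shapes = [(8,), (4,), (84, 84, 3)]  # two vector observations, one camera

# Rank-1 shapes are flat vectors; their lengths add up to the vector input size.
vec_obs_size = sum(shape[0] for shape in observation_shapes if len(shape) == 1)
# Rank-3 shapes are images; only the number of visual inputs is needed here.
vis_obs_size = sum(1 for shape in observation_shapes if len(shape) == 3)

print(vec_obs_size, vis_obs_size)  # -> 12 1
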
Code Example #2
    def __init__(
        self,
        seed: int,
        behavior_spec: BehaviorSpec,
        trainer_settings: TrainerSettings,
        model_path: str,
        load: bool = False,
        tanh_squash: bool = False,
        reparameterize: bool = False,
        condition_sigma_on_obs: bool = True,
    ):
        self.behavior_spec = behavior_spec
        self.trainer_settings = trainer_settings
        self.network_settings: NetworkSettings = trainer_settings.network_settings
        self.seed = seed
        self.act_size = (
            list(behavior_spec.discrete_action_branches)
            if behavior_spec.is_action_discrete()
            else [behavior_spec.action_size]
        )
        self.vec_obs_size = sum(
            shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1
        )
        self.vis_obs_size = sum(
            1 for shape in behavior_spec.observation_shapes if len(shape) == 3
        )
        self.model_path = model_path
        self.initialize_path = self.trainer_settings.init_path
        self._keep_checkpoints = self.trainer_settings.keep_checkpoints
        self.use_continuous_act = behavior_spec.is_action_continuous()
        self.num_branches = self.behavior_spec.action_size
        self.previous_action_dict: Dict[str, np.ndarray] = {}
        self.memory_dict: Dict[str, np.ndarray] = {}
        self.normalize = trainer_settings.network_settings.normalize
        self.use_recurrent = self.network_settings.memory is not None
        self.load = load
        self.h_size = self.network_settings.hidden_units
        num_layers = self.network_settings.num_layers
        if num_layers < 1:
            num_layers = 1
        self.num_layers = num_layers

        self.vis_encode_type = self.network_settings.vis_encode_type
        self.tanh_squash = tanh_squash
        self.reparameterize = reparameterize
        self.condition_sigma_on_obs = condition_sigma_on_obs

        self.m_size = 0
        self.sequence_length = 1
        if self.network_settings.memory is not None:
            self.m_size = self.network_settings.memory.memory_size
            self.sequence_length = self.network_settings.memory.sequence_length

        # Non-exposed parameters; these aren't exposed because they don't have a
        # good explanation and usually shouldn't be touched.
        self.log_std_min = -20
        self.log_std_max = 2
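
The log_std_min / log_std_max bounds above are the usual clamp applied to the log standard deviation of a Gaussian action distribution before it is exponentiated, which keeps sigma in a numerically safe range. A rough standalone sketch of that use (not the library's actual code):

import numpy as np

# Hypothetical raw log-std values, as a network output head might emit them.
raw_log_std = np.array([-35.0, -3.0, 5.0])

log_std_min, log_std_max = -20, 2
log_std = np.clip(raw_log_std, log_std_min, log_std_max)
sigma = np.exp(log_std)  # standard deviation actually fed to the Gaussian policy

print(sigma)  # extreme values are pulled back into a stable range
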
Code Example #3
File: trainer.py  Project: zt1217396582/ml-agents
    def create_torch_policy(
        self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
    ) -> TorchPolicy:
        """
        Creates a policy with a PyTorch backend and PPO hyperparameters.
        :param parsed_behavior_id: The parsed BehaviorIdentifiers for this policy.
        :param behavior_spec: Specifications for policy construction.
        :return: policy
        """
        policy = TorchPolicy(
            self.seed,
            behavior_spec,
            self.trainer_settings,
            condition_sigma_on_obs=False,  # Faster training for PPO
            separate_critic=behavior_spec.is_action_continuous(),
        )
        return policy
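
The two keyword arguments passed to TorchPolicy here encode PPO-specific choices: the action noise is not conditioned on the observation (the inline comment notes this trains faster for PPO), and a separate critic network is requested only when the behavior uses continuous actions. A small sketch of that flag selection; ppo_policy_kwargs is a hypothetical helper, not part of ml-agents:

def ppo_policy_kwargs(is_action_continuous: bool) -> dict:
    # Mirrors the keyword arguments chosen in create_torch_policy above.
    return {
        "condition_sigma_on_obs": False,          # faster training for PPO
        "separate_critic": is_action_continuous,  # separate critic only for continuous actions
    }

print(ppo_policy_kwargs(True))   # {'condition_sigma_on_obs': False, 'separate_critic': True}
print(ppo_policy_kwargs(False))  # {'condition_sigma_on_obs': False, 'separate_critic': False}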