Example #1
    def __init__(
        self,
        seed: int,
        behavior_spec: BehaviorSpec,
        trainer_settings: TrainerSettings,
        model_path: str,
        load: bool = False,
    ):
        """
        Initializes the policy.
        :param seed: Random seed to use for TensorFlow.
        :param behavior_spec: The behavior specification for this policy.
        :param trainer_settings: The trainer parameters.
        :param model_path: Where to load/save the model.
        :param load: If True, load the model from model_path. Otherwise, create a new model.
        """

        self.m_size = 0
        self.trainer_settings = trainer_settings
        self.network_settings: NetworkSettings = trainer_settings.network_settings
        # for ghost trainer save/load snapshots
        self.assign_phs: List[tf.Tensor] = []
        self.assign_ops: List[tf.Operation] = []

        self.inference_dict: Dict[str, tf.Tensor] = {}
        self.update_dict: Dict[str, tf.Tensor] = {}
        self.sequence_length = 1
        self.seed = seed
        self.behavior_spec = behavior_spec

        self.act_size = (list(behavior_spec.discrete_action_branches)
                         if behavior_spec.is_action_discrete() else
                         [behavior_spec.action_size])
        self.vec_obs_size = sum(shape[0]
                                for shape in behavior_spec.observation_shapes
                                if len(shape) == 1)
        self.vis_obs_size = sum(1 for shape in behavior_spec.observation_shapes
                                if len(shape) == 3)

        self.use_recurrent = self.network_settings.memory is not None
        self.memory_dict: Dict[str, np.ndarray] = {}
        self.num_branches = self.behavior_spec.action_size
        self.previous_action_dict: Dict[str, np.ndarray] = {}
        self.normalize = self.network_settings.normalize
        self.use_continuous_act = behavior_spec.is_action_continuous()
        self.model_path = model_path
        self.initialize_path = self.trainer_settings.init_path
        self.keep_checkpoints = self.trainer_settings.keep_checkpoints
        self.graph = tf.Graph()
        self.sess = tf.Session(config=tf_utils.generate_session_config(),
                               graph=self.graph)
        self.saver: Optional[tf.Operation] = None
        if self.network_settings.memory is not None:
            self.m_size = self.network_settings.memory.memory_size
            self.sequence_length = self.network_settings.memory.sequence_length
        self._initialize_tensorflow_references()
        self.load = load
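
A minimal, self-contained sketch of the observation-size bookkeeping above, using hypothetical observation shapes (two vector observations and one 84x84 RGB camera); no BehaviorSpec is needed:

    # Hypothetical shapes: two vector observations and one visual observation.
    observation_shapes = [(8,), (4,), (84, 84, 3)]

    # Sum the lengths of all 1-D (vector) observations.
    vec_obs_size = sum(shape[0] for shape in observation_shapes if len(shape) == 1)

    # Count the 3-D (visual) observations.
    vis_obs_size = sum(1 for shape in observation_shapes if len(shape) == 3)

    print(vec_obs_size, vis_obs_size)  # 12 1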
Example #2
    def __init__(self, seed, brain, trainer_parameters, load=False):
        """
        Initializes the policy.
        :param seed: Random seed to use for TensorFlow.
        :param brain: The corresponding Brain for this policy.
        :param trainer_parameters: The trainer parameters.
        :param load: If True, load the model from the output path. Otherwise, create a new model.
        """
        self._version_number_ = 2
        self.m_size = 0

        # for ghost trainer save/load snapshots
        self.assign_phs = []
        self.assign_ops = []

        self.inference_dict = {}
        self.update_dict = {}
        self.sequence_length = 1
        self.seed = seed
        self.brain = brain

        self.act_size = brain.vector_action_space_size
        self.vec_obs_size = brain.vector_observation_space_size
        self.vis_obs_size = brain.number_visual_observations

        self.use_recurrent = trainer_parameters["use_recurrent"]
        self.memory_dict: Dict[str, np.ndarray] = {}
        self.num_branches = len(self.brain.vector_action_space_size)
        self.previous_action_dict: Dict[str, np.ndarray] = {}
        self.normalize = trainer_parameters.get("normalize", False)
        self.use_continuous_act = brain.vector_action_space_type == "continuous"
        if self.use_continuous_act:
            self.num_branches = self.brain.vector_action_space_size[0]
        self.model_path = trainer_parameters["output_path"]
        self.initialize_path = trainer_parameters.get("init_path", None)
        self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)
        self.graph = tf.Graph()
        self.sess = tf.Session(
            config=tf_utils.generate_session_config(), graph=self.graph
        )
        self.saver = None
        if self.use_recurrent:
            self.m_size = trainer_parameters["memory_size"]
            self.sequence_length = trainer_parameters["sequence_length"]
            if self.m_size == 0:
                raise UnityPolicyException(
                    "The memory size for brain {0} is 0 even "
                    "though the trainer uses recurrent.".format(brain.brain_name)
                )
            elif self.m_size % 2 != 0:
                raise UnityPolicyException(
                    "The memory size for brain {0} is {1} "
                    "but it must be divisible by 2.".format(
                        brain.brain_name, self.m_size
                    )
                )
        self._initialize_tensorflow_references()
        self.load = load
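
For reference, a hypothetical trainer_parameters dict that would satisfy this constructor, including the recurrent checks (memory size non-zero and divisible by 2); all values are illustrative:

    trainer_parameters = {
        "use_recurrent": True,
        "memory_size": 128,       # must be non-zero and divisible by 2
        "sequence_length": 64,
        "normalize": False,       # optional, defaults to False
        "output_path": "./results/example_run",
        "init_path": None,        # optional, defaults to None
        "keep_checkpoints": 5,    # optional, defaults to 5
    }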
Example #3
    def __init__(
        self,
        seed: int,
        behavior_spec: BehaviorSpec,
        trainer_settings: TrainerSettings,
        tanh_squash: bool = False,
        reparameterize: bool = False,
        condition_sigma_on_obs: bool = True,
        create_tf_graph: bool = True,
    ):
        """
        Initializes the policy.
        :param seed: Random seed to use for TensorFlow.
        :param behavior_spec: The behavior specification for this policy.
        :param trainer_settings: The trainer parameters.
        :param tanh_squash: Whether to apply a tanh squash to the continuous action output.
        :param reparameterize: Whether to use the reparameterization trick when sampling continuous actions.
        :param condition_sigma_on_obs: Whether the Gaussian standard deviation is conditioned on the observation.
        :param create_tf_graph: If True, build the TensorFlow graph at construction time.
        """
        super().__init__(
            seed,
            behavior_spec,
            trainer_settings,
            tanh_squash,
            reparameterize,
            condition_sigma_on_obs,
        )
        if (
            self.behavior_spec.action_spec.continuous_size > 0
            and self.behavior_spec.action_spec.discrete_size > 0
        ):
            raise UnityPolicyException(
                "TensorFlow does not support mixed action spaces. Please run with the Torch framework."
            )
        # for ghost trainer save/load snapshots
        self.assign_phs: List[tf.Tensor] = []
        self.assign_ops: List[tf.Operation] = []
        self.update_dict: Dict[str, tf.Tensor] = {}
        self.inference_dict: Dict[str, tf.Tensor] = {}
        self.first_normalization_update: bool = False

        self.graph = tf.Graph()
        self.sess = tf.Session(
            config=tf_utils.generate_session_config(), graph=self.graph
        )
        self._initialize_tensorflow_references()
        self.grads = None
        self.update_batch: Optional[tf.Operation] = None
        self.trainable_variables: List[tf.Variable] = []
        self.rank = get_rank()
        if create_tf_graph:
            self.create_tf_graph()
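
The mixed-action guard above only depends on the action spec's two size fields. A standalone sketch with hypothetical sizes, mirroring the check:

    # Hypothetical action spec: 2 continuous actions and 1 discrete branch.
    continuous_size, discrete_size = 2, 1

    # Mirrors the guard above: the TensorFlow backend rejects mixed action spaces.
    if continuous_size > 0 and discrete_size > 0:
        raise ValueError(
            "TensorFlow does not support mixed action spaces. "
            "Please run with the Torch framework."
        )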
Example #4
    def __init__(
        self,
        seed: int,
        behavior_spec: BehaviorSpec,
        trainer_settings: TrainerSettings,
        model_path: str,
        load: bool = False,
        tanh_squash: bool = False,
        reparameterize: bool = False,
        condition_sigma_on_obs: bool = True,
        create_tf_graph: bool = True,
    ):
        """
        Initializes the policy.
        :param seed: Random seed to use for TensorFlow.
        :param behavior_spec: The behavior specification for this policy.
        :param trainer_settings: The trainer parameters.
        :param model_path: Where to load/save the model.
        :param load: If True, load the model from model_path. Otherwise, create a new model.
        :param tanh_squash: Whether to apply a tanh squash to the continuous action output.
        :param reparameterize: Whether to use the reparameterization trick when sampling continuous actions.
        :param condition_sigma_on_obs: Whether the Gaussian standard deviation is conditioned on the observation.
        :param create_tf_graph: If True, build the TensorFlow graph at construction time.
        """
        super().__init__(
            seed,
            behavior_spec,
            trainer_settings,
            model_path,
            load,
            tanh_squash,
            reparameterize,
            condition_sigma_on_obs,
        )
        # for ghost trainer save/load snapshots
        self.assign_phs: List[tf.Tensor] = []
        self.assign_ops: List[tf.Operation] = []
        self.update_dict: Dict[str, tf.Tensor] = {}
        self.inference_dict: Dict[str, tf.Tensor] = {}
        self.first_normalization_update: bool = False

        self.graph = tf.Graph()
        self.sess = tf.Session(config=tf_utils.generate_session_config(),
                               graph=self.graph)
        self.saver: Optional[tf.Operation] = None
        self._initialize_tensorflow_references()
        self.grads = None
        self.update_batch: Optional[tf.Operation] = None
        self.trainable_variables: List[tf.Variable] = []
        if create_tf_graph:
            self.create_tf_graph()
Example #5
 def _dict_to_tensorboard(self, name: str, input_dict: Dict[str, Any]) -> str:
     """
     Convert a dict to a Tensorboard-encoded string.
     :param name: The name of the text.
     :param input_dict: A dictionary that will be displayed in a table on Tensorboard.
     """
     try:
         with tf.Session(config=generate_session_config()) as sess:
             s_op = tf.summary.text(
                 name,
                 tf.convert_to_tensor(
                     [[str(x), str(input_dict[x])] for x in input_dict]),
             )
             s = sess.run(s_op)
             return s
     except Exception:
         logger.warning("Could not write text summary for Tensorboard.")
         return ""
Example #6
 def __init__(self, seed, brain, trainer_parameters):
     """
     Initializes the policy.
     :param seed: Random seed to use for TensorFlow.
     :param brain: The corresponding Brain for this policy.
     :param trainer_parameters: The trainer parameters.
     """
     self.m_size = None
     self.model = None
     self.inference_dict = {}
     self.update_dict = {}
     self.sequence_length = 1
     self.seed = seed
     self.brain = brain
     self.use_recurrent = trainer_parameters["use_recurrent"]
     self.memory_dict: Dict[str, np.ndarray] = {}
     self.reward_signals: Dict[str, "RewardSignal"] = {}
     self.num_branches = len(self.brain.vector_action_space_size)
     self.previous_action_dict: Dict[str, np.ndarray] = {}
     self.normalize = trainer_parameters.get("normalize", False)
     self.use_continuous_act = brain.vector_action_space_type == "continuous"
     if self.use_continuous_act:
         self.num_branches = self.brain.vector_action_space_size[0]
     self.model_path = trainer_parameters["model_path"]
     self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)
     self.graph = tf.Graph()
     self.sess = tf.Session(config=tf_utils.generate_session_config(),
                            graph=self.graph)
     self.saver = None
     if self.use_recurrent:
         self.m_size = trainer_parameters["memory_size"]
         self.sequence_length = trainer_parameters["sequence_length"]
         if self.m_size == 0:
             raise UnityPolicyException(
                 "The memory size for brain {0} is 0 even "
                 "though the trainer uses recurrent.".format(
                     brain.brain_name))
         elif self.m_size % 4 != 0:
             raise UnityPolicyException(
                 "The memory size for brain {0} is {1} "
                 "but it must be divisible by 4.".format(
                     brain.brain_name, self.m_size))
Example #7
 def write_tensorboard_text(self, key: str, input_dict: Dict[str, Any]) -> None:
     """
     Saves text to Tensorboard.
     Note: Only works on tensorflow r1.2 or above.
     :param key: The name of the text.
     :param input_dict: A dictionary that will be displayed in a table on Tensorboard.
     """
     try:
         with tf.Session(config=tf_utils.generate_session_config()) as sess:
             s_op = tf.summary.text(
                 key,
                 tf.convert_to_tensor(
                  [[str(x), str(input_dict[x])] for x in input_dict]
                 ),
             )
             s = sess.run(s_op)
             self.stats_reporter.write_text(s, self.get_step)
     except Exception:
         LOGGER.info("Could not write text summary for Tensorboard.")
Example #8
    def __init__(
        self,
        seed: int,
        brain: BrainParameters,
        trainer_settings: TrainerSettings,
        model_path: str,
        load: bool = False,
    ):
        """
        Initializes the policy.
        :param seed: Random seed to use for TensorFlow.
        :param brain: The corresponding Brain for this policy.
        :param trainer_settings: The trainer parameters.
        :param model_path: Where to load/save the model.
        :param load: If True, load the model from model_path. Otherwise, create a new model.
        """

        self.m_size = 0
        self.trainer_settings = trainer_settings
        self.network_settings: NetworkSettings = trainer_settings.network_settings
        # for ghost trainer save/load snapshots
        self.assign_phs: List[tf.Tensor] = []
        self.assign_ops: List[tf.Operation] = []

        self.inference_dict: Dict[str, tf.Tensor] = {}
        self.update_dict: Dict[str, tf.Tensor] = {}
        self.sequence_length = 1
        self.seed = seed
        self.brain = brain

        self.act_size = brain.vector_action_space_size
        self.vec_obs_size = brain.vector_observation_space_size
        self.vis_obs_size = brain.number_visual_observations

        self.use_recurrent = self.network_settings.memory is not None
        self.memory_dict: Dict[str, np.ndarray] = {}
        self.num_branches = len(self.brain.vector_action_space_size)
        self.previous_action_dict: Dict[str, np.ndarray] = {}
        self.normalize = self.network_settings.normalize
        self.use_continuous_act = brain.vector_action_space_type == "continuous"
        if self.use_continuous_act:
            self.num_branches = self.brain.vector_action_space_size[0]
        self.model_path = model_path
        self.initialize_path = self.trainer_settings.init_path
        self.keep_checkpoints = self.trainer_settings.keep_checkpoints
        self.graph = tf.Graph()
        self.sess = tf.Session(config=tf_utils.generate_session_config(),
                               graph=self.graph)
        self.saver: Optional[tf.Operation] = None
        if self.network_settings.memory is not None:
            self.m_size = self.network_settings.memory.memory_size
            self.sequence_length = self.network_settings.memory.sequence_length
            if self.m_size == 0:
                raise UnityPolicyException(
                    "The memory size for brain {0} is 0 even "
                    "though the trainer uses recurrent.".format(
                        brain.brain_name))
            elif self.m_size % 2 != 0:
                raise UnityPolicyException(
                    "The memory size for brain {0} is {1} "
                    "but it must be divisible by 2.".format(
                        brain.brain_name, self.m_size))
        self._initialize_tensorflow_references()
        self.load = load
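
The branch bookkeeping in this constructor differs between discrete and continuous action spaces. A standalone sketch with hypothetical brain values:

    # Hypothetical continuous space with 3 actions.
    vector_action_space_type = "continuous"
    vector_action_space_size = [3]

    # For discrete spaces this counts the action branches; for continuous
    # spaces the constructor above overwrites it with the action count.
    num_branches = len(vector_action_space_size)
    if vector_action_space_type == "continuous":
        num_branches = vector_action_space_size[0]

    print(num_branches)  # 3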