def __init__(
        self,
        trainer_factory: TrainerFactory,
        output_path: str,
        run_id: str,
        param_manager: EnvironmentParameterManager,
        train: bool,
        training_seed: int,
    ):
        """
        Store the run configuration, create empty bookkeeping containers and
        seed the Numpy and Tensorflow random number generators.

        :param trainer_factory: Factory kept on the controller; its ``ghost_controller``
        attribute is also exposed as ``self.ghost_controller``.
        :param output_path: Path to save the model.
        :param run_id: The sub-directory name for model and summary statistics
        :param param_manager: EnvironmentParameterManager object which stores information about all
        environment parameters.
        :param train: Whether to train model, or only run inference.
        :param training_seed: Seed to use for Numpy and Tensorflow random number generation.
        """
        # Collaborators and settings supplied by the caller.
        self.trainer_factory = trainer_factory
        self.ghost_controller = trainer_factory.ghost_controller
        self.param_manager = param_manager
        self.output_path = output_path
        self.run_id = run_id
        self.train_model = train
        self.logger = get_logger(__name__)

        # Per-run bookkeeping; everything starts out empty.
        self.trainers: Dict[str, Trainer] = {}
        self.brain_name_to_identifier: Dict[str, Set] = defaultdict(set)
        self.registered_behavior_ids: Set[str] = set()
        self.trainer_threads: List[threading.Thread] = []
        self.kill_trainers = False

        # Seed Numpy first, then Tensorflow, with the same value.
        np.random.seed(training_seed)
        tf.set_random_seed(training_seed)
    def __init__(
        self,
        trainer_factory: TrainerFactory,
        output_path: str,
        run_id: str,
        meta_curriculum: Optional[MetaCurriculum],
        train: bool,
        training_seed: int,
        sampler_manager: SamplerManager,
        resampling_interval: Optional[int],
    ):
        """
        Store the run configuration, create empty bookkeeping containers and
        seed the Numpy and Tensorflow random number generators.

        :param trainer_factory: Factory kept on the controller; its ``ghost_controller``
        attribute is also exposed as ``self.ghost_controller``.
        :param output_path: Path to save the model.
        :param run_id: The sub-directory name for model and summary statistics
        :param meta_curriculum: MetaCurriculum object which stores information about all curricula.
        :param train: Whether to train model, or only run inference.
        :param training_seed: Seed to use for Numpy and Tensorflow random number generation.
        :param sampler_manager: SamplerManager object handles samplers for resampling the reset parameters.
        :param resampling_interval: Specifies number of simulation steps after which reset parameters are resampled.
        """
        # Collaborators and settings supplied by the caller.
        self.trainer_factory = trainer_factory
        self.ghost_controller = trainer_factory.ghost_controller
        self.meta_curriculum = meta_curriculum
        self.sampler_manager = sampler_manager
        self.resampling_interval = resampling_interval
        self.output_path = output_path
        self.run_id = run_id
        self.train_model = train
        self.logger = get_logger(__name__)

        # Per-run bookkeeping; everything starts out empty.
        self.trainers: Dict[str, Trainer] = {}
        self.brain_name_to_identifier: Dict[str, Set] = defaultdict(set)
        self.trainer_threads: List[threading.Thread] = []
        self.kill_trainers = False

        # Seed Numpy first, then Tensorflow, with the same value.
        np.random.seed(training_seed)
        tf.set_random_seed(training_seed)
Beispiel #3
0
 def __init__(
     self, m_size, normalize, use_recurrent, brain, seed, stream_names=None
 ):
     """
     Seed Tensorflow, create the shared input placeholders and register a set
     of non-trainable int32 variables describing the model.

     :param m_size: Memory size; only kept when ``use_recurrent`` is truthy, otherwise 0 is stored.
     :param normalize: Stored as ``self.normalize``.
     :param use_recurrent: Whether the model is recurrent; controls the stored memory size.
     :param brain: Object providing ``vector_action_space_size``, ``vector_action_space_type``,
     ``vector_observation_space_size`` and ``number_visual_observations``.
     :param seed: Seed passed to ``tf.set_random_seed``.
     :param stream_names: Optional list of stream names; a falsy value is replaced by an empty list.
     """
     # NOTE: statements that create Tensorflow ops are kept in their original
     # order, since op creation order can influence seeded behaviour.
     tf.set_random_seed(seed)
     self.brain = brain
     self.vector_in = None
     step_tensors = self.create_global_steps()
     self.global_step, self.increment_step, self.steps_to_increment = step_tensors
     self.visual_in = []

     # Placeholders fed at run time.
     self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name="batch_size")
     self.sequence_length = tf.placeholder(
         shape=None, dtype=tf.int32, name="sequence_length"
     )
     self.mask_input = tf.placeholder(shape=[None], dtype=tf.float32, name="masks")
     self.mask = tf.cast(self.mask_input, tf.int32)

     self.stream_names = stream_names if stream_names else []
     self.use_recurrent = use_recurrent
     self.m_size = m_size if use_recurrent else 0
     self.normalize = normalize
     self.act_size = brain.vector_action_space_size
     self.vec_obs_size = brain.vector_observation_space_size
     self.vis_obs_size = brain.number_visual_observations

     # Constant descriptors stored in the graph as non-trainable int32 variables.
     is_continuous = brain.vector_action_space_type == "continuous"
     constant_kwargs = dict(trainable=False, dtype=tf.int32)
     tf.Variable(int(is_continuous), name="is_continuous_control", **constant_kwargs)
     tf.Variable(self._version_number_, name="version_number", **constant_kwargs)
     tf.Variable(self.m_size, name="memory_size", **constant_kwargs)
     # Continuous control uses the first action size entry; otherwise the
     # sizes of all entries are summed.
     action_output_shape = self.act_size[0] if is_continuous else sum(self.act_size)
     tf.Variable(action_output_shape, name="action_output_shape", **constant_kwargs)

     # Attributes left unset here; nothing in this method populates them.
     self.value_heads: Dict[str, tf.Tensor] = {}
     self.normalization_steps: Optional[tf.Variable] = None
     self.running_mean: Optional[tf.Variable] = None
     self.running_variance: Optional[tf.Variable] = None
     self.update_normalization: Optional[tf.Operation] = None
     self.value: Optional[tf.Tensor] = None
Beispiel #4
0
    def create_tf_graph(self) -> None:
        """
        Builds the tensorflow graph needed for this policy.

        If the graph already holds global variables the method returns
        immediately and performs none of the steps below. Otherwise it creates
        the input placeholders, the encoder and the actor (continuous or
        discrete), collects the trainable variables, fills
        ``self.inference_dict`` and finally calls ``self.initialize()`` and
        ``self.init_load_weights()``.
        """
        with self.graph.as_default():
            tf.set_random_seed(self.seed)
            existing_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            if existing_variables:
                # We assume the first thing created in the graph is the Policy. If
                # already populated, don't create more tensors.
                return

            self.create_input_placeholders()
            hidden = self._create_encoder(
                self.visual_in,
                self.processed_vector_in,
                self.h_size,
                self.num_layers,
                self.vis_encode_type,
            )
            if not self.use_continuous_act:
                self._create_dc_actor(hidden)
            else:
                self._create_cc_actor(
                    hidden,
                    self.tanh_squash,
                    self.reparameterize,
                    self.condition_sigma_on_obs,
                )

            policy_variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy"
            )
            # LSTMs need to be root scope for Barracuda export
            lstm_variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope="lstm"
            )
            self.trainable_variables = policy_variables + lstm_variables

        # Tensors exposed for inference; optional entries depend on the policy type.
        inference_outputs = {
            "action": self.output,
            "log_probs": self.all_log_probs,
            "entropy": self.entropy,
        }
        if self.use_continuous_act:
            inference_outputs["pre_action"] = self.output_pre
        if self.use_recurrent:
            inference_outputs["memory_out"] = self.memory_out
        self.inference_dict = inference_outputs

        # We do an initialize to make the Policy usable out of the box. If an optimizer is needed,
        # it will re-load the full graph
        self.initialize()
        # Create assignment ops for Ghost Trainer
        self.init_load_weights()
 def __init__(
     self,
     trainer_factory: TrainerFactory,
     model_path: str,
     summaries_dir: str,
     run_id: str,
     save_freq: int,
     meta_curriculum: Optional[MetaCurriculum],
     train: bool,
     training_seed: int,
     sampler_manager: SamplerManager,
     resampling_interval: Optional[int],
     ghost_swap: int,
 ):
     """
     Store the run configuration, create empty bookkeeping containers and
     seed the Numpy and Tensorflow random number generators.

     :param trainer_factory: Factory stored as ``self.trainer_factory``.
     :param model_path: Path to save the model.
     :param summaries_dir: Folder to save training summaries.
     :param run_id: The sub-directory name for model and summary statistics
     :param save_freq: Frequency at which to save model
     :param meta_curriculum: MetaCurriculum object which stores information about all curricula.
     :param train: Whether to train model, or only run inference.
     :param training_seed: Seed to use for Numpy and Tensorflow random number generation.
     :param sampler_manager: SamplerManager object handles samplers for resampling the reset parameters.
     :param resampling_interval: Specifies number of simulation steps after which reset parameters are resampled.
     :param ghost_swap: Stored as ``self.ghost_swap``; presumably the interval at which
     ghost policies are swapped (TODO confirm against the caller).
     """
     # Collaborators and settings supplied by the caller.
     self.trainer_factory = trainer_factory
     self.meta_curriculum = meta_curriculum
     self.sampler_manager = sampler_manager
     self.resampling_interval = resampling_interval
     self.model_path = model_path
     self.summaries_dir = summaries_dir
     self.run_id = run_id
     self.save_freq = save_freq
     self.train_model = train
     self.ghost_swap = ghost_swap
     self.logger = logging.getLogger("mlagents.trainers")

     # Per-run bookkeeping; everything starts out empty or at zero.
     self.trainers: Dict[str, Trainer] = {}
     self.brain_name_to_identifier: Dict[str, Set] = defaultdict(set)
     self.ghost_index: int = 0
     self.ghost_names: List[str] = []

     # Seed Numpy first, then Tensorflow, with the same value.
     np.random.seed(training_seed)
     tf.set_random_seed(training_seed)