def __init__(
    self,
    trainer_factory: TrainerFactory,
    output_path: str,
    run_id: str,
    param_manager: EnvironmentParameterManager,
    train: bool,
    training_seed: int,
):
    """
    Store the run configuration, create empty trainer bookkeeping and seed
    the random number generators.

    :param trainer_factory: Factory kept on the controller; its
        ``ghost_controller`` attribute is re-exposed as ``self.ghost_controller``.
    :param output_path: Path to save the model.
    :param run_id: The sub-directory name for model and summary statistics
    :param param_manager: EnvironmentParameterManager object which stores information
        about all environment parameters.
    :param train: Whether to train model, or only run inference.
    :param training_seed: Seed to use for Numpy and Tensorflow random number generation.
    """
    # Collaborators and run configuration handed in by the caller.
    self.trainer_factory = trainer_factory
    self.ghost_controller = trainer_factory.ghost_controller
    self.param_manager = param_manager
    self.output_path = output_path
    self.run_id = run_id
    self.train_model = train
    self.logger = get_logger(__name__)

    # Bookkeeping that starts out empty.
    self.trainers: Dict[str, Trainer] = dict()
    self.brain_name_to_identifier: Dict[str, Set] = defaultdict(set)
    self.registered_behavior_ids: Set[str] = set()
    self.trainer_threads: List[threading.Thread] = list()
    self.kill_trainers = False

    # Seed NumPy and TensorFlow with the same value.
    for seed_fn in (np.random.seed, tf.set_random_seed):
        seed_fn(training_seed)
def __init__(
    self,
    trainer_factory: TrainerFactory,
    output_path: str,
    run_id: str,
    meta_curriculum: Optional[MetaCurriculum],
    train: bool,
    training_seed: int,
    sampler_manager: SamplerManager,
    resampling_interval: Optional[int],
):
    """
    Store the run configuration, create empty trainer bookkeeping and seed
    the random number generators.

    :param trainer_factory: Factory kept on the controller; its
        ``ghost_controller`` attribute is re-exposed as ``self.ghost_controller``.
    :param output_path: Path to save the model.
    :param run_id: The sub-directory name for model and summary statistics
    :param meta_curriculum: MetaCurriculum object which stores information about all curricula.
    :param train: Whether to train model, or only run inference.
    :param training_seed: Seed to use for Numpy and Tensorflow random number generation.
    :param sampler_manager: SamplerManager object handles samplers for resampling the reset
        parameters.
    :param resampling_interval: Specifies number of simulation steps after which reset
        parameters are resampled.
    """
    # Collaborators and run configuration handed in by the caller.
    self.trainer_factory = trainer_factory
    self.ghost_controller = trainer_factory.ghost_controller
    self.meta_curriculum = meta_curriculum
    self.sampler_manager = sampler_manager
    self.resampling_interval = resampling_interval
    self.output_path = output_path
    self.run_id = run_id
    self.train_model = train
    self.logger = get_logger(__name__)

    # Bookkeeping that starts out empty.
    self.trainers: Dict[str, Trainer] = dict()
    self.brain_name_to_identifier: Dict[str, Set] = defaultdict(set)
    self.trainer_threads: List[threading.Thread] = list()
    self.kill_trainers = False

    # Seed NumPy and TensorFlow with the same value.
    for seed_fn in (np.random.seed, tf.set_random_seed):
        seed_fn(training_seed)
def __init__(
    self, m_size, normalize, use_recurrent, brain, seed, stream_names=None
):
    """
    Seed TensorFlow, create the shared input placeholders and register the
    non-trainable int32 variables that describe the model.

    The TensorFlow objects are created in a fixed order (global steps,
    ``batch_size``, ``sequence_length``, ``masks``, then the variables
    ``is_continuous_control``, ``version_number``, ``memory_size`` and
    ``action_output_shape``); keep that order when editing.

    :param m_size: Memory size; stored only when ``use_recurrent`` is truthy,
        otherwise ``self.m_size`` is 0.
    :param normalize: Stored as ``self.normalize``.
    :param use_recurrent: Stored as ``self.use_recurrent``; gates ``m_size``.
    :param brain: Object providing ``vector_action_space_size``,
        ``vector_action_space_type``, ``vector_observation_space_size`` and
        ``number_visual_observations``.
    :param seed: Value passed to ``tf.set_random_seed``.
    :param stream_names: Optional stream names; a falsy value becomes ``[]``.
    """

    def _frozen_int(value, name):
        # Every descriptive variable below shares these settings.
        return tf.Variable(value, name=name, trainable=False, dtype=tf.int32)

    tf.set_random_seed(seed)
    self.brain = brain
    self.vector_in = None
    step_ops = self.create_global_steps()
    self.global_step, self.increment_step, self.steps_to_increment = step_ops
    self.visual_in = []

    # Placeholders shared by the model.
    self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name="batch_size")
    self.sequence_length = tf.placeholder(
        shape=None, dtype=tf.int32, name="sequence_length"
    )
    self.mask_input = tf.placeholder(shape=[None], dtype=tf.float32, name="masks")
    self.mask = tf.cast(self.mask_input, tf.int32)

    self.stream_names = stream_names if stream_names else []
    self.use_recurrent = use_recurrent
    self.m_size = m_size if self.use_recurrent else 0
    self.normalize = normalize

    # Sizes copied from the brain description.
    self.act_size = brain.vector_action_space_size
    self.vec_obs_size = brain.vector_observation_space_size
    self.vis_obs_size = brain.number_visual_observations

    # Constant variables stored in the graph; their handles are not kept.
    is_continuous = brain.vector_action_space_type == "continuous"
    _frozen_int(int(is_continuous), "is_continuous_control")
    _frozen_int(self._version_number_, "version_number")
    _frozen_int(self.m_size, "memory_size")
    # Continuous control uses the first action size entry; otherwise the sum
    # of all entries.
    output_shape = self.act_size[0] if is_continuous else sum(self.act_size)
    _frozen_int(output_shape, "action_output_shape")

    # Attributes initialised empty here.
    self.value_heads: Dict[str, tf.Tensor] = {}
    self.normalization_steps: Optional[tf.Variable] = None
    self.running_mean: Optional[tf.Variable] = None
    self.running_variance: Optional[tf.Variable] = None
    self.update_normalization: Optional[tf.Operation] = None
    self.value: Optional[tf.Tensor] = None
def create_tf_graph(self) -> None:
    """
    Builds the tensorflow graph needed for this policy.

    Does nothing further if the graph already holds global variables.
    Otherwise creates the input placeholders, the encoder and the actor
    (continuous or discrete), collects the trainable variables, fills
    ``self.inference_dict``, and finally calls ``self.initialize()`` and
    ``self.init_load_weights()``.
    """
    with self.graph.as_default():
        tf.set_random_seed(self.seed)
        existing_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        if existing_vars:
            # We assume the first thing created in the graph is the Policy. If
            # already populated, don't create more tensors.
            return

        self.create_input_placeholders()
        encoded = self._create_encoder(
            self.visual_in,
            self.processed_vector_in,
            self.h_size,
            self.num_layers,
            self.vis_encode_type,
        )
        if not self.use_continuous_act:
            self._create_dc_actor(encoded)
        else:
            self._create_cc_actor(
                encoded,
                self.tanh_squash,
                self.reparameterize,
                self.condition_sigma_on_obs,
            )

        policy_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy"
        )
        # LSTMs need to be root scope for Barracuda export
        lstm_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope="lstm"
        )
        self.trainable_variables = policy_vars + lstm_vars

    # NOTE(review): the source indentation was lost; the statements below are
    # assumed to run after leaving the graph context - confirm.
    outputs = {
        "action": self.output,
        "log_probs": self.all_log_probs,
        "entropy": self.entropy,
    }
    if self.use_continuous_act:
        outputs["pre_action"] = self.output_pre
    if self.use_recurrent:
        outputs["memory_out"] = self.memory_out
    self.inference_dict = outputs

    # We do an initialize to make the Policy usable out of the box. If an optimizer is needed,
    # it will re-load the full graph
    self.initialize()
    # Create assignment ops for Ghost Trainer
    self.init_load_weights()
def __init__(
    self,
    trainer_factory: TrainerFactory,
    model_path: str,
    summaries_dir: str,
    run_id: str,
    save_freq: int,
    meta_curriculum: Optional[MetaCurriculum],
    train: bool,
    training_seed: int,
    sampler_manager: SamplerManager,
    resampling_interval: Optional[int],
    ghost_swap: int,
):
    """
    Store the run configuration, create empty trainer and ghost bookkeeping
    and seed the random number generators.

    :param trainer_factory: Factory stored as ``self.trainer_factory``.
    :param model_path: Path to save the model.
    :param summaries_dir: Folder to save training summaries.
    :param run_id: The sub-directory name for model and summary statistics
    :param save_freq: Frequency at which to save model
    :param meta_curriculum: MetaCurriculum object which stores information about all curricula.
    :param train: Whether to train model, or only run inference.
    :param training_seed: Seed to use for Numpy and Tensorflow random number generation.
    :param sampler_manager: SamplerManager object handles samplers for resampling the reset
        parameters.
    :param resampling_interval: Specifies number of simulation steps after which reset
        parameters are resampled.
    :param ghost_swap: Stored as ``self.ghost_swap``; not otherwise used in this constructor.
    """
    # Collaborators and run configuration handed in by the caller.
    self.trainer_factory = trainer_factory
    self.meta_curriculum = meta_curriculum
    self.sampler_manager = sampler_manager
    self.resampling_interval = resampling_interval
    self.model_path = model_path
    self.summaries_dir = summaries_dir
    self.run_id = run_id
    self.save_freq = save_freq
    self.train_model = train
    self.logger = logging.getLogger("mlagents.trainers")

    # Bookkeeping that starts out empty.
    self.trainers: Dict[str, Trainer] = dict()
    self.brain_name_to_identifier: Dict[str, Set] = defaultdict(set)

    # Ghost state: the swap setting from the caller, index at zero, no names yet.
    self.ghost_swap = ghost_swap
    self.ghost_index: int = 0
    self.ghost_names: List[str] = list()

    # Seed NumPy and TensorFlow with the same value.
    for seed_fn in (np.random.seed, tf.set_random_seed):
        seed_fn(training_seed)