def get_preprocessor_for_space(observation_space, options=None):
    """Build the preprocessor that matches an observation space.

    Args:
        observation_space (Space): The input observation space.
        options (dict): Options handed to the preprocessor. When empty or
            None, ``MODEL_DEFAULTS`` is used instead.

    Returns:
        preprocessor (Preprocessor): Preprocessor for the observations.

    Raises:
        Exception: If ``options`` holds a key missing from
            ``MODEL_DEFAULTS``.
    """
    if not options:
        options = MODEL_DEFAULTS

    # Fail on the first option key that is not a known model config key.
    for key in options.keys():
        if key in MODEL_DEFAULTS:
            continue
        raise Exception("Unknown config key `{}`, all keys: {}".format(
            key, list(MODEL_DEFAULTS)))

    # Pick the class first (registry lookup for custom preprocessors,
    # space-based lookup otherwise), then construct it in one place.
    custom_name = options.get("custom_preprocessor")
    if custom_name:
        logger.info("Using custom preprocessor {}".format(custom_name))
        prep_cls = _global_registry.get(RLLIB_PREPROCESSOR, custom_name)
    else:
        prep_cls = get_preprocessor(observation_space)
    prep = prep_cls(observation_space, options)

    logger.debug("Created preprocessor {}: {} -> {}".format(
        prep, observation_space, prep.shape))
    return prep
def _setup(self, config):
    """Resolve the env creator, build the merged config and run `_init`.

    Args:
        config (dict): User-supplied config. Its "env" entry is overwritten
            with `self._env_id` when that id is set.
    """
    env_id = self._env_id
    if not env_id:
        # No env configured: the creator yields None for any env config.
        self.env_creator = lambda env_config: None
    else:
        config["env"] = env_id
        if _global_registry.contains(ENV_CREATOR, env_id):
            self.env_creator = _global_registry.get(ENV_CREATOR, env_id)
        else:
            import gym  # soft dependency

            def _make_gym_env(env_config):
                return gym.make(env_id)

            self.env_creator = _make_gym_env

    # Layer the supplied config on top of a private copy of the class
    # defaults, so the defaults themselves are never mutated.
    defaults = copy.deepcopy(self._default_config)
    merged = deep_update(defaults, config, self._allow_unknown_configs,
                         self._allow_unknown_subkeys)
    self.raw_user_config = config
    self.config = merged
    Agent._validate_config(self.config)

    log_level = self.config.get("log_level")
    if log_level:
        logging.getLogger("ray.rllib").setLevel(log_level)

    # TODO(ekl) setting the graph is unnecessary for PyTorch agents
    with tf.Graph().as_default():
        self._init()
def get_torch_model(obs_space,
                    num_outputs,
                    options=None,
                    default_model_cls=None):
    """Construct a PyTorch model for the given observation space.

    Resolution order: a registered custom model (``options["custom_model"]``),
    then ``default_model_cls``, then a built-in network chosen by the rank
    of the observation space.

    Args:
        obs_space (Space): The input observation space.
        num_outputs (int): The size of the output vector of the model.
        options (dict): Optional args to pass to the model constructor.
            Falls back to ``MODEL_DEFAULTS`` when empty or None.
        default_model_cls (cls): Optional class to use if no custom model.

    Returns:
        model (models.Model): Neural network model.

    Raises:
        NotImplementedError: If ``options["use_lstm"]`` is set and no
            custom model is configured.
    """
    from ray.rllib.models.pytorch.fcnet import (FullyConnectedNetwork as
                                                PyTorchFCNet)
    from ray.rllib.models.pytorch.visionnet import (VisionNetwork as
                                                    PyTorchVisionNet)

    if not options:
        options = MODEL_DEFAULTS

    custom_name = options.get("custom_model")
    if custom_name:
        logger.debug("Using custom torch model {}".format(custom_name))
        custom_cls = _global_registry.get(RLLIB_MODEL, custom_name)
        return custom_cls(obs_space, num_outputs, options)

    if options.get("use_lstm"):
        raise NotImplementedError(
            "LSTM auto-wrapping not implemented for torch")

    if default_model_cls:
        return default_model_cls(obs_space, num_outputs, options)

    # Discrete spaces count as rank 1; everything else uses its shape.
    is_discrete = isinstance(obs_space, gym.spaces.Discrete)
    obs_rank = 1 if is_discrete else len(obs_space.shape)

    net_cls = PyTorchVisionNet if obs_rank > 1 else PyTorchFCNet
    return net_cls(obs_space, num_outputs, options)
def _get_model(input_dict, obs_space, action_space, num_outputs, options,
               state_in, seq_lens):
    """Instantiate a custom model if configured, else a built-in network.

    A registered custom model (``options["custom_model"]``) receives
    ``state_in`` and ``seq_lens`` as keyword arguments. The built-in
    networks do not; they are chosen from the rank of
    ``input_dict["obs"]``.
    """
    custom_name = options.get("custom_model")
    if custom_name:
        logger.debug("Using custom model {}".format(custom_name))
        custom_cls = _global_registry.get(RLLIB_MODEL, custom_name)
        return custom_cls(
            input_dict,
            obs_space,
            action_space,
            num_outputs,
            options,
            state_in=state_in,
            seq_lens=seq_lens)

    # One dimension of the obs tensor is excluded from the rank.
    obs_rank = len(input_dict["obs"].shape) - 1
    net_cls = VisionNetwork if obs_rank > 1 else FullyConnectedNetwork
    return net_cls(input_dict, obs_space, action_space, num_outputs, options)
def get_model_v2(obs_space: gym.Space,
                 action_space: gym.Space,
                 num_outputs: int,
                 model_config: ModelConfigDict,
                 framework: str = "tf",
                 name: str = "default_model",
                 model_interface: type = None,
                 default_model: type = None,
                 **model_kwargs) -> ModelV2:
    """Returns a suitable model compatible with given spaces and output.

    If `model_config["custom_model"]` is set, that class (or the class
    registered under that name) is instantiated. Otherwise a default model
    class is looked up for the given framework. In both paths the class is
    optionally wrapped with an LSTM/attention wrapper and with
    `model_interface`.

    Note: `model_config` may be mutated. When a keras model is wrapped with
    an LSTM/attention wrapper, the wrapped class is stored under
    `model_config["wrapped_cls"]`.

    Args:
        obs_space (Space): Observation space of the target gym env. This
            may have an `original_space` attribute that specifies how to
            unflatten the tensor into a ragged tensor.
        action_space (Space): Action space of the target gym env.
        num_outputs (int): The size of the output vector of the model.
        model_config (ModelConfigDict): The "model" sub-config dict
            within the Trainer's config dict.
        framework (str): One of "tf2", "tf", "tfe", "torch", or "jax".
        name (str): Name (scope) for the model.
        model_interface (cls): Interface required for the model
        default_model (cls): Override the default class for the model. This
            only has an effect when not using a custom model
        model_kwargs (dict): args to pass to the ModelV2 constructor

    Returns:
        model (ModelV2): Model to use for the policy.

    Raises:
        ValueError: If the custom model class is neither a ModelV2 nor (for
            tf frameworks) a keras Model, if a tf custom model registered
            only some of the variables it created, or if no default model
            class could be determined.
        NotImplementedError: If `framework` is not supported by the path
            taken (custom models: tf2/tf/tfe/torch only).
    """

    # Validate the given config dict.
    ModelCatalog._validate_config(config=model_config,
                                  action_space=action_space,
                                  framework=framework)

    # ---- Custom model path (always returns or raises). ----
    if model_config.get("custom_model"):
        # Allow model kwargs to be overridden / augmented by
        # custom_model_config.
        customized_model_kwargs = dict(
            model_kwargs, **model_config.get("custom_model_config", {}))

        # "custom_model" is either the class itself or a registry key.
        if isinstance(model_config["custom_model"], type):
            model_cls = model_config["custom_model"]
        else:
            model_cls = _global_registry.get(RLLIB_MODEL,
                                             model_config["custom_model"])

        # Only allow ModelV2 or native keras Models.
        if not issubclass(model_cls, ModelV2):
            if framework not in [
                    "tf", "tf2", "tfe"
            ] or not issubclass(model_cls, tf.keras.Model):
                raise ValueError(
                    "`model_cls` must be a ModelV2 sub-class, but is"
                    " {}!".format(model_cls))

        logger.info("Wrapping {} as {}".format(model_cls, model_interface))
        model_cls = ModelCatalog._wrap_if_needed(model_cls,
                                                 model_interface)

        if framework in ["tf2", "tf", "tfe"]:
            # Try wrapping custom model with LSTM/attention, if required.
            if model_config.get("use_lstm") or model_config.get(
                    "use_attention"):
                from ray.rllib.models.tf.attention_net import (
                    AttentionWrapper, Keras_AttentionWrapper,
                )
                from ray.rllib.models.tf.recurrent_net import (
                    LSTMWrapper, Keras_LSTMWrapper,
                )

                wrapped_cls = model_cls
                # Wrapped (custom) model is itself a keras Model ->
                # wrap with keras LSTM/GTrXL (attention) wrappers.
                if issubclass(wrapped_cls, tf.keras.Model):
                    model_cls = (Keras_LSTMWrapper
                                 if model_config.get("use_lstm") else
                                 Keras_AttentionWrapper)
                    # The keras wrapper receives the wrapped class through
                    # the config dict (mutates the caller's model_config).
                    model_config["wrapped_cls"] = wrapped_cls
                # Wrapped (custom) model is ModelV2 ->
                # wrap with ModelV2 LSTM/GTrXL (attention) wrappers.
                else:
                    # Capture the original `forward` before wrapping, so it
                    # can be attached to the wrapper class afterwards.
                    forward = wrapped_cls.forward
                    model_cls = ModelCatalog._wrap_if_needed(
                        wrapped_cls,
                        LSTMWrapper
                        if model_config.get("use_lstm") else AttentionWrapper,
                    )
                    model_cls._wrapped_forward = forward

            # Obsolete: Track and warn if vars were created but not
            # registered. Only still do this, if users do register their
            # variables. If not (which they shouldn't), don't check here.
            created = set()

            def track_var_creation(next_creator, **kw):
                # Variable-creator hook: records every variable created
                # while the scope below is active.
                v = next_creator(**kw)
                created.add(v)
                return v

            with tf.variable_creator_scope(track_var_creation):
                if issubclass(model_cls, tf.keras.Model):
                    instance = model_cls(
                        input_space=obs_space,
                        action_space=action_space,
                        num_outputs=num_outputs,
                        name=name,
                        **customized_model_kwargs,
                    )
                else:
                    # Try calling with kwargs first (custom ModelV2 should
                    # accept these as kwargs, not get them from
                    # config["custom_model_config"] anymore).
                    try:
                        instance = model_cls(
                            obs_space,
                            action_space,
                            num_outputs,
                            model_config,
                            name,
                            **customized_model_kwargs,
                        )
                    except TypeError as e:
                        # Keyword error: Try old way w/o kwargs.
                        # NOTE(review): detection relies on the TypeError
                        # message text.
                        if "__init__() got an unexpected " in e.args[0]:
                            instance = model_cls(
                                obs_space,
                                action_space,
                                num_outputs,
                                model_config,
                                name,
                                **model_kwargs,
                            )
                            logger.warning(
                                "Custom ModelV2 should accept all custom "
                                "options as **kwargs, instead of expecting"
                                " them in config['custom_model_config']!")
                        # Other error -> re-raise.
                        else:
                            raise e

            # User still registered TFModelV2's variables: Check, whether
            # ok.
            # Keras models are skipped here (`registered` stays empty).
            registered = []
            if not isinstance(instance, tf.keras.Model):
                registered = set(instance.var_list)
            if len(registered) > 0:
                not_registered = set()
                for var in created:
                    if var not in registered:
                        not_registered.add(var)
                if not_registered:
                    raise ValueError(
                        "It looks like you are still using "
                        "`{}.register_variables()` to register your "
                        "model's weights. This is no longer required, but "
                        "if you are still calling this method at least "
                        "once, you must make sure to register all created "
                        "variables properly. The missing variables are {},"
                        " and you only registered {}. "
                        "Did you forget to call `register_variables()` on "
                        "some of the variables in question?".format(
                            instance, not_registered, registered))
        elif framework == "torch":
            # Try wrapping custom model with LSTM/attention, if required.
            if model_config.get("use_lstm") or model_config.get(
                    "use_attention"):
                from ray.rllib.models.torch.attention_net import AttentionWrapper
                from ray.rllib.models.torch.recurrent_net import LSTMWrapper

                wrapped_cls = model_cls
                forward = wrapped_cls.forward
                model_cls = ModelCatalog._wrap_if_needed(
                    wrapped_cls,
                    LSTMWrapper
                    if model_config.get("use_lstm") else AttentionWrapper,
                )
                model_cls._wrapped_forward = forward

            # PyTorch automatically tracks nn.Modules inside the parent
            # nn.Module's constructor.
            # Try calling with kwargs first (custom ModelV2 should
            # accept these as kwargs, not get them from
            # config["custom_model_config"] anymore).
            try:
                instance = model_cls(
                    obs_space,
                    action_space,
                    num_outputs,
                    model_config,
                    name,
                    **customized_model_kwargs,
                )
            except TypeError as e:
                # Keyword error: Try old way w/o kwargs.
                if "__init__() got an unexpected " in e.args[0]:
                    instance = model_cls(
                        obs_space,
                        action_space,
                        num_outputs,
                        model_config,
                        name,
                        **model_kwargs,
                    )
                    logger.warning(
                        "Custom ModelV2 should accept all custom "
                        "options as **kwargs, instead of expecting"
                        " them in config['custom_model_config']!")
                # Other error -> re-raise.
                else:
                    raise e
        else:
            # Custom models are only supported for tf* and torch here.
            raise NotImplementedError(
                "`framework` must be 'tf2|tf|tfe|torch', but is "
                "{}!".format(framework))

        return instance

    # ---- Default model path (no custom model configured). ----
    # Find a default TFModelV2 and wrap with model_interface.
    if framework in ["tf", "tfe", "tf2"]:
        v2_class = None
        # Try to get a default v2 model.
        # NOTE(review): this condition is always true here, since the
        # custom-model branch above returns or raises.
        if not model_config.get("custom_model"):
            v2_class = default_model or ModelCatalog._get_v2_model_class(
                obs_space, model_config, framework=framework)

        if not v2_class:
            raise ValueError("ModelV2 class could not be determined!")

        if model_config.get("use_lstm") or model_config.get(
                "use_attention"):
            from ray.rllib.models.tf.attention_net import (
                AttentionWrapper, Keras_AttentionWrapper,
            )
            from ray.rllib.models.tf.recurrent_net import (
                LSTMWrapper, Keras_LSTMWrapper,
            )

            wrapped_cls = v2_class
            if model_config.get("use_lstm"):
                if issubclass(wrapped_cls, tf.keras.Model):
                    v2_class = Keras_LSTMWrapper
                    model_config["wrapped_cls"] = wrapped_cls
                else:
                    v2_class = ModelCatalog._wrap_if_needed(
                        wrapped_cls, LSTMWrapper)
                    v2_class._wrapped_forward = wrapped_cls.forward
            else:
                if issubclass(wrapped_cls, tf.keras.Model):
                    v2_class = Keras_AttentionWrapper
                    model_config["wrapped_cls"] = wrapped_cls
                else:
                    v2_class = ModelCatalog._wrap_if_needed(
                        wrapped_cls, AttentionWrapper)
                    v2_class._wrapped_forward = wrapped_cls.forward

        # Wrap in the requested interface.
        wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)

        # Keras models take keyword arguments and receive the model config
        # entries merged into their kwargs (config entries win on clashes).
        if issubclass(wrapper, tf.keras.Model):
            model = wrapper(
                input_space=obs_space,
                action_space=action_space,
                num_outputs=num_outputs,
                name=name,
                **dict(model_kwargs, **model_config),
            )
            return model

        return wrapper(obs_space, action_space, num_outputs, model_config,
                       name, **model_kwargs)

    # Find a default TorchModelV2 and wrap with model_interface.
    elif framework == "torch":
        # Try to get a default v2 model.
        # NOTE(review): unlike the tf branch, `v2_class` is not initialised
        # to None first; this is safe only because the condition below is
        # always true at this point.
        if not model_config.get("custom_model"):
            v2_class = default_model or ModelCatalog._get_v2_model_class(
                obs_space, model_config, framework=framework)

        if not v2_class:
            raise ValueError("ModelV2 class could not be determined!")

        if model_config.get("use_lstm") or model_config.get(
                "use_attention"):
            from ray.rllib.models.torch.attention_net import AttentionWrapper
            from ray.rllib.models.torch.recurrent_net import LSTMWrapper

            wrapped_cls = v2_class
            forward = wrapped_cls.forward
            if model_config.get("use_lstm"):
                v2_class = ModelCatalog._wrap_if_needed(
                    wrapped_cls, LSTMWrapper)
            else:
                v2_class = ModelCatalog._wrap_if_needed(
                    wrapped_cls, AttentionWrapper)

            v2_class._wrapped_forward = forward

        # Wrap in the requested interface.
        wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
        return wrapper(obs_space, action_space, num_outputs, model_config,
                       name, **model_kwargs)

    # Find a default JAXModelV2 and wrap with model_interface.
    elif framework == "jax":
        # No LSTM/attention wrapping is applied in this branch.
        v2_class = default_model or ModelCatalog._get_v2_model_class(
            obs_space, model_config, framework=framework)
        # Wrap in the requested interface.
        wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
        return wrapper(obs_space, action_space, num_outputs, model_config,
                       name, **model_kwargs)
    else:
        # NOTE(review): the message omits "jax", although the branch above
        # accepts it for default models.
        raise NotImplementedError(
            "`framework` must be 'tf2|tf|tfe|torch', but is "
            "{}!".format(framework))
def get_model_v2(obs_space: gym.Space,
                 action_space: gym.Space,
                 num_outputs: int,
                 model_config: ModelConfigDict,
                 framework: str = "tf",
                 name: str = "default_model",
                 model_interface: type = None,
                 default_model: type = None,
                 **model_kwargs) -> ModelV2:
    """Create a ModelV2 for the given spaces, config and framework.

    A configured custom model (``model_config["custom_model"]``, either a
    class or a registry key) takes precedence. Otherwise a default model
    class is looked up, optionally wrapped with an LSTM wrapper, and
    wrapped with ``model_interface``.

    Args:
        obs_space (Space): Observation space of the target gym env. This
            may have an `original_space` attribute that specifies how to
            unflatten the tensor into a ragged tensor.
        action_space (Space): Action space of the target gym env.
        num_outputs (int): The size of the output vector of the model.
        model_config (ModelConfigDict): The model config dict.
        framework (str): One of "tf2", "tf", "tfe", or "torch".
        name (str): Name (scope) for the model.
        model_interface (cls): Interface required for the model
        default_model (cls): Override the default class for the model. This
            only has an effect when not using a custom model
        model_kwargs (dict): args to pass to the ModelV2 constructor

    Returns:
        model (ModelV2): Model to use for the policy.

    Raises:
        ValueError: If the custom model is not a ModelV2 subclass, if a tf
            custom model created variables that are not in its
            ``variables()``, or if no default tf model class was found.
        NotImplementedError: If no custom model is configured and
            ``framework`` is not one of tf2/tf/tfe/torch.
    """
    tf_frameworks = ("tf2", "tf", "tfe")
    custom_model = model_config.get("custom_model")

    if custom_model:
        # Allow model kwargs to be overridden / augmented by
        # custom_model_config.
        customized_model_kwargs = dict(
            model_kwargs, **model_config.get("custom_model_config", {}))

        custom_spec = model_config["custom_model"]
        if isinstance(custom_spec, type):
            model_cls = custom_spec
        else:
            model_cls = _global_registry.get(RLLIB_MODEL, custom_spec)

        if not issubclass(model_cls, ModelV2):
            raise ValueError(
                "`model_cls` must be a ModelV2 sub-class, but is"
                " {}!".format(model_cls))

        logger.info("Wrapping {} as {}".format(model_cls, model_interface))
        model_cls = ModelCatalog._wrap_if_needed(model_cls, model_interface)

        def _construct_custom():
            # Try calling with kwargs first (custom ModelV2 should
            # accept these as kwargs, not get them from
            # config["custom_model_config"] anymore).
            try:
                return model_cls(obs_space, action_space, num_outputs,
                                 model_config, name,
                                 **customized_model_kwargs)
            except TypeError as e:
                # Any error other than an unexpected keyword -> re-raise.
                if "__init__() got an unexpected " not in e.args[0]:
                    raise e
                # Keyword error: Try old way w/o kwargs.
                legacy_instance = model_cls(obs_space, action_space,
                                            num_outputs, model_config, name,
                                            **model_kwargs)
                logger.warning(
                    "Custom ModelV2 should accept all custom "
                    "options as **kwargs, instead of expecting"
                    " them in config['custom_model_config']!")
                return legacy_instance

        if framework not in tf_frameworks:
            # PyTorch automatically tracks nn.Modules inside the parent
            # nn.Module's constructor.
            return _construct_custom()

        # Track and warn if vars were created but not registered.
        created = set()

        def _record_created(next_creator, **kw):
            new_var = next_creator(**kw)
            created.add(new_var)
            return new_var

        with tf.variable_creator_scope(_record_created):
            instance = _construct_custom()

        registered = set(instance.variables())
        not_registered = {v for v in created if v not in registered}
        if not_registered:
            raise ValueError(
                "It looks like variables {} were created as part "
                "of {} but does not appear in model.variables() "
                "({}). Did you forget to call "
                "model.register_variables() on the variables in "
                "question?".format(not_registered, instance, registered))
        return instance

    # No custom model: resolve a default class per framework.
    if framework == "torch":
        v2_class = default_model or ModelCatalog._get_v2_model_class(
            obs_space, model_config, framework=framework)

        if model_config.get("use_lstm"):
            # Aliased so the local import does not shadow the module-level
            # `LSTMWrapper` used by the tf branch.
            from ray.rllib.models.torch.recurrent_net import LSTMWrapper \
                as TorchLSTMWrapper
            base_cls = v2_class
            base_forward = base_cls.forward
            v2_class = ModelCatalog._wrap_if_needed(base_cls,
                                                    TorchLSTMWrapper)
            v2_class._wrapped_forward = base_forward
    elif framework in tf_frameworks:
        v2_class = None
        # Try to get a default v2 model.
        if not model_config.get("custom_model"):
            v2_class = default_model or ModelCatalog._get_v2_model_class(
                obs_space, model_config, framework=framework)
        if not v2_class:
            raise ValueError("ModelV2 class could not be determined!")

        if model_config.get("use_lstm"):
            base_cls = v2_class
            base_forward = base_cls.forward
            v2_class = ModelCatalog._wrap_if_needed(base_cls, LSTMWrapper)
            v2_class._wrapped_forward = base_forward
    else:
        raise NotImplementedError(
            "`framework` must be 'tf2|tf|tfe|torch', but is "
            "{}!".format(framework))

    # Wrap in the requested interface.
    wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
    return wrapper(obs_space, action_space, num_outputs, model_config, name,
                   **model_kwargs)
def __init__(self, observation_space, action_space, config):
    """Build the TF graph for a MAML policy: A3C inner loss, PPO outer loss.

    Creates input placeholders, instantiates the registered custom model
    with adapted parameters from `model_cls.prepare(...)`, builds both loss
    objects, initialises `BaseMAMLPolicyGraph`, and finally runs the global
    variable initializer in the default session.

    Args:
        observation_space (Space): Observation space; its `shape` sizes the
            observation placeholder.
        action_space (Space): Action space of the environment.
        config (dict): Policy config. Keys read here: "kl_coeff",
            "kl_target", "inner_lr", "outer_lr", "mode", "model",
            "use_gae", "vf_share_layers", "vf_loss_coeff",
            "entropy_coeff", "vf_clip_param", "clip_param".

    Raises:
        AssertionError: If "mode" is not "local"/"remote", "kl_coeff" is
            not 0.0, no custom model is configured, or "use_gae" /
            "vf_share_layers" is falsy.
        NotImplementedError: If the model exposes recurrent state inputs.
    """
    # config = dict(ray.rllib.agents.ppo.ppo.DEFAULT_CONFIG, **config)
    self.sess = tf.get_default_session()
    self.action_space = action_space
    self.config = config
    self.kl_coeff_val = self.config["kl_coeff"]
    self.kl_target = self.config["kl_target"]
    self.inner_lr = self.config["inner_lr"]
    self.outer_lr = self.config["outer_lr"]
    self.mode = self.config["mode"]
    assert self.mode in ["local", "remote"]
    assert self.kl_coeff_val == 0.0

    dist_cls, logit_dim = ModelCatalog.get_action_dist(
        action_space, self.config["model"])

    with tf.name_scope("inputs"):
        obs_ph = tf.placeholder(
            tf.float32,
            shape=(None, ) + observation_space.shape,
            name="obs")
        adv_ph = tf.placeholder(
            tf.float32, shape=(None, ), name="advantages")
        act_ph = ModelCatalog.get_action_placeholder(action_space)
        logits_ph = tf.placeholder(
            tf.float32, shape=(None, logit_dim), name="logits")
        vf_preds_ph = tf.placeholder(
            tf.float32, shape=(None, ), name="vf_preds")
        value_targets_ph = tf.placeholder(
            tf.float32, shape=(None, ), name="value_targets")
        prev_actions_ph = ModelCatalog.get_action_placeholder(action_space)
        prev_rewards_ph = tf.placeholder(
            tf.float32, shape=(None, ), name="prev_rewards")
        existing_state_in = None
        existing_seq_lens = None

    self.observations = obs_ph

    self.a3c_loss_in = [("obs", obs_ph), ("advantages", adv_ph),
                        ("actions", act_ph),
                        ("value_targets", value_targets_ph),
                        ("vf_preds", vf_preds_ph),
                        ("prev_actions", prev_actions_ph),
                        ("prev_rewards", prev_rewards_ph)]
    # self.a3c_loss_in = [
    #     ("obs", obs_ph),
    #     ("advantages", adv_ph),
    #     ("actions", act_ph),
    #     ("prev_actions", prev_actions_ph),
    #     ("prev_rewards", prev_rewards_ph)]
    # The outer (PPO) loss takes the same inputs plus the logits.
    self.ppo_loss_in = list(self.a3c_loss_in) \
        + [("logits", logits_ph)]

    # Only registered custom models are supported.
    assert self.config["model"]["custom_model"]
    logger.info(
        f'Using custom model {self.config["model"]["custom_model"]}')
    model_cls = _global_registry.get(RLLIB_MODEL,
                                     self.config["model"]["custom_model"])

    # `func` is the inner update rule handed to the model: x - inner_lr * y.
    new_variables, grad_placeholders, custom_variables, dummy_variables = \
        model_cls.prepare(observation_space,
                          (logit_dim // 2
                           if self.config["model"]["free_log_std"]
                           else logit_dim),
                          self.config["model"],
                          func=lambda x, y: x - self.inner_lr * y)
    self._new_variables = new_variables
    self._grad_placeholders = grad_placeholders
    self._custom_variables = custom_variables
    self._dummy_variables = dummy_variables

    self._inner_variables = nest.flatten(custom_variables)
    # for Meta-SGD, `custom_variables` and `adaptive learning rates`
    self._outer_variables = nest.flatten(custom_variables)
    self._variables = {var.op.name: var for var in self._outer_variables}

    # Pair each gradient placeholder with the op name of the variable at
    # the same index and key in `custom_variables`.
    self._grad_phs_loss_inputs = []
    for i, placeholders in enumerate(grad_placeholders):
        for key, ph in placeholders.items():
            self._grad_phs_loss_inputs.append(
                (custom_variables[i][key].op.name, ph))
    self._grad_phs_loss_input_dict = dict(self._grad_phs_loss_inputs)

    self.model = model_cls(
        {
            "obs": obs_ph,
            "prev_actions": prev_actions_ph,
            # Fixed: this entry previously pointed at `prev_actions_ph`.
            "prev_rewards": prev_rewards_ph
        },
        observation_space,
        logit_dim,
        self.config["model"],
        state_in=existing_state_in,
        seq_lens=existing_seq_lens,
        custom_params=new_variables)

    self.logits = self.model.outputs
    with tf.name_scope("sampler"):
        curr_action_dist = dist_cls(self.logits)
        self.sampler = curr_action_dist.sample()

    assert self.config["use_gae"] and self.config["vf_share_layers"]
    self.value_function = self.model.value_function()

    # Recurrent models are not supported; without state inputs no
    # sequence mask is needed.
    if self.model.state_in:
        raise NotImplementedError
    else:
        mask = None

    with tf.name_scope("a3c_loss"):
        self.a3c_loss_obj = A3CLoss(
            action_dist=curr_action_dist,
            actions=act_ph,
            advantages=adv_ph,
            value_targets=value_targets_ph,
            vf_preds=vf_preds_ph,
            value_function=self.value_function,
            vf_loss_coeff=self.config["vf_loss_coeff"],
            entropy_coeff=self.config["entropy_coeff"],
            vf_clip_param=self.config["vf_clip_param"])
        # self.a3c_loss_obj = PGLoss(
        #     curr_action_dist, act_ph, adv_ph)

    with tf.name_scope(
            "ppo_loss"
    ):  # write own PPO loss, boolean_mask -> dynamic_partition
        self.ppo_loss_obj = PPOLoss(
            action_dist=curr_action_dist,
            action_space=action_space,
            logits=logits_ph,
            actions=act_ph,
            advantages=adv_ph,
            value_targets=value_targets_ph,
            vf_preds=vf_preds_ph,
            value_function=self.value_function,
            valid_mask=mask,
            kl_coeff=self.kl_coeff_val,
            clip_param=self.config["clip_param"],
            vf_clip_param=self.config["vf_clip_param"],
            vf_loss_coeff=self.config["vf_loss_coeff"],
            entropy_coeff=self.config["entropy_coeff"],
            use_gae=self.config["use_gae"])

    BaseMAMLPolicyGraph.__init__(
        self,
        observation_space,
        action_space,
        self.sess,
        obs_input=obs_ph,
        action_sampler=self.sampler,
        inner_loss=self.a3c_loss_obj.loss,
        inner_loss_inputs=self.a3c_loss_in,
        outer_loss=self.ppo_loss_obj.loss,
        outer_loss_inputs=self.ppo_loss_in,
        state_inputs=self.model.state_in,
        state_outputs=self.model.state_out,
        prev_action_input=prev_actions_ph,
        prev_reward_input=prev_rewards_ph,
        seq_lens=self.model.seq_lens,
        max_seq_len=self.config["model"]["max_seq_len"])

    self.a3c_stats_fetches = {
        "total_loss": self.a3c_loss_obj.loss,
        "policy_loss": self.a3c_loss_obj.mean_policy_loss,
        "vf_loss": self.a3c_loss_obj.mean_vf_loss,
        "entropy": self.a3c_loss_obj.mean_entropy
    }
    # NOTE(review): "policy_Loss" is capitalised differently from the
    # "policy_loss" key above. Left unchanged because code outside this
    # block may read the key; confirm before renaming.
    self.ppo_stats_fetches = {
        "total_loss": self.ppo_loss_obj.loss,
        "policy_Loss": self.ppo_loss_obj.mean_policy_loss,
        "vf_loss": self.ppo_loss_obj.mean_vf_loss,
        "entropy": self.ppo_loss_obj.mean_entropy,
        "kl": self.ppo_loss_obj.mean_kl
    }

    self.sess.run(tf.global_variables_initializer())
def get_action_dist(action_space: gym.Space,
                    config: ModelConfigDict,
                    dist_type: Optional[Union[
                        str, Type[ActionDistribution]]] = None,
                    framework: str = "tf",
                    **kwargs) -> (type, int):
    """Returns a distribution class and size for the given action space.

    Args:
        action_space (Space): Action space of the target gym env.
        config (Optional[dict]): Optional model config. Falls back to
            ``MODEL_DEFAULTS`` when empty or None.
        dist_type (Optional[Union[str, Type[ActionDistribution]]]):
            Identifier of the action distribution (str) interpreted as a
            hint or the actual ActionDistribution class to use.
        framework (str): One of "tf2", "tf", "tfe", "torch", or "jax".
        kwargs (dict): Optional kwargs to pass on to the Distribution's
            constructor. Currently unused by this function.

    Returns:
        Tuple:
            - dist_class (ActionDistribution): Python class of the
                distribution (possibly a ``functools.partial`` of it).
            - dist_dim (int): The size of the input vector to the
                distribution.

    Raises:
        UnsupportedSpaceException: For non-integer Box spaces with more
            than one dimension.
        NotImplementedError: For unsupported space / ``dist_type``
            combinations, and for Simplex spaces with torch.
    """

    dist_cls = None
    config = config or MODEL_DEFAULTS

    # Custom distribution given.
    if config.get("custom_action_dist"):
        # Work on a copy so the caller's config keeps its
        # "custom_action_dist" entry.
        custom_action_config = config.copy()
        action_dist_name = custom_action_config.pop("custom_action_dist")
        logger.debug(
            "Using custom action distribution {}".format(action_dist_name))
        dist_cls = _global_registry.get(RLLIB_ACTION_DIST, action_dist_name)
        return ModelCatalog._get_multi_action_distribution(
            dist_cls, action_space, custom_action_config, framework)

    # Dist_type is given directly as a class.
    elif (type(dist_type) is type
          and issubclass(dist_type, ActionDistribution)
          and dist_type not in (MultiActionDistribution,
                                TorchMultiActionDistribution)):
        dist_cls = dist_type

    # Box space -> DiagGaussian OR Deterministic.
    elif isinstance(action_space, Box):
        if action_space.dtype.name.startswith("int"):
            # Integer Box: one categorical per element, each covering the
            # global [min(low), max(high)] range.
            low_ = np.min(action_space.low)
            high_ = np.max(action_space.high)
            dist_cls = (TorchMultiCategorical
                        if framework == "torch" else MultiCategorical)
            # `np.prod` instead of `np.product` (removed in NumPy 2.0).
            num_cats = int(np.prod(action_space.shape))
            return (
                partial(
                    dist_cls,
                    input_lens=[high_ - low_ + 1 for _ in range(num_cats)],
                    action_space=action_space,
                ),
                num_cats * (high_ - low_ + 1),
            )
        else:
            if len(action_space.shape) > 1:
                raise UnsupportedSpaceException(
                    "Action space has multiple dimensions "
                    "{}. ".format(action_space.shape) +
                    "Consider reshaping this into a single dimension, "
                    "using a custom action distribution, "
                    "using a Tuple action space, or the multi-agent API.")
            # TODO(sven): Check for bounds and return SquashedNormal, etc..
            if dist_type is None:
                return (
                    partial(
                        TorchDiagGaussian
                        if framework == "torch" else DiagGaussian,
                        action_space=action_space,
                    ),
                    DiagGaussian.required_model_output_shape(
                        action_space, config),
                )
            elif dist_type == "deterministic":
                dist_cls = (TorchDeterministic
                            if framework == "torch" else Deterministic)
            # Any other `dist_type` leaves `dist_cls` as None and is
            # rejected by the final check below.

    # Discrete Space -> Categorical.
    elif isinstance(action_space, Discrete):
        dist_cls = (TorchCategorical if framework == "torch" else
                    JAXCategorical if framework == "jax" else Categorical)

    # Tuple/Dict Spaces -> MultiAction.
    elif (dist_type in (
            MultiActionDistribution,
            TorchMultiActionDistribution,
    ) or isinstance(action_space, (Tuple, Dict))):
        # Fixed: all tf flavours ("tf", "tf2", "tfe") use the tf
        # distribution. Previously only "tf" did and "tf2"/"tfe" got the
        # torch class. Other frameworks keep the previous (torch) choice.
        return ModelCatalog._get_multi_action_distribution(
            (MultiActionDistribution if framework in ("tf", "tf2", "tfe")
             else TorchMultiActionDistribution),
            action_space,
            config,
            framework,
        )

    # Simplex -> Dirichlet.
    elif isinstance(action_space, Simplex):
        if framework == "torch":
            # TODO(sven): implement
            raise NotImplementedError(
                "Simplex action spaces not supported for torch.")
        dist_cls = Dirichlet

    # MultiDiscrete -> MultiCategorical.
    elif isinstance(action_space, MultiDiscrete):
        dist_cls = (TorchMultiCategorical
                    if framework == "torch" else MultiCategorical)
        return partial(dist_cls, input_lens=action_space.nvec), int(
            sum(action_space.nvec))

    # Unknown type -> Error.
    else:
        raise NotImplementedError("Unsupported args: {} {}".format(
            action_space, dist_type))

    # A branch matched but did not select a class (e.g. a float Box with an
    # unrecognised `dist_type`): raise a clear error instead of an
    # AttributeError on None.
    if dist_cls is None:
        raise NotImplementedError("Unsupported args: {} {}".format(
            action_space, dist_type))

    return dist_cls, dist_cls.required_model_output_shape(
        action_space, config)
def get_action_dist(action_space,
                    config,
                    dist_type=None,
                    framework="tf",
                    **kwargs):
    """Returns a distribution class and size for the given action space.

    Args:
        action_space (Space): Action space of the target gym env.
        config (Optional[dict]): Optional model config. Falls back to
            ``MODEL_DEFAULTS`` when empty or None.
        dist_type (Optional[str]): Identifier of the action distribution
            interpreted as a hint, or an ActionDistribution class to use
            directly.
        framework (str): One of "tf", "tfe", or "torch".
        kwargs (dict): Optional kwargs to pass on to the Distribution's
            constructor. Currently unused by this function.

    Returns:
        Tuple:
            - dist_class (ActionDistribution): Python class of the
                distribution (possibly a ``functools.partial`` of it).
            - dist_dim (int): The size of the input vector to the
                distribution.

    Raises:
        UnsupportedSpaceException: For Box spaces with more than one
            dimension.
        NotImplementedError: For unsupported space / ``dist_type``
            combinations, and for Simplex spaces with torch.
    """

    dist = None
    config = config or MODEL_DEFAULTS

    # Custom distribution given.
    if config.get("custom_action_dist"):
        action_dist_name = config["custom_action_dist"]
        logger.debug(
            "Using custom action distribution {}".format(action_dist_name))
        dist = _global_registry.get(RLLIB_ACTION_DIST, action_dist_name)

    # Dist_type is given directly as a class.
    elif type(dist_type) is type and \
            issubclass(dist_type, ActionDistribution) and \
            dist_type not in (
                MultiActionDistribution, TorchMultiActionDistribution):
        dist = dist_type

    # Box space -> DiagGaussian OR Deterministic.
    elif isinstance(action_space, gym.spaces.Box):
        if len(action_space.shape) > 1:
            raise UnsupportedSpaceException(
                "Action space has multiple dimensions "
                "{}. ".format(action_space.shape) +
                "Consider reshaping this into a single dimension, "
                "using a custom action distribution, "
                "using a Tuple action space, or the multi-agent API.")
        # TODO(sven): Check for bounds and return SquashedNormal, etc..
        if dist_type is None:
            dist = TorchDiagGaussian if framework == "torch" \
                else DiagGaussian
        elif dist_type == "deterministic":
            dist = TorchDeterministic if framework == "torch" \
                else Deterministic
        # Any other `dist_type` leaves `dist` as None and is rejected by
        # the final check below.

    # Discrete Space -> Categorical.
    elif isinstance(action_space, gym.spaces.Discrete):
        dist = TorchCategorical if framework == "torch" else Categorical

    # Tuple/Dict Spaces -> MultiAction.
    elif dist_type in (MultiActionDistribution,
                       TorchMultiActionDistribution) or \
            isinstance(action_space, (gym.spaces.Tuple, gym.spaces.Dict)):
        # Resolve one child distribution per flattened sub-space; the
        # multi-action input size is the sum of the children's sizes.
        flat_action_space = flatten_space(action_space)
        child_dists_and_in_lens = tree.map_structure(
            lambda s: ModelCatalog.get_action_dist(
                s, config, framework=framework), flat_action_space)
        child_dists = [e[0] for e in child_dists_and_in_lens]
        input_lens = [int(e[1]) for e in child_dists_and_in_lens]
        return partial(
            (TorchMultiActionDistribution
             if framework == "torch" else MultiActionDistribution),
            action_space=action_space,
            child_distributions=child_dists,
            input_lens=input_lens), int(sum(input_lens))

    # Simplex -> Dirichlet.
    elif isinstance(action_space, Simplex):
        if framework == "torch":
            # TODO(sven): implement
            raise NotImplementedError(
                "Simplex action spaces not supported for torch.")
        dist = Dirichlet

    # MultiDiscrete -> MultiCategorical.
    elif isinstance(action_space, gym.spaces.MultiDiscrete):
        dist = TorchMultiCategorical if framework == "torch" else \
            MultiCategorical
        return partial(dist, input_lens=action_space.nvec), \
            int(sum(action_space.nvec))

    # Unknown type -> Error.
    else:
        raise NotImplementedError("Unsupported args: {} {}".format(
            action_space, dist_type))

    # A branch matched but did not select a class (e.g. a Box space with an
    # unrecognised `dist_type`): raise a clear error instead of an
    # AttributeError on None.
    if dist is None:
        raise NotImplementedError("Unsupported args: {} {}".format(
            action_space, dist_type))

    return dist, dist.required_model_output_shape(action_space, config)
def get_model_v2(obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 framework="tf",
                 name="default_model",
                 model_interface=None,
                 default_model=None,
                 **model_kwargs):
    """Returns a suitable model compatible with given spaces and output.

    Args:
        obs_space (Space): Observation space of the target gym env. This
            may have an `original_space` attribute that specifies how to
            unflatten the tensor into a ragged tensor.
        action_space (Space): Action space of the target gym env.
        num_outputs (int): The size of the output vector of the model.
        model_config (dict): The model config. NOTE: a deprecated
            "custom_options" key is moved in place to
            "custom_model_config" (the dict is mutated).
        framework (str): One of "tf", "tfe", or "torch".
        name (str): Name (scope) for the model.
        model_interface (cls): Interface required for the model
        default_model (cls): Override the default class for the model. This
            only has an effect when not using a custom model
        model_kwargs (dict): args to pass to the ModelV2 constructor

    Returns:
        model (ModelV2): Model to use for the policy.

    Raises:
        ValueError: If a custom tf ModelV2 created variables that are not
            in `model.variables()`, or if eager execution is active but no
            ModelV2 class is available.
        NotImplementedError: If `framework` is not "tf", "tfe" or "torch".
    """
    if model_config.get("custom_model"):

        if "custom_options" in model_config and \
                model_config["custom_options"] != DEPRECATED_VALUE:
            deprecation_warning(
                "model.custom_options",
                "model.custom_model_config",
                error=False)
            model_config["custom_model_config"] = \
                model_config.pop("custom_options")

        # The custom model may be given as a class or as a registered name.
        if isinstance(model_config["custom_model"], type):
            model_cls = model_config["custom_model"]
        else:
            model_cls = _global_registry.get(RLLIB_MODEL,
                                             model_config["custom_model"])

        # TODO(sven): Hard-deprecate Model(V1).
        if issubclass(model_cls, ModelV2):
            logger.info("Wrapping {} as {}".format(model_cls,
                                                   model_interface))
            model_cls = ModelCatalog._wrap_if_needed(
                model_cls, model_interface)

            if framework in ["tf", "tfe"]:
                # Track and warn if vars were created but not registered.
                created = set()

                def track_var_creation(next_creator, **kw):
                    v = next_creator(**kw)
                    created.add(v)
                    return v

                with tf.variable_creator_scope(track_var_creation):
                    # Try calling with kwargs first (custom ModelV2 should
                    # accept these as kwargs, not get them from
                    # config["custom_model_config"] anymore).
                    try:
                        instance = model_cls(obs_space, action_space,
                                             num_outputs, model_config,
                                             name, **model_kwargs)
                    except TypeError as e:
                        # Keyword error: Try old way w/o kwargs.
                        # `str(e)` is used instead of `e.args[0]` so that
                        # a TypeError without (string) args cannot raise
                        # a secondary error here and mask the original.
                        if "__init__() got an unexpected " in str(e):
                            logger.warning(
                                "Custom ModelV2 should accept all custom "
                                "options as **kwargs, instead of expecting"
                                " them in config['custom_model_config']!")
                            instance = model_cls(obs_space, action_space,
                                                 num_outputs, model_config,
                                                 name)
                        # Other error -> re-raise.
                        else:
                            raise
                registered = set(instance.variables())
                not_registered = set()
                for var in created:
                    if var not in registered:
                        not_registered.add(var)
                if not_registered:
                    raise ValueError(
                        "It looks like variables {} were created as part "
                        "of {} but does not appear in model.variables() "
                        "({}). Did you forget to call "
                        "model.register_variables() on the variables in "
                        "question?".format(not_registered, instance,
                                           registered))
            else:
                # PyTorch automatically tracks nn.Modules inside the parent
                # nn.Module's constructor.
                # TODO(sven): Do this for TF as well.
                instance = model_cls(obs_space, action_space, num_outputs,
                                     model_config, name, **model_kwargs)
            return instance
        # TODO(sven): Hard-deprecate Model(V1). This check will be
        # superflous then.
        elif tf.executing_eagerly():
            raise ValueError(
                "Eager execution requires a TFModelV2 model to be "
                "used, however you specified a custom model {}".format(
                    model_cls))

    if framework in ["tf", "tfe"]:
        v2_class = None
        # Try to get a default v2 model.
        if not model_config.get("custom_model"):
            v2_class = default_model or ModelCatalog._get_v2_model_class(
                obs_space, model_config, framework=framework)

        # Only wrap when a V2 class was resolved; with a custom (V1) model
        # `v2_class` is None and the fallback below applies instead of
        # failing on `None.forward`.
        if model_config.get("use_lstm") and v2_class is not None:
            wrapped_cls = v2_class
            forward = wrapped_cls.forward
            v2_class = ModelCatalog._wrap_if_needed(
                wrapped_cls, LSTMWrapper)
            v2_class._wrapped_forward = forward

        # fallback to a default v1 model
        if v2_class is None:
            if tf.executing_eagerly():
                raise ValueError(
                    "Eager execution requires a TFModelV2 model to be "
                    "used, however there is no default V2 model for this "
                    "observation space: {}, use_lstm={}".format(
                        obs_space, model_config.get("use_lstm")))
            v2_class = make_v1_wrapper(ModelCatalog.get_model)
        # Wrap in the requested interface.
        wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
        return wrapper(obs_space, action_space, num_outputs, model_config,
                       name, **model_kwargs)
    elif framework == "torch":
        v2_class = \
            default_model or ModelCatalog._get_v2_model_class(
                obs_space, model_config, framework=framework)
        if model_config.get("use_lstm"):
            from ray.rllib.models.torch.recurrent_net import LSTMWrapper \
                as TorchLSTMWrapper
            wrapped_cls = v2_class
            forward = wrapped_cls.forward
            v2_class = ModelCatalog._wrap_if_needed(
                wrapped_cls, TorchLSTMWrapper)
            v2_class._wrapped_forward = forward
        # Wrap in the requested interface.
        wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
        return wrapper(obs_space, action_space, num_outputs, model_config,
                       name, **model_kwargs)
    else:
        raise NotImplementedError(
            "`framework` must be 'tf|tfe|torch', but is "
            "{}!".format(framework))
def get_model_v2(obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 framework,
                 name="default_model",
                 model_interface=None,
                 default_model=None,
                 **model_kwargs):
    """Returns a suitable model compatible with given spaces and output.

    Args:
        obs_space (Space): Observation space of the target gym env. This
            may have an `original_space` attribute that specifies how to
            unflatten the tensor into a ragged tensor.
        action_space (Space): Action space of the target gym env.
        num_outputs (int): The size of the output vector of the model.
        model_config (dict): The model config; its "custom_model" and
            "use_lstm" entries are read here.
        framework (str): Either "tf" or "torch".
        name (str): Name (scope) for the model.
        model_interface (cls): Interface required for the model
        default_model (cls): Override the default class for the model. This
            only has an effect when not using a custom model
        model_kwargs (dict): args to pass to the ModelV2 constructor

    Returns:
        model (ModelV2): Model to use for the policy.

    Raises:
        ValueError: If the custom ModelV2 does not subclass
            `model_interface`, if a custom tf model created variables that
            are not in `model.variables()`, or if eager execution is
            active but no ModelV2 class is available.
        NotImplementedError: If `framework` is neither "tf" nor "torch".
    """
    if model_config.get("custom_model"):
        model_cls = _global_registry.get(RLLIB_MODEL,
                                         model_config["custom_model"])
        if issubclass(model_cls, ModelV2):
            if model_interface and not issubclass(
                    model_cls, model_interface):
                # Format the interface into the message; passing it as a
                # second exception arg renders the error as a tuple.
                raise ValueError(
                    "The given model must subclass {}".format(
                        model_interface))

            if framework == "tf":
                created = set()

                # Track and warn if vars were created but not registered
                def track_var_creation(next_creator, **kw):
                    v = next_creator(**kw)
                    created.add(v)
                    return v

                with tf.variable_creator_scope(track_var_creation):
                    instance = model_cls(obs_space, action_space,
                                         num_outputs, model_config, name,
                                         **model_kwargs)
                registered = set(instance.variables())
                not_registered = set()
                for var in created:
                    if var not in registered:
                        not_registered.add(var)
                if not_registered:
                    raise ValueError(
                        "It looks like variables {} were created as part "
                        "of {} but does not appear in model.variables() "
                        "({}). Did you forget to call "
                        "model.register_variables() on the variables in "
                        "question?".format(not_registered, instance,
                                           registered))
            else:
                # no variable tracking
                instance = model_cls(obs_space, action_space, num_outputs,
                                     model_config, name, **model_kwargs)
            return instance
        elif tf.executing_eagerly():
            raise ValueError(
                "Eager execution requires a TFModelV2 model to be "
                "used, however you specified a custom model {}".format(
                    model_cls))

    if framework == "tf":
        v2_class = None
        # try to get a default v2 model
        if not model_config.get("custom_model"):
            v2_class = default_model or ModelCatalog._get_v2_model(
                obs_space, model_config)
        # fallback to a default v1 model
        if v2_class is None:
            if tf.executing_eagerly():
                raise ValueError(
                    "Eager execution requires a TFModelV2 model to be "
                    "used, however there is no default V2 model for this "
                    "observation space: {}, use_lstm={}".format(
                        obs_space, model_config.get("use_lstm")))
            v2_class = make_v1_wrapper(ModelCatalog.get_model)
        # wrap in the requested interface
        wrapper = ModelCatalog._wrap_if_needed(v2_class, model_interface)
        return wrapper(obs_space, action_space, num_outputs, model_config,
                       name, **model_kwargs)
    elif framework == "torch":
        # NOTE: `model_kwargs` and `model_interface` are not applied on
        # this path.
        if default_model:
            return default_model(obs_space, action_space, num_outputs,
                                 model_config, name)
        return ModelCatalog._get_default_torch_model_v2(
            obs_space, action_space, num_outputs, model_config, name)
    else:
        raise NotImplementedError(
            "Framework must be 'tf' or 'torch': {}".format(framework))
def _setup(self, config):
    """Builds the trainer from a user config.

    Resolves the env creator, merges `config` into the class defaults,
    resolves the deprecated `use_pytorch`/`eager` keys into `framework`,
    optionally wraps the env creator for action normalization, validates
    the config, instantiates the callbacks, sets the log level, calls
    `self._init()` and, if `evaluation_interval` is set, creates the
    evaluation workers.

    Args:
        config (dict): The user-provided config. Its "env" key is
            overwritten with `self._env_id` when that is set.

    Raises:
        ValueError: If `config["callbacks"]` is not callable.
        AssertionError: If `evaluation_config` sets "in_evaluation" to
            something other than True.
    """
    env = self._env_id
    if env:
        config["env"] = env
        if _global_registry.contains(ENV_CREATOR, env):
            self.env_creator = _global_registry.get(ENV_CREATOR, env)
        else:
            import gym  # soft dependency
            self.env_creator = lambda env_config: gym.make(env)
    else:
        self.env_creator = lambda env_config: None

    # Merge the supplied config with the class default, but store the
    # user-provided one.
    self.raw_user_config = config
    self.config = Trainer.merge_trainer_configs(self._default_config,
                                                config)

    # Check and resolve DL framework settings.
    if "use_pytorch" in self.config and \
            self.config["use_pytorch"] != DEPRECATED_VALUE:
        deprecation_warning("use_pytorch", "framework=torch", error=False)
        if self.config["use_pytorch"]:
            self.config["framework"] = "torch"
        self.config.pop("use_pytorch")
    if "eager" in self.config and self.config["eager"] != DEPRECATED_VALUE:
        deprecation_warning("eager", "framework=tfe", error=False)
        if self.config["eager"]:
            self.config["framework"] = "tfe"
        self.config.pop("eager")

    # Check all dependencies and resolve "auto" framework.
    self.config["framework"] = check_framework(self.config["framework"])
    # Notify about eager/tracing support.
    if tf and self.config["framework"] == "tfe":
        if not tf.executing_eagerly():
            tf.enable_eager_execution()
        logger.info("Executing eagerly, with eager_tracing={}".format(
            self.config["eager_tracing"]))
    if tf and not tf.executing_eagerly() and \
            self.config["framework"] != "torch":
        logger.info("Tip: set framework=tfe or the --eager flag to enable "
                    "TensorFlow eager execution")

    if self.config["normalize_actions"]:
        inner = self.env_creator

        def normalize(env):
            import gym  # soft dependency
            if not isinstance(env, gym.Env):
                # The env type must be formatted into the message;
                # passing it as a second arg leaves "{}" unfilled.
                raise ValueError(
                    "Cannot apply NormalizeActionActionWrapper to env of "
                    "type {}, which does not subclass gym.Env.".format(
                        type(env)))
            return NormalizeActionWrapper(env)

        # The check above runs when the env is created, not here.
        self.env_creator = lambda env_config: normalize(inner(env_config))

    Trainer._validate_config(self.config)
    if not callable(self.config["callbacks"]):
        raise ValueError(
            "`callbacks` must be a callable method that "
            "returns a subclass of DefaultCallbacks, got {}".format(
                self.config["callbacks"]))
    self.callbacks = self.config["callbacks"]()
    log_level = self.config.get("log_level")
    if log_level in ["WARN", "ERROR"]:
        logger.info("Current log_level is {}. For more information, "
                    "set 'log_level': 'INFO' / 'DEBUG' or use the -v and "
                    "-vv flags.".format(log_level))
    if self.config.get("log_level"):
        logging.getLogger("ray.rllib").setLevel(self.config["log_level"])

    def get_scope():
        if tf and not tf.executing_eagerly():
            return tf.Graph().as_default()
        else:
            return open(os.devnull)  # fake a no-op scope

    with get_scope():
        self._init(self.config, self.env_creator)

        # Evaluation setup.
        if self.config.get("evaluation_interval"):
            # Update env_config with evaluation settings:
            extra_config = copy.deepcopy(self.config["evaluation_config"])
            # Assert that user has not unset "in_evaluation".
            assert "in_evaluation" not in extra_config or \
                extra_config["in_evaluation"] is True
            extra_config.update({
                "batch_mode": "complete_episodes",
                "rollout_fragment_length": 1,
                "in_evaluation": True,
            })
            logger.debug(
                "using evaluation_config: {}".format(extra_config))
            self.evaluation_workers = self._make_workers(
                self.env_creator,
                self._policy,
                merge_dicts(self.config, extra_config),
                num_workers=self.config["evaluation_num_workers"])
            self.evaluation_metrics = {}
def get_action_dist(action_space,
                    config,
                    dist_type=None,
                    torch=None,
                    framework="tf"):
    """Returns a distribution class and size for the given action space.

    Args:
        action_space (Space): Action space of the target gym env.
        config (dict): Optional model config. Falls back to
            MODEL_DEFAULTS if None or empty.
        dist_type (Optional[str]): Identifier of the action distribution.
        torch (bool): Obsoleted: Whether to return PyTorch Model and
            distribution (use framework="torch" instead). When not None,
            it overrides `framework`.
        framework (str): One of "tf" or "torch".

    Returns:
        dist_class (ActionDistribution): Python class of the distribution.
        dist_dim (int): The size of the input vector to the distribution.

    Raises:
        UnsupportedSpaceException: If a Box action space has more than
            one dimension.
        NotImplementedError: For Tuple, Simplex or MultiDiscrete spaces
            with framework="torch", for Dict spaces, for a Box space with
            an unrecognized `dist_type`, or for any unsupported space.
    """
    # Obsoleted parameter `torch`:
    if torch is not None:
        deprecation_warning("`torch` parameter", "`framework`='tf|torch'")
        framework = "torch" if torch else "tf"

    dist = None
    config = config or MODEL_DEFAULTS
    if config.get("custom_action_dist"):
        action_dist_name = config["custom_action_dist"]
        logger.debug(
            "Using custom action distribution {}".format(action_dist_name))
        dist = _global_registry.get(RLLIB_ACTION_DIST, action_dist_name)

    elif isinstance(action_space, gym.spaces.Box):
        if len(action_space.shape) > 1:
            raise UnsupportedSpaceException(
                "Action space has multiple dimensions "
                "{}. ".format(action_space.shape) +
                "Consider reshaping this into a single dimension, "
                "using a custom action distribution, "
                "using a Tuple action space, or the multi-agent API.")
        if dist_type is None:
            dist = DiagGaussian if framework == "tf" else TorchDiagGaussian
        elif dist_type == "deterministic":
            dist = Deterministic
        else:
            # Without this branch `dist` would stay None and the final
            # return would fail with an opaque AttributeError.
            raise NotImplementedError("Unsupported args: {} {}".format(
                action_space, dist_type))
    elif isinstance(action_space, gym.spaces.Discrete):
        dist = Categorical if framework == "tf" else TorchCategorical
    elif isinstance(action_space, gym.spaces.Tuple):
        if framework == "torch":
            # TODO(sven): implement
            raise NotImplementedError(
                "Tuple action spaces not supported for Pytorch.")
        # Resolve one (dist_class, input_len) pair per sub-space.
        child_dist = []
        input_lens = []
        for action in action_space.spaces:
            child_cls, action_size = ModelCatalog.get_action_dist(
                action, config)
            child_dist.append(child_cls)
            input_lens.append(action_size)
        return partial(
            MultiActionDistribution,
            child_distributions=child_dist,
            action_space=action_space,
            input_lens=input_lens), sum(input_lens)
    elif isinstance(action_space, Simplex):
        if framework == "torch":
            # TODO(sven): implement
            raise NotImplementedError(
                "Simplex action spaces not supported for torch.")
        dist = Dirichlet
    elif isinstance(action_space, gym.spaces.MultiDiscrete):
        if framework == "torch":
            # TODO(sven): implement
            raise NotImplementedError(
                "MultiDiscrete action spaces not supported for Pytorch.")
        return partial(MultiCategorical, input_lens=action_space.nvec), \
            int(sum(action_space.nvec))
    elif isinstance(action_space, gym.spaces.Dict):
        # TODO(sven): implement
        raise NotImplementedError(
            "Dict action spaces are not supported, consider using "
            "gym.spaces.Tuple instead")
    else:
        raise NotImplementedError("Unsupported args: {} {}".format(
            action_space, dist_type))

    return dist, dist.required_model_output_shape(action_space, config)
def get_action_dist(action_space, config, dist_type=None, torch=False):
    """Returns action distribution class and size for the given action space.

    Args:
        action_space (Space): Action space of the target gym env.
        config (dict): Optional model config. Falls back to
            MODEL_DEFAULTS if None or empty.
        dist_type (str): Optional identifier of the action distribution.
        torch (bool): Optional whether to return PyTorch distribution.

    Returns:
        dist_class (ActionDistribution): Python class of the distribution.
        dist_dim (int): The size of the input vector to the distribution.

    Raises:
        UnsupportedSpaceException: If a Box action space has more than
            one dimension.
        NotImplementedError: For Tuple, Simplex or MultiDiscrete spaces
            with `torch=True`, for a Box space with an unrecognized
            `dist_type`, or for any unsupported action space.
    """
    config = config or MODEL_DEFAULTS
    if config.get("custom_action_dist"):
        action_dist_name = config["custom_action_dist"]
        logger.debug(
            "Using custom action distribution {}".format(action_dist_name))
        dist = _global_registry.get(RLLIB_ACTION_DIST, action_dist_name)

    elif isinstance(action_space, gym.spaces.Box):
        if len(action_space.shape) > 1:
            raise UnsupportedSpaceException(
                "Action space has multiple dimensions "
                "{}. ".format(action_space.shape) +
                "Consider reshaping this into a single dimension, "
                "using a custom action distribution, "
                "using a Tuple action space, or the multi-agent API.")
        if dist_type is None:
            dist = TorchDiagGaussian if torch else DiagGaussian
        elif dist_type == "deterministic":
            dist = Deterministic
        else:
            # No distribution matches this hint; fail explicitly instead
            # of leaving `dist` unbound.
            raise NotImplementedError("Unsupported args: {} {}".format(
                action_space, dist_type))
    elif isinstance(action_space, gym.spaces.Discrete):
        dist = TorchCategorical if torch else Categorical
    elif isinstance(action_space, gym.spaces.Tuple):
        if torch:
            raise NotImplementedError("Tuple action spaces not supported "
                                      "for Pytorch.")
        # Resolve one (dist_class, input_len) pair per sub-space.
        child_dist = []
        input_lens = []
        for action in action_space.spaces:
            child_cls, action_size = ModelCatalog.get_action_dist(
                action, config)
            child_dist.append(child_cls)
            input_lens.append(action_size)
        return partial(
            MultiActionDistribution,
            child_distributions=child_dist,
            action_space=action_space,
            input_lens=input_lens), sum(input_lens)
    elif isinstance(action_space, Simplex):
        if torch:
            raise NotImplementedError("Simplex action spaces not "
                                      "supported for Pytorch.")
        dist = Dirichlet
    elif isinstance(action_space, gym.spaces.MultiDiscrete):
        if torch:
            raise NotImplementedError("MultiDiscrete action spaces not "
                                      "supported for Pytorch.")
        return partial(MultiCategorical, input_lens=action_space.nvec), \
            int(sum(action_space.nvec))
    else:
        # This error used to sit after the unconditional return below and
        # could never fire; unsupported spaces failed on the unbound
        # `dist` instead.
        raise NotImplementedError("Unsupported args: {} {}".format(
            action_space, dist_type))

    return dist, dist.required_model_output_shape(action_space, config)
def get_action_dist(action_space,
                    config,
                    dist_type=None,
                    framework="tf",
                    **kwargs):
    """Returns a distribution class and size for the given action space.

    Args:
        action_space (Space): Action space of the target gym env.
        config (Optional[dict]): Optional model config. Falls back to
            MODEL_DEFAULTS if None or empty.
        dist_type (Optional[str]): Identifier of the action distribution.
            May also be an ActionDistribution subclass, which is then
            used directly.
        framework (str): One of "tf" or "torch".
        kwargs (dict): Accepted for interface compatibility; not used
            inside this function body.

    Returns:
        dist_class (ActionDistribution): Python class of the distribution.
        dist_dim (int): The size of the input vector to the distribution.

    Raises:
        UnsupportedSpaceException: If a Box action space has more than
            one dimension.
        NotImplementedError: For Tuple, Simplex or MultiDiscrete spaces
            with framework="torch", for Dict spaces, for a Box space with
            an unrecognized `dist_type`, or for any unsupported space.
    """
    dist = None
    config = config or MODEL_DEFAULTS
    # Custom distribution given.
    if config.get("custom_action_dist"):
        action_dist_name = config["custom_action_dist"]
        logger.debug(
            "Using custom action distribution {}".format(action_dist_name))
        dist = _global_registry.get(RLLIB_ACTION_DIST, action_dist_name)
    # Dist_type is given directly as a class.
    elif type(dist_type) is type and \
            issubclass(dist_type, ActionDistribution) and \
            dist_type is not MultiActionDistribution:
        dist = dist_type
    # Box space -> DiagGaussian OR Deterministic.
    elif isinstance(action_space, gym.spaces.Box):
        if len(action_space.shape) > 1:
            raise UnsupportedSpaceException(
                "Action space has multiple dimensions "
                "{}. ".format(action_space.shape) +
                "Consider reshaping this into a single dimension, "
                "using a custom action distribution, "
                "using a Tuple action space, or the multi-agent API.")
        # TODO(sven): Check for bounds and return SquashedNormal, etc..
        if dist_type is None:
            dist = DiagGaussian if framework == "tf" else TorchDiagGaussian
        elif dist_type == "deterministic":
            dist = Deterministic
        else:
            # Without this branch `dist` would stay None and the final
            # return would fail with an opaque AttributeError.
            raise NotImplementedError("Unsupported args: {} {}".format(
                action_space, dist_type))
    # Discrete Space -> Categorical.
    elif isinstance(action_space, gym.spaces.Discrete):
        dist = Categorical if framework == "tf" else TorchCategorical
    # Tuple Space -> MultiAction.
    elif dist_type is MultiActionDistribution or \
            isinstance(action_space, gym.spaces.Tuple):
        if framework == "torch":
            # TODO(sven): implement
            raise NotImplementedError(
                "Tuple action spaces not supported for Pytorch.")
        # Resolve one (dist_class, input_len) pair per sub-space.
        child_dist = []
        input_lens = []
        for action in action_space.spaces:
            child_cls, action_size = ModelCatalog.get_action_dist(
                action, config)
            child_dist.append(child_cls)
            input_lens.append(action_size)
        return partial(
            MultiActionDistribution,
            action_space=action_space,
            child_distributions=child_dist,
            input_lens=input_lens), sum(input_lens)
    # Simplex -> Dirichlet.
    elif isinstance(action_space, Simplex):
        if framework == "torch":
            # TODO(sven): implement
            raise NotImplementedError(
                "Simplex action spaces not supported for torch.")
        dist = Dirichlet
    # MultiDiscrete -> MultiCategorical.
    elif isinstance(action_space, gym.spaces.MultiDiscrete):
        if framework == "torch":
            # TODO(sven): implement
            raise NotImplementedError(
                "MultiDiscrete action spaces not supported for Pytorch.")
        return partial(MultiCategorical, input_lens=action_space.nvec), \
            int(sum(action_space.nvec))
    # Dict -> TODO(sven)
    elif isinstance(action_space, gym.spaces.Dict):
        # TODO(sven): implement
        raise NotImplementedError(
            "Dict action spaces are not supported, consider using "
            "gym.spaces.Tuple instead")
    # Unknown type -> Error.
    else:
        raise NotImplementedError("Unsupported args: {} {}".format(
            action_space, dist_type))

    return dist, dist.required_model_output_shape(action_space, config)
def _setup(self, config):
    """Builds the trainer from a user config.

    Resolves the env creator, merges `config` into the class defaults,
    optionally wraps the env creator for action normalization, validates
    the config, sets the log level, calls `self._init()` and, if
    `evaluation_interval` is set, creates the evaluation workers.

    Args:
        config (dict): The user-provided config. Its "env" key is
            overwritten with `self._env_id` when that is set.

    Raises:
        AssertionError: If `evaluation_config` sets "in_evaluation" to
            something other than True.
    """
    env = self._env_id
    if env:
        config["env"] = env
        if _global_registry.contains(ENV_CREATOR, env):
            self.env_creator = _global_registry.get(ENV_CREATOR, env)
        else:
            import gym  # soft dependency
            self.env_creator = lambda env_config: gym.make(env)
    else:
        self.env_creator = lambda env_config: None

    # Merge the supplied config with the class default, but store the
    # user-provided one.
    self.raw_user_config = config
    self.config = Trainer.merge_trainer_configs(self._default_config,
                                                config)

    if self.config["normalize_actions"]:
        inner = self.env_creator

        def normalize(env):
            import gym  # soft dependency
            if not isinstance(env, gym.Env):
                # The env type must be formatted into the message;
                # passing it as a second arg leaves "{}" unfilled.
                raise ValueError(
                    "Cannot apply NormalizeActionActionWrapper to env of "
                    "type {}, which does not subclass gym.Env.".format(
                        type(env)))
            return NormalizeActionWrapper(env)

        # The check above runs when the env is created, not here.
        self.env_creator = lambda env_config: normalize(inner(env_config))

    Trainer._validate_config(self.config)
    log_level = self.config.get("log_level")
    if log_level in ["WARN", "ERROR"]:
        logger.info("Current log_level is {}. For more information, "
                    "set 'log_level': 'INFO' / 'DEBUG' or use the -v and "
                    "-vv flags.".format(log_level))
    if self.config.get("log_level"):
        logging.getLogger("ray.rllib").setLevel(self.config["log_level"])

    def get_scope():
        if tf and not tf.executing_eagerly():
            return tf.Graph().as_default()
        else:
            # Local import keeps this block self-contained.
            import os
            # fake a no-op scope; os.devnull instead of a hard-coded
            # "/dev/null" so this also works on non-POSIX platforms.
            return open(os.devnull)

    with get_scope():
        self._init(self.config, self.env_creator)

        # Evaluation setup.
        if self.config.get("evaluation_interval"):
            # Update env_config with evaluation settings:
            extra_config = copy.deepcopy(self.config["evaluation_config"])
            # Assert that user has not unset "in_evaluation".
            assert "in_evaluation" not in extra_config or \
                extra_config["in_evaluation"] is True
            extra_config.update({
                "batch_mode": "complete_episodes",
                "batch_steps": 1,
                "in_evaluation": True,
            })
            logger.debug(
                "using evaluation_config: {}".format(extra_config))
            self.evaluation_workers = self._make_workers(
                self.env_creator,
                self._policy,
                merge_dicts(self.config, extra_config),
                num_workers=self.config["evaluation_num_workers"])
            self.evaluation_metrics = {}
def get_env_creator(env_id):
    """Look up the environment creator function registered for `env_id`.

    Args:
        env_id: Identifier under which the creator was registered.

    Returns:
        The creator stored in the global registry for `env_id`.

    Raises:
        ValueError: If no creator is registered under `env_id`.
    """
    is_registered = _global_registry.contains(ENV_CREATOR, env_id)
    if is_registered:
        return _global_registry.get(ENV_CREATOR, env_id)
    raise ValueError(f"Environment id {env_id} not registered in Tune")
def get_model_v2(obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 framework="tf",
                 name=None,
                 model_interface=None,
                 **model_kwargs):
    """Returns a suitable model compatible with given spaces and output.

    Args:
        obs_space (Space): Observation space of the target gym env. This
            may have an `original_space` attribute that specifies how to
            unflatten the tensor into a ragged tensor.
        action_space (Space): Action space of the target gym env.
        num_outputs (int): The size of the output vector of the model.
        model_config (dict): The model config; its "custom_model" entry
            is read here.
        framework (str): Either "tf" or "torch".
        name (str): Name (scope) for the model.
        model_interface (cls): Interface required for the model
        model_kwargs (dict): args to pass to the ModelV2 constructor

    Returns:
        model (ModelV2): Model to use for the policy.

    Raises:
        ValueError: If the custom ModelV2 does not subclass
            `model_interface`, or if it created variables that are not in
            `model.variables()`.
        NotImplementedError: If no custom ModelV2 is used and `framework`
            is not "tf".
    """
    if model_config.get("custom_model"):
        model_cls = _global_registry.get(RLLIB_MODEL,
                                         model_config["custom_model"])
        if issubclass(model_cls, ModelV2):
            if model_interface and not issubclass(
                    model_cls, model_interface):
                # Format the interface into the message; passing it as a
                # second exception arg renders the error as a tuple.
                raise ValueError(
                    "The given model must subclass {}".format(
                        model_interface))

            created = set()

            # Track and warn if variables were created but not registered
            def track_var_creation(next_creator, **kw):
                v = next_creator(**kw)
                created.add(v)
                return v

            with tf.variable_creator_scope(track_var_creation):
                instance = model_cls(obs_space, action_space, num_outputs,
                                     model_config, name, **model_kwargs)
            registered = set(instance.variables())
            not_registered = {
                var for var in created if var not in registered
            }
            if not_registered:
                raise ValueError(
                    "It looks like variables {} were created as part of "
                    "{} but does not appear in model.variables() ({}). "
                    "Did you forget to call model.register_variables() "
                    "on the variables in question?".format(
                        not_registered, instance, registered))
            return instance

    if framework == "tf":
        # Fall back to the legacy model API, wrapped as a V2 model.
        legacy_model_cls = ModelCatalog.get_model
        wrapper = ModelCatalog._wrap_if_needed(
            make_v1_wrapper(legacy_model_cls), model_interface)
        return wrapper(obs_space, action_space, num_outputs, model_config,
                       name, **model_kwargs)

    raise NotImplementedError("TODO: support {} models".format(framework))
def rollout(agent,
            env_name,
            num_steps,
            num_episodes=0,
            saver=None,
            no_render=True,
            video_dir=None):
    """Rolls out `agent` in its environment and records steps via `saver`.

    Three cases are handled, in this order:
    1. `agent.evaluation_workers` is a WorkerSet: `agent.evaluate()` is
       called in a loop and the function returns.
    2. `agent.workers` is a WorkerSet: the local worker's env and policy
       map are used for a manual rollout loop.
    3. Otherwise: the env is created from `agent.config["env"]` (via gym
       or the global registry) and `agent.policy` is used.

    Args:
        agent: The agent to roll out. Needs `compute_action()` and
            `config`; other attributes are probed as described above.
        env_name: Not referenced inside this function body.
        num_steps: Step budget, passed to `keep_going()`.
        num_episodes: Episode budget, passed to `keep_going()`.
        saver: Object receiving begin_rollout/append_step/end_rollout
            calls. A new `RolloutSaver()` is created if None.
        no_render: If False, `env.render()` is called after every step.
        video_dir: If truthy, the env is wrapped in a gym Monitor that
            writes to this directory.

    Raises:
        AttributeError: In case 3, if `agent` has no `policy` property.
    """
    policy_agent_mapping = default_policy_agent_mapping

    if saver is None:
        saver = RolloutSaver()

    # Normal case: Agent was setup correctly with an evaluation WorkerSet,
    # which we will now use to rollout.
    if hasattr(agent, "evaluation_workers") and isinstance(
            agent.evaluation_workers, WorkerSet):
        steps = 0
        episodes = 0
        while keep_going(steps, num_steps, episodes, num_episodes):
            saver.begin_rollout()
            eval_result = agent.evaluate()["evaluation"]
            # Increase timestep and episode counters.
            eps = agent.config["evaluation_num_episodes"]
            episodes += eps
            # Step count is estimated from the mean episode length.
            steps += eps * eval_result["episode_len_mean"]
            # Print out results and continue.
            print("Episode #{}: reward: {}".format(
                episodes, eval_result["episode_reward_mean"]))
            saver.end_rollout()
        return

    # Agent has no evaluation workers, but RolloutWorkers.
    elif hasattr(agent, "workers") and isinstance(agent.workers, WorkerSet):
        env = agent.workers.local_worker().env
        multiagent = isinstance(env, MultiAgentEnv)
        if agent.workers.local_worker().multiagent:
            policy_agent_mapping = agent.config["multiagent"][
                "policy_mapping_fn"]

        policy_map = agent.workers.local_worker().policy_map
        state_init = {p: m.get_initial_state() for p, m in policy_map.items()}
        # A policy counts as recurrent iff its initial state is non-empty.
        use_lstm = {p: len(s) > 0 for p, s in state_init.items()}
    # Agent has neither evaluation- nor rollout workers.
    else:
        from gym import envs
        if envs.registry.env_specs.get(agent.config["env"]):
            # if environment is gym environment, load from gym
            env = gym.make(agent.config["env"])
        else:
            # if environment registered ray environment, load from ray
            env_creator = _global_registry.get(ENV_CREATOR,
                                               agent.config["env"])
            env_context = EnvContext(
                agent.config["env_config"] or {}, worker_index=0)
            env = env_creator(env_context)
        multiagent = False
        try:
            policy_map = {DEFAULT_POLICY_ID: agent.policy}
        except AttributeError:
            raise AttributeError(
                "Agent ({}) does not have a `policy` property! This is needed "
                "for performing (trained) agent rollouts.".format(agent))
        # NOTE: `state_init` is not defined on this path; it is only read
        # lazily by `agent_states` below, on the `p_use_lstm` branch,
        # which is never taken here since `use_lstm` is False.
        use_lstm = {DEFAULT_POLICY_ID: False}

    # Initial "previous action" per policy: a sample from its action space.
    action_init = {
        p: flatten_to_single_ndarray(m.action_space.sample())
        for p, m in policy_map.items()
    }

    # If monitoring has been requested, manually wrap our environment with a
    # gym monitor, which is set to record every episode.
    if video_dir:
        env = gym_wrappers.Monitor(
            env=env,
            directory=video_dir,
            video_callable=lambda _: True,
            force=True)

    steps = 0
    episodes = 0
    while keep_going(steps, num_steps, episodes, num_episodes):
        mapping_cache = {}  # in case policy_agent_mapping is stochastic
        saver.begin_rollout()
        obs = env.reset()
        # Both mappings look up defaults through `mapping_cache`, which is
        # filled in the per-agent loop below before they are indexed.
        agent_states = DefaultMapping(
            lambda agent_id: state_init[mapping_cache[agent_id]])
        prev_actions = DefaultMapping(
            lambda agent_id: action_init[mapping_cache[agent_id]])
        prev_rewards = collections.defaultdict(lambda: 0.)
        done = False
        reward_total = 0.0
        while not done and keep_going(steps, num_steps, episodes,
                                      num_episodes):
            # Single-agent observations are wrapped under a dummy agent id
            # so both cases share the same loop.
            multi_obs = obs if multiagent else {_DUMMY_AGENT_ID: obs}
            action_dict = {}
            for agent_id, a_obs in multi_obs.items():
                if a_obs is not None:
                    policy_id = mapping_cache.setdefault(
                        agent_id, policy_agent_mapping(agent_id))
                    p_use_lstm = use_lstm[policy_id]
                    if p_use_lstm:
                        a_action, p_state, _ = agent.compute_action(
                            a_obs,
                            state=agent_states[agent_id],
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id)
                        agent_states[agent_id] = p_state
                    else:
                        a_action = agent.compute_action(
                            a_obs,
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id)
                    a_action = flatten_to_single_ndarray(a_action)
                    action_dict[agent_id] = a_action
                    prev_actions[agent_id] = a_action
            action = action_dict

            # Unwrap the dummy agent id again for single-agent envs.
            action = action if multiagent else action[_DUMMY_AGENT_ID]
            next_obs, reward, done, info = env.step(action)
            if multiagent:
                for agent_id, r in reward.items():
                    prev_rewards[agent_id] = r
            else:
                prev_rewards[_DUMMY_AGENT_ID] = reward

            if multiagent:
                # The episode ends once the "__all__" flag is set.
                done = done["__all__"]
                reward_total += sum(
                    r for r in reward.values() if r is not None)
            else:
                reward_total += reward
            if not no_render:
                env.render()
            saver.append_step(obs, action, next_obs, reward, done, info)
            steps += 1
            obs = next_obs
        saver.end_rollout()
        print("Episode #{}: reward: {}".format(episodes, reward_total))
        # Only count the episode if it finished (the inner loop may also
        # stop because the step budget ran out).
        if done:
            episodes += 1