def _register_point_maze(prefix, cls, **kwargs):
    """Register two PointMaze reward variants of `cls` under `prefix`.

    Registers `{prefix}WithCtrl-v0` (default control coefficient) and
    `{prefix}NoCtrl-v0` (`ctrl_coef=0.0`).  Any extra `kwargs` are forwarded
    to the loader builder.  The fixed `target` is presumably the maze goal
    position — confirm against the PointMaze environment definition.
    """
    variants = (("WithCtrl", {}), ("NoCtrl", {"ctrl_coef": 0.0}))
    for suffix, overrides in variants:
        loader = registry.build_loader_fn_require_space(
            cls,
            target=np.array([0.3, 0.5, 0.0]),
            **overrides,
            **kwargs,
        )
        reward_serialize.reward_registry.register(key=f"{prefix}{suffix}-v0", value=loader)
def _register_models(format_str, cls, forward=True):
    """Registers reward models of type cls under key formatted by format_str.

    Four variants are registered: every combination of direction
    (Forward/Backward) and control penalty (WithCtrl/NoCtrl).

    Args:
        format_str: Format string with a single `{}` placeholder which is
            filled with the variant name, e.g. "ForwardWithCtrl".
        cls: The reward model class to build a loader for.
        forward: Base direction; the "Backward" variant uses `not forward`.

    Returns:
        Mapping from each registered key to its loader function.
    """
    forwards = {"Forward": {"forward": forward}, "Backward": {"forward": not forward}}
    control = {"WithCtrl": {}, "NoCtrl": {"ctrl_coef": 0.0}}
    res = {}
    for k1, cfg1 in forwards.items():
        for k2, cfg2 in control.items():
            fn = registry.build_loader_fn_require_space(cls, **cfg1, **cfg2)
            key = format_str.format(k1 + k2)
            reward_serialize.reward_registry.register(key=key, value=fn)
            # Fix: `res` was created and returned but never populated, so the
            # documented return value was always an empty dict.
            res[key] = fn
    return res
# NOTE(review): this chunk begins inside the context-manager closure returned
# by `_load_stable_baselines`; indentation below is reconstructed — confirm
# against the full file.
                tf.logging.info(f"Loaded VecNormalize from '{normalize_path}'")
            except FileNotFoundError:
                # We did not use VecNormalize during training, skip
                pass
            # Hand the loaded policy to the caller for the `with` body.
            yield policy
        finally:
            # Always release the TensorFlow session owned by the loaded model.
            if model is not None and model.sess is not None:
                model.sess.close()

    return f


# Built-in baseline policies: uniformly random and all-zero actions.
policy_registry.register(
    "random",
    value=registry.build_loader_fn_require_space(
        registry.dummy_context(RandomPolicy),
    ),
)
policy_registry.register(
    "zero",
    value=registry.build_loader_fn_require_space(
        registry.dummy_context(ZeroPolicy),
    ),
)


def _add_stable_baselines_policies(classes):
    """Register a policy loader for each Stable Baselines entry in `classes`.

    Args:
        classes: mapping from registry key to `(class_name, attribute)` pairs;
            `class_name` is resolved via `registry.load_attr`.  Entries whose
            class cannot be resolved/imported are handled by the `except`
            clause below (body outside this chunk).
    """
    for k, (cls_name, attr) in classes.items():
        try:
            cls = registry.load_attr(cls_name)
            fn = _load_stable_baselines(cls, attr)
            policy_registry.register(k, value=fn)
        except (AttributeError, ImportError):
# We did not use VecNormalize during training, skip pass else: vec_normalize.training = False vec_normalize.set_venv(venv) policy = NormalizePolicy(policy, vec_normalize) logging.info(f"Loaded VecNormalize from '{normalize_path}'") return policy return f policy_registry.register( "random", value=registry.build_loader_fn_require_space(base.RandomPolicy), ) policy_registry.register( "zero", value=registry.build_loader_fn_require_space(base.ZeroPolicy), ) def _add_stable_baselines_policies(classes): for k, (cls_name, attr) in classes.items(): cls = registry.load_attr(cls_name) fn = _load_stable_baselines(cls, attr) policy_registry.register(k, value=fn) STABLE_BASELINES_CLASSES = {
# NOTE(review): this chunk begins inside a reward loader (`_load_native`?);
# the f-string below completes an error raised on a previous line — confirm.
            f"Serialized object from '{path}' is not a RewardModel")
    # Sanity-check the model was trained on a compatible environment.
    assert venv.observation_space == model.observation_space
    assert venv.action_space == model.action_space
    return model


# Reward models saved by the `imitation` library (unshaped/shaped variants).
reward_registry.register(key="imitation/RewardNet_unshaped-v0", value=_load_imitation(True))
reward_registry.register(key="imitation/RewardNet_shaped-v0", value=_load_imitation(False))
# Natively serialized reward models, plus a constant-zero baseline.
reward_registry.register(key="evaluating_rewards/RewardModel-v0", value=_load_native)
reward_registry.register(
    key="evaluating_rewards/Zero-v0",
    value=registry.build_loader_fn_require_space(rewards.ZeroReward),
)


def load_reward(
    reward_type: str,
    reward_path: str,
    venv: vec_env.VecEnv,
    discount: Optional[float] = None,
) -> rewards.RewardModel:
    """Load serialized reward model.

    Args:
        reward_type: A key in `reward_registry` above, e.g.
            `evaluating_rewards/RewardModel-v0`.
        reward_path: A path on disk where the reward model is stored.
        venv: An environment that the reward model is to be used with.
# NOTE(review): this chunk begins inside what appears to be
# `PointMassPolicy.step`; `-1, 1)` completes a `clip` call begun on a
# previous line — indentation reconstructed, confirm against the full file.
            -1, 1)
        # Steer toward the target velocity; dividing by the inf-norm scales
        # the largest component to magnitude 1 so actions fill the [-1, 1]
        # control range.
        delta_vel = target_vel - vel
        delta_vel_norm = np.linalg.norm(delta_vel, ord=np.inf, axis=1).reshape(-1, 1)
        act = delta_vel / np.maximum(delta_vel_norm, 1e-4)  # avoid division by ~0
        act = act.clip(-1, 1)
        return act, None, None, None

    def proba_step(self, obs, state=None, mask=None):
        # Action probabilities are not defined for this hardcoded policy.
        raise NotImplementedError()


# Register custom policies with imitation
policy_serialize.policy_registry.register(
    key="evaluating_rewards/PointMassHardcoded-v0",
    value=registry.build_loader_fn_require_space(
        registry.dummy_context(PointMassPolicy)),
)

# Register custom rewards with evaluating_rewards
reward_serialize.reward_registry.register(
    key="evaluating_rewards/PointMassGroundTruth-v0",
    value=registry.build_loader_fn_require_space(PointMassGroundTruth),
)
reward_serialize.reward_registry.register(
    key="evaluating_rewards/PointMassSparseWithCtrl-v0",
    value=registry.build_loader_fn_require_space(PointMassSparseReward),
)
# NoCtrl variant: same sparse reward with the control penalty disabled.
reward_serialize.reward_registry.register(
    key="evaluating_rewards/PointMassSparseNoCtrl-v0",
    value=registry.build_loader_fn_require_space(PointMassSparseReward, ctrl_coef=0.0),
# NOTE(review): `value=fn)` below completes a `register` call begun on a
# previous line outside this chunk.
    value=fn)

# MuJoCo locomotion rewards; each `_register_models` call registers the four
# Forward/Backward x WithCtrl/NoCtrl variants.
_register_models("evaluating_rewards/HalfCheetahGroundTruth{}-v0", HalfCheetahGroundTruthReward)
_register_models("evaluating_rewards/HopperGroundTruth{}-v0", HopperGroundTruthReward)
_register_models("evaluating_rewards/HopperBackflip{}-v0", HopperBackflipReward, forward=False)

_register_point_maze("evaluating_rewards/PointMazeGroundTruth", PointMazeReward)
# sparse_coef=-1.0: presumably penalizes reaching the sparse target — confirm
# against PointMazeSparseBonusReward.
_register_point_maze(
    "evaluating_rewards/PointMazeRepellent",
    PointMazeSparseBonusReward,
    sparse_coef=-1.0,
)
_register_point_maze(
    "evaluating_rewards/PointMazeBetterGoal",
    PointMazeSparseBonusReward,
    # Locate target on the left behind the wall, so the agent (in the Left version of environment)
    # has to pass the wall and go past the goal state to hit the sparse target. This is unlikely for
    # random exploration (hard to get past wall) or expert (will not go past goal).
    sparse_target=np.array([0.1, 0.5, 0.0]),
    sparse_coef=2.0,
)
# Ground-truth reward shape but with a deliberately wrong target location.
reward_serialize.reward_registry.register(
    key="evaluating_rewards/PointMazeWrongTargetWithCtrl-v0",
    value=registry.build_loader_fn_require_space(
        PointMazeReward,
        target=np.array([0.1, 0.1, 0.0]),
    ),
)