Example #1
            except FileNotFoundError:
                # We did not use VecNormalize during training, skip
                pass

            yield policy
        finally:
            if model is not None and model.sess is not None:
                model.sess.close()

    return f


policy_registry.register(
    "random",
    value=registry.build_loader_fn_require_space(
        registry.dummy_context(RandomPolicy)),
)
policy_registry.register(
    "zero",
    value=registry.build_loader_fn_require_space(
        registry.dummy_context(ZeroPolicy)),
)
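
# Hedged usage sketch (not from the original source): retrieve the "zero" loader
# registered above and query the resulting hardcoded policy on a batch of
# observations. This assumes the loaders built by build_loader_fn_require_space
# follow the same (path, venv) -> context-manager convention as the
# stable-baselines loaders above; the environment name is only illustrative.
import gym
from stable_baselines.common.vec_env import DummyVecEnv

venv = DummyVecEnv([lambda: gym.make("Pendulum-v0")])
with policy_registry.get("zero")("unused-path", venv) as policy:
    obs = venv.reset()
    # step() returns a (actions, ...) 4-tuple, as in the PointMassPolicy example below.
    actions, _, _, _ = policy.step(obs)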


def _add_stable_baselines_policies(classes):
    for k, (cls_name, attr) in classes.items():
        try:
            cls = registry.load_attr(cls_name)
            fn = _load_stable_baselines(cls, attr)
            policy_registry.register(k, value=fn)
        except (AttributeError, ImportError):
            # Loading can fail when an optional dependency for this algorithm
            # is not installed; skip registering that policy type.
            pass
Example #2
                                    value=_load_reward_net_as_fn(k, False))


_add_reward_net_as_fn_loaders(REWARD_NETS)


def load_zero(path: str, venv: VecEnv) -> RewardFn:
    # `path` and `venv` are intentionally unused: the zero reward ignores all inputs.
    def f(old_obs: np.ndarray, act: np.ndarray, new_obs: np.ndarray,
          steps: np.ndarray) -> np.ndarray:
        return np.zeros(old_obs.shape[0])

    return f


reward_fn_registry.register(key='zero',
                            value=registry.dummy_context(load_zero))


@util.docstring_parameter(reward_types=", ".join(reward_fn_registry.keys()))
def load_reward(reward_type: str, reward_path: str,
                venv: VecEnv) -> ContextManager[RewardFn]:
    """Load serialized policy.

  Args:
    reward_type: A key in `reward_registry`, e.g. `RewardNet`. Valid types
        include {reward_types}.
    reward_path: A path specifying the reward.
    venv: An environment that the policy is to be used with.
  """
    reward_loader = reward_fn_registry.get(reward_type)
    return reward_loader(reward_path, venv)
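
# Hedged usage sketch (not from the original source): load the "zero" reward by
# key and evaluate it on a batch of transitions. The environment and the dummy
# reward_path are illustrative; the RewardFn signature (old_obs, act, new_obs,
# steps) matches load_zero above.
import gym
import numpy as np
from stable_baselines.common.vec_env import DummyVecEnv

venv = DummyVecEnv([lambda: gym.make("Pendulum-v0")])
with load_reward("zero", reward_path="unused", venv=venv) as reward_fn:
    obs = venv.reset()
    acts = np.stack([venv.action_space.sample() for _ in range(venv.num_envs)])
    rewards = reward_fn(obs, acts, obs, np.zeros(venv.num_envs))  # -> all zeros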
Example #3
        # Steer toward the target: normalize the velocity error by its
        # infinity norm (floored at 1e-4 to avoid division by zero), then
        # clip to the action bounds.
        delta_vel = target_vel - vel
        delta_vel_norm = np.linalg.norm(delta_vel, ord=np.inf,
                                        axis=1).reshape(-1, 1)
        act = delta_vel / np.maximum(delta_vel_norm, 1e-4)
        act = act.clip(-1, 1)
        return act, None, None, None

    def proba_step(self, obs, state=None, mask=None):
        raise NotImplementedError()


# Register custom policies with imitation
policy_serialize.policy_registry.register(
    key="evaluating_rewards/PointMassHardcoded-v0",
    value=registry.build_loader_fn_require_space(
        registry.dummy_context(PointMassPolicy)),
)

# Register custom rewards with evaluating_rewards
reward_serialize.reward_registry.register(
    key="evaluating_rewards/PointMassGroundTruth-v0",
    value=registry.build_loader_fn_require_space(PointMassGroundTruth),
)
reward_serialize.reward_registry.register(
    key="evaluating_rewards/PointMassSparseWithCtrl-v0",
    value=registry.build_loader_fn_require_space(PointMassSparseReward),
)
reward_serialize.reward_registry.register(
    key="evaluating_rewards/PointMassSparseNoCtrl-v0",
    value=registry.build_loader_fn_require_space(PointMassSparseReward,
                                                 ctrl_coef=0.0),
)
Example #4
    del path, venv

    def f(obs: np.ndarray, act: np.ndarray, next_obs: np.ndarray,
          dones: np.ndarray) -> np.ndarray:
        del act, next_obs, dones  # Unused.
        return np.zeros(obs.shape[0])

    return f


reward_registry.register(key="DiscrimNet", value=_load_discrim_net)
reward_registry.register(key="RewardNet_shaped",
                         value=_load_reward_net_as_fn(shaped=True))
reward_registry.register(key="RewardNet_unshaped",
                         value=_load_reward_net_as_fn(shaped=False))
reward_registry.register(key="zero", value=registry.dummy_context(load_zero))


@util.docstring_parameter(reward_types=", ".join(reward_registry.keys()))
def load_reward(reward_type: str, reward_path: str,
                venv: VecEnv) -> ContextManager[common.RewardFn]:
    """Load serialized policy.

    Args:
      reward_type: A key in `reward_registry`. Valid types
          include {reward_types}.
      reward_path: A path specifying the reward.
      venv: An environment that the reward is to be used with.
    """
    reward_loader = reward_registry.get(reward_type)
    return reward_loader(reward_path, venv)
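

# Hedged sketch (not part of the library): registering a further trivial reward
# following the same pattern as load_zero above. The "constant" key and the
# load_constant helper are hypothetical names used only for illustration.
def load_constant(path: str, venv: VecEnv) -> common.RewardFn:
    del path, venv  # Unused, as in load_zero.

    def f(obs: np.ndarray, act: np.ndarray, next_obs: np.ndarray,
          dones: np.ndarray) -> np.ndarray:
        del act, next_obs, dones  # Unused.
        return np.ones(obs.shape[0])

    return f


reward_registry.register(key="constant",
                         value=registry.dummy_context(load_constant))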