Example #1
def _register_point_maze(prefix, cls, **kwargs):
    """Registers cls reward loaders under `{prefix}WithCtrl-v0` and `{prefix}NoCtrl-v0`."""
    control = {"WithCtrl": {}, "NoCtrl": {"ctrl_coef": 0.0}}
    for k, cfg in control.items():
        fn = registry.build_loader_fn_require_space(cls,
                                                    target=np.array(
                                                        [0.3, 0.5, 0.0]),
                                                    **cfg,
                                                    **kwargs)
        reward_serialize.reward_registry.register(key=f"{prefix}{k}-v0",
                                                  value=fn)
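Each call to `_register_point_maze` therefore registers two variants of the same reward, differing only in whether the control-cost term is active. A minimal, self-contained sketch of the key naming scheme (the prefix is taken from a later snippet on this page; only the string formatting is reproduced here):

# Sketch of the keys produced by _register_point_maze for one example prefix.
prefix = "evaluating_rewards/PointMazeGroundTruth"  # prefix used further down this page
for k in ("WithCtrl", "NoCtrl"):
    print(f"{prefix}{k}-v0")
# -> evaluating_rewards/PointMazeGroundTruthWithCtrl-v0
# -> evaluating_rewards/PointMazeGroundTruthNoCtrl-v0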
Example #2
def _register_models(format_str, cls, forward=True):
    """Registers reward models of type cls under key formatted by format_str."""
    forwards = {"Forward": {"forward": forward}, "Backward": {"forward": not forward}}
    control = {"WithCtrl": {}, "NoCtrl": {"ctrl_coef": 0.0}}

    for k1, cfg1 in forwards.items():
        for k2, cfg2 in control.items():
            fn = registry.build_loader_fn_require_space(cls, **cfg1, **cfg2)
            key = format_str.format(k1 + k2)
            reward_serialize.reward_registry.register(key=key, value=fn)
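Because of the nested loops, each format string expands into four registry entries, one per combination of direction and control cost. A self-contained sketch of the keys generated for the HalfCheetah format string used later on this page (only the string formatting is reproduced):

# Sketch of the four keys produced per format string by _register_models.
format_str = "evaluating_rewards/HalfCheetahGroundTruth{}-v0"
for k1 in ("Forward", "Backward"):
    for k2 in ("WithCtrl", "NoCtrl"):
        print(format_str.format(k1 + k2))
# -> evaluating_rewards/HalfCheetahGroundTruthForwardWithCtrl-v0
# -> evaluating_rewards/HalfCheetahGroundTruthForwardNoCtrl-v0
# -> evaluating_rewards/HalfCheetahGroundTruthBackwardWithCtrl-v0
# -> evaluating_rewards/HalfCheetahGroundTruthBackwardNoCtrl-v0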
Example #3
                tf.logging.info(f"Loaded VecNormalize from '{normalize_path}'")
            except FileNotFoundError:
                # We did not use VecNormalize during training, skip
                pass

            yield policy
        finally:
            if model is not None and model.sess is not None:
                model.sess.close()

    return f


policy_registry.register(
    "random",
    value=registry.build_loader_fn_require_space(
        registry.dummy_context(RandomPolicy), ),
)
policy_registry.register(
    "zero",
    value=registry.build_loader_fn_require_space(
        registry.dummy_context(ZeroPolicy), ),
)


def _add_stable_baselines_policies(classes):
    """Registers a policy loader for each available stable_baselines class in classes."""
    for k, (cls_name, attr) in classes.items():
        try:
            cls = registry.load_attr(cls_name)
            fn = _load_stable_baselines(cls, attr)
            policy_registry.register(k, value=fn)
        except (AttributeError, ImportError):
Example #4
            # We did not use VecNormalize during training, skip
            pass
        else:
            vec_normalize.training = False
            vec_normalize.set_venv(venv)
            policy = NormalizePolicy(policy, vec_normalize)
            logging.info(f"Loaded VecNormalize from '{normalize_path}'")

        return policy

    return f


policy_registry.register(
    "random",
    value=registry.build_loader_fn_require_space(base.RandomPolicy),
)
policy_registry.register(
    "zero",
    value=registry.build_loader_fn_require_space(base.ZeroPolicy),
)


def _add_stable_baselines_policies(classes):
    """Registers a policy loader for each stable_baselines class in classes."""
    for k, (cls_name, attr) in classes.items():
        cls = registry.load_attr(cls_name)
        fn = _load_stable_baselines(cls, attr)
        policy_registry.register(k, value=fn)


STABLE_BASELINES_CLASSES = {
Example #5
                f"Serialized object from '{path}' is not a RewardModel")
        assert venv.observation_space == model.observation_space
        assert venv.action_space == model.action_space

    return model


reward_registry.register(key="imitation/RewardNet_unshaped-v0",
                         value=_load_imitation(True))
reward_registry.register(key="imitation/RewardNet_shaped-v0",
                         value=_load_imitation(False))
reward_registry.register(key="evaluating_rewards/RewardModel-v0",
                         value=_load_native)
reward_registry.register(
    key="evaluating_rewards/Zero-v0",
    value=registry.build_loader_fn_require_space(rewards.ZeroReward),
)


def load_reward(
    reward_type: str,
    reward_path: str,
    venv: vec_env.VecEnv,
    discount: Optional[float] = None,
) -> rewards.RewardModel:
    """Load serialized reward model.

    Args:
        reward_type: A key in the reward registry, e.g. `evaluating_rewards/RewardModel-v0`.
        reward_path: A path on disk where the reward model is stored.
        venv: An environment that the reward model is to be used with.
Example #6
            -1, 1)
        delta_vel = target_vel - vel
        delta_vel_norm = np.linalg.norm(delta_vel, ord=np.inf,
                                        axis=1).reshape(-1, 1)
        act = delta_vel / np.maximum(delta_vel_norm, 1e-4)
        act = act.clip(-1, 1)
        return act, None, None, None

    def proba_step(self, obs, state=None, mask=None):
        raise NotImplementedError()


# Register custom policies with imitation
policy_serialize.policy_registry.register(
    key="evaluating_rewards/PointMassHardcoded-v0",
    value=registry.build_loader_fn_require_space(
        registry.dummy_context(PointMassPolicy)),
)

# Register custom rewards with evaluating_rewards
reward_serialize.reward_registry.register(
    key="evaluating_rewards/PointMassGroundTruth-v0",
    value=registry.build_loader_fn_require_space(PointMassGroundTruth),
)
reward_serialize.reward_registry.register(
    key="evaluating_rewards/PointMassSparseWithCtrl-v0",
    value=registry.build_loader_fn_require_space(PointMassSparseReward),
)
reward_serialize.reward_registry.register(
    key="evaluating_rewards/PointMassSparseNoCtrl-v0",
    value=registry.build_loader_fn_require_space(PointMassSparseReward,
                                                 ctrl_coef=0.0),
)
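The hardcoded PointMassPolicy above steers toward a target velocity by rescaling the velocity error with its infinity norm and clipping to the action bounds. A small numeric sketch of that computation (array shapes and values are illustrative; the observation layout is not shown in the snippet above):

import numpy as np

# Illustrative (batch, 2) arrays; the real policy extracts these from the observation.
target_vel = np.array([[0.0, 0.0]])
vel = np.array([[0.3, -0.05]])
delta_vel = target_vel - vel  # [[-0.3, 0.05]]
delta_vel_norm = np.linalg.norm(delta_vel, ord=np.inf, axis=1).reshape(-1, 1)  # [[0.3]]
act = (delta_vel / np.maximum(delta_vel_norm, 1e-4)).clip(-1, 1)
print(act)  # approximately [[-1.0, 0.1667]]: the largest velocity error saturates the action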
Example #7
                                                  value=fn)


_register_models("evaluating_rewards/HalfCheetahGroundTruth{}-v0",
                 HalfCheetahGroundTruthReward)
_register_models("evaluating_rewards/HopperGroundTruth{}-v0",
                 HopperGroundTruthReward)
_register_models("evaluating_rewards/HopperBackflip{}-v0",
                 HopperBackflipReward,
                 forward=False)
_register_point_maze("evaluating_rewards/PointMazeGroundTruth",
                     PointMazeReward)
_register_point_maze("evaluating_rewards/PointMazeRepellent",
                     PointMazeSparseBonusReward,
                     sparse_coef=-1.0)
_register_point_maze(
    "evaluating_rewards/PointMazeBetterGoal",
    PointMazeSparseBonusReward,
    # Place the target on the left, behind the wall, so the agent (in the Left version of the
    # environment) has to pass the wall and go beyond the goal state to reach the sparse target.
    # This is unlikely under random exploration (it is hard to get past the wall) and for an
    # expert policy (which will not go past the goal).
    sparse_target=np.array([0.1, 0.5, 0.0]),
    sparse_coef=2.0,
)
reward_serialize.reward_registry.register(
    key="evaluating_rewards/PointMazeWrongTargetWithCtrl-v0",
    value=registry.build_loader_fn_require_space(PointMazeReward,
                                                 target=np.array(
                                                     [0.1, 0.1, 0.0])),
)
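Any of the rewards registered on this page can then be recovered by key through the load_reward helper shown in Example #5. A rough usage sketch follows; the `evaluating_rewards.serialize` module path, the CartPole environment, and the throwaway `reward_path` value are assumptions made for illustration rather than details taken from the snippets above:

import gym
from stable_baselines.common.vec_env import DummyVecEnv

from evaluating_rewards import serialize  # assumed location of load_reward (Example #5)

venv = DummyVecEnv([lambda: gym.make("CartPole-v1")])
model = serialize.load_reward(
    reward_type="evaluating_rewards/Zero-v0",  # key registered in Example #5
    reward_path="dummy",  # assumed to be ignored by loaders built via build_loader_fn_require_space
    venv=venv,
)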