Example 1
# Imports assumed for the examples below (older `imitation` API that still
# ships `discrim_nets`, plus stable-baselines3 utilities):
from imitation.rewards import discrim_nets, reward_nets
from imitation.util import networks
from stable_baselines3.common import preprocessing


def _setup_airl_undiscounted_shaped_reward_net(venv):
    potential_in_size = preprocessing.get_flattened_obs_dim(
        venv.observation_space)
    # Potential network over flattened observations; squeeze_output makes it
    # emit a scalar potential per state.
    potential_net = networks.build_mlp(
        in_size=potential_in_size,
        hid_sizes=(32, 32),
        squeeze_output=True,
    )
    # discount_factor=1.0 makes the potential-shaping term undiscounted,
    # matching the function name.
    reward_net = reward_nets.BasicShapedRewardNet(
        venv.observation_space,
        venv.action_space,
        discount_factor=1.0,
        use_next_state=True,
        use_done=True,
        potential_net=potential_net,
    )
    return discrim_nets.DiscrimNetAIRL(reward_net)
Example 2
    def __init__(
        self,
        venv: vec_env.VecEnv,
        expert_data: Union[Iterable[Mapping], types.Transitions],
        expert_batch_size: int,
        gen_algo: on_policy_algorithm.OnPolicyAlgorithm,
        *,
        # FIXME(sam): pass in reward net directly, not via _cls and _kwargs
        reward_net_cls: Type[reward_nets.RewardNet] = reward_nets.BasicShapedRewardNet,
        reward_net_kwargs: Optional[Mapping] = None,
        discrim_kwargs: Optional[Mapping] = None,
        **kwargs,
    ):
        """Adversarial Inverse Reinforcement Learning.

        Most parameters are described in and passed to `AdversarialTrainer.__init__`.
        Additional parameters that `AIRL` adds on top of its superclass initializer are
        as follows:

        Args:
            reward_net_cls: Reward network constructor. The reward network is part of
                the AIRL discriminator.
            reward_net_kwargs: Optional keyword arguments to use while constructing
                the reward network.
            discrim_kwargs: Optional keyword arguments to use while constructing the
                DiscrimNetAIRL.
        """
        # TODO(shwang): Maybe offer str=>RewardNet conversion like
        #  stable_baselines3 does with policy classes.
        reward_net_kwargs = reward_net_kwargs or {}
        reward_network = reward_net_cls(
            action_space=venv.action_space,
            observation_space=venv.observation_space,
            # pytype is afraid that we'll directly call RewardNet(), which is an
            # abstract class, hence the disable.
            **reward_net_kwargs,  # pytype: disable=not-instantiable
        )

        discrim_kwargs = discrim_kwargs or {}
        discrim = discrim_nets.DiscrimNetAIRL(reward_network, **discrim_kwargs)
        super().__init__(
            venv, gen_algo, discrim, expert_data, expert_batch_size, **kwargs
        )
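
A minimal construction sketch for the initializer above. Only the parameter
names come from the signature and docstring shown; `venv`, `expert_transitions`,
`ppo_learner`, and the specific keyword values are hypothetical placeholders.

# Assumed pre-existing objects: a VecEnv, flattened expert transitions, and an
# on-policy generator such as a stable-baselines3 PPO instance.
airl_trainer = AIRL(
    venv=venv,
    expert_data=expert_transitions,
    expert_batch_size=32,
    gen_algo=ppo_learner,
    # Swap in an unshaped reward network and forward extra kwargs to it.
    reward_net_cls=reward_nets.BasicRewardNet,
    reward_net_kwargs=dict(use_next_state=False, use_done=False),
)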
Example 3
def _setup_airl_basic_custom_net(venv):
    # Custom MLP over (state, action) pairs with two 32-unit hidden layers.
    base_reward_net = reward_nets.BasicRewardMLP(
        observation_space=venv.observation_space,
        action_space=venv.action_space,
        use_state=True,
        use_action=True,
        use_next_state=False,
        use_done=False,
        hid_sizes=(32, 32),
    )
    # Pass the custom MLP as the underlying reward model via base_reward_net.
    reward_net = reward_nets.BasicRewardNet(
        observation_space=venv.observation_space,
        action_space=venv.action_space,
        use_state=True,
        use_action=True,
        use_next_state=False,
        use_done=False,
        base_reward_net=base_reward_net,
    )
    return discrim_nets.DiscrimNetAIRL(reward_net)
Example 4
def _setup_airl_basic(venv):
    # Unshaped reward network with all default keyword arguments.
    reward_net = reward_nets.BasicRewardNet(venv.observation_space,
                                            venv.action_space)
    return discrim_nets.DiscrimNetAIRL(reward_net)