Example #1
import numpy as np

# These imports assume irl_benchmark's module layout for the names used below.
from irl_benchmark.irl.collect import collect_trajs
from irl_benchmark.irl.reward.reward_function import TabularRewardFunction
from irl_benchmark.rl.algorithms.value_iteration import ValueIteration
from irl_benchmark.utils.wrapper import make_wrapped_env


def test_tabular_function():
    def reward_function_factory(env):
        # Tabular reward: one parameter per state; reward 1 on the last
        # state (the goal), 0 everywhere else.
        params = np.zeros(64)
        params[-1] = 1.
        return TabularRewardFunction(env, params)

    env = make_wrapped_env('FrozenLake8x8-v0',
                           reward_function_factory=reward_function_factory,
                           with_model_wrapper=True)
    agent = ValueIteration(env)
    agent.train(1)
    # The wrapped rewards should match the true rewards, since the tabular
    # parameters reproduce FrozenLake's reward exactly.
    trajs = collect_trajs(env, agent, 10)
    for traj in trajs:
        for i in range(len(traj['rewards'])):
            assert np.isclose(traj['rewards'][i], traj['true_rewards'][i])
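The parameter vector above reproduces FrozenLake8x8's true reward: one entry per state, with reward 1 on the last state, which is the goal of the 8x8 grid. A quick standalone check:

import numpy as np

params = np.zeros(64)
params[-1] = 1.
print(np.flatnonzero(params))  # [63] -- the goal state, bottom-right corner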
Example #2
    def evaluate(self, estim_rewards, ord=2):
        '''Return the inverse learning error (ILE): the ord-norm of the
        difference between the optimal value functions under the true
        and the estimated rewards.'''

        # Optimal value function under the environment's true reward.
        expert_agent = ValueIteration(env=self.env, gamma=self.gamma)
        expert_agent.train(time_limit=50)
        value_actual = expert_agent.V

        # Optimal value function under the estimated reward.
        learned_agent = ValueIteration(env=self.env, gamma=self.gamma)
        learned_agent.train(time_limit=50, reward_function=estim_rewards)
        value_learned = learned_agent.V

        # Inverse learning error.
        ile = np.linalg.norm(value_actual - value_learned, ord=ord)

        return ile
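Since evaluate only needs the two value functions, the metric itself reduces to a norm of their difference. A minimal standalone sketch with stand-in arrays (not values produced by ValueIteration):

import numpy as np

value_actual = np.array([1.0, 0.5, 0.25])    # stand-in for expert_agent.V
value_learned = np.array([0.9, 0.55, 0.25])  # stand-in for learned_agent.V
ile = np.linalg.norm(value_actual - value_learned, ord=2)
print(ile)  # ~0.1118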
Example #3
def rl_alg_factory(env):
    '''Return a ValueIteration agent with an explicit discount factor.'''
    return ValueIteration(env, {'gamma': 0.9})
Example #4
def rl_alg_factory(env):
    '''Return a ValueIteration agent with default parameters.'''
    return ValueIteration(env)
Example #5
import gym
import numpy as np

# The remaining imports assume irl_benchmark's module layout for the
# names used below (MaxEnt, collect_trajs, feature wrapper, reward function).
from irl_benchmark.irl.algorithms.maxent import MaxEnt
from irl_benchmark.irl.collect import collect_trajs
from irl_benchmark.irl.feature.feature_wrapper import FrozenLakeFeatureWrapper
from irl_benchmark.irl.reward.reward_function import FeatureBasedRewardFunction
from irl_benchmark.irl.reward.reward_wrapper import RewardWrapper
from irl_benchmark.rl.algorithms.value_iteration import ValueIteration
from irl_benchmark.utils.utils import get_transition_matrix

store_to = 'data/frozen/expert/'
no_episodes = 1000
max_steps_per_episode = 1000

env = gym.make('FrozenLake8x8-v0')
env = FrozenLakeFeatureWrapper(env)
# Start the reward estimate at zero; the IRL algorithm below will update it.
initial_reward_function_estimate = FeatureBasedRewardFunction(
    env=env, parameters=np.zeros(64))
env = RewardWrapper(env=env, reward_function=initial_reward_function_estimate)

# Generate expert trajectories.
expert_agent = ValueIteration(env)
print('Training expert agent...')
expert_agent.train(30)
expert_trajs = collect_trajs(env, expert_agent, no_episodes,
                             max_steps_per_episode, store_to)
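# Optional sanity check, assuming collect_trajs returns one dict per episode
# (Example #1 above shows each dict carries a per-step 'rewards' list).
assert len(expert_trajs) == no_episodes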

# One-hot feature map: row i is the feature vector of state i.
feat_map = np.eye(64)

# Tabular transition dynamics of the wrapped environment.
transition_dynamics = get_transition_matrix(env)


def rl_alg_factory(env):
    return ValueIteration(env)


meirl = MaxEnt(env,
Example #6
def rl_alg_factory(env, lp=False):
    '''Return an RL algorithm that will collect expert trajectories.'''
    if lp:
        return ValueIteration(env, error=1e-5)
    else:
        return TabularQ(env)
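A usage sketch, as hypothetical driver code: the factory lets calling code build a fresh agent per environment, picking exact value iteration with a tight error tolerance when lp is set, and tabular Q-learning otherwise.

import gym

env = gym.make('FrozenLake8x8-v0')
agent = rl_alg_factory(env, lp=True)  # ValueIteration with error=1e-5
agent.train(30)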