def test_tab_featb_functions():
    """Check that feature-based, tabular and true rewards agree on FrozenLake8x8.

    Both parametrised reward functions receive the same 64-entry vector
    (all zeros except a trailing 1.0) and are evaluated on the domain
    reported by the feature-based one. Their outputs must match the reward
    obtained from ``make_true_reward`` in both values and shape.
    """
    env_id = 'FrozenLake8x8-v0'
    env = feature_make(env_id)

    # Only the last entry of the parameter vector is non-zero.
    weights = np.zeros(64)
    weights[-1] = 1.

    feature_rf = FeatureBasedRewardFunction(env, weights)
    domain = feature_rf.domain()
    tabular_rf = TabularRewardFunction(env, weights)
    true_rf = make_true_reward(env_id)

    # Evaluate all three reward functions on the very same domain.
    feature_rew = feature_rf.reward(domain)
    tabular_rew = tabular_rf.reward(domain)
    true_rew = true_rf.reward(domain)

    # Element-wise agreement first, then shape agreement.
    assert np.all(true_rew == feature_rew)
    assert np.all(feature_rew == tabular_rew)
    assert true_rew.shape == feature_rew.shape
    assert feature_rew.shape == tabular_rew.shape
def reward_function_factory(env):
    """Return a ``TabularRewardFunction`` for ``env`` with a fixed parameter vector.

    The vector has 64 entries, all zero except the last, which is 1.0.
    NOTE(review): 64 presumably matches the state count of an 8x8 grid
    environment -- confirm against the caller.
    """
    n_params = 64
    weights = np.zeros(n_params)
    weights[n_params - 1] = 1.
    return TabularRewardFunction(env, weights)
def reward_function_factory(env):
    """Return a ``TabularRewardFunction`` for ``env`` built from ``true_rews``.

    ``true_rews`` is not a parameter; it is looked up in the enclosing or
    module scope when this factory is called. Every entry except the last
    one is passed on as the parameter vector.
    """
    tabular_params = true_rews[:-1]
    return TabularRewardFunction(env, tabular_params)
def frozen_lake_8_8(env):
    """Return a ``TabularRewardFunction`` for ``env`` with 64 parameters.

    The parameter vector is all zeros except for a 1.0 in the last slot.
    Before the reward function is constructed, the environment and the
    parameter vector are printed to stdout.
    """
    n_params = 64
    reward_params = np.zeros(n_params)
    reward_params[n_params - 1] = 1.0
    print("Making", env, reward_params)
    return TabularRewardFunction(env, reward_params)
def frozen_lake(env):
    """Return a ``TabularRewardFunction`` for ``env`` with 16 parameters.

    The parameter vector is all zeros except for a 1.0 in the last slot.
    NOTE(review): 16 presumably matches the state count of a 4x4 grid
    environment -- confirm against the caller.
    """
    n_params = 16
    reward_params = np.zeros(n_params)
    reward_params[n_params - 1] = 1.0
    return TabularRewardFunction(env, reward_params)
def rew_fun_factory(env):
    """Return a ``TabularRewardFunction`` for ``env`` using the ``'random'`` spec.

    The string is handed to ``TabularRewardFunction`` in place of an explicit
    parameter vector. How it is interpreted is decided there (presumably a
    random initialisation -- TODO confirm).
    """
    param_spec = 'random'
    return TabularRewardFunction(env, param_spec)
def test_random_tabular_function():
    """Smoke-test building a ``TabularRewardFunction`` with the ``'random'`` spec.

    The visible test makes no assertions: it passes as long as creating the
    environment and constructing the reward function do not raise.
    """
    lake_env = make_env('FrozenLake-v0')
    # The result is bound but never inspected; construction itself is the check.
    random_rf = TabularRewardFunction(lake_env, 'random')