Example no. 1
def reward_tabular_normalized(policy, task, tol=1e-4):
    '''
    Compute the ratio of the policy's expected reward to the reward
    obtained by value iteration, per state (not averaged).
    '''
    gtV = compute_tabular_value(task, tol)  # ground-truth values from value iteration.
    V = reward_tabular(policy, task, tol)   # values achieved by the given policy.
    return V / gtV  # element-wise ratio; assumes the value containers support division.
Example no. 2
import numpy as np

def expected_reward_tabular_normalized(policy, task, tol=1e-4):
    '''
    Compute the policy's expected reward divided by the reward obtained
    by value iteration, averaged over all valid states.
    '''
    gtV = compute_tabular_value(task, tol)  # ground-truth values from value iteration.
    V = reward_tabular(policy, task, tol)   # values achieved by the given policy.
    rewards = [V[state] / gtV[state] for state in task.get_valid_states()]
    return np.mean(rewards)
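
For orientation: the ground-truth values gtV come from compute_tabular_value, whose definition is not shown in these examples. Below is a minimal, self-contained sketch of tabular value iteration run to a tolerance like tol=1e-4; the array-based MDP representation (P, R, gamma) and the toy numbers are assumptions made purely for illustration, not the interface of the original codebase.

import numpy as np

def value_iteration_sketch(P, R, gamma, tol=1e-4):
    # P: (S, A, S) transition probabilities, R: (S, A) expected rewards.
    # Returns the optimal state values, iterating Bellman backups until
    # no state value changes by more than `tol`.
    n_states = P.shape[0]
    V = np.zeros(n_states)
    while True:
        Q = R + gamma * P.dot(V)   # Q[s, a] = R[s, a] + gamma * sum_s' P[s, a, s'] V[s']
        V_new = Q.max(axis=1)      # greedy backup over actions
        if np.max(np.abs(V_new - V)) < tol:
            return V_new
        V = V_new

# Toy 2-state, 2-action MDP, for illustration only.
P = np.array([[[0.9, 0.1], [0.1, 0.9]],
              [[0.8, 0.2], [0.2, 0.8]]])
R = np.array([[0.0, 1.0],
              [1.0, 0.0]])
print(value_iteration_sketch(P, R, gamma=0.9))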
Example no. 3
import numpy as np

def reward_tabular_normalized_fix_start(policy, task, tol=1e-4):
    '''
    Compute the policy's expected reward divided by the reward obtained
    by value iteration, evaluated only at the task's start state.
    '''
    states = [task.start_state]
    gtV = compute_tabular_value(task, tol)  # ground-truth values from value iteration.
    V = reward_tabular(policy, task, tol)   # values achieved by the given policy.
    rewards = {state: V[state] / gtV[state] for state in task.get_valid_states()}
    return np.mean([rewards[state] for state in states])  # only the start state is used.
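
Similarly, reward_tabular(policy, task, tol) is used but not shown; conceptually it returns the per-state values of the fixed policy being evaluated. A minimal sketch of iterative policy evaluation under the same assumed array representation (P, R, pi, gamma), again for illustration only:

import numpy as np

def policy_evaluation_sketch(P, R, pi, gamma, tol=1e-4):
    # P: (S, A, S) transitions, R: (S, A) rewards, pi: (S, A) action
    # probabilities of the fixed policy being evaluated.
    # Returns the expected discounted return from every state, iterated
    # until no state value changes by more than `tol`.
    n_states = P.shape[0]
    V = np.zeros(n_states)
    while True:
        Q = R + gamma * P.dot(V)        # (S, A) action values under the current V
        V_new = (pi * Q).sum(axis=1)    # expectation over the policy's action choices
        if np.max(np.abs(V_new - V)) < tol:
            return V_new
        V = V_new

# Toy 2-state, 2-action MDP and a uniform-random policy, for illustration only.
P = np.array([[[0.9, 0.1], [0.1, 0.9]],
              [[0.8, 0.2], [0.2, 0.8]]])
R = np.array([[0.0, 1.0],
              [1.0, 0.0]])
pi = np.full((2, 2), 0.5)
print(policy_evaluation_sketch(P, R, pi, gamma=0.9))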