def solve_forest_example(forest_states_size=50, r1=50, r2=25, fire_prob=0.1, num_simulations=50, discount=0.9):
    """Build a forest MDP and run both solvers on it.

    Args:
        forest_states_size: Forwarded to ``forest`` as ``S``.
        r1: Forwarded to ``forest`` as ``r1``.
        r2: Forwarded to ``forest`` as ``r2``.
        fire_prob: Forwarded to ``forest`` as ``p``.
        num_simulations: Forwarded to ``solve_mdp.test_algorithm`` as
            ``num_sim``.
        discount: Forwarded to ``solve_mdp.test_algorithm`` as ``discount``.

    Returns:
        The ``pd.concat`` of the two ``test_algorithm`` results, with the
        ``ValueIteration`` result first and the ``PolicyIteration`` result
        second.
    """
    # `forest` must yield exactly two items: the transition and reward models.
    transitions, rewards = forest(S=forest_states_size, r1=r1, r2=r2, p=fire_prob)

    # Run the solvers in a fixed order so the concatenated rows stay
    # value-iteration first, policy-iteration second.
    # NOTE(review): presumably each result is a pandas DataFrame - confirm
    # against solve_mdp.test_algorithm.
    results = [
        solve_mdp.test_algorithm(
            algorithm,
            transitions,
            rewards,
            discount=discount,
            num_sim=num_simulations,
        )
        for algorithm in (ValueIteration, PolicyIteration)
    ]
    return pd.concat(results)
# Example #2
# 0
def solve_ctr_mdp(transitions, rewards, num_simulations=1000, discount=0.99):
    """Run both solvers on a caller-supplied MDP.

    Args:
        transitions: Transition model, passed unchanged to
            ``solve_mdp.test_algorithm``.
        rewards: Reward model, passed unchanged to
            ``solve_mdp.test_algorithm``.
        num_simulations: Forwarded to ``solve_mdp.test_algorithm`` as
            ``num_sim``.
        discount: Forwarded to ``solve_mdp.test_algorithm`` as ``discount``.

    Returns:
        The ``pd.concat`` of the two ``test_algorithm`` results, with the
        ``ValueIteration`` result first and the ``PolicyIteration`` result
        second.
    """
    # Fixed solver order keeps the concatenated output value-iteration first.
    # NOTE(review): presumably each result is a pandas DataFrame - confirm
    # against solve_mdp.test_algorithm.
    results = [
        solve_mdp.test_algorithm(
            algorithm,
            transitions,
            rewards,
            discount=discount,
            num_sim=num_simulations,
        )
        for algorithm in (ValueIteration, PolicyIteration)
    ]
    return pd.concat(results)