Ejemplo n.º 1
0
def test_qlearning_discounted_reward(discount_factor_range=(0.1, 0.3, 0.5, 0.9, 0.99), num_sim=50):
    """Run Q-learning on one fixed forest problem for several discount factors.

    For every discount factor, ``solve_mdp.solve_mdp_by_qlearning`` is called
    ``num_sim`` times, with ``max_iter`` taking the values
    ``10000, 10001, ..., 10000 + num_sim - 1``.

    Args:
        discount_factor_range: Iterable of values forwarded to the solver as
            its ``discount`` argument, one batch of runs per value.
        num_sim: Number of solver runs performed for each discount factor.

    Returns:
        The per-run solver results stacked as rows, batches concatenated in
        the order of ``discount_factor_range``. Assumes each solver result is
        a pandas Series -- TODO confirm against ``solve_mdp``.

    Raises:
        ValueError: Raised by ``pd.concat`` when there is nothing to
            concatenate (empty ``discount_factor_range`` or ``num_sim <= 0``).
    """
    # The forest problem is built from constants only, so construct it once
    # instead of once per solver run.
    # NOTE(review): assumes forest() is a pure function of its arguments and
    # that the solver does not modify P/R in place -- confirm.
    P, R = forest(S=50, p=0.0, r1=50, r2=25)

    frames = []
    for factor in discount_factor_range:
        runs = [
            solve_mdp.solve_mdp_by_qlearning(P, R, discount=factor, max_iter=n)
            for n in range(10000, 10000 + num_sim)
        ]
        # Each run is one column after the concat; transpose to one row per run.
        frames.append(pd.concat(runs, axis=1).T)
    return pd.concat(frames)
Ejemplo n.º 2
0
def test_qlearning_algorithm(
    forest_states_size=50, fire_prob=0.01, r1=50, r2=25, discount=0.9, num_sim_range=(10000, 10050), verbose=False
):
    """Solve one forest problem by Q-learning over a range of ``max_iter`` values.

    The problem is built once with ``forest`` and then handed to
    ``solve_mdp.solve_mdp_by_qlearning`` once for every integer in
    ``range(*num_sim_range)``, that integer being used as ``max_iter``.

    Args:
        forest_states_size: Forwarded to ``forest`` as ``S``.
        fire_prob: Forwarded to ``forest`` as ``p``.
        r1: Forwarded to ``forest`` as ``r1``.
        r2: Forwarded to ``forest`` as ``r2``.
        discount: Forwarded to the solver as ``discount``.
        num_sim_range: ``(start, stop)`` pair; ``stop`` is exclusive.
        verbose: Forwarded to the solver as ``verbose``.

    Returns:
        The solver results concatenated column-wise and then transposed, so
        that each run becomes one row.
    """
    transitions, rewards = forest(S=forest_states_size, r1=r1, r2=r2, p=fire_prob)
    first_iter, stop_iter = num_sim_range
    runs = [
        solve_mdp.solve_mdp_by_qlearning(
            transitions, rewards, discount=discount, max_iter=max_iter, verbose=verbose
        )
        for max_iter in range(first_iter, stop_iter)
    ]
    return pd.concat(runs, axis=1).transpose()
Ejemplo n.º 3
0
def test_qlearning_deterministic(fireprob_range=(0.0, 0.1, 0.2, 0.5, 1.0), num_sim=50):
    """Run Q-learning on forest problems with different fire probabilities.

    For every fire probability a forest problem is built with ``S=50``,
    ``r1=50`` and ``r2=25``, and ``solve_mdp.solve_mdp_by_qlearning`` is
    called ``num_sim`` times on it, with ``max_iter`` taking the values
    ``10000, 10001, ..., 10000 + num_sim - 1``. Each result gets an extra
    ``"fire_probability"`` entry recording the probability it was run with.

    Args:
        fireprob_range: Iterable of values forwarded to ``forest`` as ``p``.
        num_sim: Number of solver runs performed for each fire probability.

    Returns:
        The tagged per-run results stacked as rows, batches concatenated in
        the order of ``fireprob_range``.

    Raises:
        ValueError: Raised by ``pd.concat`` when there is nothing to
            concatenate (empty ``fireprob_range`` or ``num_sim <= 0``).
    """
    frames = []
    for fire_prob in fireprob_range:
        # The problem depends only on the fire probability, so build it once
        # per probability rather than once per solver run.
        # NOTE(review): assumes forest() is a pure function of its arguments
        # and that the solver does not modify P/R in place -- confirm.
        P, R = forest(S=50, p=fire_prob, r1=50, r2=25)
        tag = pd.Series(fire_prob, index=["fire_probability"])

        runs = []
        for n in range(10000, 10000 + num_sim):
            result = solve_mdp.solve_mdp_by_qlearning(P, R, max_iter=n)
            # Series.append was removed in pandas 2.0; pd.concat is the
            # equivalent way to extend the result with the tag entry.
            runs.append(pd.concat([result, tag]))

        # Each run is one column after the concat; transpose to one row per run.
        frames.append(pd.concat(runs, axis=1).T)
    return pd.concat(frames)
Ejemplo n.º 4
0
def test_qlearning_algorithm(transitions, rewards,
                             discount=0.9,
                             num_sim_range=(10000, 10050),
                             verbose=False):
    """Solve a given MDP by Q-learning over a range of ``max_iter`` values.

    ``solve_mdp.solve_mdp_by_qlearning`` is called once for every integer in
    ``range(*num_sim_range)``, that integer being used as ``max_iter``.

    Args:
        transitions: Passed to the solver as its first positional argument.
        rewards: Passed to the solver as its second positional argument.
        discount: Forwarded to the solver as ``discount``.
        num_sim_range: ``(start, stop)`` pair; ``stop`` is exclusive.
        verbose: Forwarded to the solver as ``verbose``.

    Returns:
        The solver results concatenated column-wise and then transposed, so
        that each run becomes one row.
    """
    first_iter, stop_iter = num_sim_range
    runs = [
        solve_mdp.solve_mdp_by_qlearning(
            transitions, rewards, discount=discount, max_iter=max_iter, verbose=verbose
        )
        for max_iter in range(first_iter, stop_iter)
    ]
    return pd.concat(runs, axis=1).transpose()