def test_2():
    """Solve the cleaning environment with value iteration and print the policy.

    Creates a ``cleaning_env``, initialises its stochastic model from
    ``get_transition_model``, prints the environment, then calls
    ``value_iteration`` with a freshly built ``mdp_agent`` and prints the
    policy it returns.
    """
    world = cleaning_env()
    world.init_stochatic_model(get_transition_model)
    world.print_env()

    # gamma = 0.8 (presumably the discount factor -- confirm against mdp_agent)
    solver = mdp_agent(0.8)

    # epsilon = 0.1 (presumably the convergence tolerance -- confirm against
    # value_iteration)
    computed_policy = value_iteration(world, solver, 0.1)
    print_policy(world, computed_policy)
def test_1():
    """Solve the cleaning environment with policy iteration and print the policy.

    Creates a ``cleaning_env``, initialises its stochastic model from
    ``get_transition_model``, prints the environment and its transition
    model, then calls ``policy_iteration`` with a freshly built
    ``mdp_agent`` and prints the policy it returns.
    """
    world = cleaning_env()
    world.init_stochatic_model(get_transition_model)
    world.print_env()
    world.print_transition_model()

    # gamma = 0.8 (presumably the discount factor -- confirm against mdp_agent)
    solver = mdp_agent(0.8)

    computed_policy = policy_iteration(world, solver)
    print_policy(world, computed_policy)
# NOTE(review): a function named ``test_1`` is already defined earlier in this
# file; this later ``def`` rebinds the name, so the earlier definition is no
# longer reachable as ``test_1``. Confirm whether one of them should be renamed.
def test_1():
    """Build the cleaning environment and print it with its transition model.

    Creates a ``cleaning_env``, initialises its stochastic model from
    ``get_transition_model``, then prints the environment and the
    transition model.
    """
    env = cleaning_env()
    env.init_stochatic_model(get_transition_model)
    env.print_env()
    env.print_transition_model()