def test_2():
    """Run value iteration on a freshly built cleaning environment.

    Creates a ``cleaning_env``, installs its transition model through
    ``init_stochatic_model(get_transition_model)``, prints the environment,
    then calls ``value_iteration`` and prints the policy it returns.
    """
    world = cleaning_env()
    world.init_stochatic_model(get_transition_model)
    world.print_env()

    # 0.8 is the agent's gamma argument (presumably the discount factor).
    solver = mdp_agent(0.8)
    # 0.1 is the epsilon argument (presumably a convergence tolerance).
    best_policy = value_iteration(world, solver, 0.1)
    print_policy(world, best_policy)
def test_1():
    """Run policy iteration on a freshly built cleaning environment.

    Creates a ``cleaning_env``, installs its transition model through
    ``init_stochatic_model(get_transition_model)``, prints both the
    environment and the transition model, then calls ``policy_iteration``
    and prints the policy it returns.
    """
    world = cleaning_env()
    world.init_stochatic_model(get_transition_model)
    world.print_env()
    world.print_transition_model()

    # 0.8 is the agent's gamma argument (presumably the discount factor).
    solver = mdp_agent(0.8)
    print_policy(world, policy_iteration(world, solver))
def test_1():
    """Build a cleaning environment and print it with its transition model.

    Creates a ``cleaning_env``, installs its transition model through
    ``init_stochatic_model(get_transition_model)``, then prints the
    environment followed by the transition model. No solver is run.
    """
    world = cleaning_env()
    world.init_stochatic_model(get_transition_model)
    # Only the two printing calls follow; nothing is returned.
    world.print_env()
    world.print_transition_model()