Ejemplo n.º 1
0
def execute_policy_iteration_test(test, output='console'):
    """
    Description
    -----------
    Build a PolicyIteration solver for the given test, run it,
    and optionally forward the result to output_processing.

    Parameters
    ----------
    test: Test() \\
        -- The Test() instance handed to the PolicyIteration
        constructor and, when reporting, to output_processing.

    output: str \\
        -- Where the result should be reported. Only 'console'
        and 'file' trigger output_processing; any other value
        skips the reporting step.

    Returns
    -------
    PolicyIteration() \\
        -- The solver instance after run() has been called.
        It is returned regardless of the value of output.
    """
    # Output targets that output_processing is invoked for.
    reportable_outputs = ('console', 'file')

    solver = PolicyIteration(test)
    solver.run()

    should_report = output in reportable_outputs
    if should_report:
        output_processing(output, test, solver, 'PolicyIteration')

    return solver
Ejemplo n.º 2
0
    for i in range(size):
        for j in range(size):
            cell_type = cell_matrix[j][i]

            if cell_type == CellType.WHOOPING:
                is_terminal = True
                reward = -10
            elif cell_type == CellType.KFC:
                is_terminal = True
                reward = 10
            else:
                is_terminal = False
                reward = -1
            cell = CellState((i, j), reward, cell_type, is_terminal)
            env.place_cell(i, j, cell)
            states.append(cell)
    return env, states


# Driver script: build the environment, compute a policy with policy
# iteration, then hand everything to the pygame-based Game and start it.

# create_game_env() yields the environment plus a list of states
# (presumably the CellState objects placed on the grid -- confirm against
# the function definition).
env, states = create_game_env()
# NOTE(review): the second argument looks like the agent's starting
# coordinates and the first like a label/mode -- confirm against Agent.
agent = Agent("policy_eval", (0, 0))

# Policy iteration is constructed from the state list; the value returned
# by run() is what gets passed to the game as the policy.
policy_iter_algo = PolicyIteration(states)
policy = policy_iter_algo.run()

game = Game(Config, Controller, env, agent, policy)
# Draw the environment once and refresh the pygame display before
# handing control to the game via start().
game.draw_env()
pygame.display.update()
game.start()
Ejemplo n.º 3
0
# Sanity check: query the MDP's transition function T for the initial
# state and print whatever it returns.
# NOTE(review): the meaning of action=1 depends on grid_mdp's action
# encoding, which is not visible here -- confirm.
transitions = grid_mdp.T(grid_mdp.initial_state, action=1)
print(transitions)

# Run value iteration
# (This whole section is currently disabled; only policy iteration below
# is executed.)
# vi = ValueIteration(grid_mdp)
# print('Running value iteration')
# vi.run()
# vi.plot_learning_curve()
#
# print('\nFinal V table:')
# grid_mdp.print_values(vi.V)
#
# # Print the optimal policy
# policy = vi.get_best_policy()
# print('\nBest policy:')
# grid_mdp.print_policy(policy)

# Run policy iteration on the same MDP; the resulting policy is read
# from pi.policy after run() returns.
pi = PolicyIteration(grid_mdp)
print('Running policy iteration')
pi.run()

# Print the policy produced by policy iteration
print('\nBest policy:')
grid_mdp.print_policy(pi.policy)

# Save policy to file
# NOTE(review): the file is written with pickle even though the name ends
# in .h5, so it must be read back with pickle.load, not an HDF5 reader.
# open() does not create missing directories, so 'results/' must already
# exist or this raises FileNotFoundError.
with open('results/policy.h5', 'wb') as file:
    pickle.dump(pi.policy, file)