def execute_policy_iteration_test(test, output='console'):
    """
    Description
    -----------
    Function used to run the policy_iteration for the given test.

    Parameters
    ----------
    test: Test() \\
        -- A Test() instance with the information needed to run the policy_iteration.
    output: str \\
        -- A string that tells the function where its output is expected.

    Returns
    -------
    PolicyIteration() \\
        -- If no recognizable output is given, returns the PolicyIteration instance that was used.
    """
    policy_iteration = PolicyIteration(test)
    policy_iteration.run()

    if output in ['console', 'file']:
        output_processing(output, test, policy_iteration, 'PolicyIteration')

    return policy_iteration
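
# Example usage (sketch): how Test() is constructed is not shown here, so the
# bare Test() call below is an assumption -- build the test case however the
# surrounding code does.
#
#     test = Test()
#     execute_policy_iteration_test(test, output='console')        # prints/writes results
#     pi = execute_policy_iteration_test(test, output=None)        # just returns the instance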
    # Assign a reward and terminal flag to each cell based on its type.
    for i in range(size):
        for j in range(size):
            cell_type = cell_matrix[j][i]
            if cell_type == CellType.WHOOPING:   # penalty terminal cell
                is_terminal = True
                reward = -10
            elif cell_type == CellType.KFC:      # goal terminal cell
                is_terminal = True
                reward = 10
            else:                                # regular cell: small step cost
                is_terminal = False
                reward = -1
            cell = CellState((i, j), reward, cell_type, is_terminal)
            env.place_cell(i, j, cell)
            states.append(cell)
    return env, states


# Build the environment, solve it with policy iteration, and start the game loop.
env, states = create_game_env()
agent = Agent("policy_eval", (0, 0))
policy_iter_algo = PolicyIteration(states)
policy = policy_iter_algo.run()
game = Game(Config, Controller, env, agent, policy)

# initiate env
game.draw_env()
pygame.display.update()
game.start()
# Sanity check
transitions = grid_mdp.T(grid_mdp.initial_state, action=1)
print(transitions)

# Run value iteration
# vi = ValueIteration(grid_mdp)
# print('Running value iteration')
# vi.run()
# vi.plot_learning_curve()
#
# print('\nFinal V table:')
# grid_mdp.print_values(vi.V)
#
#
# Print the optimal policy
# policy = vi.get_best_policy()
# print('\nBest policy:')
# grid_mdp.print_policy(policy)

# Run policy iteration
pi = PolicyIteration(grid_mdp)
print('Running policy iteration')
pi.run()

# Print the optimal policy
print('\nBest policy:')
grid_mdp.print_policy(pi.policy)

# Save policy to file
with open('results/policy.h5', 'wb') as file:
    pickle.dump(pi.policy, file)
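
# Loading the policy back (sketch): the file above is written with pickle.dump,
# so it is read with pickle.load despite the .h5 extension; only the path from
# the save step is reused, nothing else is assumed.
#
#     with open('results/policy.h5', 'rb') as file:
#         saved_policy = pickle.load(file)
#     grid_mdp.print_policy(saved_policy)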