# -*- coding: utf-8 -*- """ Created on Thu Feb 21 10:24:13 2019 @author: shaunmathew """ from helper import create_argparser from env import GridWorld import random import math from copy import copy import time #Creating command line parser parser = create_argparser() args = parser.parse_args() grid_world = GridWorld(args.p1, args.p2, args.r_up, args.r_down, args.r_left, args.r_right, grid_world_size=4, starting_state=8) ''' Policy Evaluation '''
def main(arguments):
    """Run ``double_q`` on the default grid world and average its logs.

    The argument list is parsed with a parser that carries an ``alpha``
    default of 0.1 and a ``--use_ep_func`` switch.  ``epsilon`` and
    ``discount_factor`` are added to the parsed namespace when the parser
    did not define them.  The experiment is repeated three times when
    ``args.AVERAGE_RUNS`` is truthy and once otherwise; every logged
    quantity is then averaged over the runs along axis 0.

    Args:
        arguments: Sequence of command line tokens handed to ``parse_args``.

    Returns:
        A 5-tuple of run-averaged values, in this order: episode lengths,
        time per episode, total time, averaged time log, averaged episode
        length log.
    """
    # NOTE(review): "store_true" combined with a default of True means the
    # switch cannot turn the option off if this spec is forwarded unchanged
    # to argparse -- confirm against create_argparser.
    parser = create_argparser({
        "alpha": {"default": 0.1},
        "--use_ep_func": {
            "dest": "use_ep_func",
            "action": "store_true",
            "default": True,
        },
    })
    args = parser.parse_args(arguments)
    grid_world = GridWorld(default_grid, args.p1, args.p2)

    # Back-fill hyper-parameters that the parser did not provide.
    for name, fallback in {"epsilon": 0.1, "discount_factor": 0.9}.items():
        if name not in args:
            setattr(args, name, fallback)

    runs = []
    num_episodes = args.num_episodes

    # Publish the parsed namespace as the module-level ``args``.
    globals()['args'] = args

    num_runs = 3 if args.AVERAGE_RUNS else 1
    for _ in range(num_runs):
        started = time.time()
        q_s_a, q_s_a2 = initialize(grid_world)

        # Either a callable schedule or the constant epsilon is handed over.
        exploration = epsilon_func if args.use_ep_func else args.epsilon
        _, _, ep_length_log, time_log, avg_ep_length_log, avg_time_log = double_q(
            grid_world, q_s_a, q_s_a2, exploration,
            num_episodes=num_episodes)

        total_time = time.time() - started
        runs.append({
            "Episode Length": ep_length_log,
            "Time Per Episode": time_log,
            "Total Time": total_time,
            "Average Time Log": avg_time_log,
            "Average Ep Length": avg_ep_length_log,
        })
        print("\nTook {}s to finish {} episodes".format(
            total_time, num_episodes))

    def averaged(metric):
        # Mean of one logged quantity across all runs.
        stacked = np.array([run[metric] for run in runs])
        return np.average(stacked, axis=0)

    average_ep_lengths = averaged("Episode Length")
    average_ep_time = averaged("Time Per Episode")
    average_time = averaged("Total Time")
    average_avg_time_log = averaged("Average Time Log")
    average_avg_ep_length = averaged("Average Ep Length")

    # The tables of the last run are the ones reported.
    output_deterministic_policy(q_s_a, q_s_a2, grid_world)

    return (average_ep_lengths, average_ep_time, average_time,
            average_avg_time_log, average_avg_ep_length)
def main(arguments):
    """Run ``gpi`` on the default grid world, average its logs and plot them.

    ``epsilon`` and ``discount_factor`` are added to the parsed namespace
    when the parser did not define them.  The experiment is repeated three
    times when ``args.AVERAGE_RUNS`` is truthy and once otherwise, and every
    logged quantity is averaged over the runs along axis 0.  Four figures
    are written to the current working directory as ``<title>_mc.jpg``: the
    two per-episode series (every 10th point) and the two moving-average
    series.

    Args:
        arguments: Sequence of command line tokens handed to ``parse_args``.

    Returns:
        A 5-tuple of run-averaged values, in this order: episode lengths,
        time per episode, total time, averaged time log, averaged episode
        length log.
    """
    parser = create_argparser()
    args = parser.parse_args(arguments)
    grid_world = GridWorld(default_grid, args.p1, args.p2)

    # Back-fill hyper-parameters so they can be read as plain attributes.
    default_args = {"epsilon": 0.1, "discount_factor": 0.9}
    for arg, value in default_args.items():
        if arg not in args:
            setattr(args, arg, value)

    num_episodes = args.num_episodes
    run_dict = {}

    # Publish the parsed namespace as the module-level ``args``.
    globals()['args'] = args

    num_runs = 3 if args.AVERAGE_RUNS else 1
    for i in range(num_runs):
        start_time = time.time()
        pi, q_s_a, returns = initialize(grid_world)
        _, _, ep_length_log, time_log, avg_ep_length_log, avg_time_log = gpi(
            grid_world, pi, q_s_a, returns, num_episodes=num_episodes)
        total_time = time.time() - start_time
        run_dict[i] = {
            "Episode Length": ep_length_log,
            "Time Per Episode": time_log,
            "Total Time": total_time,
            "Average Time Log": avg_time_log,
            "Average Ep Length": avg_ep_length_log,
        }
        print("\nTook {}s to finish {} episodes".format(
            total_time, num_episodes))

    def average_over_runs(key):
        # Mean of one logged quantity across all runs.
        return np.average(
            np.array([run[key] for run in run_dict.values()]), axis=0)

    average_ep_lengths = average_over_runs("Episode Length")
    average_ep_time = average_over_runs("Time Per Episode")
    average_time = average_over_runs("Total Time")
    average_avg_time_log = average_over_runs("Average Time Log")
    average_avg_ep_length = average_over_runs("Average Ep Length")

    res = [
        average_ep_lengths, average_ep_time, average_time,
        average_avg_time_log, average_avg_ep_length,
    ]
    graph_names = [
        "Episode Length",
        "Time Per Episode",
        "Total Time in Seconds",
        "Time Per Episode (Moving Average 10 ep)",
        "Episode Length (Moving Average 10 ep)",
    ]
    y_axis_names = [
        "Episode Length in Steps",
        "Time Per Episode in Seconds",
        "Total Time in Seconds",
        "Time Per Episode in Seconds",
        "Episode Length in Steps",
    ]

    # Report the policy of the last run.
    output_deterministic_policy(pi, grid_world)

    def save_plot(index, x_values, y_values):
        # Draw one labelled series and write it to "<title>_mc.jpg".
        plt.plot(x_values, y_values, label="mc")
        plt.title(graph_names[index])
        plt.xlabel("Episode Number")
        plt.ylabel(y_axis_names[index])
        plt.legend()
        plt.savefig(graph_names[index] + "_mc" + ".jpg")
        plt.close()

    # Per-episode series: keep every 10th point.
    for i in [0, 1]:
        t = np.linspace(1, num_episodes, num=num_episodes)[0::10]
        save_plot(i, t, res[i][0::10])

    # Moving-average series: spread the points over the full episode range.
    # The sample count comes from the series itself; the previous
    # ``num_episodes / 10`` is a float under true division, which
    # np.linspace does not accept as ``num``.
    for i in [-2, -1]:
        t = np.linspace(1, num_episodes, num=len(res[i]))
        save_plot(i, t, res[i])

    return (average_ep_lengths, average_ep_time, average_time,
            average_avg_time_log, average_avg_ep_length)