예제 #1
0
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 21 10:24:13 2019

@author: shaunmathew
"""

from helper import create_argparser
from env import GridWorld
import random
import math
from copy import copy
import time

# Build the command line parser and read this run's options.
parser = create_argparser()
args = parser.parse_args()

# The four r_* options (presumably per-direction rewards -- confirm against
# GridWorld), kept in the positional order GridWorld receives them after
# args.p1 and args.p2.
_directional_args = (args.r_up, args.r_down, args.r_left, args.r_right)

# grid_world_size and starting_state are fixed for this script.
grid_world = GridWorld(args.p1, args.p2, *_directional_args,
                       grid_world_size=4, starting_state=8)
'''
Policy Evaluation
'''

예제 #2
0
파일: double_q.py 프로젝트: shaun-mathew/RL
def main(arguments):
    """Run double_q on the default grid world and average the logs over runs.

    Parses ``arguments``, builds a ``GridWorld`` from ``default_grid``,
    performs three runs when ``args.AVERAGE_RUNS`` is truthy (one otherwise),
    prints the wall-clock time of each run, and finally prints the policy via
    ``output_deterministic_policy`` using the Q-tables of the last run.

    Args:
        arguments: Command line argument list handed to ``parse_args``.

    Returns:
        A 5-tuple of ``np.average(..., axis=0)`` results taken across runs:
        episode lengths, time per episode, total time, the averaged time log,
        and the averaged episode-length log.
    """
    # NOTE(review): "--use_ep_func" is store_true with default True, so the
    # flag itself looks unable to switch the option off -- confirm how
    # create_argparser consumes this spec.
    cli = create_argparser({
        "alpha": {
            "default": 0.1
        },
        "--use_ep_func": {
            "dest": "use_ep_func",
            "action": "store_true",
            "default": True
        }
    })
    args = cli.parse_args(arguments)

    grid_world = GridWorld(default_grid, args.p1, args.p2)

    # Fall back to these values for any option the parser did not provide.
    for name, fallback in {"epsilon": 0.1, "discount_factor": 0.9}.items():
        if name not in args:
            setattr(args, name, fallback)

    num_episodes = args.num_episodes

    # Publish the parsed options as a module-level global.
    globals()['args'] = args

    per_run = {}
    run_count = 3 if args.AVERAGE_RUNS else 1

    for run_index in range(run_count):
        started_at = time.time()
        q_s_a, q_s_a2 = initialize(grid_world)

        # Either the fixed epsilon value or the epsilon_func callable is
        # handed to double_q; epsilon_func is only looked up when selected.
        exploration = args.epsilon if not args.use_ep_func else epsilon_func
        _, _, ep_length_log, time_log, avg_ep_length_log, avg_time_log = double_q(
            grid_world,
            q_s_a,
            q_s_a2,
            exploration,
            num_episodes=num_episodes)

        total_time = time.time() - started_at

        per_run[run_index] = {
            "Episode Length": ep_length_log,
            "Time Per Episode": time_log,
            "Total Time": total_time,
            "Average Time Log": avg_time_log,
            "Average Ep Length": avg_ep_length_log
        }

        print("\nTook {}s to finish {} episodes".format(
            total_time, num_episodes))

    def _mean_across_runs(field):
        # Stack one entry per run, then average along the run axis.
        stacked = np.array([entry[field] for entry in per_run.values()])
        return np.average(stacked, axis=0)

    average_ep_lengths = _mean_across_runs("Episode Length")
    average_ep_time = _mean_across_runs("Time Per Episode")
    average_time = _mean_across_runs("Total Time")
    average_avg_time_log = _mean_across_runs("Average Time Log")
    average_avg_ep_length = _mean_across_runs("Average Ep Length")

    # Uses the Q-tables left over from the final run.
    output_deterministic_policy(q_s_a, q_s_a2, grid_world)

    return (average_ep_lengths, average_ep_time, average_time,
            average_avg_time_log, average_avg_ep_length)
예제 #3
0
def main(arguments):
    """Run gpi on the default grid world, average the logs, and save plots.

    Parses ``arguments``, builds a ``GridWorld`` from ``default_grid``,
    performs three runs when ``args.AVERAGE_RUNS`` is truthy (one otherwise),
    prints the policy of the last run via ``output_deterministic_policy``,
    and writes four ``*_mc.jpg`` figures to the current directory.

    Args:
        arguments: Command line argument list handed to ``parse_args``.

    Returns:
        A 5-tuple of ``np.average(..., axis=0)`` results taken across runs:
        episode lengths, time per episode, total time, the averaged time log,
        and the averaged episode-length log.
    """
    parser = create_argparser()
    args = parser.parse_args(arguments)

    grid_world = GridWorld(default_grid, args.p1, args.p2)

    default_args = {"epsilon": 0.1, "discount_factor": 0.9}

    # Fall back to these values for any option the parser did not provide.
    for arg, fallback in default_args.items():
        if arg not in args:
            setattr(args, arg, fallback)

    num_episodes = args.num_episodes

    run_dict = {}

    # Publish the parsed options as a module-level global.
    globals()['args'] = args

    num_runs = 3 if args.AVERAGE_RUNS else 1

    for i in range(num_runs):
        start_time = time.time()
        pi, q_s_a, returns = initialize(grid_world)
        _, _, ep_length_log, time_log, avg_ep_length_log, avg_time_log = gpi(
            grid_world, pi, q_s_a, returns, num_episodes=num_episodes)
        total_time = time.time() - start_time

        run_dict[i] = {
            "Episode Length": ep_length_log,
            "Time Per Episode": time_log,
            "Total Time": total_time,
            "Average Time Log": avg_time_log,
            "Average Ep Length": avg_ep_length_log
        }

        print("\nTook {}s to finish {} episodes".format(
            total_time, num_episodes))

    def _mean_across_runs(field):
        # Stack one entry per run, then average along the run axis.
        stacked = np.array([run[field] for run in run_dict.values()])
        return np.average(stacked, axis=0)

    average_ep_lengths = _mean_across_runs("Episode Length")
    average_ep_time = _mean_across_runs("Time Per Episode")
    average_time = _mean_across_runs("Total Time")
    average_avg_time_log = _mean_across_runs("Average Time Log")
    average_avg_ep_length = _mean_across_runs("Average Ep Length")

    res = [
        average_ep_lengths, average_ep_time, average_time,
        average_avg_time_log, average_avg_ep_length
    ]

    # graph_names / y_axis_names are index-aligned with res.
    graph_names = [
        "Episode Length", "Time Per Episode", "Total Time in Seconds",
        "Time Per Episode (Moving Average 10 ep)",
        "Episode Length (Moving Average 10 ep)"
    ]
    y_axis_names = [
        "Episode Length in Steps", "Time Per Episode in Seconds",
        "Total Time in Seconds", "Time Per Episode in Seconds",
        "Episode Length in Steps"
    ]

    # Print the policy from the final run.
    output_deterministic_policy(pi, grid_world)

    def _save_plot(x_values, y_values, idx):
        # Draw one labelled curve and write it to "<graph name>_mc.jpg".
        plt.plot(x_values, y_values, label="mc")
        plt.title(graph_names[idx])
        plt.xlabel("Episode Number")
        plt.ylabel(y_axis_names[idx])

        plt.legend()
        plt.savefig(graph_names[idx] + "_mc" + ".jpg")
        plt.close()

    # Raw per-episode series: plot every tenth point.
    every_tenth_episode = np.linspace(1, num_episodes,
                                      num=num_episodes)[0::10]
    for i in [0, 1]:
        _save_plot(every_tenth_episode, res[i][0::10], i)

    # Averaged series (the last two entries of res). np.linspace requires an
    # integer `num`; true division would pass a float under Python 3, so use
    # floor division instead.
    averaged_axis = np.linspace(1, num_episodes, num=num_episodes // 10)
    for i in [-2, -1]:
        _save_plot(averaged_axis, res[i], i)

    return average_ep_lengths, average_ep_time, average_time, average_avg_time_log, average_avg_ep_length