Esempio n. 1
0
File: main.py Progetto: zazyzaya/RL
def part2b():
    """Run value iteration as "Agent B" and visualise the result.

    Creates an ``HW2Agent`` on a freshly built map, runs
    ``value_iteration()``, prints the evaluation count and the value table,
    then shows two figures: the map with the agent's best path scattered on
    top, and the value table drawn as an image.
    """
    # The three numeric arguments are positional; their meaning is defined
    # by HW2Agent's constructor, which is not visible from this file.
    agent = HW2Agent(env.build_map(), env.STATEMAP, 0.5, 0.95, 0.01)
    agent.value_iteration()

    evals_line = "\tNum evaluations:  " + str(agent.num_evals)
    print("Agent B:")
    print(evals_line)
    print(agent.values)

    # generate_best_path() yields two coordinate sequences; scatter() gets
    # them in swapped order (presumably row/col -> x/y -- TODO confirm).
    path_first, path_second = agent.generate_best_path()

    # A second, fresh map is built for the background image rather than
    # reusing the one handed to the agent.
    background = env.build_map()
    plt.imshow(background)
    plt.scatter(path_second, path_first)
    plt.show()

    plt.imshow(agent.values)
    plt.show()
Esempio n. 2
0
File: main.py Progetto: zazyzaya/RL
def part2c():
    """Run value iteration as "Agent C" and visualise the result.

    Creates an ``HW2Agent`` on a freshly built map, runs
    ``value_iteration()``, prints the evaluation count and the value table,
    then shows two figures: the map with a stochastically generated path
    (capped by ``cutoff=1000``) scattered on top, and the value table drawn
    as an image.
    """
    # The three numeric arguments are positional; their meaning is defined
    # by HW2Agent's constructor, which is not visible from this file.
    agent = HW2Agent(env.build_map(), env.STATEMAP, 0.02, 0.55, 0.01)
    agent.value_iteration()

    evals_line = "\tNum evaluations:  " + str(agent.num_evals)
    print("Agent C:")
    print(evals_line)
    print(agent.values)

    # generate_best_path() yields two coordinate sequences; scatter() gets
    # them in swapped order (presumably row/col -> x/y -- TODO confirm).
    path_first, path_second = agent.generate_best_path(
        stochastic=True,
        cutoff=1000,
    )

    # A second, fresh map is built for the background image rather than
    # reusing the one handed to the agent.
    background = env.build_map()
    plt.imshow(background)
    plt.scatter(path_second, path_first)
    plt.show()

    plt.imshow(agent.values)
    plt.show()
Esempio n. 3
0
File: main.py Progetto: zazyzaya/RL
def part1a():
    """Run policy iteration as "Agent A" and visualise the result.

    Creates an ``HW2Agent`` on a freshly built map, runs
    ``policy_iteration()``, prints the improvement and evaluation counts and
    the value table, then shows two figures: the map with the agent's best
    path scattered on top, and the value table drawn as an image.
    """
    # The three numeric arguments are positional; their meaning is defined
    # by HW2Agent's constructor, which is not visible from this file.
    agent = HW2Agent(env.build_map(), env.STATEMAP, 0.02, 0.95, 0.01)
    agent.policy_iteration()

    improvements_line = "\tNum improvements: " + str(agent.num_improvements)
    evals_line = "\tNum evaluations:  " + str(agent.num_evals)
    print("Agent A:")
    print(improvements_line)
    print(evals_line)
    print(agent.values)

    # generate_best_path() yields two coordinate sequences; scatter() gets
    # them in swapped order (presumably row/col -> x/y -- TODO confirm).
    path_first, path_second = agent.generate_best_path()

    # A second, fresh map is built for the background image rather than
    # reusing the one handed to the agent.
    background = env.build_map()
    plt.imshow(background)
    plt.scatter(path_second, path_first)
    plt.show()

    plt.imshow(agent.values)
    plt.show()
Esempio n. 4
0
File: main.py Progetto: zazyzaya/RL
import matplotlib.pyplot as plt

from math import inf
from agent_class import HW3Agent
from build_environment import build_map, STATEMAP

# Map built once at import time; test() passes this same object to every
# HW3Agent it creates and also draws it as the plot background.
MAP = build_map()

def test(episode_len, num_episodes, epsilon, default_r=-1):
    """Train an ``HW3Agent`` with on-policy Monte Carlo and plot its path.

    Args:
        episode_len: Forwarded as the first argument of ``on_policy_mc``.
        num_episodes: Forwarded as the second argument of ``on_policy_mc``.
        epsilon: Forwarded positionally to the ``HW3Agent`` constructor.
        default_r: Forwarded to the constructor as ``default_r``.

    Prints a one-line summary containing the generated path length (or
    ``'INF'`` when the path reached the step cutoff) and draws the map, the
    path and the titles onto the current pyplot figure.
    """
    step_cutoff = 1000

    agent = HW3Agent(MAP, STATEMAP, epsilon, default_r=default_r)
    agent.on_policy_mc(episode_len, num_episodes)

    # The first returned sequence is plotted on the vertical axis and the
    # second on the horizontal axis.
    ys, xs = agent.generate_best_path(cutoff=step_cutoff, stochastic=True)

    # A path as long as the cutoff is reported as 'INF' instead of a number.
    steps_taken = len(xs)
    if steps_taken < step_cutoff:
        reported_len = steps_taken
    else:
        reported_len = 'INF'

    summary = "%d episodes with %d steps: %s" % (
        num_episodes,
        episode_len,
        str(reported_len),
    )
    print(summary)

    plt.imshow(MAP)
    plt.scatter(xs, ys)

    heading = "%d episodes of length %d" % (num_episodes, episode_len)
    plt.suptitle(heading, fontsize=15)
    plt.title(summary)
    # NOTE: plt.show() is not called here, so the figure is drawn but not
    # displayed by this function.


# Disabled parameter sweep: it is wrapped in a bare string literal, so it is
# never executed. If re-enabled it would call test() for every combination of
# episode length 10**1..10**4 and episode count 10**1..10**4, with
# epsilon=0.02 and default_r=-inf.
'''
for episodes in range(1,5):
    for elen in range(1,5):
        test(10 ** elen, 10 ** episodes, 0.02, default_r=-inf)
'''