Ejemplo n.º 1
0
import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # Step budget for the experiment: episodes keep running until the
    # cumulative step count reported by RLGlue exceeds this value. It also
    # determines the x-axis ticks of the plot, so the two cannot drift apart.
    max_steps = 8000
    num_runs = 1

    # Create and pass agent and environment objects to RLGlue
    environment = WindygridEnvironment()
    agent = SarsaAgent()
    rlglue = RLGlue(environment, agent)
    del agent, environment  # don't use these anymore

    # The bookkeeping lists are reset at the start of every run, so with
    # num_runs > 1 only the data of the final run reaches the plot below.
    for run in range(num_runs):
        episode = []
        time_step = []
        rlglue.rl_init()
        while True:
            # NOTE(review): rl_episode() is called without a per-episode
            # step limit; confirm the RLGlue signature provides a default.
            rlglue.rl_episode()
            # Record (cumulative steps, completed episodes) after each episode.
            time_step.append(rlglue.num_steps())
            episode.append(rlglue.num_episodes())
            if rlglue.num_steps() > max_steps:
                break

    plt.plot(time_step, episode, label="8 actions")
    # One tick every 1000 steps, from 0 up to and including max_steps.
    plt.xticks(list(range(0, max_steps + 1, 1000)))
    plt.xlabel('Time steps')
    plt.ylabel('Episode', rotation=90)
    plt.legend(loc=2)
    plt.show()
Ejemplo n.º 2
0
from rl_glue import RLGlue
from windy_env import WindyEnvironment
from sarsa_agent import SarsaAgent
import numpy as np
import matplotlib.pyplot as plt

# Total number of environment steps to spend on the experiment; also passed
# to rl_episode() as the per-episode argument.
max_steps = 8000

# Wire the environment and the agent together through RLGlue.
environment = WindyEnvironment()
agent = SarsaAgent()
rl = RLGlue(environment, agent)
rl.rl_init()

# After every episode, remember the step and episode counters that RLGlue
# reports so they can be plotted against each other.
step_list, ep_list = [], []
steps, episodes = 0, 0
while steps < max_steps:
    rl.rl_episode(max_steps)
    steps, episodes = rl.num_steps(), rl.num_episodes()
    # print(steps, episodes)

    ep_list += [episodes]
    step_list += [steps]

# Episodes completed as a function of time steps.
plt.plot(step_list, ep_list)
plt.xlabel('Time steps')
plt.ylabel('Episodes')
plt.show()
Ejemplo n.º 3
0
from rl_glue import RLGlue
from windy_env import WindyEnvironment
from n_step_sarsa_agent import SarsaAgent
import numpy as np
import time
import matplotlib.pyplot as plt

if __name__ == "__main__":
    start_time = time.time()
    max_steps = 8000

    # Build the experiment: RLGlue owns the environment and the agent from
    # here on, so the local handles are dropped right away.
    environment = WindyEnvironment()
    agent = SarsaAgent()
    rlglue = RLGlue(environment, agent)
    del agent, environment  # don't use these anymore
    rlglue.rl_init()

    # Ask the agent for the values it reports under 'n' and 'a'; they are
    # only used to build the plot title.
    n = rlglue.rl_agent_message('n')
    a = rlglue.rl_agent_message('a')

    steps_before_episode = []   # step counter sampled just before each episode
    episodes_after_episode = []  # episode counter sampled just after it
    while rlglue.num_steps() < max_steps:
        steps_before_episode.append(rlglue.num_steps())
        rlglue.rl_episode(10000)
        episodes_after_episode.append(rlglue.num_episodes())

    title_parts = [str(n), '-step sarsa with ', str(a), " actions"]
    plt.title("".join(title_parts))
    plt.plot(steps_before_episode, episodes_after_episode)
    plt.show()
Ejemplo n.º 4
0
    # First experiment: run the agent after sending it the message 'n = 0',
    # average the number of steps taken in each episode over all runs, and
    # plot the resulting curve.
    rlglue = RLGlue(environment, agent)
    # NOTE(review): presumably this sets the number of planning steps to 0
    # (it matches the plot label below) — confirm against the agent's
    # message handler.
    rlglue.rl_agent_message('n = 0')
    del agent, environment  # don't use these anymore
    steps1 = {}  # episode index -> list of step counts, one entry per run
    L1 = []  # 1-based episode numbers (x-axis values)
    L2 = [0]*num_episodes  # per-episode step count averaged over the runs
    for episode in range(num_episodes):
      steps1[episode]=[]
      L1.append(episode+1)
    for run in range(num_runs):
      # Seed with the run index so each run uses its own fixed seed.
      np.random.seed(run)
      rlglue.rl_init()
      step = 0
      for episode in range(num_episodes):
        rlglue.rl_episode(max_steps)
        # The episode length is taken as the increase of num_steps() since
        # the previous episode (assumes num_steps() is cumulative within a
        # run — TODO confirm).
        new_step = rlglue.num_steps()
        steps1[episode].append(new_step-step)
        step = new_step
    for episode in range(num_episodes):
      L2[episode] = sum(steps1[episode])/num_runs
    # The first episode is left out of the plotted curve.
    plt.plot(L1[1:],L2[1:],label = '0 planning steps')

    environment =GridEnvironment()
    agent = DynaqAgent()
    rlglue = RLGlue(environment, agent)
    rlglue.rl_agent_message('n = 5')
    del agent, environment  # don't use these anymore
    steps1 = {}
    L1 = []
    L2 = [0]*num_episodes
    for episode in range(num_episodes):
Ejemplo n.º 5
0
from sarsa_agent import SarsaAgent
import numpy as np

if __name__ == "__main__":
    # Total step budget; also handed to rl_episode() for every episode.
    max_total_step = 8000

    # Create the environment and the agent and hand both over to RLGlue.
    environment = WindEnvironment()
    agent = SarsaAgent()
    rlglue = RLGlue(environment, agent)
    del agent, environment  # don't use these anymore

    # Fix the NumPy seed before initialising RL-Glue for reproducibility.
    np.random.seed(1)
    rlglue.rl_init()

    # Both histories start at 0 so the saved curve begins at the origin.
    episode_history = [0]
    step_history = [0]

    # Keep running episodes while the reported step count is below the budget.
    while rlglue.num_steps() < max_total_step:
        rlglue.rl_episode(max_total_step)
        episode_history.append(rlglue.num_episodes())
        step_history.append(rlglue.num_steps())

    print(episode_history)
    np.savez('windy.npz', timeSteps=step_history, Episodes=episode_history)
    # Reading the results back:
    #   data = np.load('windy.npz')
    #   data["timeSteps"]  -> recorded step counts
    #   data["Episodes"]   -> recorded episode counts