Example #1
import numpy as np
import matplotlib

# Use the TkAgg backend so the experiment's plots render in a desktop window.
matplotlib.use('TkAgg')

from lib.envs.bandit import BanditEnv
from lib.simulation import Experiment
from shared.policy import UCB

evaluation_seed = 1239
num_actions = 10
trials = 100
distribution = "normal"

# 10-armed bandit with normally distributed rewards; run a UCB agent for 100 trials.
env = BanditEnv(num_actions, distribution, evaluation_seed)
agent = UCB(num_actions)
experiment = Experiment(env, agent)
experiment.run_bandit(trials)
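
shared.policy.UCB is not shown in this snippet. As a minimal sketch of the classic UCB1 selection rule it presumably implements (the function and argument names here are illustrative, not the library's):

import numpy as np

def ucb1_select(values, counts, t, c=2.0):
    # Pull every arm once before applying the confidence bonus.
    untried = np.where(counts == 0)[0]
    if untried.size > 0:
        return int(untried[0])
    # The bonus grows with total pulls t and shrinks with an arm's own count,
    # so rarely tried arms keep getting revisited.
    bonus = np.sqrt(c * np.log(t) / counts)
    return int(np.argmax(values + bonus))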
Example #2
        "maze": SimpleRoomsEnv(),
        "grid": SimpleRoomsEnv()
    }
    return switcher.get(argument)


# env_string, agent_string, num_iter, epsilon, alpha, decay and interactive
# come from command-line parsing further up (not shown in this fragment).
env = get_env(env_string)

if agent_string.startswith('q'):
    print("Running Q Learning on {} environment for {} epochs".format(
        env_string, num_iter))
    agent = QLearningAgent(range(env.action_space.n),
                           epsilon=epsilon,
                           alpha=alpha,
                           decay_every=decay)
    experiment = Experiment(env, agent)
    experiment.run_qlearning(num_iter, interactive)
    #print("Running Q Learning")
elif agent_string.startswith('s'):
    print("Running SARSA on {} environment for {} epochs".format(
        env_string, num_iter))
    agent = SarsaAgent(range(env.action_space.n),
                       epsilon=epsilon,
                       alpha=alpha,
                       decay_every=decay)
    experiment = Experiment(env, agent)
    experiment.run_sarsa(num_iter, interactive)
    #print("Running SARSA")

else:
    print("Invalid Agent argument")
Example #3
                    max_value_indices.append(idx)

            # Break ties among equally valued greedy actions at random.
            return np.random.choice(max_value_indices)

    def learn(self, state1, action1, reward, state2, stop):
        """
        Q-learning Update (the max over next-state actions makes this
        off-policy; a true SARSA update would bootstrap from Q(s',a')
        for the action a' actually taken):
        Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
        or
        Q(s,a) <- Q(s,a) + alpha * (td_target - Q(s,a))
        or
        Q(s,a) <- Q(s,a) + alpha * td_delta
        """
        td_target = reward + self._gamma * max(self._Q_table[state2])
        td_delta = td_target - self._Q_table[state1][action1]
        self._Q_table[state1][action1] += self._alpha * td_delta


# WindyGridworldEnv, QLearningAgent and Experiment are imported further up
# in the full module (not shown in this fragment).
interactive = True

env = WindyGridworldEnv()
agent = QLearningAgent(
    range(env.action_space.n),
    env.S,
)
experiment = Experiment(env, agent)
# A QLearningAgent pairs with run_qlearning (run_sarsa expects a SARSA agent).
experiment.run_qlearning(10, interactive)
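
One update worked by hand, with made-up numbers, to sanity-check the formula:

Q = {0: [0.0, 2.0], 1: [1.0, 5.0]}        # toy table: Q[state][action]
alpha, gamma, reward = 0.5, 0.9, 1.0
td_target = reward + gamma * max(Q[1])    # 1.0 + 0.9 * 5.0 = 5.5
td_delta = td_target - Q[0][1]            # 5.5 - 2.0 = 3.5
Q[0][1] += alpha * td_delta               # 2.0 + 0.5 * 3.5 = 3.75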
Example #4
import numpy as np
import matplotlib

matplotlib.use('TkAgg')

from lib.envs.simple_rooms import SimpleRoomsEnv
# The two additions below are needed by the runs further down; the
# cliff-walking module path is assumed to mirror simple_rooms, and
# QLearningAgent is assumed to live in shared.agent alongside RandomAgent.
from lib.envs.cliff_walking import CliffWalkingEnv
from lib.simulation import Experiment
from shared.agent import RandomAgent, QLearningAgent


interactive = True
max_number_of_episodes = 5
# Baseline run: a uniformly random policy for five interactive episodes.
env = SimpleRoomsEnv()
agent = RandomAgent(range(env.action_space.n))
experiment = Experiment(env, agent)
experiment.run_agent(max_number_of_episodes, interactive)
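
shared.agent.RandomAgent is not shown; a minimal sketch of the interface the Experiment presumably expects (the act/learn method names are an assumption based on the learning agents in these examples):

import numpy as np

class RandomAgentSketch:
    """Uniform-random baseline; learning is a no-op."""
    def __init__(self, actions):
        self._actions = list(actions)

    def act(self, state):
        # Ignore the state; pick an action uniformly at random.
        return np.random.choice(self._actions)

    def learn(self, state1, action1, reward, state2, stop):
        pass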
    def learn(self, state1, action1, reward, state2, stop):
        """
        Q-learning Update:
        Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
        or
        Q(s,a) <- Q(s,a) + alpha * (td_target - Q(s,a))
        or
        Q(s,a) <- Q(s,a) + alpha * td_delta
        """
        ## TODO 3 implemented below; the signature and attribute names are
        ## assumed to match the agent in Example #3.
        td_target = reward + self._gamma * max(self._Q_table[state2])
        td_delta = td_target - self._Q_table[state1][action1]
        self._Q_table[state1][action1] += self._alpha * td_delta


interactive = True
env = SimpleRoomsEnv()
agent = QLearningAgent(range(env.action_space.n))
experiment = Experiment(env, agent)
experiment.run_qlearning(10, interactive)

interactive = False
env = SimpleRoomsEnv()
agent = QLearningAgent(range(env.action_space.n))
experiment = Experiment(env, agent)
experiment.run_qlearning(50, interactive)

interactive = True
env = CliffWalkingEnv()
agent = QLearningAgent(range(env.action_space.n))
experiment = Experiment(env, agent)
experiment.run_qlearning(10, interactive)

interactive = False