Example #1
  def test_whenRunSolveMazeIsCalledNoTrialsTimes(self):
    noTrials = 2
    noIterations = 2

    performance = {iteration: PerformanceRecord([0]) for iteration in range(noIterations)}
    mockedAgent = Agent(None, None, None)
    mockedAgent.solveMaze = MagicMock(return_value=performance)
    mockedAgentFactory = AgentFactory(None, None, None)
    mockedAgentFactory.createAgent = MagicMock(return_value=mockedAgent)
    experiment = Experiment(mockedAgentFactory)
    
    experiment.run(1, noTrials)

    self.assertEqual(mockedAgent.solveMaze.call_count, noTrials)
Example #2
    def test_machine_run_single_test(self):
        machine = Testing_Policy.Machine('Test', 100, 0, 0, 0, 1)
        testtube = Testing_Policy.Testtube()
        agent = Agent.Agent(state="Infected", info_dict={'Agent Index': 0})
        testtube.register_agent(agent)
        machine.register_testtube(testtube)
        machine.run_single_test(testtube, infected_states=["Infected"])
        self.assertEqual(testtube.testtube_result, "Positive")
Example #3
    def test_deep_copy_agents(self):

        agent_list = [
            Agent.Agent(state="Infected", info_dict={'Agent Index': 0})
        ]
        agent_copy = copy.deepcopy(agent_list)

        self.assertIsNot(agent_list, agent_copy)
        agent_copy[0].state = "Recovered"

        self.assertNotEqual(agent_list[0].state, "Recovered")
Example #4
    def __init__(self, agents_name, items, name="", initialize_agents=True):
        assert (len(items) % len(agents_name) == 0
                ), "Number of items must be a multiple of the number of agents"

        if not name:
            name = "Agents_" + str(len(agents_name)) + "_Items_" + str(len(items))
        self.name = name
        self.agents = dict()
        self.items = items
        self.borda_properties = dict()

        for borda_property in self.borda_properties:
            self.borda_properties[borda_property] = None

        if initialize_agents:
            for agent_name in agents_name:
                self.agents[agent_name] = Agent(agent_name, self)
        else:
            for agent_name in agents_name:
                self.agents[agent_name] = None
Example #5
from src import Agent
from stable_baselines import PPO2
from stable_baselines import DQN  # alternative model, unused in this script

if __name__ == "__main__":
    agent = Agent(model=PPO2)
    # If one wishes to train the agent
    # agent.train(tensorboard_log='./trial_3/')
    # If one wishes to simply run the agent
    agent.evaluate(tensorboard_log='./evaluation/', model_path='/checkpoint_2')
Example #6
from src import Agent

if __name__ == "__main__":
    agent = Agent()
    agent.train()
Example #7
from src import Agent
import numpy as np
import gym
from utils import plotLearning
import tensorflow as tf
import gym.wrappers

if __name__ == '__main__':
    tf.compat.v1.disable_eager_execution()
    env = gym.make('LunarLander-v2')
    lr = 0.001
    n_games = 500
    agent = Agent(gamma=0.99, epsilon=1.0, lr=lr,
                  input_dims=env.observation_space.shape,
                  n_actions=env.action_space.n, mem_size=1000000, batch_size=64,
                  epsilon_end=0.01)
    scores = []
    eps_history = []

    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.store_transition(observation, action, reward, observation_, done)
            observation = observation_
            agent.learn()
        scores.append(score)
        eps_history.append(agent.epsilon)

        avg_score = np.mean(scores[-100:])
        print('episode', i, 'score %.2f' % score,
              'average score %.2f' % avg_score,
              'epsilon %.2f' % agent.epsilon)
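
    # Closing plot, a minimal sketch: it assumes the imported utils.plotLearning
    # helper accepts (x, scores, epsilons, filename); adjust if its signature differs.
    x = [i + 1 for i in range(n_games)]
    plotLearning(x, scores, eps_history, 'lunarlander.png')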
Example #8
def monte_carlo_control(episodes):
    EPISODES = episodes

    deck = Deck.Deck()

    GAMES = []

    MC = RL.Monte_Carlo_Control()
    policy = RL.Epsilon_Greedy()

    for k in range(EPISODES):
        player = Agent.Player(deck)
        dealer = Agent.Dealer(deck)

        game = State.Game()

        player.draw_black()
        dealer.draw_black()

        state = State.State(player, dealer)
        game.add_state(state)
        # state.print_state()

        # Player's turn
        while policy.action(state, k + 1, MC) == Agent.HIT:
            player.draw()
            game.add_action(Agent.HIT)
            if player.score > 21 or player.score < 1:  # Lose
                game.add_reward(-1)
                game.terminated = True
                MC.update(game)
                break
            game.add_reward(0)
            state = state.next_state(player, dealer)
            game.add_state(state)

        if not game.terminated:
            game.add_action(Agent.STICK)

        # Dealer's turn, only if the game is not over
        if not game.terminated:
            while dealer.score < 17:
                dealer.draw()
                if dealer.score > 21 or dealer.score < 1:  # Win
                    game.add_reward(1)
                    game.terminated = True
                    MC.update(game)
                    break

        # Compare the scores if not terminated
        if not game.terminated:
            if player.score > dealer.score:
                # print("Player scores more Win !")
                game.add_reward(1)
            elif player.score < dealer.score:
                # print("Dealer scores more Lose !")
                game.add_reward(-1)
            else:
                # print("Draw !")
                game.add_reward(0)

            game.terminated = True
            MC.update(game)

        GAMES.append(game)

        # player.print_holds()
        # print()
        # dealer.print_holds()

        # print("This game is over !")

    print("Training Over !")

    return MC.Q
Example #9
def sarsa_lambda_control(episodes, lamb):
    EPISODES = episodes

    deck = Deck.Deck()

    sarsa = RL.Sarsa_Lambda(lamb, 1)
    policy = RL.Epsilon_Greedy()

    for k in range(EPISODES):
        terminated = False

        player = Agent.Player(deck)
        dealer = Agent.Dealer(deck)

        sarsa.init_eligibility_trace()

        player.draw_black()
        dealer.draw_black()

        state = State.State(player, dealer)
        # state.print_state()

        action = policy.action(state, k + 1, sarsa)

        while action == Agent.HIT:
            player.draw()
            sarsa.add_trace_N(state, Agent.HIT)
            if player.score > 21 or player.score < 1:  # Lose
                reward = -1
                delta = reward + 0 - sarsa.Q_S_A(state, Agent.HIT)
                sarsa.update(delta)
                terminated = True
                # print("Player BUST Lose !")
                break
            state_next = state.next_state(player, dealer)
            action_next = policy.action(state_next, k + 1, sarsa)
            delta = 0 + sarsa.gamma * sarsa.Q_S_A(
                state_next, action_next) - sarsa.Q_S_A(state, Agent.HIT)
            sarsa.update(delta)
            state = state_next
            action = action_next

        if not terminated:
            sarsa.add_trace_N(state, Agent.STICK)
            while dealer.score < 17:
                dealer.draw()
                if dealer.score > 21 or dealer.score < 1:  # Win
                    reward = 1
                    delta = reward + 0 - sarsa.Q_S_A(state, Agent.STICK)
                    sarsa.update(delta)
                    terminated = True
                    # print("Dealer BUST Win !")
                    break

            if not terminated:
                if player.score > dealer.score:
                    # print("Player scores more Win !")
                    reward = 1
                    delta = reward + 0 - sarsa.Q_S_A(state, Agent.STICK)
                    sarsa.update(delta)
                    terminated = True
                elif player.score < dealer.score:
                    # print("Dealer scores more Lose !")
                    reward = -1
                    delta = reward + 0 - sarsa.Q_S_A(state, Agent.STICK)
                    sarsa.update(delta)
                    terminated = True
                else:
                    # print("Draw !")
                    reward = 0
                    delta = reward + 0 - sarsa.Q_S_A(state, Agent.STICK)
                    sarsa.update(delta)
                    terminated = True
                    # player.print_holds()
                    # print()
                    # dealer.print_holds()

                    # print("This game is over !")

    print("Training Over !")
    return sarsa.Q
Example #10
    def run(self):
        """
        This method runs the algorithm.

        Returns: Population of the agent and sets of statistics of the optimization and test period.
        """

        # Create the pool with the number of agents given
        pool = [
            Agent.Agent(i, self.pmax, self.s, self.max_t)
            for i in range(0, self.n)
        ]

        # Initialize the dictionary to store the mean and std per optimization cycle.
        statistics_optimization_per_cycle = {}
        last_price = self.prices[self.cut_idx]  # Last price of the optimization cycle.

        wealth_end_cycle = []  # Auxiliary list used to compute the max and min statistics.

        print('Running optimization period...')
        # Optimization period
        for j in range(0, self.ncycles):  # First loop over number of cycles

            print("Cycle " + str(j))

            if j > 0:  # Reset the agents and their strategies before each new optimization cycle.
                for a in pool:
                    a.reset()
                    a.reset_strategies()

            for i in range(self.init_idx, self.cut_idx + 1):  # Second loop over the time steps

                for agent in pool:  # Third loop over the agents
                    agent.evaluate_strategies(self.prices, self.mt, self.dt,
                                              self.yt, i)

                # If it is the end of a DGA-period, evolve the strategies and reset them.
                if (i - self.init_idx) != 0 and (i - self.init_idx) % self.ga == 0:
                    for agent in pool:
                        agent.evolve_strategies(self.gm, self.gp, pool,
                                                self.prices[i])
                    for agent in pool:
                        agent.make_evolution()

            # Append the statistics.
            wealth_end_cycle = [a.get_wealth(last_price) for a in pool]
            cycle_stats = (round(np.mean(wealth_end_cycle), 2),
                           round(np.std(wealth_end_cycle), 2))
            statistics_optimization_per_cycle[j] = cycle_stats

        max_opt = np.max(wealth_end_cycle)
        min_opt = np.min(wealth_end_cycle)

        # Reset the agents' wealth and strategies before starting the test period.
        for agent in pool:
            agent.reset()
            #agent.reset_strategies()

        avg_wealth_t_test = []
        avg_position_t_test = []

        print('Running test period...')
        # Test period
        for i in range(self.cut_idx, self.final_idx + 1):

            wealths = []
            positions = []

            for agent in pool:  # Loop over the agents

                agent.evaluate_strategies(self.prices, self.mt, self.dt,
                                          self.yt, i)
                wealths.append(agent.get_wealth(self.prices[i]))
                positions.append(agent.get_position())

            if (i - self.init_idx) != 0 and (i - self.init_idx) % self.ga == 0:

                for a in pool:
                    a.evolve_strategies(self.gm, self.gp, pool, self.prices[i])

                for a in pool:
                    a.make_evolution()

            avg_wealth_t_test.append(round(np.mean(wealths), 2))
            avg_position_t_test.append(round(np.mean(positions), 2))

        return pool, wealth_end_cycle, statistics_optimization_per_cycle, avg_wealth_t_test, avg_position_t_test, max_opt, min_opt
Example #11
from src import Agent
import gym
import numpy as np


if __name__ == '__main__':
    env = gym.make('Pendulum-v0')
    agent = Agent(alpha=0.0001, beta=0.001, input_dims=[3], tau=0.001, env=env,
                  batch_size=64, layer1_size=400, layer2_size=300, n_actions=1)

    score_history = []
    np.random.seed(0)
    for i in range(1000):
        obs = env.reset()
        done = False
        score = 0
        while not done:
            act = agent.choose_action(obs)
            new_state, reward, done, info = env.step(act)
            agent.Remember(obs, act, reward, new_state, int(done))
            agent.learn()
            score += reward
            obs = new_state

        score_history.append(score)
        print('episode', i, 'score %.2f' % score,
              '100 game average %.2f' % np.mean(score_history[-100:]))