def test_whenRunSolveMazeIsCalledNoTrialsTimes(self):
    noTrials = 2
    noIterations = 2
    performance = {iteration: PerformanceRecord([0])
                   for iteration in range(noIterations)}
    mockedAgent = Agent(None, None, None)
    mockedAgent.solveMaze = MagicMock(return_value=performance)
    mockedAgentFactory = AgentFactory(None, None, None)
    mockedAgentFactory.createAgent = MagicMock(return_value=mockedAgent)
    experiment = Experiment(mockedAgentFactory)
    experiment.run(1, noTrials)
    # MagicMock.assert_called() takes no arguments; to check how many times
    # the mock was invoked, compare call_count against the expected value.
    self.assertEqual(mockedAgent.solveMaze.call_count, noTrials)
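# A minimal sketch of the contract the test above exercises, assuming
# Experiment.run(noIterations, noTrials) creates an agent via the factory and
# asks it to solve the maze once per trial; the real Experiment class is not
# shown here, so names and the run() signature are inferred from the test.
class Experiment:
    def __init__(self, agentFactory):
        self.agentFactory = agentFactory

    def run(self, noIterations, noTrials):
        results = []
        for _ in range(noTrials):
            agent = self.agentFactory.createAgent()
            results.append(agent.solveMaze(noIterations))
        return results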
def test_machine_run_single_test(self):
    machine = Testing_Policy.Machine('Test', 100, 0, 0, 0, 1)
    testtube = Testing_Policy.Testtube()
    agent = Agent.Agent(state="Infected", info_dict={'Agent Index': 0})
    testtube.register_agent(agent)
    machine.register_testtube(testtube)
    machine.run_single_test(testtube, infected_states=["Infected"])
    self.assertEqual(testtube.testtube_result, "Positive")
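# A companion sketch for the negative path, assuming an agent state outside
# infected_states (here "Susceptible") yields a "Negative" result string;
# both of those values are assumptions, only "Infected"/"Positive" appear in
# the test above.
def test_machine_run_single_test_negative(self):
    machine = Testing_Policy.Machine('Test', 100, 0, 0, 0, 1)
    testtube = Testing_Policy.Testtube()
    agent = Agent.Agent(state="Susceptible", info_dict={'Agent Index': 0})
    testtube.register_agent(agent)
    machine.register_testtube(testtube)
    machine.run_single_test(testtube, infected_states=["Infected"])
    self.assertEqual(testtube.testtube_result, "Negative")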
def test_deep_copy_agents(self):
    agent_list = [
        Agent.Agent(state="Infected", info_dict={'Agent Index': 0})
    ]
    agent_copy = copy.deepcopy(agent_list)
    self.assertIsNot(agent_list, agent_copy)
    # Mutating the copy must not leak back into the original agents.
    agent_copy[0].state = "Recovered"
    self.assertNotEqual(agent_list[0].state, "Recovered")
def __init__(self, agents_name, items, name="", initialize_agents=True):
    assert (len(items) % len(agents_name) == 0
            ), "Number of items must be a multiple of the number of agents"
    # Fall back to a descriptive default name when none is supplied.
    if not name:
        self.name = ("Agents_" + str(len(agents_name)) +
                     "_Items_" + str(len(items)))
    else:
        self.name = name
    self.agents = dict()
    self.items = items
    self.borda_properties = dict()
    for borda_property in self.borda_properties:
        self.borda_properties[borda_property] = None
    # The loop variable is renamed so it no longer shadows the "name" parameter.
    if initialize_agents:
        for agent_name in agents_name:
            self.agents[agent_name] = Agent(agent_name, self)
    else:
        for agent_name in agents_name:
            self.agents[agent_name] = None
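# A hypothetical usage sketch: "FairDivisionInstance" stands in for whatever
# class this __init__ belongs to (the real class name is not shown above),
# and the agent and item values are made up for illustration.
problem = FairDivisionInstance(
    agents_name=["Alice", "Bob"],
    items=["w", "x", "y", "z"],  # 4 items, a multiple of 2 agents
)
print(problem.name)            # "Agents_2_Items_4"
print(sorted(problem.agents))  # ["Alice", "Bob"]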
from src import Agent
from stable_baselines import PPO2
from stable_baselines import DQN

if __name__ == "__main__":
    agent = Agent(model=PPO2)

    # If one wishes to train the agent
    # agent.train(tensorboard_log='./trial_3/')

    # If one wishes to simply run the agent
    agent.evaluate(tensorboard_log='./evaluation/', model_path='/checkpoint_2')
from src import Agent

if __name__ == "__main__":
    agent = Agent()
    agent.train()
from src import Agent
import numpy as np
import gym
from utils import plotLearning
import tensorflow as tf
import gym.wrappers

if __name__ == '__main__':
    tf.compat.v1.disable_eager_execution()
    env = gym.make('LunarLander-v2')
    lr = 0.001
    n_games = 500
    agent = Agent(gamma=0.99, epsilon=1.0, lr=lr,
                  input_dims=env.observation_space.shape,
                  n_actions=env.action_space.n, mem_size=1000000,
                  batch_size=64, epsilon_end=0.01)
    scores = []
    eps_history = []

    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.store_transition(observation, action, reward,
                                   observation_, done)
            observation = observation_
            agent.learn()
        eps_history.append(agent.epsilon)
        scores.append(score)  # record the episode return
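# A hedged follow-up sketch: plotLearning is imported above but never called.
# Assuming a plotLearning(x, scores, epsilons, filename) signature for the
# local utils module (an assumption, the module is not shown), the learning
# curve could be saved once training ends:
x = [i + 1 for i in range(n_games)]
plotLearning(x, scores, eps_history, 'lunarlander.png')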
def monte_carlo_control(episodes):
    EPISODES = episodes
    deck = Deck.Deck()
    GAMES = []
    MC = RL.Monte_Carlo_Control()
    policy = RL.Epsilon_Greedy()
    for k in range(EPISODES):
        player = Agent.Player(deck)
        dealer = Agent.Dealer(deck)
        game = State.Game()
        player.draw_black()
        dealer.draw_black()
        state = State.State(player, dealer)
        game.add_state(state)
        # state.print_state()

        # Player's turn
        while policy.action(state, k + 1, MC) == Agent.HIT:
            player.draw()
            game.add_action(Agent.HIT)
            if player.score > 21 or player.score < 1:
                # Lose
                game.add_reward(-1)
                game.terminated = True
                MC.update(game)
                break
            game.add_reward(0)
            state = state.next_state(player, dealer)
            game.add_state(state)
        if not game.terminated:
            game.add_action(Agent.STICK)

        # Dealer's turn, only if the game is not over
        if not game.terminated:
            while dealer.score < 17:
                dealer.draw()
                if dealer.score > 21 or dealer.score < 1:
                    # Win
                    game.add_reward(1)
                    game.terminated = True
                    MC.update(game)
                    break

        # Compare the scores if not terminated
        if not game.terminated:
            if player.score > dealer.score:
                # print("Player scores more Win !")
                game.add_reward(1)
            elif player.score < dealer.score:
                # print("Dealer scores more Lose !")
                game.add_reward(-1)
            else:
                # print("Draw !")
                game.add_reward(0)
            game.terminated = True
            MC.update(game)
        GAMES.append(game)
        # player.print_holds()
        # print()
        # dealer.print_holds()
        # print("This game is over !")
    print("Training Over !")
    return MC.Q
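# A minimal usage sketch, assuming MC.Q maps (state, action) pairs to value
# estimates; the exact key structure of RL.Monte_Carlo_Control is not shown
# above, so treat this as illustrative only.
Q = monte_carlo_control(episodes=50000)
print("Learned", len(Q), "state-action value estimates")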
def sarsa_lambda_control(episodes, lamb):
    EPISODES = episodes
    deck = Deck.Deck()
    sarsa = RL.Sarsa_Lambda(lamb, 1)
    policy = RL.Epsilon_Greedy()
    for k in range(EPISODES):
        terminated = False
        player = Agent.Player(deck)
        dealer = Agent.Dealer(deck)
        sarsa.init_eligibility_trace()
        player.draw_black()
        dealer.draw_black()
        state = State.State(player, dealer)
        # state.print_state()
        action = policy.action(state, k + 1, sarsa)

        # Player's turn
        while action == Agent.HIT:
            player.draw()
            sarsa.add_trace_N(state, Agent.HIT)
            if player.score > 21 or player.score < 1:
                # Lose
                reward = -1
                delta = reward + 0 - sarsa.Q_S_A(state, Agent.HIT)
                sarsa.update(delta)
                terminated = True
                # print("Player BUST Lose !")
                break
            state_next = state.next_state(player, dealer)
            action_next = policy.action(state_next, k + 1, sarsa)
            delta = 0 + sarsa.gamma * sarsa.Q_S_A(
                state_next, action_next) - sarsa.Q_S_A(state, Agent.HIT)
            sarsa.update(delta)
            state = state_next
            action = action_next

        # Dealer's turn, only if the game is not over
        if not terminated:
            sarsa.add_trace_N(state, Agent.STICK)
            while dealer.score < 17:
                dealer.draw()
                if dealer.score > 21 or dealer.score < 1:
                    # Win
                    reward = 1
                    delta = reward + 0 - sarsa.Q_S_A(state, Agent.STICK)
                    sarsa.update(delta)
                    terminated = True
                    # print("Dealer BUST Win !")
                    break

        # Compare the scores if not terminated
        if not terminated:
            if player.score > dealer.score:
                # print("Player scores more Win !")
                reward = 1
                delta = reward + 0 - sarsa.Q_S_A(state, Agent.STICK)
                sarsa.update(delta)
                terminated = True
            elif player.score < dealer.score:
                # print("Dealer scores more Lose !")
                reward = -1
                delta = reward + 0 - sarsa.Q_S_A(state, Agent.STICK)
                sarsa.update(delta)
                terminated = True
            else:
                # print("Draw !")
                reward = 0
                delta = reward + 0 - sarsa.Q_S_A(state, Agent.STICK)
                sarsa.update(delta)
                terminated = True
        # player.print_holds()
        # print()
        # dealer.print_holds()
        # print("This game is over !")
    print("Training Over !")
    return sarsa.Q
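# A hedged comparison sketch in the usual Easy21 style: measure how far the
# SARSA(lambda) estimates are from the Monte Carlo ones. This assumes both
# functions return Q tables with the same (state, action) keys and numeric
# values, which the code above does not guarantee.
Q_mc = monte_carlo_control(episodes=50000)
for lamb in (0.0, 0.5, 1.0):
    Q_sarsa = sarsa_lambda_control(episodes=1000, lamb=lamb)
    mse = sum((Q_sarsa[key] - Q_mc[key]) ** 2 for key in Q_mc) / len(Q_mc)
    print("lambda =", lamb, "MSE vs Monte Carlo =", mse)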
def run(self):
    """
    This method runs the algorithm.

    Returns:
        The agent pool and statistics for the optimization and test periods.
    """
    # Create the pool with the number of agents given.
    pool = [
        Agent.Agent(i, self.pmax, self.s, self.max_t)
        for i in range(0, self.n)
    ]
    # Initialize the dictionary storing the mean and std per optimization cycle.
    statistics_optimization_per_cycle = {}
    last_price = self.prices[self.cut_idx]  # Last price of the optimization cycle.
    wealth_end_cycle = []  # Auxiliary array to compute the max and min statistics.

    print('Running optimization period...')
    # Optimization period
    for j in range(0, self.ncycles):  # First loop over the number of cycles
        print("Cycle " + str(j))
        if j > 0:
            # Reset the agents and the strategies at the end of each opt cycle.
            for a in pool:
                a.reset()
                a.reset_strategies()
        for i in range(self.init_idx, self.cut_idx + 1):  # Second loop over the time steps
            for agent in pool:  # Third loop over the agents
                agent.evaluate_strategies(self.prices, self.mt, self.dt,
                                          self.yt, i)
            # At the end of a DGA period, evolve the strategies and reset them.
            if (i - self.init_idx) != 0 and (i - self.init_idx) % self.ga == 0:
                for agent in pool:
                    agent.evolve_strategies(self.gm, self.gp, pool,
                                            self.prices[i])
                for agent in pool:
                    agent.make_evolution()
        # Append the statistics.
        wealth_end_cycle = [a.get_wealth(last_price) for a in pool]
        cycle_stats = (round(np.mean(wealth_end_cycle), 2),
                       round(np.std(wealth_end_cycle), 2))
        statistics_optimization_per_cycle[j] = cycle_stats
        max_opt = np.max(wealth_end_cycle)
        min_opt = np.min(wealth_end_cycle)

    # Reset the agents' wealth and strategies before starting the test period.
    for agent in pool:
        agent.reset()
        # agent.reset_strategies()

    avg_wealth_t_test = []
    avg_position_t_test = []
    print('Running test period...')
    # Test period
    for i in range(self.cut_idx, self.final_idx + 1):
        wealths = []
        positions = []
        for agent in pool:  # Loop over the agents
            agent.evaluate_strategies(self.prices, self.mt, self.dt,
                                      self.yt, i)
            wealths.append(agent.get_wealth(self.prices[i]))
            positions.append(agent.get_position())
        if (i - self.init_idx) != 0 and (i - self.init_idx) % self.ga == 0:
            for a in pool:
                a.evolve_strategies(self.gm, self.gp, pool, self.prices[i])
            for a in pool:
                a.make_evolution()
        avg_wealth_t_test.append(round(np.mean(wealths), 2))
        avg_position_t_test.append(round(np.mean(positions), 2))

    return (pool, wealth_end_cycle, statistics_optimization_per_cycle,
            avg_wealth_t_test, avg_position_t_test, max_opt, min_opt)
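# A minimal consumption sketch based solely on the return statement above;
# "model" stands in for a hypothetical instance of the class run() belongs to.
(pool, wealth_end_cycle, stats_per_cycle, avg_wealth_test,
 avg_position_test, max_opt, min_opt) = model.run()
for cycle, (mean_w, std_w) in stats_per_cycle.items():
    print("cycle", cycle, "mean wealth", mean_w, "std", std_w)
print("optimization wealth range:", min_opt, "to", max_opt)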
from src import Agent
import gym
import numpy as np

if __name__ == '__main__':
    env = gym.make('Pendulum-v0')
    agent = Agent(alpha=0.0001, beta=0.001, input_dims=[3], tau=0.001,
                  env=env, batch_size=64, layer1_size=400, layer2_size=300,
                  n_actions=1)
    score_history = []
    np.random.seed(0)
    for i in range(1000):
        obs = env.reset()
        done = False
        score = 0
        while not done:
            act = agent.choose_action(obs)
            new_state, reward, done, info = env.step(act)
            agent.Remember(obs, act, reward, new_state, int(done))
            agent.learn()
            score += reward
            obs = new_state
        score_history.append(score)
        print('episode', i, 'score %.2f' % score,
              '100 game average %.2f' % np.mean(score_history[-100:]))
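# A hedged follow-up: plot the running 100-episode average once training
# finishes. matplotlib is an added dependency, not imported by the script
# above, and the filename is made up for illustration.
import matplotlib.pyplot as plt

running_avg = [np.mean(score_history[max(0, t - 99):t + 1])
               for t in range(len(score_history))]
plt.plot(running_avg)
plt.xlabel('episode')
plt.ylabel('100-episode average score')
plt.savefig('pendulum_ddpg_scores.png')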