def run(self): log_dir = "./logs/bastille/MOISMCTSWithRandomRollouts_Vs_TabularQLearning/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner(MOISMCTSWithRandomRolloutsAgent( 100, SafeWindJammersRunner(RandomAgent(), RandomAgent())), TabularQLearningAgent(), checkpoint=100, log_dir=log_dir).run(1000000))
def run(self): log_dir = "./logs/bastille/Random_Vs_RandomRollout_100/" + str(time()) print(str(log_dir)) print( TensorboardWindJammersRunner(RandomAgent(), RandomRolloutAgent( 100, SafeWindJammersRunner( RandomAgent(), RandomAgent())), checkpoint=100, log_dir=log_dir).run(1000000))
def run(self): log_dir = "./logs/bastille/TabularQLearning_MOISMCTSWithRandomRolloutsExpertThenApprentice/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner( TabularQLearningAgent(), MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent( 100, SafeWindJammersRunner(RandomAgent(), RandomAgent()), 8, 12), checkpoint=100, log_dir=log_dir).run(1000000))
def run(self): log_dir = "./logs/bastille/TabularQLearning_MOISMCTSWithValueNetwork/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner(TabularQLearningAgent(), MOISMCTSWithValueNetworkAgent( 100, SafeWindJammersRunner( RandomAgent(), RandomAgent())), checkpoint=100, log_dir=log_dir).run(1000000))
def main():
    # Set log level.
    logging.basicConfig(level=logging.INFO)

    # Set a random seed for consistency in agent AND environment.
    if config.RANDOM_SEED is not None:
        np.random.seed(config.RANDOM_SEED)

    # Make environment.
    env = EnvCatcher(grid_size=config.GRID_SIZE,
                     env_type='episodic',
                     verbose=False,
                     random_seed=config.RANDOM_SEED)

    # Make agent.
    agent = RandomAgent(actions=list(range(env.action_space)))

    # Run the RL loop.
    episode_returns = rl.run_loop(agent=agent,
                                  env=env,
                                  max_num_steps=config.MAX_NUM_STEPS,
                                  report_every_n=config.REPORT_EVERY_N)

    # Save the data.
    date_string = time.strftime("%Y%m%d-%H%M%S")
    filename = ('random_grid_{}_nep_{}_'.format(
        config.GRID_SIZE, len(episode_returns)) + date_string + '.csv')
    rl.save_episode_returns(filename=filename, episode_returns=episode_returns)
def run_training_batch(n_games):
    random.seed(time.localtime())
    agent = Agent()
    agent_types = [agent, CustomAgent(), RandomAgent()]
    # agent_weights = [1.0/3.0, 1.0/3.0, 1]
    # ^ Cumulative weights needed for an even distribution of the learning agent and the random agent
    # agent_weights = [1.0/3.0, 1, 1]
    # ^ Cumulative weights needed for an even distribution of the learning agent and the custom agent
    agent_weights = [1.0 / 9.0, 4.0 / 9.0, 1]
    start = time.perf_counter()
    for game_index in range(n_games):
        try:
            list_of_agents = [agent]
            # Draw three opponents using the cumulative weight thresholds.
            # (The inner loop variable no longer shadows the game index.)
            for _ in range(3):
                rolled_weight = random.random()
                for index in range(len(agent_types)):
                    if rolled_weight <= agent_weights[index]:
                        list_of_agents.append(agent_types[index])
                        break
            random.shuffle(list_of_agents)
            active_game = Game(game_index, list_of_agents)
            active_game.play_game()
        except Exception as err:
            print(f"Error encountered... {err}")
    end = time.perf_counter()
    print(f"Ran {n_games} in {(end - start)} seconds.")
    return True
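# Illustrative alternative, not part of the original run_training_batch: because agent_weights
# already holds cumulative thresholds ([1/9, 4/9, 1]), the manual threshold loop above can be
# expressed directly with random.choices. The helper name pick_opponents is hypothetical.
import random


def pick_opponents(agent_types, agent_weights, n=3):
    # Draw n opponents with the same distribution the manual cumulative-threshold loop encodes.
    return random.choices(agent_types, cum_weights=agent_weights, k=n)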
def run(self): log_dir = "./logs/bastille/Random_Vs_ReinforceClassic/" + str(time()) print(str(log_dir)) print( TensorboardWindJammersRunner(RandomAgent(), ReinforceClassicAgent(8, 12), checkpoint=100, log_dir=log_dir).run(1000000))
def test_on_round_finish(self):
    initial_player_points = []
    for player in self.temp_players:
        player.set_controlling_agent(RandomAgent())
        self.temp_deck.deal_cards_to(player)
        initial_player_points.append(player.get_round_points())
    self.test_round.begin_play()
    self.assertEqual(len(self.temp_deck.get_card_list()), 0)
def run(self): log_dir = "./logs/bastille/Random_Vs_DoubleQLearning/" + str(time()) print(str(log_dir)) print( TensorboardWindJammersRunner(RandomAgent(), DoubleQLearningAgent(), checkpoint=100, log_dir=log_dir).run(1000000))
def test_full_replay_cycle():
    import os
    from environment.battlesnake_environment import BattlesnakeEnvironment
    from agents.RandomAgent import RandomAgent
    from environment.Battlesnake.importer.Importer import Importer
    from environment.Battlesnake.renderer.game_renderer import GameRenderer

    agents = [RandomAgent(), RandomAgent()]
    env = BattlesnakeEnvironment(
        width=15,
        height=15,
        agents=agents,
        act_timeout=0.1,
        export_games=True
    )
    env.reset()
    while not env.game.is_game_over():
        env.step()

    assert os.path.exists(env.exporter.outpath)
    assert os.path.getsize(env.exporter.outpath) > 0

    game, turns, move_list = Importer.load_replay_file(env.exporter.outpath)
    width, height = move_list[0].width, move_list[0].height
    num_snakes = len(move_list[0].snakes)
    renderer = GameRenderer(width, height, num_snakes)

    assert len(move_list) >= 1
    assert len(move_list) == turns
    assert game

    for move in move_list:
        renderer.display(move)

    os.unlink(env.exporter.outpath)

    # TODO: Could be moved to a dedicated place so that it always runs at the end
    import shutil
    shutil.rmtree("replays", ignore_errors=True)
    shutil.rmtree("replay_test", ignore_errors=True)
def run(self): log_dir = "./logs/bastille/Random_Vs_PPOWithMultipleTrajectoriesMultiOutputs" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner( RandomAgent(), PPOWithMultipleTrajectoriesMultiOutputsAgent(8, 12), checkpoint=100, log_dir=log_dir).run(1000000))
def setUp(self):
    # Set a random seed for consistency in agent AND environment.
    if RANDOM_SEED is not None:
        np.random.seed(RANDOM_SEED)

    # Make environment.
    self.env = EnvCatcher(grid_size=GRID_SIZE,
                          env_type='episodic',
                          verbose=False,
                          random_seed=RANDOM_SEED)

    # Make agent.
    self.agent = RandomAgent(actions=list(range(self.env.action_space)))
def sample_env():
    from agents.RandomAgent import RandomAgent
    from agents.SimpleAgent_solution import SimpleAgent
    from environment.battlesnake_environment import BattlesnakeEnvironment

    agents = [RandomAgent(), SimpleAgent()]
    env = BattlesnakeEnvironment(
        width=15,
        height=15,
        agents=agents,
        act_timeout=0.2
    )
    env.reset()
    return env
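# Illustrative use of sample_env(), assuming BattlesnakeEnvironment exposes the same
# game.is_game_over()/step() interface used by test_full_replay_cycle above.
env = sample_env()
while not env.game.is_game_over():
    env.step()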
def test_file_out_behavior(self):
    for player in self.temp_players:
        player.set_controlling_agent(RandomAgent())
    self.temp_game.play_game()
    with open(self.test_round.get_file_out_name(), 'rb') as input_file:
        file_data = pickle.load(input_file)
    for i in range(len(file_data)):
        self.assertTrue(self.test_round._get_file_out_data()[i]["trick_history"].all()
                        == file_data[i]["trick_history"].all())
        self.assertGreater(self.test_round._get_file_out_data()[i]["trick_history"].sum(), 0)
        self.assertTrue(self.test_round._get_file_out_data()[i]["trick_point_history"].all()
                        == file_data[i]["trick_point_history"].all())
        self.assertGreaterEqual(self.test_round._get_file_out_data()[i]["trick_point_history"].sum(), 0)
        self.assertTrue(self.test_round._get_file_out_data()[i]["player_partners"].all()
                        == file_data[i]["player_partners"].all())
        self.assertGreater(self.test_round._get_file_out_data()[i]["player_partners"].sum(), 0)
        self.assertTrue(self.test_round._get_file_out_data()[i]["call_matrix"].all()
                        == file_data[i]["call_matrix"].all())
        self.assertGreater(self.test_round._get_file_out_data()[i]["call_matrix"].sum(), 0)
        self.assertTrue(self.test_round._get_file_out_data()[i]["player_cards_in_hand_history"].all()
                        == file_data[i]["player_cards_in_hand_history"].all())
        self.assertGreater(self.test_round._get_file_out_data()[i]["player_cards_in_hand_history"].sum(), 0)
        self.assertTrue(self.test_round._get_file_out_data()[i]["player_point_history"].all()
                        == file_data[i]["player_point_history"].all())
        self.assertGreater(self.test_round._get_file_out_data()[i]["player_point_history"].sum(), 0)
        self.assertTrue(self.test_round._get_file_out_data()[i]["player_partner_prediction_history"].all()
                        == file_data[i]["player_partner_prediction_history"].all())
        for e in range(len(file_data[i]["player_score_history"])):
            self.assertTrue(self.test_round._get_file_out_data()[i]["player_score_history"][e]
                            == file_data[i]["player_score_history"][e])
def main(): """Running parameters""" atari_game: str = "Qbert-v4" # atari_game: str = "Pong-v4" number_of_episodes: int = 20000 # Number of episodes to run episode_number: int = 1 env = gym.make(atari_game) "Create agents" random_agent: RandomAgent = RandomAgent(env) q_table_agent: QTableAgent = QTableAgent(env) while episode_number <= number_of_episodes: random_agent.play_an_episode() q_table_agent.play_an_episode() print() if episode_number % 100 == 0: q_table_agent.compare_performance_to(random_agent, 100) print() episode_number += 1 print(q_table_agent)
def run(self):
    if self.opponent == "RandomAgent":
        log_dir1 = self.log_dir_root + "DoubleQLearningAgent_VS_RandomAgent_" + self.time
        print(log_dir1)
        print(TensorboardTicTacToeRunner(DoubleQLearningAgent(), RandomAgent(),
                                         log_and_reset_score_history_threshold=10000,
                                         log_dir=log_dir1).run(100000000))
    elif self.opponent == "TabularQLearningAgent":
        log_dir2 = self.log_dir_root + "DoubleQLearningAgent_VS_TabularQLearningAgent_" + self.time
        print(log_dir2)
        print(TensorboardTicTacToeRunner(DoubleQLearningAgent(), TabularQLearningAgent(),
                                         log_and_reset_score_history_threshold=10000,
                                         log_dir=log_dir2).run(100000000))
    elif self.opponent == "DeepQLearningAgent":
        log_dir3 = self.log_dir_root + "DoubleQLearningAgent_VS_DeepQLearningAgent_" + self.time
        print(log_dir3)
        print(TensorboardTicTacToeRunner(DoubleQLearningAgent(), DeepQLearningAgent(9, 9),
                                         log_and_reset_score_history_threshold=10000,
                                         log_dir=log_dir3).run(100000000))
    elif self.opponent == "ReinforceClassicAgent":
        log_dir4 = self.log_dir_root + "DoubleQLearningAgent_VS_ReinforceClassicAgent_" + self.time
        print(log_dir4)
        print(TensorboardTicTacToeRunner(DoubleQLearningAgent(), ReinforceClassicAgent(9, 9),
                                         log_and_reset_score_history_threshold=10000,
                                         log_dir=log_dir4).run(100000000))
    elif self.opponent == "ReinforceClassicWithMultipleTrajectoriesAgent":
        log_dir5 = self.log_dir_root + "DoubleQLearningAgent_VS_ReinforceClassicWithMultipleTrajectoriesAgent_" + self.time
        print(log_dir5)
        print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                         ReinforceClassicWithMultipleTrajectoriesAgent(9, 9),
                                         log_and_reset_score_history_threshold=10000,
                                         log_dir=log_dir5).run(100000000))
    elif self.opponent == "PPOWithMultipleTrajectoriesMultiOutputsAgent":
        log_dir6 = self.log_dir_root + "DoubleQLearningAgent_VS_PPOWithMultipleTrajectoriesMultiOutputsAgent_" + self.time
        print(log_dir6)
        print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                         PPOWithMultipleTrajectoriesMultiOutputsAgent(9, 9),
                                         log_and_reset_score_history_threshold=10000,
                                         log_dir=log_dir6).run(100000000))
    elif self.opponent == "MOISMCTSWithRandomRolloutsAgent":
        log_dir7 = self.log_dir_root + "DoubleQLearningAgent_VS_MOISMCTSWithRandomRolloutsAgent_" + self.time
        print(log_dir7)
        print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                         MOISMCTSWithRandomRolloutsAgent(
                                             100, SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
                                         log_and_reset_score_history_threshold=10000,
                                         log_dir=log_dir7).run(1000000000))
    elif self.opponent == "MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent":
        log_dir8 = self.log_dir_root + "DoubleQLearningAgent_VS_MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent_" + self.time
        print(log_dir8)
        print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                         MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent(
                                             100, SafeTicTacToeRunner(RandomAgent(), RandomAgent()), 9, 9),
                                         log_and_reset_score_history_threshold=10000,
                                         log_dir=log_dir8).run(1000000000))
    elif self.opponent == "MOISMCTSWithValueNetworkAgent":
        log_dir9 = self.log_dir_root + "DoubleQLearningAgent_VS_MOISMCTSWithValueNetworkAgent_" + self.time
        print(log_dir9)
        print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                         MOISMCTSWithValueNetworkAgent(
                                             100, SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
                                         log_and_reset_score_history_threshold=10000,
                                         log_dir=log_dir9).run(1000000000))
    elif self.opponent == "DoubleQLearningAgent":
        log_dir10 = self.log_dir_root + "DoubleQLearningAgent_VS_DoubleQLearningAgent_" + self.time
        print(log_dir10)
        print(TensorboardTicTacToeRunner(DoubleQLearningAgent(), DoubleQLearningAgent(),
                                         log_and_reset_score_history_threshold=10000,
                                         log_dir=log_dir10).run(1000000000))
    elif self.opponent == "RandomRolloutAgent":
        nb_rollouts = 3
        log_dir11 = self.log_dir_root + "RandomAgent_VS_RandomRolloutAgent(" + str(nb_rollouts) + ")_" + self.time
        print(log_dir11)
        print(TensorboardTicTacToeRunner(RandomAgent(),
                                         RandomRolloutAgent(
                                             nb_rollouts, SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
                                         log_and_reset_score_history_threshold=10000,
                                         log_dir=log_dir11).run(1000000000))
    else:
        print("Unknown opponent")
def run(agent_id, is_training, load_dir):
    env = gym.make('LunarLanderContinuous-v2')
    time_format = strftime("%Y%m%d%H%M%S", gmtime())
    outdir = './results/' + time_format + '-' + agent_id + '/'
    if not os.path.exists(outdir + 'video/'):
        os.makedirs(outdir + 'video/')
    # Uncomment the line below to record video
    # env = wrappers.Monitor(env, directory=outdir + 'video/', force=True)
    env.seed(123)
    np.random.seed(123)

    # Set up logger
    logging.basicConfig(filename=outdir + 'application.log', level=logging.INFO)
    logger = logging.getLogger('gym')

    agent = None
    if agent_id == 'RandomAgent':
        agent = RandomAgent(env)
    elif agent_id == 'DDPG':
        agent = DDPG(env)
        agent.build_models()
    elif agent_id == 'RBFAgent':
        agent = RBFAgent(env)
    else:
        logger.error("Invalid Agent chosen!")
        return

    if load_dir is not None:
        logger.info("Load model at " + load_dir)
        # agent.load_weight(load_dir + '/')
        agent.load_model(load_dir + '/')

    # Initialize variables
    episode = np.int16(1)
    step = np.int16(0)
    max_steps = 1000000
    max_episodes = 1100 + episode

    if not is_training:
        logger.info("it is now testing")
        K.set_learning_phase(0)
        max_episodes = 5 + episode
    else:
        K.set_learning_phase(1)

    file_output = outdir + 'reward.csv'
    labels = ['episode', 'reward']
    with open(file_output, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(labels)

        observation = None
        episode_reward = None
        try:
            while step < max_steps and episode < max_episodes:
                if observation is None:  # start of a new episode
                    logger.info("Start episode " + str(episode))
                    episode_reward = np.float32(0)
                    # Obtain the initial observation by resetting the environment.
                    agent.reset_episode()
                    observation = deepcopy(env.reset())

                action = agent.act(observation)
                reward = np.float32(0)
                done = False
                next_state, r, done, info = env.step(action)
                next_state = deepcopy(next_state)
                reward = r
                if is_training:
                    agent.step(observation, action, reward, next_state, done)
                episode_reward += reward
                step += 1
                observation = next_state

                if episode % 20 == 0:
                    logger.info("State-Action: {}, {}, {}, {}, reward: {}".format(
                        observation[0], observation[1], action[0], action[1], reward))

                if done:
                    # Act on the final state.
                    # Step on the final state without adding to memory, since the next state is the reset state.
                    if is_training:
                        action = agent.act(next_state)
                        agent.step_without_memory()
                    to_write = [episode] + [episode_reward]
                    writer.writerow(to_write)
                    if episode % 20 == 0:
                        csvfile.flush()
                    if episode % 20 == 0:
                        agent.save_weight(outdir)
                    episode += 1
                    observation = None
                    episode_reward = None
        except KeyboardInterrupt:
            csvfile.flush()
            agent.save_weight(outdir)

    # Close the env
    env.close()
def test_play_with_learning_agent(self):
    self.temp_player_list[0].set_controlling_agent(Agent())
    for index in range(1, 4):
        self.temp_player_list[index].set_controlling_agent(RandomAgent())
    self.temp_game.play_game()
    self.assertGreaterEqual(max(self.temp_game.get_score_list()), 42)
def test_play_game(self):
    for player in self.temp_player_list:
        player.set_controlling_agent(RandomAgent())
    self.temp_game.play_game()
    self.assertGreaterEqual(max(self.temp_game.get_score_list()), 42)
def test_update_scores(self):
    for player in self.temp_player_list:
        player.set_controlling_agent(RandomAgent())
    self.temp_game.begin_round()
    self.assertNotEqual(sum(self.temp_game.get_score_list()), 0)
#                                  MOISMCTSWithRandomRolloutsAgent(100,
#                                      SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
#                                  log_and_reset_score_history_threshold=10000,
#                                  log_dir=log_dir).run(1000000000))
#
# log_dir = "./logs/Random_vs_all/RandomAgent_VS_MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent_" + str(time())
# print(log_dir)
# print(TensorboardTicTacToeRunner(RandomAgent(),
#                                  MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent(100, SafeTicTacToeRunner(
#                                      RandomAgent(), RandomAgent()), 9, 9),
#                                  log_and_reset_score_history_threshold=10000,
#                                  log_dir=log_dir).run(1000000000))

log_dir = "./logs/Random_vs_all/RandomAgent_VS_MOISMCTSWithValueNetworkAgent_" + str(time())
print(log_dir)
print(TensorboardTicTacToeRunner(RandomAgent(),
                                 MOISMCTSWithValueNetworkAgent(
                                     100, "1", SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
                                 log_and_reset_score_history_threshold=10000,
                                 log_dir=log_dir).run(1000000000))

# AGENTS EXAMPLES :
# CommandLineAgent()
# RandomAgent()
# RandomRolloutAgent(3, SafeTicTacToeRunner(RandomAgent(), RandomAgent()))
# TabularQLearningAgent()
# DeepQLearningAgent(9, 9)
# ReinforceClassicAgent(9, 9)
# ReinforceClassicWithMultipleTrajectoriesAgent(9, 9)
# PPOWithMultipleTrajectoriesMultiOutputsAgent(9, 9)
# MOISMCTSWithRandomRolloutsAgent(100, SafeTicTacToeRunner(RandomAgent(), RandomAgent()))
            scores = self.generate_midgame_env(num_agent_init_card)["scores"]
            print(scores)
            cnt_win[np.argmin(scores)] += 1
        print(cnt_win)

    def test_66(self, num_round=100):
        cnt_win = [0, ] * len(self.agent_list)
        for i in range(num_round):
            scores = [0, ] * len(self.agent_list)
            while max(scores) < 66:
                score = self.generate_midgame_env(num_agent_init_card)["scores"]
                scores = [scores[j] + score[j] for j in range(len(self.agent_list))]
            cnt_win[np.argmin(scores)] += 1
            print("Round %d :" % i, scores, cnt_win)
        print(cnt_win)


if __name__ == "__main__":
    gameEmulator = GameEmulator()
    # Your own agent (the one that calls execute_action) has index 0 by default and should be added first.
    gameEmulator.add_agent(KeyboardAgent())
    gameEmulator.add_agent(RandomAgent())
    gameEmulator.emulate(1)
        if self.print_and_reset_score_history_threshold is not None and \
                round_id % self.print_and_reset_score_history_threshold == 0:
            print(score_history / self.print_and_reset_score_history_threshold)
            if self.prev_history is not None and \
                    score_history[0] == self.prev_history[0] and \
                    score_history[1] == self.prev_history[1] and \
                    score_history[2] == self.prev_history[2]:
                self.stuck_on_same_score += 1
            else:
                self.prev_history = score_history
                self.stuck_on_same_score = 0
            if (self.replace_player1_with_commandline_after_similar_results is not None
                    and self.stuck_on_same_score >= self.replace_player1_with_commandline_after_similar_results):
                self.agents = (CommandLineAgent(), self.agents[1])
                self.stuck_on_same_score = 0
    return tuple(score_history)


if __name__ == "__main__":
    print("MOISMCTSWithRandomRolloutsAgent VS RandomAgent")
    print(
        BasicTicTacToeRunner(
            MOISMCTSWithRandomRolloutsAgent(
                100, SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
            RandomAgent(),
            print_and_reset_score_history_threshold=1000).run(1000))
def main(ppo=False, dqn=False, ddqn=False, pg=False, random=False):
    """
    main

    Parameters
    ----------
    ppo : BOOLEAN, optional
        SET TO TRUE TO RUN PPO AGENT. The default is False.
    dqn : BOOLEAN, optional
        SET TO TRUE TO RUN DQN AGENT. The default is False.
    ddqn : BOOLEAN, optional
        SET TO TRUE TO RUN DDQN AGENT. The default is False.
    pg : BOOLEAN, optional
        SET TO TRUE TO RUN PG AGENT. The default is False.
    random : BOOLEAN, optional
        SET TO TRUE TO RUN RANDOM AGENT. The default is False.

    Returns
    -------
    None.
    """
    # Running DQN Agent
    if dqn:
        env = gym.make('PongDeterministic-v4')
        action_size = env.action_space.n
        agent = DqnAgent(action_size)
        nn = AtariModel()
        nn.build_atari_model(action_size)
        model = nn.model
        memory = RingBuffer(dqn_settings.MEMORY_SIZE)
        for i in range(dqn_settings.NUMBER_EPISODES):
            frame = env.reset()
            frame = agent.image_preprocessing(frame)
            state = (frame, frame, frame, frame)
            env.render()
            finished = False
            summed_reward = 0
            while not finished:
                action = agent.choose_action(state, model)
                next_frame, reward, finished, _ = env.step(action)
                next_frame = agent.image_preprocessing(next_frame)
                reward = agent.transform_reward(reward)
                next_state = (next_frame, state[0], state[1], state[2])
                summed_reward += reward
                memory.append((state, action, next_state, reward, finished))
                state = next_state
                env.render()
                if i > dqn_settings.ITERATIONS_BEFORE_FIT:
                    minibatch = memory.sample_random_batch(dqn_settings.BATCH_SIZE)
                    agent.fit_batch(model, minibatch, action_size)
                    if agent.epsilon > dqn_settings.FINAL_EPSILON:
                        agent.epsilon = agent.epsilon * dqn_settings.GAMMA
            print("Iteration:", i, " Reward:", summed_reward, "Epsilon:", agent.epsilon)
            f = open("rewards.txt", "a")
            f.write(str(summed_reward) + "\n")
            f.close()
            if i % 100 == 0:
                model.save('models/saved_model')
    # Running PPO Agent
    elif ppo:
        policy = PpoAgent()
        run_ppo(policy)
    # Running DDQN Agent
    elif ddqn:
        ddqn_agent = DdqnAgent()
        ddqn_agent.run_ddqn()
    # Running PG Agent
    elif pg:
        pg_agent = PgAgent()
        pg_agent.run_pg()
    # Running Random Agent
    elif random:
        random_agent = RandomAgent()
        random_agent.run_random()
    # If no agent is selected, print a message asking to select one
    else:
        print('No agent selected to run! Please select an agent: dqn, ppo, ddqn, pg, random')
from environment.battlesnake_environment import BattlesnakeEnvironment
from agents.RandomAgent import RandomAgent
from agents.HumanPlayer import HumanPlayer
import pygame

nb_random_player = 2
num_fruits = 1

agents = [HumanPlayer()]
for i in range(nb_random_player):
    agents.append(RandomAgent())

env = BattlesnakeEnvironment(
    width=15,
    height=15,
    num_fruits=num_fruits,
    agents=agents,
)

env.reset()
env.render()

while True:
    env.handle_input()
    env.step()
    env.render()
    pygame.time.wait(250)
                self.prev_history = score_history
                self.stuck_on_same_score = 0
            if (self.replace_player1_with_commandline_after_similar_results is not None
                    and self.stuck_on_same_score >= self.replace_player1_with_commandline_after_similar_results):
                self.agents = (CommandLineAgent(), self.agents[1])
                self.stuck_on_same_score = 0
            score_history = np.array((0, 0, 0))
            self.execution_time = np.array((0.0, 0.0))
    return tuple(score_history)


if __name__ == "__main__":
    number = [1000, 10000, 100000, 1000000]
    versus_name = ['RandomAgent', 'Tabular', 'DQN', 'DDQN', 'Reinforce',
                   'Reinforce A2C Style', 'PPO', 'MCTS']
    versus_agent = [RandomAgent(), TabularQLearningAgent(), DeepQLearningAgent(9, 9),
                    DoubleDeepQLearningAgent(9, 9), ReinforceClassicAgent(9, 9),
                    ReinforceClassicWithMultipleTrajectoriesAgent(9, 9),
                    PPOWithMultipleTrajectoriesMultiOutputsAgent(9, 9),
                    MOISMCTSWithValueNetworkAgent(9, 9, 2)]
    versus = [versus_name, versus_agent]
    for num in number:
        for i in range(len(versus_name)):
            with open("D:/DEEP_LEARNING/Reinforcement/TabularVS" + str(versus[0][i]) + "_NB_" + str(num) + ".csv", 'w+') as f:
                # Here, change "TabularVS" to the name of the agent you are running against all the others
                print("New Fight " + str(versus[0][i]) + " " + str(num))
                begin = time()
#                               TabularQLearningAgent(),
#                               log_and_reset_score_history_threshold=1000,
#                               log_dir=log_dir).run(1000000000))

# log_dir = "./logs/TabularQLearning_VS_TabularQLearning_" + str(time())
# print(log_dir)
# print(TensorboardPower4Runner(TabularQLearningAgent(),
#                               TabularQLearningAgent(),
#                               log_and_reset_score_history_threshold=1000,
#                               log_dir=log_dir).run(1000000000))

log_dir = "./logs/RandomRollout_VS_TabularQLearning_" + str(time())
print(log_dir)
print(
    TensorboardPower4Runner(RandomRolloutAgent(
        3, SafePower4Runner(RandomAgent(), RandomAgent())),
        TabularQLearningAgent(),
        log_and_reset_score_history_threshold=1000,
        log_dir=log_dir).run(1000000000))

# AGENTS EXAMPLES :
# CommandLineAgent()
# RandomAgent()
# RandomRolloutAgent(3, SafeTicTacToeRunner(RandomAgent(), RandomAgent()))
# TabularQLearningAgent()
# DeepQLearningAgent(9, 9)
# ReinforceClassicAgent(9, 9)
# ReinforceClassicWithMultipleTrajectoriesAgent(9, 9)
# PPOWithMultipleTrajectoriesMultiOutputsAgent(9, 9)
# MOISMCTSWithRandomRolloutsAgent(100, SafeTicTacToeRunner(RandomAgent(), RandomAgent()))
# MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent(100, SafeTicTacToeRunner(RandomAgent(), RandomAgent()), 9, 9)
            round_step += 1
        self.writer.add_summary(
            tf.Summary(value=[
                tf.Summary.Value(tag="agent1_action_mean_duration",
                                 simple_value=self.mean_action_duration_sum[0] / round_step),
                tf.Summary.Value(tag="agent2_action_mean_duration",
                                 simple_value=self.mean_action_duration_sum[1] / round_step),
                tf.Summary.Value(tag="agent1_accumulated_reward",
                                 simple_value=self.mean_accumulated_reward_sum[0]),
                tf.Summary.Value(tag="agent2_accumulated_reward",
                                 simple_value=self.mean_accumulated_reward_sum[1])
            ]), episode_id)
        episode_id += 1


if __name__ == "__main__":
    print("Rdm vs Rdm")
    print(
        TensorboardInstrumentedWindJammersRunner(
            TabularQLearningAgent(), RandomAgent(),
            log_dir_root="./logs/Rdm_Vs_Rdm").run(1000))
def run(): log_dir = "./logs/bastilleMP/ReinforceWithMultipleTraj_Vs_TabularQLearning/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner( ReinforceClassicWithMultipleTrajectoriesAgent(8, 12), TabularQLearningAgent(), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/DeepQLearning_Vs_TabularQLearning/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner(DeepQLearningAgent(8, 12), TabularQLearningAgent(), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/MOISMCTSWithRandomRollouts_Vs_TabularQLearning/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner(MOISMCTSWithRandomRolloutsAgent( 100, SafeWindJammersRunner(RandomAgent(), RandomAgent())), TabularQLearningAgent(), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/ReinforceClassicWithMultipleTrajectories_Vs_TabularQLearningAgent/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner( ReinforceClassicWithMultipleTrajectoriesAgent(8, 12), TabularQLearningAgent(), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/Random_Vs_TabularQLearningAgent/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner(RandomAgent(), TabularQLearningAgent(), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/Random_Vs_RandomRollout_100/" + str(time()) print(str(log_dir)) print( TensorboardWindJammersRunner(RandomAgent(), RandomRolloutAgent( 100, SafeWindJammersRunner( RandomAgent(), RandomAgent())), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/Random_Vs_DeepQLearning/" + str(time()) print(str(log_dir)) print( TensorboardWindJammersRunner(RandomAgent(), DeepQLearningAgent(8, 12), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/Random_Vs_DoubleQLearning/" + str(time()) print(str(log_dir)) print( TensorboardWindJammersRunner(RandomAgent(), DoubleQLearningAgent(), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/Random_Vs_ReinforceClassic/" + str(time()) print(str(log_dir)) print( TensorboardWindJammersRunner(RandomAgent(), ReinforceClassicAgent(8, 12), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/Random_Vs_ReinforceClassicWithMultipleTrajectories/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner( RandomAgent(), ReinforceClassicWithMultipleTrajectoriesAgent(8, 12), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/Random_Vs_PPOWithMultipleTrajectoriesMultiOutputs" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner( RandomAgent(), PPOWithMultipleTrajectoriesMultiOutputsAgent(8, 12), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/Random_Vs_MOISMCTSWithRandomRollouts/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner(RandomAgent(), MOISMCTSWithRandomRolloutsAgent( 100, SafeWindJammersRunner( RandomAgent(), RandomAgent())), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/Random_Vs_MOISMCTSWithRandomRolloutsExpertThenApprentice/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner( RandomAgent(), MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent( 100, SafeWindJammersRunner(RandomAgent(), RandomAgent()), 8, 12), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/Random_Vs_MOISMCTSWithValueNetwork/" + str( time()) print(str(log_dir)) print( 
TensorboardWindJammersRunner(RandomAgent(), MOISMCTSWithValueNetworkAgent( 100, SafeWindJammersRunner( RandomAgent(), RandomAgent())), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/TabularQLearning_RandomRollout_100/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner(TabularQLearningAgent(), RandomRolloutAgent( 100, SafeWindJammersRunner( RandomAgent(), RandomAgent())), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/TabularQLearning_DeepQLearning/" + str(time()) print(str(log_dir)) print( TensorboardWindJammersRunner(TabularQLearningAgent(), DeepQLearningAgent(8, 12), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/TabularQLearning_DoubleQLearning/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner(TabularQLearningAgent(), DoubleQLearningAgent(), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/TabularQLearning_ReinforceClassic/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner(TabularQLearningAgent(), ReinforceClassicAgent(8, 12), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/TabularQLearning_ReinforceClassicWithMultipleTrajectories/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner( TabularQLearningAgent(), ReinforceClassicWithMultipleTrajectoriesAgent(8, 12), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/TabularQLearning_PPOWithMultipleTrajectoriesMultiOutputs" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner( TabularQLearningAgent(), PPOWithMultipleTrajectoriesMultiOutputsAgent(8, 12), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/TabularQLearning_MOISMCTSWithRandomRollouts/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner(TabularQLearningAgent(), MOISMCTSWithRandomRolloutsAgent( 100, SafeWindJammersRunner( RandomAgent(), RandomAgent())), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/TabularQLearning_MOISMCTSWithRandomRolloutsExpertThenApprentice/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner( TabularQLearningAgent(), MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent( 100, SafeWindJammersRunner(RandomAgent(), RandomAgent()), 8, 12), checkpoint=100, log_dir=log_dir).run(100000)) log_dir = "./logs/bastilleMP/TabularQLearning_MOISMCTSWithValueNetwork/" + str( time()) print(str(log_dir)) print( TensorboardWindJammersRunner(TabularQLearningAgent(), MOISMCTSWithValueNetworkAgent( 100, SafeWindJammersRunner( RandomAgent(), RandomAgent())), checkpoint=100, log_dir=log_dir).run(100000))
input("Which call do you want to make?(enter an index)")) return call_to_play_index def play_card(self, a_player, a_game): valid_play_list = a_player.get_valid_play_list() cards_in_hand_list = [ card.get_card_id() for card in a_player.get_cards_in_hand() ] print(f"Your cards in hand are: {cards_in_hand_list}") playable_cards_list = [ cards_in_hand_list[index] for index in range(len(valid_play_list)) if valid_play_list[index] != 0 ] print(f"Your playable cards are: {playable_cards_list}") current_trick = a_game.get_trick() cards_on_trick = [ str(card) for card in current_trick.get_played_cards_list() ] print(f"The cards on trick are: {cards_on_trick}") card_to_play = -1 while card_to_play not in playable_cards_list: card_to_play = int(input("Which card do you want to play?")) for index in range(len(cards_in_hand_list)): if card_to_play == cards_in_hand_list[index]: return index if __name__ == "__main__": agent_types = [HumanAgent(), Agent(), CustomAgent(), RandomAgent()] active_game = Game(0, agent_types) active_game.play_game()