def run(self, max_rounds: int = -1,
        initial_game_state: QuartoGameState = QuartoGameState()) -> 'Tuple[float]':
    round_id = 0
    score_history = np.array((0, 0, 0))
    # max_rounds == -1 means "play forever".
    while max_rounds == -1 or round_id < max_rounds:
        gs = initial_game_state.copy_game_state()
        terminal = False
        while not terminal:
            current_player = gs.get_current_player_id()
            action_ids = gs.get_available_actions_id_for_player(current_player)
            # print(action_ids)
            info_state = gs.get_information_state_for_player(current_player)
            action = self.agents[current_player].act(current_player,
                                                     info_state,
                                                     action_ids)
            (gs, score, terminal) = gs.step(current_player, action)
            self.agents[current_player].observe(
                (1 if current_player == 0 else -1) * score, terminal)
            if terminal:
                # Tally (player-0 wins, player-1 wins, draws).
                score_history += (1 if score == 1 else 0,
                                  1 if score == -1 else 0,
                                  1 if score == 0 else 0)
                other_player = (current_player + 1) % 2
                self.agents[other_player].observe(
                    (1 if other_player == 0 else -1) * score, terminal)
        round_id += 1
        if self.print_and_reset_score_history_threshold is not None and \
                round_id % self.print_and_reset_score_history_threshold == 0:
            print(score_history / self.print_and_reset_score_history_threshold)
            if self.prev_history is not None and \
                    score_history[0] == self.prev_history[0] and \
                    score_history[1] == self.prev_history[1] and \
                    score_history[2] == self.prev_history[2]:
                self.stuck_on_same_score += 1
            else:
                self.prev_history = score_history
                self.stuck_on_same_score = 0
            if (self.replace_player1_with_commandline_after_similar_results is not None and
                    self.stuck_on_same_score >=
                    self.replace_player1_with_commandline_after_similar_results):
                self.agents = (CommandLineAgent(), self.agents[1])
                self.stuck_on_same_score = 0
            score_history = np.array((0, 0, 0))
    return tuple(score_history)
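Every run() variant in this section relies on the same bookkeeping attributes (agents, prev_history, stuck_on_same_score, and the two threshold settings). The sketch below shows a minimal constructor that would provide them; the class name and first three parameters match the BasicQuartoRunner call in the __main__ block further down, while the last keyword parameter is an assumption inferred from the attribute reads.

import numpy as np


class BasicQuartoRunner:
    # Sketch of the scaffolding the run() methods assume; only the attribute
    # names are taken from the code above, the constructor shape is inferred.
    def __init__(self, agent1, agent2,
                 print_and_reset_score_history_threshold=None,
                 replace_player1_with_commandline_after_similar_results=None):
        self.agents = (agent1, agent2)
        self.print_and_reset_score_history_threshold = \
            print_and_reset_score_history_threshold
        self.replace_player1_with_commandline_after_similar_results = \
            replace_player1_with_commandline_after_similar_results
        self.prev_history = None      # snapshot of the last (win, loss, draw) window
        self.stuck_on_same_score = 0  # consecutive windows with identical scores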
def run(self, max_rounds: int = -1,
        initial_game_state: WindJammersGameState = WindJammersGameState()) -> 'Tuple[float]':
    round_id = 0
    score_history = np.array((0, 0, 0.0))
    # max_rounds == -1 means "play forever".
    while max_rounds == -1 or round_id < max_rounds:
        gs = initial_game_state.copy_game_state()
        terminal = False
        while not terminal:
            sleep(0.016)  # ~60 frames per second for the real-time display
            print(gs)
            current_player = gs.get_current_player_id()
            action = 0
            if current_player != -1:  # -1: no player has to act on this frame
                action_ids = gs.get_available_actions_id_for_player(current_player)
                info_state = gs.get_information_state_for_player(current_player)
                action = self.agents[current_player].act(current_player,
                                                         info_state,
                                                         action_ids)
            # WARNING : Two Players Zero Sum Game Hypothesis
            (gs, score, terminal) = gs.step(current_player, action)
            self.agents[0].observe(score, terminal)
            self.agents[1].observe(-score, terminal)
            if not terminal:
                # Points are tallied as they are scored, not at game end.
                score_history += (score if score > 0 else 0.0,
                                  -score if score < 0 else 0.0,
                                  0)
        round_id += 1
        if self.print_and_reset_score_history_threshold is not None and \
                round_id % self.print_and_reset_score_history_threshold == 0:
            print(score_history / self.print_and_reset_score_history_threshold)
            if self.prev_history is not None and \
                    score_history[0] == self.prev_history[0] and \
                    score_history[1] == self.prev_history[1] and \
                    score_history[2] == self.prev_history[2]:
                self.stuck_on_same_score += 1
            else:
                self.prev_history = score_history
                self.stuck_on_same_score = 0
            if (self.replace_player1_with_commandline_after_similar_results is not None and
                    self.stuck_on_same_score >=
                    self.replace_player1_with_commandline_after_similar_results):
                self.agents = (CommandLineAgent(), self.agents[1])
                self.stuck_on_same_score = 0
            score_history = np.array((0, 0, 0.0))
    return tuple(score_history)
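All the runners drive their agents through the same two calls, act() and observe(). A minimal sketch of the interface those calls imply follows; the base class itself is illustrative, not the repository's actual definition.

class Agent:
    # Interface implied by the runner loops above; illustrative sketch only.
    def act(self, player_index: int, information_state, available_action_ids) -> int:
        """Return the id of the action to play."""
        raise NotImplementedError

    def observe(self, reward: float, terminal: bool) -> None:
        """Receive the zero-sum reward signal; player 1 is fed -score."""
        raise NotImplementedError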
def run(self, max_rounds: int = -1,
        initial_game_state: TicTacToeGameState = TicTacToeGameState()) -> 'Tuple[float]':
    round_id = 0
    score_history = np.array((0, 0, 0))
    # max_rounds == -1 means "play forever".
    while max_rounds == -1 or round_id < max_rounds:
        if round_id == 0:
            gs = initial_game_state.copy_game_state()
        else:
            gs.newGameState()  # reuse the same state object across rounds
        terminal = False
        while not terminal:
            current_player = gs.get_current_player_id()
            action_ids = gs.get_available_actions_id_for_player(current_player)
            info_state = gs.get_information_state_for_player(current_player)
            action_time = time()
            action = self.agents[current_player].act(current_player, info_state, action_ids)
            self.mean_action_duration_sum[current_player] += time() - action_time
            # WARNING : Two Players Zero Sum Game Hypothesis
            (gs, score, terminal) = gs.step(current_player, action)
            self.agents[current_player].observe(
                (1 if current_player == 0 else -1) * score, terminal)
            if terminal:
                score_history += (1 if score == 1 else 0,
                                  1 if score == -1 else 0,
                                  1 if score == 0 else 0)
                other_player = (current_player + 1) % 2
                self.agents[other_player].observe(
                    (1 if other_player == 0 else -1) * score, terminal)
        round_id += 1
        if self.log_and_reset_score_history_threshold is not None and \
                round_id % self.log_and_reset_score_history_threshold == 0:
            # print(score_history / self.log_and_reset_score_history_threshold)
            if self.prev_history is not None and \
                    score_history[0] == self.prev_history[0] and \
                    score_history[1] == self.prev_history[1] and \
                    score_history[2] == self.prev_history[2]:
                self.stuck_on_same_score += 1
            else:
                self.prev_history = score_history
                self.stuck_on_same_score = 0
            if (self.replace_player1_with_commandline_after_similar_results is not None and
                    self.stuck_on_same_score >=
                    self.replace_player1_with_commandline_after_similar_results):
                self.agents = (CommandLineAgent(), self.agents[1])
                self.stuck_on_same_score = 0
            # The duration sums are reset below, so the mean is taken over the
            # rounds since the last reset.
            self.writerTimeAgent1.add_summary(tf.Summary(
                value=[tf.Summary.Value(
                    tag="Time",
                    simple_value=self.mean_action_duration_sum[0]
                    / self.log_and_reset_score_history_threshold)],
            ), round_id)
            self.writerTimeAgent1.flush()
            self.writerTimeAgent2.add_summary(tf.Summary(
                value=[tf.Summary.Value(
                    tag="Time",
                    simple_value=self.mean_action_duration_sum[1]
                    / self.log_and_reset_score_history_threshold)],
            ), round_id)
            self.writerTimeAgent2.flush()
            self.writerAgent1.add_summary(tf.Summary(
                value=[tf.Summary.Value(
                    tag="Score",
                    simple_value=score_history[0]
                    / self.log_and_reset_score_history_threshold)],
            ), round_id)
            self.writerAgent1.flush()
            self.writerAgent2.add_summary(tf.Summary(
                value=[tf.Summary.Value(
                    tag="Score",
                    simple_value=score_history[1]
                    / self.log_and_reset_score_history_threshold)],
            ), round_id)
            self.writerAgent2.flush()
            self.writerDraw.add_summary(tf.Summary(
                value=[tf.Summary.Value(
                    tag="Score",
                    simple_value=score_history[2]
                    / self.log_and_reset_score_history_threshold)],
            ), round_id)
            self.writerDraw.flush()
            self.mean_action_duration_sum = {0: 0.0, 1: 0.0}
            self.mean_accumulated_reward_sum = {0: 0.0, 1: 0.0}
            score_history = np.array((0, 0, 0))
    return tuple(score_history)
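The five writers used above behave like TF1 tf.summary.FileWriter objects (add_summary plus flush). Below is a sketch of how they might be created in the runner's constructor; the helper name and log directory paths are placeholders, not taken from the source.

import tensorflow as tf  # TF1-style summary API, matching tf.Summary above


def _init_writers(self, logdir: str = "./logs"):
    # Hypothetical helper; the directory names below are placeholders.
    self.writerTimeAgent1 = tf.summary.FileWriter(logdir + "/time_agent1")
    self.writerTimeAgent2 = tf.summary.FileWriter(logdir + "/time_agent2")
    self.writerAgent1 = tf.summary.FileWriter(logdir + "/score_agent1")
    self.writerAgent2 = tf.summary.FileWriter(logdir + "/score_agent2")
    self.writerDraw = tf.summary.FileWriter(logdir + "/score_draw")
    self.mean_action_duration_sum = {0: 0.0, 1: 0.0}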
def run(self, max_rounds: int = -1,
        initial_game_state: QuartoGameState = QuartoGameState()) -> 'Tuple[float]':
    round_id = 0
    self.round_duration_sum = 0.0
    self.mean_action_duration_sum = np.array((0.0, 0.0))
    self.score_history = np.array((0, 0, 0))
    # max_rounds == -1 means "play forever".
    while max_rounds == -1 or round_id < max_rounds:
        gs = initial_game_state.copy_game_state()
        terminal = False
        round_step = 0
        self.action_duration_sum = [0.0, 0.0]
        self.mean_action_duration = np.array((0.0, 0.0))
        # NOTE: accumulated_reward_sum is initialized but never updated in this
        # version, so the two reward summaries below always report 0.0.
        self.accumulated_reward_sum = [0.0, 0.0]
        round_time = time.time()  # start of the whole round
        while not terminal:
            # print(gs)
            current_player = gs.get_current_player_id()
            action = 0
            if current_player != -1:
                action_ids = gs.get_available_actions_id_for_player(current_player)
                info_state = gs.get_information_state_for_player(current_player)
                action_time = time.time()
                action = self.agents[current_player].act(current_player,
                                                         info_state,
                                                         action_ids)
                action_time = time.time() - action_time
                self.action_duration_sum[current_player] += action_time
            (gs, score, terminal) = gs.step(current_player, action)
            self.agents[current_player].observe(
                (1 if current_player == 0 else -1) * score, terminal)
            round_step += 1
        self.round_duration = time.time() - round_time
        self.round_duration_sum += self.round_duration
        self.mean_action_duration = (self.action_duration_sum[0] / round_step,
                                     self.action_duration_sum[1] / round_step)
        self.mean_action_duration_sum += (self.mean_action_duration[0],
                                          self.mean_action_duration[1])
        self.score_history += (1 if score == 1 else 0,
                               1 if score == -1 else 0,
                               1 if score == 0 else 0)
        other_player = (current_player + 1) % 2
        self.agents[other_player].observe(
            (1 if other_player == 0 else -1) * score, terminal)
        self.writer.add_summary(tf.Summary(
            value=[
                tf.Summary.Value(tag="agent1_action_mean_duration",
                                 simple_value=self.mean_action_duration[0]),
                tf.Summary.Value(tag="agent2_action_mean_duration",
                                 simple_value=self.mean_action_duration[1]),
                tf.Summary.Value(tag="round_duration",
                                 simple_value=self.round_duration),
                tf.Summary.Value(tag="agent1_accumulated_reward",
                                 simple_value=self.accumulated_reward_sum[0]),
                tf.Summary.Value(tag="agent2_accumulated_reward",
                                 simple_value=self.accumulated_reward_sum[1])
            ],
        ), round_id)
        round_id += 1
        if self.print_and_reset_score_history_threshold is not None and \
                round_id % self.print_and_reset_score_history_threshold == 0:
            if self.prev_history is not None and \
                    self.score_history[0] == self.prev_history[0] and \
                    self.score_history[1] == self.prev_history[1] and \
                    self.score_history[2] == self.prev_history[2]:
                self.stuck_on_same_score += 1
            else:
                # Copy, not alias: score_history keeps being mutated in place,
                # so an alias would always compare equal to itself.
                self.prev_history = self.score_history.copy()
                self.stuck_on_same_score = 0
            if (self.replace_player1_with_commandline_after_similar_results is not None and
                    self.stuck_on_same_score >=
                    self.replace_player1_with_commandline_after_similar_results):
                self.agents = (CommandLineAgent(), self.agents[1])
                self.stuck_on_same_score = 0
    return tuple(self.score_history), self.round_duration_sum, self.mean_action_duration_sum
        if self.print_and_reset_score_history_threshold is not None and \
                round_id % self.print_and_reset_score_history_threshold == 0:
            print(score_history / self.print_and_reset_score_history_threshold)
            if self.prev_history is not None and \
                    score_history[0] == self.prev_history[0] and \
                    score_history[1] == self.prev_history[1] and \
                    score_history[2] == self.prev_history[2]:
                self.stuck_on_same_score += 1
            else:
                self.prev_history = score_history
                self.stuck_on_same_score = 0
            if (self.replace_player1_with_commandline_after_similar_results is not None and
                    self.stuck_on_same_score >=
                    self.replace_player1_with_commandline_after_similar_results):
                self.agents = (CommandLineAgent(), self.agents[1])
                self.stuck_on_same_score = 0
            score_history = np.array((0, 0, 0))
    return tuple(score_history)


if __name__ == "__main__":
    print("CommandLineAgent VS CommandLineAgent")
    print(BasicQuartoRunner(CommandLineAgent(),
                            CommandLineAgent(),
                            print_and_reset_score_history_threshold=1000).run(1000))
def run(self, max_rounds: int = -1,
        initial_game_state: TicTacToeGameState = TicTacToeGameState()) -> 'Tuple[float]':
    round_id = 0
    score_history = np.array((0, 0, 0))
    # max_rounds == -1 means "play forever".
    while max_rounds == -1 or round_id < max_rounds:
        gs = initial_game_state.copy_game_state()
        terminal = False
        tour = 0  # turn counter ("tour" is French for "turn")
        execution_time = np.array((0.0, 0.0))
        while not terminal:
            current_player = gs.get_current_player_id()
            action_ids = gs.get_available_actions_id_for_player(current_player)
            info_state = gs.get_information_state_for_player(current_player)
            begin = time()
            action = self.agents[current_player].act(current_player, info_state, action_ids)
            end = time()
            # WARNING : Two Players Zero Sum Game Hypothesis
            (gs, score, terminal) = gs.step(current_player, action)
            self.agents[current_player].observe(
                (1 if current_player == 0 else -1) * score, terminal)
            execution_time[current_player] += end - begin
            tour += 1  # needed by the per-turn averaging below if re-enabled
            if terminal:
                score_history += (1 if score == 1 else 0,
                                  1 if score == -1 else 0,
                                  1 if score == 0 else 0)
                other_player = (current_player + 1) % 2
                self.agents[other_player].observe(
                    (1 if other_player == 0 else -1) * score, terminal)
        # self.execution_time += execution_time / (tour * 0.5)
        round_id += 1
        if self.print_and_reset_score_history_threshold is not None and \
                round_id % self.print_and_reset_score_history_threshold == 0:
            print(score_history / self.print_and_reset_score_history_threshold)
            if self.file is not None:
                score_to_print = score_history / self.print_and_reset_score_history_threshold
                execution_time_to_print = \
                    self.execution_time / self.print_and_reset_score_history_threshold
                # CSV line: p1_score;p1_mean_time;p2_score;p2_mean_time;draws
                self.file.write(str(score_to_print[0]) + ";" +
                                str(execution_time_to_print[0]) + ";" +
                                str(score_to_print[1]) + ";" +
                                str(execution_time_to_print[1]) + ";" +
                                str(score_to_print[2]) + "\n")
            if self.prev_history is not None and \
                    score_history[0] == self.prev_history[0] and \
                    score_history[1] == self.prev_history[1] and \
                    score_history[2] == self.prev_history[2]:
                self.stuck_on_same_score += 1
            else:
                self.prev_history = score_history
                self.stuck_on_same_score = 0
            if (self.replace_player1_with_commandline_after_similar_results is not None and
                    self.stuck_on_same_score >=
                    self.replace_player1_with_commandline_after_similar_results):
                self.agents = (CommandLineAgent(), self.agents[1])
                self.stuck_on_same_score = 0
            score_history = np.array((0, 0, 0))
            self.execution_time = np.array((0.0, 0.0))
    return tuple(score_history)
def run(self, max_rounds: int = -1,
        initial_game_state: Power4GameState = Power4GameState()) -> 'Tuple[float]':
    round_id = 0
    # ADDED: score logging to a file
    filename = "../logs/" + type(self.agents[0]).__name__ + "_VS_" + \
               type(self.agents[1]).__name__ + str(time.time()) + ".txt"
    logs_scores_file = open(filename, "w")  # create/truncate the log file
    logs_scores_file.close()
    print(type(self.agents[1]).__name__)
    score_history = np.array((0, 0, 0))
    # max_rounds == -1 means "play forever".
    while max_rounds == -1 or round_id < max_rounds:
        gs = initial_game_state.copy_game_state()
        terminal = False
        while not terminal:
            current_player = gs.get_current_player_id()
            action_ids = gs.get_available_actions_id_for_player(current_player)
            info_state = gs.get_information_state_for_player(current_player)
            # ADDED: time act() at checkpoint rounds
            if round_id in (1000, 10000, 100000, 1000000):
                timer = time.time()
            action = self.agents[current_player].act(current_player,
                                                     info_state,
                                                     action_ids)
            # ADDED: log the measured action time
            if round_id in (1000, 10000, 100000, 1000000):
                logs_scores_file = open(filename, "a")
                logs_scores_file.write("TIMER " + str(round_id) + " PLAYER " +
                                       str(current_player) + " == " +
                                       str(time.time() - timer) + "\n")
                logs_scores_file.close()
            # WARNING : Two Players Zero Sum Game Hypothesis
            (gs, score, terminal) = gs.step(current_player, action)
            self.agents[current_player].observe(
                (1 if current_player == 0 else -1) * score, terminal)
            if terminal:
                score_history += (1 if score == 1 else 0,
                                  1 if score == -1 else 0,
                                  1 if score == 0 else 0)
                other_player = (current_player + 1) % 2
                self.agents[other_player].observe(
                    (1 if other_player == 0 else -1) * score, terminal)
        round_id += 1
        if round_id in (1000, 10000, 100000, 1000000):
            logs_scores_file = open(filename, "a")
            logs_scores_file.write(
                "CHECKPOINT " + str(round_id) + " == " +
                str(score_history / self.print_and_reset_score_history_threshold) + "\n")
            logs_scores_file.close()
        if self.print_and_reset_score_history_threshold is not None and \
                round_id % self.print_and_reset_score_history_threshold == 0:
            print(score_history / self.print_and_reset_score_history_threshold)
            if self.prev_history is not None and \
                    score_history[0] == self.prev_history[0] and \
                    score_history[1] == self.prev_history[1] and \
                    score_history[2] == self.prev_history[2]:
                self.stuck_on_same_score += 1
            else:
                self.prev_history = score_history
                self.stuck_on_same_score = 0
            if (self.replace_player1_with_commandline_after_similar_results is not None and
                    self.stuck_on_same_score >=
                    self.replace_player1_with_commandline_after_similar_results):
                self.agents = (CommandLineAgent(), self.agents[1])
                self.stuck_on_same_score = 0
            score_history = np.array((0, 0, 0))
    logs_scores_file.close()
    return tuple(score_history)
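The checkpoint logging above reopens and closes the file for every record. A small helper built on a context manager would keep the exact same on-disk format with less repetition; log_line is a hypothetical name, not from the source.

def log_line(filename: str, text: str) -> None:
    # Append one record and close the file immediately, mirroring the
    # open/write/close pattern used in the run() method above.
    with open(filename, "a") as f:
        f.write(text + "\n")


# Usage matching the TIMER record above:
# log_line(filename, "TIMER " + str(round_id) + " PLAYER " +
#          str(current_player) + " == " + str(time.time() - timer))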