def choose_move(self, state: CatanState):
    """Choose a move by iterative deepening, bounded by the turn timer.

    The search is run at depths 1, 3, 5, ... for as long as
    ``self.expectimax_alpha_beta.ran_out_of_time`` is false. The result of a
    search is only accepted at the start of the following iteration, i.e.
    when the timer had not expired once that search returned.

    :param state: the game state to choose a move for
    :return: the move from the last accepted search, or the result of
        ``RandomPlayer.choose_move`` when no search was accepted
    """
    search = self.expectimax_alpha_beta
    search.start_turn_timer()

    accepted_move = None
    candidate_move = None
    depth = 1
    while True:
        if search.ran_out_of_time:
            break
        # The previous search returned before the timer expired: accept it.
        accepted_move = candidate_move
        logger.info('starting depth {}'.format(depth))
        candidate_move = search.get_best_move(state, max_depth=depth)
        depth += 2

    if accepted_move is None:
        logger.warning('did not finish depth 1, returning a random move')
        return RandomPlayer.choose_move(self, state)
    return accepted_move
def main():
    """Learn player weights by repeated first-choice hill climbing.

    Starting from ``ExpectimaxWeightedProbabilitiesPlayer.default_weights``,
    runs up to five rounds of ``first_choice_hill_climbing`` over a
    ``WeightsSpace``. After every round that changed the weights,
    ``space.delta_unit`` is halved; the first round that returns the same
    weights as the previous one stops the process. The final weights are
    passed to ``dump_weights`` and logged.

    The worker pool is used as a context manager so that its processes are
    always released, including when a round raises.
    """
    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        space = WeightsSpace(pool)
        previous_result = ExpectimaxWeightedProbabilitiesPlayer.default_weights
        result = ExpectimaxWeightedProbabilitiesPlayer.default_weights
        for _ in range(5):
            space.iterations_count = 0
            result = first_choice_hill_climbing(space, result)
            if result == previous_result:
                # No improvement was found in this round: stop refining.
                break
            previous_result = result
            space.delta_unit /= 2

    dump_weights(result)
    logger.info('| learned weights: {}'.format(result))
def evaluate_state(self, weights) -> AbstractHillClimbingStateEvaluation:
    """Evaluate a weights vector by running games on the worker pool.

    One ``GameRunTask`` per game is created (``self._games_per_iteration``
    of them), each a deep copy of a common template carrying the time limit
    and the weights, plus its own index in ``i_``. The tasks are mapped over
    ``WeightsSpace.run_game`` and the per-game results are summed.

    :param weights: the weights to evaluate
    :return: the sum of the results returned by the games
    """
    logger.info('| evaluating weights: {}'.format(weights))

    template = GameRunTask()
    template.time_seconds_ = self._time_seconds
    template.weights_ = weights
    template.evaluation_ = 0

    def task_for_game(game_index):
        # Every game gets its own independent copy of the template.
        task = copy.deepcopy(template)
        task.i_ = game_index
        return task

    tasks = [task_for_game(game_index)
             for game_index in range(self._games_per_iteration)]
    total = sum(self._pool.map(WeightsSpace.run_game, tasks))
    self.iterations_count += 1
    return total
def run_game(args):
    """Play one two-player game and score the weights under evaluation.

    ``p0`` plays with ``args.weights_``; ``p1`` is an
    ``ExpectimaxBaselinePlayer``. Both get ``args.time_seconds_``.

    The result is ``10000`` if p0 reached at least 10 points (else ``0``),
    minus ``0.2 * p1's score * number of loop iterations played``.

    :param args: task object providing ``i_``, ``time_seconds_`` and
        ``weights_``
    :return: the numeric result described above
    """
    logger.info('| process {} spawned'.format(args.i_))

    seed = None
    time_limit = args.time_seconds_
    p0 = ExpectimaxWeightedProbabilitiesPlayer(seed, time_limit, args.weights_)
    p1 = ExpectimaxBaselinePlayer(seed, time_limit)
    state = CatanState([p0, p1], seed)

    moves_played = 0
    while True:
        if state.is_final():
            break
        chosen = state.get_current_player().choose_move(state)
        state.make_move(chosen)
        state.make_random_move()
        moves_played += 1

    scores = state.get_scores_by_player()
    score_report = {'p0 (new weights)': scores[p0], 'p1': scores[p1]}
    logger.info('| done iteration {}. scores: {}'.format(args.i_, score_report))

    win_bonus = 10000 if scores[p0] >= 10 else 0
    opponent_penalty = scores[p1] * 0.2
    res = win_bonus - (opponent_penalty * moves_played)
    logger.info('| process {} done. res: {}'.format(args.i_, res))
    return res
def run_game(args):
    """Play one four-player game and score the weights under evaluation.

    ``p0`` is a ``Winner`` using ``args.weights_``; ``p1`` is an
    ``ExpectimaxWeightedProbabilitiesWithFilterPlayer``; ``p2`` and ``p3``
    are ``RandomPlayer`` instances.

    The result is ``10000`` if p0 reached at least 10 points (else ``0``),
    minus ``0.2 * (sum of the other players' scores) * number of loop
    iterations played``.

    :param args: task object providing ``i_``, ``time_seconds_`` and
        ``weights_``
    :return: the numeric result described above
    """
    logger.info('| process {} spawned'.format(args.i_))

    seed = None
    time_limit = args.time_seconds_
    p0 = Winner(id=0, seed=seed, timeout_seconds=time_limit,
                weights=args.weights_)
    p1 = ExpectimaxWeightedProbabilitiesWithFilterPlayer(
        id=1, seed=seed, timeout_seconds=time_limit)
    p2 = RandomPlayer(id=2, seed=seed)
    # An ExpectimaxWeightedProbabilitiesWithFilterPlayer (id=3) was previously
    # tried in place of this random p3.
    p3 = RandomPlayer(id=3, seed=seed)
    state = CatanState([p0, p1, p2, p3], seed)

    moves_played = 0
    while True:
        if state.is_final():
            break
        chosen = state.get_current_player().choose_move(state)
        state.make_move(chosen)
        state.make_random_move()
        moves_played += 1

    scores = state.get_scores_by_player()
    score_report = {
        'p0 (new weights)': scores[p0],
        'p1': scores[p1],
        'p2': scores[p2],
        'p3': scores[p3],
    }
    logger.info('| done iteration {}. scores: {}'.format(args.i_, score_report))

    # TODO: change this block
    win_bonus = 10000 if scores[p0] >= 10 else 0
    others_penalty = (sum(scores.values()) - scores[p0]) * 0.2
    res = win_bonus - (others_penalty * moves_played)
    logger.info('| process {} done. res: {}'.format(args.i_, res))
    return res
def execute_game(plot_map=True):
    """Play one four-player game, log it, and report whether p0 won.

    Players are a ``Winner`` (p0), an
    ``ExpectimaxWeightedProbabilitiesWithFilterPlayer`` (p1) and two
    ``RandomPlayer`` instances (p2, p3). Every turn is logged; the final
    scores are written to ``fileLogger`` and passed to
    ``excel_data_grabber``. The module-level ``excel_file_name`` is set as a
    side effect.

    :param plot_map: when true, the board is plotted to an image after
        every turn
    :return: ``1`` if p0 finished with at least 10 points, otherwise ``0``
    """
    global excel_file_name

    seed = None
    timeout_seconds = TIME_OUT
    p0 = Winner(id=0, seed=seed, timeout_seconds=timeout_seconds)
    p1 = ExpectimaxWeightedProbabilitiesWithFilterPlayer(
        id=1, seed=seed, timeout_seconds=timeout_seconds)
    p2 = RandomPlayer(id=2)
    p3 = RandomPlayer(id=3)
    players = [p0, p1, p2, p3]

    state = CatanState(players, seed)
    turn_count = 0
    score_by_player = state.get_scores_by_player_indexed()

    while not state.is_final():
        # noinspection PyProtectedMember
        logger.info('----------------------p{}\'s turn----------------------'.format(
            state._current_player_index))
        turn_count += 1
        robber_placement = state.board.get_robber_land()
        move = state.get_current_player().choose_move(state)
        # Choosing a move must not alter the scores of the real state.
        assert not scores_changed(state, score_by_player,
                                  state.get_scores_by_player_indexed())
        state.make_move(move)
        state.make_random_move()
        score_by_player = state.get_scores_by_player_indexed()

        # Only log the informative move fields: skip falsy values, the
        # resources updates, an unchanged robber position, and dicts whose
        # values sum to zero.
        move_data = {
            k: v for k, v in move.__dict__.items()
            if (v and k != 'resources_updates')
            and not (k == 'robber_placement_land' and v == robber_placement)
            and not (isinstance(v, dict) and sum(v.values()) == 0)
        }
        logger.info('| {}| turn: {:3} | move:{} |'.format(
            ''.join('{} '.format(v) for v in score_by_player),
            turn_count, move_data))

        if plot_map:
            image_name = 'turn_{}_scores_{}.png'.format(
                turn_count, ''.join('{}_'.format(v) for v in score_by_player))
            state.board.plot_map(image_name, state.current_dice_number)

    # Key every player by (name, class, heuristic name or None). The names
    # are derived from the position in ``players`` instead of scanning
    # ``locals()`` for variables that happen to hold a player.
    players_scores_by_names = {
        ('p{}'.format(index),
         player.__class__,
         player.expectimax_alpha_beta.evaluate_heuristic_value.__name__
         if isinstance(player, ExpectimaxBaselinePlayer) else None):
            score_by_player[player.get_id()]
        for index, player in enumerate(players)
    }
    fileLogger.info(
        '\n'
        + '\n'.join(' {:80} : {} '.format(str(name), players_scores_by_names[name])
                    for name in sorted(players_scores_by_names))
        + '\n turns it took: {}\n'.format(turn_count)
        + ('-' * 156))

    p0_type = type(p0).__name__
    p_others_type = type(p1).__name__
    # The original call also passed int(time.time()), but the template has
    # only four placeholders, so that value never reached the file name.
    excel_file_name = '{}_vs_{}_timeout_{}_seed_{}.xlsx'.format(
        p0_type, p_others_type, timeout_seconds, seed)
    excel_data_grabber(score_by_player[0], score_by_player[1],
                       score_by_player[2], score_by_player[3],
                       turn_count, p0_type, p_others_type)

    return 1 if score_by_player[0] >= 10 else 0
def is_better(self, first_score: int, second_score: int) -> bool:
    """Tell whether ``first_score`` beats ``second_score`` by more than epsilon.

    The margin is ``self._epsilon_is_weighting_better``; the comparison and
    its outcome are logged at info level.

    :param first_score: the score being tested
    :param second_score: the score to beat
    :return: ``True`` iff ``first_score > second_score + epsilon``
    """
    margin = self._epsilon_is_weighting_better
    verdict = first_score > second_score + margin
    logger.info('| is weight better: {} > {} + {} --> {}'.format(
        first_score, second_score, margin, verdict))
    return verdict
def execute_game(plot_map=True):
    """Play one game of a Monte-Carlo player against three baseline players.

    ``p0`` is a ``MonteCarloWithFilterPlayer``; ``p1``..``p3`` are
    ``ExpectimaxBaselinePlayer`` instances, all with a 5 second timeout.
    Every turn is logged; the final scores are written to ``fileLogger`` and
    passed to ``excel_data_grabber``. The module-level ``excel_file_name``
    is set as a side effect.

    :param plot_map: when true, the board is plotted to an image before the
        first turn and after every turn
    """
    global excel_file_name

    seed = None
    timeout_seconds = 5
    p0 = MonteCarloWithFilterPlayer(seed, timeout_seconds)
    p1 = ExpectimaxBaselinePlayer(seed, timeout_seconds)
    p2 = ExpectimaxBaselinePlayer(seed, timeout_seconds)
    p3 = ExpectimaxBaselinePlayer(seed, timeout_seconds)
    players = [p0, p1, p2, p3]

    state = CatanState(players, seed)
    turn_count = 0
    score_by_player = state.get_scores_by_player()

    if plot_map:
        state.board.plot_map('turn_{}_scores_{}.png'.format(
            turn_count,
            ''.join('{}_'.format(v) for v in score_by_player.values())))

    while not state.is_final():
        # noinspection PyProtectedMember
        logger.info(
            '----------------------p{}\'s turn----------------------'.format(
                state._current_player_index))
        turn_count += 1
        robber_placement = state.board.get_robber_land()
        move = state.get_current_player().choose_move(state)
        # Choosing a move must not alter the scores of the real state.
        assert not scores_changed(state, score_by_player,
                                  state.get_scores_by_player())
        state.make_move(move)
        state.make_random_move()
        score_by_player = state.get_scores_by_player()

        # Only log the informative move fields: skip falsy values, the
        # resources updates, an unchanged robber position, and dicts whose
        # values sum to zero.
        move_data = {
            k: v for k, v in move.__dict__.items()
            if (v and k != 'resources_updates')
            and not (k == 'robber_placement_land' and v == robber_placement)
            and not (isinstance(v, dict) and sum(v.values()) == 0)
        }
        logger.info('| {}| turn: {:3} | move:{} |'.format(
            ''.join('{} '.format(v) for v in score_by_player.values()),
            turn_count, move_data))

        if plot_map:
            image_name = 'turn_{}_scores_{}.png'.format(
                turn_count,
                ''.join('{}_'.format(v) for v in score_by_player.values()))
            state.board.plot_map(image_name, state.current_dice_number)

    # Key every player by (name, class, heuristic name or None). The names
    # are derived from the position in ``players`` instead of scanning
    # ``locals()`` for variables that happen to hold a player.
    players_scores_by_names = {
        ('p{}'.format(index),
         player.__class__,
         player.expectimax_alpha_beta.evaluate_heuristic_value.__name__
         if isinstance(player, ExpectimaxBaselinePlayer) else None):
            score_by_player[player]
        for index, player in enumerate(players)
    }
    fileLogger.info(
        '\n'
        + '\n'.join(' {:150} : {} '.format(str(name), score)
                    for name, score in players_scores_by_names.items())
        + '\n turns it took: {}\n'.format(turn_count)
        + ('-' * 156))

    p0_type = type(p0).__name__
    p_others_type = type(p1).__name__
    # The original call also passed int(time.time()), but the template has
    # only four placeholders, so that value never reached the file name.
    excel_file_name = '{}_vs_{}_timeout_{}_seed_{}.xlsx'.format(
        p0_type, p_others_type, timeout_seconds, seed)
    excel_data_grabber(score_by_player[p0], score_by_player[p1],
                       score_by_player[p2], score_by_player[p3],
                       turn_count, p0_type, p_others_type)
def execute_game(i, iterations, plot_map=True):
    """Play one game of an ``MCTSPlayer`` against three random players.

    Every turn is logged, and the final scores are written to ``fileLogger``
    together with the game number and the iteration count.

    :param i: game number; only used in the final log entry
    :param iterations: passed to ``MCTSPlayer`` and echoed in the final log
    :param plot_map: not read by the active code (board plotting is
        disabled in this variant)
    """
    seed = None
    timeout_seconds = 5
    p0 = MCTSPlayer(0, iterations=iterations)
    p1 = RandomPlayer(1)
    p2 = RandomPlayer(2)
    p3 = RandomPlayer(3)
    players = [p0, p1, p2, p3]
    state = CatanState(players, seed)
    turn_count = 0
    score_by_player = state.get_scores_by_player_indexed()

    while True:
        if state.is_final():
            break
        # noinspection PyProtectedMember
        logger.info(
            '----------------------p{}\'s turn----------------------'.format(
                state._current_player_index))
        turn_count += 1
        robber_placement = state.board.get_robber_land()
        move = state.get_current_player().choose_move(state)
        # Choosing a move must leave the scores of the real state untouched.
        assert not scores_changed(state, score_by_player,
                                  state.get_scores_by_player_indexed())
        state.make_move(move)
        state.make_random_move()
        score_by_player = state.get_scores_by_player_indexed()

        # Keep only informative move fields: truthy values other than the
        # resources updates, a robber position that actually changed, and
        # dicts whose values do not sum to zero.
        move_data = {
            field: value for field, value in move.__dict__.items()
            if (value and field != 'resources_updates')
            and (field != 'robber_placement_land'
                 or not value == robber_placement)
            and (not isinstance(value, dict) or sum(value.values()) != 0)
        }
        logger.info('| {}| turn: {:3} | move:{} |'.format(
            ''.join(map('{} '.format, score_by_player)),
            turn_count, move_data))

    # Recover the variable names (p0..p3) of the players by scanning the
    # local variables for values that are contained in ``players``.
    players_scores_by_names = {
        (name,
         candidate.__class__,
         candidate.expectimax_alpha_beta.evaluate_heuristic_value.__name__
         if isinstance(candidate, ExpectimaxBaselinePlayer) else None):
            score_by_player[candidate.get_id()]
        for name, candidate in locals().items() if candidate in players
    }

    fileLogger.info(''.join([
        '\n',
        '\n'.join(' {:80} : {} '.format(str(key), score)
                  for key, score in players_scores_by_names.items()),
        '\n turns it took: {}\n'.format(turn_count),
        'game num: {}, num iterations: {}'.format(i, iterations),
        '\n',
        '-' * 156,
    ]))

    p0_type = type(p0).__name__
    p_others_type = type(p1).__name__
def execute_game(plot_map=True):
    """Play one two-player game: Monte-Carlo player against a random player.

    Every turn is logged and the final scores are written to ``fileLogger``.

    :param plot_map: when true, the board is plotted to an image before the
        first turn, on turn 4, on every 50th turn, and once the game is over
    """
    seed = None
    p0 = MonteCarloWithFilterPlayer(seed)
    p1 = RandomPlayer(seed)
    players = [p0, p1]

    state = CatanState(players, seed)
    turn_count = 0
    score_by_player = state.get_scores_by_player()

    if plot_map:
        state.board.plot_map('turn_{}_scores_{}.png'.format(
            turn_count,
            ''.join('{}_'.format(v) for v in score_by_player.values())))

    while not state.is_final():
        # noinspection PyProtectedMember
        logger.info(
            '----------------------p{}\'s turn----------------------'.format(
                state._current_player_index))
        turn_count += 1
        robber_placement = state.board.get_robber_land()
        move = state.get_current_player().choose_move(state)
        # Choosing a move must not alter the scores of the real state.
        assert not scores_changed(state, score_by_player,
                                  state.get_scores_by_player())
        state.make_move(move)
        state.make_random_move()
        score_by_player = state.get_scores_by_player()

        # Only log the informative move fields: skip falsy values, the
        # resources updates, an unchanged robber position, and dicts whose
        # values sum to zero.
        move_data = {
            k: v for k, v in move.__dict__.items()
            if (v and k != 'resources_updates')
            and not (k == 'robber_placement_land' and v == robber_placement)
            and not (isinstance(v, dict) and sum(v.values()) == 0)
        }
        logger.info('| {}| turn: {:3} | move:{} |'.format(
            ''.join('{} '.format(v) for v in score_by_player.values()),
            turn_count, move_data))

        if plot_map and (turn_count == 4 or turn_count % 50 == 0):
            # Fixed: the template used to be 'turn_4_scores_{}.png', which
            # hard-coded turn 4, put the turn number where the scores belong
            # and silently dropped the scores argument.
            image_name = 'turn_{}_scores_{}.png'.format(
                turn_count,
                ''.join('{}_'.format(v) for v in score_by_player.values()))
            state.board.plot_map(image_name, state.current_dice_number)

    if plot_map:
        state.board.plot_map('turn_{}_scores_{}.png'.format(
            turn_count,
            ''.join('{}_'.format(v) for v in score_by_player.values())))

    # Key every player by (name, class). The names are derived from the
    # position in ``players`` instead of scanning ``locals()`` for variables
    # that happen to hold a player.
    players_scores_by_names = {
        ('p{}'.format(index), player.__class__): score_by_player[player]
        for index, player in enumerate(players)
    }
    fileLogger.info(
        '\n'
        + '\n'.join(' {:150} : {} '.format(str(name), score)
                    for name, score in players_scores_by_names.items())
        + '\n turns it took: {}\n'.format(turn_count)
        + ('-' * 156))

    # NOTE(review): these two names are not read anywhere in the visible part
    # of this function; kept in case later code relies on them - confirm.
    p0_type = type(p0).__name__
    p_others_type = type(p1).__name__