def play():
    # These four players are playing the game
    if variables.montecarlo:
        players = [AdvancedPlayer(), MonteCarloPlayer(), MinPlayer(), RandomPlayer()]
    else:
        players = [AdvancedPlayer(), MinPlayer(), MinPlayer(), RandomPlayer()]

    # We are simulating n games, accumulating a total score
    nr_of_matches = variables.nr_of_matches
    logging.debug('We are playing {} matches in total.'.format(nr_of_matches))
    winning_count = [0, 0, 0, 0]

    for match_nr in range(nr_of_matches):
        scores = (0, 0, 0, 0)
        logging.debug("--- MATCH {} ---".format(match_nr))
        for game_nr in range(1, 5):
            logging.debug("--- GAME {} ---".format(game_nr))
            game = Game(players, game_nr % 4)
            scores = tuple(sum(x) for x in zip(scores, game.play()))
            logging.debug("--- Scores: {} ---".format(scores))
        max_score = max(scores)
        for i in range(4):
            if scores[i] == max_score:
                winning_count[i] += 1
        logging.debug("--- Winning count: {} ---".format(winning_count))
def process_args(argv: List[str]) -> Tuple[Player, Player]:
    """Parse command line arguments to determine the players of the game."""
    if len(argv) != 3:
        player_1 = RandomPlayer()
        player_2 = RandomPlayer()
    else:
        str_1 = argv[1]  # use the argv parameter rather than reading sys.argv again
        if str_1 == "-d":
            player_1 = DeepQLearningPlayer()
        elif str_1 == "-h":
            player_1 = HumanPlayer()
        elif str_1 == "-m":
            player_1 = MinimaxPlayer()
        else:
            player_1 = RandomPlayer()
        str_2 = argv[2]
        if str_2 == "-d":
            player_2 = DeepQLearningPlayer()
        elif str_2 == "-h":
            player_2 = HumanPlayer()
        elif str_2 == "-m":
            player_2 = MinimaxPlayer()
        else:
            player_2 = RandomPlayer()
    return player_1, player_2
def run_game(args):
    logger.info('| process {} spawned'.format(args.i_))
    seed = None
    p0 = Winner(id=0, seed=seed, timeout_seconds=args.time_seconds_, weights=args.weights_)
    p1 = ExpectimaxWeightedProbabilitiesWithFilterPlayer(id=1, seed=seed, timeout_seconds=args.time_seconds_)
    p2 = RandomPlayer(id=2, seed=seed)
    p3 = RandomPlayer(id=3, seed=seed)
    # p3 = ExpectimaxWeightedProbabilitiesWithFilterPlayer(id=3, seed=seed, timeout_seconds=args.time_seconds_)
    state = CatanState([p0, p1, p2, p3], seed)

    count_moves = 0
    while not state.is_final():
        state.make_move(state.get_current_player().choose_move(state))
        state.make_random_move()
        count_moves += 1

    scores = state.get_scores_by_player()
    logger.info('| done iteration {}. scores: {}'.format(
        args.i_, {'p0 (new weights)': scores[p0], 'p1': scores[p1], 'p2': scores[p2], 'p3': scores[p3]}))

    # TODO: change this block
    count_moves_factor = 1 * count_moves
    p0_factor = 10000 if (scores[p0] >= 10) else 0
    p_others_factor = (sum(scores.values()) - scores[p0]) * 0.2
    res = p0_factor - (p_others_factor * count_moves_factor)

    logger.info('| process {} done. res: {}'.format(args.i_, res))
    return res
def eval_state(self, game: Game, p1, p2):
    from players.random_player import RandomPlayer, WEIGHT_MAP26

    # the p1/p2 arguments are replaced by random rollout players that copy their state
    p1 = RandomPlayer('rr1', w=WEIGHT_MAP26)
    p2 = RandomPlayer('rr2', w=WEIGHT_MAP26)
    players = [p1, p2]
    for o_p, p in zip(game.players, players):
        p.from_player(o_p)

    g = copy.copy(game)
    g.players = players
    winner = g.run()
    return 1 if winner is p1 else -1
def choose_move(self, state: CatanState):
    best_move = self.montecarlo.get_best_move(state)
    if best_move is not None:
        return best_move
    else:
        logger.warning('returning a random move')
        return RandomPlayer.choose_move(self, state)
def choose_action(self, b, p_other, actions):
    results = {repr(a): Result() for a in actions}
    me, other = RandomPlayerWithFirstAction('me', w=WEIGHT_MAP26), RandomPlayer('other', w=WEIGHT_MAP26)
    new_players = [me, other]
    if b.players[0] != self:
        new_players.reverse()

    for _ in range(100):
        a = random.choice(actions)
        me.first_action = a
        game = copy.copy(b)
        # copy player state
        for o_p, p in zip(b.players, new_players):
            p.from_player(o_p)
        game.players = new_players
        game.verbose = False
        winner = game.run()
        if winner.name == 'me':
            results[repr(a)].W += 1
        else:
            results[repr(a)].L += 1

    log.info('results= %s', results)
    # note: this assumes every action is sampled at least once in the 100 rollouts,
    # otherwise W + L is zero for that action and the win-rate below divides by zero
    best = max(results.items(), key=lambda item: item[1].W / (item[1].W + item[1].L))
    for a in actions:
        if repr(a) == best[0]:
            return a
def choose_resources_to_drop(self) -> Dict[Resource, int]:
    if sum(self.resources.values()) < 8:
        return {}
    resources_count = sum(self.resources.values())
    resources_to_drop_count = ceil(resources_count / 2)

    if self.can_settle_city() and resources_count >= sum(ResourceAmounts.city.values()) * 2:
        self.remove_resources_and_piece_for_city()
        resources_to_drop = copy.deepcopy(self.resources)
        self.add_resources_and_piece_for_city()
    elif self.can_settle_settlement() and resources_count >= sum(ResourceAmounts.settlement.values()) * 2:
        self.remove_resources_and_piece_for_settlement()
        resources_to_drop = copy.deepcopy(self.resources)
        self.add_resources_and_piece_for_settlement()
    elif (self.has_resources_for_development_card() and
          resources_count >= sum(ResourceAmounts.development_card.values()) * 2):
        self.remove_resources_for_development_card()
        resources_to_drop = copy.deepcopy(self.resources)
        self.add_resources_for_development_card()
    elif self.can_pave_road() and resources_count >= sum(ResourceAmounts.road.values()) * 2:
        self.remove_resources_and_piece_for_road()
        resources_to_drop = copy.deepcopy(self.resources)
        self.add_resources_and_piece_for_road()
    else:
        return RandomPlayer.choose_resources_to_drop(self)

    resources_to_drop = [resource for resource, count in resources_to_drop.items() for _ in range(count)]
    return Counter(self._random_choice(resources_to_drop, resources_to_drop_count, replace=False))
def __init__(self, game_state, player_hands, allowed_actions, player=RandomPlayer(), ucb_const=1):
    # note: the default RandomPlayer() is created once at definition time and shared
    # between all instances that do not pass their own player
    self.root = Node(None, None, game_state, player_hands, allowed_actions)
    self.player = player
    self.ucb_const = ucb_const
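# Added illustration (not part of the original source): the ucb_const stored above is
# typically consumed by a UCB1 selection rule during tree search. Below is a minimal,
# hypothetical sketch of that rule; the attribute names (visits, total_reward, parent,
# children) are assumptions and need not match the Node class used above.
import math

def ucb1_score(node, ucb_const=1):
    """UCB1 value of a child node: average reward plus an exploration bonus."""
    if node.visits == 0:
        return float('inf')  # unvisited children are explored first
    exploitation = node.total_reward / node.visits
    exploration = ucb_const * math.sqrt(math.log(node.parent.visits) / node.visits)
    return exploitation + exploration

# example selection step (hypothetical):
# best_child = max(node.children, key=lambda c: ucb1_score(c, self.ucb_const))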
def get_player(self, picker_value):
    if picker_value == 0:
        return 'human'
    if picker_value == 1:
        return MinimaxPlayer()
    if picker_value == 2:
        return LearningPlayer()
    if picker_value == 3:
        return RandomPlayer()
def test():
    data = get_dataset(RandomPlayer(), 100000, merge=True)
    print(f"data_length: {len(data)}")
    # data = pd.DataFrame(X)
    # data['outcome'] = y
    analyze(lambda i: tree.DecisionTreeRegressor(criterion="mae", min_samples_leaf=i), data)
    analyze(lambda i: tree.DecisionTreeRegressor(criterion="mae", max_depth=i), data)
def choose_move(self, state: CatanState):
    self.expectimax_alpha_beta.start_turn_timer()
    best_move, move, depth = None, None, 1
    while not self.expectimax_alpha_beta.ran_out_of_time:
        best_move = move
        logger.info('starting depth {}'.format(depth))
        move = self.expectimax_alpha_beta.get_best_move(state, max_depth=depth)
        depth += 2
    if best_move is not None:
        return best_move
    else:
        logger.warning('did not finish depth 1, returning a random move')
        return RandomPlayer.choose_move(self, state)
def simple_match():
    win = {'p1': 0, 'p2': 0}
    for i in range(1):
        p1 = RandomPlayer('p1')
        p2 = MCSimplePlayer('p2', 100)
        players = [p1, p2]
        if i % 2:
            players = players[::-1]
        g = Game(players, seed=None)
        winner = g.run()
        win[winner.name] += 1
        print(win)
    print(win)
def receive_game_start_message(self, game_info):
    self.my_model = MyModel()
    nb_player = game_info['player_num']
    max_round = game_info['rule']['max_round']
    sb_amount = game_info['rule']['small_blind_amount']
    ante_amount = game_info['rule']['ante']

    self.emulator = Emulator()
    self.emulator.set_game_rule(nb_player, max_round, sb_amount, ante_amount)
    # self.set_opponents_model(RandomModel())
    self.set_opponents_model(RandomPlayer())
    for player_info in game_info['seats']:
        uuid = player_info['uuid']
        player_model = self.my_model if uuid == self.uuid else self.opponents_model
        self.emulator.register_player(uuid, player_model)
def raw_loop(screen):
    screen.clear()
    curses.curs_set(0)
    init_colors()
    empty_board(4).refresh()
    game_end = False
    env = Ludo(PLAYERS)

    global agents
    if agents is None:
        agents = [IvanPesic(env) for i in range(PLAYERS)]
        agents[0] = ReinforcePlayer(env, r"players\saves\Reinforce30000-1.pth")
        agents[1] = RLBasicPlayer(env, r"players\saves\RLBasic30000-2.pth")
        agents[3] = HumanPlayer(env)
    if agents is None:
        agents = [RandomPlayer() for i in range(PLAYERS)]

    pstate = env.current_state()
    state = env.current_state_as_tuple()  # initialize so the first human turn can render the board
    while not game_end:
        if isinstance(agents[env.current_player], HumanPlayer):
            window = human_board(4, state, env, env.current_player)
            # the prompts are Serbian: "Player <n> is to move", "The die shows <roll>",
            # "You must play a valid move"
            window.addstr(2 * 4 + 5, 0, 'Igrac ')
            window.addstr(2 * 4 + 5, 6, str(env.current_player + 1))
            window.addstr(2 * 4 + 5, 8, 'je na potezu')
            window.addstr(2 * 4 + 6, 0, 'Na kocki je bacen broj ')
            window.addstr(2 * 4 + 6, 23, str(env.roll + 1))
            while True:
                window.refresh()
                curses.napms(30)
                c = window.getch()
                action = agents[env.current_player].play(c)
                if not action == -1:
                    break
                window.addstr(2 * 4 + 7, 0, 'Morate odigrati validan potez')
        else:
            action = agents[env.current_player].play(pstate, TOKENS)
        pstate, r, game_end = env.step(action)
        state = env.current_state_as_tuple()
        draw_board(4, state, env).refresh()
        curses.napms(30)

    curses.curs_set(1)
    print('Player ', env.winning_player + 1, ' wins')
def raw_loop(screen):
    env = Ludo(4)
    screen.clear()
    curses.curs_set(0)
    init_colors()
    empty_board(4).refresh()
    curses.napms(100)
    curses.curs_set(1)

    game_end = False
    agent1 = RandomPlayer()
    agent2 = RandomPlayer()
    state = env.current_state
    while not game_end:
        roll = random.randrange(1, 7)
        if env.current_player == 0:
            action = agent1.play(state, TOKENS)
        if env.current_player == 1:
            action = agent2.play(state, TOKENS)
        state, r, game_end = env.step(roll, action)
        draw_board(4, state, env).refresh()
        curses.napms(30)

    print('Player ', env.winning_player + 1, ' wins')
def main(): print("Cuda available: "+str(torch.cuda.is_available())) #start tensorboard tb = program.TensorBoard() tb.configure(argv=[None, '--logdir', Settings.runs_folder]) tb.launch() # set seed for debugging if Settings.random_seed: torch.manual_seed(Settings.random_seed) #loading initial policy hand_predictor = HandPredictor().to(Settings.device) # take the newest generation available i_episode = max_gen = 0 generations = [int(f[:8]) for f in listdir(Settings.checkpoint_folder) if f.endswith(".pt")] if len(generations) > 0: max_gen = max(generations) hand_predictor.load_state_dict(torch.load(Settings.checkpoint_folder+"/" + str(max_gen).zfill(8) + ".pt")) i_episode = max_gen optimizer = torch.optim.Adam(hand_predictor.parameters(),lr=Settings.lr, betas=Settings.betas, weight_decay=Settings.optimizer_weight_decay) # training loop for _ in range(0, 90000000): Settings.logger.info("playing " +str(Settings.update_games)+ " games") smart_mcts_player = HPPIMCPlayer(30, 120, RandomPlayer(), hand_predictor) # create four players players = [smart_mcts_player, smart_mcts_player, smart_mcts_player, smart_mcts_player] # create a game simulation schafkopf_env = SchafkopfEnv(Settings.random_seed) game_statistics = GameStatistics() memory_states = [] memory_player_hands = [] # play a bunch of games t0 = time.time() for _ in range(Settings.update_games): state, reward, terminal = schafkopf_env.reset() while not terminal: memory_states.append(hand_predictor.preprocess(state)) #TODO: happens twice now and could be optimized memory_player_hands.append(hand_predictor.encode_player_hands(schafkopf_env.player_cards, state["game_state"].current_player)) action, prob = players[state["game_state"].current_player].act(state) state, reward, terminal = schafkopf_env.step(action, prob) if state["game_state"].game_type[1] == 2: schafkopf_env.print_game() print("game "+str(i_episode)) i_episode += 1 game_statistics.update_statistics(state["game_state"], reward) t1 = time.time() #update the policy Settings.logger.info("updating policy") # Create dataset from collected experiences dataset = PredictionDatasetLSTM(memory_states, memory_player_hands) training_generator = data.DataLoader(dataset, collate_fn=dataset.custom_collate,batch_size=Settings.mini_batch_size, shuffle=True) #logging avg_loss = 0 count = 0 hand_predictor.train() for epoch in range(Settings.K_epochs): # epoch mini_batches_in_batch = int(Settings.batch_size / Settings.mini_batch_size) optimizer.zero_grad() for i, (states, hands) in enumerate(training_generator): # mini batch # Transfer to GPU states = [state.to(Settings.device) for state in states] hands = hands.to(Settings.device) pred = hand_predictor(states) #loss = nn.MSELoss()(pred, hands) #TODO: replace by cross entropy loss = nn.BCELoss()(pred, hands) avg_loss += loss.mean().item() count +=1 loss.mean().backward() if (i + 1) % mini_batches_in_batch == 0: optimizer.step() optimizer.zero_grad() t2 = time.time() hand_predictor.eval() # writing game statistics for tensorboard Settings.logger.info("Episode: "+str(i_episode) + " game simulation (s) = "+str(t1-t0) + " update (s) = "+str(t2-t1)) schafkopf_env.print_game() game_statistics.write_and_reset (i_episode) Settings.summary_writer.add_scalar('Loss/MSE_Loss', avg_loss / count, i_episode) # save and evaluate the policy Settings.logger.info("Saving Checkpoint") torch.save(hand_predictor.state_dict(), Settings.checkpoint_folder + "/" + str(i_episode).zfill(8) + ".pt") Settings.logger.info("Evaluation")
        else:
            expected_rewards = self.q_table[new_board_hash]
            expected = reward + (0.9 * max(expected_rewards.values()))
        change = 0.3 * (expected - self.q_table[board_hash][move])
        self.q_table[board_hash][move] += change


def swap_players(p1, p2):
    return p2, p1


if __name__ == '__main__':
    qplayer = QPlayer(1, 3)
    rplayer = RandomPlayer(-1)
    winning_length = 3
    games = 10000
    results = {1: 0, 0: 0, -1: 0}

    import tqdm
    import pickle

    # for i in tqdm.tqdm(range(games)):
    #     board = clean_board(3)
    #     winner = 0
    #     # print(i)
    #
    #     while True:
    #         move = qplayer.get_move(board)
    #         qplayer.learn_q(board, move)
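# Added illustration (not part of the original source): the fragment above applies the
# standard tabular Q-learning update with a hard-coded discount factor of 0.9 and
# learning rate of 0.3. The same rule with named constants, over an assumed
# q_table[state][action] nested dict (helper name is hypothetical):

GAMMA = 0.9   # discount factor, as hard-coded above
ALPHA = 0.3   # learning rate, as hard-coded above

def q_update(q_table, board_hash, move, reward, new_board_hash):
    """Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))."""
    target = reward + GAMMA * max(q_table[new_board_hash].values())
    q_table[board_hash][move] += ALPHA * (target - q_table[board_hash][move])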
description="play a game of poker against the computer") parser.add_argument("--name", required=True, help="your name to be displayed throughout the game") parser.add_argument("--num-hands", required=True, help="number of hands you want to play", type=int) args = parser.parse_args() deck = Deck() computer_name = "Maniac" human = HumanPlayer(args.name) computer = RandomPlayer(computer_name) starting_chips = 100 blind = 1 name_width = max(len(args.name), len(computer_name)) rank_width = max(len(str(x)) for x in HandRank) action_width = max(len(str(x)) for x in Action) winner_template = "{{:{}}} wins the pot of: {{}}".format(name_width) show_template = "{{:{}}} shows cards: {{}}, hand: {{:{}}}, {{}}".format( name_width, rank_width) action_template = "Pot: {{:3}}, Player: {{:{}}}, Action: {{:{}}}, Size: {{}}".format( name_width, action_width) human_results = 0
def execute_game(plot_map=True):
    seed = None
    p0 = MonteCarloWithFilterPlayer(seed)
    p1 = RandomPlayer(seed)
    players = [p0, p1]

    state = CatanState(players, seed)
    turn_count = 0
    score_by_player = state.get_scores_by_player()

    if plot_map:
        state.board.plot_map('turn_{}_scores_{}.png'.format(
            turn_count, ''.join('{}_'.format(v) for v in score_by_player.values())))

    while not state.is_final():
        # noinspection PyProtectedMember
        logger.info('----------------------p{}\'s turn----------------------'.format(state._current_player_index))
        turn_count += 1
        robber_placement = state.board.get_robber_land()
        move = state.get_current_player().choose_move(state)
        assert not scores_changed(state, score_by_player, state.get_scores_by_player())
        state.make_move(move)
        state.make_random_move()
        score_by_player = state.get_scores_by_player()

        move_data = {k: v for k, v in move.__dict__.items()
                     if (v and k != 'resources_updates') and
                     not (k == 'robber_placement_land' and v == robber_placement) and
                     not (isinstance(v, dict) and sum(v.values()) == 0)}
        logger.info('| {}| turn: {:3} | move:{} |'.format(
            ''.join('{} '.format(v) for v in score_by_player.values()), turn_count, move_data))

        if plot_map and (turn_count == 4 or turn_count % 50 == 0):
            # note: the original format string was 'turn_4_scores_{}.png', which silently dropped
            # the scores argument; 'turn_{}_scores_{}.png' matches the other plot_map calls
            image_name = 'turn_{}_scores_{}.png'.format(
                turn_count, ''.join('{}_'.format(v) for v in score_by_player.values()))
            state.board.plot_map(image_name, state.current_dice_number)

    if plot_map:
        state.board.plot_map('turn_{}_scores_{}.png'.format(
            turn_count, ''.join('{}_'.format(v) for v in score_by_player.values())))

    players_scores_by_names = {(k, v.__class__): score_by_player[v] for k, v in locals().items() if v in players}
    fileLogger.info('\n' +
                    '\n'.join(' {:150} : {} '.format(str(name), score)
                              for name, score in players_scores_by_names.items()) +
                    '\n turns it took: {}\n'.format(turn_count) + ('-' * 156))

    p0_type = type(p0).__name__
    p_others_type = type(p1).__name__
    results = defaultdict(int)
    for _ in tqdm(range(0, repetitions)):
        result = play(playerX, playerO)
        # result_text = 'x' if result == 1 else ('o' if result == -1 else 'draw')
        results[result] += 1

    all = sum(results.values())
    x = results[1]
    o = results[-1]
    d = results[0]
    print(f"\n{playerX.__class__.__name__} as X vs. {playerO.__class__.__name__} as O")
    print(f"X won {x} times {(x / all) * 100}%")
    print(f"O won {o} times {(o / all) * 100}%")
    print(f"draw {d} times {(d / all) * 100}%")


random_player = RandomPlayer()
win_selecting_player = WinSelectingPlayer()
q_player = QPlayer(win_selecting_player)
cart_player = CartPlayer(100000, win_selecting_player)

print("TESTING...")
test(win_selecting_player, cart_player)
test(win_selecting_player, q_player)
test(cart_player, random_player)
test(random_player, cart_player)
test(q_player, RandomPlayer())
test(RandomPlayer(), q_player)
def execute_game(plot_map=True):
    seed = None
    timeout_seconds = TIME_OUT
    p0 = Winner(id=0, seed=seed, timeout_seconds=timeout_seconds)
    p1 = ExpectimaxWeightedProbabilitiesWithFilterPlayer(id=1, seed=seed, timeout_seconds=timeout_seconds)
    p2 = RandomPlayer(id=2)
    p3 = RandomPlayer(id=3)
    players = [p0, p1, p2, p3]

    state = CatanState(players, seed)
    turn_count = 0
    score_by_player = state.get_scores_by_player_indexed()

    while not state.is_final():
        # noinspection PyProtectedMember
        logger.info('----------------------p{}\'s turn----------------------'.format(state._current_player_index))
        turn_count += 1
        robber_placement = state.board.get_robber_land()
        move = state.get_current_player().choose_move(state)
        assert not scores_changed(state, score_by_player, state.get_scores_by_player_indexed())
        state.make_move(move)
        state.make_random_move()
        score_by_player = state.get_scores_by_player_indexed()

        move_data = {k: v for k, v in move.__dict__.items()
                     if (v and k != 'resources_updates') and
                     not (k == 'robber_placement_land' and v == robber_placement) and
                     not (isinstance(v, dict) and sum(v.values()) == 0)}
        logger.info('| {}| turn: {:3} | move:{} |'.format(
            ''.join('{} '.format(v) for v in score_by_player), turn_count, move_data))

        if plot_map:
            image_name = 'turn_{}_scores_{}.png'.format(
                turn_count, ''.join('{}_'.format(v) for v in score_by_player))
            state.board.plot_map(image_name, state.current_dice_number)

    players_scores_by_names = {
        (k, v.__class__,
         v.expectimax_alpha_beta.evaluate_heuristic_value.__name__
         if isinstance(v, ExpectimaxBaselinePlayer) else None): score_by_player[v.get_id()]
        for k, v in locals().items() if v in players
    }
    names = list(players_scores_by_names.keys())
    names.sort()
    fileLogger.info('\n' +
                    '\n'.join(' {:80} : {} '.format(str(name), players_scores_by_names[name]) for name in names) +
                    '\n turns it took: {}\n'.format(turn_count) + ('-' * 156))

    p0_type = type(p0).__name__
    p_others_type = type(p1).__name__

    global excel_file_name
    # note: the format string has four placeholders but five arguments; str.format silently
    # drops the trailing int(time.time()) argument
    excel_file_name = '{}_vs_{}_timeout_{}_seed_{}.xlsx'.format(p0_type, p_others_type, timeout_seconds, seed,
                                                                int(time.time()))
    excel_data_grabber(score_by_player[0], score_by_player[1], score_by_player[2], score_by_player[3],
                       turn_count, p0_type, p_others_type)

    excel_output = ""
    for i in range(len(players)):
        player_output = str(players[i]) + "@" + str(score_by_player[i])
        excel_output += player_output + "\n"
    # fileLogger.info("|\n#" + str(turn_count) + "\n" + excel_output)

    if score_by_player[0] >= 10:
        return 1
    else:
        return 0
from pypokerengine.api.game import setup_config, start_poker

from players.fish_player import FishPlayer
from players.console_player import ConsolePlayer
from players.random_player import RandomPlayer

config = setup_config(max_round=100, initial_stack=1000, small_blind_amount=20)
config.register_player(name="f1", algorithm=FishPlayer())
config.register_player(name="r1", algorithm=RandomPlayer())
config.register_player(name="c1", algorithm=ConsolePlayer())
game_result = start_poker(config, verbose=1)
def execute_game(i, iterations, plot_map=True):
    seed = None
    timeout_seconds = 5
    p0 = MCTSPlayer(0, iterations=iterations)
    p1 = RandomPlayer(1)
    p2 = RandomPlayer(2)
    p3 = RandomPlayer(3)
    players = [p0, p1, p2, p3]

    state = CatanState(players, seed)
    turn_count = 0
    score_by_player = state.get_scores_by_player_indexed()

    # if plot_map:
    #     state.board.plot_map('turn_{}_scores_{}.png'
    #                          .format(turn_count, ''.join('{}_'.format(v) for v in score_by_player.values())))

    while not state.is_final():
        # noinspection PyProtectedMember
        logger.info('----------------------p{}\'s turn----------------------'.format(state._current_player_index))
        turn_count += 1
        robber_placement = state.board.get_robber_land()
        move = state.get_current_player().choose_move(state)
        assert not scores_changed(state, score_by_player, state.get_scores_by_player_indexed())
        state.make_move(move)
        state.make_random_move()
        score_by_player = state.get_scores_by_player_indexed()

        move_data = {k: v for k, v in move.__dict__.items()
                     if (v and k != 'resources_updates') and
                     not (k == 'robber_placement_land' and v == robber_placement) and
                     not (isinstance(v, dict) and sum(v.values()) == 0)}
        logger.info('| {}| turn: {:3} | move:{} |'.format(
            ''.join('{} '.format(v) for v in score_by_player), turn_count, move_data))

        # if plot_map:
        #     image_name = 'turn_{}_scores_{}.png'.format(
        #         turn_count, ''.join('{}_'.format(v) for v in score_by_player))
        #     state.board.plot_map(image_name, state.current_dice_number)

    players_scores_by_names = {
        (k, v.__class__,
         v.expectimax_alpha_beta.evaluate_heuristic_value.__name__
         if isinstance(v, ExpectimaxBaselinePlayer) else None): score_by_player[v.get_id()]
        for k, v in locals().items() if v in players
    }
    fileLogger.info('\n' +
                    '\n'.join(' {:80} : {} '.format(str(name), score)
                              for name, score in players_scores_by_names.items()) +
                    '\n turns it took: {}\n'.format(turn_count) +
                    'game num: {}, num iterations: {}'.format(i, iterations) + '\n' + ('-' * 156))

    p0_type = type(p0).__name__
    p_others_type = type(p1).__name__
from core import Game
from players.trivial_player import TrivialPlayer
from players.random_player import RandomPlayer

players = [TrivialPlayer(), TrivialPlayer(), RandomPlayer()]
result = Game(players).play(with_prints=True)
def main():
    pimc_player = PIMCPlayer(10, 40, RandomPlayer())

    policy = ActorCriticNetworkLSTM().to(Settings.device)
    policy.load_state_dict(torch.load("../policies/pretrained/lstm-policy.pt"))
    rl_player = RlPlayer(policy, action_shaping=False, eval=True)

    hp = HandPredictor().to(Settings.device)
    hp.load_state_dict(torch.load("../policies/pretrained/hand-predictor.pt"))
    smart_pimc_player = HPPIMCPlayer(10, 40, RandomPlayer(), HandPredictor().to(Settings.device))

    ip = ImmitationPolicy().to(Settings.device)
    ip.load_state_dict(torch.load("../policies/00010340.pt"))
    immitation_player = RlPlayer(ip, action_shaping=False, eval=True)

    participants = [
        rl_player,
        immitation_player,
        smart_pimc_player,
        pimc_player,
        RuleBasedPlayer(),
        RandomCowardPlayer(),
        RandomPlayer(),
    ]

    number_of_games = 1000

    for i in range(len(participants)):
        for j in range(i + 1, len(participants)):
            p1 = participants[i]
            p2 = participants[j]
            cummulative_reward = [0, 0, 0, 0]

            for k in range(2):  # run the same tournament twice with different positions of the players
                print(' ')
                schafkopf_env = SchafkopfEnv(seed=1)
                if k == 0:
                    players = [p1, p1, p2, p2]
                else:
                    players = [p2, p2, p1, p1]
                    cummulative_reward.reverse()

                # tournament loop
                for game_nr in range(1, number_of_games + 1):
                    state, reward, terminal = schafkopf_env.reset()
                    while not terminal:
                        action, prob = players[state["game_state"].current_player].act(state)
                        state, reward, terminal = schafkopf_env.step(action, prob)
                    cummulative_reward = [cummulative_reward[m] + reward[m] for m in range(4)]
                    if game_nr % 100 == 0:
                        print('.', end='')
                    # schafkopf_env.print_game()

            print("player " + str(i) + " vs. player " + str(j) + " = " +
                  str((cummulative_reward[2] + cummulative_reward[3]) / (2 * 2 * number_of_games)) + " to " +
                  str((cummulative_reward[0] + cummulative_reward[1]) / (2 * 2 * number_of_games)))
def parse_args():
    parser = argparse.ArgumentParser(description="Play different players against each other.")
    parser.add_argument("--infile", type=str, required=True, help="Input file name.")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()

    games = []
    with gzip.open(args.infile, 'r') as f:
        for line in f:
            games.append(json.loads(line.decode()))

    player_1 = HeuristicPlayer()
    player_2 = HeuristicPlayer()
    player_3 = RandomPlayer()

    score = {0: 0, 1: 0, 2: 0}
    for init_game in games:
        g = Game(init_game, [player_1, player_2, player_3])
        winners = g.play()
        for w in winners:
            score[w] += 1
    print(score)
        points += len(diagonal)

    # 2nd diagonal
    new_y = (BOARD_SIZE - 1) - y
    k = new_y - x
    diagonal = np.diag(np.fliplr(board), k)
    if np.all(diagonal) and len(diagonal) > 1:
        points += len(diagonal)
    return points


if __name__ == '__main__':
    p1 = HumanPlayer('p1')
    # p2 = HumanPlayer('p2')
    p2 = RandomPlayer('p2')
    g = Game(3, p1, p2)
    g.play()

    # player1_wins = []
    #
    # print(0, end='')
    # for i in range(1000):
    #     # print('\r', (i+1)/1000, end='')
    #     player1_wins.append(g.play())
    #     # time.sleep(0.1)
    #     sys.stdout.write("\r%d%%" % (i / 1000 * 100))
    #     sys.stdout.flush()
    #
    # print()
    # print(sum(player1_wins) / len(player1_wins) * 100, '%')
            boards_batch, moves_batch, rewards_batch = [], [], []

        if episode_number % log_every == 0:
            print("episode: %s win_rate: %s" % (episode_number, _win_rate(log_every, results)))


def _win_rate(print_results_every, results):
    # if results holds one +1 (win) / -1 (loss) entry per game, then
    # wins / games == 0.5 + sum(results) / (2 * games)
    i = sum(results)
    every___ = (print_results_every * 2.)
    return 0.5 + i / every___


def normalize_rewards(rewards_batch):
    normalized_rewards = rewards_batch - np.mean(rewards_batch)
    rewards_std = np.std(normalized_rewards)
    if rewards_std != 0:
        normalized_rewards /= rewards_std
    else:
        print("warning: got mini batch std of 0.")
    return normalized_rewards


if __name__ == '__main__':
    train_policy_gradients(layers=[9, 100, 100, 100, 9],
                           learning_rate=1e-4,
                           batch_size=100,
                           games=100000,
                           log_every=1000,
                           opponent=RandomPlayer(-1),
                           winning_length=3)
"""Connect Games""" from players.random_player import RandomPlayer from connect_games.gomoku import Gomoku from connect_games.tictactoe import TicTacToe from game_manager import GameManager if __name__ == '__main__': play = "TicTacToe" game_dict = {"Gomoku": Gomoku, "TicTacToe": TicTacToe} if play in game_dict: game = game_dict[play] else: print("Game not available!") exit() manager = GameManager(game) manager.run_experiment(2000, RandomPlayer(), RandomPlayer(), render=False)
from players.honest_player_og import HonestPlayer
from players.qlearner import RLPlayer
from players.random_player import RandomPlayer
from players.risky_player import RiskyPlayer
from players.call_player import CallPlayer
from players.bluff_player import BluffPlayer
import pandas as pd

# 1. Set game settings on emulator
n_players = 4
j = 0
emulator = Emulator()
# quuid = "uuid-q"
p_uuid = ["A1", "A2", "A3", "A4"]
# qlearner_player = RLPlayer(n_players, quuid)
monte_carlos_tries = [[RandomPlayer(p_uuid[0]), RiskyPlayer()],
                      [RiskyPlayer(), CallPlayer()],
                      [RiskyPlayer(), BluffPlayer()]]
names = [['Rand', 'Risk'], ['Call', 'Risk'], ['Bluff', 'Risk']]

for tryM in monte_carlos_tries:
    df = pd.DataFrame(columns=['uuid', 'stack', 'game'])
    for i in range(0, 1):
        print("starting game " + str(i) + " from try " + str(j))
        df1 = pd.DataFrame(columns=['uuid', 'stack', 'round'])
        df2 = pd.DataFrame(columns=['uuid', 'stack', 'round'])
        df3 = pd.DataFrame(columns=['uuid', 'stack', 'round'])
        df4 = pd.DataFrame(columns=['uuid', 'stack', 'round'])
        emulator.register_player(uuid=p_uuid[0], player=tryM[0])
        emulator.register_player(uuid=p_uuid[1], player=tryM[1])
        emulator.set_game_rule(player_num=2, max_round=1000, small_blind_amount=20, ante_amount=0)

        # 2. Setup GameState object
        players_info = {
            p_uuid[0]: {
                "name": "player1",
                "stack": 10000
            },