Example No. 1
def play():
    # These four players are playing the game
    if variables.montecarlo:
        players = [
            AdvancedPlayer(),
            MonteCarloPlayer(),
            MinPlayer(),
            RandomPlayer()
        ]
    else:
        players = [AdvancedPlayer(), MinPlayer(), MinPlayer(), RandomPlayer()]

    # We are simulating n games accumulating a total score
    nr_of_matches = variables.nr_of_matches
    logging.debug('We are playing {} matches in total.'.format(nr_of_matches))
    winning_count = [0, 0, 0, 0]
    for match_nr in range(nr_of_matches):
        scores = (0, 0, 0, 0)
        logging.debug("--- MATCH {} ---".format(match_nr))
        for game_nr in range(1, 5):
            logging.debug("--- GAME {} ---".format(game_nr))
            game = Game(players, game_nr % 4)
            scores = tuple(sum(x) for x in zip(scores, game.play()))
        logging.debug("--- Scores: {} ---".format(scores))
        max_score = max(scores)
        for i in range(4):
            if scores[i] == max_score:
                winning_count[i] += 1
    logging.debug("--- Winning count: {} ---".format(winning_count))
Example No. 2
def process_args(argv: List[str]) -> Tuple[Player, Player]:
    """Parse command line arguments to determine the players of the game.
    """
    if len(argv) != 3:
        player_1 = RandomPlayer()
        player_2 = RandomPlayer()
    else:
        str_1 = argv[1]
        if str_1 == "-d":
            player_1 = DeepQLearningPlayer()
        elif str_1 == "-h":
            player_1 = HumanPlayer()
        elif str_1 == "-m":
            player_1 = MinimaxPlayer()
        else:
            player_1 = RandomPlayer()
        str_2 = argv[2]
        if str_2 == "-d":
            player_2 = DeepQLearningPlayer()
        elif str_2 == "-h":
            player_2 = HumanPlayer()
        elif str_2 == "-m":
            player_2 = MinimaxPlayer()
        else:
            player_2 = RandomPlayer()
    return player_1, player_2
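The two identical flag-to-player chains above can be collapsed into a single lookup table. A minimal sketch of that refactor, reusing the player classes and typing imports from the snippet (nothing below is taken from the original project beyond those names):

PLAYER_FLAGS = {
    "-d": DeepQLearningPlayer,   # deep Q-learning agent
    "-h": HumanPlayer,           # interactive human player
    "-m": MinimaxPlayer,         # minimax search agent
}

def process_args(argv: List[str]) -> Tuple[Player, Player]:
    """Parse command line arguments to determine the players of the game."""
    if len(argv) != 3:
        return RandomPlayer(), RandomPlayer()
    # Unknown flags fall back to RandomPlayer, matching the original behaviour.
    player_1 = PLAYER_FLAGS.get(argv[1], RandomPlayer)()
    player_2 = PLAYER_FLAGS.get(argv[2], RandomPlayer)()
    return player_1, player_2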
Example No. 3
    def run_game(args):
        logger.info('| process {} spawned'.format(args.i_))
        seed = None
        p0 = Winner(id=0,
                    seed=seed,
                    timeout_seconds=args.time_seconds_,
                    weights=args.weights_)
        p1 = ExpectimaxWeightedProbabilitiesWithFilterPlayer(
            id=1, seed=seed, timeout_seconds=args.time_seconds_)
        p2 = RandomPlayer(id=2, seed=seed)
        p3 = RandomPlayer(id=3, seed=seed)
        # p3 = ExpectimaxWeightedProbabilitiesWithFilterPlayer(id=3, seed=seed, timeout_seconds=args.time_seconds_)
        state = CatanState([p0, p1, p2, p3], seed)
        count_moves = 0
        while not state.is_final():
            state.make_move(state.get_current_player().choose_move(state))
            state.make_random_move()
            count_moves += 1
        scores = state.get_scores_by_player()
        logger.info('| done iteration {}. scores: {}'.format(
            args.i_, {
                'p0  (new weights)': scores[p0],
                'p1': scores[p1],
                'p2': scores[p2],
                'p3': scores[p3]
            }))

        # TODO: change this block
        count_moves_factor = 1 * count_moves
        p0_factor = 10000 if (scores[p0] >= 10) else 0
        p_others_factor = (sum(scores.values()) - scores[p0]) * 0.2
        res = p0_factor - (p_others_factor * count_moves_factor)

        logger.info('| process {} done. res: {}'.format(args.i_, res))
        return res
Example No. 4
    def eval_state(self, game: Game, p1, p2):
        from players.random_player import RandomPlayer, WEIGHT_MAP26

        p1 = RandomPlayer('rr1', w=WEIGHT_MAP26)
        p2 = RandomPlayer('rr2', w=WEIGHT_MAP26)
        players = [p1, p2]
        for o_p, p in zip(game.players, players):
            p.from_player(o_p)
        g = copy.copy(game)
        g.players = players
        winner = g.run()
        return 1 if winner is p1 else -1
Example No. 5
 def choose_move(self, state: CatanState):
     best_move = self.montecarlo.get_best_move(state)
     if best_move is not None:
         return best_move
     else:
         logger.warning('returning a random move')
         return RandomPlayer.choose_move(self, state)
Example No. 6
    def choose_action(self, b, p_other, actions):
        results = {repr(a): Result() for a in actions}

        me, other = RandomPlayerWithFirstAction(
            'me', w=WEIGHT_MAP26), RandomPlayer('other', w=WEIGHT_MAP26)
        new_players = [me, other]
        if b.players[0] != self:
            new_players.reverse()

        for _ in range(100):
            a = random.choice(actions)
            me.first_action = a
            game = copy.copy(b)

            # copy player state
            for o_p, p in zip(b.players, new_players):
                p.from_player(o_p)
            game.players = new_players
            game.verbose = False
            winner = game.run()
            if winner.name == 'me':
                results[repr(a)].W += 1
            else:
                results[repr(a)].L += 1

        log.info('results= %s', results)
        # max(..., 1) guards against division by zero for actions never sampled in the 100 rollouts
        best = max(results.items(),
                   key=lambda item: item[1].W / max(item[1].W + item[1].L, 1))
        for a in actions:
            if repr(a) == best[0]:
                return a
Example No. 7
    def choose_resources_to_drop(self) -> Dict[Resource, int]:
        if sum(self.resources.values()) < 8:
            return {}
        resources_count = sum(self.resources.values())
        resources_to_drop_count = ceil(resources_count / 2)
        if self.can_settle_city() and resources_count >= sum(ResourceAmounts.city.values()) * 2:
            self.remove_resources_and_piece_for_city()
            resources_to_drop = copy.deepcopy(self.resources)
            self.add_resources_and_piece_for_city()

        elif self.can_settle_settlement() and resources_count >= sum(ResourceAmounts.settlement.values()) * 2:
            self.remove_resources_and_piece_for_settlement()
            resources_to_drop = copy.deepcopy(self.resources)
            self.add_resources_and_piece_for_settlement()

        elif (self.has_resources_for_development_card() and
              resources_count >= sum(ResourceAmounts.development_card.values()) * 2):
            self.remove_resources_for_development_card()
            resources_to_drop = copy.deepcopy(self.resources)
            self.add_resources_for_development_card()

        elif self.can_pave_road() and resources_count >= sum(ResourceAmounts.road.values()) * 2:
            self.remove_resources_and_piece_for_road()
            resources_to_drop = copy.deepcopy(self.resources)
            self.add_resources_and_piece_for_road()

        else:
            return RandomPlayer.choose_resources_to_drop(self)

        resources_to_drop = [resource for resource, count in resources_to_drop.items() for _ in range(count)]
        return Counter(self._random_choice(resources_to_drop, resources_to_drop_count, replace=False))
Example No. 8
 def __init__(self,
              game_state,
              player_hands,
              allowed_actions,
              player=RandomPlayer(),
              ucb_const=1):
     self.root = Node(None, None, game_state, player_hands, allowed_actions)
     self.player = player
     self.ucb_const = ucb_const
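Note that the player=RandomPlayer() default above is evaluated once, when __init__ is defined, so every instance created without an explicit player shares one RandomPlayer object. If the player keeps per-search state, the usual None-default idiom avoids that sharing; a minimal sketch, assuming the same Node and RandomPlayer as in the snippet:

def __init__(self,
             game_state,
             player_hands,
             allowed_actions,
             player=None,
             ucb_const=1):
    self.root = Node(None, None, game_state, player_hands, allowed_actions)
    # Build a fresh RandomPlayer per instance instead of sharing a module-level default.
    self.player = player if player is not None else RandomPlayer()
    self.ucb_const = ucb_const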
Example No. 9
 def get_player(self, picker_value):
     if picker_value == 0:
         return 'human'
     if picker_value == 1:
         return MinimaxPlayer()
     if picker_value == 2:
         return LearningPlayer()
     if picker_value == 3:
         return RandomPlayer()
Example No. 10
def test():
    data = get_dataset(RandomPlayer(), 100000, merge=True)
    print(f"data_length: {len(data)}")
    # data = pd.DataFrame(X)
    # data['outcome'] = y

    analyze(
        lambda i: tree.DecisionTreeRegressor(criterion="mae",
                                             min_samples_leaf=i), data)
    analyze(lambda i: tree.DecisionTreeRegressor(criterion="mae", max_depth=i),
            data)
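Example No. 11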
 def choose_move(self, state: CatanState):
     self.expectimax_alpha_beta.start_turn_timer()
     best_move, move, depth = None, None, 1
     while not self.expectimax_alpha_beta.ran_out_of_time:
         best_move = move
         logger.info('starting depth {}'.format(depth))
         move = self.expectimax_alpha_beta.get_best_move(state,
                                                         max_depth=depth)
         depth += 2
     if best_move is not None:
         return best_move
     else:
         logger.warning('did not finish depth 1, returning a random move')
         return RandomPlayer.choose_move(self, state)
Example No. 12
def simple_match():

    win = {'p1': 0, 'p2': 0}
    for i in range(1):
        p1 = RandomPlayer('p1')
        p2 = MCSimplePlayer('p2', 100)
        players = [p1, p2]
        if i % 2:
            players = players[::-1]
        g = Game(players, seed=None)
        winner = g.run()
        win[winner.name] += 1
        print(win)
    print(win)
Example No. 13
    def receive_game_start_message(self, game_info):
        self.my_model = MyModel()
        nb_player = game_info['player_num']
        max_round = game_info['rule']['max_round']
        sb_amount = game_info['rule']['small_blind_amount']
        ante_amount = game_info['rule']['ante']

        self.emulator = Emulator()
        self.emulator.set_game_rule(nb_player, max_round, sb_amount,
                                    ante_amount)
        #self.set_opponents_model(RandomModel())
        self.set_opponents_model(RandomPlayer())
        for player_info in game_info['seats']:
            uuid = player_info['uuid']
            player_model = self.my_model if uuid == self.uuid else self.opponents_model
            self.emulator.register_player(uuid, player_model)
Example No. 14
def raw_loop(screen):
    screen.clear()
    curses.curs_set(0)
    init_colors()
    empty_board(4).refresh()
    game_end = False
    env = Ludo(PLAYERS)
    global agents
    if agents is None:
        agents = [IvanPesic(env) for i in range(PLAYERS)]
    agents[0] = ReinforcePlayer(env, r"players\saves\Reinforce30000-1.pth")
    agents[1] = RLBasicPlayer(env, r"players\saves\RLBasic30000-2.pth")
    agents[3] = HumanPlayer(env)
    if agents is None:
        agents = [RandomPlayer() for i in range(PLAYERS)]
    pstate = env.current_state()
    state = env.current_state_as_tuple()  # initialize before the loop; the human branch reads it on the first turn
    while not game_end:
        if isinstance(agents[env.current_player], HumanPlayer):
            window = human_board(4, state, env, env.current_player)
            window.addstr(2 * 4 + 5, 0, 'Igrac ')
            window.addstr(2 * 4 + 5, 6, str(env.current_player + 1))
            window.addstr(2 * 4 + 5, 8, 'je na potezu')
            window.addstr(2 * 4 + 6, 0, 'Na kocki je bacen broj ')
            window.addstr(2 * 4 + 6, 23, str(env.roll + 1))
            while True:
                window.refresh()
                curses.napms(30)
                c = window.getch()
                action = agents[env.current_player].play(c)
                if not action == -1:
                    break
                window.addstr(2 * 4 + 7, 0, 'Morate odigrati validan potez')
        else:
            action = agents[env.current_player].play(pstate, TOKENS)
        pstate, r, game_end = env.step(action)
        state = env.current_state_as_tuple()
        draw_board(4, state, env).refresh()
        curses.napms(30)
    curses.curs_set(1)
    print('Player ', env.winning_player + 1, ' wins')
Example No. 15
def raw_loop(screen):
    env = Ludo(4)
    screen.clear()
    curses.curs_set(0)
    init_colors()
    empty_board(4).refresh()
    curses.napms(100)
    curses.curs_set(1)
    game_end = False
    agent1 = RandomPlayer()
    agent2 = RandomPlayer()
    state = env.current_state
    while not game_end:
        roll = random.randrange(1, 7)
        if (env.current_player == 0): action = agent1.play(state, TOKENS)
        if (env.current_player == 1): action = agent2.play(state, TOKENS)
        state, r, game_end = env.step(roll, action)
        draw_board(4, state, env).refresh()
        curses.napms(30)
    print('Player ', env.winning_player + 1, ' wins')
Example No. 16
def main():

  print("Cuda available: "+str(torch.cuda.is_available()))

  #start tensorboard
  tb = program.TensorBoard()
  tb.configure(argv=[None, '--logdir', Settings.runs_folder])
  tb.launch()

  # set seed for debugging
  if Settings.random_seed:
      torch.manual_seed(Settings.random_seed)

  #loading initial policy
  hand_predictor = HandPredictor().to(Settings.device)
  # take the newest generation available
  i_episode = max_gen = 0
  generations = [int(f[:8]) for f in listdir(Settings.checkpoint_folder) if f.endswith(".pt")]
  if len(generations) > 0:
      max_gen = max(generations)
      hand_predictor.load_state_dict(torch.load(Settings.checkpoint_folder+"/" + str(max_gen).zfill(8) + ".pt"))
      i_episode = max_gen

  optimizer = torch.optim.Adam(hand_predictor.parameters(),lr=Settings.lr, betas=Settings.betas, weight_decay=Settings.optimizer_weight_decay)

  # training loop
  for _ in range(0, 90000000):
    Settings.logger.info("playing " +str(Settings.update_games)+ " games")

    smart_mcts_player = HPPIMCPlayer(30, 120, RandomPlayer(), hand_predictor)
    # create four players
    players = [smart_mcts_player, smart_mcts_player, smart_mcts_player, smart_mcts_player]
    # create a game simulation
    schafkopf_env = SchafkopfEnv(Settings.random_seed)
    game_statistics = GameStatistics()


    memory_states = []
    memory_player_hands = []

    # play a bunch of games
    t0 = time.time()
    for _ in range(Settings.update_games):
      state, reward, terminal = schafkopf_env.reset()

      while not terminal:
        memory_states.append(hand_predictor.preprocess(state)) #TODO: happens twice now and could be optimized
        memory_player_hands.append(hand_predictor.encode_player_hands(schafkopf_env.player_cards, state["game_state"].current_player))

        action, prob = players[state["game_state"].current_player].act(state)
        state, reward, terminal = schafkopf_env.step(action, prob)

        if state["game_state"].game_type[1] == 2:
          schafkopf_env.print_game()

      print("game "+str(i_episode))
      i_episode += 1
      game_statistics.update_statistics(state["game_state"], reward)
    t1 = time.time()

    #update the policy
    Settings.logger.info("updating policy")
    # Create dataset from collected experiences
    dataset = PredictionDatasetLSTM(memory_states, memory_player_hands)
    training_generator = data.DataLoader(dataset, collate_fn=dataset.custom_collate,batch_size=Settings.mini_batch_size, shuffle=True)

    #logging
    avg_loss = 0
    count = 0

    hand_predictor.train()
    for epoch in range(Settings.K_epochs):  # epoch

      mini_batches_in_batch = int(Settings.batch_size / Settings.mini_batch_size)
      optimizer.zero_grad()

      for i, (states, hands) in enumerate(training_generator):  # mini batch
        # Transfer to GPU
        states = [state.to(Settings.device) for state in states]
        hands = hands.to(Settings.device)
        pred = hand_predictor(states)
        #loss = nn.MSELoss()(pred, hands) #TODO: replace by cross entropy
        loss = nn.BCELoss()(pred, hands)

        avg_loss += loss.mean().item()
        count +=1

        loss.mean().backward()

        if (i + 1) % mini_batches_in_batch == 0:
          optimizer.step()
          optimizer.zero_grad()
    t2 = time.time()
    hand_predictor.eval()

    # writing game statistics for tensorboard
    Settings.logger.info("Episode: "+str(i_episode) + " game simulation (s) = "+str(t1-t0) + " update (s) = "+str(t2-t1))
    schafkopf_env.print_game()
    game_statistics.write_and_reset (i_episode)
    Settings.summary_writer.add_scalar('Loss/MSE_Loss', avg_loss / count, i_episode)

    # save and evaluate the policy
    Settings.logger.info("Saving Checkpoint")
    torch.save(hand_predictor.state_dict(), Settings.checkpoint_folder + "/" + str(i_episode).zfill(8) + ".pt")
    Settings.logger.info("Evaluation")
Example No. 17
        else:
            expected_rewards = self.q_table[new_board_hash]

            expected = reward + (0.9 * max(expected_rewards.values()))

        change = 0.3 * (expected - self.q_table[board_hash][move])
        self.q_table[board_hash][move] += change


def swap_players(p1, p2):
    return p2, p1


if __name__ == '__main__':
    qplayer = QPlayer(1, 3)
    rplayer = RandomPlayer(-1)
    winning_length = 3
    games = 10000

    results = {1: 0, 0: 0, -1: 0}
    import tqdm
    import pickle

    # for i in tqdm.tqdm(range(games)):
    #     board = clean_board(3)
    #     winner = 0
    #     # print(i)
    #
    #     while True:
    #         move = qplayer.get_move(board)
    #         qplayer.learn_q(board, move)
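The else branch at the top of this example is the standard tabular Q-learning update with a hard-coded discount of 0.9 and learning rate of 0.3. A minimal self-contained sketch of the same update with the hyperparameters named (constant and function names below are illustrative, not from the original):

GAMMA = 0.9  # discount factor
ALPHA = 0.3  # learning rate

def update_q(q_table, board_hash, move, reward, new_board_hash, terminal):
    # Target: immediate reward plus the discounted value of the best follow-up move.
    if terminal:
        target = reward
    else:
        target = reward + GAMMA * max(q_table[new_board_hash].values())
    # Move the current estimate a fraction ALPHA towards the target.
    q_table[board_hash][move] += ALPHA * (target - q_table[board_hash][move])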
Example No. 18
    description="play a game of poker against the computer")
parser.add_argument("--name",
                    required=True,
                    help="your name to be displayed throughout the game")
parser.add_argument("--num-hands",
                    required=True,
                    help="number of hands you want to play",
                    type=int)
args = parser.parse_args()

deck = Deck()

computer_name = "Maniac"

human = HumanPlayer(args.name)
computer = RandomPlayer(computer_name)

starting_chips = 100
blind = 1

name_width = max(len(args.name), len(computer_name))
rank_width = max(len(str(x)) for x in HandRank)
action_width = max(len(str(x)) for x in Action)

winner_template = "{{:{}}} wins the pot of: {{}}".format(name_width)
show_template = "{{:{}}} shows cards: {{}}, hand: {{:{}}}, {{}}".format(
    name_width, rank_width)
action_template = "Pot: {{:3}}, Player: {{:{}}}, Action: {{:{}}}, Size: {{}}".format(
    name_width, action_width)

human_results = 0
Example No. 19
def execute_game(plot_map=True):
    seed = None
    p0 = MonteCarloWithFilterPlayer(seed)
    p1 = RandomPlayer(seed)
    players = [p0, p1]

    state = CatanState(players, seed)

    turn_count = 0
    score_by_player = state.get_scores_by_player()
    if plot_map:
        state.board.plot_map('turn_{}_scores_{}.png'.format(
            turn_count,
            ''.join('{}_'.format(v) for v in score_by_player.values())))

    while not state.is_final():
        # noinspection PyProtectedMember
        logger.info(
            '----------------------p{}\'s turn----------------------'.format(
                state._current_player_index))

        turn_count += 1
        robber_placement = state.board.get_robber_land()

        move = state.get_current_player().choose_move(state)
        assert not scores_changed(state, score_by_player,
                                  state.get_scores_by_player())
        state.make_move(move)
        state.make_random_move()

        score_by_player = state.get_scores_by_player()

        move_data = {
            k: v
            for k, v in move.__dict__.items()
            if (v and k != 'resources_updates')
            and not (k == 'robber_placement_land' and v == robber_placement)
            and not (isinstance(v, dict) and sum(v.values()) == 0)
        }
        logger.info('| {}| turn: {:3} | move:{} |'.format(
            ''.join('{} '.format(v) for v in score_by_player.values()),
            turn_count, move_data))
        if plot_map and (turn_count == 4 or turn_count % 50 == 0):
            image_name = 'turn_{}_scores_{}.png'.format(
                turn_count,
                ''.join('{}_'.format(v) for v in score_by_player.values()))
            state.board.plot_map(image_name, state.current_dice_number)

    if plot_map:
        state.board.plot_map('turn_{}_scores_{}.png'.format(
            turn_count,
            ''.join('{}_'.format(v) for v in score_by_player.values())))

    players_scores_by_names = {(k, v.__class__): score_by_player[v]
                               for k, v in locals().items() if v in players}
    fileLogger.info('\n' + '\n'.join(
        ' {:150} : {} '.format(str(name), score)
        for name, score in players_scores_by_names.items()) +
                    '\n turns it took: {}\n'.format(turn_count) + ('-' * 156))

    p0_type = type(p0).__name__
    p_others_type = type(p1).__name__
Example No. 20
    results = defaultdict(int)

    for _ in tqdm(range(0, repetitions)):
        result = play(playerX, playerO)
        # result_text = 'x' if result == 1 else ('o' if result == -1 else 'draw')
        results[result] += 1

    all = sum(results.values())
    x = results[1]
    o = results[-1]
    d = results[0]
    print(
        f"\n{playerX.__class__.__name__} as X vs. {playerO.__class__.__name__} as O"
    )
    print(f"X won {x} times {(x / all) * 100}%")
    print(f"O won {o} times {(o / all) * 100}%")
    print(f"draw {d} times {(d / all) * 100}%")


random_player = RandomPlayer()
win_selecting_player = WinSelectingPlayer()
q_player = QPlayer(win_selecting_player)
cart_player = CartPlayer(100000, win_selecting_player)
print("TESTING...")
test(win_selecting_player, cart_player)
test(win_selecting_player, q_player)
test(cart_player, random_player)
test(random_player, cart_player)
test(q_player, RandomPlayer())
test(RandomPlayer(), q_player)
Example No. 21
def execute_game(plot_map=True):
    seed = None
    timeout_seconds = TIME_OUT
    p0 = Winner(id=0, seed=seed, timeout_seconds=timeout_seconds)
    p1 = ExpectimaxWeightedProbabilitiesWithFilterPlayer(id=1, seed=seed, timeout_seconds=timeout_seconds)
    p2 = RandomPlayer(id=2)
    p3 = RandomPlayer(id=3)
    players = [p0, p1, p2, p3]

    state = CatanState(players, seed)

    turn_count = 0
    score_by_player = state.get_scores_by_player_indexed()

    while not state.is_final():
        # noinspection PyProtectedMember
        logger.info('----------------------p{}\'s turn----------------------'.format(state._current_player_index))

        turn_count += 1
        robber_placement = state.board.get_robber_land()

        move = state.get_current_player().choose_move(state)
        assert not scores_changed(state, score_by_player, state.get_scores_by_player_indexed())
        state.make_move(move)
        state.make_random_move()

        score_by_player = state.get_scores_by_player_indexed()

        move_data = {k: v for k, v in move.__dict__.items() if (v and k != 'resources_updates') and not
        (k == 'robber_placement_land' and v == robber_placement) and not
                     (isinstance(v, dict) and sum(v.values()) == 0)}
        logger.info('| {}| turn: {:3} | move:{} |'.format(''.join('{} '.format(v) for v in score_by_player),
                                                          turn_count, move_data))
        if plot_map:
            image_name = 'turn_{}_scores_{}.png'.format(
                turn_count, ''.join('{}_'.format(v) for v in score_by_player))
            state.board.plot_map(image_name, state.current_dice_number)

    players_scores_by_names = {(k, v.__class__, v.expectimax_alpha_beta.evaluate_heuristic_value.__name__ if (
        isinstance(v, ExpectimaxBaselinePlayer)) else None): score_by_player[v.get_id()]
                               for k, v in locals().items() if v in players
                               }
    names = list(players_scores_by_names.keys())
    names.sort()

    fileLogger.info('\n' + '\n'.join(' {:80} : {} '.format(str(name), players_scores_by_names[name])
                                     for name in names) +
                    '\n turns it took: {}\n'.format(turn_count) + ('-' * 156))

    p0_type = type(p0).__name__
    p_others_type = type(p1).__name__
    global excel_file_name
    excel_file_name = '{}_vs_{}_timeout_{}_seed_{}_{}.xlsx'.format(p0_type, p_others_type, timeout_seconds, seed,
                                                                   int(time.time()))
    excel_data_grabber(score_by_player[0], score_by_player[1], score_by_player[2], score_by_player[3], turn_count,
                       p0_type, p_others_type)

    excel_output = ""
    for i in range(len(players)):
        player_output = str(players[i]) + "@" + str(score_by_player[i])
        excel_output += player_output + "\n"
    # fileLogger.info("|\n#" + str(turn_count) + "\n" + excel_output)

    if score_by_player[0] >= 10:
        return 1
    else:
        return 0
Example No. 22
from pypokerengine.api.game import setup_config, start_poker
from players.fish_player import FishPlayer
from players.console_player import ConsolePlayer
from players.random_player import RandomPlayer

config = setup_config(max_round=100, initial_stack=1000, small_blind_amount=20)
config.register_player(name="f1", algorithm=FishPlayer())
config.register_player(name="r1", algorithm=RandomPlayer())
config.register_player(name="c1", algorithm=ConsolePlayer())
game_result = start_poker(config, verbose=1)
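Example No. 23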
def execute_game(i, iterations, plot_map=True):
    seed = None
    timeout_seconds = 5
    p0 = MCTSPlayer(0, iterations=iterations)
    p1 = RandomPlayer(1)
    p2 = RandomPlayer(2)
    p3 = RandomPlayer(3)
    players = [p0, p1, p2, p3]

    state = CatanState(players, seed)

    turn_count = 0
    score_by_player = state.get_scores_by_player_indexed()
    # if plot_map:
    #     state.board.plot_map('turn_{}_scores_{}.png'
    #                          .format(turn_count, ''.join('{}_'.format(v) for v in score_by_player.values())))

    while not state.is_final():
        # noinspection PyProtectedMember
        logger.info(
            '----------------------p{}\'s turn----------------------'.format(
                state._current_player_index))

        turn_count += 1
        robber_placement = state.board.get_robber_land()

        move = state.get_current_player().choose_move(state)
        assert not scores_changed(state, score_by_player,
                                  state.get_scores_by_player_indexed())
        state.make_move(move)
        state.make_random_move()

        score_by_player = state.get_scores_by_player_indexed()

        move_data = {
            k: v
            for k, v in move.__dict__.items()
            if (v and k != 'resources_updates')
            and not (k == 'robber_placement_land' and v == robber_placement)
            and not (isinstance(v, dict) and sum(v.values()) == 0)
        }
        logger.info('| {}| turn: {:3} | move:{} |'.format(
            ''.join('{} '.format(v) for v in score_by_player), turn_count,
            move_data))
        # if plot_map:
        #     image_name = 'turn_{}_scores_{}.png'.format(
        #         turn_count, ''.join('{}_'.format(v) for v in score_by_player))
        #     state.board.plot_map(image_name, state.current_dice_number)

    players_scores_by_names = {
        (k, v.__class__,
         v.expectimax_alpha_beta.evaluate_heuristic_value.__name__ if
         (isinstance(v, ExpectimaxBaselinePlayer)) else None):
        score_by_player[v.get_id()]
        for k, v in locals().items() if v in players
    }
    fileLogger.info('\n' + '\n'.join(
        ' {:80} : {} '.format(str(name), score)
        for name, score in players_scores_by_names.items()) +
                    '\n turns it took: {}\n'.format(turn_count) +
                    'game num: {}, num iterations: {}'.format(i, iterations) +
                    '\n' + ('-' * 156))

    p0_type = type(p0).__name__
    p_others_type = type(p1).__name__
Example No. 24
from core import Game
from players.trivial_player import TrivialPlayer
from players.random_player import RandomPlayer


players = [TrivialPlayer(), TrivialPlayer(), RandomPlayer()]
result = Game(players).play(with_prints=True)
Example No. 25
def main():

    pimc_player = PIMCPlayer(10, 40, RandomPlayer())

    policy = ActorCriticNetworkLSTM().to(Settings.device)
    policy.load_state_dict(torch.load("../policies/pretrained/lstm-policy.pt"))
    rl_player = RlPlayer(policy, action_shaping=False, eval=True)

    hp = HandPredictor().to(Settings.device)
    hp.load_state_dict(torch.load("../policies/pretrained/hand-predictor.pt"))
    # pass the loaded hand predictor rather than a fresh, untrained one
    smart_pimc_player = HPPIMCPlayer(10, 40, RandomPlayer(), hp)

    ip = ImmitationPolicy().to(Settings.device)
    ip.load_state_dict(torch.load("../policies/00010340.pt"))
    immitation_player = RlPlayer(ip, action_shaping=False, eval=True)

    participants = [
        rl_player,
        immitation_player,
        smart_pimc_player,
        pimc_player,
        RuleBasedPlayer(),
        RandomCowardPlayer(),
        RandomPlayer(),
    ]

    number_of_games = 1000

    for i in range(len(participants)):
        for j in range(i + 1, len(participants)):
            p1 = participants[i]
            p2 = participants[j]

            cummulative_reward = [0, 0, 0, 0]
            for k in range(2):  # run the same tournament twice with different positions of players
                print(' ')
                schafkopf_env = SchafkopfEnv(seed=1)
                if k == 0:
                    players = [p1, p1, p2, p2]
                else:
                    players = [p2, p2, p1, p1]
                    cummulative_reward.reverse()

                # tournament loop
                for game_nr in range(1, number_of_games + 1):
                    state, reward, terminal = schafkopf_env.reset()
                    while not terminal:
                        action, prob = players[
                            state["game_state"].current_player].act(state)
                        state, reward, terminal = schafkopf_env.step(
                            action, prob)

                    cummulative_reward = [
                        cummulative_reward[m] + reward[m] for m in range(4)
                    ]

                    if game_nr % 100 == 0:
                        print('.', end='')
                    #schafkopf_env.print_game()

            print("player " + str(i) + " vs. player " + str(j) + " = " +
                  str((cummulative_reward[2] + cummulative_reward[3]) /
                      (2 * 2 * number_of_games)) + " to " +
                  str((cummulative_reward[0] + cummulative_reward[1]) /
                      (2 * 2 * number_of_games)))
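Example No. 26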
def parse_args():
    parser = argparse.ArgumentParser(
        description="Play different players against each other.")
    parser.add_argument("--infile",
                        type=str,
                        required=True,
                        help="Input file name.")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()

    games = []
    with gzip.open(args.infile, 'r') as f:
        for line in f:
            games.append(json.loads(line.decode()))

    player_1 = HeuristicPlayer()
    player_2 = HeuristicPlayer()
    player_3 = RandomPlayer()

    score = {0: 0, 1: 0, 2: 0}
    for init_game in games:
        g = Game(init_game, [player_1, player_2, player_3])
        winners = g.play()
        for w in winners:
            score[w] += 1
        print(score)
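Example No. 27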
        points += len(diagonal)

    # 2nd diagonal
    new_y = (BOARD_SIZE - 1) - y
    k = new_y - x
    diagonal = np.diag(np.fliplr(board), k)
    if np.all(diagonal) and len(diagonal) > 1:
        points += len(diagonal)

    return points


if __name__ == '__main__':
    p1 = HumanPlayer('p1')
    # p2 = HumanPlayer('p2')
    p2 = RandomPlayer('p2')
    g = Game(3, p1, p2)
    g.play()

# player1_wins = []
# # print(0, end='')
# for i in range(1000):
#     # print('\r', (i+1)/1000, end='')
#     player1_wins.append(g.play())
#     # time.sleep(0.1)
#     sys.stdout.write("\r%d%%" % (i / 1000 * 100))
#     sys.stdout.flush()
#
# print()
# print(sum(player1_wins) / len(player1_wins) * 100, '%')
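Example No. 28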
                boards_batch, moves_batch, rewards_batch = [], [], []
            if episode_number % log_every == 0:
                print("episode: %s win_rate: %s" %
                      (episode_number, _win_rate(log_every, results)))


def _win_rate(print_results_every, results):
    i = sum(results)
    every___ = (print_results_every * 2.)
    return 0.5 + i / every___


def normalize_rewards(rewards_batch):
    normalized_rewards = rewards_batch - np.mean(rewards_batch)
    rewards_std = np.std(normalized_rewards)
    if rewards_std != 0:
        normalized_rewards /= rewards_std
    else:
        print("warning: got mini batch std of 0.")
    return normalized_rewards


if __name__ == '__main__':
    train_policy_gradients(layers=[9, 100, 100, 100, 9],
                           learning_rate=1e-4,
                           batch_size=100,
                           games=100000,
                           log_every=1000,
                           opponent=RandomPlayer(-1),
                           winning_length=3)
Example No. 29
"""Connect Games"""

from players.random_player import RandomPlayer

from connect_games.gomoku import Gomoku
from connect_games.tictactoe import TicTacToe

from game_manager import GameManager

if __name__ == '__main__':
    play = "TicTacToe"

    game_dict = {"Gomoku": Gomoku, "TicTacToe": TicTacToe}
    if play in game_dict:
        game = game_dict[play]
    else:
        print("Game not available!")
        exit()

    manager = GameManager(game)
    manager.run_experiment(2000, RandomPlayer(), RandomPlayer(), render=False)
Example No. 30
from players.honest_player_og import HonestPlayer
from players.qlearner import RLPlayer
from players.random_player import RandomPlayer
from players.risky_player import RiskyPlayer
from players.call_player import CallPlayer
from players.bluff_player import BluffPlayer
import pandas as pd

# 1. Set game settings on emulator
n_players = 4
j=0
emulator = Emulator()
#quuid = "uuid-q"
p_uuid=["A1","A2","A3","A4"]
#qlearner_player = RLPlayer(n_players, quuid)
monte_carlos_tries=[[RandomPlayer(p_uuid[0]),RiskyPlayer()],[RiskyPlayer(),CallPlayer()],[RiskyPlayer(),BluffPlayer()]]
names=[['Rand','Risk'],['Call','Risk'],['Bluff','Risk']]
for tryM in monte_carlos_tries:
	df = pd.DataFrame(columns = ['uuid', 'stack', 'game'])
	for i in range(0,1):
		print("starting game " + str(i) + " from try " + str(j))
		df1 = pd.DataFrame(columns = ['uuid', 'stack', 'round'])
		df2 = pd.DataFrame(columns = ['uuid', 'stack', 'round'])
		df3 = pd.DataFrame(columns = ['uuid', 'stack', 'round'])
		df4 = pd.DataFrame(columns = ['uuid', 'stack', 'round'])
		emulator.register_player(uuid=p_uuid[0], player=tryM[0])
		emulator.register_player(uuid=p_uuid[1], player=tryM[1])
		emulator.set_game_rule(player_num=2, max_round=1000, small_blind_amount=20, ante_amount=0)
		# 2. Setup GameState object
		players_info = {
			p_uuid[0]: { "name": "player1", "stack": 10000 },