Ejemplo n.º 1
0
    def run_game(self, verbose=True):
        """Play one game between ``self.ais`` and report how it ended.

        A fresh ``GameState`` is created and stored on ``self.state``. The AIs
        then move in turn until, at a point where player index 0 is about to
        move, some player's score is at least ``self.end_score``, or until
        the round counter exceeds 50.

        When several players share the top score, the one(s) with the fewest
        played cards win; if more than one player remains, the game is a draw.

        Args:
            verbose: When true, print the scores every time the round counter
                advances and one line describing each move.

        Returns:
            A ``FinishedGameInfo``. For a decisive game it carries the round
            counter, the winner's index, the number of tier 1/2/3 cards the
            winner played, and the list of per-move info objects returned by
            the AIs. For a draw, or a game stopped by the round limit, the
            first two arguments are ``None`` and only the per-move info list
            is supplied.
        """
        # print('===')
        state = GameState(players=self.num_players,
                          init_game=True,
                          validate=self.validate)
        self.state = state

        # One entry per move made, in order: the ``move_info`` returned by
        # the AI that made the move.
        state_vectors = []

        # Evaluate the opening position with the first AI.
        # NOTE(review): neither ``state_vector`` nor ``current_value`` is read
        # again in this method (only the commented-out code below used them).
        # Kept unchanged in case the calls matter elsewhere - confirm and
        # delete if not.
        ai = self.ais[0]
        if isinstance(ai, H50AI_TDlam):
            state_vector = state.get_state_vector(0).reshape((1, -1))
            current_value = ai.session.run(ai.softmax_output,
                                           {ai.input_state: state_vector})[0]
        else:
            state_vector = np.zeros(5)
            current_value = np.zeros(2)
        # current_grads = ai.session.run(ai.grads, feed_dict={ai.input_state: state_vector})
        # state_vectors.append((state.get_state_vector(0), current_value, state_vector, current_grads))

        # Starts at 1 and is incremented every time player index 0 is about
        # to move (including before that player's first move).
        game_round = 1
        while True:
            if state.current_player_index == 0:
                # The end condition is only checked when play returns to
                # player index 0.
                scores = np.array([player.score for player in state.players])
                if np.any(scores >= self.end_score):  # game has ended
                    max_score = np.max(scores)

                    # Tie-break: among the top scorers, fewest played cards.
                    leaders = [(index, player)
                               for index, player in enumerate(state.players)
                               if player.score == max_score]
                    min_num_cards = min(
                        len(player.cards_played) for _, player in leaders)
                    winning_players = [
                        (index, player) for index, player in leaders
                        if len(player.cards_played) == min_num_cards
                    ]

                    assert len(winning_players) >= 1

                    print('## players {} win with {} points after {} rounds'.
                          format([p[0] for p in winning_players], max_score,
                                 game_round))

                    # Debugging aid: on an inconsistent final state, show the
                    # traceback and drop into an interactive debugger rather
                    # than aborting.
                    try:
                        state.verify_state()
                    except AssertionError:
                        import traceback
                        traceback.print_exc()
                        import ipdb
                        ipdb.set_trace()

                    if len(winning_players) > 1:
                        print('Draw!')
                        return FinishedGameInfo(None,
                                                None,
                                                state_vectors=state_vectors)

                    winner_index, winner = winning_players[0]

                    # Overwrite the values recorded for the final move with
                    # the actual outcome: a one-hot vector marking the winner,
                    # rolled by -player_index for each player.
                    winner_value = np.zeros(self.num_players)
                    winner_value[winner_index] = 1.
                    for player_index in range(state.num_players):
                        state_vectors[-1].post_move_values[
                            player_index] = np.roll(winner_value,
                                                    -1 * player_index)

                    # assert ((winner_index + 1) % state.num_players) == state.current_player_index

                    winner_t1, winner_t2, winner_t3 = [
                        sum(1 for card in winner.cards_played
                            if card.tier == tier)
                        for tier in (1, 2, 3)
                    ]
                    return FinishedGameInfo(game_round,
                                            winner_index,
                                            winner_num_t1_bought=winner_t1,
                                            winner_num_t2_bought=winner_t2,
                                            winner_num_t3_bought=winner_t3,
                                            state_vectors=state_vectors)

                game_round += 1
                if verbose:
                    print('Round {}: {}'.format(game_round,
                                                state.get_scores()))

            if game_round > 50:
                print('Stopped after 50 rounds')
                return FinishedGameInfo(None,
                                        None,
                                        state_vectors=state_vectors)

            # Reshuffle every tier's deck before each move.
            for tier in range(1, 4):
                state.generator.shuffle(state.cards_in_deck(tier))

            move, move_info = self.ais[state.current_player_index].make_move(
                state)
            if verbose:
                # The original printed an undefined name ``values`` here,
                # which raised NameError on the first move. Show the values
                # recorded for this move instead, falling back to the whole
                # info object if it has no ``post_move_values`` attribute.
                print('P{}: {}, value {}'.format(
                    state.current_player_index, move,
                    getattr(move_info, 'post_move_values', move_info)))
            state.make_move(move)

            # new_state_vector = state.get_state_vector(current_player_index)
            state_vectors.append(move_info)