예제 #1
0
파일: train_pg.py 프로젝트: hcui11/DouDiZhu
def main(agent):
    """Run one interactive console game with ``agent`` playing seat 0.

    Each turn prints the current player's hand, the other players' hand
    sizes, the play to beat and the list of legal actions.  Seat 0 is played
    by ``agent`` (via ``current_state`` / ``play``); the other seats are
    prompted on stdin for an index into the legal actions, or ``PASS`` / ``P``.

    Args:
        agent: object exposing ``current_state(hands, last_deal,
            possible_moves)`` and ``play()``.
    """
    game = Game()

    while game.get_winner() == -1:
        turn = game.turn
        print(f"PLAYER {turn}'s CARDS:")
        print(game.hands[turn])

        print("Your opponents hand sizes: ", end="")
        for i in range(3):
            if i != turn:
                print(sum(game.hands[i]), end=" ")
        print()

        last_move = game.last_move
        if last_move is not None:
            print("The play to beat: ", last_move.cards)
        else:
            print("There is no play to beat")

        print("Legal Actions:")
        possible_moves = game.legal_actions()
        # The final entry is not listed; it is the one selected by PASS
        # (index -1) below -- presumably the pass action.
        for i, action in enumerate(possible_moves[:-1]):
            print(f'{i}: {action}')

        if turn == 0:
            last_deal = [] if last_move is None else last_move.cards
            agent.current_state(game.hands[turn], last_deal, possible_moves)
            action = agent.play()
            print("Agent played", action)
            input("Press anything to continue")
            game.move(Play(action))
        else:
            # Keep prompting until the input names a usable entry.
            while True:
                move = input("Please enter your indexed move or enter PASS: ")
                if move == "PASS" or move == "P":
                    index = -1
                    break
                # isdecimal() only accepts characters that int() can parse.
                if move.isdecimal() and int(move) < len(possible_moves):
                    index = int(move)
                    break
                print('Invalid Move!')

            play = Play(possible_moves[index])
            print(f"You played a {play.type}!")
            input("Press anything to continue")
            game.move(play)

        print("\n\n")
    print(f"Player {game.get_winner()} wins!")
예제 #2
0
    def simulate(self):
        """Roll the game forward with uniformly random legal moves until it ends.

        Returns:
            1 if the rollout ended in favour of this node's side, else 0.
            For ``self.player == 0`` that means winner 0; for any other
            player value it means winner 1 or 2.
        """
        rollout = self.state

        while rollout.get_winner() < 0:
            candidates = rollout.legal_actions()
            pick = np.random.randint(len(candidates))
            # simulate() yields the constructor arguments of the next state.
            rollout = Game(*rollout.simulate(Play(candidates[pick])))

        winner = rollout.get_winner()
        if self.player == 0:
            won = winner == 0
        else:
            won = winner in (1, 2)
        return int(won)
예제 #3
0
파일: train_pg.py 프로젝트: hcui11/DouDiZhu
def generate_transitions(agent):
    """Let ``agent`` play every seat of a game and record one transition per move.

    Every non-final move is stored with reward 0.  When a move ends the game,
    that move gets reward +1 and the two transitions recorded just before it
    are re-labelled according to the winner.

    Args:
        agent: object exposing ``current_state(...)`` and ``deal()``, where
            ``deal()`` returns ``(state, action, score)``.

    Returns:
        The list of ``GameTransition`` objects of the (last) game played.
    """
    win_reward, no_reward, lose_reward = 1, 0, -1

    # winner -> (reward for transitions[-1], reward for transitions[-2]);
    # any winner other than 0 or 1 uses ``default_backfill``.
    backfill = {
        0: (lose_reward, lose_reward),
        1: (lose_reward, win_reward),
    }
    default_backfill = (win_reward, lose_reward)

    num_of_games = 1
    for _ in range(num_of_games):
        game = Game()
        game_transitions = []

        while game.get_winner() == -1:
            seat = game.turn
            previous = game.last_move
            agent.current_state(
                game.hands[seat],
                [] if previous is None else previous.cards,
                game.legal_actions(),
                game.played_cards,
                int(seat == 0),
                int(game.last_move_player == 0),
            )
            current_state, action, score = agent.deal()
            game.move(Play(action))

            winner = game.get_winner()
            if winner == -1:
                reward = no_reward
            else:
                last_reward, before_last_reward = backfill.get(
                    winner, default_backfill)
                game_transitions[-1].reward = last_reward
                game_transitions[-2].reward = before_last_reward
                reward = win_reward
            game_transitions.append(
                GameTransition(current_state, score, reward, None))
    return game_transitions
예제 #4
0
    def expand(self):
        """Create, attach and return a child node for one untried action.

        Pops an action from ``self._untried_actions``, applies it to this
        node's state via ``simulate`` and wraps the resulting game in a new
        ``MonteCarloTreeSearchNode`` that keeps this node's ``player``.
        """
        untried = self._untried_actions.pop()
        successor = Game(*self.state.simulate(Play(untried)))

        node = MonteCarloTreeSearchNode(
            successor,
            self.player,
            parent=self,
            parent_action=Play(untried),
        )
        self.children.append(node)
        return node
예제 #5
0
파일: train_pg.py 프로젝트: hcui11/DouDiZhu
def landlordAI_move(landlordAI, game, move):
    """Advance an MCTS tree past ``move`` and return its new root.

    If ``landlordAI.children[move]`` can be looked up, that child is detached
    from its parent and reused.  Otherwise a fresh tree is built from the
    current ``game`` (hands, last move, turn and pass count).

    Args:
        landlordAI: current root node of the search tree.
        game: the live game, already advanced past ``move``.
        move: key/index used to look up the matching child.

    Returns:
        The node to use as the tree root from now on.
    """
    try:
        child = landlordAI.children[move]
    except (IndexError, KeyError, TypeError):
        # Only lookup failures trigger a rebuild; KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except.
        state = Game(hands=game.hands + 0,
                     last_move=game.last_move,
                     turn=game.turn,
                     passes=game.passes)
        # Keep the seat the old tree was searching for, so trees built for
        # players other than 0 are not rebuilt as player 0; default to 0.
        return MonteCarloTreeSearchNode(state, getattr(landlordAI, "player", 0))

    child.parent = None
    return child
예제 #6
0
파일: train_pg.py 프로젝트: hcui11/DouDiZhu
def start_game(players, info=False):
    """Play one full game between three automated players.

    Args:
        players: sequence indexed by seat; each entry exposes
            ``current_state(hands, last_deal, possible_moves, played_cards,
            is_landlord, is_last_deal_landlord)`` and ``play()``.
        info: when true, print every move and the final winner.

    Returns:
        The value of ``game.get_winner()`` once the game has ended.
    """
    game = Game()
    while game.get_winner() == -1:
        player = game.turn
        last_move = game.last_move
        last_deal = [] if last_move is None else last_move.cards
        # Compare the seat that made the last move, not the move object
        # itself (which never equals 0), so the flag can actually be 1.
        is_last_deal_landlord = int(game.last_move_player == 0)
        is_landlord = int(player == 0)

        contender = players[player]
        contender.current_state(game.hands[player], last_deal,
                                game.legal_actions(), game.played_cards,
                                is_landlord, is_last_deal_landlord)
        action = contender.play()
        play = Play(action)
        if info:
            print(f'player {player}:', action)
        game.move(play)

    winner = game.get_winner()
    if info:
        print(f"Player {winner} wins!")
    return winner
예제 #7
0
파일: train_pg.py 프로젝트: hcui11/DouDiZhu
def vs_mcts(agent, info=False):
    """Play one game with ``agent`` in seat 0 against two MCTS players.

    Seats 1 and 2 each own a search tree.  After every move the trees that
    did not make the move are advanced with ``landlordAI_move`` so that all
    trees keep following the live game.

    Args:
        agent: object exposing ``current_state(hands, last_deal,
            possible_moves, played_cards, is_landlord,
            is_last_deal_landlord)`` and ``play()``.
        info: when true, print every move and the final winner.

    Returns:
        The value of ``game.get_winner()`` once the game has ended.
    """
    game = Game()
    # Each tree gets its own copy of the dealt hands (``+ 0`` copies them).
    mcts = [
        MonteCarloTreeSearchNode(Game(hands=game.hands + 0), 1),
        MonteCarloTreeSearchNode(Game(hands=game.hands + 0), 2),
    ]
    if info:
        print('Game Start')

    while game.get_winner() == -1:
        if game.turn == 0:
            player = game.turn
            last_move = game.last_move
            last_deal = [] if last_move is None else last_move.cards
            # Compare the seat that made the last move, not the move object
            # itself (which never equals 0), so the flag can actually be 1.
            is_last_deal_landlord = int(game.last_move_player == 0)
            is_landlord = int(player == 0)

            agent.current_state(game.hands[player], last_deal,
                                game.legal_actions(), game.played_cards,
                                is_landlord, is_last_deal_landlord)
            action = agent.play()
            play = Play(action)
            if info:
                print('player 0:', ' '.join([CARD_STR[a] for a in action]))
            game.move(play)

            # Both MCTS trees must follow the agent's move.
            mcts[0] = landlordAI_move(mcts[0], game, action)
            mcts[1] = landlordAI_move(mcts[1], game, action)
        else:
            mcts_id = game.turn - 1
            chosen = mcts[mcts_id].best_action()
            chosen.parent = None
            move = chosen.parent_action
            mcts[mcts_id] = chosen
            game.move(move)
            # Only the other MCTS tree still needs to be advanced.
            other = 1 - mcts_id
            mcts[other] = landlordAI_move(mcts[other], game, move)
            if info:
                print(f'player {mcts_id + 1}:', move)

    winner = game.get_winner()
    if info:
        print(f"Player {winner} wins!")
    return winner
예제 #8
0
def main():
    """Run one interactive console game between a configurable set of players.

    ``players`` maps each seat to a player type.  Valid names are
    "MonteCarlo", "PGAgent", "Naive", "Random", "Smart" and "Human"; any
    other name is handled like "Human".  The comment in the original code
    notes that MonteCarlo can only play as landlord.
    """
    game = Game()
    state = Game(hands=game.hands + 0)
    landlordAI = MonteCarloTreeSearchNode(state, 0)
    agent = PGAgent(learning_rate=0.01, device='cpu')
    naive = NaiveGreedy()
    random_player = RandomPlayer()
    smart = SmartGreedy()

    load_model(agent.model, "PG_param.pth")

    # Player type -> (player object, name used in the "... played ...!" line).
    bots = {
        "PGAgent": (agent, "PGAgent"),
        "Smart": (smart, "Smart Greedy"),
        "Naive": (naive, "Naive Greedy"),
        "Random": (random_player, "Random"),
    }
    # MonteCarlo can only play as landlord
    players = ["Human", "PGAgent", "Smart"]

    while game.get_winner() == -1:
        player = game.turn
        print(f"PLAYER {player}'s CARDS:")
        print(hand_to_string(game.hands[player]))

        print("Your opponents hand sizes: ", end="")
        for i in range(3):
            if i != player:
                print(sum(game.hands[i]), end=" ")
        print()

        last_move = game.last_move
        if last_move is not None:
            print("The play to beat: ", indices_to_string(last_move.cards))
        else:
            print("There is no play to beat")

        role = players[player]
        if role == "MonteCarlo":
            landlordAI = landlordAI.best_action()
            landlordAI.parent = None
            print(
                f"MonteCarlo played {indices_to_string(landlordAI.parent_action.cards)}!"
            )
            input("Press anything to continue")
            game.move(landlordAI.parent_action)
        elif role in bots:
            bot, label = bots[role]
            last_deal = [] if last_move is None else last_move.cards
            # Compare the seat that made the last move, not the move object
            # itself (which never equals 0), so the flag can actually be 1.
            is_last_deal_landlord = int(game.last_move_player == 0)
            is_landlord = int(player == 0)
            bot.current_state(game.hands[player], last_deal,
                              game.legal_actions(), game.played_cards,
                              is_landlord, is_last_deal_landlord)
            action = Play(bot.play())
            print(f"{label} played {indices_to_string(action.cards)}!")
            input("Press anything to continue")
            game.move(action)
            # NOTE(review): the MonteCarlo tree is only re-synchronised after
            # human moves (below), not after bot moves -- confirm whether
            # MonteCarlo is meant to be combined with bot opponents.
        else:
            print("Legal Actions:")
            possible_moves = game.legal_actions()
            for i, action in enumerate(possible_moves):
                print(f'{i}: {indices_to_string(action)}')

            # Keep prompting until the input names a usable entry.
            while True:
                move = input("Please enter your indexed move or enter PASS: ")
                if move == "PASS" or move == "P":
                    index = -1  # selects the last legal action
                    break
                # isdecimal() only accepts characters that int() can parse.
                if move.isdecimal() and int(move) < len(possible_moves):
                    index = int(move)
                    break
                print('Invalid Move!')

            chosen = possible_moves[index]
            play = Play(chosen)
            print(f"You played a {play.type}!")
            input("Press anything to continue")
            game.move(play)

            # Reuse the matching subtree if it can be looked up; otherwise
            # rebuild the search tree from the live game.
            try:
                child = landlordAI.children[chosen]
            except (IndexError, KeyError, TypeError):
                state = Game(hands=game.hands + 0,
                             last_move=game.last_move,
                             turn=game.turn,
                             passes=game.passes)
                landlordAI = MonteCarloTreeSearchNode(state, 0)
            else:
                child.parent = None
                landlordAI = child
        print("\n\n")
    print(f"Player {game.get_winner()}, {players[game.get_winner()]} wins!")