Ejemplo n.º 1
0
 def _get_best_meld_cluster(self, player_id: int) -> List[List[Card]]:
     ''' Return the first best meld cluster for a player's held cards.

     Args:
         player_id (int): forwarded to get_held_pile_cards to select the hand

     Returns:
         The first entry produced by get_best_meld_clusters for that hand,
         or an empty list when it produces nothing.
     '''
     held_cards = self.game_canvas.getter.get_held_pile_cards(player_id=player_id)
     candidate_clusters = get_best_meld_clusters(hand=held_cards)
     # No candidates: report "no melds" as an empty cluster.
     if not candidate_clusters:
         return []
     return candidate_clusters[0]
Ejemplo n.º 2
0
def get_payoff_gin_rummy_v1(player: GinRummyPlayer,
                            game: 'GinRummyGame') -> float:
    ''' Get the payoff of player:
            a) 1 if player gins
            b) 0.2 if player knocks
            c) -deadwood_count / 100 otherwise

    The goal is to have the agent learn how to knock and gin.
    The negative payoff when the agent fails to knock or gin should
    encourage the agent to form melds.
    The payoff is scaled to lie between -1 and 1.

    Args:
        player (GinRummyPlayer): player whose payoff is computed; its
            player_id and hand are read
        game (GinRummyGame): game whose round supplies going_out_action
            and going_out_player_id

    Returns:
        payoff (float): payoff for player (the gin payoff is the int 1,
            which is acceptable wherever a float is expected)
    '''
    # NOTE: the return annotation used to be `int or float`, which Python
    # evaluates to plain `int`; `float` is the annotation that covers every
    # value returned here.
    going_out_action = game.round.going_out_action
    going_out_player_id = game.round.going_out_player_id
    player_went_out = going_out_player_id == player.player_id
    if player_went_out and type(going_out_action) is KnockAction:
        return 0.2
    if player_went_out and type(going_out_action) is GinAction:
        return 1
    # The player did not knock or gin: penalise by remaining deadwood,
    # measured against the first of the best meld clusters (none -> []).
    hand = player.hand
    best_meld_clusters = melding.get_best_meld_clusters(hand=hand)
    best_meld_cluster = best_meld_clusters[0] if best_meld_clusters else []
    deadwood_count = utils.get_deadwood_count(hand, best_meld_cluster)
    return -deadwood_count / 100
Ejemplo n.º 3
0
 def score_player_0(self, action: ScoreNorthPlayerAction):
     ''' Record the scoring move of player 0 and pass the turn to player 1.

     When the current player takes the ScoreNorthPlayerAction step, the move
     is recorded on the move sheet and south becomes the current player.

     Args:
         action (ScoreNorthPlayerAction): the action stored in the recorded move

     Raises:
         GinRummyProgramError: if current_player_id is not 0
     '''
     if self.current_player_id != 0:
         raise GinRummyProgramError("current_player_id is {}: should be 0.".format(self.current_player_id))
     scorer = self.get_current_player()
     clusters = melding.get_best_meld_clusters(hand=scorer.hand)
     # Use the first best cluster; an empty list stands for "no melds".
     if clusters:
         chosen_cluster = clusters[0]
     else:
         chosen_cluster = []
     deadwood = utils.get_deadwood_count(hand=scorer.hand, meld_cluster=chosen_cluster)
     move = ScoreNorthMove(player=scorer,
                           action=action,
                           best_meld_cluster=chosen_cluster,
                           deadwood_count=deadwood)
     self.move_sheet.append(move)
     self.current_player_id = 1
Ejemplo n.º 4
0
 def _get_payoff(self, player: GinRummyPlayer, game) -> float:
     ''' Compute the payoff of one player for the finished round.

     Args:
         player (GinRummyPlayer): player being scored; player_id and hand are read
         game: object whose round supplies going_out_action and going_out_player_id

     Returns:
         self._knock_reward if this player went out with a KnockAction,
         self._gin_reward if this player went out with a GinAction,
         otherwise -deadwood_count / 100 for the player's hand.
     '''
     round_state = game.round
     action_taken = round_state.going_out_action
     player_went_out = round_state.going_out_player_id == player.player_id
     if player_went_out and type(action_taken) is KnockAction:
         return self._knock_reward
     if player_went_out and type(action_taken) is GinAction:
         return self._gin_reward
     # Neither knock nor gin by this player: penalise the leftover deadwood.
     cards = player.hand
     clusters = melding.get_best_meld_clusters(hand=cards)
     chosen_cluster = clusters[0] if clusters else []
     deadwood = utils.get_deadwood_count(cards, chosen_cluster)
     return -deadwood / 100
Ejemplo n.º 5
0
 def score_player_1(self, action: ScoreSouthPlayerAction):
     ''' Record the scoring move of player 1 and mark the round as over.

     Args:
         action (ScoreSouthPlayerAction): the action stored in the recorded move

     Raises:
         AssertionError: if current_player_id is not 1 (only when asserts
             are enabled)
     '''
     assert self.current_player_id == 1
     scorer = self.get_current_player()
     clusters = melding.get_best_meld_clusters(hand=scorer.hand)
     # Use the first best cluster; an empty list stands for "no melds".
     if clusters:
         chosen_cluster = clusters[0]
     else:
         chosen_cluster = []
     deadwood = utils.get_deadwood_count(hand=scorer.hand,
                                         meld_cluster=chosen_cluster)
     move = ScoreSouthMove(player=scorer,
                           action=action,
                           best_meld_cluster=chosen_cluster,
                           deadwood_count=deadwood)
     self.move_sheet.append(move)
     self.is_over = True
Ejemplo n.º 6
0
 def score_player_0(self, action: ScoreNorthPlayerAction):
     ''' Record the scoring move of player 0 and pass the turn to player 1.

     When the current player takes the ScoreNorthPlayerAction step, the move
     is recorded on the move sheet and south becomes the current player.

     Args:
         action (ScoreNorthPlayerAction): the action stored in the recorded move

     Raises:
         AssertionError: if current_player_id is not 0 (only when asserts
             are enabled)
     '''
     assert self.current_player_id == 0
     scorer = self.get_current_player()
     clusters = melding.get_best_meld_clusters(hand=scorer.hand)
     # Use the first best cluster; an empty list stands for "no melds".
     if clusters:
         chosen_cluster = clusters[0]
     else:
         chosen_cluster = []
     deadwood = utils.get_deadwood_count(hand=scorer.hand,
                                         meld_cluster=chosen_cluster)
     move = ScoreNorthMove(player=scorer,
                           action=action,
                           best_meld_cluster=chosen_cluster,
                           deadwood_count=deadwood)
     self.move_sheet.append(move)
     self.current_player_id = 1
Ejemplo n.º 7
0
    def get_payoffs(self, game: GinRummyGame):
        ''' Get the payoffs of players:
                a) 1 if gin
                b) 0.2 if knock
                c) -deadwood_count / 100 otherwise

        If self.get_payoff is truthy, it is called as
        self.get_payoff(player, game) for each player and its result is used
        instead of the built-in rules above.

        Args:
            game (GinRummyGame): finished game; its round and actions are read

        Returns:
            payoffs (list): a list of payoffs for each player

        Raises:
            AssertionError: if the round is not over or the last action is
                not a ScoreSouthPlayerAction (only when asserts are enabled)
            Exception: in the built-in calculation, if the going-out action
                is not a KnockAction, GinAction or DeclareDeadHandAction
        '''
        payoffs = [0, 0]
        game_round = game.round
        last_action = game.actions[-1]
        assert game_round.is_over
        assert type(last_action) is ScoreSouthPlayerAction
        going_out_action = game_round.going_out_action
        going_out_player_id = game_round.going_out_player_id
        going_out_type = type(going_out_action)
        for i in range(2):  # FIXME: 200213 simplified calculation
            player = game_round.players[i]
            if self.get_payoff:
                # A custom scorer takes precedence over the built-in rules.
                payoffs[i] = self.get_payoff(player, game)
                continue
            hand = player.hand
            best_meld_clusters = melding.get_best_meld_clusters(hand=hand)
            best_meld_cluster = best_meld_clusters[0] if best_meld_clusters else []
            deadwood_count = utils.get_deadwood_count(hand, best_meld_cluster)
            # Default payoff; overridden below for the player who went out.
            payoff = -deadwood_count / 100
            player_went_out = going_out_player_id == player.player_id
            if player_went_out and going_out_type is KnockAction:
                payoff = 0.2  # FIXME: 200213 simplified calculation
            elif player_went_out and going_out_type is GinAction:
                payoff = 1  # FIXME: 200213 simplified calculation
            elif going_out_type is DeclareDeadHandAction:
                pass  # FIXME: 200213 payoffs should be zeros
            elif going_out_type is KnockAction or going_out_type is GinAction:
                # The opponent knocked or ginned: this player keeps the
                # deadwood-based payoff. Previously this case fell through to
                # the raise below, so every knock/gin round failed here.
                pass
            else:
                raise Exception("get_payoffs: ???")
            payoffs[i] = payoff
        return payoffs