def test_is_complete():
    """Check that is_complete() flips once the back row gets its fifth card."""
    partial_back = ['Ah', '2h', '3h', '4h']

    board = OFCBoard()
    board.front = OFCHand(['6s', '6d', '5s'])
    board.mid = OFCHand(['9d', '9c', '9s', '2d', '3d'])

    # Four cards in the back row: the board must not report completion yet.
    board.back = OFCHand(partial_back)
    assert not board.is_complete()

    # With a five-card back row the board must report completion.
    board.back = OFCHand(partial_back + ['5h'])
    assert board.is_complete()
def test_ofc_random_policy_can_complete():
    """Check that a random agent ends up with a complete board.

    Thirteen cards are popped from a fresh deck one at a time; the agent
    chooses a street for each and the card is placed there.
    """
    agent = OFCRandomAgent()
    deck = DeckGenerator.new_deck()
    board = OFCBoard()

    # range() rather than xrange() so the test also runs on Python 3; the
    # loop index itself is never used.
    for _ in range(13):
        draw = deck.pop()
        street_id = agent.place_new_card(draw, board)
        board.place_card_by_id(draw, street_id)

    assert board.is_complete()
class OFCEnv(object):
    """Handle an OFC game in a manner conducive to PG RL.

    The environment owns both boards and the deck.  A caller places the
    player's cards one at a time through step(); the opponent's cards are
    placed automatically on a randomly chosen free street.  observe() exposes
    the state needed to choose the next action.
    """

    # step() scores the hand once the deck is down to this many cards.
    # NOTE(review): presumably a 52-card deck minus the 17 cards drawn over a
    # full hand -- confirm against DeckGenerator.new_deck().
    _ENDGAME_DECK_SIZE = 35

    def __init__(self, opponent, encoder_class=None):
        """Create the environment and deal the first hand.

        Args:
            opponent: Stored on the instance as ``self.opponent``.  No method
                of this class consults it; opponent moves are chosen at
                random in execute_opponent_turn().
            encoder_class: Optional zero-argument callable.  When given, its
                result is stored as ``self.encoder``.
        """
        # Always define the attribute so that reading ``env.encoder`` never
        # raises AttributeError when no encoder class was supplied.
        self.encoder = encoder_class() if encoder_class is not None else None
        self.opponent = opponent
        self.reset()

    def reset(self):
        """Start a new hand: fresh boards, fresh deck, starting cards dealt."""
        self.plyr_board = OFCBoard()
        self.oppo_board = OFCBoard()

        self.game_over = False
        self.reward = 0

        self.deck = DeckGenerator.new_deck()
        # The starting hands are sorted copies of deck slices; the cards are
        # not removed from the deck.  Later draws use deck.pop().
        # NOTE(review): this assumes pop() takes from the opposite end of the
        # deck and never reaches the dealt slices -- confirm.
        self.plyr_cards = sorted(self.deck[0:5])
        self.oppo_cards = sorted(self.deck[6:11])

        self.current_card = self.plyr_cards.pop()

        # 0 means the opponent lays its starting cards before the player acts.
        self.plyr_goes_first = random.choice([0, 1])
        if self.plyr_goes_first == 0:
            self.execute_opponent_turn()

    def step(self, action):
        """Advance the game state by one decision.

        Args:
            action: Street id on which to place ``self.current_card``.
        """
        self.plyr_board.place_card_by_id(self.current_card, action)

        # Only do the opponent's turn once we have no cards left to lay; the
        # player draws a replacement card first.
        if not self.plyr_cards:
            self.plyr_cards.append(self.deck.pop())
            self.execute_opponent_turn()

        if len(self.deck) > self._ENDGAME_DECK_SIZE:
            self.current_card = self.plyr_cards.pop()
        else:
            self.current_card = None
            self.execute_endgame()

    def observe(self):
        """Return information about the game state.

        Returns:
            A tuple ``(plyr_board, oppo_board, current_card, plyr_cards,
            game_over, reward)``.  ``current_card`` is the card awaiting a
            decision (None once the hand is over), ``plyr_cards`` is what is
            left of the player's hand, and ``reward`` is 0 until the hand has
            been scored by execute_endgame().
        """
        return (
            self.plyr_board,
            self.oppo_board,
            self.current_card,
            self.plyr_cards,
            self.game_over,
            self.reward,
        )

    def execute_opponent_turn(self):
        """Place every card the opponent holds, drawing one if it holds none.

        Does nothing once the opponent's board is complete.
        """
        if self.oppo_board.is_complete():
            return

        if not self.oppo_cards:
            self.oppo_cards.append(self.deck.pop())

        while self.oppo_cards:
            oppo_card = self.oppo_cards.pop()
            free_streets = self.oppo_board.get_free_street_indices()
            # For now the opponent picks uniformly among the free streets.
            oppo_action = random.choice(free_streets)
            self.oppo_board.place_card_by_id(oppo_card, oppo_action)

    def execute_endgame(self):
        """Score the hand and mark the game as over."""
        self.reward = self.calculate_score()
        self.game_over = True

    def calculate_score(self):
        """Return the hand's score from the player's point of view.

        If both boards foul the score is 0.  If only one board fouls, the
        other side collects its own royalties plus 6.  Otherwise the score is
        the street-by-street result from calculate_scoop() plus the
        difference in royalties.
        """
        plyr_royalties = self.plyr_board.get_royalties()
        oppo_royalties = self.oppo_board.get_royalties()
        plyr_fouled = self.plyr_board.is_foul()
        oppo_fouled = self.oppo_board.is_foul()

        if plyr_fouled and oppo_fouled:
            return 0
        if plyr_fouled:
            return (-1 * oppo_royalties) - 6
        if oppo_fouled:
            return plyr_royalties + 6

        exch = self.calculate_scoop(self.plyr_board, self.oppo_board)
        return exch + plyr_royalties - oppo_royalties

    def calculate_scoop(self, lhs_board, rhs_board):
        """Return the net number of streets ``lhs_board`` wins.

        The total is doubled (to +6 or -6) when one side wins all three
        streets.
        """
        lhs_won = (
            self.calculate_street(lhs_board.front, rhs_board.front)
            + self.calculate_street(lhs_board.mid, rhs_board.mid)
            + self.calculate_street(lhs_board.back, rhs_board.back)
        )

        if lhs_won in (3, -3):  # Scoop, one way or the other
            lhs_won *= 2

        return lhs_won

    def calculate_street(self, lhs_hand, rhs_hand):
        """Compare two hands; the lower get_rank() value wins.

        Returns:
            1 if ``lhs_hand`` wins, -1 if ``rhs_hand`` wins, 0 on a tie.
        """
        lhs_rank = lhs_hand.get_rank()
        rhs_rank = rhs_hand.get_rank()

        if lhs_rank < rhs_rank:
            return 1
        if rhs_rank < lhs_rank:
            return -1
        return 0
class OFCEnv(object):
    """Handle an OFC game in a manner conducive to PG RL.

    The environment owns both boards and the deck.  A caller places the
    player's cards one at a time through step(); the opponent's cards are
    placed automatically on a randomly chosen free street.  observe() exposes
    the state needed to choose the next action.
    """

    # step() scores the hand once the deck is down to this many cards.
    # NOTE(review): presumably a 52-card deck minus the 17 cards drawn over a
    # full hand -- confirm against DeckGenerator.new_deck().
    _ENDGAME_DECK_SIZE = 35

    def __init__(self, opponent, encoder_class=None):
        """Create the environment and deal the first hand.

        Args:
            opponent: Stored on the instance as ``self.opponent``.  No method
                of this class consults it; opponent moves are chosen at
                random in execute_opponent_turn().
            encoder_class: Optional zero-argument callable.  When given, its
                result is stored as ``self.encoder``.
        """
        # Always define the attribute so that reading ``env.encoder`` never
        # raises AttributeError when no encoder class was supplied.
        self.encoder = encoder_class() if encoder_class is not None else None
        self.opponent = opponent
        self.reset()

    def reset(self):
        """Start a new hand: fresh boards, fresh deck, starting cards dealt."""
        self.plyr_board = OFCBoard()
        self.oppo_board = OFCBoard()

        self.game_over = False
        self.reward = 0

        self.deck = DeckGenerator.new_deck()
        # The starting hands are sorted copies of deck slices; the cards are
        # not removed from the deck.  Later draws use deck.pop().
        # NOTE(review): this assumes pop() takes from the opposite end of the
        # deck and never reaches the dealt slices -- confirm.
        self.plyr_cards = sorted(self.deck[0:5])
        self.oppo_cards = sorted(self.deck[6:11])

        self.current_card = self.plyr_cards.pop()

        # 0 means the opponent lays its starting cards before the player acts.
        self.plyr_goes_first = random.choice([0, 1])
        if self.plyr_goes_first == 0:
            self.execute_opponent_turn()

    def step(self, action):
        """Advance the game state by one decision.

        Args:
            action: Street id on which to place ``self.current_card``.
        """
        self.plyr_board.place_card_by_id(self.current_card, action)

        # Only do the opponent's turn once we have no cards left to lay; the
        # player draws a replacement card first.
        if not self.plyr_cards:
            self.plyr_cards.append(self.deck.pop())
            self.execute_opponent_turn()

        if len(self.deck) > self._ENDGAME_DECK_SIZE:
            self.current_card = self.plyr_cards.pop()
        else:
            self.current_card = None
            self.execute_endgame()

    def observe(self):
        """Return information about the game state.

        Returns:
            A tuple ``(plyr_board, oppo_board, current_card, plyr_cards,
            game_over, reward)``.  ``current_card`` is the card awaiting a
            decision (None once the hand is over), ``plyr_cards`` is what is
            left of the player's hand, and ``reward`` is 0 until the hand has
            been scored by execute_endgame().
        """
        return (
            self.plyr_board,
            self.oppo_board,
            self.current_card,
            self.plyr_cards,
            self.game_over,
            self.reward,
        )

    def execute_opponent_turn(self):
        """Place every card the opponent holds, drawing one if it holds none.

        Does nothing once the opponent's board is complete.
        """
        if self.oppo_board.is_complete():
            return

        if not self.oppo_cards:
            self.oppo_cards.append(self.deck.pop())

        while self.oppo_cards:
            oppo_card = self.oppo_cards.pop()
            free_streets = self.oppo_board.get_free_street_indices()
            # For now the opponent picks uniformly among the free streets.
            oppo_action = random.choice(free_streets)
            self.oppo_board.place_card_by_id(oppo_card, oppo_action)

    def execute_endgame(self):
        """Score the hand and mark the game as over."""
        self.reward = self.calculate_score()
        self.game_over = True

    def calculate_score(self):
        """Return the hand's score from the player's point of view.

        If both boards foul the score is 0.  If only one board fouls, the
        other side collects its own royalties plus 6.  Otherwise the score is
        the street-by-street result from calculate_scoop() plus the
        difference in royalties.
        """
        plyr_royalties = self.plyr_board.get_royalties()
        oppo_royalties = self.oppo_board.get_royalties()
        plyr_fouled = self.plyr_board.is_foul()
        oppo_fouled = self.oppo_board.is_foul()

        if plyr_fouled and oppo_fouled:
            return 0
        if plyr_fouled:
            return (-1 * oppo_royalties) - 6
        if oppo_fouled:
            return plyr_royalties + 6

        exch = self.calculate_scoop(self.plyr_board, self.oppo_board)
        return exch + plyr_royalties - oppo_royalties

    def calculate_scoop(self, lhs_board, rhs_board):
        """Return the net number of streets ``lhs_board`` wins.

        The total is doubled (to +6 or -6) when one side wins all three
        streets.
        """
        lhs_won = (
            self.calculate_street(lhs_board.front, rhs_board.front)
            + self.calculate_street(lhs_board.mid, rhs_board.mid)
            + self.calculate_street(lhs_board.back, rhs_board.back)
        )

        if lhs_won in (3, -3):  # Scoop, one way or the other
            lhs_won *= 2

        return lhs_won

    def calculate_street(self, lhs_hand, rhs_hand):
        """Compare two hands; the lower get_rank() value wins.

        Returns:
            1 if ``lhs_hand`` wins, -1 if ``rhs_hand`` wins, 0 on a tie.
        """
        lhs_rank = lhs_hand.get_rank()
        rhs_rank = rhs_hand.get_rank()

        if lhs_rank < rhs_rank:
            return 1
        if rhs_rank < lhs_rank:
            return -1
        return 0