Esempio n. 1
0
def generate_all_legal_buys(state: State) -> List[ActionBuyCard]:
    """Returns the list of all legal buy actions of the active player.

    Candidate cards are the cards on the board followed by the active player's
    reserved cards, each kept only if ``can_afford_card`` accepts it. For every
    candidate and every number of gold gems from zero up to the number the
    player holds, all ways of assigning the gold gems to the colors of the
    discounted price are enumerated.

    An assignment becomes an action only when
      1. the gold gems do not pay for more than the discounted price, and
      2. the player's gems cover what is left of the price once the gold gems
         have been applied.

    Args:
        state: State of the game; its active player is the buyer.

    Returns:
        One ``ActionBuyCard`` per legal (card, gold assignment) combination.
    """
    hand = state.active_players_hand()
    discount = hand.discount()
    gold_possessed = hand.gems_possessed.gems_dict[GemColor.GOLD]

    all_cards_can_afford = [
        card for card in state.board.cards_on_board
        if hand.can_afford_card(card)
    ] + [
        reserved_card for reserved_card in hand.cards_reserved
        if hand.can_afford_card(reserved_card)
    ]

    list_of_actions_buy = []
    for card in all_cards_can_afford:
        card_price_after_discount = card.price % discount
        for n_gold_gems_to_use in range(gold_possessed + 1):
            # choose which colors the gold gems stand in for:
            options_of_use_gold_as = combinations_with_replacement(
                card_price_after_discount.non_empty_stacks_except_gold(),
                n_gold_gems_to_use)
            for option_of_use_gold_as in options_of_use_gold_as:
                use_gold_as = tuple_of_gems_to_gems_collection(
                    option_of_use_gold_as)
                # gold must not be spent on more than the card costs:
                gold_fits_price = use_gold_as <= card_price_after_discount
                # Without this check an option using too little gold would be
                # reported as legal although the player cannot pay the rest
                # of the price with the gems on hand.
                rest_is_payable = gold_fits_price and (
                    hand.gems_possessed >=
                    card_price_after_discount - use_gold_as)
                if rest_is_payable:
                    list_of_actions_buy.append(
                        ActionBuyCard(card, n_gold_gems_to_use, use_gold_as))

    return list_of_actions_buy
Esempio n. 2
0
    def generate_all_legal_reservations_fast(
            state: State) -> List[ActionReserveCard]:
        """Returns the list of all reserve actions available to the active player.

        Nothing can be reserved once the player holds MAX_RESERVED_CARDS
        reserved cards. Otherwise, for every card on the board:
          * no gold on the board: reserve without taking a gold gem;
          * gold on the board and fewer than MAX_GEMS_ON_HAND gems on hand:
            reserve and take a gold gem;
          * gold on the board and the hand is full: either reserve without
            taking gold, or take gold and return one gem of a non-gold color
            the player holds (one action per such color).
        """
        reserve_actions = []
        n_reserved = len(state.active_players_hand().cards_reserved)
        if n_reserved >= MAX_RESERVED_CARDS:
            # the limit of reservations is reached
            return reserve_actions

        for card in state.board.cards_on_board:
            has_room_for_gem = (
                state.active_players_hand().gems_possessed.sum() <
                MAX_GEMS_ON_HAND)
            gold_on_board = state.board.gems_on_board.value(GemColor.GOLD) > 0

            if not gold_on_board:
                # nothing to take, plain reservation
                reserve_actions.append(ActionReserveCard(card, False))
            elif has_room_for_gem:
                # reserve the card and take one golden gem for it
                reserve_actions.append(ActionReserveCard(card, True))
            else:
                # hand is full: either skip the golden gem ...
                reserve_actions.append(ActionReserveCard(card, False))
                # ... or take it and give back one gem of another color
                returnable_colors = state.active_players_hand(
                ).gems_possessed.non_empty_stacks_except_gold()
                for gem_color in returnable_colors:
                    reserve_actions.append(
                        ActionReserveCard(card, True, gem_color))

        return reserve_actions
        def draw_board(self, board: Board, x_coord: int, y_coord: int,
                       state: State) -> None:
            """Draws the board: its title, the cards and nobles lying on the table and the gems.
            Parameters:
            _ _ _ _ _ _
            board: Board to draw.
            x_coord: Horizontal coordinate (from left top corner).
            y_coord: Vertical coordinate (from left top corner).
            state: State of the game. The hand of its active player supplies the results of can_afford_card and
            can_reserve_card that are passed to draw_card for every card.

            When self.interactive is true, one entry per gem color and a trade button are placed as well and added
            to self.drawn_buttons."""

            # the origin of the board is stored on self
            self.board_x_ccord, self.board_y_ccord = x_coord, y_coord

            title_x = x_coord + BOARD_TITLE_POSITION_X
            title_y = y_coord + BOARD_TITLE_POSITION_Y
            self.main_canvas.create_text(title_x,
                                         title_y,
                                         fill=BOARD_NAME_FONT_COLOR,
                                         text=BOARD_TITLE,
                                         font=BOARD_NAME_FONT)

            # cards drawn so far in each row; the count gives the next column
            drawn_in_row = {row: set() for row in Row}
            for card in board.cards_on_board:
                row_cards = drawn_in_row[card.row]
                card_x = x_coord + HORIZONTAL_CARD_DISTANCE * len(row_cards)
                row_cards.add(card)
                card_y = y_coord + VERTICAL_CARD_DISTANCE * POSITION_Y_DICT[
                    card.row]
                hand = state.active_players_hand()
                self.draw_card(card, card_x, card_y,
                               hand.can_afford_card(card),
                               hand.can_reserve_card(), state)

            # nobles are laid out in a single horizontal line
            noble_y = NOBLES_START_Y + y_coord
            for position_index, noble_card in enumerate(board.nobles_on_board):
                noble_x = (NOBLES_START_X + x_coord +
                           HORIZONTAL_NOBLE_DISTANCE * position_index)
                self.draw_noble(noble_card, noble_x, noble_y)

            self.draw_gems(board.gems_on_board, x_coord + GEMS_BOARD_X,
                           y_coord + GEMS_BOARD_Y)

            if not self.interactive:
                return

            # one entry per gem color, shifted horizontally by the color value
            entry_y = y_coord + GEMS_ENTRY_INITIAL_Y
            for gem_color in GemColor:
                gem_entry = Entry(self.main_window)
                entry_x = (x_coord + GEM_ENTRY_SHIFT * gem_color.value +
                           GEMS_ENTRY_INITIAL_X)
                gem_entry.place(x=entry_x, y=entry_y, width=GEM_ENTRY_WIDTH)
                self.entries[gem_color] = gem_entry
                self.drawn_buttons.add(gem_entry)
            self.set_entries(GemsCollection())

            def submit_trade():
                # the entries are read when the button is pressed
                return self.set_action(ActionTradeGems(self.read_entries()))

            trade_button = Button(text=TRADE_BUTTON_TITLE,
                                  font=TRADE_BUTTON_FONT,
                                  command=submit_trade)
            trade_button.place(x=x_coord + TRADE_BUTTON_X,
                               y=y_coord + TRADE_BUTTON_Y)
            self.drawn_buttons.add(trade_button)
    def to_state(self, order_deck=True):
        """Builds a State from the dictionary kept in self.state_as_dict.

        The hands of the active player and of the player before them are
        filled with from_dict, then nobles and cards are popped from the deck
        by id and put on the hands and on the board.

        order_deck: if true the deck is ordered with order_deck(self.state_as_dict),
        otherwise it is shuffled.

        Returns the created State."""
        state = State(prepare_state=False)
        state_dict = self.state_as_dict

        state.active_player_id = state_dict['active_player_id']
        active_hand = state.list_of_players_hands[state.active_player_id]
        active_hand.from_dict(state_dict['active_player_hand'])
        # the other hand is the one right before the active player
        other_index = (state.active_player_id - 1) % len(
            state.list_of_players_hands)
        other_hand = state.list_of_players_hands[other_index]
        other_hand.from_dict(state_dict['other_player_hand'])
        state.board.from_dict(state_dict)

        deck = state.board.deck

        # Adding nobles
        for noble_id in state_dict['active_player_hand']['noble_possessed_ids']:
            active_hand.nobles_possessed.add(deck.pop_noble_by_id(noble_id))

        for noble_id in state_dict['other_player_hand']['noble_possessed_ids']:
            other_hand.nobles_possessed.add(deck.pop_noble_by_id(noble_id))

        for noble_id in state_dict['board']['nobles_on_board']:
            state.board.nobles_on_board.add(deck.pop_noble_by_id(noble_id))

        # Adding cards
        for card_id in state_dict['active_player_hand']['cards_possessed_ids']:
            active_hand.cards_possessed.add(deck.pop_card_by_id(card_id))

        for card_id in state_dict['active_player_hand']['cards_reserved_ids']:
            active_hand.cards_reserved.add(deck.pop_card_by_id(card_id))

        for card_id in state_dict['other_player_hand']['cards_possessed_ids']:
            other_hand.cards_possessed.add(deck.pop_card_by_id(card_id))

        for card_id in state_dict['other_player_hand']['cards_reserved_ids']:
            other_hand.cards_reserved.add(deck.pop_card_by_id(card_id))

        if order_deck:
            deck.order_deck(state_dict)
        else:
            deck.shuffle()

        return state
Esempio n. 5
0
    def __init__(self, strategies=None):
        """Creates a new game state with its action space and observation space.

        strategies is accepted but not used here."""
        initial_state = State()
        actions = SplendorActionSpace()
        # the action space is filled for the initial state
        actions.update(initial_state)

        self.current_state_of_the_game = initial_state
        self.action_space = actions
        self.observation_space = SplendorObservationSpace()
        self.is_done = False
        self.end_episode_mode = 'instant_end'
        self.gui = None
Esempio n. 6
0
 def give_nobles(self, state: State) -> None:
     """Moves to the active player every noble from the board whose price is
     covered by the player's discount."""
     earned_nobles = {
         noble
         for noble in state.board.nobles_on_board
         if noble.price <= state.active_players_hand().discount()
     }
     # transferred in a second pass, so the nobles on the board are not
     # modified while they are being iterated
     for noble in earned_nobles:
         state.active_players_hand().nobles_possessed.add(noble)
         state.board.nobles_on_board.remove(noble)
    def evaluate_state(self, state : State, list_of_actions: List[Action] = None) -> float:
        """Returns the value of the state.

        -1 if the active player has at least POINTS_TO_WIN points, 1 if the
        other player has, otherwise the value given by self.model.get_value.
        list_of_actions is not used."""
        active_points = state.active_players_hand().number_of_my_points()
        if active_points >= POINTS_TO_WIN:
            return -1

        other_points = state.other_players_hand().number_of_my_points()
        if other_points >= POINTS_TO_WIN:
            return 1

        # the state is not terminal, ask the model
        return self.model.get_value(state)
Esempio n. 8
0
def generate_all_legal_trades_classic(state: State) -> List[ActionTradeGems]:
    """Returns the list of all legal gem trades of the active player.

    The player takes gems of distinct non-gold colors available on the board,
    at most MAX_GEMS_IN_ONE_MOVE of them. If that would exceed MAX_GEMS_ON_HAND
    the surplus is returned, using colors that were not taken. A trade is kept
    when the board has the gems taken, the player has the gems returned and
    the transfer is not empty.
    """
    board_gems = state.board.gems_on_board
    player_gems = state.active_players_hand().gems_possessed
    n_colors_available = len(board_gems.non_empty_stacks_except_gold())

    # net number of gems the hand can grow by in this move
    net_gain = min(MAX_GEMS_ON_HAND - player_gems.sum(),
                   MAX_GEMS_IN_ONE_MOVE, n_colors_available)
    most_gems_to_take = min(MAX_GEMS_IN_ONE_MOVE, n_colors_available)
    empty_transfer = GemsCollection()

    legal_trades = []
    for n_taken in range(net_gain, most_gems_to_take + 1):
        # everything taken above the net gain has to be given back
        n_returned = n_taken - net_gain

        for colors_taken in combinations(
                board_gems.non_empty_stacks_except_gold(), n_taken):
            # gems can be returned only in colors that are not taken
            colors_to_return_from = {
                gem_color
                for gem_color in GemColor if gem_color not in colors_taken
            }
            gems_to_take = tuple_of_gems_to_gems_collection(colors_taken)

            for colors_returned in combinations_with_replacement(
                    colors_to_return_from, n_returned):
                gems_to_return = tuple_of_gems_to_gems_collection(
                    colors_returned)
                transfer = gems_to_take - gems_to_return

                if (board_gems >= transfer  # board has the gems to give
                        and player_gems >= -transfer  # player can return
                        and transfer != empty_transfer):
                    legal_trades.append(ActionTradeGems(transfer))

    return legal_trades
Esempio n. 9
0
    def choose_action(self, state: State) -> Action:
        """Chooses at random one of the legal actions with the highest score.

        Every legal buy, reserve and trade action is evaluated with
        action.evaluate(state) and scored as the sum of
          * self.weight[0] * floor((current points + ae["card"][2]) / POINTS_TO_WIN)
          * self.weight[1] * ae["card"][2]
          * self.weight[2] * ae["nobles"]
          * self.weight[3] * ae["card"][0]
          * self.weight[4] * sum(ae["gems_flow"])
        where ae is the result of the evaluation.

        Args:
            state: State of the game in which the active player moves.

        Returns:
            One of the best scoring actions, or None when there is no legal
            action.
        """
        actions_by_type = {
            'buy': generate_all_legal_buys(state),
            'trade': generate_all_legal_trades(state),
            'reserve': generate_all_legal_reservations(state)
        }
        list_of_actions = actions_by_type['buy'] + actions_by_type[
            'reserve'] + actions_by_type['trade']

        if not list_of_actions:
            return None

        current_points = state.active_players_hand().number_of_my_points()
        points = []
        for action in list_of_actions:
            ae = action.evaluate(state)
            potential_reward = (
                np.floor((current_points + ae["card"][2]) / POINTS_TO_WIN) *
                self.weight[0] + self.weight[1] * ae["card"][2] +
                self.weight[2] * ae["nobles"] +
                self.weight[3] * ae["card"][0] +
                self.weight[4] * sum(ae["gems_flow"]))
            points.append(potential_reward)

        # the best score is found once instead of sorting the scores again for
        # every action
        best_points = max(points)
        best_actions = [
            action for action, point in zip(list_of_actions, points)
            if point >= best_points
        ]
        return random.choice(best_actions)
Esempio n. 10
0
    def __init__(self):
        """Loads all cards and nobles, creates the initial state of the game and
        the action and observation spaces."""
        cards = load_all_cards()
        nobles = load_all_nobles()
        self.all_cards = cards
        self.all_nobles = nobles

        self.current_state_of_the_game = State(all_cards=cards,
                                               all_nobles=nobles)
        self.action_space = SplendorActionSpace()
        # needs the state and the action space created above
        self.update_actions()
        self.observation_space = SplendorObservationSpace(all_cards=cards,
                                                          all_nobles=nobles)

        self.is_done = False
        self.first_winner = None
        self.draw = False
        self.end_episode_mode = 'instant_end'
        self.gui = None
    def recreate_state(self):
        """Builds a state that agrees with the observation kept in self.observation_dict.

        Cards and nobles named in the observation are placed on the board or on the players' hands and removed
        from the decks of the new state. Warning: the result is ambiguous, many states can have the same
        observation (they may differ in the order of hidden cards)."""
        state = State(all_cards=StochasticObservation.all_cards,
                      all_nobles=StochasticObservation.all_nobles,
                      prepare_state=False)
        observation = self.observation_dict
        board = state.board
        deck = board.deck
        cards_on_board_names = observation['cards_on_board_names']
        nobles_on_board_names = observation['nobles_on_board_names']

        # what lies on the table
        for name in cards_on_board_names:
            board_card = name_to_card_dict[name]
            board.cards_on_board.add(board_card)
            deck.decks_dict[board_card.row].remove(board_card)

        for name in nobles_on_board_names:
            board_noble = name_to_noble_dict[name]
            board.nobles_on_board.add(board_noble)
            deck.deck_of_nobles.remove(board_noble)

        board.gems_on_board = observation['gems_on_board']

        # what the players hold
        recreated_hands = []
        for hand_observation in observation['players_hands']:
            hand = PlayersHand()
            hand.gems_possessed = hand_observation['gems_possessed']

            for name in hand_observation['cards_possessed_names']:
                owned_card = name_to_card_dict[name]
                hand.cards_possessed.add(owned_card)
                deck.decks_dict[owned_card.row].remove(owned_card)

            for name in hand_observation['cards_reserved_names']:
                reserved_card = name_to_card_dict[name]
                hand.cards_reserved.add(reserved_card)
                deck.decks_dict[reserved_card.row].remove(reserved_card)

            for name in hand_observation['nobles_possessed_names']:
                owned_noble = name_to_noble_dict[name]
                hand.nobles_possessed.add(owned_noble)
                deck.deck_of_nobles.remove(owned_noble)

            recreated_hands.append(hand)

        state.active_player_id = observation['active_player_id']
        state.list_of_players_hands = recreated_hands
        return state
Esempio n. 12
0
    def execute(self,
                state: State) -> None:
        """Performs the purchase of self.card by the active player.

        The gold gems used and the rest of the discounted price go from the
        player to the board, the card goes to the player's possessed cards
        (leaving the board or the reserved cards), nobles are given and the
        turn passes to the next player."""
        buyer = state.active_players_hand()
        board = state.board

        # price the player has to pay after considering the discount
        price_after_discount = self.card.price % buyer.discount()
        if self.n_gold_gems_to_use > 0:
            # gold gems leave the hand and lower the price
            buyer.gems_possessed.gems_dict[
                GemColor.GOLD] -= self.n_gold_gems_to_use
            price_after_discount -= self.use_gold_as

        buyer.cards_possessed.add(self.card)
        if self.card in board.cards_on_board:
            board.remove_card_from_board_and_refill(self.card)
        if self.card in buyer.cards_reserved:
            buyer.cards_reserved.remove(self.card)

        # the remaining price is paid with ordinary gems
        buyer.gems_possessed = buyer.gems_possessed - price_after_discount
        board.gems_on_board = board.gems_on_board + price_after_discount
        board.gems_on_board.gems_dict[GemColor.GOLD] += self.n_gold_gems_to_use

        self.give_nobles(state)
        self.change_active_player(state)
Esempio n. 13
0
    def prepare_to_buy(self, card: Card, state: State):
        """Fills the gem entries with a proposed payment for the card and places
        the button that confirms the purchase.

        For every color the proposal is the smaller of the discounted price
        and the number of gems the active player holds; the gold entry is set
        to the sum of (discounted price % gems possessed)."""
        hand = state.active_players_hand()
        price_after_discount = card.price % hand.discount()
        # presumably the part of the price the ordinary gems do not cover
        min_gold = (price_after_discount % hand.gems_possessed).sum()

        proposed_payment = {}
        for gem_color in GemColor:
            proposed_payment[gem_color] = min(
                price_after_discount.value(gem_color),
                hand.gems_possessed.value(gem_color))
        min_gold_price = GemsCollection(proposed_payment)
        min_gold_price.gems_dict[GemColor.GOLD] = min_gold
        self.set_entries(min_gold_price)

        def confirm_buy():
            return self.do_buy(card, state)

        confirm_buy_button = Button(text=CONFIRM_BUY_TITLE,
                                    font=CONFIRM_BUY_FONT,
                                    command=confirm_buy)
        button_x = self.board_x_ccord + CONFIRM_BUY_X
        button_y = self.board_y_ccord + CONFIRM_BUY_Y
        confirm_buy_button.place(x=button_x, y=button_y)
        self.drawn_buttons.add(confirm_buy_button)
Esempio n. 14
0
    def prepare_to_reserve(self, card, state: State):
        """Fills the gem entries with the default transfer for a reservation and
        places the button that confirms it.

        The default transfer is one gold gem when the active player holds
        fewer than MAX_GEMS_ON_HAND gems and there is gold on the board,
        otherwise it is empty."""
        basic_gems_transfer = GemsCollection()
        has_room_for_gem = (state.active_players_hand().gems_possessed.sum() <
                            MAX_GEMS_ON_HAND)
        if has_room_for_gem and \
                state.board.gems_on_board.gems_dict[GemColor.GOLD] > 0:
            basic_gems_transfer.gems_dict[GemColor.GOLD] = 1
        self.set_entries(basic_gems_transfer)

        def confirm_reserve():
            return self.do_reserve(card, state)

        confirm_reserve_button = Button(text=CONFIRM_RESERVE_TITLE,
                                        font=CONFIRM_RESERVE_FONT,
                                        command=confirm_reserve)
        button_x = self.board_x_ccord + CONFIRM_RESERVE_X
        button_y = self.board_y_ccord + CONFIRM_RESERVE_Y
        confirm_reserve_button.place(x=button_x, y=button_y)
        self.drawn_buttons.add(confirm_reserve_button)
Esempio n. 15
0
 def execute(self,
             state: State) -> None:
     """Performs the reservation of self.card by the active player.

     The card is removed from the board (which is refilled) and added to the
     player's reserved cards. If self.take_golden_gem is set, one gold gem
     moves from the board to the player; if self.return_gem_color is also
     given, one gem of that color moves from the player back to the board.
     Finally the turn passes to the next player."""
     hand = state.active_players_hand()
     board = state.board

     board.remove_card_from_board_and_refill(self.card)
     hand.cards_reserved.add(self.card)
     if self.take_golden_gem:
         hand.gems_possessed.gems_dict[GemColor.GOLD] += 1
         board.gems_on_board.gems_dict[GemColor.GOLD] -= 1
         if self.return_gem_color is not None:
             # the counts are kept in gems_dict, like for all other transfers
             hand.gems_possessed.gems_dict[self.return_gem_color] -= 1
             board.gems_on_board.gems_dict[self.return_gem_color] += 1
     self.change_active_player(state)
Esempio n. 16
0
    def evaluate(self, state: State) -> dict:
        """Describes the effect of buying self.card without changing the state.

        The card is added to the active player's possessed cards only for the
        time needed to count the nobles whose price the player's discount
        would then cover, and is removed again afterwards, also when the
        counting fails.

        Returns:
            A dictionary with the keys
              "gems_flow": to_dict_neg() of the price left after the discount
                  and the gold gems used,
              "card": [1, card_properties[0],
                  card_properties[1] + 3 * number of nobles], where
                  card_properties is the result of self.card.evaluate(),
              "nobles": the number of nobles counted.
        """
        hand = state.active_players_hand()
        price = self.card.price % hand.discount()
        if self.n_gold_gems_to_use > 0:
            price -= self.use_gold_as

        hand.cards_possessed.add(self.card)
        try:
            discount_with_card = hand.discount()
            nobles_to_transfer = sum(
                1 for noble in state.board.nobles_on_board
                if noble.price <= discount_with_card)
        finally:
            # the state must be left exactly as it was received
            hand.cards_possessed.remove(self.card)
        card_properties = self.card.evaluate()

        return {
            "gems_flow":
            price.to_dict_neg(),
            "card": [
                1, card_properties[0],
                card_properties[1] + 3 * nobles_to_transfer
            ],
            "nobles":
            nobles_to_transfer
        }
Esempio n. 17
0
 def state_to_input(self, state: State):
     """Returns the input for the state: the input of the board followed by the
     inputs of the active player's hand and of the previous player's hand."""
     board_input = self.board_to_input(state.board)
     active_hand_input = self.players_hand_to_input(
         state.active_players_hand())
     previous_hand_input = self.players_hand_to_input(
         state.previous_players_hand())
     return board_input + active_hand_input + previous_hand_input
 def restore_env(self, numerator):
     """Puts self.env back into the state saved in self.env_dict[numerator]
     and updates the actions (light version)."""
     env = self.env
     env.is_done = False
     fresh_state = State(all_cards=env.all_cards, all_nobles=env.all_nobles)
     env.current_state_of_the_game = fresh_state
     saved_state = self.env_dict[numerator]
     env.load_state_from_dict(saved_state)
     env.update_actions_light()
Esempio n. 19
0
 def execute(self,
             state: State) -> None:
     """Moves self.gems_from_board_to_player from the board to the active
     player and passes the turn to the next player."""
     transfer = self.gems_from_board_to_player
     board = state.board
     hand = state.active_players_hand()
     board.gems_on_board = board.gems_on_board - transfer
     hand.gems_possessed = hand.gems_possessed + transfer
     self.change_active_player(state)
Esempio n. 20
0
 def change_active_player(self, state: State) -> None:
     """Passes the turn to the next player, going back to the first player
     after the last one."""
     n_players = len(state.list_of_players_hands)
     next_player_id = (state.active_player_id + 1) % n_players
     state.active_player_id = next_player_id
Esempio n. 21
0
class SplendorEnv(Env):
    """ Description:
        This environment runs the game Splendor."""

    metadata = {'render.modes': ['human']}

    def __init__(self):
        """Loads all cards and nobles and creates the initial state of the game."""

        #load all cards and nobles
        self.all_cards = load_all_cards()
        self.all_nobles = load_all_nobles()

        self.current_state_of_the_game = State(all_cards=self.all_cards,
                                               all_nobles=self.all_nobles)
        self.action_space = SplendorActionSpace()
        self.update_actions()
        self.observation_space = SplendorObservationSpace(
            all_cards=self.all_cards, all_nobles=self.all_nobles)
        self.is_done = False
        # id of the player who ended the episode, None while there is none
        self.first_winner = None
        self.draw = False
        self.end_episode_mode = 'instant_end'
        # the window is created by render() when it is first needed
        self.gui = None

    def load_state_from_dict(self, state_as_dict: StateAsDict):
        """Replaces the current state with the one stored in state_as_dict."""
        # NOTE(review): first_winner is not cleared here, unlike in
        # load_observation - confirm this is intended.
        self.current_state_of_the_game = state_as_dict.to_state()
        self.is_done = False

    def active_player_id(self):
        """Returns the id of the active player."""
        return self.current_state_of_the_game.active_player_id

    def active_players_hand(self):
        """Returns the hand of the active player."""
        return self.current_state_of_the_game.active_players_hand()

    def state_to_dict(self):
        """Returns the current state as a dictionary."""
        return self.current_state_of_the_game.to_dict()

    def action_space_to_dict(self):
        """Returns the action space as a dictionary."""
        return self.action_space.to_dict()

    def step(self,
             mode,
             action: Action,
             return_observation=True,
             ensure_correctness=False):
        """
        Executes action on the environment. Action is performed on the current state of the game.

        Below "previous player" is the one returned by previous_player_id() once the action has been executed
        (presumably the player who has just moved).

        Reward:
         1 if the previous player is the winner, or has just reached POINTS_TO_WIN and so becomes the winner,
        -1 if there already is a winner and it is not the previous player,
         0 if there is no winner yet and the action does not give enough points to the player.
        When action is None the episode ends and the previous player becomes the winner.

        :param
        mode: 'deterministic' or 'stochastic', the kind of observation to return
        action: action to take, None if there is no action to take
        return_observation: False if the observation is not needed (None is returned in its place)
        ensure_correctness: True if you want the enivornment check if the action is legal, False if you are sure
        :return: observation, reward, is_done, info (info holds 'winner_id' and, if there was no action, 'Warning')
        :raises ValueError: if an observation is requested and mode is not one of the two known modes
        """
        info = {}
        if action is not None:
            if ensure_correctness:
                self.update_actions()
                assert self.action_space.contains(
                    action), '{} is not valid action'.format(action)
            action.execute(self.current_state_of_the_game)

        # We find the reward:
        reward = 0

        if action is None:
            info['Warning'] = 'There was no action.'
            self.is_done = True
            self.first_winner = self.current_state_of_the_game.previous_player_id(
            )

        previous_player_id = self.current_state_of_the_game.previous_player_id(
        )
        if self.first_winner is not None:
            # Compared only when a winner exists: without this guard every
            # ordinary move would be rewarded with -1 instead of 0.
            reward = 1 if previous_player_id == self.first_winner else -1
        elif not self.is_done:
            if self.current_state_of_the_game.previous_players_hand(
            ).number_of_my_points() >= POINTS_TO_WIN:
                reward = 1
                self.first_winner = previous_player_id
                self.is_done = True

        info['winner_id'] = self.first_winner

        if not return_observation:
            return None, reward, self.is_done, info

        if mode == 'deterministic':
            observation_to_show = DeterministicObservation(
                self.current_state_of_the_game)
        elif mode == 'stochastic':
            observation_to_show = StochasticObservation(
                self.current_state_of_the_game)
        else:
            raise ValueError(
                "Unknown mode {!r}, expected 'deterministic' or 'stochastic'".
                format(mode))
        return observation_to_show, reward, self.is_done, info

    def is_done_update(self):
        """Marks the episode as done if the previous player has at least POINTS_TO_WIN points."""
        if self.current_state_of_the_game.previous_players_hand(
        ).number_of_my_points() >= POINTS_TO_WIN:
            self.is_done = True

    def update_actions(self):
        """Updates the action space for the current state and vectorizes it."""
        self.action_space.update(self.current_state_of_the_game)
        self.vectorize_action_space()

    def update_actions_light(self):
        """Updates the action space for the current state without vectorizing it."""
        self.action_space.update(self.current_state_of_the_game)

    def current_action_space(self):
        """Returns the action space updated for the current state."""
        self.update_actions()
        return self.action_space

    def show_warning(self, action):
        """Shows the warning about the action in the gui, if there is one."""
        if self.gui is not None:
            self.gui.show_warning(action)

    def show_last_action(self, action):
        """Shows the last action in the gui, if there is one."""
        if self.gui is not None:
            self.gui.show_last_action(action)

    def load_observation(self, observation: SplendorObservation):
        """Replaces the current state with a state recreated from the observation and forgets the winner."""
        self.is_done = False
        self.first_winner = None
        self.current_state_of_the_game = observation.recreate_state()

    def set_active_player(self, id: int) -> None:
        """Makes the player with the given id the active one."""
        self.current_state_of_the_game.active_player_id = id

    def set_players_names(self, list_of_names: List[str]) -> None:
        """Gives the i-th name from the list to the i-th player's hand."""
        for i, name in enumerate(list_of_names):
            self.current_state_of_the_game.list_of_players_hands[i].name = name

    def points_of_player_by_id(self, id: int) -> int:
        """Returns the number of points of the player with the given id."""
        return self.current_state_of_the_game.list_of_players_hands[
            id].number_of_my_points()

    def render(self, mode='human', interactive=True):
        """Creates window if necessary, then renders the current_state of the game """
        if self.gui is None:
            self.gui = SplendorGUI()

        self.gui.interactive = interactive

        #draw current_state
        self.gui.draw_state(self.current_state_of_the_game)

    def reset(self):
        """Starts a new game: creates a new initial state, forgets the winner and updates the actions."""
        self.is_done = False
        self.first_winner = None
        self.current_state_of_the_game = State(all_cards=self.all_cards,
                                               all_nobles=self.all_nobles)
        self.update_actions()

    def show_observation(self, mode):
        """Updates the actions and returns the observation of the current state.

        mode: 'deterministic' or 'stochastic'; for any other value None is returned."""
        self.update_actions()
        if mode == 'deterministic':
            return DeterministicObservation(self.current_state_of_the_game)
        if mode == 'stochastic':
            return StochasticObservation(self.current_state_of_the_game)

    def previous_player_id(self):
        """Returns the id of the previous player."""
        return self.current_state_of_the_game.previous_player_id()

    def previous_players_hand(self):
        """Returns the hand of the previous player."""
        return self.current_state_of_the_game.previous_players_hand()

    def clone_state(self):
        """Returns a state recreated from the deterministic observation of the current state."""
        observation = DeterministicObservation(self.current_state_of_the_game)
        return observation.recreate_state()

    def restore_state(self, state):
        """Makes the given state the current state of the game."""
        self.current_state_of_the_game = state

    def vectorize_observation_space(self):
        """Does nothing in this class."""
        pass

    def vectorize_action_space(self):
        """Does nothing in this class."""
        pass
 def __init__(self, state: State = None):
     """Stores the dictionary form of the state, or an empty dictionary when
     no state is given."""
     self.state_as_dict = state.to_dict() if state is not None else {}
Esempio n. 23
0
class SplendorEnv(Env):
    """Gym environment that runs the game Splendor.

    Attributes set in ``__init__``:
        current_state_of_the_game: the State on which actions are executed.
        action_space: SplendorActionSpace, refreshed from the current state.
        observation_space: SplendorObservationSpace used to build observations.
        is_done: True once the episode has ended.
        end_episode_mode: 'instant_end' or 'let_all_move' (see ``step``).
        gui: SplendorGUI instance, created lazily by ``render``.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, strategies=None):
        """Create the initial state of the game and the matching spaces.

        :param strategies: accepted but not used by this class.
        """
        self.current_state_of_the_game = State()
        self.action_space = SplendorActionSpace()
        self.action_space.update(self.current_state_of_the_game)
        self.observation_space = SplendorObservationSpace()
        self.is_done = False
        self.end_episode_mode = 'instant_end'
        self.gui = None

    def active_players_hand(self):
        """Return the active player's hand, as reported by the current state."""
        return self.current_state_of_the_game.active_players_hand()

    def step(self, action: Action):
        """
        Performs one action on the current state of the game.

        There are two modes for is_done: instant_end - the episode ends
        instantly when any player reaches the number of points equal
        POINTS_TO_WIN and let_all_move - when some player reaches POINTS_TO_WIN
        we allow all players to move (till the end of round) and then we end
        the episode.
        Reward: 1 if the action gives POINTS_TO_WIN to the player and episode
        is not yet ended (taking actions when episode ended is considered as
        losing), -1 if episode ended, 0 if episode is not yet ended and the
        action does not give enough points to the player.

        :param action: action to take
        :return: observation, reward, is_done, info
        :raises AssertionError: if the action is not contained in the action
            space built for the current state.
        """
        self.action_space.update(self.current_state_of_the_game)
        assert self.action_space.contains(
            action), '{} of type {} is not valid action'.format(
                action, type(action))
        action.execute(self.current_state_of_the_game)

        # We find the reward. self.is_done still holds the value from before
        # this action, so acting in an already finished episode gives -1.
        if self.is_done:
            reward = -1
        elif self.current_state_of_the_game.previous_players_hand(
        ).number_of_my_points() >= POINTS_TO_WIN:
            # The points are read from the previous player's hand, presumably
            # because executing the action has already passed the turn on.
            reward = 1
        else:
            reward = 0

        self.is_done_update(self.end_episode_mode)
        self.action_space.update(self.current_state_of_the_game)
        return self.observation_space.state_to_observation(
            self.current_state_of_the_game), reward, self.is_done, {}

    def is_done_update(self, end_episode_mode='instant_end'):
        """Set ``is_done`` to True when the episode has ended in the given mode.

        ``is_done`` is never reset to False here.

        :param end_episode_mode: 'instant_end' or 'let_all_move'; any other
            value leaves ``is_done`` unchanged.
        """
        state = self.current_state_of_the_game
        if end_episode_mode == 'instant_end':
            # NOTE(review): this checks the *active* player's hand while the
            # reward in step() checks the *previous* player's hand - confirm
            # which one is intended after an action has been executed.
            if state.active_players_hand().number_of_my_points() >= POINTS_TO_WIN:
                self.is_done = True
        if end_episode_mode == 'let_all_move':
            # The episode can end only if someone has reached enough points
            # and the active player id is the last index of the players list.
            if state.active_player_id == len(state.list_of_players_hands) - 1:
                # Check if someone has reached enough points to win.
                if any(player_hand.number_of_my_points() >= POINTS_TO_WIN
                       for player_hand in state.list_of_players_hands):
                    self.is_done = True

    def update_actions(self):
        """Refresh the action space from the current state of the game."""
        self.action_space.update(self.current_state_of_the_game)

    def show_warning(self, action):
        """Forward ``action`` to the GUI's ``show_warning``; no-op without a GUI."""
        if self.gui is not None:
            self.gui.show_warning(action)

    def show_last_action(self, action):
        """Forward ``action`` to the GUI's ``show_last_action``; no-op without a GUI."""
        if self.gui is not None:
            self.gui.show_last_action(action)

    def render(self, mode='human', interactive=False):
        """Creates window if necessary, then renders the state of the game.

        :param mode: accepted but not used by this method.
        :param interactive: accepted but not used by this method.
        """
        if self.gui is None:
            self.gui = SplendorGUI()

        # Clear the gui, then draw the state once. draw_state receives the
        # whole state, so it is not repeated for every card on the board.
        self.gui.clear_all()
        self.gui.draw_state(self.current_state_of_the_game)
Esempio n. 24
0
import time

from gym_splendor_code.envs.mechanics.abstract_observation import DeterministicObservation
from gym_splendor_code.envs.mechanics.state import State
from monte_carlo_tree_search.mcts_algorithms.multi_process.multi_mcts import \
    MultiMCTS

from mpi4py import MPI
# Only the process with rank 0 prints the timing at the end.
comm = MPI.COMM_WORLD
my_rank = comm.Get_rank()
main_process = (my_rank == 0)

mc = MultiMCTS(comm)

# Root the search at a deterministic observation of a fresh state.
stan = State()
obs = DeterministicObservation(stan)
mc.create_root(obs)

# Time a single run_simulation call.
t_start = time.time()
mc.run_simulation(50, 1)
t_end = time.time()

if main_process:
    elapsed = t_end - t_start
    print('Time taken = {}'.format(elapsed))
Esempio n. 25
0
def generate_all_legal_trades_fast(state: State) -> List[ActionTradeGems]:
    """Returns the list of all possible actions of trade in a given current_state.

    The enumeration is split by the number of gems the active player holds
    (below 8, exactly 8, 9 or 10). The more gems the player holds, the more
    gems each generated trade gives back (``return_colors`` / ``return_val``),
    presumably so that the player never ends up above 10 gems. For any other
    count (more than 10) no trade is generated.

    :param state: state whose board gems and active player's gems are read.
    :return: list of ActionTradeGems; empty when no branch applies.
    """
    gems_board = state.board.gems_on_board
    gems_player = state.active_players_hand().gems_possessed
    #print(state.active_players_hand().discount)
    # When fewer than 3 (or 2) non-gold stacks are non-empty, take as many
    # different colors as there are.
    n_non_empty_stacks = len(gems_board.non_empty_stacks_except_gold())
    take3 = min(3, n_non_empty_stacks)
    take2 = min(2, n_non_empty_stacks)

    list_of_actions_trade = []
    # NOTE(review): this relies on sum() over the value returned by to_dict()
    # giving the player's total number of gems - confirm what to_dict() returns.
    gems_player_n = sum(gems_player.to_dict())

    # get_colors_on_condition(k) presumably selects the colors with at least
    # k gems - TODO confirm against the gems collection class.
    if gems_player_n < 8:
        # Nothing has to be returned in this branch.
        """ Take 2 gems of same color """
        for color in gems_board.get_colors_on_condition(4):
            list_of_actions_trade.append(
                ActionTradeGems(
                    tuple_of_gems_to_gems_collection({color}, val=2)))
        """ Take 3 gems of different colors """
        for option in list(
                combinations(gems_board.get_colors_on_condition(1), take3)):
            list_of_actions_trade.append(
                ActionTradeGems(tuple_of_gems_to_gems_collection(option)))

    elif gems_player_n == 8:
        # Taking 2 needs no return; taking 3 returns 1 gem.
        """ Take 2 gems of same color. """
        for color in gems_board.get_colors_on_condition(4):
            list_of_actions_trade.append(
                ActionTradeGems(tuple_of_gems_to_gems_collection({color}, 2)))
        """ Take 2 gems of different color. """
        for option in list(
                combinations(gems_board.get_colors_on_condition(1), take2)):
            list_of_actions_trade.append(
                ActionTradeGems(tuple_of_gems_to_gems_collection(option)))
        """ Take 3 gems of different color and return one of other color. """
        for option in list(
                combinations(gems_board.get_colors_on_condition(1), take3)):
            # Returned colors exclude the colors being taken.
            for option_return in (
                    gems_player.get_all_colors_on_condition(1)).difference(
                        list(option)):
                list_of_actions_trade.append(
                    ActionTradeGems(
                        tuple_of_gems_to_gems_collection(
                            option, return_colors={option_return})))
    elif gems_player_n == 9:
        # Taking 1 needs no return; taking 2 returns 1; taking 3 returns 2.
        # Unlike the branches above, the fixed sizes 2 and 3 are used here
        # instead of take2 / take3.
        """ Take 1 gem """
        for color in gems_board.get_colors_on_condition(1):
            list_of_actions_trade.append(
                ActionTradeGems(tuple_of_gems_to_gems_collection({color})))
        """ Take 2 gems of same color and return one of other color. """
        for color in gems_board.get_colors_on_condition(4):
            for option_return in (
                    gems_player.get_all_colors_on_condition(1)).difference(
                        list({color})):
                list_of_actions_trade.append(
                    ActionTradeGems(
                        tuple_of_gems_to_gems_collection(
                            {color}, val=2, return_colors={option_return})))
        """ Take 2 gems of different colors and return one of other color: """
        for option in list(
                combinations(gems_board.get_colors_on_condition(1), 2)):
            for option_return in (
                    gems_player.get_all_colors_on_condition(1)).difference(
                        list(option)):
                list_of_actions_trade.append(
                    ActionTradeGems(
                        tuple_of_gems_to_gems_collection(
                            option, return_colors={option_return})))
        """ Take 3 gems of different color and return: """
        for option in list(
                combinations(gems_board.get_colors_on_condition(1), 3)):
            """ 2 gems of different colors; """
            for option_return in list(
                    combinations((
                        gems_player.get_all_colors_on_condition(1)).difference(
                            list(option)), 2)):
                list_of_actions_trade.append(
                    ActionTradeGems(
                        tuple_of_gems_to_gems_collection(
                            option,
                            return_val=[1, 1],
                            return_colors=option_return)))
            """ 2 gems of same color. """
            for option_return in (
                    gems_player.get_all_colors_on_condition(2)).difference(
                        list(option)):
                list_of_actions_trade.append(
                    ActionTradeGems(
                        tuple_of_gems_to_gems_collection(
                            option,
                            return_val=[2],
                            return_colors={option_return})))
    elif gems_player_n == 10:
        # Every trade gives back as many gems as it takes.
        """ Exchange 1 gem """
        for color in gems_board.get_colors_on_condition(1):
            for option_return in (
                    gems_player.get_all_colors_on_condition(1)).difference(
                        list({color})):
                list_of_actions_trade.append(
                    ActionTradeGems(
                        tuple_of_gems_to_gems_collection(
                            {color}, return_colors={option_return})))
        """ Exchange 2 gems of same color: """
        for color in gems_board.get_colors_on_condition(4):
            """ for 2 gems of one but other color """
            for option_return in (
                    gems_player.get_all_colors_on_condition(2)).difference(
                        list({color})):
                list_of_actions_trade.append(
                    ActionTradeGems(
                        tuple_of_gems_to_gems_collection(
                            {color},
                            val=2,
                            return_val=[2],
                            return_colors={option_return})))
            """ for 2 gems of 2 other colors """
            for option_return in list(
                    combinations((
                        gems_player.get_all_colors_on_condition(1)).difference(
                            list({color})), 2)):
                list_of_actions_trade.append(
                    ActionTradeGems(
                        tuple_of_gems_to_gems_collection(
                            {color},
                            val=2,
                            return_val=[1, 1],
                            return_colors=option_return)))
        """ Exchange 2 gems of two different colors: """
        for option in list(
                combinations(gems_board.get_colors_on_condition(1), 2)):
            """ for 2 gems of one but other color """
            for option_return in (
                    gems_player.get_all_colors_on_condition(2)).difference(
                        list(option)):
                list_of_actions_trade.append(
                    ActionTradeGems(
                        tuple_of_gems_to_gems_collection(
                            option,
                            return_val=[2],
                            return_colors={option_return})))
            """ for 2 gems of 2 other colors """
            for option_return in list(
                    combinations((
                        gems_player.get_all_colors_on_condition(1)).difference(
                            list(option)), 2)):
                list_of_actions_trade.append(
                    ActionTradeGems(
                        tuple_of_gems_to_gems_collection(
                            option,
                            return_val=[1, 1],
                            return_colors=option_return)))
        """ Exchange 3 gems of different colors: """
        for option in list(
                combinations(gems_board.get_colors_on_condition(1), 3)):
            """ for 3 gems of 3 remaining colors """
            for option_return in list(
                    combinations((
                        gems_player.get_all_colors_on_condition(1)).difference(
                            list(option)), 3)):
                list_of_actions_trade.append(
                    ActionTradeGems(
                        tuple_of_gems_to_gems_collection(
                            option,
                            return_val=[1, 1, 1],
                            return_colors=option_return)))
            """ for 2 gems of one and one of the remaining color """
            for option_return in (
                    gems_player.get_all_colors_on_condition(2)).difference(
                        list(option)):
                # The single returned gem must differ from both the taken
                # colors and the color returned twice.
                for option_return_ in ((
                        gems_player.get_all_colors_on_condition(1)).difference(
                            list(option))).difference(list({option_return})):
                    list_of_actions_trade.append(
                        ActionTradeGems(
                            tuple_of_gems_to_gems_collection(
                                option,
                                return_val=[2, 1],
                                return_colors=[option_return,
                                               option_return_])))
            """ for 3 gems of one color """
            for option_return in (
                    gems_player.get_all_colors_on_condition(3)).difference(
                        list(option)):
                list_of_actions_trade.append(
                    ActionTradeGems(
                        tuple_of_gems_to_gems_collection(
                            option,
                            return_val=[3],
                            return_colors={option_return})))

    return list_of_actions_trade
import gin

from gym_splendor_code.envs.mechanics.state import State
from nn_models.architectures.average_pool_v0 import StateEncoder, ValueRegressor

# NOTE(review): machine-specific absolute path; the script only runs where
# this file exists.
PARAMS_GIN_PATH = '/home/tomasz/ML_Research/splendor/gym-splendor/nn_models/experiments/series_1/experiment_1/params.gin'
gin.parse_config_file(PARAMS_GIN_PATH)

# The encoder is created only after the gin configuration has been parsed.
x = StateEncoder()

# Print the encoder's value for a fresh state.
f = State()
encoded_value = x.get_value(f)
print(encoded_value)
Esempio n. 27
0
 def reset(self):
     """Start a new game.

     Clears ``is_done`` and ``first_winner``, builds a new State from
     ``self.all_cards`` and ``self.all_nobles`` and refreshes the actions.
     """
     self.is_done = False
     self.first_winner = None
     fresh_state = State(all_cards=self.all_cards,
                         all_nobles=self.all_nobles)
     self.current_state_of_the_game = fresh_state
     self.update_actions()
Esempio n. 28
0
 def state_to_features(self, state: State):
     """Build the features of ``state`` by joining four parts with ``+``.

     The parts are, in order: hand features of the active player, hand
     features of the other player, and the card statistics computed with
     the flag set to True and then to False.

     :param state: state to describe.
     :return: the four parts joined with ``+`` in the order above.
     """
     own_hand = state.active_players_hand()
     own_hand_features = self.hand_features(own_hand)
     rival_hand = state.other_players_hand()
     rival_hand_features = self.hand_features(rival_hand)
     own_cards_features = self.cards_stats(state, True)
     rival_cards_features = self.cards_stats(state, False)
     return (own_hand_features + rival_hand_features + own_cards_features
             + rival_cards_features)
Esempio n. 29
0
def _colors_coverable_by_gold(new_price, gems, amount):
    """Colors selected by ``get_colors_on_condition(amount)`` on both the price and the player's gems."""
    return new_price.get_colors_on_condition(amount).intersection(
        gems.get_colors_on_condition(amount))


def _gold_color_assignments(new_price, gems, n_gold):
    """List the color lists that ``n_gold`` gold gems can stand in for.

    Each branch below enumerates every way of splitting ``n_gold`` into
    per-color amounts (e.g. 4 = 2 + 1 + 1); a color may receive ``k`` gold
    gems only if it is in the set for amount ``k``. Only 1 to 5 gold gems
    are enumerated; any other amount yields an empty list.
    """
    if not 1 <= n_gold <= 5:
        return []

    by_amount = {
        amount: _colors_coverable_by_gold(new_price, gems, amount)
        for amount in range(1, n_gold + 1)
    }
    ones = by_amount[1]
    assignments = []

    if n_gold == 1:
        assignments.extend([color] for color in ones)

    elif n_gold == 2:
        # 2
        assignments.extend([color] * 2 for color in by_amount[2])
        # 1 + 1
        assignments.extend(list(pair) for pair in combinations(ones, 2))

    elif n_gold == 3:
        # 3
        assignments.extend([color] * 3 for color in by_amount[3])
        # 2 + 1
        for double in by_amount[2]:
            for single in ones.difference({double}):
                assignments.append([double, double, single])
        # 1 + 1 + 1
        assignments.extend(list(trio) for trio in combinations(ones, 3))

    elif n_gold == 4:
        # 4
        assignments.extend([color] * 4 for color in by_amount[4])
        # 3 + 1
        for triple in by_amount[3]:
            for single in ones.difference({triple}):
                assignments.append([triple] * 3 + [single])
        # 2 + 1 + 1
        for double in by_amount[2]:
            for singles in combinations(ones.difference({double}), 2):
                assignments.append([double] * 2 + list(singles))
        # 2 + 2
        for doubles in combinations(by_amount[2], 2):
            assignments.append(2 * list(doubles))
        # 1 + 1 + 1 + 1
        assignments.extend(list(quad) for quad in combinations(ones, 4))

    else:  # n_gold == 5
        # 5
        assignments.extend([color] * 5 for color in by_amount[5])
        # 4 + 1
        for quadruple in by_amount[4]:
            for single in ones.difference({quadruple}):
                assignments.append([quadruple] * 4 + [single])
        for triple in by_amount[3]:
            # 3 + 2
            for double in by_amount[2].difference({triple}):
                assignments.append([triple] * 3 + [double] * 2)
            # 3 + 1 + 1
            for singles in combinations(ones.difference({triple}), 2):
                assignments.append([triple] * 3 + list(singles))
        # 2 + 2 + 1
        for doubles in combinations(by_amount[2], 2):
            for single in ones.difference(set(doubles)):
                assignments.append([single] + 2 * list(doubles))
        # 2 + 1 + 1 + 1
        for double in by_amount[2]:
            for singles in combinations(ones.difference({double}), 3):
                assignments.append([double] * 2 + list(singles))
        # 1 + 1 + 1 + 1 + 1
        assignments.extend(list(five) for five in combinations(ones, 5))

    return assignments


def generate_all_legal_buys_fast_need_testing(
        state: State) -> List[ActionBuyCard]:
    """Returns the list of all possible actions of buys in a given current_state.

    For every card on the board or reserved by the active player that the
    player can afford, one action is generated per number of gold gems to use
    (from the minimum needed up to, and including, the number of gold gems
    possessed) and per way of choosing which colors the gold replaces.
    Using no gold produces a plain ``ActionBuyCard(card)``.

    :param state: state whose board and active player's hand are read.
    :return: list of ActionBuyCard actions.
    """
    hand = state.active_players_hand()
    discount = hand.discount()
    gems = hand.gems_possessed
    gold_possessed = gems.gems_dict[GemColor.GOLD]

    all_cards_can_afford = [card for card in state.board.cards_on_board if
                            hand.can_afford_card(card, discount)] + \
                           [reserved_card for reserved_card in hand.cards_reserved if
                            hand.can_afford_card(reserved_card, discount)]

    list_of_actions_buy = []
    for card in all_cards_can_afford:
        new_price = card.price % discount
        minimum_gold_needed = hand.min_gold_needed_to_buy_card(card)

        # The upper bound is inclusive: the player may use every gold gem
        # they own (and must be able to buy with zero gold when they own none).
        for n_gold_gems_to_use in range(minimum_gold_needed,
                                        gold_possessed + 1):
            if n_gold_gems_to_use == 0:
                list_of_actions_buy.append(ActionBuyCard(card))
                continue
            for colors in _gold_color_assignments(new_price, gems,
                                                  n_gold_gems_to_use):
                list_of_actions_buy.append(
                    ActionBuyCard(card, n_gold_gems_to_use,
                                  colors_to_gems_collection(colors)))

    return list_of_actions_buy
Esempio n. 30
0
# w1 = GemsCollecion({GemColor.GOLD: 1, GemColor.RED: 2, GemColor.GREEN: 5, GemColor.BLUE: 3, GemColor.WHITE: 4, GemColor.BLACK: 5})
# w2 = GemsCollecion({GemColor.GOLD: 2, GemColor.RED: 1, GemColor.GREEN: 6, GemColor.BLUE: 3, GemColor.WHITE: 7, GemColor.BLACK: 5})
# print(w1 <= w2)
from gym_splendor_code.envs.graphics.splendor_gui import SplendorGUI, GemColor
from gym_splendor_code.envs.mechanics.action_space_generator import generate_all_legal_reservations
from gym_splendor_code.envs.mechanics.players_hand import PlayersHand
from gym_splendor_code.envs.mechanics.state import State

# Draw the board of a fresh state and keep the window open.
s = State()
f = SplendorGUI()
f.draw_board(s.board, 200, 10)
f.keep_window_open()

# Testing the action generator: give the first player a fixed set of gems.
pla = PlayersHand()
for gem_color, amount in (
        (GemColor.BLUE, 4),
        (GemColor.GREEN, 4),
        (GemColor.RED, 2),
        (GemColor.WHITE, 0),
        (GemColor.BLACK, 0),
        (GemColor.GOLD, 0),
):
    pla.gems_possessed.gems_dict[gem_color] = amount

d = State()
d.list_of_players_hands = [pla, PlayersHand()]
print(d.active_players_hand().gems_possessed)

# Note: f is rebound here from the GUI to the list of reservation actions.
f = generate_all_legal_reservations(d)
print(len(f))
for du in f:
    print(du)