Esempio n. 1
0
 def test_win(self):
     """Check the utilities returned for a fixed deal with spades as goal suit."""
     game = Figgie()
     game.goal_suit = Suit.SPADES
     # Row i is player i's hand; here every player holds a single suit.
     hands = [
         [12, 0, 0, 0],
         [0, 10, 0, 0],
         [0, 0, 8, 0],
         [0, 0, 0, 10],
     ]
     game.cards = np.array(hands)
     expected = [200, 200, 200, 400]
     self.assertEqual(expected, game.get_utility().tolist())
Esempio n. 2
0
def test(agents: list, trials: int, verbose=False):
    """Play ``trials`` games between ``agents`` and print a per-agent report.

    Args:
        agents: Agents handed to ``Figgie.test``. After the run each agent is
            read for ``name``, ``wins``, ``total_utility``, ``operations``,
            ``faded_price``, ``priced_operations`` and ``collector``.
        trials: Number of trials handed to ``Figgie.test``; also the divisor
            used for the reported averages.
        verbose: Forwarded unchanged to ``Figgie.test``.

    Side effects:
        Prints timing and statistics to stdout and pickles the
        ``training_data`` of every agent whose ``collector`` attribute is
        truthy to ``ann/training_data.pickle``.
    """
    figgie = Figgie()
    start_time = time.process_time()
    figgie.test(agents, trials, verbose=verbose)
    total_time = time.process_time() - start_time
    print('Testing took {} seconds'.format(total_time))

    print('Results: ')
    for i, agent in enumerate(agents):
        print('agent {}: ({})'.format(i, agent.name))
        print('\twins: {}'.format(agent.wins))
        print('\tavg. utility: {}, total utility: {}'.format(
            agent.total_utility / trials, agent.total_utility))
        print('\toperations: {}'.format(
            {key: agent.operations[key]
             for key in sorted(agent.operations)}))

        # An agent that never made a priced operation has nothing to average;
        # report 'n/a' instead of raising ZeroDivisionError.
        if agent.priced_operations:
            faded_prices = agent.faded_price / agent.priced_operations
        else:
            faded_prices = 'n/a'
        print('\tfaded prices: {}'.format(faded_prices))

        avg_operations = agent.get_avg_operations(trials)
        print('\tavg. operations: {}'.format(
            {key: avg_operations[key]
             for key in sorted(avg_operations)}))
        if isinstance(agent, RegretAgent):
            print('\tavg unknown states: {}, unknown states: {}'.format(
                agent.unknown_states / trials, agent.unknown_states))

    # NOTE(review): every collector agent writes to the same path, so with
    # more than one collector only the last agent's data survives -- confirm
    # that at most one collector is expected.
    for agent in agents:
        if agent.collector:
            with open('ann/training_data.pickle', 'wb') as file:
                pickle.dump(agent.training_data, file)
Esempio n. 3
0
    def test_deal(self):
        """Deal ten times and check the per-suit card totals after each deal."""
        game = Figgie()

        for _ in range(10):
            game.deal()
            # Add up the four players' hands to get the card count per suit.
            hands = game.cards
            suit_totals = hands[0] + hands[1] + hands[2] + hands[3]

            goal = game.goal_suit
            goal_count = suit_totals[goal.value]
            self.assertTrue(goal_count == 8 or goal_count == 10)
            self.assertEqual(12, suit_totals[goal.opposite().value])
            self.assertEqual(40, np.sum(suit_totals))
Esempio n. 4
0
def train(iterations: int, prev_iterations: int, info_set: InfoSetGenerator,
          model: UtilityModel, game_tree):
    """Train a ``RegretAgent`` for ``iterations`` rounds, report stats, and save.

    Args:
        iterations: Number of training iterations to run now.
        prev_iterations: Iteration count already behind ``game_tree``; added
            to ``iterations`` when saving.
        info_set: Info-set generator given to the agent; its ``name`` is
            passed to ``save``.
        model: Utility model shared by the regret agent and the
            ``ModularAgent`` it wraps.
        game_tree: Existing game tree passed to the agent.
    """
    modular_agent = ModularAgent(model, RandomPricer())
    agent = RegretAgent(model, info_set, modular_agent, game_tree=game_tree)
    game = Figgie()

    # Time the training run in CPU seconds.
    began = time.process_time()
    agent.train(game, iterations)
    elapsed = time.process_time() - began
    print('\tTraining took {} seconds '.format(elapsed))

    print('\tStrategy: ')
    print('\t\tinfo sets: {}'.format(len(agent.game_tree)))

    # Population mean / standard deviation of per-node observation counts.
    counts = [node.observations for node in agent.game_tree.values()]
    count_total = len(counts)
    mean = sum(counts) / count_total
    variance = sum((c - mean)**2 for c in counts) / count_total
    dev = variance**0.5

    print('\t\tavg observations: {}'.format(mean))
    print('\t\tmedian observations: {}'.format(median(counts)))
    print('\t\tstd dev. observations: {}'.format(dev))

    # Time the save separately from training.
    began = time.process_time()
    total_iterations = prev_iterations + iterations
    file_name = save(agent.game_tree, total_iterations, agent.util_model.name,
                     info_set.name)
    elapsed = time.process_time() - began
    print('\tSaving to {} took {} seconds'.format(file_name, elapsed))
Esempio n. 5
0
    def test_bid(self):
        """For every suit, bid 7 as the active player and check the market."""
        game = Figgie()
        price = 7
        for suit in SUITS:
            # Give every player two cards of each suit.
            game.cards = np.full((4, 4), 2, dtype=int)

            bidder = game.active_player
            market = game.markets[suit.value]

            allowed, _ = market.can_bid(bidder, price)
            self.assertTrue(allowed)

            game.preform(('bid', suit, price))
            self.assertEqual(
                price, market.buying_price,
                'Buying price not set properly with bid operation')
            self.assertEqual(1, market.operations)
            game.reset()
Esempio n. 6
0
    def test_ask(self):
        """For every suit, check asking is refused without cards and works with them."""
        game = Figgie()
        price = 7
        for suit in SUITS:
            # Give every player two cards of each suit.
            game.cards = np.full((4, 4), 2, dtype=int)

            seller = game.active_player
            seller_hand = game.cards[seller]
            market = game.markets[suit.value]

            # With no card of the suit in hand the ask must be rejected.
            seller_hand[suit.value] = 0
            allowed, _ = market.can_ask(seller, price)
            self.assertFalse(allowed)

            # With the cards restored the same ask must be accepted.
            seller_hand[suit.value] = 2
            allowed, _ = market.can_ask(seller, price)
            self.assertTrue(allowed)

            game.preform(('ask', suit, price))
            self.assertEqual(
                price, market.selling_price,
                'Selling price not set properly with ask operation')
            self.assertEqual(1, market.operations)
            game.reset()
Esempio n. 7
0
    def test_buy(self):
        """For every suit, post an ask at 7, have the next active player buy,
        and check chips, cards and market bookkeeping afterwards.
        """
        figgie = Figgie()
        for suit in SUITS:
            # Give every player two cards of each suit.
            figgie.cards = np.full((4, 4), 2, dtype=int)

            asking_player = figgie.active_player
            market = figgie.markets[suit.value]

            # Check the player who is about to ask rather than a hard-coded
            # seat, so the precondition matches the operation performed.
            can, _ = market.can_ask(asking_player, 7)
            self.assertTrue(can)
            figgie.preform(('ask', suit, 7))
            self.assertEqual(
                7, market.selling_price,
                'Selling price not set properly with ask operation')
            self.assertEqual(1, market.operations)

            # If the asker is still the active player, pass before buying so
            # the buy is not attempted by the asker immediately.
            if figgie.active_player == asking_player:
                figgie.preform(('pass', ))

            player = figgie.active_player

            figgie.preform(('buy', suit))
            # A completed trade must clear the prices of every market.
            for s in SUITS:
                self.assertIsNone(figgie.markets[s.value].buying_price,
                                  'Market not reset after buy')
                self.assertIsNone(figgie.markets[s.value].selling_price,
                                  'Market not reset after buy')
            self.assertEqual(STARTING_CHIPS - 7, figgie.chips[player],
                             'Chips not properly subtracted')
            self.assertEqual(STARTING_CHIPS + 7, figgie.chips[asking_player],
                             'Chips not properly added')
            self.assertEqual(3, figgie.cards[player][suit.value],
                             'card not properly added')
            self.assertEqual(1, figgie.cards[asking_player][suit.value],
                             'card not properly subtracted')
            self.assertEqual(1, market.transactions)
            self.assertEqual(7, market.last_price)
            self.assertEqual(2, market.operations)

            figgie.reset()
Esempio n. 8
0
    def __train(self, figgie: Figgie, pi: float, pi_prime: float,
                training_player: int) -> tuple:
        """Recursively play one sampled line of play to the end of the game,
        updating the game-tree nodes visited on the way back.

        The game is advanced in place through ``figgie.preform``. Moves that
        involve no price choice (a best transaction, or a pass) recurse
        without touching the game tree. Otherwise a price is sampled for the
        chosen operation and the matching node is updated after the
        recursive call returns.

        NOTE(review): the structure resembles outcome-sampling counterfactual
        regret minimisation -- confirm against the intended algorithm.

        Args:
            figgie: Game state; mutated by every action performed.
            pi: Multiplied by ``strategy[action_index]`` at each sampled
                decision of the training player; only propagated, never read
                for an update in this method.
            pi_prime: Multiplied by the sampling probability of the chosen
                action at each sampled decision of the training player.
            training_player: Player whose nodes get regret updates; nodes of
                other players get strategy-sum updates instead.

        Returns:
            A ``(utility, tail_probability)`` pair. At a terminal state this
            is ``(utility[player] / pi_prime, 1.0)``. At a sampled decision
            the utility from the recursive call is negated, and the tail
            probability is multiplied by ``strategy[action_index]`` when the
            acting player is the training player.
        """
        # NOTE(review): `player` is read again a few lines below; this first
        # read is only needed by the terminal branch.
        player = figgie.active_player
        if figgie.is_terminal():
            utility = figgie.get_utility()
            return utility[player] / pi_prime, 1.0

        player = figgie.active_player
        model_values = self.util_model.get_card_values(figgie, player)
        best_transaction = ModularAgent.get_best_transaction(
            figgie, model_values)
        # A best transaction is taken directly; the result of the rest of the
        # game is returned unchanged (no negation, no node update).
        if best_transaction is not None:
            figgie.preform(best_transaction)
            return self.__train(figgie, pi, pi_prime, training_player)

        best_operation, best_suit = ModularAgent.get_best_operation(
            figgie, model_values)
        if best_operation is None:
            # Nothing to price: pass and continue.
            figgie.preform(('pass', ))
            return self.__train(figgie, pi, pi_prime, training_player)
        else:
            info_set = self.info_set_generator.generate_info_set(
                figgie, model_values[best_suit.value], best_operation,
                best_suit)
            actions = self.info_set_generator.generate_actions(
                figgie, model_values[best_suit.value], best_operation,
                best_suit)
            assert len(actions) != 0, 'Length of actions == 0'
            # Look up the node for this info set, creating it on first visit.
            if info_set in self.game_tree:
                node = self.game_tree[info_set]
            else:
                node = GameNode(len(actions))
                self.game_tree[info_set] = node

        strategy = node.get_strategy()

        # The training player samples from a mix of the uniform distribution
        # (weight epsilon) and the node strategy (weight 1 - epsilon); other
        # players sample from the node strategy itself.
        epsilon = 0.6
        if player == training_player:
            probability = epsilon / len(actions) + ((1.0 - epsilon) * strategy)
        else:
            probability = np.copy(strategy)

        action_index = random.choices(range(len(actions)),
                                      weights=probability)[0]
        price = actions[action_index]
        # If the sampled price exceeds the active player's chips, pass instead
        # and return without updating the node. For 'at' only price[0] is
        # checked -- presumably the buying side of a price pair; TODO confirm.
        if best_operation == 'bid' and price > figgie.chips[figgie.active_player]\
                or best_operation == 'at' and price[0] > figgie.chips[figgie.active_player]:
            figgie.preform(('pass', ))
            return self.__train(figgie, pi, pi_prime, training_player)
        action = self.create_action(best_operation, best_suit, price)
        figgie.preform(action)
        # Reach probabilities are only extended at the training player's own
        # decisions.
        result = self.__train(
            figgie, pi * strategy[action_index], pi_prime *
            probability[action_index],
            training_player) if player == training_player else self.__train(
                figgie, pi, pi_prime, training_player)
        # The utility coming back from the recursive call is negated here.
        util = -result[0]
        p_tail = result[1]

        if player == training_player:
            w = util * p_tail
            # NOTE(review): the published outcome-sampling update gives the
            # sampled action w * (1 - strategy[a]) and every other action
            # -w * strategy[a]; here the two roles look swapped -- confirm
            # this is intended.
            regret = w * (1 - strategy)
            regret[action_index] = -w * strategy[action_index]
            node.sum_regret += regret
        else:
            node.sum_strategy += strategy / pi_prime

        node.observations += 1

        if player == training_player:
            return util, p_tail * strategy[action_index]
        else:
            return util, p_tail
Esempio n. 9
0
 def train(self, game: Figgie, trials: int):
     """Run ``trials`` training passes on ``game``, resetting it after each.

     The training player rotates through the seats: pass ``k`` trains
     player ``k % NUM_PLAYERS``.
     """
     for trial in range(trials):
         training_player = trial % NUM_PLAYERS
         self.__train(game, 1.0, 1.0, training_player)
         game.reset()