def test_win(self):
    """Check get_utility payouts for a fixed end-of-round card distribution."""
    game = Figgie()
    game.goal_suit = Suit.SPADES
    game.cards = np.array([[12, 0, 0, 0],
                           [0, 10, 0, 0],
                           [0, 0, 8, 0],
                           [0, 0, 0, 10]])
    payouts = game.get_utility()
    self.assertEqual([200, 200, 200, 400], payouts.tolist())
def test(agents: list, trials: int, verbose=False):
    """Play `trials` games with `agents` and print per-agent statistics.

    Also dumps any collecting agent's training data to
    'ann/training_data.pickle' afterwards.
    """
    figgie = Figgie()
    started = time.process_time()
    figgie.test(agents, trials, verbose=verbose)
    print(f'Testing took {time.process_time() - started} seconds')
    print('Results: ')
    for index, agent in enumerate(agents):
        print(f'agent {index}: ({agent.name})')
        print(f'\twins: {agent.wins}')
        print(f'\tavg. utility: {agent.total_utility / trials}, '
              f'total utility: {agent.total_utility}')
        ops = {key: agent.operations[key] for key in sorted(agent.operations)}
        print(f'\toperations: {ops}')
        print(f'\tfaded prices: {agent.faded_price / agent.priced_operations}')
        avg_ops = agent.get_avg_operations(trials)
        avg_sorted = {key: avg_ops[key] for key in sorted(avg_ops)}
        print(f'\tavg. operations: {avg_sorted}')
        if isinstance(agent, RegretAgent):
            print(f'\tavg unknown states: {agent.unknown_states / trials}, '
                  f'unknown states: {agent.unknown_states}')
    # Persist collected training data for any data-collecting agent.
    for agent in agents:
        if agent.collector:
            with open('ann/training_data.pickle', 'wb') as file:
                pickle.dump(agent.training_data, file)
def test_deal(self):
    """After each deal: goal suit totals 8 or 10 cards, its opposite 12, 40 overall."""
    game = Figgie()
    for _ in range(10):
        game.deal()
        totals = np.sum(game.cards, axis=0)
        goal = game.goal_suit.value
        self.assertTrue(totals[goal] in (8, 10))
        self.assertEqual(12, totals[game.goal_suit.opposite().value])
        self.assertEqual(40, np.sum(totals))
def train(iterations: int, prev_iterations: int, info_set: InfoSetGenerator, model: UtilityModel, game_tree):
    """Train a RegretAgent by self-play, report game-tree statistics, and save.

    `prev_iterations` is added to `iterations` only for the count recorded
    when the game tree is saved.
    """
    agent = RegretAgent(model, info_set, ModularAgent(model, RandomPricer()),
                        game_tree=game_tree)
    figgie = Figgie()
    started = time.process_time()
    agent.train(figgie, iterations)
    print('\tTraining took {} seconds '.format(time.process_time() - started))
    print('\tStrategy: ')
    print('\t\tinfo sets: {}'.format(len(agent.game_tree)))
    observations = [node.observations for node in agent.game_tree.values()]
    count = len(observations)
    mean = sum(observations) / count
    # Population variance -> standard deviation of node observation counts.
    variance = sum((obs - mean) ** 2 for obs in observations) / count
    print('\t\tavg observations: {}'.format(mean))
    print('\t\tmedian observations: {}'.format(median(observations)))
    print('\t\tstd dev. observations: {}'.format(variance ** 0.5))
    started = time.process_time()
    file_name = save(agent.game_tree, prev_iterations + iterations,
                     agent.util_model.name, info_set.name)
    print('\tSaving to {} took {} seconds'.format(
        file_name, time.process_time() - started))
def test_bid(self):
    """A legal bid sets the market's buying price and counts one operation."""
    game = Figgie()
    for suit in SUITS:
        game.cards = np.full((4, 4), 2, dtype=int)
        bidder = game.active_player
        market = game.markets[suit.value]
        allowed, _ = market.can_bid(bidder, 7)
        self.assertTrue(allowed)
        game.preform(('bid', suit, 7))
        self.assertEqual(
            7, market.buying_price,
            'Buying price not set properly with bid operation')
        self.assertEqual(1, market.operations)
        game.reset()
def test_ask(self):
    """Asking requires holding the suit; a legal ask sets the selling price."""
    game = Figgie()
    for suit in SUITS:
        game.cards = np.full((4, 4), 2, dtype=int)
        seller = game.active_player
        hand = game.cards[seller]
        market = game.markets[suit.value]
        # Without any cards of the suit, the ask must be rejected.
        hand[suit.value] = 0
        allowed, _ = market.can_ask(seller, 7)
        self.assertFalse(allowed)
        # With cards in hand, the ask goes through.
        hand[suit.value] = 2
        allowed, _ = market.can_ask(seller, 7)
        self.assertTrue(allowed)
        game.preform(('ask', suit, 7))
        self.assertEqual(
            7, market.selling_price,
            'Selling price not set properly with ask operation')
        self.assertEqual(1, market.operations)
        game.reset()
def test_buy(self):
    """Buying at the ask transfers chips and a card, then resets all markets."""
    figgie = Figgie()
    for suit in SUITS:
        figgie.cards = np.full((4, 4), 2, dtype=int)
        player = figgie.active_player
        market = figgie.markets[suit.value]
        # FIX: check the ask for the actual active player rather than the
        # hard-coded seat 0 — consistent with test_bid/test_ask, and correct
        # even when the active player rotates between loop iterations.
        can, _ = market.can_ask(player, 7)
        self.assertTrue(can)
        asking_player = player
        figgie.preform(('ask', suit, 7))
        self.assertEqual(
            7, market.selling_price,
            'Selling price not set properly with ask operation')
        self.assertEqual(1, market.operations)
        # The asker may not buy from themselves; advance to another seat.
        if figgie.active_player == asking_player:
            figgie.preform(('pass', ))
        player = figgie.active_player
        figgie.preform(('buy', suit))
        # Every market clears its standing prices after a transaction.
        for s in SUITS:
            self.assertEqual(figgie.markets[s.value].buying_price, None,
                             'Market not reset after buy')
            self.assertEqual(figgie.markets[s.value].selling_price, None,
                             'Market not reset after buy')
        self.assertEqual(STARTING_CHIPS - 7, figgie.chips[player],
                         'Chips not properly subtracted')
        self.assertEqual(STARTING_CHIPS + 7, figgie.chips[asking_player],
                         'Chips not properly added')
        self.assertEqual(3, figgie.cards[player][suit.value],
                         'card not properly added')
        self.assertEqual(1, figgie.cards[asking_player][suit.value],
                         'card not properly subtracted')
        self.assertEqual(1, market.transactions)
        self.assertEqual(7, market.last_price)
        self.assertEqual(2, market.operations)
        figgie.reset()
def __train(self, figgie: Figgie, pi: float, pi_prime: float, training_player: int) -> tuple:
    """One recursive self-play pass updating regrets for `training_player`.

    Appears to implement an outcome-sampling, CFR-style update: a single
    action is sampled at each decision node, the training player explores
    with probability epsilon, and regrets/average strategy are accumulated
    on the way back up. Returns (utility / sample probability, tail
    probability of the sampled trajectory).
    """
    player = figgie.active_player
    if figgie.is_terminal():
        # Terminal payoff for the player to act, importance-weighted by
        # the sampling probability accumulated so far.
        utility = figgie.get_utility()
        return utility[player] / pi_prime, 1.0
    player = figgie.active_player
    model_values = self.util_model.get_card_values(figgie, player)
    # Forced moves (profitable transactions, or no legal operation) recurse
    # without touching the game tree.
    best_transaction = ModularAgent.get_best_transaction(figgie, model_values)
    if best_transaction is not None:
        figgie.preform(best_transaction)
        return self.__train(figgie, pi, pi_prime, training_player)
    best_operation, best_suit = ModularAgent.get_best_operation(figgie, model_values)
    if best_operation is None:
        figgie.preform(('pass', ))
        return self.__train(figgie, pi, pi_prime, training_player)
    else:
        # Look up (or create) the regret node for this information set.
        info_set = self.info_set_generator.generate_info_set(
            figgie, model_values[best_suit.value], best_operation, best_suit)
        actions = self.info_set_generator.generate_actions(
            figgie, model_values[best_suit.value], best_operation, best_suit)
        assert len(actions) != 0, 'Length of actions == 0'
        if info_set in self.game_tree:
            node = self.game_tree[info_set]
        else:
            node = GameNode(len(actions))
            self.game_tree[info_set] = node
        strategy = node.get_strategy()
        # Epsilon-greedy sampling for the trained seat; pure on-policy
        # sampling for opponents.
        epsilon = 0.6
        if player == training_player:
            probability = epsilon / len(actions) + ((1.0 - epsilon) * strategy)
        else:
            probability = np.copy(strategy)
        action_index = random.choices(range(len(actions)), weights=probability)[0]
        price = actions[action_index]
        # A sampled price the player cannot afford degenerates to a pass.
        # NOTE(review): for 'at' operations `price` is presumably a
        # (bid, ask) pair — confirm against create_action.
        if best_operation == 'bid' and price > figgie.chips[figgie.active_player]\
                or best_operation == 'at' and price[0] > figgie.chips[figgie.active_player]:
            figgie.preform(('pass', ))
            return self.__train(figgie, pi, pi_prime, training_player)
        action = self.create_action(best_operation, best_suit, price)
        figgie.preform(action)
        # Only the training player's reach/sample probabilities advance.
        result = self.__train(
            figgie, pi * strategy[action_index],
            pi_prime * probability[action_index],
            training_player) if player == training_player else self.__train(
            figgie, pi, pi_prime, training_player)
        # Utility is negated on the way back up (next actor's perspective).
        util = -result[0]
        p_tail = result[1]
        if player == training_player:
            # Counterfactual regret update for the sampled action.
            w = util * p_tail
            regret = w * (1 - strategy)
            regret[action_index] = -w * strategy[action_index]
            node.sum_regret += regret
        else:
            # Opponent nodes accumulate the average-strategy numerator.
            # NOTE(review): observations is reconstructed as incrementing
            # only on opponent visits — confirm original indentation.
            node.sum_strategy += strategy / pi_prime
            node.observations += 1
        if player == training_player:
            return util, p_tail * strategy[action_index]
        else:
            return util, p_tail
def train(self, game: Figgie, trials: int):
    """Run `trials` training games, rotating the trained seat each game."""
    for trial in range(trials):
        seat = trial % NUM_PLAYERS
        self.__train(game, 1.0, 1.0, seat)
        game.reset()