def test_rank(self):
    """Check ``TexasJudge.rank`` against each fixture's expected ranks.

    For every fixture list, the first element of each entry is passed to
    ``rank`` and the result (as a list) must equal the matching
    ``*_ranks`` fixture.
    """
    judge = TexasJudge()
    # Fixture attribute names; the expected ranks live under "<name>_ranks".
    for fixture_name in ("cards_levels", "dup_cards_levels", "two_cards_levels"):
        hands = [entry[0] for entry in getattr(self, fixture_name)]
        computed = judge.rank(hands)
        expected = getattr(self, fixture_name + "_ranks")
        self.assertEqual(list(computed), expected)
def test_dividing_money(self):
    """Check ``_divide_the_money`` on two fixed scenarios.

    Each scenario is the positional argument tuple passed to
    ``_divide_the_money`` together with the amounts expected back.
    """
    judge = TexasJudge()
    game = texas_games.NoLimitTexasGame(judge)
    scenarios = (
        ((500, [300, None, None, None], [0, 0, 1, 2]), [150, 350, 0, 0]),
        ((500, [300, 400, 200, None], [0, 0, 1, 1]), [150, 250, 0, 100]),
    )
    for call_args, expected in scenarios:
        shares = game._divide_the_money(*call_args)
        self.assertEqual(list(shares), expected)
def get_model(fname, big_blind):
    """Return a built-in agent by name, or unpickle an agent from a file.

    Args:
        fname: Either one of the built-in names ``"static"``, ``"brave"``
            or ``"random"``, or the path of a pickle file to load.
        big_blind: Big blind passed to the built-in agent constructors.

    Returns:
        The freshly constructed built-in agent, or whatever object the
        pickle file contains.

    Raises:
        OSError: If ``fname`` is not a built-in name and cannot be opened.
    """

    def _new_simulator():
        # Built lazily so loading a pickled model does not construct a
        # judge/simulator that would be thrown away.
        return Simulator(TexasJudge())

    # Factories instead of instances: only the requested agent is built.
    builtin_factories = {
        "static": lambda: naive_agents.StaticAgent(
            big_blind, 0, _new_simulator(), 1000),
        "brave": lambda: naive_agents.BraveAgent(
            big_blind, 0, _new_simulator()),
        "random": lambda: naive_agents.RandomAgent(big_blind, 0),
    }
    factory = builtin_factories.get(fname)
    if factory is not None:
        return factory()

    # NOTE(security): pickle.load can execute arbitrary code; only pass
    # paths to model files that come from a trusted source.
    with open(fname, "rb") as fin:
        return pickle.load(fin)
def test_get_pr(self):
    """Check ``Simulator.get_pr`` against coarse bounds for three hands.

    The comparison assertions are used instead of ``assertTrue`` so that a
    failure reports the actual value returned by ``get_pr``.
    """
    judge = TexasJudge()
    simulator = monte_carlo.Simulator(judge)

    # Heart 2 with diamond 3, using an explicit trial count.
    pr = simulator.get_pr(
        [(poker.PokerKind.heart, 2), (poker.PokerKind.diamond, 3)],
        trial_num=10000,
    )
    self.assertLessEqual(pr, 0.35)

    # Two aces of different kinds, default trial count.
    pr = simulator.get_pr(
        [
            (poker.PokerKind.heart, poker.PokerDigit.A),
            (poker.PokerKind.diamond, poker.PokerDigit.A),
        ],
    )
    self.assertGreaterEqual(pr, 0.80)

    # Heart ace with heart 13, default trial count.
    pr = simulator.get_pr(
        [(poker.PokerKind.heart, poker.PokerDigit.A), (poker.PokerKind.heart, 13)]
    )
    self.assertGreaterEqual(pr, 0.60)
def start_game(agents, big_blind, max_epoch, seed):
    """Play up to ``max_epoch`` hands between ``agents`` and print results.

    Every agent starts with ``big_blind * 100`` chips, recorded as a
    negative entry in a per-agent ledger. After each hand the seating is
    rotated by one; an agent holding less than ``big_blind`` either gets
    another buy-in (charged to its ledger) or, once its ledger has reached
    ten buy-ins of debt, is removed from the table. The loop stops early
    when fewer than four agents remain.

    Args:
        agents: List of agent objects taking part.
        big_blind: Big blind handed to the game.
        max_epoch: Maximum number of hands to play.
        seed: Seed passed to ``np.random.seed``.
    """
    judge = TexasJudge()
    np.random.seed(seed)
    game = NoLimitTexasGame(judge, big_blind)

    max_bankrupt_num = 10
    min_agent_num = 4
    buy_in = big_blind * 100

    for player in agents:
        player.set_amount(buy_in)
    ledger = dict.fromkeys(agents, -buy_in)
    # Snapshot of the initial seating for the final report.
    initial_seating = agents[:]

    epoch = 0
    while epoch < max_epoch:
        epoch += 1
        amounts = game.run_a_hand(agents, is_verbose=False)
        for seat, player in enumerate(agents):
            player.set_reward(amounts[seat])

        if epoch % 100 == 0:
            print("Hand", epoch)
            for player in agents:
                print(player.get_name(), ledger[player], player.get_amount())

        # Rotate seating: the last agent moves to the front.
        agents = agents[-1:] + agents[:-1]

        leaving_seats = set()
        for seat, player in enumerate(agents):
            if player.get_amount() >= big_blind:
                continue
            if ledger[player] <= -max_bankrupt_num * buy_in:
                # Out of credit: cash out the remaining chips and leave.
                ledger[player] += player.get_amount()
                player.set_amount(0)
                leaving_seats.add(seat)
            else:
                # Still has credit: charge another buy-in.
                ledger[player] -= buy_in
                player.set_amount(player.get_amount() + buy_in)

        if leaving_seats:
            agents = [
                player
                for seat, player in enumerate(agents)
                if seat not in leaving_seats
            ]
        if len(agents) < min_agent_num:
            break

    print("Final results -", epoch)
    for player in initial_seating:
        print(player.get_name(), player.get_amount() + ledger[player])
def _get_level_prs(self, cards, community_cards):
    """Pair simulated and directly calculated values for every level.

    Args:
        cards: Hole cards passed to both calculators.
        community_cards: Community cards passed to both calculators.

    Returns:
        A list with one ``(level name, simulated count / 10000,
        ApproxCalc value)`` tuple per ``TexasLevel`` member; levels missing
        from either result default to ``0`` / ``0.0``.
    """
    judge = TexasJudge()
    sampler = monte_carlo.Simulator(judge)
    approx = direct_calc.ApproxCalc()
    trial_count = 10000

    sampled_counts = sampler.get_level_counts(
        cards, community_cards, trial_num=trial_count)
    approx_prs = approx.get_level_prs(cards + community_cards)

    return [
        (
            level.name,
            sampled_counts.get(level, 0) / trial_count,
            approx_prs.get(level, 0.0),
        )
        for level in TexasLevel
    ]
def start_game(agent_num, is_public, models, seed):
    """Run an interactive game until any agent falls below the big blind.

    The table is filled with ``StaticAgent`` instances, then one loaded
    agent per entry of ``models``, and finally a ``HumanAgent`` in the last
    seat, for ``agent_num`` seats in total.

    Args:
        agent_num: Total number of seats at the table.
        is_public: Forwarded to ``run_a_hand`` as ``is_public``.
        models: Sequence of values passed to ``load_model``.
        seed: Game seed; when ``None`` a random one in [0, 32768] is drawn.
    """
    judge = TexasJudge()
    simulator = Simulator(judge)
    big_blind = 20

    # Seats left over after the loaded models and the human are filled
    # with static agents.
    filler_count = agent_num - len(models) - 1
    agents = [
        naive_agents.StaticAgent(big_blind, 2000, simulator, 1000)
        for _ in range(filler_count)
    ]
    for model_ref in models:
        loaded = load_model(model_ref)
        agents.append(loaded)
        loaded.set_amount(2000)
        loaded.is_test = True
    agents.append(human_agent.HumanAgent(big_blind, 2000, simulator))

    for seat, player in enumerate(agents):
        player._name = None
        player._agent_index = seat

    if seed is None:
        seed = random.randint(0, 32768)
    print("seed", seed)
    game = NoLimitTexasGame(judge, big_blind, seed=seed)

    while True:
        amounts = game.run_a_hand(agents, is_verbose=True, is_public=is_public)
        for seat, amount in enumerate(amounts):
            print(agents[seat].get_name(), amount)
        print()

        someone_bankrupt = False
        for seat, player in enumerate(agents):
            player.set_reward(amounts[seat])
            if player.get_amount() < big_blind:
                someone_bankrupt = True
        if someone_bankrupt:
            break

    # End of game: report what everyone has left.
    print("Left amount")
    for seat, player in enumerate(agents):
        print("Agent%d" % seat, player.get_amount())
def start_game():
    """Run an endless self-play loop of six ``InnerKeyStateAgent`` players.

    All six agents share the ``q_learner`` taken from a pickled model.
    The loop never terminates on its own (``itertools.count``); it prints
    progress every 1000 epochs, reward statistics every 10000 epochs, and
    pickles the agents listed in ``test_agent_indexes`` every 100000 epochs.
    """
    judge = TexasJudge()
    pr_calc = ApproxComparer()
    big_blind = 20
    # Epoch numbering continues from this value (first epoch is +1).
    start_epoch = 1000000
    # NOTE(review): this freshly built learner is replaced by
    # model.q_learner a few lines below and is never used.
    learner = q_learner.QLearner(
        bandit.EpsilonGreedySampler(eps_end=0.2, eps_decay=100000), 0.01)
    quantizer2 = key_state_agent.State2Quantizer()
    model = load_model("models/InnerKeyStateAgent-q2-vs_naive-1000000.pkl")
    learner = model.q_learner
    # Six agents sharing the same quantizer and learner objects.
    agents = [
        key_state_agent.InnerKeyStateAgent(big_blind, 2000, pr_calc, quantizer2, learner),
        key_state_agent.InnerKeyStateAgent(big_blind, 2000, pr_calc, quantizer2, learner),
        key_state_agent.InnerKeyStateAgent(big_blind, 2000, pr_calc, quantizer2, learner),
        key_state_agent.InnerKeyStateAgent(big_blind, 2000, pr_calc, quantizer2, learner),
        key_state_agent.InnerKeyStateAgent(big_blind, 2000, pr_calc, quantizer2, learner),
        key_state_agent.InnerKeyStateAgent(big_blind, 2000, pr_calc, quantizer2, learner),
    ]
    # Indexes of the agents whose test rewards are reported and which are
    # periodically saved to disk.
    test_agent_indexes = [5]
    # Exponentially weighted running rewards per agent (all hands / test hands).
    acc_rewards = [0] * len(agents)
    acc_test_rewards = [0] * len(agents)
    trial_num = 0
    test_trial_num = 0
    # Per agent, only the most recent 100 test-hand rewards are kept.
    test_rewards_list = [deque(maxlen=100) for _ in range(len(agents))]
    np.random.seed(0)
    game = NoLimitTexasGame(judge, big_blind)
    for epoch in itertools.count(start=start_epoch + 1):
        # Top up or reset: any stack below the big blind or above
        # 2000 * 10 is set back to 2000.
        for n, agent in enumerate(agents):
            if agent.get_amount() < big_blind or agent.get_amount() > 2000 * 10:
                agent.set_amount(2000)
        # Two epochs out of every ten (remainder 1 or 2) are flagged as test.
        is_test = epoch % 10 in (1, 2)
        for agent in agents:
            agent.is_test = is_test
        # Play one hand and hand each agent its result.
        amounts = game.run_a_hand(agents, is_verbose=False)
        for n, agent in enumerate(agents):
            agent.set_reward(amounts[n])
        trial_num += 1
        for n, agent in enumerate(agents):
            acc_rewards[n] = acc_rewards[n] * 0.9999 + amounts[n] * 0.0001
        if is_test:
            test_trial_num += 1
            for idx in range(len(agents)):
                acc_test_rewards[idx] = acc_test_rewards[idx] * 0.9999 + amounts[idx] * 0.0001
                test_rewards_list[idx].append(amounts[idx])
        if epoch % 1000 == 0:
            print(epoch, end=" ", flush=True)
        if epoch % 10000 == 0:
            print()
            # NOTE(review): both running averages are divided by trial_num
            # (not test_trial_num for the test one) before printing --
            # confirm this is the intended normalisation.
            for n, agent in enumerate(agents):
                print("\tagent%d" % n, "acc-reward", acc_rewards[n] / trial_num,
                      "acc-test-reward", acc_test_rewards[n] / trial_num, flush=True)
            for idx in test_agent_indexes:
                # Nothing to analyse until at least one test hand was played.
                if test_trial_num <= 0:
                    break
                ana_test_rewards = np.array(test_rewards_list[idx])
                print("epoch", epoch, "index", idx, "avg-test-reward/std",
                      ana_test_rewards.sum() / len(ana_test_rewards),
                      ana_test_rewards.std(), flush=True)
        # Checkpoint only every 100000 epochs.
        if epoch % 100000 != 0:
            continue
        for idx in test_agent_indexes:
            with open(
                    "models/InnerKeyStateAgent-index%d-%d.pkl" % (idx, epoch),
                    "wb") as fout:
                pickle.dump(agents[idx], fout)
def test_arg_max(self):
    """Check that ``argmax`` over the fixture hands returns ``[0]``."""
    judge = TexasJudge()
    # Each fixture entry's first element is the hand itself.
    hands = [entry[0] for entry in self.cards_levels]
    winning_indexes = judge.argmax(hands)
    self.assertEqual(winning_indexes, [0])
def test_level_judgement(self):
    """Check the level from ``_get_level_suit`` for every fixture hand.

    The hand itself is used as the assertion message so a failure shows
    which cards were misjudged.
    """
    judge = TexasJudge()
    for hand, expected_level in self.cards_levels:
        # Only the level is checked; the second returned value is ignored.
        actual_level, _ = judge._get_level_suit(hand)
        self.assertEqual(expected_level, actual_level, hand)