Esempio n. 1
0
 def test_rank(self):
     """Check ``TexasJudge.rank`` against the expected ranks of each fixture.

     For every fixture attribute, the first element of each entry is fed
     to the judge and the result is compared with the matching
     ``<fixture>_ranks`` attribute.
     """
     judge = TexasJudge()
     for fixture in ("cards_levels", "dup_cards_levels", "two_cards_levels"):
         hands = [entry[0] for entry in getattr(self, fixture)]
         ranks = judge.rank(hands)
         # ``rank`` may return any iterable, hence the list() conversion.
         self.assertEqual(list(ranks), getattr(self, fixture + "_ranks"))
Esempio n. 2
0
 def test_dividing_money(self):
     """Check how ``_divide_the_money`` splits a pot of 500 in two scenarios."""
     judge = TexasJudge()
     game = texas_games.NoLimitTexasGame(judge)
     # Each row: (pot, second argument, third argument, expected payouts).
     # NOTE(review): the second argument looks like per-player caps (None
     # meaning uncapped) and the third like hand ranks -- confirm against
     # ``_divide_the_money``.
     scenarios = (
         (500, [300, None, None, None], [0, 0, 1, 2], [150, 350, 0, 0]),
         (500, [300, 400, 200, None], [0, 0, 1, 1], [150, 250, 0, 100]),
     )
     for pot, caps, ranks, expected in scenarios:
         amounts = game._divide_the_money(pot, caps, ranks)
         self.assertEqual(list(amounts), expected)
Esempio n. 3
0
def get_model(fname, big_blind):
    """Return the agent designated by ``fname``.

    Args:
        fname: ``"static"``, ``"brave"`` or ``"random"`` to build the
            corresponding naive agent; any other value is treated as the
            path of a pickled model file.
        big_blind: forwarded as the first constructor argument of the
            naive agents.

    Returns:
        A freshly built naive agent, or the object unpickled from ``fname``.

    Raises:
        OSError: if ``fname`` is not a built-in name and cannot be opened.
    """
    def _simulator():
        # Only the static and brave agents need a simulator, so build it
        # on demand rather than on every call.
        return Simulator(TexasJudge())

    # Factories instead of instances: only the requested agent is built.
    builders = {
        "static": lambda: naive_agents.StaticAgent(
            big_blind, 0, _simulator(), 1000),
        "brave": lambda: naive_agents.BraveAgent(big_blind, 0, _simulator()),
        "random": lambda: naive_agents.RandomAgent(big_blind, 0),
    }
    builder = builders.get(fname)
    if builder is not None:
        return builder()

    # WARNING: unpickling can execute arbitrary code; only pass paths of
    # model files that come from a trusted source.
    with open(fname, "rb") as fin:
        return pickle.load(fin)
Esempio n. 4
0
 def test_get_pr(self):
     """Sanity-check ``Simulator.get_pr`` for a weak and two strong hands.

     The value returned by ``get_pr`` (presumably an estimated winning
     probability -- confirm against ``monte_carlo.Simulator``) must be at
     most 0.35 for 2/3 off-suit, at least 0.80 for a pair of aces and at
     least 0.60 for a suited ace with 13.
     """
     judge = TexasJudge()
     simulator = monte_carlo.Simulator(judge)

     # The comparison assertions report the actual value on failure,
     # unlike ``assertTrue(pr <= bound)``.
     pr = simulator.get_pr([(poker.PokerKind.heart, 2),
                            (poker.PokerKind.diamond, 3)],
                           trial_num=10000)
     self.assertLessEqual(pr, 0.35)

     # The remaining calls rely on ``get_pr``'s default ``trial_num``.
     pr = simulator.get_pr(
         [(poker.PokerKind.heart, poker.PokerDigit.A),
          (poker.PokerKind.diamond, poker.PokerDigit.A)], )
     self.assertGreaterEqual(pr, 0.80)

     pr = simulator.get_pr([(poker.PokerKind.heart, poker.PokerDigit.A),
                            (poker.PokerKind.heart, 13)])
     self.assertGreaterEqual(pr, 0.60)
Esempio n. 5
0
def start_game(agents, big_blind, max_epoch, seed):
    """Play up to ``max_epoch`` hands among ``agents`` and print the outcome.

    Every agent starts with ``big_blind * 100`` chips. After each hand the
    seating is rotated by one position. An agent left with less than
    ``big_blind`` either receives another starting stack or, once its
    ledger has dropped to ten starting stacks of debt, is removed from the
    table. Play stops early when fewer than four agents remain.

    Args:
        agents: list of agent objects (must be hashable; they are used as
            dictionary keys).
        big_blind: big blind size, also the re-buy threshold.
        max_epoch: maximum number of hands to play.
        seed: value passed to ``np.random.seed``.
    """
    judge = TexasJudge()

    np.random.seed(seed)
    game = NoLimitTexasGame(judge, big_blind)

    rebuy_limit = 10
    min_players = 4
    buy_in = big_blind * 100
    # Ledger value at or below which a broke agent quits instead of re-buying.
    quit_threshold = -rebuy_limit * buy_in

    for player in agents:
        player.set_amount(buy_in)
    # Per-agent ledger of chips bought so far (negative numbers);
    # current stack + ledger is the agent's net result.
    ledger = dict.fromkeys(agents, -buy_in)
    ordered_agents = agents[:]

    epoch = 0
    while epoch < max_epoch:
        epoch += 1
        amounts = game.run_a_hand(agents, is_verbose=False)
        for seat, player in enumerate(agents):
            player.set_reward(amounts[seat])
        if epoch % 100 == 0:
            print("Hand", epoch)
            for player in agents:
                print(player.get_name(), ledger[player], player.get_amount())

        # Rotate: the last seat moves to the front. This builds a new list,
        # so the removals below never touch ``ordered_agents``.
        agents = agents[-1:] + agents[:-1]

        leaving = []
        for seat, player in enumerate(agents):
            if player.get_amount() >= big_blind:
                continue
            if ledger[player] <= quit_threshold:
                # Out of re-buys: cash out whatever is left and leave.
                ledger[player] += player.get_amount()
                player.set_amount(0)
                leaving.append(seat)
            else:
                # Re-buy one more starting stack.
                ledger[player] -= buy_in
                player.set_amount(player.get_amount() + buy_in)
        # Delete from the back so earlier indexes stay valid.
        for seat in reversed(leaving):
            del agents[seat]

        if len(agents) < min_players:
            break

    print("Final results -", epoch)
    for player in ordered_agents:
        print(player.get_name(), player.get_amount() + ledger[player])
Esempio n. 6
0
    def _get_level_prs(self, cards, community_cards):
        """Collect a simulated and a calculated value for every ``TexasLevel``.

        Args:
            cards: the player's cards.
            community_cards: the community cards; concatenated to ``cards``
                with ``+`` for the direct calculation.

        Returns:
            A list with one ``(level name, simulated count / 10000,
            calculated value)`` tuple per ``TexasLevel`` member, in the
            enum's iteration order. Levels missing from either result
            default to 0.
        """
        judge = TexasJudge()
        simulator = monte_carlo.Simulator(judge)
        calc = direct_calc.ApproxCalc()

        trials = 10000
        simulated = simulator.get_level_counts(cards, community_cards, trial_num=trials)
        calculated = calc.get_level_prs(cards + community_cards)

        return [
            (level.name, simulated.get(level, 0) / trials, calculated.get(level, 0.0))
            for level in TexasLevel
        ]
Esempio n. 7
0
def start_game(agent_num, is_public, models, seed):
    """Run an interactive game until some agent cannot afford the big blind.

    The table is made of static agents, one loaded agent per entry of
    ``models`` and, in the last seat, a human agent; ``agent_num`` is the
    total number of seats.

    Args:
        agent_num: total number of agents at the table.
        is_public: forwarded to ``run_a_hand``.
        models: sequence of values accepted by ``load_model``.
        seed: seed for the game; when ``None`` a random one is drawn and
            printed.
    """
    judge = TexasJudge()
    simulator = Simulator(judge)
    big_blind = 20

    # Static agents fill whatever seats the models and the human leave free.
    static_count = agent_num - len(models) - 1
    agents = [
        naive_agents.StaticAgent(big_blind, 2000, simulator, 1000)
        for _ in range(static_count)
    ]
    for model_spec in models:
        loaded = load_model(model_spec)
        agents.append(loaded)
        loaded.set_amount(2000)
        loaded.is_test = True
    agents.append(human_agent.HumanAgent(big_blind, 2000, simulator))
    for position, player in enumerate(agents):
        player._name = None
        player._agent_index = position

    if seed is None:
        seed = random.randint(0, 32768)
        print("seed", seed)
    game = NoLimitTexasGame(judge, big_blind, seed=seed)

    someone_broke = False
    while not someone_broke:
        amounts = game.run_a_hand(agents, is_verbose=True, is_public=is_public)
        for position, amount in enumerate(amounts):
            print(agents[position].get_name(), amount)
        print()
        for position, player in enumerate(agents):
            player.set_reward(amounts[position])
            if player.get_amount() < big_blind:
                someone_broke = True

    # end of game
    print("Left amount")
    for position, player in enumerate(agents):
        print("Agent%d" % position, player.get_amount())
Esempio n. 8
0
def start_game():
    """Continue self-play training of six ``InnerKeyStateAgent`` players.

    All six agents share the Q-learner taken from a previously saved
    model. Hands are played forever (the epoch counter is unbounded and
    starts right after ``start_epoch``); progress is printed every 1000
    and 10000 epochs, and the agents listed in ``test_agent_indexes`` are
    pickled every 100000 epochs.
    """
    judge = TexasJudge()
    pr_calc = ApproxComparer()
    big_blind = 20
    init_amount = 2000
    agent_count = 6

    start_epoch = 1000000
    quantizer2 = key_state_agent.State2Quantizer()

    # Resume from the saved model and reuse its learner for every agent.
    # (A fresh QLearner used to be built here and immediately overwritten.)
    model = load_model("models/InnerKeyStateAgent-q2-vs_naive-1000000.pkl")
    learner = model.q_learner

    agents = [
        key_state_agent.InnerKeyStateAgent(big_blind, init_amount, pr_calc,
                                           quantizer2, learner)
        for _ in range(agent_count)
    ]
    test_agent_indexes = [5]
    acc_rewards = [0] * len(agents)
    acc_test_rewards = [0] * len(agents)
    trial_num = 0
    test_trial_num = 0
    # Last 100 test-hand rewards of each agent.
    test_rewards_list = [deque(maxlen=100) for _ in range(len(agents))]

    np.random.seed(0)
    game = NoLimitTexasGame(judge, big_blind)

    for epoch in itertools.count(start=start_epoch + 1):
        # Top up broke agents, reset agents that grew past ten stacks.
        for agent in agents:
            if (agent.get_amount() < big_blind
                    or agent.get_amount() > init_amount * 10):
                agent.set_amount(init_amount)
        # Two epochs out of every ten are played in test mode.
        is_test = epoch % 10 in (1, 2)
        for agent in agents:
            agent.is_test = is_test

        # Play one hand.
        amounts = game.run_a_hand(agents, is_verbose=False)
        for n, agent in enumerate(agents):
            agent.set_reward(amounts[n])

        trial_num += 1
        for n in range(len(agents)):
            acc_rewards[n] = acc_rewards[n] * 0.9999 + amounts[n] * 0.0001
        if is_test:
            test_trial_num += 1
            for idx in range(len(agents)):
                acc_test_rewards[idx] = (acc_test_rewards[idx] * 0.9999
                                         + amounts[idx] * 0.0001)
                test_rewards_list[idx].append(amounts[idx])

        if epoch % 1000 == 0:
            print(epoch, end=" ", flush=True)
        if epoch % 10000 == 0:
            print()
            # NOTE(review): both values are exponentially weighted running
            # averages yet are divided by trial_num (the test one is not
            # divided by test_trial_num) -- kept as-is, confirm the intent.
            for n in range(len(agents)):
                print("\tagent%d" % n,
                      "acc-reward",
                      acc_rewards[n] / trial_num,
                      "acc-test-reward",
                      acc_test_rewards[n] / trial_num,
                      flush=True)
            for idx in test_agent_indexes:
                if test_trial_num <= 0:
                    break
                ana_test_rewards = np.array(test_rewards_list[idx])
                print("epoch",
                      epoch,
                      "index",
                      idx,
                      "avg-test-reward/std",
                      ana_test_rewards.sum() / len(ana_test_rewards),
                      ana_test_rewards.std(),
                      flush=True)

        # Checkpoint the test agents every 100000 epochs.
        if epoch % 100000 == 0:
            for idx in test_agent_indexes:
                with open(
                        "models/InnerKeyStateAgent-index%d-%d.pkl" % (idx, epoch),
                        "wb") as fout:
                    pickle.dump(agents[idx], fout)
Esempio n. 9
0
 def test_arg_max(self):
     """``argmax`` over the fixture hands must return only index 0."""
     judge = TexasJudge()
     hands = [entry[0] for entry in self.cards_levels]
     winners = judge.argmax(hands)
     self.assertEqual(winners, [0])
Esempio n. 10
0
 def test_level_judgement(self):
     """Each fixture hand must be judged at the level recorded next to it."""
     judge = TexasJudge()
     for hand, expected_level in self.cards_levels:
         # Only the level is checked; the returned suit is ignored.
         actual_level, _suit = judge._get_level_suit(hand)
         # The hand is passed as the failure message to ease debugging.
         self.assertEqual(expected_level, actual_level, hand)