def testRandomPlayer(self):
    """Smoke-test TexasHoldemEnv driven entirely by random players.

    Phase 1 plays 100 episodes with 3 normal players; phase 2 plays 100
    episodes with 2 normal players while alternating the dealer seat.
    The test only checks that every episode reaches a terminal state
    without raising.
    """
    random.seed(0)

    # Phase 1: three normal players plus the chance player -> 4 infos.
    for _ in range(100):
        players = [RandomPlayer() for _ in range(4)]
        env = TexasHoldemEnv()
        num_normal_players = 3
        chips = [1000 for _ in range(num_normal_players)]
        # NOTE(review): other tests in this file pass "param_init_chips";
        # confirm the env also accepts the bare "chips" key used here.
        params = {
            "param_num_normal_players": num_normal_players,
            "chips": chips
        }
        infos, public_state_history, person_states_history, \
            private_state_history, action_history = env.init(params)
        while public_state_history[-1].is_terminal != True:
            for player_id in range(4):
                players[player_id].receive_info(infos[player_id])
            turn = public_state_history[-1].turn
            action = players[turn].take_action()
            infos, public_state_history, person_states_history, \
                private_state_history, action_history = env.forward(action)

    # Phase 2: two normal players; the dealer seat alternates by episode.
    # (Renamed the loop indices: the original reused `i` for the episode
    # counter, the player-list comprehension and the receive_info loop.)
    for episode in range(100):
        players = [RandomPlayer() for _ in range(3)]
        env = TexasHoldemEnv()
        num_normal_players = 2
        chips = [1000 for _ in range(num_normal_players)]
        dealer_id = episode % 2
        params = {
            "param_num_normal_players": num_normal_players,
            "dealer_id": dealer_id,
            "chips": chips
        }
        infos, public_state, person_states, private_state, \
            action_history = env.init(params)
        while public_state[-1].is_terminal != True:
            for player_id in range(num_normal_players + 1):
                players[player_id].receive_info(infos[player_id])
            turn = public_state[-1].turn
            action = players[turn].take_action()
            infos, public_state, person_states, private_state, \
                action_history = env.forward(action)
def __init__(self, name, sess, coord, globalAC):
    """Set up one worker: identity, its own env, opponents, local net."""
    # Identity and TensorFlow plumbing shared with the other workers.
    self.name = name
    self.SESS = sess
    self.COORD = coord
    # Heads-up game configuration: 2 normal players, 100 chips each.
    self.num_normal_players = 2
    self.params = {
        "param_num_normal_players": 2,
        "param_init_chips": [100, 100],
        "param_big_blind_bet": 20,
        "backward_enable": True,
    }
    self.env = roomai.games.texasholdem.TexasHoldemEnv()
    # Opponents: a uniform-random player and the chance (dealing) player.
    self.random_player = RandomPlayer()
    self.chance_player = roomai.games.common.RandomPlayerChance()
    # Local actor-critic network synchronized against the global one.
    self.AC = ACNet(name, sess, globalAC)
def testCompete(self):
    """Run one silent competition among five random players and print it."""
    import random
    random.seed(100)
    contestants = [RandomPlayer() for _ in range(5)]
    env = TexasHoldemEnv()
    # compete_silent returns the per-player score vector.
    scores = TexasHoldemEnv.compete_silent(env, contestants)
    print(scores)
def TexasholdemA3C_Predict():
    """Evaluate the checkpointed A3C player against a random player.

    Loads the saved model and plays 100 silent competitions, printing
    the score vector of each.
    """
    import random
    random.seed(100)
    state_spec = [None, 14, 8, 1]  # NHWC input shape fed to the network
    n_a = 5  # actions: Fold / Check / Call / Raise / Allin
    a3c_player = Texasholdem_A3CPlayer(state_spec, n_a)
    a3c_player.load_model('./checkpoint', 'TexasHoldemModel')
    random_player = RandomPlayer()  # fixed typo: was `randomp_pleyer`
    players = [a3c_player, random_player]
    env = TexasHoldemEnv()
    for _ in range(100):
        scores = TexasHoldemEnv.compete_silent(env, players)
        print(scores)
def TexasholdemA3C_Train():
    """Train the A3C Texas Hold'em agent against fixed opponents."""
    env = roomai.games.texasholdem.TexasHoldemEnv()
    # Fixed opponents: a uniform-random player plus the chance player.
    random_player = RandomPlayer()
    chance_player = roomai.games.common.RandomPlayerChance()
    other_players = [random_player, chance_player]
    # Game settings plus A3C hyper-parameters, handed to the trainer.
    params = {
        "param_num_normal_players": 2,
        "param_init_chips": [100, 100],
        "param_big_blind_bet": 20,
        "backward_enable": True,
        "MAX_GLOBAL_EP": 1000,
        "env": env,
        "otherplayers": other_players,
        "MODEL_DIR": "./checkpoint/TexasHoldemModel",
    }
    trainer = AbstractA3C([None, 14, 8, 1], 5, params)
    # Map each action option name to a discrete network output index.
    action_dict = {"Fold": 0, "Check": 1, "Call": 2, "Raise": 3, "Allin": 4}
    trainer.train(action_dict)
class Worker(object):
    """One A3C worker.

    Plays Texas Hold'em episodes in its own environment copy, collects
    (state, action, reward) rollouts for the learner at seat 0, and
    pushes gradients to / pulls weights from the shared global network.
    Relies on module globals: GLOBAL_RUNNING_R, GLOBAL_EP, MAX_GLOBAL_EP,
    GAMMA, action_dict.
    """

    def __init__(self, name, sess, coord, globalAC):
        # Heads-up configuration: 2 normal players, 100 chips each.
        self.num_normal_players = 2
        self.params = {
            "param_num_normal_players": 2,
            "param_init_chips": [100, 100],
            "param_big_blind_bet": 20,
            "backward_enable": True
        }
        self.env = roomai.games.texasholdem.TexasHoldemEnv()
        self.name = name
        # Opponent at seat 1 and the chance (card-dealing) player at seat 2.
        self.random_player = RandomPlayer()
        self.chance_player = roomai.games.common.RandomPlayerChance()
        # Local actor-critic network tied to the shared global one.
        self.AC = ACNet(name, sess, globalAC)
        self.SESS = sess
        self.COORD = coord

    def work(self):
        """Run training episodes until the coordinator stops or the global
        episode counter reaches MAX_GLOBAL_EP."""
        global GLOBAL_RUNNING_R, GLOBAL_EP
        if GLOBAL_EP % 100 == 0:
            print(GLOBAL_EP)
        total_step = 1  # NOTE(review): never read below — appears unused
        # Worker index parsed from a name like "W_3".
        workid = int(str(self.name).split("_")[1])
        # Rollout buffers for the learner's states, actions and rewards.
        buffer_s, buffer_a, buffer_r = [], [], []
        while not self.COORD.should_stop() and GLOBAL_EP < MAX_GLOBAL_EP:
            infos, public, person_states, private_state, _ = self.env.init(
                self.params)
            self.random_player.receive_info(infos[1])
            self.chance_player.receive_info(infos[2])
            ep_r = 0
            while public[-1].is_terminal == False:
                turn = public[-1].turn
                # print(turn)
                if turn == 0:  # A3C learner acts at seat 0
                    # for normalcard in infos[0].person_state_history[-1].hand_cards:
                    #     print(normalcard.point_rank)
                    # Encode visible cards as a 14x8x1 one-hot plane; the
                    # suit columns are shifted by 4 when the learner is not
                    # the dealer, so the net can tell the seats apart.
                    s = np.zeros((14, 8, 1))
                    if (public[-1].param_dealer_id == 0):
                        for card in infos[0].public_state_history[
                                -1].public_cards:
                            s[card.point_rank, card.suit_rank, 0] = 1
                        for card in infos[0].person_state_history[
                                -1].hand_cards:
                            s[card.point_rank, card.suit_rank, 0] = 1
                    else:
                        for card in infos[0].public_state_history[
                                -1].public_cards:
                            s[card.point_rank, card.suit_rank + 4, 0] = 1
                        for card in infos[0].person_state_history[
                                -1].hand_cards:
                            s[card.point_rank, card.suit_rank + 4, 0] = 1
                    # Deduplicate actions by their option name ("Fold",
                    # "Raise", ...) so the net chooses among options.
                    available_action = dict()
                    available_option = []
                    for action in list(infos[0].person_state_history[-1].
                                       available_actions.values()):
                        option = action.option
                        if option not in available_option:
                            available_option.append(option)
                        # NOTE(review): source formatting was ambiguous on
                        # whether this line sits inside the `if`; placed at
                        # loop level (keeps the last action per option).
                        available_action[option] = action
                    a = self.AC.choose_action(s, available_option, workid)
                    action = available_action[a]
                    buffer_s.append(s)
                    buffer_a.append(action_dict[a])
                    # Reward stays 0 until the terminal score is known.
                    buffer_r.append(0)
                    # print("action0", action.option)
                elif turn == 1:  # random player
                    # # a3c player
                    # s = np.zeros((14, 8, 1))
                    # if (public[-1].param_dealer_id == 1):
                    #     for normalcard in infos[1].public_state_history[-1].public_cards:
                    #         s[normalcard.point_rank, normalcard.suit_rank, 0] = 1
                    #     for normalcard in infos[1].person_state_history[-1].hand_cards:
                    #         s[normalcard.point_rank, normalcard.suit_rank, 0] = 1
                    # else:
                    #     for normalcard in infos[1].public_state_history[-1].public_cards:
                    #         s[normalcard.point_rank, normalcard.suit_rank + 4, 0] = 1
                    #     for normalcard in infos[1].person_state_history[-1].hand_cards:
                    #         s[normalcard.point_rank, normalcard.suit_rank + 4, 0] = 1
                    # available_action = dict()
                    # available_option = []
                    # for action in list(infos[1].person_state_history[-1].available_actions.values()):
                    #     option = action.option
                    #     if option not in available_option:
                    #         available_option.append(option)
                    #         available_action[option] = action
                    # a = self.AC.choose_action(s, available_option, workid)
                    # action = available_action[a]
                    #
                    # random player
                    action = self.random_player.take_action()
                    print("action1:", action.option)
                else:  # chance player deals the cards
                    action = self.chance_player.take_action()
                infos, public, persons, private, _ = self.env.forward(action)
                self.random_player.receive_info(infos[1])
                self.chance_player.receive_info(infos[2])
            # Episode over; skip the update if the learner never acted.
            if (len(buffer_r) == 0):
                continue
            # print("score", public[-1].scores[0])
            # Terminal reward: the learner's final score for this episode.
            buffer_r[-1] = public[-1].scores[0]
            ep_r += buffer_r[-1]
            v_s_ = 0  # terminal
            # Discounted returns, computed backwards from the terminal state.
            buffer_v_target = []
            for r in buffer_r[::-1]:  # reverse buffer r
                v_s_ = r + GAMMA * v_s_
                buffer_v_target.append(v_s_)
            buffer_v_target.reverse()
            buffer_s, buffer_a, buffer_v_target = np.stack(
                buffer_s, axis=0), np.array(buffer_a), np.vstack(
                    buffer_v_target)
            feed_dict = {
                self.AC.s: buffer_s,
                self.AC.a_his: buffer_a,
                self.AC.v_target: buffer_v_target,
            }
            # Push local gradients to the global net, then resync weights.
            self.AC.update_global(feed_dict)
            buffer_s, buffer_a, buffer_r = [], [], []
            self.AC.pull_global()
            if len(GLOBAL_RUNNING_R) == 0:  # record running episode reward
                GLOBAL_RUNNING_R.append(ep_r)
            else:
                # Exponential moving average of episode rewards.
                GLOBAL_RUNNING_R.append(0.99 * GLOBAL_RUNNING_R[-1] +
                                        0.01 * ep_r)
            # if len(rewardlist[workid]) == 0:  # record running episode reward
            #     rewardlist[workid].append(ep_r)
            # else:
            #     rewardlist[workid].append(0.99 * rewardlist[workid][-1] + 0.01 * ep_r)
            print(
                self.name,
                "Ep:",
                GLOBAL_EP,
                "| Ep_r: %.3f" % GLOBAL_RUNNING_R[-1],
            )
            GLOBAL_EP += 1
def testEnv3players(self):
    """Scripted 3-player episode: player 0 goes all-in, the other two
    fold, and the final scores (normalised by the big blind) are checked.
    Hand/board cards are injected directly into the env's private state."""
    env = TexasHoldemEnv()
    num_normal_players = 3
    dealer_id = 0
    chips = [100, 100, 100]
    big_blind_bet = 20
    params = {
        "param_num_normal_players": num_normal_players,
        "param_dealer_id": dealer_id,
        "param_init_chips": chips,
        "param_big_blind_bet": big_blind_bet,
        "backward_enable": True
    }
    players = [RandomPlayer() for i in range(4)]
    infos, public_state, person_states, private_state, _ = env.init(params)
    # Drive the chance player through the deal: 3 players x 2 hole cards
    # plus 5 board cards = 11 chance actions.
    for i in range(3 * 2 + 5):
        action = list(env.available_actions().values())[0]
        infos, public_state_history, person_states_history, \
            private_state_history, action_history = env.forward(action)
        print(i)
    self.assertEqual(infos[0].person_state_history[-1].id, 0)
    # NOTE(review): all three assignments below target player 0's hand;
    # the sibling test testEnv3Players2 uses indices [0], [1], [2], so the
    # repeated [0] here looks like a copy-paste slip. It is harmless to
    # the asserts below, which are decided by folds, not by a showdown.
    env.__person_states_history__[0][-1].__hand_cards__ = [
        roomai.games.texasholdem.PokerCard(0, 0),
        roomai.games.texasholdem.PokerCard(0, 1)
    ]
    env.__person_states_history__[0][-1].__hand_cards__ = [
        roomai.games.texasholdem.PokerCard(2, 0),
        roomai.games.texasholdem.PokerCard(2, 1)
    ]
    env.__person_states_history__[0][-1].__hand_cards__ = [
        roomai.games.texasholdem.PokerCard(2, 0),
        roomai.games.texasholdem.PokerCard(2, 1)
    ]
    # Fix the five board cards that would be revealed.
    env.__private_state_history__[-1].__keep_cards__ = [
        roomai.games.texasholdem.PokerCard(3, 0),
        roomai.games.texasholdem.PokerCard(4, 0),
        roomai.games.texasholdem.PokerCard(5, 0),
        roomai.games.texasholdem.PokerCard(6, 0),
        roomai.games.texasholdem.PokerCard(7, 0)
    ]
    self.assertEqual(env.__public_state_history__[-1].turn, 0)
    self.assertNotEqual(
        len(infos[0].person_state_history[-1].available_actions), 0)
    self.assertTrue("Allin_100" in infos[0].person_state_history[-1].
                    available_actions.keys())
    # dealer_id = 0
    # turn = 0
    # chips:100, 90, 80
    # bets :0, 10, 20
    # state:n, n, n
    action = TexasHoldemAction("Allin_100")
    infos, public_state, person_states, private_state, action_history = \
        env.forward(action)
    self.assertEqual(env.__public_state_history__[-1].turn, 1)
    self.assertNotEqual(
        len(infos[1].person_state_history[-1].available_actions), 0)
    self.assertTrue("Allin_90" in infos[1].person_state_history[-1].
                    available_actions.keys())
    self.assertEqual(env.__public_state_history__[-1].turn, 1)
    self.assertEqual(env.__public_state_history__[-1].chips[0], 0)
    self.assertEqual(env.__public_state_history__[-1].chips[1], 90)
    self.assertEqual(env.__public_state_history__[-1].stage,
                     Stage.firstStage)
    # dealer_id = 0
    # turn = 1
    # chips:0, 90, 80
    # bets :100, 10, 20
    # state:all, n, n
    action = TexasHoldemAction("Fold_0")
    infos, public_state, person_states, private_state, action_history = \
        env.forward(action)
    # dealer_id = 0
    # turn = 2
    # chips:0, 90, 80
    # bets :100, 10, 20
    # state:all, q, n
    self.assertEqual(env.__public_state_history__[-1].turn, 2)
    action = TexasHoldemAction("Fold_0")
    infos, public_state, person_states, private_state, action_history = \
        env.forward(action)
    # dealer_id = 0
    # turn = 1
    # chips:0, 90, 80
    # bets :100, 10, 20
    # state:all, q, n
    print(env.__public_state_history__[-1].bets)
    print(env.__public_state_history__[-1].is_allin)
    print(env.__public_state_history__[-1].is_fold)
    print(env.__public_state_history__[-1].chips)
    print(env.__public_state_history__[-1].turn)
    # Both opponents folded, so player 0 wins the blinds (10 + 20);
    # scores are divided by the big blind.
    self.assertTrue(public_state[-1].is_terminal)
    self.assertEqual(public_state[-1].scores[0],
                     30.0 / public_state[-1].param_big_blind_bet)
    self.assertEqual(public_state[-1].scores[1],
                     -10.0 / public_state[-1].param_big_blind_bet)
    self.assertEqual(public_state[-1].scores[2],
                     -20.0 / public_state[-1].param_big_blind_bet)
def testEnv3Players2(self):
    """Scripted 3-player episode with unequal stacks (100/500/1000):
    exercises Raise/Call/Check/Allin transitions, raise_account updates,
    an invalid Call (expected ValueError), stage progression, and the
    showdown scores where player 0's pair of 7s wins both side pots."""
    env = TexasHoldemEnv()
    num_normal_players = 3
    dealer_id = 0
    chips = [100, 500, 1000]
    big_blind_bet = 20
    params = {
        "param_num_normal_players": num_normal_players,
        "param_dealer_id": dealer_id,
        "param_init_chips": chips,
        "param_big_blind_bet": big_blind_bet
    }
    players = [RandomPlayer() for i in range(4)]
    infos, public_state_history, person_states_history, \
        private_state_history, action_history = env.init(params)
    # Drive the chance player through the deal (6 hole + 5 board cards).
    for i in range(3 * 2 + 5):
        action = list(env.available_actions().values())[0]
        infos, public_state_history, person_states_history, \
            private_state_history, action_history = env.forward(action)
    self.assertEqual(infos[0].person_state_history[-1].id, 0)
    # Inject deterministic hands: player 0 holds a pair of 7s, players 1
    # and 2 hold pairs of 2s.
    env.__person_states_history__[0][-1].__hand_cards__ = [
        roomai.games.texasholdem.PokerCard(7, 0),
        roomai.games.texasholdem.PokerCard(7, 1)
    ]
    env.__person_states_history__[1][-1].__hand_cards__ = [
        roomai.games.texasholdem.PokerCard(2, 0),
        roomai.games.texasholdem.PokerCard(2, 1)
    ]
    env.__person_states_history__[2][-1].__hand_cards__ = [
        roomai.games.texasholdem.PokerCard(2, 2),
        roomai.games.texasholdem.PokerCard(2, 3)
    ]
    # Fix the board so the final card pairs player 0's 7s.
    env.__private_state_history__[-1].__keep_cards__ = [
        roomai.games.texasholdem.PokerCard(3, 1),
        roomai.games.texasholdem.PokerCard(4, 2),
        roomai.games.texasholdem.PokerCard(5, 3),
        roomai.games.texasholdem.PokerCard(6, 0),
        roomai.games.texasholdem.PokerCard(7, 3)
    ]
    self.assertEqual(env.__public_state_history__[-1].turn, 0)
    self.assertNotEqual(
        len(infos[0].person_state_history[-1].available_actions), 0)
    self.assertTrue("Raise_60" in infos[0].person_state_history[-1].
                    available_actions.keys())
    self.assertEqual(env.__public_state_history__[-1].raise_account, 20)
    # dealer_id = 0
    # turn = 0
    # chips:100, 490, 980
    # bets :0, 10, 20
    # state:n, n, n
    # flag_next:0
    # raise_account: 20
    action = TexasHoldemAction("Raise_60")
    infos, public_state, person_states, private_state, action_history = \
        env.forward(action)
    print(env.__public_state_history__[-1].num_needed_to_action,
          env.__public_state_history__[-1].is_needed_to_action)
    self.assertEqual(env.__public_state_history__[-1].turn, 1)
    # After the raise, re-raising by the same amounts is not offered.
    self.assertTrue("Raise_60" not in
                    infos[1].person_state_history[-1].available_actions)
    self.assertTrue("Raise_80" not in
                    infos[1].person_state_history[-1].available_actions)
    self.assertEqual(env.__public_state_history__[-1].raise_account, 40)
    # Calling the wrong amount must be rejected.
    action = TexasHoldemAction("Call_40")
    self.assertRaises(ValueError, env.forward, action)
    # dealer_id = 0
    # turn = 1
    # stage = 1
    # chips:40, 490, 980
    # bets :60, 10, 20
    # state:n, n, n
    # raise_account: 40
    action = TexasHoldemAction("Call_50")
    infos, public_state, person_states, private_state, action_history = \
        env.forward(action)
    assert (public_state[-1].stage == Stage.firstStage)
    print(env.__public_state_history__[-1].num_needed_to_action,
          env.__public_state_history__[-1].is_needed_to_action)
    print(public_state[-1].stage)
    print(public_state[-1].chips)
    print(public_state[-1].bets)
    print(public_state[-1].param_dealer_id)
    # dealer_id = 0
    # turn = 2
    # stage = 1
    # chips:40, 440, 980
    # bets :60, 60, 20
    # state:n, n, n
    # raise_account: 40
    # expected:f,f,t
    action = TexasHoldemAction("Call_40")
    infos, public_state, person_states, private_state, action_history = \
        env.forward(action)
    print("\n\n")
    print("stage", public_state[-1].stage)
    print("dealer_id+1", (public_state[-1].param_dealer_id + 1) %
          public_state[-1].param_num_normal_players)
    print("is_needed_to_action", public_state[-1].is_needed_to_action)
    # All bets matched: the flop stage begins.
    self.assertEqual(infos[0].public_state_history[-1].stage,
                     Stage.secondStage)
    self.assertEqual(env.__public_state_history__[-1].chips[1], 440)
    self.assertEqual(env.__public_state_history__[-1].turn, 1)
    # dealer_id = 0
    # turn = 1
    # stage = 2
    # chips:40, 440, 940
    # bets :60, 60, 60
    # state:n, n, n
    # raise_account: 40
    # Three checks in a row advance the game to stage 3 (turn card shown).
    action = TexasHoldemAction("Check_0")
    infos, public_state, person_states, private_state, action_history = \
        env.forward(action)
    infos, public_state, person_states, private_state, action_history = \
        env.forward(action)
    infos, public_state, person_states, private_state, action_history = \
        env.forward(action)
    self.assertEqual(env.__public_state_history__[-1].stage, 3)
    self.assertEqual(len(env.__public_state_history__[-1].public_cards), 4)
    # NOTE(review): `p` and `tmp` below are never used afterwards —
    # they look like leftovers from a removed public-cards comparison.
    p = 0
    tmp = [
        roomai.games.texasholdem.PokerCard(3, 1),
        roomai.games.texasholdem.PokerCard(4, 2),
        roomai.games.texasholdem.PokerCard(5, 3),
        roomai.games.texasholdem.PokerCard(6, 0)
    ]
    self.assertEqual(env.__public_state_history__[-1].raise_account, 40)
    self.assertEqual(env.__public_state_history__[-1].stage, 3)
    self.assertEqual(env.__public_state_history__[-1].turn, 1)
    print("1", infos[1].person_state_history[-1].available_actions.keys())
    # dealer_id = 0
    # turn = 1
    # stage = 3
    # chips:40, 440, 940
    # bets :60, 60, 60
    # state:n, n, n
    # raise_account: 40
    action = TexasHoldemAction("Allin_440")
    infos, public_state, person_states, private_state, action_history = \
        env.forward(action)
    self.assertEqual(infos[0].public_state_history[-1].max_bet_sofar, 500)
    print("2", infos[2].person_state_history[-1].available_actions.keys())
    self.assertEqual(env.__public_state_history__[-1].is_allin[1], True)
    self.assertEqual(infos[0].public_state_history[-1].stage, 3)
    # dealer_id = 0
    # turn = 2
    # stage = 3
    # chips:40, 0, 940
    # bets :60, 500, 60
    # state:n, n, n
    # raise_account: 40
    action = TexasHoldemAction("Call_440")
    infos, public_state, person_states, private_state, action_history = \
        env.forward(action)
    action = TexasHoldemAction("Allin_40")
    infos, public_state, person_states, private_state, action_history = \
        env.forward(action)
    # dealer_id = 0
    # chips:0, 0, 500
    # bets :100, 500, 500
    # 0 > 1 = 2
    # Showdown: player 0's pair of 7s beats both pairs of 2s; scores are
    # normalised by the big blind.
    self.assertEqual(public_state[-1].scores[0],
                     200.0 / public_state[-1].param_big_blind_bet)
    self.assertEqual(public_state[-1].scores[1],
                     -100.0 / public_state[-1].param_big_blind_bet)
    self.assertEqual(public_state[-1].scores[2],
                     -100.0 / public_state[-1].param_big_blind_bet)