def test_set_blind_structure(self):
    """With an ante/blind structure set for round 1, the winner of the
    single folded hand nets the ante (5) plus the small blind (10)."""
    config = G.setup_config(1, 100, 10)
    config.register_player("p1", FoldMan())
    config.register_player("p2", FoldMan())
    config.set_blind_structure({1: {"ante": 5, "small_blind": 10}})
    result = G.start_poker(config)
    first, second = result["players"][:2]
    self.eq(115, first["stack"])
    self.eq(85, second["stack"])
def test_start_poker(self):
    """A one-round heads-up game between two folding players ends with
    p1 at 110 chips and p2 at 90."""
    config = G.setup_config(1, 100, 10)
    config.register_player("p1", FoldMan())
    config.register_player("p2", FoldMan())
    result = G.start_poker(config)
    first, second = result["players"][:2]
    self.eq("p1", first["name"])
    self.eq(110, first["stack"])
    self.eq("p2", second["name"])
    self.eq(90, second["stack"])
def test_start_poker_validation_when_one_player(self):
    """Starting a game with only one registered player must be rejected."""
    config = G.setup_config(1, 100, 10)
    config.register_player("p1", FoldMan())
    with self.assertRaises(Exception) as ctx:
        G.start_poker(config)
    self.assertIn("only 1 player", str(ctx.exception))
def test_start_poker_validation_when_no_player(self):
    """Starting a game with no registered players must be rejected."""
    config = G.setup_config(1, 100, 10)
    with self.assertRaises(Exception) as ctx:
        G.start_poker(config)
    self.assertIn("no player", str(ctx.exception))
import datetime

from pypokerengine.players import RandomPlayer, ExternalExecutablePlayer
from pypokerengine.api.game import setup_config, start_poker

if __name__ == '__main__':
    start_time = datetime.datetime.now()

    # choose here your strategy
    player = ExternalExecutablePlayer('bot.py')

    # 9-handed table: the external-executable participant plus eight
    # random opponents; replays are written to the summary file.
    config = setup_config(max_round=50,
                          initial_stack=1500,
                          small_blind_amount=15,
                          summary_file='example_game_replay.json')
    config.register_player(name='Participant', algorithm=player)
    for i in range(8):
        config.register_player(name='Random {}'.format(i), algorithm=RandomPlayer())

    # Play several games, collecting the participant's final stack.
    num_games = 3
    game_scores = []
    for game_no in range(num_games):
        game_result = start_poker(config, verbose=0)
        # NOTE(review): assumes the participant stays at index 0 of the
        # result's player list — verify against start_poker's ordering.
        participant_result = game_result['players'][0]
        game_scores.append(participant_result['stack'])
        # NOTE(review): chunk is truncated here — this print call
        # continues past the visible fragment.
        print('Game #{}: stack={}, state={}'.format(
            game_no, participant_result['stack'], participant_result['state'],
from pypokerengine.api.game import setup_config, start_poker
from players.fish_player import FishPlayer
from players.console_player import ConsolePlayer
from players.random_player import RandomPlayer
from players.honest_player import HonestPlayer
from players.emulator_player import EmulatorPlayer
from pypokerengine.engine.tts import tts
import time

# Heads-up game: an AI (HonestPlayer) against a human at the console.
config = setup_config(max_round=10, initial_stack=20, small_blind_amount=0)
config.register_player(name="인공지능", algorithm=HonestPlayer())
config.register_player(name="플레이어", algorithm=ConsolePlayer())
# config.register_player(name="EmulatorPlayer", algorithm=EmulatorPlayer())
# config.register_player(name="FishPlayer", algorithm=FishPlayer())

game_result = start_poker(config, verbose=1)

# Speak the closing quote (Korean: "The game has ended.") via the tts
# helper, then pause so the audio can finish before the process exits.
quote = "게임이 종료되었습니다."
tts.playTts(tts, quote)
time.sleep(3)
def declare_action(self, valid_actions, hole_card, round_state, dealer):
    """Either record a training sample or replay a pre-selected move.

    In recording mode (``__next_action == -1``) the method waits until
    the street to snapshot appears in the action histories, pickles the
    state tensor, simulates each of the ten candidate moves once on a
    copy of the dealer, and pickles the normalized outcomes alongside
    the tensor.  Otherwise the forced move index is played once.
    """
    if self.__next_action != -1:
        # Replay mode: play the forced move exactly once, then hand
        # control back to the heuristic bot.
        if 0 <= self.__next_action < 9:
            action, amount = getAction(valid_actions, self.__stack,
                                       self.__last_action,
                                       self.__next_action)
            self.__next_action = -2
            return action, amount
        return HeuristicPlayer.bot_action(
            self, valid_actions, hole_card, round_state, dealer,
            self.__community_card, self.__stack, self.__last_action)

    # Recording mode: only act once the street to save (preflop/flop/…)
    # shows up in the histories.
    history = round_state["action_histories"]
    if history and self.__save_state in history:
        state_to_save = self.__save_state
        tensor = create_tensor(valid_actions, hole_card, round_state,
                               self.__community_card, self.__small_blind,
                               self.__last_action)
        # Persist the input tensor for this street.
        with open(self.__path + state_to_save + "/save" +
                  str(self.__last_number) + ".pickle", 'wb') as handle:
            pickle.dump(tensor, handle, protocol=pickle.HIGHEST_PROTOCOL)

        scores = [0] * 10
        # Roll out on a copy so the live game state stays untouched.
        dealer_backup = dealer.copy()
        config = setup_config(max_round=1,
                              initial_stack=self.__initial_stack,
                              small_blind_amount=self.__small_blind)

        for move in range(10):
            # Recursive generator forced to play candidate move `move`.
            rollout_bot = TrainingsGenerator(self.__next_action + move + 1,
                                             self.__save_state,
                                             self.__path,
                                             self.__last_number)
            dealer_backup.change_algorithm_of_player(self.uuid, rollout_bot)
            game_result = start_poker_with_dealer(config, dealer_backup,
                                                  verbose=0)

            # Chip difference for this bot after the rollout.
            amount_win_loss = 0
            for entry in game_result["players"]:
                if entry["uuid"] == self.uuid:
                    amount_win_loss = entry["stack"] - self.__stack

            # Normalize into [0, 1]: 0.5 is break-even, losses map to
            # [0, 0.5) (at most half the chips can be lost), wins map
            # to (0.5, 1] relative to the winnable pot.
            normalized = 0.5
            if amount_win_loss < 0:
                normalized = amount_win_loss / (self.__stack * 2) + 0.5
            if amount_win_loss > 0:
                whole_stack = 0
                for entry in game_result["players"]:
                    whole_stack += entry["stack"]
                poss_win = whole_stack - self.__stack
                normalized = amount_win_loss / (poss_win * 2) + 0.5
            scores[move] = normalized

        # Persist the ten normalized rollout results.
        with open(self.__path + state_to_save + "/result" +
                  str(self.__last_number) + ".pickle", 'wb') as handle:
            pickle.dump(scores, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # Until the state to save is reached, just play like the fish bot.
    return FishPlayer.declare_action(self, valid_actions, hole_card,
                                     round_state, dealer)
from pypokerengine.api.game import start_poker, setup_config
from bots.monte_carlo import MonteCarloBot
from bots.callbot import CallBot
import numpy as np

if __name__ == '__main__':
    blogger_bot = MonteCarloBot()

    # The stack log contains the stacks of the Data Blogger bot after
    # each game (the initial stack is 100).
    stack_log = []
    # FIX: loop variable renamed from `round` to `game_no` so the
    # builtin round() is no longer shadowed.
    for game_no in range(1000):
        p1, p2 = blogger_bot, CallBot()
        config = setup_config(max_round=5, initial_stack=100,
                              small_blind_amount=5)
        config.register_player(name="p1", algorithm=p1)
        config.register_player(name="p2", algorithm=p2)
        game_result = start_poker(config, verbose=0)

        # Record only the Monte Carlo bot's stack, matched by uuid.
        stack_log.append([player['stack']
                          for player in game_result['players']
                          if player['uuid'] == blogger_bot.uuid])
        print('Avg. stack:', '%d' % (int(np.mean(stack_log))))
# NOTE(review): this chunk starts mid-script — `f` comes from an
# enclosing `with open(...)` opened above this fragment, and gen_dir,
# bot_id, ref_full_dict, my_network, validation_id and max_round are
# all defined earlier.
cst_decks = pickle.load(f)
with open(
        gen_dir + '/bots/' + str(bot_id) + '/bot_' + str(bot_id) +
        '_flat.pkl', 'rb') as f:
    deepbot_flat = pickle.load(f)

# Rebuild the full parameter dictionary from the flat vector and
# construct the bot under validation.
deepbot_dict = get_full_dict(all_params=deepbot_flat,
                             ref_full_dict=ref_full_dict)
deepbot = DeepBot(id_=bot_id,
                  gen_dir=gen_dir,
                  full_dict=deepbot_dict,
                  network=my_network,
                  validation_mode='mutation_variance',
                  validation_id=validation_id)

# Keep playing games against CallBot until the round budget runs out.
while True:
    config = setup_config(max_round=max_round, initial_stack=3000,
                          small_blind_amount=50)
    config.register_player(name="p1", algorithm=deepbot)
    config.register_player(name="p2", algorithm=CallBot())
    # NOTE(review): cheat=True with cst_deck_ids presumably replays the
    # pre-drawn decks so every evaluated bot sees identical cards —
    # confirm against the (patched) start_poker signature.
    game_result = start_poker(config, verbose=0, cheat=True,
                              cst_deck_ids=cst_decks.copy())
    max_round -= (deepbot.round_count + 1)
    if max_round <= 0:
        break

nb_measures = 10

##### MUTATION #######
print('\n MUTANT BOT')
# NOTE(review): fragment — this is the tail of a list comprehension
# (building `hall_of_fame`) whose head lies above this chunk; SB, ANTE,
# STARTING_STACK, TOTAL_EPISODES and Emulator are defined earlier.
    for i in range(8)
]

# for player in hall_of_fame:
#     player.agent.load_model("player_"+str(player.name)+".h5")

# set up the emulator
emulator = Emulator()
emulator.set_game_rule(player_num=len(hall_of_fame),
                       max_round=2**32,
                       small_blind_amount=SB,
                       ante_amount=ANTE["initial"])

# simulate 1 round to obtain the starting game state and player info
config = setup_config(max_round=1, initial_stack=STARTING_STACK,
                      small_blind_amount=SB)
for player in hall_of_fame:
    config.register_player(name=player.name, algorithm=player)
game_result = start_poker(config, verbose=0)

# obtain simulated player info: match result entries back to our
# players by name so each one receives its engine-assigned uuid and is
# registered with the emulator under that uuid.
for player in game_result['players']:
    for _player in hall_of_fame:
        if _player.name == player['name']:
            _player.uuid = player['uuid']
            emulator.register_player(_player.uuid, _player)
            break

# NOTE(review): the episode loop body continues past this fragment.
for i in range(1, TOTAL_EPISODES + 1):
    print(">>>>>>>>>>Initializing game %d<<<<<<<<<<<<" % (i))
# new_ep = old_ep * (1 - EPSILON_DECAY)
EPSILON_END = 0  # 0-1 minimum exploration probability
# new_ep = max(new_ep, EPSILON_END)
DISCOUNT_FACTOR = 0.3  # 0-1 percentage to discount future q values by
# future_q *= DISCOUNT_FACTOR
TOTAL_EPISODES = 100  # number of poker games
J = 10  # update target network weights for every J fits

# Initialize the DQL agents with random weights; every DQL_Agent gets
# a unique name (its index as a string).
hall_of_fame = [
    DQLPlayer(
        str(agent_id),
        DQL_Agent(
            DQN(),
            ReplayMemory(REPLAY_MEMORY_SIZE, REPLAY_MEMORY_BATCH_SIZE),
            EPSILON,
            EPSILON_DECAY,
            EPSILON_END,
            DISCOUNT_FACTOR,
            J,
        ),
    )
    for agent_id in range(8)
]

# Load the previously trained player models from disk.
for player in hall_of_fame:
    player.agent.load_model("player_" + str(player.name) + ".h5")

# Register everyone and run one (effectively unbounded) game.
config = setup_config(max_round=2**32, initial_stack=1500,
                      small_blind_amount=10)
for player in hall_of_fame:
    config.register_player(name=player.name, algorithm=player)
game_result = start_poker(config, verbose=1)
from pypokerengine.api.game import setup_config, start_poker
from fish_player import FishPlayer
from honest_player import HonestPlayer
import config

# Build the engine configuration from the project settings module.
engine_config = setup_config(
    max_round=config.num_rounds,
    initial_stack=config.initial_stack,
    small_blind_amount=config.small_blind_amount,
)

engine_config.register_player(name="p1", algorithm=FishPlayer())
engine_config.register_player(name="p2", algorithm=HonestPlayer())

# Run the game verbosely and dump the final result.
game_result = start_poker(engine_config, verbose=1)
print("Game result: " + str(game_result))
# ['Randomer1', Randomer(), 0, 0], # ['Randomer2', Randomer(), 0, 0], # ['Randomer3', Randomer(), 0, 0], # ['Randomer4', Randomer(), 0, 0], # ['Randomer5', Randomer(), 0, 0], # ['Randomer6', Randomer(), 0, 0], # ['Randomer7', Randomer(), 0, 0], # ['Randomer8', Randomer(), 0, 0], ] for g in range(GAMES): print(g) config = setup_config(max_round=50, initial_stack=STACK, small_blind_amount=15) i = 0 while i < len(players): config.register_player(name=players[i][0], algorithm=players[i][1]) i += 1 game_result = start_poker(config, verbose=0) # print(json.dumps(game_result['players'], indent=2, sort_keys=True)) i = 0 while i < len(game_result['players']): chips = game_result['players'][i]['stack'] players[i][2] += chips - STACK if chips >= STACK:
def test_register_player_when_invalid(self):
    """Registering a non-player algorithm ("dummy" string) must raise.

    FIX: the original body called register_player without asserting
    anything, so an expected rejection surfaced as a test *error* (and
    a silent acceptance passed).  Wrap the call in assertRaises, in the
    same style as the other validation tests in this suite.
    """
    config = G.setup_config(1, 100, 10)
    with self.assertRaises(Exception):
        config.register_player("p1", "dummy")
## Initialization """ # %% h_size = 128 # %% %time main_wp = DQNPlayer(h_size=h_size, is_restore=True, is_train=False, debug=True, is_double=True) # %% """ ## Testing """ # %% config = setup_config(max_round=2, initial_stack=1500, small_blind_amount=15, summary_file='/dev/null') config.register_player(name="wp", algorithm=main_wp) # config.register_player(name="r2", algorithm=RandomPlayer()) config.register_player(name="f2", algorithm=pm.CallPlayer()) config.register_player(name="f3", algorithm=pm.CallPlayer()) config.register_player(name="f4", algorithm=pm.CallPlayer()) config.register_player(name="f5", algorithm=pm.CallPlayer()) config.register_player(name="f6", algorithm=pm.CallPlayer()) config.register_player(name="f7", algorithm=pm.CallPlayer()) config.register_player(name="f8", algorithm=pm.CallPlayer()) config.register_player(name="f9", algorithm=pm.CallPlayer()) game_result = start_poker(config, verbose=1) # %%
import numpy as np
from pypokerengine.api.game import start_poker, setup_config
from .players.fish_player import FishPlayer
from .players.monte_carlo_player import MonteCarloPlayer

# NOTE(review): relative imports inside an `if __name__ == '__main__'`
# script only work when executed with `python -m package.module`.
if __name__ == '__main__':
    blogger_bot = MonteCarloPlayer()

    # The stack log contains the stacks of the Data Blogger bot after
    # each game (the initial stack is 100).
    stack_log = []
    # FIX: loop variable renamed from `round` to `game_no` so the
    # builtin round() is no longer shadowed.
    for game_no in range(1000):
        p1, p2 = blogger_bot, FishPlayer()
        config = setup_config(max_round=5, initial_stack=100,
                              small_blind_amount=5)
        config.register_player(name="p1", algorithm=p1)
        config.register_player(name="p2", algorithm=p2)
        game_result = start_poker(config, verbose=0)

        # Record only the Monte Carlo bot's stack, matched by uuid.
        stack_log.append([player['stack']
                          for player in game_result['players']
                          if player['uuid'] == blogger_bot.uuid])
        print('Avg. stack:', '%d' % (int(np.mean(stack_log))))
# Training hyperparameters
N_GAMES = 10000
N_ROUNDS_PER_GAME = 20
SAVE_PATH = 'saved_models/second_test/policy_net_after'

# Game parameters
MAX_ROUND = 10
INITIAL_STACK = 100
SMALL_BLIND_AMOUNT = 5

# One policy network per bot
policy1 = Network()
policy2 = Network()

# The two self-play players
bot1 = PGBot('bot_p1', policy1)
bot2 = PGBot('bot_p2', policy2)

for game_idx in range(N_GAMES):
    config = setup_config(max_round=N_ROUNDS_PER_GAME,
                          initial_stack=INITIAL_STACK,
                          small_blind_amount=SMALL_BLIND_AMOUNT)
    config.register_player(name="bot_p1", algorithm=bot1)
    config.register_player(name="bot_p2", algorithm=bot2)
    game_result = start_poker(config, verbose=0)

    # Checkpoint the first policy every 10000 games, naming the file
    # after the cumulative round count.
    if (game_idx + 1) % 10000 == 0:
        model_save = SAVE_PATH + str((game_idx + 1) * N_ROUNDS_PER_GAME)
        print('Games played:', str((game_idx + 1) * N_ROUNDS_PER_GAME))
        policy1.save_network(model_save)