import logging
import traceback
from collections import deque

import gym
import numpy as np

import holdem

logger = logging.getLogger(__name__)


# Variant: broadcast every action to all models and notify them at cycle end.
def lets_play(env, n_seats, model_list):
    try:
        while True:
            cur_state = env.new_cycle()
            # env.render(mode='human')
            cycle_terminal = False
            if env.episode_end:
                break
            while not cycle_terminal:
                actions = holdem.model_list_action(cur_state=cur_state,
                                                   n_seats=n_seats,
                                                   model_list=model_list)
                # let every model observe the actions taken this step
                for m in model_list:
                    m.showAction(actions)
                cur_state, rews, cycle_terminal, info = env.step(actions)
            # notify every model that the cycle (hand) is over
            for m in model_list:
                m.endCycle(cur_state)
            # for s in cur_state.player_states:
            #     print(holdem.utils.hand_to_str(s.hand, "human"))
    except Exception:
        traceback.print_exc()
        raise
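# Hedged sketch of the model interface the loops in this file rely on.  The
# hook names (showAction, endCycle, reset_state) are taken from the calls in
# these loops; takeAction is an assumption about what holdem.model_list_action
# invokes for each seat.  The [action, amount] encoding (CHECK=0, CALL=1,
# RAISE=2, FOLD=3) matches the debug comments in the loops themselves.
class CallBotModel:
    """Minimal stand-in model that always calls."""

    def takeAction(self, state, seat):
        return [1, 0]  # CALL, no raise amount

    def showAction(self, actions):
        pass  # observe what every seat did this step

    def endCycle(self, state):
        pass  # per-hand cleanup hook

    def reset_state(self):
        pass  # clear per-episode memory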
# Variant: drives the env with the valid_actions it returns at each step.
# `arg_list` (parsed CLI options) and `colored_output` are expected to come
# from the surrounding module; `i`, the episode counter, is taken here as an
# assumed parameter because the original read it from an enclosing scope.
def episode(env, n_seats, model_list, i=0):
    if arg_list.log:
        o_mode = 'machine'
    else:
        o_mode = 'human'
    while not env.episode_end:
        cur_state, cycle_terminal = env.reset()
        # if not env.episode_end and cycle_terminal:
        if cycle_terminal:
            # a cycle may terminate immediately: short-stacked players can be
            # forced all-in by the blinds
            env.render(mode=o_mode, cur_episode=i)
        if env.episode_end:
            break
        valid_actions = env.get_valid_actions(env._current_player)
        while not cycle_terminal:
            actions = holdem.model_list_action(cur_state, n_seats=n_seats,
                                               model_list=model_list,
                                               valid_actions=valid_actions)
            cur_state, rews, cycle_terminal, valid_actions = env.step(actions)
            env.render(mode=o_mode, cur_episode=i)
            if env.episode_end:
                break
    print(colored_output("Episode ends.\n", 'magenta'))
# Variant: verbose demo that prints every state transition of a single cycle.
def lets_play(env, n_seats, model_list):
    cur_state = env.reset()
    # display the table, cards and all
    env.render(mode='human')
    end_of_game = False
    while not end_of_game:
        cycle_terminal = False
        while not cycle_terminal:
            # play safe actions: check when no one else has raised, call when raised
            # actions = holdem.safe_actions(cur_state, n_seats=n_seats)
            print("state(t)")
            for p in cur_state.player_states:
                print(p)
            print(cur_state.community_state)
            actions = holdem.model_list_action(cur_state, n_seats=n_seats,
                                               model_list=model_list)
            cur_state, rews, cycle_terminal, info = env.step(actions)
            print("action(t), (CHECK=0, CALL=1, RAISE=2, FOLD=3, [action, amount])")
            print(actions)
            print("reward(t+1)")
            print(rews)
            env.render(mode="machine")
        print("final state")
        print(cur_state)
        break  # demo plays a single cycle, then stops
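# Hedged usage sketch for the demo above.  gym.make('TexasHoldem-v2') appears
# later in this file; add_player(seat, stack) is assumed to be the holdem
# package's seat-registration API, and CallBotModel is the stand-in model
# defined earlier.  example_setup itself is a hypothetical driver name.
def example_setup(n_seats=2):
    env = gym.make('TexasHoldem-v2')
    for seat in range(n_seats):
        env.add_player(seat, stack=2000)  # assumed registration API
    model_list = [CallBotModel() for _ in range(n_seats)]
    lets_play(env, n_seats, model_list)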
# Variant: self-contained driver that builds the env itself and fires each
# model's RoundEndAction hook when a cycle finishes.
def _lets_play():
    env = gym.make('TexasHoldem-v2')  # holdem.TexasHoldemEnv(2)
    model_list = add_users(env)  # add_users() comes from the surrounding module
    while True:
        cur_state = env.reset()
        env.render(mode='human')
        cycle_terminal = False
        if env.episode_end:
            break
        while not cycle_terminal:
            current_player = cur_state.community_state.current_player
            # play safe actions: check when no one else has raised, call when raised
            # print(">>> Debug Information ")
            # print("state(t)")
            # for p in cur_state.player_states:
            #     print(p)
            # print(cur_state.community_state)
            actions = holdem.model_list_action(cur_state, n_seats=env.n_seats,
                                               model_list=model_list)
            cur_state, rews, cycle_terminal, info = env.step(actions)
            if cycle_terminal:
                # let every seated model run its end-of-round hook
                try:
                    for p in cur_state.player_states:
                        if p.emptyplayer:
                            continue
                        model_action = model_list[p.seat].RoundEndAction(cur_state, p.seat)
                except Exception:
                    pass
            # print("action(t), (CHECK=0, CALL=1, RAISE=2, FOLD=3, [action, amount])")
            # print(actions)
            # print("reward(t+1)")
            # print(rews)
            # print("<<< Debug Information ")
            env.render(mode="human")
        # print("final state")
        # print(cur_state)
        # total_stack = sum([p.stack for p in env._seats])
        # if total_stack != 10000:
        #     return
    print("Episode End!!!")
# Variant: machine-rendered episode loop that resets model state each cycle
# and credits winners via estimateReward; returns the final stacks.
def episode(env, n_seats, model_list):
    while True:
        cur_state = env.new_cycle()
        env.render(mode='machine')
        cycle_terminal = False
        try:
            logger.info("resetting all model state")
            for m in model_list:
                m.reset_state()
        except Exception:
            pass
        if env.episode_end:
            break
        while not cycle_terminal:
            # play safe actions: check when no one else has raised, call when raised
            # print(">>> Debug Information ")
            # print("state(t)")
            # for p in cur_state.player_states:
            #     print(p)
            # print(cur_state.community_state)
            actions = holdem.model_list_action(cur_state, n_seats=n_seats,
                                               model_list=model_list)
            cur_state, rews, cycle_terminal, info = env.step(actions)
            # print("action(t), (CHECK=0, CALL=1, RAISE=2, FOLD=3, [action, amount])")
            # print(actions)
            # print("reward(t+1)")
            # print(rews)
            # print("<<< Debug Information ")
            env.render(mode="machine")
        # print("final state")
        # print(cur_state)
        # total_stack = sum([p.stack for p in env._seats])
        # if total_stack != 10000:
        #     return
        try:
            # credit each winner's final stack as its reward estimate
            for p in env.winning_players:
                model_list[p.player_id].estimateReward(p.stack)
        except Exception:
            pass
    logger.info("Episode End!!!")
    return np.array([p.stack for p in cur_state.player_states])
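# Hedged sketch of consuming the stack array episode() returns, assuming the
# env can be reused across episodes; run_tournament and n_episodes are
# hypothetical names, not part of the holdem package.
def run_tournament(env, n_seats, model_list, n_episodes=100):
    totals = np.zeros(n_seats)
    for _ in range(n_episodes):
        totals += episode(env, n_seats, model_list)
    return totals / n_episodes  # average final stack per seat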
# Variant: minimal human-rendered episode loop.
def lets_play(env, n_seats, model_list):
    while True:
        cur_state = env.reset()
        env.render(mode='human')
        cycle_terminal = False
        if env.episode_end:
            break
        while not cycle_terminal:
            # play safe actions, check when no one else has raised, call when raised.
            # print(">>> Debug Information ")
            # print("state(t)")
            # for p in cur_state.player_states:
            #     print(p)
            # print(cur_state.community_state)
            actions = holdem.model_list_action(cur_state, n_seats=n_seats,
                                               model_list=model_list)
            cur_state, rews, cycle_terminal, info = env.step(actions)
            # print("action(t), (CHECK=0, CALL=1, RAISE=2, FOLD=3, [action, amount])")
            # print(actions)
            # print("reward(t+1)")
            # print(rews)
            # print("<<< Debug Information ")
            env.render(mode="human")
        # print("final state")
        # print(cur_state)
        # total_stack = sum([p.stack for p in env._seats])
        # if total_stack != 10000:
        #     return
    print("Episode End!!!")
# Variant: DQN data-collection loop that records per-cycle
# (state, action, reward, next_state) tuples and trains periodically.
def lets_play(env, n_seats, model_list):
    memory = deque(maxlen=10000)

    def model_saveMemory(state, action, reward, next_state):
        memory.append((state, action, reward, next_state))

    # parameters for the dqn model
    dqnModel_id = 0
    rounds_to_train = 1

    cur_state = env.reset()
    # display the table, cards and all
    env.render(mode='human')
    end_of_game = False
    # NOTE: end_of_game is never set, so this loop runs until interrupted
    while not end_of_game:
        cycle_terminal = False
        need_to_remember_state_t = True
        need_to_remember_state_t1 = False
        cur_round = env._get_round_number()
        stateT_list = []
        stateT1_list = []
        actionList = []
        begin_money = cur_state.player_states[dqnModel_id].stack
        while not cycle_terminal:
            # play safe actions: check when no one else has raised, call when raised
            # actions = holdem.safe_actions(cur_state, n_seats=n_seats)

            # if the dqn agent acted and the game turned into the next round,
            # remember cur_state as state t+1
            if cur_round != env._get_round_number():
                cur_round = env._get_round_number()
                # print("Turn into next round:", cur_round)
                if need_to_remember_state_t1:
                    need_to_remember_state_t1 = False
                    need_to_remember_state_t = True
                    stateT1_For_neuralNetwork = model_list[dqnModel_id].turn_observation_to_stateJust52_plus2dim(cur_state, dqnModel_id)
                    rank, percentage = model_list[dqnModel_id].evaluateFromState(cur_state, dqnModel_id)
                    stateT1_For_neuralNetwork.append(rank)
                    stateT1_For_neuralNetwork.append(percentage)
                    stateT1_list.append(stateT1_For_neuralNetwork)
                    # index [-3] holds the stack, so stack_t - stack_t+1 > 0
                    # means the agent paid chips to see the next round
                    if stateT_list[-1][-3] - stateT1_list[-1][-3] > 0:
                        actionList.append(0)  # played into the next round
                    else:
                        actionList.append(1)  # did not play into the next round

            actions = holdem.model_list_action(cur_state, n_seats=n_seats,
                                               model_list=model_list)

            # if the current player is the dqn agent, remember cur_state as state t
            if cur_state.community_state.current_player == dqnModel_id:
                if need_to_remember_state_t:
                    need_to_remember_state_t = False
                    need_to_remember_state_t1 = True
                    stateT_For_neuralNetwork = model_list[dqnModel_id].turn_observation_to_stateJust52_plus2dim(cur_state, dqnModel_id)
                    rank, percentage = model_list[dqnModel_id].evaluateFromState(cur_state, dqnModel_id)
                    stateT_For_neuralNetwork.append(rank)
                    stateT_For_neuralNetwork.append(percentage)
                    stateT_list.append(stateT_For_neuralNetwork)

            # ... and take the next action
            cur_state, rews, cycle_terminal, info = env.step(actions)
            env.render(mode="machine")

            # at the end of a cycle, remember the net chip change as reward
            if cycle_terminal:
                if (len(stateT_list) != len(actionList)
                        or len(stateT_list) != len(stateT1_list)
                        or len(actionList) != len(stateT1_list)):
                    print("Error: state_t / action / state_t+1 lists differ in length")
                    break
                reward = cur_state.player_states[dqnModel_id].stack - begin_money
                model_saveMemory(stateT_list, actionList, reward, stateT1_list)

        print("Finish this game")
        if len(memory) > rounds_to_train - 1:
            # define how many cycles to collect before training the model
            model_list[dqnModel_id].train(memory)
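# Hedged sketch of how the replay memory built above could be consumed.  Each
# entry appended by model_saveMemory is a per-cycle tuple
# (state_t_list, action_list, reward, state_t1_list), where the reward is the
# net chip change for the whole hand, shared by every transition in it.
# iter_transitions is a hypothetical helper, not part of the holdem package.
def iter_transitions(memory):
    """Yield flat (state_t, action, reward, state_t1) tuples from cycle entries."""
    for states_t, actions, reward, states_t1 in memory:
        for state_t, action, state_t1 in zip(states_t, actions, states_t1):
            yield state_t, action, reward, state_t1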
# Variant: online-training DQN episode loop.  Intermediate transitions get
# zero reward; the terminal transition gets the net stack change of the hand.
# DQN_player_id is expected to be defined by the surrounding module.
def episode(env, n_seats, model_list):
    while True:
        cur_state = env.new_cycle()
        env.render(mode='machine')
        cycle_terminal = False
        try:
            logger.info("resetting all model state")
            for m in model_list:
                m.reset_state()
        except Exception:
            pass
        if env.episode_end:
            break
        action_code = None
        pre_state = None
        initial_stack = cur_state.player_states[DQN_player_id].stack
        while not cycle_terminal:
            # play safe actions: check when no one else has raised, call when raised
            # print(">>> Debug Information ")
            # print("state(t)")
            # for p in cur_state.player_states:
            #     print(p)
            # print(cur_state.community_state)
            DQN_player_react = False
            cur_pre_act_state = cur_state
            if cur_state.community_state.current_player == DQN_player_id:
                DQN_player_react = True
            actions = holdem.model_list_action(cur_state, n_seats=n_seats,
                                               model_list=model_list)
            cur_state, rews, cycle_terminal, info = env.step(actions)
            if DQN_player_react:
                if action_code is not None and pre_state is not None:
                    if model_list[DQN_player_id].react != 0:
                        # intermediate transition: zero reward until the hand ends
                        model_list[DQN_player_id].remember(
                            pre_state, action_code, 0, cur_pre_act_state,
                            False, DQN_player_id)
                        model_list[DQN_player_id].onlineTrainModel()
                        pre_state = cur_state
                        action_code = model_list[DQN_player_id].react
                    elif model_list[DQN_player_id].react == 0:
                        # model_list[DQN_player_id].remember(pre_state, action_code, cur_state.player_states[DQN_player_id].stack, cur_pre_act_state, False, DQN_player_id)
                        # model_list[DQN_player_id].onlineTrainModel()
                        pre_state = None
                        action_code = None
                else:
                    pre_state = cur_state
                    action_code = model_list[DQN_player_id].react
            # print("action(t), (CHECK=0, CALL=1, RAISE=2, FOLD=3, [action, amount])")
            # print(actions)
            # print("reward(t+1)")
            # print(rews)
            # print("<<< Debug Information ")
            env.render(mode="machine")
        if action_code is not None and pre_state is not None:
            # terminal transition: reward is the net stack change over the hand
            model_list[DQN_player_id].remember(
                pre_state, action_code,
                cur_state.player_states[DQN_player_id].stack - initial_stack,
                cur_state, True, DQN_player_id)
            model_list[DQN_player_id].onlineTrainModel()
        # print("final state")
        # print(cur_state)
        # total_stack = sum([p.stack for p in env._seats])
        # if total_stack != 10000:
        #     return
        try:
            for p in env.winning_players:
                model_list[p.player_id].estimateReward(p.stack)
        except Exception:
            pass
    logger.info("Episode End!!!")
    return np.array([p.stack for p in cur_state.player_states])
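# Hedged sketch of the DQN model surface the loop above exercises.  The loop
# reads model.react and calls remember()/onlineTrainModel()/estimateReward()/
# reset_state(); the skeleton below only mirrors those names with placeholder
# bodies and is not the project's actual implementation.
class DQNModelSkeleton:
    def __init__(self):
        self.react = 0  # last action code chosen by the policy
        self.replay = deque(maxlen=10000)

    def remember(self, state, action, reward, next_state, done, player_id):
        self.replay.append((state, action, reward, next_state, done, player_id))

    def onlineTrainModel(self):
        pass  # sample self.replay and fit the Q-network here

    def estimateReward(self, stack):
        pass  # credit the winner's final stack as a terminal reward signal

    def reset_state(self):
        self.react = 0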