Code Example #1
File: shared_dqn_example.py  Project: unaeat/holdem
def lets_play(env, n_seats, model_list):
    try:
        while True:
            cur_state = env.new_cycle()
            # env.render(mode='human')
            cycle_terminal = False
            if env.episode_end:
                break

            while not cycle_terminal:
                actions = holdem.model_list_action(cur_state=cur_state,
                                                   n_seats=n_seats,
                                                   model_list=model_list)

                for m in model_list:
                    m.showAction(actions)

                cur_state, rews, cycle_terminal, info = env.step(actions)

            for m in model_list:
                m.endCycle(cur_state)

            # for s in cur_state.player_states:
            #     print( holdem.utils.hand_to_str(s.hand, "human"))
    except Exception as e:
        traceback.print_exc()
        raise
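
All eight examples drive the table through holdem.model_list_action, which collects one action per seated model, and they call back into the models through a small duck-typed interface: showAction and endCycle here, reset_state in the arena examples further down. Below is a minimal no-op stub of that interface, as a hypothetical sketch; the action-selection hook that model_list_action itself invokes is fork-specific and is not shown in these snippets.

class NoOpModel:
    """Hypothetical stub of the agent interface used in these examples.

    Method names are taken from the calls visible in the snippets; the
    action-selection hook invoked inside holdem.model_list_action varies
    between forks of the holdem package and is not stubbed here.
    """

    def showAction(self, actions):
        # Example #1 broadcasts every seat's action to each model.
        pass

    def endCycle(self, cur_state):
        # Example #1 notifies each model once a hand (cycle) finishes.
        pass

    def reset_state(self):
        # The arena examples (#5, #8) reset each model before a new hand.
        pass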
Code Example #2
def episode(env, n_seats, model_list):
    # arg_list (parsed command-line args), the episode counter i, and
    # colored_output are module-level names in the original file.
    if arg_list.log:
        o_mode = 'machine'
    else:
        o_mode = 'human'
    while not env.episode_end:
        cur_state, cycle_terminal = env.reset()
        # if not env.episode_end and cycle_terminal:
        if cycle_terminal:
            # a cycle may terminate immediately here because short-stacked players can be forced all-in
            env.render(mode=o_mode, cur_episode=i)
        if env.episode_end:
            break
        valid_actions = env.get_valid_actions(env._current_player)
        while not cycle_terminal:
            actions = holdem.model_list_action(cur_state,
                                               n_seats=n_seats,
                                               model_list=model_list,
                                               valid_actions=valid_actions)
            cur_state, rews, cycle_terminal, valid_actions = env.step(actions)
            env.render(mode=o_mode, cur_episode=i)
            if env.episode_end:
                break

    print(colored_output("Episode ends.\n", 'magenta'))
Code Example #3
def lets_play(env, n_seats, model_list):
    cur_state = env.reset()

    # display the table, cards and all
    env.render(mode='human')

    end_of_game = False
    while not end_of_game:
        cycle_terminal = False
        while not cycle_terminal:
            #  play safe actions, check when no one else has raised, call when raised.
            # actions = holdem.safe_actions(cur_state, n_seats=n_seats)
            print("state(t)")
            for p in cur_state.player_states:
                print(p)
            print(cur_state.community_state)

            actions = holdem.model_list_action(cur_state,
                                               n_seats=n_seats,
                                               model_list=model_list)
            cur_state, rews, cycle_terminal, info = env.step(actions)

            print("action(t), (CALL=1, RAISE=2, FOLD=3, CHECK=0, [action, amount])")
            print(actions)

            print("reward(t+1)")
            print(rews)

            env.render(mode="machine")
        print("final state")
        print(cur_state)
        break
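
Before lets_play can run, the environment needs seated players. Here is a minimal driver sketch, assuming the upstream holdem package's gym registration and its env.add_player(seat_id, stack=...) API; forks may register other env ids (Code Example #4 uses 'TexasHoldem-v2').

import gym
import holdem

# Hypothetical setup, modeled on the upstream holdem README; the env id
# and player-adding API may differ between the forks shown here.
env = gym.make('TexasHoldem-v1')
env.add_player(0, stack=2000)  # seat 0
env.add_player(1, stack=2000)  # seat 1

model_list = [NoOpModel(), NoOpModel()]  # stand-in agents (stub above)
lets_play(env, env.n_seats, model_list)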
Code Example #4
def _lets_play():
    env = gym.make('TexasHoldem-v2')  # holdem.TexasHoldemEnv(2)
    # add_users seats the players and returns their models; it is defined
    # elsewhere in the project (see the sketch after this example).
    model_list = add_users(env)

    while True:

        cur_state = env.reset()
        env.render(mode='human')
        cycle_terminal = False
        # (cur_state)
        if env.episode_end:
            break

        while not cycle_terminal:
            current_player = cur_state.community_state.current_player
            # play safe actions, check when no one else has raised, call when raised.
            # print(">>> Debug Information ")
            # print("state(t)")
            # for p in cur_state.player_states:
            #     print(p)
            # print(cur_state.community_state)

            actions = holdem.model_list_action(cur_state,
                                               n_seats=env.n_seats,
                                               model_list=model_list)
            cur_state, rews, cycle_terminal, info = env.step(actions)

            if cycle_terminal:
                try:
                    for p in cur_state.player_states:
                        if p.emptyplayer:
                            continue
                        model_action = model_list[p.seat].RoundEndAction(
                            cur_state, p.seat)
                except Exception:
                    pass

            # print("action(t), (CALL=1, RAISE=2, FOLD=3 , CHECK=0, [action, amount])")
            # print(actions)

            # print("reward(t+1)")
            # print(rews)
            # print("<<< Debug Information ")

            env.render(mode="human")
        # print("final state")
        # print(cur_state)

        # total_stack = sum([p.stack for p in env._seats])
        # if total_stack != 10000:
        #     return

    print("Episode End!!!")
Code Example #5
File: arena.py  Project: sypannapis/holdem
def episode(env, n_seats, model_list):
    while True:
        cur_state = env.new_cycle()
        env.render(mode='machine')
        cycle_terminal = False
        try:
            logger.info("resetting all model states")
            for m in model_list:
                m.reset_state()
        except Exception:
            pass

        # (cur_state)
        if env.episode_end:
            break

        while not cycle_terminal:
            # play safe actions, check when no one else has raised, call when raised.
            # print(">>> Debug Information ")
            # print("state(t)")
            # for p in cur_state.player_states:
            #     print(p)
            # print(cur_state.community_state)

            actions = holdem.model_list_action(cur_state,
                                               n_seats=n_seats,
                                               model_list=model_list)
            cur_state, rews, cycle_terminal, info = env.step(actions)

            # print("action(t), (CALL=1, RAISE=2, FOLD=3 , CHECK=0, [action, amount])")
            # print(actions)

            # print("reward(t+1)")
            # print(rews)
            # print("<<< Debug Information ")
            env.render(mode="machine")
        # print("final state")
        # print(cur_state)

        # total_stack = sum([p.stack for p in env._seats])
        # if total_stack != 10000:
        #     return
    try:
        for p in env.winning_players:
            model_list[p.player_id].estimateReward(p.stack)
    except Exception:
        pass

    logger.info("Episode End!!!")
    return np.array([p.stack for p in cur_state.player_states])
Code Example #6
File: local_example.py  Project: yuting-tseng/holdem
def lets_play(env, n_seats, model_list):
    while True:
        cur_state = env.reset()
        env.render(mode='human')
        cycle_terminal = False
        # (cur_state)
        if env.episode_end:
            break

        while not cycle_terminal:
            # play safe actions, check when no one else has raised, call when raised.
            # print(">>> Debug Information ")
            # print("state(t)")
            # for p in cur_state.player_states:
            #     print(p)
            # print(cur_state.community_state)

            actions = holdem.model_list_action(cur_state, n_seats=n_seats, model_list=model_list)
            cur_state, rews, cycle_terminal, info = env.step(actions)

            # print("action(t), (CALL=1, RAISE=2, FOLD=3 , CHECK=0, [action, amount])")
            # print(actions)

            # print("reward(t+1)")
            # print(rews)
            # print("<<< Debug Information ")
            env.render(mode="human")
        # print("final state")
        # print(cur_state)

        # total_stack = sum([p.stack for p in env._seats])
        # if total_stack != 10000:
        #     return

    print("Episode End!!!")
Code Example #7
from collections import deque

def lets_play(env, n_seats, model_list):
    memory = deque(maxlen=10000)

    def model_saveMemory(state, action, reward, next_state):
        memory.append((state, action, reward, next_state))

    # parameters for the dqn_model
    dqnModel_id = 0
    rounds_to_train = 1

    cur_state = env.reset()

    # display the table, cards and all
    env.render(mode='human')

    end_of_game = False
    while not end_of_game:
        cycle_terminal = False
        boolean_NeedToRememberStateT = True
        boolean_NeedToRememberStateT1 = False
        cur_round = env._get_round_number()
        stateT_list = []
        stateT1_list = []
        actionList = []
        begin_money = cur_state.player_states[dqnModel_id].stack
        while not cycle_terminal:
            #  play safe actions, check when no one else has raised, call when raised.
            # actions = holdem.safe_actions(cur_state, n_seats=n_seats)

            # print("state(t)")
            # for p in cur_state.player_states:
            #   print(p)
            # print(cur_state.community_state)

            # if the dqn_agent has acted and play has moved to the next
            # round, remember cur_state as state t+1
            if cur_round != env._get_round_number():
                cur_round = env._get_round_number()
                # print("Turn into next round:",cur_round)
                if boolean_NeedToRememberStateT1:
                    # print("state_t+1:",cur_state)
                    boolean_NeedToRememberStateT1 = False
                    boolean_NeedToRememberStateT = True

                    stateT1_For_neuralNetwork = model_list[
                        dqnModel_id].turn_observation_to_stateJust52_plus2dim(
                            cur_state, dqnModel_id)
                    rank, percentage = model_list[
                        dqnModel_id].evaluateFromState(cur_state, dqnModel_id)
                    stateT1_For_neuralNetwork.append(rank)
                    stateT1_For_neuralNetwork.append(percentage)
                    stateT1_list.append(stateT1_For_neuralNetwork)
                    # index -3 of the feature vector holds the stack, so
                    # stack_t - stack_t+1 > 0 means the agent put chips in
                    if stateT_list[-1][-3] - stateT1_list[-1][-3] > 0:
                        # play the next round
                        actionList.append(0)
                    else:
                        # do not play the next round
                        actionList.append(1)
                    # input("pause")

            actions = holdem.model_list_action(cur_state,
                                               n_seats=n_seats,
                                               model_list=model_list)

            # if the current player is the dqn_agent, remember cur_state as state t
            if cur_state.community_state.current_player == dqnModel_id:
                # print("state_t:",cur_state)
                if boolean_NeedToRememberStateT:
                    boolean_NeedToRememberStateT = False
                    boolean_NeedToRememberStateT1 = True

                    stateT_For_neuralNetwork = model_list[
                        dqnModel_id].turn_observation_to_stateJust52_plus2dim(
                            cur_state, dqnModel_id)
                    rank, percentage = model_list[
                        dqnModel_id].evaluateFromState(cur_state, dqnModel_id)
                    stateT_For_neuralNetwork.append(rank)
                    stateT_For_neuralNetwork.append(percentage)
                    stateT_list.append(stateT_For_neuralNetwork)
                    # print(stateT_For_neuralNetwork)
                    # input("pause")

            # and do next action.
            cur_state, rews, cycle_terminal, info = env.step(actions)

            env.render(mode="machine")

            # when the cycle ends, store the hand's net money change as the reward
            if cycle_terminal:
                if not (len(stateT_list) == len(actionList) == len(stateT1_list)):
                    print("Error: state_t / action / state_t+1 lists have mismatched lengths")
                    break
                else:
                    reward = cur_state.player_states[
                        dqnModel_id].stack - begin_money
                    # print("stateT_list:",stateT_list)
                    # print("action:",actionList)
                    # print("stateT1_list:",stateT1_list)
                    # print("reward:",reward)
                    model_saveMemory(stateT_list, actionList, reward,
                                     stateT1_list)
                # input("pause")
                print("Finish this game")

            if len(memory) >= rounds_to_train:
                # rounds_to_train defines how many finished rounds to
                # collect before training the model.
                model_list[dqnModel_id].train(memory)
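
The train hook called at the end belongs to the project's DQN model and is not shown here. Below is a generic experience-replay sketch of what such a method might do with the stored (state_t list, action list, reward, state_t+1 list) tuples; estimate_value and update are assumed helper methods, not part of any of these projects.

import random

def train(self, memory, batch_size=32, gamma=0.99):
    # Hypothetical replay step: sample stored rounds and unroll each
    # (states_t, actions, reward, states_t1) tuple into transitions.
    batch = random.sample(memory, min(batch_size, len(memory)))
    for states_t, actions, reward, states_t1 in batch:
        for s_t, a, s_t1 in zip(states_t, actions, states_t1):
            # The hand's final reward is shared across its transitions;
            # the TD target bootstraps from the next state's value.
            target = reward + gamma * self.estimate_value(s_t1)  # assumed helper
            self.update(s_t, a, target)  # assumed gradient step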
Code Example #8
File: arena.py  Project: AnsonShie/holdem
def episode(env, n_seats, model_list):
    while True:
        cur_state = env.new_cycle()
        env.render(mode='machine')
        cycle_terminal = False
        try:
            logger.info("resetting all model states")
            for m in model_list:
                m.reset_state()
        except Exception:
            pass

        # (cur_state)
        if env.episode_end:
            break
        action_code = None
        pre_state = None
        # DQN_player_id is a module-level constant in the original file
        initial_stack = cur_state.player_states[DQN_player_id].stack
        while not cycle_terminal:
            # play safe actions, check when no one else has raised, call when raised.
            # print(">>> Debug Information ")
            # print("state(t)")
            # for p in cur_state.player_states:
            #     print(p)
            # print(cur_state.community_state)
            DQN_player_react = False
            cur_pre_act_state = cur_state
            if cur_state.community_state.current_player == DQN_player_id:
                DQN_player_react = True

            actions = holdem.model_list_action(cur_state,
                                               n_seats=n_seats,
                                               model_list=model_list)
            cur_state, rews, cycle_terminal, info = env.step(actions)

            if DQN_player_react:
                if action_code is not None and pre_state is not None:
                    if model_list[DQN_player_id].react != 0:
                        model_list[DQN_player_id].remember(
                            pre_state, action_code, 0, cur_pre_act_state,
                            False, DQN_player_id)
                        model_list[DQN_player_id].onlineTrainModel()
                        pre_state = cur_state
                        action_code = model_list[DQN_player_id].react
                    elif model_list[DQN_player_id].react == 0:
                        #model_list[DQN_player_id].remember(pre_state, action_code, cur_state.player_states[DQN_player_id].stack, cur_pre_act_state, False, DQN_player_id)
                        #model_list[DQN_player_id].onlineTrainModel()
                        pre_state = None
                        action_code = None
                else:
                    pre_state = cur_state
                    action_code = model_list[DQN_player_id].react

            # print("action(t), (CALL=1, RAISE=2, FOLD=3 , CHECK=0, [action, amount])")
            # print(actions)

            # print("reward(t+1)")
            # print(rews)
            # print("<<< Debug Information ")
            env.render(mode="machine")
        if action_code is not None and pre_state is not None:
            model_list[DQN_player_id].remember(
                pre_state, action_code,
                cur_state.player_states[DQN_player_id].stack - initial_stack,
                cur_state, True, DQN_player_id)
            model_list[DQN_player_id].onlineTrainModel()
        # print("final state")
        # print(cur_state)

        # total_stack = sum([p.stack for p in env._seats])
        # if total_stack != 10000:
        #     return
    try:
        for p in env.winning_players:
            model_list[p.player_id].estimateReward(p.stack)
    except Exception:
        pass

    logger.info("Episode End!!!")
    return np.array([p.stack for p in cur_state.player_states])
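
Both arena variants return the final stacks as a NumPy array, so a caller can track performance across episodes. A minimal sketch, reusing the env/model setup assumed in the earlier driver sketch:

import numpy as np

stack_history = []
for _ in range(100):
    # Hypothetical driver; assumes the env is re-created (or reset)
    # between episodes, which these snippets do not show.
    stack_history.append(episode(env, env.n_seats, model_list))
print("mean final stacks per seat:", np.mean(np.array(stack_history), axis=0))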