def checkpoint(self, curr_step):
    if self._SINGLE:
        for p_id in range(self._t_prof.n_seats):
            # state = {
            #     "strat_buffer": self._strategy_buffers[p_id].state_dict(),
            #     "env_bldr": self._env_bldr,
            # }
            MODE = EvalAgentDeepCFR.EVAL_MODE_SINGLE
            t_prof = copy.deepcopy(self._t_prof)
            t_prof.eval_modes_of_algo = [MODE]

            eval_agent = EvalAgentDeepCFR(t_prof=t_prof)
            eval_agent.reset()
            eval_agent._strategy_buffers = self._strategy_buffers  # could copy - it's just for the export, so it's ok
            eval_agent.set_mode(mode=MODE)

            # with open(self._get_checkpoint_file_path(name=self._t_prof.name, step=curr_step,
            #                                          cls=self.__class__, worker_id="P" + str(p_id)),
            #           "wb") as pkl_file:
            path = f'sample_{p_id}.pkl'  # NOTE: currently unused; the checkpoint write above is commented out

            if curr_step == 30:  # Could be replaced with a configurable iteration check; hard-coded for now
                from PokerRL.game.InteractiveGame import InteractiveGame

                game = InteractiveGame(
                    env_cls=eval_agent.env_bldr.env_cls,
                    env_args=eval_agent.env_bldr.env_args,
                    seats_human_plays_list=[0],
                    eval_agent=eval_agent,
                )
                game.start_to_play()
"N": None, } mean = np.mean(data).item() std = np.std(data).item() conf = 1.96 * std / np.sqrt(len(data)) return { "mean": float(mean), "std": float(std), "conf": float(conf), "N": len(data), } if __name__ == '__main__': eval_agent_first = EvalAgentDeepCFR.load_from_disk( path_to_eval_agent=path_to_first_eval_agent) eval_agent_second = EvalAgentDeepCFR.load_from_disk( path_to_eval_agent=path_to_second_eval_agent) #assert eval_agent_first.t_prof.name == eval_agent_second.t_prof.name env_bldr = eval_agent_first.env_bldr env = env_bldr.get_new_env(is_evaluating=False) strategy_differences = { r: {depth: [] for depth in range(MAX_DEPTH)} for r in env_bldr.rules.ALL_ROUNDS_LIST } start_time = time.time()
def export_agent(self, step):
    _dir = ospj(self._t_prof.path_agent_export_storage, str(self._t_prof.name), str(step))
    file_util.create_dir_if_not_exist(_dir)

    # """"""""""""""""""""""""""""
    # Deep CFR
    # """"""""""""""""""""""""""""
    if self._AVRG:
        MODE = EvalAgentDeepCFR.EVAL_MODE_AVRG_NET
        t_prof = copy.deepcopy(self._t_prof)
        t_prof.eval_modes_of_algo = [MODE]

        eval_agent = EvalAgentDeepCFR(t_prof=t_prof)
        eval_agent.reset()
        w = {EvalAgentDeepCFR.EVAL_MODE_AVRG_NET: self._pull_avrg_net_eval_strat()}
        eval_agent.update_weights(w)
        eval_agent.set_mode(mode=MODE)
        eval_agent.store_to_disk(path=_dir, file_name="eval_agent" + MODE)

    # """"""""""""""""""""""""""""
    # SD-CFR
    # """"""""""""""""""""""""""""
    if self._SINGLE:
        MODE = EvalAgentDeepCFR.EVAL_MODE_SINGLE
        t_prof = copy.deepcopy(self._t_prof)
        t_prof.eval_modes_of_algo = [MODE]

        eval_agent = EvalAgentDeepCFR(t_prof=t_prof)
        eval_agent.reset()
        eval_agent._strategy_buffers = self._strategy_buffers  # could copy - it's just for the export, so it's ok
        eval_agent.set_mode(mode=MODE)
        eval_agent.store_to_disk(path=_dir, file_name="eval_agent" + MODE)
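    # Agents exported above can later be restored with EvalAgentDeepCFR.load_from_disk().
    # A hedged sketch (the directory mirrors _dir above; the ".pkl" suffix matches the file
    # names loaded by the other scripts in this section):
    #
    #     agent = EvalAgentDeepCFR.load_from_disk(
    #         path_to_eval_agent=ospj(_dir, "eval_agent" + MODE + ".pkl"))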
""" This file is not runable; it's is a template to show how you could play against your algorithms. To do so, replace "YourAlgorithmsEvalAgentCls" with the EvalAgent subclass (not instance) of your algorithm. Note that you can see the AI's cards on the screen since this is just a research application and not meant for actual competition. The AI can, of course, NOT see your cards. """ from PokerRL.game.InteractiveGameCustom import InteractiveGameCustom from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR from PokerRL.game.games import DiscretizedNLHoldem from PokerRL.game import bet_sets if __name__ == '__main__': eval_agent = EvalAgentDeepCFR.load_from_disk( path_to_eval_agent="eval_agentSINGLE.pkl") playerA = 500 playerB = 500 round = 0 playerAWinnings = 0 playerBWinnings = 0 while True: while playerA > 0 and playerB > 0: if round % 2 == 0: game_cls = DiscretizedNLHoldem args = game_cls.ARGS_CLS(n_seats=2, bet_sizes_list_as_frac_of_pot=bet_sets.B_5,
            _eval_agents[1 - REFERENCE_AGENT].notify_of_action(p_id_acted=p_id_acting,
                                                               action_he_did=action_int)

        elif p_id_acting == seat_p1:
            a_probs = _eval_agents[REFERENCE_AGENT].get_a_probs()
            action_int, _ = _eval_agents[1 - REFERENCE_AGENT].get_action(step_env=True, need_probs=False)
            _eval_agents[REFERENCE_AGENT].notify_of_action(p_id_acted=p_id_acting,
                                                           action_he_did=action_int)

        else:
            raise ValueError("Only HU supported!")

        _, r_for_all, done, info = _env.step(action_int)

    end_time = time.time()
    print("Time taken", end_time - start_time)
    print(optimizer)

    return results


agent_file1 = "/home/leduc/poker_ai_data/eval_agent/SD-CFR_LEDUC_EXAMPLE_200/120/eval_agentAVRG_NET.pkl"

student_agent = EvalAgentDeepRange(t_prof, mode=None, device=None)
teacher_agent = EvalAgentDeepCFR.load_from_disk(path_to_eval_agent=agent_file1)

results = distill(student_agent, teacher_agent, args={'lr': 1e-2, 'iters': 500000, 'lambda': 1})

name = "deep_range_500000_1"
student_agent.save_to_file(name + ".pt")
with open(name + "_log.pkl", "wb") as f:
    pickle.dump(results, f)
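# The saved log can later be reloaded in a separate session for inspection, e.g. (hedged
# sketch; assumes the same file name and that `results` is a plain picklable object, as the
# dump above implies):
#
#     import pickle
#     with open("deep_range_500000_1_log.pkl", "rb") as f:
#         results = pickle.load(f)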
N_DECK = 52
N_HOLE = 169  # Number of possible hole-card classes: 13 * 12 + 13


# A function that takes a hole hand and produces a (high rank, low rank, is_suited) representation
def hand2rep(hand):
    card1_rank = hand[0][0]
    card1_suit = hand[0][1]
    card2_rank = hand[1][0]
    card2_suit = hand[1][1]
    suited = (card2_suit == card1_suit)
    high_rank = max(card1_rank, card2_rank)
    low_rank = min(card1_rank, card2_rank)
    return (high_rank, low_rank, suited)


# Load EvalAgent from file
curr_eval_agent = EvalAgentDeepCFR.load_from_disk(path_to_eval_agent=path_to_eval_agent)

# Get an env bldr from the agent and create an env
env_bldr = curr_eval_agent.env_bldr
env = env_bldr.get_new_env(is_evaluating=False)

start_time = time.time()
hands = {}
while len(hands) < N_HOLE:
    # Reset env and EvalAgent
    env.reset()
    curr_eval_agent.reset(deck_state_dict=env.cards_state_dict())

    # Act
    for c in history:
        current_seat = env.current_player.seat_id
        env.step(str_to_action[c])
def hand2rep(hand):
    card1_rank = hand[0][0]
    card1_suit = hand[0][1]
    card2_rank = hand[1][0]
    card2_suit = hand[1][1]
    suited = (card2_suit == card1_suit)
    high_rank = max(card1_rank, card2_rank)
    low_rank = min(card1_rank, card2_rank)
    return (high_rank, low_rank, suited)


# --------------- Generate p0 strat -------------------------
# Loading EvalAgents and checking if they have the same experiment name
eval_agent_dcfr = EvalAgentDeepCFR.load_from_disk(
    path_to_eval_agent=path_to_dcfr_eval_agent)

# Get an env bldr from the agent and create an env
env_bldr = eval_agent_dcfr.env_bldr
env = env_bldr.get_new_env(is_evaluating=False)

start_time = time.time()
hands = {}
while len(hands) < N_HOLE:
    obs, rew, done, info = env.reset()
    eval_agent_dcfr.reset(deck_state_dict=env.cards_state_dict())

    hole_hand = hand2rep(env.seats[0].hand)
    if hole_hand not in hands:
        hands[hole_hand] = eval_agent_dcfr.get_a_probs()

print(f"Computed {N_HOLE} possible hands in {time.time() - start_time} sec")
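# Quick illustrative check of hand2rep (hypothetical encoding: each card is a [rank, suit]
# pair with ranks 0-12 and suits 0-3, which is what the indexing above assumes):
example_hand = [[12, 3], [10, 3]]  # two cards sharing a suit
assert hand2rep(example_hand) == (12, 10, True)  # (high rank, low rank, suited)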