Example #1
    def checkpoint(self, curr_step):
        if self._SINGLE:
            for p_id in range(self._t_prof.n_seats):
                # Instead of pickling {"strat_buffer": ..., "env_bldr": ...} as in the original
                # checkpoint code, build a SINGLE-mode EvalAgent directly from the strategy buffers.
                MODE = EvalAgentDeepCFR.EVAL_MODE_SINGLE
                t_prof = copy.deepcopy(self._t_prof)
                t_prof.eval_modes_of_algo = [MODE]

                eval_agent = EvalAgentDeepCFR(t_prof=t_prof)
                eval_agent.reset()

                eval_agent._strategy_buffers = self._strategy_buffers  # could copy - it's just for the export, so it's ok
                eval_agent.set_mode(mode=MODE)

                # The original code pickled the agent to self._get_checkpoint_file_path(...);
                # a simple per-player path is used here instead.
                path = f'sample_{p_id}.pkl'

                if curr_step == 30:
                    # Hard-coded step; could later be replaced with a check on the iteration count.
                    from PokerRL.game.InteractiveGame import InteractiveGame
                    game = InteractiveGame(
                        env_cls=eval_agent.env_bldr.env_cls,
                        env_args=eval_agent.env_bldr.env_args,
                        seats_human_plays_list=[0],
                        eval_agent=eval_agent,
                    )

                    game.start_to_play()
Example #2
            "N": None,
        }

    mean = np.mean(data).item()
    std = np.std(data).item()
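    # 1.96 is the z-score for a two-sided 95% confidence interval; conf is its half-width for the mean.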
    conf = 1.96 * std / np.sqrt(len(data))
    return {
        "mean": float(mean),
        "std": float(std),
        "conf": float(conf),
        "N": len(data),
    }


if __name__ == '__main__':
    eval_agent_first = EvalAgentDeepCFR.load_from_disk(
        path_to_eval_agent=path_to_first_eval_agent)
    eval_agent_second = EvalAgentDeepCFR.load_from_disk(
        path_to_eval_agent=path_to_second_eval_agent)
    #assert eval_agent_first.t_prof.name == eval_agent_second.t_prof.name

    env_bldr = eval_agent_first.env_bldr
    env = env_bldr.get_new_env(is_evaluating=False)

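    # One list of per-state strategy differences for each (round, depth) pair.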
    strategy_differences = {
        r: {depth: []
            for depth in range(MAX_DEPTH)}
        for r in env_bldr.rules.ALL_ROUNDS_LIST
    }

    start_time = time.time()
Example #3
    def export_agent(self, step):
        _dir = ospj(self._t_prof.path_agent_export_storage,
                    str(self._t_prof.name), str(step))
        file_util.create_dir_if_not_exist(_dir)

        # """"""""""""""""""""""""""""
        # Deep CFR
        # """"""""""""""""""""""""""""
        if self._AVRG:
            MODE = EvalAgentDeepCFR.EVAL_MODE_AVRG_NET

            t_prof = copy.deepcopy(self._t_prof)
            t_prof.eval_modes_of_algo = [MODE]

            eval_agent = EvalAgentDeepCFR(t_prof=t_prof)
            eval_agent.reset()

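            # Package the current average-strategy network weights so update_weights() can load them.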
            w = {
                EvalAgentDeepCFR.EVAL_MODE_AVRG_NET:
                self._pull_avrg_net_eval_strat()
            }
            eval_agent.update_weights(w)
            eval_agent.set_mode(mode=MODE)
            eval_agent.store_to_disk(path=_dir, file_name="eval_agent" + MODE)

        # """"""""""""""""""""""""""""
        # SD-CFR
        # """"""""""""""""""""""""""""
        if self._SINGLE:
            MODE = EvalAgentDeepCFR.EVAL_MODE_SINGLE
            t_prof = copy.deepcopy(self._t_prof)
            t_prof.eval_modes_of_algo = [MODE]

            eval_agent = EvalAgentDeepCFR(t_prof=t_prof)
            eval_agent.reset()

            eval_agent._strategy_buffers = self._strategy_buffers  # could copy - it's just for the export, so it's ok
            eval_agent.set_mode(mode=MODE)
            eval_agent.store_to_disk(path=_dir, file_name="eval_agent" + MODE)
"""
This file is not runnable; it is a template showing how you could play against your algorithms. To do so,
replace "YourAlgorithmsEvalAgentCls" with the EvalAgent subclass (not instance) of your algorithm.

Note that you can see the AI's cards on the screen since this is just a research application and not meant for actual
competition. The AI can, of course, NOT see your cards.
"""

from PokerRL.game.InteractiveGameCustom import InteractiveGameCustom
from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
from PokerRL.game.games import DiscretizedNLHoldem
from PokerRL.game import bet_sets

if __name__ == '__main__':
    eval_agent = EvalAgentDeepCFR.load_from_disk(
        path_to_eval_agent="eval_agentSINGLE.pkl")

    playerA = 500
    playerB = 500
    round = 0

    playerAWinnings = 0
    playerBWinnings = 0

    while True:
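        # Play hands as long as both players still have chips.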
        while playerA > 0 and playerB > 0:

            if round % 2 == 0:
                game_cls = DiscretizedNLHoldem
                args = game_cls.ARGS_CLS(n_seats=2,
                                         bet_sizes_list_as_frac_of_pot=bet_sets.B_5,
Example #5
                    _eval_agents[1 - REFERENCE_AGENT].notify_of_action(p_id_acted=p_id_acting,
                                                                       action_he_did=action_int)
                elif p_id_acting == seat_p1:
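                    # Record what the reference agent would do here before the other agent actually acts.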
                    a_probs = _eval_agents[REFERENCE_AGENT].get_a_probs()
                    action_int, _ = _eval_agents[1 - REFERENCE_AGENT].get_action(step_env=True, need_probs=False)
                    _eval_agents[REFERENCE_AGENT].notify_of_action(p_id_acted=p_id_acting,
                                                                   action_he_did=action_int)
                else:
                    raise ValueError("Only HU supported!")
                
                _, r_for_all, done, info = _env.step(action_int)  
    
    end_time = time.time()
    print("Time taken", end_time - start_time)

    print(optimizer)
    
    return results

agent_file1 = "/home/leduc/poker_ai_data/eval_agent/SD-CFR_LEDUC_EXAMPLE_200/120/eval_agentAVRG_NET.pkl"

student_agent = EvalAgentDeepRange(t_prof, mode=None, device=None)
teacher_agent = EvalAgentDeepCFR.load_from_disk(path_to_eval_agent=agent_file1)

results = distill(student_agent, teacher_agent, args={'lr':1e-2, 'iters': 500000, 'lambda': 1})
name = "deep_range_500000_1"

student_agent.save_to_file(name + ".pt")

with open(name + "_log.pkl", "wb") as f:
    pickle.dump(results, f)
Example #6
N_DECK = 52
N_HOLE = 169  # Distinct starting-hand classes: 13 pairs + 13 * 12 suited/offsuit non-pair combos

# Takes a two-card hole hand and produces its (high_rank, low_rank, is_suited) representation
def hand2rep(hand):
    card1_rank = hand[0][0]
    card1_suit = hand[0][1]
    card2_rank = hand[1][0]
    card2_suit = hand[1][1]
    suited = (card2_suit == card1_suit)
    high_rank = max(card1_rank, card2_rank)
    low_rank = min(card1_rank, card2_rank)
    return (high_rank, low_rank, suited)

#Load EvalAgent from file
curr_eval_agent = EvalAgentDeepCFR.load_from_disk(path_to_eval_agent=path_to_eval_agent)

#get an env bldr from the agent and create an env
env_bldr = curr_eval_agent.env_bldr
env = env_bldr.get_new_env(is_evaluating=False)

start_time = time.time()
hands = {}
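# Sample fresh deals until all N_HOLE starting-hand classes have been observed.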
while len(hands) < N_HOLE:
    #Reset env and EvalAgent
    env.reset()
    curr_eval_agent.reset(deck_state_dict=env.cards_state_dict())
    #Act
    for c in history:
        current_seat = env.current_player.seat_id
        env.step(str_to_action[c])
Example #7
def hand2rep(hand):
    card1_rank = hand[0][0]
    card1_suit = hand[0][1]
    card2_rank = hand[1][0]
    card2_suit = hand[1][1]
    suited = (card2_suit == card1_suit)
    high_rank = max(card1_rank, card2_rank)
    low_rank = min(card1_rank, card2_rank)
    return (high_rank, low_rank, suited)


#--------------- Generate p0 strat -------------------------

#Loading EvalAgents and checking whether they have the same experiment name
eval_agent_dcfr = EvalAgentDeepCFR.load_from_disk(
    path_to_eval_agent=path_to_dcfr_eval_agent)

#get an env bldr from the agent and create an env
env_bldr = eval_agent_dcfr.env_bldr
env = env_bldr.get_new_env(is_evaluating=False)

start_time = time.time()
hands = {}
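# Sample deals until a preflop strategy has been recorded for every starting-hand class.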
while len(hands) < N_HOLE:
    obs, rew, done, info = env.reset()
    eval_agent_dcfr.reset(deck_state_dict=env.cards_state_dict())
    hole_hand = hand2rep(env.seats[0].hand)
    if hole_hand not in hands:
        hands[hole_hand] = eval_agent_dcfr.get_a_probs()
print(f"Computed {N_HOLE} possible hands in {time.time()-start_time} sec")