def checkpoint(self, curr_step):
    if self._SINGLE:
        for p_id in range(self._t_prof.n_seats):
            # state = {
            #     "strat_buffer": self._strategy_buffers[p_id].state_dict(),
            #     "env_bldr": self._env_bldr,
            # }
            MODE = EvalAgentDeepCFR.EVAL_MODE_SINGLE
            t_prof = copy.deepcopy(self._t_prof)
            t_prof.eval_modes_of_algo = [MODE]

            eval_agent = EvalAgentDeepCFR(t_prof=t_prof)
            eval_agent.reset()
            eval_agent._strategy_buffers = self._strategy_buffers  # could copy - it's just for the export, so it's ok
            eval_agent.set_mode(mode=MODE)

            # with open(self._get_checkpoint_file_path(name=self._t_prof.name, step=curr_step,
            #                                          cls=self.__class__, worker_id="P" + str(p_id)),
            #           "wb") as pkl_file:
            path = f'sample_{p_id}.pkl'  # NOTE: currently unused; the checkpoint write above is commented out

            if curr_step == 30:  # Could be replaced with a configurable iteration check; hard-coded for now
                from PokerRL.game.InteractiveGame import InteractiveGame

                game = InteractiveGame(
                    env_cls=eval_agent.env_bldr.env_cls,
                    env_args=eval_agent.env_bldr.env_args,
                    seats_human_plays_list=[0],
                    eval_agent=eval_agent,
                )
                game.start_to_play()
"N": None, } mean = np.mean(data).item() std = np.std(data).item() conf = 1.96 * std / np.sqrt(len(data)) return { "mean": float(mean), "std": float(std), "conf": float(conf), "N": len(data), } if __name__ == '__main__': eval_agent_first = EvalAgentDeepCFR.load_from_disk( path_to_eval_agent=path_to_first_eval_agent) eval_agent_second = EvalAgentDeepCFR.load_from_disk( path_to_eval_agent=path_to_second_eval_agent) #assert eval_agent_first.t_prof.name == eval_agent_second.t_prof.name env_bldr = eval_agent_first.env_bldr env = env_bldr.get_new_env(is_evaluating=False) strategy_differences = { r: {depth: [] for depth in range(MAX_DEPTH)} for r in env_bldr.rules.ALL_ROUNDS_LIST } start_time = time.time()
def export_agent(self, step):
    _dir = ospj(self._t_prof.path_agent_export_storage, str(self._t_prof.name), str(step))
    file_util.create_dir_if_not_exist(_dir)

    # """"""""""""""""""""""""""""
    # Deep CFR
    # """"""""""""""""""""""""""""
    if self._AVRG:
        MODE = EvalAgentDeepCFR.EVAL_MODE_AVRG_NET
        t_prof = copy.deepcopy(self._t_prof)
        t_prof.eval_modes_of_algo = [MODE]

        eval_agent = EvalAgentDeepCFR(t_prof=t_prof)
        eval_agent.reset()
        w = {EvalAgentDeepCFR.EVAL_MODE_AVRG_NET: self._pull_avrg_net_eval_strat()}
        eval_agent.update_weights(w)
        eval_agent.set_mode(mode=MODE)
        eval_agent.store_to_disk(path=_dir, file_name="eval_agent" + MODE)

    # """"""""""""""""""""""""""""
    # SD-CFR
    # """"""""""""""""""""""""""""
    if self._SINGLE:
        MODE = EvalAgentDeepCFR.EVAL_MODE_SINGLE
        t_prof = copy.deepcopy(self._t_prof)
        t_prof.eval_modes_of_algo = [MODE]

        eval_agent = EvalAgentDeepCFR(t_prof=t_prof)
        eval_agent.reset()
        eval_agent._strategy_buffers = self._strategy_buffers  # could copy - it's just for the export, so it's ok
        eval_agent.set_mode(mode=MODE)
        eval_agent.store_to_disk(path=_dir, file_name="eval_agent" + MODE)
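    # Agents exported above can later be restored with EvalAgentDeepCFR.load_from_disk().
    # A hedged sketch (the directory mirrors _dir above; the ".pkl" suffix matches the file
    # names loaded by the other scripts in this section):
    #
    #     agent = EvalAgentDeepCFR.load_from_disk(
    #         path_to_eval_agent=ospj(_dir, "eval_agent" + MODE + ".pkl"))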
""" This file is not runable; it's is a template to show how you could play against your algorithms. To do so, replace "YourAlgorithmsEvalAgentCls" with the EvalAgent subclass (not instance) of your algorithm. Note that you can see the AI's cards on the screen since this is just a research application and not meant for actual competition. The AI can, of course, NOT see your cards. """ from PokerRL.game.InteractiveGameCustom import InteractiveGameCustom from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR from PokerRL.game.games import DiscretizedNLHoldem from PokerRL.game import bet_sets if __name__ == '__main__': eval_agent = EvalAgentDeepCFR.load_from_disk( path_to_eval_agent="eval_agentSINGLE.pkl") playerA = 500 playerB = 500 round = 0 playerAWinnings = 0 playerBWinnings = 0 while True: while playerA > 0 and playerB > 0: if round % 2 == 0: game_cls = DiscretizedNLHoldem args = game_cls.ARGS_CLS(n_seats=2, bet_sizes_list_as_frac_of_pot=bet_sets.B_5,
            _eval_agents[1 - REFERENCE_AGENT].notify_of_action(p_id_acted=p_id_acting,
                                                               action_he_did=action_int)

        elif p_id_acting == seat_p1:
            a_probs = _eval_agents[REFERENCE_AGENT].get_a_probs()
            action_int, _ = _eval_agents[1 - REFERENCE_AGENT].get_action(step_env=True, need_probs=False)
            _eval_agents[REFERENCE_AGENT].notify_of_action(p_id_acted=p_id_acting,
                                                           action_he_did=action_int)

        else:
            raise ValueError("Only HU supported!")

        _, r_for_all, done, info = _env.step(action_int)

    end_time = time.time()
    print("Time taken", end_time - start_time)
    print(optimizer)

    return results


agent_file1 = "/home/leduc/poker_ai_data/eval_agent/SD-CFR_LEDUC_EXAMPLE_200/120/eval_agentAVRG_NET.pkl"

student_agent = EvalAgentDeepRange(t_prof, mode=None, device=None)
teacher_agent = EvalAgentDeepCFR.load_from_disk(path_to_eval_agent=agent_file1)

results = distill(student_agent, teacher_agent, args={'lr': 1e-2, 'iters': 500000, 'lambda': 1})

name = "deep_range_500000_1"
student_agent.save_to_file(name + ".pt")
with open(name + "_log.pkl", "wb") as f:
    pickle.dump(results, f)
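# The saved log can later be reloaded in a separate session for inspection, e.g. (hedged
# sketch; assumes the same file name and that `results` is a plain picklable object, as the
# dump above implies):
#
#     import pickle
#     with open("deep_range_500000_1_log.pkl", "rb") as f:
#         results = pickle.load(f)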
N_DECK = 52
N_HOLE = 169  # Number of possible hole-card classes: 13 * 12 + 13


# A function that takes a hole hand and produces a (high rank, low rank, is_suited) representation
def hand2rep(hand):
    card1_rank = hand[0][0]
    card1_suit = hand[0][1]
    card2_rank = hand[1][0]
    card2_suit = hand[1][1]
    suited = (card2_suit == card1_suit)
    high_rank = max(card1_rank, card2_rank)
    low_rank = min(card1_rank, card2_rank)
    return (high_rank, low_rank, suited)


# Load EvalAgent from file
curr_eval_agent = EvalAgentDeepCFR.load_from_disk(path_to_eval_agent=path_to_eval_agent)

# Get an env bldr from the agent and create an env
env_bldr = curr_eval_agent.env_bldr
env = env_bldr.get_new_env(is_evaluating=False)

start_time = time.time()
hands = {}
while len(hands) < N_HOLE:
    # Reset env and EvalAgent
    env.reset()
    curr_eval_agent.reset(deck_state_dict=env.cards_state_dict())

    # Act
    for c in history:
        current_seat = env.current_player.seat_id
        env.step(str_to_action[c])
def hand2rep(hand):
    card1_rank = hand[0][0]
    card1_suit = hand[0][1]
    card2_rank = hand[1][0]
    card2_suit = hand[1][1]
    suited = (card2_suit == card1_suit)
    high_rank = max(card1_rank, card2_rank)
    low_rank = min(card1_rank, card2_rank)
    return (high_rank, low_rank, suited)


# --------------- Generate p0 strat -------------------------
# Loading EvalAgents and checking if they have the same experiment name
eval_agent_dcfr = EvalAgentDeepCFR.load_from_disk(
    path_to_eval_agent=path_to_dcfr_eval_agent)

# Get an env bldr from the agent and create an env
env_bldr = eval_agent_dcfr.env_bldr
env = env_bldr.get_new_env(is_evaluating=False)

start_time = time.time()
hands = {}
while len(hands) < N_HOLE:
    obs, rew, done, info = env.reset()
    eval_agent_dcfr.reset(deck_state_dict=env.cards_state_dict())

    hole_hand = hand2rep(env.seats[0].hand)
    if hole_hand not in hands:
        hands[hole_hand] = eval_agent_dcfr.get_a_probs()

print(f"Computed {N_HOLE} possible hands in {time.time() - start_time} sec")
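# Quick illustrative check of hand2rep (hypothetical encoding: each card is a [rank, suit]
# pair with ranks 0-12 and suits 0-3, which is what the indexing above assumes):
example_hand = [[12, 3], [10, 3]]  # two cards sharing a suit
assert hand2rep(example_hand) == (12, 10, True)  # (high rank, low rank, suited)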