shutil.copyfile(episode_generate_script_path, episode_generate_script_output_path)

# copy round-robin match script to output dir
round_robin_script_path = os.path.join(root, "scripts", "round_robin_match.py")
round_robin_script_output_path = os.path.join(OUTPUT_DIR, os.path.basename(round_robin_script_path))
shutil.copyfile(round_robin_script_path, round_robin_script_output_path)

TEST_LENGTH = 1000000

# Setup algorithm
value_func = ApproxActionValueFunction()
task = TexasHoldemTask(final_round=1000, scale_reward=True, lose_penalty=True, shuffle_position=True)
task.set_opponent_value_functions([value_func] * 9)
policy = EpsilonGreedyPolicy(eps=0.99)
policy.set_eps_annealing(0.99, 0.1, int(TEST_LENGTH * 0.8))
algorithm = MonteCarlo()
algorithm.setup(task, policy, value_func)

# load last training result
LOAD_DIR_NAME = ""
LOAD_DIR_PATH = os.path.join(os.path.dirname(__file__), "results", LOAD_DIR_NAME, "checkpoint", "gpi_finished")
if len(LOAD_DIR_NAME) != 0:
    algorithm.load(LOAD_DIR_PATH)

# Setup callbacks
callbacks = []
save_interval = 50000
save_dir_path = os.path.join(OUTPUT_DIR, "checkpoint")
os.mkdir(save_dir_path)
learning_recorder = LearningRecorder(algorithm, save_dir_path, save_interval)
# record terminal output in a log file
sys.stdout = Logger(os.path.join(OUTPUT_DIR, "training.log"))

# copy training script to output dir
script_output_path = os.path.join(OUTPUT_DIR, os.path.basename(__file__))
shutil.copyfile(__file__, script_output_path)

TEST_LENGTH = 10000

# Setup algorithm
value_func = ApproxActionValueFunction()
task = TexasHoldemTask(scale_reward=True, lose_penalty=True)
task.set_opponent_value_functions([value_func] * 9)
policy = EpsilonGreedyPolicy(eps=0.99)
policy.set_eps_annealing(0.99, 0.1, TEST_LENGTH)  # anneal eps from 0.99 down to 0.1 over TEST_LENGTH episodes
algorithm = MonteCarlo(gamma=0.99)
algorithm.setup(task, policy, value_func)

# Setup callbacks
callbacks = []
save_interval = 1000
save_dir_path = os.path.join(OUTPUT_DIR, "checkpoint")
os.mkdir(save_dir_path)
learning_recorder = LearningRecorder(algorithm, save_dir_path, save_interval)
callbacks.append(learning_recorder)
monitor_file_path = os.path.join(OUTPUT_DIR, "stop.txt")
manual_interruption = ManualInterruption(monitor_file_path)
callbacks.append(manual_interruption)
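
# NOTE (hedged usage sketch, not part of the original script): with the value
# function, task, policy, and callbacks configured above, training would
# typically be started through kyoka's generalized policy iteration entry
# point, e.g.
#
#     algorithm.run_gpi(TEST_LENGTH, callbacks)
#
# The exact run_gpi signature is an assumption about the installed kyoka
# version. ManualInterruption watches monitor_file_path (stop.txt in
# OUTPUT_DIR); writing its stop keyword into that file from another terminal
# is the intended way to end training early, while LearningRecorder keeps a
# checkpoint every `save_interval` episodes so an interrupted run can later be
# resumed with algorithm.load(...), as the longer training script above does.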