# Copy the episode-generation script to its output path. Both path variables
# are bound earlier in the file, outside this excerpt.
shutil.copyfile(episode_generate_script_path, episode_generate_script_output_path)

# Copy the round-robin match script (<root>/scripts/round_robin_match.py) into
# OUTPUT_DIR, keeping its original file name.
# NOTE(review): presumably done so each run's output directory carries the
# exact scripts that produced it -- confirm.
round_robin_script_path = os.path.join(root, "scripts", "round_robin_match.py")
round_robin_script_output_path = os.path.join(OUTPUT_DIR, os.path.basename(round_robin_script_path))
shutil.copyfile(round_robin_script_path, round_robin_script_output_path)

# Length parameter of this run. Within this excerpt it is only used to size
# the epsilon-annealing schedule below.
# NOTE(review): presumably also the number of training iterations -- confirm
# against the code that starts the algorithm.
TEST_LENGTH = 1000000

# Setup algorithm
value_func = ApproxActionValueFunction()
task = TexasHoldemTask(final_round=1000, scale_reward=True, lose_penalty=True, shuffle_position=True)
# All 9 opponent slots share the very same value_func object that is handed to
# algorithm.setup() below (the list holds 9 references, not 9 copies).
task.set_opponent_value_functions([value_func]*9)
policy = EpsilonGreedyPolicy(eps=0.99)
# NOTE(review): arguments look like (initial eps, final eps, annealing steps),
# with the step count being 80% of TEST_LENGTH -- confirm against
# EpsilonGreedyPolicy.set_eps_annealing.
policy.set_eps_annealing(0.99, 0.1, int(TEST_LENGTH*0.8))
algorithm = MonteCarlo()
algorithm.setup(task, policy, value_func)

# Load the result of a previous training run, if one is named.
# LOAD_DIR_NAME is the run directory under "results" next to this script;
# leave it empty to start without loading anything.
LOAD_DIR_NAME = ""
LOAD_DIR_PATH = os.path.join(os.path.dirname(__file__), "results", LOAD_DIR_NAME, "checkpoint", "gpi_finished")
# An empty name is falsy, so loading is skipped unless a run was chosen.
if LOAD_DIR_NAME:
    algorithm.load(LOAD_DIR_PATH)

# Setup callbacks
callbacks = []

# Checkpoint settings handed to LearningRecorder.
# NOTE(review): save_interval is presumably the number of training iterations
# between saves -- confirm against LearningRecorder.
save_interval = 50000
save_dir_path = os.path.join(OUTPUT_DIR, "checkpoint")
# os.mkdir creates only the leaf directory and raises FileExistsError if it
# already exists, so OUTPUT_DIR must exist and must not yet have a checkpoint
# directory.
os.mkdir(save_dir_path)
# NOTE(review): the recorder is not appended to `callbacks` within this
# excerpt -- check that it is registered further down.
learning_recorder = LearningRecorder(algorithm, save_dir_path, save_interval)
# ---- Example no. 2 ----
# 0
# Replace sys.stdout with a Logger bound to OUTPUT_DIR/training.log, so that
# everything printed from here on goes through it.
# NOTE(review): presumably Logger writes to both the terminal and the log
# file -- confirm against the Logger class.
sys.stdout = Logger(os.path.join(OUTPUT_DIR, "training.log"))

# Copy this training script into OUTPUT_DIR under its own file name.
script_output_path = os.path.join(OUTPUT_DIR, os.path.basename(__file__))
shutil.copyfile(__file__, script_output_path)

# Length parameter of this run. Within this excerpt it is only used as the
# last argument of the epsilon-annealing schedule below.
# NOTE(review): presumably also the number of training iterations -- confirm
# against the code that starts the algorithm.
TEST_LENGTH = 10000

# Setup algorithm
value_func = ApproxActionValueFunction()
task = TexasHoldemTask(scale_reward=True, lose_penalty=True)
# All 9 opponent slots share the very same value_func object that is handed to
# algorithm.setup() below (the list holds 9 references, not 9 copies).
task.set_opponent_value_functions([value_func] * 9)
policy = EpsilonGreedyPolicy(eps=0.99)
# NOTE(review): arguments look like (initial eps, final eps, annealing steps)
# -- confirm against EpsilonGreedyPolicy.set_eps_annealing.
policy.set_eps_annealing(0.99, 0.1, TEST_LENGTH)
# NOTE(review): gamma is presumably the discount factor -- confirm.
algorithm = MonteCarlo(gamma=0.99)
algorithm.setup(task, policy, value_func)

# Setup callbacks: collect the callback objects in a list.
callbacks = []

# Checkpoint recorder.
# NOTE(review): save_interval is presumably the number of training iterations
# between saves -- confirm against LearningRecorder.
save_interval = 1000
save_dir_path = os.path.join(OUTPUT_DIR, "checkpoint")
# os.mkdir creates only the leaf directory and raises FileExistsError if it
# already exists, so OUTPUT_DIR must exist and must not yet have a checkpoint
# directory.
os.mkdir(save_dir_path)
learning_recorder = LearningRecorder(algorithm, save_dir_path, save_interval)
callbacks.append(learning_recorder)

# Manual interruption callback, given the path OUTPUT_DIR/stop.txt.
# NOTE(review): presumably training stops once that file appears -- confirm
# against ManualInterruption.
monitor_file_path = os.path.join(OUTPUT_DIR, "stop.txt")
manual_interruption = ManualInterruption(monitor_file_path)
callbacks.append(manual_interruption)