def _spawn_rollout_workers(model_path, episode_queue, board_shape, k,
                           model_version_shared, num_workers=4):
    """Start `num_workers` self-play rollout processes feeding `episode_queue`."""
    for _ in range(num_workers):
        multiprocessing.Process(
            target=run_rollout_worker,
            args=(model_path, episode_queue, board_shape, k, model_version_shared),
        ).start()


def _spawn_evaluators(model_dir, model_path, board_shape, k, model_version_shared):
    """Start one evaluator process per baseline agent; return the Evaluator handles."""
    evaluators = []
    for evaluator_name, agent_name in [
        ("eval_better_greedy", "better-greedy"),
        ("eval_best_greedy", "best-greedy"),
    ]:
        evaluator = Evaluator(evaluator_name)
        model_best_path = os.path.join(model_dir, f"{evaluator_name}_best.npz")
        multiprocessing.Process(
            target=run_evaluator_worker,
            args=(
                model_path,
                model_best_path,
                evaluator.queue,
                board_shape,
                k,
                model_version_shared,
                agent_name,
            ),
        ).start()
        evaluators.append(evaluator)
    return evaluators


def train(episodes, name, experiment_dir, load_model):
    """Run the self-play training loop for `episodes` iterations.

    Spawns rollout workers that generate transitions, evaluator workers that
    benchmark snapshots against greedy baselines, and then repeatedly:
    drains rollouts into replay memory, publishes a fresh model snapshot
    (atomically, via a temp file + rename), runs one training step, and logs
    metrics to stdout and to ``episodes.txt`` in the experiment directory.

    Args:
        episodes: number of training iterations to run.
        name: experiment name; output goes to ``experiment_dir/name``.
        experiment_dir: root directory for all experiments.
        load_model: if truthy, the name of a previous experiment whose final
            model snapshot is loaded as the starting point.

    Runs until `episodes` iterations complete or KeyboardInterrupt (which is
    swallowed so the process can exit cleanly).
    """
    output_dir = os.path.join(experiment_dir, name)
    os.makedirs(output_dir, exist_ok=True)
    print(f"Will run a total of {episodes} episodes")
    print(f"Writing files to {output_dir}")

    board_shape = (6, 7)  # presumably Connect-Four: 6 rows x 7 columns
    k = 4                 # in-a-row count needed to win

    model_dir = os.path.join(output_dir, "model_snapshots")
    model_path = os.path.join(model_dir, "model.npz")
    model_tmp_path = os.path.join(model_dir, "model.tmp.npz")
    os.makedirs(model_dir, exist_ok=True)

    # Bounded queue applies back-pressure on rollout workers if the learner
    # falls behind.
    episode_queue = multiprocessing.Queue(8)
    model_version = 0
    model_version_shared = multiprocessing.Value("i", 0)

    _spawn_rollout_workers(model_path, episode_queue, board_shape, k,
                           model_version_shared)
    evaluators = _spawn_evaluators(model_dir, model_path, board_shape, k,
                                   model_version_shared)

    # Save an initial (random) model so workers have something to load.
    agent = NNAgent(board_shape)
    agent.save_model(model_path)

    config_manager = ConfigManager(agent.update_config, agent.current_config())
    webui.run_http_server(config_manager)

    memory = Memory(board_shape, 100000000)

    if load_model:
        filename = os.path.join(experiment_dir, load_model,
                                "model_snapshots", "model.npz")
        # BUG FIX: the original f-string had no placeholder and printed a
        # literal instead of the path being loaded.
        print(f"Loading model from {filename}")
        agent.load_model(filename)

    batch_size = 1 << 12
    np.set_printoptions(threshold=100000)

    with open(os.path.join(output_dir, "episodes.txt"), "w") as output_log:
        try:
            for i_episode in range(episodes):
                start = time.time()
                config_manager.handle_events()

                # Drain 30 rollout episodes into replay memory.
                for _ in range(30):
                    transitions = episode_queue.get()
                    for obs, next_obs, rew in transitions:
                        memory.add(obs, next_obs, rew)

                # Publish a new snapshot atomically: write to a temp file,
                # then rename so workers never see a half-written model.
                agent.save_model(model_tmp_path)
                os.rename(model_tmp_path, model_path)
                model_version += 1
                model_version_shared.value = model_version

                # Train only once there is a minimal amount of experience.
                start_learn = time.time()
                if memory.size() >= 1024:
                    train_metrics = agent.train(memory, batch_size)
                else:
                    train_metrics = {}
                learn_duration = time.time() - start_learn

                # ru_maxrss is in KiB on Linux; convert to MB.
                rss = resource.getrusage(
                    resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
                duration = time.time() - start
                metrics = {
                    "episode": i_episode,
                    "iteration duration": duration,
                    "memory size": memory.size(),
                    "train step duration (ms)": learn_duration * 1000.0,
                    "rss (MB)": rss,
                }
                for key, val in train_metrics.items():
                    metrics[f"train/{key}"] = val
                for evaluator in evaluators:
                    evaluator.update()
                    for key, val in evaluator.last_values.items():
                        metrics[f"{evaluator.name}/{key}"] = val

                # Stringify for both the table and the JSON log line.
                for key, val in metrics.items():
                    metrics[key] = str(val)
                print(
                    tabulate(metrics.items(),
                             tablefmt="psql",
                             headers=["name", "value"]))
                output_log.write(json.dumps(metrics) + "\n")
                output_log.flush()
        except KeyboardInterrupt:
            # Allow Ctrl-C to stop training without a traceback.
            pass