Example #1

import json
import multiprocessing
import os
import resource
import time

import numpy as np
from tabulate import tabulate

# Project-local imports, assumed to be importable alongside this script:
# NNAgent, Memory, Evaluator, ConfigManager, webui,
# run_rollout_worker, run_evaluator_worker

def train(episodes, name, experiment_dir, load_model):

    output_dir = os.path.join(experiment_dir, name)

    os.makedirs(output_dir, exist_ok=True)

    print(f"Will run a total of {episodes} episodes")
    print(f"Writting files to {output_dir}")

    # logging.basicConfig(
    #     format='%(asctime)s %(filename)s:%(lineno)d %(message)s',
    #     level=logging.INFO, datefmt='%Y/%m/%d %H:%M:%S')

    # Connect Four-sized game: a 6x7 board, win by lining up k = 4 pieces.
    board_shape = (6, 7)
    k = 4

    model_dir = os.path.join(output_dir, "model_snapshots")
    model_path = os.path.join(model_dir, "model.npz")
    model_tmp_path = os.path.join(model_dir, "model.tmp.npz")
    os.makedirs(model_dir, exist_ok=True)

    # Create the learner and publish an initial snapshot before spawning any
    # workers, so the rollout and evaluator processes always find a model
    # file at model_path.
    agent = NNAgent(board_shape)
    if load_model:
        filename = os.path.join(experiment_dir, load_model, "model_snapshots",
                                "model.npz")
        print(f"Loading model from {filename}")
        agent.load_model(filename)
    agent.save_model(model_path)

    # Rollout workers block once 8 transition batches are queued, which keeps
    # memory bounded if the learner falls behind the actors.
    episode_queue = multiprocessing.Queue(8)
    model_version = 0
    model_version_shared = multiprocessing.Value("i", 0)
    workers = []
    for _ in range(4):
        worker = multiprocessing.Process(
            target=run_rollout_worker,
            args=(model_path, episode_queue, board_shape, k,
                  model_version_shared),
            daemon=True,  # workers should not outlive the trainer
        )
        worker.start()
        workers.append(worker)

    evaluators = []
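    # Each evaluator worker plays the latest snapshot against a fixed
    # baseline agent and reports scores back through the evaluator's queue.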
    for evaluator_name, agent_name in [
        ("eval_better_greedy", "better-greedy"),
        ("eval_best_greedy", "best-greedy"),
    ]:
        evaluator = Evaluator(evaluator_name)
        model_best_path = os.path.join(model_dir, f"{evaluator_name}_best.npz")
        worker = multiprocessing.Process(
            target=run_evaluator_worker,
            args=(
                model_path,
                model_best_path,
                evaluator.queue,
                board_shape,
                k,
                model_version_shared,
                agent_name,
            ),
            daemon=True,
        )
        worker.start()
        workers.append(worker)
        evaluators.append(evaluator)

    # Expose the agent's tunable hyperparameters through a small web UI;
    # changes are picked up in the loop via config_manager.handle_events().
    config_manager = ConfigManager(agent.update_config, agent.current_config())
    webui.run_http_server(config_manager)

    memory = Memory(board_shape, 100_000_000)

    batch_size = 1 << 12  # 4096

    np.set_printoptions(threshold=100000)

    with open(os.path.join(output_dir, "episodes.txt"), "w") as output_log:
        try:
            for i_episode in range(episodes):
                start = time.time()
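                # Apply any hyperparameter edits submitted through the web UI.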
                config_manager.handle_events()

                # Drain 30 batches of transitions from the rollout workers
                # into the replay memory.
                for _ in range(30):
                    transitions = episode_queue.get()
                    for obs, next_obs, rew in transitions:
                        memory.add(obs, next_obs, rew)

                # Publish a fresh snapshot atomically: write to a temp file,
                # rename it over model.npz, then bump the shared version so
                # the workers know to reload.
                agent.save_model(model_tmp_path)
                os.rename(model_tmp_path, model_path)
                model_version += 1
                model_version_shared.value = model_version

                start_learn = time.time()
                # Skip gradient updates until the replay memory holds at
                # least 1024 transitions.
                if memory.size() >= 1024:
                    train_metrics = agent.train(memory, batch_size)
                else:
                    train_metrics = {}
                learn_duration = time.time() - start_learn
                # ru_maxrss is reported in bytes on macOS but in KiB on
                # Linux, so the "MB" label below is platform-dependent.
                rss = resource.getrusage(
                    resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024

                duration = time.time() - start

                metrics = {
                    "episode": i_episode,
                    "iteration duration": duration,
                    "memory size": memory.size(),
                    "train step duration (ms)": learn_duration * 1000.0,
                    "rss (MB)": rss,
                }
                for key, val in train_metrics.items():
                    metrics[f"train/{key}"] = val

                for evaluator in evaluators:
                    # Refresh last_values with any results the evaluator
                    # worker has reported since the previous iteration.
                    evaluator.update()
                    for key, val in evaluator.last_values.items():
                        metrics[f"{evaluator.name}/{key}"] = val

                output_log.write(json.dumps(metrics) + "\n")
                output_log.flush()

                # Stringify only for display; the JSON log above keeps
                # native numeric types.
                for key, val in metrics.items():
                    metrics[key] = str(val)
                print(
                    tabulate(metrics.items(),
                             tablefmt="psql",
                             headers=["name", "value"]))

        except KeyboardInterrupt:
            # Daemon workers are terminated automatically when the main
            # process exits; nothing else to clean up.
            pass
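
Since the trainer spawns worker processes, the script needs a __main__ guard
on platforms that use the spawn start method (Windows, and macOS by default).
A minimal sketch of an entry point; the argparse flag names and defaults here
are assumptions, not part of the original script:

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Train the Connect-style agent")
    parser.add_argument("--episodes", type=int, default=1000)
    parser.add_argument("--name", default="run0")
    parser.add_argument("--experiment-dir", default="experiments")
    parser.add_argument("--load-model", default=None,
                        help="name of a previous run to resume from")
    args = parser.parse_args()

    train(args.episodes, args.name, args.experiment_dir, args.load_model)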