Code example #1
0
        load = False
        if args.env_config_path is None:
            config_path = "fixed_wing_config.json"
        else:
            config_path = args.env_config_path
        os.makedirs(model_folder)
        os.makedirs(os.path.join(model_folder, "render"))
        shutil.copy2(config_path,
                     os.path.join(model_folder, "fixed_wing_config.json"))
    config_path = os.path.join(model_folder, "fixed_wing_config.json")

    env = VecNormalize(
        SubprocVecEnv([
            make_env(config_path, i, info_kw=info_kw) for i in range(num_cpu)
        ]))
    env.env_method("set_curriculum_level", curriculum_level)
    env.set_attr("training", True)

    if load:
        model = PPO2.load(os.path.join(model_folder, "model.pkl"),
                          env=env,
                          verbose=1,
                          tensorboard_log=os.path.join(model_folder, "tb"))
    else:
        model = PPO2(policy,
                     env,
                     verbose=1,
                     tensorboard_log=os.path.join(model_folder, "tb"))
    model.learn(total_timesteps=training_steps,
                log_interval=log_interval,
                callback=monitor_training)
Code example #2
0
def main(
    model_name,
    num_envs,
    env_config_path=None,
    train_steps=None,
    policy=None,
    disable_curriculum=True,
    test_set_path=None,
):
    """Train a PPO2 agent on the fixed-wing flight-control environment.

    Creates (or resumes from) ``models/<model_name>``, copies the environment
    config into that folder, builds a normalized vectorized environment across
    ``num_envs`` subprocesses, trains for the requested number of timesteps,
    and saves the final model.

    Args:
        model_name: Folder name under ``models/`` for checkpoints, renders,
            tensorboard logs, and the copied config. If the folder already
            exists, training resumes from ``model.pkl`` inside it.
        num_envs: Number of parallel worker environments (``SubprocVecEnv``).
        env_config_path: Optional path to an environment config JSON. When
            omitted, ``fixed_wing_config.json`` in the working directory is
            copied for newly created models.
        train_steps: Total training timesteps; defaults to 5e6 when falsy.
        policy: ``None``/"MLP" for ``MlpPolicy``, or "CNN" for the
            ``CnnMlpPolicy`` available only in the stable-baselines fork.
        disable_curriculum: When True (default), start at full environment
            difficulty (curriculum level 1) instead of 0.25.
        test_set_path: Not read in this function body — presumably consumed
            by the monitoring callback; verify against ``monitor_training``.

    Raises:
        ValueError: If ``policy`` is neither None, "MLP", nor "CNN".
    """

    curriculum_level = 0.25  # Initial difficulty level of environment
    curriculum_cooldown = (
        25  # Minimum number of episodes between environment difficulty adjustments
    )
    render_interval = 600  # Time in seconds between rendering of training episodes
    # NOTE(review): the locals below (last_test, last_render,
    # checkpoint_save_interval, last_save, last_ep_info, render_check,
    # curriculum_cooldown, render_interval, test_interval) are never read in
    # this function body — presumably shared state for the monitor_training
    # callback defined elsewhere in the module. Confirm before removing.
    last_test = 0
    last_render = time.time()
    checkpoint_save_interval = 300
    last_save = time.time()
    last_ep_info = None
    log_interval = 50
    render_check = {"files": [], "time": time.time()}
    # Episode-info keys the workers forward for monitoring/aggregation.
    info_kw = [
        "success",
        "control_variation",
        "end_error",
        "total_error",
        "success_time_frac",
    ]

    num_cpu = int(num_envs)
    if policy is None or policy == "MLP":
        policy = MlpPolicy
    elif policy == "CNN":
        try:
            # CnnMlpPolicy only exists in the stable-baselines fork; a
            # vanilla stable-baselines install raises ImportError here.
            from stable_baselines.common.policies import CnnMlpPolicy

            policy = CnnMlpPolicy
        except ImportError:
            # Was a bare `except:`, which also swallowed KeyboardInterrupt,
            # SystemExit, and unrelated bugs; catch only the missing import.
            print(
                "To use the CNN policy described in the paper you need to use the stable-baselines fork at github.com/eivindeb/stable-baselines"
            )
            exit(0)
    else:
        raise ValueError(
            "Invalid value supplied for argument policy (must be either 'MLP' or 'CNN')"
        )

    if disable_curriculum:
        curriculum_level = 1

    if train_steps:
        training_steps = int(train_steps)
    else:
        training_steps = int(5e6)

    test_interval = int(
        training_steps / 5
    )  # How often in time steps during training the model is evaluated on the test set

    model_folder = os.path.join("models", model_name)
    if os.path.exists(model_folder):
        # Existing folder means a previous run: resume from its checkpoint
        # and its copied config.
        load = True
    else:
        load = False
        if env_config_path is None:
            config_path = "fixed_wing_config.json"
        else:
            config_path = env_config_path
        os.makedirs(model_folder)
        os.makedirs(os.path.join(model_folder, "render"))
        # Snapshot the config next to the model so the run is reproducible
        # even if the original config file later changes.
        shutil.copy2(config_path, os.path.join(model_folder, "fixed_wing_config.json"))
    # Always use the copied config from the model folder from here on.
    config_path = os.path.join(model_folder, "fixed_wing_config.json")

    env = VecNormalize(
        SubprocVecEnv(
            [make_env(config_path, i, info_kw=info_kw) for i in range(num_cpu)]
        )
    )
    env.env_method("set_curriculum_level", curriculum_level)
    env.set_attr("training", True)

    if load:
        model = PPO2.load(
            os.path.join(model_folder, "model.pkl"),
            env=env,
            verbose=1,
            tensorboard_log=os.path.join(model_folder, "tb"),
        )
    else:
        model = PPO2(
            policy, env, verbose=1, tensorboard_log=os.path.join(model_folder, "tb")
        )
    model.learn(
        total_timesteps=training_steps,
        log_interval=log_interval,
        callback=monitor_training,
    )
    save_model(model, model_folder)