# --- Script-level fragment (duplicates the tail of main()) ---------------------
# Relies on names defined earlier in the file: args, model_folder, num_cpu,
# info_kw, curriculum_level, load, policy, training_steps, log_interval,
# monitor_training, make_env, save_model.

# Resolve the environment config: fall back to the default next to the script.
if args.env_config_path is None:
    config_path = "fixed_wing_config.json"
else:
    config_path = args.env_config_path

# BUG FIX: exist_ok=True — plain os.makedirs raised FileExistsError whenever
# the model folder already existed (i.e. exactly when resuming training),
# which made the `if load:` branch below unreachable.
os.makedirs(model_folder, exist_ok=True)
os.makedirs(os.path.join(model_folder, "render"), exist_ok=True)
# Snapshot the config into the model folder so the run is reproducible,
# then use the snapshot as the authoritative config from here on.
shutil.copy2(config_path, os.path.join(model_folder, "fixed_wing_config.json"))
config_path = os.path.join(model_folder, "fixed_wing_config.json")

# One subprocess environment per CPU, wrapped in observation/reward
# normalization (VecNormalize).
env = VecNormalize(
    SubprocVecEnv(
        [make_env(config_path, i, info_kw=info_kw) for i in range(num_cpu)]
    )
)
env.env_method("set_curriculum_level", curriculum_level)
env.set_attr("training", True)

if load:
    # Resume from the checkpoint saved in the model folder.
    model = PPO2.load(
        os.path.join(model_folder, "model.pkl"),
        env=env,
        verbose=1,
        tensorboard_log=os.path.join(model_folder, "tb"),
    )
else:
    model = PPO2(
        policy, env, verbose=1, tensorboard_log=os.path.join(model_folder, "tb")
    )

model.learn(
    total_timesteps=training_steps,
    log_interval=log_interval,
    callback=monitor_training,
)
save_model(model, model_folder)
def main(
    model_name,
    num_envs,
    env_config_path=None,
    train_steps=None,
    policy=None,
    disable_curriculum=True,
    test_set_path=None,
):
    """Train a PPO2 agent on the fixed-wing environment.

    Creates (or resumes from) ``models/<model_name>``, snapshots the
    environment config into that folder, spins up ``num_envs`` subprocess
    environments wrapped in ``VecNormalize``, trains, and saves the model.

    Args:
        model_name: Name of the run; model artifacts go to ``models/<model_name>``.
        num_envs: Number of parallel subprocess environments.
        env_config_path: Path to an environment JSON config; defaults to
            ``fixed_wing_config.json`` in the working directory.
        train_steps: Total training timesteps (defaults to 5e6).
        policy: ``None``/``"MLP"`` for MlpPolicy or ``"CNN"`` for the
            CnnMlpPolicy from the stable-baselines fork.
        disable_curriculum: If True, start at full difficulty (level 1).
        test_set_path: Path to an evaluation test set
            (presumably consumed by the training callback — TODO confirm).

    Raises:
        ValueError: If ``policy`` is neither None, "MLP" nor "CNN".
    """
    curriculum_level = 0.25  # Initial difficulty level of environment
    curriculum_cooldown = (
        25  # Minimum number of episodes between environment difficulty adjustments
    )
    render_interval = 600  # Time in seconds between rendering of training episodes
    last_test = 0
    last_render = time.time()
    checkpoint_save_interval = 300  # Seconds between checkpoint saves
    last_save = time.time()
    last_ep_info = None
    log_interval = 50
    render_check = {"files": [], "time": time.time()}
    # NOTE(review): the bookkeeping locals above (last_*, render_*, etc.) are
    # not read in this function body — presumably shared with the
    # monitor_training callback defined elsewhere in the file; verify before
    # removing.
    info_kw = [
        "success",
        "control_variation",
        "end_error",
        "total_error",
        "success_time_frac",
    ]
    num_cpu = int(num_envs)

    if policy is None or policy == "MLP":
        policy = MlpPolicy
    elif policy == "CNN":
        try:
            from stable_baselines.common.policies import CnnMlpPolicy

            policy = CnnMlpPolicy
        except ImportError:
            # FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; only the import can fail here.
            print(
                "To use the CNN policy described in the paper you need to use the stable-baselines fork at github.com/eivindeb/stable-baselines"
            )
            exit(0)  # NOTE(review): exits with status 0 on failure — confirm intended
    else:
        raise ValueError(
            "Invalid value supplied for argument policy (must be either 'MLP' or 'CNN')"
        )

    if disable_curriculum:
        curriculum_level = 1

    if train_steps:
        training_steps = int(train_steps)
    else:
        training_steps = int(5e6)

    test_interval = int(
        training_steps / 5
    )  # How often in time steps during training the model is evaluated on the test set

    model_folder = os.path.join("models", model_name)
    # Resume an existing run if its folder already exists.
    load = os.path.exists(model_folder)

    if env_config_path is None:
        config_path = "fixed_wing_config.json"
    else:
        config_path = env_config_path

    # BUG FIX: exist_ok=True — the original os.makedirs raised FileExistsError
    # precisely when `load` was True (the folder exists), so the resume branch
    # below could never be reached.
    os.makedirs(model_folder, exist_ok=True)
    os.makedirs(os.path.join(model_folder, "render"), exist_ok=True)
    # Snapshot the config into the model folder for reproducibility and use
    # the snapshot from here on. (On resume this overwrites the stored
    # snapshot with the freshly supplied config.)
    shutil.copy2(config_path, os.path.join(model_folder, "fixed_wing_config.json"))
    config_path = os.path.join(model_folder, "fixed_wing_config.json")

    # One subprocess environment per CPU, with observation/reward normalization.
    env = VecNormalize(
        SubprocVecEnv(
            [make_env(config_path, i, info_kw=info_kw) for i in range(num_cpu)]
        )
    )
    env.env_method("set_curriculum_level", curriculum_level)
    env.set_attr("training", True)

    if load:
        # Resume from the checkpoint saved by a previous run.
        model = PPO2.load(
            os.path.join(model_folder, "model.pkl"),
            env=env,
            verbose=1,
            tensorboard_log=os.path.join(model_folder, "tb"),
        )
    else:
        model = PPO2(
            policy, env, verbose=1, tensorboard_log=os.path.join(model_folder, "tb")
        )

    model.learn(
        total_timesteps=training_steps,
        log_interval=log_interval,
        callback=monitor_training,
    )
    save_model(model, model_folder)