def _preprocess_schedules(hyperparams: Dict[str, Any]) -> Dict[str, Any]: # Create schedules for key in ["learning_rate", "clip_range", "clip_range_vf"]: if key not in hyperparams: continue if isinstance(hyperparams[key], str): schedule, initial_value = hyperparams[key].split("_") initial_value = float(initial_value) hyperparams[key] = linear_schedule(initial_value) elif isinstance(hyperparams[key], (float, int)): # Negative value: ignore (ex: for clipping) if hyperparams[key] < 0: continue hyperparams[key] = constant_fn(float(hyperparams[key])) else: raise ValueError(f"Invalid value for {key}: {hyperparams[key]}") return hyperparams
# Run directory layout: <log_folder>/<algo>/<ENV_ID>_<id>/<ENV_ID>, where
# <id> is get_latest_run_id(log_path, ENV_ID) + 1.
log_path = os.path.join(args.log_folder, args.algo)
save_path = os.path.join(
    log_path, "{}_{}".format(ENV_ID, get_latest_run_id(log_path, ENV_ID) + 1))
params_path = os.path.join(save_path, ENV_ID)
os.makedirs(params_path, exist_ok=True)

# Create learning rate schedules for ppo2 and sac
if args.algo in ["ppo2", "sac"]:
    for key in ['learning_rate', 'cliprange']:
        if key not in hyperparams:
            continue
        if isinstance(hyperparams[key], str):
            # Expected form is "<schedule>_<initial value>". The schedule
            # name is not inspected: a linear schedule is always built.
            _, initial_value = hyperparams[key].split('_')
            initial_value = float(initial_value)
            hyperparams[key] = linear_schedule(initial_value)
        elif isinstance(hyperparams[key], (float, int)):
            # Whole-number values (e.g. `1`) are accepted as well as
            # floats; they are converted so constfn always gets a float.
            hyperparams[key] = constfn(float(hyperparams[key]))
        else:
            raise ValueError('Invalid value for {}: {}'.format(
                key, hyperparams[key]))

# Should we overwrite the number of timesteps?
if args.n_timesteps > 0:
    n_timesteps = args.n_timesteps
else:
    n_timesteps = int(hyperparams['n_timesteps'])
# The entry is removed in both cases.
# NOTE(review): presumably so it is not forwarded with the remaining
# hyperparameters - confirm against the code that consumes `hyperparams`.
del hyperparams['n_timesteps']

normalize = False
normalize_kwargs = {}