print("Using {} environments".format(n_envs))

# Wrap the schedulable hyperparameters of ppo2, sac and td3 in callables.
# A string entry is split on '_' and its second part is passed to
# linear_schedule(); a non-negative number is passed to constfn().
if algo_ in ["ppo2", "sac", "td3"]:
    for key in ['learning_rate', 'cliprange', 'cliprange_vf']:
        if key in hyperparams:
            if isinstance(hyperparams[key], str):
                # The part before the underscore is unpacked but not consulted
                schedule, initial_value = hyperparams[key].split('_')
                initial_value = float(initial_value)
                hyperparams[key] = linear_schedule(initial_value)
            elif not isinstance(hyperparams[key], (float, int)):
                raise ValueError('Invalid value for {}: {}'.format(
                    key, hyperparams[key]))
            elif not (hyperparams[key] < 0):
                # Negative values are left untouched (ex: for clipping)
                hyperparams[key] = constfn(float(hyperparams[key]))

# A positive command-line n_timesteps takes precedence over the value
# stored in the hyperparameters.
if not (args.n_timesteps > 0):
    n_timesteps = int(hyperparams['n_timesteps'])
else:
    if args.verbose:
        print("Overwriting n_timesteps with n={}".format(
            args.n_timesteps))
    n_timesteps = args.n_timesteps

# Normalization defaults
normalize, normalize_kwargs = False, {}
log_path, "{}_{}".format(ENV_ID, get_latest_run_id(log_path, ENV_ID) + 1)) params_path = os.path.join(save_path, ENV_ID) os.makedirs(params_path, exist_ok=True) # Create learning rate schedules for ppo2 and sac if args.algo in ["ppo2", "sac"]: for key in ['learning_rate', 'cliprange']: if key not in hyperparams: continue if isinstance(hyperparams[key], str): schedule, initial_value = hyperparams[key].split('_') initial_value = float(initial_value) hyperparams[key] = linear_schedule(initial_value) elif isinstance(hyperparams[key], float): hyperparams[key] = constfn(hyperparams[key]) else: raise ValueError('Invalid valid for {}: {}'.format( key, hyperparams[key])) # Should we overwrite the number of timesteps? if args.n_timesteps > 0: n_timesteps = args.n_timesteps else: n_timesteps = int(hyperparams['n_timesteps']) del hyperparams['n_timesteps'] normalize = False normalize_kwargs = {} if 'normalize' in hyperparams.keys(): normalize = hyperparams['normalize']