def configure(folder=None, format_strs=None):
    """
    configure the current logger

    :param folder: (str) the save location (if None, $OPENAI_LOGDIR,
        if still None, tempdir/openai-[date & time])
    :param format_strs: (list) the output logging format (if None,
        $OPENAI_LOG_FORMAT, if still None, ['stdout', 'log', 'csv'])
    """
    # Resolve the output folder: explicit arg > env var > fresh temp dir
    if folder is None:
        folder = os.getenv('OPENAI_LOGDIR')
    if folder is None:
        timestamp = datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f")
        folder = os.path.join(tempfile.gettempdir(), timestamp)
    assert isinstance(folder, str)
    os.makedirs(folder, exist_ok=True)

    rank = mpi_rank_or_zero()
    log_suffix = ''
    if format_strs is None:
        if rank == 0:
            format_strs = os.getenv('OPENAI_LOG_FORMAT', 'stdout,log,csv').split(',')
        else:
            # Non-root MPI workers write to rank-tagged files and default
            # to the quieter 'log' format
            log_suffix = "-rank%03i" % rank
            format_strs = os.getenv('OPENAI_LOG_FORMAT_MPI', 'log').split(',')

    # Drop empty entries (e.g. from a trailing comma in the env var)
    active_formats = [fmt for fmt in format_strs if fmt]
    output_formats = [make_output_format(fmt, folder, log_suffix) for fmt in active_formats]

    Logger.CURRENT = Logger(folder=folder, output_formats=output_formats)
    log('Logging to %s' % folder)
def make_mujoco_env(env_id, seed, allow_early_resets=True):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.

    :param env_id: (str) the environment ID
    :param seed: (int) the initial seed for RNG
    :param allow_early_resets: (bool) allows early reset of the environment
    :return: (Gym Environment) The mujoco environment
    """
    # Give every MPI worker a distinct global seed stream
    rank = mpi_rank_or_zero()
    set_global_seeds(seed + 10000 * rank)
    monitor_path = os.path.join(logger.get_dir(), '0')
    env = Monitor(gym.make(env_id), monitor_path,
                  allow_early_resets=allow_early_resets)
    env.seed(seed)
    return env
def train(params, model=None, path=None):
    """
    Train (or fine-tune) a stable-baselines model described by ``params``.

    :param params: run configuration; indexed like a dict ('name', 'env_args',
        'num_proc', 'normalize', 'seed', 'noise', 'timesteps', 'alg_args',
        'policy_args', ...) and providing save() — presumably a project
        config object, verify against its definition
    :param model: optional pre-trained model; when given, this run is treated
        as a fine-tune (run name suffixed, model's env replaced)
    :param path: optional base path forwarded to get_paths()
    """
    if model:
        # Fine-tuning: tag the run name so its output folder is distinguishable
        if params['name']:
            params['name'] += '_Finetune'
        else:
            params['name'] = 'Finetune'
    data_dir, tb_path = get_paths(params, path=path)
    print("Training Parameters: ", params)
    os.makedirs(data_dir, exist_ok=True)
    # Save parameters immediately so the run config survives a crash
    params.save(data_dir)
    rank = mpi_rank_or_zero()
    if rank != 0:
        # Only MPI rank 0 logs; other workers are silenced
        logger.set_level(logger.DISABLED)

    def make_env(i):
        # Build one monitored environment; monitor file is named by env index
        env = get_env(params)
        env = Monitor(env, data_dir + '/' + str(i), allow_early_resets=params['early_reset'])
        return env

    use_her = params['env_args']['use_her'] if 'use_her' in params['env_args'] else False
    if use_her:
        # HER gets a single, non-vectorized environment
        env = make_env(0)
        goal_selection_strategy = 'future'
    else:
        # (lambda n: lambda: make_env(n))(i) binds i eagerly, avoiding the
        # late-binding-closure pitfall of a plain lambda in a comprehension
        env = DummyVecEnv([(lambda n: lambda: make_env(n))(i) for i in range(params['num_proc'])])
    if model:
        # Sanity-print both spaces so a model/env action-space mismatch is
        # visible before learning starts
        print("Model action space", model.action_space, model.action_space.low)
        print("Env action space", env.action_space, env.action_space.low)
    if params['normalize']:
        # NOTE(review): VecNormalize expects a VecEnv; in the HER branch env
        # is a plain Monitor-wrapped env — confirm normalize+HER never combine
        env = VecNormalize(env)
    if params['seed']:
        # Offset the seed per MPI rank so workers use distinct RNG streams
        seed = params['seed'] + 100000 * rank
        set_global_seeds(seed)
        params['alg_args']['seed'] = seed
    if 'noise' in params and params['noise']:
        from stable_baselines.ddpg import OrnsteinUhlenbeckActionNoise
        n_actions = env.action_space.shape[-1]
        # params['noise'] is the OU sigma, applied uniformly to every action dim
        params['alg_args']['action_noise'] = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(params['noise'])*np.ones(n_actions))
    if model is None:
        alg = get_alg(params)
        policy = get_policy(params)
        if use_her:
            from stable_baselines import HER
            model = HER(policy, env, alg, n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy, verbose=1, tensorboard_log=tb_path, policy_kwargs=params['policy_args'], **params['alg_args'])
        else:
            model = alg(policy, env, verbose=1, tensorboard_log=tb_path, policy_kwargs=params['policy_args'], **params['alg_args'])
    else:
        # Fine-tune path: keep the given model, just point it at the new env
        model.set_env(env)
    model.learn(total_timesteps=params['timesteps'], log_interval=params['log_interval'],
                callback=create_training_callback(data_dir, freq=params['eval_freq'], checkpoint_freq=params['checkpoint_freq']))
    print("######## SAVING MODEL TO", data_dir)
    model.save(data_dir +'/final_model')
    if params['normalize']:
        # Persist the VecNormalize running statistics alongside the model
        env.save(data_dir + '/normalized_environment.env')
    env.close()
def train(params, model=None, env=None):
    """
    Train a stable-baselines model, creating the environment when not supplied.

    :param params: run configuration; indexed like a dict ('normalize', 'seed',
        'noise', 'timesteps', 'alg_args', 'policy_args', ...) and providing
        save() — presumably a project config object, verify its definition
    :param model: optional existing model to continue training; when None, a
        fresh model is built from get_alg()/get_policy()
    :param env: optional pre-built environment; when None, one is created and
        wrapped in a Monitor
    """
    print("Training Parameters: ", params)
    data_dir, tb_path = get_paths(params)
    os.makedirs(data_dir, exist_ok=True)
    # Save parameters immediately so the run config survives a crash
    params.save(data_dir)
    rank = mpi_rank_or_zero()
    if rank != 0:
        # Only MPI rank 0 logs; other workers are silenced
        logger.set_level(logger.DISABLED)
    # Create the environment if not given
    if env is None:
        def make_env(i):
            env = get_env(params)
            print("ENV IN UTIL" ,env)
            # TODO: make monitor work for multiple agent.
            env = Monitor(env, data_dir + '/' + str(i), allow_early_resets=params['early_reset'])
            return env
        # if 'PPO' in params['alg']:
        #     env = DummyVecEnv([(lambda n: lambda: make_env(n))(i) for i in range(params['num_proc'])])
        # else:
        #     env = make_env(0)
        env = make_env(0)
    if params['normalize']:
        # NOTE(review): source formatting was lost; this guard is reconstructed
        # at function level (matching the sibling train() and the env.save at
        # the end) — confirm it wasn't meant to apply only to freshly built envs
        env = VecNormalize(env)
    # Set the seeds
    if params['seed']:
        # Offset the seed per MPI rank so workers use distinct RNG streams
        seed = params['seed'] + 100000 * rank
        set_global_seeds(seed)
        params['alg_args']['seed'] = seed
    if 'noise' in params and params['noise']:
        from stable_baselines.ddpg import OrnsteinUhlenbeckActionNoise
        n_actions = env.action_space.shape[-1]
        # params['noise'] is the OU sigma, applied uniformly to every action dim
        params['alg_args']['action_noise'] = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(params['noise'])*np.ones(n_actions))
    print("ENV", env, env.action_space)
    if model is None:
        alg = get_alg(params)
        policy = get_policy(params)
        model = alg(policy, env, verbose=1, tensorboard_log=tb_path, policy_kwargs=params['policy_args'], **params['alg_args'])
    else:
        # Continue training the given model on (possibly) a new environment
        model.set_env(env)
    print("\n===============================\n")
    print("TENSORBOARD PATH:", tb_path)
    print("\n===============================\n")
    model.learn(total_timesteps=params['timesteps'], log_interval=params['log_interval'],
                callback=create_training_callback(data_dir, params, env, freq=params['eval_freq'], checkpoint_freq=params['checkpoint_freq']))
    print("Saving model to", data_dir)
    model.save(data_dir +'/final_model')
    if params['normalize']:
        # Persist the VecNormalize running statistics alongside the model
        env.save(data_dir + '/environment.pkl')
    env.close()