Example #1
def configure(folder=None, format_strs=None):
    """
    configure the current logger

    :param folder: (str) the save location (if None, $OPENAI_LOGDIR, if still None, tempdir/openai-[date & time])
    :param format_strs: (list) the output logging format
        (if None, $OPENAI_LOG_FORMAT, if still None, ['stdout', 'log', 'csv'])
    """
    if folder is None:
        folder = os.getenv('OPENAI_LOGDIR')
    if folder is None:
        folder = os.path.join(
            tempfile.gettempdir(),
            datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f"))
    assert isinstance(folder, str)
    os.makedirs(folder, exist_ok=True)
    rank = mpi_rank_or_zero()

    log_suffix = ''
    if format_strs is None:
        if rank == 0:
            format_strs = os.getenv('OPENAI_LOG_FORMAT',
                                    'stdout,log,csv').split(',')
        else:
            log_suffix = "-rank%03i" % rank
            format_strs = os.getenv('OPENAI_LOG_FORMAT_MPI', 'log').split(',')
    format_strs = filter(None, format_strs)
    output_formats = [
        make_output_format(f, folder, log_suffix) for f in format_strs
    ]

    Logger.CURRENT = Logger(folder=folder, output_formats=output_formats)
    log('Logging to %s' % folder)
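A minimal usage sketch of configure(), assuming it lives in the usual stable_baselines.logger module; the folder path and format list are illustrative:

import os
from stable_baselines import logger  # assumed module layout

# Explicit arguments take precedence over the environment variables.
logger.configure(folder='/tmp/example-run', format_strs=['stdout', 'csv'])
logger.log('logger is ready')

# Alternatively, rely on the $OPENAI_LOGDIR / $OPENAI_LOG_FORMAT fallbacks.
os.environ['OPENAI_LOG_FORMAT'] = 'stdout,log'
logger.configure()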
Example #2
def make_mujoco_env(env_id, seed, allow_early_resets=True):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.

    :param env_id: (str) the environment ID
    :param seed: (int) the initial seed for RNG
    :param allow_early_resets: (bool) allows early reset of the environment
    :return: (Gym Environment) The mujoco environment
    """
    set_global_seeds(seed + 10000 * mpi_rank_or_zero())
    env = gym.make(env_id)
    env = Monitor(env, os.path.join(logger.get_dir(), '0'), allow_early_resets=allow_early_resets)
    env.seed(seed)
    return env
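A minimal usage sketch of the helper above; the environment id is illustrative, and logger.configure() is assumed to have been called beforehand so that logger.get_dir() returns a valid folder:

env = make_mujoco_env('HalfCheetah-v2', seed=0)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()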
Example #3
def train(params, model=None, path=None):
    if model: # indicate in filename that this is a finetune
        if params['name']:
            params['name'] += '_Finetune'
        else:
            params['name'] = 'Finetune'
    
    data_dir, tb_path = get_paths(params, path=path)
    print("Training Parameters: ", params)
    os.makedirs(data_dir, exist_ok=True)
    # Save parameters immediately
    params.save(data_dir)

    rank = mpi_rank_or_zero()
    if rank != 0:
        logger.set_level(logger.DISABLED)
    
    def make_env(i):
        env = get_env(params)
        env = Monitor(env, data_dir + '/' + str(i), allow_early_resets=params['early_reset'])
        return env

    use_her = params['env_args'].get('use_her', False)

    if use_her:
        env = make_env(0)
        goal_selection_strategy = 'future'
    else:
        env = DummyVecEnv([(lambda n: lambda: make_env(n))(i) for i in range(params['num_proc'])])

    if model: # sanity-check that the finetuned model's action space matches the environment's
        print("Model action space", model.action_space, model.action_space.low)
        print("Env action space", env.action_space, env.action_space.low)
    if params['normalize']:
        env = VecNormalize(env)
    if params['seed']:
        seed = params['seed'] + 100000 * rank
        set_global_seeds(seed)
        params['alg_args']['seed'] = seed
    if 'noise' in params and params['noise']:
        from stable_baselines.ddpg import OrnsteinUhlenbeckActionNoise
        n_actions = env.action_space.shape[-1]
        params['alg_args']['action_noise'] = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(params['noise'])*np.ones(n_actions))
    
    if model is None:
        alg = get_alg(params)
        policy = get_policy(params)
        if use_her:
            from stable_baselines import HER
            model = HER(policy, env, alg, n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy, verbose=1, 
                            tensorboard_log=tb_path, policy_kwargs=params['policy_args'], **params['alg_args'])
        else:
            model = alg(policy, env, verbose=1, tensorboard_log=tb_path, policy_kwargs=params['policy_args'], **params['alg_args'])
    else:
        model.set_env(env)

    model.learn(total_timesteps=params['timesteps'], log_interval=params['log_interval'],
                callback=create_training_callback(data_dir, freq=params['eval_freq'], checkpoint_freq=params['checkpoint_freq']))
    print("######## SAVING MODEL TO", data_dir)
    model.save(data_dir + '/final_model')
    if params['normalize']:
        env.save(data_dir + '/normalized_environment.env')
    env.close()
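For orientation, a sketch of the parameter mapping this train() reads; the keys mirror the lookups in the code above, the values are purely illustrative, and the real object is a Params-style wrapper that also provides the save() method used here:

params = {
    'name': 'reacher_ddpg',          # run name; '_Finetune' is appended when a model is passed in
    'alg': 'DDPG',                   # resolved by get_alg()
    'env_args': {'use_her': False},  # set use_her=True to wrap the algorithm in HER
    'num_proc': 4,                   # DummyVecEnv workers when HER is off
    'normalize': True,               # wrap the environment in VecNormalize
    'seed': 1,
    'noise': 0.2,                    # Ornstein-Uhlenbeck action-noise sigma
    'early_reset': True,
    'timesteps': 1000000,
    'log_interval': 10,
    'eval_freq': 10000,
    'checkpoint_freq': 50000,
    'policy_args': {},
    'alg_args': {},
}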
Example #4
def train(params, model=None, env=None): 
    print("Training Parameters: ", params)

    data_dir, tb_path = get_paths(params)
    os.makedirs(data_dir, exist_ok=True)
    # Save parameters immediately
    params.save(data_dir)

    rank = mpi_rank_or_zero()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Create the environment if not given
    if env is None:  
        def make_env(i):
            env = get_env(params)
            print("ENV IN UTIL" ,env)
            # TODO: make monitor work for multiple agent.
            env = Monitor(env, data_dir + '/' + str(i), allow_early_resets=params['early_reset'])
            return env

        # if 'PPO' in params['alg']:
        #     env = DummyVecEnv([(lambda n: lambda: make_env(n))(i) for i in range(params['num_proc'])])
        # else:
        #     env = make_env(0)
        env = make_env(0)

        if params['normalize']:
            env = VecNormalize(env)
    # Set the seeds
    if params['seed']:
        seed = params['seed'] + 100000 * rank
        set_global_seeds(seed)
        params['alg_args']['seed'] = seed

    if 'noise' in params and params['noise']:
        from stable_baselines.ddpg import OrnsteinUhlenbeckActionNoise
        n_actions = env.action_space.shape[-1]
        params['alg_args']['action_noise'] = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(params['noise'])*np.ones(n_actions))
  

    print("ENV", env, env.action_space)
    if model is None:
        alg = get_alg(params)
        policy = get_policy(params)
        model = alg(policy, env, verbose=1, tensorboard_log=tb_path, policy_kwargs=params['policy_args'], **params['alg_args'])
    else:
        model.set_env(env)

    print("\n===============================\n")
    print("TENSORBOARD PATH:", tb_path)
    print("\n===============================\n")
    model.learn(total_timesteps=params['timesteps'], log_interval=params['log_interval'], 
                callback=create_training_callback(data_dir, params, env, freq=params['eval_freq'], checkpoint_freq=params['checkpoint_freq']))
    
    print("Saving model to", data_dir)
    model.save(data_dir + '/final_model')

    if params['normalize']:
        env.save(data_dir + '/environment.pkl')
        
    env.close()
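Both train() variants accept a pre-built model (and this one also a pre-built env), so finetuning amounts to loading a saved model and passing it back in; a minimal sketch, assuming an earlier PPO2 run saved at an illustrative path:

from stable_baselines import PPO2  # any stable-baselines algorithm class works here

pretrained = PPO2.load('/tmp/previous_run/final_model')
finetune_env = get_env(params)  # same project helper used inside train()
train(params, model=pretrained, env=finetune_env)  # train() calls model.set_env(env) and continues learning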