Code Example #1
def train(env, policy, seed, njobs=1, **alg_args):

    if env.startswith('rllab.'):
        # Get env name and class
        env_name = re.match(r'rllab\.(\w+)', env).group(1)
        env_rllab_class = rllab_env_from_name(env_name)
        # Define env maker
        def make_env(seed=0):
            def _thunk():
                env_rllab = Rllab2GymWrapper(env_rllab_class())
                env_rllab.seed(seed)
                return env_rllab
            return _thunk
        # One env per worker, each seeded with a distinct offset from the base seed
        parallel_env = SubprocVecEnv([make_env(i + seed) for i in range(njobs)])
        # Used later
        env_type = 'rllab'
    else:
        # Normal gym, get if Atari or not.
        env_type = get_env_type(env)
        assert env_type is not None, "Env not recognized."
        # Define the correct env maker
        if env_type == 'atari':
            # Atari, custom env creation
            def make_env(seed=0):
                def _thunk():
                    _env = make_atari(env)
                    _env.seed(seed)
                    return wrap_deepmind(_env)
                return _thunk
            parallel_env = VecFrameStack(SubprocVecEnv([make_env(i + seed) for i in range(njobs)]), 4)
        else:
            # Not atari, standard env creation
            def make_env(seed=0):
                def _thunk():
                    _env = gym.make(env)
                    _env.seed(seed)
                    return _env
                return _thunk
            parallel_env = SubprocVecEnv([make_env(i + seed) for i in range(njobs)])

    # Create the policy
    if policy == 'linear':
        hid_size = num_hid_layers = 0
    elif policy == 'nn':
        hid_size = [100, 50, 25]
        num_hid_layers = 3

    if policy == 'linear' or policy == 'nn':
        def make_policy(name, ob_space, ac_space):
            return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                             hid_size=hid_size, num_hid_layers=num_hid_layers, gaussian_fixed_var=True, use_bias=False, use_critic=False,
                             hidden_W_init=tf.contrib.layers.xavier_initializer(),
                             output_W_init=tf.contrib.layers.xavier_initializer())
    elif policy == 'cnn':
        def make_policy(name, ob_space, ac_space):
            return CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                         gaussian_fixed_var=True, use_bias=False, use_critic=False,
                         hidden_W_init=tf.contrib.layers.xavier_initializer(),
                         output_W_init=tf.contrib.layers.xavier_initializer())
    else:
        raise ValueError('Unrecognized policy type.')

    try:
        # os.sched_getaffinity is only available on Linux; fall back to njobs elsewhere
        affinity = len(os.sched_getaffinity(0))
    except AttributeError:
        affinity = njobs
    sess = U.make_session(affinity)
    sess.__enter__()

    set_global_seeds(seed)

    gym.logger.setLevel(logging.WARN)

    pois2.learn(parallel_env, make_policy, **alg_args)
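
A usage sketch for this first variant. The environment id, seed, and hyperparameter values below are illustrative assumptions rather than values from the source; everything beyond env, policy, seed, and njobs is forwarded unchanged to pois2.learn via **alg_args, and the keyword names simply mirror those used in Code Example #2.

# Hypothetical call: a Gaussian MLP policy on a continuous-control gym task,
# run with 4 parallel workers. Extra keywords are passed through to pois2.learn().
train('Swimmer-v2', policy='nn', seed=0, njobs=4,
      n_episodes=100, max_iters=500, horizon=500, gamma=1.0)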
Code Example #2
def train(env,
          max_iters,
          num_episodes,
          horizon,
          iw_method,
          iw_norm,
          natural,
          bound,
          delta,
          gamma,
          seed,
          policy,
          max_offline_iters,
          njobs=1):

    # Define the env maker and create the vectorized env
    def make_env(seed=0):
        def _thunk():
            _env = gym.make(env)
            _env.seed(seed)
            return _env

        return _thunk

    parallel_env = SubprocVecEnv([make_env(i + seed) for i in range(njobs)],
                                 terminating=True)

    # Create the policy
    if policy == 'linear':
        hid_size = num_hid_layers = 0
    elif policy == 'nn':
        hid_size = [100, 50, 25]
        num_hid_layers = 3
    else:
        # Fail early, as in Code Example #1; otherwise make_policy would hit a NameError
        raise ValueError('Unrecognized policy type.')

    def make_policy(name, ob_space, ac_space):
        return MlpPolicy(name=name,
                         ob_space=ob_space,
                         ac_space=ac_space,
                         hid_size=hid_size,
                         num_hid_layers=num_hid_layers,
                         gaussian_fixed_var=True,
                         use_bias=False,
                         use_critic=False,
                         hidden_W_init=tf.contrib.layers.xavier_initializer(),
                         output_W_init=tf.contrib.layers.xavier_initializer())

    try:
        # os.sched_getaffinity is only available on Linux; fall back to njobs elsewhere
        affinity = len(os.sched_getaffinity(0))
    except AttributeError:
        affinity = njobs
    sess = U.make_session(affinity)
    sess.__enter__()

    set_global_seeds(seed)

    gym.logger.setLevel(logging.WARN)

    pois2.learn(
        parallel_env,
        make_policy,
        n_episodes=num_episodes,
        max_iters=max_iters,
        horizon=horizon,
        gamma=gamma,
        delta=delta,
        use_natural_gradient=natural,
        iw_method=iw_method,
        iw_norm=iw_norm,
        bound=bound,
        save_weights=True,
        center_return=True,
        render_after=None,
        max_offline_iters=max_offline_iters,
    )
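
A sketch of a call to this second variant. All values are placeholders; in particular, the admissible strings for iw_method, iw_norm, and bound depend on what pois2.learn accepts and are not specified by the source.

# Hypothetical call; environment id and hyperparameter values are placeholders.
train(env='Swimmer-v2', max_iters=500, num_episodes=100, horizon=500,
      iw_method='is', iw_norm='none', natural=False, bound='max-d2',
      delta=0.99, gamma=1.0, seed=0, policy='nn', max_offline_iters=10)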