Example #1
def train(env, policy, horizon, seed,
          trainable_std, gain_init, std_init,
          **alg_args):

    if env.startswith('rllab.'):
        # Get env name and class
        env_name = re.match(r'rllab.(\w+)', env).group(1)
        env_rllab_class = rllab_env_from_name(env_name)

        # Define env maker
        def make_env():
            env_rllab = env_rllab_class()
            _env = Rllab2GymWrapper(env_rllab)
            return _env
        # Used later
        env_type = 'rllab'

    else:
        # Normal gym, get if Atari or not.
        env_type = get_env_type(env)
        assert env_type is not None, "Env not recognized."
        # Define the correct env maker
        if env_type == 'atari':
            # Atari is not tested here
            raise Exception('Not tested on atari.')
        else:
            # Not atari, standard env creation
            def make_env():
                env_rllab = gym.make(env)
                return env_rllab

    # Create the policy
    if policy == 'linear':
        hid_layers = []
    else:
        raise NotImplementedError

    def make_policy(name, ob_space, ac_space):
        return PeMlpPolicy(name, ob_space, ac_space, hid_layers,
                           deterministic=True, diagonal=True,
                           trainable_std=trainable_std,
                           use_bias=False, use_critic=False,
                           seed=seed, verbose=True,
                           hidden_W_init=U.normc_initializer(1.0),
                           higher_mean_init=tf.constant_initializer(gain_init),
                           higher_logstd_init=tf.constant_initializer(
                               np.log(std_init)))
    try:
        affinity = len(os.sched_getaffinity(0))
    except:
        affinity = -1
    sess = U.make_session(affinity)
    sess.__enter__()

    set_global_seeds(seed)

    gym.logger.setLevel(logging.WARN)

    # Learn
    ucb1.learn(make_env, make_policy, horizon=horizon, **alg_args)
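
A hypothetical invocation of the train() helper above. Argument values are illustrative; anything extra would be forwarded to ucb1.learn through **alg_args, whose signature is not shown here, and the 'LQG1D-v0' id is assumed to be registered (e.g. by importing baselines.envs.lqg1d, as done in a later example).

# Hypothetical call with illustrative values; extra keyword arguments
# would be passed straight through to ucb1.learn via **alg_args.
train(env='LQG1D-v0',
      policy='linear',
      horizon=20,
      seed=42,
      trainable_std=False,
      gain_init=-0.1,
      std_init=0.1)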
Example #2
def create_env(env, seed):
    if env.startswith('rllab.'):
        # Get env name and class
        env_name = re.match(r'rllab.(\S+)', env).group(1)
        env_rllab_class = rllab_env_from_name(env_name)

        # Define env maker
        def make_env():
            env_rllab = env_rllab_class()
            _env = Rllab2GymWrapper(env_rllab)
            return _env

        # Used later
        env_type = 'rllab'
    else:
        # Normal gym, get if Atari or not.
        env_type = get_env_type(env)
        assert env_type is not None, "Env not recognized."
        # Define the correct env maker
        if env_type == 'atari':
            # Atari, custom env creation
            def make_env():
                _env = make_atari(env)
                return wrap_deepmind(_env)
        else:
            # Not atari, standard env creation
            def make_env():
                env_rllab = gym.make(env)
                return env_rllab

    env = make_env()
    env.seed(seed)
    return env
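
A hypothetical usage of create_env() above, assuming a standard Gym id; the helper picks the appropriate maker (rllab wrapper, Atari wrappers, or plain gym.make) and seeds the result.

# Hypothetical usage; 'CartPole-v1' is a standard Gym id handled by the
# plain gym.make branch above.
env = create_env('CartPole-v1', seed=0)
obs = env.reset()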
def train(env, policy, policy_init, num_episodes, episode_cap, horizon,
          **alg_args):

    # Getting the environment
    env_class = rllab_env_from_name(env)
    env = normalize(env_class())

    # Policy initialization
    if policy_init == 'zeros':
        initializer = LI.Constant(0)
    elif policy_init == 'normal':
        initializer = LI.Normal()
    else:
        raise Exception('Unrecognized policy initialization.')

    # Setting the policy type
    if policy == 'linear':
        hidden_sizes = tuple()
    elif policy == 'simple-nn':
        hidden_sizes = [16]
    else:
        raise Exception('NOT IMPLEMENTED.')

    # Creating the policy
    obs_dim = env.observation_space.flat_dim
    action_dim = env.action_space.flat_dim
    mean_network = MLP(
        input_shape=(obs_dim, ),
        output_dim=action_dim,
        hidden_sizes=hidden_sizes,
        hidden_nonlinearity=NL.tanh,
        output_nonlinearity=None,
        output_b_init=None,
        output_W_init=initializer,
    )
    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        # Hidden sizes follow the policy choice above (empty for 'linear', [16] for 'simple-nn').
        hidden_sizes=hidden_sizes,
        mean_network=mean_network,
        log_weights=True,
    )

    # Creating baseline
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    # Add a max_episodes constraint only when episode_cap is set
    if episode_cap:
        alg_args['max_episodes'] = num_episodes

    # Run algorithm
    algo = TRPO(env=env,
                policy=policy,
                baseline=baseline,
                batch_size=horizon * num_episodes,
                whole_paths=True,
                max_path_length=horizon,
                **alg_args)
    algo.train()
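
A hypothetical call to the TRPO-based train() above. Note that batch_size = horizon * num_episodes, so each iteration collects roughly num_episodes full-length paths. The env string resolving through rllab_env_from_name and n_itr being an accepted TRPO keyword are both assumptions here.

# Hypothetical call; 'cartpole' is assumed to resolve via rllab_env_from_name,
# and n_itr travels to rllab's TRPO through **alg_args.
train(env='cartpole',
      policy='linear',
      policy_init='zeros',
      num_episodes=10,
      episode_cap=True,
      horizon=500,
      n_itr=100)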
Example #4
def create_policy_and_env(env, seed, policy, policy_file):
    # Session
    sess = U.single_threaded_session()
    sess.__enter__()
    '''
    # Create the environment
    if env.startswith('rllab.'):
        # Get env name and class
        env_name = re.match('rllab.(\S+)', env).group(1)
        env_rllab_class = rllab_env_from_name(env_name)
        # Define env maker
        def make_env():
            env_rllab = env_rllab_class()
            _env = Rllab2GymWrapper(env_rllab)
            return _env
        # Used later
        env_type = 'rllab'
    else:
        # Normal gym, get if Atari or not.
        env_type = get_env_type(env)
        assert env_type is not None, "Env not recognized."
        # Define the correct env maker
        if env_type == 'atari':
            # Atari, custom env creation
            def make_env():
                _env = make_atari(env)
                return wrap_deepmind(_env)
        else:
            # Not atari, standard env creation
            def make_env():
                env_rllab = gym.make(env)
                return env_rllab
    env = make_env()
    env.seed(seed)
    ob_space = env.observation_space
    ac_space = env.action_space
    '''
    env_class = rllab_env_from_name(env)
    env = normalize(env_class())
    '''
    # Make policy
    if policy == 'linear':
        hid_size = num_hid_layers = 0
    elif policy == 'simple-nn':
        hid_size = [16]
        num_hid_layers = 1
    elif policy == 'nn':
        hid_size = [100, 50, 25]
        num_hid_layers = 3
    # Temp initializer
    policy_initializer = U.normc_initializer(0.0)
    if policy == 'linear' or policy == 'nn' or policy == 'simple-nn':
        def make_policy(name, ob_space, ac_space):
            return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                             hid_size=hid_size, num_hid_layers=num_hid_layers, gaussian_fixed_var=True, use_bias=True, use_critic=False,
                             hidden_W_init=policy_initializer,
                             output_W_init=policy_initializer)
    elif policy == 'cnn':
        def make_policy(name, ob_space, ac_space):
            return CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                         gaussian_fixed_var=True, use_bias=False, use_critic=False,
                         hidden_W_init=policy_initializer,
                         output_W_init=policy_initializer)
    else:
        raise Exception('Unrecognized policy type.')
    pi = make_policy('pi', ob_space, ac_space)
    # Load policy weights from file
    all_var_list = pi.get_trainable_variables()
    var_list = [v for v in all_var_list if v.name.split('/')[1].startswith('pol')]
    set_parameter = U.SetFromFlat(var_list)
    '''
    obs_dim = env.observation_space.flat_dim
    action_dim = env.action_space.flat_dim
    policy_init = 'zeros'
    # Policy initialization
    if policy_init == 'zeros':
        initializer = LI.Constant(0)
    elif policy_init == 'normal':
        initializer = LI.Normal()
    else:
        raise Exception('Unrecognized policy initialization.')

    # Setting the policy type
    if policy == 'linear':
        hidden_sizes = tuple()
    elif policy == 'simple-nn':
        hidden_sizes = [16]
    else:
        raise Exception('NOT IMPLEMENTED.')
    # Creating the policy
    mean_network = MLP(
        input_shape=(obs_dim, ),
        output_dim=action_dim,
        hidden_sizes=hidden_sizes,
        hidden_nonlinearity=NL.tanh,
        output_nonlinearity=None,
        output_b_init=None,
        output_W_init=initializer,
    )
    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        # Hidden sizes follow the policy choice above (empty for 'linear', [16] for 'simple-nn').
        hidden_sizes=hidden_sizes,
        mean_network=mean_network)

    #weights = pkl.load(open(policy_file, 'rb'))
    # TMP overriding weights
    #weights = [-0.19337249, -0.12103618, 0.00849289, -0.1105529, -3.6525128] # TRPO
    #weights = [-0.5894, -0.2585, -0.0137, -0.2464, -0.2788] # POIS
    #weights = list(map(float, ['-0.5807', '-0.3046', '-0.0127', '-0.3045', '-0.7427']))
    weights = list(
        map(
            lambda x: x.rstrip(' \r\n')
            if len(x.rstrip(' \r\n')) > 0 else None,
            """0.02483223 -0.17645608  0.77450023  0.54770311  0.33464952 -0.29827444
 -0.62524864  0.46413191 -0.31990006 -0.32972003  0.38753632 -0.15170416
 -0.43518174 -0.15718946  0.19542838 -0.02774486  0.13546377 -0.18621497
  0.18444675  0.774653    0.19710147 -0.20958339  0.15098953  0.42278248
 -0.53121678 -0.33369185 -0.04331141 -0.2140371   0.27077572  0.58111134
  0.34637848  0.56956591  0.45061681 -0.15826946 -1.06925573 -0.39311001
 -0.35695692  0.14414285 -1.25332428 -0.24016012  0.17774961  0.23973508
 -0.65415459  1.53059934 -0.71953132  1.79764386  0.18561774  1.4640445
 -0.1625999   0.0606595  -0.22058723 -0.34247517  0.46232139  0.07013392
 -0.32074007  0.14488911  0.1123158   0.28914362  0.6727726  -0.58491444
  0.35895434  1.32873906 -0.0708237  -0.05147256  0.01689644  0.38244615
  0.10005984  0.71253728 -0.18824528 -0.15552894 -0.05634595  0.3517145
  0.20900426 -0.19631462 -0.03828797  0.08125694 -0.22894259 -0.08030374
  0.59522035 -0.1752422  -0.40809067  1.62409963 -1.39307047  0.81438794
 -0.54068521  0.19321547 -1.65661292  0.3264788   0.46482921 -0.01649974
 -0.79186757 -1.3378886  -0.57094913 -1.57079733 -1.78056839  1.05324632
 -2.14386428""".rstrip(' \r\n').split(' ')))
    weights = [w for w in weights if w is not None]
    weights = list(map(float, weights))

    print(weights)
    #pi.set_param(weights)

    return env, policy
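
The rstrip/split lambda chain above simply extracts the whitespace-separated floats from the pasted weight dump. A shorter equivalent sketch (the weight string is truncated here for brevity):

import numpy as np

weight_str = """0.02483223 -0.17645608  0.77450023
  0.54770311  0.33464952"""  # truncated; the full dump is in the snippet above
# str.split() with no argument splits on any whitespace and drops empty tokens,
# which is exactly what the lambda/rstrip chain does by hand.
weights = np.array(weight_str.split(), dtype=float).tolist()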
Example #5
def train(env, policy, seed, njobs=1, **alg_args):

    if env.startswith('rllab.'):
        # Get env name and class
        env_name = re.match(r'rllab.(\w+)', env).group(1)
        env_rllab_class = rllab_env_from_name(env_name)
        # Define env maker
        def make_env(seed=0):
            def _thunk():
                env_rllab = Rllab2GymWrapper(env_rllab_class())
                env_rllab.seed(seed)
                return env_rllab
            return _thunk
        parallel_env = SubprocVecEnv([make_env(i + seed) for i in range(njobs)])
        # Used later
        env_type = 'rllab'
    else:
        # Normal gym, get if Atari or not.
        env_type = get_env_type(env)
        assert env_type is not None, "Env not recognized."
        # Define the correct env maker
        if env_type == 'atari':
            # Atari, custom env creation
            def make_env(seed=0):
                def _thunk():
                    _env = make_atari(env)
                    _env.seed(seed)
                    return wrap_deepmind(_env)
                return _thunk
            parallel_env = VecFrameStack(SubprocVecEnv([make_env(i + seed) for i in range(njobs)]), 4)
        else:
            # Not atari, standard env creation
            def make_env(seed=0):
                def _thunk():
                    _env = gym.make(env)
                    _env.seed(seed)
                    return _env
                return _thunk
            parallel_env = SubprocVecEnv([make_env(i + seed) for i in range(njobs)])

    # Create the policy
    if policy == 'linear':
        hid_size = num_hid_layers = 0
    elif policy == 'nn':
        hid_size = [100, 50, 25]
        num_hid_layers = 3

    if policy == 'linear' or policy == 'nn':
        def make_policy(name, ob_space, ac_space):
            return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                             hid_size=hid_size, num_hid_layers=num_hid_layers, gaussian_fixed_var=True, use_bias=False, use_critic=False,
                             hidden_W_init=tf.contrib.layers.xavier_initializer(),
                             output_W_init=tf.contrib.layers.xavier_initializer())
    elif policy == 'cnn':
        def make_policy(name, ob_space, ac_space):
            return CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                         gaussian_fixed_var=True, use_bias=False, use_critic=False,
                         hidden_W_init=tf.contrib.layers.xavier_initializer(),
                         output_W_init=tf.contrib.layers.xavier_initializer())
    else:
        raise Exception('Unrecognized policy type.')

    try:
        affinity = len(os.sched_getaffinity(0))
    except:
        affinity = njobs
    sess = U.make_session(affinity)
    sess.__enter__()

    set_global_seeds(seed)

    gym.logger.setLevel(logging.WARN)

    pois2.learn(parallel_env, make_policy, **alg_args)
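
A self-contained sketch of the seeded-thunk pattern used above: each worker gets its own env factory closed over a distinct seed so trajectories are decorrelated. The SubprocVecEnv import path may differ between baselines versions, and 'CartPole-v1' stands in for whatever id the pattern is applied to.

import gym
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

def make_env(seed=0):
    def _thunk():
        _env = gym.make('CartPole-v1')
        _env.seed(seed)
        return _env
    return _thunk

if __name__ == '__main__':
    # Four workers, each seeded with a distinct offset from a base seed.
    parallel_env = SubprocVecEnv([make_env(i + 42) for i in range(4)])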
Example #6
def train(env, policy, policy_init, n_episodes, horizon, seed, njobs=1, save_weights=False, **alg_args):

    if env.startswith('rllab.'):
        # Get env name and class
        env_name = re.match(r'rllab.(\S+)', env).group(1)
        env_rllab_class = rllab_env_from_name(env_name)
        # Define env maker
        def make_env():
            env_rllab = env_rllab_class()
            _env = Rllab2GymWrapper(env_rllab)
            return _env
        # Used later
        env_type = 'rllab'
    else:
        # Normal gym, get if Atari or not.
        env_type = get_env_type(env)
        assert env_type is not None, "Env not recognized."
        # Define the correct env maker
        if env_type == 'atari':
            # Atari, custom env creation
            def make_env():
                _env = make_atari(env)
                return wrap_deepmind(_env)
        else:
            # Not atari, standard env creation
            def make_env():
                env_rllab = gym.make(env)
                return env_rllab

    if policy == 'linear':
        hid_size = num_hid_layers = 0
    elif policy == 'nn':
        hid_size = [100, 50, 25]
        num_hid_layers = 3

    if policy_init == 'xavier':
        policy_initializer = tf.contrib.layers.xavier_initializer()
    elif policy_init == 'zeros':
        policy_initializer = U.normc_initializer(0.0)
    else:
        raise Exception('Unrecognized policy initializer.')

    if policy == 'linear' or policy == 'nn':
        def make_policy(name, ob_space, ac_space):
            return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                             hid_size=hid_size, num_hid_layers=num_hid_layers, gaussian_fixed_var=True, use_bias=False, use_critic=False,
                             hidden_W_init=policy_initializer, output_W_init=policy_initializer)
    elif policy == 'cnn':
        def make_policy(name, ob_space, ac_space):
            return CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                         gaussian_fixed_var=True, use_bias=False, use_critic=False,
                         hidden_W_init=policy_initializer,
                         output_W_init=policy_initializer)
    else:
        raise Exception('Unrecognized policy type.')

    sampler = ParallelSampler(make_policy, make_env, n_episodes, horizon, True, n_workers=njobs, seed=seed)

    try:
        affinity = len(os.sched_getaffinity(0))
    except:
        affinity = njobs
    sess = U.make_session(affinity)
    sess.__enter__()

    set_global_seeds(seed)

    gym.logger.setLevel(logging.WARN)

    pois.learn(make_env, make_policy, n_episodes=n_episodes, horizon=horizon,
                sampler=sampler, save_weights=save_weights, **alg_args)

    sampler.close()
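
A hypothetical invocation of the POIS train() helper above; POIS-specific hyperparameters (step size, bound type, etc.) would be forwarded to pois.learn through **alg_args and are omitted here.

# Hypothetical call with illustrative values; 'rllab.swimmer' is assumed to be
# an id accepted by rllab_env_from_name.
train(env='rllab.swimmer',
      policy='linear',
      policy_init='xavier',
      n_episodes=100,
      horizon=500,
      seed=42,
      njobs=4,
      save_weights=False)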
Example #7
def train(env,
          policy,
          horizon,
          seed,
          bounded_policy,
          mu_init,
          std_init,
          njobs=1,
          **alg_args):

    if env.startswith('rllab.'):
        # Get env name and class
        env_name = re.match(r'rllab.(\w+)', env).group(1)
        print('env_name', env_name)
        env_rllab_class = rllab_env_from_name(env_name)

        # Define env maker
        def make_env():
            env_rllab = env_rllab_class()
            _env = Rllab2GymWrapper(env_rllab, env_name)
            return _env

        # Used later
        env_type = 'rllab'

    else:
        # Normal gym, get if Atari or not.
        env_type = get_env_type(env)
        assert env_type is not None, "Env not recognized."
        # Define the correct env maker
        if env_type == 'atari':
            # Atari is not tested here
            raise Exception('Not tested on atari.')
        else:
            # Not atari, standard env creation
            def make_env():
                env_rllab = gym.make(env)
                return env_rllab

        env_name = make_env().spec.id

    # Create the policy
    if policy == 'linear':
        hid_layers = []
    else:
        raise NotImplementedError

    const_std_init = False
    if mu_init is not None:
        higher_mean_init = tf.constant_initializer(mu_init)
    else:
        higher_mean_init = U.normc_initializer(1.0)

    if std_init is not None:
        higher_logstd_init = tf.constant_initializer(np.log(std_init))
    else:
        higher_logstd_init = tf.constant_initializer(np.log(1e-2))
        # higher_logstd_init = tf.constant(np.log([0.15, 1.5]).astype(np.float32))
        # const_std_init = True

    def make_policy(name, ob_space, ac_space):
        return PeMlpPolicy(name,
                           ob_space,
                           ac_space,
                           hid_layers,
                           deterministic=True,
                           diagonal=True,
                           trainable_std=alg_args['trainable_std'],
                           use_bias=False,
                           use_critic=False,
                           seed=seed,
                           verbose=True,
                           hidden_W_init=U.normc_initializer(1.0),
                           higher_mean_init=higher_mean_init,
                           higher_logstd_init=higher_logstd_init,
                           const_std_init=const_std_init)

    try:
        affinity = len(os.sched_getaffinity(0))
    except:
        affinity = njobs
    sess = U.make_session(affinity)
    sess.__enter__()

    set_global_seeds(seed)

    gym.logger.setLevel(logging.WARN)

    # Prepare (sequential) sampler to generate ONE trajectory at a time
    sampler = None

    # Learn
    optimist.learn(env_name,
                   make_env,
                   seed,
                   make_policy,
                   horizon=horizon,
                   sampler=sampler,
                   **alg_args)
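
A hypothetical call to the OPTIMIST train() helper above. Note that trainable_std must be supplied through **alg_args, because make_policy reads alg_args['trainable_std'] directly; any other learner-specific hyperparameters would travel the same way.

# Hypothetical call; trainable_std is consumed from **alg_args by make_policy.
train(env='LQG1D-v0',
      policy='linear',
      horizon=20,
      seed=42,
      bounded_policy=False,
      mu_init=-0.1,
      std_init=0.1,
      njobs=1,
      trainable_std=False)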
Example #8
def train(env, max_iters, num_episodes, horizon, iw_norm, bound, delta, gamma, seed, policy, max_offline_iters, aggregate, center, use_bias, njobs=1):

    if env.startswith('rllab.'):
        # Get env name and class
        env_name = re.match(r'rllab.(\w+)', env).group(1)
        env_rllab_class = rllab_env_from_name(env_name)
        # Define env maker
        def make_env():
            env_rllab = env_rllab_class()
            _env = Rllab2GymWrapper(env_rllab)
            return _env
        # Used later
        env_type = 'rllab'
    else:
        # Normal gym, get if Atari or not.
        env_type = get_env_type(env)
        assert env_type is not None, "Env not recognized."
        # Define the correct env maker
        if env_type == 'atari':
            # Atari is not tested here
            raise Exception('Not tested on atari.')
        else:
            # Not atari, standard env creation
            def make_env():
                env_rllab = gym.make(env)
                return env_rllab

    # Create the policy
    if policy == 'linear':
        hid_layers = []
    elif policy == 'nn':
        hid_layers = [100, 50, 25]
    elif policy == 'cnn':
        raise Exception('CNN policy not tested.')

    if aggregate == 'none':
        learner = pbpois
        PolicyClass = PeMlpPolicy
    elif aggregate == 'neuron':
        learner = nbpois
        PolicyClass = MultiPeMlpPolicy
    else:
        print("Unknown aggregation method, defaulting to none")
        learner = pbpois
        PolicyClass = PeMlpPolicy

    make_policy = lambda name, observation_space, action_space: PolicyClass(name,
                      observation_space,
                      action_space,
                      hid_layers,
                      use_bias=use_bias,
                      seed=seed)

    sampler = ParallelSampler(make_env, make_policy, gamma, horizon, np.ravel, num_episodes, njobs, seed)

    try:
        affinity = len(os.sched_getaffinity(0))
    except:
        affinity = njobs
    sess = U.make_session(affinity)
    sess.__enter__()

    set_global_seeds(seed)

    gym.logger.setLevel(logging.WARN)


    learner.learn(
          make_env,
          make_policy,
          sampler,
          gamma=gamma,
          n_episodes=num_episodes,
          horizon=horizon,
          max_iters=max_iters,
          verbose=1,
          feature_fun=np.ravel,
          iw_norm=iw_norm,
          bound=bound,
          max_offline_iters=max_offline_iters,
          delta=delta,
          center_return=center,
          line_search_type='parabola')
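
A hypothetical invocation of the parameter-based POIS train() above; all values are illustrative, and the iw_norm and bound strings must match whatever options the pbpois/nbpois learners actually accept.

# Hypothetical call; string options (iw_norm, bound, aggregate) must match the
# learner's accepted values.
train(env='rllab.swimmer',
      max_iters=500,
      num_episodes=100,
      horizon=500,
      iw_norm='none',
      bound='max-d2',
      delta=0.99,
      gamma=1.0,
      seed=42,
      policy='linear',
      max_offline_iters=10,
      aggregate='none',
      center=False,
      use_bias=True,
      njobs=4)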
Example #9
def create_sampler(env=None,
                   policy='linear',
                   n_episodes=100,
                   horizon=500,
                   njobs=1,
                   seed=42):
    # Create the environment
    if env.startswith('rllab.'):
        # Get env name and class
        env_name = re.match(r'rllab.(\S+)', env).group(1)
        env_rllab_class = rllab_env_from_name(env_name)

        # Define env maker
        def make_env():
            env_rllab = env_rllab_class()
            _env = Rllab2GymWrapper(env_rllab)
            return _env

        # Used later
        env_type = 'rllab'
    else:
        # Normal gym, get if Atari or not.
        env_type = get_env_type(env)
        assert env_type is not None, "Env not recognized."
        # Define the correct env maker
        if env_type == 'atari':
            # Atari, custom env creation
            def make_env():
                _env = make_atari(env)
                return wrap_deepmind(_env)
        else:
            # Not atari, standard env creation
            def make_env():
                env_rllab = gym.make(env)
                return env_rllab

    # Select policy architecture
    if policy == 'linear':
        hid_size = num_hid_layers = 0
        use_bias = False
    elif policy == 'simple-nn':
        hid_size = [16]
        num_hid_layers = 1
        use_bias = True
    elif policy == 'nn':
        hid_size = [100, 50, 25]
        num_hid_layers = 3
        use_bias = True
    policy_initializer = U.normc_initializer(0.0)
    if policy == 'linear' or policy == 'nn' or policy == 'simple-nn':

        def make_policy(name, ob_space, ac_space):
            return MlpPolicy(name=name,
                             ob_space=ob_space,
                             ac_space=ac_space,
                             hid_size=hid_size,
                             num_hid_layers=num_hid_layers,
                             gaussian_fixed_var=True,
                             use_bias=use_bias,
                             use_critic=False,
                             hidden_W_init=policy_initializer,
                             output_W_init=policy_initializer)
    elif policy == 'cnn':

        def make_policy(name, ob_space, ac_space):
            return CnnPolicy(name=name,
                             ob_space=ob_space,
                             ac_space=ac_space,
                             gaussian_fixed_var=True,
                             use_bias=False,
                             use_critic=False,
                             hidden_W_init=policy_initializer,
                             output_W_init=policy_initializer)
    else:
        raise Exception('Unrecognized policy type.')
    # Create the sampler
    sampler = ParallelSampler(make_policy,
                              make_env,
                              n_episodes,
                              horizon,
                              True,
                              n_workers=njobs,
                              seed=seed)
    try:
        affinity = len(os.sched_getaffinity(0))
    except:
        affinity = njobs
    sess = U.make_session(affinity)
    sess.__enter__()
    # Set random seed
    set_global_seeds(seed)
    return sampler
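
A hypothetical usage of create_sampler() above; the sampler is backed by njobs worker processes and should be released with close() when done (as in the pois example earlier).

# Hypothetical usage; argument values are illustrative.
sampler = create_sampler(env='rllab.swimmer',
                         policy='linear',
                         n_episodes=100,
                         horizon=500,
                         njobs=4,
                         seed=42)
# ... collect trajectories with the chosen algorithm ...
sampler.close()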
def set_script_test(env, policy, horizon, seed, bounded_policy, trainable_std,
                    gain_init, max_mean, min_mean, max_std, min_std, std_init):

    # Common imports
    import sys, re, os, time, logging
    from collections import defaultdict
    # Framework imports
    import gym
    import tensorflow as tf
    # Self imports: utils
    from baselines.common import set_global_seeds
    from baselines import logger
    import baselines.common.tf_util as U
    from baselines.common.rllab_utils import Rllab2GymWrapper, rllab_env_from_name
    from baselines.common.atari_wrappers import make_atari, wrap_deepmind
    # Import custom envs
    import baselines.envs.lqg1d  # registered at import as gym env

    def get_env_type(env_id):
        # First load all envs
        _game_envs = defaultdict(set)
        for env in gym.envs.registry.all():
            env_type = env._entry_point.split(':')[0].split('.')[-1]
            _game_envs[env_type].add(env.id)
        # Get env type
        env_type = None
        for g, e in _game_envs.items():
            if env_id in e:
                env_type = g
                break
        return env_type

    env = 'LQG1D-v0'
    # Prepare environment maker
    if env.startswith('rllab.'):
        # Get env name and class
        env_name = re.match(r'rllab.(\w+)', env).group(1)
        env_rllab_class = rllab_env_from_name(env_name)

        # Define env maker
        def make_env():
            env_rllab = env_rllab_class()
            _env = Rllab2GymWrapper(env_rllab)
            return _env

        # Used later
        env_type = 'rllab'
    else:
        # Normal gym, get if Atari or not.
        env_type = get_env_type(env)
        assert env_type is not None, "Env not recognized."
        # Define the correct env maker
        if env_type == 'atari':
            # Atari, custom env creation
            def make_env():
                _env = make_atari(env)
                return wrap_deepmind(_env)
        else:
            # Not atari, standard env creation
            def make_env():
                env_rllab = gym.make(env)
                return env_rllab

    # Prepare policy maker
    if policy == 'linear':
        hid_size = num_hid_layers = 0
    elif policy == 'nn':
        hid_size = [100, 50, 25]
        num_hid_layers = 3

    def make_policy(name, ob_space, ac_space):
        return MlpPolicyBounded(
            name=name,
            ob_space=ob_space,
            ac_space=ac_space,
            hid_size=hid_size,
            num_hid_layers=num_hid_layers,
            gaussian_fixed_var=True,
            trainable_std=trainable_std,
            use_bias=False,
            use_critic=False,
            #hidden_W_init=tf.constant_initializer(1.1),
            gain_init=gain_init,
            max_mean=max_mean,
            min_mean=min_mean,
            max_std=max_std,
            min_std=min_std,
            std_init=std_init)

    # Initialize
    affinity = len(os.sched_getaffinity(0))
    sess = U.make_session(affinity)
    sess.__enter__()
    set_global_seeds(seed)
    gym.logger.setLevel(logging.WARN)

    env = make_env()
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = make_policy('pi', ob_space, ac_space)

    return pi
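
A hypothetical call to set_script_test() above. The env argument is effectively ignored because the function forces 'LQG1D-v0'; the returned pi is an MlpPolicyBounded instance ready for inspection.

# Hypothetical call; env is overridden to 'LQG1D-v0' inside the function.
pi = set_script_test(env=None,
                     policy='linear',
                     horizon=20,
                     seed=42,
                     bounded_policy=True,
                     trainable_std=False,
                     gain_init=-0.1,
                     max_mean=1.0,
                     min_mean=-1.0,
                     max_std=1.0,
                     min_std=0.01,
                     std_init=0.1)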
Example #11
def create_env_rllab(env, seed):
    env_name = re.match(r'rllab.(\S+)', env).group(1)
    env_rllab_class = rllab_env_from_name(env_name)
    env = normalize(env_rllab_class())
    return env
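
A hypothetical usage of create_env_rllab() above; note that the seed argument is accepted but never applied inside the function as written.

# Hypothetical usage; 'rllab.swimmer' is assumed to resolve via rllab_env_from_name,
# and seed currently has no effect in this helper.
env = create_env_rllab('rllab.swimmer', seed=42)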