Example #1
def _thunk():
    # env_id, seed, and rank are closed over from the enclosing factory.
    env = make_env.make_env(env_id)
    env.seed(seed + rank)
    # Per-rank monitor log; writes nothing if no log dir is configured.
    env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
                        allow_early_resets=True)
    gym.logger.setLevel(logging.WARN)
    return env
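
The `_thunk` above is a closure: `env_id`, `seed`, and `rank` come from an enclosing factory function. Note the `logger.get_dir() and os.path.join(...)` idiom: when no log directory is configured, `logger.get_dir()` returns None, the `and` short-circuits to None, and `Monitor` simply skips writing a results file. Below is a minimal sketch of how such thunks are typically collected into a vectorized environment; the `make_parallel_envs` name and the `num_procs` parameter are illustrative, and the imports assume the OpenAI baselines layout that these examples mirror:

import os
import gym
import logging
from baselines import bench, logger
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

import make_env  # multi-agent particle-env helper, as used in the examples


def make_parallel_envs(env_id, seed, num_procs):
    def make_thunk(rank):
        # Each worker gets its own thunk, closing over its own rank.
        def _thunk():
            env = make_env.make_env(env_id)
            env.seed(seed + rank)
            env = bench.Monitor(
                env,
                logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
                allow_early_resets=True)
            gym.logger.setLevel(logging.WARN)
            return env
        return _thunk

    # SubprocVecEnv runs each thunk's environment in its own subprocess.
    return SubprocVecEnv([make_thunk(rank) for rank in range(num_procs)])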
Example #2
def _thunk():
    # Same pattern, with an episode-length cap and discrete action input.
    env = make_env.make_env(env_id, max_episode_len=max_episode_len)
    env.discrete_action_input = True
    env.seed(seed + rank)
    env = bench.Monitor(env,
                        logger.get_dir()
                        and os.path.join(logger.get_dir(), str(rank)),
                        allow_early_resets=True)
    gym.logger.setLevel(logging.WARN)
    return env
Example #3
def train(logdir,
          env_id,
          lr,
          num_timesteps,
          seed,
          timesteps_per_batch,
          cont=False):
    from sandbox.ppo_sgd import mlp_policy
    from sandbox.ppo_sgd import pposgd_simple
    from rl import logger
    from rl.common import set_global_seeds, tf_util as U
    from rl import bench

    import os.path as osp
    import logging
    import gym
    from gym.envs.registration import register
    import multiagent  # multi-agent particle environments package
    import make_env

    logger.configure(logdir, format_strs=['log', 'json', 'tensorboard'])
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(seed)
    env = make_env.make_env(env_id)

    def policy_fn(name, ob_space, ac_space, id):
        # Two hidden layers of 64 units each.
        pi = mlp_policy.MlpPolicy(name=name,
                                  ob_space=ob_space,
                                  ac_space=ac_space,
                                  hid_size=64,
                                  num_hid_layers=2,
                                  id=id)
        return pi

    env = bench.Monitor(
        env,
        logger.get_dir() and osp.join(logger.get_dir(), "monitor.json"))
    env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    pposgd_simple.learn(env,
                        policy_fn,
                        max_timesteps=num_timesteps,
                        timesteps_per_batch=timesteps_per_batch,
                        clip_param=0.2,
                        entcoeff=0.0,
                        optim_epochs=10,
                        optim_stepsize=lr,
                        optim_batchsize=64,
                        gamma=0.99,
                        lam=0.95,
                        schedule='linear',
                        cont=cont)
    env.close()
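
For reference, a call to `train` might look like the following; the logdir and hyperparameter values are illustrative, not taken from the example (`simple_spread` is the multi-agent particle scenario that also appears in Example #6):

train(logdir='/tmp/ppo_run',
      env_id='simple_spread',
      lr=3e-4,
      num_timesteps=1000000,
      seed=0,
      timesteps_per_batch=2048)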
Example #4
def _make_env():
    # env_id and MAWrapper come from the enclosing scope.
    env = gym.make(env_id)
    env = MAWrapper(env)
    env = bench.Monitor(env, logger.get_dir())
    return env
Example #5
def _make_env():
    # make_env builds the environment in place of the usual gym.make call.
    env = make_env(env_id)  # gym.make(env_id)
    env = bench.Monitor(env, logger.get_dir())
    return env
Example #6
def create_env():
    env = make_env.make_env('simple_spread')
    env.seed(3)  # fixed seed for a reproducible environment
    env = bench.Monitor(env, '/tmp/', allow_early_resets=True)
    set_global_seeds(3)
    return env
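
`bench.Monitor` raises an error if `reset()` is called before an episode has ended; passing `allow_early_resets=True`, as here and in Examples #1 and #2, permits resets at any time.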
Example #7
def _make_env(rank):
    env = gym.make('RoboSumo-Ant-vs-Ant-v0')
    # Per-rank monitor file; None (no file) when no log dir is configured.
    env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    return env