Example #1
def train(env_id, num_timesteps, seed):
    env = gym.make(env_id)
    env = bench.Monitor(env, logger.get_dir())
    set_global_seeds(seed)
    env.seed(seed)
    gym.logger.setLevel(logging.WARN)

    with tf.Session(config=tf.ConfigProto()):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)

        learn(env,
              policy=policy,
              vf=vf,
              gamma=0.99,
              lam=0.97,
              timesteps_per_batch=2500,
              desired_kl=0.002,
              num_timesteps=num_timesteps,
              animate=False)

        env.close()
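
For reference, here is a minimal driver for the train() above, sketched after the argparse pattern in the baselines run scripts (mujoco_arg_parser and logger.configure are real baselines helpers; the wiring itself is an assumption, not the canonical entry point):

def main():
    from baselines import logger
    from baselines.common.cmd_util import mujoco_arg_parser

    # mujoco_arg_parser defines --env, --seed and --num-timesteps.
    args = mujoco_arg_parser().parse_args()
    logger.configure()  # sets up the directory returned by logger.get_dir()
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)

if __name__ == "__main__":
    main()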
Example #2
def train(env_id, num_timesteps, seed):
    env = Lynx()
    #env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(1)))
    set_global_seeds(seed)
    #env.seed(seed)
    gym.logger.setLevel(logging.WARN)

    with tf.Session(config=tf.ConfigProto()) as sess:
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            if MLP:
                policy = MlpPolicy(sess,
                                   ob_space=env.observation_space,
                                   ac_space=env.action_space)
            else:
                policy = GaussianMlpPolicy(ob_dim, ac_dim)

        learn(env,
              policy=policy,
              vf=vf,
              gamma=0.99,
              lam=0.97,
              timesteps_per_batch=50,
              desired_kl=0.002,
              num_timesteps=num_timesteps,
              animate=False)
Example #3
def train(env_id, num_timesteps, seed, alg, lr, momentum):
    env = make_mujoco_env(env_id, seed)

    if alg == 'sgd':
        from baselines.acktr.acktr_cont import learn
    elif alg == 'mid':
        from baselines.acktr.acktr_cont_midpoint import learn
    elif alg == 'geo':
        from baselines.acktr.acktr_cont_geo import learn
    else:
        raise ValueError('unknown alg: {}'.format(alg))
    nprocs = 4
    with tf.Session(
            config=tf.ConfigProto(allow_soft_placement=True,
                                  intra_op_parallelism_threads=nprocs,
                                  inter_op_parallelism_threads=nprocs)):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        policy = GaussianMlpPolicy(ob_dim, ac_dim, 'pi')

        learn(env,
              policy=policy,
              vf=vf,
              gamma=0.99,
              lam=0.97,
              timesteps_per_batch=2500,
              desired_kl=0.002,
              num_timesteps=num_timesteps,
              animate=False,
              lr=lr,
              momentum=momentum)

        env.close()
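
A purely illustrative invocation of this variant (the environment id, lr and momentum values are placeholders, not tuned settings):

train('HalfCheetah-v2', num_timesteps=1000000, seed=0,
      alg='mid', lr=0.03, momentum=0.9)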
Example #4
def train(args, num_timesteps, seed):
    import tensorflow as tf

    from baselines.common.cmd_util import make_mujoco_env, mujoco_arg_parser
    from baselines.acktr.acktr_cont import learn
    from baselines.acktr.policies import GaussianMlpPolicy
    from baselines.acktr.value_functions import NeuralNetValueFunction

    env = common.make_env(args)
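    # `common` is assumed to be a project-local helper module imported at
    # module level; it is not part of baselines.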
    env.reward_scale = 0.01

    with tf.Session(config=tf.ConfigProto()):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)

        learn(env,
              policy=policy,
              vf=vf,
              gamma=0.99,
              lam=0.97,
              timesteps_per_batch=2500,
              desired_kl=0.002,
              num_timesteps=num_timesteps,
              animate=False)

    env.close()
Example #5
def run_train_task(vv):

    # Create envs.
    env = vv['env'](log_scale_limit=0.0, max_path_length=vv['path_length'])

    with tf.Session(config=tf.ConfigProto()):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)

        learn(env,
              policy=policy,
              vf=vf,
              gamma=vv['discount'],
              lam=0.97,
              timesteps_per_batch=vv['batch_size'],
              desired_kl=0.002,
              num_timesteps=vv['num_timesteps'],
              max_path_length=vv['path_length'],
              animate=False)

        env.close()
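
run_train_task() expects a variant dictionary vv; below is a hypothetical example of the keys it reads (the env class and every value are illustrative only, taken from nothing but the function body above):

vv = {
    'env': HalfCheetahEnv,   # any env class accepting log_scale_limit and max_path_length
    'path_length': 200,
    'discount': 0.99,
    'batch_size': 2500,
    'num_timesteps': 1000000,
}
run_train_task(vv)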
Example #6
def train(env_id, num_timesteps, seed):
    env = make_gym_control_env(env_id, seed)
    with tf.Session(config=tf.ConfigProto()):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)

        learn(env, policy=policy, vf=vf,
            gamma=0.99, lam=0.97, timesteps_per_batch=2500,
            desired_kl=0.002,
            num_timesteps=num_timesteps, animate=False)

        env.close()
Example #7
def train(env_id, num_timesteps, seed):
    env = make_mujoco_env(env_id, seed)

    with tf.Session(config=tf.ConfigProto()):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)

        learn(env, policy=policy, vf=vf,
            gamma=0.99, lam=0.97, timesteps_per_batch=2500,
            desired_kl=0.002,
            num_timesteps=num_timesteps, animate=False)

        env.close()
Example #8
def train(env_id, num_timesteps, seed, render):
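    # PARTICLES is assumed to be a module-level constant giving the number
    # of particles in the environment; it is not defined in this snippet.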
    env = LearningEnvironment(num_particles=PARTICLES, disable_render=not render)
    env = bench.Monitor(env, os.path.join(logger.get_dir(), "monitor.json"))
    set_global_seeds(seed)
    gym.logger.setLevel(logging.WARN)

    with tf.Session(config=tf.ConfigProto()) as session:
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)

        learn(env, policy=policy, vf=vf,
            gamma=0.99, lam=0.97, timesteps_per_batch=8000,
            desired_kl=0.0002,
            num_timesteps=num_timesteps,
            animate=False)

        env.close()
Example #9
def train(env_id, num_timesteps, seed):
    """
    train an ACKTR model on MuJoCo

    :param env_id: (str) Environment ID
    :param num_timesteps: (int) The total number of samples
    :param seed: (int) The initial seed for training
    """
    env = make_mujoco_env(env_id, seed)

    with tf.Session(config=tf.ConfigProto()):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            value_fn = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)

        learn(env, policy=policy, value_fn=value_fn,
              gamma=0.99, lam=0.97, timesteps_per_batch=2500,
              desired_kl=0.002,
              num_timesteps=num_timesteps, animate=False)

        env.close()
Example #10
def train(env_id, num_timesteps, seed):
    env = gym.make(env_id)
    if logger.get_dir():
        env = bench.Monitor(env, os.path.join(logger.get_dir(), "monitor.json"))
    set_global_seeds(seed)
    env.seed(seed)
    gym.logger.setLevel(logging.WARN)

    with tf.Session(config=tf.ConfigProto()) as session:
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)

        learn(env, policy=policy, vf=vf,
            gamma=0.99, lam=0.97, timesteps_per_batch=2500,
            desired_kl=0.002,
            num_timesteps=num_timesteps, animate=False)

        env.close()
Example #11
def train(env_id, num_timesteps, seed):
    env = gym.make(env_id)
    rank = MPI.COMM_WORLD.Get_rank()
    env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    set_global_seeds(seed)
    env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)

        learn(env, policy=policy, vf=vf,
            gamma=0.99, lam=0.97, timesteps_per_batch=2500,
            desired_kl=0.002,
            num_timesteps=num_timesteps, animate=False)

        env.close()
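
This variant is written for MPI launches: each worker wraps its env in a Monitor file keyed by its rank. A companion pattern from the baselines MPI scripts, sketched here for context only, is to offset the seed per worker so each process gets its own random stream:

from mpi4py import MPI

def worker_seed(base_seed):
    # Offset the base seed by the worker's rank, mirroring the
    # seed + 10000 * rank convention used in baselines MPI scripts.
    return base_seed + 10000 * MPI.COMM_WORLD.Get_rank()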
Example #12
def train(env_id, num_timesteps, seed, save, gamma, lam, desired_kl):
    env = make_mujoco_env(env_id, seed)

    with tf.Session(config=tf.ConfigProto()):
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim, ac_dim)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim, ac_dim)

        ret = learn(env,
                    policy=policy,
                    vf=vf,
                    gamma=gamma,
                    lam=lam,
                    desired_kl=desired_kl,
                    timesteps_per_batch=2500,
                    num_timesteps=num_timesteps,
                    animate=False)

        env.close()
        np.savetxt(save, np.array([ret]))
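
An illustrative call (the hyperparameter values are placeholders; the score returned by learn() is written to the given file):

train('Hopper-v2', num_timesteps=1000000, seed=0,
      save='result.txt', gamma=0.99, lam=0.97, desired_kl=0.002)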
Example #13
# Imports assumed for this standalone snippet; the `learn` import path is a
# guess based on the other examples (this fork's learn also accepts the
# save/restore arguments used below).
import gym
import tensorflow as tf

from baselines.acktr.acktr_cont import learn
from baselines.acktr.policies import GaussianMlpPolicy
from baselines.acktr.value_functions import NeuralNetValueFunction
from baselines.common import set_global_seeds

env = gym.make('GazeboModularScara3DOF-v0')
initial_observation = env.reset()
print("Initial observation: ", initial_observation)
env.render()

seed = 0
set_global_seeds(seed)
env.seed(seed)

with tf.Session(config=tf.ConfigProto()) as session:
    ob_dim = env.observation_space.shape[0]
    ac_dim = env.action_space.shape[0]
    with tf.variable_scope("vf"):
        vf = NeuralNetValueFunction(ob_dim, ac_dim)
    with tf.variable_scope("pi"):
        policy = GaussianMlpPolicy(ob_dim, ac_dim)

    learn(env,
        policy=policy, vf=vf,
        gamma=0.99,
        lam=0.97,
        timesteps_per_batch=2500,
        desired_kl=0.02,
        num_timesteps=1e6,
        animate=False,
        save_model_with_prefix='',
        restore_model_from_file='')