Example #1
def main():
    """
    Runs the test
    """
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path',
                        default=os.path.join(logger.get_dir(),
                                             'humanoid_policy'))
    parser.set_defaults(num_timesteps=int(2e7))

    args = parser.parse_args()

    if not args.play:
        # train the model
        train(num_timesteps=args.num_timesteps,
              seed=args.seed,
              model_path=args.model_path)
    else:
        # construct the model object, load pre-trained model and render
        policy = train(num_timesteps=1, seed=args.seed)
        tf_util.load_state(args.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=0)

        obs = env.reset()
        while True:
            action = policy.act(stochastic=False, obs=obs)[0]
            obs, _, done, _ = env.step(action)
            env.render()
            if done:
                obs = env.reset()
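Most of these main() excerpts rely on module-level imports that were stripped when the functions were extracted. A minimal sketch of the typical preamble, assuming the OpenAI baselines layout (individual projects may differ):

# Typical module-level preamble assumed by the snippets below
# (names follow the OpenAI baselines layout; individual projects may differ)
import os
import time

import gym
import numpy as np

from baselines import logger
from baselines.common import tf_util as U
from baselines.common.cmd_util import make_mujoco_env, mujoco_arg_parser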
Example #2
def main():
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'policy'))
    parser.set_defaults(num_timesteps=int(2e7))
   
    args = parser.parse_args()
    
    if not args.play:
        # train the model
        train(args.env, num_timesteps=args.num_timesteps, seed=args.seed, model_path=args.model_path)
    else:       
        # construct the model object, load pre-trained model and render
        pi = train(args.env, num_timesteps=1, seed=args.seed)
        U.load_state(args.model_path)
        env = make_mujoco_env(args.env, seed=0)

        ob = env.reset()        
        while True:
            action = pi.act(stochastic=False, ob=ob)[0]
            ob, _, done, _ = env.step(action)
            print(ob, action)
            #env.render()
            if done:
                ob = env.reset()
Example #3
def main():
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          save=args.save_model)
Example #4
def main():
    args = mujoco_arg_parser().parse_args()
    wandb.config.update(args)
    wandb.config.algo = 'ppo2'
    logger.configure()
    model, env = train(args.env,
                       num_timesteps=args.num_timesteps,
                       seed=args.seed)

    env_final = gym.make(args.env)
    video_recorder = gym.wrappers.monitoring.video_recorder.VideoRecorder(
        env=env_final,
        base_path=os.path.join(wandb.run.dir, 'humanoid'),
        enabled=True)

    # obs = env_final.reset()

    if True:  # if args.play
        logger.log("Running trained model")
        obs = np.zeros((env.num_envs, ) + env.observation_space.shape)
        obs[:] = env_final.reset()
        while True:
            actions = model.step(obs)[0]
            print(actions.shape)
            o, r, d, i = env_final.step(actions[0])
            obs[:] = o
            # env.render()
            video_recorder.capture_frame()
            if d:
                obs[:] = env_final.reset()
                video_recorder.close()
                break
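Example #4 above records a rollout with gym's VideoRecorder. A minimal, self-contained sketch of that recorder API on its own (environment name and output path are illustrative):

# Sketch of the gym VideoRecorder usage seen in Example #4
# (environment name and output path are illustrative)
import gym
from gym.wrappers.monitoring.video_recorder import VideoRecorder

env = gym.make('Humanoid-v2')
recorder = VideoRecorder(env=env, base_path='/tmp/humanoid', enabled=True)

env.reset()
done = False
while not done:
    # random actions are enough to exercise the recorder
    _, _, done, _ = env.step(env.action_space.sample())
    recorder.capture_frame()
recorder.close()
env.close()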
Example #5
def main():
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path',
                        default='checkpoints_best/Humanoid-v2-6914')
    parser.set_defaults(num_timesteps=int(2e8))

    args = parser.parse_args()

    if not args.play:
        # train the model
        train(num_timesteps=args.num_timesteps,
              seed=args.seed,
              model_path=args.model_path)
    else:
        # construct the model object, load pre-trained model and render
        pi = train(num_timesteps=1, seed=args.seed)
        U.load_state(args.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=123)

        ob = env.reset()
        while True:
            action = pi.act(stochastic=False, ob=ob)[0]
            ob, _, done, _ = env.step(action)
            env.render()
            time.sleep(0.01)
            if done:
                ob = env.reset()
Example #6
def main():
    """
    Runs the test
    """
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
Example #7
def main():
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          soc=args.soc,
          psi=args.psi)
Example #8
def main():
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          p=args.p,
          alpha=args.alpha)
Example #9
def main():
    args = mujoco_arg_parser().parse_args()
    logger.configure(dir=args.filepath)
    train(args.env,
          num_timesteps=args.num_timesteps,
          timesteps_per_actor_batch=args.timesteps_per_episode,
          seed=args.seed,
          entropy_coeff=args.entropy_coeff,
          filepath=args.filepath)
Example #10
def main():
    parser = mujoco_arg_parser()
    parser.add_argument('--logdir')
    parser.add_argument('--load-path', default=None)
    args = parser.parse_args()
    logger.configure(dir=args.logdir)
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          load_path=args.load_path)
Example #11
def main():
    args = mujoco_arg_parser().parse_args()
    logger.configure(dir='geo/v{}/{}/{}/{}/{}'.format(
        args.version, args.alg, args.env, args.lr, args.seed))
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          alg=args.alg,
          lr=args.lr,
          momentum=args.mom)
Example #12
def main():
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    train(args.env,
          gamma=args.gamma,
          lam=args.lam,
          save=args.save,
          desired_kl=args.desired_kl,
          num_timesteps=args.num_timesteps,
          seed=args.seed)
Example #13
def main():
    parser = mujoco_arg_parser()
    parser.add_argument('--log-dir',
                        help='Log directory where all logs will be written',
                        default=None)
    parser.add_argument('--log-formats',
                        help='Formats in which the logs will be written.',
                        default=None)
    args = parser.parse_args()
    logger.configure(args.log_dir, args.log_formats)
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
Example #14
def main():
    parser = mujoco_arg_parser()
    parser.add_argument('--load-path')
    parser.add_argument('--save-video')
    args = parser.parse_args()
    logger.configure()
    infer(args.env,
          load_path=args.load_path,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          save_video=args.save_video)
Example #15
def main():
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          clip_param=args.clip_param,
          optim_stepsize=args.stepsize,
          optim_batchsize=args.batch_size,
          gamma=args.gamma,
          lam=args.lam,
          save=args.save)
Example #16
def main():
    parser = mujoco_arg_parser()
    parser.add_argument('--cpu', type=int, default=1)
    parser.add_argument('--lr', type=float, default=3e-4)
    parser.add_argument('--batch', type=int, default=2048)
    args = parser.parse_args()
    logdir = './results/mappo/' + args.env + '/l-{}-b-{}/seed-{}'.format(args.lr, args.batch, args.seed)
    try:
        logger.configure(logdir, format_strs=['stdout', 'log', 'json', 'tensorboard'])
    except:
        logger.configure()
    train(args.env, num_timesteps=1e7, seed=args.seed, num_cpu=args.cpu, batch=args.batch, lr=args.lr)
Example #17
def main():
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    pi = train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
    env = make_mujoco_env('Walker2d-v2', seed=0)
    ob = env.reset()
    while True:
        action = pi.act(stochastic=False, ob=ob)[0]
        ob, _, done, _ = env.step(action)
        env.render()
        time.sleep(0.01)
        if done:
            ob = env.reset()
Example #18
def main():
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    model, env = train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)

    if args.play:
        logger.log("Running trained model")
        obs = np.zeros((env.num_envs,) + env.observation_space.shape)
        obs[:] = env.reset()
        while True:
            actions = model.step(obs)[0]
            obs[:]  = env.step(actions)[0]
            env.render()
Example #19
def main():
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    model, env = train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)

    if 1:
        logger.log("Running trained model")
        obs = np.zeros((env.num_envs,) + env.observation_space.shape)
        obs[:] = env.reset()
        while True:
            actions = model.step(obs)[0]
            obs[:]  = env.step(actions)[0]
            env.render()
Example #20
def start(fold, env_id):
    from baselines import logger
    from baselines.common.cmd_util import mujoco_arg_parser
    from baselines.trpo_replay.acktr_cont import train
    from algorithm_parameters import algorithm_parameters
    import os
    import tensorflow as tf
    tf.reset_default_graph()
    os.environ['OPENAI_LOGDIR'] = 'logs_' + env_id + '_' + str(fold)
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    parameters = algorithm_parameters()
    train(env_id, parameters=parameters, seed=args.seed)
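Example #20 works because baselines' logger.configure() falls back to the OPENAI_LOGDIR environment variable when no directory is given. A minimal sketch of that behavior (path is illustrative):

# Sketch: logger.configure() with no dir argument reads OPENAI_LOGDIR
# (illustrative path; set it before configuring the logger)
import os
from baselines import logger

os.environ['OPENAI_LOGDIR'] = 'logs_Hopper-v2_0'
logger.configure()            # with no dir argument, OPENAI_LOGDIR is used
print(logger.get_dir())       # -> logs_Hopper-v2_0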
Example #21
def main():

    parser = mujoco_arg_parser()
    parser.add_argument('--model-path')
    parser.add_argument('--sim', default=False, action='store_true')
    parser.add_argument('--hessians', default=False, action='store_true')
    parser.add_argument('--logdir', type=str, default=None)
    args = parser.parse_args()
    logger.configure(args.logdir)

    if not args.model_path:
        raise ValueError('You have to provide a model path.')

    if not args.play:
        # train the model
        train(args.env,
              num_timesteps=args.num_timesteps,
              seed=args.seed,
              model_path=args.model_path,
              target1=args.target1,
              target2=args.target2,
              target3=args.target3,
              output_prefix=args.output_prefix,
              input_file=args.input_file,
              sim=args.sim,
              hessians=args.hessians)
    else:
        # construct the model object, load pre-trained model and render
        pi = train(args.env,
                   num_timesteps=1,
                   seed=args.seed,
                   target1=args.target1,
                   target2=args.target2,
                   target3=args.target3,
                   output_prefix=args.output_prefix,
                   input_file=args.input_file,
                   sim=False)
        U.load_state('models/' + args.model_path)
        env = make_pareto_mujoco_env(args.env,
                                     seed=0,
                                     target1=args.target1,
                                     target2=args.target2,
                                     target3=args.target3)

        ob = env.reset()
        while True:
            action = pi.act(stochastic=False, ob=ob)[0]
            ob, _, done, _ = env.step(action)
            env.render()
            if done:
                ob = env.reset()
Example #22
def main():
    parser = mujoco_arg_parser()
    args = parser.parse_args()
    if 'ext-v2' in args.env:
        import gym
        cost = gym.make(args.env).messageCost
        logdir = 'TRY_logs/env=%s-c-%d/seed=%d_%s' % (
            args.env, cost, args.seed, datetime.now().strftime('%d_%H:%M:%S'))
    else:

        logdir = 'TRY_logs/env=%s/seed=%d_%s' % (
            args.env, args.seed, datetime.now().strftime('%d_%H:%M:%S'))
    logger.configure(logdir)

    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
Example #23
def main():
    from time import strftime
    pydart.init()
    args = mujoco_arg_parser().parse_args()
    logger.configure(dir='./log'+strftime("%Y%m%d%H%M")+'/')
    model, env = train(args.env, num_timesteps=10000000, seed=args.seed)

    logger.log("Running trained model")
    obs = np.zeros((env.num_envs,) + env.observation_space.shape)
    obs[:] = env.reset()
    while True:
        actions = model.step(obs)[0]
        res = env.step(actions)
        obs[:] = res[0]
        done = res[2]
        if done[0]:
            break
Example #24
def main():
    parser = mujoco_arg_parser()
    parser.add_argument('--use-penal', help='enable penal', default=False)
    parser.add_argument('--gpu', type=int, default=0, help='GPU selection')
    parser.add_argument('--pg-rate', type=float, default=0.0)
    args = parser.parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = '%d' % args.gpu
    print("game %s run on GPU: %d" % (args.env, args.gpu))
    logger.configure(
        args.env + '_seed_' + str(args.seed) + '_nopen' + '_pg' +
        str(args.pg_rate) if not args.use_penal else args.env + '_seed_' +
        str(args.seed) + '_pen' + '_pg' + str(args.pg_rate),
        ['log', 'tensorboard'])
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          args=args)
Example #25
def main():
    args = mujoco_arg_parser().parse_args()
    args.env = 'RoboschoolWalker2d-v1'
    args.save_file = ''
    args.load_file = 'ppo_walker2d'
    args.num_timesteps = 10000000
    args.render = True
    args.stochastic = False
    print(args)
    logger.configure()
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          save_file=args.save_file,
          load_file=args.load_file,
          render=args.render,
          stochastic=args.stochastic)
Example #26
def main():
    args = mujoco_arg_parser().parse_args()

    logger.configure(
        dir=
        '/home/jonasrothfuss/Dropbox/Eigene_Dateien/UC_Berkley/2_Code/model_ensemble_meta_learning/data/local/ppo-baselines'
    )
    model, env = train(args.env,
                       num_timesteps=args.num_timesteps,
                       seed=args.seed)

    if args.play:
        logger.log("Running trained model")
        obs = np.zeros((env.num_envs, ) + env.observation_space.shape)
        obs[:] = env.reset()
        while True:
            actions = model.step(obs)[0]
            obs[:] = env.step(actions)[0]
            env.render()
Example #27
def main():
    parser = mujoco_arg_parser()
    parser.add_argument('--log_dir',
                        help='the directory to save log file',
                        default='log')
    parser.add_argument('--lr', type=float, default=3e-4, help="Learning rate")
    parser.add_argument('--sil-update',
                        type=float,
                        default=10,
                        help="Number of updates per iteration")
    parser.add_argument('--sil-value',
                        type=float,
                        default=0.01,
                        help="Weight for value update")
    parser.add_argument('--sil-alpha',
                        type=float,
                        default=0.6,
                        help="Alpha for prioritized replay")
    parser.add_argument('--sil-beta',
                        type=float,
                        default=0.1,
                        help="Beta for prioritized replay")

    args = parser.parse_args()
    logger.configure(dir=args.log_dir)
    model, env = train(args.env,
                       num_timesteps=args.num_timesteps,
                       seed=args.seed,
                       lr=args.lr,
                       sil_update=args.sil_update,
                       sil_value=args.sil_value,
                       sil_alpha=args.sil_alpha,
                       sil_beta=args.sil_beta)

    if args.play:
        logger.log("Running trained model")
        obs = np.zeros((env.num_envs, ) + env.observation_space.shape)
        obs[:] = env.reset()
        while True:
            actions = model.step(obs)[0]
            obs[:] = env.step(actions)[0]
            env.render()
Example #28
def main():
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    pi = train(args.env, num_timesteps=1, seed=args.seed, play=False)
    run = 'run-20180703_034952-1a24a6ik/'
    run_home = '/home/ubuntu/wandb_baselines/wandb/' + run  #run-20180702_220411-4xtopfue/'
    model_path = run_home + 'humanoid_policy'
    # model_path = '/home/ubuntu/wandb_baselines/wandb/run-20180702_220411-4xtopfue/humanoid_policy'
    U.load_state(model_path)
    seed = random.randint(1, 1000)
    env = make_mujoco_env('RoboschoolHumanoid-v1', seed=seed)
    tot_r = 0
    ob = env.reset()
    runs = 0
    video = True
    if video:
        video_recorder = gym.wrappers.monitoring.video_recorder.VideoRecorder(
            env=env,
            base_path=os.path.join('/home/ubuntu/wandb_baselines',
                                   'humanoid_run2_%i' % seed),
            enabled=True)

    while True:
        action = pi.act(stochastic=False, ob=ob)[0]
        ob, r, done, _ = env.step(action)
        if video:
            video_recorder.capture_frame()
        tot_r += r
        if done:

            ob = env.reset()
            runs += 1
            #            if video:
            #                video_recorder.close()
            # video_recorder = gym.wrappers.monitoring.video_recorder.VideoRecorder(env=env, base_path=os.path.join(run_home, 'humanoid_run_%i'%runs), enabled=True)

            print(tot_r)
            tot_r = 0
            print("@@@@@@@@@@@@@@@")
        if runs > 0:
            break
Example #29
def main():
    parser = mujoco_arg_parser()
    parser.add_argument('--log-dir',
                        type=str,
                        default='./log',
                        help="Log directory")
    parser.add_argument('--exp-scale',
                        type=float,
                        default=0.5,
                        help="Exp scale of confidence score")
    args = parser.parse_args()
    #fig_path = os.path.join(args.log_dir, args.env_id+'.png')

    # Single processing for testing
    #arguments = [args.env, 0, args.log_dir, args.num_timesteps, args.exp_scale]
    #run(arguments)
    exp_num = 3
    # Multiprocessing
    pool = Pool(processes=exp_num)
    arguments = [[
        args.env_id, seed, args.log_dir, args.num_timesteps, args.exp_scale
    ] for seed in range(exp_num)]
    pool.map(run, arguments)
    #stats_dict = {'timestep': [], 'reward': []}
    stats_dict = {'timestep': [], 'reward': [], 'agent': []}

    # Read Logs
    print('Reading logs...')
    for seed in range(exp_num):
        filename = os.path.join(args.log_dir, args.env_id + '_' + str(seed),
                                'progress.csv')
        with open(filename, 'r') as csvfile:
            csvreader = csv.reader(csvfile)
            fields = next(csvreader)
            for row in csvreader:
                reward = row[fields.index('eprewmean')]
                timestep = row[fields.index('total_timesteps')]
                agent = row[fields.index('agent')]
                stats_dict['timestep'].append(int(timestep) * 2)
                stats_dict['reward'].append(float(reward))
                stats_dict['agent'].append(agent)
Example #30
def main():
    parser = mujoco_arg_parser()
    parser.add_argument('--log_dir',
                        help='the directory to save log file',
                        default='log')
    parser.add_argument('--lr', type=float, default=3e-4, help="Learning rate")
    args = parser.parse_args()
    logger.configure(dir=args.log_dir)
    model, env = train(args.env,
                       num_timesteps=args.num_timesteps,
                       seed=args.seed,
                       lr=args.lr)

    if args.play:
        logger.log("Running trained model")
        obs = np.zeros((env.num_envs, ) + env.observation_space.shape)
        obs[:] = env.reset()
        while True:
            actions = model.step(obs)[0]
            obs[:] = env.step(actions)[0]
            env.render()
Example #31
def main(*args, **kwargs):
    if "env" in kwargs.keys():
        game = kwargs["env"]
    else:
        game = "Hopper"
    parser = mujoco_arg_parser()
    parser.add_argument('--attention',
                        help='attention or not',
                        type=str,
                        default="NoAttention",
                        choices=["Attention,NoAttention,StateAttention"])
    parser.add_argument('--env',
                        help='environment ID',
                        type=str,
                        default=game + "-v2")
    args = parser.parse_args()
    print("Going to train.")
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          method=args.attention)
Example #32
def main():
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'humanoid_policy'))
    parser.set_defaults(num_timesteps=int(2e7))
   
    args = parser.parse_args()
    
    if not args.play:
        # train the model
        train(num_timesteps=args.num_timesteps, seed=args.seed, model_path=args.model_path)
    else:       
        # construct the model object, load pre-trained model and render
        pi = train(num_timesteps=1, seed=args.seed)
        U.load_state(args.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=0)

        ob = env.reset()        
        while True:
            action = pi.act(stochastic=False, ob=ob)[0]
            ob, _, done, _ = env.step(action)
            env.render()
            if done:
                ob = env.reset()
Example #33
def main():
    args = mujoco_arg_parser().parse_args()
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
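All of these snippets are meant to be run as scripts; the usual entry point, omitted from most of the excerpts above, is simply:

# Typical entry point for these scripts (omitted in most excerpts above)
if __name__ == '__main__':
    main()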