def main():
    """
    Runs the test
    """
    args = mujoco_arg_parser().parse_args()
    logger.configure()  # baselines logger
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
def main():
    """
    Runs the test
    """
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'humanoid_policy'))
    parser.set_defaults(num_timesteps=int(2e7))

    args = parser.parse_args()

    if not args.play:
        # train the model
        train(num_timesteps=args.num_timesteps, seed=args.seed, model_path=args.model_path)

    else:
        # construct the model object, load pre-trained model and render
        model = train(num_timesteps=1, seed=args.seed)
        tf_util.load_state(args.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=0)

        obs = env.reset()
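        # Act deterministically with the loaded policy, resetting whenever an episode ends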
        while True:
            action = model.policy.act(stochastic=False, obs=obs)[0]
            obs, _, done, _ = env.step(action)
            env.render()
            if done:
                obs = env.reset()
Example #3
def main():

    main_dir = "simulation_results"
    try:
        os.mkdir(main_dir)
    except FileExistsError:
        pass

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    parser = mujoco_arg_parser()
    parser.add_argument('--algorithm', help="The algorithm which shall be used, TRPO or PPO", type=str, default="TRPO")
    args = parser.parse_args()

    algorithm = args.algorithm

    if rank == 0:
        save_dir = [
            os.path.join(main_dir, time.strftime('%Y_%m_%d-%Hh_%Mm_%Ss', time.localtime(time.time())) + "-" + algorithm)
        ]
    else:
        save_dir = None

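    # Rank 0 picked the timestamped results directory; broadcast it so every MPI worker writes to the same place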
    save_dir = comm.bcast(save_dir, root=0)

    # Unpack list
    save_dir = save_dir[0]

    model_file = os.path.join(save_dir, "model")
    log_dir = os.path.join(save_dir, "log")

    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed, algorithm=algorithm,
          model_save_file=model_file, log_dir=log_dir)
Example #4
def main():
    """
    Runs the test
    """
    args = mujoco_arg_parser().parse_args()
    train(args.env,
          num_timesteps=args.num_timesteps,
          seed=args.run,
          lam=args.lam,
          sgd_steps=args.sgd_steps,
          klcoeff=args.klcoeff,
          log=args.log)
def main():
    """
    Runs the test
    """
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    model, env = train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)

    if args.play:
        logger.log("Running trained model")
        obs = np.zeros((env.num_envs,) + env.observation_space.shape)
        obs[:] = env.reset()
        while True:
            actions = model.step(obs)[0]
            obs[:] = env.step(actions)[0]
            env.render('human')
Example #6
def main():
    """
    Runs the test
    """
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)

    env = make_mujoco_env(args.env, args.seed)
    model = PPO1(MlpPolicy,
                 env,
                 timesteps_per_actorbatch=2048,
                 clip_param=0.2,
                 entcoeff=0.0,
                 optim_epochs=10,
                 optim_stepsize=3e-4,
                 optim_batchsize=64,
                 gamma=0.99,
                 lam=0.95,
                 schedule='linear')
    model.learn(total_timesteps=args.num_timesteps)

    model.save("ppo1")
    # env.close()

    del model  # remove to demonstrate saving and loading
    # env = make_mujoco_env(args.env, args.seed)

    model = PPO1.load("ppo1")
    logger.log("~!!!!!!!!")
    episode_rew = 0
    obs = env.reset()

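    # Evaluate the reloaded policy, printing the return of each episode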
    while True:
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        episode_rew += reward
        env.render()
        if done:
            print(f'episode_rew={episode_rew}')
            episode_rew = 0
            obs = env.reset()
Example #7
def main():
    """
    Runs the testd
    """
    args = mujoco_arg_parser().parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['OPENBLAS_NUM_THREADS'] = '1'
    log = not args.no_log
    is_action_features = not args.states

    # for t_c in t_cs:
    #     for t_pi in t_pis:
    #         for lam in lams:
    #             args.lam = lam
    #             args.t_c = t_c
    #             args.t_pi = t_pi
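    # Launch one training run per seed, offset by args.seed_offset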
    for seed in range(args.num_seeds):
        train(args.env,
              algo=args.algo,
              num_timesteps=args.num_timesteps,
              seed=(seed + args.seed_offset),
              expert_model=args.expert_model,
              expert_path=args.expert_path,
              num_trajectories=args.num_trajectories,
              is_action_features=is_action_features,
              sgd_steps=args.sgd_steps,
              mdpo_update_steps=args.mdpo_update_steps,
              lipschitz=args.lipschitz,
              t_pi=args.t_pi,
              t_c=args.t_c,
              lam=args.lam,
              log=log,
              pretrain=args.pretrain,
              pretrain_epochs=args.pretrain_epochs,
              exploration_bonus=args.exploration,
              bonus_coef=args.bonus_coef,
              random_action_len=args.random_action_len,
              dir_name=args.dir_name,
              neural=args.neural,
              args=args)
Example #8
def parse_cmdline_kwargs(args):
    """
    Converts '='-separated command-line arguments into a dict, evaluating
    Python literals where possible.
    """
    def parse(v):
        try:
            return eval(v)
        except (NameError, SyntaxError):
            return v

    return {k: parse(v) for k, v in parse_unknown_args(args).items()}


if __name__ == '__main__':

    import time
    import os
    from stable_baselines import logger  # needed for logger.log below
    from stable_baselines.common.cmd_util import mujoco_arg_parser
    from stable_baselines.low_dim_analysis.common_parser import get_common_parser
    parser = get_common_parser()
    openai_arg_parser = mujoco_arg_parser()

    plot_args, plot_unknown_args = parser.parse_known_args()
    openai_args, openai_unknown_args = openai_arg_parser.parse_known_args()

    plot_unknown_args = parse_cmdline_kwargs(plot_unknown_args)
    openai_unknown_args = parse_cmdline_kwargs(openai_unknown_args)
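    # Keep only the extra kwargs that both parsers received with identical values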
    both_unknown_args = dict(plot_unknown_args.items()
                             & openai_unknown_args.items())

    threads_or_None = 'threads' if plot_args.use_threads else None
    logger.log(f"THREADS OR NOT: {threads_or_None}")

    plot_dir_alg = get_plot_dir(plot_args.alg, plot_args.num_timesteps,
                                plot_args.env, plot_args.normalize,
                                plot_args.run_num)
Example #9
def train(args):
    """
    Runs the test
    """
    args, argv = mujoco_arg_parser().parse_known_args(args)
    logger.log(f"#######TRAIN: {args}")
    args.alg = "ppo2"

    this_run_dir = get_dir_path_for_this_run(args)
    if os.path.exists(this_run_dir):
        import shutil
        shutil.rmtree(this_run_dir)
    os.makedirs(this_run_dir)

    log_dir = get_log_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)
    logger.configure(log_dir)

    def make_env():
        env_out = gym.make(args.env)
        env_out.env.visualize = False
        env_out = bench.Monitor(env_out,
                                logger.get_dir(),
                                allow_early_resets=True)
        return env_out

    env = DummyVecEnv([make_env])
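    # Reach through the wrappers to disable the viewer on the raw env and seed it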
    env.envs[0].env.env.disableViewer = True
    set_global_seeds(args.seed)
    env.envs[0].env.env.seed(args.seed)

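    # Optionally normalize observations and rewards; the running averages are saved alongside the model below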
    if args.normalize:
        env = VecNormalize(env)

    policy = MlpPolicy

    # extra run info I added for my purposes

    full_param_traj_dir_path = get_full_params_dir(this_run_dir)

    if os.path.exists(full_param_traj_dir_path):
        import shutil
        shutil.rmtree(full_param_traj_dir_path)
    os.makedirs(full_param_traj_dir_path)

    if os.path.exists(save_dir):
        import shutil
        shutil.rmtree(save_dir)
    os.makedirs(save_dir)

    run_info = {
        "run_num": args.run_num,
        "env_id": args.env,
        "full_param_traj_dir_path": full_param_traj_dir_path,
        "state_samples_to_collect": args.state_samples_to_collect
    }

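    # Standard PPO2 MuJoCo settings: lr 3e-4, clip 0.2, GAE lambda 0.95, gamma 0.99, 10 epochs per update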
    model = PPO2(policy=policy,
                 env=env,
                 n_steps=args.n_steps,
                 nminibatches=args.nminibatches,
                 lam=0.95,
                 gamma=0.99,
                 noptepochs=10,
                 ent_coef=0.0,
                 learning_rate=3e-4,
                 cliprange=0.2,
                 optimizer=args.optimizer,
                 seed=args.seed)
    model.tell_run_info(run_info)

    model.learn(total_timesteps=args.num_timesteps)

    model.save(f"{save_dir}/ppo2")

    if args.normalize:
        env.save_running_average(save_dir)
def main():
    """
    Runs the test
    """
    args = mujoco_arg_parser().parse_args()
    train(args.env,
          num_timesteps=args.num_timesteps,
          run=args.run,
          kappa=args.kappa,
          vf_phi_update_interval=args.vf_phi_update_interval,
          log=args.log)
Example #11
def main():
    """
    Runs the test
    """
    parser = mujoco_arg_parser()
    parser.add_argument(
        '--model-path',
        default="/cvgl2/u/surajn/workspace/saved_models/sawyerlift_ppo2/model")
    parser.add_argument('--images', default=False)
    args = parser.parse_args()

    logger.configure()
    if not args.play:
        model, env = train(args.env,
                           num_timesteps=args.num_timesteps,
                           seed=args.seed,
                           model_path=args.model_path,
                           images=args.images)

    if args.play:

        def make_env():
            env_out = GymWrapper(
                suite.make(
                    "SawyerLift",
                    use_camera_obs=False,  # do not use pixel observations
                    has_offscreen_renderer=
                    False,  # not needed since not using pixel obs
                    has_renderer=True,  # make sure we can render to the screen
                    reward_shaping=True,  # use dense rewards
                    control_freq=
                    10,  # control should happen fast enough so that simulation looks smooth
                ))
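            # Clear gym.Env metadata attributes before wrapping with Monitor (GymWrapper may not define them)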
            env_out.reward_range = None
            env_out.metadata = None
            env_out.spec = None
            env_out = bench.Monitor(env_out,
                                    logger.get_dir(),
                                    allow_early_resets=True)
            return env_out

        #env = make_env()
        env = DummyVecEnv([make_env])
        env = VecNormalize(env)

        policy = MlpPolicy
        #model = PPO1(MlpPolicy, env, timesteps_per_actorbatch=2048, clip_param=0.2, entcoeff=0.0, optim_epochs=10,
        #         optim_stepsize=3e-4, optim_batchsize=64, gamma=0.99, lam=0.95, schedule='linear', verbose=1)
        model = TRPO(MlpPolicy,
                     env,
                     timesteps_per_batch=1024,
                     max_kl=0.01,
                     cg_iters=10,
                     cg_damping=0.1,
                     entcoeff=0.0,
                     gamma=0.99,
                     lam=0.98,
                     vf_iters=5,
                     vf_stepsize=1e-3)
        # load() is a classmethod in stable-baselines; it returns the loaded model
        model = TRPO.load(args.model_path, env=env)
        logger.log("Running trained model")
        obs = np.zeros((env.num_envs, ) + env.observation_space.shape)
        obs[:] = env.reset()
        while True:
            env.render()
            actions = model.predict(obs, deterministic=True)[0]
            obs[:] = env.step(actions)[0]