Ejemplo n.º 1
0
def run_pusher3dof(args, sim=True, vanilla=False):
    """Train or test a DDPG agent on the environment named by ``args.env``.

    Experiment tracking through ``hyperdash`` is optional: when the package
    cannot be imported, training runs without it.

    Args:
        args: Options object. Attributes read directly here are ``env``,
            ``seed``, ``mode``, ``output``, ``validate_episodes``,
            ``validate_steps``, ``max_episode_length``, ``train_iter``,
            ``resume``, ``debug``, ``vis`` and ``best`` (the last two only
            in test mode), plus ``t0``/``t1``/``t2`` when ``sim`` is true.
            The object is also handed to ``DDPG`` and ``train``, which may
            read more.
        sim: When true, take the three torques from ``args`` and enable
            ``colored``; otherwise use torques of ``1.0`` and
            ``colored=False``.
        vanilla: When true, skip the ``env.env._init`` call, so the torque
            and color settings are not applied to the environment.

    Raises:
        RuntimeError: If ``args.mode`` is neither ``'train'`` nor ``'test'``.
    """
    # hyperdash is an optional dependency; only a failed import disables it.
    try:
        from hyperdash import Experiment
    except ImportError:
        hyperdash_support = False
    else:
        hyperdash_support = True

    env = NormalizedEnv(gym.make(args.env))

    if sim:
        torques = [args.t0, args.t1, args.t2]
        colored = True
    else:
        torques = [1.0] * 3  # defaults for the real setup
        colored = False

    if not vanilla:
        env.env._init(
            torques=torques,
            colored=colored
        )

    # Seeding is only applied for strictly positive seeds.
    if args.seed > 0:
        np.random.seed(args.seed)
        env.seed(args.seed)

    nb_states = env.observation_space.shape[0]
    nb_actions = env.action_space.shape[0]

    agent = DDPG(nb_states, nb_actions, args)
    evaluate = Evaluator(
        args.validate_episodes,
        args.validate_steps,
        args.output,
        max_episode_length=args.max_episode_length
    )

    # Stays None when hyperdash is unavailable or when not training.
    exp = None

    if args.mode == 'train':
        if hyperdash_support:
            import socket

            prefix = "sim" if sim else "real"

            exp = Experiment("s2r-pusher3dof-ddpg-{}".format(prefix))
            exp.param("host", socket.gethostname())
            exp.param("type", prefix)  # sim or real
            exp.param("vanilla", vanilla)  # vanilla or not
            exp.param("torques", torques)
            exp.param("folder", args.output)

            for arg in ["env", "max_episode_length", "train_iter", "seed", "resume"]:
                exp.param(arg, getattr(args, arg))

        try:
            train(args, args.train_iter, agent, env, evaluate,
                  args.validate_steps, args.output,
                  max_episode_length=args.max_episode_length, debug=args.debug, exp=exp)
        finally:
            # Close the experiment even if training raises, and never call
            # .end() on None when hyperdash is not installed.
            if exp is not None:
                exp.end()

    elif args.mode == 'test':
        test(args.validate_episodes, agent, env, evaluate, args.resume,
             visualize=args.vis, debug=args.debug, load_best=args.best)

    else:
        raise RuntimeError('undefined mode {}'.format(args.mode))
Ejemplo n.º 2
0
# Optional hyperdash experiment handle; stays None when hyperdash is not
# available or when the script is not training.
exp = None

if args.mode == 'train':
    if hyperdash_support:
        import socket

        exp = Experiment("{}-{}".format(env_name,"ddpg"))
        exp.param("host", socket.gethostname())
        exp.param("folder", args.output)

        # Record a fixed subset of the run options on the experiment.
        for arg in ["env", "max_episode_length", "train_iter", "seed", "resume"]:
            exp.param(arg, getattr(args, arg))

    try:
        train(args, args.train_iter, agent, env, evaluate,
              args.validate_steps, args.output,
              max_episode_length=args.max_episode_length, debug=args.debug, exp=exp)
    finally:
        # Close the experiment even if training raises, and never call
        # .end() on None when hyperdash is not available.
        if exp is not None:
            exp.end()

elif args.mode == 'test':
    test(args.validate_episodes, agent, env, evaluate, args.resume,
         visualize=args.vis, debug=args.debug, load_best=args.best)

else:
    raise RuntimeError('undefined mode {}'.format(args.mode))


Ejemplo n.º 3
0
agent = DDPG(nb_states, nb_actions, args)
evaluate = Evaluator(args.validate_episodes,
    args.validate_steps, args.output, max_episode_length=args.max_episode_length)

# Experiment handle; only created in train mode.
exp = None

if args.mode == 'train':
    import socket

    exp = Experiment("sim2real-ddpg-real-cheetah")
    try:
        # Record each listed option on the experiment. The original loop
        # fetched the values but never passed them to exp.param.
        for arg in ["env", "rate", "prate", "hidden1", "hidden2", "warmup", "discount",
                    "bsize", "rmsize", "window_length", "tau", "ou_theta", "ou_sigma", "ou_mu",
                    "validate_episodes", "max_episode_length", "validate_steps", "init_w",
                    "train_iter", "epsilon", "seed", "resume"]:
            exp.param(arg, getattr(args, arg))

        exp.param("host", socket.gethostname())

        train(args, args.train_iter, agent, env, evaluate,
            args.validate_steps, args.output, max_episode_length=args.max_episode_length, debug=args.debug, exp=exp)
    finally:
        # Close the experiment even if parameter logging or training raises.
        exp.end()

elif args.mode == 'test':
    test(args.validate_episodes, agent, env, evaluate, args.resume,
        visualize=True, debug=args.debug)

else:
    raise RuntimeError('undefined mode {}'.format(args.mode))
Ejemplo n.º 4
0
def run_reacher(args, sim=True):
    """Train or test a DDPG agent on the environment named by ``args.env``.

    Experiment tracking through ``hyperdash`` is optional: when the package
    cannot be imported, training runs without it.

    Args:
        args: Options object. Attributes read directly here are ``env``,
            ``seed``, ``mode``, ``output``, ``validate_episodes``,
            ``validate_steps``, ``max_episode_length``, ``train_iter``,
            ``resume``, ``debug``, ``vis`` and ``best`` (the last two only
            in test mode), plus ``t0``/``t1`` when ``sim`` is true. The
            object is also handed to ``DDPG`` and ``train``, which may read
            more.
        sim: When true, take the two joint torques from ``args`` and pass a
            color mapping to the environment; otherwise use torques of
            ``200`` and ``colors=None``.

    Raises:
        RuntimeError: If ``args.mode`` is neither ``'train'`` nor ``'test'``.
    """
    # hyperdash is an optional dependency; only a failed import disables it.
    try:
        from hyperdash import Experiment
    except ImportError:
        hyperdash_support = False
    else:
        hyperdash_support = True

    env = NormalizedEnv(gym.make(args.env))

    if sim:
        torques = [args.t0, args.t1]
        colors = {
            "arenaBackground": ".27 .27 .81",
            "arenaBorders": "1.0 0.8 0.4",
            "arm0": "0.9 0.6 0.9",
            "arm1": "0.9 0.9 0.6"
        }
    else:
        torques = [200, 200]  # defaults for the real setup
        colors = None

    env.env.env._init(
        torque0=torques[0],  # torque of joint 1
        torque1=torques[1],  # torque of joint 2 (was torques[0], ignoring args.t1)
        topDown=True,
        colors=colors)

    # Seeding is only applied for strictly positive seeds.
    if args.seed > 0:
        np.random.seed(args.seed)
        env.seed(args.seed)

    nb_states = env.observation_space.shape[0]
    nb_actions = env.action_space.shape[0]

    agent = DDPG(nb_states, nb_actions, args)
    evaluate = Evaluator(args.validate_episodes,
                         args.validate_steps,
                         args.output,
                         max_episode_length=args.max_episode_length)

    # Stays None when hyperdash is unavailable or when not training.
    exp = None

    if args.mode == 'train':
        if hyperdash_support:
            import socket

            prefix = "sim" if sim else "real"

            exp = Experiment("s2r-reacher-ddpg-{}".format(prefix))
            exp.param("host", socket.gethostname())
            exp.param("type", prefix)  # sim or real
            exp.param("torques", [torques[0], torques[1]])
            exp.param("folder", args.output)

            for arg in [
                    "env", "max_episode_length", "train_iter", "seed", "resume"
            ]:
                exp.param(arg, getattr(args, arg))

        try:
            train(args,
                  args.train_iter,
                  agent,
                  env,
                  evaluate,
                  args.validate_steps,
                  args.output,
                  max_episode_length=args.max_episode_length,
                  debug=args.debug,
                  exp=exp)
        finally:
            # Close the experiment even if training raises, and never call
            # .end() on None when hyperdash is not installed.
            if exp is not None:
                exp.end()

    elif args.mode == 'test':
        test(args.validate_episodes,
             agent,
             env,
             evaluate,
             args.resume,
             visualize=args.vis,
             debug=args.debug,
             load_best=args.best)

    else:
        raise RuntimeError('undefined mode {}'.format(args.mode))