Example 1
def run_ars(params):

    dir_path = params['dir_path']

    # create the output/log directory if it does not exist yet
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    logdir = dir_path

    # directory for saving videos (for Colab)
    monitor_dir = os.path.join(dir_path, 'monitor')
    if not os.path.exists(monitor_dir):
        os.makedirs(monitor_dir)

    # build the Laikago imitation environment
    env = env_builder.build_imitation_env(motion_files=[params['motion_file']],
                                            num_parallel_envs=1,
                                            mode='train',
                                            enable_randomizer=False,
                                            enable_rendering=params['visualize'],
                                            action_lim=params['actionlim'],
                                            curr_steps=params['currsteps'],
                                            path=params['path'])
    # env = gym.make(params['env_name'])
    # env = wrappers.Monitor(env, monitor_dir, force=True)
    ob_dim = env.observation_space.shape[0] #should be 4+4+12+33
    ob_dim_h = 4*3 #used as ob_h_dim in policy_params below
    ac_dim = env.action_space.shape[0] #should be 12+33

    # set policy parameters. Possible filters: 'MeanStdFilter' for v2, 'NoFilter' for v1.
    policy_params={'type':params['policy_type'],
                   'ob_filter':params['filter'],
                   'ob_dim':ob_dim,
                   'ac_dim':ac_dim,
                   'history_size':3,
                   'latent_dim':2,
                   'ob_h_dim':ob_dim_h,
                   'ob_l_dim':ob_dim-ob_dim_h}

    ARS = ARSLearner(env_name=params['env_name'],
                     policy_params=policy_params,
                     num_workers=params['n_workers'],
                     num_deltas=params['n_directions'],
                     deltas_used=params['deltas_used'],
                     step_size=params['step_size'],
                     delta_std=params['delta_std'],
                     logdir=logdir,
                     rollout_length=params['rollout_length'],
                     shift=params['shift'],
                     params=params,
                     seed=params['seed'])

    ARS.train(params['n_iter'])

    return
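For reference, run_ars takes a single flat params dictionary. The sketch below shows such a call; the keys mirror the ones read in this example (and in the ARSLearner/worker excerpts further down), while the concrete values are illustrative assumptions only.

# Illustrative params for run_ars; values are placeholder assumptions.
params = {
    'dir_path': 'output/ars_run',          # log/output directory
    'motion_file': 'motion_imitation/data/motions/laikago_dog_pace.txt',
    'visualize': False,
    'actionlim': 1.0,
    'currsteps': 0,
    'path': None,
    'env_name': 'LaikagoImitation',
    'policy_type': 'hlinear',              # 'hlinear' or 'honly' (see ARSLearner below)
    'filter': 'MeanStdFilter',             # or 'NoFilter'
    'initweights': None,                   # optional pretrained weights loaded by the policies
    'n_workers': 4,
    'n_directions': 16,
    'deltas_used': 8,
    'step_size': 0.02,
    'delta_std': 0.03,
    'rollout_length': 1000,
    'shift': 0,
    'seed': 237,
    'n_iter': 1000,
}
run_ars(params)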
Example 2
def main():
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument("--seed", dest="seed", type=int, default=None)
  arg_parser.add_argument("--mode", dest="mode", type=str, default="train")
  arg_parser.add_argument("--motion_file", dest="motion_file", type=str, default="motion_imitation/data/motions/laikago_dog_pace.txt")
  arg_parser.add_argument("--visualize", dest="visualize", action="store_true", default=False)
  arg_parser.add_argument("--output_dir", dest="output_dir", type=str, default="output")
  arg_parser.add_argument("--num_test_episodes", dest="num_test_episodes", type=int, default=None)
  arg_parser.add_argument("--model_file", dest="model_file", type=str, default="")
  arg_parser.add_argument("--total_timesteps", dest="total_timesteps", type=int, default=2e8)
  arg_parser.add_argument("--int_save_freq", dest="int_save_freq", type=int, default=0) # save intermediate model every n policy steps

  args = arg_parser.parse_args()
  
  num_procs = MPI.COMM_WORLD.Get_size()
  os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
  
  enable_env_rand = ENABLE_ENV_RANDOMIZER and (args.mode != "test")
  env = env_builder.build_imitation_env(motion_files=[args.motion_file],
                                        num_parallel_envs=num_procs,
                                        mode=args.mode,
                                        enable_randomizer=enable_env_rand,
                                        enable_rendering=args.visualize)
  
  model = build_model(env=env,
                      num_procs=num_procs,
                      timesteps_per_actorbatch=TIMESTEPS_PER_ACTORBATCH,
                      optim_batchsize=OPTIM_BATCHSIZE,
                      output_dir=args.output_dir)

  
  if args.model_file != "":
    model.load_parameters(args.model_file)

  if args.mode == "train":
      train(model=model, 
            env=env, 
            total_timesteps=args.total_timesteps,
            output_dir=args.output_dir,
            int_save_freq=args.int_save_freq)
  elif args.mode == "test":
      test(model=model,
           env=env,
           num_procs=num_procs,
           num_episodes=args.num_test_episodes)
  else:
      raise ValueError("Unsupported mode: " + args.mode)

  return
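Since main() sizes its parallel environments from MPI.COMM_WORLD.Get_size(), the script is normally launched under MPI. Assuming it is saved as run.py (the file name is an assumption), a typical training invocation would look like:

mpiexec -n 8 python3 run.py --mode train \
    --motion_file motion_imitation/data/motions/laikago_dog_pace.txt \
    --int_save_freq 10000000

A test run would instead pass --mode test together with --model_file and, optionally, --visualize and --num_test_episodes, all of which are defined by the argument parser above.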
Example 3
    def __init__(self, env_seed,
                 env_name='',
                 policy_params=None,
                 deltas=None,
                 rollout_length=1000,
                 delta_std=0.02,
                 params=None):

        # initialize OpenAI environment for each worker
        self.env = env_builder.build_imitation_env(motion_files=[params['motion_file']],
                                                num_parallel_envs=1,
                                                mode='train',
                                                enable_randomizer=False,
                                                enable_rendering=params['visualize'],
                                                action_lim=params['actionlim'],
                                                curr_steps=params['currsteps'],
                                                path=params['path'])
        # self.env = gym.make(env_name)
        # self.env.seed(env_seed)

        # each worker gets access to the shared noise table
        # with independent random streams for sampling
        # from the shared noise table.
        self.deltas = SharedNoiseTable(deltas, env_seed + 7)
        self.policy_params = policy_params
        if policy_params['type'] == 'linear':
            self.policy = LinearPolicy(policy_params)
        elif policy_params['type'] == 'honly':
            self.policy = HLinearPolicyHOnly(policy_params)
            if params['initweights'] is not None:
                self.policy.loadWeights(params['initweights'])
        else:
            self.policy = HLinearPolicy(policy_params)
            if params['initweights'] is not None:
                self.policy.loadWeights(params['initweights'])

        self.delta_std = delta_std
        self.rollout_length = rollout_length
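For context, an ARS worker of this kind also performs the rollouts it is constructed for. The method below is a sketch in the style of the ARS reference implementation; it assumes the policy objects above expose an act(ob) method, which is not shown in this snippet.

    def rollout(self, shift=0., rollout_length=None):
        # Run one episode with the current policy; return total (shifted) reward and step count.
        if rollout_length is None:
            rollout_length = self.rollout_length
        total_reward = 0.
        steps = 0
        ob = self.env.reset()
        for _ in range(rollout_length):
            action = self.policy.act(ob)
            ob, reward, done, _ = self.env.step(action)
            steps += 1
            total_reward += (reward - shift)  # 'shift' subtracts a constant per-step bonus
            if done:
                break
        return total_reward, steps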
Example 4
args = arg_parser.parse_args()

hp = Hp(nb_steps=args.steps,
        episode_length=args.eplength,
        learning_rate=args.learnrate,
        nb_directions=args.ndirections,
        nb_best_directions=args.nbestdir,
        noise=args.noise,
        seed=1,
        latent_dim=args.latent)
np.random.seed(hp.seed)

env = env_builder.build_imitation_env(motion_files=[args.motion_file],
                                      num_parallel_envs=1,
                                      mode=args.mode,
                                      enable_randomizer=False,
                                      enable_rendering=args.visualize,
                                      action_lim=args.actionlim,
                                      curr_steps=0)

#env = wrappers.Monitor(env, video_path, force=True)
nb_inputs = env.observation_space.shape[0]
nb_outputs = env.action_space.shape[0]
# Note: input_dim_h and sensor_history_num are assumed to be defined elsewhere in the original script.
if args.policytype == 0:
    policy = HPolicy(input_dim_h, nb_inputs - input_dim_h, hp.latent_dim,
                     nb_outputs, sensor_history_num)
else:
    policy = HPolicyhlb(input_dim_h, nb_inputs - input_dim_h, hp.latent_dim,
                        nb_outputs, sensor_history_num, args.latentval1,
                        args.latentval2)
normalizer = Normalizer(nb_inputs)
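The Normalizer(nb_inputs) used above keeps running statistics for observation normalization, but its internals are not shown in these excerpts. The class below is a minimal sketch of one common implementation (an assumption, not the original code):

import numpy as np

class Normalizer:
    def __init__(self, nb_inputs):
        self.n = np.zeros(nb_inputs)          # per-dimension observation count
        self.mean = np.zeros(nb_inputs)       # running mean
        self.mean_diff = np.zeros(nb_inputs)  # running sum of squared deviations
        self.var = np.zeros(nb_inputs)        # running variance

    def observe(self, x):
        # Welford-style online update of the running mean and variance.
        self.n += 1.0
        last_mean = self.mean.copy()
        self.mean += (x - self.mean) / self.n
        self.mean_diff += (x - last_mean) * (x - self.mean)
        self.var = (self.mean_diff / self.n).clip(min=1e-2)

    def normalize(self, inputs):
        return (inputs - self.mean) / np.sqrt(self.var)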
Example 5
    def __init__(self, env_name='HalfCheetah-v1',
                 policy_params=None,
                 num_workers=32,
                 num_deltas=320,
                 deltas_used=320,
                 delta_std=0.02,
                 logdir=None,
                 rollout_length=1000,
                 step_size=0.01,
                 shift='constant zero',
                 params=None,
                 seed=123):

        logz.configure_output_dir(logdir)
        logz.save_params(params)

        env = env_builder.build_imitation_env(motion_files=[params['motion_file']],
                                                num_parallel_envs=1,
                                                mode='train',
                                                enable_randomizer=False,
                                                enable_rendering=params['visualize'],
                                                action_lim=params['actionlim'],
                                                curr_steps=params['currsteps'],
                                                path=params['path'])
        self.timesteps = 0
        self.action_size = env.action_space.shape[0]
        self.ob_size = env.observation_space.shape[0]
        self.num_deltas = num_deltas
        self.deltas_used = deltas_used
        self.rollout_length = rollout_length
        self.step_size = step_size
        self.delta_std = delta_std
        self.logdir = logdir
        self.shift = shift
        self.params = params
        self.max_past_avg_reward = float('-inf')
        self.num_episodes_used = float('inf')


        # create shared table for storing noise
        print("Creating deltas table.")
        deltas_id = create_shared_noise.remote()
        self.deltas = SharedNoiseTable(ray.get(deltas_id), seed=seed + 3)
        print('Created deltas table.')

        # initialize workers with different random seeds
        print('Initializing workers.')
        self.num_workers = num_workers
        self.workers = [Worker.remote(seed + 7 * i,
                                      env_name=env_name,
                                      policy_params=policy_params,
                                      deltas=deltas_id,
                                      rollout_length=rollout_length,
                                      delta_std=delta_std,
                                      params=params) for i in range(num_workers)]


        # initialize policy
        if policy_params['type'] == 'hlinear':
            self.policy = HLinearPolicy(policy_params)
            if params['initweights'] is not None:
                self.policy.loadWeights(params['initweights'])
            self.w_policy = self.policy.get_weights()
        elif policy_params['type'] == 'honly':
            self.policy = HLinearPolicyHOnly(policy_params)
            if params['initweights'] is not None:
                self.policy.loadWeights(params['initweights'])
            self.w_policy = self.policy.get_weights()
        else:
            raise NotImplementedError

        # initialize optimization algorithm
        self.optimizer = optimizers.SGD(self.w_policy, self.step_size)
        print("Initialization of ARS complete.")