Example #1
 def __init__(self, args, env, env_params):
     self.args = args
     self.env = env
     self.env_params = env_params
     self.T = self.env_params['max_timesteps']
     # create the networks: one actor and one critic per timestep (T of each)
     self.actor_networks = [
         residualactor(env_params) for _ in range(self.T)
     ]
     self.critic_networks = [
         residualcritic(env_params) for _ in range(self.T)
     ]
     # sync the network parameters across the MPI workers
     sync_all_networks(self.actor_networks)
     sync_all_networks(self.critic_networks)
     # move the networks to the GPU if requested
     if self.args.cuda:
         _ = [self.actor_networks[i].cuda() for i in range(self.T)]
         _ = [self.critic_networks[i].cuda() for i in range(self.T)]
     # create one Adam optimizer per actor and per critic network
     self.actor_optims = [
         torch.optim.Adam(self.actor_networks[i].parameters(),
                          lr=self.args.lr_actor) for i in range(self.T)
     ]
     self.critic_optims = [
         torch.optim.Adam(self.critic_networks[i].parameters(),
                          lr=self.args.lr_critic) for i in range(self.T)
     ]
     # her sampler
     self.her_module = residual_her_sampler(self.args.replay_strategy,
                                            self.args.replay_k,
                                            self.env.compute_reward)
     # create the replay buffer
     self.buffer = residual_replay_buffer(
         self.env_params, self.args.buffer_size,
         self.her_module.sample_her_transitions)
     # create the normalizer
     self.o_norm = normalizer(size=env_params['obs'],
                              default_clip_range=self.args.clip_range)
     self.g_norm = normalizer(size=env_params['goal'],
                              default_clip_range=self.args.clip_range)
     # create the directory to store the model
     if MPI.COMM_WORLD.Get_rank() == 0:
         if not os.path.exists(self.args.save_dir):
             os.mkdir(self.args.save_dir)
         # path to save the model
         self.model_path = os.path.join(self.args.save_dir,
                                        self.args.env_name)
         if not os.path.exists(self.model_path):
             os.mkdir(self.model_path)
     logger.info("initialized agent")
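All of these constructors expect an args namespace and an env_params dictionary. The sketch below shows one way those might be assembled for a Gym-style goal environment; the field names mirror what the __init__ above reads, but the concrete values, the older Gym reset() API, and the ResidualAgent class name are assumptions for illustration, not taken from the source.

import argparse
import gym

def get_env_params(env):
    # read dimensions from one reset of a goal-conditioned env (old Gym API,
    # where reset() returns the observation dict directly)
    obs = env.reset()
    return {
        'obs': obs['observation'].shape[0],
        'goal': obs['desired_goal'].shape[0],
        'action': env.action_space.shape[0],
        'action_max': float(env.action_space.high[0]),
        'max_timesteps': env._max_episode_steps,
    }

args = argparse.Namespace(
    cuda=False, lr_actor=1e-3, lr_critic=1e-3,
    replay_strategy='future', replay_k=4, buffer_size=int(1e6),
    clip_range=5.0, save_dir='saved_models/', env_name='FetchPush-v1',
)
env = gym.make(args.env_name)
agent = ResidualAgent(args, env, get_env_params(env))  # hypothetical class around the __init__ above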
Example #2
    def __init__(self, args, env, planning_env, env_params, controller):
        self.args = args
        self.env = env
        self.planning_env = planning_env
        self.env_params = env_params
        self.controller = controller
        self.controller_heuristic_fn = controller.heuristic_obs_g
        self.extract_features_fn = planning_env.extract_features
        self.reward_fn = planning_env.compute_reward

        self.sampler = Sampler(args, self.reward_fn,
                               self.controller_heuristic_fn,
                               self.extract_features_fn)
        self.dataset = Dataset(args, env_params, self.sampler)
        self.dynamics_dataset = DynamicsDataset(args, env_params)
        self.residual = Residual(env_params)
        self.residual_target = Residual(env_params)
        self.dynamics_residual = DynamicsResidual(env_params)
        self.residual_optim = torch.optim.Adam(
            # self.residual_optim = torch.optim.SGD(
            self.residual.parameters(),
            lr=self.args.lr_residual,
            # momentum=0.9,
            weight_decay=self.args.l2_reg
        )
        self.dynamics_residual_optim = torch.optim.Adam(
            # self.dynamics_residual_optim = torch.optim.SGD(
            self.dynamics_residual.parameters(),
            lr=self.args.lr_model,
            # momentum=0.9,
            weight_decay=self.args.model_l2_reg
        )
        # TODO: Sync networks, if we want to use MPI
        self.residual_target.load_state_dict(self.residual.state_dict())

        self.f_norm = normalizer(
            size=env_params['num_features'],
        )
        self.pos_norm = normalizer(
            size=4,
        )

        self.dummy_sim_state = self.planning_env.reset()['sim_state']

        self.workers = [Worker.remote(args, env_params)
                        for i in range(args.num_ilc_workers)]

        self.n_planning_steps = 0
        self.n_real_steps = 0

        # Store start and goal states of real env for num_real_traj_eval trajectories
        self.eval_qpos, self.eval_qvel, self.eval_goals = [], [], []
        self.populate_sim_states_and_goals()
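Worker.remote(args, env_params) above follows Ray's remote-actor API: the class is decorated with @ray.remote and instantiated through .remote(...). A rough sketch of such a worker is shown below; only the decorator and the .remote(...) call are actual Ray API, while the class body and the rollout method are hypothetical placeholders.

import ray

@ray.remote
class Worker:
    def __init__(self, args, env_params):
        self.args = args
        self.env_params = env_params

    def rollout(self, residual_state_dict):
        # placeholder: load the latest residual weights, run one episode in a
        # private copy of the environment, and return the collected transitions
        raise NotImplementedError

ray.init()
workers = [Worker.remote(args, env_params) for _ in range(args.num_ilc_workers)]

Method calls on such handles then go through worker.rollout.remote(...), which returns futures that can be gathered with ray.get.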
Example #3
 def __init__(self, args, env, env_params):
     self.args = args
     self.env = env
     self.env_params = env_params
     # create the network
     self.actor_network = actor(env_params, residual=True)
     self.critic_network = critic(env_params)
     # sync the network parameters across the MPI workers
     sync_networks(self.actor_network)
     sync_networks(self.critic_network)
     # build up the target network
     self.actor_target_network = actor(env_params, residual=True)
     self.critic_target_network = critic(env_params)
     # load the weights into the target networks
     self.actor_target_network.load_state_dict(
         self.actor_network.state_dict())
     self.critic_target_network.load_state_dict(
         self.critic_network.state_dict())
     # if using the GPU, move all networks there
     if self.args.cuda:
         self.actor_network.cuda()
         self.critic_network.cuda()
         self.actor_target_network.cuda()
         self.critic_target_network.cuda()
     # create the optimizer
     self.actor_optim = torch.optim.Adam(self.actor_network.parameters(),
                                         lr=self.args.lr_actor)
     self.critic_optim = torch.optim.Adam(self.critic_network.parameters(),
                                          lr=self.args.lr_critic)
     # her sampler
     self.her_module = her_sampler(self.args.replay_strategy,
                                   self.args.replay_k,
                                   self.env.compute_reward,
                                   self.env.extract_features)
     # create the replay buffer
     self.buffer = replay_buffer(self.env_params, self.args.buffer_size,
                                 self.her_module.sample_her_transitions)
     # create the normalizer
     self.o_norm = normalizer(size=env_params['obs'],
                              default_clip_range=self.args.clip_range)
     self.g_norm = normalizer(size=env_params['goal'],
                              default_clip_range=self.args.clip_range)
     self.f_norm = normalizer(size=env_params['num_features'])
     # create the directory to store the model
     if MPI.COMM_WORLD.Get_rank() == 0:
         if not os.path.exists(self.args.save_dir):
             os.mkdir(self.args.save_dir)
         # path to save the model
         self.model_path = os.path.join(self.args.save_dir,
                                        self.args.env_name)
         if not os.path.exists(self.model_path):
             os.mkdir(self.model_path)
     logger.info("initialized agent")
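The actor/critic target networks created above are normally tracked with a soft (Polyak) update during training; that step is not part of this snippet, so here is a minimal, generic sketch of the usual rule (the helper name and the polyak value are assumptions):

import torch

def soft_update(target_net, source_net, polyak=0.95):
    # target <- polyak * target + (1 - polyak) * source, parameter by parameter
    with torch.no_grad():
        for t_param, s_param in zip(target_net.parameters(),
                                    source_net.parameters()):
            t_param.data.mul_(polyak).add_((1.0 - polyak) * s_param.data)

Keeping polyak close to 1 makes the targets move slowly, which stabilises the bootstrapped critic targets.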
Example #4
    def __init__(self, args, env, env_params, controller):
        self.args = args
        self.env = env
        self.env_params = env_params
        self.controller = controller
        self.controller_heuristic_fn = controller.heuristic_obs_g
        self.extract_features_fn = env.extract_features
        self.reward_fn = env.compute_reward

        self.sampler = Sampler(args, self.reward_fn,
                               self.controller_heuristic_fn,
                               self.extract_features_fn)
        self.dataset = Dataset(args, env_params, self.sampler)
        self.residual = Residual(env_params)
        self.residual_target = Residual(env_params)
        # self.residual_optim = torch.optim.Adam(
        self.residual_optim = torch.optim.SGD(self.residual.parameters(),
                                              lr=self.args.lr_residual,
                                              momentum=0.9,
                                              weight_decay=self.args.l2_reg)
        # TODO: Sync networks, if we want to use MPI
        self.residual_target.load_state_dict(self.residual.state_dict())

        self.f_norm = normalizer(size=env_params['num_features'])

        self.dummy_sim_state = self.env.reset()['sim_state']

        self.workers = [
            Worker.remote(args, env_params)
            for i in range(args.num_polo_workers)
        ]
Example #5
 def __init__(self, args, env_params):
     self.controller = get_controller(args.env_name, env_id=args.env_id, discrete=True,
                                      num_expansions=args.offline_num_expansions, reward_type=args.reward_type)
     self.residual = Residual(env_params)
     self.env = make_env(args.env_name, args.env_id,
                         discrete=True, reward_type=args.reward_type)
     self.f_norm = normalizer(env_params['num_features'])
     self.dummy_sim_state = self.env.reset()['sim_state']
Example #6
 def __init__(self, env_params):
     '''
     Create dynamics normalizer
     '''
     # Save args
     self.env_params = env_params
     # Normalizer
     dyn_norm = normalizer(size=4)
     super(DynamicsNormalizer, self).__init__(dyn_norm)
Example #7
 def __init__(self, env_params):
     '''
     Create a feature normalizer
     '''
     # Save args
     self.env_params = env_params
     # Normalizer
     f_norm = normalizer(size=env_params['num_features'])
     super(FeatureNormalizer, self).__init__(f_norm)
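Examples #6 and #7 hand the freshly built normalizer to a shared base class via super().__init__(...). That base is not shown in these snippets; a plausible sketch of such a thin wrapper, assuming the normalizer exposes the usual update / recompute_stats / normalize interface, could look like this (entirely hypothetical):

class NormalizerWrapper:
    '''Hypothetical base that simply delegates to a wrapped normalizer.'''

    def __init__(self, norm):
        self.norm = norm

    def update(self, values):
        # accumulate running mean/std and refresh the cached statistics
        self.norm.update(values)
        self.norm.recompute_stats()

    def normalize(self, values):
        return self.norm.normalize(values)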