def __init__(self, args, env, env_params):
    self.args = args
    self.env = env
    self.env_params = env_params
    self.T = self.env_params['max_timesteps']
    # create T actor and critic networks, one per timestep
    self.actor_networks = [
        residualactor(env_params) for _ in range(self.T)
    ]
    self.critic_networks = [
        residualcritic(env_params) for _ in range(self.T)
    ]
    # sync the networks across the cpus
    sync_all_networks(self.actor_networks)
    sync_all_networks(self.critic_networks)
    # move the networks to the gpu if requested
    if self.args.cuda:
        for i in range(self.T):
            self.actor_networks[i].cuda()
            self.critic_networks[i].cuda()
    # create T optimizers, one per network
    self.actor_optims = [
        torch.optim.Adam(self.actor_networks[i].parameters(),
                         lr=self.args.lr_actor)
        for i in range(self.T)
    ]
    self.critic_optims = [
        torch.optim.Adam(self.critic_networks[i].parameters(),
                         lr=self.args.lr_critic)
        for i in range(self.T)
    ]
    # her sampler
    self.her_module = residual_her_sampler(self.args.replay_strategy,
                                           self.args.replay_k,
                                           self.env.compute_reward)
    # create the replay buffer
    self.buffer = residual_replay_buffer(
        self.env_params, self.args.buffer_size,
        self.her_module.sample_her_transitions)
    # create the observation and goal normalizers
    self.o_norm = normalizer(size=env_params['obs'],
                             default_clip_range=self.args.clip_range)
    self.g_norm = normalizer(size=env_params['goal'],
                             default_clip_range=self.args.clip_range)
    # create the directory to store the model (rank 0 only)
    if MPI.COMM_WORLD.Get_rank() == 0:
        if not os.path.exists(self.args.save_dir):
            os.mkdir(self.args.save_dir)
        # path to save the model
        self.model_path = os.path.join(self.args.save_dir,
                                       self.args.env_name)
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
    logger.info("initialized agent")
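# --- Illustrative usage sketch (assumption, not part of this codebase) ---
# How the per-timestep agent defined above might be constructed.
# `ResidualAgent`, `get_args`, and `get_env_params` are hypothetical names
# standing in for this repo's actual class and helper functions, which are
# not shown in these snippets.
import gym

args = get_args()                    # hypothetical CLI-argument parser
env = gym.make(args.env_name)        # env must expose compute_reward
env_params = get_env_params(env)     # hypothetical helper; must supply
                                     # 'max_timesteps', 'obs', and 'goal'
agent = ResidualAgent(args, env, env_params)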
def __init__(self, args, env, planning_env, env_params, controller):
    self.args = args
    self.env = env
    self.planning_env = planning_env
    self.env_params = env_params
    self.controller = controller
    self.controller_heuristic_fn = controller.heuristic_obs_g
    self.extract_features_fn = planning_env.extract_features
    self.reward_fn = planning_env.compute_reward
    self.sampler = Sampler(args, self.reward_fn,
                           self.controller_heuristic_fn,
                           self.extract_features_fn)
    self.dataset = Dataset(args, env_params, self.sampler)
    self.dynamics_dataset = DynamicsDataset(args, env_params)
    self.residual = Residual(env_params)
    self.residual_target = Residual(env_params)
    self.dynamics_residual = DynamicsResidual(env_params)
    # Alternative: torch.optim.SGD with momentum=0.9
    self.residual_optim = torch.optim.Adam(
        self.residual.parameters(),
        lr=self.args.lr_residual,
        weight_decay=self.args.l2_reg)
    # Alternative: torch.optim.SGD with momentum=0.9
    self.dynamics_residual_optim = torch.optim.Adam(
        self.dynamics_residual.parameters(),
        lr=self.args.lr_model,
        weight_decay=self.args.model_l2_reg)
    # TODO: Sync networks, if we want to use MPI
    # initialize the target network with the residual network's weights
    self.residual_target.load_state_dict(self.residual.state_dict())
    # create the feature and position normalizers
    self.f_norm = normalizer(size=env_params['num_features'])
    self.pos_norm = normalizer(size=4)
    self.dummy_sim_state = self.planning_env.reset()['sim_state']
    # remote workers for parallel rollouts
    self.workers = [Worker.remote(args, env_params)
                    for _ in range(args.num_ilc_workers)]
    self.n_planning_steps = 0
    self.n_real_steps = 0
    # store start and goal states of the real env for
    # num_real_traj_eval evaluation trajectories
    self.eval_qpos, self.eval_qvel, self.eval_goals = [], [], []
    self.populate_sim_states_and_goals()
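# --- Illustrative sketch (assumption, not part of this codebase) ---
# `Worker.remote(...)` above is Ray's idiom for instantiating remote actors,
# so rollouts can be fanned out and gathered roughly like this. `rollout` is
# a hypothetical method name and `agent` a hypothetical handle; the actual
# Worker API is not shown in these snippets.
import ray

futures = [worker.rollout.remote() for worker in agent.workers]
results = ray.get(futures)  # blocks until every remote rollout completes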
def __init__(self, args, env, env_params):
    self.args = args
    self.env = env
    self.env_params = env_params
    # create the networks
    self.actor_network = actor(env_params, residual=True)
    self.critic_network = critic(env_params)
    # sync the networks across the cpus
    sync_networks(self.actor_network)
    sync_networks(self.critic_network)
    # build up the target networks
    self.actor_target_network = actor(env_params, residual=True)
    self.critic_target_network = critic(env_params)
    # load the weights into the target networks
    self.actor_target_network.load_state_dict(
        self.actor_network.state_dict())
    self.critic_target_network.load_state_dict(
        self.critic_network.state_dict())
    # move the networks to the gpu if requested
    if self.args.cuda:
        self.actor_network.cuda()
        self.critic_network.cuda()
        self.actor_target_network.cuda()
        self.critic_target_network.cuda()
    # create the optimizers
    self.actor_optim = torch.optim.Adam(self.actor_network.parameters(),
                                        lr=self.args.lr_actor)
    self.critic_optim = torch.optim.Adam(self.critic_network.parameters(),
                                         lr=self.args.lr_critic)
    # her sampler
    self.her_module = her_sampler(self.args.replay_strategy,
                                  self.args.replay_k,
                                  self.env.compute_reward,
                                  self.env.extract_features)
    # create the replay buffer
    self.buffer = replay_buffer(self.env_params, self.args.buffer_size,
                                self.her_module.sample_her_transitions)
    # create the normalizers
    self.o_norm = normalizer(size=env_params['obs'],
                             default_clip_range=self.args.clip_range)
    self.g_norm = normalizer(size=env_params['goal'],
                             default_clip_range=self.args.clip_range)
    self.f_norm = normalizer(size=env_params['num_features'])
    # create the directory to store the model (rank 0 only)
    if MPI.COMM_WORLD.Get_rank() == 0:
        if not os.path.exists(self.args.save_dir):
            os.mkdir(self.args.save_dir)
        # path to save the model
        self.model_path = os.path.join(self.args.save_dir,
                                       self.args.env_name)
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
    logger.info("initialized agent")
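# --- Illustrative sketch (assumption, not part of this codebase) ---
# Target networks like the ones initialized above are conventionally kept in
# sync with a Polyak soft update during training; `polyak` is a hypothetical
# coefficient standing in for whatever hyperparameter this repo actually uses.
def soft_update_target_network(target, source, polyak=0.95):
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(
            polyak * target_param.data + (1.0 - polyak) * param.data)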
def __init__(self, args, env, env_params, controller):
    self.args = args
    self.env = env
    self.env_params = env_params
    self.controller = controller
    self.controller_heuristic_fn = controller.heuristic_obs_g
    self.extract_features_fn = env.extract_features
    self.reward_fn = env.compute_reward
    self.sampler = Sampler(args, self.reward_fn,
                           self.controller_heuristic_fn,
                           self.extract_features_fn)
    self.dataset = Dataset(args, env_params, self.sampler)
    self.residual = Residual(env_params)
    self.residual_target = Residual(env_params)
    # Alternative: torch.optim.Adam
    self.residual_optim = torch.optim.SGD(self.residual.parameters(),
                                          lr=self.args.lr_residual,
                                          momentum=0.9,
                                          weight_decay=self.args.l2_reg)
    # TODO: Sync networks, if we want to use MPI
    self.residual_target.load_state_dict(self.residual.state_dict())
    self.f_norm = normalizer(size=env_params['num_features'])
    self.dummy_sim_state = self.env.reset()['sim_state']
    self.workers = [Worker.remote(args, env_params)
                    for _ in range(args.num_polo_workers)]
def __init__(self, args, env_params):
    self.controller = get_controller(args.env_name,
                                     env_id=args.env_id,
                                     discrete=True,
                                     num_expansions=args.offline_num_expansions,
                                     reward_type=args.reward_type)
    self.residual = Residual(env_params)
    self.env = make_env(args.env_name, args.env_id,
                        discrete=True,
                        reward_type=args.reward_type)
    self.f_norm = normalizer(env_params['num_features'])
    self.dummy_sim_state = self.env.reset()['sim_state']
def __init__(self, env_params):
    '''Create a dynamics normalizer'''
    # save args
    self.env_params = env_params
    # wrap a 4-dimensional normalizer
    dyn_norm = normalizer(size=4)
    super(DynamicsNormalizer, self).__init__(dyn_norm)
def __init__(self, env_params):
    '''Create a feature normalizer'''
    # save args
    self.env_params = env_params
    # wrap a normalizer sized to the feature vector
    f_norm = normalizer(size=env_params['num_features'])
    super(FeatureNormalizer, self).__init__(f_norm)
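# --- Illustrative sketch (assumption, not part of this codebase) ---
# Typical lifecycle of the wrapped `normalizer`, assuming the baselines-style
# API (update / recompute_stats / normalize) common to HER implementations;
# the exact method names in this repo are not shown in these snippets.
import numpy as np

f_norm = normalizer(size=env_params['num_features'])
features = np.random.randn(32, env_params['num_features'])  # dummy batch
f_norm.update(features)        # accumulate running sums for mean/std
f_norm.recompute_stats()       # refresh the statistics (MPI-reduced, if used)
normalized = f_norm.normalize(features)  # (x - mean) / std, then clipped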