def __init__(self, num_inputs, action_space, param, action_bound):
    self.action_bound = action_bound
    self.alpha = param['alpha']
    self.gamma = param['gamma']
    self.tau = param['tau']
    self.target_update_interval = param['target_update_interval']
    self.automatic_entropy_tuning = param['automatic_entropy_tuning']
    self.lr = param['lr']
    self.device = torch.device("cuda" if param['cuda'] else "cpu")

    # Critic and its target network, synchronized at construction time
    self.critic = QNetwork(num_inputs, action_space).to(device=self.device)
    self.critic_optim = Adam(self.critic.parameters(), lr=self.lr)
    self.critic_target = QNetwork(num_inputs, action_space).to(self.device)
    hard_update(self.critic_target, self.critic)

    # Target Entropy = -dim(A) (e.g. -6 for HalfCheetah-v2) as given in the paper
    if self.automatic_entropy_tuning is True:
        self.target_entropy = -torch.prod(
            torch.Tensor(action_space).to(self.device)).item()
        self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
        self.alpha_optim = Adam([self.log_alpha], lr=self.lr)

    self.policy = CNNPolicy(num_inputs, action_space).to(self.device)
    self.policy_optim = Adam(self.policy.parameters(), lr=self.lr)
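# Assumed helpers (not defined in this excerpt): a minimal sketch of the target-network
# update functions implied by hard_update(...) and self.tau above. This is the standard
# SAC pattern, not necessarily the exact implementation used in this repository.
def hard_update(target, source):
    # Copy every parameter of `source` into `target` (used once at construction).
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(param.data)


def soft_update(target, source, tau):
    # Polyak averaging: target <- (1 - tau) * target + tau * source.
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau)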
def run():
    # Environment parameters
    LASER_HIST = 3   # laser scan history length (frames stacked for the policy)
    NUM_ENV = 1      # number of agents in the environment
    OBS_SIZE = 512   # number of laser beams per scan
    action_bound = [[0, -1], [1, 1]]  # velocity limits: [[v_min, w_min], [v_max, w_max]]

    # Set up env and agent policy
    # index is useful for parallel programming; 0 is the first agent
    env = StageWorld(OBS_SIZE, index=0, num_env=NUM_ENV)
    trained_model_file = os.path.dirname(__file__) + '/policy/stage2.pth'
    policy = CNNPolicy(frames=LASER_HIST, action_space=2)
    policy.cpu()  # use policy.cuda() for GPU
    state_dict = torch.load(trained_model_file,
                            map_location=torch.device('cpu'))  # drop map_location for GPU
    policy.load_state_dict(state_dict)

    rospy.init_node('rl_collision_avoidance_tb3', anonymous=False)
    print('==================================\nrl_collision_avoidance node started')

    nn_tb3 = NN_tb3(env,
                    policy,
                    action_bound,
                    OBS_SIZE,
                    index=0,
                    num_env=NUM_ENV)
    rospy.on_shutdown(nn_tb3.on_shutdown)
    rospy.spin()
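# Typical entry point for this node script (a sketch; this guard is not part of the
# excerpt above and is only an assumption about how run() would be invoked).
if __name__ == '__main__':
    try:
        run()
    except rospy.ROSInterruptException:
        pass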
logger_cal.setLevel(logging.INFO)
cal_f_handler = logging.FileHandler(cal_file, mode='a')
cal_f_handler.setLevel(logging.INFO)  # fixed: previously set the level on file_handler instead
logger_cal.addHandler(cal_f_handler)
# logger.info('rosport: %d robotIndex: %d rank:%d' % (arg_list.rosports[rank], arg_list.robotIds[rank], rank))

reward = None
action_bound = [[0, -1], [1, 1]]

# torch.manual_seed(1)
# np.random.seed(1)

if rank == 0:
    policy_path = policydir
    # policy_path = 'policy'
    # policy = MLPPolicy(obs_size, act_size)
    policy = CNNPolicy(frames=LASER_HIST, action_space=2)
    policy.cuda()
    opt = Adam(policy.parameters(), lr=LEARNING_RATE)
    mse = nn.MSELoss()

    if not os.path.exists(policy_path):
        os.makedirs(policy_path)

    file = policy_path + '/Stage1_%d' % ID_EP
    if os.path.exists(file):
        logger.info('####################################')
        logger.info('############Loading Model###########')
        logger.info('####################################')
        state_dict = torch.load(file)
        policy.load_state_dict(state_dict)
    else: