Code Example #1
    # SAC agent constructor. Assumes `torch`, `Adam` (from torch.optim), and the
    # project's QNetwork, CNNPolicy, and hard_update helpers are imported.
    def __init__(self, num_inputs, action_space, param, action_bound):

        self.action_bound = action_bound
        self.alpha = param['alpha']
        self.gamma = param['gamma']
        self.tau = param['tau']
        self.target_update_interval = param['target_update_interval']
        self.automatic_entropy_tuning = param['automatic_entropy_tuning']
        self.lr = param['lr']

        self.device = torch.device("cuda" if param['cuda'] else "cpu")

        self.critic = QNetwork(num_inputs, action_space).to(self.device)
        self.critic_optim = Adam(self.critic.parameters(), lr=self.lr)

        self.critic_target = QNetwork(num_inputs, action_space).to(self.device)
        hard_update(self.critic_target, self.critic)

        # Target entropy = -dim(A) (e.g., -6 for HalfCheetah-v2), as given in the paper
        if self.automatic_entropy_tuning:
            # `action_space` is the action dimension (an int) here, so it is wrapped
            # in a list; a bare torch.Tensor(int) allocates an uninitialized tensor.
            self.target_entropy = -torch.prod(
                torch.Tensor([action_space]).to(self.device)).item()
            self.log_alpha = torch.zeros(1,
                                         requires_grad=True,
                                         device=self.device)
            self.alpha_optim = Adam([self.log_alpha], lr=self.lr)

        self.policy = CNNPolicy(num_inputs, action_space).to(self.device)
        self.policy_optim = Adam(self.policy.parameters(), lr=self.lr)
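
The constructor calls `hard_update` to clone the critic into the target network before training starts, but the helper itself is not shown above. A minimal sketch of that parameter-copy pattern, following the standard SAC convention (an illustration, not necessarily the repository's exact implementation):

import torch

def hard_update(target, source):
    # Overwrite every parameter of `target` with the matching parameter
    # of `source`, so both networks start out identical.
    with torch.no_grad():
        for t_param, s_param in zip(target.parameters(), source.parameters()):
            t_param.copy_(s_param)

Note that the `param` dict read by the constructor must supply at least the keys 'alpha', 'gamma', 'tau', 'target_update_interval', 'automatic_entropy_tuning', 'lr', and 'cuda'.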
Code Example #2
# Assumes: import os, rospy, torch; plus the project's StageWorld,
# CNNPolicy, and NN_tb3 classes.
def run():

    # Set parameters of the environment
    LASER_HIST = 3  # number of stacked laser scans fed to the policy
    NUM_ENV = 1  # the number of agents in the environment
    OBS_SIZE = 512  # number of laser beams per scan
    action_bound = [[0, -1], [1, 1]]  # [lower, upper] bounds on [linear, angular] velocity

    # Set env and agent policy
    env = StageWorld(
        OBS_SIZE, index=0, num_env=NUM_ENV
    )  # index is used for parallel training; 0 is the first agent
    trained_model_file = os.path.join(os.path.dirname(__file__), 'policy', 'stage2.pth')
    policy = CNNPolicy(frames=LASER_HIST, action_space=2)
    policy.cpu()  # use policy.cuda() on GPU
    state_dict = torch.load(
        trained_model_file,
        map_location=torch.device('cpu'))  # use torch.load(trained_model_file) on GPU
    policy.load_state_dict(state_dict)

    rospy.init_node('rl_collision_avoidance_tb3', anonymous=False)
    print(
        '==================================\nrl_collision_avoidance node started'
    )

    nn_tb3 = NN_tb3(env,
                    policy,
                    action_bound,
                    OBS_SIZE,
                    index=0,
                    num_env=NUM_ENV)
    rospy.on_shutdown(nn_tb3.on_shutdown)

    rospy.spin()
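
The `action_bound = [[0, -1], [1, 1]]` list above pairs lower bounds with upper bounds for the [linear, angular] velocity command. A minimal sketch of clamping a raw network output to those bounds (the `clip_action` helper is hypothetical, named here only for illustration):

import numpy as np

def clip_action(raw_action, action_bound):
    # raw_action: [linear, angular] velocity proposed by the policy.
    # action_bound: [[lin_min, ang_min], [lin_max, ang_max]].
    low = np.asarray(action_bound[0])
    high = np.asarray(action_bound[1])
    return np.clip(raw_action, low, high)

# Example: clip_action([1.3, -0.2], [[0, -1], [1, 1]]) -> array([1. , -0.2])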
Code Example #3
    logger_cal.setLevel(logging.INFO)
    cal_f_handler = logging.FileHandler(cal_file, mode='a')
    cal_f_handler.setLevel(logging.INFO)  # set the level on the handler just created
    logger_cal.addHandler(cal_f_handler)

    # logger.info('rosport: %d robotIndex: %d rank:%d' %(arg_list.rosports[rank],arg_list.robotIds[rank],rank))
    reward = None
    action_bound = [[0, -1], [1, 1]]

    # torch.manual_seed(1)
    # np.random.seed(1)
    # Only the rank-0 (master) process builds the policy and optimizer.
    if rank == 0:
        policy_path = policydir
        # policy_path = 'policy'
        # policy = MLPPolicy(obs_size, act_size)
        policy = CNNPolicy(frames=LASER_HIST, action_space=2)
        policy.cuda()
        opt = Adam(policy.parameters(), lr=LEARNING_RATE)
        mse = nn.MSELoss()

        if not os.path.exists(policy_path):
            os.makedirs(policy_path)

        file = policy_path + '/Stage1_%d' % ID_EP  # checkpoint named by episode index
        if os.path.exists(file):
            logger.info('####################################')
            logger.info('############Loading Model###########')
            logger.info('####################################')
            state_dict = torch.load(file)
            policy.load_state_dict(state_dict)
        else: