Example #1
score_lst = []
discriminator_score_lst = []
score = 0.0
discriminator_score = 0
if agent_args.on_policy:
    state_lst = []
    state_ = env.reset()
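    # normalize the observation with the running mean/std statistics and clip to [-5, 5]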
    state = np.clip((state_ - state_rms.mean) / (state_rms.var**0.5 + 1e-8),
                    -5, 5)
    for n_epi in range(args.epochs):
        for t in range(agent_args.traj_length):
            if args.render:
                env.render()
            state_lst.append(state_)

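            # sample an action and its log-probability from the current policy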
            action, log_prob = agent.get_action(
                torch.from_numpy(state).float().unsqueeze(0).to(device))

            next_state_, r, done, info = env.step(action.cpu().numpy())
            next_state = np.clip(
                (next_state_ - state_rms.mean) / (state_rms.var**0.5 + 1e-8),
                -5, 5)
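            # replace the environment reward with the discriminator's learned reward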
            if discriminator_args.is_airl:
                reward = discriminator.get_reward(
                    log_prob,
                    torch.tensor(state).unsqueeze(0).float().to(device),
                    action,
                    torch.tensor(next_state).unsqueeze(0).float().to(device),
                    torch.tensor(done).view(1, 1)
                ).item()
            else:
                # assumed completion (the source example is truncated here):
                # the non-AIRL discriminator reward is taken from (state, action)
                reward = discriminator.get_reward(
                    torch.tensor(state).unsqueeze(0).float().to(device),
                    action).item()
Example #2
class DQNBot:
    """
    An operator to train the DQN agent.
    """
    def __init__(self, robot="r", online=False, policy_mode="epsilon", debug=True,
                 save_path=None, load_path=None, pkl_path=None, manual_avoid=False):
        """
        Args:
            robot (str): robot namespace ("r" or "b")
            online (bool): whether training is done online
            policy_mode (str): policy ("epsilon" or "boltzmann")
            debug (bool): debug mode
            save_path (str): model save path
            load_path (str): model load path
            pkl_path (str): replay-memory dump path
            manual_avoid (bool): whether to avoid walls manually
        """
        # attributes
        self.robot = robot
        self.enemy = "b" if robot == "r" else "r"
        self.online = online
        self.policy_mode = policy_mode
        self.debug = debug
        self.my_markers = ROBOT_MARKERS[self.robot]
        self.op_markers = ROBOT_MARKERS[self.enemy]
        self.marker_list = FIELD_MARKERS + self.my_markers + self.op_markers
        self.score = {k: 0 for k in self.marker_list}
        self.past_score = {k: 0 for k in self.marker_list}

        if save_path is None:
            self.save_path = "../catkin_ws/src/burger_war_dev/burger_war_dev/scripts/models/tmp.pth"
        else:
            self.save_path = save_path
        
        self.pkl_path = pkl_path

        # state variables
        self.lidar_ranges = None
        self.my_pose = None
        self.image = None
        self.mask = None
        self.state = None
        self.past_state = None
        self.action = None

        # other variables
        self.game_state = "end"
        self.step = 0
        self.episode = 0
        self.bridge = CvBridge()

        # rostopic subscription
        self.lidar_sub = rospy.Subscriber('scan', LaserScan, self.callback_lidar)
        self.image_sub = rospy.Subscriber('image_raw', Image, self.callback_image)
        self.state_sub = rospy.Timer(rospy.Duration(0.5), self.callback_warstate)

        if self.debug:
            if self.robot == "r": self.odom_sub = rospy.Subscriber("red_bot/tracker", Odometry, self.callback_odom)
            if self.robot == "b": self.odom_sub = rospy.Subscriber("enemy_bot/tracker", Odometry, self.callback_odom)
        else:
            self.amcl_sub = rospy.Subscriber("amcl_pose", PoseWithCovarianceStamped, self.callback_amcl)

        # rostopic publication
        self.twist_pub = rospy.Publisher('cmd_vel', Twist, queue_size=1)

        # rostopic service
        self.state_service = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState)
        self.pause_service = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        self.unpause_service = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)

        # agent
        if self.debug:
            # connect to agent server
            from agents.agent_conn import AgentClient
            self.agent = AgentClient(server_address='127.0.0.1', port=5010,
                                     num_actions=len(ACTION_LIST), batch_size=BATCH_SIZE, capacity=MEM_CAPACITY, gamma=GAMMA, prioritized=PRIOTIZED, lr=LR)
        else:
            # create agent in this process
            from agents.agent import Agent
            self.agent = Agent(num_actions=len(ACTION_LIST), batch_size=BATCH_SIZE, capacity=MEM_CAPACITY, gamma=GAMMA, prioritized=PRIOTIZED, lr=LR)
        # load model
        if load_path is not None:
            self.agent.load_model(load_path)

        # load pickle file if exists
        if self.pkl_path is not None and os.path.exists(self.pkl_path):
            self.agent.load_memory(self.pkl_path)

        # mode
        self.punish_if_facing_wall = not manual_avoid

        self.punish_far_from_center = True
    
    def callback_lidar(self, data):
        """
        callback function of lidar subscription

        Args:
            data (LaserScan): distance data of lidar
        """
        raw_lidar = np.array(data.ranges)
        raw_lidar = lidar_transform(raw_lidar, self.debug)
        self.lidar_ranges = torch.FloatTensor(raw_lidar).view(1, 1, -1)   # (1, 1, 360)

    def callback_image(self, data):
        """
        callback function of image subscription

        Args:
            data (Image): image from the camera mounted on the robot
        """
        try:
            img = self.bridge.imgmsg_to_cv2(data, "bgr8")  # 640x480[px]

            # preprocess image
            img = img[100:, :]   # 640x380[px]
            img = cv2.resize(img, (160, 95))
            deriv_x = cv2.Sobel(img, cv2.CV_32F, 1, 0, ksize=5)
            deriv_y = cv2.Sobel(img, cv2.CV_32F, 0, 1, ksize=5)
            grad = np.sqrt(deriv_x ** 2 + deriv_y ** 2)   # gradient magnitude

            '''
            # visualize preprocessed image for debug
            def min_max(x, axis=None):
                min = x.min(axis=axis, keepdims=True)
                max = x.max(axis=axis, keepdims=True)
                result = (x-min)/(max-min)
                return result
            cv2.imshow('grad', min_max(grad))
            cv2.waitKey(1)
            '''

            img = torchvision.transforms.ToTensor()(grad)
            self.image = img.unsqueeze(0)                   # (1, 3, 95, 160)
        except CvBridgeError as e:
            rospy.logerr(e)
    
    def callback_odom(self, data):
        """
        callback function of tracker subscription

        Args:
            data (Odometry): robot pose
        """
        x = data.pose.pose.position.x
        y = data.pose.pose.position.y
        self.my_pose = torch.FloatTensor([x, y]).view(1, 2)

    def callback_amcl(self, data):
        """
        callback function of amcl subscription

        Args:
            data (PoseWithCovarianceStamped): robot pose
        """
        x = data.pose.pose.position.x
        y = data.pose.pose.position.y
        self.my_pose = torch.FloatTensor([-y, x]).view(1, 2)

    def callback_warstate(self, event):
        """
        callback function of warstate subscription

        Notes:
            https://github.com/p-robotics-hub/burger_war_kit/blob/main/judge/README.md
        """
        # get the game state from judge server by HTTP request
        resp = requests.get(JUDGE_URL + "/warState")
        json_dict = json.loads(resp.text)
        self.game_state = json_dict['state']
        
        if self.game_state == "running":            
            for tg in json_dict["targets"]:
                if tg["player"] == self.robot:
                    self.score[tg["name"]] = int(tg["point"])
                elif tg["player"] == self.enemy:
                    self.score[tg["name"]] = -int(tg["point"])

            msk = []
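            # encode marker ownership: 0 = captured by us, 1 = neutral, 2 = captured by the enemy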
            for k in self.marker_list:
                if self.score[k] > 0:    msk.append(0)
                elif self.score[k] == 0: msk.append(1)
                else:                    msk.append(2)
            
            self.mask = torch.FloatTensor(msk).view(1, 18)

    def get_reward(self, past, current):
        """
        reward function.
        
        Args:
            past (dict): score dictionary at previous step
            current (dict): score dictionary at current step

        Returns:
            reward (float): markers gained minus markers lost, plus any position penalty
        """
        diff_my_score = {k: current[k] - past[k] for k in self.score.keys() if k not in self.my_markers}
        diff_op_score = {k: current[k] - past[k] for k in self.my_markers}

        # Check LiDAR data to punish for AMCL failure
        if self.punish_if_facing_wall:
            bad_position = punish_by_min_dist(self.lidar_ranges, dist_th=0.15)
        else:
            if self.punish_far_from_center:
                pose = self.my_pose.squeeze()
                bad_position = punish_by_count(self.lidar_ranges, dist_th=0.2, count_th=90)
                if abs(pose[0].item()) > 1:
                    bad_position -= 0.1
                if abs(pose[1].item()) > 1:
                    bad_position -= 0.1
            else:
                bad_position = 0

        plus_diff = sum([v for v in diff_my_score.values() if v > 0])
        minus_diff = sum([v for v in diff_op_score.values() if v < 0])

        return plus_diff + minus_diff + bad_position

    def strategy(self):

        # past state
        self.past_state = self.state

        # current state
        self.state = State(
            self.my_pose,           # (1, 2)
            self.lidar_ranges,      # (1, 1, 360)
            self.image,             # (1, 3, 95, 160)
            self.mask,              # (1, 18)
        )

        if self.action is not None:
            current_score = copy.deepcopy(self.score)
            reward = self.get_reward(self.past_score, current_score)
            print("reward: {}".format(reward))
            self.past_score = current_score
            reward = torch.LongTensor([reward])
            self.agent.memorize(self.past_state, self.action, self.state, reward)

        # manual wall avoidance
        if not self.punish_if_facing_wall:
            avoid, linear_x, angular_z = manual_avoid_wall_2(self.lidar_ranges, dist_th=0.13, back_vel=0.2)
        else:
            avoid = False

        if avoid:
            self.action = None
        else:
            # get action from agent
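            # alternate exploration: Boltzmann sampling every third step, epsilon-greedy otherwise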
            if self.step % 3 == 0:
                policy = "boltzmann"
            else:
                policy = "epsilon"

            self.action = self.agent.get_action(self.state, self.episode, policy, self.debug)
            choice = int(self.action.item())

            linear_x = ACTION_LIST[choice][0]
            angular_z = ACTION_LIST[choice][1]

        print("step: {}, vel:{}, omega:{}".format(self.step, linear_x, angular_z))

        # update twist
        twist = Twist()
        twist.linear.x = linear_x
        twist.linear.y = 0.0
        twist.linear.z = 0.0
        twist.angular.x = 0.0
        twist.angular.y = 0.0
        twist.angular.z = angular_z
        self.twist_pub.publish(twist)

        self.step += 1

    def move_robot(self, model_name, position=None, orientation=None, linear=None, angular=None):
        state = ModelState()
        state.model_name = model_name
        # set pose
        pose = Pose()
        if position is not None:
            pose.position = Point(*position)
        if orientation is not None:
            tmpq = tft.quaternion_from_euler(*orientation)
            pose.orientation = Quaternion(tmpq[0], tmpq[1], tmpq[2], tmpq[3])
        state.pose = pose
        # set twist
        twist = Twist()
        if linear is not None:
            twist.linear = Vector3(*linear)
        if angular is not None:
            twist.angular = Vector3(*angular)
        state.twist = twist
        try:
            self.state_service(state)
        except rospy.ServiceException as e:
            print("Service call failed: {}".format(e))