Example #1
0
    def test_area(self):
        """Verify that _area computes the bounding area of valid keypoints."""
        # Keypoints with sufficient score span x:[0.1, 0.4] and y:[0.2, 0.6];
        # the last keypoint (score 0.1) is presumably below the tracker's
        # confidence threshold and excluded from the area.
        spec = [(0.1, 0.2, 1), (0.3, 0.4, 0.9), (0.4, 0.6, 0.9),
                (0.7, 0.8, 0.1)]
        keypoints = [
            KeyPoint(BodyPart(part), Point(x, y), score)
            for part, (x, y, score) in enumerate(spec)
        ]
        empty_box = Rectangle(Point(0, 0), Point(0, 0))
        track = Track(Person(keypoints, empty_box, 1), 1000000)

        computed_area = self.kpt_tracker._area(track)
        self.assertAlmostEqual(computed_area, (0.4 - 0.1) * (0.6 - 0.2), 6)
Example #2
0
    def test_oks(self):
        """Verify object keypoint similarity between a person and a track."""

        def make_person(spec):
            # spec: one (x, y, score) tuple per body part.
            kpts = [
                KeyPoint(BodyPart(i), Point(x, y), s)
                for i, (x, y, s) in enumerate(spec)
            ]
            return Person(kpts, Rectangle(Point(0, 0), Point(0, 0)), 1)

        person = make_person([(0.2, 0.2, 1), (0.4, 0.4, 0.8),
                              (0.6, 0.6, 0.1), (0.8, 0.7, 0.8)])
        track = Track(
            make_person([(0.2, 0.2, 1), (0.4, 0.4, 0.8),
                         (0.6, 0.6, 0.9), (0.8, 0.8, 0.8)]), 1000000)

        oks = self.kpt_tracker._object_keypoint_similarity(person, track)

        # Keypoints 0 and 1 match exactly (contributing 1 each); keypoint 2 is
        # skipped (score 0.1 presumably below threshold); keypoint 3 differs
        # by d = 0.1 in y and contributes the Gaussian falloff term.
        box_area = (0.8 - 0.2) * (0.8 - 0.2)
        x = 2 * self.tracker_config.keypoint_tracker_params.keypoint_falloff[3]
        d = 0.1
        expected_oks = (1 + 1 + math.exp(-1 * (d**2) / (2 * box_area *
                                                        (x**2)))) / 3

        self.assertAlmostEqual(oks, expected_oks, 6)
Example #3
0
    def _detect_and_assert(self, detector, image, keypoints_truth):
        """Run pose estimation and assert if the result is close to ground truth."""
        detected = detector.detect(image).keypoints
        for idx in range(len(BodyPart)):
            # L-infinity norm: the larger of the x and y offsets.
            distance = np.linalg.norm(
                detected[idx].coordinate - keypoints_truth[idx], np.inf)

            self.assertGreaterEqual(
                _ALLOWED_DISTANCE, distance,
                '{0} is too far away ({1}) from ground truth data.'.format(
                    BodyPart(idx).name, int(distance)))
            logging.debug('Detected %s close to expected result (%d)',
                          BodyPart(idx).name, int(distance))
Example #4
0
    def _assert(self, keypoints: List[KeyPoint],
                keypoints_truth: np.ndarray) -> None:
        """Assert if the detection result is close to ground truth.

    Args:
      keypoints: List of KeyPoint detected by the Movenet Multipose model.
      keypoints_truth: Ground truth keypoints.
    """
        for idx in range(len(BodyPart)):
            estimate = np.array(
                [keypoints[idx].coordinate.x, keypoints[idx].coordinate.y])
            # L-infinity norm: the larger of the x and y offsets.
            distance = np.linalg.norm(estimate - keypoints_truth[idx], np.inf)

            self.assertGreaterEqual(
                _ALLOWED_DISTANCE, distance,
                '{0} is too far away ({1}) from ground truth data.'.format(
                    BodyPart(idx).name, int(distance)))
            logging.debug('Detected %s close to expected result (%d)',
                          BodyPart(idx).name, int(distance))
Example #5
0
    def _determine_torso_and_body_range(self, keypoints: np.ndarray,
                                        target_keypoints: Dict[(str, float)],
                                        center_y: float,
                                        center_x: float) -> List[float]:
        """Calculates the maximum distance from each keypoints to the center.

    The function returns the maximum distances from the two sets of keypoints:
    full 17 keypoints and 4 torso keypoints. The returned information will
    be used to determine the crop size. See determine_crop_region for more
    details.

    Args:
      keypoints: Detection result of Movenet model.
      target_keypoints: Pixel coordinates [y, x] of each keypoint, keyed by
        BodyPart (built in _determine_crop_region; includes all keypoints,
        not only the torso).
      center_y (float): Vertical coordinate of the body center.
      center_x (float): Horizontal coordinate of the body center.

    Returns:
      The maximum distance from each keypoints to the center location.
    """
        torso_joints = [
            BodyPart.LEFT_SHOULDER, BodyPart.RIGHT_SHOULDER, BodyPart.LEFT_HIP,
            BodyPart.RIGHT_HIP
        ]
        # Max distance from the center over the 4 torso joints only.
        max_torso_yrange = 0.0
        max_torso_xrange = 0.0
        for joint in torso_joints:
            dist_y = abs(center_y - target_keypoints[joint][0])
            dist_x = abs(center_x - target_keypoints[joint][1])
            if dist_y > max_torso_yrange:
                max_torso_yrange = dist_y
            if dist_x > max_torso_xrange:
                max_torso_xrange = dist_x

        # Max distance over all confidently detected keypoints.
        max_body_yrange = 0.0
        max_body_xrange = 0.0
        for idx in range(len(BodyPart)):
            if keypoints[BodyPart(idx).value,
                         2] < Movenet._MIN_CROP_KEYPOINT_SCORE:
                continue
            # Bug fix: use the current keypoint. The previous code indexed
            # target_keypoints[joint] with the stale `joint` variable left
            # over from the torso loop, so the body range was always
            # computed from the last torso joint instead of each keypoint.
            dist_y = abs(center_y - target_keypoints[BodyPart(idx)][0])
            dist_x = abs(center_x - target_keypoints[BodyPart(idx)][1])
            if dist_y > max_body_yrange:
                max_body_yrange = dist_y

            if dist_x > max_body_xrange:
                max_body_xrange = dist_x

        return [
            max_torso_yrange, max_torso_xrange, max_body_yrange,
            max_body_xrange
        ]
Example #6
0
  def _detect_and_assert(self, detector: Movenet, image: np.ndarray,
                         keypoints_truth: np.ndarray) -> None:
    """Run pose estimation and assert if the result is close to ground truth.

    Args:
      detector: A Movenet pose estimator.
      image: A [height, width, 3] RGB image.
      keypoints_truth: Ground truth keypoint coordinates to be compared to.
    """
    detected = detector.detect(image, reset_crop_region=True).keypoints

    for idx in range(len(BodyPart)):
      estimate = np.array(
          [detected[idx].coordinate.x, detected[idx].coordinate.y])
      # L-infinity norm: the larger of the x and y offsets.
      distance = np.linalg.norm(estimate - keypoints_truth[idx], np.inf)

      self.assertGreaterEqual(
          _ALLOWED_DISTANCE, distance,
          '{0} is too far away ({1}) from ground truth data.'.format(
              BodyPart(idx).name, int(distance)))
      logging.debug('Detected %s close to expected result (%d)',
                    BodyPart(idx).name, int(distance))
Example #7
0
    def test_oks_returns_zero(self):
        """Compute OKS returns 0.0 with less than 2 valid keypoints."""

        def make_person(spec):
            # spec: one (x, y, score) tuple per body part.
            kpts = [
                KeyPoint(BodyPart(i), Point(x, y), s)
                for i, (x, y, s) in enumerate(spec)
            ]
            return Person(kpts, Rectangle(Point(0, 0), Point(0, 0)), 1)

        # Only keypoint 0 is confident in both the person and the track, so
        # fewer than 2 keypoint pairs are valid and OKS must be 0.
        person = make_person([(0.2, 0.2, 1), (0.4, 0.4, 0.1),
                              (0.6, 0.6, 0.9), (0.8, 0.8, 0.8)])
        track = Track(
            make_person([(0.2, 0.2, 1), (0.4, 0.4, 0.8),
                         (0.6, 0.6, 0.1), (0.8, 0.8, 0.1)]), 1000000)

        oks = self.kpt_tracker._object_keypoint_similarity(person, track)
        self.assertAlmostEqual(oks, 0.0, 6)
Example #8
0
    def test_keypoint_tracker(self):
        """Test Keypoint tracker."""

        def make_person(spec):
            # spec: one (x, y, score) tuple per body part.
            kpts = [
                KeyPoint(BodyPart(i), Point(x, y), s)
                for i, (x, y, s) in enumerate(spec)
            ]
            return Person(kpts, Rectangle(Point(0, 0), Point(0, 0)), 1)

        def assert_tracks(expected):
            # expected: ordered list of (person_id, last_timestamp) pairs.
            tracks = self.kpt_tracker._tracks
            self.assertEqual(len(tracks), len(expected))
            for track, (person_id, timestamp) in zip(tracks, expected):
                self.assertEqual(track.person.id, person_id)
                self.assertEqual(track.last_timestamp, timestamp)

        # Timestamp: 0. Person becomes the only track.
        persons = [
            make_person([(0.2, 0.2, 1), (0.4, 0.4, 0.8),
                         (0.6, 0.6, 0.9), (0.8, 0.8, 0.0)])
        ]
        persons = self.kpt_tracker.apply(persons, 0)
        self.assertEqual(len(persons), 1)
        self.assertEqual(persons[0].id, 1)
        assert_tracks([(1, 0)])

        # Timestamp: 100000. First person is linked with track 1. Second person
        # spawns a new track (id = 2).
        persons = [
            make_person([(0.2, 0.2, 1), (0.4, 0.4, 0.8),
                         (0.6, 0.6, 0.9), (0.8, 0.8, 0.8)]),
            make_person([(0.8, 0.8, 0.8), (0.6, 0.6, 0.3),
                         (0.4, 0.4, 0.1),  # Low confidence.
                         (0.2, 0.2, 0.8)])
        ]
        persons = self.kpt_tracker.apply(persons, 100000)
        self.assertEqual(len(persons), 2)
        self.assertEqual(persons[0].id, 1)
        self.assertEqual(persons[1].id, 2)
        assert_tracks([(1, 100000), (2, 100000)])

        # Timestamp: 900000. First person is linked with track 2. Second person
        # spawns a new track (id = 3).
        persons = [
            # Links with id = 2.
            make_person([(0.6, 0.7, 0.7), (0.5, 0.6, 0.7),
                         (0.0, 0.0, 0.1),  # Low confidence.
                         (0.2, 0.1, 1.0)]),
            # Becomes id = 3.
            make_person([(0.5, 0.1, 0.6), (0.9, 0.3, 0.6),
                         (0.1, 0.1, 0.9),
                         (0.4, 0.4, 0.1)])  # Low confidence.
        ]
        persons = self.kpt_tracker.apply(persons, 900000)
        self.assertEqual(len(persons), 2)
        self.assertEqual(persons[0].id, 2)
        self.assertEqual(persons[1].id, 3)
        assert_tracks([(2, 900000), (3, 900000), (1, 100000)])

        # Timestamp: 1200000. First person spawns a new track (id = 4), even
        # though it has the same keypoints as track 1. This is because the age
        # exceeds 1000 msec. The second person links with id 2. The third
        # person spawns a new track (id = 5).
        persons = [
            # Becomes id = 4.
            make_person([(0.2, 0.2, 1.0), (0.4, 0.4, 0.8),
                         (0.6, 0.6, 0.9), (0.8, 0.8, 0.8)]),
            # Links with id = 2.
            make_person([(0.55, 0.7, 0.7), (0.5, 0.6, 0.9),
                         (1.0, 1.0, 0.1),  # Low confidence.
                         (0.8, 0.1, 0.0)]),  # Low confidence.
            # Becomes id = 5.
            make_person([(0.1, 0.1, 0.1),  # Low confidence.
                         (0.2, 0.2, 0.9),
                         (0.3, 0.3, 0.7),
                         (0.4, 0.4, 0.8)])
        ]
        persons = self.kpt_tracker.apply(persons, 1200000)
        self.assertEqual(len(persons), 3)
        self.assertEqual(persons[0].id, 4)
        self.assertEqual(persons[1].id, 2)
        assert_tracks([(2, 1200000), (4, 1200000), (5, 1200000),
                       (3, 900000)])

        # Timestamp: 1300000. First person spawns a new track (id = 6). Since
        # max_tracks is 4, the oldest track (id = 3) is removed.
        persons = [
            # Becomes id = 6.
            make_person([(0.1, 0.8, 1.0), (0.2, 0.9, 0.6),
                         (0.2, 0.9, 0.5), (0.8, 0.2, 0.4)])
        ]
        persons = self.kpt_tracker.apply(persons, 1300000)
        self.assertEqual(len(persons), 1)
        self.assertEqual(persons[0].id, 6)
        assert_tracks([(6, 1300000), (2, 1200000), (4, 1200000),
                       (5, 1200000)])
Example #9
0
    def _determine_crop_region(self, keypoints: np.ndarray, image_height: int,
                               image_width: int) -> Dict[(str, float)]:
        """Determines the region to crop the image for the model to run inference on.

    The algorithm uses the detected joints from the previous frame to
    estimate the square region that encloses the full body of the target
    person and centers at the midpoint of two hip joints. The crop size is
    determined by the distances between each joints and the center point.
    When the model is not confident with the four torso joint predictions,
    the function returns a default crop which is the full image padded to
    square.

    Args:
      keypoints: Detection result of Movenet model.
      image_height (int): The input image height.
      image_width (int): The input image width.

    Returns:
      crop_region (dict): The crop region to run inference on.
    """
        # Return the initial crop region if the torso isn't visible.
        if not self._torso_visible(keypoints):
            return self.init_crop_region(image_height, image_width)

        # Convert normalized keypoints to pixel coordinates, keyed by
        # human-readable body part.
        target_keypoints = {
            BodyPart(idx): [
                keypoints[idx, 0] * image_height,
                keypoints[idx, 1] * image_width
            ]
            for idx in range(len(BodyPart))
        }

        # Center the crop at the midpoint of the two hip joints.
        center_y = (target_keypoints[BodyPart.LEFT_HIP][0] +
                    target_keypoints[BodyPart.RIGHT_HIP][0]) / 2
        center_x = (target_keypoints[BodyPart.LEFT_HIP][1] +
                    target_keypoints[BodyPart.RIGHT_HIP][1]) / 2

        (max_torso_yrange, max_torso_xrange, max_body_yrange,
         max_body_xrange) = self._determine_torso_and_body_range(
             keypoints, target_keypoints, center_y, center_x)

        crop_length_half = np.amax([
            max_torso_xrange * Movenet._TORSO_EXPANSION_RATIO,
            max_torso_yrange * Movenet._TORSO_EXPANSION_RATIO,
            max_body_yrange * Movenet._BODY_EXPANSION_RATIO,
            max_body_xrange * Movenet._BODY_EXPANSION_RATIO
        ])

        # Adjust crop length so that it is still within the image border.
        distances_to_border = np.array([
            center_x, image_width - center_x, center_y,
            image_height - center_y
        ])
        crop_length_half = np.amin(
            [crop_length_half,
             np.amax(distances_to_border)])

        # If the body is large enough, there's no need to apply cropping logic.
        if crop_length_half > max(image_width, image_height) / 2:
            return self.init_crop_region(image_height, image_width)

        # Calculate the crop region that nicely covers the full body.
        crop_length = crop_length_half * 2
        crop_corner = [
            center_y - crop_length_half, center_x - crop_length_half
        ]
        return {
            'y_min':
            crop_corner[0] / image_height,
            'x_min':
            crop_corner[1] / image_width,
            'y_max': (crop_corner[0] + crop_length) / image_height,
            'x_max': (crop_corner[1] + crop_length) / image_width,
            'height': (crop_corner[0] + crop_length) / image_height -
            crop_corner[0] / image_height,
            'width': (crop_corner[1] + crop_length) / image_width -
            crop_corner[1] / image_width
        }
Example #10
0
    def _postprocess(self, keypoints_with_scores: np.ndarray,
                     image_height: int, image_width: int,
                     detection_threshold: float) -> List[Person]:
        """Returns a list "Person" corresponding to the input image.

    Note that coordinates are expressed in (x, y) format for drawing
    utilities.

    Args:
      keypoints_with_scores: Output of the MultiPose TFLite model.
      image_height: height of the image in pixels.
      image_width: width of the image in pixels.
      detection_threshold: minimum confidence score for an entity to be
        considered.

    Returns:
      A list of Person(keypoints, bounding_box, scores), each containing:
        * the coordinates of all keypoints of the detected entity;
        * the bounding boxes of the entity.
        * the confidence score of the entity.
    """
        list_persons = []
        num_instances = keypoints_with_scores.shape[1]
        for idx in range(num_instances):
            # Each instance packs 17 keypoints as (y, x, score) triplets in
            # indices 0..50, the bounding box in 51..54 and the overall
            # detection score in 55.
            person_score = keypoints_with_scores[0, idx, 55]
            # Skip a detected pose if its confidence score is below the
            # threshold.
            if person_score < detection_threshold:
                continue

            # Extract the keypoint coordinates and scores.
            kpts_y = keypoints_with_scores[0, idx, range(0, 51, 3)]
            kpts_x = keypoints_with_scores[0, idx, range(1, 51, 3)]
            scores = keypoints_with_scores[0, idx, range(2, 51, 3)]

            # Build the keypoint list with pixel-space (x, y) coordinates.
            keypoints = [
                KeyPoint(
                    BodyPart(i),
                    Point(int(kpts_x[i] * image_width),
                          int(kpts_y[i] * image_height)), scores[i])
                for i in range(scores.shape[0])
            ]

            # Convert the normalized bounding box to pixel coordinates.
            box_ymin, box_xmin, box_ymax, box_xmax = (
                keypoints_with_scores[0, idx, 51],
                keypoints_with_scores[0, idx, 52],
                keypoints_with_scores[0, idx, 53],
                keypoints_with_scores[0, idx, 54])
            bounding_box = Rectangle(
                Point(int(box_xmin * image_width),
                      int(box_ymin * image_height)),
                Point(int(box_xmax * image_width),
                      int(box_ymax * image_height)))

            # Create a Person instance corresponding to the detected entity.
            list_persons.append(Person(keypoints, bounding_box, person_score))

        # Optionally assign stable IDs across frames via the tracker.
        if self._tracker:
            list_persons = self._tracker.apply(list_persons,
                                               time.time() * 1000)

        return list_persons