Ejemplo n.º 1
0
    def preprocess_entry(self, entry):
        """Crop the eye region using annotations and compute gaze targets.

        Consumes ``entry['full_image']`` and ``entry['json_data']`` (both keys
        are removed from ``entry``) and stores the following keys instead:

        * ``'gaze'``: output of ``util.gaze.vector_to_pitchyaw`` for the
          x-flipped look vector, with the second component remapped around
          +/- pi.
        * ``'eye'``: the cropped eye patch resized to
          ``self._eye_image_shape``, rescaled with ``x * 2 / 255 - 1`` and
          given one extra axis (first for ``'NCHW'``, last otherwise).
        * ``'gazemaps'``: output of ``util.gazemap_uteyes.from_gaze2d``,
          moved to channel-last layout when ``self.data_format == 'NHWC'``.

        Args:
            entry: dict holding ``'full_image'`` (a 2-D image array) and
                ``'json_data'`` (annotation dict with ``interior_margin_2d``,
                ``caruncle_2d``, ``iris_2d`` and ``eye_details.look_vec``).

        Returns:
            The same ``entry`` dict, with every value cast to ``np.float32``.
        """
        full_image = entry['full_image']
        json_data = entry['json_data']
        del entry['full_image']  # release cache
        del entry['json_data']

        # Unpacking two values also rejects images that are not 2-D.
        ih, _ = full_image.shape
        oh, ow = self._eye_image_shape

        def process_coords(coords_list):
            # Each item is a string such as "(x, y, z)".
            # SECURITY NOTE: eval() executes arbitrary expressions; this is
            # only acceptable for trusted annotation files. Consider
            # ast.literal_eval if the JSON may come from an untrusted source.
            coords = [eval(item) for item in coords_list]
            # Flip y so that the origin is at the top of the image.
            return np.array([(x, ih - y, z) for (x, y, z) in coords])

        interior_landmarks = process_coords(json_data['interior_margin_2d'])
        caruncle_landmarks = process_coords(json_data['caruncle_2d'])
        iris_landmarks = process_coords(json_data['iris_2d'])

        # NOTE: head-pose filtering (|pitch|, |yaw| <= 20 degrees) is
        # disabled in this variant, so no entry is ever rejected here.

        # Prepare to segment eye image
        left_corner = np.mean(caruncle_landmarks[:, :2], axis=0)
        right_corner = interior_landmarks[8, :2]
        eye_width = 1.5 * abs(left_corner[0] - right_corner[0])
        # Fixed height/width ratio instead of a landmark-derived height.
        eye_height = 0.6 * eye_width
        eye_middle = np.mean(
            [
                np.amin(interior_landmarks[:, :2], axis=0),
                np.amax(interior_landmarks[:, :2], axis=0),
            ],
            axis=0,
        )
        # NOTE(review): abs() keeps the indices non-negative, but it mirrors
        # (rather than clamps) a box that extends past the image border.
        top_l_x = abs(int(eye_middle[0] - eye_width / 2.0))
        top_l_y = abs(int(eye_middle[1] - eye_height / 2.0))
        down_r_x = abs(int(eye_middle[0] + eye_width / 2.0))
        down_r_y = abs(int(eye_middle[1] + eye_height / 2.0))
        # segment eye image
        eye = full_image[top_l_y:down_r_y, top_l_x:down_r_x]

        # Convert look vector to gaze direction in polar angles
        # (see SECURITY NOTE above regarding eval()).
        look_vec = np.array(eval(json_data['eye_details']['look_vec']))[:3]
        look_vec[0] = -look_vec[0]
        gaze = util.gaze.vector_to_pitchyaw(look_vec.reshape((1, 3))).flatten()
        if gaze[1] > 0.0:
            gaze[1] = np.pi - gaze[1]
        elif gaze[1] < 0.0:
            gaze[1] = -(np.pi + gaze[1])
        entry['gaze'] = gaze.astype(np.float32)

        # Resize eye image and normalize intensities.
        eye = cv.resize(eye, (ow, oh))
        eye = eye.astype(np.float32)
        eye *= 2.0 / 255.0
        eye -= 1.0
        # Add the single channel axis in the position data_format expects.
        eye = np.expand_dims(
            eye, axis=0 if self.data_format == 'NCHW' else -1)
        entry['eye'] = eye

        # Select landmark coordinates and express them relative to the crop
        landmarks = np.concatenate([
            interior_landmarks[:, :2],  # 16
            iris_landmarks[:, :2],  # 32
        ])  # 48 in total
        landmarks = landmarks - [top_l_x, top_l_y]

        # `intput_size` (sic) is the keyword used by the original call.
        gazemaps = util.gazemap_uteyes.from_gaze2d(
            landmarks,
            intput_size=(int(eye_height), int(eye_width)),
            output_size=(oh, ow),
            scale=0.5,
        ).astype(np.float32)
        if self.data_format == 'NHWC':
            # np.transpose returns a new view; the result must be kept
            # (it was previously discarded, leaving the maps channel-first).
            gazemaps = np.transpose(gazemaps, (1, 2, 0))
        entry['gazemaps'] = gazemaps

        # Ensure all values in an entry are 4-byte floating point numbers
        for key, value in entry.items():
            entry[key] = value.astype(np.float32)

        return entry
Ejemplo n.º 2
0
    def preprocess_entry(self, entry):
        """Segment the eye with annotations, augment it and compute targets.

        Consumes ``entry['full_image']`` and ``entry['json_data']`` (both keys
        are removed from ``entry``). Augmentation strengths are taken from
        ``self._augmentation_ranges`` and interpolated with
        ``self._difficulty``.

        On success the following keys are written:

        * ``'radius'``: scaled eyeball radius (``np.float32``).
        * ``'gaze'``: pitch/yaw from ``util.gaze.vector_to_pitchyaw`` after
          applying the augmentation rotation, as ``np.float32``.
        * ``'eye'``: warped, augmented, histogram-equalised eye image,
          rescaled with ``x * 2 / 255 - 1`` and given one channel axis.
        * ``'landmarks'``: 18 (x, y) landmarks in eye-image coordinates.
        * ``'heatmaps'``: only when ``self._generate_heatmaps`` is truthy.

        Args:
            entry: dict holding ``'full_image'`` (a 2-D image array) and
                ``'json_data'`` (annotation dict).

        Returns:
            The updated ``entry``, or ``None`` when the absolute head pitch or
            yaw exceeds 20 (the input keys have already been removed from
            ``entry`` at that point).
        """
        full_image = entry['full_image']
        json_data = entry['json_data']
        del entry['full_image']
        del entry['json_data']

        ih, iw = full_image.shape
        iw_2, ih_2 = 0.5 * iw, 0.5 * ih
        oh, ow = self._eye_image_shape

        def process_coords(coords_list):
            # Each item is a string such as "(x, y, z)".
            # SECURITY NOTE: eval() executes arbitrary expressions; this is
            # only acceptable for trusted annotation files. Consider
            # ast.literal_eval if the JSON may come from an untrusted source.
            coords = [eval(item) for item in coords_list]
            # Flip y so that the origin is at the top of the image.
            return np.array([(x, ih - y, z) for (x, y, z) in coords])

        interior_landmarks = process_coords(json_data['interior_margin_2d'])
        caruncle_landmarks = process_coords(json_data['caruncle_2d'])
        iris_landmarks = process_coords(json_data['iris_2d'])

        # Pool of standard-normal samples shared by the noisy augmentations.
        random_multipliers = []

        def value_from_type(augmentation_type):
            # Interpolate between the easy and hard bounds by difficulty,
            # then clip to the range regardless of which bound is larger.
            easy_value, hard_value = self._augmentation_ranges[
                augmentation_type]
            value = (hard_value - easy_value) * self._difficulty + easy_value
            if easy_value < hard_value:
                return np.clip(value, easy_value, hard_value)
            return np.clip(value, hard_value, easy_value)

        def noisy_value_from_type(augmentation_type):
            # Refill the pool with normal-distributed values when exhausted.
            if not random_multipliers:
                random_multipliers.extend(
                    np.random.normal(size=(len(self._augmentation_ranges), )))
            return random_multipliers.pop() * value_from_type(
                augmentation_type)

        # Only select almost frontal images
        # (see SECURITY NOTE above regarding eval()).
        h_pitch, h_yaw, _ = eval(json_data['head_pose'])
        if h_pitch > 180.0:  # Need to correct pitch
            h_pitch -= 360.0
        h_yaw -= 180.0  # Need to correct yaw
        if abs(h_pitch) > 20 or abs(h_yaw) > 20:
            return None

        # Prepare to segment eye image
        left_corner = np.mean(caruncle_landmarks[:, :2], axis=0)
        right_corner = interior_landmarks[8, :2]
        eye_width = 1.5 * abs(left_corner[0] - right_corner[0])
        eye_middle = np.mean(
            [
                np.amin(interior_landmarks[:, :2], axis=0),
                np.amax(interior_landmarks[:, :2], axis=0),
            ],
            axis=0,
        )

        # Centre axes to eyeball centre
        translate_mat = np.asmatrix(np.eye(3))
        translate_mat[:2, 2] = [[-iw_2], [-ih_2]]

        # Rotate eye image if requested (only positive noise is applied)
        rotate_mat = np.asmatrix(np.eye(3))
        rotation_noise = noisy_value_from_type('rotation')
        if rotation_noise > 0:
            rotate_angle = np.radians(rotation_noise)
            cos_rotate = np.cos(rotate_angle)
            sin_rotate = np.sin(rotate_angle)
            rotate_mat[0, 0] = cos_rotate
            rotate_mat[0, 1] = -sin_rotate
            rotate_mat[1, 0] = sin_rotate
            rotate_mat[1, 1] = cos_rotate

        # Scale image to fit output dimensions (with a little bit of noise)
        scale_mat = np.asmatrix(np.eye(3))
        scale = 1. + noisy_value_from_type('scale')
        scale_inv = 1. / scale
        np.fill_diagonal(scale_mat, ow / eye_width * scale)
        original_eyeball_radius = 71.7593
        eyeball_radius = original_eyeball_radius * scale_mat[
            0, 0]  # See: https://goo.gl/ZnXgDE
        entry['radius'] = np.float32(eyeball_radius)

        # Re-centre eye image such that eye fits (based on determined
        # `eye_middle`)
        recentre_mat = np.asmatrix(np.eye(3))
        recentre_mat[0, 2] = (
            iw / 2 - eye_middle[0] + 0.5 * eye_width * scale_inv)
        recentre_mat[1, 2] = (
            ih / 2 - eye_middle[1] + 0.5 * oh / ow * eye_width * scale_inv)
        recentre_mat[0, 2] += noisy_value_from_type('translation')  # x
        recentre_mat[1, 2] += noisy_value_from_type('translation')  # y

        # Apply transforms
        transform_mat = recentre_mat * scale_mat * rotate_mat * translate_mat
        eye = cv.warpAffine(full_image, transform_mat[:2, :3], (ow, oh))

        # Convert look vector to gaze direction in polar angles
        look_vec = np.array(eval(json_data['eye_details']['look_vec']))[:3]
        look_vec[0] = -look_vec[0]
        # Gaze before the augmentation rotation; used for the iris centre.
        original_gaze = util.gaze.vector_to_pitchyaw(look_vec.reshape(
            (1, 3))).flatten()
        look_vec = rotate_mat * look_vec.reshape(3, 1)
        gaze = util.gaze.vector_to_pitchyaw(look_vec.reshape((1, 3))).flatten()
        if gaze[1] > 0.0:
            gaze[1] = np.pi - gaze[1]
        elif gaze[1] < 0.0:
            gaze[1] = -(np.pi + gaze[1])
        entry['gaze'] = gaze.astype(np.float32)

        # Draw line randomly
        num_line_noise = int(np.round(noisy_value_from_type('num_line')))
        if num_line_noise > 0:
            # Five random numbers per line: x0, y1, direction, colour, width.
            line_rand_nums = np.random.rand(5 * num_line_noise)
            for i in range(num_line_noise):
                j = 5 * i
                lx0, ly0 = int(ow * line_rand_nums[j]), oh
                lx1, ly1 = ow, int(oh * line_rand_nums[j + 1])
                direction = line_rand_nums[j + 2]
                if direction < 0.25:
                    lx1 = ly0 = 0
                elif direction < 0.5:
                    lx1 = 0
                elif direction < 0.75:
                    ly0 = 0
                line_colour = int(255 * line_rand_nums[j + 3])
                # cv.line requires a strictly positive thickness; int() of a
                # value below 1 would otherwise yield 0 and raise.
                line_thickness = max(1, int(6 * line_rand_nums[j + 4]))
                eye = cv.line(eye, (lx0, ly0), (lx1, ly1),
                              color=(line_colour, line_colour, line_colour),
                              thickness=line_thickness,
                              lineType=cv.LINE_AA)

        # Rescale image if required (down-sample, equalise, up-sample)
        rescale_max = value_from_type('rescale')
        if rescale_max < 1.0:
            rescale_noise = np.random.uniform(low=rescale_max, high=1.0)
            interpolation = cv.INTER_CUBIC
            eye = cv.resize(eye,
                            dsize=(0, 0),
                            fx=rescale_noise,
                            fy=rescale_noise,
                            interpolation=interpolation)
            eye = cv.equalizeHist(eye)
            eye = cv.resize(eye, dsize=(ow, oh), interpolation=interpolation)

        # Add rgb noise to eye image
        intensity_noise = int(value_from_type('intensity'))
        if intensity_noise > 0:
            # Work in int16 so that adding negative noise cannot wrap around.
            eye = eye.astype(np.int16)
            eye += np.random.randint(low=-intensity_noise,
                                     high=intensity_noise,
                                     size=eye.shape,
                                     dtype=np.int16)
            cv.normalize(eye, eye, alpha=0, beta=255, norm_type=cv.NORM_MINMAX)
            eye = eye.astype(np.uint8)

        # Add blur to eye image
        blur_noise = noisy_value_from_type('blur')
        if blur_noise > 0:
            eye = cv.GaussianBlur(eye, (7, 7), 0.5 + np.abs(blur_noise))

        # Histogram equalization and preprocessing for NN
        eye = cv.equalizeHist(eye)
        eye = eye.astype(np.float32)
        eye *= 2.0 / 255.0
        eye -= 1.0
        eye = np.expand_dims(eye, -1 if self.data_format == 'NHWC' else 0)
        entry['eye'] = eye

        # Select and transform landmark coordinates
        iris_centre = np.asarray([
            iw_2 + original_eyeball_radius * -np.cos(original_gaze[0]) *
            np.sin(original_gaze[1]),
            ih_2 + original_eyeball_radius * -np.sin(original_gaze[0]),
        ])
        landmarks = np.concatenate([
            interior_landmarks[::2, :2],  # 8
            iris_landmarks[::4, :2],  # 8
            iris_centre.reshape((1, 2)),
            [[iw_2, ih_2]],  # Eyeball centre
        ])  # 18 in total
        # Append a homogeneous coordinate so the 3x3 transform can be applied.
        landmarks = np.asmatrix(
            np.pad(landmarks, ((0, 0), (0, 1)), 'constant', constant_values=1))
        landmarks = np.asarray(landmarks * transform_mat.T)
        landmarks = landmarks[:, :2]  # We only need x, y
        entry['landmarks'] = landmarks.astype(np.float32)

        # Generate heatmaps if necessary
        if self._generate_heatmaps:
            # Should be half-scale (compared to eye image)
            entry['heatmaps'] = np.asarray([
                util.heatmap.gaussian_2d(
                    shape=(self._heatmaps_scale * oh,
                           self._heatmaps_scale * ow),
                    centre=self._heatmaps_scale * landmark,
                    sigma=value_from_type('heatmap_sigma'),
                ) for landmark in entry['landmarks']
            ]).astype(np.float32)
            if self.data_format == 'NHWC':
                entry['heatmaps'] = np.transpose(entry['heatmaps'], (1, 2, 0))

        return entry
Ejemplo n.º 3
0
def preprocess_unityeyes_image(img, json_data):
    """Crop, augment and normalise a UnityEyes image and build its targets.

    The image is converted to grayscale, warped so that the eye fills a
    160x96 (width x height) patch with a random translation of up to
    +/-10 pixels per axis, blurred with a random sigma, histogram-equalised
    and scaled to ``[0, 1]``.

    Args:
        img: BGR image array (converted with ``cv2.COLOR_BGR2GRAY``).
        json_data: annotation dict with ``interior_margin_2d``,
            ``caruncle_2d``, ``iris_2d`` and ``eye_details.look_vec``.

    Returns:
        dict with keys:

        * ``'img'``: float32 eye patch.
        * ``'transform'`` / ``'transform_inv'``: 3x3 image-to-patch matrix
          and its inverse.
        * ``'eye_middle'``: centre of the interior-margin bounding box.
        * ``'heatmaps'``: output of ``get_heatmaps`` at half patch resolution.
        * ``'landmarks'``: float32 landmarks in heatmap coordinates, stored
          as ``(y, x)``.
        * ``'gaze'``: float32 output of ``util.gaze.vector_to_pitchyaw`` for
          the negated look vector.
    """
    ow = 160
    oh = 96
    # Prepare to segment eye image
    ih, iw = img.shape[:2]
    ih_2, iw_2 = ih / 2.0, iw / 2.0

    # Heatmaps are produced at half the resolution of the eye patch.
    heatmap_w = int(ow / 2)
    heatmap_h = int(oh / 2)

    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    def process_coords(coords_list):
        # Each item is a string such as "(x, y, z)".
        # SECURITY NOTE: eval() executes arbitrary expressions; this is only
        # acceptable for trusted annotation files. Consider ast.literal_eval
        # if the JSON may come from an untrusted source.
        coords = [eval(item) for item in coords_list]
        # Flip y so that the origin is at the top of the image.
        return np.array([(x, ih - y, z) for (x, y, z) in coords])

    interior_landmarks = process_coords(json_data['interior_margin_2d'])
    caruncle_landmarks = process_coords(json_data['caruncle_2d'])
    iris_landmarks = process_coords(json_data['iris_2d'])

    left_corner = np.mean(caruncle_landmarks[:, :2], axis=0)
    right_corner = interior_landmarks[8, :2]
    eye_width = 1.5 * abs(left_corner[0] - right_corner[0])
    eye_middle = np.mean(
        [
            np.amin(interior_landmarks[:, :2], axis=0),
            np.amax(interior_landmarks[:, :2], axis=0),
        ],
        axis=0,
    )

    # Normalize to eye width.
    scale = ow / eye_width

    # Move the (already scaled) eye centre to the origin.
    translate = np.asmatrix(np.eye(3))
    translate[0, 2] = -eye_middle[0] * scale
    translate[1, 2] = -eye_middle[1] * scale

    # Move the origin to the patch centre, with random jitter.
    rand_x = np.random.uniform(low=-10, high=10)
    rand_y = np.random.uniform(low=-10, high=10)
    recenter = np.asmatrix(np.eye(3))
    recenter[0, 2] = ow / 2 + rand_x
    recenter[1, 2] = oh / 2 + rand_y

    scale_mat = np.asmatrix(np.eye(3))
    scale_mat[0, 0] = scale
    scale_mat[1, 1] = scale

    # Rotation augmentation is currently disabled (angle fixed to 0).
    angle = 0
    rotation = R.from_rotvec([0, 0, angle]).as_matrix()

    transform = recenter * rotation * translate * scale_mat
    transform_inv = np.linalg.inv(transform)

    # Apply transforms
    eye = cv2.warpAffine(img, transform[:2], (ow, oh))

    rand_blur = np.random.uniform(low=0, high=20)
    eye = cv2.GaussianBlur(eye, (5, 5), rand_blur)

    # Normalize eye image
    eye = cv2.equalizeHist(eye)
    eye = eye.astype(np.float32)
    eye = eye / 255.0

    # Gaze
    # Convert look vector to gaze direction in polar angles
    # (see SECURITY NOTE above regarding eval()).
    look_vec = np.array(eval(
        json_data['eye_details']['look_vec']))[:3].reshape((1, 3))

    gaze = util.gaze.vector_to_pitchyaw(-look_vec).flatten()
    gaze = gaze.astype(np.float32)

    iris_center = np.mean(iris_landmarks[:, :2], axis=0)

    # The previous hard-coded buffer expected 34 rows in total
    # (presumably 16 interior + 16 iris + 2 centres — TODO confirm).
    landmarks = np.concatenate([
        interior_landmarks[:, :2],  # all interior margin points
        iris_landmarks[::2, :2],  # every second iris point
        iris_center.reshape((1, 2)),
        [[iw_2, ih_2]],  # Eyeball center
    ])

    # Append a homogeneous coordinate, map into patch space, then rescale
    # from patch resolution to heatmap resolution.
    landmarks = np.asmatrix(
        np.pad(landmarks, ((0, 0), (0, 1)), 'constant', constant_values=1))
    landmarks = np.asarray(landmarks * transform[:2].T) * np.array(
        [heatmap_w / ow, heatmap_h / oh])
    landmarks = landmarks.astype(np.float32)

    # Swap columns so that landmarks are in (y, x), not (x, y)
    # This is because the network outputs landmarks as (y, x) values.
    # Reversing the column order works for any number of landmarks.
    landmarks = np.ascontiguousarray(landmarks[:, ::-1])

    heatmaps = get_heatmaps(w=heatmap_w, h=heatmap_h, landmarks=landmarks)

    # Sanity check: one heatmap per landmark at heatmap resolution.
    assert heatmaps.shape == (len(landmarks), heatmap_h, heatmap_w)

    return {
        'img': eye,
        'transform': np.asarray(transform),
        'transform_inv': np.asarray(transform_inv),
        'eye_middle': np.asarray(eye_middle),
        'heatmaps': np.asarray(heatmaps),
        'landmarks': np.asarray(landmarks),
        'gaze': np.asarray(gaze)
    }