Beispiel #1
0
    def __getitem__(self, index):
        """Return one sample: cropped image, binned pose, continuous pose, name.

        The crop is the bounding box of the annotated 2D points enlarged by
        fixed margins; no random augmentation is applied. Angles are returned
        in (yaw, pitch, roll) order, both as bin indices and as degrees.
        """
        sample_name = self.X_train[index]
        img = Image.open(os.path.join(self.data_dir, sample_name + self.img_ext))
        img = img.convert(self.image_mode)
        mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

        # Bounding box of the annotated points (row 0 is x, row 1 is y).
        landmarks = utils.get_pt2d_from_mat(mat_path)
        xs, ys = landmarks[0, :], landmarks[1, :]
        left, top, right, bottom = min(xs), min(ys), max(xs), max(ys)

        # Loosen the box. The right/bottom margins are measured on the box
        # after left/top have already been moved outwards.
        margin = 0.20
        left -= 2 * margin * abs(right - left)
        top -= 2 * margin * abs(bottom - top)
        right += 2 * margin * abs(right - left)
        bottom += 0.6 * margin * abs(bottom - top)
        img = img.crop((int(left), int(top), int(right), int(bottom)))

        # The annotation stores (pitch, yaw, roll) in radians; use degrees.
        pose = utils.get_ypr_from_mat(mat_path)
        pitch, yaw, roll = (pose[axis] * 180 / np.pi for axis in range(3))
        angles = [yaw, pitch, roll]

        # Bin edges are -99, -96, ..., 99; subtracting 1 makes bins 0-based.
        bin_edges = np.arange(-99, 102, 3)
        labels = torch.LongTensor(np.digitize(angles, bin_edges) - 1)
        cont_labels = torch.FloatTensor(angles)

        if self.transform is not None:
            img = self.transform(img)

        return img, labels, cont_labels, sample_name
Beispiel #2
0
    def __getitem__(self, index):
        """Return one augmented sample: image, binned pose, continuous pose, name.

        Augmentations, in the order their random numbers are drawn: random
        crop margin, random down/up-sampling, horizontal flip (p=0.5) and
        blur (p=0.05). Angles are in (yaw, pitch, roll) order, in degrees.
        ``labels`` is a NumPy integer array of 0-based bin indices.
        """
        img = Image.open(
            os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
        img = img.convert(self.image_mode)
        mat_path = os.path.join(self.data_dir,
                                self.y_train[index] + self.annot_ext)

        # Crop the face loosely
        pt2d = utils.get_pt2d_from_mat(mat_path)
        x_min = min(pt2d[0, :])
        y_min = min(pt2d[1, :])
        x_max = max(pt2d[0, :])
        y_max = max(pt2d[1, :])

        # k = 0.2 to 0.40
        k = np.random.random_sample() * 0.2 + 0.2
        x_min -= 0.6 * k * abs(x_max - x_min)
        y_min -= 2 * k * abs(y_max - y_min)
        # The right/bottom margins use the box after x_min/y_min were moved.
        x_max += 0.6 * k * abs(x_max - x_min)
        y_max += 0.6 * k * abs(y_max - y_min)
        img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

        # The annotation stores (pitch, yaw, roll) in radians; use degrees.
        pose = utils.get_ypr_from_mat(mat_path)
        pitch = pose[0] * 180 / np.pi
        yaw = pose[1] * 180 / np.pi
        roll = pose[2] * 180 / np.pi

        # Degrade resolution: shrink by ds (1, 6, 11 or 16) and scale back.
        # Integer division is required because Image.resize only accepts
        # integer sizes; the floor of 1 keeps tiny crops from collapsing to 0.
        ds = 1 + np.random.randint(0, 4) * 5
        original_size = img.size
        small_size = (max(1, original_size[0] // ds),
                      max(1, original_size[1] // ds))
        img = img.resize(small_size, resample=Image.NEAREST)
        img = img.resize((original_size[0], original_size[1]),
                         resample=Image.NEAREST)

        # Flip? Mirroring the image negates yaw and roll.
        rnd = np.random.random_sample()
        if rnd < 0.5:
            yaw = -yaw
            roll = -roll
            img = img.transpose(Image.FLIP_LEFT_RIGHT)

        # Blur?
        rnd = np.random.random_sample()
        if rnd < 0.05:
            img = img.filter(ImageFilter.BLUR)

        # Bin values: edges are -99, -96, ..., 99; -1 makes bins 0-based.
        bins = np.array(range(-99, 102, 3))
        binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

        # Get target tensors
        labels = binned_pose
        cont_labels = torch.FloatTensor([yaw, pitch, roll])

        if self.transform is not None:
            img = self.transform(img)

        return img, labels, cont_labels, self.X_train[index]
Beispiel #3
0
    def __getitem__(self, index):
        """Return one augmented sample with pose angles clamped to +/-98.99 deg.

        Clamping keeps every angle inside the bin range so the 0-based bin
        indices fall in 0..65; anything outside that range is reported on
        stdout. ``labels`` is a NumPy integer array, ``cont_labels`` a float
        tensor, both in (yaw, pitch, roll) order.
        """
        sample_name = self.X_train[index]
        img = Image.open(os.path.join(self.data_dir, sample_name + self.img_ext))
        img = img.convert(self.image_mode)
        mat_path = os.path.join(self.data_dir,
                                self.y_train[index] + self.annot_ext)

        # Bounding box of the annotated points (row 0 is x, row 1 is y).
        landmarks = utils.get_pt2d_from_mat(mat_path)
        xs, ys = landmarks[0, :], landmarks[1, :]
        left, top, right, bottom = min(xs), min(ys), max(xs), max(ys)

        # Random margin factor in [0.2, 0.4). The right/bottom margins are
        # measured on the box after left/top have already been moved.
        scale = np.random.random_sample() * 0.2 + 0.2
        left -= 0.6 * scale * abs(right - left)
        top -= 2 * scale * abs(bottom - top)
        right += 0.6 * scale * abs(right - left)
        bottom += 0.6 * scale * abs(bottom - top)
        img = img.crop((int(left), int(top), int(right), int(bottom)))

        def _clamped_degrees(radians):
            # Radians -> degrees, limited to the open interval covered by bins.
            return min(max(radians * 180 / np.pi, -98.99), 98.99)

        pose = utils.get_ypr_from_mat(mat_path)
        pitch = _clamped_degrees(pose[0])
        yaw = _clamped_degrees(pose[1])
        roll = _clamped_degrees(pose[2])

        # Horizontal flip (p=0.5) negates yaw and roll.
        if np.random.random_sample() < 0.5:
            yaw, roll = -yaw, -roll
            img = img.transpose(Image.FLIP_LEFT_RIGHT)

        # Blur (p=0.05).
        if np.random.random_sample() < 0.05:
            img = img.filter(ImageFilter.BLUR)

        # Bin edges are -99, -96, ..., 99; subtracting 1 makes bins 0-based.
        bin_edges = np.arange(-99, 102, 3)
        labels = np.digitize([yaw, pitch, roll], bin_edges) - 1
        cont_labels = torch.FloatTensor([yaw, pitch, roll])

        out_of_range = (labels < 0).any() or (labels >= 66).any()
        if out_of_range:
            print("Out labels error {}, yxz: {}, labels: {}".format(
                sample_name, [yaw, pitch, roll], labels))

        if self.transform is not None:
            img = self.transform(img)

        return img, labels, cont_labels, sample_name
    def __getitem__(self, index):
        """Return one augmented sample; angles of 99 deg or more get label -1.

        ``labels`` is a NumPy integer array of 0-based bin indices in
        (yaw, pitch, roll) order, where index 66 (value at or above the last
        edge, 99) is rewritten to -1. ``cont_labels`` holds the same angles in
        degrees as a float tensor.
        """
        sample_name = self.X_train[index]
        img = Image.open(os.path.join(self.data_dir, sample_name + self.img_ext))
        img = img.convert(self.image_mode)
        mat_path = os.path.join(self.data_dir,
                                self.y_train[index] + self.annot_ext)

        # Bounding box of the annotated points (row 0 is x, row 1 is y).
        landmarks = utils.get_pt2d_from_mat(mat_path)
        xs, ys = landmarks[0, :], landmarks[1, :]
        left, top, right, bottom = min(xs), min(ys), max(xs), max(ys)

        # Random margin factor in [0.2, 0.4). The right/bottom margins are
        # measured on the box after left/top have already been moved.
        scale = np.random.random_sample() * 0.2 + 0.2
        left -= 0.6 * scale * abs(right - left)
        top -= 2 * scale * abs(bottom - top)
        right += 0.6 * scale * abs(right - left)
        bottom += 0.6 * scale * abs(bottom - top)
        img = img.crop((int(left), int(top), int(right), int(bottom)))

        # The annotation stores (pitch, yaw, roll) in radians; use degrees.
        pose = utils.get_ypr_from_mat(mat_path)
        pitch, yaw, roll = (pose[axis] * 180 / np.pi for axis in range(3))

        # Horizontal flip (p=0.5) negates yaw and roll.
        if np.random.random_sample() < 0.5:
            yaw, roll = -yaw, -roll
            img = img.transpose(Image.FLIP_LEFT_RIGHT)

        # Blur (p=0.05).
        if np.random.random_sample() < 0.05:
            img = img.filter(ImageFilter.BLUR)

        # Bin edges are -99, -96, ..., 99; subtracting 1 makes bins 0-based.
        bin_edges = np.arange(-99, 100, 3)
        labels = np.digitize([yaw, pitch, roll], bin_edges) - 1
        labels[labels == 66] = -1

        cont_labels = torch.tensor([yaw, pitch, roll],
                                   dtype=torch.float,
                                   requires_grad=False)

        if self.transform is not None:
            img = self.transform(img)

        return img, labels, cont_labels, sample_name
Beispiel #5
0
    def __getitem__(self, index):
        """Return one augmented sample: image, binned pose, continuous pose, name.

        Unlike sibling loaders that subtract 1, this one returns the raw
        ``np.digitize`` indices, so an angle below -99 deg maps to 0 and the
        first real bin is 1. ``labels`` is a NumPy integer array and
        ``cont_labels`` a float tensor, both in (yaw, pitch, roll) order.
        """
        # Read the image (decoded once; the crop below works on the PIL image).
        img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
        img = img.convert(self.image_mode)
        # Path of the .mat-style annotation that belongs to this image.
        mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

        # Crop the face loosely
        pt2d = utils.get_pt2d_from_mat(mat_path)
        x_min = min(pt2d[0, :])
        y_min = min(pt2d[1, :])
        x_max = max(pt2d[0, :])
        y_max = max(pt2d[1, :])

        # k = 0.2 to 0.40
        k = np.random.random_sample() * 0.2 + 0.2
        x_min -= 0.6 * k * abs(x_max - x_min)
        y_min -= 2 * k * abs(y_max - y_min)
        # The right/bottom margins use the box after x_min/y_min were moved.
        x_max += 0.6 * k * abs(x_max - x_min)
        y_max += 0.6 * k * abs(y_max - y_min)
        img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

        # The annotation stores (pitch, yaw, roll) in radians; use degrees.
        pose = utils.get_ypr_from_mat(mat_path)
        pitch = pose[0] * 180 / np.pi
        yaw = pose[1] * 180 / np.pi
        roll = pose[2] * 180 / np.pi

        # Flip? Mirroring the image negates yaw and roll.
        rnd = np.random.random_sample()
        if rnd < 0.5:
            yaw = -yaw
            roll = -roll
            img = img.transpose(Image.FLIP_LEFT_RIGHT)

        # Blur?
        rnd = np.random.random_sample()
        if rnd < 0.05:
            img = img.filter(ImageFilter.BLUR)

        # Bin values: edges are -99, -96, ..., 99 (indices intentionally not
        # shifted by -1 here).
        bins = np.array(range(-99, 102, 3))
        binned_pose = np.digitize([yaw, pitch, roll], bins)

        # Get target tensors
        labels = binned_pose
        cont_labels = torch.FloatTensor([yaw, pitch, roll])

        if self.transform is not None:
            img = self.transform(img)

        return img, labels, cont_labels, self.X_train[index]
    def __getitem__(self, index):
        """Return one augmented sample, binned with a width set by ``num_bins``.

        File names are built from the part of each list entry before its
        first '.'. The bin width is ``int(201 / self.num_bins)``, e.g. 1 for
        200 bins, 3 for 66 bins and 5 for 40 bins. ``labels`` is a NumPy
        integer array of 0-based bin indices in (yaw, pitch, roll) order.
        """
        image_stem = self.X_train[index].split('.')[0]
        img = Image.open(os.path.join(self.data_dir, image_stem + self.img_ext))
        img = img.convert(self.image_mode)
        annot_stem = self.y_train[index].split('.')[0]
        mat_path = os.path.join(self.data_dir, annot_stem + self.annot_ext)

        # Bounding box of the annotated points (row 0 is x, row 1 is y).
        landmarks = utils.get_pt2d_from_mat(mat_path)
        xs, ys = landmarks[0, :], landmarks[1, :]
        left, top, right, bottom = min(xs), min(ys), max(xs), max(ys)

        # Random margin factor in [0.2, 0.4). The right/bottom margins are
        # measured on the box after left/top have already been moved.
        scale = np.random.random_sample() * 0.2 + 0.2
        left -= 0.6 * scale * abs(right - left)
        top -= 2 * scale * abs(bottom - top)
        right += 0.6 * scale * abs(right - left)
        bottom += 0.6 * scale * abs(bottom - top)
        img = img.crop((int(left), int(top), int(right), int(bottom)))

        # The annotation stores (pitch, yaw, roll) in radians; use degrees.
        pose = utils.get_ypr_from_mat(mat_path)
        pitch, yaw, roll = (pose[axis] * 180 / np.pi for axis in range(3))

        # Horizontal flip (p=0.5) negates yaw and roll.
        if np.random.random_sample() < 0.5:
            yaw, roll = -yaw, -roll
            img = img.transpose(Image.FLIP_LEFT_RIGHT)

        # Blur (p=0.05).
        if np.random.random_sample() < 0.05:
            img = img.filter(ImageFilter.BLUR)

        # Edges start at -99 and stop before 102, spaced by the bin width.
        bin_width = int(201 / self.num_bins)
        bin_edges = np.arange(-99, 102, bin_width)
        labels = np.digitize([yaw, pitch, roll], bin_edges) - 1
        cont_labels = torch.FloatTensor([yaw, pitch, roll])

        if self.transform is not None:
            img = self.transform(img)

        return img, labels, cont_labels, self.X_train[index]
    def __getitem__(self, index):
        """Return one sample whose crop is degraded by 3x down/up-sampling.

        The crop uses fixed margins (no randomness). Bin index 66 (angle at
        or above 99 deg) is rewritten to -1. ``labels`` is a LongTensor and
        ``cont_labels`` a FloatTensor, both in (yaw, pitch, roll) order.
        """
        img = Image.open(
            os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
        img = img.convert(self.image_mode)
        mat_path = os.path.join(self.data_dir,
                                self.y_train[index] + self.annot_ext)

        # Crop the face loosely
        pt2d = utils.get_pt2d_from_mat(mat_path)
        x_min = min(pt2d[0, :])
        y_min = min(pt2d[1, :])
        x_max = max(pt2d[0, :])
        y_max = max(pt2d[1, :])

        k = 0.20
        x_min -= 2 * k * abs(x_max - x_min)
        y_min -= 2 * k * abs(y_max - y_min)
        # The right/bottom margins use the box after x_min/y_min were moved.
        x_max += 2 * k * abs(x_max - x_min)
        y_max += 0.6 * k * abs(y_max - y_min)
        img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

        # Shrink by ds and scale back. Integer division is required because
        # Image.resize only accepts integer sizes; the floor of 1 keeps tiny
        # crops from collapsing to a zero-sized image.
        ds = 3  # downsampling factor
        original_size = img.size
        small_size = (max(1, original_size[0] // ds),
                      max(1, original_size[1] // ds))
        img = img.resize(small_size, resample=Image.NEAREST)
        img = img.resize((original_size[0], original_size[1]),
                         resample=Image.NEAREST)

        # The annotation stores (pitch, yaw, roll) in radians; use degrees.
        pose = utils.get_ypr_from_mat(mat_path)
        pitch = pose[0] * 180 / np.pi
        yaw = pose[1] * 180 / np.pi
        roll = pose[2] * 180 / np.pi

        # Bin values: edges are -99, -96, ..., 99; -1 makes bins 0-based.
        bins = np.array(range(-99, 102, 3))
        binned_pose = np.digitize([yaw, pitch, roll], bins) - 1
        # Index 66 means "at or beyond the last edge"; flag it as -1.
        binned_pose[binned_pose == 66] = -1

        labels = torch.LongTensor(binned_pose)
        cont_labels = torch.FloatTensor([yaw, pitch, roll])
        if self.transform is not None:
            img = self.transform(img)

        return img, labels, cont_labels, self.X_train[index]
Beispiel #8
0
    def generate(self):
        """Yield (image, labels, cont_labels, name) for samples 0..length-1.

        The image is a loosely cropped PIL image (no transform is applied).
        ``labels`` holds the raw ``np.digitize`` indices against the edges
        -99, -96, ..., 96 (not shifted by -1) and ``cont_labels`` the angles
        in degrees; both are NumPy arrays in (yaw, pitch, roll) order.
        """
        for sample_idx in range(self.length):
            sample_name = self.X_train[sample_idx]
            img = Image.open(os.path.join(self.data_dir, sample_name + self.img_ext))
            img = img.convert(self.image_mode)
            mat_path = os.path.join(self.data_dir, self.y_train[sample_idx] + self.annot_ext)

            # Bounding box of the annotated points (row 0 is x, row 1 is y).
            landmarks = utils.get_pt2d_from_mat(mat_path)
            xs, ys = landmarks[0, :], landmarks[1, :]
            left, top, right, bottom = min(xs), min(ys), max(xs), max(ys)

            # Random margin factor in [0.2, 0.4). The right/bottom margins
            # are measured on the box after left/top have been moved.
            scale = np.random.random_sample() * 0.2 + 0.2
            left -= 0.6 * scale * abs(right - left)
            top -= 2 * scale * abs(bottom - top)
            right += 0.6 * scale * abs(right - left)
            bottom += 0.6 * scale * abs(bottom - top)
            img = img.crop((int(left), int(top), int(right), int(bottom)))

            # The annotation stores (pitch, yaw, roll) in radians; use degrees.
            pose = utils.get_ypr_from_mat(mat_path)
            pitch, yaw, roll = (pose[axis] * 180 / np.pi for axis in range(3))

            bin_edges = np.arange(-99, 99, 3)
            labels = np.digitize([yaw, pitch, roll], bin_edges)
            cont_labels = np.array([yaw, pitch, roll])

            yield img, labels, cont_labels, self.X_train[sample_idx]
Beispiel #9
0
    def __getitem__(self, index):
        """Append this sample's crop box and pose to a fixed CSV file.

        No image is loaded. One line
        ``img_path,x_min,y_min,x_max,y_max,yaw,pitch,roll`` is appended to the
        hard-coded output file and 0 is returned. The crop box contains a
        random margin, so repeated calls write different boxes.
        """
        img_path = os.path.join(self.data_dir, self.X_train[index] + self.img_ext)
        mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

        # Bounding box of the annotated points (row 0 is x, row 1 is y).
        pt2d = utils.get_pt2d_from_mat(mat_path)
        x_min = min(pt2d[0, :])
        y_min = min(pt2d[1, :])
        x_max = max(pt2d[0, :])
        y_max = max(pt2d[1, :])

        # Random margin factor k in [0.3, 0.65). The horizontal padding adds
        # (h - w) / 2 per side on top of k * h; the vertical padding is k * h
        # with the whole box shifted up by 40 pixels.
        k = np.random.random_sample() * 0.35 + 0.3
        w, h = x_max - x_min, y_max - y_min
        ratio = h / w - 1
        x_min -= (ratio / 2 * w + k * h)
        y_min -= (k * h + 40)
        x_max += (ratio / 2 * w + k * h)
        y_max += (k * h - 40)

        # The annotation stores (pitch, yaw, roll) in radians; use degrees.
        pose = utils.get_ypr_from_mat(mat_path)
        pitch = pose[0] * 180 / np.pi
        yaw = pose[1] * 180 / np.pi
        roll = pose[2] * 180 / np.pi

        # Open the output only for the write itself so the handle is always
        # closed, even if something above raised.
        with open("/media/omnisky/D4T/huli/work/headpose/data/filename_mix_biwi_300e_lp.txt", 'a') as wdata:
            wdata.write("{},{},{},{},{},{},{},{}\n".format(img_path, x_min, y_min, x_max, y_max, yaw, pitch, roll))
        return 0
Beispiel #10
0
    def __getitem__(self, index):
        """Return one heavily augmented sample: image, bins, angles, name.

        Augmentations, in the order their random numbers are drawn: crop
        margin, down/up-sampling, in-plane rotation, horizontal flip,
        contrast, brightness, sharpness, grayscale and blur. ``labels`` is a
        NumPy integer array of 0-based indices into ``self.bins`` and
        ``cont_labels`` a float tensor, both in (yaw, pitch, roll) order.
        """
        img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
        img = img.convert(self.image_mode)
        mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

        # Crop the face loosely
        pt2d = utils.get_pt2d_from_mat(mat_path)
        x_min = min(pt2d[0, :])
        y_min = min(pt2d[1, :])
        x_max = max(pt2d[0, :])
        y_max = max(pt2d[1, :])

        # k = 0.2 to 0.40
        k = np.random.random_sample() * 0.2 + 0.2
        x_min -= 0.6 * k * abs(x_max - x_min)
        y_min -= 2 * k * abs(y_max - y_min)
        # The right/bottom margins use the box after x_min/y_min were moved.
        x_max += 0.6 * k * abs(x_max - x_min)
        y_max += 0.6 * k * abs(y_max - y_min)
        img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

        # The annotation stores (pitch, yaw, roll) in radians; use degrees.
        pose = utils.get_ypr_from_mat(mat_path)
        pitch = pose[0] * 180 / np.pi
        yaw = pose[1] * 180 / np.pi
        roll = pose[2] * 180 / np.pi

        # Degrade resolution: shrink by ds (1, 6, 11 or 16) and scale back.
        # The floor of 1 keeps crops smaller than ds from collapsing to a
        # zero-sized image, which Image.resize rejects.
        ds = 1 + np.random.randint(0, 4) * 5
        original_size = img.size
        small_size = (max(1, original_size[0] // ds),
                      max(1, original_size[1] // ds))
        img = img.resize(small_size, resample=Image.NEAREST)
        img = img.resize((original_size[0], original_size[1]), resample=Image.NEAREST)

        # Rotate in-plane by a random angle, but only keep the rotation when
        # every resulting angle stays within self.max_angle.
        angle = np.random.uniform(-50, 50)
        R = utils.create_rotation_matrix(yaw, pitch, roll)
        R = np.matmul(R, utils.create_rotation_matrix(0, 0, -angle))
        yaw_r, pitch_r, roll_r = utils.rotation_matrix_to_euler_angles(R)
        if np.abs(yaw_r) <= self.max_angle and np.abs(pitch_r) <= self.max_angle and np.abs(roll_r) <= self.max_angle:
            img = img.rotate(angle, resample=Image.NEAREST)
            yaw, pitch, roll = yaw_r, pitch_r, roll_r

        # Flip? Mirroring the image negates yaw and roll.
        rnd = np.random.random_sample()
        if rnd < 0.5:
            yaw = -yaw
            roll = -roll
            img = img.transpose(Image.FLIP_LEFT_RIGHT)

        # Change contrast? (p=0.35; lower or raise with equal probability)
        rnd = np.random.random_sample()
        if rnd < 0.35:
            if np.random.random_sample() < 0.5:
                img = ImageEnhance.Contrast(img).enhance(np.random.uniform(0.1, 0.9))
            else:
                img = ImageEnhance.Contrast(img).enhance(np.random.uniform(1.1, 1.9))

        # Change brightness? (p=0.35; darken or brighten with equal probability)
        rnd = np.random.random_sample()
        if rnd < 0.35:
            if np.random.random_sample() < 0.5:
                img = ImageEnhance.Brightness(img).enhance(np.random.uniform(0.15, 0.9))
            else:
                img = ImageEnhance.Brightness(img).enhance(np.random.uniform(1.1, 1.85))

        # Change sharpness?
        rnd = np.random.random_sample()
        if rnd < 0.1:
            img = ImageEnhance.Sharpness(img).enhance(np.random.uniform(0.1, 1.9))

        # Grayscale? Converted back to RGB so the channel count is unchanged.
        rnd = np.random.random_sample()
        if rnd < 0.35:
            img = img.convert('L')
            img = img.convert(mode='RGB')

        # Blur?
        rnd = np.random.random_sample()
        if rnd < 0.05:
            img = img.filter(ImageFilter.BLUR)

        # Bin values against the edges configured on the instance.
        binned_pose = np.digitize([yaw, pitch, roll], self.bins) - 1

        # Get target tensors
        labels = binned_pose
        cont_labels = torch.FloatTensor([yaw, pitch, roll])

        if self.transform is not None:
            img = self.transform(img)

        return img, labels, cont_labels, self.X_train[index]
Beispiel #11
0
    def __getitem__(self, index):
        """Return one sample from a list that mixes two annotation formats.

        Each entry of ``self.X_train`` / ``self.y_train`` is a
        ``"name,flag"`` string. Flag 1 selects the branch that uses the
        ``biwi_*`` extensions (bounding-box text file plus rotation-matrix
        pose file); any other flag selects the ``t300w_lp_*`` branch
        (landmark/pose annotation read through ``utils``). Both branches
        convert the image to gray and back to RGB, crop the face loosely with
        a random margin, then share the same flip / blur / binning steps.

        Returns ``(img, labels, cont_labels, file_name)`` where ``labels`` is
        a LongTensor of 0-based bin indices and ``cont_labels`` a FloatTensor
        of degrees, both in (yaw, pitch, roll) order, and ``file_name`` is the
        last '/'-separated component of the image name.
        """
        file_name, flag = self.X_train[index].split(",")
        ann_name, _ = self.y_train[index].split(",")

        if int(flag) == 1:
            img = cv2.imread(
                os.path.join(self.data_dir,
                             file_name + '_rgb' + self.biwi_img_ext))
            # Drop colour information but keep three channels.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
            img = Image.fromarray(img.astype(np.uint8))
            img = img.convert(self.image_mode)

            pose_path = os.path.join(self.data_dir,
                                     ann_name + '_pose' + self.biwi_annot_ext)

            # The bounding-box file lives next to the annotation and is named
            # 'dockerface-<frame>_rgb<ext>'.
            name_parts = ann_name.split('/')
            ann_dir = os.path.join('', *name_parts[:-1])
            bbox_path = os.path.join(
                self.data_dir, ann_dir + '/dockerface-' +
                name_parts[-1] + '_rgb' + self.biwi_annot_ext)

            # Load bounding box: take the first line whose second field
            # exceeds 215, otherwise fall through with the last line read.
            box_fields = []
            with open(bbox_path, 'r') as bbox_file:
                for raw in bbox_file:
                    box_fields = raw.split(" ")
                    if float(box_fields[1]) > 215.0:
                        break

            # Fields 1..4 are needed, so anything shorter than 5 fields
            # (including an empty file) falls back to the whole image.
            if len(box_fields) < 5:
                x_min, y_min, x_max, y_max = 0, 0, img.size[0], img.size[1]
            else:
                x_min, y_min, x_max, y_max = [
                    float(value) for value in box_fields[1:5]
                ]

            # Load the pose file: whitespace-separated numeric rows, of which
            # the first three form the rotation matrix.
            rows = []
            with open(pose_path, 'r') as pose_annot:
                for raw in pose_annot:
                    fields = raw.strip('\n').split(' ')
                    if fields[0] != '':
                        rows.append([float(nb) for nb in fields if nb != ''])

            R = np.transpose(np.array(rows)[:3, :])

            # Euler angles in degrees from the rotation matrix.
            roll = -np.arctan2(R[1][0], R[0][0]) * 180 / np.pi
            yaw = -np.arctan2(-R[2][0],
                              np.sqrt(R[2][1]**2 + R[2][2]**2)) * 180 / np.pi
            pitch = np.arctan2(R[2][1], R[2][2]) * 180 / np.pi

            # Loosely crop face: random margin factor k in [0.15, 0.45);
            # horizontal padding adds (h - w) / 2 per side on top of k * h,
            # and the box is shifted up by 10 pixels.
            k = np.random.random_sample() * 0.3 + 0.15
            w, h = x_max - x_min, y_max - y_min
            ratio = h / w - 1

            x_min -= ((ratio / 2.0 * w) + k * h)
            y_min -= (k * h + 10)
            x_max += (ratio / 2.0 * w) + k * h
            y_max += (k * h - 10)
            img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))
        else:
            img = cv2.imread(
                os.path.join(self.data_dir, file_name + self.t300w_lp_img_ext))
            # Drop colour information but keep three channels.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
            img = Image.fromarray(img.astype(np.uint8))
            img = img.convert(self.image_mode)

            mat_path = os.path.join(self.data_dir,
                                    ann_name + self.t300w_lp_annot_ext)

            # Bounding box of the annotated points (row 0 is x, row 1 is y).
            pt2d = utils.get_pt2d_from_mat(mat_path)
            x_min = min(pt2d[0, :])
            y_min = min(pt2d[1, :])
            x_max = max(pt2d[0, :])
            y_max = max(pt2d[1, :])

            # Random margin factor k in [0.3, 0.65); the box is shifted up by
            # 40 pixels.
            k = np.random.random_sample() * 0.35 + 0.3
            w, h = x_max - x_min, y_max - y_min
            ratio = h / w - 1
            x_min -= (ratio / 2 * w + k * h)
            y_min -= (k * h + 40)
            x_max += (ratio / 2 * w + k * h)
            y_max += (k * h - 40)
            img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

            # The annotation stores (pitch, yaw, roll) in radians; use degrees.
            pose = utils.get_ypr_from_mat(mat_path)
            pitch = pose[0] * 180 / np.pi
            yaw = pose[1] * 180 / np.pi
            roll = pose[2] * 180 / np.pi

        # Flip? Mirroring the image negates yaw and roll.
        rnd = np.random.random_sample()
        if rnd < 0.5:
            yaw = -yaw
            roll = -roll
            img = img.transpose(Image.FLIP_LEFT_RIGHT)

        # Blur?
        rnd = np.random.random_sample()
        if rnd < 0.05:
            img = img.filter(ImageFilter.BLUR)

        # Bin values only after the flip so the class labels always describe
        # the same angles as cont_labels. Edges are -99, -96, ..., 99.
        bins = np.array(range(-99, 102, 3))
        binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

        # Get target tensors
        labels = torch.LongTensor(binned_pose)
        cont_labels = torch.FloatTensor([yaw, pitch, roll])

        if self.transform is not None:
            img = self.transform(img)

        file_name = file_name.split('/')[-1]
        return img, labels, cont_labels, file_name
Beispiel #12
0
    def __getitem__(self, index):
        """Return one sample cropped to a padded, roughly square face box.

        The image is read with OpenCV and converted to gray and back to RGB.
        The crop box is built from the annotated 2D points, ignoring
        coordinates equal to -1 for the minimum and coordinates at or beyond
        the image size for the maximum, then padded and clipped to the image.

        Returns ``(img, labels, cont_labels, name)`` with ``labels`` a
        LongTensor of 0-based bin indices and ``cont_labels`` a FloatTensor of
        degrees, both in (yaw, pitch, roll) order.

        Raises:
            ValueError: if no annotated coordinate qualifies for one of the
                box sides.
        """
        img = cv2.imread(
            os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
        # Drop colour information but keep three channels.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        img = Image.fromarray(img.astype(np.uint8))
        img = img.convert(self.image_mode)
        mat_path = os.path.join(self.data_dir,
                                self.y_train[index] + self.annot_ext)

        # Crop the face loosely
        pt2d = utils.get_pt2d_from_mat(mat_path)
        ih, iw = img.height, img.width

        x_sorted = np.sort(pt2d[0, :])
        y_sorted = np.sort(pt2d[1, :])
        # Lower bounds: smallest coordinate that is not the -1 marker.
        x_low = x_sorted[x_sorted != -1]
        y_low = y_sorted[y_sorted != -1]
        # Upper bounds: largest coordinate strictly inside the image.
        x_high = x_sorted[x_sorted < iw]
        y_high = y_sorted[y_sorted < ih]
        if min(len(x_low), len(x_high), len(y_low), len(y_high)) == 0:
            raise ValueError(
                "no usable 2D points to build a crop box for sample "
                "{}".format(self.X_train[index]))
        x_min, x_max = x_low[0], x_high[-1]
        y_min, y_max = y_low[0], y_high[-1]

        # Pad the shorter side so the box becomes roughly square, add a
        # margin of k times the longer side, shift the box up by 35 pixels
        # and clip everything to the image.
        w, h = x_max - x_min, y_max - y_min
        ratio = h / w
        k = 0.4
        if ratio > 1:
            x_min = max(x_min - w * (ratio - 1.0) / 2.0 - h * k, 0)
            x_max = min(x_max + w * (ratio - 1.0) / 2.0 + h * k, iw)
            y_min = max(y_min - h * k - 35.0, 0)
            y_max = min(y_max + h * k - 35.0, ih)
        else:
            ratio = w / h
            y_min = max(y_min - h * (ratio - 1.0) / 2.0 - w * k - 35.0, 0)
            y_max = min(y_max + h * (ratio - 1.0) / 2.0 + w * k - 35.0, ih)
            x_min = max(x_min - w * k, 0)
            x_max = min(x_max + w * k, iw)

        img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

        # The annotation stores (pitch, yaw, roll) in radians; use degrees.
        pose = utils.get_ypr_from_mat(mat_path)
        pitch = pose[0] * 180 / np.pi
        yaw = pose[1] * 180 / np.pi
        roll = pose[2] * 180 / np.pi

        # Bin values: edges are -99, -96, ..., 99; -1 makes bins 0-based.
        bins = np.array(range(-99, 102, 3))
        labels = torch.LongTensor(np.digitize([yaw, pitch, roll], bins) - 1)
        cont_labels = torch.FloatTensor([yaw, pitch, roll])

        if self.transform is not None:
            img = self.transform(img)

        return img, labels, cont_labels, self.X_train[index]
Beispiel #13
0
    def __getitem__(self, index):
        """Return one augmented sample, including a RandomErasing pass.

        Steps: loose random crop, horizontal flip (p=0.5), blur (p=0.05),
        ``RandomErasing()`` on the cropped image, binning, then the optional
        transform. ``labels`` is a NumPy integer array of 0-based bin indices
        and ``cont_labels`` a float tensor, both in (yaw, pitch, roll) order.
        """
        sample_name = self.X_train[index]
        img = Image.open(os.path.join(self.data_dir, sample_name + self.img_ext))
        img = img.convert(self.image_mode)
        mat_path = os.path.join(self.data_dir,
                                self.y_train[index] + self.annot_ext)

        # Bounding box of the annotated points (row 0 is x, row 1 is y).
        landmarks = utils.get_pt2d_from_mat(mat_path)
        xs, ys = landmarks[0, :], landmarks[1, :]
        left, top, right, bottom = min(xs), min(ys), max(xs), max(ys)

        # Random margin factor in [0.3, 0.65). Horizontal padding adds
        # (height - width) / 2 per side on top of scale * height; the box is
        # shifted up by 40 pixels.
        scale = np.random.random_sample() * 0.35 + 0.3
        box_w, box_h = right - left, bottom - top
        aspect_excess = box_h / box_w - 1
        left -= (aspect_excess / 2 * box_w + scale * box_h)
        top -= (scale * box_h + 40)
        right += (aspect_excess / 2 * box_w + scale * box_h)
        bottom += (scale * box_h - 40)
        img = img.crop((int(left), int(top), int(right), int(bottom)))

        # The annotation stores (pitch, yaw, roll) in radians; use degrees.
        pose = utils.get_ypr_from_mat(mat_path)
        pitch, yaw, roll = (pose[axis] * 180 / np.pi for axis in range(3))

        # Horizontal flip (p=0.5) negates yaw and roll.
        if np.random.random_sample() < 0.5:
            yaw, roll = -yaw, -roll
            img = img.transpose(Image.FLIP_LEFT_RIGHT)

        # Blur (p=0.05).
        if np.random.random_sample() < 0.05:
            img = img.filter(ImageFilter.BLUR)

        # Erase a random region; a fresh eraser is built for every sample.
        eraser = RandomErasing()
        img = eraser(img)

        # Bin edges are -99, -96, ..., 99; subtracting 1 makes bins 0-based.
        bin_edges = np.arange(-99, 102, 3)
        labels = np.digitize([yaw, pitch, roll], bin_edges) - 1
        cont_labels = torch.FloatTensor([yaw, pitch, roll])

        if self.transform is not None:
            img = self.transform(img)

        return img, labels, cont_labels, sample_name
Beispiel #14
0
    def get(self):
        """Build and return the next batch as NumPy arrays.

        Samples are taken sequentially from ``self.cursor``. When the cursor
        reaches the end of the sample list the data is reshuffled and the
        cursor wraps to 0, so a batch may span two passes over the data.

        Returns:
            images: float array of shape (batch_size, image_size, image_size, 3).
            Llabels: int32 array (batch_size, 3) of 0-based bin indices.
            Lcont_labels: float array (batch_size, 3) of angles in degrees.
            The last two are in (yaw, pitch, roll) order.
        """
        images = np.zeros(
            (self.batch_size, self.image_size, self.image_size, 3))
        Llabels = np.zeros((self.batch_size, 3), np.int32)
        Lcont_labels = np.zeros((self.batch_size, 3))
        count = 0

        while count < self.batch_size:
            img = Image.open(
                os.path.join(self.data_dir,
                             self.X_train[self.cursor] + self.img_ext))
            img = img.convert(self.image_mode)
            mat_path = os.path.join(self.data_dir,
                                    self.y_train[self.cursor] + self.annot_ext)

            # Crop the face loosely
            pt2d = utils.get_pt2d_from_mat(mat_path)
            x_min = min(pt2d[0, :])
            y_min = min(pt2d[1, :])
            x_max = max(pt2d[0, :])
            y_max = max(pt2d[1, :])
            # k = 0.2 to 0.40
            k = np.random.random_sample() * 0.2 + 0.2
            x_min -= 0.6 * k * abs(x_max - x_min)
            y_min -= 2 * k * abs(y_max - y_min)
            # The right/bottom margins use the box after x_min/y_min moved.
            x_max += 0.6 * k * abs(x_max - x_min)
            y_max += 0.6 * k * abs(y_max - y_min)
            img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

            # The annotation stores (pitch, yaw, roll) in radians; use degrees.
            pose = utils.get_ypr_from_mat(mat_path)
            pitch = pose[0] * 180 / np.pi
            yaw = pose[1] * 180 / np.pi
            roll = pose[2] * 180 / np.pi

            # Flip? Mirroring the image negates yaw and roll.
            rnd = np.random.random_sample()
            if rnd < 0.5:
                yaw = -yaw
                roll = -roll
                img = img.transpose(Image.FLIP_LEFT_RIGHT)
            # Blur?
            rnd = np.random.random_sample()
            if rnd < 0.05:
                img = img.filter(ImageFilter.BLUR)

            # Preprocess with the file's own helpers.
            img = rescale(img)
            img = random_crop(img)
            img = nomalizing(img, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

            # Bin values: edges are -99, -96, ..., 99; -1 makes bins 0-based.
            bins = np.array(range(-99, 102, 3))
            binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

            labels = binned_pose

            cont_labels = [float(yaw), float(pitch), float(roll)]

            images[count, :, :, :] = img
            Llabels[count] = labels
            Lcont_labels[count] = cont_labels

            count += 1
            self.cursor += 1
            if self.cursor >= len(self.X_train):
                # Shuffle image and annotation names as pairs so entry i of
                # both lists keeps describing the same sample, whether or not
                # the two attributes refer to the same list object.
                pairs = list(zip(self.X_train, self.y_train))
                np.random.shuffle(pairs)
                shuffled_images = [pair[0] for pair in pairs]
                shuffled_annots = [pair[1] for pair in pairs]
                self.X_train[:] = shuffled_images
                if self.y_train is not self.X_train:
                    self.y_train[:] = shuffled_annots
                self.cursor = 0
                print("self.cursor ====0")

        return images, Llabels, Lcont_labels