def __getitem__(self, index):
    img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
    img = img.convert(self.image_mode)
    mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

    # Crop the face loosely
    pt2d = utils.get_pt2d_from_mat(mat_path)
    x_min = min(pt2d[0, :])
    y_min = min(pt2d[1, :])
    x_max = max(pt2d[0, :])
    y_max = max(pt2d[1, :])

    k = 0.20
    x_min -= 2 * k * abs(x_max - x_min)
    y_min -= 2 * k * abs(y_max - y_min)
    x_max += 2 * k * abs(x_max - x_min)
    y_max += 0.6 * k * abs(y_max - y_min)
    img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

    # We get the pose in radians
    pose = utils.get_ypr_from_mat(mat_path)
    # And convert to degrees.
    pitch = pose[0] * 180 / np.pi
    yaw = pose[1] * 180 / np.pi
    roll = pose[2] * 180 / np.pi

    # Bin values
    bins = np.array(range(-99, 102, 3))
    labels = torch.LongTensor(np.digitize([yaw, pitch, roll], bins) - 1)
    cont_labels = torch.FloatTensor([yaw, pitch, roll])

    if self.transform is not None:
        img = self.transform(img)

    return img, labels, cont_labels, self.X_train[index]

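# The utils helpers called above are not defined in this section. A minimal
# sketch, assuming 300W-LP / AFLW2000-3D style .mat annotations (a 'Pose_Para'
# field whose first three entries are pitch, yaw, roll in radians, and a 'pt2d'
# field with the 2D landmarks); the real utils module may differ.
import scipy.io as sio

def get_ypr_from_mat(mat_path):
    # First three pose parameters: pitch, yaw, roll in radians.
    mat = sio.loadmat(mat_path)
    return mat['Pose_Para'][0][:3]

def get_pt2d_from_mat(mat_path):
    # 2D facial landmarks, shape (2, N).
    mat = sio.loadmat(mat_path)
    return mat['pt2d']
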
def __getitem__(self, index):
    img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
    img = img.convert(self.image_mode)
    mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

    # Crop the face loosely
    pt2d = utils.get_pt2d_from_mat(mat_path)
    x_min = min(pt2d[0, :])
    y_min = min(pt2d[1, :])
    x_max = max(pt2d[0, :])
    y_max = max(pt2d[1, :])

    # k = 0.2 to 0.40
    k = np.random.random_sample() * 0.2 + 0.2
    x_min -= 0.6 * k * abs(x_max - x_min)
    y_min -= 2 * k * abs(y_max - y_min)
    x_max += 0.6 * k * abs(x_max - x_min)
    y_max += 0.6 * k * abs(y_max - y_min)
    img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

    # We get the pose in radians and convert to degrees.
    pose = utils.get_ypr_from_mat(mat_path)
    pitch = pose[0] * 180 / np.pi
    yaw = pose[1] * 180 / np.pi
    roll = pose[2] * 180 / np.pi

    # Simulate low resolution: downsample by ds in {1, 6, 11, 16}, then upsample back.
    # Integer division keeps the resize arguments integral.
    ds = 1 + np.random.randint(0, 4) * 5
    original_size = img.size
    img = img.resize((img.size[0] // ds, img.size[1] // ds), resample=Image.NEAREST)
    img = img.resize((original_size[0], original_size[1]), resample=Image.NEAREST)

    # Flip?
    rnd = np.random.random_sample()
    if rnd < 0.5:
        yaw = -yaw
        roll = -roll
        img = img.transpose(Image.FLIP_LEFT_RIGHT)

    # Blur?
    rnd = np.random.random_sample()
    if rnd < 0.05:
        img = img.filter(ImageFilter.BLUR)

    # Bin values
    bins = np.array(range(-99, 102, 3))
    binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

    # Get target tensors
    labels = binned_pose
    cont_labels = torch.FloatTensor([yaw, pitch, roll])

    if self.transform is not None:
        img = self.transform(img)

    return img, labels, cont_labels, self.X_train[index]

def __getitem__(self, index):
    img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
    img = img.convert(self.image_mode)
    mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

    # Crop the face loosely
    pt2d = utils.get_pt2d_from_mat(mat_path)
    x_min = min(pt2d[0, :])
    y_min = min(pt2d[1, :])
    x_max = max(pt2d[0, :])
    y_max = max(pt2d[1, :])

    # k = 0.2 to 0.40
    k = np.random.random_sample() * 0.2 + 0.2
    x_min -= 0.6 * k * abs(x_max - x_min)
    y_min -= 2 * k * abs(y_max - y_min)
    x_max += 0.6 * k * abs(x_max - x_min)
    y_max += 0.6 * k * abs(y_max - y_min)
    img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

    # We get the pose in radians and convert to degrees,
    # clamping to (-99, 99) so every angle falls inside the bin range.
    pose = utils.get_ypr_from_mat(mat_path)
    # if any(pose * 180 / np.pi <= -99) or any(pose * 180 / np.pi >= 99):
    #     with open(os.path.join(self.data_dir, "image_garbage.txt"), 'a') as file:
    #         file.write(self.X_train[index] + '\n')
    pitch = min(max(pose[0] * 180 / np.pi, -98.99), 98.99)
    yaw = min(max(pose[1] * 180 / np.pi, -98.99), 98.99)
    roll = min(max(pose[2] * 180 / np.pi, -98.99), 98.99)

    # Flip?
    rnd = np.random.random_sample()
    if rnd < 0.5:
        yaw = -yaw
        roll = -roll
        img = img.transpose(Image.FLIP_LEFT_RIGHT)

    # Blur?
    rnd = np.random.random_sample()
    if rnd < 0.05:
        img = img.filter(ImageFilter.BLUR)

    # Bin values
    bins = np.array(range(-99, 102, 3))
    binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

    # Get target tensors
    labels = binned_pose
    cont_labels = torch.FloatTensor([yaw, pitch, roll])

    if any(labels < 0) or any(labels >= 66):
        print("Out-of-range labels for {}, ypr: {}, labels: {}".format(
            self.X_train[index], [yaw, pitch, roll], labels))

    if self.transform is not None:
        img = self.transform(img)

    return img, labels, cont_labels, self.X_train[index]

def __getitem__(self, index):
    img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
    img = img.convert(self.image_mode)
    mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

    # Crop the face loosely
    pt2d = utils.get_pt2d_from_mat(mat_path)
    x_min = min(pt2d[0, :])
    y_min = min(pt2d[1, :])
    x_max = max(pt2d[0, :])
    y_max = max(pt2d[1, :])

    # k = 0.2 to 0.40
    k = np.random.random_sample() * 0.2 + 0.2
    x_min -= 0.6 * k * abs(x_max - x_min)
    y_min -= 2 * k * abs(y_max - y_min)
    x_max += 0.6 * k * abs(x_max - x_min)
    y_max += 0.6 * k * abs(y_max - y_min)
    img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

    # We get the pose in radians
    pose = utils.get_ypr_from_mat(mat_path)
    # And convert to degrees.
    pitch = pose[0] * 180 / np.pi
    yaw = pose[1] * 180 / np.pi
    roll = pose[2] * 180 / np.pi

    # Flip?
    rnd = np.random.random_sample()
    if rnd < 0.5:
        yaw = -yaw
        roll = -roll
        img = img.transpose(Image.FLIP_LEFT_RIGHT)

    # Blur?
    rnd = np.random.random_sample()
    if rnd < 0.05:
        img = img.filter(ImageFilter.BLUR)

    # Bin values
    bins = np.array(range(-99, 100, 3))
    binned_pose = np.digitize([yaw, pitch, roll], bins) - 1
    map_to_minus = np.where(binned_pose == 66)
    binned_pose[map_to_minus] = -1

    # Get target tensors
    labels = binned_pose
    cont_labels = torch.tensor([yaw, pitch, roll], dtype=torch.float, requires_grad=False)

    if self.transform is not None:
        img = self.transform(img)

    return img, labels, cont_labels, self.X_train[index]

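# Quick standalone check of the binning used in these variants: both
# range(-99, 102, 3) and range(-99, 100, 3) produce the same 67 edges
# (-99, -96, ..., 99), so np.digitize(...) - 1 yields class ids 0..65 for
# in-range angles, -1 for angles below -99 and 66 for angles at or above 99;
# the variant above then remaps 66 to -1, presumably so out-of-range poses can
# be ignored by the classification loss.
bins = np.array(range(-99, 102, 3))
assert len(bins) == 67
for angle in (-100.0, -99.0, 0.0, 98.9, 99.0):
    print(angle, np.digitize([angle], bins)[0] - 1)  # -> -1, 0, 33, 65, 66
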
def __getitem__(self, index):
    # Read one image (PIL copy for the pipeline, OpenCV copy for visual checks).
    img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
    im = cv2.imread(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
    img = img.convert(self.image_mode)
    # Read this image's .mat annotation.
    mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

    # Crop the face loosely
    pt2d = utils.get_pt2d_from_mat(mat_path)
    x_min = min(pt2d[0, :])
    y_min = min(pt2d[1, :])
    x_max = max(pt2d[0, :])
    y_max = max(pt2d[1, :])

    # k = 0.2 to 0.40
    k = np.random.random_sample() * 0.2 + 0.2
    x_min -= 0.6 * k * abs(x_max - x_min)
    y_min -= 2 * k * abs(y_max - y_min)
    x_max += 0.6 * k * abs(x_max - x_min)
    y_max += 0.6 * k * abs(y_max - y_min)
    img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))
    im = im[int(y_min):int(y_max), int(x_min):int(x_max)]
    # cv2.imwrite('1.jpg', im)  # check whether the labels are correct

    # We get the pose in radians
    pose = utils.get_ypr_from_mat(mat_path)
    # And convert to degrees.
    pitch = pose[0] * 180 / np.pi
    yaw = pose[1] * 180 / np.pi
    roll = pose[2] * 180 / np.pi

    # Flip?
    rnd = np.random.random_sample()
    if rnd < 0.5:
        yaw = -yaw
        roll = -roll
        img = img.transpose(Image.FLIP_LEFT_RIGHT)

    # Blur?
    rnd = np.random.random_sample()
    if rnd < 0.05:
        img = img.filter(ImageFilter.BLUR)

    # Bin values (note: unlike the other variants, the digitize result is not shifted here).
    bins = np.array(range(-99, 102, 3))
    binned_pose = np.digitize([yaw, pitch, roll], bins)  # - 1

    # Get target tensors
    labels = binned_pose
    cont_labels = torch.FloatTensor([yaw, pitch, roll])

    if self.transform is not None:
        img = self.transform(img)

    return img, labels, cont_labels, self.X_train[index]

def __getitem__(self, index):
    img = Image.open(os.path.join(self.data_dir, self.X_train[index].split('.')[0] + self.img_ext))
    img = img.convert(self.image_mode)
    mat_path = os.path.join(self.data_dir, self.y_train[index].split('.')[0] + self.annot_ext)

    # Crop the face loosely
    pt2d = utils.get_pt2d_from_mat(mat_path)
    x_min = min(pt2d[0, :])
    y_min = min(pt2d[1, :])
    x_max = max(pt2d[0, :])
    y_max = max(pt2d[1, :])

    # k = 0.2 to 0.40
    k = np.random.random_sample() * 0.2 + 0.2
    x_min -= 0.6 * k * abs(x_max - x_min)
    y_min -= 2 * k * abs(y_max - y_min)
    x_max += 0.6 * k * abs(x_max - x_min)
    y_max += 0.6 * k * abs(y_max - y_min)
    img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

    # We get the pose in radians
    pose = utils.get_ypr_from_mat(mat_path)
    # And convert to degrees.
    pitch = pose[0] * 180 / np.pi
    yaw = pose[1] * 180 / np.pi
    roll = pose[2] * 180 / np.pi

    # Flip?
    rnd = np.random.random_sample()
    if rnd < 0.5:
        yaw = -yaw
        roll = -roll
        img = img.transpose(Image.FLIP_LEFT_RIGHT)

    # Blur?
    rnd = np.random.random_sample()
    if rnd < 0.05:
        img = img.filter(ImageFilter.BLUR)

    # Bin values with a configurable number of bins:
    # 200 bins -> bin width 1; 66 bins -> bin width 3; 40 bins -> bin width 5.
    bin_width = int((102 - (-99)) / self.num_bins)
    bins = np.array(range(-99, 102, bin_width))
    binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

    # Get target tensors
    labels = binned_pose
    cont_labels = torch.FloatTensor([yaw, pitch, roll])

    if self.transform is not None:
        img = self.transform(img)

    return img, labels, cont_labels, self.X_train[index]

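# The bin-width arithmetic above, spelled out: the angle range spans
# 102 - (-99) = 201 degrees, so int(201 / num_bins) gives
#   num_bins = 200 -> bin_width 1,  num_bins = 66 -> bin_width 3,  num_bins = 40 -> bin_width 5.
for num_bins in (200, 66, 40):
    print(num_bins, int((102 - (-99)) / num_bins))
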
def __getitem__(self, index):
    img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
    img = img.convert(self.image_mode)
    mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

    # Crop the face loosely
    pt2d = utils.get_pt2d_from_mat(mat_path)
    x_min = min(pt2d[0, :])
    y_min = min(pt2d[1, :])
    x_max = max(pt2d[0, :])
    y_max = max(pt2d[1, :])

    k = 0.20
    x_min -= 2 * k * abs(x_max - x_min)
    y_min -= 2 * k * abs(y_max - y_min)
    x_max += 2 * k * abs(x_max - x_min)
    y_max += 0.6 * k * abs(y_max - y_min)
    img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

    # Simulate low resolution with a fixed downsampling factor; integer division
    # keeps the resize arguments integral.
    ds = 3  # downsampling factor
    original_size = img.size
    img = img.resize((img.size[0] // ds, img.size[1] // ds), resample=Image.NEAREST)
    img = img.resize((original_size[0], original_size[1]), resample=Image.NEAREST)

    # We get the pose in radians
    pose = utils.get_ypr_from_mat(mat_path)
    # And convert to degrees.
    pitch = pose[0] * 180 / np.pi
    yaw = pose[1] * 180 / np.pi
    roll = pose[2] * 180 / np.pi

    # Bin values; angles at or above 99 degrees land in bin 66 and are remapped to -1.
    bins = np.array(range(-99, 102, 3))
    binned_pose = np.digitize([yaw, pitch, roll], bins) - 1
    map_to_minus = np.where(binned_pose == 66)
    binned_pose[map_to_minus] = -1

    labels = torch.LongTensor(binned_pose)
    cont_labels = torch.FloatTensor([yaw, pitch, roll])

    if self.transform is not None:
        img = self.transform(img)

    return img, labels, cont_labels, self.X_train[index]

def generate(self):
    for index in range(self.length):
        img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
        img = img.convert(self.image_mode)
        mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

        # Crop the face loosely
        pt2d = utils.get_pt2d_from_mat(mat_path)
        x_min = min(pt2d[0, :])
        y_min = min(pt2d[1, :])
        x_max = max(pt2d[0, :])
        y_max = max(pt2d[1, :])

        # k = 0.2 to 0.40
        k = np.random.random_sample() * 0.2 + 0.2
        x_min -= 0.6 * k * abs(x_max - x_min)
        y_min -= 2 * k * abs(y_max - y_min)
        x_max += 0.6 * k * abs(x_max - x_min)
        y_max += 0.6 * k * abs(y_max - y_min)
        img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

        # We get the pose in radians
        pose = utils.get_ypr_from_mat(mat_path)
        # And convert to degrees.
        pitch = pose[0] * 180 / np.pi
        yaw = pose[1] * 180 / np.pi
        roll = pose[2] * 180 / np.pi

        # Bin values
        bins = np.array(range(-99, 99, 3))
        binned_pose = np.digitize([yaw, pitch, roll], bins)

        # Get target arrays
        labels = binned_pose
        cont_labels = np.array([yaw, pitch, roll])

        yield img, labels, cont_labels, self.X_train[index]

def __getitem__(self, index):
    wdata = open("/media/omnisky/D4T/huli/work/headpose/data/filename_mix_biwi_300e_lp.txt", 'a')
    img_path = os.path.join(self.data_dir, self.X_train[index] + self.img_ext)
    # img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
    # img = img.convert(self.image_mode)
    mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

    # Crop the face loosely
    pt2d = utils.get_pt2d_from_mat(mat_path)
    x_min = min(pt2d[0, :])
    y_min = min(pt2d[1, :])
    x_max = max(pt2d[0, :])
    y_max = max(pt2d[1, :])

    # k = 0.3 to 0.65
    k = np.random.random_sample() * 0.35 + 0.3
    w, h = x_max - x_min, y_max - y_min
    ratio = h / w - 1
    x_min -= (ratio / 2 * w + k * h)
    y_min -= (k * h + 40)
    x_max += (ratio / 2 * w + k * h)
    y_max += (k * h - 40)
    # x_min -= 0.6 * k * abs(x_max - x_min)
    # y_min -= 2 * k * abs(y_max - y_min)
    # x_max += 0.6 * k * abs(x_max - x_min)
    # y_max += 0.6 * k * abs(y_max - y_min)
    # img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

    # We get the pose in radians
    pose = utils.get_ypr_from_mat(mat_path)
    # And convert to degrees.
    pitch = pose[0] * 180 / np.pi
    yaw = pose[1] * 180 / np.pi
    roll = pose[2] * 180 / np.pi

    # Flip?
    # rnd = np.random.random_sample()
    # if rnd < 0.5:
    #     yaw = -yaw
    #     roll = -roll
    #     img = img.transpose(Image.FLIP_LEFT_RIGHT)

    # Blur?
    # rnd = np.random.random_sample()
    # if rnd < 0.05:
    #     img = img.filter(ImageFilter.BLUR)

    # erase
    # img = RandomErasing()(img)

    # Bin values
    # bins = np.array(range(-99, 102, 3))
    # binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

    # Get target tensors
    # labels = binned_pose
    # cont_labels = torch.FloatTensor([yaw, pitch, roll])
    # if self.transform is not None:
    #     img = self.transform(img)

    # Export one line per sample: image path, expanded box and pose in degrees.
    wdata.write("{},{},{},{},{},{},{},{}\n".format(
        img_path, x_min, y_min, x_max, y_max, yaw, pitch, roll))
    wdata.close()
    return 0

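# A small reader for the lines exported above; each line is
# "img_path,x_min,y_min,x_max,y_max,yaw,pitch,roll". Purely illustrative.
def read_exported_line(line):
    parts = line.rstrip('\n').split(',')
    img_path = parts[0]
    x_min, y_min, x_max, y_max, yaw, pitch, roll = map(float, parts[1:])
    return img_path, (x_min, y_min, x_max, y_max), (yaw, pitch, roll)
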
def __getitem__(self, index):
    img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
    img = img.convert(self.image_mode)
    mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

    # Crop the face loosely
    pt2d = utils.get_pt2d_from_mat(mat_path)
    x_min = min(pt2d[0, :])
    y_min = min(pt2d[1, :])
    x_max = max(pt2d[0, :])
    y_max = max(pt2d[1, :])

    # k = 0.2 to 0.40
    k = np.random.random_sample() * 0.2 + 0.2
    x_min -= 0.6 * k * abs(x_max - x_min)
    y_min -= 2 * k * abs(y_max - y_min)
    x_max += 0.6 * k * abs(x_max - x_min)
    y_max += 0.6 * k * abs(y_max - y_min)
    img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

    # We get the pose in radians and convert to degrees.
    pose = utils.get_ypr_from_mat(mat_path)
    pitch = pose[0] * 180 / np.pi
    yaw = pose[1] * 180 / np.pi
    roll = pose[2] * 180 / np.pi

    # Simulate low resolution: downsample by ds in {1, 6, 11, 16}, then upsample back.
    ds = 1 + np.random.randint(0, 4) * 5
    original_size = img.size
    img = img.resize((img.size[0] // ds, img.size[1] // ds), resample=Image.NEAREST)
    img = img.resize((original_size[0], original_size[1]), resample=Image.NEAREST)

    # Rotate in-plane and update the pose labels accordingly.
    angle = np.random.uniform(-50, 50)
    R = utils.create_rotation_matrix(yaw, pitch, roll)
    R = np.matmul(R, utils.create_rotation_matrix(0, 0, -angle))
    yaw_r, pitch_r, roll_r = utils.rotation_matrix_to_euler_angles(R)
    if np.abs(yaw_r) <= self.max_angle and np.abs(pitch_r) <= self.max_angle and np.abs(roll_r) <= self.max_angle:
        img = img.rotate(angle, resample=Image.NEAREST)
        # utils.draw_axis_pil(img, yaw_r, pitch_r, roll_r).show()
        yaw, pitch, roll = yaw_r, pitch_r, roll_r
        # exit()

    # Flip?
    rnd = np.random.random_sample()
    if rnd < 0.5:
        yaw = -yaw
        roll = -roll
        img = img.transpose(Image.FLIP_LEFT_RIGHT)

    # Change contrast?
    rnd = np.random.random_sample()
    if rnd < 0.35:
        if np.random.random_sample() < 0.5:
            img = ImageEnhance.Contrast(img).enhance(np.random.uniform(0.1, 0.9))
        else:
            img = ImageEnhance.Contrast(img).enhance(np.random.uniform(1.1, 1.9))

    # Change brightness?
    rnd = np.random.random_sample()
    if rnd < 0.35:
        if np.random.random_sample() < 0.5:
            img = ImageEnhance.Brightness(img).enhance(np.random.uniform(0.15, 0.9))
        else:
            img = ImageEnhance.Brightness(img).enhance(np.random.uniform(1.1, 1.85))

    # Change sharpness?
    rnd = np.random.random_sample()
    if rnd < 0.1:
        img = ImageEnhance.Sharpness(img).enhance(np.random.uniform(0.1, 1.9))

    # Grayscale?
    rnd = np.random.random_sample()
    if rnd < 0.35:
        img = img.convert('L')
        img = img.convert(mode='RGB')

    # Blur?
    rnd = np.random.random_sample()
    if rnd < 0.05:
        img = img.filter(ImageFilter.BLUR)

    # Bin values
    binned_pose = np.digitize([yaw, pitch, roll], self.bins) - 1

    # Get target tensors
    labels = binned_pose
    cont_labels = torch.FloatTensor([yaw, pitch, roll])

    if self.transform is not None:
        img = self.transform(img)

    return img, labels, cont_labels, self.X_train[index]

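# utils.create_rotation_matrix and utils.rotation_matrix_to_euler_angles are not
# shown in this section. A plausible Z-Y-X sketch (degrees in, degrees out); the
# actual angle convention used by the real utils module may differ.
def create_rotation_matrix(yaw, pitch, roll):
    y, p, r = np.radians([yaw, pitch, roll])
    Rx = np.array([[1, 0, 0], [0, np.cos(p), -np.sin(p)], [0, np.sin(p), np.cos(p)]])
    Ry = np.array([[np.cos(y), 0, np.sin(y)], [0, 1, 0], [-np.sin(y), 0, np.cos(y)]])
    Rz = np.array([[np.cos(r), -np.sin(r), 0], [np.sin(r), np.cos(r), 0], [0, 0, 1]])
    return Rz @ Ry @ Rx

def rotation_matrix_to_euler_angles(R):
    # Inverse of the composition above; returns yaw, pitch, roll in degrees.
    sy = np.sqrt(R[0, 0] ** 2 + R[1, 0] ** 2)
    if sy > 1e-6:
        pitch = np.arctan2(R[2, 1], R[2, 2])
        yaw = np.arctan2(-R[2, 0], sy)
        roll = np.arctan2(R[1, 0], R[0, 0])
    else:  # gimbal lock: pitch and roll are no longer separable
        pitch = np.arctan2(-R[1, 2], R[1, 1])
        yaw = np.arctan2(-R[2, 0], sy)
        roll = 0.0
    return np.degrees(yaw), np.degrees(pitch), np.degrees(roll)
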
def __getitem__(self, index):
    file_name, flag = self.X_train[index].split(",")
    ann_name, _ = self.y_train[index].split(",")
    yaw, pitch, roll = 0, 0, 0

    if int(flag) == 1:
        # BIWI sample
        img = cv2.imread(os.path.join(self.data_dir, file_name + '_rgb' + self.biwi_img_ext))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        img = Image.fromarray(img.astype(np.uint8))
        # img = Image.open(os.path.join(self.data_dir, file_name + '_rgb' + self.biwi_img_ext))
        img = img.convert(self.image_mode)
        # orial_img = img.copy()
        pose_path = os.path.join(self.data_dir, ann_name + '_pose' + self.biwi_annot_ext)

        y_train_list = ann_name.split('/')
        y_train_new = y_train_list[0:-1]
        temp = ''
        for idt in y_train_new:
            temp = os.path.join(temp, idt)
        y_train_new = temp
        bbox_path = os.path.join(
            self.data_dir,
            y_train_new + '/dockerface-' + y_train_list[-1] + '_rgb' + self.biwi_annot_ext)

        # Load bounding box
        # bbox = open(bbox_path, 'r')
        with open(bbox_path, 'r') as tf:
            for tdata in tf.readlines():
                line = tdata.split(" ")
                if float(line[1]) > 215.0:
                    break
        # line = bbox.readline().split(' ')
        if len(line) < 4:
            x_min, y_min, x_max, y_max = 0, 0, img.size[0], img.size[1]
        else:
            x_min, y_min, x_max, y_max = [
                float(line[1]), float(line[2]), float(line[3]), float(line[4])]
        # bbox.close()

        # Load pose in degrees
        pose_annot = open(pose_path, 'r')
        R = []
        for line in pose_annot:
            line = line.strip('\n').split(' ')
            l = []
            if line[0] != '':
                for nb in line:
                    if nb == '':
                        continue
                    l.append(float(nb))
                R.append(l)
        R = np.array(R)
        T = R[3, :]
        R = R[:3, :]
        pose_annot.close()

        R = np.transpose(R)
        roll = -np.arctan2(R[1][0], R[0][0]) * 180 / np.pi
        yaw = -np.arctan2(-R[2][0], np.sqrt(R[2][1] ** 2 + R[2][2] ** 2)) * 180 / np.pi
        pitch = np.arctan2(R[2][1], R[2][2]) * 180 / np.pi

        # Loosely crop face; k = 0.15 to 0.45
        k = np.random.random_sample() * 0.3 + 0.15  # k = 0.35
        w, h = x_max - x_min, y_max - y_min
        ratio = h / w - 1
        x_min -= ((ratio / 2.0 * w) + k * h)
        y_min -= (k * h + 10)
        x_max += ((ratio / 2.0 * w) + k * h)
        y_max += (k * h - 10)
        # x_min -= 0.6 * k * abs(x_max - x_min)
        # y_min -= k * abs(y_max - y_min)
        # x_max += 0.6 * k * abs(x_max - x_min)
        # y_max += 0.6 * k * abs(y_max - y_min)
        img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

        # Bin values
        bins = np.array(range(-99, 102, 3))
        binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

        # Flip?
        rnd = np.random.random_sample()
        if rnd < 0.5:
            yaw = -yaw
            roll = -roll
            img = img.transpose(Image.FLIP_LEFT_RIGHT)

        # Blur?
        rnd = np.random.random_sample()
        if rnd < 0.05:
            img = img.filter(ImageFilter.BLUR)
        # orial_img = img.copy()

        # erase
        # img = RandomErasing()(img)

        if self.transform is not None:
            img = self.transform(img)

        labels = torch.LongTensor(binned_pose)
        cont_labels = torch.FloatTensor([yaw, pitch, roll])
        # file_name = torch.tensor(file_name)
        # return img, labels, cont_labels, file_name
    else:
        # 300W-LP sample
        img = cv2.imread(os.path.join(self.data_dir, file_name + self.t300w_lp_img_ext))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        img = Image.fromarray(img.astype(np.uint8))
        # img = Image.open(os.path.join(self.data_dir, file_name + self.t300w_lp_img_ext))
        img = img.convert(self.image_mode)
        mat_path = os.path.join(self.data_dir, ann_name + self.t300w_lp_annot_ext)

        # Crop the face loosely
        pt2d = utils.get_pt2d_from_mat(mat_path)
        x_min = min(pt2d[0, :])
        y_min = min(pt2d[1, :])
        x_max = max(pt2d[0, :])
        y_max = max(pt2d[1, :])

        # k = 0.3 to 0.65
        k = np.random.random_sample() * 0.35 + 0.3
        w, h = x_max - x_min, y_max - y_min
        ratio = h / w - 1
        x_min -= (ratio / 2 * w + k * h)
        y_min -= (k * h + 40)
        x_max += (ratio / 2 * w + k * h)
        y_max += (k * h - 40)
        # x_min -= 0.6 * k * abs(x_max - x_min)
        # y_min -= 2 * k * abs(y_max - y_min)
        # x_max += 0.6 * k * abs(x_max - x_min)
        # y_max += 0.6 * k * abs(y_max - y_min)
        img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

        # We get the pose in radians
        pose = utils.get_ypr_from_mat(mat_path)
        # And convert to degrees.
        pitch = pose[0] * 180 / np.pi
        yaw = pose[1] * 180 / np.pi
        roll = pose[2] * 180 / np.pi

        # Flip?
        rnd = np.random.random_sample()
        if rnd < 0.5:
            yaw = -yaw
            roll = -roll
            img = img.transpose(Image.FLIP_LEFT_RIGHT)

        # Blur?
        rnd = np.random.random_sample()
        if rnd < 0.05:
            img = img.filter(ImageFilter.BLUR)

        # erase
        # img = RandomErasing()(img)
        # orial_img = img.copy()

        # Bin values
        bins = np.array(range(-99, 102, 3))
        binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

        # Get target tensors
        labels = torch.LongTensor(binned_pose)
        cont_labels = torch.FloatTensor([yaw, pitch, roll])

        if self.transform is not None:
            img = self.transform(img)

    # print(file_name)
    file_name = file_name.split('/')[-1]
    return img, labels, cont_labels, file_name

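# Standalone form of the BIWI rotation-matrix-to-Euler conversion used in the
# BIWI branch above (same arctan2 formulas and sign conventions; angles in degrees).
def biwi_matrix_to_ypr(R):
    R = np.transpose(R)
    roll = -np.arctan2(R[1][0], R[0][0]) * 180 / np.pi
    yaw = -np.arctan2(-R[2][0], np.sqrt(R[2][1] ** 2 + R[2][2] ** 2)) * 180 / np.pi
    pitch = np.arctan2(R[2][1], R[2][2]) * 180 / np.pi
    return yaw, pitch, roll
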
def __getitem__(self, index):
    img = cv2.imread(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    img = Image.fromarray(img.astype(np.uint8))
    # img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
    img = img.convert(self.image_mode)
    mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

    # Crop the face loosely
    pt2d = utils.get_pt2d_from_mat(mat_path)
    ih, iw = img.height, img.width

    x_list = pt2d[0, :]
    x_list = np.sort(x_list)
    r_x_list = x_list[np.argsort(-x_list)]
    for i in range(len(x_list)):
        if x_list[i] != -1:
            x_min = x_list[i]
            break
    for i in range(len(x_list)):
        if r_x_list[i] < iw:
            x_max = r_x_list[i]
            break

    y_list = pt2d[1, :]
    y_list = np.sort(y_list)
    r_y_list = y_list[np.argsort(-y_list)]
    for i in range(len(y_list)):
        if y_list[i] != -1:
            y_min = y_list[i]
            break
    for i in range(len(y_list)):
        if ih > r_y_list[i]:
            y_max = r_y_list[i]
            break

    w, h = x_max - x_min, y_max - y_min
    ratio = h / w
    k = 0.4
    if ratio > 1:
        x_min = max(x_min - w * (ratio - 1.0) / 2.0 - h * k, 0)
        x_max = min(x_max + w * (ratio - 1.0) / 2.0 + h * k, iw)
        y_min = max(y_min - h * k - 35.0, 0)
        y_max = min(y_max + h * k - 35.0, ih)
    else:
        ratio = w / h
        y_min = max(y_min - h * (ratio - 1.0) / 2.0 - w * k - 35.0, 0)
        y_max = min(y_max + h * (ratio - 1.0) / 2.0 + w * k - 35.0, ih)
        x_min = max(x_min - w * k, 0)
        x_max = min(x_max + w * k, iw)

    # x_min = min(pt2d[0, :])
    # y_min = min(pt2d[1, :])
    # x_max = max(pt2d[0, :])
    # y_max = max(pt2d[1, :])
    # k = 0.20
    # x_min -= 2 * k * abs(x_max - x_min)
    # y_min -= 2 * k * abs(y_max - y_min)
    # x_max += 2 * k * abs(x_max - x_min)
    # y_max += 0.6 * k * abs(y_max - y_min)

    img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

    # We get the pose in radians
    pose = utils.get_ypr_from_mat(mat_path)
    # And convert to degrees.
    pitch = pose[0] * 180 / np.pi
    yaw = pose[1] * 180 / np.pi
    roll = pose[2] * 180 / np.pi

    # Bin values
    bins = np.array(range(-99, 102, 3))
    labels = torch.LongTensor(np.digitize([yaw, pitch, roll], bins) - 1)
    cont_labels = torch.FloatTensor([yaw, pitch, roll])

    if self.transform is not None:
        img = self.transform(img)

    return img, labels, cont_labels, self.X_train[index]

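# A hypothetical vectorized equivalent of the landmark filtering loops above:
# the box is bounded by the smallest coordinate that is not the -1 "invisible"
# marker and the largest coordinate that still lies inside the image.
def tight_box_from_landmarks(pt2d, iw, ih):
    xs, ys = pt2d[0, :], pt2d[1, :]
    return xs[xs != -1].min(), ys[ys != -1].min(), xs[xs < iw].max(), ys[ys < ih].max()
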
def __getitem__(self, index):
    img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
    img = img.convert(self.image_mode)
    mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

    # Crop the face loosely
    pt2d = utils.get_pt2d_from_mat(mat_path)
    x_min = min(pt2d[0, :])
    y_min = min(pt2d[1, :])
    x_max = max(pt2d[0, :])
    y_max = max(pt2d[1, :])

    # k = 0.3 to 0.65
    k = np.random.random_sample() * 0.35 + 0.3
    w, h = x_max - x_min, y_max - y_min
    ratio = h / w - 1
    x_min -= (ratio / 2 * w + k * h)
    y_min -= (k * h + 40)
    x_max += (ratio / 2 * w + k * h)
    y_max += (k * h - 40)
    # x_min -= 0.6 * k * abs(x_max - x_min)
    # y_min -= 2 * k * abs(y_max - y_min)
    # x_max += 0.6 * k * abs(x_max - x_min)
    # y_max += 0.6 * k * abs(y_max - y_min)
    img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

    # We get the pose in radians
    pose = utils.get_ypr_from_mat(mat_path)
    # And convert to degrees.
    pitch = pose[0] * 180 / np.pi
    yaw = pose[1] * 180 / np.pi
    roll = pose[2] * 180 / np.pi

    # Flip?
    rnd = np.random.random_sample()
    if rnd < 0.5:
        yaw = -yaw
        roll = -roll
        img = img.transpose(Image.FLIP_LEFT_RIGHT)

    # Blur?
    rnd = np.random.random_sample()
    if rnd < 0.05:
        img = img.filter(ImageFilter.BLUR)

    # Random erasing
    img = RandomErasing()(img)

    # Bin values
    bins = np.array(range(-99, 102, 3))
    binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

    # Get target tensors
    labels = binned_pose
    cont_labels = torch.FloatTensor([yaw, pitch, roll])

    if self.transform is not None:
        img = self.transform(img)

    return img, labels, cont_labels, self.X_train[index]

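# RandomErasing is called above (and commented out in other variants) but not
# defined in this section. A rough PIL-level sketch in the spirit of Zhong et
# al.'s Random Erasing augmentation; the probability, area range and fill
# colour here are guesses, not the original implementation.
class RandomErasing(object):
    def __init__(self, p=0.5, area=(0.02, 0.2), fill=(124, 117, 104)):
        self.p = p
        self.area = area
        self.fill = fill

    def __call__(self, img):
        if np.random.random_sample() > self.p:
            return img
        w, h = img.size
        erase_area = np.random.uniform(*self.area) * w * h
        aspect = np.random.uniform(0.3, 3.3)
        ew = int(round(np.sqrt(erase_area * aspect)))
        eh = int(round(np.sqrt(erase_area / aspect)))
        if 0 < ew < w and 0 < eh < h:
            x0 = np.random.randint(0, w - ew)
            y0 = np.random.randint(0, h - eh)
            fill = self.fill if img.mode == 'RGB' else self.fill[0]
            img.paste(Image.new(img.mode, (ew, eh), fill), (x0, y0))
        return img
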
def get(self):
    images = np.zeros((self.batch_size, self.image_size, self.image_size, 3))
    Llabels = np.zeros((self.batch_size, 3), np.int32)
    Lcont_labels = np.zeros((self.batch_size, 3))

    count = 0
    while count < self.batch_size:
        img = Image.open(os.path.join(self.data_dir, self.X_train[self.cursor] + self.img_ext))
        # print('img', img.size)
        img = img.convert(self.image_mode)
        mat_path = os.path.join(self.data_dir, self.y_train[self.cursor] + self.annot_ext)

        # Crop the face loosely
        pt2d = utils.get_pt2d_from_mat(mat_path)
        x_min = min(pt2d[0, :])
        y_min = min(pt2d[1, :])
        x_max = max(pt2d[0, :])
        y_max = max(pt2d[1, :])

        # k = 0.2 to 0.40
        k = np.random.random_sample() * 0.2 + 0.2
        x_min -= 0.6 * k * abs(x_max - x_min)
        y_min -= 2 * k * abs(y_max - y_min)
        x_max += 0.6 * k * abs(x_max - x_min)
        y_max += 0.6 * k * abs(y_max - y_min)
        img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

        # We get the pose in radians and convert to degrees.
        pose = utils.get_ypr_from_mat(mat_path)
        pitch = pose[0] * 180 / np.pi
        yaw = pose[1] * 180 / np.pi
        roll = pose[2] * 180 / np.pi

        # Flip?
        rnd = np.random.random_sample()
        if rnd < 0.5:
            yaw = -yaw
            roll = -roll
            img = img.transpose(Image.FLIP_LEFT_RIGHT)

        # Blur?
        rnd = np.random.random_sample()
        if rnd < 0.05:
            img = img.filter(ImageFilter.BLUR)

        # Preprocess
        img = rescale(img)
        img = random_crop(img)
        img = nomalizing(img, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

        # Bin values
        bins = np.array(range(-99, 102, 3))
        binned_pose = np.digitize([yaw, pitch, roll], bins) - 1
        labels = binned_pose
        cont_labels = [float(yaw), float(pitch), float(roll)]

        images[count, :, :, :] = img
        Llabels[count] = labels
        Lcont_labels[count] = cont_labels
        count += 1
        self.cursor += 1
        if self.cursor >= len(self.X_train):
            # NOTE: this shuffles X_train only; y_train must reference the same
            # list, otherwise image/annotation pairs will desynchronize here.
            np.random.shuffle(self.X_train)
            self.cursor = 0
            print("cursor reset to 0")
            # print(self.X_train[0])

    return images, Llabels, Lcont_labels
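# rescale, random_crop and nomalizing are external helpers not shown in this
# section. A minimal sketch consistent with how they are called above (the
# result must be an HWC float array of shape (image_size, image_size, 3));
# the default sizes here are guesses.
def rescale(img, size=240):
    # Resize the shorter side to `size`, keeping the aspect ratio.
    w, h = img.size
    scale = float(size) / min(w, h)
    return img.resize((max(1, int(w * scale)), max(1, int(h * scale))), Image.BILINEAR)

def random_crop(img, size=224):
    # Crop a random size x size window (assumes size matches self.image_size).
    w, h = img.size
    x0 = np.random.randint(0, max(1, w - size + 1))
    y0 = np.random.randint(0, max(1, h - size + 1))
    return img.crop((x0, y0, x0 + size, y0 + size))

def nomalizing(img, mean, std):
    # Original (misspelled) name kept so the call above still resolves.
    arr = np.asarray(img, dtype=np.float32) / 255.0
    return (arr - np.array(mean, dtype=np.float32)) / np.array(std, dtype=np.float32)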