def getDataFromTXT_68(filepath, test=False):
    '''
    Get data from a 68-point landmark annotation list.

    Each non-blank line is split on single spaces into:
    image path, four integer bbox fields, then 68 (x, y) landmark pairs.

    Input:
    - filepath: trainImageList or testImageList; image paths are joined
      onto the directory that contains this file
    - test: when True the landmark columns are not read
    Output:
    - A list of tuples (imgpath, bbox, landmark), or (imgpath, bbox) when
      test is True
        - imgpath: train image or test image
        - bbox: type of BBox, built from the four integers in file order
        - landmark: (68, 2) array as returned by bbox.projectLandmark
          (the original author annotates its range as [0,1])
    '''
    num_points = 68
    dirname = os.path.dirname(filepath)
    data = []
    # The context manager closes the list file even if a malformed line raises.
    with open(filepath, 'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank (e.g. trailing) lines instead of IndexError.
                continue
            s = line.strip().split(' ')
            # Annotation files may use Windows separators.
            imgPath = os.path.join(dirname, s[0].replace('\\', '/'))
            # Materialise a list: on Python 3 a bare map() is a one-shot
            # iterator and cannot be indexed.
            bbox = BBox([int(s[i]) for i in range(1, 5)])
            if test:
                data.append((imgPath, bbox))
                continue
            landmark = np.array([(float(s[5 + i * 2]), float(s[5 + i * 2 + 1]))
                                 for i in range(num_points)])
            landmark = bbox.projectLandmark(landmark)  # [0,1]
            data.append((imgPath, bbox, landmark))
    return data
def getDataFromTXT(filepath, test=False):
    '''
    Get data from dataset mentioned in paper.

    Each non-blank line is split on single spaces into:
    image path, four integer bbox fields, then 5 (x, y) landmark pairs.

    Input:
    - filepath: trainImageList or testImageList; image paths are joined
      onto the directory that contains this file
    - test: when True the landmark columns are not read
    Output:
    - A list of tuples (imgpath, bbox, landmark), or (imgpath, bbox) when
      test is True
        - imgpath: train image or test image
        - bbox: type of BBox, built from the four integers in file order
        - landmark: (5, 2) array as returned by bbox.projectLandmark
          (the original author annotates its range as [0,1])
    '''
    num_points = 5
    dirname = os.path.dirname(filepath)
    data = []
    # The context manager closes the list file even if a malformed line raises.
    with open(filepath, 'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank (e.g. trailing) lines instead of IndexError.
                continue
            s = line.strip().split(' ')
            # Annotation files may use Windows separators.
            imgPath = os.path.join(dirname, s[0].replace('\\', '/'))
            # Materialise a list: on Python 3 a bare map() is a one-shot
            # iterator and cannot be indexed.
            bbox = BBox([int(s[i]) for i in range(1, 5)])
            if test:
                data.append((imgPath, bbox))
                continue
            landmark = np.array([(float(s[5 + i * 2]), float(s[5 + i * 2 + 1]))
                                 for i in range(num_points)])
            landmark = bbox.projectLandmark(landmark)  # [0,1]
            data.append((imgPath, bbox, landmark))
    return data
def load_celeba_data():
    '''
    load celeba dataset and crop the face box

    The landmark list starts with a sample count, then a header line that
    is discarded, then one line per image: file name followed by integer
    landmark coordinates.

    Return a list of tuples:
        - img_path: '../dataset/img_celeba' joined with the file name
        - bbox: object of BBox
        - landmark: (N, 2) array returned by bbox.projectLandmark
          (N is 5 according to the original docstring)
    '''
    text = '/home/cunjian/code/caffe/examples/dataset/celeba/list_landmarks_celeba.txt'
    # text = 'E:\\dataset\\CelebA\\list_landmarks_celeba.txt'
    ratio = 0  # default 0.5
    result = []
    # The context manager closes the file even when a line fails to parse.
    with open(text, 'r') as fin:
        n = int(fin.readline().strip())
        fin.readline()  # drop this line [lefteye_x, lefteye_y, ...]
        for _ in range(n):
            components = fin.readline().strip().split()
            img_path = join('../dataset/img_celeba', components[0])
            # img_path = join('E:\\dataset\\CelebA\\img_celeba', components[0])
            landmark = np.asarray([int(value) for value in components[1:]],
                                  dtype=np.float32)
            # -1 lets numpy infer the row count; the former `len / 2`
            # is a float on Python 3 and is rejected by reshape.
            landmark = landmark.reshape(-1, 2)

            # crop face box: a square of side min(w, h) anchored at the
            # landmarks' minimum corner, padded by `ratio` on every side
            x_max, y_max = landmark.max(0)
            x_min, y_min = landmark.min(0)
            w, h = x_max - x_min, y_max - y_min
            w = h = min(w, h)
            x_new = x_min - w * ratio
            y_new = y_min - h * ratio
            w_new = w * (1 + 2 * ratio)
            h_new = h * (1 + 2 * ratio)
            # A real list (not a map iterator) so BBox can index it on Python 3.
            bbox = BBox(list(map(int, [x_new, x_new + w_new,
                                       y_new, y_new + h_new])))

            # normalize landmark
            landmark = bbox.projectLandmark(landmark)
            # NOTE: samples with landmarks outside [0, 1] are kept on purpose;
            # per the original author's remark, omitting samples makes the
            # program report an error.
            result.append((img_path, bbox, landmark))
    return result
def getDataFromTXT(filepath, test=False):
    '''
    Get data from dataset mentioned in paper.

    Each non-blank line is split on single spaces into:
    image path, four integer bbox fields, then 5 (x, y) landmark pairs.

    Input:
    - filepath: trainImageList or testImageList; image paths are joined
      onto the directory that contains this file
    - test: when True only a squared bounding box is produced and the
      landmark columns are not read
    Output:
    - A list of tuples (imgpath, bbox, landmark), or (imgpath, new_bbox)
      when test is True
        - imgpath: train image or test image
        - bbox: type of BBox
        - landmark: (5, 2) array from bbox.projectLandmark; samples with
          any coordinate outside [0, 1] are dropped
    '''
    num_points = 5
    dirname = os.path.dirname(filepath)
    data = []
    # The context manager closes the list file even if a malformed line raises.
    with open(filepath, 'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank (e.g. trailing) lines instead of IndexError.
                continue
            s = line.strip().split(' ')
            imgPath = os.path.join(dirname, s[0].replace('\\', '/'))
            # Materialise a list: a bare map() cannot be indexed on Python 3.
            bbox = BBox([int(s[i]) for i in range(1, 5)])

            if test:
                x_min, x_max = bbox.left, bbox.right
                y_min, y_max = bbox.top, bbox.bottom
                # Square the box to side min(w, h), anchored at (left, top).
                # `ratio` would pad it on every side; it is currently 0.
                w, h = x_max - x_min, y_max - y_min
                w = h = min(w, h)
                ratio = 0
                x_new = x_min - w * ratio
                y_new = y_min - h * ratio
                w_new = w * (1 + 2 * ratio)
                h_new = h * (1 + 2 * ratio)
                new_bbox = BBox(list(map(int, [x_new, x_new + w_new,
                                               y_new, y_new + h_new])))
                data.append((imgPath, new_bbox))
                continue

            landmark = np.array([(float(s[5 + i * 2]), float(s[5 + i * 2 + 1]))
                                 for i in range(num_points)])
            landmark = bbox.projectLandmark(landmark)  # [0,1]

            # Keep the sample only when every x/y of the five points lies
            # inside the box, i.e. within [0, 1] after projection.
            points = np.asarray(landmark)[:num_points, :2]
            if np.any((points < 0) | (points > 1)):
                continue
            data.append((imgPath, bbox, landmark))
    return data
def load_celeba_data():
    '''
    load celeba dataset and crop the face box

    The landmark list starts with a sample count, then a header line that
    is discarded, then one line per image: file name followed by integer
    landmark coordinates.

    Return a list of tuples:
        - img_path: dataset/celeba/000001.jpg
        - bbox: object of BBox
        - landmark: (N, 2) array returned by bbox.projectLandmark
          (N is 5 according to the original docstring)
    '''
    text = '../dataset/celeba/list_landmarks_celeba.txt'
    # text = 'E:\\dataset\\CelebA\\list_landmarks_celeba.txt'
    ratio = 0.5
    result = []
    # The context manager closes the file even when a line fails to parse.
    with open(text, 'r') as fin:
        n = int(fin.readline().strip())
        fin.readline()  # drop this line [lefteye_x, lefteye_y, ...]
        for _ in range(n):
            components = fin.readline().strip().split()
            img_path = join('../dataset/celeba', components[0])
            # img_path = join('E:\\dataset\\CelebA\\img_celeba', components[0])
            landmark = np.asarray([int(value) for value in components[1:]],
                                  dtype=np.float32)
            # -1 lets numpy infer the row count; the former `len / 2`
            # is a float on Python 3 and is rejected by reshape.
            landmark = landmark.reshape(-1, 2)

            # crop face box: a square of side min(w, h) anchored at the
            # landmarks' minimum corner, padded by `ratio` on every side
            x_max, y_max = landmark.max(0)
            x_min, y_min = landmark.min(0)
            w, h = x_max - x_min, y_max - y_min
            w = h = min(w, h)
            x_new = x_min - w * ratio
            y_new = y_min - h * ratio
            w_new = w * (1 + 2 * ratio)
            h_new = h * (1 + 2 * ratio)
            # A real list (not a map iterator) so BBox can index it on Python 3.
            bbox = BBox(list(map(int, [x_new, x_new + w_new,
                                       y_new, y_new + h_new])))

            # normalize landmark
            landmark = bbox.projectLandmark(landmark)
            result.append((img_path, bbox, landmark))
    return result
def load_celeba_data():
    '''
    load celeba dataset and crop the face box

    The landmark list starts with a sample count, then a header line that
    is discarded, then one line per image: file name followed by integer
    landmark coordinates.

    Return a list of tuples:
        - img_path: dataset/celeba/000001.jpg
        - bbox: object of BBox
        - landmark: (N, 2) array returned by bbox.projectLandmark
          (N is 5 according to the original docstring)
    '''
    text = '../dataset/celeba/list_landmarks_celeba.txt'
    # text = 'E:\\dataset\\CelebA\\list_landmarks_celeba.txt'
    ratio = 0.5
    result = []
    # The context manager closes the file even when a line fails to parse.
    with open(text, 'r') as fin:
        n = int(fin.readline().strip())
        fin.readline()  # drop this line [lefteye_x, lefteye_y, ...]
        for _ in range(n):
            components = fin.readline().strip().split()
            img_path = join('../dataset/celeba', components[0])
            # img_path = join('E:\\dataset\\CelebA\\img_celeba', components[0])
            landmark = np.asarray([int(value) for value in components[1:]],
                                  dtype=np.float32)
            # -1 lets numpy infer the row count; the former `len / 2`
            # is a float on Python 3 and is rejected by reshape.
            landmark = landmark.reshape(-1, 2)

            # crop face box: a square of side min(w, h) anchored at the
            # landmarks' minimum corner, padded by `ratio` on every side
            x_max, y_max = landmark.max(0)
            x_min, y_min = landmark.min(0)
            w, h = x_max - x_min, y_max - y_min
            w = h = min(w, h)
            x_new = x_min - w * ratio
            y_new = y_min - h * ratio
            w_new = w * (1 + 2 * ratio)
            h_new = h * (1 + 2 * ratio)
            # A real list (not a map iterator) so BBox can index it on Python 3.
            bbox = BBox(list(map(int, [x_new, x_new + w_new,
                                       y_new, y_new + h_new])))

            # normalize landmark
            landmark = bbox.projectLandmark(landmark)
            result.append((img_path, bbox, landmark))
    return result
def get_fods_from_file(label_file):
    """
    Build one FOD per label entry found in a single label file.

    The frame id is looked up once for the file and shared by every FOD;
    each entry supplies its own bounding box ('xmin', 'xmax', 'ymin',
    'ymax') and type ('name').

    :param label_file: Path of label xml file
    :return: List of FODs from that xml file
    """
    frame_id = get_frame_id(label_file)
    return [
        FOD(
            frame_id,
            BBox(entry['xmin'], entry['xmax'], entry['ymin'], entry['ymax']),
            entry['name'],
        )
        for entry in parse_label_file(label_file)
    ]
def get_new_bbox_vals(bbox, rotate_degree):
    """
    Return the axis-aligned BBox enclosing the four transformed corners.

    Every corner (x, y) of ``bbox`` is mapped to
    ``(y * sin(a) + x * cos(a), y * cos(a) + x * sin(a))`` with
    ``a = rotate_degree``; the extremes of the mapped corners, truncated
    with int(), become the new box.

    NOTE(review): math.sin / math.cos interpret their argument as radians
    although the parameter is named ``rotate_degree`` - confirm what the
    callers pass.
    NOTE(review): both mapped coordinates use positive sine terms, whereas
    a standard rotation matrix negates one of them - confirm this is
    intended.

    :param bbox: object exposing x_min, x_max, y_min, y_max
    :param rotate_degree: angle handed to math.sin / math.cos
    :return: BBox(x_min, x_max, y_min, y_max) of the mapped corners
    """
    sin_a = math.sin(rotate_degree)
    cos_a = math.cos(rotate_degree)

    corners = (
        (bbox.x_min, bbox.y_min),
        (bbox.x_min, bbox.y_max),
        (bbox.x_max, bbox.y_min),
        (bbox.x_max, bbox.y_max),
    )
    mapped_x = [y * sin_a + x * cos_a for x, y in corners]
    mapped_y = [y * cos_a + x * sin_a for x, y in corners]

    return BBox(int(min(mapped_x)), int(max(mapped_x)),
                int(min(mapped_y)), int(max(mapped_y)))
# Fragment of a larger routine: cx, cy, size, width, height, img, image and
# out_size are defined outside the visible code.
# Build a square window of side `size` centred on (cx, cy).
x1 = cx - size // 2
x2 = x1 + size
y1 = cy - size // 2
y2 = y1 + size
# How far the window sticks out past the left/top edges (dx, dy) and the
# right/bottom edges (edx, edy); the window itself is clamped to the image
# and the overshoot is added back below as constant border padding.
dx = max(0, -x1)
dy = max(0, -y1)
x1 = max(0, x1)
y1 = max(0, y1)
edx = max(0, x2 - width)
edy = max(0, y2 - height)
x2 = min(width, x2)
y2 = min(height, y2)
# BBox receives the values in the order [x1, x2, y1, y2].
new_bbox = list(map(int, [x1, x2, y1, y2]))
# new_bbox2 = [x1, x2, y1, y2]
new_bbox = BBox(new_bbox)
cropped = img[new_bbox.top:new_bbox.bottom, new_bbox.left:new_bbox.right]
# Padding order for copyMakeBorder is top, bottom, left, right.
cropped = cv2.copyMakeBorder(cropped, int(dy), int(edy), int(dx), int(edx), cv2.BORDER_CONSTANT, 0)
cropped_face = cv2.resize(cropped, (out_size, out_size))
cropped_face = np.asarray(cv2.cvtColor(cropped_face, cv2.COLOR_BGR2RGB))
# Move the channel axis first: (H, W, C) -> (C, H, W).
cropped_face = np.transpose(cropped_face, [2, 0, 1])
cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
# NOTE(review): roi_color is not used in the visible code, and it indexes
# rows with x and columns with y using x1 + x2 / y1 + y2 as end points -
# confirm whether it is still needed.
roi_color = image[x1:x1 + x2, y1:y1 + y2]
# Scale pixel values to [0, 1] as float32.
cropped_face = (np.asarray(cropped_face, dtype='float32')) / 255.0
# print(cropped_face.shape)
# NOTE(review): the reshape hard-codes 112 x 112, so it only succeeds when
# out_size is 112 - confirm against the enclosing function.
cropped_face = cropped_face.reshape(1, 3, 112, 112)
cropped_face = torch.from_numpy(cropped_face)
# print(cropped_face.shape)
def gen_lfw_landmark(img_dir, save_dir, input_size, argument, name):
    """
    Generate landmark training crops and their label file.

    For every annotated face the ground-truth crop is resized to
    ``input_size`` and, when ``argument`` is true, randomly shifted,
    mirrored and rotated (+5 / -5) variants are added. Samples whose
    normalised landmarks all lie strictly inside (0, 1) are written as
    ``<image_id>.jpg`` with one label line
    ``<image path> -2 <10 landmark values>``.

    Args:
        img_dir: dataset root holding the annotation list.
        save_dir: output root; results go to ``save_dir/<input_size>/``.
        input_size: side length of the saved square crops.
        argument: enable data augmentation when true.
        name: dataset flavour, 'lfw' or 'wider'.

    Raises:
        SystemExit: if ``name`` is neither 'lfw' nor 'wider'.
        IOError: if an image listed in the annotations cannot be read.
    """
    # Output directories (makedirs also creates a missing save_dir and is a
    # no-op when the directory already exists).
    base_dir = os.path.join(save_dir, str(input_size))
    os.makedirs(base_dir, exist_ok=True)
    landmark_save_dir = os.path.join(base_dir, '%s_landmark' % name)
    os.makedirs(landmark_save_dir, exist_ok=True)

    # Annotation list to read
    if name == 'lfw':
        ftxt = os.path.join(img_dir, 'trainImageList.txt')
    elif name == 'wider':
        ftxt = os.path.join(img_dir, 'wider_face_train_landmark.txt')
        img_dir = os.path.join(img_dir, 'WIDER_train/images')
    else:
        print('name只能是"lfw"或"wider"')
        # Same effect as the interactive-only exit() helper, without
        # depending on the site module.
        raise SystemExit()

    label_path = os.path.join(base_dir, '%s_landmark.txt' % name)
    # The context manager closes the label file even if processing fails.
    with open(label_path, 'w') as f:
        # Image path, box and landmarks.
        # lfw data format: [(path, BBox object, [[,], [,], [,], [,], [,]]), ]
        data = get_data_from_txt(ftxt, img_dir, name)
        image_id = 0
        for (imgPath, box, landmarkGt) in tqdm(data):
            # Face crops and their landmarks collected for this image
            F_imgs = []
            F_landmarks = []
            img = cv.imread(imgPath)
            if img is None:
                # imread signals failure by returning None, not by raising.
                raise IOError('cannot read image: %s' % imgPath)
            img_h, img_w = img.shape[:2]
            gt_box = np.array([box.left, box.top, box.right, box.bottom])
            # Ground-truth face crop, resized to the network input size
            f_face = img[box.top:box.bottom + 1, box.left:box.right + 1]
            f_face = cv.resize(f_face, (input_size, input_size))
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                # Offset from the box's top-left corner, normalised by box size
                rv = ((one[0] - gt_box[0]) / (gt_box[2] - gt_box[0]),
                      (one[1] - gt_box[1]) / (gt_box[3] - gt_box[1]))
                landmark[index] = rv
            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))
            landmark = np.zeros((5, 2))

            if argument:
                x1, y1, x2, y2 = gt_box
                gt_w = x2 - x1 + 1
                gt_h = y2 - y1 + 1
                # Faces that are too small or start outside the image are
                # skipped entirely (nothing is written for them).
                if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                    continue
                for _ in range(10):
                    # Random crop size and random shift of the crop centre
                    box_size = np.random.randint(int(min(gt_w, gt_h) * 0.8),
                                                 np.ceil(1.25 * max(gt_w, gt_h)))
                    delta_x = np.random.randint(-gt_w * 0.2, gt_w * 0.2)
                    delta_y = np.random.randint(-gt_h * 0.2, gt_h * 0.2)
                    # Top-left corner of the crop, clamped to the image
                    nx1 = int(max(x1 + gt_w / 2 - box_size / 2 + delta_x, 0))
                    ny1 = int(max(y1 + gt_h / 2 - box_size / 2 + delta_y, 0))
                    nx2 = nx1 + box_size
                    ny2 = ny1 + box_size
                    # Drop crops that leave the image
                    if nx2 > img_w or ny2 > img_h:
                        continue
                    crop_box = np.array([nx1, ny1, nx2, ny2])
                    cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                    resized_im = cv.resize(cropped_im, (input_size, input_size))
                    Iou = iou(crop_box, np.expand_dims(gt_box, 0))
                    # Keep positive crops only
                    if not Iou > 0.65:
                        continue

                    F_imgs.append(resized_im)
                    # Landmarks relative to the crop
                    for index, one in enumerate(landmarkGt):
                        rv = ((one[0] - nx1) / box_size,
                              (one[1] - ny1) / box_size)
                        landmark[index] = rv
                    F_landmarks.append(landmark.reshape(10))
                    landmark = np.zeros((5, 2))
                    landmark_ = F_landmarks[-1].reshape(-1, 2)
                    crop_bbox = BBox([nx1, ny1, nx2, ny2])

                    # Mirror
                    if random.choice([0, 1]) > 0:
                        face_flipped, landmark_flipped = flip(resized_im,
                                                              landmark_)
                        face_flipped = cv.resize(face_flipped,
                                                 (input_size, input_size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))

                    # Rotation by +5 (labelled counter-clockwise by the
                    # original author) and then -5 (clockwise); each is
                    # applied with probability 1/2 and is always followed
                    # by its mirrored version.
                    for alpha in (5, -5):
                        if not random.choice([0, 1]) > 0:
                            continue
                        face_rotated_by_alpha, landmark_rorated = rotate(
                            img, crop_bbox,
                            crop_bbox.reprojectLandmark(landmark_), alpha)
                        # Re-normalise the rotated landmarks
                        landmark_rorated = crop_bbox.projectLandmark(
                            landmark_rorated)
                        face_rotated_by_alpha = cv.resize(
                            face_rotated_by_alpha, (input_size, input_size))
                        F_imgs.append(face_rotated_by_alpha)
                        F_landmarks.append(landmark_rorated.reshape(10))
                        # Mirror of the rotated crop
                        face_flipped, landmark_flipped = flip(
                            face_rotated_by_alpha, landmark_rorated)
                        face_flipped = cv.resize(face_flipped,
                                                 (input_size, input_size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))

            # Written for every image that reaches this point, with or
            # without augmentation.
            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
            for face, points in zip(F_imgs, F_landmarks):
                # Discard samples with any landmark outside (0, 1)
                if np.any(points <= 0) or np.any(points >= 1):
                    continue
                out_path = os.path.join(landmark_save_dir,
                                        '%d.jpg' % (image_id))
                cv.imwrite(out_path, face)
                f.write(out_path + ' -2 ' + ' '.join(map(str, points)) + '\n')
                image_id += 1
def pro_landmark_train_data(input_size):
    """
    Build augmented landmark samples for the network matching ``input_size``.

    Creates ``data_dir/<input_size>/train_<net>_landmark_aug`` and the label
    file ``landmark_<input_size>_aug.txt``, then for each annotated face
    collects the resized ground-truth crop plus randomly shifted, mirrored
    and rotated (+5 / -5) variants together with their normalised landmarks.

    NOTE: the collected crops and landmarks are not written anywhere yet,
    exactly as in the original implementation; the label file is only
    created (empty). TODO: persist the samples.

    Args:
        input_size: 12 (pnet), 24 (rnet) or 48 (onet).

    Raises:
        ValueError: if ``input_size`` is not one of the supported sizes.
        IOError: if an image listed in the annotations cannot be read.
    """
    net_names = {12: 'pnet', 24: 'rnet', 48: 'onet'}
    if input_size not in net_names:
        # Previously this surfaced later as a NameError on `net`.
        raise ValueError('input_size must be 12, 24 or 48, got %r'
                         % (input_size,))
    net = net_names[input_size]

    # Output directory for this input size
    path = os.path.join(data_dir, str(input_size))
    os.makedirs(path, exist_ok=True)
    # Directory meant to receive the processed images
    dstdir = os.path.join(path, 'train_%s_landmark_aug' % (net))
    os.makedirs(dstdir, exist_ok=True)

    ftxt = os.path.join(data_dir, 'trainImageList.txt')
    # The context manager guarantees the label file is closed.
    with open(os.path.join(path, 'landmark_%d_aug.txt' % (input_size)), 'w'):
        # Image path, box and landmarks
        data = getDataFromText(ftxt, data_dir)
        for (img_path, box, landmarkGt) in tqdm(data):
            # Face crops and their landmarks collected for this image
            f_imgs = []
            f_landmarks = []
            img = cv2.imread(img_path)
            if img is None:
                # imread signals failure by returning None, not by raising.
                raise IOError('cannot read image: %s' % img_path)
            img_h, img_w = img.shape[:2]
            gt_box = np.array([box.x1, box.y1, box.x2, box.y2])
            gt_w = gt_box[2] - gt_box[0] + 1
            gt_h = gt_box[3] - gt_box[1] + 1
            # Ground-truth face crop with all channels (the former trailing
            # index 3 addressed a channel that a 3-channel image lacks).
            f_face = img[box.y1:box.y2 + 1, box.x1:box.x2 + 1]
            # Resize to the network input size
            f_face = cv2.resize(f_face, (input_size, input_size))
            landmark = np.zeros((5, 2))
            for index, item in enumerate(landmarkGt):
                # Offset from the box's top-left corner, normalised by box size
                rv = ((item[0] - gt_box[0]) / gt_w,
                      (item[1] - gt_box[1]) / gt_h)
                landmark[index] = rv
            f_imgs.append(f_face)
            # reshape returns the flattened array; ndarray.resize works in
            # place and returns None, which is what used to be appended.
            f_landmarks.append(landmark.reshape(10))
            landmark = np.zeros((5, 2))

            x1, y1, x2, y2 = gt_box
            # Skip faces that are too small or start outside the image
            if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                continue
            for _ in range(10):
                # Random crop size and random shift of the crop centre
                box_size = np.random.randint(int(min(gt_w, gt_h) * 0.8),
                                             np.ceil(1.25 * max(gt_w, gt_h)))
                delta_x = np.random.randint(-gt_w * 0.2, gt_w * 0.2)
                delta_y = np.random.randint(-gt_h * 0.2, gt_h * 0.2)
                # Top-left corner of the crop, clamped to the image
                nx1 = int(max(x1 + gt_w / 2 - box_size / 2 + delta_x, 0))
                ny1 = int(max(y1 + gt_h / 2 - box_size / 2 + delta_y, 0))
                nx2 = nx1 + box_size
                ny2 = ny1 + box_size
                # Drop crops that leave the image
                if nx2 > img_w or ny2 > img_h:
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])
                cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                resized_im = cv2.resize(cropped_im, (input_size, input_size))
                iou = iou_fun(crop_box, np.expand_dims(gt_box, 0))
                # Keep positive crops only
                if not iou > 0.65:
                    continue

                f_imgs.append(resized_im)
                # Landmarks relative to the crop
                for index, one in enumerate(landmarkGt):
                    rv = ((one[0] - nx1) / box_size, (one[1] - ny1) / box_size)
                    landmark[index] = rv
                f_landmarks.append(landmark.reshape(10))
                landmark = np.zeros((5, 2))
                landmark_ = f_landmarks[-1].reshape(-1, 2)
                crop_bbox = BBox([nx1, ny1, nx2, ny2])

                # Mirror
                if np.random.choice([0, 1]) > 0:
                    face_flipped, landmark_flipped = flip(resized_im,
                                                          landmark_)
                    face_flipped = cv2.resize(face_flipped,
                                              (input_size, input_size))
                    f_imgs.append(face_flipped)
                    f_landmarks.append(landmark_flipped.reshape(10))

                # Rotation by +5 (labelled counter-clockwise by the original
                # author) and then -5 (clockwise); each is applied with
                # probability 1/2 and is always followed by its mirror.
                for alpha in (5, -5):
                    if not np.random.choice([0, 1]) > 0:
                        continue
                    face_rotated_by_alpha, landmark_rorated = rotate(
                        img, crop_bbox,
                        crop_bbox.reprojectLandmark(landmark_), alpha)
                    # Re-normalise the rotated landmarks
                    landmark_rorated = crop_bbox.projectLandmark(
                        landmark_rorated)
                    face_rotated_by_alpha = cv2.resize(
                        face_rotated_by_alpha, (input_size, input_size))
                    f_imgs.append(face_rotated_by_alpha)
                    f_landmarks.append(landmark_rorated.reshape(10))
                    # Mirror of the rotated crop
                    face_flipped, landmark_flipped = flip(
                        face_rotated_by_alpha, landmark_rorated)
                    face_flipped = cv2.resize(face_flipped,
                                              (input_size, input_size))
                    f_imgs.append(face_flipped)
                    f_landmarks.append(landmark_flipped.reshape(10))
def generateData(ftxt, data_path, net, augmentation=True):
    """
    Generate landmark training crops and their label file for one network.

    For every annotated face the ground-truth crop is resized to the
    network's input size and, when ``augmentation`` is true, randomly
    shifted, mirrored and rotated (+5 / -5) variants are added. Samples
    whose normalised landmarks all lie strictly inside (0, 1) are saved to
    ``dstdir`` and listed in ``output/landmark_<size>_aug.txt`` as
    ``<image path> -2 <10 landmark values>``.

    Args:
        ftxt: annotation list passed to getDataFromTxt.
        data_path: dataset root passed to getDataFromTxt.
        net: "PNet" (12), "RNet" (24) or "ONet" (48).
        augmentation: enable data augmentation when true.

    Returns:
        (images, landmarks) collected for the last processed annotation
        (two empty lists when there is no annotation), or None when
        ``net`` is not recognised.

    Raises:
        IOError: if an image listed in the annotations cannot be read.
    """
    sizes = {"PNet": 12, "RNet": 24, "ONet": 48}
    if net not in sizes:
        print('Net type error')
        return
    size = sizes[net]

    image_id = 0
    idx = 0
    # Defined up front so the final return works for an empty annotation list.
    images, landmarks = [], []
    # The context manager closes the label file even if processing fails.
    with open(os.path.join(output, "landmark_%s_aug.txt" % (size)), 'w') as f:
        # get image path , bounding box, and landmarks from file 'ftxt'
        data = getDataFromTxt(ftxt, data_path=data_path)
        for (imagePath, bbox, landmarkGt) in data:
            images = []
            landmarks = []
            img = cv2.imread(imagePath)
            if img is None:
                # imread signals failure by returning None, not by raising.
                raise IOError('cannot read image: %s' % imagePath)
            img_h, img_w = img.shape[:2]
            # Skip boxes whose corners are inverted
            if bbox.right < bbox.left or bbox.bottom < bbox.top:
                continue
            # Box coordinates in the original image
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))

            landmark = np.zeros((5, 2))
            # normalize land mark by dividing the width and height of the
            # ground truth bounding box; landmarkGt is a list of tuples
            for index, one in enumerate(landmarkGt):
                rv = ((one[0] - gt_box[0]) / (gt_box[2] - gt_box[0]),
                      (one[1] - gt_box[1]) / (gt_box[3] - gt_box[1]))
                landmark[index] = rv
            images.append(f_face)
            landmarks.append(landmark.reshape(10))
            landmark = np.zeros((5, 2))

            if augmentation:
                idx += 1
                if idx % 100 == 0:
                    print(idx, "images done")
                x1, y1, x2, y2 = gt_box
                gt_w = x2 - x1 + 1
                gt_h = y2 - y1 + 1
                # Faces that are too small or start outside the image are
                # skipped entirely (nothing is written for them).
                if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                    continue
                # random shift
                for _ in range(10):
                    # int() because np.ceil yields a float, which the
                    # stdlib random.randint rejects on recent Pythons.
                    bbox_size = random.randint(
                        int(min(gt_w, gt_h) * 0.8),
                        int(np.ceil(1.25 * max(gt_w, gt_h))))
                    delta_x = random.randint(int(-gt_w * 0.2), int(gt_w * 0.2))
                    delta_y = random.randint(int(-gt_h * 0.2), int(gt_h * 0.2))
                    nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
                    ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))
                    nx2 = nx1 + bbox_size
                    ny2 = ny1 + bbox_size
                    if nx2 > img_w or ny2 > img_h:
                        continue
                    crop_box = np.array([nx1, ny1, nx2, ny2])
                    cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                    resized_im = cv2.resize(cropped_im, (size, size))
                    # cal iou
                    iou = IoU(crop_box, np.expand_dims(gt_box, 0))
                    if not iou > 0.65:
                        continue

                    images.append(resized_im)
                    # normalize
                    for index, one in enumerate(landmarkGt):
                        rv = ((one[0] - nx1) / bbox_size,
                              (one[1] - ny1) / bbox_size)
                        landmark[index] = rv
                    landmarks.append(landmark.reshape(10))
                    landmark = np.zeros((5, 2))
                    _landmark = landmarks[-1].reshape(-1, 2)
                    crop_bbox = BBox([nx1, ny1, nx2, ny2])

                    # mirror
                    if random.choice([0, 1]) > 0:
                        face_flipped, landmark_flipped = flip(resized_im,
                                                              _landmark)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        images.append(face_flipped)
                        landmarks.append(landmark_flipped.reshape(10))

                    # Rotation by +5 (labelled counter-clockwise by the
                    # original author) and then -5 (clockwise); each is
                    # applied with probability 1/2 and is always followed
                    # by its mirrored version.
                    for alpha in (5, -5):
                        if not random.choice([0, 1]) > 0:
                            continue
                        # reprojectLandmark restores the normalised
                        # landmarks to original image coordinates
                        face_rotated, landmark_rotated = rotate(
                            img, crop_bbox,
                            crop_bbox.reprojectLandmark(_landmark), alpha)
                        # Re-normalise the rotated landmarks
                        landmark_rotated = crop_bbox.projectLandmark(
                            landmark_rotated)
                        face_rotated = cv2.resize(face_rotated, (size, size))
                        images.append(face_rotated)
                        landmarks.append(landmark_rotated.reshape(10))
                        # flip
                        face_flipped, landmark_flipped = flip(
                            face_rotated, landmark_rotated)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        images.append(face_flipped)
                        landmarks.append(landmark_flipped.reshape(10))

            # Written for every image that reaches this point, with or
            # without augmentation.
            images, landmarks = np.asarray(images), np.asarray(landmarks)
            print(images)
            print(np.shape(landmarks))
            for face, points in zip(images, landmarks):
                # Discard samples with any landmark outside (0, 1)
                if np.any(points <= 0) or np.any(points >= 1):
                    continue
                # Save the image and its label line
                out_path = os.path.join(dstdir, '%d.jpg' % (image_id))
                cv2.imwrite(out_path, face)
                f.write(out_path + " -2 " + " ".join(map(str, points)) + "\n")
                image_id += 1
    return images, landmarks
def run():
    """
    Capture frames, detect a face, classify BMI and display/post the result.

    Loop per frame read from ``cap``:
      1. normalise the frame and run ``face_detector`` on a 320x240 copy;
      2. for the first detected box, build a padded square crop, feed it to
         ``model`` and derive a BMI class label and value;
      3. draw the box and texts on the frame, POST a one-hot vector of
         length 8 to http://localhost:9009/predict, and show the frame.
    Pressing ESC (key code 27) leaves the loop; the capture is then released
    and all OpenCV windows are destroyed.

    Relies on names defined outside this function: cap, face_detector,
    face_detector_input, predict, model, device, BBox, out_width,
    out_height and font.

    NOTE(review): block nesting was reconstructed from whitespace-collapsed
    text; in particular confirm whether the imshow / waitKey statements
    belong inside ``if ret:``.
    """
    i = -1  # frame counter; only incremented, never read
    none = False  # True when the current frame has no detected face
    while True:
        i += 1
        ret, orig_image = cap.read()
        if ret:
            image = orig_image
            # Detector input: RGB, 320x240, (pixel - 127) / 128, channel
            # first, with a leading batch axis, as float32.
            updated_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            updated_image = cv2.resize(updated_image, (320, 240))
            image_mean = np.array([127, 127, 127])
            updated_image = (updated_image - image_mean) / 128
            updated_image = np.transpose(updated_image, [2, 0, 1])
            updated_image = np.expand_dims(updated_image, axis=0)
            updated_image = updated_image.astype(np.float32)
            confidences, boxes = face_detector.run(
                None, {face_detector_input: updated_image})
            # predict() receives the frame width, height and 0.7
            # (presumably a score threshold - confirm in predict()).
            boxes, _, _ = predict(image.shape[1], image.shape[0], confidences,
                                  boxes, 0.7)
            bmi_value = -1
            bmi_class = 'Not Found'
            if len(boxes) > 0:
                print(boxes)
                box = boxes[0]  # only the first detection is used
                out_size = 112
                img = image.copy()
                height, width, _ = image.shape
                x1 = box[0]
                y1 = box[1]
                x2 = box[2]
                y2 = box[3]
                # NOTE(review): this scales each coordinate by 0.9 / 1.1
                # (relative to the image origin), not by 10% of the box
                # size - confirm this is the intended enlargement.
                x1 = int(x1 - 0.1 * x1)
                y1 = int(y1 - 0.1 * y1)
                x2 = int(x2 + 0.1 * x2)
                y2 = int(y2 + 0.1 * y2)
                w = x2 - x1 + 1
                h = y2 - y1 + 1
                # Square window, 1.1x the longer side, centred on the box
                size = int(max([w, h]) * 1.1)
                cx = x1 + w // 2
                cy = y1 + h // 2
                x1 = cx - size // 2
                x2 = x1 + size
                y1 = cy - size // 2
                y2 = y1 + size
                # Overshoot past each image edge; the window is clamped and
                # the overshoot is added back below as border padding.
                dx = max(0, -x1)
                dy = max(0, -y1)
                x1 = max(0, x1)
                y1 = max(0, y1)
                edx = max(0, x2 - width)
                edy = max(0, y2 - height)
                x2 = min(width, x2)
                y2 = min(height, y2)
                # BBox receives the values in the order [x1, x2, y1, y2].
                new_bbox = list(map(int, [x1, x2, y1, y2]))
                # new_bbox2 = [x1, x2, y1, y2]
                new_bbox = BBox(new_bbox)
                cropped = img[new_bbox.top:new_bbox.bottom,
                              new_bbox.left:new_bbox.right]
                # Padding order is top, bottom, left, right.
                cropped = cv2.copyMakeBorder(cropped, int(dy), int(edy),
                                             int(dx), int(edx),
                                             cv2.BORDER_CONSTANT, 0)
                cropped_face = cv2.resize(cropped, (out_size, out_size))
                cropped_face = np.asarray(
                    cv2.cvtColor(cropped_face, cv2.COLOR_BGR2RGB))
                cropped_face = np.transpose(cropped_face, [2, 0, 1])
                # NOTE(review): roi_color is never used afterwards.
                roi_color = image[x1:x1 + x2, y1:y1 + y2]
                # Scale pixels to [0, 1] and add the batch axis.
                cropped_face = (np.asarray(cropped_face,
                                           dtype='float32')) / 255.0
                # print(cropped_face.shape)
                cropped_face = cropped_face.reshape(1, 3, 112, 112)
                cropped_face = torch.from_numpy(cropped_face)
                # print(cropped_face.shape)
                outputs = model(cropped_face.to(device))
                # outputs[0]: per-class scores (arg-max over dim 1 gives the
                # class index); outputs[1]: value scaled into a BMI below.
                _, prediction = torch.max(outputs[0], 1)
                prediction2 = torch.squeeze(outputs[1].data)
                prediction = prediction.tolist()
                prediction2 = prediction2.tolist()
                # NOTE(review): whenever the random draw is below 990 the
                # model's class is overwritten with 1 ('Normal Range') -
                # confirm this override is intentional.
                rand = random.randint(0, 1000)
                if rand < 990:
                    prediction[0] = 1
                # Map the class index to a label and rescale prediction2
                # into that class's BMI interval.
                # NOTE(review): class 1 uses a span of (25 - 16.5) on a base
                # of 18.5, and class 4 uses (40 - 45) + 45; both differ from
                # the pattern of the other classes - verify against how the
                # model's targets were normalised.
                if prediction[0] == 0:
                    bmi_class = 'Under Weight'
                    bmi_value = round(prediction2 * (18.5 - 10) + 10, 2)
                elif prediction[0] == 1:
                    bmi_class = 'Normal Range'
                    bmi_value = round(prediction2 * (25 - 16.5) + 18.5, 1)
                elif prediction[0] == 2:
                    bmi_class = 'Over Weight'
                    bmi_value = round(prediction2 * (30 - 25) + 25, 2)
                elif prediction[0] == 3:
                    bmi_class = 'Obese Class I'
                    bmi_value = round(prediction2 * (35 - 30) + 30, 2)
                elif prediction[0] == 4:
                    bmi_class = 'Obese Class II'
                    bmi_value = round(prediction2 * (40 - 45) + 45, 2)
                elif prediction[0] == 5:
                    bmi_class = 'Obese Class III'
                    bmi_value = round(prediction2 * (45 - 40) + 40, 2)
                elif prediction[0] == 6:
                    bmi_class = 'Obese Class IV'
                    bmi_value = round(prediction2 * (120 - 45) + 45, 2)
                boxes = boxes[0].tolist()
                # One-hot vector of length 8 for the chosen class
                predictions = np.array(
                    [1 if x == prediction[0] else 0 for x in range(8)],
                    dtype=float)
                # Recompute a square box, this time for drawing: rescale the
                # detection by frame size / (out_width, out_height), apply
                # the same 0.9 / 1.1 scaling and clamp to the frame.
                _height, _width, _ = image.shape
                x1 = box[0] * _width / out_width
                y1 = box[1] * _height / out_height
                x2 = box[2] * _width / out_width
                y2 = box[3] * _height / out_height
                x1 = int(x1 - 0.1 * x1)
                y1 = int(y1 - 0.1 * y1)
                x2 = int(x2 + 0.1 * x2)
                y2 = int(y2 + 0.1 * y2)
                w = x2 - x1 + 1
                h = y2 - y1 + 1
                size = int(max([w, h]) * 1.0)
                cx = x1 + w // 2
                cy = y1 + h // 2
                x1 = cx - size // 2
                x2 = x1 + size
                y1 = cy - size // 2
                y2 = y1 + size
                x1 = max(0, x1)
                y1 = max(0, y1)
                x2 = min(_width, x2)
                y2 = min(_height, y2)
                cv2.rectangle(orig_image, (x1, y1), (x2, y2), (255, 0, 0), 2)
                cv2.putText(orig_image, bmi_class, (x1 + 5, y1 - 5), font, 1.5,
                            (0, 0, 255), 2)
                cv2.putText(orig_image, str(bmi_value), (x1 + 5, y2), font,
                            1.5, (0, 0, 255), 2)
                # Same one-hot vector as computed above (assigned twice).
                predictions = np.array(
                    [1 if x == prediction[0] else 0 for x in range(8)],
                    dtype=float)
                none = False
            else:
                # No face: the last slot (index 7) is set instead.
                boxes = list(boxes)
                predictions = np.array([0, 0, 0, 0, 0, 0, 0, 1], dtype=float)
                none = True
            if not none:
                # Report the prediction; request failures are printed and
                # the loop carries on.
                try:
                    url = "http://localhost:9009/predict"
                    response = requests.post(
                        url=url,
                        data=json.dumps({"predictions": predictions.tolist()}),
                        headers={
                            "Content-Type": "application/json",
                            "Accept": "application/json"
                        })
                    response.raise_for_status()
                    print(response.json())
                except requests.exceptions.HTTPError as errh:
                    print("Http Error:", errh)
                except requests.exceptions.ConnectionError as errc:
                    print("Error Connecting:", errc)
                except requests.exceptions.Timeout as errt:
                    print("Timeout Error:", errt)
                except requests.exceptions.RequestException as err:
                    print("OOps: Something Else", err)
            cv2.imshow('video', orig_image)
        k = cv2.waitKey(30) & 0xff
        if k == 27:  # press 'ESC' to quit
            break
    cap.release()
    cv2.destroyAllWindows()