def prepare_facebank(model, path='facebank', tta=True):
    """Build and save the face bank: one mean embedding per identity folder.

    Every sub-directory of ``path`` is treated as one identity and every file
    listed by ``listdir_nohidden`` inside it as one sample image. Images whose
    shape is not exactly (112, 112, 3) are run through MTCNN detection and
    ``Face_alignment``; only the first aligned face is used. Images that are
    already 112x112x3 are embedded as they are.

    Relies on the module-level names ``device``, ``test_transform``,
    ``l2_norm``, ``create_mtcnn_net``, ``Face_alignment`` and
    ``listdir_nohidden``.

    Args:
        model: embedding network; it is switched to eval mode and called on a
            batch holding a single transformed face.
        path: root directory of the face bank. ``facebank.pth`` and ``names``
            (via ``np.save``) are written into it.
        tta: when true, the embedding of the face and of its horizontal mirror
            are summed and passed through ``l2_norm``; otherwise the raw model
            output is used.

    Returns:
        ``(embeddings, names)``: the concatenated per-identity mean embeddings
        and a numpy array of identity names. ``names`` starts with an empty
        string, so it has one more entry than ``embeddings`` has rows.

    Note:
        ``torch.cat`` is called unguarded on the collected embeddings, so a
        bank in which no identity produced an embedding propagates whatever
        error ``torch.cat`` raises for an empty list.
    """
    model.eval()
    embeddings = []
    # Leading '' entry is preserved from the original layout; presumably the
    # slot for "unknown" at lookup time -- confirm against the consumer.
    names = ['']
    data_path = Path(path)

    for doc in data_path.iterdir():
        if doc.is_file():
            continue

        embs = []
        for files in listdir_nohidden(doc):
            image_path = os.path.join(doc, files)
            try:
                image = cv2.imread(image_path)
                if image is None:
                    # cv2.imread signals an unreadable file by returning None.
                    print('skipping unreadable image: {}'.format(image_path))
                    continue

                if image.shape != (112, 112, 3):
                    bboxes, landmarks = create_mtcnn_net(
                        image, 12, device,
                        p_model_path='MTCNN/weights/pnet_Weights',
                        r_model_path='MTCNN/weights/rnet_Weights',
                        o_model_path='MTCNN/weights/onet_Weights')
                    faces = Face_alignment(image, default_square=True,
                                           landmarks=landmarks)
                else:
                    # Already at the target size: wrap it so that faces[0]
                    # below is the image itself and not its first pixel row.
                    faces = [image]

                # len() is well defined for lists and arrays alike, unlike
                # comparing an ndarray against [].
                if len(faces) == 0:
                    continue
                face = faces[0]

                with torch.no_grad():
                    if tta:
                        mirror = cv2.flip(face, 1)
                        emb = model(
                            test_transform(face).to(device).unsqueeze(0))
                        emb_mirror = model(
                            test_transform(mirror).to(device).unsqueeze(0))
                        embs.append(l2_norm(emb + emb_mirror))
                    else:
                        embs.append(
                            model(test_transform(face).to(device).unsqueeze(0)))
            except Exception as exc:
                # Best effort: one bad sample must not abort the whole bank,
                # but say so instead of hiding it. KeyboardInterrupt and
                # SystemExit are deliberately not caught.
                print('skipping {}: {!r}'.format(image_path, exc))
                continue

        if len(embs) == 0:
            continue
        embedding = torch.cat(embs).mean(0, keepdim=True)
        embeddings.append(embedding)
        names.append(doc.name)

    embeddings = torch.cat(embeddings)
    names = np.array(names)
    print(names)
    torch.save(embeddings, os.path.join(path, 'facebank.pth'))
    np.save(os.path.join(path, 'names'), names)
    return embeddings, names
def MTCNN_NET(frame, scale, device, p_model_path, r_model_path, o_model_path):
    """Run MTCNN detection on a resized copy of ``frame``.

    The frame is passed through ``resize_image(frame, scale)`` before
    detection, and any detections are divided by ``scale`` afterwards
    (presumably to express them in the coordinates of the original frame --
    confirm against ``resize_image``). The minimum face size is read from the
    module-level ``args.miniface``.

    Args:
        frame: image to run detection on.
        scale: factor handed to ``resize_image`` and used to divide the
            results.
        device: device forwarded to ``create_mtcnn_net``.
        p_model_path: weight path forwarded to ``create_mtcnn_net``.
        r_model_path: weight path forwarded to ``create_mtcnn_net``.
        o_model_path: weight path forwarded to ``create_mtcnn_net``.

    Returns:
        ``(bboxes, landmarks)`` as produced by ``create_mtcnn_net``, divided
        by ``scale`` when at least one box was found and untouched otherwise.
    """
    resized = resize_image(frame, scale)
    bboxes, landmarks = create_mtcnn_net(resized, args.miniface, device,
                                         p_model_path, r_model_path,
                                         o_model_path)
    # A length check works for an empty list and for an ndarray; `!= []` on
    # an ndarray is a shape-mismatched elementwise comparison.
    if len(bboxes) > 0:
        bboxes = bboxes / scale
        landmarks = landmarks / scale
    return bboxes, landmarks
def MTCNN_NET(frame, device, p_model_path, r_model_path, o_model_path):
    """Run MTCNN detection on ``frame`` at its given size.

    Thin wrapper around ``create_mtcnn_net`` that supplies the minimum face
    size from the module-level ``args.miniface`` and forwards everything else
    unchanged.

    Returns:
        The ``(bboxes, landmarks)`` pair produced by ``create_mtcnn_net``.
    """
    min_face = args.miniface
    boxes, points = create_mtcnn_net(
        frame,
        min_face,
        device,
        p_model_path,
        r_model_path,
        o_model_path,
    )
    return boxes, points
basename = os.path.basename(im_path) imgname, suffix = os.path.splitext(basename) imgname_split = imgname.split('-') rec_x1y1 = imgname_split[2].split('_')[0].split('&') rec_x2y2 = imgname_split[2].split('_')[1].split('&') x1, y1, x2, y2 = int(rec_x1y1[0]), int(rec_x1y1[1]), int(rec_x2y2[0]), int( rec_x2y2[1]) boxes = np.zeros((1, 4), dtype=np.int32) boxes[0, 0], boxes[0, 1], boxes[0, 2], boxes[0, 3] = x1, y1, x2, y2 image = cv2.imread(im_path) bboxes = create_mtcnn_net(image, 50, device, p_model_path='../weights/pnet_Weights') dets = np.round(bboxes[:, 0:4]) if dets.shape[0] == 0: continue img = cv2.imread(im_path) idx += 1 height, width, channel = img.shape for box in dets: x_left, y_top, x_right, y_bottom = box[0:4].astype(int) width = x_right - x_left + 1 height = y_bottom - y_top + 1
detect_model.eval() if args.update: targets, names = prepare_facebank(detect_model, path='facebank', tta=args.tta) print('facebank updated') else: targets, names = load_facebank(path='facebank') print('facebank loaded') # targets: number of candidate x 512 image = cv2.imread(args.img) t = time.time() bboxes, landmarks = create_mtcnn_net(image, 32, device, p_model_path='MTCNN/weights/pnet_Weights', r_model_path='MTCNN/weights/rnet_Weights', o_model_path='MTCNN/weights/onet_Weights') t1 = time.time() - t t = time.time() faces = Face_alignment(image, default_square = True,landmarks = landmarks) embs = [] test_transform = trans.Compose([ trans.ToTensor(), trans.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]) for img in faces: if args.tta: mirror = cv2.flip(img,1)
else: targets, names = load_facebank(path='facebank') print('facebank loaded') # targets: number of candidate x 512 cap = cv2.VideoCapture(0) while True: isSuccess, frame = cap.read() if isSuccess: try: start_time = time.time() input = resize_image(frame, args.scale) bboxes, landmarks = create_mtcnn_net( input, args.mini_face, device, p_model_path='MTCNN/weights/pnet_Weights', r_model_path='MTCNN/weights/rnet_Weights', o_model_path='MTCNN/weights/onet_Weights') if bboxes != []: bboxes = bboxes / args.scale landmarks = landmarks / args.scale faces = Face_alignment(frame, default_square=True, landmarks=landmarks) embs = [] test_transform = trans.Compose([
idx = 0 for annotation in annotations: annotation = annotation.strip().split(' ') im_path = os.path.join(prefix, annotation[0]) print(im_path) bbox = list(map(float, annotation[1:])) boxes = np.array(bbox, dtype=np.int32).reshape(-1, 4) # anno form is x1, y1, w, h, convert to x1, y1, x2, y2 boxes[:, 2] += boxes[:, 0] - 1 boxes[:, 3] += boxes[:, 1] - 1 image = cv2.imread(im_path) bboxes, landmarks = create_mtcnn_net(image, 12, device, p_model_path='../train/pnet_Weights', r_model_path='../train/rnet_Weights') dets = np.round(bboxes[:, 0:4]) if dets.shape[0] == 0: continue img = cv2.imread(im_path) idx += 1 height, width, channel = img.shape for box in dets: x_left, y_top, x_right, y_bottom = box[0:4].astype(int) width = x_right - x_left + 1
anno = ET.ElementTree(file=file_xml) xmin = int(anno.find('object').find('bndbox').find('xmin').text) ymin = int(anno.find('object').find('bndbox').find('ymin').text) xmax = int(anno.find('object').find('bndbox').find('xmax').text) ymax = int(anno.find('object').find('bndbox').find('ymax').text) #读取ground truth box1 = [xmin, ymin, xmax, ymax] x1, y1, x2, y2 = xmin, ymin, xmax, ymax boxes = np.zeros((1, 4), dtype=np.int32) boxes[0, 0], boxes[0, 1], boxes[0, 2], boxes[0, 3] = x1, y1, x2, y2 image = cv.imread(im_path) #经过pnet训练得到候选框 bboxes = create_mtcnn_net(image, (50, 15), device, p_model_path='./model/pnet_Weights', o_model_path=None) dets = np.round(bboxes[:, 0:4]) if dets.shape[0] == 0: continue img = cv.imread(im_path) idx += 1 height, width, channel = img.shape for box in dets: x_left, y_top, x_right, y_bottom = box[0:4].astype(int) width = x_right - x_left + 1 height = y_bottom - y_top + 1
im_path = annotation print(im_path) basename = os.path.basename(im_path) imgname, suffix = os.path.splitext(basename) imgname_split = imgname.split('-') rec_x1y1 = imgname_split[2].split('_')[0].split('&') rec_x2y2 = imgname_split[2].split('_')[1].split('&') x1, y1, x2, y2 = int(rec_x1y1[0]), int(rec_x1y1[1]), int(rec_x2y2[0]), int(rec_x2y2[1]) boxes = np.zeros((1,4), dtype=np.int32) boxes[0,0], boxes[0,1], boxes[0,2], boxes[0,3] = x1, y1, x2, y2 image = cv2.imread(im_path) bboxes = create_mtcnn_net(image, [50,50], device, p_model_path='../train/pnet_Weights', o_model_path=None) dets = np.round(bboxes[:, 0:4]) if dets.shape[0] == 0: continue img = cv2.imread(im_path) idx += 1 height, width, channel = img.shape for box in dets: x_left, y_top, x_right, y_bottom = box[0:4].astype(int) width = x_right - x_left + 1 height = y_bottom - y_top + 1
def generate_xml(file):
    """Detect one licence-plate box in an image and write it to an XML file.

    The image is run through ``create_mtcnn_net``. If nothing is found, each
    colour channel is histogram-equalised and detection is retried once. The
    selected box is written as ``<xmin>/<ymin>/<xmax>/<ymax>`` to
    ``<args.xml_save_dir>/<num>.xml``, where ``num`` is the part of the file's
    base name before the first dot.

    Box selection:
        * several detections: take the highest-scoring one if its area
          exceeds the minimum; otherwise take the highest-scoring detection
          among those whose area does;
        * exactly one detection: take it without an area check;
        * no detection, or none large enough: write ``[0, 0, 0, 0]`` and
          increment the module-level ``fail`` counter.

    Args:
        file: path of the image to process.

    Side effects:
        Updates the module-level counters ``fail`` and ``success``, prints
        progress messages and writes the XML file.
    """
    global fail, success
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    img_dir = file
    base_name = os.path.basename(file)
    num = base_name.split(".")[0]
    # Area threshold (w * h, in pixels) a box must exceed to count as a plate.
    min_area = 1300
    box = []

    img = cv.imread(img_dir)
    print("{}加载成功".format(base_name))
    print("MTCNN开始加载图片")
    bboxs = create_mtcnn_net(image=img,
                             mini_lp_size=(50, 15),
                             device=device,
                             p_model_path="./model/pnet_Weights",
                             o_model_path="./model/onet_Weights")
    max_prob = 0

    if len(bboxs) == 0:
        # Prediction failed: equalise each channel's histogram to raise the
        # contrast/brightness, then try once more.
        b, g, r = cv.split(img)
        b = cv.equalizeHist(b)
        g = cv.equalizeHist(g)
        r = cv.equalizeHist(r)
        img = cv.merge((b, g, r))
        bboxs = create_mtcnn_net(image=img,
                                 mini_lp_size=(50, 15),
                                 device=device,
                                 p_model_path="./model/pnet_Weights",
                                 o_model_path="./model/onet_Weights")
        print("{}第一次预测失败,重新校正预测".format(base_name))

    # Branch on "more than one" rather than "not one": with zero detections
    # the argmax below would be evaluated on empty input and raise instead of
    # reaching the failure path further down.
    if len(bboxs) > 1:
        # Several candidates: prefer the most probable one, then check that
        # it is large enough to be a plate.
        max_prob_index = np.argmax(bboxs[:, 4])
        bbox = [int(a) for a in bboxs[max_prob_index, :4]]
        w = bbox[2] - bbox[0]
        h = bbox[3] - bbox[1]
        if w * h > min_area:
            box = bbox
        else:
            # Otherwise pick the most probable candidate among those that
            # pass the size filter.
            for row in bboxs:
                prob = row[4]
                bbox = [int(a) for a in row[:4]]
                w = bbox[2] - bbox[0]
                h = bbox[3] - bbox[1]
                if w * h > min_area and max_prob < prob:
                    box = bbox
                    max_prob = prob
    elif len(bboxs) == 1:
        box = [int(a) for a in bboxs[0, :4]]

    if len(box) == 0:
        fail += 1
        box = [0, 0, 0, 0]
    xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
    # NOTE(review): the success message and counter are also updated when the
    # failure branch above ran; kept as in the original because callers may
    # read these counters -- confirm whether that is intended.
    print("{} 模型预测成功".format(base_name))
    success += 1

    xml_pred = args.xml_save_dir
    filename = xml_pred + "/" + str(num) + ".xml"
    with open(filename, "w") as fw:
        print("<annotation><object><bndbox>", file=fw)
        print("<xmin>{}</xmin><ymin>{}</ymin><xmax>{}</xmax><ymax>{}</ymax>".
              format(xmin, ymin, xmax, ymax),
              file=fw)
        print("</bndbox></object></annotation>", file=fw)
    print("第{}张图片写入xml成功".format(num))