def main():
    """Run RetinaFace on a single image and display the annotated result.

    Reads ``model_path``, ``image_path`` and ``scale`` from ``get_args()``.
    Every detection is drawn as a box, five landmark dots and its score
    (first five characters); the window stays open until a key is pressed.
    """
    args = get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model. The first 7 characters of every checkpoint key are
    # dropped (the key printed below shows they carry a 'module.' prefix).
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    print(pre_state_dict['module.body.conv1.weight'].cpu().detach().numpy())
    pretrained_dict = {
        k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read image and move the channel axis first
    img = torch.from_numpy(skimage.io.imread(args.image_path))
    img = img.permute(2, 0, 1)

    if args.scale != 1.0:
        size1 = int(img.shape[1] / args.scale)
        size2 = int(img.shape[2] / args.scale)
        img = resize(img.float(), (size1, size2))

    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_landmarks, picked_scores = eval_widerface.get_detections(
        input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    # Back to an HxWxC uint8 array; the conversion swaps the first and last
    # channels before the image is handed to OpenCV.
    np_img = img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    font = cv2.FONT_HERSHEY_SIMPLEX
    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0),
                       (0, 255, 255), (255, 255, 0))
    for j, boxes in enumerate(picked_boxes):
        if boxes is None:
            continue
        for box, landmark, score in zip(boxes, picked_landmarks[j],
                                        picked_scores[j]):
            # OpenCV drawing functions need plain Python ints, not tensor
            # elements.
            x1, y1 = int(box[0]), int(box[1])
            x2, y2 = int(box[2]), int(box[3])
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), thickness=2)
            for k, color in enumerate(landmark_colors):
                center = (int(landmark[2 * k]), int(landmark[2 * k + 1]))
                cv2.circle(img, center, radius=1, color=color, thickness=2)
            cv2.putText(img, text=str(score.item())[:5], org=(x1, y1),
                        fontFace=font, fontScale=0.5, thickness=1,
                        lineType=cv2.LINE_AA, color=(255, 255, 255))

    # The result is only displayed; nothing is written to disk.
    cv2.imshow('RetinaFace-Pytorch', img)
    cv2.waitKey()
def detect_faces(image, model):
    """Detect faces in ``image`` and return one aligned crop per detection.

    Detections come from ``get_detections`` with a score threshold of 0.98
    and an IoU threshold of 0.2. Each box is enlarged to 2.5x its width and
    height (clamped at zero on the left/top edge only), that region is sliced
    out of ``image`` as ``image[y1:y2, x1:x2]``, the landmarks are shifted in
    place into crop coordinates, and both are passed to ``align``.

    Args:
        image: Image passed to ``get_detections`` and sliced by rows/columns.
        model: Detector passed through to ``get_detections``.

    Returns:
        list: The values returned by ``align``, in detection order.
    """
    all_boxes, all_landmarks, _ = get_detections(
        image, model, score_threshold=0.98, iou_threshold=0.2)

    scale = 2.5
    aligned = []
    for idx, boxes in enumerate(all_boxes):
        if boxes is None:
            continue
        for box, landmarks in zip(boxes, all_landmarks[idx]):
            # Margin added on each side so the crop is `scale` times the box.
            pad_x = ((box[2] - box[0]) * (scale - 1)) / 2
            pad_y = ((box[3] - box[1]) * (scale - 1)) / 2

            left = int(max(0, box[0] - pad_x))
            top = int(max(0, box[1] - pad_y))
            right = int(box[2] + pad_x)
            bottom = int(box[3] + pad_y)

            crop = image[top:bottom, left:right]

            # Even indices are x coordinates, odd indices are y coordinates;
            # both are modified in place.
            landmarks[0::2] -= left
            landmarks[1::2] -= top

            aligned.append(align(crop, landmarks))
    return aligned
def main():
    """Run RetinaFace on a video file, showing and recording annotated frames.

    Reads ``model_path``, ``video_path``, ``save_path`` and ``scale`` from
    ``get_args()``. Frames are processed until the stream ends or ``q`` is
    pressed; the annotated frames are written as MJPG at 25 fps.
    """
    args = get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model (first 7 characters of every key are dropped)
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read video
    cap = cv2.VideoCapture(args.video_path)
    codec = cv2.VideoWriter_fourcc(*'MJPG')
    width = int(cap.get(3))
    height = int(cap.get(4))
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    fps = 25.0

    # Frames are shrunk by args.scale before they are written, so the writer
    # must be opened with the same (width, height) the loop below produces.
    # NOTE(review): assumes resize() returns exactly the requested size.
    if args.scale != 1.0:
        out_size = (int(width / args.scale), int(height / args.scale))
    else:
        out_size = (width, height)

    # Fixed: the original passed the literal string 'args.save_path'.
    out = cv2.VideoWriter(args.save_path, codec, fps, out_size)

    font = cv2.FONT_HERSHEY_SIMPLEX
    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0),
                       (0, 255, 255), (255, 255, 0))
    try:
        while True:
            ret, img = cap.read()
            if not ret:
                # Also reached at the normal end of the stream.
                print('Video open error.')
                break

            img = torch.from_numpy(img)
            img = img.permute(2, 0, 1)

            if args.scale != 1.0:
                size1 = int(img.shape[1] / args.scale)
                size2 = int(img.shape[2] / args.scale)
                img = resize(img.float(), (size1, size2))

            input_img = img.unsqueeze(0).float().cuda()
            picked_boxes, picked_landmarks, picked_scores = eval_widerface.get_detections(
                input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

            # astype() keeps the memory layout of the permuted array, which is
            # not C-contiguous after a resize; OpenCV needs a contiguous
            # buffer to draw into.
            np_img = img.cpu().permute(1, 2, 0).numpy()
            img = np.ascontiguousarray(np_img.astype(np.uint8))

            for j, boxes in enumerate(picked_boxes):
                if boxes is None:
                    continue
                for box, landmark, score in zip(boxes, picked_landmarks[j],
                                                picked_scores[j]):
                    # OpenCV drawing functions need plain Python ints.
                    x1, y1 = int(box[0]), int(box[1])
                    x2, y2 = int(box[2]), int(box[3])
                    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255),
                                  thickness=2)
                    for k, color in enumerate(landmark_colors):
                        center = (int(landmark[2 * k]),
                                  int(landmark[2 * k + 1]))
                        cv2.circle(img, center, radius=1, color=color,
                                   thickness=2)
                    cv2.putText(img, text=str(score.item())[:5],
                                org=(x1, y1), fontFace=font, fontScale=0.5,
                                thickness=1, lineType=cv2.LINE_AA,
                                color=(255, 255, 255))

            out.write(img)
            cv2.imshow('RetinaFace-Pytorch', img)
            key = cv2.waitKey(1)
            if key == ord('q'):
                print('Now quit.')
                break
    finally:
        # Release the capture, the writer and the window even on error.
        cap.release()
        out.release()
        cv2.destroyAllWindows()
def main(nummmmmm):
    """Run the 68-landmark RetinaFace model live on camera 0.

    Each frame is converted from OpenCV's channel order, passed through the
    detector, annotated with boxes and 68 landmark dots and displayed. The
    per-frame processing time in seconds is printed. Press ``q`` to stop.

    Args:
        nummmmmm: Unused; kept so existing callers keep working.
    """
    import time

    args = get_args()  # parsed for its side effects; no field is read here

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load trained model (first 7 characters of every key are dropped)
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load('stage_5_68_full_model_epoch_121.pt',
                                map_location='cpu')
    pretrained_dict = {
        k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace.to(device)

    num_landmark_values = 136  # 68 (x, y) pairs

    video = cv2.VideoCapture(0)
    try:
        while True:
            start = time.time()
            ret, img = video.read()
            if not ret:
                # No frame available (camera missing or closed); the
                # original crashed in cvtColor here.
                break

            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = torch.from_numpy(img)
            img = img.permute(2, 0, 1)
            resized_img = img.float()

            # The model lives on `device`, so the input has to be there too.
            input_img = resized_img.unsqueeze(0).to(device)
            picked_boxes, picked_landmarks = eval_widerface.get_detections(
                input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

            np_img = resized_img.cpu().permute(1, 2, 0).numpy()
            img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j, boxes in enumerate(picked_boxes):
                if boxes is None:
                    continue
                for box, landmark in zip(boxes, picked_landmarks[j]):
                    # OpenCV drawing functions need plain Python ints.
                    cv2.rectangle(img, (int(box[0]), int(box[1])),
                                  (int(box[2]), int(box[3])), (0, 0, 255),
                                  thickness=2)
                    for i in range(0, num_landmark_values, 2):
                        cv2.circle(img,
                                   (int(landmark[i]), int(landmark[i + 1])),
                                   radius=1, color=(0, 0, 255), thickness=2)

            cv2.imshow('RetinaFace-Pytorch', img)
            print(time.time() - start)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        video.release()
        cv2.destroyAllWindows()
def main(nummmmmm):
    """Run the 68-landmark model on training samples 100-149 and save images.

    Each sample is taken from ``TrainDataset`` (resized to 640 and padded to
    a square), passed through the detector, annotated, and written to
    ``./RetinaFace-Pytorch<index>.jpg`` at 640x640. The detection time for
    every sample is printed in seconds.

    Args:
        nummmmmm: Unused; kept so existing callers keep working.
    """
    import time

    args = get_args()  # parsed for its side effects; no field is read here

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load trained model (first 7 characters of every key are dropped).
    # map_location keeps the CPU fallback above usable for checkpoints that
    # were saved from a GPU.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(
        '/versa/elvishelvis/RetinaYang/out/stage_5_68_full_model_epoch_51.pt',
        map_location=device)
    pretrained_dict = {
        k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace.to(device)

    dataset_val = TrainDataset('./widerface/train/label.txt',
                               transform=transforms.Compose(
                                   [Resizer(640), PadToSquare()]))

    num_landmark_values = 136  # 68 (x, y) pairs

    for qq in range(100, 150):
        img = dataset_val[qq]['img']
        img = img.permute(2, 0, 1)
        resized_img = img.float()
        input_img = resized_img.unsqueeze(0).to(device)

        start = time.time()
        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.9, iou_threshold=0.2)
        print(time.time() - start)

        np_img = resized_img.cpu().permute(1, 2, 0).numpy()
        img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

        for j, boxes in enumerate(picked_boxes):
            if boxes is None:
                continue
            for box, landmark in zip(boxes, picked_landmarks[j]):
                # OpenCV drawing functions need plain Python ints.
                cv2.rectangle(img, (int(box[0]), int(box[1])),
                              (int(box[2]), int(box[3])), (0, 0, 255),
                              thickness=2)
                for i in range(0, num_landmark_values, 2):
                    cv2.circle(img, (int(landmark[i]), int(landmark[i + 1])),
                               radius=1, color=(0, 0, 255), thickness=2)

        # One output file per sample, named after the dataset index.
        cv2.imwrite('./RetinaFace-Pytorch{}.jpg'.format(qq),
                    cv2.resize(img, (640, 640)))
def main():
    """Run RetinaFace over the images listed in a text file, logging to out.txt.

    Reads ``model_path``, ``text_path``, ``image_path_prefix`` and ``scale``
    from ``get_args()``. Every input line is echoed to ``out.txt``. For each
    line whose image exists, the number of detections is written, followed by
    one ``x y width height score`` record per face. Lines starting with ``#``
    hold the path after the first two characters; other lines get ``.jpg``
    appended. Forward slashes are replaced with backslashes in both cases.
    """
    args = get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model (first 7 characters of every key are dropped)
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    font = cv2.FONT_HERSHEY_SIMPLEX
    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0),
                       (0, 255, 255), (255, 255, 0))

    # Context managers close both files even if detection raises; the
    # original never closed the input file.
    with open("out.txt", 'w') as o, open(args.text_path, 'r') as f:
        for line in f:
            o.write(line)
            print(line)
            line = line.rstrip()
            if line.startswith('#'):
                path = args.image_path_prefix + line[2:].replace('/', '\\')
            else:
                path = (args.image_path_prefix + line.replace('/', '\\')
                        + ".jpg")

            if not os.path.exists(path):
                continue

            # Read the image as grayscale and expand it to three identical
            # channels, scaled by 255 into uint8.
            img1 = skimage.io.imread(path, as_gray=True).astype(np.float32)
            img2 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
            img = (img2 * 255.0).astype(np.uint8)

            img = torch.from_numpy(img)
            img = img.permute(2, 0, 1)

            if args.scale != 1.0:
                size1 = int(img.shape[1] / args.scale)
                size2 = int(img.shape[2] / args.scale)
                img = resize(img.float(), (size1, size2))

            input_img = img.unsqueeze(0).float().cuda()
            picked_boxes, picked_landmarks, picked_scores = eval_widerface.get_detections(
                input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

            if picked_boxes is None or picked_boxes[0] is None:
                o.write("0" + '\n')
            else:
                o.write(str(picked_boxes[0].shape[0]) + '\n')
            if picked_boxes is None:
                # Nothing to enumerate; the original crashed here.
                continue

            np_img = img.cpu().permute(1, 2, 0).numpy()
            img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j, boxes in enumerate(picked_boxes):
                if boxes is None:
                    continue
                for box, landmark, score in zip(boxes, picked_landmarks[j],
                                                picked_scores[j]):
                    # str() on the numpy values keeps the exact text format
                    # of the original output.
                    bx1, by1, bx2, by2 = (box[i].cpu().detach().numpy()
                                          for i in range(4))
                    record = (str(bx1) + " " + str(by1) + " "
                              + str(bx2 - bx1) + " " + str(by2 - by1) + " "
                              + str(score.item()) + '\n')
                    print(record)
                    o.write(record)

                    # OpenCV drawing functions need plain Python ints.
                    x1, y1 = int(box[0]), int(box[1])
                    x2, y2 = int(box[2]), int(box[3])
                    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255),
                                  thickness=2)
                    for k, color in enumerate(landmark_colors):
                        center = (int(landmark[2 * k]),
                                  int(landmark[2 * k + 1]))
                        cv2.circle(img, center, radius=1, color=color,
                                   thickness=2)
                    cv2.putText(img, text=str(score.item())[:5],
                                org=(x1, y1), fontFace=font, fontScale=0.5,
                                thickness=1, lineType=cv2.LINE_AA,
                                color=(255, 255, 255))

            # NOTE: the annotated image is currently neither saved nor shown.
def _hopenet_detect_faces(frame, scale, RetinaFace):
    """Run RetinaFace on an HxWxC uint8 frame; return (canvas, boxes, landmarks)."""
    img = torch.from_numpy(frame).permute(2, 0, 1)

    if scale != 1.0:
        size1 = int(img.shape[1] / scale)
        size2 = int(img.shape[2] / scale)
        img = resize(img.float(), (size1, size2))

    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_landmarks = eval_widerface.get_detections(
        input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    # astype() keeps the memory layout of the permuted array, which is not
    # C-contiguous after a resize; OpenCV needs a contiguous buffer to draw
    # into.
    np_img = img.cpu().permute(1, 2, 0).numpy()
    canvas = np.ascontiguousarray(np_img.astype(np.uint8))
    return canvas, picked_boxes, picked_landmarks


def _hopenet_draw_poses(img, picked_boxes, picked_landmarks, Hopenet,
                        transformations, idx_tensor, draw_landmarks):
    """Estimate head pose for every detected face and draw it on ``img`` in place."""
    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0),
                       (0, 255, 255), (255, 255, 0))
    for j, boxes in enumerate(picked_boxes):
        if boxes is None:
            continue
        for box, landmark in zip(boxes, picked_landmarks[j]):
            # OpenCV drawing functions need plain Python ints.
            bx1, by1 = int(box[0]), int(box[1])
            bx2, by2 = int(box[2]), int(box[3])

            # Clip the crop to the image; skip boxes that end up empty.
            x_min = max(bx1, 0)
            y_min = max(by1, 0)
            x_max = min(bx2, img.shape[1])
            y_max = min(by2, img.shape[0])
            if x_min >= x_max or y_min >= y_max:
                continue
            bbox_height = y_max - y_min

            # NOTE(review): the crop keeps the channel order of the OpenCV
            # frame while Image.fromarray treats it as RGB - confirm this is
            # the order Hopenet expects.
            face_img = Image.fromarray(img[y_min:y_max, x_min:x_max])
            face_img = transformations(face_img).unsqueeze(0).cuda()

            with torch.no_grad():
                yaw, pitch, roll = Hopenet(face_img)

            # Expected bin index over the 66 bins, mapped to degrees with
            # ``* 3 - 99``. dim=1 is what the implicit-dim softmax used for
            # these 2-D outputs.
            yaw_predicted = torch.sum(
                F.softmax(yaw, dim=1)[0] * idx_tensor) * 3 - 99
            pitch_predicted = torch.sum(
                F.softmax(pitch, dim=1)[0] * idx_tensor) * 3 - 99
            roll_predicted = torch.sum(
                F.softmax(roll, dim=1)[0] * idx_tensor) * 3 - 99

            utils.draw_axis(img, yaw_predicted, pitch_predicted,
                            roll_predicted,
                            tdx=(x_min + x_max) / 2,
                            tdy=(y_min + y_max) / 2,
                            size=bbox_height / 2)
            cv2.rectangle(img, (bx1, by1), (bx2, by2), (255, 0, 255),
                          thickness=2)
            if draw_landmarks:
                for k, color in enumerate(landmark_colors):
                    center = (int(landmark[2 * k]), int(landmark[2 * k + 1]))
                    cv2.circle(img, center, radius=1, color=color,
                               thickness=2)


def main():
    """Detect faces with RetinaFace and draw Hopenet head-pose axes on them.

    Reads ``f_model``, ``p_model``, ``type``, ``image_path``, ``video_path``,
    ``out`` and ``scale`` from ``get_args()``. With ``type == 'image'`` one
    image is annotated and shown until a key is pressed. Otherwise the video
    is processed frame by frame, shown, and written to ``args.out`` as MJPG
    at 25 fps until the stream ends or ``q`` is pressed. Landmark dots are
    drawn in video mode only.

    Raises:
        FileNotFoundError: If the image cannot be read in image mode.
    """
    args = get_args()

    # Create retinaface (first 7 characters of every checkpoint key dropped)
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.f_model)
    pretrained_dict = {
        k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()
    print('Retinaface create success.')

    # Create hopenet
    Hopenet = hopenet.Hopenet(torchvision.models.resnet.Bottleneck,
                              [3, 4, 6, 3], 66)
    saved_state_dict = torch.load(args.p_model)
    Hopenet.load_state_dict(saved_state_dict)
    Hopenet = Hopenet.cuda()
    Hopenet.eval()
    print('Hopenet create success.')

    idx_tensor = torch.FloatTensor(list(range(66))).cuda()

    # transforms.Resize is the current name of the deprecated
    # transforms.Scale.
    transformations = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    if args.type == 'image':
        frame = cv2.imread(args.image_path)
        if frame is None:
            # cv2.imread returns None on failure and does not raise.
            raise FileNotFoundError(
                'Cannot read image: {}'.format(args.image_path))

        img, picked_boxes, picked_landmarks = _hopenet_detect_faces(
            frame, args.scale, RetinaFace)
        _hopenet_draw_poses(img, picked_boxes, picked_landmarks, Hopenet,
                            transformations, idx_tensor, draw_landmarks=False)

        cv2.imshow('RetinaFace-Hopenet', img)
        cv2.waitKey()
        return

    # Read video
    cap = cv2.VideoCapture(args.video_path)
    codec = cv2.VideoWriter_fourcc(*'MJPG')
    width = int(cap.get(3))
    height = int(cap.get(4))
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    fps = 25.0

    # Frames are shrunk by args.scale before they are written, so the writer
    # must be opened with the same size.
    # NOTE(review): assumes resize() returns exactly the requested size.
    if args.scale != 1.0:
        out_size = (int(width / args.scale), int(height / args.scale))
    else:
        out_size = (width, height)
    out = cv2.VideoWriter(args.out, codec, fps, out_size)

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Also reached at the normal end of the stream.
                print('Video open error.')
                break

            img, picked_boxes, picked_landmarks = _hopenet_detect_faces(
                frame, args.scale, RetinaFace)
            _hopenet_draw_poses(img, picked_boxes, picked_landmarks, Hopenet,
                                transformations, idx_tensor,
                                draw_landmarks=True)

            out.write(img)
            cv2.imshow('RetinaFace-Pytorch', img)
            key = cv2.waitKey(1)
            if key == ord('q'):
                print('Now quit.')
                break
    finally:
        # Release the capture, the writer and the window even on error.
        cap.release()
        out.release()
        cv2.destroyAllWindows()
def main():
    """Run RetinaFace on one image padded to a square and resized to 640x640.

    Reads ``model_path``, ``image_path`` and ``save_path`` from
    ``get_args()``. The annotated 640x640 image is written to
    ``save_path/<image file name>`` and shown until a key is pressed.
    """
    args = get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model (first 7 characters of every key are dropped)
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read image, pad with zeros to a square, then resize
    img = torch.from_numpy(skimage.io.imread(args.image_path))
    img = img.permute(2, 0, 1)
    padded_img, _ = pad_to_square(img, 0)
    resized_img = resize(padded_img.float(), (640, 640))
    input_img = resized_img.unsqueeze(0).cuda()

    picked_boxes, picked_landmarks = eval_widerface.get_detections(
        input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    np_img = resized_img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0),
                       (0, 255, 255), (255, 255, 0))
    for j, boxes in enumerate(picked_boxes):
        if boxes is None:
            continue
        for box, landmark in zip(boxes, picked_landmarks[j]):
            # OpenCV drawing functions need plain Python ints.
            cv2.rectangle(img, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])), (0, 0, 255),
                          thickness=2)
            for k, color in enumerate(landmark_colors):
                center = (int(landmark[2 * k]), int(landmark[2 * k + 1]))
                cv2.circle(img, center, radius=1, color=color, thickness=2)

    # basename also copes with platform-specific path separators.
    image_name = os.path.basename(args.image_path)
    save_path = os.path.join(args.save_path, image_name)
    cv2.imwrite(save_path, img)
    cv2.imshow('RetinaFace-Pytorch', img)
    cv2.waitKey()
def main():
    """Run RetinaFace live on camera 0 and display the annotated frames.

    Reads ``model_path`` and ``scale`` from ``get_args()``. Every captured
    frame is passed through the detector and shown with boxes and five
    landmark dots. Press ``q`` to stop.

    The original loop grabbed a camera frame but then ignored it, re-read
    ``args.image_path`` on every iteration, and blocked in ``cv2.waitKey()``
    with no way to leave the loop. The captured frame is now what gets
    processed.
    """
    args = get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model (first 7 characters of every key are dropped)
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0),
                       (0, 255, 255), (255, 255, 0))

    vc = cv2.VideoCapture(0)
    try:
        while True:
            # Read video frames in a loop
            rval, img_raw = vc.read()
            if not rval:
                break

            # Swap the first and last channels so the model sees the same
            # channel order the skimage-loaded image provided before.
            img = torch.from_numpy(cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB))
            img = img.permute(2, 0, 1)

            if args.scale != 1.0:
                size1 = int(img.shape[1] / args.scale)
                size2 = int(img.shape[2] / args.scale)
                img = resize(img.float(), (size1, size2))

            input_img = img.unsqueeze(0).float().cuda()
            picked_boxes, picked_landmarks = eval_widerface.get_detections(
                input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

            np_img = img.cpu().permute(1, 2, 0).numpy()
            img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j, boxes in enumerate(picked_boxes):
                if boxes is None:
                    continue
                for box, landmark in zip(boxes, picked_landmarks[j]):
                    # OpenCV drawing functions need plain Python ints.
                    cv2.rectangle(img, (int(box[0]), int(box[1])),
                                  (int(box[2]), int(box[3])), (0, 0, 255),
                                  thickness=2)
                    for k, color in enumerate(landmark_colors):
                        center = (int(landmark[2 * k]),
                                  int(landmark[2 * k + 1]))
                        cv2.circle(img, center, radius=1, color=color,
                                   thickness=2)

            cv2.imshow('RetinaFace-Pytorch', img)
            # Non-blocking wait keeps the stream live; 'q' leaves the loop.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        vc.release()
        cv2.destroyAllWindows()
def main():
    """Run RetinaFace on 100 CelebA images and save the annotated results.

    Processes ``118400.jpg`` to ``118499.jpg`` from the hard-coded CelebA
    directory. Each image is resized to 320x320, passed through the
    detector, annotated, and written to ``RetinaFace-Pytorch<index>.jpg`` at
    640x640. The shape of every input image is printed, and the total
    elapsed time in seconds is printed at the end. ``model_path`` is read
    from ``get_args()``.
    """
    import time

    args = get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model (first 7 characters of every key are dropped)
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace.cuda()

    start = time.time()
    for qq in range(400, 500):
        img = skimage.io.imread(
            "/versa/elvishelvis/RetinaFace_Pytorch/CelebA/Img/img_celeba.7z/img_celeba/118{}.jpg"
            .format(str(qq)))
        print(img.shape)

        img = torch.from_numpy(img)
        img = img.permute(2, 0, 1)
        resized_img = resize(img.float(), (320, 320))
        input_img = resized_img.unsqueeze(0).cuda()

        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

        np_img = resized_img.cpu().permute(1, 2, 0).numpy()
        img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

        for j, boxes in enumerate(picked_boxes):
            if boxes is None:
                continue
            for box, landmark in zip(boxes, picked_landmarks[j]):
                # OpenCV drawing functions need plain Python ints.
                cv2.rectangle(img, (int(box[0]), int(box[1])),
                              (int(box[2]), int(box[3])), (0, 0, 255),
                              thickness=2)
                for i in range(0, 10, 2):
                    cv2.circle(img, (int(landmark[i]), int(landmark[i + 1])),
                               radius=1, color=(0, 0, 255), thickness=2)

        # One output file per input image, named after its index.
        cv2.imwrite('RetinaFace-Pytorch{}.jpg'.format(qq),
                    cv2.resize(img, (640, 640)))

    print(time.time() - start)
# Loaded detectors keyed by checkpoint path, so repeated detect_img() calls
# do not rebuild the network and re-read the checkpoint every time.
_DETECT_IMG_MODELS = {}


def _detect_img_load_model(model_path):
    """Return the RetinaFace model for ``model_path``, loading it on first use."""
    model = _DETECT_IMG_MODELS.get(model_path)
    if model is None:
        # Create torchvision model
        return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
        model = torchvision_model.create_retinaface(return_layers)

        # Load trained model (first 7 characters of every key are dropped)
        retina_dict = model.state_dict()
        pre_state_dict = torch.load(model_path)
        pretrained_dict = {
            k[7:]: v for k, v in pre_state_dict.items()
            if k[7:] in retina_dict
        }
        model.load_state_dict(pretrained_dict)
        model = model.cuda()
        model.eval()
        _DETECT_IMG_MODELS[model_path] = model
    return model


def detect_img(img):
    """Detect faces in an OpenCV image and return an annotated copy.

    ``model_path`` and ``scale`` are read from ``get_args()``. The model is
    loaded once per checkpoint path and reused on later calls; a checkpoint
    replaced on disk under the same path is not picked up again.

    Args:
        img: HxWx3 image array in OpenCV channel order.

    Returns:
        A uint8 array (downscaled by ``args.scale`` when it is not 1.0) with
        a box, five landmark dots and the score drawn for every detection.
    """
    args = get_args()
    RetinaFace = _detect_img_load_model(args.model_path)

    # Swap channels for the model and move the channel axis first
    img = torch.from_numpy(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    img = img.permute(2, 0, 1)

    if args.scale != 1.0:
        size1 = int(img.shape[1] / args.scale)
        size2 = int(img.shape[2] / args.scale)
        img = resize(img.float(), (size1, size2))

    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_landmarks, picked_scores = eval_widerface.get_detections(
        input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    # Back to HxWxC uint8, with the channels swapped back for OpenCV.
    np_img = img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    font = cv2.FONT_HERSHEY_SIMPLEX
    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0),
                       (0, 255, 255), (255, 255, 0))
    for j, boxes in enumerate(picked_boxes):
        if boxes is None:
            continue
        for box, landmark, score in zip(boxes, picked_landmarks[j],
                                        picked_scores[j]):
            # OpenCV drawing functions need plain Python ints.
            x1, y1 = int(box[0]), int(box[1])
            x2, y2 = int(box[2]), int(box[3])
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255),
                          thickness=2)
            for k, color in enumerate(landmark_colors):
                center = (int(landmark[2 * k]), int(landmark[2 * k + 1]))
                cv2.circle(img, center, radius=1, color=color, thickness=2)
            cv2.putText(img, text=str(score.item())[:5], org=(x1, y1),
                        fontFace=font, fontScale=0.5, thickness=1,
                        lineType=cv2.LINE_AA, color=(255, 255, 255))

    return img