def get_models(mtcnn_w_path='mtcnn/weights', device='cpu'):
    """Build the MTCNN face detector and the InsightFace embedder.

    Args:
        mtcnn_w_path: Directory containing the converted caffe weights
            for pnet/rnet/onet.
        device: Torch device string for the detector. Defaults to 'cpu',
            which preserves the original hard-coded behavior; pass e.g.
            'cuda:0' to run detection on a GPU.

    Returns:
        A ``(detector, embedder)`` tuple: the MTCNN face detector and the
        iresnet100 embedding network (already switched to eval mode).
    """
    # First we create pnet, rnet, onet, and load weights from caffe model.
    pnet, rnet, onet = mtcnn.get_net_caffe(mtcnn_w_path)
    # Then we create a detector on the requested device.
    detector = mtcnn.FaceDetector(pnet, rnet, onet, device=device)
    # Pretrained iresnet100 produces the face embedding; eval() freezes
    # dropout / batch-norm statistics for inference.
    embedder = insightface.iresnet100(pretrained=True)
    embedder.eval()
    return detector, embedder
def main(img):
    """Detect faces in an image file, align each crop, and print its embedding.

    Args:
        img: Path to the input image (passed to ``imread``).
    """
    # detect faces
    # NOTE(review): nets/detector/embedder are rebuilt on every call here
    # rather than reusing a get_models()-style factory — presumably fine
    # for a one-shot script, but worth confirming for repeated use.
    pnet, rnet, onet = mtcnn.get_net_caffe('output/converted')
    detector = mtcnn.FaceDetector(pnet, rnet, onet, device='cuda:0')
    #img = '../FaceDetector/tests/asset/images/office5.jpg'
    img = imread(img)
    boxes, landmarks = detector.detect(img)
    # HWC uint8 image -> CHW float32 CUDA tensor for per-face cropping.
    img = torch.tensor(img.astype(np.float32), device=torch.device("cuda:0")).permute(2, 0, 1)
    # embed faces
    embedder = insightface.iresnet100(pretrained=True)
    embedder.eval()
    # Normalization maps pixel values from [0, 255] into roughly [-1, 1]
    # (127.5 / 128 are the 0-255 equivalents of the commented 0-1 values).
    mean = [127.5] * 3 #[0.5] * 3
    std = [128.] * 3 #[0.5 * 256 / 255] * 3
    preprocess = transforms.Compose([transforms.Normalize(mean, std)])
    landmarks = landmarks.float()
    boxcpu = boxes.cpu().numpy()
    # One iteration per detected face box.
    for f0 in range(boxcpu.shape[0]):
        # In-plane rotation angle from the two eye landmarks
        # (assumes landmark rows 0 and 1 are the eyes — TODO confirm
        # against the detector's landmark ordering).
        angle = torch.atan2(landmarks[f0][1, 1] - landmarks[f0][0, 1], \
            landmarks[f0][1, 0] - landmarks[f0][0, 0])# + np.pi/2
        # Crop the face box out of the CHW image; unsqueeze to NCHW for rot_img.
        local_patch = img[:, boxes[f0][1]:boxes[f0][3], boxes[f0][0]:boxes[f0][2]].unsqueeze(0)
        # Rotate the patch around (what looks like) landmark row 2 — the
        # nose tip, expressed as an offset from the box center normalized
        # by patch size. rot_img is a project helper not visible here;
        # verify its center-offset convention (and the axis order of the
        # normalization, which divides x by shape[2] and y by shape[3]).
        local_patch = rot_img(local_patch, angle, \
            [1./local_patch.shape[2]*\
            (-0.5*(boxes[f0][2]+boxes[f0][0])+landmarks[f0][2, 0]), \
            1./local_patch.shape[3]*\
            (-0.5*(boxes[f0][3]+boxes[f0][1])+landmarks[f0][2, 1])], \
            dtype=torch.cuda.FloatTensor)
        local_patch = local_patch.squeeze(0)
        # Resize to 112x112 via two 1-D interpolations: on a 3-D (C, H, W)
        # tensor F.interpolate resizes only the last dim, so the patch is
        # interpolated, transposed, interpolated again, and transposed back.
        tensor_face = F.interpolate(local_patch, size=112)
        tensor_face = tensor_face.permute(0, 2, 1)
        tensor_face = F.interpolate(tensor_face, size=112)
        tensor_face = tensor_face.permute(0, 2, 1)
        # Move to CPU before normalization; the embedder below also runs on CPU.
        tensor_face = preprocess(tensor_face.cpu())
        with torch.no_grad():
            # Add the batch dim back and take the single embedding row.
            features = embedder(tensor_face.unsqueeze(0))[0].numpy()
        print(features)
# Command-line face-tracking script: read a video, track faces frame by
# frame, and either save or display the annotated result.
parser = argparse.ArgumentParser(description='this is a description')
parser.add_argument('--video_path', type=str, help="Read from video.")
parser.add_argument('--output_folder', type=str, help="Save the tracking result.")
parser.add_argument('--saved_path', type=str, default=None, help="If set, Save as video. Or show it on screen.")
parser.add_argument("--minsize", type=int, default=24, help="Min size of faces you want to detect. Larger number will speed up detect method.")
parser.add_argument('--min_interval', type=int, default=3, help="See FaceTracker.")
parser.add_argument("--device", type=str, default='cpu', help="Target device to process video.")
args = parser.parse_args()
# Build the MTCNN nets from the converted caffe weights, then wrap them in
# a detector and a tracker (the tracker re-detects every min_interval frames).
pnet, rnet, onet = mtcnn.get_net_caffe('output/converted')
detector = mtcnn.FaceDetector(pnet, rnet, onet, device=args.device)
tracker = mtcnn.FaceTracker(detector, min_interval=args.min_interval)
tracker.set_detect_params(minsize=args.minsize)
# Video I/O: open the input and mirror its fps/frame size in the writer.
fourcc = cv2.VideoWriter_fourcc(*"XVID")
cap = cv2.VideoCapture(args.video_path)
fps = cap.get(cv2.CAP_PROP_FPS)
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
# Only create the writer when a save path was given; otherwise frames are
# presumably shown on screen (the loop body is outside this excerpt).
if args.saved_path is not None:
    out = cv2.VideoWriter(args.saved_path, fourcc, fps, size)
# Main frame loop — body continues beyond this excerpt.
while True:
def setUp(self):
    """Prepare the dataset, output folder, and pnet/rnet fixtures for the tests."""
    # Shared dataset fixture for every test in this class.
    self.dataset = get_by_name(DEFAULT_DATASET)
    self.output_folder = os.path.join(here, '../output/test')
    # How many top results to keep when evaluating.
    self.top = 100
    # onet is not exercised by these tests, so it is discarded.
    weights_dir = os.path.join(here, '../output/converted')
    self.pnet, self.rnet, _ = get_net_caffe(weights_dir)
def __init__(self, weights_path='output/converted', device='cuda:0'):
    """Create the wrapped MTCNN face detector.

    Args:
        weights_path: Directory containing the converted caffe weights for
            pnet/rnet/onet. Defaults to the previously hard-coded
            'output/converted'.
        device: Torch device string for the detector. Defaults to the
            previously hard-coded 'cuda:0'.
    """
    # First we create pnet, rnet, onet, and load weights from caffe model.
    pnet, rnet, onet = mtcnn.get_net_caffe(weights_path)
    # Then we create a detector (name-mangled attribute keeps it private).
    self.__detector = mtcnn.FaceDetector(pnet, rnet, onet, device=device)