Python MTCNN.detect Examples, facenet_pytorch.models.mtcnn.MTCNN.detect Python Examples

Example #1

0

Show file

class FaceExtractor:
    """Find faces in BGR frames with MTCNN and optionally preview the hits."""

    def __init__(self, use_gpu=False):
        """Create the detector on 'cuda:0' when ``use_gpu`` is true, else 'cpu'."""
        self.detector = MTCNN(keep_all=True,
                              device='cuda:0' if use_gpu else 'cpu')

    def extract_faces(self, frame, display_results=False):
        """Return the boxes of all detections that pass the score threshold.

        :param frame: image array that is converted from BGR to RGB before
            being handed to the detector
        :param display_results: when true, draw the accepted boxes and their
            scores on a copy of the image and show it in a window named
            "frame"
        :return: list of accepted boxes; empty when the detector reports
            no boxes at all
        """
        rgb_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        boxes, scores = self.detector.detect(rgb_image)
        if boxes is None:
            return []

        # The drawing surface is only needed in preview mode.
        canvas = None
        painter = None
        if display_results:
            canvas = rgb_image.copy()
            painter = ImageDraw.Draw(canvas)

        accepted = []
        for candidate, confidence in zip(boxes, scores):
            if candidate is not None and not (confidence < FACE_SCORE_THRESHOLD):
                if display_results:
                    painter.rectangle(candidate.tolist(),
                                      outline=(255, 0, 0),
                                      width=6)
                    painter.text((candidate[0], candidate[1]),
                                 str(confidence))
                accepted.append(candidate)

        if display_results:
            # Convert back to BGR because OpenCV windows expect that order.
            preview = cv2.cvtColor(np.array(canvas), cv2.COLOR_RGB2BGR)
            cv2.imshow("frame", preview)
            cv2.waitKey(1)

        return accepted

Example #2

0

Show file

File: deepfake_utils.py Project: CryptoSalamander/DeepFake-Detection

class FastFaceExtractor:
    """Extract padded face crops from the frames of one video, in parallel."""

    def __init__(self, video_read_fn):
        """
        :param video_read_fn: callable taking a video and returning either
            ``None`` (read failure) or a ``(frames, frame_indices)`` pair
        """
        self.video_read_fn = video_read_fn
        self.detector = MTCNN(margin=0,
                              thresholds=[0.7, 0.8, 0.8],
                              device="cuda")

    def getFrame(self, data):
        """Detect faces in a single frame and crop them out of it.

        :param data: ``(position, frame, frame_index)`` tuple; only ``frame``
            and ``frame_index`` are used
        :return: empty dict when the detector reports no boxes, otherwise a
            dict with keys ``video_idx``, ``frame_idx``, ``frame_w``,
            ``frame_h``, ``faces`` and ``scores``
        """
        _, frame, my_idx = data
        frame_h, frame_w = frame.shape[:2]

        # Detection runs on a half-size copy; boxes are scaled back by 2 below.
        img = Image.fromarray(frame.astype(np.uint8))
        img = img.resize(size=[s // 2 for s in img.size])

        batch_boxes, probs = self.detector.detect(img, landmarks=False)
        if batch_boxes is None:
            return dict()

        faces = []
        scores = []
        for bbox, score in zip(batch_boxes, probs):
            if bbox is None:
                continue
            xmin, ymin, xmax, ymax = [int(b * 2) for b in bbox]
            # Use dedicated names for the face size: the original reused
            # ``w``/``h`` here, which overwrote the frame size reported below.
            pad_w = (xmax - xmin) // 3
            pad_h = (ymax - ymin) // 3
            crop = frame[max(ymin - pad_h, 0):ymax + pad_h,
                         max(xmin - pad_w, 0):xmax + pad_w]
            faces.append(crop)
            scores.append(score)

        return {
            "video_idx": 0,
            "frame_idx": my_idx,
            "frame_w": frame_w,
            "frame_h": frame_h,
            "faces": faces,
            "scores": scores,
        }

    def process_video(self, video, max_workers=16):
        """Run :meth:`getFrame` over every frame of ``video`` on a thread pool.

        :param video: value passed straight to ``video_read_fn``
        :param max_workers: size of the thread pool
        :return: list of per-frame results in frame order; empty list when
            ``video_read_fn`` returns ``None``
        """
        result = self.video_read_fn(video)
        if result is None:
            return []

        my_frames, my_idxs = result
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            futures = [
                pool.submit(self.getFrame, (i, frame, my_idxs[i]))
                for i, frame in enumerate(my_frames)
            ]
            # Collect in submission order so results line up with the frames.
            return [future.result() for future in futures]

Example #3

0

Show file

File: FaceDetector.py Project: Ander02/FaceRecognition

class FaceDetector:
    """MTCNN wrapper for face detection and pre-processing for feature extraction."""

    def __init__(self):
        """Disable autograd globally, enable cudnn autotuning and build the MTCNN."""
        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.mtcnn = MTCNN(image_size=input_image_size,
                           min_face_size=30,
                           prewhiten=True,
                           select_largest=True,
                           device=self.device)

    @staticmethod
    def _as_pil(image):
        """Convert a numpy array to a PIL image; return anything else unchanged."""
        if isinstance(image, np.ndarray):
            return Image.fromarray(image)
        return image

    def pre_process(self, image):
        """
        Resize and pre-process an image for feature extraction.

        :param image: cv2 image
        :return: pre-processed image tensor on ``self.device``, or ``None``
            when ``cv2.resize`` raises ``cv2.error``
        """
        try:
            image = cv2.resize(image, (input_image_size, input_image_size),
                               interpolation=cv2.INTER_AREA)
        except cv2.error:
            return None
        img_tensor = functional.to_tensor(np.float32(image)).to(self.device)
        return prewhiten(img_tensor)

    def detect(self, image):
        """
        Run face detection and return the detected boxes and scores.

        :param image: image (PIL or cv2/numpy) to run detection on
        :return: ``(boxes, scores)`` as returned by the MTCNN, with ``boxes``
            rounded to the nearest integer; ``boxes`` is ``None`` when the
            MTCNN reports no boxes
        """
        boxes, scores = self.mtcnn.detect(self._as_pil(image))
        if boxes is not None:
            boxes = np.rint(boxes).astype(int)

        return boxes, scores

    def extract_face(self, image, save_path=None):
        """
        Run face detection and return the MTCNN's cropped, pre-processed face.

        :param image: image (PIL.Image or cv2 numpy.ndarray) to run detection on
        :param save_path: optional path where the detected face is saved
        :return: result of calling the MTCNN with ``return_prob=True``
            (presumably a ``(face_tensor, probability)`` pair -- confirm
            against the facenet_pytorch version in use)
        """
        return self.mtcnn(self._as_pil(image),
                          save_path=save_path,
                          return_prob=True)

Example #4

0

Show file

File: process_data.py Project: TLMOS/mephi_pp_b20

class FaceDetector:
    """Batched MTCNN face detection over a sequence of frames."""

    def __init__(self, batch_size, thresholds, device=None):
        """
        :param batch_size: number of frames passed to the detector per call
        :param thresholds: thresholds forwarded to the MTCNN constructor
        :param device: device forwarded to the MTCNN constructor
        """
        self.detector = MTCNN(margin=0, thresholds=thresholds, device=device)
        self.batch_size = batch_size

    def detect_faces(self, frames, scale_coef):
        """Detect faces batch by batch and rescale the boxes.

        :param frames: sliceable sequence of frames
        :param scale_coef: every detected box is divided by this value
            before being truncated to integers
        :return: one entry per detector result: a nested list of integer
            boxes, or ``None`` where the detector returned ``None``
        """
        step = self.batch_size
        collected = []
        batch_count = math.ceil(len(frames) / step)
        for batch_no in range(batch_count):
            begin = batch_no * step
            detections, *_ = self.detector.detect(frames[begin:begin + step])
            for found in detections:
                if found is None:
                    collected.append(None)
                else:
                    collected.append((found / scale_coef).astype(int).tolist())
        return collected

Example #5

0

Show file

File: face_detector.py Project: sowmen/celeb-df

class FacenetDetector(VideoFaceDetector):
    """Video face detector backed by an MTCNN with fixed thresholds."""

    def __init__(self, device="cuda:0") -> None:
        """Initialise the base class and build the MTCNN on ``device``."""
        super().__init__()
        self.detector = MTCNN(margin=0,
                              thresholds=[0.85, 0.95, 0.95],
                              device=device)

    @property
    def _batch_size(self):
        """Batch size reported by this detector."""
        return 16

    def _detect_faces(self, frames) -> List:
        """Return one entry per detector result: a list of boxes, or ``None``."""
        detections, *_ = self.detector.detect(frames, landmarks=False)
        per_frame = []
        for found in detections:
            per_frame.append(None if found is None else found.tolist())
        return per_frame

Example #6

0

Show file

File: face_detector.py Project: shazaAhmed/DeepFake

class FacenetDetector(VideoFaceDetector):
    """Video face detector that delegates detection to an MTCNN."""

    def __init__(self, device="cuda:0") -> None:
        """Initialise the base class, then build the MTCNN on ``device``.

        The MTCNN is created with no margin around the bounding box and
        with thresholds ``[0.85, 0.95, 0.95]``; the default device is
        ``'cuda:0'``.
        """
        super().__init__()
        self.detector = MTCNN(margin=0,
                              thresholds=[0.85, 0.95, 0.95],
                              device=device)

    def _detect_faces(self, frames) -> List:
        """Return one entry per detector result: a list of boxes, or ``None``."""
        detections, *_ = self.detector.detect(frames, landmarks=False)
        per_frame = []
        for found in detections:
            per_frame.append(None if found is None else found.tolist())
        return per_frame

    @property
    def _batch_size(self):
        """Batch size reported by this detector."""
        return 32

Example #7

0

Show file

class FaceExtractor:
    """Extract padded face crops from the frames of one or more videos."""

    def __init__(self, video_read_fn):
        """
        :param video_read_fn: callable taking a video path and returning
            either ``None`` (read failure) or a ``(frames, frame_indices)``
            pair
        """
        self.video_read_fn = video_read_fn
        self.detector = MTCNN(margin=0,
                              thresholds=[0.7, 0.8, 0.8],
                              device="cuda")

    def process_videos(self, input_dir, filenames, video_idxs):
        """Detect and crop faces in every readable video selected by index.

        :param input_dir: directory containing the videos
        :param filenames: list of video file names inside ``input_dir``
        :param video_idxs: indices into ``filenames`` of the videos to process
        :return: list of dicts, one per frame in which the detector reported
            boxes, with keys ``video_idx``, ``frame_idx``, ``frame_w``,
            ``frame_h``, ``faces`` and ``scores``
        """
        results = []
        for video_idx in video_idxs:
            # Read the full-size frames from this video.
            video_path = os.path.join(input_dir, filenames[video_idx])
            result = self.video_read_fn(video_path)
            # Error? Then skip this video.
            if result is None:
                continue

            my_frames, my_idxs = result
            for i, frame in enumerate(my_frames):
                frame_h, frame_w = frame.shape[:2]

                # Detection runs on a half-size copy; boxes are scaled back
                # by 2 below.
                img = Image.fromarray(frame.astype(np.uint8))
                img = img.resize(size=[s // 2 for s in img.size])

                batch_boxes, probs = self.detector.detect(img, landmarks=False)
                if batch_boxes is None:
                    continue

                faces = []
                scores = []
                for bbox, score in zip(batch_boxes, probs):
                    if bbox is None:
                        continue
                    xmin, ymin, xmax, ymax = [int(b * 2) for b in bbox]
                    # Use dedicated names for the face size: the original
                    # reused ``w``/``h`` here, which overwrote the frame
                    # size reported in the result dict.
                    pad_w = (xmax - xmin) // 3
                    pad_h = (ymax - ymin) // 3
                    crop = frame[max(ymin - pad_h, 0):ymax + pad_h,
                                 max(xmin - pad_w, 0):xmax + pad_w]
                    faces.append(crop)
                    scores.append(score)

                results.append({
                    "video_idx": video_idx,
                    "frame_idx": my_idxs[i],
                    "frame_w": frame_w,
                    "frame_h": frame_h,
                    "faces": faces,
                    "scores": scores,
                })

        return results

    def process_video(self, video_path):
        """Convenience method for doing face extraction on a single video."""
        input_dir = os.path.dirname(video_path)
        filenames = [os.path.basename(video_path)]
        return self.process_videos(input_dir, filenames, [0])

Example #8

0

Show file

def calcEmbedsRec(urlNew):
    """Recognise known faces in snapshots polled from ``urlNew`` for one minute.

    Reference embeddings are computed from the images under
    ``student_data/Test`` (one sub-folder per person). The function then
    fetches an image from ``urlNew`` in a loop for 60 seconds (or until Enter
    is pressed in an OpenCV window), crops faces, embeds them, and compares
    them with the references by rescaled cosine similarity.

    Side effects: prints progress, opens OpenCV windows, writes face images
    under ``./student_data/Pics/`` and records annotated frames to
    ``Output.mp4``.

    :param urlNew: URL that returns one encoded image per GET request
    :return: list of names that were recognised more than 30 times
    """
    from collections import Counter

    recognized_names = []

    print('Received url: ', urlNew)
    # Fall back to the CPU when CUDA is not available instead of crashing.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Running on device: {}'.format(device))

    mtcnn_kwargs = dict(image_size=160,
                        margin=0,
                        min_face_size=20,
                        thresholds=[0.6, 0.7, 0.7],
                        factor=0.709,
                        prewhiten=True,
                        device=device)
    # Single-face detector for the reference images, multi-face for the feed.
    reference_mtcnn = MTCNN(**mtcnn_kwargs)
    live_mtcnn = MTCNN(keep_all=True, **mtcnn_kwargs)

    # Define Inception Resnet V1 module (loaded once and reused below).
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

    # The original defined numpy-based cos_sim helpers but always rebound
    # ``cos_sim`` to this module before any call, so this is what actually ran.
    cos_sim = nn.CosineSimilarity(dim=-1, eps=1e-6)

    def cos(a, b):
        """Cosine similarity rescaled from [-1, 1] to [0, 1]."""
        return (cos_sim(a, b) + 1) / 2

    # Define a dataset and data loader
    dataset = datasets.ImageFolder('student_data/Test')
    dataset.idx_to_class = {i: c for c, i in dataset.class_to_idx.items()}
    loader = DataLoader(dataset, collate_fn=lambda x: x[0])

    # Detect the face in each reference image and keep the aligned crops.
    aligned = []
    names = []
    for x, y in loader:
        x_aligned, prob = reference_mtcnn(x, return_prob=True)
        if x_aligned is not None:
            print('Face detected with probability: {:8f}'.format(prob))
            aligned.append(x_aligned)
            names.append(dataset.idx_to_class[y])

    # Calculate the reference face embeddings
    aligned = torch.stack(aligned).to(device)
    embeddings = resnet(aligned).to(device)

    # Print the similarity matrix between reference faces; helpful for
    # analysing the results and choosing the threshold used in verify().
    dists = [[cos(e1, e2).item() for e2 in embeddings] for e1 in embeddings]
    print(pd.DataFrame(dists, columns=names, index=names))

    def verify(embedding, start_rec_time, canvas, boxes):
        """Label every live face on ``canvas`` that matches a reference face."""
        for ref_idx, ref_emb in enumerate(embeddings):
            for face_idx, face_emb in enumerate(embedding):
                dist = cos(ref_emb, face_emb)

                # Chosen threshold is 0.8, determined from the table printed
                # above.
                if dist > 0.8:
                    # The name is drawn just below the detected face box and
                    # also printed.
                    text = names[ref_idx]
                    cv2.putText(canvas, text,
                                (boxes[face_idx][0].astype(int),
                                 boxes[face_idx][3].astype(int) + 17),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0),
                                2)
                    print(text)
                    recognized_names.append(text)

                textOnImg = "Time Elapsed: " + str(
                    int(time.time() - start_rec_time)) + " s"
                cv2.putText(canvas, textOnImg, (20, 20),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)

    print("Camera on")
    cv2.namedWindow("Detected faces")

    output_params = {"-fourcc": "MJPG", "-fps": 30}
    writer = WriteGear(output_filename='Output.mp4',
                       compression_mode=False,
                       logging=True,
                       **output_params)

    # Run face recognition for 1 minute
    start_face_rec = time.time()
    end_face_rec = start_face_rec + 60

    # Running counter used to name the saved face images. The original reused
    # ``i`` for the inner box loop as well, which reset this counter.
    image_no = 1
    try:
        while time.time() < end_face_rec:
            img_resp = requests.get(urlNew)
            img_arr = np.array(bytearray(img_resp.content), dtype=np.uint8)
            snapshot = cv2.imdecode(img_arr, -1)

            # Flip to act as a mirror, then resize keeping the aspect ratio.
            mirrored = cv2.flip(snapshot, 1)
            frame = imutils.resize(mirrored, width=400)

            winlist = pcn.detect(frame)
            img1 = pcn.draw(frame, winlist)
            faces = [crop_face(img1, win, 160)[0] for win in winlist]

            for face_no, face_img in enumerate(faces, start=1):
                print('Printing Face no: ', face_no)
                cv2.imshow('Detected faces', face_img)

                # Save the captured face, then read it back as a PIL image.
                img_path = os.path.join("./student_data/Pics/",
                                        "image_{}.jpg".format(image_no))
                cv2.imwrite(img_path, face_img)
                image_no += 1
                pil_face = Image.open(img_path)

                img_cropped = live_mtcnn(pil_face)
                boxes, _ = live_mtcnn.detect(pil_face)

                # Save each detected and cropped face.
                if boxes is not None:
                    for box_idx, box in enumerate(boxes):
                        extract_face(
                            pil_face,
                            box,
                            save_path='./student_data/Pics/Cropped_Face_{}.jpg'.
                            format(box_idx))
                    pil_face.copy().save(
                        './student_data/Pics/Faces_Detected.jpg')

                # Embed the cropped faces and try to identify them.
                if img_cropped is not None:
                    img_embedding = resnet(img_cropped.to(device)).to(device)
                    verify(img_embedding, start_face_rec, img1, boxes)

                # Show and record the annotated frame.
                cv2.imshow('Detecting...', img1)
                writer.write(img1)

            if not faces:
                textForImg = "Time Elapsed: " + str(
                    int(time.time() - start_face_rec)) + " s"
                cv2.putText(img1, textForImg, (40, 40),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)
                cv2.imshow('Detecting...', img1)

            # 13 is the 'Enter' key: stop early when it is pressed.
            if cv2.waitKey(1) == 13:
                break
    finally:
        # Always release the windows and finalise the video file.
        cv2.destroyAllWindows()
        writer.close()

    print("calculating a list of all recognized faces...")

    rec_names_dict = dict(Counter(recognized_names))
    filtered_names = [
        name for name, hits in rec_names_dict.items() if hits > 30
    ]

    print("Total Recognized names: ", rec_names_dict)

    print("Filtered names: ", filtered_names)

    return filtered_names

Example #9

0

Show file

# Webcam loop: grab frames from camera 0, run MTCNN on a downscaled copy and
# draw the detected face boxes on the full-size (mirrored) frame.
# NOTE(review): this excerpt appears to be cut off -- nothing displays
# `frame`, the loop has no exit, and the landmark coordinates computed at the
# end are never used. Confirm against the complete file.
detector = MTCNN(margin=0, thresholds=[0.7, 0.8, 0.8], device=device)
# detector = MTCNN(margin=0, thresholds=[0.85, 0.95, 0.95], device=device)
video_capture = cv2.VideoCapture(0)
# 'face' draws boxes only; 'landmark' also requests landmarks from the detector.
mode = 'landmark'
# NOTE(review): despite the name, this is divided by frame.shape[0] below,
# so it acts as the target size along the first image axis -- confirm intent.
WIDTH = 224
while True:
    # Capture frame-by-frame
    start = time.time()
    ret, frame = video_capture.read()
    if ret:
        # Scale factor applied to the frame for detection; detections are
        # divided by it again to map them back onto the full-size frame.
        rate = WIDTH/frame.shape[0]
        # Mirror the frame before resizing and detecting.
        frame = cv2.flip(frame, 1)
        resized = cv2.resize(frame, None, fx=rate, fy=rate)
        # print('resize time:',time.time()-start)
        if mode == 'face':
            face_boxes, pred = detector.detect(resized, landmarks=False)
            # print('detect face:', time.time() - start)
            if face_boxes is not None:
                for face_box in face_boxes:
                    x0, y0, x1, y1 = [int(t/rate) for t in face_box]
                    cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 255, 0), 2)
        elif mode == 'landmark':
            face_boxes, pred, landmarks = detector.detect(resized, landmarks=True)
            if face_boxes is not None:
                for face_box in face_boxes:
                    x0, y0, x1, y1 = [int(t/rate) for t in face_box]
                    cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 255, 0), 2)
            if landmarks is not None:
                for landmark in landmarks:
                    for circles in landmark:
                        # Landmark point mapped back to full-size coordinates.
                        x0, y0 = [int(t/rate) for t in circles]

Example #10

0

Show file

File: generate_test_datasets.py Project: noahch/PyAffact

def _create_test_images(config, df_test_labels, df_test_landmarks,
                        df_test_bounding_boxes, transformer):
    """
    Generates test images based on dataframes

    For every row of ``df_test_labels`` the image is loaded, landmarks or a
    bounding box are chosen according to ``config.dataset.bounding_box_mode``
    (0: landmarks from ``df_test_landmarks``; 1: box from
    ``df_test_bounding_boxes``; 2: box detected by MTCNN, falling back to the
    box from ``df_test_bounding_boxes`` when nothing is detected), the
    transformer is applied, and the result is saved as a PNG in
    ``config.dataset_result_folder``.

    :param config: Configuration File
    :param df_test_labels: labels dataframe
    :param df_test_landmarks: Landmark dataframe
    :param df_test_bounding_boxes: bounding boxes dataframe
    :param transformer: transformer
    """

    create_directory(config.dataset_result_folder, recreate=True)
    print("created {}".format(config.dataset_result_folder))
    pbar = tqdm(range(len(df_test_labels.index)))

    mode = config.dataset.bounding_box_mode
    # The detector is only needed (and only requires CUDA) in mode 2.
    mtcnn = None
    if mode == 2:
        mtcnn = MTCNN(select_largest=False,
                      post_process=False,
                      device='cuda:0')

    try:
        for index, (_, row) in enumerate(df_test_labels.iterrows()):
            image = bob.io.base.load('{}/{}'.format(
                config.dataset.dataset_image_folder, row.name))
            landmarks, bounding_boxes = None, None
            if mode == 0:
                landmarks = df_test_landmarks.iloc[index].tolist()
                landmarks = landmarks[:4] + landmarks[6:]
            elif mode == 1:
                bounding_boxes = df_test_bounding_boxes.iloc[index].tolist()
                bounding_boxes = bounding_boxes[1:]
            elif mode == 2:
                detected, _, _ = mtcnn.detect(Image.fromarray(
                    np.transpose(image, (1, 2, 0)), 'RGB'),
                                              landmarks=True)
                scale = config.dataset.bounding_box_scale

                if detected is None or len(detected) == 0:
                    # If the MTCNN cannot find a bounding box, we load the
                    # bounding box from the disk
                    bounding_boxes = df_test_bounding_boxes.iloc[
                        index].tolist()
                    bounding_boxes = bounding_boxes[1:]
                else:
                    # Turn the first detection's last two values from
                    # far-corner coordinates into extents.
                    bounding_boxes = detected[0]
                    bounding_boxes[2] = bounding_boxes[2] - bounding_boxes[0]
                    bounding_boxes[3] = bounding_boxes[3] - bounding_boxes[1]

                # Grow the box by ``scale`` around its centre.
                bounding_boxes[0] = bounding_boxes[0] - (
                    (scale - 1) / 2 * bounding_boxes[2])
                bounding_boxes[1] = bounding_boxes[1] - (
                    (scale - 1) / 2 * bounding_boxes[3])
                bounding_boxes[2] = scale * (bounding_boxes[2])
                bounding_boxes[3] = scale * (bounding_boxes[3])

            sample = {
                'image': image,
                'landmarks': landmarks,
                'bounding_boxes': bounding_boxes,
                'index': index
            }
            X = transformer(sample)

            img = tensor_to_image(X)
            # Replace the last three characters of the file name with 'png'.
            img.save('{}/{}'.format(config.dataset_result_folder,
                                    row.name[:-3] + 'png'))

            pbar.update(1)
    finally:
        # Close the progress bar even when an image fails to process.
        pbar.close()