def __init__(self, device="cuda:0") -> None:
     # Call the constructor of the VideoFaceDetector base class
     super().__init__()
     # Create a face detection pipeline with MTCNN (Multi-Task Cascaded
     # Convolutional Networks, a model that detects faces and facial landmarks):
     # no extra margin around the bounding boxes, detection thresholds tuned for
     # the dataset, and the device on which to run the network passes.
     self.detector = MTCNN(margin=0,
                           thresholds=[0.85, 0.95, 0.95],
                           device=device)
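
For context, a minimal usage sketch for a detector configured this way; the frame path is a placeholder and facenet_pytorch is assumed to be installed:

from PIL import Image
from facenet_pytorch import MTCNN

detector = MTCNN(margin=0, thresholds=[0.85, 0.95, 0.95], device="cpu")
frame = Image.open("frame_0001.png")   # hypothetical frame image
boxes, probs = detector.detect(frame)  # boxes is an Nx4 array, or None
if boxes is not None:
    for box, prob in zip(boxes, probs):
        print("face at", box.tolist(), "confidence", float(prob))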
Example #2
 def __init__(self):
     torch.set_grad_enabled(False)  # inference only; no autograd bookkeeping needed
     cudnn.benchmark = True         # let cuDNN pick the fastest convolution algorithms
     self.device = torch.device(
         "cuda:0" if torch.cuda.is_available() else "cpu")
     self.mtcnn = MTCNN(image_size=input_image_size,  # constant assumed to be defined at module level
                        min_face_size=30,
                        prewhiten=True,  # argument from older facenet_pytorch releases (later renamed post_process)
                        select_largest=True,
                        device=self.device)
Example #3
def load_data(path, device):
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    face_detector = MTCNN(margin=0, keep_all=False, select_largest=False, thresholds=[0.6, 0.7, 0.7],
                          min_face_size=60, factor=0.8, device=device).eval()
    video_fd = cv2.VideoCapture(path)
    if not video_fd.isOpened():
        print('Failed to open the video:', path)
        return

    frame_index = 0
    faces = []
    success, frame = video_fd.read()
    while success:
        cropped_face = detect_face(frame, face_detector)
        # Check for None before resizing: cv2.resize raises on a None input,
        # so the original resize-then-check order crashed on frames without a face.
        if cropped_face is not None:
            cropped_face = cv2.resize(cropped_face, (64, 64))
            cropped_face = transform(cropped_face)
            faces.append(cropped_face)
        frame_index += 1
        success, frame = video_fd.read()
    video_fd.release()
    print('video frame length:', frame_index)
    num_faces = len(faces)
    faces = torch.stack(faces, dim=0)
    faces = torch.unsqueeze(faces, 0)
    # One label per extracted face (frames without a detected face are skipped above).
    y = torch.ones(num_faces).type(torch.IntTensor)
    return faces, y
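
A hedged usage sketch for load_data; the clip path is a placeholder, and the shapes follow from the 64x64 resize and the ToTensor transform above:

faces, y = load_data('sample_clip.mp4', device='cuda:0')
print(faces.shape)  # (1, num_faces, 3, 64, 64)
print(y.shape)      # one label per extracted face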
Example #4
def return_dataset(video_list, src_dir, output_dir, split='train', fd='dlib'):
    # `video_list` and `split` avoid shadowing the built-ins `list` and `type`.
    if fd == 'dlib':
        face_detector = dlib.cnn_face_detection_model_v1('./mmod_human_face_detector.dat')
    else:
        face_detector = MTCNN(margin=0, keep_all=False, select_largest=False, thresholds=[0.6, 0.7, 0.7],
                              min_face_size=60, factor=0.8, device='cuda').eval()
    dataset = []
    for videoName, className in tqdm(video_list):
        class_dir = os.path.join(output_dir, split, str(className))
        # makedirs also creates missing parent directories
        os.makedirs(class_dir, exist_ok=True)
        video_path = os.path.join(src_dir, videoName)
        video_fd = cv2.VideoCapture(video_path)
        if not video_fd.isOpened():
            print('Skipped: {}'.format(video_path))
            continue

        frame_index = 0
        success, frame = video_fd.read()
        while success:
            img_path = os.path.join(output_dir, split, str(className), '%s_%d.png'
                                    % (videoName.split('/')[-4] + '_' + videoName.split('/')[-1], frame_index))
            cropped_face = detect_face(frame, face_detector, fd)
            if cropped_face is not None:
                cv2.imwrite(img_path, cropped_face)
                info = [str(className), videoName.split('/')[-4] + '_' + videoName.split('/')[-1], img_path]
                # Save this frame's info (class, video id, image path)
                dataset.append(info)
            frame_index += 1
            success, frame = video_fd.read()
        print(frame_index)
        video_fd.release()

    return dataset
Example #5
class FaceExtractor:
    def __init__(self, use_gpu=False):
        if use_gpu:
            device = 'cuda:0'
        else:
            device = 'cpu'
        self.detector = MTCNN(keep_all=True, device=device)

    def extract_faces(self, frame, display_results=False):
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        boxes, scores = self.detector.detect(img)

        if boxes is None:
            return []

        if display_results:
            frame_draw = img.copy()
            draw = ImageDraw.Draw(frame_draw)

        faces = []
        for box, score in zip(boxes, scores):
            # FACE_SCORE_THRESHOLD is assumed to be defined at module level
            if box is None or score < FACE_SCORE_THRESHOLD:
                continue
            if display_results:
                draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)
                draw.text((box[0], box[1]), str(score))
            faces.append(box)

        if display_results:
            cv2.imshow("frame",
                       cv2.cvtColor(np.array(frame_draw), cv2.COLOR_RGB2BGR))
            cv2.waitKey(1)

        return faces
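
A minimal sketch of driving FaceExtractor from a webcam with a standard OpenCV capture loop; FACE_SCORE_THRESHOLD (e.g. 0.9) is assumed to be defined in the same module, and 'q' stops the loop:

import cv2

extractor = FaceExtractor(use_gpu=False)
cap = cv2.VideoCapture(0)
while True:
    ok, frame = cap.read()
    if not ok:
        break
    boxes = extractor.extract_faces(frame, display_results=True)
    print(len(boxes), "face(s) found")
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()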
Example #6
 def __init__(self):
     self.__aligner: MTCNN = MTCNN(keep_all=True,
                                   thresholds=[0.6, 0.7, 0.9])
     self.__facenet_preprocessor: Compose = transforms.Compose(
         [Whitening()])
     self.__facenet: InceptionResnetV1 = InceptionResnetV1(
         pretrained='vggface2').eval()
Example #7
class FastFaceExtractor:
    def __init__(self, video_read_fn):
        self.video_read_fn = video_read_fn
        self.detector = MTCNN(margin=0,
                              thresholds=[0.7, 0.8, 0.8],
                              device="cuda")

    def getFrame(self, data):
        idx, frame, my_idx = data
        h, w = frame.shape[:2]
        img = Image.fromarray(frame.astype(np.uint8))
        img = img.resize(size=[s // 2 for s in img.size])

        batch_boxes, probs = self.detector.detect(img, landmarks=False)

        faces = []
        scores = []
        if batch_boxes is None: return dict()
        for bbox, score in zip(batch_boxes, probs):
            if bbox is not None:
                # Map the box back to full resolution (the frame was downscaled 2x).
                xmin, ymin, xmax, ymax = [int(b * 2) for b in bbox]
                # Use separate names for the box size so the frame's w/h
                # reported in frame_dict below are not overwritten.
                box_w = xmax - xmin
                box_h = ymax - ymin
                p_h = box_h // 3
                p_w = box_w // 3
                crop = frame[max(ymin - p_h, 0):ymax + p_h,
                             max(xmin - p_w, 0):xmax + p_w]
                faces.append(crop)
                scores.append(score)

        frame_dict = {
            "video_idx": 0,
            "frame_idx": my_idx,
            "frame_w": w,
            "frame_h": h,
            "faces": faces,
            "scores": scores
        }
        return frame_dict

    def process_video(self, video, max_workers=16):
        result = self.video_read_fn(video)

        if result is None: return []

        my_frames, my_idxs = result
        results = []
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            futures = []
            for i, frame in enumerate(my_frames):
                future = pool.submit(self.getFrame, (i, frame, my_idxs[i]))
                futures.append(future)

            for future in futures:
                results.append(future.result())

        return results
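
FastFaceExtractor expects a video_read_fn that returns (frames, frame_indices) or None; the real reader is defined elsewhere, so this is only a compatible OpenCV-based sketch:

import cv2

def read_frames(video_path, every_n=10):
    cap = cv2.VideoCapture(video_path)
    frames, idxs, i = [], [], 0
    success, frame = cap.read()
    while success:
        if i % every_n == 0:
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            idxs.append(i)
        i += 1
        success, frame = cap.read()
    cap.release()
    return (frames, idxs) if frames else None

extractor = FastFaceExtractor(read_frames)
frame_dicts = extractor.process_video("sample.mp4")  # hypothetical clip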
Example #8
class FaceDetector:
    def __init__(self):
        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.mtcnn = MTCNN(image_size=input_image_size,
                           min_face_size=30,
                           prewhiten=True,
                           select_largest=True,
                           device=self.device)
        # self.detector = MTCNN()

    def pre_process(self, image):
        """
        Redimensiona e preprocessa imagem para extracao de features
        :param image: imagem do cv2
        :return: img_tensor pre-processado para extracao de features
        """
        try:
            image = cv2.resize(image, (input_image_size, input_image_size),
                               interpolation=cv2.INTER_AREA)
        except cv2.error:
            return None
        img_tensor = functional.to_tensor(np.float32(image)).to(self.device)
        return prewhiten(img_tensor)
        # face = F.to_tensor(np.float32(face))

    def detect(self, image):
        """
        Realiza deteccao facial e retorna boxes/scores detectados
        :rtype: numpy.ndarray ou None caso nao nenhuma face seja detectada
        :param image: imagem (do Pil ou do cv2) para a deteccao
        :return: arrays boxes com localizacoes das faces e scores, com a probabilidade de presenca de face
        """
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        boxes, scores = self.mtcnn.detect(image)
        if boxes is not None:
            boxes = np.rint(boxes).astype(int)

        return boxes, scores

    def extract_face(self, image, save_path=None):
        """
        Realiza deteccao facial, extrai a imagem da maior face, e pre-processa a imagem para extracao de features
        :rtype: torch.tensor
        :param image: imagem {PIL.Image ou numpy.ndarray do cv2} para a deteccao
        :param save_path: um caminho para salvar a face detectada (opcional)
        :return: imagem da face pre-processada
        """
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        return self.mtcnn(image, save_path=save_path, return_prob=True)
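
A short usage sketch, assuming input_image_size is defined at module level as the class requires; the image path is a placeholder:

import cv2

fd = FaceDetector()
img = cv2.cvtColor(cv2.imread("person.jpg"), cv2.COLOR_BGR2RGB)
boxes, scores = fd.detect(img)
print(boxes, scores)
face, prob = fd.extract_face(img, save_path="face.png")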
Example #9
class FaceDetector:
    def __init__(self, batch_size, thresholds, device=None):
        self.batch_size = batch_size
        self.detector = MTCNN(margin=0, thresholds=thresholds, device=device)
    
    def detect_faces(self, frames, scale_coef):
        boxes = []
        for i in range(math.ceil(len(frames) / self.batch_size)):
            batch_boxes, *_ = self.detector.detect(frames[i*self.batch_size:(i + 1)*self.batch_size])
            boxes += [(b/scale_coef).astype(int).tolist() if b is not None else None for b in batch_boxes]
        return boxes
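
Batching amortizes MTCNN overhead across frames. A hedged usage sketch: frames are assumed to be equally sized images prepared elsewhere, and scale_coef undoes a prior downscaling (e.g. 0.5 if the frames were halved):

detector = FaceDetector(batch_size=16, thresholds=[0.85, 0.95, 0.95], device="cuda:0")
boxes = detector.detect_faces(frames, scale_coef=0.5)  # one box list (or None) per frame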
Example #10
def load_image_classify_emotion(path):
    """
    Loads an image from a path and classifies the face's emotion
    Parameters
    -------
    path the path of the image
    Returns
    -------
    0 if the model determines the image is negative and 1 if the model determines the image is positive
    """
    pic = plt.imread(path)
    mtcnn = MTCNN()
    faces = mtcnn.forward(pic.copy())
    # MTCNN returns None when no face is found; fail early rather than
    # crashing on faces.numpy() below.
    if faces is None:
        raise ValueError("No face detected in the image")

    model = EmotionCNN()
    model.load_state_dict(torch.load("emotion_model_new.pt"))
    model.eval()

    pre_process = transforms.Compose([
        transforms.Resize(48),
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])

    classes = ["negative", "positive"]
    fig = plt.figure(figsize=(25, 4))
    ax = fig.add_subplot()

    image = np.transpose(faces.numpy(), (1, 2, 0))
    image = Image.fromarray((image * 255).astype(np.uint8))
    image = pre_process(image)

    plt.imshow(np.transpose(faces.numpy(), (1, 2, 0)))
    output = model(image.reshape(1, 1, 48, 48))
    prediction = torch.argmax(output, dim=1).item()

    ax.set_title(f"Predicted:{classes[prediction]}")

    return pic, prediction
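
A usage sketch; the image path is a placeholder, and emotion_model_new.pt must exist as the function expects:

pic, prediction = load_image_classify_emotion("face_photo.jpg")
print("positive" if prediction == 1 else "negative")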
Example #11
class FacenetDetector(VideoFaceDetector):
    
    def __init__(self, device="cuda:0") -> None:
        super().__init__()
        self.detector = MTCNN(margin=0, thresholds=[0.85, 0.95, 0.95], device=device)
    
    @property
    def _batch_size(self):
        return 16
    
    def _detect_faces(self, frames) -> List:
        batch_boxes, *_ = self.detector.detect(frames, landmarks=False)
        return [b.tolist() if b is not None else None for b in batch_boxes]
Example #12
class FacenetDetector(VideoFaceDetector):
    #To create an MTCNN detector that runs on the GPU, the model is instantiated with device='cuda:0'
    def __init__(self, device="cuda:0") -> None:
        # Call the constructor of the VideoFaceDetector base class
        super().__init__()
        # Create a face detection pipeline with MTCNN (Multi-Task Cascaded
        # Convolutional Networks, a model that detects faces and facial landmarks):
        # no extra margin around the bounding boxes, detection thresholds tuned for
        # the dataset, and the device on which to run the network passes.
        self.detector = MTCNN(margin=0,thresholds=[0.85, 0.95, 0.95], device=device)

    def _detect_faces(self, frames) -> List:
        batch_boxes, *_ = self.detector.detect(frames, landmarks=False)
        return [b.tolist() if b is not None else None for b in batch_boxes]

    @property
    def _batch_size(self):
        return 32
Example #13
    def __init__(self,
                 transform=None,
                 labels=None,
                 landmarks=None,
                 bounding_boxes=None,
                 config=None):
        """
        init
        :param transform: transformations to apply to each image
        :param labels: ground truth dataframe (y-label)
        :param landmarks: landmarks dataframe
        :param bounding_boxes: bounding box dataframe
        :param config: training configuration file
        """

        self.labels = labels

        # use landmarks
        if config.dataset.bounding_box_mode == 0:
            self.landmarks = landmarks

        # use bounding boxes
        elif config.dataset.bounding_box_mode == 1:
            self.bounding_boxes = bounding_boxes

        # use face detector
        elif config.dataset.bounding_box_mode == 2:
            self.mtcnn = MTCNN(
                select_largest=False,
                device=config.basic.cuda_device_name.split(',')[0])

        else:
            raise Exception(
                "Choose a valid bounding_box_mode (0=landmarks hand-labeled, 1=bbx hand-labeled, 2=bbx detected)"
            )

        self.transform = transform
        assert self.transform is not None, "A basic transformation is needed, e.g. Resize() and ToTensor()"
        self.config = config
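
A minimal sketch of the configuration this dataset expects, using SimpleNamespace as a stand-in for the real config object; the field names are taken from the code above:

from types import SimpleNamespace

config = SimpleNamespace(
    dataset=SimpleNamespace(bounding_box_mode=2),      # 2 = detect boxes with MTCNN
    basic=SimpleNamespace(cuda_device_name="cuda:0"),  # .split(',')[0] picks the first device
)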
Example #14
def main():
    args = parse_args()
    trans = transforms.Compose(
        [preprocessing.ExifOrientationNormalize(),
         transforms.Resize(1024)])

    images = datasets.ImageFolder(root=args.input_folder)
    images.idx_to_class = {v: k for k, v in images.class_to_idx.items()}
    create_dirs(args.output_folder, images.classes)

    mtcnn = MTCNN(prewhiten=False)

    for idx, (path, y) in enumerate(images.imgs):
        print("Aligning {} {}/{} ".format(path, idx + 1, len(images)), end='')
        aligned_path = args.output_folder + os.path.sep + images.idx_to_class[
            y] + os.path.sep + os.path.basename(path)
        if not os.path.exists(aligned_path):
            img = mtcnn(img=trans(Image.open(path).convert('RGB')),
                        save_path=aligned_path)
            print("No face found" if img is None else '')
        else:
            print('Already aligned')
Example #15
def create_face_embeddings(image_dir):
    _logger.info(
        f"Starting creation of face embeddings in directory {image_dir}")

    # If required, create a face detection pipeline using MTCNN:
    mtcnn = MTCNN()

    # Create an inception resnet (in eval mode):
    resnet = InceptionResnetV1(pretrained='vggface2').eval()

    img_paths_and_embeddings = list()
    img_paths_and_embeddings.append(['Image Path', 'Embedding'])
    no_faces_found = list()
    no_faces_found.append(['Image Path'])

    pbar = tqdm(total=len(os.listdir(image_dir)))
    batch_size = 128
    for file_name_batch in walk_dir_in_batches(image_dir,
                                               batch_size=batch_size):
        for file_name in file_name_batch:
            # Calculate embedding
            if file_name.endswith(tuple(image_ext)):
                img_path = os.path.join(image_dir, file_name)
                _logger.debug(img_path)

                img_embedding = get_embedding(img_path, mtcnn, resnet)
                if img_embedding is None:
                    no_faces_found.append(img_path)
                else:
                    img_paths_and_embeddings.append(
                        [img_path,
                         img_embedding.detach().cpu().numpy()])
            # Advance for every file, image or not, so the bar's total
            # (len(os.listdir)) is actually reached.
            pbar.update(1)
    pbar.close()
    _logger.info(
        f"Finished creation of face embeddings in directory {image_dir}")
    return img_paths_and_embeddings, no_faces_found
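
get_embedding is called above but not shown. A hypothetical implementation consistent with those calls (crop with MTCNN, embed with the resnet, return None when no face is found; PIL's Image is assumed to be imported):

def get_embedding(img_path, mtcnn, resnet):
    img = Image.open(img_path).convert('RGB')
    face = mtcnn(img)  # aligned face tensor, or None if no face was detected
    if face is None:
        return None
    return resnet(face.unsqueeze(0))[0]  # 512-d embedding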
Example #16
 def __init__(self, batch_size, thresholds, device=None):
     self.batch_size = batch_size
     self.detector = MTCNN(margin=0, thresholds=thresholds, device=device)
Example #17
 def __init__(self, use_gpu=False):
     if use_gpu:
         device = 'cuda:0'
     else:
         device = 'cpu'
     self.detector = MTCNN(keep_all=True, device=device)
Example #18
import cv2
import time
from facenet_pytorch.models.mtcnn import MTCNN

device = "cpu:0"
detector = MTCNN(margin=0, thresholds=[0.7, 0.8, 0.8], device=device)
# detector = MTCNN(margin=0, thresholds=[0.85, 0.95, 0.95], device=device)
video_capture = cv2.VideoCapture(0)
mode = 'landmark'
WIDTH = 224
while True:
    # Capture frame-by-frame
    start = time.time()
    ret, frame = video_capture.read()
    if ret:
        rate = WIDTH/frame.shape[0]
        frame = cv2.flip(frame, 1)
        resized = cv2.resize(frame, None, fx=rate, fy=rate)
        # print('resize time:',time.time()-start)
        if mode == 'face':
            face_boxes, pred = detector.detect(resized, landmarks=False)
            # print('detect face:', time.time() - start)
            if face_boxes is not None:
                for face_box in face_boxes:
                    x0, y0, x1, y1 = [int(t/rate) for t in face_box]
                    cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 255, 0), 2)
        elif mode == 'landmark':
            face_boxes, pred, landmarks = detector.detect(resized, landmarks=True)
            if face_boxes is not None:
                for face_box, landmark in zip(face_boxes, landmarks):
                    x0, y0, x1, y1 = [int(t/rate) for t in face_box]
                    cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 255, 0), 2)
                    # draw the five facial landmarks returned alongside each box
                    for px, py in landmark:
                        cv2.circle(frame, (int(px / rate), int(py / rate)), 2, (0, 0, 255), -1)
        # show the annotated frame; press 'q' to quit (the original loop had no exit)
        cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

video_capture.release()
cv2.destroyAllWindows()
Example #19
 def __init__(self, image_size: int):
     self._cropper = MTCNN(image_size=image_size, margin=int(0.3 * image_size), device=torch.device("cuda"))
Example #20
 def __init__(self, video_read_fn):
     self.video_read_fn = video_read_fn
     self.detector = MTCNN(margin=0,
                           thresholds=[0.7, 0.8, 0.8],
                           device="cuda")
Example #21
class FaceExtractor:
    def __init__(self, video_read_fn):
        self.video_read_fn = video_read_fn
        self.detector = MTCNN(margin=0,
                              thresholds=[0.7, 0.8, 0.8],
                              device="cuda")

    def process_videos(self, input_dir, filenames, video_idxs):
        videos_read = []
        frames_read = []
        frames = []
        results = []
        for video_idx in video_idxs:
            # Read the full-size frames from this video.
            filename = filenames[video_idx]
            video_path = os.path.join(input_dir, filename)
            result = self.video_read_fn(video_path)
            # Error? Then skip this video.
            if result is None: continue

            videos_read.append(video_idx)

            # Keep track of the original frames (need them later).
            my_frames, my_idxs = result

            frames.append(my_frames)
            frames_read.append(my_idxs)
            for i, frame in enumerate(my_frames):
                h, w = frame.shape[:2]
                img = Image.fromarray(frame.astype(np.uint8))
                img = img.resize(size=[s // 2 for s in img.size])

                batch_boxes, probs = self.detector.detect(img, landmarks=False)

                faces = []
                scores = []
                if batch_boxes is None:
                    continue
                for bbox, score in zip(batch_boxes, probs):
                    if bbox is not None:
                        # Map the box back to full resolution (the frame was downscaled 2x).
                        xmin, ymin, xmax, ymax = [int(b * 2) for b in bbox]
                        # Separate names for the box size keep the frame's w/h
                        # intact for the frame_dict below.
                        box_w = xmax - xmin
                        box_h = ymax - ymin
                        p_h = box_h // 3
                        p_w = box_w // 3
                        crop = frame[max(ymin - p_h, 0):ymax + p_h,
                                     max(xmin - p_w, 0):xmax + p_w]
                        faces.append(crop)
                        scores.append(score)

                frame_dict = {
                    "video_idx": video_idx,
                    "frame_idx": my_idxs[i],
                    "frame_w": w,
                    "frame_h": h,
                    "faces": faces,
                    "scores": scores
                }
                results.append(frame_dict)

        return results

    def process_video(self, video_path):
        """Convenience method for doing face extraction on a single video."""
        input_dir = os.path.dirname(video_path)
        filenames = [os.path.basename(video_path)]
        return self.process_videos(input_dir, filenames, [0])
Example #22
    return img


#%%
trans = transforms.Compose([transforms.Resize(512)])

trans_cropped = transforms.Compose(
    [np.float32, transforms.ToTensor(), prewhiten])

#%%
dataset = datasets.ImageFolder('dataset/lfw', transform=trans)
dataset.idx_to_class = {k: v for v, k in dataset.class_to_idx.items()}
loader = DataLoader(dataset, collate_fn=lambda x: x[0])

#%%
mtcnn = MTCNN(device=torch.device('cpu'))

#%%
total_item = len(dataset)
names = []
aligned = []
for img, idx in tqdm(loader):
    name = dataset.idx_to_class[idx]
    # start = time()
    img_align = mtcnn(
        img)  #, save_path = "data/aligned/{}/{}.png".format(name, str(idx)))
    # print('MTCNN time: {:6f} seconds'.format(time() - start))

    if img_align is not None:
        names.append(name)
        aligned.append(img_align)
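
A short continuation sketch: embed the aligned crops and print a pairwise distance matrix, assuming InceptionResnetV1 from facenet_pytorch and pandas imported as pd (practical only for a small subset of the dataset):

resnet = InceptionResnetV1(pretrained='vggface2').eval()
embeddings = resnet(torch.stack(aligned)).detach()
dists = [[(e1 - e2).norm().item() for e2 in embeddings] for e1 in embeddings]
print(pd.DataFrame(dists, columns=names, index=names))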
Example #23
 def __init__(self, device="cuda:0") -> None:
     super().__init__()
     self.detector = MTCNN(margin=0,
                           thresholds=[0.85, 0.95, 0.95],
                           device=device)
Example #24
def calcEmbedsRec(urlNew):

    #initialize identified names
    recognized_names = []

    print('Received url: ', urlNew)
    device = torch.device('cuda:0')
    print('Running on device: {}'.format(device))

    mtcnn = MTCNN(image_size=160,
                  margin=0,
                  min_face_size=20,
                  thresholds=[0.6, 0.7, 0.7],
                  factor=0.709,
                  prewhiten=True,
                  device=device)

    #Takes two vectors 'a' and 'b' and returns their cosine similarity,
    #computed from the definition of the dot product.
    def cos_sim(a, b):
        dot_product = np.dot(a, b)
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        return dot_product / (norm_a * norm_b)

    #cos_sim returns values in [-1, 1], where negative values have a different
    #interpretation, so this helper rescales the result to [0, 1].
    def cos(a, b):
        minx = -1
        maxx = 1
        return (cos_sim(a, b) - minx) / (maxx - minx)
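    # For example, a raw cosine similarity of -0.5 is rescaled to (-0.5 - (-1)) / (1 - (-1)) = 0.25.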

    # Define Inception Resnet V1 module (GoogLe Net)
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

    # Define a dataset and data loader
    dataset = datasets.ImageFolder('student_data/Test')
    dataset.idx_to_class = {i: c for c, i in dataset.class_to_idx.items()}
    loader = DataLoader(dataset, collate_fn=lambda x: x[0])

    #Perform MTCNN face detection.
    #Detects the face present in each image and prints the detection probability.
    aligned = []
    names = []
    for x, y in loader:
        x_aligned, prob = mtcnn(x, return_prob=True)
        if x_aligned is not None:
            print('Face detected with probability: {:8f}'.format(prob))
            aligned.append(x_aligned)
            names.append(dataset.idx_to_class[y])

    # Calculate the 512 face embeddings
    aligned = torch.stack(aligned).to(device)
    embeddings = resnet(aligned).to(device)

    # Print the distance matrix for the classes:
    #the embeddings are compared pairwise and the cosine distance is measured.
    # NOTE: this rebinds the local name cos_sim, so cos() above now calls the
    # torch module (which still returns a cosine similarity in [-1, 1]).
    cos_sim = nn.CosineSimilarity(dim=-1, eps=1e-6)
    for i in range(0, len(names)):
        emb = embeddings[i].unsqueeze(0)
        # The cosine similarity between the embeddings is given by 'dist'.
        dist = cos(embeddings[0], emb)

    dists = [[cos(e1, e2).item() for e2 in embeddings] for e1 in embeddings]
    # The print statement below is helpful for analysing the results
    #and for determining the threshold value.
    print(pd.DataFrame(dists, columns=names, index=names))

    i = 1
    # A Haar cascade classifier is loaded for webcam face detection.
    #It is preferred over MTCNN as it is faster; real-time applications need speed.
    #(Note: the capture loop below actually detects faces with PCN.)
    classifier = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

    #Takes two vectors 'a' and 'b' and returns their cosine similarity,
    #computed from the definition of the dot product.
    def cos_sim(a, b):
        dot_product = np.dot(a, b)
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        return dot_product / (norm_a * norm_b)

    #cos_sim returns values in [-1, 1], where negative values have a different
    #interpretation, so this helper rescales the result to [0, 1].
    def cos(a, b):
        minx = -1
        maxx = 1
        return (cos_sim(a, b) - minx) / (maxx - minx)

    #This is the function for doing face recognition.
    def verify(embedding, start_rec_time):
        for i, k in enumerate(embeddings):
            for j, l in enumerate(embedding):
                #Computing Cosine distance.
                dist = cos(k, l)

                #The chosen threshold is 0.8, determined from the
                #distance table printed in the previous cell.
                if dist > 0.8:
                    #The identified person's name is printed to the console
                    #and drawn below the detected face (below the rectangular box).
                    text = names[i]

                    #textOnImg = text + " - Time Elapsed: " +  str(int(time.time() - start_rec_time)) + " s"
                    cv2.putText(img1, text, (boxes[j][0].astype(int),
                                             boxes[j][3].astype(int) + 17),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0),
                                2)
                    #cv2.putText(img1, textOnImg, (20, 20), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255,0,0), 2)
                    print(text)

                    #if text in names:
                    recognized_names.append(text)
                #else:
                textOnImg = "Time Elapsed: " + str(
                    int(time.time() - start_rec_time)) + " s"
                cv2.putText(img1, textOnImg, (20, 20),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)

    #Define Inception Resnet V1 module (GoogLe Net)
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

    mtcnn = MTCNN(image_size=160,
                  margin=0,
                  min_face_size=20,
                  thresholds=[0.6, 0.7, 0.7],
                  factor=0.709,
                  prewhiten=True,
                  device=device,
                  keep_all=True)

    #Camera is opened. Webcam video streaming starts.
    #vs = WebcamVideoStream(src=0).start()
    print("Camera on")
    cv2.namedWindow("Detected faces")

    options = {
        "CAP_PROP_FRAME_WIDTH": 640,
        "CAP_PROP_FRAME_HEIGHT": 480,
        "CAP_PROP_FPS ": 30
    }
    output_params = {"-fourcc": "MJPG", "-fps": 30}
    writer = WriteGear(output_filename='Output.mp4',
                       compression_mode=False,
                       logging=True,
                       **output_params)
    #stream = VideoGear(source=0, time_delay=1, logging=True, **options).start()

    #url = "http://192.168.43.223:8080/shot.jpg"
    url = urlNew

    #run face recognition for 1 minute
    start_face_rec = time.time()
    end_face_rec = time.time() + 60

    while (time.time() < end_face_rec):

        # frm = stream.read()
        # if frm is None:
        #     break

        img_resp = requests.get(url)
        img_arr = np.array(bytearray(img_resp.content), dtype=np.uint8)

        img = cv2.imdecode(img_arr, -1)

        #im= vs.read()
        #Flip to act as a mirror

        im = cv2.flip(img, 1)

        #try:
        #The resize function of imutils maintains the aspect ratio and provides
        #keyword arguments width and height to resize the image to the intended size.
        frame = imutils.resize(im, width=400)

        #Detecting faces using Haarcascade classifier.

        winlist = pcn.detect(frame)
        img1 = pcn.draw(frame, winlist)
        face = list(map(lambda win: crop_face(img1, win, 160), winlist))
        face = [f[0] for f in face]
        #cv2.imshow('Live Feed', img1)
        cnt = 1
        for f in face:
            #fc, u = crop_face(img, f)
            print('Printing Face no: ', cnt)
            cv2.imshow('Detected faces', f)
            cnt += 1

            #faces = classifier.detectMultiScale(face)
            path = "./student_data/Pics/".format(i)
            img_name = "image_{}.jpg".format(i)
            #The captured image is saved.
            cv2.imwrite(os.path.join(path, img_name), f)
            imgName = "./student_data/Pics/image_{}.jpg".format(i)

            # Get cropped and prewhitened image tensor
            img = Image.open(imgName)
            i = i + 1
            img_cropped = mtcnn(img)
            boxes, prob = mtcnn.detect(img)
            img_draw = img.copy()
            draw = ImageDraw.Draw(img_draw)
            #print(boxes)
            #Rectangular boxes are drawn on faces present in the image.
            #The detected and cropped faces are then saved.
            if boxes is not None:
                # Use a separate loop variable so the outer image counter `i`
                # is not clobbered.
                for box_idx, box in enumerate(boxes):
                    #draw.rectangle(box.tolist())
                    extract_face(
                        img,
                        box,
                        save_path='./student_data/Pics/Cropped_Face_{}.jpg'.
                        format(box_idx))
                img_draw.save('./student_data/Pics/Faces_Detected.jpg')
                ima = cv2.imread('./student_data/Pics/Faces_Detected.jpg')

                #Calculate embeddings of each cropped face.
            if (img_cropped is not None):
                img_embedding = resnet(img_cropped.cuda()).to(device)

                #Call function verify.
                #Identify the person with the help of embeddings.
                cos_sim = nn.CosineSimilarity(dim=-1, eps=1e-6)
                verify(img_embedding, start_face_rec)
            #else:
            #textForImg = "Time Elapsed: " +  str(int(time.time() - start_face_rec)) + " s"
            #cv2.putText(frame, textForImg, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255,255,255), 2)

            #'Detecting..' window opens.
            #Rectangular boxes are drawn on detected faces.
            #The identified faces have their respective name below the box.
            cv2.imshow('Detecting...', img1)
            writer.write(img1)

        if (not face):
            #cv2.imshow(f"Time Elapsed: ${str(int(time.time() - start_face_rec))}  s" ,frame)
            textForImg = "Time Elapsed: " + str(
                int(time.time() - start_face_rec)) + " s"
            cv2.putText(img1, textForImg, (40, 40),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)
            #print("no face")
            cv2.imshow('Detecting...', img1)
        # except:
        #     #In case 'try' doesn't work, "Get the image embedding" text is printed on the screen.
        #     #Run first cell
        #     text="Get the image embeddings"
        #     print(text)
        #     break

        key = cv2.waitKey(1)

        #13 is for 'Enter' key.
        #If 'Enter' key is pressed, all the windows are made to close forcefully.
        if key == 13:
            break

    print("calculating a list of all recognized faces...")

    rec_names_dict = {i: recognized_names.count(i) for i in recognized_names}

    filtered_names = []
    for key in rec_names_dict:
        if rec_names_dict[key] > 30:
            filtered_names.append(key)

    print("Total Recognized names: ", rec_names_dict)

    print("Filtered names: ", filtered_names)

    cv2.destroyAllWindows()
    writer.close()
    #vs.stop()
    #return {i:rec_names_dict[i] for i in filtered_names}
    return filtered_names
Example #25
import os

os.environ["OMP_NUM_THREADS"] = "1"

from tqdm import tqdm


import cv2

cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)
from deepfake_classifier.classifier.preprocessing.utils import get_original_video_paths

from PIL import Image
from facenet_pytorch.models.mtcnn import MTCNN
import numpy as np

detector = MTCNN(margin=0, thresholds=[0.65, 0.75, 0.75], device="cpu")


def save_landmarks(ori_id, root_dir):
    ori_id = ori_id[:-4]
    ori_dir = os.path.join(root_dir, "crops", ori_id)
    landmark_dir = os.path.join(root_dir, "landmarks", ori_id)
    os.makedirs(landmark_dir, exist_ok=True)
    for frame in range(320):
        if frame % 10 != 0:
            continue
        for actor in range(2):
            image_id = "{}_{}.png".format(frame, actor)
            landmarks_id = "{}_{}".format(frame, actor)
            ori_path = os.path.join(ori_dir, image_id)
            landmark_path = os.path.join(landmark_dir, landmarks_id)
Example #26
def _create_test_images(config, df_test_labels, df_test_landmarks,
                        df_test_bounding_boxes, transformer):
    """
    Generates test images based on dataframes
    :param config: Configuration File
    :param df_test_labels: labels dataframe
    :param df_test_landmarks: Landmark dataframe
    :param df_test_bounding_boxes: bounding boxes dataframe
    :param transformer: transformer
    """

    create_directory(config.dataset_result_folder, recreate=True)
    print("created {}".format(config.dataset_result_folder))
    pbar = tqdm(range(len(df_test_labels.index)))

    mtcnn = MTCNN(select_largest=False, post_process=False, device='cuda:0')

    for index, (i, row) in enumerate(df_test_labels.iterrows()):
        image = bob.io.base.load('{}/{}'.format(
            config.dataset.dataset_image_folder, row.name))
        landmarks, bounding_boxes = None, None
        if config.dataset.bounding_box_mode == 0:
            landmarks = df_test_landmarks.iloc[index].tolist()
            landmarks = landmarks[:4] + landmarks[6:]
        elif config.dataset.bounding_box_mode == 1:
            bounding_boxes = df_test_bounding_boxes.iloc[index].tolist()
            bounding_boxes = bounding_boxes[1:]
        elif config.dataset.bounding_box_mode == 2:
            bounding_boxes, probs, lm = mtcnn.detect(Image.fromarray(
                np.transpose(image, (1, 2, 0)), 'RGB'),
                                                     landmarks=True)
            # print(bounding_boxes)
            scale = config.dataset.bounding_box_scale

            # If the MTCNN cannot find a bounding box, we load the bounding box from the disk
            try:
                bounding_boxes = bounding_boxes[0]
                bounding_boxes[2] = bounding_boxes[2] - bounding_boxes[0]
                bounding_boxes[3] = bounding_boxes[3] - bounding_boxes[1]
            except (TypeError, IndexError):  # MTCNN returned None / no usable box
                # print(row.name)
                bounding_boxes = df_test_bounding_boxes.iloc[index].tolist()
                bounding_boxes = bounding_boxes[1:]

            bounding_boxes[0] = bounding_boxes[0] - (
                (scale - 1) / 2 * bounding_boxes[2])
            bounding_boxes[1] = bounding_boxes[1] - (
                (scale - 1) / 2 * bounding_boxes[3])
            bounding_boxes[2] = scale * (bounding_boxes[2])
            bounding_boxes[3] = scale * (bounding_boxes[3])
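            # e.g. with scale = 1.3 the box grows by 30% in width and height,
            # shifted by 15% of each so it stays centred on the face.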

        # `sample` avoids shadowing the built-in `input`.
        sample = {
            'image': image,
            'landmarks': landmarks,
            'bounding_boxes': bounding_boxes,
            'index': index
        }
        X = transformer(sample)

        img = tensor_to_image(X)
        img.save('{}/{}'.format(config.dataset_result_folder,
                                row.name[:-3] + 'png'))

        pbar.update(1)
    pbar.close()