Example #1
class FaceCam():
    # Video class based on OpenCV
    def __init__(self):
        self.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        self.mtcnn = MTCNN(device=self.device)
        self.open = True
        self.gender_model = def_model('gender', self.device)
        self.gaze_model = def_model('gaze', self.device)
        self.emotion_model = def_model('emotion', self.device)
        self.multimodal_model = def_model('multimodal', self.device)
        

    def rec(self):
        global label

        cap = cv2.VideoCapture(0)
        
        while(self.open==True):
            timer_start = time.time()

            print('start camera!')
            ret, frame = cap.read()

            try:
                # detect face box and probability
                boxes, probs = self.mtcnn.detect(frame, landmarks=False)

                # draw box on frame
                frame = draw_bbox(frame, boxes, probs)

                # perform only when face is detected
                if len(boxes) > 0:
                    # extract the face rois
                    rois = detect_rois(boxes)
                    for roi in rois:
                        (start_Y, end_Y, start_X, end_X) = roi
                        face = frame[start_Y:end_Y, start_X:end_X]
                        print('detect time: ', time.time()-timer_start)
                    
                    predict_start = time.time()
                    gender_i = predict(self.gender_model, face, self.device)
                    gaze_i = predict(self.gaze_model, face, self.device)
                    emotion_i = predict(self.emotion_model, face, self.device)
                    multimodal_i = predict(self.multimodal_model, face, self.device)

                    cv2.putText(frame, label['gender'][gender_i], (end_X-50, start_Y-55), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,0,0), 2, cv2.LINE_AA)
                    cv2.putText(frame, label['gaze'][gaze_i], (end_X-50, start_Y-40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,0,0), 2, cv2.LINE_AA)
                    cv2.putText(frame, label['emotion'][emotion_i], (end_X-50, start_Y-25), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,0,0), 2, cv2.LINE_AA)
                    cv2.putText(frame, label['multimodal'][multimodal_i], (end_X-50, start_Y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,0,0), 2, cv2.LINE_AA)
                    print('predict time: ', time.time()-predict_start)
            except Exception as e:
                print(e)
                pass
            
            # show the frame
            cv2.imshow('Demo', frame)
            
            # q to quit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print('Interrupted by user!')
                break

        # clear program and close windows
        cap.release()
        cv2.destroyAllWindows()
        print('All done!')
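
The class above also relies on helpers that are not shown (`def_model`, `predict`, `draw_bbox`, `detect_rois`, and a global `label` dict). A minimal sketch of the two box-handling helpers, inferred only from how they are called above; everything here is an assumption rather than the original code:

import cv2

def draw_bbox(frame, boxes, probs):
    # Draw every detected box and its detection probability onto the frame.
    if boxes is None:
        return frame
    for box, prob in zip(boxes, probs):
        x1, y1, x2, y2 = [int(v) for v in box]
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
        cv2.putText(frame, f'{prob:.2f}', (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
    return frame

def detect_rois(boxes):
    # Convert MTCNN boxes (x1, y1, x2, y2) into the (start_Y, end_Y, start_X, end_X)
    # tuples that the loop above uses to slice the frame.
    return [(max(int(y1), 0), int(y2), max(int(x1), 0), int(x2))
            for (x1, y1, x2, y2) in boxes]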
Example #2
            saveimg = np.squeeze(saveimg.transpose(1, 2, 0))
            Image.fromarray(saveimg).save(f"{name}_{i}.png")

        result_cropped_tensors.append(cropped_tensors.to(device))

    if len(no_face_indices) > 20:
        # a few videos start without a detectable face; allow roughly 0.5 seconds of such frames, otherwise discard the clip
        return None
    del frames
    # Stack all frames
    result_cropped_tensors = torch.stack(result_cropped_tensors)
    # Embed all frames
    result_cropped_tensors = result_cropped_tensors.to(device)
    if use_half:
        result_cropped_tensors = result_cropped_tensors.half()

    with torch.no_grad():
        emb = resnet(result_cropped_tensors)
    if use_half:
        emb = emb.float()
    return emb.to(cpu_device)


if __name__ == "__main__":
    mtcnn = MTCNN(keep_all=True).eval()
    resnet = InceptionResnetV1(pretrained="vggface2").eval()
    device = torch.device("cpu")
    res = input_face_embeddings(["a.jpg", "b.jpg"], True, mtcnn, resnet, device)
    print(res.shape)  # 512D
    print("Passed")
Example #3
import numpy as np
import matplotlib.pyplot as plt
from facenet_pytorch import MTCNN
from glob import glob
import os
from tqdm.auto import tqdm

raw_data_dir = r'/media/rrtammyfs/labDatabase/celeb_a/faces'
processed_data_dir = r'/media/rrtammyfs/labDatabase/celeb_a/faces/processed/'
os.environ["CUDA_VISIBLE_DEVICES"] = '-1'

list_imgs = glob(os.path.join(raw_data_dir, "*/*.jpg"))
mtcnn = MTCNN(margin=10, select_largest=True,
              post_process=False)  #, device='cuda:0')
for img_path in tqdm(list_imgs):
    img = plt.imread(img_path)
    face = mtcnn(img)
    if face is not None:
        os.makedirs(os.path.join(processed_data_dir,
                                 img_path.split('/')[-2]),
                    exist_ok=True)
        face = face.permute(1, 2, 0).int().numpy()
        plt.imsave(
            os.path.join(processed_data_dir,
                         img_path.split('/')[-2],
                         img_path.split('/')[-1]), face.astype(np.uint8))
Example #4
class Demo():
    def __init__(self, args):
        ## configs
        self.device = 'cuda:0' if args.gpu else 'cpu'
        self.checkpoint_path = args.checkpoint
        self.detect_human_face = args.detect_human_face
        self.render_video = args.render_video
        self.output_size = args.output_size
        self.image_size = 64
        self.min_depth = 0.9
        self.max_depth = 1.1
        self.border_depth = 1.05
        self.xyz_rotation_range = 60
        self.xy_translation_range = 0.1
        self.z_translation_range = 0
        self.fov = 10  # in degrees

        self.depth_rescaler = lambda d: (1 + d) / 2 * self.max_depth + (
            1 - d) / 2 * self.min_depth  # (-1,1) => (min_depth,max_depth)
        self.depth_inv_rescaler = lambda d: (d - self.min_depth) / (
            self.max_depth - self.min_depth)  # (min_depth,max_depth) => (0,1)

        fx = (self.image_size - 1) / 2 / (np.tan(self.fov / 2 * np.pi / 180))
        fy = (self.image_size - 1) / 2 / (np.tan(self.fov / 2 * np.pi / 180))
        cx = (self.image_size - 1) / 2
        cy = (self.image_size - 1) / 2
        K = [[fx, 0., cx], [0., fy, cy], [0., 0., 1.]]
        K = torch.FloatTensor(K).to(self.device)
        self.inv_K = torch.inverse(K).unsqueeze(0)
        self.K = K.unsqueeze(0)

        ## NN models
        self.netD = EDDeconv(cin=3, cout=1, nf=64, zdim=256, activation=None)
        self.netA = EDDeconv(cin=3, cout=3, nf=64, zdim=256)
        self.netL = Encoder(cin=3, cout=4, nf=32)
        self.netV = Encoder(cin=3, cout=6, nf=32)

        self.netD = self.netD.to(self.device)
        self.netA = self.netA.to(self.device)
        self.netL = self.netL.to(self.device)
        self.netV = self.netV.to(self.device)
        self.load_checkpoint()

        self.netD.eval()
        self.netA.eval()
        self.netL.eval()
        self.netV.eval()

        ## face detector
        if self.detect_human_face:
            from facenet_pytorch import MTCNN
            self.face_detector = MTCNN(select_largest=True, device=self.device)

        ## renderer
        if self.render_video:
            from unsup3d.renderer import Renderer
            assert 'cuda' in self.device, 'A GPU device is required for rendering because the neural_renderer only has GPU implementation.'
            cfgs = {
                'device': self.device,
                'image_size': self.output_size,
                'min_depth': self.min_depth,
                'max_depth': self.max_depth,
                'fov': self.fov,
            }
            self.renderer = Renderer(cfgs)

    def load_checkpoint(self):
        print(f"Loading checkpoint from {self.checkpoint_path}")
        cp = torch.load(self.checkpoint_path, map_location=self.device)
        self.netD.load_state_dict(cp['netD'])
        self.netA.load_state_dict(cp['netA'])
        self.netL.load_state_dict(cp['netL'])
        self.netV.load_state_dict(cp['netV'])

    def depth_to_3d_grid(self, depth, inv_K=None):
        if inv_K is None:
            inv_K = self.inv_K
        b, h, w = depth.shape
        grid_2d = get_grid(b, h, w,
                           normalize=False).to(depth.device)  # Nxhxwx2
        depth = depth.unsqueeze(-1)
        grid_3d = torch.cat((grid_2d, torch.ones_like(depth)), dim=3)
        grid_3d = grid_3d.matmul(inv_K.transpose(2, 1)) * depth
        return grid_3d

    def get_normal_from_depth(self, depth):
        b, h, w = depth.shape
        grid_3d = self.depth_to_3d_grid(depth)

        tu = grid_3d[:, 1:-1, 2:] - grid_3d[:, 1:-1, :-2]
        tv = grid_3d[:, 2:, 1:-1] - grid_3d[:, :-2, 1:-1]
        normal = tu.cross(tv, dim=3)

        zero = normal.new_tensor([0, 0, 1])
        normal = torch.cat(
            [zero.repeat(b, h - 2, 1, 1), normal,
             zero.repeat(b, h - 2, 1, 1)], 2)
        normal = torch.cat(
            [zero.repeat(b, 1, w, 1), normal,
             zero.repeat(b, 1, w, 1)], 1)
        normal = normal / (((normal**2).sum(3, keepdim=True))**0.5 + EPS)
        return normal

    def detect_face(self, im):
        print("Detecting face using MTCNN face detector")
        try:
            bboxes, prob = self.face_detector.detect(im)
            w0, h0, w1, h1 = bboxes[0]
        except:
            print("Could not detect faces in the image")
            return None

        hc, wc = (h0 + h1) / 2, (w0 + w1) / 2
        crop = int(((h1 - h0) + (w1 - w0)) / 2 / 2 * 1.1)
        im = np.pad(
            im, ((crop, crop), (crop, crop), (0, 0)),
            mode='edge')  # allow cropping outside by replicating borders
        h0 = int(hc - crop + crop + crop * 0.15)
        w0 = int(wc - crop + crop)
        return im[h0:h0 + crop * 2, w0:w0 + crop * 2]

    def run(self, pil_im):
        im = np.uint8(pil_im)

        ## face detection
        if self.detect_human_face:
            im = self.detect_face(im)
            if im is None:
                return -1

        h, w, _ = im.shape
        im = torch.FloatTensor(im / 255.).permute(2, 0, 1).unsqueeze(0)
        # resize to 128 first if too large, to avoid bilinear downsampling artifacts
        if h > self.image_size * 4 and w > self.image_size * 4:
            im = nn.functional.interpolate(
                im, (self.image_size * 2, self.image_size * 2),
                mode='bilinear',
                align_corners=False)
        im = nn.functional.interpolate(im, (self.image_size, self.image_size),
                                       mode='bilinear',
                                       align_corners=False)

        with torch.no_grad():
            self.input_im = im.to(self.device) * 2. - 1.
            b, c, h, w = self.input_im.shape

            ## predict canonical depth
            self.canon_depth_raw = self.netD(self.input_im).squeeze(1)  # BxHxW
            self.canon_depth = self.canon_depth_raw - self.canon_depth_raw.view(
                b, -1).mean(1).view(b, 1, 1)
            self.canon_depth = self.canon_depth.tanh()
            self.canon_depth = self.depth_rescaler(self.canon_depth)

            ## clamp border depth
            depth_border = torch.zeros(1, h, w - 4).to(self.input_im.device)
            depth_border = nn.functional.pad(depth_border, (2, 2),
                                             mode='constant',
                                             value=1)
            self.canon_depth = self.canon_depth * (
                1 - depth_border) + depth_border * self.border_depth

            ## predict canonical albedo
            self.canon_albedo = self.netA(self.input_im)  # Bx3xHxW

            ## predict lighting
            canon_light = self.netL(self.input_im)  # Bx4
            self.canon_light_a = canon_light[:, :1] / 2 + 0.5  # ambience term
            self.canon_light_b = canon_light[:, 1:2] / 2 + 0.5  # diffuse term
            canon_light_dxy = canon_light[:, 2:]
            self.canon_light_d = torch.cat(
                [canon_light_dxy,
                 torch.ones(b, 1).to(self.input_im.device)], 1)
            self.canon_light_d = self.canon_light_d / (
                (self.canon_light_d**2).sum(
                    1, keepdim=True))**0.5  # diffuse light direction

            ## shading
            self.canon_normal = self.get_normal_from_depth(self.canon_depth)
            self.canon_diffuse_shading = (
                self.canon_normal *
                self.canon_light_d.view(-1, 1, 1, 3)).sum(3).clamp(
                    min=0).unsqueeze(1)
            canon_shading = self.canon_light_a.view(
                -1, 1, 1, 1) + self.canon_light_b.view(
                    -1, 1, 1, 1) * self.canon_diffuse_shading
            self.canon_im = (self.canon_albedo / 2 +
                             0.5) * canon_shading * 2 - 1

            ## predict viewpoint transformation
            self.view = self.netV(self.input_im)
            self.view = torch.cat([
                self.view[:, :3] * np.pi / 180 * self.xyz_rotation_range,
                self.view[:, 3:5] * self.xy_translation_range,
                self.view[:, 5:] * self.z_translation_range
            ], 1)

            ## export to obj strings
            vertices = self.depth_to_3d_grid(self.canon_depth)  # BxHxWx3
            self.objs, self.mtls = export_to_obj_string(
                vertices, self.canon_normal)

            ## resize to output size
            self.canon_depth = nn.functional.interpolate(
                self.canon_depth.unsqueeze(1),
                (self.output_size, self.output_size),
                mode='bilinear',
                align_corners=False).squeeze(1)
            self.canon_normal = nn.functional.interpolate(
                self.canon_normal.permute(0, 3, 1, 2),
                (self.output_size, self.output_size),
                mode='bilinear',
                align_corners=False).permute(0, 2, 3, 1)
            self.canon_normal = self.canon_normal / (self.canon_normal**2).sum(
                3, keepdim=True)**0.5
            self.canon_diffuse_shading = nn.functional.interpolate(
                self.canon_diffuse_shading,
                (self.output_size, self.output_size),
                mode='bilinear',
                align_corners=False)
            self.canon_albedo = nn.functional.interpolate(
                self.canon_albedo, (self.output_size, self.output_size),
                mode='bilinear',
                align_corners=False)
            self.canon_im = nn.functional.interpolate(
                self.canon_im, (self.output_size, self.output_size),
                mode='bilinear',
                align_corners=False)

            if self.render_video:
                self.render_animation()

    def render_animation(self):
        print(f"Rendering video animations")
        b, h, w = self.canon_depth.shape

        ## morph from target view to canonical
        morph_frames = 15
        view_zero = torch.FloatTensor([0.15 * np.pi / 180 * 60, 0, 0, 0, 0,
                                       0]).to(self.canon_depth.device)
        morph_s = torch.linspace(0, 1,
                                 morph_frames).to(self.canon_depth.device)
        view_morph = morph_s.view(-1, 1, 1) * view_zero.view(1, 1, -1) + (
            1 - morph_s.view(-1, 1, 1)) * self.view.unsqueeze(0)  # TxBx6

        ## yaw from canonical to both sides
        yaw_frames = 80
        yaw_rotations = np.linspace(-np.pi / 2, np.pi / 2, yaw_frames)
        # yaw_rotations = np.concatenate([yaw_rotations[40:], yaw_rotations[::-1], yaw_rotations[:40]], 0)

        ## whole rotation sequence
        view_after = torch.cat(
            [view_morph, view_zero.repeat(yaw_frames, b, 1)], 0)
        yaw_rotations = np.concatenate([np.zeros(morph_frames), yaw_rotations],
                                       0)

        def rearrange_frames(frames):
            morph_seq = frames[:, :morph_frames]
            yaw_seq = frames[:, morph_frames:]
            out_seq = torch.cat([
                morph_seq[:, :1].repeat(1, 5, 1, 1, 1),
                morph_seq,
                morph_seq[:, -1:].repeat(1, 5, 1, 1, 1),
                yaw_seq[:, yaw_frames // 2:],
                yaw_seq.flip(1),
                yaw_seq[:, :yaw_frames // 2],
                morph_seq[:, -1:].repeat(1, 5, 1, 1, 1),
                morph_seq.flip(1),
                morph_seq[:, :1].repeat(1, 5, 1, 1, 1),
            ], 1)
            return out_seq

        ## textureless shape
        front_light = torch.FloatTensor([0, 0, 1]).to(self.canon_depth.device)
        canon_shape_im = (self.canon_normal *
                          front_light.view(1, 1, 1, 3)).sum(3).clamp(
                              min=0).unsqueeze(1)
        canon_shape_im = canon_shape_im.repeat(1, 3, 1, 1) * 0.7
        shape_animation = self.renderer.render_yaw(
            canon_shape_im,
            self.canon_depth,
            v_after=view_after,
            rotations=yaw_rotations)  # BxTxCxHxW
        self.shape_animation = rearrange_frames(shape_animation)

        ## normal map
        canon_normal_im = self.canon_normal.permute(0, 3, 1, 2) / 2 + 0.5
        normal_animation = self.renderer.render_yaw(
            canon_normal_im,
            self.canon_depth,
            v_after=view_after,
            rotations=yaw_rotations)  # BxTxCxHxW
        self.normal_animation = rearrange_frames(normal_animation)

        ## textured
        texture_animation = self.renderer.render_yaw(
            self.canon_im / 2 + 0.5,
            self.canon_depth,
            v_after=view_after,
            rotations=yaw_rotations)  # BxTxCxHxW
        self.texture_animation = rearrange_frames(texture_animation)

    def save_results(self, save_dir):
        print(f"Saving results to {save_dir}")
        save_image(save_dir, self.input_im[0] / 2 + 0.5, 'input_image')
        save_image(
            save_dir,
            self.depth_inv_rescaler(self.canon_depth)[0].repeat(3, 1, 1),
            'canonical_depth')
        save_image(save_dir, self.canon_normal[0].permute(2, 0, 1) / 2 + 0.5,
                   'canonical_normal')
        save_image(save_dir, self.canon_diffuse_shading[0].repeat(3, 1, 1),
                   'canonical_diffuse_shading')
        save_image(save_dir, self.canon_albedo[0] / 2 + 0.5,
                   'canonical_albedo')
        save_image(save_dir, self.canon_im[0].clamp(-1, 1) / 2 + 0.5,
                   'canonical_image')

        with open(os.path.join(save_dir, 'result.mtl'), "w") as f:
            f.write(self.mtls[0].replace('$TXTFILE', './canonical_image.png'))
        with open(os.path.join(save_dir, 'result.obj'), "w") as f:
            f.write(self.objs[0].replace('$MTLFILE', './result.mtl'))

        if self.render_video:
            save_video(save_dir, self.shape_animation[0], 'shape_animation')
            save_video(save_dir, self.normal_animation[0], 'normal_animation')
            save_video(save_dir, self.texture_animation[0],
                       'texture_animation')
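
A minimal sketch of how this Demo class might be driven; the flag names mirror the attributes read in `__init__`, while `--input` and `--output` are assumed placeholders:

import argparse
from PIL import Image

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', default='face.png')      # assumed placeholder
    parser.add_argument('--output', default='results')      # assumed placeholder
    parser.add_argument('--checkpoint', default='checkpoint.pth')
    parser.add_argument('--output_size', type=int, default=128)
    parser.add_argument('--gpu', action='store_true')
    parser.add_argument('--detect_human_face', action='store_true')
    parser.add_argument('--render_video', action='store_true')
    args = parser.parse_args()

    demo = Demo(args)
    demo.run(Image.open(args.input).convert('RGB'))
    demo.save_results(args.output)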
Example #5
class FaceDetect:
    def __init__(self, thresholds=[0.9, 0.9, 0.9], min_face_size=100):
        self.mtcnn = MTCNN(thresholds=thresholds,
                           select_largest=True,
                           post_process=False,
                           device='cuda:0',
                           min_face_size=min_face_size)

    def detect(self,
               img_ls,
               crop_size=None,
               mode='Extract_largest',
               save_faces=False,
               save_annotate=False,
               save_path='face_result'):
        """face detection

        Args:
            img_ls (list): list of array
            crop_size (tuple, optional): crop images with (left, top, right, bottom). Defaults to None.
            mode (str, optional): There're 3 modes, 'Detect', 'Detect_bool', and 'Extract'. 
                                    If you only want to know whether there're any faces, use 'Detect_bool' mode. 
                                    If you want to get boxes and probs of faces, use 'Detect'.
                                    If you want to get all information about faces, use 'Extract'.
                                    Defaults to 'Detect_bool'.
            face_num (int, optional): Number of faces to be extracted. Defaults to 1.
            save_faces (bool, optional): For 'Extract' mode. Defaults to False.
            save_annotate (bool, optional): For 'Extract' mode. Save images with annotations. Defaults to False.

        Returns:
            tuple: depends on the mode.

        """
        if crop_size:
            for i, img in enumerate(img_ls):
                img_ls[i] = img.crop(crop_size)

        try:
            boxes, probs = self.mtcnn.detect(img_ls)
        except Exception as e:
            print(
                f'{e} \n...add crop_size=(left, top, right, bottom) to make images the same'
            )

        if mode == 'Detect_bool':
            return isinstance(boxes, np.ndarray)
        elif mode == 'Detect':
            return boxes, probs
        elif 'Extract' in mode:
            faces = []
            annotates = []
            boxes = boxes.tolist()
            probs = probs.tolist()
            for id_, img in enumerate(img_ls):
                face_batch = []
                img_annotate = img.copy()
                draw = ImageDraw.Draw(img_annotate)
                box_all = boxes[id_]
                if mode == 'Extract_largest':
                    for i, box in enumerate(box_all):
                        left = max(0, box[0])
                        top = max(0, box[1])
                        right = min(np.array(img_ls[id_]).shape[1], box[2])
                        down = min(np.array(img_ls[id_]).shape[0], box[3])
                        box_all[i] = [left, top, right, down]
                    area = list(map(self._cal_area, box_all))
                    max_id = area.index(max(area))
                    box = box_all[max_id]
                    box_head = [
                        box[0] - box[0] / 8, box[1] - box[1] / 5,
                        box[2] + box[2] / 8, box[3] + box[3] / 10
                    ]
                    boxes[id_] = [box_head]
                    probs[id_] = [probs[id_][max_id]]

                    draw.rectangle(box_head, width=5)
                    if save_faces:
                        if not os.path.exists(save_path):
                            os.mkdir(save_path)
                        if not os.path.exists(os.path.join(save_path,
                                                           'faces')):
                            os.mkdir(os.path.join(save_path, 'faces'))
                        face_batch.append(
                            extract_face(img,
                                         box_head,
                                         save_path=os.path.join(
                                             save_path,
                                             f'detected_face_{id_}-{0}.png')))
                    else:
                        face_batch.append(extract_face(img, box_head))
                elif mode == 'Extract_all':
                    for i, box in enumerate(box_all):
                        box_head = [
                            box[0] - box[0] / 3, box[1] - box[1] / 3,
                            box[2] + box[2] / 83, box[3] + box[3] / 10
                        ]
                        box_all[i] = box_head
                        draw.rectangle(box_head, width=5)  # box.tolist()
                        if save_faces:
                            if not os.path.exists(save_path):
                                os.mkdir(save_path)
                            if not os.path.exists(
                                    os.path.join(save_path, 'faces')):
                                os.mkdir(os.path.join(save_path, 'faces'))
                            face_batch.append(
                                extract_face(
                                    img,
                                    box_head,
                                    save_path=os.path.join(
                                        save_path,
                                        f'detected_face_{id_}-{i}.png')))
                        else:
                            face_batch.append(extract_face(img, box_head))
                else:
                    print(f"Error: there's no mode called {mode}")
                faces.append(face_batch)
                annotates.append(np.asarray(img_annotate))
                if save_annotate:
                    if not os.path.exists(save_path):
                        os.mkdir(save_path)
                    if not os.path.exists(
                            os.path.join(save_path, 'annotations')):
                        os.mkdir(os.path.join(save_path, 'annotations'))
                    img_annotate.save(
                        os.path.join(save_path, f'annotated_faces_{id_}.png'))
            return np.asarray(boxes), probs, annotates, faces
        else:
            print(f"Error: there's no mode called {mode}")

    def _cal_area(self, ls):
        return (ls[2] - ls[0]) * (ls[3] - ls[1])
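
A short usage sketch for the FaceDetect class above, assuming PIL images as input (the `img.crop`, `img.copy`, and `ImageDraw.Draw` calls imply PIL.Image objects rather than raw arrays); the file names are placeholders:

from PIL import Image

detector = FaceDetect(min_face_size=80)
images = [Image.open('person1.jpg'), Image.open('person2.jpg')]

# Only check whether any face is present
has_face = detector.detect(images, mode='Detect_bool')

# Bounding boxes and probabilities
boxes, probs = detector.detect(images, mode='Detect')

# Full extraction: boxes, probabilities, annotated images, and face crops
boxes, probs, annotated, faces = detector.detect(
    images, mode='Extract_largest', save_faces=True, save_annotate=True)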
Example #6
from facenet_pytorch import MTCNN, InceptionResnetV1
from keras.models import load_model
import torch
from torchvision import datasets
from torch.utils.data import DataLoader
from PIL import Image
import mtcnn
import cv2
import time
import os

mtcnn = MTCNN(image_size=240, margin=0, keep_all=True,
              min_face_size=40)  # keep_all=True
resnet = InceptionResnetV1(pretrained='vggface2').eval()


def cropped_image(img_path):
    img = cv2.imread(img_path)
    img0 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_cropped_list, prob_list = mtcnn(img0, return_prob=True)
    new_img1 = cv2.imread('white.png')
    new_img2 = cv2.imread('white.png')
    new_img3 = cv2.imread('white.png')
    new_img4 = cv2.imread('white.png')

    if img_cropped_list is not None:
        boxes, _, faces = mtcnn.detect(img0, landmarks=True)

        for i, prob in enumerate(prob_list):
            box = boxes[i]
            cropped = img[int(box[1])-50: int(box[3])+50,
                          int(box[0])-50: int(box[2])+50]  # assumed completion of the truncated slice
Example #7
class FaceAndHandDetector(QThread):
    frame_update_signal = pyqtSignal(QPixmap)

    def __init__(self):
        QThread.__init__(self)

        self.frame = 0
        self.mtcnn = MTCNN()
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        # print(self.device)
        self.frame_counter = 0
        self.prev_frame_counter = 0
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.fps_count)
        self.timer.start(1000)

        self.model = HPSearchNET(cnn_num=3,
                                 fc_num=2,
                                 kern_size=3,
                                 func_act='elu',
                                 nn_prn=True,
                                 in_shape=160).to(self.device)
        self.model.load_state_dict(torch.load("hnd_net_elu_cnn3_fc2_kr3.pth", map_location=self.device))
        self.model.eval()

    # Function for drawing the face bounding box
    def draw_face(self, frame, boxes, probs):   # , landmarks
        try:
            cnt = 0
            for box, prob in zip(boxes, probs):   # , ld , landmarks
                cnt += 1
                print(f"Лицо {cnt} box: {box} prob: {prob:.4f}")
                # Рисуем обрамляющий прямоугольник лица на кадре
                cv2.rectangle(frame,
                              (box[0], box[1]),
                              (box[2], box[3]),
                              (0, 0, 255),
                              thickness=2)
        except Exception as e:
            print('Error in _draw')
            print(f'error : {e}')
        return frame

    # Function for drawing hand bounding boxes
    def draw_hand(self, frame, hand_landmarks):
        # mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        max_x = max_y = 0
        min_x = min_y = 65535
        for mark in hand_landmarks.landmark:
            if mark.x > max_x:
                max_x = mark.x
            if mark.x < min_x:
                min_x = mark.x
            if mark.y > max_y:
                max_y = mark.y
            if mark.y < min_y:
                min_y = mark.y
        max_x = round(max_x * IMAGE_WIDTH) + 30
        min_x = round(min_x * IMAGE_WIDTH) - 30
        max_y = round(max_y * IMAGE_HEIGHT) + 30
        min_y = round(min_y * IMAGE_HEIGHT) - 30
        if min_x < 0:
            min_x = 0
        if min_y < 0:
            min_y = 0
        if max_x > IMAGE_WIDTH:
            max_x = IMAGE_WIDTH
        if max_y > IMAGE_HEIGHT:
            max_y = IMAGE_HEIGHT
        print(f"\tmax_x: {max_x} min_x: {min_x} max_y: {max_y} min_y: {min_y}")
        # Draw the hand bounding rectangle on the frame
        cv2.rectangle(frame,
                      (min_x, min_y),
                      (max_x, max_y),
                      (0, 255, 0),
                      thickness=2)
        return frame, [min_x, min_y, max_x, max_y]

    def fps_count(self):
        self.prev_frame_counter, self.frame_counter = self.frame_counter, 0
        # self.frame_counter = 0

    # Check whether any hands are present in the frame
    def hand_detection_mp(self, frame):
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)   # cv2.flip(frame, 1)
        frame.flags.writeable = False
        results = hands.process(frame)
        frame.flags.writeable = True

        if results.multi_hand_landmarks:
            count = 0
            for hand_landmarks in results.multi_hand_landmarks:
                count += 1
                print(f"Рука {count}")
                print(
                    f'\tIndex finger tip coordinates: ('
                    f'x: {round(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * IMAGE_WIDTH)}, '
                    f'y: {round(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * IMAGE_HEIGHT)})'
                )
                for num, mark in enumerate(hand_landmarks.landmark):
                    print(f"\tМетка {arm_marks[num]}"
                          f"- x: {round(mark.x * IMAGE_WIDTH)}, y: {round(mark.y * IMAGE_HEIGHT)}")

        return results

    # Function that reads and processes each frame
    def run(self):
        # Enter an infinite loop
        while True:
            if cam_index_list:
                # Read each new frame into `frame`
                # ret is a boolean: whether a frame was actually read from the stream

                hands = []
                ret, self.frame = cam.read()
                self.frame = cv2.flip(self.frame, 1)
                try:
                    # detect face locations in the frame and the probability that each is a face
                    boxes, probs = self.mtcnn.detect(self.frame, landmarks=False)   # , landmarks

                    if boxes is not None:
                        # Draw on the frame
                        self.frame = self.draw_face(self.frame, boxes, probs)   # , landmarks
                        # Look for hands
                        hand_detect_rez = self.hand_detection_mp(self.frame)
                        if hand_detect_rez.multi_hand_landmarks:
                            for hand_landmarks in hand_detect_rez.multi_hand_landmarks:
                                self.frame, hand_box = self.draw_hand(self.frame, hand_landmarks)
                                hands.append(self.filter_hand(self.frame, hand_box))
                                # size 160x160
                                # Normalize the image to values in [0, 1]
                                img = torch.from_numpy(hands[-1]) / 255
                                img = img.unsqueeze(0).unsqueeze(0)
                                with torch.no_grad():
                                    outputs = self.model(img.to(self.device))
                                    _, predicted = torch.max(outputs.data, 1)
                                    print(f"predicted: {labels_texts[int(predicted)]}")
                                    # write the recognized gesture on the frame
                                    cv2.putText(self.frame,
                                                labels_texts[int(predicted)],
                                                (hand_box[2], hand_box[3]),
                                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

                except Exception as e:
                    print(f'Error {e} in run')

                # write the FPS value on the frame
                cv2.putText(self.frame,
                            f"FPS: {self.prev_frame_counter}",
                            (20, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1, cv2.LINE_AA)

                self.frame_counter += 1
                self.frame_update_signal.emit(self.frame_to_qpixmap(self.frame))   # cv2.imshow(self.label, self.frame)
                # if hands:
                #     self.frame_update_signal.emit(self.frame_to_qpixmap(hands[0]))

    # Function to convert a frame to a QPixmap
    def frame_to_qpixmap(self, frame):
        rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        convert_to_qt_format = QImage(rgb_image.data,
                                      rgb_image.shape[1],
                                      rgb_image.shape[0],
                                      QImage.Format_RGB888)
        convert_to_qt_format = QPixmap.fromImage(convert_to_qt_format)
        pixmap = QPixmap(convert_to_qt_format)
        return pixmap

    def filter_hand(self, frame, hand_box):
        hand_img = frame[int(hand_box[1]):int(hand_box[3]),
                         int(hand_box[0]):int(hand_box[2])]
        # hand_img = cv2.resize(hand_img, (48, 48))

        hsv = cv2.cvtColor(hand_img, cv2.COLOR_BGR2HSV)

        # define range of skin color in HSV
        lower_skin = np.array([0, 20, 70], dtype=np.uint8)
        upper_skin = np.array([20, 255, 255], dtype=np.uint8)

        # extract the skin-colour image
        mask = cv2.inRange(hsv, lower_skin, upper_skin)

        # extrapolate the hand to fill dark spots within
        # kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))   #
        kernel = np.ones((3, 3), np.uint8)
        mask = cv2.erode(mask, kernel, iterations=2)
        mask = cv2.dilate(mask, kernel, iterations=2)  # mask = cv2.dilate(mask, kernel, iterations=4)

        # blur the image
        mask = cv2.GaussianBlur(mask, (3, 3), 0)  # 10

        # mask = cv2.resize(mask, (48, 48))
        # hand_img = cv2.resize(hand_img, (48, 48))
        res = cv2.bitwise_and(hand_img, hand_img, mask=mask)
        res = cv2.resize(res, (160, 160))

        # Convert to a single-channel grayscale image
        res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
        return res
Example #8
def main(output_dir: str = 'output',
         features: Features = Features(),
         parameters: TrainParameters = TrainParameters(),
         datasets_dir: str = 'datasets',
         dataset: str = 'UTKFace',
         pretrained=None,
         pretrained_encoder='models/encoder/05_mobilenet_v3_small_003.pt',
         preload: bool = False,
         use_preprocessed: bool = False,
         device: torch.device = torch.device('cpu')):
    global log
    log = logger.Train(output_dir)

    model = models.getIntegrated(
        age=features.age,
        gender=features.gender,
        pretrained=pretrained,
        pretrained_encoder=pretrained_encoder).to(device)

    mtcnn = MTCNN(
        keep_all=True,
        min_face_size=100,
        image_size=160,
        margin=14,
        selection_method="center_weighted_size",
        post_process=True,
        device=device,
    )

    pre_transforms = transforms.Compose([
        transforms.Resize((160, 160)), np.float32,
        transforms.ToTensor(), fixed_image_standardization
    ])

    dataset_handler = dataLoaders.get(dataset=dataset,
                                      datasets_dir=datasets_dir,
                                      preload=preload,
                                      use_preprocessed=use_preprocessed,
                                      device=device)
    train_loader, validate_loader, test_loader = dataset_handler.get_loaders(
        transform=pre_transforms,
        train_size=parameters.train_size,
        validate_size=parameters.validate_size,
        test_size=parameters.test_size,
        batch_size=parameters.batch_size)

    criterion = nn.CrossEntropyLoss().to(device)

    if len(train_loader) > 0:
        for epoch in range(1, parameters.epochs + 1):
            log.epochBegin(epoch, parameters.epochs)
            train_all(model, features, train_loader, criterion, parameters,
                      device)

            if len(validate_loader) > 0:
                validate_all(model, features, validate_loader, criterion,
                             output_dir, device)

    if len(test_loader) > 0:
        test_all(model, features, test_loader, criterion, device)

    torch.save(model.state_dict(), os.path.join(output_dir, 'final_model.pt'))
    pass
Example #9
from facenet_pytorch import MTCNN
import cv2
from PIL import Image

from os import listdir, makedirs
import glob
from os.path import join, exists
from skimage.io import imsave

mtcnn = MTCNN(keep_all=True,
              margin=40,
              select_largest=False,
              post_process=False,
              device="cuda:0")

# Directory containing images respective to each video
source_frames_folders = ["./train_frames/0", "./train_frames/1"]
# Destination location where faces cropped out from images will be saved
dest_faces = "./train_face/"

for i in source_frames_folders:
    counter = 0
    for j in listdir(i):
        if i.find("0") != -1:
            dest_faces_folder = "{}0".format(dest_faces)
        else:
            dest_faces_folder = "{}1".format(dest_faces)

        imgs = glob.glob(join(i, j, "*.jpg"))

        if counter % 1000 == 0:
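
The loop above is cut off right after the `counter % 1000` check. A hedged sketch of how it might continue (progress printing, then letting MTCNN crop and save each frame via `save_path`); the output layout under `dest_faces_folder` is an assumption:

            # Assumed continuation of the truncated loop above.
            print("Videos processed:", counter)

        faces_folder = join(dest_faces_folder, j)
        if not exists(faces_folder):
            makedirs(faces_folder)

        for k, img_path in enumerate(imgs):
            frame = Image.open(img_path)
            # MTCNN writes the cropped face(s) directly to save_path
            mtcnn(frame, save_path=join(faces_folder, "{}.jpg".format(k)))

        counter += 1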
Example #10
    def detect_live(self):
        
        mtcnn = MTCNN()
        faces = {}
        frameCount = 0

        vid = cv2.VideoCapture(0)

        if self.record_for is not None : 
            start_time = time.time()

        while vid.isOpened():

            if self.record_for is not None :
                curr_time = time.time() - start_time
                if curr_time > self.record_for :
                    break                 
        
            _, frame = vid.read()
            frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
            frameCount = frameCount + 1

            boxes, probs = mtcnn.detect(frame)

            frame_draw = frame.copy()
            draw = ImageDraw.Draw(frame_draw)
            if boxes is not None:

                faces["frame_{}".format(frameCount)] = []

                for box, p in zip(boxes, probs) : 
                    if p > 0.70 :
                        draw.rectangle(box.tolist(), outline = (255, 0, 0), width = 1)
                    if self.extract == True :
                        face = extract_face(frame, box.tolist())
                        faces["frame_{}".format(frameCount)].append(face)
                        if self.save == True :
                            img = self.tsfms(face)

                            if self.saveIn is None :
                                raise ValueError

                            else :
                                img.save(os.path.join(self.saveIn, "frame_{}.jpg".format(len(faces))))

                cv2.imshow("Tracking window", cv2.cvtColor(np.array(frame_draw), cv2.COLOR_RGB2BGR))
                if self.save_video == True : 
                    self.frames_tracked.append(frame_draw)                
                if cv2.waitKey(1) == ord("a") :
                    break
                

        
        vid.release()
        
        if self.save_video == True:
            print(len(self.frames_tracked))
            self.saveVideo(self.saveIn, self.frames_tracked, "trackedVid")

        if self.save == True :
            return len(faces.keys()), faces
        else :
            return None, None         
Example #11
    def detect(self):
    
        vid = cv2.VideoCapture(self.lookIn)
        frameCount = int(vid.get(cv2.CAP_PROP_FRAME_COUNT)) - 1

        mtcnn = MTCNN()

        bboxes_and_probs = []
        detected_frames = []  # frames with drawn detections, used when writeMode is on
        count = frameCount
        while vid.isOpened():
            
            #if count <  frameCount:
                #break

            _,  frame = vid.read()
            print("%d to go.." %(count))
            count -= 1
            frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            boxes, prob = mtcnn.detect(frame)
            
            frame_draw = frame.copy()
            draw = ImageDraw.Draw(frame_draw)
            if boxes is None :
                #print("Skipping Frame")
                if self.writeMode == True:
                    detected_frames.append(frame_draw)                 
                cv2.imshow("Frame", cv2.cvtColor(np.asarray(frame_draw), cv2.COLOR_BGR2RGB)) 
                if cv2.waitKey(2) & 0xFF == ord('y'):
                    break
                continue
            for box, p in zip(boxes,prob):
                
                if p > 0.80:   
                                   
                    #print("Not skipping!")      
                    draw.rectangle(box.tolist(), outline= (255, 0, 0), width= 1)
                    bboxes_and_probs.append({"bbox":box, "prob":p})

                if self.writeMode == True:
                    detected_frames.append(frame_draw)
            
            cv2.imshow("Frame", cv2.cvtColor(np.asarray(frame_draw), cv2.COLOR_BGR2RGB)) 
            if cv2.waitKey(1) & 0xFF == ord('y'):
                break
                       
            
        
        print("releasing capture")
        vid.release()
        
        if self.writeMode == True :
            dim = detected_frames[0].size
            print(dim , int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")    
            video_tracked = cv2.VideoWriter(self.saveIn, fourcc, 25.0, dim)
            for frame in detected_frames:
                video_tracked.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
            video_tracked.release()
        return bboxes_and_probs




        
Example #12
from facenet_pytorch import MTCNN
import torch
import numpy as np
import cv2
from PIL import Image
from img_rotate import rotate
import os
import argparse

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))
mtcnn = MTCNN(keep_all=True,
              device=device,
              margin=50,
              select_largest=True,
              image_size=256)


def extract_face(frame, align=True, margin=5):
    if align:
        frame = rotate(np.array(frame))
    frame = Image.fromarray(frame)
    # mtcnn(frame, save_path=name)
    boxes, _ = mtcnn.detect(frame)
    for box in boxes:
        box_list = box.tolist()
        # bounding box coordinates
        x1 = int(box_list[0])
        y1 = int(box_list[1])
        x2 = int(box_list[2])
        y2 = int(box_list[3])
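
The example stops right after unpacking the box coordinates. A hedged sketch of a typical continuation, written as a generator so nothing needs to be added before the loop shown above; the margin handling and return form are assumptions:

        # Assumed continuation: crop each face with the margin, clamped to the image.
        frame_np = np.array(frame)
        h, w = frame_np.shape[:2]
        yield frame_np[max(y1 - margin, 0):min(y2 + margin, h),
                       max(x1 - margin, 0):min(x2 + margin, w)]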
Example #13
    def __init__(self):
        self._config = ConfigProvider.config()
        self._device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        print(f'Running on device: {self._device}')
        self._mtcnn = MTCNN(keep_all=True, device=self._device)
        self._bbox_tracker = BboxTracker()
Example #14
from facenet_pytorch import MTCNN, extract_face
import torch
import numpy as np
import mmcv
import cv2
from PIL import Image, ImageDraw
from IPython import display
import glob
import os

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

mtcnn = MTCNN(keep_all=True, device=device)

frames = []
#files = glob.glob("/home/jeff/datasets/TUM Gait/data_person1+2/image/p001/b01/*")
files = glob.glob("reid/b01/*")
for myFile in files:
    fileName = os.path.splitext(os.path.basename(myFile))[0]

    img = Image.open(myFile)

    boxes, probs, points = mtcnn.detect(img, landmarks=True)
    if boxes is not None:
        # Draw boxes and save faces
        img_draw = img.copy()
        draw = ImageDraw.Draw(img_draw)
        for i, (box, point) in enumerate(zip(boxes, points)):
            draw.rectangle(box.tolist(), width=5)
            for p in point:
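
The landmark loop is truncated. A hedged guess at the usual continuation in the facenet_pytorch examples: mark each landmark, save a crop of every face, and write out the annotated image (the file names are assumptions):

                # Assumed continuation of the truncated example.
                draw.ellipse((p[0] - 5, p[1] - 5, p[0] + 5, p[1] + 5), fill=(255, 0, 0))
            extract_face(img, box, save_path=f'detected_face_{fileName}_{i}.png')
        img_draw.save(f'annotated_faces_{fileName}.png')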
Example #15
    return im[int(h0):int(h1), int(w0):int(w1)]


#########################################################
UPLOAD_FOLDER = 'img_data/upload'
TARGET_FOLDER = 'img_data/target'
RESULT_FOLDER = 'img_data/result'

cudnn.benchmark = True

default_args = parse_args()
CelebA_HQ = create_model(copy.copy(default_args), 'CelebA-HQ')
AFHQ = create_model(copy.copy(default_args), 'AFHQ')

face_detector = MTCNN(select_largest=True, device=torch.device('cuda'))

requests_queue = Queue()
#########################################################
app = Flask(__name__, template_folder="./static/")
app.config['MAX_CONTENT_LENGTH'] = 1 * 1024 * 1024

BATCH_SIZE = 1
CHECK_INTERVAL = 0.1


#run model
def run(input_file, model_type):
    f_id = str(uuid.uuid4())
    fname = secure_filename(input_file.filename)
Example #16
import datetime
import pickle
import argparse

import os

ap = argparse.ArgumentParser()
ap.add_argument("-v", "--videos", help="path to the video", default=0)
args = vars(ap.parse_args())

model = load_model('models/facenet_keras.h5')
# svm_model = joblib.load('models/svm_face_classification.pkl')
with open('models/svm_classification_1.pkl', 'rb') as file:
    svm_model = pickle.load(file)

mtcnn = MTCNN(keep_all=True, post_process=False)

names_array = [
    'Afsan', 'Amresh', 'Amritansh', 'Ayush', 'Harish', 'Keyur', 'Rahul'
]

video = cv2.VideoCapture(args['videos'])
cv2.namedWindow('face Recognition', cv2.WINDOW_NORMAL)
cv2.resizeWindow('face Recognition', 800, 800)
loop = True
while loop:
    ret, frame = video.read()
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    try:
        faces = mtcnn(frame_rgb)
        boxes, _ = mtcnn.detect(frame_rgb)
Example #17
def main(source_path, dest_path):
    """
    Main function to iterate over the images in the raw data and generate data samples
    to train/test FaceID model.
    """

    # img_dir = os.path.join(raw_data_path, 'aligned_images_DB')
    frame_dir = os.path.join(source_path, 'frame_images_DB')

    if not os.path.exists(dest_path):
        os.makedirs(dest_path)

    # set parameters
    num_imgs_per_face = 1
    target_im_shape = (160, 120)

    # set device
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Running on device: {}'.format(device))

    # create models
    mtcnn = MTCNN(image_size=80,
                  margin=0,
                  min_face_size=20,
                  thresholds=[0.6, 0.7, 0.7],
                  factor=0.709,
                  post_process=True,
                  device=device)
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

    # run models on the images
    num_persons = 0
    num_faces = 0

    embedding_dict = {}
    subj_name_list = os.listdir(frame_dir)

    for f_n, face_file in enumerate(subj_name_list):
        if (f_n % 100) == 0:
            print('Subject %d of %d' % (f_n, len(subj_name_list)))
        f_path = os.path.join(frame_dir, face_file)
        if os.path.isfile(f_path):
            if face_file.endswith('txt'):
                with open(f_path, 'r') as file:
                    lines = file.readlines()
                    num_persons += 1
                    for line in lines:
                        num_faces += 1
                        img_name = line.split(',')[0]
                        subj_name, video_no, file_name = img_name.split('\\')
                        img_path = os.path.join(frame_dir, subj_name, video_no,
                                                file_name)
                        img = imread(img_path)

                        x_aligned, _, _ = mtcnn(img, return_prob=True)
                        if x_aligned is not None:
                            aligned = x_aligned[None, :, :, :].to(device)
                            embedding = resnet(
                                aligned).detach().cpu().numpy()[0]

                            if subj_name not in embedding_dict:
                                embedding_dict[subj_name] = {}
                                subj_path = os.path.join(dest_path, subj_name)
                                if not os.path.exists(subj_path):
                                    os.mkdir(subj_path)
                            if video_no not in embedding_dict[subj_name]:
                                embedding_dict[subj_name][video_no] = {}
                                video_path = os.path.join(
                                    dest_path, subj_name, video_no)
                                if not os.path.exists(video_path):
                                    os.mkdir(video_path)

                            embedding_dict[subj_name][video_no][
                                file_name] = embedding.tolist()
                            x_aligned_int = x_aligned.cpu().numpy()
                            x_aligned_int -= np.min(x_aligned_int)
                            x_aligned_int /= np.max(x_aligned_int)
                            x_aligned_int = (255.0 * x_aligned_int).astype(
                                np.uint8)
                            np.save(
                                os.path.join(dest_path, subj_name, video_no,
                                             file_name), x_aligned_int)

                            rect = line.split(',')[2:6]
                            for i in range(4):
                                rect[i] = int(rect[i])

                            box = np.array([
                                int(rect[0]) - int(rect[2]) // 2,
                                int(rect[1]) - int(rect[3]) // 2,
                                int(rect[0]) + int(rect[2]) // 2,
                                int(rect[1]) + int(rect[3]) // 2
                            ])

                            img_arr, _, img, box = generate_image(
                                img, box, num_imgs_per_face)
                            for img_idx in range(num_imgs_per_face):
                                new_file_name = '_'.join([
                                    file_name,
                                    str(target_im_shape[0]),
                                    str(target_im_shape[1]),
                                    str(img_idx)
                                ])
                                cropped_im_path = os.path.join(
                                    dest_path, subj_name, video_no,
                                    new_file_name)
                                np.save(cropped_im_path, img_arr[img_idx])

    print('Number of People: %d' % num_persons)
    print('Number of Faces: %d' % num_faces)

    # save embeddings to json file
    with open(os.path.join(dest_path, 'embeddings.json'), 'w') as out_file:
        json.dump(embedding_dict, out_file)
Example #18
from PIL import Image, ImageDraw
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import numpy as np
import time
import os

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
mtcnn = MTCNN(image_size=160,
              margin=0,
              min_face_size=20,
              thresholds=[0.6, 0.7, 0.7],
              factor=0.709,
              post_process=True,
              device=device)
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)


def collate_fn(x):
    return x[0]


def process_image_database():
    dataset = datasets.ImageFolder('./test_images')
    dataset.idx_to_class = {i: c for c, i in dataset.class_to_idx.items()}
    loader = DataLoader(dataset, collate_fn=collate_fn, num_workers=1)

    aligned = []
    names = []
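
The function is cut off after initializing `aligned` and `names`. A hedged sketch of how the standard facenet_pytorch recipe usually continues from here (align each image, stack the faces, and embed them with the resnet); the return value is an assumption:

    # Assumed continuation, following the usual facenet_pytorch pattern.
    for x, y in loader:
        x_aligned, prob = mtcnn(x, return_prob=True)
        if x_aligned is not None:
            aligned.append(x_aligned)
            names.append(dataset.idx_to_class[y])

    # One 512-D embedding per successfully aligned image
    aligned = torch.stack(aligned).to(device)
    embeddings = resnet(aligned).detach().cpu()
    return embeddings, names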
Example #19
    def find_face(self):
        """
        find face on the frames

        create:
            self.faces
            self.frame_ids
        """
        def del_skipped_frames():
            idxs = [idx for idx, val in enumerate(self.centers) if val == 0]
            for index in sorted(idxs, reverse=True):
                del self.centers[index]
                del self.frames[index]
                del self.frame_ids[index]

        def medfilt_filter(step=7):
            y_ = medfilt([i[0] for i in self.centers], step)
            x_ = medfilt([i[1] for i in self.centers], step)
            return y_, x_

        self.centers, h_shift, w_shift, centers = ([], [], [], None)

        # facenet-pytorch MTCNN; used when CUDA is available
        if cuda.is_available():
            frames_cropped = []
            box_prev = None

            mtcnn = MTCNN(image_size=200, device=device)
            for frame in tqdm(self.frames):
                box, _ = mtcnn.detect(frame)
                if box is not None:
                    box = np.array(box[0]).astype(int)
                    x1, x2, y1, y2 = box[1], box[3], box[0], box[2]
                    h_shift += [(y2 - y1) // 2]
                    w_shift += [(x2 - x1) // 2]
                    centers = [y1 + h_shift[-1], x1 + w_shift[-1]]
                    #plt.imshow(frame[x1:x2, y1:y2])
                    #plt.show()
                    if centers is not None:
                        self.centers += [centers]
                    else:
                        self.centers += [0]
                else:
                    self.centers += [0]

            del mtcnn

            del_skipped_frames()

        # Haar cascade; used when CUDA is not available
        else:
            face_cascade = cv2.CascadeClassifier(
                'haarcascade_frontalface_default.xml')

            for frame in tqdm(self.frames):
                gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
                faces = face_cascade.detectMultiScale(gray)
                for (x, y, w, h) in faces:
                    h_shift += [h // 2]
                    w_shift += [w // 2]
                    centers = [y + h // 2, x + w // 2]
                if centers is not None:
                    self.centers += [centers]
                else:
                    self.centers += [0]

            del face_cascade

            del_skipped_frames()

        self.box_shift = [
            np.mean(w_shift, dtype=int),
            np.mean(h_shift, dtype=int)
        ]
        # remove outliers from the center signal
        if len(self.centers) == 0:
            raise ValueError("Невозможно определить лицо")
        if cuda.is_available(): y_, x_ = medfilt_filter(5)
        else: y_, x_ = medfilt_filter()

        self.centers = [[int(y), int(x)] for x, y in zip(x_, y_)]
        for frame, (y, x) in tqdm(zip(self.frames, self.centers)):
            face = frame[x - self.box_shift[0]:x + self.box_shift[0],
                         y - self.box_shift[1]:y + self.box_shift[1]]
            self.faces += [face]
Example #20
                    temp = 1
                else:
                    cv2.putText(img, 'Unknown' + ': ' + '{0}'.format(diff),
                                (start[0], start[1] - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 1)
                    temp = 0
        cv2.imshow('Detection', img)
        if temp == 1:
            return 1
        else:
            return 0


# Init MTCNN object
mtcnn = MTCNN(image_size=image_size,
              keep_all=True,
              device=device,
              post_process=True)
model = InceptionResnetV1(pretrained='vggface2', classify=False).eval()
# Real time data from webcam
frames = []
boxes = []

# Load stored face data related to respective card number
faces = []
face_names = []
face_file = None
try:
    for person in os.listdir(card_number):
        face_file = open(card_number + '/' + person, 'rb')
        if face_file is not None:
            face = pickle.load(face_file)
Example #21
import os
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
import json
import pandas as pd
import datetime

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
mtcnn = MTCNN(keep_all=False, device=device)
resnet = InceptionResnetV1(pretrained='vggface2', device=device).eval()


def get_photo():
    names = []
    photos = []
    for _, dirs, files in os.walk('face'):
        names = dirs
        break

    for name in names:
        photo = Image.open(os.path.join("face", name, name + ".jpg"))
        photos.append(photo)

    return photos, names


def get_embedding():
    faces = []
    photos, names = get_photo()
    for photo in photos:
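        # --- hedged continuation; the original snippet is cut off here ---
        # Standard facenet_pytorch pattern: crop the face with MTCNN, then embed
        # it with InceptionResnetV1 (the original post-processing is unknown).
        face = mtcnn(photo)
        if face is None:
            continue
        with torch.no_grad():
            emb = resnet(face.unsqueeze(0).to(device))
        faces.append(emb.squeeze(0).cpu())
    return faces, names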
Beispiel #22
0
from PIL import Image,ImageDraw
import numpy as np
from facenet_pytorch import MTCNN
from matplotlib.pyplot import imshow

import cv2

import math
import os
from tqdm import tqdm
import pandas as pd
mtcnn = MTCNN(image_size=120, select_largest=False)





def rotate(origin, point, angle, row):
    """ Rotate coordinates in the image coordinate system.
    :param origin: tuple of coordinates, the rotation center
    :param point: tuple of coordinates, the point to rotate
    :param angle: rotation angle in degrees
    :param row: number of rows (height) of the image
    :return: rotated coordinates of the point
    """
    x1, y1 = point
    x2, y2 = origin
    y1 = row - y1
    y2 = row - y2
    angle = math.radians(angle)
    x = x2 + math.cos(angle) * (x1 - x2) - math.sin(angle) * (y1 - y2)
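    # --- hedged completion; the original snippet is truncated here ---
    # Standard 2D rotation about `origin`; convert back to image coordinates
    # (y axis pointing down) before returning.
    y = y2 + math.sin(angle) * (x1 - x2) + math.cos(angle) * (y1 - y2)
    y = row - y
    return x, y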
Beispiel #23
0
    def __init__(self, args):
        ## configs
        self.device = 'cuda:0' if args.gpu else 'cpu'
        self.checkpoint_path = args.checkpoint
        self.detect_human_face = args.detect_human_face
        self.render_video = args.render_video
        self.output_size = args.output_size
        self.image_size = 64
        self.min_depth = 0.9
        self.max_depth = 1.1
        self.border_depth = 1.05
        self.xyz_rotation_range = 60
        self.xy_translation_range = 0.1
        self.z_translation_range = 0
        self.fov = 10  # in degrees

        self.depth_rescaler = lambda d: (1 + d) / 2 * self.max_depth + (
            1 - d) / 2 * self.min_depth  # (-1,1) => (min_depth,max_depth)
        self.depth_inv_rescaler = lambda d: (d - self.min_depth) / (
            self.max_depth - self.min_depth)  # (min_depth,max_depth) => (0,1)

        fx = (self.image_size - 1) / 2 / (np.tan(self.fov / 2 * np.pi / 180))
        fy = (self.image_size - 1) / 2 / (np.tan(self.fov / 2 * np.pi / 180))
        cx = (self.image_size - 1) / 2
        cy = (self.image_size - 1) / 2
        K = [[fx, 0., cx], [0., fy, cy], [0., 0., 1.]]
        K = torch.FloatTensor(K).to(self.device)
        self.inv_K = torch.inverse(K).unsqueeze(0)
        self.K = K.unsqueeze(0)
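        # Sanity check of the intrinsics above: with the default image_size = 64
        # and fov = 10 degrees,
        #   fx = fy = (64 - 1) / 2 / tan(5 deg) ≈ 31.5 / 0.0875 ≈ 360.0
        #   cx = cy = 31.5,
        # i.e. K ≈ [[360.0, 0, 31.5], [0, 360.0, 31.5], [0, 0, 1]].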

        ## NN models
        self.netD = EDDeconv(cin=3, cout=1, nf=64, zdim=256, activation=None)
        self.netA = EDDeconv(cin=3, cout=3, nf=64, zdim=256)
        self.netL = Encoder(cin=3, cout=4, nf=32)
        self.netV = Encoder(cin=3, cout=6, nf=32)

        self.netD = self.netD.to(self.device)
        self.netA = self.netA.to(self.device)
        self.netL = self.netL.to(self.device)
        self.netV = self.netV.to(self.device)
        self.load_checkpoint()

        self.netD.eval()
        self.netA.eval()
        self.netL.eval()
        self.netV.eval()

        ## face detector
        if self.detect_human_face:
            from facenet_pytorch import MTCNN
            self.face_detector = MTCNN(select_largest=True, device=self.device)

        ## renderer
        if self.render_video:
            from unsup3d.renderer import Renderer
            assert 'cuda' in self.device, 'A GPU device is required for rendering because the neural_renderer only has GPU implementation.'
            cfgs = {
                'device': self.device,
                'image_size': self.output_size,
                'min_depth': self.min_depth,
                'max_depth': self.max_depth,
                'fov': self.fov,
            }
            self.renderer = Renderer(cfgs)
Beispiel #24
0
import torch
import numpy as np
import cv2
from matplotlib import pyplot as plt
import os
import copy
import torch.nn.functional as F
from facenet_pytorch import MTCNN

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
mtcnn = MTCNN(keep_all=True, device=device)


def corp_img(img, c):
    return img[c[1]:c[3], c[0]:c[2]]


def predict_draw(model, img):
    model.eval()
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    boxes, a = mtcnn.detect(img)
    if (type(boxes) is not np.ndarray):
        return img

    for i in range(len(boxes)):
        bnd = boxes[i].astype(int)
        if (bnd[3] - bnd[1] < 40):
            continue
        img2 = corp_img(img, bnd) / 255
        if (len(img.shape) != 3):
            return img
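        # --- hedged continuation; the original snippet is cut off here ---
        # A plausible continuation: resize the crop to the classifier input, run
        # the model and draw the box with the predicted class index. The 224x224
        # input size and the drawing style are assumptions, and the model is
        # assumed to already live on `device`.
        inp = torch.from_numpy(cv2.resize(img2, (224, 224))).float()
        inp = inp.permute(2, 0, 1).unsqueeze(0).to(device)
        with torch.no_grad():
            pred = int(F.softmax(model(inp), dim=1).argmax())
        cv2.rectangle(img, (bnd[0], bnd[1]), (bnd[2], bnd[3]), (0, 255, 0), 2)
        cv2.putText(img, str(pred), (bnd[0], bnd[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    return img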
Beispiel #25
0
    def __init__(self, thresholds=[0.9, 0.9, 0.9], min_face_size=100):
        self.mtcnn = MTCNN(thresholds=thresholds,
                           select_largest=True,
                           post_process=False,
                           device='cuda:0',
                           min_face_size=min_face_size)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

mtcnn_params = dict(
    image_size=160,
    margin=0,
    min_face_size=30,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=False,
    selection_method="probability",
    select_largest=True,
    keep_all=False,
    device=device,
)

mtcnn = MTCNN(**mtcnn_params)


class FrameExtractor(object):
    @staticmethod
    def get_video(fp: str) -> cv2.VideoCapture:
        """
        Method that returns a video read from disk by cv2

        Parameters
        ----------
        fp : str
            Path to the video file

        Returns
        -------
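        cv2.VideoCapture
            The opened capture object for the file at `fp`
        """
        # Hedged completion: the snippet is truncated here, but the docstring
        # above implies the body simply opens the file with OpenCV.
        return cv2.VideoCapture(fp)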
Beispiel #27
0
def input_face_embeddings(
    frames: Union[List[str], np.ndarray],
    is_path: bool,
    mtcnn: MTCNN,
    resnet: InceptionResnetV1,
    face_embed_cuda: bool,
    use_half: bool,
    coord: List,
    name: str = None,
    save_frames: bool = False,
) -> torch.Tensor:
    """
        Get the face embedding

        NOTE: If a face is not detected by the detector,
        instead of throwing an error it zeros the input
        for embedder.

        NOTE: Memory hungry function, hence the profiler.

        Args:
            frames: Frames from the video
            is_path: Whether to read from filesystem or memory
            mtcnn: face detector
            resnet: face embedder
            face_embed_cuda: use cuda for model
            use_half: use half precision
            coord: normalized (x, y) position of the target face, used to pick
                the matching detection among all detected boxes
            name: base name used when saving cropped frames
            save_frames: if True, save each cropped face as a PNG

        Returns:
            emb: Embedding for all input frames
    """
    if face_embed_cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    result_cropped_tensors = []
    no_face_indices = []
    for i, f in enumerate(frames):
        if is_path:
            frame = Image.open(f)
        else:
            frame = Image.fromarray(f.astype("uint8"))

        with torch.no_grad():
            cropped_tensors = None
            # use the PIL image so this works for both path and array inputs
            width, height = frame.size
            bounding_box, prob = mtcnn.detect(frame)

            if bounding_box is not None:
                for box in bounding_box:
                    x1, y1, x2, y2 = box
                    if x1 > x2:
                        x1, x2 = x2, x1
                    if y1 > y2:
                        y1, y2 = y2, y1

                    # for point in coord:
                    x, y = coord[0], coord[1]
                    x *= width
                    y *= height
                    if x >= x1 and y >= y1 and x <= x2 and y <= y2:
                        cropped_tensors = extract_face(frame, box)
                        # print("found", box, x, y, end='\r')
                        break

        if cropped_tensors is None:
            # Face not detected, for some reason
            cropped_tensors = torch.zeros((3, 160, 160))
            no_face_indices.append(i)

        if save_frames:
            name = name.replace(".mp4", "")
            saveimg = cropped_tensors.detach().cpu().numpy().astype("uint8")
            saveimg = np.squeeze(saveimg.transpose(1, 2, 0))
            Image.fromarray(saveimg).save(f"{name}_{i}.png")

        result_cropped_tensors.append(cropped_tensors.to(device))

    if len(no_face_indices) > 20:
        # few videos start with silence, allow 0.5 seconds of silence else remove
        return None
    del frames
    # Stack all frames
    result_cropped_tensors = torch.stack(result_cropped_tensors)
    # Embed all frames
    result_cropped_tensors = result_cropped_tensors.to(device)
    if use_half:
        result_cropped_tensors = result_cropped_tensors.half()

    with torch.no_grad():
        emb = resnet(result_cropped_tensors)
    if use_half:
        emb = emb.float()
    return emb.to(cpu_device)
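

# --- hedged usage sketch (not part of the original snippet) ---
# `cpu_device` above is assumed to be a module-level torch.device("cpu").
# Example call: embed every frame of a clip while tracking the face whose
# normalized screen position is roughly (0.5, 0.5); `video_frames` is a
# hypothetical (N, H, W, 3) uint8 array.
cpu_device = torch.device("cpu")
run_device = "cuda:0" if torch.cuda.is_available() else "cpu"
mtcnn = MTCNN(keep_all=True, device=run_device)
resnet = InceptionResnetV1(pretrained="vggface2").eval().to(run_device)
emb = input_face_embeddings(
    frames=video_frames,
    is_path=False,
    mtcnn=mtcnn,
    resnet=resnet,
    face_embed_cuda=torch.cuda.is_available(),
    use_half=False,
    coord=[0.5, 0.5],
)
if emb is not None:
    print(emb.shape)  # expected: (N, 512)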
Beispiel #28
0
from skimage.io import imsave
from facenet_pytorch import MTCNN
import cv2
from PIL import Image
import os
from pathlib import Path


def show_img(img):
    cv2.imshow(winname="Fa", mat=img)
    cv2.waitKey(delay=0)
    cv2.destroyAllWindows()


mtcnn = MTCNN(margin=50,
              select_largest=False,
              post_process=False,
              device="cuda:0")

# =============================================================================
# source_frames_folders = Path(r'C:\Users\jeremy\Desktop\2021DF\model\666\os\000_003')
#
#
# video = [x for x in source_frames_folders.iterdir()]
#
# problem = []  # some videos which is failed
#
# dst = r'C:\Users\jeremy\Desktop\2021DF\model\666\os\000_003\tt\546.jpg'
# =============================================================================

# =============================================================================
# for i in video:
    fname = sorted(fname)
    dname = sorted(dname)
    return fname, dname


if __name__ == "__main__":

    # Parameters
    basedir = '/projects/jeff/TUMGAIDimage'

    # Check whether a GPU is available
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
    print('Running on device: {}'.format(device))

    # Set MTCNN module parameters
    mtcnn = MTCNN(keep_all=False, device=device, post_process=False)

    # Get the list of files and directories
    fname, dname = listar_imagens(basedir)

    # Detect faces and save them to the facecrops folder
    inicio = time.time()
    print('Processing started')
    facecrop = [it.replace(basedir, basedir+'_faces') for it in fname]
    for f, filename in enumerate(fname):
        try:
            img = Image.open(filename)
            box, prob = mtcnn.detect(img)
        except Exception:
            print('Failed to process file ' + filename)
            continue
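        # --- hedged continuation; the original snippet is cut off here ---
        # A plausible next step: crop the detected face with the MTCNN object and
        # save it to the mirrored path prepared in `facecrop` above (the directory
        # handling is an assumption).
        if box is not None:
            os.makedirs(os.path.dirname(facecrop[f]), exist_ok=True)
            mtcnn(img, save_path=facecrop[f])
    print('Processing finished in {:.1f} s'.format(time.time() - inicio))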
Beispiel #30
0
def main():
    # Read options
    opt = TestOptions().parse(save=False)
    # If demo directory to save generated frames is given
    if opt.demo_dir is not None and not os.path.exists(opt.demo_dir):
        os.makedirs(opt.demo_dir)

    # hardcoded constant values
    opt.nThreads = 0
    opt.batchSize = 1
    opt.serial_batches = True
    # GPU id to be used for mxnet/reconstructor
    opt.gpu_id = opt.gpu_ids[-1]
    # Device to be used for MTCNN face detector
    detector_device = 'cpu'
    # Face bounding box margin
    margin = 120
    # How many frames from the target's training video
    # to consider when gathering head pose and eye size statistics
    n_frames_target_used = 1000
    # How many of the first source frames to consider for eye size adaptation
    # between source and target.
    n_frames_init = 25
    # Use 'spawn' to avoid CUDA initialization errors in subprocesses.
    torch.multiprocessing.set_start_method('spawn', force=True)

    # Initialize video renderer.
    modelG = create_model(opt)
    # Initialize NMFC renderer.
    renderer = NMFCRenderer(opt)
    # Initialize face detector.
    detector = MTCNN(image_size=opt.loadSize,
                     margin=margin,
                     post_process=False,
                     device=detector_device)
    # Initialize landmark extractor.
    dlib_detector = dlib.get_frontal_face_detector()
    dlib_predictor = dlib.shape_predictor(
        'preprocessing/files/shape_predictor_68_face_landmarks.dat')

    # Read the identity parameters from the target person.
    id_params, _ = read_params(
        'id', os.path.join(opt.dataroot, 'train', 'id_coeffs'),
        opt.target_name)
    # Read camera parameters from target
    t_cam_params, _ = read_params('cam',
                                  os.path.join(opt.dataroot, 'train', 'misc'),
                                  opt.target_name)
    t_cam_params = t_cam_params[:n_frames_target_used]
    # Read eye landmarks from target's video.
    eye_landmarks_target = read_eye_landmarks(
        os.path.join(opt.dataroot, 'train', 'landmarks70'), opt.target_name)
    eye_landmarks_target[0] = eye_landmarks_target[0][:n_frames_target_used]
    eye_landmarks_target[1] = eye_landmarks_target[1][:n_frames_target_used]

    # Setup camera capturing
    window_name = 'Head2Head Demo'
    video_capture = cv2.VideoCapture(0)
    video_capture.set(cv2.CAP_PROP_BUFFERSIZE,
                      2)  # set double buffer for capture
    fps = video_capture.get(cv2.CAP_PROP_FPS)
    print("Video capture at {} fps.".format(fps))

    proccesses = []

    # Face tracker / detector
    box_redecect_nframes = opt.box_redetect_nframes
    box = None  # Face bounding box, computed from the first frame

    # Face reconstructor / NMFC renderer
    nmfc = None  # Current nmfc image
    s_cam_params = []  # camera parameters of source video.
    adapted_cam_params = [
    ]  # camera parameters of source video, adapted to target.

    # Facial (eyes) landmarks detector
    prev_eye_centres = None  # Eye centres in previous frame
    eye_landmarks = None  # Final eye landmarks, send to video renderer.
    eye_landmarks_source = [
        [], []
    ]  # Eye landmarks from n_frames_init first frames of source video.
    eye_landmarks_source_queue = Queue(
    )  # Queue to write extracted eye landmarks from source video.
    landmarks_success_queue = Queue(
    )  # Queue to write whether eye landmark detection was successful
    frames_queue = Queue(
    )  # Queue for writing video frames, read by the landmark detector process.
    # Process for running 68 + 2 landmark detection in parallel with face reconstruction / NMFC rendering
    proccess_eye_landmarks = Process(
        target=compute_eye_landmarks,
        args=(dlib_detector, dlib_predictor, eye_landmarks_source_queue,
              landmarks_success_queue, frames_queue))
    proccess_eye_landmarks.start()
    proccesses.append(proccess_eye_landmarks)
    print('Launched landmark extractor!')

    # Video renderer (GAN).
    input_queue = torchQueue()  # Queue of GAN's input
    output_queue = torchQueue()  # Queue of GAN's output
    # Process for running the video renderer without waiting for NMFC + eye landmark creation.
    proccess_video_renderer = torchProcess(target=compute_fake_video,
                                           args=(input_queue, output_queue,
                                                 modelG, opt))
    proccess_video_renderer.start()
    proccesses.append(proccess_video_renderer)
    print('Launched video renderer!')

    camera = None
    if opt.realtime:
        try:
            import pyfakewebcam
            stream_id = opt.realtime_cam_id
            webcam_width = webcam_height = opt.loadSize
            camera = pyfakewebcam.FakeWebcam(f'/dev/video{stream_id}',
                                             webcam_width, webcam_height)
            camera.print_capabilities()
            print(f'Fake webcam created on /dev/video{stream_id}.')
        except Exception as ex:
            print('Fake webcam initialization failed:')
            print(str(ex))

    iter = 0
    # Start main Process (face reconstruction / NMFC rendering)
    while True:
        t0 = time.perf_counter()
        try:  # Read generated frames from video renderer's output Queue.
            # Non-blocking
            fake_frame, real_frame = output_queue.get_nowait()
            result = np.concatenate([real_frame, fake_frame[..., ::-1]],
                                    axis=1)
            # If output directory is specified save frames there.
            if opt.demo_dir is not None:
                result_path = os.path.join(opt.demo_dir,
                                           "{:06d}".format(iter) + '.png')
                cv2.imwrite(result_path, result)
            elif camera is not None:
                camera.schedule_frame(fake_frame)
            else:
                cv2.imshow(window_name, result)
                cv2.waitKey(1)
        except queue.Empty:  # If empty queue continue.
            pass
        # Read next frame
        _, frame = video_capture.read()
        # Crop the larger dimension of frame to make it square
        frame = make_frame_square(frame)

        if box_redecect_nframes > 0 and iter % box_redecect_nframes == 0:
            box = None
        # If no bounding box has been detected yet, run MTCNN (once in first frame)
        if box is None:
            box = detect_box(detector, frame)
        # If no face detected exit.
        if box is None:
            break
        # Crop frame at the point where the face was seen in the first frame.
        frame = extract_face(frame, box, opt.loadSize, margin)
        frame = tensor2npimage(frame)
        frame = np.transpose(frame, (1, 2, 0))
        # Send ROI frame to landmark detector, while the main Process performs face reconstruction.
        frames_queue.put(frame)
        # Get expression and pose, adapt pose and identity to target and render NMFC.
        success, s_cam_params, adapted_cam_params, new_nmfc = \
            compute_reconstruction(renderer, id_params, t_cam_params, s_cam_params,
                                   adapted_cam_params, frame)
        # Update the current NMFC if reconstruction was successful
        if success:
            nmfc = new_nmfc
        # If not, use previous nmfc. If it does not exist, exit.
        if not success and nmfc is None:
            break
        # Find eye centres using nmfc image.
        eye_centres, prev_eye_centres = search_eye_centres([nmfc[:, :, ::-1]],
                                                           prev_eye_centres)
        # Read Queue to get eye landmarks, if detection was successful.
        if landmarks_success_queue.get():
            eye_landmarks = eye_landmarks_source_queue.get()
        # If not, use previous eye landmarks. If they do not exist, exit.
        if eye_landmarks is None:
            break
        # If in first frames, determine the source-target eye size (height) ratio.
        if iter < n_frames_init:
            eye_landmarks_source[0].append(eye_landmarks[0])
            eye_landmarks_source[1].append(eye_landmarks[1])
            eye_ratios = compute_eye_landmarks_ratio(eye_landmarks_source,
                                                     eye_landmarks_target)
        # Adapt the eye landmarks to the target face by placing them at the eye centres
        # and re-scaling them to match the NMFC size and the target's mean eye height (top-down distance).
        eye_lands = adapt_eye_landmarks(
            [[eye_landmarks[0]], [eye_landmarks[1]]], eye_centres, eye_ratios,
            s_cam_params[-1:], adapted_cam_params[-1:])
        # Send the conditional input to video renderer
        input_queue.put((nmfc, eye_lands[0], frame))
        iter += 1
        # Show frame rate.
        t1 = time.perf_counter()
        dt = t1 - t0
        print('fps: %0.2f' % (1 / dt))

    # Terminate processes and join
    for process in proccesses:
        process.terminate()
        process.join()

    renderer.clear()
    print('Main process exiting')
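

# --- hedged sketch (not part of the original snippet) ---
# `detect_box` is used above but defined elsewhere; a minimal version based on
# facenet_pytorch's detector could look like this (keeping the largest face is
# an assumption), followed by the usual entry point.
def detect_box(detector, frame):
    boxes, _ = detector.detect(frame)
    if boxes is None or len(boxes) == 0:
        return None
    # keep the largest detected face
    areas = [(b[2] - b[0]) * (b[3] - b[1]) for b in boxes]
    return boxes[int(np.argmax(areas))]


if __name__ == '__main__':
    main()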