예제 #1
0
def _verify_faces_checkpoint(checkpoint_path: Path,
                             facedestination_dir: Path,
                             deepcheck: bool) -> None:
    """Raise if a faces checkpoint, or a face image it lists, is unusable.

    Args:
        checkpoint_path: pickled face table to load.
        facedestination_dir: directory the table's index entries are joined to.
        deepcheck: when True, additionally open every listed image and require
            it to decode to a non-empty 3-dimensional array.

    Raises:
        FileNotFoundError: a listed face image does not exist.
        ValueError: (deepcheck only) an image is not 3-D or has no pixels.
        Exception: anything raised while unpickling or decoding an image.
    """
    # NOTE(security): unpickling executes arbitrary code; only point this at
    # checkpoint files written by this pipeline.
    df_video_faces = pd.read_pickle(str(checkpoint_path))

    # Only the index labels are needed, so iterate the index instead of
    # materialising every row.
    for face_relpath in df_video_faces.index:
        face_path = facedestination_dir.joinpath(face_relpath)
        # Explicit raises instead of `assert`: assertions are removed under
        # `python -O`, which would turn this validation into a no-op.
        if not face_path.exists():
            raise FileNotFoundError(
                'Missing face image: {}'.format(face_path))
        if deepcheck:
            # Context manager so the image file handle is released per face.
            with Image.open(face_path) as img:
                img_arr = np.asarray(img)
            if img_arr.ndim != 3:
                raise ValueError(
                    'Face image is not 3-dimensional: {}'.format(face_path))
            if img_arr.size == 0:
                raise ValueError(
                    'Face image is empty: {}'.format(face_path))


def process_video(
    item: Tuple[pd.Index, pd.Series],
    source_dir: Path,
    facedestination_dir: Path,
    checkpoint_folder: Path,
    face_size: int,
    face_extractor: FaceExtractor,
    lazycheck: bool = False,
    deepcheck: bool = False,
) -> 'Tuple[pd.DataFrame, Path, List[Tuple[Image.Image, Path]]] | None':
    """Extract the face crops of one video and describe them in a DataFrame.

    The function does not write any image or checkpoint file itself: it
    returns the crops together with their target paths, and the checkpoint
    path, to the caller. It does create the destination directories.

    Args:
        item: ``(idx, record)`` pair. ``record['path']`` is the video path
            relative to ``source_dir``; ``record['label']`` is copied into
            every face row; ``idx`` is stored in the ``video`` column.
        source_dir: directory ``record['path']`` is joined to for reading.
        facedestination_dir: root directory of the face image paths.
        checkpoint_folder: root directory of the ``.faces.pkl`` checkpoints.
        face_size: height and width passed to ``adapt_bb`` for the crop box.
        face_extractor: object providing ``process_video`` and
            ``keep_only_best_face``.
        lazycheck: when True, an existing checkpoint is trusted as-is.
        deepcheck: when validating a checkpoint, also decode every image.

    Returns:
        ``(df_video_faces, video_faces_checkpoint_path, images_to_save)``,
        where ``df_video_faces`` is indexed by face path relative to
        ``facedestination_dir`` (empty DataFrame if no face was kept) and
        ``images_to_save`` is a list of ``(crop, absolute face path)``.
        ``None`` when a checkpoint already exists (and, unless ``lazycheck``,
        passed validation), when the extractor returns no frames, or when
        processing raised (the traceback is printed to stdout).
    """
    # The annotation above is quoted so that it needs no typing name beyond
    # the ones this signature already used.
    idx, record = item

    # Checkpoint
    video_faces_checkpoint_path = checkpoint_folder.joinpath(
        record['path']).with_suffix('.faces.pkl')

    if not lazycheck and video_faces_checkpoint_path.exists():
        try:
            _verify_faces_checkpoint(video_faces_checkpoint_path,
                                     facedestination_dir, deepcheck)
        except Exception as e:
            # Any failure invalidates the checkpoint: remove it so that the
            # video is processed again below.
            print('Error while checking: {}'.format(
                video_faces_checkpoint_path))
            print(e)
            video_faces_checkpoint_path.unlink()

    if video_faces_checkpoint_path.exists():
        # Trusted or validated checkpoint: nothing to recompute.
        return None

    try:
        # Load faces
        frames = face_extractor.process_video(
            source_dir.joinpath(record['path']))

        if len(frames) == 0:
            return None

        face_extractor.keep_only_best_face(frames)
        # Every remaining detection is assigned to subject 0.
        for frame in frames:
            frame['subjects'] = [0] * len(frame['detections'])

        detection_keys = blazeface.BlazeFace.detection_keys

        # Extract and save faces, bounding boxes, keypoints
        video_face_dict_list = []
        images_to_save: List[Tuple[Image.Image, Path]] = []
        for frame in frames:
            if not len(frame['detections']):
                continue

            fullframe = Image.fromarray(frame['frame'])

            # Preserve the only found face even if not a good one,
            # otherwise preserve only clusters > -1
            subjects = np.unique(frame['subjects'])
            if len(subjects) > 1:
                subjects = np.asarray([s for s in subjects if s > -1])

            for face_idx, _ in enumerate(frame['faces']):
                subj_id = frame['subjects'][face_idx]
                if subj_id not in subjects:
                    # Exclude outliers if other faces detected
                    continue

                face_path = facedestination_dir.joinpath(
                    record['path'],
                    'fr{:03d}_subj{:1d}.jpg'.format(frame['frame_idx'],
                                                    subj_id))

                face_dict = {
                    'facepath': str(
                        face_path.relative_to(facedestination_dir)),
                    'video': idx,
                    'label': record['label'],
                    'videosubject': subj_id,
                }
                detection = frame['detections'][face_idx]
                for field_idx, key in enumerate(detection_keys):
                    face_dict[key] = detection[field_idx]

                cropping_bb = adapt_bb(frame_height=fullframe.height,
                                       frame_width=fullframe.width,
                                       bb_height=face_size,
                                       bb_width=face_size,
                                       left=face_dict['xmin'],
                                       top=face_dict['ymin'],
                                       right=face_dict['xmax'],
                                       bottom=face_dict['ymax'])
                face = fullframe.crop(cropping_bb)

                # Re-express box and keypoint coordinates relative to the
                # crop's top-left corner.
                for key in detection_keys:
                    if (key[0] == 'k' and key[-1] == 'x') or key[0] == 'x':
                        face_dict[key] -= cropping_bb[0]
                    elif (key[0] == 'k' and key[-1] == 'y') or key[0] == 'y':
                        face_dict[key] -= cropping_bb[1]

                face_dict['left'] = face_dict.pop('xmin')
                face_dict['top'] = face_dict.pop('ymin')
                face_dict['right'] = face_dict.pop('xmax')
                face_dict['bottom'] = face_dict.pop('ymax')

                face_path.parent.mkdir(parents=True, exist_ok=True)
                images_to_save.append((face, face_path))

                video_face_dict_list.append(face_dict)

        if video_face_dict_list:
            df_video_faces = pd.DataFrame(video_face_dict_list).set_index(
                'facepath')

            # type conversions
            for key in [
                    'kp1x', 'kp1y', 'kp2x', 'kp2y', 'kp3x', 'kp3y', 'kp4x',
                    'kp4y', 'kp5x', 'kp5y', 'kp6x', 'kp6y', 'left', 'top',
                    'right', 'bottom'
            ]:
                df_video_faces[key] = df_video_faces[key].astype(np.int16)
            df_video_faces['conf'] = df_video_faces['conf'].astype(
                np.float32)
            df_video_faces['video'] = df_video_faces['video'].astype(
                'category')

            video_faces_checkpoint_path.parent.mkdir(parents=True,
                                                     exist_ok=True)
        else:
            print('No faces extracted for video {}'.format(record['path']))
            df_video_faces = pd.DataFrame()

        return df_video_faces, video_faces_checkpoint_path, images_to_save

    except Exception:
        # Best-effort batch step: report the failure and let the caller
        # carry on with the other videos.
        print('Error while processing: {}'.format(record['path']))
        print("-" * 60)
        traceback.print_exc(file=sys.stdout, limit=5)
        print("-" * 60)
        return None
예제 #2
0
print('Model loaded!')

# Face preprocessing pipeline, built with train=False.
transf = utils.get_transformer(
    face_policy, face_size, net.get_normalizer(), train=False)

# Face detector, moved to `device`, with weights and anchors loaded from disk.
facedet = BlazeFace().to(device)
facedet.load_weights("blazeface/blazeface.pth")
facedet.load_anchors("blazeface/anchors.npy")
videoreader = VideoReader(verbose=False)


def video_read_fn(x):
    """Read frames from video ``x``, passing ``num_frames=frames_per_video``."""
    return videoreader.read_frames(x, num_frames=frames_per_video)


face_extractor = FaceExtractor(video_read_fn=video_read_fn, facedet=facedet)

vid_real_faces = face_extractor.process_video(
    'samples/490868123550446422495477631417.mp4')
vid_fake_faces = face_extractor.process_video(
    'samples/284649338838012868101332189709.mp4')


## Predict scores for each frame

def _stack_top_faces(video_frames):
    """Transform the first face of each entry that has faces and stack them.

    For each frame, we consider the face with the highest confidence score
    found by BlazeFace (= frame['faces'][0]); entries without faces are
    skipped.
    """
    top_faces = [
        entry['faces'][0] for entry in video_frames if len(entry['faces'])
    ]
    return torch.stack([transf(image=face)['image'] for face in top_faces])


faces_real_t = _stack_top_faces(vid_real_faces)
faces_fake_t = _stack_top_faces(vid_fake_faces)
예제 #3
0
# Exported notebook cells 9-14: GPU inspection, face extraction on two sample
# videos, and a side-by-side preview of one face from each.

# In[9]:

# Bare expression: the result is not bound to a name, so outside a notebook
# (where it would be echoed as cell output) this line has no visible effect.
torch.cuda.device(0)

# In[10]:

# Bare expression; the device count is computed and discarded.
torch.cuda.device_count()

# In[11]:

# Bare expression; the name of device 0 is queried and discarded.
# NOTE(review): presumably raises on a machine without CUDA — confirm.
torch.cuda.get_device_name(0)

# In[12]:

# Run the face extractor on the two sample videos.
vid_real_faces = face_extractor.process_video('samples/lynaeydofd.mp4')
vid_fake_faces = face_extractor.process_video('samples/mqzvfufzoq.mp4')

# In[13]:

# First face of the first entry of each result. This indexes unconditionally,
# so it fails if the result is empty or its first entry has no faces.
im_real_face = vid_real_faces[0]['faces'][0]
im_fake_face = vid_fake_faces[0]['faces'][0]

# In[14]:

# One row, two columns: REAL face on the left, FAKE face on the right.
fig, ax = plt.subplots(1, 2, figsize=(8, 4))

ax[0].imshow(im_real_face)
ax[0].set_title('REAL')

ax[1].imshow(im_fake_face)
예제 #4
0
def _first_detected_face(frames):
    """Return the first face of the first entry in ``frames`` that has one.

    Raises:
        IndexError: if no entry in ``frames`` contains a face.
    """
    for frame in frames:
        if len(frame['faces']):
            return frame['faces'][0]
    raise IndexError('no face detected in any frame')


def run_nb(modelname):
    """Run the exported video-prediction notebook for one architecture.

    Loads the weights registered for ``modelname`` trained on 'DFDC',
    extracts faces from the two sample videos, shows one face of each,
    scores every frame that has a face, plots the per-frame scores and
    prints the average score of each video.

    Args:
        modelname: name of the network class looked up on ``fornet`` and
            used, together with the training dataset name, as the key into
            ``weights.weight_url``. The notebook lists EfficientNetB4,
            EfficientNetB4ST, EfficientNetAutoAttB4,
            EfficientNetAutoAttB4ST and Xception.

    Returns:
        None. Results are plotted and printed.

    Raises:
        IndexError: if no face is found in one of the sample videos.
    """
    # ## Parameters

    # In[2]:
    net_model = modelname
    # Choose a training dataset between
    # - DFDC
    # - FFPP
    train_db = 'DFDC'

    # In[3]:

    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    face_policy = 'scale'
    face_size = 224
    frames_per_video = 32

    # ## Initialization

    # In[4]:

    print('=' * 20)
    model_url = weights.weight_url['{:s}_{:s}'.format(net_model, train_db)]
    print('=' * 20)
    net = getattr(fornet, net_model)().eval().to(device)
    print('=' * 20)
    net.load_state_dict(
        load_url(model_url, map_location=device, check_hash=True))

    # In[5]:

    transf = utils.get_transformer(face_policy,
                                   face_size,
                                   net.get_normalizer(),
                                   train=False)

    # In[6]:

    facedet = BlazeFace().to(device)
    facedet.load_weights("../blazeface/blazeface.pth")
    facedet.load_anchors("../blazeface/anchors.npy")
    videoreader = VideoReader(verbose=False)

    def video_read_fn(x):
        return videoreader.read_frames(x, num_frames=frames_per_video)

    face_extractor = FaceExtractor(video_read_fn=video_read_fn,
                                   facedet=facedet)

    # ## Detect faces

    # In[7] - In[11]:
    # Notebook GPU-inspection cells; their results are discarded here. They
    # are guarded because querying the current device or its name raises on
    # a host without CUDA, while this function otherwise supports the CPU
    # fallback selected above.
    if torch.cuda.is_available():
        torch.cuda.current_device()
        torch.cuda.device(0)
        torch.cuda.device_count()
        torch.cuda.get_device_name(0)

    # In[12]:

    vid_real_faces = face_extractor.process_video('samples/lynaeydofd.mp4')
    vid_fake_faces = face_extractor.process_video('samples/mqzvfufzoq.mp4')

    # In[13]:

    # Frames without faces are tolerated by the scoring below, so the preview
    # must not assume that the very first frame has one.
    im_real_face = _first_detected_face(vid_real_faces)
    im_fake_face = _first_detected_face(vid_fake_faces)

    # In[14]:

    fig, ax = plt.subplots(1, 2, figsize=(8, 4))

    ax[0].imshow(im_real_face)
    ax[0].set_title('REAL')

    ax[1].imshow(im_fake_face)
    ax[1].set_title('FAKE')

    # ## Predict scores for each frame

    # In[15]:

    # For each frame, we consider the face with the highest confidence score
    # found by BlazeFace (= frame['faces'][0])
    faces_real_t = torch.stack([
        transf(image=frame['faces'][0])['image'] for frame in vid_real_faces
        if len(frame['faces'])
    ])
    faces_fake_t = torch.stack([
        transf(image=frame['faces'][0])['image'] for frame in vid_fake_faces
        if len(frame['faces'])
    ])

    with torch.no_grad():
        faces_real_pred = net(faces_real_t.to(device)).cpu().numpy().flatten()
        faces_fake_pred = net(faces_fake_t.to(device)).cpu().numpy().flatten()

    # In[16]:

    fig, ax = plt.subplots(1, 2, figsize=(12, 4))

    for axis, title, frames, raw_scores in (
            (ax[0], 'REAL', vid_real_faces, faces_real_pred),
            (ax[1], 'FAKE', vid_fake_faces, faces_fake_pred)):
        # `use_line_collection` is not passed: newer Matplotlib releases no
        # longer accept it and always draw the stems as a line collection.
        axis.stem([f['frame_idx'] for f in frames if len(f['faces'])],
                  expit(raw_scores))
        axis.set_title(title)
        axis.set_xlabel('Frame')
        axis.set_ylabel('Score')
        axis.set_ylim([0, 1])

    ax[0].grid(True)
    # Ticks and labels are set separately: a positional second argument to
    # set_yticks is interpreted as `minor` by older Matplotlib releases.
    ax[1].set_yticks([0, 1])
    ax[1].set_yticklabels(['REAL', 'FAKE'])

    # In[17]:
    # Print average scores.
    # An average score close to 0 predicts REAL. An average score close to 1
    # predicts FAKE.
    print('Average score for REAL video: {:.4f}'.format(
        expit(faces_real_pred.mean())))
    print('Average score for FAKE video: {:.4f}'.format(
        expit(faces_fake_pred.mean())))