# Known third-party imports for these examples; project-internal names
# (FaceForensicsDataStructure, DataType, Compression, FileList, the split
# constants and the _-prefixed helpers) are assumed to come from this
# repository's own modules.
import logging
import multiprocessing as mp

import numpy as np
from joblib import Parallel, delayed
from tqdm import tqdm

logger = logging.getLogger(__name__)


def aggregate_masks_and_face_locations(source_dir_root, methods, compression,
                                       cpu_count):

    face_information_data_structure = FaceForensicsDataStructure(
        source_dir_root,
        methods=methods,
        compressions=(compression, ),
        data_types=(DataType.face_information, ),
    )

    bounding_boxes_data_structure = FaceForensicsDataStructure(
        source_dir_root,
        methods=methods,
        compressions=(compression, ),
        data_types=(DataType.bounding_boxes, ),
    )

    # bounding boxes previously extracted from the mask videos
    # (see extract_bounding_box_from_masks below)
    mask_data_structure = FaceForensicsDataStructure(
        source_dir_root,
        methods=methods,
        compressions=(Compression.masks, ),
        data_types=(DataType.bounding_boxes, ),
    )

    for face_information, bounding_boxes, mask_data in zip(
            face_information_data_structure.get_subdirs(),
            bounding_boxes_data_structure.get_subdirs(),
            mask_data_structure.get_subdirs(),
    ):

        if not face_information.exists():
            continue

        bounding_boxes.mkdir(parents=True, exist_ok=True)
        logger.info(
            f"Processing {face_information.parts[-2]}, {face_information.parts[-3]}"
        )

        if not mask_data.exists():
            logger.info("Didn't find any mask data.")

            # no mask data available: filter each video's face information on
            # its own
            Parallel(n_jobs=cpu_count)(
                delayed(_filter_face_information)(face_information_video,
                                                  None, bounding_boxes)
                for face_information_video in tqdm(
                    sorted(face_information.iterdir())))

        else:
            # compute mask bounding boxes for each video folder
            Parallel(n_jobs=cpu_count)(
                delayed(_filter_face_information)(face_information_video,
                                                  mask_data_video,
                                                  bounding_boxes)
                for face_information_video, mask_data_video in tqdm(
                    zip(sorted(face_information.iterdir()),
                        sorted(mask_data.iterdir()))))
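

# The helper below is referenced but not defined in these examples. This is a
# minimal, purely illustrative sketch of what it might do (an assumption, not
# the repository's actual implementation): load one video's face-information
# JSON, optionally keep only the frames for which mask-derived bounding boxes
# exist, and write the result to the output directory. The JSON layout is
# hypothetical.
import json


def _filter_face_information(face_information_video, mask_data_video,
                             bounding_boxes):
    with open(face_information_video) as f:
        face_information = json.load(f)

    if mask_data_video is not None:
        with open(mask_data_video) as f:
            mask_bounding_boxes = json.load(f)
        # keep only frames that also have a mask bounding box
        face_information = {
            frame: info
            for frame, info in face_information.items()
            if frame in mask_bounding_boxes
        }

    with open(bounding_boxes / face_information_video.name, "w") as f:
        json.dump(face_information, f)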


def resample_videos(source_dir_root, compressions, methods, fps):
    videos_data_structure = FaceForensicsDataStructure(
        source_dir_root,
        compressions=compressions,
        data_types=(DataType.videos, ),
        methods=methods,
    )

    resampled_videos_data_structure = FaceForensicsDataStructure(
        source_dir_root,
        compressions=compressions,
        methods=methods,
        data_types=(DataType.resampled_videos, ),
    )

    for videos, resampled_videos in zip(
            videos_data_structure.get_subdirs(),
            resampled_videos_data_structure.get_subdirs(),
    ):
        logger.info(f"Current method: {videos.parents[1].name}")

        resampled_videos.mkdir(exist_ok=True)

        # use a context manager so the worker pool is closed once mapping
        # finishes
        with mp.Pool(mp.cpu_count()) as p:
            p.map(
                _resampled_video,
                tqdm([(_video_folder, resampled_videos, fps)
                      for _video_folder in sorted(videos.iterdir())]),
            )
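

# A plausible sketch of the worker used above (an assumption, not the
# repository's implementation): unpack the argument tuple and re-encode the
# video at the target frame rate with ffmpeg's fps filter. Requires ffmpeg on
# PATH; the output keeps the input file's name.
import subprocess


def _resampled_video(args):
    video, resampled_videos, fps = args
    output_path = resampled_videos / video.name
    # -y overwrites an existing output instead of failing
    subprocess.run(
        ["ffmpeg", "-y", "-i", str(video), "-filter:v", f"fps={fps}",
         str(output_path)],
        check=True,
        capture_output=True,
    )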


def migrate_bounding_boxes_to_face_information(source_dir_root, methods):

    source_dir_data_structure = FaceForensicsDataStructure(
        source_dir_root,
        methods=methods,
        compressions=(Compression.c40, Compression.c23, Compression.raw),
        data_types=(DataType.bounding_boxes, ),
    )

    target_dir_data_structure = FaceForensicsDataStructure(
        source_dir_root,
        methods=methods,
        compressions=(Compression.c40, Compression.c23, Compression.raw),
        data_types=(DataType.face_information, ),
    )

    for source_sub_dir, target_sub_dir in zip(
            source_dir_data_structure.get_subdirs(),
            target_dir_data_structure.get_subdirs()):

        if not source_sub_dir.exists():
            continue

        # create the parent directories only; rename() below supplies the
        # final directory itself (renaming onto an existing directory is not
        # portable)
        target_sub_dir.parent.mkdir(parents=True, exist_ok=True)
        logger.info(
            f"Processing {source_sub_dir.parts[-2]}, {source_sub_dir.parts[-3]}"
        )

        source_sub_dir.rename(target_sub_dir)


def extract_faces(source_dir_root, compressions, methods, cpu_count):
    full_images_data_structure = FaceForensicsDataStructure(
        source_dir_root,
        compressions=compressions,
        data_types=(DataType.full_images, ),
        methods=methods,
    )

    bounding_boxes_dir_data_structure = FaceForensicsDataStructure(
        source_dir_root,
        compressions=compressions,
        data_types=(DataType.bounding_boxes, ),
        methods=methods,
    )

    face_images_dir_data_structure = FaceForensicsDataStructure(
        source_dir_root,
        compressions=compressions,
        data_types=(DataType.face_images, ),
        methods=methods,
    )

    for full_images, bounding_boxes, face_images in zip(
            full_images_data_structure.get_subdirs(),
            bounding_boxes_dir_data_structure.get_subdirs(),
            face_images_dir_data_structure.get_subdirs(),
    ):
        logger.info(f"Current method: {full_images.parents[1].name}")

        face_images.mkdir(exist_ok=True)

        # extract faces from videos in parallel
        Parallel(n_jobs=cpu_count)(
            delayed(_extract_faces_from_video)(video_folder, bounding_boxes,
                                               face_images)
            for video_folder in tqdm(sorted(full_images.iterdir())))
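

# An illustrative sketch of the face-cropping worker referenced above (an
# assumption about its behavior, not the actual code): look up the video's
# bounding boxes, crop each full frame to its box with OpenCV, and save the
# crop under the face-images directory. The JSON layout is hypothetical.
import json

import cv2


def _extract_faces_from_video(video_folder, bounding_boxes, face_images):
    with open(bounding_boxes / f"{video_folder.name}.json") as f:
        boxes = json.load(f)

    output_folder = face_images / video_folder.name
    output_folder.mkdir(exist_ok=True)

    for image_path in sorted(video_folder.glob("*.png")):
        box = boxes.get(image_path.stem)
        if box is None:
            continue
        x, y, w, h = box
        image = cv2.imread(str(image_path))
        # crop to the bounding box and save under the same file name
        cv2.imwrite(str(output_folder / image_path.name),
                    image[y:y + h, x:x + w])

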
def extract_bounding_box_from_masks(source_dir_root, methods, cpu_count):

    # use FaceForensicsDataStructure to iterate over the correct image folders
    source_dir_data_structure = FaceForensicsDataStructure(
        source_dir_root,
        methods=methods,
        compressions=(Compression.masks,),
        data_types=(DataType.videos,),
    )

    # this will be used to iterate the same way as the source dir
    # -> create same data structure again
    target_dir_data_structure = FaceForensicsDataStructure(
        source_dir_root,
        methods=methods,
        compressions=(Compression.masks,),
        data_types=(DataType.bounding_boxes,),
    )

    # zip source and target structure to iterate over both simultaneously
    for source_sub_dir, target_sub_dir in zip(
        source_dir_data_structure.get_subdirs(), target_dir_data_structure.get_subdirs()
    ):

        if not source_sub_dir.exists():
            continue

        target_sub_dir.mkdir(parents=True, exist_ok=True)
        logger.info(
            f"Processing {source_sub_dir.parts[-2]}, {source_sub_dir.parts[-3]}"
        )

        # compute mask bounding boxes for each video in parallel
        Parallel(n_jobs=cpu_count)(
            delayed(extract_bounding_boxes_from_video)(video_path, target_sub_dir)
            for video_path in tqdm(sorted(source_sub_dir.iterdir()))
        )
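

# A hedged sketch of the per-video worker above (assumed behavior, not the
# repository's implementation): read the mask video frame by frame, take the
# bounding box of all non-zero mask pixels with OpenCV, and dump one box per
# frame as JSON named after the video.
import json

import cv2


def extract_bounding_boxes_from_video(video_path, target_sub_dir):
    capture = cv2.VideoCapture(str(video_path))
    boxes = {}
    frame = 0
    while True:
        success, image = capture.read()
        if not success:
            break
        mask = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        points = cv2.findNonZero(mask)
        if points is not None:
            # boundingRect of the non-zero pixels -> (x, y, w, h)
            boxes[f"{frame:04d}"] = list(cv2.boundingRect(points))
        frame += 1
    capture.release()

    with open(target_sub_dir / f"{video_path.stem}.json", "w") as f:
        json.dump(boxes, f)

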
def _create_file_list(
    methods,
    compressions,
    data_types,
    min_sequence_length,
    output_file,
    samples_per_video_train,
    samples_per_video_val,
    source_dir_root,
):
    file_list = FileList(root=source_dir_root,
                         classes=methods,
                         min_sequence_length=min_sequence_length)
    # use FaceForensicsDataStructure to iterate over the correct
    # image folders
    source_dir_data_structure = FaceForensicsDataStructure(
        source_dir_root,
        methods=methods,
        compressions=compressions,
        data_types=data_types,
    )

    _min_sequence_length = _get_min_sequence_length(source_dir_data_structure)
    if (_min_sequence_length < samples_per_video_train
            or _min_sequence_length < samples_per_video_val):
        logger.warning(
            f"There is a sequence with fewer frames "
            f"than you would like to sample: {_min_sequence_length}")

    for split, split_name in [(TRAIN, TRAIN_NAME), (VAL, VAL_NAME),
                              (TEST, TEST_NAME)]:
        for source_sub_dir in source_dir_data_structure.get_subdirs():
            target = source_sub_dir.parts[-3]
            for video_folder in sorted(source_sub_dir.iterdir()):
                if video_folder.name.split("_")[0] in split:

                    images = sorted(video_folder.glob("*.png"))
                    filtered_images_idx = []

                    # find all frames that have at least min_sequence_length-1
                    # consecutive preceding frames (e.g. with
                    # min_sequence_length=3, every frame from the third one of
                    # an unbroken run onward qualifies)
                    if len(images) == 0:
                        continue

                    sequence_start = _img_name_to_int(images[0])
                    last_idx = sequence_start
                    for list_idx, image in enumerate(images):
                        image_idx = _img_name_to_int(image)
                        if last_idx + 1 != image_idx:
                            sequence_start = image_idx
                        elif image_idx - sequence_start >= min_sequence_length - 1:
                            filtered_images_idx.append(list_idx)
                        last_idx = image_idx

                    # for the test set all frames are taken;
                    # otherwise frames are distributed uniformly

                    if split_name == TRAIN_NAME:
                        samples_per_video = samples_per_video_train
                    elif split_name == VAL_NAME:
                        samples_per_video = samples_per_video_val
                    elif split_name == TEST_NAME:
                        samples_per_video = -1

                    selected_frames = _select_frames(len(filtered_images_idx),
                                                     samples_per_video)

                    sampled_images_idx = np.asarray(
                        filtered_images_idx)[selected_frames]
                    file_list.add_data_points(
                        path_list=images,
                        target_label=target,
                        split=split_name,
                        sampled_images_idx=sampled_images_idx,
                    )

    file_list.save(output_file)
    logger.info(f"{output_file} created.")
    return file_list
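

# Plausible sketches of three helpers referenced above (assumptions about
# their behavior, not the repository's actual implementations):
# _img_name_to_int parses the frame number from a zero-padded file name such
# as "0042.png"; _get_min_sequence_length finds the frame count of the
# shortest video across all sub-directories; _select_frames spreads the
# requested number of sample indices uniformly over a sequence and takes
# every frame when samples_per_video is -1 (the test-set case).
def _img_name_to_int(image):
    # e.g. Path(".../0042.png") -> 42
    return int(image.stem)


def _get_min_sequence_length(source_dir_data_structure):
    min_length = float("inf")
    for sub_dir in source_dir_data_structure.get_subdirs():
        if not sub_dir.exists():
            continue
        for video_folder in sorted(sub_dir.iterdir()):
            min_length = min(min_length,
                             len(list(video_folder.glob("*.png"))))
    return min_length


def _select_frames(frames_count, samples_per_video):
    if samples_per_video == -1 or samples_per_video >= frames_count:
        # take every available frame (test split, or short sequences)
        return list(range(frames_count))
    # evenly spaced indices across the available frames
    return np.linspace(0, frames_count - 1, samples_per_video,
                       dtype=int).tolist()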