def create_file_list(
    source_dir_root,
    target_dir_root,
    output_file,
    compressions,
    data_types,
    samples_per_video,
    min_sequence_length,
):
    """Ensure a FileList exists at *output_file* and log its splits.

    Loads a previously generated list from disk when possible (EAFP);
    otherwise builds a fresh one via ``_create_file_list``. When
    *target_dir_root* is given, the referenced files are copied there
    and the list is saved again afterwards.
    """
    try:
        # Reuse the cached list if it was created on an earlier run.
        fl = FileList.load(output_file)
    except FileNotFoundError:
        fl = _create_file_list(
            compressions,
            data_types,
            min_sequence_length,
            output_file,
            samples_per_video,
            source_dir_root,
        )

    if target_dir_root:
        # Mirror the referenced files to the target root, then persist.
        fl.copy_to(Path(target_dir_root))
        fl.save(output_file)

    # Emit one summary line per split so the result can be sanity-checked.
    for name in (TRAIN_NAME, VAL_NAME, TEST_NAME):
        data_set = FileList.get_dataset_form_file(output_file, name)
        logger.info(f"{name}-data-set: {data_set}")
# ---- Beispiel #2 (Example 2) — separator left over from a code-sample listing ----
def _ingest_blocks(file_list, block_numbers, splits):
    """Add every image under ``extracted_images_<n>`` to *file_list*.

    For each existing block directory, each label subdirectory's PNGs are
    registered once per split name in *splits* (in order), with sample
    indices 0..len(images)-1.  Missing block directories are skipped.
    """
    for number in block_numbers:
        block = root_dir / f"extracted_images_{number}"
        if not block.exists():
            continue
        for label in block.iterdir():
            images = list(label.glob("*/*.png"))
            for split in splits:
                file_list.add_data_points(images, label.name, split,
                                          np.arange(0, len(images)))


f = FileList(str(root_dir), classes=["FAKE", "REAL"], min_sequence_length=1)

# Blocks 5..49 become training data; blocks 0..4 serve as both val and test.
train_data_numbers = list(range(5, 50))
val_data_numbers = list(range(5))

_ingest_blocks(f, train_data_numbers, ["train"])
_ingest_blocks(f, val_data_numbers, ["val", "test"])

f.save("/data/ssd1/file_lists/dfdc/5_45_split.json")

# Print one summary per split to sanity-check the generated file list.
for split in [TRAIN_NAME, VAL_NAME, TEST_NAME]:
    data_set = FileList.get_dataset_form_file(
        "/data/ssd1/file_lists/dfdc/5_45_split.json", split)
    print(f"{split}-data-set: {data_set}")