def create_file_list(
    source_dir_root,
    target_dir_root,
    output_file,
    compressions,
    data_types,
    samples_per_video,
    min_sequence_length,
):
    try:
        # if the file list already exists, we don't have to create it again
        file_list = FileList.load(output_file)
    except FileNotFoundError:
        file_list = _create_file_list(
            compressions,
            data_types,
            min_sequence_length,
            output_file,
            samples_per_video,
            source_dir_root,
        )

    if target_dir_root:
        file_list.copy_to(Path(target_dir_root))
        file_list.save(output_file)

    for split in [TRAIN_NAME, VAL_NAME, TEST_NAME]:
        data_set = FileList.get_dataset_form_file(output_file, split)
        logger.info(f"{split}-data-set: {data_set}")
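# A minimal usage sketch for create_file_list; every path and the compression /
# data-type values below are hypothetical placeholders:
#
# create_file_list(
#     source_dir_root="/data/FaceForensics",
#     target_dir_root=None,
#     output_file="/data/file_lists/c40_100_samples.json",
#     compressions=("c40",),
#     data_types=("face_images",),
#     samples_per_video=100,
#     min_sequence_length=8,
# )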
def visualize():
    net = SimpleAEL1Pretrained().eval()

    file_list = FileList.load(
        "/data/ssd1/file_lists/avspeech/resampled_and_avspeech_100_samples_consolidated.json"
    )
    dataset = file_list.get_dataset("train", sequence_length=1)

    for class_id in range(6):
        sample_images = _get_images_from_class(dataset, class_id=class_id, num_images=10)
        sample_images = torch.stack(sample_images)
        output_images = net(sample_images)[RECON_X]
        diff = sample_images - output_images

        x_12 = sample_images.view(-1, 3, 112, 112)
        x_12_recon = output_images.contiguous().view(-1, 3, 112, 112)
        diff_recon = diff.contiguous().view(-1, 3, 112, 112)
        x_12 = torch.cat((x_12, x_12_recon, diff_recon), dim=2)  # this needs to stack the images differently

        datapoints = make_grid(x_12, nrow=10, range=(-1, 1), normalize=True)
        d = datapoints.detach().permute(1, 2, 0).numpy() * 255
        # tensors are RGB, but cv2 expects BGR and 8-bit data
        d = cv2.cvtColor(d.astype("uint8"), cv2.COLOR_RGB2BGR)
        cv2.imwrite(f"images_classes_{class_id}.png", d)
def change_root(file_list_path, new_root):
    f = FileList.load(file_list_path)
    print(f"Old root: {f.root}")
    f.root = str(new_root)
    print(f"New root: {f.root}")

    save_filelist = (
        click.prompt("Is this root correct? Save the file list?", type=str, default="y").lower()
        == "y"
    )
    if save_filelist:
        f.save(file_list_path)
        print("Successfully saved.")
    else:
        print("Aborted.")
def transform_dataset(size, source_file_list, target_dataset_folder, target_file_list):
    f = FileList.load(source_file_list)
    old_root = Path(f.root)
    new_root = Path(target_dataset_folder)
    new_root.mkdir(exist_ok=False)

    transform = Compose(resized_crop(size))

    for split in [TRAIN_NAME, VAL_NAME, TEST_NAME]:
        Parallel(n_jobs=mp.cpu_count())(
            delayed(lambda sample: resize(old_root, sample[0], new_root, transform))(
                sample_
            )
            for sample_ in tqdm(f.samples[split])
        )

    f.root = str(new_root)
    f.save(target_file_list)
def _create_file_list(
    min_sequence_length: int,
    output_file: str,
    samples_per_video: int,
    source_dir_root: str,
):
    file_list = FileList(
        root=source_dir_root,
        classes=[AVSPEECH_NAME],
        min_sequence_length=min_sequence_length,
    )

    source_dir_root = Path(source_dir_root)

    # 90/10 split between train and val videos
    videos = sorted(source_dir_root.iterdir())
    train = videos[: int(len(videos) * 0.9)]
    val = videos[int(len(videos) * 0.9) :]

    for split, split_name in [(train, TRAIN_NAME), (val, VAL_NAME)]:
        for video_folder in sorted(split):
            images = sorted(video_folder.glob("*.png"))
            filtered_images_idx = []

            # find all frames that have at least min_sequence_length - 1
            # preceding consecutive frames
            sequence_start = img_name_to_int(images[0])
            last_idx = sequence_start
            for list_idx, image in enumerate(images):
                image_idx = img_name_to_int(image)
                if last_idx + 1 != image_idx:
                    sequence_start = image_idx
                elif image_idx - sequence_start >= min_sequence_length - 1:
                    filtered_images_idx.append(list_idx)
                last_idx = image_idx

            selected_frames = select_frames(
                len(filtered_images_idx), samples_per_video
            )
            sampled_images_idx = np.asarray(filtered_images_idx)[selected_frames]

            file_list.add_data_points(
                path_list=images,
                target_label=AVSPEECH_NAME,
                split=split_name,
                sampled_images_idx=sampled_images_idx,
            )

    file_list.save(output_file)
    logger.info(f"{output_file} created.")
    return file_list
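# Worked example for the filtering above, with hypothetical frame numbers and
# min_sequence_length=3: for frames 0, 1, 2, 3, 7, 8 the gap at 7 restarts the
# sequence, so only frames 2 and 3 (list indices 2 and 3) have at least two
# consecutive predecessors and end up in filtered_images_idx.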
def run_inference_for_video(audio_mode, folder, model, trainer):
    image_file_list = folder / "image_file_list.json"
    audio_file_list = folder / "audio_file_list.json"

    f = FileList.load(str(image_file_list))
    test_video_dataset = f.get_dataset(
        "test",
        image_transforms=model.resize_transform,
        tensor_transforms=model.tensor_augmentation_transforms,
        sequence_length=model.model.sequence_length,
        audio_file_list=SimpleFileList.load(audio_file_list),
        audio_mode=audio_mode,
    )
    loader = get_fixed_dataloader(
        test_video_dataset,
        model.hparams["batch_size"],
        sampler=SequentialSampler,
        num_workers=model.hparams["n_cpu"],
        worker_init_fn=lambda worker_id: np.random.seed(worker_id),
    )
    model.test_dataloader = lambda: loader  # this seems not to work at all
    print(folder, len(loader))
    trainer.test(model)
def change_class_order():
    file_list = (
        "/mnt/ssd1/sebastian/file_lists/c40/"
        "youtube_Deepfakes_Face2Face_FaceSwap_NeuralTextures_c40_face_images_tracked_100_100_8.json"
    )
    # file_list = "/data/ssd1/file_lists/c40/tracked_resampled_faces.json"
    f_new = FileList.load(file_list)
    f_new.class_to_idx = {
        "Deepfakes": 0,
        "Face2Face": 1,
        "FaceSwap": 2,
        "NeuralTextures": 3,
        "youtube": 4,
    }
    f_new.classes = [
        "Deepfakes",
        "Face2Face",
        "FaceSwap",
        "NeuralTextures",
        "youtube",
    ]
    for split in f_new.samples.values():
        for item in split:
            # shift every label down by one: (x + 4) % 5 == (x - 1) % 5
            item[1] = (item[1] + 4) % 5
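# The remapping above shifts every label down by one, assuming the old order
# was youtube=0, Deepfakes=1, Face2Face=2, FaceSwap=3, NeuralTextures=4:
#
#   old label: 0  1  2  3  4
#   new label: 4  0  1  2  3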
def do_error_stuff():
    r = Five00BatchesResnet18().cuda().eval()
    f = FileList.load("/data/ssd1/file_lists/imagenet/ssd_raw.json")
    val_data = f.get_dataset(
        "val", image_transforms=[transforms.Resize(256), transforms.CenterCrop(224)]
    )
    val_data_loader = DataLoader(
        val_data, batch_size=256, shuffle=True, num_workers=2, pin_memory=True
    )

    with torch.no_grad():
        acc = 0
        num_items = 0
        t_bar = tqdm(val_data_loader)
        for batch in t_bar:
            images, targets = batch
            targets, images = targets.cuda(), images.cuda()
            predictions = r.forward(images)
            # calculate_accuracy returns the top-1 accuracy for this batch
            batch_acc = r.calculate_accuracy(predictions, targets)
            acc += batch_acc
            num_items += 1
            t_bar.set_description(f"Curr acc: {acc / num_items}")
        # note: averaging per-batch accuracies is only exact when all batches
        # have the same size (the last batch may be smaller)
        acc /= num_items
        print(acc)
def _compute_statistics_of_path(path: str, model, batch_size, dims, cuda, split="val"):
    if path.endswith(".npz"):
        # pre-computed statistics can be loaded directly
        f = np.load(path)
        m, s = f["mu"][:], f["sigma"][:]
        f.close()
    else:
        file_list = FileList.load(path)
        root = Path(file_list.root)
        idx = file_list.samples_idx[split]
        files = np.array(list(map(lambda x: root / x[0], file_list.samples[split])))
        files = files[idx]
        m, s = calculate_activation_statistics(files, model, batch_size, dims, cuda)

    return m, s
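# A minimal sketch for producing the .npz fast path above (paths are
# hypothetical); np.savez stores mu/sigma under the keys the loader expects:
#
# m, s = _compute_statistics_of_path(
#     "/data/file_lists/some_list.json", model, batch_size=64, dims=2048, cuda=True
# )
# np.savez("/data/fid_stats/some_list_val.npz", mu=m, sigma=s)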
from forgery_detection.data.file_lists import FileList

resampled_file_list = FileList.load(
    "/mnt/ssd1/sebastian/file_lists/c40/tracked_resampled_faces.json"
)

# reuse val as train and test as val
resampled_file_list.samples["train"] = resampled_file_list.samples["val"]
resampled_file_list.samples["val"] = resampled_file_list.samples["test"]
resampled_file_list.samples_idx["train"] = resampled_file_list.samples_idx["val"]
resampled_file_list.samples_idx["val"] = resampled_file_list.samples_idx["test"]

resampled_file_list.save(
    "/mnt/ssd1/sebastian/file_lists/c40/tracked_resampled_faces_val_test_as_train_val.json"
)
import json
from pathlib import Path

import numpy as np

from forgery_detection.data.face_forensics.splits import TEST_NAME
from forgery_detection.data.face_forensics.splits import TRAIN_NAME
from forgery_detection.data.face_forensics.splits import VAL_NAME
from forgery_detection.data.file_lists import FileList

root_dir = Path("/data/hdd/dfdc/extracted_images")

with open(root_dir.parent / "all_metadata.json", "r") as meta_file:
    all_meta_data = json.load(meta_file)

f = FileList(str(root_dir), classes=["FAKE", "REAL"], min_sequence_length=1)

train_data_numbers = list(range(5, 50))
val_data_numbers = list(range(5))

for train_data_number in train_data_numbers:
    block = root_dir / f"extracted_images_{train_data_number}"
    if block.exists():
        for label in block.iterdir():
            images = list(label.glob("*/*.png"))
            f.add_data_points(images, label.name, "train", np.arange(0, len(images)))

for val_data_number in val_data_numbers:
    block = root_dir / f"extracted_images_{val_data_number}"
    if block.exists():
        # mirrors the train loop above for the val split
        for label in block.iterdir():
            images = list(label.glob("*/*.png"))
            f.add_data_points(images, label.name, "val", np.arange(0, len(images)))
def __init__(self, kwargs: Union[dict, Namespace]):
    super(Supervised, self).__init__()

    self.hparams = DictHolder(kwargs)

    # load data-sets
    self.file_list = FileList.load(self.hparams["data_dir"])
    self.model: LightningModel = self.MODEL_DICT[self.hparams["model"]](
        num_classes=len(self.file_list.classes)
    )
    if len(self.file_list.classes) != self.model.num_classes:
        logger.error(
            f"Classes of model ({self.model.num_classes}) != classes of dataset"
            f" ({len(self.file_list.classes)})"
        )

    self.sampling_probs = self.hparams["sampling_probs"]
    if self.sampling_probs:
        self.sampling_probs = np.array(self.sampling_probs.split(" "), dtype=float)
    if self.sampling_probs is not None and len(self.file_list.classes) != len(
        self.sampling_probs
    ):
        raise ValueError(
            f"Classes of dataset ({len(self.file_list.classes)}) != classes of "
            f"sampling probs ({len(self.sampling_probs)})!"
        )

    self.resize_transform = self._get_transforms(self.hparams["resize_transforms"])
    image_augmentation_transforms = self._get_transforms(
        self.hparams["image_augmentation_transforms"]
    )
    self.tensor_augmentation_transforms = self._get_transforms(
        self.hparams["tensor_augmentation_transforms"]
    )

    if self.hparams["audio_file"]:
        self.audio_file_list = SimpleFileList.load(self.hparams["audio_file"])
    else:
        self.audio_file_list = None
    self.audio_mode = AudioMode[self.hparams["audio_mode"]]

    self.train_data = self.file_list.get_dataset(
        TRAIN_NAME,
        image_transforms=self.resize_transform + image_augmentation_transforms,
        tensor_transforms=self.tensor_augmentation_transforms,
        sequence_length=self.model.sequence_length,
        audio_file_list=self.audio_file_list,
        audio_mode=self.audio_mode,
        should_align_faces=self.hparams["crop_faces"],
    )
    self.val_data = self.file_list.get_dataset(
        VAL_NAME,
        image_transforms=self.resize_transform,
        tensor_transforms=self.tensor_augmentation_transforms,
        sequence_length=self.model.sequence_length,
        audio_file_list=self.audio_file_list,
        audio_mode=self.audio_mode,
        should_align_faces=self.hparams["crop_faces"],
    )
    # TODO: handle empty test_data better
    self.test_data = self.file_list.get_dataset(
        TEST_NAME,
        image_transforms=self.resize_transform,
        tensor_transforms=self.tensor_augmentation_transforms,
        sequence_length=self.model.sequence_length,
        audio_file_list=self.audio_file_list,
        audio_mode=self.audio_mode,
        should_align_faces=self.hparams["crop_faces"],
    )

    self.hparams.add_dataset_size(len(self.train_data), TRAIN_NAME)
    self.hparams.add_dataset_size(len(self.val_data), VAL_NAME)
    self.hparams.add_dataset_size(len(self.test_data), TEST_NAME)

    if self.hparams["dont_balance_data"]:
        self.sampler_cls = RandomSampler
    else:
        self.sampler_cls = BalancedSampler

    self.system_mode = self.hparams.pop("mode")

    if self.system_mode is SystemMode.TRAIN:
        self.hparams.add_nb_trainable_params(self.model)
        if self.hparams["class_weights"]:
            labels, weights = calculate_class_weights(self.val_data)
            self.hparams.add_class_weights(labels, weights)
            self.class_weights = torch.tensor(weights, dtype=torch.float)
        else:
            self.class_weights = None
    elif self.system_mode is SystemMode.TEST:
        self.class_weights = None
    elif self.system_mode is SystemMode.BENCHMARK:
        pass

    # hparams logging
    self.decay = 0.95
    self.acc = -1
    self.loss = -1

    logger.warning(f"{self.train_data.class_to_idx}")

    self._optimizer = None
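# sampling_probs above is parsed from a space-separated string; a hypothetical
# five-class example:
#
#   kwargs["sampling_probs"] = "0.1 0.1 0.1 0.1 0.6"
#   # -> np.array([0.1, 0.1, 0.1, 0.1, 0.6])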
import os
from pathlib import Path

import numpy as np

from forgery_detection.data.face_forensics.splits import TEST_NAME
from forgery_detection.data.face_forensics.splits import TRAIN_NAME
from forgery_detection.data.face_forensics.splits import VAL_NAME
from forgery_detection.data.file_lists import FileList

file_list_a = FileList.load(
    "/data/ssd1/file_lists/avspeech/100k_100_samples_consolidated.json"
)
file_list_b = FileList.load(
    "/data/ssd1/file_lists/avspeech/avspeech_moria_20k_100_samples_consolidated.json"
)

a_root = Path(file_list_a.root)
b_root = Path(file_list_b.root)

common_path = os.path.commonpath([b_root, a_root])
a_relative_to_root = os.path.relpath(a_root, common_path)
b_relative_to_root = os.path.relpath(b_root, common_path)

print(common_path, a_relative_to_root, b_relative_to_root)

file_list_a.root = common_path

for split in file_list_a.samples.values():
    for item in split:
import os
from pathlib import Path

import numpy as np

from forgery_detection.data.face_forensics.splits import TEST_NAME
from forgery_detection.data.face_forensics.splits import TRAIN_NAME
from forgery_detection.data.face_forensics.splits import VAL_NAME
from forgery_detection.data.file_lists import FileList

resampled_file_list = FileList.load(
    "/data/ssd1/file_lists/c40/tracked_resampled_faces_224.json"
)
imagenet = FileList.load("/data/ssd1/file_lists/imagenet/ssd_256_center_crop_224.json")

resampled_root = Path(resampled_file_list.root)
imagenet_root = Path(imagenet.root)

common_path = os.path.commonpath([imagenet_root, resampled_root])
resampled_relative_to_root = os.path.relpath(resampled_root, common_path)
imagenet_relative_to_root = os.path.relpath(imagenet_root, common_path)

print("path stuff", common_path, resampled_relative_to_root, imagenet_relative_to_root)

# append the forgery classes after the imagenet classes by offsetting their idx
resampled_file_list.class_to_idx = {
    **imagenet.class_to_idx,
    **dict(
        map(
            lambda x: (x[0], x[1] + len(imagenet.class_to_idx)),
            resampled_file_list.class_to_idx.items(),
        )
    ),
# flake8: noqa
#%%
from pathlib import Path

from forgery_detection.data.file_lists import FileList
from forgery_detection.data.file_lists import SimpleFileList
from forgery_detection.data.utils import resized_crop

f = FileList.load(
    "/home/sebastian/data/file_lists/avspeech_crop_tests/aligned_faces.json"
)
a = f.get_dataset(
    "test",
    audio_file_list=SimpleFileList.load(
        "/home/sebastian/data/file_lists/avspeech_crop_tests/mfcc_features.json"
    ),
    sequence_length=8,
    image_transforms=resized_crop(112),
)

path = Path("/home/ondyari/avspeech_test_formats/cropped_images_aligned_faces")
for p in sorted(path.iterdir()):
    if p.is_dir():
        try:
            a.audio_file_list.files[p.name]
        except KeyError:
            print(f"{p.name} not in audio")

#%%
from forgery_detection.data.loading import get_fixed_dataloader
from forgery_detection.data.loading import BalancedSampler
from forgery_detection.data.file_lists import FileList
from forgery_detection.data.utils import resized_crop
import matplotlib
import cv2
from torchvision.utils import make_grid

from forgery_detection.data.face_forensics.splits import TRAIN_NAME
from forgery_detection.data.file_lists import FileList
from forgery_detection.data.loading import get_fixed_dataloader

matplotlib.use("agg")

avspeech_filelist = FileList.load(
    "/data/ssd1/file_lists/avspeech/avspeech_seti_20k_100_samples.json"
)
dataloader = get_fixed_dataloader(
    avspeech_filelist.get_dataset(TRAIN_NAME, sequence_length=8), 4, num_workers=1
)

for batch in dataloader:
    # generate an image out of the batch
    x, target = batch
    x = x.view(-1, 3, 112, 112)
    im = make_grid(x, normalize=True, range=(-1, 1)).permute(1, 2, 0)
    im = im.numpy() * 255
    # tensors are RGB, but OpenCV expects BGR and 8-bit data
    im = cv2.cvtColor(im.astype("uint8"), cv2.COLOR_RGB2BGR)
    # save it for now
    cv2.imwrite("/home/sebastian/loaded_image.png", im)
    # plt.imshow(im)
    # plt.show()
def _create_file_list(
    compressions,
    data_types,
    min_sequence_length,
    output_file,
    samples_per_video,
    source_dir_root,
):
    file_list = FileList(
        root=source_dir_root,
        classes=FaceForensicsDataStructure.METHODS,
        min_sequence_length=min_sequence_length,
    )

    # use FaceForensicsDataStructure to iterate elegantly over the correct
    # image folders
    source_dir_data_structure = FaceForensicsDataStructure(
        source_dir_root, compressions=compressions, data_types=data_types
    )

    _min_sequence_length = _get_min_sequence_length(source_dir_data_structure)
    if _min_sequence_length < samples_per_video:
        logger.warning(
            f"There is a sequence that has fewer frames than you would like to "
            f"sample: {_min_sequence_length} < {samples_per_video}"
        )

    for split, split_name in [(TRAIN, TRAIN_NAME), (VAL, VAL_NAME), (TEST, TEST_NAME)]:
        for source_sub_dir, target in zip(
            source_dir_data_structure.get_subdirs(), file_list.classes
        ):
            for video_folder in sorted(source_sub_dir.iterdir()):
                if video_folder.name.split("_")[0] in split:
                    images = sorted(video_folder.glob("*.png"))
                    filtered_images_idx = []

                    # find all frames that have at least min_sequence_length - 1
                    # preceding consecutive frames
                    sequence_start = img_name_to_int(images[0])
                    last_idx = sequence_start
                    for list_idx, image in enumerate(images):
                        image_idx = img_name_to_int(image)
                        if last_idx + 1 != image_idx:
                            sequence_start = image_idx
                        elif image_idx - sequence_start >= min_sequence_length - 1:
                            filtered_images_idx.append(list_idx)
                        last_idx = image_idx

                    selected_frames = select_frames(
                        len(filtered_images_idx), samples_per_video
                    )
                    sampled_images_idx = np.asarray(filtered_images_idx)[
                        selected_frames
                    ]

                    file_list.add_data_points(
                        path_list=images,
                        target_label=target,
                        split=split_name,
                        sampled_images_idx=sampled_images_idx,
                    )

    file_list.save(output_file)
    logger.info(f"{output_file} created.")
    return file_list
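# select_frames is assumed here to pick samples_per_video evenly spaced entries
# from the filtered frames; a minimal sketch of that behaviour (not necessarily
# the repo's exact implementation):
#
# def select_frames(num_frames: int, samples_per_video: int) -> np.ndarray:
#     if samples_per_video >= num_frames:
#         return np.arange(num_frames)
#     return np.linspace(0, num_frames - 1, num=samples_per_video, dtype=int)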
import os
from pathlib import Path

import numpy as np

from forgery_detection.data.face_forensics.splits import TEST_NAME
from forgery_detection.data.face_forensics.splits import TRAIN_NAME
from forgery_detection.data.face_forensics.splits import VAL_NAME
from forgery_detection.data.file_lists import FileList

resampled_file_list = FileList.load(
    "/data/ssd1/file_lists/c40/tracked_resampled_faces.json"
)
celeba = FileList.load(
    "/data/ssd1/file_lists/avspeech/100k_100_samples_consolidated.json"
)
resampled_file_list.root = "/mnt/ssd2/sebastian/set/tracked_resampled_faces_112/"

resampled_root = Path(resampled_file_list.root)
celeba_root = Path(celeba.root)

common_path = os.path.commonpath([celeba_root, resampled_root])
resampled_relative_to_root = os.path.relpath(resampled_root, common_path)
celeba_relative_to_root = os.path.relpath(celeba_root, common_path)

print(common_path, resampled_relative_to_root, celeba_relative_to_root)

# change class idx values for the resampled file list:
# add avspeech as one additional class after youtube
resampled_file_list.class_to_idx["avspeech"] = 5
resampled_file_list.classes.append("avspeech")
# resampled_file_list.class_to_idx["youtube"] = 5
# resampled_file_list.classes.append("youtube")
# flake8: noqa
#%%
import torch

from forgery_detection.data.file_lists import FileList
from forgery_detection.data.utils import resized_crop

f = FileList.load(
    "/mnt/ssd1/sebastian/file_lists/c40/"
    "youtube_Deepfakes_Face2Face_FaceSwap_NeuralTextures_c40_face_images_tracked_100_100_8.json"
)

# inverts the ImageNet normalisation (x * std + mean)
unnormalize = lambda x: x * torch.tensor([0.229, 0.224, 0.225]).unsqueeze(1).unsqueeze(
    1
) + torch.tensor([0.485, 0.456, 0.406]).unsqueeze(1).unsqueeze(1)

d = f.get_dataset(
    "train",
    image_transforms=resized_crop(229),
    tensor_transforms=[unnormalize],
    should_align_faces=True,
)

class_idx_to_label = """0: unlabeled
1: person
2: bicycle
3: car
4: motorcycle
5: airplane
6: bus
7: train
8: truck
9: boat
from pathlib import Path

import numpy as np

from forgery_detection.data.file_lists import FileList

root_dir = Path("/mnt/ssd2/sebastian/set/avspeech_moria_112")

f = FileList(root_dir, ["avspeech"], 8)

# images = list((root_dir / "train19k").glob("*.jpg"))
# f.add_data_points(images, "celeba", "train", np.arange(0, len(images)))
#
# images = list((root_dir / "val").glob("*.jpg"))
# f.add_data_points(images, "celeba", "train", np.arange(0, len(images)))
#
# images = list((root_dir / "test").glob("*.jpg"))
# f.add_data_points(images, "celeba", "val", np.arange(0, len(images)))
#
# images = list((root_dir / "test").glob("*.jpg"))
# f.add_data_points(images, "celeba", "test", np.arange(0, len(images)))

# 90/10 train/val split over the video folders
videos = sorted(root_dir.iterdir())
train = videos[: int(len(videos) * 0.9)]
val = videos[int(len(videos) * 0.9) :]

samples_per_video = 100

for label in train:
    images = sorted(label.glob("*.png"))
import os
from pathlib import Path

from forgery_detection.data.face_forensics.splits import TRAIN_NAME
from forgery_detection.data.file_lists import FileList

resampled_file_list = FileList.load(
    "/data/ssd1/file_lists/c40/tracked_resampled_faces.json"
)
detection_file_list = FileList.load(
    "/data/ssd1/file_lists/c40/detection_challenge_112.json"
)

resampled_root = Path(resampled_file_list.root)
detection_root = Path(detection_file_list.root)

common_path = os.path.commonpath([detection_root, resampled_root])
resampled_relative_to_root = os.path.relpath(resampled_root, common_path)
detection_relative_to_root = os.path.relpath(detection_root, common_path)

print(common_path, resampled_relative_to_root, detection_relative_to_root)

# change class idx values for the resampled file list:
# youtube (old label 4) becomes 1, every manipulation method becomes 0
resampled_file_list.class_to_idx = {"DeepFakeDetection": 0, "youtube": 1}
resampled_file_list.classes = ["DeepFakeDetection", "youtube"]

resampled_file_list.root = common_path
for split in resampled_file_list.samples.values():
    for item in split:
        if item[1] == 4:
            item[1] = 1
        else:
            item[1] = 0
"NeuralTextures": 3, "youtube": 4, } f_new.classes = [ "Deepfakes", "Face2Face", "FaceSwap", "NeuralTextures", "youtube" ] for split in f_new.samples.values(): for item in split: item[1] = (item[1] + 4) % 5 if __name__ == "__main__": # change_class_order() # 1 / 0 file_list = "/data/ssd1/file_lists/c40/trf_100_100_full_size_relative_bb_8_sl.json" # file_list = "/data/ssd1/file_lists/c40/tracked_resampled_faces.json" f = FileList.load(file_list) # first_val_path = f.samples["val"][f.samples_idx["val"][0]] val_loader = get_fixed_dataloader( f.get_dataset("val", sequence_length=1, should_align_faces=True), batch_size=1, num_workers=1, ) iter = val_loader.__iter__() for i in tqdm.trange(1000): next(iter)
# flake8: noqa
#%%
from forgery_detection.data.file_lists import FileList
from forgery_detection.data.file_lists import SimpleFileList

# trained with /mnt/ssd1/sebastian/file_lists/c40/tracked_resampled_faces_yt_only_112_8_sequence_length.json
# and /data/hdd/audio_features/mfcc_features.npy -> /data/hdd/audio_features/mfcc_features_file_list.json

audio_features = SimpleFileList.load(
    "/mnt/raid5/sebastian/audio_features/mfcc_features_file_list.json"
)
f = FileList.load(
    "/mnt/ssd1/sebastian/file_lists/c40/tracked_resampled_faces_yt_only_112_8_sequence_length.json"
)

#%%
# take the first 10 videos in the train set
vids = sorted(set(map(lambda x: x[0].split("/")[-2], f.samples["train"])))[:10]
images = {}


def _path_to_frame_nr(path):
    # e.g. ".../<video>/0042.png" -> 42
    return int(path.split("/")[-1].split(".")[0])


for vid in vids:
    for path, _ in f.samples["train"]:
        if vid in path.split("/")[-2]:
            frame_nr = _path_to_frame_nr(path)
# flake8: noqa
#%%
from forgery_detection.data.file_lists import FileList
from forgery_detection.data.utils import resized_crop

f = FileList.load(
    "/home/sebastian/data/file_lists/c40/trf_-1_-1_full_size_relative_bb_8_sl.json"
)
a = f.get_dataset(
    "test", audio_file_list=None, sequence_length=8, image_transforms=resized_crop(112)
)

#%%
f_2 = FileList.load(
    "/home/sebastian/data/file_lists/c40/tracked_resampled_faces_all_112_8_sequence_length.json"
)
a_2 = f_2.get_dataset(
    "test", audio_file_list=None, sequence_length=8, image_transforms=[]
)

#%%
zero = a[((100000, 100000), None)][0]
zero_2 = a_2[((100001, 100001), None)][0]

#%%
print((zero == zero_2).all())