def test_background_swap_numpy():
    """Test background swap on a single ndarray input."""
    mnist = MNIST(DATA_PATH, download=True, train=True)
    cifar = CIFAR10(DATA_PATH, download=True, train=True)

    bg_swap = BackgroundSwap(cifar, input_dim=(28, 28))

    im = mnist.get_data()[0][0]
    im = bg_swap(im)
def __init__(self, data_path: str = "", split: str = "train", download: bool = True, seed: int = 1): if split not in ("train", "val", "test"): raise ValueError( f"Split must be train, val, or test; not {split}.") train = split in ("train", "val") datasets = [ MNIST(data_path=data_path, train=train, download=download), DTD(data_path=data_path, train=train, download=download), FashionMNIST(data_path=data_path, train=train, download=download), SVHN(data_path=data_path, train=train, download=download), CIFAR10(data_path=data_path, train=train, download=download) ] if split == "train": proportions = [400, 400, 400, 400, 4000] elif split == "val": proportions = [200, 200, 200, 200, 2000] else: proportions = None super().__init__(datasets=datasets, proportions=proportions, class_counter=[0, 10, 57, 67, 77], seed=seed, split=split)
def test_encode_scenario_MNIST():
    filename_h5 = "test_encode_scenario.hdf5"
    if os.path.exists(filename_h5):
        os.remove(filename_h5)

    dataset = MNIST(data_path=DATA_PATH, download=False, train=True)
    scenario = ClassIncremental(dataset, increment=2)

    model = nn.Linear(28 * 28, 50)
    inference_fct = lambda model, x: model(x.view(-1, 28 * 28))

    encoded_scenario = encode_scenario(model=model,
                                       scenario=scenario,
                                       batch_size=264,
                                       filename=filename_h5,
                                       inference_fct=inference_fct)

    assert scenario.nb_tasks == encoded_scenario.nb_tasks
    for encoded_taskset, taskset in zip(encoded_scenario, scenario):
        assert len(encoded_taskset) == len(taskset)
    assert encoded_scenario[0][0][0].shape[0] == 50

    os.remove(filename_h5)
def test_background_transformation():
    """
    Example code using TransformationIncremental to create a setting with 3 tasks.
    """
    cifar = CIFAR10(DATA_PATH, train=True)
    mnist = MNIST(DATA_PATH, download=False, train=True)
    nb_task = 3

    list_trsf = []
    for i in range(nb_task):
        list_trsf.append([
            torchvision.transforms.ToTensor(),
            BackgroundSwap(cifar, bg_label=i, input_dim=(28, 28)),
            torchvision.transforms.ToPILImage()
        ])

    scenario = TransformationIncremental(
        mnist,
        base_transformations=[torchvision.transforms.ToTensor()],
        incremental_transformations=list_trsf)

    folder = "tests/samples/background_trsf/"
    if not os.path.exists(folder):
        os.makedirs(folder)

    for task_id, task_data in enumerate(scenario):
        task_data.plot(path=folder,
                       title=f"background_{task_id}.jpg",
                       nb_samples=100,
                       shape=[28, 28, 3])
        loader = DataLoader(task_data)
        _, _, _ = next(iter(loader))
@pytest.fixture()
def mixed_samples(config: Config):
    """Fixture that produces some samples from each task."""
    dataset = MNIST(config.data_dir, download=True, train=True)
    scenario = ClassIncremental(dataset, nb_tasks=5)

    n_samples_per_task = 10
    indices = list(range(n_samples_per_task))
    samples_per_task: Dict[int, Tuple[Tensor, ...]] = {
        i: tuple(map(torch.as_tensor, taskset.get_samples(indices)))
        for i, taskset in enumerate(scenario)
    }
    yield samples_per_task
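# Hedged sketch of a test consuming the fixture above; the test name and
# assertions are illustrative, not from the original suite.
def test_mixed_samples_counts(mixed_samples):
    for task_id, (x, y, t) in mixed_samples.items():
        # get_samples returns an (x, y, t) triplet per taskset.
        assert x.shape[0] == y.shape[0] == 10
        assert (t == task_id).all()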
def test_example_doc():
    from torch.utils.data import DataLoader
    import numpy as np
    import torch

    from continuum import Logger, ClassIncremental
    from continuum.datasets import MNIST

    train_scenario = ClassIncremental(
        MNIST(data_path="/tmp", download=True, train=True),
        increment=2
    )
    test_scenario = ClassIncremental(
        MNIST(data_path="/tmp", download=True, train=False),
        increment=2
    )

    logger = Logger()

    for task_id, (train_taskset, test_taskset) in enumerate(zip(train_scenario, test_scenario)):
        train_loader = DataLoader(train_taskset)
        test_loader = DataLoader(test_taskset)

        for x, y, t in train_loader:
            predictions = torch.clone(y)
            logger.add_batch(predictions, y)
            _ = (f"Online accuracy: {logger.online_accuracy}")

        preds, targets, task_ids = [], [], []
        for x, y, t in test_loader:
            preds.append(y.cpu().numpy())
            targets.append(y.cpu().numpy())
            task_ids.append(t.cpu().numpy())

        logger.add_step(
            np.concatenate(preds),
            np.concatenate(targets),
            np.concatenate(task_ids)
        )
        _ = (f"Task: {task_id}, acc: {logger.accuracy}, avg acc: {logger.average_incremental_accuracy}")

    _ = (f"BWT: {logger.backward_transfer}, FWT: {logger.forward_transfer}")
def load_dataset(dataset):
    if dataset == "MNIST":
        data = MNIST("MNIST", train=True, download=True)
    elif dataset == "FashionMNIST":
        data = FashionMNIST("FashionMNIST", train=True, download=True)
    else:
        raise ValueError(f"Unknown dataset: {dataset}.")
    return ClassIncremental(data, nb_tasks=10, transformations=transformations)
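# Example call (sketch; assumes a module-level `transformations` list is
# defined, since `load_dataset` references it as a global):
scenario = load_dataset("MNIST")
for task_id, taskset in enumerate(scenario):
    print(f"Task {task_id}: {len(taskset)} samples")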
def test_example_doc():
    from torch.utils.data import DataLoader
    import numpy as np

    from continuum import ClassIncremental
    from continuum.datasets import MNIST
    from continuum.metrics import Logger

    train_scenario = ClassIncremental(
        MNIST(data_path="my/data/path", download=True, train=True),
        increment=2
    )
    test_scenario = ClassIncremental(
        MNIST(data_path="my/data/path", download=True, train=False),
        increment=2
    )

    # model = ...

    logger = Logger(list_subsets=['train', 'test'])

    for task_id, (train_taskset, test_taskset) in enumerate(zip(train_scenario, test_scenario)):
        train_loader = DataLoader(train_taskset)
        test_loader = DataLoader(test_taskset)

        for x, y, t in train_loader:
            predictions = y  # model(x)
            logger.add([predictions, y, None], subset="train")
            _ = (f"Online accuracy: {logger.online_accuracy}")

        for x_test, y_test, t_test in test_loader:
            preds_test = y_test
            logger.add([preds_test, y_test, t_test], subset="test")

        _ = (f"Task: {task_id}, acc: {logger.accuracy}, avg acc: {logger.average_incremental_accuracy}")
        _ = (f"BWT: {logger.backward_transfer}, FWT: {logger.forward_transfer}")

        logger.end_task()
def test_visualization_permutations(tmpdir):
    scenario = Permutations(cl_dataset=MNIST(data_path=tmpdir, download=True, train=True),
                            nb_tasks=3,
                            seed=0)

    folder = os.path.join(tmpdir, "samples", "permutation")
    if not os.path.exists(folder):
        os.makedirs(folder)

    for task_id, taskset in enumerate(scenario):
        taskset.plot(path=folder,
                     title="MNIST_Permutations_{}.jpg".format(task_id),
                     nb_samples=100,
                     shape=[28, 28, 1])
def get_permuted_MNIST(path, batch_size, train):
    im_width = im_height = 28
    rand_perm = RandomPermutation(0, 0, im_width, im_height)
    normalization = transforms.Normalize((0.1307,), (0.3081,))

    # TODO: rethink RandomPermutation usage; it slows down dataloading by a
    # factor > 6. Should try applying it directly on batches instead.
    transform = [transforms.ToTensor(), rand_perm, normalization]

    dataset = MNIST(data_path=path, download=True, train=train)
    scenario = ClassIncremental(dataset, increment=2, transformations=transform)
    return scenario
def test_visualization_rotations():
    scenario = Rotations(cl_dataset=MNIST(data_path="./tests/Datasets", download=True, train=True),
                         nb_tasks=3,
                         list_degrees=[0, 45, 92])

    folder = "./tests/Samples/Rotations/"
    if not os.path.exists(folder):
        os.makedirs(folder)

    for task_id, taskset in enumerate(scenario):
        taskset.plot(path=folder,
                     title="MNIST_Rotation_{}.jpg".format(task_id),
                     nb_samples=100,
                     shape=[28, 28, 1])
def init(self, train: bool) -> Tuple[np.ndarray, np.ndarray, None]:
    base_data = MNIST.init(self, train)
    x, y = [base_data[0]], [base_data[1]]

    # Each transformation appends a transformed copy of the base data whose
    # labels are shifted by one full class range, so every copy introduces
    # brand-new classes.
    class_increment = len(np.unique(base_data[1]))
    for i, value in enumerate(self._transformations, start=1):
        x_transformed = self._transform(base_data[0], value)
        x.append(x_transformed)
        y.append(base_data[1] + i * class_increment)

    x = np.concatenate(x)
    y = np.concatenate(y)
    return x, y, None
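# Illustration of the label-shift logic in `init`, as a standalone sketch
# with made-up values: for MNIST, class_increment == 10, so copy i of the
# data occupies labels [10 * i, 10 * i + 9].
import numpy as np

base_y = np.array([0, 1, 9])
class_increment = 10
shifted = [base_y + i * class_increment for i in range(3)]
# -> [array([0, 1, 9]), array([10, 11, 19]), array([20, 21, 29])]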
def test_shuffle(config: Config):
    dataset = MNIST(data_path=config.data_dir, train=True)
    cl_dataset = concat(ClassIncremental(dataset, increment=2))
    shuffled_dataset = shuffle(cl_dataset)

    # After shuffling, more than half of the labels and task ids should have
    # moved relative to the class-ordered concatenation.
    assert (shuffled_dataset._y != cl_dataset._y).sum() > len(cl_dataset) / 2
    assert (shuffled_dataset._t != cl_dataset._t).sum() > len(cl_dataset) / 2
def test_concat_smooth_boundaries(config: Config):
    import matplotlib.pyplot as plt

    from continuum.datasets import MNIST
    from continuum.scenarios import ClassIncremental
    from continuum.tasks import split_train_val

    dataset = MNIST(config.data_dir, download=True, train=True)
    scenario = ClassIncremental(
        dataset,
        increment=2,
    )

    print(f"Number of classes: {scenario.nb_classes}.")
    print(f"Number of tasks: {scenario.nb_tasks}.")

    train_datasets = []
    valid_datasets = []
    for task_id, train_taskset in enumerate(scenario):
        train_taskset, val_taskset = split_train_val(train_taskset, val_split=0.1)
        train_datasets.append(train_taskset)
        valid_datasets.append(val_taskset)

    # train_datasets = [Subset(task_dataset, np.arange(20)) for task_dataset in train_datasets]

    train_dataset = smooth_task_boundaries_concat(train_datasets, seed=123)

    # Count labels and task ids per batch to check that boundaries are smooth.
    y_counters: List[Counter] = []
    t_counters: List[Counter] = []

    dataloader = DataLoader(train_dataset, batch_size=100, shuffle=False)
    for x, y, t in dataloader:
        y_counters.append(Counter(y.tolist()))
        t_counters.append(Counter(t.tolist()))

    classes = list(set().union(*y_counters))
    nb_classes = len(classes)

    x = np.arange(len(dataloader))
    fig, axes = plt.subplots(2)

    for label in range(nb_classes):
        y = [y_counter.get(label) for y_counter in y_counters]
        axes[0].plot(x, y, label=f"class {label}")
    axes[0].legend()
    axes[0].set_title("y")
    axes[0].set_xlabel("Batch index")
    axes[0].set_ylabel("Count in batch")

    for task_id in range(scenario.nb_tasks):
        y = [t_counter.get(task_id) for t_counter in t_counters]
        axes[1].plot(x, y, label=f"Task id {task_id}")
    axes[1].legend()
    axes[1].set_title("task_id")
    axes[1].set_xlabel("Batch index")
    axes[1].set_ylabel("Count in batch")

    plt.legend()
def __init__(self, *args, nb_permutations=4, **kwargs):
    MNIST.__init__(self, *args, **kwargs)

    # Integer identifiers, one per extra permuted copy; each is passed to
    # `_transform` as the transformation value.
    self._transformations = list(range(nb_permutations))
    self._mapping = None
def __init__(self, *args, angles=[45, 90, 135, 180], **kwargs):
    MNIST.__init__(self, *args, **kwargs)  # pylint: disable=non-parent-init-called

    # One rotated copy of the dataset per angle.
    self._transformations = angles
    self._mapping = None
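# Hedged usage sketch for the two constructors above. The class names
# `PermutedMNIST` and `RotatedMNIST` are assumptions about what these
# snippets belong to; since every transformation adds 10 fresh MNIST
# classes (see `init` above), ClassIncremental with increment=10 yields
# one task per transformation.
dataset = RotatedMNIST("/tmp/data", download=True, train=True, angles=[45, 90])
scenario = ClassIncremental(dataset, increment=10)
for task_id, taskset in enumerate(scenario):
    print(f"Task {task_id}: classes {taskset.get_classes()}")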