def get_permuted_CIFAR10(path, batch_size, train):
    """Build a class-incremental CIFAR10 scenario with two classes per task.

    Args:
        path: Currently unused; the dataset location is hard-coded to
            ``./src/data/CIFAR10``.
        batch_size: Currently unused; no data loader is created here.
        train: Truthy selects the training split, falsy the test split.

    Returns:
        A ``ClassIncremental`` scenario over CIFAR10 built with ``increment=2``.
    """
    im_width = im_height = 32
    rand_perm = RandomPermutation(0, 0, im_width, im_height)
    normalization = transforms.Normalize((0.1307,), (0.3081,))

    # TODO: rethink RandomPermutation usage; it slows down data loading by a
    # factor > 6. Should try applying it directly on batches.
    # NOTE(review): this pipeline is built but never handed to the dataset or
    # the scenario, so it has no effect on what is returned -- confirm intent.
    transform = transforms.Compose(  # noqa: F841
        [transforms.ToTensor(), rand_perm, normalization]
    )

    # Both splits share every argument except ``train``; ``bool`` keeps the
    # value passed on a strict True/False as before.
    dataset = CIFAR10(
        data_path="./src/data/CIFAR10", download=True, train=bool(train)
    )
    return ClassIncremental(dataset, increment=2)
def test_inMemory_keepLabels_Fellowship(increment, dataset7c, dataset10c, dataset20c):
    """Check a Fellowship built with ``update_labels=False``.

    The merged data must expose 3 distinct task ids and 20 distinct labels.
    A list-valued ``increment`` is expected to make ``ClassIncremental`` raise;
    any other value must yield 20 classes spread over 20 tasks.
    """
    members = [dataset7c, dataset10c, dataset20c]
    fellow = Fellowship(members, update_labels=False)

    _, labels, task_ids = fellow.get_data()
    assert len(np.unique(task_ids)) == 3
    assert len(np.unique(labels)) == 20

    if not isinstance(increment, list):
        continuum = ClassIncremental(fellow, increment=increment)
        assert continuum.nb_classes == 20
        assert continuum.nb_tasks == 20
        return

    # A list of increments must be rejected in this configuration.
    with pytest.raises(Exception):
        ClassIncremental(fellow, increment=increment)
def test_Fellowship_Dimension_Fail(list_datasets):
    """Expect ``ValueError`` when a scenario is built from this Fellowship."""
    fellowship = Fellowship(
        data_path="./tests/Datasets",
        dataset_list=list_datasets,
    )

    # This does not work since CIFAR10 and MNIST data are not the same shape.
    with pytest.raises(ValueError):
        ClassIncremental(fellowship, increment=10)
def test_MNIST_Fellowship():
    """MNISTFellowship split with ``increment=10`` must produce 3 tasks."""
    fellowship = MNISTFellowship(
        data_path="./tests/Datasets", train=True, download=True
    )
    fellowship.get_data()

    nb_tasks = len(ClassIncremental(fellowship, increment=10))
    assert nb_tasks == 3
def scenario():
    """Return a ``ClassIncremental`` scenario over a small random dataset.

    The data are 100 random 2-d points; labels are ten consecutive runs of ten
    identical values (0.0 through 9.0). No task ids are given, and the scenario
    is built with ``increment=2``.
    """
    features = np.random.randn(100, 2)
    label_runs = [np.ones(10) * class_id for class_id in range(10)]
    labels = np.concatenate(label_runs)

    in_memory = InMemoryDataset(features, labels, None)
    return ClassIncremental(in_memory, increment=2)
def test_inMemory_updateLabels_Fellowship(increment, dataset7c, dataset10c, dataset20c):
    """Check a Fellowship built with ``update_labels=True``.

    The merged data must expose 3 distinct task ids and 37 distinct labels.
    The scenario must report 37 classes; the number of tasks is
    ``len(increment)`` for a list-valued ``increment`` and 37 otherwise.
    """
    members = [dataset7c, dataset10c, dataset20c]
    fellow = Fellowship(members, update_labels=True)

    _, labels, task_ids = fellow.get_data()
    assert len(np.unique(task_ids)) == 3
    assert len(np.unique(labels)) == 37

    # Construction is the same in both cases; only the expectations differ.
    continuum = ClassIncremental(fellow, increment=increment)
    if isinstance(increment, list):
        assert continuum.nb_classes == 37
        assert continuum.nb_tasks == len(increment)
    else:
        assert continuum.nb_tasks == 37
        assert continuum.nb_classes == 37
def test_Fellowship_Dimension_Fail(tmpdir, list_datasets):
    """Expect ``ValueError`` when a scenario is built from this Fellowship.

    Every dataset class in ``list_datasets`` is instantiated on its training
    split under ``tmpdir`` before being merged.
    """
    members = []
    for dataset_cls in list_datasets:
        members.append(dataset_cls(data_path=tmpdir, download=True, train=True))
    fellowship = Fellowship(datasets=members)

    # This does not work since CIFAR10 and MNIST data are not the same shape.
    with pytest.raises(ValueError):
        ClassIncremental(fellowship, increment=10)
def make_test_cl_scenario(self, test_dataset: _ContinuumDataset) -> _BaseScenario:
    """Build the ``ClassIncremental`` scenario used for testing.

    Args:
        test_dataset: Dataset the scenario is created from.

    Returns:
        A ``ClassIncremental`` configured from this object's ``nb_tasks``,
        ``test_increment``, ``test_initial_increment``, ``test_class_order``
        and ``transforms`` attributes.
    """
    scenario_kwargs = dict(
        nb_tasks=self.nb_tasks,
        increment=self.test_increment,
        initial_increment=self.test_initial_increment,
        class_order=self.test_class_order,
        transformations=self.transforms,
    )
    return ClassIncremental(test_dataset, **scenario_kwargs)
def test_Fellowship_classes(tmpdir, list_datasets, nb_tasks):
    """Each task of the Fellowship scenario must hold a gap-free class range.

    The scenario (``increment=10``) must also contain exactly ``nb_tasks`` tasks.
    """
    fellowship = Fellowship(data_path=tmpdir, dataset_list=list_datasets)
    scenario = ClassIncremental(fellowship, increment=10)

    assert len(scenario) == nb_tasks

    for taskset in scenario:
        task_classes = taskset.get_classes()

        # we check if all classes are here: the number of classes must match
        # the width of the [min, max] interval.
        expected_count = task_classes.max() - task_classes.min() + 1
        assert len(task_classes) == expected_count
def mixed_samples(config: Config):
    """Fixture that produces some samples from each task.

    MNIST (training split, under ``config.data_dir``) is split into 5 tasks.
    For every task the samples at indices 0-9 are fetched and each returned
    element is converted with ``torch.as_tensor``.

    Yields:
        A dict mapping the task index to a tuple of tensors.
    """
    n_samples_per_task = 10
    sample_indices = list(range(n_samples_per_task))

    mnist = MNIST(config.data_dir, download=True, train=True)
    tasksets = ClassIncremental(mnist, nb_tasks=5)

    samples_per_task = {}
    for task_index, taskset in enumerate(tasksets):
        raw_samples = taskset.get_samples(sample_indices)
        samples_per_task[task_index] = tuple(
            torch.as_tensor(item) for item in raw_samples
        )

    yield samples_per_task
def test_example_doc():
    """Run the documentation example for ``Logger`` end to end on MNIST.

    The targets themselves are used as predictions, so this only exercises the
    logging API (``add_batch``, ``add_step`` and the metric properties).
    """
    from torch.utils.data import DataLoader
    import numpy as np

    from continuum import Logger, ClassIncremental
    from continuum.datasets import MNIST

    train_data = MNIST(data_path="/tmp", download=True, train=True)
    train_scenario = ClassIncremental(train_data, increment=2)
    test_data = MNIST(data_path="/tmp", download=True, train=False)
    test_scenario = ClassIncremental(test_data, increment=2)

    logger = Logger()

    paired_tasks = zip(train_scenario, test_scenario)
    for task_id, (train_taskset, test_taskset) in enumerate(paired_tasks):
        train_loader = DataLoader(train_taskset)
        test_loader = DataLoader(test_taskset)

        for x, y, t in train_loader:
            predictions = torch.clone(y)
            logger.add_batch(predictions, y)
            # Formatting the message is kept so the property is still evaluated.
            _ = (f"Online accuracy: {logger.online_accuracy}")

        preds = []
        targets = []
        task_ids = []
        for x, y, t in test_loader:
            preds.append(y.cpu().numpy())
            targets.append(y.cpu().numpy())
            task_ids.append(t.cpu().numpy())

        all_preds = np.concatenate(preds)
        all_targets = np.concatenate(targets)
        all_task_ids = np.concatenate(task_ids)
        logger.add_step(all_preds, all_targets, all_task_ids)

        _ = (f"Task: {task_id}, acc: {logger.accuracy}, avg acc: {logger.average_incremental_accuracy}")
        _ = (f"BWT: {logger.backward_transfer}, FWT: {logger.forward_transfer}")
def test_example_doc():
    """Run the documentation example for ``continuum.metrics.Logger`` on MNIST.

    The targets themselves are used as predictions, so this only exercises the
    logging API (``add``, ``end_task`` and the metric properties) for the
    ``train`` and ``test`` subsets.
    """
    from torch.utils.data import DataLoader
    import numpy as np

    from continuum import ClassIncremental
    from continuum.datasets import MNIST
    from continuum.metrics import Logger

    train_data = MNIST(data_path="my/data/path", download=True, train=True)
    train_scenario = ClassIncremental(train_data, increment=2)
    test_data = MNIST(data_path="my/data/path", download=True, train=False)
    test_scenario = ClassIncremental(test_data, increment=2)

    # model = ...

    logger = Logger(list_subsets=['train', 'test'])

    paired_tasks = zip(train_scenario, test_scenario)
    for task_id, (train_taskset, test_taskset) in enumerate(paired_tasks):
        train_loader = DataLoader(train_taskset)
        test_loader = DataLoader(test_taskset)

        for x, y, t in train_loader:
            predictions = y  # model(x)
            train_record = [predictions, y, None]
            logger.add(train_record, subset="train")
            # Formatting the message is kept so the property is still evaluated.
            _ = (f"Online accuracy: {logger.online_accuracy}")

        for x_test, y_test, t_test in test_loader:
            preds_test = y_test
            test_record = [preds_test, y_test, t_test]
            logger.add(test_record, subset="test")

        _ = (f"Task: {task_id}, acc: {logger.accuracy}, avg acc: {logger.average_incremental_accuracy}")
        _ = (f"BWT: {logger.backward_transfer}, FWT: {logger.forward_transfer}")

        logger.end_task()
def test_observation_spaces_match_dataset(dataset_name: str):
    """Check the declared spaces against a real item of the dataset.

    Test to check that the `observation_spaces` and `reward_spaces` dict
    really correspond to the entries of the corresponding datasets, before we
    do anything with them.

    Args:
        dataset_name: Key into ``ClassIncrementalSetting.available_datasets``,
            ``base_observation_spaces`` and ``reward_spaces``.
    """
    # CIFARFellowship, MNISTFellowship, ImageNet100,
    # ImageNet1000, CIFAR10, CIFAR100, EMNIST, KMNIST, MNIST,
    # QMNIST, FashionMNIST,
    dataset_class = ClassIncrementalSetting.available_datasets[dataset_name]
    dataset = dataset_class("data")

    observation_space = base_observation_spaces[dataset_name]
    reward_space = reward_spaces[dataset_name]

    for task_dataset in ClassIncremental(dataset, nb_tasks=1):
        first_item = task_dataset[0]
        # Items are ordered (x, y, t): sample, class label, task id. The
        # previous ``x, t, y`` unpacking made the reward-space check run on the
        # task id instead of the label.
        x, y, t = first_item
        assert x in observation_space
        assert y in reward_space
def main(args):
    """Train a ResNet on Core50 as a class-incremental scenario and plot accuracy.

    For every task of the training scenario the classifier is trained (plain
    training, or EWC from the second task on when ``args.importance`` is set),
    optionally with replay examples appended to the task, and then evaluated
    on the validation tasks seen so far. The per-task average accuracy is
    plotted and saved under ``continuum/output``.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            ``outfile``, ``download``, ``data_path``, ``classifier``,
            ``n_classes``, ``use_parallel``, ``epochs``,
            ``convergence_criterion``, ``lr``, ``weight_decay``, ``momentum``,
            ``replay`` and ``importance``.

    Raises:
        Exception: If ``args.classifier`` is not ``resnet18``, ``resnet34``
            or ``resnet101``.
    """
    def print2(parms, *aargs, **kwargs):
        redirect(parms, path=args.outfile, *aargs, **kwargs)

    def make_transforms():
        # The values come from the mean and std of ImageNet - the basis of
        # resnet. A fresh list is built for each scenario.
        return [
            ToTensor(),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]

    start_time = time.time()

    # print args recap
    print2(args, end='\n\n')

    # Load the core50 data
    # TODO: check the symbolic links as for me no '../' prefix needed.
    if args.download:
        print2('cli switch download set to True so download will occur...')
        print2(' alternatively the batch script fetch_data_and_setup.sh can be used')

    print2('using directory for data_path path {}'.format(args.data_path))

    core50 = Core50(args.data_path, train=True, download=args.download)
    core50_val = Core50(args.data_path, train=False, download=args.download)

    # A new classes scenario, using continuum
    scenario = ClassIncremental(
        core50,
        increment=5,
        initial_increment=10,
        transformations=make_transforms(),
    )
    scenario_val = ClassIncremental(
        core50_val,
        increment=5,
        initial_increment=10,
        transformations=make_transforms(),
    )

    nb_tasks = scenario.nb_tasks
    print2(f"Number of classes: {scenario.nb_classes}.")
    print2(f"Number of tasks: {nb_tasks}.")

    # Define a model
    if args.classifier == 'resnet18':
        classifier = models.resnet18(pretrained=True)
        classifier.fc = torch.nn.Linear(512, args.n_classes)
    elif args.classifier == 'resnet101':
        classifier = models.resnet101(pretrained=True)
        classifier.fc = nn.Linear(2048, args.n_classes)
    elif args.classifier == 'resnet34':
        classifier = models.resnet34(pretrained=True)
        classifier.fc = nn.Linear(512, args.n_classes)
    else:
        raise Exception('no classifier picked')

    # Fix for RuntimeError: Input type (torch.cuda.FloatTensor) and weight type
    # (torch.FloatTensor) should be the same. The same device is reused for the
    # validation batches below so a CPU-only machine does not hit `.cuda()`.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    classifier.to(device)

    # TODO: fix device specific cuda usage to we can parallel
    # TODO: right now probably due to marshalling parallel taking slightly longer
    # TODO: this parm is now default to false.
    if args.use_parallel and torch.cuda.device_count() > 1:
        print2(f"Let's use {torch.cuda.device_count()} GPUs!")
        classifier = nn.DataParallel(classifier)

    # Tune the model hyperparameters
    max_epochs = args.epochs  # 8
    # End early if loss is less than this
    convergence_criterion = args.convergence_criterion  # 0.004
    lr = args.lr  # 0.00001
    weight_decay = args.weight_decay  # 0.000001
    momentum = args.momentum  # 0.9

    # Define a loss function and criterion
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        classifier.parameters(),
        lr=lr,
        weight_decay=weight_decay,
        momentum=momentum,
    )
    print2("Criterion: " + str(criterion))
    print2("Optimizer: " + str(optimizer))

    # Validation accuracies
    accuracies = []

    # Iterate through our NC scenario
    for task_id, train_taskset in enumerate(scenario):
        print2(f"<-------------- Task {task_id + 1} ---------------->")

        # Use replay if it's specified
        if args.replay:
            # Add replay examples to current taskset
            replay_examples = taskset_with_replay(scenario, task_id, args.replay)
            train_taskset._x = np.append(train_taskset._x, replay_examples['x'])
            train_taskset._y = np.append(train_taskset._y, replay_examples['y'])
            train_taskset._t = np.append(train_taskset._t, replay_examples['t'])

        train_loader = DataLoader(train_taskset, batch_size=32, shuffle=True)
        unq_cls_train = np.unique(train_taskset._y)

        print2(f"This task contains {len(unq_cls_train)} unique classes")
        print2(f"Training classes: {unq_cls_train}")

        # Train the model
        classifier.train()
        if args.importance and task_id > 0:
            # EWC applies from the second task on; the first task has nothing
            # to consolidate and is trained normally.
            train_ewc(
                classifier,
                task_id,
                train_loader,
                criterion,
                EWC(classifier, train_taskset, scenario, task_id),
                args.importance,
                optimizer,
                max_epochs,
                convergence_criterion,
            )
        else:
            train(
                classifier,
                task_id,
                train_loader,
                criterion,
                optimizer,
                max_epochs,
                convergence_criterion,
            )

        print2("=== Finished Training ===")
        classifier.eval()

        # Validate against separate validation data
        cum_accuracy = 0.0
        for val_task_id, val_taskset in enumerate(scenario_val):
            # Validate on all previously trained tasks (but not future tasks)
            if val_task_id > task_id:
                break

            val_loader = DataLoader(val_taskset, batch_size=32, shuffle=True)

            # Make sure we're validating the correct classes
            unq_cls_validate = np.unique(val_taskset._y)
            print2(f"Validating classes: {unq_cls_validate} -- val_task_id:{val_task_id} task_id:{task_id}")

            total = 0.0
            correct = 0.0
            pred_classes = np.array([])
            with torch.no_grad():
                for x, y, t in val_loader:
                    x, y = x.to(device), y.to(device)
                    outputs = classifier(x)
                    _, predicted = torch.max(outputs.data, 1)
                    pred_classes = np.unique(np.append(pred_classes, predicted.cpu()))
                    total += y.size(0)
                    correct += (predicted == y).sum().item()

            print2(f"Classes predicted: {pred_classes}")
            print2(f"=== Validation Accuracy: {100.0 * correct / total}%\n")
            cum_accuracy += (correct / total)

        # Normalised by the total number of tasks (not only those seen so far),
        # which is what the previously hard-coded 9 did for this scenario.
        avg_accuracy = cum_accuracy / nb_tasks
        print2(f"Average Accuracy: {100.0 * avg_accuracy:.5f}% [{avg_accuracy:.5f}]")
        accuracies.append(avg_accuracy)

    # Running Time
    print2("--- %s seconds ---" % (time.time() - start_time))

    # TODO: Add EWC Training

    # Some plots over time
    from pathlib import Path
    Path('continuum/output').mkdir(parents=True, exist_ok=True)

    task_numbers = list(range(1, len(accuracies) + 1))
    plt.plot(task_numbers, accuracies, '-o', label="Naive")
    # plt.plot(task_numbers, rehe_accs, '-o', label="Rehearsal")
    # plt.plot(task_numbers, ewc_accs, '-o', label="EWC")
    plt.xlabel('Tasks Encountered', fontsize=14)
    plt.ylabel('Average Accuracy', fontsize=14)
    plt.title('Rehersal Strategy on Core50 w/ResNet18', fontsize=14)
    plt.xticks(task_numbers)
    plt.legend(prop={'size': 16})

    # Save before showing: once the window opened by show() is closed the
    # figure may no longer be available, which would leave an empty image.
    filenames = dt.datetime.now().strftime("%Y%m%d-%H%M%S")
    plt.savefig('continuum/output/run_' + filenames + '.png')
    plt.show()
def test_MNIST_Fellowship(tmpdir):
    """MNISTFellowship stored under ``tmpdir`` must split into 3 tasks of 10 classes."""
    fellowship = MNISTFellowship(data_path=tmpdir, train=True, download=True)
    fellowship.get_data()

    nb_tasks = len(ClassIncremental(fellowship, increment=10))
    assert nb_tasks == 3
def test_split_batch_fn():
    """Check ``PassiveEnvironment`` when a custom ``split_batch_fn`` is supplied.

    MNIST is split into tasks of two classes. For each task an environment is
    created whose observations are ``(x, t)`` pairs and whose rewards are the
    labels ``y``; the test checks the batched space shapes and the shapes of
    what ``reset()`` and ``step()`` return.
    """
    # from continuum.datasets import MNIST
    batch_size = 5

    def split_batch_fn(
        batch: Tuple[Tensor, Tensor, Tensor]
    ) -> Tuple[Tuple[Tensor, Tensor], Tensor]:
        # Observation is (image, task label); the class label is the reward.
        x, y, t = batch
        return (x, t), y

    # dataset = MNIST("data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]))
    from continuum import ClassIncremental
    from continuum.datasets import MNIST
    from continuum.tasks import split_train_val  # noqa: F401

    scenario = ClassIncremental(
        MNIST("data", download=True, train=True),
        increment=2,
        transformations=Compose(
            [Transforms.to_tensor, Transforms.three_channels]),
    )

    classes_per_task = scenario.nb_classes // scenario.nb_tasks
    print(f"Number of classes per task {classes_per_task}.")

    for i, task_dataset in enumerate(scenario):
        task_action_space = spaces.Box(
            low=np.array([i * classes_per_task]),
            high=np.array([(i + 1) * classes_per_task]),
            dtype=int,
        )
        # Checked before the space is handed to the environment.
        assert task_action_space.shape == (1, )

        env = PassiveEnvironment(
            task_dataset,
            n_classes=classes_per_task,
            batch_size=batch_size,
            split_batch_fn=split_batch_fn,
            # Need to pass the observation space, in this case.
            observation_space=spaces.Tuple([
                spaces.Box(low=0, high=1, shape=(3, 28, 28)),
                spaces.Discrete(scenario.nb_tasks),  # task label
            ]),
            action_space=task_action_space,
        )
        try:
            assert isinstance(env.observation_space[0], spaces.Box)
            assert env.observation_space[0].shape == (batch_size, 3, 28, 28)
            assert env.observation_space[1].shape == (batch_size, )
            assert env.action_space.shape == (batch_size, 1)
            assert env.reward_space.shape == (batch_size, 1)

            env.seed(123)

            obs = env.reset()
            assert len(obs) == 2
            x, t = obs
            assert x.shape == (batch_size, 3, 28, 28)
            assert t.shape == (batch_size, )

            obs, reward, done, info = env.step(env.action_space.sample())
            # Re-unpack so the shape checks below look at the observation
            # returned by step(), not the stale one from reset().
            x, t = obs
            assert x.shape == (batch_size, 3, 28, 28)
            assert t.shape == (batch_size, )
            assert reward.shape == (batch_size, )
            assert not done
        finally:
            # Release the environment even when an assertion fails.
            env.close()
def test_CIFAR_Fellowship():
    """CIFARFellowship split with ``increment=10`` must produce 11 tasks."""
    fellowship = CIFARFellowship(
        data_path="./tests/Datasets", train=True, download=True
    )

    nb_tasks = len(ClassIncremental(fellowship, increment=10))
    assert nb_tasks == 11