def prepare_dataset(p: TrainParameters):
    """Build the train and test image datasets described by ``p``.

    The dataset named ``p.dataset_name`` is loaded, its features are
    normalized with ``normalize_features()``, and both splits are wrapped
    using ``p.transformations`` and ``TransformationStrategy.random_sample``.

    * ``Task.Regression``: splits are wrapped in
      ``ImageTransformRegressionDataset`` (only ``x`` is passed) and the
      output dimension is ``len(p.transformations)``.
    * ``Task.Classification``: splits are wrapped in
      ``ImageClassificationDataset`` (``x`` and ``y`` are passed) and the
      output dimension is ``dataset.num_classes``.

    Returns:
        Tuple ``(train_dataset, test_dataset, input_shape, dim_output)``.

    Raises:
        ValueError: if ``p.task`` is neither of the two tasks above.
    """
    # ``tc`` is read here but not used further in this function.
    tc = p.tc
    transformations = p.transformations
    dataset_name = p.dataset_name
    task = p.task
    sampling = TransformationStrategy.random_sample

    if task == Task.Regression:
        data = datasets.get_regression(dataset_name)
        dim_output = len(transformations)
        data.normalize_features()
        train_dataset, test_dataset = [
            ImageTransformRegressionDataset(
                NumpyDataset(samples), transformations, sampling)
            for samples in (data.x_train, data.x_test)
        ]
    elif task == Task.Classification:
        data = datasets.get_classification(dataset_name)
        dim_output = data.num_classes
        data.normalize_features()
        labelled_splits = (
            (data.x_train, data.y_train),
            (data.x_test, data.y_test),
        )
        train_dataset, test_dataset = [
            ImageClassificationDataset(
                NumpyDataset(samples, labels), transformations, sampling)
            for samples, labels in labelled_splits
        ]
    else:
        raise ValueError(task)

    return train_dataset, test_dataset, data.input_shape, dim_output
def run(self):
    """Save grids of transformed test images for every configuration.

    Iterates over every combination of transformation set (the four entries
    of ``config.common_transformations_combined``), dataset name
    (``"mnist"``, ``"cifar10"``) and CUDA flag. For each combination the
    first ``n_images`` test images are taken, every transformation in the
    set is applied to each image, and the resulting stack is written as a
    PNG under ``self.folderpath / <dataset_name>``. A profiler summary is
    printed once all combinations are done.
    """
    n_images = 4
    names = ["mnist", "cifar10"]
    cuda_flags = [False, True]
    r, s, t_set, combined = config.common_transformations_combined
    transformation_sets = [r, s, t_set, combined]

    prof = profiler.Profiler()
    prof.event("start")

    combinations = itertools.product(transformation_sets, names, cuda_flags)
    for transformations, dataset_name, use_cuda in combinations:
        print(f"### Loading dataset {dataset_name} ....")
        out_dir = self.folderpath / f"{dataset_name}"
        out_dir.mkdir(exist_ok=True, parents=True)

        dataset = datasets.get_classification(dataset_name)
        dataset.normalize_features()
        adapter = tm.NumpyPytorchImageTransformationAdapter(
            use_cuda=use_cuda)
        test_split = NumpyDataset(dataset.x_test, dataset.y_test)
        x, y = test_split.get_batch(range(n_images))
        if use_cuda:
            x = x.cuda()

        n_t = len(transformations)
        print(
            f"Dataset {dataset_name}, Transformations: {n_t} ({transformations})"
        )
        # The suffix only depends on the CUDA flag, so compute it once.
        cuda_str = "_cuda" if use_cuda else ""

        for i in range(n_images):
            print(f"Generating plots for image {i}")
            image = x[i, :]
            # One output slot per transformation, each shaped like the image.
            stacked = torch.zeros((n_t, *image.shape))
            batched_image = image.unsqueeze(0)
            for j, transform in enumerate(transformations):
                stacked[j, :] = transform(batched_image)
            stacked_numpy = adapter.post_adapt(stacked)

            filepath = out_dir / f"samples_first_{i}_{transformations}{cuda_str}.png"
            util.plot_image_grid(stacked_numpy,
                                 samples=n_t,
                                 grid_cols=16,
                                 show=False,
                                 save=filepath)

    prof.event("end")
    print(prof.summary(human=True))
def get_data_generator(
        x: np.ndarray, y: np.ndarray, transformation: tm.TransformationSet,
        batch_size: int, num_workers: int,
        transformation_strategy: TransformationStrategy) -> DataLoader:
    """Return a shuffling ``DataLoader`` over transformed ``(x, y)`` samples.

    ``x`` and ``y`` are wrapped in a ``NumpyDataset``, which is in turn
    wrapped in an ``ImageClassificationDataset`` configured with
    ``transformation`` and ``transformation_strategy``.

    The loader shuffles, drops the last incomplete batch and uses pinned
    memory; ``batch_size`` and ``num_workers`` are forwarded unchanged.
    """
    # TODO verify this
    image_dataset = ImageClassificationDataset(
        NumpyDataset(x, y), transformation, transformation_strategy)

    loader_options = dict(
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        drop_last=True,
        pin_memory=True,
    )
    return DataLoader(image_dataset, **loader_options)
def plot(self,
         plot_folderpath: Path,
         model: ObservableLayersModule,
         dataset: datasets.ClassificationDataset,
         transformations: tm.TransformationSet,
         result: tm.MeasureResult,
         images=8,
         most_invariant_k: int = 4,
         least_invariant_k: int = 4,
         conv_aggregation=tm.AggregateTransformation()):
    """Plot invariant feature maps for the first ``images`` test samples.

    A ``NumpyDataset`` is built from the first ``images`` rows of
    ``dataset.x_test`` / ``dataset.y_test`` and fed, together with ``model``
    and ``transformations``, to a ``tm.NormalPytorchActivationsIterator``.
    The iterator, ``result`` and the remaining options are then handed to
    ``visualization.plot_invariant_feature_maps``, which receives
    ``plot_folderpath`` as its first argument.
    """
    x_subset = dataset.x_test[:images, :]
    y_subset = dataset.y_test[:images]
    subset = NumpyDataset(x_subset, y_subset)

    # NOTE(review): 32 and 0 look like batch size and worker count --
    # confirm against the iterator's signature.
    activations = tm.NormalPytorchActivationsIterator(
        model,
        subset,
        transformations,
        32,
        0,
        torch.cuda.is_available(),
    )

    visualization.plot_invariant_feature_maps(
        plot_folderpath,
        activations,
        result,
        most_invariant_k,
        least_invariant_k,
        conv_aggregation,
    )
import datasets
from testing.util import plot_image_grid
from pytorch.numpy_dataset import NumpyDataset
from transformational_measures.pytorch import ImageDataset
import transformational_measures as tm

# Script: transform a batch of cifar10 test images and save them as a grid.

dataformat = "NCHW"
dataset = datasets.get_classification("cifar10", dataformat=dataformat)
print(dataset.summary())

# Wrap the test split and attach the affine transformation generator.
numpy_dataset = NumpyDataset(dataset.x_test, dataset.y_test)
transformations = tm.SimpleAffineTransformationGenerator(r=360, s=5, t=3)
image_dataset = ImageDataset(
    numpy_dataset,
    transformations,
    dataformat=dataformat,
)

# Fetch the first 128 samples and reorder axes (0, 2, 3, 1) before
# converting to numpy.
batch_indices = list(range(128))
x, y = image_dataset.get_batch(batch_indices)
x = x.permute(0, 2, 3, 1).numpy()

# Min/max are reduced over the first three axes, leaving one value per
# entry of the last axis.
reduce_axes = (0, 1, 2)
print("pytorch_iterators", x.shape, x.dtype,
      x.min(axis=reduce_axes), x.max(axis=reduce_axes))

filepath = f"testing/{dataset.name}_samples.png"
print(f"Saving transformed image batch to {filepath}")
plot_image_grid(x, y, show=False, save=filepath)
# Script fragment: load a model, trim the test split and time a measure
# evaluation.
# NOTE(review): `model_name`, `dataset`, `use_cuda`, `transformations`,
# `measure` and `NumpyDataset` are not defined in this fragment; they are
# presumably provided by code outside this view -- confirm.
from experiment import model_loading
model, rotated_model, scores, config = model_loading.get_model(
    model_name, dataset, use_cuda)
print(model.name, dataset.name)

import transformational_measures as tm
import numpy as np
import matplotlib
# Select matplotlib's 'Agg' backend before anything below draws.
matplotlib.use('Agg')

# Keep only the first `samples` test items (the dataset is modified in place).
samples = 512
dataset.x_test = dataset.x_test[:samples, ]
dataset.y_test = dataset.y_test[:samples]
numpy_dataset = NumpyDataset(dataset.x_test, dataset.y_test)

# Four evenly spaced angles in [-pi, pi).
# NOTE(review): `rotations` is not used again within this fragment.
n_rotations = 4
rotations = np.linspace(-np.pi, np.pi, n_rotations, endpoint=False)

# NOTE(review): `iterator` is built but not referenced below; the timed call
# goes through `measure` instead -- verify this is intended.
iterator = tm.NormalStrategy(model, numpy_dataset, transformations, batch_size=256)

# Time a single evaluation of the measure (wall-clock seconds).
import time
begin = time.time()
variance_result, v_transformation, v_sample = measure.eval()
print(variance_result)
print(f"Time elapsed(normal): {time.time()-begin}")
import itertools

# Script: for every dataset and every (preprocessing, normalize) combination,
# save a grid of the first `images` test samples under
# <testing_path>/numpy_iterator/.

folderpath = config.testing_path() / "numpy_iterator"
folderpath.mkdir(parents=True, exist_ok=True)
images = 32
# Rebound by the loop below; kept so the module-level name still exists when
# `datasets.names` is empty.
preprocessing = True
print(f"Using datasets: {datasets.names}")

for dataset_name, preprocessing, normalize in itertools.product(
        datasets.names, [True, False], [True, False]):
    print(dataset_name, preprocessing, normalize)
    dataset = datasets.get_classification(dataset_name)

    # Encode both flags in the filename. The "normalized_" prefix must follow
    # `normalize` (it previously followed `preprocessing`), otherwise the two
    # `normalize` variants share one filename and overwrite each other.
    pre_str = "preprocessing_" if preprocessing else ""
    normalize_str = "normalized_" if normalize else ""
    filepath = folderpath / f"{pre_str}{normalize_str}{dataset_name}.png"

    numpy_dataset = NumpyDataset(dataset.x_test, dataset.y_test)
    if preprocessing:
        numpy_dataset = ImageClassificationDataset(numpy_dataset)

    x, y = numpy_dataset.get_batch(list(range(images)))
    if not preprocessing:
        # Without the preprocessing wrapper, convert to float and divide
        # by 255 here.
        x = x.float() / 255

    # permute to NHWC order
    x = x.permute(0, 2, 3, 1)
    x = x.numpy()
    plot_image_grid(x, show=False, save=filepath, normalize=normalize)