def test_filter_by_str_slice():
    """Check string-slice filtering of the MNIST "test" split.

    A slice that starts at the dataset size must make
    ``datasets.filter_by_str_slice`` raise ``ValueError``.  Valid slice
    strings passed through ``index=`` must yield a dataset whose reported
    size and labels match the same slice of the first ten unfiltered labels.
    """
    dataset_size = 10000
    tf_ds = datasets.mnist(
        "test", shuffle_files=False, preprocessing_fn=None, framework="tf"
    )
    with pytest.raises(ValueError):
        datasets.filter_by_str_slice(tf_ds, "[10000:]", dataset_size)

    full_ds = datasets.mnist("test", shuffle_files=False, preprocessing_fn=None)
    assert full_ds.size == dataset_size
    # Reference ordering: labels of the first 10 examples.
    ys = np.hstack([next(full_ds)[1] for _ in range(10)])

    # Each slice string is paired with the equivalent Python slice of ``ys``.
    cases = (
        ("[:5]", slice(None, 5)),
        ("[3:8]", slice(3, 8)),
        ("[0:5]", slice(0, 5)),
    )
    for spec, window in cases:
        expected = ys[window]
        sliced_ds = datasets.mnist(
            "test", shuffle_files=False, preprocessing_fn=None, index=spec
        )
        assert sliced_ds.size == len(expected)
        observed = np.hstack([label for _, label in sliced_ds])
        assert (expected == observed).all()
def test_tf1_mnist():
    """Fit the TF graph MNIST baseline for one epoch and check test accuracy.

    The model comes from ``get_art_model`` in
    ``armory.baseline_models.tf_graph.mnist``.  The mean of the per-batch
    accuracies over one pass of the test split must exceed 0.9.
    """
    module = import_module("armory.baseline_models.tf_graph.mnist")
    build_model = getattr(module, "get_art_model")
    classifier = build_model(model_kwargs={}, wrapper_kwargs={})

    train_dataset = datasets.mnist(
        split="train", epochs=1, batch_size=600, dataset_dir=DATASET_DIR
    )
    test_dataset = datasets.mnist(
        split="test", epochs=1, batch_size=100, dataset_dir=DATASET_DIR
    )

    classifier.fit_generator(train_dataset, nb_epochs=1)

    # Collect the fraction of correct predictions for every test batch.
    batch_scores = []
    for _ in range(test_dataset.batches_per_epoch):
        x, y = test_dataset.get_batch()
        predicted_labels = np.argmax(classifier.predict(x), axis=1)
        batch_scores.append(np.sum(predicted_labels == y) / len(y))

    mean_accuracy = sum(batch_scores) / test_dataset.batches_per_epoch
    assert mean_accuracy > 0.9
def test_tf_pytorch_equality():
    """Check that the "tf" and "pytorch" MNIST loaders yield identical batches.

    Both loaders read the "test" split unshuffled with the same batch size.
    For every pair of batches produced in lockstep, the images (element 0)
    and the labels (element 1) must have a maximum absolute difference of 0.
    """
    batch_size = 10
    shared_kwargs = dict(
        split_type="test",
        batch_size=batch_size,
        dataset_dir=DATASET_DIR,
        shuffle_files=False,
    )
    ds_tf = datasets.mnist(framework="tf", **shared_kwargs)
    ds_pytorch = iter(datasets.mnist(framework="pytorch", **shared_kwargs))

    for tf_example, torch_example in zip(ds_tf, ds_pytorch):
        tf_image, tf_label = tf_example[0].numpy(), tf_example[1].numpy()
        torch_image, torch_label = (
            torch_example[0].numpy(),
            torch_example[1].numpy(),
        )

        image_gap = np.abs(tf_image - torch_image)
        assert np.amax(image_gap) == 0

        label_gap = np.abs(tf_label - torch_label)
        assert np.amax(label_gap) == 0
def test_filter_by_index():
    """Check list-of-index filtering of the MNIST "test" split.

    ``datasets.filter_by_index`` must raise ``ValueError`` for an empty list,
    a list containing a negative index, and a list containing a string.
    Valid index lists passed through ``index=`` must yield the labels found
    at the sorted, de-duplicated positions of the unfiltered split.
    """
    dataset_size = 10000
    tf_ds = datasets.mnist(
        "test", shuffle_files=False, preprocessing_fn=None, framework="tf"
    )
    rejected_indices = ([], [-4, 5, 6], ["1:3"])
    for bad_index in rejected_indices:
        with pytest.raises(ValueError):
            datasets.filter_by_index(tf_ds, bad_index, dataset_size)

    full_ds = datasets.mnist("test", shuffle_files=False, preprocessing_fn=None)
    assert full_ds.size == dataset_size
    # Reference ordering: labels of the first 10 examples.
    ys = np.hstack([next(full_ds)[1] for _ in range(10)])

    accepted_indices = (
        [1, 3, 6, 5],
        [0],
        [6, 7, 8, 9, 9, 8, 7, 6],
        list(range(10)),
    )
    for requested in accepted_indices:
        filtered_ds = datasets.mnist(
            "test", shuffle_files=False, preprocessing_fn=None, index=requested
        )
        # The expectation is built from the unique indices in ascending order.
        positions = sorted(set(requested))
        assert filtered_ds.size == len(positions)
        observed = np.hstack([label for _, label in filtered_ds])
        assert (ys[positions] == observed).all()
def test_tf_generator():
    """Check that ``framework="tf"`` makes ``datasets.mnist`` return a ``tf.data.Dataset``."""
    # NOTE(review): a second ``test_tf_generator`` is defined later in this
    # file; if both live in the same module the later one replaces this one.
    mnist_kwargs = dict(
        split_type="train",
        epochs=1,
        batch_size=16,
        dataset_dir=DATASET_DIR,
        framework="tf",
    )
    dataset = datasets.mnist(**mnist_kwargs)
    assert isinstance(dataset, tf.data.Dataset)
def test_pytorch_generator():
    """Check that requesting ``framework="pytorch"`` raises ``NotImplementedError``."""
    mnist_kwargs = dict(
        split_type="train",
        epochs=1,
        batch_size=16,
        dataset_dir=DATASET_DIR,
        framework="pytorch",
    )
    with pytest.raises(NotImplementedError):
        datasets.mnist(**mnist_kwargs)
def test_numpy_generator():
    """Check that ``framework="numpy"`` batches carry their inputs as ``np.ndarray``."""
    dataset = datasets.mnist(
        split="train",
        epochs=1,
        batch_size=16,
        dataset_dir=DATASET_DIR,
        framework="numpy",
    )
    # A batch is an (inputs, labels) pair; only the inputs are type-checked.
    inputs, _ = dataset.get_batch()
    assert isinstance(inputs, np.ndarray)
def test_generator():
    """Check that iterating the MNIST "train" split yields well-shaped batches.

    Only the first batch of the split is inspected: inputs must have shape
    ``(batch_size, 28, 28, 1)`` and labels ``(batch_size,)``.  The test fails
    explicitly when iteration yields nothing, instead of passing without
    having checked any batch.
    """
    batch_size = 600
    for split in ("train",):
        dataset = datasets.mnist(
            split=split,
            epochs=1,
            batch_size=batch_size,
            dataset_dir=DATASET_DIR,
        )
        batches_checked = 0
        for x, y in dataset:
            assert x.shape == (batch_size, 28, 28, 1)
            assert y.shape == (batch_size,)
            batches_checked += 1
            break  # one batch is enough to validate the shapes
        # Guard against a vacuous pass on an empty iterator.
        assert batches_checked == 1, f"{split!r} split produced no batches"
def test_parse_split_index_ordering():
    """
    Ensure that a split restricted to a list of indices yields the same
    inputs, in the same order, as the examples found at those positions
    when iterating the unrestricted split
    """
    index = [5, 37, 38, 56, 111]  # test has max index 9999
    split = "test"
    kwargs = dict(epochs=1, batch_size=1, dataset_dir=DATASET_DIR, shuffle_files=False)

    wanted = set(index)
    last_wanted = max(index)

    # Walk the full split and keep the inputs located at the wanted positions.
    full_ds = datasets.mnist(split=split, **kwargs)
    fixed_order = []
    for position, (x, _) in enumerate(full_ds):
        if position in wanted:
            fixed_order.append(x)
        if position >= last_wanted:
            break

    # Ask for the same positions through the split string itself.
    sliced_split = f"{split}[{index}]"
    sliced_ds = datasets.mnist(split=sliced_split, **kwargs)
    output_x = [x for x, _ in sliced_ds]

    assert len(fixed_order) == len(output_x)
    for reference, candidate in zip(fixed_order, output_x):
        assert (reference == candidate).all()
def test_tf_generator():
    """Check that ``framework="tf"`` returns a TensorFlow dataset object.

    Both preprocessing hooks are disabled.  Either the v2 or the v1
    ``tf.compat`` ``Dataset`` class is accepted.
    """
    mnist_kwargs = dict(
        split="train",
        epochs=1,
        batch_size=16,
        dataset_dir=DATASET_DIR,
        framework="tf",
        preprocessing_fn=None,
        fit_preprocessing_fn=None,
    )
    dataset = datasets.mnist(**mnist_kwargs)
    accepted_types = (tf.compat.v2.data.Dataset, tf.compat.v1.data.Dataset)
    assert isinstance(dataset, accepted_types)
def test_mnist():
    """Check size metadata and batch shapes for the MNIST train and test splits.

    For each split the dataset must report the expected ``size`` and
    ``batch_size``, a ``batches_per_epoch`` equal to the size divided by the
    batch size rounded up, and a first batch of the expected shapes.
    """
    batch_size = 600
    expected_sizes = {"train": 60000, "test": 10000}
    for split, size in expected_sizes.items():
        dataset = datasets.mnist(
            split=split,
            epochs=1,
            batch_size=batch_size,
            dataset_dir=DATASET_DIR,
        )
        assert dataset.size == size
        assert dataset.batch_size == batch_size

        # A trailing partial batch counts as one extra batch.
        full_batches, leftover = divmod(size, batch_size)
        expected_batches = full_batches + (1 if leftover else 0)
        assert dataset.batches_per_epoch == expected_batches

        x, y = dataset.get_batch()
        assert x.shape == (batch_size, 28, 28, 1)
        assert y.shape == (batch_size,)
def test_pytorch_generator_mnist():
    """Check the type, dtypes and shapes produced by the "pytorch" MNIST loader.

    The loader must be a ``torch.utils.data.DataLoader`` whose first batch
    holds int64 labels of shape ``(batch_size,)`` and uint8 images of shape
    ``(batch_size, 28, 28, 1)``.
    """
    batch_size = 16
    mnist_kwargs = dict(
        split_type="train",
        epochs=1,
        batch_size=batch_size,
        dataset_dir=DATASET_DIR,
        framework="pytorch",
    )
    dataset = datasets.mnist(**mnist_kwargs)
    assert isinstance(dataset, torch.utils.data.DataLoader)

    first_batch = next(iter(dataset))
    images, labels = first_batch

    expected_label_shape = (batch_size,)
    assert labels.dtype == torch.int64
    assert labels.shape == expected_label_shape

    expected_image_shape = (batch_size, 28, 28, 1)
    assert images.dtype == torch.uint8
    assert images.shape == expected_image_shape
def test_keras_mnist_pretrained():
    """Check that the Keras MNIST baseline with downloaded weights is accurate.

    The weights file ``undefended_mnist_5epochs.h5`` is obtained through
    ``maybe_download_weights_from_s3``.  The mean of the per-batch accuracies
    over one pass of the test split must exceed 0.98.
    """
    module = import_module("armory.baseline_models.keras.mnist")
    build_model = getattr(module, "get_art_model")
    weights_path = maybe_download_weights_from_s3("undefended_mnist_5epochs.h5")
    classifier = build_model(
        model_kwargs={}, wrapper_kwargs={}, weights_path=weights_path
    )

    test_dataset = datasets.mnist(
        split="test", epochs=1, batch_size=100, dataset_dir=DATASET_DIR
    )

    def next_batch_accuracy():
        # Fraction of correctly classified examples in the next test batch.
        x, y = test_dataset.get_batch()
        predicted_labels = np.argmax(classifier.predict(x), axis=1)
        return np.sum(predicted_labels == y) / len(y)

    summed_accuracy = sum(
        next_batch_accuracy() for _ in range(test_dataset.batches_per_epoch)
    )
    assert (summed_accuracy / test_dataset.batches_per_epoch) > 0.98
def test_pytorch_generator_epochs():
    """Check the batch count and ordering of the "pytorch" loader over two epochs.

    The loader must yield 2000 batches in total, and the labels of batch 0
    must not all equal the labels of batch 1000.  Batch 1000 is presumably
    the first batch of the second pass; confirm against the split size.
    """
    batch_size = 10
    epochs = 2
    batches_per_epoch = 1000  # batch count expected from a single pass
    dataset = datasets.mnist(
        split_type="test",
        epochs=epochs,
        batch_size=batch_size,
        dataset_dir=DATASET_DIR,
        framework="pytorch",
    )

    batches_seen = 0
    for _, labels in dataset:
        if batches_seen == 0:
            first_pass_labels = labels
        elif batches_seen == batches_per_epoch:
            second_pass_labels = labels
        batches_seen += 1

    assert batches_seen == epochs * batches_per_epoch
    assert not torch.all(torch.eq(first_pass_labels, second_pass_labels))
def test_pytorch_mnist_pretrained():
    """Check that the PyTorch MNIST baseline with pretrained weights is accurate.

    The model is built with ``weights_file="undefended_mnist_5epochs.pth"``
    and the test split is loaded with the baseline module's own
    ``preprocessing_fn``.  The mean of the per-batch accuracies over one pass
    of the test split must exceed 0.98.
    """
    module = import_module("armory.baseline_models.pytorch.mnist")
    build_model = getattr(module, "get_art_model")
    classifier = build_model(
        model_kwargs={},
        wrapper_kwargs={},
        weights_file="undefended_mnist_5epochs.pth",
    )
    preprocessing_fn = getattr(module, "preprocessing_fn")

    test_dataset = datasets.mnist(
        split_type="test",
        epochs=1,
        batch_size=100,
        dataset_dir=DATASET_DIR,
        preprocessing_fn=preprocessing_fn,
    )

    # Accumulate the fraction of correct predictions batch by batch.
    running_total = 0
    remaining = test_dataset.batches_per_epoch
    while remaining > 0:
        x, y = test_dataset.get_batch()
        hits = np.argmax(classifier.predict(x), axis=1) == y
        running_total += np.sum(hits) / len(y)
        remaining -= 1

    assert (running_total / test_dataset.batches_per_epoch) > 0.98