def get_aeloaders(dataset, batch, dataroot, ae_file,
                  trans_type=TRANSFORMATION.clean):
    # Reuse the clean train/validation loaders; only the test loader is
    # replaced with the pre-crafted adversarial examples (AEs) from ae_file.
    train_sampler, trainloader, validloader, _ = get_dataloaders(
        dataset, batch, dataroot, trans_type)
    _, test_aug = get_augmentation(dataset)
    _, (_, y_test) = load_data(dataset)

    # Load the AEs, apply the model's associated transformation, and bring
    # them into the expected value range and channel layout.
    x_ae = load_model(ae_file)
    x_ae = transform(x_ae, trans_type)
    x_ae = data_utils.rescale(x_ae)
    x_ae = data_utils.set_channels_first(x_ae)

    testset = MyDataset(x_ae, y_test, aug=test_aug)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=batch, shuffle=False, num_workers=32,
        pin_memory=torch.cuda.is_available(), drop_last=False)

    return train_sampler, trainloader, validloader, testloader
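# Usage sketch for get_aeloaders (not part of the original module): evaluate a
# trained classifier on pre-crafted adversarial examples. `net` and the AE
# file path 'mnist_fgsm.npy' are hypothetical placeholders, not names from
# this repository.
import torch

_, _, _, ae_loader = get_aeloaders('mnist', batch=128, dataroot='./data',
                                   ae_file='mnist_fgsm.npy')

net.eval()
correct = total = 0
with torch.no_grad():
    for x, y in ae_loader:
        if y.dim() > 1:  # labels may be one-hot; reduce to class indices
            y = y.argmax(dim=1)
        correct += (net(x).argmax(dim=1) == y).sum().item()
        total += y.size(0)
print('accuracy on adversarial examples: {:.2%}'.format(correct / total))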
def ensemble_defenses(  # modelsDir,
        models, modelFilenamePrefix, transformationList,
        datasetFilePath, nClasses, ensembleID,
        useLogit=False, checkTimeCost=False):
    '''
    Input:
        modelFilenamePrefix and transformationList are used to derive each
        model's filename, which is assumed to follow the format
        model-<modelFilenamePrefix>-<transformType>.h5.
        If this assumption changes, update the corresponding logic in
        load_models().
    Output:
        labels: an array of nSamples predicted labels.
    '''
    convertToLogit = useLogit
    # models = load_models(modelsDir, modelFilenamePrefix, transformationList,
    #                      convertToLogit=convertToLogit)

    data = np.load(datasetFilePath)
    data = data_utils.rescale(data)  # ensure its values lie inside [0, 1]

    rawPred, transTCs, predTCs = prediction(data, models, nClasses,
                                            transformationList)
    return ensemble_defenses_util(rawPred, ensembleID)
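# ensembleID selects the fusion strategy applied inside ensemble_defenses_util.
# As an illustration only (not necessarily the strategy this repository
# implements), a majority vote over the per-model predictions could look like
# this, assuming rawPred has shape (nModels, nSamples, nClasses):
import numpy as np

def majority_vote(raw_pred):
    """Return one label per sample by majority vote across models."""
    per_model_labels = np.argmax(raw_pred, axis=-1)  # (nModels, nSamples)
    n_classes = raw_pred.shape[-1]
    votes = np.apply_along_axis(
        lambda col: np.bincount(col, minlength=n_classes),
        axis=0, arr=per_model_labels)                # (nClasses, nSamples)
    return np.argmax(votes, axis=0)                  # (nSamples,)

# A probability-averaging variant would instead be:
#   np.argmax(raw_pred.mean(axis=0), axis=-1)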
if __name__ == '__main__':
    transformations = TRANSFORMATION.supported_types()

    data = {
        'dataset': DATA.mnist,
        'architecture': 'cnn',
    }

    (X_train, y_train), (X_test, y_test) = load_data(data['dataset'])
    nb_classes = y_train.shape[-1]

    X_train = data_utils.set_channels_last(X_train)
    X_test = data_utils.set_channels_last(X_test)
    y_train = np.argmax(y_train, axis=1)
    y_test = np.argmax(y_test, axis=1)

    for trans in transformations:
        data['trans'] = trans
        data['train'] = (data_utils.rescale(transform(X_train, trans)), y_train)
        data['test'] = (data_utils.rescale(transform(X_test, trans)), y_test)

        model = train(data, nb_classes=nb_classes, eval=True, conf=train_conf)

        filename = 'model-{}-{}-{}.h5'.format(data['dataset'],
                                              data['architecture'],
                                              data['trans'])
        filename = os.path.join(PATH.MODEL, filename)
        model_utils.save(model, filename)
def load_data(dataset, trans_type=TRANSFORMATION.clean, trans_set='both'):
    assert dataset in DATA.get_supported_datasets()
    assert trans_set is None or trans_set in ['none', 'train', 'test', 'both']

    X_train = None
    Y_train = None
    X_test = None
    Y_test = None
    img_rows = 0
    img_cols = 0
    nb_channels = 0
    nb_classes = 0

    if DATA.mnist == dataset:
        # Dataset of 60,000 28x28 grayscale images of the 10 digits,
        # along with a test set of 10,000 images.
        (X_train, Y_train), (X_test, Y_test) = mnist.load_data()
        nb_examples, img_rows, img_cols = X_test.shape
        nb_channels = 1
        nb_classes = 10
    elif DATA.fation_mnist == dataset:
        # Dataset of 60,000 28x28 grayscale images of 10 fashion categories,
        # along with a test set of 10,000 images. The class labels are:
        #
        #   Label   Description
        #   0       T-shirt/top
        #   1       Trouser
        #   2       Pullover
        #   3       Dress
        #   4       Coat
        #   5       Sandal
        #   6       Shirt
        #   7       Sneaker
        #   8       Bag
        #   9       Ankle boot
        (X_train, Y_train), (X_test, Y_test) = fashion_mnist.load_data()
        nb_examples, img_rows, img_cols = X_test.shape
        nb_channels = 1
        nb_classes = 10
    elif DATA.cifar_10 == dataset:
        # Dataset of 50,000 32x32 color training images, labeled over
        # 10 categories, and 10,000 test images.
        (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
        nb_examples, img_rows, img_cols, nb_channels = X_test.shape
        nb_classes = 10
    elif DATA.cifar_100 == dataset:
        (X_train, Y_train), (X_test, Y_test) = cifar100.load_data(label_mode='fine')
        nb_examples, img_rows, img_cols, nb_channels = X_test.shape
        nb_classes = 100

    X_train = X_train.reshape(-1, img_rows, img_cols, nb_channels)
    X_test = X_test.reshape(-1, img_rows, img_cols, nb_channels)

    # Cast pixels to floats, normalize to the [0, 1] range.
    X_train = X_train.astype(np.float32)
    X_test = X_test.astype(np.float32)
    X_train = data_utils.rescale(X_train, range=(0., 1.))
    X_test = data_utils.rescale(X_test, range=(0., 1.))

    # One-hot-encode the labels.
    Y_train = keras.utils.to_categorical(Y_train, nb_classes)
    Y_test = keras.utils.to_categorical(Y_test, nb_classes)

    # Transform images in the requested split(s).
    if trans_set is not None:
        if trans_set in ['train', 'both']:
            X_train = transform(X_train, trans_type)
            X_train = data_utils.rescale(X_train, range=(0., 1.))
            X_train = data_utils.set_channels_first(X_train)

        if trans_set in ['test', 'both']:
            X_test = transform(X_test, trans_type)
            X_test = data_utils.rescale(X_test, range=(0., 1.))
            X_test = data_utils.set_channels_first(X_test)

    # Summarize the data set.
    print('Dataset({}) Summary:'.format(dataset.upper()))
    print('Train set: {}, {}'.format(X_train.shape, Y_train.shape))
    print('Test set: {}, {}'.format(X_test.shape, Y_test.shape))

    return (X_train, Y_train), (X_test, Y_test)
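# Usage sketch for load_data (the shape assertions hold for MNIST): both
# splits are transformed, here with the identity/"clean" transformation, and
# come back channels-first with one-hot labels.
(X_train, Y_train), (X_test, Y_test) = load_data(
    DATA.mnist, trans_type=TRANSFORMATION.clean, trans_set='both')

assert X_train.shape == (60000, 1, 28, 28) and Y_train.shape == (60000, 10)
assert X_test.shape == (10000, 1, 28, 28) and Y_test.shape == (10000, 10)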
def train(dataset, model=None, trans_type=TRANSFORMATION.clean,
          save_path='cnn_mnist.h5', eval=True, **kwargs):
    """
    Train a CNN model on MNIST or Fashion-MNIST.
    :param dataset: the dataset to train on.
    :param model: a model to train; a new one is created if None.
    :param trans_type: the transformation associated with the model.
    :param save_path: file name, including the path, to save the trained model.
    :param eval: whether to evaluate the trained model on all splits.
    :param kwargs: customized loss function, optimizer, etc. for cleverhans to craft AEs.
    :return: the trained model
    """
    lr = 0.001
    validation_rate = 0.2

    optimizer = kwargs.get('optimizer', keras.optimizers.Adam(lr=lr))
    loss_fn = kwargs.get('loss', keras.losses.categorical_crossentropy)
    metrics = kwargs.get('metrics', 'default')

    logger.info('optimizer: [{}].'.format(optimizer))
    logger.info('loss function: [{}].'.format(loss_fn))
    logger.info('metrics: [{}].'.format(metrics))

    (X_train, Y_train), (X_test, Y_test) = data.load_data(dataset)
    X_train = data_utils.set_channels_last(X_train)
    X_test = data_utils.set_channels_last(X_test)

    # Apply the transformation associated with the weak defending model.
    X_train = data_utils.rescale(transform(X_train, trans_type))
    X_test = data_utils.rescale(transform(X_test, trans_type))

    nb_examples, img_rows, img_cols, nb_channels = X_train.shape

    # Hold out the tail of the training set for validation.
    nb_train_samples = int(nb_examples * (1. - validation_rate))
    train_examples = X_train[:nb_train_samples]
    train_labels = Y_train[:nb_train_samples]
    val_examples = X_train[nb_train_samples:]
    val_labels = Y_train[nb_train_samples:]

    if model is None:
        model = create_model(input_shape=(img_rows, img_cols, nb_channels))

    # Compile the model.
    if metrics == 'default':
        model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])
    else:
        model.compile(optimizer=optimizer, loss=loss_fn,
                      metrics=['accuracy', metrics])

    # Train the model.
    batch_size = kwargs.get('batch_size', 128)
    epochs = kwargs.get('epochs', 20)

    start = time.monotonic()
    history = model.fit(train_examples, train_labels,
                        batch_size=batch_size, epochs=epochs, verbose=2,
                        validation_data=(val_examples, val_labels))
    cost = time.monotonic() - start
    logger.info('Done training. It took {} minutes.'.format(cost / 60.))

    if eval:
        scores_train = model.evaluate(train_examples, train_labels,
                                      batch_size=128, verbose=0)
        scores_val = model.evaluate(val_examples, val_labels,
                                    batch_size=128, verbose=0)
        scores_test = model.evaluate(X_test, Y_test, batch_size=128, verbose=0)
        logger.info('Evaluation on [{} set]: {}.'.format('training', scores_train))
        logger.info('Evaluation on [{} set]: {}.'.format('validation', scores_val))
        logger.info('Evaluation on [{} set]: {}.'.format('testing', scores_test))

    logger.info('Save the trained model to [{}].'.format(save_path))
    model.save(save_path)

    checkpoints_file = save_path.split('/')[-1].split('.')[0]
    checkpoints_file = 'checkpoints_train_' + checkpoints_file + '.csv'
    checkpoints_file = os.path.join(LOG_DIR, checkpoints_file)
    # Was `if not os.path.dirname(LOG_DIR)`, which never created the directory.
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    file.dict2csv(history.history, checkpoints_file)
    logger.info('Training checkpoints have been saved to file [{}].'.format(
        checkpoints_file))

    save_path = save_path.split('/')[-1].split('.')[0]
    save_path = 'hist_train_' + save_path + '.pdf'
    plot_training_history(history, save_path)

    return model
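# Hypothetical call to train(), mirroring the model-<dataset>-<arch>-<trans>.h5
# naming convention used elsewhere in this repository; the exact save path is
# an assumption.
model = train(DATA.mnist,
              trans_type=TRANSFORMATION.clean,
              save_path=os.path.join(PATH.MODEL, 'model-mnist-cnn-clean.h5'),
              eval=True,
              batch_size=128, epochs=20)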
def load_all_modalities_concatenated(self, split, split_type, downsample=1):
    all_images_t1, all_labels_t1, all_images_t2, all_labels_t2, all_index = [], [], [], [], []
    volumes = self.get_volumes_for_split(split, split_type)
    for v in volumes:
        images_t1, labels_t1 = self._load_volume(v, 't1')
        images_t2, labels_t2 = self._load_volume(v, 't2')

        # For each CHAOS subject, create pairs of T1 and T2 slices that
        # approximately correspond to the same position in the 3D volume,
        # i.e. contain the same anatomical parts. The slice indices below
        # are hardcoded per volume.
        if v == 1:
            images_t2 = images_t2[1:]
            labels_t2 = labels_t2[1:]
            images_t1 = images_t1[0:26]
            labels_t1 = labels_t1[0:26]
            images_t2 = images_t2[4:24]
            labels_t2 = labels_t2[4:24]
            images_t1 = np.concatenate([images_t1[0:5], images_t1[7:10], images_t1[13:17], images_t1[18:]], axis=0)
            labels_t1 = np.concatenate([labels_t1[0:5], labels_t1[7:10], labels_t1[13:17], labels_t1[18:]], axis=0)
        if v == 2:
            images_t1 = np.concatenate([images_t1[4:7], images_t1[8:23]], axis=0)
            labels_t1 = np.concatenate([labels_t1[4:7], labels_t1[8:23]], axis=0)
            images_t2 = images_t2[3:22]
            labels_t2 = labels_t2[3:22]
            images_t1 = np.concatenate([images_t1[0:11], images_t1[12:18]], axis=0)
            labels_t1 = np.concatenate([labels_t1[0:11], labels_t1[12:18]], axis=0)
            images_t2 = np.concatenate([images_t2[0:11], images_t2[12:18]], axis=0)
            labels_t2 = np.concatenate([labels_t2[0:11], labels_t2[12:18]], axis=0)
        if v == 3:
            images_t1 = np.concatenate([images_t1[11:14], images_t1[15:26]], axis=0)
            labels_t1 = np.concatenate([labels_t1[11:14], labels_t1[15:26]], axis=0)
            images_t2 = images_t2[9:23]
            labels_t2 = labels_t2[9:23]
        if v == 5:
            images_t1 = np.concatenate([images_t1[4:5], images_t1[8:24]], axis=0)
            labels_t1 = np.concatenate([labels_t1[4:5], labels_t1[8:24]], axis=0)
            images_t2 = images_t2[2:22]
            labels_t2 = labels_t2[2:22]
            images_t2 = np.concatenate([images_t2[0:6], images_t2[9:]], axis=0)
            labels_t2 = np.concatenate([labels_t2[0:6], labels_t2[9:]], axis=0)
            images_t1 = np.concatenate([images_t1[0:8], images_t1[9:]], axis=0)
            labels_t1 = np.concatenate([labels_t1[0:8], labels_t1[9:]], axis=0)
            images_t2 = np.concatenate([images_t2[0:8], images_t2[9:]], axis=0)
            labels_t2 = np.concatenate([labels_t2[0:8], labels_t2[9:]], axis=0)
        if v == 8:
            images_t1 = images_t1[2:-2]
            labels_t1 = labels_t1[2:-2]
            images_t1 = np.concatenate([images_t1[5:11], images_t1[12:27]], axis=0)
            labels_t1 = np.concatenate([labels_t1[5:11], labels_t1[12:27]], axis=0)
            images_t2 = images_t2[6:27]
            labels_t2 = labels_t2[6:27]
        if v == 10:
            images_t1 = images_t1[14:38]
            labels_t1 = labels_t1[14:38]
            images_t2 = images_t2[5:24]
            labels_t2 = labels_t2[5:24]
            images_t1 = np.concatenate([images_t1[0:8], images_t1[12:18], images_t1[19:]], axis=0)
            labels_t1 = np.concatenate([labels_t1[0:8], labels_t1[12:18], labels_t1[19:]], axis=0)
        if v == 13:
            images_t1 = images_t1[4:29]
            labels_t1 = labels_t1[4:29]
            images_t2 = images_t2[3:28]
            labels_t2 = labels_t2[3:28]
        if v == 15:
            images_t1 = images_t1[:22]
            labels_t1 = labels_t1[:22]
            images_t2 = images_t2[:22]
            labels_t2 = labels_t2[:22]
        if v == 19:
            images_t1 = images_t1[8:27]
            labels_t1 = labels_t1[8:27]
            images_t2 = images_t2[5:24]
            labels_t2 = labels_t2[5:24]
        if v == 20:
            images_t1 = images_t1[2:21]
            labels_t1 = labels_t1[2:21]
            images_t2 = images_t2[2:21]
            labels_t2 = labels_t2[2:21]
        if v == 21:
            images_t1 = images_t1[3:19]
            labels_t1 = labels_t1[3:19]
            images_t2 = images_t2[5:21]
            labels_t2 = labels_t2[5:21]
        if v == 22:
            images_t1 = images_t1[:-2]
            labels_t1 = labels_t1[:-2]
            images_t1 = np.concatenate([images_t1[8:17], images_t1[18:26]], axis=0)
            labels_t1 = np.concatenate([labels_t1[8:17], labels_t1[18:26]], axis=0)
            images_t2 = np.concatenate([images_t2[3:12], images_t2[15:23]], axis=0)
            labels_t2 = np.concatenate([labels_t2[3:12], labels_t2[15:23]], axis=0)
        if v == 31:
            images_t1 = images_t1[7:23]
            labels_t1 = labels_t1[7:23]
            images_t2 = np.concatenate([images_t2[5:12], images_t2[13:22]], axis=0)
            labels_t2 = np.concatenate([labels_t2[5:12], labels_t2[13:22]], axis=0)
        if v == 32:
            images_t1 = images_t1[5:32]
            labels_t1 = labels_t1[5:32]
            images_t2 = images_t2[3:30]
            labels_t2 = labels_t2[3:30]
        if v == 33:
            images_t1 = images_t1[7:-5]
            labels_t1 = labels_t1[7:-5]
            images_t2 = np.concatenate([images_t2[3:12], images_t2[15:-2]], axis=0)
            labels_t2 = np.concatenate([labels_t2[3:12], labels_t2[15:-2]], axis=0)
        if v == 34:
            images_t1 = np.concatenate([images_t1[1:2], images_t1[3:4], images_t1[5:6], images_t1[7:27]], axis=0)
            labels_t1 = np.concatenate([labels_t1[1:2], labels_t1[3:4], labels_t1[5:6], labels_t1[7:27]], axis=0)
            images_t1 = np.concatenate([images_t1[0:14], images_t1[15:16], images_t1[17:18], images_t1[19:22], images_t1[23:24]], axis=0)
            labels_t1 = np.concatenate([labels_t1[0:14], labels_t1[15:16], labels_t1[17:18], labels_t1[19:22], labels_t1[23:24]], axis=0)
            images_t2 = images_t2[2:21]
            labels_t2 = labels_t2[2:21]
        if v == 36:
            images_t1 = images_t1[8:25]
            labels_t1 = labels_t1[8:25]
            images_t2 = np.concatenate([images_t2[4:6], images_t2[7:22]], axis=0)
            labels_t2 = np.concatenate([labels_t2[4:6], labels_t2[7:22]], axis=0)
        if v == 37:
            images_t1 = np.concatenate([images_t1[9:23], images_t1[24:-1]], axis=0)
            labels_t1 = np.concatenate([labels_t1[9:23], labels_t1[24:-1]], axis=0)
            images_t2 = np.concatenate([images_t2[4:6], images_t2[7:21], images_t2[22:-7]], axis=0)
            labels_t2 = np.concatenate([labels_t2[4:6], labels_t2[7:21], labels_t2[22:-7]], axis=0)
        if v == 38:
            images_t1 = images_t1[9:24]
            labels_t1 = labels_t1[9:24]
            images_t2 = images_t2[9:24]
            labels_t2 = labels_t2[9:24]
        if v == 39:
            images_t1 = images_t1[3:22]
            labels_t1 = labels_t1[3:22]
            images_t2 = images_t2[3:22]
            labels_t2 = labels_t2[3:22]

        # Rescale every slice independently to [-1, 1].
        images_t1 = np.concatenate([data_utils.rescale(images_t1[i:i + 1], -1, 1)
                                    for i in range(images_t1.shape[0])])
        images_t2 = np.concatenate([data_utils.rescale(images_t2[i:i + 1], -1, 1)
                                    for i in range(images_t2.shape[0])])
        assert images_t1.max() == 1 and images_t1.min() == -1, \
            '%.3f to %.3f' % (images_t1.max(), images_t1.min())
        assert images_t2.max() == 1 and images_t2.min() == -1, \
            '%.3f to %.3f' % (images_t2.max(), images_t2.min())

        all_images_t1.append(images_t1)
        all_labels_t1.append(labels_t1)
        all_images_t2.append(images_t2)
        all_labels_t2.append(labels_t2)
        all_index.append(np.array([v] * images_t1.shape[0]))

    all_images_t1, all_labels_t1 = data_utils.crop_same(all_images_t1, all_labels_t1, self.input_shape[:-1])
    all_images_t2, all_labels_t2 = data_utils.crop_same(all_images_t2, all_labels_t2, self.input_shape[:-1])

    all_images_t1 = np.concatenate(all_images_t1, axis=0)
    all_labels_t1 = np.concatenate(all_labels_t1, axis=0)
    all_images_t2 = np.concatenate(all_images_t2, axis=0)
    all_labels_t2 = np.concatenate(all_labels_t2, axis=0)

    # Stack the two modalities along the channel axis in the requested order.
    if self.modalities == ['t1', 't2']:
        all_images = np.concatenate([all_images_t1, all_images_t2], axis=-1)
        all_labels = np.concatenate([all_labels_t1, all_labels_t2], axis=-1)
    elif self.modalities == ['t2', 't1']:
        all_images = np.concatenate([all_images_t2, all_images_t1], axis=-1)
        all_labels = np.concatenate([all_labels_t2, all_labels_t1], axis=-1)
    else:
        raise ValueError('invalid self.modalities', self.modalities)

    all_index = np.concatenate(all_index, axis=0)
    assert all_labels.max() == 1 and all_labels.min() == 0, \
        '%.3f to %.3f' % (all_labels.max(), all_labels.min())
    return MultimodalPairedData(all_images, all_labels, all_index, downsample=downsample)
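# Hedged usage sketch: `loader` is assumed to be an instance of the data-loader
# class that defines load_all_modalities_concatenated, with
# loader.modalities == ['t1', 't2']; the split arguments and the contents of
# the returned MultimodalPairedData are assumptions based on how the method reads.
paired = loader.load_all_modalities_concatenated(split=0,
                                                 split_type='training',
                                                 downsample=1)
# `paired` holds slice-aligned T1/T2 pairs stacked along the channel axis,
# together with the source volume id of every pair.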