def test_ptclassifier(self):
        """
        Third test with the PyTorchClassifier.
        :return:
        """
        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train = np.swapaxes(x_train, 1, 3)
        x_test = np.swapaxes(x_test, 1, 3)

        # Create simple CNN
        model = Model()

        # Define a loss function and optimizer
        loss_fn = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01)

        # Get classifier
        ptc = PyTorchClassifier((0, 1), model, loss_fn, optimizer, (1, 28, 28),
                                10)
        ptc.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epochs=1)

        # Attack
        nf = NewtonFool(ptc, max_iter=5)
        x_test_adv = nf.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        y_pred = ptc.predict(x_test)
        y_pred_adv = ptc.predict(x_test_adv)
        y_pred_bool = y_pred.max(axis=1, keepdims=True) == y_pred
        y_pred_max = y_pred.max(axis=1)
        y_pred_adv_max = y_pred_adv[y_pred_bool]
        self.assertTrue((y_pred_max >= y_pred_adv_max).all())
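
Model here is a small CNN defined elsewhere in the test module. A minimal sketch consistent with the (1, 28, 28) input shape and 10 output classes; the layer names and sizes are assumptions, not the original definition:

import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5)  # 28x28 -> 24x24
        self.pool = nn.MaxPool2d(2, 2)                                        # 24x24 -> 12x12
        self.fc = nn.Linear(16 * 12 * 12, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv(x)))
        x = x.view(-1, 16 * 12 * 12)
        return self.fc(x)  # logits, as expected by nn.CrossEntropyLoss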
Example 2
    def test_ptclassifier(self):
        """
        Third test with the PyTorchClassifier.
        :return:
        """
        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train = np.swapaxes(x_train, 1, 3)
        x_test = np.swapaxes(x_test, 1, 3)

        # Define the network
        model = Model()

        # Define a loss function and optimizer
        loss_fn = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01)

        # Get classifier
        ptc = PyTorchClassifier((0, 1), model, loss_fn, optimizer, (1, 28, 28),
                                10)
        ptc.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epochs=10)

        # First attack
        cl2m = CarliniL2Method(classifier=ptc, targeted=True, max_iter=10)
        params = {'y': random_targets(y_test, ptc.nb_classes)}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((target == y_pred_adv).any())

        # Second attack
        cl2m = CarliniL2Method(classifier=ptc, targeted=False, max_iter=10)
        params = {'y': random_targets(y_test, ptc.nb_classes)}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((target != y_pred_adv).any())

        # Third attack
        cl2m = CarliniL2Method(classifier=ptc, targeted=False, max_iter=10)
        params = {}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        y_pred = np.argmax(ptc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())
Example 3
    def test_iris_pt(self):
        """
        Third test for Pytorch.
        :return:
        """
        # Build PyTorchClassifier
        victim_ptc = get_iris_classifier_pt()

        class Model(nn.Module):
            """
            Create Iris model for PyTorch.
            """
            def __init__(self):
                super(Model, self).__init__()

                self.fully_connected1 = nn.Linear(4, 10)
                self.fully_connected2 = nn.Linear(10, 10)
                self.fully_connected3 = nn.Linear(10, 3)

            # pylint: disable=W0221
            # disable pylint because of API requirements for function
            def forward(self, x):
                x = self.fully_connected1(x)
                x = self.fully_connected2(x)
                logit_output = self.fully_connected3(x)

                return logit_output

        # Define the network
        model = Model()

        # Define a loss function and optimizer
        loss_fn = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        # Get classifier
        thieved_ptc = PyTorchClassifier(model=model,
                                        loss=loss_fn,
                                        optimizer=optimizer,
                                        input_shape=(4, ),
                                        nb_classes=3,
                                        clip_values=(0, 1),
                                        channel_index=1)

        # Create attack
        copycat_cnn = CopycatCNN(classifier=victim_ptc,
                                 batch_size_fit=BATCH_SIZE,
                                 batch_size_query=BATCH_SIZE,
                                 nb_epochs=NB_EPOCHS,
                                 nb_stolen=NB_STOLEN)
        thieved_ptc = copycat_cnn.extract(x=self.x_train,
                                          thieved_classifier=thieved_ptc)

        victim_preds = np.argmax(victim_ptc.predict(x=self.x_train[:100]),
                                 axis=1)
        thieved_preds = np.argmax(thieved_ptc.predict(x=self.x_train[:100]),
                                  axis=1)
        acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)

        self.assertGreater(acc, 0.3)
Example 4
def main(config_filepath):

    config = load_config(config_filepath)

    if os.path.isfile(config.metrics_output_path):
        click.confirm(f"Overwrite {config.metrics_output_path}?", abort=True)

    np.random.seed(config.seed)
    torch.manual_seed(config.seed)

    # Load data
    x = torch.load(config.x_filepath)
    y = torch.load(config.y_filepath)

    # Flatten
    x = x.reshape(x.shape[0], -1)

    model = torch.load(config.trained_model_filepath)

    with open(config.clip_values_filepath, "r") as f:
        clip_values = json.load(f)
    clip_values = (
        clip_values.get("min_pixel_value"),
        clip_values.get("max_pixel_value"),
    )

    classifier = PyTorchClassifier(
        model=model,
        clip_values=clip_values,
        loss=model.criterion,
        optimizer=model.optimizer,
        input_shape=(1, 28, 28),
        nb_classes=10,
    )  # TODO: move these parameters to config

    # Evaluate the classifier on benign data
    predictions = classifier.predict(x)

    # Convert one-hots to numbers for metrics
    y = utils.one_hot_to_num(y)
    predictions = utils.one_hot_to_num(predictions)
    accuracy = {
        "Accuracy": metrics.accuracy_score(y, predictions),
        "Confusion Matrix": metrics.confusion_matrix(y, predictions).tolist(),
    }

    # Save data
    with open(config.metrics_output_path, "w") as f:
        json.dump(
            accuracy,
            f,
            ensure_ascii=False,
            sort_keys=True,
            indent=4,
            separators=(",", ": "),
        )
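
utils.one_hot_to_num is a project helper; a plausible stand-in (an assumption inferred from its use above, not the project's actual implementation) takes the argmax over the class axis:

import numpy as np

def one_hot_to_num(y_one_hot):
    # Map one-hot (or probability) rows of shape (n, n_classes) to integer labels of shape (n,)
    return np.argmax(y_one_hot, axis=1)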
Example 5
def apply_attack(model_path, dataset, model_attack_config):
	model = torch.load(model_path, map_location='cpu').model

	input_shape = model_attack_config[lookup.input_shape]
	criterion = model_attack_config[lookup.criterion]
	optimizer = model_attack_config[lookup.optimizer]
	nb_classes = model_attack_config[lookup.nb_classes]
	attack_method = model_attack_config[lookup.attack_method]
	robust_db_name = model_attack_config[lookup.robust_db_name]

	if criterion == 'cross_entropy':
		criterion = nn.CrossEntropyLoss()
	else:
		raise ValueError

	if optimizer == 'SGD':
		optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
	elif optimizer == 'Adam':
		optimizer = optim.Adam(model.parameters(), lr=1e-4)
	else:
		raise ValueError

	classifier = PyTorchClassifier(model=model, input_shape=input_shape, loss=criterion, optimizer=optimizer, nb_classes=nb_classes)
	x = np.array([x_element.numpy()[0] for x_element in dataset[0]])
	y = np.array(dataset[1])

	predictions = classifier.predict(x)
	accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y, axis=1)) / len(y)

	print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

	attack_function = get_attack_method(attack_method)
	attack_instance = attack_function(classifier=classifier)
	x_adv = attack_instance.generate(x=x)

	predictions = classifier.predict(x_adv)
	accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y, axis=1)) / len(y)
	print('Accuracy on adversarial test examples: {}%'.format(accuracy * 100))

	path_to_directory = join(abspath(lookup.get_db_dirs()[lookup.dataset]), fs.get_uuid())
	fs.make_dir(path_to_directory)

	db_uuid = processor.convert_to_image(path_to_directory, robust_db_name, x_adv)
	return db_uuid
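
get_attack_method resolves the configured attack name to an ART attack class. A minimal sketch under the assumption that only a couple of evasion attacks are supported; import paths and constructor keywords vary across ART versions, so the real resolver may differ:

from art.attacks.evasion import DeepFool, FastGradientMethod

def get_attack_method(attack_method):
    # Hypothetical name-to-class lookup; extend with further ART attacks as needed
    attacks = {'fgsm': FastGradientMethod, 'deepfool': DeepFool}
    return attacks[attack_method]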
Example 6
    def test_ptclassifier(self):
        """
        Third test with the PyTorchClassifier.
        :return:
        """
        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train = np.swapaxes(x_train, 1, 3)
        x_test = np.swapaxes(x_test, 1, 3)

        # Create a simple CNN
        model = Model()

        # Define a loss function and optimizer
        loss_fn = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01)

        # Get classifier
        ptc = PyTorchClassifier((0, 1), model, loss_fn, optimizer, (1, 28, 28),
                                10)
        ptc.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epochs=1)

        # Attack
        # TODO Launch with all possible attacks
        attack_params = {
            "attacker": "newtonfool",
            "attacker_params": {
                "max_iter": 5
            }
        }
        up = UniversalPerturbation(ptc)
        x_train_adv = up.generate(x_train, **attack_params)
        self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

        x_test_adv = x_test + up.v
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = np.argmax(ptc.predict(x_train_adv), axis=1)
        test_y_pred = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
        self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())
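
Passing attacker and attacker_params to generate() reflects an early ART API. In recent ART releases they are constructor arguments instead; a sketch of the equivalent call (check the signature of the installed version):

up = UniversalPerturbation(ptc, attacker="newtonfool", attacker_params={"max_iter": 5})
x_train_adv = up.generate(x_train)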
Example 7
def main(args):
    (x_train, y_train), (x_test,
                         y_test), min_, max_ = load_dataset(str('cifar10'))
    x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
    x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)

    model = VGG('VGG16')

    model.load_state_dict(torch.load("./logs/pytorch_vgg16.model"))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-2)

    classifier = PyTorchClassifier(model=model,
                                   clip_values=(min_, max_),
                                   loss=criterion,
                                   optimizer=optimizer,
                                   input_shape=(3, 32, 32),
                                   nb_classes=10)

    predictions = classifier.predict(x_test)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(
            y_test)
    print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

    pred_y = classifier.predict(x_train)
    v_max = 0

    n = 1000
    n_sp = list(split_list(x_train, n))
    for i, X_t in enumerate(n_sp):
        print(f'split_No {i + 1}')
        v_max = simulated_anniling(classifier, X_t, args)

    # Compute fooling rate
    adv_x = x_train + v_max
    adv_y = classifier.predict(adv_x)
    fooling_rate = loss_fn(pred_y, adv_y, mode="fool_rate")
    print(fooling_rate)
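
loss_fn with mode="fool_rate" is defined elsewhere in this project. Judging from how it is used here, it presumably measures the fraction of inputs whose predicted class changes under the perturbation; a stand-in under that assumption:

import numpy as np

def fool_rate(pred_clean, pred_adv):
    # Fraction of samples whose argmax prediction differs after the perturbation
    return np.mean(np.argmax(pred_clean, axis=1) != np.argmax(pred_adv, axis=1))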
Example 8
def main(args):
    attack_fn = {'fgsm': fgsm, 'saliency_map': saliency_map}
    _, val_dataset, input_shape, n_classes = utils.load_dataset(args.dataset)
    inputs = np.array([x.numpy() for x, _ in val_dataset])
    inputs /= 255
    targets = np.array([int(y) for _, y in val_dataset])

    model = torch.load(args.model_file)
    model = model.eval().to(args.device)

    loss = torch.nn.CrossEntropyLoss()
    classifier = PyTorchClassifier(model,
                                   loss,
                                   None,
                                   input_shape,
                                   n_classes,
                                   preprocessing=(0, 1 / 255),
                                   clip_values=(0, 1))

    base_preds = np.argmax(classifier.predict(inputs, 1024), 1)
    results = {
        # 'baseline': utils.evaluate(model, val_dataset, args.device)
        'baseline': np.sum(base_preds == targets) / len(inputs)
    }
    for attack in args.attacks:
        logger.info('Crafting Adversarial Examples Using %s' % attack)
        x_test_adv = attack_fn[attack](classifier, inputs, targets,
                                       args.epsilon)
        # adv_dataset = mDataset(x_test_adv, targets)
        preds = np.argmax(classifier.predict(x_test_adv, 1024), 1)
        acc = np.sum(preds == targets) / len(inputs)
        results[attack] = acc
        x_test_adv = np.transpose(x_test_adv, (0, 2, 3, 1))  #.astype('uint8')
        save_images(x_test_adv, os.path.join(args.outdir, attack))

        # logger.info('adversarial_pred\tbaseline_pred\ttarget')
        # for ap,bp,t in zip(preds, base_preds, targets):
        #     logger.info('%d\t\t\t%d\t\t\t%d' % (ap,bp,t))

    logger.info(resultsToString(results))
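
The fgsm and saliency_map entries in attack_fn are project wrappers. A plausible fgsm wrapper over ART, given as an assumption; targets is accepted only to keep a uniform attack signature, and the classifier keyword (estimator vs. classifier) depends on the ART version:

from art.attacks.evasion import FastGradientMethod

def fgsm(classifier, inputs, targets, epsilon):
    # Untargeted FGSM on the given inputs; 'targets' is unused in the untargeted case
    attack = FastGradientMethod(estimator=classifier, eps=epsilon)
    return attack.generate(x=inputs)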
Example 9
def test():
    (x_train, y_train), (x_test, y_test), min_, max_ = load_dataset(str('cifar10'))

    x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
    x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)

    model = VGG('VGG16')
    model.load_state_dict(torch.load("./logs/pytorch_vgg16.h5.model"))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-2)

    classifier = PyTorchClassifier(model=model, clip_values=(min_, max_), loss=criterion,
                                   optimizer=optimizer, input_shape=(3, 32, 32), nb_classes=10)

    predictions = classifier.predict(x_test)
    accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
    print('Accuracy on benign test examples: {}%'.format(accuracy * 100))
Example 10
print("training...")
classifier1.fit(model1_x_train,
                model1_y_train,
                batch_size=batch_size,
                nb_epochs=n_epochs)
classifier2.fit(model2_x_train,
                model2_y_train,
                batch_size=batch_size,
                nb_epochs=n_epochs)

# evaluation
model1.eval()
model2.eval()

predictions = classifier1.predict(shared_x_test)
acc = accuracy(predictions, shared_y_test)
print('Accuracy of model1 on shared test examples: {}%'.format(acc * 100))

top_five_acc = accuracy_n(predictions, shared_y_test, 5)
print('Top 5 accuracy of model1 on shared test examples: {}%'.format(
    top_five_acc * 100))

predictions = classifier2.predict(shared_x_test)
acc = accuracy(predictions, shared_y_test)
print('Accuracy of model2 on shared test examples: {}%'.format(acc * 100))

top_five_acc = accuracy_n(predictions, shared_y_test, 5)
print('Top 5 accuracy of model2 on shared test examples: {}%'.format(
    top_five_acc * 100))
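
accuracy and accuracy_n are project helpers; plausible stand-ins (assumptions inferred from the calls above, where shared_y_test is one-hot encoded):

import numpy as np

def accuracy(predictions, y_true):
    # Top-1 accuracy from prediction scores and one-hot labels
    return np.mean(np.argmax(predictions, axis=1) == np.argmax(y_true, axis=1))

def accuracy_n(predictions, y_true, n):
    # Top-n accuracy: correct if the true class is among the n highest-scoring classes
    top_n = np.argsort(predictions, axis=1)[:, -n:]
    labels = np.argmax(y_true, axis=1)
    return np.mean([label in row for label, row in zip(labels, top_n)])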
Example 11
def gzsl_launch(dataloader_seen, dataloader_unseen, all_vectors, criterion,
                params):

    if params["dataset"] == "CUB":
        from configs.config_CUB import MODEL_PATH, SMOOTHED_MODEL_PATH
    elif params["dataset"] == "AWA2":
        from configs.config_AWA2 import MODEL_PATH, SMOOTHED_MODEL_PATH
    elif params["dataset"] == "SUN":
        from configs.config_SUN import MODEL_PATH, SMOOTHED_MODEL_PATH

    resnet = torchvision.models.resnet101(pretrained=True).cuda()
    feature_extractor = nn.Sequential(*list(resnet.children())[:-1])

    if params["hasDefense"] and params["defense"] == "label_smooth":
        model_ale = torch.load(SMOOTHED_MODEL_PATH).cuda()
    else:
        model_ale = torch.load(MODEL_PATH).cuda()

    full_graph = FullGraph(feature_extractor, model_ale, all_vectors).cuda()
    full_graph.eval()
    optimizer = optim.SGD(full_graph.parameters(), lr=0.01, momentum=0.5)

    if params["dataset"] == "CUB":
        no_classes = 200
    elif params["dataset"] == "AWA2":
        no_classes = 50
    elif params["dataset"] == "SUN":
        no_classes = 717

    classifier = PyTorchClassifier(model=full_graph,
                                   loss=criterion,
                                   optimizer=optimizer,
                                   input_shape=(1, 150, 150),
                                   nb_classes=no_classes)

    if params["attack"] == "fgsm":
        batch_size = 1
        attack = FastGradientMethod(classifier=classifier,
                                    eps=params["fgsm_params"]["epsilon"],
                                    batch_size=batch_size)

    elif params["attack"] == "deepfool":
        batch_size = 1
        attack = DeepFool(classifier,
                          max_iter=params["deepfool_params"]["max_iter"],
                          epsilon=params["deepfool_params"]["epsilon"],
                          nb_grads=params["deepfool_params"]["nb_grads_gzsl"],
                          batch_size=batch_size)

    elif params["attack"] == "carlini_wagner":
        batch_size = params["batch_size"] if params["custom_collate"] else 1
        attack = CarliniL2Method(
            classifier,
            confidence=params["carliniwagner_params"]["confidence"],
            learning_rate=params["carliniwagner_params"]["learning_rate"],
            binary_search_steps=params["carliniwagner_params"]
            ["binary_search_steps"],
            max_iter=params["carliniwagner_params"]["max_iter"],
            initial_const=params["carliniwagner_params"]["initial_const"],
            max_halving=params["carliniwagner_params"]["max_halving"],
            max_doubling=params["carliniwagner_params"]["max_doubling"],
            batch_size=batch_size)

    preds_seen = []
    preds_seen_defended = []

    adv_preds_seen = []
    adv_preds_seen_defended = []
    labels_seen_ = []

    start = time.time()
    if params["hasDefense"]:
        if params["defense"] == "spatial_smooth":
            defense = SpatialSmoothing(
                window_size=params["ss_params"]["window_size"])
        elif params["defense"] == "totalvar":
            defense = TotalVarMin(
                max_iter=params["totalvar_params"]["max_iter"])

    for index, sample in enumerate(dataloader_seen):
        img = sample[0].numpy()
        label = sample[1].numpy()

        if params["clean_results"]:
            if params["hasDefense"] and params["defense"] != "label_smooth":
                img_def, _ = defense(img)
                predictions_defended = classifier.predict(
                    img_def, batch_size=batch_size)
                preds_seen_defended.extend(
                    np.argmax(predictions_defended, axis=1))
            predictions = classifier.predict(img, batch_size=batch_size)
            preds_seen.extend(np.argmax(predictions, axis=1))

        img_perturbed = attack.generate(x=img)
        if params["hasDefense"] and params["defense"] != "label_smooth":
            img_perturbed_defended, _ = defense(img_perturbed)
            predictions_adv_defended = classifier.predict(
                img_perturbed_defended, batch_size=batch_size)
            adv_preds_seen_defended.extend(
                np.argmax(predictions_adv_defended, axis=1))

        predictions_adv = classifier.predict(img_perturbed,
                                             batch_size=batch_size)
        adv_preds_seen.extend(np.argmax(predictions_adv, axis=1))
        labels_seen_.extend(label)

        if index % 1000 == 0:
            print(index, len(dataloader_seen))

    labels_seen_ = np.array(labels_seen_)
    adv_preds_seen = np.array(adv_preds_seen)
    adv_preds_seen_defended = np.array(adv_preds_seen_defended)
    uniq_labels_seen = np.unique(labels_seen_)

    adv_preds_unseen = []
    adv_preds_unseen_defended = []
    labels_unseen_ = []

    if params["clean_results"]:
        preds_unseen = []
        preds_seen = np.array(preds_seen)
        preds_unseen_defended = []
        preds_seen_defended = np.array(preds_seen_defended)

    for index, sample in enumerate(dataloader_unseen):
        img = sample[0].numpy()
        label = sample[1].numpy()

        if params["clean_results"]:
            if params["hasDefense"] and params["defense"] != "label_smooth":
                img_def, _ = defense(img)
                predictions_defended = classifier.predict(
                    img_def, batch_size=batch_size)
                preds_unseen_defended.extend(
                    np.argmax(predictions_defended, axis=1))
            predictions = classifier.predict(img, batch_size=batch_size)
            preds_unseen.extend(np.argmax(predictions, axis=1))

        img_perturbed = attack.generate(x=img)
        if params["hasDefense"] and params["defense"] != "label_smooth":
            img_perturbed_defended, _ = defense(img_perturbed)
            predictions_adv_defended = classifier.predict(
                img_perturbed_defended, batch_size=batch_size)
            adv_preds_unseen_defended.extend(
                np.argmax(predictions_adv_defended, axis=1))

        predictions_adv = classifier.predict(img_perturbed,
                                             batch_size=batch_size)
        adv_preds_unseen.extend(np.argmax(predictions_adv, axis=1))
        labels_unseen_.extend(label)

        if index % 1000 == 0:
            print(index, len(dataloader_unseen))

    end = time.time()

    labels_unseen_ = np.array(labels_unseen_)
    adv_preds_unseen = np.array(adv_preds_unseen)
    adv_preds_unseen_defended = np.array(adv_preds_unseen_defended)
    uniq_labels_unseen = np.unique(labels_unseen_)

    combined_labels = np.concatenate((labels_seen_, labels_unseen_))
    combined_preds_adv = np.concatenate((adv_preds_seen, adv_preds_unseen))
    combined_preds_adv_defended = np.concatenate(
        (adv_preds_seen_defended, adv_preds_unseen_defended))

    if params["clean_results"]:
        preds_unseen = np.array(preds_unseen)
        combined_preds = np.concatenate((preds_seen, preds_unseen))

        seen, unseen, h = harmonic_score_gzsl(combined_preds, combined_labels,
                                              uniq_labels_seen,
                                              uniq_labels_unseen)
        print("GZSL Clean (s/u/h):", seen, unseen, h)

        if params["hasDefense"] and params["defense"] != "label_smooth":
            preds_unseen_defended = np.array(preds_unseen_defended)
            combined_preds_defended = np.concatenate(
                (preds_seen_defended, preds_unseen_defended))
            seen, unseen, h = harmonic_score_gzsl(combined_preds_defended,
                                                  combined_labels,
                                                  uniq_labels_seen,
                                                  uniq_labels_unseen)
            print("GZSL Clean + defended (s/u/h):", seen, unseen, h)

    seen, unseen, h = harmonic_score_gzsl(combined_preds_adv, combined_labels,
                                          uniq_labels_seen, uniq_labels_unseen)
    print("GZSL Attacked (s/u/h):", seen, unseen, h)

    if params["hasDefense"] and params["defense"] != "label_smooth":
        seen, unseen, h = harmonic_score_gzsl(combined_preds_adv_defended,
                                              combined_labels,
                                              uniq_labels_seen,
                                              uniq_labels_unseen)
        print("GZSL Attacked + defended (s/u/h):", seen, unseen, h)

    print(end - start, "seconds passed for GZSL.")
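
harmonic_score_gzsl is a project helper. The standard generalized zero-shot metric it presumably implements averages per-class accuracy separately over seen and unseen classes and reports their harmonic mean; a sketch under that assumption:

import numpy as np

def harmonic_score_gzsl(preds, labels, seen_classes, unseen_classes):
    def per_class_acc(classes):
        # Average of per-class accuracies, so rare classes weigh as much as common ones
        return np.mean([np.mean(preds[labels == c] == c) for c in classes])
    s = per_class_acc(seen_classes)
    u = per_class_acc(unseen_classes)
    h = 2 * s * u / (s + u) if (s + u) > 0 else 0.0
    return s, u, h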
Example 12
def train_adv(data='mnist',
              model_name='basic',
              n_samples=2000,
              eps=2.,
              path_output='results',
              path_data='data',
              is_test=False,
              batch_size=128,
              device='cpu'):
    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_test = datasets.MNIST(path_data,
                                      train=False,
                                      download=True,
                                      transform=transforms)
    elif data == 'cifar10':
        dataset_test = datasets.CIFAR10(path_data,
                                        train=False,
                                        download=True,
                                        transform=transforms)
    else:
        raise NotImplementedError
    loader_test = DataLoader(dataset_test,
                             batch_size=batch_size,
                             shuffle=False)

    # Load model
    if data == 'mnist':
        model = BaseModel(use_prob=False).to(device)
        n_features = (1, 28, 28)
        pretrained = 'mnist_200.pt'
    elif data == 'cifar10':
        n_features = (3, 32, 32)
        if model_name == 'resnet':
            model = Resnet(use_prob=False).to(device)
            pretrained = 'cifar10_resnet_200.pt'
        elif model_name == 'vgg':
            model = Vgg(use_prob=False).to(device)
            pretrained = 'cifar10_vgg_200.pt'
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError

    pretrained_path = os.path.join(path_output, pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))
    optimizer = optim.SGD(model.parameters(),
                          lr=0.01,
                          momentum=0.9,
                          weight_decay=5e-4)
    loss = nn.CrossEntropyLoss()
    _, acc_test = validate(model, loader_test, loss, device)
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    tensor_test_X, tensor_test_y = get_correct_examples(model,
                                                        dataset_test,
                                                        device=device,
                                                        return_tensor=True)
    # Get samples from the tail
    if not is_test:
        # This is for training the surrogate model
        tensor_test_X = tensor_test_X[-n_samples:]
        tensor_test_y = tensor_test_y[-n_samples:]
    else:
        # This is for testing the surrogate model
        tensor_test_X = tensor_test_X[-n_samples - 2000:-2000]
        tensor_test_y = tensor_test_y[-n_samples - 2000:-2000]
    dataset_test = TensorDataset(tensor_test_X, tensor_test_y)
    loader_test = DataLoader(dataset_test,
                             batch_size=batch_size,
                             shuffle=False)
    _, acc_perfect = validate(model, loader_test, loss, device)
    print('Accuracy on {} filtered test set: {:.4f}%'.format(
        len(dataset_test), acc_perfect * 100))

    classifier = PyTorchClassifier(model=model,
                                   loss=loss,
                                   input_shape=n_features,
                                   optimizer=optimizer,
                                   nb_classes=10,
                                   clip_values=(0.0, 1.0),
                                   device_type='gpu')
    attack = AutoProjectedGradientDescent(estimator=classifier,
                                          eps=eps,
                                          eps_step=0.1,
                                          max_iter=1000,
                                          batch_size=batch_size,
                                          targeted=False)

    X_benign = tensor_test_X.cpu().detach().numpy()
    y_true = tensor_test_y.cpu().detach().numpy()
    adv = attack.generate(x=X_benign)
    pred_adv = np.argmax(classifier.predict(adv), axis=1)
    acc_adv = np.mean(pred_adv == y_true)
    print("Accuracy on adversarial examples: {:.4f}%".format(acc_adv * 100))

    if not is_test:
        output_file = '{}_{}_baard_surro_train_eps{}_size{}.pt'.format(
            data, model_name, eps, n_samples)
    else:
        output_file = '{}_{}_baard_surro_test_eps{}_size{}.pt'.format(
            data, model_name, eps, n_samples)
    file_path = os.path.join(path_output, output_file)
    output = {'X': X_benign, 'adv': adv, 'y': y_true}
    torch.save(output, file_path)
    print('Save to:', file_path)
Example 13
def main(args):
    (x_train, y_train), (x_test,
                         y_test), min_, max_ = load_dataset(str('cifar10'))
    x_train = np.swapaxes(x_train, 1, 3).astype(np.float32)
    x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)

    x_train = x_train[:50]
    y_train = y_train[:50]

    model = VGG('VGG16')
    model.load_state_dict(torch.load("./logs/pytorch_vgg16.model"))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-2)

    classifier = PyTorchClassifier(model=model,
                                   clip_values=(min_, max_),
                                   loss=criterion,
                                   optimizer=optimizer,
                                   input_shape=(3, 32, 32),
                                   nb_classes=10)

    predictions = classifier.predict(x_test)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(
            y_test)
    print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

    attack_params = {
        "attacker": "fgsm",
        # "attacker_params": {
        #     "max_iter": 1000,
        #     "epsilon": 0.02
        # },
        "delta": 0.01,
        "max_iter": 1000,
        "eps": 13.0 / 255.0,
        "norm": np.inf
    }

    # Craft attack on training examples
    adv_crafter = UniversalPerturbation(classifier, **attack_params)
    x_train_adv = adv_crafter.generate(x_train)

    # Fooling rate on the training set
    print('Fooling rate:', adv_crafter.fooling_rate)
    # Whether the attack converged
    print('Converged:', adv_crafter.converged)

    print('\nCraft attack train examples')
    # adv_crafter.v: vector (array) for perturbation
    # perturbation = adv_crafter.v[0, :]
    # universal perturbation
    perturbation = adv_crafter.noise
    x_train_adv = x_train + perturbation

    # randomized perturbation (control)
    # perturbation_rand = np.random.permutation(perturbation.reshape(32 * 32 * 3)).reshape(3, 32, 32)
    # x_train_adv_rand = x_train + perturbation_rand

    preds = np.argmax(classifier.predict(x_train), axis=1)
    preds_adv = np.argmax(classifier.predict(x_train_adv), axis=1)
    acc = np.sum(preds != preds_adv) / y_train.shape[0]
    # Fooling rate on train set (universal perturbation)
    print("\nFooling rate: %.2f%%" % (acc * 100))
Example 14
    def test_ptclassifier(self):
        """
        Third test with the PyTorchClassifier.
        :return:
        """
        # Build PyTorchClassifier
        victim_ptc = get_classifier_pt()

        class Model(nn.Module):
            """
            Create model for pytorch.
            """
            def __init__(self):
                super(Model, self).__init__()

                self.conv = nn.Conv2d(in_channels=1,
                                      out_channels=1,
                                      kernel_size=7)
                self.pool = nn.MaxPool2d(4, 4)
                self.fullyconnected = nn.Linear(25, 10)

            # pylint: disable=W0221
            # disable pylint because of API requirements for function
            def forward(self, x):
                """
                Forward function to evaluate the model

                :param x: Input to the model
                :return: Prediction of the model
                """
                x = self.conv(x)
                x = torch.nn.functional.relu(x)
                x = self.pool(x)
                x = x.reshape(-1, 25)
                x = self.fullyconnected(x)
                x = torch.nn.functional.softmax(x, dim=1)

                return x

        # Define the network
        model = Model()

        # Define a loss function and optimizer
        loss_fn = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01)

        # Get classifier
        thieved_ptc = PyTorchClassifier(model=model,
                                        loss=loss_fn,
                                        optimizer=optimizer,
                                        input_shape=(1, 28, 28),
                                        nb_classes=10,
                                        clip_values=(0, 1))

        # Create attack
        copycat_cnn = CopycatCNN(classifier=victim_ptc,
                                 batch_size_fit=BATCH_SIZE,
                                 batch_size_query=BATCH_SIZE,
                                 nb_epochs=NB_EPOCHS,
                                 nb_stolen=NB_STOLEN)

        self.x_train = np.swapaxes(self.x_train, 1, 3)
        thieved_ptc = copycat_cnn.extract(x=self.x_train,
                                          thieved_classifier=thieved_ptc)
        victim_preds = np.argmax(victim_ptc.predict(x=self.x_train[:100]),
                                 axis=1)
        thieved_preds = np.argmax(thieved_ptc.predict(x=self.x_train[:100]),
                                  axis=1)
        self.x_train = np.swapaxes(self.x_train, 1, 3)

        acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)

        self.assertGreater(acc, 0.3)
Example 15
class Evaluator(object):
    """Evaluator for adversarial robust"""
    def __init__(self, model, dataset):
        model.eval()
        self.model = model
        self.dataset = dataset
        optimizer = torch.optim.Adam(model.parameters())  # Useless
        self.nb_classes = nb_classes[dataset]
        self.classifier = PyTorchClassifier((0., 1.),
                                            model=self.model,
                                            loss=nn.CrossEntropyLoss(),
                                            optimizer=optimizer,
                                            input_shape=input_shape[dataset],
                                            nb_classes=self.nb_classes)

    def evaluate(self, data_loader):
        """Natural evaluation"""
        data_iter = iter(data_loader)
        examples, labels = next(data_iter)
        examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
        preds = np.argmax(self.classifier.predict(examples), axis=1)
        acc = np.sum(preds == labels) / labels.shape[0]
        return acc

    def evaluate_fgsm(self, data_loader):
        """Adversarial evaluation by FGSM"""
        norm, eps = np.inf, attack_configs['FGSM'][self.dataset]['epsilon']
        adv_crafter = FastGradientMethod(self.classifier, norm=norm, eps=eps)

        data_iter = iter(data_loader)
        examples, labels = next(data_iter)
        examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
        labels_one_hot = np.eye(self.nb_classes)[labels]
        examples_adv = adv_crafter.generate(examples, y=labels_one_hot)

        preds = np.argmax(self.classifier.predict(examples_adv), axis=1)
        acc = np.sum(preds == labels) / labels.shape[0]
        return acc

    def evaluate_pgd(self, data_loader, num_iter=40):
        """Adversarial evaluation by PGD"""
        norm, eps = np.inf, attack_configs['PGD'][self.dataset]['epsilon']
        eps_step = 2 * eps / num_iter
        adv_crafter = ProjectedGradientDescent(self.classifier,
                                               norm=norm,
                                               eps=eps,
                                               eps_step=eps_step,
                                               max_iter=num_iter,
                                               random_init=True)

        data_iter = iter(data_loader)
        examples, labels = next(data_iter)
        examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
        labels_one_hot = np.eye(self.nb_classes)[labels]
        examples_adv = adv_crafter.generate(examples, y=labels_one_hot)

        preds = np.argmax(self.classifier.predict(examples_adv), axis=1)
        acc = np.sum(preds == labels) / labels.shape[0]
        return acc

    def evaluate_cw(self, data_loader):
        eps = attack_configs['PGD'][self.dataset]['epsilon']
        adv_crafter = CarliniLInfMethod(self.classifier,
                                        targeted=False,
                                        eps=eps)

        data_iter = iter(data_loader)
        examples, labels = next(data_iter)
        examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
        labels_one_hot = np.eye(self.nb_classes)[labels]
        examples_adv = adv_crafter.generate(examples, y=labels_one_hot)

        preds = np.argmax(self.classifier.predict(examples_adv), axis=1)
        acc = np.sum(preds == labels) / labels.shape[0]
        return acc

    def evaluate_cw_l2(self, data_loader):
        adv_crafter = CarliniL2Method(self.classifier, targeted=False)

        data_iter = iter(data_loader)
        examples, labels = next(data_iter)
        examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
        labels_one_hot = np.eye(self.nb_classes)[labels]
        examples_adv = adv_crafter.generate(examples, y=labels_one_hot)

        preds = np.argmax(self.classifier.predict(examples_adv), axis=1)
        acc = np.sum(preds == labels) / labels.shape[0]
        return acc

    def evaluate_robust(self, data_loader):
        data_iter = iter(data_loader)
        examples, labels = next(data_iter)
        examples, labels = examples.cpu().numpy(), labels.cpu().numpy()
        labels_one_hot = np.eye(self.nb_classes)[labels]

        losses = []
        # Compute loss with implicit batching
        batch_size = 256
        for batch_id in range(
                int(np.ceil(examples.shape[0] / float(batch_size)))):
            batch_index_1, batch_index_2 = batch_id * batch_size, (
                batch_id + 1) * batch_size
            batch = examples[batch_index_1:batch_index_2]
            batch_labels = labels_one_hot[batch_index_1:batch_index_2]

            loss = loss_sensitivity(self.classifier, batch, batch_labels)
            losses.append(loss * batch.shape[0])

        res = sum(losses) / examples.shape[0]
        return res
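
A hypothetical usage of the Evaluator; the names below are assumptions, and nb_classes, input_shape and attack_configs are module-level tables in the original:

# evaluator = Evaluator(model, 'cifar10')
# print('clean accuracy:', evaluator.evaluate(test_loader))
# print('FGSM accuracy:', evaluator.evaluate_fgsm(test_loader))
# print('PGD accuracy:', evaluator.evaluate_pgd(test_loader, num_iter=40))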
Example 16
def main(args):
    mode = args.mode
    eps = args.eps
    norm = norm_list.get(args.norm)
    p_mod = args.p_mod
    temperature = args.temperature

    (x_train, y_train), (x_test, y_test), min_, max_ = load_data()

    x_train = x_train[:500]
    y_train = y_train[:500]

    model = VGG('VGG16')

    model.load_state_dict(torch.load("./logs/pytorch_vgg16.model"))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-2)

    classifier = PyTorchClassifier(model=model,
                                   clip_values=(min_, max_),
                                   loss=criterion,
                                   optimizer=optimizer,
                                   input_shape=(3, 32, 32),
                                   nb_classes=10)

    predictions = classifier.predict(x_test)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(
            y_test)
    print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

    x = x_train.copy().astype(np.float32)

    nb_instances = len(x)
    pred_y = classifier.predict(x)

    nb_dims = x.shape[1] * x.shape[2] * x.shape[3]
    # Number of pixels to modify
    n_mod = int(round(nb_dims * p_mod))

    v = random_sphere(nb_points=1, nb_dims=nb_dims, radius=eps, norm=norm)
    v = v.reshape(1, x.shape[1], x.shape[2], x.shape[3])
    v = np.array(v, dtype='float32')
    v_max = v.copy()
    current_y = classifier.predict(x + v)
    fooling_rate = loss_fn(pred_y, current_y, mode=mode)
    fooling_rate_max = fooling_rate

    # Go through the data set and compute the perturbation increments sequentially
    for j in range(len(x) * 1000):
        v_ = v.reshape(nb_dims).copy()
        # Indices of the pixels to modify
        idx = random.sample(range(len(v_)), n_mod)
        # Replace with random draws from a normal distribution:
        #v_[idx] = np.random.normal(loc=0.0, scale=np.std(v_), size=n_mod)
        # Add random draws from a normal distribution:
        v_[idx] = v_[idx] + np.random.normal(
            loc=0.0, scale=np.std(v_), size=n_mod)
        # Add random draws from a uniform distribution:
        #v_[idx] = v_[idx] + np.random.uniform(low=v_.min(), high=v_.max(), size=n_mod)
        # Replace with random draws from a uniform distribution:
        #v_[idx] = np.random.uniform(low=v_.min(), high=v_.max(), size=n_mod)

        # Project the perturbation so it stays within the norm ball of radius eps
        v_ = projection(v_.reshape(1, 3, 32, 32), eps, norm)
        # fooling rate
        current_y = classifier.predict(x + v_)
        fooling_rate_ = loss_fn(pred_y, current_y, mode=mode)
        # Simulated-annealing acceptance test
        if random.random() < np.exp(
            (fooling_rate_ - fooling_rate_max) / (temperature + 1e-10)):
            print(j, fooling_rate_, fooling_rate_max, temperature,
                  np.linalg.norm(v_))
            v = v_.copy()
            if fooling_rate_max < fooling_rate_:
                fooling_rate_max = fooling_rate_
                v_max = v_.copy()
            fooling_rate = fooling_rate_
        temperature = 0.99 * temperature

    # Compute fooling rate
    adv_x = x + v_max
    plot_image(v_max[0], 'sample1.png')
    plot_image(adv_x[1], 'sample2.png')
    plot_image(x[1], 'sample3.png')
    adv_y = classifier.predict(adv_x)
    fooling_rate = loss_fn(pred_y, adv_y, mode="fool_rate")
    print(fooling_rate)
Example 17
# Step 2: Create the model

vgg_ver = "VGG16"
model = VGG(vgg_ver)

# Step 2a: Define the loss function and the optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-2)

# Step 3: Create the ART classifier

classifier = PyTorchClassifier(model=model,
                               clip_values=(min_, max_),
                               loss=criterion,
                               optimizer=optimizer,
                               input_shape=(3, 32, 32),
                               nb_classes=10)

# Step 4: Train the ART classifier

classifier.fit(x_train, y_train, batch_size=128, nb_epochs=30)
classifier.save(f"pytorch_{vgg_ver}", "./logs")

# Step 5: Evaluate the ART classifier on benign test examples

predictions = classifier.predict(x_test)
accuracy = np.sum(
    np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print('Accuracy on benign test examples: {}%'.format(accuracy * 100))
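
classifier.save writes the underlying model's state_dict to ./logs/pytorch_VGG16.model, which matches how the other snippets above reload it. A sketch of the round trip, assuming the same VGG definition is importable and the filename casing is consistent:

model = VGG(vgg_ver)
model.load_state_dict(torch.load(f"./logs/pytorch_{vgg_ver}.model"))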
Example 18
def main(seed, dataset_name, clf_name, detector_name, epsilon_lst, input_shape,
         json_param, path):
    set_seeds(SEEDS[seed])

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)

    # Load classifier
    print("load the classifier")
    file_model = os.path.join(
        'result_{:}'.format(seed),
        '{:}_{:}_model.pt'.format(dataset_name, clf_name))
    if clf_name == 'dnn':
        model = BaseModel(use_prob=False).to(device)
    elif clf_name == 'resnet':
        model = Resnet(use_prob=False).to(device)
    else:
        raise ValueError("model idx unknown")
    model.load_state_dict(torch.load(file_model, map_location=device))

    file_data = os.path.join(
        'result_{:}'.format(seed),
        '{:}_{:}_apgd2_2000.pt'.format(dataset_name, clf_name))
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    adv = obj['adv']

    print("undefended model acc")
    pred = predict_numpy(model, X, device)
    print('Acc on clean:', np.mean(pred == y))

    # Split data
    X_att_test = X[2000:3000].astype(np.float32)
    y_att_test = y[2000:3000].astype(np.float32)

    print("x attr shape ", X_att_test.shape)

    #################################################################
    print("Load Magnet")
    with open(json_param) as j:
        param = json.load(j)

    print("before load magnet")
    model_with_reformer_nn_module, detector_nn_module, full_magnet_orig = \
        loadmagnet(dataset_name, clf_name, param, device, path, model)

    print("Magnet loaded")

    loss = torch.nn.CrossEntropyLoss()
    # this one return the logits
    art_classifier = PyTorchClassifier(model=model_with_reformer_nn_module,
                                       loss=loss,
                                       input_shape=input_shape,
                                       nb_classes=10,
                                       optimizer=None)

    # y_pred = model_with_reformer_nn_module(X)
    # print("model_with_reformer_nn_module", y_pred.shape)

    y_pred = art_classifier.predict(X)
    print("art_classifier", y_pred.shape)

    print("check full magnet ")
    _, y_pred = full_magnet_orig.detect(X)
    print("full magnet", y_pred.shape)

    print("check detector nn module")
    # correctly returns an array with the logits
    y_pred = detector_nn_module(X)
    print("y pred ", y_pred)
    print("detector_nn_module", y_pred.shape)

    print("create pytorch detector")
    # must be only the detector
    art_detector = PyTorchClassifier(model=detector_nn_module,
                                     loss=loss,
                                     input_shape=input_shape,
                                     nb_classes=2,
                                     optimizer=None)

    print("check art detector")
    y_pred = art_detector.predict(X + 1000)
    print("detector_nn_module", y_pred.shape)
    print("art detector ok")

    print("y pred ", y_pred)
    print("detected by detector used by attack ",
          np.mean(y_pred.argmax(axis=1) == 1))

    clip_fun = None
    #################################################################

    pred_folder = 'result_{:}/predictions_wb_eval/{:}_{:}_{:}'.format(
        seed, dataset_name, clf_name, detector_name)

    print("compute prediction for samples at epsilon 0")
    x = X_att_test[:10]
    y = y_att_test[:10]

    # compute and save predictions
    cmpt_and_save_predictions(art_classifier, full_magnet_orig, art_detector,
                              device, x, y, pred_folder, 0)

    for eps in epsilon_lst:

        print("epsilon ", eps)

        print("detector threshold ", detector_nn_module.detector.threshold)

        attack = AutoProjectedGradientDescentDetectorsMagnet(
            estimator=art_classifier,
            detector=art_detector,
            detector_th=0,
            detector_clip_fun=clip_fun,
            loss_type='logits_difference',
            batch_size=128,
            norm=2,
            eps=eps,
            eps_step=0.9,
            beta=1.0,
            max_iter=100)

        adv_x = attack.generate(x=x, y=None)

        # compute and save predictions
        cmpt_and_save_predictions(art_classifier, full_magnet_orig,
                                  art_detector, device, adv_x, y, pred_folder,
                                  eps)
Example 19
def main():
    set_seeds(SEED)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)

    # Load classifier
    file_model = os.path.join('result_0', 'mnist_dnn_model.pt')
    model = BaseModel(use_prob=False).to(device)
    model.load_state_dict(torch.load(file_model, map_location=device))

    file_data = os.path.join('result_0', 'mnist_dnn_apgd2_3000.pt')
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    adv = obj['adv']

    pred = predict_numpy(model, X, device)
    print('Acc on clean:', np.mean(pred == y))

    pred = predict_numpy(model, adv, device)
    print('Acc on adv:', np.mean(pred == y))

    # Split data
    X_def_test = X[:1000]
    y_def_test = y[:1000]
    adv_def_test = adv[:1000]
    pred_adv_def_test = pred[:1000]

    X_def_val = X[1000:2000]
    y_def_val = y[1000:2000]
    adv_def_val = adv[1000:2000]
    pred_adv_def_val = pred[1000:2000]

    X_att_test = X[2000:4000]
    y_att_test = y[2000:4000]
    adv_att_test = adv[2000:4000]
    pred_adv_att_test = pred[2000:4000]

    X_surro_train = X[4000:]
    y_surro_train = y[4000:]
    adv_surro_train = adv[4000:]
    pred_adv_surro_train = pred[4000:]

    # Load baard
    file_baard_train = os.path.join('result_0',
                                    'mnist_dnn_baard_s1_train_data.pt')
    obj = torch.load(file_baard_train)
    X_baard_train_s1 = obj['X_s1']
    X_baard_train = obj['X']
    y_baard_train = obj['y']

    stages = []
    stages.append(ApplicabilityStage(n_classes=10, quantile=1., verbose=False))
    stages.append(
        ReliabilityStage(n_classes=10, k=10, quantile=1., verbose=False))
    stages.append(
        DecidabilityStage(n_classes=10, k=100, quantile=1., verbose=False))
    detector = BAARDOperator(stages=stages)

    detector.stages[0].fit(X_baard_train_s1, y_baard_train)
    for stage in detector.stages[1:]:
        stage.fit(X_baard_train, y_baard_train)

    file_baard_threshold = os.path.join('result_0',
                                        'mnist_dnn_baard_threshold.pt')
    thresholds = torch.load(file_baard_threshold)['thresholds']
    detector.load(file_baard_threshold)

    file_surro = os.path.join('result_0', 'mnist_dnn_baard_surrogate.pt')
    surrogate = get_pretrained_surrogate(file_surro, device)

    # Test surrogate model
    X_test = np.concatenate((X_att_test[1000:], adv_att_test[1000:]))
    pred_test = predict_numpy(model, X_test, device)
    label_test = detector.detect(X_test, pred_test)
    acc = acc_on_adv(pred_test[1000:], y_att_test[1000:], label_test[1000:])
    fpr = np.mean(label_test[:1000])
    print('BAARD Acc_on_adv:', acc)
    print('BAARD FPR:', fpr)

    label_surro = predict_numpy(surrogate, X_test, device)
    acc = np.mean(label_surro == label_test)
    print('Acc on surrogate:', acc)

    loss = torch.nn.CrossEntropyLoss()
    optimizer_clf = torch.optim.SGD(model.parameters(),
                                    lr=0.01,
                                    momentum=0.9,
                                    weight_decay=5e-4)
    art_classifier = PyTorchClassifier(model=model,
                                       loss=loss,
                                       input_shape=(1, 28, 28),
                                       nb_classes=10,
                                       optimizer=optimizer_clf)

    optimizer_sur = torch.optim.SGD(surrogate.parameters(),
                                    lr=0.01,
                                    momentum=0.9,
                                    weight_decay=5e-4)
    art_detector = PyTorchClassifier(model=surrogate,
                                     loss=loss,
                                     input_shape=(1, 28, 28),
                                     nb_classes=2,
                                     optimizer=optimizer_sur)

    loss_multiplier = 1. / 36.
    clip_fun = BAARD_Clipper(detector)

    attack = AutoProjectedGradientDescentDetectors(
        estimator=art_classifier,
        detector=art_detector,
        detector_th=0,  #fpr,
        clf_loss_multiplier=loss_multiplier,
        detector_clip_fun=clip_fun,
        loss_type='logits_difference',
        batch_size=128,
        norm=2,
        eps=8.0,
        eps_step=0.9,
        beta=0.5,
        max_iter=100)

    # X_toy = np.random.rand(128, 1, 28, 28).astype(np.float32)
    # pred_toy = art_classifier.predict(X_toy)
    # rejected_s1 = detector.stages[0].predict(X_toy, pred_toy)
    # print('Without:', np.mean(rejected_s1))

    # X_clipped = clip_fun(X_toy, art_classifier)
    # rejected_s1 = detector.stages[0].predict(X_clipped, pred_toy)
    # print('With:', np.mean(rejected_s1))
    # adv_x = attack.generate(x=X_toy)
    # pred_adv = predict_numpy(model, adv_x, device)
    # pred_sur = art_detector.predict(adv_x)
    # print('From surrogate model:', np.mean(pred_sur == 1))
    # labelled_as_adv = detector.detect(adv_x, pred_adv)
    # print('From BAARD', np.mean(labelled_as_adv == 1))

    # # Test it stage by stage
    # reject_s1 = detector.stages[0].predict(adv_x, pred_adv)
    # print('reject_s1', np.mean(reject_s1))
    # reject_s2 = detector.stages[1].predict(adv_x, pred_adv)
    # print('reject_s2', np.mean(reject_s2))
    # reject_s3 = detector.stages[2].predict(adv_x, pred_adv)
    # print('reject_s3', np.mean(reject_s3))

    x = X_att_test[:10]
    y = y_att_test[:10]
    adv_x = attack.generate(x=x, y=None)
    pred_adv = predict_numpy(model, adv_x, device)
    pred_sur = art_detector.predict(adv_x)

    pred = predict_numpy(model, adv_x, device)
    print('Acc classifier:', np.mean(pred == y))

    print('From surrogate model:', np.mean(pred_sur == 1))
    labelled_as_adv = detector.detect(adv_x, pred_adv)
    print('From BAARD', np.mean(labelled_as_adv == 1))

    # Test it stage by stage
    reject_s1 = detector.stages[0].predict(adv_x, pred_adv)
    print('reject_s1', np.mean(reject_s1))
    reject_s2 = detector.stages[1].predict(adv_x, pred_adv)
    print('reject_s2', np.mean(reject_s2))
    reject_s3 = detector.stages[2].predict(adv_x, pred_adv)
    print('reject_s3', np.mean(reject_s3))
    print()
Example 20
    max_iter=40,
    targeted=False,
    num_random_init=5,
    batch_size=32,
)

# Step 4: Create the trainer object - AdversarialTrainerFBFPyTorch
# if you have apex installed, change use_amp to True
epsilon = 8.0 / 255.0
trainer = AdversarialTrainerFBFPyTorch(classifier, eps=epsilon, use_amp=False)

# Build a Keras image augmentation object and wrap it in ART
art_datagen = PyTorchDataGenerator(iterator=dataloader,
                                   size=x_train.shape[0],
                                   batch_size=128)

# Step 5: fit the trainer
trainer.fit_generator(art_datagen, nb_epochs=30)

x_test_pred = np.argmax(classifier.predict(x_test), axis=1)
print(
    "Accuracy on benign test samples after adversarial training: %.2f%%" %
    (np.sum(x_test_pred == np.argmax(y_test, axis=1)) / x_test.shape[0] * 100))

x_test_attack = attack.generate(x_test)
x_test_attack_pred = np.argmax(classifier.predict(x_test_attack), axis=1)
print(
    "Accuracy on original PGD adversarial samples after adversarial training: %.2f%%"
    % (np.sum(x_test_attack_pred == np.argmax(y_test, axis=1)) /
       x_test.shape[0] * 100))
Example 21
def main():
    set_seeds(SEED)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)

    # Load classifier
    file_model = os.path.join('result_0', 'mnist_dnn_model.pt')
    model = BaseModel(use_prob=False).to(device)
    model.load_state_dict(torch.load(file_model, map_location=device))

    file_data = os.path.join('result_0', 'mnist_dnn_apgd2_3000.pt')
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    adv = obj['adv']

    pred = predict_numpy(model, X, device)
    print('Acc on clean:', np.mean(pred == y))

    pred = predict_numpy(model, adv, device)
    print('Acc on adv:', np.mean(pred == y))

    # Split data
    X_def_test = X[:1000]
    y_def_test = y[:1000]
    adv_def_test = adv[:1000]
    pred_adv_def_test = pred[:1000]

    X_def_val = X[1000:2000]
    y_def_val = y[1000:2000]
    adv_def_val = adv[1000:2000]
    pred_adv_def_val = pred[1000:2000]

    X_att_test = X[2000:4000]
    y_att_test = y[2000:4000]
    adv_att_test = adv[2000:4000]
    pred_adv_att_test = pred[2000:4000]

    X_surro_train = X[4000:]
    y_surro_train = y[4000:]
    adv_surro_train = adv[4000:]
    pred_adv_surro_train = pred[4000:]

    # Load baard
    file_baard_train = os.path.join(
        'result_0', 'mnist_dnn_baard_s1_train_data.pt')
    obj = torch.load(file_baard_train)
    X_baard_train_s1 = obj['X_s1']
    X_baard_train = obj['X']
    y_baard_train = obj['y']

    file_baard_threshold = os.path.join(
        'result_0', 'mnist_dnn_baard_threshold.pt')
    thresholds = torch.load(file_baard_threshold)['thresholds']

    stage1 = ApplicabilityStage(n_classes=10, quantile=1.)
    stage1.thresholds_ = thresholds[0]

    file_surro = os.path.join('result_0', 'mnist_dnn_baard_surrogate.pt')
    surrogate = get_pretrained_surrogate(file_surro, device)

    # Test surrogate model
    X_test = np.concatenate((X_att_test[1000:], adv_att_test[1000:]))
    pred_test = predict_numpy(model, X_test, device)
    # label_test = detector.detect(X_test, pred_test)
    # acc = acc_on_adv(pred_test[1000:], y_att_test[1000:], label_test[1000:])
    # fpr = np.mean(label_test[:1000])
    # print('BAARD Acc_on_adv:', acc)
    # print('BAARD FPR:', fpr)

    label_surro = predict_numpy(surrogate, X_test, device)
    # acc = np.mean(label_surro == label_test)
    # print('Acc on surrogate:', acc)

    loss = torch.nn.CrossEntropyLoss()
    optimizer_clf = torch.optim.SGD(
        model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    art_classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        input_shape=(1, 28, 28),
        nb_classes=10,
        optimizer=optimizer_clf
    )

    optimizer_sur = torch.optim.SGD(
        surrogate.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    art_detector = PyTorchClassifier(
        model=surrogate,
        loss=loss,
        input_shape=(1, 28, 28),
        nb_classes=2,
        optimizer=optimizer_sur
    )

    fpr = 0.05
    attack = AutoProjectedGradientDescentDetectors(
        estimator=art_classifier,
        detector=art_detector,
        detector_th=fpr,
        clf_loss_multiplier=1. / 36.,
        loss_type='logits_difference',
        batch_size=128,
        norm=2,
        eps=5.0,
        eps_step=0.9,
        beta=0.5,
        max_iter=100)

    # adv_x = attack.generate(x=X_att_test[:100], y=y_att_test[:100])
    file_whitebox_adv = 'mnist_apgd2_3000_whitebox_size100.npy'
    # np.save(file_whitebox_adv, adv_x)
    adv_x = np.load(file_whitebox_adv)
    print('adv_x', adv_x.shape)

    pred_adv = predict_numpy(model, adv_x, device)
    adv_x = clip_by_threshold(adv_x, pred_adv, thresholds[0])
    pred_sur = art_detector.predict(adv_x)
    print('From surrogate model:', np.mean(pred_sur == 1))
    labelled_as_adv = stage1.predict(adv_x, pred_adv)
    print('From BAARD', np.mean(labelled_as_adv == 1))
    
    # Testing 
    # X_toy = np.random.rand(128, 1, 28, 28).astype(np.float32)  # Same size as MNIST in a single batch
    # y_toy = np.concatenate((np.zeros(50), np.ones(50)))
    # rejected = stage1.predict(X_toy, y_toy)
    # print('rejected', np.mean(rejected))
    # X_bypass = clip_by_threshold(X_toy, y_toy, thresholds[0])
    # rejected_after = stage1.predict(X_bypass, y_toy)
    # print('rejected_after', np.mean(rejected_after))

    print('Pause')