Example #1
import numpy as np


def attack_statistics(args, model, x_test, distances, perturbed, or_acc,
                      m_train):
    """Per-attack success rate, confidence and perturbation statistics."""

    attacks = list(distances.keys())

    SR = {}          # success rate (% of originally correct samples fooled)
    ptb = {}         # absolute perturbation size
    ptbr = {}        # perturbation size relative to the input
    confidence = {}  # model confidence on the adversarial examples

    for a in attacks:
        ind = ind_perturbed(distances[a])

        # A sample counts as successfully attacked when its distance is
        # positive and finite (inf marks a failed attack).
        SR[a] = np.round(
            100 * np.sum(
                np.logical_and(np.greater(distances[a], 0),
                               ~np.isinf(distances[a]))) / or_acc, 2)

        if len(ind) > 0:
            x_adv = perturbed[a][ind]
            # Predict on the adversarial examples; the original snippet
            # predicted on x_test, which is independent of the attack.
            confidence[a] = np.max(model.predict(x_adv - m_train,
                                                 batch_size=args.batch_size),
                                   axis=-1)
            ptb[a] = distance(x_test[ind], x_adv, 'ptb')
            ptbr[a] = distance(x_test[ind], x_adv, 'ptbr')

    return SR, confidence, ptb, ptbr
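
All three examples rely on project-local helpers (ind_perturbed, ind_not_infinite, distance) that this page does not show. Below is a minimal sketch consistent with how they are called; the 'ptb' and 'ptbr' modes (absolute and relative perturbation size) are assumptions inferred from usage, not the project's confirmed definitions:

import numpy as np

def ind_perturbed(dist):
    # Indices where the attack found a finite, non-zero perturbation.
    return np.where(np.logical_and(dist > 0, ~np.isinf(dist)))[0]

def ind_not_infinite(dist):
    # Indices where the attack did not fail outright (distance != inf).
    return np.where(~np.isinf(dist))[0]

def distance(x, x_adv, order):
    # Per-sample perturbation size between clean and adversarial inputs.
    diff = (x_adv - x).reshape(len(x), -1)
    if order == 'ptb':   # assumed: mean absolute perturbation per sample
        return np.mean(np.abs(diff), axis=-1)
    if order == 'ptbr':  # assumed: L2 perturbation relative to the input norm
        return (np.linalg.norm(diff, axis=-1)
                / np.linalg.norm(x.reshape(len(x), -1), axis=-1))
    return np.linalg.norm(diff, ord=order, axis=-1)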
Example #2
import numpy as np


def evaluate(x_test, distances, perturbed, or_acc):
    """Build an accuracy-vs-perturbation curve from per-sample attack distances."""

    ind = ind_perturbed(distances)
    x_adv = perturbed[ind]

    # Per-sample L2 norm of the successful perturbations; failed attacks
    # (distance == inf) keep an infinite L2 distance.
    l2_dist = np.zeros(len(distances))
    l2_dist[ind] = distance(x_test[ind], x_adv, 2)

    ind_non_pert = np.where(distances == np.inf)[0]
    l2_dist[ind_non_pert] = np.inf

    # Walk through the sorted distances, removing one correctly classified
    # sample per successful perturbation; infinite distances (failed attacks)
    # leave the accuracy flat.
    unique_dist, counts = np.unique(l2_dist, return_counts=True)
    sorted_dist = np.zeros_like(l2_dist)
    acc = or_acc * np.ones_like(sorted_dist)
    tot_c = 0
    for i in range(len(unique_dist)):
        sorted_dist[tot_c:tot_c + counts[i]] = unique_dist[i]
        if unique_dist[i] == np.inf:
            acc[tot_c:tot_c + counts[i]] = acc[max(tot_c - 1, 0)]
        elif unique_dist[i] > 0:
            acc[tot_c:tot_c + counts[i]] = acc[max(tot_c - 1, 0)] - (np.arange(counts[i]) + 1)
        tot_c += counts[i]

    return sorted_dist, acc / len(x_test)
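
A quick way to see what evaluate produces: feed it toy data and plot accuracy against perturbation size. Everything below is a synthetic placeholder, and it assumes the helper sketch above for ind_perturbed and distance:

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
x_test = rng.random((100, 32, 32, 3)).astype(np.float32)
perturbed = x_test + 0.05 * rng.standard_normal(x_test.shape)
dists = rng.exponential(0.1, 100)  # toy per-sample attack distances
dists[::10] = np.inf               # every 10th attack failed
or_acc = 90                        # toy count of originally correct samples

sorted_dist, acc = evaluate(x_test, dists, perturbed, or_acc)
finite = ~np.isinf(sorted_dist)
plt.plot(sorted_dist[finite], acc[finite])
plt.xlabel('L2 perturbation norm')
plt.ylabel('accuracy on perturbed inputs')
plt.show()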
Example #3
import os
import re

import numpy as np
import foolbox
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import SGD

# Project-local helpers (select_model, foolbox_attack, batch_generator,
# unshuffle_index, convert_object_array, ablation, to_nparray, ...) are
# assumed to be defined elsewhere in the repository.


def run_attacks(args, filepath, x_test, y_test, y_test_onehot, attacks,
                m_train, save_all=True):

    # Strip the extension suffix to recover the base architecture name.
    base_model_name = args.model_name
    if args.extension is not None:
        base_model_name = re.sub('_' + args.extension, '', base_model_name)

    # Either load a fully saved model, or rebuild the architecture and load
    # pretrained weights into it.
    if not args.pretrained_weights:
        model = load_model(filepath['models'] + filepath['dataset'] + args.model_name + '.h5')
    else:
        model = select_model(x_test.shape[1:], base_model_name,
                             SGD(0.001, momentum=0.9, nesterov=True),
                             args.weight_decay)
        print(f"Loading pretrained weights for model: {base_model_name}")
        model.load_weights(filepath['trained_weights'] + filepath['dataset'] + args.model_name + '.h5')

    # If training used pixel-mean subtraction, hand the mean to foolbox as
    # preprocessing; otherwise attack the raw inputs.
    if args.pixel_mean:
        fb_model = foolbox.models.TensorFlowModel.from_keras(
            model=model, bounds=(0., 1.), preprocessing={'mean': m_train})
    else:
        m_train = 0
        fb_model = foolbox.models.TensorFlowModel.from_keras(
            model=model, bounds=(0., 1.))

    print("Model: ", args.model_name)
    test_loss, test_acc = model.evaluate(x_test - m_train, y_test_onehot,
                                         batch_size=args.batch_size, verbose=0)
    y_pred = np.argmax(model.predict(x_test - m_train,
                                     batch_size=args.batch_size), axis=-1)
    # Count of correctly classified test samples, used to normalise the
    # attack success rate.
    or_acc = np.sum(y_test == y_pred)
    print(f"Test loss: {test_loss}, test acc: {test_acc}")

    # Resume from a previous run if results for this model already exist;
    # otherwise initialise empty result dictionaries.
    if os.path.exists(filepath['output'] + filepath['dataset'] + args.model_name + '.npz'):
        print(f"Resuming attacks for model: {args.model_name}")
        npzfile = np.load(filepath['output'] + filepath['dataset'] + args.model_name + '.npz',
                          allow_pickle=True)
        labels = npzfile['arr_0'].item()
        perturbed = npzfile['arr_1'].item()
        distances = npzfile['arr_2'].item()
        l2_distances = npzfile['arr_3'].item()
        sorted_distances = npzfile['arr_4'].item()
        adv_acc = npzfile['arr_5'].item()
        # arr_6..arr_8 hold SR, ptb and ptbr, which are recomputed below.
        dict_loss = npzfile['arr_9'].item()
        dict_acc = npzfile['arr_10'].item()
    else:
        print(f"Starting attack from scratch for model: {args.model_name}")
        labels = {}
        perturbed = {}
        distances = {}
        l2_distances = {}
        sorted_distances = {}
        adv_acc = {}
        dict_loss = {}
        dict_acc = {}

    # Each repetition reruns every attack; only strictly smaller perturbations
    # overwrite earlier results, so extra repetitions tighten the estimate.
    for r in range(args.repetitions):

        batch_ind = batch_generator(len(y_test), batch_size=args.adversarial_batch_size,
                                    shuffle=args.shuffle)

        for a in attacks:

            label = []
            dist = []
            pert = []

            fb_attack = foolbox_attack(a, fb_model)

            for b in batch_ind:
                if a == 'ShiftsAttack':
                    adversarials = fb_attack(x_test[b], y_test[b], unpack=False,
                                             do_rotations=False)
                else:
                    adversarials = fb_attack(x_test[b], y_test[b], unpack=False)

                label.extend([ad.adversarial_class for ad in adversarials])
                dist.extend([ad.distance.value for ad in adversarials])
                pert.extend([ad.perturbed for ad in adversarials])

            label = np.array(label)
            dist = np.array(dist)
            pert = np.array(pert)

            # Undo the batch shuffling so results align with x_test order.
            if args.shuffle:
                unshuffle_ind = unshuffle_index(batch_ind)
                label = label[unshuffle_ind]
                dist = dist[unshuffle_ind]
                pert = pert[unshuffle_ind]

            ind_pert = ind_perturbed(dist)
            ind_not_inf = ind_not_infinite(dist)

            # Failed attacks yield None entries, which makes the arrays dtype
            # object; convert them to dense numeric arrays.
            dist = dist.astype(np.float32)
            if label.dtype == 'object':
                label = convert_object_array(label, in_shape=len(x_test),
                                             ind_pert=ind_not_inf, dtype=np.int8)
                pert = convert_object_array(pert, in_shape=x_test.shape[1:],
                                            ind_pert=ind_not_inf, dtype=np.float32)

            # Substitute the successful adversarials into a copy of the clean
            # test set and measure their per-sample L2 distance.
            x_adv = np.array(x_test, copy=True)
            if len(ind_pert) > 0:
                x_adv[ind_pert] = pert[ind_pert]
            l2_dist = distance(x_test, x_adv, 2)

            if a not in labels.keys():
                labels[a] = label
                distances[a] = dist
                perturbed[a] = pert
                l2_distances[a] = l2_dist
            else:
                # Keep, per sample, the smallest successful perturbation seen
                # across repetitions.
                ind = np.where(np.logical_and(
                    l2_dist > 0,
                    np.logical_or(l2_dist < l2_distances[a],
                                  l2_distances[a] == 0)))[0]

                # len(ind), not np.sum(ind): the sum is 0 when the only
                # improved index is sample 0.
                if len(ind) > 0:
                    labels[a][ind] = label[ind]
                    distances[a][ind] = dist[ind]
                    perturbed[a][ind] = pert[ind]
                    l2_distances[a][ind] = l2_dist[ind]

            # After the final repetition, turn the accumulated distances into
            # an accuracy-vs-perturbation curve.
            if r == args.repetitions - 1:
                sorted_dist, acc = evaluate(x_test, distances[a], perturbed[a], or_acc)
                sorted_distances[a] = sorted_dist
                adv_acc[a] = acc

    if args.ablation:
        dict_loss, dict_acc = ablation(args, model, x_test - m_train, y_test_onehot,
                                       distances, perturbed, m_train)

    sorted_distances, adv_acc = to_nparray(sorted_distances), to_nparray(adv_acc)
    SR, confidence, ptb, ptbr = attack_statistics(args, model, x_test, distances,
                                                  perturbed, or_acc, m_train)

    if save_all:
        # Saved positionally as arr_0 ... arr_10; note that confidence is
        # neither saved nor returned.
        np.savez(filepath['output'] + filepath['dataset'] + args.model_name,
                 labels, perturbed, distances, l2_distances, sorted_distances, adv_acc,
                 SR, ptb, ptbr, dict_loss, dict_acc)

    return labels, perturbed, distances, l2_distances, sorted_distances, adv_acc, \
        SR, ptb, ptbr, dict_loss, dict_acc
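
For context, a hypothetical driver wiring run_attacks to CIFAR-10 might look like the sketch below. The flag values, directory layout, model name, and attack names are illustrative assumptions; the original project's argparse setup and file layout may differ:

from types import SimpleNamespace

import numpy as np
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical

(x_train, _), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255., x_test / 255.
y_test = y_test.flatten()
y_test_onehot = to_categorical(y_test, 10)
m_train = np.mean(x_train, axis=0)  # per-pixel training mean

# Hypothetical flags mirroring the attributes run_attacks reads from args.
args = SimpleNamespace(
    model_name='resnet20', extension=None, pretrained_weights=False,
    weight_decay=1e-4, pixel_mean=True, batch_size=128,
    adversarial_batch_size=64, shuffle=True, repetitions=3, ablation=False)

filepath = {'models': 'models/', 'trained_weights': 'weights/',
            'output': 'results/', 'dataset': 'cifar10/'}

results = run_attacks(args, filepath, x_test, y_test, y_test_onehot,
                      attacks=['PointwiseAttack', 'BoundaryAttack'],
                      m_train=m_train)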