def kkt_attack(two_class_kkt,
               target_grad,
               target_theta,
               total_epsilon,
               epsilon_pos,
               epsilon_neg,
               X_train,
               Y_train,
               class_map,
               centroids,
               centroid_vec,
               sphere_radii,
               slab_radii,
               target_bias,
               target_bias_grad,
               max_losses,
               sv_centroids=None,
               sv_sphere_radii=None):
    """Run the KKT attack solver and splice the crafted poison points
    into the training set.

    Returns the augmented data, the achieved objective value, the two
    crafted points (one per class), and how many copies of each were
    appended.
    """
    # Delegate the optimization to the solver; note it may also return
    # adjusted per-class epsilons.
    x_pos, x_neg, epsilon_pos, epsilon_neg = two_class_kkt.solve(
        target_grad,
        target_theta,
        epsilon_pos,
        epsilon_neg,
        class_map,
        centroids,
        centroid_vec,
        sphere_radii,
        slab_radii,
        target_bias=target_bias,
        target_bias_grad=target_bias_grad,
        max_losses=max_losses,
        verbose=False)

    # Residual norm of the KKT stationarity condition at the solution.
    residual = (target_grad
                - epsilon_pos * x_pos.reshape(-1)
                + epsilon_neg * x_neg.reshape(-1))
    obj = np.linalg.norm(residual)
    print("** Actual objective value: %.4f" % obj)

    # Split the poison budget between the two classes.
    n_train = X_train.shape[0]
    total_points_to_add = int(np.round(total_epsilon * n_train))
    num_pos = int(np.round(epsilon_pos * n_train))
    num_neg = total_points_to_add - num_pos
    assert num_neg >= 0

    # Append num_pos copies of the positive point, then num_neg copies
    # of the negative point.
    X_modified, Y_modified = data.add_points(
        x_pos, 1, X_train, Y_train, num_copies=num_pos)
    X_modified, Y_modified = data.add_points(
        x_neg, -1, X_modified, Y_modified, num_copies=num_neg)

    return X_modified, Y_modified, obj, x_pos, x_neg, num_pos, num_neg
# Ejemplo n.º 2
# 0
def init_gradient_attack_from_mask(X_train,
                                   Y_train,
                                   epsilon,
                                   feasible_flipped_mask,
                                   general_train_idx,
                                   sensitive_file,
                                   attack_method,
                                   use_copy=True):
    """Build an initial poisoned dataset for the gradient attack,
    taking the sensitive-group attribute into account.

    Loads per-example group labels from ``sensitive_file`` (0 and 1;
    printed as "male"/"female" below), computes the positive-label base
    rate of each group on the training split, and seeds the poison set
    with one positive and one negative point drawn from the group with
    the higher/lower base rate respectively, replicated in inverse
    class balance.

    Args:
        X_train, Y_train: clean training data (labels in {-1, +1}).
        epsilon: poison fraction relative to the training-set size.
        feasible_flipped_mask: boolean mask of points whose label may
            be flipped.
        general_train_idx: number of leading entries of the group-label
            array that belong to the training split.
        sensitive_file: name of an .npz file under ./data containing a
            'group_label' array.
        attack_method: unused here; kept for interface compatibility.
        use_copy: if False, duplicate a random feasible batch with
            flipped labels instead of the two-seed-point scheme.

    Returns:
        (X_modified, Y_modified, indices_to_poison, copy_array,
         advantaged) where advantaged is +1 or -1 depending on which
        group has the higher positive base rate.
    """
    DATA_FOLDER = './data'
    dataset_path = os.path.join(DATA_FOLDER)
    # Close the npz file handle once the array has been extracted.
    with np.load(os.path.join(dataset_path, sensitive_file)) as f:
        group_label = f['group_label']

    advantaged = 1

    train_groups = group_label[0:general_train_idx]
    male_train_index = np.where(train_groups == 0)[0].astype(np.int32)
    female_train_index = np.where(train_groups == 1)[0].astype(np.int32)

    # Positive-label members of each group within the training split.
    index_male_true_train = np.where(
        np.logical_and(train_groups == 0, Y_train == 1))[0].astype(np.int32)
    index_female_true_train = np.where(
        np.logical_and(train_groups == 1, Y_train == 1))[0].astype(np.int32)

    # Base rate of the positive label within each group.
    train_data_one_female_prob = (index_female_true_train.shape[0] /
                                  female_train_index.shape[0])
    train_data_one_male_prob = (index_male_true_train.shape[0] /
                                male_train_index.shape[0])

    # Vectorized replacement of the original O(n^2) membership loop:
    # +1 for group-0 training rows, -1 for group-1 rows, 0 otherwise.
    # The two index sets are disjoint, so the result is identical.
    gender_labels = np.zeros(general_train_idx)
    gender_labels[male_train_index] = 1
    gender_labels[female_train_index] = -1

    if not use_copy:
        num_copies = int(np.round(epsilon * X_train.shape[0]))

        # Duplicate a random feasible batch with flipped labels.
        idx_to_copy = np.random.choice(np.where(feasible_flipped_mask)[0],
                                       size=num_copies,
                                       replace=True)

        X_modified = data.vstack(X_train, X_train[idx_to_copy, :])
        Y_modified = np.append(Y_train, -Y_train[idx_to_copy])
        copy_array = None
        indices_to_poison = np.arange(X_train.shape[0], X_modified.shape[0])

    else:
        num_copies = int(np.round(epsilon * X_train.shape[0]))
        # Choose this in inverse class balance
        num_pos_copies = int(np.round(np.mean(Y_train == -1) * num_copies))
        num_neg_copies = num_copies - num_pos_copies

        np.random.seed(0)

        # Seed the positive point from the group with the higher positive
        # base rate and the negative point from the other group.
        if (train_data_one_female_prob > train_data_one_male_prob):
            advantaged = -1
            pos_idx_to_copy = np.random.choice(
                np.where(feasible_flipped_mask & (Y_train == 1)
                         & (gender_labels == -1))[0])
            neg_idx_to_copy = np.random.choice(
                np.where(feasible_flipped_mask & (Y_train == -1)
                         & (gender_labels == 1))[0])
        else:
            advantaged = 1
            pos_idx_to_copy = np.random.choice(
                np.where(feasible_flipped_mask & (Y_train == 1)
                         & (gender_labels == 1))[0])
            neg_idx_to_copy = np.random.choice(
                np.where(feasible_flipped_mask & (Y_train == -1)
                         & (gender_labels == -1))[0])

        # Diagnostics: report which group each seed point came from.
        if (neg_idx_to_copy in female_train_index):
            print("female")
        else:
            print("male")
        if (pos_idx_to_copy in female_train_index):
            print("female")
        else:
            print("male")
        print(neg_idx_to_copy)
        print(pos_idx_to_copy)

        # The two seed points are added once up front, so the remaining
        # replication counts are reduced by one each.
        num_pos_copies -= 1
        num_neg_copies -= 1

        X_modified, Y_modified = data.add_points(X_train[pos_idx_to_copy, :],
                                                 1,
                                                 X_train,
                                                 Y_train,
                                                 num_copies=1)
        X_modified, Y_modified = data.add_points(X_train[neg_idx_to_copy, :],
                                                 -1,
                                                 X_modified,
                                                 Y_modified,
                                                 num_copies=1)
        X_modified, Y_modified = data.add_points(X_train[pos_idx_to_copy, :],
                                                 1,
                                                 X_modified,
                                                 Y_modified,
                                                 num_copies=num_pos_copies)
        X_modified, Y_modified = data.add_points(X_train[neg_idx_to_copy, :],
                                                 -1,
                                                 X_modified,
                                                 Y_modified,
                                                 num_copies=num_neg_copies)
        copy_array = [num_pos_copies, num_neg_copies]
        indices_to_poison = np.arange(X_train.shape[0], X_train.shape[0] + 2)

    return X_modified, Y_modified, indices_to_poison, copy_array, advantaged
# Ejemplo n.º 3
# 0
                X_flipped, Y_flipped)
            loss_diffs = poisoned_losses - orig_losses
            q = q_finder.solve(loss_diffs, verbose=True)
            print("At iteration %s, q is:" % iter_idx)
            print(q)
            if np.all(old_q == q):
                print('Done, terminating')
                break

        q_idx = np.where(q)[0][0]
        assert q[q_idx] == num_points_to_add
        if sparse.issparse(X_flipped):
            x = X_flipped[q_idx, :].toarray()
        else:
            x = X_flipped[q_idx, :]
        X_modified, Y_modified = data.add_points(x,
                                                 Y_flipped[q_idx],
                                                 X_train,
                                                 Y_train,
                                                 num_copies=num_points_to_add)

        attack_save_path = datasets.get_target_attack_npz_path(
            dataset_name, epsilon, weight_decay, percentile, attack_label)

        if sparse.issparse(X_modified):
            X_poison = X_modified[n:, :].asfptype()
        else:
            X_poison = X_modified[n:, :]

        np.savez(attack_save_path, X_poison=X_poison, Y_poison=Y_modified[n:])
# Ejemplo n.º 4
# 0
def init_gradient_attack_from_mask(
    X_train, Y_train,
    epsilon,
    feasible_flipped_mask,
    use_copy=True):
    """Create an initial poisoned training set from label-flippable points.

    With ``use_copy=False`` a random batch of feasible points is duplicated
    with flipped labels. With ``use_copy=True`` a single seed point per
    class is drawn (with a fixed RNG seed) and replicated in inverse class
    balance. Returns (X_modified, Y_modified, indices_to_poison,
    copy_array).
    """
    budget = int(np.round(epsilon * X_train.shape[0]))

    if not use_copy:
        # Flip the labels of a random feasible batch.
        feasible_idx = np.where(feasible_flipped_mask)[0]
        idx_to_copy = np.random.choice(feasible_idx,
                                       size=budget,
                                       replace=True)

        X_modified = data.vstack(X_train, X_train[idx_to_copy, :])
        Y_modified = np.append(Y_train, -Y_train[idx_to_copy])
        copy_array = None
        indices_to_poison = np.arange(X_train.shape[0], X_modified.shape[0])
        return X_modified, Y_modified, indices_to_poison, copy_array

    # Choose this in inverse class balance: positives proportional to the
    # fraction of negatives in the clean data, and vice versa.
    num_pos_copies = int(np.round(np.mean(Y_train == -1) * budget))
    num_neg_copies = budget - num_pos_copies

    # Fixed seed; the positive seed point is drawn before the negative
    # one, so the draw order is part of the contract.
    np.random.seed(0)
    pos_idx_to_copy = np.random.choice(
        np.where(feasible_flipped_mask & (Y_train == -1))[0])
    neg_idx_to_copy = np.random.choice(
        np.where(feasible_flipped_mask & (Y_train == 1))[0])

    # Each seed point is appended once up front, then replicated.
    num_pos_copies -= 1
    num_neg_copies -= 1

    X_modified, Y_modified = data.add_points(
        X_train[pos_idx_to_copy, :], 1, X_train, Y_train, num_copies=1)
    X_modified, Y_modified = data.add_points(
        X_train[neg_idx_to_copy, :], -1, X_modified, Y_modified,
        num_copies=1)
    X_modified, Y_modified = data.add_points(
        X_train[pos_idx_to_copy, :], 1, X_modified, Y_modified,
        num_copies=num_pos_copies)
    X_modified, Y_modified = data.add_points(
        X_train[neg_idx_to_copy, :], -1, X_modified, Y_modified,
        num_copies=num_neg_copies)

    copy_array = [num_pos_copies, num_neg_copies]
    indices_to_poison = np.arange(X_train.shape[0], X_train.shape[0] + 2)
    return X_modified, Y_modified, indices_to_poison, copy_array
def kkt_for_lr(d,
               args,
               target_grad,
               theta_p,
               bias_p,
               total_eps,
               eps_pos,
               eps_neg,
               X_train,
               Y_train,
               x_pos_tuple=None,
               x_neg_tuple=None,
               lr=1e-5,
               num_steps=3000,
               trials=10,
               optimizer='adam'):
    # we did not implement defenses for KKT for logistic regression
    x_min_pos, x_max_pos = x_pos_tuple
    x_min_neg, x_max_neg = x_neg_tuple

    best_obj = 1e10
    for trial in range(trials):
        # print("------ trial {}------".format(trial))
        # optimization variables
        if args.dataset == 'dogfish':
            x_pos = np.array([
                upper_bounds.random_sample(x_min_pos[i], x_max_pos[i])
                for i in range(len(x_min_pos))
            ])
            x_neg = np.array([
                upper_bounds.random_sample(x_min_neg[i], x_max_neg[i])
                for i in range(len(x_min_neg))
            ])
        else:
            x_pos = np.array([
                upper_bounds.random_sample(x_min_pos, x_max_pos)
                for i in range(d)
            ])
            x_neg = np.array([
                upper_bounds.random_sample(x_min_neg, x_max_neg)
                for i in range(d)
            ])

        if optimizer == 'adagrad':
            # store the square of gradients
            grads_squared_pos = np.zeros(d)
            grads_squared_neg = np.zeros(d)
            initial_accumulator_value = 0.001
            grads_squared_pos.fill(initial_accumulator_value)
            grads_squared_neg.fill(initial_accumulator_value)
            epsilon = 1e-7
        elif optimizer == 'adam':
            grads_first_moment_pos = np.zeros(d)
            grads_second_moment_pos = np.zeros(d)
            grads_first_moment_neg = np.zeros(d)
            grads_second_moment_neg = np.zeros(d)

            beta1 = 0.9
            beta2 = 0.999
            epsilon = 1e-8

        prev_obj = 1e10
        for step in range(num_steps):
            score_pos = np.dot(theta_p, x_pos) + bias_p
            score_neg = np.dot(theta_p, x_neg) + bias_p

            # sigmoid prediction confidence
            prediction_pos = upper_bounds.sigmoid(score_pos)
            prediction_neg = upper_bounds.sigmoid(score_neg)
            # output_error_signal_pos = 1 - prediction_pos  # this is also the gradient of b for positive x part
            # output_error_signal_neg = -1 - prediction_neg  # this is also the gradient of b for negative x part

            # the objective value of KKT attack is the norm of following vector
            kkt_obj_grad = target_grad + eps_pos * (
                1 - prediction_pos) * x_pos + eps_neg * (
                    -prediction_neg
                ) * x_neg  # note that, we use negative label as 0, not -1
            kkt_obj = np.linalg.norm(kkt_obj_grad)**2
            if step == 0:
                print("(random) initial obj value:", kkt_obj)
            # constant values for x_pos and x_neg
            grad_pos = 2 * eps_pos * (1 - prediction_pos) * kkt_obj_grad
            grad_neg = 2 * eps_neg * (
                -prediction_neg
            ) * kkt_obj_grad  # note that, we use negative label as 0, not -1

            if optimizer == 'gd':
                x_pos -= lr * grad_pos
                x_neg -= lr * grad_neg
            elif optimizer == 'adagrad':
                """Weights update using adagrad.
                grads2 = grads2 + grads**2
                w' = w - lr * grads / (sqrt(grads2) + epsilon)
                """
                # update x_pos
                grads_squared_pos = grads_squared_pos + grad_pos**2
                x_pos = x_pos - lr * grad_pos / (np.sqrt(grads_squared_pos) +
                                                 epsilon)
                # update x_neg
                grads_squared_neg = grads_squared_neg + grad_neg**2
                x_neg = x_neg - lr * grad_neg / (np.sqrt(grads_squared_neg) +
                                                 epsilon)
            elif optimizer == 'adam':
                """Weights update using Adam.
                
                g1 = beta1 * g1 + (1 - beta1) * grads
                g2 = beta2 * g2 + (1 - beta2) * g2
                g1_unbiased = g1 / (1 - beta1**time)
                g2_unbiased = g2 / (1 - beta2**time)
                w = w - lr * g1_unbiased / (sqrt(g2_unbiased) + epsilon)
                """
                time = step + 1
                # update x_pos
                grads_first_moment_pos = beta1 * grads_first_moment_pos + \
                                        (1. - beta1) * grad_pos
                grads_second_moment_pos = beta2 * grads_second_moment_pos + \
                                        (1. - beta2) * grad_pos**2
                grads_first_moment_unbiased_pos = grads_first_moment_pos / (
                    1. - beta1**time)
                grads_second_moment_unbiased_pos = grads_second_moment_pos / (
                    1. - beta2**time)
                x_pos = x_pos - lr * grads_first_moment_unbiased_pos / (
                    np.sqrt(grads_second_moment_unbiased_pos) + epsilon)

                # update x_neg
                grads_first_moment_neg = beta1 * grads_first_moment_neg + \
                                        (1. - beta1) * grad_neg
                grads_second_moment_neg = beta2 * grads_second_moment_neg + \
                                        (1. - beta2) * grad_neg**2

                grads_first_moment_unbiased_neg = grads_first_moment_neg / (
                    1. - beta1**time)
                grads_second_moment_unbiased_neg = grads_second_moment_neg / (
                    1. - beta2**time)

                x_neg = x_neg - lr * grads_first_moment_unbiased_neg / (
                    np.sqrt(grads_second_moment_unbiased_neg) + epsilon)
            # print(y_tmp,output_error_signal_c, output_error_signal_p)
            # projection step to ensure it is within bounded norm
            x_pos = np.clip(x_pos, x_min_pos, x_max_pos)
            x_neg = np.clip(x_neg, x_min_neg, x_max_neg)

            # print("added: min max",np.amin(lr * (gradient_c - gradient_p)),np.amax(lr * (gradient_c - gradient_p)))
            # print("before: min max",np.amin(x),np.amax(x))

            # objective function value found so far (minimization)
            kkt_obj_grad = target_grad + eps_pos * (
                1 - prediction_pos) * x_pos + eps_neg * (
                    -prediction_neg
                ) * x_neg  # again, negative label is 0, not -1
            kkt_obj = np.linalg.norm(kkt_obj_grad)**2
            if best_obj > kkt_obj:
                best_obj = kkt_obj
                best_x_pos = x_pos
                best_x_neg = x_neg

            if np.abs(prev_obj - kkt_obj) < 1e-7:
                print("Enough convergence")
                print(
                    "steps: {}  current norm (objective): {:.4f}  minimum norm: {:.4f}"
                    .format(step + 1, kkt_obj, best_obj))

                break

            prev_obj = kkt_obj

            # # Print log-likelihood every so often
            # if (step+1) % 2000 == 0:
            #     print("current obj:",kkt_obj)

    print("** Actual objective value: %.4f" % best_obj)
    # num_train = X_train.shape[0]
    total_points_to_add = int(np.round(total_eps * X_train.shape[0]))
    num_pos = int(np.round(eps_pos * X_train.shape[0]))
    num_neg = total_points_to_add - num_pos
    assert num_neg >= 0

    X_modified, Y_modified = data.add_points(best_x_pos,
                                             1,
                                             X_train,
                                             Y_train,
                                             num_copies=num_pos)
    X_modified, Y_modified = data.add_points(best_x_neg,
                                             -1,
                                             X_modified,
                                             Y_modified,
                                             num_copies=num_neg)

    return X_modified, Y_modified, best_obj, best_x_pos, best_x_neg, num_pos, num_neg