def evaluate_proposed_membership_inference(per_instance_loss, membership, proposed_mi_outputs, fpr_threshold=None, per_class_thresh=False):
    """Run and report the two proposed threshold-based membership attacks.

    Attack Method 1 thresholds the per-instance loss: a record is predicted
    to be a member when its loss is at most the negated threshold that
    ``get_inference_threshold`` selects on the negated ``v_`` losses.
    Attack Method 2 thresholds ``counts``: a record is predicted to be a
    member when its count is at least the threshold selected on ``v_counts``.
    Each prediction is handed to ``prety_print_result``; nothing is returned.

    Args:
        per_instance_loss: Loss of every evaluated record.
        membership: Ground-truth membership labels used for reporting.
        proposed_mi_outputs: 6-tuple ``(true_y, v_true_y, v_membership,
            v_per_instance_loss, v_counts, counts)``. The ``v_``-prefixed
            entries are the ones thresholds are selected on (presumably a
            held-out validation split -- confirm with the caller).
        fpr_threshold: Forwarded unchanged to ``get_inference_threshold``.
        per_class_thresh: When true, one threshold is selected per distinct
            value of ``true_y`` instead of a single global threshold.
    """
    true_y, v_true_y, v_membership, v_per_instance_loss, v_counts, counts = proposed_mi_outputs
    rule = '-' * 10

    # ---- Attack Method 1: threshold on the per-instance loss ----
    print(rule + 'Using Attack Method 1' + rule + '\n')
    if not per_class_thresh:
        cutoff = get_inference_threshold(-v_per_instance_loss, v_membership, fpr_threshold)
        guesses = np.where(per_instance_loss <= -cutoff, 1, 0)
    else:
        labels = np.unique(true_y)
        guesses = np.zeros(len(membership))
        for label in labels:
            rows = np.arange(len(true_y))[true_y == label]
            v_rows = np.arange(len(v_true_y))[v_true_y == label]
            cutoff = get_inference_threshold(
                -v_per_instance_loss[v_rows], v_membership[v_rows], fpr_threshold)
            guesses[rows] = np.where(per_instance_loss[rows] <= -cutoff, 1, 0)
    prety_print_result(membership, guesses)

    # ---- Attack Method 2: threshold on the counts ----
    print(rule + 'Using Attack Method 2' + rule + '\n')
    if not per_class_thresh:
        cutoff = get_inference_threshold(v_counts, v_membership, fpr_threshold)
        guesses = np.where(counts >= cutoff, 1, 0)
    else:
        labels = np.unique(true_y)
        guesses = np.zeros(len(membership))
        for label in labels:
            rows = np.arange(len(true_y))[true_y == label]
            v_rows = np.arange(len(v_true_y))[v_true_y == label]
            cutoff = get_inference_threshold(
                v_counts[v_rows], v_membership[v_rows], fpr_threshold)
            guesses[rows] = np.where(counts[rows] >= cutoff, 1, 0)
    prety_print_result(membership, guesses)
def yeom_membership_inference(per_instance_loss, membership, train_loss, test_loss=None):
    """Run Yeom's loss-based membership inference and report the result.

    Without ``test_loss`` a record is predicted to be a member when its loss
    does not exceed ``train_loss``. With ``test_loss`` the loss is scored
    under two zero-mean normal distributions whose scales are ``train_loss``
    and ``test_loss``; the record is predicted to be a member when the
    ``train_loss`` density is at least the ``test_loss`` density.

    Args:
        per_instance_loss: Loss of every evaluated record.
        membership: Ground-truth membership labels used for reporting.
        train_loss: Loss threshold, or the scale of the "member" density.
        test_loss: Optional scale of the "non-member" density.

    Returns:
        Array of 0/1 membership predictions, one per record.
    """
    print('-' * 10 + 'YEOM\'S MEMBERSHIP INFERENCE' + '-' * 10 + '\n')
    # Identity test on purpose: `== None` is evaluated element-wise for
    # array-valued input and would make the `if` ambiguous.
    if test_loss is None:
        pred_membership = np.where(per_instance_loss <= train_loss, 1, 0)
    else:
        member_density = stats.norm(0, train_loss).pdf(per_instance_loss)
        non_member_density = stats.norm(0, test_loss).pdf(per_instance_loss)
        pred_membership = np.where(member_density >= non_member_density, 1, 0)
    prety_print_result(membership, pred_membership)
    return pred_membership
def yeom_attribute_inference(true_x, true_y, classifier, membership, features, train_loss, test_loss=None):
    """Run Yeom's attribute inference for each feature and report the results.

    For every feature the column is overwritten first with the "low" and then
    with the "high" value returned by ``get_attribute_variations``, the
    classifier is queried, and the resulting losses are compared to guess the
    attribute. The original column is always restored afterwards, even when
    prediction fails.

    Args:
        true_x: 2-D feature matrix; temporarily modified in place.
        true_y: Labels passed to ``log_loss`` together with the scores.
        classifier: Estimator exposing ``predict(input_fn=...)``.
        membership: Ground-truth membership labels used for reporting.
        features: Iterable of column indices of ``true_x`` to attack.
        train_loss: Scale of the zero-mean normal used for the "member" density.
        test_loss: Optional scale of the "non-member" density. When given,
            both variants are first classified as member/non-member and the
            attribute guess is based on those flags.

    Returns:
        List with one prediction array per feature: 1 where the attribute
        guess matches ``true_attribute_value`` and the record is not masked
        out (assumes 0/1-valued attributes -- confirm against
        ``get_attribute_variations``).
    """
    print('-' * 10 + 'YEOM\'S ATTRIBUTE INFERENCE' + '-' * 10 + '\n')

    def _loss_with_value(feature, value):
        # Overwrite the attacked column, query the classifier and score it.
        true_x[:, feature] = value
        pred_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
            x={'x': true_x}, num_epochs=1, shuffle=False)
        predictions = classifier.predict(input_fn=pred_input_fn)
        _, scores = get_predictions(predictions)
        return log_loss(true_y, scores.astype('float32'))

    pred_membership_all = []
    for feature in features:
        original_attribute = np.copy(true_x[:, feature])
        try:
            low_value, high_value, true_attribute_value = get_attribute_variations(
                true_x, feature)
            low_op = _loss_with_value(feature, low_value)
            high_op = _loss_with_value(feature, high_value)
        finally:
            # true_x belongs to the caller: never leave it with a clobbered
            # column, not even when the classifier raises.
            true_x[:, feature] = original_attribute

        # Empirical prior of the "high" value among the true attribute values.
        high_prob = np.sum(true_attribute_value) / len(true_attribute_value)
        low_prob = 1 - high_prob

        # Identity test on purpose: `== None` is element-wise for arrays.
        if test_loss is None:
            pred_attribute_value = np.where(
                low_prob * stats.norm(0, train_loss).pdf(low_op) >=
                high_prob * stats.norm(0, train_loss).pdf(high_op), 0, 1)
            mask = [1] * len(pred_attribute_value)
        else:
            low_mem = np.where(
                stats.norm(0, train_loss).pdf(low_op) >=
                stats.norm(0, test_loss).pdf(low_op), 1, 0)
            high_mem = np.where(
                stats.norm(0, train_loss).pdf(high_op) >=
                stats.norm(0, test_loss).pdf(high_op), 1, 0)
            pred_attribute_value = [
                np.argmax([low_prob * a, high_prob * b])
                for a, b in zip(low_mem, high_mem)
            ]
            # Only keep records where at least one variant looked like a member.
            mask = [a | b for a, b in zip(low_mem, high_mem)]

        # XOR with 1 flips the mismatch bit, leaving 1 where the guess is right.
        pred_membership = mask & (pred_attribute_value ^ true_attribute_value
                                  ^ [1] * len(pred_attribute_value))
        prety_print_result(membership, pred_membership)
        pred_membership_all.append(pred_membership)
    return pred_membership_all
def train_attack_model(classes, dataset=None, n_hidden=50, learning_rate=0.01, batch_size=200, epochs=50, model='nn', l2_ratio=1e-7):
    """Train one attack model per class and evaluate them jointly.

    For every distinct value in ``train_classes`` the matching rows of the
    attack dataset are used to fit a model via ``train_model``; that model is
    then applied to the test rows of the same class. Predictions of all
    classes are concatenated and reported with ``prety_print_result``.

    Args:
        classes: Pair ``(train_classes, test_classes)`` giving the class of
            every train / test row of the attack dataset.
        dataset: 4-tuple ``(train_x, train_y, test_x, test_y)``. When ``None``
            it is obtained from ``load_attack_data()``.
        n_hidden, learning_rate, batch_size, epochs, model, l2_ratio:
            Forwarded unchanged to ``train_model``.

    Returns:
        Tuple ``(attack_adv, pred_scores)`` where ``attack_adv`` is
        ``tpr[1] - fpr[1]`` of the ROC curve computed on the hard predictions
        and ``pred_scores`` are the concatenated per-class scores returned by
        ``get_predictions``.
    """
    if dataset is None:
        dataset = load_attack_data()
    train_x, train_y, test_x, test_y = dataset
    train_classes, test_classes = classes

    true_y = []
    pred_y = []
    pred_scores = []
    for c in np.unique(train_classes):
        train_mask = train_classes == c
        test_mask = test_classes == c
        c_test_x, c_test_y = test_x[test_mask], test_y[test_mask]
        c_dataset = (train_x[train_mask], train_y[train_mask], c_test_x, c_test_y)
        classifier = train_model(c_dataset,
                                 n_hidden=n_hidden,
                                 epochs=epochs,
                                 learning_rate=learning_rate,
                                 batch_size=batch_size,
                                 model=model,
                                 l2_ratio=l2_ratio)
        # Same compat.v1 entry point the rest of this file uses; the bare
        # tf.estimator.inputs namespace is not available on TF 2.x.
        pred_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
            x={'x': c_test_x}, num_epochs=1, shuffle=False)
        predictions = classifier.predict(input_fn=pred_input_fn)
        c_pred_y, c_pred_scores = get_predictions(predictions)
        true_y.append(c_test_y)
        pred_y.append(c_pred_y)
        pred_scores.append(c_pred_scores)

    print('-' * 10 + 'FINAL EVALUATION' + '-' * 10 + '\n')
    true_y = np.concatenate(true_y)
    pred_y = np.concatenate(pred_y)
    pred_scores = np.concatenate(pred_scores)
    prety_print_result(true_y, pred_y)

    fpr, tpr, _ = roc_curve(true_y, pred_y, pos_label=1)
    # NOTE(review): index 1 is presumably the operating point of the hard 0/1
    # predictions (index 0 being the ROC origin) -- confirm for degenerate
    # inputs where every prediction is identical.
    attack_adv = tpr[1] - fpr[1]
    return (attack_adv, pred_scores)
def _infer_low_high_membership(v_scores, v_membership, low_scores, high_scores, fpr_threshold, loss_based):
    """Select one threshold and apply it to the low- and high-variant scores.

    With ``loss_based`` the threshold is selected on the negated ``v_scores``
    and a record is flagged when its loss is at most the negated threshold;
    otherwise a record is flagged when its score is at least the threshold.
    Returns the pair ``(low_mem, high_mem)`` of 0/1 arrays.
    """
    if loss_based:
        thresh = get_inference_threshold(-v_scores, v_membership, fpr_threshold)
        return (np.where(low_scores <= -thresh, 1, 0),
                np.where(high_scores <= -thresh, 1, 0))
    thresh = get_inference_threshold(v_scores, v_membership, fpr_threshold)
    return (np.where(low_scores >= thresh, 1, 0),
            np.where(high_scores >= thresh, 1, 0))


def evaluate_on_all_features(membership, proposed_mi_outputs, proposed_ai_outputs, features, fpr_threshold=None, attack_method=1, per_class_thresh=False):
    """Evaluate the proposed attribute inference attack for every feature.

    For feature position ``i`` the "low" and "high" variants of each record
    are flagged as member / non-member with the selected attack method, the
    attribute is guessed as the variant with the larger prior-weighted flag,
    and the outcome is reported through ``prety_print_result``.

    Args:
        membership: Ground-truth membership labels used for reporting; its
            length also sizes the per-class result buffers.
        proposed_mi_outputs: 6-tuple ``(true_y, v_true_y, v_membership,
            v_per_instance_loss, v_counts, counts)``; the last entry is unused.
        proposed_ai_outputs: 5-tuple ``(true_attribute_value_all,
            low_per_instance_loss_all, high_per_instance_loss_all,
            low_counts_all, high_counts_all)``, each indexed by feature
            position.
        features: Only its length is used, as the number of positions to
            evaluate.
        fpr_threshold: Forwarded unchanged to ``get_inference_threshold``.
        attack_method: ``1`` thresholds the per-instance loss, ``2``
            thresholds the counts.
        per_class_thresh: When true, one threshold is selected per distinct
            value of ``true_y``.

    Raises:
        ValueError: If ``attack_method`` is neither 1 nor 2 (and there is at
            least one feature to evaluate).
    """
    true_y, v_true_y, v_membership, v_per_instance_loss, v_counts, _ = proposed_mi_outputs
    (true_attribute_value_all, low_per_instance_loss_all,
     high_per_instance_loss_all, low_counts_all,
     high_counts_all) = proposed_ai_outputs

    for i in range(len(features)):
        # Empirical prior of the "high" value among the true attribute values.
        high_prob = np.sum(true_attribute_value_all[i]) / len(true_attribute_value_all[i])
        low_prob = 1 - high_prob

        if attack_method == 1:
            v_scores = v_per_instance_loss
            low_scores = low_per_instance_loss_all[i]
            high_scores = high_per_instance_loss_all[i]
        elif attack_method == 2:
            v_scores = v_counts
            low_scores = low_counts_all[i]
            high_scores = high_counts_all[i]
        else:
            # Previously this fell through to an unbound (or stale) low_mem.
            raise ValueError(
                'attack_method must be 1 or 2, got {!r}'.format(attack_method))
        loss_based = attack_method == 1

        if per_class_thresh:
            # Convert once per feature so the per-class fancy indexing below
            # also works when the scores arrive as plain sequences.
            low_scores = np.array(low_scores)
            high_scores = np.array(high_scores)
            low_mem = np.zeros(len(membership), dtype='int32')
            high_mem = np.zeros(len(membership), dtype='int32')
            for c in np.unique(true_y):
                c_indices = np.arange(len(true_y))[true_y == c]
                v_c_indices = np.arange(len(v_true_y))[v_true_y == c]
                low_mem[c_indices], high_mem[c_indices] = _infer_low_high_membership(
                    v_scores[v_c_indices], v_membership[v_c_indices],
                    low_scores[c_indices], high_scores[c_indices],
                    fpr_threshold, loss_based)
        else:
            low_mem, high_mem = _infer_low_high_membership(
                v_scores, v_membership, low_scores, high_scores,
                fpr_threshold, loss_based)

        pred_attribute_value = [np.argmax([low_prob * a, high_prob * b])
                                for a, b in zip(low_mem, high_mem)]
        # Only keep records where at least one variant looked like a member.
        mask = [a | b for a, b in zip(low_mem, high_mem)]
        # XOR with 1 flips the mismatch bit, leaving 1 where the guess is right.
        pred_membership = mask & (pred_attribute_value ^ true_attribute_value_all[i]
                                  ^ [1] * len(pred_attribute_value))
        prety_print_result(membership, pred_membership)