def evaluate_proposed_membership_inference(per_instance_loss, membership, proposed_mi_outputs, fpr_threshold=None, per_class_thresh=False):
    """Run both threshold attacks and print a report for each.

    Attack Method 1 marks a record as a member when its loss is at most the
    negated threshold obtained from the negated held-out losses. Attack
    Method 2 marks a record as a member when its count is at least the
    threshold obtained from the held-out counts. Thresholds come from
    ``get_inference_threshold`` (defined elsewhere), computed either once
    over all held-out records or once per distinct label in ``true_y``.

    Args:
        per_instance_loss: Per-record loss values compared in Method 1.
        membership: Reference labels passed to ``prety_print_result``; its
            length also sizes the per-class prediction buffer.
        proposed_mi_outputs: 6-tuple ``(true_y, v_true_y, v_membership,
            v_per_instance_loss, v_counts, counts)``.
        fpr_threshold: Forwarded unchanged to ``get_inference_threshold``.
        per_class_thresh: If true, compute a separate threshold per label.

    Returns:
        None. Results are reported through ``print`` and
        ``prety_print_result``.
    """
    true_y, v_true_y, v_membership, v_per_instance_loss, v_counts, counts = proposed_mi_outputs
    rule = '-' * 10

    def rows_for(label):
        # Positions carrying `label` among the evaluated and held-out records.
        return np.flatnonzero(true_y == label), np.flatnonzero(v_true_y == label)

    # Attack Method 1: threshold on the per-instance loss.
    print(rule + 'Using Attack Method 1' + rule + '\n')
    if not per_class_thresh:
        cutoff = get_inference_threshold(-v_per_instance_loss, v_membership, fpr_threshold)
        guesses = np.where(per_instance_loss <= -cutoff, 1, 0)
    else:
        labels = np.unique(true_y)
        guesses = np.zeros(len(membership))
        for label in labels:
            rows, v_rows = rows_for(label)
            cutoff = get_inference_threshold(
                -v_per_instance_loss[v_rows], v_membership[v_rows], fpr_threshold)
            guesses[rows] = np.where(per_instance_loss[rows] <= -cutoff, 1, 0)
    prety_print_result(membership, guesses)

    # Attack Method 2: threshold on the counts.
    print(rule + 'Using Attack Method 2' + rule + '\n')
    if not per_class_thresh:
        cutoff = get_inference_threshold(v_counts, v_membership, fpr_threshold)
        guesses = np.where(counts >= cutoff, 1, 0)
    else:
        labels = np.unique(true_y)
        guesses = np.zeros(len(membership))
        for label in labels:
            rows, v_rows = rows_for(label)
            cutoff = get_inference_threshold(
                v_counts[v_rows], v_membership[v_rows], fpr_threshold)
            guesses[rows] = np.where(counts[rows] >= cutoff, 1, 0)
    prety_print_result(membership, guesses)
def get_pred_mem_ai(per_instance_loss, proposed_mi_outputs, proposed_ai_outputs, i, method=1, fpr_threshold=None):
    """Combine membership flags for the two candidate values of attribute ``i``.

    Each record is scored twice, once per candidate attribute value ("low"
    and "high"). A candidate is flagged when its score passes the threshold
    returned by ``get_inference_threshold`` (defined elsewhere): loss at most
    the negated threshold for ``method == 1``, count at least the threshold
    otherwise. The predicted attribute is the argmax of the flags weighted by
    the empirical frequency of each value (ties resolve to "low").

    Args:
        per_instance_loss: Unused; kept so existing callers keep working.
        proposed_mi_outputs: 6-tuple ``(true_y, v_true_y, v_membership,
            v_per_instance_loss, v_counts, counts)``; only the ``v_*``
            membership, loss and count entries are read.
        proposed_ai_outputs: 5-tuple ``(true_attribute_value_all,
            low_per_instance_loss_all, high_per_instance_loss_all,
            low_counts_all, high_counts_all)``, each indexed by ``i``.
        i: Index of the attribute to evaluate.
        method: ``1`` selects the loss-based rule, anything else the
            count-based rule.
        fpr_threshold: Forwarded unchanged to ``get_inference_threshold``.

    Returns:
        ``(thresh, flags)`` where ``flags`` is an integer array that is 1 for
        records with at least one flagged candidate whose predicted attribute
        equals the true attribute (for 0/1 attribute values).
    """
    _, _, v_membership, v_per_instance_loss, v_counts, _ = proposed_mi_outputs
    (true_attribute_value_all, low_per_instance_loss_all,
     high_per_instance_loss_all, low_counts_all, high_counts_all) = proposed_ai_outputs

    # Coerce once so plain Python sequences work too; the bitwise operators
    # below are only defined for arrays, not lists.
    true_attribute = np.asarray(true_attribute_value_all[i])
    high_prob = np.sum(true_attribute) / len(true_attribute)
    low_prob = 1 - high_prob

    if method == 1:
        thresh = get_inference_threshold(-v_per_instance_loss, v_membership, fpr_threshold)
        low_mem = np.where(np.asarray(low_per_instance_loss_all[i]) <= -thresh, 1, 0)
        high_mem = np.where(np.asarray(high_per_instance_loss_all[i]) <= -thresh, 1, 0)
    else:
        thresh = get_inference_threshold(v_counts, v_membership, fpr_threshold)
        low_mem = np.where(np.asarray(low_counts_all[i]) >= thresh, 1, 0)
        high_mem = np.where(np.asarray(high_counts_all[i]) >= thresh, 1, 0)

    # Row 0 = "low" candidate, row 1 = "high" candidate; argmax over axis 0
    # picks the candidate with the larger prior-weighted flag per record.
    pred_attribute_value = np.argmax(
        np.stack([low_prob * low_mem, high_prob * high_mem]), axis=0)
    mask = low_mem | high_mem
    # `pred ^ true ^ 1` is 1 exactly where the prediction matches the truth.
    return thresh, mask & (pred_attribute_value ^ true_attribute ^ 1)
def get_pred_mem_mi(per_instance_loss, proposed_mi_outputs, method=1, fpr_threshold=None, per_class_thresh=False, fixed_thresh=False):
    """Predict membership from a loss threshold or a count threshold.

    With ``method == 1`` a record is predicted a member when its loss is at
    most the threshold; with any other ``method`` when its count is at least
    the threshold. Thresholds come from ``get_inference_threshold`` (defined
    elsewhere) applied to the held-out ``v_*`` arrays.

    Args:
        per_instance_loss: Per-record loss values (used when ``method == 1``).
        proposed_mi_outputs: 6-tuple ``(true_y, v_true_y, v_membership,
            v_per_instance_loss, v_counts, counts)``.
        method: ``1`` for the loss rule, anything else for the count rule.
        fpr_threshold: Forwarded unchanged to ``get_inference_threshold``.
        per_class_thresh: If true, compute one threshold per distinct label
            in ``true_y``.
        fixed_thresh: Only read when ``method == 1`` and ``per_class_thresh``
            is true; the class threshold is then the mean held-out loss of
            that class over held-out positions below 10000.

    Returns:
        ``(thresh, pred_membership)``. For ``method == 1`` the reported
        threshold is clamped to be at least 0. With ``per_class_thresh`` only
        the first class's threshold is reported and ``pred_membership`` is a
        float array of 0/1 values with one entry per element of ``true_y``.
    """
    true_y, v_true_y, v_membership, v_per_instance_loss, v_counts, counts = proposed_mi_outputs
    use_loss = method == 1

    if not per_class_thresh:
        if use_loss:
            thresh = -get_inference_threshold(-v_per_instance_loss, v_membership, fpr_threshold)
            return max(0, thresh), np.where(per_instance_loss <= thresh, 1, 0)
        thresh = get_inference_threshold(v_counts, v_membership, fpr_threshold)
        return thresh, np.where(counts >= thresh, 1, 0)

    # Predictions are written at positions drawn from true_y, so the buffer
    # is sized by true_y rather than by the held-out set.
    pred_membership = np.zeros(len(true_y))
    threshs = []
    for c in np.unique(true_y):
        c_indices = np.flatnonzero(true_y == c)
        v_c_indices = np.flatnonzero(v_true_y == c)
        if use_loss:
            if fixed_thresh:
                # NOTE(review): 10000 is hard-coded; presumably it marks the
                # boundary of a subset of the held-out records -- confirm.
                thresh = np.mean(v_per_instance_loss[v_c_indices[v_c_indices < 10000]])
            else:
                thresh = -get_inference_threshold(
                    -v_per_instance_loss[v_c_indices], v_membership[v_c_indices], fpr_threshold)
            pred_membership[c_indices] = np.where(per_instance_loss[c_indices] <= thresh, 1, 0)
        else:
            thresh = get_inference_threshold(
                v_counts[v_c_indices], v_membership[v_c_indices], fpr_threshold)
            pred_membership[c_indices] = np.where(counts[c_indices] >= thresh, 1, 0)
        threshs.append(thresh)

    if use_loss:
        # Min / median / max of the per-class thresholds, each clamped at 0.
        print(max(0, min(threshs)), max(0, np.median(threshs)), max(0, max(threshs)))
        return max(0, threshs[0]), pred_membership
    return threshs[0], pred_membership
def get_pred_mem_mi(per_instance_loss, proposed_mi_outputs, method='yeom', fpr_threshold=None, per_class_thresh=False, fixed_thresh=False):
    """Predict membership with the Yeom (loss) or Merlin (count) attack.

    ``method == 'yeom'`` runs an improved version of the Yeom attack that
    finds a better threshold than the original: a record is predicted a
    member when its loss is at most the threshold. Any other ``method`` runs
    the Merlin attack, which uses the direction of the change in per-instance
    loss for the record: a record is predicted a member when its count is at
    least the threshold. Thresholds come from ``get_inference_threshold``
    (defined elsewhere) applied to the held-out ``v_*`` arrays.

    Args:
        per_instance_loss: Per-record loss values (used by ``'yeom'``).
        proposed_mi_outputs: 6-tuple ``(true_y, v_true_y, v_membership,
            v_per_instance_loss, v_counts, counts)``.
        method: ``'yeom'`` for the loss rule, anything else for Merlin.
        fpr_threshold: Forwarded unchanged to ``get_inference_threshold``.
        per_class_thresh: If true, compute one threshold per distinct label
            in ``true_y``.
        fixed_thresh: Only read for ``'yeom'`` with ``per_class_thresh``; the
            class threshold is then the mean held-out loss of that class over
            held-out positions below 10000.

    Returns:
        ``(thresh, pred_membership)``. For ``'yeom'`` the reported threshold
        is clamped to be at least 0. With ``per_class_thresh`` only the first
        class's threshold is reported and ``pred_membership`` is a float
        array of 0/1 values with one entry per element of ``true_y``.
    """
    true_y, v_true_y, v_membership, v_per_instance_loss, v_counts, counts = proposed_mi_outputs
    is_yeom = method == 'yeom'

    if not per_class_thresh:
        if is_yeom:
            thresh = -get_inference_threshold(-v_per_instance_loss, v_membership, fpr_threshold)
            return max(0, thresh), np.where(per_instance_loss <= thresh, 1, 0)
        # In this case, run the Merlin attack.
        thresh = get_inference_threshold(v_counts, v_membership, fpr_threshold)
        return thresh, np.where(counts >= thresh, 1, 0)

    # Predictions are written at positions drawn from true_y, so the buffer
    # is sized by true_y rather than by the held-out set.
    pred_membership = np.zeros(len(true_y))
    threshs = []
    for c in np.unique(true_y):
        c_indices = np.flatnonzero(true_y == c)
        v_c_indices = np.flatnonzero(v_true_y == c)
        if is_yeom:
            if fixed_thresh:
                # NOTE(review): 10000 is hard-coded; presumably it marks the
                # boundary of a subset of the held-out records -- confirm.
                thresh = np.mean(v_per_instance_loss[v_c_indices[v_c_indices < 10000]])
            else:
                thresh = -get_inference_threshold(
                    -v_per_instance_loss[v_c_indices], v_membership[v_c_indices], fpr_threshold)
            pred_membership[c_indices] = np.where(per_instance_loss[c_indices] <= thresh, 1, 0)
        else:
            thresh = get_inference_threshold(
                v_counts[v_c_indices], v_membership[v_c_indices], fpr_threshold)
            pred_membership[c_indices] = np.where(counts[c_indices] >= thresh, 1, 0)
        threshs.append(thresh)

    if is_yeom:
        # Min / median / max of the per-class thresholds, each clamped at 0.
        print(max(0, min(threshs)), max(0, np.median(threshs)), max(0, max(threshs)))
        return max(0, threshs[0]), pred_membership
    return threshs[0], pred_membership
def get_pred_mem_mi(per_instance_loss, proposed_mi_outputs, method=1, fpr_threshold=None, per_class_thresh=False):
    """Predict membership from a loss threshold or a count threshold.

    With ``method == 1`` the threshold is computed on the negated held-out
    losses and a record is predicted a member when its loss is at most the
    negated threshold. With any other ``method`` the threshold is computed on
    the held-out counts and a record is predicted a member when its count is
    at least the threshold. Thresholds come from ``get_inference_threshold``
    (defined elsewhere).

    Args:
        per_instance_loss: Per-record loss values (used when ``method == 1``).
        proposed_mi_outputs: 6-tuple ``(true_y, v_true_y, v_membership,
            v_per_instance_loss, v_counts, counts)``.
        method: ``1`` for the loss rule, anything else for the count rule.
        fpr_threshold: Forwarded unchanged to ``get_inference_threshold``.
        per_class_thresh: If true, compute one threshold per distinct label
            in ``true_y``.

    Returns:
        ``(thresh, pred_membership)``. ``thresh`` is the value returned by
        ``get_inference_threshold`` (for ``method == 1`` it applies to the
        negated loss). With ``per_class_thresh`` only the first class's
        threshold is reported and ``pred_membership`` is a float array of 0/1
        values with one entry per element of ``true_y``.
    """
    true_y, v_true_y, v_membership, v_per_instance_loss, v_counts, counts = proposed_mi_outputs
    use_loss = method == 1

    if not per_class_thresh:
        if use_loss:
            thresh = get_inference_threshold(-v_per_instance_loss, v_membership, fpr_threshold)
            return thresh, np.where(per_instance_loss <= -thresh, 1, 0)
        thresh = get_inference_threshold(v_counts, v_membership, fpr_threshold)
        return thresh, np.where(counts >= thresh, 1, 0)

    # Predictions are written at positions drawn from true_y, so the buffer
    # is sized by true_y rather than by the held-out set.
    pred_membership = np.zeros(len(true_y))
    threshs = []
    for c in np.unique(true_y):
        c_indices = np.flatnonzero(true_y == c)
        v_c_indices = np.flatnonzero(v_true_y == c)
        if use_loss:
            thresh = get_inference_threshold(
                -v_per_instance_loss[v_c_indices], v_membership[v_c_indices], fpr_threshold)
            pred_membership[c_indices] = np.where(per_instance_loss[c_indices] <= -thresh, 1, 0)
        else:
            thresh = get_inference_threshold(
                v_counts[v_c_indices], v_membership[v_c_indices], fpr_threshold)
            pred_membership[c_indices] = np.where(counts[c_indices] >= thresh, 1, 0)
        threshs.append(thresh)
    return threshs[0], pred_membership
def evaluate_on_all_features(membership, proposed_mi_outputs, proposed_ai_outputs, features, fpr_threshold=None, attack_method=1, per_class_thresh=False):
    """Evaluate the combined membership/attribute attack for every feature.

    For each feature index, every record is scored under both candidate
    attribute values ("low" and "high"). A candidate is flagged when its
    score passes the threshold from ``get_inference_threshold`` (defined
    elsewhere): loss at most the negated threshold for attack method 1, count
    at least the threshold for attack method 2. The predicted attribute is
    the argmax of the flags weighted by the empirical frequency of each value
    (ties resolve to "low"). A record counts as a predicted member when at
    least one candidate is flagged and the predicted attribute equals the
    true one. Each feature's result is reported through
    ``prety_print_result``.

    Args:
        membership: Reference labels passed to ``prety_print_result``; its
            length also sizes the per-class flag buffers.
        proposed_mi_outputs: 6-tuple ``(true_y, v_true_y, v_membership,
            v_per_instance_loss, v_counts, counts)``.
        proposed_ai_outputs: 5-tuple ``(true_attribute_value_all,
            low_per_instance_loss_all, high_per_instance_loss_all,
            low_counts_all, high_counts_all)``, each indexed by feature.
        features: Only its length is used (number of features to evaluate).
        fpr_threshold: Forwarded unchanged to ``get_inference_threshold``.
        attack_method: ``1`` (loss-based) or ``2`` (count-based).
        per_class_thresh: If true, compute one threshold per distinct label
            in ``true_y``.

    Raises:
        ValueError: If a feature is evaluated with an ``attack_method`` other
            than 1 or 2.
    """
    true_y, v_true_y, v_membership, v_per_instance_loss, v_counts, _ = proposed_mi_outputs
    (true_attribute_value_all, low_per_instance_loss_all,
     high_per_instance_loss_all, low_counts_all, high_counts_all) = proposed_ai_outputs

    def _flag_members(v_scores, low_scores, high_scores):
        # Flag candidates whose score reaches the threshold (higher = member).
        if not per_class_thresh:
            thresh = get_inference_threshold(v_scores, v_membership, fpr_threshold)
            return np.where(low_scores >= thresh, 1, 0), np.where(high_scores >= thresh, 1, 0)
        low_flags = np.zeros(len(membership), dtype='int32')
        high_flags = np.zeros(len(membership), dtype='int32')
        for c in np.unique(true_y):
            c_indices = np.flatnonzero(true_y == c)
            v_c_indices = np.flatnonzero(v_true_y == c)
            thresh = get_inference_threshold(
                v_scores[v_c_indices], v_membership[v_c_indices], fpr_threshold)
            low_flags[c_indices] = np.where(low_scores[c_indices] >= thresh, 1, 0)
            high_flags[c_indices] = np.where(high_scores[c_indices] >= thresh, 1, 0)
        return low_flags, high_flags

    # NOTE(review): the thresholds depend only on the held-out arrays, not on
    # the feature index; they could be hoisted out of this loop if
    # get_inference_threshold is side-effect free -- confirm before doing so.
    for i in range(len(features)):
        true_attribute = np.asarray(true_attribute_value_all[i])
        high_prob = np.sum(true_attribute) / len(true_attribute)
        low_prob = 1 - high_prob

        if attack_method == 1:
            # Negating the losses turns "loss <= -thresh" into the same
            # "score >= thresh" test the count-based attack uses.
            low_mem, high_mem = _flag_members(
                -v_per_instance_loss,
                -np.asarray(low_per_instance_loss_all[i]),
                -np.asarray(high_per_instance_loss_all[i]))
        elif attack_method == 2:
            low_mem, high_mem = _flag_members(
                v_counts,
                np.asarray(low_counts_all[i]),
                np.asarray(high_counts_all[i]))
        else:
            raise ValueError(
                'attack_method must be 1 or 2, got {!r}'.format(attack_method))

        # Row 0 = "low" candidate, row 1 = "high" candidate.
        pred_attribute_value = np.argmax(
            np.stack([low_prob * low_mem, high_prob * high_mem]), axis=0)
        mask = low_mem | high_mem
        # `pred ^ true ^ 1` is 1 exactly where the prediction matches the truth.
        pred_membership = mask & (pred_attribute_value ^ true_attribute ^ 1)
        prety_print_result(membership, pred_membership)