def train_shadow_models(n_hidden=50,
                        epochs=100,
                        n_shadow=20,
                        learning_rate=0.05,
                        batch_size=100,
                        l2_ratio=1e-7,
                        model='nn',
                        save=True):
    """Train the shadow models and assemble the attack model's training set.

    For each of the ``n_shadow`` shadow datasets a model is trained with
    ``train_model`` and then queried on its own training split (membership
    label 1) and on its test split (membership label 0).

    Returns:
        A tuple ``(attack_x, attack_y, classes)``: the stacked second output
        of ``get_predictions`` cast to float32, the membership labels cast to
        int32, and the concatenated train/test class labels of every shadow
        dataset. When ``save`` is true, ``attack_x`` and ``attack_y`` are also
        written to ``MODEL_PATH + 'attack_train_data.npz'``.
    """
    score_blocks, label_blocks, class_blocks = [], [], []

    for shadow_id in range(n_shadow):
        # NOTE(review): `args` is not a parameter of this function; presumably
        # a module-level global -- confirm against the rest of the file.
        dataset = load_data('shadow{}_data.npz'.format(shadow_id), args)
        train_x, train_y, test_x, test_y = dataset

        classifier = train_model(dataset,
                                 n_hidden=n_hidden,
                                 epochs=epochs,
                                 learning_rate=learning_rate,
                                 batch_size=batch_size,
                                 model=model,
                                 l2_ratio=l2_ratio)

        # Training split is labelled 1 (member), test split 0 (non-member).
        for split_x, make_labels in ((train_x, np.ones), (test_x, np.zeros)):
            input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
                x={'x': split_x}, num_epochs=1, shuffle=False)
            _, scores = get_predictions(classifier.predict(input_fn=input_fn))
            score_blocks.append(scores)
            label_blocks.append(make_labels(split_x.shape[0]))

        class_blocks.append(np.concatenate([train_y, test_y]))

    attack_x = np.vstack(score_blocks).astype('float32')
    attack_y = np.concatenate(label_blocks).astype('int32')
    classes = np.concatenate(class_blocks)

    if save:
        np.savez(MODEL_PATH + 'attack_train_data.npz', attack_x, attack_y)

    return attack_x, attack_y, classes
def attribute_inference(true_x, true_y, batch_size, classifier, train_loss,
                        features):
    """Run the attribute inference attack for each feature in ``features``.

    For every feature, ``getAttributeVariations`` supplies a "low" and a
    "high" variant of the data. Both are scored by ``classifier``, converted
    to losses with ``log_loss``, and compared under a zero-mean normal
    density whose scale is ``train_loss``: the variant with the higher
    density is the predicted one (0 = low, 1 = high).

    Args:
        true_x: Records handed to ``getAttributeVariations``.
        true_y: Labels passed to ``log_loss``.
        batch_size: Unused; kept for interface compatibility.
        classifier: Object exposing ``predict(input_fn=...)``.
        train_loss: Scale of the normal density used to compare losses.
        features: Iterable of feature identifiers to attack.

    Returns:
        A tuple ``(attr_adv, attr_mem, attr_pred)`` of per-feature lists:
        the advantage ``tpr[1] - fpr[1]`` from ``roc_curve``, the third value
        returned by ``getAttributeVariations``, and the low/high losses
        stacked with ``np.vstack``.
    """
    print('-' * 10 + 'ATTRIBUTE INFERENCE' + '-' * 10 + '\n')
    attr_adv, attr_mem, attr_pred = [], [], []

    # The density depends only on train_loss, so build it once instead of
    # six times per feature.
    loss_density = stats.norm(0, train_loss)

    def per_instance_loss(data):
        # Score `data` with the classifier and turn the scores into losses.
        pred_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': data},
                                                           num_epochs=1,
                                                           shuffle=False)
        predictions = classifier.predict(input_fn=pred_input_fn)
        _, scores = get_predictions(predictions)
        return log_loss(true_y, scores.astype('float32'))

    for feature in features:
        low_data, high_data, membership = getAttributeVariations(
            true_x, feature)
        low_op = per_instance_loss(low_data)
        high_op = per_instance_loss(high_data)

        pred_membership = np.where(
            loss_density.pdf(low_op) >= loss_density.pdf(high_op), 0, 1)
        fpr, tpr, thresholds = roc_curve(membership,
                                         pred_membership,
                                         pos_label=1)
        print(fpr, tpr, tpr - fpr)
        attr_adv.append(tpr[1] - fpr[1])

        # The original also computed two further ROC curves here whose
        # results were only used by commented-out plotting code; they have
        # been dropped because nothing read them.

        attr_mem.append(membership)
        attr_pred.append(np.vstack((low_op, high_op)))

    return attr_adv, attr_mem, attr_pred
def yeom_attribute_inference(true_x,
                             true_y,
                             classifier,
                             membership,
                             features,
                             train_loss,
                             test_loss=None):
    """Run Yeom et al.'s attribute inference for each feature in ``features``.

    For every feature the column ``true_x[:, feature]`` is temporarily
    overwritten with the "low" and then the "high" value returned by
    ``get_attribute_variations``; the classifier is queried each time and
    the per-instance loss is computed with ``log_loss``. The original column
    is written back before moving on, even if an exception is raised.

    Args:
        true_x: 2-D array of records; modified in place during the call and
            restored afterwards.
        true_y: Labels passed to ``log_loss``.
        classifier: Object exposing ``predict(input_fn=...)``.
        membership: Ground truth passed to ``prety_print_result``.
        features: Iterable of column indices to attack.
        train_loss: Scale of the zero-mean normal density for training loss.
        test_loss: Optional scale of the corresponding density for test loss.
            When ``None``, only the training-loss density is used.

    Returns:
        A list with one predicted-membership array per feature.
    """
    print('-' * 10 + 'YEOM\'S ATTRIBUTE INFERENCE' + '-' * 10 + '\n')
    pred_membership_all = []

    # These densities do not depend on the feature; build them once.
    train_density = stats.norm(0, train_loss)
    # Identity check: `== None` misbehaves for array-like arguments.
    test_density = None if test_loss is None else stats.norm(0, test_loss)

    def per_instance_loss():
        # Query the classifier on the current (modified) contents of true_x.
        pred_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
            x={'x': true_x}, num_epochs=1, shuffle=False)
        predictions = classifier.predict(input_fn=pred_input_fn)
        _, scores = get_predictions(predictions)
        return log_loss(true_y, scores.astype('float32'))

    for feature in features:
        original_attribute = np.copy(true_x[:, feature])
        try:
            low_value, high_value, true_attribute_value = (
                get_attribute_variations(true_x, feature))

            true_x[:, feature] = low_value
            low_op = per_instance_loss()

            true_x[:, feature] = high_value
            high_op = per_instance_loss()

            high_prob = np.sum(true_attribute_value) / len(
                true_attribute_value)
            low_prob = 1 - high_prob

            if test_density is None:
                pred_attribute_value = np.where(
                    low_prob * train_density.pdf(low_op) >=
                    high_prob * train_density.pdf(high_op), 0, 1)
                mask = [1] * len(pred_attribute_value)
            else:
                low_mem = np.where(
                    train_density.pdf(low_op) >= test_density.pdf(low_op), 1,
                    0)
                high_mem = np.where(
                    train_density.pdf(high_op) >= test_density.pdf(high_op),
                    1, 0)
                pred_attribute_value = [
                    np.argmax([low_prob * a, high_prob * b])
                    for a, b in zip(low_mem, high_mem)
                ]
                mask = [a | b for a, b in zip(low_mem, high_mem)]

            # 1 where the predicted attribute equals the true one (XNOR),
            # restricted to the instances selected by `mask`.
            pred_membership = mask & (
                pred_attribute_value ^ true_attribute_value ^
                [1] * len(pred_attribute_value))
            prety_print_result(membership, pred_membership)
            pred_membership_all.append(pred_membership)
        finally:
            # Always hand the caller's array back unmodified.
            true_x[:, feature] = original_attribute

    return pred_membership_all
def train_attack_model(classes,
                       dataset=None,
                       n_hidden=50,
                       learning_rate=0.01,
                       batch_size=200,
                       epochs=50,
                       model='nn',
                       l2_ratio=1e-7):
    """Train one attack model per class and evaluate them jointly.

    Args:
        classes: Pair ``(train_classes, test_classes)`` giving the class of
            every row in the attack train and test sets.
        dataset: ``(train_x, train_y, test_x, test_y)``; loaded with
            ``load_attack_data()`` when ``None``.
        n_hidden, learning_rate, batch_size, epochs, model, l2_ratio:
            Forwarded to ``train_model``.

    Returns:
        A tuple ``(attack_adv, pred_scores)``: ``tpr[1] - fpr[1]`` from the
        ROC curve over all per-class test predictions, and the concatenated
        second output of ``get_predictions``.
    """
    if dataset is None:
        dataset = load_attack_data()
    train_x, train_y, test_x, test_y = dataset
    train_classes, test_classes = classes

    all_train_rows = np.arange(len(train_x))
    all_test_rows = np.arange(len(test_x))

    true_y_parts, pred_y_parts = [], []
    score_parts, true_x_parts = [], []

    for cls in np.unique(train_classes):
        # Restrict both splits to the rows belonging to this class.
        train_rows = all_train_rows[train_classes == cls]
        test_rows = all_test_rows[test_classes == cls]
        cls_test_x = test_x[test_rows]
        cls_test_y = test_y[test_rows]
        cls_dataset = (train_x[train_rows], train_y[train_rows], cls_test_x,
                       cls_test_y)

        classifier = train_model(cls_dataset,
                                 n_hidden=n_hidden,
                                 epochs=epochs,
                                 learning_rate=learning_rate,
                                 batch_size=batch_size,
                                 model=model,
                                 l2_ratio=l2_ratio)

        input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': cls_test_x},
                                                      num_epochs=1,
                                                      shuffle=False)
        cls_pred_y, cls_scores = get_predictions(
            classifier.predict(input_fn=input_fn))

        true_y_parts.append(cls_test_y)
        pred_y_parts.append(cls_pred_y)
        true_x_parts.append(cls_test_x)
        score_parts.append(cls_scores)

    print('-' * 10 + 'FINAL EVALUATION' + '-' * 10 + '\n')
    true_y = np.concatenate(true_y_parts)
    pred_y = np.concatenate(pred_y_parts)
    true_x = np.concatenate(true_x_parts)
    pred_scores = np.concatenate(score_parts)

    prety_print_result(true_y, pred_y)
    fpr, tpr, thresholds = roc_curve(true_y, pred_y, pos_label=1)
    attack_adv = tpr[1] - fpr[1]
    return (attack_adv, pred_scores)
def train_target_model(dataset,
                       hold_out_train_data=None,
                       epochs=100,
                       batch_size=100,
                       learning_rate=0.01,
                       l2_ratio=1e-7,
                       n_hidden=50,
                       model='nn',
                       save=True,
                       privacy='no_privacy',
                       dp='dp',
                       epsilon=0.5,
                       delta=1e-5):
    """Train the target model and build the attack model's test set.

    The model is trained with ``train_private`` and then queried on its
    training split (membership label 1) and its test split (label 0).

    Args:
        dataset: ``(train_x, train_y, test_x, test_y)``.
        hold_out_train_data: Forwarded to ``train_private``.
        epochs, batch_size, learning_rate, l2_ratio, n_hidden, model,
            privacy, dp, epsilon, delta: Forwarded to ``train_private``.
        save: When true, write ``attack_x``/``attack_y`` to
            ``MODEL_PATH + 'attack_test_data.npz'``.

    Returns:
        ``(attack_x, attack_y, classes, train_loss, classifier, train_acc,
        test_acc)`` where ``attack_x`` is float32, ``attack_y`` is int32 and
        ``classes`` is the concatenation of ``train_y`` and ``test_y``.
    """
    train_x, train_y, test_x, test_y = dataset

    classifier, _, _, train_loss, train_acc, test_acc = train_private(
        dataset,
        hold_out_train_data,
        n_hidden=n_hidden,
        epochs=epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        model=model,
        l2_ratio=l2_ratio,
        silent=False,
        privacy=privacy,
        dp=dp,
        epsilon=epsilon,
        delta=delta)

    # test data for attack model
    attack_x, attack_y = [], []

    # data used in training, label is 1
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': train_x},
                                                       num_epochs=1,
                                                       shuffle=False)
    predictions = classifier.predict(input_fn=pred_input_fn)
    _, pred_scores = get_predictions(predictions)
    attack_x.append(pred_scores)
    attack_y.append(np.ones(train_x.shape[0]))

    # data not used in training, label is 0
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': test_x},
                                                       num_epochs=1,
                                                       shuffle=False)
    predictions = classifier.predict(input_fn=pred_input_fn)
    _, pred_scores = get_predictions(predictions)
    attack_x.append(pred_scores)
    attack_y.append(np.zeros(test_x.shape[0]))

    attack_x = np.vstack(attack_x).astype('float32')
    attack_y = np.concatenate(attack_y).astype('int32')

    if save:
        np.savez(MODEL_PATH + 'attack_test_data.npz', attack_x, attack_y)
        # The former 'target_model.npz' dump went through
        # lasagne.layers.get_all_param_values(output_layer), but no
        # `output_layer` is defined in this function, so that call raised
        # NameError whenever save=True. It has been removed.

    classes = np.concatenate([train_y, test_y])
    return (attack_x, attack_y, classes, train_loss, classifier, train_acc,
            test_acc)
def train_target_model(args,
                       dataset=None,
                       epochs=100,
                       batch_size=100,
                       learning_rate=0.01,
                       clipping_threshold=1,
                       l2_ratio=1e-7,
                       n_hidden=50,
                       model='nn',
                       privacy='no_privacy',
                       dp='dp',
                       epsilon=0.5,
                       delta=1e-5,
                       save=True):
    """Train the target model and build the attack model's test set.

    The model is trained with ``train_model`` and then queried on its
    training split (membership label 1) and its test split (label 0).

    Args:
        args: Passed to ``load_data`` when ``dataset`` is not supplied.
        dataset: ``(train_x, train_y, test_x, test_y)``; loaded from
            ``'target_data.npz'`` when ``None``.
        epochs, batch_size, learning_rate, clipping_threshold, l2_ratio,
            n_hidden, model, privacy, dp, epsilon, delta: Forwarded to
            ``train_model``.
        save: When true, write ``attack_x``/``attack_y`` to
            ``MODEL_PATH + 'attack_test_data.npz'``.

    Returns:
        ``(attack_x, attack_y, classes, classifier, aux)`` where ``attack_x``
        is float32, ``attack_y`` is int32, ``classes`` is the concatenation
        of ``train_y`` and ``test_y`` and ``aux`` is the second value
        returned by ``train_model``.
    """
    # Identity check: `dataset == None` would compare element-wise (and then
    # fail in the `if`) should a caller ever pass an array-like dataset.
    if dataset is None:
        dataset = load_data('target_data.npz', args)
    train_x, train_y, test_x, test_y = dataset

    classifier, aux = train_model(dataset,
                                  n_hidden=n_hidden,
                                  epochs=epochs,
                                  learning_rate=learning_rate,
                                  clipping_threshold=clipping_threshold,
                                  batch_size=batch_size,
                                  model=model,
                                  l2_ratio=l2_ratio,
                                  silent=False,
                                  privacy=privacy,
                                  dp=dp,
                                  epsilon=epsilon,
                                  delta=delta)

    # test data for attack model
    attack_x, attack_y = [], []

    # data used in training, label is 1
    pred_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
        x={'x': train_x}, num_epochs=1, shuffle=False)
    predictions = classifier.predict(input_fn=pred_input_fn)
    _, pred_scores = get_predictions(predictions)
    attack_x.append(pred_scores)
    attack_y.append(np.ones(train_x.shape[0]))

    # data not used in training, label is 0
    pred_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
        x={'x': test_x}, num_epochs=1, shuffle=False)
    predictions = classifier.predict(input_fn=pred_input_fn)
    _, pred_scores = get_predictions(predictions)
    attack_x.append(pred_scores)
    attack_y.append(np.zeros(test_x.shape[0]))

    attack_x = np.vstack(attack_x).astype('float32')
    attack_y = np.concatenate(attack_y).astype('int32')

    if save:
        np.savez(MODEL_PATH + 'attack_test_data.npz', attack_x, attack_y)

    classes = np.concatenate([train_y, test_y])
    return attack_x, attack_y, classes, classifier, aux
def predict():
    """Classify the ``text`` field of the JSON request body.

    Returns:
        A JSON response. On success it holds ``predictions``, the class
        names for the submitted text. On failure it holds ``error`` (and,
        for exceptions, ``trace`` with the formatted traceback).
    """
    if not model:
        # Previously this path fell off the end and returned None, which is
        # not a valid response; report the problem explicitly instead.
        return jsonify({'error': 'model is not loaded'})

    try:
        samples = [request.get_json()['text']]
        sample_predictions = classifier.get_predictions(
            samples, model, tf_vectorizer, idf_vector, pos_vectorizer)
        predictions = [
            classifier.class_to_name(prediction)
            for prediction in sample_predictions
        ]
        return jsonify({'predictions': predictions})
    # `except Exception, e` is Python-2-only syntax; `as` works on 2.6+ and 3.
    # Catching broadly is deliberate here: this is the request boundary and
    # the error is reported back to the client.
    except Exception as e:
        return jsonify({'error': str(e), 'trace': traceback.format_exc()})
def proposed_attribute_inference(true_x, true_y, classifier, membership,
                                 features, args):
    """Collect per-instance losses and loss-increase counts for each feature.

    For every feature the column ``true_x[:, feature]`` is temporarily
    overwritten with the "low" and then the "high" value returned by
    ``get_attribute_variations``. For each variant the classifier is queried,
    the per-instance loss is computed with ``log_loss`` and
    ``loss_increase_counts`` is run on the modified data. The original column
    is written back before moving on, even if an exception is raised.

    Args:
        true_x: 2-D array of records; modified in place during the call and
            restored afterwards.
        true_y: Labels passed to ``log_loss`` and ``loss_increase_counts``.
        classifier: Object exposing ``predict(input_fn=...)``.
        membership: Unused; kept for interface compatibility.
        features: Iterable of column indices to attack.
        args: Provides ``attack_noise_type``, ``attack_noise_coverage`` and
            ``attack_noise_magnitude``.

    Returns:
        ``(true_attribute_value_all, low_per_instance_loss_all,
        high_per_instance_loss_all, low_counts_all, high_counts_all)``, each
        a list with one entry per feature.
    """
    print('-' * 10 + 'PROPOSED ATTRIBUTE INFERENCE' + '-' * 10 + '\n')
    low_per_instance_loss_all, high_per_instance_loss_all = [], []
    low_counts_all, high_counts_all = [], []
    true_attribute_value_all = []

    def loss_and_counts(noise_params):
        # Evaluate the classifier on the current (modified) contents of
        # true_x and count loss increases under noise.
        pred_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
            x={'x': true_x}, num_epochs=1, shuffle=False)
        predictions = classifier.predict(input_fn=pred_input_fn)
        _, scores = get_predictions(predictions)
        per_instance_loss = log_loss(true_y, scores.astype('float32'))
        counts = loss_increase_counts(true_x, true_y, classifier,
                                      per_instance_loss, noise_params)
        return per_instance_loss, counts

    for feature in features:
        original_attribute = np.copy(true_x[:, feature])
        try:
            low_value, high_value, true_attribute_value = (
                get_attribute_variations(true_x, feature))
            noise_params = (args.attack_noise_type,
                            args.attack_noise_coverage,
                            args.attack_noise_magnitude)

            true_x[:, feature] = low_value
            low_op, low_counts = loss_and_counts(noise_params)

            true_x[:, feature] = high_value
            high_op, high_counts = loss_and_counts(noise_params)

            true_attribute_value_all.append(true_attribute_value)
            low_per_instance_loss_all.append(low_op)
            high_per_instance_loss_all.append(high_op)
            low_counts_all.append(low_counts)
            high_counts_all.append(high_counts)
        finally:
            # Always hand the caller's array back unmodified.
            true_x[:, feature] = original_attribute

    return (true_attribute_value_all, low_per_instance_loss_all,
            high_per_instance_loss_all, low_counts_all, high_counts_all)
def loss_increase_counts(true_x,
                         true_y,
                         classifier,
                         per_instance_loss,
                         noise_params,
                         max_t=100):
    """Count how often added noise raises each instance's loss.

    Runs ``max_t`` trials. In each trial noise from ``generate_noise`` is
    added to ``true_x``, the classifier is queried on the result, and every
    instance whose ``log_loss`` exceeds the corresponding entry of
    ``per_instance_loss`` has its counter incremented.

    Returns:
        An array of length ``len(true_x)`` holding the per-instance counts.
    """
    increase_counts = np.zeros(len(true_x))

    for _ in range(max_t):
        perturbed_x = true_x + generate_noise(true_x.shape, true_x.dtype,
                                              noise_params)
        input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
            x={'x': perturbed_x}, num_epochs=1, shuffle=False)
        _, noisy_scores = get_predictions(
            classifier.predict(input_fn=input_fn))

        noisy_loss = np.array(log_loss(true_y, noisy_scores))
        loss_went_up = noisy_loss > per_instance_loss
        increase_counts += np.where(loss_went_up, 1, 0)

    return increase_counts