def attribute_inference(true_x, true_y, batch_size, classifier, train_loss, features):
    print('-' * 10 + 'ATTRIBUTE INFERENCE' + '-' * 10 + '\n')
    attr_adv, attr_mem, attr_pred = [], [], []
    for feature in features:
        # generate record variants with the low / high value of the target attribute
        low_data, high_data, membership = getAttributeVariations(true_x, feature)

        # query the target model on the low-value variants
        pred_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'x': low_data},
            num_epochs=1,
            shuffle=False)
        predictions = classifier.predict(input_fn=pred_input_fn)
        _, low_op = get_predictions(predictions)

        # query the target model on the high-value variants
        pred_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'x': high_data},
            num_epochs=1,
            shuffle=False)
        predictions = classifier.predict(input_fn=pred_input_fn)
        _, high_op = get_predictions(predictions)

        low_op = low_op.astype('float32')
        high_op = high_op.astype('float32')
        low_op = log_loss(true_y, low_op)
        high_op = log_loss(true_y, high_op)

        # predict the attribute value whose per-record loss is more likely under the
        # zero-mean Gaussian fitted to the training loss
        pred_membership = np.where(
            stats.norm(0, train_loss).pdf(low_op) >= stats.norm(0, train_loss).pdf(high_op),
            0, 1)
        fpr, tpr, thresholds = roc_curve(membership, pred_membership, pos_label=1)
        print(fpr, tpr, tpr - fpr)
        attr_adv.append(tpr[1] - fpr[1])
        #plt.plot(fpr, tpr)

        # membership
        fpr, tpr, thresholds = roc_curve(
            membership,
            stats.norm(0, train_loss).pdf(high_op) - stats.norm(0, train_loss).pdf(low_op),
            pos_label=1)
        #plt.plot(fpr, tpr)

        # non-membership
        fpr, tpr, thresholds = roc_curve(
            membership,
            stats.norm(0, train_loss).pdf(low_op) - stats.norm(0, train_loss).pdf(high_op),
            pos_label=0)
        #plt.show()

        attr_mem.append(membership)
        attr_pred.append(np.vstack((low_op, high_op)))
    return attr_adv, attr_mem, attr_pred
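
# Usage sketch (illustrative only): assumes a trained classifier and train_loss as returned
# by train_target_model below, and that getAttributeVariations, get_predictions and the
# module's log_loss helper are defined elsewhere in this file. The feature indices are
# hypothetical.
#
#   attack_x, attack_y, classes, train_loss, classifier, train_acc, test_acc = \
#       train_target_model(dataset)
#   train_x, train_y, _, _ = dataset
#   attr_adv, attr_mem, attr_pred = attribute_inference(
#       train_x, train_y, batch_size=100, classifier=classifier,
#       train_loss=train_loss, features=[0, 1, 2])
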
def train_target_model(dataset, epochs=100, batch_size=100, learning_rate=0.01, l2_ratio=1e-7,
                       n_hidden=50, model='nn', save=True, privacy='no_privacy', dp='dp',
                       epsilon=0.5, delta=1e-5):
    train_x, train_y, test_x, test_y = dataset

    classifier, _, _, train_loss, train_acc, test_acc = train_model(
        dataset,
        n_hidden=n_hidden,
        epochs=epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        model=model,
        l2_ratio=l2_ratio,
        silent=False,
        privacy=privacy,
        dp=dp,
        epsilon=epsilon,
        delta=delta)

    # test data for attack model
    attack_x, attack_y = [], []

    # data used in training, label is 1
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': train_x},
        num_epochs=1,
        shuffle=False)
    predictions = classifier.predict(input_fn=pred_input_fn)
    _, pred_scores = get_predictions(predictions)
    attack_x.append(pred_scores)
    attack_y.append(np.ones(train_x.shape[0]))

    # data not used in training, label is 0
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': test_x},
        num_epochs=1,
        shuffle=False)
    predictions = classifier.predict(input_fn=pred_input_fn)
    _, pred_scores = get_predictions(predictions)
    attack_x.append(pred_scores)
    attack_y.append(np.zeros(test_x.shape[0]))

    attack_x = np.vstack(attack_x)
    attack_y = np.concatenate(attack_y)
    attack_x = attack_x.astype('float32')
    attack_y = attack_y.astype('int32')

    if save:
        np.savez(MODEL_PATH + 'attack_test_data.npz', attack_x, attack_y)
        # NOTE: the original Theano/Lasagne version also saved the target model's weights here;
        # `output_layer` is not defined for the tf.estimator classifier, so that call is disabled.
        # np.savez(MODEL_PATH + 'target_model.npz', *lasagne.layers.get_all_param_values(output_layer))

    classes = np.concatenate([train_y, test_y])
    return attack_x, attack_y, classes, train_loss, classifier, train_acc, test_acc
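
# Usage sketch (illustrative only): the file name 'target_data.npz' is an assumption
# following the shadow-data naming convention below; load_data and MODEL_PATH are assumed
# to be defined elsewhere in this module. The returned attack_x / attack_y serve as the
# membership-inference attack model's test set.
#
#   dataset = load_data('target_data.npz')
#   attack_test_x, attack_test_y, test_classes, train_loss, classifier, train_acc, test_acc = \
#       train_target_model(dataset, epochs=100, model='nn', privacy='no_privacy')
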
def train_shadow_models(n_hidden=50, epochs=100, n_shadow=20, learning_rate=0.05, batch_size=100,
                        l2_ratio=1e-7, model='nn', save=True):
    # for attack model
    attack_x, attack_y = [], []
    classes = []
    for i in range(n_shadow):
        #print('Training shadow model {}'.format(i))
        data = load_data('shadow{}_data.npz'.format(i))
        train_x, train_y, test_x, test_y = data

        # train model
        classifier, _, _, _, _, _ = train_model(
            data,
            n_hidden=n_hidden,
            epochs=epochs,
            learning_rate=learning_rate,
            batch_size=batch_size,
            model=model,
            l2_ratio=l2_ratio)

        #print('Gather training data for attack model')
        attack_i_x, attack_i_y = [], []

        # data used in training, label is 1
        pred_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'x': train_x},
            num_epochs=1,
            shuffle=False)
        predictions = classifier.predict(input_fn=pred_input_fn)
        _, pred_scores = get_predictions(predictions)
        attack_i_x.append(pred_scores)
        attack_i_y.append(np.ones(train_x.shape[0]))

        # data not used in training, label is 0
        pred_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'x': test_x},
            num_epochs=1,
            shuffle=False)
        predictions = classifier.predict(input_fn=pred_input_fn)
        _, pred_scores = get_predictions(predictions)
        attack_i_x.append(pred_scores)
        attack_i_y.append(np.zeros(test_x.shape[0]))

        attack_x += attack_i_x
        attack_y += attack_i_y
        classes.append(np.concatenate([train_y, test_y]))

    # train data for attack model
    attack_x = np.vstack(attack_x)
    attack_y = np.concatenate(attack_y)
    attack_x = attack_x.astype('float32')
    attack_y = attack_y.astype('int32')
    classes = np.concatenate(classes)

    if save:
        np.savez(MODEL_PATH + 'attack_train_data.npz', attack_x, attack_y)

    return attack_x, attack_y, classes
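
# Usage sketch (illustrative only): the shadow{i}_data.npz files are assumed to have been
# produced by a data-preparation step elsewhere in the repository. The outputs form the
# attack model's training set, mirroring the target-model outputs produced above.
#
#   attack_train_x, attack_train_y, train_classes = train_shadow_models(
#       n_shadow=20, epochs=100, model='nn')
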