Example #1
0
def train_shadow_models(n_hidden=50,
                        epochs=100,
                        n_shadow=20,
                        learning_rate=0.05,
                        batch_size=100,
                        l2_ratio=1e-7,
                        model='nn',
                        save=True):
    """Train ``n_shadow`` shadow models and build the attack training set.

    For every shadow index ``i`` the dataset is loaded from
    ``'shadow{i}_data.npz'``, a classifier is fitted with ``train_model`` and
    its prediction scores are collected for both splits: scores on the
    training split get label 1, scores on the test split get label 0.

    NOTE(review): ``load_data`` is called with ``args``, which is not a
    parameter of this function, so it must resolve to a module-level name.

    Args:
        n_hidden, epochs, learning_rate, batch_size, l2_ratio, model:
            forwarded unchanged to ``train_model``.
        n_shadow: number of shadow models (and shadow data files) to process.
        save: when true, write the stacked scores and labels to
            ``MODEL_PATH + 'attack_train_data.npz'``.

    Returns:
        Tuple ``(attack_x, attack_y, classes)``: the stacked scores as
        float32, the 1/0 labels as int32, and the concatenated class labels
        (train labels followed by test labels, per shadow model).
    """

    def _scores_for(shadow_classifier, features):
        # Single un-shuffled pass so scores stay aligned with ``features``.
        input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
            x={'x': features}, num_epochs=1, shuffle=False)
        _, scores = get_predictions(
            shadow_classifier.predict(input_fn=input_fn))
        return scores

    score_chunks, label_chunks, class_chunks = [], [], []
    for shadow_idx in range(n_shadow):
        shadow_data = load_data('shadow{}_data.npz'.format(shadow_idx), args)
        train_x, train_y, test_x, test_y = shadow_data

        shadow_classifier = train_model(shadow_data,
                                        n_hidden=n_hidden,
                                        epochs=epochs,
                                        learning_rate=learning_rate,
                                        batch_size=batch_size,
                                        model=model,
                                        l2_ratio=l2_ratio)

        # Records the shadow model was trained on: label 1.
        score_chunks.append(_scores_for(shadow_classifier, train_x))
        label_chunks.append(np.ones(train_x.shape[0]))

        # Records the shadow model never saw: label 0.
        score_chunks.append(_scores_for(shadow_classifier, test_x))
        label_chunks.append(np.zeros(test_x.shape[0]))

        class_chunks.append(np.concatenate([train_y, test_y]))

    # Flatten the per-model chunks into the attack model's training arrays.
    attack_x = np.vstack(score_chunks).astype('float32')
    attack_y = np.concatenate(label_chunks).astype('int32')
    classes = np.concatenate(class_chunks)

    if save:
        np.savez(MODEL_PATH + 'attack_train_data.npz', attack_x, attack_y)

    return attack_x, attack_y, classes
Example #2
0
def attribute_inference(true_x, true_y, batch_size, classifier, train_loss,
                        features):
    """Run the loss-based attribute inference attack for each feature.

    For every entry of ``features`` two variants of the data are obtained
    from ``getAttributeVariations`` (``low_data`` and ``high_data``), both are
    scored by ``classifier`` and turned into losses with ``log_loss``.  The
    variant whose loss has the higher density under ``norm(0, train_loss)``
    is taken as the prediction (0 for low, 1 for high; ties go to 0).

    Args:
        true_x: records passed to ``getAttributeVariations``.
        true_y: labels passed to ``log_loss``.
        batch_size: unused; kept so existing callers keep working.
        classifier: object exposing ``predict(input_fn=...)``.
        train_loss: scale of the zero-centred normal used to compare losses.
        features: iterable of feature identifiers to attack.

    Returns:
        Tuple ``(attr_adv, attr_mem, attr_pred)`` of per-feature lists:
        ``tpr[1] - fpr[1]`` from ``roc_curve``, the third value returned by
        ``getAttributeVariations``, and the stacked ``(low, high)`` losses.
    """
    print('-' * 10 + 'ATTRIBUTE INFERENCE' + '-' * 10 + '\n')
    attr_adv, attr_mem, attr_pred = [], [], []
    for feature in features:
        low_data, high_data, membership = getAttributeVariations(
            true_x, feature)

        pred_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': low_data},
                                                           num_epochs=1,
                                                           shuffle=False)
        predictions = classifier.predict(input_fn=pred_input_fn)
        _, low_op = get_predictions(predictions)

        pred_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': high_data},
                                                           num_epochs=1,
                                                           shuffle=False)
        predictions = classifier.predict(input_fn=pred_input_fn)
        _, high_op = get_predictions(predictions)

        low_op = log_loss(true_y, low_op.astype('float32'))
        high_op = log_loss(true_y, high_op.astype('float32'))

        # Evaluate each density once; both are compared under the same
        # zero-centred normal whose scale is the training loss.
        loss_dist = stats.norm(0, train_loss)
        low_density = loss_dist.pdf(low_op)
        high_density = loss_dist.pdf(high_op)

        pred_membership = np.where(low_density >= high_density, 0, 1)
        fpr, tpr, _ = roc_curve(membership, pred_membership, pos_label=1)
        print(fpr, tpr, tpr - fpr)
        # Index 1 is the ROC point after the initial (0, 0) entry.
        attr_adv.append(tpr[1] - fpr[1])

        attr_mem.append(membership)
        attr_pred.append(np.vstack((low_op, high_op)))
    return attr_adv, attr_mem, attr_pred
Example #3
0
def yeom_attribute_inference(true_x,
                             true_y,
                             classifier,
                             membership,
                             features,
                             train_loss,
                             test_loss=None):
    """Run Yeom et al.'s attribute inference attack for each feature.

    For every feature the column ``true_x[:, feature]`` is temporarily
    overwritten with the low and then the high candidate value returned by
    ``get_attribute_variations``; the classifier's loss under each candidate
    decides the predicted attribute value.  The column is always restored
    before moving on, even if scoring raises.

    Args:
        true_x: 2-D array of records; modified in place during the call and
            restored afterwards.
        true_y: labels passed to ``log_loss``.
        classifier: object exposing ``predict(input_fn=...)``.
        membership: ground truth handed to ``prety_print_result``.
        features: iterable of column indices to attack.
        train_loss: scale of the zero-centred normal modelling member loss.
        test_loss: optional scale of the zero-centred normal modelling
            non-member loss.  When ``None`` only ``train_loss`` is used.

    Returns:
        List with one ``pred_membership`` array per feature.
    """
    print('-' * 10 + 'YEOM\'S ATTRIBUTE INFERENCE' + '-' * 10 + '\n')

    def _loss_with(feature, value):
        # Score the data with the attacked column forced to ``value``.
        true_x[:, feature] = value
        pred_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
            x={'x': true_x}, num_epochs=1, shuffle=False)
        predictions = classifier.predict(input_fn=pred_input_fn)
        _, scores = get_predictions(predictions)
        return log_loss(true_y, scores.astype('float32'))

    pred_membership_all = []
    for feature in features:
        original_attribute = np.copy(true_x[:, feature])
        try:
            (low_value, high_value,
             true_attribute_value) = get_attribute_variations(true_x, feature)

            low_op = _loss_with(feature, low_value)
            high_op = _loss_with(feature, high_value)

            # Empirical prior of the high value, taken from the true column.
            high_prob = np.sum(true_attribute_value) / len(
                true_attribute_value)
            low_prob = 1 - high_prob

            train_dist = stats.norm(0, train_loss)
            if test_loss is None:
                pred_attribute_value = np.where(
                    low_prob * train_dist.pdf(low_op) >=
                    high_prob * train_dist.pdf(high_op), 0, 1)
                mask = [1] * len(pred_attribute_value)
            else:
                test_dist = stats.norm(0, test_loss)
                # 1 where the loss looks more like a member's than a
                # non-member's under the respective candidate value.
                low_mem = np.where(
                    train_dist.pdf(low_op) >= test_dist.pdf(low_op), 1, 0)
                high_mem = np.where(
                    train_dist.pdf(high_op) >= test_dist.pdf(high_op), 1, 0)
                pred_attribute_value = [
                    np.argmax([low_prob * a, high_prob * b])
                    for a, b in zip(low_mem, high_mem)
                ]
                mask = [a | b for a, b in zip(low_mem, high_mem)]

            # For 0/1 values, ``pred ^ true ^ 1`` is 1 exactly where the
            # predicted attribute equals the true one.
            pred_membership = mask & (pred_attribute_value
                                      ^ true_attribute_value
                                      ^ [1] * len(pred_attribute_value))
            prety_print_result(membership, pred_membership)
            pred_membership_all.append(pred_membership)
        finally:
            # Never leave the caller's array with a forced column.
            true_x[:, feature] = original_attribute
    return pred_membership_all
Example #4
0
def train_attack_model(classes,
                       dataset=None,
                       n_hidden=50,
                       learning_rate=0.01,
                       batch_size=200,
                       epochs=50,
                       model='nn',
                       l2_ratio=1e-7):
    """Train one attack model per class and evaluate them jointly.

    The attack data is split by the class labels in ``classes``; for each
    distinct training class a separate model is fitted with ``train_model``
    and used to predict on that class's test records.  The per-class results
    are concatenated and summarised.

    Args:
        classes: pair ``(train_classes, test_classes)`` giving the class of
            each attack training / test record.
        dataset: tuple ``(train_x, train_y, test_x, test_y)``; loaded with
            ``load_attack_data()`` when ``None``.
        n_hidden, learning_rate, batch_size, epochs, model, l2_ratio:
            forwarded unchanged to ``train_model``.

    Returns:
        Tuple ``(attack_adv, pred_scores)`` where ``attack_adv`` is
        ``tpr[1] - fpr[1]`` from ``roc_curve`` on the concatenated labels and
        predictions, and ``pred_scores`` are the concatenated scores.
    """
    if dataset is None:
        dataset = load_attack_data()
    train_x, train_y, test_x, test_y = dataset
    train_classes, test_classes = classes

    train_positions = np.arange(len(train_x))
    test_positions = np.arange(len(test_x))
    distinct_classes = np.unique(train_classes)

    label_parts, pred_parts = [], []
    score_parts, input_parts = [], []
    for label in distinct_classes:
        # Restrict both splits to the records belonging to this class.
        train_sel = train_positions[train_classes == label]
        class_train_x = train_x[train_sel]
        class_train_y = train_y[train_sel]
        test_sel = test_positions[test_classes == label]
        class_test_x = test_x[test_sel]
        class_test_y = test_y[test_sel]

        class_classifier = train_model(
            (class_train_x, class_train_y, class_test_x, class_test_y),
            n_hidden=n_hidden,
            epochs=epochs,
            learning_rate=learning_rate,
            batch_size=batch_size,
            model=model,
            l2_ratio=l2_ratio)

        class_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'x': class_test_x}, num_epochs=1, shuffle=False)
        class_pred_y, class_pred_scores = get_predictions(
            class_classifier.predict(input_fn=class_input_fn))

        label_parts.append(class_test_y)
        pred_parts.append(class_pred_y)
        input_parts.append(class_test_x)
        score_parts.append(class_pred_scores)

    print('-' * 10 + 'FINAL EVALUATION' + '-' * 10 + '\n')
    true_y = np.concatenate(label_parts)
    pred_y = np.concatenate(pred_parts)
    true_x = np.concatenate(input_parts)
    pred_scores = np.concatenate(score_parts)

    prety_print_result(true_y, pred_y)
    fpr, tpr, _ = roc_curve(true_y, pred_y, pos_label=1)
    # Index 1 is the ROC point after the initial (0, 0) entry.
    return (tpr[1] - fpr[1], pred_scores)
Example #5
0
def train_target_model(dataset, hold_out_train_data=None, epochs=100, batch_size=100, learning_rate=0.01, l2_ratio=1e-7,
                       n_hidden=50, model='nn', save=True, privacy='no_privacy', dp='dp', epsilon=0.5, delta=1e-5):
    """Train the target model and build the attack model's test set.

    The model is fitted with ``train_private``; its prediction scores on the
    training split are labelled 1 and its scores on the test split are
    labelled 0.

    Args:
        dataset: tuple ``(train_x, train_y, test_x, test_y)``.
        hold_out_train_data: forwarded positionally to ``train_private``.
        epochs, batch_size, learning_rate, l2_ratio, n_hidden, model,
        privacy, dp, epsilon, delta: forwarded unchanged to
            ``train_private``.
        save: when true, write the attack test data to
            ``MODEL_PATH + 'attack_test_data.npz'``.  The trained classifier
            is not serialised here; it is returned to the caller instead.

    Returns:
        Tuple ``(attack_x, attack_y, classes, train_loss, classifier,
        train_acc, test_acc)``: float32 scores, int32 1/0 labels, the class
        labels (train followed by test), and the classifier, training loss
        and accuracies reported by ``train_private``.
    """
    train_x, train_y, test_x, test_y = dataset

    # train_private returns six values; the second and third are not needed.
    classifier, _, _, train_loss, train_acc, test_acc = train_private(
        dataset, hold_out_train_data, n_hidden=n_hidden, epochs=epochs,
        learning_rate=learning_rate, batch_size=batch_size, model=model,
        l2_ratio=l2_ratio, silent=False, privacy=privacy, dp=dp,
        epsilon=epsilon, delta=delta)

    # test data for attack model
    attack_x, attack_y = [], []

    # data used in training, label is 1
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': train_x},
        num_epochs=1,
        shuffle=False)

    predictions = classifier.predict(input_fn=pred_input_fn)
    _, pred_scores = get_predictions(predictions)

    attack_x.append(pred_scores)
    attack_y.append(np.ones(train_x.shape[0]))

    # data not used in training, label is 0
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': test_x},
        num_epochs=1,
        shuffle=False)

    predictions = classifier.predict(input_fn=pred_input_fn)
    _, pred_scores = get_predictions(predictions)

    attack_x.append(pred_scores)
    attack_y.append(np.zeros(test_x.shape[0]))

    attack_x = np.vstack(attack_x).astype('float32')
    attack_y = np.concatenate(attack_y).astype('int32')

    if save:
        np.savez(MODEL_PATH + 'attack_test_data.npz', attack_x, attack_y)

    classes = np.concatenate([train_y, test_y])
    return attack_x, attack_y, classes, train_loss, classifier, train_acc, test_acc
Example #6
0
def train_target_model(args, dataset=None, epochs=100, batch_size=100, learning_rate=0.01, clipping_threshold=1, l2_ratio=1e-7, n_hidden=50, model='nn', privacy='no_privacy', dp='dp', epsilon=0.5, delta=1e-5, save=True):
    """Train the target model and build the attack model's test set.

    The model is fitted with ``train_model``; its prediction scores on the
    training split are labelled 1 and its scores on the test split are
    labelled 0.

    Args:
        args: passed to ``load_data`` when ``dataset`` is not supplied.
        dataset: tuple ``(train_x, train_y, test_x, test_y)``; loaded from
            ``'target_data.npz'`` when ``None``.
        epochs, batch_size, learning_rate, clipping_threshold, l2_ratio,
        n_hidden, model, privacy, dp, epsilon, delta: forwarded unchanged to
            ``train_model``.
        save: when true, write the attack test data to
            ``MODEL_PATH + 'attack_test_data.npz'``.

    Returns:
        Tuple ``(attack_x, attack_y, classes, classifier, aux)``: float32
        scores, int32 1/0 labels, the class labels (train followed by test),
        and the two values returned by ``train_model``.
    """
    # Identity check: ``== None`` is ambiguous when dataset is array-like.
    if dataset is None:
        dataset = load_data('target_data.npz', args)
    train_x, train_y, test_x, test_y = dataset

    classifier, aux = train_model(dataset, n_hidden=n_hidden, epochs=epochs, learning_rate=learning_rate, clipping_threshold=clipping_threshold, batch_size=batch_size, model=model, l2_ratio=l2_ratio, silent=False, privacy=privacy, dp=dp, epsilon=epsilon, delta=delta)
    # test data for attack model
    attack_x, attack_y = [], []

    # data used in training, label is 1
    pred_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
        x={'x': train_x},
        num_epochs=1,
        shuffle=False)

    predictions = classifier.predict(input_fn=pred_input_fn)
    _, pred_scores = get_predictions(predictions)

    attack_x.append(pred_scores)
    attack_y.append(np.ones(train_x.shape[0]))

    # data not used in training, label is 0
    pred_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
        x={'x': test_x},
        num_epochs=1,
        shuffle=False)

    predictions = classifier.predict(input_fn=pred_input_fn)
    _, pred_scores = get_predictions(predictions)

    attack_x.append(pred_scores)
    attack_y.append(np.zeros(test_x.shape[0]))

    attack_x = np.vstack(attack_x)
    attack_y = np.concatenate(attack_y)
    attack_x = attack_x.astype('float32')
    attack_y = attack_y.astype('int32')

    if save:
        np.savez(MODEL_PATH + 'attack_test_data.npz', attack_x, attack_y)

    classes = np.concatenate([train_y, test_y])
    return attack_x, attack_y, classes, classifier, aux
Example #7
0
def predict():
    """Classify the ``'text'`` field of the request's JSON body.

    The text is wrapped in a one-element list, passed to
    ``classifier.get_predictions`` together with the module-level model and
    vectorizers, and each resulting class is mapped to its name with
    ``classifier.class_to_name``.

    Returns:
        ``jsonify({'predictions': [...]})`` on success, or
        ``jsonify({'error': ..., 'trace': ...})`` if anything raises.

    NOTE(review): when ``model`` is falsy the function falls through and
    returns ``None``; confirm the caller/framework tolerates that.
    """
    if model:
        try:
            samples = [request.get_json()['text']]
            sample_predictions = classifier.get_predictions(samples, model, tf_vectorizer, idf_vector, pos_vectorizer)
            predictions = [classifier.class_to_name(prediction) for prediction in sample_predictions]
            return jsonify({'predictions': predictions})

        # ``except X as e`` is valid on Python 2.6+ and Python 3; the older
        # comma form is a syntax error on Python 3.
        except Exception as e:
            # NOTE(review): the full traceback is sent back to the client;
            # confirm this is acceptable outside of debugging.
            return jsonify({'error': str(e), 'trace': traceback.format_exc()})
Example #8
0
def proposed_attribute_inference(true_x, true_y, classifier, membership,
                                 features, args):
    """Collect per-feature losses and noise-sensitivity counts.

    For every feature the column ``true_x[:, feature]`` is temporarily
    overwritten with the low and then the high candidate value returned by
    ``get_attribute_variations``.  Under each candidate the classifier's
    loss is computed with ``log_loss`` and ``loss_increase_counts`` is
    called on the modified data.  The column is always restored before
    moving on, even if scoring raises.

    Args:
        true_x: 2-D array of records; modified in place during the call and
            restored afterwards.
        true_y: labels passed to ``log_loss`` and ``loss_increase_counts``.
        classifier: object exposing ``predict(input_fn=...)``.
        membership: unused; kept so existing callers keep working.
        features: iterable of column indices to attack.
        args: object providing ``attack_noise_type``,
            ``attack_noise_coverage`` and ``attack_noise_magnitude``.

    Returns:
        Tuple of five per-feature lists: true attribute values, low-value
        losses, high-value losses, low-value counts, high-value counts.
    """
    print('-' * 10 + 'PROPOSED ATTRIBUTE INFERENCE' + '-' * 10 + '\n')
    low_per_instance_loss_all, high_per_instance_loss_all = [], []
    low_counts_all, high_counts_all = [], []
    true_attribute_value_all = []

    def _loss_and_counts(feature, value, noise_params):
        # Evaluate the data with the attacked column forced to ``value``.
        true_x[:, feature] = value
        pred_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
            x={'x': true_x}, num_epochs=1, shuffle=False)
        predictions = classifier.predict(input_fn=pred_input_fn)
        _, scores = get_predictions(predictions)
        loss = log_loss(true_y, scores.astype('float32'))
        counts = loss_increase_counts(true_x, true_y, classifier, loss,
                                      noise_params)
        return loss, counts

    for feature in features:
        original_attribute = np.copy(true_x[:, feature])
        try:
            (low_value, high_value,
             true_attribute_value) = get_attribute_variations(true_x, feature)
            noise_params = (args.attack_noise_type,
                            args.attack_noise_coverage,
                            args.attack_noise_magnitude)

            low_op, low_counts = _loss_and_counts(feature, low_value,
                                                  noise_params)
            high_op, high_counts = _loss_and_counts(feature, high_value,
                                                    noise_params)

            true_attribute_value_all.append(true_attribute_value)
            low_per_instance_loss_all.append(low_op)
            high_per_instance_loss_all.append(high_op)
            low_counts_all.append(low_counts)
            high_counts_all.append(high_counts)
        finally:
            # Never leave the caller's array with a forced column.
            true_x[:, feature] = original_attribute
    return (true_attribute_value_all, low_per_instance_loss_all,
            high_per_instance_loss_all, low_counts_all, high_counts_all)
Example #9
0
def loss_increase_counts(true_x, true_y, classifier, per_instance_loss, noise_params, max_t=100):
    """Count, per record, how often added noise raises the loss.

    Runs ``max_t`` trials.  Each trial adds fresh noise from
    ``generate_noise`` to ``true_x``, scores the noisy data with
    ``classifier`` and compares the resulting ``log_loss`` against
    ``per_instance_loss``.

    Args:
        true_x: array of records; must expose ``shape`` and ``dtype`` once
            at least one trial is run.
        true_y: labels passed to ``log_loss``.
        classifier: object exposing ``predict(input_fn=...)``.
        per_instance_loss: reference loss to compare each noisy loss with.
        noise_params: forwarded unchanged to ``generate_noise``.
        max_t: number of noisy trials.

    Returns:
        Array of length ``len(true_x)`` holding, per record, the number of
        trials in which the noisy loss was strictly greater.
    """
    increase_counts = np.zeros(len(true_x))
    for _trial in range(max_t):
        perturbation = generate_noise(true_x.shape, true_x.dtype, noise_params)
        perturbed_x = true_x + perturbation
        noisy_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
            x={'x': perturbed_x}, num_epochs=1, shuffle=False)
        _, noisy_scores = get_predictions(
            classifier.predict(input_fn=noisy_input_fn))
        noisy_loss = np.array(log_loss(true_y, noisy_scores))
        loss_went_up = noisy_loss > per_instance_loss
        increase_counts += np.where(loss_went_up, 1, 0)
    return increase_counts