def centralize_and_anonymize(np_preds, num_labels, epsilon, do_pate=False):
    """Turn a matrix of label votes into one noisy majority label per observation.

    Each column of ``np_preds`` is tallied with ``np.bincount``, the tally is
    passed through ``add_noise_to_preds`` (with ``eps=epsilon``) and the
    arg-max of the returned counts becomes the label of that observation.

    Args:
        np_preds: 2-D integer array whose second axis indexes observations.
            Presumably one row per teacher model, since it is forwarded as
            ``teacher_preds`` to the PATE analysis -- confirm with the caller.
        num_labels: Minimum length of each per-observation count vector.
        epsilon: Privacy parameter forwarded to ``add_noise_to_preds`` and,
            when ``do_pate`` is set, to ``pate.perform_analysis`` as
            ``noise_eps``.
        do_pate: When true, additionally run ``pate.perform_analysis`` (with
            ``delta=1e-5``) and print both epsilons it returns.

    Returns:
        ``np.ndarray`` holding the selected label for every observation.
    """
    distinct_labels = np.unique(np_preds)
    print("Combined predictions: {}, labels: {}".format(
        np_preds.shape, distinct_labels))

    n_obs = np_preds.shape[1]
    print(
        "Performing DP query and adding laplace noise eps={} on {} observation..."
        .format(epsilon, n_obs))

    def _noisy_winner(votes):
        # Count the votes per label, perturb the counts, keep the top label.
        tally = np.bincount(votes, minlength=num_labels)
        return np.argmax(add_noise_to_preds(tally, eps=epsilon))

    y_train = np.array(
        [_noisy_winner(np_preds[:, column]) for column in range(n_obs)])
    print("  Labels DP done, new labels: {}".format(np.unique(y_train)))

    if not do_pate:
        return y_train

    print("Performing PATE analysis on...")
    data_dep_eps, data_ind_eps = pate.perform_analysis(
        teacher_preds=np_preds,
        indices=y_train,
        noise_eps=epsilon,
        delta=1e-5)

    print("  Data Independent Epsilon: {:.10f}".format(data_ind_eps))
    print("  Data Dependent Epsilon:   {:.10f}".format(data_dep_eps))
    return y_train
Example #2
0
def test_torch_ref_match():
    """Check that the torch PATE analysis agrees with the NumPy reference.

    Random teacher predictions and random answers are generated, the first
    ten examples are forced to label 0 for every teacher, and both
    ``pate.perform_analysis_torch`` and ``pate.perform_analysis`` are run on
    the same inputs. Each epsilon returned by the torch version must be
    close (``torch.isclose``) to its NumPy counterpart.
    """
    n_teachers, n_examples, n_classes = 100, 50, 10

    # fake preds
    raw_votes = np.random.rand(n_teachers, n_examples) * n_classes
    preds = raw_votes.astype(int)

    # true answers
    raw_answers = np.random.rand(n_examples) * n_classes
    indices = raw_answers.astype(int)

    # Every teacher votes for label 0 on the first ten examples.
    preds[:, :10] = 0

    analysis_kwargs = {"noise_eps": 0.1, "delta": 1e-5}

    torch_dep_eps, torch_ind_eps = pate.perform_analysis_torch(
        preds, indices, **analysis_kwargs)
    ref_dep_eps, ref_ind_eps = pate.perform_analysis(
        preds, indices, **analysis_kwargs)

    comparisons = (
        (torch_dep_eps, ref_dep_eps),
        (torch_ind_eps, ref_ind_eps),
    )
    for torch_eps, reference_eps in comparisons:
        assert torch.isclose(torch_eps, torch.tensor(reference_eps.item()))
Example #3
0
def test_base_dataset():
    """Check that the data-dependent epsilon is below the data-independent one.

    Random teacher predictions and answers are generated, the first ten
    examples are forced to label 0 for every teacher, and
    ``pate.perform_analysis`` is run with ``noise_eps=0.1`` and
    ``delta=1e-5``.
    """
    n_teachers, n_examples, n_classes = 100, 50, 10

    # fake preds
    raw_votes = np.random.rand(n_teachers, n_examples) * n_classes
    preds = raw_votes.astype(int)

    # true answers
    raw_answers = np.random.rand(n_examples) * n_classes
    indices = raw_answers.astype(int)

    # Every teacher votes for label 0 on the first ten examples.
    preds[:, :10] = 0

    eps_pair = pate.perform_analysis(
        teacher_preds=preds,
        indices=indices,
        noise_eps=0.1,
        delta=1e-5,
    )
    data_dep_eps, data_ind_eps = eps_pair

    assert data_dep_eps < data_ind_eps
        labels = np.array([9, 9, 3, 6, 9, 9, 9, 9, 8, 2])
        counts = np.bincount(labels, minlength=10)
        query_result = np.argmax(counts)
        print(query_result)

        num_teachers, num_examples, num_labels = (100, 100, 10)
        preds = (np.random.rand(num_teachers, num_examples) *
                 num_labels).astype(int)  #fake preds
        indices = (np.random.rand(num_examples) * num_labels).astype(
            int)  # true answers

        preds[:, 0:10] *= 0

        data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds,
                                                           indices=indices,
                                                           noise_eps=0.1,
                                                           delta=1e-5)

        assert data_dep_eps < data_ind_eps

        data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds,
                                                           indices=indices,
                                                           noise_eps=0.1,
                                                           delta=1e-5)
        print("Data Independent Epsilon:", data_ind_eps)
        print("Data Dependent Epsilon:", data_dep_eps)

        preds[:, 0:50] *= 0

        data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds,
                                                           indices=indices,
Example #5
0
def test_section_1_differential_privacy():
    """This tests the Udacity course content found at
    https://github.com/Udacity/private-ai

    The body is a linear transcription of the course notebook: it builds
    random boolean databases and their "parallel" databases (one entry
    removed), measures query sensitivity empirically, demonstrates
    differencing attacks, local DP via randomized response, the Laplace
    mechanism, and finally a PATE analysis. Most steps are only executed to
    make sure they run; the single hard assertion is that the data-dependent
    epsilon of the PATE analysis is below the data-independent one.

    Nested ``query`` helpers are deliberately redefined several times, as in
    the notebook; each call uses the most recent definition.
    """
    import numpy as np

    from syft.frameworks.torch.differential_privacy import pate

    # the number of entries in our database
    num_entries = 5000

    db = torch.rand(num_entries) > 0.5

    def get_parallel_db(db, remove_index):
        """Return a copy of ``db`` without the entry at ``remove_index``."""
        return torch.cat((db[0:remove_index], db[remove_index + 1:]))

    # Index past the end: both slices clamp, so nothing is removed.
    get_parallel_db(db, 52352)

    def get_parallel_dbs(db):
        """Return every parallel database of ``db`` (one per removed index)."""
        return [get_parallel_db(db, i) for i in range(len(db))]

    pdbs = get_parallel_dbs(db)

    def create_db_and_parallels(num_entries):
        """Create a random boolean database together with its parallel dbs."""
        db = torch.rand(num_entries) > 0.5
        pdbs = get_parallel_dbs(db)

        return db, pdbs

    db, pdbs = create_db_and_parallels(20)

    db, pdbs = create_db_and_parallels(5000)

    def query(db):
        return db.sum()

    full_db_result = query(db)

    # Empirical sensitivity of the sum query on this one database. Kept under
    # its own name so it is not confused with the ``sensitivity`` helper
    # defined right below.
    max_sensitivity = 0
    for pdb in pdbs:
        pdb_result = query(pdb)

        db_distance = torch.abs(pdb_result - full_db_result)

        if db_distance > max_sensitivity:
            max_sensitivity = db_distance

    def sensitivity(query, n_entries=1000):
        """Return the largest change of ``query`` when one entry is removed
        from a freshly generated random database of ``n_entries`` entries."""
        db, pdbs = create_db_and_parallels(n_entries)

        full_db_result = query(db)

        max_distance = 0
        for pdb in pdbs:
            pdb_result = query(pdb)

            db_distance = torch.abs(pdb_result - full_db_result)

            if db_distance > max_distance:
                max_distance = db_distance

        return max_distance

    def query(db):
        return db.float().mean()

    sensitivity(query)

    db, pdbs = create_db_and_parallels(20)

    def query(db, threshold=5):
        return (db.sum() > threshold).float()

    for _ in range(10):
        sens_f = sensitivity(query, n_entries=10)
        print(sens_f)

    db, _ = create_db_and_parallels(100)

    pdb = get_parallel_db(db, remove_index=10)

    # The expressions below are evaluated only to check that they run; their
    # values were displayed interactively in the course notebook.
    db[10]

    sum(db)

    # differencing attack using sum query

    sum(db) - sum(pdb)

    # differencing attack using mean query

    (sum(db).float() / len(db)) - (sum(pdb).float() / len(pdb))

    # differencing attack using threshold

    (sum(db).float() > 49).float() - (sum(pdb).float() > 49).float()

    def query(db):
        """Randomized response with two fair coin flips per entry.

        Returns ``(private_result, true_result)``.
        """
        true_result = torch.mean(db.float())

        first_coin_flip = (torch.rand(len(db)) > 0.5).float()
        second_coin_flip = (torch.rand(len(db)) > 0.5).float()

        # Keep the true answer where the first coin is 1, otherwise answer
        # with the second coin.
        augmented_database = db.float() * first_coin_flip + (
            1 - first_coin_flip) * second_coin_flip

        # De-skew: E[augmented mean] = 0.5 * true_mean + 0.25.
        db_result = torch.mean(augmented_database.float()) * 2 - 0.5

        return db_result, true_result

    db, pdbs = create_db_and_parallels(10)
    private_result, true_result = query(db)
    print("With Noise:" + str(private_result))
    print("Without Noise:" + str(true_result))

    db, pdbs = create_db_and_parallels(100)
    private_result, true_result = query(db)
    print("With Noise:" + str(private_result))
    print("Without Noise:" + str(true_result))

    db, pdbs = create_db_and_parallels(1000)
    private_result, true_result = query(db)
    print("With Noise:" + str(private_result))
    print("Without Noise:" + str(true_result))

    db, pdbs = create_db_and_parallels(10000)
    private_result, true_result = query(db)
    print("With Noise:" + str(private_result))
    print("Without Noise:" + str(true_result))

    def query(db, noise=0.2):
        """Randomized response where the first coin is 0 with probability
        ``noise``.

        Returns ``(private_result, true_result)``.
        """
        true_result = torch.mean(db.float())

        first_coin_flip = (torch.rand(len(db)) > noise).float()
        second_coin_flip = (torch.rand(len(db)) > 0.5).float()

        augmented_database = db.float() * first_coin_flip + (
            1 - first_coin_flip) * second_coin_flip

        sk_result = augmented_database.float().mean()

        # De-skew: E[sk_result] = (1 - noise) * true_mean + noise * 0.5.
        private_result = ((sk_result / noise) - 0.5) * noise / (1 - noise)

        return private_result, true_result

    db, pdbs = create_db_and_parallels(100)
    private_result, true_result = query(db, noise=0.1)
    print("With Noise:" + str(private_result))
    print("Without Noise:" + str(true_result))

    db, pdbs = create_db_and_parallels(100)
    private_result, true_result = query(db, noise=0.2)
    print("With Noise:" + str(private_result))
    print("Without Noise:" + str(true_result))

    db, pdbs = create_db_and_parallels(100)
    private_result, true_result = query(db, noise=0.4)
    print("With Noise:" + str(private_result))
    print("Without Noise:" + str(true_result))

    db, pdbs = create_db_and_parallels(100)
    private_result, true_result = query(db, noise=0.8)
    print("With Noise:" + str(private_result))
    print("Without Noise:" + str(true_result))

    db, pdbs = create_db_and_parallels(10000)
    private_result, true_result = query(db, noise=0.8)
    print("With Noise:" + str(private_result))
    print("Without Noise:" + str(true_result))

    db, pdbs = create_db_and_parallels(100)

    def query(db):
        return torch.sum(db.float())

    # The un-noised query; the mechanisms below add noise on top of it.
    query(db)

    epsilon = 0.0001

    db, pdbs = create_db_and_parallels(100)

    def sum_query(db):
        return db.sum()

    def laplacian_mechanism(db, query, sensitivity):
        """Return ``query(db)`` plus Laplace noise of scale
        ``sensitivity / epsilon`` (``epsilon`` is read from the enclosing
        scope at call time)."""
        beta = sensitivity / epsilon
        noise = torch.tensor(np.random.laplace(0, beta, 1))

        return query(db) + noise

    def mean_query(db):
        return torch.mean(db.float())

    laplacian_mechanism(db, sum_query, 1)

    laplacian_mechanism(db, mean_query, 1 / 100)

    num_teachers = 10  # we're working with 10 partner hospitals
    num_examples = 10000  # the size of OUR dataset
    num_labels = 10  # number of labels for our classifier

    # One row per example, one column per teacher.
    preds = ((np.random.rand(num_teachers, num_examples) *
              num_labels).astype(int).transpose(1, 0))  # fake predictions

    # Noise scale is the same for every example, so compute it once.
    epsilon = 0.1
    beta = 1 / epsilon

    new_labels = list()
    for an_image in preds:

        label_counts = np.bincount(an_image, minlength=num_labels)

        # Add one Laplace draw per label in a single vectorized step. The
        # result is a float array, so the noise is no longer truncated to an
        # integer and no size-1 array is assigned into a scalar slot.
        noisy_counts = label_counts + np.random.laplace(
            0, beta, label_counts.shape)

        new_labels.append(np.argmax(noisy_counts))

    labels = np.array([9, 9, 3, 6, 9, 9, 9, 9, 8, 2])
    counts = np.bincount(labels, minlength=10)
    query_result = np.argmax(counts)

    num_teachers, num_examples, num_labels = (100, 100, 10)
    preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(
        int)  # fake preds
    indices = (np.random.rand(num_examples) * num_labels).astype(
        int)  # true answers

    # Every teacher votes for label 0 on the first ten examples.
    preds[:, 0:10] *= 0

    data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds,
                                                       indices=indices,
                                                       noise_eps=0.1,
                                                       delta=1e-5)

    assert data_dep_eps < data_ind_eps

    data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds,
                                                       indices=indices,
                                                       noise_eps=0.1,
                                                       delta=1e-5)
    print("Data Independent Epsilon:", data_ind_eps)
    print("Data Dependent Epsilon:", data_dep_eps)

    # Now every teacher votes for label 0 on the first fifty examples.
    preds[:, 0:50] *= 0

    data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds,
                                                       indices=indices,
                                                       noise_eps=0.1,
                                                       delta=1e-5,
                                                       moments=20)
    print("Data Independent Epsilon:", data_ind_eps)
    print("Data Dependent Epsilon:", data_dep_eps)