def base_model_sb(save_model=False, model_folder=None, use_tf_privacy=False, noise_multiplier=None):
    # Model / data parameters
    num_classes = 10
    input_shape = (28, 28, 1,)
    # the data, split between train and test sets
    # (x_train, y_train), (x_test, y_test) = mnist.load_data()
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
    
    # Scale images to the [0, 1] range
    x_train = np.array(x_train, dtype=np.float32) / 255  # x_train.astype("float32") / 255
    x_test = np.array(x_test, dtype=np.float32) / 255  # x_test.astype("float32") / 255
    
    # Make sure images have shape (28, 28, 1)
    # x_train = np.expand_dims(x_train, -1)
    # x_test = np.expand_dims(x_test, -1)
    x_train = x_train.reshape((x_train.shape[0], 28, 28, 1))
    x_test = x_test.reshape((x_test.shape[0], 28, 28, 1))

    y_train = np.array(y_train, dtype=np.int32)
    y_test = np.array(y_test, dtype=np.int32)

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    if not use_tf_privacy:
        dnn = DNN(input_shape, num_classes, parameters['base_model'])
        dnn.createModel()
        dnn.train(x_train, y_train, x_test, y_test)
        if save_model: dnn.saveModel(model_folder + '/model_plain')
        score = dnn.model.evaluate(x_test, y_test, verbose=0)
        print("Test loss:", score[0])
        print("Test accuracy:", score[1])
        return dnn.model

    if use_tf_privacy:
        dnn = DNN(input_shape, num_classes, parameters['base_model'], True, noise_multiplier)
        dnn.createModel()
        dnn.train(x_train, y_train, x_test, y_test)
        if save_model: dnn.saveModel(model_folder + '/model_private')
        score = dnn.model.evaluate(x_test, y_test, verbose=0)
        print("Test loss:", score[0])
        print("Test accuracy:", score[1])

        compute_dp_sgd_privacy.compute_dp_sgd_privacy(
            n=x_train.shape[0],
            batch_size=parameters['base_model']['batch_size'],
            noise_multiplier=noise_multiplier,
            epochs=parameters['base_model']['epochs'],
            delta=1e-5)
        return dnn.model
Example 2
    def test_compute_dp_sgd_privacy(self, n, batch_size, noise_multiplier,
                                    epochs, delta, expected_eps,
                                    expected_order):
        eps, order = compute_dp_sgd_privacy.compute_dp_sgd_privacy(
            n, batch_size, noise_multiplier, epochs, delta)
        self.assertAlmostEqual(eps, expected_eps)
        self.assertAlmostEqual(order, expected_order)
Example 3
def compute_dp_sgd_wrapper(_sigma):
    return compute_dp_sgd_privacy.compute_dp_sgd_privacy(
        n=len(datasets_splitted[i]['train']),
        batch_size=lotsizes[i],
        noise_multiplier=_sigma,
        epochs=args.epochs,
        delta=deltas[i])[0] - args.epsilon
Example 4
def compute_dp_sgd_wrapper(_sigma):
    return compute_dp_sgd_privacy.compute_dp_sgd_privacy(
        n=trainX.shape[0],
        batch_size=lotSize,
        noise_multiplier=_sigma,
        epochs=args.epochs,
        delta=delta)[0] - args.epsilon
Example 5
    def train(self, X, y, models, rank):

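        # Note: num_classes, batch_size, epochs, and noise_multiplier are
        # presumably module-level settings defined outside this snippet.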
        X = self._normalize_X(X)

        # convert class vectors to binary class matrices
        y = np.array(y, dtype=np.int32)
        y = keras.utils.to_categorical(y, num_classes)

        model = self._init_new_model()

        model.fit(
            X,
            y,
            batch_size=batch_size,
            epochs=epochs,
        )

        epsilon, _ = compute_dp_sgd_privacy.compute_dp_sgd_privacy(
            n=X.shape[0],
            batch_size=batch_size,
            noise_multiplier=noise_multiplier,
            epochs=epochs,
            delta=1 / X.shape[0])
        print(f"Computed privacy budget Epsilon: {epsilon}")

        return model
Example 6
    def find_sigma(eps, batches_per_lot, dataset_size):
        lotSize = batches_per_lot * args.batch_size  # L
        N = dataset_size
        delta = min(10**(-5), 1 / N)
        lotsPerEpoch = N / lotSize
        q = lotSize / N  # Sampling ratio
        T = args.epochs * lotsPerEpoch  # Total number of lots

        def compute_dp_sgd_wrapper(_sigma):
            with HiddenPrints():
                return compute_dp_sgd_privacy.compute_dp_sgd_privacy(
                    n=N,
                    batch_size=lotSize,
                    noise_multiplier=_sigma,
                    epochs=args.epochs,
                    delta=delta)[0] - args.epsilon

        sigma = newton(compute_dp_sgd_wrapper, x0=0.5,
                       tol=1e-4)  # adjust x0 to avoid error
        with HiddenPrints():
            actual_eps = compute_dp_sgd_privacy.compute_dp_sgd_privacy(
                n=N,
                batch_size=lotSize,
                noise_multiplier=sigma,
                epochs=args.epochs,
                delta=delta)[0]
#         print('Batches_per_lot={}, q={}, T={}, sigma={}'.format(batches_per_lot, q, T, sigma))
#         print('actual epsilon = {}'.format(actual_eps))
        return sigma
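
A hypothetical invocation (the values below are illustrative only; note that the epsilon target, batch size, and epoch count are read from the enclosing `args`, not from the `eps` parameter):

# Illustrative values only: solve for the noise multiplier on a hypothetical
# 60,000-sample dataset with 10 batches per lot; the target budget is read
# from args.epsilon inside find_sigma.
sigma = find_sigma(eps=args.epsilon, batches_per_lot=10, dataset_size=60000)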
Example 7
def compute_gaussian_sigma(
        epsilon, delta, batch_size, dataset_size, epochs) -> float:
    """Compute the level of noise to add when using rdp"""
    def compute_dp_sgd_wrapper(sigma):
        return compute_dp_sgd_privacy(
            n=dataset_size, batch_size=batch_size,
            noise_multiplier=sigma, epochs=epochs, delta=delta)[0] - epsilon

    # turn off output
    old_stdout = sys.stdout
    sys.stdout = open(os.devnull, 'w')

    # calculate sigma
    sigma = bisect(compute_dp_sgd_wrapper, 1e-6, 1e6)
    # calculate the actual privacy budget
    actual_epsilon = compute_dp_sgd_privacy(
        n=dataset_size, batch_size=batch_size, noise_multiplier=sigma,
        epochs=epochs, delta=delta)[0]
    log.info("Actual (ε,δ) is: ({}, {}), σ = {}".format(
        actual_epsilon, delta, sigma))

    # turn on output
    sys.stdout.close()
    sys.stdout = old_stdout
    return sigma
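
A usage sketch with hypothetical hyperparameters (none of these values come from the source): solve for the Gaussian noise multiplier that meets a target (ε, δ) budget.

# Hypothetical example: find the sigma that achieves (epsilon=2.0, delta=1e-5)
# for a 60,000-sample dataset trained for 15 epochs with batch size 250.
sigma = compute_gaussian_sigma(
    epsilon=2.0, delta=1e-5, batch_size=250, dataset_size=60000, epochs=15)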
Example 8
def compute_dp_sgd_wrapper(_sigma):
    with HiddenPrints():
        return compute_dp_sgd_privacy.compute_dp_sgd_privacy(
            n=N,
            batch_size=lotSize,
            noise_multiplier=_sigma,
            epochs=args.epochs,
            delta=delta)[0] - args.epsilon
Example 9
def _compute_epsilon(steps: int, store: BaseConfig):
    """
    Calculate the epsilon value (and optimal RDP order) for differential privacy
    """
    # Note: inverse of number of training samples recommended for minimum
    # delta in differential privacy
    return compute_dp_sgd_privacy.compute_dp_sgd_privacy(
        n=steps,
        batch_size=store.batch_size,
        noise_multiplier=store.dp_noise_multiplier,
        epochs=store.epochs,
        delta=1.0 / float(steps),
    )
Example 10
    def get_privacy_budget(self) -> float:
        """
        Calculates what epsilon will apply after another round of model training.
        It must be computed in advance to see whether another training run would violate the privacy budget.
        """
        batch_size = self.get_train_batch_size()
        iterations_per_epoch = tf.data.experimental.cardinality(self.train_loader).numpy()
        n_samples = batch_size * iterations_per_epoch
        planned_epochs = self.cumulative_epochs + self.epochs_per_proposal

        epsilon, _ = compute_dp_sgd_privacy(
            n=n_samples,
            batch_size=batch_size,
            noise_multiplier=self.diff_priv_config.noise_multiplier,  # type: ignore
            epochs=planned_epochs,
            delta=self.diff_priv_budget.target_delta
        )
        return epsilon
Example 11
def compute_epsilon(steps: int,
                    store: TensorFlowConfig,
                    epoch_number: int = None) -> Tuple[float, float]:
    """
    Calculate the epsilon value for differential privacy

    Returns:
        Tuple of eps, opt_order
    """
    # Note: inverse of number of training samples recommended for minimum
    # delta in differential privacy
    if epoch_number is None:
        epoch_number = store.epochs - 1
    return compute_dp_sgd_privacy.compute_dp_sgd_privacy(
        n=steps,
        batch_size=store.batch_size,
        noise_multiplier=store.dp_noise_multiplier,
        epochs=epoch_number,
        delta=1.0 / float(steps),
    )
Example 12
"""## Measure the differential privacy guarantee

Perform a privacy analysis to measure the DP guarantee achieved by a training algorithm. Knowing the level of DP achieved enables the objective comparison of two training runs to determine which of the two is more privacy-preserving. At a high level, the privacy analysis measures how much a potential adversary can improve their guess about properties of any individual training point by observing the outcome of our training procedure (e.g., model updates and parameters). 

This guarantee is sometimes referred to as the **privacy budget**. A lower privacy budget bounds more tightly an adversary's ability to improve their guess. This ensures a stronger privacy guarantee. Intuitively, this is because it is harder for a single training point to affect the outcome of learning: for instance, the information contained in the training point cannot be memorized by the ML algorithm and the privacy of the individual who contributed this training point to the dataset is preserved.

In this tutorial, the privacy analysis is performed in the framework of Rényi Differential Privacy (RDP), which is a relaxation of pure DP based on [this paper](https://arxiv.org/abs/1702.07476) that is particularly well suited for DP-SGD.

Two metrics are used to express the DP guarantee of an ML algorithm:

1.   Delta ($\delta$) - Bounds the probability of the privacy guarantee not holding. A rule of thumb is to set it to be less than the inverse of the size of the training dataset. In this tutorial, it is set to **10^-5** as the MNIST dataset has 60,000 training points.
2.   Epsilon ($\epsilon$) - This is the privacy budget. It measures the strength of the privacy guarantee by bounding how much the probability of a particular model output can vary by including (or excluding) a single training point. A smaller value for $\epsilon$ implies a better privacy guarantee. However, the $\epsilon$ value is only an upper bound and a large value could still mean good privacy in practice.

TensorFlow Privacy provides a tool, `compute_dp_sgd_privacy.py`, to compute the value of $\epsilon$ given a fixed value of $\delta$ and the following hyperparameters from the training process:

1. The total number of points in the training data, `n`.
2. The `batch_size`.
3. The `noise_multiplier`.
4. The number of `epochs` of training.
"""

compute_dp_sgd_privacy.compute_dp_sgd_privacy(n=60000, batch_size=250, noise_multiplier=1.3, epochs=15, delta=1e-5)
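
The function also returns its result programmatically as an `(eps, opt_order)` tuple (see Examples 2 and 11 above), so the computed budget can be captured rather than only read from the printed report; a minimal sketch:

# Capture the returned (epsilon, optimal RDP order) tuple instead of
# relying on the values the tool prints to stdout.
eps, opt_order = compute_dp_sgd_privacy.compute_dp_sgd_privacy(
    n=60000, batch_size=250, noise_multiplier=1.3, epochs=15, delta=1e-5)
print("epsilon = {:.2f} at RDP order {}".format(eps, opt_order))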

"""The tool reports that for the hyperparameters chosen above, the trained model has an $\epsilon$ value of 1.18.

## Summary
In this tutorial, you learned about differential privacy (DP) and how you can implement DP principles in existing ML algorithms to provide privacy guarantees for training data. In particular, you learned how to:
*   Wrap existing optimizers (e.g., SGD, Adam) into their differentially private counterparts using TensorFlow Privacy
*   Tune hyperparameters introduced by differentially private machine learning
*   Measure the privacy guarantee provided using analysis tools included in TensorFlow Privacy
"""
Example 13
    return compute_dp_sgd_wrapper


sigmas = [
    bisect(compute_dp_sgd_wrapper_generator(i), 0.01, 10000)
    for i, _ in enumerate(lotsizes)
]

print('--> bpls{} = {}'.format(list(range(len(batches_per_lot_list))),
                               batches_per_lot_list))
print('--> sigmas{} = {}'.format(list(range(len(sigmas))), sigmas))
print('--> actual epsilon{} = {}'.format(list(range(len(sigmas))), [
    compute_dp_sgd_privacy.compute_dp_sgd_privacy(
        n=len(datasets_splitted[i]['train']),
        batch_size=lotsizes[i],
        noise_multiplier=sigmas[i],
        epochs=args.epochs,
        delta=deltas[i])[0] for i, _ in enumerate(lotsizes)
]))

## Initialize network ##########################################################
print("Initializing network...")

nets = []
for i in range(len(datasets)):
    nets.append(models.vgg16(pretrained=True))
    nets[-1].classifier[6] = nn.Linear(4096, 101)

if args.load_model:
    print("Loading saved model...")
    for net in nets:
Example 14
def base_model_nb15(save_model=False, model_folder=None, use_tf_privacy=False, noise_multiplier=None, label_col='label'):
    # Model / data parameters
    if label_col == 'label':
        num_classes = 2
    elif label_col == 'attack_cat':
        num_classes = 10
    # input_shape = (196, 1,)
    # input_shape = (190, 1,)
    # input_shape = (190,)
    input_shape = (42,)

    # Read training data ==================================================
    df = pd.read_csv('nb15/data/UNSW_NB15_training-set.csv')
    # Prepare data
    df = df.drop(columns=df.columns[0], axis=1)
    df.loc[df['service'] == '-', 'service'] = 'none'
    # Ordinal encoding categorical columns
    # df[['proto', 'service', 'state']] = df[['proto', 'service', 'state']].apply(lambda x: pd.factorize(x)[0])
    df['proto'] = C2I.encode_c2i(df['proto'], 'proto')
    df['service'] = C2I.encode_c2i(df['service'], 'service')
    df['state'] = C2I.encode_c2i(df['state'], 'state')
    # # One-hot encode categorical columns
    # df = pd.get_dummies(df, columns=['proto', 'service', 'state'], prefix=['proto', 'service', 'state'])
    # # Add missing categorical columns present in training data
    # df['proto_icmp'] = 0
    # df['proto_rtp'] = 0
    # df['state_ECO'] = 0
    # df['state_no'] = 0
    # df['state_PAR'] = 0
    # df['state_URN'] = 0
    # df['proto_icmp'] = df['proto_icmp'].astype('uint8')
    # df['proto_rtp'] = df['proto_rtp'].astype('uint8')
    # df['state_ECO'] = df['state_ECO'].astype('uint8')
    # df['state_no'] = df['state_no'].astype('uint8')
    # df['state_PAR'] = df['state_PAR'].astype('uint8')
    # df['state_URN'] = df['state_URN'].astype('uint8')
    # Get labels col, and drop them from df
    col_labels = df['label']
    col_attack_cat = df['attack_cat']
    df = df.drop(columns=['label', 'attack_cat'], axis=1)
    if label_col == 'label':
        df_label = col_labels
    elif label_col == 'attack_cat':
        df_label = col_attack_cat
    y_train = np.array(df_label)
    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    # Reorder columns
    df = df.reindex(sorted(df.columns), axis=1)
    # Normalize (min-max normalization) df
    df = (df-df.min())/(df.max()-df.min())
    x_train = df.to_numpy()
    # Reshape data
    # x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))

    # Read testing data ==================================================
    df = pd.read_csv('nb15/data/UNSW_NB15_testing-set.csv')
    # Prepare data
    df = df.drop(columns=df.columns[0], axis=1)
    df.loc[df['service'] == '-', 'service'] = 'none'
    # Ordinal encoding categorical columns
    # df[['proto', 'service', 'state']] = df[['proto', 'service', 'state']].apply(lambda x: pd.factorize(x)[0])
    df['proto'] = C2I.encode_c2i(df['proto'], 'proto')
    df['service'] = C2I.encode_c2i(df['service'], 'service')
    df['state'] = C2I.encode_c2i(df['state'], 'state')
    # # One-hot encode categorical columns
    # df = pd.get_dummies(df, columns=['proto', 'service', 'state'], prefix=['proto', 'service', 'state'])
    # # Add missing categorical columns present in training data
    # df['state_ACC'] = 0
    # df['state_CLO'] = 0
    # Get labels col, and drop them from df
    col_labels = df['label']
    col_attack_cat = df['attack_cat']
    df = df.drop(columns=['label', 'attack_cat'], axis=1)
    if label_col == 'label':
        df_label = col_labels
    elif label_col == 'attack_cat':
        df_label = col_attack_cat
    y_test = np.array(df_label)
    # convert class vectors to binary class matrices
    y_test = keras.utils.to_categorical(y_test, num_classes)
    # # Reorder columns
    # df = df.reindex(sorted(df.columns), axis=1)
    # Normalize (min-max normalization) df
    df = (df-df.min())/(df.max()-df.min())
    x_test = df.to_numpy()
    # Reshape data
    # x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    if not use_tf_privacy:
        dnn = DNN(input_shape, num_classes, parameters['base_model'])
        dnn.createModel()
        dnn.train(x_train, y_train, x_test, y_test)
        if save_model: dnn.saveModel(model_folder + '/model_plain')
        score = dnn.model.evaluate(x_test, y_test, verbose=0)
        print("Test loss:", score[0])
        print("Test accuracy:", score[1])
        return dnn.model

    if use_tf_privacy:
        dnn = DNN(input_shape, num_classes, parameters['base_model'], True, noise_multiplier)
        dnn.createModel()
        dnn.train(x_train, y_train, x_test, y_test)
        if save_model: dnn.saveModel(model_folder + '/model_private')
        score = dnn.model.evaluate(x_test, y_test, verbose=0)
        print("Test loss:", score[0])
        print("Test accuracy:", score[1])

        compute_dp_sgd_privacy.compute_dp_sgd_privacy(
            n=x_train.shape[0],
            batch_size=parameters['base_model']['batch_size'],
            noise_multiplier=noise_multiplier,
            epochs=parameters['base_model']['epochs'],
            delta=1e-5)
        return dnn.model
Example 15
def main(winL=90, winR=90, do_preprocess=True, 
    maxRR=True, use_RR=True, norm_RR=True, compute_morph={''}, reduced_DS = False, leads_flag = [1,0],
    differential_privacy=False, noise_multiplier=1.4):
    
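    # Note: test_split, n_classes, epochs, batch_size, and validation_split
    # are presumably defined elsewhere in the module (not shown here).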
    db_path = settings.db_path
    
    # Load train data 
    [tr_features, tr_labels, tr_patient_num_beats] = load_mit_db('DS1', winL, winR, do_preprocess,
        maxRR, use_RR, norm_RR, compute_morph, db_path, reduced_DS, leads_flag)

    # # Load test data
    # [eval_features, eval_labels, eval_patient_num_beats] = load_mit_db('DS2', winL, winR, do_preprocess, 
    #     maxRR, use_RR, norm_RR, compute_morph, db_path, reduced_DS, leads_flag)

    # scaler = StandardScaler()
    # scaler.fit(tr_features)
    # tr_features_scaled = scaler.transform(tr_features)
    # eval_features_scaled = scaler.transform(eval_features)

    # [train_x, train_y] = get_images_data(tr_features, tr_labels, 1000, "train")
    # [train_x, train_y] = get_images_data(tr_features, tr_labels, tr_features.shape[0], "train")
    # [test_x, test_y] = get_images_data(eval_features, eval_labels, 1000, "test")
    # [test_x, test_y] = get_images_data(eval_features, eval_labels, eval_features.shape[0], "test")

    [data_x, data_y, _] = get_images_data(tr_features, tr_labels, 20000, "train")

    train_x, test_x, train_y, test_y = train_test_split(data_x, data_y, test_size=test_split, random_state=40)

    train_y = to_categorical(train_y, num_classes=n_classes)
    test_y = to_categorical(test_y, num_classes=n_classes)

    model_path = db_path + 'keras_cnn_models/'

    model_path = create_model_name(model_path, winL, winR, do_preprocess,
        maxRR, use_RR, norm_RR, compute_morph,
        leads_flag, reduced_DS, '_')

    model_path = model_path + '.h5'

    print("Training model on MIT-BIH DS1: " + model_path + "...")

    if 1 == 2:  # os.path.isfile(model_svm_path):
        # Load the trained model!
        model = load_model(model_path)

    else:

        model = get_model(differential_privacy, noise_multiplier)
        model.summary()

        checkpoint = ModelCheckpoint("weights.best.hdf5", monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
        callbacks_list = [checkpoint]

        # Let's Train!
        start = time.time()
        model.fit(train_x, train_y, epochs = epochs, batch_size = batch_size, validation_split=validation_split, callbacks=callbacks_list)
        end = time.time()

        print(("Trained completed!\n\t" + model_path + "\n \
            \tTime required: " + str(format(end - start, '.2f')) + " sec" ))

        # Save trained MLP model
        model.save(model_path)

    # Test the model
    print(("Testing model on MIT-BIH DS2: " + model_path + "..."))

    # Evaluate the model with new data
    predictions = model.predict(test_x)
    predictions = predictions > 0.5
    print(confusion_matrix(np.argmax(test_y, axis=1), np.argmax(predictions, axis=1)))
    print(classification_report(test_y, predictions))
    print("Accuracy: {0}".format(accuracy_score(test_y, predictions)))

    if differential_privacy:
        # compute_dp_sgd_privacy returns an (eps, opt_order) tuple; keep only eps
        eps, _ = compute_dp_sgd_privacy.compute_dp_sgd_privacy(n=train_x.shape[0], batch_size=batch_size, noise_multiplier=noise_multiplier, epochs=epochs, delta=4e-6)
        with open("dp.txt", "a+") as f:
            f.write("noise={0} eps={1} training_time={2:.0f} s \n".format(noise_multiplier, eps, end - start))
            f.write(np.array2string(confusion_matrix(np.argmax(test_y, axis=1), np.argmax(predictions, axis=1))))
            f.write(classification_report(test_y, predictions))
            f.write("Accuracy: {0} \n".format(accuracy_score(test_y, predictions)))
            f.write("-------------------------\n")
Example 16
def compute_dp_sgd_wrapper(sigma):
    return compute_dp_sgd_privacy(
        n=dataset_size, batch_size=batch_size,
        noise_multiplier=sigma, epochs=epochs, delta=delta)[0] - epsilon
Example 17
def main_DP(winL=90,
            winR=90,
            do_preprocess=True,
            maxRR=True,
            use_RR=True,
            norm_RR=True,
            compute_morph={''},
            reduced_DS=False,
            leads_flag=[1, 0],
            noise_multiplier=1.4):
    print("Runing train_Keras.py for Differential Privacy!")

    db_path = settings.db_path

    # Load train data
    [tr_features, tr_labels,
     tr_patient_num_beats] = load_mit_db('DS1', winL, winR, do_preprocess,
                                         maxRR, use_RR, norm_RR, compute_morph,
                                         db_path, reduced_DS, leads_flag)

    # Load test data
    [eval_features, eval_labels, eval_patient_num_beats
     ] = load_mit_db('DS2', winL, winR, do_preprocess, maxRR, use_RR, norm_RR,
                     compute_morph, db_path, reduced_DS, leads_flag)

    scaler = StandardScaler()
    scaler.fit(tr_features)
    tr_features_scaled = scaler.transform(tr_features)
    eval_features_scaled = scaler.transform(eval_features)

    model_path = db_path + 'keras_models/'

    model_path = create_model_name(model_path, winL, winR, do_preprocess,
                                   maxRR, use_RR, norm_RR, compute_morph,
                                   leads_flag, reduced_DS, '_')

    model_path = model_path + '.h5'

    print(("Training model on MIT-BIH DS1: " + model_path + "..."))

    if 1 == 2:  # os.path.isfile(model_svm_path):
        # Load the trained model!
        mlp_model = load_model(model_path)

    else:
        # print(tr_features_scaled.shape[1])

        l2_norm_clip = 1.5
        # noise_multiplier = 1.4
        num_microbatches = 250
        learning_rate = 0.25

        mlp_model = Sequential()
        mlp_model.add(
            Dense(100,
                  input_dim=tr_features_scaled.shape[1],
                  activation='relu'))
        mlp_model.add(Dropout(0.5))
        mlp_model.add(Dense(1, activation='sigmoid'))

        optimizer = DPGradientDescentGaussianOptimizer(
            l2_norm_clip=l2_norm_clip,
            noise_multiplier=noise_multiplier,
            num_microbatches=num_microbatches,
            learning_rate=learning_rate)

        mlp_model.compile(loss='binary_crossentropy',
                          optimizer=optimizer,
                          metrics=['accuracy'])

        # Let's Train!
        start = time.time()
        mlp_model.fit(tr_features_scaled, tr_labels, epochs=5, batch_size=128)
        end = time.time()

        print(("Trained completed!\n\t" + model_path + "\n \
            \tTime required: " + str(format(end - start, '.2f')) + " sec"))

        # Save trained MLP model
        mlp_model.save(model_path)

    # Test the model
    print(("Testing model on MIT-BIH DS2: " + model_path + "..."))

    # Evaluate the model with new data
    predictions = mlp_model.predict(eval_features_scaled)
    predictions = (predictions.squeeze() > 0.5)
    print(confusion_matrix(eval_labels, predictions))
    print(classification_report(eval_labels, predictions))
    print("Accuracy: {0}".format(accuracy_score(eval_labels, predictions)))

    # compute_dp_sgd_privacy returns an (eps, opt_order) tuple; keep only eps
    eps, _ = compute_dp_sgd_privacy.compute_dp_sgd_privacy(
        n=tr_features_scaled.shape[0],
        batch_size=128,
        noise_multiplier=noise_multiplier,
        epochs=5,
        delta=1e-5)
    with open("dp.txt", "a+") as f:
        f.write("noise={0} eps={1} training_time={2:.0f} s \n".format(
            noise_multiplier, eps, end - start))
        f.write(np.array2string(confusion_matrix(eval_labels, predictions)))
        f.write(classification_report(eval_labels, predictions))
        f.write("Accuracy: {0}\n".format(
            accuracy_score(eval_labels, predictions)))
        f.write("-------------------------\n")
Example 18
    return compute_dp_sgd_privacy.compute_dp_sgd_privacy(
        n=trainX.shape[0],
        batch_size=lotSize,
        noise_multiplier=_sigma,
        epochs=args.epochs,
        delta=delta)[0] - args.epsilon


# sigma = newton(compute_dp_sgd_wrapper, 0.66/np.sqrt(args.epsilon))
sigma = bisect(compute_dp_sgd_wrapper, 0.01, 10000)

print('BpL={}, q={}, T={}, σ₁=σ₂={}'.format(args.batches_per_lot, q, T, sigma))
print('actual epsilon = {}'.format(
    compute_dp_sgd_privacy.compute_dp_sgd_privacy(n=trainX.shape[0],
                                                  batch_size=lotSize,
                                                  noise_multiplier=sigma,
                                                  epochs=args.epochs,
                                                  delta=delta)[0]))

# using global variable
_lastNoiseShape = None
_noiseToAdd = None


def gaussian_noise(grads):
    global _lastNoiseShape
    global _noiseToAdd
    if grads.shape != _lastNoiseShape:
        _lastNoiseShape = grads.shape
        _noiseToAdd = torch.zeros(grads.shape).cuda()
    _noiseToAdd.data.normal_(0.0, std=sigma * clip)