Example No. 1
def main(train_data, train_labels, test_data, test_labels):
    logging.set_verbosity(logging.INFO)
    if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0:
        raise ValueError(
            'Number of microbatches should evenly divide batch_size')

    # Load training and test data.
    #train_data, train_labels, test_data, test_labels = load_mnist()

    # Define a sequential Keras model
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(
            16,
            8,
            # strides=2,
            padding='same',
            activation='relu',
            input_shape=(28, 28, 1)),
        # tf.keras.layers.MaxPool2D(2, 1),
        tf.keras.layers.Conv2D(
            32,
            4,
            # strides=2,
            padding='valid',
            activation='relu'),
        # tf.keras.layers.MaxPool2D(2, 1),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(10)
    ])

    if FLAGS.dpsgd:
        optimizer = DPGradientDescentGaussianOptimizer(
            l2_norm_clip=FLAGS.l2_norm_clip,
            noise_multiplier=FLAGS.noise_multiplier,
            num_microbatches=FLAGS.microbatches,
            learning_rate=FLAGS.learning_rate)
        # Compute vector of per-example loss rather than its mean over a minibatch.
        loss = tf.keras.losses.CategoricalCrossentropy(
            from_logits=True, reduction=tf.losses.Reduction.NONE)
    else:
        optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
        loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

    # Compile model with Keras
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    # Train model with Keras
    model.fit(train_data,
              train_labels,
              epochs=FLAGS.epochs,
              validation_data=(test_data, test_labels),
              batch_size=FLAGS.batch_size)

    # Compute the privacy budget expended.
    if FLAGS.dpsgd:
        eps = compute_epsilon(FLAGS.epochs * 60000 // FLAGS.batch_size)
        print('For delta=1e-5, the current epsilon is: %.2f' % eps)
    else:
        print('Trained with vanilla non-private SGD optimizer')
    return model
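
The compute_epsilon() helper called above is not defined in this snippet. Below is a minimal sketch, modeled on the TensorFlow Privacy MNIST tutorial, assuming the tensorflow_privacy RDP-accountant API and a 60,000-example training set (the same constant hard-coded above); treat it as illustrative rather than the original author's code.

from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent

def compute_epsilon(steps):
    """Approximates epsilon for a (epsilon, 1e-5)-DP guarantee after `steps` updates."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    # Poisson-sampling probability of each example per step.
    sampling_probability = FLAGS.batch_size / 60000
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=orders)
    # Convert the RDP curve to (epsilon, delta) at delta = 1e-5.
    return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]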
Example No. 2
    def testCreateTPUEstimatorSpec(self, n_classes):
        """Tests that an Estimator built with a binary head works."""

        train_features, train_labels = test_utils.make_input_data(
            256, n_classes)
        feature_columns = []
        for key in train_features:
            feature_columns.append(tf.feature_column.numeric_column(key=key))

        head = head_lib._binary_logistic_or_multi_class_head(
            n_classes=n_classes,
            weight_column=None,
            label_vocabulary=None,
            loss_reduction=tf.compat.v1.losses.Reduction.NONE)
        optimizer = DPGradientDescentGaussianOptimizer(learning_rate=0.5,
                                                       l2_norm_clip=1.0,
                                                       noise_multiplier=0.0,
                                                       num_microbatches=2)
        model_fn = make_model_fn(head, optimizer, feature_columns)
        classifier = tf_estimator.Estimator(model_fn=model_fn)

        classifier.train(input_fn=test_utils.make_input_fn(
            train_features, train_labels, True),
                         steps=4)

        test_features, test_labels = test_utils.make_input_data(64, n_classes)
        classifier.evaluate(input_fn=test_utils.make_input_fn(
            test_features, test_labels, False),
                            steps=4)

        predict_features, predict_labels = test_utils.make_input_data(
            64, n_classes)
        classifier.predict(input_fn=test_utils.make_input_fn(
            predict_features, predict_labels, False))
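
make_model_fn and test_utils are helpers that are not shown in this snippet. Purely as a hypothetical sketch of what such a factory might look like, assuming the v1 canned-head API (where head.create_estimator_spec accepts an optimizer) and tf.compat.v1.feature_column.input_layer:

def make_model_fn(head, optimizer, feature_columns):
    """Hypothetical sketch of a model_fn factory wiring a DP optimizer into a head."""
    def model_fn(features, labels, mode):
        # Turn the numeric feature columns into a dense input tensor.
        net = tf.compat.v1.feature_column.input_layer(features, feature_columns)
        # A single linear layer producing logits of the dimension the head expects.
        logits = tf.compat.v1.layers.dense(net, units=head.logits_dimension)
        # The head builds the (per-example, Reduction.NONE) loss and lets the
        # DP optimizer create the training op.
        return head.create_estimator_spec(
            features=features,
            mode=mode,
            labels=labels,
            logits=logits,
            optimizer=optimizer)
    return model_fn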
Example No. 3
def target_model():
    """The architecture of the target model.
    The attack is white-box, hence the attacker is assumed to know this architecture too.

    :return: target model
    """
    classifier = tf.keras.Sequential([
        tf.keras.Input((feature_size,), name='feature'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation=tf.nn.relu),
        tf.keras.layers.Dense(2,
                              activation=tf.nn.softmax,
                              kernel_regularizer=l1(kernel_regularization))
    ])

    if dpsgd:
        optimizer = DPGradientDescentGaussianOptimizer(
            l2_norm_clip=l2_norm_clip,
            noise_multiplier=noise_multiplier,
            num_microbatches=int(microbatches_perc * batch_size),
            learning_rate=learning_rate)

        # Compute vector of per-example loss rather than its mean over a minibatch.
        loss = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True, reduction=tf.compat.v2.losses.Reduction.NONE)
    else:
        optimizer = GradientDescentOptimizer(learning_rate=learning_rate)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True, reduction=tf.compat.v2.losses.Reduction.NONE)

    # Compile model with Keras
    classifier.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

    return classifier
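
A hedged usage sketch for target_model(): the module-level hyperparameters referenced above (dpsgd, batch_size, microbatches_perc, learning_rate, epochs) and the training arrays x_train/y_train are assumed to be defined elsewhere. The one constraint worth checking up front is that the derived number of microbatches evenly divides the batch size:

num_microbatches = int(microbatches_perc * batch_size)
if batch_size % num_microbatches != 0:
    raise ValueError('Number of microbatches should evenly divide batch_size')

model = target_model()
# model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size)  # hypothetical data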
Example No. 4
def create_dpsgd_model(FLAGS):

    optimizer = DPGradientDescentGaussianOptimizer(
            l2_norm_clip=FLAGS.l2_norm_clip,
            noise_multiplier=FLAGS.noise_multiplier,
            num_microbatches=FLAGS.microbatches,
            learning_rate=FLAGS.learning_rate
            )

    loss = tf.keras.losses.CategoricalCrossentropy(
            from_logits=True, reduction=tf.compat.v1.losses.Reduction.NONE)

    setattr(loss, "__name__", "CategoricalCrossentropy")

    plot_losses = PlotLosses()

    #create model
    model = Sequential()

    c = NUM_CLASSES

    #add model layers
    model.add(BatchNormalization(input_shape=(NUM_FEATURES, 1)))
    model.add(Conv1D(50, kernel_size=4, strides=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=3, strides=2))
    model.add(Flatten())
    model.add(Dense(c))
    model.add(Dropout(0.05))
    model.add(Dense(c, activation='softmax'))

    # Compile model with Keras
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

    return model, optimizer
Example No. 5
def main(unused_argv):
  logging.set_verbosity(logging.INFO)

  if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0:
    raise ValueError('Number of microbatches should evenly divide batch_size')

  # Split the training set into 60% and 40%, so we'll end up with 15,000 examples
  # for training, 10,000 examples for validation and 25,000 examples for testing.
  train_data, validation_data, test_data = tfds.load(
        name="imdb_reviews",
        split=('train[:60%]', 'train[60%:]', 'test'),
        as_supervised=True)

  train_examples_batch, train_labels_batch = next(iter(train_data.batch(10)))
  print(train_examples_batch)

  embedding = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
  hub_layer = hub.KerasLayer(embedding, input_shape=[],
                               dtype=tf.string, trainable=True)
  hub_layer(train_examples_batch[:3])

  model = tf.keras.Sequential()
  model.add(hub_layer)
  model.add(tf.keras.layers.Dense(16, activation='relu'))
  model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
  model.summary()

  if FLAGS.dpsgd:
    optimizer = DPGradientDescentGaussianOptimizer(
        l2_norm_clip=FLAGS.l2_norm_clip,
        noise_multiplier=FLAGS.noise_multiplier,
        num_microbatches=FLAGS.microbatches,
        learning_rate=FLAGS.learning_rate)
    # Compute vector of per-example loss rather than its mean over a minibatch.
    loss = tf.keras.losses.BinaryCrossentropy(
        from_logits=True)  # reduction=tf.compat.v1.losses.Reduction.NONE
  else:
    optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
    loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)

  # Compile model with Keras
  model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

  # Train model with Keras
  model.fit(train_data.shuffle(10000).batch(FLAGS.batch_size),
            epochs=FLAGS.epochs,
            validation_data=validation_data.batch(FLAGS.batch_size))

  # Compute the privacy budget expended.
  if FLAGS.dpsgd:
    eps = compute_epsilon(FLAGS.epochs * 60000 // FLAGS.batch_size)
    print('For delta=1e-5, the current epsilon is: %.2f' % eps)
  else:
    print('Trained with vanilla non-private SGD optimizer')
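
Note that the epsilon reported above reuses the MNIST-sized constant 60000, while the imdb_reviews train[:60%] split loaded here holds 15,000 examples. A hedged alternative that accounts with the actual split size, using the compute_dp_sgd_privacy helper from tensorflow_privacy (the same API used in Example No. 9 below); the module path and the 15,000 figure are assumptions about this setup:

from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy

eps, _ = compute_dp_sgd_privacy.compute_dp_sgd_privacy(
    n=15000,  # examples in the imdb_reviews train[:60%] split used for training
    batch_size=FLAGS.batch_size,
    noise_multiplier=FLAGS.noise_multiplier,
    epochs=FLAGS.epochs,
    delta=1e-5)
print('For delta=1e-5, epsilon on the 15,000-example split is: %.2f' % eps)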
Example No. 6
def get_model(differential_privacy=False, noise_multiplier=1.4, federated=False):
    
    model = Sequential()

    model.add(Conv2D(64, filters, input_shape=(input_size[0], input_size[1], 1), padding='same',
                     kernel_regularizer=l1_l2(regularizers, regularizers), activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(64, filters, kernel_regularizer=l2(regularizers), padding='same',
               activation='relu'))
    # model.add(MaxPooling2D())
    model.add(MaxPooling2D((4, 4)))
    model.add(Dropout(0.20))

    model.add(
        Conv2D(256, filters, kernel_regularizer=l2(regularizers), padding='same',
               activation='relu'))
    model.add(
        Conv2D(256, filters, kernel_regularizer=l2(regularizers), padding='same',
               activation='relu'))
    model.add(MaxPooling2D())
    model.add(Dropout(0.20))

    model.add(Flatten())
    # model.add(Dense(512, kernel_regularizer=l2(regularizers), activation='relu'))
    model.add(Dense(256, kernel_regularizer=l2(regularizers), activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes, activation='softmax'))

    # model.load_weights("weights.best.hdf5")

    # opt = Adam(lr=0.001)

    optimizer = Adam()
    if differential_privacy:
        optimizer = DPGradientDescentGaussianOptimizer(
            l2_norm_clip=l2_norm_clip,
            noise_multiplier=noise_multiplier,
            num_microbatches=num_microbatches,
            learning_rate=learning_rate)

    if not federated:
        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model
Example No. 7
def shadow_model():
    """The architecture of the shadow model is same as target model, because the attack is white-box,
    hence the attacker is assumed to know this architecture too.

    :return: shadow model
    """

    classifier = Sequential()
    classifier.add(
        Conv1D(filters=64,
               kernel_size=3,
               activation='relu',
               input_shape=shadow_input_shape))
    classifier.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    classifier.add(Dropout(0.5))
    classifier.add(MaxPooling1D(pool_size=2))
    classifier.add(Flatten())
    classifier.add(Dense(100, activation='relu'))
    classifier.add(Dense(2, activation='softmax'))

    if dpsgd:
        optimizer = DPGradientDescentGaussianOptimizer(
            l2_norm_clip=l2_norm_clip,
            noise_multiplier=noise_multiplier,
            num_microbatches=int(microbatches_perc * batch_size),
            learning_rate=learning_rate)
        # Compute vector of per-example loss rather than its mean over a minibatch.
        loss = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True, reduction=tf.compat.v2.losses.Reduction.NONE)
    else:
        optimizer = GradientDescentOptimizer(learning_rate=learning_rate)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True, reduction=tf.compat.v2.losses.Reduction.NONE)

    # Compile model with Keras
    classifier.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

    return classifier
Example No. 8
    def _init_new_model(self):

        if batch_size % num_microbatches != 0:
            raise ValueError(
                'Batch size should be an integer multiple of the number of microbatches'
            )

        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(16,
                                   8,
                                   strides=2,
                                   padding='same',
                                   activation='relu',
                                   input_shape=input_shape),
            tf.keras.layers.MaxPool2D(2, 1),
            tf.keras.layers.Conv2D(32,
                                   4,
                                   strides=2,
                                   padding='valid',
                                   activation='relu'),
            tf.keras.layers.MaxPool2D(2, 1),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(32, activation='relu'),
            tf.keras.layers.Dense(num_classes, activation='softmax')
        ])

        optimizer = DPGradientDescentGaussianOptimizer(
            l2_norm_clip=l2_norm_clip,
            noise_multiplier=noise_multiplier,
            num_microbatches=num_microbatches,
            learning_rate=learning_rate)

        loss = tf.keras.losses.CategoricalCrossentropy(
            from_logits=True, reduction=tf.losses.Reduction.NONE)

        model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

        return model
Example No. 9
def main_DP(winL=90,
            winR=90,
            do_preprocess=True,
            maxRR=True,
            use_RR=True,
            norm_RR=True,
            compute_morph={''},
            reduced_DS=False,
            leads_flag=[1, 0],
            noise_multiplier=1.4):
    print("Runing train_Keras.py for Differential Privacy!")

    db_path = settings.db_path

    # Load train data
    [tr_features, tr_labels,
     tr_patient_num_beats] = load_mit_db('DS1', winL, winR, do_preprocess,
                                         maxRR, use_RR, norm_RR, compute_morph,
                                         db_path, reduced_DS, leads_flag)

    # Load test data
    [eval_features, eval_labels, eval_patient_num_beats
     ] = load_mit_db('DS2', winL, winR, do_preprocess, maxRR, use_RR, norm_RR,
                     compute_morph, db_path, reduced_DS, leads_flag)

    scaler = StandardScaler()
    scaler.fit(tr_features)
    tr_features_scaled = scaler.transform(tr_features)
    eval_features_scaled = scaler.transform(eval_features)

    model_path = db_path + 'keras_models/'

    model_path = create_model_name(model_path, winL, winR, do_preprocess,
                                   maxRR, use_RR, norm_RR, compute_morph,
                                   leads_flag, reduced_DS, '_')

    model_path = model_path + '.h5'

    print(("Training model on MIT-BIH DS1: " + model_path + "..."))

    if 1 == 2:  #os.path.isfile(model_svm_path):
        # Load the trained model!
        mlp_model = load_model(model_path)

    else:
        # print(tr_features_scaled.shape[1])

        l2_norm_clip = 1.5
        # noise_multiplier = 1.4
        num_microbatches = 250
        learning_rate = 0.25

        mlp_model = Sequential()
        mlp_model.add(
            Dense(100,
                  input_dim=tr_features_scaled.shape[1],
                  activation='relu'))
        mlp_model.add(Dropout(0.5))
        mlp_model.add(Dense(1, activation='sigmoid'))

        optimizer = DPGradientDescentGaussianOptimizer(
            l2_norm_clip=l2_norm_clip,
            noise_multiplier=noise_multiplier,
            num_microbatches=num_microbatches,
            learning_rate=learning_rate)

        mlp_model.compile(loss='binary_crossentropy',
                          optimizer=optimizer,
                          metrics=['accuracy'])

        # Let's Train!
        start = time.time()
        mlp_model.fit(tr_features_scaled, tr_labels, epochs=5, batch_size=128)
        end = time.time()

        print(("Trained completed!\n\t" + model_path + "\n \
            \tTime required: " + str(format(end - start, '.2f')) + " sec"))

        # Save trained MLP model
        mlp_model.save(model_path)

    # Test the model
    print(("Testing model on MIT-BIH DS2: " + model_path + "..."))

    # Evaluate the model with new data
    predictions = mlp_model.predict(eval_features_scaled)
    predictions = (predictions.squeeze() > 0.5)
    print(confusion_matrix(eval_labels, predictions))
    print(classification_report(eval_labels, predictions))
    print("Accuracy: {0}".format(accuracy_score(eval_labels, predictions)))

    # compute_dp_sgd_privacy returns (epsilon, optimal RDP order); keep only epsilon.
    eps, _ = compute_dp_sgd_privacy.compute_dp_sgd_privacy(
        n=tr_features_scaled.shape[0],
        batch_size=128,
        noise_multiplier=noise_multiplier,
        epochs=5,
        delta=1e-5)
    with open("dp.txt", "a+") as f:
        f.write("noise={0} eps={1} training_time={2:.0f} s \n".format(
            noise_multiplier, eps, end - start))
        f.write(np.array2string(confusion_matrix(eval_labels, predictions)))
        f.write(classification_report(eval_labels, predictions))
        f.write("Accuracy: {0}\n".format(
            accuracy_score(eval_labels, predictions)))
        f.write("-------------------------\n")
Example No. 10
def main(unused_argv):
    logging.set_verbosity(logging.INFO)

    if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0:
        raise ValueError(
            'Number of microbatches should evenly divide batch_size')

    (train_data, test_data), info = tfds.load(
        # Use the version pre-encoded with an ~8k vocabulary.
        'imdb_reviews/subwords8k',
        # Return the train/test datasets as a tuple.
        split=(tfds.Split.TRAIN, tfds.Split.TEST),
        # Return (example, label) pairs from the dataset (instead of a dictionary).
        as_supervised=True,
        # Also return the `info` structure.
        with_info=True)

    encoder = info.features['text'].encoder
    print('Vocabulary size: {}'.format(encoder.vocab_size))
    sample_string = 'Hello TensorFlow.'

    encoded_string = encoder.encode(sample_string)
    print('Encoded string is {}'.format(encoded_string))

    original_string = encoder.decode(encoded_string)
    print('The original string: "{}"'.format(original_string))

    assert original_string == sample_string

    for ts in encoded_string:
        print('{} ----> {}'.format(ts, encoder.decode([ts])))

    for train_example, train_label in train_data.take(1):
        print('Encoded text:', train_example[:10].numpy())
        print('Label:', train_label.numpy())

    BUFFER_SIZE = 1000

    train_batches = (train_data.shuffle(BUFFER_SIZE).padded_batch(32))

    test_batches = (test_data.padded_batch(32))

    for example_batch, label_batch in train_batches.take(2):
        print("Batch shape:", example_batch.shape)
        print("label shape:", label_batch.shape)

    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(encoder.vocab_size, 16),
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.summary()

    optimizer = DPGradientDescentGaussianOptimizer(
        l2_norm_clip=FLAGS.l2_norm_clip,
        noise_multiplier=FLAGS.noise_multiplier,
        num_microbatches=FLAGS.microbatches,
        learning_rate=FLAGS.learning_rate)
    # Compute vector of per-example loss rather than its mean over a minibatch.
    loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)

    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    history = model.fit(train_batches,
                        epochs=10,
                        validation_data=test_batches,
                        validation_steps=30)

    loss, accuracy = model.evaluate(test_batches)

    print("Loss: ", loss)
    print("Accuracy: ", accuracy)

    # Compute the privacy budget expended.
    if FLAGS.dpsgd:
        eps = compute_epsilon(FLAGS.epochs * 60000 // FLAGS.batch_size)
        print('For delta=1e-5, the current epsilon is: %.2f' % eps)
    else:
        print('Trained with vanilla non-private SGD optimizer')
Example No. 11
epochs = 10
batch_size = 250

l2_norm_clip = 1.5
noise_multiplier = 0
num_microbatches = batch_size
learning_rate = 0.25

if batch_size % num_microbatches != 0:
    raise ValueError(
        'Batch size should be an integer multiple of the number of microbatches'
    )

optimizer = DPGradientDescentGaussianOptimizer(
    l2_norm_clip=l2_norm_clip,
    noise_multiplier=noise_multiplier,
    num_microbatches=num_microbatches,
    learning_rate=learning_rate)

loss = tf.keras.losses.CategoricalCrossentropy(
    from_logits=True, reduction=tf.losses.Reduction.NONE)

model1_dir = 'saved_model/clip_0_1_5'

model1 = tf.keras.models.load_model(model1_dir)

model1.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

test_loss, test_acc = model1.evaluate(test_data, test_labels)
print(test_acc)
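
With noise_multiplier set to 0 above, no Gaussian noise is added, so this run carries no formal (epsilon, delta) guarantee; only per-example gradient clipping is applied. A hedged sanity check using tensorflow_privacy's compute_dp_sgd_privacy helper (the 60,000-example training-set size is an assumption about the data behind saved_model/clip_0_1_5):

from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy

if noise_multiplier > 0:
    eps, _ = compute_dp_sgd_privacy.compute_dp_sgd_privacy(
        n=60000,
        batch_size=batch_size,
        noise_multiplier=noise_multiplier,
        epochs=epochs,
        delta=1e-5)
    print('For delta=1e-5, the current epsilon is: %.2f' % eps)
else:
    print('noise_multiplier == 0: no noise is added, so epsilon is unbounded')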
Example No. 12
def main(_):
  if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0:
    raise ValueError('Number of microbatches should evenly divide batch_size')

  # Fetch the mnist data
  train, test = tf.keras.datasets.mnist.load_data()
  train_images, train_labels = train
  test_images, test_labels = test

  # Create a dataset object and batch for the training data
  dataset = tf.data.Dataset.from_tensor_slices(
      (tf.cast(train_images[..., tf.newaxis]/255, tf.float32),
       tf.cast(train_labels, tf.int64)))
  dataset = dataset.shuffle(1000).batch(FLAGS.batch_size)

  # Create a dataset object and batch for the test data
  eval_dataset = tf.data.Dataset.from_tensor_slices(
      (tf.cast(test_images[..., tf.newaxis]/255, tf.float32),
       tf.cast(test_labels, tf.int64)))
  eval_dataset = eval_dataset.batch(10000)

  # Define the model using tf.keras.layers
  mnist_model = tf.keras.Sequential([
      tf.keras.layers.Conv2D(16, 8,
                             strides=2,
                             padding='same',
                             activation='relu'),
      tf.keras.layers.MaxPool2D(2, 1),
      tf.keras.layers.Conv2D(32, 4, strides=2, activation='relu'),
      tf.keras.layers.MaxPool2D(2, 1),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(32, activation='relu'),
      tf.keras.layers.Dense(10)
  ])

  # Instantiate the optimizer
  if FLAGS.dpsgd:
    opt = DPGradientDescentGaussianOptimizer(
        l2_norm_clip=FLAGS.l2_norm_clip,
        noise_multiplier=FLAGS.noise_multiplier,
        num_microbatches=FLAGS.microbatches,
        learning_rate=FLAGS.learning_rate)
  else:
    opt = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)

  # Training loop.
  steps_per_epoch = 60000 // FLAGS.batch_size
  for epoch in range(FLAGS.epochs):
    # Train the model for one epoch.
    for (_, (images, labels)) in enumerate(dataset.take(-1)):
      with tf.GradientTape(persistent=True) as gradient_tape:
        # This dummy call is needed to obtain the var list.
        logits = mnist_model(images, training=True)
        var_list = mnist_model.trainable_variables

        # In Eager mode, the optimizer takes a function that returns the loss.
        def loss_fn():
          logits = mnist_model(images, training=True)  # pylint: disable=undefined-loop-variable,cell-var-from-loop
          loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
              labels=labels, logits=logits)  # pylint: disable=undefined-loop-variable,cell-var-from-loop
          # If training without privacy, the loss is a scalar not a vector.
          if not FLAGS.dpsgd:
            loss = tf.reduce_mean(input_tensor=loss)
          return loss

        if FLAGS.dpsgd:
          grads_and_vars = opt.compute_gradients(loss_fn, var_list,
                                                 gradient_tape=gradient_tape)
        else:
          grads_and_vars = opt.compute_gradients(loss_fn, var_list)

      opt.apply_gradients(grads_and_vars)

    # Evaluate the model and print results
    for (_, (images, labels)) in enumerate(eval_dataset.take(-1)):
      logits = mnist_model(images, training=False)
      correct_preds = tf.equal(tf.argmax(input=logits, axis=1), labels)
    test_accuracy = np.mean(correct_preds.numpy())
    print('Test accuracy after epoch %d is: %.3f' % (epoch, test_accuracy))

    # Compute the privacy budget expended so far.
    if FLAGS.dpsgd:
      eps = compute_epsilon((epoch + 1) * steps_per_epoch)
      print('For delta=1e-5, the current epsilon is: %.2f' % eps)
    else:
      print('Trained with vanilla non-private SGD optimizer')
Example No. 13
def main(unused_argv):
    logging.set_verbosity(logging.INFO)
    train["comment_text"].fillna("fillna")
    test["comment_text"].fillna("fillna")

    x_train = train["comment_text"].str.lower()
    y_train = train[[
        "toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"
    ]].values

    x_test = train["comment_text"].str.lower()
    y_test = train[[
        "toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"
    ]].values

    tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=max_words,
                                                      lower=True)
    tokenizer.fit_on_texts(x_train)
    x_train = tokenizer.texts_to_sequences(x_train)
    x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,
                                                            maxlen=max_len)

    embeddings_index = {}

    with open(GLOVE_EMBEDDING, encoding='utf8') as f:
        for line in f:
            values = line.rstrip().rsplit(' ')
            word = values[0]
            embed = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = embed

    word_index = tokenizer.word_index

    num_words = min(max_words, len(word_index) + 1)

    embedding_matrix = np.zeros((num_words, embed_size), dtype='float32')

    for word, i in word_index.items():

        if i >= max_words:
            continue

        embedding_vector = embeddings_index.get(word)

        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    input = tf.keras.layers.Input(shape=(max_len, ))

    x = tf.keras.layers.Embedding(max_words,
                                  embed_size,
                                  weights=[embedding_matrix],
                                  trainable=False)(input)

    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.GRU(128,
                            return_sequences=True,
                            dropout=0.1,
                            recurrent_dropout=0.1))(x)

    x = tf.keras.layers.Conv1D(64,
                               kernel_size=3,
                               padding="valid",
                               kernel_initializer="glorot_uniform")(x)

    avg_pool = tf.keras.layers.GlobalAveragePooling1D()(x)
    max_pool = tf.keras.layers.GlobalMaxPooling1D()(x)

    x = tf.keras.layers.concatenate([avg_pool, max_pool])

    preds = tf.keras.layers.Dense(6, activation="sigmoid")(x)

    model = tf.keras.Model(input, preds)

    model.summary()
    if FLAGS.dpsgd:
        optimizer = DPGradientDescentGaussianOptimizer(
            l2_norm_clip=FLAGS.l2_norm_clip,
            noise_multiplier=FLAGS.noise_multiplier,
            num_microbatches=FLAGS.microbatches,
            learning_rate=FLAGS.learning_rate)
        # Compute vector of per-example loss rather than its mean over a minibatch.
        loss = tf.keras.losses.BinaryCrossentropy(
            from_logits=True)  # reduction=tf.compat.v1.losses.Reduction.NONE
    else:
        optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
        loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)

    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])  # optimizer=tf.keras.optimizers.Adam(lr=1e-3)

    batch_size = 128

    checkpoint_path = "training_1/cp.ckpt"
    checkpoint_dir = os.path.dirname(checkpoint_path)

    cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                     save_weights_only=True,
                                                     verbose=1)

    callbacks = [
        tf.keras.callbacks.EarlyStopping(patience=5, monitor='val_loss'),
        tf.keras.callbacks.TensorBoard(log_dir='./logs'), cp_callback
    ]

    model.fit(x_train,
              y_train,
              validation_split=0.2,
              batch_size=batch_size,
              epochs=1,
              callbacks=callbacks,
              verbose=1)

    latest = tf.train.latest_checkpoint(checkpoint_dir)

    model.load_weights(latest)

    tokenizer.fit_on_texts(x_test)
    x_test = tokenizer.texts_to_sequences(x_test)
    x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,
                                                           maxlen=max_len)

    score = model.evaluate(x_test, y_test, verbose=1)

    print("Test Score:", score[0])
    print("Test Accuracy:", score[1])

    # Compute the privacy budget expended.
    if FLAGS.dpsgd:
        eps = compute_epsilon(FLAGS.epochs * 60000 // FLAGS.batch_size)
        print('For delta=1e-5, the current epsilon is: %.2f' % eps)
    else:
        print('Trained with vanilla non-private SGD optimizer')
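
Most of the examples above reference absl-style FLAGS (dpsgd, learning_rate, noise_multiplier, l2_norm_clip, batch_size, epochs, microbatches) and a few common imports without defining them. The following is a minimal sketch of that assumed scaffolding, modeled on the TensorFlow Privacy tutorials; the default values are illustrative, and exact module paths can vary across tensorflow_privacy versions.

import tensorflow as tf
from absl import app
from absl import flags
from absl import logging
from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer

# Non-private baseline optimizer used in the `else` branches above.
GradientDescentOptimizer = tf.compat.v1.train.GradientDescentOptimizer

flags.DEFINE_boolean('dpsgd', True, 'Train with DP-SGD if True, plain SGD otherwise')
flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training')
flags.DEFINE_float('noise_multiplier', 1.1, 'Ratio of noise stddev to the l2 clipping norm')
flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm for per-example gradients')
flags.DEFINE_integer('batch_size', 250, 'Batch size')
flags.DEFINE_integer('epochs', 60, 'Number of training epochs')
flags.DEFINE_integer('microbatches', 250, 'Number of microbatches (must evenly divide batch_size)')

FLAGS = flags.FLAGS

if __name__ == '__main__':
    app.run(main)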