Example #1
def BiGRU(input_tensor, units=64, use_gpu=True):
    """
    Bi-GRU
    :param input_tensor:
    :param units:
    :param use_gpu: if true, use CuDNNGRU to accelerate computing.
    :return:
    """
    if use_gpu:
        GRU = layers.CuDNNGRU
    else:
        GRU = layers.GRU
    gru1 = layers.Bidirectional(GRU(units,
                                    return_sequences=True,
                                    kernel_initializer='he_normal',
                                    name='gru1'),
                                merge_mode='sum')(input_tensor)
    x = layers.Bidirectional(GRU(units,
                                 return_sequences=True,
                                 kernel_initializer='he_normal',
                                 name='gru2'),
                             merge_mode='concat')(gru1)
    x = layers.TimeDistributed(layers.Dense(units=units * 2,
                                            activation='relu'),
                               name='fc')(x)
    x = layers.TimeDistributed(layers.Dropout(0.3), name='dropout')(x)
    return x
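A minimal usage sketch (not part of the original example) showing how BiGRU can be wired into a model; the import path, input shape, and output head are assumptions. It targets TF 1.x / standalone Keras, where layers.CuDNNGRU exists; on TF 2.x, layers.GRU with default arguments already dispatches to the cuDNN kernel, so use_gpu=False is the safer choice there. The BiLSTM variant in Example #2 is used the same way.

from tensorflow.keras import layers, models  # assumed import; original may use standalone keras

seq_input = layers.Input(shape=(100, 128))             # (timesteps, features), assumed shape
features = BiGRU(seq_input, units=64, use_gpu=False)   # (batch, 100, 128) after the dense head
outputs = layers.Dense(10, activation='softmax')(features)  # per-timestep predictions
model = models.Model(seq_input, outputs)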
Example #2
def BiLSTM(input_tensor, units=64, use_gpu=False):
    """
    Bi-LSTM
    :param input_tensor:
    :param units:
    :param use_gpu: if true, use CuDNNGRU to accelerate computing.
    :return:
    """
    if use_gpu:
        LSTM = layers.CuDNNLSTM
    else:
        LSTM = layers.LSTM
    lstm1 = layers.Bidirectional(LSTM(units,
                                      return_sequences=True,
                                      kernel_initializer='he_normal',
                                      name='lstm1'),
                                 merge_mode='sum')(input_tensor)
    x = layers.Bidirectional(LSTM(units,
                                  return_sequences=True,
                                  kernel_initializer='he_normal',
                                  name='lstm2'),
                             merge_mode='concat')(lstm1)
    x = layers.TimeDistributed(layers.Dense(units=units * 2,
                                            activation='relu'),
                               name='fc')(x)
    x = layers.TimeDistributed(layers.Dropout(0.3), name='dropout')(x)
    return x
Example #3
        def custom_model_fn(features, labels, mode):
            """Used to build a TF custom estimator"""
            embedded_input = tf.contrib.layers.embed_sequence(features['sequence'],
                                                              embedding_matrix.shape[0],
                                                              embedding_matrix.shape[1],
                                                              initializer=embedding_initializer,
                                                              trainable=False)
            first_gru = layers.CuDNNGRU(self.num_neurons, return_sequences=True)
            gru_output = layers.Bidirectional(first_gru)(embedded_input)
            gru_output = layers.Bidirectional(layers.CuDNNGRU(self.num_neurons))(gru_output)
            logits = layers.Dense(6)(gru_output)

            predicted_classes = tf.argmax(logits, 1)
            if mode == tf.estimator.ModeKeys.PREDICT:
                predictions = {
                    'class_ids': predicted_classes[:, tf.newaxis],
                    'probabilities': tf.nn.sigmoid(logits),
                    'logits': logits
                }
                return tf.estimator.EstimatorSpec(mode, predictions=predictions)

            loss = tf.losses.sigmoid_cross_entropy(labels, logits)

            if mode == tf.estimator.ModeKeys.EVAL:
                return tf.estimator.EstimatorSpec(mode, loss=loss)

            assert mode == tf.estimator.ModeKeys.TRAIN
            optimizer = tf.train.AdamOptimizer()
            train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
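A hedged sketch (not from the original source) of how a model_fn like this is handed to the TF 1.x Estimator API, assuming the function is visible where the Estimator is built (in the original it is nested inside a method); the model_dir and input functions are assumptions and would need to yield ({'sequence': ...}, labels) batches.

estimator = tf.estimator.Estimator(model_fn=custom_model_fn,
                                   model_dir='/tmp/bigru_estimator')   # path is an assumption
estimator.train(input_fn=train_input_fn, steps=1000)      # train_input_fn is hypothetical
predictions = estimator.predict(input_fn=test_input_fn)   # test_input_fn is hypothetical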
Example #4
    def build_bigru_model(self, embedding_matrix) -> Tuple[Model, Model]:
        """
        build and return multi-headed BiGru model
        with 1) MLM output from first GRU layer
             2) standard toxicity classification output from second
        :param embedding_matrix:
        :return:
        """
        token_input = layers.Input(shape=(self.max_seq_len,))
        embedding_layer = layers.Embedding(self.vocab_size + 1,
                                           self.embedding_dims,
                                           weights=[embedding_matrix],
                                           trainable=False)
        embedded_input = embedding_layer(token_input)
        gru1_output = layers.Bidirectional(layers.CuDNNGRU(self.num_neurons,
                                                           return_sequences=True))(embedded_input)
        aux_output = layers.Dense(self.vocab_size + 1, 'softmax', name='aux_output')(gru1_output)
        gru2_output = layers.Bidirectional(layers.CuDNNGRU(self.num_neurons))(gru1_output)
        main_output = layers.Dense(6, activation='sigmoid', name='main_output')(gru2_output)

        training_model = Model(inputs=token_input, outputs=[main_output, aux_output])
        mlm_loss = MaskedPenalizedSparseCategoricalCrossentropy(CONFIDENCE_PENALTY)
        training_model.compile(optimizer=optimizers.Adam(),
                               loss={'main_output': MaskedBinaryCrossedentropy(),
                                     'aux_output': mlm_loss})

        inference_model = Model(inputs=token_input, outputs=main_output)

        print('generated bigru model...')
        print(training_model.summary())

        return training_model, inference_model
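A brief usage sketch (an assumption, not from the source) of training the two-headed model above; Keras routes each target array to the output layer with the matching name. The names builder, token_ids, toxicity_labels, and masked_token_targets are hypothetical.

training_model, inference_model = builder.build_bigru_model(embedding_matrix)
training_model.fit(token_ids,                               # shape (batch, max_seq_len)
                   {'main_output': toxicity_labels,         # shape (batch, 6)
                    'aux_output': masked_token_targets},    # shape (batch, max_seq_len), sparse token ids
                   batch_size=32,
                   epochs=3)
toxicity_probs = inference_model.predict(token_ids)         # main output only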
Example #5
    def build_bigru_model(self, embedding_matrix) -> Model:
        """
        build and return BiGru model using standard optimizer and loss
        :param embedding_matrix:
        :return:
        """
        token_input = layers.Input(shape=(self.max_seq_len, ))
        embedding_layer = layers.Embedding(self.vocab_size + 1,
                                           self.embedding_dims,
                                           weights=[embedding_matrix],
                                           trainable=False)
        embedded_input = embedding_layer(token_input)
        gru_output = layers.Bidirectional(
            layers.CuDNNGRU(self.num_neurons,
                            return_sequences=True))(embedded_input)
        gru_output = layers.Bidirectional(layers.CuDNNGRU(
            self.num_neurons))(gru_output)
        dense_output = layers.Dense(6, activation='sigmoid')(gru_output)

        bigru_model = Model(token_input, dense_output)
        bigru_model.compile(optimizer=optimizers.Adam(),
                            loss=losses.binary_crossentropy)

        print('generated bigru model...')

        return bigru_model
Example #6
 def __init__(self, embedding, hidden_size, batch_size):
     super(Encoder, self).__init__()
     self.embedding = embedding
     self.batch_size = batch_size
     self.hidden_size = hidden_size
     self.bilstm = layers.Bidirectional(layers.CuDNNLSTM(
         self.hidden_size, return_sequences=True, return_state=True),
                                        merge_mode='concat')
Example #7
    def build_bigru_model(self) -> Model:
        """
        build and return BiGru model using standard optimizer and loss
        :return:
        """
        embedded_input = layers.Input(shape=(None, self.embedding_dims))
        gru_output = layers.Bidirectional(
            layers.CuDNNGRU(self.num_neurons,
                            return_sequences=True))(embedded_input)
        gru_output = layers.Bidirectional(layers.CuDNNGRU(
            self.num_neurons))(gru_output)
        dense_output = layers.Dense(6, activation='sigmoid')(gru_output)

        bigru_model = Model(embedded_input, dense_output)
        bigru_model.compile(optimizer=optimizers.Adam(),
                            loss=losses.binary_crossentropy)

        print('generated bigru model...')
        print(bigru_model.summary())

        return bigru_model
Example #8
    def _get_keras_model(self) -> models.Model:
        I = layers.Input(shape=(None, self._embedding_size),
                         dtype='float32',
                         name=base_model.TOKENS_FEATURE_KEY)

        # Bidirectional GRU
        H = I
        for num_units in self.hparams().gru_units:
            H = layers.Bidirectional(
                layers.GRU(num_units, return_sequences=True))(H)

        # Attention
        last_gru_units = self.hparams().gru_units[-1] * 2  # x2 because bidirectional
        A = layers.TimeDistributed(layers.Dense(self.hparams().attention_units,
                                                activation='relu'),
                                   input_shape=(None, last_gru_units))(H)
        A = layers.TimeDistributed(layers.Dense(1))(A)
        A = layers.Flatten()(A)
        A = layers.Activation('softmax')(A)

        # Dense
        X = layers.Dot((1, 1))([H, A])
        X = layers.Flatten()(X)
        for num_units in self.hparams().dense_units:
            X = layers.Dense(num_units, activation='relu')(X)
            X = layers.Dropout(self.hparams().dropout_rate)(X)

        # Outputs
        outputs = []
        for label in self._labels:
            outputs.append(
                layers.Dense(1, activation='sigmoid', name=label)(X))

        model = models.Model(inputs=I, outputs=outputs)
        model.compile(
            optimizer=optimizers.Adam(lr=self.hparams().learning_rate),
            loss='binary_crossentropy',
            metrics=['binary_accuracy', super().roc_auc])

        tf.logging.info(model.summary())
        return model
Example #9
 def __init__(self,
              units,
              use_bias=True,
              kernel_initializer='glorot_uniform',
              recurrent_initializer='orthogonal',
              bias_initializer='zeros',
              unit_forget_bias=True,
              dropout=0.,
              return_sequences=False,
              return_state=False,
              go_backwards=False,
              stateful=False,
              num_layers=1,
              bidirectional=False,
              **kwargs):
     super(LSTM, self).__init__(**kwargs)
     assert num_layers == 1, "Only support single layer for CuDNN RNN in keras"
     self._rnn = layers.LSTM(
         # cuDNN requirement
         activation='tanh',
         recurrent_activation='sigmoid',
         recurrent_dropout=0,
         unroll=False,
         use_bias=use_bias,
         # free arguments
         units=units,
         kernel_initializer=kernel_initializer,
         recurrent_initializer=recurrent_initializer,
         bias_initializer=bias_initializer,
         unit_forget_bias=unit_forget_bias,
         dropout=dropout,
         return_sequences=return_sequences,
         return_state=return_state,
         go_backwards=go_backwards,
         stateful=stateful,
         **kwargs)
     if bidirectional:
         self._rnn = layers.Bidirectional(
             self._rnn,
             merge_mode='concat',
         )
Example #10
 def __init__(self,
              units,
              activation='tanh',
              use_bias=True,
              kernel_initializer='glorot_uniform',
              recurrent_initializer='orthogonal',
              bias_initializer='zeros',
              dropout=0.,
              return_sequences=False,
              return_state=False,
              go_backwards=False,
              stateful=False,
              num_layers=1,
              bidirectional=False,
              **kwargs):
     super(SimpleRNN, self).__init__(**kwargs)
     assert num_layers == 1, "Only support single layer for CuDNN RNN in keras"
     self._rnn = layers.SimpleRNN(
         units=units,
         activation=activation,
         use_bias=use_bias,
         kernel_initializer=kernel_initializer,
         recurrent_initializer=recurrent_initializer,
         bias_initializer=bias_initializer,
         dropout=dropout,
         recurrent_dropout=0.,
         return_sequences=return_sequences,
         return_state=return_state,
         go_backwards=go_backwards,
         stateful=stateful,
         unroll=False)
     if bidirectional:
         self._rnn = layers.Bidirectional(
             self._rnn,
             merge_mode='concat',
         )
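The notebook cells below compile with an optimizer and pass a callback es to model.fit; both are defined in earlier cells that are not shown. A minimal sketch of typical definitions (the exact settings are assumptions):

from tensorflow.keras import Sequential, layers, optimizers  # assumed imports
from tensorflow.keras.callbacks import EarlyStopping

es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
optimizer = optimizers.Adam()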
# In[99]:

model.fit(X_train,
          Y_train,
          validation_data=(X_test, Y_test),
          callbacks=[es],
          epochs=50,
          verbose=1)

# # Bi-LSTM

# In[103]:

model = Sequential(name="Bi-LSTM")
model.add(
    layers.Bidirectional(
        layers.LSTM(64, input_shape=(X_train.shape[1], 1), activation="relu")))
model.add(layers.Dense(6, activation="softmax"))

# In[104]:

model.compile(optimizer=optimizer,
              loss="categorical_crossentropy",
              metrics=['accuracy'])

# In[106]:

model.fit(X_train,
          Y_train,
          validation_data=(X_test, Y_test),
          callbacks=[es],
          epochs=50)
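Because the Dense(6, activation="softmax") head is trained with categorical_crossentropy, Y_train and Y_test must be one-hot encoded. A hedged sketch, assuming integer class labels y_train / y_test (names hypothetical):

from tensorflow.keras.utils import to_categorical

Y_train = to_categorical(y_train, num_classes=6)
Y_test = to_categorical(y_test, num_classes=6)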