Example #1
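The snippet omits its imports. A plausible set, assuming standalone Keras 2.x with the CRF layer from keras-contrib and the attention layer from keras-self-attention (the keyword arguments below match those two APIs, but treat the exact sources as an assumption):

import keras
from keras_contrib.layers import CRF  # assumed source; provides the loss_function/accuracy attributes used below
from keras_self_attention import SeqSelfAttention as Attention  # assumed; matches attention_width/attention_activation
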
def build_model(token_num,
                tag_num,
                embedding_dim=100,
                embedding_weights=None,
                rnn_units=100,
                return_attention=False,
                lr=1e-3):
    """Build the model for predicting tags.

    :param token_num: Number of tokens in the word dictionary.
    :param tag_num: Number of tags.
    :param embedding_dim: The output dimension of the embedding layer.
    :param embedding_weights: Initial weights for embedding layer.
    :param rnn_units: The number of RNN units in a single direction.
    :param return_attention: Whether to return the attention matrix.
    :param lr: Learning rate of optimizer.
    :return model: The built model.
    """
    if embedding_weights is not None and not isinstance(embedding_weights, list):
        embedding_weights = [embedding_weights]

    input_layer = keras.layers.Input(shape=(None,))
    embd_layer = keras.layers.Embedding(input_dim=token_num,
                                        output_dim=embedding_dim,
                                        mask_zero=True,
                                        weights=embedding_weights,
                                        trainable=embedding_weights is None,
                                        name='Embedding')(input_layer)
    lstm_layer = keras.layers.Bidirectional(keras.layers.LSTM(units=rnn_units,
                                                              recurrent_dropout=0.4,
                                                              return_sequences=True),
                                            name='Bi-LSTM')(embd_layer)
    attention_layer = Attention(attention_activation='sigmoid',
                                attention_width=9,
                                return_attention=return_attention,
                                name='Attention')(lstm_layer)
    if return_attention:
        attention_layer, attention = attention_layer
    crf = CRF(units=tag_num, sparse_target=True, name='CRF')

    outputs = [crf(attention_layer)]
    loss = {'CRF': crf.loss_function}
    if return_attention:
        outputs.append(attention)
        loss['Attention'] = Attention.loss(1e-4)

    model = keras.models.Model(inputs=input_layer, outputs=outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(lr=lr),
        loss=loss,
        metrics={'CRF': crf.accuracy},
    )
    return model
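
A minimal usage sketch for build_model; the vocabulary and tag counts are placeholders, not values from the original project:

# Tagger over a 10,000-token vocabulary with 5 output tags.
model = build_model(token_num=10000, tag_num=5)
model.summary()

# With return_attention=True the model gains a second output (the attention
# matrix), so predictions come back as [tags, attention].
attn_model = build_model(token_num=10000, tag_num=5, return_attention=True)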
Example #2
    def build(self):
        _embedding = [
            feature.embedding_layer(trainable=True)
            for idx, feature in enumerate(self._features)
        ]

        if len(_embedding) > 1:
            _layer = Concatenate()(_embedding)
        else:
            _layer = _embedding[0]

        _layer = Reshape((-1, 3, 161))(_layer)
        for i, size in enumerate(self._params.layers_size):
            # if self._params.spatial_dropout[i] is not None:
            #     _layer = SpatialDropout1D(self._params.spatial_dropout[i])(_layer)
            hidden_layer = self._params.rnn_cell(
                size,
                recurrent_dropout=self._params.recurrent_dropout[i],
                return_sequences=(i != self._params.deep_lvl) or self._attention)

            if self._params.bidirectional:
                _layer = TimeDistributed(Bidirectional(hidden_layer))(_layer)
            else:
                _layer = TimeDistributed(hidden_layer)(_layer)

        if self._attention:
            _layer = TimeDistributed(Attention())(_layer)

        _layer = Bidirectional(
            LSTM(300, recurrent_dropout=0.4, return_sequences=True))(_layer)
        _layer = Bidirectional(
            LSTM(300, recurrent_dropout=0.4, return_sequences=True))(_layer)
        _layer = Attention()(_layer)
        _layer = Capsule(num_capsule=2,
                         dim_capsule=8,
                         routings=3,
                         share_weights=True)(_layer)
        _layer = Flatten()(_layer)
        if self._params.dropout_dense:
            _layer = Dropout(self._params.dropout_dense)(_layer)
        if self._params.dense_encoder_size:
            _layer = Dense(self._params.dense_encoder_size,
                           activation='relu')(_layer)

        output = self._output(_layer)

        self.__model = Model(inputs=self._inputs, outputs=output)
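
The hard-coded Reshape((-1, 3, 161)) regroups the concatenated embeddings into sub-sequences of 3 steps with 161 features each, so each TimeDistributed(Bidirectional(...)) call runs one RNN per sub-sequence. A standalone sketch of that pattern, with made-up sizes:

from keras.layers import Input, Reshape, TimeDistributed, Bidirectional, LSTM
from keras.models import Model

inp = Input(shape=(12, 161))                    # 12 steps, 161 features
x = Reshape((-1, 3, 161))(inp)                  # -> (batch, 4, 3, 161): 4 windows of 3 steps
x = TimeDistributed(Bidirectional(LSTM(8)))(x)  # one Bi-LSTM per window -> (batch, 4, 16)
Model(inp, x).summary()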
Example #3
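The imports are again omitted. Judging by the custom_objects mapping at load time, Attention is keras-self-attention's SeqSelfAttention and MultiHead comes from keras-multi-head. MaskFlatten and data_generator are defined elsewhere in the test module; the MaskFlatten sketch below is an assumption (a Flatten that accepts and drops an incoming mask):

import os
import tempfile

import numpy as np
import keras
from keras_multi_head import MultiHead
from keras_self_attention import SeqSelfAttention as Attention

class MaskFlatten(keras.layers.Flatten):
    # Assumed helper: tolerate a mask from the previous layer and discard it.
    def __init__(self, **kwargs):
        super(MaskFlatten, self).__init__(**kwargs)
        self.supports_masking = True

    def compute_mask(self, inputs, mask=None):
        return None
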
    def test_multi_attention(self):
        model = keras.models.Sequential()
        model.add(
            keras.layers.Embedding(input_dim=5,
                                   output_dim=3,
                                   mask_zero=True,
                                   name='Embed'))
        model.add(
            MultiHead(
                layer=Attention(name='Attention'),
                layer_num=5,
                hidden_dim=3,
                use_bias=True,
                name='Multi-Head-Attention',
            ))
        model.add(keras.layers.TimeDistributed(MaskFlatten(), name='Flatten'))
        model.add(
            keras.layers.Bidirectional(keras.layers.GRU(units=8),
                                       name='Bi-GRU'))
        model.add(
            keras.layers.Dense(units=2, activation='softmax', name='Dense'))
        model.build()
        model.compile(
            optimizer='adam',
            loss=keras.losses.sparse_categorical_crossentropy,
            metrics=[keras.metrics.sparse_categorical_accuracy],
        )
        model.summary()

        model.fit_generator(
            generator=self.data_generator(),
            steps_per_epoch=100,
            epochs=100,
            validation_data=self.data_generator(),
            validation_steps=10,
            callbacks=[
                keras.callbacks.EarlyStopping(
                    monitor='val_sparse_categorical_accuracy', patience=5),
            ],
        )
        # Round-trip the multi-head layer's weights as a get/set sanity check.
        model.layers[1].set_weights(model.layers[1].get_weights())

        model_path = os.path.join(tempfile.gettempdir(),
                                  'test_save_load_%f.h5' % np.random.random())
        model.save(model_path)
        model = keras.models.load_model(model_path,
                                        custom_objects={
                                            'MaskFlatten': MaskFlatten,
                                            'SeqSelfAttention': Attention,
                                            'MultiHead': MultiHead,
                                        })
        model.summary()
        for data, tag in self.data_generator():
            predicts = model.predict(data)
            predicts = np.argmax(predicts, axis=-1)
            self.assertGreaterEqual(np.sum(tag == predicts), 30,
                                    (tag, predicts))
            break
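
The test's data_generator helper is not shown; a minimal stand-in that matches the shapes used above (batches of 32 sequences of token ids in [1, 5) plus a binary tag; the toy labeling rule is purely an assumption):

def data_generator(batch_size=32, seq_len=10):
    # Yields (tokens, labels) batches forever, as fit_generator expects.
    while True:
        data = np.random.randint(1, 5, size=(batch_size, seq_len))
        # Learnable toy target: does token 3 occur in the sequence?
        tag = (data == 3).any(axis=-1).astype('int32')
        yield data, tag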
Example #4
    def build(self):
        _embedding = [
            feature.embedding_layer(trainable=True)
            for idx, feature in enumerate(self._features)
        ]
        _embedding_to_dense = [
            feature.embedding_layer(trainable=True)
            for idx, feature in enumerate(self._features_to_dense)
        ]

        if len(_embedding) > 1:
            _layer = Concatenate()(_embedding)
        elif len(_embedding) == 1:
            _layer = _embedding[0]

        if len(_embedding_to_dense) > 1:
            _layer_to_dense = Concatenate()(_embedding_to_dense)
        elif len(_embedding_to_dense) == 1:
            _layer_to_dense = _embedding_to_dense[0]

        if self._input_direct is not None:
            _layer = self._input_direct
            print('self._input_direct is not None')

        for i, size in enumerate(self._params.layers_size):
            if self._params.spatial_dropout[i] is not None:
                _layer = SpatialDropout1D(
                    self._params.spatial_dropout[i])(_layer)
            hidden_layer = self._params.rnn_cell(
                size,
                recurrent_dropout=self._params.recurrent_dropout[i],
                return_sequences=(i != self._params.deep_lvl) or self._attention)
            if self._params.bidirectional:
                _layer = Bidirectional(hidden_layer)(_layer)
            else:
                _layer = hidden_layer(_layer)

        if self._attention:
            _layer = Attention()(_layer)

        if len(_embedding_to_dense) != 0:
            _layer_to_dense = Reshape((6, ))(_layer_to_dense)
            _layer = Concatenate()([_layer, _layer_to_dense])

        if self._params.dropout_dense:
            _layer = Dropout(self._params.dropout_dense)(_layer)
        if self._params.dense_encoder_size:
            _layer = Dense(self._params.dense_encoder_size,
                           activation='relu')(_layer)

        if self._output:
            output = self._output(_layer)
        else:
            output = _layer

        self.__model = Model(inputs=self._inputs, outputs=output)
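
The Reshape((6, )) flattens the "to dense" embedding branch into a fixed-length vector so it can be concatenated with the sequence summary coming out of the RNN/attention stack. A standalone sketch of that join, with placeholder sizes (3 side features embedded into 2 dimensions gives the 6 values):

from keras.layers import Input, Embedding, LSTM, Reshape, Concatenate, Dense
from keras.models import Model

seq_in = Input(shape=(20,))                    # token ids
side_in = Input(shape=(3,))                    # 3 categorical side features
seq = LSTM(64)(Embedding(1000, 32)(seq_in))    # -> (batch, 64)
side = Embedding(10, 2)(side_in)               # -> (batch, 3, 2)
side = Reshape((6,))(side)                     # -> (batch, 6), ready to join
out = Dense(1, activation='sigmoid')(Concatenate()([seq, side]))
Model([seq_in, side_in], out).summary()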
Example #5
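Same omitted imports as in example #3, except that the load-time mapping 'SeqWeightedAttention': Attention suggests Attention is keras-self-attention's SeqWeightedAttention here:

import os
import tempfile

import numpy as np
import keras
from keras_multi_head import MultiHead
from keras_self_attention import SeqWeightedAttention as Attention
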
    def test_multi_lstm(self):
        model = keras.models.Sequential()
        model.add(keras.layers.Embedding(input_dim=5, output_dim=3, mask_zero=True, name='Embed'))
        model.add(MultiHead(
            layer=keras.layers.Bidirectional(keras.layers.LSTM(units=16, return_sequences=True), name='LSTM'),
            layer_num=5,
            reg_index=[1, 4],
            reg_slice=(slice(None, None), slice(32, 48)),
            reg_factor=0.1,
            name='Multi-Head-LSTM',
        ))
        model.add(keras.layers.TimeDistributed(MaskFlatten(name='Flatten-1')))
        model.add(MultiHead(
            layer=Attention(name='Attention'),
            layer_num=5,
            reg_index=0,
            reg_factor=0.1,
            name='Multi-Head-Attention',
        ))
        model.add(keras.layers.Flatten(name='Flatten-2'))
        model.add(keras.layers.Dense(units=2, activation='softmax', name='Dense'))
        model.build()
        model.compile(
            optimizer='adam',
            loss=keras.losses.sparse_categorical_crossentropy,
            metrics=[keras.metrics.sparse_categorical_accuracy],
        )
        model.fit_generator(
            generator=self.data_generator(),
            steps_per_epoch=100,
            epochs=100,
            validation_data=self.data_generator(),
            validation_steps=10,
            callbacks=[
                keras.callbacks.EarlyStopping(monitor='val_sparse_categorical_accuracy', patience=5),
            ],
        )
        model_path = os.path.join(tempfile.gettempdir(), 'test_save_load_%f.h5' % np.random.random())
        model.save(model_path)
        model = keras.models.load_model(model_path, custom_objects={
            'MaskFlatten': MaskFlatten,
            'SeqWeightedAttention': Attention,
            'MultiHead': MultiHead,
        })
        model.summary()
        for data, tag in self.data_generator():
            predicts = model.predict(data)
            predicts = np.argmax(predicts, axis=-1)
            self.assertGreaterEqual(np.sum(tag == predicts), 30)
            break
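
The reg_index / reg_slice / reg_factor arguments ask MultiHead to penalize similarity between the listed weight tensors across the five head copies (here restricted to the [32:48] column slice of each tensor), which pushes the heads toward learning different features; this matches keras-multi-head's documented regularization options.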
Example #6
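The ELMo callable passed to the Lambda layer is not shown. A common way to provide it, assuming TensorFlow Hub's elmo/2 module on a TF 1.x backend (module URL and wiring are assumptions). Note the token-level 'elmo' output is 3-D, (batch, tokens, 1024), which is what the SpatialDropout1D/LSTM stack below needs, so the declared output_shape=(1024, ) looks inconsistent and would more plausibly be (None, 1024):

import tensorflow as tf
import tensorflow_hub as hub
from keras import layers
from keras.models import Model

elmo_module = hub.Module('https://tfhub.dev/google/elmo/2', trainable=False)

def ELMo(x):
    # x is a (batch, 1) string tensor; squeeze to (batch,) sentences and
    # return the per-token 1024-d ELMo activations.
    return elmo_module(tf.squeeze(tf.cast(x, tf.string), axis=1),
                       signature='default', as_dict=True)['elmo']
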
def lstm(seq_len: int):
    # input_deepmoji = layers.Input(shape=(2304, ), name="deepmoji_input")
    input_text = layers.Input(shape=(1, ), dtype=tf.string, name="text_input")

    # embedding = layers.Embedding(168, 64)(input_text)
    embedding = layers.Lambda(ELMo, output_shape=(1024, ))(input_text)

    spt_dropout_1 = layers.SpatialDropout1D(0.4)(embedding)
    lstm1 = layers.Bidirectional(
        layers.LSTM(350,
                    kernel_initializer='random_uniform',
                    return_sequences=True,
                    recurrent_dropout=0.4))(spt_dropout_1)
    spt_dropout_2 = layers.SpatialDropout1D(0.3)(lstm1)
    lstm2 = layers.Bidirectional(
        layers.LSTM(350,
                    kernel_initializer='random_uniform',
                    return_sequences=True,
                    recurrent_dropout=0.3))(spt_dropout_2)
    spt_dropout_3 = layers.SpatialDropout1D(0.2)(lstm2)
    lstm3 = layers.Bidirectional(
        layers.LSTM(300,
                    kernel_initializer='random_uniform',
                    return_sequences=True,
                    recurrent_dropout=0.3))(spt_dropout_3)

    att = Attention()(lstm3)

    # merged = layers.Concatenate()([input_deepmoji, att])
    dense = layers.Dense(100, activation='relu')(att)
    pred = layers.Dense(2, activation='softmax', name="output")(dense)

    model = Model(inputs=input_text, outputs=pred)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['categorical_accuracy'])
    model.summary()

    return model
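
A usage sketch; note that seq_len is accepted but never used inside the function body:

model = lstm(seq_len=50)
# The model consumes raw strings, one per row, e.g.
# model.fit(np.array([['a good movie'], ['a bad movie']]), labels, ...)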
Example #7
File: models.py, Project: zhoubaohang/MTAAL
    def nen_decoder(self, x):
        x = Attention(attention_activation='tanh')(x)
        return x, self._nen_decoder(x)

    def test_return_attention(self):
        self._test_save_load(Attention(return_attention=True, use_bias=False, name='Attention'))

    def test_default(self):
        self._test_save_load(Attention(name='Attention'))

                        features_to_dense=[],
                        output=None,
                        params=params,
                        attention=True)

word_encoder.build()
word_encoder.model().summary()

input_model = Input(shape=(
    3,
    MAX_LEN,
), name='input_1')
review_word_enc = TimeDistributed(word_encoder.model())(input_model)
l_lstm_sent = Bidirectional(
    LSTM(200, recurrent_dropout=0.2, return_sequences=True))(review_word_enc)
l_att_sent = Attention()(l_lstm_sent)
preds = Dense(2, activation='softmax', name='output_1')(l_att_sent)

model = Model(inputs=input_model, outputs=[preds])

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['categorical_accuracy'])
model.summary()

with h5py.File('/data/elmo_embeddings.hdf5', 'r') as fin:
    X = [fin[str(x)][2] for x in range(0, 30160 * 3)]

with h5py.File('/data/dev_elmo_embeddings.hdf5', 'r') as fin:
    X_val = [fin[str(x)][2] for x in range(0, 8265)]
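
Each entry read above is one sentence's ELMo activations, with [2] selecting the top ELMo layer. Before training, the variable-length arrays would still need padding and regrouping into reviews of 3 sentences, e.g. (a sketch assuming keras' pad_sequences; the shapes are inferred from the 30160 * 3 count, and the 4-D result suggests the word encoder consumes precomputed embeddings, perhaps via the _input_direct path seen in example #4, rather than token ids):

from keras.preprocessing.sequence import pad_sequences

X = pad_sequences(X, maxlen=MAX_LEN, dtype='float32')      # -> (30160 * 3, MAX_LEN, 1024)
X = X.reshape(-1, 3, MAX_LEN, 1024)                        # group 3 sentences per review
X_val = pad_sequences(X_val, maxlen=MAX_LEN, dtype='float32')
X_val = X_val.reshape(-1, 3, MAX_LEN, 1024)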