def build(self):
    _embedding = [
        feature.embedding_layer(trainable=True)
        for idx, feature in enumerate(self._features)
    ]
    # Concatenate the per-feature embeddings, or use the single one directly.
    if len(_embedding) > 1:
        _layer = Concatenate()(_embedding)
    else:
        _layer = _embedding[0]
    _layer = Reshape((-1, 3, 161))(_layer)
    for i, size in enumerate(self._params.layers_size):
        # if self._params.spatial_dropout[i] is not None:
        #     _layer = SpatialDropout1D(self._params.spatial_dropout[i])(_layer)
        hidden_layer = self._params.rnn_cell(
            size,
            recurrent_dropout=self._params.recurrent_dropout[i],
            return_sequences=i != self._params.deep_lvl or self._attention)
        if self._params.bidirectional:
            _layer = TimeDistributed(Bidirectional(hidden_layer))(_layer)
        else:
            _layer = TimeDistributed(hidden_layer)(_layer)
    if self._attention:
        _layer = TimeDistributed(Attention())(_layer)
    # Higher-level Bi-LSTM stack followed by attention and a capsule layer.
    _layer = Bidirectional(
        LSTM(300, recurrent_dropout=0.4, return_sequences=True))(_layer)
    _layer = Bidirectional(
        LSTM(300, recurrent_dropout=0.4, return_sequences=True))(_layer)
    _layer = Attention()(_layer)
    _layer = Capsule(num_capsule=2, dim_capsule=8, routings=3,
                     share_weights=True)(_layer)
    _layer = Flatten()(_layer)
    if self._params.dropout_dense:
        _layer = Dropout(self._params.dropout_dense)(_layer)
    if self._params.dense_encoder_size:
        _layer = Dense(self._params.dense_encoder_size,
                       activation='relu')(_layer)
    output = self._output(_layer)
    self.__model = Model(inputs=self._inputs, outputs=output)
def test_multi_attention(self):
    model = keras.models.Sequential()
    model.add(keras.layers.Embedding(input_dim=5,
                                     output_dim=3,
                                     mask_zero=True,
                                     name='Embed'))
    model.add(MultiHead(
        layer=Attention(name='Attention'),
        layer_num=5,
        hidden_dim=3,
        use_bias=True,
        name='Multi-Head-Attention',
    ))
    model.add(keras.layers.TimeDistributed(MaskFlatten(), name='Flatten'))
    model.add(keras.layers.Bidirectional(keras.layers.GRU(units=8),
                                         name='Bi-GRU'))
    model.add(keras.layers.Dense(units=2, activation='softmax', name='Dense'))
    model.build()
    model.compile(
        optimizer='adam',
        loss=keras.losses.sparse_categorical_crossentropy,
        metrics=[keras.metrics.sparse_categorical_accuracy],
    )
    model.summary()
    model.fit_generator(
        generator=self.data_generator(),
        steps_per_epoch=100,
        epochs=100,
        validation_data=self.data_generator(),
        validation_steps=10,
        callbacks=[
            keras.callbacks.EarlyStopping(
                monitor='val_sparse_categorical_accuracy', patience=5),
        ],
    )
    model.layers[1].set_weights(model.layers[1].get_weights())
    model_path = os.path.join(tempfile.gettempdir(),
                              'test_save_load_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(model_path, custom_objects={
        'MaskFlatten': MaskFlatten,
        'SeqSelfAttention': Attention,
        'MultiHead': MultiHead,
    })
    model.summary()
    for data, tag in self.data_generator():
        predicts = model.predict(data)
        predicts = np.argmax(predicts, axis=-1)
        self.assertGreaterEqual(np.sum(tag == predicts), 30, (tag, predicts))
        break
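# The test above (and test_multi_lstm further below) relies on a `data_generator`
# helper that is not part of this excerpt. A minimal sketch of what it might look
# like, assuming batches of 32 zero-padded integer sequences (token ids 1..4 to
# match input_dim=5) with one binary tag per sequence; the labelling rule below
# is purely hypothetical:
def data_generator(self, batch_size=32, max_len=10):
    while True:
        data = np.random.randint(1, 5, size=(batch_size, max_len))
        for row in data:
            # Zero-pad a random suffix so mask_zero=True is actually exercised.
            row[np.random.randint(1, max_len):] = 0
        # Hypothetical target: does token id 3 occur anywhere in the sequence?
        tag = np.any(data == 3, axis=-1).astype('int32')
        yield data, tag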
def build(self):
    _embedding = [
        feature.embedding_layer(trainable=True)
        for idx, feature in enumerate(self._features)
    ]
    _embedding_to_dense = [
        feature.embedding_layer(trainable=True)
        for idx, feature in enumerate(self._features_to_dense)
    ]
    if len(_embedding) > 1:
        _layer = Concatenate()(_embedding)
    elif len(_embedding) == 1:
        _layer = _embedding[0]
    if len(_embedding_to_dense) > 1:
        _layer_to_dense = Concatenate()(_embedding_to_dense)
    elif len(_embedding_to_dense) == 1:
        _layer_to_dense = _embedding_to_dense[0]
    if self._input_direct is not None:
        _layer = self._input_direct
        print('self._input_direct is not None')
    for i, size in enumerate(self._params.layers_size):
        if self._params.spatial_dropout[i] is not None:
            _layer = SpatialDropout1D(
                self._params.spatial_dropout[i])(_layer)
        hidden_layer = self._params.rnn_cell(
            size,
            recurrent_dropout=self._params.recurrent_dropout[i],
            return_sequences=i != self._params.deep_lvl or self._attention)
        if self._params.bidirectional:
            _layer = Bidirectional(hidden_layer)(_layer)
        else:
            _layer = hidden_layer(_layer)
    if self._attention:
        _layer = Attention()(_layer)
    if len(_embedding_to_dense) != 0:
        _layer_to_dense = Reshape((6, ))(_layer_to_dense)
        _layer = Concatenate()([_layer, _layer_to_dense])
    if self._params.dropout_dense:
        _layer = Dropout(self._params.dropout_dense)(_layer)
    if self._params.dense_encoder_size:
        _layer = Dense(self._params.dense_encoder_size,
                       activation='relu')(_layer)
    if self._output:
        output = self._output(_layer)
    else:
        output = _layer
    self.__model = Model(inputs=self._inputs, outputs=output)
def build_model(token_num,
                tag_num,
                embedding_dim=100,
                embedding_weights=None,
                rnn_units=100,
                return_attention=False,
                lr=1e-3):
    """Build the model for predicting tags.

    :param token_num: Number of tokens in the word dictionary.
    :param tag_num: Number of tags.
    :param embedding_dim: The output dimension of the embedding layer.
    :param embedding_weights: Initial weights for embedding layer.
    :param rnn_units: The number of RNN units in a single direction.
    :param return_attention: Whether to return the attention matrix.
    :param lr: Learning rate of optimizer.

    :return model: The built model.
    """
    if embedding_weights is not None and not isinstance(embedding_weights, list):
        embedding_weights = [embedding_weights]
    input_layer = keras.layers.Input(shape=(None,))
    embd_layer = keras.layers.Embedding(input_dim=token_num,
                                        output_dim=embedding_dim,
                                        mask_zero=True,
                                        weights=embedding_weights,
                                        trainable=embedding_weights is None,
                                        name='Embedding')(input_layer)
    lstm_layer = keras.layers.Bidirectional(
        keras.layers.LSTM(units=rnn_units,
                          recurrent_dropout=0.4,
                          return_sequences=True),
        name='Bi-LSTM')(embd_layer)
    attention_layer = Attention(attention_activation='sigmoid',
                                attention_width=9,
                                return_attention=return_attention,
                                name='Attention')(lstm_layer)
    if return_attention:
        attention_layer, attention = attention_layer
    crf = CRF(units=tag_num, sparse_target=True, name='CRF')
    outputs = [crf(attention_layer)]
    loss = {'CRF': crf.loss_function}
    if return_attention:
        outputs.append(attention)
        loss['Attention'] = Attention.loss(1e-4)
    model = keras.models.Model(inputs=input_layer, outputs=outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(lr=lr),
        loss=loss,
        metrics={'CRF': crf.accuracy},
    )
    return model
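# Hypothetical usage sketch for build_model; the vocabulary size and tag count
# below are illustrative values, not taken from the original project.
if __name__ == '__main__':
    demo_model = build_model(token_num=10000,
                             tag_num=5,
                             embedding_dim=100,
                             rnn_units=100,
                             return_attention=False,
                             lr=1e-3)
    demo_model.summary()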
def test_multi_lstm(self):
    model = keras.models.Sequential()
    model.add(keras.layers.Embedding(input_dim=5,
                                     output_dim=3,
                                     mask_zero=True,
                                     name='Embed'))
    model.add(MultiHead(
        layer=keras.layers.Bidirectional(
            keras.layers.LSTM(units=16, return_sequences=True),
            name='LSTM'),
        layer_num=5,
        reg_index=[1, 4],
        reg_slice=(slice(None, None), slice(32, 48)),
        reg_factor=0.1,
        name='Multi-Head-LSTM',
    ))
    model.add(keras.layers.TimeDistributed(MaskFlatten(name='Flatten-1')))
    model.add(MultiHead(
        layer=Attention(name='Attention'),
        layer_num=5,
        reg_index=0,
        reg_factor=0.1,
        name='Multi-Head-Attention',
    ))
    model.add(keras.layers.Flatten(name='Flatten-2'))
    model.add(keras.layers.Dense(units=2, activation='softmax', name='Dense'))
    model.build()
    model.compile(
        optimizer='adam',
        loss=keras.losses.sparse_categorical_crossentropy,
        metrics=[keras.metrics.sparse_categorical_accuracy],
    )
    model.fit_generator(
        generator=self.data_generator(),
        steps_per_epoch=100,
        epochs=100,
        validation_data=self.data_generator(),
        validation_steps=10,
        callbacks=[
            keras.callbacks.EarlyStopping(
                monitor='val_sparse_categorical_accuracy', patience=5),
        ],
    )
    model_path = os.path.join(tempfile.gettempdir(),
                              'test_save_load_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(model_path, custom_objects={
        'MaskFlatten': MaskFlatten,
        'SeqWeightedAttention': Attention,
        'MultiHead': MultiHead,
    })
    model.summary()
    for data, tag in self.data_generator():
        predicts = model.predict(data)
        predicts = np.argmax(predicts, axis=-1)
        self.assertGreaterEqual(np.sum(tag == predicts), 30)
        break
def lstm(seq_len: int):
    # input_deepmoji = layers.Input(shape=(2304, ), name="deepmoji_input")
    input_text = layers.Input(shape=(1, ), dtype=tf.string, name="text_input")
    # embedding = layers.Embedding(168, 64)(input_text)
    embedding = layers.Lambda(ELMo, output_shape=(1024, ))(input_text)
    spt_dropout_1 = layers.SpatialDropout1D(0.4)(embedding)
    lstm1 = layers.Bidirectional(
        layers.LSTM(350,
                    kernel_initializer='random_uniform',
                    return_sequences=True,
                    recurrent_dropout=0.4))(spt_dropout_1)
    spt_dropout_2 = layers.SpatialDropout1D(0.3)(lstm1)
    lstm2 = layers.Bidirectional(
        layers.LSTM(350,
                    kernel_initializer='random_uniform',
                    return_sequences=True,
                    recurrent_dropout=0.3))(spt_dropout_2)
    spt_dropout_3 = layers.SpatialDropout1D(0.2)(lstm2)
    lstm3 = layers.Bidirectional(
        layers.LSTM(300,
                    kernel_initializer='random_uniform',
                    return_sequences=True,
                    recurrent_dropout=0.3))(spt_dropout_3)
    att = Attention()(lstm3)
    # merged = layers.Concatenate()([input_deepmoji, att])
    dense = layers.Dense(100, activation='relu')(att)
    pred = layers.Dense(2, activation='softmax', name="output")(dense)
    model = Model(inputs=input_text, outputs=pred)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['categorical_accuracy'])
    model.summary()
    return model
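# lstm() above wraps an `ELMo` callable in a Lambda layer, but that callable is
# not part of this excerpt. A TF1-era sketch of what it could be, assuming the
# TensorFlow Hub ELMo v2 module and string inputs of shape (batch, 1); the
# module URL and signature handling are assumptions, not taken from the
# original code:
import tensorflow_hub as hub

elmo_module = hub.Module('https://tfhub.dev/google/elmo/2', trainable=False)

def ELMo(x):
    # Squeeze the (batch, 1) string input to (batch,) and return the per-token
    # ELMo representations, shape (batch, max_tokens, 1024).
    return elmo_module(tf.squeeze(tf.cast(x, tf.string), axis=1),
                       signature='default', as_dict=True)['elmo']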
def nen_decoder(self, x):
    x = Attention(attention_activation='tanh')(x)
    return x, self._nen_decoder(x)
def test_return_attention(self):
    self._test_save_load(Attention(return_attention=True,
                                   use_bias=False,
                                   name='Attention'))
def test_default(self):
    self._test_save_load(Attention(name='Attention'))
    features_to_dense=[],
    output=None,
    params=params,
    attention=True)
word_encoder.build()
word_encoder.model().summary()

input_model = Input(shape=(3, MAX_LEN,), name='input_1')
review_word_enc = TimeDistributed(word_encoder.model())(input_model)
l_lstm_sent = Bidirectional(
    LSTM(200, recurrent_dropout=0.2, return_sequences=True))(review_word_enc)
l_att_sent = Attention()(l_lstm_sent)
preds = Dense(2, activation='softmax', name='output_1')(l_att_sent)
model = Model(inputs=input_model, outputs=[preds])
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['categorical_accuracy'])
model.summary()

with h5py.File('/data/elmo_embeddings.hdf5', 'r') as fin:
    X = [fin[str(x)][2] for x in range(0, 30160 * 3)]
with h5py.File('/data/dev_elmo_embeddings.hdf5', 'r') as fin:
    X_val = [fin[str(x)][2] for x in range(0, 8265)]