def test_multi_lstm(self):
    model = keras.models.Sequential()
    model.add(keras.layers.Embedding(input_dim=5, output_dim=3, mask_zero=True, name='Embed'))
    # reg_index/reg_slice select which of the wrapped layer's weights (and which
    # slices of them) receive the penalty that encourages the heads to differ
    model.add(MultiHead(
        layer=keras.layers.Bidirectional(keras.layers.LSTM(units=16, return_sequences=True), name='LSTM'),
        layer_num=5,
        reg_index=[1, 4],
        reg_slice=(slice(None, None), slice(32, 48)),
        reg_factor=0.1,
        name='Multi-Head-LSTM',
    ))
    model.add(keras.layers.TimeDistributed(MaskFlatten(name='Flatten-1')))
    model.add(MultiHead(
        layer=Attention(name='Attention'),
        layer_num=5,
        reg_index=0,
        reg_factor=0.1,
        name='Multi-Head-Attention',
    ))
    model.add(keras.layers.Flatten(name='Flatten-2'))
    model.add(keras.layers.Dense(units=2, activation='softmax', name='Dense'))
    model.build()
    model.compile(
        optimizer='adam',
        loss=keras.losses.sparse_categorical_crossentropy,
        metrics=[keras.metrics.sparse_categorical_accuracy],
    )
    model.fit_generator(
        generator=self.data_generator(),
        steps_per_epoch=100,
        epochs=100,
        validation_data=self.data_generator(),
        validation_steps=10,
        callbacks=[
            keras.callbacks.EarlyStopping(monitor='val_sparse_categorical_accuracy', patience=5),
        ],
    )
    # save and reload to exercise serialization of the custom layers
    model_path = os.path.join(tempfile.gettempdir(), 'test_save_load_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(model_path, custom_objects={
        'MaskFlatten': MaskFlatten,
        'SeqWeightedAttention': Attention,
        'MultiHead': MultiHead,
    })
    model.summary()
    for data, tag in self.data_generator():
        predicts = model.predict(data)
        predicts = np.argmax(predicts, axis=-1)
        self.assertGreaterEqual(np.sum(tag == predicts), 30)
        break
def test_multi_attention(self):
    model = keras.models.Sequential()
    model.add(keras.layers.Embedding(input_dim=5, output_dim=3, mask_zero=True, name='Embed'))
    model.add(MultiHead(
        layer=Attention(name='Attention'),
        layer_num=5,
        hidden_dim=3,
        use_bias=True,
        name='Multi-Head-Attention',
    ))
    model.add(keras.layers.TimeDistributed(MaskFlatten(), name='Flatten'))
    model.add(keras.layers.Bidirectional(keras.layers.GRU(units=8), name='Bi-GRU'))
    model.add(keras.layers.Dense(units=2, activation='softmax', name='Dense'))
    model.build()
    model.compile(
        optimizer='adam',
        loss=keras.losses.sparse_categorical_crossentropy,
        metrics=[keras.metrics.sparse_categorical_accuracy],
    )
    model.summary()
    model.fit_generator(
        generator=self.data_generator(),
        steps_per_epoch=100,
        epochs=100,
        validation_data=self.data_generator(),
        validation_steps=10,
        callbacks=[
            keras.callbacks.EarlyStopping(monitor='val_sparse_categorical_accuracy', patience=5),
        ],
    )
    model.layers[1].set_weights(model.layers[1].get_weights())
    model_path = os.path.join(tempfile.gettempdir(), 'test_save_load_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(model_path, custom_objects={
        'MaskFlatten': MaskFlatten,
        'SeqSelfAttention': Attention,
        'MultiHead': MultiHead,
    })
    model.summary()
    for data, tag in self.data_generator():
        predicts = model.predict(data)
        predicts = np.argmax(predicts, axis=-1)
        self.assertGreaterEqual(np.sum(tag == predicts), 30, (tag, predicts))
        break
def test_multi_pooling(self):
    data = [
        [1, 3, 2, 4],
        [2, 8, 3, 5],
    ]
    positions = [
        [1, 3],
        [2, 4],
    ]
    data_input = keras.layers.Input(shape=(4,), name='Input-Data')
    pos_input = keras.layers.Input(shape=(2,), name='Input-Pos')
    pooling = MultiHead(
        [
            PiecewisePooling1D(pool_type=PiecewisePooling1D.POOL_TYPE_MAX),
            PiecewisePooling1D(pool_type=PiecewisePooling1D.POOL_TYPE_AVERAGE),
        ],
        name='Multi-Head-Pooling',
    )([data_input, pos_input])
    model = keras.models.Model(inputs=[data_input, pos_input], outputs=pooling)
    model.summary()
    predicts = model.predict([np.asarray(data), np.asarray(positions)]).tolist()
    expected = [
        [[1.0, 1.0], [3.0, 2.5]],
        [[8.0, 5.0], [5.0, 4.0]],
    ]
    self.assertTrue(np.allclose(expected, predicts))
    model_path = os.path.join(tempfile.gettempdir(), 'test_save_load_%f.h5' % random.random())
    model.save(model_path)
    custom_objects = PiecewisePooling1D.get_custom_objects()
    custom_objects['MultiHead'] = MultiHead
    model = keras.models.load_model(model_path, custom_objects=custom_objects)
    predicts = model.predict([np.asarray(data), np.asarray(positions)]).tolist()
    self.assertTrue(np.allclose(expected, predicts))
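# The three test methods above assume a unittest.TestCase with a data_generator
# and the imports below. This is a minimal sketch: the data_generator logic is a
# hypothetical stand-in, and the import sources for MaskFlatten, Attention and
# PiecewisePooling1D (e.g. keras-self-attention, keras-piecewise-pooling) are
# assumptions not confirmed by the snippets themselves. Note that `Attention` is
# bound to SeqWeightedAttention in the first test's custom_objects and to
# SeqSelfAttention in the second, so the two tests likely live in different files.
import os
import random
import tempfile
from unittest import TestCase

import numpy as np
import keras
from keras_multi_head import MultiHead


class TestMultiHead(TestCase):

    @staticmethod
    def data_generator(batch_size=32):
        # hypothetical generator yielding (token sequences, binary tags); the
        # real one must pose a task the models can learn well enough to get at
        # least 30 of the 32 predictions right, as the assertions require
        while True:
            data = np.random.randint(0, 5, size=(batch_size, 7))
            tag = (np.sum(data == 3, axis=-1) % 2).astype('int32')
            yield data, tag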
# embedding_matrix = joblib.load('embedding_matrix.vec')
# padded_test = joblib.load('padded_test.vec')
# test_labels = joblib.load('test_labels.vec')
# padded_train = joblib.load('padded_train.vec')
# encoded_train_labels = joblib.load('encoded_train_labels.vec')
# le = joblib.load('label_encoder_le_task2.vec')

import keras
from keras.layers import Input, Embedding, LSTM, Flatten, Dense, Dropout
from keras.callbacks import ModelCheckpoint, CSVLogger
from keras_multi_head import MultiHead

# define the model (vocab_size, embedding_matrix and le are expected to come
# from the commented-out joblib loads above)
inputs = Input(shape=(64,))
m = Embedding(vocab_size, 300, weights=[embedding_matrix], input_length=64, trainable=False)(inputs)
bi = MultiHead(LSTM(64, activation='tanh', return_sequences=True), layer_num=5, name='Multi-LSTMs')(m)
bi = Flatten()(bi)
ff = Dense(3000)(bi)
ff = Dropout(0.1)(ff)
ff = Dense(len(le.category_mapping[0]['mapping']), activation='softmax')(ff)
model = keras.models.Model(inputs=[inputs], outputs=[ff])

filepath = "TASK2_multiheadAtt_Fasttext_03052020weights.{epoch:05d}-{val_loss:.5f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True,
                             mode='max')  # the source is truncated mid-call; 'mode' is an assumed completion
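# The original snippet ends at the checkpoint callback. A minimal sketch of how
# training might continue, assuming padded_train/encoded_train_labels from the
# commented loads above; the loss, metrics, batch size and epoch count are
# assumptions for illustration, not from the source.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])
model.fit(padded_train, encoded_train_labels,
          validation_split=0.1, epochs=20, batch_size=64,
          callbacks=[checkpoint, CSVLogger('task2_training_log.csv')])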
# imports assumed from the function body; the original mixes the standalone
# keras namespace with tf.keras
import tensorflow as tf
import keras
from tensorflow.keras.layers import Masking, Bidirectional, LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from keras_multi_head import MultiHead
from keras_self_attention import SeqSelfAttention


def MultiHead_self_attention(X_train, y_train, X_val, y_val, X_test, num_classes,
                             dropout=0.5, batch_size=68, learning_rate=0.0001,
                             epochs=20, optimizer='Adam'):
    """Multi-head attention model."""
    lstm_unit = 256
    model = tf.keras.models.Sequential()
    model.add(Masking(mask_value=0.0, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(MultiHead(Bidirectional(LSTM(units=lstm_unit, dropout=dropout)),
                        layer_num=10, name='Multi-LSTMs'))
    model.add(SeqSelfAttention(
        attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
        attention_activation='sigmoid',
        kernel_regularizer=keras.regularizers.l2(1e-2),
        use_attention_bias=False,
        name='Attention',
    ))
    model.add(keras.layers.Flatten())
    model.add(Dense(num_classes, activation='softmax'))
    model.summary()
    # opt_select is a helper defined elsewhere that maps an optimizer name and
    # learning rate to a Keras optimizer instance
    opt = opt_select(optimizer, learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=5, verbose=0, mode='min'),
        ModelCheckpoint('.mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min'),
    ]
    history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                        callbacks=callbacks, validation_data=(X_val, y_val), verbose=0)
    # restore the best weights seen during training before predicting
    model.load_weights(filepath='.mdl_wts.hdf5')
    model.save('/mnt/lxr/SER/paper/fiji_binary.h5')
    yhat = model.predict(X_test)
    return history, yhat
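# A hedged usage sketch for MultiHead_self_attention: the input shapes, the
# dummy data and the opt_select stand-in below are assumptions for
# illustration, not from the source.
import numpy as np


def opt_select(name, lr):
    # minimal stand-in for the opt_select helper the function expects
    if name == 'Adam':
        return tf.keras.optimizers.Adam(learning_rate=lr)
    return tf.keras.optimizers.SGD(learning_rate=lr)


num_classes = 2
X_train = np.random.rand(64, 100, 40)  # (samples, time steps, features)
y_train = tf.keras.utils.to_categorical(np.random.randint(num_classes, size=64), num_classes)
X_val = np.random.rand(16, 100, 40)
y_val = tf.keras.utils.to_categorical(np.random.randint(num_classes, size=16), num_classes)
X_test = np.random.rand(8, 100, 40)

history, yhat = MultiHead_self_attention(X_train, y_train, X_val, y_val, X_test,
                                         num_classes, epochs=2)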