def build_model(self, config, is_on):
    main_input = Input(shape=(config['maxlen'],), dtype='int32', name='main_input')
    x = Embedding(input_dim=len(config['word_index']),
                  output_dim=config['veclen'],
                  weights=[config['embedding_matrix']],
                  input_length=config['maxlen'],
                  trainable=False)(main_input)
    if is_on:
        x = Bidirectional(ONLSTM(units=64, chunk_size=8, return_sequences=True,
                                 recurrent_dropconnect=0.25))(x)
    else:
        x = Bidirectional(CuDNNLSTM(units=64, return_sequences=True))(x)
        x = Dropout(0.2)(x)
    if is_on:
        lstm_out = Bidirectional(ONLSTM(units=64, chunk_size=8,
                                        recurrent_dropconnect=0.25))(x)
    else:
        x = Bidirectional(CuDNNLSTM(units=64))(x)
        lstm_out = Dropout(0.2)(x)
    feature_input = Input(shape=(len(config['syntax_features'][0]),), name='feature_input')
    x = keras.layers.concatenate([lstm_out, feature_input])
    x = Dense(units=32, activation='relu')(x)
    main_output = Dense(units=1, activation='sigmoid')(x)
    self.model = Model(inputs=[main_input, feature_input], outputs=main_output)
    self.model.compile(optimizer='adam',
                       loss=keras.losses.binary_crossentropy,
                       metrics=['accuracy'])
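# A minimal, hypothetical sketch of driving the two-input model above once
# build_model has run. `clf`, `config`, and the arrays below are placeholders;
# only the input names and shapes are taken from the model definition.
import numpy as np

clf.build_model(config, is_on=True)
padded_tokens = np.zeros((32, config['maxlen']), dtype='int32')                      # token ids padded to maxlen
syntax_feats = np.zeros((32, len(config['syntax_features'][0])), dtype='float32')    # hand-crafted syntax features
labels = np.zeros((32,), dtype='float32')                                            # binary targets
clf.model.fit({'main_input': padded_tokens, 'feature_input': syntax_feats},
              labels, epochs=1, batch_size=32)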
def test_fit_classification(self):
    model = models.Sequential()
    model.add(layers.Embedding(input_shape=(None,), input_dim=10, output_dim=100, mask_zero=True))
    model.add(layers.Bidirectional(ONLSTM(
        units=50,
        chunk_size=5,
        dropout=0.1,
        recurrent_dropconnect=0.1,
        use_bias=False,
        return_sequences=True,
    )))
    model.add(layers.Bidirectional(ONLSTM(
        units=50,
        chunk_size=5,
        recurrent_dropout=0.1,
        return_sequences=True,
    )))
    model.add(layers.Bidirectional(ONLSTM(units=50, chunk_size=5, unit_forget_bias=False)))
    model.add(layers.Dense(units=2, activation='softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    model_path = os.path.join(tempfile.gettempdir(), 'test_on_lstm_%f.h5' % np.random.random())
    model.save(model_path)
    model = models.load_model(model_path, custom_objects={'ONLSTM': ONLSTM})
    data_size, seq_len = 10000, 17
    x = np.random.randint(0, 10, (data_size, seq_len))
    y = [0] * data_size
    for i in range(data_size):
        if 3 in x[i].tolist() and 7 in x[i].tolist():
            y[i] = 1
    y = np.array(y)
    model.summary()
    model.fit(
        x,
        y,
        epochs=10,
        callbacks=[callbacks.EarlyStopping(monitor='loss', min_delta=1e-3, patience=2)],
    )
    model_path = os.path.join(tempfile.gettempdir(), 'test_on_lstm_%f.h5' % np.random.random())
    model.save(model_path)
    model = models.load_model(model_path, custom_objects={'ONLSTM': ONLSTM})
    predicted = model.predict(x).argmax(axis=-1)
    self.assertLess(np.sum(np.abs(y - predicted)), data_size // 100)
def test_return_last_splits(self):
    inputs = layers.Input(shape=(None,))
    embed = layers.Embedding(input_dim=10, output_dim=100)(inputs)
    outputs = ONLSTM(units=50, chunk_size=5, return_splits=True)(embed)
    model = models.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='mse')
    model.summary(line_length=120)
    predicted = model.predict(np.random.randint(0, 10, (3, 7)))
    self.assertEqual((3, 50), predicted[0].shape)
    self.assertEqual((3, 2), predicted[1].shape)
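# With return_splits=True the layer yields two tensors: the usual ONLSTM output
# followed by a split-point tensor with two values per sequence, matching the
# shapes asserted above. A minimal sketch of unpacking them outside the test
# harness; the random batch of 3 sequences of length 7 is illustrative.
batch = np.random.randint(0, 10, (3, 7))
last_output, splits = model.predict(batch)
assert last_output.shape == (3, 50)  # last-step output per sequence
assert splits.shape == (3, 2)        # two split values per sequence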
def test_return_all_splits(self):
    if K.backend() == 'cntk':
        return
    inputs = layers.Input(shape=(None,))
    embed = layers.Embedding(input_dim=10, output_dim=100)(inputs)
    outputs = ONLSTM(units=50, chunk_size=5, return_sequences=True, return_splits=True)(embed)
    model = models.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=self._get_optimizer(), loss='mse')
    model.summary(line_length=120)
    predicted = model.predict(np.random.randint(0, 10, (3, 7)))
    self.assertEqual((3, 7, 50), predicted[0].shape)
    self.assertEqual((3, 7, 2), predicted[1].shape)
def test_invalid_chunk_size(self):
    with self.assertRaises(ValueError):
        model = models.Sequential()
        model.add(ONLSTM(units=13, chunk_size=5, input_shape=(None, 100)))
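# The test above relies on units not being divisible by chunk_size (13 is not a
# multiple of 5). A minimal sketch of a valid configuration; the import path is
# an assumption (the keras_ordered_neurons package that provides ONLSTM).
from keras import models
from keras_ordered_neurons import ONLSTM

model = models.Sequential()
model.add(ONLSTM(units=15, chunk_size=5, input_shape=(None, 100)))  # 15 % 5 == 0
model.compile(optimizer='adam', loss='mse')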
def build_model(embeddings_size):
    # Inputs
    q1_embeddings_input = Input(shape=(None, embeddings_size,), name='q1_word_embeddings')
    q2_embeddings_input = Input(shape=(None, embeddings_size,), name='q2_word_embeddings')

    # RNN
    word_lstm1 = Bidirectional(
        ONLSTM(units=256, chunk_size=8, dropout=args.dropout_rate,
               return_sequences=True, kernel_initializer='glorot_normal'))
    q1_word_lstm1 = word_lstm1(q1_embeddings_input)
    q2_word_lstm1 = word_lstm1(q2_embeddings_input)
    word_lstm2 = Bidirectional(
        ONLSTM(units=256, chunk_size=8, dropout=args.dropout_rate,
               return_sequences=True, kernel_initializer='glorot_normal'))
    q1_word_lstm2 = word_lstm2(q1_word_lstm1)
    q2_word_lstm2 = word_lstm2(q2_word_lstm1)
    word_attention = SeqWeightedAttention()
    q1_word_attention = word_attention(q1_word_lstm2)
    q2_word_attention = word_attention(q2_word_lstm2)

    # Concatenate
    subtract = Subtract()([q1_word_attention, q2_word_attention])
    multiply_subtract = Multiply()([subtract, subtract])

    # Fully Connected
    dense1 = Dropout(args.dropout_rate)(
        Dense(units=1024, activation='relu', kernel_initializer='glorot_normal')(multiply_subtract))
    dense2 = Dropout(args.dropout_rate)(
        Dense(units=512, activation='relu', kernel_initializer='glorot_normal')(dense1))
    dense3 = Dropout(args.dropout_rate)(
        Dense(units=256, activation='relu', kernel_initializer='glorot_normal')(dense2))
    dense4 = Dropout(args.dropout_rate)(
        Dense(units=128, activation='relu', kernel_initializer='glorot_normal')(dense3))

    # Predict
    output = Dense(units=1, activation='sigmoid', kernel_initializer='glorot_normal')(dense4)

    model = Model([q1_embeddings_input, q2_embeddings_input], output)
    model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['accuracy', f1])
    model.summary()
    return model
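# A minimal sketch of exercising the question-pair model above. It assumes
# args.dropout_rate and the f1 metric are defined elsewhere in the script; the
# random "embeddings" and batch shapes are illustrative stand-ins for real
# pre-computed word vectors.
import numpy as np

model = build_model(embeddings_size=300)
q1 = np.random.rand(8, 12, 300).astype('float32')   # 8 question pairs, 12 tokens each
q2 = np.random.rand(8, 12, 300).astype('float32')
duplicate = np.random.randint(0, 2, size=(8, 1))     # binary "is duplicate" label
model.fit([q1, q2], duplicate, batch_size=8, epochs=1)
probs = model.predict([q1, q2])                       # probability that each pair matches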
def build_model(self):
    char_input = Input(shape=(None,), dtype='int32', name='char_input')
    bichar_input = Input(shape=(None,), dtype='int32', name='bichar_input')
    elmo_input = Input(shape=(None,), dtype='int32', name='elmo_input')
    inputNodes = [char_input, bichar_input, elmo_input]

    word2vec_char_embedding = Embedding(
        input_dim=self.params['char2id_size'] + 1,
        output_dim=self.params['char_embedding_size'],
        trainable=False,
        weights=[self.char_word2vec],
        name='word2vec_char_embedding')(char_input)
    glove_char_embedding = Embedding(
        input_dim=self.params['char2id_size'] + 1,
        output_dim=self.params['char_embedding_size'],
        trainable=False,
        weights=[self.char_glove],
        name='glove_char_embedding')(char_input)
    fasttext_char_embedding = Embedding(
        input_dim=self.params['char2id_size'] + 1,
        output_dim=self.params['char_embedding_size'],
        trainable=False,
        weights=[self.char_fasttext],
        name='fasttext_char_embedding')(char_input)
    bichar_word2vec_embedding = Embedding(
        input_dim=self.params['bichar2id_size'] + 1,
        output_dim=self.params['bichar_embedding_size'],
        weights=[self.bichar_word2vec],
        trainable=False,
        name='word2vec_bichar_embedding')(bichar_input)
    bichar_glove_embedding = Embedding(
        input_dim=self.params['bichar2id_size'] + 1,
        output_dim=self.params['bichar_embedding_size'],
        weights=[self.bichar_glove],
        trainable=False,
        name='glove_bichar_embedding')(bichar_input)
    bichar_fasttext_embedding = Embedding(
        input_dim=self.params['bichar2id_size'] + 1,
        output_dim=self.params['bichar_embedding_size'],
        weights=[self.bichar_fasttext],
        trainable=False,
        name='fasttext_bichar_embedding')(bichar_input)

    word2vec_char_embedding = Dropout(self.params['dropout'])(word2vec_char_embedding)
    glove_char_embedding = Dropout(self.params['dropout'])(glove_char_embedding)
    fasttext_char_embedding = Dropout(self.params['dropout'])(fasttext_char_embedding)
    bichar_word2vec_embedding = Dropout(self.params['dropout'])(bichar_word2vec_embedding)
    bichar_glove_embedding = Dropout(self.params['dropout'])(bichar_glove_embedding)
    bichar_fasttext_embedding = Dropout(self.params['dropout'])(bichar_fasttext_embedding)

    shared_layer = Concatenate(axis=-1)([
        word2vec_char_embedding, glove_char_embedding, fasttext_char_embedding,
        bichar_word2vec_embedding, bichar_glove_embedding, bichar_fasttext_embedding
    ])

    elmo_embedding = ELMoEmbedding(output_dim=self.elmo_dim * 2, elmo_dim=self.elmo_dim)(elmo_input)
    shared_layer = Concatenate(axis=-1)([shared_layer, elmo_embedding])

    for size in self.params['LSTM-Size']:
        shared_layer = Bidirectional(
            ONLSTM(size, chunk_size=30, return_sequences=True))(shared_layer)
        shared_layer = Dropout(self.params['dropout'])(shared_layer)

    self_att = SeqSelfAttention()(shared_layer)
    lstm_att = Concatenate(axis=-1)([shared_layer, self_att])

    output = lstm_att
    output = TimeDistributed(
        Dense(self.params['n_class_labels'], activation=None))(output)
    crf = ChainCRF()
    output = crf(output)
    lossFct = crf.sparse_loss

    # :: Parameters for the optimizer ::
    optimizerParams = {}
    if 'clipnorm' in self.params and self.params['clipnorm'] is not None and self.params['clipnorm'] > 0:
        optimizerParams['clipnorm'] = self.params['clipnorm']
    opt = Adam(**optimizerParams)

    model = Model(inputs=inputNodes, outputs=[output])
    model.compile(loss=lossFct, optimizer=opt)
    model.summary(line_length=125)
    return model