def get_model(metrics, output_bias=None):
    """Create and return the model."""
    if output_bias is not None:
        output_bias = tf.keras.initializers.Constant(output_bias)
    k.clear_session()

    # INPUTS: one Input (plus zero-mask) per embedding model, and optional
    # POS-tag and metadata feature inputs
    inputs = []
    models = []
    for embed_mod_id, _ in enumerate(embedding_models):
        inputs.append(
            Input(shape=(
                seq_max_length,
                feature_vec_lengths[embed_mod_id],
            )))
        models.append(Masking(mask_value=0.)(inputs[-1]))
    if args.pos:
        inputs.append(Input(shape=(
            seq_max_length,
            len(xposs_cat[0][0]),
        )))
        models.append(Masking(mask_value=0.)(inputs[-1]))
    if corpus.metadata:
        inputs.append(
            Input(shape=(
                seq_max_length,
                len(xmetas_cat[0][0]),
            )))
        models.append(Masking(mask_value=0.)(inputs[-1]))

    # Combine INPUTS (including masks)
    if len(models) > 1:
        model = concatenate(models)
    else:
        model = models[0]

    # CORE MODEL
    model = Bidirectional(
        LSTM(
            50,
            return_sequences=True,
            dropout=0,  # !
            # dropout=0.1,
            # dropout=0.25,
            recurrent_dropout=recurrent_dropout,
            implementation=1))(model)

    # (unfold LSTM and) one-hot encode binary label
    # outputs = TimeDistributed(Dense(2, activation="softmax"))(model)
    outputs = TimeDistributed(
        Dense(1, activation="sigmoid", bias_initializer=output_bias))(model)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    return model
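# A minimal usage sketch for get_model (an assumption, not part of the source:
# y_train and the metrics list below are hypothetical; embedding_models,
# seq_max_length, optimizer, loss, etc. must already exist at module scope,
# since the function reads them globally). Seeding output_bias with the
# log-odds of the positive class is the standard recipe for imbalanced binary
# labels; the value lands in tf.keras.initializers.Constant above.

import numpy as np
import tensorflow as tf

# assumed: y_train is a 0/1 array of token labels (hypothetical name)
pos = y_train.sum()
neg = y_train.size - pos
initial_bias = np.log(pos / neg)  # log-odds of the positive class

model = get_model(metrics=[tf.keras.metrics.AUC(name='auc')],
                  output_bias=initial_bias)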
def get_compiled_model(self, vectorizer_model_name, missing_values_handled,
                       max_sentence_length, max_word_length, n_words, n_chars,
                       n_tags, word2idx):
    vectorizer_model_settings = models[vectorizer_model_name]
    vectorizer_model_size = vectorizer_model_settings['vector_size']

    word_in = Input(shape=(max_sentence_length, ))
    if not vectorizer_model_settings['precomputed_vectors']:
        emb_word = Embedding(input_dim=n_words + 2,
                             output_dim=vectorizer_model_size,
                             input_length=max_sentence_length,
                             mask_zero=True)(word_in)
    else:
        embedding_weights = get_embedding_weights(vectorizer_model_name,
                                                  vectorizer_model_size,
                                                  missing_values_handled,
                                                  word2idx)
        emb_word = Embedding(input_dim=n_words + 2,
                             output_dim=vectorizer_model_size,
                             input_length=max_sentence_length,
                             mask_zero=True,
                             weights=[embedding_weights],
                             trainable=False)(word_in)

    # input and embeddings for characters
    char_in = Input(shape=(
        max_sentence_length,
        max_word_length,
    ))
    emb_char = TimeDistributed(
        Embedding(input_dim=n_chars + 2, output_dim=10,
                  mask_zero=True))(char_in)

    # character LSTM to get word encodings by characters
    char_enc = TimeDistributed(
        LSTM(units=20, return_sequences=False,
             recurrent_dropout=0.5))(emb_char)

    # main LSTM
    x = concatenate([emb_word, char_enc])
    # x = SpatialDropout1D(0.3)(x)
    model = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))(x)
    model = TimeDistributed(Dense(50, activation='relu'))(model)

    crf = CRF(n_tags + 1)
    out = crf(model)

    model = Model([word_in, char_in], out)
    model.summary()
    model.compile(optimizer="rmsprop", loss=crf.loss, metrics=[crf.accuracy])
    return model
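# A hedged sketch of feeding this two-input model. The array names X_word,
# X_char, y, trainer, and the 'fasttext' key are hypothetical; the shapes
# follow the Input layers above. The target format depends on which CRF
# implementation is imported (not shown); this sketch assumes dense one-hot
# tags matching the n_tags + 1 layer width.

# assumed shapes, per the Input layers above:
#   X_word: (n_samples, max_sentence_length)                   integer word ids
#   X_char: (n_samples, max_sentence_length, max_word_length)  integer char ids
#   y:      (n_samples, max_sentence_length, n_tags + 1)       one-hot tags
# trainer: hypothetical instance of the class defining this method
model = trainer.get_compiled_model('fasttext', True, max_sentence_length,
                                   max_word_length, n_words, n_chars, n_tags,
                                   word2idx)
model.fit([X_word, X_char], y, batch_size=32, epochs=5, validation_split=0.1)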
def get_compiled_model(self, vectorizer_model_name, missing_values_handled,
                       max_sentence_length, max_word_length, n_words, n_chars,
                       n_tags, word2idx):
    vectorizer_model_settings = models[vectorizer_model_name]
    vectorizer_model_size = vectorizer_model_settings['vector_size']
    print(vectorizer_model_size)

    # input and embeddings for words
    word_in = Input(shape=(max_sentence_length, ))
    if not vectorizer_model_settings['precomputed_vectors']:
        emb_word = Embedding(input_dim=n_words,
                             output_dim=vectorizer_model_size,
                             input_length=max_sentence_length,
                             mask_zero=True)(word_in)
    else:
        embedding_weights = get_embedding_weights(vectorizer_model_name,
                                                  vectorizer_model_size,
                                                  missing_values_handled,
                                                  word2idx)
        emb_word = Embedding(input_dim=n_words,
                             output_dim=vectorizer_model_size,
                             input_length=max_sentence_length,
                             mask_zero=True,
                             weights=[embedding_weights],
                             trainable=False)(word_in)

    # input and embeddings for characters
    char_in = Input(shape=(
        max_sentence_length,
        max_word_length,
    ))
    emb_char = TimeDistributed(
        Embedding(input_dim=n_chars, output_dim=10, mask_zero=True))(char_in)

    # character LSTM to get word encodings by characters
    char_enc = TimeDistributed(
        LSTM(units=20, return_sequences=False,
             recurrent_dropout=0.5))(emb_char)

    # main LSTM
    model = concatenate([emb_word, char_enc])
    model = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))(model)
    model = TimeDistributed(Dense(50, activation='relu'))(model)

    # project to tag space, then decode with the CRF layer
    model = Dense(n_tags, activation=None)(model)
    crf = CRF(dtype='float32', name='crf')
    output = crf(model)

    # the graph consumes both word and character inputs
    base_model = Model([word_in, char_in], output)
    base_model.compile(optimizer='adam')
    model = ModelWithCRFLoss(base_model)
    model.compile(optimizer='adam')
    model.summary()
    return model
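# If CRF and ModelWithCRFLoss here are the tf2crf classes (an assumption; the
# imports are not shown), the wrapper computes the CRF negative log-likelihood
# internally, which is why compile above takes no loss. Under that assumption
# fit receives integer tag ids rather than one-hot matrices. Names below
# (trainer, X_word, X_char, y, 'word2vec') are hypothetical.

#   X_word: (n_samples, max_sentence_length)                   integer word ids
#   X_char: (n_samples, max_sentence_length, max_word_length)  integer char ids
#   y:      (n_samples, max_sentence_length)                   integer tag ids
model = trainer.get_compiled_model('word2vec', True, max_sentence_length,
                                   max_word_length, n_words, n_chars, n_tags,
                                   word2idx)
model.fit([X_word, X_char], y, batch_size=32, epochs=5)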
def get_bidirectional_lstm(params, suffix):
    hidden_units = params['hidden_units']
    layers = params['layers']

    # stack of bidirectional LSTMs over precomputed BERT token vectors
    inp = Input(shape=(MAX_LEN, BERT_DIM))
    model = Bidirectional(
        LSTM(units=hidden_units, return_sequences=True,
             recurrent_dropout=0.1))(inp)
    for _ in range(layers - 1):
        model = Bidirectional(
            LSTM(units=hidden_units, return_sequences=True,
                 recurrent_dropout=0.1))(model)
    out = TimeDistributed(Dense(3, activation="softmax"))(
        model)  # softmax output layer

    model = Model(inp, out)
    model.summary()

    suffix = 'h' + str(hidden_units) + '_l_' + str(layers) + '_' + suffix
    mc_l = ModelCheckpoint('models/location_text_blstm_' + suffix + '.h5',
                           monitor='val_location_recall',
                           mode='max',
                           verbose=1,
                           save_best_only=True)
    mc_f = ModelCheckpoint('models/food_text_blstm_' + suffix + '.h5',
                           monitor='val_food_recall',
                           mode='max',
                           verbose=1,
                           save_best_only=True)

    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=[
                      tf.keras.metrics.Recall(class_id=1,
                                              name="location_recall"),
                      tf.keras.metrics.Precision(class_id=1,
                                                 name="location_precision"),
                      tf.keras.metrics.Recall(class_id=2, name="food_recall"),
                      tf.keras.metrics.Precision(class_id=2,
                                                 name="food_precision")
                  ])
    return model, mc_l, mc_f, model.count_params()
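# A usage sketch for the caller (hypothetical data: X of shape
# (n_samples, MAX_LEN, BERT_DIM) holding precomputed BERT token vectors,
# y one-hot over the 3 classes, plus a validation split). Both checkpoints
# monitor val_* metrics, so fit needs validation data, and the callbacks only
# fire if they are actually passed in.

params = {'hidden_units': 100, 'layers': 2}  # hypothetical hyperparameters
model, mc_l, mc_f, n_params = get_bidirectional_lstm(params, 'run1')
model.fit(X, y,
          batch_size=32,
          epochs=10,
          validation_data=(X_val, y_val),  # required by the val_* monitors
          callbacks=[mc_l, mc_f])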
def get_compiled_model(self, vectorizer_model_name, missing_values_handled,
                       max_sentence_length, max_word_length, n_words, n_tags,
                       word2idx):
    vectorizer_model_settings = models[vectorizer_model_name]
    vectorizer_model_size = vectorizer_model_settings['vector_size']

    # input and embeddings for words
    word_in = Input(shape=(max_sentence_length, ))
    if not vectorizer_model_settings['precomputed_vectors']:
        emb_word = Embedding(input_dim=n_words + 2,
                             output_dim=vectorizer_model_size,
                             input_length=max_sentence_length,
                             mask_zero=True)(word_in)
    else:
        embedding_weights = get_embedding_weights(vectorizer_model_name,
                                                  vectorizer_model_size,
                                                  missing_values_handled,
                                                  word2idx)
        emb_word = Embedding(input_dim=n_words + 2,
                             output_dim=vectorizer_model_size,
                             input_length=max_sentence_length,
                             mask_zero=True,
                             weights=[embedding_weights],
                             trainable=False)(word_in)

    # main LSTM over the word embeddings
    model = Bidirectional(
        LSTM(units=50, return_sequences=True,
             recurrent_dropout=0.1))(emb_word)
    model = TimeDistributed(Dense(50, activation='relu'))(model)

    crf = CRF(n_tags + 1)
    out = crf(model)

    model = Model(word_in, out)
    model.summary()
    model.compile(optimizer="rmsprop", loss=crf.loss, metrics=[crf.accuracy])
    return model
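# All of the get_compiled_model variants above expect padded integer word ids
# on word_in. A minimal encoding sketch under that assumption: sentences is a
# hypothetical list of token lists, and the + 2 on input_dim suggests two
# reserved ids, taken here as 0 for padding and n_words + 1 for unknown words
# (a hypothetical convention).

from tensorflow.keras.preprocessing.sequence import pad_sequences

# assumed: word2idx maps token -> id >= 1; 0 is reserved for padding and
# n_words + 1 for out-of-vocabulary tokens
X_word = pad_sequences(
    [[word2idx.get(w, n_words + 1) for w in s] for s in sentences],
    maxlen=max_sentence_length,
    padding='post',
    value=0)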