def test_crf_add_boundary_energy_with_no_mask():
    energy = tf.constant(
        [
            [
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
            ],
            [
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
            ],
        ],
        dtype=tf.float32,
    )
    mask = None
    start = tf.constant([1, 1, 1, 1, 1], dtype=tf.float32)
    end = tf.constant([-1, -1, -1, -1, -1], dtype=tf.float32)

    crf = CRF(None)
    new_energy_tensor = crf.add_boundary_energy(energy, mask, start, end)

    with tf.Session() as sess:
        result = sess.run(new_energy_tensor)

    expected = np.array(
        [
            [
                [1, 1, 1, 1, 1],
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [-1, -1, -1, -1, -1],
            ],
            [
                [1, 1, 1, 1, 1],
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [-1, -1, -1, -1, -1],
            ],
        ]
    )

    np.testing.assert_array_equal(result, expected)
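# For reference, the behaviour this test encodes: with no mask, add_boundary_energy
# adds the start energies to the first timestep and the end energies to the last one.
# A minimal NumPy sketch of that expectation (names and shapes are illustrative, not
# the layer's actual implementation):
import numpy as np

def add_boundary_energy_reference(energy, start, end):
    # energy: (batch, timesteps, units); start/end: (units,)
    out = energy.copy()
    out[:, 0, :] += start   # start energies on the first timestep
    out[:, -1, :] += end    # end energies on the last timestep
    return out

# Reproduces the `expected` array above:
# add_boundary_energy_reference(np.zeros((2, 5, 5), dtype=np.float32),
#                               np.ones(5, dtype=np.float32),
#                               -np.ones(5, dtype=np.float32))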
def setUp(self): super().setUp() self.logits = np.array([ [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]], [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]], ]) self.tags = np.array([ [2, 3, 4], [3, 2, 2] ]) self.transitions = np.array([ [0.1, 0.2, 0.3, 0.4, 0.5], [0.8, 0.3, 0.1, 0.7, 0.9], [-0.3, 2.1, -5.6, 3.4, 4.0], [0.2, 0.4, 0.6, -0.3, -0.4], [1.0, 1.0, 1.0, 1.0, 1.0] ]) self.transitions_from_start = np.array([0.1, 0.2, 0.3, 0.4, 0.6]) self.transitions_to_end = np.array([-0.1, -0.2, 0.3, -0.4, -0.4]) # Use the CRF Module with fixed transitions to compute the log_likelihood self.crf = CRF( units=5, use_kernel=False, # disable kernel transform chain_initializer=initializers.Constant(self.transitions), use_boundary=True, # left_boundary_initializer=initializers.Constant(self.transitions_from_start), # right_boundary_initializer=initializers.Constant(self.transitions_to_end), name="crf_layer" ) self.crf.left_boundary = self.crf.add_weight( shape=(self.crf.units,), name="left_boundary", initializer=initializers.Constant(self.transitions_from_start), ) self.crf.right_boundary = self.crf.add_weight( shape=(self.crf.units,), name="right_boundary", initializer=initializers.Constant(self.transitions_to_end), )
def test_forward_works_with_mask(numpy_crf): logits = np.array([ [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]], [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]], ]) transitions = np.array([ [0.1, 0.2, 0.3, 0.4, 0.5], [0.8, 0.3, 0.1, 0.7, 0.9], [-0.3, 2.1, -5.6, 3.4, 4.0], [0.2, 0.4, 0.6, -0.3, -0.4], [1.0, 1.0, 1.0, 1.0, 1.0] ]) boundary_transitions = np.array([0.1, 0.2, 0.3, 0.4, 0.6]) tags = np.array([ [2, 3, 4], [3, 2, 2] ]) # Use the CRF Module with fixed transitions to compute the log_likelihood crf = CRF( units=5, use_kernel=False, # disable kernel transform chain_initializer=initializers.Constant(transitions), use_boundary=True, boundary_initializer=initializers.Constant(boundary_transitions), name="crf_layer" ) # Use a non-trivial mask mask = np.array([ [1, 1, 1], [1, 1, 0] ]) crf_loss_instance = ConditionalRandomFieldLoss() model = Sequential() model.add(layers.Input(shape=(3, 5))) model.add(MockMasking(mask_shape=(2, 3), mask_value=mask)) model.add(crf) model.compile('adam', loss={"crf_layer": crf_loss_instance}) result = model.train_on_batch(logits, tags) numpy_crf_instance = numpy_crf(logits, mask, transitions, boundary_transitions, boundary_transitions) expected = numpy_crf_instance.compute_log_likehood(tags) / -2 assert result == approx(expected)
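# Hedged reference sketch: how the per-sequence log-likelihood used above could be
# checked by brute force for an unmasked length-3 sequence over 5 tags. `score_fn`
# stands in for a scoring helper like the `score()` method of the test class further
# below; the names here are illustrative only.
import itertools
import math

def brute_force_log_likelihood(logits_i, tags_i, score_fn, num_tags=5):
    numerator = score_fn(logits_i, tags_i)
    all_scores = [score_fn(logits_i, tags_j)
                  for tags_j in itertools.product(range(num_tags), repeat=len(tags_i))]
    denominator = math.log(sum(math.exp(s) for s in all_scores))
    return numerator - denominator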
def _keras_train(self, training_data: TrainingData, cfg: RasaNLUModelConfig, **kwargs: Any) -> None:
    from tensorflow.python.keras.layers import Input, Masking
    from tensorflow.python.keras.models import Sequential
    from tf_crf_layer.layer import CRF
    from tf_crf_layer.loss import crf_loss
    from tf_crf_layer.metrics import crf_accuracy
    from seq2annotation.input import generate_tagset
    from seq2annotation.input import build_input_func
    from seq2annotation.input import Lookuper

    config = self.component_config
    if 'result_dir' not in config:
        config['result_dir'] = tempfile.mkdtemp()

    # read data according to the configuration
    train_data_generator_func = kwargs.get('addons_tf_input_fn')
    corpus_meta_data = kwargs.get('addons_tf_input_meta')

    config['tags_data'] = generate_tagset(corpus_meta_data['tags'])

    # build the input pipeline and the tag lookup table
    train_input_func = build_input_func(train_data_generator_func, config)
    tag_lookuper = Lookuper({v: i for i, v in enumerate(config['tags_data'])})

    maxlen = 25

    offset_data = train_input_func()
    train_x, train_y = self._keras_data_preprocss(offset_data, tag_lookuper, maxlen)

    EPOCHS = 1
    tag_size = tag_lookuper.size()

    model = Sequential()
    model.add(Input(shape=(maxlen, 768)))  # maxlen timesteps of 768-dim features
    model.add(Masking())
    model.add(CRF(tag_size))
    model.compile('adam', loss=crf_loss, metrics=[crf_accuracy])
    model.summary()

    model.fit(train_x, train_y, epochs=EPOCHS)
def create_bilstm_crf(vocab_size, EMBED_DIM, BiRNN_UNITS, tags_size):
    model = Sequential()
    # model.add(Embedding(len(vocab)+1, EMBED_DIM, mask_zero=True))
    model.add(Embedding(vocab_size, EMBED_DIM, mask_zero=True))
    model.add(Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True)))
    model.add(CRF(tags_size, sparse_target=True, name="crf_layer"))

    crf_loss_instance = ConditionalRandomFieldLoss()
    model.compile('adam', loss={"crf_layer": crf_loss_instance})
    # model.compile('adam', loss=crf_loss, metrics=[crf_viterbi_accuracy])

    model.summary()

    return model
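# Hedged usage sketch for the factory above; the vocabulary/tag sizes and the random
# data are placeholders, and y is passed as integer tag ids per token (as the other
# snippets here do).
import numpy as np

model = create_bilstm_crf(vocab_size=1000, EMBED_DIM=64, BiRNN_UNITS=200, tags_size=10)
x = np.random.randint(1, 1000, size=(8, 20))  # padded word-id sequences (0 is reserved for padding)
y = np.random.randint(0, 10, size=(8, 20))    # one tag id per token
model.fit(x, y, epochs=1, batch_size=4)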
def test_masking_fixed_length(get_random_data): nb_samples = 2 timesteps = 10 embedding_dim = 4 output_dim = 5 embedding_num = 12 crf_loss_instance = ConditionalRandomFieldLoss() x, y = get_random_data(nb_samples, timesteps, x_high=embedding_num, y_high=output_dim) # right padding; left padding is not supported due to the tf.contrib.crf x[0, -4:] = 0 # test with masking, fix length model = Sequential() model.add( Embedding(embedding_num, embedding_dim, input_length=timesteps, mask_zero=True)) model.add(CRF(output_dim, name="crf_layer")) model.compile(optimizer='adam', loss={"crf_layer": crf_loss_instance}) model.fit(x, y, epochs=1, batch_size=1) model.fit(x, y, epochs=1, batch_size=2) model.fit(x, y, epochs=1, batch_size=3) model.fit(x, y, epochs=1) # check mask y_pred = model.predict(x) assert (y_pred[0, -4:] == 0).all() # right padding # left padding not working currently due to the tf.contrib.crf.* # assert (y_pred[1, :5] == 0).all() # test saving and loading model MODEL_PERSISTENCE_PATH = './test_saving_crf_model.h5' model.save(MODEL_PERSISTENCE_PATH) load_model(MODEL_PERSISTENCE_PATH, custom_objects={'CRF': CRF}) try: os.remove(MODEL_PERSISTENCE_PATH) except OSError: pass
def test_viterbi_tags(numpy_crf): logits = np.array([ [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]], [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]], ]) transitions = np.array([ [0.1, 0.2, 0.3, 0.4, 0.5], [0.8, 0.3, 0.1, 0.7, 0.9], [-0.3, 2.1, -5.6, 3.4, 4.0], [0.2, 0.4, 0.6, -0.3, -0.4], [1.0, 1.0, 1.0, 1.0, 1.0] ]) boundary_transitions = np.array([0.1, 0.2, 0.3, 0.4, 0.6]) # Use the CRF Module with fixed transitions to compute the log_likelihood crf = CRF( units=5, use_kernel=False, # disable kernel transform chain_initializer=initializers.Constant(transitions), use_boundary=True, boundary_initializer=initializers.Constant(boundary_transitions), name="crf_layer" ) mask = np.array([ [1, 1, 1], [1, 1, 0] ]) crf_loss_instance = ConditionalRandomFieldLoss() model = Sequential() model.add(layers.Input(shape=(3, 5))) model.add(MockMasking(mask_shape=(2, 3), mask_value=mask)) model.add(crf) model.compile('adam', loss={"crf_layer": crf_loss_instance}) # Separate the tags and scores. result = model.predict(logits) numpy_crf_instance = numpy_crf(logits, mask, transitions, boundary_transitions, boundary_transitions) expected, _ = numpy_crf_instance.decode() np.testing.assert_equal(result, expected)
def test_no_masking_fixed_length(get_random_data):
    nb_samples = 2
    timesteps = 10
    embedding_dim = 4
    output_dim = 5
    embedding_num = 12

    crf_loss_instance = ConditionalRandomFieldLoss()

    x, y = get_random_data(nb_samples, timesteps, x_high=embedding_num, y_high=output_dim)

    # test with no masking, fixed length
    model = Sequential()
    model.add(Embedding(embedding_num, embedding_dim, input_length=timesteps))
    model.add(CRF(output_dim, name="crf_layer"))
    model.compile(optimizer='adam', loss={"crf_layer": crf_loss_instance})

    model.fit(x, y, epochs=1, batch_size=1)
    model.fit(x, y, epochs=1, batch_size=2)
    model.fit(x, y, epochs=1, batch_size=3)
    model.fit(x, y, epochs=1)

    # test saving and loading model
    MODEL_PERSISTENCE_PATH = './test_saving_crf_model.h5'
    model.save(MODEL_PERSISTENCE_PATH)
    load_model(MODEL_PERSISTENCE_PATH, custom_objects={
        'CRF': CRF,
        'crf_loss': crf_loss
    })

    try:
        os.remove(MODEL_PERSISTENCE_PATH)
    except OSError:
        pass
def test_masking_with_boundary(get_random_data): nb_samples = 2 timesteps = 10 embedding_dim = 4 output_dim = 5 embedding_num = 12 crf_loss_instance = ConditionalRandomFieldLoss() x, y = get_random_data(nb_samples, timesteps, x_high=embedding_num,y_high=output_dim) # right padding; left padding is not supported due to the tf.contrib.crf x[0, -4:] = 0 # test with masking, fix length model = Sequential() model.add(Embedding(embedding_num, embedding_dim, input_length=timesteps, mask_zero=True)) model.add(CRF(output_dim, use_boundary=True, name="crf_layer")) model.compile(optimizer='adam', loss={"crf_layer": crf_loss_instance}) model.fit(x, y, epochs=1, batch_size=1) model.fit(x, y, epochs=1, batch_size=2) model.fit(x, y, epochs=1, batch_size=3) model.fit(x, y, epochs=1)
def test_crf_viterbi_accuracy(get_random_data): nb_samples = 2 timesteps = 10 embedding_dim = 4 output_dim = 5 embedding_num = 12 crf_loss_instance = ConditionalRandomFieldLoss() x, y = get_random_data(nb_samples, timesteps, x_high=embedding_num, y_high=output_dim) # right padding; left padding is not supported due to the tf.contrib.crf x[0, -4:] = 0 # test with masking, fix length model = Sequential() model.add( Embedding(embedding_num, embedding_dim, input_length=timesteps, mask_zero=True)) model.add(CRF(output_dim, name="crf_layer")) model.compile(optimizer='rmsprop', loss={"crf_layer": crf_loss_instance}, metrics=[crf_viterbi_accuracy]) model.fit(x, y, epochs=1, batch_size=10) # test viterbi_acc y_pred = model.predict(x) _, v_acc = model.evaluate(x, y) np_acc = (y_pred[x > 0] == y[x > 0]).astype('float32').mean() print(v_acc, np_acc) assert np.abs(v_acc - np_acc) < 1e-4
transition_constraint = allowed_transitions("BIOUL", tag_lookuper.inverse_index_table)

train_x, train_y = preprocss(train_data)
test_x, test_y = preprocss(eval_data)

EPOCHS = 1
EMBED_DIM = 64
BiRNN_UNITS = 200

vocab_size = vocabulary_lookuper.size()
tag_size = tag_lookuper.size()

model = Sequential()
model.add(Embedding(vocab_size, EMBED_DIM, mask_zero=True))
model.add(Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True)))
model.add(CRF(tag_size, transition_constraint=transition_constraint))

model.summary()

model.compile('adam', loss=crf_loss, metrics=[crf_accuracy])
model.fit(train_x, train_y, epochs=EPOCHS, validation_data=[test_x, test_y])

pred_y = model.predict(test_x)
test_y_pred = pred_y[test_x > 0]
test_y_true = test_y[test_x > 0]

print('\n---- Result of BiLSTM-CRF ----\n')
classification_report(test_y_true, test_y_pred, tags_data)
def main(): config = read_configure() # ioflow corpus = get_corpus_processor(config) corpus.prepare() # ? train_data_generator_func = corpus.get_generator_func(corpus.TRAIN) eval_data_generator_func = corpus.get_generator_func(corpus.EVAL) corpus_meta_data = corpus.get_meta_info() tags_data = generate_tagset(corpus_meta_data["tags"]) # process entity into BIO train_data = list(train_data_generator_func()) eval_data = list(eval_data_generator_func()) tag_lookuper = Lookuper({v: i for i, v in enumerate(tags_data)}) # tag index vocab_data_file = config.get("vocabulary_file") vocabulary_lookuper = index_table_from_file(vocab_data_file) def preprocss(data, maxlen): raw_x = [] raw_y = [] for offset_data in data: tags = offset_to_biluo(offset_data) words = offset_data.text tag_ids = [tag_lookuper.lookup(i) for i in tags] word_ids = [vocabulary_lookuper.lookup(i) for i in words] raw_x.append(word_ids) raw_y.append(tag_ids) if maxlen is None: maxlen = max(len(s) for s in raw_x) print(">>> maxlen: {}".format(maxlen)) x = tf.keras.preprocessing.sequence.pad_sequences( raw_x, maxlen, padding="post" ) # right padding # lef padded with -1. Indeed, any integer works as it will be masked # y_pos = pad_sequences(y_pos, maxlen, value=-1) # y_chunk = pad_sequences(y_chunk, maxlen, value=-1) y = tf.keras.preprocessing.sequence.pad_sequences( raw_y, maxlen, value=0, padding="post" ) return x, y MAX_SENTENCE_LEN = config.get("max_sentence_len", 25) train_x, train_y = preprocss(train_data, MAX_SENTENCE_LEN) test_x, test_y = preprocss(eval_data, MAX_SENTENCE_LEN) EPOCHS = config["epochs"] BATCH_SIZE = config["batch_size"] EMBED_DIM = config["embedding_dim"] USE_ATTENTION_LAYER = config.get("use_attention_layer", False) BiLSTM_STACK_CONFIG = config.get("bilstm_stack_config", []) BATCH_NORMALIZATION_AFTER_EMBEDDING_CONFIG = config.get( "use_batch_normalization_after_embedding", False ) BATCH_NORMALIZATION_AFTER_BILSTM_CONFIG = config.get( "use_batch_normalization_after_bilstm", False ) CRF_PARAMS = config.get("crf_params", {}) OPTIMIZER_PARAMS = config.get("optimizer_params", {}) vacab_size = vocabulary_lookuper.size() tag_size = tag_lookuper.size() model = Sequential() model.add( Embedding(vacab_size, EMBED_DIM, embeddings_initializer='glorot_normal', mask_zero=True, input_length=MAX_SENTENCE_LEN) ) if BATCH_NORMALIZATION_AFTER_EMBEDDING_CONFIG: model.add(BatchNormalization()) for bilstm_config in BiLSTM_STACK_CONFIG: model.add(Bidirectional(LSTM(return_sequences=True, **bilstm_config))) if BATCH_NORMALIZATION_AFTER_BILSTM_CONFIG: model.add(BatchNormalization()) if USE_ATTENTION_LAYER: model.add(GlobalAttentionLayer()) model.add(CRF(tag_size, name="crf", **CRF_PARAMS)) # print model summary model.summary() callbacks_list = [] tensorboard_callback = tf.keras.callbacks.TensorBoard( log_dir=create_dir_if_needed(config["summary_log_dir"]) ) callbacks_list.append(tensorboard_callback) checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( os.path.join(create_dir_if_needed(config["model_dir"]), "cp-{epoch:04d}.ckpt"), load_weights_on_restart=True, verbose=1, ) callbacks_list.append(checkpoint_callback) metrics_list = [] metrics_list.append(SequenceCorrectness()) metrics_list.append(SequenceSpanAccuracy()) loss_func = ConditionalRandomFieldLoss() # loss_func = crf_loss optimizer = optimizers.Adam(**OPTIMIZER_PARAMS) # optimizer = optimizers.Nadam(**OPTIMIZER_PARAMS) model.compile(optimizer=optimizer, loss={"crf": loss_func}, metrics=metrics_list) model.fit( train_x, train_y, batch_size=BATCH_SIZE, epochs=EPOCHS, 
validation_data=[test_x, test_y], callbacks=callbacks_list, ) # Save the model model.save(create_file_dir_if_needed(config["h5_model_file"])) tf.keras.experimental.export_saved_model( model, create_dir_if_needed(config["saved_model_dir"])) export_as_deliverable_model( create_dir_if_needed(config["deliverable_model_dir"]), keras_saved_model=config["saved_model_dir"], vocabulary_lookup_table=vocabulary_lookuper, tag_lookup_table=tag_lookuper, padding_parameter={"maxlen": MAX_SENTENCE_LEN, "value": 0, "padding": "post"}, addition_model_dependency=["tf-crf-layer"], custom_object_dependency=["tf_crf_layer"], )
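# Hedged follow-up sketch: reloading the exported h5 model for inference. The import
# paths and custom-object names below are assumptions based on the dependencies
# declared above (tf-crf-layer / tf_crf_layer); adjust to the library's actual names.
from tensorflow.keras.models import load_model
from tf_crf_layer.layer import CRF
from tf_crf_layer.loss import ConditionalRandomFieldLoss

reloaded = load_model(
    config["h5_model_file"],
    custom_objects={"CRF": CRF, "ConditionalRandomFieldLoss": ConditionalRandomFieldLoss},
)
# reloaded.predict(test_x)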
def main(): # get configure config = read_configure() # get train/test corpus corpus = get_corpus_processor(config) corpus.prepare() train_data_generator_func = corpus.get_generator_func(corpus.TRAIN) eval_data_generator_func = corpus.get_generator_func(corpus.EVAL) corpus_meta_data = corpus.get_meta_info() # process str data to onehot ner_tags_data = generate_tagset(corpus_meta_data["tags"]) cls_tags_data = corpus_meta_data["labels"] train_data = list(train_data_generator_func()) eval_data = list(eval_data_generator_func()) ner_tag_lookuper = Lookuper({v: i for i, v in enumerate(ner_tags_data)}) cls_tag_lookuper = Lookuper({v: i for i, v in enumerate(cls_tags_data)}) vocab_data_file = config.get("vocabulary_file") if not vocab_data_file: # load built in vocabulary file vocab_data_file = os.path.join( os.path.dirname(__file__), "../data/unicode_char_list.txt" ) vocabulary_lookuper = index_table_from_file(vocab_data_file) def preprocss(data, maxlen, **kwargs): raw_x = [] raw_y_ner = [] raw_y_cls = [] for offset_data in data: tags = offset_to_biluo(offset_data) label = offset_data.label words = offset_data.text tag_ids = [ner_tag_lookuper.lookup(i) for i in tags] label_id = cls_tag_lookuper.lookup(label) word_ids = [vocabulary_lookuper.lookup(i) for i in words] raw_x.append(word_ids) raw_y_ner.append(tag_ids) raw_y_cls.append(label_id) if maxlen is None: maxlen = max(len(s) for s in raw_x) print(">>> maxlen: {}".format(maxlen)) x = tf.keras.preprocessing.sequence.pad_sequences( raw_x, maxlen, padding="post" ) # right padding y_ner = tf.keras.preprocessing.sequence.pad_sequences( raw_y_ner, maxlen, value=0, padding="post" ) from keras.utils import to_categorical y_cls = np.array(raw_y_cls) y_cls = y_cls[:, np.newaxis] y_cls = to_categorical(y_cls, kwargs.get('cls_dims', 81)) return x, y_ner, y_cls # get Parameters (controller) EPOCHS = config.get("epochs", 10) BATCHSIZE = config.get("batch_size", 32) LEARNINGRATE = config.get("learning_rate", 0.001) MAX_SENTENCE_LEN = config.get("max_sentence_len", 25) # get Parameters (model structure) EMBED_DIM = config.get("embedding_dim", 300) USE_ATTENTION_LAYER = config.get("use_attention_layer", False) BiLSTM_STACK_CONFIG = config.get("bilstm_stack_config", []) BATCH_NORMALIZATION_AFTER_EMBEDDING_CONFIG = config.get( "use_batch_normalization_after_embedding", False) BATCH_NORMALIZATION_AFTER_BILSTM_CONFIG = config.get( "use_batch_normalization_after_bilstm", False) CRF_PARAMS = config.get("crf_params", {}) # get train/test data for training model vacab_size = vocabulary_lookuper.size() tag_size = ner_tag_lookuper.size() label_size = cls_tag_lookuper.size() train_x, train_y_ner, train_y_cls = preprocss(train_data, MAX_SENTENCE_LEN, **{'cls_dims':label_size}) test_x, test_y_ner, test_y_cls = preprocss(eval_data, MAX_SENTENCE_LEN, **{'cls_dims':label_size}) # build model input_length = MAX_SENTENCE_LEN input_layer = Input(shape=(input_length,), dtype='float', name='input_layer') # encoder with tf.keras.backend.name_scope("Encoder"): embedding_layer = Embedding(vacab_size, EMBED_DIM, mask_zero=True, input_length=input_length, name='embedding')(input_layer) # feature extractor with tf.keras.backend.name_scope("biLSTM"): if BATCH_NORMALIZATION_AFTER_EMBEDDING_CONFIG: embedding_layer = BatchNormalization()(embedding_layer) biLSTM = embedding_layer for bilstm_config in BiLSTM_STACK_CONFIG: biLSTM = Bidirectional(LSTM(return_sequences=True, **bilstm_config, name='biLSTM'))(biLSTM) if BATCH_NORMALIZATION_AFTER_BILSTM_CONFIG: biLSTM = BatchNormalization()(biLSTM) 
if USE_ATTENTION_LAYER: biLSTM = GlobalAttentionLayer()(biLSTM) # NER branch with tf.keras.backend.name_scope("NER_branch"): crf = CRF(tag_size, name="crf", **CRF_PARAMS)(biLSTM) loss_func = ConditionalRandomFieldLoss() # classification branch chosen = 'lstm_cls' with tf.keras.backend.name_scope("CLS_branch"): from tensorflow.keras.layers import Dense, Flatten, Dropout # add paragraph vector #paragraph_vector = get_paragraph_vector(embedding_layer) if chosen == "lstm_cls": cls_flat_lstm = Flatten()(biLSTM) #cls_flat_lstm = tf.keras.layers.concatenate([cls_flat_lstm, paragraph_vector]) classification_dense = Dropout(0.2)(cls_flat_lstm) classification_dense = SetLearningRate(Dense(label_size, activation='sigmoid', name='CLS'), lr=0.001, is_ada=True)(classification_dense) elif chosen == "conv_cls": from tensorflow.keras.layers import Conv1D, MaxPooling1D embedding_layer = BatchNormalization()(embedding_layer) cls_conv_emb = Conv1D(32, 3, activation='relu', padding='same')(embedding_layer) cls_conv_emb = Conv1D(64, 3, activation='relu', padding='same')(cls_conv_emb) cls_conv_emb = MaxPooling1D(2)(cls_conv_emb) cls_conv_emb = Conv1D(128, 3, activation='relu', dilation_rate=1, padding='same')(cls_conv_emb) cls_conv_emb = Conv1D(128, 3, activation='relu', dilation_rate=2, padding='same')(cls_conv_emb) cls_conv_emb = Conv1D(128, 3, activation='relu', dilation_rate=5, padding='same')(cls_conv_emb) cls_conv_emb = Conv1D(256, 1, activation='relu', padding='same')(cls_conv_emb) cls_conv_emb = MaxPooling1D(2)(cls_conv_emb) cls_flat = BatchNormalization()(cls_conv_emb) cls_flat = Flatten()(cls_flat) classification_dense = Dropout(0.2)(cls_flat) classification_dense = Dense(label_size, activation='sigmoid', name='CLS')(classification_dense) # merge NER and Classification model = Model(inputs=[input_layer], outputs=[crf, classification_dense]) model.summary() callbacks_list = [] tensorboard_callback = tf.keras.callbacks.TensorBoard( #log_dir=create_dir_if_needed(config["summary_log_dir"]) log_dir='.\\results\\summary_log_dir', batch_size=BATCHSIZE, ) callbacks_list.append(tensorboard_callback) checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( os.path.join(create_dir_if_needed(config["model_dir"]), "cp-{epoch:04d}.ckpt"), load_weights_on_restart=True, verbose=1, ) callbacks_list.append(checkpoint_callback) metrics_list = [] metrics_list.append(crf_accuracy) metrics_list.append(SequenceCorrectness()) metrics_list.append(sequence_span_accuracy) # early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', # early stop index # patience=3, # early stop delay epoch # verbose=2, # display mode # mode='auto') # callbacks_list.append(early_stop) from mtnlpmodel.trainer.loss_func_util import FocalLoss adam_optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNINGRATE, beta_1=0.9, beta_2=0.999, amsgrad=False) model.compile(optimizer=adam_optimizer, #loss={'crf': loss_func, 'CLS': 'sparse_categorical_crossentropy'}, loss={'crf': loss_func, 'CLS': FocalLoss()}, loss_weights={'crf': 1., 'CLS': 100}, # set weight of loss #metrics={'crf': SequenceCorrectness(), 'CLS': 'sparse_categorical_accuracy'} ) metrics={'crf': SequenceCorrectness(), 'CLS': 'categorical_accuracy'}) model.fit( train_x, {'crf': train_y_ner, 'CLS': train_y_cls}, epochs=EPOCHS, batch_size=BATCHSIZE, validation_data=[test_x, {'crf': test_y_ner, 'CLS': test_y_cls}], callbacks=callbacks_list, ) model.save(create_file_dir_if_needed(config["h5_model_file"])) model.save_weights(create_file_dir_if_needed(config["h5_weights_file"])) 
tf.keras.experimental.export_saved_model( model, create_or_rm_dir_if_needed(config["saved_model_dir"]) ) mt_export_as_deliverable_model( create_dir_if_needed(config["deliverable_model_dir"]), keras_saved_model=config["saved_model_dir"], converter_for_request=ConverterForRequest(), converter_for_response=ConverterForMTResponse(), lookup_tables={'vocab_lookup':vocabulary_lookuper, 'tag_lookup':ner_tag_lookuper, 'label_lookup':cls_tag_lookuper}, padding_parameter={"maxlen": MAX_SENTENCE_LEN, "value": 0, "padding": "post"}, addition_model_dependency=["tf-crf-layer"], custom_object_dependency=["tf_crf_layer"], )
def build_model(model_choice, **hyperparams): from mtnlpmodel.utils.model_util import ( get_ner_cls_output_tensor_merge_embedding, get_ner_cls_output_tensor_merge_input) # get hyperparams EMBED_DIM = hyperparams['EMBED_DIM'] CRF_PARAMS = hyperparams['CRF_PARAMS'] BiLSTM_STACK_CONFIG = hyperparams['BiLSTM_STACK_CONFIG'] CLS2NER_KEYWORD_LEN = hyperparams['CLS2NER_KEYWORD_LEN'] USE_ATTENTION_LAYER = hyperparams['USE_ATTENTION_LAYER'] tag_size = hyperparams['ner_tag_lookuper'].size() label_size = hyperparams['cls_label_lookuper'].size() vocab_size = hyperparams['vocabulary_lookuper'].size() # input layer input_length = hyperparams['MAX_SENTENCE_LEN'] ner_input_layer = Input(shape=(input_length, ), dtype='int32', name='ner_input') cls_input_layer = Input(shape=(input_length, ), dtype='int32', name='cls_input') # encoder if model_choice == 'VIRTUAL_EMBEDDING': # cls_out embedding merged to ner_input_embedding as virtual embedding from mtnlpmodel.utils.model_util import VirtualEmbedding, Discriminator_new with tf.keras.backend.name_scope("Encoder"): embedding_layer_vocab = Embedding(vocab_size, EMBED_DIM, mask_zero=True, input_length=input_length, name='embedding_vocab') embedding_layer_virtual = VirtualEmbedding( label_size, EMBED_DIM, mask_zero=True, input_length=CLS2NER_KEYWORD_LEN, mask_length=CLS2NER_KEYWORD_LEN, name='embedding_virtual', ) ner_embedding = embedding_layer_vocab(ner_input_layer) cls_embedding = embedding_layer_vocab(cls_input_layer) ner_embedding = Dropout(0.15)( ner_embedding) # just like random erase cls_embedding = Dropout(0.15)(cls_embedding) with tf.keras.backend.name_scope("Feature_extractor"): for bilstm_config in BiLSTM_STACK_CONFIG: biLSTM = Bidirectional( LSTM(return_sequences=True, **bilstm_config, name='biLSTM')) bilstm_extrator = biLSTM # classification branch ner_cls_layer, cls_output = cls_branch(hyperparams['Arcloss'], label_size, bilstm_extrator, cls_embedding, ner_embedding, outputlayer_name='cls') ner_cls_output_shape = get_ner_cls_output_tensor_merge_embedding( CLS2NER_KEYWORD_LEN)(ner_cls_layer).shape ner_cls_output_layer = Lambda( get_ner_cls_output_tensor_merge_embedding(CLS2NER_KEYWORD_LEN), ner_cls_output_shape)(ner_cls_layer) # classification output will be used as a keyword adding to input of NER discriminator = Discriminator_new( onetask_output_shape=(CLS2NER_KEYWORD_LEN, ), output_dtype='int32') ner_cls_input_layer = discriminator(ner_cls_output_layer) ner_virtual_embedding = embedding_layer_virtual(ner_cls_input_layer) ner_merged_embedding = tf.keras.layers.concatenate( [ner_virtual_embedding, ner_embedding], axis=1) ner_branch_embedding = ner_merged_embedding elif model_choice == 'CLS2NER_INPUT': # cls_out merged to ner_input as virtual keywords from mtnlpmodel.utils.model_util import Discriminator from mtnlpmodel.utils.input_process_util import build_vacablookuper_from_list vocabs = list( hyperparams['vocabulary_lookuper'].inverse_index_table.values()) cls_labels = list( hyperparams['cls_label_lookuper'].inverse_index_table.values()) vocabs.extend(cls_labels) vocabulary_lookuper = build_vacablookuper_from_list(*vocabs) vocab_size = vocabulary_lookuper.size() with tf.keras.backend.name_scope("Encoder"): embedding_layer = Embedding( vocab_size, EMBED_DIM, mask_zero=True, input_length=input_length, ) ner_embedding = embedding_layer(ner_input_layer) cls_embedding = embedding_layer(cls_input_layer) ner_embedding = Dropout(0.15)( ner_embedding) # just like random erase cls_embedding = Dropout(0.15)(cls_embedding) with 
tf.keras.backend.name_scope("Feature_extractor"): for bilstm_config in BiLSTM_STACK_CONFIG: biLSTM = Bidirectional( LSTM(return_sequences=True, **bilstm_config, name='biLSTM')) bilstm_extrator = biLSTM # classification branch ner_cls_layer, cls_output = cls_branch(hyperparams['Arcloss'], label_size, bilstm_extrator, cls_embedding, ner_embedding, outputlayer_name='cls') ner_cls_output_shape = get_ner_cls_output_tensor_merge_input( CLS2NER_KEYWORD_LEN, **{ "vocab_size": vocab_size, "label_size": label_size })(ner_cls_layer).shape ner_cls_output_layer = Lambda( get_ner_cls_output_tensor_merge_input( CLS2NER_KEYWORD_LEN, **{ "vocab_size": vocab_size, "label_size": label_size }), ner_cls_output_shape)(ner_cls_layer) # classification output will be used as a keyword adding to input of NER discriminator = Discriminator( ner_input_layer, onetask_output_shape=(CLS2NER_KEYWORD_LEN, ), output_dtype='int32') merged_ner_input_layer = discriminator( [ner_cls_output_layer, ner_input_layer]) ner_branch_embedding = embedding_layer(merged_ner_input_layer) else: # task independent with tf.keras.backend.name_scope("Encoder"): embedding_layer = Embedding( vocab_size, EMBED_DIM, mask_zero=True, input_length=input_length, ) ner_embedding = embedding_layer(ner_input_layer) cls_embedding = embedding_layer(cls_input_layer) ner_embedding = Dropout(0.15)( ner_embedding) # just like random erase cls_embedding = Dropout(0.15)(cls_embedding) with tf.keras.backend.name_scope("Feature_extractor"): for bilstm_config in BiLSTM_STACK_CONFIG: biLSTM = Bidirectional( LSTM(return_sequences=True, **bilstm_config, name='biLSTM')) bilstm_extrator = biLSTM # classification branch _, cls_output = cls_branch(hyperparams['Arcloss'], label_size, bilstm_extrator, cls_embedding, outputlayer_name='cls') ner_branch_embedding = ner_embedding # NER branch with tf.keras.backend.name_scope("NER_branch"): # print_op = tf.print(ner_virtual_embedding._keras_mask, ner_embedding._keras_mask) # with tf.control_dependencies([print_op]): embedding_layer = LayerNormalization()(ner_branch_embedding) biLSTM = bilstm_extrator(embedding_layer) biLSTM = LayerNormalization()(biLSTM) if USE_ATTENTION_LAYER: biLSTM = GlobalAttentionLayer()(biLSTM) ner_output = CRF(tag_size, name="crf", **CRF_PARAMS)(biLSTM) # merge NER and Classification model = Model(inputs=[ner_input_layer, cls_input_layer], outputs=[ner_output, cls_output]) return model
train_x, train_y = preprocss(train_data)
test_x, test_y = preprocss(eval_data)

EPOCHS = 1
EMBED_DIM = 64
BiRNN_UNITS = 200

vocab_size = vocabulary_lookuper.size()
tag_size = tag_lookuper.size()

# NOTE: the original snippet was incomplete here (no char input layer, an empty
# dict-input shape, and the final Model was never assembled). The wiring below is a
# hypothetical reconstruction; MAX_LEN and DICT_FEATURE_DIM are assumed names.
char_input_layer = Input(shape=(MAX_LEN,))
char_embed_layer = Embedding(vocab_size, EMBED_DIM, mask_zero=True)(char_input_layer)
char_bilstm_layer = Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True))(char_embed_layer)

dict_input_layer = Input(shape=(MAX_LEN, DICT_FEATURE_DIM))
dict_bilstm_layer = Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True))(dict_input_layer)

merged_layer = tf.keras.layers.concatenate([char_bilstm_layer, dict_bilstm_layer])
crf_layer = CRF(tag_size)
output_layer = crf_layer(merged_layer)

model = tf.keras.models.Model([char_input_layer, dict_input_layer], output_layer)

model.summary()

model.compile('adam', loss=crf_loss, metrics=[crf_accuracy])
# NOTE: with two inputs, fit/predict would also need the dictionary features,
# e.g. model.fit([train_x, train_dict_x], train_y, ...); the single-input calls
# below are kept as in the original for reference.
model.fit(train_x, train_y, epochs=EPOCHS, validation_data=[test_x, test_y])

pred_y = model.predict(test_x)
test_y_pred = pred_y[test_x > 0]
test_y_true = test_y[test_x > 0]

print('\n---- Result of BiLSTM-CRF ----\n')
classification_report(test_y_true, test_y_pred, tags_data)
# mask_zero=True, input_length=MAX_SENTENCE_LEN) # ) if BATCH_NORMALIZATION_AFTER_EMBEDDING_CONFIG: model.add(BatchNormalization()) for bilstm_config in BiLSTM_STACK_CONFIG: model.add(Bidirectional(LSTM(return_sequences=True, **bilstm_config))) if BATCH_NORMALIZATION_AFTER_BILSTM_CONFIG: model.add(BatchNormalization()) if USE_ATTENTION_LAYER: model.add(GlobalAttentionLayer()) model.add(CRF(tag_size, name="crf", **CRF_PARAMS)) # print model summary model.summary() callbacks_list = [] tensorboard_callback = tf.keras.callbacks.TensorBoard( log_dir=create_dir_if_needed(config["summary_log_dir"]) ) callbacks_list.append(tensorboard_callback) checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( os.path.join(create_dir_if_needed(config["model_dir"]), "cp-{epoch:04d}.ckpt"), load_weights_on_restart=True, verbose=1,
train_x, train_y = preprocss(train_data) test_x, test_y = preprocss(eval_data) EPOCHS = config['epochs'] EMBED_DIM = config['embedding_dim'] BiRNN_UNITS = config['lstm_size'] vacab_size = vocabulary_lookuper.size() tag_size = tag_lookuper.size() model = Sequential() model.add(Embedding(vacab_size, EMBED_DIM, mask_zero=True)) model.add(Bidirectional(LSTM(BiRNN_UNITS, return_sequences=True))) model.add(CRF(tag_size, name='crf')) # print model summary model.summary() callbacks_list = [] tensorboard_callback = tf.keras.callbacks.TensorBoard( log_dir=create_dir_if_needed(config['summary_log_dir'])) callbacks_list.append(tensorboard_callback) checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( os.path.join(create_dir_if_needed(config['model_dir']), 'cp-{epoch:04d}.ckpt'), load_weights_on_restart=True, verbose=1)
def main(): # ------ # Data # ----- # conll200 has two different targets, here will only use # IBO like chunking as an example train, test, voc = conll2000.load_data() (train_x, _, train_y) = train (test_x, _, test_y) = test (vocab, _, class_labels) = voc # -------------- # 1. Regular CRF # -------------- print('==== training CRF ====') model = Sequential() model.add(Embedding(len(vocab), EMBED_DIM, mask_zero=True)) # Random embedding # model.add(Embedding(len(vocab), EMBED_DIM, mask_zero=True, input_length=78)) # Random embedding crf = CRF(len(class_labels), name="crf_layer") model.add(crf) crf_loss_instance = ConditionalRandomFieldLoss() # The default `crf_loss` for `learn_mode='join'` is negative log likelihood. model.compile('adam', loss={"crf_layer": crf_loss_instance}, metrics=[SequenceSpanAccuracy()]) # model.compile('adam', loss={"crf_layer": crf_loss_instance}, metrics=[CategoricalAccuracy()]) # model.compile('adam', loss={"crf_layer": crf_loss_instance}, metrics=[crf_accuracy]) model.fit(train_x, train_y, epochs=EPOCHS, validation_data=[test_x, test_y]) # test_y_pred = model.predict(test_x).argmax(-1)[test_x > 0] test_y_pred = model.predict(test_x)[test_x > 0] test_y_true = test_y[test_x > 0] print('\n---- Result of CRF ----\n') classification_report(test_y_true, test_y_pred, class_labels) # ------------- # 2. BiLSTM-CRF # ------------- print('==== training BiLSTM-CRF ====') model = Sequential() model.add(Embedding(len(vocab), EMBED_DIM, mask_zero=True)) # Random embedding # model.add(Embedding(len(vocab), EMBED_DIM, mask_zero=True, input_length=78)) # Random embedding model.add(Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True))) crf = CRF(len(class_labels), name="crf_layer") model.add(crf) crf_loss_instance = ConditionalRandomFieldLoss() model.compile('adam', loss={"crf_layer": crf_loss_instance}, metrics=[SequenceSpanAccuracy()]) # model.compile('adam', loss={"crf_layer": crf_loss_instance}, metrics=[CategoricalAccuracy()]) # model.compile('adam', loss={"crf_layer": crf_loss_instance}, metrics=[crf_accuracy]) model.fit(train_x, train_y, epochs=EPOCHS, validation_data=[test_x, test_y]) predict_result = model.predict(test_x) test_y_pred = predict_result[test_x > 0] test_y_true = test_y[test_x > 0] print('\n---- Result of BiLSTM-CRF ----\n') classification_report(test_y_true, test_y_pred, class_labels)
train_x, train_intent, train_y, intent_lookup_table = preprocss(train_data, 25) test_x, test_intent, test_y, _ = preprocss(eval_data, 25, intent_lookup_table) EPOCHS = 10 EMBED_DIM = 64 BiRNN_UNITS = 200 vacab_size = vocabulary_lookuper.size() tag_size = tag_lookuper.size() allowed = allowed_transitions("BIOUL", tag_lookuper.inverse_index_table) model = Sequential() model.add(Embedding(vacab_size, EMBED_DIM, mask_zero=True)) model.add(Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True))) model.add(CRF(tag_size, transition_constraint=allowed)) # print model summary model.summary() callbacks_list = [] # tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=config['summary_log_dir']) # callbacks_list.append(tensorboard_callback) # # checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( # os.path.join(config['model_dir'], 'cp-{epoch:04d}.ckpt'), # load_weights_on_restart=True, # verbose=1 # ) # callbacks_list.append(checkpoint_callback)
def test_crf_config(get_random_data): nb_samples = 2 timesteps = 10 embedding_dim = 4 output_dim = 5 embedding_num = 12 x, y = get_random_data( nb_samples, timesteps, x_high=embedding_num, y_high=output_dim ) # right padding; left padding is not supported due to the tf.contrib.crf x[0, -4:] = 0 crf_loss_instance = ConditionalRandomFieldLoss() # test with masking, fix length model = Sequential() model.add( Embedding(embedding_num, embedding_dim, input_length=timesteps, mask_zero=True) ) model.add(CRF(output_dim, name="crf_layer")) model.compile(optimizer="rmsprop", loss={"crf_layer": crf_loss_instance}) model.fit(x, y, epochs=1, batch_size=10) # test config result = model.get_config() expected = { "name": "sequential", "layers": [ { "class_name": "Embedding", "config": { "name": "embedding", "trainable": True, "batch_input_shape": (None, 10), "dtype": "float32", "input_dim": 12, "output_dim": 4, "embeddings_initializer": { "class_name": "RandomUniform", "config": { "minval": -0.05, "maxval": 0.05, "seed": None, "dtype": "float32", }, }, "embeddings_regularizer": None, "activity_regularizer": None, "embeddings_constraint": None, "mask_zero": True, "input_length": 10, }, }, { "class_name": "CRF", "config": { "name": "crf_layer", "trainable": True, "dtype": "float32", "units": 5, "use_boundary": True, "use_bias": True, "use_kernel": True, "kernel_initializer": { "class_name": "GlorotUniform", "config": {"seed": None, "dtype": "float32"}, }, "chain_initializer": { "class_name": "Orthogonal", "config": {"gain": 1.0, "seed": None, "dtype": "float32"}, }, "boundary_initializer": { "class_name": "Zeros", "config": {"dtype": "float32"}, }, "bias_initializer": { "class_name": "Zeros", "config": {"dtype": "float32"}, }, "activation": "linear", "kernel_regularizer": None, "chain_regularizer": None, "boundary_regularizer": None, "bias_regularizer": None, "kernel_constraint": None, "chain_constraint": None, "boundary_constraint": None, "bias_constraint": None, }, }, ], } assert result == expected
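# A hedged extension of the config check: the same dict should also rebuild an
# equivalent model via Sequential.from_config (this assumes the CRF layer's
# get_config/from_config round-trip cleanly; not verified against the library here).
rebuilt = Sequential.from_config(result, custom_objects={"CRF": CRF})
assert rebuilt.get_config() == expected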
def test_unmasked_constrained_viterbi_tags(self): # TODO: using BILUO tag scheme instead of BIO. # So that, transition from tags to end can be tested. raw_constraints = np.array([ # O B-X I-X B-Y I-Y start end [ 1, 1, 0, 1, 0, 0, 1], # O [ 1, 1, 1, 1, 0, 0, 1], # B-X [ 1, 1, 1, 1, 0, 0, 1], # I-X [ 1, 1, 0, 1, 1, 0, 1], # B-Y [ 1, 1, 0, 1, 1, 0, 1], # I-Y [ 1, 1, 0, 1, 0, 0, 0], # start [ 0, 0, 0, 0, 0, 0, 0], # end ]) constraints = np.argwhere(raw_constraints > 0).tolist() # transitions = np.array([ # # O B-X I-X B-Y I-Y # [ 0.1, 0.2, 0.3, 0.4, 0.5], # O # [ 0.8, 0.3, 0.1, 0.7, 0.9], # B-X # [ -0.3, 2.1, -5.6, 3.4, 4.0], # I-X # [ 0.2, 0.4, 0.6, -0.3, -0.4], # B-Y # [ 1.0, 1.0, 1.0, 1.0, 1.0] # I-Y # ]) transitions = np.ones([5, 5]) # transitions_from_start = np.array( # # O B-X I-X B-Y I-Y # [ 0.1, 0.2, 0.3, 0.4, 0.6] # start # ) transitions_from_start = np.ones(5) # transitions_to_end = np.array( # [ # # end # -0.1, # O # -0.2, # B-X # 0.3, # I-X # -0.4, # B-Y # -0.4 # I-Y # ] # ) transitions_to_end = np.ones(5) logits = np.array([ [ # constraint transition from start to tags # O B-X I-X B-Y I-Y [ 0., .1, 1., 0., 0.], [ 0., 0., 1., 0., 0.], [ 0., 0., 1., 0., 0.] ], [ # constraint transition from tags to tags # O B-X I-X B-Y I-Y [ 0., 1., 0., 0., 0.], [ 0., 0., .1, 1., 0.], [ 0., 0., 1., 0., 0.] ] ]) crf = CRF( units=5, use_kernel=False, # disable kernel transform chain_initializer=initializers.Constant(transitions), use_boundary=True, # left_boundary_initializer=initializers.Constant(transitions_from_start), # right_boundary_initializer=initializers.Constant(transitions_to_end), transition_constraint=constraints, name="crf_layer" ) crf.left_boundary = crf.add_weight( shape=(5,), name="left_boundary", initializer=initializers.Constant(self.transitions_from_start), ) crf.right_boundary = crf.add_weight( shape=(5,), name="right_boundary", initializer=initializers.Constant(self.transitions_to_end), ) crf_loss_instance = ConditionalRandomFieldLoss() model = Sequential() model.add(layers.Input(shape=(3, 5))) model.add(crf) model.compile('adam', loss={"crf_layer": crf_loss_instance}) for layer in model.layers: print(layer.get_config()) print(dict(zip(layer.weights, layer.get_weights()))) # Get just the tags from each tuple of (tags, score). viterbi_tags = model.predict(logits) # Now the tags should respect the constraints expected_tags = [ [1, 2, 2], # B-X I-X I-X [1, 2, 2] # B-X I-X I-X ] # if constrain not work it should be: # [ # [2, 4, 3], # [2, 3, 0] # ] # test assert np.testing.assert_equal(viterbi_tags, expected_tags)
def test_masked_viterbi_decode():
    transitions = np.ones([5, 5])
    transitions_from_start = np.ones(5)
    transitions_to_end = np.ones(5)

    logits = np.array([
        [
            # O   B-X  I-X  B-Y  I-Y
            [0., 1., 0., 0., 0.],
            [0., 0., 1., 0., 0.],
            [0., 0., 1., 0., 0.]
        ],
        [
            # O   B-X  I-X  B-Y  I-Y
            [0., 1., 0., 0., 0.],
            [0., 1., 0., 0., 0.],
            [0., 1., 0., 0., 0.]
        ]
    ])

    # TODO: this test covers right-padding masks only, because the underlying
    # CRF op only supports sequence-length masks.
    mask = np.array([
        [1, 1, 0],
        [1, 1, 0]
    ])

    crf = CRF(
        units=5,
        use_kernel=False,  # disable kernel transform
        chain_initializer=initializers.Constant(transitions),
        use_boundary=True,
        # left_boundary_initializer=initializers.Constant(transitions_from_start),
        # right_boundary_initializer=initializers.Constant(transitions_to_end),
        name="crf_layer"
    )

    crf_loss_instance = ConditionalRandomFieldLoss()

    model = Sequential()
    model.add(layers.Input(shape=(3, 5)))
    model.add(MockMasking(mask_shape=(2, 3), mask_value=mask))
    model.add(crf)
    model.compile('adam', loss={"crf_layer": crf_loss_instance})

    # Get just the tags from each tuple of (tags, score).
    result = model.predict(logits)

    # The masked timesteps should decode to 0; without the mask the third
    # timestep would not be forced to 0.
    expected = [
        [1, 2, 0],  # B-X I-X NA
        [1, 1, 0]   # B-X B-X NA
    ]

    # np.testing.assert_equal raises on mismatch; do not wrap it in `assert`.
    np.testing.assert_equal(result, expected)
vacab_size = vocabulary_lookuper.size() tag_size = tag_lookuper.size() # model = Sequential() # model.add(Embedding(vacab_size, EMBED_DIM, mask_zero=True)) # model.add(Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True))) # model.add(CRF(tag_size)) raw_input = layers.Input(shape=(MAX_LEN,)) embedding_layer = Embedding(vacab_size, EMBED_DIM, mask_zero=True)(raw_input) bilstm_layer = Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True))(embedding_layer) crf_layer = CRF( units=tag_size, transition_constraint_matrix=constraint_table ) dynamic_constraint_input = layers.Input(shape=(intent_number,)) output_layer = crf_layer([bilstm_layer, dynamic_constraint_input]) model = models.Model([raw_input, dynamic_constraint_input], output_layer) # print model summary model.summary() callbacks_list = [] # tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=config['summary_log_dir']) # callbacks_list.append(tensorboard_callback)
def test_constrained_viterbi_tags(self):
    constraints = {(0, 0), (0, 1),
                   (1, 1), (1, 2),
                   (2, 2), (2, 3),
                   (3, 3), (3, 4),
                   (4, 4), (4, 0)}

    # Add the transitions to the end tag
    # and from the start tag.
    for i in range(5):
        constraints.add((5, i))
        constraints.add((i, 6))

    mask = np.array([
        [1, 1, 1],
        [1, 1, 0]
    ])

    crf = CRF(
        units=5,
        use_kernel=False,  # disable kernel transform
        chain_initializer=initializers.Constant(self.transitions),
        use_boundary=True,
        # left_boundary_initializer=initializers.Constant(self.transitions_from_start),
        # right_boundary_initializer=initializers.Constant(self.transitions_to_end),
        transition_constraint=constraints,
        name="crf_layer"
    )

    crf.left_boundary = crf.add_weight(
        shape=(5,),
        name="left_boundary",
        initializer=initializers.Constant(self.transitions_from_start),
    )
    crf.right_boundary = crf.add_weight(
        shape=(5,),
        name="right_boundary",
        initializer=initializers.Constant(self.transitions_to_end),
    )

    crf_loss_instance = ConditionalRandomFieldLoss()

    model = Sequential()
    model.add(layers.Input(shape=(3, 5)))
    model.add(MockMasking(mask_shape=(2, 3), mask_value=mask))
    model.add(crf)
    model.compile('adam', loss={"crf_layer": crf_loss_instance})

    for layer in model.layers:
        print(layer.get_config())
        print(dict(zip(layer.weights, layer.get_weights())))

    # Get just the tags from each tuple of (tags, score).
    viterbi_tags = model.predict(self.logits)

    # Now the tags should respect the constraints
    expected_tags = [
        [2, 3, 3],
        [2, 3, 0]
    ]

    # If the constraints were ignored, the decode would instead be:
    # [[2, 4, 3], [2, 3, 0]]

    # np.testing.assert_equal raises on mismatch; do not wrap it in `assert`.
    np.testing.assert_equal(viterbi_tags, expected_tags)
input_length=maxlen, trainable=False)(input) model = Bidirectional( LSTM(units=word_embedding_size, return_sequences=True, dropout=0.5, recurrent_dropout=0.4, kernel_initializer=k.initializers.he_normal()))(model) model = Bidirectional( LSTM(units=word_embedding_size * 2, return_sequences=True, dropout=0.5, recurrent_dropout=0.4, kernel_initializer=k.initializers.he_normal()))(model) model = TimeDistributed(Dense(n_tags, activation="relu"))(model) crf = CRF(n_tags) out = crf(model) model = Model(input, out) adam = k.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999) # ##Training model # model.compile(optimizer='adam', loss=crf.loss_function, metrics=[crf.accuracy, 'accuracy']) # model.summary() # class_weight = [10, 1, 1] # print(f'\nclass_weight : {class_weight}\n') # history = model.fit(X_train, np.array(y_train), batch_size=256, epochs=3, verbose=1, class_weight=class_weight,shuffle=True) # model.save("Model Version/ner_kw.h5") # # plot_history(history) #Loading model model = k.models.load_model("Model Version/ner_kw.h5",
class TestConditionalRandomField(unittest.TestCase): def setUp(self): super().setUp() self.logits = np.array([ [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]], [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]], ]) self.tags = np.array([ [2, 3, 4], [3, 2, 2] ]) self.transitions = np.array([ [0.1, 0.2, 0.3, 0.4, 0.5], [0.8, 0.3, 0.1, 0.7, 0.9], [-0.3, 2.1, -5.6, 3.4, 4.0], [0.2, 0.4, 0.6, -0.3, -0.4], [1.0, 1.0, 1.0, 1.0, 1.0] ]) self.transitions_from_start = np.array([0.1, 0.2, 0.3, 0.4, 0.6]) self.transitions_to_end = np.array([-0.1, -0.2, 0.3, -0.4, -0.4]) # Use the CRF Module with fixed transitions to compute the log_likelihood self.crf = CRF( units=5, use_kernel=False, # disable kernel transform chain_initializer=initializers.Constant(self.transitions), use_boundary=True, # left_boundary_initializer=initializers.Constant(self.transitions_from_start), # right_boundary_initializer=initializers.Constant(self.transitions_to_end), name="crf_layer" ) self.crf.left_boundary = self.crf.add_weight( shape=(self.crf.units,), name="left_boundary", initializer=initializers.Constant(self.transitions_from_start), ) self.crf.right_boundary = self.crf.add_weight( shape=(self.crf.units,), name="right_boundary", initializer=initializers.Constant(self.transitions_to_end), ) def score(self, logits, tags): """ Computes the likelihood score for the given sequence of tags, given the provided logits (and the transition weights in the CRF model) """ # Start with transitions from START and to END total = self.transitions_from_start[tags[0]] + self.transitions_to_end[tags[-1]] # Add in all the intermediate transitions for tag, next_tag in zip(tags, tags[1:]): total += self.transitions[tag, next_tag] # Add in the logits for the observed tags for logit, tag in zip(logits, tags): total += logit[tag] return total # def test_forward_works_without_mask(self): # log_likelihood = self.crf(self.logits, self.tags).item() # # # Now compute the log-likelihood manually # manual_log_likelihood = 0.0 # # # For each instance, manually compute the numerator # # (which is just the score for the logits and actual tags) # # and the denominator # # (which is the log-sum-exp of the scores for the logits across all possible tags) # for logits_i, tags_i in zip(self.logits, self.tags): # numerator = self.score(logits_i.detach(), tags_i.detach()) # all_scores = [self.score(logits_i.detach(), tags_j) # for tags_j in itertools.product(range(5), repeat=3)] # denominator = math.log(sum(math.exp(score) for score in all_scores)) # # And include them in the manual calculation. # manual_log_likelihood += numerator - denominator # # # The manually computed log likelihood should equal the result of crf.forward. # assert manual_log_likelihood.item() == approx(log_likelihood) @pytest.mark.skip("constrain is not supported yet") def test_constrained_viterbi_tags(self): constraints = {(0, 0), (0, 1), (1, 1), (1, 2), (2, 2), (2, 3), (3, 3), (3, 4), (4, 4), (4, 0)} # Add the transitions to the end tag # and from the start tag. 
for i in range(5): constraints.add((5, i)) constraints.add((i, 6)) mask = np.array([ [1, 1, 1], [1, 1, 0] ]) crf = CRF( units=5, use_kernel=False, # disable kernel transform chain_initializer=initializers.Constant(self.transitions), use_boundary=True, # left_boundary_initializer=initializers.Constant(self.transitions_from_start), # right_boundary_initializer=initializers.Constant(self.transitions_to_end), transition_constraint=constraints, name="crf_layer" ) crf.left_boundary = crf.add_weight( shape=(5,), name="left_boundary", initializer=initializers.Constant(self.transitions_from_start), ) crf.right_boundary = crf.add_weight( shape=(5,), name="right_boundary", initializer=initializers.Constant(self.transitions_to_end), ) crf_loss_instance = ConditionalRandomFieldLoss() model = Sequential() model.add(layers.Input(shape=(3, 5))) model.add(MockMasking(mask_shape=(2, 3), mask_value=mask)) model.add(crf) model.compile('adam', loss={"crf_layer": crf_loss_instance}) for layer in model.layers: print(layer.get_config()) print(dict(zip(layer.weights, layer.get_weights()))) # Get just the tags from each tuple of (tags, score). viterbi_tags = model.predict(self.logits) # Now the tags should respect the constraints expected_tags = [ [2, 3, 3], [2, 3, 0] ] # if constrain not work it should be: # [ # [2, 4, 3], # [2, 3, 0] # ] # test assert np.testing.assert_equal(viterbi_tags, expected_tags) @pytest.mark.skip("constrain is not supported yet") def test_unmasked_constrained_viterbi_tags(self): # TODO: using BILUO tag scheme instead of BIO. # So that, transition from tags to end can be tested. raw_constraints = np.array([ # O B-X I-X B-Y I-Y start end [ 1, 1, 0, 1, 0, 0, 1], # O [ 1, 1, 1, 1, 0, 0, 1], # B-X [ 1, 1, 1, 1, 0, 0, 1], # I-X [ 1, 1, 0, 1, 1, 0, 1], # B-Y [ 1, 1, 0, 1, 1, 0, 1], # I-Y [ 1, 1, 0, 1, 0, 0, 0], # start [ 0, 0, 0, 0, 0, 0, 0], # end ]) constraints = np.argwhere(raw_constraints > 0).tolist() # transitions = np.array([ # # O B-X I-X B-Y I-Y # [ 0.1, 0.2, 0.3, 0.4, 0.5], # O # [ 0.8, 0.3, 0.1, 0.7, 0.9], # B-X # [ -0.3, 2.1, -5.6, 3.4, 4.0], # I-X # [ 0.2, 0.4, 0.6, -0.3, -0.4], # B-Y # [ 1.0, 1.0, 1.0, 1.0, 1.0] # I-Y # ]) transitions = np.ones([5, 5]) # transitions_from_start = np.array( # # O B-X I-X B-Y I-Y # [ 0.1, 0.2, 0.3, 0.4, 0.6] # start # ) transitions_from_start = np.ones(5) # transitions_to_end = np.array( # [ # # end # -0.1, # O # -0.2, # B-X # 0.3, # I-X # -0.4, # B-Y # -0.4 # I-Y # ] # ) transitions_to_end = np.ones(5) logits = np.array([ [ # constraint transition from start to tags # O B-X I-X B-Y I-Y [ 0., .1, 1., 0., 0.], [ 0., 0., 1., 0., 0.], [ 0., 0., 1., 0., 0.] ], [ # constraint transition from tags to tags # O B-X I-X B-Y I-Y [ 0., 1., 0., 0., 0.], [ 0., 0., .1, 1., 0.], [ 0., 0., 1., 0., 0.] 
            ]
        ])

        crf = CRF(
            units=5,
            use_kernel=False,  # disable kernel transform
            chain_initializer=initializers.Constant(transitions),
            use_boundary=True,
            # left_boundary_initializer=initializers.Constant(transitions_from_start),
            # right_boundary_initializer=initializers.Constant(transitions_to_end),
            transition_constraint=constraints,
            name="crf_layer"
        )

        crf.left_boundary = crf.add_weight(
            shape=(5,),
            name="left_boundary",
            initializer=initializers.Constant(self.transitions_from_start),
        )
        crf.right_boundary = crf.add_weight(
            shape=(5,),
            name="right_boundary",
            initializer=initializers.Constant(self.transitions_to_end),
        )

        crf_loss_instance = ConditionalRandomFieldLoss()

        model = Sequential()
        model.add(layers.Input(shape=(3, 5)))
        model.add(crf)
        model.compile('adam', loss={"crf_layer": crf_loss_instance})

        for layer in model.layers:
            print(layer.get_config())
            print(dict(zip(layer.weights, layer.get_weights())))

        # Get just the tags from each tuple of (tags, score).
        viterbi_tags = model.predict(logits)

        # Now the tags should respect the constraints
        expected_tags = [
            [1, 2, 2],  # B-X I-X I-X
            [1, 2, 2]   # B-X I-X I-X
        ]

        # If the constraints were ignored, the decode would instead be:
        # [[2, 4, 3], [2, 3, 0]]

        # np.testing.assert_equal raises on mismatch; do not wrap it in `assert`.
        np.testing.assert_equal(viterbi_tags, expected_tags)