Example #1
def bilstm_crf_predict():

    # Re-initialize the model and build the configuration exactly as in the train part
    input = Input(shape=(max_len, ))
    model = Embedding(input_dim=n_words + 1,
                      output_dim=20,
                      input_length=max_len,
                      mask_zero=True)(input)  # 20-dim embedding
    model = Bidirectional(
        LSTM(units=50, return_sequences=True,
             recurrent_dropout=0.1))(model)  # variational biLSTM
    model = TimeDistributed(Dense(50, activation="relu"))(
        model)  # a dense layer as suggested by neuralNer
    crf = CRF(n_tags)  # CRF layer
    out = crf(model)  # output
    model = Model(input, out)

    # Restore the saved weights
    save_load_utils.load_all_weights(model, filepath="result/bilstm-crf.h5")

    p = model.predict(np.array([x_test_sent[0]]))
    p = np.argmax(p, axis=-1)
    print("{:15}||{}".format("Word", "Prediction"))
    print(30 * "=")
    for w, pred in zip(test_sentence, p[0]):
        print("{:15}: {:5}".format(w, tags[pred]))
def test_save_and_load_all_weights():
    '''
    Test save_all_weights and load_all_weights. Save and load optimizer and model weights but not configuration.
    '''
    def make_model():
        _x = Input((10, ))
        _y = Dense(10)(_x)
        _m = Model(_x, _y)
        _m.compile('adam', 'mean_squared_error')
        _m._make_train_function()
        return _m

    # make a model
    m1 = make_model()
    # set weights
    w1 = m1.layers[1].kernel  # dense layer
    w1value = K.get_value(w1)
    w1value[0, 0:4] = [1, 3, 3, 7]
    K.set_value(w1, w1value)
    # set optimizer weights
    ow1 = m1.optimizer.weights[3]  # momentum weights
    ow1value = K.get_value(ow1)
    ow1value[0, 0:3] = [4, 2, 0]
    K.set_value(ow1, ow1value)
    # save all weights
    save_all_weights(m1, 'model.h5')
    # new model
    m2 = make_model()
    # load all weights
    load_all_weights(m2, 'model.h5')
    # check weights
    assert_allclose(K.get_value(m2.layers[1].kernel)[0, 0:4], [1, 3, 3, 7])
    # check optimizer weights
    assert_allclose(K.get_value(m2.optimizer.weights[3])[0, 0:3], [4, 2, 0])
    os.remove('model.h5')
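The save-side counterpart of load_all_weights is save_all_weights from the same module (used in the test above and, commented out, in Example #21 below); a minimal sketch, assuming model is any compiled keras_contrib model and the path is illustrative:

from keras_contrib.utils import save_load_utils

# persist layer weights plus optimizer state; pass include_optimizer=False to skip the optimizer slots
save_load_utils.save_all_weights(model, 'result/bilstm-crf.h5')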
    def load_pretrained_model(self,
                              path_model,
                              layers=2,
                              rec_dropout=[0.2, 0.2],
                              rnn_dropout=[0.5, 0.5],
                              units=[400, 400],
                              lr=0.001,
                              train_emb=False):
        """
        Loads a pretrained CRF-on-top-of-BiLSTM model. The loading is a bit tricky:
        the model first needs a naive (dummy) training run so that the saved weights can be loaded afterwards.
        """

        print("BiLSTM with CRF...")
        self.create_model(layers, rec_dropout, rnn_dropout, units, lr,
                          train_emb)

        # model.summary()

        # define callbacks
        print("Dummy train to init model...")
        early_stopping = EarlyStopping(monitor="val_loss",
                                       min_delta=0.001,
                                       patience=3,
                                       verbose=1)
        callbacks_list = [early_stopping]

        # one dummy fit step so that the saved weights can be loaded afterwards
        hist = self.model.fit(np.random.randint(low=2, high=105, size=(1, 36)),
                              np.zeros(shape=(1, 36, 2)))

        print("Loading model...")
        # Load model
        save_load_utils.load_all_weights(self.model, path_model)
        print("Done!")
Example #4
    def load_model(self, filename):
        input = Input(shape=(self.max_len, ))
        embed = Embedding(input_dim=self.num_word + 1,
                          output_dim=64,
                          input_length=self.max_len,
                          dropout=0.2,
                          name='embed')(input)
        bilstm = Bidirectional(
            LSTM(32, dropout_W=0.1, dropout_U=0.1,
                 return_sequences=True))(embed)
        bilstm_d = Dropout(0.1)(bilstm)

        half_window_size = 2
        paddinglayer = ZeroPadding1D(padding=half_window_size)(embed)
        conv = Conv1D(nb_filter=50,
                      filter_length=(2 * half_window_size + 1),
                      border_mode='valid')(paddinglayer)

        conv_d = Dropout(0.1)(conv)
        dense_conv = TimeDistributed(Dense(50))(conv_d)

        rnn_cnn_merge = concatenate([bilstm_d, dense_conv], axis=2)
        dense = TimeDistributed(Dense(self.num_tag))(
            rnn_cnn_merge)  # per-timestep dense projection feeding the CRF layer
        crf = CRF(self.num_tag, sparse_target=False)
        out = crf(dense)
        model = Model(input, out)
        model.compile(optimizer="adam",
                      loss=crf.loss_function,
                      metrics=[crf.accuracy])

        save_load_utils.load_all_weights(model,
                                         filename,
                                         include_optimizer=False)
        return model
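A hedged usage sketch for the loader above; the tagger instance, checkpoint path, and padded input batch are illustrative placeholders:

model = tagger.load_model('checkpoints/bilstm_cnn_crf.h5')  # tagger / path are placeholders
pred_tags = np.argmax(model.predict(x_padded), axis=-1)     # x_padded: (batch, max_len) word ids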
Example #5
def testKerasModel(max_len, num_LSTM_Units, learning_rate, vector_dim,
                   num_docs, embedding_matrix, embeddingLayerFlag,
                   embeddingFlag, dropout, batch_size, epochs, X_Test, y_Test,
                   experiment):
    model1, crf1 = createModelArchitecture(max_len, num_LSTM_Units, vector_dim,
                                           num_docs, embedding_matrix, dropout)
    model1.compile(loss=crf1.loss_function,
                   optimizer="rmsprop",
                   metrics=[crf1.accuracy])
    print("Model Architecture Created")
    print("Loading Model")
    modelName = 'KerasModel' + '_lstm' + str(num_LSTM_Units) + '_lr' + str(
        learning_rate) + '_dropOut' + str(dropout) + '_bSize' + str(
            batch_size) + '_epochs' + str(
                epochs
            ) + '_' + embeddingLayerFlag + '_' + embeddingFlag + '_' + str(
                experiment) + 'exp.h5'
    save_load_utils.load_all_weights(model1,
                                     path + modelName,
                                     include_optimizer=False)

    print("Model Loaded, Testing...")
    summaryResult, percCorrectSeqPred, editDistance, editDistanceWithSwap = evaluateModel(
        model1, X_Test, y_Test)

    print("Percentage of Correctly Predicted Sequences :: ",
          percCorrectSeqPred)
    print("Average Edit Distance :: ", editDistance)
    print("Average Edit Distance with swaps allowed :: ", editDistanceWithSwap)
    print(summaryResult)
Example #6
    def load(self, path):
        """
        Load model weights

        Args:
            path (str): path to load model from
        """
        save_load_utils.load_all_weights(self.model, path, include_optimizer=False)
Example #7
def load_model(model_path):
    from keras_contrib.utils import save_load_utils
    model = Model().model
    save_load_utils.load_all_weights(model,
                                     model_path,
                                     include_optimizer=False)

    return model
    def load_actor(self, timestamp, name):

        path = os.path.join(r'./ExperimentResults', timestamp + '_' + name,
                            name + '.model')
        if (os.path.isfile(path)):
            save_load_utils.load_all_weights(self.model, path)
            logger.info('Weights from ' + path + ' found and loaded')
        else:
            logger.info('Weights from ' + path +
                        ' could NOT be found and loaded')
Example #9
	def _load_keras_model(self, path, model=None):
		with open(os.path.abspath(path + '.json'), 'r') as jf:
			json_model = jf.read()
		if model is None:
			model = model_from_json(json_model)
		# model.summary()
		# print(model.get_weights()[0])
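		# the try/except below appears to handle both formats: weights written by
		# save_all_weights (which add optimizer groups to the HDF5 file) and plain
		# Keras checkpoints, which raise KeyError here and are loaded with load_weights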
		try:
			save_load_utils.load_all_weights(model, os.path.abspath(path + '.hdf5'))
		except KeyError:
			model.load_weights(os.path.abspath(path + '.hdf5'))
		# print(model.get_weights()[0])
		return model
Example #10
    def load(self, file_path):
        """ Loads a model saved locally. """
        load_path = Path(file_path)
        model_load_path = load_path.joinpath("KerasNER.model")
        config_load_path = load_path.joinpath("KerasNER.config")
        encoder_load_path = load_path.joinpath("encoder")
        self.config.load(config_load_path)
        if self.config.get_parameter("use_crf"):
            save_load_utils.load_all_weights(self.model, str(model_load_path))
        else:
            self.model = load_model(str(model_load_path))
        self.encoder.load(encoder_load_path)
        return self
    def load_trained_model(self, name):
        if self.verbose:
            print("\n[INFO] Loading trained model from '" + name + "'")
            print(
                "Performing dummy training in order to be able to load weights:\n"
            )
        self.model.fit(self.X_train[0:5],
                       np.array(self.y_train[0:5]),
                       epochs=1)

        save_load_utils.load_all_weights(self.model, name)
        self.model.compile(optimizer=self.optimizer,
                           loss=self.loss,
                           metrics=[crf_accuracy])
        if self.verbose: print("[INFO] Model is loaded.\n")
    def load_model(self, filename):
        input = Input(shape=(self.max_len, ))
        model = Embedding(input_dim=self.num_word + 1,
                          output_dim=20,
                          input_length=self.max_len,
                          mask_zero=True)(input)
        model = Dropout(0.1)(model)
        model = Bidirectional(
            LSTM(units=50, return_sequences=True,
                 recurrent_dropout=0.1))(model)
        model = TimeDistributed(Dense(50, activation="relu"))(
            model)  # relu dense layer feeding the CRF
        crf = CRF(self.num_tag)
        out = crf(model)
        model = Model(input, out)
        model.compile(optimizer="rmsprop",
                      loss=crf.loss_function,
                      metrics=[crf.accuracy])

        save_load_utils.load_all_weights(model,
                                         filename,
                                         include_optimizer=False)
        return model
Example #13
def load_resources():
    print("Loading embeddings...")
    embeddings = {
        'en':
        KeyedVectors.load_word2vec_format('data/wiki.multi.en.vec.txt',
                                          binary=False),
        'es':
        KeyedVectors.load_word2vec_format('data/wiki.multi.es.vec.txt',
                                          binary=False),
        'de':
        KeyedVectors.load_word2vec_format('data/wiki.multi.de.vec.txt',
                                          binary=False),
        'it':
        KeyedVectors.load_word2vec_format('data/wiki.multi.it.vec.txt',
                                          binary=False)
    }

    print("Loading model...")
    model = create_model(num_classes, num_features)
    save_load_utils.load_all_weights(model,
                                     'models/full_train.h5',
                                     include_optimizer=False)

    return embeddings, model
    def get_model(self, filename):
        model = self.train()
        save_load_utils.load_all_weights(model, filename)
        return model
Example #15
File: predict.py  Project: gsj4ever/DCWS
    sents = line.split('。')
    if len(sents) == 1:
        test_sentences.extend(sents)
    else:
        for s in sents:
            if s != '':
                test_sentences.append(s.strip() + '。')
print(test_sentences)

# Reconstruct trained tokenizer
with open('./data/tokenizer', 'rb') as f:
    tokenizer = pickle.load(f)
# Load built model architecture and trained weights
with open('./data/model_architecture.json', 'r') as f:
    model = model_from_json(f.read(), custom_objects={'CRF': CRF})
save_load_utils.load_all_weights(model, './data/model_weights.hdf5')
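For reference, the save side of this architecture-JSON plus weights-HDF5 split could look like the following sketch, assuming the trained model object is named model:

# sketch: store the architecture separately so CRF can be re-registered as a custom object on load
with open('./data/model_architecture.json', 'w') as f:
    f.write(model.to_json())
save_load_utils.save_all_weights(model, './data/model_weights.hdf5')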

# test_tokens = tokenizer.texts_to_sequences(test_sentences)

# Deal with infrequent characters
test_tokens = [tokenizer.texts_to_sequences(i) for i in test_sentences]
for i in range(len(test_tokens)):
    for j in range(len(test_tokens[i])):
        if test_tokens[i][j]:
            test_tokens[i][j] = test_tokens[i][j][0]
        else:
            test_tokens[i][j] = tokenizer.num_words  # tokenizer.word_index.get('UNK')  # limit_size?

test_tokens_pad = sequence.pad_sequences(test_tokens,
                                         maxlen=MAX_LEN)
def load_model(filename):
    model = build_model()
    save_load_utils.load_all_weights(model, filename)
    return model
def load_embedding_bilstm2_crf_model(filename, NUM_CLASS, embeddings_matrix, input_length):
    model = build_embedding_bilstm2_crf_model(NUM_CLASS, embeddings_matrix, input_length)
    save_load_utils.load_all_weights(model, filename, include_optimizer=False)
    return model
Example #18
    def load(self, path):
        save_load_utils.load_all_weights(self.model, path, include_optimizer=False)
Example #19
def predict(model_path,
            test_data_path,
            result_file,
            wv_for_score_model,
            score_clf,
            window,
            word_vector,
            is_model_path=False):
    test_x, test_docs = get_test_data(test_data_path, FLAGS.vocab_dir,
                                      FLAGS.max_len)
    file_list = ['../dictionary/trainSenDict', '../dictionary/theme1117.txt']
    dictionary_list = get_dict(file_list)
    dict_index = matrix_index(test_docs, dictionary_list, FLAGS.max_len)
    print("test data is ok!")
    if (is_model_path):
        model_object = dnnModel(FLAGS)
        model = model_object.build_bilstm_model(word_vector)
        # model = model_object.build_crf_model(word_vector)
        save_load_utils.load_all_weights(model, model_path)
        # model = load_model(model)
    else:
        model = model_path
    print("model have loaded!")
    predictArr = model.predict([test_x, dict_index])
    predict = np.argmax(predictArr, axis=-1)
    flag = 'word'
    o = open(result_file, 'w', encoding='utf8')

    for i in range(len(test_docs)):
        sentence = test_docs[i]
        sentence_len = len(sentence)
        cur_predict = predict[i][:sentence_len]
        predict_theme = []
        predict_senti = []
        j = 0
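        # The integer labels below appear to follow a BIOES-style scheme (inferred from
        # the branches): 0 = outside; 1/2/3/4 = begin/inside/end/single for sentiment
        # spans; 5/6/7/8 = begin/inside/end/single for theme spans.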
        while (j < len(cur_predict)):
            cur_label = cur_predict[j]
            if (cur_label == 0):
                j += 1
                continue
            elif (cur_label == 4):
                predict_senti.append([sentence[j], j,
                                      j])  ## word, start_index, end_index
                j += 1
            elif (cur_label == 8):
                predict_theme.append([sentence[j], j, j])
                j += 1
            elif (cur_label == 1):
                senti_start = j
                j += 1
                while (j < len(cur_predict)):
                    if (cur_predict[j] == 3):
                        predict_senti.append(
                            [sentence[senti_start:j + 1], senti_start, j])
                        j += 1
                        break
                    elif (cur_predict[j] == 2):
                        j += 1
                    else:
                        break
            elif (cur_label == 5):
                theme_start = j
                j += 1
                while (j < len(cur_predict)):
                    if (cur_predict[j] == 7):
                        predict_theme.append(
                            [sentence[theme_start:j + 1], theme_start, j])
                        j += 1
                        break
                    elif (cur_predict[j] == 6):
                        j += 1
                    else:
                        break
            else:
                j += 1


#######################################################rule match method################################################
        result = []
        for senti_tuple in predict_senti:
            senti_word = senti_tuple[0]
            sen_vec = sentiment2vec(senti_word, wv_for_score_model, Flag=flag)
            sen_vec = np.array([sen_vec])
            score = str(score_clf.predict(sen_vec)[0])
            senti_start = senti_tuple[1]
            senti_end = senti_tuple[2]
            first = True
            is_match = False
            for theme_tuple in predict_theme:
                if (abs(senti_start - theme_tuple[2]) < window
                        or abs(senti_end - theme_tuple[1]) < window):
                    # if (min(abs(senti_start - theme_tuple[2]), abs(senti_end - theme_tuple[1])) < min_distance):
                    if (not first and theme_tuple[1] > senti_start):
                        result.pop()
                    result.append([theme_tuple[0], senti_word, score])
                    is_match = True
                    first = False
            if (not is_match):
                result.append(['NULL', senti_word, score])
        cur_predict = [str(item) for item in cur_predict]
        o.write(str(result)[1:-1] + '\t' + ''.join(cur_predict) + '\n')
    o.close()
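A hedged call sketch for the predict function above; every path and object here is an illustrative placeholder:

predict(model_path='models/bilstm_dict.h5',     # weights file, since is_model_path=True
        test_data_path='data/test.txt',
        result_file='result/pred.txt',
        wv_for_score_model=wv_model,            # word-vector model used by sentiment2vec
        score_clf=score_classifier,             # classifier that scores extracted sentiment words
        window=5,
        word_vector=word_vector,                # pretrained embeddings used to build the network
        is_model_path=True)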
Example #20
def load_embedding_bilstm2_crf_model(filename, VOCAB_SIZE, NUM_CLASS,
                                     TIME_STAMPS):
    model = build_embedding_lstm2_crf_model(VOCAB_SIZE, NUM_CLASS, TIME_STAMPS)
    save_load_utils.load_all_weights(model, filename, include_optimizer=False)
    return model
Example #21
# history = model.fit(X_tr, np.array(y_tr), batch_size=32, epochs=5, validation_split=0.1, verbose=1)
# #
# save_load_utils.save_all_weights(model, 'lstm_crf.model', include_optimizer=False)
#
# hist = pd.DataFrame(history.history)
#
#
# plt.style.use("ggplot")
# plt.figure(figsize=(12,12))
# plt.plot(hist["acc"])
# plt.plot(hist["val_acc"])
# plt.show()


save_load_utils.load_all_weights(model, 'lstm_crf.model')

test_pred = model.predict(X_te, verbose=2)

idx2tag = {i: w for w, i in tag2idx.items()}
# print(idx2tag)
print(test_pred)


def pred2label(pred):
    out = []
    for pred_i in pred:
        out_i = []
        for p in pred_i:
            p_i = np.argmax(p)
            out_i.append(idx2tag[p_i].replace("PAD", "O"))
        out.append(out_i)
    return out
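A hedged usage line for the helper above, converting the padded predictions into tag sequences:

pred_labels = pred2label(test_pred)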
Example #22
def load_model(filename, model):
    save_load_utils.load_all_weights(model, filename)
def load_model():
    embedding_size = 100
    word_vocabulary_size = dataset.vocab_size
    character_vocab_size = dataset.char_vocab_size
    character_emb_size = 25
    char_lstm_hidden_dims = 25
    intent_labels = dataset.intent_size
    tagging_lstm_hidden_dims = 100
    tag_labels = dataset.label_vocab_size
    words_input = Input(shape=(sentence_length, ), name='words_input')
    embedding_layer = Embedding(word_vocabulary_size,
                                embedding_size,
                                input_length=sentence_length,
                                trainable=True,
                                name='word_embedding_layer')
    word_embeddings = embedding_layer(words_input)
    word_embeddings = Dropout(0.5)(word_embeddings)
    word_chars_input = Input(shape=(sentence_length, word_length),
                             name='word_chars_input')
    char_embedding_layer = Embedding(character_vocab_size,
                                     character_emb_size,
                                     input_length=word_length,
                                     name='char_embedding_layer')
    char_embeddings = TimeDistributed(char_embedding_layer)(word_chars_input)
    char_embeddings = TimeDistributed(
        Bidirectional(LSTM(char_lstm_hidden_dims)))(char_embeddings)
    char_embeddings = Dropout(0.5)(char_embeddings)
    shared_bilstm_layer = Bidirectional(
        LSTM(100, return_sequences=True, return_state=True))
    shared_lstm_out = shared_bilstm_layer(word_embeddings)
    shared_lstm_y = shared_lstm_out[:1][0]  # save y states of the LSTM layer
    states = shared_lstm_out[1:]
    hf, cf, hb, cb = states  # extract last hidden states
    h_state = concatenate([hf, hb], axis=-1)  # concatenate last states
    intent_out = Dense(intent_labels,
                       activation='softmax',
                       name='intent_classifier_output')(h_state)
    combined_features = concatenate([shared_lstm_y, char_embeddings], axis=-1)
    tagging_lstm = Bidirectional(
        LSTM(tagging_lstm_hidden_dims,
             return_sequences=True))(combined_features)
    second_bilstm_layer = Dropout(0.5)(tagging_lstm)
    crf = CRF(tag_labels, sparse_target=False)
    labels_out = crf(second_bilstm_layer)
    model = Model(inputs=[words_input, word_chars_input],
                  outputs=[intent_out, labels_out])
    loss_f = {
        'intent_classifier_output': 'categorical_crossentropy',
        'crf_1': crf.loss_function
    }
    metrics = {
        'intent_classifier_output': 'categorical_accuracy',
        'crf_1': crf.accuracy
    }
    model.compile(loss=loss_f, optimizer='adam', metrics=metrics)
    model_name = 'my_model'
    print('Loading model weights')
    save_load_utils.load_all_weights(model,
                                     model_name,
                                     include_optimizer=False)
    return model
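A hedged usage sketch for the joint model returned above; the input arrays are placeholders whose shapes follow the two Input layers defined in the function (numpy assumed imported as np):

# word_ids: (batch, sentence_length) int ids; char_ids: (batch, sentence_length, word_length)
intent_probs, tag_probs = model.predict([word_ids, char_ids])
intents = np.argmax(intent_probs, axis=-1)
tag_ids = np.argmax(tag_probs, axis=-1)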
def load_embedding_bilstm2_crf_model(filename, VOCAB_SIZE):
    model = build_bilstm_crf_model(VOCAB_SIZE)
    # json_string = model.to_json()
    # model = model_from_json(json_string)
    save_load_utils.load_all_weights(model, filename, include_optimizer=False)
    return model
Example #25
    def restore_model(self, filepath):
        save_load_utils.load_all_weights(self.model, filepath)
Example #26
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)

    model = Sequential()
    model.add(
        Embedding(input_dim=4466,
                  output_dim=64,
                  input_length=MAX_TEXT_LENGTH,
                  trainable=False,
                  mask_zero=True))
    model.add(
        Bidirectional(LSTM(64, return_sequences=True, recurrent_dropout=0.3)))
    model.add(Dense(7))
    crf = CRF(7)
    model.add(crf)
    model.compile(optimizer=ks.optimizers.Adadelta(),
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])

    model.summary()

    word_tokenizer = load_obj('model/word_tokenizer.pickle')
    index_pos = load_obj('model/index_pos.pickle')
    save_load_utils.load_all_weights(model,
                                     r'model/ner_250epoch_weights.h5',
                                     include_optimizer=False)

    to_test = '琪斯美是日本的“东方project”系列弹幕游戏及其衍生作品的登场角色之一。'
    ner_ret = test_sentences(to_test)
    pretty_ner(to_test, ner_ret)
Example #27
    def TryLoadModel(self):
        tempModel = self.constructModel()
        save_load_utils.load_all_weights(tempModel, 'week4.h5')
Example #28
File: main.py  Project: zxyscz/ner-english
# Run the predict function once immediately after loading the model
print('test train...')
bilstm_model.predict(np.zeros((1, 50)))


input = Input(shape=(max_len,))
bilstm_crf_model = Embedding(input_dim=n_words + 1, output_dim=20,
                      input_length=max_len, mask_zero=True)(input)  # 20-dim embedding
bilstm_crf_model = Bidirectional(LSTM(units=50, return_sequences=True,
                               recurrent_dropout=0.1))(bilstm_crf_model)  # variational biLSTM
bilstm_crf_model = TimeDistributed(Dense(50, activation="relu"))(bilstm_crf_model)  # a dense layer as suggested by neuralNer
crf = CRF(n_tags)  # CRF layer
out = crf(bilstm_crf_model)  # output
bilstm_crf_model = Model(input, out)
save_load_utils.load_all_weights(bilstm_crf_model, filepath="result/bilstm-crf.h5")


bilstm_crf_model.predict(np.zeros((1, 50)))
print('test done.')

# Test data
def build_input(test_sentence):
    test_sentence = test_sentence.split(" ")
    x_test_sent = pad_sequences(sequences=[[word2idx.get(w, 0) for w in test_sentence]],
                                padding="post", value=0, maxlen=max_len)
    return test_sentence, x_test_sent


def bilstm_predict(model, test_sentence, x_test_sent):
    pred = model.predict(np.array([x_test_sent[0]]))
    pred = np.argmax(pred, axis=-1)
    return pred
def load_embedding_bilstm2_crf_model(filename):
    model = build_embedding_bilstm2_crf_model()
    save_load_utils.load_all_weights(model, filename)
    return model
Example #30
    def test_exist(self, glove, test_data, test_labels):
        # get word embeddings
        utils = wordUtils.Utils()

        if glove:
            # use glove
            self.words_list, self.embedding_matrix = utils.load_glove()
            unword_n = len(self.words_list)

        else:
            self.words_list, self.embedding_matrix = utils.load_word2vec()
            unword_n = len(self.words_list)

        # get the training corpus
        cr = corpusreader.CorpusReader(test_data, test_labels)
        corpus = cr.trainseqs

        # get the number of the embedding
        for idx in range(len(corpus)):
            words = corpus[idx]['tokens']
            words_id = []
            for i in words:

                # get the number of the embedding
                try:
                    # the index of the word in the embedding matrix
                    index = self.words_list.index(i)
                except ValueError:
                    # use the embedding full of zeros to identify an unknown word
                    index = unword_n

                # the index of the word in the embedding matrix
                words_id.append(index)

            corpus[idx]['embs'] = words_id

        input = Input(shape=(None,))
        el = Embedding(len(self.words_list) + 1, 200, weights=[self.embedding_matrix], trainable=False)(input)
        bl1 = Bidirectional(LSTM(128, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                            merge_mode="concat",
                            name="lstm1")(el)
        bl2 = Bidirectional(LSTM(64, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                            merge_mode="concat",
                            name="lstm2")(bl1)
        bl3 = Bidirectional(LSTM(64, return_sequences=True, recurrent_dropout=0.5, dropout=0.5),
                            merge_mode="concat",
                            name="lstm3")(bl2)
        model = TimeDistributed(Dense(50, activation="relu"))(bl3)  # a dense layer as suggested by neuralNer
        crf = CRF(self.lab_len)  # CRF layer
        out = crf(model)  # output

        model = Model(input, out)
        model.compile(optimizer="rmsprop", loss=crf.loss_function, metrics=[crf.accuracy])
        model.summary()
        save_load_utils.load_all_weights(model, 'word_models/words_glove_multiLSTM31.h5')

        for doc in corpus:
            doc_arr = doc['embs']
            p = model.predict(np.array([doc_arr]))
            p = np.argmax(p, axis=-1)

            position = 0
            offsets = defaultdict(list)
            counter = 0
            # check if there are any mutations identified
            # {'O': 0, 'B-E': 1, 'I-E': 2, 'E-E': 3, 'S-E': 4}
            B = False
            last = 0
            for idx in p[0]:
                if idx == 1 and last == 1:
                    counter = counter + 1
                    offsets[counter].append(position)
                    B = True
                elif idx == 1:
                    B = True
                    offsets[counter].append(position)
                    last = 1
                elif idx == 2 and B:
                    offsets[counter].append(position)
                    last = 2
                elif idx == 3 and B:
                    offsets[counter].append(position)
                    last = 3
                    B = False
                    counter = counter + 1
                elif idx == 4:
                    offsets[counter].append(position)
                    counter = counter + 1
                    last = 4
                else:
                    B = False

                position = position + 1

            # open file to write
            textid = str(doc['textid'])
            abstract = open("words-silver/" + textid + ".a1", 'w')
            for i in offsets:
                word = offsets.get(i)
                size = len(word)
                if size == 1:
                    s = word[0]  # just one; singleton

                    abstract.write(str(doc['tokstart'][s]) + "\t")
                    abstract.write(str(doc['tokend'][s]) + "\t")
                    abstract.write(str(doc['tokens'][s]) + "\n")


                elif size > 1:
                    s = word[0]  # start of token
                    e = word[-1]  # end of token

                    abstract.write(str(doc['tokstart'][s]) + "\t")
                    abstract.write(str(doc['tokend'][e]) + "\t")
                    token = ""
                    for c in word:
                        token = token + doc['tokens'][c]

                    abstract.write(str(token) + "\n")
Example #31
    def load(self, path):
        save_load_utils.load_all_weights(self.model, path, include_optimizer=False)
Example #32
def load_embedding_bilstm2_crf_model(filename):
    model = build_embedding_bilstm2_crf_model()
    save_load_utils.load_all_weights(model, filename)
    return model