Example #1
def test():
    with open(os.path.join(filename1, "train1.pkl"), 'rb') as inp:
        word2id = pickle.load(inp)
        id2word = pickle.load(inp)
        tag2id = pickle.load(inp)
        id2tag = pickle.load(inp)
        x_train = pickle.load(inp)
        y_train = pickle.load(inp)
        x_test = pickle.load(inp)
        y_test = pickle.load(inp)
        x_valid = pickle.load(inp)
        y_valid = pickle.load(inp)

    epochs = 31
    batch_size = 32

    config = {}
    config["lr"] = 0.001
    config["embedding_dim"] = 100
    config["sen_len"] = len(x_train[0])
    config["batch_size"] = batch_size
    config["embedding_size"] = len(word2id) + 1
    config["tag_size"] = len(tag2id)
    config["pretrained"] = False

    embedding_pre = []
    # Test using the trained model
    print("begin extraction...")
    model = Model(config, embedding_pre, dropout_keep=1)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state('./model')
        if ckpt is None:
            print('Model not found, please train your model first')
        else:
            path = ckpt.model_checkpoint_path
            print('loading pre-trained model from %s.....' % path)
            saver.restore(sess, path)
            # Run extraction on every file under filename2, writing each
            # result under the same name in the result directory.
            for name in os.listdir(filename2):
                src = os.path.join(filename2, name)
                dst = os.path.join('F:/天池/糖尿病文本分析/DiabetesKG/data/result3/',
                                   name)
                extraction(src, dst, model, sess, word2id, id2tag, batch_size)
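
The ten pickle.load calls at the top of test() must mirror the exact order in which the objects were dumped. A minimal sketch of the corresponding dump step, assuming the same filename1 and data variables as above:

import os
import pickle

# Hypothetical dump step: objects must be pickled in exactly the
# order that test() reads them back.
with open(os.path.join(filename1, "train1.pkl"), 'wb') as outp:
    for obj in (word2id, id2word, tag2id, id2tag,
                x_train, y_train, x_test, y_test, x_valid, y_valid):
        pickle.dump(obj, outp)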
Example #2
def predict_ner(self, text):
    word2id, id2tag, config = self.get_config()
    embedding_pre = []
    model = Model(config, embedding_pre, dropout_keep=1)
    batch_size = config['batch_size']
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state('./model')
        if ckpt is None:
            print('Model not found, please train your model first')
        else:
            path = ckpt.model_checkpoint_path
            print('loading pre-trained model from %s.....' % path)
            saver.restore(sess, path)
            # Split on Chinese and ASCII punctuation, then drop any
            # empty strings the split produces.
            text_list = re.split(u'[,。!?、‘’“”(),.!?\'"()]', text)
            text_list = [t for t in text_list if t]
            half_batch = batch_size // 8
            all_entity = []
            for i in range(0, len(text_list), half_batch):
                text_part = text_list[i:i + half_batch]
                entity = self.test_input(text_part, model, sess, word2id,
                                         id2tag, batch_size)
                all_entity.extend(entity)
            # Map POS-style tags to human-readable labels; unmapped tags
            # are kept as-is.
            label_map = {'nr': '人名',      # person name
                         'ns': '地名',      # place name
                         'nt': '机构团体'}  # organization
            new_entity = []
            for entity in all_entity:
                tag, value = entity.split(':', 1)
                new_entity.append({value: label_map.get(tag, tag)})
            return new_entity
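
A minimal usage sketch for predict_ner; the wrapper class name NerPredictor and the sample sentence are hypothetical:

# Hypothetical usage; the class name and constructor are assumptions.
predictor = NerPredictor()
entities = predictor.predict_ner(u'王医生在北京的医院工作。')
print(entities)  # e.g. [{'王医生': '人名'}, {'北京': '地名'}, ...]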
Example #3
def train():
    with open(os.path.join(filename1, "train1.pkl"), 'rb') as inp:
        word2id = pickle.load(inp)
        id2word = pickle.load(inp)
        tag2id = pickle.load(inp)
        id2tag = pickle.load(inp)
        x_train = pickle.load(inp)
        y_train = pickle.load(inp)
        x_test = pickle.load(inp)
        y_test = pickle.load(inp)
        x_valid = pickle.load(inp)
        y_valid = pickle.load(inp)

    data_train = BatchGenerator(x_train, y_train, shuffle=True)
    data_valid = BatchGenerator(x_valid, y_valid, shuffle=False)
    data_test = BatchGenerator(x_test, y_test, shuffle=False)
    epochs = 31
    batch_size = 32

    config = {}
    config["lr"] = 0.001
    config["embedding_dim"] = 100
    config["sen_len"] = len(x_train[0])
    config["batch_size"] = batch_size
    config["embedding_size"] = len(word2id) + 1
    config["tag_size"] = len(tag2id)
    config["pretrained"] = False

    embedding_pre = []
    # Train the model
    print("begin to train...")
    model = Model(config, embedding_pre, dropout_keep=0.5)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        # The original calls a helper also named `train`, which this wrapper
        # would shadow and call recursively with the wrong arity; the
        # per-epoch loop is assumed to be defined elsewhere and is called
        # train_epochs here to avoid the name clash.
        train_epochs(model, sess, saver, epochs, batch_size, data_train,
                     data_valid, id2word, id2tag)
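
The epoch-loop helper called at the end of train() is not shown in this example. A minimal sketch of what it might look like; BatchGenerator's next_batch method and the Model attributes (train_op, loss, input_data, labels) are assumptions based on the other examples on this page:

# Sketch of the missing per-epoch training loop; the API names used
# here (next_batch, train_op, loss, input_data, labels) are assumptions.
def train_epochs(model, sess, saver, epochs, batch_size, data_train,
                 data_valid, id2word, id2tag):
    batches = len(data_train.y) // batch_size
    for epoch in range(epochs):
        for _ in range(batches):
            x_batch, y_batch = data_train.next_batch(batch_size)
            _, loss = sess.run([model.train_op, model.loss],
                               feed_dict={model.input_data: x_batch,
                                          model.labels: y_batch})
        print('epoch %d, loss %.4f' % (epoch, loss))
        saver.save(sess, './model/model.ckpt')  # checkpoint each epoch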
Example #4
            # Parse the vector components as floats (the original Python 2
            # code used map(eval, ...), which is unsafe and lazy in Python 3).
            word2vec[line.split()[0]] = list(map(float, line.split()[1:]))

    unknow_pre = [1] * 100  # vector for word id 0 (unknown/padding)
    embedding_pre.append(unknow_pre)
    for word in word2id:
        if word in word2vec:  # dict.has_key() was removed in Python 3
            embedding_pre.append(word2vec[word])
        else:
            embedding_pre.append(unknow_pre)

    embedding_pre = np.asarray(embedding_pre)

if len(sys.argv) == 2 and sys.argv[1] == "test":
    print "begin to test..."
    model = Model(config, embedding_pre, dropout_keep=1)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state('./model')
        if ckpt is None:
            print('Model not found, please train your model first')
        else:
            path = ckpt.model_checkpoint_path
            print('loading pre-trained model from %s.....' % path)
            saver.restore(sess, path)
            test_input(model, sess, word2id, id2tag, batch_size)

elif len(sys.argv) == 3:
    print "begin to extraction..."
    model = Model(config, embedding_pre, dropout_keep=1)
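
Example #4 starts mid-file; the enclosing loop that fills word2vec typically reads a text-format vector file line by line. A sketch of that missing context (the file name vec.txt is an assumption):

# Hypothetical context for the fragment above: each line of the vector
# file is "word v1 v2 ... v100".
word2vec = {}
with open('vec.txt', 'r') as f:  # file name is an assumption
    for line in f:
        parts = line.split()
        word2vec[parts[0]] = list(map(float, parts[1:]))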
Example #5
from bilstm_crf import Model
import tensorflow as tf
import load_data

config = {}
config["lr"] = 0.01
config["embedding_dim"] = 100
config["sen_len"] = 15
config["batch_size"] = 32
config["embedding_size"] = 1856
config["tag_size"] = 27

X, y, seq_len = load_data.train_data()
X_eval, y_eval, seq_len_eval = load_data.eval_data()

model = Model(config)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(1000):
        _, loss, acc = sess.run([model.train_op, model.loss, model.accuracy],
                                feed_dict={model.keep_prob: 0.9,
                                           model.input_data: X,
                                           model.labels: y,
                                           model.seq_len: seq_len})
        print('epoch: ', epoch, ' loss: ', loss, ' acc: ', acc)

        # Evaluate without train_op, so the weights are not updated on
        # the evaluation data (the original ran train_op here by mistake).
        eval_loss, eval_acc = sess.run([model.loss, model.accuracy],
                                       feed_dict={model.keep_prob: 1,
                                                  model.input_data: X_eval,
                                                  model.labels: y_eval,
                                                  model.seq_len: seq_len_eval})
        print('eval loss: ', eval_loss, ' eval acc: ', eval_acc)
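
This script feeds the whole training and evaluation sets in single sess.run calls, which only works for small datasets. A minimal mini-batch variant of the training step, reusing the config above (the slicing-based batching is an assumption, not part of load_data):

# Hypothetical mini-batch version of the training step.
batch_size = config["batch_size"]
for start in range(0, len(X), batch_size):
    end = start + batch_size
    sess.run(model.train_op,
             feed_dict={model.keep_prob: 0.9,
                        model.input_data: X[start:end],
                        model.labels: y[start:end],
                        model.seq_len: seq_len[start:end]})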