import kashgari
import tensorflow as tf
from kashgari.embeddings import BERTEmbedding


def main():

    # Chinese input sentences for the NER model
    examples = [
        "《中国风水十讲》是2007年华夏出版社出版的图书,作者是杨文衡",
        "你是最爱词:许常德李素珍/曲:刘天健你的故事写到你离去后为止",
        "《苏州商会档案丛编第二辑》是2012年华中师范大学出版社出版的图书,作者是马敏、祖苏、肖芃"
    ]

    sess = tf.compat.v1.Session()

    model_path = "/home/johnsaxon/github.com/oushu1zhangxiangxuan1/HolmesNER/serving/savedmodel_loader/models/ner/m1"
    # tf.saved_model.loader.load is deprecated in TF2; use the compat.v1 alias
    tf.compat.v1.saved_model.loader.load(sess, [tf.saved_model.SERVING],
                                         model_path)

    # Output tensor of the CRF layer in the exported NER graph
    prediction = sess.graph.get_tensor_by_name("layer_crf/cond/Merge:0")

    # kashgari's BERTEmbedding is used here only to tokenize and encode inputs
    bert_embed = BERTEmbedding(
        "/home/johnsaxon/github.com/Entity-Relation-Extraction/pretrained_model/chinese_L-12_H-768_A-12",
        task=kashgari.LABELING,
        sequence_length=100)

    # Encode the sentences into the model's two input arrays
    x0, x1 = bert_embed.process_x_dataset(examples)

    print(x0, x1)

    predictions_result = sess.run(prediction,
                                  feed_dict={
                                      'Input-Segment_1:0': x0,
                                      'Input-Token_1:0': x1
                                  })
    sess.close()

    print(predictions_result)
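
# The snippet defines main() but never calls it; a standard entry-point guard
# makes it runnable as a script
if __name__ == "__main__":
    main()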

# Example 2
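
# `b` is not defined in this snippet; a minimal sketch, assuming it is a
# kashgari BERTEmbedding like the one in Example 1 (the checkpoint path is a
# placeholder, and sequence_length=12 matches the padded target_index below)
import kashgari
from kashgari.embeddings import BERTEmbedding

b = BERTEmbedding(
    "/path/to/chinese_L-12_H-768_A-12",  # hypothetical checkpoint path
    task=kashgari.LABELING,
    sequence_length=12)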
# from kashgari.corpus import SMP2018ECDTCorpus

# test_x, test_y = SMP2018ECDTCorpus.load_data('valid')

# b.analyze_corpus(test_x, test_y)
data1 = '湖 北'.split(' ')  # Hubei
data2 = '武 汉'.split(' ')  # Wuhan (held out below as the query)
data3 = '纽 约'.split(' ')  # New York
data4 = '武 汉'.split(' ')  # Wuhan
data5 = '北 京'.split(' ')  # Beijing
data6 = '武 汉 地 铁'.split(' ')  # Wuhan Metro
# Embed every candidate sentence (data2 is kept aside as the query)
sents = [data1, data3, data4, data5, data6]
doc_vecs = b.embed(sents, debug=True)

# Check tokenization of '语言模型' ("language model"): [CLS]=101 and [SEP]=102
# are added, then the sequence is zero-padded to length 12
tokens = b.process_x_dataset([['语', '言', '模', '型']])[0]
target_index = [101, 6427, 6241, 3563, 1798, 102]
target_index = target_index + [0] * (12 - len(target_index))
assert list(tokens[0]) == list(target_index)
print(tokens)
print(doc_vecs)
print(doc_vecs.shape)
print(doc_vecs[0])
print(doc_vecs[0][0])
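
# `cos_dist` is not defined in this snippet; a minimal sketch, assuming the
# "normalized dot product" described in the comment below (the per-token
# embedding matrices are flattened into single vectors first)
import numpy as np


def cos_dist(u, v):
    u, v = np.ravel(u), np.ravel(v)
    return float(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))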

query_vec = b.embed([data2])[0]
query = '武 汉'
# compute normalized dot product as score
for i, sent in enumerate(sents):
    d = b.embed([sent])[0]
    c = cos_dist(d, query_vec)
    print('%s vs %s: score=%.4f' % (query, ' '.join(sent), c))