def main():
    """Run NER CRF predictions from a TF v1 SavedModel over example sentences.

    Loads the SavedModel at a hard-coded path, looks up the CRF output tensor
    by name, converts the example sentences to model inputs with a kashgari
    ``BERTEmbedding``, and prints the raw prediction array.

    Side effects: reads model files from disk, prints intermediate inputs and
    the prediction result to stdout. Returns ``None``.
    """
    examples = [
        "《中国风水十讲》是2007年华夏出版社出版的图书,作者是杨文衡",
        "你是最爱词:许常德李素珍/曲:刘天健你的故事写到你离去后为止",
        "《苏州商会档案丛编第二辑》是2012年华中师范大学出版社出版的图书,作者是马敏、祖苏、肖芃",
    ]
    model_path = "/home/johnsaxon/github.com/oushu1zhangxiangxuan1/HolmesNER/serving/savedmodel_loader/models/ner/m1"

    # Tokenize/segment the examples exactly as the model was trained;
    # sequence_length must match the exported graph's expected input length.
    bert_embed = BERTEmbedding(
        "/home/johnsaxon/github.com/Entity-Relation-Extraction/pretrained_model/chinese_L-12_H-768_A-12",
        task=kashgari.LABELING,
        sequence_length=100)
    x0, x1 = bert_embed.process_x_dataset(examples)
    print(x0, x1)

    # Context manager guarantees the session is closed even if loading or
    # running raises (the original leaked the session on any exception).
    with tf.compat.v1.Session() as sess:
        tf.compat.v1.saved_model.loader.load(
            sess, [tf.saved_model.SERVING], model_path)
        # CRF decode output tensor, located by its graph name.
        prediction = sess.graph.get_tensor_by_name("layer_crf/cond/Merge:0")
        # NOTE(review): kashgari's process_x_dataset conventionally yields
        # token ids first, yet x0 feeds the *segment* placeholder here —
        # looks swapped; confirm against the exported graph before changing.
        predictions_result = sess.run(prediction, feed_dict={
            'Input-Segment_1:0': x0,
            'Input-Token_1:0': x1,
        })

    print(predictions_result)
# from kashgari.corpus import SMP2018ECDTCorpus
# test_x, test_y = SMP2018ECDTCorpus.load_data('valid')
# b.analyze_corpus(test_x, test_y)

# Candidate sentences as per-character token lists (the embedder `b` —
# defined elsewhere in this file — consumes lists of tokens).
data1 = '湖 北'.split(' ')
data3 = '纽 约'.split(' ')
data2 = '武 汉'.split(' ')
data4 = '武 汉'.split(' ')
data5 = '北 京'.split(' ')
data6 = '武 汉 地 铁'.split(' ')
# data2 is deliberately held out as the query; data4 is its duplicate
# inside the candidate pool.
sents = [data1, data3, data4, data5, data6]
doc_vecs = b.embed(sents, debug=True)

# Sanity check: '语言模型' must tokenize to the expected BERT ids
# ([CLS]=101 ... [SEP]=102), zero-padded to length 12.
tokens = b.process_x_dataset([['语', '言', '模', '型']])[0]
target_index = [101, 6427, 6241, 3563, 1798, 102]
target_index = target_index + [0] * (12 - len(target_index))
assert list(tokens[0]) == list(target_index)
print(tokens)

# Inspect the embedding output and its shape.
print(doc_vecs)
print(doc_vecs.shape)
print(doc_vecs[0])
print(doc_vecs[0][0])

query_vec = b.embed([data2])[0]
query = '武 汉'
# compute normalized dot product as score
for i, sent in enumerate(sents):
    d = b.embed([sent])[0]
    c = cos_dist(d, query_vec)
    # Fix: the score was computed and silently discarded, making the loop a
    # no-op; report each candidate's similarity to the query.
    print('%d\t%.6f\t%s' % (i, c, ' '.join(sent)))