def main():
    """Load the saved NER model and run tag / entity prediction on one sample sentence."""
    from kashgari.corpus import ChineseDailyNerCorpus

    # Machine-specific path to the trained kashgari NER model.
    model = load_model('/home/johnsaxon/HolmesNER/BERT/ner.h5')
    print(dir(model))

    print("\n=================predicton==============\n")
    # One hand-written news sentence, assembled character by character.
    sample = [
        ''.join(['第', '一', '次', '明', '确', '提', '出', '把', '自', '己', '建', '设', '成',
                 '“', '国', '家', '科', '学', '思', '想', '库', '”', '的', '设', '想', ',',
                 '路', '甬', '祥', '指', '出', ',', '面', '向', '新', '世', '纪', ',', '中',
                 '科', '院', '学', '部', '要', '建', '设', '成', '最', '有', '影', '响', '的',
                 '国', '家', '宏', '观', '决', '策', '科', '技', '咨', '询', '系', '统', ',',
                 '要', '充', '分', '发', '挥', '院', '士', '群', '体', '的', '优', '势', ',',
                 '加', '强', '科', '技', '战', '略', '研', '究', ',', '重', '点', '做', '好',
                 '对', '国', '家', '宏', '观', '科', '技', '政', '策', '、', '科', '技', '发',
                 '展', '计', '划', '、', '学', '科', '发', '展', '战', '略', '的', '制', '定',
                 '以', '及', '经', '济', '建', '设', '、', '社', '会', '发', '展', '中', '重',
                 '大', '科', '技', '问', '题', '的', '咨', '询', '工', '作', '。'])
    ]
    predictions = model.predict(sample)
    print(predictions)
    print("\n\n")

    print("\n=================predicton entities==============\n")
    predictions = model.predict_entities(sample)
    print(predictions)
def create_ner_model():
    """Load the serialized NER model and warm it up with a single prediction.

    Returns the model together with the module-level graph ``g`` and session
    ``sess`` so callers can run inference against the same TF state.
    """
    global g, sess
    path = '/home/johnsaxon/HolmesNER/BERT/ner.h5'
    ner_model = load_model(path)
    # Build the Keras predict function eagerly — presumably so later calls
    # from other threads are safe (TODO confirm against serving code).
    ner_model.tf_model._make_predict_function()
    # Warm-up input: one sentence as a list of single characters.
    warmup = [['第', '一', '次', '明', '确', '提', '出', '把', '自', '己', '建', '设', '成',
               '“', '国', '家', '科', '学', '思', '想', '库', '”', '的', '设', '想', ',',
               '路', '甬', '祥', '指', '出', ',', '面', '向', '新', '世', '纪', ',', '中',
               '科', '院', '学', '部', '要', '建', '设', '成', '最', '有', '影', '响', '的',
               '国', '家', '宏', '观', '决', '策', '科', '技', '咨', '询', '系', '统', ',',
               '要', '充', '分', '发', '挥', '院', '士', '群', '体', '的', '优', '势', ',',
               '加', '强', '科', '技', '战', '略', '研', '究', ',', '重', '点', '做', '好',
               '对', '国', '家', '宏', '观', '科', '技', '政', '策', '、', '科', '技', '发',
               '展', '计', '划', '、', '学', '科', '发', '展', '战', '略', '的', '制', '定',
               '以', '及', '经', '济', '建', '设', '、', '社', '会', '发', '展', '中', '重',
               '大', '科', '技', '问', '题', '的', '咨', '询', '工', '作', '。']]
    ner_model.predict(warmup)
    return ner_model, g, sess
def __init__(self):
    """Interactively load a department config and the pre-trained NER model.

    Prompts on stdin for a department name, reads the matching JSON config
    file, and loads the BERT-based NER model from a fixed local path.
    """
    super().__init__()
    # Maps Chinese field labels (phone, homepage, email, office, fax,
    # major, title, position) to canonical English attribute names.
    self.refer_dict = {
        '电话': 'phone',
        '办公电话': 'phone',
        '个人主页': 'homepage',
        '电子邮箱': 'email',
        '邮箱': 'email',
        '地址': 'office',
        '办公室': 'office',
        '传真': 'fax',
        '系别': 'major',
        '职称': 'title',
        '职务': 'position'
    }
    # NOTE(review): blocks on input() at construction time; the config file
    # name is built directly from the user-supplied department string.
    with open('../configs/config_%s.json' % input("department:"), 'r', encoding='utf-8') as f:
        self.config = json.load(f)
    #self.file = re.search('(.*?).json',f.name).group(1)
    # Machine-specific path to the saved BERT NER model.
    self.model = load_model(
        r'D:\Ubuntu\rootfs\home\pt\models\bert_epoch_20_new')
from kashgari.utils import load_model
from kashgari.corpus import ChineseDailyNerCorpus

# Restore the trained NER model and show its public surface.
model = load_model('models/ner.h5')
print(dir(model))

# Evaluate and predict on a small slice of the official test split.
test_x, test_y = ChineseDailyNerCorpus.load_data("test")
sample_x, sample_y = test_x[:5], test_y[:5]
print("\n test_x:\n{}\n\n".format(sample_x))

metrics = model.evaluate(sample_x, sample_y)
print("\n\n")
print(metrics)
print("\n\n")

print("\n=================predicton==============\n")
predictions = model.predict(sample_x)
print(predictions)
print("\n\n")

print("\n=================predicton entities==============\n")
predictions = model.predict_entities(sample_x)
print(predictions)
def __init__(self, model_path):
    """Deserialize the model stored at *model_path* and keep it on the instance."""
    # Held for later predict/evaluate calls by other methods.
    self.model = load_model(model_path)
# Script: run entity prediction over a CoNLL-format file and dump the result.
from kashgari.corpus import DataReader
from keras.models import load_model
from keras.backend.tensorflow_backend import set_session
import os
import tensorflow as tf

# Pin the job to GPU 1 and let TF grow GPU memory on demand instead of
# reserving it all up front.
os.environ["CUDA_VISIBLE_DEVICES"] = '1'
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
set_session(sess)

test_x, test_y, _ = DataReader.read_conll_format_file_word(
    "/home/y182235017/law/predict.txt")
print(f"test data count: {len(test_x)}")

# NOTE(review): imported only after the session is configured — presumably so
# kashgari picks up the session registered above; keep this ordering.
from kashgari import utils
model = utils.load_model("/home/y182235017/law/model/Word_BiLSTM_CRF_Model")
# model = load_model("/home/y182235017/law/model/Word_BiLSTM_CRF_Attention_Model_test1/my_model.h5")
import codecs
# result=model.evaluate(test_x,test_y,batch_size=128)
result = model.predict_entities_all(test_x)
# Write predictions as UTF-8 text (assumes the result is a string —
# TODO confirm predict_entities_all's return type).
with codecs.open("/home/y182235017/law/2.txt", "w", "utf-8") as file_obj:
    file_obj.write(result)
data.loc[idx, 'entity_id'] = ' '.join(entity_id_list) data.loc[idx, 'entity_idx'] = ' '.join(entity_idx_list) '''关系抽取 # title:标题内容 # entity_each:该标题中所有实体的列表,列表元素为(entity_type,entity_var,entity_span) # 其中entity_type为实体类型,entity_var为实体值,entity_span为实体所在的位置 ''' g.close() f = open( r'D:\repositories\DaChuang\utils\process_entity\entity2id_new.json', 'w', encoding='utf-8') line = json.dumps(entity2id, ensure_ascii=False) f.write(line) f.close() return data, entities if __name__ == "__main__": model = load_model( r'D:\Ubuntu\rootfs\home\pt\models\BILSTM_CRF_epoch_100.model') data = pandas.read_csv( r'D:\repositories\DaChuang\data\news_each_school\info_output.csv', encoding='utf-8') a = data.loc[0:5, :] data, entities = getEntity_from_NER(model, data) print(entities) print(data)
if len(session) > 1: #print(session) session_count = Counter(session) entity = refer_dict[session_count.most_common(1)[0][0]] cover = (indexes[0],indexes[-1]+1) entity_each.append((entity,cover)) session = [char[2]] indexes = [index] else: if len(session) > 1: #print(session) session_count = Counter(session) entity = refer_dict[session_count.most_common(1)[0][0]] cover = (indexes[0],indexes[-1]+1) entity_each.append((entity,cover)) session = [] indexes = [] if len(session) > 1: session_count = Counter(session) entity = refer_dict[session_count.most_common(1)[0][0]] cover = (indexes[0],indexes[-1]+1) entity_each.append((entity,cover)) entities.append(entity_each) return entities if __name__ == "__main__": model = load_model(r'D:\Ubuntu\rootfs\home\pt\models\bert_epoch_20') data = pandas.read_csv(r'D:\repositories\DaChuang\data\news_each_school\info_output.csv',encoding='utf-8') a = data.title[2000:2005] getEntity(model,a)
import kashgari
import tensorflow
import tqdm
import keras
from kashgari.tasks.classification import CNNModel
import jieba
from kashgari.utils import load_model

# Restore the trained topic classifier from disk.
new_model = load_model('topic_bigru')

# One raw headline; the model receives it untokenized, wrapped as a batch.
news = ['耗油好多啊']
x = [news]

q = new_model.predict(x)
print(q)
logging.debug('------ sample {} ------'.format(index)) logging.debug('x : {}'.format(x_data[index])) logging.debug('y_true : {}'.format(y_true[index])) logging.debug('y_pred : {}'.format(y_pred[index])) report = classification_report(y_true, y_pred, digits=digits) print(classification_report(y_true, y_pred, digits=digits)) return report def build_model_arc(self): raise NotImplementedError if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) from kashgari.tasks.labeling import BiLSTM_Model from kashgari.corpus import ChineseDailyNerCorpus from kashgari.utils import load_model train_x, train_y = ChineseDailyNerCorpus.load_data('train', shuffle=False) valid_x, valid_y = ChineseDailyNerCorpus.load_data('valid') train_x, train_y = train_x[:5120], train_y[:5120] model = load_model('/Users/brikerman/Desktop/blstm_model') # model.build_model(train_x[:100], train_y[:100]) # model.fit(train_x[:1000], train_y[:1000], epochs=10) # model.evaluate(train_x[:20], train_y[:20]) print("Hello world")
# Load the politics/current-affairs test CSV (the finance CSV `data_caijing`
# is loaded earlier in this file).
data_shizheng = pd.read_csv('test_data/shizheng.csv', encoding='utf-8', sep=',')

# Column 4 holds the article text in both frames.
data_cj = data_caijing.iloc[:, 4]
# BUG FIX: previously read data_caijing again, so data_shizheng was loaded
# but never used.
data_sz = data_shizheng.iloc[:, 4]

data_cj1 = np.array(data_cj).astype(str).tolist()
# BUG FIX: this assignment was commented out while data_xz1 was still
# consumed below, raising NameError at runtime.
data_xz1 = np.array(data_sz).astype(str).tolist()

new_model = load_model('model_cnn')

# Tokenize each article with jieba before classification.
x1 = [jieba.cut(cj, cut_all=False) for cj in data_cj1]
x2 = [jieba.cut(sz, cut_all=False) for sz in data_xz1]

q = new_model.predict(x1)
w = new_model.predict(x2)
print(q)

# Attach predictions column-wise to the original frames.
df1 = pd.DataFrame(q, columns=['prediction'])
df2 = pd.DataFrame(w, columns=['prediction'])
data_re = pd.concat([data_caijing, df1], axis=1)
from kashgari.utils import load_model
from kashgari.corpus import ChineseDailyNerCorpus
import tensorflow as tf

# Restore the trained NER model and export its underlying tf.keras model
# in SavedModel format for serving.
model = load_model('/home/johnsaxon/HolmesNER/BERT/ner.h5')
model_export_dir = 'models/ner/m1'
print(dir(model))

# Serving signature: inputs are keyed by tensor name without the ':0'
# suffix; outputs keep the full tensor name.
name_to_inputs = {tensor.name.split(":")[0]: tensor for tensor in model.tf_model.inputs}
name_to_outputs = {tensor.name: tensor for tensor in model.tf_model.outputs}
print(name_to_inputs)
print(name_to_outputs)

tf.saved_model.simple_save(tf.keras.backend.get_session(),
                           model_export_dir,
                           inputs=name_to_inputs,
                           outputs=name_to_outputs)
def load(path):
    """Deserialize and return the model stored at *path*.

    Fixes the original, which called load_model() but discarded the result,
    so every call returned None.
    """
    return load_model(path)
return model def predict(model, sentences): test = [[char for char in sentence] for sentence in sentences] print(test) pred = model.predict(test) for index, line in enumerate(test): for char, tag in zip(line, pred[index]): print("{}---{}\n".format(char, tag)) def contrast(model, path='/home/peitian_zhang/data/corpus/labeled_train.txt'): train_x, train_y = getTrain(path) index = random.choice(range(0, len(train_x))) pred = model.predict([train_x[index]])[0] for tag, target in zip(pred, train_y[index]): print("{},{}\n".format(tag, target)) def load(path): load_model(path) if __name__ == "__main__": model = load_model( r'D:\Ubuntu\rootfs\home\pt\models\bilstm+crf_epoch_100.model') x, y = getTrain(path=r'D:\repositories\DaChuang\data\语料\train.txt') predict(model, x[-10:-5])
import functools
from TimeFmt.parser import Parser
import time
import tensorflow as tf
from tensorflow.python.keras.backend import set_session
from crf_model import CRFModel

# Flask application setup. BUG FIX: Flask() was given the not-yet-defined
# name `app` (NameError at import time); the module name is what Flask expects.
app = Flask(__name__)
bootstrap = Bootstrap(app)

# Two sample case descriptions (Chinese legal text) used as demo inputs.
test_text_1 = '2014年12月的一天,被告人张某趁鲍某试衣服时,将鲍某包中该银行卡盗走。12月30日9时许,被告人张某持卡到ATM机上盗取现金2300元。后被告人张某到派出所主动投案。'
test_text_2 = '2015年6月22日凌晨,被告人鞠某通过攀爬楼体的方式进入位于延安市宝塔区东盛大厦3号楼1单元603室被害人赵某办公室内,盗窃戴尔牌p04S型笔记本电脑1台(经鉴定价值1000元)。事后,被告人温某通过网络将2部笔记本电脑以5000元销赃。 '

# Create the TF session/graph up front and register the session with Keras
# so the model loaded below can serve predictions from request handlers.
sess = tf.Session()
graph = tf.get_default_graph()
set_session(sess)

# NOTE(review): `utils` is not imported in this chunk — presumably
# kashgari.utils is imported earlier in the file; confirm.
loaded_model = utils.load_model('modelInfo/p1/input/p1')

# Event trigger tags ('B-' BIO prefixes) the labeling model can emit.
event_list = ['B-Steal', 'B-Draw', 'B-Consume', 'B-Sale', 'B-Volunteer']
cm = CRFModel(model='crf/model')


# Custom event class
class MyEvent:
    # Holds one extracted event: a time span (time1..time2), the original
    # time string, one typed argument with its value, and an ordinal.
    def __init__(self, t1, t2, t_str, arg_type, arg_value, num):
        self.time1 = t1
        self.time2 = t2
        self.time_str = t_str
        self.arg_type = arg_type
        self.arg_value = arg_value
        self.num = num