def extract_items(text_in):
    """Decode (subject, predicate, object) triples from a single sentence."""
    # This variant keeps everything on the CPU, matching the loading code below.
    R = []
    _s = [char2id.get(c, 1) for c in text_in]  # id 1 is the OOV character
    _s = torch.LongTensor([_s])
    with torch.no_grad():
        _k1, _k2, t, t_max, mask = s_model(_s)
    _k1, _k2 = _k1[0, :, 0], _k2[0, :, 0]
    _kk1s = []  # per-character subject-start scores (collected for inspection only)
    for i, _kk1 in enumerate(_k1):
        if _kk1 > 0.5:
            _subject = ''
            for j, _kk2 in enumerate(_k2[i:]):
                if _kk2 > 0.5:
                    _subject = text_in[i: i + j + 1]
                    break
            if _subject:
                # Subject span boundaries for the object/predicate model.
                # Use fresh names here: rebinding _k1/_k2 would corrupt the
                # tensors still being iterated above.
                _kk1t, _kk2t = torch.LongTensor([[i]]), torch.LongTensor([[i + j]])
                with torch.no_grad():
                    _o1, _o2 = po_model(t, t_max, _kk1t, _kk2t)
                _o1, _o2 = _o1.cpu().data.numpy(), _o2.cpu().data.numpy()
                _o1, _o2 = np.argmax(_o1[0], 1), np.argmax(_o2[0], 1)
                # Distinct loop variables: reusing i/j would shadow the
                # subject indices used above.
                for m, _oo1 in enumerate(_o1):
                    if _oo1 > 0:
                        for n, _oo2 in enumerate(_o2[m:]):
                            if _oo2 == _oo1:
                                _object = text_in[m: m + n + 1]
                                _predicate = id2predicate[_oo1]
                                R.append((_subject, _predicate, _object))
                                break
        _kk1s.append(_kk1.data.cpu().numpy())
    _kk1s = np.array(_kk1s)
    return list(set(R))
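# Usage sketch for extract_items, assuming the models and vocabularies are
# loaded as in the test script below; the example sentence and output shape
# are illustrative, not taken from a real run:
#
#     triples = extract_items('周杰伦出演了电影不能说的秘密。')
#     # -> a deduplicated list of (subject, predicate, object) string tuples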
import json

import numpy as np
import torch

from config import set_args
from model import s_model, po_model

if __name__ == '__main__':
    args = set_args()

    # Load the predicate schema and the character vocabulary.
    id2predicate, predicate2id = json.load(open('./data/all_50_schemas_me.json'))
    id2predicate = {int(i): j for i, j in id2predicate.items()}
    id2char, char2id = json.load(open('./data/all_chars_me.json'))
    num_classes = len(id2predicate)

    # Rebind the class names to instances (shadowing the imported classes).
    s_model = s_model(len(char2id) + 2, args.char_size, args.hidden_size)
    po_model = po_model(len(char2id) + 2, args.char_size, args.hidden_size, 49)
    s_model.load_state_dict(torch.load('./save_model/s_0.bin'))
    po_model.load_state_dict(torch.load('./save_model/po_0.bin'))
    s_model.eval()
    po_model.eval()

    # "Jay Chou starred in the film Secret. I saw it in the cinema back then."
    text_in = '周杰伦出演了电影不能说的秘密。我当时是在电影院看的'
    # Decode triples with the models loaded above.
    with torch.no_grad():
        R = extract_items(text_in)
    print(R)
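# A minimal evaluation sketch, not part of the original script: micro
# precision/recall/F1 of extract_items against gold triples. `dev_data` is a
# hypothetical list of dicts shaped like {'text': str, 'spo_list': [(s, p, o), ...]}.
def evaluate(dev_data):
    A, B, C = 1e-10, 1e-10, 1e-10  # correct, predicted, gold counts
    for d in dev_data:
        pred = set(extract_items(d['text']))
        gold = set(d['spo_list'])
        A += len(pred & gold)
        B += len(pred)
        C += len(gold)
    return 2 * A / (B + C), A / B, A / C  # f1, precision, recall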
dg = data_generator(train_data)
T, S1, S2, K1, K2, O1, O2 = dg.pro_res()
torch_dataset = myDataset(T, S1, S2, K1, K2, O1, O2)
loader = Data.DataLoader(
    dataset=torch_dataset,       # torch Dataset holding the training arrays
    batch_size=args.batch_size,  # mini-batch size
    shuffle=True,                # random shuffle for training
    num_workers=8,               # subprocesses for loading data
    collate_fn=collate_fn,       # pads each batch (see the sketch below)
)

s_m = model.s_model(len(char2id) + 2, args.char_dim, args.hidden_dim, args).to(args.device)
po_m = model.po_model(args.char_dim, num_classes=num_classes, args=args).to(args.device)

params = list(s_m.parameters()) + list(po_m.parameters())
optimizer = torch.optim.Adam(params, lr=args.learning_rate)
loss = torch.nn.CrossEntropyLoss().to(args.device)
b_loss = torch.nn.BCEWithLogitsLoss().to(args.device)


def extract_items(text_in):
    # At validation/test time, the batch size is 1.
    R = []
    _s = [char2id.get(c, 1) for c in text_in]
    _s = np.array([_s])
    _k1, _k2, t, h, mask = s_m(torch.LongTensor(_s).to(args.device))
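# collate_fn is referenced above but not shown. A plausible sketch, assuming
# each dataset item is a dict of Python lists keyed like the batch dict used
# in the training loop below ("T", "S1", ..., "O2"), with per-character
# sequences padded with 0 to the longest text in the batch and scalar
# subject-boundary indices:
def collate_fn(batch):
    max_len = max(len(item["T"]) for item in batch)
    pad = lambda seq: seq + [0] * (max_len - len(seq))
    return {
        "T":  torch.LongTensor([pad(item["T"]) for item in batch]),
        "S1": torch.FloatTensor([pad(item["S1"]) for item in batch]),
        "S2": torch.FloatTensor([pad(item["S2"]) for item in batch]),
        "K1": torch.LongTensor([item["K1"] for item in batch]),
        "K2": torch.LongTensor([item["K2"] for item in batch]),
        "O1": torch.LongTensor([pad(item["O1"]) for item in batch]),
        "O2": torch.LongTensor([pad(item["O2"]) for item in batch]),
    }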
dg = Data_Generator(train_data)
T, S1, S2, K1, K2, O1, O2 = dg.pro_res()
# print('dataset size:', len(T))  # dataset size: 21

torch_dataset = MyDataset(T, S1, S2, K1, K2, O1, O2)
data_loader = DataLoader(
    dataset=torch_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=4,           # subprocesses for loading data
    collate_fn=collate_fn,
)

# Rebind the class names to instances (shadowing the imported classes).
s_model = s_model(len(char2id) + 2, args.char_size, args.hidden_size)
po_model = po_model(len(char2id) + 2, args.char_size, args.hidden_size, 49)

params = list(s_model.parameters()) + list(po_model.parameters())
optimizer = torch.optim.Adam(params, lr=0.001)
cross_loss = torch.nn.CrossEntropyLoss()
bce_loss = torch.nn.BCEWithLogitsLoss()

best_f1 = 0
best_epoch = 0
for epoch in range(args.num_epochs):
    for step, loader_res in enumerate(tqdm(data_loader)):
        t_s = loader_res["T"]
        k1 = loader_res["K1"]
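        # A hedged sketch of the rest of the training step, assuming the
        # forward/backward structure implied by the inference code above;
        # padding positions are not masked here, for brevity:
        s1, s2 = loader_res["S1"], loader_res["S2"]
        k2, o1, o2 = loader_res["K2"], loader_res["O1"], loader_res["O2"]
        ps1, ps2, t, t_max, mask = s_model(t_s)
        po1, po2 = po_model(t, t_max, k1, k2)
        # Subject pointers: binary targets per character.
        loss_s = bce_loss(ps1[..., 0], s1.float()) + bce_loss(ps2[..., 0], s2.float())
        # Object/predicate pointers: CrossEntropyLoss wants (batch, classes, seq_len).
        loss_po = cross_loss(po1.transpose(1, 2), o1) + cross_loss(po2.transpose(1, 2), o2)
        total_loss = loss_s + loss_po
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()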
torch_dataset = myDataset(T, S1, S2, K1, K2, O1, O2)
loader = Data.DataLoader(
    dataset=torch_dataset,   # torch Dataset holding the training arrays
    batch_size=BATCH_SIZE,   # mini-batch size
    shuffle=True,            # random shuffle for training
    num_workers=8,           # subprocesses for loading data
    collate_fn=collate_fn,
)

s_m = model.s_model(len(char2id) + 2, CHAR_SIZE, HIDDEN_SIZE).cuda()
po_m = model.po_model(len(char2id) + 2, CHAR_SIZE, HIDDEN_SIZE, 49).cuda()

params = list(s_m.parameters()) + list(po_m.parameters())
optimizer = torch.optim.Adam(params, lr=0.001)
loss = torch.nn.CrossEntropyLoss().cuda()
b_loss = torch.nn.BCEWithLogitsLoss().cuda()


def extract_items(text_in):
    R = []
    _s = [char2id.get(c, 1) for c in text_in]
    _s = np.array([_s])
    _k1, _k2, t, t_max, mask = s_m(torch.LongTensor(_s).cuda())
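# Checkpointing sketch (assumed, based on the paths the test script loads
# from './save_model/'): after each epoch the training loop would save both
# state dicts, e.g.
#
#     torch.save(s_m.state_dict(), './save_model/s_%d.bin' % epoch)
#     torch.save(po_m.state_dict(), './save_model/po_%d.bin' % epoch)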