def inference():
    model.eval()
    predictions = []
    id_prediction = {}
    with torch.no_grad():
        for i in range(0, len(data), args.batch_size):
            print("{} in {}".format(i, len(data)))
            one = data[i:i + args.batch_size]
            query, _ = padding([x[0] for x in one], max_len=50)
            passage, _ = padding([x[1] for x in one], max_len=300)
            answer = pad_answer([x[2] for x in one])
            str_words = [x[-1] for x in one]
            ids = [x[3] for x in one]
            answer = pad_wrong_answer(answer)
            query = torch.LongTensor(query)
            passage = torch.LongTensor(passage)
            answer = torch.LongTensor(answer)
            if args.cuda:
                query = query.cuda()
                passage = passage.cuda()
                answer = answer.cuda()
            output = model([query, passage, answer, False])
            for q_id, prediction, candidates in zip(ids, output, str_words):
                id_prediction[q_id] = int(prediction)
                prediction_answer = u''.join(candidates[prediction])
                predictions.append(str(q_id) + '\t' + prediction_answer)
    outputs = u'\n'.join(predictions)
    with codecs.open(args.output, 'w', encoding='utf-8') as f:
        f.write(outputs)
    with open("pkl_records/dev11.pkl", "wb") as f:
        pickle.dump(id_prediction, f)
    print('done!')
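# The helpers padding / pad_answer / pad_wrong_answer used throughout this
# file are defined elsewhere in the project. A minimal sketch of plausible
# implementations, for reference only -- the real versions may differ (e.g.
# in truncation policy), so the names carry a _sketch suffix:
def padding_sketch(seqs, max_len=None, pad_id=0, limit_max=True):
    """Pad a batch of id sequences to one length; returns (padded, lengths)."""
    lengths = [len(s) for s in seqs]
    if max_len is None or not limit_max:
        max_len = max(max_len or 0, max(lengths))
    padded = [list(s[:max_len]) + [pad_id] * max(0, max_len - len(s)) for s in seqs]
    return padded, [min(l, max_len) for l in lengths]


def pad_answer_sketch(answers, max_len=None, pad_id=0):
    """Pad every candidate answer of every question to one common length."""
    width = max_len or max(len(c) for cands in answers for c in cands)
    return [[list(c[:width]) + [pad_id] * (width - len(c[:width])) for c in cands]
            for cands in answers]


def pad_wrong_answer_sketch(answers):
    """Duplicate the first candidate so every question has exactly 3 options."""
    return [cands + [cands[0]] * (3 - len(cands)) for cands in answers]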
def train(epoch, net, train_dt, opt, best):
    net.train()
    data = shuffle_data(train_dt, 1)
    total_loss = 0.0
    for num, i in enumerate(range(0, len(data), opts["batch"])):
        one = data[i:i + opts["batch"]]
        query, _ = padding([x[0] for x in one], max_len=50)
        passage, _ = padding([x[1] for x in one], max_len=350)
        answer = pad_answer([x[2] for x in one])
        query, passage, answer = torch.LongTensor(query), torch.LongTensor(
            passage), torch.LongTensor(answer)
        if args.cuda:
            query = query.cuda()
            passage = passage.cuda()
            answer = answer.cuda()
        opt.zero_grad()
        loss = net([query, passage, answer, True])
        loss.backward()
        total_loss += loss.item()
        opt.step()
        if (num + 1) % opts["log_interval"] == 0:
            print('|------epoch {:d} train error is {:f} progress {:.2f}% best {}------|'
                  .format(epoch, total_loss / opts["log_interval"],
                          i * 100.0 / len(data), best))
            total_loss = 0
def train(epoch, net, train_dt, opt, best, best_epoch):
    net.train()
    data = shuffle_data(train_dt, 1)
    total_loss = 0.0
    time_sum = 0.0
    for num, i in enumerate(range(0, len(data), opts["batch"])):
        time_start = time.time()
        one = data[i:i + opts["batch"]]
        query, _ = padding([x[0] for x in one], max_len=opts["q_len"])
        passage, _ = padding([x[1] for x in one], max_len=opts["p_len"])
        answer = pad_answer([x[2] for x in one])
        ids = [x[3] for x in one]
        query, passage, answer = torch.LongTensor(query), torch.LongTensor(
            passage), torch.LongTensor(answer)
        if torch.cuda.is_available():
            query = query.cuda()
            passage = passage.cuda()
            answer = answer.cuda()
        opt.zero_grad()
        loss = net([query, passage, answer, ids, True, True])
        loss.backward()
        total_loss += loss.item()
        opt.step()
        # timing
        time_sum += time.time() - time_start
        if (num + 1) % opts["log_interval"] == 0:
            ts = str('%.2f' % time_sum)
            print('|---epoch {:d} train error is {:f} progress {:.2f}% costing: {} best {} on epoch {}---|'
                  .format(epoch, total_loss / opts["log_interval"],
                          i * 100.0 / len(data), ts + " s", best, best_epoch))
            time_sum = 0.0
            total_loss = 0
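# A sketch of how this train() is typically driven, tracking the best dev
# accuracy across epochs. Illustrative only: the checkpoint path is
# hypothetical, and test() refers to the torch version defined below, which
# returns accuracy in percent:
def run_training_sketch(net, opt, train_dt, dev_dt, epochs=10):
    best, best_epoch = 0.0, 0
    for epoch in range(1, epochs + 1):
        train(epoch, net, train_dt, opt, best, best_epoch)
        acc = test(net, dev_dt)
        if acc > best:
            best, best_epoch = acc, epoch
            torch.save(net.state_dict(), "checkpoints/best.pt")  # hypothetical path
    return best, best_epoch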
def test(pred, session, ans, que, para):
    r, a = 0.0, 0.0
    id_list = []    # records the q_ids needed for ensembling
    pred_list = []  # records the corresponding predictions
    for i in range(0, len(dev_data), 10):
        one = dev_data[i:i + 10]
        # queries are padded to 50 and passages to 350, consistent with the
        # other train/inference functions
        query, _ = padding([x[0] for x in one], max_len=50)
        passage, _ = padding([x[1] for x in one], max_len=350)
        answer = pad_answer([x[2] for x in one], max_len=70)
        ids = [int(c[3]) for c in one]
        fd = {para: passage, que: query, ans: answer}
        p = session.run(pred, feed_dict=fd)
        # store q_id and the predicted answer index
        id_list.extend(list(ids))
        pred_list.extend(list(p))
        for item in p:
            if np.argmax(item) == 0:
                r += 1
        a += len(one)
    return r * 100.0 / a, id_list, pred_list
def inference(pred, query, para, ans, sess, data, store_path="esm_record/test.pkl"):
    predictions = []
    id_list = []    # records the q_ids needed for ensembling
    pred_list = []  # records the corresponding predictions
    for i in range(0, len(data), args.batch_size):
        one = data[i:i + args.batch_size]
        q, _ = padding([x[0] for x in one], max_len=50)
        p, _ = padding([x[1] for x in one], max_len=350)
        a = pad_answer([x[2] for x in one], max_len=70)
        str_words = [x[-1] for x in one]
        ids = [x[3] for x in one]
        if len(np.shape(a)) != 3:
            a = pad_wrong_answer(a)
        output = np.argmax(sess.run(pred, feed_dict={query: q, para: p, ans: a}), axis=1)
        output = list(output)
        id_list.extend(list(ids))
        pred_list.extend(output)
        for q_id, prediction, candidates in zip(ids, output, str_words):
            # FIXME: some questions carry fewer than 3 candidates; repeat the
            # first one so the predicted index never goes out of range
            if len(candidates) < 3:
                candidates.extend([candidates[0]] * (3 - len(candidates)))
            prediction_answer = u''.join(candidates[prediction])
            predictions.append(str(q_id) + '\t' + prediction_answer)
    outputs = u'\n'.join(predictions)
    with codecs.open(args.output, 'w', encoding='utf-8') as f:
        f.write(outputs)
    print('done!')
    esm_record(id_list=id_list, pred_list=pred_list, path=store_path)
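# esm_record is not defined in this file. Judging by the call site it stores
# the (q_id, prediction) pairs for later ensembling; a minimal sketch under
# that assumption (the real on-disk format may differ):
def esm_record_sketch(id_list, pred_list, path):
    with open(path, "wb") as f:
        pickle.dump(dict(zip(id_list, pred_list)), f)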
def inference(model, data, md_name, dat_name, opts, is_argmax=True):
    pkl_path = "pkl_records/" + md_name + "." + dat_name + ".pkl"
    if not os.path.exists(pkl_path):
        model.eval()
        predictions = []
        id_prediction = {}
        with torch.no_grad():
            for i in range(0, len(data), opts["batch"]):
                one = data[i:i + opts["batch"]]
                query, _ = padding([x[0] for x in one], max_len=50)
                passage, _ = padding([x[1] for x in one], max_len=300)
                answer = pad_answer([x[2] for x in one])
                str_words = [x[-1] for x in one]
                ids = [x[3] for x in one]
                answer = pad_wrong_answer(answer)
                query = torch.LongTensor(query)
                passage = torch.LongTensor(passage)
                answer = torch.LongTensor(answer)
                if torch.cuda.is_available():
                    query = query.cuda()
                    passage = passage.cuda()
                    answer = answer.cuda()
                output = model([query, passage, answer, ids, False, is_argmax])
                for q_id, prediction, candidates in zip(ids, output, str_words):
                    if is_argmax:
                        id_prediction[q_id] = int(prediction)
                    else:
                        prediction = prediction.cpu().numpy()
                        id_prediction[q_id] = list(prediction)
                    prediction_answer = u''.join(candidates[np.argmax(prediction)])
                    predictions.append(str(q_id) + '\t' + prediction_answer)
        outputs = u'\n'.join(predictions)
        print("score: {}".format(score_on_dt(id_prediction)))
        with codecs.open("submit/" + md_name + "." + dat_name + ".txt", 'w',
                         encoding='utf-8') as f:
            f.write(outputs)
        with open(pkl_path, "wb") as f:  # TODO: rename the pkl file
            pickle.dump(id_prediction, f)
        print('done!')
    else:
        pkkl = pickle.load(open(pkl_path, "rb"))
        print(pkl_path + " exists, score:", score_on_dt(pkkl))
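# score_on_dt is also defined elsewhere; it apparently loads the dev gold
# labels itself and scores an {id: prediction} dict. A sketch with a
# hypothetical explicit gold-label dict {q_id: correct_index}:
def score_on_dt_sketch(id_prediction, gold_labels):
    hits = 0
    for q_id, pred in id_prediction.items():
        # pred is an int in argmax mode, or a score list in ensemble mode
        pred_idx = pred if isinstance(pred, int) else int(np.argmax(pred))
        hits += int(pred_idx == gold_labels[q_id])
    return hits * 100.0 / len(id_prediction)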
def test(net, valid_data):
    net.eval()
    r, a = 0.0, 0.0
    with torch.no_grad():
        for i in range(0, len(valid_data), opts["batch"]):
            print("{} in {}".format(i, len(valid_data)))
            one = valid_data[i:i + opts["batch"]]
            query, _ = padding([x[0] for x in one], max_len=50)
            passage, _ = padding([x[1] for x in one], max_len=500)
            answer = pad_answer([x[2] for x in one])
            query, passage, answer = torch.LongTensor(query), torch.LongTensor(
                passage), torch.LongTensor(answer)
            if args.cuda:
                query = query.cuda()
                passage = passage.cuda()
                answer = answer.cuda()
            output = net([query, passage, answer, False])
            r += torch.eq(output, 0).sum().item()
            a += len(one)
    return r * 100.0 / a
def test(net, valid_data):
    net.eval()
    r, a = 0.0, 0.0
    with torch.no_grad():
        for i in range(0, len(valid_data), opts["batch"]):
            print("{} in {}".format(i, len(valid_data)))
            one = valid_data[i:i + opts["batch"]]
            query, _ = padding([x[0] for x in one], max_len=opts["q_len"])
            passage, _ = padding([x[1] for x in one], max_len=opts["p_len"])
            answer = pad_answer([x[2] for x in one], max_len=opts["alt_len"])
            ids = [x[3] for x in one]
            query, passage, answer = torch.LongTensor(query), torch.LongTensor(
                passage), torch.LongTensor(answer)
            if torch.cuda.is_available():
                query = query.cuda()
                passage = passage.cuda()
                answer = answer.cuda()
            output = net([query, passage, answer, ids, False, True])
            r += torch.eq(output, 0).sum().item()
            a += len(one)
    return r * 100.0 / a
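# The opts dict threaded through these functions is assumed to look roughly
# like this; the values are illustrative, echoing the constants that appear
# in the functions above:
opts_sketch = {
    "batch": 32,          # batch size
    "q_len": 50,          # query padding length
    "p_len": 350,         # passage padding length
    "alt_len": 70,        # candidate-answer padding length
    "log_interval": 50,   # batches between training log lines
}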
def train_epoch(epoch, model, train_dt, dt_util, opt, best, best_epoch, batch_size=32):
    model.train()
    print("sentence pairs size:", np.shape(train_dt))
    # batches are fetched from dt_util by absolute index, so train_dt is only
    # used here for its length
    data = train_dt
    criterion = nn.NLLLoss(ignore_index=0)
    total_loss = 0.0
    time_sum = 0.0
    for num, i in enumerate(range(0, len(data), batch_size)):
        time_start = time.time()
        batch_dt = [dt_util[idx] for idx in range(i, min(i + batch_size, len(data)))]
        _inputs, _ = padding([x[0] for x in batch_dt])
        _mask_lab, _ = padding([x[1] for x in batch_dt])
        _seg_lab, _ = padding([x[2] for x in batch_dt])
        _is_next = [x[3] for x in batch_dt]
        # shapes: (b,t), (b,t), (b,t), (b,)
        inputs = torch.LongTensor(_inputs)
        mask_lab = torch.LongTensor(_mask_lab)
        seg_lab = torch.LongTensor(_seg_lab)
        is_next = torch.LongTensor(_is_next)
        # two heads: next-sentence (b,2) and masked LM (b,t,vocab)
        next_sent_output, mask_lm_output = model(inputs, seg_lab)
        next_loss = criterion(next_sent_output, is_next)
        mask_loss = criterion(mask_lm_output.transpose(1, 2), mask_lab)
        loss = next_loss + mask_loss
        opt.zero_grad()
        loss.backward()
        opt.step()
        total_loss += loss.item()
        time_sum += time.time() - time_start
        print(loss)
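# nn.NLLLoss expects log-probabilities, so both heads of the LM model above
# are assumed to end in log_softmax. A self-contained shape check of that
# contract (hypothetical dimensions):
def nll_contract_sketch():
    b, t, vocab = 2, 5, 11
    criterion = nn.NLLLoss(ignore_index=0)  # label id 0 = padding, skipped
    next_sent_output = F.log_softmax(torch.randn(b, 2), dim=-1)       # (b,2)
    mask_lm_output = F.log_softmax(torch.randn(b, t, vocab), dim=-1)  # (b,t,vocab)
    is_next = torch.randint(0, 2, (b,))
    mask_lab = torch.randint(0, vocab, (b, t))
    # token-level NLLLoss needs the class dim second, hence the transpose
    return criterion(next_sent_output, is_next) + \
           criterion(mask_lm_output.transpose(1, 2), mask_lab)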
def forward(self, inputs):
    try:
        [query, passage, answer, ids, is_train, is_argmax] = inputs
        opts = self.opts
        # Embedding
        q_embedding = self.embedding(query)
        p_embedding = self.embedding(passage)
        a_embeddings = self.embedding(answer)
        # Layer 1: Encoding Layer
        a, _ = self.a_encoder(a_embeddings.view(-1, a_embeddings.size(2),
                                                a_embeddings.size(3)))
        a = F.dropout(a, self.drop_out)  # (3b,a,2h)
        q, _ = self.q_encoder(q_embedding)
        q = F.dropout(q, self.drop_out)  # (b,q,2h)
        t, _ = self.p_encoder(p_embedding)
        t = F.dropout(t, self.drop_out)  # (b,p,2h)
        a_score = F.softmax(self.a_attention(a), 1)  # (3b,a,1)
        a_output = a_score.transpose(2, 1).bmm(a).squeeze()  # (3b,1,a) bmm (3b,a,2h) -> (3b,2h)
        a_emb = a_output.view(opts["batch"], 3, a.size(2))  # (b,3,2h)

        # 4.1 Semantic Perspective
        # text
        w_k_t = self.V_t(t)  # (b,p,1)
        t_sum = w_k_t.transpose(2, 1).bmm(t).squeeze()  # (b,1,p) bmm (b,p,2h) -> (b,2h)
        st = F.leaky_relu(self.W_A_t(t_sum))  # (b,h)
        # hypothesis
        a = a.view(-1, 3, a.size(1), a.size(2))  # (b,3,a,2h)
        # vectorized instead of a loop: repeat q and t along dim 1
        q = q.unsqueeze(1).repeat(1, 3, 1, 1)  # (b,q,2h) -> (b,1,q,2h) -> (b,3,q,2h)
        t = t.unsqueeze(1).repeat(1, 3, 1, 1)  # (b,3,t,2h)
        h = torch.cat([q, a], dim=2)  # (b,3,q+a,2h)
        w_k_h = self.V_h(h)  # (b,3,q+a,1)
        h_sum = w_k_h.view(h.size(0) * 3, h.size(2), -1).transpose(2, 1).bmm(
            h.view(h.size(0) * 3, h.size(2), -1)).squeeze()  # (3b,1,q+a) bmm (3b,q+a,2h) -> (3b,2h)
        h_sum = h_sum.view(h.size(0), 3, h.size(3))  # (b,3,2h)
        sh = F.leaky_relu(self.W_A_h(h_sum))  # (b,3,2h) x (2h,h) -> (b,3,h)
        st = st.unsqueeze(1).repeat(1, 3, 1)  # (b,3,h)
        M_sem = F.cosine_similarity(st, sh, dim=2)  # (b,3)
        M_sem = F.dropout(M_sem, self.drop_out)

        # 4.2 Word-by-Word Perspective
        def get_position(pos_weight):
            """:param pos_weight: (t,)"""
            position_T = pos_weight[:t.size(2)]
            position_T = position_T.unsqueeze(0).unsqueeze(1).unsqueeze(3).repeat(
                opts["batch"], 3, 1, 1)
            position_Q = position_T.repeat(1, 1, 1, q.size(2))
            position_A = position_T.repeat(1, 1, 1, a.size(2))
            return position_Q, position_A

        def get_pos_simil(text_k, query_m, answer_n, pos_weight=None):
            """
            :param text_k: (b,3,t,1,h)
            :param query_m: (b,3,1,q,h)
            :param answer_n: (b,3,1,a,h)
            :param pos_weight: optional positional weights (t,)
            """
            Q_km = F.cosine_similarity(text_k.repeat(1, 1, 1, q.size(2), 1),
                                       query_m.repeat(1, 1, t.size(2), 1, 1),
                                       dim=4)  # (b,3,t,q)
            A_kn = F.cosine_similarity(text_k.repeat(1, 1, 1, a.size(2), 1),
                                       answer_n.repeat(1, 1, t.size(2), 1, 1),
                                       dim=4)  # (b,3,t,a)
            if pos_weight is not None:
                position_Q, position_A = get_position(pos_weight)
                Q_km = Q_km * position_Q
                A_kn = A_kn * position_A
            return Q_km, A_kn

        def get_M(Q_km, A_kn):
            # Equation (5)
            _MQ = torch.max(Q_km, dim=2)[0]  # (b,3,q)
            _MQ = _MQ.view(q.size(0), 3, -1)
            MQ = _MQ.view(-1, q.size(2)).unsqueeze(1).bmm(
                w_m_q.view(-1, q.size(2), 1))  # (3b,1,q)(3b,q,1) -> (3b,1,1)
            MQ = MQ.view(q.size(0), 3)  # (b,3); MQ is identical across the 3 candidates
            _MA = torch.max(A_kn, dim=2)[0]  # (b,3,a)
            _MA = _MA.view(a.size(0), 3, -1)
            MA = _MA.view(-1, a.size(2)).unsqueeze(1).bmm(
                w_n_a.view(-1, a.size(2), 1))  # (3b,1,a)(3b,a,1) -> (3b,1,1)
            MA = MA.view(a.size(0), 3)  # (b,3); MA differs across the 3 candidates
            MA = F.softmax(MA, dim=1)
            MQ = F.dropout(MQ, self.drop_out)
            MA = F.dropout(MA, self.drop_out)
            return MQ, MA, MQ * MA

        # 4.2-(1) projections
        tk = F.leaky_relu(self.W_B_t(t))  # (b,3,t,h)
        qm = F.leaky_relu(self.W_B_q(q))  # (b,3,q,h)
        an = F.leaky_relu(self.W_B_a(a))  # (b,3,a,h)
        # 4.2-(2) reshaping
        tk = tk.unsqueeze(3)  # (b,3,t,1,h)
        qm = qm.unsqueeze(2)  # (b,3,1,q,h)
        an = an.unsqueeze(2)  # (b,3,1,a,h)
        # 4.2-(3) weight vectors for q and a
        w_m_q = self.V_q(q)  # (b,3,q,1)
        w_n_a = self.V_a(a)  # (b,3,a,1)

        # 4.2.1 Sentential
        cq_km, ca_kn = get_pos_simil(text_k=tk, query_m=qm, answer_n=an,
                                     pos_weight=None)  # FIXME: slow
        Mq, Ma, Maq = get_M(cq_km, ca_kn)
        M_word = self.W_a1(Ma) + self.W_a2(Mq) + self.W_a3(Maq)

        # 4.2.2 Sequential Sliding Window
        sq_km, sa_kn = get_pos_simil(text_k=tk, query_m=qm, answer_n=an,
                                     pos_weight=self.position_t)  # FIXME: slow
        Mq, Ma, Maq = get_M(sq_km, sa_kn)
        M_sws = self.W_a4(Ma) + self.W_a5(Mq) + self.W_a6(Maq)

        # 4.2.3 Dependency Sliding Window
        # reorder the text tokens by their Fiedler-vector values
        tk = tk.view(tk.size(0), tk.size(1), tk.size(2), -1)  # (b,3,t,h)
        dep_idx, _ = padding([self.dep_info[int(id)] for id in ids],
                             max_len=tk.size(2), limit_max=False)
        tk_sort = torch.FloatTensor(
            np.zeros(shape=[tk.size(0), 3, t.size(2), tk.size(-1)]))  # only FloatTensors can be optimized in PyTorch
        for i in range(tk.size(0)):  # batch
            tk_i_sorted = tk[i, :, dep_idx[i], :]  # permute dim 2
            try:
                tk_sort[i] = tk_i_sorted
            except Exception as e:
                print(e)
                print(tk_sort[i].shape, tk_i_sorted.shape)
        # run the sliding window over the reordered tk
        tk = tk_sort.unsqueeze(3)
        sq_km, sa_kn = get_pos_simil(text_k=tk, query_m=qm, answer_n=an,
                                     pos_weight=self.position_t2)  # FIXME: slow
        Mq, Ma, Maq = get_M(sq_km, sa_kn)
        M_swd = self.W_a7(Ma) + self.W_a8(Mq) + self.W_a9(Ma * Mq)

        aggregation = torch.cat([M_sem, M_word, M_sws, M_swd], dim=1)  # (b,12)

        # Layer 4: Prediction Layer
        encoder_output = F.dropout(F.leaky_relu(self.MLP(aggregation)),
                                   self.drop_out)  # (b,2h)
        score = F.softmax(a_emb.bmm(encoder_output.unsqueeze(2)).squeeze(),
                          1)  # (b,3,2h) bmm (b,2h,1) -> (b,3)
        print("batch score: {}".format(
            Counter(score.argmax(1).cpu().data.numpy())[0] / opts["batch"]))
        if not is_train:
            if is_argmax:
                return score.argmax(1)
            else:
                return score
        # loss = -torch.log(score[:, 0]).mean()  # earlier loss: maximize score[:, 0] directly
        # "we take the maximum over i so that we are ranking the correct answer
        # over the best-ranked incorrect answer (of which there are three)"
        correct = score[:, 0]
        m_wrong = torch.max(score[:, 1:], dim=1)[0]  # best-ranked incorrect answer
        u = 1.5  # margin between the correct score and the wrong-answer scores
        # hinge: zero once the correct answer beats the best wrong option by u
        loss = torch.clamp(u - correct + m_wrong, min=0).mean()
        return loss
    except Exception as e:
        print(e)
        return 1
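# A toy check of the hinge loss above, with hypothetical scores. With u = 1.5
# and a batch of two questions:
#   [0.8, 0.1, 0.1] -> max(0, 1.5 - 0.8 + 0.1) = 0.8
#   [0.2, 0.7, 0.1] -> max(0, 1.5 - 0.2 + 0.7) = 2.0
# so the mean loss is 1.4, shrinking as score[:, 0] climbs above the best
# wrong candidate:
def margin_loss_sketch(score, u=1.5):
    correct = score[:, 0]
    m_wrong = torch.max(score[:, 1:], dim=1)[0]
    return torch.clamp(u - correct + m_wrong, min=0).mean()

# margin_loss_sketch(torch.tensor([[0.8, 0.1, 0.1], [0.2, 0.7, 0.1]]))  # tensor(1.4000)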