Example #1
def inference():
    model.eval()
    predictions = []
    id_prediction = {}
    with torch.no_grad():
        for i in range(0, len(data), args.batch_size):
            print("{} in {}".format(i, len(data)))
            one = data[i:i + args.batch_size]
            query, _ = padding([x[0] for x in one], max_len=50)
            passage, _ = padding([x[1] for x in one], max_len=300)
            answer = pad_answer([x[2] for x in one])
            str_words = [x[-1] for x in one]
            ids = [x[3] for x in one]
            answer = pad_wrong_answer(answer)
            query = torch.LongTensor(query)
            passage = torch.LongTensor(passage)
            #print(np.shape(answer))
            answer = torch.LongTensor(answer)
            if args.cuda:
                query = query.cuda()
                passage = passage.cuda()
                answer = answer.cuda()
            output = model([query, passage, answer, False])
            for q_id, prediction, candidates in zip(ids, output, str_words):
                id_prediction[q_id] = int(prediction)
                prediction_answer = u''.join(candidates[prediction])
                predictions.append(str(q_id) + '\t' + prediction_answer)
    outputs = u'\n'.join(predictions)
    with codecs.open(args.output, 'w', encoding='utf-8') as f:
        f.write(outputs)
    with open("pkl_records/dev11.pkl", "wb") as f:
        pickle.dump(id_prediction, f)
    print('done!')
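All of these examples rely on padding helpers that are not shown. The sketches below are inferred from the call sites, not the original implementations: `padding` returns the padded batch plus the original lengths (and is called both with and without `max_len`, and once with `limit_max=False`); `pad_answer` pads every candidate answer to one length so a batch forms a (batch, n_candidates, max_len) array (the originals spell the keyword both `max_len` and `max_length`, so the sketch accepts both); `pad_wrong_answer` duplicates the first candidate when an example has fewer than three alternatives.

def padding(seqs, max_len=None, limit_max=True):
    # right-pad each token-id sequence with zeros to a common length
    lengths = [len(s) for s in seqs]
    longest = max(lengths)
    if max_len is None:
        max_len = longest
    elif not limit_max:
        max_len = max(max_len, longest)  # assumption: limit_max=False means "never truncate"
    padded = [(list(s) + [0] * max_len)[:max_len] for s in seqs]
    return padded, lengths

def pad_answer(batch, max_len=None, max_length=None):
    # pad candidates so the batch forms a (batch, n_candidates, max_len) array
    max_len = max_len or max_length
    flat = [cand for cands in batch for cand in cands]
    if max_len is None:
        max_len = max(len(c) for c in flat)
    return [[(list(c) + [0] * max_len)[:max_len] for c in cands] for cands in batch]

def pad_wrong_answer(batch):
    # duplicate the first candidate until every example has three alternatives
    for cands in batch:
        while len(cands) < 3:
            cands.append(list(cands[0]))
    return batch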
Example #2
def train(epoch, net, train_dt, opt, best):
    net.train()
    data = shuffle_data(train_dt, 1)
    total_loss = 0.0
    for num, i in enumerate(range(0, len(data), opts["batch"])):
        one = data[i:i + opts["batch"]]
        query, _ = padding([x[0] for x in one], max_len=50)
        passage, _ = padding([x[1] for x in one], max_len=350)
        answer = pad_answer([x[2] for x in one])
        query, passage, answer = torch.LongTensor(query), torch.LongTensor(
            passage), torch.LongTensor(answer)
        if args.cuda:
            query = query.cuda()
            passage = passage.cuda()
            answer = answer.cuda()
        opt.zero_grad()
        loss = net([query, passage, answer, True])
        loss.backward()
        total_loss += loss.item()
        opt.step()
        if (num + 1) % opts["log_interval"] == 0:
            print(
                '|------epoch {:d} train error is {:f}  progress {:.2f}% best {}------|'
                .format(epoch, total_loss / opts["log_interval"],
                        i * 100.0 / len(data), best))
            total_loss = 0
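`shuffle_data` is another external helper. A plausible sketch follows; reading its second argument as a random seed is a guess from the call `shuffle_data(train_dt, 1)`:

import random

def shuffle_data(data, seed=None):
    # return a shuffled copy of the dataset; a fixed seed makes epochs reproducible
    rng = random.Random(seed)
    data = list(data)
    rng.shuffle(data)
    return data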
Example #3
def train(epoch, net, train_dt, opt, best, best_epoch):
    net.train()
    data = shuffle_data(train_dt, 1)
    total_loss = 0.0
    time_sum = 0.0
    for num, i in enumerate(range(0, len(data), opts["batch"])):
        time_start = time.time()
        one = data[i:i + opts["batch"]]
        query, _ = padding([x[0] for x in one], max_len=opts["q_len"])
        passage, _ = padding([x[1] for x in one], max_len=opts["p_len"])
        answer = pad_answer([x[2] for x in one])
        ids = [x[3] for x in one]
        query = torch.LongTensor(query)
        passage = torch.LongTensor(passage)
        answer = torch.LongTensor(answer)
        if torch.cuda.is_available():
            query = query.cuda()
            passage = passage.cuda()
            answer = answer.cuda()
        opt.zero_grad()
        loss = net([query, passage, answer, ids, True, True])
        loss.backward()
        total_loss += loss.item()
        opt.step()
        # timing
        time_end = time.time()
        cost = (time_end - time_start)
        time_sum += cost
        if (num + 1) % opts["log_interval"] == 0:
            ts = '%.2f' % time_sum
            print(
                '|---epoch {:d} train error is {:f}  progress {:.2f}%  costing: {} best {} on epoch {}---|'
                .format(epoch, total_loss / opts["log_interval"],
                        i * 100.0 / len(data), ts + " s", best, best_epoch))
            time_sum = 0.0
            total_loss = 0
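For context, a hypothetical driver that ties a train/test pair like the ones above together. The names `model`, `optimizer`, `train_data`, `dev_data`, `opts["epoch"]`, and the checkpoint path are assumptions, not part of the original examples:

best, best_epoch = 0.0, 0
for epoch in range(opts["epoch"]):
    train(epoch, model, train_data, optimizer, best, best_epoch)
    accuracy = test(model, dev_data)  # percent of examples whose top score is candidate 0
    if accuracy > best:
        best, best_epoch = accuracy, epoch
        torch.save(model.state_dict(), "best_model.pt")  # hypothetical checkpoint path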
Example #4
def test(pred, session, ans, que, para):
    r, a = 0.0, 0.0
    id_list = []  # records the data needed for ensembling
    pred_list = []
    for i in range(0, len(dev_data), 10):
        one = dev_data[i:i + 10]
        query, _ = padding([x[0] for x in one], max_len=50)
        passage, _ = padding([x[1] for x in one], max_len=350)
        answer = pad_answer([x[2] for x in one], max_len=70)
        ids = [int(c[3]) for c in one]
        # query, passage, answer = np.array(query), np.array(passage), np.array(answer)
        fd = {para: passage, que: query, ans: answer}
        p = session.run(pred, feed_dict=fd)

        # store each q_id with its predicted answer index
        p = list(p)
        ids = list(ids)
        id_list.extend(ids)
        pred_list.extend(p)

        for item in p:
            if np.argmax(item) == 0:
                r += 1
        a += len(one)
    return r * 100.0 / a, id_list, pred_list
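A sketch of how this TensorFlow (TF1, session/feed_dict style) variant might be wired up. The placeholder shapes are assumptions inferred from the padding lengths inside the function, and `pred` is replaced by a random stand-in op so the snippet runs without the real model graph:

import tensorflow as tf

que = tf.placeholder(tf.int32, [None, 50], name="query")
para = tf.placeholder(tf.int32, [None, 350], name="passage")
ans = tf.placeholder(tf.int32, [None, 3, 70], name="answer")
pred = tf.random_uniform([tf.shape(que)[0], 3])  # stand-in for the model's score op

with tf.Session() as sess:
    accuracy, id_list, pred_list = test(pred, sess, ans, que, para)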
Example #5
def inference(pred, query, para, ans, sess, data, store_path="esm_record/test.pkl"):
    predictions = []
    id_list = []  # records the data needed for ensembling
    pred_list = []
    for i in range(0, len(data), args.batch_size):
        one = data[i:i + args.batch_size]
        q, _ = padding([x[0] for x in one], max_len=50)
        p, _ = padding([x[1] for x in one], max_len=350)
        a = pad_answer([x[2] for x in one], max_length=70)
        str_words = [x[-1] for x in one]
        ids = [x[3] for x in one]
        if len(np.shape(a)) != 3:
            a = pad_wrong_answer(a)
        output = np.argmax(sess.run(pred, feed_dict={
            query: q,
            para: p,
            ans: a
        }), axis=1)
        output = list(output)
        ids = list(ids)
        id_list.extend(ids)
        pred_list.extend(output)
        for q_id, prediction, candidates in zip(ids, output, str_words):
            # FIXME: some examples carry fewer than three candidates;
            # duplicate the first so the predicted index is always valid
            l = len(candidates)
            fir = candidates[0]
            if l < 3:
                for _ in range(3 - l):
                    candidates.append(fir)
            prediction_answer = u''.join(candidates[prediction])
            predictions.append(str(q_id) + '\t' + prediction_answer)
    outputs = u'\n'.join(predictions)
    with codecs.open(args.output, 'w', encoding='utf-8') as f:
        f.write(outputs)
    print('done!')

    esm_record(id_list=id_list, pred_list=pred_list, path=store_path)
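`esm_record` is defined elsewhere; a minimal sketch consistent with its call site, pickling the collected (question id, predicted index) pairs for later ensembling:

import pickle

def esm_record(id_list, pred_list, path):
    # persist id -> prediction pairs so several models' outputs can be combined
    with open(path, "wb") as f:
        pickle.dump(dict(zip(id_list, pred_list)), f)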
Example #6
def inference(model, data, md_name, dat_name, opts, is_argmax=True):
    pkl_path = "pkl_records/" + md_name + "." + dat_name + ".pkl"
    if not os.path.exists(pkl_path):
        model.eval()
        predictions = []
        id_prediction = {}
        with torch.no_grad():
            for i in range(0, len(data), opts["batch"]):
                # print("{} in {}".format(i, len(data)))
                one = data[i:i + opts["batch"]]
                query, _ = padding([x[0] for x in one], max_len=50)
                passage, _ = padding([x[1] for x in one], max_len=300)
                answer = pad_answer([x[2] for x in one])
                str_words = [x[-1] for x in one]
                ids = [x[3] for x in one]
                answer = pad_wrong_answer(answer)
                query = torch.LongTensor(query)
                passage = torch.LongTensor(passage)
                # print(np.shape(answer))
                answer = torch.LongTensor(answer)
                if torch.cuda.is_available():
                    query = query.cuda()
                    passage = passage.cuda()
                    answer = answer.cuda()
                output = model([query, passage, answer, ids, False, is_argmax])
                for q_id, prediction, candidates in zip(
                        ids, output, str_words):
                    if is_argmax:
                        id_prediction[q_id] = int(prediction)
                    else:
                        prediction = prediction.cpu().numpy()
                        id_prediction[q_id] = list(prediction)
                    prediction_answer = u''.join(
                        candidates[np.argmax(prediction)])
                    predictions.append(str(q_id) + '\t' + prediction_answer)
        outputs = u'\n'.join(predictions)
        print("score: {}".format(score_on_dt(id_prediction)))
        with codecs.open("submit/" + md_name + "." + dat_name + ".txt",
                         'w',
                         encoding='utf-8') as f:
            f.write(outputs)
        with open("pkl_records/" + md_name + "." + dat_name + ".pkl",
                  "wb") as f:  # TODO: 更换pkl文件名称
            pickle.dump(id_prediction, f)
        print('done!')
    else:
        with open(pkl_path, "rb") as f:
            pkkl = pickle.load(f)
        print(pkl_path + " exists, score:", score_on_dt(pkkl))
Example #7
def test(net, valid_data):
    net.eval()
    r, a = 0.0, 0.0
    with torch.no_grad():
        for i in range(0, len(valid_data), opts["batch"]):
            print("{} in {}".format(i, len(valid_data)))
            one = valid_data[i:i + opts["batch"]]
            query, _ = padding([x[0] for x in one], max_len=50)
            passage, _ = padding([x[1] for x in one], max_len=500)
            answer = pad_answer([x[2] for x in one])
            query, passage, answer = torch.LongTensor(query), torch.LongTensor(
                passage), torch.LongTensor(answer)
            if args.cuda:
                query = query.cuda()
                passage = passage.cuda()
                answer = answer.cuda()
            output = net([query, passage, answer, False])
            r += torch.eq(output, 0).sum().item()
            a += len(one)
    return r * 100.0 / a
Example #8
def test(net, valid_data):
    net.eval()
    r, a = 0.0, 0.0
    with torch.no_grad():
        for i in range(0, len(valid_data), opts["batch"]):
            print("{} in {}".format(i, len(valid_data)))
            one = valid_data[i:i + opts["batch"]]
            query, _ = padding([x[0] for x in one], max_len=opts["q_len"])
            passage, _ = padding([x[1] for x in one], max_len=opts["p_len"])
            answer = pad_answer([x[2] for x in one], max_len=opts["alt_len"])
            ids = [x[3] for x in one]
            query = torch.LongTensor(query)
            passage = torch.LongTensor(passage)
            answer = torch.LongTensor(answer)
            if torch.cuda.is_available():
                query = query.cuda()
                passage = passage.cuda()
                answer = answer.cuda()
            output = net([query, passage, answer, ids, False, True])
            r += torch.eq(output, 0).sum().item()
            a += len(one)
    return r * 100.0 / a
Example #9
def train_epoch(epoch,
                model,
                train_dt,
                dt_util,
                opt,
                best,
                best_epoch,
                batch_size=32):
    model.train()
    print("sentence pairs size:", np.shape(train_dt))
    data = shuffle_data(train_dt)
    total_loss = 0.0
    time_sum = 0.0
    for num, i in enumerate(range(0, len(data), batch_size)):
        time_start = time.time()
        batch_dt = [dt_util[j] for j in range(i, i + batch_size)]

        _inputs, _ = padding([x[0] for x in batch_dt])
        _mask_lab, _ = padding([x[1] for x in batch_dt])
        _seg_lab, _ = padding([x[2] for x in batch_dt])
        _is_next = [x[3] for x in batch_dt]
        inputs = torch.LongTensor(_inputs)      # (b,t)
        mask_lab = torch.LongTensor(_mask_lab)  # (b,t)
        seg_lab = torch.LongTensor(_seg_lab)    # (b,t)
        is_next = torch.LongTensor(_is_next)    # (b,)
        criterion = nn.NLLLoss(ignore_index=0)
        next_sent_output, mask_lm_output = model(
            inputs, seg_lab)  # (b,2) & (b,t,vocab)

        next_loss = criterion(next_sent_output, is_next)
        mask_loss = criterion(mask_lm_output.transpose(1, 2), mask_lab)
        loss = next_loss + mask_loss

        opt.zero_grad()
        loss.backward()
        opt.step()
        total_loss += loss.item()
Example #10
    def forward(self, inputs):
        try:
            [query, passage, answer, ids, is_train, is_argmax] = inputs
            opts = self.opts
            # Embedding
            q_embedding = self.embedding(query)
            p_embedding = self.embedding(passage)
            a_embeddings = self.embedding(answer)
            # Layer1: Encoding Layer
            a, _ = self.a_encoder(
                a_embeddings.view(-1, a_embeddings.size(2),
                                  a_embeddings.size(3)))
            # F.dropout is only disabled in eval mode if training=self.training is passed
            a = F.dropout(a, self.drop_out, training=self.training)  # (3b,a,2h)
            q, _ = self.q_encoder(q_embedding)
            q = F.dropout(q, self.drop_out, training=self.training)  # (b,q,2h)
            t, _ = self.p_encoder(p_embedding)
            t = F.dropout(t, self.drop_out, training=self.training)  # (b,p,2h)

            a_score = F.softmax(self.a_attention(a), 1)  # (3b,a,1)
            a_output = a_score.transpose(
                2, 1).bmm(a).squeeze()  # (3b,1,a) bmm (3b,a,2h) -> (3b,2h)
            a_emb = a_output.view(opts["batch"], 3, a.size(2))  # (b,3,2h)

            # 4.1 Semantic Perspective
            # text
            w_k_t = self.V_t(t)  # (b,p,1)
            t_sum = w_k_t.transpose(
                2, 1).bmm(t).squeeze()  # (b,1,p) bmm (b,p,2h) -> (b,2h)
            st = F.leaky_relu(self.W_A_t(t_sum))  # (b,h)

            # hypothesis
            a = a.view(-1, 3, a.size(1), a.size(2))  # (b,3,a,2h)

            # vectorize instead of looping: repeat q and t along dim 1
            q = q.unsqueeze(1).repeat(1, 3, 1, 1)  # (b,q,2h)->(b,1,q,2h)->(b,3,q,2h)
            t = t.unsqueeze(1).repeat(1, 3, 1, 1)  # (b,3,t,2h)
            h = torch.cat([q, a], dim=2)  # (b,3,q+a,2h)
            w_k_h = self.V_h(h)  # (b,3,q+a,1)
            h_sum = w_k_h.view(h.size(0) * 3, h.size(2), -1).transpose(
                2, 1).bmm(h.view(h.size(0) * 3, h.size(2), -1)).squeeze()
            h_sum = h_sum.view(h.size(0), 3, h.size(3))  # (b,3,2h)
            sh = F.leaky_relu(self.W_A_h(h_sum))  # (b,3,h)

            st = st.unsqueeze(1).repeat(1, 3, 1)  # (b,3,h)
            M_sem = F.cosine_similarity(st, sh, dim=2)  # (b,3)
            M_sem = F.dropout(M_sem, self.drop_out, training=self.training)

            # 4.2 Word-by-Word Perspective
            def get_position(pos_weight):
                """Expand a (t,) positional weight vector to the Q and A grids.

                :param pos_weight: (t,)
                :return: position_Q (b,3,t,q), position_A (b,3,t,a)
                """
                position_T = pos_weight[:t.size(2)]
                position_T = position_T.unsqueeze(0).unsqueeze(1).unsqueeze(
                    3).repeat(opts["batch"], 3, 1, 1)  # (b,3,t,1)
                position_Q = position_T.repeat(1, 1, 1, q.size(2))
                position_A = position_T.repeat(1, 1, 1, a.size(2))
                return position_Q, position_A

            def get_pos_simil(text_k, query_m, answer_n, pos_weight=None):
                """
                :param text_k:  (b,3,t,1,h)
                :param query_m: (b,3,1,q,h)
                :param answer_n:
                :param pos_weight:
                :return:
                """
                Q_km = F.cosine_similarity(
                    text_k.repeat(1, 1, 1, q.size(2), 1),
                    query_m.repeat(1, 1, t.size(2), 1, 1),
                    dim=4)  # (b,3,t,q)
                A_kn = F.cosine_similarity(
                    text_k.repeat(1, 1, 1, a.size(2), 1),
                    answer_n.repeat(1, 1, t.size(2), 1, 1),
                    dim=4)  # (b,3,t,a)
                if pos_weight is not None:
                    position_Q, position_A = get_position(pos_weight)
                    Q_km = Q_km * position_Q
                    A_kn = A_kn * position_A
                return Q_km, A_kn

            def get_M(Q_km, A_kn):
                # Eq. (5)
                _MQ = torch.max(Q_km, dim=2)[0]  # (b,3,1,q)
                _MQ = _MQ.view(q.size(0), 3, -1)  # (b,3,q)
                MQ = _MQ.view(-1, q.size(2)).unsqueeze(1).bmm(
                    w_m_q.view(-1, q.size(2), 1))  # (3b,1,q)(3b,q,1)->(3b,1,1)
                MQ = MQ.view(q.size(0), 3)  # (b,3); MQ is identical across the 3 candidates

                _MA = torch.max(A_kn, dim=2)[0]  # (b,3,1,a)
                _MA = _MA.view(a.size(0), 3, -1)  # (b,3,a)
                MA = _MA.view(-1, a.size(2)).unsqueeze(1).bmm(
                    w_n_a.view(-1, a.size(2),
                               1))  # (3b,1,a)(3b,a,1) -> (3b,1,1)
                MA = MA.view(a.size(0), 3)  # (b,3); MA differs across the 3 candidates
                MA = F.softmax(MA, dim=1)

                MQ = F.dropout(MQ, self.drop_out, training=self.training)
                MA = F.dropout(MA, self.drop_out, training=self.training)

                return MQ, MA, MQ * MA

            #  4.2-(1) preparing
            tk = F.leaky_relu(self.W_B_t(t))  # (b,3,t,h)
            qm = F.leaky_relu(self.W_B_q(q))  # (b,3,q,h)
            an = F.leaky_relu(self.W_B_a(a))  # (b,3,a,h)
            #  4.2-(2) reshaping
            tk = tk.unsqueeze(3)  # (b,3,t,1,h)
            qm = qm.unsqueeze(2)  # (b,3,1,q,h)
            an = an.unsqueeze(2)  # (b,3,1,a,h)
            #  4.2-(3) weight vectors for q and a
            w_m_q = self.V_q(q)  # (b,3,q,1)
            w_n_a = self.V_a(a)  # (b,3,a,1)

            # 4.2.1 Sentential
            cq_km, ca_kn = get_pos_simil(text_k=tk,
                                         query_m=qm,
                                         answer_n=an,
                                         pos_weight=None)  # fixme: slow
            Mq, Ma, Maq = get_M(cq_km, ca_kn)
            M_word = self.W_a1(Ma) + self.W_a2(Mq) + self.W_a3(Maq)

            # 4.2.2 Sequential Sliding Window
            sq_km, sa_kn = get_pos_simil(
                text_k=tk, query_m=qm, answer_n=an,
                pos_weight=self.position_t)  # fixme: slow
            Mq, Ma, Maq = get_M(sq_km, sa_kn)
            M_sws = self.W_a4(Ma) + self.W_a5(Mq) + self.W_a6(Maq)

            # 4.2.3 Dependency Sliding Window
            # reorder the text tokens by their Fiedler-vector values
            tk = tk.view(tk.size(0), tk.size(1), tk.size(2), -1)  # (b,3,t,h)
            dep_idx, _ = padding([self.dep_info[int(qid)] for qid in ids],
                                 max_len=tk.size(2),
                                 limit_max=False)
            tk_sort_init = np.zeros(
                shape=[tk.size(0), 3, t.size(2), tk.size(-1)])
            # in PyTorch only float tensors can carry gradients
            tk_sort = torch.FloatTensor(tk_sort_init)
            for i in range(tk.size(0)):  # batch
                tk_i_sorted = tk[i, :, dep_idx[i], :]  # sort dim 2
                try:
                    tk_sort[i] = tk_i_sorted
                except Exception as e:
                    print(e)
                    print(tk_sort[i].shape, tk_i_sorted.shape)

            # apply the sliding window to the reordered tk
            tk = tk_sort.unsqueeze(3)
            sq_km, sa_kn = get_pos_simil(
                text_k=tk,
                query_m=qm,
                answer_n=an,
                pos_weight=self.position_t2)  # fixme: slow
            Mq, Ma, Maq = get_M(sq_km, sa_kn)
            M_swd = self.W_a7(Ma) + self.W_a8(Mq) + self.W_a9(Ma * Mq)

            aggregation = torch.cat([M_sem, M_word, M_sws, M_swd],
                                    dim=1)  # (b,12)

            # Layer4: Prediction Layer
            encoder_output = F.dropout(F.leaky_relu(self.MLP(aggregation)),
                                       self.drop_out,
                                       training=self.training)  # (b,2h)
            score = F.softmax(
                a_emb.bmm(encoder_output.unsqueeze(2)).squeeze(),
                1)  # (b,3,2h) bmm (b,2h,1) -> (b,3)
            print("batch score: {}".format(
                Counter(score.argmax(1).data.numpy())[0] / self.opts["batch"]))
            if not is_train:
                if is_argmax:
                    return score.argmax(1)
                else:
                    return score
            # loss = -torch.log(score[:, 0]).mean()  # the old loss just maximized score[:, 0]
            """we take the maximum over i so that we are ranking the correct answer
            over the best-ranked incorrect answer (of which there are three)"""
            correct = score[:, 0]
            best_wrong = torch.max(score[:, 1:], dim=1)[0]  # best-ranked incorrect answer
            u = 1.5  # required margin between score[:, 0] and the wrong scores
            margin = u - correct + best_wrong
            loss = F.relu(margin).mean()  # widen the gap between score[:, 0] and the wrong options
            return loss
        except Exception as e:
            print(e)
            raise
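The margin loss at the end of forward can also be written as a standalone function; this is a sketch of the same hinge objective (the correct candidate at index 0 must outscore the best incorrect one by a margin u), not the original author's code:

import torch
import torch.nn.functional as F

def margin_loss(score, u=1.5):
    # score: (batch, 3) softmax scores; candidate 0 is the correct answer
    correct = score[:, 0]
    best_wrong = score[:, 1:].max(dim=1)[0]
    return F.relu(u - correct + best_wrong).mean()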