Example #1
 def test_link(self, test_data, n_ent, heads, tails, filt=True):
     mrr_tot = 0
     mr_tot = 0
     hit10_tot = 0
     count = 0
     for batch_s, batch_r, batch_t in batch_by_size(
             config().test_batch_size, *test_data):
         batch_size = batch_s.size(0)
         rel_var = Variable(
             batch_r.unsqueeze(1).expand(batch_size, n_ent).cuda())
         src_var = Variable(
             batch_s.unsqueeze(1).expand(batch_size, n_ent).cuda())
         dst_var = Variable(
             batch_t.unsqueeze(1).expand(batch_size, n_ent).cuda())
         all_var = Variable(torch.arange(0, n_ent).unsqueeze(0).expand(
             batch_size, n_ent).type(torch.LongTensor).cuda(),
                            volatile=True)
         batch_dst_scores = self.mdl.score(src_var, rel_var, all_var).data
         batch_src_scores = self.mdl.score(all_var, rel_var, dst_var).data
         for s, r, t, dst_scores, src_scores in zip(batch_s, batch_r,
                                                    batch_t,
                                                    batch_dst_scores,
                                                    batch_src_scores):
             if filt:
                 if tails[(s, r)]._nnz() > 1:
                     tmp = dst_scores[t]
                     dst_scores += tails[(s, r)].cuda() * 1e30
                     dst_scores[t] = tmp
                 if heads[(t, r)]._nnz() > 1:
                     tmp = src_scores[s]
                     src_scores += heads[(t, r)].cuda() * 1e30
                     src_scores[s] = tmp
             mrr, mr, hit10 = mrr_mr_hitk(dst_scores, t)
             mrr_tot += mrr
             mr_tot += mr
             hit10_tot += hit10
             mrr, mr, hit10 = mrr_mr_hitk(src_scores, s)
             mrr_tot += mrr
             mr_tot += mr
             hit10_tot += hit10
             count += 2
     logging.info('Test_MRR=%f, Test_MR=%f, Test_H@10=%f', mrr_tot / count,
                  mr_tot / count, hit10_tot / count)
     return mrr_tot / count
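
All of the test_link examples rely on a helper mrr_mr_hitk that is not shown on this page. A minimal sketch, assuming a 1-D score vector in which lower scores rank better (consistent with the +1e30 filtering trick above) and a return of reciprocal rank, rank, and a Hits@10 flag; note that Example #3 expects the same helper to additionally return Hits@1 and Hits@3:

import torch

def mrr_mr_hitk(scores, target, k=10):
    # Hypothetical sketch: rank candidates by ascending score (lower = better),
    # matching the filtering above that adds +1e30 to known entities so they
    # fall to the bottom of the ranking.
    _, sorted_idx = torch.sort(scores)
    rank = (sorted_idx == target).nonzero()[0, 0].item() + 1
    return 1.0 / rank, rank, int(rank <= k)
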
Example #2
 def test_link(self, test_data, n_ent, heads, tails, filt=True, write_data=True, epo=0):
     mrr_tot = 0
     mr_tot = 0
     hit10_tot = 0
     count = 0
     for batch_s, batch_r, batch_t in batch_by_size(config().test_batch_size, *test_data):
         batch_size = batch_s.size(0)
         rel_var = Variable(batch_r.unsqueeze(1).expand(batch_size, n_ent).cuda())
         src_var = Variable(batch_s.unsqueeze(1).expand(batch_size, n_ent).cuda())
         dst_var = Variable(batch_t.unsqueeze(1).expand(batch_size, n_ent).cuda())
         all_var = Variable(torch.arange(0, n_ent).unsqueeze(0).expand(batch_size, n_ent)
                            .type(torch.LongTensor).cuda(), volatile=True)
         batch_dst_scores = self.mdl.score(src_var, rel_var, all_var).data
         batch_src_scores = self.mdl.score(all_var, rel_var, dst_var).data
         for s, r, t, dst_scores, src_scores in zip(batch_s, batch_r, batch_t, batch_dst_scores, batch_src_scores):
             if filt:
                 if tails[(s, r)]._nnz() > 1:
                     tmp = dst_scores[t]
                     dst_scores += tails[(s, r)].cuda() * 1e30
                     dst_scores[t] = tmp
                 if heads[(t, r)]._nnz() > 1:
                     tmp = src_scores[s]
                     src_scores += heads[(t, r)].cuda() * 1e30
                     src_scores[s] = tmp
             mrr, mr, hit10 = mrr_mr_hitk(dst_scores, t)
             mrr_tot += mrr
             mr_tot += mr
             hit10_tot += hit10
             mrr, mr, hit10 = mrr_mr_hitk(src_scores, s)
             mrr_tot += mrr
             mr_tot += mr
             hit10_tot += hit10
             count += 2
     logging.info('Test_MRR=%f, Test_MR=%f, Test_H@10=%f', mrr_tot / count, mr_tot / count, hit10_tot / count)
     
     if write_data:
         out_path = os.path.join(os.getcwd(), "plotdata", config().task.dir + ".txt")
         write_str = str(mrr_tot / count) + " " + str(hit10_tot / count) + " " + str(epo) + '\n'
         with open(out_path, "a") as f:
             f.write(write_str)
         print("wrote " + write_str.strip() + " to plotdata/" + config().task.dir + ".txt")
     
     return mrr_tot / count
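
batch_by_size is likewise used but not defined in these examples. A minimal sketch, assuming test_data is a tuple of aligned 1-D index tensors (heads, relations, tails):

def batch_by_size(batch_size, *tensors):
    # Hypothetical sketch: yield aligned slices of the given tensors,
    # batch_size rows at a time, so callers can unpack (batch_s, batch_r, batch_t).
    n = tensors[0].size(0)
    for start in range(0, n, batch_size):
        yield tuple(t[start:start + batch_size] for t in tensors)
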
Example #3
 def test_link(self, test_data, n_ent, heads, tails, filt=True):
     mrr_tot = 0
     mr_tot = 0
     hit1_tot = 0
     hit3_tot = 0
     hit10_tot = 0
     count = 0
     for batch_s, batch_r, batch_t in batch_by_size(
             config().test_batch_size, *test_data):
         batch_size = batch_s.size(0)
         rel_var = Variable(
             batch_r.unsqueeze(1).expand(batch_size, n_ent).cuda())
         src_var = Variable(
             batch_s.unsqueeze(1).expand(batch_size, n_ent).cuda())
         dst_var = Variable(
             batch_t.unsqueeze(1).expand(batch_size, n_ent).cuda())
         all_var = Variable(torch.arange(0, n_ent).unsqueeze(0).expand(
             batch_size, n_ent).type(torch.LongTensor).cuda(),
                            volatile=True)
         batch_dst_scores = self.mdl.score(src_var, rel_var, all_var).data
         batch_src_scores = self.mdl.score(all_var, rel_var, dst_var).data
         for s, r, t, dst_scores, src_scores in zip(batch_s, batch_r,
                                                    batch_t,
                                                    batch_dst_scores,
                                                    batch_src_scores):
             if filt:
                 if tails[(s, r)]._nnz() > 1:
                     tmp = dst_scores[t]
                     dst_scores += tails[(s, r)].cuda() * 1e30
                     dst_scores[t] = tmp
                 if heads[(t, r)]._nnz() > 1:
                     tmp = src_scores[s]
                     src_scores += heads[(t, r)].cuda() * 1e30
                     src_scores[s] = tmp
             mrr, mr, hit1, hit3, hit10 = mrr_mr_hitk(dst_scores, t)
             mrr_tot += mrr
             mr_tot += mr
             hit1_tot += hit1
             hit3_tot += hit3
             hit10_tot += hit10
             mrr, mr, hit1, hit3, hit10 = mrr_mr_hitk(src_scores, s)
             mrr_tot += mrr
             mr_tot += mr
             hit1_tot += hit1
             hit3_tot += hit3
             hit10_tot += hit10
             count += 2
     logging.info(
         'Test_MRR=%f, Test_MR=%f, Test_H@1=%f, Test_H@3=%f, Test_H@10=%f',
         mrr_tot / count, mr_tot / count, hit1_tot / count,
         hit3_tot / count, hit10_tot / count)
     writeList = [
         'testSet',
         '%.6f' % (hit1_tot / count),
         '%.6f' % (hit3_tot / count),
         '%.6f' % (hit10_tot / count),
         '%.6f' % (mr_tot / count),
         '%.6f' % (mrr_tot / count)
     ]
     # Write the result into file
     with open(
             os.path.join(
                 './result/',
                 config().task.dir.split('/')[-1] + '_' +
                 config().pretrain_config), 'a') as fw:
         fw.write('\t'.join(writeList) + '\n')
     return mrr_tot / count
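
The heads and tails arguments drive the filtered evaluation, but their construction is not part of these examples. A sketch of how they might be built from the full set of known triples, assuming plain integer ids as dictionary keys and sparse 0/1 vectors over all entities as values:

import torch
from collections import defaultdict

def build_filters(triples, n_ent):
    # Hypothetical sketch: collect, for every (s, r), all known tails and, for
    # every (t, r), all known heads, then store them as sparse 0/1 vectors so
    # that ._nnz() and the "+ 1e30" masking used above are applicable.
    tail_sets, head_sets = defaultdict(set), defaultdict(set)
    for s, r, t in triples:
        tail_sets[(s, r)].add(t)
        head_sets[(t, r)].add(s)

    def to_sparse(entity_sets):
        out = {}
        for key, ents in entity_sets.items():
            idx = torch.tensor(sorted(ents)).unsqueeze(0)  # indices, shape (1, nnz)
            out[key] = torch.sparse_coo_tensor(idx, torch.ones(idx.size(1)), (n_ent,))
        return out

    return to_sparse(head_sets), to_sparse(tail_sets)
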
Example #4
def ranking_and_hits(model, batch_size, dataset, eval_h, eval_t, name):
    heads, rels, tails = dataset
    logger.info('')
    logger.info('-'*50)
    logger.info(name)
    logger.info('-'*50)
    logger.info('')
    hits_left = []
    hits_right = []
    hits = []
    ranks = []
    ranks_left = []
    ranks_right = []

    for i in range(10):
        hits_left.append([])
        hits_right.append([])
        hits.append([])

    for bh, br, bt in batch_by_size(batch_size, heads, rels, tails):
        b_size = bh.size(0)
        bh, br, bt = bh.cuda(), br.cuda(), bt.cuda()
        pred1 = model.forward(bh, br)
        pred2 = model.forward(bt, br)

        e2_multi1 = torch.empty(b_size, pred1.size(1))
        e2_multi2 = torch.empty(b_size, pred1.size(1))

        for i, (h, r, t) in enumerate(zip(bh, br, bt)):
            e2_multi1[i] = eval_t[h.item(), r.item()].to_dense()
            e2_multi2[i] = eval_h[t.item(), r.item()].to_dense()
        e2_multi1 = e2_multi1.cuda()
        e2_multi2 = e2_multi2.cuda()

        for i in range(b_size):
            # save the prediction that is relevant
            target_value1 = pred1[i,bt[i].item()].item()
            target_value2 = pred2[i,bh[i].item()].item()
            # suppress all other known true entities (these are not interesting)
            # this corresponds to the filtered setting
            pred1[i] += e2_multi1[i] * (-1e20)
            pred2[i] += e2_multi2[i] * (-1e20)
            # write back the saved values
            pred1[i][bt[i].item()] = target_value1
            pred2[i][bh[i].item()] = target_value2

        # sort and rank
        max_values, argsort1 = torch.sort(pred1, 1, descending=True)
        max_values, argsort2 = torch.sort(pred2, 1, descending=True)
        for i in range(b_size):
            # find the rank of the target entities
            find_target1 = argsort1[i] == bt[i]
            find_target2 = argsort2[i] == bh[i]
            rank1 = torch.nonzero(find_target1)[0, 0].item() + 1
            rank2 = torch.nonzero(find_target2)[0, 0].item() + 1
            # rank+1, since the lowest rank is rank 1 not rank 0
            # ranks.append(rank1+1)
            ranks_left.append(rank1)
            # ranks.append(rank2+1)
            ranks_right.append(rank2)

            # this could be done more elegantly, but here you go
            hits[9].append(int(rank1<=10))
            hits[9].append(int(rank2<=10))
            hits_left[9].append((int(rank1<=10)))
            hits_right[9].append((int(rank2<=10)))
            # for hits_level in range(10):
            #     if rank1 <= hits_level:
            #         hits[hits_level].append(1.0)
            #         hits_left[hits_level].append(1.0)
            #     else:
            #         hits[hits_level].append(0.0)
            #         hits_left[hits_level].append(0.0)
            #
            #     if rank2 <= hits_level:
            #         hits[hits_level].append(1.0)
            #         hits_right[hits_level].append(1.0)
            #     else:
            #         hits[hits_level].append(0.0)
            #         hits_right[hits_level].append(0.0)

    # for i in range(10):
    #     logger.info('Hits left @{0}: {1}'.format(i+1, np.mean(hits_left[i])))
    #     logger.info('Hits right @{0}: {1}'.format(i+1, np.mean(hits_right[i])))
    logger.info('Hits left @{0}: {1}'.format(10, np.mean(hits_left[9])))
    logger.info('Hits right @{0}: {1}'.format(10, np.mean(hits_right[9])))
    logger.info('Hits @{0}: {1}'.format(10, np.mean(hits[9])))
    logger.info('Mean rank left: {0}'.format(np.mean(ranks_left)))
    logger.info('Mean rank right: {0}'.format(np.mean(ranks_right)))
    logger.info('Mean rank: {0}'.format(np.mean(ranks_left+ranks_right)))
    logger.info('Mean reciprocal rank left: {0}'.format(np.mean(1./np.array(ranks_left))))
    logger.info('Mean reciprocal rank right: {0}'.format(np.mean(1./np.array(ranks_right))))
    logger.info('Mean reciprocal rank: {0}'.format(np.mean(1./np.array(ranks_left+ranks_right))))
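
Example #4 only logs Hits@10 alongside MR and MRR. A small, hypothetical helper (not part of the example) that derives the remaining metrics from the collected ranks_left and ranks_right lists could look like this:

import numpy as np

def summarize_ranks(ranks, ks=(1, 3, 10)):
    # Hypothetical sketch: turn a list of filtered ranks into MR, MRR and Hits@k.
    ranks = np.asarray(ranks, dtype=np.float64)
    metrics = {'MR': ranks.mean(), 'MRR': (1.0 / ranks).mean()}
    for k in ks:
        metrics['Hits@{0}'.format(k)] = float((ranks <= k).mean())
    return metrics

# e.g. summarize_ranks(ranks_left + ranks_right)
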