コード例 #1
0
def evaluate_dataset_point(model, data, addbase):
    """Rerank each instance's k-best list with point-wise model scores.

    For every instance the candidate with the highest score is selected and
    its lines are collected as the prediction. The collected predictions are
    then compared with the gold lines through ``eval_tool.evaluate``.

    Args:
        model: object whose ``predict(tree)`` returns a score for a candidate.
        data: iterable of instances exposing ``kbest``, ``gold``, ``scores``,
            ``lines`` and ``gold_lines``.
        addbase: when truthy, ``inst.scores`` is passed through
            ``data_util.normalize`` and added to the model scores.

    Returns:
        The result of ``eval_tool.evaluate``; its first element is printed
        as the F1 score.
    """
    hyp_lines = []
    ref_lines = []
    for inst in data:
        # Model scores are used as-is (they are not normalized here).
        model_scores = []
        for cand in inst.kbest:
            if cand.size != inst.gold.size:
                # Size mismatch: report it and give the candidate a very low
                # score so the list stays index-aligned with inst.lines.
                print('error')
                model_scores.append(-1000)
                continue
            model_scores.append(model.predict(cand))

        if addbase:
            # The return value is ignored, so normalize presumably rescales
            # inst.scores in place -- TODO confirm.
            data_util.normalize(inst.scores)
            combined = [m + b for m, b in zip(model_scores, inst.scores)]
        else:
            combined = model_scores

        # First index holding the highest combined score.
        winner = combined.index(max(combined))
        hyp_lines.extend(inst.lines[winner])
        hyp_lines.append('\n')
        ref_lines.extend(inst.gold_lines)
        ref_lines.append('\n')

    res = eval_tool.evaluate(hyp_lines, ref_lines)
    print('f1score: %.4f' % (res[0]))
    return res
コード例 #2
0
def evaluate_dataset_pair(model, data, addbase, ratio=1):
    """Pick one candidate per instance by pairwise comparison and evaluate.

    Candidates are compared one by one against the current best (starting
    with index 0); a challenger replaces the best when its margin is
    positive.

    NOTE: the pairwise model term is currently disabled in this function, so
    ``model`` is not used and the margin consists only of the base-score
    difference (and only when ``addbase`` is truthy).

    Args:
        model: unused here (see note above).
        data: iterable of instances exposing ``kbest``, ``gold``, ``scores``,
            ``lines`` and ``gold_lines``.
        addbase: when truthy, ``inst.scores[j] - inst.scores[best]`` is added
            to the margin for size-matching candidates.
        ratio: only echoed in the printed summary line.

    Returns:
        The result of ``eval_tool.evaluate``; its first element is printed
        as the F1 score.
    """
    hyp_lines = []
    ref_lines = []
    for inst in data:
        best = 0
        for j in range(1, len(inst.kbest)):
            margin = 0
            if inst.kbest[j].size == inst.gold.size and addbase:
                # Base-score difference: challenger minus current best.
                margin += inst.scores[j] - inst.scores[best]
            if margin > 0:
                best = j

        hyp_lines.extend(inst.lines[best])
        hyp_lines.append('\n')
        ref_lines.extend(inst.gold_lines)
        ref_lines.append('\n')

    res = eval_tool.evaluate(hyp_lines, ref_lines)
    print('ratio: %f f1score: %.4f' % (ratio, res[0]))
    return res
コード例 #3
0
def evaluate_dataset_point(model, data, addbase, ratio=1):
    """Rerank each instance's k-best list with point-wise model scores.

    Only candidates whose ``size`` equals ``inst.gold.size`` are scored. The
    position of every scored candidate in the k-best list is remembered so
    that its model score is combined with *its own* base score and the winner
    is looked up at the right position in ``inst.lines``. (Filtering the
    candidates without tracking positions would shift the indices and select
    the wrong tree whenever a candidate is skipped.)

    Args:
        model: object whose ``predict(tree)`` returns a score for a candidate.
        data: iterable of instances exposing ``kbest``, ``gold``, ``scores``,
            ``lines`` and ``gold_lines``; ``kbest``, ``scores`` and ``lines``
            are indexed in parallel.
        addbase: when truthy, the final score is
            ``ratio * model_score + (1 - ratio) * base_score``.
        ratio: interpolation weight of the model score; also echoed in the
            printed summary line.

    Returns:
        The result of ``eval_tool.evaluate``; its first element is printed
        as the F1 score.

    Raises:
        ValueError: if an instance has no candidate matching the gold size.
    """
    pred_trees = []
    gold_trees = []
    for i, inst in enumerate(data):
        # Original k-best positions of the candidates that can be scored.
        valid_ids = [
            idx for idx, tree in enumerate(inst.kbest)
            if tree.size == inst.gold.size
        ]
        if not valid_ids:
            raise ValueError(
                'instance %d has no k-best candidate matching the gold size'
                % i
            )

        pred_scores = [model.predict(inst.kbest[idx]) for idx in valid_ids]
        # The return value is ignored, so normalize presumably rescales its
        # argument in place -- TODO confirm.
        data_util.normalize(pred_scores)
        if addbase:
            # NOTE(review): this touches inst.scores on every call; confirm
            # that repeated normalization is harmless.
            data_util.normalize(inst.scores)
            scores = [
                ratio * p_s + (1 - ratio) * inst.scores[idx]
                for p_s, idx in zip(pred_scores, valid_ids)
            ]
        else:
            scores = pred_scores

        # Map the best filtered position back to the k-best position.
        max_id = valid_ids[scores.index(max(scores))]
        pred_trees.extend(inst.lines[max_id])
        pred_trees.append('\n')
        gold_trees.extend(inst.gold_lines)
        gold_trees.append('\n')

    res = eval_tool.evaluate(pred_trees, gold_trees)
    print('ratio: %f f1score: %.4f' % (ratio, res[0]))
    return res
コード例 #4
0
def evaluate_baseline(data):
    """Print the score obtained by always taking the last k-best position.

    For every instance the lines stored at index ``len(inst.kbest) - 1`` of
    ``inst.lines`` are used as the prediction and compared with the gold
    lines through ``eval_tool.evaluate``. Nothing is returned.

    Args:
        data: iterable of instances exposing ``kbest``, ``lines`` and
            ``gold_lines``.
    """
    hyp_lines = []
    ref_lines = []
    for inst in data:
        last = len(inst.kbest) - 1
        hyp_lines.extend(inst.lines[last])
        hyp_lines.append('\n')
        ref_lines.extend(inst.gold_lines)
        ref_lines.append('\n')
    print('baseline: %.4f' % (eval_tool.evaluate(hyp_lines, ref_lines)[0]))
コード例 #5
0
def evaluate_oracle_worst(data):
    """Print baseline, oracle and worst-case scores over the data set.

    Per instance, every candidate in ``inst.lines`` is evaluated on its own
    against ``inst.gold_lines``. The best-scoring candidate feeds the oracle
    output and the lowest-scoring one feeds the worst output. The candidate
    at index ``len(inst.kbest) - 1`` feeds the plain 'f1score' line. Nothing
    is returned.

    Args:
        data: iterable of instances exposing ``kbest``, ``lines`` and
            ``gold_lines``.
    """
    oracle_out = []
    worst_out = []
    gold_out = []
    base_out = []
    for inst in data:
        gold_out.extend(inst.gold_lines)
        gold_out.append('\n')

        base_out.extend(inst.lines[len(inst.kbest) - 1])
        base_out.append('\n')

        # Thresholds start at 0 and 1, i.e. per-candidate scores are assumed
        # to lie in [0, 1] -- TODO confirm against eval_tool. Strict
        # comparisons keep the first candidate on ties.
        best_f1, best_idx = 0, 0
        worst_f1, worst_idx = 1, 0
        for idx, cand_lines in enumerate(inst.lines):
            hyp = list(cand_lines) + ['\n']
            f1 = eval_tool.evaluate(hyp, inst.gold_lines)[0]
            if f1 > best_f1:
                best_f1, best_idx = f1, idx
            if f1 < worst_f1:
                worst_f1, worst_idx = f1, idx

        oracle_out.extend(inst.lines[best_idx])
        oracle_out.append('\n')
        worst_out.extend(inst.lines[worst_idx])
        worst_out.append('\n')

    print('f1score: %.4f'  % (eval_tool.evaluate(base_out, gold_out)[0]))
    print('oracle: %.4f'  % (eval_tool.evaluate(oracle_out, gold_out)[0]))
    print('worst: %.4f'  % (eval_tool.evaluate(worst_out, gold_out)[0]))
コード例 #6
0
def evaluate_baseline_random(data):
    """Print the score obtained by picking a random candidate per instance.

    The module-level random generator is seeded with 189 on every call, so
    the picks are reproducible. Nothing is returned.

    Args:
        data: iterable of instances exposing ``kbest``, ``lines`` and
            ``gold_lines``.
    """
    random.seed(189)
    hyp_lines = []
    ref_lines = []
    for inst in data:
        # Uniform draw in [0, 1) scaled to the k-best length and truncated.
        pick = int(random.random() * len(inst.kbest))
        hyp_lines.extend(inst.lines[pick])
        hyp_lines.append('\n')
        ref_lines.extend(inst.gold_lines)
        ref_lines.append('\n')
    print('baseline: %.4f' % (eval_tool.evaluate(hyp_lines, ref_lines)[0]))
コード例 #7
0
 def get_oracle_index(self):
     """Return the index of the best-scoring candidate in ``self.lines``.

     Every candidate is evaluated against ``self.gold_lines`` and its score
     is appended to ``self.f1score`` as a side effect. The first candidate
     whose score is strictly greater than all earlier ones (and greater
     than 0) wins; index 0 is returned when no score exceeds 0.
     """
     best_f1 = 0
     best_idx = 0
     for idx, cand_lines in enumerate(self.lines):
         hyp = list(cand_lines) + ['\n']
         f1 = eval_tool.evaluate(hyp, self.gold_lines)[0]
         self.f1score.append(f1)
         if f1 > best_f1:
             best_f1, best_idx = f1, idx
     return best_idx
コード例 #8
0
def evaluate_dataset_pair(model, data):
    """Pick one candidate per instance by pairwise model comparison.

    Starting with index 0 as the current best, every later candidate whose
    size matches the gold size is compared with the current best through
    ``model.train_pairwise``; it becomes the new best when the mean of the
    returned values is positive.

    Args:
        model: object providing ``train_pairwise(best, challenger, True)``.
            The meaning of the third argument is not visible here.
        data: iterable of instances exposing ``kbest``, ``gold``, ``lines``
            and ``gold_lines``.

    Returns:
        The result of ``eval_tool.evaluate``; its first element is printed
        as the F1 score.
    """
    hyp_lines = []
    ref_lines = []
    for inst in data:
        best = 0
        for j in range(1, len(inst.kbest)):
            challenger = inst.kbest[j]
            if challenger.size != inst.gold.size:
                # Size-mismatched candidates can never replace the best.
                continue
            loss = np.mean(
                model.train_pairwise(inst.kbest[best], challenger, True)
            )
            if loss > 0:
                best = j

        hyp_lines.extend(inst.lines[best])
        hyp_lines.append('\n')
        ref_lines.extend(inst.gold_lines)
        ref_lines.append('\n')

    res = eval_tool.evaluate(hyp_lines, ref_lines)
    print('f1score: %.4f' % (res[0]))
    return res
コード例 #9
0
 def set_f1(self):
     """Append the score of every candidate in ``self.lines`` to ``self.f1score``.

     Each candidate is evaluated against ``self.gold_lines`` with
     ``eval_tool.evaluate``; the first element of the result is stored.
     """
     for candidate in self.lines:
         score = eval_tool.evaluate(candidate, self.gold_lines)[0]
         self.f1score.append(score)