Exemple #1
0
def oracleStatistics(p_iterator, g_iterator, n):
    """Print 1-best and oracle n-best TP/FP/FN counts and F-scores.

    The predicted and gold-standard documents are walked in lockstep.  For
    every child whose gold tag is "sentence", the candidates returned by
    ``nbest.decode`` are compared against the gold-standard edges:

    * the plain counts accumulate the result of the first candidate;
    * the oracle counts accumulate the result of whichever candidate has
      the highest ``score(tp, fp, fn)`` (the first one wins ties).

    A sentence without predicted pairs adds all of its gold pairs as false
    negatives to both sets of counts.

    Args:
        p_iterator: iterable of predicted documents; each document yields
            children exposing ``tag`` and ``attrib``.
        g_iterator: iterable of the matching gold-standard documents, in
            the same order as ``p_iterator``.
        n: forwarded to ``nbest.decode`` as its second argument
            (presumably the number of candidates to decode -- confirm
            against ``nbest``).

    Returns:
        None.  The counts and F-scores are printed to standard output.
        An F-score whose precision or recall is undefined (empty
        denominator) is reported as 0.0 instead of raising
        ZeroDivisionError.

    Raises:
        AssertionError: if paired sentences have different ``origId``
            values, if the candidates of one sentence disagree on
            ``tp + fn``, or if the plain and oracle totals disagree on
            ``TP + FN``.
    """

    def ratio(numerator, denominator):
        # An empty denominator means the quantity is undefined; report 0.0
        # rather than crashing before any statistic has been printed.
        if denominator == 0:
            return 0.0
        return float(numerator) / float(denominator)

    def f_score(tp, fp, fn):
        precision = ratio(tp, tp + fp)
        recall = ratio(tp, tp + fn)
        return ratio(2 * precision * recall, precision + recall)

    TP = 0
    FP = 0
    FN = 0
    TP_oracle = 0
    FP_oracle = 0
    FN_oracle = 0
    for p_document, g_document in zip(p_iterator, g_iterator):
        for p_child, g_child in zip(p_document, g_document):
            if g_child.tag != "sentence":
                continue
            assert p_child.attrib["origId"] == g_child.attrib["origId"]
            p_entities, p_pairs = getEntitiesAndPairs(p_child)
            g_entities, g_pairs = getEntitiesAndPairs(g_child)
            if len(p_pairs) == 0:
                # Nothing was predicted: every gold pair is missed, and no
                # candidate list exists for the oracle to improve on.
                FN += len(g_pairs)
                FN_oracle += len(g_pairs)
                continue
            g_edges, _outside_count = getGSEdges(g_pairs, g_entities)
            predictions = getSimplePredictions(p_entities, p_pairs)
            _table, table_transpose, keys = toTable(predictions)
            best = nbest.decode(table_transpose, n)

            # The first candidate is what the plain counts are based on.
            p_edges = getPredictedEdges(predictions, keys, p_entities, best[0])
            tp, fp, fn = getTP_FP_FN(g_edges, p_edges)
            TP += tp
            FP += fp
            FN += fn

            # Oracle: keep the counts of the best-scoring candidate.
            tp_best, fp_best, fn_best = tp, fp, fn
            best_s = score(tp, fp, fn)
            for i in range(1, len(best)):
                p_edges = getPredictedEdges(predictions, keys, p_entities,
                                            best[i])
                tp_c, fp_c, fn_c = getTP_FP_FN(g_edges, p_edges)
                # Every candidate is judged against the same gold edges.
                assert tp_c + fn_c == tp + fn
                s = score(tp_c, fp_c, fn_c)
                if s > best_s:
                    tp_best, fp_best, fn_best = tp_c, fp_c, fn_c
                    best_s = s
            TP_oracle += tp_best
            FP_oracle += fp_best
            FN_oracle += fn_best

    assert TP_oracle + FN_oracle == TP + FN
    # Single-argument print calls give the same output on Python 2 and 3.
    print("TP %s" % TP)
    print("FP %s" % FP)
    print("FN %s" % FN)
    print("F-score %s" % f_score(TP, FP, FN))
    print("TP (oracle) %s" % TP_oracle)
    print("FP (oracle) %s" % FP_oracle)
    print("FN (oracle) %s" % FN_oracle)
    print("F-score (oracle) %s" % f_score(TP_oracle, FP_oracle, FN_oracle))
Exemple #2
0
    g_file = open(args[1])
    p_parser = parseGifxml.gifxmlParser(p_file)
    p_iterator = p_parser.documentIterator()
    g_parser = parseGifxml.gifxmlParser(g_file)
    g_iterator = g_parser.documentIterator()
    counter = 1
    oracleStatistics(p_iterator, g_iterator, options.nbest)
    sys.exit(0)
    for p_document, g_document in zip(p_iterator, g_iterator):
        for p_child, g_child in zip(p_document, g_document):
            if p_child.tag == "sentence":
                assert p_child.attrib["id"]==g_child.attrib["id"]
                p_entities, p_pairs = getEntitiesAndPairs(p_child)
                g_entities, g_pairs = getEntitiesAndPairs(g_child)
                predictions = getSimplePredictions(p_entities, p_pairs)
                table, table_transpose, keys = toTable(predictions)
                best = nbest.decode(table_transpose,options.nbest)
                getTP_FP_FN(g_entities, g_pairs, p_entities, predictions, best)
                if counter > 30:
                    sys.exit(0)
                #if predictions:
                #    sys.exit(0)
                #if len(predictions) > 0:
                #    table, table_transpose, keys = toTable(predictions)
                #    five = nbest.decode(table_transpose, 100)
                #    print table
                #    print five
                #    counter += 1
                #    if counter > 100:
                #        assert False
Exemple #3
0
 w_decisions = 0
 ties = 0
 for p_document, g_document in zip(p_iterator, g_iterator):
     for p_child, g_child in zip(p_document, g_document):
         if g_child.tag == "sentence":
             assert p_child.attrib["origId"]==g_child.attrib["origId"]
             p_entities, p_pairs = getEntitiesAndPairs(p_child)
             g_entities, g_pairs = getEntitiesAndPairs(g_child)
             if len(p_pairs) == 0:
                 FN += len(g_pairs)
                 FN_oracle += len(g_pairs)
             else:
                 g_edges, outside_count = getGSEdges(g_pairs, g_entities)
                 predictions = getSimplePredictions(p_entities, p_pairs)
                 table, table_transpose, keys = toTable(predictions)
                 best = nbest.decode(table_transpose, n)
                 p_edges = getPredictedEdges(predictions, keys, p_entities, best[0])
                 Y = []
                 for b in best:
                     Y.append(getOutputRepresentation(predictions, keys, p_entities, b))
                 correct = getGSOutputRepresentation(g_entities, g_pairs)
                 correct_score = reranker.score([correct], counter)[0]
                 predicted_scores = reranker.score(Y, counter)
                 minimum = predicted_scores[0]
                 min_index = 0
                 #min_index = random.randint(0,len(predicted_scores)-1)
                 for i in range(len(predicted_scores)):
                     if predicted_scores[i] < minimum:
                         minimum = predicted_scores[i]
                         min_index = i
                 tp_b, fp_b, fn_b = getTP_FP_FN(g_edges, p_edges)
Exemple #4
0
def oracleStatistics(p_iterator, g_iterator, n):
    """Report how much an oracle could gain from the n-best candidates.

    Predicted and gold-standard documents are consumed pairwise, and so
    are their children.  Only children whose gold tag is "sentence" are
    evaluated.  Two sets of TP/FP/FN totals are accumulated:

    * plain totals, taken from the first candidate returned by
      ``nbest.decode``;
    * oracle totals, taken from the candidate with the largest
      ``score(tp, fp, fn)`` (a later candidate must score strictly higher
      to replace an earlier one).

    When a sentence has no predicted pairs, the number of its gold pairs
    is added to the false negatives of both totals.

    Args:
        p_iterator: iterable of predicted documents, each iterable over
            children that have ``tag`` and ``attrib``.
        g_iterator: iterable of gold-standard documents aligned with
            ``p_iterator``.
        n: second argument handed to ``nbest.decode`` (presumably the
            size of the n-best list -- verify against ``nbest``).

    Returns:
        None; the totals and both F-scores are printed to standard
        output.  If precision or recall cannot be computed because a
        denominator is zero, the affected F-score is printed as 0.0
        rather than raising ZeroDivisionError.

    Raises:
        AssertionError: on mismatching ``origId`` attributes, on
            candidates of one sentence whose ``tp + fn`` differ, or when
            the plain and oracle totals disagree on ``TP + FN``.
    """

    def f_score(tp, fp, fn):
        # Guard each division: with no predictions (tp + fp == 0), no gold
        # edges (tp + fn == 0) or no true positives the score is undefined,
        # and 0.0 is reported in its place.
        predicted = tp + fp
        relevant = tp + fn
        if predicted == 0 or relevant == 0:
            return 0.0
        precision = float(tp) / float(predicted)
        recall = float(tp) / float(relevant)
        if precision + recall == 0:
            return 0.0
        return (2 * precision * recall) / (precision + recall)

    TP = FP = FN = 0
    TP_oracle = FP_oracle = FN_oracle = 0
    for p_document, g_document in zip(p_iterator, g_iterator):
        for p_child, g_child in zip(p_document, g_document):
            if g_child.tag == "sentence":
                assert p_child.attrib["origId"] == g_child.attrib["origId"]
                p_entities, p_pairs = getEntitiesAndPairs(p_child)
                g_entities, g_pairs = getEntitiesAndPairs(g_child)
                if len(p_pairs) == 0:
                    # No predictions at all: all gold pairs count as missed
                    # for the plain and the oracle totals alike.
                    FN += len(g_pairs)
                    FN_oracle += len(g_pairs)
                else:
                    g_edges, _outside_count = getGSEdges(g_pairs, g_entities)
                    predictions = getSimplePredictions(p_entities, p_pairs)
                    _table, table_transpose, keys = toTable(predictions)
                    best = nbest.decode(table_transpose, n)
                    p_edges = getPredictedEdges(predictions, keys, p_entities,
                                                best[0])
                    tp, fp, fn = getTP_FP_FN(g_edges, p_edges)
                    TP += tp
                    FP += fp
                    FN += fn
                    # Start the oracle from the first candidate and replace
                    # it only by a strictly better-scoring one.
                    oracle_counts = (tp, fp, fn)
                    best_s = score(tp, fp, fn)
                    for i in range(1, len(best)):
                        p_edges = getPredictedEdges(predictions, keys,
                                                    p_entities, best[i])
                        tp_c, fp_c, fn_c = getTP_FP_FN(g_edges, p_edges)
                        # All candidates share the same gold edges.
                        assert tp_c + fn_c == tp + fn
                        s = score(tp_c, fp_c, fn_c)
                        if s > best_s:
                            oracle_counts = (tp_c, fp_c, fn_c)
                            best_s = s
                    TP_oracle += oracle_counts[0]
                    FP_oracle += oracle_counts[1]
                    FN_oracle += oracle_counts[2]

    assert TP_oracle + FN_oracle == TP + FN
    # One formatted argument per call keeps the output identical whether
    # print is a statement (Python 2) or a function (Python 3).
    print("TP %s" % TP)
    print("FP %s" % FP)
    print("FN %s" % FN)
    print("F-score %s" % f_score(TP, FP, FN))
    print("TP (oracle) %s" % TP_oracle)
    print("FP (oracle) %s" % FP_oracle)
    print("FN (oracle) %s" % FN_oracle)
    print("F-score (oracle) %s" % f_score(TP_oracle, FP_oracle, FN_oracle))
Exemple #5
0
    g_file = open(args[1])
    p_parser = parseGifxml.gifxmlParser(p_file)
    p_iterator = p_parser.documentIterator()
    g_parser = parseGifxml.gifxmlParser(g_file)
    g_iterator = g_parser.documentIterator()
    counter = 1
    oracleStatistics(p_iterator, g_iterator, options.nbest)
    sys.exit(0)
    for p_document, g_document in zip(p_iterator, g_iterator):
        for p_child, g_child in zip(p_document, g_document):
            if p_child.tag == "sentence":
                assert p_child.attrib["id"] == g_child.attrib["id"]
                p_entities, p_pairs = getEntitiesAndPairs(p_child)
                g_entities, g_pairs = getEntitiesAndPairs(g_child)
                predictions = getSimplePredictions(p_entities, p_pairs)
                table, table_transpose, keys = toTable(predictions)
                best = nbest.decode(table_transpose, options.nbest)
                getTP_FP_FN(g_entities, g_pairs, p_entities, predictions, best)
                if counter > 30:
                    sys.exit(0)
                #if predictions:
                #    sys.exit(0)
                #if len(predictions) > 0:
                #    table, table_transpose, keys = toTable(predictions)
                #    five = nbest.decode(table_transpose, 100)
                #    print table
                #    print five
                #    counter += 1
                #    if counter > 100:
                #        assert False
 w_decisions = 0
 ties = 0
 for p_document, g_document in zip(p_iterator, g_iterator):
     for p_child, g_child in zip(p_document, g_document):
         if g_child.tag == "sentence":
             assert p_child.attrib["origId"] == g_child.attrib["origId"]
             p_entities, p_pairs = getEntitiesAndPairs(p_child)
             g_entities, g_pairs = getEntitiesAndPairs(g_child)
             if len(p_pairs) == 0:
                 FN += len(g_pairs)
                 FN_oracle += len(g_pairs)
             else:
                 g_edges, outside_count = getGSEdges(g_pairs, g_entities)
                 predictions = getSimplePredictions(p_entities, p_pairs)
                 table, table_transpose, keys = toTable(predictions)
                 best = nbest.decode(table_transpose, n)
                 p_edges = getPredictedEdges(predictions, keys, p_entities,
                                             best[0])
                 Y = []
                 for b in best:
                     Y.append(
                         getOutputRepresentation(predictions, keys,
                                                 p_entities, b))
                 correct = getGSOutputRepresentation(g_entities, g_pairs)
                 correct_score = reranker.score([correct], counter)[0]
                 predicted_scores = reranker.score(Y, counter)
                 minimum = predicted_scores[0]
                 min_index = 0
                 #min_index = random.randint(0,len(predicted_scores)-1)
                 for i in range(len(predicted_scores)):
                     if predicted_scores[i] < minimum: