def oracleStatistics(p_iterator, g_iterator, n): TP = 0 FP = 0 FN = 0 TP_oracle = 0 FP_oracle = 0 FN_oracle = 0 counter = 0 for p_document, g_document in zip(p_iterator, g_iterator): #counter3 += 1 #counter += 1 #if counter > 30: # print FN # sys.exit(0) for p_child, g_child in zip(p_document, g_document): if g_child.tag == "sentence": assert p_child.attrib["origId"]==g_child.attrib["origId"] p_entities, p_pairs = getEntitiesAndPairs(p_child) g_entities, g_pairs = getEntitiesAndPairs(g_child) if len(p_pairs) == 0: FN += len(g_pairs) FN_oracle += len(g_pairs) else: g_edges, outside_count = getGSEdges(g_pairs, g_entities) predictions = getSimplePredictions(p_entities, p_pairs) table, table_transpose, keys = toTable(predictions) best = nbest.decode(table_transpose, n) p_edges = getPredictedEdges(predictions, keys, p_entities, best[0]) tp, fp, fn = getTP_FP_FN(g_edges, p_edges) TP += tp FP += fp FN += fn tp_best = tp fp_best = fp fn_best = fn best_s = score(tp, fp, fn) for i in range(1,len(best)): p_edges = getPredictedEdges(predictions, keys, p_entities, best[i]) tp_c, fp_c, fn_c = getTP_FP_FN(g_edges, p_edges) assert tp_c+fn_c == tp+fn s = score(tp_c, fp_c, fn_c) if s > best_s: tp_best = tp_c fp_best = fp_c fn_best = fn_c best_s = s TP_oracle += tp_best FP_oracle += fp_best FN_oracle += fn_best PR = float(TP)/float(TP+FP) R = float(TP)/float(TP+FN) PR_oracle = float(TP_oracle)/float(TP_oracle+FP_oracle) R_oracle = float(TP_oracle)/float(TP_oracle+FN_oracle) assert TP_oracle+FN_oracle == TP+FN print "TP", TP print "FP", FP print "FN", FN print "F-score", (2*PR*R)/(PR+R) print "TP (oracle)", TP_oracle print "FP (oracle)", FP_oracle print "FN (oracle)", FN_oracle print "F-score (oracle)", (2*PR_oracle*R_oracle)/(PR_oracle+R_oracle)
# Script entry section: open the gold file, build the gifxml document
# iterators for the predicted (p_file, opened earlier) and gold inputs,
# run the oracle statistics, then exit.
g_file = open(args[1])
p_parser = parseGifxml.gifxmlParser(p_file)
p_iterator = p_parser.documentIterator()
g_parser = parseGifxml.gifxmlParser(g_file)
g_iterator = g_parser.documentIterator()
counter = 1
oracleStatistics(p_iterator, g_iterator, options.nbest)
sys.exit(0)
# NOTE(review): everything below is unreachable — sys.exit(0) above
# terminates the script. Kept as-is; looks like an older experiment.
for p_document, g_document in zip(p_iterator, g_iterator):
    for p_child, g_child in zip(p_document, g_document):
        if p_child.tag == "sentence":
            # Note: this dead path matches on "id", while the live code
            # in oracleStatistics matches on "origId".
            assert p_child.attrib["id"]==g_child.attrib["id"]
            p_entities, p_pairs = getEntitiesAndPairs(p_child)
            g_entities, g_pairs = getEntitiesAndPairs(g_child)
            predictions = getSimplePredictions(p_entities, p_pairs)
            table, table_transpose, keys = toTable(predictions)
            best = nbest.decode(table_transpose,options.nbest)
            # NOTE(review): getTP_FP_FN is called here with 5 arguments but
            # with 2 inside oracleStatistics — presumably an older signature;
            # verify before resurrecting this path.
            getTP_FP_FN(g_entities, g_pairs, p_entities, predictions, best)
            # counter is never incremented here (the increment below is
            # commented out), so this early-exit would never trigger.
            if counter > 30:
                sys.exit(0)
#if predictions:
#    sys.exit(0)
#if len(predictions) > 0:
#    table, table_transpose, keys = toTable(predictions)
#    five = nbest.decode(table_transpose, 100)
#    print table
#    print five
#    counter += 1
#    if counter > 100:
#        assert False
# Reranker evaluation section: for each sentence, build the n-best output
# representations, score them with the external reranker, and track which
# candidate the reranker prefers (lowest score = preferred).
# NOTE(review): FN, FN_oracle, n, and counter are read here but not bound
# anywhere in this visible chunk — confirm they are initialized earlier in
# the file, otherwise this section raises NameError when it runs.
w_decisions = 0
ties = 0
for p_document, g_document in zip(p_iterator, g_iterator):
    for p_child, g_child in zip(p_document, g_document):
        if g_child.tag == "sentence":
            # Predicted and gold sentences must stay aligned.
            assert p_child.attrib["origId"]==g_child.attrib["origId"]
            p_entities, p_pairs = getEntitiesAndPairs(p_child)
            g_entities, g_pairs = getEntitiesAndPairs(g_child)
            if len(p_pairs) == 0:
                # No predictions: all gold pairs are misses.
                FN += len(g_pairs)
                FN_oracle += len(g_pairs)
            else:
                g_edges, outside_count = getGSEdges(g_pairs, g_entities)
                predictions = getSimplePredictions(p_entities, p_pairs)
                table, table_transpose, keys = toTable(predictions)
                best = nbest.decode(table_transpose, n)
                # 1-best decoding (baseline edges).
                p_edges = getPredictedEdges(predictions, keys, p_entities, best[0])
                # Output representations for every n-best candidate,
                # plus the gold-standard representation, for the reranker.
                Y = []
                for b in best:
                    Y.append(getOutputRepresentation(predictions, keys, p_entities, b))
                correct = getGSOutputRepresentation(g_entities, g_pairs)
                correct_score = reranker.score([correct], counter)[0]
                predicted_scores = reranker.score(Y, counter)
                # Linear scan for the minimum reranker score; lower score
                # appears to mean "preferred" — confirm against reranker docs.
                minimum = predicted_scores[0]
                min_index = 0
                #min_index = random.randint(0,len(predicted_scores)-1)
                for i in range(len(predicted_scores)):
                    if predicted_scores[i] < minimum:
                        minimum = predicted_scores[i]
                        min_index = i
                # Baseline counts for the 1-best candidate.
                tp_b, fp_b, fn_b = getTP_FP_FN(g_edges, p_edges)
# NOTE(review): this is an exact duplicate (modulo whitespace) of the
# oracleStatistics defined earlier in the file. Because the script calls
# sys.exit(0) before this point is executed at import time... actually the
# earlier sys.exit(0) terminates the script before this redefinition runs,
# so this copy is dead code — consider removing one of the two.
def oracleStatistics(p_iterator, g_iterator, n):
    """Print baseline and n-best-oracle TP/FP/FN counts and F-scores.

    Duplicate of the earlier definition; see that copy for the live code.
    """
    TP = 0
    FP = 0
    FN = 0
    TP_oracle = 0
    FP_oracle = 0
    FN_oracle = 0
    counter = 0  # unused: the increments below are commented out
    for p_document, g_document in zip(p_iterator, g_iterator):
        #counter3 += 1
        #counter += 1
        #if counter > 30:
        #    print FN
        #    sys.exit(0)
        for p_child, g_child in zip(p_document, g_document):
            if g_child.tag == "sentence":
                # Documents must stay aligned sentence-by-sentence.
                assert p_child.attrib["origId"] == g_child.attrib["origId"]
                p_entities, p_pairs = getEntitiesAndPairs(p_child)
                g_entities, g_pairs = getEntitiesAndPairs(g_child)
                if len(p_pairs) == 0:
                    # No predictions: all gold pairs are misses for both
                    # the baseline and the oracle.
                    FN += len(g_pairs)
                    FN_oracle += len(g_pairs)
                else:
                    g_edges, outside_count = getGSEdges(g_pairs, g_entities)
                    predictions = getSimplePredictions(p_entities, p_pairs)
                    table, table_transpose, keys = toTable(predictions)
                    best = nbest.decode(table_transpose, n)
                    # Baseline: 1-best decoding.
                    p_edges = getPredictedEdges(predictions, keys, p_entities, best[0])
                    tp, fp, fn = getTP_FP_FN(g_edges, p_edges)
                    TP += tp
                    FP += fp
                    FN += fn
                    tp_best = tp
                    fp_best = fp
                    fn_best = fn
                    best_s = score(tp, fp, fn)
                    # Oracle: keep the best-scoring candidate in the n-best list.
                    for i in range(1, len(best)):
                        p_edges = getPredictedEdges(predictions, keys, p_entities, best[i])
                        tp_c, fp_c, fn_c = getTP_FP_FN(g_edges, p_edges)
                        # Gold positives (tp+fn) are invariant across decodings.
                        assert tp_c + fn_c == tp + fn
                        s = score(tp_c, fp_c, fn_c)
                        if s > best_s:
                            tp_best = tp_c
                            fp_best = fp_c
                            fn_best = fn_c
                            best_s = s
                    TP_oracle += tp_best
                    FP_oracle += fp_best
                    FN_oracle += fn_best
    # NOTE(review): these divisions raise ZeroDivisionError when the corpus
    # is empty or no predictions/gold pairs exist.
    PR = float(TP) / float(TP + FP)
    R = float(TP) / float(TP + FN)
    PR_oracle = float(TP_oracle) / float(TP_oracle + FP_oracle)
    R_oracle = float(TP_oracle) / float(TP_oracle + FN_oracle)
    assert TP_oracle + FN_oracle == TP + FN
    print "TP", TP
    print "FP", FP
    print "FN", FN
    print "F-score", (2 * PR * R) / (PR + R)
    print "TP (oracle)", TP_oracle
    print "FP (oracle)", FP_oracle
    print "FN (oracle)", FN_oracle
    print "F-score (oracle)", (2 * PR_oracle * R_oracle) / (PR_oracle + R_oracle)
# NOTE(review): exact duplicate (modulo whitespace) of the script section
# earlier in the file; the earlier copy's sys.exit(0) means this one never
# runs. Consider removing one of the two.
g_file = open(args[1])
p_parser = parseGifxml.gifxmlParser(p_file)
p_iterator = p_parser.documentIterator()
g_parser = parseGifxml.gifxmlParser(g_file)
g_iterator = g_parser.documentIterator()
counter = 1
oracleStatistics(p_iterator, g_iterator, options.nbest)
sys.exit(0)
# Unreachable below: sys.exit(0) above terminates the script.
for p_document, g_document in zip(p_iterator, g_iterator):
    for p_child, g_child in zip(p_document, g_document):
        if p_child.tag == "sentence":
            # Note: matches on "id" here, while oracleStatistics uses "origId".
            assert p_child.attrib["id"] == g_child.attrib["id"]
            p_entities, p_pairs = getEntitiesAndPairs(p_child)
            g_entities, g_pairs = getEntitiesAndPairs(g_child)
            predictions = getSimplePredictions(p_entities, p_pairs)
            table, table_transpose, keys = toTable(predictions)
            best = nbest.decode(table_transpose, options.nbest)
            # NOTE(review): 5-argument call vs the 2-argument getTP_FP_FN used
            # inside oracleStatistics — presumably an older signature; verify
            # before resurrecting this path.
            getTP_FP_FN(g_entities, g_pairs, p_entities, predictions, best)
            # counter is never incremented (increment is commented out below),
            # so this early-exit would never trigger.
            if counter > 30:
                sys.exit(0)
#if predictions:
#    sys.exit(0)
#if len(predictions) > 0:
#    table, table_transpose, keys = toTable(predictions)
#    five = nbest.decode(table_transpose, 100)
#    print table
#    print five
#    counter += 1
#    if counter > 100:
#        assert False
w_decisions = 0 ties = 0 for p_document, g_document in zip(p_iterator, g_iterator): for p_child, g_child in zip(p_document, g_document): if g_child.tag == "sentence": assert p_child.attrib["origId"] == g_child.attrib["origId"] p_entities, p_pairs = getEntitiesAndPairs(p_child) g_entities, g_pairs = getEntitiesAndPairs(g_child) if len(p_pairs) == 0: FN += len(g_pairs) FN_oracle += len(g_pairs) else: g_edges, outside_count = getGSEdges(g_pairs, g_entities) predictions = getSimplePredictions(p_entities, p_pairs) table, table_transpose, keys = toTable(predictions) best = nbest.decode(table_transpose, n) p_edges = getPredictedEdges(predictions, keys, p_entities, best[0]) Y = [] for b in best: Y.append( getOutputRepresentation(predictions, keys, p_entities, b)) correct = getGSOutputRepresentation(g_entities, g_pairs) correct_score = reranker.score([correct], counter)[0] predicted_scores = reranker.score(Y, counter) minimum = predicted_scores[0] min_index = 0 #min_index = random.randint(0,len(predicted_scores)-1) for i in range(len(predicted_scores)): if predicted_scores[i] < minimum: