Example #1
0
def eval_reranker(res_fname="svm.test.res", pred_fname="svm.train.pred", 
                  format="trec",
                  th=50, 
                  verbose=False,
                  reranking_th=-100.0,
                  ignore_noanswer=False, ignore_allanswer=False):
	ir, svm = read_res_pred_files(res_fname, pred_fname, format, verbose, 
		                              reranking_th=reranking_th, 
		                              ignore_noanswer=ignore_noanswer,
								  	ignore_allanswer=ignore_allanswer)

	# evaluate IR
	prec_se = metrics.recall_of_1(ir, th)
	acc_se = metrics.accuracy(ir, th)
	acc_se1 = metrics.accuracy1(ir, th)
	acc_se2 = metrics.accuracy2(ir, th)

	# evaluate SVM
	prec_svm = metrics.recall_of_1(svm, th)
	acc_svm = metrics.accuracy(svm, th)
	acc_svm1 = metrics.accuracy1(svm, th)
	acc_svm2 = metrics.accuracy2(svm, th)

	mrr_se = metrics.mrr(ir, th)
	mrr_svm = metrics.mrr(svm, th)
	map_se = metrics.map(ir)
	map_svm = metrics.map(svm)

	avg_acc1_svm = metrics.avg_acc1(svm, th)
	avg_acc1_ir = metrics.avg_acc1(ir, th)

	'''
	print "%13s %5s" %("IR", "SVM")
	print "MRR: %5.2f %5.2f" %(mrr_se, mrr_svm)
	print "MAP: %5.4f %5.4f" %(map_se, map_svm)
	print "AvgRec: %5.2f %5.2f" %(avg_acc1_ir, avg_acc1_svm)
	print "%16s %6s  %14s %6s  %14s %6s  %12s %4s" % ("IR", "SVM", "IR", "SVM", "IR", "SVM", "IR", "SVM")
	'''
	rec1_se =-10
	rec1_svm = -10
	for i, (p_se, p_svm, a_se, a_svm, a_se1, a_svm1, a_se2, a_svm2) in enumerate(zip(prec_se, prec_svm, acc_se, acc_svm, acc_se1, acc_svm1, acc_se2, acc_svm2), 1):
		#print "REC-1@%02d: %6.2f %6.2f  ACC@%02d: %6.2f %6.2f  AC1@%02d: %6.2f %6.2f  AC2@%02d: %4.0f %4.0f" %(i, p_se, p_svm, i, a_se, a_svm, i, a_se1, a_svm1, i, a_se2, a_svm2)
		if (rec1_se<-5):
			rec1_se = p_se
			rec1_svm = p_svm


	'''
	print "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks were questions have at most one correct answer)"
	print "ACC   - accuracy, i.e. number of correct answers retrieved at rank @X normalized by the rank and the total number of questions"
	print "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)"
	print "AC2   - the absolute number of correct answers at @X"
	'''

	print "Table view"
	print "	MRR	MAP	P@1"
	print "REF_FILE	%5.2f	%5.2f	%5.2f" % (mrr_se, map_se*100, rec1_se)
	print "SVM	%5.2f	%5.2f	%5.2f" % (mrr_svm, map_svm*100, rec1_svm)
Example #2
0
def eval_reranker(res_fname="svm.test.res",
                  pred_fname="svm.train.pred",
                  format="trec",
                  th=10,
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
    ir, svm, conf_matrix = read_res_pred_files(res_fname,
                                               pred_fname,
                                               format,
                                               verbose,
                                               reranking_th=reranking_th,
                                               ignore_noanswer=ignore_noanswer)
    # Calculate standard P, R, F1, Acc
    acc = 1.0 * (
        conf_matrix['true']['true'] + conf_matrix['false']['false']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['false'] +
            conf_matrix['true']['false'] + conf_matrix['false']['true'])
    p = 0
    if (conf_matrix['true']['true'] + conf_matrix['false']['true']) > 0:
        p = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['true'])
    r = 0
    if (conf_matrix['true']['true'] + conf_matrix['true']['false']) > 0:
        r = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['true']['false'])
    f1 = 0
    if (p + r) > 0:
        f1 = 2.0 * p * r / (p + r)

    # evaluate IR
    prec_se = metrics.recall_of_1(ir, th)
    acc_se = metrics.accuracy(ir, th)
    acc_se1 = metrics.accuracy1(ir, th)
    acc_se2 = metrics.accuracy2(ir, th)

    # evaluate SVM
    prec_svm = metrics.recall_of_1(svm, th)
    acc_svm = metrics.accuracy(svm, th)
    acc_svm1 = metrics.accuracy1(svm, th)
    acc_svm2 = metrics.accuracy2(svm, th)

    mrr_se = metrics.mrr(ir, th)
    mrr_svm = metrics.mrr(svm, th)
    map_se = metrics.map(ir, th)
    map_svm = metrics.map(svm, th)

    avg_acc1_svm = metrics.avg_acc1(svm, th)
    avg_acc1_ir = metrics.avg_acc1(ir, th)

    print "acc\tf1\tMAP\tMRR\tAvgRec"
    print "%.4f %4.4f %4.4f %4.4f %4.4f" % (acc, f1, map_svm, mrr_svm,
                                            avg_acc1_svm)
Example #3
0
def eval_search_engine(res_fname, format, th=10):
    """Score a raw search-engine ranking and print per-rank curves.

    Computes REC-1, ACC, AC1 and AC2 at every rank up to th, plus MRR,
    prints one line per rank and then a legend explaining each metric.
    """
    ranking = read_res_file(res_fname, format)

    # Per-rank evaluation curves for the IR output (one value per rank).
    rec1_curve = metrics.recall_of_1(ranking, th)
    acc_curve = metrics.accuracy(ranking, th)
    ac1_curve = metrics.accuracy1(ranking, th)
    ac2_curve = metrics.accuracy2(ranking, th)

    mrr_score = metrics.mrr(ranking, th)

    print("%13s" % "IR")
    print("MRRof1: %5.2f" % mrr_score)
    rank = 0
    for r, a, a1, a2 in zip(rec1_curve, acc_curve, ac1_curve, ac2_curve):
        rank += 1
        print(
            "REC-1@%02d: %6.2f  ACC@%02d: %6.2f  AC1@%02d: %6.2f  AC2@%02d: %4.0f"
            % (rank, r, rank, a, rank, a1, rank, a2))
    print()
    legend = (
        "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks were questions have at most one correct answer)",
        "ACC   - accuracy, i.e. number of correct answers retrieved at rank @X normalized by the rank and the total number of questions",
        "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)",
        "AC2   - the absolute number of correct answers at @X",
    )
    for line in legend:
        print(line)
Example #4
0
def eval_search_engine(res_fname, format, th=50):
	ir = read_res_file(res_fname, format)		

	# evaluate IR
	rec = metrics.recall_of_1(ir, th)
	acc = metrics.accuracy(ir, th)
	acc1 = metrics.accuracy1(ir, th)
	acc2 = metrics.accuracy2(ir, th)

	mrr = metrics.mrr(ir, th)

  # MAP
	map_ir = metrics.map(ir)
  

	print "%10s" %"IR"
	print "MRR: %5.2f" % mrr
	print "MAP: %5.2f" % map_ir
	for i, (r, a, a1, a2) in enumerate(zip(rec, acc, acc1, acc2), 1):
		print "REC-1@%02d: %6.2f  ACC@%02d: %6.2f  AC1@%02d: %6.2f  AC2@%02d: %4.0f" %(i, r, i, a, i, a1, i, a2)
	print
	print "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks were questions have at most one correct answer)"
	print "ACC   - accuracy, i.e. number of correct answers retrieved at rank @X normalized by the rank and the total number of questions"
	print "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)"
	print "AC2   - the absolute number of correct answers at @X"
Example #5
0
def eval_reranker(res_fname="svm.test.res", pred_fname="svm.train.pred", 
                  format="trec",
                  th=50, 
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
	ir, svm = read_res_pred_files(res_fname, pred_fname, format, verbose, 
		                              reranking_th=reranking_th, 
		                              ignore_noanswer=ignore_noanswer)		
	# evaluate IR
	prec_se = metrics.recall_of_1(ir, th)
	acc_se = metrics.accuracy(ir, th)
	acc_se1 = metrics.accuracy1(ir, th)
	acc_se2 = metrics.accuracy2(ir, th)

	# evaluate SVM
	prec_svm = metrics.recall_of_1(svm, th)
	acc_svm = metrics.accuracy(svm, th)
	acc_svm1 = metrics.accuracy1(svm, th)
	acc_svm2 = metrics.accuracy2(svm, th)

	mrr_se = metrics.mrr(ir, th)
	mrr_svm = metrics.mrr(svm, th)
	map_se = metrics.map(ir)
	map_svm = metrics.map(svm)

	avg_acc1_svm = metrics.avg_acc1(svm, th)
	avg_acc1_ir = metrics.avg_acc1(ir, th)

	print "%13s %5s" %("IR", "SVM")
	print "MRR: %5.2f %5.2f" %(mrr_se, mrr_svm)
	print "MAP: %5.4f %5.4f" %(map_se, map_svm)
	print "AvgRec: %5.2f %5.2f" %(avg_acc1_ir, avg_acc1_svm)
	print "%16s %6s  %14s %6s  %14s %6s  %12s %4s" % ("IR", "SVM", "IR", "SVM", "IR", "SVM", "IR", "SVM")
	for i, (p_se, p_svm, a_se, a_svm, a_se1, a_svm1, a_se2, a_svm2) in enumerate(zip(prec_se, prec_svm, acc_se, acc_svm, acc_se1, acc_svm1, acc_se2, acc_svm2), 1):
		print "REC-1@%02d: %6.2f %6.2f  ACC@%02d: %6.2f %6.2f  AC1@%02d: %6.2f %6.2f  AC2@%02d: %4.0f %4.0f" %(i, p_se, p_svm, i, a_se, a_svm, i, a_se1, a_svm1, i, a_se2, a_svm2)
	print
	print "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks were questions have at most one correct answer)"
	print "ACC   - accuracy, i.e. number of correct answers retrieved at rank @X normalized by the rank and the total number of questions"
	print "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)"
	print "AC2   - the absolute number of correct answers at @X"
Example #6
0
def eval_reranker(res_fname="svm.test.res",
                  pred_fname="svm.train.pred",
                  format="trec",
                  th=10,
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
    """Evaluate a re-ranker (SYS) against the IR baseline and print a report.

    The report contains: the official score (MAP for SYS), classification
    scores (Acc/P/R/F1) from the confusion matrix, ranking scores
    (MAP/AvgRec/MRR) for both systems, and the per-rank
    REC-1/ACC/AC1/AC2 curves with a legend.
    """
    ir, svm, conf_matrix = read_res_pred_files(res_fname,
                                               pred_fname,
                                               format,
                                               verbose,
                                               reranking_th=reranking_th,
                                               ignore_noanswer=ignore_noanswer)
    # Standard classification scores from the confusion matrix.  Every
    # denominator is guarded: the original guarded p, r and f1 but let
    # the accuracy division raise ZeroDivisionError on an empty matrix.
    tt = conf_matrix['true']['true']
    ff = conf_matrix['false']['false']
    tf = conf_matrix['true']['false']
    ft = conf_matrix['false']['true']
    total = tt + ff + tf + ft
    acc = 1.0 * (tt + ff) / total if total > 0 else 0
    p = 1.0 * tt / (tt + ft) if (tt + ft) > 0 else 0
    r = 1.0 * tt / (tt + tf) if (tt + tf) > 0 else 0
    f1 = 2.0 * p * r / (p + r) if (p + r) > 0 else 0

    # Per-rank curves for the IR baseline.
    prec_se = metrics.recall_of_1(ir, th)
    acc_se = metrics.accuracy(ir, th)
    acc_se1 = metrics.accuracy1(ir, th)
    acc_se2 = metrics.accuracy2(ir, th)

    # Per-rank curves for the tested system (SVM re-ranker).
    prec_svm = metrics.recall_of_1(svm, th)
    acc_svm = metrics.accuracy(svm, th)
    acc_svm1 = metrics.accuracy1(svm, th)
    acc_svm2 = metrics.accuracy2(svm, th)

    mrr_se = metrics.mrr(ir, th)
    mrr_svm = metrics.mrr(svm, th)
    map_se = metrics.map(ir, th)
    map_svm = metrics.map(svm, th)

    avg_acc1_svm = metrics.avg_acc1(svm, th)
    avg_acc1_ir = metrics.avg_acc1(ir, th)

    print("")
    print("*** Official score (MAP for SYS): %5.4f" % (map_svm))
    print("")
    print("")
    print("******************************")
    print("*** Classification results ***")
    print("******************************")
    print("")
    print("Acc = %5.4f" % (acc))
    print("P   = %5.4f" % (p))
    print("R   = %5.4f" % (r))
    print("F1  = %5.4f" % (f1))
    print("")
    print("")
    print("********************************")
    print("*** Detailed ranking results ***")
    print("********************************")
    print("")
    print("IR  -- Score for the output of the IR system (baseline).")
    print("SYS -- Score for the output of the tested system.")
    print("")
    print("%13s %5s" % ("IR", "SYS"))
    print("MAP   : %5.4f %5.4f" % (map_se, map_svm))
    print("AvgRec: %5.4f %5.4f" % (avg_acc1_ir, avg_acc1_svm))
    print("MRR   : %6.2f %6.2f" % (mrr_se, mrr_svm))
    print("%16s %6s  %14s %6s  %14s %6s  %12s %4s" %
          ("IR", "SYS", "IR", "SYS", "IR", "SYS", "IR", "SYS"))
    for i, (p_se, p_svm, a_se, a_svm, a_se1, a_svm1, a_se2,
            a_svm2) in enumerate(
                zip(prec_se, prec_svm, acc_se, acc_svm, acc_se1, acc_svm1,
                    acc_se2, acc_svm2), 1):
        print(
            "REC-1@%02d: %6.2f %6.2f  ACC@%02d: %6.2f %6.2f  AC1@%02d: %6.2f %6.2f  AC2@%02d: %4.0f %4.0f"
            % (i, p_se, p_svm, i, a_se, a_svm, i, a_se1, a_svm1, i, a_se2,
               a_svm2))
    print()
    print(
        "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks where questions have at most one correct answer)"
    )
    print(
        "ACC   - accuracy, i.e., number of correct answers retrieved at rank @X normalized by the rank and the total number of questions"
    )
    print(
        "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)"
    )
    print("AC2   - the absolute number of correct answers at @X")
Example #7
0
def eval_reranker(res_fname="svm.test.res",
                  pred_fname="svm.train.pred",
                  format="trec",
                  th=10,
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
    """Evaluate a re-ranker against the IR baseline.

    Computes classification scores (Acc/P/R/F1) from the confusion
    matrix and ranking scores (MAP, MRR, AvgRec) for both rankings, but
    prints only a single summary line (MAP/MRR/AvgRec for the re-ranked
    output); the full report is kept as commented-out code below.
    """
    ir, svm, conf_matrix = read_res_pred_files(res_fname,
                                               pred_fname,
                                               format,
                                               verbose,
                                               reranking_th=reranking_th,
                                               ignore_noanswer=ignore_noanswer)
    # Calculate standard P, R, F1, Acc
    # NOTE(review): unlike p/r/f1 below, this division is unguarded and
    # raises ZeroDivisionError when the confusion matrix is all zeros.
    acc = 1.0 * (
        conf_matrix['true']['true'] + conf_matrix['false']['false']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['false'] +
            conf_matrix['true']['false'] + conf_matrix['false']['true'])
    p = 0
    if (conf_matrix['true']['true'] + conf_matrix['false']['true']) > 0:
        p = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['true'])
    r = 0
    if (conf_matrix['true']['true'] + conf_matrix['true']['false']) > 0:
        r = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['true']['false'])
    f1 = 0
    if (p + r) > 0:
        f1 = 2.0 * p * r / (p + r)

    # evaluate IR (per-rank curves; computed but not printed here)
    prec_se = metrics.recall_of_1(ir, th)
    acc_se = metrics.accuracy(ir, th)
    acc_se1 = metrics.accuracy1(ir, th)
    acc_se2 = metrics.accuracy2(ir, th)

    # evaluate SVM (per-rank curves; computed but not printed here)
    prec_svm = metrics.recall_of_1(svm, th)
    acc_svm = metrics.accuracy(svm, th)
    acc_svm1 = metrics.accuracy1(svm, th)
    acc_svm2 = metrics.accuracy2(svm, th)

    mrr_se = metrics.mrr(ir, th)
    mrr_svm = metrics.mrr(svm, th)
    map_se = metrics.map(ir, th)
    map_svm = metrics.map(svm, th)

    avg_acc1_svm = metrics.avg_acc1(svm, th)
    avg_acc1_ir = metrics.avg_acc1(ir, th)

    #print ""
    #print "*** Official score (MAP for SYS): %5.4f" %(map_svm)
    #print ""
    #print ""
    #print "******************************"
    #print "*** Classification results ***"
    #print "******************************"
    #print ""
    #print "Acc = %5.4f" %(acc)
    #print "P   = %5.4f" %(p)
    #print "R   = %5.4f" %(r)
    #print "F1  = %5.4f" %(f1)
    #print ""
    #print ""
    #print "********************************"
    #print "*** Detailed ranking results ***"
    #print "********************************"
    #print ""
    #print "IR  -- Score for the output of the IR system (baseline)."
    #print "SYS -- Score for the output of the tested system."
    #print ""
    #print "%13s %5s" %("IR", "SYS")
    #print "MAP   : %5.4f %5.4f" %(map_se, map_svm)
    #print "AvgRec: %5.4f %5.4f" %(avg_acc1_ir, avg_acc1_svm)
    #print "MRR   : %6.2f %6.2f" %(mrr_se, mrr_svm)
    print "MAP   : %5.4f\tMRR   : %5.4f\tAvgRec: %5.4f" % (map_svm, mrr_svm,
                                                           avg_acc1_svm)
    #print "Acc   : %5.4f" %(acc)
    #print "P     : %5.4f" %(p)
    #print "R     : %5.4f" %(r)
    #print "F1    : %5.4f" %(f1)
    # NOTE(review): stray opening triple-quote below — the function looks
    # truncated at this point; the matching closing quote is not visible
    # in this view. Confirm against the full file before removing.
    """
Example #8
0
def eval_reranker(res_fname="svm.test.res", pred_fname="svm.train.pred",
                  format="trec",
                  th=10,
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
    """Evaluate a re-ranker (SYS) against the IR baseline, print a full
    report and return the official score.

    The report contains: the official score (MAP for SYS), classification
    scores (Acc/P/R/F1) from the confusion matrix, ranking scores
    (MAP/AvgRec/MRR) for both systems, and the per-rank
    REC-1/ACC/AC1/AC2 curves with a legend.

    Returns:
        The MAP of the re-ranked (SYS) output.
    """
    ir, svm, conf_matrix = read_res_pred_files(res_fname, pred_fname, format, verbose,
                                      reranking_th=reranking_th,
                                      ignore_noanswer=ignore_noanswer)
    # Standard classification scores from the confusion matrix.  Every
    # denominator is guarded: the original guarded p, r and f1 but let
    # the accuracy division raise ZeroDivisionError on an empty matrix.
    tt = conf_matrix['true']['true']
    ff = conf_matrix['false']['false']
    tf = conf_matrix['true']['false']
    ft = conf_matrix['false']['true']
    total = tt + ff + tf + ft
    acc = 1.0 * (tt + ff) / total if total > 0 else 0
    p = 1.0 * tt / (tt + ft) if (tt + ft) > 0 else 0
    r = 1.0 * tt / (tt + tf) if (tt + tf) > 0 else 0
    f1 = 2.0 * p * r / (p + r) if (p + r) > 0 else 0

    # Per-rank curves for the IR baseline.
    prec_se = metrics.recall_of_1(ir, th)
    acc_se = metrics.accuracy(ir, th)
    acc_se1 = metrics.accuracy1(ir, th)
    acc_se2 = metrics.accuracy2(ir, th)

    # Per-rank curves for the tested system (SVM re-ranker).
    prec_svm = metrics.recall_of_1(svm, th)
    acc_svm = metrics.accuracy(svm, th)
    acc_svm1 = metrics.accuracy1(svm, th)
    acc_svm2 = metrics.accuracy2(svm, th)

    mrr_se = metrics.mrr(ir, th)
    mrr_svm = metrics.mrr(svm, th)
    map_se = metrics.map(ir, th)
    map_svm = metrics.map(svm, th)

    avg_acc1_svm = metrics.avg_acc1(svm, th)
    avg_acc1_ir = metrics.avg_acc1(ir, th)

    print("")
    print("*** Official score (MAP for SYS): %5.4f" % (map_svm))
    print("")
    print("")
    print("******************************")
    print("*** Classification results ***")
    print("******************************")
    print("")
    print("Acc = %5.4f" % (acc))
    print("P   = %5.4f" % (p))
    print("R   = %5.4f" % (r))
    print("F1  = %5.4f" % (f1))
    print("")
    print("")
    print("********************************")
    print("*** Detailed ranking results ***")
    print("********************************")
    print("")
    print("IR  -- Score for the output of the IR system (baseline).")
    print("SYS -- Score for the output of the tested system.")
    print("")
    print("%13s %5s" % ("IR", "SYS"))
    print("MAP   : %5.4f %5.4f" % (map_se, map_svm))
    print("AvgRec: %5.4f %5.4f" % (avg_acc1_ir, avg_acc1_svm))
    print("MRR   : %6.2f %6.2f" % (mrr_se, mrr_svm))
    print("%16s %6s  %14s %6s  %14s %6s  %12s %4s" % ("IR", "SYS", "IR", "SYS", "IR", "SYS", "IR", "SYS"))
    for i, (p_se, p_svm, a_se, a_svm, a_se1, a_svm1, a_se2, a_svm2) in enumerate(zip(prec_se, prec_svm, acc_se, acc_svm, acc_se1, acc_svm1, acc_se2, acc_svm2), 1):
        print("REC-1@%02d: %6.2f %6.2f  ACC@%02d: %6.2f %6.2f  AC1@%02d: %6.2f %6.2f  AC2@%02d: %4.0f %4.0f" % (i, p_se, p_svm, i, a_se, a_svm, i, a_se1, a_svm1, i, a_se2, a_svm2))

    print("REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks where questions have at most one correct answer)")
    print("ACC   - accuracy, i.e., number of correct answers retrieved at rank @X normalized by the rank and the total number of questions")
    print("AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)")
    print("AC2   - the absolute number of correct answers at @X")

    return map_svm