Example 1
def eval_reranker(res_fname="svm.test.res", pred_fname="svm.train.pred", 
                  format="trec",
                  th=50, 
                  verbose=False,
                  reranking_th=-100.0,
                  ignore_noanswer=False, ignore_allanswer=False):
	ir, svm = read_res_pred_files(res_fname, pred_fname, format, verbose,
	                              reranking_th=reranking_th,
	                              ignore_noanswer=ignore_noanswer,
	                              ignore_allanswer=ignore_allanswer)

	# evaluate IR
	prec_se = metrics.recall_of_1(ir, th)
	acc_se = metrics.accuracy(ir, th)
	acc_se1 = metrics.accuracy1(ir, th)
	acc_se2 = metrics.accuracy2(ir, th)

	# evaluate SVM
	prec_svm = metrics.recall_of_1(svm, th)
	acc_svm = metrics.accuracy(svm, th)
	acc_svm1 = metrics.accuracy1(svm, th)
	acc_svm2 = metrics.accuracy2(svm, th)

	mrr_se = metrics.mrr(ir, th)
	mrr_svm = metrics.mrr(svm, th)
	map_se = metrics.map(ir)
	map_svm = metrics.map(svm)

	avg_acc1_svm = metrics.avg_acc1(svm, th)
	avg_acc1_ir = metrics.avg_acc1(ir, th)

	'''
	print "%13s %5s" %("IR", "SVM")
	print "MRR: %5.2f %5.2f" %(mrr_se, mrr_svm)
	print "MAP: %5.4f %5.4f" %(map_se, map_svm)
	print "AvgRec: %5.2f %5.2f" %(avg_acc1_ir, avg_acc1_svm)
	print "%16s %6s  %14s %6s  %14s %6s  %12s %4s" % ("IR", "SVM", "IR", "SVM", "IR", "SVM", "IR", "SVM")
	'''
	rec1_se = -10
	rec1_svm = -10
	for i, (p_se, p_svm, a_se, a_svm, a_se1, a_svm1, a_se2, a_svm2) in enumerate(zip(prec_se, prec_svm, acc_se, acc_svm, acc_se1, acc_svm1, acc_se2, acc_svm2), 1):
		#print "REC-1@%02d: %6.2f %6.2f  ACC@%02d: %6.2f %6.2f  AC1@%02d: %6.2f %6.2f  AC2@%02d: %4.0f %4.0f" %(i, p_se, p_svm, i, a_se, a_svm, i, a_se1, a_svm1, i, a_se2, a_svm2)
		if rec1_se < -5:
			rec1_se = p_se
			rec1_svm = p_svm


	'''
	print "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks were questions have at most one correct answer)"
	print "ACC   - accuracy, i.e. number of correct answers retrieved at rank @X normalized by the rank and the total number of questions"
	print "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)"
	print "AC2   - the absolute number of correct answers at @X"
	'''

	print "Table view"
	print "	MRR	MAP	P@1"
	print "REF_FILE	%5.2f	%5.2f	%5.2f" % (mrr_se, map_se*100, rec1_se)
	print "SVM	%5.2f	%5.2f	%5.2f" % (mrr_svm, map_svm*100, rec1_svm)
Example 2
def eval_reranker(res_fname="svm.test.res",
                  pred_fname="svm.train.pred",
                  format="trec",
                  th=10,
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
    ir, svm, conf_matrix = read_res_pred_files(res_fname,
                                               pred_fname,
                                               format,
                                               verbose,
                                               reranking_th=reranking_th,
                                               ignore_noanswer=ignore_noanswer)
    # Calculate standard P, R, F1, Acc
    acc = 1.0 * (
        conf_matrix['true']['true'] + conf_matrix['false']['false']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['false'] +
            conf_matrix['true']['false'] + conf_matrix['false']['true'])
    p = 0
    if (conf_matrix['true']['true'] + conf_matrix['false']['true']) > 0:
        p = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['true'])
    r = 0
    if (conf_matrix['true']['true'] + conf_matrix['true']['false']) > 0:
        r = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['true']['false'])
    f1 = 0
    if (p + r) > 0:
        f1 = 2.0 * p * r / (p + r)

    # evaluate IR
    prec_se = metrics.recall_of_1(ir, th)
    acc_se = metrics.accuracy(ir, th)
    acc_se1 = metrics.accuracy1(ir, th)
    acc_se2 = metrics.accuracy2(ir, th)

    # evaluate SVM
    prec_svm = metrics.recall_of_1(svm, th)
    acc_svm = metrics.accuracy(svm, th)
    acc_svm1 = metrics.accuracy1(svm, th)
    acc_svm2 = metrics.accuracy2(svm, th)

    mrr_se = metrics.mrr(ir, th)
    mrr_svm = metrics.mrr(svm, th)
    map_se = metrics.map(ir, th)
    map_svm = metrics.map(svm, th)

    avg_acc1_svm = metrics.avg_acc1(svm, th)
    avg_acc1_ir = metrics.avg_acc1(ir, th)

    print "acc\tf1\tMAP\tMRR\tAvgRec"
    print "%.4f %4.4f %4.4f %4.4f %4.4f" % (acc, f1, map_svm, mrr_svm,
                                            avg_acc1_svm)
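
The confusion matrix consumed above is a nested dict, and the precision/recall formulas imply the outer key is the gold label and the inner key the predicted label. A small self-contained illustration with made-up counts (hypothetical numbers, shown only to make the layout concrete):

# Hypothetical counts illustrating the conf_matrix[gold][pred] layout.
conf_matrix = {
    "true":  {"true": 40, "false": 10},   # 40 true positives, 10 false negatives
    "false": {"true": 5,  "false": 45},   # 5 false positives, 45 true negatives
}
tp = conf_matrix["true"]["true"]
fn = conf_matrix["true"]["false"]
fp = conf_matrix["false"]["true"]
tn = conf_matrix["false"]["false"]
acc = 1.0 * (tp + tn) / (tp + tn + fp + fn)      # 0.85
p = 1.0 * tp / (tp + fp) if (tp + fp) else 0.0   # ~0.89
r = 1.0 * tp / (tp + fn) if (tp + fn) else 0.0   # 0.80
f1 = 2.0 * p * r / (p + r) if (p + r) else 0.0   # ~0.84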
Example 3
def eval_search_engine(res_fname, format, th=10):
    ir = read_res_file(res_fname, format)

    # evaluate IR
    rec = metrics.recall_of_1(ir, th)
    acc = metrics.accuracy(ir, th)
    acc1 = metrics.accuracy1(ir, th)
    acc2 = metrics.accuracy2(ir, th)

    mrr = metrics.mrr(ir, th)

    print("%13s" % "IR")
    print("MRRof1: %5.2f" % mrr)
    for i, (r, a, a1, a2) in enumerate(zip(rec, acc, acc1, acc2), 1):
        print(
            "REC-1@%02d: %6.2f  ACC@%02d: %6.2f  AC1@%02d: %6.2f  AC2@%02d: %4.0f"
            % (i, r, i, a, i, a1, i, a2))
    print()
    print(
        "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks were questions have at most one correct answer)"
    )
    print(
        "ACC   - accuracy, i.e. number of correct answers retrieved at rank @X normalized by the rank and the total number of questions"
    )
    print(
        "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)"
    )
    print("AC2   - the absolute number of correct answers at @X")
Example 4
def eval_search_engine(res_fname, format, th=50):
	ir = read_res_file(res_fname, format)		

	# evaluate IR
	rec = metrics.recall_of_1(ir, th)
	acc = metrics.accuracy(ir, th)
	acc1 = metrics.accuracy1(ir, th)
	acc2 = metrics.accuracy2(ir, th)

	mrr = metrics.mrr(ir, th)

	# MAP
	map_ir = metrics.map(ir)

	print "%10s" %"IR"
	print "MRR: %5.2f" % mrr
	print "MAP: %5.2f" % map_ir
	for i, (r, a, a1, a2) in enumerate(zip(rec, acc, acc1, acc2), 1):
		print "REC-1@%02d: %6.2f  ACC@%02d: %6.2f  AC1@%02d: %6.2f  AC2@%02d: %4.0f" %(i, r, i, a, i, a1, i, a2)
	print
	print "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks were questions have at most one correct answer)"
	print "ACC   - accuracy, i.e. number of correct answers retrieved at rank @X normalized by the rank and the total number of questions"
	print "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)"
	print "AC2   - the absolute number of correct answers at @X"
Example 5
def get_cv_evaluation_results(qid_aid_label_list, y_pred):
    predictions_dict = get_cv_ranked_predictions_dict(qid_aid_label_list,
                                                      y_pred)
    logging.debug("Num of questions: %d" % (len(predictions_dict)))

    mrr_score = m.mrr(predictions_dict, 1000)
    map_score = m.map(predictions_dict) * 100
    p1_score = m.recall_of_1(predictions_dict, 1000)[0]

    return mrr_score, map_score, p1_score
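
Here `recall_of_1(...)[0]` is used directly as P@1. A sketch of the REC-1 semantics described in the other examples (percentage of questions with at least one correct answer among the top i candidates), under the same ranked-labels assumption:

# Sketch: element i-1 of the returned list is the percentage of questions
# with at least one correct answer among their top i candidates.
def recall_of_1_sketch(ranked, th):
    counts = [0] * th
    for labels in ranked.values():
        for i, rel in enumerate(labels[:th]):
            if rel:
                for j in range(i, th):
                    counts[j] += 1
                break
    return [100.0 * c / len(ranked) for c in counts]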
Example 6
def eval_reranker(res_fname="svm.test.res", pred_fname="svm.train.pred", 
                  format="trec",
                  th=50, 
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
	ir, svm = read_res_pred_files(res_fname, pred_fname, format, verbose, 
		                              reranking_th=reranking_th, 
		                              ignore_noanswer=ignore_noanswer)		
	# evaluate IR
	prec_se = metrics.recall_of_1(ir, th)
	acc_se = metrics.accuracy(ir, th)
	acc_se1 = metrics.accuracy1(ir, th)
	acc_se2 = metrics.accuracy2(ir, th)

	# evaluate SVM
	prec_svm = metrics.recall_of_1(svm, th)
	acc_svm = metrics.accuracy(svm, th)
	acc_svm1 = metrics.accuracy1(svm, th)
	acc_svm2 = metrics.accuracy2(svm, th)

	mrr_se = metrics.mrr(ir, th)
	mrr_svm = metrics.mrr(svm, th)
	map_se = metrics.map(ir)
	map_svm = metrics.map(svm)

	avg_acc1_svm = metrics.avg_acc1(svm, th)
	avg_acc1_ir = metrics.avg_acc1(ir, th)

	print "%13s %5s" %("IR", "SVM")
	print "MRR: %5.2f %5.2f" %(mrr_se, mrr_svm)
	print "MAP: %5.4f %5.4f" %(map_se, map_svm)
	print "AvgRec: %5.2f %5.2f" %(avg_acc1_ir, avg_acc1_svm)
	print "%16s %6s  %14s %6s  %14s %6s  %12s %4s" % ("IR", "SVM", "IR", "SVM", "IR", "SVM", "IR", "SVM")
	for i, (p_se, p_svm, a_se, a_svm, a_se1, a_svm1, a_se2, a_svm2) in enumerate(zip(prec_se, prec_svm, acc_se, acc_svm, acc_se1, acc_svm1, acc_se2, acc_svm2), 1):
		print "REC-1@%02d: %6.2f %6.2f  ACC@%02d: %6.2f %6.2f  AC1@%02d: %6.2f %6.2f  AC2@%02d: %4.0f %4.0f" %(i, p_se, p_svm, i, a_se, a_svm, i, a_se1, a_svm1, i, a_se2, a_svm2)
	print
	print "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks were questions have at most one correct answer)"
	print "ACC   - accuracy, i.e. number of correct answers retrieved at rank @X normalized by the rank and the total number of questions"
	print "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)"
	print "AC2   - the absolute number of correct answers at @X"
Example 7
def get_evaluation_results(df,
                           y_pred,
                           skip_all_positives_and_all_negatives=True):
    predictions_dict = get_ranked_predictions_dict(
        df,
        y_pred,
        skip_all_positives_and_all_negatives=
        skip_all_positives_and_all_negatives)
    logging.debug("Num of questions: %d" % (len(predictions_dict)))

    mrr_score = m.mrr(predictions_dict, 1000)
    map_score = m.map(predictions_dict) * 100
    p1_score = m.recall_of_1(predictions_dict, 1000)[0]

    return mrr_score, map_score, p1_score
Example 8
def stats_cv(path=".",  format="trec", prefix="svm", th=50, verbose=False):
  mrrs_se = []
  mrrs_svm = []
  abs_mrrs = []
  rel_mrrs = []

  maps_se = []
  maps_svm = []
  abs_maps = []
  rel_maps = []

  recalls1_se = []
  recalls1_svm = []
  abs_recalls = []
  rel_recalls = []

  oracle_mrrs = []
  oracle_maps = []
  oracle_recs1 = []

  num_folds = 0
  
  print "%13s %5s %7s %7s" %("IR", "SVM", "(abs)", "(rel)")
  for fold in sorted(os.listdir(path)):
    currentFold = os.path.join(path, fold)
    if not os.path.isdir(currentFold):
      continue
    if not fold.startswith("fold"):
      logging.warn("Directories containing CV folds should start with 'fold'")
      continue
    print fold

    # Relevancy file
    res_fname = os.path.join(currentFold, "%s.test.res" % prefix)
    if not os.path.exists(res_fname):
      logging.error("Relevancy file not found: %s", res_fname)
      sys.exit(1)

    # Predictions file
    pred_fname = os.path.join(currentFold, "%s.pred" % prefix)
    if not os.path.exists(pred_fname):
      logging.error("SVM prediction file not found: %s", pred_fname)
      sys.exit(1)

    try:
      ir, svm = read_res_pred_files(res_fname, pred_fname, format, verbose)   
    except:
      logging.error("Failed to process input files: %s %s", res_fname, pred_fname)
      logging.error("Check that the input file format is correct")
      sys.exit(1)

    # MRR
    mrr_se = metrics.mrr(ir, th) or 1
    mrr_svm = metrics.mrr(svm, th) 
    mrrs_se.append(mrr_se)
    mrrs_svm.append(mrr_svm) 

    # improvement
    abs_mrr_diff = mrr_svm - mrr_se
    rel_mrr_diff = (mrr_svm - mrr_se)*100/mrr_se
    abs_mrrs.append(abs_mrr_diff)
    rel_mrrs.append(rel_mrr_diff)

    print "MRR: %5.2f %5.2f %+6.2f%% %+6.2f%%" % (mrr_se, mrr_svm, abs_mrr_diff, rel_mrr_diff) 

    # MAP
    map_se = metrics.map(ir) or 1
    map_svm = metrics.map(svm)
    maps_se.append(map_se) 
    maps_svm.append(map_svm)

    # improvement
    abs_map_diff = map_svm - map_se
    rel_map_diff = (map_svm - map_se)*100/map_se
    abs_maps.append(abs_map_diff)
    rel_maps.append(rel_map_diff)
    print "MAP: %5.2f %5.2f %+6.2f%% %+6.2f%%" % (map_se, map_svm, abs_map_diff, rel_map_diff) 

    # Recall-of-1@1
    rec_se = metrics.recall_of_1(ir, th)[0] or 1
    rec_svm = metrics.recall_of_1(svm, th)[0]
    recalls1_se.append(rec_se)
    recalls1_svm.append(rec_svm)

    # improvement
    abs_rec_diff = rec_svm - rec_se
    rel_rec_diff = (rec_svm - rec_se)*100/rec_se
    abs_recalls.append(abs_rec_diff)
    rel_recalls.append(rel_rec_diff)

    print "P@1: %5.2f %5.2f %+6.2f%% %+6.2f%%" % (rec_se, rec_svm, abs_rec_diff, rel_rec_diff)   

    num_folds += 1

    '''
    mrr_oracle = metrics.oracle_mrr(ir, th)
    map_oracle = metrics.oracle_map(ir)
    prec_oracle = metrics.oracle_precision(ir, th)[0]
    rec1_oracle = metrics.oracle_recall_of_1(ir, th)[0]

    oracle_mrrs.append(mrr_oracle)
    oracle_maps.append(map_oracle)
    oracle_recs1.append(rec1_oracle)

    print "Oracle MRR: %5.2f, Oracle MAP: %5.2f, Oracle prec: %5.2f, Oracle rec@1: %5.2f" % (mrr_oracle, map_oracle, prec_oracle, rec1_oracle)
    '''
  # mrrs
  avg_mrr_se, std_mrr_se = mean_and_std(mrrs_se)
  avg_mrr_svm, std_mrr_svm = mean_and_std(mrrs_svm)
  avg_abs_impr_mrr, std_abs_impr_mrr = mean_and_std(abs_mrrs)
  avg_rel_impr_mrr, std_rel_impr_mrr = mean_and_std(rel_mrrs)
  #oracle_avg_mrr, std_oracle_avg_mrr = mean_and_std(oracle_mrrs)

  # maps
  avg_map_se, std_map_se = mean_and_std(maps_se)
  avg_map_svm, std_map_svm = mean_and_std(maps_svm)
  avg_abs_impr_map, std_abs_impr_map = mean_and_std(abs_maps)
  avg_rel_impr_map, std_rel_impr_map = mean_and_std(rel_maps)
  #oracle_avg_map, std_oracle_avg_map = mean_and_std(oracle_maps)

  # recall
  avg_rec1_se, std_rec1_se = mean_and_std(recalls1_se)  # se 
  avg_rec1_svm, std_rec1_svm = mean_and_std(recalls1_svm)  # svm
  avg_abs_impr_rec1, std_abs_impr_rec1 = mean_and_std(abs_recalls)  # absolute
  avg_rel_impr_rec1, std_rel_impr_rec1 = mean_and_std(rel_recalls)  # relative
  #oracle_avg_rec1, std_oracle_avg_rec1 = mean_and_std(oracle_recs1)

  FMT = u"%3s: %5.2f \u00B1 %4.2f %5.2f \u00B1 %4.2f %+6.2f%% \u00B1 %4.2f %+6.2f%% \u00B1 %4.2f"
  #ORACLE_FMT = u"Oracle MRR: %5.2f \u00B1 %4.2f, Oracle MAP: %5.2f \u00B1 %4.2f, Oracle P@1: %5.2f \u00B1 %4.2f"
  print
  print "Averaged over %s folds" % num_folds
  print "%17s %12s %14s %14s" %("IR", "SVM", "(abs)", "(rel)")
  print FMT % ("MRR", avg_mrr_se, std_mrr_se, avg_mrr_svm, std_mrr_svm, avg_abs_impr_mrr, std_abs_impr_mrr, avg_rel_impr_mrr, std_rel_impr_mrr)
  print FMT % ("MAP", avg_map_se, std_map_se, avg_map_svm, std_map_svm, avg_abs_impr_map, std_abs_impr_map, avg_rel_impr_map, std_rel_impr_map)
  print FMT % ("P@1", avg_rec1_se, std_rec1_se, avg_rec1_svm, std_rec1_svm, avg_abs_impr_rec1, std_abs_impr_rec1, avg_rel_impr_rec1, std_rel_impr_rec1)
Example 9
def stats_cv(path=".",
             format="trec",
             prefix="svm",
             th=50,
             suf="",
             verbose=False,
             truth_file=None,
             ignore_noanswer=False,
             cut_truth_map_at_N=None):
    mrrs_se = []
    mrrs_svm = []
    abs_mrrs = []
    rel_mrrs = []

    maps_se = []
    maps_svm = []
    abs_maps = []
    rel_maps = []

    recalls1_se = []
    recalls1_svm = []
    abs_recalls = []
    rel_recalls = []

    num_folds = 0
    truth = read_truth_file(truth_file, format, cut_truth_map_at_N)
    print "%13s %5s %7s %7s" % ("IR", "SVM", "(abs)", "(rel)")
    for fold in sorted(os.listdir(path)):
        currentFold = os.path.join(path, fold)
        if not os.path.isdir(currentFold):
            continue
        if not fold.startswith("fold"):
            logging.warn(
                "Directories containing CV folds should start with 'fold'")
            continue
        print fold

        # Relevancy file
        res_fname = os.path.join(currentFold, "%s.relevancy" % prefix)
        if not os.path.exists(res_fname):
            logging.error("Relevancy file not found: %s", res_fname)
            sys.exit(1)

        # Predictions file
        pred_fname = os.path.join(currentFold, "%s.pred" % (prefix + suf))
        if not os.path.exists(pred_fname):
            logging.error("SVM prediction file not found: %s", pred_fname)
            sys.exit(1)

        try:
            ir, svm = read_res_pred_files(res_fname,
                                          pred_fname,
                                          format,
                                          verbose,
                                          ignore_noanswer=ignore_noanswer,
                                          truth_map=truth)
        except:
            logging.error("Failed to process input files: %s %s", res_fname,
                          pred_fname)
            logging.error("Check that the input file format is correct")
            sys.exit(1)

        # MRR
        mrr_se = metrics.mrr(ir, th)
        mrr_svm = metrics.mrr(svm, th)
        mrrs_se.append(mrr_se)
        mrrs_svm.append(mrr_svm)

        # improvement
        abs_mrr_diff = mrr_svm - mrr_se
        rel_mrr_diff = (mrr_svm - mrr_se) * 100 / mrr_se
        abs_mrrs.append(abs_mrr_diff)
        rel_mrrs.append(rel_mrr_diff)

        print "MRR: %5.2f %5.2f %+6.2f%% %+6.2f%%" % (
            mrr_se, mrr_svm, abs_mrr_diff, rel_mrr_diff)

        # MAP
        map_se = metrics.map(ir)
        map_svm = metrics.map(svm)
        maps_se.append(map_se)
        maps_svm.append(map_svm)
        # improvement
        abs_map_diff = map_svm - map_se
        rel_map_diff = (map_svm - map_se) * 100 / map_se
        abs_maps.append(abs_map_diff)
        rel_maps.append(rel_map_diff)
        print "MAP: %5.2f %5.2f %+6.2f%% %+6.2f%%" % (
            map_se * 100, map_svm * 100, abs_map_diff, rel_map_diff)

        # Recall-of-1@1
        rec_se = metrics.recall_of_1(ir, th)[0]
        rec_svm = metrics.recall_of_1(svm, th)[0]
        recalls1_se.append(rec_se)
        recalls1_svm.append(rec_svm)

        # improvement
        abs_rec_diff = rec_svm - rec_se
        rel_rec_diff = (rec_svm - rec_se) * 100 / rec_se
        abs_recalls.append(abs_rec_diff)
        rel_recalls.append(rel_rec_diff)

        print "P@1: %5.2f %5.2f %+6.2f%% %+6.2f%%" % (
            rec_se, rec_svm, abs_rec_diff, rel_rec_diff)

        num_folds += 1

    # mrrs
    avg_mrr_se, std_mrr_se = mean_and_std(mrrs_se)
    avg_mrr_svm, std_mrr_svm = mean_and_std(mrrs_svm)
    avg_abs_impr_mrr, std_abs_impr_mrr = mean_and_std(abs_mrrs)
    avg_rel_impr_mrr, std_rel_impr_mrr = mean_and_std(rel_mrrs)

    # maps
    avg_map_se, std_map_se = mean_and_std(maps_se)
    avg_map_svm, std_map_svm = mean_and_std(maps_svm)
    avg_abs_impr_map, std_abs_impr_map = mean_and_std(abs_maps)
    avg_rel_impr_map, std_rel_impr_map = mean_and_std(rel_maps)

    # recall
    avg_rec1_se, std_rec1_se = mean_and_std(recalls1_se)  # se
    avg_rec1_svm, std_rec1_svm = mean_and_std(recalls1_svm)  # svm
    avg_abs_impr_rec1, std_abs_impr_rec1 = mean_and_std(
        abs_recalls)  # absolute
    avg_rel_impr_rec1, std_rel_impr_rec1 = mean_and_std(
        rel_recalls)  # relative

    FMT = u"%3s: %5.2f \u00B1 %4.2f %5.2f \u00B1 %4.2f %+6.2f%% \u00B1 %4.2f %+6.2f%% \u00B1 %4.2f"
    print
    print "Averaged over %s folds" % num_folds
    print "%17s %12s %14s %14s" % ("IR", "SVM", "(abs)", "(rel)")
    print FMT % ("MRR", avg_mrr_se, std_mrr_se, avg_mrr_svm, std_mrr_svm,
                 avg_abs_impr_mrr, std_abs_impr_mrr, avg_rel_impr_mrr,
                 std_rel_impr_mrr)
    print FMT % ("MAP", avg_map_se * 100, std_map_se, avg_map_svm * 100,
                 std_map_svm, avg_abs_impr_map, std_abs_impr_map,
                 avg_rel_impr_map, std_rel_impr_map)
    print FMT % ("P@1", avg_rec1_se, std_rec1_se, avg_rec1_svm, std_rec1_svm,
                 avg_abs_impr_rec1, std_abs_impr_rec1, avg_rel_impr_rec1,
                 std_rel_impr_rec1)
    print "Table view"
    print "	MRR	MAP	P@1"
    print u"IR	%5.2f\u00B1%4.2f	%5.2f\u00B1%4.2f	 %5.2f\u00B1%4.2f" % (
        avg_mrr_se, std_mrr_se, avg_map_se * 100, std_map_se * 100,
        avg_rec1_se, std_rec1_se)
    print u"SVM	%5.2f\u00B1%4.2f	%5.2f\u00B1%4.2f	 %5.2f\u00B1%4.2f" % (
        avg_mrr_svm, std_mrr_svm, avg_map_svm * 100, std_map_svm * 100,
        avg_rec1_svm, std_rec1_svm)
Example 10
def eval_reranker(res_fname="svm.test.res",
                  pred_fname="svm.train.pred",
                  format="trec",
                  th=10,
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
    ir, svm, conf_matrix = read_res_pred_files(res_fname,
                                               pred_fname,
                                               format,
                                               verbose,
                                               reranking_th=reranking_th,
                                               ignore_noanswer=ignore_noanswer)
    # Calculate standard P, R, F1, Acc
    acc = 1.0 * (
        conf_matrix['true']['true'] + conf_matrix['false']['false']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['false'] +
            conf_matrix['true']['false'] + conf_matrix['false']['true'])
    p = 0
    if (conf_matrix['true']['true'] + conf_matrix['false']['true']) > 0:
        p = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['true'])
    r = 0
    if (conf_matrix['true']['true'] + conf_matrix['true']['false']) > 0:
        r = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['true']['false'])
    f1 = 0
    if (p + r) > 0:
        f1 = 2.0 * p * r / (p + r)

    # evaluate IR
    prec_se = metrics.recall_of_1(ir, th)
    acc_se = metrics.accuracy(ir, th)
    acc_se1 = metrics.accuracy1(ir, th)
    acc_se2 = metrics.accuracy2(ir, th)

    # evaluate SVM
    prec_svm = metrics.recall_of_1(svm, th)
    acc_svm = metrics.accuracy(svm, th)
    acc_svm1 = metrics.accuracy1(svm, th)
    acc_svm2 = metrics.accuracy2(svm, th)

    mrr_se = metrics.mrr(ir, th)
    mrr_svm = metrics.mrr(svm, th)
    map_se = metrics.map(ir, th)
    map_svm = metrics.map(svm, th)

    avg_acc1_svm = metrics.avg_acc1(svm, th)
    avg_acc1_ir = metrics.avg_acc1(ir, th)

    print("")
    print("*** Official score (MAP for SYS): %5.4f" % (map_svm))
    print("")
    print("")
    print("******************************")
    print("*** Classification results ***")
    print("******************************")
    print("")
    print("Acc = %5.4f" % (acc))
    print("P   = %5.4f" % (p))
    print("R   = %5.4f" % (r))
    print("F1  = %5.4f" % (f1))
    print("")
    print("")
    print("********************************")
    print("*** Detailed ranking results ***")
    print("********************************")
    print("")
    print("IR  -- Score for the output of the IR system (baseline).")
    print("SYS -- Score for the output of the tested system.")
    print("")
    print("%13s %5s" % ("IR", "SYS"))
    print("MAP   : %5.4f %5.4f" % (map_se, map_svm))
    print("AvgRec: %5.4f %5.4f" % (avg_acc1_ir, avg_acc1_svm))
    print("MRR   : %6.2f %6.2f" % (mrr_se, mrr_svm))
    print("%16s %6s  %14s %6s  %14s %6s  %12s %4s" %
          ("IR", "SYS", "IR", "SYS", "IR", "SYS", "IR", "SYS"))
    for i, (p_se, p_svm, a_se, a_svm, a_se1, a_svm1, a_se2,
            a_svm2) in enumerate(
                zip(prec_se, prec_svm, acc_se, acc_svm, acc_se1, acc_svm1,
                    acc_se2, acc_svm2), 1):
        print(
            "REC-1@%02d: %6.2f %6.2f  ACC@%02d: %6.2f %6.2f  AC1@%02d: %6.2f %6.2f  AC2@%02d: %4.0f %4.0f"
            % (i, p_se, p_svm, i, a_se, a_svm, i, a_se1, a_svm1, i, a_se2,
               a_svm2))
    print()
    print(
        "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks where questions have at most one correct answer)"
    )
    print(
        "ACC   - accuracy, i.e., number of correct answers retrieved at rank @X normalized by the rank and the total number of questions"
    )
    print(
        "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)"
    )
    print("AC2   - the absolute number of correct answers at @X")
Example 11
def eval_reranker(res_fname="svm.test.res",
                  pred_fname="svm.train.pred",
                  format="trec",
                  th=10,
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
    ir, svm, conf_matrix = read_res_pred_files(res_fname,
                                               pred_fname,
                                               format,
                                               verbose,
                                               reranking_th=reranking_th,
                                               ignore_noanswer=ignore_noanswer)
    # Calculate standard P, R, F1, Acc
    acc = 1.0 * (
        conf_matrix['true']['true'] + conf_matrix['false']['false']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['false'] +
            conf_matrix['true']['false'] + conf_matrix['false']['true'])
    p = 0
    if (conf_matrix['true']['true'] + conf_matrix['false']['true']) > 0:
        p = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['true'])
    r = 0
    if (conf_matrix['true']['true'] + conf_matrix['true']['false']) > 0:
        r = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['true']['false'])
    f1 = 0
    if (p + r) > 0:
        f1 = 2.0 * p * r / (p + r)

    # evaluate IR
    prec_se = metrics.recall_of_1(ir, th)
    acc_se = metrics.accuracy(ir, th)
    acc_se1 = metrics.accuracy1(ir, th)
    acc_se2 = metrics.accuracy2(ir, th)

    # evaluate SVM
    prec_svm = metrics.recall_of_1(svm, th)
    acc_svm = metrics.accuracy(svm, th)
    acc_svm1 = metrics.accuracy1(svm, th)
    acc_svm2 = metrics.accuracy2(svm, th)

    mrr_se = metrics.mrr(ir, th)
    mrr_svm = metrics.mrr(svm, th)
    map_se = metrics.map(ir, th)
    map_svm = metrics.map(svm, th)

    avg_acc1_svm = metrics.avg_acc1(svm, th)
    avg_acc1_ir = metrics.avg_acc1(ir, th)

    #print ""
    #print "*** Official score (MAP for SYS): %5.4f" %(map_svm)
    #print ""
    #print ""
    #print "******************************"
    #print "*** Classification results ***"
    #print "******************************"
    #print ""
    #print "Acc = %5.4f" %(acc)
    #print "P   = %5.4f" %(p)
    #print "R   = %5.4f" %(r)
    #print "F1  = %5.4f" %(f1)
    #print ""
    #print ""
    #print "********************************"
    #print "*** Detailed ranking results ***"
    #print "********************************"
    #print ""
    #print "IR  -- Score for the output of the IR system (baseline)."
    #print "SYS -- Score for the output of the tested system."
    #print ""
    #print "%13s %5s" %("IR", "SYS")
    #print "MAP   : %5.4f %5.4f" %(map_se, map_svm)
    #print "AvgRec: %5.4f %5.4f" %(avg_acc1_ir, avg_acc1_svm)
    #print "MRR   : %6.2f %6.2f" %(mrr_se, mrr_svm)
    print "MAP   : %5.4f\tMRR   : %5.4f\tAvgRec: %5.4f" % (map_svm, mrr_svm,
                                                           avg_acc1_svm)
    #print "Acc   : %5.4f" %(acc)
    #print "P     : %5.4f" %(p)
    #print "R     : %5.4f" %(r)
    #print "F1    : %5.4f" %(f1)
    """
Example 12
def eval_reranker(res_fname="svm.test.res", pred_fname="svm.train.pred",
                  format="trec",
                  th=10,
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
    ir, svm, conf_matrix = read_res_pred_files(res_fname, pred_fname, format, verbose,
                                      reranking_th=reranking_th,
                                      ignore_noanswer=ignore_noanswer)
    # Calculate standard P, R, F1, Acc
    acc = 1.0 * (conf_matrix['true']['true'] + conf_matrix['false']['false']) / (conf_matrix['true']['true'] + conf_matrix['false']['false'] + conf_matrix['true']['false'] + conf_matrix['false']['true'])
    p = 0
    if (conf_matrix['true']['true'] + conf_matrix['false']['true']) > 0:
        p = 1.0 * (conf_matrix['true']['true']) / (conf_matrix['true']['true'] + conf_matrix['false']['true'])
    r = 0
    if (conf_matrix['true']['true'] + conf_matrix['true']['false']) > 0:
        r = 1.0 * (conf_matrix['true']['true']) / (conf_matrix['true']['true'] + conf_matrix['true']['false'])
    f1 = 0
    if (p + r) > 0:
        f1 = 2.0 * p * r / (p + r)

    # evaluate IR
    prec_se = metrics.recall_of_1(ir, th)
    acc_se = metrics.accuracy(ir, th)
    acc_se1 = metrics.accuracy1(ir, th)
    acc_se2 = metrics.accuracy2(ir, th)

    # evaluate SVM
    prec_svm = metrics.recall_of_1(svm, th)
    acc_svm = metrics.accuracy(svm, th)
    acc_svm1 = metrics.accuracy1(svm, th)
    acc_svm2 = metrics.accuracy2(svm, th)

    mrr_se = metrics.mrr(ir, th)
    mrr_svm = metrics.mrr(svm, th)
    map_se = metrics.map(ir, th)
    map_svm = metrics.map(svm, th)

    avg_acc1_svm = metrics.avg_acc1(svm, th)
    avg_acc1_ir = metrics.avg_acc1(ir, th)

    print ("")
    print ("*** Official score (MAP for SYS): %5.4f" %(map_svm))
    print ("")
    print ("")
    print( "******************************")
    print( "*** Classification results ***")
    print( "******************************")
    print( "")
    print( "Acc = %5.4f" %(acc))
    print( "P   = %5.4f" %(p))
    print( "R   = %5.4f" %(r))
    print( "F1  = %5.4f" %(f1))
    print( "")
    print( "")
    print( "********************************")
    print( "*** Detailed ranking results ***")
    print( "********************************")
    print( "")
    print( "IR  -- Score for the output of the IR system (baseline).")
    print( "SYS -- Score for the output of the tested system.")
    print( "")
    print( "%13s %5s" %("IR", "SYS"))
    print( "MAP   : %5.4f %5.4f" %(map_se, map_svm))
    print( "AvgRec: %5.4f %5.4f" %(avg_acc1_ir, avg_acc1_svm))
    print( "MRR   : %6.2f %6.2f" %(mrr_se, mrr_svm))
    print( "%16s %6s  %14s %6s  %14s %6s  %12s %4s" % ("IR", "SYS", "IR", "SYS", "IR", "SYS", "IR", "SYS"))
    for i, (p_se, p_svm, a_se, a_svm, a_se1, a_svm1, a_se2, a_svm2) in enumerate(zip(prec_se, prec_svm, acc_se, acc_svm, acc_se1, acc_svm1, acc_se2, acc_svm2), 1):
        print( "REC-1@%02d: %6.2f %6.2f  ACC@%02d: %6.2f %6.2f  AC1@%02d: %6.2f %6.2f  AC2@%02d: %4.0f %4.0f" %(i, p_se, p_svm, i, a_se, a_svm, i, a_se1, a_svm1, i, a_se2, a_svm2))

    print( "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks where questions have at most one correct answer)")
    print( "ACC   - accuracy, i.e., number of correct answers retrieved at rank @X normalized by the rank and the total number of questions")
    print( "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)")
    print( "AC2   - the absolute number of correct answers at @X")

    return map_svm
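
Finally, `metrics.map` is called throughout, sometimes with and sometimes without a cutoff. One common formulation of mean average precision over the same assumed structure, offered only as a sketch (the real module may normalize differently, e.g. by the total number of relevant answers):

# Sketch: precision at each relevant position, averaged per question,
# then the mean over all questions; `th` optionally truncates the ranking.
def map_sketch(ranked, th=None):
    ap_sum = 0.0
    for labels in ranked.values():
        top = labels[:th] if th else labels
        hits = 0
        precisions = []
        for rank, rel in enumerate(top, 1):
            if rel:
                hits += 1
                precisions.append(1.0 * hits / rank)
        ap_sum += sum(precisions) / len(precisions) if precisions else 0.0
    return ap_sum / len(ranked)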