def plotEvaluation(trecRun, qrels, measure, outputFile=None, showPlot=True):
    """Plot a histogram with one bar per topic (matplotlib).

    Each bar is the difference between `measure` computed on that topic and
    the average of `measure` over all topics.

    Args:
        trecRun: the run to evaluate.
        qrels: the relevance judgements.
        measure: key into pytrec_eval.METRICS_NAMES selecting the metric.
        outputFile: if not None, the file name the plot is saved into.
        showPlot: if True, the plot is shown to the user.

    NOTE(review): a second, pygal-based `plotEvaluation` is defined later in
    this file and shadows this definition — confirm which one is intended.
    """
    plt.clf()
    avg, details = pytrec_eval.evaluate(trecRun, qrels, measure, True)
    # Sort by topic id so bar order is deterministic and matches the labels.
    lstDetails = sorted(details.items())
    qIds = [qId for qId, _ in lstDetails]
    scores = [score - avg for _, score in lstDetails]
    plt.figure(1)
    x = list(range(len(qIds)))
    plt.bar(x, scores, width=0.6)
    plt.xticks(x, qIds, rotation=90, size=5)
    plt.xlim(xmax=len(qIds))
    plt.xlabel('Topic Id')
    plt.ylabel('Difference of ' + pytrec_eval.METRICS_NAMES[measure] + ' from Average')
    # BUGFIX: save BEFORE show. plt.show() enters the GUI loop and the figure
    # may be cleared when it returns, so saving afterwards can write a blank
    # image.
    if outputFile is not None:
        plt.savefig(outputFile, bbox_inches=0)
    if showPlot:
        plt.show()
def plotEvaluationAll(trecRuns, qrels, measure, outputFile, style=PLOT_STYLE):
    """Plot a grouped bar chart with one bar per topic for each run (pygal).

    For every run in `trecRuns`, each bar is the per-topic score of `measure`;
    topics with no score in the evaluation details plot as 0. The chart is
    rendered into `outputFile`.

    Args:
        trecRuns: iterable of runs; each must expose a `name` attribute.
        qrels: the relevance judgements (provides getTopicIds()).
        measure: key into pytrec_eval.METRICS_NAMES selecting the metric.
        outputFile: name of the file the chart is rendered to.
        style: pygal style object for the chart.
    """
    qIds = sorted(qrels.getTopicIds())
    bar_chart = pygal.Bar()
    bar_chart.label_font_size = 8
    bar_chart.style = style
    bar_chart.x_label_rotation = 90
    bar_chart.x_labels = qIds
    bar_chart.x_title = 'Topic Id'
    bar_chart.legend_at_bottom = True
    bar_chart.legend_font_size = 10
    bar_chart.legend_box_size = 8
    bar_chart.y_title = pytrec_eval.METRICS_NAMES[measure]
    for trecRun in trecRuns:
        # The per-run average is not used here; only per-topic details are.
        _, details = pytrec_eval.evaluate(trecRun, qrels, measure, True)
        # dict.get defaults missing topics to 0 (replaces the conditional
        # expression lookup).
        lstDetails = [details.get(topicId, 0) for topicId in qIds]
        bar_chart.add(trecRun.name, lstDetails)
    bar_chart.render_to_file(outputFile)
def plotEvaluation(trecRun, qrels, measure, outputFile, style=PLOT_STYLE):
    """Plot a bar chart with one bar per topic (pygal), rendered to a file.

    Each bar is the raw per-topic score of `measure` for `trecRun`.

    NOTE(review): the original docstring claimed each bar is the difference
    from the average score, but the code plots raw scores and never uses the
    average returned by evaluate — confirm which behavior is intended.
    NOTE(review): this redefines `plotEvaluation`, shadowing the
    matplotlib-based version defined earlier in the file.

    Args:
        trecRun: the run to evaluate; must expose a `name` attribute.
        qrels: the relevance judgements.
        measure: key into pytrec_eval.METRICS_NAMES selecting the metric.
        outputFile: name of the file the chart is rendered to.
        style: pygal style object for the chart.
    """
    _, details = pytrec_eval.evaluate(trecRun, qrels, measure, True)
    bar_chart = pygal.Bar()
    bar_chart.style = style
    # Sort by topic id so score order matches the x-axis labels.
    lstDetails = sorted(details.items())
    qIds = [qId for qId, _ in lstDetails]
    scores = [score for _, score in lstDetails]
    bar_chart.add(trecRun.name, scores)
    bar_chart.label_font_size = 8
    bar_chart.legend_at_bottom = True
    bar_chart.legend_font_size = 10
    bar_chart.legend_box_size = 8
    bar_chart.x_label_rotation = 90
    bar_chart.x_labels = qIds
    bar_chart.x_title = 'query ids'
    bar_chart.y_title = pytrec_eval.METRICS_NAMES[measure]
    bar_chart.render_to_file(outputFile)
def rankRuns(runs, qrels, measure):
    """Rank the runs by their overall score on `measure`.

    Returns a list of (run, score) pairs ordered by score, highest first.

    NOTE(review): this wraps measure as `[measure]` when calling evaluate,
    while sibling functions pass `measure` directly — confirm which form
    evaluate expects.
    """
    scored = [(run, pytrec_eval.evaluate(run, qrels, [measure])[0])
              for run in runs]
    return sorted(scored, key=lambda pair: pair[1], reverse=True)
def ttest(victim_run, allTheOther_runs, qrels, metric):
    """Run a two-sample t-test between victim_run and each other run.

    Per-topic scores of `metric` (judged against `qrels`) are compared.
    Returns a dictionary mapping each other run's name to its p-value.

    NOTE(review): per-topic scores over the same topic set are paired
    samples; stats.ttest_rel may be the statistically appropriate test —
    confirm ttest_ind is intended.
    NOTE(review): this calls evaluation.evaluate while sibling functions use
    pytrec_eval.evaluate — confirm the module reference.
    """
    _, victimDetails = evaluation.evaluate(victim_run, qrels, metric, True)
    # Fix the topic order once so every run's scores line up positionally.
    topicOrder = list(victimDetails.keys())
    victimScores = [victimDetails[topicId] for topicId in topicOrder]
    pValues = {}
    for otherRun in allTheOther_runs:
        _, otherDetails = evaluation.evaluate(otherRun, qrels, metric, True)
        otherScores = [otherDetails[topicId] for topicId in topicOrder]
        _, pValue = stats.ttest_ind(victimScores, otherScores)
        pValues[otherRun.name] = pValue
    return pValues
def plotDifferenceWith(targetRun, otherRuns, qrels, measure, outputFile, style=PLOT_STYLE):
    """Plot, per topic, each other run's score minus targetRun's score.

    One bar series is added per run in `otherRuns`; a missing per-topic score
    on either side defaults to 0. The chart is rendered into `outputFile`.

    Args:
        targetRun: baseline run; must expose a `name` attribute.
        otherRuns: iterable of runs compared against the baseline.
        qrels: the relevance judgements (provides getTopicIds()).
        measure: key into pytrec_eval.METRICS_NAMES selecting the metric.
        outputFile: name of the file the chart is rendered to.
        style: pygal style object for the chart.
    """
    _, baseline_scores = pytrec_eval.evaluate(targetRun, qrels, measure, True)
    bar_chart = pygal.Bar()
    bar_chart.style = style
    # Sorted for deterministic bar order, consistent with plotEvaluationAll.
    allTopics = sorted(qrels.getTopicIds())
    bar_chart.label_font_size = 8
    bar_chart.legend_at_bottom = True
    bar_chart.legend_font_size = 10
    bar_chart.legend_box_size = 8
    bar_chart.x_label_rotation = 90
    bar_chart.x_labels = allTopics
    bar_chart.x_title = 'Topic Id'
    bar_chart.y_title = 'Difference from ' + targetRun.name + ' (' + pytrec_eval.METRICS_NAMES[measure] + ')'
    for otherRun in otherRuns:
        _, other_scores = pytrec_eval.evaluate(otherRun, qrels, measure, True)
        # dict.get defaults missing topics to 0 on either side of the diff.
        points = [other_scores.get(topicId, 0) - baseline_scores.get(topicId, 0)
                  for topicId in allTopics]
        bar_chart.add(otherRun.name, points)
    bar_chart.render_to_file(outputFile)