def generatePrecisionRecallHistogramPlots(averageScores, projectRoot, experimentNames, fileName, title, xLabel):
    """
    Generate histogram-style plots for the query generation keyword source experiments.

    Builds a whitespace-separated table with one column per experiment and one
    row per metric ('recall', then 'precision'), writes it to 'input.dat' in the
    current working directory, fills in the gnuplot template at
    <projectRoot>/analysis/configurations/query_precision_recall_histograms,
    writes the result to 'config' and runs `gnuplot config`.

    Arguments:
        averageScores   -- mapping indexed as averageScores[experiment][metric];
                           every experiment in experimentNames must provide
                           'recall' and 'precision'.
        projectRoot     -- project root path, used to locate the template and
                           to build the output image path.
        experimentNames -- experiment names, in the order the columns should
                           appear; iterated once per row, so pass a sequence.
        fileName        -- output image base name; the image path substituted
                           into the template is
                           <projectRoot>/analysis/output/retrieval/<fileName>.png.
        title           -- second value substituted into the template.
        xLabel          -- third value substituted into the template.

    Both scratch files ('input.dat' and 'config') are removed before returning,
    including when a step fails part-way through.
    """
    sortedMetrics = ['recall', 'precision']
    dataFile = 'input.dat'
    gnuplotScript = 'config'

    def quotedLabel(name):
        # Labels are double-quoted so multi-word names stay a single field in
        # the table; '&' is spelled out as 'and'.
        return '"' + splitCamelCase(name).title().replace('&', 'and') + '" '

    # Header row first, then one row per metric with a score for each experiment
    rows = ['Experiment ' + ''.join(quotedLabel(experiment) for experiment in experimentNames)]
    for metric in sortedMetrics:
        scores = ''.join(str(averageScores[experiment][metric]) + ' ' for experiment in experimentNames)
        rows.append(quotedLabel(metric) + scores)
    dataContent = '\n'.join(rows) + '\n'

    try:
        # Write it out to the input file to gnuplot
        # NOTE(review): presumably the template plots 'input.dat' -- confirm
        with open(dataFile, 'w') as handle:
            handle.write(dataContent)

        # Get the configuration template and fill in its three placeholders
        configurationFile = projectRoot + '/analysis/configurations/query_precision_recall_histograms'
        with open(configurationFile) as handle:
            configuration = handle.read()
        configuration = configuration % (
            projectRoot + '/analysis/output/retrieval/' + fileName + '.png',
            title,
            xLabel,
        )
        with open(gnuplotScript, 'w') as handle:
            handle.write(configuration)

        # Run gnuplot and wait for it to finish before cleaning up
        subprocess.Popen(['gnuplot', gnuplotScript]).communicate()
    finally:
        # Only remove what was actually created, so a failure in an earlier
        # step is not masked by a "no such file" error raised from here.
        for scratchFile in (gnuplotScript, dataFile):
            if os.path.exists(scratchFile):
                os.remove(scratchFile)
# The order in which the groups of histograms should appear sortedExperiments = [ 'BM25Ranking', experiment, 'BM25BaselineRanking' ] # Sort the metrics too sortedMetrics = averageScores[experiment].keys() sortedMetrics.sort() # Generate the header for the metrics dataContent = "Experiment " for metric in sortedMetrics: dataContent += '"' + splitCamelCase(metric).title().replace('&', 'and') + '" ' dataContent += '\n' # Build the results for this experiment for drawnExperiment in sortedExperiments: dataContent += '"' + experiments[drawnExperiment].replace('&', 'and') + '" ' for metric in sortedMetrics: dataContent += str(averageScores[drawnExperiment][metric]) + ' ' dataContent += "\n" # Write it out to the input file to gnuplot open("input.dat", 'w').write(dataContent) # Get the configuration configurationFile = projectRoot + '/analysis/configurations/single_ranking_feature_clustered_histograms' configuration = open(configurationFile).read()
averageScores = averageEntityScores(data) # Concatenate the names of metrics for the horizontal dataContent = "Experiment " sortedAverageScores = [ ('BM25Ranking', 'None'), ('DMOZSmoothed500BM25Ranking', '500 Documents'), ('DMOZSmoothed1000BM25Ranking', '1000 Documents'), ('DMOZSmoothed5000BM25Ranking', '5000 Documents'), ('DMOZSmoothed1000BM25Ranking', '10000 Documents') ] sortedMetrics = averageScores[sortedAverageScores[0][0]].keys() sortedMetrics.sort() for metric in sortedMetrics: dataContent += '"' + splitCamelCase(metric).title() + '" ' dataContent += '\n' for experiment in sortedAverageScores: dataContent += '"' + experiment[1] + '" ' for metric in sortedMetrics: dataContent += str(averageScores[experiment[0]][metric]) + ' ' dataContent += "\n" # Write it out to the input file to gnuplot open("input.dat", 'w').write(dataContent) # Get the configuration configurationFile = projectRoot + '/analysis/configurations/smoothing_clustered_histograms' configuration = open(configurationFile).read() # Fill out the configuration