コード例 #1
0
def generatePrecisionRecallHistogramPlots(averageScores, projectRoot, experimentNames, fileName, title, xLabel):
    """
      Generate histogram-style plots for the query generation keyword source experiments

      A whitespace-separated data table is written to 'input.dat' and a gnuplot
      script to 'config', both in the current working directory. gnuplot is then
      run on the script, and both temporary files are removed afterwards, even
      when an intermediate step raises.

      averageScores    -- indexed as averageScores[experiment]['recall'] and
                          averageScores[experiment]['precision']
      projectRoot      -- directory prefix for the configuration template and
                          for the output image path
      experimentNames  -- iterable of experiment names; each becomes one data
                          column, in the given order
      fileName         -- base name (without extension) of the output '.png'
      title, xLabel    -- interpolated into the configuration template
    """

    def formatLabel(name):
        # Double-quote the label so that spaces stay inside one gnuplot
        # column; '&' is spelled out as 'and'.
        return '"' + splitCamelCase(name).title().replace('&', 'and') + '" '

    sortedMetrics = [
        'recall',
        'precision'
    ]

    # Header row: one quoted column title per experiment
    rows = ["Experiment " + ''.join(formatLabel(experiment) for experiment in experimentNames) + '\n']

    # One row per metric, holding that metric's score for every experiment
    for metric in sortedMetrics:
        scores = ''.join(str(averageScores[experiment][metric]) + ' ' for experiment in experimentNames)
        rows.append(formatLabel(metric) + scores + '\n')
    dataContent = ''.join(rows)

    dataPath = 'input.dat'
    configPath = 'config'

    # Only files this call actually created are cleaned up, so a failure
    # before a file is written never deletes a pre-existing file.
    createdFiles = []
    try:
        # Write it out to the input file to gnuplot. The 'with' block makes
        # sure the data is flushed and closed before gnuplot starts.
        with open(dataPath, 'w') as dataFile:
            createdFiles.append(dataPath)
            dataFile.write(dataContent)

        # Get the configuration template and fill in its three %-placeholders
        configurationFile = projectRoot + '/analysis/configurations/query_precision_recall_histograms'
        with open(configurationFile) as templateFile:
            configuration = templateFile.read()
        configuration = configuration % (
            projectRoot + '/analysis/output/retrieval/' + fileName + '.png',
            title,
            xLabel,
        )
        with open(configPath, 'w') as configFile:
            createdFiles.append(configPath)
            configFile.write(configuration)

        # Run gnuplot and wait for it to finish
        subprocess.Popen(['gnuplot', configPath]).communicate()
    finally:
        for path in createdFiles:
            os.remove(path)
コード例 #2
0
            # Build the gnuplot data table for one experiment, drawn between
            # the 'BM25Ranking' and 'BM25BaselineRanking' groups.
            # The order in which the groups of histograms should appear
            sortedExperiments = [
                'BM25Ranking',
                experiment,
                'BM25BaselineRanking'
            ]

            # Sort the metrics too, so columns come out in alphabetical order.
            # NOTE(review): calling .sort() on the result of .keys() relies on
            # Python 2, where keys() returns a list; Python 3 dict views have
            # no sort() method.
            sortedMetrics = averageScores[experiment].keys()
            sortedMetrics.sort()

            # Generate the header for the metrics: one double-quoted column
            # title per metric, with '&' spelled out as 'and'
            dataContent = "Experiment "
            for metric in sortedMetrics:
                dataContent += '"' + splitCamelCase(metric).title().replace('&', 'and') + '" '
            dataContent += '\n'

            # Build the results: one row per drawn experiment, labelled with
            # its entry in `experiments`, followed by its score for each metric
            for drawnExperiment in sortedExperiments:
                dataContent += '"' + experiments[drawnExperiment].replace('&', 'and') + '" '
                for metric in sortedMetrics:
                    dataContent += str(averageScores[drawnExperiment][metric]) + ' '
                dataContent += "\n"

            # Write it out to the input file to gnuplot
            # NOTE(review): the file object is never explicitly closed here.
            open("input.dat", 'w').write(dataContent)

            # Get the configuration (read in full as a string)
            configurationFile = projectRoot + '/analysis/configurations/single_ranking_feature_clustered_histograms'
            configuration = open(configurationFile).read()
コード例 #3
0
    # Average the scores, then lay them out as a gnuplot data table with one
    # row per smoothing configuration and one column per metric.
    averageScores = averageEntityScores(data)

    # Concatenate the names of metrics for the horizontal
    dataContent = "Experiment "
    # (key into averageScores, row label written to the data file), in the
    # order the rows are written
    sortedAverageScores = [
        ('BM25Ranking', 'None'),
        ('DMOZSmoothed500BM25Ranking', '500 Documents'),
        ('DMOZSmoothed1000BM25Ranking', '1000 Documents'),
        ('DMOZSmoothed5000BM25Ranking', '5000 Documents'),
        # NOTE(review): this key is identical to the '1000 Documents' entry
        # above although the label says 10000, so both rows read the same
        # scores. Possibly meant 'DMOZSmoothed10000BM25Ranking'; confirm
        # against the keys of averageScores.
        ('DMOZSmoothed1000BM25Ranking', '10000 Documents')
    ]
    # The metric names are taken from the first listed experiment and sorted.
    # NOTE(review): calling .sort() on the result of .keys() relies on
    # Python 2, where keys() returns a list; Python 3 dict views have no
    # sort() method.
    sortedMetrics = averageScores[sortedAverageScores[0][0]].keys()
    sortedMetrics.sort()
    for metric in sortedMetrics:
        dataContent += '"' + splitCamelCase(metric).title() + '" '
    dataContent += '\n'
    # One row per experiment: quoted label followed by its score per metric
    for experiment in sortedAverageScores:
        dataContent += '"' + experiment[1] + '" '
        for metric in sortedMetrics:
            dataContent += str(averageScores[experiment[0]][metric]) + ' '
        dataContent += "\n"

    # Write it out to the input file to gnuplot
    # NOTE(review): the file object is never explicitly closed here.
    open("input.dat", 'w').write(dataContent)

    # Get the configuration (read in full as a string)
    configurationFile = projectRoot + '/analysis/configurations/smoothing_clustered_histograms'
    configuration = open(configurationFile).read()

    # Fill out the configuration