Beispiel #1
0
    def make_score_vs_rmsd_plot(self, loop):
        """
        Create a score vs RMSD plot for the given loop.  In fact two plots are 
        made: one which includes every model and one which includes only the 
        top 75% best scoring models.  Normally the second plot is of more 
        interest, because it focuses better on the interesting lower-left 
        region of the plot.  The full plots often have outliers that really 
        scale the score axis.
        """

        # This method would be much more concise if it used matplotlib.

        if not loop.has_data:
            return

        tsv_path = os.path.join(loop.latex_dir, 'score_vs_rmsd.tsv')
        gnu_path = os.path.join(loop.latex_dir, 'score_vs_rmsd.gnu')
        pdf_path_100 = os.path.join(loop.latex_dir, 'score_vs_rmsd_all.pdf')
        pdf_path_75 = os.path.join(loop.latex_dir,
                                   'score_vs_rmsd_third_quartile.pdf')

        tsv_row = '{0.id}\t{0.rmsd}\t{0.score}\n'

        sorted_models = loop.models_sorted_by_score
        scores = loop.scores
        min_score, max_score = min(scores), max(scores)
        third_quartile = numpy.percentile(scores, 75)
        native_score = 0  # This isn't stored in the database yet.

        # Write score vs RMSD data to a tab-separated value (TSV) file that can
        # easily be parsed by gnuplot.

        with open(tsv_path, 'w') as file:
            file.write('#Model\tLoop_rmsd\tTotal_score\n')
            file.write('input_structure\t0.0\t{0}\n'.format(native_score))

            # All models
            file.write('\n\n')
            for model in sorted_models:
                file.write(tsv_row.format(model))

            # Top X scoring models
            file.write('\n\n')
            for model in sorted_models[:top_x]:
                file.write(tsv_row.format(model))

            # Top scoring model
            file.write('\n\n')
            file.write(tsv_row.format(sorted_models[0]))

        # Write the gnuplot script and generate the EPS plots.

        gnuplot_script = '''\
set autoscale
set border 31
set tics out
set terminal pdf
set xtics autofreq
set xtics nomirror
set ytics autofreq
set ytics nomirror
set noy2tics
set nox2tics

set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "{loop.benchmark.color}" lw 2 ps 0.5 pt 7
set style line 3 lt 1 lc rgb "forest-green" lw 2 ps 2 pt 13
set style line 4 lt 1 lc rgb "dark-gray" lw 2 ps 0.5 pt 7
set style line 5 lt 1 lc rgb "black" lw 2 ps 0.8 pt 13
set style line 6 lt 1 lc rgb "black" lw 2
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 2 lc rgb "dark-gray" lw 5

set boxwidth 0.75

set key below right
set xrange [0:]
set encoding iso_8859_1
set title "{loop.pdb_id}: {loop.percent_subangstrom:0.2f}% sub-\305 models"
set xlabel "r.m.s. deviation to crystal loop [\305]"
set arrow from 1, graph 0 to 1, graph 1 ls 9 nohead
set ylabel "Rosetta all-atom score"
set output "{pdf_path_100}"
plot "{tsv_path}" index 1 using ($2):($3) with points ls 2 title "all models" axes x1y1, \\
     "{tsv_path}" index 2 using ($2):($3) with points ls 4 title "5 lowest energy models" axes x1y1, \\
     "{tsv_path}" index 3 using ($2):($3) with points ls 5 title "top 5 best model" axes x1y1
set yrange [:{third_quartile}]
set output "{pdf_path_75}"
set xrange [0:]
plot "{tsv_path}" index 1 using ($2):($3) with points ls 2 title "75% lowest-scoring models" axes x1y1, \\
     "{tsv_path}" index 2 using ($2):($3) with points ls 4 title "5 lowest energy models" axes x1y1, \\
     "{tsv_path}" index 3 using ($2):($3) with points ls 5 title "top 5 best model" axes x1y1
'''
        with open(gnu_path, 'w') as file:
            file.write(gnuplot_script.format(**locals()))

        utilities.run_gnuplot(gnu_path, verbose=self.verbose)

        return pdf_path_100, pdf_path_75
Beispiel #2
0
    def make_comparison_plot(self,
                             distributions,
                             path_template,
                             custom_gnuplot_commands,
                             custom_plot_arguments=''):
        """
        Create a plot comparing the same distribution from several different 
        benchmarks.  Examples include the percent subangstrom distribution and 
        the RMSDs of the lowest scoring predictions.  The resulting plot will 
        have a nicely colored box plot for each benchmark.

        Inputs
        ------
        distributions:
            A dictionary mapping benchmark objects to some sort of 
            distribution.  The box plot will be created using the distribution 
            and labeled using the benchmark object.

        path_template:
            The base file name used to create the TSV, GNU, and EPS files 
            generated by this method.

        custom_gnuplot_commands:
            A string containing custom commands to pass to gnuplot immediately 
            before the 'plot' command.  This is meant to be used for doing 
            things like labeling the axes or adding useful vertical lines.

        Outputs
        -------
        This method creates three files: a TSV file containing the raw data 
        being plotted, a GNU file containing the gnuplot commands used to 
        generate the plot, and an EPS file containing the plot itself.  The 
        path to the generated EPS file is also returned.
        """

        tsv_path = os.path.join(self.latex_dir, path_template + '.tsv')
        gnu_path = os.path.join(self.latex_dir, path_template + '.gnu')
        pdf_path = os.path.join(self.latex_dir, path_template + '.pdf')

        # Write data to TSV file that can be easily parsed by gnuplot.

        boxplot_header = '#' + '\t'.join([
            'Protocol', 'x', 'lower', 'first_quartile', 'median',
            'third_quartile', 'upper'
        ]) + '\n'

        boxplot_row = '\t'.join([
            '{benchmark.name}', '{gnuplot_index}', '{stats.lower_whisker}',
            '{stats.first_quartile}', '{stats.median}',
            '{stats.third_quartile}', '{stats.upper_whisker}'
        ]) + '\n'

        outlier_header = '#' + '\t'.join(['Protocol', 'x', 'outlier']) + '\n'

        outlier_row = '\t'.join(
            ['{benchmark.name}', '{gnuplot_index}', '{outlier}']) + '\n'

        for index, benchmark in enumerate(reversed(distributions)):
            distribution = {1: distributions[benchmark]}
            boxplots = statistics.tukeyBoxAndWhisker(distribution)
            stats, outliers = boxplots[1]
            gnuplot_index = index + 1

            if not outliers:
                outliers = '?'

            with open(tsv_path, 'a') as file:
                file.write(boxplot_header)
                file.write(boxplot_row.format(**locals()))
                file.write('\n\n')
                file.write(outlier_header)
                for outlier in outliers:
                    file.write(outlier_row.format(**locals()))
                file.write('\n\n')

        # Generate plot using gnuplot.

        x_range = len(distributions) + 1
        x_ticks = ', '.join([
            '"{0.title}" {1}'.format(benchmark, i + 1)
            for i, benchmark in enumerate(reversed(distributions))
        ])
        fig_height = min(1 + len(self), 5)

        gnuplot_script = '''\
set autoscale
set border 31
set tics out
set terminal pdf size {fig_height},6
set xtics ({x_ticks}) rotate by -90
set xtics nomirror
set ytics autofreq rotate by -90 center
set ytics nomirror
set noy2tics
set nox2tics

set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "blue" lw 5 ps 1 pt 7
set style line 3 lt 1 lc rgb "forest-green" lw 2 ps 2 pt 13
set style line 4 lt 1 lc rgb "gold" lw 2 ps 1 pt 7
set style line 5 lt 1 lc rgb "red" lw 2 ps 2 pt 13
set style line 6 lt 1 lc rgb "black" lw 2
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 2 lc rgb "dark-gray" lw 5
set style fill solid 0.5

set boxwidth 0.75
set key below right
set xrange [0:{x_range}]
set encoding iso_8859_1
set notitle
unset xlabel
set yrange [0:]
set output "{pdf_path}"
{custom_gnuplot_commands}
plot {plot_arguments}
'''

        plot_template = ', \\\n     '.join([
            '"{tsv_path}" index {box_plot_index} using 2:4:3:7:6 with candlesticks whiskerbars lt 1 lc rgb "{color}" lw 5 notitle',
            '"{tsv_path}" index {box_plot_index} using 2:5:5:5:5 with candlesticks lt 1 lc rgb "black" lw 5 notitle',
            '"{tsv_path}" index {outliers_index} using 2:3 with points lt 1 lc rgb "{color}" lw 5 ps 0.5 pt 7 notitle',
        ])

        if not distributions:
            raise Exception(
                'An error occurred retrieving data from the database.')
        plot_arguments = ', \\\n     '.join([
            plot_template.format(tsv_path=tsv_path,
                                 box_plot_index=2 * i,
                                 outliers_index=2 * i + 1,
                                 color=benchmark.color)
            for i, benchmark in enumerate(reversed(distributions))
        ])

        if custom_plot_arguments:
            plot_arguments += ', ' + custom_plot_arguments

        with open(gnu_path, 'w') as file:
            file.write(gnuplot_script.format(**locals()))

        utilities.run_gnuplot(gnu_path, verbose=self.verbose)

        return pdf_path
Beispiel #3
0
    def make_summary_box_plots(self, benchmark):
        tsv_path = os.path.join(benchmark.latex_dir, 'best_model_dists.tsv')
        gnu_path = os.path.join(benchmark.latex_dir, 'best_model_dists.gnu')
        pdf_path_rmsd = os.path.join(benchmark.latex_dir,
                                     'best_model_dists_rmsd.pdf')
        pdf_path_score = os.path.join(benchmark.latex_dir,
                                      'best_model_dists_score.pdf')
        pdf_path_subA = os.path.join(
            benchmark.latex_dir, 'best_model_dists_percent_subangstrom.pdf')

        # Calculate box plot parameters.

        best_top_x_models = benchmark.best_top_x_models

        distributions = {
            1: [x.rmsd for x in best_top_x_models],
            2: [x.score for x in best_top_x_models],
            3: benchmark.percents_subangstrom,
        }

        box_plots = statistics.tukeyBoxAndWhisker(distributions)

        # Write box plot data to a tab-separated value (TSV) file that can
        # easily be parsed by gnuplot.

        with open(tsv_path, 'w') as file:
            for x in box_plots:
                box_params, outliers = box_plots[x]

                file.write('#x\t' + 'lower\t' + 'first_quartile\t' +
                           'median\t' + 'third_quartile\t' + 'upper\n')
                for item in box_params:
                    file.write('{0}\t'.format(item))
                file.write('\n\n\n')
                file.write('#x\toutlier\n')
                for outlier in outliers:
                    file.write('{0}\t{1}\n'.format(x, outlier))
                if not outliers: file.write('{0}\t?\n'.format(x))
                file.write('\n\n')

        # Write the gnuplot script and generate the EPS plots.

        gnuplot_script = '''\
set autoscale
set border 31
set tics out
set terminal pdf
set size ratio 1
set noxtics
set xrange [0.5:1.5]
set nox2tics
set ytics 1
set ytics nomirror
set noy2tics

set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "{benchmark.color}" lw 5 pt 7
set style line 3 lt 1 lc rgb "{benchmark.color}" lw 5
set style line 4 lt 1 lc rgb "gold" lw 2
set style line 5 lt 1 lc rgb "red" lw 5 pt 7
set style line 6 lt 1 lc rgb "black" lw 5
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 0 lc rgb "black" lw 5

set boxwidth 0.25
set key tmargin
set title "Best models performance distribution"
set noxlabel
set style fill solid 0.5
set encoding iso_8859_1
set ylabel "r.m.s. deviation to crystal loop [\305]"
set output "{pdf_path_rmsd}"
f(x)=1
plot "{tsv_path}" index 0 using 1:3:2:6:5 with candlesticks whiskerbars ls 2 notitle axes x1y1,\\
     "{tsv_path}" index 0 using 1:4:4:4:4 with candlesticks ls 6 notitle,\\
     "{tsv_path}" index 1 using 1:2 with points ls 2 ps 0.5 pt 7 notitle,\\
     f(x) with lines ls 9 notitle

set ylabel "Rosetta all-atom score"
set xrange [1.5:2.5]
set ytics autofreq
set output "{pdf_path_score}"
plot "{tsv_path}" index 2 using 1:3:2:6:5 with candlesticks whiskerbars ls 5 notitle axes x1y1,\\
     "{tsv_path}" index 2 using 1:4:4:4:4 with candlesticks ls 6 notitle,\\
     "{tsv_path}" index 3 using 1:2 with points ls 5 ps 0.5 pt 7 notitle

set title "Protocol performance distribution"
set ylabel "Fraction sub-\305 models [%]"
set xrange [2.5:3.5]
set ytics 10
set output "{pdf_path_subA}"
plot "{tsv_path}" index 4 using 1:3:2:6:5 with candlesticks whiskerbars ls 3 notitle axes x1y1,\\
     "{tsv_path}" index 4 using 1:4:4:4:4 with candlesticks ls 6 notitle,\\
     "{tsv_path}" index 5 using 1:2 with points ls 3 ps 0.5 pt 7 notitle
'''
        with open(gnu_path, 'w') as file:
            file.write(gnuplot_script.format(**locals()))

        # If there are no outliers, gnuplot will produce a warning.  This is a
        # pretty common occurrence, and I think it's really bad to produce
        # warning message for common occurrences.  So instead I opt to ignore
        # stderr.  This is a little dangerous.  It would probably be better to
        # suppress only the exact warning I know about.  But if we were really
        # interested in doing things the right way, we would use matplotlib
        # instead of gnuplot.

        with open(os.devnull) as devnull:
            utilities.run_gnuplot(gnu_path,
                                  stderr=devnull,
                                  verbose=self.verbose)

        return pdf_path_rmsd, pdf_path_score, pdf_path_subA
Beispiel #4
0
    def make_rmsd_histogram(self, loop):
        """
        Create a smoothed RMSD histogram for the given loop.  100 bins are used 
        when making the plot, and the smoothing is done by gnuplot.
        """

        # This method would be much more concise if it used matplotlib.

        if not loop.has_data:
            return

        tsv_path = os.path.join(loop.latex_dir, 'rmsd_histogram.tsv')
        gnu_path = os.path.join(loop.latex_dir, 'rmsd_histogram.gnu')
        pdf_path = os.path.join(loop.latex_dir, 'rmsd_histogram.pdf')

        # Write histogram data to a tab-separated value (TSV) file that can
        # easily be parsed by gnuplot.

        num_bins = 100
        histogram = statistics.histogram(loop.rmsds, num_bins)

        with open(tsv_path, 'w') as file:
            file.write('#All models\n')
            file.write('#RMSD\tFrequency\n')

            for rmsd, count in histogram:
                count = num_bins * count / len(loop)
                file.write('{0}\t{1}\n'.format(rmsd, count))

        # Write the gnuplot script and generate the EPS plot.

        gnuplot_script = '''\
set autoscale
set border 31
set tics out
set terminal pdf enhanced color
set xtics autofreq
set xtics nomirror
set ytics autofreq
set ytics nomirror
set noy2tics
set nox2tics

set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "{loop.benchmark.color}" lw 8 ps 1 pt 7
set style line 3 lt 1 lc rgb "forest-green" lw 2 ps 2 pt 13
set style line 4 lt 1 lc rgb "gold" lw 2 ps 1 pt 7
set style line 5 lt 1 lc rgb "red" lw 2 ps 2 pt 13
set style line 6 lt 1 lc rgb "black" lw 2
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 2 lc rgb "dark-gray" lw 5

set boxwidth 0.75

set key below right
set xrange [0:]
set encoding iso_8859_1
set title "{loop.pdb_id}: {loop.percent_subangstrom:0.2f}% sub-\305 models"
set xlabel "r.m.s. deviation to crystal loop [\305]"
set yrange [0:]
set arrow from 1, graph 0 to 1, graph 1 ls 9 nohead
set ylabel "Fraction of models [%]"
set output "{pdf_path}"
plot "{tsv_path}" index 0 using ($1):($2) smooth bezier with lines ls 2 title "all models" axes x1y1
'''
        with open(gnu_path, 'w') as file:
            file.write(gnuplot_script.format(**locals()))

        utilities.run_gnuplot(gnu_path, verbose=self.verbose)

        return pdf_path
Beispiel #5
0
    def make_score_vs_rmsd_plot(self, loop):
        """
        Create a score vs RMSD plot for the given loop.  In fact two plots are 
        made: one which includes every model and one which includes only the 
        top 75% best scoring models.  Normally the second plot is of more 
        interest, because it focuses better on the interesting lower-left 
        region of the plot.  The full plots often have outliers that really 
        scale the score axis.
        """

        # This method would be much more concise if it used matplotlib.

        if not loop.has_data:
            return

        tsv_path = os.path.join(loop.latex_dir, 'score_vs_rmsd.tsv')
        gnu_path = os.path.join(loop.latex_dir, 'score_vs_rmsd.gnu')
        pdf_path_100 = os.path.join(loop.latex_dir, 'score_vs_rmsd_all.pdf')
        pdf_path_75 = os.path.join(loop.latex_dir, 'score_vs_rmsd_third_quartile.pdf')

        tsv_row = '{0.id}\t{0.rmsd}\t{0.score}\n'

        sorted_models = loop.models_sorted_by_score
        scores = loop.scores
        min_score, max_score = min(scores), max(scores)
        third_quartile = numpy.percentile(scores, 75)
        native_score = 0    # This isn't stored in the database yet.

        # Write score vs RMSD data to a tab-separated value (TSV) file that can 
        # easily be parsed by gnuplot.

        with open(tsv_path, 'w') as file:
            file.write('#Model\tLoop_rmsd\tTotal_score\n')
            file.write('input_structure\t0.0\t{0}\n'.format(native_score))

            # All models
            file.write('\n\n')
            for model in sorted_models:
                file.write(tsv_row.format(model))

            # Top X scoring models
            file.write('\n\n')
            for model in sorted_models[:top_x]:
                file.write(tsv_row.format(model))

            # Top scoring model
            file.write('\n\n')
            file.write(tsv_row.format(sorted_models[0]))

        # Write the gnuplot script and generate the EPS plots.

        gnuplot_script = '''\
set autoscale
set border 31
set tics out
set terminal pdf
set xtics autofreq
set xtics nomirror
set ytics autofreq
set ytics nomirror
set noy2tics
set nox2tics

set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "{loop.benchmark.color}" lw 2 ps 0.5 pt 7
set style line 3 lt 1 lc rgb "forest-green" lw 2 ps 2 pt 13
set style line 4 lt 1 lc rgb "dark-gray" lw 2 ps 0.5 pt 7
set style line 5 lt 1 lc rgb "black" lw 2 ps 0.8 pt 13
set style line 6 lt 1 lc rgb "black" lw 2
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 2 lc rgb "dark-gray" lw 5

set boxwidth 0.75

set key below right
set xrange [0:]
set encoding iso_8859_1
set title "{loop.pdb_id}: {loop.percent_subangstrom:0.2f}% sub-\305 models"
set xlabel "r.m.s. deviation to crystal loop [\305]"
set arrow from 1, graph 0 to 1, graph 1 ls 9 nohead
set ylabel "Rosetta all-atom score"
set output "{pdf_path_100}"
plot "{tsv_path}" index 1 using ($2):($3) with points ls 2 title "all models" axes x1y1, \\
     "{tsv_path}" index 2 using ($2):($3) with points ls 4 title "5 lowest energy models" axes x1y1, \\
     "{tsv_path}" index 3 using ($2):($3) with points ls 5 title "top 5 best model" axes x1y1
set yrange [:{third_quartile}]
set output "{pdf_path_75}"
set xrange [0:]
plot "{tsv_path}" index 1 using ($2):($3) with points ls 2 title "75% lowest-scoring models" axes x1y1, \\
     "{tsv_path}" index 2 using ($2):($3) with points ls 4 title "5 lowest energy models" axes x1y1, \\
     "{tsv_path}" index 3 using ($2):($3) with points ls 5 title "top 5 best model" axes x1y1
'''
        with open(gnu_path, 'w') as file:
            file.write(gnuplot_script.format(**locals()))

        utilities.run_gnuplot(gnu_path, verbose=self.verbose)

        return pdf_path_100, pdf_path_75
Beispiel #6
0
    def make_summary_box_plots(self, benchmark):
        tsv_path = os.path.join(benchmark.latex_dir, 'best_model_dists.tsv')
        gnu_path = os.path.join(benchmark.latex_dir, 'best_model_dists.gnu')
        pdf_path_rmsd = os.path.join(benchmark.latex_dir, 'best_model_dists_rmsd.pdf')
        pdf_path_score = os.path.join(benchmark.latex_dir, 'best_model_dists_score.pdf')
        pdf_path_subA = os.path.join(benchmark.latex_dir, 'best_model_dists_percent_subangstrom.pdf')

        # Calculate box plot parameters.

        best_top_x_models = benchmark.best_top_x_models

        distributions = {
                1: [x.rmsd for x in best_top_x_models],
                2: [x.score for x in best_top_x_models],
                3: benchmark.percents_subangstrom,
        }

        box_plots = statistics.tukeyBoxAndWhisker(distributions)

        # Write box plot data to a tab-separated value (TSV) file that can 
        # easily be parsed by gnuplot.

        with open(tsv_path, 'w') as file:
            for x in box_plots:
                box_params, outliers = box_plots[x]

                file.write('#x\t'+'lower\t'+'first_quartile\t'+'median\t'+'third_quartile\t'+'upper\n')
                for item in box_params: file.write('{0}\t'.format(item))
                file.write('\n\n\n')
                file.write('#x\toutlier\n')
                for outlier in outliers: file.write('{0}\t{1}\n'.format(x, outlier))
                if not outliers: file.write('{0}\t?\n'.format(x))
                file.write('\n\n')

        # Write the gnuplot script and generate the EPS plots.

        gnuplot_script = '''\
set autoscale
set border 31
set tics out
set terminal pdf
set size ratio 1
set noxtics
set xrange [0.5:1.5]
set nox2tics
set ytics 1
set ytics nomirror
set noy2tics

set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "{benchmark.color}" lw 5 pt 7
set style line 3 lt 1 lc rgb "{benchmark.color}" lw 5
set style line 4 lt 1 lc rgb "gold" lw 2
set style line 5 lt 1 lc rgb "red" lw 5 pt 7
set style line 6 lt 1 lc rgb "black" lw 5
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 0 lc rgb "black" lw 5

set boxwidth 0.25
set key tmargin
set title "Best models performance distribution"
set noxlabel
set style fill solid 0.5
set encoding iso_8859_1
set ylabel "r.m.s. deviation to crystal loop [\305]"
set output "{pdf_path_rmsd}"
f(x)=1
plot "{tsv_path}" index 0 using 1:3:2:6:5 with candlesticks whiskerbars ls 2 notitle axes x1y1,\\
     "{tsv_path}" index 0 using 1:4:4:4:4 with candlesticks ls 6 notitle,\\
     "{tsv_path}" index 1 using 1:2 with points ls 2 ps 0.5 pt 7 notitle,\\
     f(x) with lines ls 9 notitle

set ylabel "Rosetta all-atom score"
set xrange [1.5:2.5]
set ytics autofreq
set output "{pdf_path_score}"
plot "{tsv_path}" index 2 using 1:3:2:6:5 with candlesticks whiskerbars ls 5 notitle axes x1y1,\\
     "{tsv_path}" index 2 using 1:4:4:4:4 with candlesticks ls 6 notitle,\\
     "{tsv_path}" index 3 using 1:2 with points ls 5 ps 0.5 pt 7 notitle

set title "Protocol performance distribution"
set ylabel "Fraction sub-\305 models [%]"
set xrange [2.5:3.5]
set ytics 10
set output "{pdf_path_subA}"
plot "{tsv_path}" index 4 using 1:3:2:6:5 with candlesticks whiskerbars ls 3 notitle axes x1y1,\\
     "{tsv_path}" index 4 using 1:4:4:4:4 with candlesticks ls 6 notitle,\\
     "{tsv_path}" index 5 using 1:2 with points ls 3 ps 0.5 pt 7 notitle
'''
        with open(gnu_path, 'w') as file:
            file.write(gnuplot_script.format(**locals()))

        # If there are no outliers, gnuplot will produce a warning.  This is a 
        # pretty common occurrence, and I think it's really bad to produce 
        # warning message for common occurrences.  So instead I opt to ignore 
        # stderr.  This is a little dangerous.  It would probably be better to 
        # suppress only the exact warning I know about.  But if we were really 
        # interested in doing things the right way, we would use matplotlib 
        # instead of gnuplot.

        with open(os.devnull) as devnull:
            utilities.run_gnuplot(gnu_path, stderr=devnull, verbose=self.verbose)

        return pdf_path_rmsd, pdf_path_score, pdf_path_subA
Beispiel #7
0
    def make_comparison_plot(self, distributions, path_template,
            custom_gnuplot_commands, custom_plot_arguments=''):
        """
        Create a plot comparing the same distribution from several different 
        benchmarks.  Examples include the percent subangstrom distribution and 
        the RMSDs of the lowest scoring predictions.  The resulting plot will 
        have a nicely colored box plot for each benchmark.

        Inputs
        ------
        distributions:
            A dictionary mapping benchmark objects to some sort of 
            distribution.  The box plot will be created using the distribution 
            and labeled using the benchmark object.

        path_template:
            The base file name used to create the TSV, GNU, and EPS files 
            generated by this method.

        custom_gnuplot_commands:
            A string containing custom commands to pass to gnuplot immediately 
            before the 'plot' command.  This is meant to be used for doing 
            things like labeling the axes or adding useful vertical lines.

        Outputs
        -------
        This method creates three files: a TSV file containing the raw data 
        being plotted, a GNU file containing the gnuplot commands used to 
        generate the plot, and an EPS file containing the plot itself.  The 
        path to the generated EPS file is also returned.
        """

        tsv_path = os.path.join(self.latex_dir, path_template + '.tsv')
        gnu_path = os.path.join(self.latex_dir, path_template + '.gnu')
        pdf_path = os.path.join(self.latex_dir, path_template + '.pdf')

        # Write data to TSV file that can be easily parsed by gnuplot.

        boxplot_header = '#' + '\t'.join([
                'Protocol',
                'x',
                'lower',
                'first_quartile',
                'median',
                'third_quartile',
                'upper']) + '\n'

        boxplot_row = '\t'.join([
                '{benchmark.name}',
                '{gnuplot_index}',
                '{stats.lower_whisker}',
                '{stats.first_quartile}',
                '{stats.median}',
                '{stats.third_quartile}',
                '{stats.upper_whisker}']) + '\n'

        outlier_header = '#' + '\t'.join([
                'Protocol',
                'x',
                'outlier']) + '\n'

        outlier_row = '\t'.join([
                '{benchmark.name}',
                '{gnuplot_index}',
                '{outlier}']) + '\n'

        for index, benchmark in enumerate(reversed(distributions)):
            distribution = {1: distributions[benchmark]}
            boxplots = statistics.tukeyBoxAndWhisker(distribution)
            stats, outliers = boxplots[1]
            gnuplot_index = index + 1

            if not outliers:
                outliers = '?'

            with open(tsv_path, 'a') as file:
                file.write(boxplot_header)
                file.write(boxplot_row.format(**locals()))
                file.write('\n\n')
                file.write(outlier_header)
                for outlier in outliers:
                    file.write(outlier_row.format(**locals()))
                file.write('\n\n')

        # Generate plot using gnuplot.

        x_range = len(distributions) + 1
        x_ticks = ', '.join([
            '"{0.title}" {1}'.format(benchmark, i+1)
            for i, benchmark in enumerate(reversed(distributions))
        ])
        fig_height = min(1 + len(self), 5)

        gnuplot_script = '''\
set autoscale
set border 31
set tics out
set terminal pdf size {fig_height},6
set xtics ({x_ticks}) rotate by -90
set xtics nomirror
set ytics autofreq rotate by -90 center
set ytics nomirror
set noy2tics
set nox2tics

set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "blue" lw 5 ps 1 pt 7
set style line 3 lt 1 lc rgb "forest-green" lw 2 ps 2 pt 13
set style line 4 lt 1 lc rgb "gold" lw 2 ps 1 pt 7
set style line 5 lt 1 lc rgb "red" lw 2 ps 2 pt 13
set style line 6 lt 1 lc rgb "black" lw 2
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 2 lc rgb "dark-gray" lw 5
set style fill solid 0.5

set boxwidth 0.75
set key below right
set xrange [0:{x_range}]
set encoding iso_8859_1
set notitle
unset xlabel
set yrange [0:]
set output "{pdf_path}"
{custom_gnuplot_commands}
plot {plot_arguments}
'''

        plot_template = ', \\\n     '.join([
                '"{tsv_path}" index {box_plot_index} using 2:4:3:7:6 with candlesticks whiskerbars lt 1 lc rgb "{color}" lw 5 notitle',
                '"{tsv_path}" index {box_plot_index} using 2:5:5:5:5 with candlesticks lt 1 lc rgb "black" lw 5 notitle',
                '"{tsv_path}" index {outliers_index} using 2:3 with points lt 1 lc rgb "{color}" lw 5 ps 0.5 pt 7 notitle',
        ])

        if not distributions:
            raise Exception('An error occurred retrieving data from the database.')
        plot_arguments = ', \\\n     '.join([

                plot_template.format(
                    tsv_path=tsv_path,
                    box_plot_index=2*i,
                    outliers_index=2*i+1,
                    color=benchmark.color)

                for i, benchmark in enumerate(reversed(distributions))
        ])

        if custom_plot_arguments:
            plot_arguments += ', ' + custom_plot_arguments

        with open(gnu_path, 'w') as file:
            file.write(gnuplot_script.format(**locals()))

        utilities.run_gnuplot(gnu_path, verbose=self.verbose)

        return pdf_path
Beispiel #8
0
    def make_rmsd_histogram(self, loop):
        """
        Create a smoothed RMSD histogram for the given loop.  100 bins are used 
        when making the plot, and the smoothing is done by gnuplot.
        """

        # This method would be much more concise if it used matplotlib.

        if not loop.has_data:
            return

        tsv_path = os.path.join(loop.latex_dir, 'rmsd_histogram.tsv')
        gnu_path = os.path.join(loop.latex_dir, 'rmsd_histogram.gnu')
        pdf_path = os.path.join(loop.latex_dir, 'rmsd_histogram.pdf')

        # Write histogram data to a tab-separated value (TSV) file that can 
        # easily be parsed by gnuplot.

        num_bins = 100
        histogram = statistics.histogram(loop.rmsds, num_bins)

        with open(tsv_path, 'w') as file:
            file.write('#All models\n')
            file.write('#RMSD\tFrequency\n')

            for rmsd, count in histogram:
                count = num_bins * count / len(loop)
                file.write('{0}\t{1}\n'.format(rmsd, count))

        # Write the gnuplot script and generate the EPS plot.

        gnuplot_script='''\
set autoscale
set border 31
set tics out
set terminal pdf enhanced color
set xtics autofreq
set xtics nomirror
set ytics autofreq
set ytics nomirror
set noy2tics
set nox2tics

set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "{loop.benchmark.color}" lw 8 ps 1 pt 7
set style line 3 lt 1 lc rgb "forest-green" lw 2 ps 2 pt 13
set style line 4 lt 1 lc rgb "gold" lw 2 ps 1 pt 7
set style line 5 lt 1 lc rgb "red" lw 2 ps 2 pt 13
set style line 6 lt 1 lc rgb "black" lw 2
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 2 lc rgb "dark-gray" lw 5

set boxwidth 0.75

set key below right
set xrange [0:]
set encoding iso_8859_1
set title "{loop.pdb_id}: {loop.percent_subangstrom:0.2f}% sub-\305 models"
set xlabel "r.m.s. deviation to crystal loop [\305]"
set yrange [0:]
set arrow from 1, graph 0 to 1, graph 1 ls 9 nohead
set ylabel "Fraction of models [%]"
set output "{pdf_path}"
plot "{tsv_path}" index 0 using ($1):($2) smooth bezier with lines ls 2 title "all models" axes x1y1
'''
        with open(gnu_path, 'w') as file:
            file.write(gnuplot_script.format(**locals()))

        utilities.run_gnuplot(gnu_path, verbose=self.verbose)

        return pdf_path