def make_score_vs_rmsd_plot(self, loop):
    """
    Create score vs RMSD plots for the given loop.

    Two plots are made from the same data: one with an automatically scaled
    score axis and one whose score axis is capped at the 75th percentile of
    the scores.  Normally the second plot is of more interest, because it
    focuses better on the interesting lower-left region of the plot.  The
    full plots often have outliers that really scale the score axis.

    Inputs
    ------
    loop:
        The loop to plot.  This method reads its has_data, latex_dir,
        models_sorted_by_score, scores, pdb_id, percent_subangstrom and
        benchmark.color attributes.

    Outputs
    -------
    This method writes a TSV file containing the data being plotted and a
    GNU file containing the gnuplot commands, then runs gnuplot to generate
    two PDF files.  The paths to the two PDF files (full plot first, capped
    plot second) are returned.  Nothing is written and None is returned if
    the loop has no data.
    """

    # This method would be much more concise if it used matplotlib.

    if not loop.has_data:
        return

    tsv_path = os.path.join(loop.latex_dir, 'score_vs_rmsd.tsv')
    gnu_path = os.path.join(loop.latex_dir, 'score_vs_rmsd.gnu')
    pdf_path_100 = os.path.join(loop.latex_dir, 'score_vs_rmsd_all.pdf')
    pdf_path_75 = os.path.join(
            loop.latex_dir, 'score_vs_rmsd_third_quartile.pdf')

    tsv_row = '{0.id}\t{0.rmsd}\t{0.score}\n'

    sorted_models = loop.models_sorted_by_score
    third_quartile = numpy.percentile(loop.scores, 75)
    native_score = 0    # This isn't stored in the database yet.

    # Write score vs RMSD data to a tab-separated value (TSV) file that can
    # easily be parsed by gnuplot.  Each section below is preceded by two
    # blank lines so that the gnuplot script can address it with 'index N'.

    with open(tsv_path, 'w') as tsv_file:

        # Index 0: the input structure

        tsv_file.write('#Model\tLoop_rmsd\tTotal_score\n')
        tsv_file.write('input_structure\t0.0\t{0}\n'.format(native_score))

        # Index 1: all models

        tsv_file.write('\n\n')
        for model in sorted_models:
            tsv_file.write(tsv_row.format(model))

        # Index 2: top X scoring models

        tsv_file.write('\n\n')
        for model in sorted_models[:top_x]:
            tsv_file.write(tsv_row.format(model))

        # Index 3: top scoring model

        tsv_file.write('\n\n')
        tsv_file.write(tsv_row.format(sorted_models[0]))

    # Write the gnuplot script and generate the PDF plots.
    #
    # NOTE(review): the plot titles below hard-code "5" while the data
    # written to index 2 is sliced with top_x -- confirm the two agree.

    gnuplot_script = '''\
set autoscale
set border 31
set tics out
set terminal pdf
set xtics autofreq
set xtics nomirror
set ytics autofreq
set ytics nomirror
set noy2tics
set nox2tics
set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "{loop.benchmark.color}" lw 2 ps 0.5 pt 7
set style line 3 lt 1 lc rgb "forest-green" lw 2 ps 2 pt 13
set style line 4 lt 1 lc rgb "dark-gray" lw 2 ps 0.5 pt 7
set style line 5 lt 1 lc rgb "black" lw 2 ps 0.8 pt 13
set style line 6 lt 1 lc rgb "black" lw 2
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 2 lc rgb "dark-gray" lw 5
set boxwidth 0.75
set key below right
set xrange [0:]
set encoding iso_8859_1
set title "{loop.pdb_id}: {loop.percent_subangstrom:0.2f}% sub-\305 models"
set xlabel "r.m.s. deviation to crystal loop [\305]"
set arrow from 1, graph 0 to 1, graph 1 ls 9 nohead
set ylabel "Rosetta all-atom score"
set output "{pdf_path_100}"
plot "{tsv_path}" index 1 using ($2):($3) with points ls 2 title "all models" axes x1y1, \\
"{tsv_path}" index 2 using ($2):($3) with points ls 4 title "5 lowest energy models" axes x1y1, \\
"{tsv_path}" index 3 using ($2):($3) with points ls 5 title "top 5 best model" axes x1y1
set yrange [:{third_quartile}]
set output "{pdf_path_75}"
set xrange [0:]
plot "{tsv_path}" index 1 using ($2):($3) with points ls 2 title "75% lowest-scoring models" axes x1y1, \\
"{tsv_path}" index 2 using ($2):($3) with points ls 4 title "5 lowest energy models" axes x1y1, \\
"{tsv_path}" index 3 using ($2):($3) with points ls 5 title "top 5 best model" axes x1y1
'''

    # The fields are passed explicitly (rather than via locals()) so that
    # the template's dependencies are visible here.

    with open(gnu_path, 'w') as gnu_file:
        gnu_file.write(gnuplot_script.format(
            loop=loop,
            tsv_path=tsv_path,
            pdf_path_100=pdf_path_100,
            pdf_path_75=pdf_path_75,
            third_quartile=third_quartile,
        ))

    utilities.run_gnuplot(gnu_path, verbose=self.verbose)

    return pdf_path_100, pdf_path_75
def make_comparison_plot(self, distributions, path_template,
        custom_gnuplot_commands, custom_plot_arguments=''):
    """
    Create a plot comparing the same distribution from several different
    benchmarks.  Examples include the percent subangstrom distribution and
    the RMSDs of the lowest scoring predictions.  The resulting plot will
    have a colored box plot for each benchmark.

    Inputs
    ------
    distributions:
        A dictionary mapping benchmark objects to some sort of
        distribution.  The box plot will be created using the distribution
        and labeled using the benchmark object.  It must support
        reversed(); the benchmarks are plotted in reversed order.

    path_template:
        The base file name used to create the TSV, GNU, and PDF files
        generated by this method.

    custom_gnuplot_commands:
        A string containing custom commands to pass to gnuplot immediately
        before the 'plot' command.  This is meant to be used for doing
        things like labeling the axes or adding useful vertical lines.

    custom_plot_arguments:
        An optional string appended, after a comma, to the arguments of
        the 'plot' command.

    Outputs
    -------
    This method creates three files: a TSV file containing the raw data
    being plotted, a GNU file containing the gnuplot commands used to
    generate the plot, and a PDF file containing the plot itself.  The path
    to the generated PDF file is returned.  An Exception is raised, before
    any file is written, if no distributions are given.
    """

    # Fail fast, before any file is touched.

    if not distributions:
        raise Exception(
                'An error occurred retrieving data from the database.')

    tsv_path = os.path.join(self.latex_dir, path_template + '.tsv')
    gnu_path = os.path.join(self.latex_dir, path_template + '.gnu')
    pdf_path = os.path.join(self.latex_dir, path_template + '.pdf')

    # The TSV data sets, the tick labels and the plot clauses must all use
    # the same ordering, so it is computed once.

    benchmarks = list(reversed(distributions))

    # Write data to TSV file that can be easily parsed by gnuplot.

    boxplot_header = '#' + '\t'.join([
        'Protocol', 'x', 'lower', 'first_quartile',
        'median', 'third_quartile', 'upper']) + '\n'
    boxplot_row = '\t'.join([
        '{benchmark.name}',
        '{gnuplot_index}',
        '{stats.lower_whisker}',
        '{stats.first_quartile}',
        '{stats.median}',
        '{stats.third_quartile}',
        '{stats.upper_whisker}']) + '\n'
    outlier_header = '#' + '\t'.join([
        'Protocol', 'x', 'outlier']) + '\n'
    outlier_row = '\t'.join([
        '{benchmark.name}', '{gnuplot_index}', '{outlier}']) + '\n'

    # The file is opened once in write mode so that data left over from a
    # previous run is discarded.  Appending would leave stale data sets in
    # front of the new ones, and the plot clauses below index from 0.

    with open(tsv_path, 'w') as tsv_file:
        for gnuplot_index, benchmark in enumerate(benchmarks, 1):
            boxplots = statistics.tukeyBoxAndWhisker(
                    {1: distributions[benchmark]})
            stats, outliers = boxplots[1]

            # Always write one outlier row.  Presumably this keeps the
            # outlier data set from being empty, so that every benchmark
            # contributes exactly two data sets.

            if not outliers:
                outliers = ['?']

            tsv_file.write(boxplot_header)
            tsv_file.write(boxplot_row.format(
                benchmark=benchmark,
                gnuplot_index=gnuplot_index,
                stats=stats,
            ))
            tsv_file.write('\n\n')
            tsv_file.write(outlier_header)
            for outlier in outliers:
                tsv_file.write(outlier_row.format(
                    benchmark=benchmark,
                    gnuplot_index=gnuplot_index,
                    outlier=outlier,
                ))
            tsv_file.write('\n\n')

    # Generate plot using gnuplot.

    x_range = len(distributions) + 1
    x_ticks = ', '.join([
        '"{0.title}" {1}'.format(benchmark, i)
        for i, benchmark in enumerate(benchmarks, 1)
    ])

    # NOTE(review): this value is substituted as the first component of
    # gnuplot's terminal 'size', which is the horizontal extent -- confirm
    # that the name matches the intent.

    fig_height = min(1 + len(self), 5)

    gnuplot_script = '''\
set autoscale
set border 31
set tics out
set terminal pdf size {fig_height},6
set xtics ({x_ticks}) rotate by -90
set xtics nomirror
set ytics autofreq rotate by -90 center
set ytics nomirror
set noy2tics
set nox2tics
set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "blue" lw 5 ps 1 pt 7
set style line 3 lt 1 lc rgb "forest-green" lw 2 ps 2 pt 13
set style line 4 lt 1 lc rgb "gold" lw 2 ps 1 pt 7
set style line 5 lt 1 lc rgb "red" lw 2 ps 2 pt 13
set style line 6 lt 1 lc rgb "black" lw 2
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 2 lc rgb "dark-gray" lw 5
set style fill solid 0.5
set boxwidth 0.75
set key below right
set xrange [0:{x_range}]
set encoding iso_8859_1
set notitle
unset xlabel
set yrange [0:]
set output "{pdf_path}"
{custom_gnuplot_commands}
plot {plot_arguments}
'''

    # Three plot clauses per benchmark: the box with whiskers, the median
    # bar, and the outlier points.  Data sets alternate between box
    # statistics (even indices) and outliers (odd indices).

    plot_template = ', \\\n '.join([
        '"{tsv_path}" index {box_plot_index} using 2:4:3:7:6 with candlesticks whiskerbars lt 1 lc rgb "{color}" lw 5 notitle',
        '"{tsv_path}" index {box_plot_index} using 2:5:5:5:5 with candlesticks lt 1 lc rgb "black" lw 5 notitle',
        '"{tsv_path}" index {outliers_index} using 2:3 with points lt 1 lc rgb "{color}" lw 5 ps 0.5 pt 7 notitle',
    ])
    plot_arguments = ', \\\n '.join([
        plot_template.format(
            tsv_path=tsv_path,
            box_plot_index=2 * i,
            outliers_index=2 * i + 1,
            color=benchmark.color)
        for i, benchmark in enumerate(benchmarks)
    ])

    if custom_plot_arguments:
        plot_arguments += ', ' + custom_plot_arguments

    with open(gnu_path, 'w') as gnu_file:
        gnu_file.write(gnuplot_script.format(
            fig_height=fig_height,
            x_ticks=x_ticks,
            x_range=x_range,
            pdf_path=pdf_path,
            custom_gnuplot_commands=custom_gnuplot_commands,
            plot_arguments=plot_arguments,
        ))

    utilities.run_gnuplot(gnu_path, verbose=self.verbose)

    return pdf_path
def make_summary_box_plots(self, benchmark):
    """
    Create three box plots summarizing the given benchmark: one for the
    RMSDs of its best top-X models, one for the scores of those same
    models, and one for its percent subangstrom values.

    Inputs
    ------
    benchmark:
        The benchmark to summarize.  This method reads its latex_dir,
        best_top_x_models, percents_subangstrom and color attributes.

    Outputs
    -------
    This method writes a TSV file containing the box plot data and a GNU
    file containing the gnuplot commands, then runs gnuplot to generate
    three PDF files.  The paths to the RMSD, score and percent subangstrom
    PDF files are returned, in that order.
    """

    tsv_path = os.path.join(benchmark.latex_dir, 'best_model_dists.tsv')
    gnu_path = os.path.join(benchmark.latex_dir, 'best_model_dists.gnu')
    pdf_path_rmsd = os.path.join(
            benchmark.latex_dir, 'best_model_dists_rmsd.pdf')
    pdf_path_score = os.path.join(
            benchmark.latex_dir, 'best_model_dists_score.pdf')
    pdf_path_subA = os.path.join(
            benchmark.latex_dir, 'best_model_dists_percent_subangstrom.pdf')

    # Calculate box plot parameters.

    best_top_x_models = benchmark.best_top_x_models
    distributions = {
        1: [x.rmsd for x in best_top_x_models],
        2: [x.score for x in best_top_x_models],
        3: benchmark.percents_subangstrom,
    }
    box_plots = statistics.tukeyBoxAndWhisker(distributions)

    # Write box plot data to a tab-separated value (TSV) file that can
    # easily be parsed by gnuplot.  The keys are visited in sorted order
    # because the gnuplot script below addresses the data sets by position:
    # indices 0-1 for key 1, 2-3 for key 2 and 4-5 for key 3.

    header = '\t'.join([
        '#x', 'lower', 'first_quartile',
        'median', 'third_quartile', 'upper']) + '\n'

    with open(tsv_path, 'w') as tsv_file:
        for x in sorted(box_plots):
            box_params, outliers = box_plots[x]

            tsv_file.write(header)
            for item in box_params:
                tsv_file.write('{0}\t'.format(item))
            tsv_file.write('\n\n\n')

            tsv_file.write('#x\toutlier\n')
            for outlier in outliers:
                tsv_file.write('{0}\t{1}\n'.format(x, outlier))
            if not outliers:
                tsv_file.write('{0}\t?\n'.format(x))
            tsv_file.write('\n\n')

    # Write the gnuplot script and generate the PDF plots.

    gnuplot_script = '''\
set autoscale
set border 31
set tics out
set terminal pdf
set size ratio 1
set noxtics
set xrange [0.5:1.5]
set nox2tics
set ytics 1
set ytics nomirror
set noy2tics
set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "{benchmark.color}" lw 5 pt 7
set style line 3 lt 1 lc rgb "{benchmark.color}" lw 5
set style line 4 lt 1 lc rgb "gold" lw 2
set style line 5 lt 1 lc rgb "red" lw 5 pt 7
set style line 6 lt 1 lc rgb "black" lw 5
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 0 lc rgb "black" lw 5
set boxwidth 0.25
set key tmargin
set title "Best models performance distribution"
set noxlabel
set style fill solid 0.5
set encoding iso_8859_1
set ylabel "r.m.s. deviation to crystal loop [\305]"
set output "{pdf_path_rmsd}"
f(x)=1
plot "{tsv_path}" index 0 using 1:3:2:6:5 with candlesticks whiskerbars ls 2 notitle axes x1y1,\\
"{tsv_path}" index 0 using 1:4:4:4:4 with candlesticks ls 6 notitle,\\
"{tsv_path}" index 1 using 1:2 with points ls 2 ps 0.5 pt 7 notitle,\\
f(x) with lines ls 9 notitle
set ylabel "Rosetta all-atom score"
set xrange [1.5:2.5]
set ytics autofreq
set output "{pdf_path_score}"
plot "{tsv_path}" index 2 using 1:3:2:6:5 with candlesticks whiskerbars ls 5 notitle axes x1y1,\\
"{tsv_path}" index 2 using 1:4:4:4:4 with candlesticks ls 6 notitle,\\
"{tsv_path}" index 3 using 1:2 with points ls 5 ps 0.5 pt 7 notitle
set title "Protocol performance distribution"
set ylabel "Fraction sub-\305 models [%]"
set xrange [2.5:3.5]
set ytics 10
set output "{pdf_path_subA}"
plot "{tsv_path}" index 4 using 1:3:2:6:5 with candlesticks whiskerbars ls 3 notitle axes x1y1,\\
"{tsv_path}" index 4 using 1:4:4:4:4 with candlesticks ls 6 notitle,\\
"{tsv_path}" index 5 using 1:2 with points ls 3 ps 0.5 pt 7 notitle
'''

    with open(gnu_path, 'w') as gnu_file:
        gnu_file.write(gnuplot_script.format(
            benchmark=benchmark,
            tsv_path=tsv_path,
            pdf_path_rmsd=pdf_path_rmsd,
            pdf_path_score=pdf_path_score,
            pdf_path_subA=pdf_path_subA,
        ))

    # If there are no outliers, gnuplot will produce a warning.  This is a
    # pretty common occurrence, and it is bad to produce warning messages
    # for common occurrences, so stderr is discarded instead.  This is a
    # little dangerous; it would be better to suppress only the exact
    # warning that is known about.
    #
    # The null device is opened for writing, because the handle is given to
    # gnuplot as the stream it writes its errors to.

    with open(os.devnull, 'w') as devnull:
        utilities.run_gnuplot(
                gnu_path, stderr=devnull, verbose=self.verbose)

    return pdf_path_rmsd, pdf_path_score, pdf_path_subA
def make_rmsd_histogram(self, loop):
    """
    Create a smoothed RMSD histogram for the given loop.  100 bins are used
    when making the plot, and the smoothing is done by gnuplot.

    Inputs
    ------
    loop:
        The loop to plot.  This method reads its has_data, latex_dir,
        rmsds, pdb_id, percent_subangstrom and benchmark.color attributes,
        and calls len() on it.

    Outputs
    -------
    This method writes a TSV file containing the histogram data and a GNU
    file containing the gnuplot commands, then runs gnuplot to generate a
    PDF file.  The path to the PDF file is returned.  Nothing is written
    and None is returned if the loop has no data.
    """

    # This method would be much more concise if it used matplotlib.

    if not loop.has_data:
        return

    tsv_path = os.path.join(loop.latex_dir, 'rmsd_histogram.tsv')
    gnu_path = os.path.join(loop.latex_dir, 'rmsd_histogram.gnu')
    pdf_path = os.path.join(loop.latex_dir, 'rmsd_histogram.pdf')

    # Write histogram data to a tab-separated value (TSV) file that can
    # easily be parsed by gnuplot.

    num_bins = 100
    histogram = statistics.histogram(loop.rmsds, num_bins)
    num_models = len(loop)

    with open(tsv_path, 'w') as tsv_file:
        tsv_file.write('#All models\n')
        tsv_file.write('#RMSD\tFrequency\n')

        for rmsd, count in histogram:
            # Convert to float before dividing so that the result is not
            # truncated when both operands are integers and '/' performs
            # integer division.
            frequency = float(num_bins * count) / num_models
            tsv_file.write('{0}\t{1}\n'.format(rmsd, frequency))

    # Write the gnuplot script and generate the PDF plot.

    gnuplot_script = '''\
set autoscale
set border 31
set tics out
set terminal pdf enhanced color
set xtics autofreq
set xtics nomirror
set ytics autofreq
set ytics nomirror
set noy2tics
set nox2tics
set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "{loop.benchmark.color}" lw 8 ps 1 pt 7
set style line 3 lt 1 lc rgb "forest-green" lw 2 ps 2 pt 13
set style line 4 lt 1 lc rgb "gold" lw 2 ps 1 pt 7
set style line 5 lt 1 lc rgb "red" lw 2 ps 2 pt 13
set style line 6 lt 1 lc rgb "black" lw 2
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 2 lc rgb "dark-gray" lw 5
set boxwidth 0.75
set key below right
set xrange [0:]
set encoding iso_8859_1
set title "{loop.pdb_id}: {loop.percent_subangstrom:0.2f}% sub-\305 models"
set xlabel "r.m.s. deviation to crystal loop [\305]"
set yrange [0:]
set arrow from 1, graph 0 to 1, graph 1 ls 9 nohead
set ylabel "Fraction of models [%]"
set output "{pdf_path}"
plot "{tsv_path}" index 0 using ($1):($2) smooth bezier with lines ls 2 title "all models" axes x1y1
'''

    # The fields are passed explicitly (rather than via locals()) so that
    # the template's dependencies are visible here.

    with open(gnu_path, 'w') as gnu_file:
        gnu_file.write(gnuplot_script.format(
            loop=loop,
            tsv_path=tsv_path,
            pdf_path=pdf_path,
        ))

    utilities.run_gnuplot(gnu_path, verbose=self.verbose)

    return pdf_path
def make_summary_box_plots(self, benchmark):
    """
    Create three box plots summarizing the given benchmark: one for the
    RMSDs of its best top-X models, one for the scores of those same
    models, and one for its percent subangstrom values.

    Inputs
    ------
    benchmark:
        The benchmark to summarize.  This method reads its latex_dir,
        best_top_x_models, percents_subangstrom and color attributes.

    Outputs
    -------
    This method writes a TSV file containing the box plot data and a GNU
    file containing the gnuplot commands, then runs gnuplot to generate
    three PDF files.  The paths to the RMSD, score and percent subangstrom
    PDF files are returned, in that order.
    """

    tsv_path = os.path.join(benchmark.latex_dir, 'best_model_dists.tsv')
    gnu_path = os.path.join(benchmark.latex_dir, 'best_model_dists.gnu')
    pdf_path_rmsd = os.path.join(
            benchmark.latex_dir, 'best_model_dists_rmsd.pdf')
    pdf_path_score = os.path.join(
            benchmark.latex_dir, 'best_model_dists_score.pdf')
    pdf_path_subA = os.path.join(
            benchmark.latex_dir, 'best_model_dists_percent_subangstrom.pdf')

    # Calculate box plot parameters.

    best_top_x_models = benchmark.best_top_x_models
    distributions = {
        1: [x.rmsd for x in best_top_x_models],
        2: [x.score for x in best_top_x_models],
        3: benchmark.percents_subangstrom,
    }
    box_plots = statistics.tukeyBoxAndWhisker(distributions)

    # Write box plot data to a tab-separated value (TSV) file that can
    # easily be parsed by gnuplot.  The keys are visited in sorted order
    # because the gnuplot script below addresses the data sets by position:
    # indices 0-1 for key 1, 2-3 for key 2 and 4-5 for key 3.

    header = '\t'.join([
        '#x', 'lower', 'first_quartile',
        'median', 'third_quartile', 'upper']) + '\n'

    with open(tsv_path, 'w') as tsv_file:
        for x in sorted(box_plots):
            box_params, outliers = box_plots[x]

            tsv_file.write(header)
            for item in box_params:
                tsv_file.write('{0}\t'.format(item))
            tsv_file.write('\n\n\n')

            tsv_file.write('#x\toutlier\n')
            for outlier in outliers:
                tsv_file.write('{0}\t{1}\n'.format(x, outlier))
            if not outliers:
                tsv_file.write('{0}\t?\n'.format(x))
            tsv_file.write('\n\n')

    # Write the gnuplot script and generate the PDF plots.

    gnuplot_script = '''\
set autoscale
set border 31
set tics out
set terminal pdf
set size ratio 1
set noxtics
set xrange [0.5:1.5]
set nox2tics
set ytics 1
set ytics nomirror
set noy2tics
set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "{benchmark.color}" lw 5 pt 7
set style line 3 lt 1 lc rgb "{benchmark.color}" lw 5
set style line 4 lt 1 lc rgb "gold" lw 2
set style line 5 lt 1 lc rgb "red" lw 5 pt 7
set style line 6 lt 1 lc rgb "black" lw 5
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 0 lc rgb "black" lw 5
set boxwidth 0.25
set key tmargin
set title "Best models performance distribution"
set noxlabel
set style fill solid 0.5
set encoding iso_8859_1
set ylabel "r.m.s. deviation to crystal loop [\305]"
set output "{pdf_path_rmsd}"
f(x)=1
plot "{tsv_path}" index 0 using 1:3:2:6:5 with candlesticks whiskerbars ls 2 notitle axes x1y1,\\
"{tsv_path}" index 0 using 1:4:4:4:4 with candlesticks ls 6 notitle,\\
"{tsv_path}" index 1 using 1:2 with points ls 2 ps 0.5 pt 7 notitle,\\
f(x) with lines ls 9 notitle
set ylabel "Rosetta all-atom score"
set xrange [1.5:2.5]
set ytics autofreq
set output "{pdf_path_score}"
plot "{tsv_path}" index 2 using 1:3:2:6:5 with candlesticks whiskerbars ls 5 notitle axes x1y1,\\
"{tsv_path}" index 2 using 1:4:4:4:4 with candlesticks ls 6 notitle,\\
"{tsv_path}" index 3 using 1:2 with points ls 5 ps 0.5 pt 7 notitle
set title "Protocol performance distribution"
set ylabel "Fraction sub-\305 models [%]"
set xrange [2.5:3.5]
set ytics 10
set output "{pdf_path_subA}"
plot "{tsv_path}" index 4 using 1:3:2:6:5 with candlesticks whiskerbars ls 3 notitle axes x1y1,\\
"{tsv_path}" index 4 using 1:4:4:4:4 with candlesticks ls 6 notitle,\\
"{tsv_path}" index 5 using 1:2 with points ls 3 ps 0.5 pt 7 notitle
'''

    with open(gnu_path, 'w') as gnu_file:
        gnu_file.write(gnuplot_script.format(
            benchmark=benchmark,
            tsv_path=tsv_path,
            pdf_path_rmsd=pdf_path_rmsd,
            pdf_path_score=pdf_path_score,
            pdf_path_subA=pdf_path_subA,
        ))

    # If there are no outliers, gnuplot will produce a warning.  This is a
    # pretty common occurrence, and it is bad to produce warning messages
    # for common occurrences, so stderr is discarded instead.  This is a
    # little dangerous; it would be better to suppress only the exact
    # warning that is known about.
    #
    # The null device is opened for writing, because the handle is given to
    # gnuplot as the stream it writes its errors to.

    with open(os.devnull, 'w') as devnull:
        utilities.run_gnuplot(
                gnu_path, stderr=devnull, verbose=self.verbose)

    return pdf_path_rmsd, pdf_path_score, pdf_path_subA
def make_comparison_plot(self, distributions, path_template,
        custom_gnuplot_commands, custom_plot_arguments=''):
    """
    Create a plot comparing the same distribution from several different
    benchmarks.  Examples include the percent subangstrom distribution and
    the RMSDs of the lowest scoring predictions.  The resulting plot will
    have a colored box plot for each benchmark.

    Inputs
    ------
    distributions:
        A dictionary mapping benchmark objects to some sort of
        distribution.  The box plot will be created using the distribution
        and labeled using the benchmark object.  It must support
        reversed(); the benchmarks are plotted in reversed order.

    path_template:
        The base file name used to create the TSV, GNU, and PDF files
        generated by this method.

    custom_gnuplot_commands:
        A string containing custom commands to pass to gnuplot immediately
        before the 'plot' command.  This is meant to be used for doing
        things like labeling the axes or adding useful vertical lines.

    custom_plot_arguments:
        An optional string appended, after a comma, to the arguments of
        the 'plot' command.

    Outputs
    -------
    This method creates three files: a TSV file containing the raw data
    being plotted, a GNU file containing the gnuplot commands used to
    generate the plot, and a PDF file containing the plot itself.  The path
    to the generated PDF file is returned.  An Exception is raised, before
    any file is written, if no distributions are given.
    """

    # Fail fast, before any file is touched.

    if not distributions:
        raise Exception(
                'An error occurred retrieving data from the database.')

    tsv_path = os.path.join(self.latex_dir, path_template + '.tsv')
    gnu_path = os.path.join(self.latex_dir, path_template + '.gnu')
    pdf_path = os.path.join(self.latex_dir, path_template + '.pdf')

    # The TSV data sets, the tick labels and the plot clauses must all use
    # the same ordering, so it is computed once.

    benchmarks = list(reversed(distributions))

    # Write data to TSV file that can be easily parsed by gnuplot.

    boxplot_header = '#' + '\t'.join([
        'Protocol', 'x', 'lower', 'first_quartile',
        'median', 'third_quartile', 'upper']) + '\n'
    boxplot_row = '\t'.join([
        '{benchmark.name}',
        '{gnuplot_index}',
        '{stats.lower_whisker}',
        '{stats.first_quartile}',
        '{stats.median}',
        '{stats.third_quartile}',
        '{stats.upper_whisker}']) + '\n'
    outlier_header = '#' + '\t'.join([
        'Protocol', 'x', 'outlier']) + '\n'
    outlier_row = '\t'.join([
        '{benchmark.name}', '{gnuplot_index}', '{outlier}']) + '\n'

    # The file is opened once in write mode so that data left over from a
    # previous run is discarded.  Appending would leave stale data sets in
    # front of the new ones, and the plot clauses below index from 0.

    with open(tsv_path, 'w') as tsv_file:
        for gnuplot_index, benchmark in enumerate(benchmarks, 1):
            boxplots = statistics.tukeyBoxAndWhisker(
                    {1: distributions[benchmark]})
            stats, outliers = boxplots[1]

            # Always write one outlier row.  Presumably this keeps the
            # outlier data set from being empty, so that every benchmark
            # contributes exactly two data sets.

            if not outliers:
                outliers = ['?']

            tsv_file.write(boxplot_header)
            tsv_file.write(boxplot_row.format(
                benchmark=benchmark,
                gnuplot_index=gnuplot_index,
                stats=stats,
            ))
            tsv_file.write('\n\n')
            tsv_file.write(outlier_header)
            for outlier in outliers:
                tsv_file.write(outlier_row.format(
                    benchmark=benchmark,
                    gnuplot_index=gnuplot_index,
                    outlier=outlier,
                ))
            tsv_file.write('\n\n')

    # Generate plot using gnuplot.

    x_range = len(distributions) + 1
    x_ticks = ', '.join([
        '"{0.title}" {1}'.format(benchmark, i)
        for i, benchmark in enumerate(benchmarks, 1)
    ])

    # NOTE(review): this value is substituted as the first component of
    # gnuplot's terminal 'size', which is the horizontal extent -- confirm
    # that the name matches the intent.

    fig_height = min(1 + len(self), 5)

    gnuplot_script = '''\
set autoscale
set border 31
set tics out
set terminal pdf size {fig_height},6
set xtics ({x_ticks}) rotate by -90
set xtics nomirror
set ytics autofreq rotate by -90 center
set ytics nomirror
set noy2tics
set nox2tics
set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "blue" lw 5 ps 1 pt 7
set style line 3 lt 1 lc rgb "forest-green" lw 2 ps 2 pt 13
set style line 4 lt 1 lc rgb "gold" lw 2 ps 1 pt 7
set style line 5 lt 1 lc rgb "red" lw 2 ps 2 pt 13
set style line 6 lt 1 lc rgb "black" lw 2
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 2 lc rgb "dark-gray" lw 5
set style fill solid 0.5
set boxwidth 0.75
set key below right
set xrange [0:{x_range}]
set encoding iso_8859_1
set notitle
unset xlabel
set yrange [0:]
set output "{pdf_path}"
{custom_gnuplot_commands}
plot {plot_arguments}
'''

    # Three plot clauses per benchmark: the box with whiskers, the median
    # bar, and the outlier points.  Data sets alternate between box
    # statistics (even indices) and outliers (odd indices).

    plot_template = ', \\\n '.join([
        '"{tsv_path}" index {box_plot_index} using 2:4:3:7:6 with candlesticks whiskerbars lt 1 lc rgb "{color}" lw 5 notitle',
        '"{tsv_path}" index {box_plot_index} using 2:5:5:5:5 with candlesticks lt 1 lc rgb "black" lw 5 notitle',
        '"{tsv_path}" index {outliers_index} using 2:3 with points lt 1 lc rgb "{color}" lw 5 ps 0.5 pt 7 notitle',
    ])
    plot_arguments = ', \\\n '.join([
        plot_template.format(
            tsv_path=tsv_path,
            box_plot_index=2 * i,
            outliers_index=2 * i + 1,
            color=benchmark.color)
        for i, benchmark in enumerate(benchmarks)
    ])

    if custom_plot_arguments:
        plot_arguments += ', ' + custom_plot_arguments

    with open(gnu_path, 'w') as gnu_file:
        gnu_file.write(gnuplot_script.format(
            fig_height=fig_height,
            x_ticks=x_ticks,
            x_range=x_range,
            pdf_path=pdf_path,
            custom_gnuplot_commands=custom_gnuplot_commands,
            plot_arguments=plot_arguments,
        ))

    utilities.run_gnuplot(gnu_path, verbose=self.verbose)

    return pdf_path
def make_rmsd_histogram(self, loop):
    """
    Create a smoothed RMSD histogram for the given loop.  100 bins are used
    when making the plot, and the smoothing is done by gnuplot.

    Inputs
    ------
    loop:
        The loop to plot.  This method reads its has_data, latex_dir,
        rmsds, pdb_id, percent_subangstrom and benchmark.color attributes,
        and calls len() on it.

    Outputs
    -------
    This method writes a TSV file containing the histogram data and a GNU
    file containing the gnuplot commands, then runs gnuplot to generate a
    PDF file.  The path to the PDF file is returned.  Nothing is written
    and None is returned if the loop has no data.
    """

    # This method would be much more concise if it used matplotlib.

    if not loop.has_data:
        return

    tsv_path = os.path.join(loop.latex_dir, 'rmsd_histogram.tsv')
    gnu_path = os.path.join(loop.latex_dir, 'rmsd_histogram.gnu')
    pdf_path = os.path.join(loop.latex_dir, 'rmsd_histogram.pdf')

    # Write histogram data to a tab-separated value (TSV) file that can
    # easily be parsed by gnuplot.

    num_bins = 100
    histogram = statistics.histogram(loop.rmsds, num_bins)
    num_models = len(loop)

    with open(tsv_path, 'w') as tsv_file:
        tsv_file.write('#All models\n')
        tsv_file.write('#RMSD\tFrequency\n')

        for rmsd, count in histogram:
            # Convert to float before dividing so that the result is not
            # truncated when both operands are integers and '/' performs
            # integer division.
            frequency = float(num_bins * count) / num_models
            tsv_file.write('{0}\t{1}\n'.format(rmsd, frequency))

    # Write the gnuplot script and generate the PDF plot.

    gnuplot_script = '''\
set autoscale
set border 31
set tics out
set terminal pdf enhanced color
set xtics autofreq
set xtics nomirror
set ytics autofreq
set ytics nomirror
set noy2tics
set nox2tics
set style line 1 lt 1 lc rgb "dark-magenta" lw 2
set style line 2 lt 1 lc rgb "{loop.benchmark.color}" lw 8 ps 1 pt 7
set style line 3 lt 1 lc rgb "forest-green" lw 2 ps 2 pt 13
set style line 4 lt 1 lc rgb "gold" lw 2 ps 1 pt 7
set style line 5 lt 1 lc rgb "red" lw 2 ps 2 pt 13
set style line 6 lt 1 lc rgb "black" lw 2
set style line 7 lt 1 lc rgb "dark-gray" lw 2
set style line 8 lt 1 lc rgb "gray" lw 2
set style line 9 lt 2 lc rgb "dark-gray" lw 5
set boxwidth 0.75
set key below right
set xrange [0:]
set encoding iso_8859_1
set title "{loop.pdb_id}: {loop.percent_subangstrom:0.2f}% sub-\305 models"
set xlabel "r.m.s. deviation to crystal loop [\305]"
set yrange [0:]
set arrow from 1, graph 0 to 1, graph 1 ls 9 nohead
set ylabel "Fraction of models [%]"
set output "{pdf_path}"
plot "{tsv_path}" index 0 using ($1):($2) smooth bezier with lines ls 2 title "all models" axes x1y1
'''

    # The fields are passed explicitly (rather than via locals()) so that
    # the template's dependencies are visible here.

    with open(gnu_path, 'w') as gnu_file:
        gnu_file.write(gnuplot_script.format(
            loop=loop,
            tsv_path=tsv_path,
            pdf_path=pdf_path,
        ))

    utilities.run_gnuplot(gnu_path, verbose=self.verbose)

    return pdf_path