def correctness_abacus_plot(output_directory, file_prefix, df, x_series_index, y_series_index, facet_index, peptide_index, series_color, plot_title = '', x_axis_label = '', y_axis_label = '', fcorrect_x_cutoff = 1.0, fcorrect_y_cutoff = 1.0, min_experimental_ddg = None, max_experimental_ddg = None):
    """
    Creates a faceted "abacus" plot: one point per peptide, with the X-series value on the vertical axis, colored
    by the class returned from _determine_fraction_correct_class for the (X, Y) pair of each row.

    The function writes a CSV table ({file_prefix}.txt) and an R script ({file_prefix}.R) into output_directory and then
    runs the R script there; the script renders {file_prefix}.png.

    :param output_directory: The output directory. It is created if it does not exist.
    :param file_prefix: A prefix for the generated files (table, R script, PNG).
    :param df: A pandas dataframe. It is copied, so the caller's dataframe is not modified.
    :param x_series_index: The numerical (zero-based) index of the series plotted on the vertical axis.
    :param y_series_index: The numerical index of the second series. It is only used for the classification.
    :param facet_index: The numerical index of the series used to facet the plot (facet_wrap).
    :param peptide_index: The numerical index of the series used for the horizontal axis.
    :param series_color: Currently unused.
    :param plot_title: Plot title.
    :param x_axis_label: X-axis label.
    :param y_axis_label: Y-axis label.
    :param fcorrect_x_cutoff: Currently unused.
    :param fcorrect_y_cutoff: Currently unused.
    :param min_experimental_ddg: If set, the lower graph limit (miny) is extended to include this value.
    :param max_experimental_ddg: If set, the upper graph limit (maxy) is extended to include this value.
    """

    try:
        os.mkdir(output_directory)
    except OSError:
        # Usually "already exists". Any other failure is caught by the assertion below.
        pass
    assert (os.path.exists(output_directory))

    # Work on a copy. Adding the columns to the caller's dataframe would make a second call on the same dataframe
    # compute wrong column indices for the R script.
    df = df.copy()

    # _determine_fraction_correct_class returns an indexable value; elements 0, 1, and 2 are stored as the
    # category, shape, and color columns respectively.
    df['Categorization'] = df.apply(lambda r: _determine_fraction_correct_class(r[x_series_index], r[y_series_index])[0], axis = 1)
    df['CategorizationShape'] = df.apply(lambda r: _determine_fraction_correct_class(r[x_series_index], r[y_series_index])[1], axis = 1)
    df['CategorizationColor'] = df.apply(lambda r: _determine_fraction_correct_class(r[x_series_index], r[y_series_index])[2], axis = 1)

    # Look the positions up by name so that they are correct even if a column already existed in the input.
    column_names = list(df.columns.values)
    categorization_index = column_names.index('Categorization')
    categorization_shape_index = column_names.index('CategorizationShape')
    categorization_color_index = column_names.index('CategorizationColor')

    # Create the R script
    boxplot_r_script = '''
library(ggplot2)
library(gridExtra)
library(scales)
library(qualV)

# PNG generation
png('%(file_prefix)s.png', width=2560, height=2048, bg="white", res=600)
txtalpha <- 0.6
redtxtalpha <- 0.6

%(png_plot_commands)s
'''

    xy_table_filename = '{0}.txt'.format(file_prefix)
    xy_table_filepath = os.path.join(output_directory, xy_table_filename)

    # Sort by facet and then by peptide before writing the table read by the R script
    facet_series = column_names[facet_index]
    peptide_series = column_names[peptide_index]
    df = df.sort_values([facet_series, peptide_series])
    data_table = df.to_csv(header = True, index = False)
    write_file(xy_table_filepath, data_table)

    # The R column indices are one-based, hence the "+ 1" after each zero-based pandas index
    main_plot_script = '''
# Set the margins
par(mar=c(5, 5, 1, 1))

xy_data <- read.csv('%(xy_table_filename)s', header=T)

names(xy_data)[%(x_series_index)d + 1] <- "xvalues"
names(xy_data)[%(y_series_index)d + 1] <- "yvalues"
names(xy_data)[%(facet_index)d + 1] <- "facets"
names(xy_data)[%(peptide_index)d + 1] <- "peptides"
names(xy_data)[%(categorization_index)d + 1] <- "categorization"
names(xy_data)[%(categorization_shape_index)d + 1] <- "categorization_shape"
names(xy_data)[%(categorization_color_index)d + 1] <- "categorization_color"

xy_data[%(peptide_index)d + 1]
peptide_names <- sort(xy_data[[%(peptide_index)d + 1]])
peptide_names
class(peptide_names)

first_peptide = peptide_names[1]
last_peptide = peptide_names[length(peptide_names)]

xlabel <- "%(x_axis_label)s"
ylabel <- "%(y_axis_label)s"
plot_title <- "%(plot_title)s"

xy_data

# Set graph limits and the position for the correlation value

miny <- min(0.0, min(xy_data$xvalues) - 0.1) # "X-axis" values are plotted on to Y-axis
maxy <- max(1.0, max(xy_data$xvalues) + 0.1)
'''

    if min_experimental_ddg is not None:
        main_plot_script += '''
miny <- min(miny - 0.2, %(min_experimental_ddg)f - 0.2)
'''

    # The upper limit is driven by max_experimental_ddg (the lower limit by min_experimental_ddg above)
    if max_experimental_ddg is not None:
        main_plot_script += '''
maxy <- max(maxy + 0.5, %(max_experimental_ddg)f + 0.5)

first_peptide
last_peptide
'''

    # NOTE(review): the shape aesthetic is mapped to categorization_color rather than categorization_shape - confirm
    # that this is intended.
    main_plot_script += '''

#aes(color = categorization_color, shape = categorization_shape)

p <- ggplot(data=xy_data, aes(x=peptides, y = xvalues, color = categorization_color, shape = categorization_color, group = facets)) +
       theme(legend.position = "none") + # hide the legend
       annotate("rect", xmin = first_peptide, xmax = last_peptide, ymin = -1, ymax = +1, alpha = .2) +
       xlab(xlabel) +
       labs(title = "%(plot_title)s") +
       theme(plot.title = element_text(color = "#555555", size=rel(0.55))) +
       labs(x = xlabel, y = ylabel) +
       theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 3)) +
       geom_point() +
       scale_colour_manual(values = c("black", "blue", "green", "red")) +
       scale_shape_manual(values = c(16, 18, 25, 17)) +
       facet_wrap(~facets)

# Plot graph
p
dev.off()
'''

    # Create the R script. The templates are filled in from the local variables of this function.
    plot_type = 'png'
    png_plot_commands = main_plot_script % locals()
    boxplot_r_script = boxplot_r_script % locals()
    r_script_filename = '{0}.R'.format(file_prefix)
    r_script_filepath = os.path.join(output_directory, r_script_filename)
    write_file(r_script_filepath, boxplot_r_script)

    # Run the R script
    run_r_script(r_script_filename, cwd = output_directory)
def error_by_error_scatterplot(output_directory, file_prefix, df,
                             reference_series_index, x_series_index, y_series_index,
                             x_color, y_color,
                             x_series_name = None, y_series_name = None,
                             plot_title = '', x_axis_label = '', y_axis_label = '', similarity_range = 0.25,
                             add_similarity_range_annotation = True,
                             shape_by_category = False, shape_category_series_index = None, shape_category_title = 'Case',
                             label_series_index = None, label_outliers = True,
                             use_geom_text_repel = True,
                             ):
    """ Creates a scatterplot of error versus error intended to show which computational method (X or Y) has the least amount of error relative to a reference series.

        The absolute differences |reference_series - x_series| and |reference_series - y_series| are computed and these
        differences (errors) are plotted against each other.

        A CSV file with the plot points ({file_prefix}.txt) and the R script ({file_prefix}.R) are written to
        output_directory and the R script is run there to render {file_prefix}.png.

        :param output_directory: The output directory. It is created if it does not exist.
        :param file_prefix: A prefix for the generated files.
        :param df: A pandas dataframe. Note: The dataframe is zero-indexed. It is copied, not modified.
        :param reference_series_index: The numerical index of the reference series e.g. experimental data.
        :param x_series_index: The numerical index of the X-axis series e.g. predictions from a computational method.
        :param y_series_index: The numerical index of the Y-axis series e.g. predictions from a second computational method.
        :param x_color: The color of the "method X is better" points.
        :param y_color: The color of the "method Y is better" points.
        :param x_series_name: A name for the X-series which is used in the classification legend. Required (must not be None).
        :param y_series_name: A name for the Y-series which is used in the classification legend. Required (must not be None).
        :param plot_title: Plot title.
        :param x_axis_label: X-axis label.
        :param y_axis_label: Y-axis label.
        :param similarity_range: A point (x, y) is considered as similar if |x - y| <= similarity_range.
        :param add_similarity_range_annotation: If true then the similarity range is included in the plot.
        :param shape_by_category: Boolean. If set then points are shaped by the column identified with shape_category_series_index. Otherwise, points are shaped by classification ("X is better", "Y is better", or "Similar"). Ignored unless shape_category_series_index is an int.
        :param shape_category_series_index: The numerical index of the series used to choose point shapes.
        :param shape_category_title: The title of the shape legend.
        :param label_series_index: The numerical index of the series used to label outliers.
        :param label_outliers: Boolean. If set then label outliers using the column identified with label_series_index. Ignored unless label_series_index is an int.
        :param use_geom_text_repel: Boolean. If set then the ggrepel package is used to avoid overlapping labels.

        This function was adapted from the Kortemme Lab covariation benchmark (https://github.com/Kortemme-Lab/covariation).

        todo: I need to check that ggplot2 is respecting the color choices. It may be doing its own thing.
    """

    try:
        os.mkdir(output_directory)
    except OSError:
        # Usually "already exists". Any other failure is caught by the assertion below.
        pass
    assert (os.path.exists(output_directory))

    # The optional series are only usable when they are identified by an integer index
    has_shape_series = isinstance(shape_category_series_index, int)
    if not has_shape_series:
        shape_by_category = False
    if not isinstance(label_series_index, int):
        label_outliers = False
    assert (x_series_name is not None and y_series_name is not None)

    df = df.copy()
    headers = df.columns.values

    # The point shapes (R pch codes) available for categories; list() keeps this working where range() is lazy
    legal_shapes = list(range(15, 25 + 1)) + list(range(0, 14 + 1))
    if has_shape_series:
        # Only inspect the category column when one was supplied (the default is None)
        num_categories = len(set(df.iloc[:, shape_category_series_index].values))
        if num_categories > len(legal_shapes):
            colortext.warning('Too many categories ({0}) to plot using meaningful shapes.'.format(num_categories))
            shape_by_category = False
        else:
            legal_shapes = legal_shapes[:num_categories]

    # Absolute errors of each method relative to the reference series
    df['X_error'] = abs(df[headers[reference_series_index]] - df[headers[x_series_index]])
    df['Y_error'] = abs(df[headers[reference_series_index]] - df[headers[y_series_index]])

    # Classify each row using _classify_smallest_error (the R script expects "Similar" or one of the two series names)
    df['Classification'] = df.apply(lambda r: _classify_smallest_error(r['X_error'], r['Y_error'], similarity_range, x_series_name, y_series_name), axis = 1)

    # Look the positions up by name so that they are correct even if a column already existed in the input
    column_names = list(df.columns.values)
    x_error_index = column_names.index('X_error')
    y_error_index = column_names.index('Y_error')
    error_classification_index = column_names.index('Classification')

    # Create the R script
    boxplot_r_script = '''
library(ggplot2)
library(gridExtra)
library(scales)
library(qualV)
library(grid)'''
    if use_geom_text_repel:
        boxplot_r_script +='''
library(ggrepel) # install with 'install.packages("ggrepel")' inside the R interactive shell.
'''
    boxplot_r_script += '''

# PNG generation
png('%(file_prefix)s.png', width=2560, height=2048, bg="white", res=600)
txtalpha <- 0.8
redtxtalpha <- 0.8

%(png_plot_commands)s
'''

    xy_table_filename = '{0}.txt'.format(file_prefix)
    xy_table_filepath = os.path.join(output_directory, xy_table_filename)

    data_table = df.to_csv(header = True, index = False)
    write_file(xy_table_filepath, data_table)

    # The R column indices are one-based, hence the "+ 1" after each zero-based pandas index
    main_plot_script = '''
# Set the margins
par(mar=c(5, 5, 1, 1))

xy_data <- read.csv('%(xy_table_filename)s', header=T)

names(xy_data)[%(x_error_index)d + 1] <- "xerrors"
names(xy_data)[%(y_error_index)d + 1] <- "yerrors"
'''

    if label_outliers:
        main_plot_script +='''names(xy_data)[%(label_series_index)d + 1] <- "outlier_labels"'''
    if has_shape_series:
        # Formatting a None index with %d would raise, so the rename is only emitted when an index was supplied
        main_plot_script +='''
names(xy_data)[%(shape_category_series_index)d + 1] <- "categories"'''
    main_plot_script +='''

xy_data[%(x_error_index)d + 1]
xy_data[%(y_error_index)d + 1]

# coefs contains two values: (Intercept) and yerrors
coefs <- coef(lm(xerrors~yerrors, data = xy_data))
fitcoefs = coef(lm(xerrors~0 + yerrors, data = xy_data))
fitlmv_yerrors <- as.numeric(fitcoefs[1])
lmv_intercept <- as.numeric(coefs[1])
lmv_yerrors <- as.numeric(coefs[2])
lm(xy_data$yerrors~xy_data$xerrors)

xlabel <- "%(x_axis_label)s"
ylabel <- "%(y_axis_label)s"
plot_title <- "%(plot_title)s"
rvalue <- cor(xy_data$yerrors, xy_data$xerrors)

# Alphabetically, "Similar" < "X" < "Y" so the logic below works
countsim <- paste("Similar =", dim(subset(xy_data, Classification=="Similar"))[1])
countX <- paste("%(x_series_name)s =", dim(subset(xy_data, Classification=="%(x_series_name)s"))[1])
countY <- paste("%(y_series_name)s =", dim(subset(xy_data, Classification=="%(y_series_name)s"))[1])

countX
countY
countsim

# Set graph limits and the position for the correlation value

minx <- min(0.0, min(xy_data$xerrors) - 0.1)
miny <- min(0.0, min(xy_data$yerrors) - 0.1)
maxx <- max(1.0, max(xy_data$xerrors) + 0.1)
maxy <- max(1.0, max(xy_data$yerrors) + 0.1)

# Create a square plot (x-range = y-range)
minx <- min(minx, miny)
miny <- minx
maxx <- max(maxx, maxy)
maxy <- maxx

xpos <- maxx / 25.0
ypos <- maxy - (maxy / 25.0)
ypos_2 <- maxy - (2 * maxy / 25.0)

plot_scale <- scale_color_manual(
    "Counts",
    values = c( "Similar" = '#444444', "%(x_series_name)s" = '%(x_color)s', "%(y_series_name)s" ='%(y_color)s'),
    labels = c( "Similar" = countsim, "%(x_series_name)s" = countX, "%(y_series_name)s" = countY) )'''

    if add_similarity_range_annotation:
        main_plot_script += '''

# Polygon denoting the similarity range. We turn off plot clipping below (gt$layout$clip) so we need to be more exact than using 4 points when defining the region
boxy_mc_boxface <- data.frame(
  X = c(minx - 0, maxx - %(similarity_range)f, maxx + 0, maxx + 0, 0 + %(similarity_range)f, 0),
  Y = c(minx - 0 + %(similarity_range)f, maxx + 0, maxx + 0, maxx + 0 -%(similarity_range)f, 0, 0 )
)'''
    else:
        main_plot_script += '''

# Polygon denoting the similarity range. We turn off plot clipping below (gt$layout$clip) so we need to be more exact than using 4 points when defining the region
boxy_mc_boxface <- data.frame(
  X = c(minx - 1, maxx + 1, maxx + 1, minx - 1),
  Y = c(minx - 1 + %(similarity_range)f, maxx + 1 + %(similarity_range)f, maxx + 1 - %(similarity_range)f, minx - 1 - %(similarity_range)f)
)'''

    # Every fragment from here until the "Plot graph" fragment extends a single ggplot expression chained with "+"
    if shape_by_category:
        main_plot_script += '''

# Plot
p <- qplot(main="", xerrors, yerrors, data=xy_data, xlab=xlabel, ylab=ylabel, alpha = I(txtalpha), shape=factor(categories), col=factor(Classification)) +'''
    else:
        main_plot_script += '''

# Plot
p <- qplot(main="", xerrors, yerrors, data=xy_data, xlab=xlabel, ylab=ylabel, alpha = I(txtalpha), shape=factor(Classification), col=factor(Classification)) +'''

    main_plot_script += '''
geom_polygon(data=boxy_mc_boxface, aes(X, Y), fill = "#bbbbbb", alpha = 0.4, color = "darkseagreen", linetype="blank", inherit.aes = FALSE, show.legend = FALSE) +
plot_scale +
geom_point() +
guides(col = guide_legend()) +
labs(title = "%(plot_title)s") +
theme(plot.title = element_text(color = "#555555", size=rel(0.75))) +
theme(axis.title = element_text(color = "#555555", size=rel(0.6))) +
theme(legend.title = element_text(color = "#555555", size=rel(0.45)), legend.text = element_text(color = "#555555", size=rel(0.4))) +
coord_cartesian(xlim = c(minx, maxx), ylim = c(miny, maxy)) + # set the graph limits
annotate("text", hjust=0, size = 2, colour="#222222", x = xpos, y = ypos, label = sprintf("R = %%0.2f", round(rvalue, digits = 4))) + # add correlation text; hjust=0 sets left-alignment. Using annotate instead of geom_text avoids blocky text caused by geom_text being run multiple times over the series'''

    # Outliers are points where |yerror - xerror| > maxx/3. The four layers below label one quadrant of the plot
    # each so that the label offset (and, without ggrepel, the alignment) can differ per quadrant.
    if label_outliers:
        if use_geom_text_repel:
            main_plot_script += '''

# Label outliers
geom_text_repel(size=1.5, segment.size = 0.15, color="#000000", alpha=0.6, data=subset(xy_data, abs(yerrors - xerrors) > maxx/3 & xerrors <= maxx / 2 & yerrors >=maxy/2), aes(xerrors, yerrors-maxy/100, label=outlier_labels)) +
geom_text_repel(size=1.5, segment.size = 0.15, color="#000000", alpha=0.6, data=subset(xy_data, abs(yerrors - xerrors) > maxx/3 & xerrors <= maxx / 2 & yerrors < maxy/2), aes(xerrors, yerrors+2*maxy/100, label=outlier_labels)) +
geom_text_repel(size=1.5, segment.size = 0.15, color="#000000", alpha=0.6, data=subset(xy_data, abs(yerrors - xerrors) > maxx/3 & xerrors > maxx / 2 & yerrors >=maxy/2), aes(xerrors, yerrors-maxy/100, label=outlier_labels)) +
geom_text_repel(size=1.5, segment.size = 0.15, color="#000000", alpha=0.6, data=subset(xy_data, abs(yerrors - xerrors) > maxx/3 & xerrors > maxx / 2 & yerrors < maxy/2), aes(xerrors, yerrors+2*maxy/100, label=outlier_labels)) +'''
        else:
            main_plot_script += '''

# Label outliers
geom_text(hjust = 0, size=1.5, color="#000000", alpha=0.6, data=subset(xy_data, abs(yerrors - xerrors) > maxx/3 & xerrors <= maxx / 2 & yerrors >=maxy/2), aes(xerrors, yerrors-maxy/100, label=outlier_labels)) +
geom_text(hjust = 0, size=1.5, color="#000000", alpha=0.6, data=subset(xy_data, abs(yerrors - xerrors) > maxx/3 & xerrors <= maxx / 2 & yerrors < maxy/2), aes(xerrors, yerrors+2*maxy/100, label=outlier_labels)) +
geom_text(hjust = 1, size=1.5, color="#000000", alpha=0.6, data=subset(xy_data, abs(yerrors - xerrors) > maxx/3 & xerrors > maxx / 2 & yerrors >=maxy/2), aes(xerrors, yerrors-maxy/100, label=outlier_labels)) +
geom_text(hjust = 1, size=1.5, color="#000000", alpha=0.6, data=subset(xy_data, abs(yerrors - xerrors) > maxx/3 & xerrors > maxx / 2 & yerrors < maxy/2), aes(xerrors, yerrors+2*maxy/100, label=outlier_labels)) +'''

    # The asterisk in the legend title refers to the caption added below
    counts_title = 'Counts'
    if add_similarity_range_annotation:
        counts_title += '*'

    # NOTE(review): this adds a second colour scale on top of plot_scale, with unnamed values that rely on the
    # alphabetical ordering of the classes. This is probably what the todo in the docstring refers to - confirm.
    main_plot_script += '''

#geom_text(hjust = 0, size=1.5, color="#000000", alpha=0.6, data=subset(xy_data, abs(yvalues - xvalues) > 2 & xvalues <= 0), aes(xvalues, yvalues+0.35, label=Origin_of_peptide), check_overlap = TRUE) + # label outliers
#geom_text(hjust = 1, size=1.5, color="#000000", alpha=0.6, data=subset(xy_data, abs(yvalues - xvalues) > 2 & xvalues > 0), aes(xvalues, yvalues+0.35, label=Origin_of_peptide), check_overlap = TRUE) + # label outliers
scale_colour_manual('%(counts_title)s', values = c('#444444', '%(x_color)s', '%(y_color)s'), labels = c( "Similar" = countsim, "%(x_series_name)s" = countX, "%(y_series_name)s" = countY)) +'''

    if shape_by_category:
        # NOTE(review): the labels below are keyed by classification names although the shapes encode categories -
        # confirm that this is intended.
        legal_shapes_str = ', '.join(map(str, legal_shapes))
        main_plot_script += '''
scale_shape_manual('%(shape_category_title)s', values = c(%(legal_shapes_str)s), labels = c( "Similar" = countsim, "%(x_series_name)s" = countX, "%(y_series_name)s" = countY))'''
    else:
        main_plot_script += '''
scale_shape_manual('%(counts_title)s', values = c(18, 16, 15), labels = c( "Similar" = countsim, "%(x_series_name)s" = countX, "%(y_series_name)s" = countY))'''

    if add_similarity_range_annotation:
        main_plot_script += '''+
# Add a caption
annotation_custom(grob = textGrob(gp = gpar(fontsize = 5), hjust = 0, sprintf("* Similar \\u225d \\u00b1 %%0.2f", round(%(similarity_range)f, digits = 2))), xmin = maxx + (2 * maxx / 10), ymin = -1, ymax = -1)'''

    main_plot_script += '''

# Plot graph
p
'''
    if add_similarity_range_annotation:
        main_plot_script += '''
# Code to override clipping
gt <- ggplot_gtable(ggplot_build(p))
gt$layout$clip[gt$layout$name=="panel"] <- "off"
grid.draw(gt)'''

    main_plot_script +='''
dev.off()
'''

    # Create the R script. The templates are filled in from the local variables of this function.
    plot_type = 'png'
    png_plot_commands = main_plot_script % locals()
    boxplot_r_script = boxplot_r_script % locals()
    r_script_filename = '{0}.R'.format(file_prefix)
    r_script_filepath = os.path.join(output_directory, r_script_filename)
    write_file(r_script_filepath, boxplot_r_script)

    # Run the R script
    run_r_script(r_script_filename, cwd = output_directory)
def multicategory_scatterplot(
    output_directory,
    file_prefix,
    df,
    x_series_index,
    y_series_index,
    category_series_index,
    series_color,
    plot_title="",
    x_axis_label="",
    y_axis_label="",
    min_predicted_ddg=None,
    max_predicted_ddg=None,
    min_experimental_ddg=None,
    max_experimental_ddg=None,
):
    """Creates a scatterplot of the X series against the Y series with per-category correlation fit lines.

    This function was adapted from the covariation benchmark.

    A CSV table ({file_prefix}.txt) and an R script ({file_prefix}.R) are written to output_directory and the R script
    is run there to render {file_prefix}.png.

    NOTE: The R template is not generalized yet (see the todos below). It refers to columns named "PDB" and
    "Origin_of_peptide" and hard-codes the categories '1G9O' (NHERF1) and '3QDO' (SNX27), so the dataframe must
    provide these columns.

    :param output_directory: The output directory. It is created if it does not exist.
    :param file_prefix: A prefix for the generated files (table, R script, PNG).
    :param df: A pandas dataframe. It is copied, so the caller's dataframe is not modified.
    :param x_series_index: The numerical (zero-based) index of the X-axis series.
    :param y_series_index: The numerical index of the Y-axis series.
    :param category_series_index: The numerical index of the category series. One color per distinct value is stored in the "CategorizationColor" column of the table; the R template does not use that column yet.
    :param series_color: Currently unused.
    :param plot_title: Plot title.
    :param x_axis_label: X-axis label.
    :param y_axis_label: Y-axis label.
    :param min_predicted_ddg: If set, the lower Y limit is extended to include this value.
    :param max_predicted_ddg: If set, the upper Y limit is extended to include this value (but see the note in the code: the template then overrides both Y limits with fixed values).
    :param min_experimental_ddg: If set, the lower X limit is extended to include this value.
    :param max_experimental_ddg: If set, the upper X limit is extended to include this value.
    """

    # todo: Abstract this graph from the current usage (DDG measurements).
    # todo: make the capped value for unquantified but classified measurements (e.g. DDG > 7 kcal/mol) parameterizable
    # todo: add an option to identify outliers by standard deviations (over the set of errors |x - y|) rather than by fixed value
    # todo: add an option to use geom_text_repel to avoid/reduce overlapping text
    # todo: allow users to provide colors for the facets / categories
    # todo: Add exception if number of cases > 2 so the general case can be implemented once we have test data.
    # todo: use one column as the category e.g. "PDB". assert that there is a maximum number of categories. Test with > 2 categories
    # todo: remove all references to SNX27 and NHERF1 below and loop over the set of categories instead

    try:
        os.mkdir(output_directory)
    except OSError:
        # Usually "already exists". Any other failure is caught by the assertion below.
        pass
    assert os.path.exists(output_directory)

    # Work on a copy so that the helper columns added below do not leak into the caller's dataframe
    df = df.copy()

    # Assign one color per category. get_spaced_plot_colors is expected to return one color string (without the
    # leading '#') per requested category.
    category_series = df.iloc[:, category_series_index]
    categories = list(category_series.unique())
    num_categories = len(categories)
    category_colors = get_spaced_plot_colors(num_categories)
    color_map = dict((category, "#" + color) for category, color in zip(categories, category_colors))
    df["CategorizationColor"] = category_series.map(color_map)

    # _determine_fraction_correct_class returns an indexable value; elements 0 and 1 are stored as the category and
    # shape columns respectively.
    df["Categorization"] = df.apply(
        lambda r: _determine_fraction_correct_class(r[x_series_index], r[y_series_index])[0], axis=1
    )
    df["CategorizationShape"] = df.apply(
        lambda r: _determine_fraction_correct_class(r[x_series_index], r[y_series_index])[1], axis=1
    )

    # Look the positions up by name so that they are correct even if a column already existed in the input
    column_names = list(df.columns.values)
    categorization_color_index = column_names.index("CategorizationColor")
    categorization_index = column_names.index("Categorization")
    categorization_shape_index = column_names.index("CategorizationShape")

    # Create the R script
    boxplot_r_script = """
library(ggplot2)
library(gridExtra)
library(scales)
library(qualV)

# PNG generation
png('%(file_prefix)s.png', width=2560, height=2048, bg="white", res=600)
txtalpha <- 0.6
redtxtalpha <- 0.6

%(png_plot_commands)s
"""

    xy_table_filename = "{0}.txt".format(file_prefix)
    xy_table_filepath = os.path.join(output_directory, xy_table_filename)

    # Write the table read by the R script straight from the dataframe
    data_table = df.to_csv(header=True, index=False)
    write_file(xy_table_filepath, data_table)

    # The R column indices are one-based, hence the "+ 1" after each zero-based pandas index
    single_plot_commands = """
# Set the margins
par(mar=c(5, 5, 1, 1))

xy_data <- read.csv('%(xy_table_filename)s', header=T)

names(xy_data)[%(x_series_index)d + 1] <- "xvalues"
names(xy_data)[%(y_series_index)d + 1] <- "yvalues"

# coefs contains two values: (Intercept) and yvalues
coefs <- coef(lm(xvalues~yvalues, data = xy_data))
fitcoefs = coef(lm(xvalues~0 + yvalues, data = xy_data))
fitlmv_yvalues <- as.numeric(fitcoefs[1])
lmv_intercept <- as.numeric(coefs[1])
lmv_yvalues <- as.numeric(coefs[2])
lm(xy_data$yvalues~xy_data$xvalues)

xlabel <- "%(x_axis_label)s"
ylabel <- "%(y_axis_label)s"
plot_title <- "%(plot_title)s"
rvalue <- cor(xy_data$yvalues, xy_data$xvalues)
rvalue
xy_data

#3QDO = SNX27
#1G9O = NHERF1

valid_xy_data <- xy_data[which(xy_data$xvalues < 6.99),]
rvalue <- cor(valid_xy_data$yvalues, valid_xy_data$xvalues)
rvalue
valid_xy_data

valid_xy_data_NHERF1 <- xy_data[which(xy_data$xvalues < 6.99 & xy_data$PDB == '1G9O'),]
rvalue_NHERF1 <- cor(valid_xy_data_NHERF1$yvalues, valid_xy_data_NHERF1$xvalues)
rvalue_NHERF1
valid_xy_data_NHERF1

coefs_NHERF1 <- coef(lm(xvalues~yvalues, data = valid_xy_data_NHERF1))
lmv_intercept_NHERF1 <- as.numeric(coefs_NHERF1[1])
lmv_yvalues_NHERF1 <- as.numeric(coefs_NHERF1[2])

valid_xy_data_SNX27 <- xy_data[which(xy_data$xvalues < 6.99 & xy_data$PDB == '3QDO'),]
rvalue_SNX27 <- cor(valid_xy_data_SNX27$yvalues, valid_xy_data_SNX27$xvalues)
rvalue_SNX27
valid_xy_data_SNX27

coefs_SNX27 <- coef(lm(xvalues~yvalues, data = valid_xy_data_SNX27))
lmv_intercept_SNX27 <- as.numeric(coefs_SNX27[1])
lmv_yvalues_SNX27 <- as.numeric(coefs_SNX27[2])

lmv_intercept
lmv_yvalues
lmv_intercept_NHERF1
lmv_yvalues_NHERF1
lmv_intercept_SNX27
lmv_yvalues_SNX27

# Set graph limits and the position for the correlation value

minx <- min(0.0, min(xy_data$xvalues) - 0.1)
miny <- min(0.0, min(xy_data$yvalues) - 0.1)
maxx <- max(1.0, max(xy_data$xvalues) + 0.1)
maxy <- max(1.0, max(xy_data$yvalues) + 0.1)
"""

    if min_predicted_ddg is not None:
        single_plot_commands += """
miny <- min(miny - 0.2, %(min_predicted_ddg)f - 0.2)
"""
    if max_predicted_ddg is not None:
        # NOTE(review): the fixed miny/maxy assignments below override the computed limits whenever
        # max_predicted_ddg is given. This looks like a leftover from a specific figure - confirm.
        single_plot_commands += """
maxy <- max(maxy + 0.5, %(max_predicted_ddg)f + 0.5)

miny <- -6
maxy <- 12.5
"""
    if min_experimental_ddg is not None:
        single_plot_commands += """
minx <- min(minx, %(min_experimental_ddg)f)
"""
    if max_experimental_ddg is not None:
        single_plot_commands += """
maxx <- max(maxx, %(max_experimental_ddg)f) + 0.2
"""

    single_plot_commands += """
xpos <- minx + 0.2
ypos <- maxy - 1
ypos_SNX27 <- ypos - 1
ypos_NHERF1 <- ypos_SNX27 - 1

lrt <- expression('R'^tst)

p <- qplot(main="", xvalues, yvalues, data=xy_data, xlab=xlabel, ylab=ylabel, shape = PDB, alpha = I(txtalpha)) +
        geom_point(aes(color = PDB), alpha = 0.6) +
        scale_colour_manual(name="", values = c("1G9O"="orange", "3QDO"="blue", "3"="red", "value3"="grey", "value2"="black")) +
        labs(title = "%(plot_title)s") +
        theme(plot.title = element_text(color = "#555555", size=rel(0.75))) +
        # Correlation fit lines (global + one per facet
        geom_abline(size = 0.125, color="black", intercept = lmv_intercept, slope = lmv_yvalues, alpha=0.2) +
        geom_abline(size = 0.125, color="orange", intercept = lmv_intercept_NHERF1, slope = lmv_yvalues_NHERF1, alpha=0.4) +
        geom_abline(size = 0.125, color="blue", intercept = lmv_intercept_SNX27, slope = lmv_yvalues_SNX27, alpha=0.4) +
        geom_abline(slope=1, intercept=0, linetype=3, size=0.25, alpha=0.4) + # add a diagonal (dotted)
        coord_cartesian(xlim = c(minx, maxx), ylim = c(miny, maxy)) + # set the graph limits
        geom_text(hjust = 0, size=1.5, color="#000000", alpha=0.6, data=subset(xy_data, abs(yvalues - xvalues) > 2 & xvalues <= 0), aes(xvalues, yvalues+0.35, label=Origin_of_peptide), check_overlap = TRUE) + # label outliers
        geom_text(hjust = 1, size=1.5, color="#000000", alpha=0.6, data=subset(xy_data, abs(yvalues - xvalues) > 2 & xvalues > 0), aes(xvalues, yvalues+0.35, label=Origin_of_peptide), check_overlap = TRUE) + # label outliers
        geom_text(hjust=0, size=2, colour="black", aes(x = xpos, y = ypos, label = sprintf("R == %%0.2f", round(rvalue, digits = 4))), parse = TRUE) +
        geom_text(hjust=0, size=2, colour="darkorange", aes(x = xpos, y = ypos_NHERF1, label = sprintf("R[NHERF] == %%0.2f", round(rvalue_NHERF1, digits = 4))), parse = TRUE) +
        geom_text(hjust=0, size=2, colour="blue", aes(x = xpos, y = ypos_SNX27, label = sprintf("R[SNX27] == %%0.2f", round(rvalue_SNX27, digits = 4))), parse = TRUE) +
        theme(legend.position = "none")
        # geom_text(hjust=0, size=2, colour="black", aes(xpos, ypos, fontface="plain", family = "sans", label=paste(sprintf("R = %%0.2f%%s", round(rvalue, digits = 4), lrt), expression('R'[3]) ))) # add correlation text; hjust=0 sets left-alignment
        #geom_text(hjust=0, size=3, colour="black", aes(xpos, ypos, fontface="plain", family = "sans", label=sprintf("R = %%0.2f", round(rvalue, digits = 4)))) # add correlation text; hjust=0 sets left-alignment
        # geom_text(hjust=0, size=3, colour="black", aes(xpos, ypos, fontface="plain", family = "sans", label=sprintf("R = %%0.2f", round(rvalue, digits = 4)))) # add correlation text; hjust=0 sets left-alignment

# Plot graph
p
dev.off()
"""

    # Create the R script. The templates are filled in from the local variables of this function.
    plot_type = "png"
    png_plot_commands = single_plot_commands % locals()
    boxplot_r_script = boxplot_r_script % locals()
    r_script_filename = "{0}.R".format(file_prefix)
    r_script_filepath = os.path.join(output_directory, r_script_filename)
    write_file(r_script_filepath, boxplot_r_script)

    # Run the R script
    run_r_script(r_script_filename, cwd=output_directory)