def summarize_report(filenames, matrices, num_factors, start_time,
                     file_layout):
    """Write the BFRMNormalize HTML report to ``file_layout.REPORT``.

    filenames    Paths of the input data sets; only the base name of each
                 is shown in the report.
    matrices     One matrix per entry in filenames (same order); each must
                 provide ncol(), which is reported as the sample count.
    num_factors  Number of factors used for normalization (formatted as %d).
    start_time   Timestamp comparable with time.time(); used for the
                 "run on" date and to compute the elapsed run time.
    file_layout  Object exposing DS_PROC, DS_FINAL, DS_PROC_HEATMAP,
                 DS_FINAL_HEATMAP, DS_PROC_SCATTER, DS_FINAL_SCATTER and
                 REPORT path attributes.

    Raises AssertionError if filenames and matrices differ in length, or
    if the hostname cannot be determined.

    Relies on the module-level names ``os`` and ``NUM_FILTERED_GENES``.
    """
    import time
    import subprocess
    from genomicode import htmllib
    from genomicode import parselib

    assert len(filenames) == len(matrices)

    def plot_cell(caption, path, height):
        # One centered "caption / clickable image" cell.  Only the base
        # name is used so that links are relative to the report.
        image = os.path.split(path)[1]
        return htmllib.CENTER(
            htmllib.B(caption) + htmllib.BR() +
            htmllib.A(htmllib.IMG(height=height, src=image), href=image))

    lines = []
    w = lines.append
    w("<HTML>")
    w(htmllib.HEAD(htmllib.TITLE("BFRMNormalize Report")))
    w("<BODY>")
    w(htmllib.CENTER(htmllib.H1(htmllib.EM("BFRMNormalize") + " Report")))

    w(htmllib.H3("I. Overview"))
    files = [os.path.split(x)[1] for x in filenames]
    x1 = "one data set"
    if len(files) > 1:
        x1 = "the following data sets"
    x2 = "factor"
    if num_factors > 1:
        x2 = "factors"
    items = ["I normalized %s using %d %s." % (x1, num_factors, x2)]
    for name, matrix in zip(files, matrices):
        entry = "%s (%d samples)" % (name, matrix.ncol())
        items.append(htmllib.LI() + entry)
    w(htmllib.UL("\n".join(items)))
    w(htmllib.P())

    x = os.path.split(file_layout.DS_PROC)[1]
    w("The merged gene expression data set is available at " +
      htmllib.A(x, href=x) + ".")
    w(htmllib.BR())
    x = os.path.split(file_layout.DS_FINAL)[1]
    w("The normalized data set is available at " +
      htmllib.A(x, href=x) + ".")
    w(htmllib.P())

    w(htmllib.H3("II. Results"))

    # Make the table of the heatmaps.
    before = plot_cell(
        "Before Normalization", file_layout.DS_PROC_HEATMAP, 480)
    after = plot_cell(
        "After Normalization", file_layout.DS_FINAL_HEATMAP, 480)
    row1 = htmllib.TR(htmllib.TD(before) + htmllib.TD(after))
    legend = (
        "These heatmaps show the expression patterns in the data before "
        "and after normalization. "
        "The rows contain the %d genes that exhibit the highest variance "
        "in gene expression across the original data set. "
        "The columns contain the samples in the data sets provided. "
        "The genes and samples are in the same order in both heatmaps. "
        "Warm colors indicate high expression of the gene, and cool colors "
        "indicate low expression." % NUM_FILTERED_GENES)
    row2 = htmllib.TR(
        htmllib.TD(htmllib.B("Figure 1: Heatmaps. ") + legend, colspan=2))
    # NOTE(review): "50%%" is passed verbatim (this is not a format
    # string); the doubled percent sign may be unintended -- confirm
    # against htmllib before changing it.
    w(htmllib.TABLE(row1 + row2, border=0, cellspacing=10, width="50%%"))
    w(htmllib.P())

    # Make the table of the scatter plots.
    before = plot_cell(
        "Before Normalization", file_layout.DS_PROC_SCATTER, 400)
    after = plot_cell(
        "After Normalization", file_layout.DS_FINAL_SCATTER, 400)
    row1 = htmllib.TR(htmllib.TD(before) + htmllib.TD(after))
    x1 = (
        "These plots show the samples projected onto the first two principal "
        "components of the expression profiles of the %d genes that "
        "exhibit the highest variance across the original data set. " %
        NUM_FILTERED_GENES)
    x2 = ("Each point represents a sample, and samples from the same data "
          "set have the same color. "
          "If there are batch effects, the samples from the same data set "
          "(the same color) will cluster together. "
          "If there are no batch effects, the colors should be mixed.")
    if len(filenames) == 1:
        # With a single data set there is nothing to say about batches.
        x2 = ""
    row2 = htmllib.TR(
        htmllib.TD(htmllib.B("Figure 2: PCA Plots. ") + x1 + x2, colspan=2))
    w(htmllib.TABLE(row1 + row2, border=0, cellspacing=10, width="50%%"))

    # Format the current time.
    end_time = time.time()
    time_str = parselib.pretty_date(start_time)
    # divmod on an int keeps the minutes integral on Python 2 and 3 alike;
    # plain "/" would yield a float on Python 3 and break the == 0 test.
    num_min, num_secs = divmod(int(end_time - start_time), 60)
    if num_min == 0:
        run_time = "%ss" % parselib.pretty_int(num_secs)
    else:
        run_time = "%sm %ss" % (parselib.pretty_int(num_min), num_secs)

    # Get the hostname.  communicate() closes stdin, drains stdout and
    # waits for the child, so neither the pipes nor the process leak.
    proc = subprocess.Popen(
        "hostname", shell=True, bufsize=0, stdin=subprocess.PIPE,
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
    output, _ = proc.communicate()
    hostname = output.strip()
    if not isinstance(hostname, str):
        # Python 3 returns bytes from the pipe.
        hostname = hostname.decode()
    assert hostname, "I could not get the hostname."

    w(htmllib.P())
    w(htmllib.HR())
    w(htmllib.EM(
        "This analysis was run on %s on %s. It took %s to complete." %
        (time_str, hostname, run_time)))

    w("</BODY>")
    w("</HTML>")

    report = "\n".join(lines) + "\n"
    with open(file_layout.REPORT, 'w') as handle:
        handle.write(report)
def run(self, network, antecedents, out_attributes, user_options, num_cores,
        outfile):
    """Build the "Normalization Results" HTML report in a folder.

    network      Passed to bie3.plot_network_gv to draw network.png.
    antecedents  Exactly six input nodes, each with an ``identifier``
                 path; the first must also provide ``data.attributes``.
                 By position, as used by the report:
                   0  linked as the normalized expression file
                   1  left ("Before") image of the first table
                   2  right ("After") image; copied with an "after_" prefix
                   3, 4, 5  images shown above the Figure 2, 3, 4 captions
    outfile      Output folder; created if missing.  The report is written
                 to <outfile>/report.html and every input is copied there.

    out_attributes, user_options and num_cores are not used here.

    Raises ValueError if antecedents does not hold exactly six nodes.
    """
    import os
    import shutil
    from Betsy import bie3
    from genomicode import parselib
    from genomicode import htmllib

    IMG_HEIGHT = 500

    def linked_image(path):
        # Thumbnail that links to the full-size file.
        return htmllib.A(htmllib.IMG(height=IMG_HEIGHT, src=path), href=path)

    def parameter_table(attributes):
        # Two-column "Parameter / Value" table for a node's attributes.
        rows = [htmllib.TR(
            htmllib.TH("Parameter", align="LEFT") +
            htmllib.TH("Value", align="LEFT"))]
        for key in attributes.keys():
            rows.append(htmllib.TR(
                htmllib.TD(key, align="LEFT") +
                htmllib.TD(attributes[key], align="LEFT")))
        return htmllib.TABLE(
            "\n".join(rows), border=1, cellpadding=3, cellspacing=0)

    outfile_folder = outfile
    outfile = os.path.join(outfile_folder, 'report.html')
    if not os.path.exists(outfile_folder):
        os.mkdir(outfile_folder)

    # Copy every input next to the report so the links can be relative.
    result_files = []
    for index, data_node in enumerate(antecedents):
        filename = data_node.identifier
        base = os.path.split(filename)[-1]
        if index == 2:
            # Rename one of the pcaplot files so the two do not collide.
            base = 'after_' + base
        new_name = os.path.join(outfile_folder, base)
        if os.path.isdir(filename):
            shutil.copytree(filename, new_name)
        else:
            shutil.copyfile(filename, new_name)
        result_files.append(os.path.split(new_name)[-1])

    # The unpacking doubles as a check that six inputs were supplied.
    signal_node, _, _, _, _, _ = antecedents

    lines = []
    w = lines.append
    w("<HTML>")
    title = "Normalization Results"
    w(htmllib.HEAD(htmllib.TITLE(parselib.remove_all_tags(title))))
    w("<BODY>")
    w(htmllib.CENTER(htmllib.H1(title)))
    w('I generated a file that contains the normalized gene expression values')
    w(htmllib.P())
    w(htmllib.A(result_files[0], result_files[0]))
    w(htmllib.P())
    w(htmllib.A("Methods", href="#methods_normalization"))
    w(htmllib.P())

    # Before/after images side by side.
    rows = [
        htmllib.TR(
            htmllib.TD(linked_image(result_files[1]), align="CENTER") +
            htmllib.TD(linked_image(result_files[2]), align="CENTER")),
        htmllib.TR(
            htmllib.TH("Before", align="CENTER") +
            htmllib.TH("After", align="CENTER")),
    ]
    w(htmllib.TABLE(
        "\n".join(rows), border=None, cellpadding=3, cellspacing=0))
    w(htmllib.P())
    w(htmllib.P())
    w(htmllib.B(
        'Figure 1: This pca plot shows the similarities among your samples'))
    w(htmllib.P())

    w(linked_image(result_files[3]))
    w(htmllib.P())
    w(htmllib.B(
        'Figure 2: This boxplot shows the distribution of signal values'))
    w(htmllib.P())

    w(linked_image(result_files[4]))
    w(htmllib.P())
    w(htmllib.B(
        'Figure 3: This plot shows the values of ACTB and TUBB genes'))
    w(htmllib.P())

    w(linked_image(result_files[5]))
    w(htmllib.P())
    w(htmllib.B(
        'Figure 4: This plot shows the average values control genes'))

    # Methods.
    w(htmllib.HR())
    w(htmllib.A("<methods_normalization>", name="methods_normalization"))
    w(htmllib.CENTER(htmllib.H2("Methods")))
    w(htmllib.H3("1.Normalization File"))
    w('To generate this file, I ran the following analysis:')
    w(htmllib.P())
    bie3.plot_network_gv(os.path.join(outfile_folder, "network.png"), network)
    w(linked_image("network.png"))
    w('I used the following parameters:')
    w(parameter_table(signal_node.data.attributes))
    w(htmllib.P())
    w(htmllib.H3("2. PCA analysis"))
    w('I made a principal component plot that shows the similarities among your samples.')
    w(htmllib.P())
    w(htmllib.H3("3. Signal distribution"))
    w('I made a box plot that shows the distribution of signal values.')
    w(htmllib.P())
    w(htmllib.H3("4. Control signal"))
    w('I made two plots that show the values of control signal.')
    w(htmllib.P())

    # Write out the footer.
    w(htmllib.P())
    w(htmllib.HR())
    w("</BODY>")
    w("</HTML>")

    # Use a context manager so the report is flushed and closed promptly.
    with open(outfile, 'w') as handle:
        handle.write("\n".join(lines) + "\n")
def run(self, network, antecedents, out_attributes, user_options, num_cores,
        outfile):
    """Build the "Classification Results" HTML report in a folder.

    network       Passed to bie3.plot_network_gv to draw network.png.
    antecedents   Exactly eleven input nodes, each with an ``identifier``
                  path.  Input 0 is read with arrayio for the gene list;
                  inputs 1-5 feed the SVM section and 6-10 the Weighted
                  Voting section.  Inputs 1 and 6 must also provide
                  ``data.attributes`` for the parameter tables.
    user_options  Mapping; 'num_features_value', when present, limits how
                  many genes the Weighted Voting table considers
                  (default 10).
    outfile       Output folder; created if missing.  The report is
                  written to <outfile>/report.html and every input is
                  copied there.

    out_attributes and num_cores are not used here.

    Raises ValueError if antecedents does not hold exactly eleven nodes.
    """
    import os
    import shutil
    import math
    from Betsy import bie3
    import arrayio
    from genomicode import htmllib
    from genomicode import parselib

    NCOLUMN = 3    # genes per row in the gene tables
    MAX_ROWS = 8   # rows shown before falling back to "N more genes"

    def linked_image(path, height):
        # Thumbnail that links to the full-size file.
        return htmllib.A(htmllib.IMG(height=height, src=path), href=path)

    def gene_table(genes):
        # Lay the gene names out in a NCOLUMN-wide grid of at most
        # MAX_ROWS rows.  Returns (table_html, number_of_genes_not_shown).
        # The last row is padded with empty cells so that no gene is
        # dropped when the count is not a multiple of NCOLUMN.
        genes = list(genes)
        nrow = min(MAX_ROWS, int(math.ceil(float(len(genes)) / NCOLUMN)))
        capacity = nrow * NCOLUMN
        shown = genes[:capacity]
        shown.extend([''] * (capacity - len(shown)))
        rows = []
        for i in range(nrow):
            cells = ['<td>' + gene + '</td>'
                     for gene in shown[NCOLUMN * i:NCOLUMN * (i + 1)]]
            rows.append(htmllib.TR("\n".join(cells)))
        table = htmllib.TABLE(
            "\n".join(rows), border=1, cellpadding=3, cellspacing=0)
        return table, max(0, len(genes) - capacity)

    def classifier_section(genes, gene_file, pca_plot, pca_caption,
                           loocv_plot, loocv_file, loocv_caption,
                           predict_plot, predict_file, predict_caption):
        # Layout table shared by the SVM and Weighted Voting sections.
        table, more_genes = gene_table(genes)
        rows = [
            htmllib.TR(
                htmllib.TD(table, align='CENTER') +
                htmllib.TD(linked_image(pca_plot, 400), align='CENTER')),
            htmllib.TR(
                htmllib.TD(
                    htmllib.A(str(more_genes) + ' more genes', gene_file),
                    align='LEFT') +
                htmllib.TD(htmllib.B(pca_caption), align='CENTER')),
            htmllib.TR(
                htmllib.TD(linked_image(loocv_plot, 400), align="CENTER") +
                htmllib.TD(linked_image(predict_plot, 400), align="CENTER")),
            htmllib.TR(
                htmllib.TH(htmllib.A(loocv_caption, loocv_file),
                           align="CENTER") +
                htmllib.TH(htmllib.A(predict_caption, predict_file),
                           align="CENTER")),
        ]
        return htmllib.TABLE(
            "\n".join(rows), border=None, cellpadding=3, cellspacing=0)

    def parameter_table(attributes):
        # Two-column "Parameter / Value" table for a node's attributes.
        rows = [htmllib.TR(
            htmllib.TH("Parameter", align="LEFT") +
            htmllib.TH("Value", align="LEFT"))]
        for key in attributes.keys():
            rows.append(htmllib.TR(
                htmllib.TD(key, align="LEFT") +
                htmllib.TD(attributes[key], align="LEFT")))
        return htmllib.TABLE(
            "\n".join(rows), border=1, cellpadding=3, cellspacing=0)

    outfile_folder = outfile
    outfile = os.path.join(outfile_folder, 'report.html')
    if not os.path.exists(outfile_folder):
        os.mkdir(outfile_folder)

    # Copy every input next to the report so the links can be relative.
    result_files = []
    for data_node in antecedents:
        filename = data_node.identifier
        new_name = os.path.join(outfile_folder, os.path.split(filename)[-1])
        if os.path.isdir(filename):
            shutil.copytree(filename, new_name)
        else:
            shutil.copyfile(filename, new_name)
        result_files.append(os.path.split(new_name)[-1])

    # The unpacking doubles as a check that eleven inputs were supplied.
    (_, svm_node, _, _, _, _, wv_node, _, _, _, _) = antecedents

    # Both sections list genes from the same file; read it only once.
    M = arrayio.read(os.path.join(outfile_folder, result_files[0]))
    all_genes = M.row_names(M._row_order[0])

    lines = []
    w = lines.append
    w("<HTML>")
    title = "Classification Results"
    w(htmllib.HEAD(htmllib.TITLE(parselib.remove_all_tags(title))))
    w("<BODY>")
    w(htmllib.CENTER(htmllib.H1(title)))

    #------------------------------------
    w(htmllib.H3("SVM"))
    w(htmllib.P())
    w(htmllib.A("Methods", href="#methods_svm"))
    w(htmllib.P())
    w(htmllib.B('Table 1: Table of genes used in classification'))
    w(htmllib.P())
    w(classifier_section(
        all_genes, result_files[0],
        result_files[5],
        'Figure 1: This figure shows the PCA plot of samples colored by prediction',
        result_files[4], result_files[3],
        "Figure 2. Loocv result on training data",
        result_files[2], result_files[1],
        "Figure 3. Prediction result on test data"))
    w(htmllib.P())

    #------------------------------------
    w(htmllib.H3("Weighted Voting"))
    w(htmllib.P())
    w(htmllib.A("Methods", href="#methods_wv"))
    w(htmllib.P())
    # Table and figure numbers continue from the SVM section; the original
    # captions repeated "Table 1", "Figure 2" and "Figure 3" here.
    w(htmllib.B('Table 2: Table of genes used in classification'))
    w(htmllib.P())
    nfeature = 10
    if 'num_features_value' in user_options:
        # NOTE(review): used directly as a slice bound, so this must be an
        # int (or None) -- confirm what type the caller supplies.
        nfeature = user_options['num_features_value']
    w(classifier_section(
        all_genes[0:nfeature], result_files[0],
        result_files[10],
        'Figure 4: This figure shows the PCA plot of samples colored by prediction',
        result_files[9], result_files[8],
        "Figure 5. Loocv result on training data",
        result_files[7], result_files[6],
        "Figure 6. Prediction result on test data"))
    w(htmllib.P())

    #--------------------------------
    w(htmllib.HR())
    w(htmllib.A("<methods_svm>", name="methods_svm"))
    w('To generate these files, I ran the following analysis:')
    bie3.plot_network_gv(os.path.join(outfile_folder, "network.png"), network)
    w(htmllib.P())
    w(linked_image("network.png", 500))
    w(htmllib.CENTER(htmllib.H2("SVM Methods")))
    w(htmllib.H3("Prediction Result"))
    w('I used the following parameters:')
    w(parameter_table(svm_node.data.attributes))
    w(htmllib.P())

    w(htmllib.A("<methods_wv>", name="methods_wv"))
    w(htmllib.CENTER(htmllib.H2("Weighted Voting Methods")))
    w(htmllib.H3("Prediction Result"))
    w('I used the following parameters:')
    w(parameter_table(wv_node.data.attributes))
    w(htmllib.P())

    # Write out the footer.
    w(htmllib.P())
    w(htmllib.HR())
    w("</BODY>")
    w("</HTML>")

    # Use a context manager so the report is flushed and closed promptly.
    with open(outfile, 'w') as handle:
        handle.write("\n".join(lines) + "\n")
def run(self, network, antecedents, out_attributes, user_options, num_cores,
        outfile):
    """Build the "Normalization Results" HTML report in a folder.

    network      Passed to bie3.plot_network_gv to draw network.png.
    antecedents  Input nodes, in the order listed below; the first must
                 provide ``data.attributes`` for the parameter table.
    outfile      Output folder; created if missing.  The report is written
                 to <outfile>/report.html and the inputs are copied there
                 via reportlib.

    out_attributes, user_options and num_cores are not used here.
    """
    import os
    from genomicode import parselib
    from genomicode import htmllib
    from Betsy import reportlib
    from Betsy import bie3

    IMG_HEIGHT = 400

    def linked_image(path):
        # Thumbnail that links to the full-size file.
        return htmllib.A(htmllib.IMG(height=IMG_HEIGHT, src=path), href=path)

    out_path = outfile
    outfile = os.path.join(out_path, 'report.html')
    if not os.path.exists(out_path):
        os.mkdir(out_path)

    # Make a list of the (relative) files for each input nodes:
    # 0  SignalFile        Preprocessed gene expression data.
    # 1  IntensityPlot     Box plot of signal intensity values.
    # 2  BiotinPlot
    # 3  PcaPlot           Has normalization.
    # 4  ActbPlot          From _SignalFile_Impute.
    # 5  PcaPlot           No normalization or anything.
    # 6  HousekeepingPlot
    # 7  Hyb_barPlot       No normalization or anything.
    # 8  ControlFile       File with Illumina control probes.
    def rename_pca_file(i, in_file):
        # Input 3 gets an "after_" prefix; all other names are kept.
        if i == 3:
            return 'after_%s' % in_file
        return in_file

    filenames = reportlib.extract_filenames(
        antecedents, out_path, rename_pca_file)
    # Each entry unpacks to four values; the last two are the source and
    # destination handed to the copy helper.
    for _, _, in_filename, out_filename in filenames:
        reportlib.copy_file_or_path(in_filename, out_filename)

    # The second element of each entry is the name used in the report.
    signal_file = filenames[0][1]
    intensity_file = filenames[1][1]
    biotin_file = filenames[2][1]
    after_pca_file = filenames[3][1]
    actb_file = filenames[4][1]
    before_pca_file = filenames[5][1]
    housekeeping_file = filenames[6][1]
    hyb_file = filenames[7][1]
    control_file = filenames[8][1]

    signal_node = antecedents[0]

    lines = []
    w = lines.append
    w("<HTML>")
    title = "Normalization Results"
    w(htmllib.HEAD(htmllib.TITLE(parselib.remove_all_tags(title))))
    w("<BODY>")
    w(htmllib.CENTER(htmllib.H1(title)))

    # Provide a link to the signal file.
    w('Preprocessed signal values: ')
    # TODO: Show the attributes for this data.
    w(htmllib.A(signal_file, signal_file))
    w(htmllib.P())

    # Show the PCA plot before and after normalization.
    rows = [
        htmllib.TR(
            htmllib.TD(linked_image(before_pca_file), align="CENTER") +
            htmllib.TD(linked_image(after_pca_file), align="CENTER")),
        htmllib.TR(
            htmllib.TH("Before", align="CENTER") +
            htmllib.TH("After", align="CENTER")),
    ]
    w(htmllib.TABLE(
        "\n".join(rows), border=None, cellpadding=3, cellspacing=0))
    w(htmllib.P())
    w(htmllib.P())
    w(htmllib.B(
        'Figure 1: This pca plot shows the similarities among your samples'))
    w(htmllib.P())

    # The remaining figures all share the same image-then-caption layout.
    figures = [
        (intensity_file,
         'Figure 2: This boxplot shows the distribution of signal values'),
        (actb_file,
         'Figure 3: This plot shows the values of ACTB and TUBB genes'),
        (biotin_file,
         'Figure 4: This plot shows the value of biotin control genes'),
        (housekeeping_file,
         'Figure 5: This plot shows the value of housekeeping control genes'),
        (hyb_file,
         'Figure 6: This barplot shows the distribution control values'),
    ]
    for image, caption in figures:
        w(linked_image(image))
        w(htmllib.P())
        w(htmllib.B(caption))
        w(htmllib.P())

    # Methods.
    w(htmllib.A("<methods_normalization>", name="methods_normalization"))
    w(htmllib.CENTER(htmllib.H2("Methods")))
    w(htmllib.H3("1.Normalization File"))
    w('To generate this file, I ran the following analysis:')
    w(htmllib.P())
    bie3.plot_network_gv(os.path.join(out_path, "network.png"), network)
    w(linked_image("network.png"))
    w(htmllib.P())

    w('I used the following parameters:')
    rows = [htmllib.TR(
        htmllib.TH("Parameter", align="LEFT") +
        htmllib.TH("Value", align="LEFT"))]
    attributes = signal_node.data.attributes
    for key in attributes.keys():
        rows.append(htmllib.TR(
            htmllib.TD(key, align="LEFT") +
            htmllib.TD(attributes[key], align="LEFT")))
    w(htmllib.TABLE("\n".join(rows), border=1, cellpadding=3, cellspacing=0))
    w(htmllib.P())

    w(htmllib.H3("2. PCA analysis"))
    w('I made a principal component plot that shows the similarities among your samples.')
    w(htmllib.P())
    w(htmllib.H3("3. Signal distribution"))
    w('I made a box plot that shows the distribution of signal values.')
    w(htmllib.P())
    w(htmllib.H3("4. Control signal"))
    w('I made two plots that show the values of control signal.')
    w(htmllib.P())
    w(htmllib.H3("5. Control signal"))
    w('I made a bar plot that shows the hybridization controls.')
    w(htmllib.P())
    w('The control file is ')
    w(htmllib.A(control_file, control_file))
    w(htmllib.P())

    # Write out the footer.
    w(htmllib.P())
    w(htmllib.HR())
    w("</BODY>")
    w("</HTML>")

    # Use a context manager so the report is flushed and closed promptly.
    with open(outfile, 'w') as handle:
        handle.write("\n".join(lines) + "\n")
def run(self, network, antecedents, out_attributes, user_options, num_cores,
        outfile):
    """Build the "Geneset Analysis Results" HTML report in a folder.

    network      Passed to bie3.plot_network_gv to draw network.png.
    antecedents  Exactly two input nodes, each with an ``identifier``
                 path.  The first is linked as the result file and must
                 provide ``data.attributes``; the second is a directory
                 whose entries are each shown as a figure.
    outfile      Output folder; created if missing.  The report is written
                 to <outfile>/report.html and both inputs are copied there.

    out_attributes, user_options and num_cores are not used here.

    Raises ValueError if antecedents does not hold exactly two nodes.
    """
    import os
    import shutil
    from Betsy import bie3
    from genomicode import htmllib
    from genomicode import parselib

    outfile_folder = outfile
    outfile = os.path.join(outfile_folder, 'report.html')
    if not os.path.exists(outfile_folder):
        os.mkdir(outfile_folder)

    # Copy every input next to the report so the links can be relative.
    result_files = []
    for data_node in antecedents:
        filename = data_node.identifier
        new_name = os.path.join(outfile_folder, os.path.split(filename)[-1])
        if os.path.isdir(filename):
            shutil.copytree(filename, new_name)
        else:
            shutil.copyfile(filename, new_name)
        result_files.append(os.path.split(new_name)[-1])

    # The unpacking doubles as a check that two inputs were supplied.
    result_node, _ = antecedents

    lines = []
    w = lines.append
    w("<HTML>")
    title = "Geneset Analysis Results"
    w(htmllib.HEAD(htmllib.TITLE(parselib.remove_all_tags(title))))
    w("<BODY>")
    w(htmllib.CENTER(htmllib.H1(title)))
    w('I generated a file that contains the analysis result of the geneset')
    w(htmllib.P())
    w(htmllib.A(result_files[0], result_files[0]))
    w(htmllib.P())
    w(htmllib.A("Methods", href="#methods"))
    w(htmllib.P())

    # One figure per entry in the plot directory.  os.listdir returns
    # entries in arbitrary order, so sort them to keep the figure
    # numbering stable from run to run.
    plot_dir = result_files[1]
    plot_names = sorted(os.listdir(os.path.join(outfile_folder, plot_dir)))
    for number, plot_name in enumerate(plot_names, 1):
        plot_path = os.path.join(plot_dir, plot_name)
        w(htmllib.A(htmllib.IMG(height=500, src=plot_path), href=plot_path))
        w(htmllib.P())
        w(htmllib.B('Figure ' + str(number) + ': Geneset Plot.'))

    # Methods.
    w(htmllib.HR())
    w(htmllib.A("<methods>", name="methods"))
    w(htmllib.CENTER(htmllib.H2("Methods")))
    w(htmllib.H3("1.Result File"))
    w('To generate this file, I ran the following analysis:')
    bie3.plot_network_gv(os.path.join(outfile_folder, "network.png"), network)
    w(htmllib.A(htmllib.IMG(height=500, src="network.png"),
                href="network.png"))
    w(htmllib.P())

    w('I used the following parameters:')
    rows = [htmllib.TR(
        htmllib.TH("Parameter", align="LEFT") +
        htmllib.TH("Value", align="LEFT"))]
    attributes = result_node.data.attributes
    for key in attributes.keys():
        rows.append(htmllib.TR(
            htmllib.TD(key, align="LEFT") +
            htmllib.TD(attributes[key], align="LEFT")))
    w(htmllib.TABLE("\n".join(rows), border=1, cellpadding=3, cellspacing=0))
    w(htmllib.P())

    # Write out the footer.
    w(htmllib.P())
    w(htmllib.HR())
    w("</BODY>")
    w("</HTML>")

    # Use a context manager so the report is flushed and closed promptly.
    with open(outfile, 'w') as handle:
        handle.write("\n".join(lines) + "\n")
def run(self, network, antecedents, out_attributes, user_options, num_cores,
        outfile):
    """Build the "Heatmap Results" HTML report in a folder.

    network      Passed to bie3.plot_network_gv to draw network.png.
    antecedents  A single input node (not a sequence) with an
                 ``identifier`` path, shown as the heatmap image, and
                 ``data.attributes`` for the parameter table.
    outfile      Output folder; created if missing.  The report is written
                 to <outfile>/report.html and the input is copied there.

    out_attributes, user_options and num_cores are not used here.
    """
    import os
    import shutil
    from Betsy import bie3
    from genomicode import parselib
    from genomicode import htmllib

    in_data = antecedents
    outfile_folder = outfile
    outfile = os.path.join(outfile_folder, 'report.html')
    if not os.path.exists(outfile_folder):
        os.mkdir(outfile_folder)

    # Copy the input next to the report so the link can be relative.
    filename = in_data.identifier
    new_name = os.path.join(outfile_folder, os.path.split(filename)[-1])
    if os.path.isdir(filename):
        shutil.copytree(filename, new_name)
    else:
        shutil.copyfile(filename, new_name)
    heatmap_file = os.path.split(new_name)[-1]

    lines = []
    w = lines.append
    w("<HTML>")
    title = "Heatmap Results"
    w(htmllib.HEAD(htmllib.TITLE(parselib.remove_all_tags(title))))
    w("<BODY>")
    w(htmllib.CENTER(htmllib.H1(title)))
    w(htmllib.P())
    w(htmllib.A("Methods", href="#methods_clustering"))
    w(htmllib.P())
    w(htmllib.A(htmllib.IMG(height=500, src=heatmap_file),
                href=heatmap_file))
    w(htmllib.P())
    # The original literal used a backslash continuation, which embedded
    # the next line's indentation in the caption; adjacent literals keep
    # the text while leaving a single space (HTML renders both the same).
    name = ('Figure 1: In this heatmap, each row contains a signature and '
            'each column '
            'contains a sample from your data set.')
    w(htmllib.B(name))

    # Methods.
    w(htmllib.HR())
    w(htmllib.A("<methods_clustering>", name="methods_clustering"))
    w(htmllib.CENTER(htmllib.H2("Methods")))
    w(htmllib.H3("1.Heatmap File"))
    w('To generate this file, I ran the following analysis:')
    bie3.plot_network_gv(os.path.join(outfile_folder, "network.png"), network)
    w(htmllib.A(htmllib.IMG(height=500, src="network.png"),
                href="network.png"))
    w(htmllib.P())

    # The section heading used to be emitted a second time between this
    # sentence and the table; the stray duplicate has been dropped.
    w('I used the following parameters:')
    rows = [htmllib.TR(
        htmllib.TH("Parameter", align="LEFT") +
        htmllib.TH("Value", align="LEFT"))]
    attributes = in_data.data.attributes
    for key in attributes.keys():
        rows.append(htmllib.TR(
            htmllib.TD(key, align="LEFT") +
            htmllib.TD(attributes[key], align="LEFT")))
    w(htmllib.TABLE("\n".join(rows), border=1, cellpadding=3, cellspacing=0))
    w(htmllib.P())

    # Write out the footer.
    w(htmllib.P())
    w(htmllib.HR())
    w("</BODY>")
    w("</HTML>")

    # Use a context manager so the report is flushed and closed promptly.
    with open(outfile, 'w') as handle:
        handle.write("\n".join(lines) + "\n")
def run(self, network, antecedents, out_attributes, user_options, num_cores,
        outfile):
    """Build the "Normalization Results" HTML report in a folder.

    network      Passed to bie3.plot_network_gv to draw network.png.
    antecedents  Exactly six input nodes, each with an ``identifier``
                 path, in the order listed below; the first must also
                 provide ``data.attributes`` for the parameter table.
    outfile      Output folder; created if missing.  The report is written
                 to <outfile>/report.html and every input is copied there.

    out_attributes, user_options and num_cores are not used here.

    Raises ValueError if antecedents does not hold exactly six nodes.
    """
    import os
    import shutil
    from genomicode import parselib
    from genomicode import htmllib
    from Betsy import bie3

    IMG_HEIGHT = 400

    def linked_image(path):
        # Thumbnail that links to the full-size file.
        return htmllib.A(htmllib.IMG(height=IMG_HEIGHT, src=path), href=path)

    out_path = outfile
    outfile = os.path.join(out_path, 'report.html')
    if not os.path.exists(out_path):
        os.mkdir(out_path)

    # The unpacking doubles as a check that six inputs were supplied.
    signal_node, _, _, _, _, _ = antecedents

    # Make a list of the (relative) files for each input nodes:
    # 0  SignalFile     Preprocessed gene expression data.
    # 1  IntensityPlot  Box plot of signal intensity values.
    # 2  ControlPlot    AFFX control probes.
    # 3  PcaPlot        Has normalization.
    # 4  ActbPlot       From _SignalFile_Impute.
    # 5  PcaPlot        No normalization or anything.
    result_files = []
    for index, data_node in enumerate(antecedents):
        in_filename = data_node.identifier   # full path
        out_file = os.path.split(in_filename)[1]
        if index == 3:
            # Rename one of the pcaplot files so the two do not collide.
            out_file = "after_%s" % out_file
        out_filename = os.path.join(out_path, out_file)
        if os.path.isdir(in_filename):
            shutil.copytree(in_filename, out_filename)
        else:
            shutil.copyfile(in_filename, out_filename)
        result_files.append(out_file)

    lines = []
    w = lines.append
    w("<HTML>")
    title = "Normalization Results"
    w(htmllib.HEAD(htmllib.TITLE(parselib.remove_all_tags(title))))
    w("<BODY>")
    w(htmllib.CENTER(htmllib.H1(title)))

    # Provide a link to the signal file.
    w('Preprocessed signal values: ')
    # TODO: Show the attributes for this data.
    w(htmllib.A(result_files[0], result_files[0]))
    w(htmllib.P())

    # Show the PCA plot before and after normalization.
    rows = [
        htmllib.TR(
            htmllib.TD(linked_image(result_files[5]), align="CENTER") +
            htmllib.TD(linked_image(result_files[3]), align="CENTER")),
        htmllib.TR(
            htmllib.TH("Before", align="CENTER") +
            htmllib.TH("After", align="CENTER")),
    ]
    w(htmllib.TABLE(
        "\n".join(rows), border=None, cellpadding=3, cellspacing=0))
    w(htmllib.P())
    w(htmllib.P())
    w(htmllib.B(
        'Figure 1: This pca plot shows the similarities among your samples'))
    w(htmllib.P())

    # Show the distribution of the signal values.
    w(linked_image(result_files[1]))
    w(htmllib.P())
    w(htmllib.B(
        'Figure 2: This boxplot shows the distribution of signal values'))
    w(htmllib.P())

    # Show the actin and tubulin values.
    w(linked_image(result_files[4]))
    w(htmllib.P())
    w(htmllib.B(
        'Figure 3: This plot shows the values of ACTB and TUBB genes'))
    w(htmllib.P())

    # Affymetrix control genes.
    w(linked_image(result_files[2]))
    w(htmllib.P())
    w(htmllib.B(
        'Figure 4: This plot shows the average values Affymetrix control genes'))

    # Methods.
    w(htmllib.HR())
    w(htmllib.A("<methods_normalization>", name="methods_normalization"))
    w(htmllib.CENTER(htmllib.H2("Methods")))
    w(htmllib.H3("1.Normalization File"))
    w('To generate this file, I ran the following analysis:')
    w(htmllib.P())
    bie3.plot_network_gv(os.path.join(out_path, "network.png"), network)
    w(linked_image("network.png"))
    w(htmllib.P())

    w('I used the following parameters:')
    rows = [htmllib.TR(
        htmllib.TH("Parameter", align="LEFT") +
        htmllib.TH("Value", align="LEFT"))]
    attributes = signal_node.data.attributes
    for key in attributes.keys():
        rows.append(htmllib.TR(
            htmllib.TD(key, align="LEFT") +
            htmllib.TD(attributes[key], align="LEFT")))
    w(htmllib.TABLE("\n".join(rows), border=1, cellpadding=3, cellspacing=0))
    w(htmllib.P())

    # Write out the footer.
    w(htmllib.P())
    w(htmllib.HR())
    w("</BODY>")
    w("</HTML>")

    # Use a context manager so the report is flushed and closed promptly.
    with open(outfile, 'w') as handle:
        handle.write("\n".join(lines) + "\n")
def summarize_report(
        analysis_name, signatures, orig_signatures, report_files, start_time,
        why_dropped, file_layout):
    """Write the ScoreSignatures HTML report to ``file_layout.REPORT``.

    analysis_name    Optional name appended to the report title.
    signatures       Signatures that were actually run.  Each provides
                     xID, Normalization, Genes, Metagenes, Quantile and
                     Shift_Scale.
    orig_signatures  The same kind of objects (plus Name) holding the
                     default settings; every signature listed in the
                     report must appear here.
    report_files     One report path per entry in signatures (same
                     order); only the base name is linked.
    start_time       Passed to parselib.pretty_date and pretty_runtime
                     for the footer.
    why_dropped      Mapping of xID -> explanation for signatures that
                     were not run.
    file_layout      Object exposing PROBABILITIES_PNG, PROBABILITIES_PCL
                     and REPORT path attributes.

    Raises AssertionError if signatures and report_files differ in
    length, and KeyError if a run signature has no original counterpart.

    Relies on the module-level names ``os``, ``pretty_runtime`` and
    ``pretty_hostname``.
    """
    import time
    from genomicode import parselib
    from genomicode import htmllib

    def highlight(s):
        return htmllib.SPAN(s, style="background-color:yellow")

    def smaller(s):
        return htmllib.FONT(s, size=-1)

    def with_default(value, default):
        # Cell content for a customized value: the value highlighted,
        # with the default shown underneath in small italics.
        return "%s<BR>%s" % (
            highlight(value), smaller(htmllib.EM("default: %s" % default)))

    def describe_normalization(sig):
        # Human-readable list of the normalizations switched on for sig.
        steps = []
        if sig.Quantile.upper() == "YES":
            steps.append("Quantile")
        if sig.Shift_Scale.upper() == "YES":
            steps.append("Shift-Scale")
        if not steps:
            return "None"
        return " and ".join(steps)

    id2orig = dict((sig.xID, sig) for sig in orig_signatures)
    id2new = dict((sig.xID, sig) for sig in signatures)

    assert len(signatures) == len(report_files)
    # The report_file in the HTML should be a relative path.
    id2reportfile = {}
    for sig, file_ in zip(signatures, report_files):
        id2reportfile[sig.xID] = os.path.split(file_)[1]

    # Make a list of all the signatures, ordered by name and then ID.
    # A set union works on both Python 2 and 3, whereas concatenating
    # the results of dict.keys() fails on Python 3.
    all_ids = sorted(
        set(id2orig) | set(id2new), key=lambda x: (id2orig[x].Name, x))

    lines = []
    w = lines.append
    w("<HTML>")
    title = "%s Report" % "ScoreSignatures"
    if analysis_name:
        title = "%s for %s" % (title, htmllib.EM(analysis_name))
    w(htmllib.HEAD(htmllib.TITLE(parselib.remove_all_tags(title))))
    w("<BODY>")
    w(htmllib.CENTER(htmllib.H1(title)))

    w(htmllib.H3("I. Signatures"))

    # Make a table with each of the signatures.
    rows = [htmllib.TR(
        htmllib.TH("ID", align="LEFT") +
        htmllib.TH("Signature", align="LEFT") +
        htmllib.TH("Preprocessing", align="LEFT") +
        htmllib.TH("Genes", align="LEFT") +
        htmllib.TH("Metagenes", align="LEFT") +
        htmllib.TH("Normalization", align="LEFT")
        )]

    which_changed = set()  # IDs of signatures run with custom parameters
    for id_ in all_ids:
        orig = id2orig[id_]
        sig = id2new.get(id_)

        cols = []

        # ID
        cols.append(htmllib.TD(orig.xID))

        # Name, linked to the per-signature report when there is one.
        name = orig.Name
        report_file = None
        if sig:
            report_file = id2reportfile.get(sig.xID)
        if report_file:
            name = htmllib.A(name, href=report_file)
        cols.append(htmllib.TD(name))

        # If this signature was not run, then skip the rest of the columns.
        if not sig:
            reason = why_dropped.get(orig.xID, "Skipped for unknown reason.")
            # The explanation spans the four remaining columns.
            cols.append(htmllib.TD(highlight(reason), colspan=4))
            rows.append(htmllib.TR("\n".join(cols)))
            continue

        # Preprocessing, Genes and Metagenes columns all follow the same
        # pattern: show the value, flagging it when it is not the default.
        for attr in ("Normalization", "Genes", "Metagenes"):
            value = getattr(sig, attr)
            default = getattr(orig, attr)
            cell = value
            if value != default:
                which_changed.add(sig.xID)
                cell = with_default(value, default)
            cols.append(htmllib.TD(cell))

        # Normalization
        norm_str = describe_normalization(sig)
        if sig.Quantile.upper() != orig.Quantile.upper() or \
           sig.Shift_Scale.upper() != orig.Shift_Scale.upper():
            which_changed.add(sig.xID)
            norm_str = with_default(norm_str, describe_normalization(orig))
        cols.append(htmllib.TD(norm_str))

        rows.append(htmllib.TR("\n".join(cols)))

    w(htmllib.TABLE("\n".join(rows), border=1, cellpadding=3, cellspacing=0))
    w(htmllib.P())
    w(htmllib.B("Table 1: Signatures Analyzed."))
    if not which_changed:
        w("All signatures were run with the default parameters, "
          "as shown above.")
    else:
        w("The customized parameters are highlighted in yellow.")
    w(htmllib.P())

    w(htmllib.H3("II. Results"))

    prob_file = os.path.split(file_layout.PROBABILITIES_PNG)[1]
    w(htmllib.A(htmllib.IMG(height=768, src=prob_file), href=prob_file))
    w(htmllib.P())
    w(htmllib.B("Figure 1: Predictions."))
    w("In this heatmap, each row contains a signature and each column "
      "contains a sample from your data set.")
    if which_changed:
        w("The asterisks denote the signatures that were run with "
          "customized parameters.")
    w("The color corresponds to the probability that a pathway is activated "
      "in a sample.")
    w("Warm colors represent high probabilities, and cool colors low.\n")

    w(htmllib.P())
    prob_file = os.path.split(file_layout.PROBABILITIES_PCL)[1]
    w("The raw values from this plot are available as a "
      'PCL-formatted file: %s' % htmllib.A(prob_file, href=prob_file))

    # Write out the footer.
    end_time = time.time()
    time_str = parselib.pretty_date(start_time)
    run_time = pretty_runtime(start_time, end_time)
    hostname = pretty_hostname()
    w(htmllib.P())
    w(htmllib.HR())
    w(htmllib.EM(
        "This analysis was run on %s on %s. It took %s to complete.\n" %
        (time_str, hostname, run_time)))

    w("</BODY>")
    w("</HTML>")

    # Use a context manager so the report is flushed and closed promptly.
    with open(file_layout.REPORT, 'w') as handle:
        handle.write("\n".join(lines) + "\n")