Esempio n. 1
0
def summarize_report(filenames, matrices, num_factors, start_time,
                     file_layout):
    """Write the BFRMNormalize HTML report to file_layout.REPORT.

    The report contains an overview (the data sets and the number of
    factors used), links to the merged and normalized data sets,
    before/after heatmaps and PCA scatter plots, and a footer with the
    start time, the host name and the elapsed run time.  Every link is
    relative: only the base name of each path in file_layout is used.

    Arguments:
    filenames    Paths of the input data sets.
    matrices     One matrix per entry of filenames, in the same order.
                 Only ncol() is called, to report the number of samples.
    num_factors  Number of factors used for the normalization.
    start_time   When the analysis started, in seconds since the epoch
                 (it is subtracted from time.time()).
    file_layout  Object providing the attributes DS_PROC, DS_FINAL,
                 DS_PROC_HEATMAP, DS_FINAL_HEATMAP, DS_PROC_SCATTER,
                 DS_FINAL_SCATTER and REPORT.

    Relies on the module-level constant NUM_FILTERED_GENES.

    Raises AssertionError if filenames and matrices differ in length or
    if the host name cannot be determined.

    """
    import os
    import time
    import subprocess
    from genomicode import htmllib
    from genomicode import parselib

    assert len(filenames) == len(matrices)

    def linked_image(path, height):
        # Thumbnail of the image that links to the full-size image.
        name = os.path.basename(path)
        return htmllib.A(htmllib.IMG(height=height, src=name), href=name)

    def before_after_row(before_path, after_path, height):
        # Table row with the "before" and "after" images side by side.
        before = htmllib.CENTER(
            htmllib.B("Before Normalization") + htmllib.BR() +
            linked_image(before_path, height))
        after = htmllib.CENTER(
            htmllib.B("After Normalization") + htmllib.BR() +
            linked_image(after_path, height))
        return htmllib.TR(htmllib.TD(before) + htmllib.TD(after))

    lines = []
    w = lines.append
    w("<HTML>")
    w(htmllib.HEAD(htmllib.TITLE("BFRMNormalize Report")))
    w("<BODY>")
    w(htmllib.CENTER(htmllib.H1(htmllib.EM("BFRMNormalize") + " Report")))

    w(htmllib.H3("I.  Overview"))
    files = [os.path.basename(x) for x in filenames]
    data_sets = "one data set"
    if len(files) > 1:
        data_sets = "the following data sets"
    factors = "factor"
    if num_factors > 1:
        factors = "factors"
    items = [
        "I normalized %s using %d %s." % (data_sets, num_factors, factors)]
    for name, matrix in zip(files, matrices):
        items.append(
            htmllib.LI() + "%s (%d samples)" % (name, matrix.ncol()))
    w(htmllib.UL("\n".join(items)))

    w(htmllib.P())
    x = os.path.basename(file_layout.DS_PROC)
    w("The merged gene expression data set is available at " +
      htmllib.A(x, href=x) + ".")
    w(htmllib.BR())
    x = os.path.basename(file_layout.DS_FINAL)
    w("The normalized data set is available at " + htmllib.A(x, href=x) + ".")

    w(htmllib.P())
    w(htmllib.H3("II.  Results"))

    # Make the table of the heatmaps.
    images = before_after_row(
        file_layout.DS_PROC_HEATMAP, file_layout.DS_FINAL_HEATMAP, 480)
    caption = htmllib.TD(
        htmllib.B("Figure 1: Heatmaps. ") + (
            "These heatmaps show the expression patterns in the data before "
            "and after normalization.  "
            "The rows contain the %d genes that exhibit the highest variance "
            "in gene expression across the original data set.  "
            "The columns contain the samples in the data sets provided.  "
            "The genes and samples are in the same order in both heatmaps.  "
            "Warm colors indicate high expression of the gene, and cool "
            "colors indicate low expression." % NUM_FILTERED_GENES),
        colspan=2)
    # NOTE(review): "50%%" is passed through literally here (no
    # %-formatting is applied in this function).  Confirm that htmllib
    # expects the doubled percent sign.
    w(htmllib.TABLE(images + htmllib.TR(caption),
                    border=0, cellspacing=10, width="50%%"))

    w(htmllib.P())

    # Make the table of the scatter plots.
    images = before_after_row(
        file_layout.DS_PROC_SCATTER, file_layout.DS_FINAL_SCATTER, 400)
    projection = (
        "These plots show the samples projected onto the first two principal "
        "components of the expression profiles of the %d genes that "
        "exhibit the highest variance across the original data set.  " %
        NUM_FILTERED_GENES)
    # The batch-effect explanation only makes sense when there is more
    # than one data set to compare.
    batch_effects = ""
    if len(filenames) != 1:
        batch_effects = (
            "Each point represents a sample, and samples from the same data "
            "set have the same color.  "
            "If there are batch effects, the samples from the same data set "
            "(the same color) will cluster together.  "
            "If there are no batch effects, the colors should be mixed.")
    caption = htmllib.TD(
        htmllib.B("Figure 2: PCA Plots. ") + projection + batch_effects,
        colspan=2)
    w(htmllib.TABLE(images + htmllib.TR(caption),
                    border=0, cellspacing=10, width="50%%"))

    # Format the start time and the elapsed run time.
    end_time = time.time()
    time_str = parselib.pretty_date(start_time)
    # divmod keeps the minutes an integer regardless of the division
    # semantics of the running interpreter.
    num_min, num_secs = divmod(int(end_time - start_time), 60)
    if num_min == 0:
        run_time = "%ss" % parselib.pretty_int(num_secs)
    else:
        run_time = "%sm %ss" % (parselib.pretty_int(num_min), num_secs)

    # Get the hostname.  communicate() closes the pipes and waits for the
    # child, so no file descriptors or zombie processes are left behind.
    proc = subprocess.Popen(["hostname"],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            close_fds=True,
                            universal_newlines=True)
    output, _ = proc.communicate()
    hostname = output.strip()
    assert hostname, "I could not get the hostname."

    w(htmllib.P())
    w(htmllib.HR())
    w(
        htmllib.EM(
            "This analysis was run on %s on %s.  It took %s to complete." %
            (time_str, hostname, run_time)))

    w("</BODY>")
    w("</HTML>")

    with open(file_layout.REPORT, 'w') as handle:
        handle.write("\n".join(lines) + "\n")
Esempio n. 2
0
    def run(self, network, antecedents, out_attributes, user_options,
            num_cores, outfile):
        """Build the "Normalization Results" HTML report in a folder.

        Copies the file (or directory) behind each antecedent into the
        output folder, draws the network to network.png and writes
        report.html, which links to the copies by base name.

        Arguments:
        network      Passed to bie3.plot_network_gv to draw network.png.
        antecedents  Exactly six data nodes, each with an `identifier`
                     path.  As used by the report: 0 is the result file
                     that is linked and whose data.attributes fill the
                     parameter table, 1 and 2 are the "Before" and
                     "After" images, 3-5 are the remaining figures.
        outfile      Path of the output folder (created if missing).

        out_attributes, user_options and num_cores are not used here.

        Raises ValueError if antecedents does not hold exactly six nodes
        (after the files have been copied).

        """
        import os
        import shutil
        from Betsy import bie3
        from genomicode import parselib
        from genomicode import htmllib

        outfile_folder = outfile
        outfile = os.path.join(outfile_folder, 'report.html')
        if not os.path.exists(outfile_folder):
            os.mkdir(outfile_folder)

        # Copy every input next to the report and remember its base name.
        result_files = []
        for index, data_node in enumerate(antecedents):
            filename = data_node.identifier
            base_name = os.path.split(filename)[-1]
            # Rename one of the pcaplot files (the third input) so that
            # it gets its own name in the output folder.
            if index == 2:
                base_name = 'after_' + base_name
            new_name = os.path.join(outfile_folder, base_name)
            if os.path.isdir(filename):
                shutil.copytree(filename, new_name)
            else:
                shutil.copyfile(filename, new_name)
            result_files.append(base_name)

        # The unpacking doubles as a check that there are six inputs.
        data_node1, _, _, _, _, _ = antecedents

        def linked_image(src, height=500):
            # Thumbnail that links to the full-size image.
            return htmllib.A(htmllib.IMG(height=height, src=src), href=src)

        # Write the report.html.
        lines = []
        w = lines.append
        w("<HTML>")
        title = "Normalization Results"
        w(htmllib.HEAD(htmllib.TITLE(parselib.remove_all_tags(title))))
        w("<BODY>")
        w(htmllib.CENTER(htmllib.H1(title)))
        w('I generated a file that contains the normalized gene expression values'
          )
        w(htmllib.P())
        w(htmllib.A(result_files[0], result_files[0]))
        w(htmllib.P())
        w(htmllib.A("Methods", href="#methods_normalization"))
        w(htmllib.P())

        # Before/after images side by side, with a header row underneath.
        rows = [
            htmllib.TR(
                htmllib.TD(linked_image(result_files[1]), align="CENTER") +
                htmllib.TD(linked_image(result_files[2]), align="CENTER")),
            htmllib.TR(
                htmllib.TH("Before", align="CENTER") +
                htmllib.TH("After", align="CENTER")),
        ]
        w(
            htmllib.TABLE("\n".join(rows),
                          border=None,
                          cellpadding=3,
                          cellspacing=0))
        w(htmllib.P())
        w(htmllib.P())
        w(htmllib.B(
            'Figure 1: This pca plot shows the similarities among your samples'
        ))

        # Each remaining figure is an image followed by its caption.
        figures = [
            (result_files[3],
             'Figure 2: This boxplot shows the distribution of signal values'),
            (result_files[4],
             'Figure 3: This plot shows the values of ACTB and TUBB genes'),
            (result_files[5],
             'Figure 4: This plot shows the average values control genes'),
        ]
        for image, caption in figures:
            w(htmllib.P())
            w(linked_image(image))
            w(htmllib.P())
            w(htmllib.B(caption))

        # Methods.
        w(htmllib.HR())
        w(
            htmllib.A("<methods_normalization>",
                      name="methods_normalization"))
        w(htmllib.CENTER(htmllib.H2("Methods")))
        w(htmllib.H3("1.Normalization File"))
        w('To generate this file, I ran the following analysis:')
        w(htmllib.P())
        bie3.plot_network_gv(os.path.join(outfile_folder, "network.png"),
                             network)
        w(linked_image("network.png"))

        w('I used the following parameters:')
        attributes = data_node1.data.attributes
        rows = [
            htmllib.TR(
                htmllib.TH("Parameter", align="LEFT") +
                htmllib.TH("Value", align="LEFT"))
        ]
        for key in attributes.keys():
            rows.append(
                htmllib.TR(
                    htmllib.TD(key, align="LEFT") +
                    htmllib.TD(attributes[key], align="LEFT")))
        w(
            htmllib.TABLE("\n".join(rows),
                          border=1,
                          cellpadding=3,
                          cellspacing=0))
        w(htmllib.P())
        w(htmllib.H3("2. PCA analysis"))
        w('I made a principal component plot that shows the similarities among your samples.'
          )
        w(htmllib.P())
        w(htmllib.H3("3. Signal distribution"))
        w('I made a box plot that shows the distribution of signal values.'
          )
        w(htmllib.P())
        w(htmllib.H3("4. Control signal"))
        w('I made two plots that show the values of control signal.')
        w(htmllib.P())

        # Footer (no timestamp or host name is written).
        w(htmllib.P())
        w(htmllib.HR())
        w("</BODY>")
        w("</HTML>")

        with open(outfile, 'w') as handle:
            handle.write("\n".join(lines) + "\n")
    def run(self, network, antecedents, out_attributes, user_options,
            num_cores, outfile):
        """Build the "Classification Results" HTML report in a folder.

        Copies the file (or directory) behind each antecedent into the
        output folder, draws the network to network.png and writes
        report.html with one section for SVM and one for Weighted Voting.

        Arguments:
        network       Passed to bie3.plot_network_gv to draw network.png.
        antecedents   Exactly eleven data nodes, each with an
                      `identifier` path.  As used by the report:
                        0     matrix read with arrayio; its row names
                              are the genes listed in the gene tables
                        1-5   SVM: prediction link, image shown above
                              it, LOOCV link, image shown above it, PCA
                              plot
                        6-10  Weighted Voting, same order as 1-5
                      The data.attributes of nodes 1 and 6 fill the two
                      parameter tables.
        user_options  Mapping; 'num_features_value', when present, caps
                      the number of genes considered for the Weighted
                      Voting gene table (default 10).
        outfile       Path of the output folder (created if missing).

        out_attributes and num_cores are not used here.

        Raises ValueError if antecedents does not hold exactly eleven
        nodes (after the files have been copied).

        """
        import os
        import shutil
        from Betsy import bie3
        import arrayio
        from genomicode import htmllib
        from genomicode import parselib

        GENE_COLUMNS = 3  # Columns in a gene table.
        MAX_GENE_ROWS = 8  # Rows shown before genes are left out.

        outfile_folder = outfile
        outfile = os.path.join(outfile_folder, 'report.html')
        if not os.path.exists(outfile_folder):
            os.mkdir(outfile_folder)

        # Copy every input next to the report and remember its base name.
        result_files = []
        for data_node in antecedents:
            filename = data_node.identifier
            base_name = os.path.split(filename)[-1]
            new_name = os.path.join(outfile_folder, base_name)
            if os.path.isdir(filename):
                shutil.copytree(filename, new_name)
            else:
                shutil.copyfile(filename, new_name)
            result_files.append(base_name)

        # The unpacking doubles as a check that there are eleven inputs.
        (_, svm_node, _, _, _, _, wv_node, _, _, _, _) = antecedents

        def linked_image(src, height=400):
            # Thumbnail that links to the full-size image.
            return htmllib.A(htmllib.IMG(height=height, src=src), href=src)

        def gene_table(genes):
            # Lay the genes out in a grid of GENE_COLUMNS columns and at
            # most MAX_GENE_ROWS rows.  A partial last row is padded with
            # empty cells so that no gene is silently dropped.  Returns
            # the table and the number of genes that did not fit.
            nrow = min(MAX_GENE_ROWS,
                       (len(genes) + GENE_COLUMNS - 1) // GENE_COLUMNS)
            capacity = nrow * GENE_COLUMNS
            shown = list(genes[:capacity])
            shown.extend([''] * (capacity - len(shown)))
            rows = []
            for i in range(nrow):
                cells = ['<td>' + gene + '</td>'
                         for gene in shown[GENE_COLUMNS * i:
                                           GENE_COLUMNS * (i + 1)]]
                rows.append(htmllib.TR("\n".join(cells)))
            table = htmllib.TABLE(
                "\n".join(rows), border=1, cellpadding=3, cellspacing=0)
            return table, max(0, len(genes) - capacity)

        def results_table(genes, base, first_figure):
            # Results of one classifier.  `base` is the index just before
            # the classifier's five files in result_files; the figures
            # are numbered first_figure .. first_figure + 2.
            table, more_genes = gene_table(genes)
            rows = [
                htmllib.TR(
                    htmllib.TD(table, align='CENTER') +
                    htmllib.TD(linked_image(result_files[base + 5]),
                               align='CENTER')),
                htmllib.TR(
                    htmllib.TD(htmllib.A(
                        str(more_genes) + ' more genes', result_files[0]),
                               align='LEFT') +
                    htmllib.TD(htmllib.B(
                        'Figure %d: This figure shows the PCA plot of '
                        'samples colored by prediction' % first_figure),
                               align='CENTER')),
                htmllib.TR(
                    htmllib.TD(linked_image(result_files[base + 4]),
                               align="CENTER") +
                    htmllib.TD(linked_image(result_files[base + 2]),
                               align="CENTER")),
                htmllib.TR(
                    htmllib.TH(htmllib.A(
                        "Figure %d. Loocv result on training data" %
                        (first_figure + 1), result_files[base + 3]),
                               align="CENTER") +
                    htmllib.TH(htmllib.A(
                        "Figure %d. Prediction result on test data" %
                        (first_figure + 2), result_files[base + 1]),
                               align="CENTER")),
            ]
            return htmllib.TABLE("\n".join(rows),
                                 border=None,
                                 cellpadding=3,
                                 cellspacing=0)

        def parameter_table(attributes):
            # Two-column table listing each attribute and its value.
            rows = [
                htmllib.TR(
                    htmllib.TH("Parameter", align="LEFT") +
                    htmllib.TH("Value", align="LEFT"))
            ]
            for key in attributes.keys():
                rows.append(
                    htmllib.TR(
                        htmllib.TD(key, align="LEFT") +
                        htmllib.TD(attributes[key], align="LEFT")))
            return htmllib.TABLE("\n".join(rows),
                                 border=1,
                                 cellpadding=3,
                                 cellspacing=0)

        # Write the report.html.
        lines = []
        w = lines.append
        w("<HTML>")
        title = "Classification Results"
        w(htmllib.HEAD(htmllib.TITLE(parselib.remove_all_tags(title))))
        w("<BODY>")
        w(htmllib.CENTER(htmllib.H1(title)))

        # Both sections list genes from the same matrix, so read it once.
        M = arrayio.read(os.path.join(outfile_folder, result_files[0]))
        ids = M._row_order
        all_genes = M.row_names(ids[0])

        #------------------------------------
        w(htmllib.H3("SVM"))
        w(htmllib.P())
        w(htmllib.A("Methods", href="#methods_svm"))
        w(htmllib.P())
        w(htmllib.B('Table 1: Table of genes used in classification'))
        w(htmllib.P())
        w(results_table(all_genes, 0, 1))
        w(htmllib.P())

        #------------------------------------
        w(htmllib.H3("Weighted Voting"))
        w(htmllib.P())
        w(htmllib.A("Methods", href="#methods_wv"))
        w(htmllib.P())
        # Numbering continues from the SVM section (Table 1, Figures
        # 1-3) so that every table and figure number in the report is
        # unique.
        w(htmllib.B('Table 2: Table of genes used in classification'))
        w(htmllib.P())
        nfeature = 10
        if 'num_features_value' in user_options:
            nfeature = user_options['num_features_value']
        w(results_table(all_genes[0:nfeature], 5, 4))
        w(htmllib.P())

        #--------------------------------
        w(htmllib.HR())
        w(htmllib.A("<methods_svm>", name="methods_svm"))
        w('To generate these files, I ran the following analysis:')
        bie3.plot_network_gv(os.path.join(outfile_folder, "network.png"),
                             network)
        w(htmllib.P())
        w(linked_image("network.png", height=500))
        w(htmllib.CENTER(htmllib.H2("SVM Methods")))
        w(htmllib.H3("Prediction Result"))
        w('I used the following parameters:')
        w(parameter_table(svm_node.data.attributes))
        w(htmllib.P())

        w(htmllib.A("<methods_wv>", name="methods_wv"))
        w(htmllib.CENTER(htmllib.H2("Weighted Voting Methods")))
        w(htmllib.H3("Prediction Result"))
        w('I used the following parameters:')
        w(parameter_table(wv_node.data.attributes))
        w(htmllib.P())

        # Footer (no timestamp or host name is written).
        w(htmllib.P())
        w(htmllib.HR())
        w("</BODY>")
        w("</HTML>")

        with open(outfile, 'w') as handle:
            handle.write("\n".join(lines) + "\n")
    def run(self, network, antecedents, out_attributes, user_options,
            num_cores, outfile):
        """Build the "Normalization Results" HTML report in a folder.

        Copies the inputs into the output folder (through reportlib),
        draws the network to network.png and writes report.html, which
        links to the copied files.

        Arguments:
        network      Passed to bie3.plot_network_gv to draw network.png.
        antecedents  Input nodes, in this order:
                       0  SignalFile        Preprocessed gene expression
                                            data.
                       1  IntensityPlot     Box plot of signal intensity
                                            values.
                       2  BiotinPlot
                       3  PcaPlot           Has normalization.
                       4  ActbPlot          From _SignalFile_Impute.
                       5  PcaPlot           No normalization or anything.
                       6  HousekeepingPlot
                       7  Hyb_barPlot       No normalization or anything.
                       8  ControlFile       File with Illumina control
                                            probes.
        outfile      Path of the output folder (created if missing).

        out_attributes, user_options and num_cores are not used here.

        """
        import os
        from genomicode import parselib
        from genomicode import htmllib
        from Betsy import reportlib
        from Betsy import bie3

        IMG_HEIGHT = 400

        out_path = outfile
        outfile = os.path.join(out_path, 'report.html')
        if not os.path.exists(out_path):
            os.mkdir(out_path)

        def rename_pca_file(i, in_file):
            # Input 3 is the PCA plot made after normalization; prefix
            # its name so that it differs from the "before" plot's name.
            if i == 3:
                return 'after_%s' % in_file
            return in_file

        def linked_image(src):
            # Thumbnail that links to the full-size image.
            return htmllib.A(htmllib.IMG(height=IMG_HEIGHT, src=src),
                             href=src)

        # Each entry is (in_file, out_file, in_filename, out_filename).
        filenames = reportlib.extract_filenames(antecedents, out_path,
                                                rename_pca_file)
        for _, _, in_filename, out_filename in filenames:
            reportlib.copy_file_or_path(in_filename, out_filename)

        # The report links to the out_file of each input.
        (signal_file, intensity_file, biotin_file, after_pca_file,
         actb_file, before_pca_file, housekeeping_file, hyb_file,
         control_file) = [filenames[i][1] for i in range(9)]

        signal_node = antecedents[0]

        # Write the report.html.
        lines = []
        w = lines.append

        w("<HTML>")
        title = "Normalization Results"
        w(htmllib.HEAD(htmllib.TITLE(parselib.remove_all_tags(title))))
        w("<BODY>")
        w(htmllib.CENTER(htmllib.H1(title)))

        # Provide a link to the signal file.
        w('Preprocessed signal values: ')
        # TODO: Show the attributes for this data.
        w(htmllib.A(signal_file, signal_file))
        w(htmllib.P())

        # Show the PCA plot before and after normalization.
        rows = [
            htmllib.TR(
                htmllib.TD(linked_image(before_pca_file), align="CENTER") +
                htmllib.TD(linked_image(after_pca_file), align="CENTER")),
            htmllib.TR(
                htmllib.TH("Before", align="CENTER") +
                htmllib.TH("After", align="CENTER")),
        ]
        w(
            htmllib.TABLE("\n".join(rows),
                          border=None,
                          cellpadding=3,
                          cellspacing=0))
        w(htmllib.P())
        w(htmllib.P())
        w(htmllib.B(
            'Figure 1: This pca plot shows the similarities among your samples'
        ))
        w(htmllib.P())

        # The remaining figures: an image followed by its caption.
        figures = [
            (intensity_file,
             'Figure 2: This boxplot shows the distribution of signal values'),
            (actb_file,
             'Figure 3: This plot shows the values of ACTB and TUBB genes'),
            (biotin_file,
             'Figure 4: This plot shows the value of biotin control genes'),
            (housekeeping_file,
             'Figure 5: This plot shows the value of housekeeping control genes'),
            (hyb_file,
             'Figure 6: This barplot shows the distribution control values'),
        ]
        for image, caption in figures:
            w(linked_image(image))
            w(htmllib.P())
            w(htmllib.B(caption))
            w(htmllib.P())

        # Methods.
        w(htmllib.A("<methods_normalization>", name="methods_normalization"))
        w(htmllib.CENTER(htmllib.H2("Methods")))

        w(htmllib.H3("1.Normalization File"))
        w('To generate this file, I ran the following analysis:')
        w(htmllib.P())
        bie3.plot_network_gv(os.path.join(out_path, "network.png"), network)
        w(linked_image("network.png"))
        w(htmllib.P())

        w('I used the following parameters:')
        attributes = signal_node.data.attributes
        rows = [
            htmllib.TR(
                htmllib.TH("Parameter", align="LEFT") +
                htmllib.TH("Value", align="LEFT"))
        ]
        for key in attributes.keys():
            rows.append(
                htmllib.TR(
                    htmllib.TD(key, align="LEFT") +
                    htmllib.TD(attributes[key], align="LEFT")))
        w(
            htmllib.TABLE("\n".join(rows),
                          border=1,
                          cellpadding=3,
                          cellspacing=0))
        w(htmllib.P())
        w(htmllib.H3("2. PCA analysis"))
        w('I made a principal component plot that shows the similarities among your samples.'
          )
        w(htmllib.P())
        w(htmllib.H3("3. Signal distribution"))
        w('I made a box plot that shows the distribution of signal values.')
        w(htmllib.P())
        w(htmllib.H3("4. Control signal"))
        w('I made two plots that show the values of control signal.')
        w(htmllib.P())
        # NOTE(review): this heading repeats the title of section 4 even
        # though the text is about the hybridization controls.  Confirm
        # the intended title.
        w(htmllib.H3("5. Control signal"))
        w('I made a bar plot that shows the hybridization controls.')
        w(htmllib.P())
        w('The control file is ')
        w(htmllib.A(control_file, control_file))
        w(htmllib.P())

        # Footer (no timestamp or host name is written).
        w(htmllib.P())
        w(htmllib.HR())
        w("</BODY>")
        w("</HTML>")

        with open(outfile, 'w') as handle:
            handle.write("\n".join(lines) + "\n")
Esempio n. 5
0
    def run(self, network, antecedents, out_attributes, user_options,
            num_cores, outfile):
        """Assemble the "Geneset Analysis Results" report folder.

        Copies every antecedent's file (or directory) into the report
        folder, renders the network diagram to ``network.png`` there, and
        writes ``report.html`` linking to all of them.

        Args:
            network: passed unchanged to ``bie3.plot_network_gv``.
            antecedents: exactly two data nodes.  Each must expose
                ``identifier`` (path of the file or directory to copy).
                The first node's copy is linked as the result file and
                its ``data.attributes`` fill the parameter table; the
                second node's copy is listed with ``os.listdir``, so it
                has to be a directory (of plot images, presumably).
            out_attributes: unused here.
            user_options: unused here.
            num_cores: unused here.
            outfile: path of the report *folder*; it is created when
                missing and ``report.html`` is written inside it.

        Raises:
            ValueError: if ``antecedents`` does not hold exactly two nodes.
            OSError / IOError: propagated from the file-system operations
                (for instance ``shutil.copytree`` refuses an existing
                destination).
        """
        import os
        import shutil
        from Betsy import bie3
        from genomicode import htmllib
        from genomicode import parselib

        outfile_folder = outfile
        outfile = os.path.join(outfile_folder, 'report.html')
        if not os.path.exists(outfile_folder):
            os.mkdir(outfile_folder)

        # Copy the inputs next to the report so the HTML can refer to them
        # by bare (relative) name.
        result_files = []
        for data_node in antecedents:
            filename = data_node.identifier
            new_name = os.path.join(outfile_folder,
                                    os.path.split(filename)[-1])
            if os.path.isdir(filename):
                shutil.copytree(filename, new_name)
            else:
                shutil.copyfile(filename, new_name)
            result_files.append(os.path.split(new_name)[-1])

        # Unpacking doubles as a check that exactly two nodes were given;
        # only the first one is needed below.
        data_node1, _ = antecedents

        lines = []
        w = lines.append
        w("<HTML>")
        title = "Geneset Analysis Results"
        x = parselib.remove_all_tags(title)
        w(htmllib.HEAD(htmllib.TITLE(x)))
        w("<BODY>")
        w(htmllib.CENTER(htmllib.H1(title)))
        w('I generated a file that contains the analysis result of the geneset'
          )
        w(htmllib.P())
        w(htmllib.A(result_files[0], result_files[0]))
        w(htmllib.P())
        w(htmllib.A("Methods", href="#methods"))
        w(htmllib.P())

        # One figure per entry of the copied plot directory, numbered in
        # whatever order os.listdir returns them.
        plot_dir = result_files[1]
        plot_names = os.listdir(os.path.join(outfile_folder, plot_dir))
        for figure_num, plot_name in enumerate(plot_names, 1):
            plot_href = os.path.join(plot_dir, plot_name)
            w(htmllib.A(htmllib.IMG(height=500, src=plot_href),
                        href=plot_href))
            w(htmllib.P())
            name = 'Figure ' + str(figure_num) + ': Geneset Plot.'
            w(htmllib.B(name))
        w(htmllib.HR())

        # Named anchor that the "Methods" link above points at.
        w(htmllib.A("<methods>", name="methods"))
        w(htmllib.CENTER(htmllib.H2("Methods")))
        w(htmllib.H3("1.Result File"))
        w('To generate this file, I ran the following analysis:')
        bie3.plot_network_gv(os.path.join(outfile_folder, "network.png"),
                             network)
        w(
            htmllib.A(htmllib.IMG(height=500, src="network.png"),
                      href="network.png"))
        w(htmllib.P())

        # Table of the first node's attributes.
        w('I used the following parameters:')
        rows = []
        x = htmllib.TR(
            htmllib.TH("Parameter", align="LEFT") +
            htmllib.TH("Value", align="LEFT"))
        rows.append(x)
        for key in data_node1.data.attributes.keys():
            x = htmllib.TR(
                htmllib.TD(key, align="LEFT") +
                htmllib.TD(data_node1.data.attributes[key], align="LEFT"))
            rows.append(x)
        w(
            htmllib.TABLE("\n".join(rows),
                          border=1,
                          cellpadding=3,
                          cellspacing=0))
        w(htmllib.P())

        # Footer.
        w(htmllib.P())
        w(htmllib.HR())
        w("</BODY>")
        w("</HTML>")

        # Use a context manager so the handle is flushed and closed even
        # if the write fails.
        with open(outfile, 'w') as handle:
            handle.write("\n".join(lines) + "\n")
Esempio n. 6
0
    def run(self, network, antecedents, out_attributes, user_options,
            num_cores, outfile):
        """Assemble the "Heatmap Results" report folder.

        Copies the antecedent's file (or directory) into the report
        folder, renders the network diagram to ``network.png`` there, and
        writes ``report.html`` showing the copied file as an image.

        Args:
            network: passed unchanged to ``bie3.plot_network_gv``.
            antecedents: a single data node (not a sequence).  It must
                expose ``identifier`` (path of the file to copy) and
                ``data.attributes`` (mapping shown in the parameter table).
            out_attributes: unused here.
            user_options: unused here.
            num_cores: unused here.
            outfile: path of the report *folder*; it is created when
                missing and ``report.html`` is written inside it.

        Raises:
            OSError / IOError: propagated from the file-system operations.
        """
        import os
        import shutil
        from Betsy import bie3
        from genomicode import parselib
        from genomicode import htmllib

        in_data = antecedents
        outfile_folder = outfile
        outfile = os.path.join(outfile_folder, 'report.html')
        if not os.path.exists(outfile_folder):
            os.mkdir(outfile_folder)

        # Copy the input next to the report so the HTML can refer to it by
        # bare (relative) name.
        filename = in_data.identifier
        new_name = os.path.join(outfile_folder, os.path.split(filename)[-1])
        if os.path.isdir(filename):
            shutil.copytree(filename, new_name)
        else:
            shutil.copyfile(filename, new_name)
        heatmap_file = os.path.split(new_name)[-1]

        lines = []
        w = lines.append
        w("<HTML>")
        title = "Heatmap Results"
        x = parselib.remove_all_tags(title)
        w(htmllib.HEAD(htmllib.TITLE(x)))
        w("<BODY>")
        w(htmllib.CENTER(htmllib.H1(title)))
        w(htmllib.P())
        w(htmllib.A("Methods", href="#methods_clustering"))
        w(htmllib.P())
        w(
            htmllib.A(htmllib.IMG(height=500, src=heatmap_file),
                      href=heatmap_file))
        w(htmllib.P())
        # Adjacent literals instead of a backslash continuation inside the
        # string, which used to embed the source indentation in the caption.
        name = ('Figure 1: In this heatmap, each row contains a signature '
                'and each column contains a sample from your data set.')
        w(htmllib.B(name))

        w(htmllib.HR())
        # Named anchor that the "Methods" link above points at.
        w(htmllib.A("<methods_clustering>", name="methods_clustering"))
        w(htmllib.CENTER(htmllib.H2("Methods")))
        w(htmllib.H3("1.Heatmap File"))
        w('To generate this file, I ran the following analysis:')
        bie3.plot_network_gv(os.path.join(outfile_folder, "network.png"),
                             network)
        w(
            htmllib.A(htmllib.IMG(height=500, src="network.png"),
                      href="network.png"))
        w(htmllib.P())
        w('I used the following parameters:')
        # NOTE(review): this heading repeats the "1.Heatmap File" heading
        # emitted a few lines above; it looks like a copy-paste leftover.
        # Kept so the rendered report is unchanged -- confirm and remove.
        w(htmllib.H3("1. Heatmap File"))

        # Table of the node's attributes.
        rows = []
        x = htmllib.TR(
            htmllib.TH("Parameter", align="LEFT") +
            htmllib.TH("Value", align="LEFT"))
        rows.append(x)
        for key in in_data.data.attributes.keys():
            x = htmllib.TR(
                htmllib.TD(key, align="LEFT") +
                htmllib.TD(in_data.data.attributes[key], align="LEFT"))
            rows.append(x)
        w(
            htmllib.TABLE("\n".join(rows),
                          border=1,
                          cellpadding=3,
                          cellspacing=0))
        w(htmllib.P())

        # Footer.
        w(htmllib.P())
        w(htmllib.HR())
        w("</BODY>")
        w("</HTML>")

        # Use a context manager so the handle is flushed and closed even
        # if the write fails.
        with open(outfile, 'w') as handle:
            handle.write("\n".join(lines) + "\n")
    def run(self, network, antecedents, out_attributes, user_options,
            num_cores, outfile):
        """Assemble the "Normalization Results" report folder.

        Copies the six antecedent files (or directories) into the report
        folder, renders the network diagram to ``network.png`` there, and
        writes ``report.html`` with the signal-file link, the before/after
        PCA plots, the remaining plots and a parameter table.

        Args:
            network: passed unchanged to ``bie3.plot_network_gv``.
            antecedents: exactly six data nodes, in the order listed in
                the comment below.  Each must expose ``identifier``; the
                first one's ``data.attributes`` fill the parameter table.
            out_attributes: unused here.
            user_options: unused here.
            num_cores: unused here.
            outfile: path of the report *folder*; it is created when
                missing and ``report.html`` is written inside it.

        Raises:
            ValueError: if ``antecedents`` does not hold exactly six nodes.
            OSError / IOError: propagated from the file-system operations.
        """
        import os
        import shutil
        from genomicode import parselib
        from genomicode import htmllib
        from Betsy import bie3

        out_path = outfile
        outfile = os.path.join(out_path, 'report.html')
        if not os.path.exists(out_path):
            os.mkdir(out_path)

        # The unpack enforces the expected number of inputs; only the
        # first node is referenced by name afterwards.
        (data_node1, data_node2, data_node3, data_node4, data_node5,
         data_node6) = antecedents

        # Make a list of the (relative) files for each input nodes:
        # 0  SignalFile     Preprocessed gene expression data.
        # 1  IntensityPlot  Box plot of signal intensity values.
        # 2  ControlPlot    AFFX control probes.
        # 3  PcaPlot        Has normalization.
        # 4  ActbPlot       From _SignalFile_Impute.
        # 5  PcaPlot        No normalization or anything.
        result_files = []
        for index, data_node in enumerate(antecedents):
            in_filename = data_node.identifier  # full path
            in_file = os.path.split(in_filename)[1]
            out_file = in_file
            # Input 3 is copied under an "after_" prefix so it gets a name
            # distinct from the other PCA plot (input 5).
            if index == 3:
                out_file = "after_%s" % in_file
            out_filename = os.path.join(out_path, out_file)
            if os.path.isdir(in_filename):
                shutil.copytree(in_filename, out_filename)
            else:
                shutil.copyfile(in_filename, out_filename)
            result_files.append(out_file)

        IMG_HEIGHT = 400

        lines = []
        w = lines.append

        w("<HTML>")
        title = "Normalization Results"
        x = parselib.remove_all_tags(title)
        w(htmllib.HEAD(htmllib.TITLE(x)))
        w("<BODY>")
        w(htmllib.CENTER(htmllib.H1(title)))

        # Provide a link to the signal file.
        w('Preprocessed signal values: ')
        # TODO: Show the attributes for this data.
        w(htmllib.A(result_files[0], result_files[0]))
        w(htmllib.P())

        # Show the PCA plot before and after normalization.
        rows = []
        col1 = htmllib.A(htmllib.IMG(height=IMG_HEIGHT, src=result_files[5]),
                         href=result_files[5])
        col2 = htmllib.A(htmllib.IMG(height=IMG_HEIGHT, src=result_files[3]),
                         href=result_files[3])
        x = htmllib.TR(
            htmllib.TD(col1, align="CENTER") +
            htmllib.TD(col2, align="CENTER"))
        rows.append(x)
        x = htmllib.TR(
            htmllib.TH("Before", align="CENTER") +
            htmllib.TH("After", align="CENTER"))
        rows.append(x)
        w(
            htmllib.TABLE("\n".join(rows),
                          border=None,
                          cellpadding=3,
                          cellspacing=0))
        w(htmllib.P())
        w(htmllib.P())
        name = 'Figure 1: This pca plot shows the similarities among your samples'
        w(htmllib.B(name))
        w(htmllib.P())

        # Show the distribution of the signal values.
        w(
            htmllib.A(htmllib.IMG(height=IMG_HEIGHT, src=result_files[1]),
                      href=result_files[1]))
        w(htmllib.P())
        name = 'Figure 2: This boxplot shows the distribution of signal values'
        w(htmllib.B(name))
        w(htmllib.P())

        # Show the actin and tubulin values.
        w(
            htmllib.A(htmllib.IMG(height=IMG_HEIGHT, src=result_files[4]),
                      href=result_files[4]))
        w(htmllib.P())
        name = 'Figure 3: This plot shows the values of ACTB and TUBB genes'
        w(htmllib.B(name))
        w(htmllib.P())

        # Affymetrix control genes.
        w(
            htmllib.A(htmllib.IMG(height=IMG_HEIGHT, src=result_files[2]),
                      href=result_files[2]))
        name = 'Figure 4: This plot shows the average values Affymetrix control genes'
        w(htmllib.P())
        w(htmllib.B(name))

        w(htmllib.HR())
        w(htmllib.A("<methods_normalization>", name="methods_normalization"))
        w(htmllib.CENTER(htmllib.H2("Methods")))
        w(htmllib.H3("1.Normalization File"))
        w('To generate this file, I ran the following analysis:')
        w(htmllib.P())
        bie3.plot_network_gv(os.path.join(out_path, "network.png"), network)
        w(
            htmllib.A(htmllib.IMG(height=IMG_HEIGHT, src="network.png"),
                      href="network.png"))
        w(htmllib.P())

        # Table of the first node's attributes.
        w('I used the following parameters:')
        rows = []
        x = htmllib.TR(
            htmllib.TH("Parameter", align="LEFT") +
            htmllib.TH("Value", align="LEFT"))
        rows.append(x)
        for key in data_node1.data.attributes.keys():
            x = htmllib.TR(
                htmllib.TD(key, align="LEFT") +
                htmllib.TD(data_node1.data.attributes[key], align="LEFT"))
            rows.append(x)
        w(
            htmllib.TABLE("\n".join(rows),
                          border=1,
                          cellpadding=3,
                          cellspacing=0))
        w(htmllib.P())

        # Footer.
        w(htmllib.P())
        w(htmllib.HR())
        w("</BODY>")
        w("</HTML>")

        # Use a context manager so the handle is flushed and closed even
        # if the write fails.
        with open(outfile, 'w') as handle:
            handle.write("\n".join(lines) + "\n")
Esempio n. 8
0
def summarize_report(
    analysis_name, signatures, orig_signatures, report_files, start_time,
    why_dropped, file_layout):
    """Write the ScoreSignatures HTML report to ``file_layout.REPORT``.

    The report has a table of all signatures (Table 1), the probability
    heatmap with a link to its PCL file, and a footer with the run date,
    host and duration.  Values that differ from the original signature
    definition are highlighted, with the default shown underneath.

    Args:
        analysis_name: optional name appended to the title; falsy values
            leave the title as "ScoreSignatures Report".
        signatures: signatures that were actually run.  Each must expose
            ``xID``, ``Normalization``, ``Genes``, ``Metagenes``,
            ``Quantile`` and ``Shift_Scale`` (the last two are strings
            compared case-insensitively with "YES").
        orig_signatures: the signature definitions before customization;
            same attributes plus ``Name``.  Every ID in ``signatures`` is
            looked up here, so an ID that is missing raises ``KeyError``.
        report_files: one per-signature report path per entry of
            ``signatures``, in the same order; only the base name is
            used for the link.
        start_time: start of the analysis, as accepted by
            ``parselib.pretty_date`` and ``pretty_runtime``.
        why_dropped: mapping of signature ID to the text shown for a
            signature that was not run.
        file_layout: object providing ``PROBABILITIES_PNG``,
            ``PROBABILITIES_PCL`` and ``REPORT`` paths.

    Raises:
        AssertionError: if ``signatures`` and ``report_files`` differ in
            length.
    """
    import time
    from genomicode import parselib
    from genomicode import htmllib

    def highlight(s):
        return htmllib.SPAN(s, style="background-color:yellow")

    def smaller(s):
        return htmllib.FONT(s, size=-1)

    def show_change(current, default):
        # Customized value highlighted, with the default in small print.
        return "%s<BR>%s" % (
            highlight(current), smaller(htmllib.EM("default: %s" % default)))

    def normalization_label(signature):
        # Human-readable list of the normalization steps switched on.
        steps = []
        if signature.Quantile.upper() == "YES":
            steps.append("Quantile")
        if signature.Shift_Scale.upper() == "YES":
            steps.append("Shift-Scale")
        if steps:
            return " and ".join(steps)
        return "None"

    id2orig = {}
    for sig in orig_signatures:
        id2orig[sig.xID] = sig

    id2new = {}
    for sig in signatures:
        id2new[sig.xID] = sig

    assert len(signatures) == len(report_files)
    id2reportfile = {}
    for sig, file_ in zip(signatures, report_files):
        # The report_file in the HTML should be a relative path.
        id2reportfile[sig.xID] = os.path.split(file_)[1]

    # Every known ID, ordered by original name (ID breaks ties).  A set
    # union is used because dict.keys() results cannot be added together
    # on Python 3.
    all_ids = sorted(
        set(id2orig) | set(id2new), key=lambda x: (id2orig[x].Name, x))

    lines = []
    w = lines.append
    w("<HTML>")
    title = "ScoreSignatures Report"
    if analysis_name:
        title = "%s for %s" % (title, htmllib.EM(analysis_name))
    x = parselib.remove_all_tags(title)
    w(htmllib.HEAD(htmllib.TITLE(x)))
    w("<BODY>")
    w(htmllib.CENTER(htmllib.H1(title)))

    w(htmllib.H3("I.  Signatures"))

    # Make a table with each of the signatures.
    rows = []

    x = htmllib.TR(
        htmllib.TH("ID", align="LEFT") +
        htmllib.TH("Signature", align="LEFT") +
        htmllib.TH("Preprocessing", align="LEFT") +
        htmllib.TH("Genes", align="LEFT") +
        htmllib.TH("Metagenes", align="LEFT") +
        htmllib.TH("Normalization", align="LEFT")
        )
    rows.append(x)

    which_changed = {}  # ID -> 1
    for id_ in all_ids:
        orig = id2orig[id_]
        sig = id2new.get(id_)

        cols = []

        # ID
        cols.append(htmllib.TD(orig.xID))

        # Name, linked to the per-signature report when there is one.
        name = orig.Name
        report_file = None
        if sig:
            report_file = id2reportfile.get(sig.xID)
        if report_file:
            name = htmllib.A(name, href=report_file)
        cols.append(htmllib.TD(name))

        # If this signature was not run, then skip the rest of the columns
        # and show the reason across all four of them.
        if not sig:
            x = why_dropped.get(orig.xID, "Skipped for unknown reason.")
            cols.append(htmllib.TD(highlight(x), colspan=4))
            rows.append(htmllib.TR("\n".join(cols)))
            continue

        # Preprocessing (the Normalization attribute), Genes, Metagenes:
        # plain value when unchanged, highlighted value plus default
        # otherwise.
        for attr in ("Normalization", "Genes", "Metagenes"):
            current = getattr(sig, attr)
            default = getattr(orig, attr)
            cell = current
            if current != default:
                which_changed[sig.xID] = 1
                cell = show_change(current, default)
            cols.append(htmllib.TD(cell))

        # Normalization (quantile / shift-scale switches).
        norm_str = normalization_label(sig)
        if sig.Quantile.upper() != orig.Quantile.upper() or \
           sig.Shift_Scale.upper() != orig.Shift_Scale.upper():
            which_changed[sig.xID] = 1
            norm_str = show_change(norm_str, normalization_label(orig))
        cols.append(htmllib.TD(norm_str))

        rows.append(htmllib.TR("\n".join(cols)))

    w(htmllib.TABLE("\n".join(rows), border=1, cellpadding=3, cellspacing=0))

    w(htmllib.P())
    w(htmllib.B("Table 1: Signatures Analyzed."))
    if not which_changed:
        w("All signatures were run with the default parameters, "
          "as shown above.")
    else:
        w("The customized parameters are highlighted in yellow.")

    w(htmllib.P())

    w(htmllib.H3("II.  Results"))

    prob_file = os.path.split(file_layout.PROBABILITIES_PNG)[1]
    w(htmllib.A(htmllib.IMG(height=768, src=prob_file), href=prob_file))

    w(htmllib.P())

    w(htmllib.B("Figure 1: Predictions."))
    w("In this heatmap, each row contains a signature and each column "
      "contains a sample from your data set.")
    if which_changed:
        w("The asterisks denote the signatures that were run with "
          "customized parameters.")
    w("The color corresponds to the probability that a pathway is activated "
      "in a sample.")
    w("Warm colors represent high probabilities, and cool colors low.\n")

    w(htmllib.P())
    prob_file = os.path.split(file_layout.PROBABILITIES_PCL)[1]
    w("The raw values from this plot are available as a "
      'PCL-formatted file: %s' % htmllib.A(prob_file, href=prob_file))

    # Write out the footer.
    end_time = time.time()
    time_str = parselib.pretty_date(start_time)
    run_time = pretty_runtime(start_time, end_time)
    hostname = pretty_hostname()
    w(htmllib.P())
    w(htmllib.HR())
    w(htmllib.EM(
        "This analysis was run on %s on %s.  It took %s to complete.\n" %
        (time_str, hostname, run_time)))

    w("</BODY>")
    w("</HTML>")

    # Use a context manager so the handle is flushed and closed even if
    # the write fails.
    with open(file_layout.REPORT, 'w') as handle:
        handle.write("\n".join(lines) + "\n")