Beispiel #1
0
 def variant_calling(self):
     """Append the "Variants Detected" section to ``self.sections``.

     A DataTable is built from ``self.df``; its JavaScript and its HTML
     (floats formatted with ``%.3f``) are embedded in the section content.
     """
     table = DataTable(self.df, 'vc')
     # display options of the table
     table.datatable.datatable_options = dict(
         scrollX='true',
         pageLength=30,
         scrollCollapse='true',
         dom='Bfrtip',
         buttons=['copy', 'csv'],
     )
     script = table.create_javascript_function()
     table_html = table.create_datatable(float_format='%.3f')

     template = (
         "<p>This table present variant detected by freebayes after "
         "filtering.</p>\n{0}\n{1}\n<p>Note: the freebayes score can be"
         " understood as 1 - P(locus is homozygous given the data)</p>"
     )
     section = {
         'name': "Variants Detected",
         'anchor': 'basic_stats',
         'content': template.format(script, table_html),
     }
     self.sections.append(section)
Beispiel #2
0
    def get_table_dependencies(self):
        """Return the JavaScript and HTML of the Sequana dependencies table.

        The dependencies are read with ``easydev.get_dependencies``. An
        empty string is returned when none is reported. Otherwise a table
        with the package name, version and PyPI link is built, ordered by
        lower-cased package name, and returned as ``js + '\\n' + html``.
        """
        dependencies = easydev.get_dependencies('sequana')
        # if installed with conda, this will be empty
        if len(dependencies) == 0:
            return ""

        pypi = 'https://pypi.python.org/pypi/{0}'
        df = pd.DataFrame({
            'package': [dep.project_name for dep in dependencies],
            'version': [dep.version for dep in dependencies],
            'link': [pypi.format(dep.project_name) for dep in dependencies],
        })

        # case-insensitive ordering through a temporary helper column
        df['sort'] = df['package'].str.lower()
        df = df.sort_values(by='sort', axis=0).drop('sort', axis=1)

        table = DataTable(df, 'dep')
        table.datatable.datatable_options = dict(
            paging='false',
            bFilter='false',
            bInfo='false',
            bSort='false',
        )
        table.datatable.set_links_to_column('link', 'package')
        script = table.create_javascript_function()
        table_html = table.create_datatable()
        return script + '\n' + table_html
Beispiel #3
0
def df2html(df, name=None, dom='Brt', show_index=False, pageLength=15):
    """Simple wrapper to create HTML (and its JavaScript) from a dataframe.

    If a column is named ``<prefix>_links`` and a column ``<prefix>`` also
    exists, the ``<prefix>`` column is linked to the ``<prefix>_links``
    column through ``set_links_to_column``.

    :param df: dataframe to render.
    :param name: name handed to :class:`DataTable`. If None, a name made
        of letters only is generated from ``uuid.uuid1().time_low``.
    :param dom: value of the ``dom`` datatable option.
    :param show_index: forwarded to :class:`DataTable` as ``index``.
    :param pageLength: value of the ``pageLength`` datatable option.
    :return: the JavaScript followed by the HTML table, as one string.
    """

    if name is None:
        name = uuid.uuid1().time_low
        # looks like datatable does not like ID made of numbers, even in string
        # so each digit 0-9 is converted to a letter A-J
        name = "".join(chr(65 + int(x)) for x in str(name))

    datatable = DataTable(df, name, index=show_index)
    datatable.datatable.datatable_options = {
        'pageLength': pageLength,
        'scrollCollapse': 'false',
        'dom': dom,
        'buttons': ['copy', 'csv']
    }

    # identify links (columns ending in _links)
    suffix = '_links'
    for column in df.columns:
        # column labels are not necessarily strings (e.g. integers), in
        # which case they cannot be link columns
        if not isinstance(column, str) or not column.endswith(suffix):
            continue
        # strip the trailing suffix only: str.replace would also remove
        # any earlier occurrence of '_links' inside the label
        prefix = column[:-len(suffix)]
        if prefix in df.columns:
            datatable.datatable.set_links_to_column(column, prefix)

    js = datatable.create_javascript_function()
    html = datatable.create_datatable(float_format='%.6g')
    return js + html
Beispiel #4
0
 def chromosome_table(self, html_list):
     """Append the "Chromosomes" section with links to chromosome reports.

     Each chromosome of ``self.bed.chr_list`` is paired with the entry of
     ``html_list`` at the same position; its name, size, mean coverage
     and coefficient of variation are reported in the table and the name
     is linked to the paired page.
     """
     header = [
         "chromosome", "size", "mean_coverage", "coef_variation", "link"
     ]
     rows = []
     for chrom, page in zip(self.bed.chr_list, html_list):
         rows.append([
             chrom.chrom_name,
             chrom.get_size(),
             chrom.get_mean_cov(),
             chrom.get_var_coef(),
             page,
         ])
     df = pd.DataFrame(rows, columns=header)

     table = DataTable(df, 'chrom')
     table.datatable.datatable_options = dict(
         pageLength=15,
         dom='Bfrtip',
         buttons=['copy', 'csv'],
     )
     table.datatable.set_links_to_column('link', 'chromosome')
     script = table.create_javascript_function()
     table_html = table.create_datatable(float_format='%.3g')

     template = (
         "<p>Link to coverage analysis report for each chromosome. "
         "Size, mean coverage and coefficient of variation are reported"
         " in the table below.</p>\n{0}\n{1}"
     )
     self.sections.append({
         "name": "Chromosomes",
         "anchor": "chromosomes",
         "content": template.format(script, table_html),
     })
Beispiel #5
0
    def _get_html_stats(self):
        """Return a short HTML summary of ``bwa_mem_stats.json``.

        The file is read from ``self.directory`` (which is concatenated
        as is with the file name) through ``StatsBAM2Mapped``. The
        returned string reports the ``contamination`` value (passed
        through ``precision(..., 3)``), followed by the ``unpaired`` and
        ``duplicated`` entries.

        :return: HTML fragment (str).
        :raises KeyError: if one of the three entries above is missing.
        """
        from sequana.tools import StatsBAM2Mapped
        from easydev import precision
        data = StatsBAM2Mapped(self.directory + "bwa_mem_stats.json").data

        # NOTE: a mapped/unmapped DataTable used to be built at this point
        # but it was never added to the returned HTML (the statement
        # appending it was commented out), so that unused computation has
        # been dropped. The returned HTML is unchanged.
        fragments = [
            "Reads with Phix: %s %%<br>" % precision(data['contamination'], 3),
            "Unpaired: %s <br>" % data['unpaired'],
            "duplicated: %s <br>" % data['duplicated'],
        ]
        return "".join(fragments)
Beispiel #6
0
    def add_table(self):
        """Append the "Table" section built from a copy of ``self.summary``.

        The copy gets a single column label ``data`` plus a ``url`` column
        holding the same address on every row; ``data`` is linked to
        ``url`` in the rendered table.
        """
        frame = self.summary.copy()
        frame.columns = ['data']
        frame['url'] = ['http://sequana.readthedocs.org'] * len(frame)

        summary_table = DataTable(frame, "table", index=True)
        summary_table.datatable.datatable_options = dict(
            scrollX='300px',
            pageLength=15,
            scrollCollapse='true',
            dom='tB',
            paging="false",
            buttons=['copy', 'csv'],
        )
        summary_table.datatable.set_links_to_column('url', 'data')

        script = summary_table.create_javascript_function()
        table_html = summary_table.create_datatable(float_format='%.3g')

        section = {
            "name": "Table",
            "anchor": "table",
            "content": "{} {}".format(table_html, script),
        }
        self.sections.append(section)
Beispiel #7
0
    def summary(self):
        """ Add information of filter.
        """
        Sdefault = self.rnadiff.summary()
        self.rnadiff.log2_fc = 1
        S1 = self.rnadiff.summary()

        # set options
        options = {
            'scrollX': 'true',
            'pageLength': 20,
            'scrollCollapse': 'true',
            'dom': '',
            'buttons': []
        }

        S = pd.concat([Sdefault, S1])

        N = len(Sdefault)
        df = pd.DataFrame({
            'comparison_link': [1] * len(S),
            'comparison':
            S.index.values,
            'Description':
            ['Number of DGE (any FC)'] * N + ['Number of DGE (|FC| > 1)'] * N,
            'Down':
            S['down'].values,
            'Up':
            S['up'].values,
            'Total':
            S['all'].values
        })
        df = df[[
            'comparison', 'Description', 'Down', 'Up', 'Total',
            'comparison_link'
        ]]

        df['comparison_link'] = [f"#{name}_table_all" for name in Sdefault.index] + \
                                [f"#{name}_table_sign" for name in Sdefault.index]

        dt = DataTable(df, 'dge')
        dt.datatable.set_links_to_column('comparison_link',
                                         'comparison',
                                         new_page=False)
        dt.datatable.datatable_options = options
        js_all = dt.create_javascript_function()
        html = dt.create_datatable(float_format='%d')
        self.sections.append({
            'name':
            "Summary",
            'anchor':
            'filters_option',
            'content':
            f"""<p>Here below is a summary of thfinal Differententially Gene
Expression (DGE) analysis. You can find two entries per comparison. The first
one has no filter except for an adjusted p-value of 0.05. The second shows the
expressed genes with a filter of the log2 fold change of 1 (factor 2 in a normal
scale). Clicking on any of the link will lead you to section of the comparison. 
{js_all} {html} </p>"""
        })
Beispiel #8
0
    def add_main_section(self):
        """Append the "FastQC report(s)" section listing the reports found.

        The files matching ``self.pattern`` are listed in a table sorted
        by name. The name is the last ``/``-separated component of the
        path, cut at ``.html``; the link is the path without its first
        ``os.sep``-separated component.

        :raises IndexError: if a matched path holds no separator.
        """
        links = glob.glob("{}".format(self.pattern))
        names = [filename.rsplit('/',1)[1].split('.html')[0] for filename in links]

        df = pd.DataFrame({
            "names": names,
            "links": [link.split(os.sep,1)[1] for link in links]
        })
        # sort_values returns a new frame: the result must be kept,
        # otherwise the table stays in glob order
        df = df.sort_values(by='names')

        datatable = DataTable(df, "fastqc", index=False)
        datatable.datatable.set_links_to_column("links", "names")

        datatable.datatable.datatable_options = {
            'scrollX': '300px',
            'pageLength': 15,
            'scrollCollapse': 'true',
            'dom': 'rtpB',
            "paging": "false",
            'buttons': ['copy', 'csv']}
        js = datatable.create_javascript_function()
        html_tab = datatable.create_datatable()

        html = "{} {}".format(html_tab, js)

        self.sections.append({
             "name": "FastQC report(s)",
             "anchor": "fastqc",
             "content": "<p> Here below are link(s) to original FastQC report. "
                        "Please click on one of the links to jump to the main "
                        "report.  {} </p>".format(html)
        })
Beispiel #9
0
    def _get_html_stats_section(self):
        df = self._get_stats()
        datatable = DataTable(df, "phix_stats", index=True)
        datatable.datatable.datatable_options = {
            'scrollX': '300px',
            'pageLength': 30,
            'scrollCollapse': 'true',
            'dom': 'tpB',
            "paging": "false",
            'buttons': ['copy', 'csv']
        }
        js = datatable.create_javascript_function()
        # Important that the columns of type integer are indeed in integer type
        # otherwise the %.3g herebelow would round integers. For instance 123456
        # would appear as 123000. The dtypes must be taken care in _get_stats()
        # method
        html_tab = datatable.create_datatable(float_format='%.3g')
        html = """<p>We mapped the raw reads on a reference (see config file).
The reads mapped are removed and the unmapped reads are kept for further
cleaning (adapter removal). Here below are some statistics about the mapped and unmapped reads.
</p><p>
The A, C, G, T, N columns report the percentage of each bases in the overall
sequences. The GC content column is in percentage. Finally, note that for paired
data, the number of reads in the mapped files (R1 and R2) may differ due to . However,
the unmapped reads must agree. </p>"""
        html += "{} {}".format(html_tab, js)
        return html
Beispiel #10
0
    def add_adapters_section(self):
        """Append the "Adapters" section with one table row per adapter.

        Each entry of ``self.data['adapters']`` provides a ``name`` (used
        as row label) and an ``info`` mapping with the ``Length``,
        ``Trimmed``, ``Type`` and ``Sequence`` values. The ``Trimmed``
        values are converted to integers and the rows are sorted by
        decreasing ``Trimmed``. ``self.jinja['adapters']`` is reset to an
        empty string.

        :raises ValueError: if a ``Trimmed`` value cannot be converted.
        """
        # Create a Table with adapters
        df = pd.DataFrame({'Length': [], 'Trimmed':[], 'Type':[], 'Sequence': [], })

        for adapter in self.data['adapters']:
            name = adapter['name']
            info = adapter['info']
            # .loc replaces the DataFrame.ix indexer, which no longer
            # exists in pandas >= 1.0
            df.loc[name] = [info['Length'], info['Trimmed'],
                info['Type'], info['Sequence']]
        df.columns = ['Length', 'Trimmed', 'Type', 'Sequence']

        def _as_count(value):
            # Strings such as "12 times." or "12 times" are reduced to
            # their number; other values are handed to int() directly.
            if isinstance(value, str):
                value = value.replace("times.", "").replace("times", "")
            return int(value)

        df['Trimmed'] = df['Trimmed'].map(_as_count)

        # df.to_json(self.sample_name + "/cutadapt/cutadapt_stats2.json")
        df.sort_values(by="Trimmed", ascending=False, inplace=True)

        datatable = DataTable(df, "adapters", index=True)
        datatable.datatable.datatable_options = {
            'scrollX': 'true',
            'pageLength': 15,
            'scrollCollapse': 'true',
            'dom': 'frtipB',
            'buttons': ['copy', 'csv']}
        js = datatable.create_javascript_function()
        html_tab = datatable.create_datatable(float_format='%.3g')
        self.jinja['adapters'] = ""
        self.sections.append({
            "name": "Adapters",
            "anchor": "adapters",
            "content": "<p>{} {}</p>".format(html_tab, js)
        })
Beispiel #11
0
    def get_table_dependencies(self):
        """Return the JavaScript and HTML of the Sequana dependencies table.

        The dependencies are read with ``easydev.get_dependencies``. An
        empty string is returned when none is reported. Otherwise a table
        with the package name, version and PyPI link is built, ordered by
        lower-cased package name, and returned as ``js + '\\n' + html``.
        """
        found = easydev.get_dependencies('sequana')
        # if installed with conda, this will be empty
        if len(found) == 0:
            return ""

        url_template = 'https://pypi.python.org/pypi/{0}'
        packages = [dep.project_name for dep in found]
        df = pd.DataFrame({
            'package': packages,
            'version': [dep.version for dep in found],
            'link': [url_template.format(package) for package in packages],
        })

        # case-insensitive ordering through a temporary helper column
        df['sort'] = df['package'].str.lower()
        df = df.sort_values(by='sort', axis=0).drop('sort', axis=1)

        dep_table = DataTable(df, 'dep')
        dep_table.datatable.datatable_options = dict(
            paging='false',
            bFilter='false',
            bInfo='false',
            bSort='false',
        )
        dep_table.datatable.set_links_to_column('link', 'package')
        script = dep_table.create_javascript_function()
        table_html = dep_table.create_datatable()
        return '\n'.join((script, table_html))
Beispiel #12
0
    def _get_html_stats_section(self):
        df = self._get_stats()
        datatable = DataTable(df, "phix_stats", index=True)
        datatable.datatable.datatable_options = {
            'scrollX': '300px',
            'pageLength': 15,
            'scrollCollapse': 'true',
            'dom': 'tpB',
            "paging": "false",
            'buttons': ['copy', 'csv']}
        js = datatable.create_javascript_function()
        # Important that the columns of type integer are indeed in integer type
        # otherwise the %.3g herebelow would round integers. For instance 123456
        # would appear as 123000. The dtypes must be taken care in _get_stats()
        # method
        html_tab = datatable.create_datatable(float_format='%.3g')
        html = """<p>We mapped the raw reads on a reference (see config file).
The reads mapped are removed and the unmapped reads are kept for further
cleaning (adapter removal). Here below are some statistics about the mapped and unmapped reads.
</p><p>
The A, C, G, T, N columns report the percentage of each bases in the overall
sequences. The GC content column is in percentage. Finally, note that for paired
data, the number of reads in the mapped files (R1 and R2) may differ due to . However,
the unmapped reads must agree. </p>""" 
        html += "{} {}".format(html_tab, js)
        return html
Beispiel #13
0
    def _get_summary_section(self):

        df = self._get_stats()
        if len(df) == 1 and df.iloc[0]['taxon'] == -1:
            pngimage = sequana_data("no_data.jpg")
            extra = "<p> no reads could be identified with the given the database(s)."
        else:
            pngimage = self.directory + os.sep + "kraken.png"
            extra = """<p>The following <b>clickable image</b> is a simplified 
version (only genus are shown) of an interactive and more detailled version 
based on Krona. Finally, note that the unclassified species in the pie plot 
may correspond to species not present in the data base or adapters (if not 
removed).</p>"""

        html = """
    <p>Overview of the Taxonomic content of the filtered reads. </p>
    <p>The taxonomic analysis is performed with Kraken (see database name in 
the configuration file. The analysis is performed with a Kmer
approach.
The details about the database itself are available in the <a
href="http://sequana.readthedocs.io">Sequana documentation</a>.
The taxonomic analysis should give a good idea of the content of the FastQ
files but should be used as a sanity check. Indeed, species absent
from the database won't be detected leading to false detection (close species 
may be detected instead). 
Besides, be aware that closely related species may not be classified precisely.
</p>

    {0}
    <div style="text-align:center"><a href="./{1}/kraken.html"> {2} </a></div>
    <br>
""".format(extra,
           self.directory.split(os.sep, 1)[1],
           self.png_to_embedded_png(pngimage))

        datatable = DataTable(df, "kraken", index=False)
        # add links
        if "ena" in df.columns:
            urlena = "http://www.ebi.ac.uk/ena/data/view/"
            datatable.datatable.set_links_to_column(
                "ena", [urlena + this for this in df['ena']])
        datatable.datatable.datatable_options = {
            'scrollX': '300px',
            'pageLength': 30,
            'scrollCollapse': 'true',
            'dom': 'irtpB',
            "paging": "false",
            "order": [[2, "desc"]],
            'buttons': ['copy', 'csv']
        }
        js = datatable.create_javascript_function()
        html_tab = datatable.create_datatable(float_format='%.3g')

        html += "{} {}".format(html_tab, js)
        """# Rounding and convert in string to avoid exp notation
        df['percentage']  = df['percentage'].apply(lambda x: str(round(x,4)))
        #self.jinja['kraken_json'] = df.to_json()"""

        return html
Beispiel #14
0
 def get_html_table(self, user_key_list):
     """Return the JavaScript and HTML of the table for ``user_key_list``.

     The data come from ``self.get_single_data(user_key_list)``; floats
     are formatted with ``%.6g``.
     """
     table = DataTable(self.get_single_data(user_key_list), 'name')
     table.datatable.datatable_options = dict(
         pageLength=15,
         scrollCollapse='false',
         dom='Brt',
         buttons=['copy', 'csv'],
     )
     script = table.create_javascript_function()
     return script + table.create_datatable(float_format='%.6g')
Beispiel #15
0
    def _get_stats_section(self, tablename="stats"):
        self.df_stats = self.get_stats()
        filenames, mode = self._get_files("*boxplot.png")

        datatable = DataTable(self.df_stats, tablename, index=True)
        datatable.datatable.datatable_options = {
            'scrollX': '300px',
            'pageLength': 15,
            'scrollCollapse': 'true',
            'dom': 'rtpB',
            "paging": "false",
            'buttons': ['copy', 'csv']
        }
        js = datatable.create_javascript_function()
        html_tab = datatable.create_datatable(float_format='%.3g')

        html = """<p>The following table gives some basic statistics about the data before any filtering.
   The A, C, G, T, N columns report the percentage of each bases in the overall sequences.
   The GC content is provided in percentage as well. </p>
   <div>{} {}</div>
   <div>""".format(html_tab, js)

        html += """
   <p>The following figure(s) gives the average quality (red line) of raw reads
   (500,000 at max). The x-axis being the length of the reads. The yellow
   enveloppe gives the variation of the quality (1 standard deviation).</p>
   <p> Click on the image to jump to a full FastQC report.</p>"""

        if len(filenames) == 2: width = "49"
        else: width = "65"

        filename = os.path.split(filenames[0])[1].replace(
            "_boxplot.png", "_fastqc.html")
        href = self.path_to_fastqc + os.sep + filename
        html += """
   <figure style="float:left; width:{}%; padding:0px; margin:0px;">
       <a href="{}">{}</a>
   <figcaption style="font-style:italic">Fig1: R1 reads</figcaption>
   </figure>""".format(width, href, self.png_to_embedded_png(filenames[0]))

        if len(filenames) == 2:
            filename = os.path.split(filenames[1])[1].replace(
                "_boxplot.png", "_fastqc.html")
            href = self.path_to_fastqc + os.sep + filename
            html += """
   <figure style="float:right; width:{}%; padding:0px; margin:0px;">
       <a href="{}">{}</a>
   <figcaption style="font-style:italic">Fig2: R2 reads</figcaption>
   </figure>""".format(width, href, self.png_to_embedded_png(filenames[1]))

        return html
Beispiel #16
0
    def _get_stats_section(self, tablename="stats"):
        self.df_stats = self.get_stats()
        filenames, mode = self._get_files("*boxplot.png")

        datatable = DataTable(self.df_stats, tablename, index=True)
        datatable.datatable.datatable_options = {
            'scrollX': '300px',
            'pageLength': 15,
            'scrollCollapse': 'true',
            'dom': 'rtpB',
            "paging": "false",
            'buttons': ['copy', 'csv']}
        js = datatable.create_javascript_function()
        html_tab = datatable.create_datatable(float_format='%.3g')

        html = """<p>The following table gives some basic statistics about the data before any filtering.
   The A, C, G, T, N columns report the percentage of each bases in the overall sequences.
   The GC content is provided in percentage as well. </p>
   <div>{} {}</div>
   <div>""".format(html_tab, js)

        html += """
   <p>The following figure(s) gives the average quality (red line) of raw reads
   (500,000 at max). The x-axis being the length of the reads. The yellow
   enveloppe gives the variation of the quality (1 standard deviation).</p>
   <p> Click on the image to jump to a full FastQC report.</p>"""

        if len(filenames)==2: width="49"
        else: width="65"

        filename = os.path.split(filenames[0])[1].replace("_boxplot.png", "_fastqc.html")
        href = self.path_to_fastqc + os.sep + filename
        html += """
   <figure style="float:left; width:{}%; padding:0px; margin:0px;">
       <a href="{}">{}</a>
   <figcaption style="font-style:italic">Fig1: R1 reads</figcaption>
   </figure>""".format(width, href, self.png_to_embedded_png(filenames[0]))

        if len(filenames) == 2:
            filename = os.path.split(filenames[1])[1].replace("_boxplot.png", "_fastqc.html")
            href = self.path_to_fastqc + os.sep + filename
            html += """
   <figure style="float:right; width:{}%; padding:0px; margin:0px;">
       <a href="{}">{}</a>
   <figcaption style="font-style:italic">Fig2: R2 reads</figcaption>
   </figure>""".format(width, href, self.png_to_embedded_png(filenames[1]))


        return html
Beispiel #17
0
 def _get_stat_section(self):
     """Return a short description followed by the statistics table.

     The table is built from ``self._get_stats()``; its HTML and then its
     JavaScript are wrapped in a paragraph after the description.
     """
     table = DataTable(self._get_stats(), "cutadapt", index=True)
     table.datatable.datatable_options = dict(
         scrollX='300px',
         pageLength=15,
         scrollCollapse='true',
         dom='rtpB',
         paging="false",
         buttons=['copy', 'csv'],
     )
     script = table.create_javascript_function()
     table_html = table.create_datatable(float_format='%.3g')
     #csv_link = self.create_link('link', self.filename)
     #vcf_link = self.create_link('here', 'test.vcf')
     description = (
         "Reads statistics after trimming and adapter removal. The "
         "A, C, G, T, N columns report the percentage of each bases in "
         "the overall sequences"
     )
     return description + "<p>{} {}</p>".format(table_html, script)
Beispiel #18
0
 def _get_stat_section(self):
     """Return a short description followed by the statistics table.

     The table is built from ``self._get_stats()``; its HTML and then its
     JavaScript are wrapped in a paragraph after the description.
     """
     stats_table = DataTable(self._get_stats(), "cutadapt", index=True)
     stats_table.datatable.datatable_options = dict(
         scrollX='300px',
         pageLength=30,
         scrollCollapse='true',
         dom='rtpB',
         paging="false",
         buttons=['copy', 'csv'],
     )
     js_code = stats_table.create_javascript_function()
     table_code = stats_table.create_datatable(float_format='%.3g')
     #csv_link = self.create_link('link', self.filename)
     #vcf_link = self.create_link('here', 'test.vcf')
     pieces = [
         "Reads statistics after trimming and adapter removal. The ",
         "A, C, G, T, N columns report the percentage of each bases in ",
         "the overall sequences",
         "<p>{} {}</p>".format(table_code, js_code),
     ]
     return "".join(pieces)
Beispiel #19
0
    def add_adapters_section(self):
        """Append the "Adapters" section with one table row per adapter.

        Each entry of ``self.data['adapters']`` provides a ``name`` (used
        as row label) and an ``info`` mapping with the ``Length``,
        ``Trimmed``, ``Type`` and ``Sequence`` values. The ``Trimmed``
        values are converted to integers when possible and the rows are
        sorted by decreasing ``Trimmed``. ``self.jinja['adapters']`` is
        reset to an empty string.
        """
        # Create a Table with adapters
        df = pd.DataFrame({'Length': [], 'Trimmed':[], 'Type':[], 'Sequence': [], })

        for adapter in self.data['adapters']:
            name = adapter['name']
            info = adapter['info']
            df.loc[name] = [info['Length'], info['Trimmed'],
                info['Type'], info['Sequence']]

        df.columns = ['Length', 'Trimmed', 'Type', 'Sequence']

        def _as_count(value):
            # Strings such as "12 times." or "12 times" are reduced to
            # their number; other values are handed to int() directly.
            if isinstance(value, str):
                value = value.replace("times.", "").replace("times", "")
            return int(value)

        try:
            df['Trimmed'] = df['Trimmed'].map(_as_count)
        except (TypeError, ValueError):
            # Best effort, as before: when a value cannot be converted the
            # column is left untouched. Only conversion errors are
            # silenced, instead of every exception.
            pass

        # df.to_json(self.sample_name + "/cutadapt/cutadapt_stats2.json")
        df.sort_values(by="Trimmed", ascending=False, inplace=True)

        datatable = DataTable(df, "adapters", index=True)
        datatable.datatable.datatable_options = {
                'scrollX': 'true',
                'pageLength': 15,
                'scrollCollapse': 'true',
                'dom': 'frtipB',
                'buttons': ['copy', 'csv']}
        js = datatable.create_javascript_function()
        html_tab = datatable.create_datatable(float_format='%.3g')
        self.jinja['adapters'] = ""
        self.sections.append({
            "name": "Adapters",
            "anchor": "adapters",
            "content": "<p>{} {}</p>".format(html_tab, js)
        })
Beispiel #20
0
    def add_table(self):
        """Append the "TRF results" section showing ``self.trf.df``.

        The section content is the table HTML followed by its JavaScript.
        """
        results = DataTable(self.trf.df, "result", index=True)
        results.datatable.datatable_options = dict(
            scrollX='300px',
            pageLength=15,
            scrollCollapse='true',
            dom='tBifp',
            paging="true",
            buttons=['copy', 'csv'],
        )
        script = results.create_javascript_function()
        table_html = results.create_datatable(float_format='%.3g')

        section = {
            "name": "TRF results",
            "anchor": "results",
            "content": "{} {}".format(table_html, script),
        }
        self.sections.append(section)
Beispiel #21
0
    def add_main_section(self):
        """Append the "FastQC report(s)" section listing the reports found.

        The files matching ``self.pattern`` are listed in a table sorted
        by name. The name is the last ``/``-separated component of the
        path, cut at ``.html``; the link is the path without its first
        ``os.sep``-separated component.

        :raises IndexError: if a matched path holds no separator.
        """
        links = glob.glob("{}".format(self.pattern))
        names = [
            filename.rsplit('/', 1)[1].split('.html')[0] for filename in links
        ]

        df = pd.DataFrame({
            "names": names,
            "links": [link.split(os.sep, 1)[1] for link in links]
        })
        # sort_values returns a new frame: the result must be kept,
        # otherwise the table stays in glob order
        df = df.sort_values(by='names')

        datatable = DataTable(df, "fastqc", index=False)
        datatable.datatable.set_links_to_column("links", "names")

        datatable.datatable.datatable_options = {
            'scrollX': '300px',
            'pageLength': 30,
            'scrollCollapse': 'true',
            'dom': 'rtpB',
            "paging": "false",
            'buttons': ['copy', 'csv']
        }
        js = datatable.create_javascript_function()
        html_tab = datatable.create_datatable()

        html = "{} {}".format(html_tab, js)

        self.sections.append({
            "name":
            "FastQC report(s)",
            "anchor":
            "fastqc",
            "content":
            "<p> Here below are link(s) to original FastQC report. "
            "Please click on one of the links to jump to the main "
            "report.  {} </p>".format(html)
        })
Beispiel #22
0
    def add_flag_section(self):
        """Append the "Flags information" section.

        The table shows the ``flags`` entry of ``self._computation()``;
        the section content is the table HTML followed by its JavaScript.
        """
        flags = self._computation()['flags']

        flag_table = DataTable(flags, "flags", index=True)
        flag_table.datatable.datatable_options = dict(
            scrollX='300px',
            pageLength=15,
            scrollCollapse='true',
            dom='tB',
            paging="false",
            buttons=['copy', 'csv'],
        )
        script = flag_table.create_javascript_function()
        table_html = flag_table.create_datatable(float_format='%.3g')

        section = {
            "name": "Flags information",
            "anchor": "flags",
            "content": "{} {}".format(table_html, script),
        }
        self.sections.append(section)
Beispiel #23
0
    def add_stats(self):
        """Append the "Basic stats on read length" section.

        ``self.summary['read_stats']`` is turned into a one-row table
        labelled ``read length stats``; the section content is the table
        HTML followed by its JavaScript.
        """
        stats = pd.Series(self.summary['read_stats']).to_frame().T
        stats.index = ['read length stats']

        stats_table = DataTable(stats, "table", index=True)
        stats_table.datatable.datatable_options = dict(
            scrollX='300px',
            pageLength=15,
            scrollCollapse='true',
            dom='t',
            paging="false",
            buttons=['copy', 'csv'],
        )
        script = stats_table.create_javascript_function()
        # IMPORTANT: the %d format rounds floats and shows them as
        # integers. !! The GC value is therefore shown as an integer too.
        table_html = stats_table.create_datatable(float_format='%d')

        section = {
            "name": "Basic stats on read length",
            "anchor": "table",
            "content": "{} {}".format(table_html, script),
        }
        self.sections.append(section)
Beispiel #24
0
        def get_html_table(this_df, identifier):
            """Return the JavaScript and HTML of a table built from ``this_df``.

            Works on a copy of ``this_df``: a ``links`` column pointing to
            the QuickGO page of each ``id`` is added, the ``term``,
            ``fdr2``, ``abs_log2_fold_enrichment`` and ``pct_diff_expr``
            columns are removed when present, ``id`` becomes the first
            column and the rows are sorted by decreasing
            ``fold_enrichment``.

            :param this_df: dataframe with at least the ``id`` and
                ``fold_enrichment`` columns.
            :param identifier: name handed to :class:`DataTable`.
            :return: the JavaScript followed by the HTML table.
            """
            df = this_df.copy()
            links = ["https://www.ebi.ac.uk/QuickGO/term/{}".format(x) for x in df["id"]]
            df['links'] = links
            # errors='ignore' skips the columns that are absent; this
            # replaces a per-column "try: del ... except: pass" whose bare
            # except silenced every exception
            df = df.drop(
                columns=['term', 'fdr2', 'abs_log2_fold_enrichment', 'pct_diff_expr'],
                errors='ignore')

            first_col = df.pop("id")
            df.insert(0, "id", first_col)
            df = df.sort_values(by="fold_enrichment", ascending=False)

            datatable = DataTable(pd.DataFrame(df), identifier)
            datatable.datatable.set_links_to_column("links", "id")
            datatable.datatable.datatable_options = {
                 'scrollX': 'true',
                 'pageLength': 10,
                 'scrollCollapse': 'true',
                 'dom': 'Bfrtip',
                 'buttons': ['copy', 'csv']
            }
            js = datatable.create_javascript_function()
            html_table = datatable.create_datatable(float_format='%E')
            return js + html_table
Beispiel #25
0
    def add_table(self):
        """Append the "Table" section built from a copy of ``self.summary``.

        The copy gets a single column label ``data`` plus a ``url`` column
        holding the same address on every row; ``data`` is linked to
        ``url`` in the rendered table.
        """
        data = self.summary.copy()
        data.columns = ['data']
        data['url'] = ['http://sequana.readthedocs.org'] * len(data)

        data_table = DataTable(data, "table", index=True)
        data_table.datatable.datatable_options = dict(
            scrollX='300px',
            pageLength=15,
            scrollCollapse='true',
            dom='tB',
            paging="false",
            buttons=['copy', 'csv'],
        )
        data_table.datatable.set_links_to_column('url', 'data')

        js_code = data_table.create_javascript_function()
        table_code = data_table.create_datatable(float_format='%.3g')

        section = {
            "name": "Table",
            "anchor": "table",
            "content": "{} {}".format(table_code, js_code),
        }
        self.sections.append(section)
Beispiel #26
0
    def add_stats(self):
        """Append the "Basic stats on read length" section.

        ``self.summary['read_stats']`` is turned into a one-row table
        labelled ``read length stats``; the section content is the table
        HTML followed by its JavaScript.
        """
        read_stats = pd.Series(self.summary['read_stats'])
        frame = read_stats.to_frame().T
        frame.index = ['read length stats']

        read_table = DataTable(frame, "table", index=True)
        read_table.datatable.datatable_options = dict(
            scrollX='300px',
            pageLength=15,
            scrollCollapse='true',
            dom='t',
            paging="false",
            buttons=['copy', 'csv'],
        )
        js_code = read_table.create_javascript_function()
        # IMPORTANT: the %d format rounds floats and shows them as
        # integers. !! The GC value is therefore shown as an integer too.
        table_code = read_table.create_datatable(float_format='%d')

        section = {
            "name": "Basic stats on read length",
            "anchor": "table",
            "content": "{} {}".format(table_code, js_code),
        }
        self.sections.append(section)
Beispiel #27
0
    def add_flag_section(self):
        """Append the "Flags information" section.

        The table is the ``'flags'`` entry of the mapping returned by
        ``self._computation()``, rendered as a datatable with its index.
        """
        flags = self._computation()['flags']

        options = {
            'scrollX': '300px',
            'pageLength': 15,
            'scrollCollapse': 'true',
            'dom': 'tB',
            "paging": "false",
            'buttons': ['copy', 'csv'],
        }
        table = DataTable(flags, "flags", index=True)
        table.datatable.datatable_options = options

        # The JavaScript is generated first but emitted after the HTML table.
        javascript = table.create_javascript_function()
        table_html = table.create_datatable(float_format='%.3g')

        section = {
            "name": "Flags information",
            "anchor": "flags",
            "content": f"{table_html} {javascript}",
        }
        self.sections.append(section)
Beispiel #28
0
    def add_kegg(self):
        logger.info("Enrichment module: kegg term")
        style="width:45%"
        from sequana.enrichment import KeggPathwayEnrichment

        ke = KeggPathwayEnrichment(self.gene_lists,
            self.organism,
            mapper=self.enrichment_params["mapper"],
            log2_fc=self.enrichment_params['log2_fc'],
            background=self.enrichment_params['kegg_background'],
            preload_directory=self.enrichment_params['preload_directory'])

        logger.info("Saving all pathways in kegg_pathways/mmu")
        ke.export_pathways_to_json()

        # Image kegg pathways down
        def plot_barplot_down(filename):
            ke.barplot('down')
            pylab.savefig(filename)
        img_barplot_down = self.create_embedded_png(plot_barplot_down, "filename", style=style)
        def plot_scatter_down(filename):
            ke.scatterplot('down')
            pylab.savefig(filename)
        img_scatter_down = self.create_embedded_png(plot_scatter_down, "filename", style=style)

        # Image kegg pathways up
        def plot_barplot_up(filename):
            ke.barplot('up')
            pylab.savefig(filename)
        img_barplot_up = self.create_embedded_png(plot_barplot_up, "filename", style=style)
        def plot_scatter_up(filename):
            ke.scatterplot('up')
            pylab.savefig(filename)
        img_scatter_up = self.create_embedded_png(plot_scatter_up, "filename", style=style)

        # Results down (pathway info)
        html_before_table = """<p>Enrichment pathways summary</p>"""
        df_down = ke.barplot('down')

        if len(df_down):
            links = ["https://www.genome.jp/dbget-bin/www_bget?path:{}".format(x) for x in df_down["pathway_id"]]
            df_down['links'] = links
            df_down = df_down[["pathway_id", "name", "size", "Overlap", "P-value", 
                "Adjusted P-value", "Genes", "links"]]

            # save pathways and add fotorama
            logger.setLevel("WARNING")
            pb = Progress(len(df_down))
            files = []
            for i, ID in enumerate(df_down['pathway_id']):
                df = ke.save_pathway(ID, self.data, filename=f"{config.output_dir}/{ID}.png")
                files.append(f"{ID}.png")
                pb.animate(i+1)
            fotorama_down = self.add_fotorama(files, width=800)


            datatable = DataTable(df_down, 'kegg_down')
            datatable.datatable.set_links_to_column("links", "pathway_id")
            datatable.datatable.datatable_options = {
                 'scrollX': 'true',
                 'pageLength': 20,
                 'scrollCollapse': 'true',
                 'dom': 'Bfrtip',
                 'buttons': ['copy', 'csv']
            }
            js_table_down = datatable.create_javascript_function()
            html_table_down = datatable.create_datatable(float_format='%E')


        # Results up (pathway info)
        df_up = ke.barplot('up')
        if len(df_up):
            links = ["https://www.genome.jp/dbget-bin/www_bget?path:{}".format(x) for x in df_up["pathway_id"]]
            df_up['links'] = links
            df_up = df_up[["pathway_id", "name", "size", "Overlap", "P-value", "Adjusted P-value", "Genes", "links"]]
            datatable = DataTable(df_up, 'kegg_up')
            datatable.datatable.set_links_to_column("links", "pathway_id")
            datatable.datatable.datatable_options = {
                 'scrollX': 'true',
                 'pageLength': 20,
                 'scrollCollapse': 'true',
                 'dom': 'Bfrtip',
                 'buttons': ['copy', 'csv']
            }
            js_table_up = datatable.create_javascript_function()
            html_table_up = datatable.create_datatable(float_format='%E')
            pb = Progress(len(df_up))
            files = []
            for i, ID in enumerate(df_up['pathway_id']):
                df = ke.save_pathway(ID, self.data, filename=f"{config.output_dir}/{ID}.png")
                files.append(f"{ID}.png")
                pb.animate(i+1)
            fotorama_up = self.add_fotorama(files, width=800)
            #logger.setLevel(level)

        Ndown = len(df_down)
        Nup = len(df_up)

        if Ndown == 0:
            img_barplot_down = ""
            img_scatter_down = ""
            fotorama_down = ""
            js_table_down = ""
            html_table_down = ""
        if Nup == 0:
            img_barplot_up = ""
            img_scatter_up = ""
            fotorama_up = ""
            js_table_up = ""
            html_table_up = ""

        html = f"""
<h3>2.1 - KEGG pathways down regulated</h3>
<p>{Ndown} KEGG pathways are found to be down regulated</p>
<br>
{img_barplot_down}
{img_scatter_down}
<hr>
{js_table_down} {html_table_down}
<hr>
{fotorama_down}


<h3>2.1 - KEGG pathways up regulated</h3>
<p>{Nup} KEGG pathways are found to be up regulated</p>
<br>
{img_barplot_up}
{img_scatter_up}
<hr>
{js_table_up} {html_table_up}
<hr>
{fotorama_up}
"""
        self.sections.append({"name": "2 - KEGG", "anchor": "kegg", "content": html})
Beispiel #29
0
    def add_section(self):
        logger.info("Found %s projects/samples/ directories" % len(self.summaries))
        for filename in self.filenames:
            logger.info(filename)

        self.jinja = {}

        self.jinja['canvas'] = '<script type="text/javascript" src="js/canvasjs.min.js"></script>'
        self.jinja['canvas'] += """<script type="text/javascript">
            window.onload = function () {"""

        # Information to put on top of the page (added later in a module.intro)
        # We should get the link name from the project name contained in the json
        links = [{'href': filename.replace(".json", ".html"),'caption': project}
                               for filename, project in zip(self.filenames,self.projects)]
        introhtml = "<div><b>Number of samples:</b>{}</div>".format(len(self.summaries))
        #introhtml += '<div class="multicolumns"><ul>'
        #for link in links:
        #    introhtml += ' <li><a href="{}">{}</a></li> '.format(
        #                                link["href"], link["caption"])
        #introhtml += '\n</ul>\n</div>'


        self.jinja['sections'] = []

        # This will used to stored all information
        self.df = {}

        # The order does not matter here, everything is done in JINJA
        try:self.populate_nreads_raw()
        except Exception as err:
            print(err)

        try: self.populate_phix()
        except Exception as err:
            logger.debug("multi_summary: skip phix")

        try: self.populate_gc_samples()
        except Exception as err:
            logger.debug("multi_summary: skip gc samples")

        try: self.populate_trimming()
        except Exception as err:
            logger.debug("multi_summary: skip trimming")

        try: self.populate_mean_quality()
        except Exception as err:
            logger.debug("multi_summary: skip mean quality")

        try: self.populate_adapters()
        except Exception as err:
            logger.debug("multi_summary: skip adapters")

        try: self.populate_output_total_reads()
        except Exception as err:
            logger.debug("multi_summary: skip total reads")

        # Now we have all data in df as dictionaries. Let us merge them together

        keys = list(self.df.keys())
        if len(keys) >= 1:
            df = pd.DataFrame(self.df[keys[0]])
        if len(keys) > 1: # we can merge things
            for key in keys[1:]:
                df = pd.merge(df, pd.DataFrame(self.df[key]), on=['name', 'url'])

        # For the quality_control pipeline
        columns = []
        for this in ["name",
                    "url",
                    "N_raw",
                    "GC_raw_(%)",
                    "Mean_quality_raw",
                    'Phix_content_(%)',
                    "Adapters_content_(%)",
                    "Trimmed_reads_(%)",
                    "N_final"
                    ]:
            if this in df.columns:
                columns.append(this)
        df = df[columns]
        df.rename(columns={"name": "Sample name"}, inplace=True)


        from sequana.utils.datatables_js import DataTable
        datatable = DataTable(df, "multi_summary")
        datatable.datatable.datatable_options = {
            'scrollX': '300px',
            'pageLength': 30,
            'scrollCollapse': 'true',
            'dom': 'rtpB',
            "paging": "false",
            'buttons': ['copy', 'csv']}

        datatable.datatable.set_links_to_column("url", "Sample name")
        js = datatable.create_javascript_function()
        html_tab = datatable.create_datatable(float_format='%.3g')
        html = "{} {}".format(html_tab, js)

        self.jinja['canvas'] += """
    function onClick(e){
        window.open(e.dataPoint.url)
    }
}</script>"""

        caption = """<p>The table below gives a brief summary of the analysis. The
first column contains clickable sample name that redirects to complete summary
page. The table contains the following columns:</p>

   <b>Table caption</b>
    <table>
        <tr><td>N_raw</td><td>Number of reads in the data</td></tr>
        <tr><td>GC_raw_(%)</td><td>GC content in percentage in the raw data 
across all reads</td></tr>
        <tr><td>Mean_quality_raw</td><td>Mean quality across all reads all bases
in the raw data</td></tr>
        <tr><td>Phix_content_(%)</td><td>Percentage of reads found with Phix174</td></tr>
        <tr><td>Adapters_content_(%)</td><td>Percentage of reads with adapters (after phix
removal if applied)  </td></tr>
        <tr><td>Trimmed_reads_(%)</td><td>Percentage of reads trimmed (after
phix and adapter removal)</td></tr>
        <tr><td>N_final</td><td>Final number of reads (after phix and adapter
removal and trimming)</td></tr>
    </table>
"""
        infohtml = self.create_hide_section('information', 
            '(Show information)', caption, True)
        infohtml = "\n".join(infohtml)

        self.intro = introhtml + """ <hr><b>Summary</b>: """ + infohtml +html

        self.sections.append({
            'name': None,
            'anchor': None,
            'content': self.jinja['canvas'] + "\n".join(self.jinja['sections'])
        })
Beispiel #30
0
    def add_individual_report(self, comp, name, counter):
        style = "width:45%"

        description = """<p>
In the dispersion estimation and model fitting is done, statistical testing is
performed. The distribution of raw p-values computed by the statistical test 
is expected to be a mixture of a uniform distribution on [0, 1] and a peak
around 0 corresponding to the differentially expressed features. This may not
always be the case. </p>"""

        def plot_pvalue_hist(filename):
            import pylab
            pylab.ioff()
            pylab.clf()
            comp.plot_pvalue_hist()
            pylab.savefig(filename)
            pylab.close()

        def plot_padj_hist(filename):
            import pylab
            pylab.ioff()
            pylab.clf()
            comp.plot_padj_hist()
            pylab.savefig(filename)
            pylab.close()

        img1 = self.create_embedded_png(plot_pvalue_hist,
                                        "filename",
                                        style=style)
        img2 = self.create_embedded_png(plot_padj_hist,
                                        "filename",
                                        style=style)

        # FIXME. pvalues adjusted are not relevant so commented for now
        img2 = ""

        self.sections.append({
            "name": f"6.{counter}.a pvalue distribution ({name})",
            "anchor": f"dge_summary",
            "content": description + img1 + img2
        })

        def plot_volcano(filename):
            import pylab
            pylab.ioff()
            pylab.clf()
            comp.plot_volcano()
            pylab.savefig(filename)
            pylab.close()

        html_volcano = """<p>The volcano plot here below shows the differentially
expressed features with a adjusted p-value below 0.05 (dashed back line). 
The volcano plot represents the log10 of the adjusted P
value as a function of the log2 ratio of differential expression. </p>"""
        #img3 = self.create_embedded_png(plot_volcano, "filename", style=style)
        img3 = ""
        fig = comp.plot_volcano(plotly=True,
                                annotations=self.rnadiff.annotation)
        from plotly import offline
        plotly = offline.plot(fig, output_type="div", include_plotlyjs=False)

        self.sections.append({
            "name": f"6.{counter}.b volcano plots ({name})",
            "anchor": f"{name}_volcano",
            "content": html_volcano + img3 + "<hr>" + plotly
        })

        # finally, let us add the tables
        from pylab import log10

        df = comp.df.copy()  #.reset_index()

        # here we need to add the annotation if possible
        try:
            df = pd.concat(
                [df, self.rnadiff.annotation.annotation.loc[comp.df.index]],
                axis=1)
        except Exception as err:
            logger.critical(f"Could not add annotation. {err}")

        df = df.reset_index()

        fold_change = 2**df['log2FoldChange']
        log10padj = -log10(df['padj'])
        df.insert(
            df.columns.get_loc('log2FoldChange') + 1, 'FoldChange',
            fold_change)
        df.insert(df.columns.get_loc('padj') + 1, 'log10_padj', log10padj)

        try:
            del df['dispGeneEst']
            #del df['dispFit']
            #del df['dispMap']
        except:
            pass

        for x in ['lfcSE', 'stat', 'dispersion']:
            try:
                del df[x]
            except:
                pass
        # set options
        options = {
            'scrollX': 'true',
            'pageLength': 10,
            'scrollCollapse': 'true',
            'dom': 'Bfrtip',
            'buttons': ['copy', 'csv']
        }

        datatable = DataTable(df, f'{name}_table_all')
        datatable.datatable.datatable_options = options
        js_all = datatable.create_javascript_function()
        html_tab_all = datatable.create_datatable(float_format='%.3e')

        df_sign = df.query(
            "padj<=0.05 and (log2FoldChange>1 or log2FoldChange<-1)")
        datatable = DataTable(df_sign, f'{name}_table_sign')
        datatable.datatable.datatable_options = options
        js_sign = datatable.create_javascript_function()
        html_tab_sign = datatable.create_datatable(float_format='%.3e')

        self.sections.append({
            'name':
            f"6.{counter}.c {name} Tables ({name})",
            'anchor':
            f"{name} stats",
            'content':
            f"""<p>The following tables give all DGE results. The
first table contains all significant genes (adjusted p-value below 0.05 and
absolute fold change of at least 0.5). The following tables contains all results
without any filtering. Here is a short explanation for each column:
<ul>
<li> baseMean: base mean over all samples</li>
<li> norm.sampleName: rounded normalized counts per sample</li>
<li> FC: fold change in natural base</li>
<li> log2FoldChange: log2 Fold Change estimated by the model. Reflects change
between the condition versus the reference condition</li>
<li> stat: Wald statistic for the coefficient (contrast) tested</li>
<li> pvalue: raw p-value from statistical test</li>
<li> padj: adjusted pvalue. Used for cutoff at 0.05 </li>
<li> betaConv: convergence of the coefficients of the model </li>
<li> maxCooks: maximum Cook's distance of the feature </li>
<li> outlier: indicate if the feature is an outlier according to Cook's distance
</li>
</ul>
</p>
<h3>Significative only<a id="{name}_table_sign"></a></h3>
here below is a subset of the next table. It contains all genes below adjusted
p-value of 0.05 and absolute log2 fold change above 1.
{js_sign} {html_tab_sign} 

<h3>All genes<a id="{name}_table_all"></a></h3>
{js_all} {html_tab_all}"""
        })