def df2html(df, name=None, dom='Brt', show_index=False, pageLength=15):
    """Simple wrapper to create HTML from a dataframe.

    If a column ends in ``_links`` and a column named like it without that
    suffix exists, both are handed to ``set_links_to_column`` so that the
    plain column is shown with the clickable links.

    :param df: dataframe to render.
    :param name: identifier handed to ``DataTable``. When None, one is
        derived from ``uuid.uuid1().time_low``.
    :param dom: value stored under the ``'dom'`` datatable option.
    :param show_index: forwarded to ``DataTable`` as ``index``.
    :param pageLength: value stored under the ``'pageLength'`` option.
    :return: result of ``create_javascript_function()`` followed by the
        result of ``create_datatable()``.
    """
    if name is None:
        name = uuid.uuid1().time_low
        # looks like datatable does not like ID made of numbers, even in
        # string so we convert the digits to ABCDEFGHIJ letters
        name = "".join(chr(65 + int(x)) for x in str(name))

    datatable = DataTable(df, name, index=show_index)
    datatable.datatable.datatable_options = {
        'pageLength': pageLength,
        'scrollCollapse': 'false',
        'dom': dom,
        'buttons': ['copy', 'csv']
    }

    # identify links (columns ending in _links)
    suffix = '_links'
    for column in df.columns:
        if not column.endswith(suffix):
            continue
        # Strip the trailing suffix only: str.replace would also remove
        # any other occurrence of '_links' inside the column name.
        prefix = column[:-len(suffix)]
        if prefix in df.columns:
            datatable.datatable.set_links_to_column(column, prefix)

    js = datatable.create_javascript_function()
    html = datatable.create_datatable(float_format='%.6g')
    return js + html
def add_main_section(self):
    """Append the "FastQC report(s)" section to ``self.sections``.

    The files matching ``self.pattern`` are listed in a table with one
    row per file: the file name (without directory and ``.html``
    extension) linked to the file path stripped of its first component.
    Rows are sorted by name.
    """
    links = glob.glob("{}".format(self.pattern))
    # basename also copes with paths that contain no directory part
    names = [os.path.basename(filename).split('.html')[0]
             for filename in links]

    df = pd.DataFrame({
        "names": names,
        "links": [link.split(os.sep, 1)[1] for link in links]
    })
    # sort_values returns a new frame; the result must be kept otherwise
    # the table stays in glob order.
    df = df.sort_values(by='names')

    datatable = DataTable(df, "fastqc", index=False)
    datatable.datatable.set_links_to_column("links", "names")
    datatable.datatable.datatable_options = {
        'scrollX': '300px',
        'pageLength': 15,
        'scrollCollapse': 'true',
        'dom': 'rtpB',
        "paging": "false",
        'buttons': ['copy', 'csv']}
    js = datatable.create_javascript_function()
    html_tab = datatable.create_datatable()
    html = "{} {}".format(html_tab, js)

    self.sections.append({
        "name": "FastQC report(s)",
        "anchor": "fastqc",
        "content": "<p> Here below are link(s) to original FastQC report. "
                   "Please click on one of the links to jump to the main "
                   "report.  {} </p>".format(html)
    })
def add_adapters_section(self):
    """Append the "Adapters" section to ``self.sections``.

    One table row is built per entry of ``self.data['adapters']``,
    indexed by the adapter name, with the Length, Trimmed, Type and
    Sequence fields of its ``info`` mapping. Rows are sorted by
    decreasing ``Trimmed`` count. ``self.jinja['adapters']`` is reset to
    an empty string.
    """
    # Create a Table with adapters. Columns are declared explicitly so
    # that their order does not depend on dictionary ordering.
    columns = ['Length', 'Trimmed', 'Type', 'Sequence']
    df = pd.DataFrame(columns=columns)

    for adapter in self.data['adapters']:
        info = adapter['info']
        # .loc enlarges the frame with a new labelled row (.ix has been
        # removed from pandas)
        df.loc[adapter['name']] = [info[column] for column in columns]

    # 'Trimmed' values are strings carrying a "times." marker; remove it
    # and convert to integer so that the sort below is numerical
    df['Trimmed'] = df.Trimmed.map(lambda x: int(x.replace("times.", "")))

    # df.to_json(self.sample_name + "/cutadapt/cutadapt_stats2.json")
    df.sort_values(by="Trimmed", ascending=False, inplace=True)

    datatable = DataTable(df, "adapters", index=True)
    datatable.datatable.datatable_options = {
        'scrollX': 'true',
        'pageLength': 15,
        'scrollCollapse': 'true',
        'dom': 'frtipB',
        'buttons': ['copy', 'csv']}
    js = datatable.create_javascript_function()
    html_tab = datatable.create_datatable(float_format='%.3g')

    self.jinja['adapters'] = ""
    self.sections.append({
        "name": "Adapters",
        "anchor": "adapters",
        "content": "<p>{} {}</p>".format(html_tab, js)
    })
def get_table_dependencies(self):
    """ Return dependencies of Sequana.

    The dependencies reported by ``easydev.get_dependencies('sequana')``
    are rendered as a table (package, version, link) sorted by package
    name ignoring the case; the package column links to its PyPI page.

    :return: the javascript and the HTML table separated by a newline, or
        an empty string when no dependency is reported.
    """
    dependencies = easydev.get_dependencies('sequana')
    # if installed with conda, this will be empty
    if len(dependencies) == 0:
        return ""

    pypi_url = 'https://pypi.python.org/pypi/{0}'
    table = pd.DataFrame({
        'package': [dep.project_name for dep in dependencies],
        'version': [dep.version for dep in dependencies],
        'link': [pypi_url.format(dep.project_name) for dep in dependencies],
    })

    # temporary lower-case key used for a case-insensitive sort
    table['sort'] = table['package'].str.lower()
    table.sort_values(by='sort', axis=0, inplace=True)
    table.drop('sort', axis=1, inplace=True)

    datatable = DataTable(table, 'dep')
    options = {
        'paging': 'false',
        'bFilter': 'false',
        'bInfo': 'false',
        'bSort': 'false',
    }
    datatable.datatable.datatable_options = options
    datatable.datatable.set_links_to_column('link', 'package')

    script = datatable.create_javascript_function()
    markup = datatable.create_datatable()
    return script + '\n' + markup
def _get_html_stats_section(self):
    """Return the HTML of the statistics section.

    The dataframe returned by ``self._get_stats()`` is rendered as a
    datatable named "phix_stats" and appended to an explanatory text.

    :return: the explanatory text followed by the table and its
        javascript.
    """
    stats = self._get_stats()

    table = DataTable(stats, "phix_stats", index=True)
    options = {
        'scrollX': '300px',
        'pageLength': 15,
        'scrollCollapse': 'true',
        'dom': 'tpB',
        "paging": "false",
        'buttons': ['copy', 'csv'],
    }
    table.datatable.datatable_options = options
    script = table.create_javascript_function()

    # Important that the columns of type integer are indeed in integer type
    # otherwise the %.3g herebelow would round integers. For instance 123456
    # would appear as 123000. The dtypes must be taken care in _get_stats()
    # method
    markup = table.create_datatable(float_format='%.3g')

    intro = (
        "<p>We mapped the raw reads on a reference (see config file). "
        "The reads mapped are removed and the unmapped reads are kept for "
        "further cleaning (adapter removal). Here below are some "
        "statistics about the mapped and unmapped reads. </p><p> "
        "The A, C, G, T, N columns report the percentage of each bases in "
        "the overall sequences. The GC content column is in percentage. "
        "Finally, note that for paired data, the number of reads in the "
        "mapped files (R1 and R2) may differ due to . However, the "
        "unmapped reads must agree. </p>"
    )
    return intro + "{} {}".format(markup, script)
def _get_html_stats(self):
    """Return a short HTML summary of the mapping statistics.

    The values are read from the ``bwa_mem_stats.json`` file whose path
    is ``self.directory`` directly followed by the file name (no
    separator is inserted here).

    :return: HTML lines giving the contamination (3 digits precision),
        the unpaired and the duplicated values.
    """
    from sequana.tools import StatsBAM2Mapped
    from easydev import precision

    data = StatsBAM2Mapped(self.directory + "bwa_mem_stats.json").data

    html = "Reads with Phix: %s %%<br>" % precision(data['contamination'], 3)
    # The mapped/unmapped table that used to be built here was never
    # inserted in the returned HTML (the insertion was commented out), so
    # its construction has been dropped.
    html += "Unpaired: %s <br>" % data['unpaired']
    html += "duplicated: %s <br>" % data['duplicated']
    return html
def _get_summary_section(self):
    """Return the HTML of the taxonomic summary section.

    The dataframe returned by ``self._get_stats()`` is rendered as a
    datatable named "kraken". When it contains a single row whose
    ``taxon`` is -1, a "no data" image and message are shown; otherwise
    the ``kraken.png`` image found in ``self.directory`` is embedded. In
    both cases the image links to ``kraken.html``.

    :return: the explanatory text, the embedded image, the table and its
        javascript.
    """
    df = self._get_stats()
    if len(df) == 1 and df.iloc[0]['taxon'] == -1:
        pngimage = sequana_data("no_data.jpg")
        extra = ("<p> no reads could be identified with the given "
                 "database(s).</p>")
    else:
        pngimage = self.directory + os.sep + "kraken.png"
        extra = """<p>The following <b>clickable image</b> is a simplified
version (only genus are shown) of an interactive and more detailled version
based on Krona. Finally, note that the unclassified species in the pie plot
may correspond to species not present in the data base or adapters (if not
removed).</p>"""

    html = """
<p>Overview of the Taxonomic content of the filtered reads. </p>
<p>The taxonomic analysis is performed with Kraken (see database name in the
configuration file). The analysis is performed with a Kmer
approach.
The details about the database itself are available in the <a
href="http://sequana.readthedocs.io">Sequana documentation</a>.
The taxonomic analysis should give a good idea of the content of the FastQ
files but should be used as a sanity check. Indeed, species absent
from the database won't be detected leading to false detection (close species
may be detected instead).
Besides, be aware that closely related species may not be classified precisely.
</p>

{0}
<div style="text-align:center"><a href="./{1}/kraken.html"> {2} </a></div>
<br>
""".format(extra, self.directory.split(os.sep, 1)[1],
           self.png_to_embedded_png(pngimage))

    datatable = DataTable(df, "kraken", index=False)
    # add links
    if "ena" in df.columns:
        urlena = "http://www.ebi.ac.uk/ena/data/view/"
        datatable.datatable.set_links_to_column(
            "ena", [urlena + this for this in df['ena']])
    datatable.datatable.datatable_options = {
        'scrollX': '300px',
        'pageLength': 30,
        'scrollCollapse': 'true',
        'dom': 'irtpB',
        "paging": "false",
        "order": [[2, "desc"]],
        'buttons': ['copy', 'csv']
    }
    js = datatable.create_javascript_function()
    html_tab = datatable.create_datatable(float_format='%.3g')

    html += "{} {}".format(html_tab, js)

    # Disabled: rounding and conversion to string to avoid exp notation
    # df['percentage'] = df['percentage'].apply(lambda x: str(round(x,4)))
    # self.jinja['kraken_json'] = df.to_json()
    return html
def _get_stats_section(self, tablename="stats"):
    """Return the HTML of the basic statistics section.

    ``self.get_stats()`` is stored in ``self.df_stats`` and rendered as a
    datatable, followed by one figure per boxplot image: the first file
    matching ``*boxplot.png`` is always shown, the second one only when
    exactly two files are found. Each figure links to the FastQC report
    of the same name located in ``self.path_to_fastqc``.

    :param tablename: identifier handed to ``DataTable``.
    :return: the HTML code of the section.
    """
    self.df_stats = self.get_stats()
    filenames, mode = self._get_files("*boxplot.png")

    table = DataTable(self.df_stats, tablename, index=True)
    table.datatable.datatable_options = {
        'scrollX': '300px', 'pageLength': 15, 'scrollCollapse': 'true',
        'dom': 'rtpB', "paging": "false", 'buttons': ['copy', 'csv'],
    }
    script = table.create_javascript_function()
    markup = table.create_datatable(float_format='%.3g')

    html = (
        "<p>The following table gives some basic statistics about the data "
        "before any filtering. The A, C, G, T, N columns report the "
        "percentage of each bases in the overall sequences. The GC content "
        "is provided in percentage as well. </p> "
        "<div>{} {}</div> <div>"
    ).format(markup, script)

    html += (
        " <p>The following figure(s) gives the average quality (red line) "
        "of raw reads (500,000 at max). The x-axis being the length of the "
        "reads. The yellow enveloppe gives the variation of the quality "
        "(1 standard deviation).</p> "
        "<p> Click on the image to jump to a full FastQC report.</p>"
    )

    paired = len(filenames) == 2
    width = "49" if paired else "65"

    def figure(index, side, caption):
        # HTML figure for filenames[index], linked to its FastQC report
        report = os.path.split(filenames[index])[1].replace(
            "_boxplot.png", "_fastqc.html")
        href = self.path_to_fastqc + os.sep + report
        template = (
            ' <figure style="float:{}; width:{}%; padding:0px; margin:0px;">'
            ' <a href="{}">{}</a>'
            ' <figcaption style="font-style:italic">{}</figcaption>'
            ' </figure>'
        )
        return template.format(
            side, width, href,
            self.png_to_embedded_png(filenames[index]), caption)

    html += figure(0, "left", "Fig1: R1 reads")
    if paired:
        html += figure(1, "right", "Fig2: R2 reads")
    return html
def _get_stats_section(self, tablename="stats"):
    """Build the HTML of the section showing basic statistics.

    The dataframe from ``self.get_stats()`` (kept in ``self.df_stats``)
    is shown as a datatable. The boxplot images matching
    ``*boxplot.png`` follow: the first one always, the second one only
    if exactly two images exist. Images are embedded and link to the
    matching ``_fastqc.html`` file under ``self.path_to_fastqc``.

    :param tablename: identifier handed to ``DataTable``.
    :return: the HTML code of the section.
    """
    self.df_stats = self.get_stats()
    filenames, mode = self._get_files("*boxplot.png")

    options = {
        'scrollX': '300px',
        'pageLength': 15,
        'scrollCollapse': 'true',
        'dom': 'rtpB',
        "paging": "false",
        'buttons': ['copy', 'csv'],
    }
    datatable = DataTable(self.df_stats, tablename, index=True)
    datatable.datatable.datatable_options = options
    js = datatable.create_javascript_function()
    html_tab = datatable.create_datatable(float_format='%.3g')

    pieces = [
        "<p>The following table gives some basic statistics about the "
        "data before any filtering. The A, C, G, T, N columns report the "
        "percentage of each bases in the overall sequences. The GC "
        "content is provided in percentage as well. </p> "
        "<div>{} {}</div> <div>".format(html_tab, js),
        " <p>The following figure(s) gives the average quality (red "
        "line) of raw reads (500,000 at max). The x-axis being the length "
        "of the reads. The yellow enveloppe gives the variation of the "
        "quality (1 standard deviation).</p> "
        "<p> Click on the image to jump to a full FastQC report.</p>",
    ]

    if len(filenames) == 2:
        width = "49"
        shown = [filenames[0], filenames[1]]
    else:
        width = "65"
        shown = [filenames[0]]

    layout = [("left", "Fig1: R1 reads"), ("right", "Fig2: R2 reads")]
    for boxplot, (side, caption) in zip(shown, layout):
        basename = os.path.split(boxplot)[1]
        href = (self.path_to_fastqc + os.sep
                + basename.replace("_boxplot.png", "_fastqc.html"))
        pieces.append(
            ' <figure style="float:{}; width:{}%; padding:0px; '
            'margin:0px;"> <a href="{}">{}</a> '
            '<figcaption style="font-style:italic">{}</figcaption> '
            '</figure>'.format(
                side, width, href,
                self.png_to_embedded_png(boxplot), caption))

    return "".join(pieces)
def create_datatable(self):
    """ Variants detected section.

    Render ``self.vcf.df`` as a datatable named "jc". For each sample of
    ``self.vcf.vcf.samples``, the sample is set as tooltip of the
    ``info_<position>`` column.

    :return: tuple made of the HTML (hidden CSV followed by the table)
        and the datatable options.
    """
    table = DataTable(self.vcf.df, 'jc')
    table.datatable.datatable_options = {
        'scrollX': 'true', 'pageLength': 15, 'scrollCollapse': 'true',
        'dom': 'Bfrtip', 'buttons': ['copy', 'csv'],
    }

    for position, sample in enumerate(self.vcf.vcf.samples):
        column = 'info_{0}'.format(position)
        table.datatable.set_tooltips_to_column(column, sample)

    options = table.datatable._create_datatable_option()
    hidden_csv = table._create_hidden_csv(float_format='%.3f')
    html_table = table._create_html_table(style='width: 100%;')
    return hidden_csv + html_table, options
def _get_stat_section(self):
    """Return the HTML of the section with the reads statistics.

    The dataframe returned by ``self._get_stats()`` is rendered as a
    datatable named "cutadapt" and placed after a short description.

    :return: the description followed by the table and its javascript
        wrapped in a paragraph.
    """
    table = DataTable(self._get_stats(), "cutadapt", index=True)
    options = {
        'scrollX': '300px',
        'pageLength': 15,
        'scrollCollapse': 'true',
        'dom': 'rtpB',
        "paging": "false",
        'buttons': ['copy', 'csv'],
    }
    table.datatable.datatable_options = options
    script = table.create_javascript_function()
    markup = table.create_datatable(float_format='%.3g')

    description = (
        "Reads statistics after trimming and adapter removal. The "
        "A, C, G, T, N columns report the percentage of each bases in "
        "the overall sequences"
    )
    return description + "<p>{} {}</p>".format(markup, script)
def test_datatables():
    """Smoke test of DataTableFunction and DataTable.

    The ROI dataframe obtained from the first element of ``chr_list`` is
    given a ``link`` column, then rendered; the test passes if nothing
    raises.
    """
    bed_file = sequana_data("JB409847.bed")
    genbank_file = sequana_data("JB409847.gbk")
    bed = bedtools.GenomeCov(bed_file, genbank_file)
    bed.compute_gc_content(sequana_data("JB409847.fasta"))

    chrom = bed.chr_list[0]
    chrom.run(4001)
    rois = chrom.get_rois()
    rois.df['link'] = 'test'

    js_function = DataTableFunction(rois.df, 'roi')
    js_function.set_links_to_column('link', 'start')
    js_function.datatable_options = {
        'scrollX': 'true',
        'pageLength': 15,
        'scrollCollapse': 'true',
        'dom': 'Bfrtip',
        'buttons': ['copy', 'csv'],
    }

    table = DataTable(rois.df, 'rois', js_function)
    html_table = table.create_datatable(float_format='%.3g')
def test_datatables():
    """Check that a datatable with links can be created from ROIs.

    No assertion is made on the output: the test only checks that the
    calls run without raising.
    """
    bed = bedtools.GenomeCov(
        sequana_data("JB409847.bed"),
        sequana_data("JB409847.gbk"),
    )
    fasta_file = sequana_data("JB409847.fasta")
    bed.compute_gc_content(fasta_file)

    first = bed.chr_list[0]
    first.run(4001)
    regions = first.get_rois()
    regions.df['link'] = 'test'

    options = {
        'scrollX': 'true', 'pageLength': 15, 'scrollCollapse': 'true',
        'dom': 'Bfrtip', 'buttons': ['copy', 'csv'],
    }
    function = DataTableFunction(regions.df, 'roi')
    function.set_links_to_column('link', 'start')
    function.datatable_options = options

    html_table = DataTable(regions.df, 'rois', function).create_datatable(
        float_format='%.3g')
def add_flag_section(self):
    """Append the "Flags information" section to ``self.sections``.

    The ``'flags'`` entry of ``self._computation()`` is rendered as a
    datatable named "flags", index included.
    """
    flags = self._computation()['flags']

    table = DataTable(flags, "flags", index=True)
    options = {
        'scrollX': '300px',
        'pageLength': 15,
        'scrollCollapse': 'true',
        'dom': 'tB',
        "paging": "false",
        'buttons': ['copy', 'csv'],
    }
    table.datatable.datatable_options = options
    script = table.create_javascript_function()
    markup = table.create_datatable(float_format='%.3g')

    section = {
        "name": "Flags information",
        "anchor": "flags",
        "content": "{} {}".format(markup, script),
    }
    self.sections.append(section)
def add_flag_section(self):
    """Add a section showing the flags table.

    The table is built from ``self._computation()['flags']`` and the
    resulting section (name "Flags information", anchor "flags") is
    appended to ``self.sections``.
    """
    computed = self._computation()
    datatable = DataTable(computed['flags'], "flags", index=True)
    datatable.datatable.datatable_options = {
        'scrollX': '300px', 'pageLength': 15, 'scrollCollapse': 'true',
        'dom': 'tB', "paging": "false", 'buttons': ['copy', 'csv'],
    }
    js = datatable.create_javascript_function()
    html_tab = datatable.create_datatable(float_format='%.3g')

    content = "{} {}".format(html_tab, js)
    self.sections.append(
        {"name": "Flags information", "anchor": "flags", "content": content}
    )
def add_table(self):
    """Append the "Table" section to ``self.sections``.

    A copy of ``self.summary`` is relabelled with a single ``data``
    column and completed with a ``url`` column holding the same address
    on every row; ``set_links_to_column`` is then called with these two
    columns.
    """
    frame = self.summary.copy()
    frame.columns = ['data']
    address = 'http://sequana.readthedocs.org'
    frame['url'] = [address for _ in range(len(frame))]

    datatable = DataTable(frame, "table", index=True)
    options = {
        'scrollX': '300px',
        'pageLength': 15,
        'scrollCollapse': 'true',
        'dom': 'tB',
        "paging": "false",
        'buttons': ['copy', 'csv'],
    }
    datatable.datatable.datatable_options = options
    datatable.datatable.set_links_to_column('url', 'data')

    script = datatable.create_javascript_function()
    markup = datatable.create_datatable(float_format='%.3g')

    self.sections.append({
        "name": "Table",
        "anchor": "table",
        "content": "{} {}".format(markup, script),
    })
def add_stats(self):
    """Append the "Basic stats on read length" section to ``self.sections``.

    ``self.summary['read_stats']`` is turned into a one-row dataframe
    labelled "read length stats" and rendered as a datatable.
    """
    stats = pd.Series(self.summary['read_stats'])
    frame = stats.to_frame().T
    frame.index = ['read length stats']

    # NOTE(review): the identifier "table" and the anchor "table" are the
    # same as those used in add_table -- confirm both sections never end
    # up in the same page.
    datatable = DataTable(frame, "table", index=True)
    datatable.datatable.datatable_options = {
        'scrollX': '300px', 'pageLength': 15, 'scrollCollapse': 'true',
        'dom': 't', "paging": "false", 'buttons': ['copy', 'csv'],
    }
    script = datatable.create_javascript_function()

    # IMPORTANT: here conversion to integer with %d
    # to round and make integer. !! The GC is therefore
    # converted to integer as well.
    markup = datatable.create_datatable(float_format='%d')

    section = {
        "name": "Basic stats on read length",
        "anchor": "table",
        "content": "{} {}".format(markup, script),
    }
    self.sections.append(section)