Example #1
0
    def __init__(self, c_id, mod):
        """Build a report section from one custom-content block.

        c_id: unique ID of this custom content block; used to derive the
            default section name and anchor.
        mod: dict with keys 'config' (user-supplied settings) and 'data'
            (the data to plot or embed).
        """

        # Section name: user-supplied, else a title-cased form of the ID
        modname = mod['config'].get('section_name', c_id.replace('_', ' ').title())
        if modname == '' or modname is None:
            modname = 'Custom Content'

        # Initialise the parent object
        super(MultiqcModule, self).__init__(
            name = modname,
            anchor = mod['config'].get('section_anchor', c_id),
            href = mod['config'].get('section_href'),
            info = mod['config'].get('description')
        )

        # Default the plot title to the section name if none was supplied
        pconfig = mod['config'].get('pconfig', {})
        if pconfig.get('title') is None:
            pconfig['title'] = modname

        # Table
        if mod['config'].get('plot_type') == 'table':
            # Custom-content tables keep their input row order by default
            pconfig['sortRows'] = pconfig.get('sortRows', False)
            headers = mod['config'].get('headers')
            self.add_section( plot = table.plot(mod['data'], headers, pconfig) )
            # Tables are the only plot type whose raw data is also written to file
            self.write_data_file( mod['data'], "multiqc_{}".format(modname.lower().replace(' ', '_')) )

        # Bar plot
        elif mod['config'].get('plot_type') == 'bargraph':
            self.add_section( plot = bargraph.plot(mod['data'], mod['config'].get('categories'), pconfig) )

        # Line plot
        elif mod['config'].get('plot_type') == 'linegraph':
            self.add_section( plot = linegraph.plot(mod['data'], pconfig) )

        # Scatter plot
        elif mod['config'].get('plot_type') == 'scatter':
            self.add_section( plot = scatter.plot(mod['data'], pconfig) )

        # Heatmap
        elif mod['config'].get('plot_type') == 'heatmap':
            self.add_section( plot = heatmap.plot(mod['data'], mod['config'].get('xcats'), mod['config'].get('ycats'), pconfig) )

        # Beeswarm plot
        elif mod['config'].get('plot_type') == 'beeswarm':
            self.add_section( plot = beeswarm.plot(mod['data'], pconfig) )

        # Raw HTML
        elif mod['config'].get('plot_type') == 'html':
            self.add_section( content = mod['data'] )

        # Raw image file as html
        elif mod['config'].get('plot_type') == 'image':
            self.add_section( content = mod['data'] )

        # Not supplied
        elif mod['config'].get('plot_type') == None:
            log.warning("Plot type not found for content ID '{}'".format(c_id))

        # Not recognised
        else:
            log.warning("Error - custom content plot type '{}' not recognised for content ID {}".format(mod['config'].get('plot_type'), c_id))
Example #2
0
    def parse_plotCorrelation(self):
        """Find and parse plotCorrelation output, then add a heatmap section.

        Returns the number of samples with correlation data, or None if
        files were found but no valid data could be collected.
        """
        self.deeptools_plotCorrelationData = dict()
        for f in self.find_log_files('deeptools/plotCorrelationData', filehandles=False):
            parsed_data, samples = self.parsePlotCorrelationData(f)
            for k, v in parsed_data.items():
                if k in self.deeptools_plotCorrelationData:
                    log.warning("Replacing duplicate sample {}.".format(k))
                self.deeptools_plotCorrelationData[k] = v
            if len(parsed_data) > 0:
                self.add_data_source(f, section='plotCorrelation')

        # Bug fix: respect user-configured sample-ignore patterns. This call
        # was missing here although other parsers in this module apply it.
        self.deeptools_plotCorrelationData = self.ignore_samples(self.deeptools_plotCorrelationData)

        if len(self.deeptools_plotCorrelationData) > 0:
            config = {
                'id': 'deeptools_correlation_plot',
                'title': 'deeptools: Correlation Plot',
            }
            # Keep the sample order of the (last) parsed file; samples that
            # were ignored or missing are simply skipped
            data = []
            for s_name in samples:
                try:
                    data.append(self.deeptools_plotCorrelationData[s_name])
                except KeyError:
                    pass
            if len(data) == 0:
                log.debug('No valid data for correlation plot')
                return None

            self.add_section(
                name="Correlation heatmap",
                anchor="deeptools_correlation",
                description="Pairwise correlations of samples based on distribution of sequence reads",
                plot=heatmap.plot(data, samples, samples, config)
            )

        return len(self.deeptools_plotCorrelationData)
Example #3
0
    def parse_relatedness2(self):
        """Parse vcftools relatedness2 output and plot one heatmap per matrix.

        Returns the number of valid matrices found.
        """
        matrices = {}
        for f in self.find_log_files('vcftools/relatedness2',
                                     filehandles=True):
            m = _Relatedness2Matrix(f)
            # Skip files that parsed to an empty matrix
            if m.data and m.x_labels and m.y_labels:
                matrices[f['s_name']] = m

        matrices = self.ignore_samples(matrices)

        # Bug fix: return early when nothing was found, instead of logging
        # "Found 0 valid relatedness2 matrices" on every run
        if len(matrices) == 0:
            return 0

        log.info('Found %s valid relatedness2 matrices', len(matrices))

        helptext = '''
        `RELATEDNESS_PHI` gives a relatedness score between two samples. A higher score indicates a higher degree of
        relatedness, up to a maximum of 0.5. Samples are sorted alphabetically on each axis, and specific IDs can be
        found in the graph with the Highlight tab.
        '''

        # Bug fix: suffix the anchor with an index, otherwise multiple input
        # files produced sections sharing the same (duplicate) HTML anchor
        for idx, (name, m) in enumerate(matrices.items(), start=1):
            self.add_section(
                name='Vcftools relatedness2',
                anchor='vcftools_relatedness2-{}'.format(idx),
                description=
                "**Input:** `{}`.\n\n Heatmap of `RELATEDNESS_PHI` values from the output of vcftools relatedness2."
                .format(name),
                helptext=helptext,
                plot=heatmap.plot(m.data,
                                  xcats=m.x_labels,
                                  ycats=m.y_labels,
                                  pconfig={
                                      'square': True,
                                      'decimalPlaces': 7
                                  }))

        return len(matrices)
Example #4
0
    def primer_heatmap(self):
        """ Heatmap showing information on each primer found for every sample """
        # Top level dict contains sample IDs + OrderedDict(primer, counts)

        # Bug fix: guard BEFORE iterating, so a None/empty primer dict neither
        # crashes the loop nor adds an empty section (the old check only
        # guarded the section, after the loop had already run)
        if not self.ivar_primers:
            return

        final_data = list()
        final_xcats = list()
        final_ycats = list()

        for sample, primer_counts in self.ivar_primers.items():
            final_ycats.append(sample)
            row = list()
            for prim, val in primer_counts.items():
                # Bug fix: collect each primer name only once; previously the
                # x-axis categories were re-appended for every sample, so the
                # labels were duplicated N-samples times
                if prim not in final_xcats:
                    final_xcats.append(prim)
                row.append(val)
            final_data.append(row)

        pconfig = {
            'id': 'ivar-primer-count-heatmap',
            'decimalPlaces': 0,
            'square': False,
            'title': 'iVar: Number of primers found for each sample'
        }

        self.add_section(
            name='iVar Primer Counts',
            anchor='ivar-primers-heatmap',
            description='Counts observed for each primer per sample.',
            helptext=
            'This lists the number of times a specific primer was found in the respective sample.',
            plot=heatmap.plot(final_data, final_xcats, final_ycats,
                              pconfig))
Example #5
0
    def __init__(self, c_id, mod):
        """Create a report section for a single custom-content block.

        c_id: unique ID of the content block (drives default name/anchor).
        mod: dict with 'config' (user settings) and 'data' (plot payload).
        """

        cfg = mod['config']

        # Pick a human-readable section name, falling back on the content ID
        modname = cfg.get('section_name', c_id.replace('_', ' ').title())
        if modname == '' or modname is None:
            modname = 'Custom Content'

        # Initialise the parent object
        super(MultiqcModule, self).__init__(
            name=modname,
            anchor=cfg.get('section_anchor', c_id),
            href=cfg.get('section_href'),
            info=cfg.get('description'),
        )

        # Default the plot title to the section name
        pconfig = cfg.get('pconfig', {})
        if pconfig.get('title') is None:
            pconfig['title'] = modname

        plot_type = cfg.get('plot_type')

        if plot_type == 'table':
            # Tables keep their input row order by default, and are the one
            # plot type whose raw data is also written to a file
            pconfig['sortRows'] = pconfig.get('sortRows', False)
            self.add_section(plot=table.plot(mod['data'], cfg.get('headers'), pconfig))
            self.write_data_file(mod['data'], "multiqc_{}".format(modname.lower().replace(' ', '_')))

        elif plot_type == 'bargraph':
            self.add_section(plot=bargraph.plot(mod['data'], cfg.get('categories'), pconfig))

        elif plot_type == 'linegraph':
            self.add_section(plot=linegraph.plot(mod['data'], pconfig))

        elif plot_type == 'scatter':
            self.add_section(plot=scatter.plot(mod['data'], pconfig))

        elif plot_type == 'heatmap':
            self.add_section(plot=heatmap.plot(mod['data'], cfg.get('xcats'), cfg.get('ycats'), pconfig))

        elif plot_type == 'beeswarm':
            self.add_section(plot=beeswarm.plot(mod['data'], pconfig))

        # Raw HTML and image content are both embedded verbatim
        elif plot_type in ('html', 'image'):
            self.add_section(content=mod['data'])

        elif plot_type is None:
            log.warning("Plot type not found for content ID '{}'".format(c_id))

        else:
            log.warning("Error - custom content plot type '{}' not recognised for content ID {}".format(plot_type, c_id))
Example #6
0
    def parse_relatedness2(self):
        """Parse relatedness2 output files and render one heatmap per matrix.

        Returns the number of valid matrices found (0 if none).
        """
        matrices = {}
        for f in self.find_log_files('vcftools/relatedness2', filehandles=True):
            matrix = _Relatedness2Matrix(f)
            # Only keep matrices that actually parsed to data + labels
            if matrix.data and matrix.x_labels and matrix.y_labels:
                matrices[f['s_name']] = matrix

        matrices = self.ignore_samples(matrices)
        if not matrices:
            return 0

        log.info('Found {} valid relatedness2 matrices'.format(len(matrices)))

        helptext = '''
        `RELATEDNESS_PHI` gives a relatedness score between two samples. A higher score indicates a higher degree of
        relatedness, up to a maximum of 0.5. Samples are sorted alphabetically on each axis, and specific IDs can be
        found in the graph with the Highlight tab.
        '''

        for name, matrix in matrices.items():
            description = (
                "**Input:** `{}`.\n\n Heatmap of `RELATEDNESS_PHI` values "
                "from the output of vcftools relatedness2."
            ).format(name)
            self.add_section(
                name = 'Relatedness2',
                anchor = 'vcftools-relatedness2',
                description = description,
                helptext = helptext,
                plot = heatmap.plot(
                    matrix.data,
                    xcats = matrix.x_labels,
                    ycats = matrix.y_labels,
                    pconfig = {'square': True, 'decimalPlaces': 7, 'title': 'VCFTools: Relatedness2'}
                )
            )

        return len(matrices)
Example #7
0
 def roary_heatmap_plot(self, directory):
     """Build a black/white presence-absence heatmap for one Roary directory."""
     pconfig = {
         'id': "roary_" + directory,
         'title': "Roary: " + directory,
         'square': False,
         'colstops': [[0, '#FFFFFF'], [1, '#000000']],
         'legend': False,
     }
     data = self.roary_gene_data[directory]
     sample_cats = self.roary_gene_samples[directory]
     gene_cats = self.roary_gene_genes[directory]
     return heatmap.plot(data, sample_cats, gene_cats, pconfig)
Example #8
0
    def status_heatmap(self):
        """ Heatmap showing all statuses for every sample """
        # Map FastQC statuses onto numbers for the heatmap colour scale
        status_numbers = {
            'pass': 1,
            'warn': 0.5,
            'fail': 0.25
        }
        data = []
        s_names = []
        status_cats = OrderedDict()
        # First pass: collect the union of categories across all samples,
        # with prettified display names
        for s_name in sorted(self.fastqc_data.keys()):
            s_names.append(s_name)
            for status_cat, status in self.fastqc_data[s_name]['statuses'].items():
                if status_cat not in status_cats:
                    status_cats[status_cat] = status_cat.replace('_', ' ').title().replace('Gc', 'GC')
        # Second pass: one row per sample, 0 where a category is missing
        for s_name in s_names:
            row = []
            for status_cat in status_cats:
                try:
                    row.append(status_numbers[self.fastqc_data[s_name]['statuses'][status_cat]])
                except KeyError:
                    row.append(0)
            data.append(row)

        pconfig = {
            # Bug fix: the 'id' key name and trailing comma were missing, so
            # Python's implicit string concatenation produced a bogus
            # 'fastqc-status-heatmaptitle' key and the plot had neither an
            # id nor a title
            'id': 'fastqc-status-heatmap',
            'title': 'FastQC: Statuses',
            'xTitle': 'Category',
            'yTitle': 'Sample',
            'min': 0,
            'max': 1,
            'square': False,
            'colstops': [
                [0, '#ffffff'],
                [0.25, '#d9534f'],
                [0.5, '#fee391'],
                [1, '#5cb85c'],
            ],
            'decimalPlaces': 1,
            'legend': False,
            'datalabels': False
        }

        self.add_section (
            name = 'Statuses',
            anchor = 'fastqc-statuses',
            description = 'FastQC section statuses for each sample.',
            helptext = '''
            FastQC assigns a status for each section of the report.
            Here, we summarise all of these into a single heatmap for a quick overview.

            Note that not all FastQC sections have plots in MultiQC reports, but all statuses
            are shown in this heatmap.
            ''',
            plot = heatmap.plot(data, list(status_cats.values()), s_names, pconfig)
        )
Example #9
0
    def cor_heatmap_plot(self, heatmap_name, heatmap_val):
        """ Make the HighCharts HTML to plot sample correlation heatmap. """

        plot_config = {
            'title': 'Pearson correlation',
            'xlab': True,
        }
        section_desc = ('Pearson correlation between log<sub>2</sub> normalised '
                        'CPM values are calculated and clustered.')
        self.add_section(
            description=section_desc,
            plot=heatmap.plot(heatmap_val, heatmap_name, pconfig=plot_config),
        )
Example #10
0
    def __init__(self, c_id, mod):
        """Render a single custom-content block into this module's intro HTML.

        c_id: unique content-block ID (drives default name/anchor).
        mod: dict with 'config' (user settings) and 'data' (plot payload).
        """

        cfg = mod['config']

        # Section name: user-supplied, else a title-cased form of the ID
        modname = cfg.get('section_name', c_id.replace('_', ' ').title())

        # Initialise the parent object
        super(MultiqcModule, self).__init__(
            name=modname,
            anchor=cfg.get('section_anchor', c_id),
            href=cfg.get('section_href'),
            info=cfg.get('description'),
        )

        # Default the plot title to the section name
        pconfig = cfg.get('pconfig', {})
        if pconfig.get('title') is None:
            pconfig['title'] = modname

        plot_type = cfg.get('plot_type')

        if plot_type == 'table':
            pconfig['sortRows'] = pconfig.get('sortRows', False)
            self.intro += table.plot(mod['data'], None, pconfig)

        elif plot_type == 'bargraph':
            self.intro += bargraph.plot(mod['data'], cfg.get('categories'), pconfig)

        elif plot_type == 'linegraph':
            self.intro += linegraph.plot(mod['data'], pconfig)

        elif plot_type == 'scatter':
            self.intro += scatter.plot(mod['data'], pconfig)

        elif plot_type == 'heatmap':
            self.intro += heatmap.plot(mod['data'], cfg.get('xcats'), cfg.get('ycats'), pconfig)

        elif plot_type == 'beeswarm':
            self.intro += beeswarm.plot(mod['data'], pconfig)

        elif plot_type is None:
            log.warning("Plot type not found for content ID '{}'".format(c_id))

        else:
            log.warning(
                "Error - custom content plot type '{}' not recognised for content ID {}"
                .format(plot_type, c_id))
Example #11
0
    def parse_relatedness2(self):
        """Parse vcftools relatedness2 matrices and draw one heatmap per file.

        Returns the number of valid matrices found (0 if none).
        """
        matrices = {}
        for f in self.find_log_files("vcftools/relatedness2",
                                     filehandles=True):
            parsed = _Relatedness2Matrix(f)
            if parsed.data and parsed.x_labels and parsed.y_labels:
                matrices[f["s_name"]] = parsed
            # Recorded for every file, valid or not
            self.add_data_source(f, section="Relatedness")

        matrices = self.ignore_samples(matrices)
        if not matrices:
            return 0

        log.info("Found {} valid relatedness2 matrices".format(len(matrices)))

        # NOTE: the matrices cannot be written out with write_data_file in
        # their current format, so no flat data file is produced here.

        helptext = """
        `RELATEDNESS_PHI` gives a relatedness score between two samples. A higher score indicates a higher degree of
        relatedness, up to a maximum of 0.5. Samples are sorted alphabetically on each axis, and specific IDs can be
        found in the graph with the Highlight tab.
        """

        # Index the anchors so multiple matrices get unique HTML ids
        for idx, (name, matrix) in enumerate(matrices.items(), start=1):
            self.add_section(
                name="Relatedness2",
                anchor="vcftools-relatedness2-{}".format(idx),
                description="**Input:** `{}`.\n\n Heatmap of `RELATEDNESS_PHI` values from the output of vcftools relatedness2.".format(name),
                helptext=helptext,
                plot=heatmap.plot(
                    matrix.data,
                    xcats=matrix.x_labels,
                    ycats=matrix.y_labels,
                    pconfig={
                        "id": "vcftools-relatedness2-heatmap-{}".format(idx),
                        "title": "VCFTools: Relatedness2",
                        "square": True,
                        "decimalPlaces": 7,
                    },
                ),
            )

        return len(matrices)
Example #12
0
def parse_reports(self):
    """Find deepTools plotCorrelation matrices and add a correlation heatmap.

    Returns 1 if a heatmap was found, else 0.
    """

    #Set up vars
    self.heatmap_data = dict()

    #Default search pattern
    sp = config.sp['deepTools']['Corr']

    #Go through files and parse data
    found_heatmap = False
    for f in self.find_log_files(sp):
        # Parse the file: first whitespace-split line is the x categories,
        # each following line is "<row label> <float> <float> ..."
        xcats = None
        ycats = []
        data = []
        for l in f['f'].splitlines():
            s = l.split()
            if xcats is None:
                xcats = s
            else:
                ycats.append(s[0])
                data.append([float(x) for x in s[1:]])
        #Should only have one heat map per report
        if found_heatmap:
            log.warning("Duplicate sample name found! Overwriting: {}".format(
                f['s_name']))
        found_heatmap = True
        self.add_data_source(f, section='Correlation')

    # NOTE(review): xcats/ycats/data below refer to whichever file was parsed
    # last in the loop above — intentional, as only one heatmap is expected
    if found_heatmap:
        pconfig = {'title': 'Samples correlation', 'reverseColors': False}
        # Legacy API: append a raw-HTML section rather than add_section()
        self.sections.append({
            'name':
            'Samples Similarity',
            'anchor':
            'deepToolsheatmap',
            'content':
            '<p>This heatmap was generated by ' +
            '<a href="http://deeptools.readthedocs.io/en/latest/content/tools/plotCorrelation.html" target="_blank">plotCorrelation</a> '
            + ' based on the output of ' +
            '<a href="http://deeptools.readthedocs.io/en/latest/content/tools/multiBamSummary.html" target="_blank">multiBamSummary</a> or'
            +
            '<a href="http://deeptools.readthedocs.io/en/latest/content/tools/multiBigwigSummary.html" target="_blank">multiBigwigSummary</a>'
            + '</p>' + hm.plot(data, xcats, ycats, pconfig)
        })

    #Return number of samples found
    return 1 if found_heatmap else 0
Example #13
0
    def somalier_relatedness_heatmap_plot(self):
        """Draw a symmetric heatmap of pairwise sample relatedness values."""
        # (inspired by MultiQC/modules/vcftools/relatedness2.py)

        labels = set()
        rels = defaultdict(dict)
        for s_name, d in self.somalier_data.items():
            if "relatedness" not in d:
                continue
            # Pair names are encoded as "sampleA*sampleB"
            a, b = s_name.split("*")
            labels.update((a, b))
            score = float(d["relatedness"])
            rels[a][b] = score
            rels[b][a] = score
            # Self-relatedness is 1 by definition
            rels[a][a] = 1.0
            rels[b][b] = 1.0

        # impose alphabetical order and avoid json serialisation errors in utils.report
        labels = sorted(labels)

        # -2 marks pairs with no observed relatedness value
        data = [[rels[x].get(y, -2) for y in labels] for x in labels]

        if data:
            pconfig = {
                "id": "somalier_relatedness_heatmap_plot",
                "title": "Somalier: Sample Relatedness",
                "xlab": "Sample A",
                "ylab": "Sample B",
            }

            self.add_section(
                name="Relatedness Heatmap",
                anchor="somalier-relatedness-heatmap",
                description="Heatmap displaying relatedness of sample pairs.",
                plot=heatmap.plot(
                    data=data,
                    xcats=labels,
                    ycats=labels,
                    pconfig=pconfig,
                ),
            )
Example #14
0
 def abricate_heatmap_plot(self, db):
     """Return a heatmap of ABRicate hits for one reference database."""
     colstops = [
         [0, '#FFFFFF'],
         [0.6, '#ffffe5'],
         [0.7, '#d9f0a3'],
         [0.95, '#004529'],
         [1, '#000000'],
     ]
     pconfig = {
         'id': "abricate_" + db,
         'title': "ABRicate: " + db,
         'square': False,
         'colstops': colstops,
     }
     return heatmap.plot(self.abricate_data[db], self.abricate_xcats[db],
                         self.abricate_ycats[db], pconfig)
Example #15
0
 def plot_correlation_heatmap(self):
     """ Return HTML for correlation heatmap """
     # Spearman is preferred unless the user asked for Pearson explicitly
     correlation_type = getattr(config, 'rna_seqc', {}).get('default_correlation', 'spearman')
     if self.rna_seqc_spearman is not None and correlation_type != 'pearson':
         corr_type, data = 'Spearman', self.rna_seqc_spearman
     elif self.rna_seqc_pearson is not None:
         corr_type, data = 'Pearson', self.rna_seqc_pearson
     else:
         # No correlation data parsed at all
         return
     pconfig = {
         'id': 'rna_seqc_correlation_heatmap',
         'title': 'RNA-SeQC: {} Sample Correlation'.format(corr_type)
     }
     self.add_section(
         name = '{} Correlation'.format(corr_type),
         anchor = 'rseqc-rna_seqc_correlation',
         plot = heatmap.plot(data[1], data[0], data[0], pconfig)
     )
Example #16
0
 def plot_correlation_heatmap(self):
     """Add the sample-correlation heatmap section, if any data was parsed."""
     # Spearman by default; the user can request Pearson via the config
     cfg = getattr(config, "rna_seqc", {})
     correlation_type = cfg.get("default_correlation", "spearman")
     if self.rna_seqc_spearman is not None and correlation_type != "pearson":
         data, corr_type = self.rna_seqc_spearman, "Spearman"
     elif self.rna_seqc_pearson is not None:
         data, corr_type = self.rna_seqc_pearson, "Pearson"
     else:
         return
     self.add_section(
         name="{} Correlation".format(corr_type),
         anchor="rseqc-rna_seqc_correlation",
         plot=heatmap.plot(
             data[1],
             data[0],
             data[0],
             {
                 "id": "rna_seqc_correlation_heatmap",
                 "title": "RNA-SeQC: {} Sample Correlation".format(corr_type),
             },
         ),
     )
Example #17
0
 def plot_correlation_heatmap(self):
     """ Return HTML for correlation heatmap """
     # Which correlation to show: Spearman unless Pearson was requested
     selected = getattr(config, 'rna_seqc', {}).get('default_correlation', 'spearman')
     data = None
     corr_type = None
     if selected != 'pearson' and self.rna_seqc_spearman is not None:
         data = self.rna_seqc_spearman
         corr_type = 'Spearman'
     elif self.rna_seqc_pearson is not None:
         data = self.rna_seqc_pearson
         corr_type = 'Pearson'
     if data is None:
         return
     pconfig = {
         'id': 'rna_seqc_correlation_heatmap',
         'title': 'RNA-SeQC: {} Sample Correlation'.format(corr_type),
     }
     # Legacy API: sections list of dicts rather than add_section()
     section = {
         'name': '{} Correlation'.format(corr_type),
         'anchor': 'rseqc-rna_seqc_correlation',
         'content': heatmap.plot(data[1], data[0], data[0], pconfig),
     }
     self.sections.append(section)
Example #18
0
def parse_reports(self):
    """Find sample-distance matrices and add a sample-similarity heatmap.

    Returns 1 if a heatmap was found, else 0.
    """

    self.heatmap_data = dict()

    found_heatmap = False
    for f in self.find_log_files('ngi_rnaseq/heatmap'):
        # Each report holds one matrix: a header row of x categories, then
        # one "<row label> <float> <float> ..." line per y category
        xcats = None
        ycats = []
        data = []
        for line in f['f'].splitlines():
            fields = line.split()
            if xcats is None:
                xcats = fields
            else:
                ycats.append(fields[0])
                data.append([float(v) for v in fields[1:]])
        # Only one heat map is expected per report
        if found_heatmap:
            log.debug("Duplicate sample name found! Overwriting: {}".format(
                f['s_name']))
        found_heatmap = True
        self.add_data_source(f, section='heatmap')

    if found_heatmap:
        pconfig = {'title': 'Sample Distances', 'reverseColors': True}
        self.add_section(
            name='Sample Similarity',
            anchor='ngi_rnaseq-sample_similarity',
            description='''To generate this plot, gene counts are normalised using
                <a href="https://bioconductor.org/packages/release/bioc/html/edgeR.html" target="_blank">edgeR</a>.
                Euclidean distances between log<sub>2</sub> normalised CPM values are then calculated and clustered.''',
            plot=heatmap.plot(data, xcats, ycats, pconfig))

    return 1 if found_heatmap else 0
Example #19
0
    def bcbio_qsignature_chart(self, names):
        """ Make the bcbio assignment rates plot """

        # Symmetric matrix of pairwise scores; diagonal forced to 0
        data = defaultdict(dict)
        for f in self.find_log_files(names):
            s_name = self.clean_s_name(f['fn'], root=None)
            for line in f['f'].splitlines():
                cols = line.strip().split()
                score = float(cols[2])
                data[cols[0]][cols[1]] = score
                data[cols[1]][cols[0]] = score
                data[cols[0]][cols[0]] = 0
                data[cols[1]][cols[1]] = 0

        names = data.keys()
        hmdata = [[data[a][b] for b in names] for a in names]
        return heatmap.plot(hmdata, names)
Example #20
0
    def parse_plotCorrelation(self):
        """Find and parse plotCorrelation output; add a correlation heatmap.

        Returns the number of samples with data, or None if files were found
        but no valid plot data could be assembled.
        """
        self.deeptools_plotCorrelationData = dict()
        for f in self.find_log_files('deeptools/plotCorrelationData',
                                     filehandles=False):
            parsed_data, samples = self.parsePlotCorrelationData(f)
            for sample, values in parsed_data.items():
                if sample in self.deeptools_plotCorrelationData:
                    log.warning("Replacing duplicate sample {}.".format(sample))
                self.deeptools_plotCorrelationData[sample] = values
            if parsed_data:
                self.add_data_source(f, section='plotCorrelation')

        # Apply user-configured sample-ignore patterns
        self.deeptools_plotCorrelationData = self.ignore_samples(
            self.deeptools_plotCorrelationData)

        if self.deeptools_plotCorrelationData:
            # Keep the sample order of the (last) parsed file; ignored or
            # missing samples are skipped
            data = []
            for s_name in samples:
                if s_name in self.deeptools_plotCorrelationData:
                    data.append(self.deeptools_plotCorrelationData[s_name])
            if not data:
                log.debug('No valid data for correlation plot')
                return None

            self.add_section(
                name="Correlation heatmap",
                anchor="deeptools_correlation",
                description=
                "Pairwise correlations of samples based on distribution of sequence reads",
                plot=heatmap.plot(data, samples, samples, {
                    'id': 'deeptools_correlation_plot',
                    'title': 'deeptools: Correlation Plot',
                }))

        return len(self.deeptools_plotCorrelationData)
Example #21
0
    def bcbio_qsignature_chart(self, names):
        """ Make the bcbio assignment rates plot """

        # Build a symmetric pairwise-score matrix with a zero diagonal
        pair_scores = defaultdict(dict)
        for f in self.find_log_files(names):
            s_name = self.clean_s_name(f['fn'], root=None)
            for line in f['f'].splitlines():
                cols = line.strip().split()
                a, b, score = cols[0], cols[1], float(cols[2])
                pair_scores[a][b] = score
                pair_scores[b][a] = score
                pair_scores[a][a] = 0
                pair_scores[b][b] = 0

        names = pair_scores.keys()
        hmdata = list()
        for row_name in names:
            hmdata.append([pair_scores[row_name][col_name] for col_name in names])
        return heatmap.plot(hmdata, names)
Example #22
0
    def bcbio_qsignature_chart(self, fnames):
        """ Make the bcbio assignment rates plot """

        # Symmetric pairwise-score matrix with a zero diagonal
        scores = defaultdict(dict)
        for f in self.find_log_files(fnames):
            s_name = self.clean_s_name(f['fn'], root=None)
            fpath = os.path.join(f['root'], f['fn'])
            with open(fpath) as in_handle:
                for line in in_handle:
                    cols = line.strip().split()
                    val = float(cols[2])
                    scores[cols[0]][cols[1]] = val
                    scores[cols[1]][cols[0]] = val
                    scores[cols[0]][cols[0]] = 0
                    scores[cols[1]][cols[1]] = 0

        # Implicitly returns None when no data was found
        if not scores:
            return
        names = list(scores.keys())
        hmdata = [[scores[a][b] for b in names] for a in names]
        return heatmap.plot(hmdata, names)
Example #23
0
def parse_reports(self):
    """ Find diff number table and parse their data """

    # Reset the results container
    self.diff_num = dict()

    # Locate the diff-number matrix file; exactly one is allowed.
    for idx, f in enumerate(
            self.find_log_files('diffNum/diff_matrix',
                                filehandles=False,
                                filecontents=False)):
        if idx > 0:
            raise ValueError('more than one diff number table found.')
        self.diff_num['table'] = f

    # UserWarning is MultiQC's signal that a module found no data
    if not self.diff_num:
        raise UserWarning

    self.write_data_file(self.diff_num, 'multiqc_diffNum_matrix')

    # Load the matrix: first column is the row index (compare names)
    table = self.diff_num['table']
    diff_num_file = os.path.join(table['root'], table['fn'])
    df = pd.read_csv(diff_num_file, index_col=0)
    heatmap_name = list(df.columns)
    heatmap_val = [list(df.loc[row]) for row in df.index]

    pconfig = {
        'title': 'Differential Expressed Genes',
        'xlab': True,
    }

    self.add_section(
        description=('This plot shows the Differential expressed gene number'
                     ' for each compare.'),
        plot=heatmap.plot(heatmap_val, heatmap_name, pconfig=pconfig))

    # Return number of compares found
    return len(heatmap_val)
Example #24
0
    def status_heatmap(self):
        """Heatmap showing all statuses for every sample"""
        # Numeric encoding of each FastQC status for the colour scale below
        status_numbers = {"pass": 1, "warn": 0.5, "fail": 0.25}

        s_names = sorted(self.fastqc_data.keys())

        # Union of section names across all samples, in first-seen order,
        # mapped to a prettified display label.
        status_cats = OrderedDict()
        for sample in s_names:
            for section in self.fastqc_data[sample]["statuses"]:
                if section not in status_cats:
                    status_cats[section] = section.replace("_", " ").title().replace("Gc", "GC")

        # One row per sample; a missing or unrecognised status maps to 0.
        data = []
        for sample in s_names:
            statuses = self.fastqc_data[sample]["statuses"]
            data.append([status_numbers.get(statuses.get(section), 0) for section in status_cats])

        pconfig = {
            "id": "fastqc-status-check-heatmap",
            "title": "FastQC: Status Checks",
            "xTitle": "Section Name",
            "yTitle": "Sample",
            "min": 0,
            "max": 1,
            "square": False,
            "colstops": [
                [0, "#ffffff"],
                [0.25, "#d9534f"],
                [0.5, "#fee391"],
                [1, "#5cb85c"],
            ],
            "decimalPlaces": 1,
            "legend": False,
            "datalabels": False,
            "xcats_samples": False,
        }

        self.add_section(
            name="Status Checks",
            anchor="fastqc_status_checks",
            description="""
                Status for each FastQC section showing whether results seem entirely normal (green),
                slightly abnormal (orange) or very unusual (red).
            """,
            helptext="""
                FastQC assigns a status for each section of the report.
                These give a quick evaluation of whether the results of the analysis seem
                entirely normal (green), slightly abnormal (orange) or very unusual (red).

                It is important to stress that although the analysis results appear to give a pass/fail result,
                these evaluations must be taken in the context of what you expect from your library.
                A 'normal' sample as far as FastQC is concerned is random and diverse.
                Some experiments may be expected to produce libraries which are biased in particular ways.
                You should treat the summary evaluations therefore as pointers to where you should concentrate
                your attention and understand why your library may not look random and diverse.

                Specific guidance on how to interpret the output of each module can be found in the relevant
                report section, or in the [FastQC help](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/3%20Analysis%20Modules/).

                In this heatmap, we summarise all of these into a single heatmap for a quick overview.
                Note that not all FastQC sections have plots in MultiQC reports, but all status checks
                are shown in this heatmap.
            """,
            plot=heatmap.plot(data, list(status_cats.values()), s_names, pconfig),
        )
Example #25
0
    def heatmap(self):
        """
        Generates Heatmap for samples considering all features
        Computes the Jensen Shannon Divergence between all samples
        Value 0 corresponds to similar samples and 1 to dissimilar samples
        Output : Plots heatmap and table showing presence of missing features in samples
        """
        names = []           # samples that have BOTH GC and sequence bias features
        gc_names = []        # samples with the GC bias feature
        seq_names = []       # samples with the sequence bias feature
        missing_names = {}   # per-sample table rows: which features are missing
        gc_exists = {}       # sample name -> bool from meta_info 'gc_bias_correct'
        seq_exists = {}      # sample name -> bool from meta_info 'seq_bias_correct'
        for f in self.find_log_files('salmon/fld'):
            if os.path.basename(f['root']) == 'libParams':
                s_name = os.path.abspath(f['root'])
                # Strip the trailing '/libParams' (10 characters) to get the sample dir
                path = s_name[:-10]
                sample_name = self.get_sample_name(s_name)

                # Skip runs produced without bias correction
                if 'no_bias' in s_name:
                    continue
                path_meta_info = os.path.join(path, 'aux_info', 'meta_info.json')
                with open(path_meta_info, 'r') as info:
                    meta_info = json.load(info)

                    gc_exists[sample_name] = meta_info['gc_bias_correct']
                    seq_exists[sample_name] = meta_info['seq_bias_correct']

                if gc_exists[sample_name]:
                    gc_names.append(sample_name)
                    if sample_name not in missing_names:
                        missing_names[sample_name] = {}
                    missing_names[sample_name]['Missing GC Feature'] = 'No'
                else :
                    if sample_name not in missing_names:
                        missing_names[sample_name] = {}
                    missing_names[sample_name]['Missing GC Feature'] = 'Yes'
                if seq_exists[sample_name]:
                    seq_names.append(sample_name)
                    if sample_name not in missing_names:
                        missing_names[sample_name] = {}
                    missing_names[sample_name]['Missing Seq Feature'] = 'No'
                else:
                    if sample_name not in missing_names:
                        missing_names[sample_name] = {}
                    missing_names[sample_name]['Missing Seq Feature'] = 'Yes'
                if gc_exists[sample_name] and seq_exists[sample_name]:
                    names.append(sample_name)

        # Pairwise JSD matrices, initialised to zero
        sims_gc = [[0 for j in range(len(gc_names))] for i in range(len(gc_names))]
        sims_3 = [[0 for j in range(len(seq_names))] for i in range(len(seq_names))]
        sims_5 = [[0 for j in range(len(seq_names))] for i in range(len(seq_names))]
        sims = [[0 for j in range(len(names))] for i in range(len(names))]

        # Combined matrix: average JSD over all available features.
        # 'names' only holds samples with both features, so feature_count is
        # always >= 1 here and the division below cannot be by zero.
        for i in range(len(names)):
            for j in range(len(names)):
                feature_count = 0
                if gc_exists[names[i]] and gc_exists[names[j]]:
                    sims[i][j] += self.jensen_shannon_divergence(self.matrix_gc[names[i]], self.matrix_gc[names[j]])
                    feature_count += 1.0
                for k in range(len(self.nucleotides)):
                    if seq_exists[names[i]] and seq_exists[names[j]]:
                        sims[i][j] += self.jensen_shannon_divergence(self.matrix_seq3[k][names[i]], self.matrix_seq3[k][names[j]])
                        sims[i][j] += self.jensen_shannon_divergence(self.matrix_seq5[k][names[i]], self.matrix_seq5[k][names[j]])
                        feature_count += 2.0

                sims[i][j] /= feature_count
        # GC-only matrix (single feature, no averaging needed)
        for i in range(len(gc_names)):
            for j in range(len(gc_names)):
                if gc_exists[gc_names[i]] and gc_exists[gc_names[j]]:
                    sims_gc[i][j] += self.jensen_shannon_divergence(self.matrix_gc[gc_names[i]], self.matrix_gc[gc_names[j]])

        # Sequence 3'/5' matrices, averaged over nucleotides
        for i in range(len(seq_names)):
            for j in range(len(seq_names)):
                for k in range(len(self.nucleotides)):
                    if seq_exists[seq_names[i]] and seq_exists[seq_names[j]]:
                        sims_3[i][j] += self.jensen_shannon_divergence(self.matrix_seq3[k][seq_names[i]], self.matrix_seq3[k][seq_names[j]])
                        sims_5[i][j] += self.jensen_shannon_divergence(self.matrix_seq5[k][seq_names[i]], self.matrix_seq5[k][seq_names[j]])
                sims_3[i][j] /= (1.0*len(self.nucleotides))
                sims_5[i][j] /= (1.0*len(self.nucleotides))
        pconfig_sim = {
            'title': 'Sample similarity (JSD)',
            'xTitle': 'Samples',
            'yTitle': 'Samples',
        }
        pconfig_sim_gc = {
            'title': 'Feature GC Sample similarity (JSD)',
            'xTitle': 'Samples',
            'yTitle': 'Samples',
        }
        pconfig_sim_3 = {
            'title': 'Feature Seq 3 Sample similarity (JSD)',
            'xTitle': 'Samples',
            'yTitle': 'Samples',
        }
        pconfig_sim_5 = {
            'title': 'Feature Seq 5 Sample similarity (JSD)',
            'xTitle': 'Samples',
            'yTitle': 'Samples',
        }

        # Only add each section if at least one sample had the feature
        if len(gc_exists) > 0:
            self.add_section(plot = heatmap.plot(sims_gc, gc_names, pconfig=pconfig_sim_gc))
        if len(seq_exists) > 0:
            self.add_section(plot = heatmap.plot(sims_3, seq_names, pconfig=pconfig_sim_3))
        if len(seq_exists) > 0:
            self.add_section(plot = heatmap.plot(sims_5, seq_names, pconfig=pconfig_sim_5))
        if len(names) > 0:
            self.add_section(plot = heatmap.plot(sims, names, pconfig=pconfig_sim))

        # Table flagging which samples are missing which bias features
        self.add_section(plot = table.plot(missing_names))
Example #26
0
    def __init__(self):
        """Parse Salmon outputs and build all report sections.

        Reads per-sample meta_info JSON, GC bias models, sequence (3'/5')
        bias models, quant.sf length ratios and fragment length
        distributions, then adds line graphs, a bar plot and correlation
        heatmaps for each parsed dataset.
        """

        # Initialise the parent object
        super(MultiqcModule, self).__init__(
            name='Salmon',
            anchor='salmon',
            href='http://combine-lab.github.io/salmon/',
            info=
            "is a tool for quantifying the expression of transcripts using RNA-seq data."
        )

        # Parse meta information. JSON win!
        self.salmon_meta = dict()

        # Declaring dicts to hold ratios for first, middle and last rows with
        # weights, and the average ratio, for GC bias
        self.salmon_bias_FirstSampleWeights = dict()
        self.salmon_bias_MiddleSampleWeights = dict()
        self.salmon_bias_LastSampleWights = dict()
        self.salmon_bias_Average = dict()
        self.salmon_bias_TotalAverage = dict()

        # Declaring dicts to hold sequence 3' and 5' marginalized ratios for
        # all bases i.e. A, C, G, T and the average bias for 3' and 5'
        self.salmon_seq3A = dict()
        self.salmon_seq3C = dict()
        self.salmon_seq3G = dict()
        self.salmon_seq3T = dict()
        self.salmon_seq5A = dict()
        self.salmon_seq5C = dict()
        self.salmon_seq5G = dict()
        self.salmon_seq5T = dict()
        self.salmon_seq3Average = dict()
        self.salmon_seq5Average = dict()

        # Declaring dict to hold the ratios of Effective v/s Actual length of samples from quant.sf file
        self.salmon_quant = dict()

        # Declaring lists to hold arrays for every sample used in Heatmaps
        self.heatmapFirstrow = []
        self.heatMapMiddleRow = []
        self.heatMapLastRow = []
        self.averageBiasHeatMap = []
        self.salmon_seq3HeatMap = []
        self.salmon_seq5HeatMap = []

        # List of all the sample names
        self.sample_names = []

        count = 0
        for f in self.find_log_files('salmon/meta'):
            # Get the s_name from the parent directory
            s_name = os.path.basename(os.path.dirname(f['root']))
            s_name = self.clean_s_name(s_name, f['root'])
            self.salmon_meta[s_name] = json.loads(f['f'])
            # NOTE(review): partition('|')[0].split() yields a LIST of tokens,
            # so sample_names holds lists (later passed as heatmap axis
            # labels) — confirm this is intended.
            s_name_trimmed = s_name.partition('|')[0].split()
            self.sample_names.append(s_name_trimmed)

            # Check if folder contains GC bias files
            gcBias = checkJSONForBias(os.path.dirname(f['root']), 'gcBias')

            if gcBias:
                # Dicts for every sample for all the bucket(25) ratios to hold (x,y) data for linegraphs
                firstRatioWeight = OrderedDict()
                middleRatioWeight = OrderedDict()
                lastRatioWeight = OrderedDict()
                average = OrderedDict()
                sampleAverage = OrderedDict()

                gc = GCModel()  # Instantiate GCModel class
                # Call the GCModel method to get all observed and expected values
                gc.from_file(os.path.dirname(f['root']))
                # Per-bucket observed/expected ratios, weighted per row
                first_Row = (gc.obs_[0] / gc.exp_[0]) * (gc.obs_weights_[0] /
                                                         gc.exp_weights_[0])
                middle_Row = (gc.obs_[1] / gc.exp_[1]) * (gc.obs_weights_[1] /
                                                          gc.exp_weights_[1])
                last_Row = (gc.obs_[2] / gc.exp_[2]) * (gc.obs_weights_[2] /
                                                        gc.exp_weights_[2])

                # Averaging all the ratios for the entire sample
                totalSampleAverage = (
                    (sum(first_Row) + sum(middle_Row) + sum(last_Row)) /
                    (len(first_Row) + len(middle_Row) + len(last_Row)))
                sampleAverage[count] = totalSampleAverage
                count = count + 1
                self.salmon_bias_TotalAverage[
                    s_name_trimmed[0]] = sampleAverage
                # Averaging ratios for each row, used in the per-row heatmaps
                self.heatmapFirstrow.append(first_Row.tolist())
                self.heatMapMiddleRow.append(middle_Row.tolist())
                self.heatMapLastRow.append(last_Row.tolist())

                heatmapAverage = []
                # Iterating over all the buckets to create Ordered Dicts
                # (x = percent position across the read, y = ratio)
                for i in range(len(first_Row)):
                    index = i * (100 / len(first_Row))
                    firstRatioWeight[index] = first_Row[i]
                    middleRatioWeight[index] = middle_Row[i]
                    lastRatioWeight[index] = last_Row[i]
                    average[index] = np.mean(
                        [first_Row[i], middle_Row[i], last_Row[i]])
                    heatmapAverage.append(average[index])

                # Setting all the ordered dicts to the outermost Dictionaries with sample name as keys
                self.salmon_bias_FirstSampleWeights[s_name] = firstRatioWeight
                self.salmon_bias_MiddleSampleWeights[
                    s_name] = middleRatioWeight
                self.salmon_bias_LastSampleWights[s_name] = lastRatioWeight
                self.salmon_bias_Average[s_name] = average
                self.averageBiasHeatMap.append(heatmapAverage)

            # Check if folder contains sequence bias files
            seqBias = checkJSONForBias(os.path.dirname(f['root']), 'seqBias')
            if seqBias:
                # Dicts for every base for 3' and 5' sequence, average 3' and average 5' and quant dict
                seq3A = OrderedDict()
                seq5A = OrderedDict()
                seq3C = OrderedDict()
                seq5C = OrderedDict()
                seq3G = OrderedDict()
                seq5G = OrderedDict()
                seq3T = OrderedDict()
                seq5T = OrderedDict()
                seq3_Average = OrderedDict()
                seq5_Average = OrderedDict()
                quant_Dict = OrderedDict()

                # Calculate the ratio of all rows for observed by expected
                seq = SeqModel()  # Instantiate SeqModel class
                # Call the SeqModel method to get all observed and expected ratios
                seq.from_file(os.path.dirname(f['root']))
                # Rows 0..3 are bases A, C, G, T respectively
                seq3A_prob = seq.obs3_[0] / seq.exp3_[0]
                seq3C_prob = seq.obs3_[1] / seq.exp3_[1]
                seq3G_prob = seq.obs3_[2] / seq.exp3_[2]
                seq3T_prob = seq.obs3_[3] / seq.exp3_[3]
                seq5A_prob = seq.obs5_[0] / seq.exp5_[0]
                seq5C_prob = seq.obs5_[1] / seq.exp5_[1]
                seq5G_prob = seq.obs5_[2] / seq.exp5_[2]
                seq5T_prob = seq.obs5_[3] / seq.exp5_[3]

                seq3_HeatMap = []
                seq5_HeatMap = []
                # Iterate over the context length to create all Ordered Dictionaries of (x,y) values for linegraph and list for Heatmap
                for i in range(len(seq3A_prob)):
                    index = i * (100 / len(seq3A_prob))
                    seq3A[index] = seq3A_prob[i]
                    seq5A[index] = seq5A_prob[i]
                    seq3C[index] = seq3C_prob[i]
                    seq5C[index] = seq5C_prob[i]
                    seq3G[index] = seq3G_prob[i]
                    seq5G[index] = seq5G_prob[i]
                    seq3T[index] = seq3T_prob[i]
                    seq5T[index] = seq5T_prob[i]
                    seq3_Average[index] = np.mean([
                        seq3A_prob[i], seq3C_prob[i], seq3G_prob[i],
                        seq3T_prob[i]
                    ])
                    seq5_Average[index] = np.mean([
                        seq5A_prob[i], seq5C_prob[i], seq5G_prob[i],
                        seq5T_prob[i]
                    ])
                    seq3_HeatMap.append(seq3_Average[index])
                    seq5_HeatMap.append(seq5_Average[index])

                # Setting all the ordered dicts to the outermost Dictionaries with sample name as keys
                self.salmon_seq3A[s_name] = seq3A
                self.salmon_seq5A[s_name] = seq5A
                self.salmon_seq3C[s_name] = seq3C
                self.salmon_seq5C[s_name] = seq5C
                self.salmon_seq3G[s_name] = seq3G
                self.salmon_seq5G[s_name] = seq5G
                self.salmon_seq3T[s_name] = seq3T
                self.salmon_seq5T[s_name] = seq5T
                self.salmon_seq3Average[s_name] = seq3_Average
                self.salmon_seq5Average[s_name] = seq5_Average
                self.salmon_seq3HeatMap.append(seq3_HeatMap)
                self.salmon_seq5HeatMap.append(seq5_HeatMap)

                # Call Quant model which reads the quant.sf file and returns ratio of Effective/Actual length
                quant = QuantModel()
                quant.from_file(os.path.dirname(f['root']))
                quant_ratio = quant.ratio

                for i in range(len(quant_ratio)):
                    quant_Dict[i] = quant_ratio[i]

                self.salmon_quant[s_name] = quant_Dict

        # Parse Fragment Length Distribution logs
        self.salmon_fld = dict()
        for f in self.find_log_files('salmon/fld'):
            # Get the s_name from the parent directory
            if os.path.basename(f['root']) == 'libParams':
                s_name = os.path.basename(os.path.dirname(f['root']))
                s_name = self.clean_s_name(s_name, f['root'])
                parsed = OrderedDict()
                for i, v in enumerate(f['f'].split()):
                    parsed[i] = float(v)
                if len(parsed) > 0:
                    if s_name in self.salmon_fld:
                        log.debug(
                            "Duplicate sample name found! Overwriting: {}".
                            format(s_name))
                    self.add_data_source(f, s_name)
                    self.salmon_fld[s_name] = parsed

        # Filter to strip out ignored sample names
        self.salmon_meta = self.ignore_samples(self.salmon_meta)
        self.salmon_fld = self.ignore_samples(self.salmon_fld)

        # UserWarning signals MultiQC that this module found no data
        if len(self.salmon_meta) == 0 and len(self.salmon_fld) == 0:
            raise UserWarning

        if len(self.salmon_meta) > 0:
            log.info("Found {} meta reports".format(len(self.salmon_meta)))
            self.write_data_file(self.salmon_meta, 'multiqc_salmon')
        if len(self.salmon_fld) > 0:
            log.info("Found {} fragment length distributions".format(
                len(self.salmon_fld)))

        if len(self.salmon_bias_Average) > 0:
            log.info("Found {} GC Bias".format(len(self.salmon_bias_Average)))

        if len(self.salmon_seq3Average) > 0:
            log.info("Found {} Sequence 3' bias".format(
                len(self.salmon_seq3Average)))

        if len(self.salmon_seq5Average) > 0:
            log.info("Found {} Sequence 5' bias".format(
                len(self.salmon_seq5Average)))

        # Add alignment rate to the general stats table
        headers = OrderedDict()
        headers['percent_mapped'] = {
            'title': '% Aligned',
            'description': '% Mapped reads',
            'max': 100,
            'min': 0,
            'suffix': '%',
            'scale': 'YlGn'
        }
        headers['num_mapped'] = {
            'title': 'M Aligned',
            'description': 'Mapped reads (millions)',
            'min': 0,
            'scale': 'PuRd',
            'modify': lambda x: float(x) / 1000000,
            'shared_key': 'read_count'
        }
        self.general_stats_addcols(self.salmon_meta, headers)

        # Fragment length distribution plot
        pconfig = {
            'smooth_points': 500,
            'id': 'salmon_plot',
            'title': 'Salmon: Fragment Length Distribution',
            'ylab': 'Fraction',
            'xlab': 'Fragment Length (bp)',
            'ymin': 0,
            'xmin': 0,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(plot=linegraph.plot(self.salmon_fld, pconfig))

        # GC Bias First Row plot
        pconfig_GCBias_Begin = {
            'smooth_points': 500,
            'title': 'Salmon : GC Bias Ratio in Beginning of Read',
            'ylab': 'Ratio',
            'xlab': 'GC Biases',
            'ymin': 0,
            'xmin': 0,
            'xmax': 100,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='GC Bias First Row',
                         plot=linegraph.plot(
                             self.salmon_bias_FirstSampleWeights,
                             pconfig_GCBias_Begin))

        # GC Bias Middle row plot
        pconfig_GCBias_Middle = {
            'smooth_points': 500,
            'title': 'Salmon : GC Bias Ratio in Middle of Read',
            'ylab': 'Ratio',
            'xlab': 'GC Biases',
            'ymin': 0,
            'xmin': 0,
            'xmax': 100,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='GC Bias Middle Row',
                         plot=linegraph.plot(
                             self.salmon_bias_MiddleSampleWeights,
                             pconfig_GCBias_Middle))

        # GC Bias Last row plot
        pconfig_GCBias_Last = {
            'smooth_points': 500,
            'id': 'salmon_plot6',
            'title': 'Salmon : GC Bias Ratio in Last of Read',
            'ylab': 'Ratio',
            'xlab': 'GC Biases',
            'ymin': 0,
            'xmin': 0,
            'xmax': 100,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='GC Bias Last Row',
                         plot=linegraph.plot(self.salmon_bias_LastSampleWights,
                                             pconfig_GCBias_Last))

        # GC Bias Average across all samples
        pconfig_GCBias_Average = {
            'smooth_points': 500,
            'title': 'Salmon : Average GC Bias of all samples',
            'ylab': 'Ratio',
            'xlab': 'Bias',
            'ymin': 0,
            'xmin': 0,
            'xmax': 100,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='GC Bias Average',
                         plot=linegraph.plot(self.salmon_bias_Average,
                                             pconfig_GCBias_Average))

        # GC Bias Average bar plot
        pconfig_GCBias_Bar = {
            'smooth_points': 500,
            'title': 'Salmon : Average GC Bias bar plot',
            'ylab': 'Ratios',
            'xlab': 'Samples',
            'ymin': 0,
            'xmin': 0,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='GC Bias Average Bar Plot',
                         plot=bargraph.plot(self.salmon_bias_TotalAverage,
                                            pconfig=pconfig_GCBias_Bar))

        # Sequence 3' Bias for A
        pconfig_Seq3_A = {
            'smooth_points': 500,
            'title': 'Salmon : Seq 3 A Base',
            'ylab': 'Marginalized Probability Ratio',
            'xlab': 'Sequence',
            'ymin': 0,
            'xmin': 0,
            'xmax': 100,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='Sequence 3 A-Base',
                         plot=linegraph.plot(self.salmon_seq3A,
                                             pconfig_Seq3_A))

        # Sequence 3' Bias for C
        pconfig_Seq3_C = {
            'smooth_points': 500,
            'title': 'Salmon : Seq 3 C Base',
            'ylab': 'Marginalized Probability Ratio',
            'xlab': 'Sequence',
            'ymin': 0,
            'xmin': 0,
            'xmax': 100,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='Sequence 3 C-Base',
                         plot=linegraph.plot(self.salmon_seq3C,
                                             pconfig_Seq3_C))

        # Sequence 3' Bias for G
        pconfig_Seq3_G = {
            'smooth_points': 500,
            'title': 'Salmon : Seq 3 G Base',
            'ylab': 'Marginalized Probability Ratio',
            'xlab': 'Sequence',
            'ymin': 0,
            'xmin': 0,
            'xmax': 100,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='Sequence 3 G-Base',
                         plot=linegraph.plot(self.salmon_seq3G,
                                             pconfig_Seq3_G))

        # Sequence 3' Bias for T
        pconfig_Seq3_T = {
            'smooth_points': 500,
            'title': 'Salmon : Seq 3 T base',
            'ylab': 'Marginalized Probability Ratio',
            'xlab': 'Sequence',
            'ymin': 0,
            'xmin': 0,
            'xmax': 100,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='Sequence 3 T-Base',
                         plot=linegraph.plot(self.salmon_seq3T,
                                             pconfig_Seq3_T))

        # Sequence 3' Average
        pconfig_Seq3_Avg = {
            'smooth_points': 500,
            'title': 'Salmon : Seq 3 Average',
            'ylab': 'Marginalized Probability Ratio',
            'xlab': 'Sequence',
            'ymin': 0,
            'xmin': 0,
            'xmax': 100,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='Sequence 3 Average',
                         plot=linegraph.plot(self.salmon_seq3Average,
                                             pconfig_Seq3_Avg))

        # Sequence 5' Bias for A
        pconfig_Seq5_A = {
            'smooth_points': 500,
            'title': 'Salmon : Seq 5 A Base',
            'ylab': 'Marginalized Probability Ratio',
            'xlab': 'Sequence',
            'ymin': 0,
            'xmin': 0,
            'xmax': 100,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='Sequence 5 A-Base',
                         plot=linegraph.plot(self.salmon_seq5A,
                                             pconfig_Seq5_A))

        # Sequence 5' Bias for C
        pconfig_Seq5_C = {
            'smooth_points': 500,
            'title': 'Salmon : Seq 5 C Base',
            'ylab': 'Marginalized Probability Ratio',
            'xlab': 'Sequence',
            'ymin': 0,
            'xmin': 0,
            'xmax': 100,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='Sequence 5 C-Base',
                         plot=linegraph.plot(self.salmon_seq5C,
                                             pconfig_Seq5_C))

        # Sequence 5' Bias for G
        pconfig_Seq5_G = {
            'smooth_points': 500,
            'title': 'Salmon : Seq 5 G Base',
            'ylab': 'Marginalized Probability Ratio',
            'xlab': 'Sequence',
            'ymin': 0,
            'xmin': 0,
            'xmax': 100,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='Sequence 5 G-Base',
                         plot=linegraph.plot(self.salmon_seq5G,
                                             pconfig_Seq5_G))

        # Sequence 5' Bias for T
        pconfig_Seq5_T = {
            'smooth_points': 500,
            'title': 'Salmon : Seq 5 T base',
            'ylab': 'Marginalized Probability Ratio',
            'xlab': 'Sequence',
            'ymin': 0,
            'xmin': 0,
            'xmax': 100,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='Sequence 5 T-Base',
                         plot=linegraph.plot(self.salmon_seq5T,
                                             pconfig_Seq5_T))

        # Sequence 5' Average
        pconfig_Seq5_Avg = {
            'smooth_points': 500,
            'title': 'Salmon : Seq 5 Average',
            'ylab': 'Marginalized Probability Ratio',
            'xlab': 'Sequence',
            'ymin': 0,
            'xmin': 0,
            'xmax': 100,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='Sequence 5 Average',
                         plot=linegraph.plot(self.salmon_seq5Average,
                                             pconfig_Seq5_Avg))

        # Quant Plot
        pconfig_Quant = {
            'smooth_points': 500,
            'id': 'salmon_plot7',
            'title': 'Salmon : Quant plot',
            'ylab': 'Effective/Actual Length ',
            'xlab': 'Samples',
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        self.add_section(name='Quant Plot',
                         plot=linegraph.plot(self.salmon_quant, pconfig_Quant))

        # First Row of all samples Heatmap
        # NOTE(review): np.corrcoef requires at least two samples — these
        # heatmap sections assume the lists above are non-empty.
        FirstRowCoff = np.corrcoef(self.heatmapFirstrow)
        self.add_section(
            name='GC Bias First Row Heatmap',
            description=
            'Heatmap to display variance between first row ratios of all the samples',
            plot=heatmap.plot(FirstRowCoff, self.sample_names,
                              self.sample_names))

        # Middle Row of all samples Heatmap
        MiddleRowCoff = np.corrcoef(self.heatMapMiddleRow)
        self.add_section(
            name='GC Bias Middle Row Heatmap',
            description=
            'Heatmap to display variance between middle row ratios of all the samples',
            plot=heatmap.plot(MiddleRowCoff, self.sample_names,
                              self.sample_names))

        # Last Row of all samples Heatmap
        LastRowCoff = np.corrcoef(self.heatMapLastRow)
        self.add_section(
            name='GC Bias Last Row Heatmap',
            description=
            'Heatmap to display variance between last row ratios of all the samples',
            plot=heatmap.plot(LastRowCoff, self.sample_names,
                              self.sample_names))

        # GC Bias HeatMap
        AverageCoff = np.corrcoef(self.averageBiasHeatMap)
        self.add_section(
            name='GC Bias Heatmap',
            description='Heatmap to display average bias across all samples',
            plot=heatmap.plot(AverageCoff, self.sample_names,
                              self.sample_names))

        # Seq 3' Heatmap
        Seq3HeatMap = np.corrcoef(self.salmon_seq3HeatMap)
        self.add_section(
            name='Sequence 3 Heatmap',
            description=
            'Heatmap to display Sequence 3 prime across all samples',
            plot=heatmap.plot(Seq3HeatMap, self.sample_names,
                              self.sample_names))

        # Seq 5' Heatmap
        Seq5HeatMap = np.corrcoef(self.salmon_seq5HeatMap)
        self.add_section(
            name='Sequence 5 Heatmap',
            description=
            'Heatmap to display Sequence 5 prime across all samples',
            plot=heatmap.plot(Seq5HeatMap, self.sample_names,
                              self.sample_names))
Example #27
0
    def hops_heatmap(self):
        """Heatmap showing all statuses for every sample.

        Builds a samples x taxa matrix from ``self.hops_data`` and adds it
        as a heatmap section. Cell values encode the additive categories
        assigned upstream: 1 = none, 2 = edit_only, 3 = damage_only,
        4 = edit_and_damage (see the colstops below).
        """
        samples = list(self.hops_data)

        # As all samples always have same taxa, take the taxa names from the first sample
        taxa = [t.replace("_", " ") for t in self.hops_data[samples[0]]]

        # Convert the per-sample mappings into the list of lists required for heatmap
        levels = [list(self.hops_data[s].values()) for s in samples]

        pconfig = {
            "id": "hops-heatmap",
            "title": "HOPS: Potential Candidates",
            "xTitle": "Node",
            "yTitle": "Sample",
            "min": 0,
            "max": 1,
            "square": False,
            "colstops": [
                [1, "#ededed"],
                [2, "#FFFFC5"],
                [3, "#F2B26C"],
                [4, "#AD2A2B"],
            ],
            "decimalPlaces": 0,
            "legend": False,
            "datalabels": False,
            "xcats_samples": False,
        }

        # Warn about Y-axis label overlap when there are many samples
        extra_warning = ""
        if len(self.hops_data) > 20:
            extra_warning = """
            <div class="alert alert-warning">
                Large numbers of samples can result in Y-axis labels
                overlapping. Drag the handle at the bottom of the plot down
                to expand and see all samples names.
            </div>
                """

        self.add_section(
            name="Potential Candidates",
            anchor="hops_heatmap",
            description="""
            Heatmap of candidate taxa for downstream aDNA analysis, with
            intensity representing additive categories of possible 'positive'
            hits.
            """
            + extra_warning,
            helptext="""
            HOPS assigns a category based on how many ancient DNA
            characteristics a given node (i.e. taxon) in a sample has.
            The colours indicate the following:

            * <span style="background-color: #ededed; padding:0.2rem 1rem;">**Grey**</span> - No characteristics detected
            * <span style="background-color: #FFFFC5; padding:0.2rem 1rem;">**Yellow**</span> - Small edit distance from reference
            * <span style="background-color: #F2B26C; padding:0.2rem 1rem;">**Orange**</span> - Typical aDNA damage pattern
            * <span style="background-color: #AD2a2B; padding:0.2rem 1rem;">**Red**</span> - Small edit distance _and_ aDNA damage pattern

            A red category typically indicates a good candidate for further investigation
            in downstream analysis.
            """,
            plot=heatmap.plot(levels, xcats=taxa, ycats=samples, pconfig=pconfig),
        )
Example #28
0
    def add_cc_section(self, c_id, mod):
        """Add a custom content section to the report.

        Dispatches on ``mod['config']['plot_type']`` to build the matching
        plot (table, bargraph, linegraph, scatter, heatmap, beeswarm) or to
        embed raw HTML / image content, then registers the result with
        ``self.add_section()``. Unknown plot types are logged and skipped.

        :param c_id: content ID used as the section anchor and name fallback
        :param mod: dict with 'config' (section/plot settings) and 'data'
        """
        section_name = mod['config'].get('section_name',
                                         c_id.replace('_', ' ').title())
        if not section_name:
            section_name = 'Custom Content'

        section_description = mod['config'].get('description', '')

        # Plots need a title - fall back to the section name
        pconfig = mod['config'].get('pconfig', {})
        if pconfig.get('title') is None:
            pconfig['title'] = section_name

        plot = None
        content = None
        # Hoist the repeated config lookup used by every branch below
        plot_type = mod['config'].get('plot_type')

        # Table
        if plot_type == 'table':
            pconfig['sortRows'] = pconfig.get('sortRows', False)
            headers = mod['config'].get('headers')
            plot = table.plot(mod['data'], headers, pconfig)
            self.write_data_file(
                mod['data'],
                "multiqc_{}".format(section_name.lower().replace(' ', '_')))

        # Bar plot
        elif plot_type == 'bargraph':
            plot = bargraph.plot(mod['data'], mod['config'].get('categories'),
                                 pconfig)

        # Line plot
        elif plot_type == 'linegraph':
            plot = linegraph.plot(mod['data'], pconfig)

        # Scatter plot
        elif plot_type == 'scatter':
            plot = scatter.plot(mod['data'], pconfig)

        # Heatmap
        elif plot_type == 'heatmap':
            plot = heatmap.plot(mod['data'], mod['config'].get('xcats'),
                                mod['config'].get('ycats'), pconfig)

        # Beeswarm plot
        elif plot_type == 'beeswarm':
            plot = beeswarm.plot(mod['data'], pconfig)

        # Raw HTML, or a raw image file already rendered as HTML
        elif plot_type in ('html', 'image'):
            content = mod['data']

        # Not supplied
        elif plot_type is None:
            log.warning("Plot type not found for content ID '{}'".format(c_id))

        # Not recognised
        else:
            log.warning(
                "Error - custom content plot type '{}' not recognised for content ID {}"
                .format(plot_type, c_id))

        # Don't use exactly the same title / description text as the main module
        if section_name == self.name:
            section_name = None
        if section_description == self.info:
            section_description = ''

        self.add_section(name=section_name,
                         anchor=c_id,
                         description=section_description,
                         plot=plot,
                         content=content)
Example #29
0
    def add_cc_section(self, c_id, mod):
        """Add a custom content section to the report.

        Dispatches on ``mod["config"]["plot_type"]`` to build the matching
        plot (table, bargraph, linegraph, scatter, heatmap, beeswarm) or to
        embed raw HTML / image content, then registers the result with
        ``self.add_section()``. Unknown plot types are logged and skipped.

        :param c_id: content ID used as the section anchor and name fallback
        :param mod: dict with "config" (section/plot settings) and "data"
        """
        section_name = mod["config"].get("section_name",
                                         c_id.replace("_", " ").title())
        if not section_name:
            section_name = "Custom Content"

        section_description = mod["config"].get("description", "")

        # Plots need a title - fall back to the section name
        pconfig = mod["config"].get("pconfig", {})
        if pconfig.get("title") is None:
            pconfig["title"] = section_name

        plot = None
        content = None
        # Hoist the repeated config lookup used by every branch below
        plot_type = mod["config"].get("plot_type")

        # Table
        if plot_type == "table":
            pconfig["sortRows"] = pconfig.get("sortRows", False)
            headers = mod["config"].get("headers")
            plot = table.plot(mod["data"], headers, pconfig)
            self.write_data_file(
                mod["data"],
                "multiqc_{}".format(section_name.lower().replace(" ", "_")))

        # Bar plot
        elif plot_type == "bargraph":
            plot = bargraph.plot(mod["data"], mod["config"].get("categories"),
                                 pconfig)

        # Line plot
        elif plot_type == "linegraph":
            plot = linegraph.plot(mod["data"], pconfig)

        # Scatter plot
        elif plot_type == "scatter":
            plot = scatter.plot(mod["data"], pconfig)

        # Heatmap
        elif plot_type == "heatmap":
            plot = heatmap.plot(mod["data"], mod["config"].get("xcats"),
                                mod["config"].get("ycats"), pconfig)

        # Beeswarm plot
        elif plot_type == "beeswarm":
            plot = beeswarm.plot(mod["data"], pconfig)

        # Raw HTML, or a raw image file already rendered as HTML
        elif plot_type in ("html", "image"):
            content = mod["data"]

        # Not supplied
        elif plot_type is None:
            log.warning("Plot type not found for content ID '{}'".format(c_id))

        # Not recognised
        else:
            log.warning(
                "Error - custom content plot type '{}' not recognised for content ID {}"
                .format(plot_type, c_id))

        # Don't use exactly the same title / description text as the main module
        if section_name == self.name:
            section_name = None
        if section_description == self.info:
            section_description = ""

        self.add_section(name=section_name,
                         anchor=c_id,
                         description=section_description,
                         plot=plot,
                         content=content)
Example #30
0
    def hops_heatmap(self):
        """Heatmap showing all statuses for every sample.

        Builds a samples x taxa matrix from ``self.hops_data`` and adds it
        as a heatmap section. Cell values encode the additive categories
        assigned upstream: 1 = none, 2 = edit_only, 3 = damage_only,
        4 = edit_and_damage (see the colstops below).
        """
        samples = list(self.hops_data)

        # As all samples always have same taxa, take the taxa names from the first sample
        taxa = [t.replace('_', ' ') for t in self.hops_data[samples[0]]]

        # Convert the per-sample mappings into the list of lists required for heatmap
        levels = [list(self.hops_data[s].values()) for s in samples]

        pconfig = {
            'id': 'hops-heatmap',
            'title': 'HOPS: Potential Candidates',
            'xTitle': 'Node',
            'yTitle': 'Sample',
            'min': 0,
            'max': 1,
            'square': False,
            'colstops': [
                [1, '#ededed'],
                [2, '#FFFFC5'],
                [3, '#F2B26C'],
                [4, '#AD2A2B'],
            ],
            'decimalPlaces': 0,
            'legend': False,
            'datalabels': False,
            'xcats_samples': False,
        }

        # Warn about Y-axis label overlap when there are many samples
        extra_warning = ''
        if len(self.hops_data) > 20:
            extra_warning = '''
            <div class="alert alert-warning">
                Large numbers of samples can result in Y-axis labels
                overlapping. Drag the handle at the bottom of the plot down
                to expand and see all samples names.
            </div>
                '''

        self.add_section(name='Potential Candidates',
                         anchor='hops_heatmap',
                         description='''
            Heatmap of candidate taxa for downstream aDNA analysis, with
            intensity representing additive categories of possible 'positive'
            hits.
            ''' + extra_warning,
                         helptext='''
            HOPS assigns a category based on how many ancient DNA
            characteristics a given node (i.e. taxon) in a sample has.
            The colours indicate the following:

            * <span style="background-color: #ededed; padding:0.2rem 1rem;">**Grey**</span> - No characteristics detected
            * <span style="background-color: #FFFFC5; padding:0.2rem 1rem;">**Yellow**</span> - Small edit distance from reference
            * <span style="background-color: #F2B26C; padding:0.2rem 1rem;">**Orange**</span> - Typical aDNA damage pattern
            * <span style="background-color: #AD2a2B; padding:0.2rem 1rem;">**Red**</span> - Small edit distance _and_ aDNA damage pattern

            A red category typically indicates a good candidate for further investigation
            in downstream analysis.
            ''',
                         plot=heatmap.plot(levels,
                                           xcats=taxa,
                                           ycats=samples,
                                           pconfig=pconfig))
Example #31
0
    def ssds_heatmap(self):
        """Heatmap showing the SPoT breakdown by DNA type for every sample.

        Builds an intervals x (dna_type, sample) matrix of SPoT percentages
        from ``self.SPoT_values`` and adds it as a heatmap section; missing
        values are filled with 0.
        """
        ## PLOT 3: Heatmap showing SPoT breakdown by type for every sample

        # Fixed display order of the DNA types (keys of self.SPoT_values)
        dna_types = [
            "ssDNA", "ssDNA_type2", "dsDNA_hiconf", "dsDNA_loconf",
            "unclassified"
        ]

        # Two-letter abbreviations used to prefix sample names on the x-axis
        short_dna_type = OrderedDict()
        short_dna_type["ssDNA"] = "ss"
        short_dna_type["ssDNA_type2"] = "t2"
        short_dna_type["dsDNA_hiconf"] = "dH"
        short_dna_type["dsDNA_loconf"] = "dL"
        short_dna_type["unclassified"] = "un"

        sample_names = sorted(self.SPoT_values["ssDNA"].keys())

        # Union of interval names across all samples, in first-seen order
        interval_names = []
        for s in sample_names:
            for k in sorted(self.SPoT_values["ssDNA"][s]):
                if k not in interval_names:
                    interval_names.append(k)

        # X-axis labels: one "(type)sample" entry per DNA type / sample pair
        s_names = []
        for d in dna_types:
            for s in sample_names:
                s_names.append("(" + short_dna_type[d] + ")" + s)

        # One row per interval; columns follow the s_names ordering above
        data = []
        for i in interval_names:
            row = []
            for d in dna_types:
                for s in sample_names:
                    try:
                        row.append(float(self.SPoT_values[d][s][i]))
                    except KeyError:
                        # No SPoT recorded for this type/sample/interval
                        row.append(0)
            data.append(row)

        pconfig = {
            "id": "ssds-spot-heatmap",
            "title": "SSDS: Signal Percentage of Tags (%)",
            "xTitle": "",
            "yTitle": "Interval",
            "square": False,
            "colstops": [
                [0, "#ffffff"],
                [0.001, "#fefce9"],
                [0.50, "#ffc265"],
                [1.00, "#ff6262"],
            ],
            "decimalPlaces": 0,
            "legend": False,
            "datalabels": True,
            "xcats_samples": False,
            "ycats_samples": False,
            "borderWidth": 1,
        }

        self.add_section(
            name="SSDS SPoTs",
            anchor="ssds_spot_heatmap",
            description="""
                Signal Percentage of Tags (SPoT) for all samples (%). Colors indicate the value (0 / no data =white; 
                Otherwise, increasing SPoT from yellow to orange to red). Intervals annotated as (R) represent
                the SPoT when the intervals are randomly shuffled in the genome (bedtools shuffle -chrom). This 
                provides a naive, but useful estimate of random expectation for a non-enriched library.
            """,
            helptext="""
                The Signal Percentage of Tags (SPoT) represents the percentage of sequencing reads found in
                a set of genomic intervals. Higher numbers indicate that the library was enriched for reads
                in that location. The SSDS report may also contain intervals annotated as (R); these represent
                the SPoT when the intervals are randomly shuffled in the genome (bedtools shuffle -chrom). This
                represents a reasonable expectation of random overlap, however this very simple estimate should
                be formally validated more robustly.  
            """,
            plot=heatmap.plot(data, s_names, interval_names, pconfig),
        )
Example #32
0
    def top_five_duplication_heatmap(self):
        """Add a heatmap showing the minimizer duplication top-5 species.

        Finds the ``self.top_n`` most abundant species (rank code "S")
        across all samples, pulls each sample's minimizer duplication rate
        for those species, and plots a samples x species heatmap. Samples
        from older Kraken 2 report versions (no minimizer data) are dropped
        with a single warning. If no data is available, the section is
        skipped entirely.
        """
        duplication = list()
        pconfig = {
            "id": "kraken-topfive-duplication_plot",
            "title": f"Kraken 2: Top {self.top_n} species duplication"
        }

        rank_code = "S"
        rank_data = dict()
        # Loop through the summed tax percentages to get the top 5 across all samples.
        # Fix: initialise sorted_pct so a missing rank code no longer raises
        # NameError at the loop below.
        sorted_pct = []
        try:
            sorted_pct = sorted(self.kraken_total_pct[rank_code].items(),
                                key=lambda x: x[1],
                                reverse=True)
        except KeyError:
            # Taxa rank not found in any sample
            pass

        i = 0
        counts_shown = {}

        showed_warning = False
        for classif, pct_sum in sorted_pct:
            i += 1
            if i > self.top_n:
                break
            # Pull out counts for this rank + classif from each sample
            for s_name, d in self.kraken_raw_data.items():
                if s_name not in rank_data:
                    rank_data[s_name] = dict()
                if s_name not in counts_shown:
                    counts_shown[s_name] = 0
                for row in d:
                    if row["rank_code"] == rank_code:
                        if row["classif"] == classif:
                            if classif not in rank_data[s_name]:
                                rank_data[s_name][classif] = 0
                            try:
                                rank_data[s_name][classif] = row[
                                    "minimizer_duplication"]
                            except KeyError:
                                # Older report format without minimizer columns
                                del rank_data[s_name]
                                if not showed_warning:
                                    log.warning(
                                        "Kraken2 reports of different versions were found"
                                    )
                                    showed_warning = True
        # Strip empty samples
        for sample, vals in dict(rank_data).items():
            if len(vals) == 0:
                del rank_data[sample]

        # Fix: nothing left to plot (rank missing, or every sample stripped);
        # previously rank_data[ylabels[0]] would raise IndexError here.
        if not rank_data:
            return

        # Build data structures for heatmap
        ylabels = list(rank_data.keys())
        xlabels = list(rank_data[ylabels[0]].keys())
        for sample in rank_data:
            duplication.append(list(rank_data[sample].values()))

        self.add_section(
            name="Duplication rate of top species",
            anchor="kraken-duplication-topfive",
            description=
            f"The duplication rate of minimizer falling into the top {self.top_n} species",
            helptext=f"""
                To make this plot, the minimizer duplication rate is computed for the top {self.top_n} most abundant species in all samples.

                The minimizer duplication rate is defined as: `duplication rate = (total number of minimizers / number of distinct minimizers)`

                A low coverage and high duplication rate (`>> 1`) is often sign of read stacking, which probably indicates of false positive hit.
            """,
            plot=heatmap.plot(duplication, xlabels, ylabels, pconfig),
        )
Example #33
0
    def heatmap(self, json, index):
        """Build the HTStream primers heatmap section HTML.

        For each sample in *json*, builds a symmetric primer-combination
        count matrix and plots it as its own heatmap; buttons generated here
        let the report switch between the per-sample heatmaps via js.

        :param json: per-sample stats keyed by sample name (shadows the
            stdlib ``json`` module - name kept for interface compatibility)
        :param index: suffix of the "Pr_Primer_Counts" key to read
        :return: assembled wrapper HTML for the section
        """
        # config dictionary for heatmaps
        heat_pconfig = {
            "id": "htstream_primers_bargraph_" + index,
            "title": "HTStream: Primers Heatmap",
            "square": False,
            "datalabels": False,
            "xcats_samples": False,
            "ycats_samples": False,
            "colstops": [[0, "#FFFFFF"], [1, "#1DC802"]],
        }

        # Button and unique ids
        unique_id = str(random() % 1000)[5:]
        first = True
        button_list = []
        # Fix: make sure heatmap_html is defined even when json is empty
        # (previously an unbound-local NameError below).
        heatmap_html = ""

        for key in json.keys():

            # creates unique heatmap id that can be queried later by js.
            heat_pconfig[
                "id"] = "htstream_primers_" + key + "_" + unique_id + "_heatmap"

            counts_list = json[key]["Pr_Primer_Counts" + index]

            # get labels: every column of each row except the trailing count
            labs = []
            for x in range(len(counts_list)):
                temp = counts_list[x]
                labs += temp[:-1]

            # remove label dups
            labs = list(set(labs))

            # Create multidimensional list
            data = [[0] * len(labs) for i in range(len(labs))]

            # Appropriately fill list for primer combos (symmetric matrix)
            for x in range(len(counts_list)):
                x_pos = labs.index(counts_list[x][0])
                y_pos = labs.index(counts_list[x][1])
                data[x_pos][y_pos] = counts_list[x][-1]
                data[y_pos][x_pos] = counts_list[x][-1]

            # if this is the first sample processed, lucky them, they get to be shown first and marked as active.
            # 	This step is necessary otherwise, the plot div is not initialized. The additional calls to the
            # 	heatmap function are simply to add the data to the internal jsons used by MultiQC
            if first:
                active = "active"  # button is default active
                first = False  # shuts off first gate

                heatmap_html = heatmap.plot(data, labs, labs, heat_pconfig)

            else:
                active = ""  # button is default off
                heatmap.plot(data, labs, labs, heat_pconfig)

            # html div attributes and text
            name = key
            pid = heat_pconfig["id"] + "_btn"

            button_list.append(
                '<button class="btn btn-default btn-sm {a}" onclick="htstream_div_switch(this)" id="{pid}">{n}</button>\n'
                .format(a=active, pid=pid, n=name))

        # Create html for multiple heatmaps
        heatmap_plot = htstream_utils.multi_heatmap_html(
            button_list, heatmap_html)

        wrapper_html = "<h4> Primers: Primer Counts </h4>"
        wrapper_html += """<p>Heatmap indicating abundance of primer combinations.</p>"""

        # Heatmaps
        wrapper_html += """<div class="mqc_hcplot_plotgroup">"""
        wrapper_html += '<div id="htstream_heat_primers_{u}" class="htstream_fadein">'.format(
            u=unique_id)
        wrapper_html += heatmap_plot + "</div></div>"

        return wrapper_html
Example #34
0
	def quality_by_cycle(self, json, read):
		"""Build the combined quality-by-cycle section (line graph + per-sample heatmaps).

		For every sample in *json*: computes the mean Q score per cycle for a
		line graph, assigns a PASS / QUESTIONABLE / FAIL status from the
		fraction of cycles with mean Q > 30, and builds a per-cycle Q-score
		frequency heatmap. Returns the assembled section HTML.

		:param json: per-sample stats keyed by sample name (shadows the
			stdlib ``json`` module - name kept for interface compatibility)
		:param read: key of the read data to plot; also used to derive the
			plot titles and js button ids
		"""

		# Here is the most complicated figure implementation in this whole module.
		#	The issues here are that MultiQC had limited options for displaying 
		#	multiple figures if its a heatmap. Also, it doesnt allow you to switch
		#	back and forth between figure types. There are workarounds, however, using
		#	javascript and some clever organizations of javascript.

		# e.g. "xx_Read_1_yy" -> "Read 1", used in both plot titles
		title_read = " ".join(read.split("_")[1:3])

		# config dictionary for mean Q score line graph
		line_config = {
				  'smooth_points_sumcounts': False,
				  'categories': True,
				  'title': "HTStream: Mean Quality by Cycle (" + title_read + ")",
				  'xlab': "Cycle",
				  'ylab': "Mean Q Score",
				  }

		# config dictionary for heatmaps; 'id' is filled in per sample below
		heat_pconfig = {'id' : "",
				   'title': "HTStream: Quality by Cycle (" + title_read + ")",
				   'yTitle': 'Q Score',
				   'xTitle': 'Cycle',
				   'square' : False,
				   'datalabels': False,
				   'max': 1.0, 
				   'colstops': [
					        [0, '#FFFFFF'],
					        [0.3, '#1DC802'],
					        [0.6, '#F3F943'],
					        [1, '#E70808']
					           ]
    			  }

		btn_id = "-".join(read.split("_")[:3]).lower()

		line_data = {}		# {sample: {cycle: mean Q score}}
		status_dict = {}	# {sample: "PASS" / "QUESTIONABLE" / "FAIL"}
		first = True
		button_list = []


		for key in json.keys():

			# create dictionary for line graph. Again, format is {x: y}
			line_data[key] = {}

			# creates unique heatmap id that can be queried later by js.
			heat_pconfig["id"] = "htstream_" + btn_id + "_" + key + "_heatmap"

			# creates x and y axis labels for heatmap (categorical)
			x_lab = json[key][read]["col_names"]
			y_lab = json[key][read]["row_names"][::-1] # reverse orientation makes it easier to cycle through

			data = []

			# create variables for range functions in loops. Represents shape of data
			# NOTE(review): assumes "data" is a quality_scores x cycles matrix
			# matching "shape" - confirm against the upstream JSON producer.
			quality_scores = json[key][read]["shape"][0]
			cycles = json[key][read]["shape"][-1]


			# per-cycle totals, used later to normalise the heatmap frequencies
			total = []
			
			# iterates through positions, creates a list of the sum of scores at each position to be used
			#	to calculate frequency for heatmap. Also, calculates avg. Q score for linegraph.
			#	This chunk of code is very ugly, but is a necessary evil. 

			num_above_q30 = 0

			for pos in range(cycles):
				temp = [ score_list[pos] for score_list in json[key][read]["data"] ]
				temp_sum = sum(temp)
				total.append(temp_sum)

				# multiplies count at position by its Q Score (y_lab is reversed back here)
				total_score = sum([(int(p) * int(s)) for p, s in zip(temp, y_lab[::-1])])

				# divides total score by read count at this cycle for the mean Q score.
				# NOTE(review): temp_sum == 0 (no reads at this cycle) would raise
				#	ZeroDivisionError here - confirm upstream guarantees coverage.
				line_data[key][pos] = total_score / temp_sum # total reads

				if line_data[key][pos] > 30:
					num_above_q30 += 1


			# check to see what percent of bases have a mean Q score of at least 30
			q30_gate = (num_above_q30 / cycles) 

			if q30_gate < 0.6:
				status_dict[key] = "FAIL"

			elif q30_gate < 0.8:
				status_dict[key] = "QUESTIONABLE"

			else:
				status_dict[key] = 'PASS'


			# populates data dictionaries for heatmap
			for score in range(quality_scores - 1, -1, -1):

				# create empty list for data. The format is a little strange, each list represents a position 
				#	the value inside of it is the score at that position divided by the total score for that position
				#	giving a frequency.
				data.append([])

				for pos in range(cycles):
					data[-1].append(json[key][read]["data"][score][pos] / total[pos])


			# if this is the first sample processed, lucky them, they get to be shown first and marked as active.
			#	This step is necessary otherwise, the plot div is not initialized. The additional calls to the 
			#	heatmap function are simply to add the data to the internal jsons used by MultiQC.
			if first == True:
				active = "active" # button is default active
				first = False # shuts off first gate
				heatmap_html = heatmap.plot(data, x_lab, y_lab, heat_pconfig)

			else:
				active = "" # button is default off 
				heatmap.plot(data, x_lab, y_lab, heat_pconfig)


			# html div attributes and text
			name = key
			pid = "htstream_" + btn_id + "_" + key + "_btn"

			button_list.append('<button class="btn btn-default btn-sm {a}" onclick="htstream_div_switch(this)" id="{pid}">{n}</button>\n'.format(a=active, pid=pid, n=name))

	
		status_div = htstream_utils.sample_status(status_dict)

		line_plot = linegraph.plot(line_data, line_config)

		html = htstream_utils.qual_by_cycle_html(read, status_div, line_plot, btn_id, button_list, heatmap_html)

		return html
Example #35
0
    def __init__(self):

        # Initialise the parent object
        super(MultiqcModule, self).__init__(
            name='Salmon',
            anchor='salmon',
            href='http://combine-lab.github.io/salmon/',
            info=
            "is a tool for quantifying the expression of transcripts using RNA-seq data."
        )

        # Parse meta information. JSON
        self.salmon_meta = dict()
        self.gc_bias = False
        for f in self.find_log_files('salmon/meta'):
            # Get the s_name from the parent directory
            s_name = os.path.basename(os.path.dirname(f['root']))
            s_name = self.clean_s_name(s_name, f['root'])
            self.salmon_meta[s_name] = json.loads(f['f'])
        # Parse Fragment Length Distribution logs
        self.salmon_fld = dict()
        self.gc_bias_path_list = []
        self.seq_bias_path_list = []
        for f in self.find_log_files('salmon/fld'):
            # Get the s_name from the parent directory
            if os.path.basename(f['root']) == 'libParams':
                s_name = os.path.basename(os.path.dirname(f['root']))
                s_name = self.clean_s_name(s_name, f['root'])
                parsed = OrderedDict()
                for i, v in enumerate(f['f'].split()):
                    parsed[i] = float(v)
                if len(parsed) > 0:
                    if s_name in self.salmon_fld:
                        log.debug(
                            "Duplicate sample name found! Overwriting: {}".
                            format(s_name))
                    self.add_data_source(f, s_name)
                    self.salmon_fld[s_name] = parsed
                '''
                Check the meta_info.json file to check whether the salmon tool was run with gc bias and sequential bias.
                If ran with gc_bias then add its absolute path to the list of sample paths.
                Do same thing for seq_bias. 
                '''
                meta_json_file_path = os.path.join(os.path.dirname(f['root']),
                                                   'aux_info',
                                                   'meta_info.json')
                gc_bias_base_dir = os.path.dirname(f['root'])
                with open(meta_json_file_path, 'r') as meta_data_file:
                    meta_info_data = json.load(meta_data_file)
                self.gc_bias = meta_info_data['gc_bias_correct']
                self.seq_bias = meta_info_data['seq_bias_correct']
                if self.gc_bias:
                    self.gc_bias_path_list.append(
                        os.path.abspath(gc_bias_base_dir))
                if self.seq_bias:
                    self.seq_bias_path_list.append(
                        os.path.abspath(gc_bias_base_dir))

        # Filter to strip out ignored sample names
        self.salmon_meta = self.ignore_samples(self.salmon_meta)
        self.salmon_fld = self.ignore_samples(self.salmon_fld)

        if len(self.salmon_meta) == 0 and len(self.salmon_fld) == 0:
            raise UserWarning

        if len(self.salmon_meta) > 0:
            log.info("Found {} meta reports".format(len(self.salmon_meta)))
            self.write_data_file(self.salmon_meta, 'multiqc_salmon')
        if len(self.salmon_fld) > 0:
            log.info("Found {} fragment length distributions".format(
                len(self.salmon_fld)))

        # Add alignment rate to the general stats table
        headers = OrderedDict()
        headers['percent_mapped'] = {
            'title': '% Aligned',
            'description': '% Mapped reads',
            'max': 100,
            'min': 0,
            'suffix': '%',
            'scale': 'YlGn'
        }
        headers['num_mapped'] = {
            'title': 'M Aligned',
            'description': 'Mapped reads (millions)',
            'min': 0,
            'scale': 'PuRd',
            'modify': lambda x: float(x) / 1000000,
            'shared_key': 'read_count'
        }
        self.general_stats_addcols(self.salmon_meta, headers)

        # Fragment length distribution plot
        pconfig = {
            'smooth_points': 500,
            'id': 'salmon_plot',
            'title': 'Salmon: Fragment Length Distribution',
            'ylab': 'Fraction',
            'xlab': 'Fragment Length (bp)',
            'ymin': 0,
            'xmin': 0,
            'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
        }
        '''
            Iterate over the list of paths where each path has salmon output ran with gc_bias.
            Using the GCModel class's utlity functions compute observed array, expected array and the weights.
            Multiply the observed array and expected array with the corresponding weights and create a Ordered Dictionary,
            containing the ratio of observed by expected array. Plot that Ordered Dict using matplotlib.
        '''
        # Per-sample observed/expected ratio curves for each of the three GC models
        self.gc_first_model_ratio = dict()
        self.gc_second_model_ratio = dict()
        self.gc_third_model_ratio = dict()
        # Per-sample average ratio curve across the three models
        self.gc_avg_ratio = dict()
        self.seq_three_prime = dict()
        self.seq_five_prime = dict()
        # Inputs for the GC bias similarity heatmap (one row and label per sample)
        self.gc_average_data = []
        self.gc_heatmap_labels = []
        self.gc_heatmap_data = []

        # For every sample run with GC bias modelling: load the GC model,
        # compute the weighted observed/expected ratio per bin for each of
        # the three fragment models, and accumulate a cross-model average.
        for path_var in self.gc_bias_path_list:
            gc_model = GCModel()
            gc_model.from_file(path_var)
            obs_array = gc_model.obs_.tolist()
            exp_array = gc_model.exp_.tolist()
            obs_weights = list(gc_model.obs_weights_)
            exp_weights = list(gc_model.exp_weights_)
            # Sample name is the parent directory of the model file path
            self.path_var = path_var.split('/')[-2]
            ratio_dict = dict()
            avg_ratio_dict = OrderedDict()
            for i in range(len(obs_array)):
                obs = obs_array[i]
                exp = exp_array[i]
                obs_weight = obs_weights[i]
                exp_weight = exp_weights[i]
                ratio_value = OrderedDict()
                j = 1
                for o, e in zip(obs, exp):
                    # Weighted observed/expected ratio for this bin
                    ratio = (o * obs_weight) / (e * exp_weight)
                    ratio_value[j] = ratio
                    # Accumulate the cross-model sum; the first model to see a
                    # bin initialises it. Narrowed from a bare `except:` so
                    # unrelated errors are no longer silently swallowed.
                    try:
                        avg_ratio_dict[j] += ratio
                    except KeyError:
                        avg_ratio_dict[j] = ratio
                    j += 1
                ratio_dict[i] = ratio_value

            # Convert the accumulated sums into a mean over the models
            for k in list(avg_ratio_dict.keys()):
                avg_ratio_dict[k] /= len(obs_array)

            self.gc_first_model_ratio[self.path_var] = ratio_dict[0]
            self.gc_second_model_ratio[self.path_var] = ratio_dict[1]
            self.gc_third_model_ratio[self.path_var] = ratio_dict[2]
            self.gc_avg_ratio[self.path_var] = avg_ratio_dict
            self.gc_average_data.append(list(avg_ratio_dict.values()))
            self.gc_heatmap_labels.append(self.path_var)

        # Pairwise cosine distance between per-sample average GC bias curves;
        # builds the square matrix rendered by the similarity heatmap below
        for avg_data1 in self.gc_average_data:
            cosine_distance_vector = []
            for avg_data2 in self.gc_average_data:
                cosine_distance_vector.append(
                    spatial.distance.cosine(avg_data1, avg_data2))
            self.gc_heatmap_data.append(cosine_distance_vector)

        # Average each of the three GC model ratio curves across all samples,
        # producing one "typical" curve per model for a combined line plot.
        files = list(self.gc_first_model_ratio.keys())
        self.model_ratios = dict()

        firstModelAvg = OrderedDict()
        secondModelAvg = OrderedDict()
        thirdModelAvg = OrderedDict()

        for k in files:
            firstModel = self.gc_first_model_ratio[k]
            secondModel = self.gc_second_model_ratio[k]
            thirdModel = self.gc_third_model_ratio[k]

            for key in list(firstModel.keys()):
                # First sample initialises each bin, later samples accumulate.
                # Narrowed from a bare `except:` so only the expected missing
                # key is handled and real errors propagate.
                try:
                    firstModelAvg[key] += firstModel[key]
                    secondModelAvg[key] += secondModel[key]
                    thirdModelAvg[key] += thirdModel[key]
                except KeyError:
                    firstModelAvg[key] = firstModel[key]
                    secondModelAvg[key] = secondModel[key]
                    thirdModelAvg[key] = thirdModel[key]

        # Turn accumulated sums into per-bin means over all samples
        for k in list(firstModelAvg.keys()):
            firstModelAvg[k] = float(firstModelAvg[k] / len(files))
            secondModelAvg[k] = float(secondModelAvg[k] / len(files))
            thirdModelAvg[k] = float(thirdModelAvg[k] / len(files))

        modelAvg = {
            "First Model": firstModelAvg,
            "Second Model": secondModelAvg,
            "Third Model": thirdModelAvg
        }
        '''
        For samples that were run with sequential bias, use the utility functions defined in the SeqModel.py class and
        read the values of the observed and expected bias values from the 3' and 5' end. Calculate the ratio of observed
        to expected from each read end for each nucleotide base.
        '''

        # Variable declarations for storing the ratios.
        # Keyed by sample name -> nucleotide -> OrderedDict of position ratios
        self.seq_3prime_ratio = dict()
        self.seq_5prime_ratio = dict()
        self.nucleotides = ['A', 'C', 'G', 'T']
        # Per-sample average curves, used for the similarity heatmaps below
        self.seq_3prime_avg_data = []
        self.seq_5prime_avg_data = []

        # Iterate over all samples that were run with sequential bias and read values into the dictionaries.
        for path_var in self.seq_bias_path_list:
            seq_model = SEQModel()
            seq_model.from_file(path_var)
            obs3_array = seq_model.obs3_prime.tolist()
            exp3_array = seq_model.exp3_prime.tolist()
            obs5_array = seq_model.obs5_prime.tolist()
            exp5_array = seq_model.exp5_prime.tolist()
            # Sample name is the parent directory of the model file path
            self.path_var = path_var.split('/')[-2]
            ratio_dict_3prime = dict()
            ratio_dict_5prime = dict()
            # Running sums over the four nucleotides, one slot per position
            avg_3prime_array = [0] * len(obs3_array[0])
            avg_5prime_array = [0] * len(obs5_array[0])
            for i in range(len(self.nucleotides)):
                obs_3prime = obs3_array[i]
                exp_3prime = exp3_array[i]
                obs_5prime = obs5_array[i]
                exp_5prime = exp5_array[i]
                # Ordered dictionaries to store the 3' and 5' end ratios.
                ratio_3prime_dict = OrderedDict()
                ratio_5prime_dict = OrderedDict()
                j = 1
                for o, e in zip(obs_3prime, exp_3prime):
                    ratio = o / e
                    ratio_3prime_dict[j] = ratio
                    avg_3prime_array[j - 1] += ratio
                    j += 1
                ratio_dict_3prime[self.nucleotides[i]] = ratio_3prime_dict

                j = 1
                for o, e in zip(obs_5prime, exp_5prime):
                    # Calculate observed/expected ratio and add the values to respective dictionary and average array.
                    ratio = o / e
                    ratio_5prime_dict[j] = ratio
                    # Bug fix: accumulate (+=) like the 3' loop above. The
                    # original plain assignment kept only the last
                    # nucleotide's ratio, so the 5' average curve reflected
                    # 'T' alone instead of the mean of A/C/G/T.
                    avg_5prime_array[j - 1] += ratio
                    j += 1
                ratio_dict_5prime[self.nucleotides[i]] = ratio_5prime_dict

            # Calculate the average bias values for each end and store in dictionary
            self.seq_3prime_avg_data.append(
                [x / len(self.nucleotides) for x in avg_3prime_array])
            self.seq_5prime_avg_data.append(
                [x / len(self.nucleotides) for x in avg_5prime_array])
            self.seq_3prime_ratio[self.path_var] = ratio_dict_3prime
            self.seq_5prime_ratio[self.path_var] = ratio_dict_5prime

        # Variables to hold the heatmap data.
        self.seq_3prime_heatmap_data = []
        self.seq_5prime_heatmap_data = []
        # Iterate over the average ratio values for each sample and calculate cosine similarity between pairs of samples.
        # (spatial.distance.cosine returns a distance, i.e. 1 - cosine similarity)
        for avg_data1 in self.seq_3prime_avg_data:
            cosine_distance_vector = []
            for avg_data2 in self.seq_3prime_avg_data:
                cosine_distance_vector.append(
                    spatial.distance.cosine(avg_data1, avg_data2))
            self.seq_3prime_heatmap_data.append(cosine_distance_vector)

        for avg_data1 in self.seq_5prime_avg_data:
            cosine_distance_vector = []
            for avg_data2 in self.seq_5prime_avg_data:
                cosine_distance_vector.append(
                    spatial.distance.cosine(avg_data1, avg_data2))
            self.seq_5prime_heatmap_data.append(cosine_distance_vector)

        # Heatmap axis labels: sample names from the parent directory names
        seq_heat_map_labels = [
            x.split('/')[-2] for x in self.seq_bias_path_list
        ]
        """
        Dictionary variables to store the ratio values for each nucleotide across samples. We plot seperate
        line plots for each nucleotide taken from each read end.
        """
        A3_dict = dict()
        C3_dict = dict()
        G3_dict = dict()
        T3_dict = dict()
        A5_dict = dict()
        C5_dict = dict()
        T5_dict = dict()
        G5_dict = dict()

        for k in list(self.seq_3prime_ratio.keys()):
            A3_dict[k] = self.seq_3prime_ratio[k]['A']
            C3_dict[k] = self.seq_3prime_ratio[k]['C']
            G3_dict[k] = self.seq_3prime_ratio[k]['G']
            T3_dict[k] = self.seq_3prime_ratio[k]['T']

        for k in list(self.seq_5prime_ratio.keys()):
            A5_dict[k] = self.seq_5prime_ratio[k]['A']
            C5_dict[k] = self.seq_5prime_ratio[k]['C']
            G5_dict[k] = self.seq_5prime_ratio[k]['G']
            T5_dict[k] = self.seq_5prime_ratio[k]['T']

        # Variables to store the average sequential bias ratios for each nucleotide base across samples.
        A3_avg = dict()
        C3_avg = dict()
        G3_avg = dict()
        T3_avg = dict()
        A5_avg = dict()
        C5_avg = dict()
        T5_avg = dict()
        G5_avg = dict()
        files_count = len(self.seq_3prime_ratio.keys())

        # Sum per-position ratios for every nucleotide over all samples
        for k in list(self.seq_3prime_ratio.keys()):
            A3 = A3_dict[k]
            C3 = C3_dict[k]
            G3 = G3_dict[k]
            T3 = T3_dict[k]
            A5 = A5_dict[k]
            C5 = C5_dict[k]
            G5 = G5_dict[k]
            T5 = T5_dict[k]
            for key in list(A3.keys()):
                # First sample initialises each position, later samples
                # accumulate. Narrowed from a bare `except:` so only the
                # expected missing key is handled and real errors propagate.
                try:
                    A3_avg[key] += A3[key]
                    C3_avg[key] += C3[key]
                    G3_avg[key] += G3[key]
                    T3_avg[key] += T3[key]
                    A5_avg[key] += A5[key]
                    C5_avg[key] += C5[key]
                    G5_avg[key] += G5[key]
                    T5_avg[key] += T5[key]
                except KeyError:
                    A3_avg[key] = A3[key]
                    C3_avg[key] = C3[key]
                    G3_avg[key] = G3[key]
                    T3_avg[key] = T3[key]
                    A5_avg[key] = A5[key]
                    C5_avg[key] = C5[key]
                    G5_avg[key] = G5[key]
                    T5_avg[key] = T5[key]

        # Convert accumulated sums into per-position means over all samples
        for key in list(A3_avg.keys()):
            A3_avg[key] = A3_avg[key] / files_count
            C3_avg[key] = C3_avg[key] / files_count
            G3_avg[key] = G3_avg[key] / files_count
            T3_avg[key] = T3_avg[key] / files_count
            A5_avg[key] = A5_avg[key] / files_count
            C5_avg[key] = C5_avg[key] / files_count
            G5_avg[key] = G5_avg[key] / files_count
            T5_avg[key] = T5_avg[key] / files_count

        # Bundle the per-base averages for both read ends into one mapping so
        # they render as eight series on a single line plot below
        self.seq_bias_avg = {
            "A3": A3_avg,
            "C3": C3_avg,
            "G3": G3_avg,
            "T3": T3_avg,
            "A5": A5_avg,
            "C5": C5_avg,
            "G5": G5_avg,
            "T5": T5_avg
        }

        # Section that contains plot configurations and calls to plot functions.
        # NOTE(review): every plot config in this method reuses the id
        # 'salmon_plot'; MultiQC plot ids are meant to be unique per report —
        # confirm before relying on section anchors or exported plot data.
        if self.gc_bias_path_list:
            # GC bias ratio per sample, first fragment model
            fconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title':
                'Salmon: GC Bias Distribution in first model for different samples',
                'ylab': 'Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(
                plot=linegraph.plot(self.gc_first_model_ratio, fconfig))

            # GC bias ratio per sample, second fragment model
            sconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title':
                'Salmon: GC Bias Distribution in second model for different samples',
                'ylab': 'Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(
                plot=linegraph.plot(self.gc_second_model_ratio, sconfig))

            # GC bias ratio per sample, third fragment model
            tconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title':
                'Salmon: GC Bias Distribution in third model for different samples',
                'ylab': 'Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(
                plot=linegraph.plot(self.gc_third_model_ratio, tconfig))

            # Cross-sample average GC bias curve for each model
            avgconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title':
                'Salmon: Avg GC Bias Distribution for across all samples',
                'ylab': 'Average Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(plot=linegraph.plot(modelAvg, avgconfig))

            # Sample-vs-sample cosine distance heatmap of average GC bias
            gcheatmapconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title': 'Average GC Bias similarity',
                'ylab': 'Average Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(
                plot=heatmap.plot(self.gc_heatmap_data, self.gc_heatmap_labels,
                                  self.gc_heatmap_labels, gcheatmapconfig))

        # Sequence bias sections: one line plot per (nucleotide, read end),
        # then the cross-sample averages and two similarity heatmaps.
        if self.seq_bias_path_list:
            taprimeconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title':
                'Salmon: Sequence Bias Distribution for different experiments measured from 3\' prime end for nucleotide A',
                'ylab': 'Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(plot=linegraph.plot(A3_dict, taprimeconfig))

            tcprimeconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title':
                'Salmon: Sequence Bias Distribution for different experiments measured from 3\' prime end for nucleotide C',
                'ylab': 'Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(plot=linegraph.plot(C3_dict, tcprimeconfig))

            tgprimeconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title':
                'Salmon: Sequence Bias Distribution for different experiments measured from 3\' prime end for nucleotide G',
                'ylab': 'Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(plot=linegraph.plot(G3_dict, tgprimeconfig))

            ttprimeconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title':
                'Salmon: Sequence Bias Distribution for different experiments measured from 3\' prime end for nucleotide T',
                'ylab': 'Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(plot=linegraph.plot(T3_dict, ttprimeconfig))

            faprimeconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title':
                'Salmon: Sequence Bias Distribution for different experiments measured from 5\' end for nucleotide A',
                'ylab': 'Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(plot=linegraph.plot(A5_dict, faprimeconfig))

            fcprimeconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title':
                'Salmon: Sequence Bias Distribution for different experiments measured from 5\' end for nucleotide C',
                'ylab': 'Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(plot=linegraph.plot(C5_dict, fcprimeconfig))

            fgprimeconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title':
                'Salmon: Sequence Bias Distribution for different experiments measured from 5\' end for nucleotide G',
                'ylab': 'Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(plot=linegraph.plot(G5_dict, fgprimeconfig))

            ftprimeconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title':
                'Salmon: Sequence Bias Distribution for different experiments measured from 5\' end for nucleotide T',
                'ylab': 'Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(plot=linegraph.plot(T5_dict, ftprimeconfig))

            # Cross-sample average bias for every base at both read ends
            seqavgconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title':
                'Salmon: Avg Sequential Bias for each base across all samples for both 3\' and 5\' ends',
                'ylab': 'Average Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(
                plot=linegraph.plot(self.seq_bias_avg, seqavgconfig))

            # Sample-vs-sample cosine distance heatmap, 3' end averages
            seq3primeheatmappconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title': 'Average Sequential Bias (3 Prime) similarity',
                'ylab': 'Average Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(plot=heatmap.plot(
                self.seq_3prime_heatmap_data, seq_heat_map_labels,
                seq_heat_map_labels, seq3primeheatmappconfig))

            # Sample-vs-sample cosine distance heatmap, 5' end averages
            seq5sprimeheatmappconfig = {
                'smooth_points': 500,
                'id': 'salmon_plot',
                'title': 'Average Sequential Bias (5 Prime) similarity',
                'ylab': 'Average Ratio (Observed/Expected)',
                'xlab': 'Read count',
                'ymin': 0,
                'xmin': 0,
                'tt_label': '<b>{point.x:,.0f} bp</b>: {point.y:,.0f}',
            }
            self.add_section(plot=heatmap.plot(
                self.seq_5prime_heatmap_data, seq_heat_map_labels,
                seq_heat_map_labels, seq5sprimeheatmappconfig))

        # Fragment length distribution plot (pconfig was built further up)
        self.add_section(plot=linegraph.plot(self.salmon_fld, pconfig))