Ejemplo n.º 1
0
    def plot_accuracy(self):
        """Generate the GffCompare accuracy scatter plot.

        One dataset is built per entry of ``datasets``. Within a dataset every
        sample in ``self.gffcompare_data`` contributes a single point: x is its
        sensitivity and y its precision for that dataset, both divided by 100
        (the plot axes are limited to the 0-1 range).

        Returns:
            The result of ``scatter.plot`` for the assembled datasets.
        """
        datasets = ['Base', 'Exon', 'Intron', 'Intron_chain', 'Transcript', 'Locus']

        pconfig = {
            'id': 'gffcompare_accuracy_plot',
            'title': 'GffCompare: Accuracy values',
            'ylab': 'Precision',
            'xlab': 'Sensitivity',
            'ymin': 0,
            'ymax': 1,
            'xmin': 0,
            'xmax': 1,
            'data_labels': [{'name': x} for x in datasets]
        }

        # One {sample: point} mapping per dataset, in the same order as the
        # 'data_labels' entries above.
        data_classification = []
        for dataset in datasets:
            points = {}
            for sample, sample_data in self.gffcompare_data.items():
                accuracy = sample_data['accuracy'][dataset]
                points[sample] = {
                    'x': accuracy['sensitivity'] / 100,
                    'y': accuracy['precision'] / 100,
                    'name': dataset
                }
            data_classification.append(points)

        # The plot data is no longer print()ed here: that was a debugging
        # leftover that wrote the whole structure to stdout on every run.
        return scatter.plot(data_classification, pconfig)
Ejemplo n.º 2
0
    def peddy_relatedness_plot(self):
        """Add the peddy relatedness scatter plot section to the report.

        Every entry of ``self.peddy_data`` that has both ``ibs0`` and ``ibs2``
        becomes one point (x = ibs0, y = ibs2). When the entry also has a
        ``rel`` value, the point is coloured by the band that value falls in
        (below 0.25, below 0.5, otherwise). No section is added when no entry
        yields a point.
        """
        data = dict()
        for s_name, d in self.peddy_data.items():
            # A colour can only be attached to a point that exists. Entries
            # with 'rel' but without both coordinates used to raise KeyError
            # when the colour was assigned; they are skipped instead.
            if 'ibs0' not in d or 'ibs2' not in d:
                continue
            point = {
                'x': d['ibs0'],
                'y': d['ibs2']
            }
            if 'rel' in d:
                if d['rel'] < 0.25:
                    point['color'] = 'rgba(109, 164, 202, 0.9)'
                elif d['rel'] < 0.5:
                    point['color'] = 'rgba(250, 160, 81, 0.8)'
                else:
                    point['color'] = 'rgba(43, 159, 43, 0.8)'
            data[s_name] = point

        pconfig = {
            'id': 'peddy_relatedness_plot',
            'title': 'Peddy: Relatedness Plot',
            'xlab': 'IBS0 (no alleles shared)',
            'ylab': 'IBS2 (both alleles shared)',
        }

        if data:
            self.add_section (
                name = 'Relatedness',
                anchor = 'peddy-relatedness-plot',
                description = """Shared allele rates between sample pairs. Points are coloured by degree of relatedness:
                <span style="color: #6DA4CA;">less than 0.25</span>,
                <span style="color: #FAA051;">0.25 - 0.5</span>,
                <span style="color: #2B9F2B;">greather than 0.5</span>.""",
                plot = scatter.plot(data, pconfig)
            )
Ejemplo n.º 3
0
    def somalier_sex_check_plot(self):
        """Add the scatter plot of pedigree sex against depth on X.

        Samples of ``self.somalier_data`` that carry both ``X_depth_mean`` and
        ``original_pedigree_sex`` are plotted. The x position is the category
        index of the pedigree sex (any value not in the lookup falls back to
        index 2, "Unknown") plus a small random offset; y is ``X_depth_mean``.
        No section is added when no sample qualifies.
        """
        category_of = {"female": 0, "male": 1, "unknown": 2}

        points = {}
        for sample_name, stats in self.somalier_data.items():
            if "X_depth_mean" not in stats or "original_pedigree_sex" not in stats:
                continue
            # random.random() lies in [0, 1), so the offset lies in
            # [-0.05, 0.05) - presumably to keep points that share a category
            # from overlapping exactly.
            offset = (random.random() - 0.5) * 0.1
            points[sample_name] = {
                "x": offset + category_of.get(stats["original_pedigree_sex"], 2),
                "y": stats["X_depth_mean"],
            }

        if not points:
            return

        plot_settings = {
            "id": "somalier_sex_check_plot",
            "title": "Somalier: Sample Predicted Sex",
            "xlab": "Sex from pedigree",
            "ylab": "Scaled mean depth on X",
            "categories": ["Female", "Male", "Unknown"],
        }
        self.add_section(
            name="Sex",
            description="Predicted sex against scaled depth on X",
            helptext="Higher values of depth, low values suggest male.",
            anchor="somalier-sexcheck",
            plot=scatter.plot(points, plot_settings),
        )
Ejemplo n.º 4
0
    def snp_rate_scatterplot(self):
        """Add the scatter plot of relative coverage on the X and Y chromosomes.

        Each sample in ``self.sexdet_data`` that provides both ``RateX`` and
        ``RateY`` becomes one point (x = RateX, y = RateY); samples missing
        either key are left out. No section is added when no sample qualifies.
        """
        points = OrderedDict()
        for sample_name, rates in self.sexdet_data.items():
            try:
                rate_x = rates['RateX']
                rate_y = rates['RateY']
            except KeyError:
                # Incomplete sample: leave it out of the plot
                continue
            points[sample_name] = {'x': rate_x, 'y': rate_y}

        if not points:
            return

        plot_settings = {
            'id': 'sexdeterrmine-rate-plot',
            'title': 'SexDetErrmine: Relative coverage',
            'ylab': 'Relative Cov. on Y',
            'xlab': 'Relative Cov. on X'
        }
        # The indentation inside the literal is part of the help text.
        help_md = '''
                Males are expected to have a roughly equal X- and Y-rates, while females are expected to have a Y-rate of 0 and an X-rate of 1.
                Placement between the two clusters can be indicative of contamination, while placement with higher than expected X- and/or Y-rates can be indicative of sex chromosome aneuploidy.
                '''
        self.add_section(
            name='Relative Coverage',
            anchor='sexdeterrmine-rates',
            description='The coverage on the X vs Y chromosome, relative to coverage on the Autosomes.',
            helptext=help_md,
            plot=scatter.plot(points, plot_settings))
Ejemplo n.º 5
0
  def quartet_scatter_plot(self, figure_data, pconfig_id, pconfig_title, name, anchor):
    """Add a scatter plot section of F1-score against Mendelian concordance.

    Args:
      figure_data: Object whose ``iterrows()`` yields ``(index, row)`` pairs;
        each row is read by the keys "Sample", "F1-score", "MCR" and "Batch".
      pconfig_id: Value used as the plot 'id'.
      pconfig_title: Value used as the plot 'title'.
      name: Section name.
      anchor: Section anchor.

    Rows whose "Batch" equals "Your Datasets" are drawn in blue, all other
    rows in yellow. No section is added when there are no rows.
    """
    own_colour = 'rgba(109, 164, 202, 0.9)'    # blue
    other_colour = 'rgba(250, 160, 81, 0.8)'   # yellow

    points = {}
    for _, record in figure_data.iterrows():
      sample_name = record["Sample"]
      point = {'x': record["F1-score"], 'y': record["MCR"]}
      point['color'] = own_colour if record["Batch"] == "Your Datasets" else other_colour
      points[sample_name] = point

    if not points:
      return

    plot_settings = {
      'id': pconfig_id,
      'title': pconfig_title,
      'xlab': 'F1-score',
      'ylab': 'Mendelian Concordance Rate',
      "use_legend": True
    }
    # The indentation inside the literal is part of the description text.
    legend_text = """Points are coloured as follows:
        <span style="color: #6DA4CA;">Your Datasets</span>,
        <span style="color: #FAA051;">Rest Submmited Datasets</span>."""
    self.add_section(
      name=name,
      anchor=anchor,
      description=legend_text,
      plot=scatter.plot(points, plot_settings)
    )
Ejemplo n.º 6
0
    def peddy_relatedness_plot(self):
        """Build the peddy relatedness scatter plot together with its description.

        Every entry of ``self.peddy_data`` that has both ``ibs0`` and ``ibs2``
        becomes one point (x = ibs0, y = ibs2). When the entry also has a
        ``rel`` value, the point is coloured by the band that value falls in
        (below 0.25, below 0.5, otherwise).

        Returns:
            The description paragraph concatenated with the result of
            ``scatter.plot``, or None when no entry yields a point.
        """
        data = dict()
        for s_name, d in self.peddy_data.items():
            # A colour can only be attached to a point that exists. Entries
            # with 'rel' but without both coordinates used to raise KeyError
            # when the colour was assigned; they are skipped instead.
            if 'ibs0' not in d or 'ibs2' not in d:
                continue
            point = {'x': d['ibs0'], 'y': d['ibs2']}
            if 'rel' in d:
                if d['rel'] < 0.25:
                    point['color'] = 'rgba(109, 164, 202, 0.9)'
                elif d['rel'] < 0.5:
                    point['color'] = 'rgba(250, 160, 81, 0.8)'
                else:
                    point['color'] = 'rgba(43, 159, 43, 0.8)'
            data[s_name] = point

        pconfig = {
            'id': 'peddy_relatedness_plot',
            'title': 'Peddy Relatedness Plot',
            'xlab': 'IBS0 (no alleles shared)',
            'ylab': 'IBS2 (both alleles shared)',
        }

        if data:
            return """<p>Shared allele rates between sample pairs. Points are coloured by degree of relatedness:
            <span style="color: #6DA4CA;">less than 0.25</span>,
            <span style="color: #FAA051;">0.25 - 0.5</span>,
            <span style="color: #2B9F2B;">greather than 0.5</span>.</p>""" + scatter.plot(
                data, pconfig)
        return None
Ejemplo n.º 7
0
    def peddy_het_check_plot(self):
        """Add the het_check scatter plot section to the report.

        Every sample of ``self.peddy_data`` that has both
        ``median_depth_het_check`` and ``het_ratio_het_check`` becomes one
        point (x = median depth, y = het ratio). No section is added when no
        sample provides both values.
        """
        # Sample name -> point to plot
        data = {}

        for s_name, d in self.peddy_data.items():
            # Only samples with both required columns can be plotted
            if 'median_depth_het_check' in d and 'het_ratio_het_check' in d:
                data[s_name] = {
                    'x': d['median_depth_het_check'],
                    'y': d['het_ratio_het_check']
                }

        # Without any point the section would only hold an empty plot, so it
        # is skipped altogether.
        if not data:
            return

        pconfig = {
            'id': 'peddy_het_check_plot',
            'title': 'Peddy: Het Check',
            'xlab': 'median depth',
            'ylab': 'proportion het calls',
        }

        self.add_section(
            name='Het Check',
            description=
            "Proportion of sites that were heterozygous against median depth.",
            helptext="""
            A high proportion of heterozygous sites suggests contamination, a low proportion suggests consanguinity.
            
            See [the main peddy documentation](https://peddy.readthedocs.io/en/latest/output.html#het-check) for more details about the `het_check` command.
            """,
            anchor='peddy-hetcheck-plot',
            plot=scatter.plot(data, pconfig))
Ejemplo n.º 8
0
    def peddy_sex_check_plot(self):
        """Add the sex check scatter plot section to the report.

        Every sample of ``self.peddy_data`` that has both ``sex_het_ratio``
        and ``ped_sex_sex_check`` becomes one point: x is the category index
        of the pedigree sex (any value not in the lookup falls back to index
        2, "Unknown") and y is the het ratio. No section is added when no
        sample provides both values.
        """
        data = {}
        sex_index = {"female": 0, "male": 1, "unknown": 2}

        for s_name, d in self.peddy_data.items():
            if 'sex_het_ratio' in d and 'ped_sex_sex_check' in d:
                data[s_name] = {
                    'x': sex_index.get(d['ped_sex_sex_check'], 2),
                    'y': d["sex_het_ratio"]
                }

        # Without any point the section would only hold an empty plot, so it
        # is skipped altogether.
        if not data:
            return

        pconfig = {
            'id': 'peddy_sex_check_plot',
            'title': 'Peddy: Sex Check',
            'xlab': 'Sex From Ped',
            'ylab': 'Sex Het Ratio',
            'categories': ["Female", "Male", "Unknown"]
        }

        self.add_section(
            name = 'Sex Check',
            description = "Predicted sex against heterozygosity ratio",
            helptext = """
            Higher values of Sex Het Ratio suggests the sample is female, low values suggest male.

            See [the main peddy documentation](http://peddy.readthedocs.io/en/latest/#sex-check) for more details about the `het_check` command.
            """,
            anchor='peddy-sexcheck-plot',
            plot=scatter.plot(data, pconfig)
        )
Ejemplo n.º 9
0
    def somalier_het_check_plot(self):
        """Add the scatter plot of allele-balance spread against mean depth.

        Samples of ``self.somalier_data`` that carry both ``gt_depth_mean``
        and ``ab_std`` become one point each (x = gt_depth_mean, y = ab_std).
        No section is added when no sample qualifies.
        """
        required_keys = ("gt_depth_mean", "ab_std")

        # Sample name -> point, restricted to samples with both values
        points = {
            sample_name: {"x": stats["gt_depth_mean"], "y": stats["ab_std"]}
            for sample_name, stats in self.somalier_data.items()
            if all(key in stats for key in required_keys)
        }
        if not points:
            return

        plot_settings = {
            "id": "somalier_het_check_plot",
            "title": "Somalier: Sample Observed Heterozygosity",
            "xlab": "Mean depth",
            "ylab": "Standard deviation of allele-balance",
        }
        self.add_section(
            name="Heterozygosity",
            description="Standard devation of heterozygous allele balance against mean depth.",
            helptext="A high standard deviation in allele balance suggests contamination.",
            anchor="somalier-hetcheck",
            plot=scatter.plot(points, plot_settings),
        )
Ejemplo n.º 10
0
    def peddy_het_check_plot(self):
        """Add the het_check scatter plot section to the report.

        Every sample of ``self.peddy_data`` that has both
        ``median_depth_het_check`` and ``het_ratio_het_check`` becomes one
        point (x = median depth, y = het ratio). No section is added when no
        sample provides both values.
        """
        # Sample name -> point to plot
        data = {}

        for s_name, d in self.peddy_data.items():
            # Only samples with both required columns can be plotted
            if 'median_depth_het_check' in d and 'het_ratio_het_check' in d:
                data[s_name] = {
                    'x': d['median_depth_het_check'],
                    'y': d['het_ratio_het_check']
                }

        # Without any point the section would only hold an empty plot, so it
        # is skipped altogether.
        if not data:
            return

        pconfig = {
            'id': 'peddy_het_check_plot',
            'title': 'Peddy: Het Check',
            'xlab': 'median depth',
            'ylab': 'proportion het calls',
        }

        self.add_section (
            name = 'Het Check',
            description = "Proportion of sites that were heterozygous against median depth.",
            helptext = """
            A high proportion of heterozygous sites suggests contamination, a low proportion suggests consanguinity.
            
            See [the main peddy documentation](https://peddy.readthedocs.io/en/latest/output.html#het-check) for more details about the `het_check` command.
            """,
            anchor = 'peddy-hetcheck-plot',
            plot = scatter.plot(data, pconfig)
        )
Ejemplo n.º 11
0
    def add_pca_plots(self):
        """Add a section with scatter plots of consecutive principal components.

        Reads ``unsupervised/PCA.csv`` below the configured output directory,
        stores every row in ``self.pca_dict`` keyed by its ``sample`` column,
        and plots PC1 vs. PC2 up to PC4 vs. PC5 as datasets of a single
        scatter plot. A pair is left out when the CSV has no column for one of
        its components.

        Returns:
            0 when the CSV file is missing, has no data rows or offers no
            plottable pair of components; otherwise None.
        """
        results_path = config.metadata['output_dir']
        pca_csv_path = os.path.join(results_path, 'unsupervised', 'PCA.csv')
        if not os.path.exists(pca_csv_path):
            return 0

        # Read the PCA values; the context manager closes the file handle,
        # which the previous bare open() never did.
        with open(pca_csv_path, 'r') as handle:
            reader = csv.DictReader(handle)
            rows = list(reader)
            columns = reader.fieldnames or []
        if not rows:
            # Empty or header-only file: nothing to store or plot
            return 0
        for row in rows:
            self.pca_dict[row['sample']] = row

        # Map the text before the first space of each column header (looked up
        # below as 'PC1' ... 'PC5') to the full header.
        principle_components = {}
        for column in columns:
            principle_components[column.split(' ')[0]] = column

        # Keep only the pairs for which both components are present, so that a
        # CSV with fewer than five components no longer raises KeyError.
        wanted_pairs = [('PC1', 'PC2'), ('PC2', 'PC3'), ('PC3', 'PC4'), ('PC4', 'PC5')]
        pairs = [
            (first, second) for first, second in wanted_pairs
            if first in principle_components and second in principle_components
        ]
        if not pairs:
            return 0

        pca_plot_config = {
            'data_labels': [
                {
                    'name': '{} vs. {}'.format(first, second),
                    'xlab': principle_components[first],
                    'ylab': principle_components[second]
                }
                for first, second in pairs
            ],
            'id': 'atacseq_pca_plot',
            'marker_size': 5
        }

        # One dataset per pair, in the same order as 'data_labels'
        pca_plot_data = [
            self.generate_pca_plot_data(principle_components[first], principle_components[second])
            for first, second in pairs
        ]
        self.add_section(
            name='Principal Component Analysis',
            anchor='atacseq_pca',
            description='Scatter plots of PCA results',
            helptext='You can see the plots of principal components',
            plot=scatter.plot(pca_plot_data, pconfig=pca_plot_config)
        )
Ejemplo n.º 12
0
 def bin_plot(self):
     """Add the goleft indexcov "problem coverage bins" scatter plot section.

     The points come from ``self.bin_plot_data``; this method only supplies
     the plot settings and the section texts.
     """
     # The indentation inside the literal is part of the help text.
     help_md = """
             We expect bins to be around 1, so deviations from this indicate problems.
             Low coverage bins (`< 0.15`) on the x-axis have regions with low or missing coverage.
             Higher values indicate truncated BAM files or missing data.
             Bins with skewed distributions (`<0.85` or `>1.15`) on the y-axis detect dosage bias.
             Large values on the y-axis are likely to impact CNV and structural variant calling.
             See the [goleft indexcov bin documentation](https://github.com/brentp/goleft/blob/master/docs/indexcov/help-bin.md)
             for more details.
         """
     summary = "This plot identifies problematic samples using binned coverage distributions."

     # Both axes are bounded to the 0-1 range
     plot_settings = {
         "id": "goleft_indexcov-bin-plot",
         "title": "goleft indexcov: Problematic low and non-uniform coverage bins",
         "xlab": "Proportion of bins with depth < 0.15",
         "ylab": "Proportion of bins with depth outside of (0.85, 1.15)",
         "yCeiling": 1.0,
         "yFloor": 0.0,
         "xCeiling": 1.0,
         "xFloor": 0.0,
     }

     self.add_section(
         name="Problem coverage bins",
         anchor="goleft_indexcov-bin",
         description=summary,
         helptext=help_md,
         plot=scatter.plot(self.bin_plot_data, plot_settings),
     )
Ejemplo n.º 13
0
    def somalier_sex_check_plot(self):
        """Add the scatter plot of pedigree sex against depth on X.

        Samples of ``self.somalier_data`` that carry both ``X_depth_mean`` and
        ``original_pedigree_sex`` are plotted. The x position is the category
        index of the pedigree sex (any value not in the lookup falls back to
        index 2, "Unknown") plus a small random offset; y is ``X_depth_mean``.
        No section is added when no sample qualifies.
        """
        category_of = {"female": 0, "male": 1, "unknown": 2}

        points = {}
        for sample_name, stats in self.somalier_data.items():
            has_both = 'X_depth_mean' in stats and 'original_pedigree_sex' in stats
            if not has_both:
                continue
            # random.random() lies in [0, 1), so the offset lies in
            # [-0.05, 0.05) - presumably to keep points that share a category
            # from overlapping exactly.
            offset = (random.random() - 0.5) * 0.1
            category = category_of.get(stats['original_pedigree_sex'], 2)
            points[sample_name] = {'x': offset + category, 'y': stats["X_depth_mean"]}

        if not points:
            return

        plot_settings = {
            'id': 'somalier_sex_check_plot',
            'title': 'Somalier: Sample Predicted Sex',
            'xlab': 'Sex from pedigree',
            'ylab': 'Scaled mean depth on X',
            'categories': ["Female", "Male", "Unknown"]
        }
        self.add_section(
            name='Sex',
            description="Predicted sex against scaled depth on X",
            helptext="Higher values of depth, low values suggest male.",
            anchor='somalier-sexcheck',
            plot=scatter.plot(points, plot_settings))
Ejemplo n.º 14
0
 def blobtools_blob_graph(self):
     """Build the Blobtools blob scatter plot from ``self.blobtools_blob_data``.

     Returns:
         The result of ``scatter.plot`` for that data.
     """
     plot_settings = dict(
         id='blobtools-5',
         title='Blobtools: blobplots',
         showInLegend=True,
     )
     return scatter.plot(self.blobtools_blob_data, plot_settings)
Ejemplo n.º 15
0
    def add_reported_vs_empirical_section(self):
        """Add the reported vs. empirical quality scatter plot section.

        For every recalibration table type, the rows of each sample's
        ``recal_table_1`` are grouped by ``QualityScore``; each group becomes
        one point whose x is the reported score and whose y is the mean
        ``EmpiricalQuality`` of the group. Every table type that has data
        becomes one dataset of the plot, labelled with the table type name.
        """
        sample_data = []
        data_labels = []

        # Loop through the different data types
        for rt_type_name, rt_type in recal_table_type._asdict().items():
            # This table appears to be the correct one to use for reported vs empirical
            # https://github.com/broadinstitute/gatk/blob/853b53ec2a3ac2d90d7d82a6c8451e29a34692d2/src/main/resources/org/broadinstitute/hellbender/utils/recalibration/BQSR.R#L148
            sample_tables = self.gatk_base_recalibrator[rt_type]["recal_table_1"]
            if len(sample_tables) == 0:
                continue

            reported_empirical = {}
            for sample, table in sample_tables.items():
                # The table is column-oriented (name -> values); zipping the
                # value sequences turns it into one dict per row.
                table_rows = [dict(zip(table, r)) for r in zip(*table.values())]
                # groupby only merges adjacent rows, hence the sort on the
                # same key first.
                table_rows.sort(key=lambda r: r["QualityScore"])

                points = []
                for reported, group in groupby(table_rows, lambda r: r["QualityScore"]):
                    # A groupby group always holds at least one row, so the
                    # mean needs no empty-group fallback.
                    g = list(group)
                    points.append({
                        "x": int(reported),
                        "y": sum(float(r["EmpiricalQuality"]) for r in g) / len(g),
                    })
                reported_empirical[sample] = points

            sample_data.append(reported_empirical)

            # Build data label configs for this data type. The "{}" placeholder
            # used to be left unformatted, so every dataset label showed a
            # literal "{}" instead of the table type name.
            data_labels.append({
                "name": "{} Reported vs. Empirical Quality".format(
                    rt_type_name.capitalize().replace("_", "-")),
                "ylab": "Empirical quality score"
            })

        plot = scatter.plot(
            sample_data,
            pconfig={
                "title": "Reported vs. Empirical Quality",
                "id": "gatk-base-recalibrator-reported-empirical-plot",
                "xlab": "Reported quality score",
                "ylab": "Empirical quality score",
                "xDecimals": False,
                "data_labels": data_labels,
            },
        )

        self.add_section(
            name="Reported Quality vs. Empirical Quality",
            anchor="gatk-base-recalibrator-reported-empirical",
            description=
            "Plot shows the reported quality score vs the empirical quality score.",
            plot=plot,
        )
Ejemplo n.º 16
0
    def peddy_relatedness_plot(self):
        """Add the peddy relatedness scatter plot section to the report.

        Every entry of ``self.peddy_data`` that has both ``ibs0_ped_check``
        and ``ibs2_ped_check`` becomes one point (x = ibs0, y = ibs2). When
        the entry also has a ``rel_ped_check`` value, the point is coloured by
        the band that value falls in (below 0.25, below 0.5, otherwise). No
        section is added when no entry yields a point.
        """
        data = dict()
        for s_name, d in self.peddy_data.items():
            # A colour can only be attached to a point that exists. Entries
            # with 'rel_ped_check' but without both coordinates used to raise
            # KeyError when the colour was assigned; they are skipped instead.
            if "ibs0_ped_check" not in d or "ibs2_ped_check" not in d:
                continue
            point = {
                "x": d["ibs0_ped_check"],
                "y": d["ibs2_ped_check"]
            }
            if "rel_ped_check" in d:
                if d["rel_ped_check"] < 0.25:
                    point["color"] = "rgba(109, 164, 202, 0.9)"
                elif d["rel_ped_check"] < 0.5:
                    point["color"] = "rgba(250, 160, 81, 0.8)"
                else:
                    point["color"] = "rgba(43, 159, 43, 0.8)"
            data[s_name] = point

        pconfig = {
            "id": "peddy_relatedness_plot",
            "title": "Peddy: Relatedness Plot",
            "xlab": "IBS0 (no alleles shared)",
            "ylab": "IBS2 (both alleles shared)",
        }

        if data:
            self.add_section(
                name="Relatedness",
                anchor="peddy-relatedness-plot",
                description=
                """Shared allele rates between sample pairs. Points are coloured by degree of relatedness:
                <span style="color: #6DA4CA;">less than 0.25</span>,
                <span style="color: #FAA051;">0.25 - 0.5</span>,
                <span style="color: #2B9F2B;">greather than 0.5</span>.""",
                plot=scatter.plot(data, pconfig),
            )
Ejemplo n.º 17
0
    def snp_rate_scatterplot(self):
        """Add the scatter plot of relative coverage on the X and Y chromosomes.

        Each sample in ``self.sexdet_data`` that provides both ``RateX`` and
        ``RateY`` becomes one point (x = RateX, y = RateY); samples missing
        either key are left out. No section is added when no sample qualifies.
        """
        points = OrderedDict()
        for sample_name, rates in self.sexdet_data.items():
            try:
                rate_x = rates["RateX"]
                rate_y = rates["RateY"]
            except KeyError:
                # Incomplete sample: leave it out of the plot
                continue
            points[sample_name] = {"x": rate_x, "y": rate_y}

        if not points:
            return

        plot_settings = {
            "id": "sexdeterrmine-rate-plot",
            "title": "SexDetErrmine: Relative coverage",
            "ylab": "Relative Cov. on Y",
            "xlab": "Relative Cov. on X",
        }
        # The indentation inside the literal is part of the help text.
        help_md = """
                Males are expected to have a roughly equal X- and Y-rates, while females are expected to have a Y-rate of 0 and an X-rate of 1.
                Placement between the two clusters can be indicative of contamination, while placement with higher than expected X- and/or Y-rates can be indicative of sex chromosome aneuploidy.
                """
        self.add_section(
            name="Relative Coverage",
            anchor="sexdeterrmine-rates",
            description="The coverage on the X vs Y chromosome, relative to coverage on the Autosomes.",
            helptext=help_md,
            plot=scatter.plot(points, plot_settings),
        )
Ejemplo n.º 18
0
    def peddy_sex_check_plot(self):
        """Add the sex check scatter plot section to the report.

        Every sample of ``self.peddy_data`` that has both ``sex_het_ratio``
        and ``ped_sex_sex_check`` becomes one point: x is the category index
        of the pedigree sex and y is the het ratio. A pedigree sex other than
        "female" or "male" is placed in an "Unknown" category instead of
        raising KeyError. No section is added when no sample provides both
        values.
        """
        data = {}
        sex_index = {"female": 0, "male": 1, "unknown": 2}

        for s_name, d in self.peddy_data.items():
            if 'sex_het_ratio' in d and 'ped_sex_sex_check' in d:
                data[s_name] = {
                    # Any value missing from the lookup falls back to index 2
                    'x': sex_index.get(d['ped_sex_sex_check'], 2),
                    'y': d["sex_het_ratio"]
                }

        # Without any point the section would only hold an empty plot, so it
        # is skipped altogether.
        if not data:
            return

        pconfig = {
            'id': 'peddy_sex_check_plot',
            'title': 'Peddy: Sex Check',
            'xlab': 'Sex From Ped',
            'ylab': 'Sex Het Ratio',
            'categories': ["Female", "Male", "Unknown"]
        }

        self.add_section(
            name='Sex Check',
            description="Predicted sex against heterozygosity ratio",
            helptext="""
            Higher values of Sex Het Ratio suggests the sample is female, low values suggest male.

            See [the main peddy documentation](http://peddy.readthedocs.io/en/latest/#sex-check) for more details about the `het_check` command.
            """,
            anchor='peddy-sexcheck-plot',
            plot=scatter.plot(data, pconfig))
Ejemplo n.º 19
0
    def __init__(self, c_id, mod):
        """Create a custom-content module and add its single section.

        Args:
            c_id: Identifier of the custom content. Used as the fallback
                section anchor, to derive the fallback section name, and in
                log messages.
            mod: Dictionary with a 'config' dictionary (section settings,
                'plot_type', optional 'pconfig', 'headers', 'categories',
                'xcats', 'ycats') and a 'data' entry that is handed to the
                plot function or used directly as the section content.
        """
        mod_config = mod['config']

        modname = mod_config.get('section_name', c_id.replace('_', ' ').title())
        if modname == '' or modname is None:
            modname = 'Custom Content'

        # Initialise the parent object
        super(MultiqcModule, self).__init__(
            name = modname,
            anchor = mod_config.get('section_anchor', c_id),
            href = mod_config.get('section_href'),
            info = mod_config.get('description')
        )

        # Work on a shallow copy so that the defaults filled in below do not
        # leak back into the caller's configuration; a missing or None
        # 'pconfig' is treated as empty.
        pconfig = dict(mod_config.get('pconfig') or {})
        if pconfig.get('title') is None:
            pconfig['title'] = modname

        # Looked up once instead of in every branch
        plot_type = mod_config.get('plot_type')

        # Table
        if plot_type == 'table':
            pconfig['sortRows'] = pconfig.get('sortRows', False)
            headers = mod_config.get('headers')
            self.add_section( plot = table.plot(mod['data'], headers, pconfig) )
            self.write_data_file( mod['data'], "multiqc_{}".format(modname.lower().replace(' ', '_')) )

        # Bar plot
        elif plot_type == 'bargraph':
            self.add_section( plot = bargraph.plot(mod['data'], mod_config.get('categories'), pconfig) )

        # Line plot
        elif plot_type == 'linegraph':
            self.add_section( plot = linegraph.plot(mod['data'], pconfig) )

        # Scatter plot
        elif plot_type == 'scatter':
            self.add_section( plot = scatter.plot(mod['data'], pconfig) )

        # Heatmap
        elif plot_type == 'heatmap':
            self.add_section( plot = heatmap.plot(mod['data'], mod_config.get('xcats'), mod_config.get('ycats'), pconfig) )

        # Beeswarm plot
        elif plot_type == 'beeswarm':
            self.add_section( plot = beeswarm.plot(mod['data'], pconfig) )

        # Raw HTML, or a raw image file as html: the data is the content
        elif plot_type in ('html', 'image'):
            self.add_section( content = mod['data'] )

        # Not supplied
        elif plot_type is None:
            log.warning("Plot type not found for content ID '{}'".format(c_id))

        # Not recognised
        else:
            log.warning("Error - custom content plot type '{}' not recognised for content ID {}".format(plot_type, c_id))
Ejemplo n.º 20
0
    def peddy_relatedness_plot(self):
        """Add the peddy relatedness scatter plot section to the report.

        Every entry of ``self.peddy_data`` that has both ``ibs0_ped_check``
        and ``ibs2_ped_check`` becomes one point (x = ibs0, y = ibs2). When
        the entry also has a ``rel_ped_check`` value, the point is coloured by
        the band that value falls in (below 0.25, below 0.5, otherwise). No
        section is added when no entry yields a point.
        """
        data = dict()
        for s_name, d in self.peddy_data.items():
            # A colour can only be attached to a point that exists. Entries
            # with 'rel_ped_check' but without both coordinates used to raise
            # KeyError when the colour was assigned; they are skipped instead.
            if 'ibs0_ped_check' not in d or 'ibs2_ped_check' not in d:
                continue
            point = {
                'x': d['ibs0_ped_check'],
                'y': d['ibs2_ped_check']
            }
            if 'rel_ped_check' in d:
                if d['rel_ped_check'] < 0.25:
                    point['color'] = 'rgba(109, 164, 202, 0.9)'
                elif d['rel_ped_check'] < 0.5:
                    point['color'] = 'rgba(250, 160, 81, 0.8)'
                else:
                    point['color'] = 'rgba(43, 159, 43, 0.8)'
            data[s_name] = point

        pconfig = {
            'id': 'peddy_relatedness_plot',
            'title': 'Peddy: Relatedness Plot',
            'xlab': 'IBS0 (no alleles shared)',
            'ylab': 'IBS2 (both alleles shared)',
        }

        if data:
            self.add_section(
                name='Relatedness',
                anchor='peddy-relatedness-plot',
                description=
                """Shared allele rates between sample pairs. Points are coloured by degree of relatedness:
                <span style="color: #6DA4CA;">less than 0.25</span>,
                <span style="color: #FAA051;">0.25 - 0.5</span>,
                <span style="color: #2B9F2B;">greather than 0.5</span>.""",
                plot=scatter.plot(data, pconfig))
Ejemplo n.º 21
0
    def peddy_sex_check_plot(self):
        """Add the sex check scatter plot section to the report.

        Samples of ``self.peddy_data`` that carry both ``sex_het_ratio`` and
        ``ped_sex_sex_check`` become one point each: x is the category index
        of the pedigree sex (any value not in the lookup falls back to index
        2, "Unknown") and y is the het ratio. No section is added when no
        sample qualifies.
        """
        category_of = {"female": 0, "male": 1, "unknown": 2}

        points = {}
        for sample_name, stats in self.peddy_data.items():
            if "sex_het_ratio" not in stats or "ped_sex_sex_check" not in stats:
                continue
            points[sample_name] = {
                "x": category_of.get(stats["ped_sex_sex_check"], 2),
                "y": stats["sex_het_ratio"],
            }

        if not points:
            return

        plot_settings = {
            "id": "peddy_sex_check_plot",
            "title": "Peddy: Sex Check",
            "xlab": "Sex From Ped",
            "ylab": "Sex Het Ratio",
            "categories": ["Female", "Male", "Unknown"],
        }
        # The indentation inside the literal is part of the help text.
        help_md = """
                Higher values of Sex Het Ratio suggests the sample is female, low values suggest male.

                See [the main peddy documentation](http://peddy.readthedocs.io/en/latest/#sex-check) for more details about the `het_check` command.
                """
        self.add_section(
            name="Sex Check",
            description="Predicted sex against heterozygosity ratio",
            helptext=help_md,
            anchor="peddy-sexcheck-plot",
            plot=scatter.plot(points, plot_settings),
        )
Ejemplo n.º 22
0
    def peddy_het_check_plot(self):
        """Add the het_check scatter plot section to the report.

        Samples of ``self.peddy_data`` that carry both
        ``median_depth_het_check`` and ``het_ratio_het_check`` become one
        point each (x = median depth, y = het ratio). No section is added when
        no sample qualifies.
        """
        required_keys = ("median_depth_het_check", "het_ratio_het_check")

        # Sample name -> point, restricted to samples with both columns
        points = {
            sample_name: {
                "x": stats["median_depth_het_check"],
                "y": stats["het_ratio_het_check"],
            }
            for sample_name, stats in self.peddy_data.items()
            if all(key in stats for key in required_keys)
        }
        if not points:
            return

        plot_settings = {
            "id": "peddy_het_check_plot",
            "title": "Peddy: Het Check",
            "xlab": "median depth",
            "ylab": "proportion het calls",
        }
        # The indentation inside the literal is part of the help text.
        help_md = """
                A high proportion of heterozygous sites suggests contamination, a low proportion suggests consanguinity.

                See [the main peddy documentation](https://peddy.readthedocs.io/en/latest/output.html#het-check) for more details about the `het_check` command.
                """
        self.add_section(
            name="Het Check",
            description="Proportion of sites that were heterozygous against median depth.",
            helptext=help_md,
            anchor="peddy-hetcheck-plot",
            plot=scatter.plot(points, plot_settings),
        )
Ejemplo n.º 23
0
    def __init__(self, c_id, mod):
        """Create a custom-content module and add its single section.

        Args:
            c_id: Identifier of the custom content. Used as the fallback
                section anchor, to derive the fallback section name, and in
                log messages.
            mod: Dictionary with a 'config' dictionary (section settings,
                'plot_type', optional 'pconfig', 'headers', 'categories',
                'xcats', 'ycats') and a 'data' entry that is handed to the
                plot function or used directly as the section content.
        """
        mod_config = mod['config']

        modname = mod_config.get('section_name', c_id.replace('_', ' ').title())
        if modname == '' or modname is None:
            modname = 'Custom Content'

        # Initialise the parent object
        super(MultiqcModule, self).__init__(
            name = modname,
            anchor = mod_config.get('section_anchor', c_id),
            href = mod_config.get('section_href'),
            info = mod_config.get('description')
        )

        # Work on a shallow copy so that the defaults filled in below do not
        # leak back into the caller's configuration; a missing or None
        # 'pconfig' is treated as empty.
        pconfig = dict(mod_config.get('pconfig') or {})
        if pconfig.get('title') is None:
            pconfig['title'] = modname

        # Looked up once instead of in every branch
        plot_type = mod_config.get('plot_type')

        # Table
        if plot_type == 'table':
            pconfig['sortRows'] = pconfig.get('sortRows', False)
            headers = mod_config.get('headers')
            self.add_section( plot = table.plot(mod['data'], headers, pconfig) )
            self.write_data_file( mod['data'], "multiqc_{}".format(modname.lower().replace(' ', '_')) )

        # Bar plot
        elif plot_type == 'bargraph':
            self.add_section( plot = bargraph.plot(mod['data'], mod_config.get('categories'), pconfig) )

        # Line plot
        elif plot_type == 'linegraph':
            self.add_section( plot = linegraph.plot(mod['data'], pconfig) )

        # Scatter plot
        elif plot_type == 'scatter':
            self.add_section( plot = scatter.plot(mod['data'], pconfig) )

        # Heatmap
        elif plot_type == 'heatmap':
            self.add_section( plot = heatmap.plot(mod['data'], mod_config.get('xcats'), mod_config.get('ycats'), pconfig) )

        # Beeswarm plot
        elif plot_type == 'beeswarm':
            self.add_section( plot = beeswarm.plot(mod['data'], pconfig) )

        # Raw HTML, or a raw image file as html: the data is the content
        elif plot_type in ('html', 'image'):
            self.add_section( content = mod['data'] )

        # Not supplied
        elif plot_type is None:
            log.warning("Plot type not found for content ID '{}'".format(c_id))

        # Not recognised
        else:
            log.warning("Error - custom content plot type '{}' not recognised for content ID {}".format(plot_type, c_id))
Ejemplo n.º 24
0
    def somalier_het_check_plot(self):
        """Add the 'Heterozygosity' scatter section.

        Every entry of ``self.somalier_data`` that has both 'gt_depth_mean'
        and 'ab_std' becomes one point (x = gt_depth_mean, y = ab_std).
        No section is added when no entry qualifies.
        """
        # Keep only the samples that carry both required columns
        data = {
            s_name: {'x': d['gt_depth_mean'], 'y': d['ab_std']}
            for s_name, d in self.somalier_data.items()
            if 'gt_depth_mean' in d and 'ab_std' in d
        }
        if not data:
            return

        pconfig = {
            'id': 'somalier_het_check_plot',
            'title': 'Somalier: Sample Observed Heterozygosity',
            'xlab': 'Mean depth',
            'ylab': 'Standard deviation of allele-balance',
        }

        self.add_section(
            name='Heterozygosity',
            description="Standard deviation of heterozygous allele balance against mean depth.",
            helptext="A high standard deviation in allele balance suggests contamination.",
            anchor='somalier-hetcheck',
            plot=scatter.plot(data, pconfig))
Ejemplo n.º 25
0
    def somalier_relatedness_plot(self):
        """Add the 'Relatedness' section: an IBS0 vs IBS2 scatter plot.

        Every entry of ``self.somalier_data`` with both 'ibs0' and 'ibs2'
        becomes one point. Points are coloured by 'expected_relatedness':
        the values 0, 0.49 and 0.5 have fixed colours, any other value is
        given the next colour of the "Dark2" scale (cycling when the scale
        is exhausted). No section is added when there are no points.
        """
        data = dict()
        alpha = 0.6
        relatedness_colours = {
            0: ['Unrelated', 'rgba(74, 124, 182, {})'.format(alpha)],
            0.49: ['Sib-sib', 'rgba(243, 123, 40, {})'.format(alpha)],
            0.5: ['Parent-child', 'rgba(159, 84, 47, {})'.format(alpha)]
        }

        # Colours for relatedness values that are not in the fixed table above
        cscale = mqc_colour.mqc_colour_scale()
        extra_colours = cscale.get_colours("Dark2")
        extra_colours = _make_col_alpha(extra_colours, alpha)

        extra_colour_idx = 0
        for s_name, d in self.somalier_data.items():
            # Without both coordinates there is no point to plot, hence nothing to colour
            if 'ibs0' not in d or 'ibs2' not in d:
                continue
            data[s_name] = {'x': d['ibs0'], 'y': d['ibs2']}

            # Only colour entries that actually carry the value read below
            if 'relatedness' not in d or 'expected_relatedness' not in d:
                continue

            relatedness = d['expected_relatedness']
            # -1 is not the same family, 0 is same family but unrelated
            # @brentp says he usually bundles them together
            if relatedness == -1:
                relatedness = 0

            # New unique value that we've not seen before
            if relatedness not in relatedness_colours:
                relatedness_colours[relatedness] = [
                    str(relatedness), extra_colours[extra_colour_idx]
                ]
                # Advance to the next extra colour, wrapping around at the end
                extra_colour_idx = (extra_colour_idx + 1) % len(extra_colours)

            # Assign colour
            data[s_name]['color'] = relatedness_colours[relatedness][1]

        if len(data) > 0:
            pconfig = {
                'id': 'somalier_relatedness_plot',
                'title': 'Somalier: Sample Shared Allele Rates (IBS)',
                'xlab': 'IBS0 (no alleles shared)',
                'ylab': 'IBS2 (both alleles shared)',
                'marker_line_width': 0
            }

            # One coloured label per relatedness value; the legend uses the
            # fully opaque variant of each colour
            legend_items = []
            for val in sorted(relatedness_colours.keys()):
                name, col_rgb = relatedness_colours[val]
                legend_items.append("<span style=\"color:{}\">{}</span>".format(
                    col_rgb.replace(str(alpha), "1.0"), name))
            colours_legend = ", ".join(legend_items)

            self.add_section(name='Relatedness',
                             anchor='somalier-relatedness',
                             description="""
                Shared allele rates between sample pairs.
                Points are coloured by degree of expected-relatedness: {}""".
                             format(colours_legend),
                             plot=scatter.plot(data, pconfig))
Ejemplo n.º 26
0
    def peddy_pca_plot(self):
        """Add the 'PCA Plot' section (PC1 vs PC2 per sample).

        Samples are read from ``self.peddy_data`` and need both
        'PC1_het_check' and 'PC2_het_check'. A sample with an
        'ancestry-prediction' is coloured by it (black for an unknown
        ancestry); a sample without one gets no explicit colour.
        An optional "background_pca" entry is plotted as small grey points.
        No section is added when there is nothing to plot.
        """
        ancestry_colors = {
            "SAS": "rgb(68,1,81,1)",
            "EAS": "rgb(59,81,139,1)",
            "AMR": "rgb(33,144,141,1)",
            "AFR": "rgb(92,200,99,1)",
            "EUR": "rgb(253,231,37,1)",
        }
        default_color = "#000000"
        default_background_color = "rgb(211,211,211,0.05)"
        data = OrderedDict()

        # plot the background data first, so it doesn't hide the actual data points
        # NOTE: pop() removes the entry from self.peddy_data, so the sample
        # loop below never sees it (and neither does a later call).
        background_pca = self.peddy_data.pop("background_pca", {})
        if background_pca:
            data["background"] = [
                {
                    "x": pc1,
                    "y": pc2,
                    "color": default_background_color,
                    "name": ancestry,
                    "marker_size": 1
                }
                for pc1, pc2, ancestry in zip(
                    background_pca["PC1"], background_pca["PC2"], background_pca["ancestry"])
            ]

        for s_name, d in self.peddy_data.items():
            if "PC1_het_check" not in d or "PC2_het_check" not in d:
                continue
            point = {
                "x": d["PC1_het_check"],
                "y": d["PC2_het_check"]
            }
            # The ancestry prediction is optional; leave the colour unset without it
            if "ancestry-prediction" in d:
                point["color"] = ancestry_colors.get(
                    d["ancestry-prediction"], default_color)
            data[s_name] = point

        pconfig = {
            "id": "peddy_pca_plot",
            "title": "Peddy: PCA Plot",
            "xlab": "PC1",
            "ylab": "PC2",
            "marker_size": 5,
            "marker_line_width": 0,
        }

        if len(data) > 0:
            self.add_section(name="PCA Plot",
                             anchor="peddy-pca-plot",
                             plot=scatter.plot(data, pconfig))
Ejemplo n.º 27
0
    def peddy_pca_plot(self):
        """Add the 'PCA Plot' section (PC1 vs PC2 per sample).

        Samples are read from ``self.peddy_data`` and need both
        'PC1_het_check' and 'PC2_het_check'. A sample with an
        'ancestry-prediction' is coloured by it (black for an unknown
        ancestry); a sample without one gets no explicit colour instead of
        aborting the whole plot with a KeyError.
        An optional "background_pca" entry is plotted as small grey points.
        No section is added when there is nothing to plot.
        """
        ancestry_colors = {
            'SAS': 'rgb(68,1,81,1)',
            'EAS': 'rgb(59,81,139,1)',
            'AMR': 'rgb(33,144,141,1)',
            'AFR': 'rgb(92,200,99,1)',
            'EUR': 'rgb(253,231,37,1)'
        }
        default_color = '#000000'
        default_background_color = 'rgb(211,211,211,0.05)'
        data = OrderedDict()

        # plot the background data first, so it doesn't hide the actual data points
        # NOTE: pop() removes the entry from self.peddy_data, so the sample
        # loop below never sees it (and neither does a later call).
        background_pca = self.peddy_data.pop("background_pca", {})
        if background_pca:
            data["background"] = [
                {
                    'x': pc1,
                    'y': pc2,
                    'color': default_background_color,
                    'name': ancestry,
                    'marker_size': 1
                }
                for pc1, pc2, ancestry in zip(
                    background_pca['PC1'], background_pca['PC2'], background_pca['ancestry'])
            ]

        for s_name, d in self.peddy_data.items():
            if 'PC1_het_check' not in d or 'PC2_het_check' not in d:
                continue
            point = {
                'x': d['PC1_het_check'],
                'y': d['PC2_het_check']
            }
            # The ancestry prediction is optional; leave the colour unset without it
            if 'ancestry-prediction' in d:
                point['color'] = ancestry_colors.get(d['ancestry-prediction'],
                                                     default_color)
            data[s_name] = point

        pconfig = {
            'id': 'peddy_pca_plot',
            'title': 'Peddy: PCA Plot',
            'xlab': 'PC1',
            'ylab': 'PC2',
            'marker_size': 5,
            'marker_line_width': 0
        }

        if len(data) > 0:
            self.add_section(name='PCA Plot',
                             anchor='peddy-pca-plot',
                             plot=scatter.plot(data, pconfig))
Ejemplo n.º 28
0
    def somalier_ancestry_pca_plot(self):
        """Add the 'Ancestry PCA' section: sample PCs over the background PCs.

        Samples come from ``self.somalier_data`` (entries with both "PC1" and
        "PC2"); the background comes from the "background_pcs" entry of
        ``self.somalier_background_pcs``, which is popped from that mapping.
        No section is added when there is nothing to plot.
        """
        points = OrderedDict()

        # Sample coordinates first
        for sample, values in self.somalier_data.items():
            if "PC1" not in values or "PC2" not in values:
                continue
            points[sample] = {
                "x": values["PC1"],
                "y": values["PC2"],
                "color": "rgba(0, 0, 0, 0.6)",
            }

        # Background second.
        # N.B. this must be done after samples to have samples on top
        bg = self.somalier_background_pcs.pop("background_pcs", {})
        if bg:
            # One colour of the "Paired" scale per ancestry category
            palette = mqc_colour.mqc_colour_scale(name="Paired").colours
            categories = self.somalier_ancestry_cats
            ancestry_colors = dict(zip(categories, palette[: len(categories)]))
            fallback_colour = "rgb(255,192,203,0.3)"

            # Make colours semi-transparent
            translucent = _make_col_alpha(ancestry_colors.values(), 0.3)
            ancestry_colors = dict(zip(ancestry_colors.keys(), translucent))

            bg_points = []
            for pc1, pc2, ancestry in zip(bg["PC1"], bg["PC2"], bg["ancestry"]):
                bg_points.append(
                    {
                        "x": pc1,
                        "y": pc2,
                        "color": ancestry_colors.get(ancestry, fallback_colour),
                        "name": ancestry,
                    }
                )
            points["background"] = bg_points

        # Nothing to show: no section
        if not points:
            return

        plot_config = {
            "id": "somalier_ancestry_pca_plot",
            "title": "Somalier: Sample Predicted Ancestry",
            "xlab": "PC1",
            "ylab": "PC2",
            "marker_size": 5,
            "marker_line_width": 0,
        }

        self.add_section(
            name="Ancestry PCA",
            description="Principal components of samples against background PCs.",
            helptext="""
                Sample PCs are plotted against background PCs from the
                background data supplied to somalier.
                Color indicates predicted ancestry of sample. Data points in close
                proximity are predicted to be of similar ancestry. Consider whether
                the samples cluster as expected.
                """,
            anchor="somalier-ancestry-pca",
            plot=scatter.plot(points, plot_config),
        )
Ejemplo n.º 29
0
    def __init__(self, c_id, mod):
        """Build one custom-content module and append its plot to ``self.intro``.

        Args:
            c_id: Content ID. Used as the default section anchor and, with
                underscores replaced by spaces and title-cased, as the
                default section name.
            mod: Mapping with a 'config' dict (keys read here: section_name,
                section_anchor, section_href, description, pconfig,
                plot_type, categories, xcats, ycats) and a 'data' payload
                that is handed to the selected plot function.
        """
        mod_config = mod['config']
        plot_type = mod_config.get('plot_type')

        modname = mod_config.get('section_name',
                                 c_id.replace('_', ' ').title())

        # Initialise the parent object
        super(MultiqcModule,
              self).__init__(name=modname,
                             anchor=mod_config.get('section_anchor', c_id),
                             href=mod_config.get('section_href'),
                             info=mod_config.get('description'))

        # NOTE: a pconfig dict supplied in the config is modified in place below
        pconfig = mod_config.get('pconfig', {})
        if pconfig.get('title') is None:
            pconfig['title'] = modname

        # Table
        if plot_type == 'table':
            pconfig['sortRows'] = pconfig.get('sortRows', False)
            self.intro += table.plot(mod['data'], None, pconfig)

        # Bar plot
        elif plot_type == 'bargraph':
            self.intro += bargraph.plot(mod['data'],
                                        mod_config.get('categories'),
                                        pconfig)

        # Line plot
        elif plot_type == 'linegraph':
            self.intro += linegraph.plot(mod['data'], pconfig)

        # Scatter plot
        elif plot_type == 'scatter':
            self.intro += scatter.plot(mod['data'], pconfig)

        # Heatmap
        elif plot_type == 'heatmap':
            self.intro += heatmap.plot(mod['data'], mod_config.get('xcats'),
                                       mod_config.get('ycats'), pconfig)

        # Beeswarm plot
        elif plot_type == 'beeswarm':
            self.intro += beeswarm.plot(mod['data'], pconfig)

        # Not supplied
        elif plot_type is None:
            log.warning("Plot type not found for content ID '{}'".format(c_id))

        # Not recognised
        else:
            log.warning(
                "Error - custom content plot type '{}' not recognised for content ID {}"
                .format(plot_type, c_id))
Ejemplo n.º 30
0
    def peddy_pca_plot(self):
        """Add the 'PCA Plot' section (PC1 vs PC2 per sample).

        Samples are read from ``self.peddy_data`` and need both
        'PC1_het_check' and 'PC2_het_check'. A sample with an
        'ancestry-prediction' is coloured by it (black for an unknown
        ancestry); a sample without one gets no explicit colour instead of
        aborting the whole plot with a KeyError.
        An optional "background_pca" entry is plotted as small grey points.
        No section is added when there is nothing to plot.
        """
        ancestry_colors = {
            'SAS': 'rgb(68,1,81,1)',
            'EAS': 'rgb(59,81,139,1)',
            'AMR': 'rgb(33,144,141,1)',
            'AFR': 'rgb(92,200,99,1)',
            'EUR': 'rgb(253,231,37,1)'
        }
        default_color = '#000000'
        default_background_color = 'rgb(211,211,211,0.05)'
        data = OrderedDict()

        # plot the background data first, so it doesn't hide the actual data points
        # NOTE: pop() removes the entry from self.peddy_data, so the sample
        # loop below never sees it (and neither does a later call).
        background_pca = self.peddy_data.pop("background_pca", {})
        if background_pca:
            data["background"] = [
                {
                    'x': pc1,
                    'y': pc2,
                    'color': default_background_color,
                    'name': ancestry,
                    'marker_size': 1
                }
                for pc1, pc2, ancestry in zip(
                    background_pca['PC1'], background_pca['PC2'], background_pca['ancestry'])
            ]

        for s_name, d in self.peddy_data.items():
            if 'PC1_het_check' not in d or 'PC2_het_check' not in d:
                continue
            point = {
                'x': d['PC1_het_check'],
                'y': d['PC2_het_check']
            }
            # The ancestry prediction is optional; leave the colour unset without it
            if 'ancestry-prediction' in d:
                point['color'] = ancestry_colors.get(d['ancestry-prediction'], default_color)
            data[s_name] = point

        pconfig = {
            'id': 'peddy_pca_plot',
            'title': 'Peddy: PCA Plot',
            'xlab': 'PC1',
            'ylab': 'PC2',
            'marker_size': 5,
            'marker_line_width': 0
        }

        if len(data) > 0:
            self.add_section (
                name = 'PCA Plot',
                anchor = 'peddy-pca-plot',
                plot = scatter.plot(data, pconfig)
            )
    def bin_plot(self):
        """Parse goleft indexcov ped files and add the problem-bins scatter plot.

        For every sample row, x is bins.lo and y is bins.out, each divided by
        (bins.in + bins.out).

        Returns:
            True when a section was added, False when no samples were found
            (or all of them were ignored).
        """
        helptext = 'We expect bins to be around 1, so deviations from this indicate problems. \n\
        Low coverage bins (< 0.15) on the x-axis have regions with low or missing coverage. \n\
        Higher values indicate truncated BAM files or missing data. \n\
        Bins with skewed distributions (<0.85 or >1.15) on the y-axis detect dosage bias. \n\
        Large values on the y-axis are likely to impact CNV and structural variant calling. \n\
        See the \n\
        <a href="https://github.com/brentp/goleft/blob/master/docs/indexcov/help-bin.md" target="_blank">goleft indexcov bin documentation</a> \n\
        for more details.'

        points = {}
        for ped in self.find_log_files('goleft_indexcov/ped', filehandles=True):
            handle = ped['f']
            # The first line holds the column names; its first character is dropped
            columns = handle.readline()[1:].strip().split("\t")
            for line in handle:
                row = dict(zip(columns, line.split("\t")))
                s_name = self.clean_s_name(row["sample_id"], ped["root"])
                bins_in = float(row["bins.in"])
                bins_out = float(row["bins.out"])
                total = bins_in + bins_out
                points[s_name] = {
                    "x": float(row["bins.lo"]) / total,
                    "y": bins_out / total
                }

        # Filter to strip out ignored sample names
        points = self.ignore_samples(points)
        if not points:
            return False

        log.info("Found goleft indexcov bin reports for %s samples" %
                 (len(points)))
        plot_config = {
            'id': 'goleft_indexcov-bin-plot',
            'title':
            'goleft indexcov: Problematic low and non-uniform coverage bins',
            'xlab': 'Proportion of bins with depth < 0.15',
            'ylab':
            'Proportion of bins with depth outside of (0.85, 1.15)',
            'yCeiling': 1.0,
            'yFloor': 0.0,
            'xCeiling': 1.0,
            'xFloor': 0.0
        }
        self.add_section(
            name='Problem coverage bins',
            anchor='goleft_indexcov-bin',
            description=
            'This plot identifies problematic samples using binned coverage distributions.',
            helptext=helptext,
            plot=scatter.plot(points, plot_config))
        return True
Ejemplo n.º 32
0
    def make_plots(self):
        """Add the synteny scatter plot section built from ``self.plot_data``."""
        synteny_plot = scatter.plot(
            self.plot_data,
            {
                'id': 'syntenyplot',
                'title': 'Synteny plot',
                'marker_line_width': 0,
                'marker_size': 2,
                'enableMouseTracking': False,
                'square': True,
                'data_labels': self.data_labels,
            },
        )

        self.add_section(anchor='syntenyplot', description='', plot=synteny_plot)
Ejemplo n.º 33
0
    def parse_plotPCA(self):
        """Collect plotPCA output files and add the PCA scatter section.

        Parsed samples are stored in ``self.deeptools_plotPCAData`` (a later
        file replaces an earlier sample of the same name, with a warning).

        Returns:
            The number of samples kept after filtering, or None when samples
            were found but none of them yielded a plottable point.
        """
        self.deeptools_plotPCAData = dict()
        for f in self.find_log_files("deeptools/plotPCAData", filehandles=False):
            parsed = self.parsePlotPCAData(f)
            for sample, values in parsed.items():
                if sample in self.deeptools_plotPCAData:
                    log.warning("Replacing duplicate sample {}.".format(sample))
                self.deeptools_plotPCAData[sample] = values
            # Only files that contributed data are recorded as a source
            if len(parsed) > 0:
                self.add_data_source(f, section="plotPCA")

        self.deeptools_plotPCAData = self.ignore_samples(self.deeptools_plotPCAData)

        if len(self.deeptools_plotPCAData) == 0:
            return len(self.deeptools_plotPCAData)

        # Write data to file
        self.write_data_file(self.deeptools_plotPCAData, "deeptools_plot_PCA")

        points = dict()
        for s_name in self.deeptools_plotPCAData:
            try:
                components = self.deeptools_plotPCAData[s_name]
                # Samples lacking either component are skipped entirely
                points[s_name] = {"x": components[1], "y": components[2]}
            except KeyError:
                pass
        if len(points) == 0:
            log.debug("No valid data for PCA plot")
            return None

        plot_config = {
            "id": "deeptools_pca_plot",
            "title": "deeptools: PCA Plot",
            "xlab": "PC1",
            "ylab": "PC2",
            "tt_label": "PC1 {point.x:.2f}: PC2 {point.y:.2f}",
        }
        self.add_section(
            name="PCA plot",
            anchor="deeptools_pca",
            description="PCA plot with the top two principal components calculated based on genome-wide distribution of sequence reads",
            plot=scatter.plot(points, plot_config),
        )

        return len(self.deeptools_plotPCAData)
Ejemplo n.º 34
0
    def __init__(self):
        """Find the single rnaseq_az PCA data file and add the PCA section.

        Raises:
            UserWarning: when no PCA data file, or more than one, is found.
        """
        # Initialise the parent object
        super(MultiqcModule, self).__init__(name='Principal Components Analysis', anchor='rnaseq_az')

        # Search results, minus any falsy entries
        found = [f for f in self.find_log_files('rnaseq_az/pca_data', filecontents=False) if f]

        if not found:
            log.debug("Could not find the PCA data file in {}".format(config.analysis_dir))
            raise UserWarning
        if len(found) > 1:
            log.warning("More than 1 PCA data file found in {}".format(config.analysis_dir))
            raise UserWarning

        pca_file = found[0]
        pca_fpath = join(pca_file['root'], pca_file['fn'])
        pca_data, color_by_sample, color_by_cond, variances = parse_pca_data(pca_fpath)
        pca_data = self.ignore_samples(pca_data)

        description = ("<p>PCA is a popular method that is based on the principles of dimensional reduction. "
            "Below is a PCA plot of the samples within the space of the first two principal components that explain the most variation in the data. "
            "These were calculated using the read counts of the top 1000 most variable genes within the dataset.</p>")

        # Hand-built HTML legend: one colour swatch plus label per condition
        legend_parts = []
        if color_by_cond:
            label_style = ("font-family: 'Lucida Grande', 'Lucida Sans Unicode', Arial, Helvetica, sans-serif; "
                           "font-size: 12px; "
                           "font-weight: bold; ")
            legend_parts.append('<center><div>')
            legend_parts.append('<span style="' + label_style + ' margin-right: 10px;">Conditions: </span>')
            for cond, color in color_by_cond.items():
                swatch = ('<span style="display: inline-block; width: 16px; height: 12px; '
                          '            margin-bottom: -1px; margin-right: 1px; background-color: ') + color + '"></span>'
                label = '<span style="' + label_style + ' margin-right: 20px; white-space: normal;"> ' + cond + '</span>'
                legend_parts.append('<span style="white-space: nowrap;">' + swatch + label + '</span>')
            legend_parts.append('</div></center>')
        legend = ''.join(legend_parts)

        plot_config = {
            'title': 'Principal Components Analysis',
            'xlab': 'PC1: ' + variances[0] + '% variance',
            'ylab': 'PC2: ' + variances[1] + '% variance',
            'colors': color_by_sample,
            'tt_label': 'PC1: {point.x}<br/>PC2: {point.y}',
        }
        plot_html = scatter.plot(pca_data, plot_config)

        self.add_section(
            name='Principal Components Analysis',
            anchor='rnaseq_az-pca',
            content=description + legend + plot_html
        )
Ejemplo n.º 35
0
 def cnv_winplot_plot(self, cnt, s_name):
     """Add a scatter section for one sample.

     Args:
         cnt: Plot data for the sample, passed to the scatter plot as-is.
         s_name: Sample name; used as plot title, section name and, prefixed
             with 'wp', as the section anchor.
     """
     plot_html = scatter.plot(
         {s_name: cnt},
         {
             'title': s_name,
             'ymax': 40,
             'ymin': 0,
             'marker_size': 2,
             'marker_line_width': 0,
         },
     )
     self.add_section(name=s_name, anchor='wp' + s_name, content=plot_html)
Ejemplo n.º 36
0
    def slamdunkPCAPlot(self):
        """Add the PCA scatter section built from ``self.PCA_data``."""
        description = """This plot shows the principal components of samples based
                        on the distribution of reads with T&gt;C conversions within UTRs
                        (see the <a href="http://t-neumann.github.io/slamdunk/docs.html#summary" target="_blank">slamdunk docs</a>)."""

        pca_plot = scatter.plot(
            self.PCA_data,
            {
                'id': 'slamdunk_pca',
                'title': 'Slamdunk: PCA',
                'xlab': 'PC1',
                'ylab': 'PC2',
                'tt_label': 'PC1 {point.x:.2f}: PC2 {point.y:.2f}',
            },
        )

        self.add_section(
            name='PCA (T&gt;C based)',
            anchor='slamdunk_PCA',
            description=description,
            plot=pca_plot,
        )
Ejemplo n.º 37
0
    def bin_plot(self):
        """Parse goleft indexcov ped files and add the problem-bins scatter plot.

        For every sample row, x is bins.lo and y is bins.out, each divided by
        (bins.in + bins.out).

        Returns:
            True when a section was added, False when no samples were found
            (or all of them were ignored).
        """
        helptext = 'We expect bins to be around 1, so deviations from this indicate problems. \n\
        Low coverage bins (< 0.15) on the x-axis have regions with low or missing coverage. \n\
        Higher values indicate truncated BAM files or missing data. \n\
        Bins with skewed distributions (<0.85 or >1.15) on the y-axis detect dosage bias. \n\
        Large values on the y-axis are likely to impact CNV and structural variant calling. \n\
        See the \n\
        <a href="https://github.com/brentp/goleft/blob/master/docs/indexcov/help-bin.md" target="_blank">goleft indexcov bin documentation</a> \n\
        for more details.'

        data = {}
        for fn in self.find_log_files('goleft_indexcov/ped', filehandles=True):
            # The first line holds the column names; its first character is dropped
            header = fn['f'].readline()[1:].strip().split("\t")
            for sample_parts in (l.split("\t") for l in fn['f']):
                cur = dict(zip(header, sample_parts))
                cur["sample_id"] = self.clean_s_name(cur["sample_id"], fn["root"])
                total = float(cur["bins.in"]) + float(cur["bins.out"])
                data[cur["sample_id"]] = {"x": float(cur["bins.lo"]) / total,
                                          "y": float(cur["bins.out"]) / total}

        # Filter to strip out ignored sample names
        data = self.ignore_samples(data)

        if data:
            log.info("Found goleft indexcov bin reports for %s samples" % (len(data)))
            pconfig = {
                'id': 'goleft_indexcov-bin-plot',
                'title': 'goleft indexcov: Problematic low and non-uniform coverage bins',
                'xlab': 'Proportion of bins with depth < 0.15',
                'ylab': 'Proportion of bins with depth outside of (0.85, 1.15)',
                'yCeiling': 1.0, 'yFloor': 0.0, 'xCeiling': 1.0, 'xFloor': 0.0}
            self.add_section (
                name = 'Problem coverage bins',
                # This section gets its own '-bin' anchor, in line with the
                # plot id; the former '-roc' anchor did not describe it
                anchor = 'goleft_indexcov-bin',
                description = 'This plot identifies problematic samples using binned coverage distributions.',
                helptext = helptext,
                plot = scatter.plot(data, pconfig)
            )
            return True
        else:
            return False
Ejemplo n.º 38
0
    def peddy_pca_plot(self):
        """Add the 'PCA Plot' section with one PC1/PC2 point per sample.

        Only entries of ``self.peddy_data`` that have both 'PC1' and 'PC2'
        are plotted; no section is added when there are none.
        """
        points = {
            sample: {'x': values['PC1'], 'y': values['PC2']}
            for sample, values in self.peddy_data.items()
            if 'PC1' in values and 'PC2' in values
        }
        if not points:
            return

        plot_config = {
            'id': 'peddy_pca_plot',
            'title': 'Peddy: PCA Plot',
            'xlab': 'PC1',
            'ylab': 'PC2',
        }
        self.add_section(
            name='PCA Plot',
            anchor='peddy-pca-plot',
            plot=scatter.plot(points, plot_config),
        )