def plot_accuracy(self):
    """Generate the GffCompare accuracy scatter plot.

    One dataset is built per feature level listed in ``datasets``. Within a
    dataset every sample in ``self.gffcompare_data`` becomes one point whose
    x is that level's ``sensitivity`` divided by 100 and whose y is its
    ``precision`` divided by 100.

    Returns:
        The result of ``scatter.plot`` for the assembled datasets.
    """
    datasets = ['Base', 'Exon', 'Intron', 'Intron_chain', 'Transcript', 'Locus']
    pconfig = {
        'id': 'gffcompare_accuracy_plot',
        'title': 'GffCompare: Accuracy values',
        'ylab': 'Precision',
        'xlab': 'Sensitivity',
        'ymin': 0,
        'ymax': 1,
        'xmin': 0,
        'xmax': 1,
        'data_labels': [{'name': x} for x in datasets],
    }

    data_classification = []
    for dataset in datasets:
        points = {}
        for sample, sample_data in self.gffcompare_data.items():
            accuracy = sample_data['accuracy'][dataset]
            points[sample] = {
                'x': accuracy['sensitivity'] / 100,
                'y': accuracy['precision'] / 100,
                'name': dataset,
            }
        data_classification.append(points)

    # The leftover debug print of the whole data structure was removed: it
    # wrote to stdout on every report build.
    return scatter.plot(data_classification, pconfig)
def peddy_relatedness_plot(self):
    """Add the peddy 'Relatedness' scatter section (IBS0 against IBS2).

    Every entry of ``self.peddy_data`` that has both ``ibs0`` and ``ibs2``
    becomes one point. When the entry also has ``rel`` the point is coloured
    by that value: below 0.25, below 0.5, or anything else. Nothing is added
    when no entry yields a point.
    """
    data = dict()
    for s_name, d in self.peddy_data.items():
        if 'ibs0' not in d or 'ibs2' not in d:
            continue
        point = {'x': d['ibs0'], 'y': d['ibs2']}
        # Colour only points that exist: an entry with 'rel' but without the
        # IBS values must not raise KeyError.
        if 'rel' in d:
            if d['rel'] < 0.25:
                point['color'] = 'rgba(109, 164, 202, 0.9)'
            elif d['rel'] < 0.5:
                point['color'] = 'rgba(250, 160, 81, 0.8)'
            else:
                point['color'] = 'rgba(43, 159, 43, 0.8)'
        data[s_name] = point

    if not data:
        return

    pconfig = {
        'id': 'peddy_relatedness_plot',
        'title': 'Peddy: Relatedness Plot',
        'xlab': 'IBS0 (no alleles shared)',
        'ylab': 'IBS2 (both alleles shared)',
    }
    self.add_section(
        name='Relatedness',
        anchor='peddy-relatedness-plot',
        description=(
            'Shared allele rates between sample pairs. '
            'Points are coloured by degree of relatedness: '
            '<span style="color: #6DA4CA;">less than 0.25</span>, '
            '<span style="color: #FAA051;">0.25 - 0.5</span>, '
            '<span style="color: #2B9F2B;">greater than 0.5</span>.'
        ),
        plot=scatter.plot(data, pconfig),
    )
def somalier_sex_check_plot(self):
    """Add the somalier 'Sex' scatter section.

    Each entry of ``self.somalier_data`` that has both ``X_depth_mean`` and
    ``original_pedigree_sex`` is plotted with y = ``X_depth_mean`` and
    x = the category index of the pedigree sex (unrecognised values map to
    index 2) plus a small random horizontal offset in [-0.05, 0.05).
    """
    category_of = {"female": 0, "male": 1, "unknown": 2}
    points = {}
    for sample_name, values in self.somalier_data.items():
        if "X_depth_mean" not in values or "original_pedigree_sex" not in values:
            continue
        jitter = (random.random() - 0.5) * 0.1
        points[sample_name] = {
            "x": jitter + category_of.get(values["original_pedigree_sex"], 2),
            "y": values["X_depth_mean"],
        }

    if len(points) == 0:
        return

    plot_config = {
        "id": "somalier_sex_check_plot",
        "title": "Somalier: Sample Predicted Sex",
        "xlab": "Sex from pedigree",
        "ylab": "Scaled mean depth on X",
        "categories": ["Female", "Male", "Unknown"],
    }
    self.add_section(
        name="Sex",
        description="Predicted sex against scaled depth on X",
        helptext="Higher values of depth, low values suggest male.",
        anchor="somalier-sexcheck",
        plot=scatter.plot(points, plot_config),
    )
def snp_rate_scatterplot(self):
    """Add the 'Relative Coverage' scatter section (RateX against RateY).

    Samples in ``self.sexdet_data`` lacking ``RateX`` or ``RateY`` are left
    out; the section is only added when at least one sample remains.
    """
    points = OrderedDict()
    for sample in self.sexdet_data:
        try:
            rates = self.sexdet_data[sample]
            point = {'x': rates['RateX'], 'y': rates['RateY']}
        except KeyError:
            # Sample has no X/Y rate: nothing to plot for it.
            continue
        points[sample] = point

    plot_config = dict()
    plot_config['id'] = 'sexdeterrmine-rate-plot'
    plot_config['title'] = 'SexDetErrmine: Relative coverage'
    plot_config['ylab'] = 'Relative Cov. on Y'
    plot_config['xlab'] = 'Relative Cov. on X'

    if len(points) == 0:
        return

    help_markdown = (
        ' Males are expected to have a roughly equal X- and Y-rates, '
        'while females are expected to have a Y-rate of 0 and an X-rate of 1. '
        'Placement between the two clusters can be indicative of contamination, '
        'while placement with higher than expected X- and/or Y-rates can be '
        'indicative of sex chromosome aneuploidy. '
    )
    self.add_section(
        name='Relative Coverage',
        anchor='sexdeterrmine-rates',
        description='The coverage on the X vs Y chromosome, relative to coverage on the Autosomes.',
        helptext=help_markdown,
        plot=scatter.plot(points, plot_config),
    )
def quartet_scatter_plot(self, figure_data, pconfig_id, pconfig_title, name, anchor):
    """Add a scatter section of F1-score against MCR, one point per row.

    Args:
        figure_data: Object whose ``iterrows()`` yields ``(index, row)``
            pairs; each row is read at "Sample", "F1-score", "MCR", "Batch".
        pconfig_id: Value used for the plot config ``id``.
        pconfig_title: Value used for the plot config ``title``.
        name: Section name.
        anchor: Section anchor.

    Rows whose "Batch" equals "Your Datasets" are drawn in blue, all other
    rows in yellow. No section is added when there are no rows.
    """
    blue = 'rgba(109, 164, 202, 0.9)'
    yellow = 'rgba(250, 160, 81, 0.8)'
    # green (currently unused): rgba(43, 159, 43, 0.8)

    points = dict()
    for _, row in figure_data.iterrows():
        sample = row["Sample"]
        points[sample] = {'x': row["F1-score"], 'y': row["MCR"]}
        points[sample]['color'] = blue if row["Batch"] == "Your Datasets" else yellow

    plot_config = {
        'id': pconfig_id,
        'title': pconfig_title,
        'xlab': 'F1-score',
        'ylab': 'Mendelian Concordance Rate',
        "use_legend": True,
    }

    if len(points) == 0:
        return

    self.add_section(
        name=name,
        anchor=anchor,
        description=(
            'Points are coloured as follows: '
            '<span style="color: #6DA4CA;">Your Datasets</span>, '
            '<span style="color: #FAA051;">Rest Submmited Datasets</span>.'
        ),
        plot=scatter.plot(points, plot_config),
    )
def peddy_relatedness_plot(self):
    """Build the peddy relatedness plot (IBS0 against IBS2) as HTML.

    Every entry of ``self.peddy_data`` that has both ``ibs0`` and ``ibs2``
    becomes one point, coloured by ``rel`` when that key is present.

    Returns:
        An introductory ``<p>`` paragraph concatenated with the result of
        ``scatter.plot``, or ``None`` when no entry yields a point.
    """
    data = dict()
    for s_name, d in self.peddy_data.items():
        if 'ibs0' not in d or 'ibs2' not in d:
            continue
        point = {'x': d['ibs0'], 'y': d['ibs2']}
        # Colour only points that exist: an entry with 'rel' but without the
        # IBS values must not raise KeyError.
        if 'rel' in d:
            if d['rel'] < 0.25:
                point['color'] = 'rgba(109, 164, 202, 0.9)'
            elif d['rel'] < 0.5:
                point['color'] = 'rgba(250, 160, 81, 0.8)'
            else:
                point['color'] = 'rgba(43, 159, 43, 0.8)'
        data[s_name] = point

    if not data:
        return None

    pconfig = {
        'id': 'peddy_relatedness_plot',
        'title': 'Peddy Relatedness Plot',
        'xlab': 'IBS0 (no alleles shared)',
        'ylab': 'IBS2 (both alleles shared)',
    }
    intro = (
        '<p>Shared allele rates between sample pairs. '
        'Points are coloured by degree of relatedness: '
        '<span style="color: #6DA4CA;">less than 0.25</span>, '
        '<span style="color: #FAA051;">0.25 - 0.5</span>, '
        '<span style="color: #2B9F2B;">greater than 0.5</span>.</p>'
    )
    return intro + scatter.plot(data, pconfig)
def peddy_het_check_plot(self):
    """Add the peddy 'Het Check' scatter section.

    Plots ``het_ratio_het_check`` (y) against ``median_depth_het_check`` (x)
    for every entry of ``self.peddy_data`` that has both keys. No section is
    added when no entry qualifies, so the report never shows an empty plot.
    """
    data = {}
    for s_name, d in self.peddy_data.items():
        # Only samples carrying both required columns can be plotted.
        if 'median_depth_het_check' in d and 'het_ratio_het_check' in d:
            data[s_name] = {
                'x': d['median_depth_het_check'],
                'y': d['het_ratio_het_check'],
            }

    if not data:
        return

    pconfig = {
        'id': 'peddy_het_check_plot',
        'title': 'Peddy: Het Check',
        'xlab': 'median depth',
        'ylab': 'proportion het calls',
    }
    self.add_section(
        name='Het Check',
        description="Proportion of sites that were heterozygous against median depth.",
        helptext=(
            ' A high proportion of heterozygous sites suggests contamination, '
            'a low proportion suggests consanguinity. '
            'See [the main peddy documentation](https://peddy.readthedocs.io/en/latest/output.html#het-check) '
            'for more details about the `het_check` command. '
        ),
        anchor='peddy-hetcheck-plot',
        plot=scatter.plot(data, pconfig),
    )
def peddy_sex_check_plot(self):
    """Add the peddy 'Sex Check' scatter section.

    Plots ``sex_het_ratio`` (y) against the category index of
    ``ped_sex_sex_check`` (x) for every entry of ``self.peddy_data`` that has
    both keys; unrecognised sex values fall into the "Unknown" category.
    No section is added when no entry qualifies.
    """
    sex_index = {"female": 0, "male": 1, "unknown": 2}
    data = {}
    for s_name, d in self.peddy_data.items():
        if 'sex_het_ratio' in d and 'ped_sex_sex_check' in d:
            data[s_name] = {
                'x': sex_index.get(d['ped_sex_sex_check'], 2),
                'y': d["sex_het_ratio"],
            }

    if not data:
        return

    pconfig = {
        'id': 'peddy_sex_check_plot',
        'title': 'Peddy: Sex Check',
        'xlab': 'Sex From Ped',
        'ylab': 'Sex Het Ratio',
        'categories': ["Female", "Male", "Unknown"],
    }
    self.add_section(
        name='Sex Check',
        description="Predicted sex against heterozygosity ratio",
        helptext=(
            ' Higher values of Sex Het Ratio suggests the sample is female, low values suggest male. '
            'See [the main peddy documentation](http://peddy.readthedocs.io/en/latest/#sex-check) '
            'for more details about the `het_check` command. '
        ),
        anchor='peddy-sexcheck-plot',
        plot=scatter.plot(data, pconfig),
    )
def somalier_het_check_plot(self):
    """Add the somalier 'Heterozygosity' scatter section.

    Plots ``ab_std`` (y) against ``gt_depth_mean`` (x) for each entry of
    ``self.somalier_data`` that has both keys. Nothing is added when no
    entry qualifies.
    """
    points = {
        sample_name: {"x": values["gt_depth_mean"], "y": values["ab_std"]}
        for sample_name, values in self.somalier_data.items()
        if "gt_depth_mean" in values and "ab_std" in values
    }
    if len(points) == 0:
        return

    plot_config = {
        "id": "somalier_het_check_plot",
        "title": "Somalier: Sample Observed Heterozygosity",
        "xlab": "Mean depth",
        "ylab": "Standard deviation of allele-balance",
    }
    self.add_section(
        name="Heterozygosity",
        description="Standard devation of heterozygous allele balance against mean depth.",
        helptext="A high standard deviation in allele balance suggests contamination.",
        anchor="somalier-hetcheck",
        plot=scatter.plot(points, plot_config),
    )
def peddy_het_check_plot(self):
    """Add the peddy 'Het Check' scatter section.

    Plots ``het_ratio_het_check`` (y) against ``median_depth_het_check`` (x)
    for every entry of ``self.peddy_data`` that has both keys. No section is
    added when no entry qualifies, so the report never shows an empty plot.
    """
    data = {}
    for s_name, d in self.peddy_data.items():
        # Only samples carrying both required columns can be plotted.
        if 'median_depth_het_check' in d and 'het_ratio_het_check' in d:
            data[s_name] = {
                'x': d['median_depth_het_check'],
                'y': d['het_ratio_het_check'],
            }

    if not data:
        return

    pconfig = {
        'id': 'peddy_het_check_plot',
        'title': 'Peddy: Het Check',
        'xlab': 'median depth',
        'ylab': 'proportion het calls',
    }
    self.add_section(
        name='Het Check',
        description="Proportion of sites that were heterozygous against median depth.",
        helptext=(
            ' A high proportion of heterozygous sites suggests contamination, '
            'a low proportion suggests consanguinity. '
            'See [the main peddy documentation](https://peddy.readthedocs.io/en/latest/output.html#het-check) '
            'for more details about the `het_check` command. '
        ),
        anchor='peddy-hetcheck-plot',
        plot=scatter.plot(data, pconfig),
    )
def add_pca_plots(self):
    """Add the 'Principal Component Analysis' section from ``PCA.csv``.

    Reads ``<output_dir>/unsupervised/PCA.csv`` (``output_dir`` taken from
    ``config.metadata``), stores every row in ``self.pca_dict`` keyed by its
    ``sample`` column, and plots consecutive principal-component pairs
    (PC1/PC2 ... PC4/PC5) as switchable datasets.

    Returns:
        0 when the file is missing, has no data rows, or contains none of
        the expected component pairs; otherwise ``None``.
    """
    results_path = config.metadata['output_dir']
    pca_csv_path = os.path.join(results_path, 'unsupervised', 'PCA.csv')
    if not os.path.exists(pca_csv_path):
        return 0

    # Read the PCA values; the context manager closes the file handle.
    last_row = None
    with open(pca_csv_path, 'r') as handle:
        for sample in csv.DictReader(handle):
            self.pca_dict[sample['sample']] = sample
            last_row = sample
    if last_row is None:
        # Header-only or empty file: nothing to plot.
        return 0

    # Map the short component name (text before the first space of a column
    # header) to the full column header.
    principle_components = {}
    for column in last_row:
        principle_components[column.split(' ')[0]] = column

    wanted_pairs = [('PC1', 'PC2'), ('PC2', 'PC3'), ('PC3', 'PC4'), ('PC4', 'PC5')]
    pairs = [
        (first, second)
        for first, second in wanted_pairs
        if first in principle_components and second in principle_components
    ]
    if not pairs:
        return 0

    pca_plot_config = {
        'data_labels': [
            {
                'name': '{} vs. {}'.format(first, second),
                'xlab': principle_components[first],
                'ylab': principle_components[second],
            }
            for first, second in pairs
        ],
        'id': 'atacseq_pca_plot',
        'marker_size': 5,
    }
    pca_plot_data = [
        self.generate_pca_plot_data(principle_components[first], principle_components[second])
        for first, second in pairs
    ]
    self.add_section(
        name='Principal Component Analysis',
        anchor='atacseq_pca',
        description='Scatter plots of PCA results',
        helptext='You can see the plots of principal components',
        plot=scatter.plot(pca_plot_data, pconfig=pca_plot_config),
    )
def bin_plot(self):
    """Add the 'Problem coverage bins' scatter section.

    Plots ``self.bin_plot_data`` with both axes configured with a floor of
    0.0 and a ceiling of 1.0.
    """
    axis_limits = {"yCeiling": 1.0, "yFloor": 0.0, "xCeiling": 1.0, "xFloor": 0.0}
    plot_config = {
        "id": "goleft_indexcov-bin-plot",
        "title": "goleft indexcov: Problematic low and non-uniform coverage bins",
        "xlab": "Proportion of bins with depth < 0.15",
        "ylab": "Proportion of bins with depth outside of (0.85, 1.15)",
    }
    plot_config.update(axis_limits)

    help_markdown = (
        " We expect bins to be around 1, so deviations from this indicate problems. "
        "Low coverage bins (`< 0.15`) on the x-axis have regions with low or missing coverage. "
        "Higher values indicate truncated BAM files or missing data. "
        "Bins with skewed distributions (`<0.85` or `>1.15`) on the y-axis detect dosage bias. "
        "Large values on the y-axis are likely to impact CNV and structural variant calling. "
        "See the [goleft indexcov bin documentation](https://github.com/brentp/goleft/blob/master/docs/indexcov/help-bin.md) "
        "for more details. "
    )
    self.add_section(
        name="Problem coverage bins",
        anchor="goleft_indexcov-bin",
        description="This plot identifies problematic samples using binned coverage distributions.",
        helptext=help_markdown,
        plot=scatter.plot(self.bin_plot_data, plot_config),
    )
def somalier_sex_check_plot(self):
    """Add the somalier 'Sex' scatter section.

    Each entry of ``self.somalier_data`` that has both ``X_depth_mean`` and
    ``original_pedigree_sex`` is plotted with y = ``X_depth_mean`` and
    x = the category index of the pedigree sex (unrecognised values map to
    index 2) plus a small random horizontal offset in [-0.05, 0.05).
    """
    category_of = {"female": 0, "male": 1, "unknown": 2}
    points = {}
    for sample_name, values in self.somalier_data.items():
        if 'X_depth_mean' not in values or 'original_pedigree_sex' not in values:
            continue
        jitter = (random.random() - 0.5) * 0.1
        points[sample_name] = {
            'x': jitter + category_of.get(values['original_pedigree_sex'], 2),
            'y': values["X_depth_mean"],
        }

    if len(points) == 0:
        return

    plot_config = {
        'id': 'somalier_sex_check_plot',
        'title': 'Somalier: Sample Predicted Sex',
        'xlab': 'Sex from pedigree',
        'ylab': 'Scaled mean depth on X',
        'categories': ["Female", "Male", "Unknown"],
    }
    self.add_section(
        name='Sex',
        description="Predicted sex against scaled depth on X",
        helptext="Higher values of depth, low values suggest male.",
        anchor='somalier-sexcheck',
        plot=scatter.plot(points, plot_config),
    )
def blobtools_blob_graph(self):
    """Return the scatter plot built from ``self.blobtools_blob_data``."""
    plot_config = dict(
        id='blobtools-5',
        title='Blobtools: blobplots',
        showInLegend=True,
    )
    blob_plot = scatter.plot(self.blobtools_blob_data, plot_config)
    return blob_plot
def add_reported_vs_empirical_section(self):
    """Add the 'Reported Quality vs. Empirical Quality' scatter section.

    For every recalibration table type in ``recal_table_type`` that has
    sample tables, one dataset is built. Within it each sample gets one
    point per distinct ``QualityScore``: x is the score as an int and y is
    the mean of ``EmpiricalQuality`` over the rows sharing that score.
    Nothing is added when no table type has any sample table.
    """
    sample_data = []
    data_labels = []

    # Loop through the different data types
    for rt_type_name, rt_type in recal_table_type._asdict().items():
        # This table appears to be the correct one to use for reported vs empirical
        # https://github.com/broadinstitute/gatk/blob/853b53ec2a3ac2d90d7d82a6c8451e29a34692d2/src/main/resources/org/broadinstitute/hellbender/utils/recalibration/BQSR.R#L148
        sample_tables = self.gatk_base_recalibrator[rt_type]["recal_table_1"]
        if len(sample_tables) == 0:
            continue

        reported_empirical = {}
        for sample, table in sample_tables.items():
            # The table is column-oriented (column name -> list of values);
            # transpose it into one dict per row.
            table_rows = [dict(zip(table, r)) for r in zip(*table.values())]
            # groupby only merges adjacent rows, so sort by the same key first.
            table_rows.sort(key=lambda r: r["QualityScore"])
            points = []
            for reported, group in groupby(table_rows, lambda r: r["QualityScore"]):
                g = list(group)  # groupby never yields an empty group
                points.append({
                    "x": int(reported),
                    "y": sum(float(r["EmpiricalQuality"]) for r in g) / len(g),
                })
            reported_empirical[sample] = points

        sample_data.append(reported_empirical)

        # Build data label configs for this data type. The table type name
        # fills the placeholder, which was previously shown as a literal "{}".
        data_labels.append({
            "name": "{} Reported vs. Empirical Quality".format(
                rt_type_name.capitalize().replace("_", "-")
            ),
            "ylab": "Empirical quality score",
        })

    if not sample_data:
        return

    plot = scatter.plot(
        sample_data,
        pconfig={
            "title": "Reported vs. Empirical Quality",
            "id": "gatk-base-recalibrator-reported-empirical-plot",
            "xlab": "Reported quality score",
            "ylab": "Empirical quality score",
            "xDecimals": False,
            "data_labels": data_labels,
        },
    )
    self.add_section(
        name="Reported Quality vs. Empirical Quality",
        anchor="gatk-base-recalibrator-reported-empirical",
        description="Plot shows the reported quality score vs the empirical quality score.",
        plot=plot,
    )
def peddy_relatedness_plot(self):
    """Add the peddy 'Relatedness' scatter section (IBS0 against IBS2).

    Every entry of ``self.peddy_data`` that has both ``ibs0_ped_check`` and
    ``ibs2_ped_check`` becomes one point. When the entry also has
    ``rel_ped_check`` the point is coloured by that value: below 0.25, below
    0.5, or anything else. Nothing is added when no entry yields a point.
    """
    data = dict()
    for s_name, d in self.peddy_data.items():
        if "ibs0_ped_check" not in d or "ibs2_ped_check" not in d:
            continue
        point = {"x": d["ibs0_ped_check"], "y": d["ibs2_ped_check"]}
        # Colour only points that exist: an entry with the relatedness value
        # but without the IBS values must not raise KeyError.
        if "rel_ped_check" in d:
            if d["rel_ped_check"] < 0.25:
                point["color"] = "rgba(109, 164, 202, 0.9)"
            elif d["rel_ped_check"] < 0.5:
                point["color"] = "rgba(250, 160, 81, 0.8)"
            else:
                point["color"] = "rgba(43, 159, 43, 0.8)"
        data[s_name] = point

    if not data:
        return

    pconfig = {
        "id": "peddy_relatedness_plot",
        "title": "Peddy: Relatedness Plot",
        "xlab": "IBS0 (no alleles shared)",
        "ylab": "IBS2 (both alleles shared)",
    }
    self.add_section(
        name="Relatedness",
        anchor="peddy-relatedness-plot",
        description=(
            'Shared allele rates between sample pairs. '
            'Points are coloured by degree of relatedness: '
            '<span style="color: #6DA4CA;">less than 0.25</span>, '
            '<span style="color: #FAA051;">0.25 - 0.5</span>, '
            '<span style="color: #2B9F2B;">greater than 0.5</span>.'
        ),
        plot=scatter.plot(data, pconfig),
    )
def snp_rate_scatterplot(self):
    """Add the 'Relative Coverage' scatter section (RateX against RateY).

    Samples in ``self.sexdet_data`` lacking ``RateX`` or ``RateY`` are left
    out; the section is only added when at least one sample remains.
    """
    points = OrderedDict()
    for sample in self.sexdet_data:
        try:
            rates = self.sexdet_data[sample]
            point = {"x": rates["RateX"], "y": rates["RateY"]}
        except KeyError:
            # Sample has no X/Y rate: nothing to plot for it.
            continue
        points[sample] = point

    plot_config = dict()
    plot_config["id"] = "sexdeterrmine-rate-plot"
    plot_config["title"] = "SexDetErrmine: Relative coverage"
    plot_config["ylab"] = "Relative Cov. on Y"
    plot_config["xlab"] = "Relative Cov. on X"

    if len(points) == 0:
        return

    help_markdown = (
        " Males are expected to have a roughly equal X- and Y-rates, "
        "while females are expected to have a Y-rate of 0 and an X-rate of 1. "
        "Placement between the two clusters can be indicative of contamination, "
        "while placement with higher than expected X- and/or Y-rates can be "
        "indicative of sex chromosome aneuploidy. "
    )
    self.add_section(
        name="Relative Coverage",
        anchor="sexdeterrmine-rates",
        description="The coverage on the X vs Y chromosome, relative to coverage on the Autosomes.",
        helptext=help_markdown,
        plot=scatter.plot(points, plot_config),
    )
def peddy_sex_check_plot(self):
    """Add the peddy 'Sex Check' scatter section.

    Plots ``sex_het_ratio`` (y) against the category index of
    ``ped_sex_sex_check`` (x) for every entry of ``self.peddy_data`` that has
    both keys. Sex values other than "female"/"male" are placed in an
    "Unknown" category instead of raising KeyError. No section is added when
    no entry qualifies.
    """
    sex_index = {"female": 0, "male": 1, "unknown": 2}
    data = {}
    for s_name, d in self.peddy_data.items():
        if 'sex_het_ratio' in d and 'ped_sex_sex_check' in d:
            data[s_name] = {
                'x': sex_index.get(d['ped_sex_sex_check'], 2),
                'y': d["sex_het_ratio"],
            }

    if not data:
        return

    pconfig = {
        'id': 'peddy_sex_check_plot',
        'title': 'Peddy: Sex Check',
        'xlab': 'Sex From Ped',
        'ylab': 'Sex Het Ratio',
        'categories': ["Female", "Male", "Unknown"],
    }
    self.add_section(
        name='Sex Check',
        description="Predicted sex against heterozygosity ratio",
        helptext=(
            ' Higher values of Sex Het Ratio suggests the sample is female, low values suggest male. '
            'See [the main peddy documentation](http://peddy.readthedocs.io/en/latest/#sex-check) '
            'for more details about the `het_check` command. '
        ),
        anchor='peddy-sexcheck-plot',
        plot=scatter.plot(data, pconfig),
    )
def __init__(self, c_id, mod):
    """Create a custom-content module and add its single section.

    Args:
        c_id: Content ID. Used as the fallback anchor and, title-cased with
            underscores replaced by spaces, as the fallback section name.
        mod: Dict with a ``'config'`` dict (keys read here: section_name,
            section_anchor, section_href, description, pconfig, plot_type,
            headers, categories, xcats, ycats) and the ``'data'`` to render.

    The section content depends on ``plot_type``; a missing or unrecognised
    type only logs a warning.
    """
    mod_config = mod['config']

    modname = mod_config.get('section_name', c_id.replace('_', ' ').title())
    if modname == '' or modname is None:
        modname = 'Custom Content'

    # Initialise the parent object
    super(MultiqcModule, self).__init__(
        name=modname,
        anchor=mod_config.get('section_anchor', c_id),
        href=mod_config.get('section_href'),
        info=mod_config.get('description'),
    )

    pconfig = mod_config.get('pconfig', {})
    if pconfig.get('title') is None:
        pconfig['title'] = modname

    # Look the plot type up once instead of once per branch.
    plot_type = mod_config.get('plot_type')

    # Table
    if plot_type == 'table':
        pconfig['sortRows'] = pconfig.get('sortRows', False)
        headers = mod_config.get('headers')
        self.add_section(plot=table.plot(mod['data'], headers, pconfig))
        self.write_data_file(
            mod['data'],
            "multiqc_{}".format(modname.lower().replace(' ', '_')),
        )

    # Bar plot
    elif plot_type == 'bargraph':
        self.add_section(
            plot=bargraph.plot(mod['data'], mod_config.get('categories'), pconfig)
        )

    # Line plot
    elif plot_type == 'linegraph':
        self.add_section(plot=linegraph.plot(mod['data'], pconfig))

    # Scatter plot
    elif plot_type == 'scatter':
        self.add_section(plot=scatter.plot(mod['data'], pconfig))

    # Heatmap
    elif plot_type == 'heatmap':
        self.add_section(
            plot=heatmap.plot(
                mod['data'],
                mod_config.get('xcats'),
                mod_config.get('ycats'),
                pconfig,
            )
        )

    # Beeswarm plot
    elif plot_type == 'beeswarm':
        self.add_section(plot=beeswarm.plot(mod['data'], pconfig))

    # Raw HTML, or a raw image file already rendered as HTML
    elif plot_type in ('html', 'image'):
        self.add_section(content=mod['data'])

    # Not supplied
    elif plot_type is None:
        log.warning("Plot type not found for content ID '{}'".format(c_id))

    # Not recognised
    else:
        log.warning(
            "Error - custom content plot type '{}' not recognised for content ID {}".format(
                plot_type, c_id
            )
        )
def peddy_relatedness_plot(self):
    """Add the peddy 'Relatedness' scatter section (IBS0 against IBS2).

    Every entry of ``self.peddy_data`` that has both ``ibs0_ped_check`` and
    ``ibs2_ped_check`` becomes one point. When the entry also has
    ``rel_ped_check`` the point is coloured by that value: below 0.25, below
    0.5, or anything else. Nothing is added when no entry yields a point.
    """
    data = dict()
    for s_name, d in self.peddy_data.items():
        if 'ibs0_ped_check' not in d or 'ibs2_ped_check' not in d:
            continue
        point = {'x': d['ibs0_ped_check'], 'y': d['ibs2_ped_check']}
        # Colour only points that exist: an entry with the relatedness value
        # but without the IBS values must not raise KeyError.
        if 'rel_ped_check' in d:
            if d['rel_ped_check'] < 0.25:
                point['color'] = 'rgba(109, 164, 202, 0.9)'
            elif d['rel_ped_check'] < 0.5:
                point['color'] = 'rgba(250, 160, 81, 0.8)'
            else:
                point['color'] = 'rgba(43, 159, 43, 0.8)'
        data[s_name] = point

    if not data:
        return

    pconfig = {
        'id': 'peddy_relatedness_plot',
        'title': 'Peddy: Relatedness Plot',
        'xlab': 'IBS0 (no alleles shared)',
        'ylab': 'IBS2 (both alleles shared)',
    }
    self.add_section(
        name='Relatedness',
        anchor='peddy-relatedness-plot',
        description=(
            'Shared allele rates between sample pairs. '
            'Points are coloured by degree of relatedness: '
            '<span style="color: #6DA4CA;">less than 0.25</span>, '
            '<span style="color: #FAA051;">0.25 - 0.5</span>, '
            '<span style="color: #2B9F2B;">greater than 0.5</span>.'
        ),
        plot=scatter.plot(data, pconfig),
    )
def peddy_sex_check_plot(self):
    """Add the peddy 'Sex Check' scatter section.

    Plots ``sex_het_ratio`` (y) against the category index of
    ``ped_sex_sex_check`` (x) for each entry of ``self.peddy_data`` having
    both keys; unrecognised sex values map to index 2. Nothing is added
    when no entry qualifies.
    """
    category_of = {"female": 0, "male": 1, "unknown": 2}
    points = {
        sample_name: {
            "x": category_of.get(values["ped_sex_sex_check"], 2),
            "y": values["sex_het_ratio"],
        }
        for sample_name, values in self.peddy_data.items()
        if "sex_het_ratio" in values and "ped_sex_sex_check" in values
    }

    plot_config = {
        "id": "peddy_sex_check_plot",
        "title": "Peddy: Sex Check",
        "xlab": "Sex From Ped",
        "ylab": "Sex Het Ratio",
        "categories": ["Female", "Male", "Unknown"],
    }

    if len(points) == 0:
        return

    help_markdown = (
        " Higher values of Sex Het Ratio suggests the sample is female, low values suggest male. "
        "See [the main peddy documentation](http://peddy.readthedocs.io/en/latest/#sex-check) "
        "for more details about the `het_check` command. "
    )
    self.add_section(
        name="Sex Check",
        description="Predicted sex against heterozygosity ratio",
        helptext=help_markdown,
        anchor="peddy-sexcheck-plot",
        plot=scatter.plot(points, plot_config),
    )
def peddy_het_check_plot(self):
    """Add the peddy 'Het Check' scatter section.

    Plots ``het_ratio_het_check`` (y) against ``median_depth_het_check`` (x)
    for each entry of ``self.peddy_data`` having both keys. Nothing is added
    when no entry qualifies.
    """
    # Sample name -> point, restricted to samples with both required columns.
    points = {
        sample_name: {
            "x": values["median_depth_het_check"],
            "y": values["het_ratio_het_check"],
        }
        for sample_name, values in self.peddy_data.items()
        if "median_depth_het_check" in values and "het_ratio_het_check" in values
    }

    plot_config = {
        "id": "peddy_het_check_plot",
        "title": "Peddy: Het Check",
        "xlab": "median depth",
        "ylab": "proportion het calls",
    }

    if len(points) == 0:
        return

    help_markdown = (
        " A high proportion of heterozygous sites suggests contamination, "
        "a low proportion suggests consanguinity. "
        "See [the main peddy documentation](https://peddy.readthedocs.io/en/latest/output.html#het-check) "
        "for more details about the `het_check` command. "
    )
    self.add_section(
        name="Het Check",
        description="Proportion of sites that were heterozygous against median depth.",
        helptext=help_markdown,
        anchor="peddy-hetcheck-plot",
        plot=scatter.plot(points, plot_config),
    )
def somalier_het_check_plot(self):
    """Add the somalier 'Heterozygosity' scatter section.

    Plots ``ab_std`` (y) against ``gt_depth_mean`` (x) for each entry of
    ``self.somalier_data`` that has both keys. Nothing is added when no
    entry qualifies.
    """
    points = {
        sample_name: {'x': values['gt_depth_mean'], 'y': values['ab_std']}
        for sample_name, values in self.somalier_data.items()
        if 'gt_depth_mean' in values and 'ab_std' in values
    }
    if len(points) == 0:
        return

    plot_config = {
        'id': 'somalier_het_check_plot',
        'title': 'Somalier: Sample Observed Heterozygosity',
        'xlab': 'Mean depth',
        'ylab': 'Standard deviation of allele-balance',
    }
    self.add_section(
        name='Heterozygosity',
        description="Standard devation of heterozygous allele balance against mean depth.",
        helptext="A high standard deviation in allele balance suggests contamination.",
        anchor='somalier-hetcheck',
        plot=scatter.plot(points, plot_config),
    )
def somalier_relatedness_plot(self):
    """Add the somalier 'Relatedness' scatter section (IBS0 against IBS2).

    Every entry of ``self.somalier_data`` with both ``ibs0`` and ``ibs2``
    becomes one point. When the entry also carries ``relatedness`` and
    ``expected_relatedness``, the point is coloured by the expected value:
    three values have fixed colours, every other distinct value takes the
    next colour from the "Dark2" scale, cycling when the scale runs out.
    The section description lists each colour with its label.
    """
    data = dict()
    alpha = 0.6
    relatedness_colours = {
        0: ['Unrelated', 'rgba(74, 124, 182, {})'.format(alpha)],
        0.49: ['Sib-sib', 'rgba(243, 123, 40, {})'.format(alpha)],
        0.5: ['Parent-child', 'rgba(159, 84, 47, {})'.format(alpha)],
    }

    # Get index colour scale
    cscale = mqc_colour.mqc_colour_scale()
    extra_colours = cscale.get_colours("Dark2")
    extra_colours = _make_col_alpha(extra_colours, alpha)
    extra_colour_idx = 0

    for s_name, d in self.somalier_data.items():
        if 'ibs0' not in d or 'ibs2' not in d:
            continue
        data[s_name] = {'x': d['ibs0'], 'y': d['ibs2']}

        # 'relatedness' keeps the original gate; 'expected_relatedness' is
        # the key actually read, so require it too rather than risk KeyError.
        if 'relatedness' not in d or 'expected_relatedness' not in d:
            continue
        relatedness = d['expected_relatedness']

        # -1 is not the same family, 0 is same family but unrelated
        # @brentp says he usually bundles them together
        if relatedness == -1:
            relatedness = 0

        # New unique value that we've not seen before
        if relatedness not in relatedness_colours:
            relatedness_colours[relatedness] = [
                str(relatedness),
                extra_colours[extra_colour_idx],
            ]
            # Advance to the next colour, wrapping at the end of the scale.
            extra_colour_idx = (extra_colour_idx + 1) % len(extra_colours)

        # Assign colour
        data[s_name]['color'] = relatedness_colours[relatedness][1]

    if not data:
        return

    pconfig = {
        'id': 'somalier_relatedness_plot',
        'title': 'Somalier: Sample Shared Allele Rates (IBS)',
        'xlab': 'IBS0 (no alleles shared)',
        'ylab': 'IBS2 (both alleles shared)',
        'marker_line_width': 0,
    }

    colours_legend = ''
    for val in sorted(relatedness_colours.keys()):
        name, col_rgb = relatedness_colours[val]
        # The legend text uses the colour with the alpha value swapped for 1.0.
        colours_legend += '<span style="color:{}">{}</span>, '.format(
            col_rgb.replace(str(alpha), "1.0"), name
        )

    self.add_section(
        name='Relatedness',
        anchor='somalier-relatedness',
        description=(
            ' Shared allele rates between sample pairs. '
            'Points are coloured by degree of expected-relatedness: {}'
        ).format(colours_legend),
        plot=scatter.plot(data, pconfig),
    )
def peddy_pca_plot(self):
    """Add the peddy 'PCA Plot' scatter section (PC1 against PC2).

    The ``background_pca`` entry, if any, is removed from ``self.peddy_data``
    and inserted first as a list of small, uniformly coloured points named
    after their ancestry. Every remaining entry that has ``PC1_het_check``
    and ``PC2_het_check`` becomes one point; when it also has
    ``ancestry-prediction`` it is coloured by ancestry (black for an
    ancestry without a defined colour).
    """
    ancestry_colors = {
        "SAS": "rgb(68,1,81,1)",
        "EAS": "rgb(59,81,139,1)",
        "AMR": "rgb(33,144,141,1)",
        "AFR": "rgb(92,200,99,1)",
        "EUR": "rgb(253,231,37,1)",
    }
    default_color = "#000000"
    default_background_color = "rgb(211,211,211,0.05)"
    data = OrderedDict()

    # plot the background data first, so it doesn't hide the actual data points
    d = self.peddy_data.pop("background_pca", {})
    if d:
        data["background"] = [
            {
                "x": pc1,
                "y": pc2,
                "color": default_background_color,
                "name": ancestry,
                "marker_size": 1,
            }
            for pc1, pc2, ancestry in zip(d["PC1"], d["PC2"], d["ancestry"])
        ]

    for s_name, d in self.peddy_data.items():
        if "PC1_het_check" not in d or "PC2_het_check" not in d:
            continue
        point = {"x": d["PC1_het_check"], "y": d["PC2_het_check"]}
        # Samples without an ancestry prediction keep the plot's default colour.
        if "ancestry-prediction" in d:
            point["color"] = ancestry_colors.get(d["ancestry-prediction"], default_color)
        data[s_name] = point

    if not data:
        return

    pconfig = {
        "id": "peddy_pca_plot",
        "title": "Peddy: PCA Plot",
        "xlab": "PC1",
        "ylab": "PC2",
        "marker_size": 5,
        "marker_line_width": 0,
    }
    self.add_section(
        name="PCA Plot",
        anchor="peddy-pca-plot",
        plot=scatter.plot(data, pconfig),
    )
def peddy_pca_plot(self):
    """Add the peddy 'PCA Plot' scatter section (PC1 against PC2).

    The ``background_pca`` entry, if any, is removed from ``self.peddy_data``
    and inserted first as a list of small, uniformly coloured points named
    after their ancestry. Every remaining entry that has ``PC1_het_check``
    and ``PC2_het_check`` becomes one point; when it also has
    ``ancestry-prediction`` it is coloured by ancestry (black for an
    ancestry without a defined colour).
    """
    ancestry_colors = {
        'SAS': 'rgb(68,1,81,1)',
        'EAS': 'rgb(59,81,139,1)',
        'AMR': 'rgb(33,144,141,1)',
        'AFR': 'rgb(92,200,99,1)',
        'EUR': 'rgb(253,231,37,1)',
    }
    default_color = '#000000'
    default_background_color = 'rgb(211,211,211,0.05)'
    data = OrderedDict()

    # plot the background data first, so it doesn't hide the actual data points
    d = self.peddy_data.pop("background_pca", {})
    if d:
        data["background"] = [
            {
                'x': pc1,
                'y': pc2,
                'color': default_background_color,
                'name': ancestry,
                'marker_size': 1,
            }
            for pc1, pc2, ancestry in zip(d['PC1'], d['PC2'], d['ancestry'])
        ]

    for s_name, d in self.peddy_data.items():
        if 'PC1_het_check' not in d or 'PC2_het_check' not in d:
            continue
        point = {'x': d['PC1_het_check'], 'y': d['PC2_het_check']}
        # A sample without an ancestry prediction is still plotted (with the
        # plot's default colour) instead of raising KeyError.
        if 'ancestry-prediction' in d:
            point['color'] = ancestry_colors.get(d['ancestry-prediction'], default_color)
        data[s_name] = point

    if not data:
        return

    pconfig = {
        'id': 'peddy_pca_plot',
        'title': 'Peddy: PCA Plot',
        'xlab': 'PC1',
        'ylab': 'PC2',
        'marker_size': 5,
        'marker_line_width': 0,
    }
    self.add_section(
        name='PCA Plot',
        anchor='peddy-pca-plot',
        plot=scatter.plot(data, pconfig),
    )
def somalier_ancestry_pca_plot(self):
    """Add the somalier 'Ancestry PCA' scatter section (PC1 against PC2).

    Entries of ``self.somalier_data`` having ``PC1`` and ``PC2`` are added
    first as semi-transparent black points. The ``background_pcs`` entry, if
    any, is then removed from ``self.somalier_background_pcs`` and appended
    as a list of points coloured per ancestry category.
    """
    points = OrderedDict()

    # cycle over samples and add PC coordinates to data dict
    for sample_name, values in self.somalier_data.items():
        if "PC1" not in values or "PC2" not in values:
            continue
        points[sample_name] = {
            "x": values["PC1"],
            "y": values["PC2"],
            "color": "rgba(0, 0, 0, 0.6)",
        }

    # add background
    # N.B. this must be done after samples to have samples on top
    background_pcs = self.somalier_background_pcs.pop("background_pcs", {})
    if background_pcs:
        # generate color scale to match the number of categories
        palette = mqc_colour.mqc_colour_scale(name="Paired").colours
        categories = self.somalier_ancestry_cats
        colour_by_ancestry = dict(zip(categories, palette[: len(categories)]))
        fallback_colour = "rgb(255,192,203,0.3)"

        # Make colours semi-transparent
        colour_by_ancestry = dict(
            zip(colour_by_ancestry.keys(), _make_col_alpha(colour_by_ancestry.values(), 0.3))
        )

        background_points = []
        for pc1, pc2, ancestry in zip(
            background_pcs["PC1"], background_pcs["PC2"], background_pcs["ancestry"]
        ):
            background_points.append({
                "x": pc1,
                "y": pc2,
                "color": colour_by_ancestry.get(ancestry, fallback_colour),
                "name": ancestry,
            })
        points["background"] = background_points

    # generate section and plot
    if len(points) == 0:
        return

    plot_config = {
        "id": "somalier_ancestry_pca_plot",
        "title": "Somalier: Sample Predicted Ancestry",
        "xlab": "PC1",
        "ylab": "PC2",
        "marker_size": 5,
        "marker_line_width": 0,
    }
    help_markdown = (
        " Sample PCs are plotted against background PCs from the background data supplied to somalier. "
        "Color indicates predicted ancestry of sample. "
        "Data points in close proximity are predicted to be of similar ancestry. "
        "Consider whether the samples cluster as expected. "
    )
    self.add_section(
        name="Ancestry PCA",
        description="Principal components of samples against background PCs.",
        helptext=help_markdown,
        anchor="somalier-ancestry-pca",
        plot=scatter.plot(points, plot_config),
    )
def __init__(self, c_id, mod):
    """Create a custom-content module and append its plot to the intro.

    Args:
        c_id: Content ID. Used as the fallback anchor and, title-cased with
            underscores replaced by spaces, as the fallback section name.
        mod: Dict with a ``'config'`` dict (keys read here: section_name,
            section_anchor, section_href, description, pconfig, plot_type,
            categories, xcats, ycats) and the ``'data'`` to render.

    The plot appended to ``self.intro`` depends on ``plot_type``; a missing
    or unrecognised type only logs a warning.
    """
    mod_config = mod['config']
    modname = mod_config.get('section_name', c_id.replace('_', ' ').title())

    # Initialise the parent object
    super(MultiqcModule, self).__init__(
        name=modname,
        anchor=mod_config.get('section_anchor', c_id),
        href=mod_config.get('section_href'),
        info=mod_config.get('description'),
    )

    pconfig = mod_config.get('pconfig', {})
    if pconfig.get('title') is None:
        pconfig['title'] = modname

    # Look the plot type up once instead of once per branch.
    plot_type = mod_config.get('plot_type')

    # Table
    if plot_type == 'table':
        pconfig['sortRows'] = pconfig.get('sortRows', False)
        self.intro += table.plot(mod['data'], None, pconfig)

    # Bar plot
    elif plot_type == 'bargraph':
        self.intro += bargraph.plot(mod['data'], mod_config.get('categories'), pconfig)

    # Line plot
    elif plot_type == 'linegraph':
        self.intro += linegraph.plot(mod['data'], pconfig)

    # Scatter plot
    elif plot_type == 'scatter':
        self.intro += scatter.plot(mod['data'], pconfig)

    # Heatmap
    elif plot_type == 'heatmap':
        self.intro += heatmap.plot(
            mod['data'],
            mod_config.get('xcats'),
            mod_config.get('ycats'),
            pconfig,
        )

    # Beeswarm plot
    elif plot_type == 'beeswarm':
        self.intro += beeswarm.plot(mod['data'], pconfig)

    # Not supplied
    elif plot_type is None:
        log.warning("Plot type not found for content ID '{}'".format(c_id))

    # Not recognised
    else:
        log.warning(
            "Error - custom content plot type '{}' not recognised for content ID {}".format(
                plot_type, c_id
            )
        )
def peddy_pca_plot(self):
    """Add the peddy 'PCA Plot' scatter section (PC1 against PC2).

    The ``background_pca`` entry, if any, is removed from ``self.peddy_data``
    and inserted first as a list of small, uniformly coloured points named
    after their ancestry. Every remaining entry that has ``PC1_het_check``
    and ``PC2_het_check`` becomes one point; when it also has
    ``ancestry-prediction`` it is coloured by ancestry (black for an
    ancestry without a defined colour).
    """
    ancestry_colors = {
        'SAS': 'rgb(68,1,81,1)',
        'EAS': 'rgb(59,81,139,1)',
        'AMR': 'rgb(33,144,141,1)',
        'AFR': 'rgb(92,200,99,1)',
        'EUR': 'rgb(253,231,37,1)',
    }
    default_color = '#000000'
    default_background_color = 'rgb(211,211,211,0.05)'
    data = OrderedDict()

    # plot the background data first, so it doesn't hide the actual data points
    d = self.peddy_data.pop("background_pca", {})
    if d:
        data["background"] = [
            {
                'x': pc1,
                'y': pc2,
                'color': default_background_color,
                'name': ancestry,
                'marker_size': 1,
            }
            for pc1, pc2, ancestry in zip(d['PC1'], d['PC2'], d['ancestry'])
        ]

    for s_name, d in self.peddy_data.items():
        if 'PC1_het_check' not in d or 'PC2_het_check' not in d:
            continue
        point = {'x': d['PC1_het_check'], 'y': d['PC2_het_check']}
        # A sample without an ancestry prediction is still plotted (with the
        # plot's default colour) instead of raising KeyError.
        if 'ancestry-prediction' in d:
            point['color'] = ancestry_colors.get(d['ancestry-prediction'], default_color)
        data[s_name] = point

    if not data:
        return

    pconfig = {
        'id': 'peddy_pca_plot',
        'title': 'Peddy: PCA Plot',
        'xlab': 'PC1',
        'ylab': 'PC2',
        'marker_size': 5,
        'marker_line_width': 0,
    }
    self.add_section(
        name='PCA Plot',
        anchor='peddy-pca-plot',
        plot=scatter.plot(data, pconfig),
    )
def bin_plot(self):
    """Parse goleft indexcov ped files and add the 'Problem coverage bins' section.

    Each file found under the ``goleft_indexcov/ped`` search key is read as a
    tab-separated table whose first line (minus its first character) is the
    header. For every sample, x is ``bins.lo`` and y is ``bins.out``, each
    divided by ``bins.in + bins.out``.

    Returns:
        True when at least one sample was plotted, otherwise False.
    """
    helptext = (
        'We expect bins to be around 1, so deviations from this indicate problems. \n'
        ' Low coverage bins (< 0.15) on the x-axis have regions with low or missing coverage. \n'
        ' Higher values indicate truncated BAM files or missing data. \n'
        ' Bins with skewed distributions (<0.85 or >1.15) on the y-axis detect dosage bias. \n'
        ' Large values on the y-axis are likely to impact CNV and structural variant calling. \n'
        ' See the \n'
        ' <a href="https://github.com/brentp/goleft/blob/master/docs/indexcov/help-bin.md" target="_blank">goleft indexcov bin documentation</a> \n'
        ' for more details.'
    )

    data = {}
    for fn in self.find_log_files('goleft_indexcov/ped', filehandles=True):
        header = fn['f'].readline()[1:].strip().split("\t")
        for line in fn['f']:
            # Drop the line ending so the last column is clean, and skip
            # blank lines that would otherwise fail the column lookups.
            line = line.rstrip("\r\n")
            if not line.strip():
                continue
            cur = dict(zip(header, line.split("\t")))
            cur["sample_id"] = self.clean_s_name(cur["sample_id"], fn["root"])
            bins_out = float(cur["bins.out"])
            total = float(cur["bins.in"]) + bins_out
            if total == 0:
                # No bins at all: proportions are undefined, so there is
                # nothing to plot for this sample.
                continue
            data[cur["sample_id"]] = {
                "x": float(cur["bins.lo"]) / total,
                "y": bins_out / total,
            }

    # Filter to strip out ignored sample names
    data = self.ignore_samples(data)

    if not data:
        return False

    log.info("Found goleft indexcov bin reports for %s samples" % (len(data)))

    pconfig = {
        'id': 'goleft_indexcov-bin-plot',
        'title': 'goleft indexcov: Problematic low and non-uniform coverage bins',
        'xlab': 'Proportion of bins with depth < 0.15',
        'ylab': 'Proportion of bins with depth outside of (0.85, 1.15)',
        'yCeiling': 1.0,
        'yFloor': 0.0,
        'xCeiling': 1.0,
        'xFloor': 0.0,
    }
    self.add_section(
        name='Problem coverage bins',
        anchor='goleft_indexcov-bin',
        description='This plot identifies problematic samples using binned coverage distributions.',
        helptext=helptext,
        plot=scatter.plot(data, pconfig),
    )
    return True
def make_plots(self):
    """Add the synteny scatter plot section, drawn from ``self.plot_data``."""
    plot_options = dict(
        id='syntenyplot',
        title='Synteny plot',
        marker_line_width=0,
        marker_size=2,
        enableMouseTracking=False,
        square=True,
        data_labels=self.data_labels,
    )
    self.add_section(
        anchor='syntenyplot',
        description='',
        plot=scatter.plot(self.plot_data, plot_options),
    )
def parse_plotPCA(self):
    """Find plotPCA output and add the PCA scatter plot section.

    Parsed samples are collected in ``self.deeptools_plotPCAData``, filtered
    through ``ignore_samples`` and written out with ``write_data_file``.
    Samples exposing both component ``1`` and component ``2`` are plotted.

    Returns:
        The number of samples kept after filtering, or ``0`` when none of
        them has plottable data.
    """
    self.deeptools_plotPCAData = dict()
    for f in self.find_log_files("deeptools/plotPCAData", filehandles=False):
        parsed_data = self.parsePlotPCAData(f)
        for k, v in parsed_data.items():
            if k in self.deeptools_plotPCAData:
                log.warning("Replacing duplicate sample {}.".format(k))
            self.deeptools_plotPCAData[k] = v
        if len(parsed_data) > 0:
            self.add_data_source(f, section="plotPCA")

    self.deeptools_plotPCAData = self.ignore_samples(self.deeptools_plotPCAData)

    if len(self.deeptools_plotPCAData) > 0:
        # Write data to file
        self.write_data_file(self.deeptools_plotPCAData, "deeptools_plot_PCA")

        # Named pconfig so the local does not shadow a module-level ``config``.
        pconfig = {
            "id": "deeptools_pca_plot",
            "title": "deeptools: PCA Plot",
            "xlab": "PC1",
            "ylab": "PC2",
            "tt_label": "PC1 {point.x:.2f}: PC2 {point.y:.2f}",
        }
        data = dict()
        for s_name, components in self.deeptools_plotPCAData.items():
            try:
                data[s_name] = {"x": components[1], "y": components[2]}
            except KeyError:
                # Best effort: a sample missing either component is left out of the plot.
                continue
        if not data:
            log.debug("No valid data for PCA plot")
            # Return an int here too, so the result stays comparable and
            # summable like the count returned on every other path.
            return 0

        self.add_section(
            name="PCA plot",
            anchor="deeptools_pca",
            description="PCA plot with the top two principal components calculated based on genome-wide distribution of sequence reads",
            plot=scatter.plot(data, pconfig),
        )

    return len(self.deeptools_plotPCAData)
def __init__(self):
    """Locate the single PCA data file and add the PCA scatter plot section.

    The section content is an HTML description, an optional colour legend
    (one swatch per condition when ``color_by_cond`` is non-empty) and the
    scatter plot itself.

    Raises:
        UserWarning: if no PCA data file is found, or more than one is found.
    """
    # Initialise the parent object
    super(MultiqcModule, self).__init__(name='Principal Components Analysis', anchor='rnaseq_az')

    rnaseq_pca_files = self.find_log_files('rnaseq_az/pca_data', filecontents=False)
    rnaseq_pca_files = [f for f in rnaseq_pca_files if f]
    if not rnaseq_pca_files:
        log.debug("Could not find the PCA data file in {}".format(config.analysis_dir))
        raise UserWarning
    if len(rnaseq_pca_files) > 1:
        log.warning("More than 1 PCA data file found in {}".format(config.analysis_dir))
        raise UserWarning

    rnaseq_pca_file = rnaseq_pca_files[0]
    pca_dirpath, pca_fname = rnaseq_pca_file['root'], rnaseq_pca_file['fn']
    pca_fpath = join(pca_dirpath, pca_fname)
    pca_data, color_by_sample, color_by_cond, variances = parse_pca_data(pca_fpath)
    pca_data = self.ignore_samples(pca_data)

    description = (
        "<p>PCA is a popular method that is based on the principles of dimensional reduction. "
        "Below is a PCA plot of the samples within the space of the first two principal components that explain the most variation in the data. "
        "These were calculated using the read counts of the top 1000 most variable genes within the dataset.</p>"
    )

    # Collect the legend fragments and join once instead of growing a string with +=.
    legend_parts = []
    if color_by_cond:
        label_style = (
            'font-family: \'Lucida Grande\', \'Lucida Sans Unicode\', Arial, Helvetica, sans-serif; '
            'font-size: 12px; '
            'font-weight: bold; '
        )
        legend_parts.append('<center><div>')
        legend_parts.append('<span style="' + label_style + ' margin-right: 10px;">Conditions: </span>')
        for cond, color in color_by_cond.items():
            # One non-wrapping group per condition: colour swatch followed by its label.
            legend_parts.append('<span style="white-space: nowrap;">')
            legend_parts.append(
                '<span style="display: inline-block; width: 16px; height: 12px; '
                + ' margin-bottom: -1px; margin-right: 1px; background-color: ' + color + '"></span>'
            )
            legend_parts.append(
                '<span style="' + label_style + ' margin-right: 20px; white-space: normal;"> ' + cond + '</span>'
            )
            legend_parts.append('</span>')
        legend_parts.append('</div></center>')
    legend = ''.join(legend_parts)

    self.add_section(
        name='Principal Components Analysis',
        anchor='rnaseq_az-pca',
        content=description + legend + scatter.plot(pca_data, {
            'title': 'Principal Components Analysis',
            'xlab': 'PC1: ' + variances[0] + '% variance',
            'ylab': 'PC2: ' + variances[1] + '% variance',
            'colors': color_by_sample,
            'tt_label': 'PC1: {point.x}<br/>PC2: {point.y}',
        })
    )
def cnv_winplot_plot(self, cnt, s_name):
    """Add a scatter plot section for one sample.

    ``cnt`` is plotted as the only series, keyed by ``s_name``; the sample
    name is also used for the section name, the plot title and (prefixed
    with ``wp``) the section anchor.
    """
    plot_options = dict(
        title=s_name,
        ymax=40,
        ymin=0,
        marker_size=2,
        marker_line_width=0,
    )
    series = {s_name: cnt}
    self.add_section(
        name=s_name,
        anchor='wp' + s_name,
        content=scatter.plot(series, plot_options),
    )
def slamdunkPCAPlot(self):
    """Add the Slamdunk PCA scatter plot section, drawn from ``self.PCA_data``."""
    plot_options = dict(
        id='slamdunk_pca',
        title='Slamdunk: PCA',
        xlab='PC1',
        ylab='PC2',
        tt_label='PC1 {point.x:.2f}: PC2 {point.y:.2f}',
    )
    section_description = (
        'This plot shows the principal components of samples based on the '
        'distribution of reads with T>C conversions within UTRs (see the '
        '<a href="http://t-neumann.github.io/slamdunk/docs.html#summary" '
        'target="_blank">slamdunk docs</a>).'
    )
    self.add_section(
        name='PCA (T>C based)',
        anchor='slamdunk_PCA',
        description=section_description,
        plot=scatter.plot(self.PCA_data, plot_options),
    )
def bin_plot(self):
    """Add the goleft indexcov "Problem coverage bins" scatter plot.

    For each sample row in the ``goleft_indexcov/ped`` log files, the x value
    is ``bins.lo`` and the y value is ``bins.out``, each divided by
    ``bins.in + bins.out``.

    Returns:
        True if a section was added, False if no sample data was found.
    """
    helptext = 'We expect bins to be around 1, so deviations from this indicate problems. \n\
    Low coverage bins (< 0.15) on the x-axis have regions with low or missing coverage. \n\
    Higher values indicate truncated BAM files or missing data. \n\
    Bins with skewed distributions (<0.85 or >1.15) on the y-axis detect dosage bias. \n\
    Large values on the y-axis are likely to impact CNV and structural variant calling. \n\
    See the \n\
    <a href="https://github.com/brentp/goleft/blob/master/docs/indexcov/help-bin.md" target="_blank">goleft indexcov bin documentation</a> \n\
    for more details.'

    data = {}
    for fn in self.find_log_files('goleft_indexcov/ped', filehandles=True):
        # Drop the one-character prefix in front of the header's first column name.
        header = fn['f'].readline()[1:].strip().split("\t")
        for row in fn['f']:
            if not row.strip():
                # A blank row has no columns; skip it rather than fail on a missing key.
                continue
            cur = dict(zip(header, row.split("\t")))
            cur["sample_id"] = self.clean_s_name(cur["sample_id"], fn["root"])
            total = float(cur["bins.in"]) + float(cur["bins.out"])
            if total == 0:
                # Avoid dividing by zero when a sample reports no bins at all.
                log.debug("Skipping %s: no coverage bins reported", cur["sample_id"])
                continue
            data[cur["sample_id"]] = {"x": float(cur["bins.lo"]) / total, "y": float(cur["bins.out"]) / total}

    # Filter to strip out ignored sample names
    data = self.ignore_samples(data)
    if not data:
        return False

    log.info("Found goleft indexcov bin reports for %s samples", len(data))
    pconfig = {
        'id': 'goleft_indexcov-bin-plot',
        'title': 'goleft indexcov: Problematic low and non-uniform coverage bins',
        'xlab': 'Proportion of bins with depth < 0.15',
        'ylab': 'Proportion of bins with depth outside of (0.85, 1.15)',
        'yCeiling': 1.0, 'yFloor': 0.0, 'xCeiling': 1.0, 'xFloor': 0.0}
    self.add_section(
        name='Problem coverage bins',
        # The anchor names the bin plot, matching the plot id above
        # (previously it used the "-roc" suffix).
        anchor='goleft_indexcov-bin',
        description='This plot identifies problematic samples using binned coverage distributions.',
        helptext=helptext,
        plot=scatter.plot(data, pconfig)
    )
    return True
def peddy_pca_plot(self):
    """Add the Peddy PCA scatter plot section.

    Only samples carrying both a ``PC1`` and a ``PC2`` value are plotted;
    no section is added when no sample qualifies.
    """
    points = {
        sample: {'x': values['PC1'], 'y': values['PC2']}
        for sample, values in self.peddy_data.items()
        if 'PC1' in values and 'PC2' in values
    }
    if not points:
        return

    plot_options = dict(
        id='peddy_pca_plot',
        title='Peddy: PCA Plot',
        xlab='PC1',
        ylab='PC2',
    )
    self.add_section(
        name='PCA Plot',
        anchor='peddy-pca-plot',
        plot=scatter.plot(points, plot_options),
    )