def _get_html_stats(self):
    """Return an HTML snippet summarising the BWA mapping statistics.

    The statistics are read from ``bwa_mem_stats.json`` found under
    ``self.directory``.

    :return: HTML string made of the contamination, unpaired and
        duplicated entries.
    """
    from sequana.tools import StatsBAM2Mapped
    from easydev import precision

    stats = StatsBAM2Mapped(self.directory + "bwa_mem_stats.json").data
    parts = [
        "Reads with Phix: %s %%<br>" % precision(stats['contamination'], 3)
    ]

    # Mapped/unmapped table; the R2 column is added only when the stats
    # contain an "R2_mapped" entry.
    has_r2 = "R2_mapped" in stats.keys()
    counts = {'R1': [stats['R1_mapped'], stats['R1_unmapped']]}
    if has_r2:
        counts['R2'] = [stats['R2_mapped'], stats['R2_unmapped']]
    df = pd.DataFrame(counts)
    df.index = ['mapped', 'unmapped']

    datatable = DataTable(df, "bwa_bam")
    datatable.datatable.datatable_options = {
        'scrollX': '300px',
        'pageLength': 30,
        'scrollCollapse': 'true',
        'dom': 'irtpB',
        "paging": "false",
        'buttons': ['copy', 'csv'],
    }
    # NOTE: the table and its javascript are generated but are currently
    # not inserted in the returned HTML.
    js = datatable.create_javascript_function()
    html_tab = datatable.create_datatable(float_format='%.3g')

    parts.append("Unpaired: %s <br>" % stats['unpaired'])
    parts.append("duplicated: %s <br>" % stats['duplicated'])
    return "".join(parts)
def _get_df(self):
    """Return the per-read statistics table, building it on first access.

    When ``self._df`` is ``None`` every read of ``self.data`` is scanned
    and a dataframe is stored in ``self._df`` with the columns
    ``read_length``, ``GC_content`` (fraction of G/C letters), ``snr_A``,
    ``snr_C``, ``snr_G``, ``snr_T`` (values of the ``sn`` tag) and ``ZMW``
    (second ``/``-separated field of the read name). The number of reads
    is stored in ``self._N``. ``self.reset()`` is called before and after
    the scan.

    :return: the cached dataframe.
    """
    if self._df is not None:
        return self._df

    self.reset()
    columns = ['read_length', 'GC_content', 'snr_A', 'snr_C', 'snr_G',
               'snr_T', 'ZMW']
    rows = []
    count = 0
    for count, read in enumerate(self.data, start=1):
        # progress report every 10000 reads
        if (count % 10000) == 0:
            print("Read %d sequences" % count)

        length = read.query_length

        letters = collections.Counter(read.query_sequence)
        gc_letters = (letters['g'] + letters['G'] +
                      letters['c'] + letters['C'])
        gc_content = gc_letters / float(sum(letters.values()))

        # the "sn" tag holds the four SNR values (A, C, G, T)
        snr = list([tag for tag in read.tags if tag[0] == 'sn'][0][1])

        zmw = read.qname.split('/')[1]

        rows.append([length, gc_content] + snr + [zmw])

    self._df = pd.DataFrame(rows, columns=columns)
    self._N = count
    self.reset()
    return self._df
def _get_df(self, method_name):
    """Build a name/value/url table from one of the instance's methods.

    :param str method_name: name of a no-argument method of ``self`` whose
        return value fills the ``value`` column.
    :return: dataframe with the columns ``name`` (from
        ``self.get_projects()``), ``value`` and ``url`` (from
        ``self.get_urls()``).
    """
    values = getattr(self, method_name)()
    return pd.DataFrame({
        "name": self.get_projects(),
        "value": values,
        "url": self.get_urls(),
    })
def get_actg_content(self, max_sample=500000):
    """Return the per-position proportion of A, C, G, T and N.

    Every sequence of ``self.alignments`` (attribute ``seq``) is scanned
    and the upper-case letters A, C, G, T and N are counted at each
    0-based position. Counts are then normalised row-wise so that each
    position sums to 1. Other characters are ignored; a position where
    none of the five letters was seen yields NaN.

    :param int max_sample: accepted for interface compatibility; it is not
        used by this implementation.
    :return: dataframe with one row per position and the columns
        ``A``, ``C``, ``G``, ``T``, ``N``. Empty when there is no
        alignment.
    """
    try:
        self.alignments
    except AttributeError:
        # alignments not available yet: ask the instance to populate them
        self._set_alignments()

    bases = ['A', 'C', 'G', 'T', 'N']

    # the longest sequence fixes the number of rows (0 if no alignment)
    max_length = max((len(a.seq) for a in self.alignments), default=0)

    counts = {base: np.zeros(max_length) for base in bases}
    for alignment in self.alignments:
        # single pass over the sequence instead of one scan per letter
        for position, letter in enumerate(alignment.seq):
            per_base = counts.get(letter)
            if per_base is not None:
                per_base[position] += 1

    df = pd.DataFrame(counts, columns=bases)
    return df.divide(df.sum(axis=1), axis=0)
def plot_unknown_barcodes(self, N=20):
    """Plot the most frequent unknown barcodes as horizontal bars.

    :param int N: number of barcodes to keep, ranked by their total count
        over all lanes.
    :return: the plotted dataframe (one row per barcode, one ``Lane X``
        column per lane), least frequent barcode first.
    """
    unknown = self.data['UnknownBarcodes']
    df = pd.DataFrame({entry['Lane']: entry['Barcodes'] for entry in unknown})
    if len(df) == 1 and "unknown" in df.index:
        df.loc['known'] = [0 for _ in df.columns]

    # if data is made of undetermined only, the dataframe is just made of
    # N lanes with one entry: unknown
    ranked = df.sum(axis=1).sort_values(ascending=False)
    selection = ranked.index[0:N]
    data = df.loc[selection][::-1]
    data.columns = ["Lane {}".format(lane) for lane in data.columns]

    from matplotlib import rcParams

    # axes.axisbelow is switched on only while the bars are drawn
    rcParams['axes.axisbelow'] = True
    pylab.figure(figsize=(10, 8))
    axes = pylab.gca()
    data.plot(kind="barh", width=1, ec="k", ax=axes)
    rcParams['axes.axisbelow'] = False

    pylab.xlabel("Number of reads", fontsize=12)
    pylab.ylabel("")
    pylab.grid(True)
    labels = ["Lane {}".format(i) for i in range(1, len(df.columns) + 1)]
    pylab.legend(labels, loc="lower right")
    try:
        pylab.tight_layout()
    except Exception as err:
        # a layout failure is reported but does not abort the plot
        print(err)
    return data
def _parse_data(self):
    """Read the kraken output file and fill the taxon/status counters.

    Only the columns 0, 2 and 3 of ``self.filename`` (tab-separated, no
    header) are loaded to save memory on very large files; they are named
    ``status``, ``taxon`` and ``length``.

    Attributes set:

    * ``self._df``: the loaded table,
    * ``self._taxons``: number of reads per taxon, taxon 0 removed,
      sorted by decreasing count,
    * ``self.classified`` / ``self.unclassified``: number of rows whose
      status is ``C`` / ``U`` (0 when absent).

    :raises NotImplementedError: if pandas reports a parser error when the
        file is opened.
    """
    logger.info("Reading kraken data")
    columns = ["status", "taxon", "length"]

    try:
        # The file is read by chunks of 1M rows to keep memory reasonable.
        reader = pd.read_csv(self.filename, sep="\t", header=None,
                             usecols=[0, 2, 3], chunksize=1000000)
    except pd.errors.ParserError as err:
        # e.g. only_classified_output with no classified read: not handled
        raise NotImplementedError from err

    # Concatenate once: calling concat inside the loop gets slower and
    # slower as the accumulated frame grows.
    self._df = pd.concat(list(reader))

    # columns are still labelled 0, 2, 3 after reading; rename them
    self._df.columns = columns

    count = int((self._df.taxon == 1).sum())
    if count:
        logger.warning("Found %s taxons with root ID (1)" % count)

    # taxons as index and their amount as values
    self._taxons = self._df.groupby("taxon").size()
    try:
        self._taxons.drop(0, inplace=True)
    except KeyError:
        pass  # 0 may not be there
    self._taxons.sort_values(ascending=False, inplace=True)

    category = self.df.groupby("status").size()
    self.classified = category['C'] if 'C' in category.index else 0
    self.unclassified = category['U'] if 'U' in category.index else 0
def get_flags_as_df(self):
    """Returns flags as a dataframe

    Each column is one SAM flag bit (1, 2, 4, ..., 2048) and each row one
    entry of :meth:`get_flags`; a cell is True when the bit is set.

    .. doctest::

        >>> from sequana import BAM, sequana_data
        >>> b = BAM(sequana_data('test.bam'))
        >>> df = b.get_flags_as_df()
        >>> df.sum()
        1       1000
        2        484
        4          2
        8          2
        16       499
        32       500
        64       477
        128      523
        256       64
        512        0
        1024       0
        2048       0
        dtype: int64

    .. seealso:: :class:`SAMFlags` for meaning of each flag
    """
    flags = self.get_flags()
    # the twelve flag bits: 1, 2, 4, ..., 2048
    bits = [1 << shift for shift in range(12)]
    masked = {bit: [flag & bit for flag in flags] for bit in bits}
    return pd.DataFrame(masked) > 0
def _get_df(self):
    """Return the read length / GC content table, building it on demand.

    When ``self._df`` is ``None`` every read of ``self.data`` is scanned
    and a dataframe with the columns ``read_length`` and ``GC_content``
    (percentage of G/C letters, 0-100) is stored in ``self._df``. The
    number of reads is stored in ``self._N``. ``self.reset()`` is called
    before and after the scan.

    :return: the cached dataframe.
    """
    if self._df is not None:
        return self._df

    self.reset()
    rows = []
    count = 0
    for count, read in enumerate(self.data, start=1):
        # progress report every 10000 reads
        if (count % 10000) == 0:
            print("Read %d sequences" % count)

        length = read.query_length

        letters = collections.Counter(read.query_sequence)
        gc_letters = (letters['g'] + letters['G'] +
                      letters['c'] + letters['C'])
        gc_percent = 100 * gc_letters / float(sum(letters.values()))

        rows.append([length, gc_percent])

    self._df = pd.DataFrame(rows, columns=['read_length', 'GC_content'])
    self._N = count
    self.reset()
    return self._df
def imshow_qualities(self):
    """Qualities

    Show, as an image, the mean quality per position for each tile. The
    per-tile means are also stored in ``self.data_imqual`` (one array per
    tile, tiles in sorted order).

    ::

        from sequana import sequana_data
        from sequana import FastQC
        filename = sequana_data("test.fastq", "testing")
        qc = FastQC(filename)
        qc.imshow_qualities()
        from pylab import tight_layout; tight_layout()

    """
    tiles = self._get_tile_info()

    # group the quality vectors by the tile they belong to
    per_tile = defaultdict(list)
    for tile, quality in zip(tiles['tiles'], self.qualities):
        per_tile[tile].append(quality)

    self.data_imqual = [
        pd.DataFrame(per_tile[tile]).mean().values
        for tile in sorted(per_tile.keys())
    ]

    from biokit.viz import Imshow

    image = Imshow(self.data_imqual)
    image.plot(xticks_on=False, yticks_on=False, origin='lower')
    pylab.title("Quality per tile", fontsize=self.fontsize)
    pylab.xlabel("Position in read (bp)")
    pylab.ylabel("tile number")
def find_motif_fasta(self, filename, motif, window=200,
                     local_threshold=None, global_threshold=None):
    """Scan the sequences of a FastA file for a motif.

    Each sequence is passed to :meth:`find_motif_from_sequence`; those
    whose score reaches the global threshold are reported.

    :param str filename: FastA file to scan.
    :param motif: motif forwarded to :meth:`find_motif_from_sequence`.
    :param int window: forwarded to :meth:`find_motif_from_sequence`.
    :param local_threshold: forwarded to
        :meth:`find_motif_from_sequence`.
    :param global_threshold: minimal score for a sequence to be reported.
        When ``None``, ``self.global_threshold`` is used.
    :return: dataframe with the columns ``query_name``, ``hit`` (score),
        ``length``, ``start`` (always 0) and ``end`` (sequence length).
    """
    from sequana import FastA
    from easydev import Progress

    # Honour the argument (it used to be silently ignored); fall back on
    # the instance default when it is not provided.
    if global_threshold is None:
        global_threshold = self.global_threshold

    data = FastA(filename)
    pb = Progress(len(data))

    hits = {
        "query_name": [],
        "hit": [],
        "length": [],
        "start": [],
        "end": [],
    }
    for i, item in enumerate(data):
        _, score = self.find_motif_from_sequence(
            item.sequence, motif, window=window,
            local_threshold=local_threshold)
        if score >= global_threshold:
            length = len(item.sequence)
            hits['query_name'].append(item.name)
            hits['start'].append(0)
            hits['end'].append(length)
            hits['length'].append(length)
            hits['hit'].append(score)
        pb.animate(i + 1)

    return pd.DataFrame(hits)
def get_table_dependencies(self):
    """Return dependencies of Sequana.

    :return: javascript followed by the HTML of a table listing each
        dependency (name linked to its PyPI page, and version), sorted by
        case-insensitive package name; an empty string when no dependency
        is found.
    """
    dependencies = easydev.get_dependencies('sequana')
    # if installed with conda, this will be empty
    if len(dependencies) == 0:
        return ""

    pypi = 'https://pypi.python.org/pypi/{0}'
    df = pd.DataFrame({
        'package': [dep.project_name for dep in dependencies],
        'version': [dep.version for dep in dependencies],
        'link': [pypi.format(dep.project_name) for dep in dependencies],
    })

    # case-insensitive sort through a temporary lower-case column
    df['sort'] = df['package'].str.lower()
    df.sort_values(by='sort', axis=0, inplace=True)
    df.drop('sort', axis=1, inplace=True)

    datatable = DataTable(df, 'dep')
    datatable.datatable.datatable_options = {
        'paging': 'false',
        'bFilter': 'false',
        'bInfo': 'false',
        'bSort': 'false',
    }
    datatable.datatable.set_links_to_column('link', 'package')
    js = datatable.create_javascript_function()
    html = datatable.create_datatable()
    return js + '\n' + html
def read_blasr_result(file_blasr, blasr_columns):
    """Load a space-separated blasr result file into two dataframes.

    :param str file_blasr: path of the blasr output file; each line is
        split on single spaces.
    :param list blasr_columns: column names, in file order. Must contain
        ``qName``, ``qLength``, ``score``, ``qAlignedSeq``,
        ``tAlignedSeq`` and ``matchPattern``.
    :return: tuple ``(res_blasr, res_align)`` where ``res_blasr`` holds
        all columns except the three alignment strings (``qLength`` as
        int, ``score`` as float) and ``res_align`` holds ``qName``,
        ``qLength`` and the three alignment columns.
    """
    # "with" guarantees the file is closed even if parsing fails
    with open(file_blasr, 'r') as handle:
        # NOTE(review): replace(" ", " ") is a no-op as written; it
        # presumably was meant to collapse repeated spaces - confirm.
        rows = [
            line.replace("\n", "").replace(" ", " ").split(" ")
            for line in handle
        ]

    res_blasr = pd.DataFrame(rows)
    res_blasr.columns = blasr_columns

    # convert to numeric
    res_blasr["qLength"] = res_blasr["qLength"].astype(int)
    res_blasr["score"] = res_blasr["score"].astype(float)

    # extract alignment columns (explicit copy, independent of res_blasr)
    res_align = res_blasr[[
        "qName", "qLength", "qAlignedSeq", "tAlignedSeq", "matchPattern"
    ]].copy()

    # drop alignment columns (useless and too heavy)
    res_blasr.drop(["qAlignedSeq", "tAlignedSeq", "matchPattern"],
                   axis=1, inplace=True)
    return res_blasr, res_align
def get_df(self):
    """Return the percentage per kingdom for every sample.

    For each ``(sample, filename)`` pair of ``self.sample_names`` and
    ``self.filenames`` the CSV file is read and its ``percentage`` column
    is summed per ``kingdom``. Entries whose kingdom is ``" "`` are moved
    into ``Unclassified``; a warning is logged when they exceed 5%.

    :return: dataframe with one column per sample and one row per
        kingdom, rows sorted in decreasing index order.
    """
    import pandas as pd

    per_sample = {}
    for sample, filename in zip(self.sample_names, self.filenames):
        totals = pd.read_csv(filename).groupby("kingdom")['percentage'].sum()

        # if a taxon is obsolete, the kingdom is empty. It is counted as
        # Unclassified and a warning is raised if the count is > 5%
        if " " in totals.index:
            percent = totals.loc[" "]
            if percent > 5:
                logger.warning(
                    "Found {}% of taxons in obsolete category".format(
                        percent))
            if "Unclassified" in totals.index:
                totals.loc['Unclassified'] += totals.loc[' ']
            else:
                totals.loc['Unclassified'] = totals.loc[' ']
            totals.drop(" ", inplace=True)

        per_sample[sample] = totals

    return pd.DataFrame(per_sample).sort_index(ascending=False)
def summary(self):
    """ Add information of filter.

    Append to ``self.sections`` a "Summary" section containing a table
    with two rows per comparison: the number of down/up/all genes
    returned by ``self.rnadiff.summary()`` as is, and the same numbers
    once ``self.rnadiff.log2_fc`` has been set to 1. Comparison names link
    to the ``#<name>_table_all`` and ``#<name>_table_sign`` anchors.

    Side effect: ``self.rnadiff.log2_fc`` is left at 1.
    """
    Sdefault = self.rnadiff.summary()
    self.rnadiff.log2_fc = 1
    S1 = self.rnadiff.summary()

    # set options
    options = {
        'scrollX': 'true',
        'pageLength': 20,
        'scrollCollapse': 'true',
        'dom': '',
        'buttons': [],
    }

    S = pd.concat([Sdefault, S1])
    N = len(Sdefault)

    # first N rows: unfiltered tables; last N rows: filtered tables
    links = [f"#{name}_table_all" for name in Sdefault.index] + \
            [f"#{name}_table_sign" for name in Sdefault.index]

    df = pd.DataFrame({
        'comparison': S.index.values,
        'Description': ['Number of DGE (any FC)'] * N +
                       ['Number of DGE (|FC| > 1)'] * N,
        'Down': S['down'].values,
        'Up': S['up'].values,
        'Total': S['all'].values,
        'comparison_link': links,
    })

    dt = DataTable(df, 'dge')
    dt.datatable.set_links_to_column('comparison_link', 'comparison',
                                     new_page=False)
    dt.datatable.datatable_options = options
    js_all = dt.create_javascript_function()
    html = dt.create_datatable(float_format='%d')

    self.sections.append({
        'name': "Summary",
        'anchor': 'filters_option',
        'content': f"""<p>Here below is a summary of the final Differential
Gene Expression (DGE) analysis. You can find two entries per comparison. The
first one has no filter except for an adjusted p-value of 0.05. The second
shows the expressed genes with a filter of the log2 fold change of 1 (factor
2 in a normal scale). Clicking on any of the links will lead you to the
section of the comparison.
{js_all} {html} </p>"""
    })
def get_data_reads(self):
    """Return the number of reads per lane and sample.

    For each entry of ``self.data["ConversionResults"]`` one row is
    produced per item of ``DemuxResults``, a ``Determined`` row with a
    count of 0 when the demultiplexed reads of the lane sum to 0, and one
    ``Undetermined`` row (count 0 when the lane has no ``Undetermined``
    entry).

    :return: dataframe with the columns ``lane``, ``name`` and ``count``.
    """
    lanes, names, reads = [], [], []

    def add(lane_id, name, count):
        lanes.append(lane_id)
        names.append(name)
        reads.append(count)

    for position, lane in enumerate(self.data["ConversionResults"], start=1):
        total = 0
        for sample in lane['DemuxResults']:
            add(lane['LaneNumber'], sample['SampleId'], sample['NumberReads'])
            total += sample['NumberReads']

        # if only undetermined (no sample sheet), no data should be found
        # meaning that there is 0 determined and no names associated,
        # so we call it Determined and store 0
        if total == 0:
            add(lane["LaneNumber"], "Determined", 0)

        # Undetermined rows are numbered by position in the list (1-based)
        if "Undetermined" in lane:
            undetermined = lane['Undetermined']['NumberReads']
        else:
            undetermined = 0
        add(position, "Undetermined", undetermined)

    return pd.DataFrame({"lane": lanes, "name": names, "count": reads})
def merge(self, overlap=0.2):
    """Merge the peaks of ``self.df1`` and ``self.df2``.

    Both tables are concatenated and sorted by ``chr`` then ``start``.
    Consecutive peaks are compared pairwise: when two peaks of the same
    chromosome share at least one base they are replaced by a single
    peak spanning both (``start``/``end``/``stop`` updated, ``category``
    set to ``'both'``, other fields taken from the second peak);
    otherwise the original peak is reported unchanged. A merged peak is
    not compared with the peak that follows it.

    :param float overlap: accepted for interface compatibility; it is not
        used (any overlap of at least one base triggers a merge).
    :return: dataframe of merged and original peaks with a fresh index.
    """
    peaks = pd.concat([self.df1, self.df2]).sort_values(['chr', 'start'])

    merged = []
    pending = None  # peak waiting to be compared with the next one
    for _, current in peaks.iterrows():
        if pending is None:
            pending = current
            continue

        # Intervals overlap when each one starts before the other ends;
        # peaks located on different chromosomes never overlap.
        overlapping = (
            current['chr'] == pending['chr']
            and current['start'] <= pending['end']
            and current['end'] >= pending['start']
        )

        if overlapping:
            fused = current.copy()
            fused['start'] = min(current['start'], pending['start'])
            fused['end'] = max(current['end'], pending['end'])
            fused['stop'] = fused['end']  # FIXME same as end. decide on one value
            fused['category'] = 'both'
            merged.append(fused)
            pending = None
        else:
            merged.append(pending)
            pending = current

    # the last peak has nothing left to be compared with: report it
    if pending is not None:
        merged.append(pending)

    return pd.DataFrame(merged).reset_index(drop=True)
def get_data(self, ontologies, include_negative_enrichment=True, fdr=0.05):
    """Return the enrichment results of the requested ontologies.

    The ``result`` entries of ``self.enrichment[ontology]`` are pooled,
    entries labelled ``UNCLASSIFIED`` and those with ``number_in_list``
    equal to 0 are removed, and convenience columns are added (``id``,
    ``label``, ``pct_diff_expr``, ``log2_fold_enrichment``,
    ``abs_log2_fold_enrichment``).

    :param ontologies: one ontology name or a list of names.
    :param bool include_negative_enrichment: when ``False`` only the terms
        with a fold enrichment of at least 1 are kept.
    :param float fdr: terms with an FDR above this value are dropped.
    :return: dataframe of the selected terms (empty dataframe when no
        classified term is found).
    """
    if isinstance(ontologies, str):
        ontologies = [ontologies]
    else:
        assert isinstance(ontologies, list)

    # First, we select the required ontologies and build a common data set
    records = []
    for ontology in ontologies:
        result = self.enrichment[ontology]['result']
        # with a single hit, a dictionary is provided instead of a list
        if isinstance(result, dict):
            result = [result]
        records.extend(result)

    # remove unclassified GO terms
    unclassified = [
        rec for rec in records if rec['term']['label'] == "UNCLASSIFIED"
    ]
    logger.info("Found {} unclassified".format(len(unclassified)))
    classified = [
        rec for rec in records if rec['term']['label'] != "UNCLASSIFIED"
    ]

    df = pd.DataFrame(classified)
    if len(df) == 0:
        return df
    logger.info("Found {} GO terms".format(len(df)))

    df = df.query("number_in_list!=0").copy()
    logger.info(
        "Found {} GO terms with at least 1 gene in reference".format(
            len(df)))

    # extract the ID and label
    df['id'] = [term['id'] for term in df['term']]
    df['label'] = [term['label'] for term in df['term']]

    # some extra information for convenience
    df["pct_diff_expr"] = (df['number_in_list'] * 100 /
                           df['number_in_reference'])
    log2_enrichment = pylab.log2(df['fold_enrichment'])
    df["log2_fold_enrichment"] = log2_enrichment
    df["abs_log2_fold_enrichment"] = abs(log2_enrichment)

    # Some users may want to include GO terms with fold enrichment
    # significantly below 1 or not.
    if include_negative_enrichment is False:
        df = df.query("fold_enrichment>=1").copy()
        logger.info(
            "Found {} GO terms after keeping only positive enrichment".
            format(len(df)))

    # filter out terms whose FDR is above the threshold
    df = df.query("fdr<=@fdr").copy()
    logger.info("Found {} GO terms after keeping only FDR<{}".format(
        len(df), fdr))
    return df
def to_html(self):
    """Return an HTML snippet summarising ``self.data``.

    :return: HTML string made of the contamination (3 significant
        figures through ``precision``), unpaired and duplicated entries.
    """
    stats = self.data
    parts = [
        "Reads with Phix: %s %%<br>" % precision(stats['contamination'], 3)
    ]

    # Mapped/unmapped table; the R2 column is added only when the stats
    # contain an "R2_mapped" entry.
    # NOTE: the table is built but not included in the returned HTML.
    has_r2 = "R2_mapped" in stats.keys()
    counts = {'R1': [stats['R1_mapped'], stats['R1_unmapped']]}
    if has_r2:
        counts['R2'] = [stats['R2_mapped'], stats['R2_unmapped']]
    df = pd.DataFrame(counts)
    df.index = ['mapped', 'unmapped']

    parts.append("Unpaired: %s <br>" % stats['unpaired'])
    parts.append("duplicated: %s <br>" % stats['duplicated'])
    return "".join(parts)
def _dict_to_df(self, region_list, annotation):
    """ Convert dictionary as dataframe.

    :param region_list: records accepted by the :class:`pandas.DataFrame`
        constructor; only the expected columns are kept.
    :param annotation: when false, only the first nine columns (from
        ``chr`` to ``max_zscore``) are used. When true, the annotation
        columns are included, ``gene`` is renamed ``gene_name`` and the
        rows whose ``type`` belongs to
        ``FilteredGenomeCov._feature_not_wanted`` are removed.
    :return: dataframe with ``start``, ``end`` and ``size`` cast to int.
    """
    colnames = ["chr", "start", "end", "size", "mean_cov", "max_cov",
                "mean_rm", "mean_zscore", "max_zscore", "gene_start",
                "gene_end", "type", "gene", "strand", "product"]
    if not annotation:
        colnames = colnames[:9]

    # build the frame once, directly with the final set of columns
    merge_df = pd.DataFrame(region_list, columns=colnames)
    merge_df = merge_df.astype({name: int for name in ("start", "end", "size")})

    if annotation:
        merge_df.rename(columns={"gene": "gene_name"}, inplace=True)
        # maybe let the user set what he wants
        unwanted = merge_df["type"].isin(
            FilteredGenomeCov._feature_not_wanted)
        return merge_df.loc[~unwanted]
    return merge_df
def get_taxonomy_biokit(self, ids):
    """Retrieve taxons given a list of taxons

    :param list ids: list of taxons as strings or integers. Could also
        be a single string or a single integer
    :return: a dataframe indexed by taxon identifier (as integers) with
        one column per rank (kingdom, phylum, class, order, family,
        genus, species) plus a ``name`` column. A rank missing from the
        lineage is set to a single space. An empty dataframe is returned
        when ``ids`` is empty.

    .. note:: the first call first loads all taxons in memory and takes a
        few seconds but subsequent calls are much faster
    """
    # filter the lineage to keep only information from one of the main
    # ranks; a superkingdom entry is stored in the kingdom column
    ranks = ('kingdom', 'phylum', 'class', 'order', 'family', 'genus',
             'species')

    if isinstance(ids, int):
        ids = [ids]

    if len(ids) == 0:
        return pd.DataFrame()

    logger.info('Retrieving taxon using biokit.Taxonomy')

    if not isinstance(ids, list):
        ids = [ids]

    lineage = [self.tax.get_lineage_and_rank(x) for x in ids]

    # Now, we filter each lineage to keep only relevant ranks
    # We drop the 'no rank' entries and create a dictionary
    results = []
    for this in lineage:
        record = dict.fromkeys(ranks, ' ')
        for entry in this:
            if entry[1] in ranks:
                record[entry[1]] = entry[0]
            elif entry[1] == "superkingdom":
                record["kingdom"] = entry[0]

        # Scientific name is taken from the last entry of the lineage
        # TODO (check this assumption)
        # e.g. 351680 and 151529 have same 7 ranks so to differentiate
        # them, the scientific name should be used.
        try:
            record['name'] = this[-1][0]
        except (IndexError, KeyError, TypeError):
            # no usable last entry (e.g. empty lineage)
            record['name'] = "root (ambigous kingdom)"
        results.append(record)

    df = pd.DataFrame.from_records(results)
    df.index = ids
    df = df[list(ranks) + ['name']]
    df.index = df.index.astype(int)
    return df
def __init__(self, data):
    """Store the input data as a dataframe.

    :param data: anything accepted by the :class:`pandas.DataFrame`
        constructor. When the conversion fails, the input is stored as
        provided; it must then expose a ``shape`` attribute with at least
        two dimensions.

    Attributes set: ``df`` (the data), ``xmax`` (``df.shape[1]``) and
    ``X`` (``None``).
    """
    try:
        self.df = pd.DataFrame(data)
    except Exception:
        # not convertible to a dataframe: keep the input untouched
        self.df = data
    self.xmax = self.df.shape[1]
    self.X = None
def get_heatmap_df():
    """Return a small 4x4 dataframe (columns A-D) to play with and test."""
    import pandas as pd

    columns = dict(
        A=[1, 0, 1, 1],
        B=[0.9, 0.1, 0.6, 1],
        C=[0.5, 0.2, 0, 1],
        D=[0.5, 0.2, 0, 1],
    )
    return pd.DataFrame(columns)
def summary(self):
    """ Get a summary DataFrame from a RNADiff analysis.

    :return: dataframe with one column per key of ``self.dr_gene_lists``
        and one row per key of the nested mappings; each cell is the
        length of the corresponding list.
    """
    sizes = {}
    for comparison in self.dr_gene_lists:
        gene_lists = self.dr_gene_lists[comparison]
        sizes[comparison] = {
            category: len(self.dr_gene_lists[comparison][category])
            for category in gene_lists
        }
    return pd.DataFrame(sizes)
def __init__(self, x, y=None, verbose=False):
    """Store the input as a two-column dataframe in ``self.df``.

    :param x: a dataframe (a copy is stored, its first two columns being
        renamed ``x`` and ``y``; the original names are kept in
        ``xy_names``), any input accepted by :class:`pandas.DataFrame`
        when ``y`` is omitted, or the x values when ``y`` is provided.
    :param y: the y values, if ``x`` holds only the x values.
    :param bool verbose: stored in ``self.verbose``.

    When ``y`` is omitted and the data have 2 rows but not 2 columns,
    they are transposed. Nothing is stored in ``self.df`` when ``x`` is
    ``None`` and ``y`` is provided.
    """
    self.verbose = verbose
    self.xy_names = ['x', 'y']

    if isinstance(x, pd.DataFrame) is True:
        self.df = x.copy()
        renamed = list(self.df.columns)
        renamed[0] = 'x'
        renamed[1] = 'y'
        # remember the original names before overwriting them
        self.xy_names = self.df.columns[0:2]
        self.df.columns = renamed
    elif y is None:
        # could be a list of lists, a pandas-compatible dictionary
        self.df = pd.DataFrame(x)
        n_rows, n_cols = self.df.shape[0], self.df.shape[1]
        if n_cols != 2 and n_rows == 2:
            print("warning transposing data")
            self.df = self.df.transpose()
    elif x is not None:
        self.df = pd.DataFrame({'x': x, 'y': y})
def plot_dendogram(
    self,
    max_features=5000,
    transform_method="log",
    method="ward",
    metric="euclidean",
):
    """Plot a dendrogram of the samples from the normalised counts.

    :param int max_features: forwarded to ``Cluster.scale_data``.
    :param transform_method: ``"log"``, ``"anscombe"`` or ``None`` (the
        normalised counts are then used as they are).
    :param str method: linkage method given to the dendrogram.
    :param str metric: distance metric given to the dendrogram.

    For info about metric and methods: https://tinyurl.com/yyhk9cl8
    """
    assert transform_method in ["log", "anscombe", None]

    # first we take the normalised data
    from sequana.viz import clusterisation
    from sequana.viz import dendogram

    cluster = clusterisation.Cluster(self.counts_norm)

    if transform_method is None:
        df = pd.DataFrame(self.counts_norm)
    else:
        scaled = cluster.scale_data(transform_method=transform_method,
                                    max_features=max_features)
        df = pd.DataFrame(scaled[0])
        df.index = scaled[1]
    df.columns = self.counts_norm.columns

    side_colors = list(self.design_df.group_color.unique())
    d = dendogram.Dendogram(
        df.T,
        metric=metric,
        method=method,
        side_colors=side_colors,
    )

    # Convert groups into numbers for Dendrogram category
    conditions = self.design_df[self.condition]
    group_conv = {group: i for i, group in enumerate(conditions.unique())}
    d.category = self.design_df[self.condition].map(group_conv).to_dict()
    d.plot()
def summary(self):
    """ Get a summary DataFrame from a RNADiff analysis.

    :return: dataframe indexed by the keys of ``self.gene_lists`` (in
        their iteration order) with a single column, named after
        ``self.condition_names`` joined by ``_vs_``, holding the length
        of each list.
    """
    # A list (not a set) of rows: row order is deterministic and pandas
    # refuses unordered sets as input.
    rows = [
        (name, len(self.gene_lists[name])) for name in self.gene_lists.keys()
    ]
    # explicit column labels so that an empty input still gives a valid frame
    summary = pd.DataFrame(rows, columns=[0, 1])
    df = summary.set_index(0)
    df.columns = ["_vs_".join(self.condition_names)]
    return df
def plot_feature_most_present(self):
    """Plot, for each sample, the share of reads of its top feature.

    For every column of ``self.counts_raw`` the feature with the highest
    count is identified and its count is expressed as a percentage of the
    column total. The result is joined to ``self.design_df`` and drawn as
    horizontal bars coloured by ``group_color``, with the feature
    identifier written on each bar.
    """
    # computed once, outside of the loop
    stacked = self.counts_raw.stack()
    totals = self.counts_raw.sum()

    rows = []
    # Series.items() replaces iteritems(), removed in pandas 2.0
    for sample, gene in self.counts_raw.idxmax().items():
        most_exp_gene_count = stacked.loc[gene, sample]
        total_sample_count = totals.loc[sample]
        rows.append({
            "label": sample,
            "gene_id": gene,
            "count": most_exp_gene_count,
            "total_sample_count": total_sample_count,
            "most_exp_percent":
                most_exp_gene_count / total_sample_count * 100,
        })

    df = pd.DataFrame(rows).set_index("label")
    df = pd.concat([self.design_df, df], axis=1)

    pylab.clf()
    bars = pylab.barh(
        df.index,
        df.most_exp_percent,
        color=df.group_color,
        zorder=10,
        lw=1,
        ec="k",
        height=0.9,
    )

    # write the feature identifier on each bar
    for idx, _ in enumerate(bars):
        pylab.text(
            2,
            idx,
            df.gene_id.iloc[idx],
            ha="center",
            va="center",
            rotation=0,
            zorder=20,
        )

    self._format_plot(
        xlabel="Percent of total reads",
    )
    pylab.tight_layout()
def boxplot_mapq_concordance(self, method):
    """Draw one boxplot of the concordance per mapq value (1 to 60).

    :param str method: method name given to ``self._get_data``; can only
        be ``"bwa"`` for now.
    """
    # method can only be bwa for now
    assert method == "bwa"

    records = self._get_data(method)
    df = pd.DataFrame(records, columns=["mapq", "length", "concordance"])

    pylab.clf()
    per_mapq = [df[df.mapq == mapq]['concordance'] for mapq in range(1, 61)]
    pylab.boxplot(per_mapq)
    pylab.xlabel("mapq")
    pylab.ylabel("concordance")
    pylab.grid()

    # one tick every ten mapq values
    ticks = [10, 20, 30, 40, 50, 60]
    pylab.xticks(ticks, ticks)
def summary(self):
    """Return a one-row dataframe summarising this comparison.

    :return: dataframe indexed by ``self.name`` with the columns
        ``log2_fc``, ``alpha`` and the number of genes in the ``up``,
        ``down`` and ``all`` entries of ``self.gene_lists``.
    """
    row = {"log2_fc": self._log2_fc, "alpha": self._alpha}
    for category in ("up", "down", "all"):
        row[category] = len(self.gene_lists[category])
    return pd.DataFrame(row, index=[self.name])
def __init__(self, data, na=0):
    """.. rubric:: Constructor

    Plots the content of square matrix that contains correlation values.

    :param data: input can be a dataframe (Pandas), or list of lists
        (python) or a numpy matrix. Note, however, that values must be
        between -1 and 1. If not, if the matrix (or list of lists) is not
        squared, or if the row and column labels differ, then the
        correlation is computed. The data or computed correlation is
        stored in :attr:`df` attribute.
    :param na: replace NA values with this value (default 0)

    The :attr:`params` contains some tunable parameters for the colorbar
    in the :meth:`plot` method.

    ::

        # can be a list of lists, the correlation matrix is then a 2x2 matrix
        c = corrplot.Corrplot([[1,1], [2,4], [3,3], [4,4]])

    """
    super(Corrplot, self).__init__()

    #: The input data is stored in a dataframe and must therefore be
    #: compatible (list of lists, dictionary, matrices...)
    self.df = pd.DataFrame(data, copy=True)

    n_rows, n_cols = self.df.shape
    out_of_range = self.df.max().max() > 1 or self.df.min().min() < -1
    not_square = n_rows != n_cols
    labels_differ = list(self.df.index) != list(self.df.columns)

    # the input is not a correlation matrix: compute the correlation
    if out_of_range or not_square or labels_differ:
        print("Computing correlation")
        self.df = self.df.corr()

    # replace NA with the requested value
    self.df.fillna(na, inplace=True)

    #: tunable parameters for the :meth:`plot` method.
    self.params = {
        'colorbar.N': 100,
        'colorbar.shrink': .8,
        'colorbar.orientation': 'vertical',
    }