def plot_unknown_barcodes(self, N=20):
    """Plot the most abundant unknown barcodes as horizontal bars, per lane.

    The input is ``self.data['UnknownBarcodes']``, iterated as records that
    provide a ``Lane`` and a ``Barcodes`` entry.

    :param int N: number of barcodes to show. Barcodes are ranked by their
        number of reads summed over all lanes.
    :return: the plotted dataframe: one row per barcode (most abundant
        last), one column per lane named ``Lane <lane>``.
    """
    from matplotlib import rcParams

    ub = self.data['UnknownBarcodes']
    df = pd.DataFrame({x['Lane']: x['Barcodes'] for x in ub})

    # if data is made of undetermined only, the dataframe is just made of
    # N lanes with one entry (unknown); an all-zero 'known' row is added
    if "unknown" in df.index and len(df) == 1:
        df.loc['known'] = [0 for _ in df.columns]

    # keep the N largest barcodes, then reverse so the largest is listed last
    top = df.sum(axis=1).sort_values(ascending=False).index[0:N]
    data = df.loc[top][::-1]
    data.columns = ["Lane {}".format(x) for x in data.columns]

    # axes.axisbelow is a global setting: switch it on while the axes are
    # created and drawn, then put back whatever the user had before
    previous_axisbelow = rcParams['axes.axisbelow']
    rcParams['axes.axisbelow'] = True
    try:
        pylab.figure(figsize=(10, 8))
        ax = pylab.gca()
        data.plot(kind="barh", width=1, ec="k", ax=ax)
    finally:
        rcParams['axes.axisbelow'] = previous_axisbelow

    pylab.xlabel("Number of reads", fontsize=12)
    pylab.ylabel("")
    pylab.grid(True)
    # NOTE(review): legend entries are numbered 1..n, which presumably
    # matches the lane identifiers -- confirm for runs with missing lanes
    pylab.legend(
        ["Lane {}".format(x) for x in range(1, len(df.columns) + 1)],
        loc="lower right")
    try:
        pylab.tight_layout()
    except Exception as err:
        # best effort: a layout failure must not prevent returning the data
        print(err)
    return data
def plot_volcano(self):
    """Volcano plot: log2 fold change versus -log10 of the adjusted p-value.

    Rows of ``self.df`` with ``padj > 0.05`` are drawn in red, rows with
    ``padj <= 0.05`` in black. Rows whose ``padj`` is NaN match neither
    query and are not drawn. A dashed horizontal line marks the 0.05
    threshold and the x-axis is made symmetric around zero.

    .. plot::
        :include-source:

        from sequana.rnadiff import RNADiffResults
        from sequana import sequana_data

        r = RNADiffResults(sequana_data("rnadiff/rnadiff_onecond_1"))
        r.plot_volcano()

    """
    d1 = self.df.query("padj>0.05")
    d2 = self.df.query("padj<=0.05")

    pylab.figure()
    pylab.plot(d1.log2FoldChange, -np.log10(d1.padj), marker="o",
               alpha=0.5, color="r", lw=0)
    pylab.plot(d2.log2FoldChange, -np.log10(d2.padj), marker="o",
               alpha=0.5, color="k", lw=0)
    pylab.grid(True)
    pylab.xlabel("fold change")
    pylab.ylabel("log10 adjusted p-value")

    # Series.abs().max() skips NaN, whereas the builtin min/max give an
    # order-dependent (possibly NaN) answer when fold changes are missing
    limit = self.df.log2FoldChange.abs().max()
    if np.isfinite(limit) and limit > 0:
        pylab.xlim([-limit, limit])

    _, ymax = pylab.ylim()
    pylab.ylim([0, ymax])
    pylab.axhline(-np.log10(0.05), lw=2, ls="--", color="r",
                  label="pvalue threshold (0.05)")
def run_enrichment_kegg(self, organism, annot_col="Name", out_dir="enrichment"):  # pragma: no cover
    """Run the KEGG pathway enrichment for every comparison and export it.

    For each comparison in ``self.comparisons`` and each direction
    (up, down, all), a scatter plot is saved as PDF and PNG in
    ``<out_dir>/figures`` and the enrichment table is collected. The
    concatenated table is stored in ``self.enrichment_kegg`` and written to
    ``<out_dir>/enrichment_kegg.xlsx`` (sheet ``kegg``).

    :param organism: passed as is to :class:`KeggPathwayEnrichment`
    :param str annot_col: passed to :meth:`get_gene_lists` to build the
        gene lists.
    :param out_dir: output directory (created if needed).
    """
    out_dir = Path(out_dir) / "figures"
    out_dir.mkdir(exist_ok=True, parents=True)

    gene_lists_dict = self.get_gene_lists(annot_col=annot_col, dropna=True)
    enrichment = {}

    for compa in self.comparisons:
        gene_lists = gene_lists_dict[compa]
        ke = KeggPathwayEnrichment(gene_lists, organism, progress=False)
        ke.compute_enrichment()

        for direction in ["up", "down", "all"]:
            enrichment[(compa, direction)] = ke._get_final_df(
                ke.enrichment[direction].results, nmax=10000)
            pylab.figure()
            ke.scatterplot(direction)
            pylab.tight_layout()
            pylab.savefig(out_dir / f"kegg_{compa}_{direction}.pdf")
            pylab.savefig(out_dir / f"kegg_{compa}_{direction}.png")

        logger.info(f"KEGG enrichment for {compa} DONE.")

    df = pd.concat(enrichment).sort_index()
    df.index.rename(["comparison", "direction", "index"], inplace=True)
    self.enrichment_kegg = df

    # Export results (should be moved to enrichment.py at some point I think)
    with pd.ExcelWriter(out_dir.parent / "enrichment_kegg.xlsx") as writer:
        table = self.enrichment_kegg.reset_index()
        # sheet_name must be given by keyword with recent pandas
        table.to_excel(writer, sheet_name="kegg", index=False)
        ws = writer.sheets["kegg"]
        try:
            ws.autofilter(0, 0, table.shape[0], table.shape[1] - 1)
        except Exception:
            # the auto-filter is cosmetic and depends on the Excel engine
            # in use; never fail the export because of it
            logger.warning("Fixme")
def plot_hist_coverage(self, logx=True, logy=True, fontsize=16, N=20,
                       fignum=1, hold=False, alpha=0.5, filename=None,
                       **kw_hist):
    """Plot the histogram of the coverage (column ``cov`` of ``self.df``).

    :param bool logx: use log-spaced bins and a logarithmic x-axis. The
        bins start at 1 (10**0), so coverage values below 1 fall outside
        the histogram.
    :param bool logy: use a logarithmic y-axis (counts).
    :param int fontsize: font size of the axis labels.
    :param int N: number of bins (number of bin edges when *logx* is set).
    :param int fignum: figure number used when *hold* is False.
    :param bool hold: if False, figure *fignum* is selected and cleared
        before plotting; otherwise the current figure is drawn on.
    :param float alpha: transparency of the bars.
    :param filename: if provided, the figure is saved in that file.
    :param kw_hist: any other keyword is passed to :func:`pylab.hist`.

    Missing coverage values (NaN) are ignored.
    """
    if hold is False:
        pylab.figure(fignum)
        pylab.clf()
        ax = pylab.gca()
        ax.set_facecolor('#eeeeee')

    data = self.df['cov'].dropna().values
    maxcov = data.max()

    if logx is True and logy is True:
        bins = pylab.logspace(0, pylab.log10(maxcov), N)
        pylab.hist(data, bins=bins, log=True, label=self.chrom_name,
                   alpha=alpha, **kw_hist)
        pylab.semilogx()
        pylab.xlabel("Coverage (log scale)", fontsize=fontsize)
        pylab.ylabel("Count (log scale)", fontsize=fontsize)
    elif logx is False and logy is True:
        pylab.hist(data, bins=N, log=True, label=self.chrom_name,
                   alpha=alpha, **kw_hist)
        pylab.xlabel("Coverage", fontsize=fontsize)
        pylab.ylabel("Count (log scale)", fontsize=fontsize)
    elif logx is True and logy is False:
        bins = pylab.logspace(0, pylab.log10(maxcov), N)
        # use the log-spaced edges, as in the log/log case, so that the bars
        # have a constant width on the logarithmic x-axis
        pylab.hist(data, bins=bins, label=self.chrom_name, alpha=alpha,
                   **kw_hist)
        pylab.xlabel("Coverage (log scale)", fontsize=fontsize)
        pylab.ylabel("Count", fontsize=fontsize)
        pylab.semilogx()
    else:
        pylab.hist(data, bins=N, label=self.chrom_name, alpha=alpha,
                   **kw_hist)
        pylab.xlabel("Coverage", fontsize=fontsize)
        pylab.ylabel("Count", fontsize=fontsize)

    pylab.grid(True)
    if filename:
        pylab.savefig(filename)
def plot(self, n_components=2, n_neighbors=5, transform="log",
         switch_x=False, switch_y=False, switch_z=False, colors=None,
         max_features=500, show_plot=True):
    """Project the samples with Isomap and plot the first components.

    :param int n_components: number of embedding dimensions, passed to
        :class:`sklearn.manifold.Isomap`. The two extra panels are only
        drawn when it is 3 or more.
    :param int n_neighbors: passed to :class:`sklearn.manifold.Isomap`.
    :param transform: passed to :meth:`scale_data` as *transform_method*
        (can be 'log' or 'anscombe'; log is just log10; count with zeros
        are set to 1).
    :param bool switch_x: flip the sign of the first component.
    :param bool switch_y: flip the sign of the second component.
    :param bool switch_z: flip the sign of the third component (requires
        at least 3 components).
    :param colors: passed to the plotting helper.
    :param int max_features: passed to :meth:`scale_data`.
    :param bool show_plot: if False, nothing is drawn.
    :return: the fitted Isomap instance. The embedding is also stored in
        ``self.Xr``.
    """
    from sklearn.manifold import Isomap

    pylab.clf()

    data, kept = self.scale_data(transform_method=transform,
                                 max_features=max_features)

    iso = Isomap(n_neighbors=n_neighbors, n_components=n_components)
    iso.fit(data.T)

    Xr = iso.transform(data.T)
    # self.Xr is the same array, so the sign flips below apply to it too
    self.Xr = Xr

    if switch_x:
        Xr[:, 0] *= -1
    if switch_y:
        Xr[:, 1] *= -1
    if switch_z:
        Xr[:, 2] *= -1

    # PC1 vs PC2
    if show_plot:
        pylab.figure(1)
        self._plot(Xr, pca=None, pc1=0, pc2=1, colors=colors)

    if n_components >= 3 and show_plot:
        pylab.figure(2)
        self._plot(Xr, pca=None, pc1=0, pc2=2, colors=colors)
        pylab.figure(3)
        self._plot(Xr, pca=None, pc1=1, pc2=2, colors=colors)
    return iso
def plot(self, n_components=2, transform="log", switch_x=False,
         switch_y=False, switch_z=False, colors=None, max_features=500,
         show_plot=True):
    """Compute a PCA of the samples and plot the first components.

    :param n_components: at number starting at 2 or a value below 1
        e.g. 0.95 means select automatically the number of components to
        capture 95% of the variance
    :param transform: can be 'log' or 'anscombe', log is just log10. count
        with zeros, are set to 1
    :param bool switch_x: flip the sign of the first component.
    :param bool switch_y: flip the sign of the second component.
    :param bool switch_z: flip the sign of the third component (requires
        at least 3 components).
    :param colors: passed to the plotting helper.
    :param int max_features: passed to :meth:`scale_data`.
    :param bool show_plot: if False, nothing is drawn.
    :return: the explained variance ratio of each component. The projected
        data are also stored in ``self.Xr``.
    """
    assert transform in ['log', 'anscombe']
    from sklearn.decomposition import PCA

    pylab.clf()
    pca = PCA(n_components)

    data, kept = self.scale_data(transform_method=transform,
                                 max_features=max_features)
    pca.fit(data.T)

    # NOTE(review): the PCA is fitted on the output of scale_data but the
    # projected matrix is re-scaled from self.df -- confirm this is intended
    Xr = pca.transform(self.scaler.fit_transform(self.df.loc[kept].T))
    # self.Xr is the same array, so the sign flips below apply to it too
    self.Xr = Xr

    if switch_x:
        Xr[:, 0] *= -1
    if switch_y:
        Xr[:, 1] *= -1
    if switch_z:
        Xr[:, 2] *= -1

    # PC1 vs PC2
    if show_plot:
        pylab.figure(1)
        self._plot(Xr, pca=pca, pc1=0, pc2=1, colors=colors)

    if len(pca.explained_variance_ratio_) >= 3 and show_plot:
        pylab.figure(2)
        self._plot(Xr, pca=pca, pc1=0, pc2=2, colors=colors)
        pylab.figure(3)
        # give the fitted PCA to the third panel as well, like the other two
        self._plot(Xr, pca=pca, pc1=1, pc2=2, colors=colors)

    return pca.explained_variance_ratio_
def plot(self, kind="pie", cmap="copper", threshold=1, radius=0.9,
         textcolor="red", **kargs):
    """A simple non-interactive plot of taxons

    :param str kind: 'pie' or 'barh'. Any other value is logged as an
        error and nothing is plotted.
    :param cmap: colormap of the pie chart.
    :param threshold: taxons whose percentage is below this value are
        summed into a single 'others' entry. Must be in ]0, 100[.
    :param radius: radius of the pie chart.
    :param textcolor: color of the texts of the pie chart.
    :param kargs: any other keyword is passed to the pandas plot method.
    :return: None if no taxon were found (or *kind* is invalid); otherwise
        the plotted percentages, indexed by taxon name and sorted.

    A Krona Javascript output is also available in :meth:`kraken_to_krona`

    .. plot::
        :include-source:

        from sequana import KrakenResults, sequana_data
        test_file = sequana_data("test_kraken.out", "testing")
        k = KrakenResults(test_file)
        df = k.plot(kind='pie')

    .. seealso:: to generate the data see :class:`KrakenPipeline`
        or the standalone application **sequana_taxonomy**.
    """
    if len(self._df) == 0:
        return None

    if not self._data_created:
        self.kraken_to_krona()

    if kind not in ['barh', 'pie']:
        logger.error('kind parameter: Only barh and pie are supported')
        return None

    # This may have already been called but maybe not. This is not time
    # consuming, so we call it again here
    if len(self.taxons.index) == 0:
        return None

    # .loc is used throughout: the .ix indexer was removed from pandas
    df = self.get_taxonomy_biokit(list(self.taxons.index))
    # presumably the taxonomy table has 8 columns -- verify against helper
    df.loc[-1] = ["Unclassified"] * 8

    data = self.taxons.copy()
    data.loc[-1] = self.unclassified
    data = data / data.sum() * 100

    assert threshold > 0 and threshold < 100
    others = data[data < threshold].sum()
    # >= so that a taxon exactly at the threshold is kept rather than lost
    data = data[data >= threshold]

    names = df.loc[data.index]['name']
    data.index = names.values
    data.loc['others'] = others

    try:
        data.sort_values(inplace=True)
    except AttributeError:
        # very old pandas versions without sort_values
        data.sort(inplace=True)

    # text may be long so, let us increase the figsize a little bit
    pylab.figure(figsize=(10, 8))
    pylab.clf()
    if kind == "pie":
        ax = data.plot(kind=kind, cmap=cmap, autopct='%1.1f%%',
                       radius=radius, **kargs)
        pylab.ylabel(" ")
        for text in ax.texts:
            # large, x-small, small, None, x-large, medium, xx-small,
            # smaller, xx-large, larger
            text.set_size("small")
            text.set_color(textcolor)
        for wedge in ax.patches:
            wedge.set_linewidth(1)
            wedge.set_edgecolor("k")
        self.ax = ax
    elif kind == "barh":
        ax = data.plot(kind=kind, **kargs)
        pylab.xlabel(" percentage ")

    return data
def plot(
    self,
    num=1,
    cmap=None,
    colorbar=True,
    vmin=None,
    vmax=None,
    colorbar_position="right",
    gradient_span="None",
    figsize=(12, 8),
    fontsize=None,
):
    """Plot the heatmap with its row/column dendrograms.

    Using as input::

        df = pd.DataFrame({'A':[1,0,1,1],
                           'B':[.9,0.1,.6,1],
                           'C':[.5,.2,0,1],
                           'D':[.5,.2,0,1]})

    we can plot the heatmap + dendogram as follows::

        h = Heatmap(df)
        h.plot(vmin=0, vmax=1.1)

    .. plot::
        :include-source:
        :width: 80%

        from sequana.viz import heatmap
        df = heatmap.get_heatmap_df()
        h = heatmap.Heatmap(df)
        h.category_column['A'] = 1
        h.category_column['C'] = 1
        h.category_column['D'] = 2
        h.category_column['B'] = 2
        h.plot()

    :param num: figure number.
    :param cmap: colormap; defaults to ``self.params.cmap``.
    :param bool colorbar: add the color legend.
    :param vmin: lower bound of the color normalisation.
    :param vmax: upper bound of the color normalisation.
    :param str colorbar_position: 'right' or 'top left'.
    :param str gradient_span: 'min_to_max_centered', 'only_max' or
        'only_min' override *vmin*/*vmax* from the data; any other value
        leaves them untouched.
    :param figsize: size of the figure.
    :param fontsize: font size; by default it is chosen from the number of
        rows/columns.
    :return: a dictionary of the created axes/artists (keys among
        dendogram1, dendogram2, heatmap, category_column, category_row,
        colorbar, colorbar_scalablemap).
    :raises ValueError: if *colorbar_position* is not supported.

    Side effects: ``self.frame`` is reordered according to the
    clustering, ``self.d`` stores the ordered frame and the row/column
    orders, and ``matplotlib.rcParams["font.size"]`` is changed globally.
    """
    import matplotlib

    # save all parameters in a dict
    layout = {}

    if cmap is None:
        cmap = self.params.cmap
    try:
        import colormap

        cmap = colormap.cmap_builder(cmap)
    except Exception:
        # best effort: colormap is optional; keep cmap as given
        pass

    # keep track of row and column names for later.
    row_header = self.frame.index
    column_header = self.frame.columns

    # FIXME something clever for the fontsize
    # the more labels, the smaller the font (exclusive branches)
    n_labels = max(len(row_header), len(column_header))
    if n_labels > 100:
        matplotlib.rcParams["font.size"] = 6
    elif n_labels > 50:
        matplotlib.rcParams["font.size"] = 7
    elif n_labels > 30:
        matplotlib.rcParams["font.size"] = 8
    else:
        matplotlib.rcParams["font.size"] = 12
    if fontsize:
        matplotlib.rcParams["font.size"] = fontsize

    # scaling min/max range
    # min_to_max, min_to_max_centered, only_max, only_min
    self.gradient_span = gradient_span
    if self.gradient_span == "min_to_max_centered":
        vmax = self.frame.max().max()
        vmin = self.frame.min().min()
        vmax = max([vmax, abs(vmin)])
        vmin = vmax * -1
    if self.gradient_span == "only_max":
        vmin = 0
        vmax = self.frame.max().max()
    if self.gradient_span == "only_min":
        vmin = self.frame.min().min()
        vmax = 0
    norm = matplotlib.colors.Normalize(vmin, vmax)

    # Scale the figure window size
    fig = pylab.figure(num=num, figsize=figsize)
    fig.clf()

    # LAYOUT --------------------------------------------------
    # ax1 (dendrogram 1) on the left of the heatmap
    [ax1_x, ax1_y, ax1_w, ax1_h] = [0.05, 0.22, 0.2, 0.6]
    width_between_ax1_axr = 0.004
    # distance between the top color bar axis and the matrix
    height_between_ax1_axc = 0.004
    # Sufficient size to show
    color_bar_w = 0.015

    # axr, placement of row side colorbar
    # second to last controls the width of the side color bar - 0.015 when showing
    [axr_x, axr_y, axr_w, axr_h] = [0.31, 0.1, color_bar_w, 0.6]
    axr_x = ax1_x + ax1_w + width_between_ax1_axr
    axr_y = ax1_y
    axr_h = ax1_h
    width_between_axr_axm = 0.004

    # axc, placement of column side colorbar
    # last one controls the hight of the top color bar - 0.015 when showing
    [axc_x, axc_y, axc_w, axc_h] = [0.4, 0.63, 0.5, color_bar_w]
    axc_x = axr_x + axr_w + width_between_axr_axm
    axc_y = ax1_y + ax1_h + height_between_ax1_axc
    height_between_axc_ax2 = 0.004

    # axm, placement of heatmap for the data matrix
    # why larger than 1?
    [axm_x, axm_y, axm_w, axm_h] = [0.4, 0.9, 2.5, 0.5]
    axm_x = axr_x + axr_w + width_between_axr_axm
    axm_y = ax1_y
    axm_h = ax1_h
    axm_w = axc_w

    # ax2 (dendrogram 2), on the top of the heatmap
    [ax2_x, ax2_y, ax2_w, ax2_h] = [0.3, 0.72, 0.6, 0.15]
    ax2_x = axr_x + axr_w + width_between_axr_axm
    ax2_y = ax1_y + ax1_h + height_between_ax1_axc + axc_h + height_between_axc_ax2
    ax2_w = axc_w

    # axcb - placement of the color legend
    if colorbar_position == "top left":
        [axcb_x, axcb_y, axcb_w, axcb_h] = [0.07, 0.88, 0.18, 0.09]
    elif colorbar_position == "right":
        [axcb_x, axcb_y, axcb_w, axcb_h] = [0.85, 0.2, 0.08, 0.6]
    else:
        raise ValueError("'top left' or 'right' accepted for now")

    # COMPUTATION DENDOGRAM 1 -------------------------------------
    if self.column_method:
        Y = self.linkage(self.frame.transpose(), self.column_method, self.column_metric)
        # the dendrogram is drawn on the current axes, i.e. the one just added
        ax2 = fig.add_axes([ax2_x, ax2_y, ax2_w, ax2_h], frame_on=True)

        # color_threshold=0 and above_threshold_color='k' colors all
        # dendogram into black
        Z = hierarchy.dendrogram(
            Y,
            color_threshold=0,
            above_threshold_color="k",
            distance_sort="descending",
        )
        ax2.set_xticks([])
        ax2.set_yticks([])
        # apply the clustering for the array-dendrograms to the actual matrix data
        idx2 = Z["leaves"]
        self.frame = self.frame.iloc[:, idx2]
        layout["dendogram2"] = ax2
    else:
        idx2 = range(self.frame.shape[1])

    # COMPUTATION DENDOGRAM 2 ---------------------------------
    if self.row_method:
        Y = self.linkage(self.frame, self.row_method, self.row_metric)
        ax1 = fig.add_axes([ax1_x, ax1_y, ax1_w, ax1_h], frame_on=True)
        Z = hierarchy.dendrogram(
            Y,
            orientation="right",
            color_threshold=0,
            above_threshold_color="k",
            distance_sort="descending",
        )
        ax1.set_xticks([])
        ax1.set_yticks([])
        # apply the clustering for the array-dendrograms to the actual matrix data
        idx1 = Z["leaves"]
        self.frame = self.frame.iloc[idx1, :]
        layout["dendogram1"] = ax1
    else:
        idx1 = range(self.frame.shape[0])

    # HEATMAP itself
    axm = fig.add_axes([axm_x, axm_y, axm_w, axm_h])
    axm.imshow(
        self.frame,
        aspect="auto",
        origin="lower",
        interpolation="None",
        cmap=cmap,
        norm=norm,
    )
    axm.set_xticks([])
    axm.set_yticks([])
    layout["heatmap"] = axm

    # TEXT: labels are looked up in the original headers through the
    # clustering order since self.frame has been reordered
    for i in range(self.frame.shape[0]):
        axm.text(
            self.frame.shape[1] - 0.5,
            i,
            "  " + str(row_header[idx1[i]]),
            verticalalignment="center",
        )

    for i in range(self.frame.shape[1]):
        axm.text(
            i,
            -0.9,
            " " + str(column_header[idx2[i]]),
            rotation=90,
            verticalalignment="top",
            horizontalalignment="center",
        )

    # CATEGORY column ------------------------------
    if self.category_column:
        axc = fig.add_axes([axc_x, axc_y, axc_w, axc_h])
        # NOTE(review): self.df is presumably the original (not reordered)
        # dataframe, consistent with the use of idx2 -- confirm
        category_col = [self.category_column[self.df.columns[i]] for i in idx2]
        dc = np.array(category_col, dtype=int)
        # sized from the categories themselves so that this also works
        # when no column clustering was performed
        dc.shape = (1, len(category_col))
        cmap_c = matplotlib.colors.ListedColormap(self.params.col_side_colors)
        axc.matshow(dc, aspect="auto", origin="lower", cmap=cmap_c)
        axc.set_xticks([])
        axc.set_yticks([])
        layout["category_column"] = axc

    # CATEGORY row -------------------------------
    if self.category_row:
        axr = fig.add_axes([axr_x, axr_y, axr_w, axr_h])
        # self.category_row must be a dictionary with names as found in the
        # index of the dataframe.
        category_row = [self.category_row[self.df.index[i]] for i in idx1]
        dr = np.array(category_row, dtype=int)
        dr.shape = (len(category_row), 1)
        cmap_r = matplotlib.colors.ListedColormap(self.params.col_side_colors)
        axr.matshow(dr, aspect="auto", origin="lower", cmap=cmap_r)
        axr.set_xticks([])
        axr.set_yticks([])
        layout["category_row"] = axr

    # COLORBAR ----------------------
    if colorbar:
        axcb = fig.add_axes([axcb_x, axcb_y, axcb_w, axcb_h], frame_on=False)
        if colorbar_position == "right":
            orientation = "vertical"
        else:
            orientation = "horizontal"

        cb = matplotlib.colorbar.ColorbarBase(ax=axcb, cmap=cmap, norm=norm, orientation=orientation)
        layout["colorbar"] = cb
        layout["colorbar_scalablemap"] = axcb

    # could be useful
    self.d = {"ordered": self.frame.copy(), "rorder": idx1, "corder": idx2}

    return layout
def plot(self, kind="pie", cmap="copper", threshold=1, radius=0.9,
         textcolor="red", **kargs):
    """A simple non-interactive plot of taxons

    :param str kind: 'pie' or 'barh'. Any other value is logged as an
        error and nothing is plotted.
    :param cmap: colormap of the pie chart.
    :param threshold: taxons whose percentage is below this value are
        summed into a single 'others' entry. Must be in ]0, 100[.
    :param radius: radius of the pie chart.
    :param textcolor: color of the texts of the pie chart.
    :param kargs: any other keyword is passed to the pandas plot method.
    :return: None if no taxon were found (or *kind* is invalid); otherwise
        the plotted percentages, indexed by taxon name and sorted.

    A Krona Javascript output is also available in :meth:`kraken_to_krona`

    .. plot::
        :include-source:

        from sequana import KrakenResults, sequana_data
        test_file = sequana_data("test_kraken.out", "testing")
        k = KrakenResults(test_file)
        df = k.plot(kind='pie')

    .. seealso:: to generate the data see :class:`KrakenPipeline`
        or the standalone application **sequana_taxonomy**.
    """
    if len(self._df) == 0:
        return None

    if not self._data_created:
        self.kraken_to_krona()

    if kind not in ['barh', 'pie']:
        logger.error('kind parameter: Only barh and pie are supported')
        return None

    # This may have already been called but maybe not. This is not time
    # consuming, so we call it again here
    if len(self.taxons.index) == 0:
        return None

    # .loc is used throughout: the .ix indexer was removed from pandas
    df = self.get_taxonomy_biokit(list(self.taxons.index))
    # presumably the taxonomy table has 8 columns -- verify against helper
    df.loc[-1] = ["Unclassified"] * 8

    data = self.taxons.copy()
    data.loc[-1] = self.unclassified
    data = data / data.sum() * 100

    assert threshold > 0 and threshold < 100
    others = data[data < threshold].sum()
    # >= so that a taxon exactly at the threshold is kept rather than lost
    data = data[data >= threshold]

    names = df.loc[data.index]['name']
    data.index = names.values
    data.loc['others'] = others

    try:
        data.sort_values(inplace=True)
    except AttributeError:
        # very old pandas versions without sort_values
        data.sort(inplace=True)

    # text may be long so, let us increase the figsize a little bit
    pylab.figure(figsize=(10, 8))
    pylab.clf()
    if kind == "pie":
        ax = data.plot(kind=kind, cmap=cmap, autopct='%1.1f%%',
                       radius=radius, **kargs)
        pylab.ylabel(" ")
        for text in ax.texts:
            # large, x-small, small, None, x-large, medium, xx-small,
            # smaller, xx-large, larger
            text.set_size("small")
            text.set_color(textcolor)
        for wedge in ax.patches:
            wedge.set_linewidth(1)
            wedge.set_edgecolor("k")
        self.ax = ax
    elif kind == "barh":
        ax = data.plot(kind=kind, **kargs)
        pylab.xlabel(" percentage ")

    return data
def run_enrichment_go(self, taxon, annot_col="Name", out_dir="enrichment"):  # pragma: no cover
    """Run the GO (Panther) enrichment for every comparison and export it.

    For each comparison in ``self.comparisons``, each direction (up, down,
    all) and each of the three GO root categories, the GO terms plot is
    saved as PDF and a chart as PNG in ``<out_dir>/figures``. The
    concatenated enrichment table is stored in ``self.enrichment_go`` and
    written to ``<out_dir>/enrichment_go.xlsx`` (sheet ``go``). The cases
    where nothing could be computed or found are listed in
    ``self.failed_go_enrichments``.

    :param taxon: passed as is to :class:`PantherEnrichment`
    :param str annot_col: passed to :meth:`get_gene_lists` to build the
        gene lists.
    :param out_dir: output directory (created if needed).
    """
    out_dir = Path(out_dir) / "figures"
    out_dir.mkdir(exist_ok=True, parents=True)

    gene_lists_dict = self.get_gene_lists(annot_col=annot_col, Nmax=2000, dropna=True)
    enrichment = {}
    # GO root terms: molecular_function, biological_process and
    # cellular_component. The labels are used in the output filenames.
    ontologies = {
        "GO:0003674": "MF",
        "GO:0008150": "BP",
        "GO:0005575": "CC",
    }
    failed_enrichments = []

    for compa in self.comparisons:
        gene_lists = gene_lists_dict[compa]
        pe = PantherEnrichment(gene_lists, taxon)
        pe.compute_enrichment(ontologies=ontologies.keys(), progress=False)

        for direction in ["up", "down", "all"]:
            if not pe.enrichment[direction]:
                # no ontology is selected at this stage: the failure
                # concerns all of them
                logger.warning(
                    f"No enrichment computed, so no plots computed for {compa} {direction}"
                )
                failed_enrichments.append({
                    "comparison": compa,
                    "direction": direction,
                    "GO": "all",
                    "reason": "no enrichment computed",
                })
                continue

            for ontology, label in ontologies.items():
                pylab.figure()
                enrichment_df = pe.plot_go_terms(direction, ontology, compute_levels=False)

                if enrichment_df.empty:
                    failed_enrichments.append({
                        "comparison": compa,
                        "direction": direction,
                        "GO": ontology,
                        "reason": "no enrichment found",
                    })
                else:
                    enrichment[(compa, direction, ontology)] = enrichment_df
                    pylab.tight_layout()
                    pylab.savefig(out_dir / f"go_{compa}_{direction}_{label}.pdf")
                    pe.save_chart(
                        enrichment_df,
                        out_dir / f"chart_{compa}_{direction}_{label}.png",
                    )

        logger.info(f"Panther enrichment for {compa} DONE.")

    self.failed_go_enrichments = pd.DataFrame(failed_enrichments)

    if not enrichment:
        # pd.concat raises on an empty mapping; there is nothing to export
        logger.warning("No GO enrichment found for any comparison; nothing to export.")
        self.enrichment_go = pd.DataFrame()
        return

    df = pd.concat(enrichment).sort_index()
    df.index.rename(["comparison", "direction", "GO_category", "index"], inplace=True)
    self.enrichment_go = df

    # Export results (should be moved to enrichment.py at some point I think)
    with pd.ExcelWriter(out_dir.parent / "enrichment_go.xlsx") as writer:
        table = self.enrichment_go.reset_index()
        # sheet_name must be given by keyword with recent pandas
        table.to_excel(writer, sheet_name="go", index=False)
        ws = writer.sheets["go"]
        try:
            ws.autofilter(0, 0, table.shape[0], table.shape[1] - 1)
        except Exception:
            # the auto-filter is cosmetic and depends on the Excel engine
            # in use; never fail the export because of it
            logger.warning("XLS formatting issue.")
def plot(self, kind="pie", cmap="tab20c", threshold=1, radius=0.9,
         textcolor="red", **kargs):
    """Draw the taxon abundances (in percent) as a pie or a bar chart.

    Taxons below *threshold* percent are gathered into a single 'others'
    entry; the unclassified reads are added as an entry of their own when
    there are any.

    :param kind: either 'pie' or 'barh'; anything else is logged as an
        error and None is returned.
    :param cmap: colormap (pie chart only).
    :param threshold: percentage strictly between 0 and 100.
    :param radius: radius of the pie chart.
    :param textcolor: color of the labels of the pie chart.
    :param kargs: forwarded to the pandas plot method.
    :return: None if no taxon were found and the plotted data otherwise

    A Krona Javascript output is also available in :meth:`kraken_to_krona`

    .. plot::
        :include-source:

        from sequana import KrakenResults, sequana_data
        test_file = sequana_data("test_kraken.out", "testing")
        k = KrakenResults(test_file)
        df = k.plot(kind='pie')

    .. seealso:: to generate the data see :class:`KrakenPipeline`
        or the standalone application **sequana_taxonomy**.

    .. todo:: For a future release, we could use this kind of plot
        https://stackoverflow.com/questions/57720935/how-to-use-correct-cmap-colors-in-nested-pie-chart-in-matplotlib
    """
    if len(self._df) == 0:
        return

    if self._data_created == False:  # noqa: E712
        self.kraken_to_krona()

    if kind not in ['barh', 'pie']:
        logger.error('kind parameter: Only barh and pie are supported')
        return

    # Cheap to recompute, so the taxons are fetched again even if a
    # previous call already did it
    if len(self.taxons.index) == 0:
        return None

    taxonomy = self.get_taxonomy_db(list(self.taxons.index))
    # the unclassified entry (label -1) is only added when needed
    if self.unclassified > 0:
        taxonomy.loc[-1] = ["Unclassified"] * 8

    percent = self.taxons.copy()
    if self.unclassified > 0:
        percent.loc[-1] = self.unclassified
    percent = percent / percent.sum() * 100

    assert 0 < threshold < 100

    # whatever is below the threshold is summarised into 'others'
    others = percent[percent < threshold].sum()
    percent = percent[percent >= threshold]

    percent.index = taxonomy.loc[percent.index]['name'].values
    if others > 0:
        percent.loc['others'] = others

    try:
        percent.sort_values(inplace=True)
    except:
        percent.sort(inplace=True)

    pylab.figure(figsize=(10, 8))
    pylab.clf()

    if kind == "barh":
        ax = percent.plot(kind=kind, **kargs)
        pylab.xlabel(" percentage ")
    elif kind == "pie":
        ax = percent.plot(kind=kind, cmap=cmap, autopct='%1.1f%%',
                          radius=radius, **kargs)
        pylab.ylabel(" ")
        # possible sizes: large, x-small, small, None, x-large, medium,
        # xx-small, smaller, xx-large, larger
        for label in ax.texts:
            label.set_size("small")
            label.set_color(textcolor)
        for patch in ax.patches:
            patch.set_linewidth(1)
            patch.set_edgecolor("k")
        self.ax = ax

    return percent
max(df_results["Illumina_score"].dropna())) list_pacbio_analysis = [col for col in list_analysis if ('Pacbio' in col)] for analysis in list_pacbio_analysis: df_results[analysis + "_score"] = round( df_results[analysis + "_score"] / 100., 2) if len(sys.argv) > 5: print("perso") colors = custom_colormap else: cmap = pylab.cm.get_cmap(colormap) colors = [cmap(i) for i in np.linspace(0, 1, len(list_analysis))] # get results for curves pylab.figure(figsize=(8, 8)) for i in range(len(list_analysis)): analysis = list_analysis[i] res = compute_table_performance(analysis, df_results) print("%s" % analysis) # [TP, FP, FN, TN] # print(len(res[0]), len(res[1]), res[2], res[3] , sum([len(res[0]), len(res[1]), res[2], res[3]])) TP = res[0] FP = res[1] FN = [0] * res[2] TN = [0] * res[3] y_true = np.array([1] * len(TP) + [1] * len(FN) + [0] * len(FP) + [0] * len(TN)) y_scores = np.array(TP + FN + FP + TN) precision, recall, thresholds = precision_recall_curve(y_true, y_scores) pylab.plot(recall, precision, color=colors[i], label=analysis)