def multiDimensionGraph():
    """Draw three relational plots of PBR against return from the module-level ``df``.

    The first plot is drawn with the current seaborn settings.  The second and
    third are drawn inside a "notebook" plotting context with ``font_scale=1.2``.
    The last plot maps the ``size`` column to marker size instead of facet columns.
    """
    df.head(2)  # result is discarded; kept only to preserve the original statement

    # Keyword arguments shared by all three plots.
    shared = {
        "x": "PBR(IFRS-연결)",
        "y": "수익률(%)",
        "hue": "베타 (M,5Yr)",
        "palette": "coolwarm",
        "data": df,
    }

    sns.relplot(col="size", **shared)

    with sns.plotting_context("notebook", font_scale=1.2):
        sns.relplot(col="size", **shared)

    with sns.plotting_context("notebook", font_scale=1.2):
        # Use `size` instead of `col`.
        sns.relplot(size="size", **shared)
def set_jw_style():
    """Apply a global small-font, thin-line "ticks" style for figures.

    Sets the PDF/PS font type to 42 on ``matplotlib.rcParams`` and then calls
    ``seaborn.set`` with the Arial font and explicit line widths, label sizes
    and colours.  The changes are global and stay in effect after the call.
    """
    import matplotlib
    import seaborn as sns

    matplotlib.rcParams["pdf.fonttype"] = 42
    matplotlib.rcParams["ps.fonttype"] = 42

    sns.set(
        style="ticks",
        font="Arial",
        font_scale=1,
        rc={
            "axes.linewidth": 0.05,
            "axes.labelsize": 7,
            "axes.titlesize": 7,
            "xtick.labelsize": 6,
            "ytick.labelsize": 6,
            "legend.fontsize": 6,
            "xtick.major.width": 0.25,
            "xtick.minor.width": 0.25,
            "ytick.major.width": 0.25,
            "text.color": "Black",
            "axes.labelcolor": "Black",
            "xtick.color": "Black",
            "ytick.color": "Black",
        },
    )
    # The original ended with a bare ``sns.plotting_context()`` call.  That only
    # returns the current context parameters and its result was discarded, so
    # it has been removed.
def plot_values(skews, kurt, fname):
    """Save skewness and kurtosis scatter plots as PNG files.

    ``skews`` and ``kurt`` are handed to ``sns.lmplot`` as ``data`` and must
    provide the columns 'droplet number' and 'count'.  The figures are written
    to ``fname + '_skewplot.png'`` and ``fname + '_kurtplot.png'``.
    """
    # Settings common to both scatter plots (no regression line is fitted).
    scatter_kwargs = dict(x='droplet number', y='count', fit_reg=False,
                          hue='droplet number', palette='magma')

    with sns.plotting_context('notebook', font_scale=1.25):
        skew_grid = sns.lmplot(data=skews, **scatter_kwargs)
        skew_grid.set(ylabel='Skewness', ylim=(-1, 1),
                      title='Skew of intensity for each droplet')
        skew_grid.savefig(fname + '_skewplot.png')

    with sns.plotting_context('notebook', font_scale=1.5):
        kurt_grid = sns.lmplot(data=kurt, **scatter_kwargs)
        kurt_grid.set(ylabel='Kurtosis',
                      title='Kurtosis of intensity for each droplet')
        kurt_grid.savefig(fname + '_kurtplot.png')
def boxplot_stats(dfin, title):
    """Configure global plotting settings for the statistics box plot.

    As visible here the function only changes global state: it sets the PDF/PS
    font type to 42, selects the seaborn "white" style and halves the font
    scale.  ``dfin`` and ``title`` are not used by the visible code.
    """
    plotlt.rcParams['pdf.fonttype'] = 42
    plotlt.rcParams['ps.fonttype'] = 42
    # hue --> give colour for FDR<0.1
    # size scale based on relab
    colour = ['r', 'black']  # NOTE(review): not used by the visible code
    sns.set(style="white")
    # The original called ``sns.plotting_context(font_scale=0.5)`` and threw the
    # result away; without a named context seaborn also ignores ``font_scale``.
    # ``set_context`` applies the scale.  "notebook" is the context that
    # ``sns.set`` selects by default.
    sns.set_context("notebook", font_scale=0.5)
def setDefaultStyle(self, fontscale=1.2, font='monospace'):
    """Apply the default seaborn theme globally.

    Parameters
    ----------
    fontscale : float
        Font scaling factor passed to seaborn.
    font : str
        Value used for the ``font.family`` setting.

    Returns
    -------
    None
    """
    import seaborn as sns

    sns.set(font_scale=fontscale,
            rc={'figure.facecolor': 'white', 'axes.facecolor': '#F7F7F7'})
    sns.set_style("ticks", {'font.family': font,
                            'axes.facecolor': '#F7F7F7',
                            'legend.frameon': True})
    # The original called ``sns.plotting_context(...)`` and discarded the
    # result, so these font sizes were never applied.  ``set_context`` applies
    # them.  ``font_scale`` is passed again so sizes not listed in ``rc`` keep
    # the requested scaling.
    sns.set_context('notebook', font_scale=fontscale,
                    rc={'legend.fontsize': 16,
                        'xtick.labelsize': 12,
                        'ytick.labelsize': 12,
                        'axes.labelsize': 14,
                        'axes.titlesize': 16})
    return
def dropplot(data, feature='median_conservation', genome_len=10**4):
    """Draw one horizontal track per sequence with coloured square markers.

    Parameters
    ----------
    data : DataFrame
        Must provide the columns 'seq_id', 'start', 'end', 'drop_size',
        'product' and the column named by ``feature``.
    feature : str
        Column whose distinct (sorted) values select the marker colour.
    genome_len : int
        Right end of the black baseline drawn for every sequence.

    The figure and the colour palette are shown with ``plt.show()``; nothing
    is returned.
    """
    values = np.sort(data[feature].unique())
    # Position of each distinct feature value in the palette, keyed by str().
    mapping = {str(value): idx for idx, value in enumerate(values)}

    n_colors = max(8, values.shape[0]) if values.shape[0] > 2 else 2
    # The original built this palette inside a separate plotting context that
    # had no effect on it; that context has been dropped.
    pal = sns.mpl_palette('seismic', n_colors)

    # The original rc dict also held 'aspect', which is not a plotting-context
    # parameter and was silently ignored by seaborn; it is omitted here.
    with sns.plotting_context(rc={"font.size": 12,
                                  "axes.labelsize": 15,
                                  "xtick.labelsize": 14,
                                  "ytick.labelsize": 12}):
        f, ax = plt.subplots(figsize=(14, 4))
        # Bug fix: the original iterated ``g['seq_id']`` although no ``g`` is
        # defined in this function; the sequences come from ``data``.
        for i, seq in enumerate(data['seq_id'].unique()):
            g_tag = data[data['seq_id'] == seq]
            ax.plot([1, genome_len], [i, i],
                    color="black", alpha=0.7, linewidth=4)
            for _, row in g_tag.iterrows():
                # ``color=`` rather than ``c=``: the value is one RGB tuple for
                # both points, not a per-point sequence.
                ax.scatter([row['start'], row['end']], [i, i],
                           marker='s',
                           s=2 * row['drop_size'],
                           color=pal[mapping[str(row[feature])]],
                           label="{} {}".format(row['product'], row['start']))
        plt.legend(bbox_to_anchor=[1.1, 1.1])
        sns.palplot(pal)
        plt.show()
def multi_qqplot(data, max_pval=1.0):
    """Draw one QQ plot per method in a wrapped facet grid.

    Parameters
    ----------
    data : DataFrame
        Must provide the columns 'method' (one facet each) and 'p-value'.
    max_pval : float
        Upper limit applied to both axes of every facet.

    Note: this also sets the global ``savefig`` dpi to 300.
    """
    with sns.axes_style('ticks'), sns.plotting_context('paper', font_scale=2.5):
        # change dpi
        import matplotlib as mpl
        mpl.rc('savefig', dpi=300)

        # make qq plot for each method
        g = sns.FacetGrid(data, col="method", col_wrap=3,
                          sharey=False, aspect=1.5)
        g.map(qqplot, "p-value")
        plt.tight_layout()
        # Bug fix: the y-axis label was misspelled "Ovserved".
        set_axes_label(g.fig, 'Expected p-value', 'Observed p-value',
                       ylab_xoffset=-0.03, ylab_yoffset=.62,
                       xlab_yoffset=-0.02)
        g.set_titles('{col_name}')

        # set axis limits; ``.flat`` works for 1-D and 2-D axes arrays alike
        for myax in g.axes.flat:
            myax.set_xlim((0, max_pval))
            myax.set_ylim((0, max_pval))
def plot_feature(cv, index, model_index=1, ascending=False):
    """Plot the feature importances of the best estimator as horizontal bars.

    Parameters
    ----------
    cv : estimator
        Fitted search object.  ``cv.best_estimator_.steps[0][1]`` must provide
        ``get_support()`` and ``steps[model_index][1]`` must provide
        ``feature_importances_``.
    index : array-like
        Training-data column labels; indexed with the support of step 0.
    model_index : int
        Position in ``steps`` of the model holding the importances.
    ascending : bool
        Sort direction of the bars by importance.

    Returns
    -------
    The matplotlib axes holding the bar plot.
    """
    steps = cv.best_estimator_.steps
    value = steps[model_index][1].feature_importances_
    new_index = index[steps[0][1].get_support()]

    data = pd.DataFrame(value, index=new_index)
    # Bug fix: ``ascending`` was accepted but ``False`` was hard-coded here.
    data = data.reset_index().sort_values(0, ascending=ascending)

    with sns.axes_style("dark"), sns.plotting_context("paper", font_scale=1.5):
        plt.figure(figsize=(7, 7))
        ax = plt.subplot()
        sns.barplot(y="index", x=0, data=data, orient="h",
                    palette=["#8c8c91"], ax=ax)
        plt.xlabel("")
        plt.ylabel("")
        return ax
def plot_entropies(results, rotate='oblimin', dpi=300, figsize=(20,8), ext='png', plot_dir=None):
    """Box-plot real versus null entropies for each factor solution.

    Args:
        results: object exposing ``EFA.results`` with the keys
            'entropies_<rotate>' and 'null_entropies_<rotate>'
        rotate: rotation name used to build those keys
        dpi: the final dpi for the saved image
        figsize: size passed to ``plt.figure``
        ext: the extension for the saved figure
        plot_dir: the directory to save the figure. If None, do not save
    """
    EFA = results.EFA
    entropies = EFA.results['entropies_%s' % rotate].copy()
    null_entropies = EFA.results['null_entropies_%s' % rotate].copy()
    entropies.loc[:, 'group'] = 'real'
    null_entropies.loc[:, 'group'] = 'null'

    # ``axis`` is passed by keyword: pandas 2 no longer accepts it positionally.
    # The local was also renamed so it no longer shadows this function's name.
    entropy_df = pd.concat([entropies, null_entropies], axis=0)
    entropy_df = entropy_df.melt(id_vars='group',
                                 var_name='EFA',
                                 value_name='entropy')

    with sns.plotting_context('notebook', font_scale=1.8):
        f = plt.figure(figsize=figsize)
        sns.boxplot(x='EFA', y='entropy', data=entropy_df, hue='group')
        plt.xlabel('# Factors')
        plt.ylabel('Entropy')
        plt.title('Distribution of Measure Specificity across Factor Solutions')
        if plot_dir is not None:
            f.savefig(path.join(plot_dir, 'entropies_across_factors.%s' % ext),
                      bbox_inches='tight', dpi=dpi)
            plt.close()
def draw_heatmap(df, x_labels=True, y_labels=True, title=None, xlabel=None, ylabel=None, **kwargs):
    """Render ``df`` as an annotated heatmap and return its figure.

    ``x_labels`` / ``y_labels`` may be callables; each is then applied to every
    column / index label to build the tick labels.  Any other value is passed
    to ``sb.heatmap`` unchanged.  Extra keyword arguments go to ``sb.heatmap``.
    The figure is 10 inches wide and half an inch tall per row, at 120 dpi.
    """
    if callable(x_labels):
        x_labels = [x_labels(column) for column in df.columns]
    if callable(y_labels):
        y_labels = [y_labels(row) for row in df.index]

    with sb.axes_style('white'), sb.plotting_context('paper'):
        axes = sb.heatmap(
            df,
            xticklabels=x_labels,
            yticklabels=y_labels,
            annot=True,
            cmap='RdYlGn',
            robust=True,
            **kwargs
        )
        set_labels(axes, title=title, xlabel=xlabel, ylabel=ylabel,
                   x_tick_params=dict(labelrotation=90))

        figure = axes.get_figure()
        figure.set_size_inches(10, df.shape[0] / 2)
        figure.set_dpi(120)
        return figure
def plot_beneficiaries(df, title):
    """Save and show a heatmap and a bar plot of amounts per beneficiary.

    ``df`` must provide the columns 'principal_beneficiary' and 'amount'.
    Both figures are written under ``plots/`` with names derived from the
    lower-cased ``title``.
    """

    def _new_axes():
        # Every figure is a single 12x16 inch axes.
        figure, axes = plt.subplots(1)
        figure.set_size_inches(12, 16)
        return figure, axes

    def _finish(figure, axes, kind):
        # Title, blank axis labels, save to disk, then display.
        axes.set_title(title, fontsize=28, pad=20)
        axes.set_xlabel('')
        axes.set_ylabel('')
        figure.savefig(f"plots/{title.lower()}_{kind}.png",
                       bbox_inches='tight', dpi=200)
        plt.show()

    with sns.plotting_context('notebook', font_scale=1.6):
        heat_fig, heat_ax = _new_axes()
        by_beneficiary = df.set_index('principal_beneficiary')
        sns.heatmap(by_beneficiary[['amount']], annot=True, square=False,
                    xticklabels=True, yticklabels=True, fmt='.2f', ax=heat_ax)
        _finish(heat_fig, heat_ax, "heatmap")

        bar_fig, bar_ax = _new_axes()
        sns.barplot(x='amount', y='principal_beneficiary', data=df,
                    palette='Spectral', ax=bar_ax)
        _finish(bar_fig, bar_ax, "barplot")
def violinplot_combined_one_isoform(args):
    """Draw per-family violin plots of a metric against read count and save them.

    Reads the tab-separated file ``args.tsv_input``, facets it by the 'Family'
    column, plots ``args.y_axis`` against 'read_count' split by 'TOOL', and
    saves the figure to ``args.outfile``.  The y axis is logarithmic when
    ``args.y_axis == "FP"``.
    """
    plt.clf()
    with sns.plotting_context("paper", font_scale=1.8):
        true_positives = pd.read_csv(args.tsv_input, sep="\t")

        # Removed from the original: an unused ``plt.subplots()`` call, which
        # left an extra empty figure open, and two unused locals.
        # ``height`` replaces ``size``, which seaborn renamed.
        g = sns.FacetGrid(true_positives, row="Family", height=3, aspect=1.6,
                          row_order=["TSPY13P", "HSFY2", "DAZ2"],
                          legend_out=True)
        sns.set(style="whitegrid", palette="muted")

        g.map(sns.violinplot, "read_count", args.y_axis, "TOOL", cut=0,
              hue_order=["ISOCON", "ICE"],
              palette=sns.color_palette("muted", 2))
        g.despine(left=True)
        g.add_legend(title="TOOL", label_order=["ISOCON", "ICE"])

        g.set_titles(row_template="{row_name}", fontweight='bold', size=16)
        g.set_yticklabels(["", 0, 0.2, 0.4, 0.6, 0.8, 1.0])
        if args.y_axis == "FP":
            g.set(yscale="log")

        plt.savefig(args.outfile)
        plt.close()
def visualize_embeddings_umap(self, title='', ext="png", save=True, **umap_kwargs):
    """Project ``self.embeddings`` with UMAP and scatter-plot the result.

    Parameters
    ----------
    title : str
        Plot title; also passed to ``self.save_fig``.
    ext : str
        Passed to ``self.save_fig``.
    save : bool
        When true, hand the figure to ``self.save_fig(title, ext, fig)``.
    **umap_kwargs
        Forwarded to ``umap.UMAP``.

    Points are coloured by ``self.true_labels``; the colour bar has one tick
    per class, labelled with ``self.categorical_labels``.
    """
    # Init umapper
    umapper = umap.UMAP(**umap_kwargs)
    # Compute umap embeddings
    umap_embeddings = umapper.fit_transform(self.embeddings)

    # Plot embeddings
    with sns.plotting_context(context="poster"):
        # Bug fix: the figure was bound to ``_`` while ``fig`` was passed to
        # ``save_fig`` below, which failed whenever ``save`` was true.
        fig, ax = plt.subplots(1, figsize=(14, 10))
        plt.scatter(*umap_embeddings.T, s=0.8, c=self.true_labels,
                    cmap="tab20b", alpha=1)
        plt.setp(ax, xticks=[], yticks=[])

        # Boundaries at half-integers centre each tick on its class colour.
        cbar = plt.colorbar(boundaries=np.arange(self.n_classes + 1) - 0.5)
        cbar.set_ticks(np.arange(self.n_classes))
        cbar.set_ticklabels(self.categorical_labels)
        plt.title(title)

        if save:
            self.save_fig(title, ext, fig)
def plot_tsne(
    x: np.ndarray,
    y: np.ndarray,
    metadata: Dict[str, Any],
    kwargs: Dict[str, Any],
) -> sns.FacetGrid:
    """Scatter the first two columns of ``x`` coloured by ``y``.

    ``metadata`` supplies the 'palette', 'labels' and 'name' entries;
    ``kwargs["relplot_kwargs"]`` is forwarded to ``sns.relplot``.  The first
    palette and label entries are left out of the colour bar.  Returns the
    resulting grid.
    """
    with sns.plotting_context(context="paper"):
        graph = sns.relplot(
            x=x[:, 0],
            y=x[:, 1],
            hue=y,
            palette=metadata["palette"],
            **kwargs["relplot_kwargs"],
        )

        bar_palette = list(metadata["palette"].values())[1:]
        bar_labels = list(metadata["labels"].values())[1:]
        _plot_colorbar(figure=graph.fig, palette=bar_palette, labels=bar_labels)

        axis_settings = dict(
            title=f'{metadata["name"]} t-SNE Projection',
            xlabel="t-SNE Component 1",
            xticks=[],
            ylabel="t-SNE Component 2",
            yticks=[],
        )
        graph.set(**axis_settings)
    return graph
def wrapper(*args, **kwargs):
    """Call ``func`` inside a seaborn plotting context and axes style.

    Three keyword arguments are consumed here and not forwarded to ``func``:

    context : seaborn context name, default "notebook"
    style   : seaborn style name, default "whitegrid"
    params  : dict of extra style parameters, default None

    Returns whatever ``func`` returns.
    """
    _context = kwargs.pop("context", "notebook")
    _style = kwargs.pop("style", "whitegrid")
    _params = kwargs.pop("params", None)

    # By default all four axes spines are hidden.
    _default_params = {
        "axes.spines.left": False,
        "axes.spines.bottom": False,
        "axes.spines.right": False,
        "axes.spines.top": False,
    }
    # Bug fix: the original merged as {**_params, **_default_params}, so the
    # defaults silently overrode the caller's ``params``.  Caller values now win.
    merged_params = {**_default_params, **(_params or {})}

    with sns.plotting_context(context=_context), \
            sns.axes_style(style=_style, rc=merged_params):
        # Bug fix: the result of ``func`` used to be dropped.
        return func(*args, **kwargs)
def plotSentResults(csvPath):
    """
    plots positive and negative sentiment results from csv result files as png

    The csv file is read from ``result_folder`` and must provide the columns
    'date', 'pos' and 'neg'.  The figure is saved next to it as
    ``<name>Sentiment.png``.

    :param csvPath: path to csv result files ('.csv' is appended if missing)
    :return: None
    """
    if ".csv" not in csvPath:
        csvPath = csvPath + ".csv"
    _data = pd.read_csv(result_folder + csvPath)

    # Long format: all positive rows followed by all negative rows.
    # Renamed from ``vars``, which shadowed the builtin.
    sentiment = ["positive"] * len(_data["pos"]) + ["negative"] * len(_data["neg"])
    day = list(_data["date"])
    day.extend(_data["date"])
    vals = list(_data["pos"])
    vals.extend(list(_data["neg"]))
    data_preproc = pd.DataFrame({'Day': day, 'value': vals, 'variable': sentiment})

    is_fox = "Fox" in csvPath

    plt.figure(figsize=(12, 9))
    with sns.plotting_context("notebook", font_scale=2.0):
        ax = sns.lineplot(x='Day', y='value', hue='variable', data=data_preproc)
        if is_fox:
            plt.xticks(ticks=np.arange(0, 120, 5), fontsize=13, rotation=60)
        fig = ax.get_figure()
        ax.set(xlabel='Date', ylabel='Weighted frequency')

        # Bug fix: the original dropped the first legend entry with
        # ``handles[1:]``, assuming it is the hue title.  Seaborn versions that
        # do not emit a title handle lost the 'positive' entry instead, so the
        # title entry is now removed by its label.
        handles, labels = ax.get_legend_handles_labels()
        kept = [(handle, label) for handle, label in zip(handles, labels)
                if label != 'variable']
        ax.legend(handles=[handle for handle, _ in kept],
                  labels=[label for _, label in kept])

        result_name = csvPath.split(".")[0] + "Sentiment" + ".png"
        fig.savefig(result_folder + result_name)
def plot_bar(d, k, pv, figurename):
    """Save a log-scaled bar plot of normalized counts per fragment count.

    Parameters
    ----------
    d : DataFrame
        Must provide 'fragment_counts', 'normalized_count' and 'samplename'.
    k, pv : float
        Chi-square statistic and p-value written into the plot annotation.
    figurename : str
        Output path for the figure.

    Returns
    -------
    int
        Always 0.
    """
    plt.figure()
    fs = 20
    palette = sns.color_palette(['black', 'green'])

    def _format_tick(y, pos):
        # Use as many decimals as the tick's magnitude requires (none for y >= 1).
        decimals = int(np.maximum(-np.log10(y), 0))
        return '{{:.{:1d}f}}'.format(decimals).format(y)

    with sns.plotting_context('paper', font_scale=2):
        ax = sns.barplot(data=d, x='fragment_counts', y='normalized_count',
                         hue='samplename', palette=palette)
        # ``\\chi`` avoids the invalid ``\c`` escape; the string is unchanged.
        ax.annotate('$\\chi^{2}$: %.2f\nP-value: %.3f' % (k, pv),
                    xy=(2, 1), fontsize=fs)
        ax.legend(title=' ', fontsize=fs, loc=(0.5, 0.4))
        ax.set_xlim(-0.5, 5)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        ax.set_yscale('log')
        ax.set_xlabel('Count per unique fragment', fontsize=fs)
        ax.set_ylabel('% Fragments', fontsize=fs)
        ax.yaxis.set_major_formatter(ticker.FuncFormatter(_format_tick))
        plt.savefig(figurename, transparent=True, bbox_inches='tight')

    # Parenthesised single-argument form works on both Python 2 and Python 3.
    print('Plotted %s' % figurename)
    return 0
def context(context='notebook', font_scale=1.5, rc=None):
    """Create pyfolio default plotting style context.

    Under the hood, calls and returns seaborn.plotting_context() with
    some custom settings. Usually you would use in a with-context.

    Parameters
    ----------
    context : str, optional
        Name of seaborn context.
    font_scale : float, optional
        Scale font by factor font_scale.
    rc : dict, optional
        Config flags.
        By default, {'lines.linewidth': 1.5,
                     'axes.facecolor': '0.995',
                     'figure.facecolor': '0.97'}
        is being used and will be added to any
        rc passed in, unless explicitly overridden.
        The dict passed in is not modified.

    Returns
    -------
    seaborn plotting context

    For more information, see seaborn.plotting_context().
    """
    # The original defaults also carried a misspelled 'font_scal' entry, which
    # is not a plotting parameter; font scaling goes through ``font_scale``.
    rc_default = {'lines.linewidth': 1.5,
                  'axes.facecolor': '0.995',
                  'figure.facecolor': '0.97'}

    # Merge into a new dict so the caller's ``rc`` is left untouched
    # (the original added the defaults to it in place).
    merged_rc = dict(rc_default)
    if rc is not None:
        merged_rc.update(rc)

    return sns.plotting_context(context=context, font_scale=font_scale,
                                rc=merged_rc)
def get_figure(self, data, *args, caption="Caption not provided", **kwargs):
    """
    Plot the dataframe as a bar ``catplot`` and wrap it in a ``Figure``.

    ``data`` is converted with ``self.get_dataframe``.  The optional keyword
    ``title`` is popped from ``kwargs`` and used as the plot title.
    """
    # Removed from the original: a leading ``plt.figure()`` (``catplot``
    # creates its own figure, so it only left an empty one open) and an
    # unreachable ``raise RuntimeError`` after the return.
    with seaborn.plotting_context(self.context,
                                  font_scale=self.font_scale,
                                  rc=self.rc_params()):
        graphic = seaborn.catplot(
            data=self.get_dataframe(data),
            x=self.xvar,
            y=self.yvar,
            ci=self.confidence_interval,
            kind="bar",
            hue=self.gvar if self.gvar else None,
            height=self.height_figure,
            aspect=self.aspect_ratio_figure)
        graphic.set(
            xlabel=self.xlabel,
            ylabel=self.ylabel,
            title=kwargs.pop("title", ""))
        plt.xticks(rotation=90)
        return Figure(graphic, caption=caption)
def plot_confusion_matrix(cm, labels=None, cmap='Blues', title=None, norm=False, context=None, annot=True):
    """Draw a confusion matrix as a heatmap.

    Parameters
    ----------
    cm : array-like
        Confusion matrix; passed through ``normalize_confusion_matrix`` when
        ``norm`` is true.
    labels : iterable of str, optional
        Tick labels, title-cased before use.  ``None`` lets seaborn choose.
    cmap : str
        Colormap name.
    title : str, optional
        Defaults to "Confusion Matrix" or "Normalized Confusion Matrix".
    norm : bool
        Whether to normalize ``cm`` first.
    context : seaborn plotting context, optional
        Defaults to the "notebook" context with ``font_scale=1.5``.
    annot : bool
        Whether to annotate the cells.
    """
    # ``collections.Iterable`` no longer exists in Python 3.10+; the ABC lives
    # in ``collections.abc``.
    from collections.abc import Iterable

    if labels is None:
        labels = True
    elif isinstance(labels, Iterable) and not isinstance(labels, str):
        labels = [label.title() for label in labels]

    if norm:
        cm = normalize_confusion_matrix(cm)
    if title is None:
        title = "Normalized Confusion Matrix" if norm else "Confusion Matrix"
    if context is None:
        context = sns.plotting_context("notebook", font_scale=1.5)

    with context:
        ax = sns.heatmap(cm, xticklabels=labels, yticklabels=labels,
                         cmap=cmap, annot=annot)
        ax.set_title(title)
def plot_2d_pca(data):
    """Show a PCA biplot: sample scores as points, variable scores as arrows.

    Uses the module-level ``pc_df``, ``coeff``, ``n`` and ``var_explained``.
    ``data`` only supplies the column names used to label the arrows.
    """
    plt.figure(figsize=(12, 10))
    with sbn.plotting_context("notebook", font_scale=1.25):
        sbn.scatterplot(x="PC1", y="PC2", data=pc_df, s=100)

        # One arrow per variable, from the origin to its (PC1, PC2) score.
        arrow_style = dict(color='k', alpha=0.9, head_width=0.02,
                           head_length=0.05, linestyle='-', linewidth=1.5,
                           overhang=0.2)
        for var_idx in range(n):
            pc1_score, pc2_score = coeff[var_idx, 0], coeff[var_idx, 1]
            plt.arrow(0, 0, pc1_score, pc2_score, **arrow_style)
            # The label sits slightly beyond the arrow head.
            plt.text(pc1_score * 1.15, pc2_score * 1.15,
                     data.columns.values[var_idx],
                     color='k', ha='center', va='center', fontsize=10)

        plt.xlabel(f"PC1: {var_explained[0]:.0f}%")
        plt.ylabel(f"PC2: {var_explained[1]:.0f}%")
        plt.show()
def setup_figure(self):
    """
    Prepare the matplotlib figure for plotting.

    When the xkcd option is enabled, a comic-style font is chosen for
    ``self.options["figure_font"]`` (keeping its point size) and
    ``plt.xkcd()`` is activated.  A facet grid is then created from
    ``self._table`` and stored in ``self.g``.
    """
    if options.cfg.xkcd:
        fonts = QtGui.QFontDatabase().families()

        # Prefer an exact match from the known comic fonts.
        preferred = ["Humor Sans", "DigitalStrip", "Comic Sans MS"]
        family = next((name for name in preferred if name in fonts), None)

        if family is None:
            # Otherwise take the first installed family whose name contains
            # one of the keywords.  Bug fix: the original passed the keyword
            # itself ("comic"/"cartoon") to QFont instead of the matched
            # family, and its ``break`` only left the inner loop.
            family = next(
                (installed
                 for keyword in ["comic", "cartoon"]
                 for installed in fonts
                 if keyword in installed.lower()),
                None)

        if family is not None:
            self.options["figure_font"] = QtGui.QFont(
                family,
                pointSize=self.options["figure_font"].pointSize())
        plt.xkcd()

    with sns.plotting_context("paper"):
        self.g = sns.FacetGrid(self._table,
                               col=self._col_factor,
                               col_wrap=self._col_wrap,
                               row=self._row_factor,
                               sharex=True,
                               sharey=True)
def make_sns_heatmap(self, characteristic, cmap=sns.light_palette("green"), ret=False, context='paper', dim=[list('ABCDEFGH'), list(range(1, 13))], replicates=False):
    '''
    Draw an annotated seaborn heatmap of one well characteristic and save it
    to "output.svg".

    characteristic: passed to ``tools.as_matrix`` together with
        ``self.well_list`` and the entries of ``dim``
    cmap: colormap for the heatmap
    ret: when true, return the heatmap axes
    context: seaborn plotting context name (changes relative element sizes)
    dim: ``dim[0]`` gives the y tick labels, ``dim[1]`` the x tick labels
    replicates: not used by this function
    '''
    well_matrix = tools.as_matrix(self.well_list, characteristic, *dim)

    with sns.plotting_context(context):
        heatmap = sns.heatmap(well_matrix, cmap=cmap, annot=True,
                              linewidths=.5, fmt='.2f')
        heatmap.set_xticklabels(dim[1])
        heatmap.set_yticklabels(dim[0], rotation=0)

        figure = heatmap.get_figure()
        figure.set_size_inches(13, 8)
        figure.savefig("output.svg")

    if not ret:
        return None
    return heatmap
def plot_pca(
    x: np.ndarray,
    y: np.ndarray,
    variance: np.ndarray,
    metadata: Dict[str, Any],
    kwargs: Dict[str, Any],
) -> sns.FacetGrid:
    """Scatter the first two columns of ``x`` coloured by ``y``.

    ``variance[0]`` and ``variance[1]`` are shown as percentages in the axis
    labels.  ``metadata`` supplies the 'palette', 'labels' and 'name' entries;
    ``kwargs["relplot_kwargs"]`` is forwarded to ``sns.relplot``.  The first
    palette and label entries are left out of the colour bar.  Returns the
    resulting grid.
    """
    with sns.plotting_context(context="paper"):
        graph = sns.relplot(
            x=x[:, 0],
            y=x[:, 1],
            hue=y,
            palette=metadata["palette"],
            **kwargs["relplot_kwargs"],
        )

        bar_palette = list(metadata["palette"].values())[1:]
        bar_labels = list(metadata["labels"].values())[1:]
        _plot_colorbar(figure=graph.fig, palette=bar_palette, labels=bar_labels)

        axis_settings = dict(
            title=f'{metadata["name"]} PCA Projection',
            xlabel=f"Principal Component 1 - {variance[0]*100:.1f}% Explained Variance",
            xticks=[],
            ylabel=f"Principal Component 2 - {variance[1]*100:.1f}% Explained Variance",
            yticks=[],
        )
        graph.set(**axis_settings)
    return graph
def make_violins(exp, brain_areas, gene_list):
    """Draw split violins of expression per brain area, gene list vs. the rest.

    Parameters
    ----------
    exp : DataFrame
        Its index is matched against ``gene_list`` and must reset to a column
        named 'gene_symbol'.
    brain_areas : list
        Columns of ``exp`` to plot, one violin each.
    gene_list : iterable
        Index values flagged as belonging to the gene list.
    """
    # ``.copy()`` so the flag column is added to an independent frame rather
    # than to a selection of the caller's ``exp``.
    subset = exp.loc[:, brain_areas].copy()
    subset['in_gene_list'] = subset.index.isin(gene_list)
    tidy = subset.reset_index().melt(id_vars=['gene_symbol', 'in_gene_list'],
                                     var_name='brain_area',
                                     value_name='expression')

    with sns.plotting_context('notebook', font_scale=1.25):
        fig, ax = plt.subplots(figsize=(12, 9))
        sns.violinplot(y='brain_area', x='expression', edgecolor='black',
                       hue='in_gene_list',
                       palette={False: '#636364', True: '#D9D4D3'},
                       cut=2, split=True, inner='quartiles',
                       data=tidy, ax=ax)
        ax.set_xlabel('Expression (z-scored)')
        ax.set_ylabel('')

        legend = ax.get_legend()
        legend.set_title('')
        legend._loc = 7
        legend_labels = {'False': 'Background genes', 'True': 'Disease genes'}
        # Relabel by the entry's current text rather than by position, so the
        # labels cannot be swapped if the legend order ever differs.
        for text in legend.texts:
            current = text.get_text()
            text.set_text(legend_labels.get(current, current))
        sns.despine()
def generate_pca_classification(pc_df):
    """Plot the two principal components side by side, coloured by clustering.

    ``pc_df`` must provide the columns 'principal component 1',
    'principal component 2', 'kmeans_clusters' and 'GMM_clusters'.  The left
    axes is coloured by the k-means clusters, the right one by the GMM clusters.
    """
    # The original rc dict also held 'y.labelsize', which is not a
    # plotting-context parameter and was silently ignored; it has been removed.
    font_sizes = {
        "font.size": 14,
        "axes.titlesize": 18,
        "axes.labelsize": 18,
        "xtick.labelsize": 14,
        "ytick.labelsize": 14,
    }
    with sns.plotting_context(rc=font_sizes):
        f, axes = plt.subplots(1, 2)
        for axis, cluster_column in zip(axes, ('kmeans_clusters', 'GMM_clusters')):
            sns.scatterplot(x='principal component 1',
                            y='principal component 2',
                            hue=cluster_column,
                            marker='+',
                            data=pc_df,
                            ax=axis)
        sns.despine()
def predicted_to_member_id(args):
    """Save a count plot of 'MEMBER_ID' per 'FAMILY'.

    Reads the tab-separated file ``args.tsv_input``, drops rows without a
    'MEMBER_ID', draws one count-plot facet per 'FAMILY' (four per row) and
    saves the figure to ``args.outfile``.
    """
    # for pseudo vs coding, see:
    # https://stackoverflow.com/questions/37331937/seaborn-facetgrid-countplot-hue
    plt.clf()
    with sns.plotting_context("paper", font_scale=1.0):
        print(args.tsv_input)
        data = pd.read_csv(args.tsv_input, sep="\t")
        sns.set(style="whitegrid", palette="muted")

        # ``factorplot`` was renamed ``catplot`` and ``size`` became ``height``.
        # ``x`` is passed by keyword because newer seaborn no longer accepts
        # it positionally.
        g = sns.catplot(x="MEMBER_ID", col="FAMILY", col_wrap=4,
                        data=data[data.MEMBER_ID.notnull()],
                        kind="count", height=2.5, aspect=.8)

        plt.subplots_adjust(top=0.9)  # leave room for the super-title
        g.fig.suptitle("Potential Isoforms per member",
                       fontweight='bold', size=16)
        plt.savefig(args.outfile)
        plt.close()
def draw(df, output):
    """Save a grid of step plots (systematic x process) as a PDF.

    Parameters
    ----------
    df : DataFrame
        Must provide the columns 'syst', 'process', 'variation', 'bins' and
        'count'.  Underscores in 'syst' are shown as spaces.  The frame passed
        in is not modified.
    output : str
        Path of the PDF to write.
    """
    # Work on a new frame so the caller's 'syst' column is left untouched.
    df = df.assign(syst=[s.replace("_", " ") for s in df["syst"].values])

    # The original switched the top/right ticks off through ``plt.rcParams``
    # and then forced them to True at the end.  That clobbered whatever the
    # caller had configured and was skipped entirely if plotting raised.
    # ``rc_context`` restores the previous values in every case.
    with plt.rc_context({'xtick.top': False, 'ytick.right': False}), \
            sns.plotting_context(context='paper', font_scale=1.8):
        g = sns.FacetGrid(
            df,
            row='syst',
            col='process',
            hue='variation',
            margin_titles=True,
            legend_out=True,
        )
        g.map(plt.step, "bins", "count", where='post').add_legend()
        g.set(ylim=(0.5, 1.5))

        # Report
        print("Creating {}".format(output))

        # Actually save the figure
        g.fig.savefig(output, format="pdf", bbox_inches="tight")
        plt.close(g.fig)
def _show_results(results: pd.DataFrame):
    """
    Given a DataFrame of performance testing results, this function
    plots the results in a figure. In addition, it dumps the results as a string.

    The figure is saved as ``visualizations/<name>.png``, where ``<name>`` is
    the file name (without extension) of the stack frame two levels above
    this function.

    :param results: a DataFrame containing the results of a performance test
        (columns 'Input', 'Performance' and 'Function')
    """
    print(results.to_string())
    sns.set_theme()
    with sns.plotting_context("paper", font_scale=1.5):
        sns.catplot(
            x="Input",
            y="Performance",
            hue="Function",
            kind="bar",
            data=pd.DataFrame(results),
            legend=False,
            height=8,
            aspect=2
        )
        plt.title("How to Python: Function Performance Comparison", fontsize=16)
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, title="Functions",
                   fontsize='12', title_fontsize='12')
        plt.tight_layout()

        filename = os.path.splitext(os.path.basename(inspect.stack()[2].filename))[0]
        # Create the output directory first: ``savefig`` fails if it is missing.
        os.makedirs("visualizations", exist_ok=True)
        plt.savefig(f"{os.path.join('visualizations', filename)}.png")
def plot_bp_qual(tsv_file, outfile):
    """Save point plots of error probability against homopolymer length.

    Reads the tab-separated ``tsv_file``, drops rows without 'P_error', and
    draws one facet per value of 'Passes' (1-12, three per row) with one line
    per 'Base'.  The figure is saved to ``outfile``.
    """
    # ``sns.plt`` no longer exists in seaborn; use pyplot directly.
    plt.clf()
    with sns.plotting_context("paper", font_scale=1.8):
        indata = pd.read_csv(tsv_file, sep="\t")

        # Changes from the original call:
        #  * ``factorplot`` -> ``catplot`` with ``kind="point"`` stated
        #    explicitly (``factorplot`` defaulted to a point plot; ``catplot``
        #    defaults to a strip plot), and ``size`` -> ``height``;
        #  * ``cut`` and ``bw`` removed: they are violin-plot options left over
        #    from the commented-out ``kind="violin"`` and do not apply to a
        #    point plot;
        #  * the unused ``plt.subplots()`` call removed, which left an extra
        #    empty figure open.
        g = sns.catplot(x="Homopolymenr_length", y="P_error",
                        col="Passes",
                        col_order=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
                        col_wrap=3,
                        hue="Base",
                        data=indata[indata.P_error.notnull()],
                        kind="point",
                        height=3, aspect=1.6,
                        palette="Set3", dodge=True)
        sns.set(style="whitegrid", palette="muted")
        g.set(xlim=(0, 6))
        g.set(ylim=(0, 1))
        plt.savefig(outfile)
        plt.close()
def welch_matrix(scores, images):
    """Correlate median-split trait scores with image values, trait by trait.

    For every pair of the five traits, the trait's column of ``scores`` is
    binarised around its median (to -100/+100) and correlated with the other
    trait's column of ``images`` using Kendall's tau.  One bar chart per row
    trait is shown.

    Returns
    -------
    pandas.DataFrame
        The 5x5 matrix.  Off-diagonal entries above the diagonal are reset to
        zero by the loop, so only the diagonal and lower triangle survive.
    """
    # The original first assigned ``list(scores.columns)`` and immediately
    # overwrote it; only the fixed trait list is ever used.
    traits = ["Openness", "Conscientiousness", "Extraversion",
              "Agreeableness", "Neuroticism"]
    ret = np.zeros((len(traits), len(traits)))

    for i, trait_i in enumerate(traits):
        # ``.values`` replaces ``.as_matrix()``, which was removed from pandas.
        arr_i = scores[trait_i].values
        arr_i = (arr_i > np.median(arr_i)).astype(float) * 200 - 100

        for j, trait_j in enumerate(traits):
            arr_j = images[trait_j].values
            ret[i, j] = scipy.stats.kendalltau(arr_i, arr_j)[0]
            if i != j:
                ret[j, i] = 0

        # NOTE(review): plotting is assumed to happen once per row trait,
        # after its row has been filled — confirm against the original layout.
        with sns.plotting_context("notebook", font_scale=1.5):
            pylab.clf()
            cmap = sns.cubehelix_palette(100, start=0, rot=-0.25, reverse=False)
            # Map the row's values onto palette indices 0..99.
            v = ret[i, :].max() - ret[i, :].min()
            color = (99 * (ret[i, :] - ret[i, :].min()) / v).astype(int)
            for idx in range(5):
                pylab.bar(idx, height=ret[i, idx], color=cmap[color[idx]])
                pylab.text(idx,
                           ret[i, idx] + v * (0.025 if ret[i, idx] > 0 else -0.04),
                           traits[idx],
                           horizontalalignment="center", fontsize=12)
            # NOTE(review): the label says Spearman but the statistic computed
            # above is Kendall's tau — confirm which one is intended.
            pylab.ylabel("Spearman $\\rho$")
            pylab.tight_layout()
            pylab.gca().set_xticks([])
            pylab.show()

    return pandas.DataFrame(ret, columns=[traits], index=[traits])
def plot_region_heatmap(self, clim=None):
    """
    Plots a frequency x region heatmap of mean t-statistics.

    ``self.group_df`` is averaged within subject by region and frequency and
    then across subjects.  Rows with an empty region are dropped.

    Parameters
    ----------
    clim : float, optional
        Symmetric colour limit (``vmin=-clim``, ``vmax=clim``).  Defaults to
        the largest absolute value in the heatmap.
    """
    # mean t-stat within subject by region and frequency, then mean across subjects
    mean_df = (self.group_df
               .groupby(['subject', 'regions', 'frequency']).mean()
               .groupby(['regions', 'frequency']).mean())
    mean_df = mean_df.reset_index()

    # ignore data without a region.  Assign the result back instead of using
    # ``inplace=True`` on the column selection, which pandas does not
    # guarantee to write through to the frame.
    mean_df['regions'] = mean_df['regions'].replace('', np.nan)
    mean_df = mean_df.dropna(subset=['regions'])

    # reshape it for easier plotting with seaborn
    mean_df = mean_df.pivot_table(index='frequency', columns='regions',
                                  values='t-stat')

    # center the colormap and plot
    if clim is None:
        clim = np.max(np.abs(mean_df.values))
    with sns.plotting_context("talk"):
        sns.heatmap(mean_df, cmap='RdBu_r',
                    yticklabels=mean_df.index.values.round(2),
                    vmin=-clim, vmax=clim,
                    cbar_kws={'label': 't-stat'})
        plt.gca().invert_yaxis()
        plt.ylabel('Frequency')
        plt.xlabel('')
        plt.gcf().set_size_inches(12, 9)
def plotDf(df, save_directory, filename, drawdev=False, font_scale=0.8, hue_order=None, legend_loc=1):
    """Draw one line per method across datasets, save it as a PDF and show it.

    ``df`` needs the columns 'dataset' and 'method', plus 'value' (used when
    ``drawdev`` is true) or 'method_mean' (used otherwise).  The figure is
    written to ``<save_directory>/<filename>.pdf``.
    """
    sns.set_style("whitegrid")
    setLateXFonts()

    # Line and grid widths follow the font scale.
    context_rc = {"lines.linewidth": font_scale, "grid.linewidth": font_scale}
    with sns.plotting_context("poster", font_scale=font_scale, rc=context_rc):
        fig, ax = plt.subplots()

        y_column = "value" if drawdev else "method_mean"
        line_kwargs = dict(x="dataset", y=y_column, hue="method",
                           data=df, sort=False)
        if hue_order is not None:
            line_kwargs["hue_order"] = hue_order
        sns_plot = sns.lineplot(**line_kwargs)

        ax.set(xlabel='dataset', ylabel='retrieval loss')
        ax.legend(loc=legend_loc)

        fig = sns_plot.get_figure()
        fig.savefig(f"{save_directory}/{filename}.pdf", bbox_inches='tight')
        plt.show()
        plt.clf()
def recall_per_abundance_normalized(args):
    """Save point plots of recall against read count, one facet per abundance.

    Reads the tab-separated ``args.recallfile`` and writes
    ``recall_per_abundance_normalized.pdf`` into ``args.outfolder``.  Lines
    are split by the 'ed' column.
    """
    plt.clf()
    with sns.plotting_context("paper", font_scale=1.8):
        data = pd.read_csv(args.recallfile, sep="\t")
        # The original called ``data.apply(pd.to_numeric, errors='coerce')``
        # here without using the result, so it changed nothing and was removed.

        # ``factorplot`` -> ``catplot`` with ``kind="point"`` stated explicitly
        # (``factorplot`` defaulted to a point plot); ``size`` -> ``height``.
        g = sns.catplot(x="read_count", y="recall", hue="ed", col="abundance",
                        data=data, kind="point",
                        col_wrap=3, height=3, aspect=1.6,
                        col_order=[0.5, 0.2, 0.1, 0.05, 0.01, 0.005])
        g.set(ylim=(0.0, 1.0))
        g.set_ylabels("Recall")
        g.set_xlabels("Total read depth")

        outfile = os.path.join(args.outfolder,
                               "recall_per_abundance_normalized.pdf")
        plt.savefig(outfile)
        plt.close()
def plot_clustering_similarity(results, plot_dir=None, verbose=False, ext='png'):
    """Compare all clustering solutions in ``results.HCA`` pairwise.

    For each pair, two quantities are recorded: the correlation between the
    condensed distance matrices, and the agreement between cluster labels
    (adjusted Rand score in the upper triangle, adjusted mutual information in
    the lower triangle).  Both matrices are drawn as heatmaps.

    Args:
        results: object exposing ``HCA.results`` (name -> dict with
            'distance_df' and 'labels') and ``HCA.dist_metric``
        plot_dir: directory to save both figures in. If None, do not save
        verbose: if true, print the correlation between the two agreement scores
        ext: the extension for the saved figures
    """
    HCA = results.HCA
    # get all clustering solutions
    clusterings = list(HCA.results.items())

    # Bug fix: the index used the full keys while the loop below looks entries
    # up by the part after the last '-'.  For keys containing '-' this added
    # new rows/columns instead of filling the matrix.  The index now uses the
    # same short names.
    names = [key.split('-')[-1] for key, _ in clusterings]
    n_solutions = len(clusterings)
    cluster_similarity = pd.DataFrame(np.zeros((n_solutions, n_solutions)),
                                      index=names, columns=names)
    distance_similarity = pd.DataFrame(np.zeros((n_solutions, n_solutions)),
                                       index=names, columns=names)

    for clustering1, clustering2 in combinations(clusterings, 2):
        name1 = clustering1[0].split('-')[-1]
        name2 = clustering2[0].split('-')[-1]

        # record similarity of distance_df
        dist_corr = np.corrcoef(squareform(clustering1[1]['distance_df']),
                                squareform(clustering2[1]['distance_df']))[1, 0]
        distance_similarity.loc[name1, name2] = dist_corr
        distance_similarity.loc[name2, name1] = dist_corr

        # record similarity of clustering of dendrogram
        clusters1 = clustering1[1]['labels']
        clusters2 = clustering2[1]['labels']
        rand_score = adjusted_rand_score(clusters1, clusters2)
        MI_score = adjusted_mutual_info_score(clusters1, clusters2)
        cluster_similarity.loc[name1, name2] = rand_score
        cluster_similarity.loc[name2, name1] = MI_score

    with sns.plotting_context(context='notebook', font_scale=1.4):
        clust_fig = plt.figure(figsize=(12, 12))
        sns.heatmap(cluster_similarity, square=True)
        plt.title('Cluster Similarity: TRIL: Adjusted MI, TRIU: Adjusted Rand',
                  y=1.02)

        dist_fig = plt.figure(figsize=(12, 12))
        sns.heatmap(distance_similarity, square=True)
        plt.title('Distance Similarity, metric: %s' % HCA.dist_metric,
                  y=1.02)

        if plot_dir is not None:
            save_figure(clust_fig,
                        path.join(plot_dir,
                                  'cluster_similarity_across_measures.%s' % ext),
                        {'bbox_inches': 'tight'})
            save_figure(dist_fig,
                        path.join(plot_dir,
                                  'distance_similarity_across_measures.%s' % ext),
                        {'bbox_inches': 'tight'})
            plt.close(clust_fig)
            plt.close(dist_fig)

    if verbose:
        # assess relationship between two measurements: the upper triangle
        # holds the Rand scores, the transposed upper triangle the MI scores
        upper = np.triu_indices_from(cluster_similarity.values, k=1)
        rand_scores = cluster_similarity.values[upper]
        MI_scores = cluster_similarity.T.values[upper]
        score_consistency = np.corrcoef(rand_scores, MI_scores)[0, 1]
        print('Correlation between measures of cluster consistency: %.2f'
              % score_consistency)
def plot_alpha(metadata, category, hue):
    """Box-plot the 'Alpha diversity' column of ``metadata`` per category.

    Parameters
    ----------
    metadata : DataFrame
        Must provide 'Alpha diversity' and the ``category`` column.
    category : str
        Column used for the x axis; rows are sorted by it before plotting.
    hue : str
        Column used to split each box by colour.

    The figure is four inches wide per distinct category value.
    """
    import seaborn as sns

    style_rc = dict(sns.axes_style("darkgrid"),
                    **sns.plotting_context("notebook", font_scale=2))
    with plt.rc_context(style_rc):
        width = len(metadata[category].unique())
        plt.figure(figsize=(width * 4, 8))
        # ``DataFrame.sort`` was removed from pandas; ``sort_values`` replaces it.
        sns.boxplot(x=category, y='Alpha diversity',
                    data=metadata.sort_values(category),
                    hue=hue, palette='cubehelix')
def context(context='notebook', font_scale=1.5, rc=None):
    """Return a seaborn plotting context with this module's defaults applied.

    Parameters
    ----------
    context : str, optional
        Name of the seaborn context.
    font_scale : float, optional
        Font scaling factor.
    rc : dict, optional
        Config flags.  The defaults {'lines.linewidth': 1.5,
        'axes.facecolor': '0.995', 'figure.facecolor': '0.97'} are added for
        any key not given.  The dict passed in is not modified.

    Returns
    -------
    The object returned by ``seaborn.plotting_context``; usable in a ``with``
    statement.
    """
    rc_default = {'lines.linewidth': 1.5,
                  'axes.facecolor': '0.995',
                  'figure.facecolor': '0.97'}

    # Merge into a new dict so the caller's ``rc`` is left untouched
    # (the original added the defaults to it in place).
    merged_rc = dict(rc_default)
    if rc is not None:
        merged_rc.update(rc)

    return sns.plotting_context(context=context, font_scale=font_scale,
                                rc=merged_rc)
def analyze_solution(recovered_users, hidden_user_idx, edges, verbose=False, drawing=False):
    """Compare recovered user vectors with the true ones and collect statistics.

    Parameters
    ----------
    recovered_users : ndarray
        One row per hidden user.  NOTE: normalised in place to unit length.
    hidden_user_idx : sequence
        For each row of ``recovered_users``, the user's index into ``USERS``.
    edges : iterable of 4-tuples
        Only the first two items (the endpoints) are used.
    verbose : bool
        Print the mean and percentiles of the distance to the true vectors.
    drawing : bool
        Draw three regression plots of the statistics against the distance.

    Returns
    -------
    ndarray of shape (4, k)
        One column for each row whose first recovered component exceeds -100.
        The rows are: distance to the true vector, number of neighbours,
        fraction of neighbours that are hidden, mean distance to all neighbours.
    """
    global USERS
    # Undirected adjacency sets.
    adj = defaultdict(set)
    for i, j, _, _ in edges:
        adj[i].add(j)
        adj[j].add(i)

    recovered_users /= np.sqrt((recovered_users ** 2).sum(-1))[..., np.newaxis]
    gold_users = USERS[hidden_user_idx, FEATURE_START:]
    gold_users /= np.sqrt((gold_users ** 2).sum(-1))[..., np.newaxis]
    diff = np.sqrt(((gold_users - recovered_users) ** 2).sum(-1))
    non_zeros = np.where(recovered_users[:, 0] > -100)[0]

    if verbose:
        print('average distance {:.3f}'.format(np.mean(diff[non_zeros])))
        prct = [5, 25, 50, 75, 95]
        vals = np.percentile(diff[non_zeros], prct)
        print('Percentile: '+'\t'.join(['{}'.format(str(_).ljust(5)) for _ in prct]))
        print(' '+'\t'.join(['{:.3f}'.format(_) for _ in vals]))

    embeddings = np.zeros((4, non_zeros.size))
    i = 0
    for uidx in range(len(recovered_users)):
        neighbors = adj[hidden_user_idx[uidx]]
        hidden_neighbors = {_ for _ in neighbors if _ in hidden_user_idx}
        tot_dst = 0
        # NOTE(review): this reads row ``uidx`` of USERS although the
        # neighbours above are looked up via ``hidden_user_idx[uidx]``, and
        # the in-place division normalises that row of the global USERS.
        # Left unchanged — confirm both are intended.
        me = USERS[uidx, FEATURE_START:]
        me /= np.linalg.norm(me)
        for n in neighbors:
            nei = USERS[n, FEATURE_START:]
            tot_dst += np.linalg.norm(nei/np.linalg.norm(nei) - me)
        if uidx in non_zeros:
            # NOTE(review): divides by len(neighbors); a user without edges
            # would raise ZeroDivisionError here.
            embeddings[:, i] = [diff[uidx],
                                len(neighbors),
                                len(hidden_neighbors)/len(neighbors),
                                tot_dst/len(neighbors)]
            i += 1

    if drawing:
        labels = ['number of neighbors',
                  'fraction of unknown neighbors',
                  'mean distance from all neighbors']
        for i in range(1, 4):
            # NOTE(review): 'figure.figsize' is not a plotting-context
            # parameter, so seaborn ignores it here.
            with sns.plotting_context("notebook", font_scale=1.7,
                                      rc={"figure.figsize": (20, 10)}):
                # Fix: pass x/y by keyword; newer seaborn no longer accepts
                # them positionally.
                sns.regplot(x=embeddings[i, :], y=embeddings[0, :],
                            label=labels[i-1])
                ppl.legend()
    return embeddings
def plot_boundary(predict_fun, dataset, method):
    """Plot and save the decision regions of ``predict_fun`` on the unit square.

    Parameters
    ----------
    predict_fun : callable
        Called once with an (N, 2) array of grid points; its result is
        reshaped to the grid.
    dataset, method : str
        Used to build the output name ``<dataset>_<method>[_full].png``; the
        suffix is added when the module-level ``FULL`` is true.
    """
    plot_step = .002
    grid_axis = np.arange(0, 1, plot_step)
    xx, yy = np.meshgrid(grid_axis, grid_axis)
    Z = predict_fun(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    size = (26/8)*2.54
    # Bug fix: the size was passed as rc={'figure.figsize': ...} to
    # ``sns.plotting_context``, which ignores that key, so it was never
    # applied.  It is now given to the figure directly.
    fig, ax1 = plt.subplots(figsize=(size, size))
    ax1.contourf(xx, yy, Z, 2, cmap=RdGr)
    ax1.set_xlim(0, 1)
    ax1.set_ylim(0, 1)
    plt.axis('equal')
    # Booleans instead of the 'off' strings, which current matplotlib does not
    # treat as False.
    plt.tick_params(axis='both', which='both',
                    bottom=False, top=False, left=False,
                    labelbottom=False, labelleft=False)

    # Bug fix: save before showing.  After a blocking ``plt.show()`` the figure
    # may already be closed, and ``plt.savefig`` would write an empty one.
    fig.savefig('{}_{}{}.png'.format(dataset, method, '_full' if FULL else ''),
                dpi=300, bbox_inches='tight', pad_inches=0)
    plt.show()
def isoformCheck(path):
    """Collect TP53 mutation records from every file in *path* and plot them.

    Each file is read as tab-separated text with one header line. A row is
    kept when column 11 equals ``'TP53'`` and column 36 parses to a number of
    at least 15; columns 14, 17 and 18 are stored as (mutation type, mutation,
    isoform). The patient ID is the file name up to the first ``'-'``.

    The collected records are printed and shown as a count plot of mutation
    type split by isoform.

    :param path: directory holding the input files. The process working
        directory is changed to it, as before.
    """
    # ``sns.plt`` was removed from seaborn, so use pyplot directly.
    import matplotlib.pyplot as plt

    gene='TP53'
    patientIso={}
    noPatIDonlyTups=[]
    # Resolve first so that chdir + listdir also work for relative paths.
    path = os.path.abspath(path)
    os.chdir(path)
    for files in os.listdir(path):
        # extracts patient ID from file name
        patientID = files.split('-', 1)[0]
        with open(os.path.join(path, files)) as handle:
            # comment out header line if no header is included in input
            header = next(handle, None)
            for line in handle:
                line = line.split('\t')
                if len(line)<35:
                    # malformed row: report it and give up on this file
                    print(line)
                    break
                # Column 36 is read below, so rows with only 35 or 36 fields
                # are skipped instead of raising IndexError.
                if len(line) > 36 and line[11]==gene and float(line[36].rstrip('\n'))>=15:
                    record = (str(line[14]), str(line[17]), str(line[18]))
                    noPatIDonlyTups.append(list(record))
                    patientIso.setdefault(patientID, []).append(record)
    print(patientIso)
    print(len(patientIso))
    print(noPatIDonlyTups)
    # shorten the long label so the plot axis stays readable
    for record in noPatIDonlyTups:
        if record[0]=='NON_SYNONYMOUS_CODING':
            record[0]='NON_SYN'
    dataframe=pandas.DataFrame(noPatIDonlyTups, columns=['mutation type', 'mutation', 'isoform'])
    print(dataframe)
    with sns.plotting_context("notebook", font_scale=1.5):
        sns.countplot(y="mutation type", hue="isoform", data=dataframe, palette="Set2")
        plt.show()
def plot_factor_correlation(results, c, rotate='oblimin', title=True,
                            DA=False, size=4.6, dpi=300, ext='png',
                            plot_dir=None):
    """Plot the factor correlation matrix (Phi) of a ``c``-factor solution.

    The full matrix is drawn as a heatmap, then drawn again with the strictly
    lower triangle masked and the remaining cells annotated.

    Args:
        results: object exposing ``EFA``/``DA`` analyses and an ``ID`` string
        c: number of factors of the solution to plot
        rotate: rotation name used to look up the fitted solution
        title: if True, add a title derived from ``results.ID``
        DA: use ``results.DA`` instead of ``results.EFA``
        size: figure height in inches; fonts are scaled from it
        dpi, ext: resolution and file extension used when saving
        plot_dir: if given, the figure is saved there and then closed
    """
    if DA:
        EFA = results.DA
    else:
        EFA = results.EFA
    loading = EFA.get_loading(c, rotate=rotate)
    # get factor correlation matrix
    reorder_vec = EFA.get_factor_reorder(c)
    phi = get_attr(EFA.results['factor_tree_Rout_%s' % rotate][c],'Phi')
    phi = pd.DataFrame(phi, columns=loading.columns, index=loading.columns)
    phi = phi.iloc[reorder_vec, reorder_vec]
    mask = np.zeros_like(phi)
    mask[np.tril_indices_from(mask, -1)] = True
    # ``with A and B`` evaluated the boolean expression and entered only the
    # axes style; a comma enters both context managers.
    with sns.plotting_context('notebook', font_scale=2), sns.axes_style('white'):
        f = plt.figure(figsize=(size*5/4, size))
        ax1 = f.add_axes([0,0,.9,.9])
        cbar_ax = f.add_axes([.91, .05, .03, .8])
        cmap = sns.diverging_palette(220,15,n=100,as_cmap=True)
        sns.heatmap(phi, ax=ax1, square=True, vmax=1, vmin=-1,
                    cbar_ax=cbar_ax, cmap=cmap)
        sns.heatmap(phi, ax=ax1, square=True, vmax=1, vmin=-1,
                    cbar_ax=cbar_ax, annot=True,
                    annot_kws={"size": size/c*15},
                    cmap=cmap, mask=mask)
        yticklabels = ax1.get_yticklabels()
        ax1.set_yticklabels(yticklabels, rotation=0, ha="right")
        ax1.set_xticklabels(ax1.get_xticklabels(), rotation=90)
        if title == True:
            ax1.set_title('%s Factor Correlations' % results.ID.split('_')[0].title(),
                          weight='bold', y=1.05, fontsize=size*3)
        ax1.tick_params(labelsize=size*3)
        # format cbar
        cbar_ax.tick_params(axis='y', length=0)
        cbar_ax.tick_params(labelsize=size*2)
        cbar_ax.set_ylabel('Pearson Correlation', rotation=-90,
                           labelpad=size*4, fontsize=size*3)
        if plot_dir:
            filename = 'factor_correlations_EFA%s.%s' % (c, ext)
            save_figure(f, path.join(plot_dir, filename),
                        {'bbox_inches': 'tight', 'dpi': dpi})
            plt.close()
def plotting_context(context='notebook', font_scale=1.5, rc=None):
    """Create alphalens default plotting style context.

    Under the hood, calls and returns seaborn.plotting_context() with
    some custom settings. Usually you would use in a with-context.

    Parameters
    ----------
    context : str, optional
        Name of seaborn context.
    font_scale : float, optional
        Scale font by factor font_scale.
    rc : dict, optional
        Config flags.
        By default, {'lines.linewidth': 1.5}
        is being used and will be added to any
        rc passed in, unless explicitly overridden.
        The dict passed in is not modified.

    Returns
    -------
    seaborn plotting context

    Example
    -------
    with alphalens.plotting.plotting_context(font_scale=2):
        alphalens.create_full_tear_sheet(..., set_context=False)

    See also
    --------
    For more information, see seaborn.plotting_context().
    """
    # Start from the defaults and layer the caller's values on top. The
    # previous setdefault() loop wrote the defaults into the caller's dict.
    merged_rc = {'lines.linewidth': 1.5}
    if rc is not None:
        merged_rc.update(rc)

    return sns.plotting_context(context=context, font_scale=font_scale,
                                rc=merged_rc)
def plot_stacked_bar(df):
    """Draw each row of *df* as one layer of a stacked bar chart.

    There is one bar per column of *df*. Every row gets a random colour and a
    legend entry made of its values (two decimals, in brackets) followed by
    its index label. The y axis is fixed to ``[0, 1]``.
    """
    import seaborn as sns

    rc_settings = dict(sns.axes_style("darkgrid"))
    rc_settings.update(sns.plotting_context("notebook", font_scale=1.8))
    with plt.rc_context(rc_settings):
        f, ax = plt.subplots(1, figsize=(10, 10))
        n_columns = len(df.columns)
        positions = list(range(n_columns))
        bottom = np.array([0] * n_columns)
        hex_digits = list('ABCDEF123456789')

        cat_percents = []
        for id_ in df.index:
            heights = df.loc[id_]
            color = '#' + ''.join(np.random.choice(hex_digits, 6))
            ax.bar(positions, heights, color=color, bottom=bottom,
                   align='center')
            # the next layer starts where this one ends
            bottom = heights + bottom
            cat_percents.append(
                ''.join("[{0:.2f}] ".format(value)
                        for value in heights.tolist()))

        legend_labels = [' '.join(pair)
                         for pair in zip(cat_percents, df.index.tolist())]
        ax.set_xticks(positions)
        ax.set_xticklabels(df.columns.tolist())
        ax.set_ylim([0, 1])
        ax.legend(legend_labels, loc='center left', bbox_to_anchor=(1, 0.5))
def Graph(predictedEndAccuracy, task = DEFAULT):
    """Plot the population end-accuracy density with one prediction marked.

    The pickled population values for the requested task level are loaded,
    drawn as a shaded kernel density estimate, and a vertical line is added
    at ``predictedEndAccuracy``.

    Returns ``(fig, percent)``, where ``percent`` is the percentage of
    population values strictly below the prediction. For an unrecognised
    ``task`` a message is printed and ``EMPTY_STRING`` is returned.
    """
    # load graph model
    level_to_graph = (
        (TASK_LEVEL_1, GROUP_END_ACCURACY_LEVEL1),
        (TASK_LEVEL_2, GROUP_END_ACCURACY_LEVEL2),
        (TASK_LEVEL_3, GROUP_END_ACCURACY_LEVEL3),
        (TASK_LEVEL_4, GROUP_END_ACCURACY_LEVEL4),
        (TASK_LEVEL_5, GROUP_END_ACCURACY_LEVEL5),
    )
    for level, candidate in level_to_graph:
        if task == level:
            graph = candidate
            break
    else:
        print("Unknow task level value entered: %s" % task)
        return EMPTY_STRING

    # Close the file deterministically instead of leaking the handle.
    # NOTE: pickle.load executes code embedded in the file; only load
    # trusted files.
    with open(graph, "rb") as pickle_file:
        groupEndAccuracy = pickle.load(pickle_file)

    # generate plot
    rcParams.update({'figure.autolayout': True})
    fig = plt.figure(figsize = (5, 3.4), dpi = 100)
    ax = fig.add_subplot(111)
    with sns.plotting_context(PLOTTING_CONTEXT, font_scale = 1.2):
        ax = sns.kdeplot(groupEndAccuracy, shade = True,
                         color = sns.xkcd_rgb[PLOT_COLOR], linewidth = 3)
        ax.set(yticks = [])
        # a legend is only present when seaborn decided to draw one
        if ax.legend_ is not None:
            ax.legend_.remove()
        ax.set_xlabel(X_LABEL, fontsize = 12)
        # ax.hold(True) was dropped: Axes.hold was removed in Matplotlib 3.0
        # and holding was already the default behaviour.
        plt.axvline(x = predictedEndAccuracy, ymin = 0, ymax = 1.2,
                    linewidth = 2, color = LINE_COLOR)
        ax.set_xlim([0, 1.2])

    # calculate statistics (a count does not need the values sorted)
    population = np.asarray(groupEndAccuracy)
    numberBelow = np.count_nonzero(population < predictedEndAccuracy)
    percentHigherThanPopulation = float(numberBelow) / len(population) * 100
    return (fig, percentHigherThanPopulation)
def plot_heatmap_genus(dataframe, high, low, oxy, rep, plot_dir):
    """
    Make a heatmap at Genus, using organisms within the specified abundance
    cutoffs.

    :param dataframe: dataframe to pass
    :param high: highest abundance to include genera for
    :param low: lowest abundance to include genera for
    :param oxy: oxygen tension, "Low" or "High", or 'all' to keep both
    :param rep: replicates to keep (passed to ``isin``), or 'all'
    :param plot_dir: directory to save plots in.
    :return: None; the figure is written to a PDF in plot_dir
    """
    def _keep_all(value):
        # ``value is not 'all'`` compared object identity and only worked by
        # accident of string interning; compare by value instead.
        return isinstance(value, str) and value == 'all'

    # get rid of oxygen levels and replicates if specified.
    if not _keep_all(oxy):
        print("keep only {} oxygen samples".format(oxy))
        dataframe = dataframe[dataframe['oxy'] == oxy]
    if not _keep_all(rep):
        print("keep only replicate levels:", rep)
        dataframe = dataframe[dataframe['rep'].isin(rep)]
    dataframe = abundance_utils.filter_by_abundance(
        data=dataframe,
        abundance_column='fraction of reads',
        high=high, low=low)
    # Work on an explicit copy so that adding a column neither warns nor
    # touches the frame the filtered result was derived from.
    dataframe = dataframe.copy()
    dataframe['facet_replicate'] = 'replicate ' + dataframe['rep'].astype(str)

    # make height of the plot a function of the number of rows (Genera):
    num_data_rows = len(dataframe['Genus'].unique())
    plot_size = 2 + num_data_rows / 7
    # The original had two consecutive ``> 9`` tests (.65 then .6), so .65
    # was never effective; the resulting values are unchanged.
    if num_data_rows > 9:
        plot_aspect = .6
    elif num_data_rows > 6:
        plot_aspect = .85
    else:
        plot_aspect = 2

    def facet_heatmap(data, **kws):
        """
        Used to fill the subplots with data.

        :param data: rows belonging to one facet
        :param kws: keyword arguments forwarded to sns.heatmap
        :return: None
        """
        facet_data = data.pivot(index='Genus', columns='week',
                                values='fraction of reads')
        # Pass kwargs to heatmap  cmap used to be 'Blue'
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)

    # NOTE(review): without a context name seaborn appears to ignore
    # font_scale, so this block probably does not change font sizes - confirm.
    with sns.plotting_context(font_scale=7):
        # NOTE(review): newer seaborn spells ``size`` as ``height``; kept
        # as-is for the seaborn version this file targets.
        g = sns.FacetGrid(dataframe, col='facet_replicate',
                          margin_titles=True,
                          size=plot_size, aspect=plot_aspect)
        g.set_xticklabels(rotation=90)

    # Create a colorbar axes
    cbar_ax = g.fig.add_axes([.94, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax, vmin=0,
                        # specify vmax = max abundance seen or each will
                        # have its own scale (and you might not know it!)
                        vmax=dataframe['fraction of reads'].max(),
                        )

    g.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)
    g.set_axis_labels('week')

    # Add space so the colorbar doesn't overlap the plot
    g.fig.subplots_adjust(right=.9)

    # add a supertitle, you bet.
    plt.subplots_adjust(top=0.80)
    supertitle = str(low) + ' < fraction of reads < ' + str(
        high) + ', {} oxygen'.format(oxy)
    g.fig.suptitle(supertitle, size=18)

    # write a filename and save.
    filename = oxy + "_oxygen--{0}_to_{1}_abundance".format(low, high)
    print('filename:', filename)

    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)

    # save figure
    g.savefig(plot_dir + filename + '.pdf')
def heatmap_from_taxa_dict(dataframe, taxa_dict,
                           title=False,
                           facet='rep', annotate=False,
                           summarise_other=True,
                           main_dir='./',
                           cap_facet_labels=True,
                           plot_dir='./plots/mixed_taxonomy/',
                           size_spec=False,
                           aspect_spec=False,
                           check_totals_sum_to_1=True,
                           svg=False):
    """
    Make a plot using a taxa_dict.

    The taxa_dict is used to make a summary dataframe using
    aggregate_mixed_taxonomy(), and the result is plotted.

    :param dataframe: dataframe to source all data from
    :param taxa_dict: a dictionary with taxonomic levels as keys and
    names as values.  E.g. {'Phylum':['Bacteroidetes'],
    'Order':['Burkholderiales','Methylophilales', 'Methylococcales']}
    :param title: add a supertitle describing taxa_dict?
    :param facet: 'rep' or 'week'. It selects the x-label rotation and the
    default size/aspect, and is appended to the output file name.
    :param annotate: print numerical values inside each square? (Makes big
    plots *really* big; not recommended for default use.)
    :param summarise_other: include a bar for "other"?  (Or just don't show)
    :param main_dir: main dir to consider "home", so notebooks can be run in
    remote directories.
    :param cap_facet_labels: use the capitalised column names for the
    facets. Only supported together with facet='rep'.
    :param plot_dir: path to save plots at, relative to main_dir
    :param size_spec: manually specify the figure size (useful when default
    is ugly)
    :param aspect_spec: manually specify the figure aspect ratio (useful
    when default is ugly)
    :param check_totals_sum_to_1: forwarded to aggregate_mixed_taxonomy()
    :param svg: also save an .svg next to the .pdf
    :return: saves and returns a seaborn heat map
    :raises ValueError: if cap_facet_labels is set and facet is not 'rep'
    """
    # This combination used to print a message and then die with a NameError
    # once the grid was built; fail early with the same message instead.
    if cap_facet_labels and facet != "rep":
        raise ValueError("not set up for facet != rep")

    # Cherry pick out the rows for the specified taxa.
    # If you give conflicting taxa as input, aggregate_mixed_taxonomy() will
    # throw an error.
    plot_data = aggregate_mixed_taxonomy(
        dataframe=dataframe,
        taxa_dict=taxa_dict,
        main_dir=main_dir,
        summarise_other=summarise_other,
        check_totals_sum_to_1=check_totals_sum_to_1)

    # store the maximum abundance level.  We will need to tell all the
    # sub-heat maps to use this same maximum so they aren't each on their
    # own scale.
    max_abundance = plot_data['abundance sum'].max()

    print('plot_data.head()')
    print(plot_data.head())

    def pivot_so_columns_are_plotting_variable(dataframe, groupby):
        return dataframe.pivot(index='taxonomic name',
                               columns=groupby,
                               values='abundance sum')

    def facet_heatmap(data, groupby, xrotation, **kws):
        """
        Used to fill the subplots with data.

        :param data: dataframe to plot
        :param groupby: column to group on
        :param xrotation: degrees to rotate x labels by
        :param kws: keyword arguments for plotting
        :return: None
        """
        # pivot only supports one column for now.
        # http://stackoverflow.com/questions/32805267/pandas-pivot-on-multiple-columns-gives-the-truth-value-of-a-dataframe-is-ambigu
        facet_data = pivot_so_columns_are_plotting_variable(
            dataframe=data, groupby=groupby)
        # Pass kwargs to heatmap  cmap used to be 'Blue'
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)
        # ``g`` is the FacetGrid created further down in the enclosing scope.
        g.set_xticklabels(rotation=xrotation)

    # todo: add a label at the bottom like "replicate" or "week"
    # currently replicate is turned into facet_replicate but should just
    # make a label that says replicate.  Week

    # Control plot aesthetics depending on facet option.
    # Size and aspect depend on the number of rows per subplot.
    num_rows = len(plot_data['taxonomic name'].unique())
    space_for_cbar = 0.85
    if facet == 'week':
        xrotation = 0
        size = 2 * 0.2*num_rows
        aspect = 1
    else:
        # (facet = "rep")
        xrotation = 90
        size = 1 + 0.22*num_rows
        aspect = 1.5  # aspect for each sub-plot, not a single tile

    if size_spec:
        size = size_spec
    if aspect_spec:
        aspect = aspect_spec

    print(plot_data.head())

    if cap_facet_labels:
        # facet == "rep" is guaranteed by the check at the top
        row_var = '$O_2$'
        col_var = 'Week'
        facet_var = "Replicate"
        plot_data = capitalize_some_column_names(plot_data)
    else:
        # NOTE(review): this branch facets on 'rep' even when facet='week';
        # left unchanged because the intended layout is not visible here.
        facet_var = 'rep'
        row_var = 'oxy'
        col_var = 'week'

    # NOTE(review): without a context name seaborn appears to ignore
    # font_scale, so this block probably does not change font sizes - confirm.
    with sns.plotting_context(font_scale=8):
        g = sns.FacetGrid(plot_data,
                          col=facet_var,
                          row=row_var,
                          size=size,
                          aspect=aspect,
                          margin_titles=True)

    # Add axes for the colorbar.  [left, bottom, width, height]
    cbar_ax = g.fig.add_axes([.92, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax,
                        # NEED vmax = MAX ABUNDANCE or each plot will have
                        # its own color scale!
                        vmin=0, vmax=max_abundance,
                        annot=annotate,
                        groupby=col_var,
                        xrotation=xrotation)

    g.set_axis_labels(col_var)

    # add space for x label
    g.fig.subplots_adjust(bottom=0.2)

    # todo: add an x-label for each facet (I want only 1)
    # g.set_axis_labels(['x label', 'ylabel'])
    # g.fig.subplots_adjust(top=0.2)
    # g.fig.text(0.5, 0.1, s='armadillo') #, *args, **kwargs)
    # g.fig.xlabel('ardvark')

    # Add space so the colorbar doesn't overlap the plot.
    g.fig.subplots_adjust(right=space_for_cbar)
    # todo: still not enough room for
    # Order-Burkholderiales_Methylophilales_Methylococcales--
    # Phylum-Bacteroidetes--rep.pdf

    # Format the y strings in each subplot of the Seaborn grid.
    # Don't put () on the function you are passing.
    # Todo: make the 2nd argument a function
    y_label_formatter(g, italics_unless_other)

    supertitle = taxa_dict_to_descriptive_string(taxa_dict)
    if title:
        # TODO: they are currently being converted to LaTeX
        # add a supertitle, you bet.
        plt.subplots_adjust(top=0.80)
        g.fig.suptitle(supertitle, size=16)

    # Tight layout --> title and cbar overlap heat maps.  Boo.
    # NO: plt.tight_layout()
    g.fig.subplots_adjust(wspace=.05, hspace=.05)

    # prepare filename and save.
    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)
    print("plot directory: {}".format(plot_dir))
    filepath = plot_dir + supertitle
    filepath += "--{}".format(facet)
    if annotate:
        filepath += "--annotated"
    filepath += ".pdf"
    print(filepath)
    g.fig.savefig(filepath)
    if svg:
        # Swap the extension by slicing: rstrip("pdf") strips a *set of
        # characters* and only worked because a '.' precedes the extension.
        g.fig.savefig(filepath[:-len("pdf")] + "svg")

    return g
def plot_fluxnet_comparison_one_site(driver, science_test_data_dir,
                                     compare_data_dict, result_dir, plot_dir,
                                     plots_to_make, context, style, var_names,
                                     months, obs_dir, subdir):
    """Plot mean diurnal cycles of observed and simulated fluxes for one site.

    Nothing happens unless ``check_site_files(obs_dir, subdir)`` is truthy.
    One dataset is loaded per key of ``compare_data_dict``: ``"ecflux"``
    through ``read_fluxnet_obs``, ``"VIC.4.2.d"`` through
    ``read_vic_42_output`` and anything else through ``read_vic_5_output``.
    A dataset whose reader raises ``OSError`` is skipped with a warning.

    Depending on ``plots_to_make`` an annual-mean and/or a monthly-mean
    diurnal-cycle figure is written to ``<plot_dir>/annual_mean`` and
    ``<plot_dir>/monthly_mean`` as ``<lat>_<lng>.png``.

    ``compare_data_dict[key]`` supplies the per-dataset ``linewidth``,
    ``color``, ``linestyle`` and ``zorder``; ``var_names`` maps data column
    names to axis labels; ``months`` supplies the monthly column titles.
    ``driver`` is not used here.
    """
    if not check_site_files(obs_dir, subdir):
        return

    # get CSV file from site directory to get lat/lng for site
    lat, lng = get_fluxnet_lat_lon(obs_dir, subdir)
    print(lat, lng)

    # loop over data to compare
    data = {}
    for key, items in compare_data_dict.items():
        if key == "ecflux":
            try:
                # load Ameriflux data
                data[key] = read_fluxnet_obs(subdir,
                                             science_test_data_dir,
                                             items)
            except OSError:
                warnings.warn(
                    "this %s site does not have data" % subdir)
        elif key == "VIC.4.2.d":
            try:
                # load VIC 4.2 simulations
                data[key] = read_vic_42_output(lat, lng,
                                               science_test_data_dir,
                                               items)
            except OSError:
                warnings.warn(
                    "this site has a lat/lng precision issue")
        else:
            try:
                # load VIC 5 simulations
                data[key] = read_vic_5_output(lat, lng, result_dir, items)
            except OSError:
                warnings.warn(
                    "this site has a lat/lng precision issue")

    # make figures
    # plot preferences
    fs = 15
    dpi = 150

    if 'annual_mean_diurnal_cycle' in plots_to_make:
        # make annual mean diurnal cycle plots
        with plt.rc_context(dict(sns.axes_style(style),
                                 **sns.plotting_context(context))):
            f, axarr = plt.subplots(4, 1, figsize=(8, 8), sharex=True)

            for i, (vic_var, variable_name) in enumerate(
                    var_names.items()):
                # calculate annual mean diurnal cycle for each DataFrame
                annual_mean = {}
                for key, df in data.items():
                    annual_mean[key] = pd.DataFrame(
                        df[vic_var].groupby(df.index.hour).mean())

                df = pd.DataFrame(
                    {key: d[vic_var] for key, d in annual_mean.items()
                     if vic_var in d})

                # DataFrame.items() replaces iteritems(), which was removed
                # in pandas 2.0; both yield (column name, Series).
                for key, series in df.items():
                    series.plot(
                        linewidth=compare_data_dict[key]['linewidth'],
                        ax=axarr[i],
                        color=compare_data_dict[key]['color'],
                        linestyle=compare_data_dict[key]['linestyle'],
                        zorder=compare_data_dict[key]['zorder'])

                axarr[i].legend(loc='upper left')
                axarr[i].set_ylabel(
                    '%s ($W/{m^2}$)' % variable_name, size=fs)
                axarr[i].set_xlabel('Time of Day (Hour)', size=fs)
                axarr[i].set_xlim([0, 24])
                axarr[i].xaxis.set_ticks(np.arange(0, 24, 3))

            # save plot
            plotname = '%s_%s.png' % (lat, lng)
            os.makedirs(os.path.join(plot_dir, 'annual_mean'),
                        exist_ok=True)
            savepath = os.path.join(plot_dir, 'annual_mean', plotname)
            plt.savefig(savepath, bbox_inches='tight', dpi=dpi)
            plt.clf()
            plt.close()

    if 'monthly_mean_diurnal_cycle' in plots_to_make:
        # make monthly mean diurnal cycle plots
        with plt.rc_context(dict(sns.axes_style(style),
                                 **sns.plotting_context(context))):
            f, axarr = plt.subplots(4, 12, figsize=(35, 7),
                                    sharex=True, sharey=True)

            for i, (vic_var, variable_name) in enumerate(
                    var_names.items()):
                # calculate monthly mean diurnal cycle
                monthly_mean = {}
                for (key, df) in data.items():
                    monthly_mean[key] = pd.DataFrame(
                        df[vic_var].groupby([df.index.month,
                                             df.index.hour]).mean())

                df = pd.DataFrame(
                    {key: d[vic_var] for key, d in monthly_mean.items()
                     if vic_var in d})

                for j, month in enumerate(months):
                    for key, series in df.items():
                        # the first index level is the month number (1-12)
                        series[j + 1].plot(
                            linewidth=compare_data_dict[key]['linewidth'],
                            ax=axarr[i, j],
                            color=compare_data_dict[key]['color'],
                            linestyle=compare_data_dict[key]['linestyle'],
                            zorder=compare_data_dict[key]['zorder'])

                    axarr[i, j].set_ylabel(
                        '%s \n ($W/{m^2}$)' % variable_name, size=fs)
                    axarr[i, j].set_xlabel('', size=fs)
                    axarr[i, j].set_xlim([0, 24])
                    axarr[i, j].xaxis.set_ticks(np.arange(0, 24, 3))
                    if i == 0:
                        axarr[i, j].set_title(month, size=fs)

            # add legend
            axarr[0, -1].legend(loc='center left',
                                bbox_to_anchor=(1, 0.5))

            # add common x label
            f.text(0.5, 0.04, 'Time of Day (Hour)', ha='center', size=fs)

            # save plot
            plotname = '%s_%s.png' % (lat, lng)
            os.makedirs(os.path.join(plot_dir, 'monthly_mean'),
                        exist_ok=True)
            savepath = os.path.join(plot_dir, 'monthly_mean', plotname)
            plt.savefig(savepath, bbox_inches='tight', dpi=dpi)
            plt.clf()
            plt.close()
def plot_snotel_comparison_one_site(
        driver, science_test_data_dir,
        compare_data_dict,
        result_dir, plot_dir,
        plots_to_make, plot_variables, context, style, filename):
    """Plot observed and simulated time series for one SNOTEL site.

    Latitude and longitude are taken from the third and fourth
    underscore-separated fields of ``filename`` (with ``.txt`` stripped from
    the longitude). One dataset is loaded per key of ``compare_data_dict``:
    ``"snotel"`` through ``read_snotel_swe_obs``, ``"VIC.4.2.d"`` through
    ``read_vic_42_output`` and anything else through ``read_vic_5_output``.

    When ``'water_year'`` is in ``plots_to_make``, one figure per entry of
    ``plot_variables`` (variable name -> units) is saved as
    ``<plot_dir>/<variable>/<lat>_<lng>.png``. ``driver`` is not used here.
    """
    print(plots_to_make)

    # get lat/lng from filename
    file_split = re.split('_', filename)
    lng = file_split[3].split('.txt')[0]
    lat = file_split[2]
    print('Plotting {} {}'.format(lat, lng))

    # loop over data to compare
    data = {}
    for key, items in compare_data_dict.items():
        # read in data
        if key == "snotel":
            data[key] = read_snotel_swe_obs(filename,
                                            science_test_data_dir,
                                            items)
        elif key == "VIC.4.2.d":
            data[key] = read_vic_42_output(lat, lng,
                                           science_test_data_dir,
                                           items)
        else:
            data[key] = read_vic_5_output(lat, lng,
                                          result_dir,
                                          items)

    # loop over variables to plot
    for plot_variable, units in plot_variables.items():
        if 'water_year' not in plots_to_make:
            continue
        with plt.rc_context(dict(sns.axes_style(style),
                                 **sns.plotting_context(context))):
            fig, ax = plt.subplots(figsize=(10, 10))

            df = pd.DataFrame({key: d[plot_variable] for key, d in
                               data.items() if plot_variable in d})

            # DataFrame.items() replaces iteritems(), which was removed in
            # pandas 2.0; both yield (column name, Series).
            for key, series in df.items():
                series.plot(
                    use_index=True,
                    linewidth=compare_data_dict[key]['linewidth'],
                    ax=ax,
                    color=compare_data_dict[key]['color'],
                    linestyle=compare_data_dict[key]
                    ['linestyle'],
                    zorder=compare_data_dict[key]['zorder'])

            ax.legend(loc='upper left')
            ax.set_ylabel("%s [%s]" % (plot_variable, units))

            # save figure
            os.makedirs(os.path.join(plot_dir, plot_variable),
                        exist_ok=True)
            plotname = '%s_%s.png' % (lat, lng)
            savepath = os.path.join(plot_dir, plot_variable, plotname)
            plt.savefig(savepath, bbox_inches='tight')
            print(savepath)
            plt.clf()
            plt.close()
def heatmap_all_below(dataframe, taxa_dict, plot_dir, low_cutoff=0.001,
                      cap_facet_labels=True, title=False, svg=False):
    """
    Make a heatmap of all the taxa below the taxa specified in taxa_dict.

    :param dataframe: dataframe of data to harvest excerpts from
    :param taxa_dict: a dictionary with taxonomic levels as keys and
    names as values.  E.g. {'Order':['Burkholderiales']}.  Only the first
    key and the first name listed for it are used.
    :param plot_dir: path to save plots to
    :param low_cutoff: lowest abundance to include.  A taxa must be above
    this threshold in at least one sample to be included.
    :param cap_facet_labels: use the capitalised column names for the facets
    :param title: add a supertitle to the figure?
    :param svg: also save an .svg next to the .pdf
    :return: the seaborn FacetGrid
    """
    # TODO: this function has a lot of commonality with heatmap_from_taxa_dict
    # and could/should be factored.

    # grab the data for that taxa:
    # for now assume just 1 key and 1 value.
    taxa_level = list(taxa_dict.keys())[0]
    taxa_name = list(taxa_dict.values())[0][0]
    # Take an explicit copy: the frame is modified below (fillna, new
    # column), which on a filtered slice triggers SettingWithCopyWarning.
    dataframe = dataframe[dataframe[taxa_level] == taxa_name].copy()
    print(dataframe.head())

    # Columns to form a concatenated label from:
    label_cols = taxonomy_levels_below(taxa_level=taxa_level)
    print('label_cols: {}'.format(label_cols))

    # change nan cells to 'unknown'
    dataframe.fillna('unknown', inplace=True)

    # make a summary string representing the taxonomy for everything below
    def label_building_lambda(f, column_value_list, taxa_name):
        """
        Returns a lambda function to make row labels from.

        :param f: function to make a lambda out of.
        :param column_value_list: column names whose row values are passed
        to f as a list
        :param taxa_name: passed to f as its second argument
        :return: function taking one dataframe row
        """
        print("columns passed: {}".format(column_value_list))
        print("Use those in {}".format(f))
        # Builds something like label_from_taxa_colnames(), called with a
        # list of the row's values for the requested columns.
        return lambda row: f([row[col] for col in column_value_list],
                             taxa_name)
        # e.g. makes:
        # my_function([Comamonadaceae, Curvibacter]) from a row of a dataframe
        # and the specification that columns = ['Family', 'Genus']

    # TODO: use the taxa_dict to get the columns to use!

    # make a name_string per row.  It's something like
    # "Comamonadaceae, Curvibacter" or "other"
    dataframe['name_string'] = dataframe.apply(
        label_building_lambda(f=label_from_taxa_colnames,
                              column_value_list=label_cols,
                              taxa_name=taxa_name),
        axis=1)
    print("dataframe.head() for name_string:")
    print(dataframe.head())

    # reduce to only name_string rows with at least one abundance > the
    # threshold set by low_cutoff so we don't have a zillion rows:
    # todo: allow high to change?
    dataframe = \
        abundance_utils.filter_by_abundance(data=dataframe,
                                            abundance_column='fraction of '
                                                             'reads',
                                            high=1,
                                            low=low_cutoff,
                                            taxonomy_column='name_string')

    # Plot as usual, using the stuff developed above.
    # todo: factor some of this??
    def pivot_so_columns_are_plotting_variable(dataframe, groupby):
        return dataframe.pivot(index='name_string',
                               columns=groupby,
                               values='fraction of reads')

    def facet_heatmap(data, groupby, xrotation, **kws):
        """
        Used to fill the subplots with data.

        :param data: dataframe to plot
        :param groupby: column to group on
        :param xrotation: degrees to rotate x labels by
        :param kws: keyword arguments forwarded to sns.heatmap
        :return: None
        """
        # pivot only supports one column for now.
        # http://stackoverflow.com/questions/32805267/pandas-pivot-on-multiple-columns-gives-the-truth-value-of-a-dataframe-is-ambigu
        facet_data = pivot_so_columns_are_plotting_variable(
            dataframe=data, groupby=groupby)
        # Pass kwargs to heatmap  cmap.
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)
        # ``g`` is the FacetGrid created further down in the enclosing scope.
        g.set_xticklabels(rotation=xrotation)

    # set some plotting parameters
    xrotation = 90
    # Calculate the size, aspect depending on the number of
    # rows per subplot
    num_rows = len(dataframe['name_string'].unique())
    size = 1 + 0.22*num_rows
    aspect = 1.5

    if cap_facet_labels:
        dataframe = capitalize_some_column_names(dataframe)
        facet_var = "Replicate"
        row_var = '$O_2$'
        col_var = "Week"
    else:
        facet_var = 'rep'
        row_var = 'oxy'
        col_var = 'week'

    # todo: this doesn't seem to be changing the font size.  Probably isn't
    # for other plotting calls either!
    # NOTE(review): seaborn appears to ignore font_scale when no context
    # name is given, which would explain the observation above - confirm.
    with sns.plotting_context(font_scale=40):
        g = sns.FacetGrid(dataframe,
                          col=facet_var,
                          row=row_var,
                          size=size,
                          aspect=aspect,
                          margin_titles=True)
        g.set_axis_labels(col_var)

    # Add axes for the colorbar.  [left, bottom, width, height]
    cbar_ax = g.fig.add_axes([.94, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax, vmin=0,
                        # MUST SET VMAX or all of the subplots will be on
                        # their own color scale and you might not know it.
                        vmax=dataframe['fraction of reads'].max(),
                        annot=False,
                        groupby=col_var,
                        xrotation=xrotation)

    # modify labels
    # Todo: make the 2nd argument a function
    y_label_formatter(g, italics_unless_other)

    # add space for x label
    g.fig.subplots_adjust(bottom=0.2)

    # room for colorbar (cbar)
    g.fig.subplots_adjust(right=0.85)

    # add a supertitle, you bet.
    supertitle_base = taxa_dict_to_descriptive_string(taxa_dict)
    if title:
        plt.subplots_adjust(top=0.80)
        supertitle = \
            supertitle_base + '.  Min fraction of reads cutoff = {}'.format(
                low_cutoff)
        g.fig.suptitle(supertitle, size=15)

    # Also summarise # of taxa rows being grouped together.

    # prepare filename and save.
    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)
    filepath = plot_dir + supertitle_base
    filepath += "--min_{}".format(low_cutoff)
    filepath += "--{}".format('x-week')
    filepath += ".pdf"
    print(filepath)
    g.fig.savefig(filepath)
    if svg:
        # Swap the extension by slicing: rstrip("pdf") strips a *set of
        # characters* and only worked because a '.' precedes the extension.
        g.fig.savefig(filepath[:-len("pdf")] + "svg")

    return g
def target_gene_expression_analysis(mirna2age, mirna2disease,mirna2family,gene2age):
    """Load the miRNA target and expression tables and build a heatmap matrix.

    Two tab-separated tables are read from hard-coded absolute paths, then
    ``generate_matrix`` is called on the expression table and the function
    returns.

    NOTE(review): as laid out here the bare ``return`` below makes the rest
    of the body (the family / non-family violin plot) unreachable. The source
    was whitespace-collapsed, so the exact comment boundaries are uncertain -
    confirm against the original file.

    Arguments as used by the (currently unreachable) code below:
    ``mirna2age`` maps a miRNA to a value converted with ``float``;
    ``mirna2family`` maps a key to a list of miRNAs. ``mirna2disease`` and
    ``gene2age`` are only referenced from commented-out code.
    """
    # NOTE(review): hard-coded, user-specific input paths.
    mir_targetdb = pd.read_csv('/Users/virpatel/Desktop/pub_stuff/relevant_data/mir_target_vectordb.txt', sep='\t',index_col=[0], encoding='utf-8')
    mir_expdb = pd.read_csv('/Users/virpatel/Desktop/pub_stuff/relevant_data/exp_data_alldmir.txt', sep='\t',index_col=[0])

    # These accumulators are only filled by the commented-out code below.
    family_target_hamming = []
    family_target_avg_age = []
    family_perc_dis = []
    tardis = []
    tarnotindis = []

    # generate_matrix(mir_targetdb,'target_heatmap_jaccard')
    generate_matrix(mir_expdb,'tis_exp_heatmap_jaccard')
    return

    # ---- disabled analysis kept for reference -------------------------------
    # # # for fam in mirna2family:
    # # #     family_vector = []
    # # #     mirlst = [a for a in mirna2family[fam] if a in mir_targetdb.index]
    # # #     mirdislst = [a for a in mirna2family[fam] if a in mirna2disease]
    # # #     if len(mirlst) < 4: continue
    # # #     if len(mirdislst) < 4: continue
    # # #     for mir in mirlst:
    # # #         for other_mir in mirlst:
    # # #             if mir == other_mir: continue
    # # #             family_vector.append(hamming(mir_targetdb.loc[mir], mir_targetdb.loc[other_mir],normalized=True))
    # # #     family_target_hamming.append(std(family_vector))
    # # #     family_target_avg_age.append(round(mean([float(mirna2age[mirna]) for mirna in mirlst if mirna in mirna2age]),1))
    # # #     family_perc_dis.append(float(len(mirdislst)) / float(len(mirna2family[fam])))
    # # target_lst = list(mir_targetdb.columns.values)
    # # mirnanumdis = []
    # # mirnanumtar = []
    # # mir_avg_tar_age_dis = []
    # # mir_avg_tar_age_nondis = []
    # # mir_age = []
    # # mir_median_tar_age_all = []
    # # for mir in mir_targetdb.index:
    # #     if mir not in mirna2disease: mirnanumdis.append(0)
    # #     else: mirnanumdis.append(len(mirna2disease[mir]))
    # #     bintarlt = mir_targetdb.loc[mir].tolist()
    # #     mirnanumtar.append(sum(bintarlt))
    # #     tarages = [float(gene2age[target_lst[ind]]) for ind, a in enumerate(bintarlt) if target_lst[ind] in gene2age and a == 1]
    # #     mir_median_tar_age_all.append(median(tarages))
    # #     mir_avg_tar_age_dis.append(mean(tarages))
    # # for mir in mir_targetdb.index:
    # #     if mir not in mirna2disease:
    # #         bintarlt = mir_targetdb.loc[mir].tolist()
    # #         tarages = [float(gene2age[target_lst[ind]]) for ind, a in enumerate(bintarlt) if target_lst[ind] in gene2age and a == 1]
    # #         mir_avg_tar_age_nondis.append(median
    # # yung_num_tis = []
    # # old_num_tis = []
    # # dis_num = []
    # # mir_age_lst = []
    # # exp_val = []
    # # for mir in mir_expdb.index:
    # #     if mir in mirna2age:
    # #         v = float(sum(mir_expdb.loc[mir].tolist()))
    # #         mir_age_lst.append(mirna2age[mir])
    # #         mirage = mirna2age[mir]
    # #         exp_val.append(v)
    # #         if mirage > 100.0: old_num_tis.append(sum(mir_expdb.loc[mir].tolist()))
    # #         else: yung_num_tis.append(sum(mir_expdb.loc[mir].tolist()))
    # # plt.scatter(mir_age_lst, exp_val)
    # # plt.ylabel('Tissue Expression Count')
    # # plt.xlabel('miRNA Age')
    # # plt.subplots_adjust(bottom=0.20)
    # # plt.savefig('figures/mirna_exp_all.pdf',bbox_inches='tight')
    # # plt.close()
    # print mannwhitneyu(yung_num_tis, old_num_tis)

    # Members of every family that lists more than three miRNAs.
    mir_in_fam_pot = []
    mir_in_fam = []
    mir_not_in_fam = []
    mirna_in_hamming_2_exp = {}
    for mir in mirna2family:
        if len(mirna2family[mir]) > 3:
            mir_in_fam_pot += mirna2family[mir]

    # One record per miRNA with a known age:
    # [sum of its expression row, age, family-membership label].
    expdb = []
    for mirna in mir_expdb.index:
        if mirna not in mirna2age: continue
        mirna_in_hamming_2_exp[mirna] = mir_expdb.loc[mirna].tolist()
        if mirna in mir_in_fam_pot:
            mir_in_fam.append(mirna)
            expdb.append([float(sum(mir_expdb.loc[mirna].tolist())), float(mirna2age[mirna]), 'In miRNA Family'])
        else:
            mir_not_in_fam.append(mirna)
            expdb.append([float(sum(mir_expdb.loc[mirna].tolist())), float(mirna2age[mirna]), 'Not In miRNA Family'])

    # age1/age2/gen1/gen2 are computed but not used afterwards.
    age1 = [mirna2age[a] for a in mir_in_fam ]
    age2 = [mirna2age[a] for a in mir_not_in_fam]
    gen1 = [sum(mirna_in_hamming_2_exp[a]) for a in mir_in_fam ]
    gen2 = [sum(mirna_in_hamming_2_exp[a]) for a in mir_not_in_fam]

    expdb = pd.DataFrame(expdb, columns=['Number of Tissues','miRNA Age','In miRNA Family?'])
    # NOTE(review): DataFrame.sort was removed from pandas (sort_values is
    # the replacement) - this line would need updating if re-enabled.
    expdb = expdb.sort('miRNA Age',ascending=1)

    # NOTE(review): seaborn appears to ignore font_scale when no context
    # name is given - confirm.
    with sns.plotting_context(font_scale=300):
        sns.violinplot(x='miRNA Age',y='Number of Tissues',hue='In miRNA Family?',data=expdb,palette="muted", width=.7,legend=False,cut = 0)
    fig = plt.gcf()
    # NOTE(review): 'bottom right' is not among Matplotlib's documented
    # legend locations ('lower right' is) - confirm the intended value.
    frame = plt.legend(frameon=True, loc='bottom right' )
    fig.set_size_inches(30, 10.5)
    ax1 = plt.gca()
    ax1.set_xlim([-1,17])
    ax1.set_ylim([-0.1, 20.5])
    plt.savefig('figures/violin_fam_no_fam_exp.pdf',bbox_inches='tight')
    plt.close()
from matplotlib import rcParams
import matplotlib.cm as cm
import matplotlib as mpl
from matplotlib import rc
import random
from matplotlib.font_manager import FontProperties
import seaborn as sns
import time

# Module-level plot styling, executed at import time.
# NOTE(review): the return value of color_palette() is discarded, so this
# call presumably does not change the active palette - confirm.
sns.color_palette("bright")
sns.set(font_scale=1.2)
# NOTE(review): plotting_context() returns a context object; called as a bare
# statement it is presumably not applied - confirm whether set_context was meant.
sns.plotting_context(context="talk", rc=None)

# Font property dictionaries for regular text and for titles.
font = {'family': 'Serif', 'color': 'Black', 'weight': 'normal', 'size': 13, }
title_font = {'family': 'Serif', 'color': 'Black', 'weight': 'semibold', 'size': 15, }


def organize_mrmr_ranking(pickle_file_path, folder_path, write_plot_path, figure_size, color_code, sensitive_features, methodology, figure_name, font, Title_font, target):
    """Initialise an empty feature-importance dictionary.

    NOTE(review): the visible body only creates an empty local dictionary and
    returns None; none of the parameters is used. The parameter ``font``
    shadows the module-level ``font`` dictionary.
    """
    final_feature_importance_dictionary = {}
# Script fragment: finish the styling of an existing grid object ``le``, then
# start a three-row time-series figure.
# NOTE(review): ``le``, ``pal``, ``dates``, ``H_mod_PT``, ``H_mod_OS``,
# ``H_Scinti`` and ``met`` are defined outside this fragment. The source was
# whitespace-collapsed, so the extent of the ``with`` block below is a
# reconstruction - confirm against the original file.
le.despine(left=True)
le.fig.subplots_adjust(right = 0.95);
legend = le.ax.legend(loc = 'lower right', shadow = True)
le.ax.xaxis.set_label_coords(0.5, -0.1)

#%% Plotting timeseries
# ``flatui`` is not used in the visible part of the script.
flatui = ["#9b59b6", "#3498db", "#e74c3c"]
pal_ts = [pal[0], pal[1] ,sns.color_palette('Paired')[1] ]
sns.set(context = "poster", style = 'darkgrid', palette = pal_ts, rc = {'axes.labelsize': 20.0, 'figure.figsize': [10, 10], 'legend.fontsize': 16.0, 'xtick.labelsize': 20.0, 'ytick.labelsize': 20.0})
with sns.plotting_context('poster'):
    # First row: the three flux series against the date axis.
    ax1 = plt.subplot(311)
    plt.plot_date(dates, H_mod_PT, 'o--', label = 'TSEB', lw = 1)
    plt.plot_date(dates, H_mod_OS, 'o--', label = 'OSEB', lw = 1)
    plt.plot_date(dates, H_Scinti, 'o--', label = 'Scintillometer', lw = 1)
    plt.legend(loc = 'center left', bbox_to_anchor=(1.1,0.5), prop={'size':20})
    # x limits: from the day of the first sample to the day after the last.
    start = dates[0].astype('datetime64[D]')
    stop = dates[len(dates) -1].astype('datetime64[D]') + np.timedelta64(1, 'D')
    ax1.set_xlim([start, stop])
    ax1.set_ylabel('Energy flux (W/m2)', labelpad=20, fontsize=20)
    # Second plot
    ecData, ecTime = met.loadMetDataPN('EC')
    ax2 = plt.subplot(312)
    # index of the ecTime entry closest to ``start``
    dateCol_start = (np.abs(ecTime - start.astype('datetime64[s]'))).argmin()
import pandas as pd
import seaborn as sns
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
# change directory to where my app files are
import os
os.chdir("/Users/Charm/OneDrive/app_files/store")

# Load the population end-accuracy values for level 5. The ``with`` block
# closes the file; the previous bare open() leaked the handle.
# NOTE: pickle.load executes code embedded in the file; only load trusted files.
with open("groupEndAccuracyLevel5.p", "rb") as pickle_file:
    GroupEndAccuracy = pickle.load(pickle_file)

# NOTE(review): ``fig`` is a stand-alone Figure that pyplot does not manage,
# while kdeplot and plt.axvline below draw on pyplot's current axes - confirm
# which figure is meant to be used.
fig = Figure(figsize=(5,4), dpi=100)
ax = fig.add_subplot(111)
canvas = FigureCanvas(fig)
with sns.plotting_context("notebook",font_scale=1.2):
    #ax = sns.distplot(GroupEndAccuracy, hist=False, rug=True, color='blue');
    ax = sns.kdeplot(GroupEndAccuracy, shade=True, color = sns.xkcd_rgb["light blue"], linewidth=3)
    ax.set(yticks=[])
    # a legend is only present when seaborn decided to draw one
    if ax.legend_ is not None:
        ax.legend_.remove()
    ax.set_xlabel('End Accuracy', fontsize=16)
    # ax.hold(True) was dropped: Axes.hold was removed in Matplotlib 3.0 and
    # holding was already the default behaviour.
    ax.set_title('Level5', fontsize=16)
    predictedEnd = 0.85 #value output by my model from the App
    plt.axvline(x=predictedEnd, ymin=0, ymax = 1.2, linewidth=2, color='r')

# Percentage of the population with an end accuracy below the prediction.
aa = pd.Series.sort_values(GroupEndAccuracy)
HigherThanPopulation = float(np.array(np.where(aa<predictedEnd)).shape[1])/len(aa)*100
print("Patient is likely to perform better than %d percent of the population on this task" %HigherThanPopulation)
def plot_bar_factor(loading, ax=None, bootstrap_err=None, grouping=None,
                    width=4, height=8, label_rows=True, title=None,
                    color_grouping=False, separate_ticklabels=True):
    """Plot one factor loading as a horizontal bar plot.

    Bar length is the absolute loading; bar colour encodes the loading's sign.

    Args:
        loading: factor loadings as a dataframe or series
        ax: optional, plot axis. When omitted, a new figure of size
            (width, height) is created.
        bootstrap_err: a dataframe/series with the same index as loading.
            Used to plot confidence intervals on bars
        grouping: optional, output of "get_factor_groups", used to plot
            separating horizontal lines
        width: figure width; also used as the y tick-label padding
        height: figure height; line widths and font sizes are scaled from it
        label_rows: boolean, whether to put ylabels
        title: optional text, set as the x-axis label
        color_grouping: boolean, whether to alternate the y tick-label colour
            between consecutive groups (requires `grouping`)
        separate_ticklabels: unused; kept for backwards compatibility

    Returns:
        The newly created figure when `ax` was not supplied, otherwise None.
    """
    # Tick-label font size shrinks as the number of rows grows. The divisor is
    # clamped so that a loading with fewer than two rows cannot divide by zero.
    DV_fontsize = height / max(loading.shape[0] // 2, 1) * 20
    # set up plot variables; remember the figure only if it is created here,
    # because `ax` is rebound below and can no longer signal that case
    f = None
    if ax is None:
        f, ax = plt.subplots(1, 1, figsize=(width, height))
    # change axis border width
    for axis in ['top', 'bottom', 'left', 'right']:
        ax.spines[axis].set_linewidth(height/8)
    with sns.plotting_context(font_scale=1.3):
        # plot optimal factor breakdown in bar format to better see labels
        # plot actual values: sign -> colour index (negative/zero: 0, positive: 1)
        colors = sns.diverging_palette(220, 15, n=2)
        ordered_colors = [colors[int(i)] for i in (np.sign(loading)+1)/2]
        if bootstrap_err is None:
            abs(loading).plot(kind='barh', ax=ax, color=ordered_colors,
                              width=.7)
        else:
            abs(loading).plot(kind='barh', ax=ax, color=ordered_colors,
                              width=.7, xerr=bootstrap_err,
                              error_kw={'linewidth': height/10})
        # draw lines separating groups
        if grouping is not None:
            factor_breaks = np.cumsum([len(i[1]) for i in grouping])[:-1]
            for y_val in factor_breaks:
                ax.hlines(y_val-.5, 0, 1.1, lw=height/10,
                          color='grey', linestyle='dashed')
        # set axes properties
        ax.set_xlim(0, max(max(abs(loading)), 1.1))
        ax.set_yticklabels('')
        ax.set_xticklabels('')
        labels = ax.get_yticklabels()
        locs = ax.yaxis.get_ticklocs()
        # add factor label to plot
        if title:
            ax.set_xlabel(title, ha='center', va='top', fontsize=height/2,
                          weight='bold', rotation=90)
        ax.tick_params(axis='x', bottom=False, labelbottom=False)
        # add labels of measures
        tick_colors = ['#000000', '#444098']
        ax.set_facecolor('#DBDCE7')
        # white guide line behind every third row
        for location in locs[2::3]:
            ax.axhline(y=location, xmin=0, xmax=1, color='w',
                       zorder=-1, lw=height/10)
        if label_rows:
            # pad each label with a trailing space
            for label in labels:
                label.set_text('%s ' % (label.get_text()))
            ax.set_yticks(locs)
            left_labels = ax.set_yticklabels(labels, fontsize=DV_fontsize)
            ax.tick_params(axis='y', size=height/4, width=height/10, pad=width)
            if grouping is not None and color_grouping:
                # change colors of ticks based on factor group: the colour
                # flips each time the group of the current tick changes
                color_i = 1
                last_group = None
                for j, label in enumerate(left_labels):
                    group = np.digitize(locs[j], factor_breaks)
                    if last_group is None or group != last_group:
                        color_i = 1-color_i
                        last_group = group
                    color = tick_colors[color_i]
                    label.set_color(color)
        else:
            ax.set_yticklabels('')
            ax.tick_params(axis='y', size=0)
    return f
# NOTE(review): this loop is the tail of an enclosing loop over `i` that starts
# outside this fragment; re-indent it to match when merging.
for j in range(IL):
    table.append(a + sim_out[i*IL + j])

# create a pandas frame
print('Making PANDAS frame...')
df = pd.DataFrame(table, columns=columns)

# turns out all we need is the follow pivoted table
#perf = pd.pivot_table(df, values='Error', index=['SNR'], columns=['Algorithm'], aggfunc=np.mean)

# The `with` statement closes the file; no explicit close() is needed.
with open(pickle_file, 'wb') as f:
    pickle.dump([df, parameters], f)

sns.set(style='whitegrid')
# plotting_context() only returns the parameters (or acts as a context
# manager); set_context() is what actually applies them globally.
sns.set_context(context='poster', font_scale=2.)
pal = sns.cubehelix_palette(8, start=0.5, rot=-.75)

# Draw the figure
print('Plotting...')

df_rand = df[df['algorithm'] == 'RANDOM']
df_det = df[df['algorithm'] == 'DETERMINISTIC']

# Plot random
# Mean success per (K, C); rows and columns are then put in numeric order.
# reindex() replaces reindex_axis(), which newer pandas no longer provides.
p_rand = pd.pivot_table(df_rand, values='success', index=['K'], columns=['C'], aggfunc=np.mean)
p_rand = p_rand.reindex(columns=sorted(p_rand.columns, key=int))
p_rand = p_rand.reindex(index=sorted(p_rand.index, key=int))

p_det = pd.pivot_table(df_det, values='success', index=['K'], columns=['C'], aggfunc=np.mean)
p_det = p_det.reindex(columns=sorted(p_det.columns, key=int))
def get_grid(self, **kwargs):
    """Create a seaborn FacetGrid over ``self.df``.

    The keyword arguments are forwarded to ``sns.FacetGrid``; any ``data``
    entry among them is replaced by ``self.df``. The grid is built while
    ``self.axes_style`` and ``self.plotting_context`` are in effect.
    """
    grid_options = dict(kwargs, data=self.df)
    with sns.axes_style(self.axes_style), \
            sns.plotting_context(self.plotting_context):
        return sns.FacetGrid(**grid_options)
def get_fig_for_dataframe(self):
    """Build the seaborn FacetGrid figure for this visualisation.

    The data of ``self.data_mapping_revision`` is filtered on the string
    fields listed in the column form data, then the plotting function
    registered in GRAPH_MAPPINGS for ``self.visualisation_type`` is mapped
    over ``self.x_axis`` and ``self.y_axis``.

    Returns:
        The figure of the grid (``g.fig``), with its background patch made
        fully transparent.

    NOTE(review): the original indentation was lost; the nesting below (in
    particular the extent of the ``with`` block and the position of
    ``g.set_legend()``) is reconstructed -- confirm against the original.
    """
    form_data = self.get_column_form_data()
    # Map each string field name to its "initial" entry from the form data.
    string_expressions = {form_datum["name"] : form_datum["initial"] for form_datum in form_data["string_field_uniques"]}
    df = self.data_mapping_revision.get_data()
    # Keep only rows where every string field holds one of its "initial"
    # values (presumably a collection of selected values -- verify).
    row_mask = df.isin(string_expressions)[[form_datum["name"] for form_datum in form_data["string_field_uniques"]]]
    df = DataFrame(df[row_mask.all(1)])
    # The string 'None' is how an unset split option is stored.
    split_y_axis_by = self.split_y_axis_by if self.split_y_axis_by !='None' else None
    split_colour_by = self.split_colour_by if self.split_colour_by !='None' else None
    # Keyword arguments for sns.FacetGrid; the legend is added manually below.
    kwargs = {"size": 5,
              "aspect": 1.75,
              "sharex":True,
              "sharey":True,
              "hue" : split_colour_by,
              "legend" : False,
              "legend_out" : True,
              #'legend.frameon': False
              }
    split_by = self.split_by if self.split_by !='None' else None
    if split_by:
        # Facet by column only, wrapping after four panels.
        kwargs["row"] = None
        kwargs["col"] = split_by
        kwargs["col_wrap"] = 4
    if GRAPH_MAPPINGS[self.visualisation_type]["xy"] == True:
        if df.count()[0] > 0 :
            # Axes start at 0 and leave 30% (x) / 10% (y) headroom above the maximum.
            xlim = (0, float(max(df[self.x_axis]))*1.3)
            ylim = (0, float(max(df[self.y_axis]))*1.1)
            kwargs["xlim"] = xlim
            kwargs["ylim"] = ylim
    with plotting_context( "poster" ):
        sns.set_style("white")
        labels = GRAPH_MAPPINGS[self.visualisation_type]["get_label_function"](self, df)
        # g = sns.factorplot(self.x_axis,
        #                    y=self.y_axis, data=df,
        #                    row=self.split_y_axis_by if self.split_y_axis_by !='None' else None,
        #                    x_order=labels,
        #                    col=self.split_colour_by if self.split_colour_by !='None' else None,)
        g_kwargs = {}
        if labels:
            g_kwargs["x_order"] =labels
        print kwargs
        g = sns.FacetGrid(df,**kwargs )
        g.map(GRAPH_MAPPINGS[self.visualisation_type]["function"], self.x_axis, self.y_axis, **g_kwargs);
        if labels:
            # Rotate the x tick labels; with a column split each axis is handled individually.
            if split_by:
                for ax in g.axes:
                    ax.set_xticklabels(labels, rotation=90)
            else:
                g.set_xticklabels(labels, rotation=90)
        g.set_legend()
        # frame = g.fig.legend().get_frame()
        #if labels and not split_by :
        #    g.set_xticklabels(labels, rotation=90)
        if self.visualisation_title:
            g.fig.tight_layout()
            # Reserve a strip at the top of the figure for the title; the
            # reserved fraction gets smaller as the figure gets taller.
            height_in_inches = g.fig.get_figheight()
            title_height_fraction = 0.2 / (height_in_inches ** (0.5)) #20px is ~0.3 inches
            g.fig.suptitle(self.visualisation_title, fontsize=20)
            g.fig.tight_layout(rect=(0,0,1,1 - title_height_fraction))
        else:
            g.fig.tight_layout()
        # Transparent figure background.
        g.fig.patch.set_alpha(0.0)
    return g.fig
import argparse
import os
import sys

import cnvlib
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn

AP = argparse.ArgumentParser(description=__doc__)
AP.add_argument('cnr_files', nargs='+', help="All sample .cnr files.")
AP.add_argument('-o', '--output', help="Output filename.")
args = AP.parse_args()

# A bare plotting_context("poster") call only returns the parameters without
# applying them, and set() would reset the context to its default anyway, so
# the context is passed to set() directly.
seaborn.set(context="poster", font="Sans", style="darkgrid")


# NOTE(review): this function appears to be cut off at the end of the visible
# fragment (no return statement is visible); the code is reproduced unchanged.
def load_depths_logs(cnr_fnames):
    logs = []
    depths = []
    # Collect the 'log2' and 'depth' columns of every .cnr file.
    for fname in cnr_fnames:
        cnr = cnvlib.read(fname)
        logs.append(cnr['log2'])
        depths.append(cnr['depth'])
    # Ninja move: `cnr` is still bound to the last (here: only) file read.
    if len(cnr_fnames) == 1:
        plt.title(cnr.sample_id)