def master_loop():
    """Collect per-gene depths from every VCF file and plot them over time.

    Each ``*.vcf`` entry of the module-level ``source_folder`` is passed
    through ``pull_depth_from_vcf`` and ``pull_average_depth_for_genes``.
    Column 0 of the resulting table is used as the row labels (gene names)
    and column 1 as the values.  The per-file tables are written with
    ``dump`` to ``my_pipe.dmp``; the combined matrix is then shown as a plain
    heatmap, a fully clustered map and a row-only clustered map.

    File stems are expected to look like one character followed by an
    integer (e.g. ``t12.vcf``); the integer defines the column order.
    """
    # seaborn used to re-export pyplot as ``sns.plt``; that alias is gone in
    # current releases, so pyplot is imported directly.
    import matplotlib.pyplot as plt

    gene_names = []
    master_store = []
    time_stamps = []
    dump_dict = {}
    for file_name in os.listdir(source_folder):
        if not file_name.endswith('.vcf'):
            continue
        print(file_name)
        stem = file_name[:-4]
        master_mix = np.array(
            pull_average_depth_for_genes(pull_depth_from_vcf(file_name)))
        time_stamps.append(stem)
        # The gene names of the last processed file label the rows.
        gene_names = master_mix[:, 0]
        master_store.append(master_mix[:, 1])
        dump_dict[stem] = [master_mix]
    print(dump_dict)
    # Close the dump file deterministically instead of leaking the handle.
    with open('my_pipe.dmp', 'w') as dump_file:
        dump(dump_dict, dump_file)

    # Order the samples by the integer that follows the one-character prefix.
    arg_sorter = np.argsort(np.array([int(val[1:]) for val in time_stamps]))
    time_stamps = np.array(time_stamps)[arg_sorter]
    print(time_stamps)
    master_store = np.array(master_store)
    master_store = master_store[arg_sorter, :].astype(np.float32)
    master_store[np.isnan(master_store)] = 0
    print(master_store)

    dataframe = pd.DataFrame(data=master_store.T, index=gene_names,
                             columns=time_stamps)
    sns.heatmap(dataframe)
    plt.show()
    sns.clustermap(dataframe)
    plt.show()
    sns.clustermap(dataframe, col_cluster=False)
    plt.show()
def plot_filter_seq_heat(filter_outs, out_pdf, whiten=True, drop_dead=True):
    """Save a clustered heatmap of per-sequence mean filter outputs.

    Args:
        filter_outs: array that is averaged over axis 2; after the mean the
            rows are treated as sequences and the columns as filters.
        out_pdf: path the figure is saved to.
        whiten: if true, standardise each filter with ``preprocessing.scale``.
        drop_dead: if true, drop filters whose output never varies.
    """
    # compute filter output means per sequence
    filter_seqs = filter_outs.mean(axis=2)

    # whiten
    if whiten:
        filter_seqs = preprocessing.scale(filter_seqs)

    # transpose so that rows are filters and columns are sequences
    filter_seqs = np.transpose(filter_seqs)

    if drop_dead:
        filter_stds = filter_seqs.std(axis=1)
        filter_seqs = filter_seqs[filter_stds > 0]

    # downsample sequences (500 draws with replacement)
    seqs_i = np.random.randint(0, filter_seqs.shape[1], 500)

    # clip the colour range to the 0.1 / 99.9 percentiles of the sample
    hmin = np.percentile(filter_seqs[:, seqs_i], 0.1)
    hmax = np.percentile(filter_seqs[:, seqs_i], 99.9)

    sns.set(font_scale=0.3)

    # clustermap creates its own figure and makes it current; opening one
    # beforehand only leaked an empty figure that was never closed.
    sns.clustermap(filter_seqs[:, seqs_i], row_cluster=True, col_cluster=True,
                   linewidths=0, xticklabels=False, vmin=hmin, vmax=hmax)
    plt.savefig(out_pdf)
    plt.close()
def plot_centrimo(centrimo_in, figure_output):
    """Save a column-clustered heatmap of a tab-separated CentriMo table.

    Args:
        centrimo_in: table readable by ``pd.read_table``; the first column
            becomes the index and an ``"Average"`` column must be present.
        figure_output: destination handed to ``savefig``.
    """
    centrimo_table = pd.read_table(centrimo_in, index_col=0)
    # ``DataFrame.sort(columns=...)`` no longer exists in pandas;
    # ``sort_values`` performs the same in-place descending sort.
    centrimo_table.sort_values(by="Average", axis=0, ascending=False,
                               inplace=True)
    # Rows keep the sorted order; only the columns are clustered.
    grid = sns.clustermap(centrimo_table, method='single', metric="euclidean",
                          z_score=None, row_cluster=False, col_cluster=True)
    # Save through the grid instead of relying on the "current figure".
    grid.savefig(figure_output, bbox_inches='tight')
def plot_target_corr(filter_outs, seq_targets, filter_names, target_names, out_pdf, seq_op='mean'):
    """Save a clustermap of Spearman correlations between filters and targets.

    Args:
        filter_outs: array whose axis 0 indexes sequences, axis 1 filters and
            axis 2 is reduced with ``seq_op``.
        seq_targets: 2-D array of targets; only the first
            ``filter_outs.shape[0]`` rows are used.
        filter_names: one name per filter (list or array).
        target_names: one name per target column.
        out_pdf: path the figure is saved to.
        seq_op: ``'mean'`` averages over axis 2, anything else takes the max.
    """
    num_seqs = filter_outs.shape[0]
    num_targets = len(target_names)

    if seq_op == 'mean':
        filter_outs_seq = filter_outs.mean(axis=2)
    else:
        filter_outs_seq = filter_outs.max(axis=2)

    # std is sequence by filter: drop filters that never vary, because their
    # rank correlation is undefined.
    live = filter_outs_seq.std(axis=0) > 0
    filter_outs_seq = filter_outs_seq[:, live]
    # asarray lets a plain list of names be indexed by the boolean mask.
    filter_names_live = np.asarray(filter_names)[live]

    filter_target_cors = np.zeros((len(filter_names_live), num_targets))
    for fi in range(len(filter_names_live)):
        for ti in range(num_targets):
            cor, _ = spearmanr(filter_outs_seq[:, fi],
                               seq_targets[:num_seqs, ti])
            filter_target_cors[fi, ti] = cor

    cor_df = pd.DataFrame(filter_target_cors, index=filter_names_live,
                          columns=target_names)

    sns.set(font_scale=0.3)
    # clustermap opens its own figure; a preceding plt.figure() only leaked
    # an empty one.
    sns.clustermap(cor_df, cmap='BrBG', center=0, figsize=(8, 10))
    plt.savefig(out_pdf)
    plt.close()
def visualizeConsensus(consensusMat, connectivityMatrices, clusters, colNames):
    """Draw and save hierarchical clustermaps of a consensus matrix.

    With ``colNames == 'noXLabels'`` a single annotated clustermap of the
    unlabeled matrix is saved.  Otherwise ``colNames`` is read as a text file
    with one sample name per line; the names label both axes and an annotated
    as well as a non-annotated clustermap are saved.  Output files are written
    under the module-level ``matrixPath``.
    """
    def _target(prefix):
        # File name encodes the number of runs and the cluster count k.
        return (str(matrixPath) + prefix + 'consensus_Matrix_over_'
                + str(len(connectivityMatrices)) + '_runs_at_k='
                + str(clusters) + '.png')

    if colNames == 'noXLabels':
        # Unlabeled consensus matrix: annotated map only.
        frame = pd.DataFrame(data=consensusMat)
        # Rows and columns are both clustered (seaborn's default linkage);
        # each cell is annotated with its co-clustering probability.
        annotated = sns.clustermap(frame, col_cluster=True, row_cluster=True,
                                   annot=True)
        annotated.savefig(_target(''))
        return

    # One sample name per line of the given file.
    with open(colNames) as name_file:
        sampleNames = [row.rstrip('\n') for row in name_file]

    frame = pd.DataFrame(data=consensusMat, index=sampleNames,
                         columns=sampleNames)
    annotated = sns.clustermap(frame, col_cluster=True, row_cluster=True,
                               annot=True)
    plain = sns.clustermap(frame, col_cluster=True, row_cluster=True,
                           annot=False)

    # Horizontal sample names on the y axis, vertical ones on the x axis.
    for axis_name, angle in (('yaxis', 0), ('xaxis', 90)):
        for grid in (annotated, plain):
            axis = getattr(grid.ax_heatmap, axis_name)
            plt.setp(axis.get_majorticklabels(), rotation=angle)

    annotated.savefig(_target(''))
    plain.savefig(_target('non_annotated_'))
def produce_clustermap(rankings, fname, metric):
    """Write a row-clustered heatmap of ``rankings`` to a PDF file.

    Args:
        rankings: data accepted by ``sns.clustermap``.
        fname: path of the PDF to create.
        metric: distance metric forwarded to ``sns.clustermap``.
    """
    sns.set()
    sns.set_context("paper")
    pp = PdfPages(fname)
    try:
        sns.clustermap(rankings, col_cluster=False, metric=metric)
        # Saves the current figure, i.e. the clustermap just created.
        pp.savefig()
    finally:
        # Always release the PDF handle, even when plotting fails.
        pp.close()
def drawClustermap(df, output):
    """Render ``df`` as a clustermap and save it to ``output`` as a PDF.

    All styling comes from module-level state: the parsed ``args`` namespace
    plus the ``cluster`` and ``figSize`` globals.  ``args.scaling`` selects
    row-wise z-scoring (``'z_score'``), row-wise min-max scaling
    (``'standard'``) or no scaling at all.
    """
    # Only the scaling keyword differs between the three modes.
    if args.scaling == 'z_score':
        scaling_kw = {'z_score': 0}
    elif args.scaling == 'standard':
        scaling_kw = {'standard_scale': 0}
    else:
        scaling_kw = {}

    g = sns.clustermap(
        df,
        method=args.cluster_method,
        metric=args.distance_metric,
        linewidths=0.5,
        cmap=args.color,
        col_cluster=cluster,
        figsize=figSize,
        **scaling_kw
    )

    heat_ax = g.ax_heatmap
    plt.setp(heat_ax.get_yticklabels(), rotation=0,
             size=int(args.yaxis_fontsize), family=args.font)
    plt.setp(heat_ax.get_xticklabels(), rotation=90,
             size=int(args.xaxis_fontsize), family=args.font, weight='bold')
    g.savefig(output, format='pdf', dpi=1000, bbox_inches='tight')
def plot_filter_seg_heat(filter_outs, out_pdf, whiten=True, drop_dead=True):
    """Save a clustered heatmap of per-segment maximum filter outputs.

    The last axis of ``filter_outs`` is cut into ``s`` equal segments, where
    ``s`` is the smallest divisor of its length that is at least 5 (or the
    length itself when it is shorter than 5).  Every segment becomes its own
    instance, represented by the maximum output of each filter in it.

    Args:
        filter_outs: 3-D array; axis 1 is treated as filters and axis 2 is the
            axis that gets segmented.
        out_pdf: path the figure is saved to.
        whiten: if true, standardise each filter with ``preprocessing.scale``
            and cluster with euclidean distance; otherwise use cosine.
        drop_dead: if true, drop filters whose output never varies.

    Raises:
        ValueError: if the segmented axis is empty.
    """
    b = filter_outs.shape[0]
    f = filter_outs.shape[1]
    l = filter_outs.shape[2]
    if l < 1:
        raise ValueError('filter_outs has no positions to segment')

    # Smallest segment count >= 5 that divides l exactly.  The modulo test is
    # exact on every Python version, and starting at min(5, l) guarantees
    # termination for short inputs.
    s = min(5, l)
    while l % s != 0:
        s += 1
    seg_len = l // s
    print('%d segments of length %d' % (s, seg_len))

    # split into multiple segments
    filter_outs_seg = np.reshape(filter_outs, (b, f, s, seg_len))

    # max within each segment
    filter_outs_max = filter_outs_seg.max(axis=3)

    # break each segment into a new instance
    filter_seqs = np.reshape(np.swapaxes(filter_outs_max, 2, 1), (s * b, f))

    # whiten
    if whiten:
        filter_seqs = preprocessing.scale(filter_seqs)

    # transpose so that rows are filters and columns are instances
    filter_seqs = np.transpose(filter_seqs)

    if drop_dead:
        filter_stds = filter_seqs.std(axis=1)
        filter_seqs = filter_seqs[filter_stds > 0]

    # downsample instances (500 draws with replacement)
    seqs_i = np.random.randint(0, filter_seqs.shape[1], 500)

    hmin = np.percentile(filter_seqs[:, seqs_i], 0.1)
    hmax = np.percentile(filter_seqs[:, seqs_i], 99.9)

    sns.set(font_scale=0.3)

    dist = 'euclidean' if whiten else 'cosine'

    # clustermap opens its own figure; a preceding plt.figure() only leaked
    # an empty one.
    sns.clustermap(filter_seqs[:, seqs_i], metric=dist, row_cluster=True,
                   col_cluster=True, linewidths=0, xticklabels=False,
                   vmin=hmin, vmax=hmax)
    plt.savefig(out_pdf)
    plt.close()
def get_seaborn_clustermap(dfr, params, title=None, annot=True):
    """Build a Seaborn clustermap for ``dfr`` and return the resulting grid.

    ``params`` supplies the styling through its ``cmap``, ``vmin``, ``vmax``,
    ``colorbar``, ``figsize``, ``linewidths`` and ``labels`` attributes.  When
    ``title`` is given it becomes the label of the colour bar.
    """
    side = params.figsize
    grid_options = dict(
        cmap=params.cmap,
        vmin=params.vmin,
        vmax=params.vmax,
        col_colors=params.colorbar,
        row_colors=params.colorbar,
        figsize=(side, side),
        linewidths=params.linewidths,
        xticklabels=params.labels,
        yticklabels=params.labels,
        annot=annot,
    )
    grid = sns.clustermap(dfr, **grid_options)

    # The colour-bar label sits on the left of the bar.
    colour_axis = grid.cax
    colour_axis.yaxis.set_label_position("left")
    if title:
        colour_axis.set_ylabel(title)

    # Column labels vertical, row labels horizontal.
    heat_ax = grid.ax_heatmap
    heat_ax.set_xticklabels(heat_ax.get_xticklabels(), rotation=90)
    heat_ax.set_yticklabels(heat_ax.get_yticklabels(), rotation=0)

    return grid
def clust_heatmap(gene_list, df_by_gene, num_to_plot=None, title='', plot=False, label_map=False):
    """Cluster cells on the leading genes of ``gene_list`` and save a heatmap.

    Args:
        gene_list: ordered gene names; the first ``num_to_plot`` are plotted.
        df_by_gene: DataFrame with cells as rows and genes as columns.
        num_to_plot: number of genes to plot.  Defaults to the length of the
            ``gene_list`` that was passed in (the old default was evaluated
            once, at definition time, against a module-level ``gene_list``).
        title: heatmap title; its first two words also name the output file.
        plot: if true, show the figure interactively before saving.
        label_map: optional mapping of cell name to a sequence whose first
            item is the colour for that cell's tick label.

    Returns:
        Tuple of (column linkage matrix, plotted sub-frame, column order).

    Relies on the module-level ``metric``, ``method`` and ``filename``
    (output directory) globals.
    """
    if num_to_plot is None:
        num_to_plot = len(gene_list)

    if num_to_plot > 175:
        sns.set(context='poster', font_scale=0.65/(num_to_plot/100))
    else:
        sns.set(context='poster', font_scale=.80, font='Verdana')
    sns.set_palette('RdBu', 4, 0.1)

    cell_list = df_by_gene.index.tolist()
    plotted = df_by_gene[gene_list[0:num_to_plot]]
    cg = sns.clustermap(plotted.transpose(), metric=metric, method=method,
                        z_score=0, figsize=(30, 25))
    col_order = cg.dendrogram_col.reordered_ind
    cg.ax_heatmap.set_title(title)

    if label_map:
        # Colour the cell labels in their clustered (reordered) sequence.
        Xlabs = [cell_list[i] for i in col_order]
        colors = [label_map[cell][0] for cell in Xlabs]
        for xtick, color in zip(cg.ax_heatmap.get_xticklabels(), colors):
            xtick.set_color(color)
            xtick.set_rotation(270)

    if plot:
        plt.show()

    cell_linkage = cg.dendrogram_col.linkage

    if title != '':
        save_name = '_'.join(title.split(' ')[0:2])
        cg.savefig(os.path.join(filename, save_name + '_heatmap.pdf'),
                   bbox_inches='tight')
    else:
        cg.savefig(os.path.join(filename, 'Non_group_heatmap_z1_deleted.pdf'),
                   bbox_inches='tight')
    plt.close()
    return cell_linkage, plotted, col_order
def plot_clustermap(dat, cmap='purple', save_fig=False, save_name='Clustermap', db=None):
    """Plot clustermap.

    Parameters
    ----------
    dat : pandas.DataFrame
        Data to create clustermap from.
    cmap : str or colormap
        ``'purple'`` and ``'blue'`` select preset cubehelix colormaps; any
        other value is handed to seaborn unchanged.
    save_fig : bool
        Whether to save the figure as an SVG file.
    save_name : str
        File name (without extension) used when saving.
    db : object, optional
        Passed through ``check_db``; the result must expose ``figs_path``.
        NOTE(review): ``check_db(None)`` presumably yields a default database
        object - confirm against its definition.
    """

    # Set up plotting and aesthetics
    sns.set()
    sns.set_context("paper", font_scale=1.5)

    # Set colourmap
    if cmap == 'purple':
        cmap = sns.cubehelix_palette(as_cmap=True)
    elif cmap == 'blue':
        cmap = sns.cubehelix_palette(as_cmap=True, rot=-.3, light=0.9, dark=0.2)

    # Create the clustermap
    cg = sns.clustermap(dat, cmap=cmap, method='complete', metric='cosine',
                        figsize=(12, 10))

    # Fix axes
    cg.cax.set_visible(True)
    _ = plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=60, ha='right')
    _ = plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)

    # Save out - if requested
    if save_fig:
        # ``db`` used to be read before it was ever assigned, so saving always
        # raised UnboundLocalError; it is now an optional argument.
        db = check_db(db)
        s_file = os.path.join(db.figs_path, save_name + '.svg')
        cg.savefig(s_file, transparent=True)
def plot_heatmap_with_dendrogram(similarity_matrix, plot_name, show_link):
    """Save an annotated heatmap with dendrograms as ``<plot_name>.png``.

    Arguments:
        similarity_matrix: square similarity matrix (pandas data frame) on a
            0-100 scale; distances are taken as ``100 - similarity``.
        plot_name: output path without the ``.png`` extension.
        show_link: if true, print the linkage matrix.
    """
    # Average-linkage clustering on the condensed distance matrix.
    distances = 100 - similarity_matrix
    Z = linkage(pdist.squareform(distances), 'average')
    if show_link:
        print(Z)

    sns.set(font='sans-serif', font_scale=0.7)

    # Cells are annotated with the similarities rounded to integers.
    shown_values = np.round(similarity_matrix, decimals=0).astype(int)

    # The same precomputed linkage orders both rows and columns.
    grid = sns.clustermap(
        shown_values,
        annot=True,
        cmap='inferno_r',
        vmax=100,
        fmt='d',
        col_linkage=Z,
        row_linkage=Z)

    heat_ax = grid.ax_heatmap
    plt.setp(heat_ax.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(heat_ax.xaxis.get_majorticklabels(), rotation=45)

    file_name = plot_name + '.png'
    click.echo('Writing ' + file_name, err=True)
    plt.savefig(file_name)
def plot_heatmap(data, columns=None, chrom="chrom", position="position", vline_color="black", **kwargs):
    """Plots a (clustered) CNV heatmap for multiple samples.

    Args:
        data: DataFrame with one row per genomic position, a chromosome
            column, a position column and one column per sample.
        columns: sample columns to plot; defaults to every column except
            ``chrom`` and ``position``.
        chrom: name of the chromosome column.
        position: name of the position column.
        vline_color: colour of the lines drawn at chromosome boundaries.
        **kwargs: forwarded to ``sns.clustermap``.

    Returns:
        The seaborn grid returned by ``sns.clustermap``.
    """
    # Select all columns by default.
    if columns is None:
        columns = [c for c in data if c not in {chrom, position}]

    # Sort data by position (``DataFrame.sort`` was removed from pandas).
    data = data.sort_values([chrom, position], ascending=True)

    # Plot heatmap: samples as rows, positions (unclustered) as columns.
    g = sns.clustermap(data[columns].T, linewidths=0, col_cluster=False,
                       **kwargs)
    g.ax_heatmap.set_xticks([])

    # Plot chromosome breaks: one past the last row of every chromosome.
    # ``keep='last'`` replaces the removed ``take_last=True`` argument.
    breaks = np.where(~data[chrom].duplicated(keep='last'))[0]
    breaks += 1

    for loc in breaks[:-1]:
        g.ax_heatmap.axvline(loc, color=vline_color)

    # Add chromosome labels centred on each chromosome's span; dividing by a
    # float keeps true midpoints regardless of the division semantics.
    label_pos = np.concatenate([[0], breaks])
    label_pos = (label_pos[:-1] + label_pos[1:]) / 2.0

    g.ax_heatmap.set_xticks(label_pos)
    g.ax_heatmap.set_xticklabels(data[chrom].unique(), rotation=0)

    # Label axes.
    g.ax_heatmap.set_xlabel(chrom)

    return g
def plot_clustermap(df, output_path, cmap=None, legend_label='', z_score=None, xticklabels=False, yticklabels=True, colors_dict=None, col_colors=None, row_colors=None):
    """Make clustermap figure

    Parameters
    ----------
    df
        Data to cluster and plot.
    output_path
        File the figure is saved to (300 dpi).
    cmap, z_score, xticklabels, yticklabels
        Forwarded to ``sns.clustermap``.
    legend_label
        Text written next to the colour bar.
    colors_dict
        Optional ``{category: {label: colour}}`` mapping; every label gets a
        legend entry.
    col_colors, row_colors
        Colour annotations forwarded to ``sns.clustermap``.

    Returns
    -------
    cg
        The seaborn grid.
    """
    # ``row_colors`` used to be hard-coded to None, silently dropping the
    # caller's argument.
    cg = sns.clustermap(df, col_colors=col_colors, row_colors=row_colors,
                        cmap=cmap, z_score=z_score, yticklabels=yticklabels,
                        xticklabels=xticklabels)
    if colors_dict:
        for cat in colors_dict:
            for label in colors_dict[cat]:
                # Zero-size bars exist only to create legend handles.
                cg.ax_col_dendrogram.bar(0, 0, color=colors_dict[cat][label],
                                         label=label, linewidth=0)
        cg.ax_col_dendrogram.legend(loc=(-0.7, -2), ncol=1)
    plt.subplots_adjust(top=1, bottom=0.02, left=0.3, right=0.8)
    fig = plt.gcf()
    fig.set_size_inches([10, 7.5])
    cg.cax.set_position((.025, .1, 0.025, .15))
    cg.cax.text(-0.3, -0.2, legend_label, fontsize=9)
    plt.savefig(output_path, dpi=300)
    return cg
def __call__(self, data, path):
    """Render ``data`` as a clustermap and wrap the figure in a result block.

    Only the leading square part of ``data`` (as many columns as there are
    rows) is plotted; row colours come from ``self.getColorBar``.  The
    returned block refers to the figure by its matplotlib figure number.
    """
    row_palette = self.getColorBar(data)
    row_count = data.shape[0]
    square = data.iloc[:, :row_count]
    grid = seaborn.clustermap(square, row_colors=row_palette)
    figure_number = grid.cax.figure.number
    block = ResultBlock("""#$mpl %i$#\n""" % figure_number,
                        title="ClusterMapPlot")
    return ResultBlocks(block)
def plot_enrichment_multiple(self, multi_gene_list, output_filename):
    """Plot a clustermap of the enrichment results of several gene lists.

    Args:
        multi_gene_list: input for ``self.iterative_enrichment_multilist``.
        output_filename: file to save the figure to; when falsy the figure is
            shown interactively instead.
    """
    enrichment_results_df = self.iterative_enrichment_multilist(multi_gene_list)

    # replace accession numbers with GO term descriptions
    formatted_enrichment_df = enrichment_results_df.merge(
        self.term_definition, left_index=True, right_on='GO term accession')
    formatted_enrichment_df['GO term'] = (
        formatted_enrichment_df['GO term name'] + '\n['
        + formatted_enrichment_df['GO term accession'] + ']')
    # The definition columns were only needed to build the label.
    formatted_enrichment_df.drop(self.term_definition.columns, axis=1,
                                 inplace=True)
    formatted_enrichment_df.set_index('GO term', inplace=True)

    sns.set(style='whitegrid')
    # clustermap opens its own figure; a preceding plt.figure() only produced
    # an extra empty window and leaked a figure.
    plot = sns.clustermap(formatted_enrichment_df, cmap="Reds_r")
    plt.setp(plot.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)

    if output_filename:
        plot.savefig(output_filename)
    else:
        plt.show()
def plot_dist_matrix(matrix, fasta_names, heatmap_out, dendrogram_out):
    """Cluster the distance matrix hierarchically and plot using seaborn.

    Average linkage method is used.

    Args:
        matrix: square matrix with one row and column per sequence.
        fasta_names: labels for the rows and columns of ``matrix``.
        heatmap_out: file the clustered heatmap is saved to.
        dendrogram_out: file the stand-alone dendrogram is saved to.
    """
    # Load required modules for plotting; the Agg backend needs no display.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import seaborn as sns
    import pandas as pd
    from scipy.cluster.hierarchy import dendrogram, linkage

    # Create
    pdm = pd.DataFrame(matrix, index=fasta_names, columns=fasta_names)

    # Plot heatmap; the figure grows with the number of sequences.
    figsizex = max(10, len(fasta_names) / 4)
    clustergrid = sns.clustermap(pdm, metric='euclidean', method='average',
                                 figsize=(figsizex, figsizex))
    clustergrid.savefig(heatmap_out)
    # Release the figure once it is on disk.
    plt.close(clustergrid.fig)

    # Plot dendrogram
    sns.set_style('white')
    figsizey = max(10, len(fasta_names) / 8)
    f, ax = plt.subplots(figsize=(figsizex, figsizey))
    link = linkage(pdm, metric='euclidean', method='average')
    # A plain list avoids truth-testing a pandas Index inside scipy
    # (NOTE(review): older scipy releases appear to do so - confirm).
    dendrogram(link, labels=list(pdm.index), ax=ax)
    no_spine = {'left': True, 'bottom': True, 'right': True, 'top': True}
    sns.despine(**no_spine)

    plt.xticks(rotation=90)
    f.tight_layout()
    plt.savefig(dendrogram_out)
    plt.close(f)
def plot_transition_clustermap(data_array, gene_names, pseudotimes, n_clusters=10, gradient=False):
    """Cluster genes by their profile along pseudotime and plot a clustermap.

    Args:
        data_array: 2-D array whose axis 0 is indexed like ``pseudotimes`` and
            whose axis 1 is indexed like ``gene_names``.
        gene_names: one name per gene (row of the plotted matrix).
        pseudotimes: array of pseudotime values, indexable by an int array.
        n_clusters: maximum number of gene clusters (``maxclust`` criterion).
        gradient: if true, cluster on the z-scored gradient along axis 1 with
            standardised euclidean distance; otherwise cluster the raw
            profiles with correlation distance and min-max scale each row.

    Returns:
        defaultdict mapping the ``settings.STATE_COLORS`` entry of each
        cluster to the list of gene names assigned to it.
    """
    if gradient:
        data_to_plot = zscore(np.gradient(data_array)[1].T, axis=0)
        scale = None
        metric = 'seuclidean'
        row_linkage = linkage(pdist(abs(data_to_plot), metric=metric),
                              method='complete')
    else:
        data_to_plot = data_array.T
        scale = 0
        metric = 'correlation'
        row_linkage = linkage(pdist(data_to_plot, metric=metric),
                              method='complete')

    assignments = fcluster(row_linkage, n_clusters, criterion='maxclust')
    cm = sns.clustermap(data_to_plot, col_cluster=False, standard_scale=scale,
                        yticklabels=gene_names, row_linkage=row_linkage,
                        row_colors=[settings.STATE_COLORS[i] for i in assignments])

    # Roughly ten evenly spaced pseudotime ticks.  The step must be a positive
    # integer: a float step yields float tick positions that cannot index
    # ``pseudotimes``, and a zero step makes ``arange`` fail.
    n_points = data_array.shape[0]
    r = np.arange(10, n_points, max(1, n_points // 10))
    plt.setp(cm.ax_heatmap.get_yticklabels(), fontsize=5)
    cm.ax_heatmap.set_xticks(r)
    cm.ax_heatmap.set_xticklabels(['%.1f' % x for x in pseudotimes[r]])
    cm.ax_heatmap.set_xlabel('Pseudotime')
    cm.ax_heatmap.set_ylabel('Gene')

    gene_clusters = defaultdict(list)
    for i, cl in enumerate(assignments):
        gene_clusters[settings.STATE_COLORS[cl]].append(gene_names[i])
    return gene_clusters
def CorrFig(data,outname):
    """Write the column correlation matrix of ``data`` and plot it clustered.

    Produces ``<outname>.corr`` (tab-separated correlation table) and
    ``<outname>.corr.pdf`` (annotated clustermap of the same table).
    """
    # Compute the correlation matrix once and reuse it for both outputs.
    corr = data.corr()
    corr.to_csv(outname+'.corr',index=True,header=True,sep='\t')
    seaborn.set_context('notebook', font_scale=1.2)
    fig1 = seaborn.clustermap(corr, method='average', metric='euclidean',
                              figsize=(12,12), cmap='YlGnBu', annot=True)
    plt.setp(fig1.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(fig1.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    plt.savefig(outname+'.corr.pdf')
    return
def plot_clustermap(df):
    """Draw a clustermap of the column correlations of ``df``.

    Row tick labels are set horizontal.  Nothing is returned or saved; the
    figure stays open as matplotlib's current figure.
    """
    correlations = df.corr()
    cg = sns.clustermap(correlations)
    row_labels = cg.ax_heatmap.yaxis.get_majorticklabels()
    plt.setp(row_labels, rotation=0)
def clustermap(corpus, distance_matrix=None, color_leafs=True, outputfile=None, fontsize=5, save=False, show=False, return_svg=False):
    """
    Draw a square clustermap of the corpus using seaborn's `clustermap`.

    Parameters
    ----------
    corpus : `Corpus` instance
        The corpus to be plotted.
    distance_matrix : array-like, [n_texts, n_texts]
        A square distance matrix holding the pairwise distances between
        all the texts in the corpus.
    color_leafs: boolean, default=True,
        If true, will color the text labels on the axis according to
        their category.
    outputfile : str
        The path where the plot should be saved.
    fontsize : int, default=5
        The fontsize of the labels on the axes.
    save : boolean, default=False
        Whether to save the plot to `outputfile`.
    show : boolean, default=False
        Whether to show the plot interactively.
    return_svg : boolean, default=False
        Whether to return the plot converted by `plt_fig_to_svg`.

    """
    plt.clf()

    # convert to pandas dataframe:
    labels = corpus.titles
    df = pd.DataFrame(data=distance_matrix, columns=labels)
    df = df.applymap(lambda x: int(x*1000)).corr()

    # clustermap plotting:
    cm = sns.clustermap(df)
    ax = cm.ax_heatmap

    # ``spectral`` was renamed to ``nipy_spectral`` and later removed from
    # matplotlib; fall back to the old name on releases that predate it.
    leaf_cmap = getattr(plt.cm, 'nipy_spectral', None)
    if leaf_cmap is None:
        leaf_cmap = plt.cm.spectral

    # NOTE(review): labels are coloured by tick position, not by the
    # clustered leaf order - confirm this matches the intended category.

    # xlabels:
    for idx, label in enumerate(ax.get_xticklabels()):
        label.set_rotation('vertical')
        label.set_fontname('Arial')
        label.set_fontsize(fontsize)
        if color_leafs:
            label.set_color(leaf_cmap(corpus.target_ints[idx] / 10.))

    # ylabels:
    for idx, label in enumerate(ax.get_yticklabels()):
        label.set_rotation('horizontal')
        label.set_fontname('Arial')
        label.set_fontsize(fontsize)
        if color_leafs:
            # watch out: different indexing on this axis
            label.set_color(leaf_cmap(corpus.target_ints[-idx-1] / 10.))

    if save:
        if outputfile:
            outputfile = os.path.expanduser(outputfile)
        cm.savefig(outputfile)

    if show:
        plt.show()

    if return_svg:
        return plt_fig_to_svg(cm)
def main():
    """Plot nested major/minor heatmaps for the most variable major entries.

    Reads the table given on the command line (indexed by a major and a
    minor column), optionally log2-transforms and shifts it, and keeps the
    major entries that have at least ``args.minor_cutoff`` minor rows.  Those
    are ranked by variance and plotted ten at a time (at most eleven plots):
    each plot clusters the per-major sums and lists every major row followed
    by its minor rows, reusing the dendrograms of the major-level clustering.
    One ``<tsv name>_heatmap_<i>.png`` is written per group.
    """
    args = parser.parse_args()
    import numpy as np
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt

    major_index = args.major_index
    minor_index = args.minor_index
    df = pd.read_table(args.tsv, index_col=[major_index, minor_index],
                       sep=args.delimiter)
    df = np.log2(df) if args.log_normalize else df
    # set our undetected samples (log2(0) == -inf) to our lowest detection
    df[df == -1*np.inf] = df[df != -1*np.inf].min().min()
    # translate our data so we have no negatives (which would screw up our
    # addition and makes no biological sense)
    if args.translate:
        df += abs(df.min().min())
    major_counts = df.groupby(level=[major_index]).count()
    # we only want to plot samples with multiple values in the minor index
    cutoff = args.minor_cutoff
    multi = df[df.index.get_level_values(major_index).isin(
        major_counts[major_counts >= cutoff].dropna().index)]

    # Let's select the most variable major entries (``Series.order`` was
    # removed from pandas; ``sort_values`` is its replacement).
    most_variable = (multi.groupby(level=major_index).var().mean(axis=1)
                     .sort_values(ascending=False))

    # and group by 10s
    for i in range(11):
        top = most_variable.index[10*i:10*(i+1)]
        if len(top) == 0:
            # Fewer major entries than groups: nothing left to plot.
            break
        dat = multi[multi.index.get_level_values(major_index).isin(top)]
        # we want to cluster by our major index, and then under these plot the
        # values of our minor index
        major_dat = dat.groupby(level=major_index).sum()
        seaborn_map = sns.clustermap(major_dat, row_cluster=True,
                                     col_cluster=True)
        ordered_cols = seaborn_map.data2d.columns

        # now we keep this clustering, but recreate our data to fit the above
        # clustering, with our minor index below the major index (you can
        # think of transcript levels under gene levels if you are a biologist)
        pieces = []
        for major_val in seaborn_map.data2d.index:
            minor_rows = multi[
                multi.index.get_level_values(major_index) == major_val
            ][ordered_cols]
            major_row = major_dat.loc[major_val][ordered_cols]
            # One summary row per major value, then its minor rows.
            pieces.append(major_row.to_frame().T)
            pieces.append(minor_rows)
        # ``DataFrame.append`` no longer exists; concatenate once instead.
        merged_dat = pd.concat(pieces)
        merged_map = sns.clustermap(merged_dat, row_cluster=False,
                                    col_cluster=False)

        # recreate our dendrogram, this is undocumented and probably a hack
        # but it works
        # NOTE(review): relies on seaborn's private dendrogram plotter, whose
        # ``plot`` signature may differ between seaborn versions - confirm.
        seaborn_map.dendrogram_col.plot(merged_map.ax_col_dendrogram)
        # for rows, I imagine at some point it will fail to fall within the
        # major axis but fortunately for this dataset it is not true
        seaborn_map.dendrogram_row.plot(merged_map.ax_row_dendrogram)
        merged_map.savefig('{}_heatmap_{}.png'.format(
            os.path.split(args.tsv.name)[1], i))

        # Two figures are created per group; release them once saved.
        plt.close(seaborn_map.fig)
        plt.close(merged_map.fig)
def make_heatmap_w2vrelated(model, rel_wds):
    """
    Draw a square clustermap of the pairwise similarities between words.

    ``rel_wds`` is a list of sequences whose first item is the word;
    ``model`` is a word2vec model providing ``similarity(word1, word2)``.
    The diagonal (a word against itself) is set to 0.
    """
    names = [entry[0] for entry in rel_wds]
    n = len(rel_wds)

    data_mat = np.zeros((n, n))
    for row, first in enumerate(names):
        for col, second in enumerate(names):
            score = model.similarity(first, second)
            # Self-similarity is blanked out.
            data_mat[row, col] = 0 if row == col else score

    df = pd.DataFrame(data=data_mat, columns=names, index=names)
    sb.clustermap(df, linewidths=.5)
def graphDendrogram(csv):
    """Plot a clustermap of a square project-by-project ANI table.

    Args:
        csv: path of a CSV file whose unnamed first column holds the row
            project names and whose remaining columns are named after the
            column projects; ``'n/a'`` cells are read as missing.

    The figure is saved as ``<outputfile>_dendrogram.pdf``, where
    ``outputfile`` is a module-level name.
    """
    dend_data = pd.read_csv(csv, na_values='n/a')
    dend_data = dend_data.rename(columns={'Unnamed: 0': 'Project1'})
    # Long format and back to wide: one row per project pair in between.
    dend_data = pd.melt(dend_data, id_vars=['Project1'])
    dend_data = dend_data.rename(columns={'variable': 'project2',
                                          'value': 'ANI'})
    # Keyword arguments: current pandas rejects positional ones here.
    dend_data = dend_data.pivot(index="Project1", columns="project2",
                                values="ANI")
    g = sns.clustermap(dend_data)
    g.savefig(outputfile + "_dendrogram.pdf")
def heatmap_plot_zscore_bigneuron(df_zscore_features, df_all, output_dir, title=None):
    """Save a feature-by-sample heatmap of z-scored features.

    Samples (rows of ``df_zscore_features``) are clustered with Ward linkage
    and drawn as columns; features are put in Ward leaf order without a
    dendrogram.  Columns carry a colour bar derived from
    ``df_all['nt_type']``.

    Args:
        df_zscore_features: DataFrame of samples by z-scored features.
        df_all: DataFrame providing the ``'nt_type'`` column for colouring.
        output_dir: directory receiving ``zscore_feature_heatmap.png``.
        title: optional title.

    Returns:
        The sample linkage matrix.
    """
    print("heatmap plot:bigneuron")

    # taiwan
    metric = 'nt_type'
    mtypes = np.unique(df_all[metric])
    print(mtypes)
    mtypes_pal = sns.color_palette("hls", len(mtypes))

    mtypes_lut = dict(zip(mtypes, mtypes_pal))
    mtypes_colors = df_all[metric].map(mtypes_lut)

    linkage = hierarchy.linkage(df_zscore_features, method='ward',
                                metric='euclidean')

    data = df_zscore_features.transpose()
    row_linkage = hierarchy.linkage(data, method='ward', metric='euclidean')
    feature_order = hierarchy.leaves_list(row_linkage)

    # Reorder the features by hand, since row clustering is switched off.
    matchIndex = [data.index[x] for x in feature_order]
    data = data.reindex(matchIndex)

    # clustermap opens its own figure; a preceding pl.figure() only leaked
    # an empty one.
    g = sns.clustermap(data, row_cluster=False, col_linkage=linkage,
                       method='ward', metric='euclidean', linewidths=0.0,
                       col_colors=[mtypes_colors],
                       cmap=sns.cubehelix_palette(light=1, as_cmap=True),
                       figsize=(40, 10))

    pl.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    pl.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    pl.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.95)  # !!!!!

    if title:
        pl.title(title)

    location = "best"
    num_cols = 1
    # Legend for the column colours: zero-size bars on the (empty) row
    # dendrogram axes exist only to create legend handles.
    for label in mtypes:
        g.ax_row_dendrogram.bar(0, 0, color=mtypes_lut[label], label=label,
                                linewidth=0.0)
    g.ax_row_dendrogram.legend(loc=location, ncol=num_cols, borderpad=0)

    filename = output_dir + '/zscore_feature_heatmap.png'
    pl.savefig(filename, dpi=300)
    print("save zscore matrix heatmap figure to :" + filename)
    pl.close()
    print("done clustering and heatmap plotting")
    return linkage
def cluster_all_map(mat, ylab, out):
    """Save a clustermap in which all tissues are clustered together.

    Args:
        mat (array): Fold change normalized matrix with one column per
            sample.  The column layout is fixed: 9 heart, 9 brain and 8 quad
            samples, each tissue ordered wt, het, aso, ko.
        ylab (list): Y axis labels.
        out (str): Path of the PDF file to write.

    Returns:
        Nothing.  The figure is written to ``out`` in PDF format.
    """
    tissue_hex = {"heart": "#3498db", "brain": "#e74c3c", "quad": "#2ecc71"}
    genotype_hex = {"wt": "#8FBC8F", "het": "#B22222",
                    "aso": "#FFD700", "ko": "#FF69B4"}

    # Legend entries in display order; the blank white entry separates the
    # tissue colours from the genotype colours.
    legend_entries = [
        ("#3498db", "heart"), ("#e74c3c", "brain"), ("#2ecc71", "quad"),
        ("#FFFFFF", " "),
        ("#8FBC8F", "wt"), ("#B22222", "het"),
        ("#FFD700", "aso"), ("#FF69B4", "ko"),
    ]

    xlabel = array(["wt", "wt", "wt", "Het", "Het", "aso", "aso", "ko", "ko",
                    "wt", "wt", "wt", "Het", "Het", "aso", "aso", "ko", "ko",
                    "wt", "wt", "Het", "Het", "aso", "aso", "ko", "ko", ])

    # Replicates per genotype: heart and brain have three wt samples, quad
    # has two.
    nine_samples = [("wt", 3), ("het", 2), ("aso", 2), ("ko", 2)]
    eight_samples = [("wt", 2), ("het", 2), ("aso", 2), ("ko", 2)]

    sample_type = []
    for genotype, replicates in nine_samples + nine_samples + eight_samples:
        sample_type += sns.color_palette([genotype_hex[genotype]], replicates)

    season_colors = []
    for tissue, replicates in (("heart", 9), ("brain", 9), ("quad", 8)):
        season_colors += sns.color_palette([tissue_hex[tissue]], replicates)

    g = sns.clustermap(mat, annot=False, method='weighted',
                       metric='euclidean',
                       col_colors=[sample_type, season_colors],
                       col_cluster=True, xticklabels=xlabel,
                       yticklabels=ylab)
    plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)

    # Zero-size bars exist only to create legend handles.
    for colour, text in legend_entries:
        g.ax_col_dendrogram.bar(0, 0, color=colour, label=text, linewidth=0)
    g.ax_col_dendrogram.legend(loc="upper right", ncol=2)

    plt.suptitle('Fold change filter across Tissues +/- 1 FC')
    plt.savefig(out, format="pdf", dpi=1000)
def heatmap(output_dir, table: pd.DataFrame,
            metadata: qiime2.CategoricalMetadataColumn=None,
            normalize: bool=True, title: str=None,
            metric: str='euclidean', method: str='average',
            cluster: str='both', color_scheme: str='rocket') -> None:
    """Render a clustered feature-table heatmap into ``output_dir``.

    Writes ``feature-table-heatmap.png`` and ``feature-table-heatmap.svg``
    and renders the ``index.html`` template next to them.  With ``normalize``
    the table is transformed to ``log10(x + 1)`` first.  ``cluster`` selects
    an entry of the module-level ``_clustering_map``.

    Raises:
        ValueError: if ``table`` is empty.
    """
    if table.empty:
        raise ValueError('Cannot visualize an empty table.')

    if metadata is not None:
        table = _munge_metadata(metadata, table, cluster)

    if normalize:
        table = table.apply(lambda x: np.log10(x + 1))
        cbar_label = 'log10 frequency'
    else:
        cbar_label = 'frequency'

    # Hard-coded values for reasonable plots
    scaletron, labelsize, dpi = 50, 8, 100
    sns.set(rc={'xtick.labelsize': labelsize, 'ytick.labelsize': labelsize,
                'figure.dpi': dpi})
    n_rows, n_cols = table.shape[0], table.shape[1]
    width = n_cols / scaletron
    height = n_rows / scaletron

    heatmap_plot = sns.clustermap(table, method=method, metric=metric,
                                  **_clustering_map[cluster],
                                  cmap=color_scheme,
                                  xticklabels=True, yticklabels=True,
                                  cbar_kws={'label': cbar_label})
    if title is not None:
        heatmap_plot.fig.suptitle(title)

    heat_ax = heatmap_plot.ax_heatmap
    # Read every current position before moving anything.
    hm = heat_ax.get_position()
    cbar = heatmap_plot.cax.get_position()
    row = heatmap_plot.ax_row_dendrogram.get_position()
    col = heatmap_plot.ax_col_dendrogram.get_position()

    # Resize the plot to set cell aspect-ratio to square
    top_edge = hm.y0 + height
    heat_ax.set_position([hm.x0, hm.y0, width, height])
    heatmap_plot.cax.set_position([cbar.x0, top_edge,
                                   cbar.width, cbar.height])
    heatmap_plot.ax_row_dendrogram.set_position([row.x0, row.y0,
                                                 row.width, height])
    heatmap_plot.ax_col_dendrogram.set_position([col.x0, top_edge,
                                                 width, col.height])

    # https://stackoverflow.com/a/34697479/3776794
    plt.setp(heat_ax.xaxis.get_majorticklabels(), rotation=90)
    plt.setp(heat_ax.yaxis.get_majorticklabels(), rotation=0)

    for ext in ['png', 'svg']:
        img_fp = os.path.join(output_dir, 'feature-table-heatmap.%s' % ext)
        heatmap_plot.savefig(img_fp)

    index_fp = os.path.join(TEMPLATES, 'index.html')
    q2templates.render(index_fp, output_dir, context={'normalize': normalize})
def plot_nn_weights(w, x_labels, y_labels, fig_path, row_linkage=None, fig_size=(10, 3)):
    """Save a row-clustered heatmap of a weight matrix.

    Args:
        w: 2-D weight matrix.
        x_labels: column labels of ``w``.
        y_labels: row tick labels.
        fig_path: file the figure is saved to.
        row_linkage: optional precomputed row linkage; otherwise rows are
            clustered with average linkage on cosine distance.
        fig_size: (width, height) of the figure in inches.
    """
    # ``fig_size`` used to be applied to a throwaway figure that clustermap
    # never drew on (and that was never closed); hand it to clustermap so it
    # actually sizes the plot.
    clmap = sns.clustermap(pd.DataFrame(w, columns=x_labels),
                           method='average', metric='cosine',
                           row_linkage=row_linkage, col_cluster=False,
                           robust=True, yticklabels=y_labels,
                           figsize=fig_size)
    plt.setp(clmap.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(clmap.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    # Hide the colour bar.
    clmap.cax.set_visible(False)
    plt.savefig(fig_path)
    plt.clf()
    plt.close()
def cluster_map(data, names):
    """Cluster map of genes.

    Args:
        data: DataFrame with one row per gene.
        names: index labels of the rows to plot.

    Returns:
        The seaborn grid.

    The selected rows are log-transformed (missing values become 0) and
    centred on their row mean before clustering.
    """
    import seaborn as sns
    import pylab as plt

    # ``.ix`` was removed from pandas; ``.loc`` is the label-based
    # replacement (it raises KeyError for labels missing from the index).
    data = data.loc[names]
    X = np.log(data).fillna(0)
    X = X.apply(lambda x: x - x.mean(), 1)
    cg = sns.clustermap(X, cmap='RdYlBu_r', figsize=(8, 10), lw=.5,
                        linecolor='gray')
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    return cg
def plot_gene_clustermap_by_membership(data_array, memberships):
    """Plot a gene-clustered heatmap of the per-group mean of each column.

    ``memberships`` assigns every row of ``data_array`` to a group.  For each
    distinct group the column means of its rows are computed, the minimum
    over groups is subtracted per column, and the result is drawn with
    columns of ``data_array`` (genes) as rows and groups as unclustered
    columns.  Returns the seaborn grid.
    """
    groups = list(set(memberships))
    n_groups = len(groups)

    group_means = np.zeros([n_groups, data_array.shape[1]])
    for row, label in zip(range(n_groups), groups):
        in_group = memberships == label
        group_means[row, :] = data_array[in_group, :].mean(axis=0)

    # Shift every column so that its lowest group mean is zero.
    shifted = group_means - group_means.min(axis=0)
    cm = sns.clustermap(shifted.T, col_cluster=False)

    heat_ax = cm.ax_heatmap
    # Ticks at the cell centres, labelled with the group names.
    heat_ax.set_xticks(np.arange(n_groups) + 0.5)
    heat_ax.set_xticklabels(groups)
    heat_ax.set_xlabel('Cell type')
    heat_ax.set_ylabel('Gene')
    return cm
# Two views of the table ``z``: a log-scaled one and one in which every row
# is divided by its own maximum.
print("standardizing by log scale...")
# The 0.01 offset keeps zero entries finite under the logarithm.
log_df = np.log(z + 0.01)

print("standardizing columns to the max in each row...")
row_max = z.max(axis=1)
std_df = z.div(row_max, axis=0)

# Figure width and height come from the 2nd and 3rd command-line arguments.
x, y = float(sys.argv[2]), float(sys.argv[3])

print("making log heat map...")
sns.set(font_scale=1.5)
g = sns.clustermap(
    log_df,
    figsize=(x, y),
    cmap="magma",
    yticklabels=False,
    xticklabels=False,
    col_colors=lut,
)
plt.savefig("heatmap_log.png", bbox_inches="tight", dpi=600)
plt.clf()

print("making standard heat map...")
g = sns.clustermap(
    std_df,
    figsize=(x, y),
    vmin=0,
    vmax=1,
    cmap="magma",
    yticklabels=False,
    xticklabels=False,
    col_colors=lut,
)
plt.savefig("heatmap_std.png", bbox_inches="tight", dpi=600)
import numpy as np
import pandas as pd
from numpy.random import randn
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Demo: four clustermaps of the seaborn "flights" example data set.
df = sns.load_dataset('flights')
# Wide table with years as rows and months as columns.  Keyword arguments
# are used because current pandas rejects positional ones for ``pivot``.
df2 = df.pivot(index='year', columns='month', values='passengers')
print(df2)

# Rows and columns clustered.
sns.clustermap(df2).savefig('cl1.png')
# Rows clustered only.
sns.clustermap(df2, col_cluster=False).savefig('cl2.png')
# Min-max scaled per row, then per column.
sns.clustermap(df2, standard_scale=0).savefig('cl3.png')
sns.clustermap(df2, standard_scale=1).savefig('cl4.png')
def jacobian_kinetics(
    adata,
    basis='umap',
    regulators=None,
    effectors=None,
    mode="pseudotime",
    tkey="potential",
    color_map="bwr",
    gene_order_method='raw',
    show_colorbar=False,
    cluster_row_col=[False, True],
    figsize=(11.5, 6),
    standard_scale=1,
    save_show_or_return='show',
    save_kwargs={},
    **kwargs
):
    """Plot the Jacobian element dynamics over time (pseudotime or inferred real time) in a heatmap.

    Note that by default `potential` estimated with the diffusion graph built from reconstructed vector field will be
    used as the measure of pseudotime.

    Parameters
    ----------
        adata: :class:`~anndata.AnnData`
            an Annodata object.
        basis: `str` (default: `umap`)
            The reduced dimension basis. The Jacobian results are read from `adata.uns["jacobian_" + basis]`, or from
            `adata.uns["jacobian"]` when `basis` is None.
        regulators: `list`, `str` or `None` (default: `None`)
            The list of genes that will be used as regulators for plotting the Jacobian heatmap, only limited to genes
            that have already performed Jacobian analysis. If None, all stored regulators are used.
        effectors: `list`, `str` or `None` (default: `None`)
            The list of genes that will be used as targets for plotting the Jacobian heatmap, only limited to genes
            that have already performed Jacobian analysis. If None, all stored effectors are used.
        mode: `str` (default: `pseudotime`)
            Which data mode will be used, either vector_field or pseudotime. It is forwarded to the half max ordering
            routine and is only used when `gene_order_method` is `half_max_ordering`.
        tkey: `str` (default: `potential`)
            The .obs column that will be used for timing each cell. If it is `potential` and that column is missing,
            `ddhodge` is run on `adata` first.
        color_map: `str` (default: `bwr`)
            Color map that will be used to color the heatmap. The heatmap is centered at 0, so a divergent color map
            is appropriate; good examples include `BrBG`, `RdBu_r` or `coolwarm`, etc.
        gene_order_method: `str` (default: `raw`) [`raw`, `half_max_ordering`, `maximum`]
            Supports three different methods for ordering the regulator->effector rows of the heatmap. For `raw`,
            rows keep their stored order and each row is divided by its maximum absolute value. For
            `half_max_ordering`, it will order rows into up, down and transit groups by the half max ordering
            algorithm (HA Pliner, et. al, Molecular cell 71 (5), 858-871. e8). While for `maximum`, it will order by
            the position of the highest value.
        show_colorbar: `bool` (default: `False`)
            Whether to show the color bar.
        cluster_row_col: `[bool, bool]` (default: `[False, True]`)
            Clustering switches. The first element is passed to clustermap as `col_cluster` and the second as
            `row_cluster`; rows are never clustered when two or fewer regulator->effector pairs are plotted.
        figsize: `tuple` (default: `(11.5, 6)`)
            Size of figure
        standard_scale: `int` or `None` (default: 1)
            Only used when `gene_order_method` is `maximum`. The axis along which the minimum is subtracted and the
            result divided by the range; `None` disables the scaling.
        save_show_or_return: {'show', 'save_fig', 'return'} (default: `show`)
            Whether to save_fig, show or return the figure. `save` is accepted as an alias of `save_fig`.
        save_kwargs: `dict` (default: `{}`)
            A dictionary that will passed to the save_fig function. By default it is an empty dictionary and the save_fig function
            will use the {"path": None, "prefix": 'jacobian_kinetics', "dpi": None, "ext": 'pdf', "transparent": True, "close":
            True, "verbose": True} as its parameters. Otherwise you can provide a dictionary that properly modify those keys
            according to your needs.
        kwargs:
            All other keyword arguments are passed to clustermap(). Currently `xticklabels=False, yticklabels=1`
            is passed to clustermap() by default.

    Returns
    -------
        The clustermap grid when `save_show_or_return` is `return`; otherwise nothing, but plots a heatmap that
        shows the element of Jacobian matrix dynamics over time (potential decreasing).

    Raises
    ------
        ValueError
            If none of the requested regulators or none of the requested effectors has a stored Jacobian.
        Exception
            If `gene_order_method` is not one of the supported values.

    Examples
    --------
    >>> import dynamo as dyn
    >>> adata = dyn.sample_data.hgForebrainGlutamatergic()
    >>> adata = dyn.pp.recipe_monocle(adata)
    >>> dyn.tl.dynamics(adata)
    >>> dyn.vf.VectorField(adata, basis='pca')
    >>> valid_gene_list = adata[:, adata.var.use_for_transition].var.index[:2]
    >>> dyn.vf.jacobian(adata, regulators=valid_gene_list[0], effectors=valid_gene_list[1])
    >>> dyn.pl.jacobian_kinetics(adata)
    """

    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt

    # NOTE: the list/dict defaults in the signature are only read, never mutated, in this function.
    Jacobian_ = "jacobian" if basis is None else "jacobian_" + basis
    jacobian_res = adata.uns[Jacobian_]
    Der = jacobian_res.get('jacobian')
    cell_indx = jacobian_res.get('cell_idx')
    regulators_ = jacobian_res.get('regulators')
    effectors_ = jacobian_res.get('effectors')

    if tkey == "potential" and "potential" not in adata.obs_keys():
        ddhodge(adata)

    adata_ = adata[cell_indx, :]
    time = adata_.obs[tkey]

    # A 3-D Jacobian is flattened so that each row holds one (effector, regulator) element across cells.
    jacobian_mat = Der.reshape((-1, Der.shape[2])) if Der.ndim == 3 else Der[None, :]
    n_source_targets_ = Der.shape[0] * Der.shape[1] if Der.ndim == 3 else 1
    # NOTE(review): in the non-3D case both labels are built from `effectors_`; confirm whether the source label
    # should come from `regulators_` instead.
    targets_, sources_ = (np.repeat(effectors_, Der.shape[1]), np.tile(regulators_, Der.shape[0])) if Der.ndim == 3 \
        else (np.repeat(effectors_, Der.shape[0]), np.repeat(effectors_, Der.shape[0]))
    source_targets_ = [sources_[i] + '->' + targets_[i] for i in range(n_source_targets_)]

    regulators = regulators_ if regulators is None else regulators
    effectors = effectors_ if effectors is None else effectors
    if isinstance(regulators, str):
        regulators = [regulators]
    if isinstance(effectors, str):
        effectors = [effectors]

    # Remember what was asked for so the error message can report it (the intersections below may be empty).
    requested_regulators, requested_effectors = regulators, effectors
    regulators = list(set(regulators_).intersection(regulators))
    effectors = list(set(effectors_).intersection(effectors))
    if len(regulators) == 0 or len(effectors) == 0:
        raise ValueError(f"Jacobian related to source genes {requested_regulators} and target genes "
                         f"{requested_effectors} you provided does not exist. Available source genes include "
                         f"{regulators_} while available target genes include {effectors_}")

    n_source_targets = len(regulators) * len(effectors)
    targets, sources = np.repeat(effectors, len(regulators)), np.tile(regulators, len(effectors))
    source_targets = [sources[i] + '->' + targets[i] for i in range(n_source_targets)]

    # Order the cells (columns) by increasing time.
    jacobian_mat = jacobian_mat[:, np.argsort(time)]

    if gene_order_method == "half_max_ordering":
        time, all_vals, valid_ind, gene_idx = _half_max_ordering(
            jacobian_mat, time, mode=mode, interpolate=True, spaced_num=100
        )
        finite_rows = np.isfinite(all_vals.sum(1))
        all_vals, source_targets = all_vals[finite_rows, :], np.array(source_targets)[gene_idx][finite_rows]

        # NOTE(review): the index is the full, un-reordered label list; confirm it matches the rows of `all_vals`.
        df = pd.DataFrame(all_vals, index=source_targets_)
    elif gene_order_method == 'maximum':
        jacobian_mat = lowess_smoother(time, jacobian_mat, spaced_num=100)
        jacobian_mat = jacobian_mat[np.isfinite(jacobian_mat.sum(1)), :]

        # Use the unscaled values when scaling is disabled; previously `exprs` was undefined in that case.
        exprs = jacobian_mat
        if standard_scale is not None:
            exprs = (jacobian_mat - np.min(jacobian_mat, axis=standard_scale)[:, None]) / np.ptp(
                jacobian_mat, axis=standard_scale
            )[:, None]
        max_sort = np.argsort(np.argmax(exprs, axis=1))
        df = pd.DataFrame(exprs[max_sort, :], index=np.array(source_targets_)[max_sort])
    elif gene_order_method == "raw":
        # Scale every row by its largest absolute value.
        jacobian_mat /= np.abs(jacobian_mat).max(1)[:, None]
        df = pd.DataFrame(jacobian_mat, index=np.array(source_targets_))
    else:
        raise Exception('gene order_method can only be either raw, half_max_ordering or maximum')

    heatmap_kwargs = dict(xticklabels=False, yticklabels=1)
    heatmap_kwargs = update_dict(heatmap_kwargs, kwargs)

    sns_heatmap = sns.clustermap(
        df.loc[source_targets, :],
        col_cluster=cluster_row_col[0],
        row_cluster=cluster_row_col[1] if len(source_targets) > 2 else False,
        cmap=color_map,
        figsize=figsize,
        center=0,
        **heatmap_kwargs
    )
    if not show_colorbar:
        sns_heatmap.cax.set_visible(False)

    if save_show_or_return in ("save", "save_fig"):
        s_kwargs = {"path": None, "prefix": 'jacobian_kinetics', "dpi": None,
                    "ext": 'pdf', "transparent": True, "close": True, "verbose": True}
        s_kwargs = update_dict(s_kwargs, save_kwargs)

        save_fig(**s_kwargs)
    elif save_show_or_return == "show":
        if show_colorbar:
            plt.subplots_adjust(right=0.85)
        plt.tight_layout()
        plt.show()
    elif save_show_or_return == "return":
        return sns_heatmap
### Heatmaps for Calls made by Hour and Day of Week
# Count rows per (day of week, hour) and spread the hours across columns.
byDayofWeekHour = (
    call_data
    .groupby(by=['Day_of_Week', 'Hour'])
    .count()['twp']
    .unstack(level=-1)
)

fig8 = plt.figure(figsize=(10, 6))
ax8 = fig8.add_axes([.1, .1, .8, .8])
# Draw on the axes that was just added to the figure.
ax8 = sns.heatmap(byDayofWeekHour, cmap="coolwarm", ax=ax8)
fig8.suptitle('Heatmap: Hour by Day of the Week')

# The clustermap creates its own figure; keep a handle on it for the title.
clm = sns.clustermap(byDayofWeekHour, cmap="coolwarm", figsize=(10, 6))
fig9 = clm.fig
fig9.suptitle('Clustermap: Hour by Day of the Week')

### Heatmaps for Calls made by Month and Day of Week
byDayofWeekMonth = (
    call_data
    .groupby(by=['Day_of_Week', 'Month'])
    .count()['twp']
    .unstack(level=-1)
)

fig8 = plt.figure(figsize=(10, 6))
ax8 = fig8.add_axes([.1, .1, .8, .8])
ax8 = sns.heatmap(byDayofWeekMonth, cmap="coolwarm", ax=ax8)
# One palette colour per network, keyed by the network's string form
network_pal = sns.cubehelix_palette(
    len(used_networks),
    light=.9,
    dark=.1,
    reverse=True,
    start=1,
    rot=-2,
)
network_lut = {
    str(network): colour
    for network, colour in zip(used_networks, network_pal)
}

# Look up a colour for every column; these are drawn beside the matrix
networks = df.columns.get_level_values("network")
network_colors = pd.Series(networks, index=df.columns).map(network_lut)

# Diverging colormap used for the correlation values
cmap = sns.diverging_palette(h_neg=210, h_pos=350, s=90, l=30, as_cmap=True)

# Clustered correlation matrix with the network colours on both margins
sns.clustermap(
    df.corr(),
    row_colors=network_colors,
    col_colors=network_colors,
    linewidths=.5,
    figsize=(13, 13),
    cmap=cmap,
)

'''Scatterplot with categorical variables https://seaborn.pydata.org/examples/scatterplot_categorical.html '''
sns.set(style="whitegrid", palette="muted")

# Fetch the iris example data
iris = sns.load_dataset("iris")

# Reshape to long form: one row per (species, measurement) value
iris = pd.melt(iris, "species", var_name="measurement")
# Scale each row by its own maximum, then discard the 'total' column.
df = df.apply(lambda row: row / row.max(), axis=1)
df = df.drop(columns='total')

# Order columns by their column sum (largest first) via a temporary 'sum' row.
df.loc['sum'] = df.sum(axis=0)
df = df.sort_values('sum', axis=1, ascending=False)
df = df.drop(index='sum')
df.to_csv(args.output + ".csv")

# A non-positive count means "use every column"; otherwise keep the first N.
if args.count <= 0:
    args.count = df.shape[1]
else:
    df = df.iloc[:, :args.count]

sz = min(50, max(args.count, df.shape[0])) // 5
side = sz + 5
g = clustermap(data=df, metric='braycurtis', col_cluster=False, robust=True,
               figsize=(side, side))

# Hide the x axis entirely once there are more than 50 columns.
if args.count > 50:
    g.ax_heatmap.get_xaxis().set_visible(False)

plt.setp(g.ax_heatmap.xaxis.get_majorticklabels(),
         fontsize=min(100, 40 * sz // args.count),
         rotation=90)
plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(),
         fontsize=min(100, 40 * sz // df.shape[0]),
         rotation=0,
         va='center')

# Thicken the row dendrogram lines.
for dendro_lines in g.ax_row_dendrogram.collections:
    dendro_lines.set_linewidth(2)

for ext in (".svg", ".png"):
    g.savefig(args.output + ext)
import pandas as pd
import numpy as np
# seaborn is used below but was never imported in this script.
import seaborn

# names of columns
drug_conditions = ["marimastat 3 uM", "ibudilast 20 uM", "ibudilast 200 uM",
                   "cabozantinib 0.1 uM", "cabozantinib 1 uM",
                   "sorafenib 2 uM", "sorafenib 20 uM",
                   "axitinib 2 uM", "axitinib 20 uM",
                   "tofacitinib 2 uM", "tofacitinib 20 uM",
                   "thalidomide 0.5 uM", "thalidomide 5 uM",
                   "icatibant 0.1 uM", "icatibant 1 uM"]

# names of rows
cell_lines = ["G523", "G885", "G729", "G564", "G861"]

# corresponding numerical value for each cell: one row per cell line,
# one column per drug condition
viability_scores = np.array([
    [103.2445191, 66.64440593, 4.739848128, 97.90725205, 94.42274748, 116.9796604, 3.906406317, 14.56179271, 23.33680375, 114.614175, 117.3194148, 92.78114457, 104.8391006, 99.06876532, 99.90973465],
    [99.54387203, 95.89649106, 4.679616554, 99.55142491, 99.892707, 98.84118352, 3.74480514, 5.318624446, 6.386772279, 99.10143741, 66.16415959, 99.87811211, 99.80021925, 103.0671789, 103.4136019],
    [111.7364657, 101.337013, 13.41437035, 115.9142502, 109.4053095, 116.1193251, 9.227651647, 84.10387886, 55.32228376, 115.9941438, 115.0353046, 93.96947089, 112.1451494, 94.90497327, 109.030808],
    [100.5542982, 100.0080483, 54.18581791, 95.78376446, 100.0588735, 98.01859304, 2.305208161, 57.75735247, 33.72282194, 98.08674223, 98.5647624, 116.1383385, 100.4812294, 112.8439546, 100.0594984],
    [103.7184012, 101.7273883, 89.18527618, 100.145608, 61.24720474, 103.5849188, 2.019981229, 46.41365913, 4.292586744, 101.9055264, 89.33413613, 102.7615483, 100.2794762, 111.1432579, 105.007406],
])

# compiles all of the above information into a pandas dataframe
data = pd.DataFrame(data=viability_scores, index=cell_lines, columns=drug_conditions)
print(data)

# make hierarchically clustered heatmap using seaborn
seaborn.set(color_codes=True)
clustered_map = seaborn.clustermap(data)

# save the generated heatmap
clustered_map.savefig("clustered.jpg")
# Invert the ticker table: map each 'Total Return Index (UBS)' value back to
# the index label it belongs to.
tr_dict = {
    tr_ticker: label
    for label, tr_ticker in df_tickers['Total Return Index (UBS)'].to_dict().items()
}

# Replace the DM / EM classification by the colour drawn beside the matrix.
class_colours = {'DM': '#9FD356', 'EM': '#FA824C'}
df_class = df_tickers['Classification']
df_class = df_class.replace(class_colours)

# Load the total return indices and rename their columns with the inverted map
df_tr = pd.read_excel(file_path, index_col=0, sheet_name='Total Return')
df_tr = df_tr.rename(tr_dict, axis=1)

# One-period returns, restricted to 2009-01-01 .. 2020-01-31 (both inclusive)
df = df_tr.pct_change(periods=1)
df = df[('2009-01-01' <= df.index) & (df.index <= '2020-01-31')]
corr = df.corr()

# Clustered correlation matrix with the classification colours on both margins
sns.clustermap(
    data=corr,
    method='average',
    metric='euclidean',
    figsize=(10, 10),
    cmap='mako',
    row_colors=df_class,
    col_colors=df_class,
    linewidths=0,
)
plt.savefig(
    '/Users/gustavoamarante/Dropbox/CQF/Final Project/figures/Correlation and Dendrogam.pdf',
    pad_inches=0,
)
plt.show()
# Per-date counts for each of the two reasons, titled with the reason name.
for reason in ('Fire', 'EMS'):
    df[df['Reason'] == reason].groupby('Date').count()['twp'].plot()
    plt.title(reason)
    plt.tight_layout()

# Day-of-week x hour table of row counts.
dayHour = (
    df.groupby(by=['Day of Week', 'Hour'])
    .count()['Reason']
    .unstack()
)
dayHour.head()
plt.figure(figsize=(12, 6))
sns.heatmap(dayHour, cmap='viridis')
sns.clustermap(dayHour, cmap='viridis')

# Day-of-week x month table of row counts.
dayMonth = (
    df.groupby(by=['Day of Week', 'Month'])
    .count()['Reason']
    .unstack()
)
dayMonth.head()
plt.figure(figsize=(12, 6))
sns.heatmap(dayMonth, cmap='viridis')
sns.clustermap(dayMonth, cmap='viridis')
rois_beta_df["GM_mean"] = rois_avg_df["GM_mean"] ## # Seaborn import scipy.cluster.hierarchy as hc import scipy.spatial as sp # https://stackoverflow.com/questions/38705359/how-to-give-sns-clustermap-a-precomputed-distance-matrix DF = rois_beta_df.copy() DF_corr = DF.corr() DF_dism = 1 - DF_corr # ** 2 linkage = hc.linkage(sp.distance.squareform(DF_dism), method='average') g = sns.clustermap(DF_corr, col_linkage=linkage, row_linkage=linkage) plt.setp(g.ax_heatmap.get_yticklabels(), rotation=0) # For y axis plt.savefig(os.path.join(WD_CLUST, "cor_rois.pdf")) # Positive / Negatives # -------------------- beta_pos_msk = comp > 0 beta_neg_msk = comp < 0 print(beta_pos_msk.sum(), beta_neg_msk.sum()) Xscores = np.zeros((X_adni.shape[0], 5)) Xscores[:, 0] = np.dot(X_adni[:, beta_pos_msk], comp[beta_pos_msk]).ravel() Xscores[:, 1] = X_adni[:, beta_pos_msk].mean(axis=1)
def visualizeConsensus(consensusMat, connectivityMatrices, clusters, colNames, suffix):
    """Draw and save clustermaps of a consensus matrix.

    With ``colNames == 'noXLabels'`` the unlabelled matrix is written to
    ``consensus_matrix_table.txt`` and one annotated clustermap is saved.
    Otherwise ``colNames`` is read as a file with one sample name per line;
    an annotated and a non-annotated clustermap are saved, and the matrix
    re-ordered by the non-annotated column clustering is written to
    ``consensus_matrix_table.txt``. All outputs are placed under the
    module-level ``matrixPath``.

    :param consensusMat: square consensus matrix to plot
    :param connectivityMatrices: collection whose length (number of runs) is used in the file names
    :param clusters: value of k used in the file names
    :param colNames: ``'noXLabels'`` or the path of the sample-name file
    :param suffix: file extension of the saved figures
    """
    plt.rcParams['font.size'] = '8'
    plt.rcParams['pdf.fonttype'] = 42

    def figure_path(prefix=''):
        # File name shared by all figures; only the prefix differs.
        return (str(matrixPath) + prefix + 'consensus_Matrix_over_' + str(len(connectivityMatrices))
                + '_runs_at_k=' + str(clusters) + '.' + suffix)

    if colNames == 'noXLabels':
        # No sample names available: default integer labels.
        dataframe = pd.DataFrame(data=consensusMat)
        dataframe.to_csv(str(matrixPath + 'consensus_matrix_table.txt'), sep="\t")
        # Rows and columns are both clustered; every cell is annotated with its value.
        consensusClustered = sns.clustermap(dataframe, col_cluster=True, row_cluster=True, annot=True)
        consensusClustered.savefig(figure_path())
        return

    # One sample name per line of the file.
    with open(colNames) as name_file:
        sampleNames = [line.rstrip('\n') for line in name_file]

    dataframe = pd.DataFrame(data=consensusMat, index=sampleNames, columns=sampleNames)

    def label_by_cluster_order(grid):
        # Write the sample names onto both axes in dendrogram order and
        # return the column order.
        heat_ax = grid.ax_heatmap
        col_names = [sampleNames[pos] for pos in grid.dendrogram_col.reordered_ind]
        heat_ax.set_xticklabels(col_names, rotation=90)
        row_names = [sampleNames[pos] for pos in grid.dendrogram_row.reordered_ind]
        heat_ax.set_yticklabels(row_names, rotation=0)
        return col_names

    annotated_grid = sns.clustermap(dataframe, col_cluster=True, row_cluster=True, annot=True)
    label_by_cluster_order(annotated_grid)

    plain_grid = sns.clustermap(dataframe, col_cluster=True, row_cluster=True, annot=False)
    clustered_names = label_by_cluster_order(plain_grid)

    annotated_grid.savefig(figure_path())
    plain_grid.savefig(figure_path('non_annotated_'))

    # The table is written in the column order of the non-annotated clustering.
    df_ordered_by_clust = dataframe.reindex(index=clustered_names, columns=clustered_names)
    df_ordered_by_clust.to_csv(str(matrixPath + 'consensus_matrix_table.txt'), sep="\t")
def heatmap(
    adata,
    var_names,
    sortby="latent_time",
    layer="Ms",
    color_map="viridis",
    col_color=None,
    palette="viridis",
    n_convolve=30,
    standard_scale=0,
    sort=True,
    colorbar=None,
    col_cluster=False,
    row_cluster=False,
    context=None,
    font_scale=None,
    figsize=(8, 4),
    show=None,
    save=None,
    **kwargs,
):
    """\
    Plot time series for genes as heatmap.

    Arguments
    ---------
    adata: :class:`~anndata.AnnData`
        Annotated data matrix.
    var_names: `str`, list of `str`
        Names of variables to use for the plot. Names missing from
        `adata.var_names` are dropped.
    sortby: `str` (default: `'latent_time'`)
        Observation key to extract time data from. Observations whose time is
        not finite are excluded from the plot.
    layer: `str` (default: `'Ms'`)
        Layer key to extract count data from. `adata.X` is used if the layer
        does not exist.
    color_map: `str` (default: `'viridis'`)
        String denoting matplotlib color map.
    col_color: `str` or list of `str` (default: `None`)
        Observation key(s) to draw as colour bars along the columns.
        Non-categorical keys are binned into a `<key>_categorical` column that
        is added to `adata.obs`.
    palette: list of `str` (default: `'viridis'`)
        Colors to use for plotting groups (categorical annotation).
    n_convolve: `int` or `None` (default: `30`)
        If `int` is given, data is smoothed by convolution
        along the x-axis with kernel size n_convolve.
    standard_scale : `int` or `None` (default: `0`)
        Either 0 (rows) or 1 (columns). Whether or not to standardize that dimension
        (each row or column), subtract minimum and divide each by its maximum.
    sort: `bool` (default: `True`)
        Whether to order the genes by the position of their maximum value.
    colorbar: `bool` or `None` (default: `None`)
        Whether to show colorbar. A colorbar is only drawn if this is truthy
        and `cbar_pos` is passed in `kwargs`.
    {row,col}_cluster : `bool` or `None`
        If True, cluster the {rows, columns}.
    context : `None`, or one of {paper, notebook, talk, poster}
        A dictionary of parameters or the name of a preconfigured set.
    font_scale : float, optional
        Scaling factor to scale the size of the font elements.
    figsize: tuple (default: `(8,4)`)
        Figure size.
    show: `bool`, optional (default: `None`)
        Show the plot, do not return axis.
    save: `bool` or `str`, optional (default: `None`)
        If `True` or a `str`, save the figure. A string is appended to the default
        filename. Infer the filetype if ending on {'.pdf', '.png', '.svg'}.
    kwargs:
        Arguments passed to seaborns clustermap,
        e.g., set `yticklabels=True` to display all gene names in all rows.
        `tkey` and `xkey` are accepted as aliases overriding `sortby` and `layer`.

    Returns
    -------
    If `show==False` the seaborn cluster grid
    """

    import seaborn as sns

    var_names = [name for name in var_names if name in adata.var_names]

    tkey, xkey = kwargs.pop("tkey", sortby), kwargs.pop("xkey", layer)
    time = adata.obs[tkey].values
    # Keep the mask: the expression rows and colour vectors must be filtered
    # with it too, otherwise the sort order below indexes the wrong cells.
    finite_time = np.isfinite(time)
    time = time[finite_time]
    time_order = np.argsort(time)

    X = (
        adata[:, var_names].layers[xkey]
        if xkey in adata.layers.keys()
        else adata[:, var_names].X
    )
    if issparse(X):
        # `.toarray()` instead of the deprecated `.A` shorthand.
        X = X.toarray()
    X = X[finite_time]
    df = pd.DataFrame(X[time_order], columns=var_names)

    if n_convolve is not None:
        # Moving average with a flat kernel of length n_convolve.
        weights = np.ones(n_convolve) / n_convolve
        for gene in var_names:
            try:
                df[gene] = np.convolve(df[gene].values, weights, mode="same")
            except Exception:
                pass  # e.g. all-zero counts or nans cannot be convolved

    if sort:
        # Order genes by the position (in time) of their maximum.
        max_sort = np.argsort(np.argmax(df.values, axis=0))
        df = pd.DataFrame(df.values[:, max_sort], columns=df.columns[max_sort])
    strings_to_categoricals(adata)

    if col_color is not None:
        col_colors = to_list(col_color)
        col_color = []
        for col in col_colors:
            if not is_categorical(adata, col):
                # Bin continuous annotations (rounded to 2 decimals of the
                # maximum) so they can be coloured like categories.
                obs_col = adata.obs[col]
                cat_col = np.round(obs_col / np.max(obs_col), 2) * np.max(obs_col)
                adata.obs[f"{col}_categorical"] = pd.Categorical(cat_col)
                col += "_categorical"
                set_colors_for_categorical_obs(adata, col, palette)
            # NOTE(review): assumes interpret_colorkey returns one entry per
            # observation as an array -- confirm.
            col_color.append(
                interpret_colorkey(adata, col)[finite_time][time_order]
            )

    if "dendrogram_ratio" not in kwargs:
        # Reserve dendrogram space only on the sides that are clustered.
        kwargs["dendrogram_ratio"] = (
            0.1 if row_cluster else 0,
            0.2 if col_cluster else 0,
        )
    if "cbar_pos" not in kwargs or not colorbar:
        kwargs["cbar_pos"] = None

    kwargs.update(
        dict(
            col_colors=col_color,
            col_cluster=col_cluster,
            row_cluster=row_cluster,
            cmap=color_map,
            xticklabels=False,
            standard_scale=standard_scale,
            figsize=figsize,
        )
    )

    args = {}
    if font_scale is not None:
        args = {"font_scale": font_scale}
        context = context or "notebook"

    with sns.plotting_context(context=context, **args):
        try:
            cm = sns.clustermap(df.T, **kwargs)
        except Exception:
            # Retry without the two keywords this function adds itself.
            logg.warn("Please upgrade seaborn with `pip install -U seaborn`.")
            kwargs.pop("dendrogram_ratio")
            kwargs.pop("cbar_pos")
            cm = sns.clustermap(df.T, **kwargs)

    savefig_or_show("heatmap", save=save, show=show)
    if show is False:
        return cm
# Fraction of misclassified outputs and the corresponding classification rate.
rate = n_error_outliers/y_out.size
print("Classification rate = ",100*(1-rate),"%")

import seaborn as sns
sns.pairplot(df)

# Flatten t_out into a single column whatever its length (the length was
# previously hard-coded to 328).
plot_data = pd.DataFrame(np.array(t_out).reshape(-1))
sns.pairplot(plot_data)
sns.distplot(plot_data)
sns.clustermap(X) # extra work not required.
sns.violinplot([X]) # extra work not required.

"""# Problem 2"""

# Load the digit data
digits = datasets.load_digits()

# View the features of the first observation
digits.data[0:1]

# View the target of the first observation
digits.target[0:1]
def kinetic_heatmap(
    adata,
    genes,
    mode="vector_field",
    basis=None,
    layer="X",
    project_back_to_high_dim=True,
    tkey="potential",
    dist_threshold=1e-10,
    color_map="BrBG",
    gene_order_method='half_max_ordering',
    show_colorbar=False,
    cluster_row_col=[False, False],
    figsize=(11.5, 6),
    standard_scale=1,
    save_show_or_return='show',
    save_kwargs={},
    **kwargs
):
    """Plot the gene expression dynamics over time (pseudotime or inferred real time) in a heatmap.

    Note that by default `potential` estimated with the diffusion graph built from reconstructed vector field will be
    used as the measure of pseudotime.

    Parameters
    ----------
        %(kin_curves.parameters.no_ncol|color|c_palette)s
        color_map: `str` (default: `BrBG`)
            Color map that will be used to color the gene expression. If `half_max_ordering` is True, the
            color map need to be divergent, good examples, include `BrBG`, `RdBu_r` or `coolwarm`, etc.
        gene_order_method: `str` (default: `half_max_ordering`) [`half_max_ordering`, `maximum`]
            Supports two different methods for ordering genes when plotting the heatmap: either `half_max_ordering`,
            or `maximum`. For `half_max_ordering`, it will order genes into up, down and transit groups by the half
            max ordering algorithm (HA Pliner, et. al, Molecular cell 71 (5), 858-871. e8). While for `maximum`,
            it will order by the position of the highest gene expression.
        show_colorbar: `bool` (default: `False`)
            Whether to show the color bar.
        cluster_row_col: `[bool, bool]` (default: `[False, False]`)
            Whether to cluster the row or columns. The first element is passed to clustermap as `col_cluster` and
            the second as `row_cluster`.
        figsize: `tuple` (default: `(11.5, 6)`)
            Size of figure
        standard_scale: `int` (default: 1)
            Either 0 (rows, cells) or 1 (columns, genes). Whether or not to standardize that dimension, meaning for each row or column,
            subtract the minimum and divide each by its maximum. Only used when `gene_order_method` is `maximum`.
        save_show_or_return: {'show', 'save_fig', 'return'} (default: `show`)
            Whether to save_fig, show or return the figure. `save` is accepted as an alias of `save_fig`.
        save_kwargs: `dict` (default: `{}`)
            A dictionary that will passed to the save_fig function. By default it is an empty dictionary and the save_fig function
            will use the {"path": None, "prefix": 'kinetic_heatmap', "dpi": None, "ext": 'pdf', "transparent": True, "close":
            True, "verbose": True} as its parameters. Otherwise you can provide a dictionary that properly modify those keys
            according to your needs.
        kwargs:
            All other keyword arguments are passed to clustermap(). Currently `xticklabels=False, yticklabels=1`
            is passed to clustermap() by default.

    Returns
    -------
        The clustermap grid when `save_show_or_return` is `return`; otherwise nothing, but plots a heatmap that
        shows the gene expression dynamics over time.
    """

    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt

    # NOTE: the list/dict defaults in the signature are only read, never mutated, in this function.
    if tkey == "potential" and "potential" not in adata.obs_keys():
        ddhodge(adata)

    exprs, valid_genes, time = fetch_exprs(
        adata, basis, layer, genes, tkey, mode, project_back_to_high_dim
    )

    # `.toarray()` instead of the deprecated `.A` shorthand on sparse matrices.
    if issparse(exprs):
        exprs = exprs.toarray()

    if dist_threshold is not None and mode == 'vector_field':
        # Keep the first point plus every point whose squared distance to its predecessor exceeds the threshold.
        valid_ind = list(
            np.where(np.sum(np.diff(exprs, axis=0) ** 2, axis=1) > dist_threshold)[0]
            + 1
        )
        valid_ind.insert(0, 0)
        exprs = exprs[valid_ind, :]
        time = time[valid_ind]

    if gene_order_method == "half_max_ordering":
        time, all_vals, valid_ind, gene_idx = _half_max_ordering(
            exprs.T, time, mode=mode, interpolate=True, spaced_num=100
        )
        # Drop genes whose interpolated profile contains non-finite values.
        finite_rows = np.isfinite(all_vals.sum(1))
        all_vals, genes = all_vals[finite_rows, :], np.array(valid_genes)[gene_idx][finite_rows]

        df = pd.DataFrame(all_vals, index=genes)
    elif gene_order_method == 'maximum':
        exprs = lowess_smoother(time, exprs.T, spaced_num=100)
        exprs = exprs[np.isfinite(exprs.sum(1)), :]

        if standard_scale is not None:
            exprs = (exprs - np.min(exprs, axis=standard_scale)[:, None]) / np.ptp(
                exprs, axis=standard_scale
            )[:, None]
        # Order genes by where along the time axis their maximum occurs.
        max_sort = np.argsort(np.argmax(exprs, axis=1))
        df = pd.DataFrame(exprs[max_sort, :], index=np.array(valid_genes)[max_sort])
    else:
        raise Exception('gene order_method can only be either half_max_ordering or maximum')

    heatmap_kwargs = dict(xticklabels=False, yticklabels=1)
    heatmap_kwargs = update_dict(heatmap_kwargs, kwargs)

    sns_heatmap = sns.clustermap(
        df,
        col_cluster=cluster_row_col[0],
        row_cluster=cluster_row_col[1],
        cmap=color_map,
        figsize=figsize,
        **heatmap_kwargs
    )
    if not show_colorbar:
        sns_heatmap.cax.set_visible(False)

    # The docstring advertises 'save_fig' while the code historically tested 'save'; honour both.
    if save_show_or_return in ("save", "save_fig"):
        s_kwargs = {"path": None, "prefix": 'kinetic_heatmap', "dpi": None,
                    "ext": 'pdf', "transparent": True, "close": True, "verbose": True}
        s_kwargs = update_dict(s_kwargs, save_kwargs)

        save_fig(**s_kwargs)
    elif save_show_or_return == "show":
        if show_colorbar:
            plt.subplots_adjust(right=0.85)
        plt.tight_layout()
        plt.show()
    elif save_show_or_return == "return":
        return sns_heatmap
plt.legend()
plt.savefig(output_file)
plt.show()
plt.close(f)

#
# Plot dissimilarity matrices
num_models = len(models)
model_names = [models[key][1] for key in selected_scores]
# One row per model, taken from the first element of each stored entry
ss = np.stack([entry[0] for entry in selected_scores.values()], axis=0)
sl = np.stack([entry[0] for entry in selected_labels.values()], axis=0)
# Transposed so that every model becomes a column
df = pd.DataFrame(ss.transpose(), columns=model_names)

# One palette colour per model, keyed by the model name's string form
network_pal = sns.husl_palette(len(model_names), s=.45)
network_lut = {
    str(model_name): colour
    for model_name, colour in zip(model_names, network_pal)
}

# Colour vector drawn along both margins of the matrix
network_colors = pd.Series(model_names, index=df.columns).map(network_lut)

# Clustered model-by-model correlation matrix
sns.clustermap(
    df.corr(),
    center=0,
    cmap="RdBu",
    row_colors=network_colors,
    col_colors=network_colors,
    linewidths=.75,
    figsize=(13, 13),
)
plt.show()
def cn_heatmap(self, df, cell_font_size=3, max_cn=4, method='ward', cmap='bwr',
               figsize=(15, 20), xlabel='Contigs', ylabel='Cells', **kwargs):
    """
    Create a heatmap from a copy number matrix

    df: triple indexed dataframe with as columns ('contig', start, end ), as rows cells/samples

    cell_font_size (int): font size of the cell labels
        NOTE(review): this value is currently not applied anywhere in the method.

    max_cn (int) : upper limit of the colour scale (Maximum copy number shown)

    method (str) : clustering metric

    cmap (str) : colormap used

    figsize(tuple) : Size of the figure

    xlabel (str) : Label for the x-axis, by default this is Contigs

    ylabel (str) : Label for the y-axis, by default this is Cells

    **kwargs : Arguments which will be passed to seaborn.clustermap

    Returns the seaborn cluster grid, or the axes returned by seaborn.heatmap
    when clustering failed and the plain heatmap fallback was used.
    """
    # Sort the columns once (axis must be a keyword for pandas 2) and restrict
    # them to the contigs of this object; reused for plotting and tick layout.
    ordered = df.sort_index(axis=1)[self.contigs]

    try:
        clmap = sns.clustermap(ordered,
                               col_cluster=False,
                               method=method,
                               cmap=cmap,
                               vmax=max_cn,
                               vmin=0,
                               yticklabels=True,
                               figsize=figsize,
                               **kwargs)
        ax_heatmap = clmap.ax_heatmap
    except Exception:
        # Deliberate best effort: draw the matrix without clustering instead.
        print('Falling back on heatmap without clustering')

        fig, ax_heatmap = plt.subplots(figsize=figsize)
        clmap = sns.heatmap(ordered,
                            cmap=cmap,
                            vmax=max_cn,
                            vmin=0,
                            yticklabels=True,
                            ax=ax_heatmap)

    # One tick per contig, centred on its run of columns, with a vertical
    # separator wherever the contig changes.
    prev = None
    xtick_pos = []
    xtick_label = []
    last_idx = 0
    for idx, (contig, start, end) in enumerate(ordered.columns):
        if prev is not None and prev != contig:
            ax_heatmap.axvline(idx - 0.5, c='k', lw=1.5, zorder=10)
            xtick_pos.append((idx + last_idx) / 2)
            xtick_label.append(prev)
            last_idx = idx
        prev = contig

    # The loop only labels a contig when the next one starts, so the final
    # contig has to be added here (skipped when there are no columns).
    if prev is not None:
        xtick_pos.append((len(ordered.columns) + last_idx) / 2)
        xtick_label.append(prev)

    ax_heatmap.set_xticks(xtick_pos)
    ax_heatmap.set_xticklabels(xtick_label, rotation=0, fontsize=8)
    ax_heatmap.set_xlabel(xlabel, labelpad=20)
    ax_heatmap.set_ylabel(ylabel, labelpad=20)

    return clmap
plt.tight_layout() #%% df[df['Reason']=='Fire'].groupby('Date').count()['twp'].plot() plt.tight_layout() #%% df[df['Reason']=='EMS'].groupby('Date').count()['twp'].plot() plt.tight_layout() #%% day_hour = df.groupby(by=['Day of Week','Hour']).count()['Reason'].unstack() #%% plt.figure(figsize=(12,6)) sns.heatmap(day_hour, cmap='viridis') #%% sns.clustermap(day_hour, cmap='viridis') #%% day_month = df.groupby(by=['Day of Week', 'Month']).count()['Reason'].unstack() #%% plt.figure(figsize=(12, 6)) sns.heatmap(day_month, cmap='viridis') #%% sns.clustermap(day_month, cmap='viridis') #%%
plt.show()

''' 3.6 seaborn.heatmap seaborn.heatmap() 主要是用于绘制热力图,也就类似于色彩矩阵。 '''
# Generate a 10x10 random matrix
matrix_data = np.random.rand(10, 10)
# Draw it as a heat map
sns.heatmap(matrix_data)
plt.show()

''' 3.7 seaborn.clustermap seaborn.clustermap() 可以将矩阵数据集绘制为层次聚类热图。 '''
# Remove the flower species column (in place) before clustering
del iris_data["species"]
# Draw the hierarchically clustered heat map
sns.clustermap(iris_data)
plt.show()
sfinal_feat = pd.concat([sFeatMatAll[combi], conds2], axis=1) #make a colormap to assign colours - based on class (ie clozapine10 is separate) cmap1 = sns.color_palette("tab20", np.unique(sfinal_feat['drug']).shape[0]) #make a clustergram #1. make lut for drug colors #2. map the lut onto the clustergram lut = dict(zip(np.unique(sfinal_feat['drug']), cmap1)) #add in row colors to the dataframe row_colors = sfinal_feat['drug'].map(lut) #map onto the feature Matrix #make clustergram cg=sns.clustermap(sfinal_feat.iloc[:,:-3], metric = 'euclidean', cmap = 'inferno', \ row_colors = row_colors) plt.setp(cg.ax_heatmap.yaxis.set_ticklabels\ (sfinal_feat['drug'][cg.dendrogram_row.reordered_ind])) plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8) plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90, fontsize=10) col = cg.ax_col_dendrogram.get_position() cg.ax_col_dendrogram.set_position( [col.x0, col.y0, col.width * 1, col.height * 1]) #save fig plt.savefig(os.path.join(directoryA[0:-7], 'Figures', 'Agar_stats_LDA_clustergram1.tif'), \ dpi =150, bbox_inches = 'tight', pad_inches = 1) plt.show() #make list of the final order of the drugs drug_order = list(sfinal_feat['drug'][cg.dendrogram_row.reordered_ind]) conc_order = list(
louvain = attr_df.loc[i, 'louvain'] #louvain of sample cell_type = attr_df.loc[i, 'biosample_cell_type'] #cell type l_color = louvain_colors[louvain] c_color = cell_type_colors[cell_type] attr_df.loc[i, 'louvain_colors'] = l_color attr_df.loc[i, 'cell_type_colors'] = c_color #print(attr_df.head()) #print(len(louvain)) #8 clusters #print(len(cell_type)) #9 cell)_type cmap = sns.diverging_palette(220, 20, as_cmap=True) g = sns.clustermap(features_df, cmap=cmap, row_cluster=True, col_cluster=False, row_colors=attr_df[['louvain_colors', 'cell_type_colors']], linewidths=0, xticklabels=False, yticklabels=False) #for some reason, it cant take two legends.... #legend_louvain = [mpatches.Patch(color=c, label=l) for c,l in attr_df[['louvain_colors','louvain']].drop_duplicates().values] #l2=g.ax_heatmap.legend(loc='upper left',bbox_to_anchor=(0.05,1.3),handles=legend_louvain,frameon=True) #l2.set_title(title='louvain cluster',prop={'size':10}) attr_df['biosample_cell_type'] = encoder.inverse_transform( attr_df['biosample_cell_type'].values.tolist()) legend_cell_type = [ mpatches.Patch(color=k, label=v) for k, v in attr_df[ ['cell_type_colors', 'biosample_cell_type']].drop_duplicates().values ] l1 = g.ax_heatmap.legend(loc='upper left', bbox_to_anchor=(1.01, 0.6),
def main(_):
    """Print annotation statistics and save the analysis plots.

    Reads every ``.csv`` file found in ``FLAGS.data``, prints summary
    statistics (examples, annotations, raters, label distributions and rater
    agreement) and writes these figures into ``FLAGS.plot_dir``:
    ``correlations.pdf``, ``hierarchical_clustering.pdf``,
    ``hierarchical_corr.pdf`` and ``number_of_labels.pdf``.

    Args:
        _: Positional argument whose value is never read.
    """
    print("Loading data...")
    dfs = []
    for filename in os.listdir(FLAGS.data):
        if filename.endswith(".csv"):
            dfs.append(
                pd.read_csv(
                    os.path.join(FLAGS.data, filename), encoding="utf-8"))
    data = pd.concat(dfs)
    print("%d Examples" % (len(set(data["id"]))))
    print("%d Annotations" % len(data))

    if not os.path.isdir(FLAGS.plot_dir):
        os.makedirs(FLAGS.plot_dir)

    with open(FLAGS.emotion_file, "r") as f:
        all_emotions = f.read().splitlines()
    all_emotions_neutral = all_emotions + ["neutral"]
    print("%d emotion Categories" % len(all_emotions))

    print("%d unique raters" % len(data["rater_id"].unique()))
    print("%.3f marked unclear" %
          (data["example_very_unclear"].sum() / len(data)))

    # Since the ones marked as difficult have no labels, exclude those
    data = data[data[all_emotions_neutral].sum(axis=1) != 0]

    print("Distribution of number of labels per example:")
    print(data[all_emotions_neutral].sum(axis=1).value_counts() / len(data))
    print("%.2f with more than 3 labels" %
          ((data[all_emotions_neutral].sum(axis=1) > 3).sum() /
           len(data)))  # more than 3 labels

    print("Label distributions:")
    print((data[all_emotions_neutral].sum(axis=0).sort_values(ascending=False)
           / len(data) * 100).round(2))

    print("Plotting label correlations...")
    ratings = data.groupby("id")[all_emotions].mean()

    # Compute the correlation matrix
    corr = ratings.corr()

    # Generate a mask for the upper triangle.
    # The builtin `bool` is used because the `np.bool` alias was deprecated
    # in NumPy 1.20 and removed in 1.24.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    # Set up the matplotlib figure
    fig, _ = plt.subplots(figsize=(11, 9))

    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)

    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(
        corr,
        mask=mask,
        cmap=cmap,
        vmax=.3,
        center=0,
        square=True,
        linewidths=.5,
        cbar_kws={"shrink": .5})
    fig.savefig(
        FLAGS.plot_dir + "/correlations.pdf",
        dpi=500,
        format="pdf",
        bbox_inches="tight")

    print("Plotting hierarchical relations...")
    z = linkage(
        pdist(ratings.T, metric="correlation"),
        method="ward",
        optimal_ordering=True)
    fig = plt.figure(figsize=(11, 4), dpi=400)
    plt.xlabel("")
    plt.ylabel("")
    dendrogram(
        z,
        labels=ratings.columns,
        leaf_rotation=90.,  # rotates the x axis labels
        leaf_font_size=12,  # font size for the x axis labels
        color_threshold=1.05,
    )
    fig.savefig(
        FLAGS.plot_dir + "/hierarchical_clustering.pdf",
        dpi=600,
        format="pdf",
        bbox_inches="tight")

    sent_color_map = {
        "positive": "#BEECAF",
        "negative": "#94bff5",
        "ambiguous": "#FFFC9E"
    }
    with open(FLAGS.sentiment_dict) as f:
        sent_dict = json.loads(f.read())
    # Emotions listed under neither "positive" nor "negative" are coloured as
    # ambiguous.
    sent_colors = {}
    for e in all_emotions:
        if e in sent_dict["positive"]:
            sent_colors[e] = sent_color_map["positive"]
        elif e in sent_dict["negative"]:
            sent_colors[e] = sent_color_map["negative"]
        else:
            sent_colors[e] = sent_color_map["ambiguous"]

    # Generate a mask that hides only the diagonal (self-correlations)
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.diag_indices(mask.shape[0])] = True

    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)

    row_colors = pd.Series(
        corr.columns, index=corr.columns, name="sentiment").map(sent_colors)

    # Draw the clustered heatmap, reusing the linkage computed above for both
    # axes so rows and columns share one ordering
    g = sns.clustermap(
        corr,
        mask=mask,
        cmap=cmap,
        vmax=.3,
        vmin=-0.3,
        center=0,
        row_linkage=z,
        col_linkage=z,
        col_colors=row_colors,
        linewidths=.1,
        cbar_kws={
            "ticks": [-.3, -.15, 0, .15, .3],
            "use_gridspec": False,
            "orientation": "horizontal",
        },
        figsize=(10, 10))
    g.ax_row_dendrogram.set_visible(False)
    g.cax.set_position([.34, -0.05, .5, .03])

    # Zero-sized bars exist only to create legend entries for the sentiment
    # colours.
    for label in sent_color_map:
        g.ax_col_dendrogram.bar(
            0, 0, color=sent_color_map[label], label=label, linewidth=0)
    g.ax_col_dendrogram.legend(
        title="Sentiment", loc="center", bbox_to_anchor=(1.1, .5))
    g.savefig(FLAGS.plot_dir + "/hierarchical_corr.pdf", dpi=600, format="pdf")

    print("Calculating agreements...")
    unique_labels = data.groupby("id").apply(CheckAgreement, 1,
                                             all_emotions_neutral).to_dict()
    data["unique_labels"] = data["id"].map(unique_labels)
    agree_dict_2 = data.groupby("id").apply(CheckAgreement, 2,
                                            all_emotions_neutral).to_dict()
    data["agree_2"] = data["id"].map(agree_dict_2)
    agree_dict = data.groupby("id").apply(CheckAgreement, 3,
                                          all_emotions_neutral).to_dict()
    data["agree_3"] = data["id"].map(agree_dict)
    agree_dict = data.groupby("id").apply(CheckAgreement, 1,
                                          all_emotions_neutral, 1).to_dict()
    data["no_agree"] = data["id"].map(agree_dict)

    filtered_2 = data[data["agree_2"].str.len() > 0]
    print(
        "%d (%d%%) of the examples have 2+ raters agreeing on at least one emotion label"
        % (len(filtered_2["id"].unique()),
           (len(filtered_2) / len(data) * 100)))

    filtered_3 = data[data["agree_3"].str.len() > 0]
    print(
        "%d (%d%%) of the examples have 3+ raters agreeing on at least one emotion label"
        % (len(filtered_3["id"].unique()),
           (len(filtered_3) / len(data) * 100)))

    print("Plotting number of labels...")
    data["num_unique_prefilter"] = data["unique_labels"].apply(CountLabels)
    data["num_unique_postfilter"] = data["agree_2"].apply(CountLabels)
    unique_ex = data.drop_duplicates("id")
    df = pd.DataFrame({
        "count":
            unique_ex["num_unique_prefilter"].tolist() +
            unique_ex["num_unique_postfilter"].tolist(),
        "type": ["pre-filter"] * len(unique_ex) +
                ["post-filter"] * len(unique_ex)
    })

    fig = plt.figure(dpi=600)
    ax = sns.countplot(
        data=df, x="count", hue="type", palette=["skyblue", "navy"])
    plt.xlim(-.5, 7.5)
    plt.legend(loc="center right", fontsize="x-large")
    plt.ylabel("Number of Examples", fontsize="x-large")
    plt.xlabel("Number of Labels", fontsize="x-large")
    # Draw first so the y tick labels are populated before they are rewritten
    # in thousands.
    plt.draw()
    labels = [item.get_text() for item in ax.get_yticklabels()]
    ax.set_yticklabels(["%dk" % (int(int(label) / 1000)) for label in labels])
    plt.tight_layout()

    fig.savefig(
        FLAGS.plot_dir + "/number_of_labels.pdf",
        dpi=600,
        format="pdf",
        bbox_inches="tight")

    print("Proportion of agreement per label:")
    print(
        filtered_2[all_emotions_neutral].sum(axis=0).sort_values(
            ascending=False) / len(data))
if len(taxon_all) < 2: taxon = "unknown" else: taxon = taxa[index].split(';')[1].split("_")[-1] if toi != None: if toi not in taxon: df.drop([index], inplace=True, axis=0) continue if taxon in taxa_colors: row_colors.append(taxa_colors[taxon]) else: row_colors.append("w") print "plotting..." sns.set(font_scale=1) g = sns.clustermap(df, figsize=(8, 8), col_colors=col_colors, col_cluster=False, yticklabels=False, xticklabels=False, cmap="magma_r") # adjust axis labels plt.setp(g.ax_heatmap.get_xticklabels(), rotation=90) plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0) plt.savefig("figure.png", bbox_inches='tight', dpi=300) plt.show()
vertex_label_size=5, vertex_frame_width=0, vertex_size=20, edge_width=1., target='%s/reports/Figure_4.pdf' % wd) print '[INFO] Network exported: ', network_i.summary() # -- Betas heatmap cmap = sns.diverging_palette(220, 10, n=9, as_cmap=True) plot_df = lm_betas_kinases.loc[:, [m in met_name for m in lm_betas_kinases]] plot_df.columns = [met_name[m] for m in plot_df] plot_df.index = [acc_name[i].split(';')[0] for i in plot_df.index] sns.set(style='white', palette='pastel') sns.clustermap(plot_df.T, figsize=(15, 20), cmap=cmap, linewidth=.5) plt.savefig('%s/reports/Figure_Supp_4_kinases_dynamic_betas.pdf' % wd, bbox_inches='tight') plt.close('all') plot_df = lm_betas_tfs.loc[:, [m in met_name for m in lm_betas_tfs]] plot_df.columns = [met_name[m] for m in plot_df] plot_df.index = [acc_name[i].split(';')[0] for i in plot_df.index] plot_df = plot_df[plot_df.std(1) != 0] sns.set(style='white', palette='pastel') sns.clustermap(plot_df.T, figsize=(15, 20), cmap=cmap, linewidth=.5) plt.savefig( '%s/reports/Figure_Supp_4_transcription_factors_dynamic_betas.pdf' % wd, bbox_inches='tight') plt.close('all')
# Load the tab-separated matrix; the first column becomes the index.
z = pd.read_csv(sys.argv[1], sep='\t', index_col=0)

# add colored x-labels
# Each date tag maps to the command-line position that holds its colour. The
# argument is read only when a sample actually matches, and the first
# matching tag wins.
date_tag_arg_positions = (
    ("2014-09", 3),
    ("2015-06", 4),
    ("2016-02", 5),
    ("2017-02", 6),
)
lut = []
for sample in z.columns.values:
    for date_tag, arg_position in date_tag_arg_positions:
        if date_tag in sample:
            lut.append(sys.argv[arg_position])
            break
    else:
        # no known date tag in the sample name: fall back to white
        lut.append('w')

# make heat map
sns.set(font_scale=0.6)
size = float(sys.argv[2])
g = sns.clustermap(
    z,
    figsize=(size, size),
    col_colors=lut,
    row_colors=lut,
    col_cluster=True,
    xticklabels=False,
    yticklabels=False,
    cmap="magma",
)
plt.subplots_adjust(left=0, right=1, top=0.99, bottom=0.01)

# layout constants
ratio = 0.6
h_adjust = 0.08
w_adjust = -0.05

# current positions of every axes in the cluster grid
hm = g.ax_heatmap.get_position()
xden = g.ax_col_dendrogram.get_position()
yden = g.ax_row_dendrogram.get_position()
col = g.ax_col_colors.get_position()
row = g.ax_row_colors.get_position()
legend = g.cax.get_position()
def plot_heatmap(data,
                 vmin=0,
                 vmax=100,
                 cm=None,
                 col_colors=None,
                 row_colors=None,
                 sorted_labels=None,
                 annot=True,
                 col_cluster=False,
                 row_cluster=False):
    """Draw ``data`` as an annotated seaborn clustermap sized to its shape.

    The figure size is derived from the number of rows and columns so that
    each cell gets a fixed amount of space. Cells equal to 0 are masked.

    Args:
        data: DataFrame to plot; ``data.index.name`` and
            ``data.columns.name`` are used as the axis labels.
        vmin: Lower colour limit passed to ``sns.clustermap``.
        vmax: Upper colour limit passed to ``sns.clustermap``.
        cm: Colormap to use. When ``None``, a reversed ``gist_heat`` is built
            with light gray for bad (masked) values and blue for values
            under ``vmin``.
        col_colors: Column colour annotations passed to ``sns.clustermap``.
            Its ``shape[0]`` is also used to compute the colour-strip ratio.
        row_colors: Row colour annotations passed to ``sns.clustermap``.
        sorted_labels: Optional row labels; when given, ``data`` is
            restricted to, and ordered by, these labels before plotting.
        annot: Whether to write the value inside each cell.
        col_cluster: Whether to cluster the columns.
        row_cluster: Whether to cluster the rows.

    Returns:
        The object returned by ``sns.clustermap``.
    """
    # seaborn
    dpi = 72.27
    fontsize_x_pt = 8
    fontsize_y_pt = 10

    # compute the matrix size in points and inches
    matrix_height_pt = fontsize_y_pt * data.shape[0]
    matrix_height_in = matrix_height_pt / dpi
    matrix_width_pt = fontsize_x_pt * data.shape[1]
    matrix_width_in = matrix_width_pt / dpi

    # compute the required figure size
    top_margin = 0.04  # in percentage of the figure height
    bottom_margin = 0.04  # in percentage of the figure height
    coeff = 2
    figure_height = coeff * matrix_height_in / (1 - top_margin - bottom_margin)
    figure_width = coeff * matrix_width_in / (1 - top_margin - bottom_margin)

    ccr = (0.8 * col_colors.shape[0] / figure_height
           if col_colors is not None else 0)

    if cm is None:
        # `plt.cm.revcmap` no longer exists in current Matplotlib;
        # `Colormap.reversed` builds the same reversed map (a new object, so
        # the registered 'gist_heat' colormap is not mutated below).
        cm = plt.get_cmap('gist_heat').reversed(name='hot_r')  # plasma viridis
        cm.set_bad('lightgray')
        cm.set_under('blue')

    if sorted_labels is not None:
        # `.ix` was removed in pandas 1.0; `.loc` is the label-based
        # replacement. Unlike `.ix`, it raises KeyError for missing labels.
        data = data.loc[sorted_labels]

    clustermap_kwargs = dict(
        col_cluster=col_cluster,
        row_cluster=row_cluster,
        figsize=(figure_width, figure_height),
        col_colors=col_colors,
        row_colors=row_colors,
        cmap=cm,
        mask=(data == 0),
        vmin=vmin,
        vmax=vmax,
        xticklabels=1,  # print all labels
        annot=annot,
        annot_kws={'fontsize': 3},
        fmt='.2f')
    # Only this specific seaborn build is given the `col_colors_ratio`
    # keyword.
    if sns.__version__ == "0.9.dev0+k":
        clustermap_kwargs['col_colors_ratio'] = ccr
    splot = sns.clustermap(data, **clustermap_kwargs)

    splot.cax.set_visible(False)  # TODO
    plt.setp(splot.ax_row_dendrogram, visible=False)  # TODO
    plt.setp(splot.ax_col_dendrogram, visible=False)  # TODO

    splot.ax_heatmap.yaxis.set_ticks_position('left')
    splot.ax_heatmap.yaxis.set_label_position('left')
    splot.ax_heatmap.set_xlabel(data.columns.name, fontsize=10)
    splot.ax_heatmap.set_ylabel(data.index.name, fontsize=10)
    # one tick per row, centred on the cell
    splot.ax_heatmap.set_yticks(numpy.arange(data.shape[0]) + 0.5, minor=False)
    plt.setp(splot.ax_heatmap.get_yticklabels(), rotation=0)
    plt.setp(splot.ax_heatmap.get_xticklabels(), rotation=90)
    plt.setp(splot.ax_heatmap.get_yticklabels(), fontsize=8)
    plt.setp(splot.ax_heatmap.get_xticklabels(), fontsize=6)
    return splot
# NOTE(review): the bare calls with no arguments below (`sns.swarmplot()`,
# `sns.heatmap()`, `sns.clustermap()`, `sns.factorplot()`) look like
# placeholders that just name the function being demonstrated. Some of them
# presumably raise when executed without data -- confirm this file is meant
# as notes rather than something run top to bottom.
sns.stripplot(x="day", y="total_bill", data=tips)
sns.stripplot(x="day", y="total_bill", data=tips,jitter=True) # adds jitter to better visualize

#--- swarm plot
sns.swarmplot()
sns.swarmplot(x="day", y="total_bill", data=tips)

#------------------------------------------#
#----------- MATRIX PLOTS -----------------#
#------------------------------------------#

#---- HEATMAP
sns.heatmap()
# heatmap of the pairwise correlations between the columns of `tips`
sns.heatmap(tips.corr(), cmap='RdBu_r')

#--- CLUSTER MAP
sns.clustermap()

#------------------------------------------#
#----------- GENERAL PLOT -----------------#
#------------------------------------------#
# NOTE(review): `factorplot` was renamed `catplot` in later seaborn releases
# -- confirm against the installed version.
sns.factorplot()
sns.factorplot(x='sex',y='total_bill',data=tips,kind='bar') # 'kind' parameter decides type of plot

#----------- GENERAL GRIDS ---------------#
#--- pairgrid
sns.PairGrid(iris) # Just the Grid
# keep a handle on the grid so plots can be mapped onto its parts
g = sns.PairGrid(iris)
g.map_diag(plt.hist)
g.map_upper(plt.scatter)
def clustermap(adata, obs_keys=None, use_raw=True, show=None, save=None,
               **kwargs):
    """Hierarchically-clustered heatmap [Waskom16]_.

    Wraps `seaborn.clustermap
    <https://seaborn.pydata.org/generated/seaborn.clustermap.html>`_ for
    :class:`~scanpy.api.AnnData`.

    Parameters
    ----------
    adata : :class:`~scanpy.api.AnnData`
        Annotated data matrix.
    obs_keys : `str`
        Categorical annotation used to color the rows. Only a single key is
        supported; anything other than a `str` or `None` raises `ValueError`.
    use_raw : `bool`, optional (default: `True`)
        Use `raw` attribute of `adata` if present.
    show : bool, optional (default: `None`)
        Show the plot. `None` falls back to `settings.autoshow`.
    save : `bool` or `str`, optional (default: `None`)
        Accepted for signature compatibility; this function body does not
        read it.
    **kwargs : keyword arguments
        Keyword arguments passed to `seaborn.clustermap
        <https://seaborn.pydata.org/generated/seaborn.clustermap.html>`_.

    Returns
    -------
    The `seaborn.ClusterGrid` object when the plot is not shown, otherwise
    `None`.

    Notes
    -----
    The returned object has a savefig() method that should be used if you
    want to save the figure object without clipping the dendrograms.

    To access the reordered row indices, use:
    clustergrid.dendrogram_row.reordered_ind

    Column indices, use: clustergrid.dendrogram_col.reordered_ind

    Examples
    --------
    >>> import scanpy.api as sc
    >>> adata = sc.datasets.krumsiek11()
    >>> sc.pl.clustermap(adata, obs_keys='cell_type')
    """
    if obs_keys is not None and not isinstance(obs_keys, str):
        raise ValueError('Currently, only a single key is supported.')
    sanitize_anndata(adata)

    # Prefer the raw matrix when requested and available.
    if use_raw and adata.raw is not None:
        matrix = adata.raw.X
    else:
        matrix = adata.X
    df = pd.DataFrame(matrix, index=adata.obs_names, columns=adata.var_names)

    if obs_keys is None:
        g = sns.clustermap(df, **kwargs)
    else:
        # Grab the annotation column before the colors are registered, then
        # pair each category with its color from `adata.uns`.
        annotation = adata.obs[obs_keys]
        utils.add_colors_for_categorical_sample_annotation(adata, obs_keys)
        lut = dict(
            zip(annotation.cat.categories, adata.uns[obs_keys + '_colors']))
        per_row_colors = adata.obs[obs_keys].map(lut)
        g = sns.clustermap(df, row_colors=per_row_colors, **kwargs)

    if show is None:
        show = settings.autoshow
    if not show:
        return g
    pl.show()
# Use the first column as the row index and drop the two leading columns.
data.index = data.iloc[:, 0]
data = data.iloc[:, 2:]
clname = list(data.columns)
# Keep only the question columns, grouped per question.
data = data[[
    'q_A',
    'q_A_unmedicated',
    'q_A_medicated',
    'q_B',
    'q_B_unmedicated',
    'q_B_medicated',
    'q_C',
    'q_C_unmedicated',
    'q_C_medicated',
]]

#First create the clustermap figure
# One random RGB triple per row, scaled to [0, 1]. The previous
# `np.random.randn(94, 3)` produced values outside the valid colour range
# and hard-coded the number of rows.
random_row_colors = [
    tuple(rgb) for rgb in np.random.randint(0, 256, (len(data), 3)) / 255.0
]
g = sns.clustermap(data, row_colors=random_row_colors, figsize=(13, 8))
# set the gridspec to only cover half of the figure
#g.gs.update(left=0.05, right=0.45)
#
##create new gridspec for the right part
#gs2 = matplotlib.gridspec.GridSpec(1,1, left=0.6)
## create axes within this new gridspec
#ax2 = g.fig.add_subplot(gs2[0])
## plot boxplot in the new axes
#sns.boxplot(data=iris, orient="h", palette="Set2", ax = ax2)
plt.show()

np.random.randint(0, 256, 3)
# # ## Correlations in data # In[5]: # Spearman is recommended for ordinal data. correlations = df.corr(method='spearman') sns.heatmap(correlations, square=True); # Note that if we were to scale the data, the correlation matrix would be unchanged. # In[6]: cg = sns.clustermap(correlations, square=True) plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0); # Fix rotation of y-labels. # The expected clusters emerged. Party ID got grouped with economics more than with moral attitudes. Economics and race line up with one another. # ## Principal component analysis # In[7]: from sklearn.pipeline import Pipeline from sklearn.decomposition import PCA from sklearn.preprocessing import Imputer, StandardScaler imp = Imputer(strategy='mean')
#save the dictionary values to a list data = list(dict.values()) #Convert the list to an array an_array = np.array(data, dtype=np.float64) #Set the array in a dataframe with sample names as columns and annotations as index df = pd.DataFrame(data=an_array, index=flat_list, columns=args.samples) #Add a reference sample column to the dataframe only containing 100% df[args.ref_sample] = [100] * len(df) #Print the length of the dataframe print('length dataframe: ', len(df)) #Save the dataframe as csv file df.to_csv(args.output[:-4] + '.csv') #Create heatmap of the dataframe g = sns.clustermap(df, cmap="vlag") plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0) #Save and show the heatmap plt.savefig(args.output, bbox_inches="tight") plt.show() #Check the name of the reference sample #Based on the name of the reference sample, extract the rows where the replicates of the reference are equal to 100 and all the other samples are unequal to 100. if args.ref_sample == 'PO1': new_df = df[(df[args.ref_sample] == 100) & (df[args.ref_sample[:-1] + '2'] == 100) & (df[args.ref_sample[:-1] + '3'] == 100) & (df['PR1'] != 100) & (df['PR2'] != 100) & (df['PR3'] != 100)] elif args.ref_sample == 'PR1': new_df = df[(df[args.ref_sample] == 100) & (df[args.ref_sample[:-1] + '2'] == 100) &