def _plot_heatmap(call_csv, samples, positions, sample_info, batch_counts):
    """Render a heatmap of caller support per position and sample, saved as PNG.

    The CSV is pivoted into a position x sample matrix using its ``position``,
    ``sample`` and ``caller_support`` columns. Rows follow ``positions``;
    columns follow ``samples`` sorted by batch. The image is written next to
    ``call_csv`` with the extension replaced by ``.png``.

    Args:
        call_csv: path of the CSV to read; also determines the output name.
        samples: sample names to display.
        positions: row labels, in display order.
        sample_info: mapping sample -> dict holding a ``"batch"`` entry.
        batch_counts: mapping batch -> count; batches with larger counts
            sort first, missing batches count as 0.
    """
    def sample_sort(x):
        batch = sample_info[x]["batch"]
        return (-batch_counts.get(batch, 0), batch, x)

    out_file = "%s.png" % os.path.splitext(call_csv)[0]
    df = pd.read_csv(call_csv)
    sv_rect = df.pivot(index="position", columns="sample", values="caller_support")
    column_order = ["%s: %s" % (sample_info[x]["batch"], x)
                    for x in sorted(samples, key=sample_sort)]
    # DataFrame.reindex_axis was removed in pandas 1.0; reindex() with the
    # index/columns keywords is the equivalent that works on every version.
    sv_rect = sv_rect.reindex(index=positions, columns=column_order)

    fig = plt.figure(tight_layout=True)
    plt.title("Shared structural variant calls for affected and unaffected in regions of interest",
              fontsize=16)
    ax = sns.heatmap(sv_rect, cbar=False,
                     cmap=sns.diverging_palette(255, 1, n=3, as_cmap=True))
    colors = sns.diverging_palette(255, 1, n=3)
    # Zero-sized bars drawn far below the axes serve only as legend handles.
    b1 = plt.bar(0, 0, bottom=-100, color=colors[-1])
    b2 = plt.bar(0, 0, bottom=-100, color=colors[0])
    ax.legend([b1, b2], ["affected", "unaffected"], ncol=2,
              bbox_to_anchor=(0.85, 0.995), loc=3)
    plt.setp(ax.get_xticklabels(), fontsize=8)
    plt.setp(ax.get_yticklabels(), fontsize=8)
    fig.set_size_inches(20, 8)
    fig.savefig(out_file)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
def plotGraphicalCorrelationMatrix(data):
    '''
    Plot the correlation matrix of ``data`` as a heatmap, save it and show it.

    Input : data -- object exposing ``corr()`` (e.g. a pandas DataFrame)
    Output : True on success; the figure is written to
             ../YoutubeData/correlation_matrix.pdf and displayed
    Raises : VideoAnalysisException when any step fails
    Inspired from : https://stanford.edu/~mwaskom/software/seaborn/examples/many_pairwise_correlations.html
    '''
    try:
        # print(...) with a single argument behaves identically on Python 2 and 3.
        print("\nGenerating the graphical correlation matrix...\n")
        time.sleep(3)
        corr = data.corr()
        f, ax = plt.subplots(figsize=(20, 20))
        # Generate a custom diverging colormap
        cmap = sns.diverging_palette(220, 10, as_cmap=True)
        # Draw the heatmap with the correct aspect ratio; tick labels are
        # disabled here and set explicitly below.
        sns.heatmap(corr, cmap=cmap, square=True, xticklabels=False, yticklabels=False,
                    linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
        plt.title('Correlation Matrix', fontsize=30)
        ax.set_ylabel('Features', fontsize=20)
        ax.set_xlabel('Features', fontsize=20)
        xticklabels = ['video_category_id', 'viewCount', 'likeCount', 'dislikeCount',
                       'favoriteCount', 'commentCount', 'dimension', 'definition',
                       'caption', 'licensedContent']
        ylabel = xticklabels[::-1]
        ax.set_xticklabels(xticklabels, rotation=45)
        ax.set_yticklabels(ylabel, rotation=0)
        name = "../YoutubeData/correlation_matrix.pdf"
        plt.savefig(name)
        print("\nPlease close the Bar Chart when you want to move ahead...")
        plt.show()
        print("You can always retrieve the graphical correlation matrix in YoutubeData folder.\n")
        time.sleep(3)
        return True
    except Exception:
        # A bare ``except:`` would also trap KeyboardInterrupt/SystemExit and
        # turn a Ctrl-C into an analysis error; only real errors are wrapped.
        raise VideoAnalysisException(" Error while Generating the graphical correlation matrix")
def plot_Bayes_pval_map(priors, posterior):
    """Plot one Bayesian P-value map panel per prior, side by side.

    :param priors: list of xidplus.prior classes
    :param posterior: xidplus.posterior class
    :return: tuple ``(figs, fig)`` -- the list of aplpy FITSFigure panels and
        the matplotlib figure that contains them
    """
    sns.set_style("white")
    mod_map_array = postmaps.replicated_maps(priors, posterior,
                                             posterior.samples['lp__'].size)
    cmap = sns.diverging_palette(220, 20, as_cmap=True)
    hdulists = [postmaps.make_fits_image(prior, prior.sim) for prior in priors]

    n_panels = len(priors)
    fig = plt.figure(figsize=(10 * n_panels, 10))
    figs = []
    Bayes_pvals = []
    # First pass: create every panel and compute its P-value map.
    for idx in range(n_panels):
        panel = aplpy.FITSFigure(hdulists[idx][1], figure=fig,
                                 subplot=(1, n_panels, idx + 1))
        figs.append(panel)
        Bayes_pvals.append(
            postmaps.make_Bayesian_pval_maps(priors[idx], mod_map_array[idx]))

    # Second pass: decorate each panel and write the P values into its pixels.
    for panel, prior, pvals in zip(figs, priors, Bayes_pvals):
        panel.show_markers(prior.sra, prior.sdec, edgecolor='black',
                           facecolor='black', marker='o', s=20, alpha=0.5)
        panel.tick_labels.set_xformat('dd.dd')
        panel.tick_labels.set_yformat('dd.dd')
        row_idx = prior.sy_pix - np.min(prior.sy_pix) - 1
        col_idx = prior.sx_pix - np.min(prior.sx_pix) - 1
        panel._data[row_idx, col_idx] = pvals
        panel.show_colorscale(vmin=-6, vmax=6, cmap=cmap)
        panel.add_colorbar()
        panel.colorbar.set_location('top')
    return figs, fig
def plot_2_corr_heatmaps(corr1, corr2, labels, title1, title2):
    """Show two annotated lower-triangle correlation heatmaps side by side.

    Args:
        corr1, corr2: square correlation matrices of the same shape (the mask
            is derived from ``corr1`` and reused for ``corr2``).
        labels: tick labels used on both axes of both panels.
        title1, title2: titles of the left and right panel.
    """
    fig = plt.figure(figsize=(9, 8))
    gs = gridspec.GridSpec(1, 2)
    ax1 = fig.add_subplot(gs[0, 0])
    ax2 = fig.add_subplot(gs[0, 1])
    sns.set(style="white")
    # Mask the upper triangle (diagonal included). The builtin ``bool`` is
    # used because the ``np.bool`` alias no longer exists in current NumPy.
    mask = np.zeros_like(corr1, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    # Draw both heatmaps with the mask and correct aspect ratio
    sns.heatmap(corr1, mask=mask, cmap=cmap, vmax=.3, square=True,
                xticklabels=labels, yticklabels=labels, linewidths=.5, ax=ax1,
                cbar_kws={"shrink": .3}, annot=True)
    ax1.set_title(title1)
    sns.heatmap(corr2, mask=mask, cmap=cmap, vmax=.3, square=True,
                xticklabels=labels, yticklabels=labels, linewidths=.5, ax=ax2,
                cbar_kws={"shrink": .3}, annot=True)
    ax2.set_title(title2)
    fig.tight_layout()
    plt.show()
def res_matrix(mark,state,cut_off=40):
    """Build a per-region signal matrix, plot its correlations and dump it.

    Reads the tab-separated file ``"<mark> in <state>-<cut_off>.csv"`` from
    the ``tmp`` data directory, groups consecutive rows that share the same
    ``chromMiddle`` into one vector with a slot per EID, shows a correlation
    plot of the collected vectors and writes the transposed matrix to
    ``"<mark> in <state>-<cut_off>_diff.csv"``.

    Args:
        mark: mark name used in the file names.
        state: state name used in the file names.
        cut_off: cut-off value used in the file names.
    """
    path = os.path.join(get_data_dir(), "tmp",
                        "{0} in {1}-{2}.csv".format(mark, state, cut_off))
    DF = pd.read_csv(path, sep='\t')
    Full_EID_list = get_full_EID_list()
    rows = []
    tmp = [0.]*len(Full_EID_list)
    for i in range(len(DF.index)):
        try:
            if DF.chromMiddle[i-1] == DF.chromMiddle[i]:
                tmp[Full_EID_list.index(DF.EID[i])] = DF.signalValue[i]
            else:
                rows.append(tmp)
                tmp = [0.]*len(Full_EID_list)
        except (KeyError, IndexError, ValueError):
            # Best-effort skip, as before: row 0 has no predecessor (label -1
            # is missing) and EIDs absent from Full_EID_list are ignored.
            # Other errors are no longer hidden.
            continue
    # NOTE(review): the row that opens a new chromMiddle group is not stored
    # and the final group is never appended -- confirm this is intended.

    f, ax = plt.subplots(figsize=(15, 15))
    cmap = sns.diverging_palette(210, 10, as_cmap=True)
    # NOTE(review): sns.corrplot exists only in old seaborn releases --
    # confirm the pinned seaborn version.
    sns.corrplot(np.array(rows), annot=False, sig_stars=False,  # .T??
                 diag_names=False, cmap=cmap, ax=ax)
    f.tight_layout()
    plt.show()

    path2 = os.path.join(get_data_dir(), "tmp",
                         "{0} in {1}-{2}_diff.csv".format(mark, state, cut_off))
    # The context manager closes the file even if a write fails.
    with open(path2, 'w') as out:
        # Transposed dump: one output line per EID slot, one column per vector.
        for i in range(len(rows[0])):
            out.write("".join(str(row[i]) + "\t" for row in rows))
            out.write("\n")
def main():
    """Plot a correlation-matrix heatmap per pointing and combine them in a GIF.

    For every pointing ID in the to-do list, the mock counts are loaded (one
    row per mock, one column per bin), their correlation matrix is drawn as a
    lower-triangle heatmap and saved as PNG; finally all PNGs are handed to
    ``GIF_MOVIE``.
    """
    # Load list of pointing IDs
    todo_file = rawdata_dir + 'todo_list.ascii.dat'
    ID_list = np.genfromtxt(todo_file, skip_header=1, usecols=[0], unpack=True,
                            dtype=str)

    # Load bin centers and round them to three decimal places
    bins_file = 'rbins.ascii.dat'
    bin_centers = np.genfromtxt(bins_file, skip_header=1, usecols=[2], unpack=True)
    bin_centers = np.round(bin_centers, 3)

    # Column names for the pandas DataFrame: the bin centers as strings
    col_names = np.asarray([str(center) for center in bin_centers])

    # Create list of png's for use in making gif
    png_list = []

    sns.set(style="white")

    # Calculate correlation matrix for each l.o.s.
    for ID in ID_list:
        # Load counts from the mocks with pandas
        # Each row is a mock, each column is a bin
        counts_filename = counts_dir + 'counts_all_' + ID + '.dat'
        # Raw string: '\s' is an invalid escape in a normal string literal.
        DF = pd.read_csv(counts_filename, sep=r'\s+', names=col_names)

        # Calculate correlation matrix
        corr = DF.corr()

        # Mask the upper triangle; ``np.bool`` no longer exists in NumPy.
        mask = np.zeros_like(corr, dtype=bool)
        mask[np.triu_indices_from(mask)] = True

        # plot heatmap of matrix
        f, ax = plt.subplots(figsize=(11, 9))
        cmap = sns.diverging_palette(145, 280, s=85, l=25, n=7, as_cmap=True)
        sns.heatmap(corr, mask=mask, cmap=cmap, square=True, annot=True,
                    xticklabels=col_names, yticklabels=col_names, linewidths=.5,
                    cbar_kws={"shrink": .5}, ax=ax, vmin=-1.0, vmax=1.0)
        ax.set_title('Correlation Matrix for l.o.s. ' + ID, fontsize=20)
        ax.set_xlabel('Bin Center (kpc)', fontsize=18)
        ax.set_ylabel('Bin Center (kpc)', fontsize=18)

        fig_name = plots_dir + 'corr_matrix_' + ID + '.png'
        f.savefig(fig_name)
        # Close the figure: one fresh figure is created per pointing and they
        # would otherwise all stay alive until the script ends.
        plt.close(f)
        png_list.append(fig_name)

    gif_name = plots_dir + 'corr_matrix.gif'
    GIF_MOVIE(png_list, gif_name)
def __init__(self, master, x_train, y_train, x_test, y_test, evaluator, df, console):
    """Build the frame and embed a correlation heatmap of ``df``.

    The training/test data, evaluator, dataframe and console are stored on
    the instance; a lower-triangle correlation heatmap of ``df`` is drawn and
    attached to a child frame through ``self.attach_figure``.
    """
    Tk.Frame.__init__(self, master)
    self.x_train = x_train
    self.y_train = y_train
    self.x_test = x_test
    self.y_test = y_test
    self.evaluator = evaluator
    self.df = df
    self.console = console

    frame_train = Tk.Frame(self)
    frame_train.pack(fill=Tk.BOTH, expand=1, padx=15, pady=15)

    # NOTE(review): this figure is superseded by the plt.subplots() call
    # below and never drawn on -- confirm whether it can be dropped.
    plt.figure(figsize=(12, 20))
    plt.subplot(111)

    # White background
    sns.set(style="white")

    # Correlation matrix of the features (the matrix contains the class
    # label as well as the features)
    corr = df.corr()

    # Hide the upper triangle of the matrix. The builtin ``bool`` replaces
    # the ``np.bool`` alias, which no longer exists in current NumPy.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    # Draw the plot
    f, ax = plt.subplots(figsize=(11, 11))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, square=True,
                linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
    plt.xticks(rotation=-90)
    plt.yticks(rotation=0)
    plt.title("Cardiotocography \"Feature-Feature\" & \"Feature-Label\" Correlations")

    self.attach_figure(plt.gcf(), frame_train)
def plot_feature_corr(X, f_sz = (11, 9)):
    """
    Purpose: plot a lower-triangle correlation heatmap for the features in X
    Inputs:
        X: a pandas dataframe of feature values
        f_sz: a tuple for the figure size
    Output: the correlation matrix of X
    """
    sns.set(style="white")
    # Compute the correlation matrix
    corr = X.corr()
    # Generate a mask for the upper triangle. The builtin ``bool`` replaces
    # the ``np.bool`` alias, which no longer exists in current NumPy.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=f_sz)
    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, square=True,
                linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
    return corr
def _process(self,data):
    """Accumulate cross-correlation values per batch and plot completed batches.

    Each value of ``data`` is read as ``(value, batch, (i, j))``: ``value`` is
    stored at ``matrix[j, i]`` of the batch's ``size`` x ``size`` matrix,
    which starts as the identity. Once ``size * (size - 1) / 2`` values have
    arrived for a batch, the matrix is drawn as a lower-triangle heatmap,
    saved to ``./plots/<batch>_plot.png`` and written to the ``output`` port.
    """
    for x in data:
        entry = data[x]
        batch = entry[1]
        if batch not in self.data:
            # prepares the data to visualise the xcor matrix of a specific batch number.
            self.data[batch] = {}
            self.data[batch]['matrix'] = numpy.identity(self.size)
            self.data[batch]['ro_count'] = 0

        self.data[batch]['matrix'][(entry[2][1], entry[2][0])] = entry[0]
        self.data[batch]['ro_count'] += 1

        if self.data[batch]['ro_count'] == (self.size*(self.size-1))/2:
            matrix = self.data[batch]['matrix']
            d = pd.DataFrame(data=matrix, columns=range(0, self.size),
                             index=range(0, self.size))
            # The builtin ``bool`` replaces ``numpy.bool``, which no longer
            # exists in current NumPy.
            mask = numpy.zeros_like(d, dtype=bool)
            mask[numpy.triu_indices_from(mask)] = True

            # Set up the matplotlib figure
            f, ax = plt.subplots(figsize=(11, 9))

            # Generate a custom diverging colormap
            cmap = sns.diverging_palette(220, 10, as_cmap=True)

            # Draw the heatmap with the mask and correct aspect ratio
            sns.heatmap(d, mask=mask, cmap=cmap, vmax=1, square=True,
                        linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)

            # ``sns.plt`` is gone from current seaborn; save through the
            # figure itself and close it so figures do not pile up per batch.
            f.savefig("./plots/"+str(batch)+"_plot.png")
            plt.close(f)
            self.write('output', (matrix, batch),
                       metadata={'matrix': str(d), 'batch': str(batch)},
                       dep=['batch_'+str(batch)])
def plot_EFA_retest(combined, size=4.6, dpi=300, ext='png', plot_dir=None):
    """Draw the correlation heatmap of ``combined`` with quadrant dividers.

    The colour scale runs from -1 to 1, but the colorbar ticks are relabelled
    with the largest absolute correlation found in the matrix. Black lines
    split the heatmap at the middle row and column. When ``plot_dir`` is
    given, the figure is saved there as ``EFA_test_retest_heatmap.<ext>`` and
    closed.
    """
    corr_mat = combined.corr()
    largest = abs(corr_mat).max().max()

    fig = plt.figure(figsize=(size, size))
    heat_ax = fig.add_axes([.1, .1, .8, .8])
    cbar_ax = fig.add_axes([.92, .15, .04, .7])
    palette = sns.diverging_palette(220, 15, n=100, as_cmap=True)
    colorbar_options = {'orientation': 'vertical', 'ticks': [-1, 0, 1]}
    sns.heatmap(corr_mat, square=True, ax=heat_ax, cbar_ax=cbar_ax,
                vmin=-1, vmax=1, cmap=palette, cbar_kws=colorbar_options)
    heat_ax.set_xticklabels(heat_ax.get_xticklabels(), rotation=90)
    heat_ax.set_yticklabels(heat_ax.get_yticklabels(), rotation=0)
    heat_ax.tick_params(labelsize=size/len(corr_mat)*40)

    # format cbar axis
    cbar_labels = [format_num(-largest), 0, format_num(largest)]
    cbar_ax.set_yticklabels(cbar_labels)
    cbar_ax.tick_params(labelsize=size, length=0, pad=size/2)
    cbar_ax.set_ylabel('Factor Loading', rotation=-90, fontsize=size,
                       labelpad=size/2)

    # set divider lines
    n_vars = corr_mat.shape[1]
    divider_width = size/3
    heat_ax.axvline(n_vars//2, 0, n_vars, color='k', linewidth=divider_width)
    heat_ax.axhline(n_vars//2, 0, n_vars, color='k', linewidth=divider_width)

    if plot_dir is None:
        return
    out_name = path.join(plot_dir, 'EFA_test_retest_heatmap.%s' % ext)
    save_figure(fig, out_name, {'bbox_inches': 'tight', 'dpi': dpi})
    plt.close()
def plot_corr(file, score, stat, ind_var, brain_type):
    """Save a lower-triangle correlation heatmap of a CSV file and return it.

    Args:
        file: path of the CSV to load.
        score, stat, brain_type: name parts; the image is written to
            ``<stat>/<score>/heatmap_<score>_<stat>_<brain_type>.png``.
        ind_var: sized collection; its length sets both figure dimensions.

    Returns:
        The correlation matrix of the loaded dataframe.
    """
    # seaborn
    sns.set(style="white")
    # import the dataframe
    dt = pd.read_csv(file)
    # Compute the correlation matrix
    corr = dt.corr()

    ### Create the matrix figure with seaborn
    # Generate a mask for the upper triangle. The builtin ``bool`` replaces
    # the ``np.bool`` alias, which no longer exists in current NumPy.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=(len(ind_var), len(ind_var)))
    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    # Draw the heatmap with the mask
    sns.heatmap(corr, mask=mask, cmap=cmap, annot=False, ax=ax)
    plt.subplots_adjust(left=0.30, bottom=0.30)
    plt.savefig(os.path.join(stat, score,
                             "heatmap_" + score + "_" + stat + "_" + brain_type + ".png"))
    plt.close()
    return corr
def f2hex_nodes(fx, vmin, vmax, midpoint):
    """Map each value in ``fx`` to a ``#rrggbb`` colour on a diverging palette.

    Args:
        fx: iterable of scalar values.
        vmin, vmax, midpoint: passed to ``MidpointNormalize`` to scale the
            values before the colour lookup.

    Returns:
        List of hex colour strings, one per value of ``fx``, in order.
    """
    norm = MidpointNormalize(vmin=vmin, vmax=vmax, midpoint=midpoint)
    f2rgb = cm.ScalarMappable(
        norm=norm,
        cmap=sns.diverging_palette(150, 275, s=80, l=55, as_cmap=True))
    colors_hex = []
    for rate in fx:
        rgb = f2rgb.to_rgba(rate)[:3]
        # '%x' rejects floats on Python 3, so each channel is truncated to an
        # int first (the same truncation Python 2 applied implicitly).
        colors_hex.append('#%02x%02x%02x' % tuple(int(255 * fc) for fc in rgb))
    return colors_hex
def make_corr_plot(d, title="plot"):
    """Draw a correlation plot of ``d`` and save it under the name ``title``.

    ``title`` is used both as the plot title and as the output file name.
    """
    # NOTE(review): sns.corrplot exists only in old seaborn releases --
    # confirm the pinned seaborn version.
    fig, axes = plt.subplots(figsize=(9, 9))
    palette = sns.diverging_palette(220, 10, as_cmap=True)
    corrplot_options = dict(annot=False, sig_stars=False, diag_names=False,
                            cmap=palette, ax=axes)
    sns.corrplot(d, **corrplot_options)
    fig.tight_layout()
    plt.title(title)
    fig.savefig(title)
def heat_map(corrs_mat):
    """Draw ``corrs_mat`` as a heatmap with the upper triangle masked out."""
    sns.set(style = "white")
    f, ax = plt.subplots(figsize = (11, 9))
    # The builtin ``bool`` replaces the ``np.bool`` alias, which no longer
    # exists in current NumPy.
    mask = np.zeros_like(corrs_mat, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(corrs_mat, mask=mask, cmap=cmap, ax=ax)
def heatmap_Sigcelltype(args, df, path):
    """Plot the stack-plot dataframe ``df`` as an annotated heatmap.

    When ``args['key_celltype_list']`` is truthy the values of ``df`` are
    first copied into a zero-filled frame indexed by a fixed list of immune
    cell types, so those cell types always appear. The transposed frame is
    drawn and saved as ``GCAM_cofficients.svg`` inside ``path``.
    """
    import seaborn as sns

    if args['key_celltype_list']:
        cell_type = ['macrophage', 'Alveolar macrophage', 'm1 macrophage', 'm2 macrophage',
                     'monocyte', 'dendritic cell', 'glial cell', 'neutrophil', 'mast cell',
                     'Natural killer cell', 'Kupffer cell', 'Plasma cell', 'eosinophil',
                     'naive B cell', 'memory B cell', 'B lymphocyte', 'T lymphocyte',
                     'naive T cell', 'memory T cell', 'CD8 T cell', 'CD4 T cell',
                     'regulatory T cell', 'Cytotoxic T cell', 'helper T cell']
        # creating df for heatmap
        new_df = pd.DataFrame(0, columns=df.columns, index=cell_type)
        for k, v in df.iterrows():
            # Series.iteritems() was removed in pandas 2.0; items() is the
            # equivalent available on every version.
            for c, val in v.items():
                new_df.loc[k, c] = val
        plot_df = new_df.T
    else:
        plot_df = df.T

    # plotting df (shared by both branches)
    sns.set_context("talk")
    cmap = sns.diverging_palette(255, 15, sep=20, n=3, as_cmap=True)
    plt.clf()
    plt.figure(figsize=[20,10])
    sns.heatmap(plot_df.round(2), cmap=cmap, vmin=0, vmax=0.2, yticklabels=True,
                cbar=False, xticklabels=True, linecolor='#ffffff', linewidths=0.01,
                square=True, annot=True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(os.path.join(path, 'GCAM_cofficients.svg'))
    plt.close()
def make_corr(data):
    """Draw the lower-triangle correlation matrix of ``data`` as a heatmap."""
    sns.set(style="white")
    corr = data.corr()
    # The builtin ``bool`` replaces the ``np.bool`` alias, which no longer
    # exists in current NumPy.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    f, ax = plt.subplots(figsize=(22, 18))
    cmap = sns.diverging_palette(255, 140, as_cmap=True)
    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, square=True,
                xticklabels=True, yticklabels=True, linewidths=1,
                cbar_kws={"shrink": .5}, ax=ax)
def heatmap(df):
    """Save an annotated lower-triangle correlation heatmap of ``df``.

    The ``group`` and ``id`` columns are dropped before correlating; the
    image is written to ``heatmap.png`` inside ``FIG_PATH``.
    """
    corr = df.drop(['group', 'id'], axis=1).corr()
    # The builtin ``bool`` replaces the ``np.bool`` alias, which no longer
    # exists in current NumPy.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    plt.figure(figsize=[8,8])
    cmap = sns.diverging_palette(240, 10, as_cmap=True)
    sns.heatmap(corr, mask=mask, vmax=.8, square=True, cmap=cmap,
                linewidths=0.8, annot=True)
    plt.title("Correlation Matrix")
    plt.savefig(os.path.join(FIG_PATH, 'heatmap.png'))
def display_closest_DVs(consensus, n_closest=10):
    """Return a styled table listing the closest variables of every variable.

    For each variable in the consensus ``distance_df`` the ``n_closest``
    smallest-distance partners are listed as ``"<name>: <pct>%"`` where
    ``pct`` is ``int((1 - distance) * 100)``; each cell is coloured from a
    diverging palette according to that percentage.

    Args:
        consensus: object whose ``get_consensus_cluster()`` result holds a
            ``'distance_df'`` entry.
        n_closest: number of neighbours per variable; column names exist for
            values up to 10.

    Returns:
        The pandas Styler of the table.
    """
    nth = {
        1: "first",
        2: "second",
        3: "third",
        4: "fourth",
        5: "fifth",
        6: "sixth",
        7: "seventh",
        8: "eigth",
        9: "ninth",
        10: "tenth",
    }
    # Work on a copy so relabelling does not alter the frame held by the
    # consensus object (repeated calls would otherwise re-format the names).
    df = consensus.get_consensus_cluster()['distance_df'].copy()
    df.index = format_variable_names(df.index)
    df.columns = format_variable_names(df.columns)

    sorted_df = pd.DataFrame(data=np.zeros((len(df), n_closest)), index=df.index)
    sorted_df.columns = [nth[i+1] for i in sorted_df.columns]
    for name in sorted_df.index:
        closest = 1-df.loc[name].drop(name).sort_values()[:n_closest]
        # Series.iteritems() was removed in pandas 2.0; items() is equivalent.
        closest = ['%s: %s%%' % (i, int(b*100)) for i, b in closest.items()]
        sorted_df.loc[name] = closest

    def magnify():
        return [dict(selector="tr:hover",
                     props=[("border-top", "2pt solid black"),
                            ("border-bottom", "2pt solid black")]),
                dict(selector="th:hover",
                     props=[("font-size", "10pt")]),
                dict(selector="td",
                     props=[('padding', "0em 0em")]),
                dict(selector="tr:hover td:hover",
                     props=[('max-width', '200px'),
                            ('font-weight', 'bold'),
                            ('color', 'black'),
                            ('font-size', '9pt')])
                ]

    cm = sns.diverging_palette(220, 15, n=161)

    def color_cell(val):
        # Pull the integer percentage out of "<name>: <pct>%".
        val = val[val.rindex(': ')+2:val.rindex('%')]
        color = to_hex(cm[int(val)+30])
        return 'background-color: %s' % color

    styler = sorted_df.style
    styler \
        .applymap(color_cell) \
        .set_properties(**{'max-width': '100px', 'font-size': '10pt',
                           'border-color': 'white'})\
        .set_precision(2)\
        .set_table_styles(magnify())
    return styler
def plot_factor_correlation(results, c, rotate='oblimin', title=True,
                            DA=False, size=4.6, dpi=300, ext='png', plot_dir=None):
    """Plot the factor correlation matrix (Phi) of a ``c``-factor solution.

    The full matrix is drawn as a heatmap, then drawn again with the values
    annotated wherever the mask leaves cells visible (diagonal and upper
    triangle).

    Args:
        results: object exposing ``EFA``/``DA`` analyses and an ``ID`` string.
        c: number of factors of the solution to plot.
        rotate: rotation name used to look up the loadings and Phi.
        title: when equal to True, a title built from ``results.ID`` is added.
        DA: use ``results.DA`` instead of ``results.EFA``.
        size: figure height in inches; font sizes are derived from it.
        dpi, ext: resolution and file extension used when saving.
        plot_dir: when truthy, the figure is saved there as
            ``factor_correlations_EFA<c>.<ext>`` and closed.
    """
    if DA:
        EFA = results.DA
    else:
        EFA = results.EFA
    loading = EFA.get_loading(c, rotate=rotate)
    # get factor correlation matrix
    reorder_vec = EFA.get_factor_reorder(c)
    phi = get_attr(EFA.results['factor_tree_Rout_%s' % rotate][c], 'Phi')
    phi = pd.DataFrame(phi, columns=loading.columns, index=loading.columns)
    phi = phi.iloc[reorder_vec, reorder_vec]
    mask = np.zeros_like(phi)
    mask[np.tril_indices_from(mask, -1)] = True
    # ``with A and B:`` evaluates ``A and B`` first and enters only ``B``, so
    # the plotting context was never applied; a comma enters both managers.
    with sns.plotting_context('notebook', font_scale=2), sns.axes_style('white'):
        f = plt.figure(figsize=(size*5/4, size))
        ax1 = f.add_axes([0, 0, .9, .9])
        cbar_ax = f.add_axes([.91, .05, .03, .8])
        sns.heatmap(phi, ax=ax1, square=True, vmax=1, vmin=-1,
                    cbar_ax=cbar_ax,
                    cmap=sns.diverging_palette(220, 15, n=100, as_cmap=True))
        sns.heatmap(phi, ax=ax1, square=True, vmax=1, vmin=-1,
                    cbar_ax=cbar_ax, annot=True, annot_kws={"size": size/c*15},
                    cmap=sns.diverging_palette(220, 15, n=100, as_cmap=True),
                    mask=mask)
        yticklabels = ax1.get_yticklabels()
        ax1.set_yticklabels(yticklabels, rotation=0, ha="right")
        ax1.set_xticklabels(ax1.get_xticklabels(), rotation=90)
    if title == True:
        ax1.set_title('%s Factor Correlations' % results.ID.split('_')[0].title(),
                      weight='bold', y=1.05, fontsize=size*3)
    ax1.tick_params(labelsize=size*3)
    # format cbar
    cbar_ax.tick_params(axis='y', length=0)
    cbar_ax.tick_params(labelsize=size*2)
    cbar_ax.set_ylabel('Pearson Correlation', rotation=-90, labelpad=size*4,
                       fontsize=size*3)

    if plot_dir:
        filename = 'factor_correlations_EFA%s.%s' % (c, ext)
        save_figure(f, path.join(plot_dir, filename),
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
def l_reg(input_path):
    """Plot correlations between the columns-turned-rows of a CSV file.

    The CSV is loaded, its ``gene_id`` column dropped, and the transposed
    value matrix is passed to the correlation plot, which is saved as
    ``H3K27me3_corrplot.png`` in the ``tmp`` data directory.
    """
    DF = pd.read_csv(input_path)
    DF.drop('gene_id', axis=1, inplace=True)
    f, ax = plt.subplots(figsize=(20, 20))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    # DataFrame.as_matrix() was removed in pandas 1.0; ``.values`` returns
    # the same ndarray on every version.
    # NOTE(review): sns.corrplot exists only in old seaborn releases --
    # confirm the pinned seaborn version.
    sns.corrplot(DF.values.T, annot=False, sig_stars=False,
                 diag_names=False, cmap=cmap, ax=ax)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    plt.savefig(os.path.join(get_data_dir(), "tmp", "H3K27me3_corrplot.png"))
def square_matrix_plot(matrix, vmax=1, vmin=0):
    """Draw ``1 - matrix`` as a square heatmap and return the figure and axes.

    Args:
        matrix: square matrix; its complement ``1 - matrix`` is plotted.
        vmax, vmin: limits of the colour scale.

    Returns:
        Tuple ``(fig, ax)`` of the created matplotlib figure and axes.
    """
    sns.set(style="white")
    corr = 1 - matrix
    # The previous triangle mask and diverging palette were built but never
    # handed to the heatmap (and the mask relied on the removed ``np.bool``),
    # so they are dropped; the drawn plot is unchanged.
    fig, ax = plt.subplots(figsize=(11, 9))
    sns.set_context("talk")
    # Draw the heatmap with the correct aspect ratio
    sns.heatmap(corr, cmap="YlGnBu", vmax=vmax, vmin=vmin, square=True,
                linewidths=0.5, cbar_kws={"shrink": 0.9}, ax=ax)
    plt.title("pairwise")
    return fig, ax
def plot_morph(good_spikes, cluster, morph_dim, spacing=.02, ymax=.04):
    """Plot firing rates of one cluster along one morph dimension.

    Layout: the left half stacks one offset trace per morph position; the
    upper-right panel compares ``<morph_dim[1]>_rec`` with ``<morph_dim>128``
    and the lower-right panel compares ``<morph_dim[0]>_rec`` with
    ``<morph_dim>001``.

    Args:
        good_spikes: dataframe with ``cluster``, ``stim_name``, ``morph_dim``,
            ``morph_pos``, ``stim_aligned_time`` and ``stim_presentation``
            columns.
        cluster: cluster id to plot.
        morph_dim: two-character morph dimension name.
        spacing: vertical offset between morph positions in the left panel.
        ymax: upper y limit of the two right-hand panels.
    """
    in_cluster = good_spikes['cluster'] == cluster

    def plot_stim(stim_name):
        # Plot the rate of all presentations whose stimulus name matches.
        spks2plot = good_spikes[in_cluster &
                                good_spikes['stim_name'].str.contains(stim_name)]
        plot_fr_se(spks2plot["stim_aligned_time"].values,
                   spks2plot["stim_presentation"].values,
                   label=stim_name)

    def mark_stimulus(top):
        # Vertical guides at stimulus onset/offset plus shared axis limits.
        ax = plt.gca()
        ax.plot((0, 0), (0, top), c=".2", alpha=.5)
        ax.plot((stim_length, stim_length), (0, top), c=".2", alpha=.5)
        plt.xlim(-.5, 1)
        plt.ylim(0, top)
        plt.xticks([0, .5])

    plt.figure(figsize=(20,20))
    with sns.color_palette(sns.xkcd_palette(["twilight blue", "kermit green"]), 2):
        plt.subplot(222)
        plot_stim(morph_dim[1]+"_rec")
        plot_stim(morph_dim+'128')
        plt.legend(loc=1)
        mark_stimulus(ymax)
        plt.yticks([0, .5*ymax, ymax])
        plt.title('cell: %d morph dim: %s' % (cluster, morph_dim))

        plt.subplot(224)
        # This panel used to filter on an unrelated ``spikes`` name; it now
        # uses ``good_spikes`` like the panel above.
        plot_stim(morph_dim[0]+"_rec")
        plot_stim(morph_dim+'001')
        plt.legend(loc=1)
        mark_stimulus(ymax)
        plt.yticks([0, .5*ymax, ymax])

    with sns.color_palette(sns.diverging_palette(262, 359, s=99, l=43, sep=1, n=128,
                                                 center="dark"), 128):
        plt.subplot(121)
        spks_morph = good_spikes[in_cluster & (good_spikes['morph_dim']==morph_dim)]
        morph_ymax = 128*spacing+ymax
        for morph_pos in np.unique(spks_morph['morph_pos'].values):
            stim_name = morph_dim + str(int(morph_pos))
            spks2plot = spks_morph[spks_morph['morph_pos'] == morph_pos]
            plot_fr_se(spks2plot["stim_aligned_time"].values,
                       spks2plot["stim_presentation"].values,
                       offset=morph_pos*spacing,
                       label=stim_name)
        mark_stimulus(morph_ymax)
        plt.yticks([])
        # Booleans instead of the legacy 'off' strings, which current
        # matplotlib no longer interprets as False.
        plt.tick_params(axis='y', which='both', bottom=False, top=False,
                        labelbottom=False)
    sns.despine()
def plotMatrixHeat(self, matrix, path):
    """Draw ``matrix`` as a square heatmap and save the figure to ``path``.

    The colour scale is capped at the matrix maximum and every fifth tick
    label is shown on both axes.
    """
    fig, axes = plt.subplots(figsize=(11, 9))
    # Custom diverging colormap
    palette = sns.diverging_palette(220, 10, as_cmap=True)
    heatmap_options = dict(
        cmap=palette,
        vmax=matrix.max(),
        square=True,
        xticklabels=5,
        yticklabels=5,
        linewidths=.5,
        cbar_kws={"shrink": .5},
        ax=axes,
    )
    sns.heatmap(matrix, **heatmap_options)
    plt.savefig(path)
def main():
    """Explore the movie metadata: print summaries and plot correlations.

    Loads ``../input/movie_metadata.csv``, prints basic diagnostics, derives a
    ``profit`` percentage for rows without missing values, then shows a
    correlation heatmap and two joint plots of profit.
    """
    # print(...) with a single argument behaves identically on Python 2 and 3.
    movie_raw_data = pd.read_csv('../input/movie_metadata.csv')
    print(movie_raw_data.head(3))
    print(movie_raw_data.isnull().sum())
    print(movie_raw_data.shape)
    movie_raw_data_dropna = movie_raw_data.dropna()
    print(movie_raw_data_dropna.shape)
    print(movie_raw_data.dtypes)

    movie_filterd_imdbscore_first = movie_raw_data.loc[movie_raw_data['imdb_score'] > 5]
    movie_filterd_imdbscore_from_raw = movie_raw_data.loc[movie_raw_data['imdb_score'] < 8]
    print(movie_filterd_imdbscore_first.shape)
    # Build the mask from the frame being filtered instead of relying on
    # pandas aligning a mask taken from the unfiltered frame.
    movie_filterd_imdbscore_second = movie_filterd_imdbscore_first.loc[
        movie_filterd_imdbscore_first['imdb_score'] < 8]
    print(movie_filterd_imdbscore_second.shape)
    print(movie_filterd_imdbscore_from_raw.shape)
    print('*********************************')
    print(movie_raw_data_dropna.head(3))

    # Profit as a percentage of gross.
    gross = movie_raw_data_dropna['gross'].values
    budget = movie_raw_data_dropna['budget'].values
    profit = ((gross - budget) / gross) * 100
    print(profit)
    movie_raw_data_dropna.loc[:, 'profit'] = pd.Series(profit, movie_raw_data_dropna.index)
    print(movie_raw_data_dropna.shape)
    print(movie_raw_data_dropna.head(3))

    corr = movie_raw_data_dropna.corr()
    print(corr)
    f, ax = plt.subplots(figsize=(11, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(corr, cmap=cmap, vmax=1, square=True,
                linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
    g = sns.jointplot(x="title_year", y="profit", kind='scatter', size=10,
                      ylim=[0, 110], xlim=[1980, 2020], data=movie_raw_data_dropna)
    h = sns.jointplot(x="imdb_score", y="profit", kind='reg', size=10,
                      ylim=[0, 110], data=movie_raw_data_dropna)
    plt.show()
def heatmap_plot_zscore(df_zscore_features, df_all, output_dir, title=None):
    """Draw a clustered heatmap of z-scored features and save it as PNG.

    Rows are ordered by a Ward/Euclidean linkage and coloured by the
    ``dendrite_type`` of each row in ``df_all``; a legend for those colours is
    placed on the row dendrogram. The image is written to
    ``<output_dir>/zscore_feature_heatmap.png``.

    Args:
        df_zscore_features: dataframe of z-scored features to cluster.
        df_all: dataframe providing ``cre_line`` and ``dendrite_type`` columns.
        output_dir: directory receiving the PNG.
        title: optional title; it is set first and then replaced by the fixed
            ``'zscore'`` title that is set before saving.

    Returns:
        The seaborn ClusterGrid.
    """
    pl.figure()
    # Create a custom palette for creline colors
    cre_lines = np.unique(df_all['cre_line'])
    cre_line_pal = sns.color_palette("hls", len(cre_lines))
    cre_line_lut = dict(zip(cre_lines, cre_line_pal))  # map creline type to color
    creline_colors = df_all['cre_line'].map(cre_line_lut)

    # Create a custom palette for dendrite_type colors
    dendrite_types = np.unique(df_all['dendrite_type'])
    dendrite_type_pal = sns.color_palette("hls", len(dendrite_types))
    dendrite_type_lut = dict(zip(dendrite_types, dendrite_type_pal))
    dendritetype_colors = df_all['dendrite_type'].map(dendrite_type_lut)

    # Create a custom colormap for the heatmap values
    cmap = sns.diverging_palette(240, 10, as_cmap=True)

    # Only the row linkage is needed: the column linkage and the flat
    # cluster assignment computed previously were never used.
    r_linkage = hierarchy.linkage(df_zscore_features, method='ward', metric='euclidean')

    # PLOT
    g = sns.clustermap(df_zscore_features, row_linkage=r_linkage, method='ward',
                       metric='euclidean', linewidths=0.0,
                       row_colors=dendritetype_colors, cmap=cmap,
                       xticklabels=True, yticklabels=False)
    if title:
        pl.title(title)
    # TODO : adjust creline tag size

    # Legend for row colors: zero-sized bars serve only as legend handles.
    for label in dendrite_types:
        g.ax_row_dendrogram.bar(0, 0, color=dendrite_type_lut[label], label=label,
                                linewidth=0)
    g.ax_row_dendrogram.legend(loc="center", ncol=1)

    pl.title('zscore')
    filename = output_dir + '/zscore_feature_heatmap.png'
    pl.savefig(filename, dpi=300)
    print("save zscore matrix heatmap figure to :" + filename)
    pl.close()
    return g
def plot_correlation_map(df):
    """Draw an annotated square heatmap of the correlation matrix of ``df``."""
    correlations = df.corr()
    _, axes = plt.subplots(figsize=(12, 10))
    palette = sns.diverging_palette(220, 10, as_cmap=True)
    heatmap_options = {
        'cmap': palette,
        'square': True,
        'cbar_kws': {'shrink': .9},
        'ax': axes,
        'annot': True,
        'annot_kws': {'fontsize': 12},
    }
    sns.heatmap(correlations, **heatmap_options)
def corr_analysis(data):
    """Show an annotated lower-triangle correlation heatmap of ``data``."""
    corr = data.corr()
    # Generate a mask for the upper triangle. The builtin ``bool`` replaces
    # the ``np.bool`` alias, which no longer exists in current NumPy.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    f, ax = plt.subplots(figsize=(20, 20))
    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(110, 10, as_cmap=True)
    # Draw the heatmap with the mask; it lands on the axes created above
    # because they are the current axes.
    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=1, linewidths=.5,
                cbar_kws={"shrink": .6}, annot=True, annot_kws={"size": 8})
    plt.xticks(rotation=90)
    plt.yticks(rotation=0)
    plt.show()
def Show_correlation(df, cols=None):
    """
    Show an annotated heatmap of the column correlations of a DataFrame, to
    help pick the features to engineer.

    When ``cols`` is given, only those columns plus ``'target'`` are used.
    """
    selected = df[cols + ['target']] if cols else df
    corrmat = selected.corr()

    plt.figure(figsize=(12, 10))
    palette = sns.diverging_palette(h_neg=210, h_pos=350, s=90, l=30, as_cmap=True)
    sns.heatmap(corrmat, cmap=palette, annot=True, fmt="f")
    plt.xticks(rotation=90)
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.show()
def get_cor_matrix( self,method="pearson" ):
    """Compute the correlation matrix of ``self.mat`` and save a plot as PDF.

    Stores ``method`` in ``self.method`` and the correlation values in
    ``self.cor_mat``. The PDF name is derived from ``self.infile`` by removing
    its last two dot-separated parts and appending
    ``.corMat.<method>.pdf``.
    """
    self.method = method
    stem = ".".join( self.infile.split(".")[:-2] )
    out_cor_file = "%s.corMat.%s.pdf" % ( stem, self.method )

    pd_mat = pd.DataFrame( self.mat.matrix )
    pd_mat.columns = self.mat.colname
    pd_mat.index = self.mat.rowname
    self.cor_mat = pd_mat.corr( self.method ).values

    sns.set(style="darkgrid")
    fig, axes = plt.subplots(figsize=(9, 9))
    palette = sns.diverging_palette(220, 10, as_cmap=True)
    # NOTE(review): sns.corrplot exists only in old seaborn releases --
    # confirm the pinned seaborn version.
    corrplot_options = dict(annot=False, sig_stars=False, diag_names=False,
                            cmap=palette, ax=axes, cmap_range=(0.0, 1.0),
                            method=self.method)
    sns.corrplot(pd_mat, **corrplot_options)
    fig.savefig( out_cor_file,format="pdf" )
def plot_jaccard_heatmap(communities,shape=30,out=None):
    """Plot the pairwise Jaccard similarity between communities as a heatmap.

    Args:
        communities: iterable of communities; every ordered pair is passed to
            ``jaccard_similarity``.
        shape: side length of the square similarity matrix, i.e. the number
            of communities.
        out: output path without extension; when None the plot is shown,
            otherwise it is saved as ``<out>.svg``.
    """
    # product() already yields the pairs lazily; no intermediate lists needed.
    data = np.array([jaccard_similarity(pair)
                     for pair in product(communities, repeat=2)])
    data = data.reshape(shape, shape)
    ax = plt.axes()
    # The diverging palette built here previously was never used; the
    # heatmap has always been drawn with the sequential Reds colormap.
    heat = sns.heatmap(data, cmap=plt.cm.Reds, square=True, linewidths=.5,
                       cbar_kws={"shrink": .5}, ax=ax)
    heat.invert_yaxis()
    plt.ylabel("Comunity ID")
    plt.xlabel("Comunity ID")
    plt.yticks(size='small', rotation='horizontal')
    plt.xticks(size='small', rotation='vertical')
    if out is None:
        plt.show()
    else:
        plt.savefig(out+".svg", bbox_inches="tight")
    plt.close()
ax2.ticklabel_format(style='sci', axis='x', scilimits=(0, 0)) ax3.ticklabel_format(style='sci', axis='x', scilimits=(0, 0)) ax4.ticklabel_format(style='sci', axis='x', scilimits=(0, 0)) plt.savefig('%s' % (savepath), bbox_inches='tight') plt.close(fig) #-----------------------------------------------------------------------------# if __name__ == '__main__': main() color_palette = { 'purples': sns.cubehelix_palette(12)[5:10], 'groups': [ sns.diverging_palette(10, 240, n=27), sns.diverging_palette(10, 240, n=27), sns.diverging_palette(10, 240, n=27), sns.diverging_palette(10, 240, n=27), '0.5' ], 'greens': sns.cubehelix_palette(rot=-.4, n_colors=12)[4:9], 'purples_ex': sns.cubehelix_palette(12), 'g_ex': sns.color_palette("GnBu_d", n_colors=16) } styles = { 'cadis': { 'ls': '-.',
def score(self, y_true, y_probs, subgroup_df, output=True):
    """Compute positive and negative AEG scores for every subgroup.

    For each subgroup column, the model scores of subgroup members are
    compared with those of non-members through the Mann-Whitney U statistic,
    separately among observations with target 1 ("Positive AEG") and target 0
    ("Negative AEG"). Each score is ``0.5 - U / (n_sub * n_back)`` rounded to
    two decimals. The table is stored in ``self.output_df`` (index: subgroup
    names); nothing is returned. The inputs are not modified.

    Parameters
    ----------
    y_true : pandas Series, pandas DataFrame
        The true values for all observations.
    y_probs : pandas Series, pandas DataFrame
        The model's predicted values for all observations.
    subgroup_df : pandas DataFrame
        Dataframe of all subgroups to be compared. Each column should be a
        specific subgroup with 1 to indicating the observation is a part of
        the subgroup and 0 indicating it is not. There should be no other
        values besides 1 or 0 in the dataframe.
    output : boolean (default = True)
        If true draws a heatmap of the AEG scores.
    """
    import numpy as np
    import pandas as pd
    from scipy.stats import mannwhitneyu

    def as_column(values, name):
        # Return ``values`` as a fresh single-column frame called ``name``
        # with a 0..n-1 index, leaving the caller's object untouched.
        if isinstance(values, pd.DataFrame):
            frame = values.copy()
            frame.columns = [name]
        elif isinstance(values, pd.Series):
            # to_frame(name=...) renames; DataFrame(series, columns=[name])
            # would select a missing column and yield only NaN when the
            # Series carries a different name.
            frame = values.to_frame(name=name)
        else:
            frame = pd.DataFrame(values, columns=[name])
        return frame.reset_index(drop=True)

    def calc_aeg(parameter, df, target):
        # AEG of one subgroup among the observations with the given target.
        of_class = df.target == target
        sub_probs = df[of_class & (df[parameter] == 1)]['probs']
        back_probs = df[of_class & (df[parameter] == 0)]['probs']
        if len(sub_probs) == 0 or len(back_probs) == 0:
            # Undefined without both groups; avoid dividing by zero.
            return np.nan
        # NOTE(review): which U statistic mannwhitneyu returns by default
        # differs between SciPy releases -- confirm the intended sign.
        u_stat = mannwhitneyu(sub_probs, back_probs)[0]
        aeg = .5 - (u_stat / (len(sub_probs) * len(back_probs)))
        return round(aeg, 2)

    # work on index-aligned copies so the caller's objects are not mutated
    subgroups = subgroup_df.reset_index(drop=True)
    targets = as_column(y_true, 'target')
    probs = as_column(y_probs, 'probs')

    # combine all inputs into a DataFrame
    input_df = pd.concat([targets, probs, subgroups], axis=1)

    # build dataframe and fill with the AEG metrics
    self.output_df = pd.DataFrame(index=subgroups.columns,
                                  columns=['Positive AEG', 'Negative AEG'])
    for col in subgroups.columns:
        self.output_df.loc[col] = [
            calc_aeg(col, input_df, 1),
            calc_aeg(col, input_df, 0)
        ]

    if output:
        import seaborn as sns
        sns.heatmap(self.output_df.astype('float32'),
                    vmin=-.5, vmax=.5,
                    cmap=sns.diverging_palette(10, 10, n=101),
                    annot=True, linewidths=2)
def plot_top_heatmap(coef_df, auc_vals, pheno_dict, args):
    """Save a clustered heatmap of the strongest coefficients per mutation type.

    Coefficients are averaged over the first column level and scaled by
    each row's largest absolute value. Mutation types are kept when they
    are not ``RandomType``, reach the AUC cutoff and do not overlap
    ``copy_mtype``. The ten largest-magnitude genes of each kept type form
    the columns. Rows and columns are ordered by centroid linkage and the
    figure is written as an SVG under ``plot_dir``.
    """
    averaged = coef_df.groupby(level=0, axis=1).mean()
    row_scale = averaged.abs().max(axis=1)
    coef_mat = (averaged.transpose() / row_scale).transpose()

    # a cutoff of -1 means "use the AUC of the gene's point-mutation task"
    if args.auc_cutoff == -1:
        min_auc = auc_vals[MuType({('Gene', args.gene): pnt_mtype})]
    else:
        min_auc = args.auc_cutoff

    plt_mtypes = {
        mtype
        for mtype, auc_val in auc_vals.iteritems()
        if (not isinstance(mtype, RandomType) and auc_val >= min_auc
            and (tuple(mtype.subtype_iter())[0][1] & copy_mtype).is_empty())
    }

    plt_genes = set()
    for kept_mtype in plt_mtypes:
        ranked = coef_mat.loc[kept_mtype].abs().sort_values()
        plt_genes |= set(ranked[-10:].index)

    fig_width = 4 + len(plt_genes) / 4
    fig_height = 1.3 + len(plt_mtypes) / 5.3
    fig, ax = plt.subplots(figsize=(fig_width, fig_height))

    plot_df = coef_mat.loc[plt_mtypes, plt_genes]

    # leaf orders for both axes come from the same, not yet reordered, frame
    row_link = linkage(distance.pdist(plot_df, metric='euclidean'),
                       method='centroid')
    row_order = dendrogram(row_link, no_plot=True)['leaves']
    col_link = linkage(distance.pdist(plot_df.transpose(),
                                      metric='euclidean'),
                       method='centroid')
    col_order = dendrogram(col_link, no_plot=True)['leaves']
    plot_df = plot_df.iloc[row_order, col_order]

    coef_cmap = sns.diverging_palette(13, 131, s=91, l=41, sep=3,
                                      as_cmap=True)
    sns.heatmap(plot_df, cmap=coef_cmap, center=0,
                xticklabels=False, yticklabels=False)

    n_rows, n_cols = plot_df.shape[0], plot_df.shape[1]

    # row labels; the gene's own point-mutation task is set in bold
    for row_pos, mtype in enumerate(plot_df.index):
        is_base = mtype == MuType({('Gene', args.gene): pnt_mtype})
        lbl_wgt = 'bold' if is_base else 'normal'

        ax.text(-0.29 / n_cols, 1 - ((row_pos + 0.53) / n_rows),
                get_fancy_label(tuple(mtype.subtype_iter())[0][1]),
                size=9, weight=lbl_wgt, ha='right', va='center',
                transform=ax.transAxes)

    # column labels, rotated below the heatmap
    for col_pos, gene in enumerate(plot_df.columns):
        ax.text((col_pos + 1) / n_cols, -0.29 / n_rows, gene,
                size=12, ha='right', va='top', rotation=47,
                transform=ax.transAxes, clip_on=False)

    out_path = os.path.join(
        plot_dir, '__'.join([args.expr_source, args.cohort]),
        "{}_top-heatmap_{}.svg".format(args.gene, args.classif))
    plt.savefig(out_path, bbox_inches='tight', format='svg')
    plt.close()
def _scatter_by_class(graph_df, xlabel, ylabel, title, out_file):
    """Scatter columns 0 and 1 of *graph_df*, one colour per class, and save to *out_file*."""
    targets = ['0', '1', '2', '3', '4', '5', '6', '7', '8']
    # One colour per target.  With only eight colours, ``zip`` silently
    # dropped class 8 although the legend still listed it.
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'sienna', 'orange']

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(1, 1, 1)
    ax.set_xlabel(xlabel, fontsize=15)
    ax.set_ylabel(ylabel, fontsize=15)
    ax.set_title(title, fontsize=20)

    classes = graph_df['class'].astype(int)
    for target, color in zip(targets, colors):
        rows = classes == int(target)
        ax.scatter(graph_df.loc[rows, 0],
                   graph_df.loc[rows, 1],
                   c=color,
                   alpha=0.65,
                   s=30)
    ax.legend(targets)
    ax.grid()
    fig.savefig(out_file)


def graphing(train, train_truth, n_components=77):
    """Write four exploratory plots of the training data.

    1. ``correlation_map.png``: lower-triangle heatmap of feature correlations.
    2. ``2_pca.png``: first two principal components, coloured by class.
    3. A plotly bar/line chart of individual and cumulative explained
       variance, passed to ``plot`` with the filename
       ``selecting-principal-components.png``.
    4. ``2_lda.png``: two linear discriminants (after ``remove_collinear``),
       coloured by class.

    Parameters
    ----------
    train : array-like
        Feature matrix; it is z-scored before PCA and LDA.
    train_truth : numpy array
        Class labels convertible to the integers 0..8. ``ravel()`` is
        called on it for the LDA fit.
    n_components : int (default = 77)
        Number of principal components to compute and chart.

    Returns
    -------
    None
    """
    n = n_components

    # ---- correlation graph
    corr = pd.DataFrame(train).corr()
    # builtin ``bool``: the ``np.bool`` alias was removed in NumPy 1.24
    mask = np.triu(np.ones_like(corr, dtype=bool))  # hide the upper triangle
    f, ax = plt.subplots(figsize=(11, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.set(style="white")
    sns.heatmap(corr,
                mask=mask,
                cmap=cmap,
                vmax=.3,
                center=0,
                square=True,
                linewidths=.5,
                cbar_kws={"shrink": .5})
    f.savefig('correlation_map.png')

    # ---- PCA
    scaled = stats.zscore(train, ddof=1)
    pca = PCA(n_components=n)
    graph_df = pd.DataFrame(pca.fit_transform(scaled))
    graph_df_var = pca.explained_variance_ratio_
    graph_df['class'] = pd.DataFrame(train_truth)
    _scatter_by_class(graph_df, 'Principal Component 1',
                      'Principal Component 2', '2 component PCA',
                      '2_pca.png')

    # ---- explained variance per principal component
    graph_var_cumulative = np.cumsum(graph_df_var)
    # n components need n labels; ``range(1, n)`` produced only n - 1
    pc_names = ['PC %s' % i for i in range(1, n + 1)]
    trace1 = dict(type='bar', x=pc_names, y=graph_df_var, name='Individual')
    trace2 = dict(type='scatter',
                  x=pc_names,
                  y=graph_var_cumulative,
                  name='Cumulative')
    layout = dict(title='Explained variance by different principal components',
                  yaxis=dict(title='Explained variance in percent'),
                  annotations=list([
                      dict(
                          x=1.16,
                          y=1.05,
                          xref='paper',
                          yref='paper',
                          text='Explained Variance',
                          showarrow=False,
                      )
                  ]))
    fig = dict(data=[trace1, trace2], layout=layout)
    plot(fig, filename='selecting-principal-components.png')

    # ---- LDA
    reduced = pd.DataFrame(remove_collinear(train))
    reduced = stats.zscore(reduced, ddof=1)
    lda = LDA(n_components=2)
    graph_df = pd.DataFrame(lda.fit_transform(reduced, train_truth.ravel()))
    graph_df['class'] = pd.DataFrame(train_truth)
    _scatter_by_class(graph_df, 'Linear Discriminant 1',
                      'Linear Discriminant 2', '2 Discriminant LDA',
                      '2_lda.png')
    return
# ## Correlation matrix between all variables

# In[21]:

sns.set(style='white', font_scale=1)

# Pairwise correlation between the numerical variables
corr = dataset.corr()

# Boolean mask hiding the upper triangle, diagonal included.
# The builtin ``bool`` is used because the ``np.bool`` alias was removed in NumPy 1.24.
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(20, 10))
f.suptitle('Correlation Matrix', fontsize=40)

# Generate a custom diverging colour map
cmap = sns.diverging_palette(10, 0, as_cmap=True)

# Draw the heatmap with the mask and the correct aspect ratio
sns.heatmap(corr, mask=mask, annot=True, cmap=cmap, vmax=1, center=0,
            square=True, linewidth=5, cbar_kws={'shrink': .5})

# # Feature engineering
# Explain Here

# In[22]:

dataset.dtypes

# The dates are stored as objects and need to be converted. One reason is that we can
# then calculate the difference between the first-open date and the enrolled date.
# This difference can be visualised as a distribution.
# Plot the 'bx' field on the z=0 slice of one grid as filled contours.
import matplotlib.pyplot as plt
import seaborn as sns
import viscid
from viscid.plot import vpyplot as vlt

f = viscid.load_file('./otico_001.3d.xdmf')

# Diverging colour map running from hue 28 to hue 240
mymap = sns.diverging_palette(28, 240, s=95, l=50, as_cmap=True)
figure = plt.figure(figsize=(14, 10))

# Grid selected with time=12
g = f.get_grid(time=12)
vlt.plot(g['bx']['z=0'], cmap=mymap, style='contourf', levels=256)
vlt.savefig('OT_bx.png')
plt.show()
from heapq import nlargest

# Keep the three entries of ``b`` with the largest second element
c = nlargest(3, b, key=lambda e: e[1])
print(c)

# Collect the first element of each kept entry; these are used as column names below
d = []
for i in c:
    d.append(i[0])

# Compute the correlation matrix
corr = df[d].corr()
print(corr)

# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5})

#
# # Desafio 7 # Plotar o gráfico de aparições por gênero. Pode ser um gráfico de tipo = barra # In[9]: # Importando as bibliotecas necessárias import matplotlib.pyplot as plt import seaborn as sns # Setando estilos sns.set_style('whitegrid') cmap = sns.diverging_palette(150, 275, l=55, s=80, n=9, center='dark', as_cmap=True) # Tamanho da figura plt.figure(figsize=(15, 8)) # Escolhendo o tipo de gráfico generos.plot(kind='bar', colormap=cmap) # Definindo o título plt.title('Ocorrência de gêneros', fontsize=20) # Definindo o eixo horizontal plt.xlabel("Gêneros", fontsize=16)
cs_i = [] for i in range(0, len(df_norm)): cs_j = [] for j in range(0, len(df_norm)): cs_j.append( float( manual_cosine_similarity( df_norm.iloc[i, :].values.reshape(1, -1), df_norm.iloc[j, :].values.reshape(1, -1)))) cs_i.append(cs_j) return cs_i cosine_similarity_calculation_manually = calculate_cosine_similarity_manually( df_norm) # Creating dataframe for manually calculated cosine similarity cosine_similarity_manual_df = pd.DataFrame( cosine_similarity_calculation_manually) #******************************************************************************* df_list = cosine_similarity_df.iloc[0:10, 0:10] fig, ax = plt.subplots(figsize=(10, 10)) cmap = sns.diverging_palette(240, 10, s=80, l=45, as_cmap=True) sns.heatmap(df_list, cmap=cmap, vmin=0.95, vmax=1, annot=True) linear_kernel(df_norm.iloc[0, :].values.reshape(1, -1), df_norm.iloc[0, :].values.reshape(1, -1))
#%%| import matplotlib.pyplot as plt import numpy as np import pandas as pd import seaborn as sns sns.set(style="whitegrid") test = pd.read_csv('../data/test_edited.csv') train = pd.read_csv('../data/train_edited.csv') default_cmap = sns.diverging_palette(220, 20, n=13) # %% #### All Variables Except Sale Price #### all_but = [ 'Id', 'PID', 'MS_SubClass', 'MS_Zoning', 'Lot_Frontage', 'Lot_Area', 'Street', 'Alley', 'Lot_Shape', 'Land_Contour', 'Utilities', 'Lot_Config', 'Land_Slope', 'Neighborhood', 'Condition_1', 'Condition_2', 'Bldg_Type', 'House_Style', 'Overall_Qual', 'Overall_Cond', 'Year_Built', 'Year_Remod/Add', 'Roof_Style', 'Roof_Matl', 'Exterior_1st', 'Exterior_2nd', 'Mas_Vnr_Type', 'Mas_Vnr_Area', 'Exter_Qual', 'Exter_Cond', 'Foundation', 'Bsmt_Qual', 'Bsmt_Cond', 'Bsmt_Exposure', 'BsmtFin_Type_1', 'BsmtFin_SF_1', 'BsmtFin_Type_2', 'BsmtFin_SF_2', 'Bsmt_Unf_SF', 'Total_Bsmt_SF', 'Heating', 'Heating_QC', 'Central_Air', 'Electrical', '1st_Flr_SF', '2nd_Flr_SF', 'Low_Qual_Fin_SF', 'Gr_Liv_Area', 'Bsmt_Full_Bath', 'Bsmt_Half_Bath', 'Full_Bath', 'Half_Bath', 'Bedroom_AbvGr', 'Kitchen_AbvGr', 'Kitchen_Qual', 'TotRms_AbvGrd', 'Functional', 'Fireplaces', 'Fireplace_Qu', 'Garage_Type', 'Garage_Yr_Blt', 'Garage_Finish', 'Garage_Cars', 'Garage_Area', 'Garage_Qual', 'Garage_Cond', 'Paved_Drive', 'Wood_Deck_SF', 'Open_Porch_SF', 'Enclosed_Porch', '3Ssn_Porch', 'Screen_Porch', 'Pool_Area', 'Pool_QC', 'Fence', 'Misc_Feature', 'Misc_Val', 'Mo_Sold', 'Yr_Sold', 'Sale_Type',
def main(lrate, n_samples, bsize, n_nets):
    """Sample network weights with SGHMC on two-moons data and plot the result.

    Runs ``n_samples`` sampler steps on mini-batches of ``bsize`` training
    points and stores a weight snapshot every 200 steps, keeping only the
    last ``n_nets``. It then shows two contour plots over a 100x100 grid
    on [-3, 3]^2: the mean and the standard deviation of the predictions
    across the kept snapshots.
    """
    X, Y = make_moons(noise=0.2, random_state=0, n_samples=1000)
    X, Y = shuffle(X, Y)
    X = scale(X)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5)
    n_train = X_train.shape[0]

    net = get_net()
    sampler = SGHMCSampler(precondition=True, ignore_burn_in=True)
    lasagne.layers.get_all_params(net, trainable=True)

    Xt = T.matrix()
    Yt = T.matrix()
    U, params = neg_log_like(net, Xt, Yt, Xsize=n_train)
    # the returned updates are not used directly; sampling goes through
    # sampler.step below
    sampler.prepare_updates(U, params, lrate, mdecay=0.01, inputs=[Xt, Yt])

    compute_err = theano.function([Xt, Yt], class_error(net, Xt, Yt))
    predict = theano.function([Xt], lasagne.layers.get_output(net, Xt))

    print("Starting sampling")
    snapshots = deque(maxlen=n_nets)
    for step in range(n_samples):
        lo = (step * bsize) % (n_train - bsize)
        hi = lo + bsize
        batch_x = floatX(X_train[lo:hi])
        batch_y = floatX(Y_train[lo:hi]).reshape((-1, 1))
        _, nll = sampler.step(batch_x, batch_y)

        if step % 1000 == 0:
            total_err, total_nll = compute_err(
                floatX(X_train), floatX(Y_train).reshape(-1, 1))
            print("{}/{} : NLL = {} TOTAL={} ERR = {}".format(
                step, n_samples, nll, total_nll, total_err))
        if step % 200 == 0:
            snapshots.append(lasagne.layers.get_all_param_values(net))

    # get predictions
    grid = np.mgrid[-3:3:100j, -3:3:100j]
    grid_2d = floatX(grid.reshape(2, -1).T)
    preds = np.zeros((grid_2d.shape[0], len(snapshots)))
    preds_test = np.zeros((X_test.shape[0], len(snapshots)))
    for col, weights in enumerate(snapshots):
        lasagne.layers.set_all_param_values(net, weights)
        preds[:, col] = predict(grid_2d).reshape(-1)
        preds_test[:, col] = predict(floatX(X_test)).reshape(-1)

    mean_pred = np.mean(preds, axis=1)
    std_pred = np.std(preds, axis=1)
    mean_pred_test = np.mean(preds_test, axis=1)
    class_pred_test = mean_pred_test > 0.5
    std_pred_test = np.std(preds_test, axis=1)

    def draw_surface(values, cmap, cbar_label, **contour_kwargs):
        # one figure: filled contour of `values` with the test points on top
        fig, ax = plt.subplots(figsize=(10, 6))
        contour = ax.contourf(grid[0], grid[1], values.reshape(100, 100),
                              cmap=cmap, **contour_kwargs)
        ax.scatter(X_test[class_pred_test == 0, 0],
                   X_test[class_pred_test == 0, 1])
        ax.scatter(X_test[class_pred_test == 1, 0],
                   X_test[class_pred_test == 1, 1], color='r')
        cbar = plt.colorbar(contour, ax=ax)
        _ = ax.set(xlim=(-3, 3), ylim=(-3, 3), xlabel='X', ylabel='Y')
        cbar.ax.set_ylabel(cbar_label)

    draw_surface(mean_pred,
                 sns.diverging_palette(250, 12, s=85, l=25, as_cmap=True),
                 'Posterior predictive mean probability of class label = 0',
                 alpha=1.)
    draw_surface(std_pred,
                 sns.cubehelix_palette(light=1, as_cmap=True),
                 'Uncertainty (posterior predictive standard deviation)')
    plt.show()
def make(self, reself: tp.Optional[bool] = False, **kwargs):
    """
    Generate a heatmap plot from the selected columns of the object's dataframe.

        **Parameters**

            reself

                A logical variable. If ``True``, an instance of
                the object will be returned upon exit to the calling
                routine. The default value is ``False``.

            **kwargs

                Each keyword that names an existing attribute of the object
                overwrites that attribute before plotting. The keyword
                ``dataFrame`` stores a weak reference to the given
                dataframe. Any other keyword raises an ``Exception``.

        **Returns**

            the object self if ``reself = True`` otherwise, ``None``.
            However, this method causes side-effects by manipulating
            the existing attributes of the object. In dry-run mode
            (``self._isdryrun``) only the default properties are set
            and ``None`` is returned.

    """

    for key in kwargs.keys():
        if hasattr(self, key):
            setattr(self, key, kwargs[key])
        elif key == "dataFrame":
            setattr(self, "_dfref", wref.ref(kwargs[key]))
        else:
            raise Exception(newline + "Unrecognized input '" + key +
                            "' class attribute detected." + newline +
                            self._getDocString())

    # ---- xticklabels / yticklabels properties

    if isinstance(self.xticklabels.kws, Struct):
        if "horizontalalignment" not in vars(self.xticklabels.kws).keys():
            self.xticklabels.kws.horizontalalignment = "right"
        if "rotation" not in vars(self.xticklabels.kws).keys():
            self.xticklabels.kws.rotation = 45
    else:
        raise Exception(
            newline +
            "The xticklabels.kws component of the current HeatMapPlot object must"
            + newline +
            "be an object of class Struct(), essentially a structure with components"
            + newline +
            "whose names are the input arguments to the set_xticklabels() method of the"
            + newline + "Axes class of the matplotlib library." + newline +
            self._getDocString())

    if isinstance(self.yticklabels.kws, Struct):
        if "horizontalalignment" not in vars(self.yticklabels.kws).keys():
            self.yticklabels.kws.horizontalalignment = "right"
        if "rotation" not in vars(self.yticklabels.kws).keys():
            self.yticklabels.kws.rotation = 45
    else:
        raise Exception(
            newline +
            "The yticklabels.kws component of the current HeatMapPlot object must"
            + newline +
            "be an object of class Struct(), essentially a structure with components"
            + newline +
            "whose names are the input arguments to the set_yticklabels() method of the"
            + newline + "Axes class of the matplotlib library." + newline +
            self._getDocString())

    # ---- heatmap properties

    if isinstance(self.heatmap.kws, Struct):
        if "square" not in vars(self.heatmap.kws).keys():
            self.heatmap.kws.square = True
        if "cmap" not in vars(self.heatmap.kws).keys():
            try:
                import seaborn as sns
                self.heatmap.kws.cmap = sns.diverging_palette(
                    h_neg=self._colorStart,
                    h_pos=self._colorEnd,
                    n=self._colorCount)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt/SystemExit are not swallowed here.
                if self._isdryrun:
                    self.heatmap.kws.cmap = None
                else:
                    raise Exception(
                        newline +
                        "Failed to set the heatmap.kws.cmap component of the current HeatMapPlot object."
                        + newline +
                        "This component depends on the external seaborn Python library. Therefore, it is "
                        + newline +
                        "likely that the seaborn library or one of the required components of it, such as "
                        + newline +
                        "the matplotlib Python library is not properly installed on your system. Please "
                        + newline +
                        "fix this issue, otherwise, the visualization tools of the ParaMonte library "
                        + newline +
                        "will not work as expected. You can install the seaborn library by typing "
                        + newline +
                        "the following commands in your Anaconda3 or Bash command prompt: "
                        + newline + newline +
                        " pip install --user --upgrade matplotlib" +
                        newline +  # the two commands were fused on one line
                        " pip install --user --upgrade seaborn" + newline +
                        self._getDocString())
    else:
        raise Exception(
            newline +
            "The heatmap.kws component of the current HeatMapPlot object must"
            + newline +
            "be an object of class Struct(), essentially a structure with components"
            + newline +
            "whose names are the input arguments to the heatmap() function of the"
            + newline + "seaborn library." + newline + self._getDocString())

    # ---- figure properties

    if self.figure.enabled:
        if isinstance(self.figure.kws, Struct):
            if "dpi" not in vars(self.figure.kws).keys():
                self.figure.kws.dpi = 150
            if "facecolor" not in vars(self.figure.kws).keys():
                self.figure.kws.facecolor = "w"
            if "edgecolor" not in vars(self.figure.kws).keys():
                self.figure.kws.edgecolor = "w"
        else:
            # this message used to name the wrong class (DensityPlot)
            raise Exception(
                newline +
                "The figure.kws component of the current HeatMapPlot object must"
                + newline +
                "be an object of class Struct(), essentially a structure with components"
                + newline +
                "whose names are the input arguments to the figure() function of the"
                + newline + "matplotlib library." + newline +
                self._getDocString())

    # ---- a dry run only sets the defaults above

    if self._isdryrun:
        return

    import seaborn as sns
    import matplotlib.pyplot as plt
    plt.ion()  # turn on the interactive mode. Used to detach the figure from the command line in ipython

    # ---- generate figure and axes if needed

    self._constructBasePlot()

    # ---- check data type

    self._checkDataType()

    # ---- check rows presence. This must be checked here, because it depends on the integrity of the input dataFrame.

    if self.rows is None:
        self.rows = range(len(self._dfref().index))
    rownames = self._dfref().index[self.rows]

    # ---- check columns presence. This must be checked here, because it depends on the integrity of the input dataFrame.

    colnames, colindex = pm.dfutils.getColNamesIndex(
        self._dfref().columns, self.columns)

    # ---- set up tick labels

    xtickExists = True
    if "xticklabels" in vars(self.heatmap.kws).keys():
        if not any(self.heatmap.kws.xticklabels):
            xtickExists = False
    else:
        self.heatmap.kws.xticklabels = colnames

    ytickExists = True
    if "yticklabels" in vars(self.heatmap.kws).keys():
        if not any(self.heatmap.kws.yticklabels):
            # was misspelled ``ytickEyists``, so the flag never changed and
            # the figure height was scaled even without y tick labels
            ytickExists = False
    else:
        self.heatmap.kws.yticklabels = rownames

    # ---- plot data

    if self.annotPrecision is None:
        data = self._dfref().iloc[self.rows, colindex]
    else:
        data = self._dfref().iloc[self.rows, colindex].round(
            decimals=self.annotPrecision)
    self.currentFig.axes = sns.heatmap(data=data, **vars(self.heatmap.kws))

    # ---- configure the tick labels (orientation, ...)

    self.currentFig.axes.set_xticklabels(
        self.currentFig.axes.get_xticklabels(), **vars(self.xticklabels.kws))
    self.currentFig.axes.set_yticklabels(
        self.currentFig.axes.get_yticklabels(), **vars(self.yticklabels.kws))

    plt.tight_layout()

    if self.figure.enabled:

        # default figure size

        figWidth = 6.4  # inches
        figHeight = 4.8  # inches
        figWidthScale = 1
        figHeightScale = 1
        threshDimension = 10  # scale only if ticklabels are present

        if xtickExists:
            figWidthScale = max(1, self._dfref().shape[1] / threshDimension)
            figWidth *= figWidthScale

        if ytickExists:
            figHeightScale = max(1, self._dfref().shape[0] / threshDimension)
            figHeight *= figHeightScale

        self.currentFig.figure.set_size_inches(figWidth, figHeight)

    if reself:
        return self
# In[ ]:

import seaborn as sns

# Pairwise correlation of the engineered feature columns
corr = df_test[[
    "P2", "P3", "norm_len_name", "title_ms", "title_mrs", "title_mr",
    "title_others", "is_male", "age_norm", "norm_family_size", "norm_fare",
    "cab_b", "cab_c", "cab_d", "cab_e", "cab_f", "cab_g", "cab_z", "embQ",
    "embS"
]].corr()

# Mask the upper triangle, diagonal included.
# The builtin ``bool`` is used because the ``np.bool`` alias was removed in NumPy 1.24.
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(0, 50, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5})

# In[ ]:
# Count of missing values per column
pd.isnull(laWeather).sum()
"""# Analysis: Weather"""

# a weather correlation matrix was created to see what kind of relationships each variable had with one another
corr = laWeather.corr()
# NOTE(review): the styled object is not assigned, so it is presumably only
# displayed when this runs as the last expression of a notebook cell
corr.style.background_gradient(cmap='coolwarm')

# weather correlation matrix 2
fig, ax = plt.subplots(figsize=(10, 8))
corr = laWeather.corr()
ax = sns.heatmap(corr,
                 vmin=-1,
                 vmax=1,
                 center=0,
                 cmap=sns.diverging_palette(20, 220, n=200),
                 square=True)
# Rotate the x tick labels and right-align them
ax.set_xticklabels(ax.get_xticklabels(),
                   rotation=45,
                   horizontalalignment='right')

# precipitation by hour heatmap
#x = ['1am', '2am', '3am', '4am', '5am', '6am', '7am', '8am', '9am', '10am', '11am', '12pm', '1pm', '2pm', '3pm', '4pm', '5pm', '6pm', '7pm', '8pm', '9pm', '10pm', '11pm', '12am']
# Number of observations for each (Precipitation, hours) pair
rain_hour_pt = laWeather.pivot_table(index='Precipitation',
                                     columns='hours',
                                     aggfunc='size')
# Divide by the per-row maximum (``max(axis=1)``)
rain_hour_pt = rain_hour_pt.apply(lambda x: x / rain_hour_pt.max(axis=1))
plt.figure(figsize=(15, 5))
plt.title('Precipitation by hour', fontsize=14)
#plt.xlabel('x')
sns.heatmap(rain_hour_pt, cbar=True, annot=False, fmt=".0f", cmap="Blues")
def correlation_plot():
    """Draw a masked heatmap of the correlations between the columns of ``data``.

    Reads two module-level names: ``data``, whose ``corr()`` result is
    plotted, and ``mask``, which is passed unchanged to ``sns.heatmap``.
    The heatmap is drawn on a new 11x9 inch figure. Nothing is returned.
    """
    # Compute the correlation once.  It used to be computed twice, with the
    # first result thrown away.
    corr = data.corr()
    f, ax = plt.subplots(figsize=(11, 9))  # Set up the matplotlib figure
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(corr,
                mask=mask,
                cmap=cmap,
                vmax=.3,
                center=0,
                square=True,
                linewidths=.5,
                cbar_kws={"shrink": .5},
                ax=ax)  # draw explicitly on the axes created above
# TODO: HOW TO SAVE THIS IN A DJ TABLE FOR LATER? parsdict = { 'threshold': r'Threshold $(\sigma)$', 'bias': r'Bias $(\mu)$', 'lapselow': r'Lapse low $(\gamma)$', 'lapsehigh': r'Lapse high $(\lambda)$' } ylims = [[-5, 105], [-105, 105], [-0.05, 1.05], [-0.05, 1.05]] yticks = [[0, 19, 100], [-100, -16, 0, 16, 100], [-0, 0.2, 0.5, 1], [-0, 0.2, 0.5, 1]] # pick a good-looking diverging colormap with black in the middle cmap = sns.diverging_palette( 20, 220, n=len(behav['probabilityLeft_block'].unique()), center="dark") if len(behav['probabilityLeft_block'].unique()) == 1: cmap = "gist_gray" sns.set_palette(cmap) # plot the fitted parameters for pidx, (var, labelname) in enumerate(parsdict.items()): ax = axes[pidx, 1] sns.lineplot(x="date", y=var, marker='o', hue="probabilityLeft_block", linestyle='', lw=0,
def radiocolorf(freq, fmin=1.0, fmax=45.0):
    """Return the hex colour of *freq* on a dark-centred diverging palette.

    The frequency is mapped linearly so that ``fmin`` becomes 0 and
    ``fmax`` becomes 1. The result is looked up in
    ``sns.diverging_palette(200, 60, l=80, center="dark")`` and converted
    with ``rgb2hex``.

    Parameters
    ----------
    freq : number or numeric string
        Frequency to colour; converted with ``float``.
    fmin, fmax : float
        Frequencies mapped to the two ends of the palette. The defaults
        (1.0 and 45.0) reproduce the previously hard-coded band.

    Raises
    ------
    ZeroDivisionError
        If ``fmin == fmax``.
    """
    ffreq = (float(freq) - fmin) / (fmax - fmin)
    pal = sns.diverging_palette(200, 60, l=80, as_cmap=True, center="dark")
    return rgb2hex(pal(ffreq))
matplotlib.use('AGG')
import matplotlib.pyplot as plt
import seaborn as sns

if __name__ == '__main__':
    # Number of most significant genes to report and plot
    ntop = 30

    cases = pd.read_csv('cases.csv')
    ctrls = pd.read_csv('ctrls.csv')
    genes = ctrls.columns

    # Column-wise t-test between cases and controls; keep the smallest p-values
    scores, pvals = st.ttest_ind(cases, ctrls)
    top_idx = np.argsort(pvals)[:ntop]

    df = pd.DataFrame(np.array([genes[top_idx], pvals[top_idx]]).T,
                      columns=['Gene', 'p'])

    with open('table.tex', 'w') as f:
        f.write(
            tabulate(df,
                     headers=list(df.columns),
                     tablefmt="latex",
                     floatfmt=".4f"))

    plt.figure(tight_layout=True)
    cmap = sns.diverging_palette(255, 1, n=3, as_cmap=True)
    # ``top_idx`` holds positions, so select with ``.iloc`` (``.ix`` was
    # removed in pandas 1.0).  ``sort_index(axis=1)`` replaces the removed
    # ``DataFrame.sort(axis=1)``, which ordered the columns by label.
    sns.clustermap(pd.concat([
        ctrls.iloc[:, top_idx].sort_index(axis=1),
        cases.iloc[:, top_idx].sort_index(axis=1)
    ]).T,
                   cmap=cmap)
    plt.savefig('heatmap.png')
iqr=q75-q25 min=q25 -(1.5*iqr) max=q75 +(1.5*iqr) bike_df=bike_df.drop(bike_df[bike_df.loc[:,i]<min].index) bike_df=bike_df.drop(bike_df[bike_df.loc[:,i]>max].index) bike_df.describe() #Feature Selection f, ax=plt.subplots(figsize=(7,5)) n_names = ['temp','atemp','hum','windspeed'] df = bike_df.loc[:,n_names] sns.heatmap(df.corr(),mask=np.zeros_like(df.corr(),dtype=np.bool), cmap=sns.diverging_palette(220,10,as_cmap=True),ax=ax,annot = True) cnames = ['season','workingday','weathersit','yr','mnth'] from scipy.stats import chi2_contingency for i in cnames: print(i) chi2,p,dof,ex = chi2_contingency(pd.crosstab(bike_df['cnt'],bike_df[i])) print(p) #dropping correlated variable bike_df = bike_df.drop(['atemp'], axis=1) bike_df.shape bike_df['temp'] = bike_df['temp']*39 bike_df['hum'] = bike_df['hum']*100 bike_df['windspeed'] = bike_df['windspeed']*67
def dendrogram(root, data_in, labels=None, index=None, model=None, n_max=150):
    """Generate and save the dendrogram obtained from the clustering algorithm.

    This function generates the dendrogram obtained from the clustering
    algorithm applied on the data. The plot is saved as
    ``<basename(root)>_dendrogram.<DEFAULT_EXT>`` inside ``root``. The row
    colors of the heatmap are either the true or the estimated data
    labels. No row colors are drawn when the affinity is ``'precomputed'``.

    Parameters
    -----------
    root : string
        The root path for the output creation
    data_in : array of float, shape : (n_samples, n_dimensions)
        The low space embedding estimated by the dimensionality reduction
        and manifold learning algorithm. When ``model.affinity`` is
        ``'precomputed'`` it is passed to ``scipy``'s ``squareform``.
    labels : array of int, shape : n_samples
        The result of the clustering step.
    index : list of integers (or strings)
        This is the samples identifier, if provided as first column (or
        row) of the input file. Otherwise it is just an incremental range
        of size n_samples.
    model : sklearn or sklearn-like object
        Required despite the ``None`` default: its ``affinity`` and
        ``linkage`` attributes are read. It is not modified.
    n_max : int, (INACTIVE)
        Unused by the current implementation.
    """
    # define col names
    col = ["$x_{" + str(i) + "}$" for i in np.arange(0, data_in.shape[1], 1)]
    df = pd.DataFrame(data=data_in, columns=col, index=index)

    # -- Code for row colors adapted from:
    # https://stanford.edu/~mwaskom/software/seaborn/examples/structured_heatmap.html
    # Create a custom palette to identify the classes
    if labels is None:
        labels = np.zeros(df.shape[0], dtype=np.short)
    else:
        # remap arbitrary label values onto 0..k-1
        mapping = dict(
            zip(np.unique(labels), np.arange(np.unique(labels).shape[0])))
        labels = np.vectorize(mapping.get)(labels)

    n_colors = np.unique(labels).shape[0]
    custom_pal = sns.color_palette("hls", n_colors)
    custom_lut = dict(zip(map(str, range(n_colors)), custom_pal))

    # One colour per row, as a plain list so seaborn applies it by position.
    # A Series indexed 0..n-1 gets re-aligned to ``df.index`` by clustermap,
    # which blanked every row colour whenever a custom ``index`` was given.
    custom_colors = [custom_lut[str(label)] for label in labels]

    # Create a custom colormap for the heatmap values
    cmap = sns.diverging_palette(220, 20, n=7, as_cmap=True)

    affinity = model.affinity
    if affinity == 'precomputed':
        import scipy.spatial.distance as ssd
        from scipy.cluster.hierarchy import linkage
        # convert the redundant square matrix into a condensed one.
        # Even if the docs of scipy said so, linkage function does not
        # understand that the matrix is precomputed, unless it is 1-dimensional
        Z = linkage(ssd.squareform(data_in),
                    method=model.linkage,
                    metric='euclidean')
        g = sns.clustermap(df,
                           method=model.linkage,
                           row_linkage=Z,
                           col_linkage=Z,
                           linewidths=.5,
                           cmap=cmap)
    else:
        # workaround to a different name used for manhattan/cityblock
        # distance; a local name is used so the caller's model is not altered
        metric = 'cityblock' if affinity == 'manhattan' else affinity
        g = sns.clustermap(df,
                           method=model.linkage,
                           metric=metric,
                           row_colors=custom_colors,
                           linewidths=.5,
                           cmap=cmap)

    plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=5)
    filename = os.path.join(
        root,
        os.path.basename(root) + '_dendrogram.' + DEFAULT_EXT)
    g.savefig(filename)
    logging.info('Figure saved %s', filename)
    plt.close()
# One histogram per column, each saved as hist_<column>.png
for i in headers:
    plt.hist(data[i])
    plt.xlabel(i)
    plt.ylabel('Count')
    hname = 'hist_' + i + '.png'
    plt.savefig(hname)
    plt.clf()

## correlation matrix (seaborn heatmap)
f, ax = plt.subplots(figsize=(10, 8))
corr = df.corr()
# The mask is all False, so every cell is shown.  The builtin ``bool`` is
# used because the ``np.bool`` alias was removed in NumPy 1.24.
sns.heatmap(corr,
            mask=np.zeros_like(corr, dtype=bool),
            cmap=sns.diverging_palette(100, 220, as_cmap=True),
            square=True,
            ax=ax)
plt.savefig('diabetes_corr_matrix.png')
plt.clf()

## 2 feature/variable scatter plots (Y vs. X)
for i in range(0, len(headers)):
    xvar = headers[i]
    # the last header is never used as the y variable
    for j in range(0, len(headers) - 1):
        if i != j:
            yvar = headers[j]
            plt.scatter(data[xvar], data[yvar])
test_features = generator.return_vec(
    test_atoms, [generator.eigenspectrum_vec, generator.composition_vec])

print('{} shape training feature matrix'.format(np.shape(train_features)))
print('{} shape testing feature matrix'.format(np.shape(test_features)))

# After this, we can analyze the distribution of the feature sets. In the following, we see a large number of features in the latter half of the vectors tend to be zero.

# In[6]:

# Per-feature range (max - min) of the training set
dif = np.max(train_features, axis=0) - np.min(train_features, axis=0)
# Replace zero ranges with 1 so the division below cannot divide by zero
np.place(dif, dif == 0., [1.])
mean = np.mean(train_features, axis=0)
# Centre on the training mean and divide by the training range
scaled = (train_features.copy() - mean) / dif
plt.figure(num=0, figsize=(30, 15))
cmap = sns.diverging_palette(250, 15, s=75, l=40, n=1000, center="dark")
sns.heatmap(scaled, cmap=cmap)
plt.savefig('train_features.png')

# In[7]:

# The test set is scaled with the training mean and range computed above
scaled = (test_features.copy() - mean) / dif
plt.figure(num=1, figsize=(30, 15))
cmap = sns.diverging_palette(250, 15, s=75, l=40, n=1000, center="dark")
sns.heatmap(scaled, cmap=cmap)
plt.savefig('test_features.png')

# We can make some parallel coordinate plots using pandas to get a slightly better idea of how the feature vectors look. Initially, we set up the dataframe containing the training data.

# In[8]:
### FACETING multiple line plots, especially for different cols with different scales ###
df.plot(subplots=True,
        linewidth=0.5,
        layout=(2, 4),  # specifies no. of rows & cols in the figure
        figsize=(16, 10),
        sharex=False,
        sharey=False)
plt.show()

### heatmap of correlation matrix
corr_mat = df.corr(method='pearson')
import seaborn as sns
#sns.heatmap(corr_mat, annot=True, linewidths=0.4, annot_kws={"size": 10})
# ``ax`` was never created in this snippet; give the heatmap its own axes.
_, ax = plt.subplots()
# The builtin ``bool`` is used because the ``np.bool`` alias was removed in NumPy 1.24.
sns.heatmap(corr_mat,
            mask=np.zeros_like(corr_mat, dtype=bool),
            cmap=sns.diverging_palette(220, 10, as_cmap=True),
            square=True,
            ax=ax)
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()

# clustermap to group together similar columns (using hierarchical clustering)
# Keep the returned grid: its ``ax_heatmap`` is needed to rotate the tick
# labels.  The result used to be discarded while an undefined ``fig`` was read.
cluster_grid = sns.clustermap(corr_mat, row_cluster=True, col_cluster=True)
plt.setp(cluster_grid.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
plt.setp(cluster_grid.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
plt.show()

### scatterplots, with points colored by group
import numpy as np
#选取IV>0.01的变量 high_IV = {k:v for k, v in IV_dict.items() if v >= 0.02} high_IV_sorted = sorted(high_IV.items(),key=lambda x:x[1],reverse=True) short_list = high_IV.keys() short_list_2 = [] for var in short_list: newVar = var + '_WOE' trainData[newVar] = trainData[var].map(WOE_dict[var]) short_list_2.append(newVar) #对于上一步的结果,计算相关系数矩阵,并画出热力图进行数据可视化 trainDataWOE = trainData[short_list_2] f, ax = plt.subplots(figsize=(10, 8)) corr = trainDataWOE.corr() sns.heatmap(corr, mask=np.zeros_like(corr, dtype=np.bool), cmap=sns.diverging_palette(220, 10, as_cmap=True),square=True, ax=ax) plt.show() #两两间的线性相关性检验 #1,将候选变量按照IV进行降序排列 #2,计算第i和第i+1的变量的线性相关系数 #3,对于系数超过阈值的两个变量,剔除IV较低的一个 deleted_index = [] cnt_vars = len(high_IV_sorted) for i in range(cnt_vars): if i in deleted_index: continue x1 = high_IV_sorted[i][0]+"_WOE" for j in range(cnt_vars): if i == j or j in deleted_index: continue
# Select a subset of the networks
used_networks = [1, 5, 6, 7, 8, 11, 12, 13, 16, 17]
used_columns = (
    df.columns.get_level_values("network").astype(int).isin(used_networks))
df = df.loc[:, used_columns]

# Create a custom palette to identify the networks
network_pal = sns.cubehelix_palette(len(used_networks),
                                    light=.9, dark=.1, reverse=True,
                                    start=1, rot=-2)
# The "network" level values are mapped through string keys, hence str().
network_lut = dict(zip(map(str, used_networks), network_pal))

# Convert the palette to vectors that will be drawn on the side of the matrix.
# The Series is indexed by df.columns so that its labels match the rows and
# columns of df.corr(); with a default integer index the colours cannot be
# aligned with the matrix labels.
networks = df.columns.get_level_values("network")
network_colors = pd.Series(networks, index=df.columns).map(network_lut)

# Create a custom colormap for the heatmap values
cmap = sns.diverging_palette(h_neg=210, h_pos=350, s=90, l=30, as_cmap=True)

# Draw the full plot
sns.clustermap(df.corr(),
               row_colors=network_colors, col_colors=network_colors,
               linewidths=.5, figsize=(13, 13), cmap=cmap)
for date_forward in range(1, lags + 1): print(date_forward) dv_forward = dv + '_forward' + str(date_forward) dfcoeff, dfpvals = correlation_matrix(df_temp, var_list_wout_dv, dv_forward) dfcoeff_all_lags = pd.concat([dfcoeff_all_lags, dfcoeff], axis=1) dfpvals_all_lags = pd.concat([dfpvals_all_lags, dfpvals], axis=1) return dfcoeff_all_lags, dfpvals_all_lags dfcoeff_all_lags, dfpvals_all_lags = viz_corrs_of_lagged_variables_with_dv( df_test, 10, 'resting_hr', var_list_wout_dv, date_to_hr_resting_dict) # i need to get the n here to see how much i'm shinking sample size fig = plt.figure(figsize=(8, 6)) cmap_enter = sns.diverging_palette(15, 125, sep=10, s=70, l=50, as_cmap=True) sns.heatmap(dfcoeff_all_lags, center=0, square=False, annot=True, fmt='.2f', annot_kws={'size': 11}, cmap=cmap_enter, vmin=-.4, vmax=.4, cbar_kws=dict(use_gridspec=False, location='top')) # cbar=False, # NEXT -- # MAKE SURE NOT LOOSING TONS OF N FOR CORR # CAN I GET PARTIAL OR SEMI-PARTIAL (YEAH) CORRS? HOW TO COMPUTE? # PARTIALLING OUT PRIOR RESTING HR AT THE SAME LAG
# In[44]:

####Correlation Plot#######
corr = data.corr()

# Generate a mask for the upper triangle (True cells are hidden).
# The builtin `bool` is used because the `np.bool` alias was removed in
# NumPy 1.24.
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(15, 13))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio, explicitly on the
# axes created above.
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)

# # Statistical Model - Logistic Model

# In[ ]:
for q in range (0,4,2): contact_var_anxiety_BC[r] = you_inf_anxiety_count[q] # for age group bc r += 1 for q in range (0,4,2): contact_var_anxiety_BC[r] = home_inf_anxiety_count[q] #for education bc r += 1 for q in range (0,4,2): contact_var_anxiety_BC[r] = fnr_inf_anxiety_count[q]# for marital bc r += 1 f, ax = plt.subplots(figsize=(10, 8)) sns.heatmap(contact_var_anxiety_BC, vmin=0, vmax=1350, xticklabels=x_labels, yticklabels = y_labels, mask=np.zeros_like(contact_var_anxiety_BC, dtype=np.bool), cmap=sns.diverging_palette(220, 10, as_cmap=True), square=True, ax=ax) ##################################################################### ################# for Contact DC data ########################### contact_var_anxiety_DC = np.zeros((8,5),dtype = np.double) r = 0 for q in range (1,4,2): contact_var_anxiety_DC[r] = consultancy_anxiety_count[q] # for age group bc r += 1 for q in range (1,4,2):
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from paper_behavior_functions import (seaborn_style, institution_map,
                                      group_colors, figpath, load_csv,
                                      FIGURE_WIDTH, FIGURE_HEIGHT,
                                      num_star)

# Load some things from paper_behavior_functions
# NOTE: the next statements rebind `figpath` and `institution_map` from the
# imported functions to the values those functions return.
figpath = Path(figpath())
seaborn_style()
institution_map, col_names = institution_map()
pal = group_colors()
cmap = sns.diverging_palette(h_neg=20, h_pos=220, n=3, center="dark")

# ========================================== #
#%% 1. GET GLM FITS FOR ALL MICE
# ========================================== #

print('loading model from disk...')
params_basic = load_csv('model_results', 'params_basic.csv')
params_full = load_csv('model_results', 'params_full.csv')

# Join the two parameter tables on the columns that identify one subject.
merge_keys = ['institution_code', 'subject_nickname']
combined = params_basic.merge(params_full, on=merge_keys)

# ========================================== #
# PRINT SUMMARY AND STATS
# ========================================== #
full_corr_df = pd.merge(corr_df, pval_df, how='left', left_on='feature', right_on='feature') full_corr_df.sort_values(by='pearsonr', inplace=True, ascending=False) full_corr_df.head() full_corr_df.tail() if plot_hist: _ = full_corr_df.hist(column='pearsonr', figsize=(10, 7), grid=False) return full_corr_df colors = sns.diverging_palette(10, 220, sep=80, n=len(full_corr_df)).as_hex() colors = colors[::-1] sns.set_palette(colors) analysis_type = 'synonymous' #'primary' #synonymous if analysis_type == 'primary': df = prim_df.copy() else: df = syn_df.copy() full_corr_df = get_pearsonr_per_feature(df, analysis_type=analysis_type) title = '[' + analysis_type + ' analysis] Keep only genes with pval < 1' _ = full_corr_df.plot.barh(x='feature', y='pearsonr', figsize=(16, 18),