def display_trial_stats(df, title_prefix, ylim_bottom, ylim_top):
    """
    Display summary statistics and a time-series plot for a simulation
    stats DataFrame.

    The DataFrame is read through the columns ``Trial``, ``trial_length``
    (number of steps in each trial), ``total_reward``, ``negative_reward``
    and ``reached_destination`` (whether the trial reached the designated
    destination).

    Parameters
    ----------
    df : pandas.DataFrame
        One row per trial, with the columns listed above.
    title_prefix : str
        Text prepended to the plot title.
    ylim_bottom, ylim_top : number
        Lower and upper limits of the y axis.
    """
    successes = df[df.reached_destination == True].Trial  # noqa: E712 (element-wise)
    failures = df[df.reached_destination == False].Trial  # noqa: E712 (element-wise)
    n_trials = df.shape[0]

    # A single parenthesised argument prints identically whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("The destination was reached in {} out of {} trials.".format(
        successes.shape[0], n_trials))
    display(df[['total_reward', 'negative_reward', 'trial_length']].describe().T)

    sns.set(font_scale=1.5, style={"axes.facecolor": "white"})
    sns.plt.figure(figsize=(16, 8))
    ax = sns.tsplot(df.trial_length, color='.75', legend=True,
                    condition='Trial Length')
    ax = sns.tsplot(df.total_reward, color='#106B70', legend=True,
                    condition='Total Reward')
    ax = sns.tsplot(df.negative_reward, color='#D43500', legend=True,
                    condition='Negative Reward')
    # Full-height translucent rugs shade each trial by outcome.
    ax = sns.rugplot(successes, color='green', height=1, linewidth=10,
                     alpha=0.1)
    ax = sns.rugplot(failures, color='red', height=1, linewidth=10,
                     alpha=0.1)
    sns.plt.legend(labels=['Trial Length', 'Total Reward', 'Negative Reward',
                           'Reached Destination'],
                   frameon=True)
    ax.set(xlabel='Trial', ylabel='Value')
    ax.set_title(title_prefix +
                 ': Trial Length, Total Reward, and Negative Reward for each Trial')
    sns.plt.ylim(ylim_bottom, ylim_top)
    # Zero reference line spanning every trial (previously fixed at 100).
    sns.plt.plot([0, n_trials], [0, 0], linewidth=1, color='.5')
def plot_rugplot(df, column='AirTime', jitter=0.0, seed=0):
    """
    Draw a rug plot of one DataFrame column, optionally jittered.

    Parameters
    ----------
    df: A pandas.DataFrame
    column: The column to use in "df"
    jitter: An int or float. Default: 0.
            If jitter > 0, numpy.random.normal() draws one sample per row
            from a normal distribution with zero mean and standard
            deviation equal to "jitter"; the samples are added to the data.
    seed: An int. Passed to numpy.random.seed() before sampling.

    Returns
    -------
    A matplotlib.axes.Axes
    """
    figure, axis = plt.subplots(figsize=(10, 6))
    axis.set_xlabel(column)
    axis.set_ylim(0, 1)

    if jitter > 0:
        np.random.seed(seed)
        noise = np.random.normal(0, jitter, len(df[column]))
    else:
        noise = 0

    jittered = df[column] + noise
    sns.rugplot(jittered, height=0.5, ax=axis)
    return axis
def kde_tissue(tissue, q, genes, x, y, dfplot, dfindex, ax, label, col='b'):
    """
    Plot the distribution of values for the genes expressed in one tissue.

    tissue -- tissue to plot
    q -- threshold; only rows of dfplot with dfplot[y] < q are kept
    dfindex -- dataframe with columns 'expressed', 'tissue' and 'gene'
    dfplot -- the dataframe containing columns x, y and genes
    x -- the name of the column containing the values to plot
    y -- the name of the column used to slice the dataframe (q or p value)
    genes -- the name of the column containing the gene names
    label -- legend label; ' n= <number of unique genes>' is appended
    ax -- axis to plot in
    col -- color
    """
    in_tissue = (dfindex.expressed == 1) & (dfindex.tissue == tissue)
    # A further restriction to genes seen in a single tissue was disabled
    # in the original source:
    # & (~dfindex[dfindex.expressed == 1].duplicated('gene'))
    tissue_genes = dfindex[in_tissue].gene

    keep = (dfplot[genes].isin(tissue_genes)) & (dfplot[y] < q)
    selected = dfplot[keep]
    values = selected[x]
    n_genes = len(selected[genes].unique())

    # NOTE(review): nesting reconstructed from a whitespace-collapsed
    # source; confirm the rug is only meant to accompany the KDE.
    if len(values) > 15:
        sns.kdeplot(values, color=col, label=label + ' n= {0}'.format(n_genes),
                    ax=ax, lw=5, cut=0.5)
        if len(values) <= 20:
            sns.rugplot(values, color=col, ax=ax, height=.07, lw=2)
def dist_small_multiples(df, figsize=(20, 20)):
    """
    Small multiples plots of the distribution of a dataframe's variables.

    One subplot is drawn per column (shaded KDE plus rug) on the smallest
    square grid that fits all columns.

    Parameters
    ----------
    df : pandas.DataFrame whose columns are plotted.
    figsize : (width, height) passed to ``plt.figure``.

    Returns
    -------
    (fig, axes) : the figure and the list of axes, in column order.
    """
    import math

    sns.set_style("white")
    num_plots = len(df.columns)
    n = int(math.ceil(math.sqrt(num_plots)))
    fig = plt.figure(figsize=figsize)
    axes = [plt.subplot(n, n, i) for i in range(1, num_plots + 1)]

    # The rug colour is loop-invariant, so look it up once.
    rug_color = sns.color_palette("husl", 3)[0]

    # ``DataFrame.items`` replaces ``iteritems``, which pandas 2.0 removed.
    for ax, (name, values) in zip(axes, df.items()):
        sns.kdeplot(values, shade=True, ax=ax, legend=False)
        sns.rugplot(values, ax=ax, c=rug_color)
        for tick_label in ax.get_yticklabels():
            tick_label.set_visible(False)
        # Only label the extremes of each variable on the x axis.
        ax.xaxis.set_ticks([values.min(), values.max()])
        ax.set_title(name)

    sns.despine(left=True, trim=True, fig=fig)
    plt.tight_layout()
    return fig, axes
def plot_load(load, nb_resources=None, ax=None, normalize=False,
              time_scale=False, load_label="load", UnixStartTime=0,
              TimeZoneString='UTC'):
    '''
    Plots the number of used resources against time

    :load: frame with a ``load`` column; when `time_scale` is set its
        index is reset and a ``time`` column (seconds) is read
    :nb_resources: number of available resources; when normalizing and
        not given, the maximum of the load is used
    :ax: axes to draw on, defaults to the current axes
    :normalize: if True normalize by the number of resources `nb_resources`
    :time_scale: if True convert the time index to datetimes, offset by
        `UnixStartTime` seconds and expressed in `TimeZoneString`
    :load_label: label used for the load curve and in the legend
    '''
    mean = metrics.load_mean(load)
    u = load.copy()

    if time_scale:
        # make the time index a column
        u = u.reset_index()
        # convert timestamp to datetime
        u.index = pd.to_datetime(u['time'] + UnixStartTime, unit='s')
        # tz_localize/tz_convert return a new index; it has to be assigned
        # back, otherwise the requested time zone is silently ignored.
        u.index = u.index.tz_localize('UTC').tz_convert(TimeZoneString)

    if normalize and nb_resources is None:
        nb_resources = u.load.max()

    if normalize:
        u.load = u.load / nb_resources
        mean = mean / nb_resources

    # get an axe if not provided
    if ax is None:
        ax = plt.gca()

    # leave room to have better view
    ax.margins(x=0.1, y=0.1)

    # plot load
    u.load.plot(drawstyle="steps-post", ax=ax, label=load_label)

    # plot a line for max available area
    if nb_resources and not normalize:
        ax.plot([u.index[0], u.index[-1]],
                [nb_resources, nb_resources],
                linestyle='-', linewidth=2,
                label="Maximum resources ({})".format(nb_resources))

    # plot a line for mean utilisation
    ax.plot([u.index[0], u.index[-1]],
            [mean, mean],
            linestyle='--', linewidth=1,
            label="Mean {0} ({1:.2f})".format(load_label, mean))

    # mark every instant at which the load is zero
    sns.rugplot(u.load[u.load == 0].index, ax=ax, color='r')
    # empty scatter: only provides a legend entry for the rug marks
    ax.scatter([], [], marker="|", linewidth=1, s=200,
               label="Reset event ({} == 0)".format(load_label), color='r')
    # FIXME: Add legend when this bug is fixed
    # https://github.com/mwaskom/seaborn/issues/1071

    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
def kde_value(value, q, dfplot, dfindex, ax, label, col='b',
              min_length=10, rug_length=20):
    """
    Plot the distribution of `b` for the genes whose `effect` equals `value`.

    value -- value of dfindex.effect selecting the genes
    q -- threshold; only rows of dfplot with qval < q are kept
    dfplot -- dataframe with columns 'ens_gene', 'qval' and 'b'
    dfindex -- dataframe with columns 'effect' and 'gene'
    ax -- axis to plot in
    label -- legend label; ' n= <number of unique genes>' is appended
    col -- color
    min_length -- a KDE is drawn only when more than this many values remain
    rug_length -- a rug is added to the KDE when fewer than this many
                  values remain
    """
    wanted_genes = dfindex[dfindex.effect == value].gene
    keep = (dfplot.ens_gene.isin(wanted_genes)) & (dfplot.qval < q)
    selected = dfplot[keep]

    values = selected.b
    n_genes = len(selected.ens_gene.unique())
    full_label = label + ' n= {0}'.format(n_genes)

    # NOTE(review): nesting reconstructed from a whitespace-collapsed
    # source; the `else` is taken to pair with the min_length test.
    if len(values) > min_length:
        sns.kdeplot(values, color=col, label=full_label, ax=ax, lw=5, cut=0.5)
        if len(values) < rug_length:
            sns.rugplot(values, color=col, ax=ax, height=.1, lw=2)
    else:
        print('too few values to plot {0}'.format(full_label))
def start(self):
    """
    Draw the SPE spectrum and SPE-fit-value figures.

    Reads ``self.hist``, ``self.edges``, ``self.spe``, ``self.spe_sigma``
    and the masking helper ``self.dead``; draws onto the figures
    ``self.fig_spectrum_all``, ``self.fig_spectrum_tm_list``,
    ``self.fig_combgaus``, ``self.fig_kde`` and ``self.fig_hist``.
    """
    # Normalise histogram
    bin_widths = np.diff(self.edges, axis=1)
    norm = np.sum(bin_widths * self.hist, axis=1)
    hist = self.hist / norm[:, None]

    # Roll axis for easier plotting
    hist_r = np.rollaxis(hist, 1)
    nbins, _ = hist_r.shape
    e = self.edges[0]
    # Duplicate every bin value and every edge so that the curve is
    # drawn as flat-topped steps.
    hist_tops = np.insert(hist_r, np.arange(nbins), hist_r, axis=0)
    edges_tops = np.insert(e, np.arange(e.shape[0]), e, axis=0)[1:-1]

    # Mask dead pixels
    spe = self.dead.mask1d(self.spe)
    spe_sigma = self.dead.mask1d(self.spe_sigma)
    hist_tops = self.dead.mask2d(hist_tops)
    spe_values = spe.compressed()

    # Spectrum with all pixels
    self.log.info("Plotting: spectrum_all")
    ax_spectrum_all = self.fig_spectrum_all.add_subplot(1, 1, 1)
    ax_spectrum_all.semilogy(edges_tops, hist_tops, color='b', alpha=0.2)
    ax_spectrum_all.set_xlabel("Amplitude (p.e.)")
    ax_spectrum_all.set_ylabel("Probability")

    # Spectrum for each tm
    self.log.info("Plotting: spectrum_tm")
    # NOTE(review): (32, 64) presumably means 32 modules of 64 pixels - confirm.
    hist_tops_tm = np.reshape(hist_tops, (hist_tops.shape[0], 32, 64))
    for tm, fig in enumerate(self.fig_spectrum_tm_list):
        ax = fig.add_subplot(1, 1, 1)
        ax.set_title("SPE Spectrum, TM {}".format(tm))
        ax.semilogy(edges_tops, hist_tops_tm[:, tm], color='b', alpha=0.2)
        ax.set_xlabel("Amplitude (p.e.)")
        ax.set_ylabel("Probability")

    # Combined gaussian of each spe value
    self.log.info("Plotting: combined_gaussian")
    ax_comgaus = self.fig_combgaus.add_subplot(1, 1, 1)
    x = np.linspace(-1, 4, 200)
    kernels = [
        stats.norm(val, sigma).pdf(x)
        for val, sigma in zip(spe_values, spe_sigma.compressed())
    ]
    sns.rugplot(spe_values, color=".2", linewidth=1, ax=ax_comgaus)
    density = np.sum(kernels, axis=0)
    density /= integrate.trapz(density, x)
    ax_comgaus.plot(x, density)
    ax_comgaus.set_xlabel("SPE Fit Value (p.e.)")
    ax_comgaus.set_ylabel("Sum")

    # Kernel density estimate
    self.log.info("Plotting: spe_kde")
    ax_kde = self.fig_kde.add_subplot(1, 1, 1)
    sns.rugplot(spe_values, color=".2", linewidth=1, ax=ax_kde)
    sns.kdeplot(spe_values, shade=True, ax=ax_kde)
    ax_kde.set_xlabel("SPE Fit Value (p.e.)")
    ax_kde.set_ylabel("KDE")

    # Histogram
    self.log.info("Plotting: histogram")
    ax_hist = self.fig_hist.add_subplot(1, 1, 1)
    sns.distplot(spe_values, kde=False, rug=True, ax=ax_hist)
    ax_hist.set_xlabel("SPE Fit Value (p.e.)")
    ax_hist.set_ylabel("N")
# NOTE(review): the statements down to `cont += 1` look like the tail of a
# sampling loop whose header lies outside this chunk - re-indent them to
# match that loop.
means2 = np.append(means2, m2)
# Accumulate into stds2 itself; the result used to be bound to a stray
# `stds12`, so stds2 never grew.
stds2 = np.append(stds2, s2)
sample3 = np.random.normal(mu, std, sample_size3)
m3 = np.mean(sample3)
s3 = np.std(sample3)
means3 = np.append(means3, m3)
stds3 = np.append(stds3, s3)
cont += 1

# One figure per sample size: the population pdf with a rug of sample means.
Fig2 = plt.figure(2)
# x/y are passed by keyword; recent seaborn no longer accepts them positionally.
ax2 = sns.lineplot(x=x, y=pdf, color='red')
ax2.fill_between(x, pdf, color="tomato", alpha=0.7)
#sns.scatterplot(means1,np.zeros(len(means1)),s=90,alpha=0.6,zorder=10)
sns.rugplot(means1)
# NOTE(review): this draws from (mu, mu) to (0, 0.01); a vertical marker at
# the mean would be ([mu, mu], [0, 0.01]) - confirm the intent.
plt.plot([mu, 0], [mu, 0.01], color='firebrick', linewidth=3, zorder=1)
plt.text(40, 0.0175, 'N= ' + str(sample_size1), size=12)
plt.title("Erro Padrão da Média")

Fig21 = plt.figure(21)
ax2 = sns.lineplot(x=x, y=pdf, color='red')
ax2.fill_between(x, pdf, color="tomato", alpha=0.7)
#sns.scatterplot(means2,np.zeros(len(means2)),s=90,alpha=0.6,zorder=10)
sns.rugplot(means2)
plt.plot([mu, 0], [mu, 0.01], color='firebrick', linewidth=3, zorder=1)
plt.text(40, 0.0175, 'N= ' + str(sample_size2), size=12)
plt.title("Erro Padrão da Média")

Fig22 = plt.figure(22)
ax2 = sns.lineplot(x=x, y=pdf, color='red')
endnote_height=0, title_height=0) sc_team1 = pitch.scatter(df_team1.x, df_team1.y, s=df_team1.shot_statsbomb_xg * 700, ec='black', color='#ba495c', ax=axes[0]) sc_team2 = pitch.scatter(df_team2.x, df_team2.y, s=df_team1.shot_statsbomb_xg * 700, ec='black', color='#697cd4', ax=axes[0]) # note height=1 means that the whole of the marginal axes are taken up by the rugplots team1_rug_y = sns.rugplot(y=df_team1.y, ax=axes[1], color='#ba495c', height=1) team1_rug_y = sns.rugplot(y=df_team2.y, ax=axes[3], color='#697cd4', height=1) team1_rug_x = sns.rugplot(x=df_team1.x, ax=axes[2], color='#ba495c', height=1) team2_rug_x = sns.rugplot(x=df_team2.x, ax=axes[2], color='#697cd4', height=1) txt1 = axes[0].text(x=15, y=70, s=team1, fontproperties=fm.prop, color='#ba495c', ha='center', va='center', fontsize=30) txt2 = axes[0].text(x=105, y=70, s=team2, fontproperties=fm.prop,
# Only a single numeric variable may be passed in.
# bins: split the variable into n bins.
b = df_last['평당분양가격'].hist(bins=10)

# Draw a histogram with distplot.
# distplot raises an error when the data contain missing values, so keep
# only the non-null rows; the column to use must be named explicitly.
has_price = df_last['평당분양가격'].notnull()
price = df_last.loc[has_price, '평당분양가격']
price

sns.distplot(price)

# kde, rug
# kde (kernel density) -> overlaps kernel functions to draw a curve that is
#   smoother than a histogram.
# rug -> marks each observation to depict how the data are distributed.
sns.kdeplot(price, shade=True)
sns.rugplot(price)

sns.displot(price, kde=True, rug=True)
sns.displot(data=df_last, x='평당분양가격', kde=True, rug=True, hue='전용면적')
sns.displot(data=df_last, x='평당분양가격', kde=True, rug=True,
            hue='전용면적', col='전용면적', col_wrap=1, aspect=2)

# Draw several plots at once as subplots (one row per region).
g = sns.FacetGrid(df_last, row='지역명', height=1.7, aspect=4)
g.map(sns.kdeplot, '평당분양가격')

# pairplot
df_last_notnull = df_last.loc[
    df_last['평당분양가격'].notnull(),
    ['연도', '월', '지역명', '평당분양가격', '전용면적'],
]
sns.pairplot(df_last_notnull, hue='지역명')
# Changing default bins (histogram only, with a rug)
sns.distplot(x, bins=20, kde=False, rug=True)

# Kernel Density - show shape of a distribution
sns.distplot(x, hist=False, rug=True)
sns.kdeplot(x, shade=True)

# Multiple Kernel Densities: default bandwidth, then a narrow and a wide one
sns.kdeplot(x)
sns.kdeplot(x, bw=.2, label="bw: 0.2")
sns.kdeplot(x, bw=2, label="bw: 2")
plt.legend()

# Cuts the edges off: the curve stops at the data extremes
sns.kdeplot(x, shade=True, cut=0)
sns.rugplot(x)

# Plotting Parametric: fit a gamma distribution to gamma-distributed data
x = np.random.gamma(6, size=200)
sns.distplot(x, kde=False, fit=stats.gamma)

# Scatterplots
mean = [0, 1]
cov = [(1, .5), (.5, 1)]
data = np.random.multivariate_normal(mean, cov, 200)
df = pd.DataFrame(data, columns=["x", "y"])
sns.jointplot(x="x", y="y", data=df)

# Hexbin Plots
x, y = np.random.multivariate_normal(mean, cov, 1000).T
with sns.axes_style("white"):
    sns.jointplot(x=x, y=y, kind="hex", color="k")
#############################################################################################################
# 2. Kernel Density Estimation Plots
#############################################################################################################
# The normal imports
import numpy as np
from numpy.random import randn
import pandas as pd
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Create dataset
dataset = randn(25)

# Create rugplot
sns.rugplot(dataset)

# The Gaussian kernel is seaborn's default, so the explicit kernel argument
# (dropped by newer seaborn releases) is not needed.
sns.kdeplot(dataset, shade=True)

# `density=True` replaces the `normed` keyword, which Matplotlib 3.1 removed.
plt.hist(dataset, density=True, color="#6495ED", alpha=.5)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Make Korean text renderable in charts
from matplotlib import font_manager, rc, rcParams

font_path = "c:/windows/Fonts/malgun.ttf"
font_name = font_manager.FontProperties(fname=font_path).get_name()
rc('font', family=font_name)
# Needed when sign characters (-, +) are displayed
rcParams['axes.unicode_minus'] = False
###
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

import seaborn as sns

iris = sns.load_dataset('iris')
x = iris.petal_length.values

# Mark the position of each individual data point.
sns.rugplot(x)
# Show the density of the data as a curve.
sns.kdeplot(x)

plt.title('꽃 잎 길이에 대한 Kernal 밀도 그래프')
plt.show()
ncol=3, title='donor', title_fontsize=9) ### Subplot B - KDE histograms with ADCC data using CD56+ cells plt.subplot(223) sns.kdeplot(data_cd56['EC50'], bw=.4, shade=True, color='indigo', linewidth=2, legend=False) sns.rugplot( data_cd56['EC50'], color='indigo', ) plt.ylabel("density", fontdict={'size': 8, 'weight': 'bold'}) plt.yticks(size=6) plt.xlabel(r"log$_{10}$EC$_{50}$ (µg/mL)", fontdict={ 'size': 8, 'weight': 'bold' }) plt.axvline( x=data_cd56['EC50'].mean(), # equals -3.0042 ls='--', linewidth=2, color='red') plt.xlim((-5, 0))
def distribution_visualization_1vN(n, markers=('+', 'o', 's', 'd', '.'),
                                   colors=('c', 'r', 'g', 'b', 'm', 'gray')):
    """
    Plots a graph for each input feature, showing the generated sample
    distribution around the true feature for the samples that are
    positioned at the (25, 50, 75)th percentiles.

    :param n: number of generated samples per true level; selects the
        ``1v{n}`` input files and output folder
    :param markers: marker styles, indexed by percentile position
    :param colors: colors, indexed by percentile position

    Each figure is saved as PNG and PDF under
    ``<ref_sample_folder>/graphs/1v{n}/input_features/``.
    """
    import DoomLevelsGAN.DoomGAN as nn

    output_graph_folder = (nn.FLAGS.ref_sample_folder +
                           "graphs/1v{}/input_features/".format(n))
    os.makedirs(output_graph_folder, exist_ok=True)

    percent_dict = load_level_subset()
    true_samples = dict()
    for p in percent_dict:
        for name in percent_dict[p]:
            true_samples[name] = None
    true_samples = nn.gan.get_samples_by_name(true_samples)
    # Identity test: `None in values()` would compare with ==, which is
    # not a reliable check when the values are array-like.
    while any(sample is None for sample in true_samples.values()):
        print(
            "Not all levels have been found, due to random selection of levels to match the batch size. Retrying.."
        )
        true_samples = nn.gan.get_samples_by_name(true_samples)

    # loading generated results
    path = (nn.FLAGS.ref_sample_folder +
            'samples_percentiles/generated1v{}'.format(n))
    names = np.load(path + 'names.npy')
    generated = np.load(path + 'generated.npy')

    # Input features, only relevant levels
    for f, fname in enumerate(nn.gan.features):
        fig = plt.figure(figsize=(15, 11))
        for p, pname in enumerate(percent_dict):
            if pname in ('perc0', 'perc100'):
                continue
            name = percent_dict[pname][f]
            samp = generated[np.where(names == name)][0]
            true_value = true_samples[name][fname]
            values = samp[fname]
            #axt = sb.rugplot([np.mean(values)], height=1, ls="-", linewidth=0.75, marker=colors[p])
            axt = sb.rugplot(true_value, height=1, ls="--", linewidth=0.75,
                             label="True_{}".format(pname), color=colors[p],
                             marker=markers[p])
            sb.kdeplot(values, ax=axt, ls="-",
                       label="Generated_{}".format(pname), color=colors[p],
                       marker=markers[p])
            plt.setp(axt.get_legend().get_texts(), fontsize=30)
            axt.set_xlabel("{}".format(fname), fontsize=45)
        #plt.title("{} generated samples distribution from every quartile of feature \"{}\"".format(n, fname))
        # The canvas-level set_window_title was removed from Matplotlib;
        # the figure manager provides the same call.
        fig.canvas.manager.set_window_title("{}".format(fname))
        fig.tight_layout()
        fig.savefig(output_graph_folder + '1v{}_{}.png'.format(n, fname))
        fig.savefig(output_graph_folder + '1v{}_{}.pdf'.format(n, fname))
        # Release the figure; otherwise one stays open per feature.
        plt.close(fig)
# Get expensive tasks ready to process: keep the flagged indices, number
# them, and deal them out round-robin over the partitions.
expensive_tasks = (indices_vs_expensive
                   .filter(lambda x: x[1] == 1)
                   .map(lambda x: x[0]))
labeled_expensive_tasks = expensive_tasks.zipWithIndex()
partition_vs_expensive_task = labeled_expensive_tasks.map(
    lambda x: (x[1] % num_partitions, x[0]))

# Get cheap tasks ready to process
cheap_tasks = (indices_vs_expensive
               .filter(lambda x: x[1] == 0)
               .map(lambda x: x[0]))
labeled_cheap_tasks = cheap_tasks.zipWithIndex()
partition_vs_cheap_task = labeled_cheap_tasks.map(
    lambda x: (x[1] % num_partitions, x[0]))

# Combine cheap & expensive tasks, now designated to an appropriate partition
partition_vs_ij = partition_vs_expensive_task.union(partition_vs_cheap_task)

# Sort data into the correct partition...sorted by key!
# NOTE(review): numPartitions is hard-coded to 100 while the keys are taken
# modulo num_partitions - confirm the two are meant to agree.
sorted_by_partition = partition_vs_ij.sortByKey(numPartitions=100)

mandelbrot_load_balanced = sorted_by_partition.map(
    lambda a: mandelbrot_wrapper(*a[1]))
summed_rdd = P2.sum_values_for_partitions(mandelbrot_load_balanced)
summed_result = summed_rdd.collect()

# Now collect the data & plot
plt.hist(summed_result, bins=np.logspace(3, 8, 20))
sns.rugplot(summed_result, color="red")
plt.gca().set_xscale("log")
plt.xlabel("Total Number of Iterations on Partition")
plt.ylabel("Partition Count")
plt.title("Number of Iterations on each Partition")
plt.savefig("P2b_hist.png", dpi=200, bbox_inches="tight")
print("-"*30)
print(data1.isnull().sum())

# Draw the density plots
is_male = data1["gender"] == "男"
is_female = data1["gender"] == "女"
h_male = data1[is_male]["height"]
h_female = data1[is_female]["height"]

fig = plt.figure(figsize=(8, 5))
# male
sns.kdeplot(h_male, label="male_height", linestyle='--', linewidth=2,
            color='orange')
# female
sns.kdeplot(h_female, label="female_height", linestyle='--', linewidth=2,
            color='g')

# Rug marks showing where the observations fall
sns.rugplot(h_male, height=0.1, color='orange')
sns.rugplot(h_female, height=0.05, color='g')

# Draw guide lines at the means
male_height_mean = h_male.mean()      # mean male height
female_height_mean = h_female.mean()  # mean female height
plt.axvline(x=male_height_mean, linestyle="--", linewidth=1.2,
            color="orange", alpha=0.5)
plt.axvline(x=female_height_mean, linestyle="--", linewidth=1.2,
            color="g", alpha=0.5)

# Add annotations
plt.text(male_height_mean, 0.005,
         "male_height_mean: %.1fcm" % male_height_mean, color="orange")
plt.text(female_height_mean, 0.01,
         "female_height_mean: %.1fcm" % female_height_mean, color="g")

# Title
plt.title("Athlete's height")
plt.grid(linestyle="--")
''' import matplotlib from cache_codec import * from matplotlib import pyplot as plotter import seaborn if __name__ == "__main__": #Figure showing gaussian mixture model for ERBB2 models = load_gene_models("BC") samples = load_sample_profiles("BC").values() expression_levels = [sample.profiles["ERBB2"].intensity for sample in samples] print(expression_levels) seaborn.rugplot(expression_levels, color="black") seaborn.distplot(models["ERBB2"].sample(100000), hist=False) exp_patch = matplotlib.patches.Patch(color='black', label="Expression Data") gmm_patch = matplotlib.patches.Patch(color='blue', label="GMM model") plotter.legend(handles=[exp_patch, gmm_patch]) plotter.legend(loc="upper right") plotter.title("ERBB2") plotter.xlabel("Expression Level") plotter.ylabel("Density") plotter.savefig("Yay!.png")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import seaborn as sns

df = pd.read_csv('AAPL_2006-01-01_to_2018-01-01.csv',
                 index_col='Date', parse_dates=['Date'])

# Quick look at the data: missing values, correlations, distribution of
# the opening price, and pairwise relationships.
sns.heatmap(df.isnull(), cbar=False, yticklabels=False)
sns.heatmap(df.corr())
plt.hist(df['Open'], bins=40)
sns.rugplot(df['Open'])
sns.pairplot(df)

# Rows up to and including 2016 train the model, rows from 2017 on test
# it; only the column at position 1 is kept.
training_set = df[:'2016'].iloc[:, 1:2].values
test_set = df['2017':].iloc[:, 1:2].values

high = df['High']
high[:'2016'].plot(figsize=(16, 9), legend=True)
high['2017':].plot(figsize=(16, 9), legend=True)
plt.legend(['Training Set (Before 2016)', ' Test Set (After 2017)'])
plt.title("Apple Stock Price")
plt.tight_layout()
plt.show()

from sklearn.preprocessing import MinMaxScaler

# Scale the training values into [0, 1].
sc_x = MinMaxScaler(feature_range=(0, 1))
scaled_training_set = sc_x.fit_transform(training_set)

X_train = []
y_train = []
# kind='' is scatter by default; it controls what sort of graph is drawn
# on the plot. Here 'kde' is passed in to display density contours.
sns.jointplot(x='total_bill', y='tip', data=tips, kind='kde', color='blue')

# pairplot plots all of the columns against each other, which is a great
# way to quickly visualize your data.
# hue='' takes a categorical (True/False, Male/Female, etc.) column and
# separates the points by it on the pairplot. It also takes palette='',
# which allows you to set the colors yourself.
sns.pairplot(tips, hue='sex')

# A rugplot draws one small line for each value in a single column. It
# seems to be less useful than a histogram (distplot()).
sns.rugplot(tips['total_bill'])

# KDE plots = Kernel Density Estimation plots
# create a dataset
dataset = np.random.randn(25)

# Create another rugplot
sns.rugplot(dataset)

# set up the x-axis for the plot, padded by 2 on either side of the data
x_min = dataset.min() - 2
x_max = dataset.max() + 2

# 100 equally spaced points from x_min to x_max
x_axis = np.linspace(x_min, x_max, 100)
#
#fig,(ax1,ax2) = plt.subplots(ncols=2,figsize=(12,4))
#sns.regplot('X1','X2',data=pd.DataFrame(data=X_norm,columns=['X1','X2']),fit_reg=False,ax=ax1)
#ax1.set_title('Original dimension')
#sns.rugplot(Z,ax=ax2)
#ax2.set_xlabel('Z')
#ax2.set_title('Z dimension')
#plt.show()

# Recover the data back to the original dimension
X_recover = recover_data(Z, U)

fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(12, 4))

sns.rugplot(Z, ax=ax1)
ax1.set_title('Z dimension')
ax1.set_xlabel('Z')

# x and y are passed by keyword: recent seaborn releases reject them as
# positional arguments, and the keyword form works on older ones too.
sns.regplot(x='X1', y='X2',
            data=pd.DataFrame(X_recover, columns=['X1', 'X2']),
            fit_reg=False,
            ax=ax2)
ax2.set_title('2D projection from Z')

sns.regplot(x='X1', y='X2',
            data=pd.DataFrame(data=X_norm, columns=['X1', 'X2']),
            fit_reg=False,
            ax=ax3)
ax3.set_title('Original dimension')

plt.show()
df = pd.read_csv('tips.csv')

# First ten rows
tips = df.head(10)

# Display the table
st.table(tips)

st.header("Visualisation Using Seaborn")

# Bar plot
st.subheader("Bar Plot")
tips.plot(kind='bar')
st.pyplot()

# Displot
st.subheader("Displot")
sns.displot(tips['total_bill'])
st.pyplot()

# Jointplot
st.subheader("JointPlot")
sns.jointplot(x='total_bill', y='tip', data=tips, kind='scatter')
st.pyplot()

# Pairplot
st.subheader("Pairplot")
sns.pairplot(tips, hue='sex', palette='rainbow')
st.pyplot()

# Rugplot
st.subheader("Rugplot")
sns.rugplot(tips['tip'])
st.pyplot()

# Correlation
st.subheader("Heatmap")
# Correlate the numeric columns only: pandas >= 2.0 raises when corr()
# meets string columns instead of silently dropping them.
sns.heatmap(tips.select_dtypes(include='number').corr(),
            cmap='coolwarm', annot=True)
st.pyplot()
# Moving averages over `maGames` games. Series.rolling(...).mean() replaces
# pd.rolling_mean, which pandas removed.
maTeamPts4 = teamdf.Pts4.rolling(maGames).mean()
maTeamPtsAg = teamdf.PtsAg.rolling(maGames).mean()

fig3 = plt.figure('MatPlotLib Moving Average')
ax3 = plt.subplot(2, 1, 1,
                  title=("Points For " + str(maGames) +
                         " Game Moving Average"))
plt.plot(maTeamPts4.index, maTeamPts4)
# `bottom` is the supported keyword; the `ymin` alias was removed.
plt.ylim(bottom=0)
ax4 = plt.subplot(212,
                  title=("Points Against " + str(maGames) +
                         " Game Moving Average"),
                  sharex=ax3)
plt.plot(maTeamPtsAg.index, maTeamPtsAg)
plt.ylim(bottom=0)

fig4 = plt.figure('Seaborn KDE')
#sns.distplot(teamdf.Pts4, hist=False, kde_kws={"shade": True})
ax5 = plt.subplot(2, 1, 1, title=team + ' Points-For KDE')
sns.kdeplot(teamdf.Pts4, bw=.4, cut=40, shade=True, label=team)
#sns.kdeplot(df.Pts4, bw=1, shade=True, label='League Overall')
ax6 = plt.subplot(2, 1, 2, title=team + ' Points-Against KDE', sharex=ax5)
sns.kdeplot(teamdf.PtsAg, bw=.4, cut=40, shade=True, label=team)

plt.figure('Rugplot')
sns.rugplot(teamdf.Pts4)

plt.show()
# A dice roll
# The Probability Mass function

# Each number
roll_options = [1, 2, 3, 4, 5, 6]

# Total probability space is 1
tprob = 1

# Each roll has the same odds of appearing --> 1/6
n_outcomes = len(roll_options)
prob_roll = tprob / n_outcomes

# Plot using seaborn rugplot (note this is not really a rugplot),
# setting height equal to probability of roll
uni_plot = sns.rugplot(roll_options, height=prob_roll, c='indianred')

# Set Title
uni_plot.set_title('Probability Mass Function for Dice Roll')

# We can see in the above example that the f(x) value on the plot is just
# equal to 1/(Total Possible Outcomes)
# The mean is simply the sum of the max and min value divided by two, just
# like the mean of two numbers: μ = (b + a) / 2
# With a variance of: σ^2 = ((b − a + 1)^2 − 1) / 12

# automatically create a Discrete Uniform Distribution using Scipy.
# Imports
from scipy.stats import randint
import numpy as np
from numpy.random import randn
import pandas as pd
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

dataset = randn(25)
sns.rugplot(dataset)

# Draw the histogram with some transparency
plt.hist(dataset, alpha=0.3)
# The rugplot shows where the actual data points lie
sns.rugplot(dataset)

# From here ...
sns.rugplot(dataset)

# Evaluation grid, padded by 2 on either side of the data
x_min = dataset.min() - 2
x_max = dataset.max() + 2
x_axis = np.linspace(x_min, x_max, 100)

# Bandwidth computed from the sample standard deviation and sample size
bandwidth = ((4 * dataset.std()**5) / (3 * len(dataset)))**0.2

kernel_list = []
for data_point in dataset:
    # One Gaussian kernel centred on each data point
    kernel = stats.norm(data_point, bandwidth).pdf(x_axis)
    kernel_list.append(kernel)

    # Rescale to a peak of 0.4 for display only; the stored kernel above
    # is left unscaled
    kernel = kernel / kernel.max() * 0.4
    plt.plot(x_axis, kernel, color='gray', alpha=0.5)

plt.ylim(0, 1)
# ... up to here: how to implement a kernel density estimate without
# using seaborn
X3 = rng.normal(4, 1, size=n_per_group)
X = np.concatenate((X1, X2, X3))
labels = np.concatenate((0 * ones, 1 * ones, 2 * ones))

# sort to help visualize (descending by value)
sort_inds = np.argsort(-X)
X = X[sort_inds]
labels = labels[sort_inds]

fig, ax = plt.subplots(1, 1, figsize=(8, 6))
# Histogram per group, with a rug underneath in the same palette
sns.histplot(x=X, hue=labels, palette=palette, bins=50, stat="density", ax=ax)
sns.rugplot(
    x=X,
    hue=labels,
    palette=palette,
    height=0.05,
    legend=False,
    ax=ax,
    expand_margins=True,
)
stashfig("rank-distribution")

#%% [markdown]
# ## A distribution from the latent ranks
# Using the ranks, we can create a distribution from which to sample graphs. Here I plot
# the matrix of edge probabilities $P$ and an adjacency matrix $A$ from it.

#%%
k = 15
beta = 5
# 20 bins, no density curve, rug lines at the bottom
sns.distplot(data.mpg, bins=20, kde=False, rug=True)
# density curve and rug only, no histogram
sns.distplot(data.mpg, hist=False, rug=True)

sns.jointplot(x="wt", y="mpg", data=data)
sns.jointplot(x="wt", y="mpg", data=data, kind="kde")
sns.jointplot(
    x="x",
    y="y",
    data=df,
)
#Links:https://seaborn.pydata.org/tutorial/distributions.html

# Bivariate density with a rug for each variable on its own axis.
# NOTE(review): the positional second argument of kdeplot and the
# `vertical` flag of rugplot are only accepted by older seaborn releases -
# confirm the targeted version.
f, ax = plt.subplots(figsize=(6, 6))
sns.kdeplot(data.wt, data.mpg, ax=ax)
sns.rugplot(data.wt, color="g", ax=ax)
sns.rugplot(data.mpg, vertical=True, ax=ax)

#%% Pair Plot
pair_columns = ['wt', 'mpg', 'hp', 'qsec']
sns.pairplot(data[pair_columns])
#%%%
#%%%
#%%%outliers
# In statistics, an outlier is an observation point that is distant from
# other observations.
sns.boxplot(x=data['mpg'])

# The Z-score is the signed number of standard deviations by which the
# value of an observation or data point is above the mean value of what is
# being observed or measured.
#Links: https://towardsdatascience.com/ways-to-detect-and-remove-the-outliers-404d16608dba
from scipy import stats
import numpy as np

z = np.abs(stats.zscore(data.mpg))
print(z)
def Time_Nature_kdeplots(time_nature={
        'month': ['temp', 'RH', 'wind'],
        'day': ['temp', 'RH', 'wind']
},
                         figNo=2,
                         clr_background='Greens',
                         clr_rugplots='green',
                         scatters=True):
    '''
    : Function name: Time_Nature_kdeplots
    : Draws bivariate kde plots of natural features against a time index
      (month/day), read from the module-level `fire` data.

    Explanation of kde plots:
    1) x-axis: time index (month/day)
    2) y-axis: natural feature from ['temp','RH','wind']
       P.S.: rain has been ignored since 99.6% of its data =0 which will
       have no density in kdeplots
    3) color shades: density of the two plotted variables
    4) color bar: scale of the shaded density
    5) rug plots at each axis: mark the values of natural feature/time index
    6) scatters: locate the fire cases

    : type time_nature: dict{str:list}
    : param time_nature: time_nature.keys(): time index
                         time_nature.values(): a list of natural features
                         (the default dict is only read, never modified)
    : type figNo: int
    : param figNo: number of figures plotted = number of time indices in dict
    : type clr_background: str
    : param clr_background: color palette for the kdeplots; must be a
                            palette name known to Seaborn
    : type clr_rugplots: str
    : param clr_rugplots: color for the rugplots
    : type scatters: bool
    : param scatters: whether to show scatters in kdeplots (True/False)
    : raises AssertionError: when an argument fails the checks below
    '''
    # number of figures plotted = number of time indices in dict
    assert isinstance(figNo, int) and figNo == len(time_nature.keys())
    assert isinstance(time_nature, dict)
    assert isinstance(clr_background, str)
    assert isinstance(clr_rugplots, str)

    n = 1  # figure index
    for i in time_nature.keys():
        # the time indices available
        assert isinstance(i, str) and i in ['month', 'day']
        assert isinstance(time_nature[i], list)
        for v in time_nature[i]:
            # the natural features available
            assert v in ['temp', 'RH', 'wind']

        m = len(time_nature[i])  # the number of natural features for one time index
        q = 100 + 10 * m + 1  # the index of subplot (1 row, m columns, first cell)

        # set the figsize
        if m == 1:
            figsize = (5, 5)
        elif m == 2:
            figsize = (14, 6)
        else:
            figsize = (18, 5)
        plt.figure(n, figsize)

        for j in range(m):
            feature = time_nature[i][j]
            plt.subplot(q)
            plt.title(i + '-' + feature, fontsize=20, position=(0.5, 1.05))
            plt.xlabel(i, fontsize=20)
            # Label with the feature drawn in this subplot; the whole
            # feature list used to be passed here.
            plt.ylabel(feature, fontsize=20)
            sns.kdeplot(
                fire[i],
                fire[feature],  # the probability distribution of two variables
                cbar=True,  # display color bar
                shade=True,  # display shades
                cmap=clr_background,  # set the color palette
                shade_lowest=False,  # do not shade the lowest level
                n_levels=40  # number of curves, the higher, the smoother
            )  # the color change indicates the change of density
            plt.grid(linestyle='--')  # show grids
            if scatters == True:
                # set scatters with their sizes, colors and shapes
                plt.scatter(fire[i],
                            fire[feature],
                            s=5,
                            alpha=0.5,
                            color='k',
                            marker='+')
            # set the rugplots
            sns.rugplot(fire[i], color=clr_rugplots, axis='x', alpha=0.5)
            sns.rugplot(fire[feature], color=clr_rugplots, axis='y', alpha=0.5)
            q += 1
        n += 1
    plt.show()
def InvarianceTestKolSmirn(epsi,
                           y1,
                           y2,
                           band_int,
                           cdf_1,
                           cdf_2,
                           up_band,
                           low_band,
                           pos=None,
                           name='Invariance Test',
                           bound=(0, 0)):
    """Draw the figure for the Kolmogorov-Smirnov (IID) invariance test.

    Layout: a 2x2 grid with the histogram of each sample in the top row and
    both empirical cdfs, their rug marks and the band in the bottom row.

    INPUTS
     epsi     :[vector](1 x t_end) series of (to be tested as such) invariants
     y1       :[vector](1 x ~t_end/2) first partition of vector epsi
     y2       :[vector](1 x ~t_end/2) second partition of vector epsi
     band_int :[row vector] x-axis values of the (upper and lower) band
     cdf_1    :[vector](1 x ~t_end/2) empirical cdf of y1
     cdf_2    :[vector](1 x ~t_end/2) empirical cdf of y2
     up_band  :[row vector] y-axis values of the upper band
     low_band :[row vector] y-axis values of the lower band
     pos      :accepted for backward compatibility only; the axes are placed
               with subplot2grid and this argument is not read
     name     :[string] title of the figure
     bound    :[vector](1x2) lower and upper values of the x-axis; an entry
               equal to 0 means "use the matching end of band_int"

    Nothing is returned; the plot is drawn on the current matplotlib figure.
    The names histogram, log and npmax come from module-level imports
    (presumably NumPy -- confirm against the file header).
    """

    def _n_bins(sample):
        # Same bin-count rule for all three histograms: 10 * log(sample size).
        return int(round(10 * log(len(sample.flatten()))))

    # colors
    blue = [0.2, 0.2, 0.7]
    l_blue = [0.2, 0.6, 0.8]
    orange = [.9, 0.6, 0]
    d_orange = [0.9, 0.3, 0]

    # Shared x-axis limits for all three panels.
    xlim_1 = bound[0] if bound[0] != 0 else band_int[0]
    xlim_2 = bound[1] if bound[1] != 0 else band_int[-1]

    # The tallest bin of the full series fixes the y-limit of both sample
    # histograms.  Raw counts are the default, so no `normed` argument is
    # passed (that keyword no longer exists in current NumPy).
    ycount, _ = histogram(epsi, _n_bins(epsi))
    ylim = npmax(ycount)

    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0), colspan=2)

    # histogram of Sample 1, y1
    sns.distplot(y1,
                 bins=_n_bins(y1),
                 kde=False,
                 color=orange,
                 hist_kws={
                     "alpha": 1,
                     "edgecolor": "k"
                 },
                 ax=ax1)
    ax1.set_xlabel('Sample1')
    ax1.set_xlim((xlim_1, xlim_2))
    ax1.set_ylim([0, ylim * 0.8])
    ax1.ticklabel_format(axis='x', style='sci', scilimits=(-2, 2))
    ax1.grid(False)

    # histogram of Sample 2, y2
    sns.distplot(y2,
                 bins=_n_bins(y2),
                 kde=False,
                 color=l_blue,
                 hist_kws={
                     "alpha": 1,
                     "edgecolor": "k"
                 },
                 ax=ax2)
    ax2.grid(False)
    ax2.set_xlabel('Sample2')
    ax2.set_xlim((xlim_1, xlim_2))
    ax2.set_ylim([0, ylim * 0.8])
    ax2.ticklabel_format(axis='x', style='sci', scilimits=(-2, 2))

    # empirical cdfs with rug marks for both samples
    ax3.scatter(y1, cdf_1, color=d_orange, s=2)
    ax3.scatter(y2, cdf_2, color=blue, s=2)
    sns.rugplot(y1[0], height=0.025, color=d_orange, ax=ax3)
    sns.rugplot(y2[0], height=0.025, color=blue, ax=ax3)

    # the (upper and lower) band
    ax3.plot(band_int, up_band, '-', color='k', lw=0.5)
    ax3.plot(band_int, low_band, '-', color='k', lw=0.5)

    ax3.set_xlabel('data')
    ax3.set_ylabel('cdf')
    ax3.set_xlim([xlim_1, xlim_2])
    ax3.set_ylim([-0.05, 1.05])
    ax3.ticklabel_format(axis='x', style='sci', scilimits=(-2, 2))
    plt.suptitle(name)
    # leave room at the top for the suptitle
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
import numpy as np
import pandas as pd
from scipy import stats, integrate
import matplotlib.pyplot as plt
import seaborn as sns

# 30 draws from a standard normal distribution.
x = np.random.normal(0, 1, size=30)

# Kernel width derived from the sample spread and the sample size.
bandwidth = 1.06 * x.std() * x.size ** (-1 / 5.)

# Grid on which every kernel is evaluated.
support = np.linspace(-4, 4, 200)

# One normal pdf centred on each observation, drawn in red as it is built.
kernels = []
for x_i in x:
    kernel = stats.norm(x_i, bandwidth).pdf(support)
    plt.plot(support, kernel, color="r")
    kernels.append(kernel)

# Mark the observations themselves along the x-axis.
sns.rugplot(x, color=".2", linewidth=3)
def ModelFeature_kdeplots(model_nature={'DMC': ['temp', 'RH']},
                          figNo=None,
                          clr_background='Blues',
                          clr_rugplots='blue',
                          scatters=True):
    '''
    : Function name: ModelFeature_kdeplots
    : Draw one figure per model feature; each figure holds one bivariate KDE
      subplot per related natural feature, with rug plots and optional
      scatter points.

    Explanation of kde plots:
        1) x-axis: natural feature from ['temp','RH','wind']
        2) y-axis: model feature from ['DMC','DC','FFMC','ISI']
        3) color shades / color bar: estimated joint density of the two
           plotted columns
        4) rug plots at each axis: mark the individual values
        5) scatters: the individual rows of `c`

    The data is read from the module-level name `c`, indexed by column name
    (presumably the fire-cases DataFrame -- defined outside this function).

    : type model_nature: dict{str:list}
    : param model_nature: keys are model features, values are lists of the
                          natural features to plot against them. Only these
                          pairings are accepted:
                              DMC  -> temp, RH
                              DC   -> RH
                              FFMC -> temp, RH, wind
                              ISI  -> wind
                          The default dict is only read, never modified.
    : type figNo: int or None
    : param figNo: number of figures; when given it must equal the number of
                   keys in model_nature. None (default) uses that number.
                   (The former default of 2 did not match the one-key default
                   dict, so a call without arguments always failed.)
    : type clr_background: str
    : param clr_background: colormap name passed to sns.kdeplot
    : type clr_rugplots: str
    : param clr_rugplots: color passed to sns.rugplot
    : type scatters: bool
    : param scatters: whether to overlay the scatter points

    : raises AssertionError: if any argument fails the checks above
    '''
    # natural features that may be related to each model feature
    allowed = {
        'DMC': ['temp', 'RH'],
        'DC': ['RH'],
        'FFMC': ['temp', 'RH', 'wind'],
        'ISI': ['wind'],
    }

    assert isinstance(model_nature, dict)
    assert figNo is None or isinstance(figNo, int)
    if figNo is None:
        figNo = len(model_nature)
    # number of figures plotted == number of model features in the dict
    assert figNo == len(model_nature)
    assert isinstance(clr_background, str)
    assert isinstance(clr_rugplots, str)

    for model_feature, naturals in model_nature.items():
        assert isinstance(model_feature, str) and model_feature in allowed
        assert isinstance(naturals, list)
        for natural in naturals:
            assert isinstance(natural, str) and natural in allowed[model_feature]

    # Figures are numbered from 0, one per model feature.
    for fig_idx, (model_feature, naturals) in enumerate(model_nature.items()):
        m = len(naturals)  # number of subplots in this figure
        if m == 1:
            figsize = (5, 5)
        elif m == 2:
            figsize = (14, 6)
        else:
            figsize = (15, 6)
        plt.figure(fig_idx, figsize)

        for j, natural in enumerate(naturals):
            # one row, m columns, subplot j + 1
            plt.subplot(1, m, j + 1)
            plt.title(model_feature + '-' + natural,
                      fontsize=14,
                      position=(0.5, 1.05))
            plt.xlabel(natural, fontsize=14)
            plt.ylabel(model_feature, fontsize=14)
            sns.kdeplot(
                c[natural],
                c[model_feature],
                cbar=True,  # display color bar
                shade=True,  # fill the contours
                cmap=clr_background,
                shade_lowest=False,  # leave the lowest level unshaded
                n_levels=40  # number of contour levels
            )
            plt.grid(linestyle='--')
            if scatters:
                plt.scatter(c[natural],
                            c[model_feature],
                            s=5,
                            alpha=0.5,
                            color='r',
                            marker='*')
            sns.rugplot(c[natural], color=clr_rugplots, axis='x', alpha=0.5)
            sns.rugplot(c[model_feature],
                        color=clr_rugplots,
                        axis='y',
                        alpha=0.5)
    plt.show()
# jointplot: the same two columns drawn as scatter, hexbin and regression
sns.jointplot(x='total_bill', y='tip', data=tips, kind='scatter')
sns.jointplot(x='total_bill', y='tip', data=tips, kind='hex')
sns.jointplot(x='total_bill', y='tip', data=tips, kind='reg')

##############################################################################
# pairplot: relationships across an entire dataframe for the numerical columns
# supports a color hue argument for categorical columns
##############################################################################
sns.pairplot(tips)
sns.pairplot(tips, hue='sex', palette='coolwarm', diag_kind='hist')

#################################################################################
# rugplot: draws a dash mark for every point of a univariate distribution.
# Rug marks are the building block of a KDE plot.
#################################################################################
sns.rugplot(tips['total_bill'])

#################################################################################
# kdeplot: Kernel Density Estimation plot. A KDE plot replaces every single
# observation with a Gaussian (Normal) distribution centered around that value.
#################################################################################
# Create dataset
dataset = np.random.randn(25)
# Call form of print: valid on Python 3 and prints the same on Python 2.
print(dataset)

# Create another rugplot
sns.rugplot(dataset)

# Set up the x-axis for the plot
x_min = dataset.min() - 2
kmeans_ari = adjusted_rand_score(y, kmeans_pred_labels) return gmm_ari - kmeans_ari np.random.seed(8888) n_sims = 40 seeds = np.random.randint(1e8, size=n_sims) # random # seeds = np.ones(n_sims, dtype=int) # uncomment for not random par = Parallel(n_jobs=2) ari_diffs = par(delayed(run_experiment)(seed) for seed in seeds) fig, ax = plt.subplots(1, 1, figsize=(8, 4)) ax.axvline(0, linewidth=2, linestyle="--", color="red") sns.distplot(ari_diffs, norm_hist=False) sns.rugplot(ari_diffs) xlim = ax.get_xlim() ylim = ax.get_ylim() y_range = ylim[1] - ylim[0] ypos = ylim[0] + y_range * 0.75 x_range = xlim[1] - xlim[0] ax.text(xlim[0] + 0.05 * x_range, ypos, "KMeans \n better") ax.text(xlim[1] - 0.05 * x_range, ypos, "GMM \n better", horizontalalignment="right") ax.spines["left"].set_visible(False) ax.spines["right"].set_visible(False) ax.spines["top"].set_visible(False) ax.set_yticks([]) ax.set_xlabel("(GMM - KMeans) ARI")
sns.set_theme()

# Two independent standard-normal samples of 100 points each, kept together
# in one frame with columns "x" and "y".
x = np.random.normal(size=100)
y = np.random.normal(size=100)
data_df = pd.DataFrame({"x": x, "y": y})

sns.set_theme(style="darkgrid")

# NOTE(review): `ax` is not created in this snippet; it is indexed as a
# 2x2 grid of axes, so it presumably comes from an earlier plt.subplots call.

# top-left: x against y
sns.scatterplot(data=data_df, x="x", y="y", ax=ax[0, 0])
ax[0, 0].set_title("Scatter Plot")

# top-right: histogram of x in red
sns.histplot(data=data_df, x="x", color="r", ax=ax[0, 1])
ax[0, 1].set_title("Histogram")

# bottom-left: density of x with a rug of the observations underneath
sns.kdeplot(data=data_df, x="x", ax=ax[1, 0])
sns.rugplot(data=data_df, x="x", ax=ax[1, 0])
ax[1, 0].set_title("Density Plot")

# bottom-right: box plot of x in green
sns.boxplot(data=data_df, x="x", color="g", ax=ax[1, 1])
ax[1, 1].set_title("Box Plot")

plt.show()
df = pd.read_csv('SmallSalaries.csv')
# Replace missing BasePay values with the column mean.  fillna returns a new
# Series, so the result has to be assigned back to take effect.
df['BasePay'] = df['BasePay'].fillna(df['BasePay'].mean())
sns.distplot(df['BasePay'])  # draw a distplot on your own dataset

tips = sns.load_dataset('tips')

# DISTPLOT
# The distplot shows the distribution of a univariate set of observations.
sns.distplot(tips['total_bill'])
# KDE: Kernel Density Estimation
# to remove the KDE line, use sns.distplot(tips['total_bill'], kde=False)

# JOINTPLOT
# jointplot() matches up two distplots for bivariate data, with a choice of
# `kind` to compare with: "scatter", "reg", "resid", "kde", "hex"
sns.jointplot(x='total_bill', y='tip', data=tips, kind='reg')

# PAIRPLOT
# pairplot plots pairwise relationships across an entire dataframe (for the
# numerical columns) and supports a color hue argument (for categorical
# columns)
sns.pairplot(tips, hue='sex', palette='coolwarm')

# RUGPLOT
# draws a dash mark for every point of a univariate distribution;
# rug marks are the building block of a KDE plot
sns.rugplot(tips['total_bill'])

plt.show()
import seaborn as sns
import matplotlib.pyplot as plt

# Data frame with columns: total_bill, tip, sex, smoker, day, time, size
tips = sns.load_dataset('tips')

# Histograms of the bill total:
#   default, then without the density line, then with 110 bins
sns.distplot(tips['total_bill'])
sns.distplot(tips['total_bill'], kde=False)
sns.distplot(tips['total_bill'], bins=110)

# Two columns against each other; `kind` switches the representation
# (hex, reg, kde)
sns.jointplot(data=tips, x='total_bill', y='tip')
sns.jointplot(data=tips, x='total_bill', y='tip', kind='hex')

# Every numeric column against every other one:
#   plain, split by category, split by category with a chosen palette
sns.pairplot(tips)
sns.pairplot(tips, hue='sex')
sns.pairplot(tips, hue='sex', palette='coolwarm')

# One dash per observation, then the smoothed density line
sns.rugplot(tips['total_bill'])
sns.kdeplot(tips['total_bill'])

plt.show()
tips = sns.load_dataset('tips')

# distplot: histogram of the bill total, 30 bins, no density line
sns.distplot(tips['total_bill'], bins=30, kde=False)

# jointplot: `kind` is one of 'scatter' (the default), 'reg', 'resid',
# 'kde' or 'hex'
sns.jointplot(data=tips, x='total_bill', y='tip', kind="reg")

# pairplot: plots pairwise relationships across an entire dataframe (for the
# numerical columns) and supports a color hue argument (for categorical
# columns); here the points are colored by 'sex' using the 'coolwarm' palette
sns.pairplot(tips, palette='coolwarm', hue='sex')

# rugplot: draws a dash mark for every point of a univariate distribution
sns.rugplot(tips["total_bill"])

# kde
plt.show()
# The image is num_pixels x num_pixels; build every (row, col) pair as an RDD.
num_pixels = 2000
rows = sc.range(num_pixels, numSlices=10)
cols = sc.range(num_pixels, numSlices=10)
indices = rows.cartesian(cols)


def mandelbrot_wrapper(row, col):
    """Return ((row, col), P2.mandelbrot(x, y)) for one pixel.

    The pixel indices are scaled by num_pixels / 4 and shifted by -2 to get
    the (x, y) point handed to P2.mandelbrot.
    """
    pixels_per_unit = num_pixels / 4.
    x = col / pixels_per_unit - 2.
    y = row / pixels_per_unit - 2.
    return ((row, col), P2.mandelbrot(x, y))


########### Different from part A: load balancing! ########
# Spread the pixel jobs over 100 partitions before mapping.
new_indices = indices.repartition(100)
mandelbrot_load_balanced = new_indices.map(lambda a: mandelbrot_wrapper(*a))
summed_rdd = P2.sum_values_for_partitions(mandelbrot_load_balanced)
summed_result = summed_rdd.collect()

# Histogram of the collected per-partition sums on a log-scaled x-axis,
# with a red rug mark per partition.
plt.hist(summed_result, bins=np.logspace(3, 8, 20))
sns.rugplot(summed_result, color='red')
plt.gca().set_xscale('log')
plt.xlabel('Total Number of Iterations on Partition')
plt.ylabel('Partition Count')
plt.title('Number of Iterations on each Partition')
plt.savefig('P2b_alternative_hist.png', dpi=200, bbox_inches='tight')
# Two grids over the data range widened by nSpace on each side:
# x_d steps by 1 (float32), support has 100 evenly spaced points.
x_d = np.arange(np.min(x) - nSpace, np.max(x) + nSpace + 1, dtype=np.float32)
support = np.linspace(np.min(x) - nSpace, np.max(x) + nSpace + 1, num=100)

# Column vector, so that evaluating on a grid yields one row per observation.
x = x.reshape(-1, 1)

# NOTE(review): `norm` is presumably scipy.stats.norm, in which case this
# value is used as the scale (standard deviation), not a variance -- confirm.
variance = 4

# One kernel curve per observation, drawn in red.
a_ = norm(x, variance).pdf(support)
for a_row in a_:
    plt.plot(support, a_row, color="r")
"""c"""
sns.rugplot(x, color=".2", linewidth=3)

# The sum of all kernels over the fine grid.
density = a_.sum(axis=0)
plt.plot(support, density)
plt.show()

# Same sum on the unit-step grid, then a mask of the points above 0.001.
a_ = norm(x, variance).pdf(x_d)
density = a_.sum(axis=0)
m = density > 0.001
from pandas import Series,DataFrame
import pandas as pd
import csv
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
get_ipython().magic(u'matplotlib inline')

# Read in and inspect ridership data
dframe = pd.read_csv('total_wmata_riders.csv')
dframe.head()
dframe.describe()
riders_frame = dframe['riders']

# Develop topline visualizations: rug marks, then the density line
sns.rugplot(riders_frame)
plt.suptitle("Rug Plot for Total Ridership by Station")
sns.kdeplot(riders_frame)
plt.suptitle("Density Plot for Riders by Station")

# Identify top and bottom quartiles of station ridership.
# The two cut-offs are fixed numbers, not computed from the data here.
top_quartile = dframe[dframe['riders'] > 589680]
top_quartile.head()
top_quartile
bottom_quartile = dframe[dframe['riders'] < 254567]
bottom_quartile.head()
top_riders = top_quartile['riders']
bottom_riders = bottom_quartile['riders']

# Visualize top and bottom quartiles
sns.violinplot([bottom_riders])
# from https://web.stanford.edu/~mwaskom/software/seaborn/tutorial/distributions.html
import numpy as np
import pandas as pd
from scipy import stats, integrate
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(color_codes=True)
# Fixed seed derived from the character codes of the word "distributions".
np.random.seed(sum(map(ord, "distributions")))

# Univariate: 100 standard-normal draws
x = np.random.normal(size=100)
sns.distplot(x)

# Bivariate: 200 draws from a correlated two-dimensional normal
mean, cov = [0, 1], [(1, .5), (.5, 1)]
data = np.random.multivariate_normal(mean, cov, 200)
df = pd.DataFrame(data, columns=["x", "y"])

sns.jointplot(data=df, x="x", y="y")
sns.jointplot(data=df, x="x", y="y", kind="kde")

# Joint density with a rug for each margin on one set of axes
f, ax = plt.subplots(figsize=(6, 6))
sns.kdeplot(df.x, df.y, ax=ax)
sns.rugplot(df.x, color="g", ax=ax)
sns.rugplot(df.y, vertical=True, ax=ax)

# Pairwise relationships in the iris data set
iris = sns.load_dataset("iris")
sns.pairplot(iris)
# --- create a distribution object from the (counts, bin limits) pair
hist = scipy.stats.rv_histogram((counts, binlims))

# --- plot its pdf on an x axis that extends `padding` beyond the outer bins
padding = 3
n_points = 10000
x = np.linspace(binlims[0] - padding, binlims[-1] + padding, num=n_points)
plt.plot(x, hist.pdf(x))

# --- plot the cumulative histogram
plt.plot(x, hist.cdf(x))

# --- sample from the histogram (aka draw random variates)
n_sample = 30
sample = hist.rvs(size=n_sample)

# --- plot the sample; seaborn draws each data point as a small vertical bar
plt.hist(sample, bins='auto', alpha=.5)
sns.rugplot(sample, color='k', linewidth=3)
def ale_plot(
    model,
    train_set,
    features,
    bins=10,
    monte_carlo=False,
    predictor=None,
    features_classes=None,
    monte_carlo_rep=50,
    monte_carlo_ratio=0.1,
    rugplot_lim=1000,
):
    """Plots ALE function of specified features based on training set.

    Parameters
    ----------
    model : object
        An object that implements a 'predict' method. If None, a `predictor`
        function must be supplied which will be used instead of
        `model.predict`.
    train_set : pandas.core.frame.DataFrame
        Training set on which model was trained.
    features : [2-iterable of] column label
        One or two features for which to plot the ALE plot.
    bins : [2-iterable of] int, optional
        Number of bins used to split feature's space. 2 integers can only be
        given when 2 features are supplied in order to compute a different
        number of quantiles for each feature.
    monte_carlo : boolean, optional
        Compute and plot Monte-Carlo samples (first-order plots only).
    predictor : callable
        Custom prediction function. See `model`.
    features_classes : iterable of str, optional
        Reserved for categorical features; any value other than None raises
        NotImplementedError.
    monte_carlo_rep : int
        Number of Monte-Carlo replicas.
    monte_carlo_ratio : float
        Proportion of randomly selected samples from dataset for each
        Monte-Carlo replica. Rows are drawn with replacement.
    rugplot_lim : int, optional
        If `train_set` has more rows than `rugplot_lim`, no rug plot will be
        plotted. Set to None to always plot rug plots. Set to 0 to never
        plot rug plots.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the ALE plot was drawn on (the figure is also shown).

    Raises
    ------
    ValueError
        If both `model` and `predictor` are None.
    ValueError
        If `len(features)` not in {1, 2}.
    ValueError
        If multiple bins were given for 1 feature.
    NotImplementedError
        If `features_classes` is not None.

    """
    if model is None and predictor is None:
        raise ValueError("If 'model' is None, 'predictor' must be supplied.")
    if features_classes is not None:
        raise NotImplementedError("'features_classes' is not implemented yet.")

    # Validate everything before a figure is created, so that invalid input
    # does not leave an empty figure behind.
    features = _parse_features(features)
    n_features = len(features)
    if n_features not in (1, 2):
        raise ValueError(
            "'{n_feat}' 'features' were given, but only up to 2 are supported."
            .format(n_feat=n_features))
    if n_features == 1 and not isinstance(bins, (int, np.integer)):
        raise ValueError(
            "1 feature was given, but 'bins' was not an integer.")

    predict = model.predict if predictor is None else predictor
    fig, ax = plt.subplots()

    if n_features == 1:
        if monte_carlo:
            n_rows = train_set.shape[0]
            # Row positions for all replicas in one call: one row of indices
            # per replica, each index drawn uniformly with replacement.
            mc_replicates = np.random.choice(
                n_rows,
                size=(monte_carlo_rep, int(monte_carlo_ratio * n_rows)),
            )
            for rep in mc_replicates:
                train_set_rep = train_set.iloc[rep, :]
                # The same quantiles cannot be reused here as this could cause
                # some bins to be empty or contain disproportionate numbers of
                # samples.
                mc_ale, mc_quantiles = _first_order_ale_quant(
                    predict,
                    train_set_rep,
                    features[0],
                    bins,
                )
                _first_order_quant_plot(ax,
                                        mc_quantiles,
                                        mc_ale,
                                        color="#1f77b4",
                                        alpha=0.06)
        ale, quantiles = _first_order_ale_quant(
            predict,
            train_set,
            features[0],
            bins,
        )
        _ax_labels(ax, "Feature '{}'".format(features[0]), "")
        _ax_title(
            ax,
            "First-order ALE of feature '{0}'".format(features[0]),
            "Bins : {0} - Monte-Carlo : {1}".format(
                len(quantiles) - 1,
                mc_replicates.shape[0] if monte_carlo else "False",
            ),
        )
        ax.grid(True, linestyle="-", alpha=0.4)
        if rugplot_lim is None or train_set.shape[0] <= rugplot_lim:
            sns.rugplot(train_set[features[0]], ax=ax, alpha=0.2)
        # The full-data curve is drawn last, in black, over the replicas.
        _first_order_quant_plot(ax, quantiles, ale, color="black")
        _ax_quantiles(ax, quantiles)
    else:
        ale, quantiles_list = _second_order_ale_quant(
            predict,
            train_set,
            features,
            bins,
        )
        _second_order_quant_plot(fig, ax, quantiles_list, ale)
        _ax_labels(
            ax,
            "Feature '{}'".format(features[0]),
            "Feature '{}'".format(features[1]),
        )
        for twin, quantiles in zip(("x", "y"), quantiles_list):
            _ax_quantiles(ax, quantiles, twin=twin)
        _ax_title(
            ax,
            "Second-order ALE of features '{0}' and '{1}'".format(
                features[0], features[1]),
            "Bins : {0}x{1}".format(
                *[len(quant) - 1 for quant in quantiles_list]),
        )
    plt.show()
    return ax
__author__ = 'Noventa'

import numpy as np
import pandas as pd
from numpy.random import randn
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# 25 standard-normal observations
dataset = randn(25)

# Rug marks alone, with the y-axis limited to [0, 1]
sns.rugplot(dataset)
plt.ylim(0, 1)
#plt.show()

# Translucent histogram with the rug marks on top
plt.hist(dataset, alpha=0.3)
sns.rugplot(dataset)
#plt.show()

sns.rugplot(dataset)

# x-axis grid: 100 points from 2 below the minimum to 2 above the maximum
x_min = dataset.min() - 2
x_max = dataset.max() + 2
x_axis = np.linspace(x_min, x_max, num=100)

# Kernel width computed from the sample standard deviation and sample size
bandwidth = ((4*dataset.std()**5)/(3*len(dataset))) ** 0.2

# One normal pdf per observation, evaluated on the x-axis grid
kernel_list = []
for data_pt in dataset:
    kernel = stats.norm(data_pt, bandwidth).pdf(x_axis)
# Get the data, and fit the normal distribution
weight = np.array([2784, 2632, 2771, 2495, 2435, 2513, 2633, 2737, 2687, 2647],
                  dtype=np.float32)
(md, sd) = stats.norm.fit(weight)
nd = stats.norm(md, sd)

# Plot the data
sns.set_context(context='poster')
x = np.linspace(2300, 3000)
y = nd.pdf(x)

# Probability mass of the fitted distribution below checkVal
checkVal = 2460
print('p = {0:5.3f}'.format(nd.cdf(checkVal)))

x1 = np.linspace(2300, checkVal)
y1 = nd.pdf(x1)

# Rug marks, the fitted pdf and the shaded area up to checkVal share one
# axes.  No plt.hold(True) call: later plot calls add to the current axes by
# default, and pyplot.hold does not exist in current Matplotlib.
sns.rugplot(weight, height=0.0005)
plt.plot(x,y)
plt.fill_between(x1, y1, alpha=0.3)

outDir = r'C:\Users\p20529\Documents\Teaching\Master_FH\Stats\Images'
outFile = os.path.join(outDir, 'pdf_checkMean.png')
plt.savefig(outFile, dpi=200)
print('Figure saved to {0}'.format(outFile))

plt.show()
def plot(ctx):
    """Draw the Ramachandran figures for the angle data stored in ``ctx.obj``.

    ``ctx.obj['trj']`` selects what is drawn:

    * ``'empty'``: a 2x2 grid of phi/psi scatter plots over background
      images, reading ``phi_<k>`` / ``psi_<k>`` from ``ctx.obj`` for
      ``k`` in gen, gly, pro, pre.
    * ``'filled'``: a 3x2 grid built from ``ctx.obj['phi']`` and
      ``ctx.obj['psi']``: scatter plot, KDE contour plot, the two
      angle-difference series, a 3D density surface and a combined
      histogram/density plot.

    Any other value draws nothing. The function shows the figure and returns
    None.
    """
    mode = ctx.obj['trj']

    if mode == 'empty':
        bg_dir = '/home/josh/PycharmProjects/saarama_project/saarama/bg/'
        # (grid cell, ctx.obj key suffix, background file, panel title)
        panels = [
            ((0, 0), 'gen', 'rama_bg.png', 'Ramachandran plot'),
            ((0, 1), 'gly', 'gly_bg.png', 'Ramachandran plot of Glycine'),
            ((1, 0), 'pro', 'pro_bg.png', 'Ramachandran plot of Proline'),
            ((1, 1), 'pre', 'pre_bg.png', 'Ramachandran plot of Pre-proline'),
        ]
        # Load every background before the figure is created, so that a
        # missing file fails before anything is drawn.
        backgrounds = [plt.imread(bg_dir + fname) for _, _, fname, _ in panels]

        fig = plt.figure(constrained_layout=True)
        gs = gridspec.GridSpec(2, 2, figure=fig)
        for (cell, key, _, title), image in zip(panels, backgrounds):
            _draw_rama_scatter(fig.add_subplot(gs[cell]),
                               ctx.obj['phi_' + key],
                               ctx.obj['psi_' + key],
                               title,
                               background=image)
        plt.show()

    elif mode == 'filled':
        phi = ctx.obj['phi']
        psi = ctx.obj['psi']

        plt.style.use('seaborn-darkgrid')
        fig = plt.figure(constrained_layout=True)
        gs = gridspec.GridSpec(3, 2, figure=fig)
        fig.suptitle('Number of angles: ' + str(len(psi)), fontsize=12)

        # Scatter plot
        _draw_rama_scatter(fig.add_subplot(gs[0, 0]), phi, psi,
                           'Ramachandran plot of a single amino acid')

        # Contour plot with a rug of the observations on each axis
        ax2 = fig.add_subplot(gs[0, 1])
        sns.kdeplot(phi,
                    psi,
                    ax=ax2,
                    cmap='Reds',
                    shade=True,
                    shade_lowest=False)
        sns.rugplot(phi, color='k', ax=ax2)
        sns.rugplot(psi, color='k', vertical=True, ax=ax2)
        ax2.plot([0, 0], [-180, 180], c='k', alpha=0.3)
        ax2.plot([-180, 180], [0, 0], c='k', alpha=0.3)
        ax2.set_xlabel('φ')
        ax2.set_ylabel('ψ')
        ax2.set_title('Contour plot of a single amino acid')

        # Angle differences; angle_trans and angle_diff are helpers defined
        # elsewhere in this module.
        psi_diff = angle_diff(angle_trans(psi))
        phi_diff = angle_diff(angle_trans(phi))
        # NOTE(review): `+` is used as concatenation here, which assumes
        # angle_diff returns lists -- confirm.
        combined_list = psi_diff + phi_diff
        # Both series share one y-range so the panels are comparable.
        y_limits = (min(combined_list) - 5, max(combined_list) + 5)

        _draw_diff_series(fig.add_subplot(gs[1, 0]), phi_diff, 'φ',
                          'indianred', y_limits, len(phi))
        _draw_diff_series(fig.add_subplot(gs[1, 1]), psi_diff, 'ψ',
                          'darkkhaki', y_limits, len(phi))

        # 3D density surface: Gaussian KDE evaluated on a 100x100 grid that
        # extends 10% of the data range beyond the data on each side.
        ax5 = fig.add_subplot(gs[2, 0], projection='3d')
        x = np.asarray(phi)
        y = np.asarray(psi)
        deltaX = (max(x) - min(x)) / 10
        deltaY = (max(y) - min(y)) / 10
        xmin = min(x) - deltaX
        xmax = max(x) + deltaX
        ymin = min(y) - deltaY
        ymax = max(y) + deltaY
        xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
        positions = np.vstack([xx.ravel(), yy.ravel()])
        kernel = st.gaussian_kde(np.vstack([x, y]))
        f = np.reshape(kernel(positions).T, xx.shape)
        ax5.plot_surface(xx,
                         yy,
                         f,
                         rstride=1,
                         cstride=1,
                         edgecolor='none',
                         cmap='plasma')
        ax5.set_xlim(-180, 180)
        ax5.set_ylim(-180, 180)
        ax5.set_xlabel('φ')
        ax5.set_ylabel('ψ')
        ax5.set_zlabel('Density')
        ax5.set_title('Surface plot of angle distributions')
        ax5.view_init(20, 280)

        # Histogram/Density plot for angle distribution, one-degree bins
        # spanning the integer range of each angle list.
        bins_list_phi = list(range(int(min(phi)), int(max(phi)), 1))
        bins_list_psi = list(range(int(min(psi)), int(max(psi)), 1))
        ax6 = fig.add_subplot(gs[2, 1])
        sns.distplot(phi,
                     ax=ax6,
                     bins=bins_list_phi,
                     color='indianred',
                     label='φ')
        sns.distplot(psi,
                     ax=ax6,
                     bins=bins_list_psi,
                     color='darkkhaki',
                     label='ψ')
        ax6.set_xlim(-180, 180)
        ax6.set_title('Histogram/Density plot of angle distribution')
        ax6.legend()

        # TODO: polar histograms of the transformed angles are not included
        # yet.
        plt.show()


def _draw_rama_scatter(ax, phi, psi, title, background=None):
    """Draw one phi/psi scatter panel on `ax`, optionally over an image."""
    if background is not None:
        ax.imshow(background, extent=(-180, 180, -180, 180))
    ax.scatter(phi, psi, s=15, color='dimgray')
    # faint lines through the origin
    ax.plot([0, 0], [-180, 180], c='k', alpha=0.3)
    ax.plot([-180, 180], [0, 0], c='k', alpha=0.3)
    ax.set_xlim(-180, 180)
    ax.set_ylim(-180, 180)
    ax.set_xlabel('φ')
    ax.set_ylabel('ψ')
    ax.set_title(title)


def _draw_diff_series(ax, diffs, symbol, color, y_limits, n_frames):
    """Draw one angle-difference series with its mean line and legend."""
    average = sum(diffs) / len(diffs)
    stdev = statistics.stdev(diffs)
    ax.plot([0, len(diffs)], [average, average],
            color='k',
            alpha=0.75,
            label=symbol + '-avg: ' + str(round(average, 2)) + ', stdev: ' +
            str(round(stdev, 2)))
    ax.plot(diffs, color=color, alpha=0.5)
    ax.scatter(range(len(diffs)), diffs, marker='x', s=5, color='k')
    ax.set_ylabel('Angle difference')
    ax.set_xlabel('Timeframe')
    ax.set_title(symbol + '-difference over time')
    ax.set_ylim(*y_limits)
    ax.set_xlim(0, n_frames)
    ax.legend(loc='upper right', borderaxespad=0.)
plt.subplot(sub) plt.title('DMC-' + i, fontsize=14, position=(0.5, 1.05)) sns.kdeplot( df1[i], df1['DMC'], # demonstrate the probability distribution of two variables cbar=True, # display color bar shade=True, # display shades cmap='Blues', # set the color palatte shade_lowest=False, # not display periphery color/shade n_levels=40 # number of curves, the higher, the smoother ) # the color change indicates the change of density plt.grid(linestyle='--') plt.scatter( df1[i], df1['DMC'], s=5, alpha=0.5, color=C, marker='+') #scatter: green indicates no fire, red indicates fire sns.rugplot(df1[i], color='g', axis='x', alpha=0.5) sns.rugplot(df1['DMC'], color='r', axis='y', alpha=0.5) if sub == 133: plt.axis([-6, 6.5, 0, 300 ]) # move the plots to central area for better observation sub += 1 plt.show() plt.figure(18) # DMC-temp plt.title('DMC-temp', fontsize=14, position=(0.5, 1.05)) pal = 'Blues' sns.kdeplot(df1['temp'], df1['DMC'], cbar=True, shade=True, cmap=pal,
def _split_breakpoint(spec):
    """Split a breakpoint spec into ``(left, right)``.

    A single number (Python or numpy scalar) yields ``(number, None)``;
    anything else is indexed as a two-element sequence ``(left, right)``.
    """
    if np.isscalar(spec):
        return spec, None
    return spec[0], spec[1]


def plot_lean_one(b, s, side, **kwargs):
    """Draw one LEAN plot for a building and save it as a PNG.

    Parameters
    ----------
    b : building identifier, used in the log line, title and file name.
    s : station identifier, used in the title and file name.
    side : str
        Which plot to draw. ``'gas'`` and ``'elec'`` plot a single fitted
        curve with its base load subtracted; ``'combined'`` and ``'base'``
        use both fits. ``side`` is also the key into the module-level
        ``title_dict`` and ``ylabel_dict``.
    **kwargs :
        ``kwargs['gas']`` and/or ``kwargs['elec']``: dicts with the keys
        ``'x_range'`` (pair of min/max x), ``'regression_par'`` (fit
        parameters; the first two are used as slope and intercept),
        ``'breakpoint'`` (a single number, or a ``(left, right)`` pair),
        ``'fun'`` (callable evaluated as ``fun(xd, *regression_par)``) and
        ``'x'`` (the observed x values shown as a rug plot).

    Side effects
    ------------
    Sets the global seaborn style, palette and context, draws on a new
    ``plt.axes()``, and saves the figure to
    ``<cwd>/plot_FY_weather/lean_piecewise/<b>_<s>_<side>.png``. The figure
    is closed afterwards, even if plotting fails.
    """
    print('creating {0} LEAN for building {1} ...'.format(side, b))
    sns.set_style("darkgrid")
    sns.set_palette("Set2")
    sns.set_context("talk", font_scale=1)
    gas_line_color = 'deeppink'
    elec_line_color = 'navy'
    base_gas_color = 'orange'
    base_elec_color = 'yellow'
    bx = plt.axes()
    try:
        if side in ('elec', 'gas'):
            fit = kwargs[side]
            t = fit['x_range']
            t_min, t_max = t[0], t[1]
            par = fit['regression_par']
            k, intercept = par[0], par[1]
            # break_right is None when the breakpoint is a single number.
            break_left, break_right = _split_breakpoint(fit['breakpoint'])
            # Base load: the linear part evaluated at the (left) breakpoint.
            base = k * break_left + intercept
            xd = np.linspace(t_min, t_max, 150)
            yd = fit['fun'](xd, *par) - base
            if side == 'gas':
                plt.plot(xd, yd, gas_line_color)
                bx.fill_between(xd, 0, yd, facecolor=gas_line_color,
                                alpha=0.3)
                # Only observations below the breakpoint are marked.
                rug_x = [x for x in fit['x'] if x < break_left]
                sns.rugplot(rug_x, ax=bx, color=gas_line_color)
            else:
                plt.plot(xd, yd, elec_line_color)
                bx.fill_between(xd, 0, yd, facecolor=elec_line_color,
                                alpha=0.3)
                if break_right is not None:
                    # Two breakpoints: mark observations outside the band.
                    rug_x = [x for x in fit['x']
                             if x > break_right or x < break_left]
                else:
                    rug_x = [x for x in fit['x'] if x > break_left]
                sns.rugplot(rug_x, ax=bx, color=elec_line_color)
            plt.ylim((0, max(yd) * 1.1))
        else:
            gas = kwargs['gas']
            t = gas['x_range']
            t_min, t_max = t[0], t[1]
            par_gas = gas['regression_par']
            par_elec = kwargs['elec']['regression_par']
            k_gas, intercept_gas = par_gas[0], par_gas[1]
            # The gas breakpoint is used directly as a single number here.
            breakpoint_gas = gas['breakpoint']
            base_gas = k_gas * breakpoint_gas + intercept_gas
            k_elec, intercept_elec = par_elec[0], par_elec[1]
            breakpoint_elec, _ = _split_breakpoint(
                kwargs['elec']['breakpoint'])
            base_elec = k_elec * breakpoint_elec + intercept_elec
            xd = np.linspace(t_min, t_max, 150)
            # Each curve is lifted by the other fuel's base load.
            yd_gas = gas['fun'](xd, *par_gas) + base_elec
            yd_elec = kwargs['elec']['fun'](xd, *par_elec) + base_gas
            y_top = max(max(yd_elec), max(yd_gas)) * 1.1
            if side == 'combined':
                plt.plot(xd, yd_gas, gas_line_color)
                plt.plot(xd, yd_elec, elec_line_color)
                bx.fill_between(xd, base_elec + base_gas, yd_elec,
                                facecolor=elec_line_color, alpha=0.3)
                bx.fill_between(xd, base_elec + base_gas, yd_gas,
                                facecolor=gas_line_color, alpha=0.3)
                plt.ylim((0, y_top))
            elif side == 'base':
                plt.plot(xd, [base_elec] * len(xd), base_elec_color)
                plt.plot(xd, [base_elec + base_gas] * len(xd),
                         base_gas_color)
                bx.fill_between(xd, 0, base_elec,
                                facecolor=base_elec_color, alpha=0.5)
                bx.fill_between(xd, base_elec, base_elec + base_gas,
                                facecolor=base_gas_color, alpha=0.5)
                plt.ylim((0, y_top))
            sns.rugplot(gas['x'], ax=bx, color='gray')
        plt.title('Lean {0} plot, Building {1}, station {2}'.format(
            title_dict[side], b, s))
        plt.xlabel('Monthly Mean Temperature, Deg F')
        plt.ylabel(ylabel_dict[side])
        plt.tight_layout()
        P.savefig(os.getcwd() + '/plot_FY_weather/lean_piecewise/{0}_{1}_{2}.png'.format(b, s, side),
                  dpi=150, bbox_inches='tight')
        # plt.show()
    finally:
        # Always release the figure so a failed plot cannot bleed into the
        # next call's axes.
        plt.close()