def show_binomial():
    """Show an example of binomial distributions.

    Plots the probability mass functions of three binomial distributions
    (n=20, p=0.5), (n=20, p=0.7) and (n=40, p=0.5), evaluated at
    k = 0..39, into the current matplotlib axes.  The finished figure is
    handed to ``mystyle.printout_plain`` under the name
    'Binomial_distribution_pmf.png' (project helper; presumably it saves
    and/or shows the figure -- confirm against ``mystyle``).
    """
    # Frozen distributions, created as stats.binom(n, p), paired with the
    # matplotlib format string used to draw each of them.
    curves = [
        (stats.binom(20, 0.5), 'o-b'),
        (stats.binom(20, 0.7), 'd-r'),
        (stats.binom(40, 0.5), 's-g'),
    ]
    k = np.arange(40)

    sns.set_context('paper')
    sns.set_style('ticks')
    mystyle.set(14)

    markersize = 8
    # Successive plt.plot calls add to the same axes by default.  The former
    # plt.hold(True) call was removed from matplotlib (3.0) and would raise
    # AttributeError there, so it is no longer issued.
    for dist, fmt in curves:
        plt.plot(k, dist.pmf(k), fmt, ms=markersize)

    plt.title('Binomial distribution')
    plt.legend(['p=0.5 and n=20', 'p=0.7 and n=20', 'p=0.5 and n=40'])
    plt.xlabel('X')
    plt.ylabel('P(X)')
    sns.despine()

    mystyle.printout_plain('Binomial_distribution_pmf.png')
def show_poisson():
    """Show different views of a Poisson distribution.

    Creates a figure with three stacked panels for a Poisson distribution
    with rate 10: the probability mass function (PMF) and the cumulative
    distribution function (CDF), both evaluated at k = 0..24, and the
    percent point function (PPF, the inverse CDF) evaluated at 100 evenly
    spaced probabilities between 0 and 1.
    """
    _, axes = plt.subplots(3, 1)

    k = np.arange(25)
    # Named poisson_dist rather than `pd`, which is the conventional alias
    # for pandas and would be shadowed inside this function.
    poisson_dist = stats.poisson(10)
    mystyle.set(12)

    # Top panel: PMF.  Tick labels are blanked; the panel below carries the
    # x-axis label.
    axes[0].plot(k, poisson_dist.pmf(k), 'x-')
    axes[0].set_title('Poisson distribution')
    axes[0].set_xticklabels([])
    axes[0].set_ylabel('PMF (X)')

    # Middle panel: CDF
    axes[1].plot(k, poisson_dist.cdf(k))
    axes[1].set_xlabel('X')
    axes[1].set_ylabel('CDF (X)')

    # Bottom panel: PPF over the probability range [0, 1]
    y = np.linspace(0, 1, 100)
    axes[2].plot(y, poisson_dist.ppf(y))
    axes[2].set_xlabel('X')
    axes[2].set_ylabel('PPF (X)')

    plt.tight_layout()
def show_poisson_views():
    """Show different views of a Poisson distribution.

    Creates a figure with three stacked panels for a Poisson distribution
    with rate 10: the probability mass function (PMF) and the cumulative
    distribution function (CDF), both evaluated at k = 0..24, and the
    percent point function (PPF, the inverse CDF) evaluated at 100 evenly
    spaced probabilities between 0 and 1.
    """
    _, axes = plt.subplots(3, 1)

    k = np.arange(25)
    # Named poisson_dist rather than `pd`, which is the conventional alias
    # for pandas and would be shadowed inside this function.
    poisson_dist = stats.poisson(10)
    mystyle.set(12)

    # Top panel: PMF.  Tick labels are blanked; the panel below carries the
    # x-axis label.
    axes[0].plot(k, poisson_dist.pmf(k), 'x-')
    axes[0].set_title('Poisson distribution')
    axes[0].set_xticklabels([])
    axes[0].set_ylabel('PMF (X)')

    # Middle panel: CDF
    axes[1].plot(k, poisson_dist.cdf(k))
    axes[1].set_xlabel('X')
    axes[1].set_ylabel('CDF (X)')

    # Bottom panel: PPF over the probability range [0, 1]
    y = np.linspace(0, 1, 100)
    axes[2].plot(y, poisson_dist.ppf(y))
    axes[2].set_xlabel('X')
    axes[2].set_ylabel('PPF (X)')

    plt.tight_layout()
def main():
    '''Demonstrate central limit theorem.

    Draws 100000 uniformly distributed random numbers from [0, 1) and shows
    three histograms side by side: the raw data, the means of consecutive
    groups of 2 values, and the means of consecutive groups of 10 values.
    The figure is handed to ``mystyle.printout_plain`` under the name
    'CentralLimitTheorem.png'.
    '''
    # Generate data.  The sample count must be an int: a float such as 1e5
    # is rejected by np.random.random and by reshape.
    ndata = int(1e5)
    nbins = 50
    data = np.random.random(ndata)

    # Show them
    fig, axs = plt.subplots(1, 3)
    mystyle.set(14)
    myColor = '#CCCCCC'

    axs[0].hist(data, bins=nbins, color=myColor)
    axs[0].set_title('Random data')
    axs[0].set_xticks([0, 0.5, 1])
    axs[0].set_ylabel('Counts')

    # Averages over groups of 2 and of 10 consecutive samples.  Floor
    # division keeps the reshape dimensions integral.
    for ax, group_size in zip(axs[1:], (2, 10)):
        grouped = data.reshape((ndata // group_size, group_size))
        ax.hist(np.mean(grouped, axis=1), bins=nbins, color=myColor)
        ax.set_xticks([0, 0.5, 1])
        ax.set_title(' Average over {0}'.format(group_size))

    plt.tight_layout()
    mystyle.printout_plain('CentralLimitTheorem.png')
def main():
    """Show common ways of displaying univariate data.

    Generates 500 normally distributed values and displays them as a scatter
    plot, two histograms (default bins and 25 bins), a cumulative frequency
    curve, and a vertical and a horizontal boxplot.  Finally, two further
    normal samples of 100 values each are compared in a violin plot.  Most
    figures are handed to ``mystyle.printout`` together with a file name and
    axis labels (project helper; presumably it labels and saves/shows the
    figure -- confirm against ``mystyle``).
    """
    # Univariate data -------------------------
    # Generate data that are normally distributed
    x = randn(500)

    # Set the fonts the way I like them
    sns.set_context('paper')
    sns.set_style('white')
    mystyle.set()

    # Scatter plot
    plot(x, '.')
    mystyle.printout('scatterPlot.png', xlabel='x', ylabel='y',
                     title='Scatter')

    # Histogram
    hist(x, color='#999999')
    mystyle.printout('histogram_plain.png', xlabel='Data Values',
                     ylabel='Frequency', title='Histogram, default settings')

    hist(x, 25, color='#999999')
    mystyle.printout('histogram.png', xlabel='Data Values',
                     ylabel='Frequency', title='Histogram, 25 bins')

    # Cumulative frequency: element [0] of the cumfreq result holds the
    # cumulative counts per bin
    numbins = 20
    plot(stats.cumfreq(x, numbins)[0])
    mystyle.printout('CumulativeFrequencyFunction.png', xlabel='Data Values',
                     ylabel='Cumulative Frequency')

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    boxplot(x, sym='*')
    mystyle.printout('boxplot.png', xlabel='Values', title='Boxplot')

    boxplot(x, sym='*', vert=False)
    title('Boxplot, horizontal')
    xlabel('Values')
    show()

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))

    nd2 = stats.norm(loc=3, scale=1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot.  The frame is
    # passed as wide-form `data`, and the per-column colours go through
    # `palette`, since `color` takes a single colour only.
    df = pd.DataFrame({'Girls': data, 'Boys': data2})
    sns.violinplot(data=df, palette=["#999999", "#DDDDDD"])
    mystyle.printout('violinplot.png')
import seaborn as sns
import os.path
import mystyle

# Define the skewed distribution: a frozen chi-square distribution with
# 3 degrees of freedom
chi2 = stats.chi2(3)

# Generate the data
x = np.linspace(0,10, 100)  # evaluation grid for the PDF
y = chi2.pdf(x)
data = chi2.rvs(100)  # 100 random draws; not used within this excerpt

# Arrange subplots: one row, two panels
sns.set_context('paper')
sns.set_style('white')
mystyle.set(11)
fig, axs = plt.subplots(1,2)

# Plot distribution in the left panel
axs[0].plot(x,y)
axs[0].set_xlabel('X')
axs[0].set_ylabel('PDF(X)')
axs[0].set_title('chi2(x), k=3')
sns.set_style('white')

# Set the aspect to (x-range / y-range) so the plotting area of the left
# panel comes out square
x0, x1 = axs[0].get_xlim()
y0, y1 = axs[0].get_ylim()
axs[0].set_aspect((x1-x0)/(y1-y0))
#sns.despine()
grandMean = np.mean(groupMean) ax.axhline(grandMean) ax.plot([80, 220], [groupMean[1], groupMean[1]], 'b') ax.plot([80, 120], [groupMean[1]+0.2, groupMean[1]+0.2], 'b') ax.annotate('', xy=(210, grandMean), xytext=(210,groupMean[1]), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black')) ax.annotate('', xy=(90, groupMean[1]), xytext=(90,groupMean[1]+0.2), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black')) ax.text(210, (grandMean + groupMean[1])/2., '$SS_{Treatment}$', fontsize=24) ax.text(90, groupMean[1]+0.1, '$SS_{Error}$', ha='right', fontsize=24) if __name__ == '__main__': centers = [5, 5.3, 4.7] colors = 'brg' #sns.set_context('paper') #sns.set_style('white') np.random.seed(123) mystyle.set(18) fig = plt.figure() ax = fig.add_subplot(111) std = 0.1 numData = 100 show_fig(0.1, ax, 'Sum-Squares') mystyle.printout_plain('anova_annotated.png')
def show_fig(std, ax, title):
    '''Plot three normally distributed groups of points next to each other.

    For each of the three groups, ``numData`` samples are drawn from a normal
    distribution centered at the matching entry of ``centers`` with standard
    deviation ``std``, and drawn as dots in the matching entry of ``colors``.
    ``centers``, ``colors`` and ``numData`` are read from module level.

    std   : standard deviation shared by all three groups
    ax    : matplotlib axes to draw into
    title : title put on top of the axes
    '''
    within_group = np.arange(numData)
    for group in range(3):
        samples = stats.norm(centers[group], std).rvs(numData)
        # Shift each group to the right by one group width
        ax.plot(group * numData + within_group, samples, '.',
                color=colors[group], ms=10)

    # One labelled tick per group
    ax.xaxis.set_ticks([50, 150, 250])
    ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
    ax.set_title(title)
    sns.despine()


if __name__ == '__main__':
    # Settings that show_fig reads from module level
    centers = [5, 5.3, 4.7]
    colors = 'brg'
    stds = [0.1, 2]
    numData = 100

    # Figure style
    sns.set_context('paper')
    sns.set_style('whitegrid')
    mystyle.set(14)

    # One panel per standard deviation
    fig, axs = plt.subplots(1, 2)
    for panel, sd, label in zip(axs, stds, ('SD=0.1', 'SD=2.0')):
        show_fig(sd, panel, label)

    mystyle.printout_plain('anova_oneway.png')
import seaborn as sns
import os.path
import mystyle

# Define the skewed distribution: a frozen chi-square distribution with
# 3 degrees of freedom
chi2 = stats.chi2(3)

# Generate the data
x = np.linspace(0, 10, 100)  # evaluation grid for the PDF
y = chi2.pdf(x)
data = chi2.rvs(100)  # 100 random draws; not used within this excerpt

# Arrange subplots: one row, two panels
sns.set_context('paper')
sns.set_style('white')
mystyle.set(11)
fig, axs = plt.subplots(1, 2)

# Plot distribution in the left panel
axs[0].plot(x, y)
axs[0].set_xlabel('X')
axs[0].set_ylabel('PDF(X)')
axs[0].set_title('chi2(x), k=3')
sns.set_style('white')

# Set the aspect to (x-range / y-range) so the plotting area of the left
# panel comes out square
x0, x1 = axs[0].get_xlim()
y0, y1 = axs[0].get_ylim()
axs[0].set_aspect((x1 - x0) / (y1 - y0))
#sns.despine()

# Plot probplot
# Don't worry that in Python it is called "weibull_min": the "weibull_max" is # simply mirrored about the origin. showDistribution(arange(0,5,0.02), stats.weibull_min(1), stats.weibull_min(2), 'Weibull Distribution', 'X', 'P(X)',['k=1', 'k=2'], xmin=0, xmax=4) # Uniform distribution showDistribution(x, stats.uniform,'' , 'Uniform Distribution', 'X', 'P(X)','') # Logistic distribution showDistribution(x, stats.norm, stats.logistic, 'Logistic Distribution', 'X', 'P(X)',['Normal', 'Logistic']) # Lognormal distribution x = logspace(-9,1,1001)+1e-9 showDistribution(x, stats.lognorm(2), '', 'Lognormal Distribution', 'X', 'lognorm(X)','', xmin=-0.1) # The log-lin plot has to be done by hand: plot(log(x), stats.lognorm.pdf(x,2)) xlim(-10, 4) title('Lognormal Distribution') xlabel('log(X)') ylabel('lognorm(X)') show() #---------------------------------------------------------------------- if __name__ == '__main__': mystyle.set() show_continuous()
facecolor='black')) ax.annotate('', xy=(90, groupMean[1]), xytext=(90, groupMean[1] + 0.2), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black')) ax.text(210, (grandMean + groupMean[1]) / 2., '$SS_{Treatment}$', fontsize=24) ax.text(90, groupMean[1] + 0.1, '$SS_{Error}$', ha='right', fontsize=24) if __name__ == '__main__': centers = [5, 5.3, 4.7] colors = 'brg' #sns.set_context('paper') #sns.set_style('white') np.random.seed(123) mystyle.set(18) fig = plt.figure() ax = fig.add_subplot(111) std = 0.1 numData = 100 show_fig(0.1, ax, 'Sum-Squares') mystyle.printout_plain('anova_annotated.png')
import seaborn as sns
import mystyle

# Calculate the values
nd = stats.norm()  # frozen normal distribution with default loc and scale
x = np.linspace(-3,3,100)
yp = nd.pdf(x)  # probability density over x
y = nd.cdf(x)  # cumulative distribution over x
x1 = np.linspace(-3, 1)  # sub-range [-3, 1] whose area under the PDF is shaded
y1 = nd.pdf(x1)

# Make the plot: one row, two panels
sns.set_context('paper')
sns.set_style('white')
mystyle.set(12)
figs, axs = plt.subplots(1,2)

# Left panel: PDF curve, with the area below it filled in grey over x1 and
# annotated with the text 'CDF(x)'
axs[0].plot(x,yp, 'k')
axs[0].fill_between(x1, y1, facecolor='#CCCCCC')
axs[0].text(0, 0.1, 'CDF(x)', family='cursive', fontsize=14,
            horizontalalignment='center', style='italic')
axs[0].set_xlabel('x')
axs[0].set_ylabel('PDF(x)')
sns.despine()

# Right panel: CDF curve
axs[1].plot(x, y, '#999999', lw=3)
axs[1].set_xlabel('x')
axs[1].set_ylabel('CDF(x)')
# Dashed vertical line at x=0 from 0 to 1, drawn on pyplot's current axes
# NOTE(review): presumably the right panel, since it was created last -- confirm
plt.vlines(0, 0, 1, linestyles='--')
sns.despine()
# additional packages import mystyle # Calculate the values nd = stats.norm() x = np.linspace(-3,3,100) yp = nd.pdf(x) y = nd.cdf(x) x1 = np.linspace(-3, 1) y1 = nd.pdf(x1) # Make the plot sns.set_context('paper') sns.set_style('white') mystyle.set(12) figs, axs = plt.subplots(1,2) axs[0].plot(x,yp, 'k') axs[0].fill_between(x1, y1, facecolor='#CCCCCC') axs[0].text(0, 0.1, 'CDF(x)', family='cursive', fontsize=14, horizontalalignment='center', style='italic') axs[0].set_xlabel('x') axs[0].set_ylabel('PDF(x)') sns.despine() axs[1].plot(x, y, '#999999', lw=3) axs[1].set_xlabel('x') axs[1].set_ylabel('CDF(x)') plt.vlines(0, 0, 1, linestyles='--') sns.despine()
'') # Logistic distribution showDistribution(x, stats.norm, stats.logistic, 'Logistic Distribution', 'X', 'P(X)', ['Normal', 'Logistic']) # Lognormal distribution x = logspace(-9, 1, 1001) + 1e-9 showDistribution(x, stats.lognorm(2), '', 'Lognormal Distribution', 'X', 'lognorm(X)', '', xmin=-0.1) # The log-lin plot has to be done by hand: plot(log(x), stats.lognorm.pdf(x, 2)) xlim(-10, 4) title('Lognormal Distribution') xlabel('log(X)') ylabel('lognorm(X)') show() #---------------------------------------------------------------------- if __name__ == '__main__': mystyle.set() show_continuous()
offset = ii * numData ax.plot(offset + np.arange(numData), data, '.', color=colors[ii], ms=10) ax.xaxis.set_ticks([50, 150, 250]) ax.set_xticklabels(['Group1', 'Group2', 'Group3']) ax.set_title(title) sns.despine() if __name__ == '__main__': centers = [5, 5.3, 4.7] colors = 'brg' sns.set_context('paper') sns.set_style('whitegrid') mystyle.set(14) fig, axs = plt.subplots(1, 2) stds = [0.1, 2] numData = 100 show_fig(0.1, axs[0], 'SD=0.1') show_fig(2, axs[1], 'SD=2.0') mystyle.printout_plain('anova_oneway.png')