def scatterplot():
    '''Draw a seaborn pair plot of the "iris" data set.

    The points are coloured by the "species" column, and the output name
    'multiScatterplot.png' is handed to C2_8_mystyle.printout_plain.
    '''
    import seaborn as sns

    iris = sns.load_dataset("iris")

    # NOTE(review): newer seaborn releases name this keyword "height";
    # "size" is kept so the call is identical to the original - confirm
    # against the installed seaborn version.
    sns.pairplot(iris, hue="species", size=2.5)

    C2_8_mystyle.printout_plain('multiScatterplot.png')
def showResults(challenger_data, model):
    '''Show the original data, and the resulting logit-fit.

    Parameters
    ----------
    challenger_data : 2-D array
        Column 0 is drawn on the x-axis (labelled as outside temperature),
        column 1 on the y-axis (labelled as damage incident).
    model : fitted model
        ``model.params[0]`` and ``model.params[1]`` are passed to
        ``logistic`` as ``alpha`` and ``beta`` respectively.
    '''

    # First plot the original data
    plt.figure()
    sns.set_context('poster')
    sns.set_style('whitegrid')
    np.set_printoptions(precision=3, suppress=True)

    plt.scatter(challenger_data[:, 0], challenger_data[:, 1], s=75,
                color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside temperature (Fahrenheit)")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    plt.xlim(50, 85)

    # Plot the fit
    x = np.arange(50, 85)
    alpha = model.params[0]
    beta = model.params[1]
    y = logistic(x, beta, alpha)

    # No plt.hold(True) here: drawing into the existing axes is the default,
    # and pyplot.hold does not exist any more in current Matplotlib.
    plt.plot(x, y, 'r')

    outFile = 'ChallengerPlain.png'
    # Raw string: '\I' is not a valid escape sequence. The value is unchanged.
    C2_8_mystyle.printout_plain(outFile, outDir=r'..\Images')
    plt.show()
def generatedata():
    '''Generate and show the data: a noisy plane in 3D.

    Returns
    -------
    tuple of three 1-D arrays
        The flattened X, Y and Z values of the 101 x 101 grid.
    '''

    x = np.linspace(-5, 5, 101)
    (X, Y) = np.meshgrid(x, x)

    # To get reproducible values, a seed value is provided
    np.random.seed(987654321)
    Z = -5 + 3*X - 0.5*Y + np.random.randn(*X.shape)

    # Plot the figure
    fig = plt.figure()
    # Figure.gca() no longer accepts keyword arguments in current Matplotlib;
    # add_subplot creates the same single 3D axes on the new figure.
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(X, Y, Z, cmap=cm.afmhot, rstride=2, cstride=2,
                           linewidth=0, antialiased=False)
    ax.view_init(20, -120)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    fig.colorbar(surf, shrink=0.6)

    outFile = '3dSurface.png'
    C2_8_mystyle.printout_plain(outFile)

    return (X.flatten(), Y.flatten(), Z.flatten())
def generateData():
    '''Generate and show the data: a plane in 3D.

    Returns
    -------
    tuple of three 1-D arrays
        The flattened X, Y and Z values of the 101 x 101 grid.
    '''

    x = np.linspace(-5, 5, 101)
    (X, Y) = np.meshgrid(x, x)

    # To get reproducible values, a seed value is provided
    np.random.seed(987654321)
    Z = -5 + 3*X - 0.5*Y + np.random.randn(*X.shape)

    # Set the color
    myCmap = cm.GnBu_r
    # If you want a colormap from seaborn use:
    #   from matplotlib.colors import ListedColormap
    #   myCmap = ListedColormap(sns.color_palette("Blues", 20))

    # Plot the figure
    fig = plt.figure()
    # Figure.gca() no longer accepts keyword arguments in current Matplotlib;
    # add_subplot creates the same single 3D axes on the new figure.
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(X, Y, Z, cmap=myCmap, rstride=2, cstride=2,
                           linewidth=0, antialiased=False)
    ax.view_init(20, -120)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    fig.colorbar(surf, shrink=0.6)

    outFile = '3dSurface.png'
    C2_8_mystyle.printout_plain(outFile)

    return (X.flatten(), Y.flatten(), Z.flatten())
def show_poisson_views():
    """Show different views of a Poisson distribution.

    Three stacked panels are drawn for a Poisson distribution with
    parameter 10: the PMF and the CDF over k = 0..24, and the PPF over
    100 evenly spaced probabilities in [0, 1].
    """

    fig, ax = plt.subplots(3, 1)

    k = np.arange(25)
    # Named "poisson_dist" rather than "pd", which is the usual pandas alias
    poisson_dist = stats.poisson(10)
    C2_8_mystyle.set(12)

    ax[0].plot(k, poisson_dist.pmf(k), 'x-')
    ax[0].set_title('Poisson distribution')
    ax[0].set_xticklabels([])
    ax[0].set_ylabel('PMF (X)')

    ax[1].plot(k, poisson_dist.cdf(k))
    ax[1].set_xlabel('X')
    ax[1].set_ylabel('CDF (X)')

    y = np.linspace(0, 1, 100)
    ax[2].plot(y, poisson_dist.ppf(y))
    ax[2].set_xlabel('X')
    ax[2].set_ylabel('PPF (X)')

    plt.tight_layout()
    plt.show()
def main():
    '''Draw a histogram and the KDE explanation of six sample values side by side.'''

    # Dummy data
    samples = np.array([-2.1, -1.3, -0.4, 1.9, 5.1, 6.2])

    # One row, two columns: histogram on the left, KDE on the right
    fig, (left_axis, right_axis) = plt.subplots(1, 2)
    plot_histogram(left_axis, samples)
    explain_KDE(right_axis, samples)

    # Save and show
    C2_8_mystyle.printout_plain('KDEexplained.png')
    plt.show()
def generate_probplot():
    '''Generate a prob-plot for a chi2-distribution of sample data.

    Returns
    -------
    data : array
        The 100 random values drawn from the chi2 distribution (3 degrees
        of freedom) with the random seed fixed to 12345.
    '''

    # Define the skewed distribution
    chi2 = stats.chi2(3)

    # Generate the data
    x = np.linspace(0, 10, 100)
    y = chi2.pdf(x)
    np.random.seed(12345)
    numData = 100
    data = chi2.rvs(numData)

    # Arrange subplots
    sns.set_context('paper')
    sns.set_style('white')
    C2_8_mystyle.set(11)
    fig, axs = plt.subplots(1, 2)

    # Plot distribution
    axs[0].plot(x, y)
    axs[0].set_xlabel('X')
    axs[0].set_ylabel('PDF(X)')
    axs[0].set_title('chi2(x), k=3')

    # Aspect = x-range / y-range, so the data area is drawn as a square
    x0, x1 = axs[0].get_xlim()
    y0, y1 = axs[0].get_ylim()
    axs[0].set_aspect((x1-x0)/(y1-y0))

    # Plot probplot.
    # plt.sca() makes the right axes current; passing an existing Axes to
    # plt.axes() is not supported by current Matplotlib.
    plt.sca(axs[1])
    stats.probplot(data, plot=plt)

    x0, x1 = axs[1].get_xlim()
    y0, y1 = axs[1].get_ylim()
    axs[1].axhline(0, lw=0.5, ls='--')
    axs[1].axvline(0, lw=0.5, ls='--')
    axs[1].set_aspect((x1-x0)/(y1-y0))

    C2_8_mystyle.printout_plain('chi2pp.png')

    return data
# NOTE(review): the original line ended with a dangling ''' after the return
# statement; it is not part of this function and is kept only as this comment.
def KS_principle(inData):
    '''Show the principle of the Kolmogorov-Smirnov test.

    The CDF of the standard normal distribution is drawn together with the
    empirical CDF of the sample, and a two-headed arrow marks the vertical
    distance between the two curves at x = 2.

    Parameters
    ----------
    inData : sequence of numbers
        Sample data; the empirical CDF is evaluated on the interval [0, 10].
    '''

    # CDF of normally distributed data
    nd = stats.norm()
    nd_x = np.linspace(-4, 4, 101)
    nd_y = nd.cdf(nd_x)

    # Empirical CDF of the sample data, which range from approximately 0 to 10
    numPts = 50
    lowerLim = 0
    upperLim = 10
    ecdf_x = np.linspace(lowerLim, upperLim, numPts)
    # Use the function argument here (the original read a global "data")
    ecdf_y = stats.cumfreq(inData, numPts, (lowerLim, upperLim))[0]/len(inData)

    # Add zero-point by hand
    ecdf_x = np.hstack((0., ecdf_x))
    ecdf_y = np.hstack((0., ecdf_y))

    # Plot the data
    sns.set_style('ticks')
    sns.set_context('poster')
    C2_8_mystyle.set(36)

    # No plt.hold(True) needed: drawing into the same axes is the default,
    # and pyplot.hold does not exist any more in current Matplotlib.
    plt.plot(nd_x, nd_y, 'k--')
    plt.plot(ecdf_x, ecdf_y, color='k')
    plt.xlabel('X')
    plt.ylabel('Cumulative Probability')

    # For the arrow, find the first sample point at or beyond x = 2 on each curve
    ecdf_startIndex = np.min(np.where(ecdf_x >= 2))
    arrowStart = np.array([ecdf_x[ecdf_startIndex], ecdf_y[ecdf_startIndex]])

    nd_startIndex = np.min(np.where(nd_x >= 2))
    arrowEnd = np.array([nd_x[nd_startIndex], nd_y[nd_startIndex]])
    arrowDelta = arrowEnd - arrowStart

    # Two opposite vertical arrows give one double-headed arrow
    plt.arrow(arrowStart[0], arrowStart[1], 0, arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.02,
              head_width=0.2, color='k')
    plt.arrow(arrowStart[0], arrowStart[1]+arrowDelta[1], 0, -arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.02,
              head_width=0.2, color='k')

    outFile = 'KS_Example.png'
    C2_8_mystyle.printout_plain(outFile)
def showChi2():
    '''Utility function: plot the chi2 PDFs for k = 1, 2, 3 and 5 on [0, 8].'''

    xvals = frange(0, 8, 0.05)

    # One curve per number of degrees of freedom
    for dof in [1, 2, 3, 5]:
        density = stats.chi2.pdf(xvals, dof)
        plt.plot(xvals, density, label='k={0}'.format(dof))

    plt.legend()
    plt.xlim(0, 8)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')

    C2_8_mystyle.printout_plain('dist_chi2.png')
def showExp():
    '''Utility function to show exponential distributions.

    Plots the PDF for the rate parameters lambda = 0.5, 1 and 1.5 on the
    interval [0, 3].
    '''

    t = frange(0, 3, 0.01)
    lambdas = [0.5, 1, 1.5]

    for par in lambdas:
        # scipy parameterises the exponential distribution by its scale,
        # which is 1/lambda. Passing lambda itself as the scale (as the
        # original did) draws curves that do not match their labels.
        density = stats.expon.pdf(t, loc=0, scale=1.0/par)
        # Raw string: '\l' is not a valid escape sequence
        plt.plot(t, density, label=r'$\lambda={0:3.1f}$'.format(par))

    plt.legend()
    plt.xlim(0, 3)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')

    outFile = 'dist_exp.png'
    C2_8_mystyle.printout_plain(outFile)
def show_poisson():
    """Show an example of Poisson distributions.

    Plots the PMF for lambda = 1, 4 and 10 over k = 0..19.
    """

    # Arbitrarily select 3 lambda values
    lambdas = [1, 4, 10]

    k = np.arange(20)       # generate x-values

    for par in lambdas:
        # Raw string: '\l' is not a valid escape sequence
        plt.plot(k, stats.poisson.pmf(k, par), 'o--',
                 label=r'$\lambda={0}$'.format(par))

    # Format the plot
    plt.legend()
    plt.title('Poisson distribution')
    plt.xlabel('X')
    plt.ylabel('P(X)')

    # Show and save the plot
    C2_8_mystyle.printout_plain('Poisson_distribution_pmf.png')
def showF():
    '''Utility function: plot the F-distribution PDFs for four (d1, d2) pairs.'''

    xvals = frange(0, 3, 0.01)

    # (numerator dof, denominator dof) of the curves to draw
    dof_pairs = [(1, 1), (2, 1), (5, 2), (100, 100)]
    for numerator, denominator in dof_pairs:
        curve_label = 'F({0}/{1})'.format(numerator, denominator)
        plt.plot(xvals, stats.f.pdf(xvals, numerator, denominator),
                 label=curve_label)

    plt.legend()
    plt.xlim(0, 3)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')
    # The original calls legend() a second time after the axis formatting
    plt.legend()

    C2_8_mystyle.printout_plain('dist_f.png')
def show3D():
    '''Generation of 3D plots: a coloured surface next to a wireframe.'''

    # Imports specific to the plots in this example
    from matplotlib import cm                   # colormaps
    from mpl_toolkits.mplot3d import Axes3D     # this module is required for 3D plots!
    from mpl_toolkits.mplot3d.axes3d import get_test_data

    # Twice as wide as it is tall.
    fig = plt.figure(figsize=plt.figaspect(0.5))

    # ---- Left subplot: z = sin(r) on a square grid
    grid = np.arange(-5, 5, 0.1)
    X, Y = np.meshgrid(grid, grid)
    Z = np.sin(np.sqrt(X**2 + Y**2))

    # Note the definition of "projection", required for 3D plots
    surface_axes = fig.add_subplot(1, 2, 1, projection='3d')
    surf = surface_axes.plot_surface(X, Y, Z, rstride=1, cstride=1,
                                     cmap=cm.GnBu, linewidth=0,
                                     antialiased=False)
    surface_axes.set_zlim3d(-1.01, 1.01)
    fig.colorbar(surf, shrink=0.5, aspect=10)

    # ---- Right subplot: wireframe of the 3D test data
    wire_axes = fig.add_subplot(1, 2, 2, projection='3d')
    X, Y, Z = get_test_data(0.05)
    wire_axes.plot_wireframe(X, Y, Z, rstride=10, cstride=10)

    C2_8_mystyle.printout_plain('3dGraph.png')
def shifted_normal():
    '''PDF and scatter plot.

    First draws the PDFs of four normal distributions that differ in mean
    and variance, then a scatter plot of 500 normally distributed random
    numbers (mean 0, SD 3).
    '''

    # Plot 4 PDFs (probability density functions) for normal distributions ----

    # Select the mean values and the variances; the SD used for each curve
    # is the square root of the value listed here.
    means = [0, 0, 0, -2]
    variances = [0.2, 1, 5, 0.5]
    t = frange(-5, 5, 0.02)

    # Plot the PDFs, using the color-palette "hls"
    with sns.color_palette('hls', 4):
        for mu, sigma in zip(means, np.sqrt(variances)):
            y = stats.norm.pdf(t, mu, sigma)
            # Raw string without the former "\t": in a normal string that
            # is a TAB character, and the other backslashes were invalid
            # escape sequences.
            plt.plot(t, y,
                     label=r'$\mu={0}, \; \sigma={1:3.1f}$'.format(mu, sigma))

    # Format the plot
    plt.legend()
    plt.xlim([-5, 5])
    plt.title('Normal Distributions')

    # Show the plot, and save the out-file
    outFile = 'Normal_Distribution_PDF.png'
    C2_8_mystyle.printout_plain(outFile)

    # Generate random numbers with a normal distribution ----------------------
    myMean = 0
    mySD = 3
    numData = 500
    data = stats.norm.rvs(myMean, mySD, size=numData)

    # Plot the data
    plt.scatter(np.arange(len(data)), data)

    # Format the plot
    plt.title('Normally distributed data')
    plt.xlim([0, 500])
    plt.ylim([-10, 10])
    plt.show()
    plt.close()
def doTukey(data, multiComp):
    '''Do a pairwise comparison, and show the confidence intervals.

    Parameters
    ----------
    data : mapping / structured array
        Must provide the fields 'StressReduction' and 'Treatment'.
    multiComp : multiple-comparison object
        Its ``tukeyhsd()`` summary and ``groupsunique`` are printed, and
        ``groupsunique`` provides the tick labels of the plot.
    '''

    print((multiComp.tukeyhsd().summary()))

    # Show the group names
    print((multiComp.groupsunique))

    # Generate a plot -------------------

    # Get the data (the test is computed once; the original ran it twice)
    res2 = pairwise_tukeyhsd(data['StressReduction'], data['Treatment'])
    errors = np.ravel(np.diff(res2.confint)/2)
    # One x-position per compared pair instead of a hard-coded 3
    xvals = np.arange(len(res2.meandiffs))

    # Plot them: markers first, then the error bars without connecting line.
    # ('o' is a marker, not a line style, so the former ls='o' was invalid.)
    plt.plot(xvals, res2.meandiffs, 'o')
    plt.errorbar(xvals, res2.meandiffs, yerr=errors, linestyle='none')

    # Put on labels
    pair_labels = multiComp.groupsunique[
        np.column_stack(res2._multicomp.pairindices)]
    plt.xticks(xvals, pair_labels)

    # Format the plot
    xlim = -0.5, len(xvals) - 0.5
    plt.hlines(0, *xlim)
    plt.xlim(*xlim)
    plt.title('Multiple Comparison of Means - Tukey HSD, FWER=0.05' +
              '\n Pairwise Mean Differences')

    # Save to outfile, and show the data
    outFile = 'multComp.png'
    C2_8_mystyle.printout_plain(outFile)
def many_normals():
    '''Show the histograms of 25 sample distributions, and compare the mean values.

    After the 5 x 5 grid of histograms, 1000 further samples of 100 values
    each are drawn and the standard deviation of their means is printed.
    '''

    # Parameters of the sampled normal distribution and of the plot grid
    numRows = 5
    numData = 100
    myMean = 0
    mySD = 1

    # One histogram per grid cell, row by row, without ticks or tick labels
    plt.figure()
    for cell in range(numRows * numRows):
        sample = stats.norm.rvs(myMean, mySD, size=numData)
        plt.subplot(numRows, numRows, cell + 1)
        plt.hist(sample)

        current = plt.gca()
        current.set_xlim([-3, 3])
        current.set_xticks(())
        current.set_yticks(())
        current.set_xticklabels(())
        current.set_yticklabels(())

    plt.tight_layout()

    # Show the data, and save the out-file
    C2_8_mystyle.printout_plain('Normal_MultHist.png')

    # Check out the mean of 1000 normal sample distributions
    numTrials = 1000
    numData = 100
    myMeans = np.array([np.mean(stats.norm.rvs(myMean, mySD, size=numData))
                        for _ in range(numTrials)])

    message = 'The standard error of the mean, with {0} samples, is {1}'
    print((message.format(numData, np.std(myMeans))))
def showT():
    '''Utility function: compare the T distributions for df = 1 and 5 with the normal.'''

    t = frange(-5, 5, 0.05)
    TVals = [1, 5]

    # Reference curve: standard normal distribution
    plt.plot(t, stats.norm.pdf(t), '--', label='normal')

    # One T-distribution curve per number of degrees of freedom
    for dof in TVals:
        plt.plot(t, stats.t.pdf(t, dof), label='df={0}'.format(dof))

    plt.legend()
    plt.xlim(-5, 5)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')

    C2_8_mystyle.printout_plain('dist_t.png')
def main():
    '''Plot noisy linear data, the fitted line, and the residuals.

    Ten points y = 3*x + 2 + noise are generated with a fixed random seed,
    a straight line is fitted with np.polyfit, and each residual is drawn
    as a vertical line between the data point and the fit.
    '''

    # generate the data
    x = np.arange(10)
    np.random.seed(10)
    y = 3*x + 2 + 20*np.random.rand(len(x))

    # determine the line-fit
    k, d = np.polyfit(x, y, 1)
    yfit = k*x + d

    # plot the data
    # (no plt.hold(True): adding to the current axes is the default, and
    # pyplot.hold does not exist any more in current Matplotlib)
    plt.scatter(x, y)
    plt.plot(x, yfit, '--', lw=2)
    for x_i, fit_i, y_i in zip(x, yfit, y):
        plt.plot([x_i, x_i], [fit_i, y_i], 'k')

    plt.xlim((-0.1, 9.1))
    plt.xlabel('X')
    plt.ylabel('Y')

    C2_8_mystyle.printout_plain('residuals.png')
def smSolution(M1, M2, M3):
    '''Solution with the tools from statsmodels.

    Fits the module-level ``y`` against each of the three design matrices
    with ordinary least squares, prints the three summaries, and plots the
    data together with the fitted values over the module-level ``x``.
    '''

    import statsmodels.api as sm
    import C2_8_mystyle

    # Fit all three models first, then print their summaries in order
    fits = [sm.OLS(y, design).fit() for design in (M1, M2, M3)]
    for fit in fits:
        print(fit.summary2())

    # Plot the data
    plt.plot(x, y, '.', label='Data')
    line_formats = ('r--', 'g', 'y')
    line_labels = ('Linear Fit', 'Quadratic Fit', 'Cubic Fit')
    for fit, line_format, line_label in zip(fits, line_formats, line_labels):
        plt.plot(x, fit.fittedvalues, line_format, label=line_label)
    plt.legend(loc='upper left', shadow=True)

    C2_8_mystyle.printout('linearModel.png', xlabel='x', ylabel='y')
def show_binomial():
    """Show an example of binomial distributions.

    Plots the PMF for three (p, n) pairs and marks the upper limit
    (x = 20) of the two distributions with n = 20.
    """

    # Arbitrarily select 3 total numbers, and 3 probabilities
    ns = [20, 20, 40]
    ps = [0.5, 0.7, 0.5]

    # For each (p,n)-pair, plot the corresponding binomial PMFs
    for (p, n) in zip(ps, ns):
        bd = stats.binom(n, p)      # generate the "frozen function"
        x = np.arange(n+1)          # generate the x-values
        plt.plot(x, bd.pmf(x), 'o--', label='p={0:3.1f}, n={1}'.format(p, n))

    # Format the plot
    plt.legend()
    plt.title('Binomial distribution')
    plt.xlabel('X')
    plt.ylabel('P(X)')
    plt.annotate('Upper Limit', xy=(20, 0), xytext=(27, 0.04),
                 arrowprops=dict(shrink=0.05))

    # Show and save the plot
    C2_8_mystyle.printout_plain('Binomial_distribution_pmf.png')
def main():
    '''Plot two normal PDFs and the ROC curve that follows from them.

    The upper axes show the PDFs of N(1, 2) and N(6, 2) with the areas
    below x = 3 shaded; the lower axes show the ROC curve built from the
    two CDFs, together with the diagonal.
    '''

    # Calculate the PDF-curves
    x = np.linspace(-10, 15, 201)
    nd1 = stats.norm(1, 2)
    nd2 = stats.norm(6, 2)
    y1 = nd1.pdf(x)
    y2 = nd2.pdf(x)

    # Axes locations
    ROC = {'left': 0.35, 'width': 0.36, 'bottom': 0.1, 'height': 0.47}
    PDF = {'left': 0.1, 'width': 0.8, 'bottom': 0.65, 'height': 0.3}

    rect_ROC = [ROC['left'], ROC['bottom'], ROC['width'], ROC['height']]
    rect_PDF = [PDF['left'], PDF['bottom'], PDF['width'], PDF['height']]

    plt.figure()

    ax1 = plt.axes(rect_PDF)
    ax2 = plt.axes(rect_ROC)

    # Plot and label the PDF-curves.
    # (No ax.hold(True): adding artists to an axes is the default, and
    # Axes.hold does not exist any more in current Matplotlib.)
    ax1.plot(x, y1)
    ax1.fill_between(x, 0, y1, where=x < 3, facecolor='#CCCCCC', alpha=0.5)
    ax1.annotate('Sensitivity',
                 xy=(x[75], y1[65]),
                 xytext=(x[40], y1[75]*1.2),
                 fontsize=14,
                 horizontalalignment='center',
                 arrowprops=dict(facecolor='#CCCCCC'))

    ax1.plot(x, y2, '#888888')
    ax1.fill_between(x, 0, y2, where=x < 3, facecolor='#888888', alpha=0.5)
    ax1.annotate('1-Specificity',
                 xy=(2.5, 0.03),
                 xytext=(6, 0.05),
                 fontsize=14,
                 horizontalalignment='center',
                 arrowprops=dict(facecolor='#888888'))

    ax1.set_ylabel('PDF')

    # Plot the ROC-curve
    ax2.plot(nd2.cdf(x), nd1.cdf(x), 'k')
    ax2.plot(np.array([0, 1]), np.array([0, 1]), 'k--')

    # Format the ROC-curve
    ax2.set_xlim([0, 1])
    ax2.set_ylim([0, 1])
    ax2.axis('equal')
    ax2.set_title('ROC-Curve')
    ax2.set_xlabel('1-specificity')
    ax2.set_ylabel('sensitivity')

    arrow_bidir(ax2, (0.5, 0.5), (0.095, 0.885))

    # Show the plot, and create a figure
    C2_8_mystyle.printout_plain('ROC.png')
ax.xaxis.set_ticks([50,150,250]) ax.set_xticklabels(['Group1', 'Group2', 'Group3']) ax.yaxis.set_ticks([]) ax.set_title(title) grandMean = np.mean(groupMean) ax.axhline(grandMean, color='#999999') ax.plot([80, 220], [groupMean[1], groupMean[1]], '#999999') ax.plot([80, 120], [groupMean[1]+0.2, groupMean[1]+0.2], '#999999') ax.annotate('', xy=(210, grandMean), xytext=(210,groupMean[1]), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black')) ax.annotate('', xy=(90, groupMean[1]), xytext=(90,groupMean[1]+0.2), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black')) ax.text(210, (grandMean + groupMean[1])/2., '$SS_{Treatment}$', fontsize=36) ax.text(90, groupMean[1]+0.1, '$SS_{Error}$', ha='right', fontsize=36) if __name__ == '__main__': centers = [5, 5.3, 4.7] np.random.seed(123) C2_8_mystyle.set(30) fig = plt.figure() ax = fig.add_subplot(111) std = 0.1 numData = 100 show_fig(0.1, ax, 'Sum-Squares') # Save and show C2_8_mystyle.printout_plain('anova_annotated.png')
def main():
    '''Demonstrate the generation of different statistical standard plots.

    Draws, one after the other: scatter plot, histograms, cumulative
    frequency, KDE, boxplots, error bars, violin plot, bar plot, grouped
    boxplot, bivariate scatter plot and pie plot. Most figures are handed
    to C2_8_mystyle.printout with an output file name.
    '''

    # Univariate data -------------------------

    # Generate data that are normally distributed
    x = np.random.randn(500)

    # Set the fonts the way I like them
    sns.set_context('poster')
    sns.set_style('ticks')
    C2_8_mystyle.set(fs=32)

    # Scatter plot
    plt.scatter(np.arange(len(x)), x)
    plt.xlim([0, len(x)])

    # Save and show the data, in a systematic format
    C2_8_mystyle.printout('scatterPlot.png', xlabel='x', ylabel='y',
                          title='Scatter')

    # Histogram
    plt.hist(x)
    C2_8_mystyle.printout('histogram_plain.png', xlabel='Data Values',
                          ylabel='Frequency',
                          title='Histogram, default settings')

    plt.hist(x, 25)
    C2_8_mystyle.printout('histogram.png', xlabel='Data Values',
                          ylabel='Frequency', title='Histogram, 25 bins')

    # Cumulative probability density
    numbins = 20
    plt.plot(stats.cumfreq(x, numbins)[0])
    C2_8_mystyle.printout('CumulativeFrequencyFunction.png',
                          xlabel='Data Values', ylabel='CumFreq',
                          title='Cumulative Frequency')

    # KDE-plot
    sns.kdeplot(x)
    C2_8_mystyle.printout('kde.png', xlabel='Data Values', ylabel='Density',
                          title='KDE_plot')

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(x, sym='*')
    C2_8_mystyle.printout('boxplot.png', xlabel='Values', title='Boxplot')

    plt.boxplot(x, sym='*', vert=False)
    plt.title('Boxplot, horizontal')
    plt.xlabel('Values')
    plt.show()

    # Errorbars
    xvals = np.arange(5)
    yvals = xvals**2
    errorBar = xvals/2.0
    plt.errorbar(xvals, yvals, yerr=errorBar, fmt='o', capsize=5, capthick=3)

    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    C2_8_mystyle.printout('Errorbars.png', xlabel='Data Values',
                          ylabel='Measurements', title='Errorbars')

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=100)

    nd2 = stats.norm(loc=3, scale=1.5)
    data2 = nd2.rvs(size=100)

    # Use pandas and the seaborn package for the violin plot
    df = pd.DataFrame({'Girls': data, 'Boys': data2})
    sns.violinplot(df)

    C2_8_mystyle.printout('violinplot.png', title='Violinplot')

    # Barplot
    df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
    df.plot(kind='bar', grid=False)
    C2_8_mystyle.printout('barplot.png', title='Barplot')

    # Grouped Boxplot
    sns.set_style('whitegrid')
    sns.boxplot(df)
    C2_8_mystyle.set(fs=28)
    C2_8_mystyle.printout('groupedBoxplot.png', title='sns.boxplot')

    # Bivariate Plots
    df2 = pd.DataFrame(np.random.rand(50, 4), columns=['a', 'b', 'c', 'd'])
    # Marker sizes come from df2 itself. The original took them from "df",
    # which has only 10 rows and does not belong to the 50 plotted points.
    df2.plot(kind='scatter', x='a', y='b', s=df2['c']*300)
    C2_8_mystyle.printout('bivariate.png')

    # Pieplot
    series = pd.Series(3 * np.random.rand(4), index=['a', 'b', 'c', 'd'],
                       name='series')
    # Switch the palette for this one plot only, then restore the old one
    oldPalette = sns.color_palette()
    sns.set_palette("husl")
    series.plot(kind='pie', figsize=(6, 6))
    C2_8_mystyle.printout('piePlot.png', title='pie-plot')
    sns.set_palette(oldPalette)
# PDF values of the full curve and of the three shaded sub-ranges
y = nd.pdf(x)
y1, y2, y3 = nd.pdf(x1), nd.pdf(x2), nd.pdf(x3)

# Three panels in one row, sharing the y-axis
sns.set(context='poster')
sns.set_style('ticks')
fig, axs = plt.subplots(1, 3, sharey=True)


def show_SD(axis, xi, yi, text):
    '''Draw the PDF into "axis", shade the area under (xi, yi), and label it.

    The full curve comes from the module-level "x" and "y"; "text" is
    written centred at x = 0.
    '''

    axis.plot(x, y)
    axis.fill_between(xi, yi, facecolor='#DDDDDD')
    axis.text(0, 0.05, text, horizontalalignment='center', fontsize=25)

    axis.set_xlim([-3.5, 3.5])
    axis.set_ylim([-0.0, 0.5])

    # No left spine and no y-ticks
    sns.despine(ax=axis, left=True)
    axis.set_yticks([])


show_SD(axs[0], x1, y1, '68.3%')
show_SD(axs[1], x2, y2, '95.4%')
show_SD(axs[2], x3, y3, '99.7%')

plt.tight_layout()
C2_8_mystyle.printout_plain('area_SDs.png')
def simple_normal():
    '''Different aspects of a normal distribution.

    Draws five panels for the standard normal distribution: the PDF across
    the whole first row, then CDF and SF in the second row, and PPF and ISF
    in the third row.
    '''

    def ticks_bottom_left():
        '''Put the tick marks of the current axes at the bottom and left only.'''
        current = plt.gca()
        current.xaxis.set_ticks_position('bottom')
        current.yaxis.set_ticks_position('left')

    # Generate the data
    x = np.arange(-4, 4, 0.1)       # the desired x-values
    x2 = np.arange(0, 1, 0.001)     # probabilities, for the inverse functions

    # "Frozen" standard normal distribution; no values are calculated yet
    nd = stats.norm()

    # PDF: the first row of the 3x2 layout is taken up completely
    plt.subplot2grid((3, 2), (0, 0), colspan=2)
    plt.plot(x, nd.pdf(x))
    plt.xlim([-4, 4])
    ticks_bottom_left()
    plt.yticks(np.linspace(0, 0.4, 5))
    plt.title('Normal Distribution - PDF: Probability Density Fct')

    # CDF
    plt.subplot(323)
    plt.plot(x, nd.cdf(x))
    ticks_bottom_left()
    plt.xlim([-4, 4])
    plt.ylim([0, 1])
    plt.vlines(0, 0, 1, linestyles='--')
    plt.title('CDF: Cumulative Distribution Fct')

    # SF
    plt.subplot(324)
    plt.plot(x, nd.sf(x))
    ticks_bottom_left()
    plt.xlim([-4, 4])
    plt.ylim([0, 1])
    plt.vlines(0, 0, 1, linestyles='--')
    plt.title('SF: Survival Fct')

    # PPF
    plt.subplot(325)
    plt.plot(x2, nd.ppf(x2))
    ticks_bottom_left()
    plt.yticks(np.linspace(-4, 4, 5))
    plt.hlines(0, 0, 1, linestyles='--')
    plt.ylim([-4, 4])
    plt.title('PPF: Percentile Point Fct')

    # ISF
    plt.subplot(326)
    plt.plot(x2, nd.isf(x2))
    ticks_bottom_left()
    plt.yticks(np.linspace(-4, 4, 5))
    plt.hlines(0, 0, 1, linestyles='--')
    plt.title('ISF: Inverse Survival Fct')
    plt.ylim([-4, 4])

    plt.tight_layout()

    C2_8_mystyle.printout_plain('DistributionFunctions.png')
# additional packages
import C2_8_mystyle

# Standard normal distribution: PDF and CDF on [-3, 3]
nd = stats.norm()
x = np.linspace(-3, 3, 100)
yp = nd.pdf(x)
y = nd.cdf(x)

# Sub-range [-3, 1], used for the shaded area under the PDF
x1 = np.linspace(-3, 1)
y1 = nd.pdf(x1)

# Plot style
sns.set_context('paper')
sns.set_style('white')
C2_8_mystyle.set(12)

figs, axs = plt.subplots(1, 2)

# Left panel: the PDF, with the area up to x = 1 shaded
axs[0].plot(x, yp, 'k')
axs[0].fill_between(x1, y1, facecolor='#CCCCCC')
axs[0].text(0, 0.1, 'CDF(x)', family='cursive', fontsize=14,
            horizontalalignment='center', style='italic')
axs[0].set_xlabel('x')
axs[0].set_ylabel('PDF(x)')
sns.despine()

# Right panel: the CDF; the dashed vertical line is drawn by pyplot into
# the current axes
axs[1].plot(x, y, '#999999', lw=3)
axs[1].set_xlabel('x')
axs[1].set_ylabel('CDF(x)')
plt.vlines(0, 0, 1, linestyles='--')
sns.despine()
# Import standard packages
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns

# additional packages
import C2_8_mystyle

# Plot style
sns.set_context('poster')
sns.set_style('ticks')

# Ten scores from a normal distribution (mean 100, SD 20), with a fixed
# seed so that the figure is reproducible
np.random.seed(1234)
nd = stats.norm(100, 20)
scores = nd.rvs(10)

# The scores as points, a dashed line at the value 110, and a dash-dotted
# line at the sample mean
plt.plot(scores, 'o')
plt.axhline(110, ls='--')
plt.axhline(scores.mean(), ls='-.')

plt.xlim(-0.2, 9.2)
plt.ylim(50, 130)
plt.xlabel('Student-Nr')
plt.ylabel('Score')

outFile = 'fig_ExampleTtest.png'
C2_8_mystyle.printout_plain(outFile)

plt.show()
sns.set_style('ticks')

# x-range of the plot: the distribution within 3 SDs around "md"
limits = (md-3*sd, md+3*sd)
x = np.linspace(*limits)
y = nd.pdf(x)

# The regions to shade: everything below "checkVal", and the region beyond
# the point mirrored at "md"
checkVal = 2.6
print('p = {0:5.3f}'.format(nd.cdf(checkVal)))

x1 = np.linspace(limits[0], checkVal)
y1 = nd.pdf(x1)
x2 = np.linspace(md + (md-checkVal), limits[1])
y2 = nd.pdf(x2)

# Curve first, then the two shaded areas
plt.plot(x, y)
plt.fill_between(x1, y1, alpha=0.5)
plt.fill_between(x2, y2, alpha=0.2)

# Axis labels and the annotation inside the plot
plt.xlabel('Weight')
plt.ylabel('P(Weight)')
plt.text(2.1, 0.05, '11.8%', fontsize=20)

# Remove the right- and top-axis
sns.despine()

# Save and show
C2_8_mystyle.printout_plain('pdf_checkValue.png')
"miR-137" is a short non-coding RNA molecule that functions to regulate the expression levels of other genes. ''' # author: Thomas Haslwanter, date: Jun-2015 # Import standard packages import matplotlib.pyplot as plt import C2_8_mystyle as mystyle # additional packages from lifelines.datasets import load_waltons from lifelines import KaplanMeierFitter from lifelines.statistics import logrank_test # Set my favorite font mystyle.set() # Load and show the data df = load_waltons() # returns a Pandas DataFrame print(df.head()) ''' T E group 0 6 1 miR-137 1 13 1 miR-137 2 13 1 miR-137 3 13 1 miR-137 4 19 1 miR-137 ''' T = df['T']
# Draw 100 random values from "nd", and evaluate its exact PDF on [-5, 5]
data = nd.rvs(100)
x = np.linspace(-5, 5, 101)
pdf = nd.pdf(x)

# Calculate the KDE
sd = np.std(data, ddof=1)
# Bandwidth factor (4/(3n))**(1/5). The sample size is taken from the data
# instead of a hard-coded 100, and the float literal keeps the division
# from truncating to 0 under Python 2.
h = (4.0/(3*len(data)))**0.2
h_str = '{0:4.2f}'.format(h)

# Calculate the smoothed plots, with 3 different parameters.
# gaussian_kde is taken from scipy.stats directly; the scipy.stats.kde
# namespace is deprecated.
kde_small = stats.gaussian_kde(data, 0.1)
kde = stats.gaussian_kde(data, h)
kde_large = stats.gaussian_kde(data, 1)

# Generate two plots: one KDE with rug-plot, and one with different parameters
sns.set_context('poster')
sns.set_style('ticks')
fig, axs = plt.subplots(1, 2)
sns.distplot(data, rug=True, ax=axs[0])

# The legend entries below follow the order of these four plot calls
axs[1].plot(x, pdf)
axs[1].plot(x, kde.evaluate(x), 'r')
axs[1].plot(x, kde_small.evaluate(x), '--', color=[0.8, 0.8, 0.8])
axs[1].plot(x, kde_large.evaluate(x), '--')
axs[1].legend(['exact', h_str, '0.1', '1.0'])
axs[1].set_ylim(0, 0.40)

C2_8_mystyle.printout_plain('kdePlot.png')
plt.show()