def generateData():
    '''Generate and show the data: a noisy plane in 3D.

    The plane ``Z = -5 + 3*X - 0.5*Y`` plus standard-normal noise is
    evaluated on a 101 x 101 grid over [-5, 5] x [-5, 5], drawn as a 3D
    surface, and passed to ``showData`` under the name '3dSurface.png'.

    Returns
    -------
    (X, Y, Z) : tuple of flattened (1D) arrays holding the grid
        coordinates and the noisy surface values.
    '''

    x = np.linspace(-5, 5, 101)
    (X, Y) = np.meshgrid(x, x)

    # To get reproducible values, a fixed seed is used
    np.random.seed(987654321)
    Z = -5 + 3*X - 0.5*Y + np.random.randn(np.shape(X)[0], np.shape(X)[1])

    # Set the color
    myCmap = cm.GnBu_r
    # If you want a colormap from seaborn use:
    #from matplotlib.colors import ListedColormap
    #myCmap = ListedColormap(sns.color_palette("Blues", 20))

    # Plot the figure.
    # Figure.gca() no longer accepts keyword arguments in current matplotlib,
    # so the 3D axes are created explicitly.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(X, Y, Z, cmap=myCmap, rstride=2, cstride=2,
                           linewidth=0, antialiased=False)
    ax.view_init(20, -120)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    fig.colorbar(surf, shrink=0.6)

    outFile = '3dSurface.png'
    showData(outFile)

    return (X.flatten(), Y.flatten(), Z.flatten())
def KS_principle(inData):
    '''Show the principle of the Kolmogorov-Smirnov test.

    Plots the CDF of the standard normal distribution (dashed) together
    with the empirical CDF of `inData`, and marks the vertical distance
    between the two curves at X = 2 with a double-headed arrow.

    Parameters
    ----------
    inData : sample values; the empirical CDF is evaluated on [0, 10].
    '''

    # CDF of normally distributed data
    nd = stats.norm()
    nd_x = np.linspace(-4, 4, 101)
    nd_y = nd.cdf(nd_x)

    # Empirical CDF of the sample data, which range from approximately 0 to 10
    numPts = 50
    lowerLim = 0
    upperLim = 10
    ecdf_x = np.linspace(lowerLim, upperLim, numPts)
    # Use the function argument (not a global called "data") for the counts
    ecdf_y = stats.cumfreq(inData, numPts, (lowerLim, upperLim))[0] / len(inData)

    # Add zero-point by hand
    ecdf_x = np.hstack((0., ecdf_x))
    ecdf_y = np.hstack((0., ecdf_y))

    # Plot the data (successive plot calls draw on the same axes by default,
    # the removed "plt.hold" is not needed)
    sns.set_style('ticks')
    sns.set_context('poster')
    setFonts(36)

    plt.plot(nd_x, nd_y, 'k--')
    plt.plot(ecdf_x, ecdf_y, color='k')
    plt.xlabel('X')
    plt.ylabel('Cumulative Probability')

    # For the arrow, find the first sample point at or beyond X = 2 on each curve
    ecdf_startIndex = np.min(np.where(ecdf_x >= 2))
    arrowStart = np.array([ecdf_x[ecdf_startIndex], ecdf_y[ecdf_startIndex]])

    nd_startIndex = np.min(np.where(nd_x >= 2))
    arrowEnd = np.array([nd_x[nd_startIndex], nd_y[nd_startIndex]])
    arrowDelta = arrowEnd - arrowStart

    # Two opposing arrows give a double-headed arrow between the curves
    plt.arrow(arrowStart[0], arrowStart[1], 0, arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.04,
              head_width=0.4, color='k')
    plt.arrow(arrowStart[0], arrowStart[1] + arrowDelta[1], 0, -arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.04,
              head_width=0.4, color='k')

    outFile = 'KS_Example.png'
    showData(outFile)
def smSolution(M1, M2, M3):
    '''Fit and display three OLS models with the tools from statsmodels.

    Input: design matrices for the linear (M1), quadratic (M2), and cubic
    (M3) fit. The data `x` and `y` are read from the enclosing scope.
    '''

    import statsmodels.api as sm

    # Fit all three models first, then report them in the same order
    fits = [sm.OLS(y, design).fit() for design in (M1, M2, M3)]
    for fitted in fits:
        print(fitted.summary2())

    # Raw data, followed by one curve per fitted model
    plt.plot(x, y, '.', label='Data')
    curve_specs = (('r--', 'Linear Fit'), ('g', 'Quadratic Fit'), ('y', 'Cubic Fit'))
    for fitted, (line_format, curve_name) in zip(fits, curve_specs):
        plt.plot(x, fitted.fittedvalues, line_format, label=curve_name)

    plt.legend(loc='upper left', shadow=True)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.tight_layout()
    showData('linearModel.png')
def showResults(challenger_data, model):
    '''Show the original data, and the resulting logit-fit.

    Parameters
    ----------
    challenger_data : 2D array; column 0 is used as temperature, column 1
        as failure indicator.
    model : fitted model whose first two `params` are used as the
        (alpha, beta) arguments of `logistic`.
    '''

    temperature = challenger_data[:, 0]
    failures = challenger_data[:, 1]

    # First plot the original data
    plt.figure()
    setFonts()
    sns.set_style('darkgrid')
    np.set_printoptions(precision=3, suppress=True)

    plt.scatter(temperature, failures, s=200, color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside Temperature [F]")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    # The original referenced plt.tight_layout without calling it
    plt.tight_layout()

    # Plot the fit. np.asarray gives positional access for both plain
    # arrays and labelled pandas Series.
    x = np.arange(50, 85)
    alpha, beta = np.asarray(model.params)[:2]
    y = logistic(x, beta, alpha)

    # Successive plot calls share the axes by default ("plt.hold" was removed)
    plt.plot(x, y, 'r')
    plt.xlim([50, 85])

    outFile = 'ChallengerPlain.png'
    showData(outFile)
def smSolution(M1, M2, M3):
    '''Solve the regression with statsmodels, print and plot the results.

    Input: design matrices for the linear, quadratic, and cubic fit.
    The data `x` and `y` come from the enclosing scope.
    '''

    import statsmodels.api as sm

    # One OLS fit per design matrix
    linear_fit = sm.OLS(y, M1).fit()
    quadratic_fit = sm.OLS(y, M2).fit()
    cubic_fit = sm.OLS(y, M3).fit()

    # Report the three summaries in order of increasing polynomial degree
    for model_fit in (linear_fit, quadratic_fit, cubic_fit):
        print(model_fit.summary2())

    # Data points first, then the fitted curves on top
    plt.plot(x, y, '.', label='Data')
    overlays = [
        (linear_fit.fittedvalues, 'r--', 'Linear Fit'),
        (quadratic_fit.fittedvalues, 'g', 'Quadratic Fit'),
        (cubic_fit.fittedvalues, 'y', 'Cubic Fit'),
    ]
    for fitted_y, line_format, curve_name in overlays:
        plt.plot(x, fitted_y, line_format, label=curve_name)

    plt.legend(loc='upper left', shadow=True)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.tight_layout()
    showData('linearModel.png')
def showAndSave(temperature: np.ndarray, failures: np.ndarray) -> None:
    """Shows the input data, and saves the resulting figure

    Parameters
    ----------
    temperature : temperature data
    failures : corresponding failure status
    """

    # Plot it, as a function of temperature
    plt.figure()
    setFonts()
    sns.set_style('darkgrid')
    np.set_printoptions(precision=3, suppress=True)

    plt.scatter(temperature, failures, s=200, color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside Temperature [F]")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    # The original referenced plt.tight_layout without calling it
    plt.tight_layout()

    outFile = 'Challenger_ORings.png'
    showData(outFile)
def showSimResults(alpha_samples, beta_samples):
    '''Show the results of the simulations, and save them to an outFile.

    Parameters
    ----------
    alpha_samples : samples of the parameter alpha (lower histogram)
    beta_samples : samples of the parameter beta (upper histogram)
    '''

    plt.figure(figsize=(12.5, 6))
    sns.set_style('darkgrid')
    setFonts(18)

    # Histograms of the samples. "density=True" replaces the removed
    # "normed=True" keyword and yields the same normalized histogram.
    plt.subplot(211)
    plt.title(r"Posterior distributions of the variables $\alpha, \beta$")
    plt.hist(beta_samples, histtype='stepfilled', bins=35, alpha=0.85,
             label=r"posterior of $\beta$", color="#7A68A6", density=True)
    plt.legend()

    plt.subplot(212)
    plt.hist(alpha_samples, histtype='stepfilled', bins=35, alpha=0.85,
             label=r"posterior of $\alpha$", color="#A60628", density=True)
    plt.legend()

    outFile = 'Challenger_Parameters.png'
    showData(outFile)
def main():
    '''Illustrate sensitivity, specificity and the resulting ROC-curve.

    The upper axes show the PDFs of two normal distributions, N(1, 2) and
    N(6, 2), with the areas below the threshold x = 3 shaded; the lower
    axes show the ROC-curve obtained from the two CDFs.
    '''

    # Calculate the PDF-curves
    x = np.linspace(-10, 15, 201)
    nd1 = stats.norm(1, 2)
    nd2 = stats.norm(6, 2)
    y1 = nd1.pdf(x)
    y2 = nd2.pdf(x)

    # Axes locations, as fractions of the figure size
    ROC = {'left': 0.35, 'width': 0.36, 'bottom': 0.1, 'height': 0.47}
    PDF = {'left': 0.1, 'width': 0.8, 'bottom': 0.65, 'height': 0.3}

    rect_ROC = [ROC['left'], ROC['bottom'], ROC['width'], ROC['height']]
    rect_PDF = [PDF['left'], PDF['bottom'], PDF['width'], PDF['height']]

    plt.figure()
    ax1 = plt.axes(rect_PDF)
    ax2 = plt.axes(rect_ROC)

    # Plot and label the PDF-curves. Repeated plotting on an Axes adds to
    # it by default, so the removed "Axes.hold" call is not needed.
    ax1.plot(x, y1)
    ax1.fill_between(x, 0, y1, where=x < 3, facecolor='#CCCCCC', alpha=0.5)
    # NOTE(review): the arrow tip mixes index 75 (x) and 65 (y) - presumably
    # chosen by eye to point into the shaded area; confirm.
    ax1.annotate('Sensitivity',
                 xy=(x[75], y1[65]),
                 xytext=(x[40], y1[75] * 1.2),
                 fontsize=14,
                 horizontalalignment='center',
                 arrowprops=dict(facecolor='#CCCCCC'))

    ax1.plot(x, y2, '#888888')
    ax1.fill_between(x, 0, y2, where=x < 3, facecolor='#888888', alpha=0.5)
    ax1.annotate('1-Specificity',
                 xy=(2.5, 0.03),
                 xytext=(6, 0.05),
                 fontsize=14,
                 horizontalalignment='center',
                 arrowprops=dict(facecolor='#888888'))
    ax1.set_ylabel('PDF')

    # Plot the ROC-curve, together with the diagonal as reference
    ax2.plot(nd2.cdf(x), nd1.cdf(x), 'k')
    ax2.plot(np.array([0, 1]), np.array([0, 1]), 'k--')

    # Format the ROC-curve
    ax2.set_xlim([0, 1])
    ax2.set_ylim([0, 1])
    ax2.axis('equal')
    ax2.set_title('ROC-Curve')
    ax2.set_xlabel('1-Specificity')
    ax2.set_ylabel('Sensitivity')

    arrow_bidir(ax2, (0.5, 0.5), (0.095, 0.885))

    # Show the plot, and create a figure
    showData('ROC.png')
def show_binomial():
    """Plot the PMFs of three example binomial distributions."""

    # Three arbitrarily chosen (n, p) combinations
    trial_counts = [20, 20, 40]
    success_probs = [0.5, 0.7, 0.5]

    for prob, count in zip(success_probs, trial_counts):
        frozen = stats.binom(count, prob)    # the "frozen" distribution
        support = np.arange(count + 1)       # all possible outcomes 0..n
        curve_label = 'p={0:3.1f}, n={1}'.format(prob, count)
        plt.plot(support, frozen.pmf(support), 'o--', label=curve_label)

    # Decorate the axes
    plt.legend()
    plt.title('Binomial distribution')
    plt.xlabel('X')
    plt.ylabel('P(X)')

    arrow_style = dict(shrink=0.05)
    plt.annotate('Upper Limit', xy=(20, 0), xytext=(27, 0.04),
                 arrowprops=arrow_style)

    # Display the figure and write it to file
    showData('Binomial_distribution_pmf.png')
def showProbabilities(linearTemperature, temperature, failures,
                      mean_prob_t, p_t, quantiles) -> None:
    """Show the posterior probabilities, and save the resulting figures

    Parameters
    ----------
    linearTemperature : temperatures at which the posterior curves are
        drawn; indexed as ``[:, 0]`` below, so a 2D column array is expected
    temperature : observed temperatures (x-values of the scatter plot)
    failures : observed failure status (y-values of the scatter plot)
    mean_prob_t : average posterior probability for each `linearTemperature`
    p_t : 2D array of posterior realizations; rows 0 and -2 are plotted
    quantiles : pair of arrays (unpacked into ``fill_between``) bounding
        the shaded band that is labelled "95% CI"
    """

    # --- Show the probability curve ----
    plt.figure(figsize=(12.5, 4))
    setFonts(18)

    # The label is written on one source line: a stray backslash from a
    # former line continuation would otherwise show up in the legend.
    plt.plot(linearTemperature, mean_prob_t, lw=3,
             label="Average posterior\nprobability of defect")
    plt.plot(linearTemperature, p_t[0, :], ls="--",
             label="Realization from posterior")
    plt.plot(linearTemperature, p_t[-2, :], ls="--",
             label="Realization from posterior")
    plt.scatter(temperature, failures, color="k", s=50, alpha=0.5)
    plt.title("Posterior expected value of probability of defect, plus realizations")
    plt.legend(loc="lower left")
    plt.ylim(-0.1, 1.1)
    plt.xlim(linearTemperature.min(), linearTemperature.max())
    plt.ylabel("Probability")
    plt.xlabel("Temperature [F]")

    outFile = 'Challenger_Probability.png'
    showData(outFile)

    # --- Draw CIs ---
    setFonts()
    sns.set_style('darkgrid')

    plt.fill_between(linearTemperature[:, 0], *quantiles, alpha=0.7,
                     color="#7A68A6")
    plt.plot(linearTemperature[:, 0], quantiles[0], label="95% CI",
             color="#7A68A6", alpha=0.7)
    plt.plot(linearTemperature, mean_prob_t, lw=1, ls="--", color="k",
             label="average posterior \nprobability of defect")

    plt.xlim(linearTemperature.min(), linearTemperature.max())
    plt.ylim(-0.02, 1.02)
    plt.legend(loc="lower left")
    plt.scatter(temperature, failures, color="k", s=50, alpha=0.5)
    plt.xlabel("Temperature [F]")
    plt.ylabel("Posterior Probability Estimate")

    outFile = 'Challenger_CIs.png'
    showData(outFile)
def doTukey(data: np.ndarray, multiComp: MultiComparison) -> None:
    """Do a pairwise comparison, and show the confidence intervals

    Parameters
    ----------
    data : structured array, containing the input data; the fields
        'StressReduction' and 'Treatment' are used
    multiComp : Result of the 'MultiComparison'-test
    """

    # Show the results of the multicomparison test
    print((multiComp.tukeyhsd().summary()))

    # Show the group names
    print((multiComp.groupsunique))

    # Generate a plot -------------------

    # Pairwise Tukey test (computed once; mean differences and confidence
    # intervals are both taken from this result)
    res2 = pairwise_tukeyhsd(data['StressReduction'], data['Treatment'])

    # One x-position per pairwise comparison (3 for three treatments)
    xvals = np.arange(len(res2.meandiffs))
    # Half-width of each confidence interval
    errors = np.ravel(np.diff(res2.confint)/2)

    # Plot them
    plt.plot(xvals, res2.meandiffs, 'o')
    plt.errorbar(xvals, res2.meandiffs, yerr=errors, fmt='o')

    # Put on labels of the form "groupA:groupB"
    pair_labels = \
        multiComp.groupsunique[np.column_stack(res2._multicomp.pairindices)]
    pairs = [':'.join(labels) for labels in pair_labels]
    plt.xticks(xvals, pairs)

    # Format the plot: half a unit of margin left and right of the points
    xlim = -0.5, len(xvals) - 0.5
    plt.hlines(0, *xlim)
    plt.xlim(*xlim)
    plt.title('Multiple Comparison of Means - Tukey HSD, FWER=0.05' +
              '\n Pairwise Mean Differences')

    # Save to outfile, and show the data
    outFile = 'multComp.png'
    showData(outFile)
def show3D():
    '''Draw two 3D example plots side by side: a surface and a wireframe.'''

    # Imports that are only needed for the plots in this example
    from matplotlib import cm    # colormaps

    # Importing this module enables the "3d" projection
    from mpl_toolkits.mplot3d import Axes3D

    # Figure that is twice as wide as it is tall
    figure_size = plt.figaspect(0.5)
    fig = plt.figure(figsize=figure_size)
    setFonts(16)

    #---- Left panel: surface of sin(r) over a square grid
    grid_axis_x = np.arange(-5, 5, 0.1)
    grid_axis_y = np.arange(-5, 5, 0.1)
    X, Y = np.meshgrid(grid_axis_x, grid_axis_y)
    radius = np.sqrt(X**2 + Y**2)
    Z = np.sin(radius)

    # The keyword "projection" turns the subplot into 3D axes
    left_ax = fig.add_subplot(1, 2, 1, projection='3d')
    surface = left_ax.plot_surface(X, Y, Z, rstride=1, cstride=1,
                                   cmap=cm.GnBu, linewidth=0,
                                   antialiased=False)
    left_ax.set_zlim3d(-1.01, 1.01)
    fig.colorbar(surface, shrink=0.5, aspect=10)

    #---- Right panel: wireframe of matplotlib's 3D test data
    from mpl_toolkits.mplot3d.axes3d import get_test_data

    right_ax = fig.add_subplot(1, 2, 2, projection='3d')
    X, Y, Z = get_test_data(0.05)
    right_ax.plot_wireframe(X, Y, Z, rstride=10, cstride=10)

    showData('3dGraph.png')
def Draw_multilinear():
    '''Fit y ~ x1 + x2 and show the fitted plane, with noise, in 3D.

    The data `x1`, `x2` and `y` are read from the enclosing scope. The
    adjusted R-squared of the fit is shown in the plot title, and the
    figure is passed to `showData` under the name '3dSurface.png'.
    '''

    df = pd.DataFrame({'x1': x1, 'x2': x2, 'y': y})

    # --- >>> START stats <<< ---
    # Fit the model
    model = ols("y~x1+x2", df).fit()
    # Positional access via a plain array: integer indexing of a labelled
    # pandas Series by position is deprecated
    param_intercept, param_x1, param_x2 = np.asarray(model.params)
    rSquared_adj = model.rsquared_adj

    # Generate data: build the grid and evaluate the fitted plane on it
    x = np.linspace(-5, 5, 101)
    (X, Y) = np.meshgrid(x, x)

    # To get reproducible values, a fixed seed is used
    np.random.seed(987654321)
    Z = param_intercept + param_x1 * X + param_x2 * Y + np.random.randn(
        np.shape(X)[0], np.shape(X)[1])

    # Plotting
    # Set the color
    myCmap = cm.GnBu_r
    # If you want a colormap from seaborn use:
    #from matplotlib.colors import ListedColormap
    #myCmap = ListedColormap(sns.color_palette("Blues", 20))

    # Plot the figure.
    # Figure.gca() no longer accepts keyword arguments in current matplotlib,
    # so the 3D axes are created explicitly.
    fig = plt.figure("multi")
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(X, Y, Z, cmap=myCmap, rstride=2, cstride=2,
                           linewidth=0, antialiased=False)
    ax.view_init(20, -120)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    ax.set_title("multilinear with adj_Rsquare %f" % (rSquared_adj))
    fig.colorbar(surf, shrink=0.6)

    outFile = '3dSurface.png'
    showData(outFile)
def main():
    '''Explain kernel density estimation: histogram (left) vs. KDE (right).'''

    # A handful of dummy observations
    samples = np.array([-2.1, -1.3, -0.4, 1.9, 5.1, 6.2])

    # One row with two panels
    _, panels = plt.subplots(1, 2)
    left_panel = panels[0]
    right_panel = panels[1]

    # Left: histogram of the samples
    plot_histogram(left_panel, samples)

    # Right: construction of the kernel density estimate
    explain_KDE(right_panel, samples)

    # Write the figure to file and display it
    showData('KDEexplained.png')
    plt.show()
def main():
    '''Show a histogram and the KDE construction for the same dummy data.'''

    # Dummy data
    values = np.array([-2.1, -1.3, -0.4, 1.9, 5.1, 6.2])

    # Two plots next to each other
    _, (hist_ax, kde_ax) = plt.subplots(1, 2)

    # Histogram on the left, KDE explanation on the right
    plot_histogram(hist_ax, values)
    explain_KDE(kde_ax, values)

    # Save and show
    showData('KDEexplained.png')
    plt.show()
def showChi2():
    '''Utility function to show Chi2 distributions.

    Plots the chi-square PDF for 1, 2, 3 and 5 degrees of freedom on the
    interval [0, 8], and passes the figure to `showData` as 'dist_chi2.png'.
    '''

    # 161 points = step 0.05 with both end points included, i.e. the same
    # grid that the former matplotlib helper frange(0, 8, 0.05) produced
    t = np.linspace(0, 8, 161)
    Chi2Vals = [1, 2, 3, 5]

    for chi2 in Chi2Vals:
        plt.plot(t, stats.chi2.pdf(t, chi2), label='n={0}'.format(chi2))
    plt.legend()

    plt.xlim(0, 8)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')

    outFile = 'dist_chi2.png'
    showData(outFile)
def showChi2():
    '''Utility function to show Chi2 distributions.

    Plots the chi-square PDF for k = 1, 2, 3 and 5 degrees of freedom on
    the interval [0, 8], and passes the figure to `showData` as
    'dist_chi2.png'.
    '''

    # 161 points = step 0.05 with both end points included, i.e. the same
    # grid that the former matplotlib helper frange(0, 8, 0.05) produced
    t = np.linspace(0, 8, 161)
    Chi2Vals = [1, 2, 3, 5]

    for chi2 in Chi2Vals:
        plt.plot(t, stats.chi2.pdf(t, chi2), label='k={0}'.format(chi2))
    plt.legend()

    plt.xlim(0, 8)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')

    outFile = 'dist_chi2.png'
    showData(outFile)
def KS_principle(inData):
    '''Show the principle of the Kolmogorov-Smirnov test.

    Plots the CDF of the standard normal distribution (dashed) together
    with the empirical CDF of `inData`, and marks the vertical distance
    between the two curves at X = 2 with a double-headed arrow.

    Parameters
    ----------
    inData : sample values; the empirical CDF is evaluated on [0, 10].
    '''

    # CDF of normally distributed data
    nd = stats.norm()
    nd_x = np.linspace(-4, 4, 101)
    nd_y = nd.cdf(nd_x)

    # Empirical CDF of the sample data, which range from approximately 0 to 10
    numPts = 50
    lowerLim = 0
    upperLim = 10
    ecdf_x = np.linspace(lowerLim, upperLim, numPts)
    # Use the function argument (not a global called "data") for the counts
    ecdf_y = stats.cumfreq(inData, numPts, (lowerLim, upperLim))[0]/len(inData)

    # Add zero-point by hand
    ecdf_x = np.hstack((0., ecdf_x))
    ecdf_y = np.hstack((0., ecdf_y))

    # Plot the data (successive plot calls draw on the same axes by default,
    # the removed "plt.hold" is not needed)
    sns.set_style('ticks')
    sns.set_context('poster')
    setFonts(36)

    plt.plot(nd_x, nd_y, 'k--')
    plt.plot(ecdf_x, ecdf_y, color='k')
    plt.xlabel('X')
    plt.ylabel('Cumulative Probability')

    # For the arrow, find the first sample point at or beyond X = 2 on each curve
    ecdf_startIndex = np.min(np.where(ecdf_x >= 2))
    arrowStart = np.array([ecdf_x[ecdf_startIndex], ecdf_y[ecdf_startIndex]])

    nd_startIndex = np.min(np.where(nd_x >= 2))
    arrowEnd = np.array([nd_x[nd_startIndex], nd_y[nd_startIndex]])
    arrowDelta = arrowEnd - arrowStart

    # Two opposing arrows give a double-headed arrow between the curves
    plt.arrow(arrowStart[0], arrowStart[1], 0, arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.04,
              head_width=0.4, color='k')
    plt.arrow(arrowStart[0], arrowStart[1]+arrowDelta[1], 0, -arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.04,
              head_width=0.4, color='k')

    outFile = 'KS_Example.png'
    showData(outFile)
def main():
    '''Demonstrate the central limit theorem with three histograms.'''

    setFonts(24)

    # Uniformly distributed random numbers; the count comes from "ndata"
    data = np.random.random(ndata)

    # Three panels in one row
    _, panels = plt.subplots(1, 3)

    # Panel 1: the raw data
    showAsHistogram(panels[0], data, 'Random data')

    # Panel 2: averages of consecutive pairs
    in_pairs = data.reshape((ndata//2, 2))
    showAsHistogram(panels[1], np.mean(in_pairs, axis=1), 'Average over 2')

    # Panel 3: averages of consecutive groups of ten
    in_tens = data.reshape((ndata//10, 10))
    showAsHistogram(panels[2], np.mean(in_tens, axis=1), 'Average over 10')

    # Final formatting and display
    panels[0].set_ylabel('Counts')
    plt.tight_layout()
    showData('CentralLimitTheorem.png')
def showExp():
    '''Utility function to show exponential distributions.

    Plots the exponential PDF for the rates lambda = 0.5, 1 and 1.5 on
    the interval [0, 3], and passes the figure to `showData` as
    'dist_exp.png'.
    '''

    # 301 points = step 0.01 with both end points included, i.e. the same
    # grid that the former matplotlib helper frange(0, 3, 0.01) produced
    t = np.linspace(0, 3, 301)
    lambdas = [0.5, 1, 1.5]

    for par in lambdas:
        # scipy parameterizes the exponential distribution by its scale,
        # which is 1/lambda; passing lambda itself would mislabel the curves
        plt.plot(t, stats.expon.pdf(t, loc=0, scale=1/par),
                 label=r'$\lambda={0:3.1f}$'.format(par))
    plt.legend()

    plt.xlim(0, 3)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')

    outFile = 'dist_exp.png'
    showData(outFile)
def showAndSave(temperature, failures):
    '''Shows the input data, and saves the resulting figure.

    Parameters
    ----------
    temperature : temperature data
    failures : corresponding failure status
    '''

    # Plot it, as a function of temperature
    plt.figure()
    setFonts()
    sns.set_style('darkgrid')
    np.set_printoptions(precision=3, suppress=True)

    plt.scatter(temperature, failures, s=200, color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside Temperature [F]")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    # The original referenced plt.tight_layout without calling it
    plt.tight_layout()

    outFile = 'Challenger_ORings.png'
    showData(outFile)
def showExp():
    '''Utility function to show exponential distributions.

    Plots the exponential PDF for the rates lambda = 0.5, 1 and 1.5 on
    the interval [0, 3), and passes the figure to `showData` as
    'dist_exp.png'.
    '''

    t = np.arange(0, 3, 0.01)
    lambdas = [0.5, 1, 1.5]

    for par in lambdas:
        # scipy parameterizes the exponential distribution by its scale,
        # which is 1/lambda; passing lambda itself would mislabel the curves
        plt.plot(t, stats.expon.pdf(t, loc=0, scale=1/par),
                 label=r'$\lambda={0:3.1f}$'.format(par))
    plt.legend()

    plt.xlim(0, 3)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')

    outFile = 'dist_exp.png'
    showData(outFile)
def generate_probplot():
    '''Generate a prob-plot for a chi2-distribution of sample data.

    The left panel shows the PDF of a chi-square distribution with 3
    degrees of freedom, the right panel the probability plot of 100
    random samples drawn from it (with a fixed seed).

    Returns
    -------
    data : the 100 random samples
    '''

    # Define the skewed distribution
    chi2 = stats.chi2(3)

    # Generate the data
    x = np.linspace(0, 10, 100)
    y = chi2.pdf(x)
    np.random.seed(12345)
    numData = 100
    data = chi2.rvs(numData)

    # Arrange subplots
    sns.set_context('paper')
    sns.set_style('white')
    setFonts(11)
    fig, axs = plt.subplots(1, 2)

    # Plot distribution
    axs[0].plot(x, y)
    axs[0].set_xlabel('X')
    axs[0].set_ylabel('PDF(X)')
    axs[0].set_title('chi2(x), k=3')

    # Scale the panel so that it appears square
    x0, x1 = axs[0].get_xlim()
    y0, y1 = axs[0].get_ylim()
    axs[0].set_aspect((x1-x0)/(y1-y0))

    # Plot probplot. plt.sca() makes the right panel the current axes;
    # passing an Axes object to plt.axes() is no longer supported.
    plt.sca(axs[1])
    stats.probplot(data, plot=plt)

    # Read the limits before the reference lines are added, so that the
    # aspect ratio is based on the data only
    x0, x1 = axs[1].get_xlim()
    y0, y1 = axs[1].get_ylim()
    axs[1].axhline(0, lw=0.5, ls='--')
    axs[1].axvline(0, lw=0.5, ls='--')
    axs[1].set_aspect((x1-x0)/(y1-y0))

    showData('chi2pp.png')

    return data
def show_poisson():
    """Show an example of Poisson distributions.

    Plots the Poisson PMF for lambda = 1, 4 and 10 at k = 0..19, and
    passes the figure to `showData` as 'Poisson_distribution_pmf.png'.
    """

    # Arbitrarily select 3 lambda values
    lambdas = [1, 4, 10]

    k = np.arange(20)       # generate x-values
    for par in lambdas:
        # Raw string: "\l" is not a valid escape sequence in a normal string
        plt.plot(k, stats.poisson.pmf(k, par), 'o--',
                 label=r'$\lambda={0}$'.format(par))

    # Format the plot
    plt.legend()
    plt.title('Poisson distribution')
    plt.xlabel('X')
    plt.ylabel('P(X)')

    # Show and save the plot
    showData('Poisson_distribution_pmf.png')
def showF():
    '''Utility function to show F distributions.

    Plots the F PDF for the degree-of-freedom pairs (1, 1), (2, 1),
    (5, 2) and (100, 100) on the interval [0, 3], and passes the figure
    to `showData` as 'dist_f.png'.
    '''

    # 301 points = step 0.01 with both end points included, i.e. the same
    # grid that the former matplotlib helper frange(0, 3, 0.01) produced
    t = np.linspace(0, 3, 301)
    d1s = [1, 2, 5, 100]
    d2s = [1, 1, 2, 100]

    for (d1, d2) in zip(d1s, d2s):
        plt.plot(t, stats.f.pdf(t, d1, d2),
                 label='F({0}/{1})'.format(d1, d2))
    plt.legend()

    plt.xlim(0, 3)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')

    outFile = 'dist_f.png'
    showData(outFile)
def showWeibull():
    '''Utility function to show Weibull distributions.

    Plots the Weibull PDF (unit scale) for the shape parameters
    k = 0.5, 1, 1.5 and 5 on the interval [0, 2.5], and passes the figure
    to `showData` as 'Weibull_PDF.png'.
    '''

    # 251 points = step 0.01 with both end points included, i.e. the same
    # grid that the former matplotlib helper frange(0, 2.5, 0.01) produced
    t = np.linspace(0, 2.5, 251)
    ks = [0.5, 1, 1.5, 5]

    for k in ks:
        wd = stats.weibull_min(k)
        plt.plot(t, wd.pdf(t), label='k = {0:.1f}'.format(k))

    plt.xlim(0, 2.5)
    plt.ylim(0, 2.5)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.legend()

    outFile = 'Weibull_PDF.png'
    showData(outFile)
def showProbabilities(linearTemperature, temperature, failures,
                      mean_prob_t, p_t, quantiles):
    '''Show the posterior probabilities, and save the resulting figures.

    Parameters
    ----------
    linearTemperature : temperatures at which the posterior curves are
        drawn; indexed as ``[:, 0]`` below, so a 2D column array is expected
    temperature : observed temperatures (x-values of the scatter plot)
    failures : observed failure status (y-values of the scatter plot)
    mean_prob_t : average posterior probability for each `linearTemperature`
    p_t : 2D array of posterior realizations; rows 0 and -2 are plotted
    quantiles : pair of arrays (unpacked into ``fill_between``) bounding
        the shaded band that is labelled "95% CI"
    '''

    # --- Show the probability curve ----
    plt.figure(figsize=(12.5, 4))
    setFonts(18)

    # The label is written on one source line: a stray backslash from a
    # former line continuation would otherwise show up in the legend.
    plt.plot(linearTemperature, mean_prob_t, lw=3,
             label="Average posterior\nprobability of defect")
    plt.plot(linearTemperature, p_t[0, :], ls="--",
             label="Realization from posterior")
    plt.plot(linearTemperature, p_t[-2, :], ls="--",
             label="Realization from posterior")
    plt.scatter(temperature, failures, color="k", s=50, alpha=0.5)
    plt.title("Posterior expected value of probability of defect, plus realizations")
    plt.legend(loc="lower left")
    plt.ylim(-0.1, 1.1)
    plt.xlim(linearTemperature.min(), linearTemperature.max())
    plt.ylabel("Probability")
    plt.xlabel("Temperature [F]")

    outFile = 'Challenger_Probability.png'
    showData(outFile)

    # --- Draw CIs ---
    setFonts()
    sns.set_style('darkgrid')

    plt.fill_between(linearTemperature[:, 0], *quantiles, alpha=0.7,
                     color="#7A68A6")
    plt.plot(linearTemperature[:, 0], quantiles[0], label="95% CI",
             color="#7A68A6", alpha=0.7)
    plt.plot(linearTemperature, mean_prob_t, lw=1, ls="--", color="k",
             label="average posterior \nprobability of defect")

    plt.xlim(linearTemperature.min(), linearTemperature.max())
    plt.ylim(-0.02, 1.02)
    plt.legend(loc="lower left")
    plt.scatter(temperature, failures, color="k", s=50, alpha=0.5)
    plt.xlabel("Temperature [F]")
    plt.ylabel("Posterior Probability Estimate")

    outFile = 'Challenger_CIs.png'
    showData(outFile)
def showWeibull():
    '''Utility function to show Weibull distributions.

    Plots the Weibull PDF (unit scale) for the shape parameters
    k = 0.5, 1, 1.5 and 5 on the interval [0, 2.5], and passes the figure
    to `showData` as 'Weibull_PDF.png'.
    '''

    # 251 points = step 0.01 with both end points included, i.e. the same
    # grid that the former matplotlib helper frange(0, 2.5, 0.01) produced
    t = np.linspace(0, 2.5, 251)
    ks = [0.5, 1, 1.5, 5]

    for k in ks:
        wd = stats.weibull_min(k)
        plt.plot(t, wd.pdf(t), label='k = {0:.1f}'.format(k))

    plt.xlim(0, 2.5)
    plt.ylim(0, 2.5)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.legend()

    outFile = 'Weibull_PDF.png'
    showData(outFile)
def main():
    '''Demonstrate central limit theorem.

    Shows four histograms side by side: uniformly distributed random
    data, and the averages over groups of 2, 10 and 100 of these values.
    The number of values is taken from `ndata` in the enclosing scope; it
    has to be divisible by 100 for the reshaping below to work.
    '''

    setFonts(24)

    # Generate data (this step was missing, leaving "data" undefined
    # inside the function)
    data = np.random.random(ndata)

    # Show four histograms, side-by-side
    fig, axs = plt.subplots(1, 4)
    showAsHistogram(axs[0], data, 'Random data')
    showAsHistogram(axs[1], np.mean(data.reshape((ndata//2, 2)), axis=1),
                    'Average over 2')
    showAsHistogram(axs[2], np.mean(data.reshape((ndata//10, 10)), axis=1),
                    'Average over 10')
    showAsHistogram(axs[3], np.mean(data.reshape((ndata//100, 100)), axis=1),
                    'Average over 100')

    # Format them and show them
    axs[0].set_ylabel('Counts')
    plt.tight_layout()
    showData('CentralLimitTheorem.png')
def showF():
    '''Utility function to show F distributions.

    Plots the F PDF for the degree-of-freedom pairs (1, 1), (2, 1),
    (5, 2) and (100, 100) on the interval [0, 3], and passes the figure
    to `showData` as 'dist_f.png'.
    '''

    # 301 points = step 0.01 with both end points included, i.e. the same
    # grid that the former matplotlib helper frange(0, 3, 0.01) produced
    t = np.linspace(0, 3, 301)
    d1s = [1, 2, 5, 100]
    d2s = [1, 1, 2, 100]

    for (d1, d2) in zip(d1s, d2s):
        plt.plot(t, stats.f.pdf(t, d1, d2),
                 label='F({0}/{1})'.format(d1, d2))
    plt.legend()

    plt.xlim(0, 3)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')

    outFile = 'dist_f.png'
    showData(outFile)
def shifted_normal():
    '''PDF and scatter plot.

    First figure: PDFs of four normal distributions, passed to `showData`
    as 'Normal_Distribution_PDF.png'. Second figure: scatter plot of 500
    normally distributed random numbers (mean 0, SD 3), shown on screen.
    '''

    # Plot 4 PDFs (Probability density functions) for normal distributions ----------

    # Select 4 mean values, and 4 variances (the SD is the square root)
    myMean = [0, 0, 0, -2]
    myVar = [0.2, 1, 5, 0.5]
    t = np.arange(-5, 5, 0.02)

    # Plot the 4 PDFs, using the color-palette "hls"
    with sns.color_palette('hls', 4):
        for mu, sigma in zip(myMean, np.sqrt(myVar)):
            y = stats.norm.pdf(t, mu, sigma)
            # Raw string, so that the LaTeX commands are not read as
            # (invalid) escape sequences; the former "\t" only inserted a
            # tab character, which is ignored in math mode
            plt.plot(t, y,
                     label=r'$\mu={0}, \; \sigma={1:3.1f}$'.format(mu, sigma))

    # Format the plot
    plt.legend()
    plt.xlim([-5, 5])
    plt.title('Normal Distributions')

    # Show the plot, and save the out-file
    outFile = 'Normal_Distribution_PDF.png'
    showData(outFile)

    # Generate random numbers with a normal distribution ------------------------
    myMean = 0
    mySD = 3
    numData = 500
    data = stats.norm.rvs(myMean, mySD, size=numData)

    # Plot the data
    plt.scatter(np.arange(len(data)), data)

    # Format the plot
    plt.title('Normally distributed data')
    plt.xlim([0, 500])
    plt.ylim([-10, 10])
    plt.show()
    plt.close()
def many_normals() -> None:
    """Show the histograms of 25 sample distributions, and compare the
    mean values.

    The 5 x 5 grid of histograms is passed to `showData` as
    'Normal_MultHist.png'. Afterwards the standard deviation of the means
    of 1000 samples (100 values each) is printed.
    """

    # Set the parameters
    numRows = 5
    numData = 100
    myMean = 0
    mySD = 1

    # Plot the histograms of the sample distributions, and format the plots
    plt.figure()
    for ii in range(numRows):
        for jj in range(numRows):
            data = stats.norm.rvs(myMean, mySD, size=numData)
            plt.subplot(numRows, numRows, numRows*ii+jj+1)
            plt.hist(data, edgecolor='k', lw=0.5)
            # Identical x-range, no ticks and no labels in every panel
            plt.gca().set_xlim([-3, 3])
            plt.gca().set_xticks(())
            plt.gca().set_yticks(())
            plt.gca().set_xticklabels(())
            plt.gca().set_yticklabels(())

    plt.tight_layout()

    # Show the data, and save the out-file
    outFile = 'Normal_MultHist.png'
    showData(outFile)

    # Check out the mean of 1000 normal sample distributions
    numTrials = 1000
    numData = 100

    # Pre-allocate the memory for the output variable
    myMeans = np.ones(numTrials)*np.nan
    for ii in range(numTrials):
        data = stats.norm.rvs(myMean, mySD, size=numData)
        myMeans[ii] = np.mean(data)

    se = np.std(myMeans)
    # f-string: without the "f" prefix the placeholders are printed literally
    print(f'The standard error of the mean, with {numData} samples, is {se}')
def main():
    '''Show the residuals of a straight-line fit to noisy data.'''

    # Noisy linear data, reproducible through a fixed seed
    x = np.arange(10)
    np.random.seed(10)
    noise = 20 * np.random.rand(len(x))
    y = 3 * x + 2 + noise

    # Least-squares line: slope and intercept
    slope, intercept = np.polyfit(x, y, 1)
    fitted = slope * x + intercept

    # Data points and fitted line
    plt.scatter(x, y)
    plt.plot(x, fitted, '--', lw=2)

    # One vertical segment per point, from the line to the data value
    for x_val, y_fit, y_val in zip(x, fitted, y):
        plt.plot([x_val, x_val], [y_fit, y_val], 'k')

    plt.xlim((-0.1, 9.1))
    plt.xlabel('X')
    plt.ylabel('Y')

    showData('residuals.png')
def shifted_normal():
    '''PDF and scatter plot.

    First figure: PDFs of four normal distributions, passed to `showData`
    as 'Normal_Distribution_PDF.png'. Second figure: scatter plot of 500
    normally distributed random numbers (mean 0, SD 3), shown on screen.
    '''

    # Plot 4 PDFs (Probability density functions) for normal distributions ----------

    # Select 4 mean values, and 4 variances (the SD is the square root)
    myMean = [0, 0, 0, -2]
    myVar = [0.2, 1, 5, 0.5]
    # 501 points = step 0.02 with both end points included, i.e. the same
    # grid that the former matplotlib helper frange(-5, 5, 0.02) produced
    t = np.linspace(-5, 5, 501)

    # Plot the 4 PDFs, using the color-palette "hls"
    with sns.color_palette('hls', 4):
        for mu, sigma in zip(myMean, np.sqrt(myVar)):
            y = stats.norm.pdf(t, mu, sigma)
            # Raw string, so that the LaTeX commands are not read as
            # (invalid) escape sequences; the former "\t" only inserted a
            # tab character, which is ignored in math mode
            plt.plot(t, y,
                     label=r'$\mu={0}, \; \sigma={1:3.1f}$'.format(mu, sigma))

    # Format the plot
    plt.legend()
    plt.xlim([-5, 5])
    plt.title('Normal Distributions')

    # Show the plot, and save the out-file
    outFile = 'Normal_Distribution_PDF.png'
    showData(outFile)

    # Generate random numbers with a normal distribution ------------------------
    myMean = 0
    mySD = 3
    numData = 500
    data = stats.norm.rvs(myMean, mySD, size=numData)

    # Plot the data
    plt.scatter(np.arange(len(data)), data)

    # Format the plot
    plt.title('Normally distributed data')
    plt.xlim([0, 500])
    plt.ylim([-10, 10])
    plt.show()
    plt.close()
def doTukey(data, multiComp):
    '''Do a pairwise comparison, and show the confidence intervals.

    Parameters
    ----------
    data : structured array, containing the input data; the fields
        'StressReduction' and 'Treatment' are used
    multiComp : multiple-comparison object providing `tukeyhsd()` and
        `groupsunique`
    '''

    print((multiComp.tukeyhsd().summary()))

    # Show the group names
    print((multiComp.groupsunique))

    # Generate a plot -------------------

    # Pairwise Tukey test (computed once; mean differences and confidence
    # intervals are both taken from this result)
    res2 = pairwise_tukeyhsd(data['StressReduction'], data['Treatment'])

    # One x-position per pairwise comparison (3 for three treatments)
    xvals = np.arange(len(res2.meandiffs))
    # Half-width of each confidence interval
    errors = np.ravel(np.diff(res2.confint)/2)

    # Plot them
    plt.plot(xvals, res2.meandiffs, 'o')
    plt.errorbar(xvals, res2.meandiffs, yerr=errors, fmt='o')

    # Put on labels: each row of "pair_labels" holds the two group names of
    # one comparison; join them into a single tick label "groupA:groupB"
    pair_labels = multiComp.groupsunique[np.column_stack(res2._multicomp.pairindices)]
    pairs = [':'.join(labels) for labels in pair_labels]
    plt.xticks(xvals, pairs)

    # Format the plot: half a unit of margin left and right of the points
    xlim = -0.5, len(xvals) - 0.5
    plt.hlines(0, *xlim)
    plt.xlim(*xlim)
    plt.title('Multiple Comparison of Means - Tukey HSD, FWER=0.05' +
              '\n Pairwise Mean Differences')

    # Save to outfile, and show the data
    outFile = 'multComp.png'
    showData(outFile)
def generateData() -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """ Generate and show the data: a plane in 3D

    The plane ``Z = -5 + 3*X - 0.5*Y`` plus standard-normal noise is
    evaluated on a 101 x 101 grid over [-5, 5] x [-5, 5], drawn as a 3D
    surface, and passed to ``showData`` under the name '3dSurface.png'.

    Returns
    -------
    X : x-values (vector)
    Y : y-values (vector)
    Z : z-values (vector)
    """

    x = np.linspace(-5, 5, 101)
    (X, Y) = np.meshgrid(x, x)

    # To get reproducible values, a fixed seed is used
    np.random.seed(987654321)
    Z = -5 + 3*X - 0.5*Y + np.random.randn(np.shape(X)[0], np.shape(X)[1])

    # Set the color
    myCmap = cm.GnBu_r
    # If you want a colormap from seaborn use:
    #from matplotlib.colors import ListedColormap
    #myCmap = ListedColormap(sns.color_palette("Blues", 20))

    # Plot the figure.
    # Figure.gca() no longer accepts keyword arguments in current matplotlib,
    # so the 3D axes are created explicitly.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(X, Y, Z, cmap=myCmap, rstride=2, cstride=2,
                           linewidth=0, antialiased=False)
    ax.view_init(20, -120)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    fig.colorbar(surf, shrink=0.6)

    outFile = '3dSurface.png'
    showData(outFile)

    return (X.flatten(), Y.flatten(), Z.flatten())
def showT():
    '''Utility function to show T distributions.

    Plots the PDF of the standard normal distribution (dashed) and of the
    t-distribution with 1 and 5 degrees of freedom on the interval
    [-5, 5], and passes the figure to `showData` as 'dist_t.png'.
    '''

    # 201 points = step 0.05 with both end points included, i.e. the same
    # grid that the former matplotlib helper frange(-5, 5, 0.05) produced
    t = np.linspace(-5, 5, 201)

    normal = stats.norm.pdf(t)
    t1 = stats.t.pdf(t, 1)
    t5 = stats.t.pdf(t, 5)

    plt.plot(t, normal, '--', label='normal')
    plt.plot(t, t1, label='df=1')
    plt.plot(t, t5, label='df=5')
    plt.legend()

    plt.xlim(-5, 5)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')

    outFile = 'dist_t.png'
    showData(outFile)
def showT():
    '''Utility function to show T distributions.

    Plots the PDF of the standard normal distribution (dashed) and of the
    t-distribution with 1 and 5 degrees of freedom on the interval
    [-5, 5], and passes the figure to `showData` as 'dist_t.png'.
    '''

    # 201 points = step 0.05 with both end points included, i.e. the same
    # grid that the former matplotlib helper frange(-5, 5, 0.05) produced
    t = np.linspace(-5, 5, 201)

    normal = stats.norm.pdf(t)
    t1 = stats.t.pdf(t, 1)
    t5 = stats.t.pdf(t, 5)

    plt.plot(t, normal, '--', label='normal')
    plt.plot(t, t1, label='df=1')
    plt.plot(t, t5, label='df=5')
    plt.legend()

    plt.xlim(-5, 5)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')

    outFile = 'dist_t.png'
    showData(outFile)
def many_normals():
    '''Draw 25 histograms of normal samples, then report how much the
    sample means scatter.'''

    # Parameters of the demonstration
    numRows = 5
    numData = 100
    myMean = 0
    mySD = 1

    # One histogram per panel of the numRows x numRows grid, filled row by row
    plt.figure()
    for panel in range(numRows * numRows):
        sample = stats.norm.rvs(myMean, mySD, size=numData)
        plt.subplot(numRows, numRows, panel + 1)
        plt.hist(sample)

        # Same x-range everywhere, without ticks or tick labels
        current_ax = plt.gca()
        current_ax.set_xlim([-3, 3])
        current_ax.set_xticks(())
        current_ax.set_yticks(())
        current_ax.set_xticklabels(())
        current_ax.set_yticklabels(())

    plt.tight_layout()

    # Display the grid and write it to file
    showData('Normal_MultHist.png')

    # Means of 1000 independent samples with 100 values each
    numTrials = 1000
    numData = 100
    myMeans = np.array([
        np.mean(stats.norm.rvs(myMean, mySD, size=numData))
        for _ in range(numTrials)
    ])

    message = 'The standard error of the mean, with {0} samples, is {1}'.format(
        numData, np.std(myMeans))
    print(message)
def main():
    '''Show the residuals of a line-fit to noisy linear data.

    The data points, the fitted line and one vertical segment per
    residual are drawn, and the figure is passed to `showData` as
    'residuals.png'.
    '''

    # generate the data (fixed seed for reproducible values)
    x = np.arange(10)
    np.random.seed(10)
    y = 3*x + 2 + 20*np.random.rand(len(x))

    # determine the line-fit
    k, d = np.polyfit(x, y, 1)
    yfit = k*x + d

    # plot the data; successive plot calls draw on the same axes by
    # default, so the removed "plt.hold" call is not needed
    plt.scatter(x, y)
    plt.plot(x, yfit, '--', lw=2)

    # one vertical segment from the fitted line to each data point
    for xi, fit_i, yi in zip(x, yfit, y):
        plt.plot([xi, xi], [fit_i, yi], 'k')

    plt.xlim((-0.1, 9.1))
    plt.xlabel('X')
    plt.ylabel('Y')

    showData('residuals.png')
def show_binomial():
    """Draw example binomial PMFs for three parameter combinations."""

    # Number of trials and success probability of each example
    ns = [20, 20, 40]
    ps = [0.5, 0.7, 0.5]

    # One dashed curve with markers per (n, p) pair
    for idx in range(len(ps)):
        n_trials = ns[idx]
        p_success = ps[idx]
        distribution = stats.binom(n_trials, p_success)   # "frozen function"
        outcomes = np.arange(n_trials+1)                  # x-values 0..n
        legend_entry = 'p={0:3.1f}, n={1}'.format(p_success, n_trials)
        plt.plot(outcomes, distribution.pmf(outcomes), 'o--',
                 label=legend_entry)

    # Legend, title and axis labels
    plt.legend()
    plt.title('Binomial distribution')
    plt.xlabel('X')
    plt.ylabel('P(X)')

    # Arrow pointing at the largest possible outcome for n = 20
    plt.annotate('Upper Limit', xy=(20, 0), xytext=(27, 0.04),
                 arrowprops=dict(shrink=0.05))

    # Show and save the plot
    showData('Binomial_distribution_pmf.png')
def main():
    """Draw two overlapping normal PDFs and the ROC curve derived from them.

    The upper axes show the densities of N(1, 2) and N(6, 2) with the area
    below the cut-off ``x < 3`` shaded and annotated. The lower axes plot the
    CDF of the first distribution against the CDF of the second, together
    with the diagonal. The figure is handed to ``showData`` as 'ROC.png'.
    """
    # Calculate the PDF-curves
    x = np.linspace(-10, 15, 201)
    nd1 = stats.norm(1, 2)
    nd2 = stats.norm(6, 2)
    y1 = nd1.pdf(x)
    y2 = nd2.pdf(x)

    # Axes locations, given as fractions of the figure size
    ROC = {'left': 0.35, 'width': 0.36, 'bottom': 0.1, 'height': 0.47}
    PDF = {'left': 0.1, 'width': 0.8, 'bottom': 0.65, 'height': 0.3}

    # plt.axes expects [left, bottom, width, height]
    rect_ROC = [ROC['left'], ROC['bottom'], ROC['width'], ROC['height']]
    rect_PDF = [PDF['left'], PDF['bottom'], PDF['width'], PDF['height']]

    plt.figure()
    ax1 = plt.axes(rect_PDF)
    ax2 = plt.axes(rect_ROC)

    # Plot and label the PDF-curves
    # The former ax.hold(True) calls are gone: Axes.hold was removed in
    # Matplotlib 3.0, and drawing repeatedly into one axes is the default.
    ax1.plot(x, y1)
    ax1.fill_between(x, 0, y1, where=x<3, facecolor='#CCCCCC', alpha=0.5)
    # NOTE(review): the arrow tip mixes index 75 (x) with index 65 (y1);
    # kept as in the original -- confirm whether this is intentional.
    ax1.annotate('Sensitivity',
                 xy=(x[75], y1[65]),
                 xytext=(x[40], y1[75]*1.2),
                 fontsize=14,
                 horizontalalignment='center',
                 arrowprops=dict(facecolor='#CCCCCC'))

    ax1.plot(x, y2, '#888888')
    ax1.fill_between(x, 0, y2, where=x<3, facecolor='#888888', alpha=0.5)
    ax1.annotate('1-Specificity',
                 xy=(2.5, 0.03),
                 xytext=(6, 0.05),
                 fontsize=14,
                 horizontalalignment='center',
                 arrowprops=dict(facecolor='#888888'))
    ax1.set_ylabel('PDF')

    # Plot the ROC-curve, plus the diagonal as reference
    ax2.plot(nd2.cdf(x), nd1.cdf(x), 'k')
    ax2.plot(np.array([0, 1]), np.array([0, 1]), 'k--')

    # Format the ROC-curve
    ax2.set_xlim([0, 1])
    ax2.set_ylim([0, 1])
    ax2.axis('equal')
    ax2.set_title('ROC-Curve')
    ax2.set_xlabel('1-Specificity')
    ax2.set_ylabel('Sensitivity')

    arrow_bidir(ax2, (0.5, 0.5), (0.095, 0.885))

    # Show the plot, and create a figure
    showData('ROC.png')
# NOTE(review): statement run without a visible enclosing header; `nd` and
# `data` are defined outside this view and the original indentation is lost.

# Evaluation grid and the exact density of `nd` on it
x = np.linspace(-5, 5, 101)
pdf = nd.pdf(x)

# Calculate the KDE
# `sd` is computed here but not used by any of the statements below.
sd = np.std(data, ddof=1)
# Bandwidth with the sample size hard-coded as 100
# (presumably a rule-of-thumb bandwidth for unit SD -- TODO confirm)
h = (4 / (3 * 100))**0.2
h_str = '{0:4.2f}'.format(h)

# Calculate the smoothed plots, with 3 different parameters
# (the second argument is passed as gaussian_kde's bandwidth setting)
kde_small = stats.kde.gaussian_kde(data, 0.1)
kde = stats.kde.gaussian_kde(data, h)
kde_large = stats.kde.gaussian_kde(data, 1)

# Generate two plots: one KDE with rug-plot, and one with different parameters
sns.set_context('poster')
sns.set_style('ticks')
setFonts()
fig, axs = plt.subplots(1, 2)

# Left panel: seaborn's own estimate, with a rug plot of the raw data
sns.distplot(data, rug=True, ax=axs[0])

# Right panel: exact density followed by the three KDEs; the legend entries
# below are listed in the same order as these plot calls
axs[1].plot(x, pdf)
axs[1].plot(x, kde.evaluate(x), 'r')
axs[1].plot(x, kde_small.evaluate(x), '-', color=[0.8, 0.8, 0.8])
axs[1].plot(x, kde_large.evaluate(x), '--')
axs[1].legend(['exact', h_str, '0.1', '1.0'])
axs[1].set_ylim(0, 0.40)

# Save and show
showData('kdePlot.png')
plt.show()
# NOTE(review): statement run without a visible enclosing header; `nd` is
# defined outside this view and the original indentation is lost.

# Density and cumulative distribution of `nd` on [-3, 3]
x = np.linspace(-3,3,100)
yp = nd.pdf(x)
y = nd.cdf(x)

# Sub-range [-3, 1] used for the shaded area under the density
x1 = np.linspace(-3, 1)
y1 = nd.pdf(x1)

# Make the plot
sns.set_context('paper')
sns.set_style('white')
setFonts(12)
figs, axs = plt.subplots(1,2)

# Left panel: PDF, with the area up to x = 1 shaded and labelled 'CDF(x)'
axs[0].plot(x,yp, 'k')
axs[0].fill_between(x1, y1, facecolor='#CCCCCC')
axs[0].text(0, 0.1, 'CDF(x)', family='cursive', fontsize=14, horizontalalignment='center', style='italic')
axs[0].set_xlabel('x')
axs[0].set_ylabel('PDF(x)')
sns.despine()

# Right panel: CDF
axs[1].plot(x, y, '#999999', lw=3)
axs[1].set_xlabel('x')
axs[1].set_ylabel('CDF(x)')
# pyplot call: the dashed line at x = 0 goes into the current axes
plt.vlines(0, 0, 1, linestyles='--')
sns.despine()

# Save and show
showData('PDF_CDF.png')
plt.show()
# NOTE(review): the next two statements look like the tail of a definition
# whose header is outside this view; the original indentation is lost, so
# their nesting level has to be restored by hand.
plt.show()
return

# Generate the data: a straight line (intercept 10, slope 0.2) plus Gaussian noise
x = np.arange(-20, 80)
y = 10 + 0.2 * x + 4 * np.random.randn(len(x))

# Make the plot
sns.set_style('ticks')
sns.set_context('poster')
setFonts()
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x, y, '.')

# Move the left and bottom spines to the 'zero' position
ax.spines['left'].set_position('zero')
ax.spines['bottom'].set_position('zero')
sns.despine()

# Draw the fitted line (first-order polynomial fit, evaluated at the data x-values)
p = np.polyfit(x, y, 1)
yFit = np.polyval(p, x)
ax.plot(x, yFit, 'r')

# Save and show
outFile = 'Linear_regression.png'
showData(outFile)
def show_fig(std, ax, title):
    """Scatter three normally distributed groups side by side in one axis.

    Reads the module-level names ``centers`` (group means) and ``numData``
    (points per group).

    Parameters
    ----------
    std : standard deviation used for all three groups
    ax : axis object to draw into
    title : title placed above the axis
    """
    within_group = np.arange(numData)
    for group in range(3):
        sample = stats.norm(centers[group], std).rvs(numData)
        # shift each group to the right by one group width
        ax.plot(group * numData + within_group, sample, '.', ms=10)

    # One tick per group, at the positions 50, 150 and 250
    ax.xaxis.set_ticks([50, 150, 250])
    ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
    ax.set_title(title)
    sns.despine()


if __name__ == '__main__':
    # Settings read by show_fig
    centers = [5, 5.3, 4.7]
    stds = [0.1, 2]
    numData = 100

    # Set up the figure
    sns.set_context('paper')
    sns.set_style('whitegrid')
    setFonts(14)

    # Two panels: the same three group means, once with a small and once
    # with a large standard deviation
    fig, axs = plt.subplots(1, 2)
    show_fig(0.1, axs[0], 'SD=0.1')
    show_fig(2, axs[1], 'SD=2.0')

    showData('anova_oneway.png')
# Make the plot sns.set_context('paper') sns.set_style('white') setFonts(12) figs, axs = plt.subplots(1, 2) axs[0].plot(x, yp, 'k') axs[0].fill_between(x1, y1, facecolor='#CCCCCC') axs[0].text(0, 0.1, 'CDF(x)', family='cursive', fontsize=14, horizontalalignment='center', style='italic') axs[0].set_xlabel('x') axs[0].set_ylabel('PDF(x)') sns.despine() axs[1].plot(x, y, '#999999', lw=3) axs[1].set_xlabel('x') axs[1].set_ylabel('CDF(x)') plt.vlines(0, 0, 1, linestyles='--') sns.despine() # Save and show showData('PDF_CDF.png') plt.show()
def simple_normal():
    """Plot five views of the standard normal distribution in one figure.

    The top row holds the PDF; below it follow the CDF and SF (over x) and
    the PPF and ISF (over probabilities). The figure is passed to
    ``showData`` as 'DistributionFunctions.png'.
    """
    # x-values for PDF/CDF/SF, and probabilities for the inverse functions
    grid = np.arange(-4, 4, 0.1)
    probs = np.arange(0, 1, 0.001)

    # "Frozen" standard normal; nothing is evaluated yet
    std_normal = stats.norm()

    def ticks_bottom_left(axes):
        # Draw tick marks only on the bottom and on the left axis
        axes.xaxis.set_ticks_position('bottom')
        axes.yaxis.set_ticks_position('left')

    # First row: the PDF, spanning both columns of the 3x2 layout
    pdf_axes = plt.subplot2grid((3, 2), (0, 0), colspan=2)
    pdf_axes.plot(grid, std_normal.pdf(grid))
    pdf_axes.set_xlim([-4, 4])
    ticks_bottom_left(pdf_axes)
    pdf_axes.set_yticks(np.linspace(0, 0.4, 5))
    pdf_axes.set_title('Normal Distribution - PDF: Probability Density Fct')

    # Second row: CDF and SF, each with a dashed vertical line at x = 0
    second_row = (
        (323, std_normal.cdf, 'CDF: Cumulative Distribution Fct'),
        (324, std_normal.sf, 'SF: Survival Fct'),
    )
    for position, func, heading in second_row:
        panel = plt.subplot(position)
        panel.plot(grid, func(grid))
        ticks_bottom_left(panel)
        panel.set_xlim([-4, 4])
        panel.set_ylim([0, 1])
        panel.vlines(0, 0, 1, linestyles='--')
        panel.set_title(heading)

    # Third row: PPF and ISF, each with a dashed horizontal line at y = 0
    third_row = (
        (325, std_normal.ppf, 'PPF: Percentile Point Fct'),
        (326, std_normal.isf, 'ISF: Inverse Survival Fct'),
    )
    for position, func, heading in third_row:
        panel = plt.subplot(position)
        panel.plot(probs, func(probs))
        ticks_bottom_left(panel)
        panel.set_yticks(np.linspace(-4, 4, 5))
        panel.hlines(0, 0, 1, linestyles='--')
        panel.set_ylim([-4, 4])
        panel.set_title(heading)

    plt.tight_layout()
    showData('DistributionFunctions.png')
def show_fig(std, ax, title):
    """Draw three groups of normally distributed points into the given axis.

    The group means come from the module-level list ``centers`` and the
    number of points per group from the module-level ``numData``.

    Parameters
    ----------
    std : common standard deviation of the three groups
    ax : axis that receives the plot
    title : axis title
    """
    group = 0
    while group < 3:
        values = stats.norm(centers[group], std).rvs(numData)
        first_index = group * numData   # groups are laid out one after another
        positions = first_index + np.arange(numData)
        ax.plot(positions, values, '.', ms=10)
        group += 1

    # Label the centre of each group
    ax.xaxis.set_ticks([50, 150, 250])
    ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
    ax.set_title(title)
    sns.despine()


if __name__ == '__main__':
    # Values that show_fig picks up from module scope
    numData = 100
    centers = [5, 5.3, 4.7]
    stds = [0.1, 2]

    # Set up the figure
    sns.set_context('paper')
    sns.set_style('whitegrid')
    setFonts(14)

    # Left: groups with small SD; right: same means, large SD
    fig, axs = plt.subplots(1, 2)
    show_fig(0.1, axs[0], 'SD=0.1')
    show_fig(2, axs[1], 'SD=2.0')

    showData('anova_oneway.png')
# NOTE(review): statement run without a visible enclosing header; `nd` and
# `data` are defined outside this view and the original indentation is lost.

# Evaluation grid and the exact density of `nd` on it
x = np.linspace(-5, 5, 101)
pdf = nd.pdf(x)

# Calculate the KDE
# `sd` is computed here but not used by any of the statements below.
sd = np.std(data, ddof=1)
# Bandwidth with the sample size hard-coded as 100
# (presumably a rule-of-thumb bandwidth for unit SD -- TODO confirm)
h = (4/(3*100))**0.2
h_str = '{0:4.2f}'.format(h)

# Calculate the smoothed plots, with 3 different parameters
# (the second argument is passed as gaussian_kde's bandwidth setting)
kde_small = stats.kde.gaussian_kde(data, 0.1)
kde = stats.kde.gaussian_kde(data, h)
kde_large = stats.kde.gaussian_kde(data, 1)

# Generate two plots: one KDE with rug-plot, and one with different parameters
sns.set_context('poster')
sns.set_style('ticks')
setFonts()
fig, axs = plt.subplots(1,2)

# Left panel: seaborn's own estimate, with a rug plot of the raw data
sns.distplot(data, rug=True, ax=axs[0])

# Right panel: exact density followed by the three KDEs; the legend entries
# below are listed in the same order as these plot calls
axs[1].plot(x, pdf)
axs[1].plot(x, kde.evaluate(x), 'r')
axs[1].plot(x,kde_small.evaluate(x),'-', color=[0.8, 0.8, 0.8])
axs[1].plot(x,kde_large.evaluate(x),'--')
axs[1].legend(['exact', h_str, '0.1', '1.0'])
axs[1].set_ylim(0, 0.40)

# Save and show
showData('kdePlot.png')
plt.show()
# NOTE(review): the next three statements look like the body of a definition
# whose header is outside this view (presumably the `showAsHistogram` called
# below -- confirm); the original indentation is lost. `nbins` is not set
# locally here; a module-level `nbins` is assigned in the block below.
axis.hist(data, bins=nbins)
axis.set_xticks([0, 0.5, 1])
axis.set_title(title)

if __name__ == '__main__':
    # Formatting options
    sns.set(context='poster', style='ticks', palette='muted')

    # Input data
    ndata = 100000
    nbins = 50
    setFonts(24)

    # Generate data: `ndata` values from np.random.random
    data = np.random.random(ndata)

    # Show three histograms, side-by-side
    fig, axs = plt.subplots(1, 3)
    showAsHistogram(axs[0], data, 'Random data')
    # Reshape to (ndata/k, k) and average along axis 1: each plotted value is
    # the mean of k consecutive random numbers
    showAsHistogram(axs[1], np.mean(data.reshape((int(ndata / 2), 2)), axis=1), 'Average over 2')
    showAsHistogram(axs[2], np.mean(data.reshape((int(ndata / 10), 10)), axis=1), 'Average over 10')

    # Format them and show them
    axs[0].set_ylabel('Counts')
    plt.tight_layout()
    showData('CentralLimitTheorem.png')
# NOTE(review): the next four statements look like the tail of a definition
# whose header is outside this view (presumably the `show_fig` called below
# -- confirm); the original indentation is lost. `grandMean` and `groupMean`
# are defined outside this view.

# Double-headed arrow between the grand mean and groupMean[1], at x = 210
ax.annotate('', xy=(210, grandMean), xytext=(210, groupMean[1]), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black'))
# Double-headed arrow from groupMean[1] to 0.2 above it, at x = 90
ax.annotate('', xy=(90, groupMean[1]), xytext=(90, groupMean[1] + 0.2), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black'))
# Text labels placed at the vertical midpoint of each arrow
ax.text(210, (grandMean + groupMean[1]) / 2., '$SS_{Treatment}$', fontsize=36)
ax.text(90, groupMean[1] + 0.1, '$SS_{Error}$', ha='right', fontsize=36)

if __name__ == '__main__':
    centers = [5, 5.3, 4.7]
    # Fixed seed, so that the figure is reproducible
    np.random.seed(123)
    setFonts(30)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # `std` is assigned here, but the call below passes the literal 0.1
    std = 0.1
    numData = 100
    show_fig(0.1, ax, 'Sum-Squares')

    # Save and show
    showData('anova_annotated.png')
def simplePlots(): '''Demonstrate the generation of different statistical standard plots''' # Univariate data ------------------------- # Make sure that always the same random numbers are generated np.random.seed(1234) # Generate data that are normally distributed x = np.random.randn(500) # Other graphics settings sns.set(context='poster', style='ticks', palette=sns.color_palette('muted')) # Set the fonts the way I like them setFonts(32) # Scatter plot plt.scatter(np.arange(len(x)), x) plt.xlim([0, len(x)]) # Save and show the data, in a systematic format printout('scatterPlot.png', xlabel='Datapoints', ylabel='Values', title='Scatter') # Histogram plt.hist(x) printout('histogram_plain.png', xlabel='Data Values', ylabel='Frequency', title='Histogram, default settings') plt.hist(x,25) printout('histogram.png', xlabel='Data Values', ylabel='Frequency', title='Histogram, 25 bins') # Cumulative probability density numbins = 20 plt.plot(stats.cumfreq(x,numbins)[0]) printout('CumulativeFrequencyFunction.png', xlabel='Data Values', ylabel='CumFreq', title='Cumulative Frequency') # KDE-plot sns.kdeplot(x) printout('kde.png', xlabel='Data Values', ylabel='Density', title='KDE_plot') # Boxplot # The ox consists of the first, second (middle) and third quartile plt.boxplot(x, sym='*') printout('boxplot.png', xlabel='Values', title='Boxplot') plt.boxplot(x, sym='*', vert=False) plt.title('Boxplot, horizontal') plt.xlabel('Values') plt.show() # Errorbars x = np.arange(5) y = x**2 errorBar = x/2 plt.errorbar(x,y, yerr=errorBar, fmt='o', capsize=5, capthick=3) plt.xlim([-0.2, 4.2]) plt.ylim([-0.2, 19]) printout('Errorbars.png', xlabel='Data Values', ylabel='Measurements', title='Errorbars') # Violinplot nd = stats.norm data = nd.rvs(size=(100)) nd2 = stats.norm(loc = 3, scale = 1.5) data2 = nd2.rvs(size=(100)) # Use pandas and the seaborn package for the violin plot df = pd.DataFrame({'Girls':data, 'Boys':data2}) sns.violinplot(df) printout('violinplot.png', title='Violinplot') # 
Barplot # The font-size is set such that the legend does not overlap with the data np.random.seed(1234) setFonts(20) df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd']) df.plot(kind='bar', grid=False, color=sns.color_palette('muted')) showData('barplot.png') setFonts(28) # Bivariate Plots df2 = pd.DataFrame(np.random.rand(50, 3), columns=['a', 'b', 'c']) df2.plot(kind='scatter', x='a', y='b', s=df2['c']*500); plt.axhline(0, ls='--', color='#999999') plt.axvline(0, ls='--', color='#999999') printout('bivariate.png') # Grouped Boxplot sns.set_style('whitegrid') sns.boxplot(df) setFonts(28) printout('groupedBoxplot.png', title='sns.boxplot') sns.set_style('ticks') # Pieplot txtLabels = 'Cats', 'Dogs', 'Frogs', 'Others' fractions = [45, 30, 15, 10] offsets =(0, 0.05, 0, 0) plt.pie(fractions, explode=offsets, labels=txtLabels, autopct='%1.1f%%', shadow=True, startangle=90, colors=sns.color_palette('muted') ) plt.axis('equal') printout('piePlot.png', title=' ')
# Generate the data x = np.r_[3, 1.5, 4, 6, 3, 2] dx = np.r_[0.1, 0.3, 0.2, 0.2, 0.3, 0.25] xs = x - dx index = range(len(x)) # plot the data setFonts(20) plt.plot(x, "o", ms=10, label="pre") plt.plot(xs, "r*", ms=12, label="post") plt.bar(index, dx, width=0.5, align="center", color=0.75 * np.ones(3), label="pre-post") # Format the plot plt.legend(loc="upper left") plt.axhline(0, ls="--") plt.xlim(-0.3, 5.3) plt.ylim(-0.2, 6.2) plt.xlabel("Subject Nr") plt.ylabel("Value") plt.tight_layout() # P-values for paired and unpaired T-tests _, p_paired = stats.ttest_rel(x, xs) _, p_ind = stats.ttest_ind(x, xs) print("A paired comparison yields p={0:.4f}, while an unpaired T-test gives us p={1:.3f}".format(p_paired, p_ind)) # Show and save figure outFile = "pairedTtest.png" showData(outFile)
# NOTE(review): the statements down to the last ax.text call look like the
# tail of a definition whose header is outside this view (presumably the
# `show_fig` called below -- confirm); the original indentation is lost.
# `title` and `groupMean` are defined outside this view.

# One labelled x-tick per group; no y-ticks
ax.xaxis.set_ticks([50,150,250])
ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
ax.yaxis.set_ticks([])
ax.set_title(title)

# Grand mean = mean of the values in `groupMean`, drawn as a horizontal line
grandMean = np.mean(groupMean)
ax.axhline(grandMean, color='#999999')

# Horizontal reference lines at groupMean[1] and at 0.2 above it
ax.plot([80, 220], [groupMean[1], groupMean[1]], '#999999')
ax.plot([80, 120], [groupMean[1]+0.2, groupMean[1]+0.2], '#999999')

# Double-headed arrow between the grand mean and groupMean[1], at x = 210
ax.annotate('', xy=(210, grandMean), xytext=(210,groupMean[1]), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black'))
# Double-headed arrow from groupMean[1] to 0.2 above it, at x = 90
ax.annotate('', xy=(90, groupMean[1]), xytext=(90,groupMean[1]+0.2), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black'))
# Text labels placed at the vertical midpoint of each arrow
ax.text(210, (grandMean + groupMean[1])/2., '$SS_{Treatment}$', fontsize=36)
ax.text(90, groupMean[1]+0.1, '$SS_{Error}$', ha='right', fontsize=36)

if __name__ == '__main__':
    centers = [5, 5.3, 4.7]
    # Fixed seed, so that the figure is reproducible
    np.random.seed(123)
    setFonts(30)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # `std` is assigned here, but the call below passes the literal 0.1
    std = 0.1
    numData = 100
    show_fig(0.1, ax, 'Sum-Squares')

    # Save and show
    showData('anova_annotated.png')