def showResults(challenger_data, model):
    """Show the original data, and the resulting logit-fit.

    Parameters
    ----------
    challenger_data : 2-D array; column 0 is read as the temperature,
        column 1 as the failure indicator
    model : fitted model object; ``model.params[0]`` is used as the
        intercept (alpha) and ``model.params[1]`` as the slope (beta)
    """
    temperature = challenger_data[:, 0]
    failures = challenger_data[:, 1]

    # First plot the original data
    plt.figure()
    setFonts()
    sns.set_style('darkgrid')
    np.set_printoptions(precision=3, suppress=True)

    plt.scatter(temperature, failures, s=200, color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside Temperature [F]")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    # tight_layout has to be *called*; the bare attribute reference did nothing
    plt.tight_layout()

    # Plot the fit
    x = np.arange(50, 85)
    alpha = model.params[0]
    beta = model.params[1]
    y = logistic(x, beta, alpha)

    # No plt.hold(True) here: it was removed from Matplotlib, and successive
    # plot calls draw into the same axes anyway
    plt.plot(x, y, 'r')
    plt.xlim([50, 85])

    outFile = 'ChallengerPlain.png'
    showData(outFile)
def showAndSave(temperature: np.ndarray, failures: np.ndarray) -> None:
    """Show the input data, and save the resulting figure.

    Parameters
    ----------
    temperature : temperature data
    failures : corresponding failure status
    """
    # Plot the failure status as a function of temperature
    plt.figure()
    setFonts()
    sns.set_style('darkgrid')
    np.set_printoptions(precision=3, suppress=True)

    plt.scatter(temperature, failures, s=200, color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside Temperature [F]")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    # tight_layout has to be *called*; the bare attribute reference did nothing
    plt.tight_layout()

    outFile = 'Challenger_ORings.png'
    showData(outFile)
def show_poisson_views():
    """Plot PMF, CDF and PPF of a Poisson(10) distribution in three stacked panels."""
    sns.set_palette(sns.color_palette('muted'))

    fig, ax = plt.subplots(3, 1)
    pmf_axis, cdf_axis, ppf_axis = ax[0], ax[1], ax[2]

    counts = np.arange(25)
    # Frozen distribution object (local name avoids clashing with a pandas alias)
    poisson_dist = stats.poisson(10)
    setFonts(12)

    # Top panel: probability mass function
    pmf_axis.plot(counts, poisson_dist.pmf(counts), 'x-')
    pmf_axis.set_title('Poisson distribution', fontsize=24)
    pmf_axis.set_xticklabels([])
    pmf_axis.set_ylabel('PMF (X)')

    # Middle panel: cumulative distribution function
    cdf_axis.plot(counts, poisson_dist.cdf(counts))
    cdf_axis.set_xlabel('X')
    cdf_axis.set_ylabel('CDF (X)')

    # Bottom panel: percent point function, evaluated on [0, 1]
    probabilities = np.linspace(0, 1, 100)
    ppf_axis.plot(probabilities, poisson_dist.ppf(probabilities))
    ppf_axis.set_xlabel('X')
    ppf_axis.set_ylabel('PPF (X)')

    plt.tight_layout()
    plt.show()
def showSimResults(alpha_samples, beta_samples):
    """Show the results of the simulations, and save them to an outFile.

    Parameters
    ----------
    alpha_samples : posterior samples of alpha, shown in the lower histogram
    beta_samples : posterior samples of beta, shown in the upper histogram
    """
    plt.figure(figsize=(12.5, 6))
    sns.set_style('darkgrid')
    setFonts(18)

    # Histogram of the samples:
    plt.subplot(211)
    plt.title(r"Posterior distributions of the variables $\alpha, \beta$")
    # `density=True` replaces the `normed=True` keyword, which Matplotlib
    # no longer accepts
    plt.hist(beta_samples, histtype='stepfilled', bins=35, alpha=0.85,
             label=r"posterior of $\beta$", color="#7A68A6", density=True)
    plt.legend()

    plt.subplot(212)
    plt.hist(alpha_samples, histtype='stepfilled', bins=35, alpha=0.85,
             label=r"posterior of $\alpha$", color="#A60628", density=True)
    plt.legend()

    outFile = 'Challenger_Parameters.png'
    showData(outFile)
def KS_principle(inData):
    """Show the principle of the Kolmogorov-Smirnov test.

    Parameters
    ----------
    inData : sample data; its empirical CDF is evaluated over the fixed
        range 0 to 10 and drawn next to the standard-normal CDF
    """
    # CDF of normally distributed data
    nd = stats.norm()
    nd_x = np.linspace(-4, 4, 101)
    nd_y = nd.cdf(nd_x)

    # Empirical CDF of the sample data, which range from approximately 0 to 10
    numPts = 50
    lowerLim = 0
    upperLim = 10
    ecdf_x = np.linspace(lowerLim, upperLim, numPts)
    # Use the function argument; the original read an undefined name `data`
    ecdf_y = stats.cumfreq(inData, numPts, (lowerLim, upperLim))[0] / len(inData)

    # Add zero-point by hand
    ecdf_x = np.hstack((0., ecdf_x))
    ecdf_y = np.hstack((0., ecdf_y))

    # Plot the data
    sns.set_style('ticks')
    sns.set_context('poster')
    setFonts(36)

    # No plt.hold(True): it was removed from Matplotlib, and successive
    # plot calls draw into the same axes anyway
    plt.plot(nd_x, nd_y, 'k--')
    plt.plot(ecdf_x, ecdf_y, color='k')
    plt.xlabel('X')
    plt.ylabel('Cumulative Probability')

    # For the arrow, find the first sample point at or beyond x = 2 on each curve
    ecdf_startIndex = np.min(np.where(ecdf_x >= 2))
    arrowStart = np.array([ecdf_x[ecdf_startIndex], ecdf_y[ecdf_startIndex]])

    nd_startIndex = np.min(np.where(nd_x >= 2))
    arrowEnd = np.array([nd_x[nd_startIndex], nd_y[nd_startIndex]])
    arrowDelta = arrowEnd - arrowStart

    # Two opposing vertical arrows together form a double-headed arrow
    plt.arrow(arrowStart[0], arrowStart[1], 0, arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.04,
              head_width=0.4, color='k')
    plt.arrow(arrowStart[0], arrowStart[1] + arrowDelta[1], 0, -arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.04,
              head_width=0.4, color='k')

    outFile = 'KS_Example.png'
    showData(outFile)
def show_poisson_views():
    """Plot PMF, CDF and PPF of a Poisson(10) distribution in three stacked panels."""
    sns.set_palette(sns.color_palette('muted'))

    fig, ax = plt.subplots(3, 1)
    pmf_axis, cdf_axis, ppf_axis = ax[0], ax[1], ax[2]

    counts = np.arange(25)
    # Frozen distribution object (local name avoids clashing with a pandas alias)
    poisson_dist = stats.poisson(10)
    setFonts(12)

    # Top panel: probability mass function
    pmf_axis.plot(counts, poisson_dist.pmf(counts), 'x-')
    pmf_axis.set_title('Poisson distribution', fontsize=24)
    pmf_axis.set_xticklabels([])
    pmf_axis.set_ylabel('PMF (X)')

    # Middle panel: cumulative distribution function
    cdf_axis.plot(counts, poisson_dist.cdf(counts))
    cdf_axis.set_xlabel('X')
    cdf_axis.set_ylabel('CDF (X)')

    # Bottom panel: percent point function, evaluated on [0, 1]
    probabilities = np.linspace(0, 1, 100)
    ppf_axis.plot(probabilities, poisson_dist.ppf(probabilities))
    ppf_axis.set_xlabel('X')
    ppf_axis.set_ylabel('PPF (X)')

    plt.tight_layout()
    plt.show()
def showProbabilities(linearTemperature, temperature, failures, mean_prob_t, p_t, quantiles) -> None:
    """Show the posterior probabilities, and save the resulting figures.

    Parameters
    ----------
    linearTemperature : x-values for the probability curves; indexed as
        ``[:, 0]`` for the CI band, so a 2-D column array is expected
    temperature : observed temperatures, drawn as scatter points
    failures : observed failure status, drawn as scatter points
    mean_prob_t : average posterior probability, plotted over linearTemperature
    p_t : 2-D array of posterior realizations; rows 0 and -2 are plotted
    quantiles : sequence unpacked into ``fill_between`` as the band
        boundaries; ``quantiles[0]`` is additionally drawn as a line
    """
    # --- Show the probability curve ----
    plt.figure(figsize=(12.5, 4))
    setFonts(18)

    # Plain "\n" in the label: the former backslash line-continuation inside
    # the string literal leaked source indentation into the legend text
    plt.plot(linearTemperature, mean_prob_t, lw=3,
             label="Average posterior\nprobability of defect")
    plt.plot(linearTemperature, p_t[0, :], ls="--", label="Realization from posterior")
    plt.plot(linearTemperature, p_t[-2, :], ls="--", label="Realization from posterior")
    plt.scatter(temperature, failures, color="k", s=50, alpha=0.5)
    plt.title("Posterior expected value of probability of defect, plus realizations")
    plt.legend(loc="lower left")
    plt.ylim(-0.1, 1.1)
    plt.xlim(linearTemperature.min(), linearTemperature.max())
    plt.ylabel("Probability")
    plt.xlabel("Temperature [F]")

    outFile = 'Challenger_Probability.png'
    showData(outFile)

    # --- Draw CIs ---
    setFonts()
    sns.set_style('darkgrid')

    plt.fill_between(linearTemperature[:, 0], *quantiles, alpha=0.7, color="#7A68A6")
    plt.plot(linearTemperature[:, 0], quantiles[0], label="95% CI",
             color="#7A68A6", alpha=0.7)
    plt.plot(linearTemperature, mean_prob_t, lw=1, ls="--", color="k",
             label="average posterior \nprobability of defect")

    plt.xlim(linearTemperature.min(), linearTemperature.max())
    plt.ylim(-0.02, 1.02)
    plt.legend(loc="lower left")
    plt.scatter(temperature, failures, color="k", s=50, alpha=0.5)
    plt.xlabel("Temperature [F]")
    plt.ylabel("Posterior Probability Estimate")

    outFile = 'Challenger_CIs.png'
    showData(outFile)
def show3D():
    """Draw a surface plot and a wireframe plot side by side as 3D axes."""
    # Imports needed only for this example
    from matplotlib import cm  # colormaps
    # Kept from the original, which states this module is required for 3D plots
    from mpl_toolkits.mplot3d import Axes3D

    # Figure twice as wide as it is tall
    fig = plt.figure(figsize=plt.figaspect(0.5))
    setFonts(16)

    # ---- Left panel: surface of sin(r) over a square grid
    grid_x = np.arange(-5, 5, 0.1)
    grid_y = np.arange(-5, 5, 0.1)
    grid_x, grid_y = np.meshgrid(grid_x, grid_y)
    radius = np.sqrt(grid_x**2 + grid_y**2)
    height = np.sin(radius)

    # The "projection" keyword is what turns the subplot into a 3D axis
    left_ax = fig.add_subplot(1, 2, 1, projection='3d')
    surface = left_ax.plot_surface(grid_x, grid_y, height, rstride=1, cstride=1,
                                   cmap=cm.GnBu, linewidth=0, antialiased=False)
    left_ax.set_zlim3d(-1.01, 1.01)
    fig.colorbar(surface, shrink=0.5, aspect=10)

    # ---- Right panel: wireframe of the built-in 3D test data
    from mpl_toolkits.mplot3d.axes3d import get_test_data
    right_ax = fig.add_subplot(1, 2, 2, projection='3d')
    test_x, test_y, test_z = get_test_data(0.05)
    right_ax.plot_wireframe(test_x, test_y, test_z, rstride=10, cstride=10)

    showData('3dGraph.png')
def KS_principle(inData):
    """Show the principle of the Kolmogorov-Smirnov test.

    Parameters
    ----------
    inData : sample data; its empirical CDF is evaluated over the fixed
        range 0 to 10 and drawn next to the standard-normal CDF
    """
    # CDF of normally distributed data
    nd = stats.norm()
    nd_x = np.linspace(-4, 4, 101)
    nd_y = nd.cdf(nd_x)

    # Empirical CDF of the sample data, which range from approximately 0 to 10
    numPts = 50
    lowerLim = 0
    upperLim = 10
    ecdf_x = np.linspace(lowerLim, upperLim, numPts)
    # Use the function argument; the original read an undefined name `data`
    ecdf_y = stats.cumfreq(inData, numPts, (lowerLim, upperLim))[0] / len(inData)

    # Add zero-point by hand
    ecdf_x = np.hstack((0., ecdf_x))
    ecdf_y = np.hstack((0., ecdf_y))

    # Plot the data
    sns.set_style('ticks')
    sns.set_context('poster')
    setFonts(36)

    # No plt.hold(True): it was removed from Matplotlib, and successive
    # plot calls draw into the same axes anyway
    plt.plot(nd_x, nd_y, 'k--')
    plt.plot(ecdf_x, ecdf_y, color='k')
    plt.xlabel('X')
    plt.ylabel('Cumulative Probability')

    # For the arrow, find the first sample point at or beyond x = 2 on each curve
    ecdf_startIndex = np.min(np.where(ecdf_x >= 2))
    arrowStart = np.array([ecdf_x[ecdf_startIndex], ecdf_y[ecdf_startIndex]])

    nd_startIndex = np.min(np.where(nd_x >= 2))
    arrowEnd = np.array([nd_x[nd_startIndex], nd_y[nd_startIndex]])
    arrowDelta = arrowEnd - arrowStart

    # Two opposing vertical arrows together form a double-headed arrow
    plt.arrow(arrowStart[0], arrowStart[1], 0, arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.04,
              head_width=0.4, color='k')
    plt.arrow(arrowStart[0], arrowStart[1] + arrowDelta[1], 0, -arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.04,
              head_width=0.4, color='k')

    outFile = 'KS_Example.png'
    showData(outFile)
def main():
    """Demonstrate the central limit theorem with three side-by-side histograms.

    Reads ``ndata`` from the enclosing scope.
    """
    setFonts(24)

    # Uniformly distributed random samples
    data = np.random.random(ndata)

    fig, axs = plt.subplots(1, 3)

    # Leftmost panel: the raw samples
    showAsHistogram(axs[0], data, 'Random data')

    # Remaining panels: means over consecutive groups of the given size
    for panel, group_size in zip(axs[1:], (2, 10)):
        grouped = data.reshape((ndata // group_size, group_size))
        showAsHistogram(panel, np.mean(grouped, axis=1), f'Average over {group_size}')

    # Format them and show them
    axs[0].set_ylabel('Counts')
    plt.tight_layout()
    showData('CentralLimitTheorem.png')
def showAndSave(temperature, failures):
    """Show the input data, and save the resulting figure.

    Parameters
    ----------
    temperature : temperature data (x-values of the scatter plot)
    failures : corresponding failure status (y-values of the scatter plot)
    """
    # Plot the failure status as a function of temperature
    plt.figure()
    setFonts()
    sns.set_style('darkgrid')
    np.set_printoptions(precision=3, suppress=True)

    plt.scatter(temperature, failures, s=200, color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside Temperature [F]")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    # tight_layout has to be *called*; the bare attribute reference did nothing
    plt.tight_layout()

    outFile = 'Challenger_ORings.png'
    showData(outFile)
def generate_probplot(): '''Generate a prob-plot for a chi2-distribution of sample data''' # Define the skewed distribution chi2 = stats.chi2(3) # Generate the data x = np.linspace(0,10, 100) y = chi2.pdf(x) np.random.seed(12345) numData = 100 data = chi2.rvs(numData) # Arrange subplots sns.set_context('paper') sns.set_style('white') setFonts(11) fig, axs = plt.subplots(1,2) # Plot distribution axs[0].plot(x,y) axs[0].set_xlabel('X') axs[0].set_ylabel('PDF(X)') axs[0].set_title('chi2(x), k=3') sns.set_style('white') x0, x1 = axs[0].get_xlim() y0, y1 = axs[0].get_ylim() axs[0].set_aspect((x1-x0)/(y1-y0)) # Plot probplot plt.axes(axs[1]) stats.probplot(data, plot=plt) x0, x1 = axs[1].get_xlim() y0, y1 = axs[1].get_ylim() axs[1].axhline(0, lw=0.5, ls='--') axs[1].axvline(0, lw=0.5, ls='--') axs[1].set_aspect((x1-x0)/(y1-y0)) showData('chi2pp.png') return(data) '''
def showProbabilities(linearTemperature, temperature, failures, mean_prob_t, p_t, quantiles):
    """Show the posterior probabilities, and save the resulting figures.

    Parameters
    ----------
    linearTemperature : x-values for the probability curves; indexed as
        ``[:, 0]`` for the CI band, so a 2-D column array is expected
    temperature : observed temperatures, drawn as scatter points
    failures : observed failure status, drawn as scatter points
    mean_prob_t : average posterior probability, plotted over linearTemperature
    p_t : 2-D array of posterior realizations; rows 0 and -2 are plotted
    quantiles : sequence unpacked into ``fill_between`` as the band
        boundaries; ``quantiles[0]`` is additionally drawn as a line
    """
    # --- Show the probability curve ----
    plt.figure(figsize=(12.5, 4))
    setFonts(18)

    # Plain "\n" in the label: the former backslash line-continuation inside
    # the string literal leaked source indentation into the legend text
    plt.plot(linearTemperature, mean_prob_t, lw=3,
             label="Average posterior\nprobability of defect")
    plt.plot(linearTemperature, p_t[0, :], ls="--", label="Realization from posterior")
    plt.plot(linearTemperature, p_t[-2, :], ls="--", label="Realization from posterior")
    plt.scatter(temperature, failures, color="k", s=50, alpha=0.5)
    plt.title("Posterior expected value of probability of defect, plus realizations")
    plt.legend(loc="lower left")
    plt.ylim(-0.1, 1.1)
    plt.xlim(linearTemperature.min(), linearTemperature.max())
    plt.ylabel("Probability")
    plt.xlabel("Temperature [F]")

    outFile = 'Challenger_Probability.png'
    showData(outFile)

    # --- Draw CIs ---
    setFonts()
    sns.set_style('darkgrid')

    plt.fill_between(linearTemperature[:, 0], *quantiles, alpha=0.7, color="#7A68A6")
    plt.plot(linearTemperature[:, 0], quantiles[0], label="95% CI",
             color="#7A68A6", alpha=0.7)
    plt.plot(linearTemperature, mean_prob_t, lw=1, ls="--", color="k",
             label="average posterior \nprobability of defect")

    plt.xlim(linearTemperature.min(), linearTemperature.max())
    plt.ylim(-0.02, 1.02)
    plt.legend(loc="lower left")
    plt.scatter(temperature, failures, color="k", s=50, alpha=0.5)
    plt.xlabel("Temperature [F]")
    plt.ylabel("Posterior Probability Estimate")

    outFile = 'Challenger_CIs.png'
    showData(outFile)
def main():
    """Demonstrate the central limit theorem with four side-by-side histograms.

    Reads ``data`` and ``ndata`` from the enclosing scope; nothing is
    generated inside this function.
    """
    setFonts(24)

    fig, axs = plt.subplots(1, 4)

    # Leftmost panel: the raw samples
    showAsHistogram(axs[0], data, 'Random data')

    # Remaining panels: means over consecutive groups of the given size
    for panel, group_size in zip(axs[1:], (2, 10, 100)):
        grouped = data.reshape((ndata // group_size, group_size))
        showAsHistogram(panel, np.mean(grouped, axis=1), f'Average over {group_size}')

    # Format them and show them
    axs[0].set_ylabel('Counts')
    plt.tight_layout()
    showData('CentralLimitTheorem.png')
def main():
    """Simulate subscription durations, censor them at the study end, and plot them."""
    # Generate some dummy data
    np.set_printoptions(precision=2)
    num_subjects = 20
    study_duration = 12

    def draw_duration():
        # Both candidates are drawn every time; a coin flip then picks one
        candidates = [exponential(18), exponential(3)]
        return candidates[uniform() < 0.5]

    # Note: a constant dropout rate is equivalent to an exponential distribution!
    true_durations = np.array([draw_duration() for _ in range(num_subjects)])

    # Durations beyond the study end are clipped to the study duration ...
    censored_durations = np.minimum(true_durations, study_duration)
    # ... and only events before the study end count as observed
    was_observed = true_durations < study_duration

    # Show the data
    setFonts(18)
    plt.xlim(0, 24)
    plt.vlines(12, 0, 30, lw=2, linestyles="--")
    plt.xlabel('time')
    plt.title('Subscription Times, at $t=12$ months')
    plot_lifetimes(censored_durations, event_observed=was_observed)
    plt.show()

    print(f'Observed subscription time at time {study_duration:d}',
          censored_durations)
def simplePlots():
    """Demonstrate the generation of different statistical standard plots.

    Each plot is drawn with pyplot / seaborn / pandas and then handed to
    ``printout`` or ``showData`` (both defined outside this function).
    """
    # Univariate data -------------------------

    # Make sure that always the same random numbers are generated
    np.random.seed(1234)

    # Generate data that are normally distributed
    x = np.random.randn(500)

    # Other graphics settings
    sns.set(context='poster', style='ticks', palette=sns.color_palette('muted'))

    # Set the fonts the way I like them
    setFonts(32)

    # Scatter plot
    plt.scatter(np.arange(len(x)), x)
    plt.xlim([0, len(x)])

    # Save and show the data, in a systematic format
    printout('scatterPlot.png', xlabel='Datapoints', ylabel='Values', title='Scatter')

    # Histogram
    plt.hist(x)
    printout('histogram_plain.png', xlabel='Data Values', ylabel='Frequency',
             title='Histogram, default settings')

    plt.hist(x, 25)
    printout('histogram.png', xlabel='Data Values', ylabel='Frequency',
             title='Histogram, 25 bins')

    # Cumulative probability density
    numbins = 20
    plt.plot(stats.cumfreq(x, numbins)[0])
    printout('CumulativeFrequencyFunction.png', xlabel='Data Values',
             ylabel='CumFreq', title='Cumulative Frequency')

    # KDE-plot
    sns.kdeplot(x)
    printout('kde.png', xlabel='Data Values', ylabel='Density', title='KDE_plot')

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(x, sym='*')
    printout('boxplot.png', xlabel='Values', title='Boxplot')

    plt.boxplot(x, sym='*', vert=False)
    plt.title('Boxplot, horizontal')
    plt.xlabel('Values')
    plt.show()

    # Errorbars
    x = np.arange(5)
    y = x**2
    errorBar = x/2
    plt.errorbar(x, y, yerr=errorBar, fmt='o', capsize=5, capthick=3)
    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    printout('Errorbars.png', xlabel='Data Values', ylabel='Measurements',
             title='Errorbars')

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))

    nd2 = stats.norm(loc=3, scale=1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot.
    # The frame is passed as `data=` because the meaning of the first
    # positional argument differs between seaborn versions.
    df = pd.DataFrame({'Girls': data, 'Boys': data2})
    sns.violinplot(data=df)
    printout('violinplot.png', title='Violinplot')

    # Barplot
    # The font-size is set such that the legend does not overlap with the data
    np.random.seed(1234)
    setFonts(20)

    df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
    df.plot(kind='bar', grid=False, color=sns.color_palette('muted'))

    showData('barplot.png')

    setFonts(28)

    # Bivariate Plots
    df2 = pd.DataFrame(np.random.rand(50, 3), columns=['a', 'b', 'c'])
    df2.plot(kind='scatter', x='a', y='b', s=df2['c']*500)
    plt.axhline(0, ls='--', color='#999999')
    plt.axvline(0, ls='--', color='#999999')
    printout('bivariate.png')

    # Grouped Boxplot (one box per column of the bar-plot frame)
    sns.set_style('whitegrid')
    sns.boxplot(data=df)
    setFonts(28)
    printout('groupedBoxplot.png', title='sns.boxplot')

    sns.set_style('ticks')

    # Pieplot
    txtLabels = 'Cats', 'Dogs', 'Frogs', 'Others'
    fractions = [45, 30, 15, 10]
    offsets = (0, 0.05, 0, 0)

    plt.pie(fractions, explode=offsets, labels=txtLabels,
            autopct='%1.1f%%', shadow=True, startangle=90,
            colors=sns.color_palette('muted'))
    plt.axis('equal')
    printout('piePlot.png', title=' ')
def show_fig(std, ax, title):
    """Plot three groups of normally distributed samples into the given axis.

    Group centers and the number of samples per group are read from the
    module-level names ``centers`` and ``numData``.
    """
    sample_index = np.arange(numData)
    for group in range(3):
        samples = stats.norm(centers[group], std).rvs(numData)
        # Shift each group to the right of the previous one
        ax.plot(group * numData + sample_index, samples, '.', ms=10)

    ax.xaxis.set_ticks([50, 150, 250])
    ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
    ax.set_title(title)
    sns.despine()


if __name__ == '__main__':
    # Set up the figure
    sns.set_context('paper')
    sns.set_style('whitegrid')
    setFonts(14)

    # Two panels showing the same three group centers with different SDs
    fig, axs = plt.subplots(1, 2)
    centers = [5, 5.3, 4.7]
    stds = [0.1, 2]
    numData = 100

    for panel, (spread, panel_title) in zip(axs, ((0.1, 'SD=0.1'), (2, 'SD=2.0'))):
        show_fig(spread, panel, panel_title)

    showData('anova_oneway.png')
except ImportError: # Ensure correct performance otherwise def setFonts(*options): return # Generate some dummy data np.set_printoptions(precision=2) N = 20 study_duration = 12 # Note: a constant dropout rate is equivalent to an exponential distribution! actual_subscriptiontimes = np.array([[exponential(18), exponential(3)][uniform() < 0.5] for i in range(N)]) observed_subscriptiontimes = np.minimum(actual_subscriptiontimes, study_duration) observed = actual_subscriptiontimes < study_duration # Show the data setFonts(18) plt.xlim(0, 24) plt.vlines(12, 0, 30, lw=2, linestyles="--") plt.xlabel('time') plt.title('Subscription Times, at $t=12$ months') plot_lifetimes(observed_subscriptiontimes, event_observed=observed) print("Observed subscription time at time %d:\n" % (study_duration), observed_subscriptiontimes)
return # Calculate the values nd = stats.norm() x = np.linspace(-3, 3, 100) yp = nd.pdf(x) y = nd.cdf(x) x1 = np.linspace(-3, 1) y1 = nd.pdf(x1) # Make the plot sns.set_context('paper') sns.set_style('white') setFonts(12) figs, axs = plt.subplots(1, 2) axs[0].plot(x, yp, 'k') axs[0].fill_between(x1, y1, facecolor='#CCCCCC') axs[0].text(0, 0.1, 'CDF(x)', family='cursive', fontsize=14, horizontalalignment='center', style='italic') axs[0].set_xlabel('x') axs[0].set_ylabel('PDF(x)') sns.despine()
def show_fig(std, ax, title):
    """Plot three groups of normally distributed samples into the given axis.

    Group centers and the number of samples per group are read from the
    module-level names ``centers`` and ``numData``.
    """
    sample_index = np.arange(numData)
    for group in range(3):
        samples = stats.norm(centers[group], std).rvs(numData)
        # Shift each group to the right of the previous one
        ax.plot(group * numData + sample_index, samples, '.', ms=10)

    ax.xaxis.set_ticks([50, 150, 250])
    ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
    ax.set_title(title)
    sns.despine()


if __name__ == '__main__':
    # Set up the figure
    sns.set_context('paper')
    sns.set_style('whitegrid')
    setFonts(14)

    # Two panels showing the same three group centers with different SDs
    fig, axs = plt.subplots(1, 2)
    centers = [5, 5.3, 4.7]
    stds = [0.1, 2]
    numData = 100

    for panel, (spread, panel_title) in zip(axs, ((0.1, 'SD=0.1'), (2, 'SD=2.0'))):
        show_fig(spread, panel, panel_title)

    showData('anova_oneway.png')
ax.xaxis.set_ticks([50,150,250]) ax.set_xticklabels(['Group1', 'Group2', 'Group3']) ax.yaxis.set_ticks([]) ax.set_title(title) grandMean = np.mean(groupMean) ax.axhline(grandMean, color='#999999') ax.plot([80, 220], [groupMean[1], groupMean[1]], '#999999') ax.plot([80, 120], [groupMean[1]+0.2, groupMean[1]+0.2], '#999999') ax.annotate('', xy=(210, grandMean), xytext=(210,groupMean[1]), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black')) ax.annotate('', xy=(90, groupMean[1]), xytext=(90,groupMean[1]+0.2), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black')) ax.text(210, (grandMean + groupMean[1])/2., '$SS_{Treatment}$', fontsize=36) ax.text(90, groupMean[1]+0.1, '$SS_{Error}$', ha='right', fontsize=36) if __name__ == '__main__': centers = [5, 5.3, 4.7] np.random.seed(123) setFonts(30) fig = plt.figure() ax = fig.add_subplot(111) std = 0.1 numData = 100 show_fig(0.1, ax, 'Sum-Squares') # Save and show showData('anova_annotated.png')
try:
    # Import formatting commands if directory "Utilities" is available
    from ISP_mystyle import setFonts, showData

except ImportError:
    # Ensure correct performance otherwise: setFonts becomes a no-op and
    # showData only displays the figure
    def setFonts(*options):
        return
    def showData(*options):
        plt.show()
        return

# General formatting options
sns.set(context='poster', style='ticks')
sns.set_palette(sns.color_palette('hls', 3))
setFonts(24)

#----------------------------------------------------------------------
def show_binomial():
    """Show an example of binomial distributions"""

    # Arbitrarily select 3 total numbers, and 3 probabilities
    ns = [20,20,40]
    ps = [0.5, 0.7, 0.5]

    # For each (p,n)-pair, plot the corresponding binomial PMFs
    for (p,n) in zip(ps, ns):
        bd = stats.binom(n,p)       # generate the "frozen function"
        x = np.arange(n+1)          # generate the x-values: every possible count 0..n
        # One dashed line with circle markers per (p,n)-pair, labelled for a legend
        plt.plot(x, bd.pmf(x), 'o--', label='p={0:3.1f}, n={1}'.format(p,n))
def setFonts(*options): return def showData(*options): plt.show() return # Generate the data x = np.arange(-20, 80) y = 10 + 0.2 * x + 4 * np.random.randn(len(x)) # Make the plot sns.set_style('ticks') sns.set_context('poster') setFonts() fig = plt.figure() ax = fig.add_subplot(111) ax.plot(x, y, '.') ax.spines['left'].set_position('zero') ax.spines['bottom'].set_position('zero') sns.despine() # Draw the fitted line p = np.polyfit(x, y, 1) yFit = np.polyval(p, x) ax.plot(x, yFit, 'r')
def simplePlots():
    """Demonstrate the generation of different statistical standard plots.

    Each plot is drawn with pyplot / seaborn / pandas and then handed to
    ``printout`` or ``showData`` (both defined outside this function).
    """
    # Univariate data -------------------------

    # Make sure that always the same random numbers are generated
    np.random.seed(1234)

    # Generate data that are normally distributed
    x = np.random.randn(500)

    # Other graphics settings
    sns.set(context="poster", style="ticks", palette=sns.color_palette("muted"))

    # Set the fonts the way I like them
    setFonts(32)

    # Scatter plot
    plt.scatter(np.arange(len(x)), x)
    plt.xlim([0, len(x)])

    # Save and show the data, in a systematic format
    printout("scatterPlot.png", xlabel="Datapoints", ylabel="Values", title="Scatter")

    # Histogram
    plt.hist(x)
    printout("histogram_plain.png", xlabel="Data Values", ylabel="Frequency",
             title="Histogram, default settings")

    plt.hist(x, 25)
    printout("histogram.png", xlabel="Data Values", ylabel="Frequency",
             title="Histogram, 25 bins")

    # Cumulative probability density
    numbins = 20
    plt.plot(stats.cumfreq(x, numbins)[0])
    printout("CumulativeFrequencyFunction.png", xlabel="Data Values",
             ylabel="CumFreq", title="Cumulative Frequency")

    # KDE-plot
    sns.kdeplot(x)
    printout("kde.png", xlabel="Data Values", ylabel="Density", title="KDE_plot")

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(x, sym="*")
    printout("boxplot.png", xlabel="Values", title="Boxplot")

    plt.boxplot(x, sym="*", vert=False)
    plt.title("Boxplot, horizontal")
    plt.xlabel("Values")
    plt.show()

    # Errorbars
    x = np.arange(5)
    y = x ** 2
    errorBar = x / 2
    plt.errorbar(x, y, yerr=errorBar, fmt="o", capsize=5, capthick=3)
    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    printout("Errorbars.png", xlabel="Data Values", ylabel="Measurements",
             title="Errorbars")

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))

    nd2 = stats.norm(loc=3, scale=1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot.
    # The frame is passed as `data=` because the meaning of the first
    # positional argument differs between seaborn versions.
    df = pd.DataFrame({"Girls": data, "Boys": data2})
    sns.violinplot(data=df)
    printout("violinplot.png", title="Violinplot")

    # Barplot
    # The font-size is set such that the legend does not overlap with the data
    np.random.seed(1234)
    setFonts(20)

    df = pd.DataFrame(np.random.rand(10, 4), columns=["a", "b", "c", "d"])
    df.plot(kind="bar", grid=False, color=sns.color_palette("muted"))

    showData("barplot.png")

    setFonts(28)

    # Bivariate Plots
    df2 = pd.DataFrame(np.random.rand(50, 3), columns=["a", "b", "c"])
    df2.plot(kind="scatter", x="a", y="b", s=df2["c"] * 500)
    plt.axhline(0, ls="--", color="#999999")
    plt.axvline(0, ls="--", color="#999999")
    printout("bivariate.png")

    # Grouped Boxplot (one box per column of the bar-plot frame)
    sns.set_style("whitegrid")
    sns.boxplot(data=df)
    setFonts(28)
    printout("groupedBoxplot.png", title="sns.boxplot")

    sns.set_style("ticks")

    # Pieplot
    txtLabels = "Cats", "Dogs", "Frogs", "Others"
    fractions = [45, 30, 15, 10]
    offsets = (0, 0.05, 0, 0)

    plt.pie(
        fractions,
        explode=offsets,
        labels=txtLabels,
        autopct="%1.1f%%",
        shadow=True,
        startangle=90,
        colors=sns.color_palette("muted"),
    )
    plt.axis("equal")
    printout("piePlot.png", title=" ")
def simplePlots():
    """Demonstrate the generation of different statistical standard plots.

    Each plot is drawn with pyplot / seaborn / pandas and then handed to
    ``printout`` or ``showData`` (both defined outside this function).
    """
    # Univariate data -------------------------

    # Make sure that always the same random numbers are generated
    np.random.seed(1234)

    # Generate data that are normally distributed
    x = np.random.randn(500)

    # Other graphics settings
    sns.set(context='poster', style='ticks', palette=sns.color_palette('muted'))

    # Set the fonts the way I like them
    setFonts(32)

    # Scatter plot
    plt.scatter(np.arange(len(x)), x)
    plt.xlim([0, len(x)])

    # Save and show the data, in a systematic format
    printout('scatterPlot.png', xlabel='Datapoints', ylabel='Values', title='Scatter')

    # Histogram
    plt.hist(x)
    printout('histogram_plain.png', xlabel='Data Values', ylabel='Frequency',
             title='Histogram, default settings')

    plt.hist(x, 25)
    printout('histogram.png', xlabel='Data Values', ylabel='Frequency',
             title='Histogram, 25 bins')

    # Cumulative probability density
    numbins = 20
    plt.plot(stats.cumfreq(x, numbins)[0])
    printout('CumulativeFrequencyFunction.png', xlabel='Data Values',
             ylabel='CumFreq', title='Cumulative Frequency')

    # KDE-plot
    sns.kdeplot(x)
    printout('kde.png', xlabel='Data Values', ylabel='Density', title='KDE_plot')

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(x, sym='*')
    printout('boxplot.png', xlabel='Values', title='Boxplot')

    plt.boxplot(x, sym='*', vert=False)
    plt.title('Boxplot, horizontal')
    plt.xlabel('Values')
    plt.show()

    # Errorbars
    x = np.arange(5)
    y = x**2
    errorBar = x/2
    plt.errorbar(x, y, yerr=errorBar, fmt='o', capsize=5, capthick=3)
    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    printout('Errorbars.png', xlabel='Data Values', ylabel='Measurements',
             title='Errorbars')

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))

    nd2 = stats.norm(loc=3, scale=1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot.
    # The frame is passed as `data=` because the meaning of the first
    # positional argument differs between seaborn versions.
    df = pd.DataFrame({'Girls': data, 'Boys': data2})
    sns.violinplot(data=df)
    printout('violinplot.png', title='Violinplot')

    # Barplot
    # The font-size is set such that the legend does not overlap with the data
    np.random.seed(1234)
    setFonts(20)

    df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
    df.plot(kind='bar', grid=False, color=sns.color_palette('muted'))

    showData('barplot.png')

    setFonts(28)

    # Bivariate Plots
    df2 = pd.DataFrame(np.random.rand(50, 3), columns=['a', 'b', 'c'])
    df2.plot(kind='scatter', x='a', y='b', s=df2['c']*500)
    plt.axhline(0, ls='--', color='#999999')
    plt.axvline(0, ls='--', color='#999999')
    printout('bivariate.png')

    # Grouped Boxplot (one box per column of the bar-plot frame)
    sns.set_style('whitegrid')
    sns.boxplot(data=df)
    setFonts(28)
    printout('groupedBoxplot.png', title='sns.boxplot')

    sns.set_style('ticks')

    # Pieplot
    txtLabels = 'Cats', 'Dogs', 'Frogs', 'Others'
    fractions = [45, 30, 15, 10]
    offsets = (0, 0.05, 0, 0)

    plt.pie(fractions, explode=offsets, labels=txtLabels,
            autopct='%1.1f%%', shadow=False, startangle=90,
            colors=sns.color_palette('muted'))
    plt.axis('equal')
    printout('piePlot.png', title=' ')
def setFonts(*options): return def showData(*options): plt.show() return # Generate the data x = np.r_[3, 1.5, 4, 6, 3, 2] dx = np.r_[0.1, 0.3, 0.2, 0.2, 0.3, 0.25] xs = x - dx index = range(len(x)) # plot the data setFonts(20) plt.plot(x, 'o', ms=10, label='pre') plt.plot(xs, 'r*', ms=12, label='post') plt.bar(index, dx, width=0.5, align='center', color=0.75 * np.ones(3), label='pre-post') # Format the plot plt.legend(loc='upper left') plt.axhline(0, ls='--') plt.xlim(-0.3, 5.3) plt.ylim(-0.2, 6.2) plt.xlabel('Subject Nr')
import sys
# Make the sibling "Utilities" directory importable (two levels up)
sys.path.append(os.path.join('..', '..', 'Utilities'))

try:
    from ISP_mystyle import setFonts, showData

except ImportError:
    # Ensure correct performance otherwise: setFonts becomes a no-op and
    # showData only displays the figure
    def setFonts(*options):
        return
    def showData(*options):
        plt.show()
        return

sns.set_context('poster')
sns.set_style('ticks')
setFonts()

# Generate the data: 10 scores drawn from a normal distribution (mean 100, SD 20);
# the fixed seed makes the draw reproducible
np.random.seed(1234)
nd = stats.norm(100, 20)
scores = nd.rvs(10)

# Make the plot
plt.plot(scores, 'o')
# Dashed reference line at 110, dash-dotted line at the sample mean
plt.axhline(110, ls='--')
plt.axhline(np.mean(scores), ls='-.')
plt.xlim(-0.2, 9.2)
plt.ylim(50, 130)
plt.xlabel('Student-Nr')
plt.ylabel('Score')
"""Subroutine showing a histogram and formatting it""" axis.hist(data, bins=nbins) axis.set_xticks([0, 0.5, 1]) axis.set_title(title) if __name__ == '__main__': # Formatting options sns.set(context='poster', style='ticks', palette='muted') # Input data ndata = 100000 nbins = 50 setFonts(24) # Generate data data = np.random.random(ndata) # Show three histograms, side-by-side fig, axs = plt.subplots(1, 3) showAsHistogram(axs[0], data, 'Random data') showAsHistogram(axs[1], np.mean(data.reshape((int(ndata / 2), 2)), axis=1), 'Average over 2') showAsHistogram(axs[2], np.mean(data.reshape((int(ndata / 10), 10)), axis=1), 'Average over 10') # Format them and show them axs[0].set_ylabel('Counts') plt.tight_layout()
ax.annotate('', xy=(210, grandMean), xytext=(210, groupMean[1]), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black')) ax.annotate('', xy=(90, groupMean[1]), xytext=(90, groupMean[1] + 0.2), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black')) ax.text(210, (grandMean + groupMean[1]) / 2., '$SS_{Treatment}$', fontsize=36) ax.text(90, groupMean[1] + 0.1, '$SS_{Error}$', ha='right', fontsize=36) if __name__ == '__main__': centers = [5, 5.3, 4.7] np.random.seed(123) setFonts(30) fig = plt.figure() ax = fig.add_subplot(111) std = 0.1 numData = 100 show_fig(0.1, ax, 'Sum-Squares') # Save and show showData('anova_annotated.png')
plt.show() return # Calculate the values nd = stats.norm() x = np.linspace(-3,3,100) yp = nd.pdf(x) y = nd.cdf(x) x1 = np.linspace(-3, 1) y1 = nd.pdf(x1) # Make the plot sns.set_context('paper') sns.set_style('white') setFonts(12) figs, axs = plt.subplots(1,2) axs[0].plot(x,yp, 'k') axs[0].fill_between(x1, y1, facecolor='#CCCCCC') axs[0].text(0, 0.1, 'CDF(x)', family='cursive', fontsize=14, horizontalalignment='center', style='italic') axs[0].set_xlabel('x') axs[0].set_ylabel('PDF(x)') sns.despine() axs[1].plot(x, y, '#999999', lw=3) axs[1].set_xlabel('x') axs[1].set_ylabel('CDF(x)') plt.vlines(0, 0, 1, linestyles='--') sns.despine()
def setFonts(*options): return def showData(*options): plt.show() return # Generate the data x = np.r_[3, 1.5, 4, 6, 3, 2] dx = np.r_[0.1, 0.3, 0.2, 0.2, 0.3, 0.25] xs = x - dx index = range(len(x)) # plot the data setFonts(20) plt.plot(x, "o", ms=10, label="pre") plt.plot(xs, "r*", ms=12, label="post") plt.bar(index, dx, width=0.5, align="center", color=0.75 * np.ones(3), label="pre-post") # Format the plot plt.legend(loc="upper left") plt.axhline(0, ls="--") plt.xlim(-0.3, 5.3) plt.ylim(-0.2, 6.2) plt.xlabel("Subject Nr") plt.ylabel("Value") plt.tight_layout() # P-values for paired and unpaired T-tests _, p_paired = stats.ttest_rel(x, xs)
# Make the sibling "Utilities" directory importable (two levels up)
sys.path.append(os.path.join('..', '..', 'Utilities'))
try:
    # Import formatting commands if directory "Utilities" is available
    from ISP_mystyle import setFonts
except ImportError:
    # Ensure correct performance otherwise: setFonts becomes a no-op
    def setFonts(*options):
        return

# Generate some dummy data
np.set_printoptions(precision=2)
N = 20
study_duration = 12

# Note: a constant dropout rate is equivalent to an exponential distribution!
# Both candidates are drawn for every subject; the comparison uniform()<0.5
# then indexes the pair (False -> first entry, True -> second entry)
actual_subscriptiontimes = np.array([[exponential(18), exponential(3)][uniform()<0.5] for i in range(N)])
# Times beyond the study end are clipped to the study duration
observed_subscriptiontimes = np.minimum(actual_subscriptiontimes,study_duration)
# Only events that happen before the study end count as observed
observed= actual_subscriptiontimes < study_duration

# Show the data
setFonts(18)
plt.xlim(0,24)
# Dashed vertical line at t = 12
plt.vlines(12, 0, 30, lw=2, linestyles="--")
plt.xlabel('time')
plt.title('Subscription Times, at $t=12$ months')
plot_lifetimes(observed_subscriptiontimes, event_observed=observed)

print("Observed subscription time at time %d:\n"%(study_duration), observed_subscriptiontimes)