def showResults(challenger_data, model):
    """Show the original data, and the resulting logit-fit.

    Parameters
    ----------
    challenger_data : 2-D array
        Column 0 is read as the temperature, column 1 as the failure flag.
    model : fitted model
        ``model.params[0]`` and ``model.params[1]`` are read as the two
        coefficients handed to ``logistic`` (as ``alpha`` and ``beta``).
    """

    temperature = challenger_data[:, 0]
    failures = challenger_data[:, 1]

    # First plot the original data
    plt.figure()
    setFonts()
    sns.set_style('darkgrid')
    np.set_printoptions(precision=3, suppress=True)

    plt.scatter(temperature, failures, s=200, color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside Temperature [F]")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")

    # Plot the fit
    x = np.arange(50, 85)
    alpha = model.params[0]
    beta = model.params[1]
    y = logistic(x, beta, alpha)

    # No plt.hold(True): it was removed from Matplotlib, and successive
    # plotting calls draw into the same axes by default.
    plt.plot(x, y, 'r')
    plt.xlim([50, 85])

    # tight_layout must actually be *called*; the bare attribute is a no-op.
    plt.tight_layout()

    outFile = 'ChallengerPlain.png'
    showData(outFile)
def showAndSave(temperature: np.ndarray, failures: np.ndarray) -> None:
    """Shows the input data, and saves the resulting figure

    Parameters
    ----------
    temperature : temperature data
    failures : corresponding failure status

    """

    # Plot it, as a function of temperature
    plt.figure()
    setFonts()
    sns.set_style('darkgrid')
    np.set_printoptions(precision=3, suppress=True)

    plt.scatter(temperature, failures, s=200, color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside Temperature [F]")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")

    # tight_layout must actually be *called*; the bare attribute is a no-op.
    plt.tight_layout()

    outFile = 'Challenger_ORings.png'
    showData(outFile)
def show_poisson_views():
    """Plot the PMF, CDF and PPF of a Poisson(10) distribution in three rows."""

    muted_colors = sns.color_palette('muted')
    sns.set_palette(muted_colors)

    fig, (pmf_ax, cdf_ax, ppf_ax) = plt.subplots(3, 1)

    counts = np.arange(25)
    poisson_dist = stats.poisson(10)    # frozen distribution
    setFonts(12)

    # Top panel: probability mass function
    pmf_ax.plot(counts, poisson_dist.pmf(counts), 'x-')
    pmf_ax.set_title('Poisson distribution', fontsize=24)
    pmf_ax.set_xticklabels([])
    pmf_ax.set_ylabel('PMF (X)')

    # Middle panel: cumulative distribution function
    cdf_ax.plot(counts, poisson_dist.cdf(counts))
    cdf_ax.set_xlabel('X')
    cdf_ax.set_ylabel('CDF (X)')

    # Bottom panel: percent point function, evaluated on [0, 1]
    probabilities = np.linspace(0, 1, 100)
    ppf_ax.plot(probabilities, poisson_dist.ppf(probabilities))
    ppf_ax.set_xlabel('X')
    ppf_ax.set_ylabel('PPF (X)')

    plt.tight_layout()
    plt.show()
def showSimResults(alpha_samples, beta_samples):
    """Show the results of the simulations, and save them to an outFile.

    Parameters
    ----------
    alpha_samples : samples of alpha, shown in the lower histogram
    beta_samples : samples of beta, shown in the upper histogram
    """

    plt.figure(figsize=(12.5, 6))
    sns.set_style('darkgrid')
    setFonts(18)

    # Histogram of the samples.
    # "density=True" replaces the "normed=True" keyword, which has been
    # removed from Matplotlib's hist().
    plt.subplot(211)
    plt.title(r"Posterior distributions of the variables $\alpha, \beta$")
    plt.hist(beta_samples,
             histtype='stepfilled',
             bins=35,
             alpha=0.85,
             label=r"posterior of $\beta$",
             color="#7A68A6",
             density=True)
    plt.legend()

    plt.subplot(212)
    plt.hist(alpha_samples,
             histtype='stepfilled',
             bins=35,
             alpha=0.85,
             label=r"posterior of $\alpha$",
             color="#A60628",
             density=True)
    plt.legend()

    outFile = 'Challenger_Parameters.png'
    showData(outFile)
Exemple #5
0
def KS_principle(inData):
    """Show the principle of the Kolmogorov-Smirnov test.

    Plots the CDF of a standard normal distribution together with the
    empirical cumulative frequency of ``inData`` (binned over 0..10), and
    marks the vertical distance between the two curves at x >= 2 with a
    double-headed arrow.

    Parameters
    ----------
    inData : sample data; the binning below uses the fixed range 0 to 10
    """

    # CDF of normally distributed data
    nd = stats.norm()
    nd_x = np.linspace(-4, 4, 101)
    nd_y = nd.cdf(nd_x)

    # Empirical CDF of the sample data, which range for approximately 0 to 10
    numPts = 50
    lowerLim = 0
    upperLim = 10
    ecdf_x = np.linspace(lowerLim, upperLim, numPts)
    # Use the function argument here (the original referenced an undefined
    # name "data")
    ecdf_y = stats.cumfreq(inData, numPts,
                           (lowerLim, upperLim))[0] / len(inData)

    # Add zero-point by hand
    ecdf_x = np.hstack((0., ecdf_x))
    ecdf_y = np.hstack((0., ecdf_y))

    # Plot the data
    sns.set_style('ticks')
    sns.set_context('poster')
    setFonts(36)

    # No plt.hold(True): it was removed from Matplotlib, and successive
    # plotting calls draw into the same axes by default.
    plt.plot(nd_x, nd_y, 'k--')
    plt.plot(ecdf_x, ecdf_y, color='k')
    plt.xlabel('X')
    plt.ylabel('Cumulative Probability')

    # For the arrow, find the start
    ecdf_startIndex = np.min(np.where(ecdf_x >= 2))
    arrowStart = np.array([ecdf_x[ecdf_startIndex], ecdf_y[ecdf_startIndex]])

    nd_startIndex = np.min(np.where(nd_x >= 2))
    arrowEnd = np.array([nd_x[nd_startIndex], nd_y[nd_startIndex]])
    arrowDelta = arrowEnd - arrowStart

    # Two opposing arrows over the same segment give a double-headed arrow
    arrowStyle = dict(width=0.05,
                      length_includes_head=True,
                      head_length=0.04,
                      head_width=0.4,
                      color='k')

    plt.arrow(arrowStart[0], arrowStart[1], 0, arrowDelta[1], **arrowStyle)
    plt.arrow(arrowStart[0], arrowStart[1] + arrowDelta[1], 0, -arrowDelta[1],
              **arrowStyle)

    outFile = 'KS_Example.png'
    showData(outFile)
def showResults(challenger_data, model):
    """Show the original data, and the resulting logit-fit.

    Parameters
    ----------
    challenger_data : 2-D array
        Column 0 is read as the temperature, column 1 as the failure flag.
    model : fitted model
        ``model.params[0]`` and ``model.params[1]`` are read as the two
        coefficients handed to ``logistic`` (as ``alpha`` and ``beta``).
    """

    temperature = challenger_data[:, 0]
    failures = challenger_data[:, 1]

    # First plot the original data
    plt.figure()
    setFonts()
    sns.set_style('darkgrid')
    np.set_printoptions(precision=3, suppress=True)

    plt.scatter(temperature, failures, s=200, color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside Temperature [F]")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")

    # Plot the fit
    x = np.arange(50, 85)
    alpha = model.params[0]
    beta = model.params[1]
    y = logistic(x, beta, alpha)

    # No plt.hold(True): it was removed from Matplotlib, and successive
    # plotting calls draw into the same axes by default.
    plt.plot(x, y, 'r')
    plt.xlim([50, 85])

    # tight_layout must actually be *called*; the bare attribute is a no-op.
    plt.tight_layout()

    outFile = 'ChallengerPlain.png'
    showData(outFile)
def show_poisson_views():
    """Plot the PMF, CDF and PPF of a Poisson(10) distribution in three rows."""

    muted_colors = sns.color_palette('muted')
    sns.set_palette(muted_colors)

    fig, (pmf_ax, cdf_ax, ppf_ax) = plt.subplots(3, 1)

    counts = np.arange(25)
    poisson_dist = stats.poisson(10)    # frozen distribution
    setFonts(12)

    # Top panel: probability mass function
    pmf_ax.plot(counts, poisson_dist.pmf(counts), 'x-')
    pmf_ax.set_title('Poisson distribution', fontsize=24)
    pmf_ax.set_xticklabels([])
    pmf_ax.set_ylabel('PMF (X)')

    # Middle panel: cumulative distribution function
    cdf_ax.plot(counts, poisson_dist.cdf(counts))
    cdf_ax.set_xlabel('X')
    cdf_ax.set_ylabel('CDF (X)')

    # Bottom panel: percent point function, evaluated on [0, 1]
    probabilities = np.linspace(0, 1, 100)
    ppf_ax.plot(probabilities, poisson_dist.ppf(probabilities))
    ppf_ax.set_xlabel('X')
    ppf_ax.set_ylabel('PPF (X)')

    plt.tight_layout()
    plt.show()
def showProbabilities(linearTemperature, temperature, failures,
                      mean_prob_t, p_t, quantiles) -> None:
    """Plot the posterior probability curve and its credible band.

    Two figures are handed to ``showData`` in turn: the mean posterior
    probability with two individual realizations, and then the quantile
    band around the mean.

    Parameters
    ----------
    linearTemperature : 2-D array of temperature values for the curves;
        its column 0 is used as x-axis for the band
    temperature : measured temperatures (scatter x-values)
    failures : measured failure status (scatter y-values)
    mean_prob_t : mean posterior probability along linearTemperature
    p_t : 2-D array; rows 0 and -2 are drawn as sample realizations
    quantiles : sequence that is unpacked into ``fill_between``; element 0
        is additionally drawn as a line labelled "95% CI"

    """

    t_low = linearTemperature.min()
    t_high = linearTemperature.max()

    # --- Figure 1: probability curve plus two realizations ---
    plt.figure(figsize=(12.5, 4))
    setFonts(18)

    plt.plot(linearTemperature, mean_prob_t, lw=3,
             label="Average posterior\n     probability of defect")
    for row in (0, -2):
        plt.plot(linearTemperature, p_t[row, :], ls="--",
                 label="Realization from posterior")
    plt.scatter(temperature, failures, color="k", s=50, alpha=0.5)
    plt.title("Posterior expected value of probability of defect, plus realizations")
    plt.legend(loc="lower left")
    plt.ylim(-0.1, 1.1)
    plt.xlim(t_low, t_high)
    plt.ylabel("Probability")
    plt.xlabel("Temperature [F]")

    showData('Challenger_Probability.png')

    # --- Figure 2: quantile band ---
    setFonts()
    sns.set_style('darkgrid')

    band_x = linearTemperature[:, 0]
    plt.fill_between(band_x, *quantiles, alpha=0.7, color="#7A68A6")
    plt.plot(band_x, quantiles[0], label="95% CI", color="#7A68A6", alpha=0.7)
    plt.plot(linearTemperature, mean_prob_t, lw=1, ls="--", color="k",
             label="average posterior \nprobability of defect")

    plt.xlim(t_low, t_high)
    plt.ylim(-0.02, 1.02)
    plt.legend(loc="lower left")
    plt.scatter(temperature, failures, color="k", s=50, alpha=0.5)
    plt.xlabel("Temperature [F]")
    plt.ylabel("Posterior Probability Estimate")

    showData('Challenger_CIs.png')
Exemple #9
0
def show3D():
    """Draw a 3D surface plot and a 3D wireframe plot side by side."""

    # Plot-specific imports are kept local to this example
    from matplotlib import cm  # colormaps
    from mpl_toolkits.mplot3d import Axes3D  # required for 3D plots!
    from mpl_toolkits.mplot3d.axes3d import get_test_data

    # Figure that is twice as wide as it is tall
    fig = plt.figure(figsize=plt.figaspect(0.5))
    setFonts(16)

    # ---- Left: surface of sin(r) over a square grid
    axis_values = np.arange(-5, 5, 0.1)
    grid_x, grid_y = np.meshgrid(axis_values, axis_values)
    radius = np.sqrt(grid_x**2 + grid_y**2)
    height = np.sin(radius)

    # "projection" has to be specified to obtain 3D axes
    # (cm.viridis_r would be an alternative colormap)
    surface_ax = fig.add_subplot(1, 2, 1, projection='3d')
    surf = surface_ax.plot_surface(grid_x, grid_y, height,
                                   rstride=1, cstride=1, cmap=cm.GnBu,
                                   linewidth=0, antialiased=False)
    surface_ax.set_zlim3d(-1.01, 1.01)

    fig.colorbar(surf, shrink=0.5, aspect=10)

    # ---- Right: wireframe of Matplotlib's 3D test data
    wire_ax = fig.add_subplot(1, 2, 2, projection='3d')
    test_x, test_y, test_z = get_test_data(0.05)
    wire_ax.plot_wireframe(test_x, test_y, test_z, rstride=10, cstride=10)

    showData('3dGraph.png')
def KS_principle(inData):
    """Show the principle of the Kolmogorov-Smirnov test.

    Plots the CDF of a standard normal distribution together with the
    empirical cumulative frequency of ``inData`` (binned over 0..10), and
    marks the vertical distance between the two curves at x >= 2 with a
    double-headed arrow.

    Parameters
    ----------
    inData : sample data; the binning below uses the fixed range 0 to 10
    """

    # CDF of normally distributed data
    nd = stats.norm()
    nd_x = np.linspace(-4, 4, 101)
    nd_y = nd.cdf(nd_x)

    # Empirical CDF of the sample data, which range for approximately 0 to 10
    numPts = 50
    lowerLim = 0
    upperLim = 10
    ecdf_x = np.linspace(lowerLim, upperLim, numPts)
    # Use the function argument here (the original referenced an undefined
    # name "data")
    ecdf_y = stats.cumfreq(inData, numPts,
                           (lowerLim, upperLim))[0] / len(inData)

    # Add zero-point by hand
    ecdf_x = np.hstack((0., ecdf_x))
    ecdf_y = np.hstack((0., ecdf_y))

    # Plot the data
    sns.set_style('ticks')
    sns.set_context('poster')
    setFonts(36)

    # No plt.hold(True): it was removed from Matplotlib, and successive
    # plotting calls draw into the same axes by default.
    plt.plot(nd_x, nd_y, 'k--')
    plt.plot(ecdf_x, ecdf_y, color='k')
    plt.xlabel('X')
    plt.ylabel('Cumulative Probability')

    # For the arrow, find the start
    ecdf_startIndex = np.min(np.where(ecdf_x >= 2))
    arrowStart = np.array([ecdf_x[ecdf_startIndex], ecdf_y[ecdf_startIndex]])

    nd_startIndex = np.min(np.where(nd_x >= 2))
    arrowEnd = np.array([nd_x[nd_startIndex], nd_y[nd_startIndex]])
    arrowDelta = arrowEnd - arrowStart

    # Two opposing arrows over the same segment give a double-headed arrow
    arrowStyle = dict(width=0.05,
                      length_includes_head=True,
                      head_length=0.04,
                      head_width=0.4,
                      color='k')

    plt.arrow(arrowStart[0], arrowStart[1], 0, arrowDelta[1], **arrowStyle)
    plt.arrow(arrowStart[0], arrowStart[1] + arrowDelta[1], 0, -arrowDelta[1],
              **arrowStyle)

    outFile = 'KS_Example.png'
    showData(outFile)
def main():
    """Demonstrate the central limit theorem with three histograms.

    Uniform random numbers are shown raw, and averaged in groups of 2 and
    of 10. The number of samples is taken from the module-level ``ndata``.
    """

    setFonts(24)

    # Uniformly distributed random numbers
    samples = np.random.random(ndata)

    # One row with three panels
    fig, axs = plt.subplots(1, 3)

    showAsHistogram(axs[0], samples, 'Random data')
    for panel, group_size in zip(axs[1:], (2, 10)):
        # Each row of the reshaped array holds one group to be averaged
        grouped = samples.reshape((ndata // group_size, group_size))
        showAsHistogram(panel, np.mean(grouped, axis=1),
                        f'Average over {group_size}')

    # Final formatting, then hand over to showData
    axs[0].set_ylabel('Counts')
    plt.tight_layout()
    showData('CentralLimitTheorem.png')
def showAndSave(temperature, failures):
    """Shows the input data, and saves the resulting figure

    Parameters
    ----------
    temperature : temperature data
    failures : corresponding failure status

    """

    # Plot it, as a function of temperature
    plt.figure()
    setFonts()
    sns.set_style('darkgrid')
    np.set_printoptions(precision=3, suppress=True)

    plt.scatter(temperature, failures, s=200, color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside Temperature [F]")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")

    # tight_layout must actually be *called*; the bare attribute is a no-op.
    plt.tight_layout()

    outFile = 'Challenger_ORings.png'
    showData(outFile)
def generate_probplot():
    """Generate a prob-plot for a chi2-distribution of sample data.

    The left panel shows the PDF of a chi2-distribution with 3 degrees of
    freedom, the right panel the probability plot of 100 random samples
    drawn from it (with a fixed random seed).

    Returns
    -------
    data : the random samples shown in the probability plot
    """
    # Define the skewed distribution
    chi2 = stats.chi2(3)

    # Generate the data
    x = np.linspace(0, 10, 100)
    y = chi2.pdf(x)
    np.random.seed(12345)
    numData = 100
    data = chi2.rvs(numData)

    # Arrange subplots
    sns.set_context('paper')
    sns.set_style('white')
    setFonts(11)
    fig, axs = plt.subplots(1, 2)

    # Plot distribution
    axs[0].plot(x, y)
    axs[0].set_xlabel('X')
    axs[0].set_ylabel('PDF(X)')
    axs[0].set_title('chi2(x), k=3')
    sns.set_style('white')

    # Choose the aspect ratio such that the panel is drawn square
    x0, x1 = axs[0].get_xlim()
    y0, y1 = axs[0].get_ylim()
    axs[0].set_aspect((x1 - x0) / (y1 - y0))

    # Plot probplot
    # plt.sca() selects the current axes; passing an Axes object to
    # plt.axes() for that purpose is deprecated.
    plt.sca(axs[1])
    stats.probplot(data, plot=plt)

    x0, x1 = axs[1].get_xlim()
    y0, y1 = axs[1].get_ylim()
    axs[1].axhline(0, lw=0.5, ls='--')
    axs[1].axvline(0, lw=0.5, ls='--')
    axs[1].set_aspect((x1 - x0) / (y1 - y0))

    showData('chi2pp.png')

    return data
    '''
Exemple #14
0
def generate_probplot():
    """Generate a prob-plot for a chi2-distribution of sample data.

    The left panel shows the PDF of a chi2-distribution with 3 degrees of
    freedom, the right panel the probability plot of 100 random samples
    drawn from it (with a fixed random seed).

    Returns
    -------
    data : the random samples shown in the probability plot
    """
    # Define the skewed distribution
    chi2 = stats.chi2(3)

    # Generate the data
    x = np.linspace(0, 10, 100)
    y = chi2.pdf(x)
    np.random.seed(12345)
    numData = 100
    data = chi2.rvs(numData)

    # Arrange subplots
    sns.set_context('paper')
    sns.set_style('white')
    setFonts(11)
    fig, axs = plt.subplots(1, 2)

    # Plot distribution
    axs[0].plot(x, y)
    axs[0].set_xlabel('X')
    axs[0].set_ylabel('PDF(X)')
    axs[0].set_title('chi2(x), k=3')
    sns.set_style('white')

    # Choose the aspect ratio such that the panel is drawn square
    x0, x1 = axs[0].get_xlim()
    y0, y1 = axs[0].get_ylim()
    axs[0].set_aspect((x1 - x0) / (y1 - y0))

    # Plot probplot
    # plt.sca() selects the current axes; passing an Axes object to
    # plt.axes() for that purpose is deprecated.
    plt.sca(axs[1])
    stats.probplot(data, plot=plt)

    x0, x1 = axs[1].get_xlim()
    y0, y1 = axs[1].get_ylim()
    axs[1].axhline(0, lw=0.5, ls='--')
    axs[1].axvline(0, lw=0.5, ls='--')
    axs[1].set_aspect((x1 - x0) / (y1 - y0))

    showData('chi2pp.png')

    return data
    '''
def showProbabilities(linearTemperature, temperature, failures, mean_prob_t, p_t, quantiles):
    """Plot the posterior probability curve and its credible band.

    Two figures are handed to ``showData`` in turn: the mean posterior
    probability with two individual realizations, and then the quantile
    band around the mean.

    Parameters
    ----------
    linearTemperature : 2-D array of temperature values for the curves;
        its column 0 is used as x-axis for the band
    temperature : measured temperatures (scatter x-values)
    failures : measured failure status (scatter y-values)
    mean_prob_t : mean posterior probability along linearTemperature
    p_t : 2-D array; rows 0 and -2 are drawn as sample realizations
    quantiles : sequence that is unpacked into ``fill_between``; element 0
        is additionally drawn as a line labelled "95% CI"
    """

    t_low = linearTemperature.min()
    t_high = linearTemperature.max()

    # --- Figure 1: probability curve plus two realizations ---
    plt.figure(figsize=(12.5, 4))
    setFonts(18)

    plt.plot(linearTemperature, mean_prob_t, lw=3,
             label="Average posterior\n     probability of defect")
    for row in (0, -2):
        plt.plot(linearTemperature, p_t[row, :], ls="--",
                 label="Realization from posterior")
    plt.scatter(temperature, failures, color="k", s=50, alpha=0.5)
    plt.title("Posterior expected value of probability of defect, plus realizations")
    plt.legend(loc="lower left")
    plt.ylim(-0.1, 1.1)
    plt.xlim(t_low, t_high)
    plt.ylabel("Probability")
    plt.xlabel("Temperature [F]")

    showData('Challenger_Probability.png')

    # --- Figure 2: quantile band ---
    setFonts()
    sns.set_style('darkgrid')

    band_x = linearTemperature[:, 0]
    plt.fill_between(band_x, *quantiles, alpha=0.7, color="#7A68A6")
    plt.plot(band_x, quantiles[0], label="95% CI", color="#7A68A6", alpha=0.7)
    plt.plot(linearTemperature, mean_prob_t, lw=1, ls="--", color="k",
             label="average posterior \nprobability of defect")

    plt.xlim(t_low, t_high)
    plt.ylim(-0.02, 1.02)
    plt.legend(loc="lower left")
    plt.scatter(temperature, failures, color="k", s=50, alpha=0.5)
    plt.xlabel("Temperature [F]")
    plt.ylabel("Posterior Probability Estimate")

    showData('Challenger_CIs.png')
def main():
    """Demonstrate central limit theorem.

    Uniform random numbers are shown raw, and averaged in groups of 2, 10
    and 100. The number of samples is taken from the module-level ``ndata``,
    which has to be divisible by each group size for the reshape to work.
    """

    setFonts(24)
    # Generate data
    # (the original body used "data" without ever creating it)
    data = np.random.random(ndata)

    # Show four histograms, side-by-side
    fig, axs = plt.subplots(1, 4)

    showAsHistogram(axs[0], data, 'Random data')
    for ax, groupSize in zip(axs[1:], (2, 10, 100)):
        # Each row of the reshaped array holds one group to be averaged
        grouped = data.reshape((ndata // groupSize, groupSize))
        showAsHistogram(ax, np.mean(grouped, axis=1),
                        f'Average over {groupSize}')

    # Format them and show them
    axs[0].set_ylabel('Counts')
    plt.tight_layout()
    showData('CentralLimitTheorem.png')
def show3D():
    """Draw a 3D surface plot and a 3D wireframe plot side by side."""

    # Plot-specific imports are kept local to this example
    from matplotlib import cm  # colormaps
    from mpl_toolkits.mplot3d import Axes3D  # required for 3D plots!
    from mpl_toolkits.mplot3d.axes3d import get_test_data

    # Figure that is twice as wide as it is tall
    fig = plt.figure(figsize=plt.figaspect(0.5))
    setFonts(16)

    # ---- Left: surface of sin(r) over a square grid
    axis_values = np.arange(-5, 5, 0.1)
    grid_x, grid_y = np.meshgrid(axis_values, axis_values)
    radius = np.sqrt(grid_x**2 + grid_y**2)
    height = np.sin(radius)

    # "projection" has to be specified to obtain 3D axes
    # (cm.viridis_r would be an alternative colormap)
    surface_ax = fig.add_subplot(1, 2, 1, projection='3d')
    surf = surface_ax.plot_surface(grid_x, grid_y, height,
                                   rstride=1, cstride=1, cmap=cm.GnBu,
                                   linewidth=0, antialiased=False)
    surface_ax.set_zlim3d(-1.01, 1.01)

    fig.colorbar(surf, shrink=0.5, aspect=10)

    # ---- Right: wireframe of Matplotlib's 3D test data
    wire_ax = fig.add_subplot(1, 2, 2, projection='3d')
    test_x, test_y, test_z = get_test_data(0.05)
    wire_ax.plot_wireframe(test_x, test_y, test_z, rstride=10, cstride=10)

    showData('3dGraph.png')
def showSimResults(alpha_samples, beta_samples):
    """Show the results of the simulations, and save them to an outFile.

    Parameters
    ----------
    alpha_samples : samples of alpha, shown in the lower histogram
    beta_samples : samples of beta, shown in the upper histogram
    """

    plt.figure(figsize=(12.5, 6))
    sns.set_style('darkgrid')
    setFonts(18)

    # Histogram of the samples.
    # "density=True" replaces the "normed=True" keyword, which has been
    # removed from Matplotlib's hist().
    plt.subplot(211)
    plt.title(r"Posterior distributions of the variables $\alpha, \beta$")
    plt.hist(beta_samples, histtype='stepfilled', bins=35, alpha=0.85,
             label=r"posterior of $\beta$", color="#7A68A6", density=True)
    plt.legend()

    plt.subplot(212)
    plt.hist(alpha_samples, histtype='stepfilled', bins=35, alpha=0.85,
             label=r"posterior of $\alpha$", color="#A60628", density=True)
    plt.legend()

    outFile = 'Challenger_Parameters.png'
    showData(outFile)
def main():
    """Simulate subscription times, censor them at 12 months, and show them."""

    np.set_printoptions(precision=2)
    num_subjects = 20
    study_duration = 12

    # Note: a constant dropout rate is equivalent to an exponential distribution!
    # For every subject, both candidate durations are drawn first, and a
    # uniform random number then decides which of the two is kept.
    drawn_times = []
    for _ in range(num_subjects):
        long_duration = exponential(18)
        short_duration = exponential(3)
        if uniform() < 0.5:
            drawn_times.append(short_duration)
        else:
            drawn_times.append(long_duration)

    actual_subscriptiontimes = np.array(drawn_times)

    # Durations beyond the end of the study are cut off at the study duration
    observed_subscriptiontimes = np.minimum(actual_subscriptiontimes,
                                            study_duration)
    observed = actual_subscriptiontimes < study_duration

    # Show the data
    setFonts(18)
    plt.xlim(0, 24)
    plt.vlines(12, 0, 30, lw=2, linestyles="--")
    plt.xlabel('time')
    plt.title('Subscription Times, at $t=12$  months')
    plot_lifetimes(observed_subscriptiontimes, event_observed=observed)
    plt.show()

    print(f'Observed subscription time at time {study_duration:d}',
          observed_subscriptiontimes)
def simplePlots():
    """Demonstrate the generation of different statistical standard plots.

    Generates, one after the other: scatter plot, histograms, cumulative
    frequency, KDE-plot, boxplots, errorbars, violinplot, barplot,
    bivariate scatter plot, grouped boxplot and pie chart. Most figures
    are handed to ``printout`` / ``showData`` together with a file name.
    """

    # Univariate data -------------------------

    # Make sure that always the same random numbers are generated
    np.random.seed(1234)

    # Generate data that are normally distributed
    x = np.random.randn(500)

    # Other graphics settings
    sns.set(context='poster', style='ticks', palette=sns.color_palette('muted'))

    # Set the fonts the way I like them
    setFonts(32)

    # Scatter plot
    plt.scatter(np.arange(len(x)), x)
    plt.xlim([0, len(x)])

    # Save and show the data, in a systematic format
    printout('scatterPlot.png', xlabel='Datapoints', ylabel='Values', title='Scatter')

    # Histogram
    plt.hist(x)
    printout('histogram_plain.png', xlabel='Data Values',
             ylabel='Frequency', title='Histogram, default settings')

    plt.hist(x, 25)
    printout('histogram.png', xlabel='Data Values', ylabel='Frequency',
             title='Histogram, 25 bins')

    # Cumulative probability density
    numbins = 20
    plt.plot(stats.cumfreq(x, numbins)[0])
    printout('CumulativeFrequencyFunction.png', xlabel='Data Values',
             ylabel='CumFreq', title='Cumulative Frequency')

    # KDE-plot
    sns.kdeplot(x)
    printout('kde.png', xlabel='Data Values', ylabel='Density',
             title='KDE_plot')

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(x, sym='*')
    printout('boxplot.png', xlabel='Values', title='Boxplot')

    plt.boxplot(x, sym='*', vert=False)
    plt.title('Boxplot, horizontal')
    plt.xlabel('Values')
    plt.show()

    # Errorbars
    x = np.arange(5)
    y = x**2
    errorBar = x / 2
    plt.errorbar(x, y, yerr=errorBar, fmt='o', capsize=5, capthick=3)
    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    printout('Errorbars.png', xlabel='Data Values', ylabel='Measurements', title='Errorbars')

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))

    nd2 = stats.norm(loc=3, scale=1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot.
    # The DataFrame is passed explicitly as "data", so that seaborn treats
    # it as wide-form input (one violin per column) and does not take it
    # for the "x" variable.
    df = pd.DataFrame({'Girls': data, 'Boys': data2})
    sns.violinplot(data=df)

    printout('violinplot.png', title='Violinplot')

    # Barplot
    # The font-size is set such that the legend does not overlap with the data
    np.random.seed(1234)
    setFonts(20)

    df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
    df.plot(kind='bar', grid=False, color=sns.color_palette('muted'))

    showData('barplot.png')
    setFonts(28)

    # Bivariate Plots
    df2 = pd.DataFrame(np.random.rand(50, 3), columns=['a', 'b', 'c'])
    df2.plot(kind='scatter', x='a', y='b', s=df2['c'] * 500)
    plt.axhline(0, ls='--', color='#999999')
    plt.axvline(0, ls='--', color='#999999')
    printout('bivariate.png')

    # Grouped Boxplot
    # "df" is here the 10x4 DataFrame from the barplot: one box per column
    sns.set_style('whitegrid')
    sns.boxplot(data=df)
    setFonts(28)
    printout('groupedBoxplot.png', title='sns.boxplot')

    sns.set_style('ticks')

    # Pieplot
    txtLabels = 'Cats', 'Dogs', 'Frogs', 'Others'
    fractions = [45, 30, 15, 10]
    offsets = (0, 0.05, 0, 0)

    plt.pie(fractions, explode=offsets, labels=txtLabels,
            autopct='%1.1f%%', shadow=True, startangle=90,
            colors=sns.color_palette('muted'))
    plt.axis('equal')
    printout('piePlot.png', title=' ')
def show_fig(std, ax, title):
    """Draw three groups of normally distributed samples into the axis *ax*.

    The group centers and the number of samples per group are read from
    the module-level names ``centers`` and ``numData``.
    """

    sample_index = np.arange(numData)
    for group in range(3):
        group_dist = stats.norm(centers[group], std)
        samples = group_dist.rvs(numData)
        # Shift every group to the right by one full group width
        ax.plot(group * numData + sample_index, samples, '.', ms=10)

    ax.xaxis.set_ticks([50, 150, 250])
    ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
    ax.set_title(title)
    sns.despine()


if __name__ == '__main__':

    # Figure style
    sns.set_context('paper')
    sns.set_style('whitegrid')
    setFonts(14)

    # Two panels, each showing the same three group centers, but with a
    # small and with a large standard deviation, respectively
    fig, axs = plt.subplots(1, 2)
    centers = [5, 5.3, 4.7]
    stds = [0.1, 2]
    numData = 100
    show_fig(stds[0], axs[0], 'SD=0.1')
    show_fig(stds[1], axs[1], 'SD=2.0')

    showData('anova_oneway.png')
except ImportError:
    # Ensure correct performance otherwise
    def setFonts(*options):
        return


# Generate some dummy data: N simulated subscription times, which are
# cut off at the end of the study ("study_duration")
np.set_printoptions(precision=2)
N = 20
study_duration = 12

# Note: a constant dropout rate is equivalent to an exponential distribution!
# For each subject both exponential(18) and exponential(3) are evaluated,
# and the comparison "uniform() < 0.5" is used as index (False -> 0,
# True -> 1) to select one of the two values.
# NOTE(review): "exponential" and "uniform" presumably come from
# numpy.random - confirm against the imports of this file.
actual_subscriptiontimes = np.array([[exponential(18),
                                      exponential(3)][uniform() < 0.5]
                                     for i in range(N)])
# Times longer than the study are replaced by the study duration ...
observed_subscriptiontimes = np.minimum(actual_subscriptiontimes,
                                        study_duration)
# ... and "observed" is True where the actual time is shorter than the study
observed = actual_subscriptiontimes < study_duration

# Show the data, with a dashed vertical line at t=12
setFonts(18)
plt.xlim(0, 24)
plt.vlines(12, 0, 30, lw=2, linestyles="--")
plt.xlabel('time')
plt.title('Subscription Times, at $t=12$  months')
plot_lifetimes(observed_subscriptiontimes, event_observed=observed)

print("Observed subscription time at time %d:\n" % (study_duration),
      observed_subscriptiontimes)
Exemple #23
0
        return


# Calculate the values: PDF and CDF of the standard normal distribution
nd = stats.norm()

x = np.linspace(-3, 3, 100)
yp = nd.pdf(x)
y = nd.cdf(x)
# Sub-range from -3 to 1, used for the shaded area under the PDF
x1 = np.linspace(-3, 1)
y1 = nd.pdf(x1)

# Make the plot
sns.set_context('paper')
sns.set_style('white')
setFonts(12)

figs, axs = plt.subplots(1, 2)

# Left panel: PDF, with the area between -3 and 1 filled in gray and
# labelled "CDF(x)"
axs[0].plot(x, yp, 'k')
axs[0].fill_between(x1, y1, facecolor='#CCCCCC')
axs[0].text(0,
            0.1,
            'CDF(x)',
            family='cursive',
            fontsize=14,
            horizontalalignment='center',
            style='italic')
axs[0].set_xlabel('x')
axs[0].set_ylabel('PDF(x)')
sns.despine()
def show_fig(std, ax, title):
    """Draw three groups of normally distributed samples into the axis *ax*.

    The group centers and the number of samples per group are read from
    the module-level names ``centers`` and ``numData``.
    """

    sample_index = np.arange(numData)
    for group in range(3):
        group_dist = stats.norm(centers[group], std)
        samples = group_dist.rvs(numData)
        # Shift every group to the right by one full group width
        ax.plot(group * numData + sample_index, samples, '.', ms=10)

    ax.xaxis.set_ticks([50, 150, 250])
    ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
    ax.set_title(title)
    sns.despine()

if __name__ == '__main__':

    # Figure style
    sns.set_context('paper')
    sns.set_style('whitegrid')
    setFonts(14)

    # Two panels, each showing the same three group centers, but with a
    # small and with a large standard deviation, respectively
    fig, axs = plt.subplots(1, 2)
    centers = [5, 5.3, 4.7]
    stds = [0.1, 2]
    numData = 100
    show_fig(stds[0], axs[0], 'SD=0.1')
    show_fig(stds[1], axs[1], 'SD=2.0')

    showData('anova_oneway.png')
    ax.xaxis.set_ticks([50,150,250])
    ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
    ax.yaxis.set_ticks([])
    ax.set_title(title)
    
    grandMean = np.mean(groupMean)
    ax.axhline(grandMean, color='#999999')
    ax.plot([80, 220], [groupMean[1], groupMean[1]], '#999999')
    ax.plot([80, 120], [groupMean[1]+0.2, groupMean[1]+0.2], '#999999')
    ax.annotate('', xy=(210, grandMean), xytext=(210,groupMean[1]), 
            arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black'))
    ax.annotate('', xy=(90, groupMean[1]), xytext=(90,groupMean[1]+0.2), 
            arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black'))
    ax.text(210, (grandMean + groupMean[1])/2., '$SS_{Treatment}$', fontsize=36)
    ax.text(90, groupMean[1]+0.1, '$SS_{Error}$', ha='right', fontsize=36)

if __name__ == '__main__':
    # Settings read by show_fig
    centers = [5, 5.3, 4.7]
    std = 0.1
    numData = 100

    # Fixed seed, so that the figure is reproducible
    np.random.seed(123)
    setFonts(30)

    # A single axis that takes up the whole figure
    fig = plt.figure()
    ax = fig.add_subplot(111)
    show_fig(std, ax, 'Sum-Squares')

    # Save and show
    showData('anova_annotated.png')
try:
    # Import formatting commands if directory "Utilities" is available
    from ISP_mystyle import setFonts, showData

except ImportError:
    # Ensure correct performance otherwise, by providing simple stand-ins

    def setFonts(*options):
        """Stand-in that accepts any arguments and does nothing."""
        return None

    def showData(*options):
        """Stand-in that ignores its arguments and only shows the figure."""
        plt.show()

# General formatting options, applied once at module level:
# seaborn "poster" context with "ticks" style, a 3-color "hls" palette,
# and a font setting of 24 via setFonts
sns.set(context='poster', style='ticks')
sns.set_palette(sns.color_palette('hls', 3))
setFonts(24)

#----------------------------------------------------------------------
def show_binomial():
    """Plot the PMFs of three example binomial distributions."""

    # Three arbitrarily selected (p, n) combinations
    parameters = [(0.5, 20), (0.7, 20), (0.5, 40)]

    # One PMF curve for each (p, n)-pair
    for p, n in parameters:
        frozen_binom = stats.binom(n, p)     # the "frozen function"
        k_values = np.arange(n + 1)          # all possible outcomes 0..n
        curve_label = f'p={p:3.1f}, n={n}'
        plt.plot(k_values, frozen_binom.pmf(k_values), 'o--', label=curve_label)
    
    def setFonts(*options):
        return

    def showData(*options):
        plt.show()
        return


# Generate the data: a straight line (offset 10, slope 0.2) plus normally
# distributed noise that is scaled by 4
x = np.arange(-20, 80)
y = 10 + 0.2 * x + 4 * np.random.randn(len(x))

# Make the plot
sns.set_style('ticks')
sns.set_context('poster')
setFonts()

fig = plt.figure()
ax = fig.add_subplot(111)

ax.plot(x, y, '.')
# Let the left and bottom axis lines pass through the zero position
ax.spines['left'].set_position('zero')
ax.spines['bottom'].set_position('zero')

sns.despine()

# Draw the fitted line: first-order polynomial fit, evaluated at x
p = np.polyfit(x, y, 1)
yFit = np.polyval(p, x)
ax.plot(x, yFit, 'r')
def simplePlots():
    """Demonstrate the generation of different statistical standard plots.

    Generates, one after the other: scatter plot, histograms, cumulative
    frequency, KDE-plot, boxplots, errorbars, violinplot, barplot,
    bivariate scatter plot, grouped boxplot and pie chart. Most figures
    are handed to ``printout`` / ``showData`` together with a file name.
    """

    # Univariate data -------------------------

    # Make sure that always the same random numbers are generated
    np.random.seed(1234)

    # Generate data that are normally distributed
    x = np.random.randn(500)

    # Other graphics settings
    sns.set(context="poster", style="ticks", palette=sns.color_palette("muted"))

    # Set the fonts the way I like them
    setFonts(32)

    # Scatter plot
    plt.scatter(np.arange(len(x)), x)
    plt.xlim([0, len(x)])

    # Save and show the data, in a systematic format
    printout("scatterPlot.png", xlabel="Datapoints", ylabel="Values", title="Scatter")

    # Histogram
    plt.hist(x)
    printout("histogram_plain.png", xlabel="Data Values", ylabel="Frequency", title="Histogram, default settings")

    plt.hist(x, 25)
    printout("histogram.png", xlabel="Data Values", ylabel="Frequency", title="Histogram, 25 bins")

    # Cumulative probability density
    numbins = 20
    plt.plot(stats.cumfreq(x, numbins)[0])
    printout("CumulativeFrequencyFunction.png", xlabel="Data Values", ylabel="CumFreq", title="Cumulative Frequency")

    # KDE-plot
    sns.kdeplot(x)
    printout("kde.png", xlabel="Data Values", ylabel="Density", title="KDE_plot")

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(x, sym="*")
    printout("boxplot.png", xlabel="Values", title="Boxplot")

    plt.boxplot(x, sym="*", vert=False)
    plt.title("Boxplot, horizontal")
    plt.xlabel("Values")
    plt.show()

    # Errorbars
    x = np.arange(5)
    y = x ** 2
    errorBar = x / 2
    plt.errorbar(x, y, yerr=errorBar, fmt="o", capsize=5, capthick=3)
    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    printout("Errorbars.png", xlabel="Data Values", ylabel="Measurements", title="Errorbars")

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))

    nd2 = stats.norm(loc=3, scale=1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot.
    # The DataFrame is passed explicitly as "data", so that seaborn treats
    # it as wide-form input (one violin per column) and does not take it
    # for the "x" variable.
    df = pd.DataFrame({"Girls": data, "Boys": data2})
    sns.violinplot(data=df)

    printout("violinplot.png", title="Violinplot")

    # Barplot
    # The font-size is set such that the legend does not overlap with the data
    np.random.seed(1234)
    setFonts(20)

    df = pd.DataFrame(np.random.rand(10, 4), columns=["a", "b", "c", "d"])
    df.plot(kind="bar", grid=False, color=sns.color_palette("muted"))

    showData("barplot.png")
    setFonts(28)

    # Bivariate Plots
    df2 = pd.DataFrame(np.random.rand(50, 3), columns=["a", "b", "c"])
    df2.plot(kind="scatter", x="a", y="b", s=df2["c"] * 500)
    plt.axhline(0, ls="--", color="#999999")
    plt.axvline(0, ls="--", color="#999999")
    printout("bivariate.png")

    # Grouped Boxplot
    # "df" is here the 10x4 DataFrame from the barplot: one box per column
    sns.set_style("whitegrid")
    sns.boxplot(data=df)
    setFonts(28)
    printout("groupedBoxplot.png", title="sns.boxplot")

    sns.set_style("ticks")

    # Pieplot
    txtLabels = "Cats", "Dogs", "Frogs", "Others"
    fractions = [45, 30, 15, 10]
    offsets = (0, 0.05, 0, 0)

    plt.pie(
        fractions,
        explode=offsets,
        labels=txtLabels,
        autopct="%1.1f%%",
        shadow=True,
        startangle=90,
        colors=sns.color_palette("muted"),
    )
    plt.axis("equal")
    printout("piePlot.png", title=" ")
Exemple #29
0
def simplePlots():
    """Demonstrate the generation of different standard statistical plots.

    Generates, one after the other: a scatter plot, two histograms, a
    cumulative-frequency plot, a KDE plot, a vertical and a horizontal
    boxplot, an errorbar plot, a violin plot, a bar plot, a bivariate
    scatter plot, a grouped boxplot and a pie chart.

    Every figure except the horizontal boxplot is finished by a call to
    ``printout`` or ``showData``. Both helpers are defined elsewhere;
    presumably they label, save and display the current figure -- confirm
    against their definitions. The horizontal boxplot is only displayed
    with ``plt.show()``.

    The function takes no arguments and returns ``None``. As side effects it
    re-seeds NumPy's global random number generator and changes the global
    seaborn style settings; it also calls ``setFonts`` with several sizes.
    """

    # Univariate data -------------------------

    # Make sure that always the same random numbers are generated
    np.random.seed(1234)

    # Generate data that are normally distributed
    x = np.random.randn(500)

    # Other graphics settings
    sns.set(context='poster', style='ticks', palette=sns.color_palette('muted'))

    # Set the fonts the way I like them
    setFonts(32)

    # Scatter plot: sample value against sample index
    plt.scatter(np.arange(len(x)), x)
    plt.xlim([0, len(x)])

    # Save and show the data, in a systematic format
    printout('scatterPlot.png', xlabel='Datapoints', ylabel='Values', title='Scatter')

    # Histogram, with the default number of bins ...
    plt.hist(x)
    printout('histogram_plain.png', xlabel='Data Values',
             ylabel='Frequency', title='Histogram, default settings')

    # ... and with 25 bins
    plt.hist(x, 25)
    printout('histogram.png', xlabel='Data Values', ylabel='Frequency',
             title='Histogram, 25 bins')

    # Cumulative frequency: element [0] of the cumfreq result holds the
    # cumulative counts per bin
    numbins = 20
    plt.plot(stats.cumfreq(x, numbins)[0])
    printout('CumulativeFrequencyFunction.png', xlabel='Data Values',
             ylabel='CumFreq', title='Cumulative Frequency')

    # KDE-plot
    sns.kdeplot(x)
    printout('kde.png', xlabel='Data Values', ylabel='Density',
             title='KDE_plot')

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(x, sym='*')
    printout('boxplot.png', xlabel='Values', title='Boxplot')

    # Horizontal variant: labelled by hand and only shown
    plt.boxplot(x, sym='*', vert=False)
    plt.title('Boxplot, horizontal')
    plt.xlabel('Values')
    plt.show()

    # Errorbars: y = x**2, with an error that grows linearly with x
    x = np.arange(5)
    y = x**2
    errorBar = x/2
    plt.errorbar(x, y, yerr=errorBar, fmt='o', capsize=5, capthick=3)
    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    printout('Errorbars.png', xlabel='Data Values', ylabel='Measurements', title='Errorbars')

    # Violinplot: two normally distributed samples with 100 values each
    nd = stats.norm
    data = nd.rvs(size=(100))

    nd2 = stats.norm(loc=3, scale=1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot.
    # The DataFrame is passed by keyword: depending on the seaborn version the
    # first positional parameter is "x" or "data", while "data=" always
    # selects the wide-form interpretation (one violin per column).
    df = pd.DataFrame({'Girls': data, 'Boys': data2})
    sns.violinplot(data=df)

    printout('violinplot.png', title='Violinplot')

    # Barplot
    # The font-size is set such that the legend does not overlap with the data
    np.random.seed(1234)
    setFonts(20)

    df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
    df.plot(kind='bar', grid=False, color=sns.color_palette('muted'))

    showData('barplot.png')
    setFonts(28)

    # Bivariate Plots: column "c" determines the marker size
    df2 = pd.DataFrame(np.random.rand(50, 3), columns=['a', 'b', 'c'])
    df2.plot(kind='scatter', x='a', y='b', s=df2['c']*500)
    plt.axhline(0, ls='--', color='#999999')
    plt.axvline(0, ls='--', color='#999999')
    printout('bivariate.png')

    # Grouped Boxplot: one box per column of the bar-plot DataFrame
    sns.set_style('whitegrid')
    sns.boxplot(data=df)
    setFonts(28)
    printout('groupedBoxplot.png', title='sns.boxplot')

    sns.set_style('ticks')

    # Pieplot; the second wedge ("Dogs") is slightly pulled out
    txtLabels = 'Cats', 'Dogs', 'Frogs', 'Others'
    fractions = [45, 30, 15, 10]
    offsets = (0, 0.05, 0, 0)

    plt.pie(fractions, explode=offsets, labels=txtLabels,
            autopct='%1.1f%%', shadow=False, startangle=90,
            colors=sns.color_palette('muted'))
    plt.axis('equal')
    printout('piePlot.png', title=' ')
    def setFonts(*options):
        return

    def showData(*options):
        plt.show()
        return


# Generate the data: a "pre" value per subject and a "post" value that is
# smaller by a subject-specific difference
x = np.r_[3, 1.5, 4, 6, 3, 2]                 # "pre" values
dx = np.r_[0.1, 0.3, 0.2, 0.2, 0.3, 0.25]     # pre-post differences
xs = x - dx                                   # "post" values
index = range(len(x))                         # subject numbers 0..5, used as bar positions

# plot the data: pre values as circles, post values as red stars, and the
# differences as bars centered on the subject number
setFonts(20)
plt.plot(x, 'o', ms=10, label='pre')
plt.plot(xs, 'r*', ms=12, label='post')
plt.bar(index,
        dx,
        width=0.5,
        align='center',
        color=0.75 * np.ones(3),   # three equal components of 0.75, i.e. a light gray
        label='pre-post')

# Format the plot
plt.legend(loc='upper left')
plt.axhline(0, ls='--')    # dashed zero line
plt.xlim(-0.3, 5.3)
plt.ylim(-0.2, 6.2)
plt.xlabel('Subject Nr')
import sys
sys.path.append(os.path.join('..', '..', 'Utilities'))
try:
    from ISP_mystyle import setFonts, showData 
    
except ImportError:
# Ensure correct performance otherwise
    def setFonts(*options):
        return
    def showData(*options):
        plt.show()
        return

# Global plot formatting
sns.set_context('poster')
sns.set_style('ticks')
setFonts()

# Generate the data: 10 random scores from a normal distribution with
# mean 100 and standard deviation 20 (seeded, so the values are reproducible)
np.random.seed(1234)
nd = stats.norm(100, 20)
scores = nd.rvs(10)

# Make the plot: the individual scores, a dashed horizontal line at the fixed
# value 110, and a dash-dotted horizontal line at the mean of the scores
plt.plot(scores, 'o')
plt.axhline(110, ls='--')
plt.axhline(np.mean(scores), ls='-.')
plt.xlim(-0.2, 9.2)
plt.ylim(50, 130)
plt.xlabel('Student-Nr')
plt.ylabel('Score')
    """Subroutine showing a histogram and formatting it"""

    axis.hist(data, bins=nbins)
    axis.set_xticks([0, 0.5, 1])
    axis.set_title(title)


# Script entry point: histograms of random data and of averages over groups
# of 2 and of 10 samples, shown side-by-side
if __name__ == '__main__':
    # Formatting options
    sns.set(context='poster', style='ticks', palette='muted')

    # Input data
    ndata = 100000    # number of random samples
    # NOTE(review): nbins is not passed to showAsHistogram below; presumably
    # that function reads it as a module-level variable -- confirm
    nbins = 50

    setFonts(24)
    # Generate data (np.random.random: uniformly distributed in [0, 1))
    data = np.random.random(ndata)

    # Show three histograms, side-by-side
    fig, axs = plt.subplots(1, 3)

    # Raw data, then the row-wise mean after reshaping the same data into
    # rows of 2 and rows of 10 values, respectively
    showAsHistogram(axs[0], data, 'Random data')
    showAsHistogram(axs[1], np.mean(data.reshape((int(ndata / 2), 2)), axis=1),
                    'Average over 2')
    showAsHistogram(axs[2], np.mean(data.reshape((int(ndata / 10), 10)),
                                    axis=1), 'Average over 10')

    # Format them and show them
    axs[0].set_ylabel('Counts')
    plt.tight_layout()
    ax.annotate('',
                xy=(210, grandMean),
                xytext=(210, groupMean[1]),
                arrowprops=dict(arrowstyle='<->, head_width=0.1',
                                facecolor='black'))
    ax.annotate('',
                xy=(90, groupMean[1]),
                xytext=(90, groupMean[1] + 0.2),
                arrowprops=dict(arrowstyle='<->, head_width=0.1',
                                facecolor='black'))
    ax.text(210, (grandMean + groupMean[1]) / 2.,
            '$SS_{Treatment}$',
            fontsize=36)
    ax.text(90, groupMean[1] + 0.1, '$SS_{Error}$', ha='right', fontsize=36)


# Script entry point: create one figure with a single axis, let show_fig draw
# into it, then hand the figure to showData
if __name__ == '__main__':
    # NOTE(review): centers, std and numData are not passed to show_fig below;
    # presumably show_fig reads them as module-level variables -- confirm
    centers = [5, 5.3, 4.7]

    # Fixed seed for reproducible random numbers
    np.random.seed(123)
    setFonts(30)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    std = 0.1
    numData = 100
    show_fig(0.1, ax, 'Sum-Squares')

    # Save and show
    showData('anova_annotated.png')
        plt.show()
        return

# Calculate the values
nd = stats.norm()    # standard normal distribution (default parameters)

x = np.linspace(-3,3,100)
yp = nd.pdf(x)       # probability density function on [-3, 3]
y = nd.cdf(x)        # cumulative distribution function on [-3, 3]
x1 = np.linspace(-3, 1)    # sub-range [-3, 1], used for the shaded area
y1 = nd.pdf(x1)

# Make the plot
sns.set_context('paper')
sns.set_style('white')
setFonts(12)

# Two axes side-by-side: PDF on the left, CDF on the right
figs, axs = plt.subplots(1,2)

# Left: the PDF as a black line, with the area under the curve between
# x=-3 and x=1 filled in gray and labelled "CDF(x)"
axs[0].plot(x,yp, 'k')
axs[0].fill_between(x1, y1, facecolor='#CCCCCC')
axs[0].text(0, 0.1, 'CDF(x)', family='cursive', fontsize=14, horizontalalignment='center', style='italic')
axs[0].set_xlabel('x')
axs[0].set_ylabel('PDF(x)')
sns.despine()

# Right: the CDF as a thick gray line
axs[1].plot(x, y, '#999999', lw=3)
axs[1].set_xlabel('x')
axs[1].set_ylabel('CDF(x)')
# Dashed vertical line at x=0, from y=0 to y=1. plt.vlines draws into the
# current axes -- presumably the right-hand one here; confirm
plt.vlines(0, 0, 1, linestyles='--')
sns.despine()
    def setFonts(*options):
        return

    def showData(*options):
        plt.show()
        return


# Generate the data: a "pre" value per subject and a "post" value that is
# smaller by a subject-specific difference
x = np.r_[3, 1.5, 4, 6, 3, 2]                 # "pre" values
dx = np.r_[0.1, 0.3, 0.2, 0.2, 0.3, 0.25]     # pre-post differences
xs = x - dx                                   # "post" values
index = range(len(x))                         # subject numbers 0..5, used as bar positions

# plot the data: pre values as circles, post values as red stars, and the
# differences as light-gray bars centered on the subject number
setFonts(20)
plt.plot(x, "o", ms=10, label="pre")
plt.plot(xs, "r*", ms=12, label="post")
plt.bar(index, dx, width=0.5, align="center", color=0.75 * np.ones(3), label="pre-post")

# Format the plot
plt.legend(loc="upper left")
plt.axhline(0, ls="--")    # dashed zero line
plt.xlim(-0.3, 5.3)
plt.ylim(-0.2, 6.2)
plt.xlabel("Subject Nr")
plt.ylabel("Value")
plt.tight_layout()

# P-values for paired and unpaired T-tests
# Paired T-test on the pre/post values; only the p-value is kept
_, p_paired = stats.ttest_rel(x, xs)
sys.path.append(os.path.join('..', '..', 'Utilities'))

try:
# Import formatting commands if directory "Utilities" is available
    from ISP_mystyle import setFonts
    
except ImportError:
# Ensure correct performance otherwise
    def setFonts(*options):
        return
    
# Generate some dummy data
np.set_printoptions(precision=2)
N = 20                 # number of simulated subscription times
study_duration = 12    # time at which the observation ends

# Note: a constant dropout rate is equivalent to an exponential distribution!
# For each of the N entries both exponential(18) and exponential(3) are drawn,
# and one of them is selected by indexing with the boolean "uniform()<0.5"
# (False -> first entry, True -> second entry).
# NOTE(review): exponential and uniform are imported outside this section --
# presumably from numpy.random; confirm
actual_subscriptiontimes = np.array([[exponential(18), exponential(3)][uniform()<0.5] for i in range(N)])
# Times longer than the study duration are clipped to the study duration ...
observed_subscriptiontimes = np.minimum(actual_subscriptiontimes,study_duration)
# ... and flagged as False here (True only if the time is below study_duration)
observed= actual_subscriptiontimes < study_duration

# Show the data
setFonts(18)
plt.xlim(0,24)
plt.vlines(12, 0, 30, lw=2, linestyles="--")    # dashed vertical line at t=12
plt.xlabel('time')
plt.title('Subscription Times, at $t=12$  months')
# NOTE(review): plot_lifetimes is imported outside this section -- presumably
# from the lifelines package; confirm
plot_lifetimes(observed_subscriptiontimes, event_observed=observed)

print("Observed subscription time at time %d:\n"%(study_duration), observed_subscriptiontimes)