def scatterplot():
    '''Fancy scatterplots, using the package "seaborn".

    Loads seaborn's "iris" example dataset, draws a pairwise scatterplot
    matrix coloured by the "species" column, and passes the file name
    'multiScatterplot.png' to C2_8_mystyle.printout_plain.
    '''
    import seaborn as sns

    df = sns.load_dataset("iris")
    # The panel-size keyword was renamed from "size" to "height" in
    # seaborn 0.9; the old spelling is deprecated.
    sns.pairplot(df, hue="species", height=2.5)
    C2_8_mystyle.printout_plain('multiScatterplot.png')
def showResults(challenger_data, model):
    '''Show the original data, and the resulting logit-fit.

    Parameters
    ----------
    challenger_data : 2-D array
        Column 0 is plotted on the x-axis (labelled as outside temperature),
        column 1 on the y-axis (labelled as damage incident).
    model : fitted model object
        ``model.params[0]`` is used as ``alpha`` and ``model.params[1]`` as
        ``beta`` in the call ``logistic(x, beta, alpha)``.
    '''

    # First plot the original data
    plt.figure()
    sns.set_context('poster')
    sns.set_style('whitegrid')
    np.set_printoptions(precision=3, suppress=True)

    plt.scatter(challenger_data[:, 0], challenger_data[:, 1], s=75, color="k",
                alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside temperature (Fahrenheit)")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    plt.xlim(50, 85)

    # Plot the fit on top of the data.
    # No "plt.hold(True)" here: pyplot always adds to the current axes, and
    # the function was removed in Matplotlib 3.0.
    x = np.arange(50, 85)
    alpha = model.params[0]
    beta = model.params[1]
    y = logistic(x, beta, alpha)
    plt.plot(x, y, 'r')

    outFile = 'ChallengerPlain.png'
    # Raw string: the backslash is a path separator, not an escape sequence
    C2_8_mystyle.printout_plain(outFile, outDir=r'..\Images')
    plt.show()
def generatedata():
    '''Generate and show the data.

    Builds a noisy plane Z = -5 + 3*X - 0.5*Y + noise on a 101 x 101 grid
    covering [-5, 5] in both directions, shows it as a 3D surface, and
    passes the file name '3dSurface.png' to C2_8_mystyle.printout_plain.

    Returns
    -------
    (X, Y, Z) : tuple of 1-D arrays
        The flattened grid coordinates and the corresponding noisy values.
    '''
    x = np.linspace(-5, 5, 101)
    (X, Y) = np.meshgrid(x, x)

    # To get reproducible values, I provide a seed value
    np.random.seed(987654321)

    Z = -5 + 3*X - 0.5*Y + np.random.randn(*X.shape)

    # Plot the figure.
    # "fig.gca(projection=...)" is no longer supported by Matplotlib (keyword
    # arguments to gca were removed in 3.6); create the 3D axes explicitly.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(X, Y, Z, cmap=cm.afmhot, rstride=2, cstride=2,
                           linewidth=0, antialiased=False)
    ax.view_init(20, -120)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    fig.colorbar(surf, shrink=0.6)

    outFile = '3dSurface.png'
    C2_8_mystyle.printout_plain(outFile)

    return (X.flatten(), Y.flatten(), Z.flatten())
def generateData():
    '''Generate and show the data: a plane in 3D.

    Builds a noisy plane Z = -5 + 3*X - 0.5*Y + noise on a 101 x 101 grid
    covering [-5, 5] in both directions, shows it as a 3D surface with the
    "GnBu_r" colormap, and passes the file name '3dSurface.png' to
    C2_8_mystyle.printout_plain.

    Returns
    -------
    (X, Y, Z) : tuple of 1-D arrays
        The flattened grid coordinates and the corresponding noisy values.
    '''

    x = np.linspace(-5, 5, 101)
    (X, Y) = np.meshgrid(x, x)

    # To get reproducible values, I provide a seed value
    np.random.seed(987654321)

    Z = -5 + 3*X - 0.5*Y + np.random.randn(*X.shape)

    # Set the color
    myCmap = cm.GnBu_r

    # If you want a colormap from seaborn use:
    #from matplotlib.colors import ListedColormap
    #myCmap = ListedColormap(sns.color_palette("Blues", 20))

    # Plot the figure.
    # "fig.gca(projection=...)" is no longer supported by Matplotlib (keyword
    # arguments to gca were removed in 3.6); create the 3D axes explicitly.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(X, Y, Z, cmap=myCmap, rstride=2, cstride=2,
                           linewidth=0, antialiased=False)
    ax.view_init(20, -120)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    fig.colorbar(surf, shrink=0.6)

    outFile = '3dSurface.png'
    C2_8_mystyle.printout_plain(outFile)

    return (X.flatten(), Y.flatten(), Z.flatten())
def show_poisson_views():
    """Show different views of a Poisson distribution.

    Draws three stacked panels for a Poisson distribution with parameter 10:
    the PMF and the CDF evaluated at k = 0..24, and the PPF evaluated at 100
    points between 0 and 1.
    """

    fig, ax = plt.subplots(3, 1)

    k = np.arange(25)
    # Named "poisson_dist" rather than "pd", so that the conventional pandas
    # alias is not shadowed inside this function.
    poisson_dist = stats.poisson(10)
    C2_8_mystyle.set(12)

    ax[0].plot(k, poisson_dist.pmf(k), 'x-')
    ax[0].set_title('Poisson distribution')
    ax[0].set_xticklabels([])
    ax[0].set_ylabel('PMF (X)')

    ax[1].plot(k, poisson_dist.cdf(k))
    ax[1].set_xlabel('X')
    ax[1].set_ylabel('CDF (X)')

    y = np.linspace(0, 1, 100)
    ax[2].plot(y, poisson_dist.ppf(y))
    ax[2].set_xlabel('X')
    ax[2].set_ylabel('PPF (X)')

    plt.tight_layout()
    plt.show()
def main():
    '''Draw the two-panel figure explaining kernel density estimation.

    The same six sample values are handed to plot_histogram (left axes) and
    to explain_KDE (right axes); the file name 'KDEexplained.png' is passed
    to C2_8_mystyle.printout_plain before the figure is shown.
    '''
    samples = np.array([-2.1, -1.3, -0.4, 1.9, 5.1, 6.2])

    # One row, two columns: left panel and right panel
    fig, panels = plt.subplots(1, 2)
    left_panel, right_panel = panels[0], panels[1]

    plot_histogram(left_panel, samples)
    explain_KDE(right_panel, samples)

    C2_8_mystyle.printout_plain('KDEexplained.png')
    plt.show()
def generate_probplot():
    '''Generate a prob-plot for a chi2-distribution of sample data.

    The left panel shows the PDF of a chi2 distribution with 3 degrees of
    freedom, the right panel the probability plot (scipy.stats.probplot) of
    100 random samples drawn from it. Both panels are scaled to be square.
    The file name 'chi2pp.png' is passed to C2_8_mystyle.printout_plain.

    Returns
    -------
    data : ndarray
        The random samples that were drawn from the chi2 distribution.
    '''
    # Define the skewed distribution
    chi2 = stats.chi2(3)

    # Generate the data
    x = np.linspace(0, 10, 100)
    y = chi2.pdf(x)
    np.random.seed(12345)
    numData = 100
    data = chi2.rvs(numData)

    # Arrange subplots
    sns.set_context('paper')
    sns.set_style('white')
    C2_8_mystyle.set(11)
    fig, axs = plt.subplots(1, 2)

    # Plot distribution
    axs[0].plot(x, y)
    axs[0].set_xlabel('X')
    axs[0].set_ylabel('PDF(X)')
    axs[0].set_title('chi2(x), k=3')
    sns.set_style('white')

    # Make the panel square: aspect = x-range / y-range
    x0, x1 = axs[0].get_xlim()
    y0, y1 = axs[0].get_ylim()
    axs[0].set_aspect((x1-x0)/(y1-y0))

    # Plot probplot.
    # "plt.axes(existing_axes)" is no longer supported by Matplotlib;
    # "plt.sca" is the call that makes an existing axes the current one.
    plt.sca(axs[1])
    stats.probplot(data, plot=plt)

    # Read the limits before adding the zero-lines, then square the panel
    x0, x1 = axs[1].get_xlim()
    y0, y1 = axs[1].get_ylim()
    axs[1].axhline(0, lw=0.5, ls='--')
    axs[1].axvline(0, lw=0.5, ls='--')
    axs[1].set_aspect((x1-x0)/(y1-y0))

    C2_8_mystyle.printout_plain('chi2pp.png')

    return(data)
    '''
def KS_principle(inData):
    '''Show the principle of the Kolmogorov-Smirnov test.

    Plots the CDF of the standard normal distribution (dashed) together with
    the empirical CDF of *inData* (solid), and marks the vertical distance
    between the two curves at the first grid point with x >= 2 by a
    double-headed arrow. The file name 'KS_Example.png' is passed to
    C2_8_mystyle.printout_plain.

    Parameters
    ----------
    inData : array-like
        Sample data. The empirical CDF is computed with 50 bins over the
        fixed range 0 to 10.
    '''

    # CDF of normally distributed data
    nd = stats.norm()
    nd_x = np.linspace(-4, 4, 101)
    nd_y = nd.cdf(nd_x)

    # Empirical CDF of the sample data, which range from approximately 0 to 10
    numPts = 50
    lowerLim = 0
    upperLim = 10
    ecdf_x = np.linspace(lowerLim, upperLim, numPts)
    # Use the function argument (the original read a global named "data")
    ecdf_y = stats.cumfreq(inData, numPts, (lowerLim, upperLim))[0]/len(inData)

    # Add zero-point by hand
    ecdf_x = np.hstack((0., ecdf_x))
    ecdf_y = np.hstack((0., ecdf_y))

    # Plot the data
    sns.set_style('ticks')
    sns.set_context('poster')
    C2_8_mystyle.set(36)

    # No "plt.hold(True)" between the two plots: pyplot always adds to the
    # current axes, and the function was removed in Matplotlib 3.0.
    plt.plot(nd_x, nd_y, 'k--')
    plt.plot(ecdf_x, ecdf_y, color='k')
    plt.xlabel('X')
    plt.ylabel('Cumulative Probability')

    # For the arrow, find the start
    ecdf_startIndex = np.min(np.where(ecdf_x >= 2))
    arrowStart = np.array([ecdf_x[ecdf_startIndex], ecdf_y[ecdf_startIndex]])

    nd_startIndex = np.min(np.where(nd_x >= 2))
    arrowEnd = np.array([nd_x[nd_startIndex], nd_y[nd_startIndex]])
    arrowDelta = arrowEnd - arrowStart

    # Two opposing vertical arrows give one double-headed arrow
    plt.arrow(arrowStart[0], arrowStart[1], 0, arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.02,
              head_width=0.2, color='k')

    plt.arrow(arrowStart[0], arrowStart[1]+arrowDelta[1], 0, -arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.02,
              head_width=0.2, color='k')

    outFile = 'KS_Example.png'
    C2_8_mystyle.printout_plain(outFile)
def showChi2():
    '''Plot the chi-square PDF for k = 1, 2, 3 and 5 degrees of freedom.

    The curves are evaluated on frange(0, 8, 0.05); the file name
    'dist_chi2.png' is passed to C2_8_mystyle.printout_plain.
    '''
    xvals = frange(0, 8, 0.05)

    # One labelled curve per number of degrees of freedom
    for dof in (1, 2, 3, 5):
        density = stats.chi2.pdf(xvals, dof)
        plt.plot(xvals, density, label='k={0}'.format(dof))
    plt.legend()

    # Axis formatting
    plt.xlim(0, 8)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')

    C2_8_mystyle.printout_plain('dist_chi2.png')
def showExp():
    '''Utility function to show exponential distributions.

    Plots the exponential PDF for the rate parameters lambda = 0.5, 1 and
    1.5 on frange(0, 3, 0.01); the file name 'dist_exp.png' is passed to
    C2_8_mystyle.printout_plain.
    '''

    t = frange(0, 3, 0.01)
    lambdas = [0.5, 1, 1.5]

    for par in lambdas:
        # scipy parameterizes the exponential distribution by
        # scale = 1/lambda, so the rate has to be inverted to make the
        # curve match its "lambda" label.
        plt.plot(t, stats.expon.pdf(t, loc=0, scale=1/par),
                 label=r'$\lambda={0:3.1f}$'.format(par))

    plt.xlim(0, 3)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')
    plt.legend()

    outFile = 'dist_exp.png'
    C2_8_mystyle.printout_plain(outFile)
def show_poisson():
    """Show an example of Poisson distributions.

    Plots the Poisson PMF at k = 0..19 for lambda = 1, 4 and 10; the file
    name 'Poisson_distribution_pmf.png' is passed to
    C2_8_mystyle.printout_plain.
    """

    # Arbitrarily select 3 lambda values
    lambdas = [1, 4, 10]

    k = np.arange(20)       # generate x-values
    for par in lambdas:
        # Raw string, so that the backslash of the TeX command is literal
        plt.plot(k, stats.poisson.pmf(k, par), 'o--',
                 label=r'$\lambda={0}$'.format(par))

    # Format the plot
    plt.legend()
    plt.title('Poisson distribution')
    plt.xlabel('X')
    plt.ylabel('P(X)')

    # Show and save the plot
    C2_8_mystyle.printout_plain('Poisson_distribution_pmf.png')
def showF():
    '''Utility function to show F distributions.

    Plots the F-distribution PDF for the (d1, d2) pairs (1, 1), (2, 1),
    (5, 2) and (100, 100) on frange(0, 3, 0.01); the file name 'dist_f.png'
    is passed to C2_8_mystyle.printout_plain.
    '''

    t = frange(0, 3, 0.01)
    d1s = [1, 2, 5, 100]
    d2s = [1, 1, 2, 100]

    for (d1, d2) in zip(d1s, d2s):
        plt.plot(t, stats.f.pdf(t, d1, d2), label='F({0}/{1})'.format(d1, d2))

    plt.xlim(0, 3)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')
    # A single legend call is enough; a second one only replaces the first
    plt.legend()

    outFile = 'dist_f.png'
    C2_8_mystyle.printout_plain(outFile)
def show3D():
    '''Draw a figure with two 3D panels: a coloured surface and a wireframe.

    The left panel shows sin(sqrt(X**2 + Y**2)) on a grid from -5 to 5 (step
    0.1) as a surface with a colorbar; the right panel shows matplotlib's 3D
    test data as a wireframe. The file name '3dGraph.png' is passed to
    C2_8_mystyle.printout_plain.
    '''

    # Imports needed only for this example
    from matplotlib import cm   # colormaps
    from mpl_toolkits.mplot3d import Axes3D   # this module is required for 3D plots!
    from mpl_toolkits.mplot3d.axes3d import get_test_data

    # A figure that is twice as wide as it is tall
    canvas = plt.figure(figsize=plt.figaspect(0.5))

    # ---- Left panel: surface
    grid_x = np.arange(-5, 5, 0.1)
    grid_y = np.arange(-5, 5, 0.1)
    grid_x, grid_y = np.meshgrid(grid_x, grid_y)
    radius = np.sqrt(grid_x**2 + grid_y**2)
    height = np.sin(radius)

    # The "projection" argument is what turns the subplot into a 3D axes
    left = canvas.add_subplot(1, 2, 1, projection='3d')
    surface = left.plot_surface(grid_x, grid_y, height,
                                rstride=1, cstride=1, cmap=cm.GnBu,
                                linewidth=0, antialiased=False)
    left.set_zlim3d(-1.01, 1.01)
    canvas.colorbar(surface, shrink=0.5, aspect=10)

    # ---- Right panel: wireframe of the 3D test data
    right = canvas.add_subplot(1, 2, 2, projection='3d')
    test_x, test_y, test_z = get_test_data(0.05)
    right.plot_wireframe(test_x, test_y, test_z, rstride=10, cstride=10)

    C2_8_mystyle.printout_plain('3dGraph.png')
def shifted_normal():
    '''PDF and scatter plot.

    First plots the PDFs of four normal distributions and passes the file
    name 'Normal_Distribution_PDF.png' to C2_8_mystyle.printout_plain; then
    draws 500 normally distributed random numbers (mean 0, SD 3) as a
    scatter plot, shows it, and closes the figure.
    '''

    # Plot 4 PDFs (Probability density functions) for normal distributions ----

    # Select 4 mean values, and 4 spread values. The spread values are
    # square-rooted before they are used as standard deviation below, i.e.
    # they act as variances.
    myMean = [0, 0, 0, -2]
    mySD = [0.2, 1, 5, 0.5]
    t = frange(-5, 5, 0.02)

    # Plot the 4 PDFs, using the color-palette "hls"
    with sns.color_palette('hls', 4):
        for mu, sigma in zip(myMean, np.sqrt(mySD)):
            y = stats.norm.pdf(t, mu, sigma)
            # Backslashes of the TeX commands are escaped explicitly; the
            # "\t" (tab) is kept so that the label text stays unchanged.
            plt.plot(t, y,
                     label='$\\mu={0}, \\; \t\\sigma={1:3.1f}$'.format(mu, sigma))

    # Format the plot
    plt.legend()
    plt.xlim([-5, 5])
    plt.title('Normal Distributions')

    # Show the plot, and save the out-file
    outFile = 'Normal_Distribution_PDF.png'
    C2_8_mystyle.printout_plain(outFile)

    # Generate random numbers with a normal distribution ----------------------
    myMean = 0
    mySD = 3
    numData = 500
    data = stats.norm.rvs(myMean, mySD, size=numData)

    # Plot the data
    plt.scatter(np.arange(len(data)), data)

    # Format the plot
    plt.title('Normally distributed data')
    plt.xlim([0, 500])
    plt.ylim([-10, 10])
    plt.show()
    plt.close()
def doTukey(data, multiComp):
    '''Do a pairwise comparison, and show the confidence intervals.

    Prints the Tukey-HSD summary and the group names of *multiComp*, then
    plots the pairwise mean differences with their confidence intervals and
    passes the file name 'multComp.png' to C2_8_mystyle.printout_plain.

    Parameters
    ----------
    data : indexable by field name
        Must provide the fields 'StressReduction' (values) and 'Treatment'
        (group labels).
    multiComp : multiple-comparison object
        Must provide ``tukeyhsd()`` and ``groupsunique``.
        NOTE(review): presumably a statsmodels MultiComparison built from
        the same data - confirm against the caller.
    '''

    print((multiComp.tukeyhsd().summary()))

    # Show the group names
    print((multiComp.groupsunique))

    # Generate a print -------------------

    # Get the data: one Tukey test provides mean differences and intervals
    res2 = pairwise_tukeyhsd(data['StressReduction'], data['Treatment'])
    errors = np.ravel(np.diff(res2.confint)/2)
    # One x-position per pairwise comparison (3 for three groups)
    xvals = np.arange(len(res2.meandiffs))

    # Plot them. The error bars are drawn without a connecting line; the
    # markers come from the preceding plot call.
    plt.plot(xvals, res2.meandiffs, 'o')
    plt.errorbar(xvals, res2.meandiffs, yerr=errors, ls='none')

    # Put on labels
    pair_labels = multiComp.groupsunique[np.column_stack(res2._multicomp.pairindices)]
    plt.xticks(xvals, pair_labels)

    # Format the plot: half a unit of margin on either side
    xlim = -0.5, len(xvals) - 0.5
    plt.hlines(0, *xlim)
    plt.xlim(*xlim)
    plt.title('Multiple Comparison of Means - Tukey HSD, FWER=0.05' +
              '\n Pairwise Mean Differences')

    # Save to outfile, and show the data
    outFile = 'multComp.png'
    C2_8_mystyle.printout_plain(outFile)
def many_normals():
    '''Histograms of 25 normal samples, plus the spread of 1000 sample means.

    Draws a 5 x 5 grid of histograms, each of 100 random values from a
    normal distribution (mean 0, SD 1), and passes the file name
    'Normal_MultHist.png' to C2_8_mystyle.printout_plain. Afterwards the
    means of 1000 further samples are collected and their standard
    deviation is printed.
    '''

    # Parameters of the distribution and of the layout
    mean, sd = 0, 1
    grid = 5
    n_per_sample = 100

    # One bare histogram (no ticks, no tick labels) per grid cell
    plt.figure()
    for cell in range(1, grid*grid + 1):
        sample = stats.norm.rvs(mean, sd, size=n_per_sample)
        panel = plt.subplot(grid, grid, cell)
        plt.hist(sample)
        panel.set_xlim([-3, 3])
        panel.set_xticks(())
        panel.set_yticks(())
        panel.set_xticklabels(())
        panel.set_yticklabels(())

    plt.tight_layout()

    # Hand the figure to the style helper
    C2_8_mystyle.printout_plain('Normal_MultHist.png')

    # Mean value of each of 1000 samples
    n_trials = 1000
    sample_means = np.array([
        np.mean(stats.norm.rvs(mean, sd, size=n_per_sample))
        for _ in range(n_trials)
    ])
    message = 'The standard error of the mean, with {0} samples, is {1}'
    print(message.format(n_per_sample, np.std(sample_means)))
def showT():
    '''Utility function to show T distributions.

    Plots the standard normal PDF (dashed) together with the PDFs of the
    t-distribution for the degrees of freedom listed in TVals, on
    frange(-5, 5, 0.05); the file name 'dist_t.png' is passed to
    C2_8_mystyle.printout_plain.
    '''

    t = frange(-5, 5, 0.05)
    TVals = [1, 5]

    plt.plot(t, stats.norm.pdf(t), '--', label='normal')
    # One curve per entry of TVals, instead of hand-copied lines
    for df in TVals:
        plt.plot(t, stats.t.pdf(t, df), label='df={0}'.format(df))
    plt.legend()

    plt.xlim(-5, 5)
    plt.xlabel('X')
    plt.ylabel('pdf(X)')
    plt.axis('tight')

    outFile = 'dist_t.png'
    C2_8_mystyle.printout_plain(outFile)
def main():
    '''Show the residuals of a straight-line fit.

    Generates ten noisy points around y = 3*x + 2, fits a line with
    np.polyfit, and plots the data, the fitted line, and a vertical segment
    from each fitted value to the corresponding data point. The file name
    'residuals.png' is passed to C2_8_mystyle.printout_plain.
    '''
    # generate the data
    x = np.arange(10)
    np.random.seed(10)
    y = 3*x + 2 + 20*np.random.rand(len(x))

    # determine the line-fit
    k, d = np.polyfit(x, y, 1)
    yfit = k*x + d

    # plot the data
    # No "plt.hold(True)" here: pyplot always adds to the current axes, and
    # the function was removed in Matplotlib 3.0.
    plt.scatter(x, y)
    plt.plot(x, yfit, '--', lw=2)
    for xi, fit_i, yi in zip(x, yfit, y):
        plt.plot([xi, xi], [fit_i, yi], 'k')

    plt.xlim((-0.1, 9.1))
    plt.xlabel('X')
    plt.ylabel('Y')

    C2_8_mystyle.printout_plain('residuals.png')
def smSolution(M1, M2, M3):
    '''Fit three OLS models with statsmodels, print and plot the results.

    M1, M2 and M3 are the design matrices handed to sm.OLS; they are plotted
    as the linear, quadratic and cubic fit, respectively. The dependent
    variable "y" and the plotting abscissa "x" are taken from the enclosing
    scope. The file name 'linearModel.png' is passed to
    C2_8_mystyle.printout.
    '''

    import statsmodels.api as sm
    import C2_8_mystyle

    # Fit all three models first, then report them in the same order
    fits = [sm.OLS(y, design).fit() for design in (M1, M2, M3)]
    for fit in fits:
        print(fit.summary2())

    # Raw data, followed by one curve per model
    plt.plot(x, y, '.', label='Data')
    curve_styles = (('r--', 'Linear Fit'),
                    ('g', 'Quadratic Fit'),
                    ('y', 'Cubic Fit'))
    for fit, (fmt, caption) in zip(fits, curve_styles):
        plt.plot(x, fit.fittedvalues, fmt, label=caption)
    plt.legend(loc='upper left', shadow=True)

    C2_8_mystyle.printout('linearModel.png', xlabel='x', ylabel='y')
def show_binomial():
    """Show an example of binomial distributions.

    Plots the binomial PMF for the (p, n) pairs (0.5, 20), (0.7, 20) and
    (0.5, 40), annotates the point (20, 0) as 'Upper Limit', and passes the
    file name 'Binomial_distribution_pmf.png' to
    C2_8_mystyle.printout_plain.
    """

    # Arbitrarily select 3 total numbers, and 3 probabilities
    ns = [20, 20, 40]
    ps = [0.5, 0.7, 0.5]

    # For each (p,n)-pair, plot the corresponding binomial PMFs
    for (p, n) in zip(ps, ns):
        bd = stats.binom(n, p)      # generate the "frozen function"
        x = np.arange(n+1)          # generate the x-values
        plt.plot(x, bd.pmf(x), 'o--', label='p={0:3.1f}, n={1}'.format(p, n))

    # Format the plot
    plt.legend()
    plt.title('Binomial distribution')
    plt.xlabel('X')
    plt.ylabel('P(X)')
    plt.annotate('Upper Limit', xy=(20, 0), xytext=(27, 0.04),
                 arrowprops=dict(shrink=0.05))

    # Show and save the plot
    C2_8_mystyle.printout_plain('Binomial_distribution_pmf.png')
def main():
    '''Draw two overlapping normal PDFs and the corresponding ROC curve.

    The upper axes show the PDFs of N(1, 2) and N(6, 2) with the areas below
    x = 3 shaded and annotated as 'Sensitivity' and '1-Specificity'. The
    lower axes show the ROC curve (CDF of the first distribution against the
    CDF of the second) with the diagonal as reference. The file name
    'ROC.png' is passed to C2_8_mystyle.printout_plain.
    '''
    # Calculate the PDF-curves
    x = np.linspace(-10, 15, 201)
    nd1 = stats.norm(1, 2)
    nd2 = stats.norm(6, 2)
    y1 = nd1.pdf(x)
    y2 = nd2.pdf(x)

    # Axes locations
    ROC = {'left': 0.35,
           'width': 0.36,
           'bottom': 0.1,
           'height': 0.47}

    PDF = {'left': 0.1,
           'width': 0.8,
           'bottom': 0.65,
           'height': 0.3}

    # plt.axes expects [left, bottom, width, height]
    rect_ROC = [ROC['left'], ROC['bottom'], ROC['width'], ROC['height']]
    rect_PDF = [PDF['left'], PDF['bottom'], PDF['width'], PDF['height']]

    fig = plt.figure()

    ax1 = plt.axes(rect_PDF)
    ax2 = plt.axes(rect_ROC)

    # Plot and label the PDF-curves.
    # No "ax.hold(True)" calls: axes always accumulate artists, and the
    # method was removed in Matplotlib 3.0.
    ax1.plot(x, y1)
    ax1.fill_between(x, 0, y1, where=x<3, facecolor='#CCCCCC', alpha=0.5)
    ax1.annotate('Sensitivity',
                 xy=(x[75], y1[65]),
                 xytext=(x[40], y1[75]*1.2),
                 fontsize=14,
                 horizontalalignment='center',
                 arrowprops=dict(facecolor='#CCCCCC'))

    ax1.plot(x, y2, '#888888')
    ax1.fill_between(x, 0, y2, where=x<3, facecolor='#888888', alpha=0.5)
    ax1.annotate('1-Specificity',
                 xy=(2.5, 0.03),
                 xytext=(6, 0.05),
                 fontsize=14,
                 horizontalalignment='center',
                 arrowprops=dict(facecolor='#888888'))

    ax1.set_ylabel('PDF')

    # Plot the ROC-curve, with the diagonal as reference
    ax2.plot(nd2.cdf(x), nd1.cdf(x), 'k')
    ax2.plot(np.array([0, 1]), np.array([0, 1]), 'k--')

    # Format the ROC-curve
    ax2.set_xlim([0, 1])
    ax2.set_ylim([0, 1])
    ax2.axis('equal')
    ax2.set_title('ROC-Curve')
    ax2.set_xlabel('1-specificity')
    ax2.set_ylabel('sensitivity')

    arrow_bidir(ax2, (0.5, 0.5), (0.095, 0.885))

    # Show the plot, and create a figure
    C2_8_mystyle.printout_plain('ROC.png')
        
    # NOTE(review): the statements from here on use `ax`, `title` and
    # `groupMean`, none of which is assigned in the visible part of the
    # enclosing function. This looks like the tail of a different function
    # (presumably `show_fig`, which is called in the `__main__` section
    # below) - confirm against the original file.

    # Put one labelled tick per group on the x-axis, and hide the y-ticks
    ax.xaxis.set_ticks([50,150,250])
    ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
    ax.yaxis.set_ticks([])
    ax.set_title(title)
    
    # Horizontal line at the mean of the group means
    grandMean = np.mean(groupMean)
    ax.axhline(grandMean, color='#999999')
    # Two horizontal helper lines: at groupMean[1], and 0.2 above it
    ax.plot([80, 220], [groupMean[1], groupMean[1]], '#999999')
    ax.plot([80, 120], [groupMean[1]+0.2, groupMean[1]+0.2], '#999999')
    # Double-headed arrow between the grand mean and groupMean[1] ...
    ax.annotate('', xy=(210, grandMean), xytext=(210,groupMean[1]), 
            arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black'))
    # ... and between groupMean[1] and the line 0.2 above it
    ax.annotate('', xy=(90, groupMean[1]), xytext=(90,groupMean[1]+0.2), 
            arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black'))
    # Text labels placed next to the two arrows
    ax.text(210, (grandMean + groupMean[1])/2., '$SS_{Treatment}$', fontsize=36)
    ax.text(90, groupMean[1]+0.1, '$SS_{Error}$', ha='right', fontsize=36)

if __name__ == '__main__':
    # Script entry point: draw the annotated figure 'anova_annotated.png'.
    # NOTE(review): `centers`, `std` and `numData` are assigned here but not
    # used by any visible statement of this section - presumably they are
    # read as module-level names by `show_fig`; confirm before removing.
    centers = [5, 5.3, 4.7]
    
    # Fixed seed, so that the random numbers are reproducible
    np.random.seed(123)
    C2_8_mystyle.set(30)
    
    # A single axes, which is handed to show_fig together with the title
    fig = plt.figure()
    ax = fig.add_subplot(111)
    std = 0.1
    numData = 100
    show_fig(0.1, ax, 'Sum-Squares')
    
    # Save and show
    C2_8_mystyle.printout_plain('anova_annotated.png')
def main():
    '''Demonstrate the generation of different statistical standard plots.

    Produces, one after the other: scatter plot, histograms, cumulative
    frequency, KDE plot, boxplots, errorbars, violin plot, bar plot, grouped
    boxplot, bivariate scatter plot and pie plot. Each figure is handed to
    C2_8_mystyle.printout together with its file name, except for the
    horizontal boxplot, which is only shown.
    '''

    # Univariate data -------------------------
    # Generate data that are normally distributed
    x = np.random.randn(500)

    # Set the fonts the way I like them
    sns.set_context('poster')
    sns.set_style('ticks')
    C2_8_mystyle.set(fs=32)

    # Scatter plot
    plt.scatter(np.arange(len(x)), x)
    plt.xlim([0, len(x)])

    # Save and show the data, in a systematic format
    C2_8_mystyle.printout('scatterPlot.png', xlabel='x', ylabel='y',
                          title='Scatter')

    # Histogram
    plt.hist(x)
    C2_8_mystyle.printout('histogram_plain.png', xlabel='Data Values',
                          ylabel='Frequency',
                          title='Histogram, default settings')

    plt.hist(x, 25)
    C2_8_mystyle.printout('histogram.png', xlabel='Data Values',
                          ylabel='Frequency', title='Histogram, 25 bins')

    # Cumulative probability density
    numbins = 20
    plt.plot(stats.cumfreq(x, numbins)[0])
    C2_8_mystyle.printout('CumulativeFrequencyFunction.png',
                          xlabel='Data Values', ylabel='CumFreq',
                          title='Cumulative Frequency')

    # KDE-plot
    sns.kdeplot(x)
    C2_8_mystyle.printout('kde.png', xlabel='Data Values', ylabel='Density',
                          title='KDE_plot')

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(x, sym='*')
    C2_8_mystyle.printout('boxplot.png', xlabel='Values', title='Boxplot')

    plt.boxplot(x, sym='*', vert=False)
    plt.title('Boxplot, horizontal')
    plt.xlabel('Values')
    plt.show()

    # Errorbars
    x = np.arange(5)
    y = x**2
    errorBar = x/2
    plt.errorbar(x, y, yerr=errorBar, fmt='o', capsize=5, capthick=3)
    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    C2_8_mystyle.printout('Errorbars.png', xlabel='Data Values',
                          ylabel='Measurements', title='Errorbars')

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))

    nd2 = stats.norm(loc=3, scale=1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot
    df = pd.DataFrame({'Girls': data, 'Boys': data2})
    sns.violinplot(df)

    C2_8_mystyle.printout('violinplot.png', title='Violinplot')

    # Barplot
    df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
    df.plot(kind='bar', grid=False)
    C2_8_mystyle.printout('barplot.png', title='Barplot')

    # Grouped Boxplot
    sns.set_style('whitegrid')
    sns.boxplot(df)
    C2_8_mystyle.set(fs=28)
    C2_8_mystyle.printout('groupedBoxplot.png', title='sns.boxplot')

    # Bivariate Plots
    df2 = pd.DataFrame(np.random.rand(50, 4), columns=['a', 'b', 'c', 'd'])
    # The marker sizes have to come from the frame that is plotted (50 rows),
    # not from the 10-row frame of the bar plot above.
    df2.plot(kind='scatter', x='a', y='b', s=df2['c']*300)
    C2_8_mystyle.printout('bivariate.png')

    # Pieplot
    series = pd.Series(3 * np.random.rand(4), index=['a', 'b', 'c', 'd'],
                       name='series')
    # Switch the palette only for this plot, and restore it afterwards
    oldPalette = sns.color_palette()
    sns.set_palette("husl")
    series.plot(kind='pie', figsize=(6, 6))
    C2_8_mystyle.printout('piePlot.png', title='pie-plot')
    sns.set_palette(oldPalette)
# Evaluate the PDF of "nd" on the full grid "x" and on the three sub-ranges
# NOTE(review): `nd`, `x`, `x1`, `x2` and `x3` are not assigned in the visible
# part of the file before this point - presumably the beginning of this
# script is missing; confirm.
y = nd.pdf(x)
y1 = nd.pdf(x1)
y2 = nd.pdf(x2)
y3 = nd.pdf(x3)

# Three panels side by side, with a common y-axis
sns.set(context='poster')
sns.set_style('ticks')
fig, axs = plt.subplots(1,3, sharey=True)

def show_SD(axis, xi, yi, text):
    '''Draw the full curve and shade the part given by (xi, yi).

    The module-level arrays "x" and "y" are plotted as the complete curve on
    *axis*, the area under (xi, yi) is filled in light gray, and *text* is
    written centered at (0, 0.05). The left spine and the y-ticks are
    removed.
    '''

    shade = '#DDDDDD'

    # Full curve first, then the shaded area
    axis.plot(x, y)
    axis.fill_between(xi, yi, facecolor=shade)

    # Fixed view, identical for every panel
    axis.set_xlim([-3.5, 3.5])
    axis.set_ylim([-0.0, 0.5])

    # Label inside the shaded area
    axis.text(0, 0.05, text, horizontalalignment='center', fontsize=25)

    # Strip the y-axis decoration
    sns.despine(ax=axis, left=True)
    axis.set_yticks([])

# One panel per sub-range, each labelled with a percentage
# NOTE(review): the labels look like the areas within 1, 2 and 3 SDs - this
# depends on how x1, x2 and x3 are defined outside the visible code; confirm.
show_SD(axs[0], x1, y1, '68.3%')    
show_SD(axs[1], x2, y2, '95.4%')    
show_SD(axs[2], x3, y3, '99.7%')    

plt.tight_layout()

# Hand the figure to the style helper, with the output file name
C2_8_mystyle.printout_plain('area_SDs.png')
def simple_normal():
    '''Show five functions of the standard normal distribution in one figure.

    The top row (spanning both columns) holds the PDF; below it follow the
    CDF, the survival function, the percentile point function and the
    inverse survival function. The file name 'DistributionFunctions.png' is
    passed to C2_8_mystyle.printout_plain.
    '''

    def ticks_bottom_left():
        # Restrict the tick marks of the current axes to bottom and left
        current = plt.gca()
        current.xaxis.set_ticks_position('bottom')
        current.yaxis.set_ticks_position('left')

    # Abscissae: values for PDF/CDF/SF, probabilities for PPF/ISF
    values = np.arange(-4, 4, 0.1)
    probabilities = np.arange(0, 1, 0.001)

    # Frozen standard normal distribution; nothing is evaluated yet
    normal = stats.norm()

    # PDF: takes up the complete first row of the 3 x 2 grid
    ax = plt.subplot2grid((3, 2), (0, 0), colspan=2)
    plt.plot(values, normal.pdf(values))
    plt.xlim([-4, 4])
    ticks_bottom_left()
    plt.yticks(np.linspace(0, 0.4, 5))
    plt.title('Normal Distribution - PDF: Probability Density Fct')

    # CDF
    plt.subplot(323)
    plt.plot(values, normal.cdf(values))
    ticks_bottom_left()
    plt.xlim([-4, 4])
    plt.ylim([0, 1])
    plt.vlines(0, 0, 1, linestyles='--')
    plt.title('CDF: Cumulative Distribution Fct')

    # SF
    plt.subplot(324)
    plt.plot(values, normal.sf(values))
    ticks_bottom_left()
    plt.xlim([-4, 4])
    plt.ylim([0, 1])
    plt.vlines(0, 0, 1, linestyles='--')
    plt.title('SF: Survival Fct')

    # PPF
    plt.subplot(325)
    plt.plot(probabilities, normal.ppf(probabilities))
    ticks_bottom_left()
    plt.yticks(np.linspace(-4, 4, 5))
    plt.hlines(0, 0, 1, linestyles='--')
    plt.ylim([-4, 4])
    plt.title('PPF: Percentile Point Fct')

    # ISF
    plt.subplot(326)
    plt.plot(probabilities, normal.isf(probabilities))
    ticks_bottom_left()
    plt.yticks(np.linspace(-4, 4, 5))
    plt.hlines(0, 0, 1, linestyles='--')
    plt.title('ISF: Inverse Survival Fct')
    plt.ylim([-4, 4])
    plt.tight_layout()

    C2_8_mystyle.printout_plain('DistributionFunctions.png')
# additional packages
import C2_8_mystyle

# Calculate the values
# NOTE(review): `stats`, `np`, `sns` and `plt` are used here without a visible
# import above this fragment - presumably its header is missing; confirm.
nd = stats.norm()   # frozen standard normal distribution

x = np.linspace(-3,3,100)   # grid for the complete curves
yp = nd.pdf(x)
y = nd.cdf(x)
x1 = np.linspace(-3, 1)     # sub-range from -3 to 1, used for the shaded area
y1 = nd.pdf(x1)

# Make the plot
sns.set_context('paper')
sns.set_style('white')
C2_8_mystyle.set(12)

figs, axs = plt.subplots(1,2)

# Left panel: the PDF, with the area from -3 to 1 shaded and labelled 'CDF(x)'
axs[0].plot(x,yp, 'k')
axs[0].fill_between(x1, y1, facecolor='#CCCCCC')
axs[0].text(0, 0.1, 'CDF(x)', family='cursive', fontsize=14, horizontalalignment='center', style='italic')
axs[0].set_xlabel('x')
axs[0].set_ylabel('PDF(x)')
sns.despine()

# Right panel: the CDF
axs[1].plot(x, y, '#999999', lw=3)
axs[1].set_xlabel('x')
axs[1].set_ylabel('CDF(x)')
# Dashed vertical line at x=0, drawn through pyplot (i.e. on the current axes)
plt.vlines(0, 0, 1, linestyles='--')
sns.despine()
# Import standard packages
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns

# additional packages
import C2_8_mystyle

# Plot style for this figure
sns.set_context('poster')
sns.set_style('ticks')

# Generate the data: 10 random values from a normal distribution with
# mean 100 and standard deviation 20, with a fixed seed for reproducibility
np.random.seed(1234)
nd = stats.norm(100, 20)
scores = nd.rvs(10)

# Make the plot: the individual scores, a dashed line at 110, and a
# dash-dotted line at the sample mean
plt.plot(scores, 'o')
plt.axhline(110, ls='--')
plt.axhline(np.mean(scores), ls='-.')
plt.xlim(-0.2, 9.2)
plt.ylim(50, 130)
plt.xlabel('Student-Nr')
plt.ylabel('Score')

# Hand the figure to the style helper, then show it
outFile = 'fig_ExampleTtest.png'
C2_8_mystyle.printout_plain(outFile)
plt.show()
# NOTE(review): `md`, `sd` and `nd` are not assigned in the visible part of
# the file for this fragment - presumably the beginning of this script
# (definition of the distribution) is missing; confirm.
sns.set_style('ticks')

# Plot the normal distribution within 3 SDs
limits = (md-3*sd, md+3*sd)
x = np.linspace(limits[0], limits[1])
y = nd.pdf(x)

# Shade the regions beyond a certain "checkVal"
checkVal = 2.6
print('p = {0:5.3f}'.format(nd.cdf(checkVal)))

# Lower region: from the lower limit up to checkVal
x1 = np.linspace(limits[0], checkVal)
y1 = nd.pdf(x1)
# Upper region: starts at checkVal mirrored about md, ends at the upper limit
x2 = np.linspace(md + (md-checkVal), limits[1])
y2 = nd.pdf(x2)

plt.plot(x,y)
plt.fill_between(x1, y1, alpha=0.5)
plt.fill_between(x2, y2, alpha=0.2)

#  Label the axes
plt.xlabel('Weight')
plt.ylabel('P(Weight)')
# NOTE(review): the percentage is a hard-coded text, it is not derived from
# the value printed above - confirm that it matches the distribution used.
plt.text(2.1, 0.05, '11.8%', fontsize=20)

# Remove the right- and top-axis
sns.despine()

# Save and show
C2_8_mystyle.printout_plain('pdf_checkValue.png')
"miR-137" is a short non-coding RNA molecule that functions to regulate
the expression levels of other genes.
'''
# author: Thomas Haslwanter, date: Jun-2015

# Import standard packages
import matplotlib.pyplot as plt
import C2_8_mystyle as mystyle

# additional packages
from lifelines.datasets import load_waltons
from lifelines import KaplanMeierFitter
from lifelines.statistics import logrank_test

# Set my favorite font
mystyle.set()

# Load and show the data
df = load_waltons() # returns a Pandas DataFrame

# Print the first rows; the string below records what that output looks like
print(df.head())
'''
    T  E    group
0   6  1  miR-137
1  13  1  miR-137
2  13  1  miR-137
3  13  1  miR-137
4  19  1  miR-137
'''

# Column "T" of the data frame
# NOTE(review): presumably the duration column used for the survival
# analysis - the statements using it are not part of the visible code.
T = df['T']
# Draw 100 random samples from the distribution "nd"
# NOTE(review): `nd` is not assigned in the visible part of the file for this
# fragment - presumably the beginning of this script is missing; confirm.
data = nd.rvs(100)

# Exact PDF on a regular grid, for comparison with the estimates
x = np.linspace(-5, 5, 101)
pdf = nd.pdf(x)

# Calculate the KDE
# "h" is handed to gaussian_kde as scalar "bw_method"; it is computed from
# the sample size (100) only.
# NOTE(review): `sd` is computed but not used by the visible statements.
sd = np.std(data, ddof=1)
h = (4/(3*100))**0.2
h_str = '{0:4.2f}'.format(h)

# Calculate the smoothed plots, with 3 different parameters.
# "gaussian_kde" is taken from the public "scipy.stats" namespace; access via
# the private module "scipy.stats.kde" is deprecated.
kde_small = stats.gaussian_kde(data, 0.1)
kde = stats.gaussian_kde(data, h)
kde_large = stats.gaussian_kde(data, 1)

# Generate two plots: one KDE with rug-plot, and one with different parameters
sns.set_context('poster')
sns.set_style('ticks')
fig, axs = plt.subplots(1,2)
sns.distplot(data, rug=True, ax=axs[0])

# Right panel: exact PDF, and the three estimates in the order of the legend
axs[1].plot(x, pdf)
axs[1].plot(x, kde.evaluate(x), 'r')
axs[1].plot(x,kde_small.evaluate(x),'--', color=[0.8, 0.8, 0.8])
axs[1].plot(x,kde_large.evaluate(x),'--')
axs[1].legend(['exact', h_str, '0.1', '1.0'])
axs[1].set_ylim(0, 0.40)

C2_8_mystyle.printout_plain('kdePlot.png')
plt.show()