Ejemplo n.º 1
0
def show_binomial():
    """Plot the PMFs of three binomial distributions and save the figure.

    The distributions are (n=20, p=0.5), (n=20, p=0.7) and (n=40, p=0.5),
    each evaluated at k = 0..39. The figure is passed to
    ``mystyle.printout_plain`` under the name 'Binomial_distribution_pmf.png'
    and then shown.
    """

    # (frozen distribution, matplotlib format string), in legend order
    curves = [
        (stats.binom(20, 0.5), 'o-b'),
        (stats.binom(20, 0.7), 'd-r'),
        (stats.binom(40, 0.5), 's-g'),
    ]

    k = np.arange(40)

    sns.set_context('paper')
    sns.set_style('ticks')
    mystyle.set(14)

    markersize = 8
    # Successive plt.plot calls draw into the same axes by default. The old
    # plt.hold(True) call was removed in Matplotlib 3.0 and would raise an
    # AttributeError there, so it is no longer issued.
    for dist, fmt in curves:
        plt.plot(k, dist.pmf(k), fmt, ms=markersize)

    plt.title('Binomial distribution')
    plt.legend(['p=0.5 and n=20', 'p=0.7 and n=20', 'p=0.5 and n=40'])
    plt.xlabel('X')
    plt.ylabel('P(X)')
    sns.despine()

    mystyle.printout_plain('Binomial_distribution_pmf.png')

    plt.show()
Ejemplo n.º 2
0
def show_poisson():
    """Show three views of a Poisson distribution with mean 10.

    Three stacked subplots are drawn: the probability mass function and the
    cumulative distribution function for k = 0..24, and the percent point
    function (inverse CDF) for 100 probabilities between 0 and 1.
    """

    fig, ax = plt.subplots(3, 1)

    k = np.arange(25)
    # Not called `pd`, so the conventional pandas alias is not shadowed.
    poisson_dist = stats.poisson(10)
    mystyle.set(12)

    # Top panel: PMF; tick labels are blanked out on its x-axis.
    ax[0].plot(k, poisson_dist.pmf(k), 'x-')
    ax[0].set_title('Poisson distribution')
    ax[0].set_xticklabels([])
    ax[0].set_ylabel('PMF (X)')

    # Middle panel: CDF on the same k values.
    ax[1].plot(k, poisson_dist.cdf(k))
    ax[1].set_xlabel('X')
    ax[1].set_ylabel('CDF (X)')

    # Bottom panel: PPF, evaluated on probabilities rather than counts.
    y = np.linspace(0, 1, 100)
    ax[2].plot(y, poisson_dist.ppf(y))
    ax[2].set_xlabel('X')
    ax[2].set_ylabel('PPF (X)')

    plt.tight_layout()
    plt.show()
Ejemplo n.º 3
0
def show_poisson_views():
    """Show three views of a Poisson distribution with mean 10.

    Three stacked subplots are drawn: the probability mass function and the
    cumulative distribution function for k = 0..24, and the percent point
    function (inverse CDF) for 100 probabilities between 0 and 1.
    """

    fig, ax = plt.subplots(3, 1)

    k = np.arange(25)
    # Not called `pd`, so the conventional pandas alias is not shadowed.
    poisson_dist = stats.poisson(10)
    mystyle.set(12)

    # Top panel: PMF; tick labels are blanked out on its x-axis.
    ax[0].plot(k, poisson_dist.pmf(k), 'x-')
    ax[0].set_title('Poisson distribution')
    ax[0].set_xticklabels([])
    ax[0].set_ylabel('PMF (X)')

    # Middle panel: CDF on the same k values.
    ax[1].plot(k, poisson_dist.cdf(k))
    ax[1].set_xlabel('X')
    ax[1].set_ylabel('CDF (X)')

    # Bottom panel: PPF, evaluated on probabilities rather than counts.
    y = np.linspace(0, 1, 100)
    ax[2].plot(y, poisson_dist.ppf(y))
    ax[2].set_xlabel('X')
    ax[2].set_ylabel('PPF (X)')

    plt.tight_layout()
    plt.show()
Ejemplo n.º 4
0
def show_binomial():
    """Plot the PMFs of three binomial distributions and save the figure.

    The distributions are (n=20, p=0.5), (n=20, p=0.7) and (n=40, p=0.5),
    each evaluated at k = 0..39. The figure is passed to
    ``mystyle.printout_plain`` under the name 'Binomial_distribution_pmf.png'
    and then shown.
    """

    # (frozen distribution, matplotlib format string), in legend order
    curves = [
        (stats.binom(20, 0.5), 'o-b'),
        (stats.binom(20, 0.7), 'd-r'),
        (stats.binom(40, 0.5), 's-g'),
    ]

    k = np.arange(40)

    sns.set_context('paper')
    sns.set_style('ticks')
    mystyle.set(14)

    markersize = 8
    # Successive plt.plot calls draw into the same axes by default. The old
    # plt.hold(True) call was removed in Matplotlib 3.0 and would raise an
    # AttributeError there, so it is no longer issued.
    for dist, fmt in curves:
        plt.plot(k, dist.pmf(k), fmt, ms=markersize)

    plt.title('Binomial distribution')
    plt.legend(['p=0.5 and n=20', 'p=0.7 and n=20', 'p=0.5 and n=40'])
    plt.xlabel('X')
    plt.ylabel('P(X)')
    sns.despine()

    mystyle.printout_plain('Binomial_distribution_pmf.png')

    plt.show()
def main():
    '''Demonstrate the central limit theorem.

    Draws uniformly distributed random numbers and shows three histograms
    side by side: the raw data, the means of consecutive pairs, and the
    means of consecutive groups of ten. The figure is passed to
    ``mystyle.printout_plain`` as 'CentralLimitTheorem.png' and then shown.
    '''
    # Generate data
    # ndata must be an int: it is used as an array size and as a reshape
    # dimension, and NumPy rejects floats (such as 1e5) for both.
    ndata = 100000
    nbins = 50
    data = np.random.random(ndata)

    # Show them
    fig, axs = plt.subplots(1, 3)
    mystyle.set(14)
    myColor = '#CCCCCC'

    axs[0].hist(data, bins=nbins, color=myColor)
    axs[0].set_title('Random data')
    axs[0].set_xticks([0, 0.5, 1])
    axs[0].set_ylabel('Counts')

    # Integer division keeps the reshape dimensions integral under Python 3.
    pair_means = np.mean(data.reshape((ndata // 2, 2)), axis=1)
    axs[1].hist(pair_means, bins=nbins, color=myColor)
    axs[1].set_xticks([0, 0.5, 1])
    axs[1].set_title(' Average over 2')

    ten_means = np.mean(data.reshape((ndata // 10, 10)), axis=1)
    axs[2].hist(ten_means, bins=nbins, color=myColor)
    axs[2].set_xticks([0, 0.5, 1])
    axs[2].set_title(' Average over 10')

    plt.tight_layout()
    mystyle.printout_plain('CentralLimitTheorem.png')

    plt.show()
Ejemplo n.º 6
0
def main():
    """Generate a series of standard plots for univariate data.

    Draws, in order: a scatter plot, two histograms (default bins and 25
    bins), a cumulative-frequency curve, a vertical and a horizontal
    boxplot, and a violin plot of two normally distributed groups. Most
    figures are handed to ``mystyle.printout`` together with a file name and
    optional axis labels / title.
    """
    # Univariate data -------------------------
    # Generate data that are normally distributed
    x = randn(500)

    # Set the fonts the way I like them
    sns.set_context('paper')
    sns.set_style('white')
    mystyle.set()

    # Scatter plot
    plot(x, '.')
    mystyle.printout('scatterPlot.png',
                     xlabel='x',
                     ylabel='y',
                     title='Scatter')

    # Histogram
    hist(x, color='#999999')
    mystyle.printout('histogram_plain.png',
                     xlabel='Data Values',
                     ylabel='Frequency',
                     title='Histogram, default settings')

    hist(x, 25, color='#999999')
    mystyle.printout('histogram.png',
                     xlabel='Data Values',
                     ylabel='Frequency',
                     title='Histogram, 25 bins')

    # Cumulative frequency: element [0] of the cumfreq result holds the
    # cumulative counts per bin.
    numbins = 20
    plot(stats.cumfreq(x, numbins)[0])
    mystyle.printout('CumulativeFrequencyFunction.png',
                     xlabel='Data Values',
                     ylabel='Cumulative Frequency')

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    boxplot(x, sym='*')
    mystyle.printout('boxplot.png', xlabel='Values', title='Boxplot')

    boxplot(x, sym='*', vert=False)
    title('Boxplot, horizontal')
    xlabel('Values')
    show()

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))

    nd2 = stats.norm(loc=3, scale=1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot.
    # The frame is passed as `data=` and the per-group colours as `palette=`:
    # seaborn's `color` argument takes a single colour, not a list.
    df = pd.DataFrame({'Girls': data, 'Boys': data2})
    sns.violinplot(data=df, palette=["#999999", "#DDDDDD"])

    mystyle.printout('violinplot.png')
Ejemplo n.º 7
0
import seaborn as sns
import os.path
import mystyle

# Define the skewed distribution: a frozen chi-square distribution with
# 3 degrees of freedom
chi2 = stats.chi2(3)

# Generate the data
x = np.linspace(0,10, 100)   # 100 evenly spaced points on [0, 10]
y = chi2.pdf(x)              # density evaluated at those points
data = chi2.rvs(100)         # 100 random variates; not used in the lines visible here

# Arrange subplots: one row, two columns
sns.set_context('paper')
sns.set_style('white')
mystyle.set(11)
fig, axs = plt.subplots(1,2)

# Plot distribution in the left-hand axes
axs[0].plot(x,y)
axs[0].set_xlabel('X')
axs[0].set_ylabel('PDF(X)')
axs[0].set_title('chi2(x), k=3')
sns.set_style('white')  # same style as already selected above

# Set the aspect to (x-range / y-range) so that the plotted data area is square
x0, x1 = axs[0].get_xlim()
y0, y1 = axs[0].get_ylim()
axs[0].set_aspect((x1-x0)/(y1-y0))
#sns.despine()

    grandMean = np.mean(groupMean)
    ax.axhline(grandMean)
    ax.plot([80, 220], [groupMean[1], groupMean[1]], 'b')
    ax.plot([80, 120], [groupMean[1]+0.2, groupMean[1]+0.2], 'b')
    ax.annotate('', xy=(210, grandMean), xytext=(210,groupMean[1]), 
            arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black'))
    ax.annotate('', xy=(90, groupMean[1]), xytext=(90,groupMean[1]+0.2), 
            arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black'))
    ax.text(210, (grandMean + groupMean[1])/2., '$SS_{Treatment}$', fontsize=24)
    ax.text(90, groupMean[1]+0.1, '$SS_{Error}$', ha='right', fontsize=24)

# Script entry point: draw one annotated figure and save it as
# 'anova_annotated.png'.
if __name__ == '__main__':
    # Module-level settings; presumably read as globals by show_fig, whose
    # definition is not visible in this excerpt -- TODO confirm.
    centers = [5, 5.3, 4.7]
    colors = 'brg'
    
    #sns.set_context('paper')
    #sns.set_style('white')
    np.random.seed(123)  # fixed seed, so the random data are reproducible
    mystyle.set(18)
    
    fig = plt.figure()
    ax = fig.add_subplot(111)
    std = 0.1  # NOTE(review): the call below passes the literal 0.1, not this name
    numData = 100
    show_fig(0.1, ax, 'Sum-Squares')
    
    mystyle.printout_plain('anova_annotated.png')
    
    plt.show()

Ejemplo n.º 9
0
def show_fig(std, ax, title):
    '''Draw three normally distributed data groups next to each other.

    The group means, the marker colours and the number of points per group
    are read from the module-level names `centers`, `colors` and `numData`.

    std   -- standard deviation used for every group
    ax    -- matplotlib axes to draw into
    title -- title for the axes
    '''
    for group in range(3):
        distribution = stats.norm(centers[group], std)
        samples = distribution.rvs(numData)
        # Shift each group along x so the three groups do not overlap.
        positions = np.arange(numData) + group * numData
        ax.plot(positions, samples, '.', color=colors[group], ms=10)

    ax.set_title(title)
    ax.xaxis.set_ticks([50, 150, 250])
    ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
    sns.despine()

# Script entry point: compare a small and a large within-group spread
# side by side and save the figure as 'anova_oneway.png'.
if __name__ == '__main__':
    # Module-level settings read by show_fig
    centers = [5, 5.3, 4.7]
    colors = 'brg'
    numData = 100
    stds = [0.1, 2]

    sns.set_context('paper')
    sns.set_style('whitegrid')
    mystyle.set(14)

    fig, axs = plt.subplots(1, 2)
    # One panel per standard deviation, left to right
    for panel_std, panel_ax, panel_title in zip(stds, axs, ('SD=0.1', 'SD=2.0')):
        show_fig(panel_std, panel_ax, panel_title)

    mystyle.printout_plain('anova_oneway.png')

    plt.show()

Ejemplo n.º 10
0
import seaborn as sns
import os.path
import mystyle

# Define the skewed distribution: a frozen chi-square distribution with
# 3 degrees of freedom
chi2 = stats.chi2(3)

# Generate the data
x = np.linspace(0, 10, 100)  # 100 evenly spaced points on [0, 10]
y = chi2.pdf(x)              # density evaluated at those points
data = chi2.rvs(100)         # 100 random variates; not used in the lines visible here

# Arrange subplots: one row, two columns
sns.set_context('paper')
sns.set_style('white')
mystyle.set(11)
fig, axs = plt.subplots(1, 2)

# Plot distribution in the left-hand axes
axs[0].plot(x, y)
axs[0].set_xlabel('X')
axs[0].set_ylabel('PDF(X)')
axs[0].set_title('chi2(x), k=3')
sns.set_style('white')  # same style as already selected above

# Set the aspect to (x-range / y-range) so that the plotted data area is square
x0, x1 = axs[0].get_xlim()
y0, y1 = axs[0].get_ylim()
axs[0].set_aspect((x1 - x0) / (y1 - y0))
#sns.despine()

# Plot probplot
Ejemplo n.º 11
0
    # Don't worry that in Python it is called "weibull_min": the "weibull_max" is
    # simply mirrored about the origin.
    showDistribution(arange(0,5,0.02), stats.weibull_min(1), stats.weibull_min(2),
                     'Weibull Distribution', 'X', 'P(X)',['k=1', 'k=2'], xmin=0, xmax=4)
    
    # Uniform distribution
    showDistribution(x, stats.uniform,'' ,
                     'Uniform Distribution', 'X', 'P(X)','')
    
    # Logistic distribution
    showDistribution(x, stats.norm, stats.logistic,
                     'Logistic Distribution', 'X', 'P(X)',['Normal', 'Logistic'])
    
    # Lognormal distribution
    x = logspace(-9,1,1001)+1e-9
    showDistribution(x, stats.lognorm(2), '',
                     'Lognormal Distribution', 'X', 'lognorm(X)','', xmin=-0.1)
    
    # The log-lin plot has to be done by hand:
    plot(log(x), stats.lognorm.pdf(x,2))
    xlim(-10, 4)
    title('Lognormal Distribution')
    xlabel('log(X)')
    ylabel('lognorm(X)')
    show()
    
#----------------------------------------------------------------------
# Script entry point: apply the mystyle defaults, then run the demo.
if __name__ == '__main__':
    mystyle.set()
    show_continuous()
Ejemplo n.º 12
0
                                facecolor='black'))
    ax.annotate('',
                xy=(90, groupMean[1]),
                xytext=(90, groupMean[1] + 0.2),
                arrowprops=dict(arrowstyle='<->, head_width=0.1',
                                facecolor='black'))
    ax.text(210, (grandMean + groupMean[1]) / 2.,
            '$SS_{Treatment}$',
            fontsize=24)
    ax.text(90, groupMean[1] + 0.1, '$SS_{Error}$', ha='right', fontsize=24)


# Script entry point: draw one annotated figure and save it as
# 'anova_annotated.png'.
if __name__ == '__main__':
    # Module-level settings; presumably read as globals by show_fig, whose
    # definition is not visible in this excerpt -- TODO confirm.
    centers = [5, 5.3, 4.7]
    colors = 'brg'

    #sns.set_context('paper')
    #sns.set_style('white')
    np.random.seed(123)  # fixed seed, so the random data are reproducible
    mystyle.set(18)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    std = 0.1  # NOTE(review): the call below passes the literal 0.1, not this name
    numData = 100
    show_fig(0.1, ax, 'Sum-Squares')

    mystyle.printout_plain('anova_annotated.png')

    plt.show()
Ejemplo n.º 13
0
import seaborn as sns
import mystyle

# Calculate the values
nd = stats.norm()  # frozen normal distribution with default parameters

x = np.linspace(-3,3,100)
yp = nd.pdf(x)            # density over [-3, 3]
y = nd.cdf(x)             # cumulative distribution over [-3, 3]
x1 = np.linspace(-3, 1)   # sub-range whose area under the PDF gets shaded
y1 = nd.pdf(x1)

# Make the plot
sns.set_context('paper')
sns.set_style('white')
mystyle.set(12)

figs, axs = plt.subplots(1,2)

# Left panel: PDF, with the area between x1[0] and x1[-1] filled in grey
axs[0].plot(x,yp, 'k')
axs[0].fill_between(x1, y1, facecolor='#CCCCCC')
axs[0].text(0, 0.1, 'CDF(x)', family='cursive', fontsize=14, horizontalalignment='center', style='italic')
axs[0].set_xlabel('x')
axs[0].set_ylabel('PDF(x)')
sns.despine()

# Right panel: CDF
axs[1].plot(x, y, '#999999', lw=3)
axs[1].set_xlabel('x')
axs[1].set_ylabel('CDF(x)')
# Dashed vertical line at x=0 from 0 to 1; drawn through pyplot, i.e. into
# the current axes (presumably the right-hand panel -- TODO confirm)
plt.vlines(0, 0, 1, linestyles='--')
sns.despine()
Ejemplo n.º 14
0
# additional packages
import mystyle

# Calculate the values
nd = stats.norm()  # frozen normal distribution with default parameters

x = np.linspace(-3,3,100)
yp = nd.pdf(x)            # density over [-3, 3]
y = nd.cdf(x)             # cumulative distribution over [-3, 3]
x1 = np.linspace(-3, 1)   # sub-range whose area under the PDF gets shaded
y1 = nd.pdf(x1)

# Make the plot
sns.set_context('paper')
sns.set_style('white')
mystyle.set(12)

figs, axs = plt.subplots(1,2)

# Left panel: PDF, with the area between x1[0] and x1[-1] filled in grey
axs[0].plot(x,yp, 'k')
axs[0].fill_between(x1, y1, facecolor='#CCCCCC')
axs[0].text(0, 0.1, 'CDF(x)', family='cursive', fontsize=14, horizontalalignment='center', style='italic')
axs[0].set_xlabel('x')
axs[0].set_ylabel('PDF(x)')
sns.despine()

# Right panel: CDF
axs[1].plot(x, y, '#999999', lw=3)
axs[1].set_xlabel('x')
axs[1].set_ylabel('CDF(x)')
# Dashed vertical line at x=0 from 0 to 1; drawn through pyplot, i.e. into
# the current axes (presumably the right-hand panel -- TODO confirm)
plt.vlines(0, 0, 1, linestyles='--')
sns.despine()
Ejemplo n.º 15
0
                     '')

    # Logistic distribution
    showDistribution(x, stats.norm, stats.logistic, 'Logistic Distribution',
                     'X', 'P(X)', ['Normal', 'Logistic'])

    # Lognormal distribution
    x = logspace(-9, 1, 1001) + 1e-9
    showDistribution(x,
                     stats.lognorm(2),
                     '',
                     'Lognormal Distribution',
                     'X',
                     'lognorm(X)',
                     '',
                     xmin=-0.1)

    # The log-lin plot has to be done by hand:
    plot(log(x), stats.lognorm.pdf(x, 2))
    xlim(-10, 4)
    title('Lognormal Distribution')
    xlabel('log(X)')
    ylabel('lognorm(X)')
    show()


#----------------------------------------------------------------------
# Script entry point: apply the mystyle defaults, then run the demo.
if __name__ == '__main__':
    mystyle.set()
    show_continuous()
Ejemplo n.º 16
0
        offset = ii * numData
        ax.plot(offset + np.arange(numData),
                data,
                '.',
                color=colors[ii],
                ms=10)

    ax.xaxis.set_ticks([50, 150, 250])
    ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
    ax.set_title(title)
    sns.despine()


# Script entry point: compare a small and a large within-group spread
# side by side and save the figure as 'anova_oneway.png'.
if __name__ == '__main__':
    # Module-level settings (presumably consumed by show_fig)
    centers = [5, 5.3, 4.7]
    colors = 'brg'
    numData = 100
    stds = [0.1, 2]

    sns.set_context('paper')
    sns.set_style('whitegrid')
    mystyle.set(14)

    fig, axs = plt.subplots(1, 2)
    # One panel per standard deviation, left to right
    for panel_std, panel_ax, panel_title in zip(stds, axs, ('SD=0.1', 'SD=2.0')):
        show_fig(panel_std, panel_ax, panel_title)

    mystyle.printout_plain('anova_oneway.png')

    plt.show()