148.4, 150.2, 148.8, 149.2, 149.2, 148.4, 150.2, 146.6, 149.8, 149., 150.8, 148.6, 150.2, 149., 148.6, 150.2, 148.2, 149.4, 150.8, 150.2, 152.2, 148.2, 149.2, 151., 149.6, 149.6, 149.4, 148.6, 150., 150.6, 149.2, 152.6, 152.8, 149.6, 151.6, 152.8, 153.2, 152.4, 152.2 ] # Compute mean and standard deviation: mu, sigma mu = np.mean(belmont_no_outliers) sigma = np.std(belmont_no_outliers) # Sample out of a normal distribution with this mu and sigma: samples samples = np.random.normal(mu, sigma, 10000) # Get the CDF of the samples and of the data x, y = ecdf.ecdf(belmont_no_outliers) x_theor, y_theor = ecdf.ecdf(samples) # Plot the CDFs and show the plot _ = plt.plot(x_theor, y_theor) _ = plt.plot(x, y, marker='.', linestyle='none') plt.margins(0.02) _ = plt.xlabel('Belmont winning time (sec.)') _ = plt.ylabel('CDF') plt.show() # Compute the fraction that are faster than 144 seconds: prob prob = sum(samples <= 144) / float(len(samples)) # Print the result
# Draw 100,000 samples from normal distributions that share the mean 20 but
# have standard deviations 1, 3 and 10
samples_std1 = np.random.normal(20, 1, size=100000)
samples_std3 = np.random.normal(20, 3, size=100000)
samples_std10 = np.random.normal(20, 10, size=100000)

# Make histograms
# density=True normalises each histogram to unit area; it replaces the
# `normed` keyword, which newer Matplotlib releases no longer accept.
_ = plt.hist(samples_std1, density=True, histtype='step', bins=100)
_ = plt.hist(samples_std3, density=True, histtype='step', bins=100)
_ = plt.hist(samples_std10, density=True, histtype='step', bins=100)

# Make a legend, set limits and show plot
_ = plt.legend(('std = 1', 'std = 3', 'std = 10'))
plt.ylim(-0.01, 0.42)
plt.show()

# Generate CDFs
x_std1, y_std1 = ecdf.ecdf(samples_std1)
x_std3, y_std3 = ecdf.ecdf(samples_std3)
x_std10, y_std10 = ecdf.ecdf(samples_std10)

# Plot CDFs
_ = plt.plot(x_std1, y_std1, marker='.', linestyle='none')
_ = plt.plot(x_std3, y_std3, marker='.', linestyle='none')
_ = plt.plot(x_std10, y_std10, marker='.', linestyle='none')

# Make 2% margin
plt.margins(0.02)

# Make a legend and show the plot
_ = plt.legend(('std = 1', 'std = 3', 'std = 10'), loc='lower right')
plt.show()
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import empiricalcumulativedistribution as ecdf

# Seed the random number generator
# A fixed seed makes the draws below reproducible from run to run; the value
# itself is arbitrary.
np.random.seed(42)

# Take 10,000 samples out of the binomial distribution: n_defaults
# Each sample is the number of successes in 100 trials with p = 0.05.
n_defaults = np.random.binomial(100, 0.05, 10000)

# Compute CDF: x, y
x, y = ecdf.ecdf(n_defaults)

# Plot the CDF with axis labels
_ = plt.plot(x, y, marker='.', linestyle='none')
_ = plt.xlabel('number of defaults out of 100')
_ = plt.ylabel('CDF')

# Show the plot
plt.show()
def successive_poisson(tau1, tau2, size=1):
    """Draw waiting times for two exponential waits that happen in sequence.

    Args:
        tau1: Scale parameter passed to the first ``np.random.exponential``
            draw.
        tau2: Scale parameter passed to the second ``np.random.exponential``
            draw.
        size: Number (or shape) of samples to draw from each distribution.

    Returns:
        The element-wise sum of the two sets of draws.
    """
    # Draw samples out of first exponential distribution: t1
    t1 = np.random.exponential(tau1, size)

    # Draw samples out of second exponential distribution: t2
    t2 = np.random.exponential(tau2, size)

    return t1 + t2


# Draw samples of waiting times: waiting_times
waiting_times = successive_poisson(764, 715, 100000)

# Make the histogram
# density=True normalises the histogram to unit area; it replaces the `normed`
# keyword, which newer Matplotlib releases no longer accept.
_ = plt.hist(waiting_times, bins=100, density=True, histtype='step')

# Label axes
_ = plt.xlabel('waiting_times')
_ = plt.ylabel('probability ')

# Show the histogram before drawing the ECDF, so that the ECDF labels set
# below do not overwrite the histogram labels on the same axes
plt.show()

# plot ecdf
x, y = ecdf.ecdf(waiting_times)
_ = plt.plot(x, y, marker='.', linestyle='none')
_ = plt.xlabel('waiting times')
_ = plt.ylabel('probability of waiting times')

# Show the plot
plt.show()
# Average petal length of the versicolor sample: mean_length_vers
mean_length_vers = np.mean(versicolor_petal_length)

# Report the mean together with its unit
print('I. versicolor:', mean_length_vers, 'cm')

# Percentile levels to evaluate: percentiles
percentiles = np.array([2.5, 25, 50, 75, 97.5])

# Petal lengths at those percentile levels: ptiles_vers
ptiles_vers = np.percentile(versicolor_petal_length, percentiles)

# Report the percentile values
print(ptiles_vers)

# Empirical CDF of the petal lengths
x_vers, y_vers = ecdf.ecdf(versicolor_petal_length)

# Draw the ECDF as unconnected dots and label the axes
_ = plt.plot(x_vers, y_vers, marker='.', linestyle='none')
_ = plt.xlabel('petal length (cm)')
_ = plt.ylabel('ECDF')
plt.margins(0.02)

# Mark the percentiles with red diamonds; the levels are divided by 100 so
# they share the 0-1 scale of the ECDF axis
_ = plt.plot(
    ptiles_vers,
    percentiles / 100,
    linestyle='none',
    color='red',
    marker='D',
)

# Show the plot