# Plot the histogram with the default number of bins; label the axes.
# `density=True` normalises the histogram so it integrates to 1. It replaces
# the old `normed` keyword, which current matplotlib releases no longer accept.
_ = plt.hist(n_defaults, density=True)
_ = plt.xlabel('number of defaults out of 100 loans')
_ = plt.ylabel('probability')

# Show the plot
plt.show()
# A histogram is not optimal when the results are integers: it is hard to read.
plt.close()

# Will the bank fail?
# The ECDF is read as: the share of trials with 10 defaults OR LESS is ~100%.

# Compute ECDF: x, y
from ecdf_func import ecdf
x, y = ecdf(n_defaults)

# Plot the ECDF with labeled axes
_ = plt.plot(x, y, marker='.', linestyle='none')
_ = plt.xlabel('number of defaults')
_ = plt.ylabel('percentage defaults')

# Show the plot
plt.show()

# Compute the number of 100-loan simulations with 10 or more defaults:
# n_lose_money
n_lose_money = np.sum(n_defaults >= 10)
print(n_lose_money)

# Compute and print the probability of losing money
print('Probability of losing money =', n_lose_money / len(n_defaults))
# The interest rate is such that the bank loses money with 10 or more defaults.
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 19 14:36:18 2017

@author: James
"""
import numpy as np
from ecdf_func import ecdf
from pmf_func import pmf_plot
import matplotlib.pyplot as plt

# Draw 10,000 samples from a Poisson distribution with rate 6 and compute
# their ECDF.
samples = np.random.poisson(6, size=10000)
x, y = ecdf(samples)

# Project helper from pmf_func; presumably draws the PMF of the samples
# (its implementation is not visible here -- confirm).
pmf_plot(samples)

# Plot the ECDF as unconnected points with labeled axes.
_ = plt.plot(x, y, marker='.', linestyle='none')
plt.margins(0.02)
# Label spelling corrected ('sucesses' -> 'successes'); it is shown on the
# figure.
_ = plt.xlabel('number of successes')
_ = plt.ylabel('CDF')
plt.show()
plt.close()

# Relationship between Binomial and Poisson distributions
# Report the mean petal length (computed before this point) with units.
print('I. versicolor:', mean_length_vers, 'cm')

# --- Computing percentiles ---
# Percentiles of interest: the median, the quartiles and the 2.5 / 97.5 tails.
percentiles = np.array([2.5, 25, 50, 75, 97.5])

# Evaluate those percentiles on the versicolor petal lengths.
ptiles_vers = np.percentile(versicolor_petal_length, percentiles)

# Show the percentile values.
print(ptiles_vers)

# --- Comparing percentiles to the ECDF ---
from ecdf_func import ecdf
x_vers, y_vers = ecdf(versicolor_petal_length)

# ECDF drawn as unconnected dots.
_ = plt.plot(x_vers, y_vers, marker='.', linestyle='none')
plt.margins(0.02)
_ = plt.xlabel('petal length (cm)')
_ = plt.ylabel('ECDF')

# Overlay the percentiles as red diamonds; dividing by 100 puts them on the
# same 0-1 scale as the ECDF's y axis.
_ = plt.plot(
    ptiles_vers,
    percentiles / 100,
    marker='D',
    color='red',
    linestyle='none',
)

# Render the figure.
plt.show()
from ecdf_func import ecdf
import matplotlib.pyplot as plt
import seaborn as sns

# Switch matplotlib to seaborn's default styling.
sns.set()

# NOTE(review): `pd` and `np` are used below but not imported in this span --
# presumably imported earlier in the file; confirm.

# Load the measured velocities from the CSV file.
michelson_speed_of_light = pd.read_csv(
    'michelson_speed_of_light.csv'
)['velocity of light in air (km/s)']

# Parameters of the normal distribution to compare against.
mean = np.mean(michelson_speed_of_light)
std = np.std(michelson_speed_of_light)

# Simulate 10,000 draws from a normal with the data's mean and std.
samples = np.random.normal(mean, std, size=10000)

# ECDF of the measurements and of the simulated ("theoretical") draws.
x, y = ecdf(michelson_speed_of_light)
x_theor, y_theor = ecdf(samples)

# Simulated CDF as a line, measured data as unconnected points.
_ = plt.plot(x_theor, y_theor)
_ = plt.plot(x, y, marker='.', linestyle='none')
_ = plt.xlabel('speed of light (km/s)')
_ = plt.ylabel('CDF')
plt.show()
plt.close()
import matplotlib.pyplot as plt

# Repeat the 4-flip experiment 10 times (the result is not stored).
np.random.binomial(4, 0.5, size=10)

# 10,000 draws of "number of successes in 60 trials with p = 0.1".
samples = np.random.binomial(60, 0.1, size=10000)

# Histogram of the draws.
_ = plt.hist(samples)
plt.show()
plt.close()

from ecdf_func import ecdf
import seaborn as sns

# Switch matplotlib to seaborn's default styling.
sns.set()

# ECDF of the same draws, plotted as unconnected points.
x, y = ecdf(samples)
_ = plt.plot(x, y, marker='.', linestyle='none')
plt.margins(0.2)
_ = plt.xlabel('number of successes')
_ = plt.ylabel('CDF')
plt.show()
plt.close()

# Sampling out of the binomial distribution
# Take 10,000 samples out of the binomial distribution: n_defaults
import numpy as np
import matplotlib.pyplot as plt
from Bernoulli_Trial import perform_bernoulli_trials
from ecdf_func import ecdf

# Fix the seed so the simulated draws are reproducible.
np.random.seed(42)

# n_defaults: 10,000 binomial draws, each the number of defaults among
# 100 loans with a 0.05 default probability.
n_defaults = np.random.binomial(100, 0.05, size=10000)

# ECDF of the simulated default counts.
x, y = ecdf(n_defaults)

# Plot the ECDF as unconnected points with axis labels.
_ = plt.plot(x, y, marker='.', linestyle='none')
plt.margins(0.002)
plt.xlabel('Defaults out of 100 loans')
plt.ylabel('ECDF')

# Render the figure.
plt.show()

# ################################################################## #

# Re-seed the random number generator.
np.random.seed(42)
# Build a DataFrame of the feature columns and attach the integer target.
# NOTE(review): `data` looks like a scikit-learn dataset bunch (e.g. iris)
# created before this point -- confirm.
df = pd.DataFrame(data['data'], columns=data['feature_names'])
df['target'] = data['target']

# Replace the integer codes (0, 1, 2, ...) with the species names.
species_by_code = dict(enumerate(data['target_names']))
for idx, species in species_by_code.items():
    print(idx, species)
# Assign the replaced column back instead of calling
# `df['target'].replace(..., inplace=True)`: that chained in-place form acts
# on a temporary column object, is deprecated in pandas 2.x and leaves `df`
# untouched once Copy-on-Write is enabled.
df['target'] = df['target'].replace(species_by_code)
df.rename(columns={'target': 'species'}, inplace=True)

# Petal lengths split by species.
versicolor_petal_length = df[df['species'] == 'versicolor']['petal length (cm)']
setosa_petal_length = df[df['species'] == 'setosa']['petal length (cm)']
virginica_petal_length = df[df['species'] == 'virginica']['petal length (cm)']

# Compute ECDF for versicolor data: x_vers, y_vers
x_vers, y_vers = ecdf(versicolor_petal_length)

# Generate plot
_ = plt.plot(x_vers, y_vers, marker='.', linestyle='none')

# Make the margins nice
_ = plt.margins(0.02)

# Label the axes
_ = plt.xlabel("versicolor_petal_length")
_ = plt.ylabel("ECDF")

# Display the plot
plt.show()
plt.close()
import numpy as np
import matplotlib.pyplot as plt
# import pandas as pd
from ecdf_func import ecdf

# mu, sigma: mean and standard deviation of the winning times.
# `belmont_no_outliers` is not defined in this span; it must exist already.
mu = np.mean(belmont_no_outliers)
sigma = np.std(belmont_no_outliers)

# samples: 10,000 draws from a normal distribution with this mu and sigma.
samples = np.random.normal(mu, sigma, size=10000)

# ECDF of the simulated draws and of the observed data.
x_theor, y_theor = ecdf(samples)
x, y = ecdf(belmont_no_outliers)

# Simulated CDF as a line, observed data as unconnected points.
_ = plt.plot(x_theor, y_theor)
_ = plt.plot(x, y, marker='.', linestyle='none')
plt.margins(0.02)
_ = plt.xlabel('Belmont winning time (sec.)')
_ = plt.ylabel('CDF')
plt.show()

# samples: now a million draws from the same normal distribution.
samples = np.random.normal(mu, sigma, size=1000000)

# prob: fraction of draws at or below 144 seconds (the mean of the boolean
# mask equals count / total).
prob = np.mean(samples <= 144)
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 20 00:40:06 2017

@author: James
"""
import numpy as np
from ecdf_func import ecdf

# Exponential inter-incident times.
# NOTE(review): `inter_times` is a single number here, yet it is passed to
# `ecdf` below -- this looks like a placeholder for an array of observed
# times; confirm and supply the real data.
inter_times = 500

# Mean inter-incident time, used as the scale of the exponential distribution.
mean = np.mean(inter_times)

# 10,000 draws from an exponential distribution with that mean.
samples = np.random.exponential(scale=mean, size=10000)

# ECDF of the inter-incident times.
x, y = ecdf(inter_times)