# Plot the histogram with the default number of bins and label the axes.
# `density=True` scales the bars to unit total area; it replaces the old
# `normed` keyword, which current matplotlib releases no longer accept.
_ = plt.hist(n_defaults, density=True)
_ = plt.xlabel('number of defaults out of 100 loans')
_ = plt.ylabel('probability')

# Show the plot.
# A histogram is not ideal when the results are integers: it is hard to read.
plt.show()
plt.close()

# Will the bank fail?
# Reading the ECDF: the share of trials with 10 defaults OR FEWER is ~100%.
# Compute ECDF: x, y
from ecdf_func import ecdf

x, y = ecdf(n_defaults)

# Plot the ECDF with labeled axes
_ = plt.plot(x, y, marker='.', linestyle='none')
_ = plt.xlabel('number of defaults')
_ = plt.ylabel('percentage defaults')

# Show the plot
plt.show()

# Count the 100-loan simulations with 10 or more defaults: n_lose_money
n_lose_money = np.sum(n_defaults >= 10)
print(n_lose_money)
# Compute and print the probability of losing money
print('Probability of losing money =', n_lose_money / len(n_defaults))
# The interest rate is such that the bank loses money if 10 or more loans default.
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 19 14:36:18 2017

@author: James
"""

import numpy as np
from ecdf_func import ecdf
from pmf_func import pmf_plot
import matplotlib.pyplot as plt

# Draw 10,000 samples from a Poisson distribution with mean 6.
samples = np.random.poisson(6, size=10000)

# ECDF coordinates of the samples.
x, y = ecdf(samples)

# Plot produced by the project's PMF helper for the same samples.
pmf_plot(samples)

# ECDF drawn as unconnected points.
_ = plt.plot(x, y, marker='.', linestyle='none')

plt.margins(0.02)

# Label spelling fixed ('sucesses' -> 'successes') so the rendered axis text
# matches the other ECDF plots of success counts.
_ = plt.xlabel('number of successes')

_ = plt.ylabel('CDF')

plt.show()

plt.close()

#relationship between Binomial and Poisson distributions
# Example #3
# Report the mean petal length together with its unit.
print('I. versicolor:', mean_length_vers, 'cm')

# --- Percentiles ---------------------------------------------------------
# Percentile ranks to evaluate (in percent).
percentiles = np.array([2.5, 25, 50, 75, 97.5])

# Petal lengths found at those ranks: ptiles_vers
ptiles_vers = np.percentile(versicolor_petal_length, q=percentiles)
print(ptiles_vers)

# --- Percentiles against the ECDF ----------------------------------------
from ecdf_func import ecdf

x_vers, y_vers = ecdf(versicolor_petal_length)

# ECDF as points, with a small margin so edge points are not clipped.
_ = plt.plot(x_vers, y_vers, '.')
plt.margins(0.02)
_ = plt.xlabel('petal length (cm)')
_ = plt.ylabel('ECDF')

# Overlay the percentiles as red diamonds; the ranks are divided by 100 so
# they are plotted as fractions on the y-axis.
_ = plt.plot(
    ptiles_vers, percentiles / 100,
    marker='D', color='red', linestyle='none',
)

# Render the figure.
plt.show()
# Example #4
# Compare the ECDF of the Michelson speed-of-light measurements with the ECDF
# of samples drawn from a normal distribution that has the same mean and
# standard deviation.
import matplotlib.pyplot as plt
import numpy as np  # used below for mean/std and sampling
import pandas as pd  # used below for read_csv; was not imported in this snippet
import seaborn as sns

from ecdf_func import ecdf

sns.set()

# Load only the velocity column from the CSV file.
michelson_speed_of_light = pd.read_csv(
    'michelson_speed_of_light.csv')['velocity of light in air (km/s)']

# Parameters of the normal distribution, estimated from the data.
mean = np.mean(michelson_speed_of_light)

std = np.std(michelson_speed_of_light)

# Samples from the normal distribution with those parameters.
samples = np.random.normal(mean, std, size=10000)

# ECDF of the measurements and of the normal samples.
x, y = ecdf(michelson_speed_of_light)

x_theor, y_theor = ecdf(samples)

# Normal samples as a line, measurements as unconnected points.
_ = plt.plot(x_theor, y_theor)

_ = plt.plot(x, y, marker='.', linestyle='none')

_ = plt.xlabel('speed of light (km/s)')

_ = plt.ylabel('CDF')

plt.show()

plt.close()
# Example #5
import matplotlib.pyplot as plt
import numpy as np  # np is used below but was not imported in this snippet

# Repeat the 4-flip experiment 10 times (the draw is not stored).
np.random.binomial(4, 0.5, size=10)

# 10,000 draws of the number of successes in 60 trials with p = 0.1.
samples = np.random.binomial(60, 0.1, size=10000)
_ = plt.hist(samples)
plt.show()
plt.close()

from ecdf_func import ecdf

import seaborn as sns

# Switch to seaborn's default plot styling for the ECDF figure.
sns.set()

# ECDF coordinates of the binomial samples.
x, y = ecdf(samples)

# ECDF drawn as unconnected points.
_ = plt.plot(x, y, marker='.', linestyle='none')

plt.margins(0.2)

_ = plt.xlabel('number of successes')

_ = plt.ylabel('CDF')

plt.show()

plt.close()

# Sampling out of the binomial distribution:
# simulate the number of defaults among 100 loans, 10,000 times.

import numpy as np
import matplotlib.pyplot as plt

from Bernoulli_Trial import perform_bernoulli_trials
from ecdf_func import ecdf

# Fix the seed so the draws are reproducible.
np.random.seed(42)

# n_defaults: 10,000 binomial draws with 100 trials each and p = 0.05.
n_defaults = np.random.binomial(n=100, p=0.05, size=10000)

# ECDF coordinates of the simulated default counts: x, y
x, y = ecdf(n_defaults)

# Draw the ECDF as unconnected points and label both axes.
_ = plt.plot(
    x, y,
    marker='.',
    linestyle='none',
)
plt.margins(0.002)
plt.xlabel('Defaults out of 100 loans')
plt.ylabel('ECDF')

# Render the figure.
plt.show()

# ------------------------------------------------------------------ #

# Re-seed the random number generator with the same seed.
np.random.seed(42)
# Example #7
# Build a DataFrame from the dataset's feature matrix and attach the target.
df = pd.DataFrame(data['data'], columns=data['feature_names'])
df['target'] = data['target']

# Replace the integer codes (0, 1, 2) with the species names.
# The code -> name mapping is collected first and applied in one `replace`
# whose result is assigned back to the column. Calling
# `df['target'].replace(..., inplace=True)` acts on a column selection and
# does not update `df` when pandas Copy-on-Write is active.
species_by_code = {}
for idx, species in enumerate(data['target_names']):
    print(idx, species)
    species_by_code[idx] = species
df['target'] = df['target'].replace(species_by_code)
df = df.rename(columns={'target': 'species'})

# Petal lengths per species.
versicolor_petal_length = df.loc[df['species'] == 'versicolor',
                                 'petal length (cm)']
setosa_petal_length = df.loc[df['species'] == 'setosa', 'petal length (cm)']
virginica_petal_length = df.loc[df['species'] == 'virginica',
                                'petal length (cm)']

# Compute ECDF for versicolor data: x_vers, y_vers
x_vers, y_vers = ecdf(versicolor_petal_length)

# Generate plot: ECDF as unconnected points
_ = plt.plot(x_vers, y_vers, marker='.', linestyle='none')

# Small margins so edge points are not clipped
_ = plt.margins(0.02)

# Label the axes
_ = plt.xlabel("versicolor_petal_length")
_ = plt.ylabel("ECDF")

# Display the plot
plt.show()
plt.close()
import numpy as np
import matplotlib.pyplot as plt
# import pandas as pd

from ecdf_func import ecdf

# Estimate the normal parameters from the data: mu, sigma
mu, sigma = np.mean(belmont_no_outliers), np.std(belmont_no_outliers)

# samples: 10,000 draws from a normal distribution with this mu and sigma
samples = np.random.normal(loc=mu, scale=sigma, size=10000)

# ECDF of the normal samples and of the data itself
x_theor, y_theor = ecdf(samples)
x, y = ecdf(belmont_no_outliers)

# Normal samples as a line, data as unconnected points; then show the figure
_ = plt.plot(x_theor, y_theor)
_ = plt.plot(
    x, y,
    marker='.',
    linestyle='none',
)
plt.margins(0.02)
_ = plt.xlabel('Belmont winning time (sec.)')
_ = plt.ylabel('CDF')
plt.show()

# samples: one million draws from the same normal distribution
samples = np.random.normal(loc=mu, scale=sigma, size=1000000)

# prob: fraction of the draws that are at most 144 seconds
prob = np.sum(samples <= 144) / len(samples)
# Example #9
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 20 00:40:06 2017

@author: James
"""

import numpy as np
from ecdf_func import ecdf

# Exponential inter-incident times: draw exponential samples whose scale is
# the mean of `inter_times`, then compute an ECDF.

# NOTE(review): `inter_times` is a single number here, so the mean below is
# just that number — looks like a placeholder for real data; confirm.
inter_times = 500

mean = np.mean(inter_times)

# 10,000 draws from an exponential distribution with scale `mean`.
samples = np.random.exponential(mean, size=10000)

# NOTE(review): `ecdf` receives the scalar `inter_times`, not an array and
# not `samples`; the other snippets pass array-like data — verify intent.
x, y = ecdf(inter_times)