Python draw_bs_reps Examples, dc_stat_think.draw_bs_reps Python Examples

Example #1

0

Show file

File: 06-did-the-2015-event-have-this-problem.py Project: mehrdad2275/datacamp-1

INSTRUCTIONS

*   Compute the fractional improvement, f, using the arrays swimtime_low_lanes_15 and swimtime_high_lanes_15. Also compute the mean of f, storing it as f_mean.
*   Draw 10,000 bootstrap replicates of the mean f.
*   Compute the 95% confidence interval of the mean fractional improvement.
*   Shift f to create f_shift such that its mean is zero.
*   Draw 100,000 bootstrap replicates of the mean of f_shift.
*   Compute the p-value.
'''

# Compute f, the fractional difference between low-lane and high-lane swim
# times relative to the low-lane time, and its mean
f = (swimtime_low_lanes_15 - swimtime_high_lanes_15) / swimtime_low_lanes_15
f_mean = np.mean(f)

# Draw 10,000 bootstrap replicates of the mean of f
bs_reps = dcst.draw_bs_reps(f, np.mean, size=10000)

# Compute 95% confidence interval from the 2.5th and 97.5th percentiles
conf_int = np.percentile(bs_reps, [2.5, 97.5])

# Shift f so that its mean is zero
f_shift = f - f_mean

# Draw 100,000 bootstrap replicates of the mean of the shifted data.
# bs_reps is rebound here; conf_int was already computed from the first set.
bs_reps = dcst.draw_bs_reps(f_shift, np.mean, size=100000)

# Compute the p-value: the fraction of shifted-data replicates that are at
# least as large as the observed mean. Dividing by len(bs_reps) keeps the
# denominator tied to the number of replicates actually drawn instead of
# repeating the size literal.
p_val = np.sum(bs_reps >= f_mean) / len(bs_reps)

# Print the results
print("""

Example #2

0

Show file

File: 02-parameter-estimation-active-bout-length.py Project: mehrdad2275/datacamp-1

# Load the bout table; '#' marks comment text in the CSV
df = pd.read_csv('../datasets/gandhi_et_al_bouts.csv', comment='#')

# Bout lengths for each genotype, selected by label and taken as raw values
bout_lengths_wt = df.loc[df['genotype'] == 'wt', 'bout_length'].values
bout_lengths_mut = df.loc[df['genotype'] == 'mut', 'bout_length'].values

'''
INSTRUCTIONS

*   Compute the mean active bout length for wild type and mutant using np.mean(). Store the results as mean_wt and mean_mut.
*   Draw 10,000 bootstrap replicates for each using dcst.draw_bs_reps(), storing the results as bs_reps_wt and bs_reps_mut.
*   Compute a 95% confidence interval from the bootstrap replicates using np.percentile(), storing the results as conf_int_wt and conf_int_mut.
*   Print the mean and confidence intervals to the screen.
'''

# Point estimates: mean active bout length for each genotype
mean_wt = np.mean(bout_lengths_wt)
mean_mut = np.mean(bout_lengths_mut)

# Bootstrap the mean 10,000 times per genotype (wild type first, then mutant)
bs_reps_wt = dcst.draw_bs_reps(bout_lengths_wt, np.mean, size=10000)
bs_reps_mut = dcst.draw_bs_reps(bout_lengths_mut, np.mean, size=10000)

# 95% confidence intervals: 2.5th and 97.5th percentiles of the replicates
conf_int_wt = np.percentile(bs_reps_wt, [2.5, 97.5])
conf_int_mut = np.percentile(bs_reps_mut, [2.5, 97.5])

# Report each mean alongside the lower and upper bound of its interval
print("""
wt:  mean = {0:.3f} min., conf. int. = [{1:.1f}, {2:.1f}] min.
mut: mean = {3:.3f} min., conf. int. = [{4:.1f}, {5:.1f}] min.
""".format(mean_wt, conf_int_wt[0], conf_int_wt[1],
           mean_mut, conf_int_mut[0], conf_int_mut[1]))

Example #3

0

Show file

# Plot the mutant points as unconnected dots (presumably the mutant ECDF,
# given the y-axis label set below)
_ = plt.plot(x_mut, y_mut, marker='.', linestyle='none')

# Make a legend, label axes, and show plot
_ = plt.legend(('wt', 'mut'))
_ = plt.xlabel('active bout length (min)')
_ = plt.ylabel('ECDF')
plt.show()

#%%

# Compute mean active bout length for each genotype
mean_wt = np.mean(bout_lengths_wt)
mean_mut = np.mean(bout_lengths_mut)

# Draw 10,000 bootstrap replicates of the mean for each genotype
bs_reps_wt = dcst.draw_bs_reps(bout_lengths_wt, np.mean, size=10000)
bs_reps_mut = dcst.draw_bs_reps(bout_lengths_mut, np.mean, size=10000)

# Compute 95% confidence intervals (2.5th and 97.5th percentiles of the replicates)
conf_int_wt = np.percentile(bs_reps_wt, [2.5, 97.5])
conf_int_mut = np.percentile(bs_reps_mut, [2.5, 97.5])

# Print the results: each mean followed by the two bounds of its interval
print("""
wt:  mean = {0:.3f} min., conf. int. = [{1:.1f}, {2:.1f}] min.
mut: mean = {3:.3f} min., conf. int. = [{4:.1f}, {5:.1f}] min.
""".format(mean_wt, *conf_int_wt, mean_mut, *conf_int_mut))

#%%

# Compute the difference of means: diff_means_exp

Example #4

0

Show file

The permutation test has a pretty restrictive hypothesis, that the heterozygotic and wild type bout lengths are
identically distributed. Now, use a bootstrap hypothesis test to test the hypothesis that the means are equal,
making no assumptions about the distributions."""

import numpy as np
import dc_stat_think as dcst
from customlib import fish_sleep_df as fish
# Extract the bout lengths for the wild type ('wt') and heterozygote ('het') fish
bout_lengths_wt = fish.fish_data_extract('wt')
bout_lengths_het = fish.fish_data_extract('het')
# Concatenate arrays: bout_lengths_concat
bout_lengths_concat = np.concatenate((bout_lengths_wt, bout_lengths_het))

# Compute mean of all bout_lengths: mean_bout_length
mean_bout_length = np.mean(bout_lengths_concat)

# Generate shifted arrays: each sample is re-centred on the pooled mean, so
# both shifted samples have the same mean
wt_shifted = bout_lengths_wt - np.mean(bout_lengths_wt) + mean_bout_length
het_shifted = bout_lengths_het - np.mean(bout_lengths_het) + mean_bout_length

# Compute 10,000 bootstrap replicates from shifted arrays
bs_reps_wt = dcst.draw_bs_reps(wt_shifted, np.mean, size=10000)
bs_reps_het = dcst.draw_bs_reps(het_shifted, np.mean, size=10000)

# Get replicates of difference of means (het minus wt): bs_reps
bs_reps = bs_reps_het - bs_reps_wt
# NOTE(review): hard-coded observed statistic; presumably
# mean(bout_lengths_het) - mean(bout_lengths_wt) from an earlier step --
# confirm it still matches the data extracted above
diff_means_exp = 2.669817067793108
# Compute and print p-value: p (fraction of replicates at least as large as
# the observed difference)
p = np.sum(bs_reps >= diff_means_exp) / len(bs_reps)
print('p-value =', p)

Example #5

0

Show file

# Make x and y values for the ECDF of f: x, y
x, y = dcst.ecdf(f)

# Plot the ECDF as dots with no connecting line
_ = plt.plot(x, y, marker='.', linestyle='none')

# Label the axes and show the plot
plt.xlabel('f')
plt.ylabel('ECDF')

plt.show()
"""Estimation of mean improvement

You will now estimate how big this current effect is. Compute the mean fractional improvement for being in a 
high-numbered lane versus a low-numbered lane, along with a 95% confidence interval of the mean."""

# Point estimate of the mean fractional difference: f_mean
f_mean = np.mean(f)

# 10,000 bootstrap replicates of that mean: bs_reps
bs_reps = dcst.draw_bs_reps(f, np.mean, size=10000)

# 2.5th and 97.5th percentiles bound the 95% confidence interval: conf_int
conf_int = np.percentile(bs_reps, [2.5, 97.5])
# Report the estimate with the lower and upper bound of its interval
print("""
mean frac. diff.: {0:.5f}
95% conf int of mean frac. diff.: [{1:.5f}, {2:.5f}]""".format(
    f_mean, conf_int[0], conf_int[1]))

Example #6

0

Show file

# Finish the figure: legend entries, axis labels, then render
_ = plt.legend(('wt', 'mut'))
_ = plt.xlabel('active bout length (min)')
_ = plt.ylabel('ECDF')
plt.show()

# -------------------------------------------------------
# Parameter estimation: active bout length
# -------------------------------------------------------

# Point estimates: mean active bout length for each genotype
mean_wt = np.mean(bout_lengths_wt)
mean_mut = np.mean(bout_lengths_mut)

# Bootstrap the mean 10,000 times per genotype (wild type first, then mutant)
bs_reps_wt = dcst.draw_bs_reps(bout_lengths_wt, np.mean, size=10000)
bs_reps_mut = dcst.draw_bs_reps(bout_lengths_mut, np.mean, size=10000)

# 95% confidence intervals: 2.5th and 97.5th percentiles of the replicates
conf_int_wt = np.percentile(bs_reps_wt, [2.5, 97.5])
conf_int_mut = np.percentile(bs_reps_mut, [2.5, 97.5])

# Report each mean alongside the lower and upper bound of its interval
print("""
wt:  mean = {0:.3f} min., conf. int. = [{1:.1f}, {2:.1f}] min.
mut: mean = {3:.3f} min., conf. int. = [{4:.1f}, {5:.1f}] min.
""".format(mean_wt, conf_int_wt[0], conf_int_wt[1],
           mean_mut, conf_int_mut[0], conf_int_mut[1]))

# -------------------------------------------------------
# Permutation test: wild type versus heterozygote
# -------------------------------------------------------

Example #7

0

Show file

File: 02-200m-free-time-with-confidence-interval.py Project: mehrdad2275/datacamp-1

       109.87, 106.73, 107.18, 110.98, 108.55, 114.31, 112.05])
    
'''
INSTRUCTIONS

*   Compute the mean and median swim times, storing them in variables mean_time and median_time. The swim times are contained in mens_200_free_heats.
*   Draw 10,000 bootstrap replicates each of the mean and median swim time using dcst.draw_bs_reps(). Store the results in bs_reps_mean and bs_reps_median.
*   Compute the 95% confidence intervals for the mean and median using the bootstrap replicates and np.percentile().
*   Hit 'Submit Answer' to print the results to the screen!
'''

# Point estimates of the central swim time: mean and median
mean_time = np.mean(mens_200_free_heats)
median_time = np.median(mens_200_free_heats)

# 10,000 bootstrap replicates of each statistic (mean first, then median)
bs_reps_mean = dcst.draw_bs_reps(mens_200_free_heats, np.mean, size=10000)
bs_reps_median = dcst.draw_bs_reps(mens_200_free_heats, np.median, size=10000)

# 95% confidence intervals: 2.5th and 97.5th percentiles of the replicates
conf_int_mean = np.percentile(bs_reps_mean, [2.5, 97.5])
conf_int_median = np.percentile(bs_reps_median, [2.5, 97.5])

# Report each statistic with the lower and upper bound of its interval
print("""
mean time: {0:.2f} sec.
95% conf int of mean: [{1:.2f}, {2:.2f}] sec.

median time: {3:.2f} sec.
95% conf int of median: [{4:.2f}, {5:.2f}] sec.
""".format(mean_time, conf_int_mean[0], conf_int_mean[1],
           median_time, conf_int_median[0], conf_int_median[1]))

Example #8

0

Show file

File: 02-estimates-of-the-mean-interearthquake-times.py Project: mehrdad2275/datacamp-1

])
'''
INSTRUCTIONS

*   Compute the mean interearthquake time for pre- (dt_pre) and post-2010 (dt_post).
*   Draw 10,000 bootstrap replicates of the mean for the pre- and post-2010 data sets.
*   Use np.percentile() to compute the 95% confidence interval of the mean for both data sets.
*   Hit 'Submit Answer' to print the results to the screen.
'''

# Point estimates: mean time between earthquakes for each data set
mean_dt_pre = np.mean(dt_pre)
mean_dt_post = np.mean(dt_post)

# 10,000 bootstrap replicates of the mean (pre data first, then post)
bs_reps_pre = dcst.draw_bs_reps(dt_pre, np.mean, size=10000)
bs_reps_post = dcst.draw_bs_reps(dt_post, np.mean, size=10000)

# 95% confidence intervals: 2.5th and 97.5th percentiles of the replicates
conf_int_pre = np.percentile(bs_reps_pre, [2.5, 97.5])
conf_int_post = np.percentile(bs_reps_post, [2.5, 97.5])

# Report the first period
print("""1980 through 2009
mean time gap: {0:.2f} days
95% conf int: [{1:.2f}, {2:.2f}] days""".format(
    mean_dt_pre, conf_int_pre[0], conf_int_pre[1]))

# Report the second period
print("""
2010 through mid-2017
mean time gap: {0:.2f} days
95% conf int: [{1:.2f}, {2:.2f}] days""".format(
    mean_dt_post, conf_int_post[0], conf_int_post[1]))

Example #9

0

Show file

# %% [markdown]
# Aftershocks happen within 5 days based on our filter

# %%
# visualize: ECDF of the inter-earthquake times
_ = sns.ecdfplot(data=intereq_time)
_ = plt.xlabel('Inter-earthquake time (days)')

# %%
# calculate observed statistics
mean_intereq_time = np.mean(intereq_time)
print('Observed mean =', mean_intereq_time, 'days')

# draw bootstrap replicates of the mean to estimate the population parameter
bs_reps = dcst.draw_bs_reps(intereq_time, np.mean, 10000)
# summarise the replicates: mean, median, and 2.5th/97.5th percentiles
bs_mean = np.mean(bs_reps)
bs_median = np.median(bs_reps)
bs_ci = tuple(np.percentile(bs_reps, [2.5, 97.5]))
# print('Bootstrap mean = {} with 95% ci = {}'.format(bs_mean, bs_ci))

# plot histogram of the replicates
# NOTE(review): the red line marks the median of the replicates, while the
# title reports their mean -- confirm this is intended
_ = sns.histplot(bs_reps, bins=20)
_ = plt.axvline(x=bs_median, color='red')
_ = plt.title('Bootstrap mean = {} with 95% ci = {}'.format(bs_mean, bs_ci))
plt.show()

# %%
# 3. Did earthquake frequencies change after fracking became widespread?
'''
Null hypothesis: Before and after 2010 have the same mean inter-earthquake time

Example #10

0

Show file

# summary statistics of the time between earthquakes
display(time_between_eqs.describe())
# ECDF of the day component of those gaps
_ = sns.ecdfplot(data=time_between_eqs.dt.days)
_ = plt.xlabel('days')
plt.show()

# %% [markdown]
# Most of them are not that far apart but those may be aftershocks.

# %%
# 2. Infer expected values and confidence interval for time between earthquakes
# gaps between earthquakes, in days
days_between_eqs = time_between_eqs.dt.days
# 10,000 bootstrap replicates of the mean gap
bs_reps = dcst.draw_bs_reps(days_between_eqs, np.mean, 10000)
# centre of the replicate distribution
bs_reps_mean = np.mean(bs_reps)
bs_reps_median = np.median(bs_reps)
# 95% confidence interval from the 2.5th and 97.5th percentiles
bs_reps_ci = tuple(np.percentile(bs_reps, [2.5, 97.5]))
# histogram of the replicates, with their median marked in red
ax = sns.histplot(data=bs_reps)
ax.set_title('Bs reps of the mean time between earthquakes')
ax.set_xlabel('sample means')
ax.axvline(bs_reps_median, color='r')
plt.show()
# report the estimate together with its interval
print(
    'Expected value of time between earthquakes is {} days with 95% ci {}'.format(
        bs_reps_mean, bs_reps_ci))