def test_pandas_conversion(seed):
    """Check that dcst functions give the same answers for pandas Series
    as for the equivalent NumPy data.

    Covers ``ecdf``, ``ks_stat``, ``draw_bs_reps`` and ``draw_perm_reps``.
    Some columns contain NaNs; the expected ECDF values below list only
    the non-NaN entries, and the K-S reference uses ``dropna()``.

    Parameters
    ----------
    seed
        Value handed to ``dcst_private._seed_numba`` before every paired
        call so that the Series and ``.values`` runs can be compared.
    """
    # --- ECDF on small hand-checked columns ---------------------------
    small = pd.DataFrame(
        dict(
            a=[3, 2, 1, 4],
            b=[8, 6, 7, 5],
            c=[9.1, 10.1, 11.1, np.nan],
        )
    )

    x, y = dcst.ecdf(small.loc[:, 'a'])
    assert np.all(x == np.array([1, 2, 3, 4]))
    assert np.all(y == np.array([0.25, 0.5, 0.75, 1.0]))

    x, y = dcst.ecdf(small.loc[:, 'c'])
    assert np.allclose(x, np.array([9.1, 10.1, 11.1]))
    assert np.allclose(y, np.array([1 / 3, 2 / 3, 1.0]))

    # --- K-S statistic with a mostly-NaN column -----------------------
    sparse_a = np.concatenate(
        (np.random.normal(0, 1, size=10), [np.nan] * 990)
    )
    dense_b = np.random.normal(0, 1, size=1000)
    sparse = pd.DataFrame({'a': sparse_a, 'b': dense_b})

    ks_expected, _ = st.ks_2samp(sparse['a'].dropna(), sparse['b'])
    assert np.isclose(dcst.ks_stat(sparse['a'], sparse['b']), ks_expected)

    # --- Replicate generators: Series input vs. raw values ------------
    mixed_a = np.concatenate(
        (np.random.normal(0, 1, size=80), [np.nan] * 20)
    )
    mixed_b = np.random.normal(0, 1, size=100)
    mixed = pd.DataFrame({'a': mixed_a, 'b': mixed_b})

    for column in ('a', 'b'):
        # Re-seed before each call so both runs see the same stream.
        dcst_private._seed_numba(seed)
        expected = dcst.draw_bs_reps(mixed[column].values, np.mean, size=100)
        dcst_private._seed_numba(seed)
        observed = dcst.draw_bs_reps(mixed[column], np.mean, size=100)
        assert np.allclose(observed, expected, atol=atol)

    dcst_private._seed_numba(seed)
    expected = dcst.draw_perm_reps(
        mixed['a'].values, mixed['b'].values, dcst.diff_of_means, size=100
    )
    dcst_private._seed_numba(seed)
    observed = dcst.draw_perm_reps(
        mixed['a'], mixed['b'], dcst.diff_of_means, size=100
    )
    assert np.allclose(observed, expected, atol=atol)
def hypothesis_test(time, mags, split_time=2010, mt=3, size=10000):
    """Permutation test for a change in mean magnitude across a split time.

    The magnitudes are split into a "pre" group (``time < split_time``)
    and a "post" group (``time >= split_time``), values below the
    threshold ``mt`` are discarded, and the observed difference of means
    (post minus pre) is compared with permutation replicates of the same
    statistic.

    Parameters
    ----------
    time, mags
        Equal-length arrays supporting boolean-mask indexing; ``time``
        is compared against ``split_time`` to build the masks.
    split_time
        Boundary between the two groups. Defaults to 2010, the value
        that was previously hard-coded.
    mt
        Completeness threshold; only magnitudes ``>= mt`` are kept.
        Defaults to 3.
    size
        Number of permutation replicates to draw. Defaults to 10000.

    Returns
    -------
    p_val
        Fraction of permutation replicates strictly smaller than the
        observed difference. The value is also printed as ``p = ...``.
    """
    mags_pre = mags[time < split_time]
    mags_post = mags[time >= split_time]

    # Only magnitudes at or above the completeness threshold
    mags_pre = mags_pre[mags_pre >= mt]
    mags_post = mags_post[mags_post >= mt]

    # Observed difference in mean magnitudes (post minus pre)
    diff_obs = np.mean(mags_post) - np.mean(mags_pre)

    # Permutation replicates of the difference of means
    perm_reps = dcst.draw_perm_reps(
        mags_post, mags_pre, dcst.diff_of_means, size=size
    )

    # Divide by the actual number of replicates rather than repeating
    # the literal, so the p-value stays correct when ``size`` changes.
    p_val = np.sum(perm_reps < diff_obs) / len(perm_reps)
    print('p =', p_val)
    return p_val
def permutation_test_wild_type_vs_heterozygote(bout_lengths_het, bout_lengths_wt):
    """Run a permutation test on the difference of mean bout lengths.

    Computes the observed difference of means (heterozygote minus wild
    type), draws 10000 permutation replicates of that statistic, shows a
    histogram of the replicates with the observed value marked in red,
    and prints the p-value.

    The p-value is the fraction of replicates that are greater than or
    equal to the observed difference. Nothing is returned.
    """
    observed_diff = np.mean(bout_lengths_het) - np.mean(bout_lengths_wt)

    replicates = dcst.draw_perm_reps(
        bout_lengths_het,
        bout_lengths_wt,
        dcst.diff_of_means,
        size=10000,
    )

    n_at_least_observed = np.sum(replicates >= observed_diff)
    p_value = n_at_least_observed / len(replicates)

    # Histogram of the replicates with the observed statistic overlaid
    _figure, axes = plt.subplots()
    axes.hist(replicates, bins="sqrt", density=True)
    axes.set_xlabel("bout length")
    axes.set_ylabel("Probability")
    axes.axvline(observed_diff, color="red")
    plt.show()

    print('p =', p_value)
# 95% intervals: 2.5th and 97.5th percentiles of each set of replicates
conf_int_wt = np.percentile(bs_reps_wt, q=[2.5, 97.5])
conf_int_mut = np.percentile(bs_reps_mut, q=[2.5, 97.5])

# Report the means together with the interval bounds
print(
    """ wt: mean = {0:.3f} min., conf. int. = [{1:.1f}, {2:.1f}] min. mut: mean = {3:.3f} min., conf. int. = [{4:.1f}, {5:.1f}] min. """.format(
        mean_wt,
        conf_int_wt[0],
        conf_int_wt[1],
        mean_mut,
        conf_int_mut[0],
        conf_int_mut[1],
    )
)

#%%
# Observed test statistic: heterozygote mean minus wild-type mean
diff_means_exp = np.mean(bout_lengths_het) - np.mean(bout_lengths_wt)

# 10000 permutation replicates of the difference of means
perm_reps = dcst.draw_perm_reps(
    bout_lengths_het,
    bout_lengths_wt,
    dcst.diff_of_means,
    size=10000,
)

# p-value: fraction of replicates at least as large as the observed value
p_val = (perm_reps >= diff_means_exp).sum() / len(perm_reps)

print('p =', p_val)

#%%
# Pool both samples and take the mean of the pooled data
bout_lengths_concat = np.concatenate([bout_lengths_wt, bout_lengths_het])
mean_bout_length = bout_lengths_concat.mean()

# Generate shifted arrays
def test_draw_perm_reps(data_1, data_2, seed):
    """Compare ``no_numba.draw_perm_reps`` against ``original.draw_perm_reps``.

    For each test statistic, both implementations are run after seeding
    NumPy's global RNG with ``seed`` and their single replicate must
    agree within ``atol`` (NaNs compare equal). ``dcst.draw_perm_reps``
    is also called for each statistic, but its result is not compared
    with anything.

    Statistics covered: difference of means, studentized difference of
    means, a user function taking an extra argument through ``args``,
    and a user-defined difference of medians.
    """

    def my_fun(x, y, mult):
        return (np.mean(x) + np.mean(y)) * mult

    def my_fun_orig(x, y):
        # Same as my_fun with the multiplier fixed, for the reference
        # call, which is made without ``args``.
        return (np.mean(x) + np.mean(y)) * 2.4

    def diff_of_medians(data_1, data_2):
        return np.median(data_1) - np.median(data_2)

    def compare(stat_no_numba, stat_original, stat_dcst, **extra):
        # Have to use size=1 because np.random.shuffle and
        # np.random.permutation give different results on and after
        # the 2nd call.
        np.random.seed(seed)
        candidate = no_numba.draw_perm_reps(
            data_1, data_2, stat_no_numba, size=1, **extra
        )

        np.random.seed(seed)
        reference = original.draw_perm_reps(
            data_1, data_2, stat_original, size=1
        )

        # Exercised only; the return value is intentionally unused.
        dcst.draw_perm_reps(data_1, data_2, stat_dcst, size=1, **extra)

        assert np.allclose(reference, candidate, atol=atol, equal_nan=True)

    compare(
        no_numba.diff_of_means,
        original.diff_of_means,
        dcst.diff_of_means,
    )
    compare(
        no_numba.studentized_diff_of_means,
        no_numba.studentized_diff_of_means,
        dcst.studentized_diff_of_means,
    )
    compare(my_fun, my_fun_orig, my_fun, args=(2.4, ))
    compare(diff_of_medians, diff_of_medians, diff_of_medians)
mags_post = mags[time >= 2010]

# INSTRUCTIONS
#
# * Slice out the magnitudes of earthquakes before 2010 that have a
#   magnitude above (or equal to) the completeness threshold and
#   overwrite mags_pre with the result. Do the same for mags_post.
# * Compute the observed difference in mean magnitudes, subtracting the
#   magnitudes of pre-2010 earthquakes from those of post-2010
#   earthquakes.
# * Generate 10,000 permutation replicates using dcst.draw_perm_reps().
#   Use dcst.diff_of_means as the argument for func.
# * Compute and print the p-value taking "at least as extreme as" to
#   mean that the test statistic is smaller than what was observed.

# Keep only magnitudes at or above the completeness threshold
mags_pre, mags_post = (
    mags_pre[mags_pre >= mt],
    mags_post[mags_post >= mt],
)

# Observed difference in mean magnitudes (post minus pre): diff_obs
diff_obs = np.mean(mags_post) - np.mean(mags_pre)

# Permutation replicates of the difference of means: perm_reps
perm_reps = dcst.draw_perm_reps(
    mags_post,
    mags_pre,
    func=dcst.diff_of_means,
    size=10000,
)

# p-value: fraction of replicates smaller than the observed difference
p_val = (perm_reps < diff_obs).sum() / 10000
print('p =', p_val)

# Recorded output:
#
#     p = 0.0993
#
# A p-value around 0.1 suggests that the observed magnitudes are
# commensurate with there being no change in b-value after wastewater
# injection began.