Exemple #1
0
def test_pandas_conversion(seed):
    """Check that dcst functions give the same answers for pandas input.

    Covers ``dcst.ecdf`` and ``dcst.ks_stat`` on Series (including columns
    containing NaN), and verifies that ``dcst.draw_bs_reps`` and
    ``dcst.draw_perm_reps`` return matching replicates whether they are
    handed a Series or the Series' underlying ``.values`` array.

    Parameters
    ----------
    seed
        Value passed to ``dcst_private._seed_numba`` before each of the two
        calls in every Series-vs-array comparison.
    """
    # --- ecdf on Series columns -------------------------------------------
    small = pd.DataFrame({
        'a': [3, 2, 1, 4],
        'b': [8, 6, 7, 5],
        'c': [9.1, 10.1, 11.1, np.nan],
    })

    x, y = dcst.ecdf(small.loc[:, 'a'])
    x_matches = x == np.array([1, 2, 3, 4])
    y_matches = y == np.array([0.25, 0.5, 0.75, 1.0])
    assert x_matches.all()
    assert y_matches.all()

    # Column 'c' ends in NaN; the expected arrays hold only three points.
    x, y = dcst.ecdf(small.loc[:, 'c'])
    assert np.allclose(x, np.array([9.1, 10.1, 11.1]))
    assert np.allclose(y, np.array([1 / 3, 2 / 3, 1.0]))

    # --- ks_stat with a mostly-NaN column ---------------------------------
    mostly_nan = np.concatenate(
        (np.random.normal(0, 1, size=10), [np.nan] * 990))
    fully_populated = np.random.normal(0, 1, size=1000)
    ks_frame = pd.DataFrame({'a': mostly_nan, 'b': fully_populated})

    # Reference statistic from SciPy, computed on the NaN-free values of 'a'.
    expected_ks, _ = st.ks_2samp(ks_frame['a'].dropna(), ks_frame['b'])
    assert np.isclose(dcst.ks_stat(ks_frame['a'], ks_frame['b']), expected_ks)

    # --- replicate generators: Series vs. raw array -----------------------
    reps_frame = pd.DataFrame({
        'a': np.concatenate((np.random.normal(0, 1, size=80), [np.nan] * 20)),
        'b': np.random.normal(0, 1, size=100),
    })

    # Bootstrap replicates of the mean, once per column.
    for column in ('a', 'b'):
        dcst_private._seed_numba(seed)
        from_array = dcst.draw_bs_reps(reps_frame[column].values,
                                       np.mean,
                                       size=100)
        dcst_private._seed_numba(seed)
        from_series = dcst.draw_bs_reps(reps_frame[column], np.mean, size=100)
        assert np.allclose(from_series, from_array, atol=atol)

    # Permutation replicates of the difference of means.
    dcst_private._seed_numba(seed)
    from_array = dcst.draw_perm_reps(reps_frame['a'].values,
                                     reps_frame['b'].values,
                                     dcst.diff_of_means,
                                     size=100)
    dcst_private._seed_numba(seed)
    from_series = dcst.draw_perm_reps(reps_frame['a'],
                                      reps_frame['b'],
                                      dcst.diff_of_means,
                                      size=100)
    assert np.allclose(from_series, from_array, atol=atol)
Exemple #2
0
def hypothesis_test(time, mags, split_time=2010, mt=3, size=10000):
    """Permutation test for a change in mean magnitude across a split time.

    The magnitudes are divided into a "pre" group (``time < split_time``)
    and a "post" group (``time >= split_time``), each restricted to values
    at or above the completeness threshold ``mt``.  The test statistic is
    ``mean(post) - mean(pre)``; the p-value is the fraction of permutation
    replicates that are strictly smaller than the observed statistic.

    Parameters
    ----------
    time, mags
        Arrays supporting boolean-mask indexing; ``mags`` is indexed with
        masks built from ``time``, so the two must be aligned.
    split_time
        Boundary between the two groups.  Defaults to 2010.
    mt
        Completeness threshold; magnitudes below it are discarded.
        Defaults to 3.
    size
        Number of permutation replicates to draw.  Defaults to 10000.

    Returns
    -------
    The p-value, which is also printed as ``p = <value>``.
    """
    is_post = time >= split_time
    mags_pre = mags[~is_post] if False else mags[time < split_time]
    mags_post = mags[is_post]

    # Only magnitudes at or above the completeness threshold
    mags_pre = mags_pre[mags_pre >= mt]
    mags_post = mags_post[mags_post >= mt]

    # Observed difference in mean magnitudes: diff_obs
    diff_obs = np.mean(mags_post) - np.mean(mags_pre)

    # Generate permutation replicates: perm_reps
    perm_reps = dcst.draw_perm_reps(mags_post,
                                    mags_pre,
                                    dcst.diff_of_means,
                                    size=size)

    # Normalise by the number of replicates actually drawn rather than a
    # second copy of the literal, so the two can never drift apart.
    p_val = np.sum(perm_reps < diff_obs) / len(perm_reps)
    print('p =', p_val)
    return p_val
Exemple #3
0
def permutation_test_wild_type_vs_heterozygote(bout_lengths_het, bout_lengths_wt):
    """Run, plot and report a permutation test on the difference of means.

    The observed statistic is ``mean(bout_lengths_het) -
    mean(bout_lengths_wt)``.  10000 permutation replicates of
    ``dcst.diff_of_means`` are drawn, and the p-value is the fraction of
    replicates greater than or equal to the observed statistic.

    A normalized histogram of the replicates is shown with a red vertical
    line at the observed statistic, and the p-value is printed.  Nothing is
    returned.
    """
    observed_diff = np.mean(bout_lengths_het) - np.mean(bout_lengths_wt)

    perm_reps = dcst.draw_perm_reps(
        bout_lengths_het,
        bout_lengths_wt,
        dcst.diff_of_means,
        size=10000,
    )

    # Count the replicates at least as large as the observed difference,
    # then normalise by the number of replicates.
    n_at_least_observed = np.sum(perm_reps >= observed_diff)
    p_val = n_at_least_observed / len(perm_reps)

    # Histogram of the replicates with the observed value marked in red.
    _, axes = plt.subplots()
    axes.hist(perm_reps, bins="sqrt", density=True)
    axes.set_xlabel("bout length")
    axes.set_ylabel("Probability")
    axes.axvline(observed_diff, color="red")
    plt.show()

    print('p =', p_val)
Exemple #4
0
# 95% confidence intervals: the 2.5th and 97.5th percentiles of each set of
# replicates (bs_reps_wt / bs_reps_mut are presumably the bootstrap replicates
# computed earlier in the script, outside this excerpt).
conf_int_wt = np.percentile(bs_reps_wt, [2.5, 97.5])
conf_int_mut = np.percentile(bs_reps_mut, [2.5, 97.5])

# Print the results: each mean to three decimals, each interval bound to one.
# The two-element percentile arrays are unpacked into the lower/upper slots.
print("""
wt:  mean = {0:.3f} min., conf. int. = [{1:.1f}, {2:.1f}] min.
mut: mean = {3:.3f} min., conf. int. = [{4:.1f}, {5:.1f}] min.
""".format(mean_wt, *conf_int_wt, mean_mut, *conf_int_mut))

#%%

# Compute the observed difference of means (heterozygote minus wild type):
# diff_means_exp
diff_means_exp = np.mean(bout_lengths_het) - np.mean(bout_lengths_wt)

# Draw 10000 permutation replicates of the difference of means: perm_reps
perm_reps = dcst.draw_perm_reps(bout_lengths_het, bout_lengths_wt, 
                               dcst.diff_of_means, size=10000)

# Compute the p-value: p_val, the fraction of permutation replicates that are
# at least as large as the observed difference
p_val = np.sum(perm_reps >= diff_means_exp) / len(perm_reps)

# Print the result
print('p =', p_val)

#%%
# Concatenate arrays (wild type first, then heterozygote): bout_lengths_concat
bout_lengths_concat = np.concatenate((bout_lengths_wt, bout_lengths_het))

# Compute mean of all bout_lengths: mean_bout_length
mean_bout_length = np.mean(bout_lengths_concat)

# Generate shifted arrays
# NOTE(review): the excerpt ends here; the shifting code itself is not shown.
Exemple #5
0
def test_draw_perm_reps(data_1, data_2, seed):
    """Compare ``no_numba.draw_perm_reps`` with ``original.draw_perm_reps``.

    For each test statistic, NumPy's global RNG is re-seeded with ``seed``
    immediately before each of the two calls, one replicate is drawn from
    each implementation, and the two are compared with ``np.allclose``
    (NaNs compare equal).  ``dcst.draw_perm_reps`` is also invoked for every
    statistic, but its output is not part of any assertion.

    Only ``size=1`` is used because np.random.shuffle and
    np.random.permutation give different results on and after the 2nd call.
    """

    # Custom statistics used further down.  ``my_fun`` takes its multiplier
    # through ``args``; ``my_fun_orig`` hard-codes the same 2.4 for the
    # reference call, which is made without ``args``.
    def my_fun(x, y, mult):
        return (np.mean(x) + np.mean(y)) * mult

    def my_fun_orig(x, y):
        return (np.mean(x) + np.mean(y)) * 2.4

    def diff_of_medians(data_1, data_2):
        return np.median(data_1) - np.median(data_2)

    # --- difference of means ----------------------------------------------
    np.random.seed(seed)
    candidate = no_numba.draw_perm_reps(data_1,
                                        data_2,
                                        no_numba.diff_of_means,
                                        size=1)
    np.random.seed(seed)
    reference = original.draw_perm_reps(data_1,
                                        data_2,
                                        original.diff_of_means,
                                        size=1)
    # NOTE(review): result is never compared; presumably this only checks
    # that the call completes -- confirm.
    dcst.draw_perm_reps(data_1, data_2, dcst.diff_of_means, size=1)
    assert np.allclose(reference, candidate, atol=atol, equal_nan=True)

    # --- studentized difference of means ----------------------------------
    # Both the candidate and the reference call receive the no_numba
    # statistic here.
    np.random.seed(seed)
    candidate = no_numba.draw_perm_reps(data_1,
                                        data_2,
                                        no_numba.studentized_diff_of_means,
                                        size=1)
    np.random.seed(seed)
    reference = original.draw_perm_reps(data_1,
                                        data_2,
                                        no_numba.studentized_diff_of_means,
                                        size=1)
    dcst.draw_perm_reps(data_1,
                        data_2,
                        dcst.studentized_diff_of_means,
                        size=1)
    assert np.allclose(reference, candidate, atol=atol, equal_nan=True)

    # --- custom statistic with an extra argument --------------------------
    np.random.seed(seed)
    candidate = no_numba.draw_perm_reps(data_1,
                                        data_2,
                                        my_fun,
                                        args=(2.4, ),
                                        size=1)
    np.random.seed(seed)
    reference = original.draw_perm_reps(data_1, data_2, my_fun_orig, size=1)
    dcst.draw_perm_reps(data_1, data_2, my_fun, args=(2.4, ), size=1)
    assert np.allclose(reference, candidate, atol=atol, equal_nan=True)

    # --- difference of medians --------------------------------------------
    np.random.seed(seed)
    candidate = no_numba.draw_perm_reps(data_1,
                                        data_2,
                                        diff_of_medians,
                                        size=1)
    np.random.seed(seed)
    reference = original.draw_perm_reps(data_1,
                                        data_2,
                                        diff_of_medians,
                                        size=1)
    dcst.draw_perm_reps(data_1, data_2, diff_of_medians, size=1)
    assert np.allclose(reference, candidate, atol=atol, equal_nan=True)
Exemple #6
0
# Keep the magnitudes whose time value is 2010 or later.
# NOTE(review): mags_pre and mt are used below but defined outside this
# excerpt -- presumably mags[time < 2010] and the completeness threshold.
mags_post = mags[time >= 2010]
'''
INSTRUCTIONS

*   Slice out the magnitudes of earthquakes before 2010 that have a magnitude above (or equal) the completeness threshold and overwrite mags_pre with the result. Do the same for mags_post.
*   Compute the observed difference in mean magnitudes, subtracting the magnitudes of pre-2010 earthquakes from those of post-2010 earthquakes.
*   Generate 10,000 permutation replicates using dcst.draw_perm_reps(). Use dcst.diff_of_means as the argument for func.
*   Compute and print the p-value taking "at least as extreme as" to mean that the test statistic is smaller than what was observed.
'''

# Only magnitudes at or above the completeness threshold mt
mags_pre = mags_pre[mags_pre >= mt]
mags_post = mags_post[mags_post >= mt]

# Observed difference in mean magnitudes (post minus pre): diff_obs
diff_obs = np.mean(mags_post) - np.mean(mags_pre)

# Generate 10000 permutation replicates of the difference of means: perm_reps
perm_reps = dcst.draw_perm_reps(mags_post,
                                mags_pre,
                                func=dcst.diff_of_means,
                                size=10000)

# Compute and print p-value: the fraction of replicates strictly smaller than
# the observed difference (the divisor matches size=10000 above)
p_val = np.sum(perm_reps < diff_obs) / 10000
print('p =', p_val)
'''
p = 0.0993

A p-value around 0.1 suggests that the observed magnitudes are commensurate with there being no change in b-value after wastewater injection began.
'''