def test_pearson_r(data):
    """Compare dcst.pearson_r against two reference implementations.

    ``data`` is unpacked into a pair of arrays ``(x, y)``. When either
    array is constant (every entry within ``atol`` of its first entry,
    with NaNs treated as equal) the result must be NaN; otherwise it must
    agree with ``original.pearson_r`` and with ``np.corrcoef``.

    NOTE(review): ``atol``, ``dcst`` and ``original`` are module-level
    names defined outside this block.
    """
    x, y = data

    def is_constant(values):
        # True when every entry matches the first one within tolerance.
        return np.allclose(values, values[0], atol=atol, equal_nan=True)

    # ``or`` short-circuits, so y is only inspected when x is not constant.
    if is_constant(x) or is_constant(y):
        assert np.isnan(dcst.pearson_r(x, y))
        return

    assert np.isclose(dcst.pearson_r(x, y), original.pearson_r(x, y))
    assert np.isclose(dcst.pearson_r(x, y), np.corrcoef(x, y)[0, 1])
# Example #2
# 0
def hypothesis_testing(lanes, f_13):
    """Run a permutation test on the Pearson correlation of two arrays.

    The observed statistic is ``dcst.pearson_r(lanes, f_13)``. ``lanes``
    is shuffled 10000 times and the correlation with ``f_13`` is
    recomputed for each shuffle. The printed p-value is the fraction of
    shuffled correlations that are at least as large as the observed one.

    Prints ``p = <value>`` and returns None.
    """
    n_reps = 10000

    # Test statistic on the data as given.
    observed = dcst.pearson_r(lanes, f_13)

    # One correlation per random reordering of ``lanes``.
    replicates = np.array(
        [
            dcst.pearson_r(np.random.permutation(lanes), f_13)
            for _ in range(n_reps)
        ],
        dtype=float,
    )

    # One-sided: count replicates at least as large as the observation.
    p_val = (replicates >= observed).sum() / n_reps
    print('p =', p_val)
def hypothesis_are_they_slowing_down(split_number, splits):
    """Run a permutation test on split number versus mean split values.

    ``splits`` is averaged along axis 0, and the observed statistic is
    the Pearson correlation between ``split_number`` and those means.
    ``split_number`` is shuffled 10000 times and the correlation is
    recomputed for each shuffle. The printed p-value is the fraction of
    shuffled correlations that are at least as large as the observed one.

    Prints ``p = <value>`` and returns None.
    """
    n_reps = 10000

    averaged = np.mean(splits, axis=0)

    # Test statistic on the data as given.
    observed = dcst.pearson_r(split_number, averaged)

    # One correlation per random reordering of ``split_number``.
    replicates = np.array(
        [
            dcst.pearson_r(np.random.permutation(split_number), averaged)
            for _ in range(n_reps)
        ],
        dtype=float,
    )

    # One-sided: count replicates at least as large as the observation.
    p_val = (replicates >= observed).sum() / len(replicates)
    print('p =', p_val)
def test_pearson_r_edge():
    """Check that dcst.pearson_r raises RuntimeError on degenerate input.

    Covers empty arrays, all-NaN arrays, arrays whose NaNs leave no
    mutual non-NaN pair, and an array containing an infinite entry. Each
    case must raise RuntimeError with the matching message.
    """
    too_few = "Arrays must have at least 2 mutual non-NaN entries."
    not_finite = "All entries in arrays must be finite."

    # (x, y, expected error message), checked in this order.
    cases = (
        (np.array([]), np.array([]), too_few),
        (np.array([np.nan]), np.array([np.nan]), too_few),
        (np.array([np.nan, 1]), np.array([1, np.nan]), too_few),
        (np.array([0, 1, 5]), np.array([1, np.inf, 3]), not_finite),
    )

    for x, y, message in cases:
        with pytest.raises(RuntimeError) as excinfo:
            dcst.pearson_r(x, y)
        excinfo.match(message)
# Permutation test on split number versus mean split time.
# NOTE(review): `split_number` and `splits` are not defined in this script;
# they are presumably already present in the namespace -- confirm.

# Compute the mean split times
mean_splits = np.mean(splits, axis=0)

'''
INSTRUCTIONS

*   Compute the observed Pearson correlation, storing it as rho.
*   Using np.empty(), initialize the array of 10,000 permutation replicates of the Pearson correlation, naming it perm_reps_rho.
*   Write a for loop to:
    *   Scramble the split number array using np.random.permutation(), naming it scrambled_split_number.
    *   Compute the Pearson correlation coefficient between the scrambled split number array and the mean split times and store it in perm_reps_rho.
*   Compute the p-value and display it on the screen. Take "at least as extreme as" to mean that the Pearson correlation is at least as big as was observed.
'''

# Observed correlation
rho = dcst.pearson_r(split_number, mean_splits)

# Initialize permutation reps
perm_reps_rho = np.empty(10000)

# Make permutation reps
for i in range(10000):
    # Scramble the split number array
    scrambled_split_number = np.random.permutation(split_number)

    # Compute the Pearson correlation coefficient
    perm_reps_rho[i] = dcst.pearson_r(scrambled_split_number, mean_splits)

# Compute and print p-value: the fraction of replicates whose correlation
# is at least as big as the observed one.
p_val = np.sum(perm_reps_rho >= rho) / len(perm_reps_rho)
print('p =', p_val)
"""
Hypothesis test: can this be by chance?

The EDA and linear regression analyses are pretty conclusive. Nonetheless, you will top off the analysis of the
zigzag effect by testing the hypothesis that lane assignment has nothing to do with the mean fractional difference
between even and odd lanes using a permutation test. You will use the Pearson correlation coefficient,
which you can compute with dcst.pearson_r(), as the test statistic. The variables lanes and f_13 are already in
your namespace.
"""
import numpy as np
import dc_stat_think as dcst

# Permutation test on lane number versus f_13.
# NOTE(review): `lanes` and `f_13` are not defined in this script; the
# accompanying text says they are already in the namespace.

# Compute observed correlation: rho
rho = dcst.pearson_r(lanes, f_13)

# Initialize permutation reps: perm_reps_rho
perm_reps_rho = np.empty(10000)

# Make permutation reps
for i in range(10000):
    # Scramble the lanes array: scrambled_lanes
    scrambled_lanes = np.random.permutation(lanes)

    # Compute the Pearson correlation coefficient
    perm_reps_rho[i] = dcst.pearson_r(scrambled_lanes, f_13)

# Compute and print p-value. The comparison must run over the whole
# replicate array; indexing with the leftover loop variable would test
# only the final replicate.
p_val = np.sum(perm_reps_rho >= rho) / len(perm_reps_rho)
print('p =', p_val)