def test_pearson_r(data):
    """Check dcst.pearson_r against two reference computations.

    ``data`` is unpacked into a pair of arrays ``(x, y)``.  If either array
    is constant to within ``atol`` (a name defined outside this function;
    NaNs are treated as equal), the result is expected to be NaN.  Otherwise
    it must agree with ``original.pearson_r`` and with the off-diagonal
    entry of ``np.corrcoef(x, y)``.
    """
    x, y = data

    def is_constant(values):
        # True when every entry equals the first one to within `atol`.
        return np.allclose(values, values[0], atol=atol, equal_nan=True)

    # `or` short-circuits: y is only inspected when x is not constant.
    if not (is_constant(x) or is_constant(y)):
        assert np.isclose(dcst.pearson_r(x, y), original.pearson_r(x, y))
        assert np.isclose(dcst.pearson_r(x, y), np.corrcoef(x, y)[0, 1])
        return

    # At least one input is constant.
    assert np.isnan(dcst.pearson_r(x, y))
def hypothesis_testing(lanes, f_13, n_reps=10000):
    """Run a one-sided permutation test on the correlation of two arrays.

    The observed Pearson correlation between ``lanes`` and ``f_13`` is
    compared against correlations obtained after randomly permuting
    ``lanes``.  The p-value is the fraction of permutation replicates whose
    correlation is at least as large as the observed one; it is printed,
    not returned.

    Parameters
    ----------
    lanes : array_like
        Values to be scrambled for each replicate.
    f_13 : array_like
        Values paired with ``lanes``; left in their original order.
    n_reps : int, optional
        Number of permutation replicates to draw (default 10000).

    Raises
    ------
    ValueError
        If ``n_reps`` is less than 1 (the p-value would be 0/0).
    """
    if n_reps < 1:
        raise ValueError("n_reps must be a positive integer.")

    # Compute observed correlation: rho
    rho = dcst.pearson_r(lanes, f_13)

    # Initialize permutation reps: perm_reps_rho
    perm_reps_rho = np.empty(n_reps)

    # Make permutation reps
    for i in range(n_reps):
        # Scramble the lanes array: scrambled_lanes
        scrambled_lanes = np.random.permutation(lanes)

        # Compute the Pearson correlation coefficient
        perm_reps_rho[i] = dcst.pearson_r(scrambled_lanes, f_13)

    # Compute and print p-value; the divisor follows the replicate count
    # so it can never drift out of sync with the array size.
    p_val = np.sum(perm_reps_rho >= rho) / n_reps
    print('p =', p_val)
def hypothesis_are_they_slowing_down(split_number, splits, n_reps=10000):
    """Run a one-sided permutation test of split number vs. mean split.

    ``splits`` is averaged along axis 0, and the Pearson correlation of
    that mean with ``split_number`` is compared against correlations
    obtained after randomly permuting ``split_number``.  The p-value is the
    fraction of replicates whose correlation is at least as large as the
    observed one; it is printed, not returned.

    Parameters
    ----------
    split_number : array_like
        Values to be scrambled for each replicate.
    splits : array_like
        Array reduced with ``np.mean(splits, axis=0)`` before correlating.
    n_reps : int, optional
        Number of permutation replicates to draw (default 10000).

    Raises
    ------
    ValueError
        If ``n_reps`` is less than 1 (the p-value would be 0/0).
    """
    if n_reps < 1:
        raise ValueError("n_reps must be a positive integer.")

    mean_splits = np.mean(splits, axis=0)

    # Observed correlation
    rho = dcst.pearson_r(split_number, mean_splits)

    # Initialize permutation reps
    perm_reps_rho = np.empty(n_reps)

    # Make permutation reps
    for i in range(n_reps):
        # Scramble the split number array
        scrambled_split_number = np.random.permutation(split_number)

        # Compute the Pearson correlation coefficient
        perm_reps_rho[i] = dcst.pearson_r(scrambled_split_number, mean_splits)

    # Compute and print p-value
    p_val = np.sum(perm_reps_rho >= rho) / len(perm_reps_rho)
    print('p =', p_val)
def test_pearson_r_edge():
    """Check that dcst.pearson_r raises RuntimeError on unusable input.

    Each case is an ``(x, y, message)`` triple: ``dcst.pearson_r(x, y)``
    must raise ``RuntimeError`` and the exception text must match
    ``message``.
    """
    too_few = "Arrays must have at least 2 mutual non-NaN entries."
    not_finite = "All entries in arrays must be finite."

    cases = [
        # Empty arrays
        (np.array([]), np.array([]), too_few),
        # Single entry, NaN in both
        (np.array([np.nan]), np.array([np.nan]), too_few),
        # No position where both arrays are non-NaN
        (np.array([np.nan, 1]), np.array([1, np.nan]), too_few),
        # Infinite entry
        (np.array([0, 1, 5]), np.array([1, np.inf, 3]), not_finite),
    ]

    for x, y, message in cases:
        with pytest.raises(RuntimeError) as excinfo:
            dcst.pearson_r(x, y)
        excinfo.match(message)
# Permutation test: is split number positively correlated with the mean
# split time?  Expects `np`, `dcst`, `splits` and `split_number` to already
# be available in the namespace (they are not defined in this snippet).

# Compute the mean split times
mean_splits = np.mean(splits, axis=0)

'''
INSTRUCTIONS

*   Compute the observed Pearson correlation, storing it as rho.
*   Using np.empty(), initialize the array of 10,000 permutation replicates
    of the Pearson correlation, naming it perm_reps_rho.
*   Write a for loop to:
    *   Scramble the split number array using np.random.permutation(),
        naming it scrambled_split_number.
    *   Compute the Pearson correlation coefficient between the scrambled
        split number array and the mean split times and store it in
        perm_reps_rho.
*   Compute the p-value and display it on the screen. Take "at least as
    extreme as" to mean that the Pearson correlation is at least as big as
    was observed.
'''

# Observed correlation
rho = dcst.pearson_r(split_number, mean_splits)

# Initialize permutation reps
perm_reps_rho = np.empty(10000)

# Make permutation reps
for i in range(10000):
    # Scramble the split number array
    scrambled_split_number = np.random.permutation(split_number)

    # Compute the Pearson correlation coefficient
    perm_reps_rho[i] = dcst.pearson_r(scrambled_split_number, mean_splits)

# Compute and print p-value: fraction of replicates at least as large as
# the observed correlation.
p_val = np.sum(perm_reps_rho >= rho) / len(perm_reps_rho)
print('p =', p_val)
"""
Hypothesis test: can this be by chance?

The EDA and linear regression analysis are pretty conclusive. Nonetheless,
you will top off the analysis of the zigzag effect by testing the hypothesis
that lane assignment has nothing to do with the mean fractional difference
between even and odd lanes using a permutation test. You will use the
Pearson correlation coefficient, which you can compute with
dcst.pearson_r(), as the test statistic. The variables lanes and f_13 are
already in your namespace.
"""
import numpy as np
import dc_stat_think as dcst

# Compute observed correlation: rho
rho = dcst.pearson_r(lanes, f_13)

# Initialize permutation reps: perm_reps_rho
perm_reps_rho = np.empty(10000)

# Make permutation reps
for i in range(10000):
    # Scramble the lanes array: scrambled_lanes
    scrambled_lanes = np.random.permutation(lanes)

    # Compute the Pearson correlation coefficient
    perm_reps_rho[i] = dcst.pearson_r(scrambled_lanes, f_13)

# Compute and print p-value.  Compare the WHOLE replicate array with rho;
# indexing with the leftover loop variable would test only the final
# replicate and yield a p-value of either 0 or 1/10000.
p_val = np.sum(perm_reps_rho >= rho) / len(perm_reps_rho)
print('p =', p_val)