def test_ndtri(self):
    """ndtri should reproduce the reference values from cephes.

    Evaluates ndtri at p = 0.00, 0.01, ..., 0.99 and compares the results
    with a reference table using a relative tolerance of 1e-6.
    """
    # Reference values for p = 0.51 .. 0.99. The table entries for
    # p = 0.01 .. 0.49 are the exact negatives of these in reverse order,
    # so they are derived below instead of being spelled out twice.
    upper_half = [
        0.0250689082587, 0.0501535834647, 0.0752698620998, 0.100433720511,
        0.125661346855, 0.150969215497, 0.176374164781, 0.201893479142,
        0.227544976641, 0.253347103136, 0.279319034447, 0.305480788099,
        0.331853346437, 0.358458793251, 0.385320466408, 0.412463129441,
        0.439913165673, 0.467698799115, 0.495850347347, 0.524400512708,
        0.553384719556, 0.582841507271, 0.612812991017, 0.643345405393,
        0.674489750196, 0.70630256284, 0.738846849185, 0.772193214189,
        0.806421247018, 0.841621233573, 0.877896295051, 0.915365087843,
        0.954165253146, 0.99445788321, 1.03643338949, 1.08031934081,
        1.12639112904, 1.17498679207, 1.22652812004, 1.28155156554,
        1.34075503369, 1.40507156031, 1.47579102818, 1.5547735946,
        1.64485362695, 1.75068607125, 1.88079360815, 2.05374891063,
        2.32634787404,
    ]
    lower_half = [-value for value in reversed(upper_half)]

    # The reference value at p = 0 is a huge finite negative number
    # rather than -inf; p = 0.5 maps to exactly 0.0.
    expected = [-1.79769313486e+308] + lower_half + [0.0] + upper_half

    observed = [ndtri(step / 100.0) for step in range(100)]
    np.testing.assert_allclose(observed, expected, rtol=1e-6)
def correlation_t(x_items, y_items, method='pearson', tails=None,
                  permutations=999, confidence_level=0.95):
    """Correlates two vectors and tests the significance of the result.

    Three assessments of the correlation coefficient are produced:

    - a parametric p-value from Student's t-distribution with df=n-2
      (fixed at 1 when there are fewer than 3 observations)
    - a nonparametric p-value obtained by repeatedly shuffling y_items and
      recomputing the coefficient, counting how often the shuffled
      coefficient is at least as extreme as the observed one
    - a confidence interval built with Fisher's Z transform, available only
      when there are more than 3 observations

    See Sokal and Rohlf pp. 575-580 and pg. 598-601 for background on
    these techniques.

    Warning: the parametric p-value is unreliable when the method is
    spearman and there are fewer than 11 observations in each vector.

    Returns a 5-tuple containing:

        the correlation coefficient (r or rho)
        the parametric p-value
        the list of coefficients obtained from the permuted input (empty
            if permutations is 0)
        the nonparametric p-value (None if permutations is 0)
        a (lower, upper) tuple for the confidence interval, which is
            (None, None) if there are not more than 3 observations

    x_items and y_items must be the same length and cannot have fewer than
    2 elements each. If one or both of the input vectors do not have any
    variation, r or rho will be 0.0. Both of these behaviours come from the
    underlying pearson/spearman functions, which compute the coefficient.

    Raises ValueError if method, tails, permutations or confidence_level
    is invalid.

    Arguments:
        x_items - the first list of observations
        y_items - the second list of observations
        method - 'pearson' or 'spearman'
        tails - if None (the default), a two-sided test is performed.
            'high' for a one-tailed test for positive association, or
            'low' for a one-tailed test for negative association. This
            affects both the parametric and nonparametric tests, but the
            confidence interval is always two-sided
        permutations - the number of permutations to use in the
            nonparametric test. Must be greater than or equal to 0. If 0,
            the nonparametric test is skipped
        confidence_level - the confidence level to use when constructing
            the confidence interval. Must be between 0 and 1 (exclusive)
    """
    # Argument validation. The checks run in a fixed order so that the
    # first offending argument determines which error is reported.
    if method not in ('pearson', 'spearman'):
        raise ValueError("Invalid method '%s'. Must be either 'pearson' or "
                         "'spearman'." % method)
    corr_fn = pearson if method == 'pearson' else spearman

    if tails not in (None, 'high', 'low'):
        raise ValueError("Invalid tail type '%s'. Must be either None, "
                         "'high', or 'low'." % tails)

    if permutations < 0:
        raise ValueError("Invalid number of permutations: %d. Must be greater "
                         "than or equal to zero." % permutations)

    if confidence_level <= 0 or confidence_level >= 1:
        raise ValueError("Invalid confidence level: %.4f. Must be between "
                         "zero and one." % confidence_level)

    # The observed coefficient is computed on the inputs as given, before
    # they are converted to arrays.
    corr_coeff = corr_fn(x_items, y_items)

    x_items = np.array(x_items)
    y_items = np.array(y_items)
    num_obs = len(x_items)
    deg_freedom = num_obs - 2

    # Parametric test. With fewer than 3 observations the p-value stays at
    # its default of 1.
    parametric_p_val = 1
    if num_obs >= 3:
        try:
            t_stat = corr_coeff / np.sqrt(
                (1 - (corr_coeff * corr_coeff)) / deg_freedom)
            parametric_p_val = t_tailed_prob(t_stat, deg_freedom, tails)
        except (ZeroDivisionError, FloatingPointError):
            # r/rho was presumably 1.
            parametric_p_val = 0

    # Nonparametric test: shuffle y_items once per permutation and
    # recompute the coefficient against the untouched x_items.
    permuted_corr_coeffs = [
        corr_fn(x_items, y_items[np.random.permutation(num_obs)])
        for _ in range(permutations)
    ]

    def _at_least_as_extreme(permuted_corr_coeff):
        # Decides whether a permuted coefficient counts against the
        # observed one for the requested tail type.
        if tails is None:
            return abs(permuted_corr_coeff) >= abs(corr_coeff)
        if tails == 'high':
            return permuted_corr_coeff >= corr_coeff
        if tails == 'low':
            return permuted_corr_coeff <= corr_coeff
        # Not strictly necessary since tails was validated above, but kept
        # as a safeguard so that a bogus tail type can never silently
        # yield a p-value.
        raise ValueError("Invalid tail type '%s'. Must be either None, "
                         "'high', or 'low'." % tails)

    better = sum(1 for permuted_corr_coeff in permuted_corr_coeffs
                 if _at_least_as_extreme(permuted_corr_coeff))

    nonparametric_p_val = None
    if permutations > 0:
        # The +1 terms include the observed coefficient itself.
        nonparametric_p_val = (better + 1) / (permutations + 1)

    # Confidence interval for corr_coeff via Fisher's Z transform. The
    # critical value is always two-sided, regardless of tails.
    z_crit = abs(ndtri((1 - confidence_level) / 2))
    ci_low = None
    ci_high = None
    if num_obs > 3:
        try:
            fisher_z = np.arctanh(corr_coeff)
            margin = z_crit / np.sqrt(num_obs - 3)
            ci_low = np.tanh(fisher_z - margin)
            ci_high = np.tanh(fisher_z + margin)
        except (ZeroDivisionError, FloatingPointError):
            # r/rho was presumably 1 or -1. Match what R does in this case.
            ci_low = corr_coeff
            ci_high = corr_coeff

    return (corr_coeff, parametric_p_val, permuted_corr_coeffs,
            nonparametric_p_val, (ci_low, ci_high))
def test_ndtri(self):
    """ndtri should match the cephes implementation on a 0.01 grid.

    The reference table holds one value per probability
    p = 0.00, 0.01, ..., 0.99; results must agree to a relative
    tolerance of 1e-6.
    """
    reference = [
        # p = 0.00 .. 0.09
        -1.79769313486e+308, -2.32634787404, -2.05374891063,
        -1.88079360815, -1.75068607125, -1.64485362695, -1.5547735946,
        -1.47579102818, -1.40507156031, -1.34075503369,
        # p = 0.10 .. 0.19
        -1.28155156554, -1.22652812004, -1.17498679207, -1.12639112904,
        -1.08031934081, -1.03643338949, -0.99445788321, -0.954165253146,
        -0.915365087843, -0.877896295051,
        # p = 0.20 .. 0.29
        -0.841621233573, -0.806421247018, -0.772193214189,
        -0.738846849185, -0.70630256284, -0.674489750196,
        -0.643345405393, -0.612812991017, -0.582841507271,
        -0.553384719556,
        # p = 0.30 .. 0.39
        -0.524400512708, -0.495850347347, -0.467698799115,
        -0.439913165673, -0.412463129441, -0.385320466408,
        -0.358458793251, -0.331853346437, -0.305480788099,
        -0.279319034447,
        # p = 0.40 .. 0.49
        -0.253347103136, -0.227544976641, -0.201893479142,
        -0.176374164781, -0.150969215497, -0.125661346855,
        -0.100433720511, -0.0752698620998, -0.0501535834647,
        -0.0250689082587,
        # p = 0.50 .. 0.59
        0.0, 0.0250689082587, 0.0501535834647, 0.0752698620998,
        0.100433720511, 0.125661346855, 0.150969215497, 0.176374164781,
        0.201893479142, 0.227544976641,
        # p = 0.60 .. 0.69
        0.253347103136, 0.279319034447, 0.305480788099, 0.331853346437,
        0.358458793251, 0.385320466408, 0.412463129441, 0.439913165673,
        0.467698799115, 0.495850347347,
        # p = 0.70 .. 0.79
        0.524400512708, 0.553384719556, 0.582841507271, 0.612812991017,
        0.643345405393, 0.674489750196, 0.70630256284, 0.738846849185,
        0.772193214189, 0.806421247018,
        # p = 0.80 .. 0.89
        0.841621233573, 0.877896295051, 0.915365087843, 0.954165253146,
        0.99445788321, 1.03643338949, 1.08031934081, 1.12639112904,
        1.17498679207, 1.22652812004,
        # p = 0.90 .. 0.99
        1.28155156554, 1.34075503369, 1.40507156031, 1.47579102818,
        1.5547735946, 1.64485362695, 1.75068607125, 1.88079360815,
        2.05374891063, 2.32634787404,
    ]

    probabilities = [percent / 100.0 for percent in range(100)]
    computed = [ndtri(p) for p in probabilities]
    np.testing.assert_allclose(computed, reference, rtol=1e-6)