def chao1_ci(counts, bias_corrected=True, zscore=1.96): """Calculate chao1 confidence interval. Parameters ---------- counts : 1-D array_like, int Vector of counts. bias_corrected : bool, optional Indicates whether or not to use the bias-corrected version of the equation. If ``False`` *and* there are both singletons and doubletons, the uncorrected version will be used. The biased-corrected version will be used otherwise. zscore : scalar, optional Score to use for confidence. Default of 1.96 is for a 95% confidence interval. Returns ------- tuple chao1 confidence interval as ``(lower_bound, upper_bound)``. See Also -------- chao1 Notes ----- The implementation here is based on the equations in the EstimateS manual [1]_. Different equations are employed to calculate the chao1 variance and confidence interval depending on `bias_corrected` and the presence/absence of singletons and/or doubletons. Specifically, the following EstimateS equations are used: 1. No singletons, Equation 14. 2. Singletons but no doubletons, Equations 7, 13. 3. Singletons and doubletons, ``bias_corrected=True``, Equations 6, 13. 4. Singletons and doubletons, ``bias_corrected=False``, Equations 5, 13. References ---------- .. [1] http://viceroy.eeb.uconn.edu/estimates/ """ counts = _validate(counts) o, s, d = osd(counts) if s: chao = chao1(counts, bias_corrected) chaovar = _chao1_var(counts, bias_corrected) return _chao_confidence_with_singletons(chao, o, chaovar, zscore) else: n = counts.sum() return _chao_confidence_no_singletons(n, o, zscore)
def chao1(counts, bias_corrected=True): """Calculate chao1 richness estimator. Uses the bias-corrected version unless `bias_corrected` is ``False`` *and* there are both singletons and doubletons. Parameters ---------- counts : 1-D array_like, int Vector of counts. bias_corrected : bool, optional Indicates whether or not to use the bias-corrected version of the equation. If ``False`` *and* there are both singletons and doubletons, the uncorrected version will be used. The biased-corrected version will be used otherwise. Returns ------- double Computed chao1 richness estimator. See Also -------- chao1_ci Notes ----- The uncorrected version is based on Equation 6 in [1]_: .. math:: chao1=S_{obs}+\\frac{F_1^2}{2F_2} where :math:`F_1` and :math:`F_2` are the count of singletons and doubletons, respectively. The bias-corrected version is defined as .. math:: chao1=S_{obs}+\\frac{F_1(F_1-1)}{2(F_2+1)} References ---------- .. [1] Chao, A. 1984. Non-parametric estimation of the number of classes in a population. Scandinavian Journal of Statistics 11, 265-270. """ counts = _validate(counts) o, s, d = osd(counts) if not bias_corrected and s and d: return o + s ** 2 / (d * 2) else: return o + s * (s - 1) / (2 * (d + 1))
def chao1(counts, bias_corrected=True): """Calculate chao1 richness estimator. Uses the bias-corrected version unless `bias_corrected` is ``False`` *and* there are both singletons and doubletons. Parameters ---------- counts : 1-D array_like, int Vector of counts. bias_corrected : bool, optional Indicates whether or not to use the bias-corrected version of the equation. If ``False`` *and* there are both singletons and doubletons, the uncorrected version will be used. The biased-corrected version will be used otherwise. Returns ------- double Computed chao1 richness estimator. See Also -------- chao1_ci Notes ----- The uncorrected version is based on Equation 6 in [1]_: .. math:: chao1=S_{obs}+\\frac{F_1^2}{2F_2} where :math:`F_1` and :math:`F_2` are the count of singletons and doubletons, respectively. The bias-corrected version is defined as .. math:: chao1=S_{obs}+\\frac{F_1(F_1-1)}{2(F_2+1)} References ---------- .. [1] Chao, A. 1984. Non-parametric estimation of the number of classes in a population. Scandinavian Journal of Statistics 11, 265-270. """ counts = _validate(counts) o, s, d = osd(counts) if not bias_corrected and s and d: return o + s**2 / (d * 2) else: return o + s * (s - 1) / (2 * (d + 1))
def ace(counts, rare_threshold=10): """Calculate the ACE metric (Abundance-based Coverage Estimator). Parameters ---------- counts : 1-D array_like, int Vector of counts. rare_threshold : int, optional Threshold at which an OTU containing as many or fewer individuals will be considered rare. Returns ------- double Computed ACE metric. Raises ------ ValueError If every rare OTU is a singleton. Notes ----- ACE was first introduced in [1]_ and [2]_. The implementation here is based on the description given in the EstimateS manual [3]_. If no rare OTUs exist, returns the number of abundant OTUs. The default value of 10 for `rare_threshold` is based on [4]_. If `counts` contains zeros, indicating OTUs which are known to exist in the environment but did not appear in the sample, they will be ignored for the purpose of calculating the number of rare OTUs. References ---------- .. [1] Chao, A. & S.-M Lee. 1992 Estimating the number of classes via sample coverage. Journal of the American Statistical Association 87, 210-217. .. [2] Chao, A., M.-C. Ma, & M. C. K. Yang. 1993. Stopping rules and estimation for recapture debugging with unequal failure rates. Biometrika 80, 193-201. .. [3] http://viceroy.eeb.uconn.edu/estimates/ .. [4] Chao, A., W.-H. Hwang, Y.-C. Chen, and C.-Y. Kuo. 2000. Estimating the number of shared species in two communities. Statistica Sinica 10:227-246. """ counts = _validate(counts) freq_counts = np.bincount(counts) s_rare = _otus_rare(freq_counts, rare_threshold) singles = freq_counts[1] if singles > 0 and singles == s_rare: raise ValueError("The only rare OTUs are singletons, so the ACE " "metric is undefined. EstimateS suggests using " "bias-corrected Chao1 instead.") s_abun = _otus_abundant(freq_counts, rare_threshold) if s_rare == 0: return s_abun n_rare = _number_rare(freq_counts, rare_threshold) c_ace = 1 - singles / n_rare top = s_rare * _number_rare(freq_counts, rare_threshold, gamma=True) bottom = c_ace * n_rare * (n_rare - 1) gamma_ace = (top / bottom) - 1 if gamma_ace < 0: gamma_ace = 0 return s_abun + (s_rare / c_ace) + ((singles / c_ace) * gamma_ace)