from numpy import arange, array
from scipy.special import ndtri  # inverse CDF of the standard normal


def make_normal_quantile_normalizer(mean, sd, bins=1000):
    """Returns f(a) that converts a to the specified normal distribution."""
    # Map evenly spaced quantiles to standard normal deviates, then scale and
    # shift them once onto the requested mean and sd.
    dist = array([ndtri(i) for i in arange(1.0 / bins, 1, 1.0 / bins)])
    dist = (dist * sd) + mean
    return make_quantile_normalizer(dist)
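A minimal usage sketch, assuming make_quantile_normalizer (defined elsewhere in the same module) returns a callable that maps each value of an input array onto the matching quantile of the supplied target distribution; the exponential test data below are made up for illustration:

from numpy.random import exponential

# Build a normalizer targeting a standard normal shape, then apply it to
# right-skewed data; the output should follow the requested distribution.
normalize = make_normal_quantile_normalizer(mean=0.0, sd=1.0)
skewed = exponential(scale=2.0, size=500)
normalized = normalize(skewed)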
Example #2
    def __call__(self, size, confidence_level=0.95):
        if confidence_level <= 0 or confidence_level >= 1:
            raise ValueError("Invalid confidence level: %.4f. Must be between "
                             "zero and one (exclusive)." % confidence_level)

        # We'll use the variable names from Colwell 2012 for clarity and
        # brevity.
        m = size
        fk = self.getAbundanceFrequencyCounts()
        n = self.getTotalIndividualCount()
        s_obs = self.getObservationCount()
        s_est = self.estimateFullRichness()

        if m <= n:
            # Interpolation.

            # Equation 4 in Colwell 2012 for the estimate.
            estimate_acc = 0

            # Equation 5 in Colwell 2012 gives unconditional variance, but they
            # report the standard error (SE) (which is the same as the standard
            # deviation in this case) in their tables and use this to construct
            # confidence intervals. Thus, we compute SE as sqrt(variance).
            std_err_acc = 0

            for k in range(1, n + 1):
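                # alpha_km (Equation 4 in Colwell 2012) is the probability that
                # a species with k individuals in the full sample of size n is
                # entirely absent from a random subsample of size m; one way to
                # compute it is sketched after this method.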
                alpha_km = self._calculate_alpha_km(n, k, m)
                estimate_acc += alpha_km * fk[k]
                std_err_acc += (((1 - alpha_km)**2) * fk[k])

            estimate = s_obs - estimate_acc

            # Convert variance to standard error.
            std_err = sqrt(std_err_acc - (estimate ** 2 / s_est))
        else:
            # Extrapolation.
            m_star = m - n
            f1 = fk[1]
            f2 = fk[2]
            f_hat = self.estimateUnobservedObservationCount()
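            # f_hat is the estimated number of species that remain unobserved
            # (f0-hat in Colwell 2012), used in Equation 9 below.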

            try:
                # Equation 9 in Colwell 2012.
                estimate = s_obs + f_hat * (1 -
                        (1 - (f1 / (n * f_hat))) ** m_star)
            except ZeroDivisionError:
                # This can happen if we have exactly one singleton and no
                # doubletons, or no singletons and no doubletons.
                estimate = None
                std_err = None
            else:
                # Equation 10 in Colwell 2012. I used Wolfram Alpha to
                # calculate the analytic partial derivatives since they weren't
                # provided in the original paper. We have two partial
                # derivatives, wrt f1 and f2, that we really care about. All
                # other partial derivatives (e.g. wrt f3, f4, etc.) get a value
                # of 1.
                pd_f1 = self._partial_derivative_f1(f1, f2, m_star, n)
                pd_f2 = self._partial_derivative_f2(f1, f2, m_star, n)
                pd_f1f2 = pd_f1 * pd_f2

                # To do this efficiently, here's the algorithm:
                #
                # 1) Create an nxn array filled with ones. Each element represents
                #    the multiplication of two partial derivatives.
                # 2) Fill in only what we need: the multiplication of partial
                #    derivatives wrt f1 and f2.
                # 3) Do an element-wise multiply between our partial derivative
                #    matrix and the covariance matrix. tensordot does this and
                #    also sums the result, which is exactly what we need. In
                #    the end, we've summed all n^2 elements, each of which is
                #    (pd_fi * pd_fj * cov_ij).
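                #
                # With its default axes=2, tensordot contracts over both
                # dimensions of the two matrices, i.e. it returns
                # sum(pd_matrix * cov_matrix) as a scalar.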
                self._pd_matrix[0, :] = pd_f1
                self._pd_matrix[1, :] = pd_f2
                self._pd_matrix[:, 0] = pd_f1
                self._pd_matrix[:, 1] = pd_f2

                self._pd_matrix[0, 0] = pd_f1 ** 2
                self._pd_matrix[0, 1] = pd_f1f2
                self._pd_matrix[1, 0] = pd_f1f2
                self._pd_matrix[1, 1] = pd_f2 ** 2

                std_err = sqrt(tensordot(self._pd_matrix, self._cov_matrix))

        # Compute CI based on std_err.
        ci_low = None
        ci_high = None
        if std_err is not None:
            # z_crit will be something like 1.96 for 95% CI.
            z_crit = abs(ndtri((1 - confidence_level) / 2))
            ci_bound = z_crit * std_err
            ci_low = estimate - ci_bound
            ci_high = estimate + ci_bound

        return estimate, std_err, ci_low, ci_high
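The alpha_km coefficient is produced by a helper that is not shown here. Per Equation 4 of Colwell 2012 it is the hypergeometric probability C(n - k, m) / C(n, m) that a species with k individuals is missed entirely by a subsample of size m. The sketch below is one plausible way to compute it (in log space for numerical stability); it is an assumption, not the actual body of _calculate_alpha_km:

from numpy import exp
from scipy.special import gammaln

def calculate_alpha_km(n, k, m):
    # C(n - k, m) / C(n, m): the chance that none of the k individuals of
    # this species land in a random subsample of size m.
    if k > n - m:
        return 0.0  # the species cannot be missed entirely
    return exp(gammaln(n - k + 1) + gammaln(n - m + 1)
               - gammaln(n + 1) - gammaln(n - k - m + 1))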
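An end-to-end sketch of how the estimator might be driven; the class name AbundanceEstimator, its constructor, and the abundance counts are illustrative assumptions, not part of the source:

# Hypothetical sample: per-species abundances (n = 26 individuals, 7 species).
counts = [10, 7, 3, 3, 1, 1, 1]
estimator = AbundanceEstimator(counts)  # class name and constructor assumed

# Interpolate richness down to a subsample of 15 individuals.
est, std_err, ci_low, ci_high = estimator(15)

# Extrapolate beyond the observed sample size; std_err and the CI bounds may
# come back as None if the singleton/doubleton counts make Equation 9
# undefined.
est, std_err, ci_low, ci_high = estimator(60, confidence_level=0.95)
if std_err is not None:
    print("S(60) = %.2f, 95%% CI [%.2f, %.2f]" % (est, ci_low, ci_high))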