def create_fake_observation(): """Create a subsample with defined property""" # Create a subsample of a larger sample such that we can compute # the expected probability of the unseen portion. # This is used in the tests of lladser_pe and lladser_ci counts = np.ones(1001, dtype='int64') counts[0] = 9000 total = counts.sum() fake_obs = subsample(counts, 1000) exp_p = 1 - sum([x/total for (x, y) in zip(counts, fake_obs) if y > 0]) return fake_obs, exp_p
def create_fake_observation(): """Create a subsample with defined property""" # Create a subsample of a larger sample such that we can compute # the expected probability of the unseen portion. # This is used in the tests of lladser_pe and lladser_ci counts = np.ones(1001, dtype='int64') counts[0] = 9000 total = counts.sum() fake_obs = subsample(counts, 1000) exp_p = 1 - sum([x / total for (x, y) in zip(counts, fake_obs) if y > 0]) return fake_obs, exp_p
def michaelis_menten_fit(counts, num_repeats=1, params_guess=None): """Calculate Michaelis-Menten fit to rarefaction curve of observed OTUs. The Michaelis-Menten equation is defined as .. math:: S=\\frac{nS_{max}}{n+B} where :math:`n` is the number of individuals and :math:`S` is the number of OTUs. This function estimates the :math:`S_{max}` parameter. The fit is made to datapoints for :math:`n=1,2,...,N`, where :math:`N` is the total number of individuals (sum of abundances for all OTUs). :math:`S` is the number of OTUs represented in a random sample of :math:`n` individuals. Parameters ---------- counts : 1-D array_like, int Vector of counts. num_repeats : int, optional The number of times to perform rarefaction (subsampling without replacement) at each value of :math:`n`. params_guess : tuple, optional Initial guess of :math:`S_{max}` and :math:`B`. If ``None``, default guess for :math:`S_{max}` is :math:`S` (as :math:`S_{max}` should be >= :math:`S`) and default guess for :math:`B` is ``round(N / 2)``. Returns ------- S_max : double Estimate of the :math:`S_{max}` parameter in the Michaelis-Menten equation. See Also -------- skbio.math.subsample Notes ----- There is some controversy about how to do the fitting. The ML model given in [1]_ is based on the assumption that error is roughly proportional to magnitude of observation, reasonable for enzyme kinetics but not reasonable for rarefaction data. Here we just do a nonlinear curve fit for the parameters using least-squares. References ---------- .. [1] Raaijmakers, J. G. W. 1987 Statistical analysis of the Michaelis-Menten equation. Biometrics 43, 793-803. """ counts = _validate(counts) n_indiv = counts.sum() if params_guess is None: S_max_guess = observed_otus(counts) B_guess = int(round(n_indiv / 2)) params_guess = (S_max_guess, B_guess) # observed # of OTUs vs # of individuals sampled, S vs n xvals = np.arange(1, n_indiv + 1) ymtx = np.empty((num_repeats, len(xvals)), dtype=int) for i in range(num_repeats): ymtx[i] = np.asarray([observed_otus(subsample(counts, n)) for n in xvals], dtype=int) yvals = ymtx.mean(0) # Vectors of actual vals y and number of individuals n. def errfn(p, n, y): return (((p[0] * n / (p[1] + n)) - y) ** 2).sum() # Return S_max. return fmin_powell(errfn, params_guess, ftol=1e-5, args=(xvals, yvals), disp=False)[0]