def create_fake_observation():
    """Build a subsampled observation together with its unseen probability.

    A count vector of 1001 entries is constructed (9000 for the first entry,
    1 for each of the remaining 1000) and 1000 individuals are drawn from it
    with ``subsample``. Because the full vector is known, the probability
    mass of the entries missing from the subsample can be computed exactly.
    Used by the tests of lladser_pe and lladser_ci.

    Returns
    -------
    tuple
        ``(fake_obs, exp_p)``: the subsampled counts and the expected
        probability of the unseen portion.
    """
    counts = np.array([9000] + [1] * 1000, dtype='int64')
    total = counts.sum()
    fake_obs = subsample(counts, 1000)

    # Relative abundance of every entry that shows up in the subsample.
    seen_fraction = sum(abundance/total
                        for abundance, drawn in zip(counts, fake_obs)
                        if drawn > 0)
    return fake_obs, 1 - seen_fraction
def michaelis_menten_fit(counts, num_repeats=1, params_guess=None,
                         return_b=False):
    """Fit a Michaelis-Menten curve to the observed-species rarefaction curve.

    The fitted model is ``S = Smax * n / (B + n)``, where ``n`` is the number
    of individuals and ``S`` is the number of species.

    Note: there is some controversy about how to do the fitting. The ML
    model given by Raaijmakers 1987 is based on the assumption that error is
    roughly proportional to magnitude of observation, reasonable for enzyme
    kinetics but not reasonable for rarefaction data. Here we just do a
    nonlinear curve fit for the parameters using least-squares.

    Inputs:
        counts: vector of counts; converted with ``asarray``.
        num_repeats: rarefaction (subsampling without replacement) is
            performed this many times at each value of n, and the results
            are averaged.
        params_guess: initial guess of (Smax, B). ``None`` selects the
            default guess of (100, 500).
        return_b: if True, return the whole fitted parameter vector
            (Smax, B); otherwise return only Smax.

    The fit is made to data points where n = 1, 2, ..., counts.sum() and
    S = species represented in a random sample of n individuals.
    """
    counts = asarray(counts)
    if params_guess is None:
        params_guess = array([100, 500])

    # Sampling depths n = 1 .. total number of individuals.
    depths = arange(1, counts.sum() + 1)

    def rarefy_once():
        # Observed species at every depth for one round of subsampling.
        return array([observed_species(subsample(counts, n))
                      for n in depths])

    species_by_repeat = asarray([rarefy_once() for _ in range(num_repeats)])
    mean_species = species_by_repeat.mean(0)

    def squared_error(params, n, observed):
        # Sum of squared residuals between the model curve and the data;
        # works on vectors of n and observed values.
        predicted = params[0] * n / (params[1] + n)
        return ((predicted - observed) ** 2).sum()

    best_fit = fmin_powell(squared_error, params_guess,
                           args=(depths, mean_species), disp=0)

    # By default only S_max is returned, not the K_m (B) parameter.
    return best_fit if return_b else best_fit[0]
def create_fake_observation():
    """Create a subsample whose unseen probability is known in advance.

    The full sample holds one entry with 9000 individuals followed by 1000
    entries with a single individual each. After drawing 1000 individuals
    with ``subsample``, the expected probability of the unseen portion is
    one minus the summed relative abundance of the entries that were drawn.
    Used by the tests of lladser_pe and lladser_ci.

    Returns
    -------
    tuple
        ``(fake_obs, exp_p)``: the subsampled counts and the expected
        probability of the unseen portion.
    """
    counts = np.array([9000] + [1] * 1000)
    total = counts.sum()
    fake_obs = subsample(counts, 1000)

    # Accumulate the relative abundance of each entry seen in the subsample.
    seen_fraction = sum(abundance / total
                        for abundance, drawn in zip(counts, fake_obs)
                        if drawn > 0)
    exp_p = 1 - seen_fraction
    return fake_obs, exp_p
def create_fake_observation():
    """Return a fake observation and the expected unseen probability.

    Builds a larger sample (9000 individuals in the first entry, one in
    each of 1000 further entries), subsamples 1000 individuals from it with
    ``subsample``, and derives the probability mass of the entries that did
    not make it into the subsample. Used by the tests of lladser_pe and
    lladser_ci.

    Returns
    -------
    tuple
        ``(fake_obs, exp_p)``: the subsampled counts and the expected
        probability of the unseen portion.
    """
    abundances = [1] * 1001
    abundances[0] = 9000
    counts = np.array(abundances)
    total = counts.sum()

    fake_obs = subsample(counts, 1000)

    # Relative abundances of the entries present in the subsample.
    seen = [count/total
            for count, drawn in zip(counts, fake_obs)
            if drawn > 0]
    return fake_obs, 1 - sum(seen)
def michaelis_menten_fit(counts, num_repeats=1, params_guess=None):
    """Estimate S_max from a Michaelis-Menten fit to the rarefaction curve.

    The Michaelis-Menten equation is defined as

    .. math::

       S=\\frac{nS_{max}}{n+B}

    where :math:`n` is the number of individuals and :math:`S` is the number
    of OTUs. This function estimates the :math:`S_{max}` parameter.

    The curve is fitted to the points :math:`n=1,2,...,N`, where :math:`N`
    is the total number of individuals (sum of abundances for all OTUs) and
    :math:`S` is the number of OTUs represented in a random sample of
    :math:`n` individuals.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    num_repeats : int, optional
        The number of times to perform rarefaction (subsampling without
        replacement) at each value of :math:`n`. The repeats are averaged
        before fitting.
    params_guess : tuple, optional
        Initial guess of :math:`S_{max}` and :math:`B`. If ``None``, the
        default guess for :math:`S_{max}` is :math:`S` (as :math:`S_{max}`
        should be >= :math:`S`) and the default guess for :math:`B` is
        ``round(N / 2)``.

    Returns
    -------
    S_max : double
        Estimate of the :math:`S_{max}` parameter in the Michaelis-Menten
        equation.

    See Also
    --------
    skbio.math.subsample

    Notes
    -----
    There is some controversy about how to do the fitting. The ML model
    given in [1]_ is based on the assumption that error is roughly
    proportional to magnitude of observation, reasonable for enzyme kinetics
    but not reasonable for rarefaction data. Here we just do a nonlinear
    curve fit for the parameters using least-squares.

    References
    ----------
    .. [1] Raaijmakers, J. G. W. 1987 Statistical analysis of the
       Michaelis-Menten equation. Biometrics 43, 793-803.

    """
    counts = _validate(counts)
    total_individuals = counts.sum()

    if params_guess is None:
        params_guess = (observed_otus(counts),
                        int(round(total_individuals / 2)))

    # Sampling depths n = 1 .. N.
    depths = np.arange(1, total_individuals + 1)

    def rarefaction_curve():
        # Observed OTUs at every depth for one round of subsampling.
        return np.asarray([observed_otus(subsample(counts, n))
                           for n in depths], dtype=int)

    curves = np.empty((num_repeats, len(depths)), dtype=int)
    for repeat in range(num_repeats):
        curves[repeat] = rarefaction_curve()
    mean_curve = curves.mean(0)

    def sum_squared_residuals(params, n, observed):
        # n and observed are vectors of depths and mean observed OTUs.
        predicted = params[0] * n / (params[1] + n)
        return ((predicted - observed) ** 2).sum()

    fitted = fmin_powell(sum_squared_residuals, params_guess, ftol=1e-5,
                         args=(depths, mean_curve), disp=False)
    # Only S_max (the first fitted parameter) is returned.
    return fitted[0]
def y(self):
    """Return one diversity value per sampling depth in ``self.x``.

    For each depth ``k`` in ``self.x`` the parent sample's counts are
    subsampled to ``k`` and passed to ``self.div_fn``. If a ``ValueError``
    is raised anywhere in that process, a list of zeros with one entry per
    depth is returned instead.
    """
    def diversity_at(depth):
        # Diversity of a single subsample drawn at the given depth.
        return self.div_fn(subsample(self.parent.sample.counts, depth))

    try:
        return [diversity_at(depth) for depth in self.x]
    except ValueError:
        # NOTE(review): presumably raised when a depth cannot be
        # subsampled from the available counts -- confirm with subsample.
        return [0 for _ in self.x]
def michaelis_menten_fit(counts, num_repeats=1, params_guess=None):
    """Fit the Michaelis-Menten model to the observed-OTU rarefaction curve.

    The Michaelis-Menten equation is defined as

    .. math::

       S=\\frac{nS_{max}}{n+B}

    where :math:`n` is the number of individuals and :math:`S` is the number
    of OTUs. This function estimates the :math:`S_{max}` parameter.

    The fit uses the data points :math:`n=1,2,...,N`, where :math:`N` is the
    total number of individuals (sum of abundances for all OTUs).
    :math:`S` is the number of OTUs represented in a random sample of
    :math:`n` individuals.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    num_repeats : int, optional
        The number of times to perform rarefaction (subsampling without
        replacement) at each value of :math:`n`; the mean over the repeats
        is what gets fitted.
    params_guess : tuple, optional
        Initial guess of :math:`S_{max}` and :math:`B`. If ``None``, the
        default guess for :math:`S_{max}` is :math:`S` (as :math:`S_{max}`
        should be >= :math:`S`) and the default guess for :math:`B` is
        ``round(N / 2)``.

    Returns
    -------
    S_max : double
        Estimate of the :math:`S_{max}` parameter in the Michaelis-Menten
        equation.

    See Also
    --------
    skbio.math.subsample

    Notes
    -----
    There is some controversy about how to do the fitting. The ML model
    given in [1]_ is based on the assumption that error is roughly
    proportional to magnitude of observation, reasonable for enzyme kinetics
    but not reasonable for rarefaction data. Here we just do a nonlinear
    curve fit for the parameters using least-squares.

    References
    ----------
    .. [1] Raaijmakers, J. G. W. 1987 Statistical analysis of the
       Michaelis-Menten equation. Biometrics 43, 793-803.

    """
    counts = _validate(counts)
    n_total = counts.sum()

    if params_guess is None:
        initial_s_max = observed_otus(counts)
        initial_b = int(round(n_total / 2))
        params_guess = (initial_s_max, initial_b)

    # Observed number of OTUs (S) at each number of sampled individuals (n).
    sample_sizes = np.arange(1, n_total + 1)
    otus_per_repeat = np.empty((num_repeats, len(sample_sizes)), dtype=int)
    for repeat in range(num_repeats):
        richness = [observed_otus(subsample(counts, n))
                    for n in sample_sizes]
        otus_per_repeat[repeat] = np.asarray(richness, dtype=int)
    mean_otus = otus_per_repeat.mean(0)

    def residual_sum_of_squares(p, n, y):
        # Squared distance between the model curve and the actual values y
        # at the sample sizes n.
        model = p[0] * n / (p[1] + n)
        return ((model - y)**2).sum()

    optimum = fmin_powell(residual_sum_of_squares, params_guess,
                          args=(sample_sizes, mean_otus), ftol=1e-5,
                          disp=False)
    # The first fitted parameter is S_max.
    return optimum[0]