def lik_sp_abd_dbh_agsne(stat_var, pars, n, dbh_list, log=True):
    """Joint likelihood of a species' abundance and its individuals' dbh in AGSNE.

    Probability of a species having abundance n and its individuals having
    dbh [d1, d2, ..., d_n].

    Inputs:
    stat_var - [G, S, N, E]
    pars - [lambda1, beta, lambda3, z] in AGSNE
    n - abundance of the species
    dbh_list - a list or array of length n with scaled dbh values
    log - when equal to True (default) the log-probability is returned,
        otherwise the probability itself (exp of the log-probability)

    """
    G, S, N, E = stat_var
    lambda1, beta, lambda3, z = pars
    sad = mete_distributions.sad_agsne(stat_var, pars)

    def _log_term(d):
        # Contribution of one individual with scaled dbh d. The G / S / z
        # factor shared by every individual is added once, further down.
        log_t = -(lambda1 + (beta - lambda3) * n + lambda3 * n * (d ** 2))
        t = np.exp(log_t)
        series_factor = 1 - (S + 1) * (t ** S) + S * (t ** (S + 1))
        return log_t - 2 * np.log(1 - t) + np.log(series_factor) + np.log(2 * d)

    log_sum = sum(_log_term(d) for d in dbh_list)

    # Add the shared factor n times and subtract (n - 1) copies of the SAD
    # log-probability of abundance n.
    log_prob = log_sum + n * np.log(G / S / z) - (n - 1) * sad.logpmf(n)

    # Equality test (not truthiness) is kept so callers see unchanged behavior.
    if log == True:
        return log_prob
    return np.exp(log_prob)
def get_mete_agsne_rad(G, S, N, E, version='precise', pars=None):
    """Compute the rank-abundance distribution (RAD) predicted by the AGSNE.

    Arguments:
    G, S, N, E - state variables (number of genera, number of species,
        number of individuals, total metabolic rate)

    Keyword arguments:
    version - forwarded to get_agsne_lambdas() when pars is None; can take
        'precise' or 'approx'
    pars - a list of Lagrange multipliers [lambda1, beta, lambda3, Z], if
        these are already available. If None, they are calculated from
        get_agsne_lambdas() and agsne_lambda3_z().

    Return a list of expected abundances with length int(S), ranked from high
    to low. Returns an empty list when int(S) is 0, and None when the
    cumulative SAD probability over abundances 1..N+1 never reaches the
    quantile of the last rank.

    """
    if pars is None:
        lambda1, beta, lambda3 = get_agsne_lambdas(G, S, N, E, version=version)
        lambda3z = agsne_lambda3_z(lambda1, beta, S)
        pars = [lambda1, beta, lambda3, lambda3z / lambda3]
    sad = medis.sad_agsne([G, S, N, E], pars)

    # S may arrive as a float; every size/index below uses the integer count
    # so that list construction and the termination test agree.
    n_species = int(S)
    if n_species == 0:
        return []

    # Target quantile for each rank: (rank - 0.5) / S.
    cdf_obs = [(rank - 0.5) / S for rank in range(1, n_species + 1)]
    abundance = [0] * n_species

    i, j = 1, 0
    cdf_cum = 0
    while i <= N + 1:
        cdf_cum += sad.pmf(i)
        # One abundance value can satisfy several consecutive quantiles.
        while cdf_cum >= cdf_obs[j]:
            abundance[j] = i
            j += 1
            if j == n_species:
                # Filled from low to high abundance; callers expect high to low.
                abundance.reverse()
                return abundance
        i += 1

    # Not every quantile was reached within abundances 1..N+1.
    return None
def lik_sp_abd_dbh_agsne(stat_var, pars, n, dbh_list, log=True):
    """Joint likelihood of a species' abundance and its individuals' dbh in AGSNE.

    Probability of a species having abundance n and its individuals having
    dbh [d1, d2, ..., d_n].

    Inputs:
    stat_var - [G, S, N, E]
    pars - [lambda1, beta, lambda3, z] in AGSNE
    n - abundance of the species
    dbh_list - a list or array of length n with scaled dbh values
    log - when equal to True (default) the log-probability is returned,
        otherwise the probability itself (exp of the log-probability)

    """
    G, S, N, E = stat_var
    lambda1, beta, lambda3, z = pars
    sad = mete_distributions.sad_agsne(stat_var, pars)

    def _log_term(d):
        # Contribution of one individual with scaled dbh d. The G / S / z
        # factor shared by every individual is added once, further down.
        log_t = -(lambda1 + (beta - lambda3) * n + lambda3 * n * (d ** 2))
        t = np.exp(log_t)
        series_factor = 1 - (S + 1) * (t ** S) + S * (t ** (S + 1))
        return log_t - 2 * np.log(1 - t) + np.log(series_factor) + np.log(2 * d)

    log_sum = sum(_log_term(d) for d in dbh_list)

    # Add the shared factor n times and subtract (n - 1) copies of the SAD
    # log-probability of abundance n.
    log_prob = log_sum + n * np.log(G / S / z) - (n - 1) * sad.logpmf(n)

    # Equality test (not truthiness) is kept so callers see unchanged behavior.
    if log == True:
        return log_prob
    return np.exp(log_prob)
def get_mete_agsne_rad(G, S, N, E, version='precise', pars=None):
    """Compute the rank-abundance distribution (RAD) predicted by the AGSNE.

    Arguments:
    G, S, N, E - state variables (number of genera, number of species,
        number of individuals, total metabolic rate)

    Keyword arguments:
    version - forwarded to get_agsne_lambdas() when pars is None; can take
        'precise' or 'approx'
    pars - a list of Lagrange multipliers [lambda1, beta, lambda3, Z], if
        these are already available. If None, they are calculated from
        get_agsne_lambdas() and agsne_lambda3_z().

    Return a list of expected abundances with length int(S), ranked from high
    to low. Returns an empty list when int(S) is 0, and None when the
    cumulative SAD probability over abundances 1..N+1 never reaches the
    quantile of the last rank.

    """
    if pars is None:
        lambda1, beta, lambda3 = get_agsne_lambdas(G, S, N, E, version=version)
        lambda3z = agsne_lambda3_z(lambda1, beta, S)
        pars = [lambda1, beta, lambda3, lambda3z / lambda3]
    sad = medis.sad_agsne([G, S, N, E], pars)

    # S may arrive as a float; every size/index below uses the integer count
    # so that list construction and the termination test agree.
    n_species = int(S)
    if n_species == 0:
        return []

    # Target quantile for each rank: (rank - 0.5) / S.
    cdf_obs = [(rank - 0.5) / S for rank in range(1, n_species + 1)]
    abundance = [0] * n_species

    i, j = 1, 0
    cdf_cum = 0
    while i <= N + 1:
        cdf_cum += sad.pmf(i)
        # One abundance value can satisfy several consecutive quantiles.
        while cdf_cum >= cdf_obs[j]:
            abundance[j] = i
            j += 1
            if j == n_species:
                # Filled from low to high abundance; callers expect high to low.
                abundance.reverse()
                return abundance
        i += 1

    # Not every quantile was reached within abundances 1..N+1.
    return None
def bootstrap_SAD(name_site_combo, model, in_dir='./data/', out_dir='./out_files/', Niter=200):
    """A general function of bootstrapping for SAD applying to all four models.

    For the given site, the R^2 (on log10 abundances, against the predicted
    RAD) and the KS statistic (largest absolute gap between the empirical CDF
    and the model CDF) are computed for the observed abundances and then for
    Niter samples of S abundances drawn from the model's SAD.

    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne';
        any other value raises KeyError
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output
    Niter - number of bootstrap samples

    Output:
    Writes to disk, with one file for R^2 and one for KS statistic. Each
    written record is comma-separated: dat_name, site, the statistic for the
    observed data, then the Niter bootstrap statistics.

    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)
    G, S, N, E = get_GSNE(dat_clean)

    # Each model's distribution is built on demand, so that only the requested
    # one is constructed (the AGSNE multipliers in particular are solved only
    # when model == 'agsne').
    def _ssnt_dist():
        beta_ssnt = mete.get_beta(S, N, version='untruncated')
        return stats.logser(np.exp(-beta_ssnt))

    def _asne_dist():
        beta_asne = mete.get_beta(S, N)
        return md.trunc_logser(np.exp(-beta_asne), N)

    def _agsne_dist():
        lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
        z = agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3
        return mete_distributions.sad_agsne([G, S, N, E], [lambda1, beta, lambda3, z])

    dist_builders = {
        'ssnt_0': _ssnt_dist,
        'ssnt_1': _ssnt_dist,
        'asne': _asne_dist,
        'agsne': _agsne_dist,
    }
    dist = dist_builders[model]()

    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_rad_' + model + '.csv')
    site_rows = pred_obs[pred_obs['site'] == site]
    # Both columns are reversed before use.
    pred = site_rows['pred'][::-1]
    obs = site_rows['obs'][::-1]
    log_pred = np.log10(pred)  # identical for every bootstrap iteration

    out_list_rsquare = [dat_name, site, str(mtools.obs_pred_rsquare(np.log10(obs), log_pred))]
    emp_cdf = mtools.get_emp_cdf(obs)
    out_list_ks = [dat_name, site, str(max(abs(emp_cdf - np.array([dist.cdf(x) for x in obs]))))]

    for _ in range(Niter):
        obs_boot = np.array(sorted(dist.rvs(S)))
        cdf_boot = np.array([dist.cdf(x) for x in obs_boot])
        emp_cdf_boot = mtools.get_emp_cdf(obs_boot)
        out_list_rsquare.append(str(mtools.obs_pred_rsquare(np.log10(obs_boot), log_pred)))
        out_list_ks.append(str(max(abs(emp_cdf_boot - cdf_boot))))

    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_rsquare.txt',
                     ",".join(str(x) for x in out_list_rsquare))
    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_ks.txt',
                     ",".join(str(x) for x in out_list_ks))
def bootstrap_SAD(name_site_combo, model, in_dir='./data/', out_dir='./out_files/', Niter=200):
    """A general function of bootstrapping for SAD applying to all four models.

    For the given site, the R^2 (on log10 abundances, against the predicted
    RAD) and the KS statistic (largest absolute gap between the empirical CDF
    and the model CDF) are computed for the observed abundances and then for
    Niter samples of S abundances drawn from the model's SAD.

    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne';
        any other value raises KeyError
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output
    Niter - number of bootstrap samples

    Output:
    Writes to disk, with one file for R^2 and one for KS statistic. Each
    written record is comma-separated: dat_name, site, the statistic for the
    observed data, then the Niter bootstrap statistics.

    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)
    G, S, N, E = get_GSNE(dat_clean)

    # Each model's distribution is built on demand, so that only the requested
    # one is constructed (the AGSNE multipliers in particular are solved only
    # when model == 'agsne').
    def _ssnt_dist():
        beta_ssnt = mete.get_beta(S, N, version='untruncated')
        return stats.logser(np.exp(-beta_ssnt))

    def _asne_dist():
        beta_asne = mete.get_beta(S, N)
        return md.trunc_logser(np.exp(-beta_asne), N)

    def _agsne_dist():
        lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
        z = agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3
        return mete_distributions.sad_agsne([G, S, N, E], [lambda1, beta, lambda3, z])

    dist_builders = {
        'ssnt_0': _ssnt_dist,
        'ssnt_1': _ssnt_dist,
        'asne': _asne_dist,
        'agsne': _agsne_dist,
    }
    dist = dist_builders[model]()

    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_rad_' + model + '.csv')
    site_rows = pred_obs[pred_obs['site'] == site]
    # Both columns are reversed before use.
    pred = site_rows['pred'][::-1]
    obs = site_rows['obs'][::-1]
    log_pred = np.log10(pred)  # identical for every bootstrap iteration

    out_list_rsquare = [dat_name, site, str(mtools.obs_pred_rsquare(np.log10(obs), log_pred))]
    emp_cdf = mtools.get_emp_cdf(obs)
    out_list_ks = [dat_name, site, str(max(abs(emp_cdf - np.array([dist.cdf(x) for x in obs]))))]

    for _ in range(Niter):
        obs_boot = np.array(sorted(dist.rvs(S)))
        cdf_boot = np.array([dist.cdf(x) for x in obs_boot])
        emp_cdf_boot = mtools.get_emp_cdf(obs_boot)
        out_list_rsquare.append(str(mtools.obs_pred_rsquare(np.log10(obs_boot), log_pred)))
        out_list_ks.append(str(max(abs(emp_cdf_boot - cdf_boot))))

    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_rsquare.txt',
                     ",".join(str(x) for x in out_list_rsquare))
    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_ks.txt',
                     ",".join(str(x) for x in out_list_ks))