def get_lik_sp_abd_dbh_four_models(raw_data_site, dataset_name, out_dir='./out_files/'):
    """Obtain the summed log likelihood of each species having abundance n and its
    individuals having their specific dbh values, for the four models ASNE (METE),
    AGSNE, SSNT on D, and SSNT on D ** (2/3).
    """
    site = raw_data_site['site'][0]
    G, S, N, E = get_GSNE(raw_data_site)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    beta_ssnt = mete.get_beta(S, N, version='untruncated')
    beta_asne = mete.get_beta(S, N)
    # Rescale dbh so that the smallest individual has D = 1
    d_list = raw_data_site['dbh'] / min(raw_data_site['dbh'])
    lik_asne, lik_agsne, lik_ssnt_0, lik_ssnt_1 = 0, 0, 0, 0
    for sp in np.unique(raw_data_site['sp']):
        sp_dbh = d_list[raw_data_site['sp'] == sp]
        lik_asne += lik_sp_abd_dbh_asne([G, S, N, E], np.exp(-beta_asne),
                                        len(sp_dbh), sp_dbh)
        lik_agsne += lik_sp_abd_dbh_agsne([G, S, N, E],
                                          [lambda1, beta, lambda3,
                                           agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3],
                                          len(sp_dbh), sp_dbh)
        lik_ssnt_0 += lik_sp_abd_dbh_ssnt([G, S, N, E], np.exp(-beta_ssnt), 'ssnt_0',
                                          len(sp_dbh), sp_dbh, d_list)
        lik_ssnt_1 += lik_sp_abd_dbh_ssnt([G, S, N, E], np.exp(-beta_ssnt), 'ssnt_1',
                                          len(sp_dbh), sp_dbh, d_list)
    out = open(out_dir + 'lik_sp_abd_dbh_four_models.txt', 'a')
    print >> out, dataset_name, site, str(lik_asne), str(lik_agsne), str(lik_ssnt_0), str(lik_ssnt_1)
    out.close()
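# A minimal usage sketch for the function above, assuming a raw data csv with
# 'site', 'sp', and 'dbh' columns that wk.import_raw_data (used elsewhere in
# this module) can parse; the dataset name 'BCI' and the path are illustrative
# placeholders, not part of the original code.
raw_data = wk.import_raw_data('./data/BCI.csv')
for site in np.unique(raw_data['site']):
    raw_data_site = raw_data[raw_data['site'] == site]
    get_lik_sp_abd_dbh_four_models(raw_data_site, 'BCI')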
def get_envpred(envpred_data, predtype='sad'):
    # predtype takes one of 'sad', 'rad', or 'rare'. Note that comparing
    # strings with `is` (as in the original) is unreliable; use `==` instead.
    if predtype == 'sad':
        envpred = DataFrame(columns=['site_id', 'octave', 'env_pred'])
    if predtype == 'rad':
        envpred = DataFrame(columns=['site_id', 'rank', 'env_pred'])
    if predtype == 'rare':
        envpred = DataFrame(columns=['site_id', 'env_pred'])
    for index, site in envpred_data.iterrows():
        obs_S = site['S']
        envpred_S = 10 ** site['logSpred']
        envpred_N = 10 ** site['logNpred']
        if predtype == 'sad':
            sad_bins = get_log_bins([envpred_N])
            octave = range(0, len(sad_bins) - 1)
            site_pred = get_mete_sad(envpred_S, envpred_N, bin_edges=sad_bins)
            site_ids = [site['site_id'] for i in range(0, len(site_pred))]
            site_pred_with_id = DataFrame(np.column_stack([site_ids, octave, site_pred]),
                                          columns=['site_id', 'octave', 'env_pred'])
        if predtype == 'rad':
            # note using observed S here for time being
            rank = range(1, int(obs_S + 1))
            site_beta = get_beta(envpred_S, envpred_N)
            site_pred, p = get_mete_rad(obs_S, envpred_N, beta=site_beta)
            site_ids = [site['site_id'] for i in range(0, len(site_pred))]
            site_pred_with_id = DataFrame(np.column_stack([site_ids, rank, site_pred]),
                                          columns=['site_id', 'rank', 'env_pred'])
        if predtype == 'rare':
            pred_rad = get_mete_rad(int(envpred_S), envpred_N)[0]
            site_pred = sum([i <= 10 for i in pred_rad])
            site_pred_with_id = DataFrame(np.column_stack([site['site_id'], site_pred]),
                                          columns=['site_id', 'env_pred'])
        envpred = envpred.append(site_pred_with_id, ignore_index=True)
    return envpred
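# A hedged usage sketch: the input DataFrame needs 'site_id', 'S', 'logSpred',
# and 'logNpred' columns, as read off the loop body above; the values below
# are made up for illustration only.
envpred_data = DataFrame({'site_id': [1], 'S': [25],
                          'logSpred': [1.4], 'logNpred': [3.1]})
rad_pred = get_envpred(envpred_data, predtype='rad')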
def bootstrap_SAD(name_site_combo, model, in_dir='./data/', out_dir='./out_files/', Niter=200):
    """A general function of bootstrapping for SAD applying to all four models.

    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output
    Niter - number of bootstrap samples

    Output:
    Writes to disk, with one file for R^2 and one for KS statistic.
    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)
    G, S, N, E = get_GSNE(dat_clean)
    beta_ssnt = mete.get_beta(S, N, version='untruncated')
    beta_asne = mete.get_beta(S, N)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    sad_agsne = mete_distributions.sad_agsne([G, S, N, E],
                                             [lambda1, beta, lambda3,
                                              agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3])
    dist_for_model = {'ssnt_0': stats.logser(np.exp(-beta_ssnt)),
                      'ssnt_1': stats.logser(np.exp(-beta_ssnt)),
                      'asne': md.trunc_logser(np.exp(-beta_asne), N),
                      'agsne': sad_agsne}
    dist = dist_for_model[model]
    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_rad_' + model + '.csv')
    pred = pred_obs[pred_obs['site'] == site]['pred'][::-1]
    obs = pred_obs[pred_obs['site'] == site]['obs'][::-1]
    out_list_rsquare = [dat_name, site,
                        str(mtools.obs_pred_rsquare(np.log10(obs), np.log10(pred)))]
    emp_cdf = mtools.get_emp_cdf(obs)
    out_list_ks = [dat_name, site,
                   str(max(abs(emp_cdf - np.array([dist.cdf(x) for x in obs]))))]
    for i in range(Niter):
        obs_boot = np.array(sorted(dist.rvs(S)))
        cdf_boot = np.array([dist.cdf(x) for x in obs_boot])
        emp_cdf_boot = mtools.get_emp_cdf(obs_boot)
        out_list_rsquare.append(str(mtools.obs_pred_rsquare(np.log10(obs_boot), np.log10(pred))))
        out_list_ks.append(str(max(abs(emp_cdf_boot - cdf_boot))))
    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_rsquare.txt',
                     ",".join(str(x) for x in out_list_rsquare))
    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_ks.txt',
                     ",".join(str(x) for x in out_list_ks))
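# A minimal usage sketch, assuming './data/BCI.csv' exists and the matching
# '<dat_name>_obs_pred_rad_<model>.csv' file has already been written to
# out_dir; the dataset name and site value are hypothetical.
bootstrap_SAD(['BCI', '1'], 'asne', Niter=200)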
def explore_parameter_space(Svals, Nvals, ncomm, bisec, transect=False):
    for S in Svals:
        for N in Nvals:
            beta = mete.get_beta(S, N)
            # only simulate communities where exp(-beta) is a valid
            # log-series parameter
            if exp(-beta) < 1:
                comms = [mete.sim_spatial_whole(S, N, bisec, transect=transect,
                                                beta=beta)
                         for i in range(0, ncomm)]
                output_results(comms, S, N, ncomm, bisec, transect, None, None)
            print S, N
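# A hedged usage sketch sweeping a small S-by-N grid; the grid values,
# number of communities, and bisection count are illustrative only.
explore_parameter_space(Svals=[10, 100], Nvals=[1000, 10000], ncomm=10, bisec=8)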
def run_test(raw_data, dataset_name, data_dir='./data/', cutoff=9):
    """Use data to compare the predicted and empirical SADs and get results in csv files

    Keyword arguments:
    raw_data : numpy structured array with 4 columns: 'site', 'year', 'sp', 'ab'
    dataset_name : short code that will indicate the name of the dataset in
                   the output file names
    data_dir : directory in which to store output
    cutoff : only sites with more than cutoff species are analyzed, i.e.,
             cutoff is one less than the minimum required richness

    """
    usites = np.sort(list(set(raw_data["site"])))
    f1 = csv.writer(open(data_dir + dataset_name + '_obs_pred.csv', 'wb'))
    f2 = csv.writer(open(data_dir + dataset_name + '_dist_test.csv', 'wb'))
    for i in range(0, len(usites)):
        subsites = raw_data["site"][raw_data["site"] == usites[i]]
        subab = raw_data["ab"][raw_data["site"] == usites[i]]
        N = sum(subab)
        S = len(subsites)
        if S > cutoff:
            print("%s, Site %s, S=%s, N=%s" % (dataset_name, i, S, N))
            # Generate predicted values and p (e ** -beta) based on METE:
            mete_pred = mete.get_mete_rad(int(S), int(N))
            pred = np.array(mete_pred[0])
            p = mete_pred[1]
            p_untruncated = exp(-mete.get_beta(S, N, version='untruncated'))
            obsab = np.sort(subab)[::-1]
            # Calculate Akaike weight of log-series:
            L_logser = md.logser_ll(obsab, p)
            L_logser_untruncated = md.logser_ll(obsab, p_untruncated)
            mu, sigma = md.pln_solver(obsab)
            L_pln = md.pln_ll(mu, sigma, obsab)
            k1 = 1
            k2 = 2
            AICc_logser = macroecotools.AICc(k1, L_logser, S)
            AICc_logser_untruncated = macroecotools.AICc(k1, L_logser_untruncated, S)
            AICc_pln = macroecotools.AICc(k2, L_pln, S)
            weight = macroecotools.aic_weight(AICc_logser, AICc_pln, S, cutoff=4)
            weight_untruncated = macroecotools.aic_weight(AICc_logser_untruncated,
                                                          AICc_pln, S, cutoff=4)
            # Save results to csv files:
            results = np.column_stack((subsites, obsab, pred))
            results2 = np.column_stack((np.array(usites[i], dtype='S20'), S, N, p,
                                        weight, p_untruncated, weight_untruncated))
            f1.writerows(results)
            f2.writerows(results2)
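# A hedged usage sketch: build the structured array from a headerless csv of
# site/year/sp/ab records; the file name, dtype widths, and dataset code are
# illustrative assumptions, not from the original code.
raw_data = np.genfromtxt('./data/bbs_2009.csv', dtype="S15,i8,S10,i8",
                         names=['site', 'year', 'sp', 'ab'], delimiter=",")
run_test(raw_data, 'bbs')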
def get_envpred_sads(envpred_data):
    envpred_sads = DataFrame(columns=['SiteID', 'EnvPred'])
    for index, site in envpred_data.iterrows():
        obs_S = site['S']
        envpred_S = 10 ** site['predlogS']
        envpred_N = 10 ** site['predlogN']
        beta = get_beta(envpred_S, envpred_N)
        # To produce a comparable number of species use obs_S; IS THIS RIGHT?
        site_sad, p = get_mete_rad(obs_S, envpred_N, beta=beta)
        site_ids = [site['SiteID'] for i in range(0, len(site_sad))]
        site_sad_with_id = DataFrame(np.column_stack([site_ids, site_sad]),
                                     columns=['SiteID', 'EnvPred'])
        envpred_sads = envpred_sads.append(site_sad_with_id, ignore_index=True)
    return envpred_sads
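# Hedged usage sketch: the input DataFrame needs 'SiteID', 'S', 'predlogS',
# and 'predlogN' columns; the single-row values below are illustrative.
envpred_data = DataFrame({'SiteID': [1], 'S': [25],
                          'predlogS': [1.4], 'predlogN': [3.1]})
sads = get_envpred_sads(envpred_data)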
def sim_null(S0, N0, dic_beta):
    """Abundances simulated from a discrete uniform and associated METE predictions"""
    N_sim = sorted(np.random.random_integers(1, (2 * N0 - S0) / S0, S0), reverse=True)
    N_tot = sum(N_sim)
    # In cases where N and S are nearly equal it is possible for random draws
    # to yield all singletons, which breaks the numerical solution for beta.
    # If this is the case make one species a doubleton.
    if N_tot == S0:
        N_sim[0] = 2
    if (S0, N0) not in dic_beta:
        dic_beta[(S0, N0)] = mete.get_beta(S0, sum(N_sim))
    N_pred = mete.get_mete_rad(S0, sum(N_sim), dic_beta[(S0, N0)])[0]
    np.random.seed()
    return N_sim, N_pred
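# Hedged usage sketch: dic_beta caches beta across repeated calls with the
# same (S0, N0) pair, so the transcendental equation for beta is solved only
# once per combination; the parameter values are illustrative.
dic_beta = {}
for i in range(100):
    N_sim, N_pred = sim_null(50, 2000, dic_beta)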
def get_par_multi_dists(ab, dist_name): """Returns the parameters given the observed abundances and the designated distribution.""" if dist_name == 'logser': beta = mete.get_beta(len(ab), sum(ab), version='untruncated') par = (np.exp(-beta), ) elif dist_name == 'pln': par = md.pln_solver(ab) elif dist_name == 'geom': par = (len(ab) / sum(ab), ) elif dist_name == 'negbin': par = md.negbin_solver(ab) if np.isnan(par[0]): par = None elif dist_name == 'zipf': par = (md.zipf_solver(ab), ) else: print "Error: distribution not recognized." par = None return par
site_data = [map(float, x) for x in site_data]
# enforce a minimum individual density of 2
indices = mete.which([site_data[i][2] > 2 for i in range(0, len(site_data))])
site_data = [site_data[i] for i in indices]
site_data = np.array(site_data)
Amin = min(site_data[:, 0])
Amax = max(site_data[:, 0])
S0 = int(max(site_data[:, 1]))
N0 = int(max(site_data[:, 2]))
sar_down_iterative = []
for i in range(0, nperm):
    p = mete.exp(-mete.get_beta(S0, N0))
    n0_rvs = mete.trunc_logser_rvs(p, N0, S0)
    sar_down_iterative.append(mete.downscale_sar_fixed_abu(Amax, n0_rvs, Amin))
Avals = sar_down_iterative[0][0][:]
len_A = len(Avals)
out = np.empty((nperm * len_A, 2))
for j in range(0, nperm):
    for i in range(0, 2):
        start = j * len_A
        stop = start + len_A
        out[start:stop, i] = sar_down_iterative[j][i]
filename = "./sar/" + shrt_name + "_mete_sar_middle_ground.txt"
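# The fragment above assembles `out` and a file name but stops short of
# writing; a hedged completion, assuming a plain-text dump is what is wanted:
np.savetxt(filename, out)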
import numpy as np
import csv
import sys
import os
from math import exp

import mete

if len(sys.argv) > 1:
    S0 = int(sys.argv[1])
    N0 = int(sys.argv[2])

if not os.path.exists('../demo'):
    os.mkdir('../demo')

beta = mete.get_beta(S0, N0)
n0 = mete.trunc_logser_rvs(exp(-beta), N0, S0)
n0 = list(n0)
n0 = [int(x) for x in n0]
n0.sort(reverse=True)

rad = mete.get_mete_rad(S0, N0)[0]

Amax = 4
Amin = 1

recur = mete.downscale_sar(Amax, S0, N0, Amin)
recur_obsSAD = mete.downscale_sar_fixed_abu(Amax, n0, Amin)
Avals = recur_obsSAD[0][:]
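# A hedged continuation: the script creates '../demo' above but the shown
# fragment never writes to it. Assuming the two downscaled SARs are pairs of
# area and richness sequences, they could be saved like this; the output file
# names are illustrative.
np.savetxt('../demo/downscale_sar.txt', np.column_stack(recur))
np.savetxt('../demo/downscale_sar_fixed_abu.txt', np.column_stack(recur_obsSAD))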