def get_obs_pred_sdr(raw_data_site, dataset_name, model, out_dir='./out_files/'):
    """Write the observed and predicted SDR (in unit of D^2) to file for a given model.

    Inputs:
    raw_data_site - data in the same format as obtained by clean_data_genera(),
        with four columns site, sp, dbh, and genus, and only for one site.
    dataset_name - name of the dataset for raw_data_site.
    model - can take one of four values 'ssnt_0' (constant growth of diameter D),
        'ssnt_1' (constant growth of D^2/3), 'asne', or 'agsne'.
    out_dir - directory for output file.

    Output:
    Appends one row per species (site, observed, predicted) to
    out_dir + dataset_name + '_obs_pred_sdr_' + model + '.csv'. Returns None.

    Raises:
    ValueError - if model is not one of the four supported values.

    """
    # Fail early: an unsupported model would otherwise leave `pred` empty and
    # only blow up at the final array assignment, after all the fitting work.
    if model not in ('ssnt_0', 'ssnt_1', 'asne', 'agsne'):
        raise ValueError(
            "model must be one of 'ssnt_0', 'ssnt_1', 'asne', 'agsne'; got %r"
            % (model,))

    # Diameters are divided by the smallest one, so the minimum scaled value is 1.
    scaled_d = raw_data_site['dbh'] / min(raw_data_site['dbh'])
    scaled_d2 = scaled_d ** 2

    G, S, N, E = get_GSNE(raw_data_site)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    theta_agsne = mete_distributions.theta_agsne(
        [G, S, N, E],
        [lambda1, beta, lambda3,
         agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3])
    theta_asne = mete_distributions.theta_epsilon(S, N, E)

    # 'ssnt_1' uses the exponent 2/3; every other model value uses 1.
    alpha = 2 / 3 if model == 'ssnt_1' else 1
    par = N / (sum(scaled_d ** alpha) - N)
    iisd_ssnt = ssnt_isd_bounded(alpha, par)

    species = raw_data_site['sp']
    pred, obs = [], []
    for sp in np.unique(species):
        is_sp = species == sp  # Rows belonging to this species
        n = len(raw_data_site[is_sp])  # Number of individuals within species
        if model == 'agsne':
            genus_sp = raw_data_site['genus'][is_sp][0]
            # Number of species within genus
            m = len(np.unique(species[raw_data_site['genus'] == genus_sp]))
            pred.append(theta_agsne.expected(m, n))
        elif model == 'asne':
            pred.append(theta_asne.E(n))
        else:  # 'ssnt_0' or 'ssnt_1'
            pred.append(iisd_ssnt.expected_square())
        obs.append(np.mean(scaled_d2[is_sp]))

    # One record per species: site label (at most 15 bytes), observed, predicted.
    results = np.zeros((S, ), dtype=('S15, f8, f8'))
    results['f0'] = np.array([raw_data_site['site'][0]] * S)
    results['f1'] = obs
    results['f2'] = pred

    out_path = out_dir + dataset_name + '_obs_pred_sdr_' + model + '.csv'
    # The context manager closes the file even if writing fails part-way.
    # NOTE(review): the binary append mode is kept unchanged from the original;
    # under Python 3 the csv module expects a text-mode file (newline='').
    with open(out_path, 'ab') as f1_write:
        csv.writer(f1_write).writerows(results)
def bootstrap_SDR(name_site_combo, model, in_dir = './data/', out_dir = './out_files/', Niter = 200):
    """Bootstrap the R^2 of the SDR for one site under one of the four models.

    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (the '_obs_pred_sdr_' csv file for
        this dataset and model) and output
    Niter - number of bootstrap samples

    Output:
    Writes to one file on disk for R^2. The record holds dat_name, site, the
    empirical R^2 (log10 observed vs. log10 predicted), followed by Niter
    bootstrap R^2 values, joined by commas.

    """
    dat_name, site = name_site_combo
    raw = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_clean = clean_data_agsne(raw[raw['site'] == site])
    G, S, N, E = get_GSNE(dat_clean)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)

    # For every species collect (m, n): m = number of species in its genus,
    # n = number of individuals of the species.
    genus_species_counts = []
    for sp in np.unique(dat_clean['sp']):
        rows_sp = dat_clean[dat_clean['sp'] == sp]
        genus_sp = rows_sp['genus'][0]
        same_genus = dat_clean[dat_clean['genus'] == genus_sp]
        genus_species_counts.append((len(np.unique(same_genus['sp'])), len(rows_sp)))

    obs_pred_path = out_dir + dat_name + '_obs_pred_sdr_' + model + '.csv'
    pred_obs = wk.import_obs_pred_data(obs_pred_path)
    site_rows = pred_obs[pred_obs['site'] == site]
    pred = site_rows['pred']
    obs = site_rows['obs']
    log_pred = np.log10(pred)

    # First three fields: dataset, site, empirical R^2.
    r2_fields = [dat_name, site,
                 str(mtools.obs_pred_rsquare(np.log10(obs), log_pred))]

    # All four distributions are built (in this order) before one is selected.
    iisd_agsne = mete_distributions.theta_agsne(
        [G, S, N, E],
        [lambda1, beta, lambda3,
         agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3])
    iisd_asne = mete_distributions.theta_epsilon(S, N, E)
    dbh_scaled = np.array(dat_clean['dbh'] / min(dat_clean['dbh']))

    def _ssnt(alpha):
        # SSNT distribution for a given exponent, parameterised from the data.
        return ssnt_isd_bounded(alpha, N / (sum(dbh_scaled ** alpha) - N))

    iisd_ssnt_0 = _ssnt(1)
    iisd_ssnt_1 = _ssnt(2/3)
    dist = {'ssnt_0': iisd_ssnt_0, 'ssnt_1': iisd_ssnt_1,
            'asne': iisd_asne, 'agsne': iisd_agsne}[model]

    # Pick the per-species sampler once; the model does not change in the loop.
    if model in ['ssnt_0', 'ssnt_1']:
        def species_mean(m, n):
            # SSNT draws are diameters, so square them before averaging.
            return np.mean((dist.rvs(n)) ** 2)
    elif model == 'asne':
        def species_mean(m, n):
            return np.mean(np.array(dist.rvs(n, n)))
    else:
        def species_mean(m, n):
            return np.mean(np.array(dist.rvs(n, n, m)))

    for _ in range(Niter):
        boot_means = np.array([species_mean(m, n) for m, n in genus_species_counts])
        r2_fields.append(str(mtools.obs_pred_rsquare(np.log10(boot_means), np.log10(pred))))

    wk.write_to_file(out_dir + 'SDR_bootstrap_' + model + '_rsquare.txt',
                     ",".join([str(field) for field in r2_fields]))
def get_obs_pred_sdr(raw_data_site, dataset_name, model, out_dir = './out_files/'):
    """Write the observed and predicted SDR (in unit of D^2) to file for a given model.

    Inputs:
    raw_data_site - data in the same format as obtained by clean_data_genera(),
        with four columns site, sp, dbh, and genus, and only for one site.
    dataset_name - name of the dataset for raw_data_site.
    model - can take one of four values 'ssnt_0' (constant growth of diameter D),
        'ssnt_1' (constant growth of D^2/3), 'asne', or 'agsne'.
    out_dir - directory for output file.

    Output:
    Appends one row per species (site, observed, predicted) to
    out_dir + dataset_name + '_obs_pred_sdr_' + model + '.csv'. Returns None.

    Raises:
    ValueError - if model is not one of the four supported values.

    """
    # Fail early: an unsupported model would otherwise leave `pred` empty and
    # only blow up at the final array assignment, after all the fitting work.
    if model not in ('ssnt_0', 'ssnt_1', 'asne', 'agsne'):
        raise ValueError(
            "model must be one of 'ssnt_0', 'ssnt_1', 'asne', 'agsne'; got %r"
            % (model,))

    # Diameters are divided by the smallest one, so the minimum scaled value is 1.
    scaled_d = raw_data_site['dbh'] / min(raw_data_site['dbh'])
    scaled_d2 = scaled_d ** 2

    G, S, N, E = get_GSNE(raw_data_site)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    theta_agsne = mete_distributions.theta_agsne(
        [G, S, N, E],
        [lambda1, beta, lambda3,
         agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3])
    theta_asne = mete_distributions.theta_epsilon(S, N, E)

    # 'ssnt_1' uses the exponent 2/3; every other model value uses 1.
    alpha = 2/3 if model == 'ssnt_1' else 1
    par = N / (sum(scaled_d ** alpha) - N)
    iisd_ssnt = ssnt_isd_bounded(alpha, par)

    species = raw_data_site['sp']
    pred, obs = [], []
    for sp in np.unique(species):
        is_sp = species == sp  # Rows belonging to this species
        n = len(raw_data_site[is_sp])  # Number of individuals within species
        if model == 'agsne':
            genus_sp = raw_data_site['genus'][is_sp][0]
            # Number of species within genus
            m = len(np.unique(species[raw_data_site['genus'] == genus_sp]))
            pred.append(theta_agsne.expected(m, n))
        elif model == 'asne':
            pred.append(theta_asne.E(n))
        else:  # 'ssnt_0' or 'ssnt_1'
            pred.append(iisd_ssnt.expected_square())
        obs.append(np.mean(scaled_d2[is_sp]))

    # One record per species: site label (at most 15 bytes), observed, predicted.
    results = np.zeros((S, ), dtype = ('S15, f8, f8'))
    results['f0'] = np.array([raw_data_site['site'][0]] * S)
    results['f1'] = obs
    results['f2'] = pred

    out_path = out_dir + dataset_name + '_obs_pred_sdr_' + model + '.csv'
    # The context manager closes the file even if writing fails part-way.
    # NOTE(review): the binary append mode is kept unchanged from the original;
    # under Python 3 the csv module expects a text-mode file (newline='').
    with open(out_path, 'ab') as f1_write:
        csv.writer(f1_write).writerows(results)
def bootstrap_SDR(name_site_combo, model, in_dir='./data/', out_dir='./out_files/', Niter=200):
    """Bootstrap the R^2 of the SDR for one site under one of the four models.

    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (the '_obs_pred_sdr_' csv file for
        this dataset and model) and output
    Niter - number of bootstrap samples

    Output:
    Writes to one file on disk for R^2. The record holds dat_name, site, the
    empirical R^2 (log10 observed vs. log10 predicted), followed by Niter
    bootstrap R^2 values, joined by commas.

    """
    dat_name, site = name_site_combo
    raw = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_clean = clean_data_agsne(raw[raw['site'] == site])
    G, S, N, E = get_GSNE(dat_clean)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)

    # For every species collect (m, n): m = number of species in its genus,
    # n = number of individuals of the species.
    genus_species_counts = []
    for sp in np.unique(dat_clean['sp']):
        rows_sp = dat_clean[dat_clean['sp'] == sp]
        genus_sp = rows_sp['genus'][0]
        same_genus = dat_clean[dat_clean['genus'] == genus_sp]
        genus_species_counts.append((len(np.unique(same_genus['sp'])), len(rows_sp)))

    obs_pred_path = out_dir + dat_name + '_obs_pred_sdr_' + model + '.csv'
    pred_obs = wk.import_obs_pred_data(obs_pred_path)
    site_rows = pred_obs[pred_obs['site'] == site]
    pred = site_rows['pred']
    obs = site_rows['obs']
    log_pred = np.log10(pred)

    # First three fields: dataset, site, empirical R^2.
    r2_fields = [dat_name, site,
                 str(mtools.obs_pred_rsquare(np.log10(obs), log_pred))]

    # All four distributions are built (in this order) before one is selected.
    iisd_agsne = mete_distributions.theta_agsne(
        [G, S, N, E],
        [lambda1, beta, lambda3,
         agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3])
    iisd_asne = mete_distributions.theta_epsilon(S, N, E)
    dbh_scaled = np.array(dat_clean['dbh'] / min(dat_clean['dbh']))

    def _ssnt(alpha):
        # SSNT distribution for a given exponent, parameterised from the data.
        return ssnt_isd_bounded(alpha, N / (sum(dbh_scaled**alpha) - N))

    iisd_ssnt_0 = _ssnt(1)
    iisd_ssnt_1 = _ssnt(2 / 3)
    dist = {'ssnt_0': iisd_ssnt_0, 'ssnt_1': iisd_ssnt_1,
            'asne': iisd_asne, 'agsne': iisd_agsne}[model]

    # Pick the per-species sampler once; the model does not change in the loop.
    if model in ['ssnt_0', 'ssnt_1']:
        def species_mean(m, n):
            # SSNT draws are diameters, so square them before averaging.
            return np.mean((dist.rvs(n))**2)
    elif model == 'asne':
        def species_mean(m, n):
            return np.mean(np.array(dist.rvs(n, n)))
    else:
        def species_mean(m, n):
            return np.mean(np.array(dist.rvs(n, n, m)))

    for _ in range(Niter):
        boot_means = np.array([species_mean(m, n) for m, n in genus_species_counts])
        r2_fields.append(str(mtools.obs_pred_rsquare(np.log10(boot_means), np.log10(pred))))

    wk.write_to_file(out_dir + 'SDR_bootstrap_' + model + '_rsquare.txt',
                     ",".join([str(field) for field in r2_fields]))