def get_obs_pred_sdr(raw_data_site,
                     dataset_name,
                     model,
                     out_dir='./out_files/'):
    """Append the observed and predicted SDR (in unit of D^2) of one site to file.

    For every species in raw_data_site one row is produced holding the site
    name, the observed mean of the scaled D^2 of its individuals (dbh is
    scaled by the smallest dbh in raw_data_site), and the value predicted by
    the chosen model. The rows are appended to
    out_dir + dataset_name + '_obs_pred_sdr_' + model + '.csv'.

    Inputs:
    raw_data_site - data in the same format as obtained by clean_data_genera(), with
        four columns site, sp, dbh, and genus, and only for one site.
    dataset_name - name of the dataset for raw_data_site.
    model - can take one of four values 'ssnt_0' (constant growth of diameter D),
        'ssnt_1' (constant growth of D^2/3), 'asne', or 'agsne'.
    out_dir - directory for output file.

    Raises:
    ValueError - if model is not one of the four values listed above.

    NOTE(review): a second definition of get_obs_pred_sdr appears further down
    in this file and replaces this one when the module is loaded.

    """
    if model not in ('ssnt_0', 'ssnt_1', 'asne', 'agsne'):
        # Fail before any fitting is done instead of failing later with a
        # shape error when an empty prediction list is stored.
        raise ValueError("model must be 'ssnt_0', 'ssnt_1', 'asne' or "
                         "'agsne', got %r" % (model, ))

    scaled_d = raw_data_site['dbh'] / min(raw_data_site['dbh'])
    scaled_d2 = scaled_d**2
    G, S, N, E = get_GSNE(raw_data_site)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    theta_agsne = mete_distributions.theta_agsne([G, S, N, E], [
        lambda1, beta, lambda3,
        agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3
    ])
    theta_asne = mete_distributions.theta_epsilon(S, N, E)
    # Float literal keeps the exponent at 2/3 even where `/` between two ints
    # is floor division (Python 2 without `from __future__ import division`).
    alpha = 2.0 / 3 if model == 'ssnt_1' else 1
    par = N / (sum(scaled_d**alpha) - N)
    iisd_ssnt = ssnt_isd_bounded(alpha, par)

    is_ssnt = model in ('ssnt_0', 'ssnt_1')
    if is_ssnt:
        # The SSNT prediction takes no species-level argument, so it is
        # evaluated once and reused for every species.
        ssnt_pred = iisd_ssnt.expected_square()

    pred, obs = [], []
    for sp in np.unique(raw_data_site['sp']):
        is_sp = raw_data_site['sp'] == sp
        n = len(raw_data_site[is_sp])  # Number of individuals within species
        if model == 'agsne':
            genus_sp = raw_data_site['genus'][is_sp][0]
            m = len(
                np.unique(
                    raw_data_site['sp'][raw_data_site['genus'] == genus_sp])
            )  # Number of species within genus
            pred.append(theta_agsne.expected(m, n))
        elif model == 'asne':
            pred.append(theta_asne.E(n))
        else:
            pred.append(ssnt_pred)
        obs.append(np.mean(scaled_d2[is_sp]))

    # One record per species: site name (15-byte string field), obs, pred.
    results = np.zeros((S, ), dtype=('S15, f8, f8'))
    results['f0'] = np.array([raw_data_site['site'][0]] * S)
    results['f1'] = obs
    results['f2'] = pred

    out_path = out_dir + dataset_name + '_obs_pred_sdr_' + model + '.csv'
    # NOTE(review): binary append mode matches Python 2 csv usage; on Python 3
    # csv.writer needs a text-mode file -- confirm the target interpreter.
    # The with-block closes the file even if writing fails part-way.
    with open(out_path, 'ab') as f1_write:
        csv.writer(f1_write).writerows(results)
def bootstrap_SDR(name_site_combo, model, in_dir = './data/', out_dir = './out_files/', Niter = 200):
    """A general function of bootstrapping for SDR applying to all four models.

    The R^2 between the log10-transformed observed and predicted SDR of one
    site is computed first. Then, Niter times, a value is simulated for every
    species from the distribution of the chosen model, and the R^2 between
    the log10-transformed simulated values and the predictions is computed.

    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output
    Niter - number of bootstrap samples

    Output:
    Writes to one file on disk for R^2
    (out_dir + 'SDR_bootstrap_' + model + '_rsquare.txt'). The written string
    is dat_name, site, the empirical R^2 and the Niter bootstrap R^2 values,
    joined by commas.

    Raises:
    KeyError - if model is not one of the four values listed above.

    NOTE(review): a second definition of bootstrap_SDR appears further down
    in this file and replaces this one when the module is loaded.

    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)
    G, S, N, E = get_GSNE(dat_clean)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)

    # For each species: m = number of species in its genus, n = its abundance.
    par_list = []
    for sp in np.unique(dat_clean['sp']):
        dat_sp = dat_clean[dat_clean['sp'] == sp]
        n = len(dat_sp)
        genus_sp = dat_sp['genus'][0]
        m = len(np.unique(dat_clean[dat_clean['genus'] == genus_sp]['sp']))
        par_list.append([m, n])

    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_sdr_' + model + '.csv')
    site_rows = pred_obs[pred_obs['site'] == site]  # filter once, reuse for both columns
    pred = site_rows['pred']
    obs = site_rows['obs']
    log_pred = np.log10(pred)  # identical in every bootstrap iteration, so computed once
    out_list_rsquare = [dat_name, site, str(mtools.obs_pred_rsquare(np.log10(obs), log_pred))]

    iisd_agsne = mete_distributions.theta_agsne([G, S, N, E], [lambda1, beta, lambda3, agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3])
    iisd_asne = mete_distributions.theta_epsilon(S, N, E)
    dbh_scaled = np.array(dat_clean['dbh'] / min(dat_clean['dbh']))
    iisd_ssnt_0 = ssnt_isd_bounded(1, N / (sum(dbh_scaled ** 1) - N))
    # Float literal keeps the exponent at 2/3 even where `/` between two ints
    # is floor division (Python 2 without `from __future__ import division`).
    two_thirds = 2.0 / 3
    iisd_ssnt_1 = ssnt_isd_bounded(two_thirds, N / (sum(dbh_scaled ** two_thirds) - N))
    dist_for_model = {'ssnt_0': iisd_ssnt_0, 'ssnt_1': iisd_ssnt_1, 'asne': iisd_asne, 'agsne': iisd_agsne}
    dist = dist_for_model[model]

    # Choose the per-species simulator once; the model does not change between
    # iterations. The rvs() argument lists differ between the distribution
    # classes (presumably sample size first -- confirm against those classes).
    if model in ['ssnt_0', 'ssnt_1']:
        def simulate_species(m, n):
            # SSNT samples are squared before averaging.
            return np.mean((dist.rvs(n)) ** 2)
    elif model == 'asne':
        def simulate_species(m, n):
            return np.mean(np.array(dist.rvs(n, n)))
    else:
        def simulate_species(m, n):
            return np.mean(np.array(dist.rvs(n, n, m)))

    for _ in range(Niter):
        obs_boot = np.array([simulate_species(m, n) for m, n in par_list])
        out_list_rsquare.append(str(mtools.obs_pred_rsquare(np.log10(obs_boot), log_pred)))

    wk.write_to_file(out_dir + 'SDR_bootstrap_' + model + '_rsquare.txt', ",".join(str(x) for x in out_list_rsquare))
def lik_sp_abd_dbh_asne(stat_var, beta, n, dbh_list, log = True):
    """Likelihood in METE of a species with abundance n whose individuals have dbh [d1, d2, ..., d_n]

    The result combines the SAD term for abundance n with one size term per
    individual. Here unlike SSNT, P(d|n) is not equal to the ISD f(d).

    Inputs:
    stat_var - [G, S, N, E]
    beta - parameter for SAD
    n - abundance of the species
    dbh_list - a list or array of length n with scaled dbh values
    log - if True (default) the log-likelihood is returned, otherwise the
        likelihood itself

    NOTE(review): this function is defined a second time directly below in
    this file; the later definition is the one left bound after loading.
    """
    _, S, N, E = stat_var  # G is not needed for this model
    log_p_abundance = md.trunc_logser.logpmf(n, beta, N)
    size_dist = mete_distributions.theta_epsilon(S, N, E)

    log_p_sizes = []
    for dbh in dbh_list:
        # The METE density is evaluated at dbh ** 2; adding log(2 * dbh)
        # transforms it back to a distribution of dbh.
        log_p_sizes.append(size_dist.logpdf(dbh ** 2, n) + np.log(2 * dbh))

    log_lik = log_p_abundance + sum(log_p_sizes)
    if not log:
        return np.exp(log_lik)
    return log_lik
def lik_sp_abd_dbh_asne(stat_var, beta, n, dbh_list, log=True):
    """Joint likelihood in METE of abundance n and individual dbh values [d1, d2, ..., d_n]

    Here unlike SSNT, P(d|n) is not equal to the ISD f(d), so the size term
    is evaluated conditional on n.

    Inputs:
    stat_var - [G, S, N, E]
    beta - parameter for SAD
    n - abundance of the species
    dbh_list - a list or array of length n with scaled dbh values
    log - return the log-likelihood when True (default), else the likelihood
    """
    _, S, N, E = stat_var  # only S, N and E are used
    sad_term = md.trunc_logser.logpmf(n, beta, N)
    theta = mete_distributions.theta_epsilon(S, N, E)

    def log_density_of_dbh(d):
        # Prediction of METE is for d ** 2 and has to be transformed back to
        # a distribution of dbh, hence the log(2 * d) term.
        return theta.logpdf(d**2, n) + np.log(2 * d)

    total = sad_term + sum([log_density_of_dbh(d) for d in dbh_list])
    return total if log else np.exp(total)
def get_obs_pred_sdr(raw_data_site, dataset_name, model, out_dir = './out_files/'):
    """Append the observed and predicted SDR (in unit of D^2) of one site to file.

    For every species in raw_data_site one row is produced holding the site
    name, the observed mean of the scaled D^2 of its individuals (dbh is
    scaled by the smallest dbh in raw_data_site), and the value predicted by
    the chosen model. The rows are appended to
    out_dir + dataset_name + '_obs_pred_sdr_' + model + '.csv'.

    Inputs:
    raw_data_site - data in the same format as obtained by clean_data_genera(), with
        four columns site, sp, dbh, and genus, and only for one site.
    dataset_name - name of the dataset for raw_data_site.
    model - can take one of four values 'ssnt_0' (constant growth of diameter D),
        'ssnt_1' (constant growth of D^2/3), 'asne', or 'agsne'.
    out_dir - directory for output file.

    Raises:
    ValueError - if model is not one of the four values listed above.

    """
    if model not in ('ssnt_0', 'ssnt_1', 'asne', 'agsne'):
        # Fail before any fitting is done instead of failing later with a
        # shape error when an empty prediction list is stored.
        raise ValueError("model must be 'ssnt_0', 'ssnt_1', 'asne' or "
                         "'agsne', got %r" % (model, ))

    scaled_d = raw_data_site['dbh'] / min(raw_data_site['dbh'])
    scaled_d2 = scaled_d ** 2
    G, S, N, E = get_GSNE(raw_data_site)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    theta_agsne = mete_distributions.theta_agsne([G, S, N, E], [lambda1, beta, lambda3, agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3])
    theta_asne = mete_distributions.theta_epsilon(S, N, E)
    # Float literal keeps the exponent at 2/3 even where `/` between two ints
    # is floor division (Python 2 without `from __future__ import division`).
    alpha = 2.0 / 3 if model == 'ssnt_1' else 1
    par = N / (sum(scaled_d ** alpha) - N)
    iisd_ssnt = ssnt_isd_bounded(alpha, par)

    is_ssnt = model in ('ssnt_0', 'ssnt_1')
    if is_ssnt:
        # The SSNT prediction takes no species-level argument, so it is
        # evaluated once and reused for every species.
        ssnt_pred = iisd_ssnt.expected_square()

    pred, obs = [], []
    for sp in np.unique(raw_data_site['sp']):
        is_sp = raw_data_site['sp'] == sp
        n = len(raw_data_site[is_sp]) # Number of individuals within species
        if model == 'agsne':
            genus_sp = raw_data_site['genus'][is_sp][0]
            m = len(np.unique(raw_data_site['sp'][raw_data_site['genus'] == genus_sp])) # Number of species within genus
            pred.append(theta_agsne.expected(m, n))
        elif model == 'asne':
            pred.append(theta_asne.E(n))
        else:
            pred.append(ssnt_pred)
        obs.append(np.mean(scaled_d2[is_sp]))

    # One record per species: site name (15-byte string field), obs, pred.
    results = np.zeros((S, ), dtype = ('S15, f8, f8'))
    results['f0'] = np.array([raw_data_site['site'][0]] * S)
    results['f1'] = obs
    results['f2'] = pred

    out_path = out_dir + dataset_name + '_obs_pred_sdr_' + model + '.csv'
    # NOTE(review): binary append mode matches Python 2 csv usage; on Python 3
    # csv.writer needs a text-mode file -- confirm the target interpreter.
    # The with-block closes the file even if writing fails part-way.
    with open(out_path, 'ab') as f1_write:
        csv.writer(f1_write).writerows(results)
def bootstrap_SDR(name_site_combo,
                  model,
                  in_dir='./data/',
                  out_dir='./out_files/',
                  Niter=200):
    """A general function of bootstrapping for SDR applying to all four models.

    The R^2 between the log10-transformed observed and predicted SDR of one
    site is computed first. Then, Niter times, a value is simulated for every
    species from the distribution of the chosen model, and the R^2 between
    the log10-transformed simulated values and the predictions is computed.

    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output
    Niter - number of bootstrap samples

    Output:
    Writes to one file on disk for R^2
    (out_dir + 'SDR_bootstrap_' + model + '_rsquare.txt'). The written string
    is dat_name, site, the empirical R^2 and the Niter bootstrap R^2 values,
    joined by commas.

    Raises:
    KeyError - if model is not one of the four values listed above.

    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)
    G, S, N, E = get_GSNE(dat_clean)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)

    # For each species: m = number of species in its genus, n = its abundance.
    par_list = []
    for sp in np.unique(dat_clean['sp']):
        dat_sp = dat_clean[dat_clean['sp'] == sp]
        n = len(dat_sp)
        genus_sp = dat_sp['genus'][0]
        m = len(np.unique(dat_clean[dat_clean['genus'] == genus_sp]['sp']))
        par_list.append([m, n])

    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_sdr_' +
                                       model + '.csv')
    # Filter by site once and reuse the rows for both columns.
    site_rows = pred_obs[pred_obs['site'] == site]
    pred = site_rows['pred']
    obs = site_rows['obs']
    # log10(pred) is identical in every bootstrap iteration, so compute once.
    log_pred = np.log10(pred)
    out_list_rsquare = [
        dat_name, site,
        str(mtools.obs_pred_rsquare(np.log10(obs), log_pred))
    ]

    iisd_agsne = mete_distributions.theta_agsne([G, S, N, E], [
        lambda1, beta, lambda3,
        agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3
    ])
    iisd_asne = mete_distributions.theta_epsilon(S, N, E)
    dbh_scaled = np.array(dat_clean['dbh'] / min(dat_clean['dbh']))
    iisd_ssnt_0 = ssnt_isd_bounded(1, N / (sum(dbh_scaled**1) - N))
    # Float literal keeps the exponent at 2/3 even where `/` between two ints
    # is floor division (Python 2 without `from __future__ import division`).
    two_thirds = 2.0 / 3
    iisd_ssnt_1 = ssnt_isd_bounded(two_thirds,
                                   N / (sum(dbh_scaled**two_thirds) - N))
    dist_for_model = {
        'ssnt_0': iisd_ssnt_0,
        'ssnt_1': iisd_ssnt_1,
        'asne': iisd_asne,
        'agsne': iisd_agsne
    }
    dist = dist_for_model[model]

    # Choose the per-species simulator once; the model does not change between
    # iterations. The rvs() argument lists differ between the distribution
    # classes (presumably sample size first -- confirm against those classes).
    if model in ['ssnt_0', 'ssnt_1']:

        def simulate_species(m, n):
            # SSNT samples are squared before averaging.
            return np.mean((dist.rvs(n))**2)
    elif model == 'asne':

        def simulate_species(m, n):
            return np.mean(np.array(dist.rvs(n, n)))
    else:

        def simulate_species(m, n):
            return np.mean(np.array(dist.rvs(n, n, m)))

    for _ in range(Niter):
        obs_boot = np.array([simulate_species(m, n) for m, n in par_list])
        out_list_rsquare.append(
            str(mtools.obs_pred_rsquare(np.log10(obs_boot), log_pred)))

    wk.write_to_file(out_dir + 'SDR_bootstrap_' + model + '_rsquare.txt',
                     ",".join(str(x) for x in out_list_rsquare))