def get_lik_sp_abd_dbh_four_models(raw_data_site,
                                   dataset_name,
                                   out_dir='./out_files/'):
    """Obtain the summed log likelihood of each species having abundance n and its individuals having 
    
    their specific dbh values for the three models METE, SSNT on D, and SSNT on D ** (2/3).
    
    """
    site = raw_data_site['site'][0]
    G, S, N, E = get_GSNE(raw_data_site)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    beta_ssnt = mete.get_beta(S, N, version='untruncated')
    beta_asne = mete.get_beta(S, N)
    d_list = raw_data_site['dbh'] / min(raw_data_site['dbh'])
    lik_asne, lik_agsne, lik_ssnt_0, lik_ssnt_1 = 0, 0, 0, 0
    for sp in np.unique(raw_data_site['sp']):
        sp_dbh = d_list[raw_data_site['sp'] == sp]
        lik_asne += lik_sp_abd_dbh_asne([G, S, N, E], np.exp(-beta_asne),
                                        len(sp_dbh), sp_dbh)
        lik_agsne += lik_sp_abd_dbh_agsne([G, S, N, E], [
            lambda1, beta, lambda3,
            agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3
        ], len(sp_dbh), sp_dbh)
        lik_ssnt_0 += lik_sp_abd_dbh_ssnt([G, S, N, E],
                                          np.exp(-beta_ssnt), 'ssnt_0',
                                          len(sp_dbh), sp_dbh, d_list)
        lik_ssnt_1 += lik_sp_abd_dbh_ssnt([G, S, N, E],
                                          np.exp(-beta_ssnt), 'ssnt_1',
                                          len(sp_dbh), sp_dbh, d_list)
    out = open(out_dir + 'lik_sp_abd_dbh_four_models.txt', 'a')
    print >> out, dataset_name, site, str(lik_asne), str(lik_agsne), \
        str(lik_ssnt_0), str(lik_ssnt_1)
    out.close()
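# A minimal usage sketch for get_lik_sp_abd_dbh_four_models. The dataset name,
# path, and the wk.import_raw_data helper (used elsewhere in this file) are
# placeholders/assumptions; any raw data file with the columns expected by
# get_GSNE ('site', 'sp', 'dbh', plus a genus identifier) should work.
def example_lik_four_models(dat_name='BCI', in_dir='./data/'):
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    for site in np.unique(dat['site']):
        dat_site = dat[dat['site'] == site]
        # each call appends one line to out_dir + 'lik_sp_abd_dbh_four_models.txt'
        get_lik_sp_abd_dbh_four_models(dat_site, dat_name)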
def get_envpred(envpred_data, predtype='sad'):
    """Generate METE predictions from environmental S and N estimates.

    predtype takes one of three values: 'sad', 'rad', or 'rare'.

    """
    if predtype == 'sad':
        envpred = DataFrame(columns=['site_id', 'octave', 'env_pred'])
    if predtype == 'rad':
        envpred = DataFrame(columns=['site_id', 'rank', 'env_pred'])
    if predtype == 'rare':
        envpred = DataFrame(columns=['site_id', 'env_pred'])
    for index, site in envpred_data.iterrows():
        obs_S = site['S']
        envpred_S = 10 ** site['logSpred']
        envpred_N = 10 ** site['logNpred']
        if predtype == 'sad':
            sad_bins = get_log_bins([envpred_N])
            octave = range(0, len(sad_bins) - 1)
            site_pred = get_mete_sad(envpred_S, envpred_N, bin_edges=sad_bins)
            site_ids = [site['site_id'] for i in range(0, len(site_pred))]
            site_pred_with_id = DataFrame(np.column_stack([site_ids, octave, site_pred]),
                                          columns=['site_id', 'octave', 'env_pred'])    
        if predtype == 'rad':
            # note: using observed S here for the time being
            rank = range(1, int(obs_S + 1))
            site_beta = get_beta(envpred_S, envpred_N)
            site_pred, p = get_mete_rad(obs_S, envpred_N, beta=site_beta)
            site_ids = [site['site_id'] for i in range(0, len(site_pred))]
            site_pred_with_id = DataFrame(np.column_stack([site_ids, rank, site_pred]),
                                          columns=['site_id', 'rank', 'env_pred'])
        if predtype == 'rare':
            pred_rad = get_mete_rad(int(envpred_S), envpred_N)[0]
            site_pred = sum([i <= 10 for i in pred_rad])
            site_pred_with_id = DataFrame(np.column_stack([site['site_id'], site_pred]),
                                          columns=['site_id', 'env_pred']) 
        envpred = envpred.append(site_pred_with_id, ignore_index=True)
    return envpred
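# A small runnable sketch of get_envpred on a toy input frame. The column names
# ('site_id', 'S', 'logSpred', 'logNpred') follow the accesses inside
# get_envpred; the numeric values are invented for illustration.
def example_get_envpred():
    toy = DataFrame({'site_id': ['a', 'b'],
                     'S': [12, 30],
                     'logSpred': [1.1, 1.5],
                     'logNpred': [2.5, 3.2]})
    return get_envpred(toy, predtype='rad')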
def bootstrap_SAD(name_site_combo, model, in_dir = './data/', out_dir = './out_files/', Niter = 200):
    """A general function of bootstrapping for SAD applying to all four models. 
    
    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output 
    Niter - number of bootstrap samples
    
    Output:
    Writes to disk, with one file for R^2 and one for KS statistic.
    
    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)    
    G, S, N, E = get_GSNE(dat_clean)
    beta_ssnt = mete.get_beta(S, N, version = 'untruncated')
    beta_asne = mete.get_beta(S, N)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    sad_agsne = mete_distributions.sad_agsne([G, S, N, E], [lambda1, beta, lambda3, agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3])
    dist_for_model = {'ssnt_0': stats.logser(np.exp(-beta_ssnt)), 
                      'ssnt_1': stats.logser(np.exp(-beta_ssnt)), 
                      'asne': md.trunc_logser(np.exp(-beta_asne), N),
                      'agsne': sad_agsne}
    dist = dist_for_model[model]
    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_rad_' + model + '.csv')
    pred = pred_obs[pred_obs['site'] == site]['pred'][::-1]
    obs = pred_obs[pred_obs['site'] == site]['obs'][::-1]
    
    out_list_rsquare = [dat_name, site, str(mtools.obs_pred_rsquare(np.log10(obs), np.log10(pred)))]
    emp_cdf = mtools.get_emp_cdf(obs)
    out_list_ks = [dat_name, site, str(max(abs(emp_cdf - np.array([dist.cdf(x) for x in obs]))))]
    
    for i in range(Niter):
        obs_boot = np.array(sorted(dist.rvs(S)))
        cdf_boot = np.array([dist.cdf(x) for x in obs_boot])
        emp_cdf_boot = mtools.get_emp_cdf(obs_boot)
        out_list_rsquare.append(str(mtools.obs_pred_rsquare(np.log10(obs_boot), np.log10(pred))))
        out_list_ks.append(str(max(abs(emp_cdf_boot - np.array(cdf_boot)))))
    
    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_rsquare.txt', ",".join(str(x) for x in out_list_rsquare))
    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_ks.txt', ",".join(str(x) for x in out_list_ks))
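# Usage sketch for bootstrap_SAD: one (dataset, site) combination per call, for
# each of the four models. 'BCI' and the site label are placeholders; the
# corresponding *_obs_pred_rad_<model>.csv files must already exist in out_dir.
def example_bootstrap_sad(dat_name='BCI', site='1'):
    for model in ['ssnt_0', 'ssnt_1', 'asne', 'agsne']:
        bootstrap_SAD([dat_name, site], model, Niter=200)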
def explore_parameter_space(Svals, Nvals, ncomm, bisec, transect=False):
    for S in Svals:
        for N in Nvals:
            beta = mete.get_beta(S, N)
            if exp(-beta) < 1:
                comms = [mete.sim_spatial_whole(S, N, bisec, transect=transect,
                                            beta=beta) for i in range(0, ncomm)]
                output_results(comms, S, N, ncomm, bisec, transect, None, None)
            print S, N
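# Example sweep over a small (S, N) grid; output_results is the reporting helper
# assumed by explore_parameter_space, and bisec is the number of bisections
# handed through to mete.sim_spatial_whole.
def example_explore_parameter_space():
    explore_parameter_space(Svals=[10, 20], Nvals=[100, 1000], ncomm=5, bisec=4)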
def run_test(raw_data, dataset_name, data_dir='./data/', cutoff = 9):
    """Use data to compare the predicted and empirical SADs and get results in csv files
    
    Keyword arguments:
    raw_data : numpy structured array with 4 columns: 'site','year','sp','ab'
    dataset_name : short code that will indicate the name of the dataset in
                    the output file names
    data_dir : directory in which to store output
    cutoff : minimum number of species for a site to be analyzed; only sites
             with S > cutoff are used.
    
    """
    
    usites = np.sort(list(set(raw_data["site"])))
    f1 = csv.writer(open(data_dir + dataset_name + '_obs_pred.csv','wb'))
    f2 = csv.writer(open(data_dir + dataset_name + '_dist_test.csv','wb'))
    
    for i in range(0, len(usites)):
        subsites = raw_data["site"][raw_data["site"] == usites[i]]        
        subab = raw_data["ab"][raw_data["site"] == usites[i]]
        N = sum(subab)
        S = len(subsites)
        if S > cutoff:
            print("%s, Site %s, S=%s, N=%s" % (dataset_name, i, S, N))
            # Generate predicted values and p (e ** -beta) based on METE:
            mete_pred = mete.get_mete_rad(int(S), int(N))
            pred = np.array(mete_pred[0])
            p = mete_pred[1]
            p_untruncated = exp(-mete.get_beta(S, N, version='untruncated'))
            obsab = np.sort(subab)[::-1]
            # Calculate Akaike weight of log-series:
            L_logser = md.logser_ll(obsab, p)
            L_logser_untruncated = md.logser_ll(obsab, p_untruncated)
            mu, sigma = md.pln_solver(obsab)
            L_pln = md.pln_ll(mu,sigma,obsab)        
            k1 = 1
            k2 = 2    
            AICc_logser = macroecotools.AICc(k1, L_logser, S)
            AICc_logser_untruncated = macroecotools.AICc(k1, L_logser_untruncated, S)
            AICc_pln = macroecotools.AICc(k2, L_pln, S)
            weight = macroecotools.aic_weight(AICc_logser, AICc_pln, S, cutoff = 4)
            weight_untruncated = macroecotools.aic_weight(AICc_logser_untruncated,
                                                     AICc_pln, S, cutoff = 4)
            #save results to a csv file:
            results = ((np.column_stack((subsites, obsab, pred))))
            results2 = ((np.column_stack((np.array(usites[i], dtype='S20'),
                                                   S, N, p, weight,
                                                   p_untruncated,
                                                   weight_untruncated))))
            f1.writerows(results)
            f2.writerows(results2)
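# Usage sketch for run_test, assuming a csv of raw abundance records with the
# columns named in the docstring ('site', 'year', 'sp', 'ab'); the dataset name
# and path are placeholders.
def example_run_test(dat_name='bbs', data_dir='./data/'):
    raw_data = wk.import_raw_data(data_dir + dat_name + '.csv')
    run_test(raw_data, dat_name, data_dir=data_dir)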
def get_envpred_sads(envpred_data):
    envpred_sads = DataFrame(columns=['SiteID', 'EnvPred'])
    for index, site in envpred_data.iterrows():
        obs_S = site['S']
        envpred_S = 10 ** site['predlogS']
        envpred_N = 10 ** site['predlogN']
        beta = get_beta(envpred_S, envpred_N)
        # To produce a comparable number of species, obs_S is used here; this
        # choice is provisional.
        site_sad, p = get_mete_rad(obs_S, envpred_N, beta=beta)
        site_ids = [site['SiteID'] for i in range(0, len(site_sad))]
        site_sad_with_id = DataFrame(np.column_stack([site_ids, site_sad]),
                                     columns=['SiteID', 'EnvPred'])
        envpred_sads = envpred_sads.append(site_sad_with_id, ignore_index=True)
    return envpred_sads
def sim_null(S0, N0, dic_beta):
    """Abundances simulated from a discrete uniform and associated METE predictions"""
    N_sim = sorted(np.random.random_integers(1, (2 * N0 - S0) / S0, S0), reverse = True)
    N_tot = sum(N_sim)
    
    #In cases where N and S are nearly equal it is possible for random draws to
    #yield all singletons which breaks the numerical solutions for Beta.
    #If this is the case make one species a doubleton.
    if N_tot == S0:
        N_sim[0] = 2
        
    if (S0, N0) not in dic_beta:
        dic_beta[(S0, N0)] = mete.get_beta(S0, sum(N_sim))
    N_pred = mete.get_mete_rad(S0, sum(N_sim), dic_beta[(S0, N0)])[0] 
    np.random.seed()
    return N_sim, N_pred
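# Toy invocation of sim_null: dic_beta caches get_beta solutions so repeated
# calls with the same (S0, N0) skip the numerical solve. The values are invented.
def example_sim_null():
    dic_beta = {}
    N_sim, N_pred = sim_null(10, 200, dic_beta)
    return N_sim, N_pred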
def get_par_multi_dists(ab, dist_name):
    """Returns the parameters given the observed abundances and the designated distribution."""
    if dist_name == 'logser':
        beta = mete.get_beta(len(ab), sum(ab), version='untruncated')
        par = (np.exp(-beta), )
    elif dist_name == 'pln':
        par = md.pln_solver(ab)
    elif dist_name == 'geom':
        # float division guards against Python 2 integer truncation of S/N
        par = (len(ab) / float(sum(ab)), )
    elif dist_name == 'negbin':
        par = md.negbin_solver(ab)
        if np.isnan(par[0]):
            par = None
    elif dist_name == 'zipf':
        par = (md.zipf_solver(ab), )
    else:
        print "Error: distribution not recognized."
        par = None
    return par
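# Toy invocation of get_par_multi_dists on an invented abundance vector; three
# of the five supported distributions are shown.
def example_get_par_multi_dists():
    ab = [50, 20, 10, 5, 5, 2, 1, 1, 1]
    return dict((name, get_par_multi_dists(ab, name))
                for name in ['logser', 'geom', 'zipf'])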
# NOTE: this snippet begins mid-function in the original source; the enclosing
# signature below is inferred from the names used (rows of site_data holding
# [area, S, N]; shrt_name and nperm referenced later) and is an assumption.
def downscale_sar_permutations(site_data, shrt_name, nperm):
    site_data = [map(float, x) for x in site_data]

    # keep only sites where individual density exceeds 2
    indices = mete.which([site_data[i][2] > 2 for i in range(0, len(site_data))])
    site_data = [site_data[i] for i in indices]

    site_data = np.array(site_data)

    Amin = min(site_data[:, 0])
    Amax = max(site_data[:, 0])
    S0 = int(max(site_data[:, 1]))
    N0 = int(max(site_data[:, 2]))

    sar_down_iterative = []
    for i in range(0, nperm):
        p = mete.exp(-mete.get_beta(S0, N0))
        n0_rvs = mete.trunc_logser_rvs(p, N0, S0)
        sar_down_iterative.append(mete.downscale_sar_fixed_abu(Amax, n0_rvs, Amin))

    Avals = sar_down_iterative[0][0][:]
    len_A = len(Avals)

    out = np.empty((nperm * len_A, 2))

    for j in range(0, nperm):
        for i in range(0, 2):
            start = j * len_A
            stop = start + len_A
            out[start:stop, i] = sar_down_iterative[j][i]

    filename = "./sar/" + shrt_name + "_mete_sar_middle_ground.txt"
import numpy as np
import csv
import sys
import os
from math import exp

import mete

if len(sys.argv) > 1:
    S0 = int(sys.argv[1])
    N0 = int(sys.argv[2])
else:
    S0, N0 = 10, 100  # fallback demo values (an assumption; original required argv)

if not os.path.exists('../demo'):
    os.mkdir('../demo')

beta = mete.get_beta(S0, N0)

n0 = mete.trunc_logser_rvs(exp(-beta), N0, S0)
n0 = list(n0)
n0 = [int(x) for x in n0]
n0.sort(reverse=True)

rad = mete.get_mete_rad(S0, N0)[0]

Amax = 4
Amin = 1

recur = mete.downscale_sar(Amax, S0, N0, Amin)
recur_obsSAD = mete.downscale_sar_fixed_abu(Amax, n0, Amin)

Avals = recur_obsSAD[0][:]