Esempio n. 1
    def from_cdf(self):
        """Generate the maximum-likelihood Zipf rank-abundance distribution.

        The Zipf shape parameter (a) is estimated from ``self.obs`` with
        ``md.zipf_solver``. Using a, a rank-abundance distribution (RAD) is
        read off the fitted distribution with the percent point function
        (ppf), also known as the quantile function, i.e. the inverse of the
        cumulative distribution function (cdf).

        Returns:
            list: one predicted abundance per entry of ``self.obs``, ordered
            from the highest quantile (rank 1) down to the lowest (rank S).
        """
        p = md.zipf_solver(self.obs)
        S = len(self.obs)
        rv = stats.zipf(a=p)

        rad = []
        for i in range(1, S + 1):
            # Mid-point quantile for rank i; the 0.5 keeps the value strictly
            # inside (0, 1) and forces floating-point division.
            val = (S - i + 0.5) / S
            # ppf returns a float; abundances are counts, so store an int.
            rad.append(int(rv.ppf(val)))

        return rad
Esempio n. 2
def get_par_multi_dists(ab, dist_name):
    """Return the fitted parameters of one distribution for observed abundances.

    Args:
        ab: observed abundances, one entry per species.
        dist_name: one of 'logser', 'pln', 'geom', 'negbin' or 'zipf'.

    Returns:
        The parameters for the requested distribution: a 1-tuple for
        'logser', 'geom' and 'zipf'; whatever ``md.pln_solver`` /
        ``md.negbin_solver`` return for 'pln' / 'negbin'. None is returned
        when the first negative binomial parameter is NaN or when
        ``dist_name`` is not recognized.
    """
    if dist_name == 'logser':
        beta = mete.get_beta(len(ab), sum(ab), version='untruncated')
        par = (np.exp(-beta), )
    elif dist_name == 'pln':
        par = md.pln_solver(ab)
    elif dist_name == 'geom':
        # float() keeps S / N a true division even where `/` on two ints
        # truncates (Python 2 without `from __future__ import division`).
        par = (float(len(ab)) / sum(ab), )
    elif dist_name == 'negbin':
        par = md.negbin_solver(ab)
        if np.isnan(par[0]):
            par = None
    elif dist_name == 'zipf':
        par = (md.zipf_solver(ab), )
    else:
        # Unknown name: report it and hand back None instead of leaving
        # `par` unbound.
        print("Error: distribution not recognized.")
        par = None
    return par
def get_par_multi_dists(ab, dist_name):
    """Return the fitted parameters of one distribution for observed abundances.

    Args:
        ab: observed abundances, one entry per species.
        dist_name: one of 'logser', 'pln', 'geom', 'negbin' or 'zipf'.

    Returns:
        The parameters for the requested distribution: a 1-tuple for
        'logser', 'geom' and 'zipf'; whatever ``md.pln_solver`` /
        ``md.negbin_solver`` return for 'pln' / 'negbin'. None is returned
        when the first negative binomial parameter is NaN or when
        ``dist_name`` is not recognized.
    """
    if dist_name == 'logser':
        beta = mete.get_beta(len(ab), sum(ab), version='untruncated')
        par = (np.exp(-beta), )
    elif dist_name == 'pln':
        par = md.pln_solver(ab)
    elif dist_name == 'geom':
        # float() keeps S / N a true division even where `/` on two ints
        # truncates (Python 2 without `from __future__ import division`).
        par = (float(len(ab)) / sum(ab), )
    elif dist_name == 'negbin':
        par = md.negbin_solver(ab)
        if np.isnan(par[0]):
            par = None
    elif dist_name == 'zipf':
        par = (md.zipf_solver(ab), )
    else:
        # Unknown name: report it and hand back None instead of leaving
        # `par` unbound.
        print("Error: distribution not recognized.")
        par = None
    return par
Esempio n. 4
    def from_cdf(self):
        """Generate the maximum-likelihood Zipf rank-abundance distribution.

        The Zipf shape parameter (a) is estimated from ``self.obs`` with
        ``md.zipf_solver``. Using a, a rank-abundance distribution (RAD) is
        read off the fitted distribution with the percent point function
        (ppf), also known as the quantile function, i.e. the inverse of the
        cumulative distribution function (cdf).

        Returns:
            list: one predicted abundance per entry of ``self.obs``, ordered
            from the highest quantile (rank 1) down to the lowest (rank S).
        """
        p = md.zipf_solver(self.obs)
        S = len(self.obs)
        rv = stats.zipf(a=p)

        rad = []
        for i in range(1, S + 1):
            # Mid-point quantile for rank i; the 0.5 keeps the value strictly
            # inside (0, 1) and forces floating-point division.
            val = (S - i + 0.5) / S
            # ppf returns a float; abundances are counts, so store an int.
            rad.append(int(rv.ppf(val)))

        return rad
Esempio n. 5
    # Integer support 1 .. max(ab) + 1 on which each fitted pmf below is evaluated.
    x_values = np.array(range(max(ab) + 2)[1:])

    # Log-series: solve for the parameter, evaluate the pmf on x_values, and
    # compute the value returned by md.logser_ll (presumably the log-likelihood).
    logser_p = md.logser_solver(ab)
    logser_values = md.trunc_logser.pmf(x_values, logser_p, upper_bound=float("inf"))
    lsll = md.logser_ll(ab, logser_p)

    # Lower-truncated negative binomial: same three steps with two parameters.
    nb_n, nb_p = md.nbinom_lower_trunc_solver(ab)
    nb_values = md.nbinom_lower_trunc.pmf(x_values, nb_n, nb_p)
    nbll = md.nbinom_lower_trunc_ll(ab, nb_n, nb_p)

    # Poisson lognormal: same three steps; the pmf is requested with lower_trunc=True.
    pln_mu, pln_sigma = md.pln_solver(ab)
    pln_values = md.pln.pmf(x_values, pln_mu, pln_sigma, lower_trunc=True)
    plnll = md.pln_ll(ab, pln_mu, pln_sigma)

    # Zipf: same three steps with a single parameter.
    zipf_par = md.zipf_solver(ab)
    zipf_values = zipf.pmf(x_values, zipf_par)
    zll = md.zipf_ll(ab, zipf_par)

    # Accumulate 1/len(ab) at index ab[j] for every entry of ab, i.e. the
    # fraction of entries equal to each value.
    # NOTE(review): `1/len(ab)` is integer division (always 0 for len(ab) > 1)
    # under Python 2 unless `from __future__ import division` is active — confirm.
    ab_y = np.zeros(len(x_values) + 1)
    for j in range(len(ab)):
        ab_y[ab[j]] = ab_y[ab[j]] + 1/len(ab)

    # x-axis runs from 0 to the smaller of 50 and the largest x value.
    ax.set_xlim([0,min(50, max(x_values))])


    # Width originally set at 12 when width was 50.
    # This should be the same proportional width
Esempio n. 6
        # Natural log of the ranks 1 .. len(self.obs); used as the only predictor.
        ranks = np.log(range(1, len(self.obs)+1))
        # Log of the total of self.obs, repeated once per observation.
        # NOTE(review): 'off' is stored in the frame but the formula below never
        # references it — confirm whether it was meant to be passed as an offset.
        off = [np.log(sum(self.obs))] * len(self.obs)

        d = pd.DataFrame({'ranks': ranks, 'off': off, 'x':self.obs})

        # Fit a Poisson-family GLM of x on ranks and return the model's predictions.
        lm = smf.glm(formula='x ~ ranks', data = d, family = sm.families.Poisson()).fit()
        pred = lm.predict()

        return pred

# Sample abundance vector used to time one vectorized call to the Zipf ppf.
ad = [20000, 10000, 8000, 6000, 1000, 200, 200, 100, 18, 16, 14, 12, 10, 4, 4, 2, 2, 2, 2, 2, 1,
            1, 1, 1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]

# Fit the shape parameter and freeze the distribution.
S = len(ad)
a = md.zipf_solver(ad)
rv = stats.zipf(a)

rad = []
# Mid-point quantiles for ranks 1 .. S, from highest to lowest.
vals = [(S - i + 0.5)/S for i in range(1, S+1)]

# Time a single ppf evaluation over all quantiles at once.
t = time.clock()
x = rv.ppf(vals)
elapsed_t = time.clock() - t
print("%s %s" % (x, elapsed_t))

ranks = range(1,len(ad)+1)
Esempio n. 7
    # Log-series: solve for the parameter, evaluate the pmf, and compute the
    # value returned by md.logser_ll (presumably the log-likelihood).
    logser_p = md.logser_solver(ab)
    # NOTE(review): the call on the next line is cut off after its first
    # argument in this excerpt (no closing parenthesis) — restore the
    # remaining arguments before running.
    logser_values = md.trunc_logser.pmf(x_values,
    lsll = md.logser_ll(ab, logser_p)

    # Lower-truncated negative binomial: solve, evaluate the pmf, compute the ll value.
    nb_n, nb_p = md.nbinom_lower_trunc_solver(ab)
    nb_values = md.nbinom_lower_trunc.pmf(x_values, nb_n, nb_p)
    nbll = md.nbinom_lower_trunc_ll(ab, nb_n, nb_p)

    # Poisson lognormal: same steps; the pmf is requested with lower_trunc=True.
    pln_mu, pln_sigma = md.pln_solver(ab)
    pln_values = md.pln.pmf(x_values, pln_mu, pln_sigma, lower_trunc=True)
    plnll = md.pln_ll(ab, pln_mu, pln_sigma)

    # Zipf: same steps with a single parameter.
    zipf_par = md.zipf_solver(ab)
    zipf_values = zipf.pmf(x_values, zipf_par)
    zll = md.zipf_ll(ab, zipf_par)

    # Accumulate 1/len(ab) at index ab[j] for every entry of ab, i.e. the
    # fraction of entries equal to each value.
    # NOTE(review): `1 / len(ab)` is integer division (always 0 for len(ab) > 1)
    # under Python 2 unless `from __future__ import division` is active — confirm.
    ab_y = np.zeros(len(x_values) + 1)
    for j in range(len(ab)):
        ab_y[ab[j]] = ab_y[ab[j]] + 1 / len(ab)

    # x-axis runs from 0 to the smaller of 50 and the largest x value.
    ax.set_xlim([0, min(50, max(x_values))])


    # Width originally set at 12 when width was 50.
    # This should be the same proportional width
Esempio n. 8
def model_comparisons(raw_data, dataset_name, data_dir, cutoff=9):
    """ Uses raw species abundance data to compare predicted vs. empirical species abundance distributions (SAD) and output results in csv files. 
    Keyword arguments:
    raw_data: numpy structured array with 4 columns: 'site', 'year', 'sp' (species), 'ab' (abundance).
    dataset_name: short code to indicate the name of the dataset in the output file names.
    data_dir: directory in which to store results output.
    cutoff: minimum number of species required to run -1.
    SAD models and packages used:
    Logseries (macroecotools/macroecodistributions)
    Poisson lognormal (macroecotools/macroecodistributions)
    Negative binomial (macroecotools/macroecodistributions)
    Zipf (macroecotools/macroecodistributions)
    Neutral theory: Neutral theory predicts the negative binomial distribution (Connolly et al. 2014. Commonness and rarity in the marine biosphere. PNAS 111: 8524-8529.
    usites = np.sort(np.unique(raw_data["site"]))

    # Open output files
    f1 = open(data_dir + dataset_name + '_dist_test.csv', 'wb')
    output1 = csv.writer(f1)
    f2 = open(data_dir + dataset_name + '_likelihoods.csv', 'wb')
    output2 = csv.writer(f2)
    f3 = open(data_dir + dataset_name + '_relative_L.csv', 'wb')
    output3 = csv.writer(f3)

    # Insert header
        'site', 'S', 'N', 'AICc_logseries', 'AICc_pln', 'AICc_negbin',
        'site', 'S', 'N', 'likelihood_logseries', 'likelihood_pln',
        'likelihood_negbin', 'likelihood_zipf'
        'site', 'S', 'N', 'relative_ll_logseries', 'relative_ll_pln',
        'relative_ll_negbin', 'relative_ll_zipf'

    results = []
    for site in usites:
        subsites = raw_data["site"][raw_data["site"] == site]
        subabundance = raw_data["ab"][raw_data["site"] == site]
        N = sum(subabundance)  # N = total abundance for a site
        S = len(subsites)  # S = species richness at a site
        if (min(subabundance) > 0) and (S > cutoff):
            print("%s, Site %s, S=%s, N=%s" % (dataset_name, site, S, N))

            # Calculate Akaike weight of species abundance models:
            # Parameter k is the number of fitted parameters
            k1 = 1
            k2 = 2

            # Calculate log-likelihoods of species abundance models and calculate AICc values:
            # Logseries
            p_untruncated = md.logser_solver(subabundance)
            L_logser_untruncated = md.logser_ll(
                p_untruncated)  # Log-likelihood of untruncated logseries
            AICc_logser_untruncated = macroecotools.AICc(
                k1, L_logser_untruncated, S)  # AICc logseries untruncated
            relative_ll_logser_untruncated = AICc_logser_untruncated  # Relative likelihood untruncated logseries

            #Start making AICc list
            AICc_list = [AICc_logser_untruncated]
            likelihood_list = [L_logser_untruncated]
            relative_likelihood_list = [relative_ll_logser_untruncated]

            # Poisson lognormal
            mu, sigma = md.pln_solver(subabundance)
            L_pln = md.pln_ll(subabundance, mu,
                              sigma)  # Log-likelihood of Poisson lognormal
            AICc_pln = macroecotools.AICc(k2, L_pln,
                                          S)  # AICc Poisson lognormal
            relative_ll_pln = macroecotools.AICc(
                k1, L_pln, S)  #Relative likelihood, Poisson lognormal
            # Add to AICc list
            AICc_list = AICc_list + [AICc_pln]
            likelihood_list = likelihood_list + [L_pln]
            relative_likelihood_list = relative_likelihood_list + [

            # Negative binomial
            n0, p0 = md.nbinom_lower_trunc_solver(subabundance)
            L_negbin = md.nbinom_lower_trunc_ll(
                subabundance, n0, p0)  # Log-likelihood of negative binomial
            AICc_negbin = macroecotools.AICc(k2, L_negbin,
                                             S)  # AICc negative binomial
            relative_ll_negbin = macroecotools.AICc(
                k1, L_negbin,
                S)  # Relative log-likelihood of negative binomial
            # Add to AICc list
            AICc_list = AICc_list + [AICc_negbin]
            likelihood_list = likelihood_list + [L_negbin]
            relative_likelihood_list = relative_likelihood_list + [

            # Zipf distribution
            par = md.zipf_solver(subabundance)
            L_zipf = md.zipf_ll(subabundance,
                                par)  #Log-likelihood of Zipf distribution
            AICc_zipf = macroecotools.AICc(k1, L_zipf, S)
            relative_ll_zipf = AICc_zipf
            #Add to AICc list
            AICc_list = AICc_list + [AICc_zipf]
            likelihood_list = likelihood_list + [L_zipf]
            relative_likelihood_list = relative_likelihood_list + [

            # Calculate AICc weight
            weight = macroecotools.aic_weight(AICc_list, S, cutoff=4)

            #Calculate relative likelihood
            relative_likelihoods = macroecotools.aic_weight(
                relative_likelihood_list, S, cutoff=4)

            # Convert weight to list
            weights_output = weight.tolist()

            #Convert relative likelihoods to list
            relative_likelihoods_output = relative_likelihoods.tolist()

            # Format results for output
            for weight in weights_output:
                results1 = [[site, S, N] + weights_output]
            results2 = [[site, S, N] + likelihood_list]
            results3 = [[site, S, N] + relative_likelihoods_output]
            results.append([site, S, N] + weights_output + likelihood_list +

            # Save results to a csv file:

    results = DataFrame(results,
                            'site', 'S', 'N', 'AICc_logseries', 'AICc_pln',
                            'AICc_negbin', 'AICc_zipf', 'likelihood_logseries',
                            'likelihood_pln', 'likelihood_negbin',
                            'likelihood_zipf', 'relative_ll_logseries',
                            'relative_ll_pln', 'relative_ll_negbin',
                                dataset_name + '_likelihood_results.csv'),