Example no. 1
    def from_cdf(self):
        """ Obtain the maximum likelihood form of the Zipf distribution, given
        the mle value for the Zipf shape parameter (a). Using a, this code
        generates a rank-abundance distribution (RAD) from the cumulative
        density function (cdf) using the percent point function (ppf) also known
        as the quantile function.
        see: http://www.esapubs.org/archive/ecol/E093/155/appendix-B.htm

        This is an actual form of the Zipf distribution, obtained from getting
        the mle for the shape parameter.
        """

        p = md.zipf_solver(self.obs)
        S = len(self.obs)
        rv = stats.zipf(a=p)
        rad = []

        for i in range(1, S+1):
            val = (S - i + 0.5)/S
            x = rv.ppf(val)
            rad.append(int(x))


        return rad
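
The ppf-based construction described in the docstring can be reproduced with plain scipy.stats once a shape parameter is in hand. A minimal sketch, using an arbitrary placeholder value for the MLE that md.zipf_solver would normally supply:

import numpy as np
from scipy import stats

a = 1.5   # placeholder for the MLE shape parameter (normally md.zipf_solver(obs))
S = 40    # number of observed species (ranks)

rv = stats.zipf(a=a)
# Evaluate the quantile function at the mid-rank probabilities (S - i + 0.5)/S
# to get the predicted abundance for each rank i = 1..S (most abundant first).
probs = (S - np.arange(1, S + 1) + 0.5) / S
rad = rv.ppf(probs).astype(int)
print(rad)
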
Example no. 2
def get_par_multi_dists(ab, dist_name):
    """Returns the parameters given the observed abundances and the designated distribution."""
    if dist_name == 'logser':
        beta = mete.get_beta(len(ab), sum(ab), version='untruncated')
        par = (np.exp(-beta), )
    elif dist_name == 'pln':
        par = md.pln_solver(ab)
    elif dist_name == 'geom':
        par = (len(ab) / sum(ab), )
    elif dist_name == 'negbin':
        par = md.negbin_solver(ab)
        if np.isnan(par[0]):
            par = None
    elif dist_name == 'zipf':
        par = (md.zipf_solver(ab), )
    else:
        print("Error: distribution not recognized.")
        par = None
    return par
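
md.zipf_solver is treated as given throughout these examples. As a rough illustration of what such a solver does (not the macroecodistributions implementation), the shape parameter can be estimated by maximizing the scipy.stats.zipf log-likelihood directly:

import numpy as np
from scipy import stats
from scipy.optimize import minimize_scalar

def zipf_mle(ab):
    """Illustrative stand-in for md.zipf_solver: direct likelihood maximization."""
    ab = np.asarray(ab)
    # Negative log-likelihood of the (untruncated) Zipf distribution.
    nll = lambda a: -np.sum(stats.zipf.logpmf(ab, a))
    # The shape parameter must exceed 1; the upper bound here is arbitrary.
    return minimize_scalar(nll, bounds=(1.0001, 20), method='bounded').x

# e.g. zipf_mle([10, 6, 4, 2, 2, 1, 1, 1]) returns a single float estimate.
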
Example no. 4
    def from_cdf(self):
        """ Obtain the maximum likelihood form of the Zipf distribution, given
        the mle value for the Zipf shape parameter (a). Using a, this code
        generates a rank-abundance distribution (RAD) from the cumulative
        density function (cdf) using the percent point function (ppf) also known
        as the quantile function.
        see: http://www.esapubs.org/archive/ecol/E093/155/appendix-B.htm

        This is an actual form of the Zipf distribution, obtained from getting
        the mle for the shape parameter.
        """

        p = md.zipf_solver(self.obs)
        S = len(self.obs)
        rv = stats.zipf(a=p)
        rad = []
        for i in range(1, S+1):
            val = (S - i + 0.5)/S
            x = rv.ppf(val)
            rad.append(int(x))

        return rad
Example no. 5
    x_values = np.arange(1, max(ab) + 2)

    logser_p = md.logser_solver(ab)
    logser_values = md.trunc_logser.pmf(x_values, logser_p, upper_bound=float("inf"))
    lsll = md.logser_ll(ab, logser_p)

    nb_n, nb_p = md.nbinom_lower_trunc_solver(ab)
    nb_values = md.nbinom_lower_trunc.pmf(x_values, nb_n, nb_p)
    nbll = md.nbinom_lower_trunc_ll(ab, nb_n, nb_p)

    pln_mu, pln_sigma = md.pln_solver(ab)
    pln_values = md.pln.pmf(x_values, pln_mu, pln_sigma, lower_trunc=True)
    plnll = md.pln_ll(ab, pln_mu, pln_sigma)

    zipf_par = md.zipf_solver(ab)
    zipf_values = zipf.pmf(x_values, zipf_par)
    zll = md.zipf_ll(ab, zipf_par)

    ab_y = np.zeros(len(x_values) + 1)
    for j in range(len(ab)):
        ab_y[ab[j]] = ab_y[ab[j]] + 1/len(ab)

    ax.set_xlim([0,min(50, max(x_values))])

    plt.ylabel('frequency')
    plt.xlabel('abundance')
    plt.title(plot_labels[i])

    # Width originally set at 12 when width was 50.
    # This should be the same proportional width
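
The ab_y loop above accumulates the fraction of species at each abundance value. Assuming ab holds positive integer abundances as in the fragment, the same histogram can be built in one vectorized call:

import numpy as np

# Fraction of species observed at each abundance 0..max(ab)+1,
# equivalent to the ab_y accumulation loop above.
ab_y = np.bincount(ab, minlength=len(x_values) + 1) / len(ab)
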
Example no. 6
        ranks = np.log(range(1, len(self.obs)+1))
        off = [np.log(sum(self.obs))] * len(self.obs)

        d = pd.DataFrame({'ranks': ranks, 'off': off, 'x':self.obs})

        lm = smf.glm(formula='x ~ ranks', data=d, family=sm.families.Poisson()).fit()
        pred = lm.predict()

        return pred


ad = [20000, 10000, 8000, 6000, 1000, 200, 200, 100, 18, 16, 14, 12, 10, 4, 4,
      2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

a = md.zipf_solver(ad)
S = len(ad)
rv = stats.zipf(a)

rad = []
vals = []
for i in range(1, S+1):
    vals.append((S - i + 0.5)/S)

t = time.perf_counter()
x = rv.ppf(vals)
elapsed_t = time.perf_counter() - t
print(x, elapsed_t)
sys.exit()

ranks = range(1,len(ad)+1)
Example no. 7
    logser_p = md.logser_solver(ab)
    logser_values = md.trunc_logser.pmf(x_values,
                                        logser_p,
                                        upper_bound=float("inf"))
    lsll = md.logser_ll(ab, logser_p)

    nb_n, nb_p = md.nbinom_lower_trunc_solver(ab)
    nb_values = md.nbinom_lower_trunc.pmf(x_values, nb_n, nb_p)
    nbll = md.nbinom_lower_trunc_ll(ab, nb_n, nb_p)

    pln_mu, pln_sigma = md.pln_solver(ab)
    pln_values = md.pln.pmf(x_values, pln_mu, pln_sigma, lower_trunc=True)
    plnll = md.pln_ll(ab, pln_mu, pln_sigma)

    zipf_par = md.zipf_solver(ab)
    zipf_values = zipf.pmf(x_values, zipf_par)
    zll = md.zipf_ll(ab, zipf_par)

    ab_y = np.zeros(len(x_values) + 1)
    for j in range(len(ab)):
        ab_y[ab[j]] = ab_y[ab[j]] + 1 / len(ab)

    ax.set_xlim([0, min(50, max(x_values))])

    plt.ylabel('frequency')
    plt.xlabel('abundance')
    plt.title(plot_labels[i])

    # Width originally set at 12 when width was 50.
    # This should be the same proportional width
Example no. 8
def model_comparisons(raw_data, dataset_name, data_dir, cutoff=9):
    """ Uses raw species abundance data to compare predicted vs. empirical species abundance distributions (SAD) and output results in csv files. 
    
    Keyword arguments:
    raw_data: numpy structured array with 4 columns: 'site', 'year', 'sp' (species), 'ab' (abundance).
    dataset_name: short code to indicate the name of the dataset in the output file names.
    data_dir: directory in which to store results output.
    cutoff: minimum number of species required for a site to be analyzed, minus 1 (only sites with S > cutoff are processed).
    
    SAD models and packages used:
    Logseries (macroecotools/macroecodistributions)
    Poisson lognormal (macroecotools/macroecodistributions)
    Negative binomial (macroecotools/macroecodistributions)
    Zipf (macroecotools/macroecodistributions)
    
    Neutral theory predicts the negative binomial distribution (Connolly et al. 2014. Commonness and rarity in the marine biosphere. PNAS 111: 8524-8529. http://www.pnas.org/content/111/23/8524.abstract).
    
    """
    usites = np.sort(np.unique(raw_data["site"]))

    # Open output files
    f1 = open(data_dir + dataset_name + '_dist_test.csv', 'w', newline='')
    output1 = csv.writer(f1)
    f2 = open(data_dir + dataset_name + '_likelihoods.csv', 'w', newline='')
    output2 = csv.writer(f2)
    f3 = open(data_dir + dataset_name + '_relative_L.csv', 'w', newline='')
    output3 = csv.writer(f3)

    # Insert header
    output1.writerow([
        'site', 'S', 'N', 'AICc_logseries', 'AICc_pln', 'AICc_negbin',
        'AICc_zipf'
    ])
    output2.writerow([
        'site', 'S', 'N', 'likelihood_logseries', 'likelihood_pln',
        'likelihood_negbin', 'likelihood_zipf'
    ])
    output3.writerow([
        'site', 'S', 'N', 'relative_ll_logseries', 'relative_ll_pln',
        'relative_ll_negbin', 'relative_ll_zipf'
    ])

    results = []
    for site in usites:
        subsites = raw_data["site"][raw_data["site"] == site]
        subabundance = raw_data["ab"][raw_data["site"] == site]
        N = sum(subabundance)  # N = total abundance for a site
        S = len(subsites)  # S = species richness at a site
        if (min(subabundance) > 0) and (S > cutoff):
            print("%s, Site %s, S=%s, N=%s" % (dataset_name, site, S, N))

            # Calculate Akaike weight of species abundance models:
            # Parameter k is the number of fitted parameters
            k1 = 1
            k2 = 2
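            # AICc = 2k - 2*ln(L) + 2k(k+1)/(n - k - 1); below, species
            # richness S is passed to macroecotools.AICc as the sample-size term.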

            # Calculate log-likelihoods of species abundance models and calculate AICc values:
            # Logseries
            p_untruncated = md.logser_solver(subabundance)
            L_logser_untruncated = md.logser_ll(
                subabundance,
                p_untruncated)  # Log-likelihood of untruncated logseries
            AICc_logser_untruncated = macroecotools.AICc(
                k1, L_logser_untruncated, S)  # AICc logseries untruncated
            relative_ll_logser_untruncated = AICc_logser_untruncated  # Relative likelihood untruncated logseries

            #Start making AICc list
            AICc_list = [AICc_logser_untruncated]
            likelihood_list = [L_logser_untruncated]
            relative_likelihood_list = [relative_ll_logser_untruncated]

            # Poisson lognormal
            mu, sigma = md.pln_solver(subabundance)
            L_pln = md.pln_ll(subabundance, mu,
                              sigma)  # Log-likelihood of Poisson lognormal
            AICc_pln = macroecotools.AICc(k2, L_pln,
                                          S)  # AICc Poisson lognormal
            relative_ll_pln = macroecotools.AICc(
                k1, L_pln, S)  #Relative likelihood, Poisson lognormal
            # Add to AICc list
            AICc_list = AICc_list + [AICc_pln]
            likelihood_list = likelihood_list + [L_pln]
            relative_likelihood_list = relative_likelihood_list + [
                relative_ll_pln
            ]

            # Negative binomial
            n0, p0 = md.nbinom_lower_trunc_solver(subabundance)
            L_negbin = md.nbinom_lower_trunc_ll(
                subabundance, n0, p0)  # Log-likelihood of negative binomial
            AICc_negbin = macroecotools.AICc(k2, L_negbin,
                                             S)  # AICc negative binomial
            relative_ll_negbin = macroecotools.AICc(
                k1, L_negbin,
                S)  # Relative log-likelihood of negative binomial
            # Add to AICc list
            AICc_list = AICc_list + [AICc_negbin]
            likelihood_list = likelihood_list + [L_negbin]
            relative_likelihood_list = relative_likelihood_list + [
                relative_ll_negbin
            ]

            # Zipf distribution
            par = md.zipf_solver(subabundance)
            L_zipf = md.zipf_ll(subabundance,
                                par)  #Log-likelihood of Zipf distribution
            AICc_zipf = macroecotools.AICc(k1, L_zipf, S)
            relative_ll_zipf = AICc_zipf
            #Add to AICc list
            AICc_list = AICc_list + [AICc_zipf]
            likelihood_list = likelihood_list + [L_zipf]
            relative_likelihood_list = relative_likelihood_list + [
                relative_ll_zipf
            ]

            # Calculate AICc weight
            weight = macroecotools.aic_weight(AICc_list, S, cutoff=4)

            #Calculate relative likelihood
            relative_likelihoods = macroecotools.aic_weight(
                relative_likelihood_list, S, cutoff=4)

            # Convert weight to list
            weights_output = weight.tolist()

            #Convert relative likelihoods to list
            relative_likelihoods_output = relative_likelihoods.tolist()

            # Format results for output
            results1 = [[site, S, N] + weights_output]
            results2 = [[site, S, N] + likelihood_list]
            results3 = [[site, S, N] + relative_likelihoods_output]
            results.append([site, S, N] + weights_output + likelihood_list +
                           relative_likelihoods_output)

            # Save results to a csv file:
            output1.writerows(results1)
            output2.writerows(results2)
            output3.writerows(results3)

    results = DataFrame(results,
                        columns=[
                            'site', 'S', 'N', 'AICc_logseries', 'AICc_pln',
                            'AICc_negbin', 'AICc_zipf', 'likelihood_logseries',
                            'likelihood_pln', 'likelihood_negbin',
                            'likelihood_zipf', 'relative_ll_logseries',
                            'relative_ll_pln', 'relative_ll_negbin',
                            'relative_ll_zipf'
                        ])
    results.to_csv(os.path.join(data_dir,
                                dataset_name + '_likelihood_results.csv'),
                   index=False)
    f1.close()
    f2.close()
    f3.close()
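
A hypothetical call, assuming the md, macroecotools, and pandas dependencies are importable and that raw_data follows the four-column structured-array layout described in the docstring (the field values below are purely illustrative):

import numpy as np

# Toy dataset: one site, ten species, abundances listed from common to rare.
toy = np.array(
    [(1, 2010, 'sp%d' % i, n) for i, n in enumerate([50, 20, 10, 5, 2, 2, 1, 1, 1, 1])],
    dtype=[('site', 'i4'), ('year', 'i4'), ('sp', 'S10'), ('ab', 'i4')])

# With cutoff=9 the site is analyzed because it holds more than nine species.
model_comparisons(toy, 'toy_dataset', './', cutoff=9)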