def bootstrap_SAD(name_site_combo, model, in_dir = './data/', out_dir = './out_files/', Niter = 200):
    """A general function of bootstrapping for SAD applying to all four models. 
    
    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output 
    Niter - number of bootstrap samples
    
    Output:
    Writes to disk, with one file for R^2 and one for KS statistic.
    
    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)    
    G, S, N, E = get_GSNE(dat_clean)
    beta_ssnt = mete.get_beta(S, N, version = 'untruncated')
    beta_asne = mete.get_beta(S, N)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    sad_agsne = mete_distributions.sad_agsne([G, S, N, E], [lambda1, beta, lambda3, agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3])
    dist_for_model = {'ssnt_0': stats.logser(np.exp(-beta_ssnt)), 
                      'ssnt_1': stats.logser(np.exp(-beta_ssnt)), 
                      'asne': md.trunc_logser(np.exp(-beta_asne), N),
                      'agsne': sad_agsne}
    dist = dist_for_model[model]
    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_rad_' + model + '.csv')
    pred = pred_obs[pred_obs['site'] == site]['pred'][::-1]
    obs = pred_obs[pred_obs['site'] == site]['obs'][::-1]
    
    out_list_rsquare = [dat_name, site, str(mtools.obs_pred_rsquare(np.log10(obs), np.log10(pred)))]
    emp_cdf = mtools.get_emp_cdf(obs)
    out_list_ks = [dat_name, site, str(max(abs(emp_cdf - np.array([dist.cdf(x) for x in obs]))))]
    
    for i in range(Niter):
        obs_boot = np.array(sorted(dist.rvs(S)))
        cdf_boot = np.array([dist.cdf(x) for x in obs_boot])
        emp_cdf_boot = mtools.get_emp_cdf(obs_boot)
        out_list_rsquare.append(str(mtools.obs_pred_rsquare(np.log10(obs_boot), np.log10(pred))))
        out_list_ks.append(str(max(abs(emp_cdf_boot - np.array(cdf_boot)))))
    
    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_rsquare.txt', ",".join(str(x) for x in out_list_rsquare))
    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_ks.txt', ",".join(str(x) for x in out_list_ks))
Esempio n. 2
0
 def test_rvs(self):
     vals = stats.logser.rvs(0.75, size=(2, 50))
     assert numpy.all(vals >= 1)
     assert numpy.shape(vals) == (2, 50)
     assert vals.dtype.char in typecodes["AllInteger"]
     val = stats.logser.rvs(0.75)
     assert isinstance(val, int)
     val = stats.logser(0.75).rvs(3)
     assert isinstance(val, numpy.ndarray)
     assert val.dtype.char in typecodes["AllInteger"]
Esempio n. 3
0
 def test_rvs(self):
     vals = stats.logser.rvs(0.75, size=(2, 50))
     assert_(numpy.all(vals >= 1))
     assert_(numpy.shape(vals) == (2, 50))
     assert_(vals.dtype.char in typecodes['AllInteger'])
     val = stats.logser.rvs(0.75)
     assert_(isinstance(val, int))
     val = stats.logser(0.75).rvs(3)
     assert_(isinstance(val, numpy.ndarray))
     assert_(val.dtype.char in typecodes['AllInteger'])
p = 0.6
mean, var, skew, kurt = logser.stats(p, moments='mvsk')

# Display the probability mass function (``pmf``):

x = np.arange(logser.ppf(0.01, p), logser.ppf(0.99, p))
ax.plot(x, logser.pmf(x, p), 'bo', ms=8, label='logser pmf')
ax.vlines(x, 0, logser.pmf(x, p), colors='b', lw=5, alpha=0.5)

# Alternatively, the distribution object can be called (as a function)
# to fix the shape and location. This returns a "frozen" RV object holding
# the given parameters fixed.

# Freeze the distribution and display the frozen ``pmf``:

rv = logser(p)
ax.vlines(x,
          0,
          rv.pmf(x),
          colors='k',
          linestyles='-',
          lw=1,
          label='frozen pmf')
ax.legend(loc='best', frameon=False)
plt.show()

# Check accuracy of ``cdf`` and ``ppf``:

prob = logser.cdf(x, p)
np.allclose(x, logser.ppf(prob, p))
# True
Esempio n. 5
0
 def nat_to_scipy_distribution(self, q: LogarithmicNP) -> Any:
     return ss.logser(np.exp(q.log_probability))
def bootstrap_SAD(name_site_combo,
                  model,
                  in_dir='./data/',
                  out_dir='./out_files/',
                  Niter=200):
    """A general function of bootstrapping for SAD applying to all four models. 
    
    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output 
    Niter - number of bootstrap samples
    
    Output:
    Writes to disk, with one file for R^2 and one for KS statistic.
    
    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)
    G, S, N, E = get_GSNE(dat_clean)
    beta_ssnt = mete.get_beta(S, N, version='untruncated')
    beta_asne = mete.get_beta(S, N)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    sad_agsne = mete_distributions.sad_agsne([G, S, N, E], [
        lambda1, beta, lambda3,
        agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3
    ])
    dist_for_model = {
        'ssnt_0': stats.logser(np.exp(-beta_ssnt)),
        'ssnt_1': stats.logser(np.exp(-beta_ssnt)),
        'asne': md.trunc_logser(np.exp(-beta_asne), N),
        'agsne': sad_agsne
    }
    dist = dist_for_model[model]
    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_rad_' +
                                       model + '.csv')
    pred = pred_obs[pred_obs['site'] == site]['pred'][::-1]
    obs = pred_obs[pred_obs['site'] == site]['obs'][::-1]

    out_list_rsquare = [
        dat_name, site,
        str(mtools.obs_pred_rsquare(np.log10(obs), np.log10(pred)))
    ]
    emp_cdf = mtools.get_emp_cdf(obs)
    out_list_ks = [
        dat_name, site,
        str(max(abs(emp_cdf - np.array([dist.cdf(x) for x in obs]))))
    ]

    for i in range(Niter):
        obs_boot = np.array(sorted(dist.rvs(S)))
        cdf_boot = np.array([dist.cdf(x) for x in obs_boot])
        emp_cdf_boot = mtools.get_emp_cdf(obs_boot)
        out_list_rsquare.append(
            str(mtools.obs_pred_rsquare(np.log10(obs_boot), np.log10(pred))))
        out_list_ks.append(str(max(abs(emp_cdf_boot - np.array(cdf_boot)))))

    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_rsquare.txt',
                     ",".join(str(x) for x in out_list_rsquare))
    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_ks.txt',
                     ",".join(str(x) for x in out_list_ks))