def bootstrap_SAD(name_site_combo, model, in_dir='./data/', out_dir='./out_files/', Niter=200):
    """A general function of bootstrapping for SAD applying to all four models.

    The fit of the observed abundances is evaluated first (R^2 between
    log10 observed and log10 predicted abundances, and the maximum absolute
    difference between the empirical CDF and the model CDF).  Then ``Niter``
    samples of size S are drawn from the fitted distribution and the same
    two statistics are recomputed for every sample.

    Inputs:
    name_site_combo: a two-element sequence holding dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data (joined by plain string concatenation,
        so it must end with a path separator)
    out_dir - directory used both in input (obs_pred.csv file) and output
        (also joined by plain string concatenation)
    Niter - number of bootstrap samples

    Output:
    Passes two comma-joined records to wk.write_to_file, one for R^2 and one
    for the KS statistic.  Each record is dat_name, site, the statistic of
    the observed data, followed by the Niter bootstrap values.

    Raises:
    KeyError if model is not one of the four supported values.

    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_clean = clean_data_agsne(dat[dat['site'] == site])
    G, S, N, E = get_GSNE(dat_clean)

    # Each builder fits only its own model.  Only the requested one is
    # called, so e.g. an SSNT run no longer pays for (or fails on) the AGSNE
    # lambda solve.
    def build_ssnt():
        beta_ssnt = mete.get_beta(S, N, version='untruncated')
        return stats.logser(np.exp(-beta_ssnt))

    def build_asne():
        beta_asne = mete.get_beta(S, N)
        return md.trunc_logser(np.exp(-beta_asne), N)

    def build_agsne():
        lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
        z = agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3
        return mete_distributions.sad_agsne([G, S, N, E], [lambda1, beta, lambda3, z])

    builders = {
        'ssnt_0': build_ssnt,
        'ssnt_1': build_ssnt,  # both SSNT variants share the same SAD
        'asne': build_asne,
        'agsne': build_agsne,
    }
    dist = builders[model]()  # unknown model -> KeyError, as before

    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_rad_' + model + '.csv')
    site_rows = pred_obs[pred_obs['site'] == site]
    # Reversed order; presumably the file is stored from most to least
    # abundant and this yields ascending order to line up with the sorted
    # bootstrap samples -- TODO confirm against the obs_pred writer.
    pred = site_rows['pred'][::-1]
    obs = site_rows['obs'][::-1]
    log_pred = np.log10(pred)  # invariant across bootstrap iterations

    def rsquare(sample):
        # R^2 of log10 abundances against the (fixed) log10 predictions.
        return mtools.obs_pred_rsquare(np.log10(sample), log_pred)

    def ks_stat(sample):
        # Largest absolute gap between the empirical CDF and the model CDF.
        model_cdf = np.array([dist.cdf(x) for x in sample])
        return max(abs(mtools.get_emp_cdf(sample) - model_cdf))

    out_list_rsquare = [dat_name, site, str(rsquare(obs))]
    out_list_ks = [dat_name, site, str(ks_stat(obs))]

    for _ in range(Niter):
        obs_boot = np.array(sorted(dist.rvs(S)))
        out_list_rsquare.append(str(rsquare(obs_boot)))
        out_list_ks.append(str(ks_stat(obs_boot)))

    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_rsquare.txt',
                     ",".join(str(x) for x in out_list_rsquare))
    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_ks.txt',
                     ",".join(str(x) for x in out_list_ks))
def test_rvs(self): vals = stats.logser.rvs(0.75, size=(2, 50)) assert numpy.all(vals >= 1) assert numpy.shape(vals) == (2, 50) assert vals.dtype.char in typecodes["AllInteger"] val = stats.logser.rvs(0.75) assert isinstance(val, int) val = stats.logser(0.75).rvs(3) assert isinstance(val, numpy.ndarray) assert val.dtype.char in typecodes["AllInteger"]
def test_rvs(self): vals = stats.logser.rvs(0.75, size=(2, 50)) assert_(numpy.all(vals >= 1)) assert_(numpy.shape(vals) == (2, 50)) assert_(vals.dtype.char in typecodes['AllInteger']) val = stats.logser.rvs(0.75) assert_(isinstance(val, int)) val = stats.logser(0.75).rvs(3) assert_(isinstance(val, numpy.ndarray)) assert_(val.dtype.char in typecodes['AllInteger'])
# Shape parameter of the log-series distribution used throughout the example.
p = 0.6
# ``moments='mvsk'`` requests mean, variance, skew and kurtosis, in that order.
mean, var, skew, kurt = logser.stats(p, moments='mvsk')

# Display the probability mass function (``pmf``) on the points between the
# 1% and 99% quantiles:
lower_q = logser.ppf(0.01, p)
upper_q = logser.ppf(0.99, p)
x = np.arange(lower_q, upper_q)
pmf_at_x = logser.pmf(x, p)
ax.plot(x, pmf_at_x, 'bo', ms=8, label='logser pmf')
ax.vlines(x, 0, pmf_at_x, colors='b', lw=5, alpha=0.5)

# Alternatively, the distribution object can be called (as a function)
# to fix the shape and location. This returns a "frozen" RV object holding
# the given parameters fixed.

# Freeze the distribution and display the frozen ``pmf``:
rv = logser(p)
frozen_pmf_at_x = rv.pmf(x)
ax.vlines(x, 0, frozen_pmf_at_x, colors='k', linestyles='-', lw=1,
          label='frozen pmf')
ax.legend(loc='best', frameon=False)
plt.show()

# Check accuracy of ``cdf`` and ``ppf``: mapping ``x`` through the cdf and
# back through the ppf should reproduce ``x``.
prob = logser.cdf(x, p)
np.allclose(x, logser.ppf(prob, p))  # True
def nat_to_scipy_distribution(self, q: LogarithmicNP) -> Any:
    """Return a frozen ``scipy.stats.logser`` built from ``q``.

    ``q.log_probability`` is exponentiated and the result is passed as the
    shape parameter of the log-series distribution.
    """
    probability = np.exp(q.log_probability)
    return ss.logser(probability)
def bootstrap_SAD(name_site_combo, model, in_dir='./data/', out_dir='./out_files/', Niter=200):
    """A general function of bootstrapping for SAD applying to all four models.

    The fit of the observed abundances is evaluated first (R^2 between
    log10 observed and log10 predicted abundances, and the maximum absolute
    difference between the empirical CDF and the model CDF).  Then ``Niter``
    samples of size S are drawn from the fitted distribution and the same
    two statistics are recomputed for every sample.

    Inputs:
    name_site_combo: a two-element sequence holding dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data (joined by plain string concatenation,
        so it must end with a path separator)
    out_dir - directory used both in input (obs_pred.csv file) and output
        (also joined by plain string concatenation)
    Niter - number of bootstrap samples

    Output:
    Passes two comma-joined records to wk.write_to_file, one for R^2 and one
    for the KS statistic.  Each record is dat_name, site, the statistic of
    the observed data, followed by the Niter bootstrap values.

    Raises:
    KeyError if model is not one of the four supported values.

    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_clean = clean_data_agsne(dat[dat['site'] == site])
    G, S, N, E = get_GSNE(dat_clean)

    # Each builder fits only its own model.  Only the requested one is
    # called, so e.g. an SSNT run no longer pays for (or fails on) the AGSNE
    # lambda solve.
    def build_ssnt():
        beta_ssnt = mete.get_beta(S, N, version='untruncated')
        return stats.logser(np.exp(-beta_ssnt))

    def build_asne():
        beta_asne = mete.get_beta(S, N)
        return md.trunc_logser(np.exp(-beta_asne), N)

    def build_agsne():
        lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
        z = agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3
        return mete_distributions.sad_agsne([G, S, N, E], [lambda1, beta, lambda3, z])

    builders = {
        'ssnt_0': build_ssnt,
        'ssnt_1': build_ssnt,  # both SSNT variants share the same SAD
        'asne': build_asne,
        'agsne': build_agsne,
    }
    dist = builders[model]()  # unknown model -> KeyError, as before

    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_rad_' + model + '.csv')
    site_rows = pred_obs[pred_obs['site'] == site]
    # Reversed order; presumably the file is stored from most to least
    # abundant and this yields ascending order to line up with the sorted
    # bootstrap samples -- TODO confirm against the obs_pred writer.
    pred = site_rows['pred'][::-1]
    obs = site_rows['obs'][::-1]
    log_pred = np.log10(pred)  # invariant across bootstrap iterations

    def rsquare(sample):
        # R^2 of log10 abundances against the (fixed) log10 predictions.
        return mtools.obs_pred_rsquare(np.log10(sample), log_pred)

    def ks_stat(sample):
        # Largest absolute gap between the empirical CDF and the model CDF.
        model_cdf = np.array([dist.cdf(x) for x in sample])
        return max(abs(mtools.get_emp_cdf(sample) - model_cdf))

    out_list_rsquare = [dat_name, site, str(rsquare(obs))]
    out_list_ks = [dat_name, site, str(ks_stat(obs))]

    for _ in range(Niter):
        obs_boot = np.array(sorted(dist.rvs(S)))
        out_list_rsquare.append(str(rsquare(obs_boot)))
        out_list_ks.append(str(ks_stat(obs_boot)))

    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_rsquare.txt',
                     ",".join(str(x) for x in out_list_rsquare))
    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_ks.txt',
                     ",".join(str(x) for x in out_list_ks))