def plot_numsp_obs_pred(sites, obs_ab, min_abundance, max_abundance):
    """Plot observed vs. predicted species counts within an abundance range.

    For every unique site, the observed count is the number of abundances
    falling in [min_abundance, max_abundance] (both ends inclusive). The
    predicted values come from ``mete.get_mete_sad`` called with the site's
    number of species, its total abundance and the single bin
    ``[min_abundance, max_abundance + 1]``.

    The full observed/predicted table (before any filtering) is written to
    the file 'temp_sp_obs_pred_data' in the working directory.

    Communities with no observed species in the range are dropped so the
    result can be drawn on log-scaled axes; the number dropped is printed.

    Parameters
    ----------
    sites : array-like
        Site identifier for each entry of ``obs_ab``.
    obs_ab : array-like
        Observed abundances, aligned with ``sites``.
    min_abundance, max_abundance : number
        Inclusive bounds of the abundance range of interest.

    """
    site_labels = np.array(sites)
    abundances = np.array(obs_ab)
    # The upper edge is max_abundance + 1 so that max_abundance itself is
    # passed inside the requested bin.
    range_bin = [min_abundance, max_abundance + 1]

    observed_counts = []
    predicted_counts = []
    for site in np.unique(site_labels):
        site_abs = abundances[site_labels == site]
        in_range = (site_abs >= min_abundance) & (site_abs <= max_abundance)
        observed_counts.append(len(site_abs[in_range]))
        # get_mete_sad returns an iterable per site; flatten as we go.
        predicted_counts.extend(
            mete.get_mete_sad(len(site_abs), sum(site_abs),
                              bin_edges=range_bin))

    pred = np.array(predicted_counts)
    obs = np.array(observed_counts)

    # Persist the unfiltered table before dropping any community.
    np.savetxt('temp_sp_obs_pred_data', np.column_stack((obs, pred)))

    total_communities = len(obs)
    has_species = obs > 0
    num_dropped_communities = len(obs[obs == 0])
    pred = pred[has_species]
    obs = obs[has_species]

    print("%s communities out of a total of %s communities were dropped because no species were observed in the given abundance range" % (num_dropped_communities, total_communities))
    macroecotools.plot_color_by_pt_dens(pred, obs, 3, loglog=1)
def get_envpred(envpred_data, predtype=['sad', 'rad']):
    """Build a table of predictions from environmentally predicted S and N.

    For each row of ``envpred_data`` the predicted richness and abundance
    are recovered as ``10 ** logSpred`` and ``10 ** logNpred`` and turned
    into one of three kinds of output, selected by ``predtype``:

    * ``'sad'``  -- ``get_mete_sad`` evaluated on the bins returned by
      ``get_log_bins([N_pred])``; one row per bin ('octave').
    * ``'rad'``  -- ``get_mete_rad`` evaluated with the *observed* S (column
      'S') and a beta obtained from ``get_beta(S_pred, N_pred)``; one row
      per rank.
    * ``'rare'`` -- the number of entries of the predicted rank-abundance
      distribution that are <= 10; one row per site.

    Parameters
    ----------
    envpred_data : pandas.DataFrame
        Must provide the columns 'site_id', 'S', 'logSpred' and 'logNpred'.
    predtype : str
        One of 'sad', 'rad' or 'rare'. The list default in the signature
        only advertises options and is not itself a usable value; a single
        string must be passed.

    Returns
    -------
    pandas.DataFrame
        Columns are ['site_id', 'octave', 'env_pred'] for 'sad',
        ['site_id', 'rank', 'env_pred'] for 'rad' and
        ['site_id', 'env_pred'] for 'rare'. Empty (columns only) when
        ``envpred_data`` has no rows.

    Raises
    ------
    ValueError
        If ``predtype`` is not one of the three supported strings.

    """
    columns_by_type = {
        'sad': ['site_id', 'octave', 'env_pred'],
        'rad': ['site_id', 'rank', 'env_pred'],
        'rare': ['site_id', 'env_pred'],
    }
    # Compare by value, not identity: `predtype is 'sad'` only works for
    # strings the interpreter happens to have interned.
    try:
        columns = columns_by_type[predtype]
    except (KeyError, TypeError):
        # TypeError covers unhashable values such as the list default.
        raise ValueError("predtype must be one of 'sad', 'rad' or 'rare'; "
                         "got %r" % (predtype,))

    site_blocks = []
    for index, site in envpred_data.iterrows():
        obs_S = site['S']
        envpred_S = 10 ** site['logSpred']
        envpred_N = 10 ** site['logNpred']

        if predtype == 'sad':
            sad_bins = get_log_bins([envpred_N])
            octave = range(0, len(sad_bins) - 1)
            site_pred = get_mete_sad(envpred_S, envpred_N, bin_edges=sad_bins)
            site_ids = [site['site_id']] * len(site_pred)
            block = np.column_stack([site_ids, octave, site_pred])
        elif predtype == 'rad':
            # note using observed S here for time being
            rank = range(1, int(obs_S + 1))
            site_beta = get_beta(envpred_S, envpred_N)
            site_pred, _p = get_mete_rad(obs_S, envpred_N, beta=site_beta)
            site_ids = [site['site_id']] * len(site_pred)
            block = np.column_stack([site_ids, rank, site_pred])
        else:  # 'rare'
            pred_rad = get_mete_rad(int(envpred_S), envpred_N)[0]
            site_pred = sum([i <= 10 for i in pred_rad])
            block = np.column_stack([site['site_id'], site_pred])

        site_blocks.append(block)

    if not site_blocks:
        return DataFrame(columns=columns)
    # Build the frame once instead of growing it with DataFrame.append,
    # which is quadratic and no longer exists in pandas >= 2.0.
    return DataFrame(np.vstack(site_blocks), columns=columns)
def plot_avg_deviation_from_logseries(sites, obs_ab, p=None, sites_for_p=None,
                                      error_bars=0, color='b'):
    """Plot the mean deviation from the predicted SAD per Preston bin.

    Abundances are grouped into log2 (Preston) bins whose edges are
    1, 2, 4, ... up to the first power of two at or above the largest
    observed abundance. For every site the observed counts per bin
    (``macroecotools.preston_sad``) are compared with the predicted counts
    (``mete.get_mete_sad``), and the difference is stored as a percentage
    of the site's total richness::

        deviation = (observed - predicted) / S * 100

    so a difference of one species at a site with 10 species is a
    deviation of 10. The per-bin mean across sites (ignoring NaNs) is then
    plotted against the bin number.

    Parameters
    ----------
    sites : array-like
        Site identifier for each entry of ``obs_ab``.
    obs_ab : array-like
        Observed abundances, aligned with ``sites``.
    p : array-like, optional
        Per-site parameter; when given, ``beta = -log(p)`` for the matching
        site is passed to ``mete.get_mete_sad`` instead of letting it be
        determined from S and N.
    sites_for_p : array-like, optional
        Site identifier for each entry of ``p``. Required when ``p`` is
        given.
    error_bars : int
        If 1, draw error bars of one sample standard deviation (ddof=1)
        using the fixed format 'b-'; otherwise draw a plain line.
    color : str
        Line color used when ``error_bars`` is not 1.

    Raises
    ------
    ValueError
        If ``p`` is given without ``sites_for_p``.

    """
    # Coerce to arrays so the boolean-mask indexing below also works for
    # plain Python lists.
    sites = np.asarray(sites)
    obs_ab = np.asarray(obs_ab)
    if p is not None:
        if sites_for_p is None:
            raise ValueError("sites_for_p is required when p is given")
        p = np.asarray(p)
        sites_for_p = np.asarray(sites_for_p)

    usites = np.unique(sites)
    max_N = max(obs_ab)
    max_integer_logN = int(np.ceil(np.log2(max_N)) + 1)
    log_bin_edges = np.array(range(0, max_integer_logN))
    bin_edges = np.exp2(log_bin_edges)

    deviations = np.zeros((len(usites), len(bin_edges) - 1))
    for i, site in enumerate(usites):
        site_abundances = obs_ab[sites == site]
        S = len(site_abundances)
        N = sum(site_abundances)
        obs_sad = macroecotools.preston_sad(site_abundances, b=bin_edges)
        # `is None`, not `== None`: comparing an array to None is
        # elementwise and cannot be used as a condition.
        if p is None:
            pred_sad = mete.get_mete_sad(S, N, bin_edges=bin_edges)
        else:
            beta = -log(p[sites_for_p == site])
            pred_sad = mete.get_mete_sad(S, N, beta=beta, bin_edges=bin_edges)
        deviation_from_predicted = (obs_sad[0] - pred_sad) / S * 100
        deviations[i, :] = deviation_from_predicted

    bin_numbers = range(1, max_integer_logN)
    # Column-wise statistics across sites; axis=0 and ddof=1 reproduce the
    # defaults of the removed scipy.stats.nanmean / nanstd.
    mean_deviations = np.nanmean(deviations, axis=0)
    if error_bars == 1:
        std_deviations = np.nanstd(deviations, axis=0, ddof=1)
        plt.errorbar(bin_numbers, mean_deviations, yerr=std_deviations,
                     fmt='b-')
    else:
        plt.plot(bin_numbers, mean_deviations, color=color, linewidth=3)