def plot_numsp_obs_pred(sites, obs_ab, min_abundance, max_abundance):
    """Plot observed against predicted species counts within an abundance range

    For each unique site, counts the species whose abundance falls in
    [min_abundance, max_abundance] and asks mete.get_mete_sad for the
    predicted count using the bin edges [min_abundance, max_abundance + 1].
    The paired observed/predicted values for every site are written to the
    file 'temp_sp_obs_pred_data' before any filtering.

    Sites with an observed count of zero are then removed so the remaining
    points can be drawn on log-scaled axes; the number of removed sites is
    printed to the screen.

    """
    sites = np.array(sites)
    obs_ab = np.array(obs_ab)

    observed_counts = []
    predicted_counts = []
    for current_site in np.unique(sites):
        abundances = obs_ab[sites == current_site]
        in_range = ((abundances >= min_abundance) &
                    (abundances <= max_abundance))
        observed_counts.append(len(abundances[in_range]))
        # get_mete_sad returns a sequence (one value per bin); flatten it
        # into the running list as we go.
        # NOTE(review): the upper edge is max_abundance + 1, presumably so the
        # single bin includes max_abundance itself -- confirm against mete.
        predicted_counts.extend(
            mete.get_mete_sad(len(abundances), sum(abundances),
                              bin_edges=[min_abundance, max_abundance + 1]))

    obs = np.array(observed_counts)
    pred = np.array(predicted_counts)

    # Persist the unfiltered observed/predicted pairs.
    np.savetxt('temp_sp_obs_pred_data', np.column_stack((obs, pred)))

    num_dropped_communities = len(obs[obs == 0])
    has_species = obs > 0
    pred = pred[has_species]
    obs = obs[has_species]
    print("%s communities out of a total of %s communities were dropped because no species were observed in the given abundance range"
          % (num_dropped_communities, num_dropped_communities + len(obs)))
    macroecotools.plot_color_by_pt_dens(pred, obs, 3, loglog=1)
Example #2
0
def get_envpred(envpred_data, predtype=['sad', 'rad']):
    """Build a table of per-site predictions from predicted S and N.

    Parameters
    ----------
    envpred_data : DataFrame
        Iterated row by row; each row must provide 'site_id', 'S',
        'logSpred' and 'logNpred'. Predicted S and N are computed as
        10 ** logSpred and 10 ** logNpred.
    predtype : str
        One of 'sad', 'rad' or 'rare'. It must be passed explicitly: the
        default value (kept only so the signature is unchanged) is not a
        valid choice and raises ValueError.

        * 'sad'  -- get_mete_sad over the bins from get_log_bins([N_pred]);
          columns 'site_id', 'octave', 'env_pred'.
        * 'rad'  -- get_mete_rad with beta from get_beta(S_pred, N_pred),
          using the observed S for the number of ranks; columns 'site_id',
          'rank', 'env_pred'.
        * 'rare' -- number of values in the predicted RAD that are <= 10;
          columns 'site_id', 'env_pred'.

    Returns
    -------
    DataFrame
        The per-site frames concatenated with a fresh integer index, or an
        empty frame with the appropriate columns if envpred_data has no rows.

    Raises
    ------
    ValueError
        If predtype is not 'sad', 'rad' or 'rare'.
    """
    # Imported locally because only DataFrame is known to be in scope here.
    from pandas import concat

    # Compare with ==, not `is`: identity only matches interned strings, so an
    # equal string built at runtime would silently select no branch.
    if predtype == 'sad':
        columns = ['site_id', 'octave', 'env_pred']
    elif predtype == 'rad':
        columns = ['site_id', 'rank', 'env_pred']
    elif predtype == 'rare':
        columns = ['site_id', 'env_pred']
    else:
        raise ValueError("predtype must be 'sad', 'rad' or 'rare', got %r"
                         % (predtype,))

    site_frames = []
    for index, site in envpred_data.iterrows():
        obs_S = site['S']
        envpred_S = 10 ** site['logSpred']
        envpred_N = 10 ** site['logNpred']
        if predtype == 'sad':
            sad_bins = get_log_bins([envpred_N])
            octave = range(0, len(sad_bins) - 1)
            site_pred = get_mete_sad(envpred_S, envpred_N, bin_edges=sad_bins)
            site_ids = [site['site_id']] * len(site_pred)
            site_data = np.column_stack([site_ids, octave, site_pred])
        elif predtype == 'rad':
            # note using observed S here for time being
            rank = range(1, int(obs_S + 1))
            site_beta = get_beta(envpred_S, envpred_N)
            site_pred, p = get_mete_rad(obs_S, envpred_N, beta=site_beta)
            site_ids = [site['site_id']] * len(site_pred)
            site_data = np.column_stack([site_ids, rank, site_pred])
        else:  # 'rare'
            pred_rad = get_mete_rad(int(envpred_S), envpred_N)[0]
            site_pred = sum([i <= 10 for i in pred_rad])
            site_data = np.column_stack([site['site_id'], site_pred])
        site_frames.append(DataFrame(site_data, columns=columns))

    if not site_frames:
        return DataFrame(columns=columns)
    # One concat at the end instead of DataFrame.append per row: append copied
    # the accumulated frame every iteration and was removed in pandas 2.0.
    return concat(site_frames, ignore_index=True)
def plot_avg_deviation_from_logseries(sites, obs_ab, p=None, sites_for_p=None,
                                      error_bars=0, color='b'):
    """Plot the average deviation from the predicted SAD for each Preston bin

    For every unique site the observed abundances are binned into log2
    (Preston) bins with macroecotools.preston_sad, the predicted number of
    species per bin is obtained from mete.get_mete_sad, and the difference
    (observed - predicted) in each bin is expressed as a percentage of the
    site's total richness S. The per-bin deviations are then averaged across
    sites, ignoring NaNs, and plotted against the bin number (1, 2, ...).

    Parameters
    ----------
    sites : array-like
        Site identifier for each entry of obs_ab.
    obs_ab : array-like
        Observed abundances, parallel to sites.
    p : array-like, optional
        If given, beta = -log(p[sites_for_p == site]) is passed to
        mete.get_mete_sad for each site; otherwise get_mete_sad is called
        without beta.
    sites_for_p : array-like, optional
        Site identifier for each entry of p; only used when p is given.
    error_bars : int
        If 1, draw error bars showing the across-site sample standard
        deviation of each bin; otherwise draw a plain line of width 3.
    color : str
        Line color, used in both plotting modes.

    """
    # Boolean masking below needs arrays; plain lists would compare as a
    # single scalar in `sites == site`.
    sites = np.asarray(sites)
    obs_ab = np.asarray(obs_ab)
    if p is not None:
        p = np.asarray(p)
        sites_for_p = np.asarray(sites_for_p)

    usites = np.unique(sites)
    max_N = max(obs_ab)
    max_integer_logN = int(np.ceil(np.log2(max_N)) + 1)
    # Bin edges at 1, 2, 4, ... up to the first power of two >= max_N.
    bin_edges = np.exp2(np.arange(0, max_integer_logN))
    deviations = np.zeros((len(usites), len(bin_edges) - 1))
    for i, site in enumerate(usites):
        site_abundances = obs_ab[sites == site]
        S = len(site_abundances)
        N = sum(site_abundances)
        obs_sad = macroecotools.preston_sad(site_abundances, b=bin_edges)
        # `is None` rather than `== None`: the latter is elementwise for
        # arrays and makes the `if` raise on an ambiguous truth value.
        if p is None:
            pred_sad = mete.get_mete_sad(S, N, bin_edges=bin_edges)
        else:
            beta = -log(p[sites_for_p == site])
            pred_sad = mete.get_mete_sad(S, N, beta=beta,
                                         bin_edges=bin_edges)
        deviations[i, :] = (obs_sad[0] - pred_sad) / S * 100

    bin_numbers = range(1, max_integer_logN)
    # scipy.stats.nanmean/nanstd no longer exist in SciPy; the NumPy versions
    # with axis=0 (and ddof=1 for the std) reproduce their defaults.
    mean_deviations = np.nanmean(deviations, axis=0)
    if error_bars == 1:
        std_deviations = np.nanstd(deviations, axis=0, ddof=1)
        # Honor `color` here too; with the default 'b' this is the same as
        # the previous hard-coded fmt='b-'.
        plt.errorbar(bin_numbers, mean_deviations, yerr=std_deviations,
                     fmt='-', color=color)
    else:
        plt.plot(bin_numbers, mean_deviations, color=color, linewidth=3)