def _post_pred_summary_bottom_node(bottom_node, samples=500, stats=None, plot=False, bins=100, evals=None):
    """Create posterior predictive check for a single bottom node.

    Every statistic is first computed on the node's current value
    (``bottom_node.value``). Then ``samples`` datasets are drawn with
    ``bottom_node.random()`` and the same statistics are recorded for each
    draw.

    :Arguments:
        bottom_node : object
            Must expose ``.value`` and ``.random()``.

    :Optional:
        samples : int
            Number of datasets to draw (default 500).
        stats : dict
            Maps a statistic name to a callable taking a dataset.
            Defaults to mean and std.
        plot : bool
            If True, call ``pymc.Matplot.gof_plot`` once per statistic.
        bins : int
            Forwarded to ``gof_plot`` as ``nbins``.
        evals : object
            Forwarded unchanged to ``_evaluate_post_pred``.

    :Returns:
        Whatever ``_evaluate_post_pred(sampled_stats, data_stats,
        evals=evals)`` returns.
    """

    def _calc_stats(data, stats):
        # Apply every named statistic to the same dataset.
        return {name: func(data) for name, func in stats.items()}

    if stats is None:
        stats = OrderedDict((('mean', np.mean), ('std', np.std)))

    # Statistics of the data currently held by the node.
    data_stats = _calc_stats(bottom_node.value, stats)

    # One pre-allocated array per statistic, one slot per draw.
    sampled_stats = {name: np.empty(samples) for name in stats}

    for sample in range(samples):
        # Presumably moves the node's parents to a random posterior draw;
        # the helper is defined outside this block -- confirm there.
        _parents_to_random_posterior_sample(bottom_node)

        # Generate data from the bottom node and summarise it.
        sampled = bottom_node.random()
        for name, value in _calc_stats(sampled, stats).items():
            sampled_stats[name][sample] = value

    if plot:
        # Imported lazily so pymc's plotting stack is only needed on demand.
        from pymc.Matplot import gof_plot

        # NOTE(review): post_pred_stats in this file passes ``bins=`` to
        # gof_plot while this call passes ``nbins=``; confirm which keyword
        # the installed pymc version accepts.
        for name, value in sampled_stats.items():
            gof_plot(value, data_stats[name], nbins=bins, name=name, verbose=0)

    return _evaluate_post_pred(sampled_stats, data_stats, evals=evals)
def post_pred_stats(data, sim_datasets, stats=None, plot=False, bins=100, evals=None, call_compare=True):
    """Calculate a set of summary statistics over posterior predictives.

    Each statistic is computed once on ``data`` and once for every group of
    ``sim_datasets`` that shares the same first two index levels.

    :Arguments:
        data : pandas.Series
            Observed data; handed as-is to every statistic function.
        sim_datasets : pandas.Series
            Simulated data. Its index needs at least three levels: rows
            are grouped on levels 0 and 1, and level 2 is dropped to build
            the index of the result.

    :Optional:
        stats : dict or callable
            User-defined statistics to compute (by default mean and std
            are computed). A bare callable is stored under the name
            ``'stat'``.
            :Example:
              * {'mean': np.mean, 'median': np.median}
              * lambda x: np.mean(x)
        plot : bool
            Whether to plot the posterior predictive distributions
            (requires ``pymc.Matplot.gof_plot``).
        bins : int
            How many bins to use for computing the histogram.
        evals : dict
            User-defined evaluations of the statistics; forwarded to
            ``post_pred_compare_stats``.
            :Example: {'percentile': scoreatpercentile}
        call_compare : bool (default=True)
            Whether to call post_pred_compare_stats. If False, return
            the sampled statistics directly.

    :Returns:
        The result of ``post_pred_compare_stats`` when ``call_compare`` is
        true; otherwise a float32 ``pandas.DataFrame`` with one row per
        (level 0, level 1) pair and one column per statistic.
    """
    # NOTE(review): this file contains a second, later definition of
    # post_pred_stats which replaces this one at import time.

    def _calc_stats(data, stats):
        # Apply every named statistic to the same dataset.
        return {name: func(data) for name, func in stats.items()}

    if stats is None:
        stats = OrderedDict((('mean', np.mean), ('std', np.std)))
    elif callable(stats):
        # Any callable counts (lambda, builtin, functools.partial, NumPy
        # function wrappers), not only plain Python functions.
        stats = OrderedDict((('stat', stats),))

    data_stats = _calc_stats(data, stats)

    # Results container: one row per simulated dataset, one column per stat.
    sampled_stats = pd.DataFrame(
        index=sim_datasets.index.droplevel(2).unique(),
        columns=list(stats),
        dtype=np.float32)

    for group_key, sim_dataset in sim_datasets.groupby(level=[0, 1]):
        for name, value in _calc_stats(sim_dataset.values, stats).items():
            # A single .loc write; chained ``frame[name][key] = value`` may
            # modify a temporary copy and leave the frame untouched. The
            # explicit cast matches the container dtype so pandas never
            # has to upcast the column.
            sampled_stats.loc[group_key, name] = np.float32(value)

    if plot:
        # Imported lazily so pymc's plotting stack is only needed on demand.
        from pymc.Matplot import gof_plot
        for name, value in sampled_stats.items():
            gof_plot(value, data_stats[name], bins=bins, name=name, verbose=0)

    if call_compare:
        return post_pred_compare_stats(sampled_stats, data_stats, evals=evals)
    return sampled_stats
def post_pred_stats(data, sim_datasets, stats=None, plot=False, bins=100, evals=None, call_compare=True):
    """Calculate a set of summary statistics over posterior predictives.

    Each statistic is computed once on ``data`` and once for every group of
    ``sim_datasets`` that shares the same first two index levels.

    :Arguments:
        data : pandas.Series
            Observed data; handed as-is to every statistic function.
        sim_datasets : pandas.Series
            Simulated data. Its index needs at least three levels: rows
            are grouped on levels 0 and 1, and level 2 is dropped to build
            the index of the result.

    :Optional:
        stats : dict or callable
            User-defined statistics to compute (by default mean and std
            are computed). A bare callable is stored under the name
            ``'stat'``.
            :Example:
              * {'mean': np.mean, 'median': np.median}
              * lambda x: np.mean(x)
        plot : bool
            Whether to plot the posterior predictive distributions
            (requires ``pymc.Matplot.gof_plot``).
        bins : int
            How many bins to use for computing the histogram.
        evals : dict
            User-defined evaluations of the statistics; forwarded to
            ``post_pred_compare_stats``.
            :Example: {'percentile': scoreatpercentile}
        call_compare : bool (default=True)
            Whether to call post_pred_compare_stats. If False, return
            the sampled statistics directly.

    :Returns:
        The result of ``post_pred_compare_stats`` when ``call_compare`` is
        true; otherwise a float32 ``pandas.DataFrame`` with one row per
        (level 0, level 1) pair and one column per statistic.
    """
    # NOTE(review): post_pred_stats is also defined earlier in this file;
    # this later definition is the one in effect after import.

    def _calc_stats(data, stats):
        # ``.items()`` exists on both Python 2 and 3; ``.iteritems()`` is
        # Python 2 only.
        return {name: func(data) for name, func in stats.items()}

    if stats is None:
        stats = OrderedDict((('mean', np.mean), ('std', np.std)))
    elif callable(stats):
        # Any callable counts (lambda, builtin, functools.partial, NumPy
        # function wrappers), not only plain Python functions.
        stats = OrderedDict((('stat', stats),))

    data_stats = _calc_stats(data, stats)

    # Results container: one row per simulated dataset, one column per stat.
    # The column labels are materialised as a list rather than a keys view.
    sampled_stats = pd.DataFrame(
        index=sim_datasets.index.droplevel(2).unique(),
        columns=list(stats),
        dtype=np.float32)

    for group_key, sim_dataset in sim_datasets.groupby(level=[0, 1]):
        for name, value in _calc_stats(sim_dataset.values, stats).items():
            # A single .loc write; chained ``frame[name][key] = value`` may
            # modify a temporary copy and leave the frame untouched. The
            # explicit cast matches the container dtype so pandas never
            # has to upcast the column.
            sampled_stats.loc[group_key, name] = np.float32(value)

    if plot:
        # Imported lazily so pymc's plotting stack is only needed on demand.
        from pymc.Matplot import gof_plot
        # DataFrame.items() yields (column name, column Series) pairs.
        for name, value in sampled_stats.items():
            gof_plot(value, data_stats[name], bins=bins, name=name, verbose=0)

    if call_compare:
        return post_pred_compare_stats(sampled_stats, data_stats, evals=evals)
    return sampled_stats