Exemple #1
0
def _post_pred_summary_bottom_node(bottom_node,
                                   samples=500,
                                   stats=None,
                                   plot=False,
                                   bins=100,
                                   evals=None):
    """Create posterior predictive check for a single bottom node.

    Summary statistics are computed once on the node's current value and
    once per draw on data generated by ``bottom_node.random()``; both sets
    are then handed to ``_evaluate_post_pred``.

    :Arguments:
        bottom_node : object
            Node exposing ``value`` (the data) and ``random()`` (a draw of
            new data).

    :Optional:
        samples : int
            Number of draws to generate (default 500).
        stats : dict
            Maps a statistic's name to a function of the data. Defaults to
            mean and standard deviation.
        plot : bool
            Whether to plot each sampled statistic with pymc's ``gof_plot``.
        bins : int
            Number of histogram bins passed to ``gof_plot``.
        evals : dict
            Forwarded unchanged to ``_evaluate_post_pred``.

    :Returns:
        Whatever ``_evaluate_post_pred`` returns for the sampled and the
        observed statistics.
    """
    def _calc_stats(data, stats):
        # Apply every statistic to the same data set.
        # ``items()`` works on Python 2 and 3 (``iteritems`` is Python 2 only).
        return {name: func(data) for name, func in stats.items()}

    if stats is None:
        stats = OrderedDict((('mean', np.mean), ('std', np.std)))

    # Statistics of the observed data, read before any sampling happens.
    data_stats = _calc_stats(bottom_node.value, stats)

    # One preallocated array per statistic, filled in draw by draw.
    sampled_stats = {name: np.empty(samples) for name in stats}

    for sample in range(samples):
        # Presumably moves the node's parents to a random posterior draw
        # (helper is defined outside this block).
        _parents_to_random_posterior_sample(bottom_node)
        # Generate data from the bottom node and summarise it.
        sampled = bottom_node.random()
        for name, value in _calc_stats(sampled, stats).items():
            sampled_stats[name][sample] = value

    if plot:
        from pymc.Matplot import gof_plot
        # NOTE(review): the keyword is `nbins` here; confirm it matches the
        # installed pymc.Matplot.gof_plot signature.
        for name, value in sampled_stats.items():
            gof_plot(value, data_stats[name], nbins=bins, name=name, verbose=0)

    return _evaluate_post_pred(sampled_stats, data_stats, evals=evals)
Exemple #2
0
def _post_pred_summary_bottom_node(bottom_node, samples=500, stats=None, plot=False, bins=100, evals=None):
    """Create posterior predictive check for a single bottom node.

    The statistics in ``stats`` are evaluated on ``bottom_node.value`` and
    on ``samples`` data sets drawn with ``bottom_node.random()``. The
    result of comparing the two via ``_evaluate_post_pred`` is returned.

    :Arguments:
        bottom_node : object
            Node providing ``value`` (the data) and ``random()`` (a new
            draw of data).

    :Optional:
        samples : int
            How many data sets to draw (default 500).
        stats : dict
            Name -> function mapping of statistics to compute. Mean and
            standard deviation are used when omitted.
        plot : bool
            If true, each statistic is plotted with pymc's ``gof_plot``.
        bins : int
            Histogram bin count handed to ``gof_plot``.
        evals : dict
            Passed through to ``_evaluate_post_pred``.

    :Returns:
        The value returned by ``_evaluate_post_pred``.
    """
    def _calc_stats(data, stats):
        out = {}
        # ``items()`` exists on Python 2 and 3; ``iteritems`` does not.
        for name, func in stats.items():
            out[name] = func(data)
        return out

    if stats is None:
        stats = OrderedDict((('mean', np.mean), ('std', np.std)))

    ############################
    # Compute stats over data
    data = bottom_node.value
    data_stats = _calc_stats(data, stats)

    ###############################################
    # Initialize posterior sample stats container
    # (one array of length ``samples`` per statistic)
    sampled_stats = {}
    for name in stats:
        sampled_stats[name] = np.empty(samples)

    ##############################
    # Sample and generate stats
    for sample in range(samples):
        # Helper is defined outside this block; presumably it sets the
        # node's parents to a random posterior sample.
        _parents_to_random_posterior_sample(bottom_node)
        # Generate data from bottom node
        sampled = bottom_node.random()
        sampled_stat = _calc_stats(sampled, stats)

        # Add it to the results container
        for name, value in sampled_stat.items():
            sampled_stats[name][sample] = value

    if plot:
        from pymc.Matplot import gof_plot
        # NOTE(review): the keyword is `nbins` here; confirm it matches the
        # installed pymc.Matplot.gof_plot signature.
        for name, value in sampled_stats.items():
            gof_plot(value, data_stats[name], nbins=bins, name=name, verbose=0)

    result = _evaluate_post_pred(sampled_stats, data_stats, evals=evals)

    return result
Exemple #3
0
def post_pred_stats(data,
                    sim_datasets,
                    stats=None,
                    plot=False,
                    bins=100,
                    evals=None,
                    call_compare=True):
    """Calculate a set of summary statistics over posterior predictives.

    :Arguments:
        data : pandas.Series
            Observed data; every statistic is evaluated on it once.
        sim_datasets : pandas.Series
            Simulated data with a MultiIndex of (at least) three levels.
            Index levels 0 and 1 together identify one simulated data set;
            level 2 is dropped when building the result index.

    :Optional:
        bins : int
            How many bins to use for computing the histogram
            (passed to ``gof_plot``).
        stats : dict or callable
            User-defined statistics to compute (by default mean and std are
            computed) and evaluate over the samples. A single callable is
            stored under the name ``'stat'``.
            :Example:
              * {'mean': np.mean, 'median': np.median}
              * lambda x: np.mean(x)
        evals : dict
            User-defined evaluations of the statistics, forwarded to
            ``post_pred_compare_stats``.
            :Example: {'percentile': scoreatpercentile}
        plot : bool
            Whether to plot the posterior predictive distributions.
        call_compare : bool (default=True)
            Whether to call post_pred_compare_stats. If False, return stats
            directly.

    :Returns:
        The result of ``post_pred_compare_stats`` when ``call_compare`` is
        true; otherwise a float32 ``pandas.DataFrame`` with one row per
        simulated data set and one column per statistic.
    """
    def _calc_stats(data, stats):
        # Apply every statistic to the same data set.
        return {name: func(data) for name, func in stats.items()}

    if stats is None:
        stats = OrderedDict((('mean', np.mean), ('std', np.std)))
    elif callable(stats):
        # Accept any callable, not only plain Python functions: built-ins,
        # functools.partial objects and other non-function callables
        # are not FunctionType instances.
        stats = OrderedDict((('stat', stats), ))

    data_stats = _calc_stats(data, stats)

    ###############################################
    # Initialize posterior sample stats container
    sampled_stats = pd.DataFrame(
        index=sim_datasets.index.droplevel(2).unique(),
        columns=list(stats.keys()),
        dtype=np.float32)

    for key, sim_dataset in sim_datasets.groupby(level=(0, 1)):
        sampled_stat = _calc_stats(sim_dataset.values, stats)

        # Add it to the results container. A single .loc assignment writes
        # into the frame itself; chained ``frame[name][key] = value`` only
        # updates a temporary under pandas Copy-on-Write.
        for name, value in sampled_stat.items():
            sampled_stats.loc[key, name] = value

    if plot:
        from pymc.Matplot import gof_plot
        for name, value in sampled_stats.items():
            gof_plot(value, data_stats[name], bins=bins, name=name, verbose=0)

    if call_compare:
        return post_pred_compare_stats(sampled_stats, data_stats, evals=evals)
    return sampled_stats
Exemple #4
0
def post_pred_stats(data, sim_datasets, stats=None, plot=False, bins=100, evals=None, call_compare=True):
    """Calculate a set of summary statistics over posterior predictives.

    :Arguments:
        data : pandas.Series
            Observed data on which each statistic is computed once.
        sim_datasets : pandas.Series
            Simulated data indexed by a MultiIndex with at least three
            levels. Rows sharing index levels 0 and 1 form one simulated
            data set; level 2 is dropped for the result index.

    :Optional:
        bins : int
            How many bins to use for computing the histogram
            (passed to ``gof_plot``).
        stats : dict or callable
            User-defined statistics to compute (by default mean and std are
            computed) and evaluate over the samples. A bare callable is
            registered under the name ``'stat'``.
            :Example:
              * {'mean': np.mean, 'median': np.median}
              * lambda x: np.mean(x)
        evals : dict
            User-defined evaluations of the statistics, handed on to
            ``post_pred_compare_stats``.
            :Example: {'percentile': scoreatpercentile}
        plot : bool
            Whether to plot the posterior predictive distributions.
        call_compare : bool (default=True)
            Whether to call post_pred_compare_stats. If False, return stats
            directly.

    :Returns:
        ``post_pred_compare_stats(...)`` when ``call_compare`` is true,
        else a float32 ``pandas.DataFrame`` holding one row per simulated
        data set and one column per statistic.
    """

    def _calc_stats(data, stats):
        out = {}
        # ``items()`` is available on Python 2 and 3; ``iteritems`` is not.
        for name, func in stats.items():
            out[name] = func(data)
        return out

    if stats is None:
        stats = OrderedDict((('mean', np.mean), ('std', np.std)))
    elif callable(stats):
        # Any callable qualifies (built-ins, partials, other non-function
        # callables), not just objects of type FunctionType.
        stats = OrderedDict((('stat', stats),))

    data_stats = _calc_stats(data, stats)

    ###############################################
    # Initialize posterior sample stats container
    # (columns must be a concrete list: on Python 3 ``keys()`` is a view)
    sampled_stats = pd.DataFrame(index=sim_datasets.index.droplevel(2).unique(),
                                 columns=list(stats.keys()),
                                 dtype=np.float32)

    for group_key, sim_dataset in sim_datasets.groupby(level=(0, 1)):
        sampled_stat = _calc_stats(sim_dataset.values, stats)

        # Add it to the results container with one .loc assignment; the
        # chained form ``frame[name][key] = value`` writes to a temporary
        # under pandas Copy-on-Write and leaves the frame untouched.
        for name, value in sampled_stat.items():
            sampled_stats.loc[group_key, name] = value

    if plot:
        from pymc.Matplot import gof_plot
        # DataFrame.items() yields (column name, column) pairs;
        # DataFrame.iteritems() was removed in pandas 2.0.
        for name, value in sampled_stats.items():
            gof_plot(value, data_stats[name], bins=bins, name=name, verbose=0)

    if call_compare:
        return post_pred_compare_stats(sampled_stats, data_stats, evals=evals)
    return sampled_stats