Example #1
0
def save_pair_plot(fit,
                   param_names=None,
                   info_path=None,
                   pair_plot_params=PairPlotParams()):
    """
    Save a pair plot of distributions of parameters. It helps
    to see correlations between parameters and spot funnel
    shaped distributions that can result in sampling problems.

    Parameters
    ----------

    fit : cmdstanpy.stanfit.CmdStanMCMC
        Samples from cmdstanpy.

    param_names : list of str
        Names of parameters. Include all if None.

    info_path : InfoPath
        Path information for creating summaries.
        A new `InfoPath()` is created for this call if None.

    pair_plot_params : PairPlotParams
        Plot settings, passed through unchanged to the shared
        pair plot function.
    """

    if info_path is None:
        # Build the default per call: `set_codefile()` is invoked on this
        # object below, so a default created once in the signature would be
        # one instance shared by every call.
        info_path = InfoPath()

    info_path.set_codefile()
    param_names = filter_param_names(fit.column_names, param_names)
    samples = fit.get_drawset(params=param_names)

    shared_save_pair_plot(samples, param_names=param_names,
                          info_path=info_path,
                          pair_plot_params=pair_plot_params)
Example #2
0
def save_summary(samples, param_names=None, info_path=None,
                 summary_params=SummaryParams()):
    """
    Generates and saves statistical summary of the samples using mean, std, mode, hpdi.

    Parameters
    ----------

    samples : Panda's dataframe

        Each column contains samples for a parameter.

    param_names : list of str

        Names of parameters to be included in the summary. Include all if None.

    info_path : InfoPath

        Path information for creating summaries.
        A new `InfoPath()` is created for this call if None.

    summary_params : SummaryParams

        Settings passed to `sample_summary`.

    Returns
    -------
    Whatever `save_summary_to_disk` returns for the computed summary.
    """

    if info_path is None:
        # Build the default per call: `set_codefile()` is invoked on this
        # object below, so a default created once in the signature would be
        # one instance shared by every call.
        info_path = InfoPath()

    info_path.set_codefile()
    column_names = list(samples)
    param_names = filter_param_names(column_names, param_names)
    samples = samples[param_names]  # Filter by column names
    df_summary, table = sample_summary(samples, params=summary_params)
    return save_summary_to_disk(df_summary, table, info_path)
Example #3
0
def save_tree_plot(models,
                   extra_values=None,
                   param_names=None,
                   info_path=None,
                   summary_params=SummaryParams(),
                   tree_params=TreePlotParams()):
    """
    Save a tree plot that summarises parameter distributions.
    Can compare summaries from multiple models, when multiple samples are
    supplied. One can also supply additional markers
    to be compared with using `extra_values` parameter.

    Parameters
    ----------

    models : list Panda's data frames

        List of data frames for each model, containing sample values for
        multiple parameters (one parameter is one data frame column).
        Supply multiple data frames to see their distribution summaries
        compared on the tree plot.

    extra_values : list of dict
        Additional markers to be shown on tree plot, without error bars:

        [
            {
                "mu": 2.3,
                "sigma": 3.3
            }
        ]

        Treated as an empty list if None.

    param_names : list of str

        Names of parameters. Include all if None.

    info_path : InfoPath

        Path information for creating summaries.
        A new `InfoPath()` is created for this call if None.

    summary_params : SummaryParams

        Settings passed to `sample_summary`.

    tree_params : TreePlotParams

        Settings passed to `make_comparative_tree_plot`.
    """

    if info_path is None:
        # Build the default per call: `set_codefile()` is invoked on this
        # object below, so a default created once in the signature would be
        # one instance shared by every call.
        info_path = InfoPath()

    if extra_values is None:
        extra_values = []

    info_path.set_codefile()
    summaries = []

    for samples in models:
        # Filter every model against the caller's `param_names` (kept
        # unmodified) and actually apply the result to the data frame,
        # so that only the selected columns are summarised.
        model_param_names = filter_param_names(list(samples), param_names)
        samples = samples[model_param_names]
        summary, _ = sample_summary(samples, params=summary_params)
        summaries.append(summary)

    summaries.extend(summary_from_dict(values) for values in extra_values)

    make_comparative_tree_plot(summaries,
                               info_path=info_path,
                               tree_params=tree_params)
Example #4
0
def make_histograms(samples, summary, param_names=None,
                    params=HistogramParams(),
                    summary_params=SummaryParams()):
    """
    Make multiple figures with
    histograms for the parameters from posterior distribution.

    Parameters
    -----------

    samples : Panda's DataFrame

        Each column contains samples from posterior distribution.

    summary : Panda's DataFrame

        Summary information about each column.

    param_names : list of str

        Names of the parameters for plotting. If None, all will be plotted.

    params : HistogramParams

        Layout settings. This function reads `ncols`, `num_plot_rows`
        and `max_plot_pages`; the object is also passed to
        `make_histogram_one_page`.

    summary_params : SummaryParams

        Passed through to `make_histogram_one_page`.

    Returns
    -------
    list of [figure, axes]
        One entry per page, as returned by `make_histogram_one_page`.
    """
    param_names = filter_param_names(samples.columns, param_names)

    # Total number of pages: parameters -> rows of `ncols` -> pages of
    # `num_plot_rows` rows
    n_plots = math.ceil(math.ceil(len(param_names) / params.ncols) /
                        params.num_plot_rows)

    if n_plots > params.max_plot_pages:
        # Note the trailing space after the full stop: the two sentences
        # are adjacent string literals and would otherwise run together.
        print((
            f'Showing only first {params.max_plot_pages} '
            f'pages out of {n_plots} of histogram. '
            'Consider specifying "param_names".'))

        n_plots = params.max_plot_pages

    # Always produce at least one page, even with no parameters
    if n_plots < 1:
        n_plots = 1

    figures_and_axes = []

    # Make one histogram page per iteration
    for i_plot in range(n_plots):
        fig, ax = make_histogram_one_page(
            i_start=i_plot * params.num_plot_rows * params.ncols,
            samples=samples,
            summary=summary,
            param_names=param_names,
            params=params,
            summary_params=summary_params)

        figures_and_axes.append([fig, ax])

    return figures_and_axes
Example #5
0
def save_tree_plot(fits,
                   extra_values=None,
                   param_names=None,
                   info_path=None,
                   summary_params=SummaryParams(),
                   tree_params=TreePlotParams()):
    """
    Save a tree plot that summarises parameter distributions.
    Can compare summaries from multiple models, when multiple fits are
    supplied. One can also supply additional markers
    to be compared with using `extra_values` parameter.

    Parameters
    ----------

    fits : list of cmdstanpy.stanfit.CmdStanMCMC

        Contains the samples from cmdstanpy.

    extra_values : list of dict
        Additional markers to be shown on tree plot, without error bars:

        [
            {
                "mu": 2.3,
                "sigma": 3.3
            }
        ]

        Treated as an empty list if None.

    param_names : list of str

        Names of parameters. Include all if None.

    info_path : InfoPath

        Path information for creating summaries.
        A new `InfoPath()` is created for this call if None.

    summary_params : SummaryParams

        Settings passed to `sample_summary`.

    tree_params : TreePlotParams

        Settings passed to `make_comparative_tree_plot`.
    """

    if info_path is None:
        # Build the default per call: `set_codefile()` is invoked on this
        # object below, so a default created once in the signature would be
        # one instance shared by every call.
        info_path = InfoPath()

    if extra_values is None:
        extra_values = []

    info_path.set_codefile()
    summaries = []

    for fit in fits:
        # Filter each fit against the caller's `param_names`, which is left
        # unmodified, so that the selection made for one fit does not
        # restrict the parameters taken from the next one.
        fit_param_names = filter_param_names(fit.column_names, param_names)
        samples = fit.get_drawset(params=fit_param_names)
        summary, _ = sample_summary(samples, params=summary_params)
        summaries.append(summary)

    summaries.extend(summary_from_dict(values) for values in extra_values)

    make_comparative_tree_plot(summaries,
                               info_path=info_path,
                               tree_params=tree_params)
Example #6
0
def make_pair_plot(samples, param_names=None,
                   pair_plot_params=PairPlotParams()):
    """
    Make a pair plot for the parameters from posterior distribution.

    Parameters
    -----------

    samples : Panda's DataFrame

        Each column contains samples from posterior distribution.

    param_names : list of str

        Names of the parameters for plotting. If None, all will be plotted.

    pair_plot_params : PairPlotParams

        Plot settings. This function reads `max_params`, `max_samples`,
        `marker_size`, `color`, `edgecolor`, `alpha` and `diag_edge_color`.

    Returns
    -------
    Seaborn's PairGrid
    """

    param_names = filter_param_names(samples.columns, param_names)

    if len(param_names) > pair_plot_params.max_params:
        # Note the trailing space after the full stop: the two sentences
        # are adjacent string literals and would otherwise run together.
        print((
            f'Showing only first {pair_plot_params.max_params} '
            f'parameters out of {len(param_names)} in pair plot. '
            'Consider limiting the parameter with "param_names".'))

        param_names = param_names[:pair_plot_params.max_params]

    samples = samples[param_names]

    # Show no more than `max_samples` markers by keeping every n-th row.
    # With no rows the ratio is 0, and a slice step of 0 raises ValueError,
    # so the step is never allowed to drop below 1.
    keep_nth = max(
        1, math.ceil(samples.shape[0] / pair_plot_params.max_samples))
    samples = samples[::keep_nth]

    g = sns.PairGrid(samples)

    # Upper triangle: scatter plots of the thinned samples
    g = g.map_upper(sns.scatterplot, s=pair_plot_params.marker_size,
                    color=pair_plot_params.color,
                    edgecolor=pair_plot_params.edgecolor,
                    alpha=pair_plot_params.alpha)

    # Lower triangle: kernel density estimates; diagonal: histograms
    g = g.map_lower(sns.kdeplot, color=pair_plot_params.color)
    g = g.map_diag(plt.hist, color=pair_plot_params.color,
                   edgecolor=pair_plot_params.diag_edge_color)

    return g
Example #7
0
def traceplot(fit, param_names=None, params=TraceplotParams()):
    """
    Build traceplot pages: diagnostic plots of the samples of the
    selected parameters, split over one or more figures.

    Parameters
    ----------

    fit : object exposing `column_names`

        Passed on to `make_single_traceplot`
        (presumably a cmdstanpy.stanfit.CmdStanMCMC - confirm with callers).

    param_names: list of str

        List of parameters to plot.  If None, plot all.

    params : TraceplotParams

        Layout settings. This function reads `num_traceplot_rows`
        and `max_traceplot_pages`.

    Returns
    -------
    list of [figure, axes]
        One entry per page, as returned by `make_single_traceplot`.
    """

    sns.set(style="ticks")

    # Columns to draw, with the log-probability always in the first place
    param_names = filter_param_names(fit.column_names, param_names)
    param_names.insert(0, 'lp__')

    rows_per_page = params.num_traceplot_rows
    page_limit = params.max_traceplot_pages

    # Number of pages needed to fit all columns
    n_pages = math.ceil(len(param_names) / rows_per_page)

    if n_pages > page_limit:
        print((
            f'Traceplot shows only first {page_limit} '
            f'pages out of {n_pages}. Consider specifying "param_names".'))

        n_pages = page_limit

    # Never return fewer than one page
    n_pages = max(n_pages, 1)

    pages = []

    # Make multiple traceplots, one per page
    for page_index in range(n_pages):
        fig, ax = make_single_traceplot(
            i_start=page_index * rows_per_page,
            fit=fit,
            param_names=param_names,
            params=params)

        pages.append([fig, ax])

    return pages
Example #8
0
def make_summary(fit, param_names=None, summary_params=SummaryParams()):
    """
    Returns statistical summary table for parameters:
    mean, std, mode, hpdi.

    Parameters
    ----------

    fit : cmdstanpy.stanfit.CmdStanMCMC

        Contains the samples from cmdstanpy.

    param_names : list of str

        Names of parameters to be included in the summary. Include all if None.

    summary_params : SummaryParams

        Settings passed to `sample_summary`.

    Returns
    -------
    df_summary, table
        The two values returned by `sample_summary`.
    samples
        The draws obtained from `fit` for the selected parameters.
    """

    param_names = filter_param_names(fit.column_names, param_names)
    samples = fit.get_drawset(params=param_names)

    # Get N_Eff and R_hat values from the cmdstanpy summary
    # --------

    # Index names like `a[1]` are rewritten as `a.1`
    df_summary = fit.summary().rename(
        index=(lambda name: name.replace('[', '.').replace(']', '')))

    # Take an explicit copy of the two columns: assigning into a plain
    # column selection is a chained assignment, which pandas may warn
    # about or apply to a temporary object.
    df_summary = df_summary[['N_Eff', 'R_hat']].copy()
    df_summary['N_Eff'] = np.round(df_summary['N_Eff']).astype(int)

    # Get the summary
    df_summary, table = sample_summary(df=samples,
                                       extra_values=df_summary,
                                       params=summary_params)

    return df_summary, table, samples
Example #9
0
def save_compare_parameters(
    fits,
    labels,
    extra_values=None,
    param_names=None,
    type: CompareParametersType = CompareParametersType.TEXT,
    info_path=None,
    summary_params=SummaryParams()):
    """
    Saves a text table that compares model parameters

    Parameters
    ----------

    fits : list of cmdstanpy.stanfit.CmdStanMCMC

        Contains the samples from cmdstanpy.

    labels : list of str

        Names of the models in `fits` list.

    extra_values : list of dict
        Additional values to be shown in the table:

        [
            {
                "mu": 2.3,
                "sigma": 3.3
            }
        ]

        Treated as an empty list if None.

    param_names : list of str

        Names of parameters. Include all if None.

    type : CompareParametersType

        Format of values in the text table.

    info_path : InfoPath

        Path information for creating summaries.
        A new `InfoPath()` is created for this call if None.

    summary_params : SummaryParams

        Passed through to the shared comparison function.
    """

    if info_path is None:
        # Build the default per call: `set_codefile()` is invoked on this
        # object below, so a default created once in the signature would be
        # one instance shared by every call.
        info_path = InfoPath()

    if extra_values is None:
        extra_values = []

    info_path.set_codefile()
    models = []

    for fit in fits:
        # `param_names` is re-assigned on purpose: each fit is filtered by
        # the names kept for the previous one, and the list left after the
        # last fit is what gets passed on to the shared function below.
        param_names = filter_param_names(fit.column_names, param_names)
        models.append(fit.get_drawset(params=param_names))

    shared_save_compare_parameters(models,
                                   labels=labels,
                                   extra_values=extra_values,
                                   type=type,
                                   param_names=param_names,
                                   info_path=info_path,
                                   summary_params=summary_params)
Example #10
0
def test_filter_param_names__numbered():
    """Requesting `a` also keeps its numbered columns `a.1` and `a.2`."""
    available = ['a.1', 'a.2', 'a', 'b', 'c']
    requested = ['a', 'b']

    assert filter_param_names(available, requested) == ['a.1', 'a.2', 'a', 'b']
Example #11
0
def test_filter_param_names():
    """Only the requested names are kept, in their original order."""
    available = ['a', 'b', 'c']
    requested = ['a', 'b']

    assert filter_param_names(available, requested) == ['a', 'b']
Example #12
0
def test_filter_param_names__remove_technical_columns():
    """Without requested names, the `stepsize__` column is dropped."""
    available = ['a', 'stepsize__', 'c']

    assert filter_param_names(available) == ['a', 'c']
Example #13
0
def compare_parameters(
    models,
    labels,
    extra_values=None,
    type: CompareParametersType = CompareParametersType.TEXT,
    param_names=None,
    summary_params=SummaryParams()):
    """
    Build a table that compares parameter summaries of several models.

    Parameters
    ----------

    models : list Panda's data frames
        List of data frames for each model, containing sample values for
        multiple parameters (one parameter is one data frame column).
        Supply multiple data frames to compare parameter distributions.

    labels : list of str
        Row names: first one per entry of `models`, followed by
        one per entry of `extra_values`.

    extra_values : list of dict
        Additional values to be shown in the table:

        [
            {
                "mu": 2.3,
                "sigma": 3.3
            }
        ]

        Treated as an empty list if None.

    type : CompareParametersType
        Format of values in the text table.

    param_names : list of str
        Names of parameters. Include all if None.

    summary_params : SummaryParams
        Settings passed to `sample_summary`.

    Returns
    --------
    df: Panda's data frame
        Table in Panda's format
    txt : str
        Table in text format

    Raises
    ------
    ValueError
        If `models` is empty, or if the number of labels differs from
        the number of models plus the number of extra values.
    """

    if extra_values is None:
        extra_values = []

    if len(models) == 0:
        raise ValueError('Models list is empty')

    if (len(models) + len(extra_values)) != len(labels):
        raise ValueError('Models list length is different from labels')

    # The columns of the first model define the parameters of the table
    first_columns = list(models[0])
    param_names = filter_param_names(first_columns, param_names)

    df = pd.DataFrame(index=labels, columns=param_names)

    # Names used to select columns from every model and to label the
    # text table; derived once from the first model's columns.
    param_names_filtered = filter_param_names(first_columns, param_names)

    for samples, label in zip(models, labels):
        df_summary, _ = sample_summary(samples[param_names_filtered],
                                       params=summary_params)

        df.loc[label] = [
            format_parameter(df_summary.loc[name], type)
            for name in param_names_filtered
        ]

    # Add extra values
    # ---------------

    # Labels that follow the model labels belong to the extra values
    extra_labels = labels[len(models):]

    for data, label in zip(extra_values, extra_labels):
        # Parameters missing from an extra-values dict get an empty cell
        df.loc[label] = [
            format_value(data[name], type) if name in data else ''
            for name in param_names_filtered
        ]

    table = tabulate(df,
                     headers=param_names_filtered,
                     tablefmt="pipe",
                     stralign="right")

    return df, table