Code Example #1
def _plot_simulated_and_empirical_moment(simulated, empirical, ax=None):
    """Plot moments into axis."""
    if ax is None:
        _, ax = plt.subplots()

    sim_color, emp_color = get_colors("categorical", 2)

    dates = simulated.index

    for run in simulated:
        plot_line_with_gaps(
            x=dates, y=simulated[run], ax=ax, color=sim_color, alpha=0.15
        )

    plot_line_with_gaps(
        x=dates,
        y=simulated.mean(axis=1),
        ax=ax,
        color=sim_color,
        lw=2.5,
        label="simulated",
    )

    plot_line_with_gaps(
        x=empirical.index,
        y=empirical,
        ax=ax,
        color=emp_color,
        lw=2.5,
        label="empirical",
    )
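A hypothetical usage sketch (not part of the original project): as written above, the function expects `simulated` to be a wide DataFrame with one column per simulation run and a date index, and `empirical` to be a Series over the same dates. The helpers `plot_line_with_gaps` and `get_colors` must be importable from the surrounding project.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fabricated toy data, for illustration only.
dates = pd.date_range("2020-03-01", periods=60, freq="D")
simulated = pd.DataFrame(
    {f"run_{i}": np.random.poisson(50, size=len(dates)) for i in range(20)},
    index=dates,
)
empirical = pd.Series(np.random.poisson(50, size=len(dates)), index=dates)

fig, ax = plt.subplots()
_plot_simulated_and_empirical_moment(simulated, empirical, ax=ax)
ax.legend()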
Code Example #2
def make_gantt_chart_of_policy_dict(
    policies, title=None, bar_height=0.8, bar_color=None, edge_color=None, alpha=1
):
    cm_names = sorted({pol["affected_contact_model"] for pol in policies.values()})
    positions = dict(zip(cm_names, range(len(cm_names))))
    fig, ax = plt.subplots(figsize=(12, len(cm_names)))
    edge_color = get_colors("categorical", 1)[0] if edge_color is None else edge_color
    bar_color = "#ffffff00" if bar_color is None else bar_color

    for pol in policies.values():
        affected_model = pol["affected_contact_model"]
        start = pd.Timestamp(pol["start"])
        end = pd.Timestamp(pol["end"])
        ax.broken_barh(
            xranges=[(start, end - start)],
            yrange=(positions[affected_model] - 0.5 * bar_height, bar_height),
            edgecolors=edge_color,
            facecolors=bar_color,
            alpha=alpha,
        )
    ax.set_yticks(range(len(cm_names)))
    ax.set_yticklabels(cm_names)
    if title is not None:
        ax.set_title(title.replace("_", " ").title())
    return fig, ax
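A hypothetical usage sketch: judging from the accesses above, `policies` is assumed to be a dict of dicts, each with "affected_contact_model", "start" and "end" entries; the policy names and dates below are made up.

policies = {
    "schools_closed": {
        "affected_contact_model": "school",
        "start": "2020-03-16",
        "end": "2020-04-20",
    },
    "home_office_recommended": {
        "affected_contact_model": "work",
        "start": "2020-03-22",
        "end": "2020-05-04",
    },
}
fig, ax = make_gantt_chart_of_policy_dict(policies, title="example_policies")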
Code Example #3
def plot_univariate_effects(criterion,
                            params,
                            n_gridpoints=21,
                            n_random_values=2,
                            plots_per_row=2,
                            seed=5471):
    """Plot criterion along coordinates at given and random values.

    Args:
        criterion (callable): criterion function. Takes a DataFrame and
            returns a scalar value or dictionary with the entry "value".
        params (pandas.DataFrame): See :ref:`params`. Must contain finite
            lower and upper bounds for all parameters.
        n_gridpoints (int): Number of gridpoints on which the criterion
            function is evaluated. This is the number per plotted line.
        n_random_values (int): Number of random parameter vectors that
            are used as center of the plots.
        plots_per_row (int): How many plots are plotted per row.
        seed (int): Seed for drawing the random parameter vectors.

    """
    np.random.seed(seed)
    if ("lower_bound" not in params.columns
            or not np.isfinite(params["lower_bound"]).all()):
        raise ValueError("All parameters need a finite lower bound.")
    if ("upper_bound" not in params.columns
            or not np.isfinite(params["upper_bound"]).all()):
        raise ValueError("All parameters need a finite upper bound.")

    if "name" not in params.columns:
        names = [_index_element_to_string(tup) for tup in params.index]
        params["name"] = names

    plot_data = _get_plot_data(
        params=params,
        use_random_value=False,
        value_identifier="start values",
        n_gridpoints=n_gridpoints,
    )
    to_concat = [plot_data]

    for i in range(n_random_values):
        to_concat.append(
            _get_plot_data(
                params=params,
                use_random_value=True,
                value_identifier=f"random value {i}",
                n_gridpoints=n_gridpoints,
            ))

    plot_data = pd.concat(to_concat).reset_index()

    arguments = []
    for _, row in plot_data.iterrows():
        p = params.copy(deep=True)
        p["value"] = row[params.index].astype(float)
        arguments.append(p)

    function_values = [criterion(arg) for arg in arguments]
    if isinstance(function_values[0], dict):
        function_values = [val["value"] for val in function_values]

    plot_data["Criterion Value"] = function_values

    colors = get_colors("categorical", 1 + n_random_values)
    g = sns.FacetGrid(
        plot_data,
        col="name",
        hue="value_identifier",
        col_wrap=plots_per_row,
        palette=colors,
        aspect=1.5,
        sharex=False,
    )
    g.map(sns.lineplot, "Parameter Value", "Criterion Value", linewidth=2)

    return g
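A hypothetical usage sketch with a simple sum-of-squares criterion; the parameter names and bounds are made up but follow the requirements stated in the docstring (finite lower and upper bounds and a "value" column).

import numpy as np
import pandas as pd

def sphere(params):
    # criterion that takes the params DataFrame and returns a scalar
    return (params["value"] ** 2).sum()

params = pd.DataFrame(
    {
        "value": [0.5, -0.3, 1.0],
        "lower_bound": [-2.0, -2.0, -2.0],
        "upper_bound": [2.0, 2.0, 2.0],
    },
    index=["a", "b", "c"],
)
grid = plot_univariate_effects(sphere, params, n_gridpoints=11, n_random_values=2)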
Code Example #4
def profile_plot(
    problems=None,
    results=None,
    runtime_measure="n_evaluations",
    normalize_runtime=False,
    stopping_criterion="y",
    x_precision=1e-4,
    y_precision=1e-4,
):
    """Compare optimizers over a problem set.

    This plot answers the question: What percentage of problems can each algorithm
    solve within a certain runtime budget?

    The runtime budget is plotted on the x axis and the share of problems each
    algorithm solved on the y axis.

    Thus, algorithms that are very specialized and perform well on some share of
    problems, but cannot solve additional problems even with a larger computational
    budget, show a steep increase followed by a flat line. Algorithms that are robust
    but slow have low shares in the beginning but eventually reach very high shares.

    Note that failing to converge according to the given stopping_criterion and
    precisions is scored as needing an infinite computational budget.

    For details, see the description of performance and data profiles by
    Moré and Wild (2009).

    Args:
        problems (dict): estimagic benchmarking problems dictionary. Keys are the
            problem names. Values contain information on the problem, including the
            solution value.
        results (dict): estimagic benchmarking results dictionary. Keys are
            tuples of the form (problem, algorithm), values are dictionaries of the
            collected information on the benchmark run, including 'criterion_history'
            and 'time_history'.
        runtime_measure (str): "n_evaluations" or "walltime".
            This is the runtime until the desired convergence was reached by an
            algorithm. It is called the performance measure by Moré and Wild (2009).
        normalize_runtime (bool): If True, the runtime each algorithm needed for each
            problem is scaled by the time the fastest algorithm needed. In that case,
            the resulting plot is what Moré and Wild (2009) call a data profile.
        stopping_criterion (str): one of "x_and_y", "x_or_y", "x", "y". Determines
            how convergence is determined from the two precisions.
        x_precision (float or None): how close an algorithm must have gotten to the
            true parameter values (as percent of the Euclidean distance between start
            and solution parameters) before the criterion for clipping and convergence
            is fulfilled.
        y_precision (float or None): how close an algorithm must have gotten to the
            true criterion values (as percent of the distance between start
            and solution criterion value) before the criterion for clipping and
            convergence is fulfilled.

    Returns:
        fig

    """
    if stopping_criterion is None:
        raise ValueError(
            "You must specify a stopping criterion for the profile plot."
        )
    df, converged_info = create_convergence_histories(
        problems=problems,
        results=results,
        stopping_criterion=stopping_criterion,
        x_precision=x_precision,
        y_precision=y_precision,
    )

    solution_times = _create_solution_times(
        df,
        runtime_measure=runtime_measure,
        converged_info=converged_info,
    )

    if normalize_runtime:
        solution_times = solution_times.divide(solution_times.min(axis=1),
                                               axis=0)
        # set again to inf because no inf Timedeltas were allowed.
        solution_times[~converged_info] = np.inf
    else:
        if (runtime_measure == "walltime"
                and (solution_times == pd.Timedelta(weeks=1000)).any().any()):
            warnings.warn(
                "Some algorithms did not converge. Their walltime has been "
                "set to a very high value instead of infinity because Timedeltas do not"
                "support infinite values.")

    # create performance profiles
    alphas = _determine_alpha_grid(solution_times)
    for_each_alpha = pd.concat(
        {alpha: solution_times <= alpha for alpha in alphas},
        names=["alpha"],
    )
    performance_profiles = (
        for_each_alpha.groupby("alpha").mean().stack().reset_index()
    )

    # Build plot
    fig, ax = plt.subplots(figsize=(8, 6))
    n_algos = len(solution_times.columns)
    sns.lineplot(
        data=performance_profiles,
        x="alpha",
        y=0,
        hue="algorithm",
        ax=ax,
        lw=2.5,
        alpha=1.0,
        palette=get_colors("categorical", n_algos),
    )

    # Plot Styling
    xlabels = {
        ("n_evaluations", True): (
            "Multiple of Minimal Number of Function Evaluations\n"
            "Needed to Solve the Problem"
        ),
        ("walltime", True): (
            "Multiple of Minimal Wall Time\nNeeded to Solve the Problem"
        ),
        ("n_evaluations", False): "Number of Function Evaluations",
        ("walltime", False): "Wall Time Needed to Solve the Problem",
    }

    ax.set_xlabel(xlabels[(runtime_measure, normalize_runtime)])
    ax.set_ylabel("Share of Problems Solved")
    spine_lw = ax.spines["bottom"].get_linewidth()
    ax.axhline(1.0, color="silver", xmax=0.955, lw=spine_lw)
    ax.legend(title=None)
    fig.tight_layout()

    return fig
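A hypothetical usage sketch, assuming estimagic's benchmarking helpers get_benchmark_problems and run_benchmark are available and produce the `problems` and `results` dictionaries described in the docstring; exact import paths and signatures may differ between versions.

from estimagic import get_benchmark_problems, run_benchmark

# Assumed helpers: a named benchmark suite and a list of algorithm names.
problems = get_benchmark_problems("more_wild")
results = run_benchmark(problems, optimize_options=["scipy_lbfgsb", "scipy_neldermead"])

fig = profile_plot(
    problems=problems,
    results=results,
    runtime_measure="n_evaluations",
    normalize_runtime=True,
)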
Code Example #5
2. Mobility in different forms of division of Germany: "City States", "Non-City States",
"City vs Territorial", "Former BRD vs DDR states" and "North-East-South-West comparison"
\n
"""
import matplotlib.pyplot as plt
import pandas as pd
import pytask
import seaborn as sns
from estimagic.visualization.colors import get_colors
from utils import mobility_plot

from src.config import BLD


# Function for plots
colors = get_colors("categorical", 12)

titles = [
    "Retail and Recreation (7d-average)",
    "Grocery and Pharmacy (7d-average)",
    "Workplaces (7d-average)",
    "Parks (7d-average)",
    "Residential (7d-average)",
    "Transit Stations (7d-average)",
]

varlist_moving_avg = [
    "retail_and_recreation_avg_7d",
    "grocery_and_pharmacy_avg_7d",
    "workplaces_avg_7d",
    "parks_avg_7d",
Code Example #6
def lollipop_plot(
    data,
    sharex=True,
    plot_bar=True,
    pairgrid_kws=None,
    stripplot_kws=None,
    barplot_kws=None,
    style="whitegrid",
    dodge=True,
):
    """Make a lollipop plot.

    Args:
        data (pandas.DataFrame): The datapoints to be plotted. In contrast
            to many seaborn functions, the whole data will be plotted. Thus if you
            want to plot just some variables or rows you need to restrict the dataset
            before passing it.
        sharex (bool): Whether the x-axis is shared across variables, default True.
        plot_bar (bool): Whether thin bars are plotted, default True.
        pairgrid_kws (dict): Keyword arguments for the creation of a Seaborn
            PairGrid. Most notably, "height" and "aspect" to control the sizes.
        stripplot_kws (dict): Keyword arguments to plot the dots of the lollipop plot
            via the stripplot function. Most notably, "color" and "size".
        barplot_kws (dict): Keyword arguments to plot the lines of the lollipop plot
            via the barplot function. Most notably, "color" and "alpha". In contrast
            to seaborn, we allow for a "width" argument.
        style (str): A seaborn style.
        dodge (bool): Whether the lollipops for different datasets are plotted
            with an offset or on top of each other.

    Returns:
        seaborn.PairGrid

    """
    data, varnames = _harmonize_data(data)

    sns.set_style(style)
    pairgrid_kws = {} if pairgrid_kws is None else pairgrid_kws
    stripplot_kws = {} if stripplot_kws is None else stripplot_kws
    barplot_kws = {} if barplot_kws is None else barplot_kws

    colors = get_colors("categorical", len(data))

    # Make the PairGrid
    pairgrid_kws = {
        "aspect": 0.5,
        **pairgrid_kws,
    }

    g = sns.PairGrid(
        data,
        x_vars=varnames,
        y_vars=["__name__"],
        hue="__hue__",
        **pairgrid_kws,
    )

    # Draw a dot plot using the stripplot function
    combined_stripplot_kws = {
        "size": 8,
        "orient": "h",
        "jitter": False,
        "palette": colors,
        "edgecolor": "#0000ffff",
        "dodge": dodge,
        **stripplot_kws,
    }

    g.map(sns.stripplot, **combined_stripplot_kws)

    if plot_bar:
        # Draw lines to the plot using the barplot function
        combined_barplot_kws = {
            "palette": colors,
            "alpha": 0.5,
            "width": 0.1,
            "dodge": dodge,
            **barplot_kws,
        }
        bar_height = combined_barplot_kws.pop("width")
        g.map(sns.barplot, **combined_barplot_kws)

    # Adjust the width of the bars which seaborn.barplot does not allow
    for ax in g.axes.flat:
        for patch in ax.patches:
            current_height = patch.get_height()
            diff = current_height - bar_height
            # we change the bar width
            patch.set_height(bar_height)
            # we recenter the bar
            patch.set_y(patch.get_y() + diff * 0.5)

    # Use the same x axis limits on all columns and add better labels
    if sharex:
        lower_candidate = data[varnames].min().min()
        upper_candidate = data[varnames].max().max()
        padding = (upper_candidate - lower_candidate) / 10
        lower = lower_candidate - padding
        upper = upper_candidate + padding
        g.set(xlim=(lower, upper), xlabel=None, ylabel=None)

    # Use semantically meaningful titles for the columns
    for ax, title in zip(g.axes.flat, varnames):

        # Set a different title for each axes
        ax.set(title=title)

        # Turn off the grid lines on both axes
        ax.xaxis.grid(False)
        ax.yaxis.grid(False)

    sns.despine(left=False, bottom=False)
    return g
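A hypothetical usage sketch: the exact accepted input layout depends on the unshown _harmonize_data helper; here a plain DataFrame with one row per model and one column per variable is assumed.

import pandas as pd

estimates = pd.DataFrame(
    {"coefficient_age": [0.5, 0.6], "coefficient_income": [1.2, 1.1]},
    index=["model_a", "model_b"],
)
grid = lollipop_plot(estimates)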
Code Example #7
File: plot_functions.py  Project: yradeva93/estimagic
def plot_time_series(
    data,
    y_keys,
    x_name,
    title,
    name=None,
    y_names=None,
    logscale=False,
    plot_width=PLOT_WIDTH,
):
    """Plot time series linking the *y_keys* to a common *x_name* variable.

    Args:
        data (ColumnDataSource): data that contain the y_keys and x_name
        y_keys (list): list of the entries in the data that are to be plotted.
        x_name (str): name of the entry in the data that will be on the x axis.
        title (str): title of the plot.
        name (str, optional): name of the plot for later retrieval with bokeh.
        y_names (list, optional): if given these replace the y keys as line names.
        logscale (bool, optional): Whether to have a logarithmic scale or a linear one.

    Returns:
        plot (bokeh Figure)

    """
    if y_names is None:
        y_names = [str(key) for key in y_keys]

    plot = create_styled_figure(title=title,
                                name=name,
                                logscale=logscale,
                                plot_width=plot_width)
    # this ensures that the y range spans at least 0.1
    plot.y_range.range_padding = Y_RANGE_PADDING
    plot.y_range.range_padding_units = Y_RANGE_PADDING_UNITS

    colors = get_colors("categorical", len(y_keys))

    legend_items = []
    for color, y_key, y_name in zip(colors, y_keys, y_names):
        if len(y_name) <= 35:
            label = y_name
        else:
            label = "..." + y_name[-32:]
        line_glyph = plot.line(
            source=data,
            x=x_name,
            y=y_key,
            line_width=2,
            color=color,
            muted_color=color,
            muted_alpha=0.2,
        )
        legend_items.append((label, [line_glyph]))
    legend_items.append((" " * 60, []))

    tooltips = [(x_name, "@" + x_name)]
    tooltips += [("param_name", y_name), ("param_value", "@" + y_key)]
    hover = HoverTool(renderers=[line_glyph], tooltips=tooltips)
    plot.tools.append(hover)

    legend = Legend(
        items=legend_items,
        border_line_color=None,
        label_width=100,
        label_text_font_size=LEGEND_LABEL_TEXT_FONT_SIZE,
        spacing=LEGEND_SPACING,
    )
    legend.click_policy = "mute"
    plot.add_layout(legend, "right")

    return plot
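A hypothetical usage sketch: `data` is assumed to be a bokeh ColumnDataSource containing the `x_name` column and every `y_key`; create_styled_figure and the module constants (PLOT_WIDTH, Y_RANGE_PADDING, ...) come from the surrounding project.

from bokeh.models import ColumnDataSource

source = ColumnDataSource(
    {"iteration": [0, 1, 2, 3], "param_a": [1.0, 0.8, 0.5, 0.4]}
)
fig = plot_time_series(
    data=source,
    y_keys=["param_a"],
    x_name="iteration",
    title="Parameter history",
)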
Code Example #8
def convergence_plot(
    problems=None,
    results=None,
    problem_subset=None,
    algorithm_subset=None,
    n_cols=2,
    distance_measure="criterion",
    monotone=True,
    normalize_distance=True,
    runtime_measure="n_evaluations",
    stopping_criterion="y",
    x_precision=1e-4,
    y_precision=1e-4,
):
    """Plot convergence of optimizers for a set of problems.

    This creates a grid of plots, showing the convergence of the different
    algorithms on each problem. The faster a line falls, the faster the algorithm
    improved on the problem. An algorithm has converged where its line reaches 0
    (if normalize_distance is True) or the horizontal blue line labeled "true solution".

    Each plot shows on the x axis the runtime_measure, which can be walltime or number
    of evaluations. Each algorithm's convergence is a line in the plot. Convergence can
    be measured by the criterion value at the particular time/evaluation. The
    convergence can be made monotone (i.e. always taking the best value so far) or
    normalized such that the distance from the start to the true solution is one.

    Args:
        problems (dict): estimagic benchmarking problems dictionary. Keys are the
            problem names. Values contain information on the problem, including the
            solution value.
        results (dict): estimagic benchmarking results dictionary. Keys are
            tuples of the form (problem, algorithm), values are dictionaries of the
            collected information on the benchmark run, including 'criterion_history'
            and 'time_history'.
        problem_subset (list, optional): List of problem names. These must be a subset
            of the keys of the problems dictionary. If provided the convergence plot is
            only created for the problems specified in this list.
        algorithm_subset (list, optional): List of algorithm names. These must be a
            subset of the keys of the optimizer_options passed to run_benchmark. If
            provided only the convergence of the given algorithms are shown.
        n_cols (int): number of columns in the plot of grids. The number
            of rows is determined automatically.
        distance_measure (str): One of "criterion", "parameter_distance".
        monotone (bool): If True, the best criterion value found so far is plotted.
            If False, the criterion evaluation at that point in time is used.
        normalize_distance (bool): If True the progress is scaled by the total distance
            between the start value and the optimal value, i.e. 1 means the algorithm
            is as far from the solution as the start value and 0 means the algorithm
            has reached the solution value.
        runtime_measure (str): "n_evaluations" or "walltime".
        stopping_criterion (str): "x_and_y", "x_or_y", "x", "y" or None. If None, no
            clipping is done.
        x_precision (float or None): how close an algorithm must have gotten to the
            true parameter values (as percent of the Euclidean distance between start
            and solution parameters) before the criterion for clipping and convergence
            is fulfilled.
        y_precision (float or None): how close an algorithm must have gotten to the
            true criterion values (as percent of the distance between start
            and solution criterion value) before the criterion for clipping and
            convergence is fulfilled.

    Returns:
        fig

    """
    df, _ = create_convergence_histories(
        problems=problems,
        results=results,
        stopping_criterion=stopping_criterion,
        x_precision=x_precision,
        y_precision=y_precision,
    )

    # handle string provision for single problems / algorithms
    if isinstance(problem_subset, str):
        problem_subset = [problem_subset]
    if isinstance(algorithm_subset, str):
        algorithm_subset = [algorithm_subset]

    _check_only_allowed_subset_provided(problem_subset, df["problem"], "problem")
    _check_only_allowed_subset_provided(algorithm_subset, df["algorithm"], "algorithm")

    if problem_subset is not None:
        df = df[df["problem"].isin(problem_subset)]
    if algorithm_subset is not None:
        df = df[df["algorithm"].isin(algorithm_subset)]

    # plot configuration
    outcome = (
        f"{'monotone_' if monotone else ''}"
        + distance_measure
        + f"{'_normalized' if normalize_distance else ''}"
    )

    # create plots
    remaining_problems = df["problem"].unique()
    n_rows = int(np.ceil(len(remaining_problems) / n_cols))
    figsize = (n_cols * 6, n_rows * 4)
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=figsize)

    if algorithm_subset is None:
        algorithms = {tup[1] for tup in results.keys()}
    else:
        algorithms = algorithm_subset
    palette = get_colors("categorical", number=len(algorithms))

    for ax, prob_name in zip(axes.flatten(), remaining_problems):
        to_plot = df[df["problem"] == prob_name]
        sns.lineplot(
            data=to_plot,
            x=runtime_measure,
            y=outcome,
            hue="algorithm",
            lw=2.5,
            alpha=1.0,
            ax=ax,
            palette=palette,
        )
        ax.set_title(prob_name.replace("_", " ").title())
        if distance_measure == "criterion" and not normalize_distance:
            f_opt = problems[prob_name]["solution"]["value"]
            ax.axhline(f_opt, label="true solution", lw=2.5)

    # style plots
    y_labels = {
        "criterion": "Current Function Value",
        "monotone_criterion": "Best Function Value Found So Far",
        "criterion_normalized": "Share of Function Distance to Optimum\n"
        + "Missing From Current Criterion Value",
        "monotone_criterion_normalized": "Share of Function Distance to Optimum\n"
        + "Missing From Best So Far",
        "parameter_distance": "Distance Between Current and Optimal Parameters",
        "parameter_distance_normalized": "Share of the Parameter Distance to Optimum\n"
        + "Missing From Current Parameters",
        "monotone_parameter_distance_normalized": "Share of the Parameter Distance "
        + "to Optimum\n Missing From the Best Parameters So Far",
        "monotone_parameter_distance": "Distance Between the Best Parameters So Far\n"
        "and the Optimal Parameters",
    }
    x_labels = {
        "n_evaluations": "Number of Function Evaluations",
        "walltime": "Elapsed Time",
    }
    for ax in axes.flatten():
        ax.set_ylabel(y_labels[outcome])
        ax.set_xlabel(x_labels[runtime_measure])
        ax.legend(title=None)

    # make empty plots invisible
    n_empty_plots = len(axes.flatten()) - len(remaining_problems)
    if n_empty_plots > 0:
        for ax in axes.flatten()[-n_empty_plots:]:
            ax.set_visible(False)
    fig.tight_layout()
    return fig
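A hypothetical usage sketch, reusing the `problems` and `results` dictionaries built with the same benchmarking helpers as in the profile_plot sketch above; the algorithm name in `algorithm_subset` is only illustrative.

# `problems` and `results` as produced in the profile_plot sketch above.
fig = convergence_plot(
    problems=problems,
    results=results,
    algorithm_subset=["scipy_lbfgsb"],
    distance_measure="criterion",
    monotone=True,
    n_cols=2,
)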
Code Example #9
def test_correct_number_up_to_24(palette):
    for number in range(12, 25):
        assert len(get_colors(palette, number)) == number
Code Example #10
def test_correct_number_up_to_twelve(palette):
    for number in range(13):
        assert len(get_colors(palette, number)) == number
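The two tests above receive `palette` from test setup that is not shown; a minimal sketch of a possible pytest parametrization, assuming "categorical" (used throughout the examples above) is a valid palette name.

import pytest

@pytest.fixture(params=["categorical"])
def palette(request):
    # The list of palette names is an assumption; extend it with whatever
    # palettes get_colors actually supports.
    return request.param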