Esempio n. 1
0
def test_fail_if_datatype_is_false(input_data):
    """Check the data type validation performed by ``_fail_if_datatype_is_false``.

    Three scenarios are exercised:

    1. The unmodified ``input_data`` fixture must pass without an exception.
    2. A copy whose ``"alter"`` column is cast to float must raise ``ValueError``.
    3. A boolean ``"abgelt_st_tu"`` column which overrides an internal function
       must raise ``ValueError``.

    The setup for each scenario is kept outside of ``pytest.raises`` so that a
    ``ValueError`` raised while preparing the data cannot be mistaken for the
    expected failure of the function under test.

    """
    with does_not_raise():
        _fail_if_datatype_is_false(input_data, [], [])

    # Work on a deep copy so the fixture itself is left untouched.
    altered_data = input_data.copy(deep=True)
    altered_data["alter"] = altered_data["alter"].astype(float)
    with pytest.raises(ValueError):
        _fail_if_datatype_is_false(altered_data, [], [])

    # Passing ``None`` loads no user functions; the second return value holds the
    # internal functions.
    _, functions = load_user_and_internal_functions(None)
    columns = ["abgelt_st_tu"]
    new_data = pd.DataFrame(data=[True, False], columns=columns, dtype=bool)
    with pytest.raises(ValueError):
        _fail_if_datatype_is_false(new_data, columns, functions)
def plot_dag(
    functions,
    targets=None,
    columns_overriding_functions=None,
    check_minimal_specification="ignore",
    selectors=None,
    labels=True,
    tooltips=False,
    plot_kwargs=None,
    arrow_kwargs=None,
    edge_kwargs=None,
    label_kwargs=None,
    node_kwargs=None,
):
    """Plot the dag of the tax and transfer system.

    Besides returning the plot, the function calls ``output_notebook()`` and
    ``show(plot)``, i.e., the figure is displayed as a side effect. None of the
    keyword-argument dictionaries passed by the caller is modified.

    Parameters
    ----------
    functions : str, pathlib.Path, callable, module, imports statements, dict
        Functions can be anything of the specified types and a list of the same objects.
        If the object is a dictionary, the keys of the dictionary are used as a name
        instead of the function name. For all other objects, the name is inferred from
        the function name.
    targets : str, list of str
        String or list of strings with names of functions whose output is actually
        needed by the user. By default, ``DEFAULT_TARGETS`` is used.
    columns_overriding_functions : str, list of str
        Names of columns in the data which are preferred over function defined in the
        tax and transfer system.
    check_minimal_specification : {"ignore", "warn", "raise"}, default "ignore"
        Indicator for whether checks which ensure the most minimal configuration should
        be silenced, emitted as warnings or errors.
    selectors : str or list of str or dict or list of dict or list of str and dict
        Selectors allow you to select and de-select nodes in the graph for
        visualization. For the full list of options, see the tutorial about
        `visualization <../docs/tutorials/visualize.ipynb>`_. By default, all nodes are
        shown.
    labels : bool, default True
        Annotate nodes with labels.
    tooltips : bool, default False
        Experimental feature which makes the source code of the functions accessible as
        a tooltip. Sometimes, the tooltip is not properly displayed.
    plot_kwargs : dict
        Additional keyword arguments passed to :class:`bokeh.models.Plot`. The
        ``"title"`` entry (default ``"Tax and Transfer System"``) is converted with
        ``_to_bokeh_title`` before it is passed on.
    arrow_kwargs : dict
        Additional keyword arguments passed to :class:`bokeh.models.NormalHead`, the
        head of each arrow. For example, change the size of the head with
        ``{"size": 10}``.
    edge_kwargs : dict
        Additional keyword arguments passed to :class:`bokeh.models.Arrow`, which
        draws the edges. For example, change the color with
        ``{"line_color": "green"}``.
    label_kwargs : dict
        Additional keyword arguments passed to :class:`bokeh.models.LabelSet`. For
        example, change the fontsize with ``{"text_font_size": "12px"}``.
    node_kwargs : dict
        Additional keyword arguments passed to :class:`bokeh.models.Circle`. For
        example, change the color with ``{"fill_color": "orange"}``.

    Returns
    -------
    plot : bokeh.models.Plot
        The plot containing the nodes, arrows, tools and, optionally, the labels.

    """
    targets = DEFAULT_TARGETS if targets is None else targets
    targets = parse_to_list_of_strings(targets, "targets")
    columns_overriding_functions = parse_to_list_of_strings(
        columns_overriding_functions, "columns_overriding_functions")

    # Load functions and perform checks.
    functions, internal_functions = load_user_and_internal_functions(functions)

    # Create one dictionary of functions and perform check.
    functions = {**internal_functions, **functions}
    functions = {
        k: v
        for k, v in functions.items() if k not in columns_overriding_functions
    }
    _fail_if_targets_not_in_functions(functions, targets)

    # Partial parameters to functions such that they disappear in the DAG.
    functions = _mock_parameters_arguments(functions)

    dag = create_dag(functions, targets, columns_overriding_functions,
                     check_minimal_specification)

    selectors = [] if selectors is None else _to_list(selectors)
    plot_kwargs = {} if plot_kwargs is None else plot_kwargs
    arrow_kwargs = {} if arrow_kwargs is None else arrow_kwargs
    edge_kwargs = {} if edge_kwargs is None else edge_kwargs
    label_kwargs = {} if label_kwargs is None else label_kwargs
    node_kwargs = {} if node_kwargs is None else node_kwargs

    dag = _select_nodes_in_dag(dag, selectors)

    dag = _add_url_to_dag(dag)
    # Even if we do not use the source codes as tooltips, we need to remove the
    # functions.
    dag = _replace_functions_with_source_code(dag)

    # Merge into a fresh dictionary and set the converted title there, so that the
    # dictionary handed in by the caller is never written to.
    plot_options = {**PLOT_KWARGS_DEFAULTS, **plot_kwargs}
    plot_options["title"] = _to_bokeh_title(
        plot_kwargs.get("title", "Tax and Transfer System"))
    plot = Plot(**plot_options)

    layout = _create_pydot_layout(dag)
    graph_renderer = from_networkx(dag, layout, scale=1, center=(0, 0))

    graph_renderer.node_renderer.glyph = Circle(**{
        **NODE_KWARGS_DEFAULTS,
        **node_kwargs
    })

    # The edges of the graph renderer are hidden and replaced by arrows, which are
    # added to the plot one by one.
    graph_renderer.edge_renderer.visible = False

    # The merged options are identical for every edge, so compute them once.
    head_options = {**ARROW_KWARGS_DEFAULTS, **arrow_kwargs}
    edge_options = {**EDGE_KWARGS_DEFAULTS, **edge_kwargs}

    for (
            _,
        (start_node, end_node),
    ) in graph_renderer.edge_renderer.data_source.to_df().iterrows():
        (x_start, y_start), (x_end, y_end) = _compute_arrow_coordinates(
            layout[start_node], layout[end_node])
        plot.add_layout(
            Arrow(
                # A new head is created for each arrow.
                end=NormalHead(**head_options),
                x_start=x_start,
                y_start=y_start,
                x_end=x_end,
                y_end=y_end,
                **edge_options,
            ))

    plot.renderers.append(graph_renderer)

    tools = [BoxZoomTool(), ResetTool()]
    # Tapping a node opens the URL stored in the node's "url" field.
    tools.append(TapTool(callback=OpenURL(url="@url")))
    if tooltips:
        tools.append(HoverTool(tooltips=TOOLTIPS))

    plot.add_tools(*tools)

    if labels:
        # The layout maps each node to its coordinates; transposing yields one row
        # per node whose index is used as the label text.
        source = ColumnDataSource(
            pd.DataFrame(layout).T.rename(columns={
                0: "x",
                1: "y"
            }))
        # Use a separate name so the ``labels`` flag is not shadowed.
        label_set = LabelSet(
            x="x",
            y="y",
            text="index",
            source=source,
            **{
                **LABEL_KWARGS_DEFAULT,
                **label_kwargs
            },
        )
        plot.add_layout(label_set)

    output_notebook()
    show(plot)

    return plot
def compute_taxes_and_transfers(
    data,
    params,
    functions,
    targets=None,
    columns_overriding_functions=None,
    check_minimal_specification="ignore",
    debug=False,
):
    """Compute taxes and transfers.

    The inputs are validated, a DAG of the required functions is built, and the
    DAG is executed on a processed copy of the data.

    Parameters
    ----------
    data : pandas.DataFrame
        The data provided by the user. A deep copy is taken before the data is
        processed.
    params : dict
        A dictionary with parameters from the policy environment. ``None`` is
        treated as an empty dictionary. For more information see the documentation
        of the :ref:`param_files`.
    functions : str, pathlib.Path, callable, module, imports statements, dict
        Function from the policy environment. Functions can be anything of the
        specified types and a list of the same objects. If the object is a
        dictionary, the keys of the dictionary are used as a name instead of the
        function name. For all other objects, the name is inferred from the function
        name.
    targets : str, list of str, default None
        String or list of strings with names of functions whose output is actually
        needed by the user. By default, ``targets`` is ``None`` and all key outputs
        as defined by `gettsim.config.DEFAULT_TARGETS` are returned.
    columns_overriding_functions : str, list of str
        Names of columns in the data which are preferred over function defined in
        the tax and transfer system.
    check_minimal_specification : {"ignore", "warn", "raise"}, default "ignore"
        Indicator for whether checks which ensure the most minimal configuration
        should be silenced, emitted as warnings or errors.
    debug : bool
        The debug mode does the following:

        1. All necessary inputs and all computed variables are returned.
        2. If an exception occurs while computing one variable, the exception is
           printed, but not raised. The computation of all dependent variables is
           skipped.

    Returns
    -------
    results : pandas.DataFrame
        DataFrame containing computed variables. Unless ``debug`` is set, only the
        columns named in ``targets`` are kept.

    """
    # Normalize the optional arguments.
    if targets is None:
        targets = DEFAULT_TARGETS
    targets = parse_to_list_of_strings(targets, "targets")
    columns_overriding_functions = parse_to_list_of_strings(
        columns_overriding_functions, "columns_overriding_functions"
    )
    if params is None:
        params = {}

    _fail_if_columns_overriding_functions_are_not_in_data(
        data, columns_overriding_functions
    )

    # Load both groups of functions and make sure neither clashes with a data
    # column that is not explicitly marked as overriding.
    user_functions, internal_functions = load_user_and_internal_functions(functions)
    plain_columns = set(data) - set(columns_overriding_functions)
    _fail_if_functions_and_columns_overlap(
        plain_columns, internal_functions, "internal"
    )
    _fail_if_functions_and_columns_overlap(plain_columns, user_functions, "user")

    # User functions take precedence over internal ones with the same name.
    all_functions = {**internal_functions, **user_functions}
    _fail_if_datatype_is_false(data, columns_overriding_functions, all_functions)
    _fail_if_columns_overriding_functions_are_not_in_functions(
        columns_overriding_functions, all_functions
    )

    # Drop every function which is replaced by a column of the data.
    remaining_functions = {}
    for name, function in all_functions.items():
        if name not in columns_overriding_functions:
            remaining_functions[name] = function
    _fail_if_targets_not_in_functions(remaining_functions, targets)

    # Bind the parameters so that they do not show up as nodes in the DAG.
    partialed_functions = _partial_parameters_to_functions(
        remaining_functions, params
    )

    # Build the DAG, then run the checks which need both the DAG and the data.
    dag = create_dag(
        partialed_functions,
        targets,
        columns_overriding_functions,
        check_minimal_specification,
    )
    _fail_if_root_nodes_are_missing(dag, data)
    _fail_if_more_than_necessary_data_is_passed(
        dag, data, check_minimal_specification
    )
    _fail_if_pid_is_non_unique(data)

    # Preparing the data is postponed until here so that all cheaper checks get a
    # chance to fail first.
    prepared = _reduce_data(_process_data(data.copy(deep=True)))
    ids = _dict_subset(prepared, set(prepared) & {"hh_id", "tu_id"})

    computed = execute_dag(dag, prepared, targets, debug)
    frame = pd.DataFrame(_expand_data(computed, ids))

    if not debug:
        frame = frame[targets]

    return _reorder_columns(frame)