def plot_dag(
    functions,
    targets=None,
    columns_overriding_functions=None,
    check_minimal_specification="ignore",
    selectors=None,
    labels=True,
    tooltips=False,
    plot_kwargs=None,
    arrow_kwargs=None,
    edge_kwargs=None,
    label_kwargs=None,
    node_kwargs=None,
):
    """Plot the dag of the tax and transfer system.

    The figure is displayed via ``output_notebook()`` and ``show(plot)`` before it
    is returned.

    Parameters
    ----------
    functions : str, pathlib.Path, callable, module, imports statements, dict
        Functions can be anything of the specified types and a list of the same
        objects. If the object is a dictionary, the keys of the dictionary are used
        as a name instead of the function name. For all other objects, the name is
        inferred from the function name.
    targets : str, list of str
        String or list of strings with names of functions whose output is actually
        needed by the user. If ``None``, ``DEFAULT_TARGETS`` is used.
    columns_overriding_functions : str, list of str
        Names of columns in the data which are preferred over function defined in
        the tax and transfer system.
    check_minimal_specification : {"ignore", "warn", "raise"}, default "ignore"
        Indicator for whether checks which ensure the most minimal configuration
        should be silenced, emitted as warnings or errors.
    selectors : str or list of str or dict or list of dict or list of str and dict
        Selectors allow you to select and de-select nodes in the graph for
        visualization. For the full list of options, see the tutorial about
        `visualization <../docs/tutorials/visualize.ipynb>`_. By default, all nodes
        are shown.
    labels : bool, default True
        Annotate nodes with labels.
    tooltips : bool, default False
        Experimental feature which makes the source code of the functions
        accessible as a tooltip. Sometimes, the tooltip is not properly displayed.
    plot_kwargs : dict
        Additional keyword arguments passed to :class:`bokeh.models.Plot`. The
        ``"title"`` entry (default ``"Tax and Transfer System"``) is converted with
        ``_to_bokeh_title``. The passed dictionary is not modified.
    arrow_kwargs : dict
        Additional keyword arguments passed to :class:`bokeh.models.NormalHead`,
        the head of each arrow. For example, change the size of the head with
        ``{"size": 10}``.
    edge_kwargs : dict
        Additional keyword arguments passed to :class:`bokeh.models.Arrow`, which
        draws each edge. For example, change the color with
        ``{"line_color": "green"}``.
    label_kwargs : dict
        Additional keyword arguments passed to :class:`bokeh.models.LabelSet`. For
        example, change the fontsize with ``{"text_font_size": "12px"}``.
    node_kwargs : dict
        Additional keyword arguments passed to :class:`bokeh.models.Circle`. For
        example, change the color with ``{"fill_color": "orange"}``.

    Returns
    -------
    plot : bokeh.models.Plot
        The assembled plot containing the arrows, the node renderer, the tools and,
        if requested, the labels.

    """
    targets = DEFAULT_TARGETS if targets is None else targets
    targets = parse_to_list_of_strings(targets, "targets")
    columns_overriding_functions = parse_to_list_of_strings(
        columns_overriding_functions, "columns_overriding_functions"
    )

    # Load functions and perform checks.
    functions, internal_functions = load_user_and_internal_functions(functions)

    # Create one dictionary of functions and perform check. User functions take
    # precedence over internal functions of the same name.
    functions = {**internal_functions, **functions}
    functions = {
        k: v for k, v in functions.items() if k not in columns_overriding_functions
    }
    _fail_if_targets_not_in_functions(functions, targets)

    # Partial parameters to functions such that they disappear in the DAG.
    functions = _mock_parameters_arguments(functions)

    dag = create_dag(
        functions, targets, columns_overriding_functions, check_minimal_specification
    )

    selectors = [] if selectors is None else _to_list(selectors)
    # Copy ``plot_kwargs`` because the title entry is overwritten below and the
    # caller's dictionary must not be changed as a side effect.
    plot_kwargs = {} if plot_kwargs is None else dict(plot_kwargs)
    arrow_kwargs = {} if arrow_kwargs is None else arrow_kwargs
    edge_kwargs = {} if edge_kwargs is None else edge_kwargs
    label_kwargs = {} if label_kwargs is None else label_kwargs
    node_kwargs = {} if node_kwargs is None else node_kwargs

    dag = _select_nodes_in_dag(dag, selectors)

    dag = _add_url_to_dag(dag)
    # Even if we do not use the source codes as tooltips, we need to remove the
    # functions.
    dag = _replace_functions_with_source_code(dag)

    plot_kwargs["title"] = _to_bokeh_title(
        plot_kwargs.get("title", "Tax and Transfer System")
    )
    plot = Plot(**{**PLOT_KWARGS_DEFAULTS, **plot_kwargs})

    layout = _create_pydot_layout(dag)
    graph_renderer = from_networkx(dag, layout, scale=1, center=(0, 0))

    graph_renderer.node_renderer.glyph = Circle(
        **{**NODE_KWARGS_DEFAULTS, **node_kwargs}
    )

    # The renderer's own edges are hidden; every edge is drawn as an explicit
    # arrow between the coordinates computed from the layout.
    graph_renderer.edge_renderer.visible = False
    edges = graph_renderer.edge_renderer.data_source.to_df()
    for _, (start_node, end_node) in edges.iterrows():
        (x_start, y_start), (x_end, y_end) = _compute_arrow_coordinates(
            layout[start_node], layout[end_node]
        )
        plot.add_layout(
            Arrow(
                end=NormalHead(**{**ARROW_KWARGS_DEFAULTS, **arrow_kwargs}),
                x_start=x_start,
                y_start=y_start,
                x_end=x_end,
                y_end=y_end,
                **{**EDGE_KWARGS_DEFAULTS, **edge_kwargs},
            )
        )

    plot.renderers.append(graph_renderer)

    tools = [BoxZoomTool(), ResetTool()]
    tools.append(TapTool(callback=OpenURL(url="@url")))
    if tooltips:
        tools.append(HoverTool(tooltips=TOOLTIPS))

    plot.add_tools(*tools)

    if labels:
        # One row per node with its layout coordinates; the index holds the node
        # name which is used as label text.
        source = ColumnDataSource(
            pd.DataFrame(layout).T.rename(columns={0: "x", 1: "y"})
        )
        # Use a separate name so that the boolean ``labels`` argument is not
        # shadowed by the glyph.
        label_set = LabelSet(
            x="x",
            y="y",
            text="index",
            source=source,
            **{**LABEL_KWARGS_DEFAULT, **label_kwargs},
        )
        plot.add_layout(label_set)

    output_notebook()
    show(plot)

    return plot
def compute_taxes_and_transfers(
    data,
    params,
    functions,
    targets=None,
    columns_overriding_functions=None,
    check_minimal_specification="ignore",
    debug=False,
):
    """Compute taxes and transfers.

    The inputs are validated, a DAG is built from the user and internal functions
    and then executed on a processed copy of the data.

    Parameters
    ----------
    data : pandas.DataFrame
        The data provided by the user. The computation works on a deep copy.
    params : dict
        A dictionary with parameters from the policy environment. ``None`` is
        treated as an empty dictionary. For more information see the documentation
        of the :ref:`param_files`.
    functions : str, pathlib.Path, callable, module, imports statements, dict
        Function from the policy environment. Functions can be anything of the
        specified types and a list of the same objects. If the object is a
        dictionary, the keys of the dictionary are used as a name instead of the
        function name. For all other objects, the name is inferred from the
        function name.
    targets : str, list of str, default None
        String or list of strings with names of functions whose output is actually
        needed by the user. By default, ``targets`` is ``None`` and all key outputs
        as defined by `gettsim.config.DEFAULT_TARGETS` are returned.
    columns_overriding_functions : str, list of str
        Names of columns in the data which are preferred over function defined in
        the tax and transfer system.
    check_minimal_specification : {"ignore", "warn", "raise"}, default "ignore"
        Indicator for whether checks which ensure the most minimal configuration
        should be silenced, emitted as warnings or errors.
    debug : bool
        The debug mode does the following:

        1. All necessary inputs and all computed variables are returned.
        2. If an exception occurs while computing one variable, the exception is
           printed, but not raised. The computation of all dependent variables is
           skipped.

    Returns
    -------
    results : pandas.DataFrame
        DataFrame containing computed variables. Unless ``debug`` is set, only the
        ``targets`` columns are kept.

    """
    if targets is None:
        targets = DEFAULT_TARGETS
    target_names = parse_to_list_of_strings(targets, "targets")
    overriding_columns = parse_to_list_of_strings(
        columns_overriding_functions, "columns_overriding_functions"
    )
    if params is None:
        params = {}

    _fail_if_columns_overriding_functions_are_not_in_data(data, overriding_columns)

    # Load functions and perform checks.
    user_functions, internal_functions = load_user_and_internal_functions(functions)

    # Columns which are not declared as overriding must not clash with functions.
    non_overriding_columns = set(data) - set(overriding_columns)
    _fail_if_functions_and_columns_overlap(
        non_overriding_columns, internal_functions, "internal"
    )
    _fail_if_functions_and_columns_overlap(
        non_overriding_columns, user_functions, "user"
    )

    # Create one dictionary of functions and perform check. User functions win
    # over internal functions with the same name.
    all_functions = {**internal_functions, **user_functions}

    _fail_if_datatype_is_false(data, overriding_columns, all_functions)
    _fail_if_columns_overriding_functions_are_not_in_functions(
        overriding_columns, all_functions
    )

    # Functions replaced by data columns are dropped before the DAG is built.
    remaining_functions = {
        name: function
        for name, function in all_functions.items()
        if name not in overriding_columns
    }

    _fail_if_targets_not_in_functions(remaining_functions, target_names)

    # Partial parameters to functions such that they disappear in the DAG.
    partialed_functions = _partial_parameters_to_functions(remaining_functions, params)

    # Create DAG and perform checks which depend on data which is not part of the DAG
    # interface.
    dag = create_dag(
        partialed_functions,
        target_names,
        overriding_columns,
        check_minimal_specification,
    )

    _fail_if_root_nodes_are_missing(dag, data)
    _fail_if_more_than_necessary_data_is_passed(dag, data, check_minimal_specification)
    _fail_if_pid_is_non_unique(data)

    # We delay the data preparation as long as possible such that other checks can fail
    # before this.
    prepared_data = _reduce_data(_process_data(data.copy(deep=True)))

    # Keep whichever of the id columns exist so the results can be expanded again.
    id_columns = set(prepared_data) & {"hh_id", "tu_id"}
    ids = _dict_subset(prepared_data, id_columns)

    raw_results = execute_dag(dag, prepared_data, target_names, debug)
    expanded_results = _expand_data(raw_results, ids)

    results = pd.DataFrame(expanded_results)
    if not debug:
        results = results[target_names]

    return _reorder_columns(results)