def _fail_if_columns_overriding_functions_are_not_in_functions( columns_overriding_functions, functions): """Fail if ``columns_overriding_functions`` are not found in functions. Parameters ---------- columns_overriding_functions : str list of str Names of columns which are preferred over function defined in the tax and transfer system. functions : dict of callable A dictionary of functions. Raises ------ ValueError Fail if some ``columns_overriding_functions`` are not found in internal or user functions. """ unnecessary_columns_overriding_functions = set( columns_overriding_functions) - set(functions) if unnecessary_columns_overriding_functions: n_cols = len(unnecessary_columns_overriding_functions) intro = _format_text_for_cmdline(f""" You passed the following user column{'' if n_cols == 1 else 's'} which {'is' if n_cols == 1 else 'are'} unnecessary because no functions require them as inputs. """) list_ = format_list_linewise(unnecessary_columns_overriding_functions) raise ValueError("\n".join([intro, list_]))
def _fail_if_more_than_necessary_data_is_passed(
    dag, data, check_minimal_specification
):
    """Warn or fail if ``data`` contains columns which are not root nodes of the DAG.

    Parameters
    ----------
    dag
        The DAG whose root nodes are obtained via ``_root_nodes``.
    data
        Container of data columns. Iterating over it must yield the column names.
    check_minimal_specification : str
        ``"warn"`` emits a warning and ``"raise"`` raises an error if unused
        columns are found. Any other value silences the check.

    Warns
    -----
    UserWarning
        If there are unused columns in ``data`` and
        ``check_minimal_specification`` is ``"warn"``.

    Raises
    ------
    ValueError
        If there are unused columns in ``data`` and
        ``check_minimal_specification`` is ``"raise"``.

    """
    # Sorted so that the message is reproducible across interpreter runs.
    unnecessary_data = sorted(set(data) - set(_root_nodes(dag)))

    # Only build the message when it is actually going to be emitted.
    if not unnecessary_data or check_minimal_specification not in ("warn", "raise"):
        return

    formatted = format_list_linewise(unnecessary_data)
    message = f"The following columns in 'data' are unused.\n\n{formatted}"

    if check_minimal_specification == "warn":
        warnings.warn(message)
    else:
        raise ValueError(message)
def _fail_if_root_nodes_are_missing(dag, data):
    """Fail if root nodes of the DAG are neither in the data nor backed by a function.

    Parameters
    ----------
    dag
        The DAG. Its root nodes are obtained via ``_root_nodes`` and the attributes
        of a node are looked up via ``dag.nodes[node]``.
    data
        Container of data columns supporting membership tests on column names.

    Raises
    ------
    ValueError
        If at least one root node is not in ``data`` and has no ``"function"``
        entry among its node attributes.

    """
    absent = [
        root
        for root in _root_nodes(dag)
        if not (root in data or "function" in dag.nodes[root])
    ]
    if not absent:
        return

    listing = format_list_linewise(absent)
    raise ValueError(f"The following data columns are missing.\n{listing}")
def _fail_if_functions_and_columns_overlap(columns, functions, type_): """Fail if functions which compute columns overlap with existing columns. Parameters ---------- columns : list of str List of strings containing column names. functions : dict Dictionary of functions. type_ : {"internal", "user"} Source of the functions. "user" means functions passed by the user. columns_overriding_functions : list of str Columns provided by the user. Raises ------ ValueError Fail if functions which compute columns overlap with existing columns. """ type_str = "internal " if type_ == "internal" else "" overlap = sorted(name for name in functions if name in columns) if overlap: n_cols = len(overlap) first_part = _format_text_for_cmdline( f"Your data provides the column{'' if n_cols == 1 else 's'}:") formatted = format_list_linewise(overlap) second_part = _format_text_for_cmdline(f""" {'This is' if n_cols == 1 else 'These are'} already present among the {type_str}functions of the taxes and transfers system. If you want {'this' if n_cols == 1 else 'a'} data column to be used instead of calculating it within GETTSIM, please specify it among the *columns_overriding_functions*{'.' if type_ == 'internal' else ''' or remove the function from *functions*.'''} If you want {'this' if n_cols == 1 else 'a'} data column to be calculated by {type_str}functions, remove it from the *data* you pass to GETTSIM. {'' if n_cols == 1 else '''You need to pick one option for each column that appears in the list above.'''} """) raise ValueError("\n".join([first_part, formatted, second_part]))
def _fail_if_columns_overriding_functions_are_not_in_data(data, columns): """Fail if functions which compute columns overlap with existing columns. Parameters ---------- data : dict of pandas.Series Dictionary containing data columns as Series. columns : list of str List of column names. Raises ------ ValueError Fail if functions which compute columns overlap with existing columns. """ unused_columns_overriding_functions = sorted(set(columns) - set(data)) n_cols = len(unused_columns_overriding_functions) column_sg_pl = "column" if n_cols == 1 else "columns" if unused_columns_overriding_functions: first_part = _format_text_for_cmdline( f"You passed the following user {column_sg_pl}:") list_ = format_list_linewise(unused_columns_overriding_functions) second_part = _format_text_for_cmdline(f""" {'This' if n_cols == 1 else 'These'} {column_sg_pl} cannot be found in the data. If you want {'this' if n_cols == 1 else 'a'} data column to be used instead of calculating it within GETTSIM, please add it to *data*. If you want {'this' if n_cols == 1 else 'a'} data column to be calculated internally by GETTSIM, remove it from the *columns_overriding_functions* you pass to GETTSIM. {'' if n_cols == 1 else '''You need to pick one option for each column that appears in the list above.'''} """) raise ValueError("\n".join([first_part, list_, second_part]))
def _fail_if_columns_overriding_functions_are_not_in_dag( dag, columns_overriding_functions, check_minimal_specification ): """Fail if ``columns_overriding_functions`` are not in the DAG. Parameters ---------- dag : networkx.DiGraph The DAG which is limited to targets and their ancestors. columns_overriding_functions : list of str The nodes which are provided by columns in the data and do not need to be computed. These columns limit the depth of the DAG. check_minimal_specification : {"ignore", "warn", "raise"}, default "ignore" Indicator for whether checks which ensure the most minimalistic configuration should be silenced, emitted as warnings or errors. Warnings -------- UserWarning Warns if there are columns in 'columns_overriding_functions' which are not necessary and ``check_minimal_specification`` is set to "warn". Raises ------ ValueError Raised if there are columns in 'columns_overriding_functions' which are not necessary and ``check_minimal_specification`` is set to "raise". """ unused_columns = set(columns_overriding_functions) - set(dag.nodes) formatted = format_list_linewise(unused_columns) if unused_columns and check_minimal_specification == "warn": warnings.warn( f"The following 'columns_overriding_functions' are unused:\n{formatted}" ) elif unused_columns and check_minimal_specification == "raise": raise ValueError( f"The following 'columns_overriding_functions' are unused:\n{formatted}" )
def _fail_if_targets_not_in_functions(functions, targets): """Fail if targets are not in functions. Parameters ---------- functions : dict of callable Dictionary containing functions to build the DAG. targets : list of str The targets which should be computed. They limit the DAG in the way that only ancestors of these nodes need to be considered. Raises ------ ValueError Raised if ``targets`` are not in functions. """ targets_not_in_functions = set(targets) - set(functions) if targets_not_in_functions: formatted = format_list_linewise(targets_not_in_functions) raise ValueError( f"The following targets have no corresponding function:\n{formatted}" )