Example #1
0
def transform_dict_like(
    obj: FrameOrSeries,
    func: AggFuncTypeDict,
    *args,
    **kwargs,
):
    """
    Compute transform in the case of a dict-like func
    """
    from pandas.core.reshape.concat import concat

    if len(func) == 0:
        raise ValueError("No transform functions were provided")

    if obj.ndim != 1:
        # Check for missing columns on a frame
        cols = sorted(set(func.keys()) - set(obj.columns))
        if len(cols) > 0:
            raise SpecificationError(f"Column(s) {cols} do not exist")

    # Can't use func.values(); wouldn't work for a Series
    if any(is_dict_like(v) for _, v in func.items()):
        # GH 15931 - deprecation of renaming keys
        raise SpecificationError("nested renamer is not supported")

    is_aggregator = lambda x: isinstance(x, (list, tuple, dict))

    # if we have a dict of any non-scalars
    # eg. {'A' : ['mean']}, normalize all to
    # be list-likes
    # Cannot use func.values() because arg may be a Series
    if any(is_aggregator(x) for _, x in func.items()):
        new_func: AggFuncTypeDict = {}
        for k, v in func.items():
            if not is_aggregator(v):
                # mypy can't realize v is not a list here
                new_func[k] = [v]  # type:ignore[list-item]
            else:
                new_func[k] = v
        func = new_func

    results: Dict[Label, FrameOrSeriesUnion] = {}
    for name, how in func.items():
        colg = obj._gotitem(name, ndim=1)
        try:
            results[name] = transform(colg, how, 0, *args, **kwargs)
        except Exception as err:
            if (
                str(err) == "Function did not transform"
                or str(err) == "No transform functions were provided"
            ):
                raise err

    # combine results
    if len(results) == 0:
        raise ValueError("Transform function failed")
    return concat(results, axis=1)
    def normalize_dictlike_arg(
        self, how: str, obj: FrameOrSeriesUnion, func: AggFuncTypeDict
    ) -> AggFuncTypeDict:
        """
        Handler for dict-like argument.

        Ensures that necessary columns exist if obj is a DataFrame, and
        that a nested renamer is not passed. Also normalizes to all lists
        when values consists of a mix of list and non-lists.
        """
        assert how in ("apply", "agg", "transform")

        # Can't use func.values(); wouldn't work for a Series
        if (
            how == "agg"
            and isinstance(obj, ABCSeries)
            and any(is_list_like(v) for _, v in func.items())
        ) or (any(is_dict_like(v) for _, v in func.items())):
            # GH 15931 - deprecation of renaming keys
            raise SpecificationError("nested renamer is not supported")

        if obj.ndim != 1:
            # Check for missing columns on a frame
            cols = set(func.keys()) - set(obj.columns)
            if len(cols) > 0:
                cols_sorted = list(safe_sort(list(cols)))
                raise KeyError(f"Column(s) {cols_sorted} do not exist")

        is_aggregator = lambda x: isinstance(x, (list, tuple, dict))

        # if we have a dict of any non-scalars
        # eg. {'A' : ['mean']}, normalize all to
        # be list-likes
        # Cannot use func.values() because arg may be a Series
        if any(is_aggregator(x) for _, x in func.items()):
            new_func: AggFuncTypeDict = {}
            for k, v in func.items():
                if not is_aggregator(v):
                    # mypy can't realize v is not a list here
                    new_func[k] = [v]  # type:ignore[list-item]
                else:
                    new_func[k] = v
            func = new_func
        return func
Example #3
0
def transform_dict_like(
    obj: FrameOrSeries,
    func: AggFuncTypeDict,
    *args,
    **kwargs,
):
    """
    Compute transform in the case of a dict-like func
    """
    from pandas.core.reshape.concat import concat

    if len(func) == 0:
        raise ValueError("No transform functions were provided")

    if obj.ndim != 1:
        # Check for missing columns on a frame
        cols = set(func.keys()) - set(obj.columns)
        if len(cols) > 0:
            cols_sorted = list(safe_sort(list(cols)))
            raise SpecificationError(f"Column(s) {cols_sorted} do not exist")

    # Can't use func.values(); wouldn't work for a Series
    if any(is_dict_like(v) for _, v in func.items()):
        # GH 15931 - deprecation of renaming keys
        raise SpecificationError("nested renamer is not supported")

    results: Dict[Hashable, FrameOrSeriesUnion] = {}
    for name, how in func.items():
        colg = obj._gotitem(name, ndim=1)
        try:
            results[name] = transform(colg, how, 0, *args, **kwargs)
        except Exception as err:
            if (
                str(err) == "Function did not transform"
                or str(err) == "No transform functions were provided"
            ):
                raise err

    # combine results
    if len(results) == 0:
        raise ValueError("Transform function failed")
    return concat(results, axis=1)
Example #4
0
    def validate_dictlike_arg(self, how: str, obj: FrameOrSeriesUnion,
                              func: AggFuncTypeDict) -> None:
        """
        Raise if dict-like argument is invalid.

        Ensures that necessary columns exist if obj is a DataFrame, and
        that a nested renamer is not passed.
        """
        assert how in ("apply", "agg", "transform")

        # Can't use func.values(); wouldn't work for a Series
        if (how == "agg" and isinstance(obj, ABCSeries)
                and any(is_list_like(v) for _, v in func.items())) or (any(
                    is_dict_like(v) for _, v in func.items())):
            # GH 15931 - deprecation of renaming keys
            raise SpecificationError("nested renamer is not supported")

        if obj.ndim != 1:
            # Check for missing columns on a frame
            cols = set(func.keys()) - set(obj.columns)
            if len(cols) > 0:
                cols_sorted = list(safe_sort(list(cols)))
                raise KeyError(f"Column(s) {cols_sorted} do not exist")
Example #5
0
def agg_dict_like(
    obj: AggObjType,
    arg: AggFuncTypeDict,
    _axis: int,
) -> FrameOrSeriesUnion:
    """
    Compute aggregation in the case of a dict-like argument.

    Parameters
    ----------
    obj : Pandas object to compute aggregation on.
    arg : dict
        label-aggregation pairs to compute.
    _axis : int, 0 or 1
        Axis to compute aggregation on.

    Returns
    -------
    Result of aggregation.
    """
    is_aggregator = lambda x: isinstance(x, (list, tuple, dict))

    if _axis != 0:  # pragma: no cover
        raise ValueError("Can only pass dict with axis=0")

    selected_obj = obj._selected_obj

    # if we have a dict of any non-scalars
    # eg. {'A' : ['mean']}, normalize all to
    # be list-likes
    # Cannot use arg.values() because arg may be a Series
    if any(is_aggregator(x) for _, x in arg.items()):
        new_arg: AggFuncTypeDict = {}
        for k, v in arg.items():
            if not isinstance(v, (tuple, list, dict)):
                new_arg[k] = [v]
            else:
                new_arg[k] = v

            # the keys must be in the columns
            # for ndim=2, or renamers for ndim=1

            # ok for now, but deprecated
            # {'A': { 'ra': 'mean' }}
            # {'A': { 'ra': ['mean'] }}
            # {'ra': ['mean']}

            # not ok
            # {'ra' : { 'A' : 'mean' }}
            if isinstance(v, dict):
                raise SpecificationError("nested renamer is not supported")
            elif isinstance(selected_obj, ABCSeries):
                raise SpecificationError("nested renamer is not supported")
            elif (
                isinstance(selected_obj, ABCDataFrame) and k not in selected_obj.columns
            ):
                raise KeyError(f"Column '{k}' does not exist!")

        arg = new_arg

    else:
        # deprecation of renaming keys
        # GH 15931
        keys = list(arg.keys())
        if isinstance(selected_obj, ABCDataFrame) and len(
            selected_obj.columns.intersection(keys)
        ) != len(keys):
            cols = list(
                safe_sort(
                    list(set(keys) - set(selected_obj.columns.intersection(keys))),
                )
            )
            raise SpecificationError(f"Column(s) {cols} do not exist")

    from pandas.core.reshape.concat import concat

    if selected_obj.ndim == 1:
        # key only used for output
        colg = obj._gotitem(obj._selection, ndim=1)
        results = {key: colg.agg(how) for key, how in arg.items()}
    else:
        # key used for column selection and output
        results = {key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()}

    # set the final keys
    keys = list(arg.keys())

    # Avoid making two isinstance calls in all and any below
    is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]

    # combine results
    if all(is_ndframe):
        keys_to_use = [k for k in keys if not results[k].empty]
        # Have to check, if at least one DataFrame is not empty.
        keys_to_use = keys_to_use if keys_to_use != [] else keys
        axis = 0 if isinstance(obj, ABCSeries) else 1
        result = concat({k: results[k] for k in keys_to_use}, axis=axis)
    elif any(is_ndframe):
        # There is a mix of NDFrames and scalars
        raise ValueError(
            "cannot perform both aggregation "
            "and transformation operations "
            "simultaneously"
        )
    else:
        from pandas import Series

        # we have a dict of scalars
        # GH 36212 use name only if obj is a series
        if obj.ndim == 1:
            obj = cast("Series", obj)
            name = obj.name
        else:
            name = None

        result = Series(results, name=name)

    return result