Example #1
def _combine_series_frame(left, right, func, axis: int, str_rep: str):
    """
    Apply binary operator `func` to self, other using alignment and fill
    conventions determined by the axis argument.

    Parameters
    ----------
    left : DataFrame
    right : Series
    func : binary operator
    axis : {0, 1}
    str_rep : str

    Returns
    -------
    result : DataFrame
    """
    # We assume that self.align(other, ...) has already been called
    if axis == 0:
        values = right._values
        if isinstance(values, np.ndarray):
            # TODO(EA2D): no need to special-case with 2D EAs
            # We can operate block-wise
            values = values.reshape(-1, 1)
            values = np.broadcast_to(values, left.shape)

            array_op = get_array_op(func, str_rep=str_rep)
            bm = left._mgr.apply(array_op,
                                 right=values.T,
                                 align_keys=["right"])
            return type(left)(bm)

        new_data = dispatch_to_series(left, right, func)

    else:
        rvalues = right._values
        if isinstance(rvalues, np.ndarray):
            # We can operate block-wise
            rvalues = rvalues.reshape(1, -1)
            rvalues = np.broadcast_to(rvalues, left.shape)

            array_op = get_array_op(func, str_rep=str_rep)
            bm = left._mgr.apply(array_op,
                                 right=rvalues.T,
                                 align_keys=["right"])
            return type(left)(bm)

        new_data = dispatch_to_series(left, right, func, axis="columns")

    return left._construct_result(new_data)
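
For reference, a minimal usage sketch (not part of the excerpt above) of the alignment behaviour this helper implements, expressed through the public DataFrame.add API: with axis=0 the Series is matched against the frame's index and broadcast across columns (the `axis == 0` branch), while axis=1 (the flex default, "columns") matches it against the columns and broadcasts across rows (the `else` branch).

import numpy as np
import pandas as pd

df = pd.DataFrame(
    np.arange(6).reshape(3, 2), index=["x", "y", "z"], columns=["a", "b"]
)

# axis=0: align the Series with df.index, broadcast across columns
col_like = pd.Series([10, 20, 30], index=["x", "y", "z"])
print(df.add(col_like, axis=0))

# axis=1 (a.k.a. "columns", the flex default): align with df.columns,
# broadcast across rows
row_like = pd.Series([100, 200], index=["a", "b"])
print(df.add(row_like, axis=1))
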
Example #2
def _combine_series_frame(left, right, func, axis: int, str_rep: str):
    """
    Apply binary operator `func` to self, other using alignment and fill
    conventions determined by the axis argument.

    Parameters
    ----------
    left : DataFrame
    right : Series
    func : binary operator
    axis : {0, 1}
    str_rep : str

    Returns
    -------
    result : DataFrame
    """
    # We assume that self.align(other, ...) has already been called
    if axis == 0:
        values = right._values
        if isinstance(values, np.ndarray):
            # We can operate block-wise
            values = values.reshape(-1, 1)

            array_op = get_array_op(func, str_rep=str_rep)
            bm = left._data.apply(array_op, right=values.T)
            return type(left)(bm)

        new_data = dispatch_to_series(left, right, func)

    else:
        new_data = dispatch_to_series(left, right, func, axis="columns")

    return left._construct_result(new_data)
def dispatch_to_series(left, right, func, axis: Optional[int] = None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar, Series, or DataFrame
    func : arithmetic or comparison operator
    axis : {None, 0, 1}

    Returns
    -------
    DataFrame
    """
    # Get the appropriate array-op to apply to each column/block's values.
    array_op = get_array_op(func)

    right = lib.item_from_zerodim(right)
    if not is_list_like(right):
        # i.e. scalar, faster than checking np.ndim(right) == 0
        bm = left._mgr.apply(array_op, right=right)
        return type(left)(bm)

    elif isinstance(right, ABCDataFrame):
        assert left.index.equals(right.index)
        assert left.columns.equals(right.columns)
        # TODO: The previous assertion `assert right._indexed_same(left)`
        #  fails in cases with empty columns reached via
        #  _frame_arith_method_with_reindex

        bm = left._mgr.operate_blockwise(right._mgr, array_op)
        return type(left)(bm)

    elif isinstance(right, ABCSeries) and axis == 1:
        # axis=1 means we want to operate row-by-row
        assert right.index.equals(left.columns)

        right = right._values
        # maybe_align_as_frame ensures we do not have an ndarray here
        assert not isinstance(right, np.ndarray)

        arrays = [array_op(l, r) for l, r in zip(left._iter_column_arrays(), right)]

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later
        right = right._values

        arrays = [array_op(l, right) for l in left._iter_column_arrays()]

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    return type(left)._from_arrays(
        arrays, left.columns, left.index, verify_integrity=False
    )
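
As a usage sketch (illustrative only, not taken from the source above), the dispatch branches correspond to these public-API operations: a scalar operand goes through the manager `apply` path, an already-aligned DataFrame operand through `operate_blockwise`, and a Series operand with axis=1 is matched against the columns and applied row-by-row.

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})

# scalar right: the `not is_list_like(right)` branch
print(df * 10)

# DataFrame right: both operands are aligned, then combined block-wise
other = pd.DataFrame({"a": [10, 20, 30], "b": [1.0, 1.0, 1.0]})
print(df + other)

# Series right with axis=1: the Series index is matched to df.columns
row = pd.Series({"a": 100, "b": 0.5})
print(df.mul(row, axis=1))
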
Example #4
def _arith_method_FRAME(cls: Type["DataFrame"], op, special: bool):
    # This is the only function where `special` can be either True or False
    op_name = _get_op_name(op, special)
    default_axis = _get_frame_op_default_axis(op_name)

    na_op = get_array_op(op)
    # strip a leading "r" so reversed ops (e.g. "rand_") are also treated as logical
    is_logical = op.__name__.strip("_").lstrip("r") in ["and", "or", "xor"]

    if op_name in _op_descriptions:
        # i.e. include "add" but not "__add__"
        doc = _make_flex_doc(op_name, "dataframe")
    else:
        doc = _arith_doc_FRAME % op_name

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None, fill_value=None):

        if _should_reindex_frame_op(
            self, other, op, axis, default_axis, fill_value, level
        ):
            return _frame_arith_method_with_reindex(self, other, op)

        # TODO: why are we passing flex=True instead of flex=not special?
        #  15 tests fail if we pass flex=not special instead
        self, other = _align_method_FRAME(self, other, axis, flex=True, level=level)

        if isinstance(other, ABCDataFrame):
            # Another DataFrame
            new_data = self._combine_frame(other, na_op, fill_value)

        elif isinstance(other, ABCSeries):
            # For these values of `axis`, we end up dispatching to Series op,
            # so do not want the masked op.
            # TODO: the above comment is no longer accurate since we now
            #  operate blockwise if other._values is an ndarray
            pass_op = op if axis in [0, "columns", None] else na_op
            pass_op = pass_op if not is_logical else op

            if fill_value is not None:
                raise NotImplementedError(f"fill_value {fill_value} not supported.")

            axis = self._get_axis_number(axis) if axis is not None else 1
            new_data = _combine_series_frame(self, other, pass_op, axis=axis)
        else:
            # in this case we always have `np.ndim(other) == 0`
            if fill_value is not None:
                self = self.fillna(fill_value)

            new_data = dispatch_to_series(self, other, op)

        return self._construct_result(new_data)

    f.__name__ = op_name

    return f
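
A short sketch (standard flex-method behaviour, not taken from the listing above) of what the generated method does with fill_value: for a DataFrame operand, positions missing in only one operand are filled before the op, while a Series operand combined with fill_value hits the NotImplementedError raised in the ABCSeries branch.

import pandas as pd

df1 = pd.DataFrame({"a": [1.0, 2.0]}, index=[0, 1])
df2 = pd.DataFrame({"a": [10.0], "b": [5.0]}, index=[0])

# fill_value only fills positions missing in one of the two operands
# after alignment; positions missing in both remain NaN.
print(df1.add(df2, fill_value=0))

# fill_value combined with a Series operand is rejected, matching the
# NotImplementedError in the ABCSeries branch above.
try:
    df1.add(pd.Series({"a": 1.0}), fill_value=0)
except NotImplementedError as exc:
    print(exc)
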
Example #5
def _arith_method_FRAME(cls: Type["DataFrame"], op, special: bool):
    # This is the only function where `special` can be either True or False
    op_name = _get_op_name(op, special)
    default_axis = _get_frame_op_default_axis(op_name)

    na_op = get_array_op(op)

    if op_name in _op_descriptions:
        # i.e. include "add" but not "__add__"
        doc = _make_flex_doc(op_name, "dataframe")
    else:
        doc = _arith_doc_FRAME % op_name

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None, fill_value=None):

        if _should_reindex_frame_op(self, other, op, axis, default_axis,
                                    fill_value, level):
            return _frame_arith_method_with_reindex(self, other, op)

        if isinstance(other, ABCSeries) and fill_value is not None:
            # TODO: We could allow this in cases where we end up going
            #  through the DataFrame path
            raise NotImplementedError(
                f"fill_value {fill_value} not supported.")

        axis = self._get_axis_number(axis) if axis is not None else 1

        # TODO: why are we passing flex=True instead of flex=not special?
        #  15 tests fail if we pass flex=not special instead
        self, other = align_method_FRAME(self,
                                         other,
                                         axis,
                                         flex=True,
                                         level=level)

        if isinstance(other, ABCDataFrame):
            # Another DataFrame
            new_data = self._combine_frame(other, na_op, fill_value)

        elif isinstance(other, ABCSeries):
            new_data = dispatch_to_series(self, other, op, axis=axis)
        else:
            # in this case we always have `np.ndim(other) == 0`
            if fill_value is not None:
                self = self.fillna(fill_value)

            new_data = dispatch_to_series(self, other, op)

        return self._construct_result(new_data)

    f.__name__ = op_name

    return f
Example #6
def flex_arith_method_FRAME(op):
    op_name = op.__name__.strip("_")
    default_axis = "columns"

    na_op = get_array_op(op)
    doc = make_flex_doc(op_name, "dataframe")

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None, fill_value=None):

        if should_reindex_frame_op(self, other, op, axis, default_axis,
                                   fill_value, level):
            return frame_arith_method_with_reindex(self, other, op)

        if isinstance(other, ABCSeries) and fill_value is not None:
            # TODO: We could allow this in cases where we end up going
            #  through the DataFrame path
            raise NotImplementedError(
                f"fill_value {fill_value} not supported.")

        axis = self._get_axis_number(axis) if axis is not None else 1

        other = maybe_prepare_scalar_for_op(other, self.shape)
        self, other = align_method_FRAME(self,
                                         other,
                                         axis,
                                         flex=True,
                                         level=level)

        if isinstance(other, ABCDataFrame):
            # Another DataFrame
            new_data = self._combine_frame(other, na_op, fill_value)

        elif isinstance(other, ABCSeries):
            new_data = self._dispatch_frame_op(other, op, axis=axis)
        else:
            # in this case we always have `np.ndim(other) == 0`
            if fill_value is not None:
                self = self.fillna(fill_value)

            new_data = self._dispatch_frame_op(other, op)

        return self._construct_result(new_data)

    f.__name__ = op_name

    return f
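
For context on the reindex fast path checked first (should_reindex_frame_op / frame_arith_method_with_reindex), here is a small sketch, not from the listing, of the behaviour it targets: when two frames share only some column labels, the non-shared labels come back as all-NaN columns, so the op only needs to be computed on the column intersection and the result reindexed to the union.

import pandas as pd

df1 = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
df2 = pd.DataFrame({"b": [10, 20], "c": [30, 40]})

# Only "b" is shared; "a" and "c" come back as all-NaN columns.
print(df1 + df2)
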
Example #7
def flex_arith_method_FRAME(cls: Type["DataFrame"], op, special: bool):
    assert not special
    op_name = _get_op_name(op, special)
    default_axis = None if special else "columns"

    na_op = get_array_op(op)
    doc = _make_flex_doc(op_name, "dataframe")

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None, fill_value=None):

        if _should_reindex_frame_op(
            self, other, op, axis, default_axis, fill_value, level
        ):
            return _frame_arith_method_with_reindex(self, other, op)

        if isinstance(other, ABCSeries) and fill_value is not None:
            # TODO: We could allow this in cases where we end up going
            #  through the DataFrame path
            raise NotImplementedError(f"fill_value {fill_value} not supported.")

        axis = self._get_axis_number(axis) if axis is not None else 1

        self, other = align_method_FRAME(self, other, axis, flex=True, level=level)

        if isinstance(other, ABCDataFrame):
            # Another DataFrame
            new_data = self._combine_frame(other, na_op, fill_value)

        elif isinstance(other, ABCSeries):
            new_data = dispatch_to_series(self, other, op, axis=axis)
        else:
            # in this case we always have `np.ndim(other) == 0`
            if fill_value is not None:
                self = self.fillna(fill_value)

            new_data = dispatch_to_series(self, other, op)

        return self._construct_result(new_data)

    f.__name__ = op_name

    return f
Example #8
def dispatch_to_series(left, right, func, str_rep=None, axis=None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar, Series, or DataFrame
    func : arithmetic or comparison operator
    str_rep : str or None, default None
    axis : {None, 0, 1, "index", "columns"}

    Returns
    -------
    DataFrame
    """
    # Note: we use iloc to access columns for compat with cases
    #       with non-unique columns.
    import pandas.core.computation.expressions as expressions

    right = lib.item_from_zerodim(right)
    if lib.is_scalar(right) or np.ndim(right) == 0:

        # Get the appropriate array-op to apply to each block's values.
        array_op = get_array_op(func, str_rep=str_rep)
        bm = left._data.apply(array_op, right=right)
        return type(left)(bm)

    elif isinstance(right, ABCDataFrame):
        assert right._indexed_same(left)

        def column_op(a, b):
            return {
                i: func(a.iloc[:, i], b.iloc[:, i])
                for i in range(len(a.columns))
            }

    elif isinstance(right, ABCSeries) and axis == "columns":
        # We only get here if called via _combine_series_frame,
        # in which case we specifically want to operate row-by-row
        assert right.index.equals(left.columns)

        if right.dtype == "timedelta64[ns]":
            # ensure we treat NaT values as the correct dtype
            # Note: we do not do this unconditionally as it may be lossy or
            #  expensive for EA dtypes.
            right = np.asarray(right)

            def column_op(a, b):
                return {
                    i: func(a.iloc[:, i], b[i])
                    for i in range(len(a.columns))
                }

        else:

            def column_op(a, b):
                return {
                    i: func(a.iloc[:, i], b.iloc[i])
                    for i in range(len(a.columns))
                }

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later

        def column_op(a, b):
            return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))}

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    new_data = expressions.evaluate(column_op, str_rep, left, right)
    return new_data
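
The `column_op` closures above key their result dict by column position and access columns through `iloc`; a small sketch (not from the source) of the duplicate-label case that this positional access is meant to support:

import pandas as pd

# Duplicate column labels: positional (iloc) access in column_op keeps
# the two "a" columns distinct instead of colliding on the label.
df = pd.DataFrame([[1, 2], [3, 4]], columns=["a", "a"])
print(df * 2)
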
def dispatch_to_series(left, right, func, axis=None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar, Series, or DataFrame
    func : arithmetic or comparison operator
    axis : {None, 0, 1, "index", "columns"}

    Returns
    -------
    DataFrame
    """
    # Get the appropriate array-op to apply to each column/block's values.
    array_op = get_array_op(func)

    right = lib.item_from_zerodim(right)
    if lib.is_scalar(right) or np.ndim(right) == 0:
        bm = left._mgr.apply(array_op, right=right)
        return type(left)(bm)

    elif isinstance(right, ABCDataFrame):
        assert left.index.equals(right.index)
        assert left.columns.equals(right.columns)
        # TODO: The previous assertion `assert right._indexed_same(left)`
        #  fails in cases with empty columns reached via
        #  _frame_arith_method_with_reindex

        bm = left._mgr.operate_blockwise(right._mgr, array_op)
        return type(left)(bm)

    elif isinstance(right, ABCSeries) and axis == 1:
        # axis=1 means we want to operate row-by-row
        assert right.index.equals(left.columns)

        if right.dtype == "timedelta64[ns]":
            # ensure we treat NaT values as the correct dtype
            # Note: we do not do this unconditionally as it may be lossy or
            #  expensive for EA dtypes.
            right = np.asarray(right)
        else:
            right = right._values
            # maybe_align_as_frame ensures we do not have an ndarray here
            assert not isinstance(right, np.ndarray)

        arrays = [
            array_op(l, r) for l, r in zip(left._iter_column_arrays(), right)
        ]

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later
        right = right._values

        arrays = [array_op(l, right) for l in left._iter_column_arrays()]

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    return type(left)._from_arrays(arrays,
                                   left.columns,
                                   left.index,
                                   verify_integrity=False)
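
Finally, a sketch (illustrative, not part of the source) of the row-wise timedelta64[ns] special case handled above: the Series is converted to an ndarray so that NaT keeps its timedelta meaning when paired with each datetime column.

import pandas as pd

df = pd.DataFrame(
    {
        "t1": pd.to_datetime(["2020-01-01", "2020-01-02"]),
        "t2": pd.to_datetime(["2020-02-01", "2020-02-02"]),
    }
)

# Row-wise subtraction of a timedelta64[ns] Series (aligned against the
# columns); the NaT entry propagates as NaT in the "t2" column.
deltas = pd.Series([pd.Timedelta("1D"), pd.NaT], index=["t1", "t2"])
print(df.sub(deltas, axis=1))
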