def _combine_series_frame(left, right, func, axis: int, str_rep: str):
    """
    Apply binary operator `func` to `left` and `right`, using alignment
    and fill conventions determined by the axis argument.

    Parameters
    ----------
    left : DataFrame
    right : Series
    func : binary operator
    axis : {0, 1}
    str_rep : str

    Returns
    -------
    result : DataFrame
    """
    # We assume that left.align(right, ...) has already been called
    if axis == 0:
        values = right._values
        if isinstance(values, np.ndarray):
            # TODO(EA2D): no need to special-case with 2D EAs
            # We can operate block-wise
            values = values.reshape(-1, 1)
            values = np.broadcast_to(values, left.shape)

            array_op = get_array_op(func, str_rep=str_rep)
            bm = left._mgr.apply(array_op, right=values.T, align_keys=["right"])
            return type(left)(bm)

        new_data = dispatch_to_series(left, right, func)

    else:
        rvalues = right._values
        if isinstance(rvalues, np.ndarray):
            # We can operate block-wise
            rvalues = rvalues.reshape(1, -1)
            rvalues = np.broadcast_to(rvalues, left.shape)

            array_op = get_array_op(func, str_rep=str_rep)
            bm = left._mgr.apply(array_op, right=rvalues.T, align_keys=["right"])
            return type(left)(bm)

        new_data = dispatch_to_series(left, right, func, axis="columns")

    return left._construct_result(new_data)
def _combine_series_frame(left, right, func, axis: int, str_rep: str):
    """
    Apply binary operator `func` to `left` and `right`, using alignment
    and fill conventions determined by the axis argument.

    Parameters
    ----------
    left : DataFrame
    right : Series
    func : binary operator
    axis : {0, 1}
    str_rep : str

    Returns
    -------
    result : DataFrame
    """
    # We assume that left.align(right, ...) has already been called
    if axis == 0:
        values = right._values
        if isinstance(values, np.ndarray):
            # We can operate block-wise
            values = values.reshape(-1, 1)

            array_op = get_array_op(func, str_rep=str_rep)
            bm = left._data.apply(array_op, right=values.T)
            return type(left)(bm)

        new_data = dispatch_to_series(left, right, func)

    else:
        new_data = dispatch_to_series(left, right, func, axis="columns")

    return left._construct_result(new_data)
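# --- Illustrative usage sketch (not part of the pandas source above) ---
# The broadcasting that _combine_series_frame implements is visible from the
# public API: axis=0 aligns the Series against the row index and broadcasts it
# down each column, while the default axis="columns" aligns it against the
# column labels.  The objects below (df, row_ser, col_ser) are invented for
# the example.
import numpy as np
import pandas as pd

df = pd.DataFrame(np.arange(6).reshape(3, 2), columns=["a", "b"])
row_ser = pd.Series([10, 20, 30])        # aligned on df.index
col_ser = pd.Series({"a": 1, "b": 2})    # aligned on df.columns

print(df.sub(row_ser, axis=0))   # subtract row_ser down each column
print(df.sub(col_ser, axis=1))   # subtract col_ser across each row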
def dispatch_to_series(left, right, func, axis: Optional[int] = None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar, Series, or DataFrame
    func : arithmetic or comparison operator
    axis : {None, 0, 1}

    Returns
    -------
    DataFrame
    """
    # Get the appropriate array-op to apply to each column/block's values.
    array_op = get_array_op(func)

    right = lib.item_from_zerodim(right)
    if not is_list_like(right):
        # i.e. scalar, faster than checking np.ndim(right) == 0
        bm = left._mgr.apply(array_op, right=right)
        return type(left)(bm)

    elif isinstance(right, ABCDataFrame):
        assert left.index.equals(right.index)
        assert left.columns.equals(right.columns)
        # TODO: The previous assertion `assert right._indexed_same(left)`
        #  fails in cases with empty columns reached via
        #  _frame_arith_method_with_reindex
        bm = left._mgr.operate_blockwise(right._mgr, array_op)
        return type(left)(bm)

    elif isinstance(right, ABCSeries) and axis == 1:
        # axis=1 means we want to operate row-by-row
        assert right.index.equals(left.columns)

        right = right._values
        # maybe_align_as_frame ensures we do not have an ndarray here
        assert not isinstance(right, np.ndarray)

        arrays = [array_op(l, r) for l, r in zip(left._iter_column_arrays(), right)]

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later
        right = right._values

        arrays = [array_op(l, right) for l in left._iter_column_arrays()]

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    return type(left)._from_arrays(
        arrays, left.columns, left.index, verify_integrity=False
    )
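# --- Illustrative usage sketch (not part of the pandas source above) ---
# dispatch_to_series splits the frame into column arrays and applies the op
# column by column.  Seen from the public API, axis=1 pairs each column with a
# single element of the Series, matched on the column labels.  The objects
# below are invented for the example.
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
ser = pd.Series({"a": 10, "b": 100})

result = df.mul(ser, axis=1)   # column "a" * 10, column "b" * 100
expected = pd.DataFrame({"a": [10, 20, 30], "b": [400, 500, 600]})
assert result.equals(expected)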
def _arith_method_FRAME(cls: Type["DataFrame"], op, special: bool): # This is the only function where `special` can be either True or False op_name = _get_op_name(op, special) default_axis = _get_frame_op_default_axis(op_name) na_op = get_array_op(op) is_logical = op.__name__.strip("_").lstrip("_") in ["and", "or", "xor"] if op_name in _op_descriptions: # i.e. include "add" but not "__add__" doc = _make_flex_doc(op_name, "dataframe") else: doc = _arith_doc_FRAME % op_name @Appender(doc) def f(self, other, axis=default_axis, level=None, fill_value=None): if _should_reindex_frame_op( self, other, op, axis, default_axis, fill_value, level ): return _frame_arith_method_with_reindex(self, other, op) # TODO: why are we passing flex=True instead of flex=not special? # 15 tests fail if we pass flex=not special instead self, other = _align_method_FRAME(self, other, axis, flex=True, level=level) if isinstance(other, ABCDataFrame): # Another DataFrame new_data = self._combine_frame(other, na_op, fill_value) elif isinstance(other, ABCSeries): # For these values of `axis`, we end up dispatching to Series op, # so do not want the masked op. # TODO: the above comment is no longer accurate since we now # operate blockwise if other._values is an ndarray pass_op = op if axis in [0, "columns", None] else na_op pass_op = pass_op if not is_logical else op if fill_value is not None: raise NotImplementedError(f"fill_value {fill_value} not supported.") axis = self._get_axis_number(axis) if axis is not None else 1 new_data = _combine_series_frame(self, other, pass_op, axis=axis) else: # in this case we always have `np.ndim(other) == 0` if fill_value is not None: self = self.fillna(fill_value) new_data = dispatch_to_series(self, other, op) return self._construct_result(new_data) f.__name__ = op_name return f
def _arith_method_FRAME(cls: Type["DataFrame"], op, special: bool): # This is the only function where `special` can be either True or False op_name = _get_op_name(op, special) default_axis = _get_frame_op_default_axis(op_name) na_op = get_array_op(op) if op_name in _op_descriptions: # i.e. include "add" but not "__add__" doc = _make_flex_doc(op_name, "dataframe") else: doc = _arith_doc_FRAME % op_name @Appender(doc) def f(self, other, axis=default_axis, level=None, fill_value=None): if _should_reindex_frame_op(self, other, op, axis, default_axis, fill_value, level): return _frame_arith_method_with_reindex(self, other, op) if isinstance(other, ABCSeries) and fill_value is not None: # TODO: We could allow this in cases where we end up going # through the DataFrame path raise NotImplementedError( f"fill_value {fill_value} not supported.") axis = self._get_axis_number(axis) if axis is not None else 1 # TODO: why are we passing flex=True instead of flex=not special? # 15 tests fail if we pass flex=not special instead self, other = align_method_FRAME(self, other, axis, flex=True, level=level) if isinstance(other, ABCDataFrame): # Another DataFrame new_data = self._combine_frame(other, na_op, fill_value) elif isinstance(other, ABCSeries): new_data = dispatch_to_series(self, other, op, axis=axis) else: # in this case we always have `np.ndim(other) == 0` if fill_value is not None: self = self.fillna(fill_value) new_data = dispatch_to_series(self, other, op) return self._construct_result(new_data) f.__name__ = op_name return f
def flex_arith_method_FRAME(op):
    op_name = op.__name__.strip("_")
    default_axis = "columns"

    na_op = get_array_op(op)
    doc = make_flex_doc(op_name, "dataframe")

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None, fill_value=None):

        if should_reindex_frame_op(
            self, other, op, axis, default_axis, fill_value, level
        ):
            return frame_arith_method_with_reindex(self, other, op)

        if isinstance(other, ABCSeries) and fill_value is not None:
            # TODO: We could allow this in cases where we end up going
            #  through the DataFrame path
            raise NotImplementedError(f"fill_value {fill_value} not supported.")

        axis = self._get_axis_number(axis) if axis is not None else 1

        other = maybe_prepare_scalar_for_op(other, self.shape)
        self, other = align_method_FRAME(self, other, axis, flex=True, level=level)

        if isinstance(other, ABCDataFrame):
            # Another DataFrame
            new_data = self._combine_frame(other, na_op, fill_value)

        elif isinstance(other, ABCSeries):
            new_data = self._dispatch_frame_op(other, op, axis=axis)
        else:
            # in this case we always have `np.ndim(other) == 0`
            if fill_value is not None:
                self = self.fillna(fill_value)

            new_data = self._dispatch_frame_op(other, op)

        return self._construct_result(new_data)

    f.__name__ = op_name

    return f
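# --- Illustrative usage sketch (not part of the pandas source above) ---
# The flex wrapper honours fill_value for DataFrame and scalar operands, while
# the guard above raises NotImplementedError when the other operand is a
# Series.  The frames below are invented for the example.
import numpy as np
import pandas as pd

df1 = pd.DataFrame({"a": [1.0, np.nan]})
df2 = pd.DataFrame({"a": [10.0, 20.0]})

print(df1.add(df2, fill_value=0))   # NaN filled with 0 before adding -> [11.0, 20.0]

try:
    df1.add(pd.Series({"a": 1}), fill_value=0)
except NotImplementedError as err:
    print(err)   # fill_value 0 not supported.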
def flex_arith_method_FRAME(cls: Type["DataFrame"], op, special: bool): assert not special op_name = _get_op_name(op, special) default_axis = None if special else "columns" na_op = get_array_op(op) doc = _make_flex_doc(op_name, "dataframe") @Appender(doc) def f(self, other, axis=default_axis, level=None, fill_value=None): if _should_reindex_frame_op( self, other, op, axis, default_axis, fill_value, level ): return _frame_arith_method_with_reindex(self, other, op) if isinstance(other, ABCSeries) and fill_value is not None: # TODO: We could allow this in cases where we end up going # through the DataFrame path raise NotImplementedError(f"fill_value {fill_value} not supported.") axis = self._get_axis_number(axis) if axis is not None else 1 self, other = align_method_FRAME(self, other, axis, flex=True, level=level) if isinstance(other, ABCDataFrame): # Another DataFrame new_data = self._combine_frame(other, na_op, fill_value) elif isinstance(other, ABCSeries): new_data = dispatch_to_series(self, other, op, axis=axis) else: # in this case we always have `np.ndim(other) == 0` if fill_value is not None: self = self.fillna(fill_value) new_data = dispatch_to_series(self, other, op) return self._construct_result(new_data) f.__name__ = op_name return f
def dispatch_to_series(left, right, func, str_rep=None, axis=None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar, Series, or DataFrame
    func : arithmetic or comparison operator
    str_rep : str or None, default None
    axis : {None, 0, 1, "index", "columns"}

    Returns
    -------
    DataFrame
    """
    # Note: we use iloc to access columns for compat with cases
    #       with non-unique columns.
    import pandas.core.computation.expressions as expressions

    right = lib.item_from_zerodim(right)
    if lib.is_scalar(right) or np.ndim(right) == 0:

        # Get the appropriate array-op to apply to each block's values.
        array_op = get_array_op(func, str_rep=str_rep)
        bm = left._data.apply(array_op, right=right)
        return type(left)(bm)

    elif isinstance(right, ABCDataFrame):
        assert right._indexed_same(left)

        def column_op(a, b):
            return {
                i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))
            }

    elif isinstance(right, ABCSeries) and axis == "columns":
        # We only get here if called via _combine_series_frame,
        # in which case we specifically want to operate row-by-row
        assert right.index.equals(left.columns)

        if right.dtype == "timedelta64[ns]":
            # ensure we treat NaT values as the correct dtype
            # Note: we do not do this unconditionally as it may be lossy or
            #  expensive for EA dtypes.
            right = np.asarray(right)

            def column_op(a, b):
                return {i: func(a.iloc[:, i], b[i]) for i in range(len(a.columns))}

        else:

            def column_op(a, b):
                return {
                    i: func(a.iloc[:, i], b.iloc[i]) for i in range(len(a.columns))
                }

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later

        def column_op(a, b):
            return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))}

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    new_data = expressions.evaluate(column_op, str_rep, left, right)
    return new_data
def dispatch_to_series(left, right, func, axis=None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar, Series, or DataFrame
    func : arithmetic or comparison operator
    axis : {None, 0, 1, "index", "columns"}

    Returns
    -------
    DataFrame
    """
    # Get the appropriate array-op to apply to each column/block's values.
    array_op = get_array_op(func)

    right = lib.item_from_zerodim(right)
    if lib.is_scalar(right) or np.ndim(right) == 0:
        bm = left._mgr.apply(array_op, right=right)
        return type(left)(bm)

    elif isinstance(right, ABCDataFrame):
        assert left.index.equals(right.index)
        assert left.columns.equals(right.columns)
        # TODO: The previous assertion `assert right._indexed_same(left)`
        #  fails in cases with empty columns reached via
        #  _frame_arith_method_with_reindex
        bm = left._mgr.operate_blockwise(right._mgr, array_op)
        return type(left)(bm)

    elif isinstance(right, ABCSeries) and axis == 1:
        # axis=1 means we want to operate row-by-row
        assert right.index.equals(left.columns)

        if right.dtype == "timedelta64[ns]":
            # ensure we treat NaT values as the correct dtype
            # Note: we do not do this unconditionally as it may be lossy or
            #  expensive for EA dtypes.
            right = np.asarray(right)
        else:
            right = right._values
            # maybe_align_as_frame ensures we do not have an ndarray here
            assert not isinstance(right, np.ndarray)

        arrays = [array_op(l, r) for l, r in zip(left._iter_column_arrays(), right)]

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later
        right = right._values

        arrays = [array_op(l, right) for l in left._iter_column_arrays()]

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    return type(left)._from_arrays(
        arrays, left.columns, left.index, verify_integrity=False
    )
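# --- Illustrative usage sketch (not part of the pandas source above) ---
# The scalar branch above goes through the BlockManager and applies the array
# op once per block rather than once per column, so a comparison against a
# scalar is computed block-wise.  The frame below is invented for the example.
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
print(df > 2)   # elementwise boolean DataFrame, computed block by block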