def testit():
    """Run the six comparison operators over both fixture frame pairs.

    For each pair the first frame is compared with itself plus one.  The
    numexpr-enabled result of ``expr.evaluate`` must equal the result obtained
    with ``use_numexpr=False``; ``expr._can_use_numexpr`` must differ from the
    first frame's ``_is_mixed_type`` flag and be falsy for the second frame.
    """
    comparisons = (
        ("gt", ">"),
        ("lt", "<"),
        ("ge", ">="),
        ("le", "<="),
        ("eq", "=="),
        ("ne", "!="),
    )
    frame_pairs = ((self.frame, self.frame2), (self.mixed, self.mixed2))

    for first, second in frame_pairs:
        first_shifted = first + 1
        second_shifted = second + 1

        for op_name, op_str in comparisons:
            compare = getattr(operator, op_name)

            usable = expr._can_use_numexpr(
                compare, op_str, first, first_shifted, "evaluate"
            )
            assert usable != first._is_mixed_type

            result = expr.evaluate(
                compare, op_str, first, first_shifted, use_numexpr=True
            )
            expected = expr.evaluate(
                compare, op_str, first, first_shifted, use_numexpr=False
            )
            if not isinstance(result, DataFrame):
                tm.assert_numpy_array_equal(result, expected.values)
            else:
                tm.assert_frame_equal(result, expected)

            usable = expr._can_use_numexpr(
                compare, op_str, second, second_shifted, "evaluate"
            )
            assert not usable
def testit():
    """Check comparison operators with and without numexpr on each frame pair.

    The first frame of a pair is compared against itself plus one and both
    evaluation modes must agree.  ``expr._can_use_numexpr`` must differ from
    the first frame's ``_is_mixed_type`` and be falsy for the second frame.
    """
    symbols = {'gt': '>', 'lt': '<', 'ge': '>=', 'le': '<=', 'eq': '==',
               'ne': '!='}

    def check_result(got, want):
        # evaluate() may hand back either a DataFrame or a plain ndarray
        if isinstance(got, DataFrame):
            tm.assert_frame_equal(got, want)
        else:
            tm.assert_numpy_array_equal(got, want.values)

    for lhs, other in [(self.frame, self.frame2), (self.mixed, self.mixed2)]:
        lhs_plus = lhs + 1
        other_plus = other + 1

        for op_name in ('gt', 'lt', 'ge', 'le', 'eq', 'ne'):
            op_str = symbols[op_name]
            func = getattr(operator, op_name)

            flag = expr._can_use_numexpr(func, op_str, lhs, lhs_plus,
                                         'evaluate')
            assert flag != lhs._is_mixed_type

            with_ne = expr.evaluate(func, op_str, lhs, lhs_plus,
                                    use_numexpr=True)
            without_ne = expr.evaluate(func, op_str, lhs, lhs_plus,
                                       use_numexpr=False)
            check_result(with_ne, without_ne)

            flag = expr._can_use_numexpr(func, op_str, other, other_plus,
                                         'evaluate')
            assert not flag
def testit():
    """Check arithmetic operators with and without numexpr on each frame pair.

    ``pow`` is always skipped and ``div`` is looked up as
    ``operator.truediv``.  Each remaining operator is applied to the first
    frame against itself; both evaluation modes must agree, and
    ``expr._can_use_numexpr`` must be falsy for the second frame.
    """
    arithmetic = [('add', '+'), ('sub', '-'), ('mul', '*'), ('div', '/'),
                  ('pow', '**')]

    for frame, other in [(self.frame, self.frame2),
                         (self.mixed, self.mixed2)]:
        for op_name, op_str in arithmetic:
            if op_name == 'pow':
                continue

            attr = 'truediv' if op_name == 'div' else op_name
            func = getattr(operator, attr, None)
            if func is None:
                # operator module does not provide this function
                continue

            usable = expr._can_use_numexpr(func, op_str, frame, frame,
                                           'evaluate')
            assert usable != frame._is_mixed_type

            result = expr.evaluate(func, op_str, frame, frame,
                                   use_numexpr=True)
            expected = expr.evaluate(func, op_str, frame, frame,
                                     use_numexpr=False)
            if not isinstance(result, DataFrame):
                tm.assert_numpy_array_equal(result, expected.values)
            else:
                tm.assert_frame_equal(result, expected)

            usable = expr._can_use_numexpr(func, op_str, other, other,
                                           'evaluate')
            assert not usable
def testit():
    """Exercise add/sub/mul/div on both frame pairs; ``pow`` is skipped.

    The operator is resolved from the ``operator`` module (``div`` maps to
    ``truediv``).  Results with ``use_numexpr=True`` and ``use_numexpr=False``
    must match, ``expr._can_use_numexpr`` must differ from the first frame's
    ``_is_mixed_type`` and must be falsy for the second frame.
    """
    renamed = {"div": "truediv"}
    frame_pairs = ((self.frame, self.frame2), (self.mixed, self.mixed2))

    def compare(got, want):
        # evaluate() may return a DataFrame or a bare ndarray
        if isinstance(got, DataFrame):
            tm.assert_frame_equal(got, want)
        else:
            tm.assert_numpy_array_equal(got, want.values)

    for first, second in frame_pairs:
        for op_name, op_str in (
            ("add", "+"),
            ("sub", "-"),
            ("mul", "*"),
            ("div", "/"),
            ("pow", "**"),
        ):
            if op_name == "pow":
                continue

            binop = getattr(operator, renamed.get(op_name, op_name), None)
            if binop is None:
                continue

            can_use = expr._can_use_numexpr(binop, op_str, first, first, "evaluate")
            assert can_use != first._is_mixed_type

            result = expr.evaluate(binop, op_str, first, first, use_numexpr=True)
            expected = expr.evaluate(binop, op_str, first, first, use_numexpr=False)
            compare(result, expected)

            can_use = expr._can_use_numexpr(binop, op_str, second, second, "evaluate")
            assert not can_use
def testit():
    """Compare ``left`` with ``left + 1`` using ``opname`` in both modes.

    The numexpr-enabled and numexpr-disabled results of ``expr.evaluate`` must
    be equal arrays, and ``expr._can_use_numexpr`` must be falsy for ``right``
    against ``right + 1``.
    """
    left_shifted = left + 1
    right_shifted = right + 1
    func = getattr(operator, opname)

    # evaluated in order: numexpr enabled first, then disabled
    result, expected = (
        expr.evaluate(func, left, left_shifted, use_numexpr=flag)
        for flag in (True, False)
    )
    tm.assert_numpy_array_equal(result, expected)

    usable = expr._can_use_numexpr(func, op_str, right, right_shifted, "evaluate")
    assert not usable
def testit():
    """Apply ``opname`` to ``left`` against itself in both evaluation modes.

    Does nothing for ``"pow"``.  Otherwise the two ``expr.evaluate`` results
    must be equal arrays and ``expr._can_use_numexpr`` must be falsy for
    ``right`` against itself.
    """
    if opname == "pow":
        # TODO: get this working
        return

    func = getattr(operator, opname)

    with_numexpr = expr.evaluate(func, left, left, use_numexpr=True)
    without_numexpr = expr.evaluate(func, left, left, use_numexpr=False)
    tm.assert_numpy_array_equal(with_numexpr, without_numexpr)

    usable = expr._can_use_numexpr(func, op_str, right, right, "evaluate")
    assert not usable
def na_op(x, y):
    """
    Evaluate ``op`` on ``x`` and ``y`` and post-process the result.

    ``expressions.evaluate`` is tried first; if it raises ``TypeError`` the
    computation is redone with ``masked_arith_op``.  Either way the value is
    passed through ``missing.dispatch_fill_zeros`` before being returned.
    ``op``, ``str_rep`` and ``eval_kwargs`` are resolved from the surrounding
    scope.

    Parameters
    ----------
    x : array-like
    y : array-like or scalar

    Returns
    -------
    array-like

    Raises
    ------
    TypeError : invalid operation
    """
    import pandas.core.computation.expressions as expressions

    try:
        computed = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
    except TypeError:
        computed = masked_arith_op(x, y, op)

    filled = missing.dispatch_fill_zeros(op, x, y, computed)
    return filled
def na_arithmetic_op(left, right, op, str_rep: str):
    """
    Evaluate ``op(left, right)``, retrying with a masked op on ``TypeError``.

    The operation is first attempted through ``expressions.evaluate``; when
    that raises ``TypeError`` it is repeated with ``masked_arith_op``.  The
    outcome is then handed to ``missing.dispatch_fill_zeros``.

    Parameters
    ----------
    left : np.ndarray
    right : np.ndarray or scalar
    op : callable
        Binary operator to apply.
    str_rep : str or None

    Returns
    -------
    array-like

    Raises
    ------
    TypeError : invalid operation
    """
    import pandas.core.computation.expressions as expressions

    try:
        raw = expressions.evaluate(op, str_rep, left, right)
    except TypeError:
        raw = masked_arith_op(left, right, op)

    return missing.dispatch_fill_zeros(op, left, right, raw)
def na_op(x, y):
    """Evaluate ``op`` on ``x`` and ``y``; use ``mask_cmp_op`` on ``TypeError``.

    ``op`` and ``str_rep`` are resolved from the surrounding scope.
    """
    import pandas.core.computation.expressions as expressions

    try:
        return expressions.evaluate(op, str_rep, x, y)
    except TypeError:
        return mask_cmp_op(x, y, op, np.ndarray)
def dispatch_to_series(left, right, func, str_rep=None, axis=None):
    """
    Apply ``func(left, right)`` one column at a time and rebuild a DataFrame.

    Parameters
    ----------
    left : DataFrame
    right : scalar, DataFrame or Series
    func : arithmetic or comparison operator
    str_rep : str or None, default None
    axis : {None, 0, 1, "index", "columns"}

    Returns
    -------
    DataFrame

    Raises
    ------
    NotImplementedError
        If ``right`` is not one of the handled kinds.
    """
    import pandas.core.computation.expressions as expressions

    right = lib.item_from_zerodim(right)

    # Choose how the right-hand operand for column position ``i`` is obtained.
    if lib.is_scalar(right) or np.ndim(right) == 0:

        def rhs_for(b, i):
            return b

    elif isinstance(right, ABCDataFrame):
        assert right._indexed_same(left)

        def rhs_for(b, i):
            return b.iloc[:, i]

    elif isinstance(right, ABCSeries) and axis == "columns":
        # Reached via left._combine_match_columns: pair the i-th column of
        # ``left`` with the i-th element of ``right``.
        assert right.index.equals(left.columns)

        def rhs_for(b, i):
            return b.iloc[i]

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later

        def rhs_for(b, i):
            return b

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    def column_op(a, b):
        # Positional (iloc) access keeps this working with non-unique columns.
        return {
            i: func(a.iloc[:, i], rhs_for(b, i)) for i in range(len(a.columns))
        }

    new_data = expressions.evaluate(column_op, str_rep, left, right)

    out = left._constructor(new_data, index=left.index, copy=False)
    # Columns are pinned afterwards rather than passed to the constructor,
    # again for the non-unique columns case.
    out.columns = left.columns
    return out
def na_op(x, y):
    """Evaluate ``op`` on ``x`` and ``y`` with a masked fallback.

    ``expressions.evaluate`` is tried first; on ``TypeError`` the result comes
    from ``masked_arith_op`` instead.  The value is returned after passing
    through ``missing.dispatch_fill_zeros``.  ``op``, ``str_rep`` and
    ``eval_kwargs`` are resolved from the surrounding scope.
    """
    import pandas.core.computation.expressions as expressions

    try:
        outcome = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
    except TypeError:
        outcome = masked_arith_op(x, y, op)

    return missing.dispatch_fill_zeros(op, x, y, outcome)
def testit():
    """Compare ``left`` with ``left + 1`` via ``opname`` in both modes.

    ``expr._can_use_numexpr`` must differ from ``left._is_mixed_type``, the
    numexpr-enabled and numexpr-disabled results must match, and
    ``expr._can_use_numexpr`` must be falsy for ``right`` against
    ``right + 1``.
    """
    left_shifted = left + 1
    right_shifted = right + 1
    func = getattr(operator, opname)

    usable = expr._can_use_numexpr(func, op_str, left, left_shifted, "evaluate")
    assert usable != left._is_mixed_type

    result = expr.evaluate(func, op_str, left, left_shifted, use_numexpr=True)
    expected = expr.evaluate(func, op_str, left, left_shifted, use_numexpr=False)
    if not isinstance(result, DataFrame):
        tm.assert_numpy_array_equal(result, expected.values)
    else:
        tm.assert_frame_equal(result, expected)

    usable = expr._can_use_numexpr(func, op_str, right, right_shifted, "evaluate")
    assert not usable
def testit():
    """Apply ``opname`` to ``left`` against itself in both evaluation modes.

    Does nothing for ``"pow"``.  The two ``expr.evaluate`` calls run with the
    "invalid value encountered in true_divide" ``RuntimeWarning`` ignored and
    must produce equal arrays; ``expr._can_use_numexpr`` must be falsy for
    ``right`` against itself.
    """
    if opname == "pow":
        # TODO: get this working
        return

    func = getattr(operator, opname)

    with warnings.catch_warnings():
        # array has 0s
        warnings.filterwarnings(
            "ignore", "invalid value encountered in true_divide", RuntimeWarning
        )
        with_numexpr = expr.evaluate(func, left, left, use_numexpr=True)
        without_numexpr = expr.evaluate(func, left, left, use_numexpr=False)

    tm.assert_numpy_array_equal(with_numexpr, without_numexpr)

    usable = expr._can_use_numexpr(func, op_str, right, right, "evaluate")
    assert not usable
def testit():
    """Check arithmetic operators with and without numexpr on each frame pair.

    ``pow`` is skipped unless ``_np_version_under1p11`` is truthy and ``div``
    is looked up as ``operator.truediv``.  Both evaluation modes must agree,
    ``expr._can_use_numexpr`` must differ from the first frame's
    ``_is_mixed_type`` and be falsy for the second frame.
    """
    arithmetic = (('add', '+'), ('sub', '-'), ('mul', '*'), ('div', '/'),
                  ('pow', '**'))

    for frame, other in [(self.frame, self.frame2),
                         (self.mixed, self.mixed2)]:
        for op_name, op_str in arithmetic:
            # numpy >= 1.11 doesn't handle integers raised to integer powers
            # https://github.com/pandas-dev/pandas/issues/15363
            if op_name == 'pow' and not _np_version_under1p11:
                continue

            attr = 'truediv' if op_name == 'div' else op_name
            func = getattr(operator, attr, None)
            if func is None:
                continue

            usable = expr._can_use_numexpr(func, op_str, frame, frame,
                                           'evaluate')
            self.assertNotEqual(usable, frame._is_mixed_type)

            result = expr.evaluate(func, op_str, frame, frame,
                                   use_numexpr=True)
            expected = expr.evaluate(func, op_str, frame, frame,
                                     use_numexpr=False)
            if not isinstance(result, DataFrame):
                tm.assert_numpy_array_equal(result, expected.values)
            else:
                tm.assert_frame_equal(result, expected)

            usable = expr._can_use_numexpr(func, op_str, other, other,
                                           'evaluate')
            self.assertFalse(usable)
def testit():
    """Apply ``opname`` to ``left`` against itself in both evaluation modes.

    Does nothing for ``"pow"``.  ``expr._can_use_numexpr`` must differ from
    ``left._is_mixed_type``, the two ``expr.evaluate`` results must match,
    and ``expr._can_use_numexpr`` must be falsy for ``right`` against itself.
    """
    if opname == "pow":
        # TODO: get this working
        return

    def check_result(got, want):
        # evaluate() may return a DataFrame or a bare ndarray
        if isinstance(got, DataFrame):
            tm.assert_frame_equal(got, want)
        else:
            tm.assert_numpy_array_equal(got, want.values)

    func = getattr(operator, opname)

    usable = expr._can_use_numexpr(func, op_str, left, left, "evaluate")
    assert usable != left._is_mixed_type

    with_numexpr = expr.evaluate(func, op_str, left, left, use_numexpr=True)
    without_numexpr = expr.evaluate(func, op_str, left, left, use_numexpr=False)
    check_result(with_numexpr, without_numexpr)

    usable = expr._can_use_numexpr(func, op_str, right, right, "evaluate")
    assert not usable
def testit():
    """Apply ``opname`` to each fixture frame against itself in both modes.

    Every iteration is skipped when ``opname`` is ``"pow"``.  Otherwise both
    evaluation modes must agree, ``expr._can_use_numexpr`` must differ from
    the first frame's ``_is_mixed_type`` and be falsy for the second frame.
    """
    frame_pairs = ((self.frame, self.frame2), (self.mixed, self.mixed2))

    for first, second in frame_pairs:
        if opname == "pow":
            continue

        func = getattr(operator, opname)

        usable = expr._can_use_numexpr(func, op_str, first, first, "evaluate")
        assert usable != first._is_mixed_type

        result = expr.evaluate(func, op_str, first, first, use_numexpr=True)
        expected = expr.evaluate(func, op_str, first, first, use_numexpr=False)
        if not isinstance(result, DataFrame):
            tm.assert_numpy_array_equal(result, expected.values)
        else:
            tm.assert_frame_equal(result, expected)

        usable = expr._can_use_numexpr(func, op_str, second, second, "evaluate")
        assert not usable
def testit():
    """Exercise the arithmetic operators on both fixture frame pairs.

    ``pow`` is skipped unless ``_np_version_under1p11`` is truthy; ``div`` is
    resolved as ``operator.truediv``.  Results with ``use_numexpr=True`` and
    ``use_numexpr=False`` must match, ``expr._can_use_numexpr`` must differ
    from the first frame's ``_is_mixed_type`` and be falsy for the second
    frame.
    """
    renamed = {'div': 'truediv'}

    def compare(got, want):
        # evaluate() may return a DataFrame or a bare ndarray
        if isinstance(got, DataFrame):
            tm.assert_frame_equal(got, want)
        else:
            tm.assert_numpy_array_equal(got, want.values)

    for first, second in [(self.frame, self.frame2),
                          (self.mixed, self.mixed2)]:
        for op_name, op_str in [('add', '+'), ('sub', '-'), ('mul', '*'),
                                ('div', '/'), ('pow', '**')]:
            # numpy >= 1.11 doesn't handle integers raised to integer powers
            # https://github.com/pandas-dev/pandas/issues/15363
            if op_name == 'pow' and not _np_version_under1p11:
                continue

            binop = getattr(operator, renamed.get(op_name, op_name), None)
            if binop is None:
                continue

            can_use = expr._can_use_numexpr(binop, op_str, first, first,
                                            'evaluate')
            assert can_use != first._is_mixed_type

            result = expr.evaluate(binop, op_str, first, first,
                                   use_numexpr=True)
            expected = expr.evaluate(binop, op_str, first, first,
                                     use_numexpr=False)
            compare(result, expected)

            can_use = expr._can_use_numexpr(binop, op_str, second, second,
                                            'evaluate')
            assert not can_use
def na_op(x, y):
    """Evaluate ``op`` on ``x`` and ``y``, then apply ``missing.fill_zeros``.

    ``expressions.evaluate`` is called with ``errors='raise'``; if it raises
    ``TypeError`` the operator is applied directly as ``op(x, y)``.  ``op``,
    ``str_rep``, ``eval_kwargs``, ``name`` and ``fill_zeros`` are resolved
    from the surrounding scope.
    """
    import pandas.core.computation.expressions as expressions

    try:
        computed = expressions.evaluate(op, str_rep, x, y,
                                        errors='raise', **eval_kwargs)
    except TypeError:
        computed = op(x, y)

    # handles discrepancy between numpy and numexpr on division/mod
    # by 0 though, given that these are generally (always?)
    # non-scalars, I'm not sure whether it's worth it at the moment
    return missing.fill_zeros(computed, x, y, name, fill_zeros)
def na_op(x, y):
    """Evaluate ``op`` on ``x`` and ``y`` with a masked fallback.

    If ``expressions.evaluate`` raises ``TypeError``, the operands are
    flattened, ``op`` is applied only where ``notna`` holds, the remaining
    positions are set to NaN through ``maybe_upcast_putmask`` and the result
    is reshaped to ``x.shape``.  The final value always goes through
    ``missing.fill_zeros``.  ``op``, ``str_rep``, ``eval_kwargs``, ``name``
    and ``fill_zeros`` are resolved from the surrounding scope.

    Raises
    ------
    ValueError
        In the fallback, when ``y`` is an ndarray/Series whose flattened
        shape differs from the mask's shape.
    TypeError
        In the fallback, when ``y`` is not an ndarray/Series and ``x`` is not
        an ndarray.
    """
    import pandas.core.computation.expressions as expressions

    try:
        result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
    except TypeError:
        flat_x = x.ravel()

        if isinstance(y, (np.ndarray, ABCSeries)):
            common = np.find_common_type([x.dtype, y.dtype], [])
            result = np.empty(x.size, dtype=common)
            flat_y = y.ravel()
            mask = notna(flat_x) & notna(flat_y)
            flat_x = flat_x[mask]

            if flat_y.shape != mask.shape:
                # FIXME: GH#5284, GH#5035, GH#19448
                # Without specifically raising here we get mismatched
                # errors in Py3 (TypeError) vs Py2 (ValueError)
                raise ValueError('Cannot broadcast operands together.')

            rhs = flat_y[mask]

        elif isinstance(x, np.ndarray):
            # mask is only meaningful for x
            result = np.empty(x.size, dtype=x.dtype)
            mask = notna(flat_x)
            flat_x = flat_x[mask]
            rhs = y

        else:
            raise TypeError(
                "cannot perform operation {op} between objects of type {x} "
                "and {y}".format(op=name, x=type(x), y=type(y)))

        if flat_x.size:
            with np.errstate(all='ignore'):
                result[mask] = op(flat_x, rhs)

        result, _ = maybe_upcast_putmask(result, ~mask, np.nan)
        result = result.reshape(x.shape)

    return missing.fill_zeros(result, x, y, name, fill_zeros)
def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = False):
    """
    Evaluate ``op(left, right)``, with a masked fallback for arithmetic ops.

    ``expressions.evaluate`` is tried first.  On ``TypeError`` a comparison
    re-raises, while any other operation is retried with ``masked_arith_op``.

    Parameters
    ----------
    left : np.ndarray
    right : np.ndarray or scalar
    op : callable
        Binary operator to apply.
    str_rep : str or None
    is_cmp : bool, default False
        If this a comparison operation.

    Returns
    -------
    array-like

    Raises
    ------
    TypeError : invalid operation
    """
    import pandas.core.computation.expressions as expressions

    try:
        outcome = expressions.evaluate(op, str_rep, left, right)
    except TypeError:
        if not is_cmp:
            outcome = masked_arith_op(left, right, op)
        else:
            # numexpr failed on comparison op, e.g. ndarray[float] > datetime
            # In this case we do not fall back to the masked op, as that
            # will handle complex numbers incorrectly, see GH#32047
            raise

    if is_cmp:
        if is_scalar(outcome) or outcome is NotImplemented:
            # numpy returned a scalar instead of operating element-wise
            # e.g. numeric array vs str
            return invalid_comparison(left, right, op)

    return missing.dispatch_fill_zeros(op, left, right, outcome)
def na_op(x, y):
    """Evaluate ``op`` on ``x`` and ``y`` with a masked fallback.

    If ``expressions.evaluate`` raises ``TypeError``, the operands are
    flattened, ``op`` is applied only where ``notna`` holds, the remaining
    positions are set to NaN through ``maybe_upcast_putmask`` and the result
    is reshaped to ``x.shape``.  The final value always goes through
    ``missing.fill_zeros``.  ``op``, ``str_rep``, ``eval_kwargs``, ``name``
    and ``fill_zeros`` are resolved from the surrounding scope.

    Raises
    ------
    ValueError
        In the fallback, when ``y`` has a different flattened shape from the
        mask and is not a single-element array (raised by ``ndarray.item``).
    TypeError
        In the fallback, when ``y`` is not an ndarray/Series and ``x`` has no
        ``size`` attribute.
    """
    import pandas.core.computation.expressions as expressions

    try:
        result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
    except TypeError:
        xrav = x.ravel()
        if isinstance(y, (np.ndarray, ABCSeries)):
            dtype = np.find_common_type([x.dtype, y.dtype], [])
            result = np.empty(x.size, dtype=dtype)
            yrav = y.ravel()
            mask = notna(xrav) & notna(yrav)
            xrav = xrav[mask]

            # we may need to manually
            # broadcast a 1 element array
            if yrav.shape != mask.shape:
                # Read the scalar BEFORE rebinding ``yrav``; calling
                # ``.item()`` on the freshly allocated buffer would read the
                # uninitialised array instead of the original value.
                fill_value = yrav.item()
                yrav = np.empty(mask.shape, dtype=yrav.dtype)
                yrav.fill(fill_value)

            yrav = yrav[mask]
            if np.prod(xrav.shape) and np.prod(yrav.shape):
                with np.errstate(all='ignore'):
                    result[mask] = op(xrav, yrav)
        elif hasattr(x, 'size'):
            result = np.empty(x.size, dtype=x.dtype)
            mask = notna(xrav)
            xrav = xrav[mask]
            if np.prod(xrav.shape):
                with np.errstate(all='ignore'):
                    result[mask] = op(xrav, y)
        else:
            raise TypeError("cannot perform operation {op} between "
                            "objects of type {x} and {y}".format(
                                op=name, x=type(x), y=type(y)))

        # positions excluded by the mask become NaN
        result, changed = maybe_upcast_putmask(result, ~mask, np.nan)
        result = result.reshape(x.shape)

    result = missing.fill_zeros(result, x, y, name, fill_zeros)

    return result
def na_op(x, y):
    """Evaluate ``op`` on ``x`` and ``y`` with a masked fallback.

    If ``expressions.evaluate`` raises ``TypeError``, ``op`` is applied only
    where ``notna`` holds and the remaining positions are set to NaN through
    ``maybe_upcast_putmask``.  The final value always goes through
    ``missing.fill_zeros``.  ``op``, ``str_rep``, ``eval_kwargs``, ``name``
    and ``fill_zeros`` are resolved from the surrounding scope.
    """
    import pandas.core.computation.expressions as expressions

    try:
        result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
    except TypeError:
        if not isinstance(y, (np.ndarray, ABCSeries, pd.Index)):
            assert isinstance(x, np.ndarray)
            result = np.empty(len(x), dtype=x.dtype)
            valid = notna(x)
            result[valid] = op(x[valid], y)
        else:
            common = find_common_type([x.dtype, y.dtype])
            result = np.empty(x.size, dtype=common)
            valid = notna(x) & notna(y)
            result[valid] = op(x[valid], com._values_from_object(y[valid]))

        result, _ = maybe_upcast_putmask(result, ~valid, np.nan)

    return missing.fill_zeros(result, x, y, name, fill_zeros)
def _na_arithmetic_op(left, right, op, is_cmp: bool = False): """ Return the result of evaluating op on the passed in values. If native types are not compatible, try coercion to object dtype. Parameters ---------- left : np.ndarray right : np.ndarray or scalar is_cmp : bool, default False If this a comparison operation. Returns ------- array-like Raises ------ TypeError : invalid operation """ try: result = expressions.evaluate(op, left, right) except TypeError: if is_object_dtype(left) or is_object_dtype(right) and not is_cmp: # For object dtype, fallback to a masked operation (only operating # on the non-missing values) # Don't do this for comparisons, as that will handle complex numbers # incorrectly, see GH#32047 result = _masked_arith_op(left, right, op) else: raise if is_cmp and (is_scalar(result) or result is NotImplemented): # numpy returned a scalar instead of operating element-wise # e.g. numeric array vs str return invalid_comparison(left, right, op) return missing.dispatch_fill_zeros(op, left, right, result)
def na_op(x, y):
    """Evaluate ``op`` on ``x`` and ``y`` with a boolean masked fallback.

    If ``expressions.evaluate`` raises ``TypeError``, the operands are
    flattened and ``op`` is applied only where ``notna`` holds.  Positions
    excluded by the mask are set to ``True`` when ``op`` equals
    ``operator.ne`` and ``False`` otherwise, and the boolean result is
    reshaped to ``x.shape``.  ``op`` and ``str_rep`` are resolved from the
    surrounding scope.
    """
    import pandas.core.computation.expressions as expressions

    try:
        result = expressions.evaluate(op, str_rep, x, y)
    except TypeError:
        flat_x = x.ravel()
        result = np.empty(x.size, dtype=bool)

        if not isinstance(y, np.ndarray):
            valid = notna(flat_x)
            result[valid] = op(np.array(list(flat_x[valid])), y)
        else:
            flat_y = y.ravel()
            valid = notna(flat_x) & notna(flat_y)
            result[valid] = op(np.array(list(flat_x[valid])),
                               np.array(list(flat_y[valid])))

        fill = True if op == operator.ne else False  # pragma: no cover
        np.putmask(result, ~valid, fill)
        result = result.reshape(x.shape)

    return result
def dispatch_to_series(left, right, func, str_rep=None, axis=None):
    """
    Evaluate the frame operation ``func(left, right)``.

    A scalar ``right`` is handled block-wise through ``left._data.apply``.
    Every other supported ``right`` is processed column by column via
    ``expressions.evaluate``.

    Parameters
    ----------
    left : DataFrame
    right : scalar, DataFrame or Series
    func : arithmetic or comparison operator
    str_rep : str or None, default None
    axis : {None, 0, 1, "index", "columns"}

    Returns
    -------
    object
        ``type(left)(bm)`` for a scalar ``right``; otherwise whatever
        ``expressions.evaluate`` returns for the column-wise operation.

    Raises
    ------
    NotImplementedError
        If ``right`` is not one of the handled kinds.
    """
    import pandas.core.computation.expressions as expressions

    right = lib.item_from_zerodim(right)

    if lib.is_scalar(right) or np.ndim(right) == 0:
        # Get the appropriate array-op to apply to each block's values.
        array_op = get_array_op(func, str_rep=str_rep)
        bm = left._data.apply(array_op, right=right)
        return type(left)(bm)

    # Choose how the right-hand operand for column position ``i`` is obtained.
    if isinstance(right, ABCDataFrame):
        assert right._indexed_same(left)

        def rhs_for(b, i):
            return b.iloc[:, i]

    elif isinstance(right, ABCSeries) and axis == "columns":
        # We only get here if called via _combine_series_frame,
        # in which case we specifically want to operate row-by-row
        assert right.index.equals(left.columns)

        if right.dtype == "timedelta64[ns]":
            # ensure we treat NaT values as the correct dtype
            # Note: we do not do this unconditionally as it may be lossy or
            # expensive for EA dtypes.
            right = np.asarray(right)

            def rhs_for(b, i):
                return b[i]

        else:

            def rhs_for(b, i):
                return b.iloc[i]

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later

        def rhs_for(b, i):
            return b

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    def column_op(a, b):
        # Positional (iloc) access keeps this working with non-unique columns.
        return {
            i: func(a.iloc[:, i], rhs_for(b, i)) for i in range(len(a.columns))
        }

    return expressions.evaluate(column_op, str_rep, left, right)