def ensure_python_int(value: Union[int, np.integer]) -> int:
    """
    Ensure that a value is a python int.

    Parameters
    ----------
    value: int or numpy.integer

    Returns
    -------
    int

    Raises
    ------
    TypeError: if the value isn't an int or can't be converted to one.
    """
    if not is_scalar(value):
        raise TypeError(
            f"Value needs to be a scalar value, was type {type(value).__name__}"
        )
    try:
        new_value = int(value)
        assert new_value == value
    except (TypeError, ValueError, AssertionError) as err:
        raise TypeError(f"Wrong type {type(value)} for value {value}") from err
    return new_value
def ensure_python_int(value: Union[int, np.integer]) -> int:
    """
    Ensure that a value is a python int.

    Parameters
    ----------
    value: int or numpy.integer

    Returns
    -------
    int

    Raises
    ------
    TypeError: if the value isn't an int or can't be converted to one.
    """
    if not is_scalar(value):
        raise TypeError(
            "Value needs to be a scalar value, was type {}".format(type(value))
        )
    msg = "Wrong type {} for value {}"
    try:
        new_value = int(value)
        assert new_value == value
    except (TypeError, ValueError, AssertionError):
        raise TypeError(msg.format(type(value), value))
    return new_value
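# Usage sketch (not from the original source); assumes `is_scalar` resolves to
# pandas.api.types.is_scalar. int() may silently truncate, so the assert in the
# function rejects values that do not round-trip exactly.
import numpy as np

assert ensure_python_int(np.int64(3)) == 3   # numpy scalar -> plain Python int
assert ensure_python_int(3.0) == 3           # lossless conversion passes the assert
# ensure_python_int(3.5) and ensure_python_int([1, 2]) both raise TypeError:
# the first fails the round-trip check, the second is not a scalar.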
def _apply_binary_operator(
    self, other: Any, op: Any
) -> Union["RLEArray", np.ndarray]:
    if isinstance(other, (ABCSeries, ABCIndexClass)):
        # rely on pandas to unbox and dispatch to us
        return NotImplemented

    if is_scalar(other):
        with np.errstate(invalid="ignore"):
            new_data = op(self._data, other)
        return RLEArray(*recompress(new_data, self._positions))
    elif isinstance(other, RLEArray):
        if len(self) != len(other):
            raise ValueError("arrays have different lengths")

        extended_positions = extend_positions(self._positions, other._positions)
        data_self = extend_data(
            data=self._data,
            positions=self._positions,
            extended_positions=extended_positions,
        )
        data_other = extend_data(
            data=other._data,
            positions=other._positions,
            extended_positions=extended_positions,
        )
        with np.errstate(invalid="ignore"):
            new_data = op(data_self, data_other)
        return RLEArray(*recompress(new_data, extended_positions))
    else:
        array = self.__array__()
        with np.errstate(invalid="ignore"):
            return op(array, other)
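# A minimal, self-contained sketch (not the rle_array implementation) of the
# alignment idea the elementwise branch above relies on: two run-length
# encodings can only be combined after both are re-expressed over the union of
# their run end positions. The helper name below is hypothetical.
import numpy as np

def _align_runs(data_a, ends_a, data_b, ends_b):
    # shared run boundaries: union of both arrays of exclusive run ends
    ends = np.union1d(ends_a, ends_b)
    # for every shared boundary, pick the run value whose segment covers it
    a = data_a[np.searchsorted(ends_a, ends)]
    b = data_b[np.searchsorted(ends_b, ends)]
    return a, b, ends

# a: value 1 up to position 3, value 2 up to 5  -> [1, 1, 1, 2, 2]
# b: value 5 up to position 2, value 7 up to 5  -> [5, 5, 7, 7, 7]
a, b, ends = _align_runs(np.array([1, 2]), np.array([3, 5]),
                         np.array([5, 7]), np.array([2, 5]))
# op(a, b) over `ends` is now a run-length encoding of the elementwise result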
def grouped_eval(__data, expr, require_agg=False):
    if is_scalar(expr):
        return expr

    if isinstance(expr, Call):
        call = call_listener.enter(expr)

        grouped_res = call(__data)

        if isinstance(grouped_res, GroupByAgg):
            # TODO: may want to validate its grouper
            if require_agg:
                # need an agg, got an agg. we are done.
                if grouped_res._orig_grouper is not __data.grouper:
                    raise ValueError("Incompatible groupers")
                return grouped_res
            else:
                # broadcast from aggregate to original length (like transform)
                return grouped_res._broadcast_agg_result()
        elif isinstance(grouped_res, SeriesGroupBy) and not require_agg:
            # TODO: may want to validate its grouper
            return grouped_res.obj
        else:
            # can happen right now if user selects, e.g., a property of the
            # groupby object, like .dtype, which returns a single value
            # in the future, could restrict set of operations user could perform
            raise ValueError("Result must be subclass of SeriesGroupBy")

    raise ValueError("Grouped expressions must be a siu expression or scalar")
def grouped_eval(__data, expr, require_agg=False):
    if is_scalar(expr):
        return expr

    if isinstance(expr, Call):
        try:
            call = call_listener.enter(expr)
        except FunctionLookupError as e:
            fallback_warning(expr, str(e))
            call = expr

        grouped_res = call(__data)

        if isinstance(grouped_res, SeriesGroupBy):
            if not is_compatible(grouped_res, __data):
                raise ValueError("Incompatible groupers")

            # TODO: may want to validate result is correct length / index?
            #       e.g. a SeriesGroupBy could be compatible and not an agg
            if require_agg:
                return grouped_res.obj
            else:
                # broadcast from aggregate to original length (like transform)
                return broadcast_agg(grouped_res)
        else:
            # can happen right now if user selects, e.g., a property of the
            # groupby object, like .dtype, which returns a single value
            # in the future, could restrict set of operations user could perform
            raise ValueError("Result must be subclass of SeriesGroupBy")

    raise ValueError("Grouped expressions must be a siu expression or scalar")
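# Context sketch (not from the siuba source): the "broadcast from aggregate to
# original length" step in both versions above corresponds to what pandas'
# groupby transform does, i.e. re-expanding one value per group back to one
# value per original row.
import pandas as pd

df = pd.DataFrame({"g": ["a", "a", "b"], "x": [1, 2, 10]})
per_group = df.groupby("g")["x"].mean()            # length 2: one row per group
per_row = df.groupby("g")["x"].transform("mean")   # length 3: aggregate re-broadcast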
def _transform_args(args):
    out = []
    for expr in args:
        if is_scalar(expr):
            # plain scalars pass through unchanged
            out.append(expr)
        elif isinstance(expr, Call):
            try:
                call = call_listener.enter(expr)
                out.append(call)
            except FunctionLookupError as e:
                fallback_warning(expr, str(e))
                return None
        elif callable(expr):
            # a bare callable can't be translated; signal failure with None
            return None

    return out
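# Illustrative behaviour (assumes the surrounding siuba imports): scalars pass
# through untouched, while a plain callable that is not a siu Call makes the
# whole transformation bail out with None so the caller can fall back.
assert _transform_args([1, "a"]) == [1, "a"]
assert _transform_args([1, lambda d: d.x]) is None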
def _is_dtype_type(arr_or_dtype, condition) -> bool:
    """
    Return a boolean if the condition is satisfied for the arr_or_dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array-like or dtype object whose dtype we want to extract.
    condition : callable[Union[np.dtype, ExtensionDtypeType]]

    Returns
    -------
    bool : if the condition is satisfied for the arr_or_dtype
    """
    if arr_or_dtype is None:
        return condition(type(None))

    # fastpath
    if isinstance(arr_or_dtype, np.dtype):
        return condition(arr_or_dtype.type)
    elif isinstance(arr_or_dtype, type):
        if issubclass(arr_or_dtype, ExtensionDtype):
            arr_or_dtype = arr_or_dtype.type
        return condition(np.dtype(arr_or_dtype).type)

    # if we have an array-like
    if hasattr(arr_or_dtype, "dtype"):
        arr_or_dtype = arr_or_dtype.dtype

    # we are not possibly a dtype
    elif is_list_like(arr_or_dtype):
        return condition(type(None))

    try:
        tipo = pandas_dtype(arr_or_dtype).type
    except (TypeError, ValueError, UnicodeEncodeError):
        if is_scalar(arr_or_dtype):
            return condition(type(None))
        return False

    return condition(tipo)
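# Usage sketch (pandas-internal helper, not public API): the condition receives
# the resolved scalar type of whatever dtype could be extracted.
import numpy as np

assert _is_dtype_type(np.dtype("int64"), lambda t: issubclass(t, np.integer))
assert not _is_dtype_type(np.array([1.5]), lambda t: issubclass(t, np.integer))
assert _is_dtype_type(None, lambda t: t is type(None))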
def summarize(__data, **kwargs):
    """Assign variables that are single number summaries of a DataFrame.

    Args:
        __data: a DataFrame
        **kwargs: new_col_name=value pairs, where value can be a function
            taking a single argument for the data being operated on.

    Note
    ----
    Grouped DataFrames will produce one row for each group. Ungrouped
    DataFrames will produce a single row.

    Examples
    --------
    ::

        from siuba.data import mtcars
        mtcars >> summarize(mean = _.disp.mean(), n = n(_))

    """
    results = {}
    for k, v in kwargs.items():
        res = v(__data) if callable(v) else v

        # validate operations returned single result
        if not is_scalar(res) and len(res) > 1:
            raise ValueError(
                "Summarize argument, %s, must return result of length 1 or a scalar." % k
            )

        # keep result, but use underlying array to avoid crazy index issues
        # on DataFrame construction (#138)
        results[k] = res.array if isinstance(res, pd.Series) else res

    # must pass index, or raises error when using all scalar values
    return DataFrame(results, index=[0])
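# Illustrative direct call (the pipe form is shown in the docstring above);
# callable kwargs receive the DataFrame itself, plain scalars are kept as-is.
import pandas as pd

df = pd.DataFrame({"x": [1.0, 2.0, 3.0]})
summarize(df, mean_x=lambda d: d["x"].mean(), n=len(df))
#    mean_x  n
# 0     2.0  3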