Example #1
0
def ensure_python_int(value: Union[int, np.integer]) -> int:
    """
    Ensure that a value is a python int.

    Parameters
    ----------
    value: int or numpy.integer

    Returns
    -------
    int

    Raises
    ------
    TypeError: if the value isn't an int or can't be converted to one.
    """
    if not is_scalar(value):
        raise TypeError(
            f"Value needs to be a scalar value, was type {type(value).__name__}"
        )
    try:
        as_int = int(value)
        # the conversion must be lossless: int(1.5) == 1 != 1.5 fails here
        assert as_int == value
    except (TypeError, ValueError, AssertionError) as err:
        raise TypeError(f"Wrong type {type(value)} for value {value}") from err
    return as_int
Example #2
0
def ensure_python_int(value: Union[int, np.integer]) -> int:
    """
    Ensure that a value is a python int.

    Parameters
    ----------
    value: int or numpy.integer

    Returns
    -------
    int

    Raises
    ------
    TypeError: if the value isn't an int or can't be converted to one.
    """
    if not is_scalar(value):
        raise TypeError("Value needs to be a scalar value, was type {}".format(
            type(value)))
    msg = "Wrong type {} for value {}"
    try:
        new_value = int(value)
        # require a lossless round-trip so e.g. 1.5 is rejected
        assert new_value == value
    except (TypeError, ValueError, AssertionError) as err:
        # chain the original exception so the root cause stays visible
        raise TypeError(msg.format(type(value), value)) from err
    return new_value
Example #3
0
    def _apply_binary_operator(
        self, other: Any, op: Any
    ) -> Union["RLEArray", np.ndarray]:
        """Apply binary operator ``op`` between this array and ``other``.

        Scalars and other RLEArrays keep a run-length-encoded result;
        anything else falls back to dense numpy semantics.
        """
        # Let pandas unbox Series/Index and dispatch back to us.
        if isinstance(other, (ABCSeries, ABCIndexClass)):
            return NotImplemented

        if is_scalar(other):
            with np.errstate(invalid="ignore"):
                result = op(self._data, other)
            return RLEArray(*recompress(result, self._positions))

        if isinstance(other, RLEArray):
            if len(self) != len(other):
                raise ValueError("arrays have different lengths")
            # Align both operands on a common set of run boundaries.
            merged_positions = extend_positions(self._positions, other._positions)
            lhs = extend_data(
                data=self._data,
                positions=self._positions,
                extended_positions=merged_positions,
            )
            rhs = extend_data(
                data=other._data,
                positions=other._positions,
                extended_positions=merged_positions,
            )
            with np.errstate(invalid="ignore"):
                result = op(lhs, rhs)
            return RLEArray(*recompress(result, merged_positions))

        # Unknown operand type: decompress and operate densely.
        dense = self.__array__()
        with np.errstate(invalid="ignore"):
            return op(dense, other)
Example #4
0
def grouped_eval(__data, expr, require_agg=False):
    """Evaluate an expression against grouped data.

    Parameters
    ----------
    __data : grouped data (exposes ``.grouper``)
    expr : a scalar (returned as-is) or a siu ``Call`` expression
    require_agg : bool
        When True, the expression must produce an aggregate result.

    Raises
    ------
    ValueError
        If the groupers are incompatible, the result is not a supported
        groupby object, or ``expr`` is neither a scalar nor a ``Call``.
    """
    if is_scalar(expr):
        return expr

    if isinstance(expr, Call):
        call = call_listener.enter(expr)

        # execute the translated call against the grouped data
        grouped_res = call(__data)

        if isinstance(grouped_res, GroupByAgg):
            # TODO: may want to validate its grouper
            if require_agg:
                # need an agg, got an agg. we are done.
                if grouped_res._orig_grouper is not __data.grouper:
                    raise ValueError("Incompatible groupers")
                return grouped_res
            else:
                # broadcast from aggregate to original length (like transform)
                return grouped_res._broadcast_agg_result()
        elif isinstance(grouped_res, SeriesGroupBy) and not require_agg:
            # TODO: may want to validate its grouper
            return grouped_res.obj
        else:
            # can happen right now if user selects, e.g., a property of the
            # groupby object, like .dtype, which returns a single value
            # in the future, could restrict set of operations user could perform
            raise ValueError("Result must be subclass of SeriesGroupBy")

    raise ValueError("Grouped expressions must be a siu expression or scalar")
Example #5
0
def grouped_eval(__data, expr, require_agg=False):
    """Evaluate ``expr`` against grouped data.

    Scalars pass through unchanged; siu ``Call`` expressions are translated
    (falling back to the raw expression with a warning when no grouped
    translation exists) and executed against ``__data``.
    """
    # Bare scalars evaluate to themselves.
    if is_scalar(expr):
        return expr

    if not isinstance(expr, Call):
        raise ValueError("Grouped expressions must be a siu expression or scalar")

    try:
        call = call_listener.enter(expr)
    except FunctionLookupError as e:
        # No grouped implementation available; warn and run the expression as-is.
        fallback_warning(expr, str(e))
        call = expr

    #
    grouped_res = call(__data)

    if not isinstance(grouped_res, SeriesGroupBy):
        # can happen right now if user selects, e.g., a property of the
        # groupby object, like .dtype, which returns a single value
        # in the future, could restrict set of operations user could perform
        raise ValueError("Result must be subclass of SeriesGroupBy")

    if not is_compatible(grouped_res, __data):
        raise ValueError("Incompatible groupers")

    # TODO: may want to validate result is correct length / index?
    #       e.g. a SeriesGroupBy could be compatible and not an agg
    if require_agg:
        return grouped_res.obj

    # broadcast from aggregate to original length (like transform)
    return broadcast_agg(grouped_res)
Example #6
0
def _transform_args(args):
    out = []
    for expr in args:
        if is_scalar(expr):
            out.append(expr)
        elif isinstance(expr, Call):
            try:
                call = call_listener.enter(expr)
                out.append(call)
            except FunctionLookupError as e:
                fallback_warning(expr, str(e))
                return None
        elif callable(expr):
            return None

    return out
Example #7
0
def _is_dtype_type(arr_or_dtype, condition) -> bool:
    """
    Return a boolean if the condition is satisfied for the arr_or_dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array-like or dtype object whose dtype we want to extract.
    condition : callable[Union[np.dtype, ExtensionDtypeType]]
        Predicate applied to the extracted scalar type object.

    Returns
    -------
    bool : if the condition is satisfied for the arr_or_dtype
    """

    # None carries no dtype information; let the predicate decide how to
    # treat NoneType.
    if arr_or_dtype is None:
        return condition(type(None))

    # fastpath: already a numpy dtype, or a raw type object
    if isinstance(arr_or_dtype, np.dtype):
        return condition(arr_or_dtype.type)
    elif isinstance(arr_or_dtype, type):
        if issubclass(arr_or_dtype, ExtensionDtype):
            # substitute the scalar type backing the extension dtype
            arr_or_dtype = arr_or_dtype.type
        return condition(np.dtype(arr_or_dtype).type)

    # if we have an array-like, extract its dtype and fall through to the
    # pandas_dtype conversion below
    if hasattr(arr_or_dtype, "dtype"):
        arr_or_dtype = arr_or_dtype.dtype

    # we are not possibly a dtype: list-likes without a .dtype attribute
    # get the same NoneType treatment as None above
    elif is_list_like(arr_or_dtype):
        return condition(type(None))

    try:
        tipo = pandas_dtype(arr_or_dtype).type
    except (TypeError, ValueError, UnicodeEncodeError):
        # scalars that are not valid dtype specifiers fall back to NoneType;
        # anything else simply fails the check
        if is_scalar(arr_or_dtype):
            return condition(type(None))

        return False

    return condition(tipo)
Example #8
0
def summarize(__data, **kwargs):
    """Assign variables that are single number summaries of a DataFrame.


    Args:
        __data: a DataFrame
        **kwargs: new_col_name=value pairs, where value can be a function taking
                  a single argument for the data being operated on.

    Note
    ----

    Grouped DataFrames will produce one row for each group. Ungrouped DataFrames
    will produce a single row.


    Examples
    --------

    ::
        from siuba.data import mtcars
        mtcars >> summarize(mean = _.disp.mean(), n = n(_))

    """
    results = {}
    for k, v in kwargs.items():
        # allow both callables (applied to the data) and plain values
        res = v(__data) if callable(v) else v

        # validate operation returned a single result; length-0 results would
        # otherwise fail confusingly at DataFrame construction below
        if not is_scalar(res) and len(res) != 1:
            raise ValueError(
                "Summarize argument, %s, must return result of length 1 or a scalar."
                % k)

        # keep result, but use underlying array to avoid crazy index issues
        # on DataFrame construction (#138)
        results[k] = res.array if isinstance(res, pd.Series) else res

    # must pass index, or raises error when using all scalar values
    return DataFrame(results, index=[0])