Example #1
    def test_searchsorted(self, string_dtype):
        arr = pd.array(['a', 'b', 'c'], dtype=string_dtype)

        result = arr.searchsorted('a', side='left')
        assert is_scalar(result)
        assert result == 0

        result = arr.searchsorted('a', side='right')
        assert is_scalar(result)
        assert result == 1
Example #2
    def test_searchsorted(self):
        # https://github.com/pandas-dev/pandas/issues/8420
        # https://github.com/pandas-dev/pandas/issues/14522

        c1 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
                         categories=['cheese', 'milk', 'apple', 'bread'],
                         ordered=True)
        s1 = Series(c1)
        c2 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
                         categories=['cheese', 'milk', 'apple', 'bread'],
                         ordered=False)
        s2 = Series(c2)

        # Searching for single item argument, side='left' (default)
        res_cat = c1.searchsorted('apple')
        assert res_cat == 2
        assert is_scalar(res_cat)

        res_ser = s1.searchsorted('apple')
        assert res_ser == 2
        assert is_scalar(res_ser)

        # Searching for single item array, side='left' (default)
        res_cat = c1.searchsorted(['bread'])
        res_ser = s1.searchsorted(['bread'])
        exp = np.array([3], dtype=np.intp)
        tm.assert_numpy_array_equal(res_cat, exp)
        tm.assert_numpy_array_equal(res_ser, exp)

        # Searching for several items array, side='right'
        res_cat = c1.searchsorted(['apple', 'bread'], side='right')
        res_ser = s1.searchsorted(['apple', 'bread'], side='right')
        exp = np.array([3, 5], dtype=np.intp)
        tm.assert_numpy_array_equal(res_cat, exp)
        tm.assert_numpy_array_equal(res_ser, exp)

        # Searching for a single value that is not from the Categorical
        msg = r"Value\(s\) to be inserted must be in categories"
        with pytest.raises(KeyError, match=msg):
            c1.searchsorted('cucumber')
        with pytest.raises(KeyError, match=msg):
            s1.searchsorted('cucumber')

        # Searching for multiple values, one of which is not from the Categorical
        with pytest.raises(KeyError, match=msg):
            c1.searchsorted(['bread', 'cucumber'])
        with pytest.raises(KeyError, match=msg):
            s1.searchsorted(['bread', 'cucumber'])

        # searchsorted call for unordered Categorical
        msg = "Categorical not ordered"
        with pytest.raises(ValueError, match=msg):
            c2.searchsorted('apple')
        with pytest.raises(ValueError, match=msg):
            s2.searchsorted('apple')
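A note on the expected positions above: an ordered Categorical sorts by the declared category order, not alphabetically. A minimal sketch in plain pandas, mirroring the test data, makes this visible:

import pandas as pd

# category order defines sort order: cheese=0, milk=1, apple=2, bread=3
c = pd.Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
                   categories=['cheese', 'milk', 'apple', 'bread'],
                   ordered=True)
print(c.codes)                  # [0 1 2 3 3] -- already monotonic
print(c.searchsorted('apple'))  # 2, i.e. where code 2 would be inserted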
Example #3
def make_meta(x, index=None):
    """Create an empty pandas object containing the desired metadata.

    Parameters
    ----------
    x : dict, tuple, list, pd.Series, pd.DataFrame, pd.Index, dtype, scalar
        To create a DataFrame, provide a `dict` mapping of `{name: dtype}`, or
        an iterable of `(name, dtype)` tuples. To create a `Series`, provide a
        tuple of `(name, dtype)`. If a pandas object, names, dtypes, and index
        should match the desired output. If a dtype or scalar, a scalar of the
        same dtype is returned.
    index : pd.Index, optional
        Any pandas index to use in the metadata. If none provided, a
        `RangeIndex` will be used.

    Examples
    --------
    >>> make_meta([('a', 'i8'), ('b', 'O')])
    Empty DataFrame
    Columns: [a, b]
    Index: []
    >>> make_meta(('a', 'f8'))
    Series([], Name: a, dtype: float64)
    >>> make_meta('i8')
    1
    """
    if hasattr(x, '_meta'):
        return x._meta
    if isinstance(x, (pd.Series, pd.DataFrame)):
        return x.iloc[0:0]
    elif isinstance(x, pd.Index):
        return x[0:0]
    index = index if index is None else index[0:0]

    if isinstance(x, dict):
        return pd.DataFrame({c: _empty_series(c, d, index=index)
                             for (c, d) in x.items()}, index=index)
    if isinstance(x, tuple) and len(x) == 2:
        return _empty_series(x[0], x[1], index=index)
    elif isinstance(x, (list, tuple)):
        if not all(isinstance(i, tuple) and len(i) == 2 for i in x):
            raise ValueError("Expected iterable of tuples of (name, dtype), "
                             "got {0}".format(x))
        return pd.DataFrame({c: _empty_series(c, d, index=index) for (c, d) in x},
                            columns=[c for c, d in x], index=index)
    elif not hasattr(x, 'dtype') and x is not None:
        # could be a string, a dtype object, or a python type. Skip `None`,
        # because it is implicitly converted to `dtype('f8')`, which we don't
        # want here.
        try:
            dtype = np.dtype(x)
            return _scalar_from_dtype(dtype)
        except Exception:
            # Continue on to next check
            pass

    if is_scalar(x):
        return _nonempty_scalar(x)

    raise TypeError("Don't know how to create metadata from {0}".format(x))
Example #4
    def test_searchsorted_numeric_dtypes_scalar(self, any_real_dtype):
        arr = pd.array([1, 3, 90], dtype=any_real_dtype)
        result = arr.searchsorted(30)
        assert is_scalar(result)
        assert result == 2

        result = arr.searchsorted([30])
        expected = np.array([2], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)
Example #5
    def __setitem__(self, key, value):
        if is_list_like(value):
            if is_scalar(key):
                raise ValueError("setting an array element with a sequence.")
            value = [decimal.Decimal(v) for v in value]
        else:
            value = decimal.Decimal(value)

        key = check_array_indexer(self, key)
        self._data[key] = value
Example #6
    def test_iloc_setitem_with_scalar_index(self, indexer, value):
        # GH #19474
        # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
        # element-wise, not using "setter('A', ['Z'])".

        df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
        df.iloc[0, indexer] = value
        result = df.iloc[0, 0]

        assert is_scalar(result) and result == "Z"
Example #7
    def test_loc_setitem_with_scalar_index(self, indexer, value):
        # GH #19474
        # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated
        # element-wise, not using "setter('A', ['Z'])".

        df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
        df.loc[0, indexer] = value
        result = df.loc[0, 'A']

        assert is_scalar(result) and result == 'Z'
Example #8
    def _copy_if_else(
        self,
        cond,
        other=None,
        inplace=False,
        axis=None,
        level=None,
        errors="raise",
        try_cast=False,
        negate=False,
    ):
        inplace = validate_bool_kwarg(inplace, "inplace")
        axis = self._get_axis_number(axis, 0)

        if level is not None:
            raise err._unsupported_error("level", level)

        if axis not in (0, ):
            raise err._unsupported_error("axis", axis)

        if try_cast not in (False, ):
            raise err._unsupported_error("try_cast", try_cast)

        # Checks on cond
        cond = self._ensure_valid_frame(cond)

        if self.ndim < cond.ndim:
            raise ValueError(
                "cannot use the higher dimensional dataframe for 'cond'")
        _, cond = self._align_frame(cond, join="left", broadcast_axis=1)

        if any(not is_bool_dtype(dtype) for dtype in cond._get_dtypes()):
            raise ValueError("'cond' must have only boolean values")

        # Checks on other
        if not is_scalar(other):
            other = self._ensure_valid_frame(other)

            if self.ndim < other.ndim:
                raise ValueError(
                    "cannot use the higher dimensional dataframe for 'other'")
            _, other = self._align_frame(other, join="left", broadcast_axis=1)

            for l_dtype, r_dtype in zip(self._get_dtypes(),
                                        other._get_dtypes()):
                if l_dtype != r_dtype:
                    raise ValueError("'other' must have the same type as self")

            other = other._frame

        else:
            other = util.sanitize_scalar(other)

        frame = self._frame.copy_if_else(cond._frame, other, negate=negate)
        return self._create_or_update_frame(frame, inplace)
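The validation above mirrors the contract of pandas' where/mask (cond selects the values to keep; negate flips it for mask). A minimal pandas illustration of the semantics being reimplemented:

import pandas as pd

df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
cond = df > 1
print(df.where(cond, other=0))  # entries where cond is False become 0
print(df.mask(cond, other=0))   # the negated variant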
Example #9
    def _check_op_integer(self, result, expected, mask, s, op_name, other):
        # check comparisons that result in integer dtypes

        # to compare properly, we convert the expected
        # to float, mask to NaNs, and convert infs;
        # if we have uints then we process as uints,
        # then convert to float,
        # and we ultimately want to create an IntegerArray
        # for comparisons

        fill_value = 0

        # mod/rmod turn floating 0 into NaN while
        # integer works as expected (no nan)
        if op_name in ['__mod__', '__rmod__']:
            if is_scalar(other):
                if other == 0:
                    expected[s.values == 0] = 0
                else:
                    expected = expected.fillna(0)
            else:
                expected[(s.values == 0) &
                         ((expected == 0) | expected.isna())] = 0

        try:
            expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
            original = expected
            expected = expected.astype(s.dtype)

        except ValueError:

            expected = expected.astype(float)
            expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
            original = expected
            expected = expected.astype(s.dtype)

        expected[mask] = np.nan

        # assert that the expected astype is ok
        # (skip for unsigned as they have wrap around)
        if not s.dtype.is_unsigned_integer:
            original = pd.Series(original)

            # we need to fill with 0's to emulate what an astype('int') does
            # (truncation) for certain ops
            if op_name in ['__rtruediv__', '__rdiv__']:
                mask |= original.isna()
                original = original.fillna(0).astype('int')

            original = original.astype('float')
            original[mask] = np.nan
            tm.assert_series_equal(original, expected.astype('float'))

        # assert our expected result
        tm.assert_series_equal(result, expected)
Example #10
    def construct_result(self, result, columns, out_ndim, row_scalar):
        if out_ndim > 0:
            result = self.df.__ctor__(frame=result, columns=columns)
            if out_ndim == 1:
                result = result.squeeze(axis=1)
        else:
            result = result.to_pandas().squeeze()

        if row_scalar and not is_scalar(result) and len(result) == 0:
            raise _NotFoundError()
        return result
Example #11
def meta_nonempty_object(x):
    """Create a nonempty pandas object from the given metadata.

    Returns a pandas DataFrame, Series, or Index that contains two rows
    of fake data.
    """
    if is_scalar(x):
        return _nonempty_scalar(x)
    else:
        raise TypeError("Expected Index, Series, DataFrame, or scalar, "
                        "got {0}".format(type(x).__name__))
Example #12
    def construct_result(self, result, out_ndim, row_scalar):
        if out_ndim == 1:
            result = self.sr.__ctor__(frame=result, name=self.sr.name)
        else:
            assert out_ndim == 0
            result = result.to_pandas().squeeze()

        if row_scalar and not is_scalar(result) and len(result) == 0:
            raise _NotFoundError()

        return result
Example #13
def meta_nonempty_object(x):
    """Create a nonempty pandas object from the given metadata.

    Returns a pandas DataFrame, Series, or Index that contains two rows
    of fake data.
    """
    if is_scalar(x):
        return _nonempty_scalar(x)
    else:
        raise TypeError("Expected Index, Series, DataFrame, or scalar, "
                        "got {0}".format(type(x).__name__))
Example #14
    def create_column_from_scalar(self, value):
        assert is_scalar(value)
        assert self._index is not None

        value_dtype = ty.infer_dtype(value)
        column = _create_column(
            self._index.storage, value_dtype, nullable=value is None
        )
        column.fill(value, self._index.volume)

        return Table(self._runtime, self._index, [column])
Example #15
    def insert(self, loc, value):
        assert loc >= 0 and loc <= len(self._columns)

        if is_scalar(value):
            value = self.create_column_from_scalar(value)

        assert len(value._columns) == 1
        column = value._columns[0]

        return self.replace_columns(
            self._columns[:loc] + [column] + self._columns[loc:]
        )
Example #16
def meta_nonempty_object(x):
    """Create a nonempty pandas object from the given metadata.

    Returns a pandas DataFrame, Series, or Index that contains two rows
    of fake data.
    """
    if is_scalar(x):
        return _nonempty_scalar(x)
    else:
        raise TypeError(
            "Expected Pandas-like Index, Series, DataFrame, or scalar, "
            f"got {typename(type(x))}")
Example #17
def flatten_preds_if_necessary(df):
    """
    Flatten predictions if they are a list nested inside a list.
    This is necessary because of an issue with the predict.py script prior to the update performed on 15-09-2021.
    """
    cols = [col for col in df.columns if 'pred' in col]
    for col in cols:
        test = df[col].iloc[0]
        if is_scalar(test[0]):
            continue
        df[col] = df[col].str[0]
    return df
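A small self-contained sketch of the repair this helper performs (the column name 'pred' and the doubly nested lists are assumptions for illustration):

import pandas as pd
from pandas.api.types import is_scalar

df = pd.DataFrame({'pred': [[[0.9]], [[0.1]]]})  # lists wrapped in lists
if not is_scalar(df['pred'].iloc[0][0]):
    df['pred'] = df['pred'].str[0]  # .str[0] takes element 0 of each list
print(df['pred'].tolist())          # [[0.9], [0.1]]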
Example #18
    def __drop_na(self, df, index, columns):
        add_cat = series.add_category
        fillna = lambda s: add_cat(s, self.na_cat).fillna(self.na_cat)
        to_list = lambda x: [] if x is None else [x] if is_scalar(x) else x

        df = df.copy()
        if self.na == 'drop':
            return df.dropna(subset=to_list(columns))
        else:
            all_items = to_list(columns) + to_list(index)
            selection = [item for item in all_items if item is not None]
            df[selection] = df[selection].apply(fillna)
            return df
Example #19
def grouper_match(grp1: GroupByAgg, grp2):
    # No need to broadcast against a scalar (pandas will handle) ----
    if is_scalar(grp2):
        return grp1.obj, grp2, grp1
    
    # Broadcasting requires: non-agg groupby with same original grouper ----
    if not isinstance(grp2, SeriesGroupBy):
        raise TypeError("grp2 must be a scalar or SeriesGroupBy")

    if not is_compatible(grp1, grp2):
        raise ValueError("groups must have matching groupers")

    return broadcast_agg(grp1), grp2.obj, grp2
Example #20
    def fillna(self, value=None, method=None, limit=None):
        """ Fill NA/NaN values using the specified method.

        Parameters
        ----------
        value : scalar, array-like
            If a scalar value is passed it is used to fill all missing values.
            Alternatively, an array-like 'value' can be given. It's expected
            that the array-like have the same length as 'self'.
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
            Method to use for filling holes in reindexed Series
            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap
        limit : int, default None
            If method is specified, this is the maximum number of consecutive
            NaN values to forward/backward fill. In other words, if there is
            a gap with more than this number of consecutive NaNs, it will only
            be partially filled. If method is not specified, this is the
            maximum number of entries along the entire axis where NaNs will be
            filled.

        Returns
        -------
        filled : ExtensionArray with NA/NaN filled
        """
        from pandas.api.types import is_scalar
        from pandas.util._validators import validate_fillna_kwargs
        from pandas.core.missing import pad_1d, backfill_1d

        value, method = validate_fillna_kwargs(value, method)

        mask = self.isna()

        if not is_scalar(value):
            if len(value) != len(self):
                raise ValueError("Length of 'value' does not match. Got ({}) "
                                 " expected {}".format(len(value), len(self)))
            value = value[mask]

        if mask.any():
            if method is not None:
                func = pad_1d if method == 'pad' else backfill_1d
                new_values = func(self.astype(object), limit=limit, mask=mask)
                new_values = self._constructor_from_sequence(new_values)
            else:
                # fill with value
                new_values = self.copy()
                new_values[mask] = value
        else:
            new_values = self.copy()
        return new_values
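To make the pad/backfill and limit semantics described in the docstring concrete, a quick sketch on a plain Series (ffill/bfill are the method spellings pandas exposes directly):

import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, np.nan, 4.0])
print(s.ffill(limit=1).tolist())  # [1.0, 1.0, nan, 4.0] -- gap only partly filled
print(s.bfill().tolist())         # [1.0, 4.0, 4.0, 4.0]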
Example #21
    def __setitem__(self, key, item):
        (row_loc, row_scalar, _) = self._validate_locator(key)

        sr = self.sr

        self._validate_lhs(sr)

        if row_scalar:
            if not is_scalar(item):
                raise ValueError("Value must be a scalar")

            result = sr._frame.write_at(row_loc, item)

        elif isinstance(row_loc, slice):
            if row_loc == slice(None):
                index = sr._frame._index

                item = self._align_rhs(sr, index, item)

                result = item

            else:
                (index,
                 bounds) = sr._frame.slice_index_by_slice(row_loc, False)

                item = self._align_rhs(sr, index, item)

                result = sr._frame.scatter_by_slice(index, bounds, item)

        else:
            row_loc = sr._ensure_valid_frame(row_loc)

            if not row_loc._is_series:
                raise ValueError("indexer must be 1-dimensional")

            if not is_bool_dtype(row_loc.dtype):
                raise err._unsupported_error(
                    "only boolean indexers are supported now")

            # This may raise an exception if the indexer size doesn't match
            # with the index of the LHS.
            row_loc = row_loc._frame.update_legate_index(sr._raw_index)

            index = sr._frame.slice_index_by_boolean_mask(row_loc)

            item = self._align_rhs(sr, index, item)

            result = sr._frame.scatter_by_boolean_mask(row_loc, index, item)

        self.update_column(result)
Example #22
def _parse_tuple(tup):
    """Unpack the user input for getitem and setitem and compute ndim

    loc[a] -> ([a], :), 1D
    loc[[a,b],] -> ([a,b], :),
    loc[a,b] -> ([a], [b]), 0D
    """
    row_loc, col_loc = slice(None), slice(None)

    if is_tuple(tup):
        row_loc = tup[0]
        if len(tup) == 2:
            col_loc = tup[1]
        if len(tup) > 2:
            raise IndexingError('Too many indexers')
    else:
        row_loc = tup

    ndim = _compute_ndim(row_loc, col_loc)
    row_loc = [row_loc] if is_scalar(row_loc) else row_loc
    col_loc = [col_loc] if is_scalar(col_loc) else col_loc

    return row_loc, col_loc, ndim
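A standalone sketch of the same normalization, assuming plain tuples and pandas' is_scalar in place of the internal is_tuple and _compute_ndim helpers (the ndim computation is omitted here):

from pandas.api.types import is_scalar

def parse_tuple_sketch(tup):
    row_loc, col_loc = slice(None), slice(None)
    if isinstance(tup, tuple):
        row_loc = tup[0]
        if len(tup) == 2:
            col_loc = tup[1]
        if len(tup) > 2:
            raise IndexError('Too many indexers')
    else:
        row_loc = tup
    # scalars become one-element lists so downstream code always sees list-likes
    row_loc = [row_loc] if is_scalar(row_loc) else row_loc
    col_loc = [col_loc] if is_scalar(col_loc) else col_loc
    return row_loc, col_loc

print(parse_tuple_sketch('a'))         # (['a'], slice(None, None, None))
print(parse_tuple_sketch(('a', 'b')))  # (['a'], ['b'])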
Example #23
    def _align_rhs(self, lhs, align_index, rhs):
        if not is_scalar(rhs):
            to_align = self.df.__ctor__(index=align_index, columns=lhs.columns)
            rhs = to_align._ensure_valid_frame(rhs)
            _, aligned = to_align._align_frame(rhs,
                                               join="left",
                                               broadcast_axis=1)
            # FIXME: For now we allow only aligned frames.
            if not (rhs._is_series or rhs.columns.equals(aligned.columns)):
                raise err._unsupported_error(
                    "Unaligned frames cannot be used for in-place updates")
            rhs = aligned._frame

        return rhs
Example #24
def infer_dtype_bydata(data):
    d_type = DataType.UNKNOWN
    if is_scalar(data):
        d_type = infer_dtype_by_scaladata(data)
        return d_type

    if is_list_like(data) or is_array_like(data):
        failed = False
        try:
            type_str = infer_dtype(data)
        except TypeError:
            failed = True
        if not failed:
            d_type = dtype_str_map.get(type_str, DataType.UNKNOWN)
            if is_numeric_datatype(d_type):
                d_type = DataType.FLOAT_VECTOR
            else:
                d_type = DataType.UNKNOWN

            return d_type

    if d_type == DataType.UNKNOWN:
        try:
            elem = data[0]
        except:
            elem = None

        if elem is not None and is_scalar(elem):
            d_type = infer_dtype_by_scaladata(elem)

    if d_type == DataType.UNKNOWN:
        _dtype = getattr(data, "dtype", None)

        if _dtype is not None:
            d_type = map_numpy_dtype_to_datatype(_dtype)

    return d_type
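The helper above delegates the list-like branch to pandas' type inference; a quick look at what pandas.api.types reports for typical inputs (the mapping to DataType values is the dtype_str_map used above):

from pandas.api.types import infer_dtype, is_scalar

print(is_scalar(3.14))          # True  -> handled by the scalar branch
print(infer_dtype([1.0, 2.0]))  # 'floating' -> numeric, becomes FLOAT_VECTOR
print(infer_dtype(['a', 'b']))  # 'string'   -> non-numeric, stays UNKNOWN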
Example #25
    def _validate_locators(self, tup):
        if util.is_tuple(tup) and len(tup) >= 1:
            if len(tup) > 2:
                raise ValueError("Too many indexers")
            row_loc = tup[0]
            col_loc = tup[1] if len(tup) == 2 else slice(None)
        else:
            row_loc = tup
            col_loc = slice(None)

        if isinstance(row_loc, slice) and row_loc.step is not None:
            raise err._unsupported_error(
                "row slicer cannot have a step for now")

        row_scalar = is_scalar(row_loc) or util.is_tuple(row_loc)
        col_scalar = is_scalar(col_loc) or util.is_tuple(col_loc)

        if self.is_at:
            if not util.is_tuple(tup) or len(tup) != 2:
                raise ValueError("Need two indexers")

            if self.is_loc:
                if not row_scalar or not col_scalar:
                    raise ValueError(
                        "At based indexing can only have scalar indexers")
            else:
                if not is_integer(row_loc) or not is_integer(col_loc):
                    raise ValueError(
                        "iAt based indexing can only have integer indexers")

        return (
            row_loc,
            [col_loc] if col_scalar else col_loc,
            row_scalar,
            col_scalar,
            _compute_ndim(row_loc, col_loc),
        )
Example #26
    def _binary_op(self, op, other, axis=None, level=None, fill_value=None):
        # Retrieve arguments and convert them to default ones if necessary
        axis = self._get_axis_number(axis)

        # Raise an exception for cases that are not implemented yet
        if level is not None:
            raise err._unsupported_error("level", level)

        other = self._ensure_valid_frame(other)

        if not self._is_series and not is_scalar(other):
            if other._is_series and axis not in (0, ):
                raise err._unsupported_error("axis", axis)

        # Convert the RHS to a frame unless it's a scalar
        if is_scalar(other):
            new_self = self
            other = util.sanitize_scalar(other)

        else:
            new_self, other = self._align_frame(other,
                                                join="outer",
                                                fill_value=fill_value,
                                                broadcast_axis=1)
            other = other._frame

        new_frame = new_self._frame.binary_op(op, other)

        if new_self._is_series:
            from .series import Series

            return Series(frame=new_frame, name=new_self.name)
        else:
            from .dataframe import DataFrame

            return DataFrame(frame=new_frame, columns=new_self.columns)
Example #27
    def __getitem__(self, key):
        if is_scalar(key) or isinstance(key, tuple):
            return self._get_columns_by_labels(key)
        elif isinstance(key, slice):
            return self.iloc[key]
        elif isinstance(key, (DataFrame, pandas.DataFrame)):
            return self.where(key)
        elif isinstance(key, Series):
            return self.loc[key]
        elif is_list_like(key):
            if is_bool_indexer(key):
                return self.loc[key]
            else:
                return self._get_columns_by_labels(key)
        else:
            raise ValueError(f"Unsupported key type '{type(key).__name__}'")
Example #28
        def method(self, other):
            is_arithmetic = op.__name__ in ops.ARITHMETIC_BINOPS

            is_other_array = False
            if not is_scalar(other):
                is_other_array = True
                other = np.asarray(other)

            self_is_na = self.isna()
            other_is_na = pd.isna(other)
            mask = self_is_na | other_is_na

            chunks = []
            mask_chunks = []
            start = 0
            for chunk_array in self._arrow_array.chunks:
                chunk_array = np.asarray(chunk_array.to_pandas())
                end = start + len(chunk_array)
                chunk_mask = mask[start:end]
                chunk_valid = ~chunk_mask

                if is_arithmetic:
                    result = np.empty(chunk_array.shape, dtype=object)
                else:
                    result = np.zeros(chunk_array.shape, dtype=bool)

                chunk_other = other
                if is_other_array:
                    chunk_other = other[start:end]
                    chunk_other = chunk_other[chunk_valid]

                # calculate only for both not None
                result[chunk_valid] = op(chunk_array[chunk_valid], chunk_other)

                if is_arithmetic:
                    chunks.append(
                        pa.array(result, type=pa.string(), from_pandas=True))
                else:
                    chunks.append(result)
                    mask_chunks.append(chunk_mask)

            if is_arithmetic:
                return ArrowStringArray(pa.chunked_array(chunks))
            else:
                return pd.arrays.BooleanArray(np.concatenate(chunks),
                                              np.concatenate(mask_chunks))
Example #29
def is_scalar(val):
    """Return True if given object is scalar.

    Parameters
    ----------
    val : object
        Possibly scalar object.

    Returns
    -------
    bool
        Return True if given object is scalar.
    """
    return (isinstance(val, cudf._lib.scalar.DeviceScalar)
            or isinstance(val, cudf.Scalar)
            or isinstance(val, cudf.core.tools.datetimes.DateOffset)
            or pd_types.is_scalar(val))
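For reference, the pandas predicate this wrapper falls back to treats strings and None as scalars, but no list-likes:

from pandas.api.types import is_scalar

print(is_scalar(1))       # True
print(is_scalar('a'))     # True
print(is_scalar(None))    # True
print(is_scalar([1]))     # False
print(is_scalar((1, 2)))  # False -- tuples are list-like, not scalar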
Example #30
    def _get_columns_by_labels(self, key):
        key_scalar = is_scalar(key) or isinstance(key, tuple)
        keys = util.to_list_if_scalar(key)
        columns = self.columns

        # Validate keys
        for key in keys:
            if key not in columns:
                raise KeyError(key)

        indexer = columns.get_indexer_for(keys)
        new_self = self._slice_columns(indexer)
        if key_scalar:
            assert len(new_self.columns) == 1
            return new_self.squeeze(axis=1)
        else:
            return new_self
Example #31
    def __getitem__(self, key):
        (row_loc, row_scalar, out_ndim) = self._validate_locator(key)

        sr = self.sr
        if row_scalar:
            index = sr._raw_index

            if index.nlevels == 1 and not is_scalar(row_loc):
                raise KeyError("row indexer must be a scalar")

            mask = index == row_loc

            result = sr._frame.select(mask)

            # If the frame has a multi-index, we need to check if it was
            # a partial match and handle the output accordingly (only to
            # make the output the same as Pandas' and for no other reason...)
            row_loc_tpl = util.to_tuple_if_scalar(row_loc)
            if index.nlevels > len(row_loc_tpl):
                # If this is a partial match, the output should not be
                # squeezed down to a scalar,
                out_ndim += 1
                # and the matched levels should be dropped for some reason.
                result = result.droplevel(range(len(row_loc_tpl)))

        elif isinstance(row_loc, slice):
            if row_loc == slice(None):
                result = sr._frame
            else:
                result = sr._frame.slice_rows_by_slice(row_loc, True)

        else:
            row_loc = sr._ensure_valid_frame(row_loc)

            _, row_loc = sr._align_frame(row_loc, join="left", axis=0)

            if not is_bool_dtype(row_loc.dtype):
                raise err._unsupported_error(
                    "only boolean indexers are supported now")

            result = sr._frame.select(row_loc._frame)

        try:
            return super().construct_result(result, out_ndim, row_scalar)
        except _NotFoundError:
            raise KeyError(row_loc)
Example #32
    def _write_one_pair(key, value):
        if is_scalar(value):
            if type(value).__module__ == 'numpy':
                value = value.item()
            scalar_dict[key] = value
        elif isinstance(value, np.ndarray):
            self.write_array(sub_group, key, value)
        elif isinstance(value, pd.DataFrame):
            self.write_dataframe(sub_group, key, value)
        elif is_dict_like(value):
            self.write_mapping(sub_group, key, value)
        elif issparse(value):
            assert isinstance(value, csr_matrix)
            self.write_csr(sub_group, key, value)
        else:
            # assume value is either a list or a tuple; convert it to np.ndarray
            self.write_array(sub_group, key, value.astype(str) if is_categorical_dtype(value) else np.array(value))
Example #33
    def __setitem__(self, key, value):
        if isinstance(value, (pd.Index, pd.Series)):
            value = value.to_numpy()

        key = check_array_indexer(self, key)
        scalar_key = is_scalar(key)

        # validate new items
        if scalar_key:
            if pd.isna(value):
                value = None
            elif not is_list_like(value):
                raise ValueError('Must provide list.')

        array = np.asarray(self._arrow_array.to_pandas())
        array[key] = value
        self._arrow_array = pa.chunked_array(
            [pa.array(array, type=self.dtype.arrow_type)])
Example #34
    def __call__(self, arg):
        if is_scalar(arg):
            ret = pd.to_datetime(arg, errors=self._errors, dayfirst=self._dayfirst,
                                 yearfirst=self._yearfirst, utc=self._utc,
                                 format=self._format, exact=self._exact,
                                 unit=self._unit, infer_datetime_format=self._infer_datetime_format,
                                 origin=self._origin, cache=self._cache)
            return astensor(ret)

        dtype = np.datetime64(1, 'ns').dtype
        if isinstance(arg, (pd.Series, SERIES_TYPE)):
            arg = asseries(arg)
            self._object_type = ObjectType.series
            return self.new_series([arg], shape=arg.shape,
                                   dtype=dtype, index_value=arg.index_value,
                                   name=arg.name)
        if is_dict_like(arg) or isinstance(arg, DATAFRAME_TYPE):
            arg = asdataframe(arg)
            columns = arg.columns_value.to_pandas().tolist()
            if sorted(columns) != sorted(['year', 'month', 'day']):
                missing = ','.join(c for c in ['day', 'month', 'year'] if c not in columns)
                raise ValueError('to assemble mappings requires at least '
                                 'that [year, month, day] be specified: [{}] is missing'.format(missing))
            self._object_type = ObjectType.series
            return self.new_series([arg], shape=(arg.shape[0],),
                                   dtype=dtype, index_value=arg.index_value)
        elif isinstance(arg, (pd.Index, INDEX_TYPE)):
            arg = asindex(arg)
            self._object_type = ObjectType.index
            return self.new_series([arg], shape=arg.shape,
                                   dtype=dtype,
                                   index_value=parse_index(pd.Index([], dtype=dtype),
                                                           self._params, arg),
                                   name=arg.name)
        else:
            arg = astensor(arg)
            if arg.ndim != 1:
                raise TypeError('arg must be a string, datetime, '
                                'list, tuple, 1-d tensor, or Series')
            self._object_type = ObjectType.index
            return self.new_index([arg], shape=arg.shape,
                                  dtype=dtype,
                                  index_value=parse_index(pd.Index([], dtype=dtype),
                                                          self._params, arg))
Example #35
    def _ensure_valid_frame(self, data, copy=False):
        if is_scalar(data) or util.is_tuple(data):
            return data
        elif isinstance(data, Frame):
            return data.copy(deep=copy)
        elif isinstance(data, pandas.DataFrame):
            from .dataframe import DataFrame

            return DataFrame(data)
        elif isinstance(data, pandas.Series):
            from .series import Series

            return Series(data)
        elif isinstance(data, np.ndarray):
            # TODO: Here we assume that the axis to which we align the ndarray
            #       is the index, but we really should be choosing between
            #       the index and the columns, depending on the axis argument.
            if data.ndim == 1:
                from .series import Series

                if len(self) != len(data):
                    raise ValueError(
                        f"Length of passed values is {len(self)}, "
                        f"index implies {len(data)}.")

                name = self.name if self._is_series else None
                return Series(data, name=name, index=self._raw_index)
            elif data.ndim == 2:
                if self._is_series:
                    raise Exception("Data must be 1-dimensional")

                from .dataframe import DataFrame

                return DataFrame(data,
                                 columns=self.columns,
                                 index=self._raw_index)
            else:
                raise ValueError("array must be either 1-d or 2-d")

        elif is_list_like(data):
            return self._ensure_valid_frame(np.array(data))

        else:
            raise ValueError(f"unsupported value type '{type(data)}'")
Example #36
    def __setitem__(self, key, value):

        # need to not use `not value` on numpy arrays
        if isinstance(value, (list, tuple)) and (not value):
            # doing nothing here seems to be ok
            return

        if isinstance(value, _Quantity):
            value = value.to(self.units).magnitude
        elif is_list_like(value) and isinstance(value[0], _Quantity):
            value = [item.to(self.units).magnitude for item in value]
        _is_scalar = is_scalar(value)
        if _is_scalar:
            value = [value]

        if _is_scalar:
            value = value[0]

        self._data[key] = value
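The opening comment is worth unpacking: `not value` cannot be used on the raw input because the truth value of a multi-element numpy array is ambiguous, which is why the guard is restricted to list and tuple. A short demonstration:

import numpy as np

try:
    bool(np.array([1, 2]))  # truthiness of a multi-element array raises
except ValueError as e:
    print(e)
print(not [])  # True -- an empty list is simply falsy, hence the isinstance guard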
Example #37
def test_searchsorted_monotonic(indices):
    # GH17271
    # not implemented for tuple searches in MultiIndex
    # or interval searches in IntervalIndex
    if isinstance(indices, (MultiIndex, IntervalIndex)):
        return

    # nothing to test if the index is empty
    if indices.empty:
        return
    value = indices[0]

    # determine the expected results (handle dupes for 'right')
    expected_left, expected_right = 0, (indices == value).argmin()
    if expected_right == 0:
        # all values are the same, expected_right should be length
        expected_right = len(indices)

    # test _searchsorted_monotonic in all cases
    # test searchsorted only for increasing
    if indices.is_monotonic_increasing:
        ssm_left = indices._searchsorted_monotonic(value, side='left')
        assert is_scalar(ssm_left)
        assert expected_left == ssm_left

        ssm_right = indices._searchsorted_monotonic(value, side='right')
        assert is_scalar(ssm_right)
        assert expected_right == ssm_right

        ss_left = indices.searchsorted(value, side='left')
        assert is_scalar(ss_left)
        assert expected_left == ss_left

        ss_right = indices.searchsorted(value, side='right')
        assert is_scalar(ss_right)
        assert expected_right == ss_right

    elif indices.is_monotonic_decreasing:
        ssm_left = indices._searchsorted_monotonic(value, side='left')
        assert is_scalar(ssm_left)
        assert expected_left == ssm_left

        ssm_right = indices._searchsorted_monotonic(value, side='right')
        assert is_scalar(ssm_right)
        assert expected_right == ssm_right

    else:
        # non-monotonic should raise.
        with pytest.raises(ValueError):
            indices._searchsorted_monotonic(value, side='left')
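A quick sanity check of the expected_left/expected_right bookkeeping used above, on an increasing index whose first value is duplicated:

import pandas as pd

idx = pd.Index([1, 1, 2, 3])
value = idx[0]
print(idx.searchsorted(value, side='left'))   # 0
print(idx.searchsorted(value, side='right'))  # 2 == (idx == value).argmin()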
Example #38
def meta_nonempty(x):
    """Create a nonempty pandas object from the given metadata.

    Returns a pandas DataFrame, Series, or Index that contains two rows
    of fake data.
    """
    if isinstance(x, pd.Index):
        return _nonempty_index(x)
    elif isinstance(x, pd.Series):
        idx = _nonempty_index(x.index)
        return _nonempty_series(x, idx)
    elif isinstance(x, pd.DataFrame):
        idx = _nonempty_index(x.index)
        data = {i: _nonempty_series(x.iloc[:, i], idx)
                for i, c in enumerate(x.columns)}
        res = pd.DataFrame(data, index=idx,
                           columns=np.arange(len(x.columns)))
        res.columns = x.columns
        return res
    elif is_scalar(x):
        return _nonempty_scalar(x)
    else:
        raise TypeError("Expected Index, Series, DataFrame, or scalar, "
                        "got {0}".format(type(x).__name__))
Example #39
    def test_search_sorted_datetime64_scalar(self, arr, val):
        arr = pd.array(arr)
        result = arr.searchsorted(val)
        assert is_scalar(result)
        assert result == 1