Example #1
        def _test_dtype(dtype, can_hold_na, writeable=True):
            data = np.random.randint(0, 2, (5, 3)).astype(dtype)
            data.flags.writeable = writeable

            indexer = [2, 1, 0, 1]
            out0 = np.empty((4, 3), dtype=dtype)
            out1 = np.empty((5, 4), dtype=dtype)
            algos.take_nd(data, indexer, out=out0, axis=0)
            algos.take_nd(data, indexer, out=out1, axis=1)
            expected0 = data.take(indexer, axis=0)
            expected1 = data.take(indexer, axis=1)
            tm.assert_almost_equal(out0, expected0)
            tm.assert_almost_equal(out1, expected1)

            indexer = [2, 1, 0, -1]
            out0 = np.empty((4, 3), dtype=dtype)
            out1 = np.empty((5, 4), dtype=dtype)
            if can_hold_na:
                algos.take_nd(data, indexer, out=out0, axis=0)
                algos.take_nd(data, indexer, out=out1, axis=1)
                expected0 = data.take(indexer, axis=0)
                expected1 = data.take(indexer, axis=1)
                expected0[3, :] = np.nan
                expected1[:, 3] = np.nan
                tm.assert_almost_equal(out0, expected0)
                tm.assert_almost_equal(out1, expected1)
            else:
                for i, out in enumerate([out0, out1]):
                    with tm.assertRaisesRegexp(TypeError, self.fill_error):
                        algos.take_nd(data, indexer, out=out, axis=i)
                    # no exception otherwise
                    data.take(indexer, out=out, axis=i)
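
A note on what this example (and Example #4 below) exercises: take_nd treats -1 in the indexer as a missing position and, when the input dtype cannot hold NaN, promotes the result dtype (int to float64, bool to object) instead of filling in place. A minimal sketch, assuming take_nd is importable from pandas.core.algorithms (a private API whose location varies across pandas versions):

import numpy as np
from pandas.core.algorithms import take_nd  # private API; location varies by version

arr = np.array([10, 20, 30], dtype=np.int64)
# -1 marks a fill position; int64 cannot hold NaN, so the result is
# promoted to float64 instead of being filled in place.
result = take_nd(arr, np.array([2, 0, -1]))
# result -> array([30., 10., nan]); result.dtype -> float64

This is also why the test above expects a TypeError when an integer out= buffer is supplied together with a -1 in the indexer: the NaN fill value cannot be represented in the requested output dtype.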
Example #2
    def test_2d_fill_nonna(self, dtype_fill_out_dtype):
        dtype, fill_value, out_dtype = dtype_fill_out_dtype
        data = np.random.randint(0, 2, (5, 3)).astype(dtype)
        indexer = [2, 1, 0, -1]

        result = algos.take_nd(data, indexer, axis=0,
                               fill_value=fill_value)
        assert ((result[[0, 1, 2], :] == data[[2, 1, 0], :]).all())
        assert ((result[3, :] == fill_value).all())
        assert (result.dtype == out_dtype)

        result = algos.take_nd(data, indexer, axis=1,
                               fill_value=fill_value)
        assert ((result[:, [0, 1, 2]] == data[:, [2, 1, 0]]).all())
        assert ((result[:, 3] == fill_value).all())
        assert (result.dtype == out_dtype)

        indexer = [2, 1, 0, 1]
        result = algos.take_nd(data, indexer, axis=0,
                               fill_value=fill_value)
        assert ((result[[0, 1, 2, 3], :] == data[indexer, :]).all())
        assert (result.dtype == dtype)

        result = algos.take_nd(data, indexer, axis=1,
                               fill_value=fill_value)
        assert ((result[:, [0, 1, 2, 3]] == data[:, indexer]).all())
        assert (result.dtype == dtype)
Example #3
def _bins_to_cuts(x, bins, right=True, labels=None,
                  precision=3, include_lowest=False,
                  dtype=None, duplicates='raise'):

    if duplicates not in ['raise', 'drop']:
        raise ValueError("invalid value for 'duplicates' parameter, "
                         "valid options are: raise, drop")

    if isinstance(bins, IntervalIndex):
        # we have a fast-path here
        ids = bins.get_indexer(x)
        result = algos.take_nd(bins, ids)
        result = Categorical(result, categories=bins, ordered=True)
        return result, bins

    unique_bins = algos.unique(bins)
    if len(unique_bins) < len(bins) and len(bins) != 2:
        if duplicates == 'raise':
            raise ValueError("Bin edges must be unique: {bins!r}.\nYou "
                             "can drop duplicate edges by setting "
                             "the 'duplicates' kwarg".format(bins=bins))
        else:
            bins = unique_bins

    side = 'left' if right else 'right'
    ids = _ensure_int64(bins.searchsorted(x, side=side))

    if include_lowest:
        # Numpy 1.9 support: ensure this mask is a Numpy array
        ids[np.asarray(x == bins[0])] = 1

    na_mask = isna(x) | (ids == len(bins)) | (ids == 0)
    has_nas = na_mask.any()

    if labels is not False:
        if labels is None:
            labels = _format_labels(bins, precision, right=right,
                                    include_lowest=include_lowest,
                                    dtype=dtype)
        else:
            if len(labels) != len(bins) - 1:
                raise ValueError('Bin labels must be one fewer than '
                                 'the number of bin edges')
        if not is_categorical_dtype(labels):
            labels = Categorical(labels, categories=labels, ordered=True)

        np.putmask(ids, na_mask, 0)
        result = algos.take_nd(labels, ids - 1)

    else:
        result = ids - 1
        if has_nas:
            result = result.astype(np.float64)
            np.putmask(result, na_mask, np.nan)

    return result, bins
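
_bins_to_cuts is the private worker behind pd.cut, so the duplicates handling above is reachable through the public API. A small usage sketch:

import pandas as pd

x = [1, 2, 3, 4, 5]
# a repeated bin edge raises ValueError by default; duplicates="drop"
# collapses the edges to their unique values, i.e. [0, 3, 5] here
out = pd.cut(x, bins=[0, 3, 3, 5], duplicates="drop")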
Example #4
    def test_2d_bool(self):
        arr = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 1]], dtype=bool)

        result = algos.take_nd(arr, [0, 2, 2, 1])
        expected = arr.take([0, 2, 2, 1], axis=0)
        tm.assert_numpy_array_equal(result, expected)

        result = algos.take_nd(arr, [0, 2, 2, 1], axis=1)
        expected = arr.take([0, 2, 2, 1], axis=1)
        tm.assert_numpy_array_equal(result, expected)

        result = algos.take_nd(arr, [0, 2, -1])
        assert result.dtype == np.object_
Example #5
    def test_2d_float32(self):
        arr = np.random.randn(4, 3).astype(np.float32)
        indexer = [0, 2, -1, 1, -1]

        # axis=0
        result = algos.take_nd(arr, indexer, axis=0)
        result2 = np.empty_like(result)
        algos.take_nd(arr, indexer, axis=0, out=result2)
        tm.assert_almost_equal(result, result2)

        expected = arr.take(indexer, axis=0)
        expected[[2, 4], :] = np.nan
        tm.assert_almost_equal(result, expected)

        # this now accepts a float32 out buffer (the test formerly used float64)
        out = np.empty((len(indexer), arr.shape[1]), dtype='float32')
        algos.take_nd(arr, indexer, out=out)  # it works!

        # axis=1
        result = algos.take_nd(arr, indexer, axis=1)
        result2 = np.empty_like(result)
        algos.take_nd(arr, indexer, axis=1, out=result2)
        tm.assert_almost_equal(result, result2)

        expected = arr.take(indexer, axis=1)
        expected[:, [2, 4]] = np.nan
        tm.assert_almost_equal(result, expected)
Example #6
    def get_result(self):
        # TODO: find a better way than this masking business

        values, value_mask = self.get_new_values()
        columns = self.get_new_columns()
        index = self.get_new_index()

        # filter out missing levels
        if values.shape[1] > 0:
            col_inds, obs_ids = _compress_group_index(self.sorted_labels[-1])
            # rare case, level values not observed
            if len(obs_ids) < self.full_shape[1]:
                inds = (value_mask.sum(0) > 0).nonzero()[0]
                values = algos.take_nd(values, inds, axis=1)
                columns = columns[inds]

        # may need to coerce categoricals here
        if self.is_categorical is not None:
            categories = self.is_categorical.categories
            ordered = self.is_categorical.ordered
            values = [
                Categorical(values[:, i], categories=categories, ordered=ordered) for i in range(values.shape[-1])
            ]

        return DataFrame(values, index=index, columns=columns)
Example #7
    def test_2d_other_dtypes(self):
        arr = np.random.randn(10, 5).astype(np.float32)

        indexer = [1, 2, 3, -1]

        # axis=0
        result = algos.take_nd(arr, indexer, axis=0)
        expected = arr.take(indexer, axis=0)
        expected[-1] = np.nan
        tm.assert_almost_equal(result, expected)

        # axis=1
        result = algos.take_nd(arr, indexer, axis=1)
        expected = arr.take(indexer, axis=1)
        expected[:, -1] = np.nan
        tm.assert_almost_equal(result, expected)
Example #8
def _reorder_by_uniques(uniques, labels):
    # sorter is index where elements ought to go
    sorter = uniques.argsort()

    # reverse_indexer is where elements came from
    reverse_indexer = np.empty(len(sorter), dtype=np.int64)
    reverse_indexer.put(sorter, np.arange(len(sorter)))

    mask = labels < 0

    # move labels to right locations (ie, unsort ascending labels)
    labels = algos.take_nd(reverse_indexer, labels, allow_fill=False)
    np.putmask(labels, mask, -1)

    # sort observed ids
    uniques = algos.take_nd(uniques, sorter, allow_fill=False)

    return uniques, labels
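
The argsort/reverse-indexer trick above is compact enough to miss; a self-contained NumPy sketch, independent of pandas internals:

import numpy as np

uniques = np.array([30, 10, 20])
labels = np.array([0, 2, 1, 0])

sorter = uniques.argsort()                   # [1, 2, 0]: where each sorted element comes from
reverse = np.empty(len(sorter), dtype=np.int64)
reverse.put(sorter, np.arange(len(sorter)))  # maps old position -> position after sorting

new_labels = reverse[labels]                 # [2, 1, 0, 2]
new_uniques = uniques[sorter]                # [10, 20, 30]
# invariant: new_uniques[new_labels] equals uniques[labels] element-wise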
Example #9
def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer,
                                 fill_value):
    mat = mgr.as_matrix()
    reindexed_mat = algos.take_nd(mat, indexer, axis,
                                  fill_value=fill_value)
    reindexed = mgr.reindex_indexer(new_labels, indexer, axis,
                                    fill_value=fill_value)
    tm.assert_numpy_array_equal(reindexed_mat, reindexed.as_matrix())
    tm.assert_index_equal(reindexed.axes[axis], new_labels)
Example #10
    def get_reindexed_values(self, empty_dtype, upcasted_na):
        if upcasted_na is None:
            # No upcasting is necessary
            fill_value = self.block.fill_value
            values = self.block.get_values()
        else:
            fill_value = upcasted_na

            if self.is_na:
                if getattr(self.block, 'is_object', False):
                    # we want to avoid filling with np.nan if we are
                    # using None; we already know that we are all
                    # nulls
                    values = self.block.values.ravel(order='K')
                    if len(values) and values[0] is None:
                        fill_value = None

                if getattr(self.block, 'is_datetimetz', False) or \
                        is_datetimetz(empty_dtype):
                    pass
                elif getattr(self.block, 'is_categorical', False):
                    pass
                elif getattr(self.block, 'is_sparse', False):
                    pass
                else:
                    missing_arr = np.empty(self.shape, dtype=empty_dtype)
                    missing_arr.fill(fill_value)
                    return missing_arr

            if not self.indexers:
                if not self.block._can_consolidate:
                    # preserve these for validation in _concat_compat
                    return self.block.values

            if self.block.is_bool and not self.block.is_categorical:
                # External code requested filling/upcasting, bool values must
                # be upcasted to object to avoid being upcasted to numeric.
                values = self.block.astype(np.object_).values
            elif self.block.is_extension:
                values = self.block.values
            else:
                # No dtype upcasting is done here, it will be performed during
                # concatenation itself.
                values = self.block.get_values()

        if not self.indexers:
            # If there's no indexing to be done, we want to signal outside
            # code that this array must be copied explicitly.  This is done
            # by returning a view and checking `retval.base`.
            values = values.view()

        else:
            for ax, indexer in self.indexers.items():
                values = algos.take_nd(values, indexer, axis=ax,
                                       fill_value=fill_value)

        return values
Example #11
        def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value):
            mat = mgr.as_matrix()
            indexer = mgr.axes[axis].get_indexer_for(new_labels)

            reindexed = mgr.reindex_axis(new_labels, axis,
                                         fill_value=fill_value)
            tm.assert_numpy_array_equal(algos.take_nd(mat, indexer, axis,
                                                      fill_value=fill_value),
                                        reindexed.as_matrix(),
                                        check_dtype=False)
            tm.assert_index_equal(reindexed.axes[axis], new_labels)
Example #12
    def test_2d_with_out(self, dtype_can_hold_na, writeable):
        dtype, can_hold_na = dtype_can_hold_na

        data = np.random.randint(0, 2, (5, 3)).astype(dtype)
        data.flags.writeable = writeable

        indexer = [2, 1, 0, 1]
        out0 = np.empty((4, 3), dtype=dtype)
        out1 = np.empty((5, 4), dtype=dtype)
        algos.take_nd(data, indexer, out=out0, axis=0)
        algos.take_nd(data, indexer, out=out1, axis=1)

        expected0 = data.take(indexer, axis=0)
        expected1 = data.take(indexer, axis=1)
        tm.assert_almost_equal(out0, expected0)
        tm.assert_almost_equal(out1, expected1)

        indexer = [2, 1, 0, -1]
        out0 = np.empty((4, 3), dtype=dtype)
        out1 = np.empty((5, 4), dtype=dtype)

        if can_hold_na:
            algos.take_nd(data, indexer, out=out0, axis=0)
            algos.take_nd(data, indexer, out=out1, axis=1)

            expected0 = data.take(indexer, axis=0)
            expected1 = data.take(indexer, axis=1)
            expected0[3, :] = np.nan
            expected1[:, 3] = np.nan

            tm.assert_almost_equal(out0, expected0)
            tm.assert_almost_equal(out1, expected1)
        else:
            for i, out in enumerate([out0, out1]):
                with pytest.raises(TypeError, match=self.fill_error):
                    algos.take_nd(data, indexer, out=out, axis=i)

                # No Exception otherwise.
                data.take(indexer, out=out, axis=i)
Example #13
    def _make_sorted_values_labels(self):
        v = self.level

        labs = list(self.index.labels)
        levs = list(self.index.levels)
        to_sort = labs[:v] + labs[v + 1 :] + [labs[v]]
        sizes = [len(x) for x in levs[:v] + levs[v + 1 :] + [levs[v]]]

        comp_index, obs_ids = get_compressed_ids(to_sort, sizes)
        ngroups = len(obs_ids)

        indexer = _algos.groupsort_indexer(comp_index, ngroups)[0]
        indexer = _ensure_platform_int(indexer)

        self.sorted_values = algos.take_nd(self.values, indexer, axis=0)
        self.sorted_labels = [l.take(indexer) for l in to_sort]
Example #14
    def _aggregate_series_fast(self, obj, func):
        func = self._is_builtin_func(func)

        if obj.index._has_complex_internals:
            raise TypeError('Incompatible index for Cython grouper')

        group_index, _, ngroups = self.group_info

        # avoids object / Series creation overhead
        dummy = obj._get_values(slice(None, 0)).to_dense()
        indexer = get_group_index_sorter(group_index, ngroups)
        obj = obj._take(indexer).to_dense()
        group_index = algorithms.take_nd(
            group_index, indexer, allow_fill=False)
        grouper = reduction.SeriesGrouper(obj, func, group_index, ngroups,
                                          dummy)
        result, counts = grouper.get_result()
        return result, counts
Example #15
    def _map_values(self, mapper, na_action=None):
        """
        An internal function that maps values using the input
        correspondence (which can be a dict, Series, or function).

        Parameters
        ----------
        mapper : function, dict, or Series
            The input correspondence object
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping function

        Returns
        -------
        Union[Index, MultiIndex], inferred
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.
        """
        # we can fastpath dict/Series to an efficient map
        # as we know that we are not going to have to yield
        # python types
        if is_dict_like(mapper):
            if isinstance(mapper, dict) and hasattr(mapper, "__missing__"):
                # If a dictionary subclass defines a default value method,
                # convert mapper to a lookup function (GH #15999).
                dict_with_default = mapper
                mapper = lambda x: dict_with_default[x]
            else:
                # Dictionary does not have a default. Thus it's safe to
                # convert to a Series for efficiency.
                # we specify the keys here to handle the
                # possibility that they are tuples

                # The return value of mapping with an empty mapper is
                # expected to be pd.Series(np.nan, ...). As np.nan is
                # of dtype float64 the return value of this method should
                # be float64 as well
                mapper = create_series_with_explicit_dtype(
                    mapper, dtype_if_empty=np.float64)

        if isinstance(mapper, ABCSeries):
            # Since values were input this means we came from either
            # a dict or a series and mapper should be an index
            if is_categorical_dtype(self.dtype):
                # use the built in categorical series mapper which saves
                # time by mapping the categories instead of all values

                cat = cast("Categorical", self._values)
                return cat.map(mapper)

            values = self._values

            indexer = mapper.index.get_indexer(values)
            new_values = algorithms.take_nd(mapper._values, indexer)

            return new_values

        # we must convert to python types
        if is_extension_array_dtype(self.dtype) and hasattr(
                self._values, "map"):
            # GH#23179 some EAs do not have `map`
            values = self._values
            if na_action is not None:
                raise NotImplementedError
            map_f = lambda values, f: values.map(f)
        else:
            values = self._values.astype(object)
            if na_action == "ignore":
                map_f = lambda values, f: lib.map_infer_mask(
                    values, f,
                    isna(values).view(np.uint8))
            elif na_action is None:
                map_f = lib.map_infer
            else:
                msg = ("na_action must either be 'ignore' or None, "
                       f"{na_action} was passed")
                raise ValueError(msg)

        # mapper is a function
        new_values = map_f(values, mapper)

        return new_values
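
The dict/Series fast path above (get_indexer followed by take_nd) and the na_action handling are observable through the public Series.map. A short usage sketch:

import numpy as np
import pandas as pd

s = pd.Series(["cat", "dog", np.nan])
# dict-like mappers go through the indexer/take_nd fast path
s.map({"cat": "kitten", "dog": "puppy"})        # ['kitten', 'puppy', NaN]
# na_action="ignore" propagates NaN without calling the function
s.map(lambda v: v.upper(), na_action="ignore")  # ['CAT', 'DOG', NaN]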
Example #16
        def _test_dtype(dtype, can_hold_na):
            data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype)

            indexer = [2, 1, 0, 1]
            out0 = np.empty((4, 4, 3), dtype=dtype)
            out1 = np.empty((5, 4, 3), dtype=dtype)
            out2 = np.empty((5, 4, 4), dtype=dtype)
            algos.take_nd(data, indexer, out=out0, axis=0)
            algos.take_nd(data, indexer, out=out1, axis=1)
            algos.take_nd(data, indexer, out=out2, axis=2)
            expected0 = data.take(indexer, axis=0)
            expected1 = data.take(indexer, axis=1)
            expected2 = data.take(indexer, axis=2)
            tm.assert_almost_equal(out0, expected0)
            tm.assert_almost_equal(out1, expected1)
            tm.assert_almost_equal(out2, expected2)

            indexer = [2, 1, 0, -1]
            out0 = np.empty((4, 4, 3), dtype=dtype)
            out1 = np.empty((5, 4, 3), dtype=dtype)
            out2 = np.empty((5, 4, 4), dtype=dtype)
            if can_hold_na:
                algos.take_nd(data, indexer, out=out0, axis=0)
                algos.take_nd(data, indexer, out=out1, axis=1)
                algos.take_nd(data, indexer, out=out2, axis=2)
                expected0 = data.take(indexer, axis=0)
                expected1 = data.take(indexer, axis=1)
                expected2 = data.take(indexer, axis=2)
                expected0[3, :, :] = np.nan
                expected1[:, 3, :] = np.nan
                expected2[:, :, 3] = np.nan
                tm.assert_almost_equal(out0, expected0)
                tm.assert_almost_equal(out1, expected1)
                tm.assert_almost_equal(out2, expected2)
            else:
                for i, out in enumerate([out0, out1, out2]):
                    with tm.assertRaisesRegexp(TypeError, self.fill_error):
                        algos.take_nd(data, indexer, out=out, axis=i)
                    # no exception otherwise
                    data.take(indexer, out=out, axis=i)
Example #17
    def test_2d_datetime64(self):
        # 2005/01/01 - 2006/01/01
        arr = np.random.randint(long(11045376), long(11360736),
                                (5, 3)) * 100000000000
        arr = arr.view(dtype='datetime64[ns]')
        indexer = [0, 2, -1, 1, -1]

        # axis=0
        result = algos.take_nd(arr, indexer, axis=0)
        result2 = np.empty_like(result)
        algos.take_nd(arr, indexer, axis=0, out=result2)
        tm.assert_almost_equal(result, result2)

        expected = arr.take(indexer, axis=0)
        expected.view(np.int64)[[2, 4], :] = iNaT
        tm.assert_almost_equal(result, expected)

        result = algos.take_nd(arr,
                               indexer,
                               axis=0,
                               fill_value=datetime(2007, 1, 1))
        result2 = np.empty_like(result)
        algos.take_nd(arr,
                      indexer,
                      out=result2,
                      axis=0,
                      fill_value=datetime(2007, 1, 1))
        tm.assert_almost_equal(result, result2)

        expected = arr.take(indexer, axis=0)
        expected[[2, 4], :] = datetime(2007, 1, 1)
        tm.assert_almost_equal(result, expected)

        # axis=1
        result = algos.take_nd(arr, indexer, axis=1)
        result2 = np.empty_like(result)
        algos.take_nd(arr, indexer, axis=1, out=result2)
        tm.assert_almost_equal(result, result2)

        expected = arr.take(indexer, axis=1)
        expected.view(np.int64)[:, [2, 4]] = iNaT
        tm.assert_almost_equal(result, expected)

        result = algos.take_nd(arr,
                               indexer,
                               axis=1,
                               fill_value=datetime(2007, 1, 1))
        result2 = np.empty_like(result)
        algos.take_nd(arr,
                      indexer,
                      out=result2,
                      axis=1,
                      fill_value=datetime(2007, 1, 1))
        tm.assert_almost_equal(result, result2)

        expected = arr.take(indexer, axis=1)
        expected[:, [2, 4]] = datetime(2007, 1, 1)
        tm.assert_almost_equal(result, expected)
Example #18
    def test_3d_with_out(self, dtype_can_hold_na):
        dtype, can_hold_na = dtype_can_hold_na

        data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype)
        indexer = [2, 1, 0, 1]

        out0 = np.empty((4, 4, 3), dtype=dtype)
        out1 = np.empty((5, 4, 3), dtype=dtype)
        out2 = np.empty((5, 4, 4), dtype=dtype)

        algos.take_nd(data, indexer, out=out0, axis=0)
        algos.take_nd(data, indexer, out=out1, axis=1)
        algos.take_nd(data, indexer, out=out2, axis=2)

        expected0 = data.take(indexer, axis=0)
        expected1 = data.take(indexer, axis=1)
        expected2 = data.take(indexer, axis=2)

        tm.assert_almost_equal(out0, expected0)
        tm.assert_almost_equal(out1, expected1)
        tm.assert_almost_equal(out2, expected2)

        indexer = [2, 1, 0, -1]
        out0 = np.empty((4, 4, 3), dtype=dtype)
        out1 = np.empty((5, 4, 3), dtype=dtype)
        out2 = np.empty((5, 4, 4), dtype=dtype)

        if can_hold_na:
            algos.take_nd(data, indexer, out=out0, axis=0)
            algos.take_nd(data, indexer, out=out1, axis=1)
            algos.take_nd(data, indexer, out=out2, axis=2)

            expected0 = data.take(indexer, axis=0)
            expected1 = data.take(indexer, axis=1)
            expected2 = data.take(indexer, axis=2)

            expected0[3, :, :] = np.nan
            expected1[:, 3, :] = np.nan
            expected2[:, :, 3] = np.nan

            tm.assert_almost_equal(out0, expected0)
            tm.assert_almost_equal(out1, expected1)
            tm.assert_almost_equal(out2, expected2)
        else:
            for i, out in enumerate([out0, out1, out2]):
                with tm.assert_raises_regex(TypeError,
                                            self.fill_error):
                    algos.take_nd(data, indexer, out=out, axis=i)

                # No Exception otherwise.
                data.take(indexer, out=out, axis=i)
Example #19
def parallel_take1d():
    take_nd(df["col"].values, indexer)
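
This fragment is only the worker function; df and indexer are defined outside the snippet, and it appears to come from a benchmark that runs take_nd concurrently. A hypothetical driver, in which the DataFrame, the indexer, and the thread count are all assumptions rather than part of the original:

from concurrent.futures import ThreadPoolExecutor

import numpy as np
import pandas as pd
from pandas.core.algorithms import take_nd  # private API; location varies by version

df = pd.DataFrame({"col": np.random.randn(100_000)})
indexer = np.random.randint(0, len(df), size=len(df))

def parallel_take1d():
    take_nd(df["col"].values, indexer)

# run the same take on two threads at once
with ThreadPoolExecutor(max_workers=2) as ex:
    for _ in range(2):
        ex.submit(parallel_take1d)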
Example #20
def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: Dict[int,
                                                                  np.ndarray]):
    """
    Construct concatenation plan for given block manager and indexers.

    Parameters
    ----------
    mgr : BlockManager
    indexers : dict of {axis: indexer}

    Returns
    -------
    plan : list of (BlockPlacement, JoinUnit) tuples

    """
    # Calculate post-reindex shape, save for the item axis, which will be separate
    # for each block anyway.
    mgr_shape_list = list(mgr.shape)
    for ax, indexer in indexers.items():
        mgr_shape_list[ax] = len(indexer)
    mgr_shape = tuple(mgr_shape_list)

    if 0 in indexers:
        ax0_indexer = indexers.pop(0)
        blknos = algos.take_nd(mgr.blknos, ax0_indexer, fill_value=-1)
        blklocs = algos.take_nd(mgr.blklocs, ax0_indexer, fill_value=-1)
    else:

        if mgr.is_single_block:
            blk = mgr.blocks[0]
            return [(blk.mgr_locs, JoinUnit(blk, mgr_shape, indexers))]

        ax0_indexer = None
        blknos = mgr.blknos
        blklocs = mgr.blklocs

    plan = []
    for blkno, placements in libinternals.get_blkno_placements(blknos,
                                                               group=False):

        assert placements.is_slice_like

        join_unit_indexers = indexers.copy()

        shape_list = list(mgr_shape)
        shape_list[0] = len(placements)
        shape = tuple(shape_list)

        if blkno == -1:
            unit = JoinUnit(None, shape)
        else:
            blk = mgr.blocks[blkno]
            ax0_blk_indexer = blklocs[placements.indexer]

            unit_no_ax0_reindexing = (
                len(placements) == len(blk.mgr_locs) and
                # Fastpath detection of join unit not
                # needing to reindex its block: no ax0
                # reindexing took place and block
                # placement was sequential before.
                ((ax0_indexer is None and blk.mgr_locs.is_slice_like
                  and blk.mgr_locs.as_slice.step == 1) or
                 # Slow-ish detection: all indexer locs
                 # are sequential (and length match is
                 # checked above).
                 (np.diff(ax0_blk_indexer) == 1).all()))

            # Omit indexer if no item reindexing is required.
            if unit_no_ax0_reindexing:
                join_unit_indexers.pop(0, None)
            else:
                join_unit_indexers[0] = ax0_blk_indexer

            unit = JoinUnit(blk, shape, join_unit_indexers)

        plan.append((placements, unit))

    return plan
Example #21
    def get_reindexed_values(self, empty_dtype: DtypeObj,
                             upcasted_na) -> ArrayLike:
        if upcasted_na is None:
            # No upcasting is necessary
            fill_value = self.block.fill_value
            values = self.block.get_values()
        else:
            fill_value = upcasted_na

            if self.is_na:
                if getattr(self.block, "is_object", False):
                    # we want to avoid filling with np.nan if we are
                    # using None; we already know that we are all
                    # nulls
                    values = self.block.values.ravel(order="K")
                    if len(values) and values[0] is None:
                        fill_value = None

                if getattr(self.block, "is_datetimetz",
                           False) or is_datetime64tz_dtype(empty_dtype):
                    if self.block is None:
                        # TODO(EA2D): special case unneeded with 2D EAs
                        return DatetimeArray(np.full(self.shape[1],
                                                     fill_value.value),
                                             dtype=empty_dtype)
                elif getattr(self.block, "is_categorical", False):
                    pass
                elif getattr(self.block, "is_extension", False):
                    pass
                elif is_extension_array_dtype(empty_dtype):
                    missing_arr = empty_dtype.construct_array_type(
                    )._from_sequence([], dtype=empty_dtype)
                    ncols, nrows = self.shape
                    assert ncols == 1, ncols
                    empty_arr = -1 * np.ones((nrows, ), dtype=np.intp)
                    return missing_arr.take(empty_arr,
                                            allow_fill=True,
                                            fill_value=fill_value)
                else:
                    missing_arr = np.empty(self.shape, dtype=empty_dtype)
                    missing_arr.fill(fill_value)
                    return missing_arr

            if (not self.indexers) and (not self.block._can_consolidate):
                # preserve these for validation in concat_compat
                return self.block.values

            if self.block.is_bool and not self.block.is_categorical:
                # External code requested filling/upcasting, bool values must
                # be upcasted to object to avoid being upcasted to numeric.
                values = self.block.astype(np.object_).values
            elif self.block.is_extension:
                values = self.block.values
            else:
                # No dtype upcasting is done here, it will be performed during
                # concatenation itself.
                values = self.block.values

        if not self.indexers:
            # If there's no indexing to be done, we want to signal outside
            # code that this array must be copied explicitly.  This is done
            # by returning a view and checking `retval.base`.
            values = values.view()

        else:
            for ax, indexer in self.indexers.items():
                values = algos.take_nd(values,
                                       indexer,
                                       axis=ax,
                                       fill_value=fill_value)

        return values
Example #22
def _bins_to_cuts(
    x,
    bins,
    right: bool = True,
    labels=None,
    precision: int = 3,
    include_lowest: bool = False,
    dtype=None,
    duplicates: str = "raise",
    ordered: bool = True,
):
    if not ordered and labels is None:
        raise ValueError("'labels' must be provided if 'ordered = False'")

    if duplicates not in ["raise", "drop"]:
        raise ValueError(
            "invalid value for 'duplicates' parameter, valid options are: raise, drop"
        )

    if isinstance(bins, IntervalIndex):
        # we have a fast-path here
        ids = bins.get_indexer(x)
        result = Categorical.from_codes(ids, categories=bins, ordered=True)
        return result, bins

    unique_bins = algos.unique(bins)
    if len(unique_bins) < len(bins) and len(bins) != 2:
        if duplicates == "raise":
            raise ValueError(
                f"Bin edges must be unique: {repr(bins)}.\n"
                f"You can drop duplicate edges by setting the 'duplicates' kwarg"
            )
        else:
            bins = unique_bins

    side = "left" if right else "right"
    ids = ensure_platform_int(bins.searchsorted(x, side=side))

    if include_lowest:
        ids[x == bins[0]] = 1

    na_mask = isna(x) | (ids == len(bins)) | (ids == 0)
    has_nas = na_mask.any()

    if labels is not False:
        if not (labels is None or is_list_like(labels)):
            raise ValueError(
                "Bin labels must either be False, None or passed in as a "
                "list-like argument"
            )

        elif labels is None:
            labels = _format_labels(
                bins, precision, right=right, include_lowest=include_lowest, dtype=dtype
            )
        elif ordered and len(set(labels)) != len(labels):
            raise ValueError(
                "labels must be unique if ordered=True; pass ordered=False for duplicate labels"  # noqa
            )
        else:
            if len(labels) != len(bins) - 1:
                raise ValueError(
                    "Bin labels must be one fewer than the number of bin edges"
                )
        if not is_categorical_dtype(labels):
            labels = Categorical(
                labels,
                categories=labels if len(set(labels)) == len(labels) else None,
                ordered=ordered,
            )
        # TODO: handle mismatch between categorical label order and pandas.cut order.
        np.putmask(ids, na_mask, 0)
        result = algos.take_nd(labels, ids - 1)

    else:
        result = ids - 1
        if has_nas:
            result = result.astype(np.float64)
            np.putmask(result, na_mask, np.nan)

    return result, bins
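
Compared with the version in Example #3, this _bins_to_cuts adds the ordered keyword; through the public API it lets pd.cut accept duplicate labels. A small sketch (pandas >= 1.1):

import pandas as pd

ages = [5, 15, 25, 35]
# ordered=False requires explicit labels and permits duplicates
out = pd.cut(ages, bins=[0, 10, 20, 30, 40],
             labels=["young", "young", "adult", "adult"], ordered=False)
# -> ['young', 'young', 'adult', 'adult']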
Example #23
    def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
        if upcasted_na is None:
            # No upcasting is necessary
            fill_value = self.block.fill_value
            values = self.block.get_values()
        else:
            fill_value = upcasted_na

            if self.is_valid_na_for(empty_dtype):
                blk_dtype = getattr(self.block, "dtype", None)

                if blk_dtype == np.dtype("object"):
                    # we want to avoid filling with np.nan if we are
                    # using None; we already know that we are all
                    # nulls
                    values = self.block.values.ravel(order="K")
                    if len(values) and values[0] is None:
                        fill_value = None

                if is_datetime64tz_dtype(empty_dtype):
                    i8values = np.full(self.shape, fill_value.value)
                    return DatetimeArray(i8values, dtype=empty_dtype)

                elif is_extension_array_dtype(blk_dtype):
                    pass

                elif is_1d_only_ea_dtype(empty_dtype):
                    empty_dtype = cast(ExtensionDtype, empty_dtype)
                    cls = empty_dtype.construct_array_type()

                    missing_arr = cls._from_sequence([], dtype=empty_dtype)
                    ncols, nrows = self.shape
                    assert ncols == 1, ncols
                    empty_arr = -1 * np.ones((nrows,), dtype=np.intp)
                    return missing_arr.take(
                        empty_arr, allow_fill=True, fill_value=fill_value
                    )
                else:
                    # NB: we should never get here with empty_dtype integer or bool;
                    #  if we did, the missing_arr.fill would cast to gibberish
                    empty_dtype = cast(np.dtype, empty_dtype)

                    missing_arr = np.empty(self.shape, dtype=empty_dtype)
                    missing_arr.fill(fill_value)
                    return missing_arr

            if (not self.indexers) and (not self.block._can_consolidate):
                # preserve these for validation in concat_compat
                return self.block.values

            if self.block.is_bool:
                # External code requested filling/upcasting, bool values must
                # be upcasted to object to avoid being upcasted to numeric.
                values = self.block.astype(np.object_).values
            else:
                # No dtype upcasting is done here, it will be performed during
                # concatenation itself.
                values = self.block.values

        if not self.indexers:
            # If there's no indexing to be done, we want to signal outside
            # code that this array must be copied explicitly.  This is done
            # by returning a view and checking `retval.base`.
            values = values.view()

        else:
            for ax, indexer in self.indexers.items():
                values = algos.take_nd(values, indexer, axis=ax)

        return values
Example #24
def _get_ilevel_values(index, level):
    # accept level number only
    unique = index.levels[level]
    level_codes = index.codes[level]
    filled = take_nd(unique._values, level_codes, fill_value=unique._na_value)
    return unique._shallow_copy(filled, name=index.names[level])
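
_get_ilevel_values materializes one level of a MultiIndex from its codes, with the -1 sentinel becoming the level's NA value via fill_value. The same effect is visible through the public API:

import pandas as pd

mi = pd.MultiIndex(levels=[["a", "b"], [1, 2]],
                   codes=[[0, 1, -1], [1, 0, 1]])
# code -1 in level 0 surfaces as NaN when the level is materialized
mi.get_level_values(0)  # Index(['a', 'b', nan], dtype='object')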
Example #25
def union_categoricals(
    to_union, sort_categories: bool = False, ignore_order: bool = False
):
    """
    Combine list-like of Categorical-like, unioning categories.

    All categories must have the same dtype.

    Parameters
    ----------
    to_union : list-like
        Categorical, CategoricalIndex, or Series with dtype='category'.
    sort_categories : bool, default False
        If true, resulting categories will be lexsorted, otherwise
        they will be ordered as they appear in the data.
    ignore_order : bool, default False
        If true, the ordered attribute of the Categoricals will be ignored.
        Results in an unordered categorical.

    Returns
    -------
    Categorical

    Raises
    ------
    TypeError
        - all inputs do not have the same dtype
        - all inputs do not have the same ordered property
        - all inputs are ordered and their categories are not identical
        - sort_categories=True and Categoricals are ordered
    ValueError
        Empty list of categoricals passed

    Notes
    -----
    To learn more about categories, see `link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/categorical.html#unioning>`__

    Examples
    --------
    >>> from pandas.api.types import union_categoricals

    If you want to combine categoricals that do not necessarily have
    the same categories, `union_categoricals` will combine a list-like
    of categoricals. The new categories will be the union of the
    categories being combined.

    >>> a = pd.Categorical(["b", "c"])
    >>> b = pd.Categorical(["a", "b"])
    >>> union_categoricals([a, b])
    ['b', 'c', 'a', 'b']
    Categories (3, object): ['b', 'c', 'a']

    By default, the resulting categories will be ordered as they appear
    in the `categories` of the data. If you want the categories to be
    lexsorted, use `sort_categories=True` argument.

    >>> union_categoricals([a, b], sort_categories=True)
    ['b', 'c', 'a', 'b']
    Categories (3, object): ['a', 'b', 'c']

    `union_categoricals` also works when combining two
    categoricals that have the same categories and order information
    (i.e. cases where you could also use `append`).

    >>> a = pd.Categorical(["a", "b"], ordered=True)
    >>> b = pd.Categorical(["a", "b", "a"], ordered=True)
    >>> union_categoricals([a, b])
    ['a', 'b', 'a', 'b', 'a']
    Categories (2, object): ['a' < 'b']

    Raises `TypeError` because the categories are ordered and not identical.

    >>> a = pd.Categorical(["a", "b"], ordered=True)
    >>> b = pd.Categorical(["a", "b", "c"], ordered=True)
    >>> union_categoricals([a, b])
    Traceback (most recent call last):
        ...
    TypeError: to union ordered Categoricals, all categories must be the same

    New in version 0.20.0

    Ordered categoricals with different categories or orderings can be
    combined by using the `ignore_order=True` argument.

    >>> a = pd.Categorical(["a", "b", "c"], ordered=True)
    >>> b = pd.Categorical(["c", "b", "a"], ordered=True)
    >>> union_categoricals([a, b], ignore_order=True)
    ['a', 'b', 'c', 'c', 'b', 'a']
    Categories (3, object): ['a', 'b', 'c']

    `union_categoricals` also works with a `CategoricalIndex`, or `Series`
    containing categorical data, but note that the resulting array will
    always be a plain `Categorical`

    >>> a = pd.Series(["b", "c"], dtype='category')
    >>> b = pd.Series(["a", "b"], dtype='category')
    >>> union_categoricals([a, b])
    ['b', 'c', 'a', 'b']
    Categories (3, object): ['b', 'c', 'a']
    """
    from pandas import Categorical
    from pandas.core.arrays.categorical import recode_for_categories

    if len(to_union) == 0:
        raise ValueError("No Categoricals to union")

    def _maybe_unwrap(x):
        if isinstance(x, (ABCCategoricalIndex, ABCSeries)):
            return x._values
        elif isinstance(x, Categorical):
            return x
        else:
            raise TypeError("all components to combine must be Categorical")

    to_union = [_maybe_unwrap(x) for x in to_union]
    first = to_union[0]

    if not all(
        is_dtype_equal(other.categories.dtype, first.categories.dtype)
        for other in to_union[1:]
    ):
        raise TypeError("dtype of categories must be the same")

    ordered = False
    if all(first._categories_match_up_to_permutation(other) for other in to_union[1:]):
        # identical categories - fastpath
        categories = first.categories
        ordered = first.ordered

        all_codes = [first._encode_with_my_categories(x)._codes for x in to_union]
        new_codes = np.concatenate(all_codes)

        if sort_categories and not ignore_order and ordered:
            raise TypeError("Cannot use sort_categories=True with ordered Categoricals")

        if sort_categories and not categories.is_monotonic_increasing:
            categories = categories.sort_values()
            indexer = categories.get_indexer(first.categories)

            from pandas.core.algorithms import take_nd

            new_codes = take_nd(indexer, new_codes, fill_value=-1)
    elif ignore_order or all(not c.ordered for c in to_union):
        # different categories - union and recode
        cats = first.categories.append([c.categories for c in to_union[1:]])
        categories = cats.unique()
        if sort_categories:
            categories = categories.sort_values()

        new_codes = [
            recode_for_categories(c.codes, c.categories, categories) for c in to_union
        ]
        new_codes = np.concatenate(new_codes)
    else:
        # ordered - to show a proper error message
        if all(c.ordered for c in to_union):
            msg = "to union ordered Categoricals, all categories must be the same"
            raise TypeError(msg)
        else:
            raise TypeError("Categorical.ordered must be the same")

    if ignore_order:
        ordered = False

    return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True)
Example #26
    def slabels(self):
        # Sorted labels
        return algorithms.take_nd(self.labels, self.sort_idx, allow_fill=False)
Example #27
    def get_reindexed_values(self, empty_dtype, upcasted_na):
        if upcasted_na is None:
            # No upcasting is necessary
            fill_value = self.block.fill_value
            values = self.block.get_values()
        else:
            fill_value = upcasted_na

            if self.is_na:
                if getattr(self.block, 'is_object', False):
                    # we want to avoid filling with np.nan if we are
                    # using None; we already know that we are all
                    # nulls
                    values = self.block.values.ravel(order='K')
                    if len(values) and values[0] is None:
                        fill_value = None

                if (getattr(self.block, 'is_datetimetz', False)
                        or is_datetime64tz_dtype(empty_dtype)):
                    if self.block is None:
                        array = empty_dtype.construct_array_type()
                        missing_arr = array([fill_value], dtype=empty_dtype)
                        return missing_arr.repeat(self.shape[1])
                    pass
                elif getattr(self.block, 'is_categorical', False):
                    pass
                elif getattr(self.block, 'is_sparse', False):
                    pass
                else:
                    missing_arr = np.empty(self.shape, dtype=empty_dtype)
                    missing_arr.fill(fill_value)
                    return missing_arr

            if not self.indexers:
                if not self.block._can_consolidate:
                    # preserve these for validation in _concat_compat
                    return self.block.values

            if self.block.is_bool and not self.block.is_categorical:
                # External code requested filling/upcasting, bool values must
                # be upcasted to object to avoid being upcasted to numeric.
                values = self.block.astype(np.object_).values
            elif self.block.is_extension:
                values = self.block.values
            else:
                # No dtype upcasting is done here, it will be performed during
                # concatenation itself.
                values = self.block.get_values()

        if not self.indexers:
            # If there's no indexing to be done, we want to signal outside
            # code that this array must be copied explicitly.  This is done
            # by returning a view and checking `retval.base`.
            values = values.view()

        else:
            for ax, indexer in self.indexers.items():
                values = algos.take_nd(values,
                                       indexer,
                                       axis=ax,
                                       fill_value=fill_value)

        return values
Example #28
    def get_reindexed_values(self, empty_dtype: DtypeObj,
                             upcasted_na) -> ArrayLike:
        if upcasted_na is None:
            # No upcasting is necessary
            fill_value = self.block.fill_value
            values = self.block.get_values()
        else:
            fill_value = upcasted_na

            if self.is_valid_na_for(empty_dtype):
                blk_dtype = getattr(self.block, "dtype", None)

                # error: Value of type variable "_DTypeScalar" of "dtype" cannot be
                # "object"
                if blk_dtype == np.dtype(object):  # type: ignore[type-var]
                    # we want to avoid filling with np.nan if we are
                    # using None; we already know that we are all
                    # nulls
                    values = self.block.values.ravel(order="K")
                    if len(values) and values[0] is None:
                        fill_value = None

                if is_datetime64tz_dtype(empty_dtype):
                    # TODO(EA2D): special case unneeded with 2D EAs
                    i8values = np.full(self.shape[1], fill_value.value)
                    # error: Incompatible return value type (got "DatetimeArray",
                    # expected "ndarray")
                    return DatetimeArray(  # type: ignore[return-value]
                        i8values,
                        dtype=empty_dtype)
                elif is_extension_array_dtype(blk_dtype):
                    pass
                elif is_extension_array_dtype(empty_dtype):
                    # error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]"
                    # has no attribute "construct_array_type"
                    cls = empty_dtype.construct_array_type(
                    )  # type: ignore[union-attr]
                    missing_arr = cls._from_sequence([], dtype=empty_dtype)
                    ncols, nrows = self.shape
                    assert ncols == 1, ncols
                    empty_arr = -1 * np.ones((nrows, ), dtype=np.intp)
                    return missing_arr.take(empty_arr,
                                            allow_fill=True,
                                            fill_value=fill_value)
                else:
                    # NB: we should never get here with empty_dtype integer or bool;
                    #  if we did, the missing_arr.fill would cast to gibberish

                    # error: Argument "dtype" to "empty" has incompatible type
                    # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any],
                    # None, type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any,
                    # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any,
                    # Any]]]"
                    missing_arr = np.empty(
                        self.shape,
                        dtype=empty_dtype  # type: ignore[arg-type]
                    )
                    missing_arr.fill(fill_value)
                    return missing_arr

            if (not self.indexers) and (not self.block._can_consolidate):
                # preserve these for validation in concat_compat
                return self.block.values

            if self.block.is_bool and not self.block.is_categorical:
                # External code requested filling/upcasting, bool values must
                # be upcasted to object to avoid being upcasted to numeric.
                values = self.block.astype(np.object_).values
            elif self.block.is_extension:
                values = self.block.values
            else:
                # No dtype upcasting is done here, it will be performed during
                # concatenation itself.
                values = self.block.values

        if not self.indexers:
            # If there's no indexing to be done, we want to signal outside
            # code that this array must be copied explicitly.  This is done
            # by returning a view and checking `retval.base`.
            values = values.view()

        else:
            for ax, indexer in self.indexers.items():
                values = algos.take_nd(values, indexer, axis=ax)

        return values
Example #29
    def test_3d_with_out(self, dtype_can_hold_na):
        dtype, can_hold_na = dtype_can_hold_na

        data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype)
        indexer = [2, 1, 0, 1]

        out0 = np.empty((4, 4, 3), dtype=dtype)
        out1 = np.empty((5, 4, 3), dtype=dtype)
        out2 = np.empty((5, 4, 4), dtype=dtype)

        algos.take_nd(data, indexer, out=out0, axis=0)
        algos.take_nd(data, indexer, out=out1, axis=1)
        algos.take_nd(data, indexer, out=out2, axis=2)

        expected0 = data.take(indexer, axis=0)
        expected1 = data.take(indexer, axis=1)
        expected2 = data.take(indexer, axis=2)

        tm.assert_almost_equal(out0, expected0)
        tm.assert_almost_equal(out1, expected1)
        tm.assert_almost_equal(out2, expected2)

        indexer = [2, 1, 0, -1]
        out0 = np.empty((4, 4, 3), dtype=dtype)
        out1 = np.empty((5, 4, 3), dtype=dtype)
        out2 = np.empty((5, 4, 4), dtype=dtype)

        if can_hold_na:
            algos.take_nd(data, indexer, out=out0, axis=0)
            algos.take_nd(data, indexer, out=out1, axis=1)
            algos.take_nd(data, indexer, out=out2, axis=2)

            expected0 = data.take(indexer, axis=0)
            expected1 = data.take(indexer, axis=1)
            expected2 = data.take(indexer, axis=2)

            expected0[3, :, :] = np.nan
            expected1[:, 3, :] = np.nan
            expected2[:, :, 3] = np.nan

            tm.assert_almost_equal(out0, expected0)
            tm.assert_almost_equal(out1, expected1)
            tm.assert_almost_equal(out2, expected2)
        else:
            for i, out in enumerate([out0, out1, out2]):
                with pytest.raises(TypeError, match=self.fill_error):
                    algos.take_nd(data, indexer, out=out, axis=i)

                # No Exception otherwise.
                data.take(indexer, out=out, axis=i)
Example #30
    def _make_sorted_values(self, values: np.ndarray) -> np.ndarray:
        indexer, _ = self._indexer_and_to_sort

        sorted_values = algos.take_nd(values, indexer, axis=0)
        return sorted_values
Example #31
    def get_reindexed_values(self, empty_dtype: DtypeObj,
                             upcasted_na) -> ArrayLike:
        values: ArrayLike

        if upcasted_na is None and not self.is_na:
            # No upcasting is necessary
            fill_value = self.block.fill_value
            values = self.block.get_values()
        else:
            fill_value = upcasted_na

            if self.is_na:

                if is_datetime64tz_dtype(empty_dtype):
                    i8values = np.full(self.shape, fill_value.value)
                    return DatetimeArray(i8values, dtype=empty_dtype)

                elif is_1d_only_ea_dtype(empty_dtype):
                    empty_dtype = cast(ExtensionDtype, empty_dtype)
                    cls = empty_dtype.construct_array_type()

                    missing_arr = cls._from_sequence([], dtype=empty_dtype)
                    ncols, nrows = self.shape
                    assert ncols == 1, ncols
                    empty_arr = -1 * np.ones((nrows, ), dtype=np.intp)
                    return missing_arr.take(empty_arr,
                                            allow_fill=True,
                                            fill_value=fill_value)
                elif isinstance(empty_dtype, ExtensionDtype):
                    # TODO: no tests get here, a handful would if we disabled
                    #  the dt64tz special-case above (which is faster)
                    cls = empty_dtype.construct_array_type()
                    missing_arr = cls._empty(shape=self.shape,
                                             dtype=empty_dtype)
                    missing_arr[:] = fill_value
                    return missing_arr
                else:
                    # NB: we should never get here with empty_dtype integer or bool;
                    #  if we did, the missing_arr.fill would cast to gibberish
                    missing_arr = np.empty(self.shape, dtype=empty_dtype)
                    missing_arr.fill(fill_value)
                    return missing_arr

            if (not self.indexers) and (not self.block._can_consolidate):
                # preserve these for validation in concat_compat
                return self.block.values

            if self.block.is_bool:
                # External code requested filling/upcasting, bool values must
                # be upcasted to object to avoid being upcasted to numeric.
                values = self.block.astype(np.object_).values
            else:
                # No dtype upcasting is done here, it will be performed during
                # concatenation itself.
                values = self.block.values

        if not self.indexers:
            # If there's no indexing to be done, we want to signal outside
            # code that this array must be copied explicitly.  This is done
            # by returning a view and checking `retval.base`.
            values = values.view()

        else:
            for ax, indexer in self.indexers.items():
                values = algos.take_nd(values, indexer, axis=ax)

        return values
Example #32
def _bins_to_cuts(
    x,
    bins,
    right=True,
    labels=None,
    precision=3,
    include_lowest=False,
    dtype=None,
    duplicates="raise",
):

    if duplicates not in ["raise", "drop"]:
        raise ValueError(
            "invalid value for 'duplicates' parameter, "
            "valid options are: raise, drop"
        )

    if isinstance(bins, IntervalIndex):
        # we have a fast-path here
        ids = bins.get_indexer(x)
        result = Categorical.from_codes(ids, categories=bins, ordered=True)
        return result, bins

    unique_bins = algos.unique(bins)
    if len(unique_bins) < len(bins) and len(bins) != 2:
        if duplicates == "raise":
            raise ValueError(
                "Bin edges must be unique: {bins!r}.\nYou "
                "can drop duplicate edges by setting "
                "the 'duplicates' kwarg".format(bins=bins)
            )
        else:
            bins = unique_bins

    side = "left" if right else "right"
    ids = ensure_int64(bins.searchsorted(x, side=side))

    if include_lowest:
        ids[x == bins[0]] = 1

    na_mask = isna(x) | (ids == len(bins)) | (ids == 0)
    has_nas = na_mask.any()

    if labels is not False:
        if labels is None:
            labels = _format_labels(
                bins, precision, right=right, include_lowest=include_lowest, dtype=dtype
            )
        else:
            if len(labels) != len(bins) - 1:
                raise ValueError(
                    "Bin labels must be one fewer than the number of bin edges"
                )
        if not is_categorical_dtype(labels):
            labels = Categorical(labels, categories=labels, ordered=True)

        np.putmask(ids, na_mask, 0)
        result = algos.take_nd(labels, ids - 1)

    else:
        result = ids - 1
        if has_nas:
            result = result.astype(np.float64)
            np.putmask(result, na_mask, np.nan)

    return result, bins
Example #33
    def test_2d_datetime64(self):
        # 2005/01/01 - 2006/01/01
        arr = np.random.randint(
            long(11045376), long(11360736), (5, 3)) * 100000000000
        arr = arr.view(dtype='datetime64[ns]')
        indexer = [0, 2, -1, 1, -1]

        # axis=0
        result = algos.take_nd(arr, indexer, axis=0)
        result2 = np.empty_like(result)
        algos.take_nd(arr, indexer, axis=0, out=result2)
        tm.assert_almost_equal(result, result2)

        expected = arr.take(indexer, axis=0)
        expected.view(np.int64)[[2, 4], :] = iNaT
        tm.assert_almost_equal(result, expected)

        result = algos.take_nd(arr, indexer, axis=0,
                               fill_value=datetime(2007, 1, 1))
        result2 = np.empty_like(result)
        algos.take_nd(arr, indexer, out=result2, axis=0,
                      fill_value=datetime(2007, 1, 1))
        tm.assert_almost_equal(result, result2)

        expected = arr.take(indexer, axis=0)
        expected[[2, 4], :] = datetime(2007, 1, 1)
        tm.assert_almost_equal(result, expected)

        # axis=1
        result = algos.take_nd(arr, indexer, axis=1)
        result2 = np.empty_like(result)
        algos.take_nd(arr, indexer, axis=1, out=result2)
        tm.assert_almost_equal(result, result2)

        expected = arr.take(indexer, axis=1)
        expected.view(np.int64)[:, [2, 4]] = iNaT
        tm.assert_almost_equal(result, expected)

        result = algos.take_nd(arr, indexer, axis=1,
                               fill_value=datetime(2007, 1, 1))
        result2 = np.empty_like(result)
        algos.take_nd(arr, indexer, out=result2, axis=1,
                      fill_value=datetime(2007, 1, 1))
        tm.assert_almost_equal(result, result2)

        expected = arr.take(indexer, axis=1)
        expected[:, [2, 4]] = datetime(2007, 1, 1)
        tm.assert_almost_equal(result, expected)
Example #34
    def slabels(self) -> np.ndarray:  # np.ndarray[np.intp]
        # Sorted labels
        return algorithms.take_nd(self.labels, self._sort_idx,
                                  allow_fill=False)