Esempio n. 1
0
def test_advanced_integer_index_is_valid_with_default_result_shape(
        shape, dtype, data):
    """Check that an index drawn with the default ``result_shape`` is usable.

    An index is drawn from ``nps.integer_array_indices(shape, dtype=dtype)``
    and applied to a zero-filled array of the given ``shape``.
    """
    index_strategy = nps.integer_array_indices(shape, dtype=dtype)
    index = data.draw(index_strategy)
    source = np.zeros(shape)

    # Indexing raises if the drawn index is invalid for `source`.
    selected = source[index]

    # Advanced indexing should not return a view.
    assert not np.shares_memory(source, selected)

    # Every index array must carry the requested dtype.
    for index_array in index:
        assert dtype == index_array.dtype
Esempio n. 2
0
def array_inds(draw: st.DataObject,
               int_type: type = np.intp
               ) -> Tuple[Tuple[int, ...], Tuple[np.ndarray, ...], np.ndarray,
                          Tuple[int, int, int]]:
    """Strategy for an array shape and valid index arrays into it.

    Parameters
    ----------
    draw
        Callable used to draw values from strategies.
        NOTE(review): it is called as ``draw(strategy)``, which looks like the
        function supplied by ``st.composite`` rather than a ``DataObject``;
        confirm the annotation against the (unseen) decorator.
    int_type : type, optional (default=np.intp)
        Integer dtype requested for the generated index arrays.

    Returns
    -------
    shape : Tuple[int, ...]
        Shape drawn from ``some_shape``.
    multi_ind : Tuple[np.ndarray, ...]
        Index drawn from ``hn.integer_array_indices(shape, dtype=int_type)``.
    ravel_ind : np.ndarray
        ``multi_ind`` converted to flat indices with wrap-around mode.
    (rows, diag, cols) : Tuple[int, int, int]
        First and last side of ``shape`` and an integer drawn from
        ``[-rows, cols]``; all zeros when ``shape`` is empty.
    """
    shape = draw(some_shape)
    multi_ind = draw(hn.integer_array_indices(shape, dtype=int_type))
    ravel_ind = np.ravel_multi_index(multi_ind, shape, mode='wrap')
    if shape:
        rows, cols = shape[0], shape[-1]
        diag = draw(st.integers(min_value=-rows, max_value=cols))
    else:
        # nothing to draw from for an empty shape
        rows, diag, cols = (0, 0, 0)
    return shape, multi_ind, ravel_ind, (rows, diag, cols)
Esempio n. 3
0
def test_advanced_integer_index_minimizes_as_documented(
    shape, min_dims, min_side, dtype, data
):
    """Check the minimal example produced by ``nps.integer_array_indices``.

    The minimal index is expected to hold one all-zero array per axis of
    ``shape``, each with ``min_dims`` dimensions of length ``min_side``.
    """
    max_side = data.draw(st.integers(min_side, min_side + 2), label="max_side")
    max_dims = data.draw(st.integers(min_dims, min_dims + 2), label="max_dims")

    index_strategy = nps.integer_array_indices(
        shape,
        result_shape=nps.array_shapes(
            min_dims=min_dims, max_dims=max_dims, min_side=min_side, max_side=max_side
        ),
        dtype=dtype,
    )
    smallest = minimal(index_strategy)

    # One zero-filled array of the smallest permitted result shape per axis.
    zero_block = np.zeros([min_side] * min_dims)
    desired = tuple(zero_block for _ in shape)

    assert len(smallest) == len(desired)
    for found, expected in zip(smallest, desired):
        np.testing.assert_array_equal(found, expected)
Esempio n. 4
0
def test_advanced_integer_index_can_generate_any_pattern(shape, data):
    """Check that generated index arrays can select any pattern of elements.

    A target array is built from elements of ``x`` (an ``arange`` reshaped to
    ``shape``); the test then searches for an integer-array index whose
    result ``x[index]`` equals that target.
    """
    # np.prod replaces the deprecated np.product alias (removed in NumPy 2.0).
    x = np.arange(np.prod(shape)).reshape(shape)

    target = data.draw(
        nps.arrays(
            shape=nps.array_shapes(min_dims=1, max_dims=2, min_side=1, max_side=2),
            elements=st.sampled_from(x.flatten()),
            dtype=x.dtype,
        ),
        label="target",
    )
    find_any(
        nps.integer_array_indices(
            shape, result_shape=st.just(target.shape), dtype=np.dtype("int8")
        ),
        lambda index: np.all(target == x[index]),
        settings(max_examples=10**6),
    )
Esempio n. 5
0
def adv_integer_index(
    shape: Shape,
    min_dims: int = 1,
    max_dims: int = 3,
    min_side: int = 1,
    max_side: int = 3,
) -> st.SearchStrategy[Tuple[ndarray, ...]]:
    """ Hypothesis search strategy: given an array shape, generate a
    valid index for specifying an element/subarray of that array,
    using advanced indexing with integer-valued arrays.

    This is a thin wrapper around `hnp.integer_array_indices`; the
    dims/side bounds describe the shape shared by the index-arrays.

    Examples from this strategy shrink towards the index
    `len(shape) * (np.array([0]), )`.

    Parameters
    ----------
    shape : Tuple[int, ...]
        The shape of the array whose indices are being generated

    min_dims : int, optional (default=1)
        The minimum dimensionality permitted for the index-arrays.

    max_dims : int, optional (default=3)
        The maximum dimensionality permitted for the index-arrays.

    min_side : int, optional (default=1)
        The minimum side permitted for the index-arrays.

    max_side : int, optional (default=3)
        The maximum side permitted for the index-arrays.

    Returns
    -------
    hypothesis.searchstrategy.SearchStrategy[Tuple[numpy.ndarray, ...]]
    """
    # strategy describing the permitted shapes of the index-arrays
    index_array_shapes = hnp.array_shapes(
        min_dims=min_dims,
        max_dims=max_dims,
        min_side=min_side,
        max_side=max_side,
    )
    return hnp.integer_array_indices(shape=shape, result_shape=index_array_shapes)
Esempio n. 6
0
def test_advanced_integer_index_is_valid_and_satisfies_bounds(
        shape, min_dims, min_side, dtype, data):
    """Check that a drawn index is usable and respects the requested bounds.

    The result of applying the index to a zero-filled array of ``shape`` must
    have between ``min_dims`` and ``max_dims`` dimensions, each of length
    between ``min_side`` and ``max_side``.
    """
    max_side = data.draw(st.integers(min_side, min_side + 2), label="max_side")
    max_dims = data.draw(st.integers(min_dims, min_dims + 2), label="max_dims")

    permitted_shapes = nps.array_shapes(
        min_dims=min_dims,
        max_dims=max_dims,
        min_side=min_side,
        max_side=max_side,
    )
    index = data.draw(
        nps.integer_array_indices(shape, result_shape=permitted_shapes, dtype=dtype))

    source = np.zeros(shape)
    # Indexing raises if the drawn index is invalid for `source`.
    selected = source[index]

    for side in selected.shape:
        assert min_side <= side <= max_side
    assert min_dims <= selected.ndim <= max_dims

    # Advanced indexing should not return a view.
    assert not np.shares_memory(source, selected)

    # Every index array must carry the requested dtype.
    for index_array in index:
        assert dtype == index_array.dtype
Esempio n. 7
0
def categorical_stringarray(
    draw,
    max_length: int,
    max_categories: int,
    *,
    endianness: str = '=',
    min_str_len: int = 1,
    max_str_len: int = 16,
    unicode: Optional[bool] = None,
    ordered: Optional[bool] = None,
) -> Categorical:
    """
    Strategy for creating StringArray-mode Categoricals.

    A 1-D array of unique string labels is drawn, along with an integer
    "fancy index" into that array. The Categorical is then built either from
    the index plus the unique labels, or from the labels expanded through
    the index (a drawn boolean decides which).

    Parameters
    ----------
    draw
        Callable used to draw values from strategies.
    max_length : int
        Upper bound on the length of the drawn fancy index.
    max_categories : int
        Upper bound on the number of unique category labels.
    endianness : str
        Forwarded to the string dtype strategy.
    min_str_len : int
        Minimum label length; forwarded to the dtype and label strategies.
    max_str_len : int
        Maximum label length; forwarded to the dtype and label strategies.
    unicode : bool, optional
        Use unicode (True) or byte-string (False) labels. When None, a
        boolean is drawn.
    ordered : bool, optional
        Value of the ``ordered`` flag passed to `Categorical`. When None, a
        boolean is drawn.

    Notes
    -----
    TODO: Make sure to include the case where we have category values (in the underlying integer array)
          past the end of the categories array. (Or is that only for a Dictionary mode categorical?)
          To clarify -- this is the behavior where, when we print the Categorical, we get entries like <!456>.

    TODO: Also exercise (in one way or another) the following arguments to the Categorical constructor:
        * base_index
            Add an optional boolean parameter. When None, draw a boolean to fill it in.
            When the bool is false, call rt.Cat() with base_index=0.
            When True, call rt.Cat() with base_index=1.
        * dtype
            Call the ctor with dtype=None or a signed integer dtype that's either the min size given the
            number of categories or any larger signed integer dtype.
            E.g. if len(categories) == 1000, draw from { None, np.int16, np.int32, np.int64 }
        * filter
            Add an optional boolean param to the strategy which defaults to None, in which case we'll fill it by drawing a boolean.
            When the bool is false we call rt.Cat() with filter=None.
            When True, we create a boolean array the same length as our values or fancy index and pass that as the filter.

    TODO: Support slicing/strides on the values/categories arrays passed to the Categorical constructor.

    TODO: When creating the fancy index array and we've drawn 'explicit_categories=True', allow the fancy index to be created
          with any applicable integer type (signed or unsigned) whose range is large enough to index into the categories array.
          (Or, should we just allow _any_ integer dtype, even if too small? We wouldn't be able to index categories past the
          range of the dtype, but maybe that's an interesting thing to test? Especially around cases like having auto_add=True.)
    """
    # Decide first how the data reaches the Categorical constructor: this choice is among the
    # most likely to change the Categorical's behavior, and shrinking tends to work better
    # when such values are drawn early.
    explicit_categories: bool = draw(st.booleans())
    event(
        'Categorical created from unique category array and fancy index.'
        if explicit_categories else
        'Categorical created from non-unique array of strings.'
    )

    # Resolve byte (ascii) vs. unicode labels, then draw the matching string dtype.
    is_unicode: bool = unicode if unicode is not None else draw(st.booleans())
    string_dtypes = unicode_string_dtypes if is_unicode else byte_string_dtypes
    labels_dtype = draw(string_dtypes(endianness=endianness, min_len=min_str_len, max_len=max_str_len))

    # Draw the 1-D array of unique category labels.
    label_elements = category_labels(min_str_len, max_str_len, unicode=is_unicode)
    unique_labels = draw(
        arrays(
            dtype=labels_dtype,
            shape=array_shapes(max_dims=1, max_side=max_categories),
            elements=label_elements,
            unique=True,
        )
    )

    # Draw an integer-array (fancy) index into the unique labels; applying it yields an
    # array in which each label may occur zero or more times.
    fancy_index = draw(
        integer_array_indices(
            shape=unique_labels.shape,
            result_shape=array_shapes(max_dims=1, max_side=max_length),
        )
    )

    # Resolve the 'ordered' flag to a concrete value before building the Categorical.
    is_ordered = ordered if ordered is not None else draw(st.booleans())

    if not explicit_categories:
        # Expand the unique labels through the fancy index and build from the raw values.
        return Categorical(unique_labels[fancy_index], ordered=is_ordered, unicode=is_unicode)

    # Pass the fancy index and the unique labels to the constructor separately.
    return Categorical(fancy_index, categories=unique_labels, ordered=is_ordered, unicode=is_unicode)
Esempio n. 8
0
def categorical_dictmode(
    draw,
    max_length: int,
    max_categories: int,
    *,
    endianness: str = '=',
    min_str_len: int = 1,
    max_str_len: int = 16,
    unicode: Optional[bool] = None,
    ordered: Optional[bool] = None,
) -> Categorical:
    """
    Strategy for creating Dictionary-mode Categoricals.

    This strategy currently only covers creating `Categorical` instances with
    string-typed category labels.

    Parameters
    ----------
    draw
        Callable used to draw values from strategies.
    max_length : int
        Upper bound on the length of the drawn fancy index (and so of the values array).
    max_categories : int
        Upper bound on the number of unique category values/labels; also determines
        the integer dtype used for the category values.
    endianness : str
        Forwarded to the string dtype strategy.
    min_str_len : int
        Minimum label length; forwarded to the dtype and label strategies.
    max_str_len : int
        Maximum label length; forwarded to the dtype and label strategies.
    unicode : bool, optional
        Use unicode (True) or byte-string (False) labels. When None, a boolean is drawn.
    ordered : bool, optional
        Value of the ``ordered`` flag passed to `Categorical`. When None, a boolean is drawn.

    Examples
    --------
    >>> categorical_dictmode(10_000, 1_000, max_str_len=20).example()
    0, 0, 0, 0, 0

    Notes
    -----
    TODO: Make sure to include the case where we have category values (in the underlying integer array)
          past the end of the categories array. (Or is that only for a Dictionary mode categorical?)
          To clarify -- this is the behavior where, when we print the Categorical, we get entries like <!456>.

    TODO: Also exercise (in one way or another) the following arguments to the Categorical constructor:
        * base_index
            Add an optional boolean parameter. When None, draw a boolean to fill it in.
            When the bool is false, call rt.Cat() with base_index=0.
            When True, call rt.Cat() with base_index=1.
        * dtype
            Call the ctor with dtype=None or a signed integer dtype that's either the min size given the
            number of categories or any larger signed integer dtype.
            E.g. if len(categories) == 1000, draw from { None, np.int16, np.int32, np.int64 }
        * filter
            Add an optional boolean param to the strategy which defaults to None, in which case we'll fill it by drawing a boolean.
            When the bool is false we call rt.Cat() with filter=None.
            When True, we create a boolean array the same length as our values or fancy index and pass that as the filter.

    TODO: Support slicing/strides on the values/categories arrays passed to the Categorical constructor.

    TODO: Does a Dictionary-mode Categorical allow any other types (e.g. rt.Date) to be used for the category labels?
        If so, these should also be covered by this strategy (though changes will be needed to allow a variety of
        types to be used for category labels).

    TODO: Any possible issues (that we might want to exercise in this strategy) between the string used when displaying
        the invalid category (e.g. 'Inv') and category labels? What happens if we have a category label using the same string?
    """
    # Draw a boolean indicating whether we'll use a signed or unsigned integer dtype.
    use_signed_integer_dtype: bool = draw(st.booleans())

    # If using a signed integer dtype, draw another boolean indicating whether we'll
    # generate negative category values.
    allow_negative_category_values: bool = draw(st.booleans()) if use_signed_integer_dtype else False
    if use_signed_integer_dtype:
        if allow_negative_category_values:
            event('Categorical may have a mix of negative, zero, and positive category values.')
        else:
            event('Categorical has only non-negative category values.')

    # If the 'unicode' flag is not set, draw a boolean to fill it in.
    # This value is resolved exactly once so the event below always matches the labels generated later.
    is_unicode: bool = draw(st.booleans()) if unicode is None else unicode
    event(f'Category labels are {"unicode" if is_unicode else "ascii"} strings.')

    # If the 'ordered' flag is not set, draw a boolean for it now so we have a concrete value
    # to use when creating the categorical.
    is_ordered = draw(st.booleans()) if ordered is None else ordered
    event(f'ordered = {is_ordered}')

    # Draw the dtype for the category values.
    # TODO: Draw a signed or unsigned integer dtype here which is at least as large as needed, but perhaps larger
    #       than needed.
    #       For now, we just use the smallest dtype large enough to fit the max number of categories; but allowing for
    #       larger (randomly-selected) dtypes later will help ensure we test cases where there are non-consecutive
    #       category values even when the max_categories value is near the max value of a dtype.
    #
    # np.min_scalar_type() yields an unsigned dtype for a non-negative argument, so the signed case
    # probes with the negated count to get the smallest signed dtype instead. That dtype's
    # non-negative half still holds at least `max_categories` distinct values, so the unique
    # array below remains satisfiable even when negative values are not allowed.
    values_dtype = np.min_scalar_type(-max_categories if use_signed_integer_dtype else max_categories)

    # Create the strategy for the category values (integer values representing the categories).
    values_dtype_info = np.iinfo(values_dtype)
    values_strat =\
        st.integers(
            min_value=(values_dtype_info.min if allow_negative_category_values else 0),
            max_value=values_dtype_info.max)

    # Create an array of unique category values/codes.
    cats_shapes = array_shapes(max_dims=1, max_side=max_categories)
    unique_cat_values = draw(arrays(dtype=values_dtype, shape=cats_shapes, elements=values_strat, unique=True))

    # Draw the string dtype for the labels based on whether we want a byte (ascii) string or Unicode.
    if is_unicode:
        labels_dtype = draw(unicode_string_dtypes(endianness=endianness, min_len=min_str_len, max_len=max_str_len))
    else:
        labels_dtype = draw(byte_string_dtypes(endianness=endianness, min_len=min_str_len, max_len=max_str_len))

    # Create an array of unique category labels; this must be the same shape as the unique category values array.
    category_label_strat = category_labels(min_str_len, max_str_len, unicode=is_unicode)
    unique_labels =\
        draw(arrays(dtype=labels_dtype, shape=unique_cat_values.shape, elements=category_label_strat, unique=True))

    # TODO: Draw a slice (or None) that we'll apply to both arrays of uniques (the labels and values)
    #   before using them to create the category dictionary.
    #   This allows us to cover cases where a category value isn't in the dictionary.

    # Combine the unique category labels and values to create a dictionary.
    category_dict = dict(zip(unique_labels, unique_cat_values))

    # Use integer_array_indices to create a fancy index into the array of unique values.
    # Apply it to expand the array of unique values into an array where those values may occur zero or more times.
    fancy_index_shapes = array_shapes(max_dims=1, max_side=max_length)
    fancy_index = draw(integer_array_indices(shape=unique_cat_values.shape, result_shape=fancy_index_shapes))

    # Apply the fancy index to the array of unique category values to produce an
    # array where each category appears zero or more times; then create the Categorical from that.
    cat_values = unique_cat_values[fancy_index]
    return Categorical(cat_values, categories=category_dict, ordered=is_ordered, unicode=is_unicode)
Esempio n. 9
0
def arbitrary_indices(draw, shape: Tuple[int, ...]):
    """
    Hypothesis search strategy: Generate a valid index
    for an array of a given shape. The index can contain
    any type of valid object used for indexing, including
    integers, slices, Ellipsis's, newaxis's, boolean arrays,
    and integer arrays.

    Parameters
    ----------
    shape : Tuple[int, ...]
        The shape of the array to be indexed into

    Notes
    -----
    `draw` is a parameter reserved by hypothesis, and should not be specified
    by the user.

    When given an empty shape, or a shape containing a length-0 axis, only a
    basic index will be returned.

    Returns
    -------
    hypothesis.searchstrategy.SearchStrategy[Tuple[Union[int, slice, Ellipsis, NoneType, numpy.ndarray], ...]]
    """
    def group_continuous_integers(ls):
        """
        Given a list of integers, find and group continuous sequences

        Parameters
        ----------
        ls: List[int]

        Returns
        -------
        List[Tuple[int]]

        Examples
        --------
        >>> group_continuous_integers([1, 3, 4, 5, 7, 8])
        [(1,), (3, 4, 5), (7, 8)]
        """
        # within a run of consecutive integers, (position - value) is constant
        return [
            tuple(map(itemgetter(1), g))
            for k, g in groupby(enumerate(ls), lambda x: x[0] - x[1])
        ]

    if not shape or 0 in shape:
        return draw(hnp.basic_indices(shape=shape, allow_newaxis=True))

    shape_inds = list(range(len(shape)))  # axes not yet assigned an indexing object
    index = []  # stores tuples of (axis, indexing object)

    # add integers, slices
    basic_inds = sorted(
        draw(st.lists(st.sampled_from(shape_inds), unique=True)))

    if len(basic_inds) > 0:
        basic_dims = tuple(shape[i] for i in basic_inds)

        # only draw ints and slices
        # will handle possible ellipsis and newaxis objects later
        # as these can make array indices difficult to handle
        basics = draw(hnp.basic_indices(shape=basic_dims,
                                        allow_ellipsis=False))
        if not isinstance(basics, tuple):
            basics = (basics, )

        index.extend(zip(basic_inds, basics))

        # will not necessarily index all axes from basic_inds as
        # `basic_indices` can return indices with omitted trailing slices
        # so only remove dimensions directly indexed into
        for i in basic_inds[:len(basics)]:
            shape_inds.remove(i)

    if len(shape_inds) > 0:
        # add integer arrays to index
        int_arr_inds = sorted(
            draw(st.lists(st.sampled_from(shape_inds), unique=True)))

        if len(int_arr_inds) > 0:
            int_arr_dims = tuple(shape[i] for i in int_arr_inds)
            int_arrs = draw(hnp.integer_array_indices(shape=int_arr_dims))
            index.extend(zip(int_arr_inds, int_arrs))

            for i in int_arr_inds:
                shape_inds.remove(i)

    if len(shape_inds) > 0:
        # add boolean arrays to index
        bool_inds = sorted(
            draw(st.lists(st.sampled_from(shape_inds), unique=True)))

        if len(bool_inds) > 0:
            # boolean arrays can be multi-dimensional, so by grouping all
            # adjacent axes to make a single boolean array, this can be tested for
            grouped_bool_inds = group_continuous_integers(bool_inds)
            bool_dims = [
                tuple(shape[i] for i in ind) for ind in grouped_bool_inds
            ]

            # if multiple boolean array indices, the number of trues must be such that
            # the output of ind.nonzero() for each index are broadcast compatible
            # this must also be the same as the trailing dim of each integer array, if any used
            if len(int_arr_inds):
                max_trues = max(i.shape[-1] for i in int_arrs)
            else:
                # Draw a concrete count, bounded by the size of the smallest
                # boolean array. Leaving this as an undrawn strategy would make
                # the membership test below only ever accept exactly one True.
                max_trues = draw(
                    st.integers(min_value=0,
                                max_value=min(
                                    int(np.prod(dims))
                                    for dims in bool_dims)))

            index += [(
                i[0],
                draw(
                    hnp.arrays(shape=sh, dtype=bool).filter(
                        lambda x: x.sum() in (1, max_trues))),
            ) for i, sh in zip(grouped_bool_inds, bool_dims)]

            for i in bool_inds:
                shape_inds.remove(i)

    grouped_shape_inds = group_continuous_integers(sorted(shape_inds))
    if len(grouped_shape_inds) == 1:
        # unused indices form a continuous stretch of dimensions
        # so can replace with an ellipsis

        # to test ellipsis vs omitted slices, randomly
        # add ellipsis when the unused axes are trailing
        if max(shape_inds) + 1 == len(shape):
            if draw(st.booleans()):
                index += [(min(shape_inds), Ellipsis)]
        else:
            index += [(min(shape_inds), Ellipsis)]
    elif len(grouped_shape_inds) == 0 and draw(st.booleans()):
        # all indices filled already
        # can randomly add ellipsis that expands to 0-d tuple
        # this can have counter-intuitive behavior
        # (particularly in conjunction with array indices)
        i = draw(st.integers(min_value=0, max_value=len(index)))
        index.insert(i, (i, Ellipsis))
    else:
        # so that the currently chosen indices work,
        # fill in any remaining gaps with empty slices
        index += [(i, slice(None)) for i in shape_inds]

    # order the indexing objects by the axis they apply to
    index = sorted(index, key=lambda x: x[0])

    # can now randomly add in newaxis objects
    # (inserting from the highest position down keeps earlier positions valid)
    newaxis_pos = sorted(
        draw(
            st.lists(st.integers(min_value=0, max_value=len(index)),
                     unique=True)),
        reverse=True,
    )
    for i in newaxis_pos:
        index.insert(i, (-1, np.newaxis))

    out_ind = tuple(i[1] for i in index)
    return out_ind