Example #1
def rule(
    *,
    targets: Union[Sequence[Bundle[Ex]], _OmittedArgument] = (),
    target: Optional[Bundle[Ex]] = None,
    **kwargs: SearchStrategy,
) -> Union[_RuleWrapper[Ex], Callable[[Callable[..., None]], Callable[...,
                                                                      None]]]:
    """Decorator for RuleBasedStateMachine. Any Bundle present in ``target`` or
    ``targets`` will define where the end result of this function should go. If
    both are empty then the end result will be discarded.

    ``target`` must be a Bundle, or if the result should go to multiple
    bundles you can pass a tuple of them as the ``targets`` argument.
    It is invalid to use both arguments for a single rule.  If the result
    should go to exactly one of several bundles, define a separate rule for
    each case.

    kwargs then define the arguments that will be passed to the function
    invocation. If their value is a Bundle, or if it is ``consumes(b)``
    where ``b`` is a Bundle, then values that have previously been produced
    for that bundle will be provided. If ``consumes`` is used, the value
    will also be removed from the bundle.

    Any other kwargs should be strategies and values from them will be
    provided.
    """
    converted_targets = _convert_targets(targets, target)
    for k, v in kwargs.items():
        check_strategy(v, name=k)

    def accept(f):
        if getattr(f, INVARIANT_MARKER, None):
            raise InvalidDefinition(
                "A function cannot be used for both a rule and an invariant.",
                Settings.default,
            )
        existing_rule = getattr(f, RULE_MARKER, None)
        existing_initialize_rule = getattr(f, INITIALIZE_RULE_MARKER, None)
        if existing_rule is not None or existing_initialize_rule is not None:
            raise InvalidDefinition(
                "A function cannot be used for two distinct rules. ",
                Settings.default)
        preconditions = getattr(f, PRECONDITIONS_MARKER, ())
        rule = Rule(
            targets=converted_targets,
            arguments=kwargs,
            function=f,
            preconditions=preconditions,
        )

        @proxies(f)
        def rule_wrapper(*args, **kwargs):
            return f(*args, **kwargs)

        setattr(rule_wrapper, RULE_MARKER, rule)
        return rule_wrapper

    return accept
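A minimal usage sketch (not part of the snippet above, assuming the public hypothesis.stateful API): the decorated method's return value is written into the Bundle named by ``target``, and other rules can then draw from that bundle.

from hypothesis.stateful import Bundle, RuleBasedStateMachine, rule
import hypothesis.strategies as st

class NumberMachine(RuleBasedStateMachine):
    numbers = Bundle("numbers")

    @rule(target=numbers, n=st.integers())
    def add_number(self, n):
        # The returned value is stored in the "numbers" bundle.
        return n

    @rule(m=numbers)
    def check_number(self, m):
        # Arguments bound to a Bundle draw previously produced values.
        assert isinstance(m, int)

TestNumbers = NumberMachine.TestCase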
Example #2
def fill_for(elements, unique, fill, name=""):
    if fill is None:
        if unique or not elements.has_reusable_values:
            fill = st.nothing()
        else:
            fill = elements
    else:
        check_strategy(fill, f"{name}.fill" if name else "fill")
    return fill
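The branching above mirrors the documented fill inference for arrays(): reusable, non-unique element strategies become their own fill, while unique arrays or strategies producing mutable values fall back to no fill. A small illustration relying on the ``has_reusable_values`` property used here (expected results shown as comments, not verified output):

import hypothesis.strategies as st

st.integers().has_reusable_values            # expected True  -> fill = elements
st.lists(st.integers()).has_reusable_values  # expected False -> fill = st.nothing()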
Example #3
def elements_and_dtype(elements, dtype, source=None):

    if source is None:
        prefix = ""
    else:
        prefix = f"{source}."

    if elements is not None:
        check_strategy(elements, f"{prefix}elements")
    else:
        with check("dtype is not None"):
            if dtype is None:
                raise InvalidArgument(
                    f"At least one of {prefix}elements or {prefix}dtype must be provided."
                )

    with check("is_categorical_dtype"):
        if is_categorical_dtype(dtype):
            raise InvalidArgument(
                f"{prefix}dtype is categorical, which is currently unsupported"
            )

    if isinstance(
            dtype,
            type) and np.dtype(dtype).kind == "O" and dtype is not object:
        note_deprecation(
            f"Passed dtype={dtype!r} is not a valid Pandas dtype.  We'll treat it as "
            "dtype=object for now, but this will be an error in a future version.",
            since="2021-12-31",
            has_codemod=False,
        )

    dtype = try_convert(np.dtype, dtype, "dtype")

    if elements is None:
        elements = npst.from_dtype(dtype)
    elif dtype is not None:

        def convert_element(value):
            name = f"draw({prefix}elements)"
            try:
                return np.array([value], dtype=dtype)[0]
            except TypeError:
                raise InvalidArgument(
                    "Cannot convert %s=%r of type %s to dtype %s" %
                    (name, value, type(value).__name__, dtype.str)) from None
            except ValueError:
                raise InvalidArgument(
                    f"Cannot convert {name}={value!r} to type {dtype.str}"
                ) from None

        elements = elements.map(convert_element)
    assert elements is not None

    return elements, dtype
Example #4
def tuples(*args: SearchStrategy) -> SearchStrategy[tuple]:
    """Return a strategy which generates a tuple of the same length as args by
    generating the value at index i from args[i].

    e.g. tuples(integers(), integers()) would generate a tuple of length
    two with both values an integer.

    Examples from this strategy shrink by shrinking their component parts.
    """
    for arg in args:
        check_strategy(arg)

    return TupleStrategy(args)
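Example use of the public strategy (a sketch, assuming the usual hypothesis imports):

from hypothesis import given, strategies as st

@given(st.tuples(st.integers(), st.booleans()))
def test_pair(pair):
    # One value per component strategy, in the same order.
    n, flag = pair
    assert isinstance(n, int) and isinstance(flag, bool)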
Example #5
def initialize(
    *,
    targets: Union[Sequence[Bundle[Ex]], _OmittedArgument] = (),
    target: Optional[Bundle[Ex]] = None,
    **kwargs: SearchStrategy,
) -> Union[_RuleWrapper[Ex], Callable[[Callable[..., None]], Callable[...,
                                                                      None]]]:
    """Decorator for RuleBasedStateMachine.

    An initialize decorator behaves like a rule, but all ``@initialize()`` decorated
    methods will be called before any ``@rule()`` decorated methods, in an arbitrary
    order.  Each ``@initialize()`` method will be called exactly once per run, unless
    one raises an exception - after which only the ``.teardown()`` method will be run.
    ``@initialize()`` methods may not have preconditions.
    """
    converted_targets = _convert_targets(targets, target)
    for k, v in kwargs.items():
        check_strategy(v, name=k)

    def accept(f):
        if getattr(f, INVARIANT_MARKER, None):
            raise InvalidDefinition(
                "A function cannot be used for both a rule and an invariant.",
                Settings.default,
            )
        existing_rule = getattr(f, RULE_MARKER, None)
        existing_initialize_rule = getattr(f, INITIALIZE_RULE_MARKER, None)
        if existing_rule is not None or existing_initialize_rule is not None:
            raise InvalidDefinition(
                "A function cannot be used for two distinct rules. ",
                Settings.default)
        preconditions = getattr(f, PRECONDITIONS_MARKER, ())
        if preconditions:
            raise InvalidDefinition(
                "An initialization rule cannot have a precondition. ",
                Settings.default)
        rule = Rule(
            targets=converted_targets,
            arguments=kwargs,
            function=f,
            preconditions=preconditions,
        )

        @proxies(f)
        def rule_wrapper(*args, **kwargs):
            return f(*args, **kwargs)

        setattr(rule_wrapper, INITIALIZE_RULE_MARKER, rule)
        return rule_wrapper

    return accept
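A minimal sketch of the decorator in use (assuming the public hypothesis.stateful API): the @initialize() method runs exactly once, before any @rule() method.

from hypothesis.stateful import RuleBasedStateMachine, initialize, rule
import hypothesis.strategies as st

class StackMachine(RuleBasedStateMachine):
    @initialize(size=st.integers(0, 10))
    def setup(self, size):
        # Runs once per test case, before any rule.
        self.items = list(range(size))

    @rule(x=st.integers())
    def push(self, x):
        self.items.append(x)
        assert self.items[-1] == x

TestStack = StackMachine.TestCase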
Example #6
def wrapped_strategy(self):
    if self.__wrapped_strategy is None:
        if not inspect.isfunction(self.__definition):
            raise InvalidArgument(
                f"Expected definition to be a function but got {self.__definition!r} "
                f"of type {type(self.__definition).__name__} instead.")
        result = self.__definition()
        if result is self:
            raise InvalidArgument(
                "Cannot define a deferred strategy to be itself")
        check_strategy(result, "definition()")
        self.__wrapped_strategy = result
        self.__definition = None
    return self.__wrapped_strategy
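This lazy resolution is what makes st.deferred usable for self-referential strategies: the definition function is only called, and its result validated with check_strategy, on first use. A sketch:

import hypothesis.strategies as st

# The lambda is not evaluated until the strategy is first drawn from,
# so it may refer to the name being defined.
json_like = st.deferred(
    lambda: st.none()
    | st.booleans()
    | st.integers()
    | st.lists(json_like, max_size=3)
)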
Example #7
def elements_and_dtype(elements, dtype, source=None):

    if source is None:
        prefix = ""
    else:
        prefix = f"{source}."

    if elements is not None:
        check_strategy(elements, f"{prefix}elements")
    else:
        with check("dtype is not None"):
            if dtype is None:
                raise InvalidArgument(
                    f"At least one of {prefix}elements or {prefix}dtype must be provided."
                )

    with check("is_categorical_dtype"):
        if is_categorical_dtype(dtype):
            raise InvalidArgument(
                f"{prefix}dtype is categorical, which is currently unsupported"
            )

    dtype = try_convert(np.dtype, dtype, "dtype")

    if elements is None:
        elements = npst.from_dtype(dtype)
    elif dtype is not None:

        def convert_element(value):
            name = f"draw({prefix}elements)"
            try:
                return np.array([value], dtype=dtype)[0]
            except TypeError:
                raise InvalidArgument(
                    "Cannot convert %s=%r of type %s to dtype %s" %
                    (name, value, type(value).__name__, dtype.str)) from None
            except ValueError:
                raise InvalidArgument(
                    f"Cannot convert {name}={value!r} to type {dtype.str}"
                ) from None

        elements = elements.map(convert_element)
    assert elements is not None

    return elements, dtype
Example #8
def initialize(*, targets=(), target=None, **kwargs):
    """Decorator for RuleBasedStateMachine.

    An initialize decorator behaves like a rule, but the decorated
    method is called at most once in a run. All initialize decorated
    methods will be called before any rule decorated methods, in an
    arbitrary order.
    """
    converted_targets = _convert_targets(targets, target)
    for k, v in kwargs.items():
        check_strategy(v, name=k)

    def accept(f):
        existing_rule = getattr(f, RULE_MARKER, None)
        existing_initialize_rule = getattr(f, INITIALIZE_RULE_MARKER, None)
        if existing_rule is not None or existing_initialize_rule is not None:
            raise InvalidDefinition(
                "A function cannot be used for two distinct rules. ",
                Settings.default)
        precondition = getattr(f, PRECONDITION_MARKER, None)
        if precondition:
            raise InvalidDefinition(
                "An initialization rule cannot have a precondition. ",
                Settings.default)
        rule = Rule(
            targets=converted_targets,
            arguments=kwargs,
            function=f,
            precondition=precondition,
        )

        @proxies(f)
        def rule_wrapper(*args, **kwargs):
            return f(*args, **kwargs)

        setattr(rule_wrapper, INITIALIZE_RULE_MARKER, rule)
        return rule_wrapper

    return accept
Example #9
def _arrays(
    xp: Any,
    dtype: Union[DataType, str, st.SearchStrategy[DataType],
                 st.SearchStrategy[str]],
    shape: Union[int, Shape, st.SearchStrategy[Shape]],
    *,
    elements: Optional[Union[Mapping[str, Any], st.SearchStrategy]] = None,
    fill: Optional[st.SearchStrategy[Any]] = None,
    unique: bool = False,
) -> st.SearchStrategy:
    """Returns a strategy for :xp-ref:`arrays <array_object.html>`.

    * ``dtype`` may be a :xp-ref:`valid dtype <data_types.html>` object or name,
      or a strategy that generates such values.
    * ``shape`` may be an integer >= 0, a tuple of such integers, or a strategy
      that generates such values.
    * ``elements`` is a strategy for values to put in the array. If ``None``
      then a suitable value will be inferred based on the dtype, which may give
      any legal value (including e.g. NaN for floats). If a mapping, it will be
      passed as ``**kwargs`` to :func:`from_dtype()` when inferring based on the dtype.
    * ``fill`` is a strategy that may be used to generate a single background
      value for the array. If ``None``, a suitable default will be inferred
      based on the other arguments. If set to
      :func:`~hypothesis.strategies.nothing` then filling behaviour will be
      disabled entirely and every element will be generated independently.
    * ``unique`` specifies if the elements of the array should all be distinct
      from one another; if fill is also set, the only valid values for fill to
      return are NaN values.

    Arrays of specified ``dtype`` and ``shape`` are generated for example
    like this:

    .. code-block:: pycon

      >>> from numpy import array_api as xp
      >>> xps.arrays(xp, xp.int8, (2, 3)).example()
      Array([[-8,  6,  3],
             [-6,  4,  6]], dtype=int8)

    Specifying element boundaries by a :obj:`python:dict` of the kwargs to pass
    to :func:`from_dtype` will ensure ``dtype`` bounds will be respected.

    .. code-block:: pycon

      >>> xps.arrays(xp, xp.int8, 3, elements={"min_value": 10}).example()
      Array([125, 13, 79], dtype=int8)

    Refer to :doc:`What you can generate and how <data>` for passing
    your own elements strategy.

    .. code-block:: pycon

      >>> xps.arrays(xp, xp.float32, 3, elements=floats(0, 1, width=32)).example()
      Array([ 0.88974794,  0.77387938,  0.1977879 ], dtype=float32)

    Array values are generated in two parts:

    1. A single value is drawn from the fill strategy and is used to create a
       filled array.
    2. Some subset of the coordinates of the array are populated with a value
       drawn from the elements strategy (or its inferred form).

    You can set ``fill`` to :func:`~hypothesis.strategies.nothing` if you want
    to disable this behaviour and draw a value for every element.

    By default ``arrays`` will attempt to infer the correct fill behaviour: if
    ``unique`` is also ``True``, no filling will occur. Otherwise, if it looks
    safe to reuse the values of elements across multiple coordinates (this will
    be the case for any inferred strategy, and for most of the builtins, but is
    not the case for mutable values or strategies built with flatmap, map,
    composite, etc.) then it will use the elements strategy as the fill, else it
    will default to having no fill.

    Having a fill helps Hypothesis craft high quality examples, but its
    main importance is when the array generated is large: Hypothesis is
    primarily designed around testing small examples. If you have arrays with
    hundreds or more elements, having a fill value is essential if you want
    your tests to run in reasonable time.
    """
    check_xp_attributes(xp, [
        "finfo", "asarray", "zeros", "full", "all", "isnan", "isfinite",
        "reshape"
    ])

    if isinstance(dtype, st.SearchStrategy):
        return dtype.flatmap(lambda d: _arrays(
            xp, d, shape, elements=elements, fill=fill, unique=unique))
    elif isinstance(dtype, str):
        dtype = dtype_from_name(xp, dtype)

    if isinstance(shape, st.SearchStrategy):
        return shape.flatmap(lambda s: _arrays(
            xp, dtype, s, elements=elements, fill=fill, unique=unique))
    elif isinstance(shape, int):
        shape = (shape, )
    elif not isinstance(shape, tuple):
        raise InvalidArgument(
            f"shape={shape} is not a valid shape or strategy")
    check_argument(
        all(isinstance(x, int) and x >= 0 for x in shape),
        f"shape={shape!r}, but all dimensions must be non-negative integers.",
    )

    if elements is None:
        elements = _from_dtype(xp, dtype)
    elif isinstance(elements, Mapping):
        elements = _from_dtype(xp, dtype, **elements)
    check_strategy(elements, "elements")

    if fill is None:
        assert isinstance(elements, st.SearchStrategy)  # for mypy
        if unique or not elements.has_reusable_values:
            fill = st.nothing()
        else:
            fill = elements
    check_strategy(fill, "fill")

    return ArrayStrategy(xp, elements, dtype, shape, fill, unique)
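A usage sketch for the public wrapper around _arrays, assuming hypothesis.extra.array_api is available and an Array API namespace such as the experimental numpy.array_api can be imported:

from numpy import array_api as xp  # experimental namespace; assumed available
from hypothesis.extra.array_api import make_strategies_namespace

xps = make_strategies_namespace(xp)
# Element bounds given as a dict are forwarded to from_dtype().
strategy = xps.arrays(xp.int8, (2, 3), elements={"min_value": 0})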
Example #10
def do_validate(self):
    check_strategy(self.base, "base")
    extended = self.extend(self.limited_base)
    check_strategy(extended, f"extend({self.limited_base!r})")
    self.limited_base.validate()
    extended.validate()
Example #11
def data_frames(
    columns: Optional[Sequence[column]] = None,
    *,
    rows: Optional[st.SearchStrategy[Union[dict, Sequence[Any]]]] = None,
    index: Optional[st.SearchStrategy[Ex]] = None,
) -> st.SearchStrategy[pandas.DataFrame]:
    """Provides a strategy for producing a :class:`pandas.DataFrame`.

    Arguments:

    * columns: An iterable of :class:`column` objects describing the shape
      of the generated DataFrame.

    * rows: A strategy for generating a row object. Should generate
      either dicts mapping column names to values or a sequence mapping
      column position to the value in that position (note that unlike the
      :class:`pandas.DataFrame` constructor, single values are not allowed
      here. Passing e.g. an integer is an error, even if there is only one
      column).

      At least one of rows and columns must be provided. If both are
      provided then the generated rows will be validated against the
      columns and an error will be raised if they don't match.

      Caveats on using rows:

      * In general you should prefer using columns to rows, and only use
        rows if the columns interface is insufficiently flexible to
        describe what you need - you will get better performance and
        example quality that way.
      * If you provide rows and not columns, then the shape and dtype of
        the resulting DataFrame may vary. e.g. if you have a mix of int
        and float in the values for one column in your row entries, the
        column will sometimes have an integral dtype and sometimes a float.

    * index: If not None, a strategy for generating indexes for the
      resulting DataFrame. This can generate either :class:`pandas.Index`
      objects or any sequence of values (which will be passed to the
      Index constructor).

      You will probably find it most convenient to use the
      :func:`~hypothesis.extra.pandas.indexes` or
      :func:`~hypothesis.extra.pandas.range_indexes` function to produce
      values for this argument.

    Usage:

    The expected usage pattern is that you use :class:`column` and
    :func:`columns` to specify a fixed shape of the DataFrame you want as
    follows. For example the following gives a two column data frame:

    .. code-block:: pycon

        >>> from hypothesis.extra.pandas import column, data_frames
        >>> data_frames([
        ... column('A', dtype=int), column('B', dtype=float)]).example()
                    A              B
        0  2021915903  1.793898e+232
        1  1146643993            inf
        2 -2096165693   1.000000e+07

    If you want the values in different columns to interact in some way you
    can use the rows argument. For example the following gives a two column
    DataFrame where the value in the first column is always at most the value
    in the second:

    .. code-block:: pycon

        >>> from hypothesis.extra.pandas import column, data_frames
        >>> import hypothesis.strategies as st
        >>> data_frames(
        ...     rows=st.tuples(st.floats(allow_nan=False),
        ...                    st.floats(allow_nan=False)).map(sorted)
        ... ).example()
                       0             1
        0  -3.402823e+38  9.007199e+15
        1 -1.562796e-298  5.000000e-01

    You can also combine the two:

    .. code-block:: pycon

        >>> from hypothesis.extra.pandas import columns, data_frames
        >>> import hypothesis.strategies as st
        >>> data_frames(
        ...     columns=columns(["lo", "hi"], dtype=float),
        ...     rows=st.tuples(st.floats(allow_nan=False),
        ...                    st.floats(allow_nan=False)).map(sorted)
        ... ).example()
                 lo            hi
        0   9.314723e-49  4.353037e+45
        1  -9.999900e-01  1.000000e+07
        2 -2.152861e+134 -1.069317e-73

    (Note that the column dtype must still be specified and will not be
    inferred from the rows. This restriction may be lifted in future).

    Combining rows and columns has the following behaviour:

    * The column names and dtypes will be used.
    * If the column is required to be unique, this will be enforced.
    * Any values missing from the generated rows will be provided using the
      column's fill.
    * Any values in the row not present in the column specification (for
      dicts, keys with no corresponding column name; for sequences, more
      items than there are columns) will result in InvalidArgument being
      raised.
    """
    if index is None:
        index = range_indexes()
    else:
        check_strategy(index, "index")

    index_strategy = index

    if columns is None:
        if rows is None:
            raise InvalidArgument(
                "At least one of rows and columns must be provided")
        else:

            @st.composite
            def rows_only(draw):
                index = draw(index_strategy)

                @check_function
                def row():
                    result = draw(rows)
                    check_type(abc.Iterable, result, "draw(row)")
                    return result

                if len(index) > 0:
                    return pandas.DataFrame([row() for _ in index],
                                            index=index)
                else:
                    # If we haven't drawn any rows we need to draw one row and
                    # then discard it so that we get a consistent shape for the
                    # DataFrame.
                    base = pandas.DataFrame([row()])
                    return base.drop(0)

            return rows_only()

    assert columns is not None
    cols = try_convert(tuple, columns, "columns")

    rewritten_columns = []
    column_names: Set[str] = set()

    for i, c in enumerate(cols):
        check_type(column, c, f"columns[{i}]")

        c = copy(c)
        if c.name is None:
            label = f"columns[{i}]"
            c.name = i
        else:
            label = c.name
            try:
                hash(c.name)
            except TypeError:
                raise InvalidArgument(
                    "Column names must be hashable, but columns[%d].name was "
                    "%r of type %s, which cannot be hashed." %
                    (i, c.name, type(c.name).__name__)) from None

        if c.name in column_names:
            raise InvalidArgument(
                f"duplicate definition of column name {c.name!r}")

        column_names.add(c.name)

        c.elements, c.dtype = elements_and_dtype(c.elements, c.dtype, label)

        if c.dtype is None and rows is not None:
            raise InvalidArgument(
                "Must specify a dtype for all columns when combining rows with columns."
            )

        c.fill = npst.fill_for(fill=c.fill,
                               elements=c.elements,
                               unique=c.unique,
                               name=label)

        rewritten_columns.append(c)

    if rows is None:

        @st.composite
        def just_draw_columns(draw):
            index = draw(index_strategy)
            local_index_strategy = st.just(index)

            data = OrderedDict((c.name, None) for c in rewritten_columns)

            # Depending on how the columns are going to be generated we group
            # them differently to get better shrinking. For columns with fill
            # enabled, the elements can be shrunk independently of the size,
            # so we can just shrink by shrinking the index then shrinking the
            # length and are generally much more free to move data around.

            # For columns with no filling the problem is harder, and drawing
            # them like that would result in rows being very far apart from
            # each other in the underlying data stream, which gets in the way
            # of shrinking. So what we do is reorder and draw those columns
            # row wise, so that the values of each row are next to each other.
            # This makes life easier for the shrinker when deleting blocks of
            # data.
            columns_without_fill = [
                c for c in rewritten_columns if c.fill.is_empty
            ]

            if columns_without_fill:
                for c in columns_without_fill:
                    data[c.name] = pandas.Series(np.zeros(shape=len(index),
                                                          dtype=c.dtype),
                                                 index=index)
                seen = {
                    c.name: set()
                    for c in columns_without_fill if c.unique
                }

                for i in range(len(index)):
                    for c in columns_without_fill:
                        if c.unique:
                            for _ in range(5):
                                value = draw(c.elements)
                                if value not in seen[c.name]:
                                    seen[c.name].add(value)
                                    break
                            else:
                                reject()
                        else:
                            value = draw(c.elements)
                        try:
                            data[c.name][i] = value
                        except ValueError as err:  # pragma: no cover
                            # This just works in Pandas 1.4 and later, but gives
                            # a confusing error on previous versions.
                            if c.dtype is None and not isinstance(
                                    value,
                                (float, int, str, bool, datetime, timedelta)):
                                raise ValueError(
                                    f"Failed to add value={value!r} to column "
                                    f"{c.name} with dtype=None.  Maybe passing "
                                    "dtype=object would help?") from err
                            # Unclear how this could happen, but users find a way...
                            raise

            for c in rewritten_columns:
                if not c.fill.is_empty:
                    data[c.name] = draw(
                        series(
                            index=local_index_strategy,
                            dtype=c.dtype,
                            elements=c.elements,
                            fill=c.fill,
                            unique=c.unique,
                        ))

            return pandas.DataFrame(data, index=index)

        return just_draw_columns()
    else:

        @st.composite
        def assign_rows(draw):
            index = draw(index_strategy)

            result = pandas.DataFrame(
                OrderedDict((
                    c.name,
                    pandas.Series(np.zeros(dtype=c.dtype, shape=len(index)),
                                  dtype=c.dtype),
                ) for c in rewritten_columns),
                index=index,
            )

            fills = {}

            any_unique = any(c.unique for c in rewritten_columns)

            if any_unique:
                all_seen = [
                    set() if c.unique else None for c in rewritten_columns
                ]
                while all_seen[-1] is None:
                    all_seen.pop()

            for row_index in range(len(index)):
                for _ in range(5):
                    original_row = draw(rows)
                    row = original_row
                    if isinstance(row, dict):
                        as_list = [None] * len(rewritten_columns)
                        for i, c in enumerate(rewritten_columns):
                            try:
                                as_list[i] = row[c.name]
                            except KeyError:
                                try:
                                    as_list[i] = fills[i]
                                except KeyError:
                                    if c.fill.is_empty:
                                        raise InvalidArgument(
                                            f"Empty fill strategy in {c!r} cannot "
                                            f"complete row {original_row!r}"
                                        ) from None
                                    fills[i] = draw(c.fill)
                                    as_list[i] = fills[i]
                        for k in row:
                            if k not in column_names:
                                raise InvalidArgument(
                                    "Row %r contains column %r not in columns %r)"
                                    % (row, k,
                                       [c.name for c in rewritten_columns]))
                        row = as_list
                    if any_unique:
                        has_duplicate = False
                        for seen, value in zip(all_seen, row):
                            if seen is None:
                                continue
                            if value in seen:
                                has_duplicate = True
                                break
                            seen.add(value)
                        if has_duplicate:
                            continue
                    row = list(try_convert(tuple, row, "draw(rows)"))

                    if len(row) > len(rewritten_columns):
                        raise InvalidArgument(
                            f"Row {original_row!r} contains too many entries. Has "
                            f"{len(row)} but expected at most {len(rewritten_columns)}"
                        )
                    while len(row) < len(rewritten_columns):
                        c = rewritten_columns[len(row)]
                        if c.fill.is_empty:
                            raise InvalidArgument(
                                f"Empty fill strategy in {c!r} cannot "
                                f"complete row {original_row!r}")
                        row.append(draw(c.fill))
                    result.iloc[row_index] = row
                    break
                else:
                    reject()
            return result

        return assign_rows()
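Example use in a test (a sketch, assuming hypothesis.extra.pandas and pandas are installed):

from hypothesis import given
from hypothesis.extra.pandas import column, data_frames

@given(data_frames([column("A", dtype=int), column("B", dtype=float)]))
def test_columns_and_dtypes(df):
    assert list(df.columns) == ["A", "B"]
    assert df["A"].dtype.kind == "i" and df["B"].dtype.kind == "f"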
Example #12
def series(
    *,
    elements: Optional[st.SearchStrategy[Ex]] = None,
    dtype: Any = None,
    index: Optional[st.SearchStrategy[Union[Sequence, pandas.Index]]] = None,
    fill: Optional[st.SearchStrategy[Ex]] = None,
    unique: bool = False,
) -> st.SearchStrategy[pandas.Series]:
    """Provides a strategy for producing a :class:`pandas.Series`.

    Arguments:

    * elements: a strategy that will be used to generate the individual
      values in the series. If None, we will attempt to infer a suitable
      default from the dtype.

    * dtype: the dtype of the resulting series and may be any value
      that can be passed to :class:`numpy.dtype`. If None, will use
      pandas's standard behaviour to infer it from the type of the elements
      values. Note that if the type of values that comes out of your
      elements strategy varies, then so will the resulting dtype of the
      series.

    * index: If not None, a strategy for generating indexes for the
      resulting Series. This can generate either :class:`pandas.Index`
      objects or any sequence of values (which will be passed to the
      Index constructor).

      You will probably find it most convenient to use the
      :func:`~hypothesis.extra.pandas.indexes` or
      :func:`~hypothesis.extra.pandas.range_indexes` function to produce
      values for this argument.

    Usage:

    .. code-block:: pycon

        >>> series(dtype=int).example()
        0   -2001747478
        1    1153062837
    """
    if index is None:
        index = range_indexes()
    else:
        check_strategy(index, "index")

    elements, dtype = elements_and_dtype(elements, dtype)
    index_strategy = index

    @st.composite
    def result(draw):
        index = draw(index_strategy)

        if len(index) > 0:
            if dtype is not None:
                result_data = draw(
                    npst.arrays(
                        dtype=dtype,
                        elements=elements,
                        shape=len(index),
                        fill=fill,
                        unique=unique,
                    ))
            else:
                result_data = list(
                    draw(
                        npst.arrays(
                            dtype=object,
                            elements=elements,
                            shape=len(index),
                            fill=fill,
                            unique=unique,
                        )))

            return pandas.Series(result_data, index=index, dtype=dtype)
        else:
            return pandas.Series(
                (),
                index=index,
                dtype=dtype if dtype is not None else draw(
                    dtype_for_elements_strategy(elements)),
            )

    return result()
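Example use in a test (a sketch):

from hypothesis import given
from hypothesis.extra.pandas import series
import hypothesis.strategies as st

@given(series(elements=st.integers(0, 100), unique=True))
def test_unique_series(s):
    # unique=True guarantees all generated values are distinct.
    assert s.is_unique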
Example #13
def do_draw(self, data):
    source = data.draw(self.flatmapped_strategy)
    expanded_source = self.expand(source)
    check_strategy(expanded_source)
    return data.draw(expanded_source)
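This machinery appears to back SearchStrategy.flatmap: expand(source) is the user-supplied function, and its result must itself be a strategy, hence check_strategy. A sketch of flatmap in use:

import hypothesis.strategies as st

# Draw a length first, then a list of exactly that length.
fixed_length_lists = st.integers(1, 5).flatmap(
    lambda n: st.lists(st.integers(), min_size=n, max_size=n)
)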
Example #14
def check_strategy_(*args):
    return check_strategy(*args)
Example #15
def integer_array_indices(
    shape: Shape,
    *,
    result_shape: st.SearchStrategy[Shape] = array_shapes(),
    dtype: np.dtype = "int",
) -> st.SearchStrategy[Tuple[np.ndarray, ...]]:
    """Return a search strategy for tuples of integer-arrays that, when used
    to index into an array of shape ``shape``, give an array whose shape
    was drawn from ``result_shape``.

    Examples from this strategy shrink towards the tuple of index-arrays::

        len(shape) * (np.zeros(drawn_result_shape, dtype), )

    * ``shape`` a tuple of integers that indicates the shape of the array
      whose indices are being generated.
    * ``result_shape`` a strategy for generating tuples of integers, which
      describe the shape of the resulting index arrays. The default is
      :func:`~hypothesis.extra.numpy.array_shapes`.  The shape drawn from
      this strategy determines the shape of the array that will be produced
      when the corresponding example from ``integer_array_indices`` is used
      as an index.
    * ``dtype`` the integer data type of the generated index-arrays. Negative
      integer indices can be generated if a signed integer type is specified.

    Recall that an array can be indexed using a tuple of integer-arrays to
    access its members in an arbitrary order, producing an array with an
    arbitrary shape. For example:

    .. code-block:: pycon

        >>> from numpy import array
        >>> x = array([-0, -1, -2, -3, -4])
        >>> ind = (array([[4, 0], [0, 1]]),)  # a tuple containing a 2D integer-array
        >>> x[ind]  # the resulting array is commensurate with the indexing array(s)
        array([[-4,  0],
               [0, -1]])

    Note that this strategy does not accommodate all variations of so-called
    'advanced indexing', as prescribed by NumPy's nomenclature.  Combinations
    of basic and advanced indexes are too complex to usefully define in a
    standard strategy; we leave application-specific strategies to the user.
    Advanced-boolean indexing can be defined as ``arrays(shape=..., dtype=bool)``,
    and is similarly left to the user.
    """
    check_type(tuple, shape, "shape")
    check_argument(
        shape and all(isinstance(x, int) and x > 0 for x in shape),
        f"shape={shape!r} must be a non-empty tuple of integers > 0",
    )
    check_strategy(result_shape, "result_shape")
    check_argument(np.issubdtype(dtype, np.integer),
                   f"dtype={dtype!r} must be an integer dtype")
    signed = np.issubdtype(dtype, np.signedinteger)

    def array_for(index_shape, size):
        return arrays(
            dtype=dtype,
            shape=index_shape,
            elements=st.integers(-size if signed else 0, size - 1),
        )

    return result_shape.flatmap(lambda index_shape: st.tuples(*(
        array_for(index_shape, size) for size in shape)))
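Example use in a test (a sketch):

import numpy as np
from hypothesis import given
from hypothesis.extra.numpy import integer_array_indices

@given(integer_array_indices(shape=(3, 4)))
def test_fancy_indexing(ind):
    x = np.arange(12).reshape(3, 4)
    # All index arrays share the drawn result_shape, so the result does too.
    assert x[ind].shape == ind[0].shape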
Example #16
def arrays(
    dtype: Any,
    shape: Union[int, st.SearchStrategy[int], Shape, st.SearchStrategy[Shape]],
    *,
    elements: Optional[Union[st.SearchStrategy, Mapping[str, Any]]] = None,
    fill: Optional[st.SearchStrategy[Any]] = None,
    unique: bool = False,
) -> st.SearchStrategy[np.ndarray]:
    r"""Returns a strategy for generating :class:`numpy:numpy.ndarray`\ s.

    * ``dtype`` may be any valid input to :class:`~numpy:numpy.dtype`
      (this includes :class:`~numpy:numpy.dtype` objects), or a strategy that
      generates such values.
    * ``shape`` may be an integer >= 0, a tuple of such integers, or a
      strategy that generates such values.
    * ``elements`` is a strategy for generating values to put in the array.
      If it is None a suitable value will be inferred based on the dtype,
      which may give any legal value (including eg ``NaN`` for floats).
      If a mapping, it will be passed as ``**kwargs`` to ``from_dtype()``
    * ``fill`` is a strategy that may be used to generate a single background
      value for the array. If None, a suitable default will be inferred
      based on the other arguments. If set to
      :func:`~hypothesis.strategies.nothing` then filling
      behaviour will be disabled entirely and every element will be generated
      independently.
    * ``unique`` specifies if the elements of the array should all be
      distinct from one another. Note that in this case multiple NaN values
      may still be allowed. If fill is also set, the only valid values for
      it to return are NaN values (anything for which :obj:`numpy:numpy.isnan`
      returns True. So e.g. for complex numbers (nan+1j) is also a valid fill).
      Note that if unique is set to True the generated values must be hashable.

    Arrays of specified ``dtype`` and ``shape`` are generated for example
    like this:

    .. code-block:: pycon

      >>> import numpy as np
      >>> arrays(np.int8, (2, 3)).example()
      array([[-8,  6,  3],
             [-6,  4,  6]], dtype=int8)

    See :doc:`What you can generate and how <data>`.

    .. code-block:: pycon

      >>> import numpy as np
      >>> from hypothesis.strategies import floats
      >>> arrays(np.float64, 3, elements=floats(0, 1)).example()
      array([ 0.88974794,  0.77387938,  0.1977879 ])

    Array values are generated in two parts:

    1. Some subset of the coordinates of the array are populated with a value
       drawn from the elements strategy (or its inferred form).
    2. If any coordinates were not assigned in the previous step, a single
       value is drawn from the fill strategy and is assigned to all remaining
       places.

    You can set fill to :func:`~hypothesis.strategies.nothing` if you want to
    disable this behaviour and draw a value for every element.

    If fill is set to None then it will attempt to infer the correct behaviour
    automatically: If unique is True, no filling will occur by default.
    Otherwise, if it looks safe to reuse the values of elements across
    multiple coordinates (this will be the case for any inferred strategy, and
    for most of the builtins, but is not the case for mutable values or
    strategies built with flatmap, map, composite, etc) then it will use the
    elements strategy as the fill, else it will default to having no fill.

    Having a fill helps Hypothesis craft high quality examples, but its
    main importance is when the array generated is large: Hypothesis is
    primarily designed around testing small examples. If you have arrays with
    hundreds or more elements, having a fill value is essential if you want
    your tests to run in reasonable time.
    """
    # We support passing strategies as arguments for convenience, or at least
    # for legacy reasons, but don't want to pay the perf cost of a composite
    # strategy (i.e. repeated argument handling and validation) when it's not
    # needed.  So we get the best of both worlds by recursing with flatmap,
    # but only when it's actually needed.
    if isinstance(dtype, st.SearchStrategy):
        return dtype.flatmap(lambda d: arrays(
            d, shape, elements=elements, fill=fill, unique=unique))
    if isinstance(shape, st.SearchStrategy):
        return shape.flatmap(lambda s: arrays(
            dtype, s, elements=elements, fill=fill, unique=unique))
    # From here on, we're only dealing with values and it's relatively simple.
    dtype = np.dtype(dtype)
    if elements is None or isinstance(elements, Mapping):
        if dtype.kind in ("m", "M") and "[" not in dtype.str:
            # For datetime and timedelta dtypes, we have a tricky situation -
            # because they *may or may not* specify a unit as part of the dtype.
            # If not, we flatmap over the various resolutions so that array
            # elements have consistent units but units may vary between arrays.
            return (st.sampled_from(TIME_RESOLUTIONS).map(
                (dtype.str + "[{}]").format).flatmap(lambda d: arrays(
                    d, shape=shape, fill=fill, unique=unique)))
        elements = from_dtype(dtype, **(elements or {}))
    check_strategy(elements, "elements")
    if isinstance(shape, int):
        shape = (shape, )
    shape = tuple(shape)
    check_argument(
        all(isinstance(s, int) for s in shape),
        "Array shape must be integer in each dimension, provided shape was {}",
        shape,
    )
    fill = fill_for(elements=elements, unique=unique, fill=fill)
    return ArrayStrategy(elements, shape, dtype, fill, unique)
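Example use in a test (a sketch):

import numpy as np
from hypothesis import given
from hypothesis.extra.numpy import arrays
from hypothesis.strategies import floats

@given(arrays(np.float64, (2, 3), elements=floats(0, 1)))
def test_bounded_array(arr):
    assert arr.shape == (2, 3)
    assert ((arr >= 0) & (arr <= 1)).all()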