Example 1
def get_scalar_dtype_strategy(exclude=None):
    """
    A `hypothesis` strategy yielding
    """
    possible_strategies = {
        "datetime": hyp_np.datetime64_dtypes(max_period="ms", min_period="ns"),
        "uint": hyp_np.unsigned_integer_dtypes(),
        "int": hyp_np.integer_dtypes(),
        "float": hyp_np.floating_dtypes(),
        "byte": hyp_np.byte_string_dtypes(),
        "unicode": hyp_np.unicode_string_dtypes(),
    }
    if exclude is None:
        exclude = []
    elif not isinstance(exclude, list):
        exclude = [exclude]
    for ex in exclude:
        if ex in possible_strategies:
            del possible_strategies[ex]
        else:
            raise ValueError(
                "Strategy {} unknown. Possible values are {}".format(
                    ex, possible_strategies.keys()
                )
            )
    return hyp_st.one_of(*list(possible_strategies.values()))
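
A minimal usage sketch for the strategy above (the test name and body are illustrative, not part of the original project), assuming `hyp_np` and `hyp_st` are the usual aliases for `hypothesis.extra.numpy` and `hypothesis.strategies`:

from hypothesis import given
import numpy as np

@given(dtype=get_scalar_dtype_strategy(exclude="datetime"))
def test_excludes_datetime_dtypes(dtype):
    # With "datetime" excluded, the drawn dtype is never datetime64 (kind "M").
    assert isinstance(dtype, np.dtype)
    assert dtype.kind != "M"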
Example 2
def one_of_supported_dtypes(draw):
    # A strategy that selects a dtype that riptable is known to handle.
    # dtype size 16-bit is not supported
    # little endian is not supported
    return one_of(
        boolean_dtypes(),
        integer_dtypes(endianness="=", sizes=(8, 32, 64)),
        unsigned_integer_dtypes(endianness="=", sizes=(8, 32, 64)),
        floating_dtypes(endianness="=", sizes=(32, 64)),
        byte_string_dtypes(endianness="="),
        unicode_string_dtypes(endianness="="),
        # the following dtypes are not supported
        # complex_number_dtypes(),
        # datetime64_dtypes(),
        # timedelta64_dtypes(),
    )
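
A hedged sketch of combining a dtype strategy like this with `arrays()`; it assumes `one_of_supported_dtypes` is decorated with `@composite` (as its `draw` parameter implies), and the shape and test name are illustrative:

from hypothesis import given
from hypothesis.extra.numpy import arrays

@given(arr=arrays(dtype=one_of_supported_dtypes(), shape=10))
def test_supported_dtype_arrays(arr):
    # Each generated array uses one of the dtypes the strategy above allows.
    assert arr.shape == (10,)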
Example 3
    "kwargs", [{"min_side": 100}, {"min_dims": 15}, {"min_dims": 32}]
)
def test_interesting_array_shapes_argument(kwargs):
    nps.array_shapes(**kwargs).example()


@given(nps.scalar_dtypes())
def test_can_generate_scalar_dtypes(dtype):
    assert isinstance(dtype, np.dtype)


@settings(max_examples=100)
@given(
    nps.nested_dtypes(
        subtype_strategy=st.one_of(
            nps.scalar_dtypes(), nps.byte_string_dtypes(), nps.unicode_string_dtypes()
        )
    )
)
def test_can_generate_compound_dtypes(dtype):
    assert isinstance(dtype, np.dtype)


@settings(max_examples=100)
@given(
    nps.nested_dtypes(
        subtype_strategy=st.one_of(
            nps.scalar_dtypes(), nps.byte_string_dtypes(), nps.unicode_string_dtypes()
        )
    ).flatmap(lambda dt: nps.arrays(dtype=dt, shape=1))
)
def test_can_generate_data_compound_dtypes(arr):
    # This is meant to catch the class of errors which prompted PR #2085
    assert isinstance(arr, np.ndarray)
Example 4
def test_byte_string_dtypes_generate_unicode_strings(data):
    dt = data.draw(nps.byte_string_dtypes())
    result = data.draw(nps.from_dtype(dt))
    assert isinstance(result, binary_type)
Example 5

@given(
    st.data(),
    st.builds(
        "{}[{}]".format,
        st.sampled_from(("datetime64", "timedelta64")),
        st.sampled_from(nps.TIME_RESOLUTIONS),
    ).map(np.dtype),
)
def test_inferring_from_time_dtypes_gives_same_dtype(data, dtype):
    ex = data.draw(nps.from_dtype(dtype))
    assert dtype == ex.dtype
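
For reference, the `st.builds` call above simply composes dtype strings such as "datetime64[ms]" before mapping them through `np.dtype`; a quick check of that construction:

import numpy as np

dt = np.dtype("{}[{}]".format("datetime64", "ms"))
assert dt == np.dtype("datetime64[ms]") and dt.kind == "M"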


@given(st.data(), nps.byte_string_dtypes() | nps.unicode_string_dtypes())
def test_inferred_string_strategies_roundtrip(data, dtype):
    # Check that we never generate too-long or nul-terminated strings, which
    # cannot be read back out of an array.
    arr = np.zeros(shape=1, dtype=dtype)
    ex = data.draw(nps.from_dtype(arr.dtype))
    arr[0] = ex
    assert arr[0] == ex
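
A quick illustration of the failure mode the comment in the test above guards against: fixed-width NumPy string fields strip trailing NUL bytes and silently truncate over-long values, so such strings cannot round-trip through an array.

import numpy as np

arr = np.zeros(shape=1, dtype="S3")
arr[0] = b"ab\x00"   # trailing NUL is stripped on read-back
assert arr[0] == b"ab"
arr[0] = b"abcd"     # values longer than the dtype width are truncated
assert arr[0] == b"abc"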


@given(st.data(), nps.scalar_dtypes())
def test_all_inferred_scalar_strategies_roundtrip(data, dtype):
    # We only check scalars here, because record/compound/nested dtypes always
    # give an array of np.void objects.  We're interested in whether scalar
    # values are safe, not known type coercion.
    arr = np.zeros(shape=1, dtype=dtype)
Example 6
@pytest.mark.parametrize("kwargs", [{
    "min_side": 100
}, {
    "min_dims": 15
}, {
    "min_dims": 32
}])
def test_interesting_array_shapes_argument(kwargs):
    nps.array_shapes(**kwargs).example()


@given(nps.scalar_dtypes())
def test_can_generate_scalar_dtypes(dtype):
    assert isinstance(dtype, np.dtype)


@settings(max_examples=100)
@given(
    nps.nested_dtypes(subtype_strategy=st.one_of(nps.scalar_dtypes(
    ), nps.byte_string_dtypes(), nps.unicode_string_dtypes())))
def test_can_generate_compound_dtypes(dtype):
    assert isinstance(dtype, np.dtype)


@settings(max_examples=100)
@given(
    nps.nested_dtypes(subtype_strategy=st.one_of(nps.scalar_dtypes(
    ), nps.byte_string_dtypes(), nps.unicode_string_dtypes())).flatmap(
        lambda dt: nps.arrays(dtype=dt, shape=1)))
def test_can_generate_data_compound_dtypes(arr):
    # This is meant to catch the class of errors which prompted PR #2085
    assert isinstance(arr, np.ndarray)


@given(nps.nested_dtypes())
Example 7
def test_byte_string_dtype_len_0(data):
    s = nps.byte_string_dtypes(min_len=0, max_len=0)
    assert data.draw(s).itemsize == 1
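
For context on that assertion: NumPy byte-string dtypes use one byte per character and unicode dtypes four, and a zero-width dtype cannot store anything, which is presumably why the strategy falls back to a one-byte dtype for max_len=0. The itemsize arithmetic:

import numpy as np

assert np.dtype("S5").itemsize == 5    # byte strings: 1 byte per character
assert np.dtype("U5").itemsize == 20   # unicode strings: 4 bytes per character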
Example 8
        datasaver.add_result((p, list_of_strings))

    test_set = load_by_id(datasaver.run_id)
    expec_data = np.array([item for item in list_of_strings])
    actual_data = test_set.get_parameter_data()["p"]["p"]

    try:
        np.testing.assert_array_equal(actual_data, expec_data)
    finally:
        test_set.conn.close()


@settings(suppress_health_check=(HealthCheck.function_scoped_fixture, ),
          deadline=None)
@given(p_values=hypnumpy.arrays(dtype=hst.sampled_from(
    (hypnumpy.unicode_string_dtypes(), hypnumpy.byte_string_dtypes(),
     hypnumpy.timedelta64_dtypes(), hypnumpy.datetime64_dtypes())),
                                shape=hypnumpy.array_shapes()))
def test_string_and_date_data_in_array(experiment, p_values):
    p = qc.Parameter('p',
                     label='String parameter',
                     unit='',
                     get_cmd=None,
                     set_cmd=None,
                     initial_value=p_values)

    meas = Measurement(experiment)
    meas.register_parameter(p, paramtype='array')

    with meas.run() as datasaver:
        datasaver.add_result((p, p.get()))
Example 9
@pytest.mark.parametrize("kwargs", [{
    "min_side": 100
}, {
    "min_dims": 15
}, {
    "min_dims": 32
}])
def test_interesting_array_shapes_argument(kwargs):
    nps.array_shapes(**kwargs).example()


@given(nps.scalar_dtypes())
def test_can_generate_scalar_dtypes(dtype):
    assert isinstance(dtype, np.dtype)


@settings(max_examples=100)
@given(
    nps.nested_dtypes(subtype_strategy=st.one_of(nps.scalar_dtypes(
    ), nps.byte_string_dtypes(), nps.unicode_string_dtypes())))
def test_can_generate_compound_dtypes(dtype):
    assert isinstance(dtype, np.dtype)


@settings(max_examples=100)
@given(
    nps.nested_dtypes(subtype_strategy=st.one_of(nps.scalar_dtypes(
    ), nps.byte_string_dtypes(), nps.unicode_string_dtypes())).flatmap(
        lambda dt: nps.arrays(dtype=dt, shape=1)))
def test_can_generate_data_compound_dtypes(arr):
    # This is meant to catch the class of errors which prompted PR #2085
    assert isinstance(arr, np.ndarray)


@settings(max_examples=100)
Example 10
def categorical_stringarray(
    draw,
    max_length: int,
    max_categories: int,
    *,
    endianness: str = '=',
    min_str_len: int = 1,
    max_str_len: int = 16,
    unicode: Optional[bool] = None,
    ordered: Optional[bool] = None,
) -> Categorical:
    """
    Strategy for creating StringArray-mode Categoricals.

    Parameters
    ----------
    draw
    max_length : int
    max_categories : int
    endianness : str
    min_str_len : int
    max_str_len : int
    unicode : bool, optional
    ordered : bool, optional

    Examples
    --------
    >>> categorical_stringarray(10_000, 1_000, max_str_len=20).example()
    0, 0, 0, 0, 0

    Notes
    -----
    TODO: Make sure to include the case where we have category values (in the underlying integer array)
          past the end of the categories array. (Or is that only for a Dictionary mode categorical?)
          To clarify -- this is the behavior where, when we print the Categorical, we get entries like <!456>.

    TODO: Also exercise (in one way or another) the following arguments to the Categorical constructor:
        * base_index
            Add an optional boolean parameter. When None, draw a boolean to fill it in.
            When the bool is false, call rt.Cat() with base_index=0.
            When True, call rt.Cat() with base_index=1.
        * dtype
            Call the ctor with dtype=None or a signed integer dtype that's either the min size given the
            number of categories or any larger signed integer dtype.
            E.g. if len(categories) == 1000, draw from { None, np.int16, np.int32, np.int64 }
        * filter
            Add an optional boolean param to the strategy which defaults to None, in which case we'll fill it by drawing a boolean.
            When the bool is false, we call rt.Cat() with filter=None.
            When True, we create a boolean array the same length as our values or fancy index and pass that as the filter.

    TODO: Support slicing/strides on the values/categories arrays passed to the Categorical constructor.

    TODO: When creating the fancy index array and we've drawn 'explicit_categories=True', allow the fancy index to be created
          with any applicable integer type (signed or unsigned) whose range is large enough to index into the categories array.
          (Or, should we just allow _any_ integer dtype, even if too small? We wouldn't be able to index categories past the
          range of the dtype, but maybe that's an interesting thing to test? Especially around cases like having auto_add=True.)
    """
    # Draw a boolean indicating how the data will be passed to the Categorical constructor later.
    # This is done first since it's one of the most likely things to affect the behavior of the Categorical,
    # and shrinking (in some cases) works better when such values are drawn earlier in the strategy.
    explicit_categories: bool = draw(st.booleans())
    if explicit_categories:
        event('Categorical created from unique category array and fancy index.')
    else:
        event('Categorical created from non-unique array of strings.')

    # Draw the string dtype based on whether we want a byte (ascii) string or Unicode.
    is_unicode: bool = draw(st.booleans()) if unicode is None else unicode
    if is_unicode:
        labels_dtype = draw(unicode_string_dtypes(endianness=endianness, min_len=min_str_len, max_len=max_str_len))
    else:
        labels_dtype = draw(byte_string_dtypes(endianness=endianness, min_len=min_str_len, max_len=max_str_len))

    # Create an array of unique category labels.
    cats_shapes = array_shapes(max_dims=1, max_side=max_categories)
    category_label_strat = category_labels(min_str_len, max_str_len, unicode=is_unicode)
    unique_labels = draw(arrays(dtype=labels_dtype, shape=cats_shapes, elements=category_label_strat, unique=True))

    # Use basic_indices to create a fancy index into the array of unique category labels.
    # Apply it to expand the array of unique labels into an array where those labels may occur zero or more times.
    fancy_index_shapes = array_shapes(max_dims=1, max_side=max_length)
    fancy_index = draw(integer_array_indices(shape=unique_labels.shape, result_shape=fancy_index_shapes))

    # If the 'ordered' flag is not set, draw a boolean for it now so we have a concrete value
    # to use when creating the categorical.
    is_ordered = draw(st.booleans()) if ordered is None else ordered

    # If the 'explicit_categories' flag is set, create the Categorical by passing in the
    # unique values and fancy index separately.
    # Otherwise, apply the fancy index to the array of unique category values to produce an
    # array where each category appears zero or more times; then create the Categorical from that.
    if explicit_categories:
        return Categorical(fancy_index, categories=unique_labels, ordered=is_ordered, unicode=is_unicode)

    else:
        values = unique_labels[fancy_index]
        return Categorical(values, ordered=is_ordered, unicode=is_unicode)
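
A minimal usage sketch for the strategy above, assuming it is wrapped with `hypothesis.strategies.composite` (as its `draw` parameter suggests); the bounds and test name are illustrative only:

from hypothesis import given

@given(cat=categorical_stringarray(max_length=50, max_categories=8))
def test_categorical_stringarray_basic(cat):
    # Whichever construction path was drawn, the result is a riptable Categorical.
    assert isinstance(cat, Categorical)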
Example 11
def categorical_dictmode(
    draw,
    max_length: int,
    max_categories: int,
    *,
    endianness: str = '=',
    min_str_len: int = 1,
    max_str_len: int = 16,
    unicode: Optional[bool] = None,
    ordered: Optional[bool] = None,
) -> Categorical:
    """
    Strategy for creating Dictionary-mode Categoricals.

    This strategy currently only covers creating `Categorical` instances with
    string-typed category labels.

    Parameters
    ----------
    draw
    max_length : int
    max_categories : int
    endianness : str
    min_str_len : int
    max_str_len : int
    unicode : bool, optional
    ordered : bool, optional

    Examples
    --------
    >>> categorical_dictmode(10_000, 1_000, max_str_len=20).example()
    0, 0, 0, 0, 0

    Notes
    -----
    TODO: Make sure to include the case where we have category values (in the underlying integer array)
          past the end of the categories array. (Or is that only for a Dictionary mode categorical?)
          To clarify -- this is the behavior where, when we print the Categorical, we get entries like <!456>.

    TODO: Also exercise (in one way or another) the following arguments to the Categorical constructor:
        * base_index
            Add an optional boolean parameter. When None, draw a boolean to fill it in.
            When the bool is false, call rt.Cat() with base_index=0.
            When True, call rt.Cat() with base_index=1.
        * dtype
            Call the ctor with dtype=None or a signed integer dtype that's either the min size given the
            number of categories or any larger signed integer dtype.
            E.g. if len(categories) == 1000, draw from { None, np.int16, np.int32, np.int64 }
        * filter
            Add an optional boolean param to the strategy which defaults to None, in which case we'll fill it by drawing a boolean.
            When the bool is false, we call rt.Cat() with filter=None.
            When True, we create a boolean array the same length as our values or fancy index and pass that as the filter.

    TODO: Support slicing/strides on the values/categories arrays passed to the Categorical constructor.

    TODO: Does a Dictionary-mode Categorical allow any other types (e.g. rt.Date) to be used for the category labels?
        If so, these should also be covered by this strategy (though changes will be needed to allow a variety of
        types to be used for category labels).

    TODO: Any possible issues (that we might want to exercise in this strategy) between the string used when displaying
        the invalid category (e.g. 'Inv') and category labels? What happens if we have a category label using the same string?
    """
    # Draw a boolean indicating whether we'll use a signed or unsigned integer dtype.
    use_signed_integer_dtype: bool = draw(st.booleans())

    # If using a signed integer dtype, draw another boolean indicating whether we'll
    # generate negative category values.
    allow_negative_category_values: bool = draw(st.booleans()) if use_signed_integer_dtype else False
    if use_signed_integer_dtype:
        if allow_negative_category_values:
            event('Categorical may have a mix of negative, zero, and positive category values.')
        else:
            event('Categorical has only non-negative category values.')

    # If the 'unicode' flag is not set, draw a boolean to fill it in.
    is_unicode: bool = draw(st.booleans()) if unicode is None else unicode
    event(f'Category labels are {"unicode" if is_unicode else "ascii"} strings.')

    # If the 'ordered' flag is not set, draw a boolean for it now so we have a concrete value
    # to use when creating the categorical.
    is_ordered = draw(st.booleans()) if ordered is None else ordered
    event(f'ordered = {is_ordered}')

    # Draw the dtype for the category values.
    # TODO: Draw a signed or unsigned integer dtype here which is at least as large as needed, but perhaps larger
    #       than needed.
    #       For now, we just use the smallest dtype large enough to fit the max number of categories; but allowing for
    #       larger (randomly-selected) dtypes later will help ensure we test cases where there are non-consecutive
    #       category values even when the max_categories value is near the max value of a dtype.
    values_dtype = np.min_scalar_type(max_categories)

    # Create the strategy for the category values (integer values representing the categories).
    values_dtype_info = np.iinfo(values_dtype)
    values_strat =\
        st.integers(
            min_value=(values_dtype_info.min if allow_negative_category_values else 0),
            max_value=values_dtype_info.max)

    # Create an array of unique category values/codes.
    cats_shapes = array_shapes(max_dims=1, max_side=max_categories)
    unique_cat_values = draw(arrays(dtype=values_dtype, shape=cats_shapes, elements=values_strat, unique=True))

    # Draw the string dtype for the labels based on whether we want a byte (ascii) string or Unicode,
    # using the is_unicode flag drawn above.
    if is_unicode:
        labels_dtype = draw(unicode_string_dtypes(endianness=endianness, min_len=min_str_len, max_len=max_str_len))
    else:
        labels_dtype = draw(byte_string_dtypes(endianness=endianness, min_len=min_str_len, max_len=max_str_len))

    # Create an array of unique category labels; this must be the same shape as the unique category values array.
    category_label_strat = category_labels(min_str_len, max_str_len, unicode=is_unicode)
    unique_labels =\
        draw(arrays(dtype=labels_dtype, shape=unique_cat_values.shape, elements=category_label_strat, unique=True))

    # TODO: Draw a slice (or None) that we'll apply to both arrays of uniques (the labels and values)
    #   before using them to create the category dictionary.
    #   This allows us to cover cases where a category value isn't in the dictionary.

    # Combine the unique category labels and values to create a dictionary.
    category_dict = dict(zip(unique_labels, unique_cat_values))

    # Use basic_indices to create a fancy index into the array of unique values.
    # Apply it to expand the array of unique values into an array where those values may occur zero or more times.
    fancy_index_shapes = array_shapes(max_dims=1, max_side=max_length)
    fancy_index = draw(integer_array_indices(shape=unique_cat_values.shape, result_shape=fancy_index_shapes))

    # Apply the fancy index to the array of unique category values to produce an
    # array where each category appears zero or more times; then create the Categorical from that.
    cat_values = unique_cat_values[fancy_index]
    return Categorical(cat_values, categories=category_dict, ordered=is_ordered, unicode=is_unicode)
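
For reference, the dtype selection above leans on `np.min_scalar_type`, which returns the smallest integer dtype (unsigned, for non-negative inputs) able to hold a value, and `np.iinfo`, which reports that dtype's bounds:

import numpy as np

assert np.min_scalar_type(1_000) == np.dtype("uint16")
assert np.iinfo(np.dtype("uint16")).max == 65_535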
Example 12
@given(data())
@pytest.mark.parametrize(
    "datatype, elements",
    [
        pytest.param(
            integer_dtypes(endianness="=", sizes=(64, )),
            integers(min_value=1, max_value=np.iinfo(np.int64).max),
            id="integer_dtype",
        ),
        pytest.param(
            unsigned_integer_dtypes(endianness="=", sizes=(64, )),
            integers(min_value=1, max_value=np.iinfo(np.int64).max),
            id="unsigned_integer_dtype",
        ),
        pytest.param(
            byte_string_dtypes(endianness="="), None, id="byte_string_dtype"),
        pytest.param(
            datetime64_dtypes(endianness="="),
            None,
            id="datetime64_dtype",
            marks=[
                pytest.mark.xfail(
                    reason="RIP-375 - Categorical unsupported dtypes"),
                pytest.mark.skip,
            ],
        ),
        pytest.param(
            timedelta64_dtypes(endianness="="),
            None,
            id="timedelta64_dtype",
            marks=[
Example 13
def test_byte_string_dtypes_generate_unicode_strings(data):
    dt = data.draw(nps.byte_string_dtypes())
    result = data.draw(nps.from_dtype(dt))
    assert isinstance(result, binary_type)
Example 14
    with pytest.raises(InvalidArgument):
        test()


@given(st.data(),
       st.builds('{}[{}]'.format,
                 st.sampled_from(('datetime64', 'timedelta64')),
                 st.sampled_from(nps.TIME_RESOLUTIONS)
                 ).map(np.dtype)
       )
def test_inferring_from_time_dtypes_gives_same_dtype(data, dtype):
    ex = data.draw(nps.from_dtype(dtype))
    assert dtype == ex.dtype


@given(st.data(), nps.byte_string_dtypes() | nps.unicode_string_dtypes())
def test_inferred_string_strategies_roundtrip(data, dtype):
    # Check that we never generate too-long or nul-terminated strings, which
    # cannot be read back out of an array.
    arr = np.zeros(shape=1, dtype=dtype)
    ex = data.draw(nps.from_dtype(arr.dtype))
    arr[0] = ex
    assert arr[0] == ex


@given(st.data(), nps.scalar_dtypes())
def test_all_inferred_scalar_strategies_roundtrip(data, dtype):
    # We only check scalars here, because record/compound/nested dtypes always
    # give an array of np.void objects.  We're interested in whether scalar
    # values are safe, not known type coercion.
    arr = np.zeros(shape=1, dtype=dtype)
Example 15
    test_set = load_by_id(datasaver.run_id)
    expec_data = np.array([item for item in list_of_strings])
    actual_data = test_set.get_parameter_data()["p"]["p"]

    try:
        np.testing.assert_array_equal(actual_data, expec_data)
    finally:
        test_set.conn.close()


@settings(suppress_health_check=(HealthCheck.function_scoped_fixture,))
@given(
    p_values=hypnumpy.arrays(
        dtype=hst.sampled_from(
            (hypnumpy.unicode_string_dtypes(),
             hypnumpy.byte_string_dtypes(),
             hypnumpy.timedelta64_dtypes(),
             hypnumpy.datetime64_dtypes())
        ),
        shape=hypnumpy.array_shapes()
    )
)
def test_string_and_date_data_in_array(experiment, p_values):
    p = qc.Parameter('p', label='String parameter', unit='', get_cmd=None,
                     set_cmd=None, initial_value=p_values)

    meas = Measurement(experiment)
    meas.register_parameter(p, paramtype='array')

    with meas.run() as datasaver:
        datasaver.add_result((p, p.get()))