def series_strategy(
    pandas_dtype: PandasDtype,
    strategy: Optional[SearchStrategy] = None,
    *,
    checks: Optional[Sequence] = None,
    nullable: Optional[bool] = False,
    allow_duplicates: Optional[bool] = True,
    name: Optional[str] = None,
    size: Optional[int] = None,
):
    """Strategy to generate a pandas Series.

    :param pandas_dtype: :class:`pandera.dtypes.PandasDtype` instance.
    :param strategy: an optional hypothesis strategy. If specified, the
        pandas dtype strategy will be chained onto this strategy.
    :param checks: sequence of :class:`~pandera.checks.Check` s to constrain
        the values of the data in the column/index.
    :param nullable: whether or not the generated Series contains null values.
    :param allow_duplicates: whether or not the generated Series contains
        duplicates.
    :param name: name of the Series.
    :param size: number of elements in the Series.
    :returns: ``hypothesis`` strategy.
    """
    elements = field_element_strategy(pandas_dtype, strategy, checks=checks)
    strategy = (
        pdst.series(
            elements=elements,
            dtype=pandas_dtype.numpy_dtype,
            index=pdst.range_indexes(
                min_size=0 if size is None else size, max_size=size
            ),
            unique=not allow_duplicates,
        )
        .filter(lambda x: x.shape[0] > 0)
        .map(lambda x: x.rename(name))
        .map(lambda x: x.astype(pandas_dtype.str_alias))
    )
    if nullable:
        strategy = null_field_masks(strategy)

    def undefined_check_strategy(strategy, check):
        """Strategy for checks with undefined strategies."""
        warnings.warn(
            "Vectorized check doesn't have a defined strategy. "
            "Falling back to filtering drawn values based on the check "
            "definition. This can considerably slow down data generation."
        )

        def _check_fn(series):
            return check(series).check_passed

        return strategy.filter(_check_fn)

    for check in checks if checks is not None else []:
        if not hasattr(check, "strategy") and not check.element_wise:
            strategy = undefined_check_strategy(strategy, check)

    return strategy
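# A hedged usage sketch of the same .filter/.map chaining pattern used above,
# built only on public hypothesis APIs (the names below are illustrative, not
# part of pandera): generate a float Series, then rename and cast it.
import hypothesis.extra.pandas as pdst
import hypothesis.strategies as st
from hypothesis import given

named_float_series = (
    pdst.series(
        elements=st.floats(allow_nan=False, allow_infinity=False),
        dtype=float,
        index=pdst.range_indexes(min_size=1, max_size=10),
    )
    .map(lambda s: s.rename("values"))
    .map(lambda s: s.astype("float64"))
)


@given(named_float_series)
def test_named_float_series_sketch(s):
    assert s.name == "values"
    assert str(s.dtype) == "float64"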
@st.composite
def two_equal_size_series(draw):
    # draw-based strategies require the st.composite decorator
    series_strategy = series(
        dtype=np.float64, elements=float_strategy, index=range_indexes(min_size=1)
    )
    s1 = draw(series_strategy)
    s2 = draw(series_strategy)
    assume(len(s1) == len(s2))
    return s1, s2
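# A hedged usage sketch: the composite strategy above plugs directly into
# @given and yields pairs of equal-length float Series.
@given(two_equal_size_series())
def test_two_equal_size_series_sketch(pair):
    s1, s2 = pair
    assert len(s1) == len(s2)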
from datetime import datetime

import hypothesis.strategies as st
import pandas as pd
import pytz
from hypothesis import given, settings
from hypothesis.extra.pandas import columns, indexes, series

from liualgotrader.fincalcs.support_resistance import get_local_maxima

est = pytz.timezone("US/Eastern")


@settings(deadline=None, max_examples=100)
@given(
    series(
        index=indexes(
            elements=st.datetimes(
                min_value=datetime(2000, 1, 1), max_value=datetime(2040, 1, 1)
            ),
            dtype=pd.DatetimeIndex,
        ),
        elements=st.floats(),
        dtype=float,
    ),
)
def test_get_local_maxima(series: pd.Series):
    # smoke test: just exercise get_local_maxima on arbitrary float series
    print(series)
    r = get_local_maxima(series)
    if r.empty:
        assert r.empty  # nosec
    else:
        assert not r.empty  # nosec
    print("result", r)
            dates(min_value=datetime.date(2019, 1, 1),
                  max_value=datetime.date.today()),
            nats,
        ),
    )
])

opleverstatus_st = data_frames([
    column("opleverstatus", dtype="object",
           elements=sampled_from(br.opleverstatussen))
])

date_series_st = series(
    dates(min_value=datetime.date(2019, 1, 1), max_value=datetime.date.today()),
    dtype="datetime64[ns]",
)


@given(date_series_st, integers(min_value=-10, max_value=10))
def test_is_date_set(test_series: pd.Series, time_delta_days: int):
    opgeleverd = br.is_date_set(test_series, time_delta_days)
    time_point = datetime.date.today() - datetime.timedelta(days=time_delta_days)
    assert len(test_series) == len(opgeleverd)
    if not test_series.empty:
        assert ((~test_series.isna() & (test_series <= time_point))
                == opgeleverd).all()
import hypothesis.extra.pandas as pdst
import hypothesis.strategies as st
from hypothesis import assume, given

from tests.common.debug import find_any
from tests.pandas.helpers import supported_by_pandas


@given(st.data())
def test_can_create_a_series_of_any_dtype(data):
    dtype = np.dtype(data.draw(npst.scalar_dtypes()))
    assume(supported_by_pandas(dtype))
    series = data.draw(pdst.series(dtype=dtype))
    assert series.dtype == pandas.Series([], dtype=dtype).dtype


@given(pdst.series(dtype=float, index=pdst.range_indexes(min_size=2, max_size=5)))
def test_series_respects_size_bounds(s):
    assert 2 <= len(s) <= 5


def test_can_fill_series():
    nan_backed = pdst.series(
        elements=st.floats(allow_nan=False), fill=st.just(float("nan"))
    )
    find_any(nan_backed, lambda x: np.isnan(x).any())


@given(pdst.series(dtype=int))
def test_can_generate_integral_series(s):
    assert s.dtype == np.dtype(int)
print('The best AMS Score is {:.3f} at a Cut Parameter of {:.2f}'.format(
    max(y), x[np.argmax(y)]))

# TESTING "NN_OUTPUT_TO_AMS":
from hypothesis import given, settings
import hypothesis.strategies as st
import hypothesis.extra
from hypothesis.extra.numpy import arrays
from hypothesis.extra.pandas import series, range_indexes


@given(x=st.floats(0.5, 1),
       y=arrays(np.float64, (1, 100000), elements=st.floats(0, 1),
                fill=None, unique=False),
       z=arrays(np.int8, (1, 100000), elements=None, fill=None, unique=False),
       t=series(elements=None, dtype=np.float64,
                index=range_indexes(min_size=1, max_size=1),
                fill=None, unique=False))
@settings(deadline=None)
def test_NN_output_to_AMS(x, y, z, t):
    b_reg = 10
    s = sum(t[(y[:, 1] > x) & (z[:, 1] == 1)])
    b = sum(t[(y[:, 1] > x) & (z[:, 1] == 0)])
    AMS = np.sqrt(2 * ((s + b + b_reg) * np.log(1 + s / (b + b_reg)) - s))
    assert NN_output_to_AMS(x, y, z, t) == AMS
    df, seasonalities = td.sample_n_gaussian_ts_as_df(
        10, '2019-01-01', '2020-01-01', Interval.D)
    assert df.shape == (366, 10)


# globals for tests
index_len = 100
train_size_min = 0.1
train_size_max = 0.9


@given(features=data_frames(columns(['feat1', 'feat2', 'feat3'], dtype=float),
                            index=range_indexes(min_size=index_len,
                                                max_size=index_len)),
       labels=series(elements=st.integers(min_value=0, max_value=3),
                     index=range_indexes(min_size=index_len,
                                         max_size=index_len)),
       time_stamps=st.integers(min_value=1, max_value=10),
       train_size=st.floats(min_value=train_size_min, max_value=train_size_max,
                            allow_infinity=False, allow_nan=False))
def test_split_categorical_time_series_labels(features: pd.DataFrame,
                                              labels: pd.Series,
                                              time_stamps: int,
                                              train_size: float):
    assume(train_size == round(train_size, 2))  # limit to 2 digits after comma
    assume(features.isnull().any(1).sum() == 0)  # forbid NaNs
    features_train, features_test, labels_train, labels_test = \
        tu.split_categorical_time_series_labels(features, labels,
                                                time_stamps, train_size)
lst_text = lists(elements=text(alphabet=list(string.printable),
                               min_size=4, max_size=10),
                 min_size=1).example()


@given(text(min_size=3))
@example(' BAD label ')
@example(' BAD label !@#$!%!%')
def test_make_good_label(s):
    print(type(make_good_label(s)))
    assert isinstance(make_good_label(s), str)
    assert make_good_label(' BAD label ') == 'bad_label'


@given(series(dtype=np.unicode_))
@example(series(dtype=np.int_).example())
@example(series(dtype=float).example())
@example(series(dtype=bool).example())
def test_completeness(srs_t):
    print(srs_t, srs_t.dtype.kind)
    assert np.isclose(completeness(df_l['flt_1']), 0.833333)
    assert isinstance(completeness(srs_t), float)


def test_mash():
    df_result = mash(df_l, 'flt_1')
    assert df_result.shape[0] == 5
    df_result = mash(df_l, 'int_1')
    assert df_result.shape[0] == 5
    df_result = mash(df_l, 'int_1', keep_zeros=True)
#     # split_at = split_at.sort()
#     if 0 not in split_at:
#         split_at = [0].extend(split_at)
#
#     new_text = []
#     for i in split_at[1:]:
#         new_text.append(text[i-1:i])
#
#     new_text.append(text[i[-1]:len(text)])


@settings(verbosity=Verbosity.verbose)
@given(data=expd.series(elements=st.text(
               u'abcdefghijklmnopqrstuvwxyz0123456789-_/ ',
               min_size=25, max_size=500),
           dtype=None, index=None, fill=None, unique=True),
       upper=expd.series(elements=st.text(
               u'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
               min_size=25, max_size=500),
           dtype=None, index=None, fill=None, unique=True),
       non_ascii=expd.series(elements=st.text(
               st.characters(min_codepoint=256, max_codepoint=1000),
               min_size=25, max_size=500),
           dtype=None, index=None,
class TestUtilsModule(unittest.TestCase):

    @given(s=st.text())
    def test_belongs1(self, s):
        l1 = ['apples', 'oranges', 'pears']
        with pytest.raises(ValueError):
            assert utils.belongs(s, l1)

    def test_belongs2(self):
        l1 = ['apples', 'oranges', 'pears']
        assert utils.belongs("apples", l1)

    def test_instance_check(self):
        # single example
        x = ['abba', 'father', 'cross']
        assert utils.instance_check(x, list)
        y = np.array([1, 2, 3], dtype=float)
        assert utils.instance_check(y, np.ndarray)
        # multiples given a tuple
        ij = True
        ji = False
        assert utils.instance_check((ij, ji), bool)

    @given(st.integers(min_value=0))
    def test_nonnegative1(self, x):
        assert utils.nonnegative(x, int)

    @given(st.integers(max_value=-1))
    def test_nonnegative2(self, x):
        with pytest.raises(AttributeError):
            assert utils.nonnegative(x, int)

    def test_bounds_check(self):
        assert utils.bounds_check(math.pi, math.pi - 0.00001, math.pi + 0.00001)
        assert utils.bounds_check(5, 5 - 1, 5 + 1)

    @given(numpy.arrays(float, (100, 2)), numpy.arrays(float, (100, 2)))
    def test_arrays_equal_size(self, x, y):
        assert utils.arrays_equal_size(x, y)

    @given(numpy.arrays(float, (100, 2)), st.text())
    def test_arrays_dimension(self, array, s):
        with pytest.raises(ValueError):
            assert utils.arrays_dimension(array, s)

    def test_check_list_type(self):
        x = [1, 2, 3]
        assert utils.check_list_type(x, int)
        y = ['a', 'b', 'c']
        assert utils.check_list_type(y, str)
        with pytest.raises(TypeError):
            assert utils.check_list_type(x, str)

    @given(st.integers(min_value=1, max_value=150),
           st.sampled_from(["square", "diag"]),
           st.integers(min_value=1, max_value=15),
           st.integers(min_value=1, max_value=15),
           st.floats(min_value=0.01, max_value=10.))
    def test_nearest_factors1(self, n, shape, cut, sr, wvar):
        # returns a tuple
        res = utils.nearest_factors(n, shape, cut, sr, wvar)
        assert type(res) == tuple
        assert isinstance(res[0], (np.int, np.int32, np.int64))
        assert isinstance(res[1], (np.int, np.int32, np.int64))
        diff = np.abs(res[0] * res[1] - sr)
        assert diff - 10 < diff < diff + 10

    @given(st.lists(st.text(), min_size=1))
    def test_zipe1(self, x):
        z = utils.zipe(x)
        assert type(z) == list
        assert len(z) == len(x)
        assert z[0] == x[0]

    @given(st.lists(st.text(), min_size=1), st.lists(st.integers(), min_size=1))
    def test_zipe2(self, x, y):
        z = utils.zipe(x, y)
        assert type(z) == list
        assert len(z) == max(len(x), len(y))
        assert z[0][0] == x[0]
        assert z[0][1] == y[0]

    def test_zipe3(self):
        x = [1, 2, 3]
        assert utils.zipe(x) == x

    def test_umap(self):
        """A mapping function"""
        def _f(x):
            """Blank."""
            return x**2
        assert utils.umap(_f, [2, 4, 6]) == [4, 16, 36]

        def _g(x, y):
            """Blank."""
            return x / y
        assert utils.umap(_g, [3, 6, 9], [1, 2, 3]) == [3, 3, 3]

    def test_umapc(self):
        """mapping w cache"""
        def _f(x):
            """Blank."""
            return x**2
        assert utils.umapc("test.pkl", _f, [2, 4, 6]) == [4, 16, 36]
        # re-run to load from file
        assert utils.umapc("test.pkl", _f, [2, 4, 6]) == [4, 16, 36]
        assert os.path.isfile("test.pkl")
        os.remove("test.pkl")

    def test_umapp(self):
        def _f(x):
            """Blank."""
            return x**2
        assert utils.umapp(_f, [2, 4, 6]) == [4, 16, 36]

        def _g(x, y):
            """Blank."""
            return x / y
        assert utils.umapp(_g, [3, 6, 9], [1, 2, 3]) == [3, 3, 3]

    def test_umappc(self):
        """mapping w cache"""
        def _f(x):
            """Blank."""
            return x**2
        assert utils.umappc("test.pkl", _f, [2, 4, 6]) == [4, 16, 36]
        # re-run to load from file
        assert utils.umappc("test.pkl", _f, [2, 4, 6]) == [4, 16, 36]
        assert os.path.isfile("test.pkl")
        os.remove("test.pkl")

        def _g(x, y):
            """Blank."""
            return x / y
        assert utils.umappc("test2.pkl", _g, [3, 6, 9], [1, 2, 3]) == [3, 3, 3]
        assert utils.umappc("test2.pkl", _g, [3, 6, 9], [1, 2, 3]) == [3, 3, 3]
        assert os.path.isfile("test2.pkl")
        os.remove("test2.pkl")

    @given(pandas.series(dtype=int))
    def test_panderfy(self, ser):
        # make a copy
        cpy = ser.copy()
        pd.testing.assert_series_equal(ser, utils.transform_copy(ser, cpy))

    @given(pandas.indexes(dtype=int))
    def test_panderfy2(self, ser):
        # make a copy
        cpy = ser.copy()
        pd.testing.assert_index_equal(ser, utils.transform_copy(ser, cpy))

    @given(pandas.data_frames(
        [pandas.column("A", dtype=int), pandas.column("B", dtype=float)]))
    def test_panderfy3(self, ser):
        # make a copy
        cpy = ser.copy()
        pd.testing.assert_frame_equal(ser, utils.transform_copy(ser, cpy))

    @given(numpy.arrays(int, (100, )))
    def test_remove_na(self, x):
        np.testing.assert_array_almost_equal(x, utils.remove_na(x))

    def test_dict_to_tuple(self):
        d = {"a": 1, "b": 2, "c": 3}
        assert utils.dict_to_tuple(d) == (("a", 1), ("b", 2), ("c", 3))

    def test_dictsplit(self):
        d = {"a": 1, "b": 2, "c": 3}
        assert utils.dictsplit(d) == (tuple(d.keys()), tuple(d.values()))

    def test_dictzip(self):
        x = [1, 2, 3]
        y = ['a', 'b', 'c']
        assert utils.dictzip(x, y) == dict(zip(x, y))

    def test_set_like1(self):
        x = ['a', 'b', 'a', 'c']
        assert np.all(utils.set_like(x) == pd.Index(['a', 'b', 'c']))

    def test_union(self):
        x = ["fi", "fo", "fum"]
        y = ["fi", "yo", "sum"]
        z = ["fi", "fe", "sun"]
        assert np.all(
            utils.union(x, y) == pd.Index(["fi", "fo", "fum", "sum", "yo"]))
        assert np.all(
            utils.union(x, y, z) == pd.Index(
                ["fe", "fi", "fo", "fum", "sum", "sun", "yo"]))

    def test_intersect(self):
        x = ["fi", "fo", "sun"]
        y = ["fi", "yo", "sum"]
        z = ["fi", "fe", "sun"]
        assert np.all(utils.intersect(x, z) == pd.Index(["fi", "sun"]))
        assert np.all(utils.intersect(x, y, z) == pd.Index(["fi"]))

    def test_difference(self):
        x = ["fi", "fo", "sun"]
        z = ["fi", "yo", "sum"]
        assert np.all(
            utils.difference(x, z) == pd.Index(["fo", "sum", "sun", "yo"]))

    def test_absdifference(self):
        x = ["fi", "fo", "sun"]
        z = ["fi", "yo", "sum"]
        assert np.all(utils.absdifference(x, z) == pd.Index(["fo", "sun"]))

    def test_pairwise(self):
        x = [1, 2, 3]
        f = lambda i, j: i * j
        np.testing.assert_array_equal(np.array(utils.pairwise(f, x)),
                                      np.array([2, 3, 6]))
)
def test_minmax_scaler_np(array):
    scaler = MinMaxScaler()
    scaler.fit(array)
    assert (scaler.transform(array).min(axis=0) >= 0).all()
    assert (scaler.transform(array).max(axis=0) <= 1).all()
    np.testing.assert_allclose(scaler.fit(array).transform(array),
                               scaler.fit_transform(array))
    np.testing.assert_allclose(array,
                               scaler.inv_transform(scaler.transform(array)))


@given(
    series(
        unique=True,
        elements=st.floats(
            max_value=1e8, min_value=-1e8, allow_nan=False, allow_infinity=False
        ),
        index=range_indexes(min_size=2),
    )
)
def test_minmax_scaler_series(series):
    scaler = MinMaxScaler()
    scaler.fit(series)
    assert scaler.transform(series).min() >= 0
    assert scaler.transform(series).max() <= 1
    np.testing.assert_allclose(scaler.fit(series).transform(series),
                               scaler.fit_transform(series))
    np.testing.assert_allclose(series,
                               scaler.inv_transform(scaler.transform(series)),
                               rtol=1e-06)
from tests.common.debug import find_any
from tests.pandas.helpers import supported_by_pandas


@given(st.data())
def test_can_create_a_series_of_any_dtype(data):
    dtype = np.dtype(data.draw(npst.scalar_dtypes()))
    assume(supported_by_pandas(dtype))
    # Use raw data to work around pandas bug in repr. See
    # https://github.com/pandas-dev/pandas/issues/27484
    series = data.conjecture_data.draw(pdst.series(dtype=dtype))
    assert series.dtype == pandas.Series([], dtype=dtype).dtype


@given(
    pdst.series(dtype=float, index=pdst.range_indexes(min_size=2, max_size=5)))
def test_series_respects_size_bounds(s):
    assert 2 <= len(s) <= 5


def test_can_fill_series():
    nan_backed = pdst.series(elements=st.floats(allow_nan=False),
                             fill=st.just(np.nan))
    find_any(nan_backed, lambda x: np.isnan(x).any())


@given(pdst.series(dtype=int))
def test_can_generate_integral_series(s):
    assert s.dtype == np.dtype(int)
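# A minimal extra sketch (not part of the original suite): unique=True asks
# pdst.series for pairwise-distinct values, which an integer dtype satisfies
# without NaN complications.
@given(pdst.series(dtype=int, unique=True))
def test_unique_series_sketch(s):
    assert s.nunique() == len(s)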
    else:
        return all(a_ == b_ or (np.isnan(a_) and np.isnan(b_))
                   for a_, b_ in zip(a, b))


@given(input_=np_strategies.arrays(guaranteed_dtypes,
                                   np_strategies.array_shapes()))
def test_numpy_array_serialisation(input_):
    serialised = data_dumps(input_)
    assert isinstance(serialised, str)
    deserialised = data_loads(serialised)
    assert isinstance(deserialised, type(input_))
    assert pytest.helpers.exact_element_match(input_, deserialised)


@given(input_=one_of(series(dtype=int), series(dtype=float),
                     series(dtype=str)))
def test_pandas_series_serialisation(input_):
    serialised = data_dumps(input_)
    assert isinstance(serialised, str)
    deserialised = data_loads(serialised)
    assert isinstance(deserialised, type(input_))
    assert pytest.helpers.exact_element_match(input_, deserialised)


@given(input_=one_of(
    data_frames(columns(3, dtype=int)),
    data_frames(columns(3, dtype=float)),
    data_frames(columns(3, dtype=str)),
    data_frames([column(dtype=str), column(dtype=float),
import numpy as np
import pandas as pd
import pytest
import xarray as xr

pytest.importorskip("hypothesis")
import hypothesis.extra.numpy as npst  # isort:skip
import hypothesis.extra.pandas as pdst  # isort:skip
import hypothesis.strategies as st  # isort:skip
from hypothesis import given  # isort:skip

numeric_dtypes = st.one_of(
    npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes()
)

numeric_series = numeric_dtypes.flatmap(lambda dt: pdst.series(dtype=dt))

an_array = npst.arrays(
    dtype=numeric_dtypes,
    shape=npst.array_shapes(max_dims=2),  # can only convert 1D/2D to pandas
)


@st.composite
def datasets_1d_vars(draw) -> xr.Dataset:
    """Generate datasets with only 1D variables

    Suitable for converting to pandas dataframes.
    """
    # Generate an index for the dataset
    idx = draw(pdst.indexes(dtype="u8", min_size=0, max_size=100))
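# Separate from the truncated composite above: a hedged smoke test (the test
# name is illustrative) showing how numeric_series can feed xarray, since
# xr.DataArray accepts a pandas Series directly.
@given(numeric_series)
def test_dataarray_from_series_sketch(s):
    da = xr.DataArray(s)
    assert da.shape == s.shape
    assert da.dtype == s.dtype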
class TestStringModule(unittest.TestCase):

    def test_common_substrings(self):
        t = ['hello there!', 'hey there', 'well hello there',
             'there is a disturbance']
        x1 = turb.str.common_substrings("high tower", "low tower")
        assert type(x1) == np.str
        assert x1 == " tower"
        x2 = turb.str.common_substrings(t)
        # should return a series
        assert type(x2) == pd.Series
        assert len(x2) == 2
        assert x2.index[0] == 'there'
        assert x2.iloc[0] == 3

    @given(s1=st.one_of(st.text(), st.lists(st.text())),
           s2=st.one_of(st.text(), st.lists(st.text())))
    def test_common_substrings2(self, s1, s2):
        x1 = turb.str.common_substrings(s1, s2)
        note(x1)
        if type(s1) == str and type(s2) == str:
            assert type(x1) == str
        else:
            assert type(x1) == pd.Series

    def test_pattern(self):
        _in = ["hello", "bellow", "mellow", "swellow"]
        x1 = turb.str.pattern("ellow", _in)
        assert type(x1) == pd.Index
        assert len(x1) == 3
        assert np.all(x1 == pd.Index(['bellow', 'mellow', 'swellow']))
        x2 = turb.str.pattern("^he | ^b", _in)
        assert type(x2) == pd.Index
        assert len(x2) == 2
        assert np.all(x2 == pd.Index(['hello', 'bellow']))
        _in2 = int_dataframe_example()
        x3 = turb.str.pattern("^he | ^b", _in2)
        assert type(x3) == pd.Index
        assert len(x3) == 2
        assert np.all(x3 == pd.Index(['hello', 'bellow']))

    @given(words=st.one_of(
        st.lists(random_or_regex, unique=True),
        pandas.series(elements=random_or_regex, dtype=str, unique=True),
        pandas.indexes(elements=random_or_regex, dtype=str, unique=True),
    ))
    def test_pattern2(self, words):
        res = list(turb.str.pattern("ellow", words))
        # select all that conform to the pattern..?
        actual = [x for x in words if re.search("ellow", x)]
        self.assertEqual(res, actual)

    def test_patproduct(self):
        x1 = turb.str.patproduct("%s%d", ("x", "y"), range(100))
        assert type(x1) == list
        assert len(x1) == 200
        assert x1[0] == 'x0'
        assert x1[-1] == 'y99'
        # second example
        x2 = turb.str.patproduct("%s_%s", ("repl", "quality"), ("sum", "prod"))
        assert type(x2) == list
        assert len(x2) == 4
        assert x2[0] == 'repl_sum'
        assert x2[-1] == "quality_prod"

    @given(a=st.tuples(st.text(min_size=1), st.text(min_size=1)),
           b=st.lists(st.text(min_size=1)))
    def test_patproduct2(self, a, b):
        x1 = turb.str.patproduct("%s%s", a, b)
        self.assertListEqual(x1, ["%s%s" % item for item in it.product(a, b)])

    @given(s=st.text(max_size=50), strat=st.sampled_from(['middle', 'end']))
    def test_shorten(self, s, strat):
        ns = turb.str.shorten(s, strategy=strat)
        note(ns)
        assert type(ns) == np.str
        if len(s) < 15:
            assert len(s) == len(ns)
        else:
            assert len(ns) <= 15

    @given(st.lists(st.text(max_size=70)))
    def test_shorten2(self, ls):
        nls = turb.str.shorten(ls)
        assert type(nls) == list

    def test_string_replace(self):
        t = ['hello', 'i am', 'pleased']
        r = ['hello', 'u am', 'pleased']
        x1 = turb.str.string_replace(t, ('i', 'u'))
        assert type(x1) == list
        assert len(x1) == 3
        self.assertEqual(x1, r)

    @given(st.text(min_size=1))
    def test_string_replace2(self, s):
        op1 = ('u', 'i')
        res = s.replace(*op1)
        assert turb.str.string_replace(s, op1) == res

    @given(st.lists(st.text(min_size=1), min_size=1))
    def test_string_replace3(self, s):
        ops = [('u', 'i'), ('a', 'e')]
        res = s[0].replace(*ops[0]).replace(*ops[1])
        assert turb.str.string_replace(s, *ops)[0] == res

    def test_reformat(self):
        df = str_dataframe_example()
        x1 = turb.str.reformat("{hello}{bellow}", df)
        assert type(x1) == pd.Series
        pd.testing.assert_series_equal(
            x1, pd.Series(['carrotsbird', "applesshark", "pearsdragon"],
                          dtype=object))

    @given(df=pandas.data_frames([
        pandas.column("A", st.text(min_size=1), dtype=str, unique=True),
        pandas.column("B", st.text(min_size=1), dtype=str, unique=True)]))
    def test_reformat2(self, df):
        x1 = turb.str.reformat("{A}{B}", df)
        note(x1)
        pd.testing.assert_series_equal(x1, df['A'] + df['B'])
def test_confusing_object_dtype_aliases():
    pdst.series(elements=st.tuples(st.integers()), dtype=tuple).example()
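# For context (a hedged sketch): np.dtype(tuple) resolves to the object dtype,
# so the strategy above yields an object-typed Series of Python tuples.
@given(pdst.series(elements=st.tuples(st.integers()), dtype=tuple))
def test_object_dtype_alias_sketch(s):
    assert s.dtype == np.dtype(object)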
container_strategy = (dictionaries(text(), primitive_strategy)
                      | lists(primitive_strategy))

nested_strategy = recursive(
    container_strategy,
    lambda children: lists(children) | dictionaries(text(), children),
)

numpy_strategy = arrays(guaranteed_dtypes, array_shapes())

pandas_series = series(dtype=int) | series(dtype=float) | series(dtype=str)

pandas_dfs = (data_frames(columns(3, dtype=int))
              | data_frames(columns(3, dtype=float))
              | data_frames(columns(3, dtype=str))
              | data_frames([column(dtype=str), column(dtype=float),
                             column(dtype=int)]))

possible_input_data = one_of(
    lists(primitive_strategy),
    numpy_strategy,
    pandas_series,
    # pandas_dfs
)
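# Hypothetical wiring of the combined strategy into a round-trip smoke test
# (the test name is illustrative); data_dumps/data_loads are the
# (de)serialisers exercised by the serialisation tests above.
@given(input_=possible_input_data)
def test_possible_input_roundtrip_sketch(input_):
    assert isinstance(data_loads(data_dumps(input_)), type(input_))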
from datetime import datetime

import hypothesis.strategies as st
import pandas as pd
import pytz
from hypothesis import given, settings
from hypothesis.extra.pandas import indexes, series

from liualgotrader.fincalcs.trends import SeriesTrendType, get_series_trend


@settings(max_examples=200)
@given(
    series(
        index=indexes(
            elements=st.datetimes(min_value=datetime(2000, 1, 1),
                                  max_value=datetime(2040, 1, 1)),
            dtype=pd.DatetimeIndex,
        ),
        elements=st.floats(allow_nan=False, allow_infinity=False),
        dtype=float,
    ),
)
def test_get_series_trend(serie: pd.Series):
    print(serie)
    r, t = get_series_trend(serie)
    print("result", r, t)