def data_generator(draw):
    """Draw a data frame plus four scalar parameters.

    ``draw`` is the sampling callable supplied by Hypothesis (this function
    is presumably decorated with ``st.composite`` outside this view).

    Returns the tuple ``(df, reg_l1, reg_l2, optimizator, intercept)``.
    """
    # Ten float columns named '0'..'9'; infinities are excluded and values
    # are capped at 1e+30.
    column_names = [str(i) for i in range(10)]
    cell_values = st.floats(allow_infinity=False, max_value=1e+30)
    frame_strategy = data_frames(
        columns=columns(names_or_number=column_names,
                        dtype=float,
                        elements=cell_values))

    df = draw(frame_strategy)
    # Both values come from unconstrained float strategies.
    reg_l1 = draw(st.floats())
    reg_l2 = draw(st.floats())
    optimizator = draw(st.sampled_from(['L-BFGS-B', 'BFGS']))
    intercept = draw(st.booleans())
    return df, reg_l1, reg_l2, optimizator, intercept
Ejemplo n.º 2
0
def gen_columns_and_subset(draw, elements=names):
    """Draw a data frame together with a set of its column names to keep.

    ``draw`` samples from strategies; ``elements`` is the strategy used both
    for the column names and for the cell values.

    Returns ``(data_frame, columns_to_keep)``. Indices are drawn with
    replacement, so the set may hold fewer names than the drawn count.
    """
    column_names = draw(lists(elements, min_size=1, unique=True))
    num_columns_to_keep = draw(
        integers(min_value=1, max_value=len(column_names)))

    columns_to_keep = set()
    for _ in range(num_columns_to_keep):
        picked_index = draw(
            integers(min_value=0, max_value=len(column_names) - 1))
        columns_to_keep.add(column_names[picked_index])

    # Build the frame strategy from the drawn column names, then draw one
    # frame whose range index has at least five entries.
    frame_strategy = hpd.data_frames(
        hpd.columns(column_names, elements=elements),
        index=hpd.range_indexes(min_size=5))
    frame = draw(frame_strategy)
    return frame, columns_to_keep
Ejemplo n.º 3
0
    lambda children: lists(children) | dictionaries(text(), children),
)

# Either a text-keyed dictionary or a list, with values drawn from
# primitive_strategy.
container_strategy = dictionaries(
    text(), primitive_strategy) | lists(primitive_strategy)

# Recursive strategy: starts from container_strategy and wraps children in
# further lists or text-keyed dictionaries.
nested_strategy = recursive(
    container_strategy,
    lambda children: lists(children) | dictionaries(text(), children),
)

# Arrays whose dtype comes from guaranteed_dtypes and whose shape comes from
# array_shapes().
numpy_strategy = arrays(guaranteed_dtypes, array_shapes())

# A series of int, float or str dtype.
pandas_series = series(dtype=int) | series(dtype=float) | series(dtype=str)

# Three-column frames of a single dtype (int, float or str), or a frame with
# one str, one float and one int column.
pandas_dfs = (data_frames(columns(3, dtype=int))
              | data_frames(columns(3, dtype=float))
              | data_frames(columns(3, dtype=str))
              | data_frames(
                  [column(dtype=str),
                   column(dtype=float),
                   column(dtype=int)]))

# Union of the input kinds; the data-frame alternative is currently
# commented out.
possible_input_data = one_of(
    lists(primitive_strategy),
    numpy_strategy,
    pandas_series,
    # pandas_dfs
)

# 20x20 frame built from the first meshgrid output, so every row holds 0..19.
TEST_DF = pd.DataFrame(np.meshgrid(np.arange(20), np.arange(20))[0])
Ejemplo n.º 4
0
from hypothesis import given
from hypothesis.extra.pandas import columns, data_frames, range_indexes
import hypothesis.strategies as st
import pandas as pd

from analyse_weather import get_data, hottest_summer


@given(
    data_frames(
        index=range_indexes(min_size=1),
        columns=columns(['JUN', 'JUL', 'AUG'],
                        elements=st.floats(allow_nan=True)),
    )
)
def test_hottest_summer_auto(df):
    """hottest_summer must not return a null value for any non-empty frame."""
    result = hottest_summer(df)
    assert not pd.isnull(result)


# Below is another example of using fixtures but for this function:
import pytest
from pandas import DataFrame


@pytest.fixture
def full_dataset():
    """Pytest fixture that returns whatever ``get_data()`` produces."""
    return get_data()

#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at http://mozilla.org/MPL/2.0/.
#
# END HEADER

from __future__ import division, print_function, absolute_import

import hypothesis.strategies as st
import hypothesis.extra.pandas as pdst
from tests.common.arguments import e, argument_validation_test

BAD_ARGS = [
    e(pdst.data_frames),
    e(pdst.data_frames, pdst.columns(1, dtype='not a dtype')),
    e(pdst.data_frames, pdst.columns(1, elements='not a strategy')),
    e(pdst.data_frames, pdst.columns([[]])),
    e(pdst.data_frames, [], index=[]),
    e(pdst.data_frames, [], rows=st.fixed_dictionaries({'A': st.just(1)})),
    e(pdst.data_frames, pdst.columns(1)),
    e(pdst.data_frames, pdst.columns(1, dtype=float, fill=1)),
    e(pdst.data_frames, pdst.columns(1, dtype=float, elements=1)),
    e(pdst.data_frames, pdst.columns(1, fill=1, dtype=float)),
    e(pdst.data_frames, pdst.columns(['A', 'A'], dtype=float)),
    e(pdst.data_frames, pdst.columns(1, elements=st.none(), dtype=int)),
    e(pdst.data_frames, 1),
    e(pdst.data_frames, [1]),
    e(pdst.data_frames, pdst.columns(1, dtype='category')),
    e(pdst.data_frames,
      pdst.columns(['A'], dtype=bool),
Ejemplo n.º 6
0
    pdst.column('a', dtype=int),
    pdst.column('b', dtype=float),
]))
def test_can_have_columns_of_distinct_types(df):
    assert df['a'].dtype == np.dtype(int)
    assert df['b'].dtype == np.dtype(float)


@given(
    pdst.data_frames(
        [pdst.column(dtype=int)],
        index=pdst.range_indexes(min_size=1, max_size=5),
    )
)
def test_respects_size_bounds(df):
    """Generated frames honour the min/max size of the range index."""
    row_count = len(df)
    assert 1 <= row_count <= 5


@given(pdst.data_frames(pdst.columns(['A', 'B'], dtype=float)))
def test_can_specify_just_column_names(df):
    """Both named columns can be looked up on the generated frame."""
    for column_name in ('A', 'B'):
        df[column_name]


@given(pdst.data_frames(pdst.columns(2, dtype=float)))
def test_can_specify_just_column_count(df):
    """Columns requested by count are addressable by position labels 0 and 1."""
    for column_label in (0, 1):
        df[column_label]


@given(pdst.data_frames(
    rows=st.fixed_dictionaries({'A': st.integers(1, 10), 'B': st.floats()}))
)
def test_gets_the_correct_data_shape_for_just_rows(table):
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at http://mozilla.org/MPL/2.0/.
#
# END HEADER

from __future__ import division, print_function, absolute_import

import hypothesis.strategies as st
import hypothesis.extra.pandas as pdst
from tests.common.arguments import e, argument_validation_test

BAD_ARGS = [
    e(pdst.data_frames),
    e(pdst.data_frames, pdst.columns(1, dtype='not a dtype')),
    e(pdst.data_frames, pdst.columns(1, elements='not a strategy')),
    e(pdst.data_frames, pdst.columns([[]])),
    e(pdst.data_frames, [], index=[]),
    e(pdst.data_frames, [], rows=st.fixed_dictionaries({'A': st.just(1)})),
    e(pdst.data_frames, pdst.columns(1)),
    e(pdst.data_frames, pdst.columns(1, dtype=float, fill=1)),
    e(pdst.data_frames, pdst.columns(1, dtype=float, elements=1)),
    e(pdst.data_frames, pdst.columns(1, fill=1, dtype=float)),
    e(pdst.data_frames, pdst.columns(['A', 'A'], dtype=float)),
    e(pdst.data_frames, pdst.columns(1, elements=st.none(), dtype=int)),
    e(pdst.data_frames, 1),
    e(pdst.data_frames, [1]),
    e(pdst.data_frames, pdst.columns(1, dtype='category')),
    e(pdst.data_frames,
        pdst.columns(['A'], dtype=bool),
# Save and load a Pandas dataframe
#
# This test is pretty similar!  Three reasons: it's a good way to show you how
# the Hypothesis-for-Pandas API works, it should emphasise that round-trip tests
# are *shockingly* effective, and save/load is a simple example of functionality
# that everyone uses no matter what domain they work in.
#
# TODO: Write tests that show one dtype that you can round-trip through CSV
# and/or JSON, and one that you can't.
#
# See https://hypothesis.readthedocs.io/en/latest/numpy.html#pandas for details,
# and remember that you can use Numpy arrays or even lists of tuples if it helps!


@given(
    pdst.data_frames(
        index=pdst.indexes(
            dtype="float64", elements=st.floats(allow_nan=False), unique=True
        ),
        columns=pdst.columns(3, dtype="float64"),
    )
)
def test_dataframe_round_trip(df):
    """Pickling a frame to bytes and reading it back yields an equal frame."""
    # Serialise into an in-memory buffer and keep the raw bytes.
    with BytesIO() as sink:
        df.to_pickle(sink, compression=None)
        pickled = sink.getvalue()

    # Deserialise from a fresh buffer holding those bytes.
    with BytesIO(pickled) as source:
        restored = pd.read_pickle(source, compression=None)

    # Use the comparison helper that pandas ships for tests.
    pd.testing.assert_frame_equal(df, restored)
Ejemplo n.º 9
0
from hypothesis import given
from hypothesis.extra.pandas import data_frames, columns, range_indexes
import hypothesis.strategies as st

# Integer coordinates in the closed range [0, 10_000_000].
positions = st.integers(min_value=0, max_value=int(1e7))


def mysort(pos1, pos2):
    """Return the two positions as an ordered pair with distinct ends.

    The smaller value comes first. When neither value is greater than the
    other, the pair ``(pos1, pos2 + 1)`` is returned.
    """
    if pos1 > pos2:
        return pos2, pos1
    if pos2 > pos1:
        return pos1, pos2
    # Neither is greater: bump the second value by one.
    return pos1, pos2 + 1


# Frames with integer "Start" and "End" columns; each row is a pair of
# positions passed through mysort.
dfs = data_frames(columns=columns("Start End".split(), dtype=int),
                  rows=st.tuples(positions, positions).map(mysort))
Ejemplo n.º 10
0
def gen_rando_dataframe(draw, elements=names):
    """Draw a data frame with randomly drawn, unique column names.

    ``elements`` is the strategy used for both the column names and the cell
    values; the frame's range index has at least five entries.
    """
    column_names = draw(lists(elements, min_size=1, unique=True))
    frame_strategy = hpd.data_frames(
        hpd.columns(column_names, elements=elements),
        index=hpd.range_indexes(min_size=5))
    return draw(frame_strategy)
Ejemplo n.º 11
0
    assert_bins_and_labels_ndim('S2q_XY' , ['X'  , 'Y', 'S2q'], out_bins, out_labels, test_bins, test_dict)

    variable_names = list(test_bins)
    variable_names.extend(['S2e_X', 'S2e_Y' , 'S2e_Z', 'S2e_R', 'S2e_Phi',
                           'S2q_X', 'S2q_Y' , 'S2q_Z', 'S2q_R', 'S2q_Phi',
                           'XY'   , 'S2e_XY','S2q_XY'])
    for k in out_bins:
        assert k in variable_names


# Column names used to build the generated kdst data frames in the tests below.
kdst_variables = ['nS2', 'S1w'  , 'S1h', 'S1e', 'S1t', 'S2w', 'S2h', 'S2e', 'S2q' ,
                  'S2t', 'Nsipm', 'DT' , 'Z'  , 'X'  , 'Y'  , 'R'  , 'Phi', 'Zrms',
                  'Xrms', 'Yrms']


@given(data_frames(columns=columns(kdst_variables,
                                   elements=floats(allow_nan=False))))
@settings(deadline=None)
def test_fill_kdst_var_1d(kdst):
    """Values filled into the dict match the kdst columns.

    The 'S1t', 'S2t' and 'S1w' columns are divided by ``units.mus`` before
    the comparison.
    """
    var_dict = defaultdict(list)
    monf.fill_kdst_var_1d(kdst, var_dict)

    rescaled = ['S1t', 'S2t', 'S1w']
    for name in var_dict:
        expected = kdst[name].values
        if name in rescaled:
            expected = expected / units.mus
        assert np.allclose(expected, var_dict[name])


@given(data_frames(columns=columns(kdst_variables, elements=floats(allow_nan=False))))
@settings(deadline=None)
def test_fill_kdst_var_2d(kdst):
                                  st.text(), st.floats())),
        column(name='second',
               elements=st.one_of(st.just(float('nan')), st.integers(),
                                  st.text(), st.floats()))
    ]))
def test_get_nan_features_hypo(frame):
    result = get_nan_features(frame)
    assert isinstance(result, dict)
    for value in result.values():
        assert value > 0
    for key in result.keys():
        assert isinstance(key, str)


@given(
    data_frames(
        columns=columns(["first", "second", 'third'], dtype=float),
        rows=st.tuples(st.floats(allow_nan=False), st.integers(), st.text()),
    ))
def test_split_features_hypo(frame):
    """split_features returns three lists, each a sublist of the frame's columns."""
    cat_feats, float_feats, int_feats = split_features(frame)
    features = list(frame)
    groups = (cat_feats, float_feats, int_feats)

    # Type checks first, for all three groups.
    for group in groups:
        assert isinstance(group, list)

    # Then every group must be contained in the frame's column list.
    for group in groups:
        assert sublist(group, features)


def calculate_residuals(model_path, data_path):
Ejemplo n.º 13
0
    st.just("no"),
    st.just("false"),
    st.just("f"),
    st.just("n"),
    st.just("0"),
))
def test_str2bool(v):
    assert isinstance(deepof.utils.str2bool(v), bool)


@settings(deadline=None)
@given(
    mult=st.integers(min_value=1, max_value=10),
    dframe=data_frames(
        index=range_indexes(min_size=1),
        columns=columns(["X", "y", "likelihood"], dtype=float),
        rows=st.tuples(
            st.floats(min_value=0,
                      max_value=1000,
                      allow_nan=False,
                      allow_infinity=False),
            st.floats(min_value=0,
                      max_value=1000,
                      allow_nan=False,
                      allow_infinity=False),
            st.floats(min_value=0.01,
                      max_value=1.0,
                      allow_nan=False,
                      allow_infinity=False),
        ),
    ),
Ejemplo n.º 14
0
import pytest
from hypothesis import HealthCheck
from hypothesis import given
from hypothesis import settings
from hypothesis import strategies as st
from hypothesis.extra.pandas import range_indexes, columns, data_frames

import deepof.data
import deepof.pose_utils


@settings(deadline=None)
@given(
    pos_dframe=data_frames(
        index=range_indexes(min_size=5),
        columns=columns(["X1", "y1", "X2", "y2"], dtype=float),
        rows=st.tuples(
            st.floats(min_value=1,
                      max_value=10,
                      allow_nan=False,
                      allow_infinity=False),
            st.floats(min_value=1,
                      max_value=10,
                      allow_nan=False,
                      allow_infinity=False),
            st.floats(min_value=1,
                      max_value=10,
                      allow_nan=False,
                      allow_infinity=False),
            st.floats(min_value=1,
                      max_value=10,
from hypothesis import given
from hypothesis import strategies as st
from hypothesis.extra.pandas import data_frames, columns
from Regression.preprocessing import read_data


@given(
    data_frames(
        columns=columns(
            names_or_number=[str(i) for i in range(10)],
            dtype=float,
            elements=st.floats(allow_infinity=False, max_value=1e+307),
        )))
def test_get_predict_data(df):
    """A frame written to CSV is read back by read_data with the same shape."""
    csv_path = '../tmp/predict_df.csv'
    df.to_csv(csv_path)
    res = read_data(csv_path, fitting=False)
    assert res.shape == df.shape


@given(
    data_frames(
        columns=columns(
            names_or_number=[str(i) for i in range(10)],
            dtype=float,
            elements=st.floats(allow_infinity=False, max_value=1e+307),
        )))
def test_get_train_data(df):
    """read_data in fitting mode splits off a 1-D target and keeps all rows."""
    csv_path = '../tmp/train_df.csv'
    df.to_csv(csv_path)
    X, y = read_data(csv_path, fitting=True, split=False)

    n_rows, n_cols = df.shape
    # One column is expected to leave the feature matrix.
    assert X.shape[1] == n_cols - 1
    assert len(y.shape) == 1
    assert y.shape[0] == n_rows


@given(
Ejemplo n.º 16
0
    ]


def test_that_load_spec_raises_valueerror_for_invalid_spec(basic_spec_0):
    """read_spec raises a ValueError mentioning "invalid spec" for this spec."""
    # open() is patched so read_spec sees the fixture's text instead of a file.
    with pytest.raises(ValueError) as spec_error, \
            patch('builtins.open',
                  new_callable=mock_open,
                  read_data=basic_spec_0):
        read_spec('fake/file.yaml')

    message = str(spec_error.value).lower()
    assert "invalid spec" in message


@settings(deadline=None)
@given(
    data_frames(columns=columns("A B C".split(), dtype=int),
                index=hpd.range_indexes()),
    sampled_from(['.csv', '.xls', '.xlsx', '.parquet']))
def test_that_read_data_returns_data_frame(tmpdir, write_funcs,
                                           basic_spec_dict, df, ext):
    """Given a Hypothesis DataFrame, save it as a file of the sampled type,
       and test the reading that file into a Pandas DataFrame works as expected."""

    expected = df.shape[1]

    # using make_numbered_dir to avoid path collisions when running test for each
    # hypothesis-generated data frame.
    # p = tmpdir.make_numbered_dir().join(str(f'test{ext}'))
    # write_funcs[ext](df, p.strpath)
    tmp_file_path = write_dataframe_to_tmpdir(tmpdir, write_funcs, df, ext)
    spec = {'input': {'file': tmp_file_path}}
Ejemplo n.º 17
0
# Fix NumPy's global random seed for this module.
np.random.seed(1234)
param = {}
# Names of the float feature columns in the generated frames.
categories = [
    'alpha', 'D_fit', 'kurtosis', 'asymmetry1', 'asymmetry2', 'asymmetry3',
    'AR', 'elongation', 'boundedness', 'fractal_dim', 'trappedness',
    'efficiency', 'straightness', 'MSD_ratio', 'frames', 'Deff1', 'Deff2',
    'angle_mean', 'angle_mag_mean', 'angle_var', 'dist_tot', 'dist_net',
    'progression', 'Mean alpha', 'Mean D_fit', 'Mean kurtosis',
    'Mean asymmetry1', 'Mean asymmetry2', 'Mean asymmetry3', 'Mean AR',
    'Mean elongation', 'Mean boundedness', 'Mean fractal_dim',
    'Mean trappedness', 'Mean efficiency', 'Mean straightness',
    'Mean MSD_ratio', 'Mean Deff1', 'Mean Deff2'
]

# One float column per category, drawn from an unconstrained float strategy.
data_cols = columns(names_or_number=categories,
                    dtype=float,
                    elements=st.floats())
# 'X' and 'Y' float columns bounded to [0.0, 2048.0].
position_cols = columns(names_or_number=['X', 'Y'],
                        dtype=float,
                        elements=st.floats(min_value=0.0, max_value=2048.0))
# Integer 'target' column with values in [0, 20].
target_col = column(name='target',
                    dtype=int,
                    elements=st.integers(
                        min_value=0,
                        max_value=20))  #up to twenty unique targets

# Frame strategy combining all columns above; the range index has at least
# ten entries.
df = data_frames(columns=data_cols + position_cols + [target_col],
                 index=range_indexes(min_size=10))


def test_generate_fullstats():
Ejemplo n.º 18
0
#    assert ts[ts == 0].size == 2040


def test_agg_sample_n_gaussian_ts_as_df():
    """Sampling 10 daily series over 2019-01-01..2020-01-01 gives a 366x10 frame."""
    sampled = td.sample_n_gaussian_ts_as_df(10, '2019-01-01', '2020-01-01',
                                            Interval.D)
    # The second element of the returned pair is not checked here.
    df, _ = sampled
    assert df.shape == (366, 10)


# globals for tests
# Fixed length used as both min_size and max_size of the generated indexes.
index_len = 100
# Lower and upper bounds for the generated train_size floats.
train_size_min = 0.1
train_size_max = 0.9


@given(features=data_frames(columns(['feat1', 'feat2', 'feat3'], dtype=float),
                            index=range_indexes(min_size=index_len,
                                                max_size=index_len)),
       labels=series(elements=st.integers(min_value=0, max_value=3),
                     index=range_indexes(min_size=index_len,
                                         max_size=index_len)),
       time_stamps=st.integers(min_value=1, max_value=10),
       train_size=st.floats(min_value=train_size_min,
                            max_value=train_size_max,
                            allow_infinity=False,
                            allow_nan=False))
def test_split_categorical_time_series_labels(features: pd.DataFrame,
                                              labels: pd.Series,
                                              time_stamps: int,
                                              train_size: float):
    assume(train_size == round(train_size,
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
#
# END HEADER

from __future__ import absolute_import, division, print_function

import hypothesis.extra.pandas as pdst
import hypothesis.strategies as st
from tests.common.arguments import argument_validation_test, e

BAD_ARGS = [
    e(pdst.data_frames),
    e(pdst.data_frames, pdst.columns(1, dtype="not a dtype")),
    e(pdst.data_frames, pdst.columns(1, elements="not a strategy")),
    e(pdst.data_frames, pdst.columns([[]])),
    e(pdst.data_frames, [], index=[]),
    e(pdst.data_frames, [], rows=st.fixed_dictionaries({"A": st.just(1)})),
    e(pdst.data_frames, pdst.columns(1)),
    e(pdst.data_frames, pdst.columns(1, dtype=float, fill=1)),
    e(pdst.data_frames, pdst.columns(1, dtype=float, elements=1)),
    e(pdst.data_frames, pdst.columns(1, fill=1, dtype=float)),
    e(pdst.data_frames, pdst.columns(["A", "A"], dtype=float)),
    e(pdst.data_frames, pdst.columns(1, elements=st.none(), dtype=int)),
    e(pdst.data_frames, 1),
    e(pdst.data_frames, [1]),
    e(pdst.data_frames, pdst.columns(1, dtype="category")),
    e(
        pdst.data_frames,
Ejemplo n.º 20
0
    U = normalization.op(U, scale)
    for i in np.linspace(0, 2, 5):
        # This should yield exp(i * x/x) = exp(i)
        z_test = LT.flatten() * i
        x_test = np.repeat(x, y.size)
        y_test = np.tile(y, x.size)
        (f_test, u_test) = corr(z_test, x_test, y_test)

        f_true = np.exp(i)
        u_true = z_test * U.flatten() / LT.flatten()**2 * f_test
        assert np.allclose(f_test, f_true)
        assert np.allclose(u_test, u_true)


@given(
    data_frames(
        columns=columns(['event'],
                        elements=integers(min_value=-1e5, max_value=1e5))),
    lists(integers(min_value=-1e5, max_value=1e5)))
def test_dst_event_id_selection(dst, events):
    """The filtered dst keeps exactly the event ids present in both inputs."""
    filtered_dst = dst_event_id_selection(dst, events)
    kept_ids = set(filtered_dst.event.values)
    expected_ids = set(dst.event.values) & set(events)
    assert kept_ids == expected_ids


def test_dst_event_id_selection_2():
    data = {'event': [1, 1, 3, 6, 7], 'values': [3, 4, 2, 5, 6]}
    filt_data = {'event': [1, 1, 6], 'values': [3, 4, 5]}

    df_data = pd.DataFrame(data=data)
    df_filt_data = pd.DataFrame(data=filt_data)
    df_real_filt = dst_event_id_selection(df_data, [1, 2, 6, 10])
Ejemplo n.º 21
0
@given(
    pdst.data_frames(
        [pdst.column("a", dtype=int), pdst.column("b", dtype=float)]
    )
)
def test_can_have_columns_of_distinct_types(df):
    """Each column keeps the dtype it was declared with."""
    expected_dtypes = (("a", int), ("b", float))
    for column_name, python_type in expected_dtypes:
        assert df[column_name].dtype == np.dtype(python_type)


@given(
    pdst.data_frames(
        [pdst.column(dtype=int)],
        index=pdst.range_indexes(min_size=1, max_size=5),
    )
)
def test_respects_size_bounds(df):
    """The generated frame has between one and five rows."""
    n_rows = len(df)
    assert 1 <= n_rows <= 5


@given(pdst.data_frames(pdst.columns(["A", "B"], dtype=float)))
def test_can_specify_just_column_names(df):
    """Looking up either named column must not raise."""
    for label in ("A", "B"):
        df[label]


@given(pdst.data_frames(pdst.columns(2, dtype=float)))
def test_can_specify_just_column_count(df):
    """Looking up columns 0 and 1 must not raise."""
    for label in (0, 1):
        df[label]


@given(
    pdst.data_frames(
        rows=st.fixed_dictionaries({"A": st.integers(1, 10), "B": st.floats()})
    )
Ejemplo n.º 22
0
from liualgotrader.fincalcs.resample import ResampleRangeType, resample

# pytz timezone object for the "US/Eastern" zone.
est = pytz.timezone("US/Eastern")


@settings(deadline=None, max_examples=100)
@given(
    data_frames(
        index=indexes(
            elements=st.datetimes(
                min_value=datetime(2000, 1, 1), max_value=datetime(2040, 1, 1)
            ),
            dtype=pd.DatetimeIndex,
        ),
        columns=columns(
            ["open", "close", "high", "low", "volume"], dtype=float
        ),
        rows=st.tuples(
            st.floats(allow_nan=True),
            st.floats(allow_nan=True),
            st.floats(allow_nan=True),
            st.floats(allow_nan=True),
            st.floats(allow_nan=True),
        ),
    ),
    st.sampled_from(ResampleRangeType),
)
def test_resample(ohlc: pd.DataFrame, resample_range: ResampleRangeType):
    print(ohlc.index)
    r = resample(ohlc, resample_range)
    if ohlc.empty:
    pdst.column('a', dtype=int),
    pdst.column('b', dtype=float),
]))
def test_can_have_columns_of_distinct_types(df):
    assert df['a'].dtype == np.dtype(int)
    assert df['b'].dtype == np.dtype(float)


@given(
    pdst.data_frames(
        [pdst.column(dtype=int)],
        index=pdst.range_indexes(min_size=1, max_size=5)))
def test_respects_size_bounds(df):
    """Frame length stays within the index size bounds (1 to 5)."""
    length = len(df)
    assert 1 <= length <= 5


@given(pdst.data_frames(pdst.columns(['A', 'B'], dtype=float)))
def test_can_specify_just_column_names(df):
    """Columns 'A' and 'B' exist on the generated frame."""
    for name in ['A', 'B']:
        df[name]


@given(pdst.data_frames(pdst.columns(2, dtype=float)))
def test_can_specify_just_column_count(df):
    """Columns labelled 0 and 1 exist on the generated frame."""
    for label in [0, 1]:
        df[label]


@given(pdst.data_frames(
    rows=st.fixed_dictionaries({'A': st.integers(1, 10), 'B': st.floats()}))
)
def test_gets_the_correct_data_shape_for_just_rows(table):
Ejemplo n.º 24
0
# Strategy producing date values.
strat_dates = st.dates()

# Shared range index with at least one entry, so every frame below is non-empty.
strat_df_index = hpd.range_indexes(min_size=1)

# Frame with one column each of text, ints, floats, dates and booleans.
# NOTE(review): the last column is named "col4_bools" although it is the
# fifth column — confirm whether "col5_bools" was intended.
df_hypo_mixed = hpd.data_frames(
    columns=[
        hpd.column(name="col1_text", elements=strat_text),
        hpd.column(name="col2_ints", elements=strat_ints),
        hpd.column(name="col3_floats", elements=strat_floats),
        hpd.column(name="col4_dates", elements=strat_dates),
        hpd.column(name="col4_bools", elements=st.booleans()),
    ],
    index=strat_df_index,
)

# Five-column frames whose cells all come from a single element strategy.
df_hypo_text = hpd.data_frames(columns=hpd.columns(5, elements=strat_text),
                               index=strat_df_index)
df_hypo_ints = hpd.data_frames(columns=hpd.columns(5, elements=strat_ints),
                               index=strat_df_index)
df_hypo_floats = hpd.data_frames(columns=hpd.columns(5, elements=strat_floats),
                                 index=strat_df_index)
df_hypo_dates = hpd.data_frames(columns=hpd.columns(5, elements=strat_dates),
                                index=strat_df_index)


def not_has_all_delims(df: pd.DataFrame) -> bool:
    """Return True unless every delimiter option occurs in some string cell.

    Non-string cells never count as containing a delimiter.
    """

    def _appears_somewhere(delim) -> bool:
        # True when at least one string cell of the frame contains delim.
        hits = df.applymap(
            lambda cell: isinstance(cell, str) and delim in cell)
        return hits.any().any()

    return not all(_appears_somewhere(delim) for delim in _DELIMITER_OPTIONS)
# obtain one at https://mozilla.org/MPL/2.0/.
#
# END HEADER

from datetime import datetime

import pandas as pd

import hypothesis.extra.pandas as pdst
import hypothesis.strategies as st
from hypothesis import given
from tests.common.arguments import argument_validation_test, e

BAD_ARGS = [
    e(pdst.data_frames),
    e(pdst.data_frames, pdst.columns(1, dtype="not a dtype")),
    e(pdst.data_frames, pdst.columns(1, elements="not a strategy")),
    e(pdst.data_frames, pdst.columns([[]])),
    e(pdst.data_frames, [], index=[]),
    e(pdst.data_frames, [], rows=st.fixed_dictionaries({"A": st.just(1)})),
    e(pdst.data_frames, pdst.columns(1)),
    e(pdst.data_frames, pdst.columns(1, dtype=float, fill=1)),
    e(pdst.data_frames, pdst.columns(1, dtype=float, elements=1)),
    e(pdst.data_frames, pdst.columns(1, fill=1, dtype=float)),
    e(pdst.data_frames, pdst.columns(["A", "A"], dtype=float)),
    e(pdst.data_frames, pdst.columns(1, elements=st.none(), dtype=int)),
    e(pdst.data_frames, 1),
    e(pdst.data_frames, [1]),
    e(pdst.data_frames, pdst.columns(1, dtype="category")),
    e(
        pdst.data_frames,
# Either a text-keyed dictionary or a list, with values drawn from
# primitive_strategy.
container_strategy = dictionaries(text(), primitive_strategy) | lists(
    primitive_strategy
)

# Recursive strategy: starts from container_strategy and wraps children in
# further lists or text-keyed dictionaries.
nested_strategy = recursive(
    container_strategy,
    lambda children: lists(children) | dictionaries(text(), children),
)

# Arrays whose dtype comes from guaranteed_dtypes and whose shape comes from
# array_shapes().
numpy_strategy = arrays(guaranteed_dtypes, array_shapes())

# A series of int, float or str dtype.
pandas_series = series(dtype=int) | series(dtype=float) | series(dtype=str)

# Three-column frames of a single dtype (int, float or str), or a frame with
# one str, one float and one int column.
pandas_dfs = (
    data_frames(columns(3, dtype=int))
    | data_frames(columns(3, dtype=float))
    | data_frames(columns(3, dtype=str))
    | data_frames([column(dtype=str), column(dtype=float), column(dtype=int)])
)

# Union of the input kinds; the data-frame alternative is currently
# commented out.
possible_input_data = one_of(
    lists(primitive_strategy),
    numpy_strategy,
    pandas_series,
    # pandas_dfs
)


# 20x20 frame built from the first meshgrid output, so every row holds 0..19.
TEST_DF = pd.DataFrame(np.meshgrid(np.arange(20), np.arange(20))[0])
@given(
    pdst.data_frames(
        [
            pdst.column("a", dtype=int),
            pdst.column("b", dtype=float),
        ]
    )
)
def test_can_have_columns_of_distinct_types(df):
    """Columns 'a' and 'b' carry int and float dtypes respectively."""
    dtype_of_a = df["a"].dtype
    assert dtype_of_a == np.dtype(int)
    dtype_of_b = df["b"].dtype
    assert dtype_of_b == np.dtype(float)


@given(
    pdst.data_frames(
        [pdst.column(dtype=int)],
        index=pdst.range_indexes(min_size=1, max_size=5),
    )
)
def test_respects_size_bounds(df):
    """Row count must fall inside the closed interval [1, 5]."""
    size = len(df)
    assert 1 <= size <= 5


@given(pdst.data_frames(pdst.columns(["A", "B"], dtype=float)))
def test_can_specify_just_column_names(df):
    """Indexing by each requested column name succeeds."""
    for wanted in ("A", "B"):
        df[wanted]


@given(pdst.data_frames(pdst.columns(2, dtype=float)))
def test_can_specify_just_column_count(df):
    """Indexing by labels 0 and 1 succeeds when two columns are requested."""
    for wanted in (0, 1):
        df[wanted]


@given(
    pdst.data_frames(
        rows=st.fixed_dictionaries({"A": st.integers(1, 10), "B": st.floats()})
    )
Ejemplo n.º 28
0
    pdst.data_frames(
        [pdst.column("a", dtype=int),
         pdst.column("b", dtype=float)]))
def test_can_have_columns_of_distinct_types(df):
    assert df["a"].dtype == np.dtype(int)
    assert df["b"].dtype == np.dtype(float)


@given(
    pdst.data_frames(
        [pdst.column(dtype=int)],
        index=pdst.range_indexes(min_size=1, max_size=5)))
def test_respects_size_bounds(df):
    """A frame built on a 1..5 range index has 1..5 rows."""
    total_rows = len(df)
    assert 1 <= total_rows <= 5


@given(pdst.data_frames(pdst.columns(["A", "B"], dtype=float)))
def test_can_specify_just_column_names(df):
    """Access each named column in turn; a missing column would raise."""
    for key in ["A", "B"]:
        df[key]


@given(pdst.data_frames(pdst.columns(2, dtype=float)))
def test_can_specify_just_column_count(df):
    """Access columns 0 and 1 in turn; a missing column would raise."""
    for key in [0, 1]:
        df[key]


@given(
    pdst.data_frames(rows=st.fixed_dictionaries({
        "A": st.integers(1, 10),
        "B": st.floats()
Ejemplo n.º 29
0
    roundtripped = xr.Dataset(df)
    xr.testing.assert_identical(dataset, roundtripped)


@given(numeric_series, st.text())
def test_roundtrip_pandas_series(ser, ix_name) -> None:
    """A series survives conversion to a DataArray and back, in both directions."""
    # The index must be named, otherwise Xarray calls it 'dim_0'.
    ser.index.name = ix_name

    data_array = xr.DataArray(ser)
    back_to_pandas = data_array.to_pandas()
    pd.testing.assert_series_equal(ser, back_to_pandas)

    back_to_xarray = back_to_pandas.to_xarray()
    xr.testing.assert_identical(data_array, back_to_xarray)


# Dataframes with columns of all the same dtype - for roundtrip to DataArray.
# A dtype is drawn from numeric_dtypes first, then used for columns
# "a", "b" and "c" of the generated frame.
numeric_homogeneous_dataframe = numeric_dtypes.flatmap(
    lambda dt: pdst.data_frames(columns=pdst.columns(["a", "b", "c"], dtype=dt)
                                ))


@pytest.mark.xfail
@given(numeric_homogeneous_dataframe)
def test_roundtrip_pandas_dataframe(df) -> None:
    """A single-dtype frame survives conversion to a DataArray and back.

    The test is currently marked as an expected failure.
    """
    # Both axes must be named, otherwise Xarray names them 'dim_0', 'dim_1'.
    df.index.name = "rows"
    df.columns.name = "cols"

    data_array = xr.DataArray(df)
    back_to_pandas = data_array.to_pandas()
    pd.testing.assert_frame_equal(df, back_to_pandas)

    back_to_xarray = back_to_pandas.to_xarray()
    xr.testing.assert_identical(data_array, back_to_xarray)
Ejemplo n.º 30
0
def test_scatterplot_alternate_data():
    """Build a scatterplot by passing individual iris columns for each channel."""
    iris = data.iris()
    channels = {
        "x": iris["petalWidth"],
        "y": iris["petalLength"],
        "color": iris["sepalWidth"],
        "tooltip": iris["species"],
    }
    return ar.scatterplot(**channels)


# Pass the test function to show_test (presumably displays its chart — confirm).
show_test(test_scatterplot_alternate_data)

#' A randomized test of equivalence between the two data syntaxes:


@given(data=data_frames(columns=columns(["a", "b", "c"], dtype=float)))
def test_scatterplot_series(data):
    """Frame syntax and per-series syntax produce the same chart dictionary."""
    from_frame = ar.scatterplot(data=data[["a", "c"]])
    from_series = ar.scatterplot(x=data["a"], y=data["c"])
    assert from_frame.to_dict() == from_series.to_dict()


#' <h2>Multiscatterplot at defaults</h2>


@viz_reg_test
def test_multiscatterplot_defaults():
    """Build a multiscatterplot of the iris data with default arguments."""
    iris = data.iris()
    return ar.multiscatterplot(iris)


# Pass the test function to show_test (presumably displays its chart — confirm).
show_test(test_multiscatterplot_defaults)
Ejemplo n.º 31
0
        'S2e_X', 'S2e_Y', 'S2e_Z', 'S2e_R', 'S2e_Phi', 'S2q_X', 'S2q_Y',
        'S2q_Z', 'S2q_R', 'S2q_Phi', 'XY', 'S2e_XY', 'S2q_XY'
    ])
    for k in out_bins:
        assert k in variable_names


# Column names used to build the generated kdst data frames in the tests below.
kdst_variables = [
    'nS2', 'S1w', 'S1h', 'S1e', 'S1t', 'S2w', 'S2h', 'S2e', 'S2q', 'S2t',
    'Nsipm', 'DT', 'Z', 'X', 'Y', 'R', 'Phi', 'Zrms', 'Xrms', 'Yrms'
]


@given(
    data_frames(
        columns=columns(kdst_variables, elements=floats(allow_nan=False))))
@settings(deadline=None)
def test_fill_kdst_var_1d(kdst):
    """Every filled variable matches its kdst column.

    'S1t', 'S2t' and 'S1w' are compared after dividing by ``units.mus``.
    """
    var_dict = defaultdict(list)
    monf.fill_kdst_var_1d(kdst, var_dict)

    for var in var_dict:
        column_values = kdst[var].values
        needs_rescaling = var in ['S1t', 'S2t', 'S1w']
        value = column_values / units.mus if needs_rescaling else column_values
        assert np.allclose(value, var_dict[var])


@given(
    data_frames(
        columns=columns(kdst_variables, elements=floats(allow_nan=False))))
Ejemplo n.º 32
0
# Strategy that always yields "chr1".
chromosomes_small = st.sampled_from(["chr1"])
# Either the full chromosome strategy or the single-chromosome one.
cs = st.one_of(chromosomes, chromosomes_small)

# Integer strategies: positions start at 0, lengths start at 1;
# small_lengths is capped at 10_000 instead of 10_000_000.
positions = st.integers(min_value=0, max_value=int(1e7))
lengths = st.integers(min_value=1, max_value=int(1e7))
small_lengths = st.integers(min_value=1, max_value=int(1e4))
# Strand is either "+" or "-".
strands = st.sampled_from("+ -".split())


# dfs = data_frames(columns=columns("Chromosome Start End Strand".split(),
#                                   dtype=int), rows=st.tuples(chromosomes, positions, positions,
#                                                              strands).map(mysort))

# Non-empty frames whose rows are (chromosome, position, position, strand)
# tuples passed through mysort.
df_minsize = 1
nonempty_dfs = data_frames(index=range_indexes(min_size=df_minsize),
                           columns=columns("Chromosome Start End Strand".split(), dtype=int),
                           rows=st.tuples(chromosomes, positions, positions, strands).map(mysort))


# Non-empty frames built column by column; "End" is drawn from lengths here.
better_df_minsize = 1
better_dfs = data_frames(index=range_indexes(min_size=better_df_minsize),
                         columns=[column("Chromosome", chromosomes),
                                  column("Start", elements=positions),
                                  column("End", elements=lengths),
                                  column("Strand", strands)])

# Variant using cs for the chromosome, lengths for "Start" and small_lengths
# for "End".
better_dfs_min = data_frames(index=range_indexes(min_size=better_df_minsize),
                             columns=[column("Chromosome", cs),
                                      column("Start", elements=lengths),
                                      column("End", elements=small_lengths),
                                      column("Strand", strands)])