def data_generator(draw):
    """Draw one randomized set of regression inputs.

    NOTE(review): the ``draw`` parameter suggests this function is wrapped by
    ``@st.composite``; the decorator is not visible here -- confirm.

    Args:
        draw: Callable that pulls a concrete example out of a strategy.

    Returns:
        A 5-tuple ``(df, reg_l1, reg_l2, optimizator, intercept)``:
        a float DataFrame with ten columns named ``"0"`` .. ``"9"`` whose
        cells exclude infinities and are capped at ``1e+30``; two floats drawn
        from ``st.floats()`` with no bounds given; one of ``'L-BFGS-B'`` or
        ``'BFGS'``; and a boolean.
    """
    column_names = [str(i) for i in range(10)]
    cell_values = st.floats(allow_infinity=False, max_value=1e+30)
    frame_strategy = data_frames(
        columns=columns(
            names_or_number=column_names,
            dtype=float,
            elements=cell_values,
        )
    )

    # The draw order below is kept stable: frame, both penalties, solver, flag.
    frame = draw(frame_strategy)
    l1_penalty = draw(st.floats())
    l2_penalty = draw(st.floats())
    solver_name = draw(st.sampled_from(['L-BFGS-B', 'BFGS']))
    fit_intercept = draw(st.booleans())
    return frame, l1_penalty, l2_penalty, solver_name, fit_intercept
def gen_columns_and_subset(draw, elements=names):
    """Draw a DataFrame together with a non-empty subset of its column names.

    Args:
        draw: Callable that pulls a concrete example out of a strategy.
        elements: Strategy used both for the (unique) column names and for
            the cell values of every column.

    Returns:
        A ``(dataframe, columns_to_keep)`` tuple. The DataFrame has at least
        five rows; ``columns_to_keep`` is a set of column names taken from it.
        Positions are drawn independently, so the same column can be picked
        more than once and the set may be smaller than the number of picks.
    """
    column_names = draw(lists(elements, min_size=1, unique=True))
    pick_count = draw(integers(min_value=1, max_value=len(column_names)))
    last_position = len(column_names) - 1

    # One position is drawn per pick; the set collapses repeated picks.
    columns_to_keep = {
        column_names[draw(integers(min_value=0, max_value=last_position))]
        for _ in range(pick_count)
    }

    # With the column names and the 'keeper' columns selected, draw a
    # DataFrame from the matching Hypothesis column strategies.
    frame_strategy = hpd.data_frames(
        hpd.columns(column_names, elements=elements),
        index=hpd.range_indexes(min_size=5),
    )
    return draw(frame_strategy), columns_to_keep
lambda children: lists(children) | dictionaries(text(), children), ) container_strategy = dictionaries( text(), primitive_strategy) | lists(primitive_strategy) nested_strategy = recursive( container_strategy, lambda children: lists(children) | dictionaries(text(), children), ) numpy_strategy = arrays(guaranteed_dtypes, array_shapes()) pandas_series = series(dtype=int) | series(dtype=float) | series(dtype=str) pandas_dfs = (data_frames(columns(3, dtype=int)) | data_frames(columns(3, dtype=float)) | data_frames(columns(3, dtype=str)) | data_frames( [column(dtype=str), column(dtype=float), column(dtype=int)])) possible_input_data = one_of( lists(primitive_strategy), numpy_strategy, pandas_series, # pandas_dfs ) TEST_DF = pd.DataFrame(np.meshgrid(np.arange(20), np.arange(20))[0])
from hypothesis import given
from hypothesis.extra.pandas import columns, data_frames, range_indexes
import hypothesis.strategies as st
import pandas as pd
from analyse_weather import get_data, hottest_summer
import pytest
from pandas import DataFrame


@given(
    data_frames(
        index=range_indexes(min_size=1),
        columns=columns(
            ['JUN', 'JUL', 'AUG'],
            elements=st.floats(allow_nan=True),
        ),
    )
)
def test_hottest_summer_auto(df):
    """Check that ``hottest_summer`` never yields a null for a non-empty frame.

    The generated frame has JUN/JUL/AUG float columns (NaN allowed) and at
    least one row.
    """
    result = hottest_summer(df)
    assert not pd.isnull(result)


# Below is another example of using fixtures but for this function:
@pytest.fixture
def full_dataset():
    """Fixture returning whatever ``get_data()`` produces."""
    dataset = get_data()
    return dataset
# # This Source Code Form is subject to the terms of the Mozilla Public License, # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at http://mozilla.org/MPL/2.0/. # # END HEADER from __future__ import division, print_function, absolute_import import hypothesis.strategies as st import hypothesis.extra.pandas as pdst from tests.common.arguments import e, argument_validation_test BAD_ARGS = [ e(pdst.data_frames), e(pdst.data_frames, pdst.columns(1, dtype='not a dtype')), e(pdst.data_frames, pdst.columns(1, elements='not a strategy')), e(pdst.data_frames, pdst.columns([[]])), e(pdst.data_frames, [], index=[]), e(pdst.data_frames, [], rows=st.fixed_dictionaries({'A': st.just(1)})), e(pdst.data_frames, pdst.columns(1)), e(pdst.data_frames, pdst.columns(1, dtype=float, fill=1)), e(pdst.data_frames, pdst.columns(1, dtype=float, elements=1)), e(pdst.data_frames, pdst.columns(1, fill=1, dtype=float)), e(pdst.data_frames, pdst.columns(['A', 'A'], dtype=float)), e(pdst.data_frames, pdst.columns(1, elements=st.none(), dtype=int)), e(pdst.data_frames, 1), e(pdst.data_frames, [1]), e(pdst.data_frames, pdst.columns(1, dtype='category')), e(pdst.data_frames, pdst.columns(['A'], dtype=bool),
pdst.column('a', dtype=int), pdst.column('b', dtype=float), ])) def test_can_have_columns_of_distinct_types(df): assert df['a'].dtype == np.dtype(int) assert df['b'].dtype == np.dtype(float) @given(pdst.data_frames( [pdst.column(dtype=int)], index=pdst.range_indexes(min_size=1, max_size=5))) def test_respects_size_bounds(df): assert 1 <= len(df) <= 5 @given(pdst.data_frames(pdst.columns(['A', 'B'], dtype=float))) def test_can_specify_just_column_names(df): df['A'] df['B'] @given(pdst.data_frames(pdst.columns(2, dtype=float))) def test_can_specify_just_column_count(df): df[0] df[1] @given(pdst.data_frames( rows=st.fixed_dictionaries({'A': st.integers(1, 10), 'B': st.floats()})) ) def test_gets_the_correct_data_shape_for_just_rows(table):
# Save and load a Pandas dataframe
#
# This test is pretty similar! Three reasons: it's a good way to show you how
# the Hypothesis-for-Pandas API works, it should emphasise that round-trip tests
# are *shockingly* effective, and save/load is a simple example of functionality
# that everyone uses no matter what domain they work in.
#
# TODO: Write tests that show one dtype that you can round-trip through CSV
#       and/or JSON, and one that you can't.
#
# See https://hypothesis.readthedocs.io/en/latest/numpy.html#pandas for details,
# and remember that you can use Numpy arrays or even lists of tuples if it helps!


@given(
    pdst.data_frames(
        index=pdst.indexes(
            dtype="float64",
            elements=st.floats(allow_nan=False),
            unique=True,
        ),
        columns=pdst.columns(3, dtype="float64"),
    )
)
def test_dataframe_round_trip(df):
    """Pickle a generated frame into memory, load it back, and compare.

    The frame has three float64 columns and a unique float64 index that
    contains no NaN.
    """
    # Serialise into an in-memory buffer and keep the raw bytes.
    with BytesIO() as sink:
        df.to_pickle(sink, compression=None)
        pickled_bytes = sink.getvalue()

    # Deserialise from a fresh buffer holding those bytes.
    with BytesIO(pickled_bytes) as source:
        restored = pd.read_pickle(source, compression=None)

    # Pandas ships testing helper functions too!
    pd.testing.assert_frame_equal(df, restored)
from hypothesis import given
from hypothesis.extra.pandas import data_frames, columns, range_indexes
import hypothesis.strategies as st

# Candidate coordinates: integers in [0, 10_000_000].
positions = st.integers(min_value=0, max_value=int(1e7))


def mysort(pos1, pos2):
    """Return the two positions as a strictly increasing ``(start, end)`` pair.

    Args:
        pos1: First position.
        pos2: Second position.

    Returns:
        ``(smaller, larger)`` when the inputs differ. When they are equal the
        second value is bumped by one, i.e. ``(pos1, pos2 + 1)``, so the pair
        never has zero width.
    """
    if pos1 > pos2:
        return pos2, pos1
    elif pos2 > pos1:
        return pos1, pos2
    else:
        return pos1, pos2 + 1


# A strategy's ``.map`` hands the drawn value to the callable as ONE argument.
# ``st.tuples`` draws a 2-tuple, so it has to be unpacked before calling the
# two-parameter ``mysort``; passing ``mysort`` directly raises ``TypeError``
# as soon as a row is drawn.
dfs = data_frames(
    columns=columns("Start End".split(), dtype=int),
    rows=st.tuples(positions, positions).map(lambda pair: mysort(*pair)),
)
def gen_rando_dataframe(draw, elements=names):
    """Draw a DataFrame with randomly named columns and at least five rows.

    Args:
        draw: Callable that pulls a concrete example out of a strategy.
        elements: Strategy used both for the (unique, non-empty) list of
            column names and for the cell values of every column.

    Returns:
        The drawn DataFrame.
    """
    column_names = draw(lists(elements, min_size=1, unique=True))
    column_specs = hpd.columns(column_names, elements=elements)
    row_index = hpd.range_indexes(min_size=5)
    frame_strategy = hpd.data_frames(column_specs, index=row_index)
    return draw(frame_strategy)
assert_bins_and_labels_ndim('S2q_XY' , ['X' , 'Y', 'S2q'], out_bins, out_labels, test_bins, test_dict) variable_names = list(test_bins) variable_names.extend(['S2e_X', 'S2e_Y' , 'S2e_Z', 'S2e_R', 'S2e_Phi', 'S2q_X', 'S2q_Y' , 'S2q_Z', 'S2q_R', 'S2q_Phi', 'XY' , 'S2e_XY','S2q_XY']) for k in out_bins: assert k in variable_names kdst_variables = ['nS2', 'S1w' , 'S1h', 'S1e', 'S1t', 'S2w', 'S2h', 'S2e', 'S2q' , 'S2t', 'Nsipm', 'DT' , 'Z' , 'X' , 'Y' , 'R' , 'Phi', 'Zrms', 'Xrms', 'Yrms'] @given(data_frames(columns=columns(kdst_variables, elements=floats(allow_nan=False)))) @settings(deadline=None) def test_fill_kdst_var_1d(kdst): var_dict = defaultdict(list) monf.fill_kdst_var_1d (kdst, var_dict) for var in var_dict: value = kdst[var].values if var in ['S1t', 'S2t', 'S1w']: value = value / units.mus assert np.allclose(value, var_dict[var]) @given(data_frames(columns=columns(kdst_variables, elements=floats(allow_nan=False)))) @settings(deadline=None) def test_fill_kdst_var_2d(kdst):
st.text(), st.floats())), column(name='second', elements=st.one_of(st.just(float('nan')), st.integers(), st.text(), st.floats())) ])) def test_get_nan_features_hypo(frame): result = get_nan_features(frame) assert isinstance(result, dict) for value in result.values(): assert value > 0 for key in result.keys(): assert isinstance(key, str) @given( data_frames(columns=columns(["first", "second", 'third'], dtype=float), rows=st.tuples(st.floats(allow_nan=False), st.integers(), st.text()))) def test_split_features_hypo(frame): cat_feats, float_feats, int_feats = split_features(frame) features = list(frame) assert isinstance(cat_feats, list) assert isinstance(float_feats, list) assert isinstance(int_feats, list) assert sublist(cat_feats, features) assert sublist(float_feats, features) assert sublist(int_feats, features) def calculate_residuals(model_path, data_path):
st.just("no"), st.just("false"), st.just("f"), st.just("n"), st.just("0"), )) def test_str2bool(v): assert isinstance(deepof.utils.str2bool(v), bool) @settings(deadline=None) @given( mult=st.integers(min_value=1, max_value=10), dframe=data_frames( index=range_indexes(min_size=1), columns=columns(["X", "y", "likelihood"], dtype=float), rows=st.tuples( st.floats(min_value=0, max_value=1000, allow_nan=False, allow_infinity=False), st.floats(min_value=0, max_value=1000, allow_nan=False, allow_infinity=False), st.floats(min_value=0.01, max_value=1.0, allow_nan=False, allow_infinity=False), ), ),
import pytest from hypothesis import HealthCheck from hypothesis import given from hypothesis import settings from hypothesis import strategies as st from hypothesis.extra.pandas import range_indexes, columns, data_frames import deepof.data import deepof.pose_utils @settings(deadline=None) @given( pos_dframe=data_frames( index=range_indexes(min_size=5), columns=columns(["X1", "y1", "X2", "y2"], dtype=float), rows=st.tuples( st.floats(min_value=1, max_value=10, allow_nan=False, allow_infinity=False), st.floats(min_value=1, max_value=10, allow_nan=False, allow_infinity=False), st.floats(min_value=1, max_value=10, allow_nan=False, allow_infinity=False), st.floats(min_value=1, max_value=10,
from hypothesis import given from hypothesis import strategies as st from hypothesis.extra.pandas import data_frames, columns from Regression.preprocessing import read_data @given( data_frames(columns=columns(names_or_number=[str(i) for i in range(10)], dtype=float, elements=st.floats(allow_infinity=False, max_value=1e+307)))) def test_get_predict_data(df): df.to_csv('../tmp/predict_df.csv') res = read_data('../tmp/predict_df.csv', fitting=False) assert res.shape == df.shape @given( data_frames(columns=columns(names_or_number=[str(i) for i in range(10)], dtype=float, elements=st.floats(allow_infinity=False, max_value=1e+307)))) def test_get_train_data(df): df.to_csv('../tmp/train_df.csv') X, y = read_data('../tmp/train_df.csv', fitting=True, split=False) assert X.shape[1] == df.shape[1] - 1 assert len(y.shape) == 1 assert y.shape[0] == df.shape[0] @given(
] def test_that_load_spec_raises_valueerror_for_invalid_spec(basic_spec_0): with pytest.raises(ValueError) as spec_error: with patch('builtins.open', new_callable=mock_open, read_data=basic_spec_0): spec = read_spec('fake/file.yaml') assert "invalid spec" in str(spec_error.value).lower() @settings(deadline=None) @given( data_frames(columns=columns("A B C".split(), dtype=int), index=hpd.range_indexes()), sampled_from(['.csv', '.xls', '.xlsx', '.parquet'])) def test_that_read_data_returns_data_frame(tmpdir, write_funcs, basic_spec_dict, df, ext): """Given a Hypothesis DataFrame, save it as a file of the sampled type, and test the reading that file into a Pandas DataFrame works as expected.""" expected = df.shape[1] # using make_numbered_dir to avoid path collisions when running test for each # hypothesis-generated data frame. # p = tmpdir.make_numbered_dir().join(str(f'test{ext}')) # write_funcs[ext](df, p.strpath) tmp_file_path = write_dataframe_to_tmpdir(tmpdir, write_funcs, df, ext) spec = {'input': {'file': tmp_file_path}}
np.random.seed(1234) param = {} categories = [ 'alpha', 'D_fit', 'kurtosis', 'asymmetry1', 'asymmetry2', 'asymmetry3', 'AR', 'elongation', 'boundedness', 'fractal_dim', 'trappedness', 'efficiency', 'straightness', 'MSD_ratio', 'frames', 'Deff1', 'Deff2', 'angle_mean', 'angle_mag_mean', 'angle_var', 'dist_tot', 'dist_net', 'progression', 'Mean alpha', 'Mean D_fit', 'Mean kurtosis', 'Mean asymmetry1', 'Mean asymmetry2', 'Mean asymmetry3', 'Mean AR', 'Mean elongation', 'Mean boundedness', 'Mean fractal_dim', 'Mean trappedness', 'Mean efficiency', 'Mean straightness', 'Mean MSD_ratio', 'Mean Deff1', 'Mean Deff2' ] data_cols = columns(names_or_number=categories, dtype=float, elements=st.floats()) position_cols = columns(names_or_number=['X', 'Y'], dtype=float, elements=st.floats(min_value=0.0, max_value=2048.0)) target_col = column(name='target', dtype=int, elements=st.integers( min_value=0, max_value=20)) #up to twenty unique targets df = data_frames(columns=data_cols + position_cols + [target_col], index=range_indexes(min_size=10)) def test_generate_fullstats():
# assert ts[ts == 0].size == 2040 def test_agg_sample_n_gaussian_ts_as_df(): df, seasonalities = td.sample_n_gaussian_ts_as_df(10, '2019-01-01', '2020-01-01', Interval.D) assert df.shape == (366, 10) # globals for tests index_len = 100 train_size_min = 0.1 train_size_max = 0.9 @given(features=data_frames(columns(['feat1', 'feat2', 'feat3'], dtype=float), index=range_indexes(min_size=index_len, max_size=index_len)), labels=series(elements=st.integers(min_value=0, max_value=3), index=range_indexes(min_size=index_len, max_size=index_len)), time_stamps=st.integers(min_value=1, max_value=10), train_size=st.floats(min_value=train_size_min, max_value=train_size_max, allow_infinity=False, allow_nan=False)) def test_split_categorical_time_series_labels(features: pd.DataFrame, labels: pd.Series, time_stamps: int, train_size: float): assume(train_size == round(train_size,
# # This Source Code Form is subject to the terms of the Mozilla Public License, # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. # # END HEADER from __future__ import absolute_import, division, print_function import hypothesis.extra.pandas as pdst import hypothesis.strategies as st from tests.common.arguments import argument_validation_test, e BAD_ARGS = [ e(pdst.data_frames), e(pdst.data_frames, pdst.columns(1, dtype="not a dtype")), e(pdst.data_frames, pdst.columns(1, elements="not a strategy")), e(pdst.data_frames, pdst.columns([[]])), e(pdst.data_frames, [], index=[]), e(pdst.data_frames, [], rows=st.fixed_dictionaries({"A": st.just(1)})), e(pdst.data_frames, pdst.columns(1)), e(pdst.data_frames, pdst.columns(1, dtype=float, fill=1)), e(pdst.data_frames, pdst.columns(1, dtype=float, elements=1)), e(pdst.data_frames, pdst.columns(1, fill=1, dtype=float)), e(pdst.data_frames, pdst.columns(["A", "A"], dtype=float)), e(pdst.data_frames, pdst.columns(1, elements=st.none(), dtype=int)), e(pdst.data_frames, 1), e(pdst.data_frames, [1]), e(pdst.data_frames, pdst.columns(1, dtype="category")), e( pdst.data_frames,
U = normalization.op(U, scale) for i in np.linspace(0, 2, 5): # This should yield exp(i * x/x) = exp(i) z_test = LT.flatten() * i x_test = np.repeat(x, y.size) y_test = np.tile(y, x.size) (f_test, u_test) = corr(z_test, x_test, y_test) f_true = np.exp(i) u_true = z_test * U.flatten() / LT.flatten()**2 * f_test assert np.allclose(f_test, f_true) assert np.allclose(u_test, u_true) @given( data_frames(columns=columns( ['event'], elements=integers(min_value=-1e5, max_value=1e5))), lists(integers(min_value=-1e5, max_value=1e5))) def test_dst_event_id_selection(dst, events): filtered_dst = dst_event_id_selection(dst, events) assert set( filtered_dst.event.values) == set(dst.event.values) & set(events) def test_dst_event_id_selection_2(): data = {'event': [1, 1, 3, 6, 7], 'values': [3, 4, 2, 5, 6]} filt_data = {'event': [1, 1, 6], 'values': [3, 4, 5]} df_data = pd.DataFrame(data=data) df_filt_data = pd.DataFrame(data=filt_data) df_real_filt = dst_event_id_selection(df_data, [1, 2, 6, 10])
@given(pdst.data_frames([pdst.column("a", dtype=int), pdst.column("b", dtype=float)])) def test_can_have_columns_of_distinct_types(df): assert df["a"].dtype == np.dtype(int) assert df["b"].dtype == np.dtype(float) @given( pdst.data_frames( [pdst.column(dtype=int)], index=pdst.range_indexes(min_size=1, max_size=5) ) ) def test_respects_size_bounds(df): assert 1 <= len(df) <= 5 @given(pdst.data_frames(pdst.columns(["A", "B"], dtype=float))) def test_can_specify_just_column_names(df): df["A"] df["B"] @given(pdst.data_frames(pdst.columns(2, dtype=float))) def test_can_specify_just_column_count(df): df[0] df[1] @given( pdst.data_frames( rows=st.fixed_dictionaries({"A": st.integers(1, 10), "B": st.floats()}) )
from liualgotrader.fincalcs.resample import ResampleRangeType, resample est = pytz.timezone("US/Eastern") @settings(deadline=None, max_examples=100) @given( data_frames( index=indexes( elements=st.datetimes( min_value=datetime(2000, 1, 1), max_value=datetime(2040, 1, 1) ), dtype=pd.DatetimeIndex, ), columns=columns( ["open", "close", "high", "low", "volume"], dtype=float ), rows=st.tuples( st.floats(allow_nan=True), st.floats(allow_nan=True), st.floats(allow_nan=True), st.floats(allow_nan=True), st.floats(allow_nan=True), ), ), st.sampled_from(ResampleRangeType), ) def test_resample(ohlc: pd.DataFrame, resample_range: ResampleRangeType): print(ohlc.index) r = resample(ohlc, resample_range) if ohlc.empty:
strat_dates = st.dates()
# Every generated frame gets a range index with at least one row.
strat_df_index = hpd.range_indexes(min_size=1)


def _five_column_frames(cell_strategy):
    """Build a five-column frame strategy whose cells all use *cell_strategy*."""
    return hpd.data_frames(
        columns=hpd.columns(5, elements=cell_strategy),
        index=strat_df_index,
    )


# One column per element kind: text, ints, floats, dates and booleans.
df_hypo_mixed = hpd.data_frames(
    index=strat_df_index,
    columns=[
        hpd.column(name="col1_text", elements=strat_text),
        hpd.column(name="col2_ints", elements=strat_ints),
        hpd.column(name="col3_floats", elements=strat_floats),
        hpd.column(name="col4_dates", elements=strat_dates),
        hpd.column(name="col4_bools", elements=st.booleans()),
    ],
)
# Homogeneous frames: five columns, all drawn from a single element strategy.
df_hypo_text = _five_column_frames(strat_text)
df_hypo_ints = _five_column_frames(strat_ints)
df_hypo_floats = _five_column_frames(strat_floats)
df_hypo_dates = _five_column_frames(strat_dates)


def not_has_all_delims(df: pd.DataFrame) -> bool:
    """Report whether at least one delimiter option is absent from *df*.

    Only string cells are inspected; non-string cells never count as
    containing a delimiter.

    Args:
        df: Frame whose cells are searched.

    Returns:
        ``True`` if some entry of ``_DELIMITER_OPTIONS`` occurs in no string
        cell, ``False`` if every entry occurs in at least one string cell.
    """
    for delim in _DELIMITER_OPTIONS:
        cell_hits = df.applymap(
            lambda cell: isinstance(cell, str) and delim in cell
        )
        # First .any() reduces per column, the second across columns.
        if not cell_hits.any().any():
            return True
    return False
# obtain one at https://mozilla.org/MPL/2.0/. # # END HEADER from datetime import datetime import pandas as pd import hypothesis.extra.pandas as pdst import hypothesis.strategies as st from hypothesis import given from tests.common.arguments import argument_validation_test, e BAD_ARGS = [ e(pdst.data_frames), e(pdst.data_frames, pdst.columns(1, dtype="not a dtype")), e(pdst.data_frames, pdst.columns(1, elements="not a strategy")), e(pdst.data_frames, pdst.columns([[]])), e(pdst.data_frames, [], index=[]), e(pdst.data_frames, [], rows=st.fixed_dictionaries({"A": st.just(1)})), e(pdst.data_frames, pdst.columns(1)), e(pdst.data_frames, pdst.columns(1, dtype=float, fill=1)), e(pdst.data_frames, pdst.columns(1, dtype=float, elements=1)), e(pdst.data_frames, pdst.columns(1, fill=1, dtype=float)), e(pdst.data_frames, pdst.columns(["A", "A"], dtype=float)), e(pdst.data_frames, pdst.columns(1, elements=st.none(), dtype=int)), e(pdst.data_frames, 1), e(pdst.data_frames, [1]), e(pdst.data_frames, pdst.columns(1, dtype="category")), e( pdst.data_frames,
def _extend_containers(children):
    """Wrap *children* one level deeper, as a list or a text-keyed dict."""
    return lists(children) | dictionaries(text(), children)


# Flat containers of primitives: text-keyed dicts or lists.
container_strategy = (
    dictionaries(text(), primitive_strategy)
    | lists(primitive_strategy)
)
# Arbitrarily nested lists/dicts grown from the flat containers.
nested_strategy = recursive(container_strategy, _extend_containers)

numpy_strategy = arrays(guaranteed_dtypes, array_shapes())

pandas_series = (
    series(dtype=int)
    | series(dtype=float)
    | series(dtype=str)
)
# Three same-dtype columns, or one str + one float + one int column.
pandas_dfs = (
    data_frames(columns(3, dtype=int))
    | data_frames(columns(3, dtype=float))
    | data_frames(columns(3, dtype=str))
    | data_frames(
        [
            column(dtype=str),
            column(dtype=float),
            column(dtype=int),
        ]
    )
)

# DataFrames are currently left out of the accepted inputs (commented out).
possible_input_data = one_of(
    lists(primitive_strategy),
    numpy_strategy,
    pandas_series,
    # pandas_dfs
)

# Fixed 20x20 frame taken from the first array returned by meshgrid.
TEST_DF = pd.DataFrame(
    np.meshgrid(np.arange(20), np.arange(20))[0]
)
pdst.data_frames( [pdst.column("a", dtype=int), pdst.column("b", dtype=float)])) def test_can_have_columns_of_distinct_types(df): assert df["a"].dtype == np.dtype(int) assert df["b"].dtype == np.dtype(float) @given( pdst.data_frames([pdst.column(dtype=int)], index=pdst.range_indexes(min_size=1, max_size=5))) def test_respects_size_bounds(df): assert 1 <= len(df) <= 5 @given(pdst.data_frames(pdst.columns(["A", "B"], dtype=float))) def test_can_specify_just_column_names(df): df["A"] df["B"] @given(pdst.data_frames(pdst.columns(2, dtype=float))) def test_can_specify_just_column_count(df): df[0] df[1] @given( pdst.data_frames(rows=st.fixed_dictionaries({ "A": st.integers(1, 10), "B": st.floats()
roundtripped = xr.Dataset(df) xr.testing.assert_identical(dataset, roundtripped) @given(numeric_series, st.text()) def test_roundtrip_pandas_series(ser, ix_name) -> None: # Need to name the index, otherwise Xarray calls it 'dim_0'. ser.index.name = ix_name arr = xr.DataArray(ser) roundtripped = arr.to_pandas() pd.testing.assert_series_equal(ser, roundtripped) xr.testing.assert_identical(arr, roundtripped.to_xarray()) # Dataframes with columns of all the same dtype - for roundtrip to DataArray numeric_homogeneous_dataframe = numeric_dtypes.flatmap( lambda dt: pdst.data_frames(columns=pdst.columns(["a", "b", "c"], dtype=dt) )) @pytest.mark.xfail @given(numeric_homogeneous_dataframe) def test_roundtrip_pandas_dataframe(df) -> None: # Need to name the indexes, otherwise Xarray names them 'dim_0', 'dim_1'. df.index.name = "rows" df.columns.name = "cols" arr = xr.DataArray(df) roundtripped = arr.to_pandas() pd.testing.assert_frame_equal(df, roundtripped) xr.testing.assert_identical(arr, roundtripped.to_xarray())
def test_scatterplot_alternate_data():
    # Pass each encoding as its own column instead of handing over the frame.
    iris = data.iris()
    encodings = {
        "x": iris["petalWidth"],
        "y": iris["petalLength"],
        "color": iris["sepalWidth"],
        "tooltip": iris["species"],
    }
    return ar.scatterplot(**encodings)


show_test(test_scatterplot_alternate_data)

#' A randomized test of equivalence between the two data syntaxes:


@given(data=data_frames(columns=columns(["a", "b", "c"], dtype=float)))
def test_scatterplot_series(data):
    # The same two columns, once as a sub-frame and once as separate series,
    # must serialise to the same chart dictionary.
    from_frame = ar.scatterplot(data=data[["a", "c"]])
    from_series = ar.scatterplot(x=data["a"], y=data["c"])
    assert from_frame.to_dict() == from_series.to_dict()


#' <h2>Multiscatterplot at defaults</h2>


@viz_reg_test
def test_multiscatterplot_defaults():
    # Only the dataset is supplied; every other option is left at its default.
    iris = data.iris()
    return ar.multiscatterplot(iris)


show_test(test_multiscatterplot_defaults)
'S2e_X', 'S2e_Y', 'S2e_Z', 'S2e_R', 'S2e_Phi', 'S2q_X', 'S2q_Y', 'S2q_Z', 'S2q_R', 'S2q_Phi', 'XY', 'S2e_XY', 'S2q_XY' ]) for k in out_bins: assert k in variable_names kdst_variables = [ 'nS2', 'S1w', 'S1h', 'S1e', 'S1t', 'S2w', 'S2h', 'S2e', 'S2q', 'S2t', 'Nsipm', 'DT', 'Z', 'X', 'Y', 'R', 'Phi', 'Zrms', 'Xrms', 'Yrms' ] @given( data_frames( columns=columns(kdst_variables, elements=floats(allow_nan=False)))) @settings(deadline=None) def test_fill_kdst_var_1d(kdst): var_dict = defaultdict(list) monf.fill_kdst_var_1d(kdst, var_dict) for var in var_dict: value = kdst[var].values if var in ['S1t', 'S2t', 'S1w']: value = value / units.mus assert np.allclose(value, var_dict[var]) @given( data_frames( columns=columns(kdst_variables, elements=floats(allow_nan=False))))
# Single-chromosome variant, plus a union with the full chromosome strategy.
chromosomes_small = st.sampled_from(["chr1"])
cs = st.one_of(chromosomes, chromosomes_small)

# Integer strategies for coordinates and interval lengths.
positions = st.integers(min_value=0, max_value=int(1e7))
lengths = st.integers(min_value=1, max_value=int(1e7))
small_lengths = st.integers(min_value=1, max_value=int(1e4))

strands = st.sampled_from("+ -".split())

# dfs = data_frames(columns=columns("Chromosome Start End Strand".split(),
#                   dtype=int), rows=st.tuples(chromosomes, positions, positions,
#                   strands).map(mysort))

# Row-based frames with at least ``df_minsize`` rows.
# NOTE(review): ``mysort`` is defined outside this excerpt; ``.map`` passes the
# whole drawn 4-tuple as a single argument -- confirm ``mysort`` accepts that.
df_minsize = 1
nonempty_dfs = data_frames(
    columns=columns("Chromosome Start End Strand".split(), dtype=int),
    rows=st.tuples(chromosomes, positions, positions, strands).map(mysort),
    index=range_indexes(min_size=df_minsize),
)

# Column-based frames: every column is drawn from its own strategy.
better_df_minsize = 1
better_dfs = data_frames(
    columns=[
        column("Chromosome", elements=chromosomes),
        column("Start", elements=positions),
        column("End", elements=lengths),
        column("Strand", elements=strands),
    ],
    index=range_indexes(min_size=better_df_minsize),
)

# Same layout with the chromosome union and smaller "End" values.
better_dfs_min = data_frames(
    columns=[
        column("Chromosome", elements=cs),
        column("Start", elements=lengths),
        column("End", elements=small_lengths),
        column("Strand", elements=strands),
    ],
    index=range_indexes(min_size=better_df_minsize),
)