def test_toy_dataset_frame_dtype(loader_func, data_dtype, target_dtype):
    """Run ``check_as_frame`` on a loader's default output with expected dtypes.

    The loader is first called with no arguments; that result, the loader
    itself and the expected data/target dtypes are then handed over to
    ``check_as_frame``, which performs the actual verification.
    """
    check_as_frame(
        loader_func(),
        loader_func,
        expected_data_dtype=data_dtype,
        expected_target_dtype=target_dtype,
    )
def test_20news_as_frame(fetch_20newsgroups_vectorized_fxt):
    """Check the frame produced by the fixture fetcher with ``as_frame=True``.

    Skipped when pandas is not importable. Beyond the shared
    ``check_as_frame`` verification, this asserts the frame shape, that every
    data column has a sparse dtype, that a handful of known feature columns
    and the ``category_class`` column are present, and that the target is
    named ``category_class``.
    """
    pandas = pytest.importorskip("pandas")

    result = fetch_20newsgroups_vectorized_fxt(as_frame=True)
    check_as_frame(result, fetch_20newsgroups_vectorized_fxt)

    df = result.frame
    assert df.shape == (11314, 130108)
    assert all(
        isinstance(dtype, pandas.SparseDtype) for dtype in result.data.dtypes
    )

    # Only spot-check a small subset of the feature columns.
    sampled_features = (
        "beginner",
        "beginners",
        "beginning",
        "beginnings",
        "begins",
        "begley",
        "begone",
    )
    column_names = df.keys()
    for feature_name in sampled_features:
        assert feature_name in column_names
    assert "category_class" in column_names
    assert result.target.name == "category_class"
def test_fetch_kddcup99_as_frame(fetch_kddcup99_fxt):
    """Run ``check_as_frame`` on the default output of the fixture fetcher.

    The fetcher is called with no arguments and both its result and the
    fetcher itself are passed to ``check_as_frame``.
    """
    check_as_frame(fetch_kddcup99_fxt(), fetch_kddcup99_fxt)