def test_check_array_pandas_dtype_object_conversion():
    """Check that dataframe-like inputs with object dtype come back as float.

    The mock dataframe wraps an integer-valued array stored with ``object``
    dtype; ``check_array`` is expected to return a floating-point array for
    it, both with the default settings and with ``ensure_2d=False``.
    """
    # Use the builtin ``object``: the ``np.object`` alias was deprecated in
    # NumPy 1.20 and later removed, so referencing it raises AttributeError.
    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=object)
    X_df = MockDataFrame(X)
    assert check_array(X_df).dtype.kind == "f"
    assert check_array(X_df, ensure_2d=False).dtype.kind == "f"

    # Smoke-test against dataframes with a column named "dtype": the
    # attribute is then not a real dtype, and conversion must still work.
    X_df.dtype = "Hans"
    assert check_array(X_df, ensure_2d=False).dtype.kind == "f"
def test_check_array_on_mock_dataframe():
    """Check the dtype returned by check_array for a mock dataframe input."""
    data = np.array([[0.2, 0.7], [0.6, 0.5], [0.4, 0.1], [0.7, 0.2]])
    frame = MockDataFrame(data)

    # With no dtype requested, the result dtype equals the wrapped array's.
    default_result = check_array(frame)
    assert default_result.dtype == data.dtype

    # With an explicit dtype requested, the result uses that dtype.
    float32_result = check_array(frame, dtype=np.float32)
    assert float32_result.dtype == np.dtype(np.float32)
def test_clone_pandas_dataframe():
    """Check that clone preserves a dataframe-like constructor parameter."""

    class DummyEstimator(TransformerMixin, BaseEstimator):
        """Dummy transformer that only stores its constructor arguments.

        Parameters
        ----------
        df : dataframe-like, default=None
            Dataframe-like object stored on the estimator.

        scalar_param : int, default=1
            Scalar value stored on the estimator.
        """

        def __init__(self, df=None, scalar_param=1):
            self.df = df
            self.scalar_param = scalar_param

        def fit(self, X, y=None):
            # Intentionally a no-op: only parameter handling is under test.
            pass

        def transform(self, X):
            # Intentionally a no-op: only parameter handling is under test.
            pass

    # Build an estimator around a mock dataframe, then clone it.
    frame = MockDataFrame(np.arange(10))
    original = DummyEstimator(df=frame, scalar_param=1)
    duplicate = clone(original)

    # The dataframe parameter must compare equal element-wise ...
    elementwise_equal = original.df == duplicate.df
    assert elementwise_equal.values.all()
    # ... and the scalar parameter must be carried over unchanged.
    assert original.scalar_param == duplicate.scalar_param
def test_shuffle_dont_convert_to_array():
    """Check that shuffle keeps the container type of each indexable input.

    Lists, object arrays, mock dataframes and sparse matrices are shuffled
    together; each result must keep its original type rather than being
    converted to a float numpy array.
    """
    letters = ["a", "b", "c"]
    letters_arr = np.array(["a", "b", "c"], dtype=object)
    numbers = [1, 2, 3]
    frame = MockDataFrame(
        np.array([["a", 0], ["b", 1], ["c", 2]], dtype=object)
    )
    sparse = sp.csc_matrix(np.arange(6).reshape(3, 2))

    shuffled = shuffle(
        letters, letters_arr, numbers, frame, sparse, random_state=0
    )
    letters_s, letters_arr_s, numbers_s, frame_s, sparse_s = shuffled

    # Plain lists stay plain lists.
    assert type(letters_s) is list
    assert letters_s == ["c", "b", "a"]
    assert type(numbers_s) is list
    assert numbers_s == [3, 2, 1]

    # The object array keeps its object dtype.
    assert letters_arr_s.dtype == object
    assert_array_equal(letters_arr_s, ["c", "b", "a"])

    # The mock dataframe is returned as a mock dataframe.
    assert type(frame_s) is MockDataFrame
    expected_frame = np.array([["c", 2], ["b", 1], ["a", 0]], dtype=object)
    assert_array_equal(frame_s, expected_frame)

    # The sparse matrix rows follow the same permutation.
    assert_array_equal(sparse_s.toarray(), np.array([[4, 5], [2, 3], [0, 1]]))
def test_check_array_complex_data_error():
    """Check that complex-valued input raises ValueError.

    ``check_array`` is exercised with the same complex values in several
    containers, and ``_check_y`` with a complex 1-D target.
    """
    pattern = "Complex data not supported"
    row_a = [1 + 2j, 3 + 4j, 5 + 7j]
    row_b = [2 + 3j, 4 + 5j, 6 + 7j]

    complex_inputs = (
        np.array([row_a, row_b]),  # 2-D ndarray
        [row_a, row_b],  # list of lists
        (tuple(row_a), tuple(row_b)),  # tuple of tuples
        [np.array(row_a), np.array(row_b)],  # list of np arrays
        (np.array(row_a), np.array(row_b)),  # tuple of np arrays
        MockDataFrame(np.array([row_a, row_b])),  # dataframe
        sp.coo_matrix([[0, 1 + 2j], [0, 0]]),  # sparse matrix
    )
    for X in complex_inputs:
        with pytest.raises(ValueError, match=pattern):
            check_array(X)

    # target variable does not always go through check_array but should
    # never accept complex data either.
    y = np.array(row_a + row_b)
    with pytest.raises(ValueError, match=pattern):
        _check_y(y)
def test_check_array_complex_data_error():
    """Check that check_array raises ValueError for complex-valued input.

    The same complex values are passed as an ndarray, nested lists, nested
    tuples, sequences of arrays, a mock dataframe and a sparse matrix.
    """

    def expect_rejected(candidate):
        # Every container below must trigger the same error message.
        assert_raises_regex(
            ValueError, "Complex data not supported", check_array, candidate
        )

    # 2-D ndarray
    expect_rejected(np.array([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]]))

    # list of lists
    expect_rejected([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]])

    # tuple of tuples
    expect_rejected(((1 + 2j, 3 + 4j, 5 + 7j), (2 + 3j, 4 + 5j, 6 + 7j)))

    # list of np arrays
    expect_rejected(
        [np.array([1 + 2j, 3 + 4j, 5 + 7j]), np.array([2 + 3j, 4 + 5j, 6 + 7j])]
    )

    # tuple of np arrays
    expect_rejected(
        (np.array([1 + 2j, 3 + 4j, 5 + 7j]), np.array([2 + 3j, 4 + 5j, 6 + 7j]))
    )

    # dataframe
    expect_rejected(
        MockDataFrame(
            np.array([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]])
        )
    )

    # sparse matrix
    expect_rejected(sp.coo_matrix([[0, 1 + 2j], [0, 0]]))
def test_check_array_complex_data_error():
    """Check that check_array raises ValueError for complex-valued input.

    The same complex values are passed as an ndarray, nested lists, nested
    tuples, sequences of arrays, a mock dataframe and a sparse matrix; each
    call must fail with the "Complex data not supported" message.
    """
    # The pattern previously read "Complex area_data not supported", which
    # disagrees with the message the other variants of this test expect and
    # looks like an identifier rename leaking into the string literal.
    expected_message = "Complex data not supported"

    X = np.array([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]])
    with pytest.raises(ValueError, match=expected_message):
        check_array(X)

    # list of lists
    X = [[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]]
    with pytest.raises(ValueError, match=expected_message):
        check_array(X)

    # tuple of tuples
    X = ((1 + 2j, 3 + 4j, 5 + 7j), (2 + 3j, 4 + 5j, 6 + 7j))
    with pytest.raises(ValueError, match=expected_message):
        check_array(X)

    # list of np arrays
    X = [
        np.array([1 + 2j, 3 + 4j, 5 + 7j]),
        np.array([2 + 3j, 4 + 5j, 6 + 7j]),
    ]
    with pytest.raises(ValueError, match=expected_message):
        check_array(X)

    # tuple of np arrays
    X = (
        np.array([1 + 2j, 3 + 4j, 5 + 7j]),
        np.array([2 + 3j, 4 + 5j, 6 + 7j]),
    )
    with pytest.raises(ValueError, match=expected_message):
        check_array(X)

    # dataframe
    X = MockDataFrame(
        np.array([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]])
    )
    with pytest.raises(ValueError, match=expected_message):
        check_array(X)

    # sparse matrix
    X = sp.coo_matrix([[0, 1 + 2j], [0, 0]])
    with pytest.raises(ValueError, match=expected_message):
        check_array(X)