def test_safe_indexing_1d_array_error(X_constructor):
    """Indexing a 1D container along ``axis=1`` must raise ``ValueError``."""
    values = list(range(5))

    # Build the 1D container named by the parameter; any other value is
    # forwarded to ``_safe_indexing`` unchanged.
    container = X_constructor
    if X_constructor == 'array':
        container = np.asarray(values)
    elif X_constructor == 'series':
        pd = pytest.importorskip("pandas")
        container = pd.Series(values)

    expected_message = (
        "'X' should be a 2D NumPy array, 2D sparse matrix or pandas"
    )
    with pytest.raises(ValueError, match=expected_message):
        _safe_indexing(container, [0, 1], axis=1)
def test_safe_indexing_1d_container_mask(array_type, indices_type):
    """A boolean mask on ``axis=0`` keeps only the elements flagged True."""
    # Only positions 1 and 2 of the nine elements are selected.
    raw_mask = [False, True, True] + [False] * 6
    container = _convert_container(list(range(1, 10)), array_type)
    mask = _convert_container(raw_mask, indices_type)

    selected = _safe_indexing(container, mask, axis=0)

    expected = _convert_container([2, 3], array_type)
    assert_allclose_dense_sparse(selected, expected)
def test_safe_indexing_1d_container(array_type, indices_type):
    """Select elements 1 and 2 of a 1D container with a container of indices."""
    first, last = 1, 2
    if indices_type == 'slice' and isinstance(last, int):
        # Presumably needed because a slice's stop bound is exclusive;
        # confirm against ``_convert_container``.
        last += 1

    container = _convert_container(list(range(1, 10)), array_type)
    positions = _convert_container([first, last], indices_type)

    selected = _safe_indexing(container, positions, axis=0)

    expected = _convert_container([2, 3], array_type)
    assert_allclose_dense_sparse(selected, expected)
def test_safe_indexing_2d_scalar_axis_1(
    array_type, expected_output_type, indices
):
    """Select a single column of a 2D container with a scalar key.

    A string key on anything other than a DataFrame is expected to raise
    ``ValueError``; otherwise the last column must be returned.
    """
    data = _convert_container(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        array_type,
        ['col_0', 'col_1', 'col_2'],
    )

    if array_type != 'dataframe' and isinstance(indices, str):
        expected_message = (
            "Specifying the columns using strings is only supported "
            "for pandas DataFrames"
        )
        with pytest.raises(ValueError, match=expected_message):
            _safe_indexing(data, indices, axis=1)
        return

    column = _safe_indexing(data, indices, axis=1)
    # Sparse matrices keep their 2D shape, so the expected column is 2D too.
    expected_values = (
        [[3], [6], [9]] if expected_output_type == 'sparse' else [3, 6, 9]
    )
    expected = _convert_container(expected_values, expected_output_type)
    assert_allclose_dense_sparse(column, expected)
def test_safe_indexing_2d_mask(array_type, indices_type, axis, expected_subset):
    """Apply the boolean mask ``[False, True, True]`` along ``axis``."""
    data = _convert_container(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        array_type,
        ['col_0', 'col_1', 'col_2'],
    )
    mask = _convert_container([False, True, True], indices_type)

    selected = _safe_indexing(data, mask, axis=axis)

    expected = _convert_container(expected_subset, array_type)
    assert_allclose_dense_sparse(selected, expected)
def test_safe_indexing_2d_container_axis_1(array_type, indices_type, indices):
    """Select the last two columns with a container of keys on ``axis=1``.

    String keys on anything other than a DataFrame are expected to raise
    ``ValueError``.
    """
    # ``indices`` is mutable and shared between tests, so adjust a copy only.
    selector = copy(indices)
    if indices_type == 'slice' and isinstance(indices[1], int):
        selector[1] += 1

    data = _convert_container(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        array_type,
        ['col_0', 'col_1', 'col_2'],
    )
    selector = _convert_container(selector, indices_type)

    if isinstance(indices[0], str) and array_type != 'dataframe':
        expected_message = (
            "Specifying the columns using strings is only supported "
            "for pandas DataFrames"
        )
        with pytest.raises(ValueError, match=expected_message):
            _safe_indexing(data, selector, axis=1)
        return

    selected = _safe_indexing(data, selector, axis=1)
    expected = _convert_container([[2, 3], [5, 6], [8, 9]], array_type)
    assert_allclose_dense_sparse(selected, expected)
def test_check_fit_params(indices):
    """Check which fit parameters are passed through and which are indexed.

    The 'sparse-row', scalar and ``None`` entries must come back as the very
    same objects; the 'list', 'array' and 'sparse-col' entries must equal the
    result of ``_safe_indexing`` with the requested indices.
    """
    X = np.random.randn(4, 2)
    fit_params = {
        'list': [1, 2, 3, 4],
        'array': np.array([1, 2, 3, 4]),
        'sparse-col': sp.csc_matrix([1, 2, 3, 4]).T,
        'sparse-row': sp.csc_matrix([1, 2, 3, 4]),
        'scalar-int': 1,
        'scalar-str': 'xxx',
        'None': None,
    }

    checked = _check_fit_params(X, fit_params, indices)

    # With no indices given, compare against selecting every row of X.
    if indices is not None:
        effective_indices = indices
    else:
        effective_indices = list(range(X.shape[0]))

    for name in ('sparse-row', 'scalar-int', 'scalar-str', 'None'):
        assert checked[name] is fit_params[name]

    expected_list = _safe_indexing(fit_params['list'], effective_indices)
    assert checked['list'] == expected_list

    expected_array = _safe_indexing(fit_params['array'], effective_indices)
    assert_array_equal(checked['array'], expected_array)

    expected_sparse = _safe_indexing(
        fit_params['sparse-col'], effective_indices
    )
    assert_allclose_dense_sparse(checked['sparse-col'], expected_sparse)
def test_safe_indexing_2d_read_only_axis_1(
    array_read_only,
    indices_read_only,
    array_type,
    indices_type,
    axis,
    expected_array,
):
    """Indexing must work when the data and/or the indices are read-only."""
    data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    if array_read_only:
        data.flags.writeable = False
    data = _convert_container(data, array_type)

    positions = np.array([1, 2])
    if indices_read_only:
        positions.flags.writeable = False
    positions = _convert_container(positions, indices_type)

    selected = _safe_indexing(data, positions, axis=axis)

    expected = _convert_container(expected_array, array_type)
    assert_allclose_dense_sparse(selected, expected)
def test_safe_indexing_container_axis_0_unsupported_type():
    """String keys combined with ``axis=0`` must raise ``ValueError``."""
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    string_keys = ["col_1", "col_2"]
    expected_message = "String indexing is not supported with 'axis=0'"

    with pytest.raises(ValueError, match=expected_message):
        _safe_indexing(rows, string_keys, axis=0)
def test_safe_indexing_error_axis(axis):
    """The ``axis`` value under test must be rejected with ``ValueError``."""
    expected_message = "'axis' should be either 0"
    with pytest.raises(ValueError, match=expected_message):
        _safe_indexing(X_toy, [0, 1], axis=axis)
def test_safe_indexing_pandas_no_matching_cols_error():
    """A float column key on a DataFrame must raise ``ValueError``."""
    pd = pytest.importorskip('pandas')
    frame = pd.DataFrame(X_toy)
    expected_message = "No valid specification of the columns."

    with pytest.raises(ValueError, match=expected_message):
        _safe_indexing(frame, [1.0], axis=1)
def test_safe_indexing_None_axis_0(array_type):
    """Passing ``None`` as indices on ``axis=0`` must return all the data."""
    container = _convert_container(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type
    )
    selected = _safe_indexing(container, None, axis=0)
    assert_allclose_dense_sparse(selected, container)
def test_safe_indexing_1d_scalar(array_type):
    """A scalar index on a 1D container must return the element there."""
    container = _convert_container(list(range(1, 10)), array_type)
    # Position 2 holds the value 3.
    element = _safe_indexing(container, 2, axis=0)
    assert element == 3
def test_safe_indexing_2d_scalar_axis_0(array_type, expected_output_type):
    """A scalar index on ``axis=0`` of a 2D container must return that row."""
    container = _convert_container(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type
    )
    # Row 2 is the last row of the 3x3 input.
    row = _safe_indexing(container, 2, axis=0)

    expected = _convert_container([7, 8, 9], expected_output_type)
    assert_allclose_dense_sparse(row, expected)