def test_multilabel_binarizer_multiple_calls(): inp = [(2, 3), (1, ), (1, 2)] indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 0, 1]]) indicator_mat2 = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]]) # first call mlb = MultiLabelBinarizer(classes=[1, 3, 2]) assert_array_equal(mlb.fit_transform(inp), indicator_mat) # second call change class mlb.classes = [1, 2, 3] assert_array_equal(mlb.fit_transform(inp), indicator_mat2)
def test_multilabel_binarizer_same_length_sequence(): # Ensure sequences of the same length are not interpreted as a 2-d array inp = [[1], [0], [2]] indicator_mat = np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]]) # fit_transform() mlb = MultiLabelBinarizer() assert_array_equal(mlb.fit_transform(inp), indicator_mat) assert_array_equal(mlb.inverse_transform(indicator_mat), inp) # fit().transform() mlb = MultiLabelBinarizer() assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat) assert_array_equal(mlb.inverse_transform(indicator_mat), inp)
def test_sparse_output_multilabel_binarizer(): # test input as iterable of iterables inputs = [ lambda: [(2, 3), (1, ), (1, 2)], lambda: ({2, 3}, {1}, {1, 2}), lambda: iter([iter((2, 3)), iter((1, )), {1, 2}]), ] indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]]) inverse = inputs[0]() for sparse_output in [True, False]: for inp in inputs: # With fit_transform mlb = MultiLabelBinarizer(sparse_output=sparse_output) got = mlb.fit_transform(inp()) assert issparse(got) == sparse_output if sparse_output: # verify CSR assumption that indices and indptr have same dtype assert got.indices.dtype == got.indptr.dtype got = got.toarray() assert_array_equal(indicator_mat, got) assert_array_equal([1, 2, 3], mlb.classes_) assert mlb.inverse_transform(got) == inverse # With fit mlb = MultiLabelBinarizer(sparse_output=sparse_output) got = mlb.fit(inp()).transform(inp()) assert issparse(got) == sparse_output if sparse_output: # verify CSR assumption that indices and indptr have same dtype assert got.indices.dtype == got.indptr.dtype got = got.toarray() assert_array_equal(indicator_mat, got) assert_array_equal([1, 2, 3], mlb.classes_) assert mlb.inverse_transform(got) == inverse with pytest.raises(ValueError): mlb.inverse_transform( csr_matrix(np.array([[0, 1, 1], [2, 0, 0], [1, 1, 0]])))
def test_multilabel_binarizer_unknown_class(): mlb = MultiLabelBinarizer() y = [[1, 2]] Y = np.array([[1, 0], [0, 1]]) w = 'unknown class(es) [0, 4] will be ignored' matrix = assert_warns_message(UserWarning, w, mlb.fit(y).transform, [[4, 1], [2, 0]]) assert_array_equal(matrix, Y) Y = np.array([[1, 0, 0], [0, 1, 0]]) mlb = MultiLabelBinarizer(classes=[1, 2, 3]) matrix = assert_warns_message(UserWarning, w, mlb.fit(y).transform, [[4, 1], [2, 0]]) assert_array_equal(matrix, Y)
def test_multilabel_binarizer(): # test input as iterable of iterables inputs = [ lambda: [(2, 3), (1, ), (1, 2)], lambda: ({2, 3}, {1}, {1, 2}), lambda: iter([iter((2, 3)), iter((1, )), {1, 2}]), ] indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]]) inverse = inputs[0]() for inp in inputs: # With fit_transform mlb = MultiLabelBinarizer() got = mlb.fit_transform(inp()) assert_array_equal(indicator_mat, got) assert_array_equal([1, 2, 3], mlb.classes_) assert mlb.inverse_transform(got) == inverse # With fit mlb = MultiLabelBinarizer() got = mlb.fit(inp()).transform(inp()) assert_array_equal(indicator_mat, got) assert_array_equal([1, 2, 3], mlb.classes_) assert mlb.inverse_transform(got) == inverse
def test_multilabel_binarizer_inverse_validation(): inp = [(1, 1, 1, 0)] mlb = MultiLabelBinarizer() mlb.fit_transform(inp) # Not binary with pytest.raises(ValueError): mlb.inverse_transform(np.array([[1, 3]])) # The following binary cases are fine, however mlb.inverse_transform(np.array([[0, 0]])) mlb.inverse_transform(np.array([[1, 1]])) mlb.inverse_transform(np.array([[1, 0]])) # Wrong shape with pytest.raises(ValueError): mlb.inverse_transform(np.array([[1]])) with pytest.raises(ValueError): mlb.inverse_transform(np.array([[1, 1, 1]]))
def test_multilabel_binarizer_non_unique(): inp = [(1, 1, 1, 0)] indicator_mat = np.array([[1, 1]]) mlb = MultiLabelBinarizer() assert_array_equal(mlb.fit_transform(inp), indicator_mat)
def test_multilabel_binarizer_non_integer_labels(): tuple_classes = np.empty(3, dtype=object) tuple_classes[:] = [(1, ), (2, ), (3, )] inputs = [ ([('2', '3'), ('1', ), ('1', '2')], ['1', '2', '3']), ([('b', 'c'), ('a', ), ('a', 'b')], ['a', 'b', 'c']), ([((2, ), (3, )), ((1, ), ), ((1, ), (2, ))], tuple_classes), ] indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]]) for inp, classes in inputs: # fit_transform() mlb = MultiLabelBinarizer() assert_array_equal(mlb.fit_transform(inp), indicator_mat) assert_array_equal(mlb.classes_, classes) assert_array_equal(mlb.inverse_transform(indicator_mat), inp) # fit().transform() mlb = MultiLabelBinarizer() assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat) assert_array_equal(mlb.classes_, classes) assert_array_equal(mlb.inverse_transform(indicator_mat), inp) mlb = MultiLabelBinarizer() with pytest.raises(TypeError): mlb.fit_transform([({}), ({}, {'a': 'b'})])
def test_multilabel_binarizer_given_classes(): inp = [(2, 3), (1, ), (1, 2)] indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 0, 1]]) # fit_transform() mlb = MultiLabelBinarizer(classes=[1, 3, 2]) assert_array_equal(mlb.fit_transform(inp), indicator_mat) assert_array_equal(mlb.classes_, [1, 3, 2]) # fit().transform() mlb = MultiLabelBinarizer(classes=[1, 3, 2]) assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat) assert_array_equal(mlb.classes_, [1, 3, 2]) # ensure works with extra class mlb = MultiLabelBinarizer(classes=[4, 1, 3, 2]) assert_array_equal(mlb.fit_transform(inp), np.hstack(([[0], [0], [0]], indicator_mat))) assert_array_equal(mlb.classes_, [4, 1, 3, 2]) # ensure fit is no-op as iterable is not consumed inp = iter(inp) mlb = MultiLabelBinarizer(classes=[1, 3, 2]) assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat) # ensure a ValueError is thrown if given duplicate classes err_msg = "The classes argument contains duplicate classes. Remove " \ "these duplicates before passing them to MultiLabelBinarizer." mlb = MultiLabelBinarizer(classes=[1, 3, 2, 3]) with pytest.raises(ValueError, match=err_msg): mlb.fit(inp)
def test_multilabel_binarizer_empty_sample(): mlb = MultiLabelBinarizer() y = [[1, 2], [1], []] Y = np.array([[1, 1], [1, 0], [0, 0]]) assert_array_equal(mlb.fit_transform(y), Y)