Example #1
0
def test_normalizer_l1():
    rng = np.random.RandomState(0)
    X_dense = rng.randn(4, 5)
    X_sparse_unpruned = sp.csr_matrix(X_dense)

    # set the row number 3 to zero
    X_dense[3, :] = 0.0

    # set the row number 3 to zero without pruning (can happen in real life)
    indptr_3 = X_sparse_unpruned.indptr[3]
    indptr_4 = X_sparse_unpruned.indptr[4]
    X_sparse_unpruned.data[indptr_3:indptr_4] = 0.0

    # build the pruned variant using the regular constructor
    X_sparse_pruned = sp.csr_matrix(X_dense)

    # check inputs that support the no-copy optim
    for X in (X_dense, X_sparse_pruned, X_sparse_unpruned):

        normalizer = Normalizer(norm='l1', copy=True)
        X_norm = normalizer.transform(X)
        assert X_norm is not X
        X_norm1 = toarray(X_norm)

        normalizer = Normalizer(norm='l1', copy=False)
        X_norm = normalizer.transform(X)
        assert X_norm is X
        X_norm2 = toarray(X_norm)

        for X_norm in (X_norm1, X_norm2):
            row_sums = np.abs(X_norm).sum(axis=1)
            for i in range(3):
                assert_almost_equal(row_sums[i], 1.0)
            assert_almost_equal(row_sums[3], 0.0)

    # check input for which copy=False won't prevent a copy
    for init in (sp.coo_matrix, sp.csc_matrix, sp.lil_matrix):
        X = init(X_dense)
        X_norm = normalizer = Normalizer(norm='l2', copy=False).transform(X)

        assert X_norm is not X
        assert isinstance(X_norm, sp.csr_matrix)

        X_norm = toarray(X_norm)
        for i in xrange(3):
            assert_almost_equal(row_sums[i], 1.0)
        assert_almost_equal(la.norm(X_norm[3]), 0.0)
def test_normalizer_l1():
    np.random.seed(0)
    X_orig = np.random.randn(4, 5)
    X_orig[3, :] = 0.0

    # check inputs that support the no-copy optim
    for init in (np.array, sp.csr_matrix):

        X = init(X_orig.copy())

        normalizer = Normalizer(norm='l1', copy=True)
        X_norm = normalizer.transform(X)
        assert X_norm is not X
        X_norm1 = toarray(X_norm)

        normalizer = Normalizer(norm='l1', copy=False)
        X_norm = normalizer.transform(X)
        assert X_norm is X
        X_norm2 = toarray(X_norm)

        for X_norm in (X_norm1, X_norm2):
            row_sums = np.abs(X_norm).sum(axis=1)
            for i in range(3):
                assert_almost_equal(row_sums[i], 1.0)
            assert_almost_equal(row_sums[3], 0.0)

    # check input for which copy=False won't prevent a copy
    for init in (sp.coo_matrix, sp.csc_matrix, sp.lil_matrix):
        X = init(X_orig.copy())
        X_norm = normalizer = Normalizer(norm='l2', copy=False).transform(X)

        assert X_norm is not X
        assert isinstance(X_norm, sp.csr_matrix)

        X_norm = toarray(X_norm)
        for i in xrange(3):
            assert_almost_equal(row_sums[i], 1.0)
        assert_almost_equal(la.norm(X_norm[3]), 0.0)
Example #3
0
def test_normalizer_l1():
    np.random.seed(0)
    X_orig = np.random.randn(4, 5)
    X_orig[3, :] = 0.0

    # check inputs that support the no-copy optim
    for init in (np.array, sp.csr_matrix):

        X = init(X_orig.copy())

        normalizer = Normalizer(norm='l1', copy=True)
        X_norm = normalizer.transform(X)
        assert X_norm is not X
        X_norm1 = toarray(X_norm)

        normalizer = Normalizer(norm='l1', copy=False)
        X_norm = normalizer.transform(X)
        assert X_norm is X
        X_norm2 = toarray(X_norm)

        for X_norm in (X_norm1, X_norm2):
            row_sums = np.abs(X_norm).sum(axis=1)
            for i in range(3):
                assert_almost_equal(row_sums[i], 1.0)
            assert_almost_equal(row_sums[3], 0.0)

    # check input for which copy=False won't prevent a copy
    for init in (sp.coo_matrix, sp.csc_matrix, sp.lil_matrix):
        X = init(X_orig.copy())
        X_norm = normalizer = Normalizer(norm='l2', copy=False).transform(X)

        assert X_norm is not X
        assert isinstance(X_norm, sp.csr_matrix)

        X_norm = toarray(X_norm)
        for i in xrange(3):
            assert_almost_equal(row_sums[i], 1.0)
        assert_almost_equal(la.norm(X_norm[3]), 0.0)