コード例 #1
0
    def test_cosine_similarity_dense_row_weighted(self):
        """Compare row-weighted similarity from three implementations to a control.

        The Cython, Python and Parallel implementations are each run on the
        transposed data matrix with ``topK=0``, ``normalize=False`` and the
        same ``row_weights``. The control is
        ``data_matrix * diag(row_weights) * data_matrix.T`` with its main
        diagonal set to zero; every result must match it within ``atol=1e-4``.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

        TopK = 0

        data_matrix = np.array([[1, 2, 0, 1], [0, 1, 4, 1], [3, 0, 1, 0]])
        # np.float64 replaces the np.float alias, which NumPy deprecated in
        # 1.20 and removed in 1.24 (accessing it raises AttributeError). The
        # alias pointed at the builtin float, so the resulting dtype is the same.
        data_matrix = sps.csr_matrix(data_matrix, dtype=np.float64)

        row_weights = [2, 3, 0, 4]

        cosine_similarity = Cosine_Similarity_Cython(data_matrix.T, topK=TopK, normalize=False, row_weights=row_weights)
        W_dense_Cython = cosine_similarity.compute_similarity()

        # NOTE(review): Compute_Similarity_Python is not imported inside this
        # method; presumably it is provided at module level - confirm.
        cosine_similarity = Compute_Similarity_Python(data_matrix.T, topK=TopK, normalize=False,
                                                      row_weights=row_weights)
        W_dense_Python = cosine_similarity.compute_similarity()

        cosine_similarity = Cosine_Similarity_Parallel(data_matrix.T, topK=TopK, normalize=False,
                                                       row_weights=row_weights)
        W_dense_Parallel = cosine_similarity.compute_similarity()

        # Control: weighted product computed directly, self-similarity removed.
        W_dense_mul = data_matrix.dot(sps.diags(row_weights)).dot(data_matrix.T).toarray()
        diagonal = np.arange(W_dense_mul.shape[0])
        W_dense_mul[diagonal, diagonal] = 0.0

        assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_dense_Cython not matching control"
        assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
        assert np.allclose(W_dense_Parallel, W_dense_mul, atol=1e-4), "W_dense_Parallel not matching control"
コード例 #2
0
    def test_cosine_similarity_dense(self):
        """Check unnormalized similarity of three implementations against X.T * X.

        Each implementation is run with ``topK=0`` and ``normalize=False``. The
        control is ``interactions.T.dot(interactions)`` with its main diagonal
        set to zero, and every result is compared to it with ``==``.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

        TopK = 0

        interactions = sps.csr_matrix(
            np.array([[1, 1, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]]))

        W_dense_Cython = Cosine_Similarity_Cython(
            interactions, topK=TopK, normalize=False).compute_similarity()

        # NOTE(review): Compute_Similarity_Python is not imported inside this
        # method; presumably it is provided at module level - confirm.
        W_dense_Python = Compute_Similarity_Python(
            interactions, topK=TopK, normalize=False).compute_similarity()

        W_dense_Parallel = Cosine_Similarity_Parallel(
            interactions, topK=TopK, normalize=False).compute_similarity()

        # Control: plain product with the self-similarity entries removed.
        control = interactions.T.dot(interactions)
        diagonal = np.arange(control.shape[0])
        control[diagonal, diagonal] = 0.0

        assert np.all(W_dense_Cython == control), "W_dense_Cython not matching control"
        assert np.all(W_dense_Python == control), "W_dense_Python not matching control"
        assert np.all(W_dense_Parallel == control), "W_dense_Parallel not matching control"
コード例 #3
0
    def test_cosine_similarity_TopK_big(self):
        """Compare three implementations to a top-K control on a random matrix.

        A sparse 1000 x 500 matrix with density 0.1 is generated and each
        implementation is run with ``topK`` equal to the number of columns and
        ``normalize=False``. The control is ``data_matrix.T * data_matrix``
        with a zeroed main diagonal, passed through ``similarityMatrixTopK``
        with the same ``k``. Results must match within ``atol=1e-4``.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
        from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

        n_items = 500
        n_users = 1000
        TopK = n_items

        # Fixed seed: the fixture, and therefore any failure, is reproducible
        # from one run to the next instead of depending on global RNG state.
        data_matrix = sps.random(n_users, n_items, density=0.1, random_state=0)

        cosine_similarity = Cosine_Similarity_Cython(data_matrix, topK=TopK, normalize=False)
        W_dense_Cython = cosine_similarity.compute_similarity().toarray()

        cosine_similarity = Cosine_Similarity_Python(data_matrix, topK=TopK, normalize=False)
        W_dense_Python = cosine_similarity.compute_similarity().toarray()

        cosine_similarity = Cosine_Similarity_Parallel(data_matrix, topK=TopK, normalize=False)
        W_dense_Parallel = cosine_similarity.compute_similarity().toarray()

        # Control: direct product, self-similarity removed, then the same
        # top-K filter applied before densifying.
        W_dense_mul = data_matrix.T.dot(data_matrix)
        diagonal = np.arange(W_dense_mul.shape[0])
        W_dense_mul[diagonal, diagonal] = 0.0

        W_dense_mul = similarityMatrixTopK(W_dense_mul, k=TopK).toarray()

        assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_sparse_Cython not matching control"
        assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
        assert np.allclose(W_dense_Parallel, W_dense_mul, atol=1e-4), "W_dense_Parallel not matching control"
コード例 #4
0
    def test_cosine_similarity_TopK(self):
        """Check top-K similarity of three implementations on a small matrix.

        Each implementation is run with ``topK=4`` and ``normalize=False`` and
        its result is densified with ``toarray()``. The control is
        ``interactions.T * interactions`` with a zeroed main diagonal, filtered
        by ``similarityMatrixTopK`` with the same ``k``.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
        from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

        TopK = 4

        interactions = sps.csr_matrix(
            np.array([[1, 1, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]]))

        W_dense_Cython = Cosine_Similarity_Cython(
            interactions, topK=TopK, normalize=False).compute_similarity().toarray()

        W_dense_Python = Cosine_Similarity_Python(
            interactions, topK=TopK, normalize=False).compute_similarity().toarray()

        W_dense_Parallel = Cosine_Similarity_Parallel(
            interactions, topK=TopK, normalize=False).compute_similarity().toarray()

        # Control: direct product without self-similarity, then top-K filtered.
        product = interactions.T.dot(interactions)
        diagonal = np.arange(product.shape[0])
        product[diagonal, diagonal] = 0.0

        control = similarityMatrixTopK(product, k=TopK).toarray()

        assert np.allclose(W_dense_Cython, control, atol=1e-4), "W_sparse_Cython not matching control"
        assert np.allclose(W_dense_Python, control, atol=1e-4), "W_dense_Python not matching control"
        assert np.allclose(W_dense_Parallel, control, atol=1e-4), "W_dense_Parallel not matching control"
コード例 #5
0
    def test_cosine_similarity_dense_normalize(self):
        """Check normalized, shrunk similarity against a hand-built control.

        Each implementation is run with ``topK=0``, ``normalize=True`` and
        ``shrink=5``. The control divides ``data_matrix.T * data_matrix``
        elementwise by the product of the two column norms plus ``shrink``,
        then zeroes the main diagonal.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
        from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

        import numpy.matlib

        TopK = 0
        shrink = 5

        data_matrix = sps.csr_matrix(
            np.array([[1, 1, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]]))

        W_dense_Cython = Cosine_Similarity_Cython(
            data_matrix, topK=TopK, normalize=True, shrink=shrink).compute_similarity()

        W_dense_Python = Cosine_Similarity_Python(
            data_matrix, topK=TopK, normalize=True, shrink=shrink).compute_similarity()

        W_dense_Parallel = Cosine_Similarity_Parallel(
            data_matrix, topK=TopK, normalize=True, shrink=shrink).compute_similarity()

        # Square root of the per-column sum of squares, tiled once per column
        # so that norms * norms.T holds every pairwise product of norms.
        column_norms = np.sqrt(
            np.matlib.repmat(data_matrix.power(2).sum(axis=0), data_matrix.shape[1], 1))
        denominator = np.multiply(column_norms, column_norms.T) + shrink

        control = data_matrix.T.dot(data_matrix)
        control /= denominator

        diagonal = np.arange(control.shape[0])
        control[diagonal, diagonal] = 0.0

        assert np.allclose(W_dense_Cython, control, atol=1e-4), "W_dense_Cython not matching control"
        assert np.allclose(W_dense_Python, control, atol=1e-4), "W_dense_Python not matching control"
        assert np.allclose(W_dense_Parallel, control, atol=1e-4), "W_dense_Parallel not matching control"
コード例 #6
0
    def test_cosine_similarity_dense_jaccard(self):
        """Check ``mode='jaccard'`` results against a hand-built control.

        Each implementation is run with ``topK=0``, ``normalize=True``,
        ``shrink=0`` and ``mode='jaccard'``. For the control, every stored
        value of the matrix is replaced by one; the co-occurrence product is
        then divided, where the denominator is positive, by
        ``count_i + count_j - co_occurrence + shrink`` and the main diagonal is
        zeroed.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
        from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

        import numpy.matlib

        TopK = 0
        shrink = 0

        data_matrix = sps.csr_matrix(
            np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]]))

        W_dense_Cython = Cosine_Similarity_Cython(
            data_matrix, topK=TopK, normalize=True, shrink=shrink,
            mode='jaccard').compute_similarity()

        W_dense_Python = Cosine_Similarity_Python(
            data_matrix, topK=TopK, normalize=True, shrink=shrink,
            mode='jaccard').compute_similarity()

        W_dense_Parallel = Cosine_Similarity_Parallel(
            data_matrix, topK=TopK, normalize=True, shrink=shrink,
            mode='jaccard').compute_similarity()

        # Binarize the stored entries only after the implementations have run,
        # then continue with a dense float copy.
        data_matrix.data = np.ones_like(data_matrix.data)
        binary = data_matrix.toarray().astype(np.float64)

        control = binary.T.dot(binary)

        column_counts = np.matlib.repmat((binary**2).sum(axis=0), binary.shape[1], 1)
        denominator = column_counts + column_counts.T - control + shrink

        # Divide only where the denominator is positive to avoid 0/0.
        positive = denominator > 0
        control[positive] /= denominator[positive]

        diagonal = np.arange(control.shape[0])
        control[diagonal, diagonal] = 0.0

        assert np.allclose(W_dense_Cython, control, atol=1e-4), "W_dense_Cython not matching control"
        assert np.allclose(W_dense_Python, control, atol=1e-4), "W_dense_Python not matching control"
        assert np.allclose(W_dense_Parallel, control, atol=1e-4), "W_dense_Parallel not matching control"
コード例 #7
0
    def test_cosine_similarity_dense_pearson(self):
        """Check ``mode='pearson'`` results against a hand-built control.

        The Cython and Python implementations are run with ``topK=0``,
        ``normalize=True``, ``shrink=0`` and ``mode='pearson'``. For the
        control, the mean of the positive entries of each column is subtracted
        from those entries; the product of the centered matrix with itself is
        then divided, where the denominator is positive, by the product of the
        column norms plus ``shrink``, and the main diagonal is zeroed.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Python

        import numpy.matlib

        TopK = 0
        shrink = 0

        data_matrix = sps.csr_matrix(
            np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]]))

        W_dense_Cython = Cosine_Similarity_Cython(
            data_matrix, topK=TopK, normalize=True, shrink=shrink,
            mode='pearson').compute_similarity()

        W_dense_Python = Cosine_Similarity_Python(
            data_matrix, topK=TopK, normalize=True, shrink=shrink,
            mode='pearson').compute_similarity()

        centered = data_matrix.toarray().astype(np.float64)
        for col in range(centered.shape[1]):
            # Basic slicing yields a view, so the masked update below writes
            # through to ``centered``.
            column = centered[:, col]
            positive_entries = column > 0
            column[positive_entries] -= np.mean(column[positive_entries])

        column_norms = np.sqrt(
            np.matlib.repmat((centered**2).sum(axis=0), centered.shape[1], 1))
        denominator = np.multiply(column_norms, column_norms.T) + shrink

        control = centered.T.dot(centered)

        # Divide only where the denominator is positive to avoid 0/0.
        positive = denominator > 0
        control[positive] /= denominator[positive]

        diagonal = np.arange(control.shape[0])
        control[diagonal, diagonal] = 0.0

        assert np.allclose(W_dense_Cython, control, atol=1e-4), "W_dense_Cython not matching control"
        assert np.allclose(W_dense_Python, control, atol=1e-4), "W_dense_Python not matching control"
コード例 #8
0
    def test_cosine_similarity_dense_external_cfr(self):
        """Cross-check the implementations against sklearn and scipy references.

        First part: the default mode with ``normalize=True`` and ``shrink=0``
        is compared to sklearn's ``cosine_similarity`` of the transposed
        matrix, with the main diagonal zeroed.

        Second part: ``mode='jaccard'`` is compared to
        ``1 - scipy.spatial.distance.jaccard`` evaluated on every pair of
        distinct columns after each stored value is replaced by one.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel
        from sklearn.metrics.pairwise import cosine_similarity as Cosine_Similarity_Sklearn

        from scipy.spatial.distance import jaccard as Jaccard_Distance_Scipy

        TopK = 0
        shrink = 0

        # Keyword arguments shared by every call in the first part.
        cosine_args = dict(topK=TopK, normalize=True, shrink=shrink)

        data_matrix = sps.csr_matrix(
            np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]]))

        W_dense_Cython = Cosine_Similarity_Cython(
            data_matrix, **cosine_args).compute_similarity()

        # NOTE(review): Compute_Similarity_Python is not imported inside this
        # method; presumably it is provided at module level - confirm.
        W_dense_Python = Compute_Similarity_Python(
            data_matrix, **cosine_args).compute_similarity()

        W_dense_Parallel = Cosine_Similarity_Parallel(
            data_matrix, **cosine_args).compute_similarity()

        W_dense_sklearn = Cosine_Similarity_Sklearn(data_matrix.copy().T)
        diagonal = np.arange(W_dense_sklearn.shape[0])
        W_dense_sklearn[diagonal, diagonal] = 0.0

        assert np.allclose(W_dense_Cython, W_dense_sklearn, atol=1e-4), \
            "W_dense_Cython Cosine not matching Sklearn control"
        assert np.allclose(W_dense_Python, W_dense_sklearn, atol=1e-4), \
            "W_dense_Python Cosine not matching Sklearn control"
        assert np.allclose(W_dense_Parallel, W_dense_sklearn, atol=1e-4), \
            "W_dense_Parallel Cosine not matching Sklearn control"

        # Second part: same data rebuilt from scratch, Jaccard mode.
        jaccard_args = dict(cosine_args, mode='jaccard')

        data_matrix = sps.csr_matrix(
            np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]]))

        W_dense_Cython = Cosine_Similarity_Cython(
            data_matrix, **jaccard_args).compute_similarity()

        W_dense_Python = Compute_Similarity_Python(
            data_matrix, **jaccard_args).compute_similarity()

        W_dense_Parallel = Cosine_Similarity_Parallel(
            data_matrix, **jaccard_args).compute_similarity()

        W_dense_Scipy = np.zeros_like(W_dense_Python)

        # Binarize the stored entries, then work on the dense array.
        data_matrix.data = np.ones_like(data_matrix.data)
        binary = data_matrix.toarray()

        n_rows, n_cols = W_dense_Scipy.shape
        for row in range(n_rows):
            for col in range(n_cols):
                # Diagonal entries stay at zero.
                if row == col:
                    continue
                distance = Jaccard_Distance_Scipy(binary[:, row], binary[:, col])
                W_dense_Scipy[row, col] = 1 - distance

        assert np.allclose(W_dense_Cython, W_dense_Scipy, atol=1e-4), \
            "W_dense_Cython Jaccard not matching Scipy control"
        assert np.allclose(W_dense_Python, W_dense_Scipy, atol=1e-4), \
            "W_dense_Python Jaccard not matching Scipy control"
        assert np.allclose(W_dense_Parallel, W_dense_Scipy, atol=1e-4), \
            "W_dense_Parallel Jaccard not matching Scipy control"