def test_levenshtein_normalize():
        links1 = [
            "https://news.ycombinator.com/from?site=eff.org",
            "https://news.ycombinator.com/from?site=martinfowler.com",
            "https://news.ycombinator.com/news?p=2",
            "https://news.ycombinator.com/news?p=3"
        ]

        links2 = ["/fromsite=eff.org",
                  "/fromsite=martinfowler.com",
                  "/newsp=2",
                  "/newsp=3"]

        actual_links1 = distance_matrix(links1, measure=levenshtein_distance,
                                        normalize=True)

        actual_links2 = distance_matrix(links2, measure=levenshtein_distance,
                                        normalize=True)

        expected_links1 = np.array([[0., 0.25454545, 0.32608696, 0.32608696],
                                    [0.25454545, 0., 0.43636364, 0.43636364],
                                    [0.32608696, 0.43636364, 0., 0.02702703],
                                    [0.32608696, 0.43636364, 0.02702703, 0.]])

        expected_links2 = np.array([[0., 0.53846154, 0.82352941, 0.82352941],
                                    [0.53846154, 0., 0.88461538, 0.88461538],
                                    [0.82352941, 0.88461538, 0., 0.125],
                                    [0.82352941, 0.88461538, 0.125, 0.]])

        assert_almost_equal(actual_links1, expected_links1, decimal=7)
        assert_almost_equal(actual_links2, expected_links2, decimal=7)
    def test_label_spreading_algorithms():
        """
        Compare scikit's algorithm and our algorithm
        """
        x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])

        # scikit takes different input that our algorithm
        y_sklearn = np.array([1, 2, -1, -1])
        y_custom = np.array([[1, 0], [0, 1], [0, 0], [0, 0]])

        # scikit's algorithm
        alpha = 0.2
        max_iter = 30
        tol = 1e-3
        label_spreading = LabelSpreadingSKLearn(kernel="rbf",
                                                max_iter=max_iter,
                                                alpha=alpha, tol=tol)
        model = label_spreading.fit(x, y_sklearn)
        expected = model.predict(x)

        # our algorithm
        w = distance_matrix(x, measure=rbf_distance)
        ls = LabelSpreadingCustom(alpha=alpha, max_iter=max_iter, tol=tol)
        ls = ls.fit(w, y_custom)
        actual = ls.predict(y_custom)
        actual = np.array(actual) + 1  # add plus 1 to every prediction
        assert_array_equal(actual, expected)
    def test_rbf_distance():
        """
        Compare radial basis function with scikit and our version
        """
        x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
        expected = sklearn_rbf_kernel(x, gamma=0.5)

        actual = distance_matrix(x, measure=rbf_distance)
        n = len(actual)

        # scikits rbf returns 1 on diagonal as exp(0) = 1
        # so we change to 1 to assure equality
        actual[range(n), range(n)] = 1

        assert_almost_equal(actual, expected, decimal=20)
    def test_levenshtein_distance():
        """
        compare the score of levenshtein distance
        """
        links = [
            "https://news.ycombinator.com/from?site=eff.org",
            "https://news.ycombinator.com/from?site=martinfowler.com",
            "https://news.ycombinator.com/news?p=2",
            "https://news.ycombinator.com/news?p=3"
        ]

        actual = distance_matrix(links, measure=levenshtein_distance,
                                 normalize=False)

        expected = np.array([[0, 14, 15, 15],
                             [14, 0, 24, 24],
                             [15, 24, 0, 1],
                             [15, 24, 1, 0]])
        assert_array_equal(actual, expected)