Example #1
0
    def test_nonseeded_clustering(self):
        """Walk an unseeded clustering through its first merges.

        Parses the externally defined ``test_docs`` with ``NGramSpace(1)``,
        builds a ``Clustering`` from the result, and checks the pair
        returned by ``min_link()`` before and after each group of merges.
        """
        ngrams = NGramSpace(1)
        docs = [ngrams.parse(raw) for raw in test_docs]
        c = Clustering(docs)

        self.assertEqual((1, 0), c.min_link())
        c.merge(1, 0)
        self.assertEqual((2, 1), c.min_link())
        c.merge(2, 1)
        # Either pair is accepted at this step (presumably the two links
        # tie -- confirm against test_docs). assertIn reports the actual
        # value on failure, which assertTrue(x in ...) does not.
        self.assertIn(c.min_link(), [(4, 3), (5, 3)])
        # Merge both candidates with 3, whichever pair was reported above.
        c.merge(3, 4)
        c.merge(3, 5)
        self.assertEqual((7, 6), c.min_link())
    def test_nonseeded_clustering(self):
        """Check the sequence of minimum links on an unseeded clustering.

        The documents come from the externally defined ``test_docs``, each
        parsed through ``NGramSpace(1)``. After every merge (or pair of
        merges) the next ``min_link()`` result is compared with the
        expected pair.
        """
        ngrams = NGramSpace(1)
        docs = [ngrams.parse(raw) for raw in test_docs]
        c = Clustering(docs)

        self.assertEqual((1, 0), c.min_link())
        c.merge(1, 0)
        self.assertEqual((2, 1), c.min_link())
        c.merge(2, 1)
        # Two results are acceptable here; assertIn is used so a failure
        # message shows the pair that was actually returned.
        # NOTE(review): presumably (4, 3) and (5, 3) are tied -- confirm.
        self.assertIn(c.min_link(), [(4, 3), (5, 3)])
        # Both 4 and 5 are merged with 3 before the next check.
        c.merge(3, 4)
        c.merge(3, 5)
        self.assertEqual((7, 6), c.min_link())
Example #3
0
    def test_clustering(self):
        """Check min-link selection and assignments on three short documents.

        Three whitespace-separated strings are parsed with ``NGramSpace(1)``
        and clustered. The test verifies the pair reported by
        ``min_link()`` and the ``assignments`` list after each merge.
        """
        corpus = ["a b c", "b c d", "d e f"]
        space = NGramSpace(1)
        parsed = list(map(space.parse, corpus))
        clustering = Clustering(parsed)

        # First step: the reported link is (1, 0).
        first_link = clustering.min_link()
        self.assertEqual((1, 0), first_link)
        clustering.merge(1, 0)
        after_first_merge = clustering.assignments
        self.assertEqual([1, 1, 2], after_first_merge)

        # Second step: the reported link is (2, 1).
        second_link = clustering.min_link()
        self.assertEqual((2, 1), second_link)
        # NOTE(review): merge is called with (2, 0) although the reported
        # link was (2, 1); presumably either id works after the first
        # merge -- confirm against Clustering.merge.
        clustering.merge(2, 0)
        after_second_merge = clustering.assignments
        self.assertEqual([2, 2, 2], after_second_merge)
    def test_clustering(self):
        """Exercise two merges on a three-document clustering.

        Each raw string is parsed with ``NGramSpace(1)`` before being
        handed to ``Clustering``. The expected ``min_link()`` pair and the
        expected ``assignments`` list are asserted around each merge.
        """
        sentences = ['a b c', 'b c d', 'd e f']
        unigram_space = NGramSpace(1)
        vectors = list(map(unigram_space.parse, sentences))
        clusters = Clustering(vectors)

        # Initial state: the reported link is (1, 0); merging it yields
        # the assignments [1, 1, 2].
        self.assertEqual((1, 0), clusters.min_link())
        clusters.merge(1, 0)
        self.assertEqual([1, 1, 2], clusters.assignments)

        # Next reported link is (2, 1).
        self.assertEqual((2, 1), clusters.min_link())
        # NOTE(review): the merge below passes (2, 0), not the reported
        # (2, 1) -- presumably equivalent once 0 and 1 are joined; confirm.
        clusters.merge(2, 0)
        self.assertEqual([2, 2, 2], clusters.assignments)