def test_nonseeded_clustering(self):
    """Check the links reported by ``min_link()`` across a series of merges.

    The documents come from the module-level ``test_docs`` fixture and are
    parsed with an ``NGramSpace`` constructed with ``1``.
    """
    ngrams = NGramSpace(1)
    docs = [ngrams.parse(raw) for raw in test_docs]
    c = Clustering(docs)

    self.assertEqual((1, 0), c.min_link())
    c.merge(1, 0)

    self.assertEqual((2, 1), c.min_link())
    c.merge(2, 1)

    # Two candidate links are accepted here (presumably a tie between
    # them -- confirm against Clustering.min_link). assertIn reports the
    # actual value on failure, unlike assertTrue(x in ...).
    self.assertIn(c.min_link(), [(4, 3), (5, 3)])
    c.merge(3, 4)
    c.merge(3, 5)

    self.assertEqual((7, 6), c.min_link())
def test_clustering(self):
    """Merge three short documents and check links and assignments.

    After each ``merge`` call the test inspects ``assignments``; before
    each one it checks the pair reported by ``min_link()``.
    """
    # NOTE(review): a second ``test_clustering`` with the same body appears
    # right after this one; if both sit in the same class, the later
    # definition replaces the earlier -- confirm and drop one.
    space = NGramSpace(1)
    parsed = [space.parse(text) for text in ["a b c", "b c d", "d e f"]]
    clustering = Clustering(parsed)

    # First step: the reported link is merged as-is.
    self.assertEqual((1, 0), clustering.min_link())
    clustering.merge(1, 0)
    self.assertEqual([1, 1, 2], clustering.assignments)

    # Second step.
    # NOTE(review): the reported link is (2, 1) but merge is called with
    # (2, 0) -- confirm this is intentional.
    self.assertEqual((2, 1), clustering.min_link())
    clustering.merge(2, 0)
    self.assertEqual([2, 2, 2], clustering.assignments)
def test_clustering(self):
    """Step through two merges over three tiny documents.

    Verifies the pair returned by ``min_link()`` ahead of each merge and
    the contents of ``assignments`` after it.
    """
    # NOTE(review): an identically named test with the same body appears
    # just before this one; if both sit in the same class, only this later
    # definition is kept -- confirm and drop one.
    sample_texts = ['a b c', 'b c d', 'd e f']
    unigram_space = NGramSpace(1)
    vectors = [unigram_space.parse(sample) for sample in sample_texts]
    clusters = Clustering(vectors)

    initial_link = clusters.min_link()
    self.assertEqual((1, 0), initial_link)
    clusters.merge(1, 0)
    self.assertEqual([1, 1, 2], clusters.assignments)

    following_link = clusters.min_link()
    self.assertEqual((2, 1), following_link)
    # NOTE(review): merge is called with (2, 0) although the link just
    # reported is (2, 1) -- confirm this is intentional.
    clusters.merge(2, 0)
    self.assertEqual([2, 2, 2], clusters.assignments)