Beispiel #1
0
 def test_number_of_clusters_expection(self):
     """
     Check that a non-positive cluster count is rejected.

     generate_document_clusters is called with the fixture documents and
     a cluster count of 0, then of -1; each call must raise
     InvalidNumberOfClustersError.
     """
     # Invalid cluster counts to try, in order: zero, then a negative one.
     for invalid_count in (0, -1):
         self.assertRaises(
             InvalidNumberOfClustersError,
             generate_document_clusters,
             self.documents,
             invalid_count
         )
Beispiel #2
0
 def test_insuficient_number_of_documents(self):
     """
     Check that asking for more clusters than there are documents fails.

     Two scenarios are exercised, and each must raise
     InsufficientNumberOfDocumentsError:

     * the fixture documents with one cluster more than their count;
     * an empty document list with a single cluster.
     """
     fixture_docs = self.documents
     scenarios = [
         # One more cluster than there are documents.
         (fixture_docs, len(fixture_docs) + 1),
         # Nothing to cluster, yet one cluster requested.
         ([], 1),
     ]
     for corpus, requested in scenarios:
         self.assertRaises(
             InsufficientNumberOfDocumentsError,
             generate_document_clusters,
             corpus,
             requested
         )
Beispiel #3
0
    def test_number_of_clusters(self):
        """
        Tests that the number of clusters returned matches the requested
        number.

        generate_document_clusters is called with the fixture documents and
        three different cluster counts; for each call the length of the
        returned collection must equal the requested count.
        """
        documents = self.documents
        test_cases = (
            # Same number of clusters as documents (one doc per cluster)
            (documents, len(documents)),
            # Number of clusters is half the number of documents. Floor
            # division keeps the count an int on Python 3 as well as
            # Python 2 (plain "/" would yield a float on Python 3).
            # NOTE(review): presumably the fixture holds at least two
            # documents, otherwise this requests zero clusters -- confirm.
            (documents, len(documents) // 2),
            # Multiple documents, one cluster
            (documents, 1),
        )

        for docs, n_clusters in test_cases:
            doc_clusters = generate_document_clusters(docs, n_clusters)
            error_msg = ('Number of clusters returned ({returned}) does not '
                         'match the number of clusters '
                         'requested ({requested}).').format(
                            returned=len(doc_clusters),
                            requested=n_clusters
                         )
            # assertEqual is the supported spelling; the assertEquals alias
            # is deprecated and no longer exists in recent Python versions.
            self.assertEqual(len(doc_clusters), n_clusters, error_msg)
Beispiel #4
0
 def test_clusters_length(self):
     """
     Tests that the total number of documents across the returned clusters
     equals the number of documents given.

     generate_document_clusters is called with the fixture documents and
     three different cluster counts; for each call the lengths of the
     returned clusters are summed and compared with len(docs).
     """
     documents = self.documents
     test_cases = (
         # N documents and 1 cluster
         (documents, 1),
         # Same number of documents and clusters.
         (documents, len(documents)),
         # Number of clusters is half the number of documents. Floor
         # division keeps the count an int on Python 3 as well as
         # Python 2 (plain "/" would yield a float on Python 3).
         # NOTE(review): presumably the fixture holds at least two
         # documents, otherwise this requests zero clusters -- confirm.
         (documents, len(documents) // 2),
     )
     for docs, n_clusters in test_cases:
         doc_clusters = generate_document_clusters(docs, n_clusters)
         # Every cluster must support len(); the sizes are added up.
         total_docs = sum(len(c) for c in doc_clusters)
         error_msg = ('Number of documents returned in the clusters '
                      '({returned}) does not add up to the number of '
                      'documents given ({given}).').format(
                         given=len(docs),
                         returned=total_docs
                      )
         # assertEqual is the supported spelling; the assertEquals alias
         # is deprecated and no longer exists in recent Python versions.
         self.assertEqual(len(docs), total_docs, error_msg)