Exemple #1
0
 def _test_k_means(self, seed):
     """Return the average k-means clustering accuracy over repeated runs.

     The first 100 documents of ``self.model`` are clustered into two
     groups several times. Each run is scored by the largest number of
     documents whose ``type`` equals False that land in a single cluster,
     scaled by ``2.0 / n``.

     seed: forwarded unchanged to ``vector.kmeans`` as its ``seed`` argument.

     Returns the mean score over all runs as a float.
     """
     n = 100
     runs = 30
     # Slice once: the sample does not change between runs.
     docs = list(self.model[:n])
     # Map each vector id to its document type for per-cluster label lookup.
     m = dict((d.vector.id, d.type) for d in docs)
     vectors = [d.vector for d in docs]
     A = []
     for _ in range(runs):
         # Create two clusters of vectors. A fresh list is passed each run,
         # as before, in case kmeans reorders its input.
         k = vector.kmeans(list(vectors), k=2, seed=seed)
         # Count the documents of type False in each cluster.
         # Ideally, one cluster holds all of them and the other holds none.
         # `== False` is kept on purpose: a type of None must not be counted.
         i = sum(1 for v in k[0] if m[v.id] == False)
         j = sum(1 for v in k[1] if m[v.id] == False)
         # NOTE(review): the 2.0 / n scaling presumably assumes half of the
         # n sampled documents have type False - confirm against the corpus.
         A.append(max(i, j) * 2.0 / n)
     # Return the average accuracy over all runs.
     return sum(A) / float(runs)
 def _test_k_means(self, seed):
     """Return the average k-means clustering accuracy over repeated runs.

     The first 100 documents of ``self.model`` are clustered into two
     groups several times. Each run is scored by the largest number of
     documents whose ``type`` equals False that land in a single cluster,
     scaled by ``2.0 / n``.

     seed: forwarded unchanged to ``vector.kmeans`` as its ``seed`` argument.

     Returns the mean score over all runs as a float.
     """
     n = 100
     runs = 30
     # Slice once: the sample does not change between runs.
     docs = list(self.model[:n])
     # Map each vector id to its document type for per-cluster label lookup.
     m = dict((d.vector.id, d.type) for d in docs)
     vectors = [d.vector for d in docs]
     A = []
     for _ in range(runs):
         # Create two clusters of vectors. A fresh list is passed each run,
         # as before, in case kmeans reorders its input.
         k = vector.kmeans(list(vectors), k=2, seed=seed)
         # Count the documents of type False in each cluster.
         # Ideally, one cluster holds all of them and the other holds none.
         # `== False` is kept on purpose: a type of None must not be counted.
         i = sum(1 for v in k[0] if m[v.id] == False)
         j = sum(1 for v in k[1] if m[v.id] == False)
         # NOTE(review): the 2.0 / n scaling presumably assumes half of the
         # n sampled documents have type False - confirm against the corpus.
         A.append(max(i, j) * 2.0 / n)
     # Return the average accuracy over all runs.
     return sum(A) / float(runs)