def test_precision_recall_and_f_calculation(self):
     """Check the F-measures derived from a three-class confusion matrix.

     Each class has six targets, exactly one of which is predicted as a
     different class. The confusion matrix is turned into precision/recall
     rates, then into F-measures, and their sum is compared against 2.5.
     """
     evaluator = ClusteringEvaluator()
     targets = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2]
     predictions = [0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0, 2]
     # Third argument is presumably the number of classes -- TODO confirm.
     confusion_matrix = evaluator.create_confusion_matrix(targets, predictions, 3)
     rates = evaluator.calculate_precision_recall(confusion_matrix)
     f_measures = evaluator.calculate_f_measure(rates)
     # NOTE(review): the expected value is a one-element list, kept exactly
     # as written; presumably the summed F-measures form a one-element
     # array-like -- confirm against calculate_f_measure.
     # assertEqual is used because the assertEquals alias is deprecated.
     self.assertEqual(sum(f_measures), [2.5])
    def test_confusion_matrix_creation(self):
        """Identical targets and predictions must give a diagonal matrix.

        Three classes with six documents each are predicted perfectly, so
        the confusion matrix is expected to hold 6 on the diagonal and 0
        everywhere else.
        """
        evaluator = ClusteringEvaluator()
        targets = [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2]
        predictions = [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2]
        confusion_matrix = evaluator.create_confusion_matrix(targets, predictions, 3)
        expected = numpy.array([[6., 0., 0.],
                                [0., 6., 0.],
                                [0., 0., 6.]])

        # Compare element by element: checking only the row sums of the
        # difference would accept counts placed in the wrong column.
        self.assertTrue(
            numpy.array_equal(confusion_matrix, expected),
            "confusion matrix differs from expected:\n%s" % (confusion_matrix,))
 def test_bcubed_calculation(self):
     """Check B-cubed precision, recall and F on a small fixed clustering.

     The fixture holds 18 (label, cluster) pairs, six per label, and the
     three returned measures are compared to precomputed values to seven
     decimal places.
     """
     evaluator = ClusteringEvaluator()
     documents_labels_clusters = [
         (0, 0), (0, 1), (0, 0), (0, 0), (0, 1), (0, 0),
         (1, 1), (1, 1), (1, 2), (1, 1), (1, 1), (1, 1),
         (2, 1), (2, 0), (2, 2), (2, 0), (2, 2), (2, 2),
     ]

     precision, recall, f = evaluator.calculate_bcubed_measures(documents_labels_clusters)
     # Compare the actual value directly with the expected one so that a
     # failure reports both numbers instead of a difference against zero.
     self.assertAlmostEqual(precision, 0.532407407407, places=7)
     self.assertAlmostEqual(recall, 0.555555555556, places=7)
     self.assertAlmostEqual(f, 0.543735224586, places=7)
'''
Created on 21 Mar 2012

@author: george
'''

from database.warehouse import WarehouseServer
from database.model.tweets import EvaluationTweet
from analysis.clustering.kmeans import OrangeKmeansClusterer
from evaluation.evaluators import ClusteringEvaluator

# Evaluation script: fetch every stored EvaluationTweet, cluster the documents
# with OrangeKmeansClusterer and print the three scores returned by
# ClusteringEvaluator.evaluate.
ws = WarehouseServer()
documents = ws.get_all_documents(type=EvaluationTweet)

# NOTE(review): k is presumably the number of clusters and ngram=1 presumably
# selects unigram features -- confirm against OrangeKmeansClusterer.
oc = OrangeKmeansClusterer(k=35, ngram=1)
ebe = ClusteringEvaluator(documents)
bcubed_precision, bcubed_recall, bcubed_f = ebe.evaluate(clusterer=oc)
# A single parenthesised string prints the same space-separated line under
# both the Python 2 print statement and the Python 3 print function.
print("%s %s %s" % (bcubed_precision, bcubed_recall, bcubed_f))