Beispiel #1
0
 def test_sample_doc_clustering_with_online(self):
     # Smoke test: feed every sample document to an OnlineClusterer and
     # print the documents held by each resulting cluster.
     oc = OnlineClusterer(N=2, window=3)
     samples = get_orange_clustering_test_data()
     for document in samples:
         # The value returned by add_document() was never used, so it is
         # no longer bound to a local.
         oc.add_document(document)
         oc.cluster(document)
     # NOTE(review): `expected` is never compared against the clustering
     # result, so this test cannot fail on a wrong cluster assignment.
     # Presumably an assertion was intended here -- confirm what to check.
     expected = [0, 0, 0, 1, 1, 1]
     for cluster in oc.clusters:
         # The parenthesised single-argument form prints the same text
         # under Python 2 and is also valid Python 3 syntax.
         print(cluster.document_dict)
 def test_sample_doc_clustering_with_online(self):
     # Smoke test: cluster the sample documents one at a time with an
     # OnlineClusterer, then print each cluster's documents.
     oc = OnlineClusterer(N=2, window=3)
     samples = get_orange_clustering_test_data()
     for document in samples:
         # add_document()'s return value was bound to an unused local;
         # the binding is dropped, the call is kept.
         oc.add_document(document)
         oc.cluster(document)
     # NOTE(review): `expected` is assigned but never asserted against,
     # so the test only verifies that nothing raises. Presumably a
     # comparison with the cluster assignment was intended -- confirm.
     expected = [0, 0, 0, 1, 1, 1]
     for cluster in oc.clusters:
         # print(x) with one argument is equivalent to the Python 2
         # statement form and also parses under Python 3.
         print(cluster.document_dict)
'''
Created on 13 Nov 2011

@author: george

Unit tests for the analysis.clustering package.
'''
import datetime, unittest 
from database.warehouse import WarehouseServer
from analysis.clustering.kmeans import OrangeKmeansClusterer
from tests.test_document import get_orange_clustering_test_data

###########################################
# GLOBALS                                #
###########################################
# Module-level fixtures: everything below runs once, when this test
# module is imported.
# NOTE(review): `ws` is not referenced by the test visible in this module;
# presumably it is kept for a side effect of constructing it -- confirm.
ws = WarehouseServer()
sample_docs = get_orange_clustering_test_data()

# Shared clusterer (k=2), loaded with every sample document up front so
# the test case only has to call run() on it.
oc = OrangeKmeansClusterer(k=2)
for s in sample_docs:
    oc.add_document(s)

class TestOrangeClustering(unittest.TestCase):
    
    ###########################################
    # ORANGE TESTS                            #
    ###########################################       
    def test_orange_sample_doc_kmeans(self):
        km = oc.run("orange_clustering_test")
        expected = [0, 0, 0, 1, 1, 1]
        self.assertEqual(expected, km.clusters)
'''
Created on 13 Nov 2011

@author: george

Unit tests for the analysis.clustering package.
'''
import datetime, unittest
from database.warehouse import WarehouseServer
from analysis.clustering.kmeans import OrangeKmeansClusterer
from tests.test_document import get_orange_clustering_test_data

###########################################
# GLOBALS                                #
###########################################
# Module-level fixtures, built once at import time.
# NOTE(review): `ws` is not used by the test visible in this module;
# presumably constructing it is wanted for its side effects -- confirm.
ws = WarehouseServer()
sample_docs = get_orange_clustering_test_data()

# Clusterer shared by the test case below: k=2, with all sample
# documents added before any test runs.
oc = OrangeKmeansClusterer(k=2)
for s in sample_docs:
    oc.add_document(s)


class TestOrangeClustering(unittest.TestCase):

    ###########################################
    # ORANGE TESTS                            #
    ###########################################
    def test_orange_sample_doc_kmeans(self):
        km = oc.run("orange_clustering_test")
        expected = [0, 0, 0, 1, 1, 1]
        self.assertEqual(expected, km.clusters)
'''
Created on 27 Nov 2011

@author: george
'''
import unittest, numpy
from analysis.summarization.summarization import CentroidSummarizer
from tests.test_document import get_orange_clustering_test_data

test_documents = get_orange_clustering_test_data()
# Map a sequential integer key (0, 1, 2, ...) to each sample document's
# content. Built with enumerate() so that no module-level counter named
# `id` shadows the builtin id().
doc_dict = dict(enumerate(doc.content for doc in test_documents))
    
class TestSummarizationFunctions(unittest.TestCase):
    
    def test_fv_and_centroid_creation(self): 
        cs = CentroidSummarizer(doc_dict)
        sorted_docs = cs.summarize()
        expected = [[ 0.0866434, 0., 0., 0., 0.0866434, 0., 0., 0.0866434, 0., 0., 0., 0., 0.0866434, 0., 0., 0., 0., 0., 0.13732654, 0.0866434, 0., 0.0866434, 0., 0., 0., 0., 0., 0., 0., 0.22396993, 0., 0., 0.],
                    [ 0.0866434, 0., 0., 0., 0.0866434, 0., 0., 0.0866434, 0., 0., 0., 0., 0.0866434, 0., 0., 0., 0., 0.13732654, 0., 0.0866434, 0., 0.0866434, 0., 0., 0., 0., 0., 0., 0., 0., 0.22396993, 0., 0.],
                    [ 0.06931472, 0.17917595, 0., 0.35835189, 0.06931472, 0., 0., 0.06931472, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.06931472, 0., 0.06931472, 0.17917595, 0., 0.17917595, 0., 0., 0., 0., 0., 0., 0., 0.],
                    [ 0., 0., 0., 0., 0., 0.19908439, 0., 0., 0., 0.12206803, 0.12206803, 0., 0.07701635, 0., 0., 0., 0.07701635, 0., 0.12206803, 0., 0., 0., 0., 0.19908439, 0., 0.19908439, 0., 0., 0., 0., 0., 0.12206803, 0.],
                    [ 0., 0., 0.13782765, 0., 0., 0., 0., 0., 0.13782765, 0., 0., 0.13782765, 0., 0.13782765, 0.13782765, 0.13782765, 0.05331901, 0., 0., 0., 0.13782765, 0., 0., 0., 0., 0., 0.13782765, 0.13782765, 0.13782765, 0., 0., 0.08450864, 0.13782765],
                    [ 0., 0., 0., 0., 0., 0., 0.35835189, 0., 0., 0.21972246, 0.21972246, 0., 0., 0., 0., 0., 0.13862944, 0.21972246, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]
                    ]
        for i, doc in enumerate(cs.documents.values()):
            diff = numpy.sum(doc.fv - expected[i])
            self.assertAlmostEqual(diff, 0)