def test_tokenization(self):
    '''
    Feed every sample document to a fresh TextAnalyser and compare the
    values returned by add_document with the expected fixture.

    Results are keyed by the document's position in the sample list,
    converted to a string ("0", "1", ...).
    '''
    # The third fixture element is not needed by this test.
    expected, sample_docs, _unused_objects = get_test_documents()
    analyser = TextAnalyser()
    calculated = {}
    # enumerate replaces the manual counter, which was called `id` and
    # shadowed the builtin of the same name.
    for position, raw_doc in enumerate(sample_docs):
        calculated[str(position)] = analyser.add_document(raw_doc)
    self.assertEqual(expected, calculated)
def test_tokenization(self):
    '''
    Check the per-document output of TextAnalyser.add_document.

    Each raw sample document is added to a new analyser; the returned value
    is stored under the stringified index of the document and the whole
    mapping is compared with the expected fixture.
    '''
    # Only the first two fixture elements are used here.
    expected, sample_docs, _unused_objects = get_test_documents()
    analyser = TextAnalyser()
    calculated = {}
    # Use enumerate instead of a hand-incremented counter named `id`
    # (which hid the builtin id()).
    for doc_index, sample in enumerate(sample_docs):
        calculated[str(doc_index)] = analyser.add_document(sample)
    self.assertEqual(expected, calculated)
'''
Created on 26 Jan 2012

@author: george

Smoke test that runs NMFClusterer over the shared sample documents.
'''
import unittest
import numpy

from analysis.clustering.nmf import NMFClusterer
from tests.test_document import get_test_documents

# Shared fixture: the clusterer is built and filled once, at import time,
# and reused by the test case below.
expected, sample_docs_raw, samples = get_test_documents()
nmfc = NMFClusterer(ngram=1)
nmfc.add_documents(samples)


class Test(unittest.TestCase):

    def test_nmf_cluster(self):
        '''
        Run the module-level clusterer and dump its clusters to a file.
        No assertion is made; the test passes if neither call raises.
        '''
        run_options = dict(
            seed='random_vcol',
            method='nmf',
            rank=2,
            max_iter=65,
            display_N_tokens=6,
            display_N_documents=3,
        )
        nmfc.run(**run_options)
        nmfc.dump_clusters_to_file("nmf_with_samples")


if __name__ == "__main__":
    unittest.main()
'''
Created on 26 Jan 2012

@author: george

Tests for OrangeKmeansClusterer built from the shared sample documents.
'''
import unittest
import numpy
from numpy.testing import assert_allclose

from analysis.clustering.kmeans import OrangeKmeansClusterer
from tests.test_document import get_test_documents

###########################################
# GLOBALS                                 #
###########################################
# The clusterer is created and filled once, at import time, and shared by
# every test in this module.
ignore, ignore, samples = get_test_documents()
oc = OrangeKmeansClusterer(k=2)
for sample in samples:
    oc.add_document(sample)


class Test(unittest.TestCase):

    def test_orange_cluster_term_document_matrix(self):
        '''
        The term-document matrix built from the samples must match the
        reference values element by element.
        '''
        oc.construct_term_doc_matrix()
        calculated = oc.td_matrix
        expected = numpy.array([
            [0.31388923, 0.11584717, 0, 0, 0, 0, 0.47083384],
            [0, 0.13515504, 0.3662041, 0, 0.3662041, 0, 0],
            [0, 0, 0, 0.54930614, 0, 0.549306140, 0],
        ])
        # The previous check compared expected.all() with calculated.all(),
        # i.e. two booleans; expected contains zeros, so its .all() is always
        # False and the test passed for almost any matrix. Compare the values
        # themselves instead. The reference numbers are rounded to 8 decimals,
        # hence the absolute tolerance.
        # NOTE(review): assumes oc.td_matrix is a dense array-like of the same
        # shape as `expected` - confirm against the clusterer.
        assert_allclose(calculated, expected, rtol=0, atol=1e-6)

    def test_orange_save_matrix_to_tab_file(self):
        '''
        Build the matrix and save it as "sample_table_orange".
        No assertion is made; the test passes if neither call raises.
        '''
        oc.construct_term_doc_matrix()
        oc.save_table("sample_table_orange")
'''
Created on 26 Jan 2012

@author: george

Tests for OrangeKmeansClusterer built from the shared sample documents.
'''
import unittest
import numpy
from numpy.testing import assert_allclose

from analysis.clustering.kmeans import OrangeKmeansClusterer
from tests.test_document import get_test_documents

###########################################
# GLOBALS                                 #
###########################################
# Module-level fixture: one clusterer, filled at import time, shared by all
# tests below.
ignore, ignore, samples = get_test_documents()
oc = OrangeKmeansClusterer(k=2)
for sample in samples:
    oc.add_document(sample)


class Test(unittest.TestCase):

    def test_orange_cluster_term_document_matrix(self):
        '''
        Compare the constructed term-document matrix with the reference
        values, element by element.
        '''
        oc.construct_term_doc_matrix()
        calculated = oc.td_matrix
        expected = numpy.array(
            [[0.31388923, 0.11584717, 0, 0, 0, 0, 0.47083384],
             [0, 0.13515504, 0.3662041, 0, 0.3662041, 0, 0],
             [0, 0, 0, 0.54930614, 0, 0.549306140, 0]])
        # assertEqual(expected.all(), calculated.all()) only compared two
        # booleans, and expected.all() is always False because the matrix
        # contains zeros - the check could not detect wrong values. Assert on
        # the actual numbers; the tolerance covers the 8-decimal rounding of
        # the reference data.
        # NOTE(review): assumes oc.td_matrix is a dense array-like with the
        # same shape as `expected` - confirm against the clusterer.
        assert_allclose(calculated, expected, rtol=0, atol=1e-6)

    def test_orange_save_matrix_to_tab_file(self):
        '''
        Build the term-document matrix; passes if the call does not raise.
        '''
        # NOTE(review): despite the test name, nothing is saved or asserted
        # here - confirm whether a save call is missing.
        oc.construct_term_doc_matrix()
'''
Created on 26 Jan 2012

@author: george

Tests that drive OnlineClusterer over the sample document fixtures.
'''
import unittest
import numpy

from analysis.clustering.online import OnlineClusterer
from analysis.clustering.structures import OnlineCluster
from tests.test_document import get_test_documents, get_orange_clustering_test_data

###########################################
# GLOBALS                                 #
###########################################
expected, sample_docs_raw, samples = get_test_documents()


class Test(unittest.TestCase):

    def test_sample_doc_clustering_with_online(self):
        '''
        Add and cluster each document of the orange clustering test data,
        then print the document dictionary of every resulting cluster.
        No assertion is made; the test passes if nothing raises.
        '''
        oc = OnlineClusterer(N=2, window=3)
        # Local fixture; intentionally distinct from the module-level samples.
        documents = get_orange_clustering_test_data()
        for document in documents:
            # The return value of add_document was stored but never used.
            oc.add_document(document)
            oc.cluster(document)
        # TODO(review): this list is never compared with anything; it looks
        # like the intended per-document cluster assignment - confirm and add
        # the missing assertion.
        expected = [0, 0, 0, 1, 1, 1]
        for cluster in oc.clusters:
            # Parenthesised form prints the same on Python 2 and is valid
            # syntax on Python 3 (the bare print statement is not).
            print(cluster.document_dict)

    def test_cluster_term_document_matrix(self):
        '''
        Add and cluster each of the module-level sample documents.
        No assertion is made; the test passes if nothing raises.
        '''
        oc = OnlineClusterer(N=2, window=3)
        for document in samples:
            oc.add_document(document)
            oc.cluster(document)