def test_classifier(self):
    """Detect outliers with in-memory repositories and no extra pipeline.

    Expects three outliers to be returned by the classifier and three to
    be read back from the in-memory broker queue.
    """
    test_data, train_data = self.get_train_and_test_data()

    train_repository = InMemoryRepository(data=train_data)
    test_repository = InMemoryRepository(data=test_data)
    broker = InMemoryBroker()
    factory = ClusterClassifierFactory(
        train_repository=train_repository,
        test_repository=test_repository,
        notifier=broker,
    )

    classifier = factory.compile()
    outliers = classifier.detect_outliers()

    self.assertEqual(len(outliers), 3, "it should retrieve 3 outliers")
    notified = classifier.notifier.queue.get()
    self.assertEqual(
        len(notified), 3,
        "it should retrieve 3 outliers notified to Broker")
def test_log_classifier_with_persisted_train_clusters(self):
    """Detect new clusters from the file-backed train/test resources.

    Expects two new clusters to be returned, two to be read back from the
    in-memory broker queue, and two to be available from the outlier
    persister. The persisted outliers are removed afterwards even when an
    assertion fails, so the state does not leak into other tests.
    """
    outlier_persister, train_persister = self.get_persisters()
    # NOTE(review): presumably resets any previously persisted train
    # clusters -- confirm against the persister implementation.
    train_persister.save(object=None)

    # Keep both resource files open for the whole run (the repositories may
    # read them lazily -- not visible from here) and close them on exit
    # instead of leaking the handles.
    with open(ROOT_DIR + '/../resources/train.txt') as train_file, \
            open(ROOT_DIR + '/../resources/test.txt') as test_file:
        classifier = ClusterClassifierFactory(
            train_repository=FileRepository(file=train_file),
            test_repository=FileRepository(file=test_file),
            notifier=InMemoryBroker())
        classifier.add_outlier_persister(outlier_persister)
        classifier = classifier.compile()
        new_clusters = classifier.detect_outliers()

        try:
            self.assertEqual(len(new_clusters), 2,
                             "it should retrieve 2 new clusters")
            self.assertEqual(
                len(classifier.notifier.queue.get()), 2,
                "it should retrieve 2 new clusters notified to Broker")
            self.assertEqual(
                len(classifier.outlier_persister.get()), 2,
                "it should retrieve 2 new persisted clusters")
        finally:
            # Always clean up, so a failing assertion cannot leave
            # persisted outliers behind.
            classifier.outlier_persister.remove()
def test_cosine_similarity_classifier_with_persisted_train_clusters(self):
    """Detect outliers with a cosine-similarity pipeline (ratio 0.70).

    Expects three outliers to be returned, three to be read back from the
    in-memory broker queue, and three to be available from the outlier
    persister. The persisted outliers are removed afterwards even when an
    assertion fails, so the state does not leak into other tests.
    """
    outlier_persister, train_persister = self.get_persisters()
    test_data, train_data = self.get_train_and_test_data()
    # NOTE(review): presumably resets any previously persisted train
    # clusters -- confirm against the persister implementation.
    train_persister.save(object=None)

    classifier = ClusterClassifierFactory(
        train_repository=InMemoryRepository(data=train_data),
        test_repository=InMemoryRepository(data=test_data),
        notifier=InMemoryBroker())
    classifier.add_outlier_persister(outlier_persister)
    classifier.add_pipeline(CosineSimilarityPipeline(ratio=.70))
    classifier = classifier.compile()
    outliers = classifier.detect_outliers()

    try:
        self.assertEqual(len(outliers), 3, "it should retrieve 3 outliers")
        self.assertEqual(
            len(classifier.notifier.queue.get()), 3,
            "it should retrieve 3 outliers notified to Broker")
        self.assertEqual(
            len(classifier.outlier_persister.get()), 3,
            "it should retrieve 3 outliers clusters")
    finally:
        # Always clean up, so a failing assertion cannot leave persisted
        # outliers behind.
        classifier.outlier_persister.remove()
def test_low_ratio_cosine_similarity_classifier(self):
    """Detect outliers with a cosine-similarity pipeline at ratio 0.01.

    Trains on two short sentences and tests on two, one of which also
    appears in the training data. Expects exactly one outlier to be
    returned and one to be read back from the in-memory broker queue.
    """
    train_data = ["Hello world", "Uncle Bob"]
    test_data = ["It's an outlier", "Hello world"]

    factory = ClusterClassifierFactory(
        train_repository=InMemoryRepository(data=train_data),
        test_repository=InMemoryRepository(data=test_data),
        notifier=InMemoryBroker(),
    )
    factory.add_pipeline(CosineSimilarityPipeline(ratio=.01))

    classifier = factory.compile()
    outliers = classifier.detect_outliers()

    self.assertEqual(len(outliers), 1, "it should retrieve 1 outliers")
    notified = classifier.notifier.queue.get()
    self.assertEqual(
        len(notified), 1,
        "it should retrieve 1 outliers notified to Broker")