def test_classifier(self):
        """Run the factory-built classifier with no extra pipeline or persister.

        Builds the classifier from in-memory train/test repositories and an
        in-memory broker, then checks that three outliers are returned and
        that the item fetched from the notifier's queue also has length 3.
        """
        # NOTE: the helper hands back the *test* set first, then the train set.
        test_data, train_data = self.get_train_and_test_data()

        factory = ClusterClassifierFactory(
            train_repository=InMemoryRepository(data=train_data),
            test_repository=InMemoryRepository(data=test_data),
            notifier=InMemoryBroker(),
        )
        classifier = factory.compile()

        outliers = classifier.detect_outliers()
        self.assertEqual(len(outliers), 3, "it should retrieve 3 outliers")

        # Fetched only after the first assertion, same as the original order.
        notified = classifier.notifier.queue.get()
        self.assertEqual(
            len(notified), 3,
            "it should retrieve 3 outliers notified to Broker")
예제 #2
0
    def test_log_classifier_with_persisted_train_clusters(self):
        """Detect new clusters from the train/test resource files.

        Builds the classifier from file-backed repositories plus an outlier
        persister, then checks that two new clusters are returned, notified
        through the broker's queue, and available from the outlier persister.
        """
        outlier_persister, train_persister = self.get_persisters()
        # NOTE(review): presumably resets any previously persisted train
        # clusters -- confirm against the persister implementation.
        train_persister.save(object=None)

        # Open both fixture files in a context manager so the handles are
        # closed even when the classifier or an assertion raises.
        with open(ROOT_DIR + '/../resources/train.txt') as train_file, \
                open(ROOT_DIR + '/../resources/test.txt') as test_file:
            classifier = ClusterClassifierFactory(
                train_repository=FileRepository(file=train_file),
                test_repository=FileRepository(file=test_file),
                notifier=InMemoryBroker())
            classifier.add_outlier_persister(outlier_persister)
            classifier = classifier.compile()

            new_clusters = classifier.detect_outliers()

            try:
                self.assertEqual(len(new_clusters), 2,
                                 "it should retrieve 2 new clusters")

                self.assertEqual(
                    len(classifier.notifier.queue.get()), 2,
                    "it should retrieve 2 new clusters notified to Broker")

                self.assertEqual(
                    len(classifier.outlier_persister.get()), 2,
                    "it should retrieve 2 new persisted clusters")
            finally:
                # Always drop the persisted outliers so a failing assertion
                # does not leave state behind for other tests.
                classifier.outlier_persister.remove()
    def test_cosine_similarity_classifier_with_persisted_train_clusters(self):
        """Detect outliers with a cosine-similarity pipeline and a persister.

        Builds the classifier from in-memory repositories, adds an outlier
        persister and a ``CosineSimilarityPipeline(ratio=.70)``, then checks
        that three outliers are returned, notified through the broker's
        queue, and available from the outlier persister.
        """
        outlier_persister, train_persister = self.get_persisters()
        # NOTE: the helper hands back the *test* set first, then the train set.
        test_data, train_data = self.get_train_and_test_data()
        # NOTE(review): presumably resets any previously persisted train
        # clusters -- confirm against the persister implementation.
        train_persister.save(object=None)

        classifier = ClusterClassifierFactory(
            train_repository=InMemoryRepository(data=train_data),
            test_repository=InMemoryRepository(data=test_data),
            notifier=InMemoryBroker())
        classifier.add_outlier_persister(outlier_persister)
        classifier.add_pipeline(CosineSimilarityPipeline(ratio=.70))
        classifier = classifier.compile()

        outliers = classifier.detect_outliers()

        try:
            self.assertEqual(len(outliers), 3,
                             "it should retrieve 3 outliers")

            self.assertEqual(
                len(classifier.notifier.queue.get()), 3,
                "it should retrieve 3 outliers notified to Broker")

            self.assertEqual(len(classifier.outlier_persister.get()), 3,
                             "it should retrieve 3 outliers clusters")
        finally:
            # Always drop the persisted outliers so a failing assertion
            # does not leave state behind for other tests.
            classifier.outlier_persister.remove()
    def test_low_ratio_cosine_similarity_classifier(self):
        """Check outlier detection with ``CosineSimilarityPipeline(ratio=.01)``.

        Trains on two short sentences and tests on two, one of which also
        appears in the train set. Exactly one outlier is expected to be
        returned, and the item fetched from the notifier's queue is expected
        to have length 1.
        """
        train_samples = ["Hello world", "Uncle Bob"]
        test_samples = ["It's an outlier", "Hello world"]

        factory = ClusterClassifierFactory(
            train_repository=InMemoryRepository(data=train_samples),
            test_repository=InMemoryRepository(data=test_samples),
            notifier=InMemoryBroker(),
        )
        factory.add_pipeline(CosineSimilarityPipeline(ratio=.01))
        classifier = factory.compile()

        outliers = classifier.detect_outliers()
        self.assertEqual(len(outliers), 1, "it should retrieve 1 outliers")

        # Fetched only after the first assertion, same as the original order.
        notified = classifier.notifier.queue.get()
        self.assertEqual(
            len(notified), 1,
            "it should retrieve 1 outliers notified to Broker")