Beispiel #1
0
    def test_prediction_pipeline(self):
        """Check the shape of predictions produced by the prediction pipeline.

        Labeled features are extracted from ``self.test_data``, a
        ``NBClassifier`` is fitted on them and wrapped into a prediction
        pipeline, which is then asked for a fixed number of candidates
        for each sample string.
        """
        # Fit the classifier the pipeline will wrap.
        features, _ = pipelines.extract_labeled_features(
            self.test_data,
            nvd_attributes=['project', 'description'],
            nltk_feed_attributes=['description'],
        )
        fitted_clf = classifiers.NBClassifier().fit(features)

        pipeline = pipelines.get_prediction_pipeline(fitted_clf)

        # Three identical raw inputs to predict on.
        samples = ['Sample project name prediction'] * 3
        expected_candidates = 3

        result = pipeline.fit_predict(
            samples,
            classifier__n=expected_candidates,
            classifier__sample=True,
        )

        self.assertIsNotNone(result)

        shape = result.shape
        # Second axis: one entry per requested candidate.
        self.assertEqual(shape[1], expected_candidates)
        # Last axis: a (candidate, probability) pair, per the original note.
        self.assertEqual(shape[-1], 2)
    def test_evaluation(self):
        """Check that evaluating against all-``None`` labels scores 0.0.

        Covers both ``NBClassifier.evaluate`` and
        ``classifiers.cross_validate`` on features extracted from the
        test data.
        """
        features, _ = pipelines.extract_labeled_features(
            data=_get_test_data(),
            attributes=['description'],
        )

        fitted_clf = classifiers.NBClassifier().fit(features)
        self.assertIsNotNone(fitted_clf)

        # One ``None`` label per feature set; the expected score is 0.0.
        none_labels = [None] * len(features)

        # Direct evaluation.
        eval_score = fitted_clf.evaluate(features, none_labels, sample=True)
        self.assertIsNotNone(eval_score)
        self.assertEqual(eval_score, 0.0)

        # Cross-validated evaluation; the result exposes a ``mean`` attribute.
        cv_score = classifiers.cross_validate(
            fitted_clf,
            features,
            none_labels,
            sample=True,
        )
        self.assertIsNotNone(cv_score)
        self.assertEqual(cv_score.mean, 0.0)
Beispiel #3
0
def _export_classifier():
    """Fit a classifier on the test data and export it for unit tests.

    The classifier is exported into a freshly created temporary
    directory (prefixed ``test_export_``). The directory is not removed
    here.

    :returns: the value returned by ``NBClassifier.export`` (used by
        callers as the path to the pickled classifier)
    """
    features, _ = pipelines.extract_labeled_features(
        data=_get_test_data(),
        nvd_attributes=['project', 'description'],
        nltk_feed_attributes=['description'],
    )

    fitted_clf = classifiers.NBClassifier().fit(features)

    # Each call gets its own directory so exports never collide.
    export_dir = tempfile.mkdtemp(prefix='test_export_')

    return fitted_clf.export(export_dir=export_dir)