예제 #1
0
    def identify(self):
        """Identify possible package name candidates.

        Restores the pretrained classifier from
        ``Config.nvdtoolkit_export_dir``, runs the prediction pipeline on
        ``self.doc.description`` and hands the predicted names over to
        ``run_cpe2pkg``.

        :returns: the result of ``run_cpe2pkg(vendor, product)``
        """
        classifier = NBClassifier.restore(
            checkpoint=Config.nvdtoolkit_export_dir)

        hook_list = [
            feature_hooks.has_uppercase_hook,
            feature_hooks.is_alnum_hook,
            feature_hooks.ver_follows_hook,
            feature_hooks.word_len_hook,
        ]
        prediction_pipeline = pipelines.get_prediction_pipeline(
            classifier=classifier,
            feature_hooks=hook_list)

        predictions = prediction_pipeline.fit_predict(
            [self.doc.description],
            classifier__sample=True)

        # A single description was submitted, so only the first row of the
        # prediction output is relevant.
        first_row = predictions.tolist()[0]

        # Each entry is indexed as entry[0][0] to pull out the candidate name
        # (presumably entries look like ((name, tag), score) -- verify
        # against the classifier's output format).
        names = [entry[0][0] for entry in first_row]

        # For the 'java' ecosystem the predicted names double as vendor
        # candidates; otherwise the ecosystem name is the only vendor.
        ecosystem = Config.ecosystem
        vendor = names if ecosystem == 'java' else [ecosystem]

        return run_cpe2pkg(vendor, names)
예제 #2
0
    def test_prediction_pipeline(self):
        """Check the shape of the prediction pipeline output.

        Fits an ``NBClassifier`` on features extracted from
        ``self.test_data`` and verifies that predicting on three inputs
        yields ``n_candidates`` (candidate, probability) pairs per input.
        """
        labeled_features, _ = pipelines.extract_labeled_features(
            self.test_data,
            nvd_attributes=['project', 'description'],
            nltk_feed_attributes=['description'])

        classifier = classifiers.NBClassifier().fit(labeled_features)

        # Three identical descriptions are enough to exercise batching.
        descriptions = ['Sample project name prediction'] * 3

        pipeline = pipelines.get_prediction_pipeline(classifier)

        n_candidates = 3
        result = pipeline.fit_predict(
            descriptions,
            classifier__n=n_candidates,
            classifier__sample=True)

        self.assertIsNotNone(result)
        # Second axis: one slot per requested candidate.
        self.assertEqual(result.shape[1], n_candidates)
        # Last axis: candidate - proba
        self.assertEqual(result.shape[-1], 2)
예제 #3
0
def main():
    """Predict package name candidates for a description and print them.

    Reads ``path_to_classifier``, ``description`` and ``num_candidates``
    from the parsed command line arguments, restores the classifier,
    runs the prediction pipeline on the single description and prints
    every candidate together with its tag and confidence score.
    """
    cli_args = __parser.parse_args()

    classifier = classifiers.NBClassifier.restore(cli_args.path_to_classifier)
    pipeline = pipelines.get_prediction_pipeline(
        classifier=classifier,
        feature_hooks=FEATURE_HOOKS)

    # Exactly one description goes in, so exactly one result row is expected;
    # the single-element unpacking fails loudly otherwise.
    [candidates] = pipeline.fit_predict(
        X=[cli_args.description],
        classifier__n=cli_args.num_candidates,
        classifier__sample=True)

    # The template is indented for readability in the source; the indentation
    # is stripped with textwrap.dedent after the values are filled in.
    report_template = """\
        Candidate : {name}
        Tag       : {tag}
        Confidence: {score}
        """

    print("Prediction results:")
    print("-------------------")
    for labeled_name, score in candidates:
        name, tag = labeled_name
        report = report_template.format(name=name, tag=tag, score=score)
        print(textwrap.dedent(report))