def identify(self):
    """Identify possible package name candidates.

    Restores the pretrained ``NBClassifier`` from
    ``Config.nvdtoolkit_export_dir``, runs a prediction pipeline over
    ``self.doc.description`` and forwards the predicted names to
    ``run_cpe2pkg``.

    When ``Config.ecosystem`` is ``'java'`` the predicted names are used as
    both vendor and product; otherwise the ecosystem itself is the only
    vendor.

    :returns: whatever ``run_cpe2pkg(vendor, product)`` returns
    """
    # Load the pretrained classifier from the configured checkpoint.
    classifier = NBClassifier.restore(checkpoint=Config.nvdtoolkit_export_dir)

    hook_list = [
        feature_hooks.has_uppercase_hook,
        feature_hooks.is_alnum_hook,
        feature_hooks.ver_follows_hook,
        feature_hooks.word_len_hook,
    ]
    prediction_pipeline = pipelines.get_prediction_pipeline(
        classifier=classifier,
        feature_hooks=hook_list,
    )

    # A single description is submitted, so only the first row is relevant.
    predicted = prediction_pipeline.fit_predict(
        [self.doc.description],
        classifier__sample=True,
    )
    first_row = predicted.tolist()[0]

    # Each entry is indexed as entry[0][0]; presumably that is the candidate
    # name of a ((name, tag), score) record -- confirm against the classifier.
    names = [entry[0][0] for entry in first_row]

    ecosystem = Config.ecosystem
    vendors = names if ecosystem == 'java' else [ecosystem]

    return run_cpe2pkg(vendors, names)
def test_prediction_pipeline(self):
    """Test pipeline prediction.

    Fits an ``NBClassifier`` on features extracted from ``self.test_data``,
    predicts over three identical sample descriptions and checks that the
    result is not ``None``, that its second dimension equals the requested
    number of candidates and that its last dimension is 2.
    """
    labeled_features, _ = pipelines.extract_labeled_features(
        self.test_data,
        nvd_attributes=['project', 'description'],
        nltk_feed_attributes=['description'],
    )
    classifier = classifiers.NBClassifier().fit(labeled_features)

    sample = 'Sample project name prediction'
    descriptions = [sample, sample, sample]

    pipeline = pipelines.get_prediction_pipeline(classifier)

    expected_candidates = 3
    predictions = pipeline.fit_predict(
        descriptions,
        classifier__n=expected_candidates,
        classifier__sample=True,
    )

    self.assertIsNotNone(predictions)

    shape = predictions.shape
    self.assertEqual(shape[1], expected_candidates)
    # The innermost axis holds the (candidate, probability) pair.
    self.assertEqual(shape[-1], 2)
def main():
    """Predict candidates for a description given on the command line.

    Parses the arguments with the module-level ``__parser``, restores the
    classifier from ``args.path_to_classifier``, predicts
    ``args.num_candidates`` candidates for ``args.description`` and prints
    each candidate with its tag and confidence score.
    """
    args = __parser.parse_args()

    classifier = classifiers.NBClassifier.restore(args.path_to_classifier)
    pipeline = pipelines.get_prediction_pipeline(
        classifier=classifier,
        feature_hooks=FEATURE_HOOKS,
    )

    # Exactly one description goes in, so exactly one result row is unpacked.
    (prediction,) = pipeline.fit_predict(
        X=[args.description],
        classifier__n=args.num_candidates,
        classifier__sample=True,
    )

    print("Prediction results:")
    print("-------------------")

    # Indented for readability here; dedented after formatting, before print.
    template = """\
        Candidate : {name}
        Tag : {tag}
        Confidence: {score}
        """

    for (name, tag), score in prediction:
        report = template.format(name=name, tag=tag, score=score)
        print(textwrap.dedent(report))