Python NVDFeedPreprocessor 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: toolkit.preprocessing

메소드/함수: NVDFeedPreprocessor

hotexamples.com에서의 예제들: 4

Python NVDFeedPreprocessor - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python의 toolkit.preprocessing.NVDFeedPreprocessor에 대한 평가가 가장 높은 실제 사용 예제들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: pipelines.py 프로젝트: pombredanne/fabric8-analytics-POCs

def get_extraction_pipeline(attributes,
                            feature_hooks: list = None) -> Pipeline:
    """Assemble the three-step extraction pipeline.

    The returned pipeline chains, in order, the steps named
    ``nvd_feed_preprocessor``, ``nltk_preprocessor`` and
    ``feature_extractor``.

    :param attributes: list of attributes, handed to `NVDFeedPreprocessor`
        as ``attributes`` and to `NLTKPreprocessor` as ``feed_attributes``

    :param feature_hooks: hooks forwarded unchanged to `FeatureExtractor`
        as its ``feature_hooks`` argument

        NOTE(review): the annotation says ``list`` while this parameter has
        been documented as a ``{feature_key: Hook}`` dict; confirm which
        form `FeatureExtractor` expects.

    :returns: Pipeline
    """
    nvd_step = preprocessing.NVDFeedPreprocessor(attributes=attributes)
    nltk_step = preprocessing.NLTKPreprocessor(feed_attributes=attributes)

    # Hooks are always shared here, so that hooks already registered by
    # a previously built training pipeline can be reused.
    extractor_step = transformers.FeatureExtractor(
        feature_hooks=feature_hooks,
        share_hooks=True,
    )

    steps = [
        ('nvd_feed_preprocessor', nvd_step),
        ('nltk_preprocessor', nltk_step),
        ('feature_extractor', extractor_step),
    ]
    return Pipeline(steps=steps)

예제 #2

파일 보기

파일: train.py 프로젝트: msrb/fabric8-analytics-nvd-toolkit

def main(argv):
    """Train the classifier on the NVD feed and optionally export it.

    Parses `argv`, downloads/updates the requested NVD feeds, fits the
    training pipeline on the resulting CVE list and, when ``args.export``
    is set, exports the result to ``args.export_dir``.

    :param argv: command line arguments handed to `parse_args`
    :raises NotImplementedError: when ``args.csv`` is set (CSV input is
        not supported yet)
    """
    args = parse_args(argv=argv)

    # Guard clause: CSV input is a TODO, bail out before doing any work.
    if args.csv:
        raise NotImplementedError("The feature has not been implemented yet."
                                  " Sorry for the inconvenience.")

    print("Getting NVD Feed...")
    nvd_feed = NVD.from_feeds(feed_names=args.nvd_feeds)
    nvd_feed.update()
    # `cves()` is a generator; materialize it because the data is walked
    # more than once (lookup table below and pipeline fitting).
    cves = list(nvd_feed.cves())

    cves_by_id = {entry.cve_id: entry for entry in cves}

    # Default keyword argument for the vendor-product feature hook.
    feature_hooks.vendor_product_match_hook.default_kwargs = {
        'cve_dict': cves_by_id
    }

    nvd_step = preprocessing.NVDFeedPreprocessor(
        attributes=['cve_id', 'description'])
    label_hook = transformers.Hook(
        key='label_hook', reuse=True, func=utils.find_)
    label_step = preprocessing.LabelPreprocessor(
        feed_attributes=['project', 'description'],
        output_attributes=['cve_id', 'description'],
        hook=label_hook)
    nltk_step = preprocessing.NLTKPreprocessor(
        feed_attributes=['description'],
        output_attributes=['cve_id', 'label'])
    extractor_step = transformers.FeatureExtractor(
        feature_hooks=FEATURE_HOOKS, share_hooks=True)
    classifier_step = transformers.NBClassifier()

    pipeline = Pipeline(steps=[
        ('nvd_feed_preprocessor', nvd_step),
        ('label_preprocessor', label_step),
        ('nltk_preprocessor', nltk_step),
        ('feature_extractor', extractor_step),
        ('classifier', classifier_step),
    ])

    start_time = time()
    print("Training started")

    try:
        trained = pipeline.fit_transform(X=cves)
    finally:
        # Reported whether fitting succeeded or raised.
        print(f"Training finished in {time() - start_time} seconds")

    if args.export:
        trained.export(args.export_dir)

예제 #3

파일 보기

def get_preprocessing_pipeline(nvd_attributes: list,
                               nltk_feed_attributes: list = None,
                               labeling_func: typing.Callable = None,
                               share_hooks=False) -> Pipeline:
    """Assemble the preprocessing (labeling + NLTK) pipeline.

    The returned pipeline chains, in order, the steps named
    ``nvd_feed_preprocessor``, ``label_preprocessor`` and
    ``nltk_preprocessor``. It is meant to take a list of CVE objects and
    produce labeled data ready for feature extraction.

    *must be fit using `fit_transform` method.*

    :param nvd_attributes: list of attributes

        Passed to `NVDFeedPreprocessor` as ``attributes`` and to
        `LabelPreprocessor` as ``output_attributes``.

    :param nltk_feed_attributes: list of attributes

        Passed to `NLTKPreprocessor` as ``feed_attributes``.

    :param labeling_func: callable used for labeling

        Wrapped into the `Hook` given to `LabelPreprocessor`.
        Falls back to `utils.find_` when None.

    :param share_hooks: boolean, passed as ``reuse`` to the label hook

    :returns: Pipeline
    """
    label_fn = utils.find_ if labeling_func is None else labeling_func

    nvd_step = preprocessing.NVDFeedPreprocessor(attributes=nvd_attributes)

    label_hook = transformers.Hook(
        key='label_hook', func=label_fn, reuse=share_hooks)
    label_step = preprocessing.LabelPreprocessor(
        feed_attributes=['project', 'description'],
        # the label step outputs the same attributes the NVD step extracts
        output_attributes=nvd_attributes,
        hook=label_hook,
    )

    nltk_step = preprocessing.NLTKPreprocessor(
        feed_attributes=nltk_feed_attributes)

    steps = [
        ('nvd_feed_preprocessor', nvd_step),
        ('label_preprocessor', label_step),
        ('nltk_preprocessor', nltk_step),
    ]
    return Pipeline(steps=steps)

예제 #4

파일 보기

def get_full_training_pipeline(labeling_func: typing.Callable = None,
                               feature_hooks=None,
                               share_hooks=False) -> Pipeline:
    """Assemble the complete training pipeline without preset attributes.

    The returned pipeline chains, in order, the steps named
    ``nvd_feed_preprocessor``, ``label_preprocessor``,
    ``nltk_preprocessor``, ``feature_extractor`` and ``classifier``.

    None of the preprocessors is given feed or output attributes here, so
    those must be supplied by the caller through `fit_params` at fit time.

    *must be fit using `fit_transform` method with `fit_params`*

    :param labeling_func: callable used for labeling

        Wrapped into the `Hook` given to `LabelPreprocessor`.
        Falls back to `utils.find_` when None.

    :param feature_hooks: dict, {feature_key: Hook}

        Forwarded unchanged to `FeatureExtractor` as ``feature_hooks``.

    :param share_hooks: boolean, passed as ``reuse`` to the label hook

    :returns: Pipeline
    """
    label_fn = utils.find_ if labeling_func is None else labeling_func

    nvd_step = preprocessing.NVDFeedPreprocessor()

    label_hook = transformers.Hook(
        key='label_hook', reuse=share_hooks, func=label_fn)
    label_step = preprocessing.LabelPreprocessor(hook=label_hook)

    nltk_step = preprocessing.NLTKPreprocessor()

    # The extractor's `share_hooks` is fixed to True, independent of the
    # `share_hooks` argument (which only controls the label hook).
    extractor_step = transformers.FeatureExtractor(
        feature_hooks=feature_hooks, share_hooks=True)

    classifier_step = transformers.NBClassifier()

    steps = [
        ('nvd_feed_preprocessor', nvd_step),
        ('label_preprocessor', label_step),
        ('nltk_preprocessor', nltk_step),
        ('feature_extractor', extractor_step),
        ('classifier', classifier_step),
    ]
    return Pipeline(steps=steps)