def __init__(self, lemmatization=False):
    """Build a text-only bug-couple model: cleanup + vectorized text fed to a linear SVC."""
    BugCoupleModel.__init__(self, lemmatization)

    self.calculate_importance = False

    # Normalizers applied to the raw comment text before vectorization.
    text_cleaners = [
        feature_cleanup.responses(),
        feature_cleanup.hex(),
        feature_cleanup.dll(),
        feature_cleanup.fileref(),
        feature_cleanup.url(),
        feature_cleanup.synonyms(),
        feature_cleanup.crash(),
    ]

    # No structured feature extractors: this model relies on text alone.
    extractor = bug_features.BugExtractor([], text_cleaners)
    vectorize = ColumnTransformer([("text", self.text_vectorizer(), "text")])

    self.extraction_pipeline = Pipeline(
        steps=[
            ("bug_extractor", extractor),
            ("union", vectorize),
        ]
    )

    self.clf = LinearSVCWithLabelEncoding(LinearSVC())
def test_dll():
    """The dll cleaner masks unknown module names but leaves whitelisted Mozilla libraries intact."""
    cases = [
        (
            "Crashing thread: 0 scdetour.dll scdetour.dll@0x2dd77",
            "Crashing thread: 0 __DLL_NAME__ __DLL_NAME__@0x2dd77",
        ),
        (
            # Known Mozilla library: left untouched.
            "Crash in libxul.so@0x287ad36 | libxul.so@0x270c062",
            "Crash in libxul.so@0x287ad36 | libxul.so@0x270c062",
        ),
        (
            "Crash in libsystem_pthread.dylib@0x14fc",
            "Crash in __DLL_NAME__@0x14fc",
        ),
        (
            "Crash in liblgpllibs.so@0x14fc exmpl.so@0xask ",
            "Crash in liblgpllibs.so@0x14fc __DLL_NAME__@0xask ",
        ),
        (
            "Crash in lgpllibs.dll@0x14fc exmpl.dll@0xask ",
            "Crash in lgpllibs.dll@0x14fc __DLL_NAME__@0xask ",
        ),
        (
            "Crash in libmozglue.dylib@0x14fc exmpl.dylib@0xask ",
            "Crash in libmozglue.dylib@0x14fc __DLL_NAME__@0xask ",
        ),
    ]

    cleaner = feature_cleanup.dll()
    for raw, expected in cases:
        assert cleaner(raw) == expected
def __init__(self, training_size=14000, lemmatization=False, cleanup_urls=True):
    """Duplicate-detection model combining couple features and text, trained with XGBoost.

    The training set is split half duplicates, a quarter dup/non-dup pairs,
    and a quarter non-dup/non-dup pairs.
    """
    self.num_duplicates = training_size // 2
    self.num_nondups_nondups = self.num_dup_nondups = training_size // 4

    BugCoupleModel.__init__(self, lemmatization)

    self.calculate_importance = False

    # Pairwise similarity signals computed on each bug couple.
    couple_features = [
        bug_features.is_same_product(),
        bug_features.is_same_component(),
        bug_features.is_same_platform(),
        bug_features.is_same_version(),
        bug_features.is_same_os(),
        bug_features.is_same_target_milestone(),
        bug_features.is_first_affected_same(),
        bug_features.couple_common_words_comments(),
        bug_features.couple_delta_creation_date(),
        bug_features.couple_common_keywords(),
        bug_features.couple_common_whiteboard_keywords(),
        bug_features.couple_common_words_summary(),
    ]

    text_cleaners = [
        feature_cleanup.responses(),
        feature_cleanup.hex(),
        feature_cleanup.dll(),
        feature_cleanup.fileref(),
        feature_cleanup.synonyms(),
        feature_cleanup.crash(),
    ]
    if cleanup_urls:
        text_cleaners.append(feature_cleanup.url())

    self.extraction_pipeline = Pipeline(
        steps=[
            (
                "bug_extractor",
                bug_features.BugExtractor(
                    couple_features, text_cleaners, rollback=True
                ),
            ),
            (
                "union",
                ColumnTransformer(
                    [
                        ("text", self.text_vectorizer(), "text"),
                        ("couple_data", DictVectorizer(), "couple_data"),
                    ]
                ),
            ),
        ]
    )

    self.clf = XGBClassifier(n_jobs=utils.get_physical_cpu_count())
def __init__(self, cleanup_urls=True):
    """Assemble the text cleanup pipeline, optionally masking URLs as well."""
    cleaners = [
        feature_cleanup.responses(),
        feature_cleanup.hex(),
        feature_cleanup.dll(),
        feature_cleanup.fileref(),
        feature_cleanup.synonyms(),
        feature_cleanup.crash(),
    ]
    if cleanup_urls:
        cleaners.append(feature_cleanup.url())

    self.cleanup_functions = cleaners
def __init__(self, cleanup_urls=True, nltk_tokenizer=False, confidence_threshold=0.8):
    """Store the cleanup pipeline, tokenizer choice, and prediction confidence cutoff."""
    cleaners = [
        feature_cleanup.responses(),
        feature_cleanup.hex(),
        feature_cleanup.dll(),
        feature_cleanup.fileref(),
        feature_cleanup.synonyms(),
        feature_cleanup.crash(),
    ]
    if cleanup_urls:
        cleaners.append(feature_cleanup.url())

    self.cleanup_functions = cleaners
    self.nltk_tokenizer = nltk_tokenizer
    self.confidence_threshold = confidence_threshold
def __init__(self, training_size=14000, lemmatization=False, cleanup_urls=True):
    """Lightweight duplicate-detection model: text plus a single product feature, linear SVC.

    The training set is split half duplicates, a quarter dup/non-dup pairs,
    and a quarter non-dup/non-dup pairs.
    """
    self.num_duplicates = training_size // 2
    self.num_nondups_nondups = self.num_dup_nondups = training_size // 4

    BugCoupleModel.__init__(self, lemmatization)

    self.calculate_importance = False

    # Only one structured signal; the model otherwise relies on text.
    couple_features = [bug_features.is_same_product()]

    text_cleaners = [
        feature_cleanup.responses(),
        feature_cleanup.hex(),
        feature_cleanup.dll(),
        feature_cleanup.fileref(),
        feature_cleanup.synonyms(),
        feature_cleanup.crash(),
    ]
    if cleanup_urls:
        text_cleaners.append(feature_cleanup.url())

    self.extraction_pipeline = Pipeline(
        steps=[
            (
                "bug_extractor",
                bug_features.BugExtractor(couple_features, text_cleaners),
            ),
            (
                "union",
                ColumnTransformer([("text", self.text_vectorizer(), "text")]),
            ),
        ]
    )

    self.clf = LinearSVCWithLabelEncoding(LinearSVC())
def __init__(
    self,
    cleanup_urls=True,
    nltk_tokenizer=False,
    confidence_threshold=0.8,
    end_to_end=False,
):
    """Store cleanup/tokenizer settings and, in end-to-end mode, a trained duplicate model."""
    cleaners = [
        feature_cleanup.responses(),
        feature_cleanup.hex(),
        feature_cleanup.dll(),
        feature_cleanup.fileref(),
        feature_cleanup.synonyms(),
        feature_cleanup.crash(),
    ]
    if cleanup_urls:
        cleaners.append(feature_cleanup.url())

    self.cleanup_functions = cleaners
    self.nltk_tokenizer = nltk_tokenizer
    self.confidence_threshold = confidence_threshold

    # Only load the (potentially large) duplicate model when running end to end.
    if end_to_end:
        self.duplicatemodel = DuplicateModel.load("duplicatemodel")
    else:
        self.duplicatemodel = None
def __init__(self, lemmatization=False):
    """Bug classification model over structured bug features plus title/comment text.

    Uses instance-hardness undersampling and an XGBoost classifier; feature
    extraction rolls bugs back via ``self.rollback`` before extracting.
    """
    BugModel.__init__(self, lemmatization)

    self.calculate_importance = False

    self.sampler = InstanceHardnessThreshold(random_state=0)

    feature_extractors = [
        bug_features.has_str(),
        bug_features.has_regression_range(),
        bug_features.severity(),
        bug_features.keywords(),
        bug_features.is_coverity_issue(),
        bug_features.has_crash_signature(),
        bug_features.has_url(),
        bug_features.has_w3c_url(),
        bug_features.has_github_url(),
        bug_features.whiteboard(),
        bug_features.patches(),
        bug_features.landings(),
        bug_features.title(),
        bug_features.product(),
        bug_features.component(),
        bug_features.is_mozillian(),
        bug_features.bug_reporter(),
        bug_features.blocked_bugs_number(),
        bug_features.priority(),
        bug_features.has_cve_in_alias(),
        bug_features.comment_count(),
        bug_features.comment_length(),
        bug_features.reporter_experience(),
        bug_features.number_of_bug_dependencies(),
    ]

    cleanup_functions = [
        feature_cleanup.url(),
        feature_cleanup.fileref(),
        feature_cleanup.hex(),
        feature_cleanup.dll(),
        feature_cleanup.synonyms(),
        feature_cleanup.crash(),
    ]

    self.extraction_pipeline = Pipeline(
        [
            (
                "bug_extractor",
                bug_features.BugExtractor(
                    feature_extractors,
                    cleanup_functions,
                    rollback=True,
                    rollback_when=self.rollback,
                ),
            ),
            (
                "union",
                ColumnTransformer(
                    [
                        ("data", DictVectorizer(), "data"),
                        ("title", self.text_vectorizer(min_df=0.0001), "title"),
                        (
                            "comments",
                            self.text_vectorizer(min_df=0.0001),
                            "comments",
                        ),
                    ]
                ),
            ),
        ]
    )

    # Size the thread pool to the machine rather than a hard-coded 16,
    # consistent with the other models in this file.
    self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
    self.clf.set_params(predictor="cpu_predictor")
from gensim.corpora import Dictionary from nltk.corpus import stopwords from nltk.stem.porter import PorterStemmer except ImportError: raise ImportError(OPT_MSG_MISSING) nltk.download("stopwords") REPORTERS_TO_IGNORE = { "*****@*****.**", "*****@*****.**" } cleanup_functions = [ feature_cleanup.responses(), feature_cleanup.hex(), feature_cleanup.dll(), feature_cleanup.fileref(), feature_cleanup.url(), feature_cleanup.synonyms(), feature_cleanup.crash(), ] # A map from bug ID to its duplicate IDs duplicates = defaultdict(set) all_ids = set(bug["id"] for bug in bugzilla.get_bugs() if bug["creator"] not in REPORTERS_TO_IGNORE and "dupeme" not in bug["keywords"]) for bug in bugzilla.get_bugs(): dupes = [entry for entry in bug["duplicates"] if entry in all_ids] if bug["dupe_of"] in all_ids:
def __init__(self, lemmatization=False):
    """Bug classification model over structured bug features plus title/comment text.

    Uses instance-hardness undersampling and an XGBoost classifier; feature
    extraction rolls bugs back via ``self.rollback`` before extracting.
    """
    BugModel.__init__(self, lemmatization)

    self.sampler = InstanceHardnessThreshold(random_state=0)

    feature_extractors = [
        bug_features.has_str(),
        bug_features.has_regression_range(),
        bug_features.severity(),
        bug_features.keywords(),
        bug_features.is_coverity_issue(),
        bug_features.has_crash_signature(),
        bug_features.has_url(),
        bug_features.has_w3c_url(),
        bug_features.has_github_url(),
        bug_features.whiteboard(),
        bug_features.patches(),
        bug_features.landings(),
        bug_features.title(),
        bug_features.product(),
        bug_features.component(),
        bug_features.is_mozillian(),
        bug_features.bug_reporter(),
        bug_features.blocked_bugs_number(),
        bug_features.priority(),
        bug_features.has_cve_in_alias(),
        bug_features.comment_count(),
        bug_features.comment_length(),
        bug_features.reporter_experience(),
        bug_features.number_of_bug_dependencies(),
    ]

    cleanup_functions = [
        feature_cleanup.url(),
        feature_cleanup.fileref(),
        feature_cleanup.hex(),
        feature_cleanup.dll(),
        feature_cleanup.synonyms(),
        feature_cleanup.crash(),
    ]

    self.extraction_pipeline = Pipeline(
        [
            (
                "bug_extractor",
                bug_features.BugExtractor(
                    feature_extractors,
                    cleanup_functions,
                    rollback=True,
                    rollback_when=self.rollback,
                ),
            ),
            (
                "union",
                ColumnTransformer(
                    [
                        ("data", DictVectorizer(), "data"),
                        ("title", self.text_vectorizer(min_df=0.0001), "title"),
                        (
                            "comments",
                            self.text_vectorizer(min_df=0.0001),
                            "comments",
                        ),
                    ]
                ),
            ),
        ]
    )

    # Size the thread pool to the machine rather than a hard-coded 16,
    # consistent with the other models in this file.
    self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
    self.clf.set_params(predictor="cpu_predictor")