def __init__(self, lemmatization=False):
    """Configure the sampler, the extraction pipeline and the classifier.

    Args:
        lemmatization: Forwarded unchanged to ``Model.__init__``.
    """
    Model.__init__(self, lemmatization)

    self.sampler = InstanceHardnessThreshold(random_state=0)

    # Feature extractor instances, created in the order they are listed.
    extractors = [
        bug_features.has_str(),
        bug_features.has_regression_range(),
        bug_features.severity(),
        bug_features.keywords(),
        bug_features.is_coverity_issue(),
        bug_features.has_crash_signature(),
        bug_features.has_url(),
        bug_features.has_w3c_url(),
        bug_features.has_github_url(),
        bug_features.whiteboard(),
        bug_features.patches(),
        bug_features.landings(),
        bug_features.title(),
        bug_features.product(),
        bug_features.component(),
        bug_features.is_mozillian(),
        bug_features.bug_reporter(),
        bug_features.blocked_bugs_number(),
        bug_features.priority(),
        bug_features.has_cve_in_alias(),
        bug_features.comment_count(),
        bug_features.comment_length(),
        bug_features.reporter_experience(),
        bug_features.number_of_bug_dependencies(),
    ]

    # Cleanup callables are passed by reference here (not called).
    cleaners = [
        bug_features.cleanup_url,
        bug_features.cleanup_fileref,
        bug_features.cleanup_hex,
        bug_features.cleanup_dll,
        bug_features.cleanup_synonyms,
        bug_features.cleanup_crash,
    ]

    # First pipeline stage: the bug extractor, with rollback enabled and
    # self.rollback passed as ``rollback_when``.
    bug_extractor = bug_features.BugExtractor(
        extractors,
        cleaners,
        rollback=True,
        rollback_when=self.rollback,
    )

    # Second pipeline stage: one vectorizer per column ("data", "title",
    # "comments"); both text columns use the same ``min_df`` value.
    text_min_df = 0.0001
    column_union = ColumnTransformer([
        ("data", DictVectorizer(), "data"),
        ("title", self.text_vectorizer(min_df=text_min_df), "title"),
        ("comments", self.text_vectorizer(min_df=text_min_df), "comments"),
    ])

    self.extraction_pipeline = Pipeline([
        ("bug_extractor", bug_extractor),
        ("union", column_union),
    ])

    self.clf = xgboost.XGBClassifier(n_jobs=16)
    # Explicitly select the CPU predictor on the classifier.
    self.clf.set_params(predictor="cpu_predictor")
def __init__(self, lemmatization=False):
    """Configure the sampler, the extraction pipeline and the classifier.

    Args:
        lemmatization: Forwarded unchanged to ``BugModel.__init__``.
    """
    BugModel.__init__(self, lemmatization)

    # Set after the base initializer has run, so this value takes effect
    # regardless of what the base class assigned.
    self.calculate_importance = False

    self.sampler = InstanceHardnessThreshold(random_state=0)

    # Feature extractor instances, created in the order they are listed.
    extractors = [
        bug_features.has_str(),
        bug_features.has_regression_range(),
        bug_features.severity(),
        bug_features.keywords(),
        bug_features.is_coverity_issue(),
        bug_features.has_crash_signature(),
        bug_features.has_url(),
        bug_features.has_w3c_url(),
        bug_features.has_github_url(),
        bug_features.whiteboard(),
        bug_features.patches(),
        bug_features.landings(),
        bug_features.title(),
        bug_features.product(),
        bug_features.component(),
        bug_features.is_mozillian(),
        bug_features.bug_reporter(),
        bug_features.blocked_bugs_number(),
        bug_features.priority(),
        bug_features.has_cve_in_alias(),
        bug_features.comment_count(),
        bug_features.comment_length(),
        bug_features.reporter_experience(),
        bug_features.number_of_bug_dependencies(),
    ]

    # Cleanup objects are instantiated here, in the order they are listed.
    cleaners = [
        feature_cleanup.url(),
        feature_cleanup.fileref(),
        feature_cleanup.hex(),
        feature_cleanup.dll(),
        feature_cleanup.synonyms(),
        feature_cleanup.crash(),
    ]

    # First pipeline stage: the bug extractor, with rollback enabled and
    # self.rollback passed as ``rollback_when``.
    bug_extractor = bug_features.BugExtractor(
        extractors,
        cleaners,
        rollback=True,
        rollback_when=self.rollback,
    )

    # Second pipeline stage: one vectorizer per column ("data", "title",
    # "comments"); both text columns use the same ``min_df`` value.
    text_min_df = 0.0001
    column_union = ColumnTransformer([
        ("data", DictVectorizer(), "data"),
        ("title", self.text_vectorizer(min_df=text_min_df), "title"),
        ("comments", self.text_vectorizer(min_df=text_min_df), "comments"),
    ])

    self.extraction_pipeline = Pipeline([
        ("bug_extractor", bug_extractor),
        ("union", column_union),
    ])

    self.clf = xgboost.XGBClassifier(n_jobs=16)
    # Explicitly select the CPU predictor on the classifier.
    self.clf.set_params(predictor="cpu_predictor")
def __init__(self, lemmatization=False):
    """Configure the sampler, the extraction pipeline and the classifier.

    Args:
        lemmatization: Forwarded unchanged to ``BugModel.__init__``.
    """
    BugModel.__init__(self, lemmatization)

    self.sampler = InstanceHardnessThreshold(random_state=0)

    # Feature extractor instances, created in the order they are listed.
    extractors = [
        bug_features.has_str(),
        bug_features.has_regression_range(),
        bug_features.severity(),
        bug_features.keywords(),
        bug_features.is_coverity_issue(),
        bug_features.has_crash_signature(),
        bug_features.has_url(),
        bug_features.has_w3c_url(),
        bug_features.has_github_url(),
        bug_features.whiteboard(),
        bug_features.patches(),
        bug_features.landings(),
        bug_features.title(),
        bug_features.product(),
        bug_features.component(),
        bug_features.is_mozillian(),
        bug_features.bug_reporter(),
        bug_features.blocked_bugs_number(),
        bug_features.priority(),
        bug_features.has_cve_in_alias(),
        bug_features.comment_count(),
        bug_features.comment_length(),
        bug_features.reporter_experience(),
        bug_features.number_of_bug_dependencies(),
    ]

    # Cleanup objects are instantiated here, in the order they are listed.
    cleaners = [
        feature_cleanup.url(),
        feature_cleanup.fileref(),
        feature_cleanup.hex(),
        feature_cleanup.dll(),
        feature_cleanup.synonyms(),
        feature_cleanup.crash(),
    ]

    # First pipeline stage: the bug extractor, with rollback enabled and
    # self.rollback passed as ``rollback_when``.
    bug_extractor = bug_features.BugExtractor(
        extractors,
        cleaners,
        rollback=True,
        rollback_when=self.rollback,
    )

    # Second pipeline stage: one vectorizer per column ("data", "title",
    # "comments"); both text columns use the same ``min_df`` value.
    text_min_df = 0.0001
    column_union = ColumnTransformer([
        ("data", DictVectorizer(), "data"),
        ("title", self.text_vectorizer(min_df=text_min_df), "title"),
        ("comments", self.text_vectorizer(min_df=text_min_df), "comments"),
    ])

    self.extraction_pipeline = Pipeline([
        ("bug_extractor", bug_extractor),
        ("union", column_union),
    ])

    self.clf = xgboost.XGBClassifier(n_jobs=16)
    # Explicitly select the CPU predictor on the classifier.
    self.clf.set_params(predictor="cpu_predictor")