def __init__(self, lemmatization=False):
    """Configure the sampler, extraction pipeline and XGBoost classifier.

    Args:
        lemmatization: Forwarded unchanged to ``BugModel.__init__``.
    """
    BugModel.__init__(self, lemmatization, commit_data=True)

    self.cross_validation_enabled = False

    # random_state is pinned to 0 for the under-sampler.
    self.sampler = RandomUnderSampler(random_state=0)

    # Feature extractors handed to BugExtractor, in this order.
    extractors = [
        bug_features.has_str(),
        bug_features.has_regression_range(),
        bug_features.severity(),
        bug_features.keywords({"dev-doc-needed", "dev-doc-complete"}),
        bug_features.is_coverity_issue(),
        bug_features.has_crash_signature(),
        bug_features.has_url(),
        bug_features.has_w3c_url(),
        bug_features.has_github_url(),
        bug_features.whiteboard(),
        bug_features.patches(),
        bug_features.landings(),
        bug_features.product(),
        bug_features.component(),
        bug_features.commit_added(),
        bug_features.commit_deleted(),
        bug_features.commit_types(),
    ]

    # Cleanup callables handed to BugExtractor alongside the extractors.
    cleaners = [
        feature_cleanup.fileref(),
        feature_cleanup.url(),
        feature_cleanup.synonyms(),
    ]

    # First pipeline step: the bug extractor, with rollback enabled and
    # self.rollback supplied as the rollback_when callback.
    extractor_step = bug_features.BugExtractor(
        extractors,
        cleaners,
        rollback=True,
        rollback_when=self.rollback,
        commit_data=True,
    )

    # Second step: "data" goes through a DictVectorizer, while "title" and
    # "comments" each get their own vectorizer from self.text_vectorizer().
    column_specs = [
        ("data", DictVectorizer(), "data"),
        ("title", self.text_vectorizer(), "title"),
        ("comments", self.text_vectorizer(), "comments"),
    ]
    union_step = ColumnTransformer(column_specs)

    self.extraction_pipeline = Pipeline(
        [
            ("bug_extractor", extractor_step),
            ("union", union_step),
        ]
    )

    # Worker count comes from utils.get_physical_cpu_count().
    self.clf = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
    self.clf.set_params(predictor="cpu_predictor")
def __init__(self, lemmatization=False):
    """Configure the sampler, extraction pipeline and XGBoost classifier.

    Args:
        lemmatization: Forwarded unchanged to ``BugModel.__init__``.
    """
    BugModel.__init__(self, lemmatization)

    # random_state is pinned to 0 for the under-sampler.
    self.sampler = RandomUnderSampler(random_state=0)

    # Feature extractors handed to BugExtractor, in this order.
    extractors = [
        bug_features.has_str(),
        bug_features.has_regression_range(),
        bug_features.severity(),
        bug_features.keywords({"dev-doc-needed", "dev-doc-complete"}),
        bug_features.is_coverity_issue(),
        bug_features.has_crash_signature(),
        bug_features.has_url(),
        bug_features.has_w3c_url(),
        bug_features.has_github_url(),
        bug_features.whiteboard(),
        bug_features.patches(),
        bug_features.landings(),
        bug_features.title(),
        bug_features.product(),
        bug_features.component(),
        bug_features.commit_added(),
        bug_features.commit_deleted(),
        bug_features.commit_types(),
    ]

    # Cleanup callables handed to BugExtractor alongside the extractors.
    cleaners = [
        feature_cleanup.fileref(),
        feature_cleanup.url(),
        feature_cleanup.synonyms(),
    ]

    # First pipeline step: the bug extractor, with rollback enabled and
    # self.rollback supplied as the rollback_when callback.
    extractor_step = bug_features.BugExtractor(
        extractors,
        cleaners,
        rollback=True,
        rollback_when=self.rollback,
        commit_data=True,
    )

    # Second step: "data" goes through a DictVectorizer, while "title" and
    # "comments" each get their own vectorizer from self.text_vectorizer().
    column_specs = [
        ("data", DictVectorizer(), "data"),
        ("title", self.text_vectorizer(), "title"),
        ("comments", self.text_vectorizer(), "comments"),
    ]
    union_step = ColumnTransformer(column_specs)

    self.extraction_pipeline = Pipeline(
        [
            ("bug_extractor", extractor_step),
            ("union", union_step),
        ]
    )

    # NOTE(review): n_jobs is hard-coded to 16 regardless of the host.
    self.clf = xgboost.XGBClassifier(n_jobs=16)
    self.clf.set_params(predictor="cpu_predictor")
def __init__(self, lemmatization=False):
    """Configure the extraction pipeline and XGBoost classifier.

    Args:
        lemmatization: Forwarded unchanged to ``Model.__init__``.
    """
    Model.__init__(self, lemmatization)

    # Feature extractors handed to BugExtractor, in this order.
    extractors = [
        bug_features.has_str(),
        bug_features.has_regression_range(),
        bug_features.severity(),
        bug_features.keywords({'dev-doc-needed', 'dev-doc-complete'}),
        bug_features.is_coverity_issue(),
        bug_features.has_crash_signature(),
        bug_features.has_url(),
        bug_features.has_w3c_url(),
        bug_features.has_github_url(),
        bug_features.whiteboard(),
        bug_features.patches(),
        bug_features.landings(),
        bug_features.title(),
        bug_features.product(),
        bug_features.component(),
        bug_features.commit_added(),
        bug_features.commit_deleted(),
        bug_features.commit_types(),
    ]

    # Cleanup functions are passed by reference (not called here).
    cleaners = [
        bug_features.cleanup_fileref,
        bug_features.cleanup_url,
        bug_features.cleanup_synonyms,
    ]

    # First pipeline step: the bug extractor, with rollback enabled and
    # self.rollback supplied as the rollback_when callback.
    extractor_step = bug_features.BugExtractor(
        extractors,
        cleaners,
        rollback=True,
        rollback_when=self.rollback,
        commit_data=True,
    )

    # Second step: 'data' goes through a DictVectorizer, while 'title' and
    # 'comments' each get their own vectorizer built with English stop words.
    column_specs = [
        ('data', DictVectorizer(), 'data'),
        ('title', self.text_vectorizer(stop_words='english'), 'title'),
        ('comments', self.text_vectorizer(stop_words='english'), 'comments'),
    ]
    union_step = ColumnTransformer(column_specs)

    self.extraction_pipeline = Pipeline([
        ('bug_extractor', extractor_step),
        ('union', union_step),
    ])

    # NOTE(review): n_jobs is hard-coded to 16 regardless of the host.
    self.clf = xgboost.XGBClassifier(n_jobs=16)
    self.clf.set_params(predictor='cpu_predictor')