def __init__(self, lemmatization=False):
    """Set up the sampler, the extraction pipeline and the classifier.

    Args:
        lemmatization: Forwarded unchanged to ``CommitModel.__init__``.
    """
    CommitModel.__init__(self, lemmatization)

    self.calculate_importance = False

    # Sampler is seeded (random_state=0).
    self.sampler = RandomUnderSampler(random_state=0)

    # Extractors are handed to CommitExtractor in exactly this order.
    extractors = [
        commit_features.file_size(),
        commit_features.test_added(),
        commit_features.added(),
        commit_features.deleted(),
        commit_features.test_deleted(),
        commit_features.author_experience(),
        commit_features.reviewer_experience(),
        commit_features.reviewers_num(),
        commit_features.component_touched_prev(),
        commit_features.directory_touched_prev(),
        commit_features.file_touched_prev(),
        commit_features.types(),
        commit_features.components(),
        commit_features.components_modified_num(),
        commit_features.directories(),
        commit_features.directories_modified_num(),
        commit_features.files(),
        commit_features.files_modified_num(),
    ]

    cleaners = [
        feature_cleanup.fileref(),
        feature_cleanup.url(),
        feature_cleanup.synonyms(),
    ]

    # Step 1: run the feature extractors and cleanup functions.
    commit_step = (
        "commit_extractor",
        commit_features.CommitExtractor(extractors, cleaners),
    )

    # Step 2: the "data" column goes through DictVectorizer and the "desc"
    # column through the model's own text vectorizer.
    union_step = (
        "union",
        ColumnTransformer(
            [
                ("data", DictVectorizer(), "data"),
                ("desc", self.text_vectorizer(), "desc"),
            ]
        ),
    )

    self.extraction_pipeline = Pipeline([commit_step, union_step])

    self.clf = xgboost.XGBClassifier(n_jobs=16)
    self.clf.set_params(predictor="cpu_predictor")
def __init__(self, lemmatization=False):
    """Set up the sampler, the extraction pipeline and the classifier.

    Args:
        lemmatization: Forwarded unchanged to ``CommitModel.__init__``.
    """
    CommitModel.__init__(self, lemmatization)

    self.calculate_importance = False

    # Sampler is seeded (random_state=0).
    self.sampler = RandomUnderSampler(random_state=0)

    # Extractors are handed to CommitExtractor in exactly this order.
    extractors = [
        commit_features.files_modified_num(),
        commit_features.test_added(),
        commit_features.added(),
        commit_features.deleted(),
        commit_features.test_deleted(),
        commit_features.author_experience(),
        commit_features.author_experience_90_days(),
        commit_features.reviewer_experience(),
        commit_features.reviewer_experience_90_days(),
        commit_features.components_touched_prev(),
        commit_features.components_touched_prev_90_days(),
        commit_features.files_touched_prev(),
        commit_features.files_touched_prev_90_days(),
        commit_features.types(),
        commit_features.components(),
        commit_features.number_of_reviewers(),
    ]

    cleaners = [
        feature_cleanup.fileref(),
        feature_cleanup.url(),
        feature_cleanup.synonyms(),
    ]

    # Step 1: run the feature extractors and cleanup functions.
    commit_step = (
        "commit_extractor",
        commit_features.CommitExtractor(extractors, cleaners),
    )

    # Step 2: the "data" column goes through DictVectorizer and the "desc"
    # column through the model's own text vectorizer.
    union_step = (
        "union",
        ColumnTransformer(
            [
                ("data", DictVectorizer(), "data"),
                ("desc", self.text_vectorizer(), "desc"),
            ]
        ),
    )

    self.extraction_pipeline = Pipeline([commit_step, union_step])

    self.clf = xgboost.XGBClassifier(n_jobs=16)
    self.clf.set_params(predictor="cpu_predictor")
def __init__(self, lemmatization=False, interpretable=False):
    """Set up required DBs, sampler, extraction pipeline and classifier.

    Args:
        lemmatization: Forwarded unchanged to ``CommitModel.__init__``.
        interpretable: When true, the "desc" text-vectorizer column is
            left out of the column transformer, so only the "data"
            column is vectorized.
    """
    CommitModel.__init__(self, lemmatization)

    self.required_dbs.append(BUG_INTRODUCING_COMMITS_DB)

    self.store_dataset = True

    # Sampler is seeded (random_state=0).
    self.sampler = RandomUnderSampler(random_state=0)

    # Extractors are handed to CommitExtractor in exactly this order.
    extractors = [
        commit_features.file_size(),
        commit_features.test_added(),
        commit_features.added(),
        commit_features.deleted(),
        commit_features.test_deleted(),
        commit_features.author_experience(),
        commit_features.reviewer_experience(),
        commit_features.reviewers_num(),
        commit_features.component_touched_prev(),
        commit_features.directory_touched_prev(),
        commit_features.file_touched_prev(),
        commit_features.types(),
        commit_features.files(),
        commit_features.components(),
        commit_features.components_modified_num(),
        commit_features.directories(),
        commit_features.directories_modified_num(),
        commit_features.files_modified_num(),
    ]

    cleaners = [
        feature_cleanup.fileref(),
        feature_cleanup.url(),
        feature_cleanup.synonyms(),
    ]

    # The "data" column is always vectorized; the "desc" text column is
    # added only for the non-interpretable variant.
    transformers = [("data", DictVectorizer(), "data")]
    if not interpretable:
        desc_vectorizer = self.text_vectorizer(min_df=0.0001)
        transformers.append(("desc", desc_vectorizer, "desc"))

    commit_step = (
        "commit_extractor",
        commit_features.CommitExtractor(extractors, cleaners),
    )
    union_step = ("union", ColumnTransformer(transformers))

    self.extraction_pipeline = Pipeline([commit_step, union_step])

    self.clf = xgboost.XGBClassifier(n_jobs=16)
    self.clf.set_params(predictor="cpu_predictor")
def __init__(self, lemmatization=False):
    """Set up required DBs, sampler, extraction pipeline and classifier.

    Args:
        lemmatization: Forwarded unchanged to ``Model.__init__``.
    """
    Model.__init__(self, lemmatization)

    self.required_dbs = [
        repository.COMMITS_DB,
        test_scheduling.TEST_SCHEDULING_DB,
    ]

    self.calculate_importance = False
    self.cross_validation_enabled = False

    # Sampler is seeded (random_state=0).
    self.sampler = RandomUnderSampler(random_state=0)

    # Commit-level extractors first, then test-scheduling extractors;
    # the list is handed to CommitExtractor in exactly this order.
    extractors = [
        commit_features.files_modified_num(),
        commit_features.test_files_modified_num(),
        commit_features.file_size(),
        commit_features.test_file_size(),
        commit_features.added(),
        commit_features.test_added(),
        commit_features.deleted(),
        commit_features.test_deleted(),
        test_scheduling_features.name(),
        test_scheduling_features.platform(),
        test_scheduling_features.chunk(),
        test_scheduling_features.suite(),
        test_scheduling_features.prev_failures(),
    ]

    # No cleanup functions are used here, hence the empty list.
    commit_step = (
        "commit_extractor",
        commit_features.CommitExtractor(extractors, []),
    )

    # Only the "data" column is vectorized; there is no text column.
    union_step = (
        "union",
        ColumnTransformer([("data", DictVectorizer(), "data")]),
    )

    self.extraction_pipeline = Pipeline([commit_step, union_step])

    self.clf = xgboost.XGBClassifier(n_jobs=16)
    self.clf.set_params(predictor="cpu_predictor")
def __init__(self, lemmatization=False, bug_data=True):
    """Set up the sampler, the extraction pipeline and the classifier.

    Args:
        lemmatization: Forwarded unchanged to ``CommitModel.__init__``.
        bug_data: Forwarded to ``CommitModel.__init__``; when true, the
            bug-level feature extractors are appended after the
            commit-level ones.
    """
    CommitModel.__init__(self, lemmatization, bug_data)

    self.calculate_importance = False

    # Sampler is seeded (random_state=0).
    self.sampler = RandomUnderSampler(random_state=0)

    # Extractors are handed to CommitExtractor in exactly this order.
    extractors = [
        commit_features.files_modified_num(),
        commit_features.file_size(),
        commit_features.test_added(),
        commit_features.added(),
        commit_features.deleted(),
        commit_features.test_deleted(),
        commit_features.author_experience(),
        commit_features.reviewer_experience(),
        commit_features.reviewers_num(),
        commit_features.component_touched_prev(),
        commit_features.directory_touched_prev(),
        commit_features.file_touched_prev(),
        commit_features.types(),
        commit_features.components(),
        commit_features.directories(),
        commit_features.files(),
    ]

    if bug_data:
        extractors.extend(
            [
                bug_features.product(),
                bug_features.component(),
                bug_features.severity(),
                bug_features.priority(),
                bug_features.has_crash_signature(),
                bug_features.has_regression_range(),
                bug_features.whiteboard(),
                bug_features.keywords(),
                bug_features.number_of_bug_dependencies(),
                bug_features.blocked_bugs_number(),
            ]
        )

    cleaners = [
        feature_cleanup.fileref(),
        feature_cleanup.url(),
        feature_cleanup.synonyms(),
    ]

    # Step 1: run the feature extractors and cleanup functions.
    commit_step = (
        "commit_extractor",
        commit_features.CommitExtractor(extractors, cleaners),
    )

    # Step 2: the "data" column goes through DictVectorizer and the "desc"
    # column through the model's own text vectorizer.
    union_step = (
        "union",
        ColumnTransformer(
            [
                ("data", DictVectorizer(), "data"),
                ("desc", self.text_vectorizer(), "desc"),
            ]
        ),
    )

    self.extraction_pipeline = Pipeline([commit_step, union_step])

    self.clf = xgboost.XGBClassifier(n_jobs=16)
    self.clf.set_params(predictor="cpu_predictor")