def __init__(self, lemmatization=False, granularity="label", failures_skip=None):
    """Model predicting which test runnables should be scheduled for a commit.

    Args:
        lemmatization: forwarded unchanged to the base ``Model``.
        granularity: either ``"label"`` or ``"group"``; selects which
            scheduling databases and which feature extractors are used.
        failures_skip: stored on the instance for use elsewhere in the
            class (semantics not visible in this block — presumably a
            threshold on past failures; confirm against callers).
    """
    Model.__init__(self, lemmatization)

    self.granularity = granularity
    self.failures_skip = failures_skip

    self.training_dbs = [repository.COMMITS_DB]
    self.eval_dbs[repository.COMMITS_DB] = (
        repository.COMMITS_DB,
        repository.COMMIT_EXPERIENCES_DB,
    )

    if granularity == "label":
        self.training_dbs.append(test_scheduling.TEST_LABEL_SCHEDULING_DB)
        self.eval_dbs[test_scheduling.TEST_LABEL_SCHEDULING_DB] = (
            test_scheduling.PAST_FAILURES_LABEL_DB,
            test_scheduling.FAILING_TOGETHER_LABEL_DB,
        )
    elif granularity == "group":
        self.training_dbs.append(test_scheduling.TEST_GROUP_SCHEDULING_DB)
        self.eval_dbs[test_scheduling.TEST_GROUP_SCHEDULING_DB] = (
            test_scheduling.PAST_FAILURES_GROUP_DB,
            test_scheduling.TOUCHED_TOGETHER_DB,
        )

    self.cross_validation_enabled = False
    self.entire_dataset_training = True

    # Failing runnables are much rarer than passing ones, so undersample
    # the majority class deterministically.
    self.sampler = RandomUnderSampler(random_state=0)

    extractors = [test_scheduling_features.prev_failures()]

    if granularity == "label":
        extractors.append(test_scheduling_features.platform())
        # NOTE: test_scheduling_features.chunk() is intentionally not used.
        extractors.append(test_scheduling_features.suite())
    elif granularity == "group":
        extractors.append(test_scheduling_features.path_distance())
        extractors.append(test_scheduling_features.common_path_components())
        extractors.append(test_scheduling_features.touched_together())

    self.extraction_pipeline = Pipeline(
        [
            (
                "commit_extractor",
                commit_features.CommitExtractor(extractors, []),
            ),
            ("union", ColumnTransformer([("data", DictVectorizer(), "data")])),
        ]
    )

    self.clf = xgboost.XGBClassifier(n_jobs=16)
    self.clf.set_params(predictor="cpu_predictor")
def __init__(self, lemmatization=False, granularity="label"):
    """Model predicting which test runnables should be scheduled for a commit.

    Args:
        lemmatization: forwarded unchanged to the base ``Model``.
        granularity: either ``"label"`` or ``"group"``; selects the
            scheduling database and the feature extractors used.
    """
    Model.__init__(self, lemmatization)

    self.granularity = granularity

    self.required_dbs = [repository.COMMITS_DB]
    if granularity == "label":
        self.required_dbs.append(test_scheduling.TEST_LABEL_SCHEDULING_DB)
    elif granularity == "group":
        self.required_dbs.append(test_scheduling.TEST_GROUP_SCHEDULING_DB)

    self.cross_validation_enabled = False
    self.entire_dataset_training = True

    # Failing runnables are much rarer than passing ones, so undersample
    # the majority class deterministically.
    self.sampler = RandomUnderSampler(random_state=0)

    # Commit-level size/churn features, split by file category
    # (source code / other / test).
    extractors = [
        commit_features.source_code_files_modified_num(),
        commit_features.other_files_modified_num(),
        commit_features.test_files_modified_num(),
        commit_features.source_code_file_size(),
        commit_features.other_file_size(),
        commit_features.test_file_size(),
        commit_features.source_code_added(),
        commit_features.other_added(),
        commit_features.test_added(),
        commit_features.source_code_deleted(),
        commit_features.other_deleted(),
        commit_features.test_deleted(),
    ]

    # Runnable-level features.
    extractors.append(test_scheduling_features.name())
    extractors.append(test_scheduling_features.prev_failures())

    if granularity == "label":
        extractors.append(test_scheduling_features.platform())
        extractors.append(test_scheduling_features.chunk())
        extractors.append(test_scheduling_features.suite())
    elif granularity == "group":
        extractors.append(test_scheduling_features.path_distance())
        extractors.append(test_scheduling_features.common_path_components())

    self.extraction_pipeline = Pipeline(
        [
            (
                "commit_extractor",
                commit_features.CommitExtractor(extractors, []),
            ),
            ("union", ColumnTransformer([("data", DictVectorizer(), "data")])),
        ]
    )

    self.clf = xgboost.XGBClassifier(n_jobs=16)
    self.clf.set_params(predictor="cpu_predictor")
def test_path_distance():
    """path_distance: directory distance between a test and a commit's files.

    Each case asserts the distance computed between the test manifest path
    and the closest of the commit's touched files.
    """
    pd = test_scheduling_features.path_distance()

    manifest = "dom/media/tests/mochitest.ini"

    # (files touched by the commit, expected minimum distance)
    cases = [
        (["dom/media/tests/test.js", "dom/media/anotherFile.cpp"], 0),
        (["dom/media/anotherFile.cpp"], 1),
        (["dom/media/src/aFile.cpp"], 2),
        (["dom/media/src/aFile.cpp", "dom/media/anotherFile.cpp"], 1),
        (["layout/utils/bla.cpp"], 5),
    ]

    for files, expected in cases:
        assert pd({"name": manifest}, {"files": files}) == expected
def test_path_distance():
    """path_distance: directory distance between a test and a commit's files.

    Each case asserts the distance computed between the test path and the
    closest of the commit's touched files; the last two cases check the
    asymmetric deep-path vs. shallow-path situations in both directions.
    """
    pd = test_scheduling_features.path_distance()

    # (test name, files touched by the commit, expected minimum distance)
    cases = [
        (
            "dom/media/tests/mochitest.ini",
            ["dom/media/tests/test.js", "dom/media/anotherFile.cpp"],
            0,
        ),
        ("dom/media/tests/mochitest.ini", ["dom/media/anotherFile.cpp"], 1),
        ("dom/media/tests/mochitest.ini", ["dom/media/src/aFile.cpp"], 2),
        (
            "dom/media/tests/mochitest.ini",
            ["dom/media/src/aFile.cpp", "dom/media/anotherFile.cpp"],
            1,
        ),
        ("dom/media/tests/mochitest.ini", ["layout/utils/bla.cpp"], 5),
        (
            "testing/web-platform/tests/content-security-policy/worker-src",
            ["test"],
            4,
        ),
        (
            "test",
            ["testing/web-platform/tests/content-security-policy/worker-src"],
            4,
        ),
    ]

    for name, files, expected in cases:
        assert pd({"name": name}, {"files": files}) == expected
def __init__(self, lemmatization=False, granularity="label", use_subset=False):
    """Model predicting which test runnables should be scheduled for a commit.

    Args:
        lemmatization: forwarded unchanged to the base ``Model``.
        granularity: either ``"label"`` or ``"group"``; selects which
            scheduling databases and which feature extractors are used.
        use_subset: development convenience — when True, a subset of the
            dataset is used (some passing runnables dropped) to reduce
            memory usage.
    """
    Model.__init__(self, lemmatization)

    self.granularity = granularity
    # This is useful for development purposes, it avoids using too much
    # memory by using a subset of the dataset (dropping some passing
    # runnables).
    self.use_subset = use_subset

    self.training_dbs = [repository.COMMITS_DB]
    self.eval_dbs[repository.COMMITS_DB] = (
        repository.COMMITS_DB,
        repository.COMMIT_EXPERIENCES_DB,
    )

    if granularity == "label":
        self.training_dbs.append(test_scheduling.TEST_LABEL_SCHEDULING_DB)
        self.eval_dbs[test_scheduling.TEST_LABEL_SCHEDULING_DB] = (
            test_scheduling.PAST_FAILURES_LABEL_DB,
            test_scheduling.FAILING_TOGETHER_LABEL_DB,
        )
    elif granularity == "group":
        self.training_dbs.append(test_scheduling.TEST_GROUP_SCHEDULING_DB)
        self.eval_dbs[test_scheduling.TEST_GROUP_SCHEDULING_DB] = (
            test_scheduling.PAST_FAILURES_GROUP_DB,
            test_scheduling.TOUCHED_TOGETHER_DB,
        )

    self.cross_validation_enabled = False
    self.entire_dataset_training = True

    # Failing runnables are much rarer than passing ones, so undersample
    # the majority class deterministically.
    self.sampler = RandomUnderSampler(random_state=0)

    extractors = [test_scheduling_features.prev_failures()]

    if granularity == "label":
        extractors.append(test_scheduling_features.platform())
        # NOTE: test_scheduling_features.chunk() is intentionally not used.
        extractors.append(test_scheduling_features.suite())
    elif granularity == "group":
        # Commit-level size/churn features, split by file category
        # (source code / other / test) — only used at group granularity.
        extractors += [
            commit_features.source_code_files_modified_num(),
            commit_features.other_files_modified_num(),
            commit_features.test_files_modified_num(),
            commit_features.source_code_file_size(),
            commit_features.other_file_size(),
            commit_features.test_file_size(),
            commit_features.source_code_added(),
            commit_features.other_added(),
            commit_features.test_added(),
            commit_features.source_code_deleted(),
            commit_features.other_deleted(),
            commit_features.test_deleted(),
            test_scheduling_features.path_distance(),
            test_scheduling_features.common_path_components(),
            test_scheduling_features.touched_together(),
        ]

    self.extraction_pipeline = Pipeline(
        [
            (
                "commit_extractor",
                commit_features.CommitExtractor(extractors, []),
            ),
            ("union", ColumnTransformer([("data", DictVectorizer(), "data")])),
        ]
    )

    self.clf = xgboost.XGBClassifier(n_jobs=16)
    self.clf.set_params(predictor="cpu_predictor")