Example #1
0
    def check(self):
        """Validate the stored component data against the live Bugzilla state.

        Runs five consistency checks over the per-component bug counts and
        prints a diagnostic for every failure. Returns True only when the
        parent class check and all five checks pass.
        """
        success = super().check()

        # Get the number of bugs per full component to fasten up the check
        bugs_number = get_product_component_count()

        # Check number 1, check that the most meaningful product components
        # still have at least a bug in this component. If the check is failing
        # that could mean that:
        # - A component has been renamed / removed
        # - A component is not used anymore by developers

        for product, component in self.meaningful_product_components:
            full_comp = f"{product}::{component}"

            if full_comp not in bugs_number:
                print(
                    f"Component {component!r} of product {product!r} doesn't exists, failure"
                )
                success = False

            elif bugs_number[full_comp] <= 0:
                print(
                    f"Component {component!r} of product {product!r} have 0 bugs or less in it, failure"
                )
                success = False

        # Check number 2, check that conflated components in
        # CONFLATED_COMPONENTS match at least one component which has more
        # than 0 bugs

        for conflated_component in self.CONFLATED_COMPONENTS:

            matching_components = [
                full_comp
                for full_comp in bugs_number
                if full_comp.startswith(conflated_component)
            ]

            if not matching_components:
                print(f"{conflated_component} doesn't match any component")
                success = False
                continue

            matching_components_values = [
                bugs_number[full_comp]
                for full_comp in matching_components
                if bugs_number[full_comp] > 0
            ]

            if not matching_components_values:
                print(
                    f"{conflated_component} should match at least one component with more than 0 bugs"
                )
                success = False

        # Check number 3, check that values of CONFLATED_COMPONENTS_MAPPING
        # still exist as components and have more than 0 bugs

        for full_comp in self.CONFLATED_COMPONENTS_MAPPING.values():

            if full_comp not in bugs_number:
                print(
                    f"{full_comp} from conflated component mapping doesn't exists, failure"
                )
                success = False
            elif bugs_number[full_comp] <= 0:
                print(
                    f"{full_comp} from conflated component mapping have less than 1 bug, failure"
                )
                success = False

        # Check number 4, conflated components in CONFLATED_COMPONENTS either
        # exist as components or are in CONFLATED_COMPONENTS_MAPPING

        for conflated_component in self.CONFLATED_COMPONENTS:

            in_mapping = conflated_component in self.CONFLATED_COMPONENTS_MAPPING

            matching_components = [
                full_comp
                for full_comp in bugs_number
                if full_comp.startswith(conflated_component)
            ]

            if not (matching_components or in_mapping):
                print(f"It should be possible to map {conflated_component}")
                success = False

        # Check number 5, there is no component with many bugs that is not in
        # meaningful_product_components

        # Recompute the meaningful components

        def generate_meaningful_tuples():
            # Yield each meaningful (product, component) pair once per bug so
            # the threshold computation can weigh components by bug volume.
            for full_comp, count in bugs_number.items():
                product, component = full_comp.split("::", 1)

                if not self.is_meaningful(product, component):
                    continue

                # range() is empty for count <= 0, no explicit guard needed.
                for _ in range(count):
                    yield (product, component)

        meaningful_product_components = self.get_meaningful_product_components(
            generate_meaningful_tuples(), threshold_ratio=10
        )

        if not meaningful_product_components.issubset(
            self.meaningful_product_components
        ):
            print("Meaningful product components mismatch")

            new_meaningful_product_components = meaningful_product_components.difference(
                self.meaningful_product_components
            )
            print(
                f"New meaningful product components {new_meaningful_product_components!r}"
            )

            success = False

        return success
Example #2
0
    def check(self):
        """Sanity-check the stored component data against live Bugzilla counts.

        Performs five consistency checks, printing a diagnostic for each
        failure, and returns True only if the parent check and every check
        here succeed.
        """
        success = super().check()

        # Cache the per-component bug counts once so every check below is a
        # cheap dict lookup.
        bugs_number = get_product_component_count()

        # Check 1: every meaningful product/component pair must still exist
        # and contain at least one bug. A failure may mean the component was
        # renamed, removed, or is no longer used by developers.
        for product, component in self.meaningful_product_components:
            count = bugs_number.get(f"{product}::{component}")

            if count is None:
                print(
                    f"Component {component!r} of product {product!r} doesn't exists, failure"
                )
                success = False
            elif count <= 0:
                print(
                    f"Component {component!r} of product {product!r} have 0 bugs or less in it, failure"
                )
                success = False

        # Check 2: each conflated component prefix must match at least one
        # existing component, and at least one match must contain bugs.
        for conflated_component in self.CONFLATED_COMPONENTS:
            matches = [
                name for name in bugs_number if name.startswith(conflated_component)
            ]

            if not matches:
                print(f"{conflated_component} doesn't match any component")
                success = False
                continue

            if not any(bugs_number[name] > 0 for name in matches):
                print(
                    f"{conflated_component} should match at least one component with more than 0 bugs"
                )
                success = False

        # Check 3: every target of CONFLATED_COMPONENTS_MAPPING must still
        # exist as a component and hold more than 0 bugs.
        for full_comp in self.CONFLATED_COMPONENTS_MAPPING.values():
            count = bugs_number.get(full_comp)

            if count is None:
                print(
                    f"{full_comp} from conflated component mapping doesn't exists, failure"
                )
                success = False
            elif count <= 0:
                print(
                    f"{full_comp} from conflated component mapping have less than 1 bug, failure"
                )
                success = False

        # Check 4: each conflated component must be mappable, either through
        # the explicit mapping or by prefix-matching an existing component.
        for conflated_component in self.CONFLATED_COMPONENTS:
            mappable = conflated_component in self.CONFLATED_COMPONENTS_MAPPING or any(
                name.startswith(conflated_component) for name in bugs_number
            )

            if not mappable:
                print(f"It should be possible to map {conflated_component}")
                success = False

        # Check 5: recompute the meaningful components from the live counts
        # and verify no high-volume component appeared that is missing from
        # self.meaningful_product_components.

        def repeated_pairs():
            # Yield each meaningful (product, component) once per bug so the
            # threshold computation can weigh components by bug volume.
            for name, count in bugs_number.items():
                product, component = name.split("::", 1)
                if self.is_meaningful(product, component):
                    for _ in range(count):
                        yield (product, component)

        recomputed = self.get_meaningful_product_components(
            repeated_pairs(), threshold_ratio=10
        )

        if not recomputed.issubset(self.meaningful_product_components):
            print("Meaningful product components mismatch")

            added = recomputed.difference(self.meaningful_product_components)
            print(f"New meaningful product components {added!r}")

            success = False

        return success
Example #3
0
    def retrieve_bugs(self, limit: int = None) -> None:
        """Download and refresh the local bugs database.

        Collects bug IDs from several sources (recently modified bugs,
        labelled bugs, bugs linked to commits, regression-related bugs and
        test-failure bugs), prunes stale entries from the local DB, downloads
        everything new or changed, then compresses the DB.

        When ``limit`` is set, each ID source is truncated to its last
        ``limit`` entries and the commits DB download is skipped (used by
        integration tests).
        """

        def _regression_related_ids(bugs):
            # Flatten the regressed_by/regressions/blocks ID lists of all
            # bugs into one deduplicated list. A set comprehension avoids the
            # quadratic cost of sum(list_of_lists, []).
            return list(
                {
                    bug_id
                    for bug in bugs
                    for bug_id in bug["regressed_by"]
                    + bug["regressions"]
                    + bug["blocks"]
                }
            )

        bugzilla.set_token(get_secret("BUGZILLA_TOKEN"))

        db.download(bugzilla.BUGS_DB)

        # Get IDs of bugs changed since last run.
        last_modified = db.last_modified(bugzilla.BUGS_DB)
        logger.info(
            f"Retrieving IDs of bugs modified since the last run on {last_modified}"
        )
        changed_ids = set(
            bugzilla.get_ids(
                {"f1": "delta_ts", "o1": "greaterthaneq", "v1": last_modified.date()}
            )
        )
        logger.info(f"Retrieved {len(changed_ids)} IDs.")

        all_components = bugzilla.get_product_component_count(9999)

        # Bugs whose product::component no longer exists must be refreshed too.
        deleted_component_ids = set(
            bug["id"]
            for bug in bugzilla.get_bugs()
            if "{}::{}".format(bug["product"], bug["component"]) not in all_components
        )
        logger.info(
            f"{len(deleted_component_ids)} bugs belonging to deleted components"
        )
        changed_ids |= deleted_component_ids

        # Get IDs of bugs between (two years and six months ago) and now.
        two_years_and_six_months_ago = datetime.utcnow() - relativedelta(
            years=2, months=6
        )
        logger.info(f"Retrieving bug IDs since {two_years_and_six_months_ago}")
        timespan_ids = bugzilla.get_ids_between(two_years_and_six_months_ago)
        if limit:
            timespan_ids = timespan_ids[-limit:]
        logger.info(f"Retrieved {len(timespan_ids)} IDs.")

        # Get IDs of labelled bugs.
        labelled_bug_ids = labels.get_all_bug_ids()
        if limit:
            labelled_bug_ids = labelled_bug_ids[-limit:]
        logger.info(f"{len(labelled_bug_ids)} labelled bugs to download.")

        # Get the commits DB, as we need it to get the bug IDs linked to recent commits.
        # XXX: Temporarily avoid downloading the commits DB when a limit is set, to avoid the integration test fail when the commits DB is bumped.
        if limit is None:
            assert db.download(repository.COMMITS_DB)

        # Get IDs of bugs linked to commits (used for some commit-based models, e.g. backout and regressor).
        start_date = datetime.now() - relativedelta(years=3)
        commit_bug_ids = list(
            set(
                commit["bug_id"]
                for commit in repository.get_commits()
                if commit["bug_id"]
                and dateutil.parser.parse(commit["pushdate"]) >= start_date
            )
        )
        if limit:
            commit_bug_ids = commit_bug_ids[-limit:]
        logger.info(f"{len(commit_bug_ids)} bugs linked to commits to download.")

        # Get IDs of bugs which are regressions, bugs which caused regressions
        # (useful for the regressor model), and blocked bugs.
        regression_related_ids: List[int] = _regression_related_ids(
            bugzilla.get_bugs()
        )
        if limit:
            regression_related_ids = regression_related_ids[-limit:]
        logger.info(
            f"{len(regression_related_ids)} bugs which caused regressions fixed by commits."
        )

        # Get IDs of bugs linked to intermittent failures.
        test_failure_bug_ids = [
            item["bug_id"]
            for item in test_scheduling.get_failure_bugs(
                two_years_and_six_months_ago, datetime.utcnow()
            )
        ]
        if limit:
            test_failure_bug_ids = test_failure_bug_ids[-limit:]
        logger.info(f"{len(test_failure_bug_ids)} bugs about test failures.")

        all_ids = (
            timespan_ids
            + labelled_bug_ids
            + commit_bug_ids
            + regression_related_ids
            + test_failure_bug_ids
        )
        all_ids_set = set(all_ids)

        # We have to redownload bugs that were changed since the last download.
        # We can remove from the DB the bugs that are outside of the considered
        # timespan and are not labelled.
        bugzilla.delete_bugs(
            lambda bug: bug["id"] in changed_ids or bug["id"] not in all_ids_set
        )

        new_bugs = bugzilla.download_bugs(all_ids)

        # Get regression_related_ids again (the set could have changed after
        # downloading new bugs). Iterate a bounded number of times to chase
        # the transitive closure of regression relations.
        for _ in range(7):
            regression_related_ids = _regression_related_ids(new_bugs)
            logger.info(
                f"{len(regression_related_ids)} bugs which caused regressions fixed by commits."
            )
            if limit:
                regression_related_ids = regression_related_ids[-limit:]

            # If we got all bugs we needed, break.
            if set(regression_related_ids).issubset(all_ids):
                break

            new_bugs = bugzilla.download_bugs(regression_related_ids)

        # Try to re-download inconsistent bugs, up to twice.
        inconsistent_bugs = bugzilla.get_bugs(include_invalid=True)
        for _ in range(2):
            # We look for inconsistencies in all bugs first, then, on following
            # passes, we only look for inconsistencies in bugs that were found
            # to be inconsistent in the first pass.
            inconsistent_bugs = bug_snapshot.get_inconsistencies(inconsistent_bugs)
            inconsistent_bug_ids = set(bug["id"] for bug in inconsistent_bugs)

            if len(inconsistent_bug_ids) == 0:
                break

            logger.info(
                f"Re-downloading {len(inconsistent_bug_ids)} bugs, as they were inconsistent"
            )
            bugzilla.delete_bugs(lambda bug: bug["id"] in inconsistent_bug_ids)
            bugzilla.download_bugs(inconsistent_bug_ids)

        # Drop bugs whose history could not be retrieved.
        # TODO: Figure out why.
        missing_history_bug_ids = {
            bug["id"] for bug in bugzilla.get_bugs() if "history" not in bug
        }
        bugzilla.delete_bugs(lambda bug: bug["id"] in missing_history_bug_ids)
        logger.info(
            f"Deleted {len(missing_history_bug_ids)} bugs as we couldn't retrieve their history"
        )

        zstd_compress(bugzilla.BUGS_DB)