def create_all_issues(details_per_flag, links_per_project_and_flag):
    """Create or retitle the GitHub notification issue for each (project, flag) pair.

    Args:
        details_per_flag: Mapping from flag name to an object exposing
            ``bazel_version`` and ``issue_url`` attributes.
        links_per_project_and_flag: Mapping from ``(project_label, flag)`` to
            the links that are handed to ``create_issue_body``.

    Raises:
        Whatever ``get_github_client`` raises; the client is created outside
        of the per-project error handling.

    A ``bazelci.BuildkiteException`` or ``GitHubError`` raised while handling
    one pair does not abort the run: the message is collected and all
    collected messages are reported once at the end through ``print_info``.
    """
    errors = set()
    issue_client = get_github_client()
    for (project_label, flag), links in links_per_project_and_flag.items():
        try:
            # A flag without recorded details must end up in the "invalid
            # version" branch below. Falling back to a plain tuple and reading
            # `.bazel_version` from it would raise an AttributeError that the
            # except clause does not catch, aborting all notifications.
            details = details_per_flag.get(flag)
            bazel_version = getattr(details, "bazel_version", None)
            issue_url = getattr(details, "issue_url", None)

            if bazel_version in (None, "unreleased binary"):
                raise bazelci.BuildkiteException(
                    "Notifications: Invalid Bazel version '{}' for flag {}".format(
                        bazel_version or "", flag
                    )
                )

            if not issue_url:
                raise bazelci.BuildkiteException(
                    "Notifications: Missing GitHub issue URL for flag {}".format(flag)
                )

            repo_owner, repo_name, do_not_notify = get_project_details(project_label)
            if do_not_notify:
                bazelci.eprint("{} has opted out of notifications.".format(project_label))
                continue

            temporary_title = get_temporary_issue_title(project_label, flag)
            final_title = get_final_issue_title(project_label, bazel_version, flag)
            has_target_release = bazel_version != "TBD"

            # Three possible scenarios:
            # 1. There is already an issue with the target release in the title -> do nothing
            # 2. There is an issue, but without the target release, and we now know
            #    the target release -> update title
            # 3. There is no issue -> create one
            if issue_client.get_issue(repo_owner, repo_name, final_title):
                bazelci.eprint(
                    "There is already an issue in {}/{} for project {}, flag {} and Bazel {}".format(
                        repo_owner, repo_name, project_label, flag, bazel_version
                    )
                )
            else:
                number = issue_client.get_issue(repo_owner, repo_name, temporary_title)
                if number:
                    # An issue with the temporary title exists; it is only
                    # retitled once the target release is known.
                    if has_target_release:
                        issue_client.update_title(repo_owner, repo_name, number, final_title)
                else:
                    body = create_issue_body(project_label, flag, details, links)
                    title = final_title if has_target_release else temporary_title
                    issue_client.create_issue(repo_owner, repo_name, title, body)
        except (bazelci.BuildkiteException, GitHubError) as ex:
            # The set collapses identical messages for the same project.
            errors.add("Could not notify project '{}': {}".format(project_label, ex))

    if errors:
        # Sorted so that the report order does not depend on set iteration order.
        print_info("notify_errors", "error", sorted(errors))
def get_project_details(project_label):
    """Look up the repository owner, repository name and opt-out flag of a project.

    The project entry is read from ``bazelci.DOWNSTREAM_PROJECTS``; its
    ``git_repository`` value is parsed with ``REPO_PATTERN``, which has to
    provide the named groups ``owner`` and ``repo``.

    Returns:
        A tuple ``(owner, repo, do_not_notify)``; ``do_not_notify`` is
        ``False`` when the entry does not define it.

    Raises:
        bazelci.BuildkiteException: If the project has no (non-empty)
            ``git_repository`` or the value does not match ``REPO_PATTERN``.
    """
    project_config = bazelci.DOWNSTREAM_PROJECTS.get(project_label, {})
    repository_url = project_config.get("git_repository", "")

    if not repository_url:
        raise bazelci.BuildkiteException(
            "Could not retrieve Git repository for project '{}'".format(project_label)
        )

    parsed = REPO_PATTERN.match(repository_url)
    if not parsed:
        raise bazelci.BuildkiteException(
            "Hosts other than GitHub are currently not supported ({})".format(repository_url)
        )

    owner = parsed.group("owner")
    repo = parsed.group("repo")
    opted_out = project_config.get("do_not_notify", False)
    return owner, repo, opted_out
def process_build_log(failed_jobs_per_flag, already_failing_jobs, log, job):
    """Record which incompatible flags broke a job, based on its build log.

    Args:
        failed_jobs_per_flag: Mapping ``flag -> {job id -> job}`` that is
            updated in place. Indexing an unseen flag must yield a mutable
            mapping (for example a ``defaultdict(dict)``).
        already_failing_jobs: List that receives the job when it fails
            independently of any incompatible flag. The job is added at most
            once per call.
        log: Text of the job's build log.
        job: Job dictionary; ``id``, ``state`` and ``web_url`` are read.

    Raises:
        bazelci.BuildkiteException: If a "+++ Result" section is present but
            the success or migration marker cannot be found in the log.
    """
    failed_without_flags = (
        "Failure: Command failed, even without incompatible flags." in log
    )
    if failed_without_flags:
        already_failing_jobs.append(job)

    # bazelisk --migrate might run for multiple times for run / build / test,
    # so there could be several "+++ Result" sections.
    while "+++ Result" in log:
        index_success = log.rfind("Command was successful with the following flags:")
        index_failure = log.rfind("Migration is needed for the following flags:")
        if index_success == -1 or index_failure == -1:
            raise bazelci.BuildkiteException("Cannot recognize log of " + job["web_url"])

        # Everything after the last migration marker lists the failing flags.
        for line in log[index_failure:].split("\n"):
            line = line.strip()
            if line.startswith("--incompatible_") and line in INCOMPATIBLE_FLAGS:
                failed_jobs_per_flag[line][job["id"]] = job

        # Drop the section that was just handled and continue with the previous one.
        log = log[0:log.rfind("+++ Result")]

    # If the job failed for other reasons, we add it into already failing jobs.
    # A job that was already recorded above must not be listed a second time.
    if job["state"] == "failed" and not failed_without_flags:
        already_failing_jobs.append(job)
def process_build_log(failed_jobs_per_flag, already_failing_jobs, log, job, details_per_flag):
    """Record which incompatible flags broke a job, based on its build log.

    Args:
        failed_jobs_per_flag: Mapping ``flag -> {job id -> job}`` that is
            updated in place. Indexing an unseen flag must yield a mutable
            mapping (for example a ``defaultdict(dict)``).
        already_failing_jobs: List that receives the job when it fails
            independently of any incompatible flag. The job is added at most
            once per call.
        log: Text of the job's build log.
        job: Job dictionary; ``id``, ``state`` and ``web_url`` are read.
        details_per_flag: Passed through to ``extract_all_flags`` and
            ``extract_flag_details`` (presumably filled with per-flag
            details there; verify against those helpers).

    Raises:
        bazelci.BuildkiteException: If a "+++ Result" section is present but
            the success or migration marker cannot be found in the log.
    """
    failed_without_flags = (
        "Failure: Command failed, even without incompatible flags." in log
    )
    if failed_without_flags:
        already_failing_jobs.append(job)

    def handle_failing_flags(line, details_per_flag):
        # Callback for the "migration needed" part of a section: in addition
        # to extracting the flag details, remember that this job fails with
        # the flag.
        flag = extract_flag_details(line, details_per_flag)
        if flag:
            failed_jobs_per_flag[flag][job["id"]] = job

    # bazelisk --migrate might run for multiple times for run / build / test,
    # so there could be several "+++ Result" sections.
    while "+++ Result" in log:
        index_success = log.rfind("Command was successful with the following flags:")
        index_failure = log.rfind("Migration is needed for the following flags:")
        if index_success == -1 or index_failure == -1:
            raise bazelci.BuildkiteException("Cannot recognize log of " + job["web_url"])

        # Text between the two markers is handed over with the plain extractor,
        # text after the migration marker with the failure-recording callback.
        extract_all_flags(log[index_success:index_failure], extract_flag_details, details_per_flag)
        extract_all_flags(log[index_failure:], handle_failing_flags, details_per_flag)

        # Drop the section that was just handled and continue with the previous one.
        log = log[0:log.rfind("+++ Result")]

    # If the job failed for other reasons, we add it into already failing jobs.
    # A job that was already recorded above must not be listed a second time.
    if job["state"] == "failed" and not failed_without_flags:
        already_failing_jobs.append(job)
def _get_main_build_result(self):
    """Fill ``self.main_result`` from the most recent finished build on master.

    The build is requested from ``self.client`` (first page, one entry,
    states ``failed`` or ``passed``). ``self.main_result`` then holds the
    keys ``commit``, ``build_number``, ``tasks``, ``state`` and
    ``last_green_commit``.

    Raises:
        bazelci.BuildkiteException: If the client returns no build; the
            message is logged with level "SERIOUS" first.
    """
    query = [
        ("branch", "master"),
        ("page", "1"),
        ("per_page", "1"),
        ("state[]", "failed"),
        ("state[]", "passed"),
    ]
    finished_builds = self.client.get_build_info_list(query)
    if not finished_builds:
        error = f"Cannot find finished build for pipeline {self.pipeline}, please try to rerun the pipeline first."
        self._log("SERIOUS", error)
        raise bazelci.BuildkiteException(error)

    latest_build = finished_builds[0]

    # The result dictionary is published first and then filled step by step.
    result = {}
    self.main_result = result
    result["commit"] = latest_build["commit"]
    result["build_number"] = latest_build["number"]

    # Jobs for which no job info could be extracted (falsy result) are skipped.
    extracted = (extract_job_info_by_key(job) for job in latest_build["jobs"])
    usable_job_infos = (job_info for job_info in extracted if job_info)
    result["tasks"] = group_job_info_by_task(usable_job_infos)
    result["state"] = get_project_state(result["tasks"])

    repository = bazelci.DOWNSTREAM_PROJECTS[self.project]["git_repository"]
    green_commit_url = bazelci.bazelci_last_green_commit_url(repository, self.pipeline)
    result["last_green_commit"] = bazelci.get_last_green_commit(green_commit_url)
def get_github_client():
    """Build a ``GitHubIssueClient`` authenticated with the decrypted API token.

    The token is obtained via ``bazelci.decrypt_token`` from
    ``ENCRYPTED_GITHUB_API_TOKEN`` and ``GITHUB_TOKEN_KMS_KEY``; the client
    reports as ``GITHUB_ISSUE_REPORTER``.

    Raises:
        bazelci.BuildkiteException: If decrypting the token fails for any
            reason.
    """
    try:
        oauth_token = bazelci.decrypt_token(
            encrypted_token=ENCRYPTED_GITHUB_API_TOKEN,
            kms_key=GITHUB_TOKEN_KMS_KEY,
        )
    except Exception as ex:
        # Any decryption problem is surfaced as a single, uniform error type.
        message = "Failed to decrypt GitHub API token: {}".format(ex)
        raise bazelci.BuildkiteException(message)
    else:
        return GitHubIssueClient(
            reporter=GITHUB_ISSUE_REPORTER,
            oauth_token=oauth_token,
        )
def group_job_info_by_task(job_infos):
    """Index job info dictionaries by their ``task`` entry.

    Args:
        job_infos: Iterable of dictionaries, each containing a ``task`` key.

    Returns:
        A dictionary mapping each task name to its job info. The values are
        the very same dictionaries that were passed in, with the ``task`` key
        removed. If a task name occurs more than once, the last entry wins.

    Raises:
        bazelci.BuildkiteException: If a job info has no ``task`` key.
    """
    grouped = {}
    for job_info in job_infos:
        if "task" in job_info:
            # Removes the key from the caller's dictionary while reading it.
            task_name = job_info.pop("task")
            grouped[task_name] = job_info
            continue
        raise bazelci.BuildkiteException(
            f"'task' must be a key of job_info: {job_info}")
    return grouped
def create_all_issues(details_per_flag, links_per_project_and_flag):
    """Create a GitHub notification issue for each (project, flag) pair that lacks one.

    Args:
        details_per_flag: Mapping from flag name to an object exposing
            ``bazel_version`` and ``issue_url`` attributes.
        links_per_project_and_flag: Mapping from ``(project_label, flag)`` to
            the links that are handed to ``create_issue_body``.

    Raises:
        Whatever ``get_github_client`` raises; the client is created outside
        of the per-project error handling.

    A ``bazelci.BuildkiteException`` or ``GitHubError`` raised while handling
    one pair does not abort the run: the message is collected and all
    collected messages are reported once at the end through ``print_info``.
    """
    errors = []
    issue_client = get_github_client()
    for (project_label, flag), links in links_per_project_and_flag.items():
        try:
            # A flag without recorded details must end up in the "invalid
            # version" branch below. Falling back to a plain tuple and reading
            # `.bazel_version` from it would raise an AttributeError that the
            # except clause does not catch, aborting all notifications.
            details = details_per_flag.get(flag)
            bazel_version = getattr(details, "bazel_version", None)
            issue_url = getattr(details, "issue_url", None)

            if bazel_version in (None, "unreleased binary"):
                raise bazelci.BuildkiteException(
                    "Notifications: Invalid Bazel version '{}' for flag {}".format(
                        bazel_version or "", flag
                    )
                )

            if not issue_url:
                raise bazelci.BuildkiteException(
                    "Notifications: Missing GitHub issue URL for flag {}".format(flag)
                )

            repo_owner, repo_name, do_not_notify = get_project_details(project_label)
            if do_not_notify:
                bazelci.eprint("{} has opted out of notifications.".format(project_label))
                continue

            title = get_issue_title(project_label, bazel_version, flag)
            if issue_client.get_issue(repo_owner, repo_name, title):
                # An issue with this exact title exists already; nothing to do.
                bazelci.eprint(
                    "There is already an issue in {}/{} for project {}, flag {} and Bazel {}".format(
                        repo_owner, repo_name, project_label, flag, bazel_version
                    )
                )
            else:
                body = create_issue_body(project_label, flag, details, links)
                issue_client.create_issue(repo_owner, repo_name, title, body)
        except (bazelci.BuildkiteException, GitHubError) as ex:
            errors.append("Could not notify project '{}': {}".format(project_label, ex))

    if errors:
        print_info("notify_errors", "error", errors)
def get_latest_downstream_build_info():
    """Return the most recent finished build of the downstream pipeline on master.

    The build is requested from ``DOWNSTREAM_PIPELINE_CLIENT`` (first page,
    one entry, states ``failed`` or ``passed``).

    Raises:
        bazelci.BuildkiteException: If the client returns an empty list.
    """
    query = [
        ("branch", "master"),
        ("page", "1"),
        ("per_page", "1"),
        ("state[]", "failed"),
        ("state[]", "passed"),
    ]
    finished_builds = DOWNSTREAM_PIPELINE_CLIENT.get_build_info_list(query)

    if len(finished_builds) != 0:
        return finished_builds[0]

    raise bazelci.BuildkiteException(
        "Cannot find finished downstream build, please try to rerun downstream pipeline first."
    )
def process_build_log(failed_jobs_per_flag, already_failing_jobs, log, job, details_per_flag):
    """Record which incompatible flags broke a job, based on its build log.

    Args:
        failed_jobs_per_flag: Mapping ``flag -> {job id -> job}`` that is
            updated in place. Indexing an unseen flag must yield a mutable
            mapping (for example a ``defaultdict(dict)``).
        already_failing_jobs: List that receives the job when it fails
            independently of any incompatible flag. The job is added at most
            once per call.
        log: Text of the job's build log.
        job: Job dictionary; ``id``, ``state`` and ``web_url`` are read.
        details_per_flag: Mapping ``flag -> FlagDetails`` that is updated in
            place for failing flags that have no details recorded yet.

    Raises:
        bazelci.BuildkiteException: If a "+++ Result" section is present but
            the success or migration marker cannot be found in the log.
    """
    failed_without_flags = (
        "Failure: Command failed, even without incompatible flags." in log
    )
    if failed_without_flags:
        already_failing_jobs.append(job)

    # bazelisk --migrate might run for multiple times for run / build / test,
    # so there could be several "+++ Result" sections.
    while "+++ Result" in log:
        index_success = log.rfind("Command was successful with the following flags:")
        index_failure = log.rfind("Migration is needed for the following flags:")
        if index_success == -1 or index_failure == -1:
            raise bazelci.BuildkiteException("Cannot recognize log of " + job["web_url"])

        # Everything after the last migration marker lists the failing flags.
        for line in log[index_failure:].split("\n"):
            match = INCOMPATIBLE_FLAG_LINE_PATTERN.match(line)
            if not match:
                continue

            flag = match.group("flag")
            if flag not in INCOMPATIBLE_FLAGS:
                # INCOMPATIBLE_FLAGS only contains flags that are being flipped in future
                # releases, but Bazelisk may also return already flipped flags if a
                # project fixes its Bazel version via a .bazelversion file
                # (e.g. 0.29.0 in Buildfarm).
                # TODO(fweikert): display notification for such projects
                # TODO(fweikert): remove INCOMPATIBLE_FLAGS and get all information from
                # Bazelisk's output
                continue

            failed_jobs_per_flag[flag][job["id"]] = job

            # Keep details that were recorded earlier; only fill in missing ones.
            if details_per_flag.get(flag, (None, None)) == (None, None):
                details_per_flag[flag] = FlagDetails(
                    bazel_version=match.group("version"), issue_url=match.group("url")
                )

        # Drop the section that was just handled and continue with the previous one.
        log = log[0:log.rfind("+++ Result")]

    # If the job failed for other reasons, we add it into already failing jobs.
    # A job that was already recorded above must not be listed a second time.
    if job["state"] == "failed" and not failed_without_flags:
        already_failing_jobs.append(job)
def _analyze_for_downstream_pipeline_result(self):
    """Retry failing downstream tasks, mark flaky and broken ones, and bisect the broken ones.

    Steps, each accompanied by log output:
      1. List all tasks of ``self.downstream_result`` whose state is not "passed".
      2. Retry them via ``self._retry_failed_jobs``; tasks that pass on retry
         get ``"flaky": True``, the others get ``"broken": True``.
      3. If nothing is still failing, return.
      4. Trigger a bisect build for the still failing tasks, wait for it to
         finish, and store a culprit (when one is determined) under the
         task's ``"culprit"`` key.

    Raises:
        bazelci.BuildkiteException: If the bisect build has no job whose
            command contains ``--task_name=<task>`` for a still failing task;
            the message is logged with level "SERIOUS" first.
    """
    self._log("INFO", "")
    self._log("PASSED", "***Analyze failures in downstream pipeline***")

    # Report failed tasks
    self._log("WARNING", "The following tasks are failing in downstream pipeline")
    currently_failing = [
        task_info
        for task_info in self.downstream_result["tasks"].values()
        if task_info["state"] != "passed"
    ]
    self._print_job_list(currently_failing)

    # Retry all failed tasks
    self._log("PASSED", "Retry failed downstream pipeline tasks...")
    retry_per_failed_task = self._retry_failed_jobs(
        self.downstream_result, DOWNSTREAM_PIPELINE_CLIENT)

    # Report tasks that succeeded after retry
    recovered_infos = []
    for task, retry_info in retry_per_failed_task.items():
        if retry_info["state"] != "passed":
            continue
        recovered_infos.append(retry_info)
        self.downstream_result["tasks"][task]["flaky"] = True
    if recovered_infos:
        self._log(
            "WARNING",
            "The following tasks succeeded after retry, they might be flaky"
        )
        self._print_job_list(recovered_infos)

    # Report tasks that are still failing after retry
    broken_infos = []
    broken_task_names = []
    for task, retry_info in retry_per_failed_task.items():
        if retry_info["state"] == "passed":
            continue
        broken_infos.append(retry_info)
        broken_task_names.append(task)
        self.downstream_result["tasks"][task]["broken"] = True

    if not broken_infos:
        return

    self._log(
        "FAIL",
        f"The following tasks are still failing after retry, they are probably broken due to recent Bazel changes."
    )
    self._print_job_list(broken_infos)

    # Do bisect for still failing jobs
    self._log("PASSED", f"Bisect for still failing tasks...")
    triggered_build = self._trigger_bisect(broken_task_names)
    bisect_build = CULPRIT_FINDER_PIPELINE_CLIENT.wait_build_to_finish(
        build_number=triggered_build["number"], logger=self)

    bisect_result_by_task = {}
    for task in broken_task_names:
        for job in bisect_build["jobs"]:
            # A bisect job belongs to the task named in its command line.
            if ("--task_name=" + task) not in job["command"]:
                continue
            outcome, culprit = self._determine_bisect_result(job)
            bisect_result_by_task[task] = outcome
            if culprit:
                self.downstream_result["tasks"][task]["culprit"] = culprit

        if task in bisect_result_by_task:
            continue
        error = f"Bisect job for task {task} is missing in " + bisect_build[
            "web_url"]
        self._log("SERIOUS", error)
        raise bazelci.BuildkiteException(error)

    # Report bisect result
    for task, outcome in bisect_result_by_task.items():
        display_name = self.downstream_result["tasks"][task]["name"]
        self._log("WARNING", "Bisect result for " + display_name)
        self._log("INFO", outcome)