def _run_sync(self):
    """
    Run the upstream -> downstream sync for every image returned by
    ``self.get_synced_images()``.

    For each image the downstream fork is looked up, cloned and refreshed,
    the branches carrying a bot configuration are selected and handed to
    ``self._sync_valid_branches()``. An image that cannot be processed is
    reported through a UMB "skip" message and the loop moves on.
    The temporary working directory is removed once all images are done.
    """

    def report_skip(reason):
        # Every skipped image is announced with the same status keyword.
        UMBSender.send_umb_message_skip(self.msg_artifact, "not-applicable",
                                        reason)

    self.refresh_betka_yaml()
    for self.image in self.get_synced_images():
        self.pagure_api.set_image(self.image)

        # The image name comes from the dist_git_repos variable of the
        # betka.yaml configuration file; without a Pagure fork for it
        # there is nothing to push into.
        has_fork = self.pagure_api.get_pagure_fork()
        if not has_fork:
            report_skip("pagure fork not found")
            continue

        self.info("Trying to sync image %r.", self.image)
        os.chdir(self.betka_tmp_dir.name)
        self.clone_url = self.pagure_api.get_clone_url()

        # A successful clone switches the cwd to self.downstream_dir.
        cloned = self.prepare_downstream_git()
        if not cloned:
            report_skip("Failed cloning downstream repository")
            continue

        # Bring the fork up to date with the upstream dist-git repository.
        Git.get_changes_from_distgit(
            url=self.pagure_api.full_downstream_url)

        # Branches are read from the upstream repository
        # (e.g. https://src.fedoraproject.org/container/nginx), not the fork,
        # and filtered before the bot-cfg.yml files are checked.
        upstream_branches = self.pagure_api.get_branches()
        candidates = Git.branches_to_synchronize(
            self.betka_config, all_branches=upstream_branches)
        self.debug(f"Branches to sync {candidates}")
        Git.sync_fork_with_upstream(candidates)

        configured = self.pagure_api.get_valid_branches(
            self.downstream_dir, candidates)
        if not configured:
            reason = "There are no valid branches with bot-cfg.yaml file"
            self.info(reason)
            report_skip(reason)
            if self.downstream_dir.is_dir():
                shutil.rmtree(str(self.downstream_dir))
            continue

        try:
            self._sync_valid_branches(configured)
        finally:
            # Cloned repositories are removed even when the sync fails.
            self.delete_cloned_directories()

        self.send_umb_message_complete()

    # The temporary directory is created for each upstream2downstream task;
    # drop it once every image has been handled.
    tmp_root = Path(self.betka_tmp_dir.name)
    if tmp_root.is_dir():
        self.betka_tmp_dir.cleanup()
def fake_git_clone(self, clone_url, tempdir):
    """
    Clone ``clone_url`` (including submodules) into a sub-directory of
    ``tempdir``.

    The sub-directory name is the second-to-last ``/``-separated component
    of ``clone_url`` passed through ``Git.strip_dot_git``.

    :param clone_url: URL or path handed to ``git clone``
    :param tempdir: directory under which the clone is created
    :return: path of the cloned repository
    """
    url_parts = clone_url.split("/")
    repo_name = Git.strip_dot_git(url_parts[-2])
    target = os.path.join(tempdir, repo_name)
    Git.call_git_cmd(
        f"clone --recurse-submodules {clone_url} {target}",
        msg=clone_url,
    )
    return target
def sync_pull_requests(self, branch) -> bool: """ Sync upstream pr into downstream pr. :return: False if Pull Request is not synced True if Pull Request is filed or updated """ # Get all pull requests from upstream for correct image if not self.config.get("pr_checker"): self.info("Syncing upstream PR to downstream repo is not allowed.") return False # Get all upstream OPENED pull request pr_dict = self.github_api.get_pull_requests("OPEN") self.debug("pr_dict: %s", pr_dict) os.chdir(self.betka_tmp_dir.name) state = self.github_api.check_upstream_pr(self.pr_number) if state == "OPEN": # Commit message has to be the same as in the latest comment message comment = self.config.get("pr_comment_message") if not self.upstream_pr_comment.startswith(comment): return False os.chdir(str(self.upstream_synced_dir)) Git.fetch_pr_origin(self.pr_number, f"Checkout to PR{self.pr_number} branch") # Copy upstream directory into downstream directory title = f"{self.betka_config['downstream_pr_msg']} #{self.pr_number}" description_msg = COMMIT_PR_MSG.format(pr_num=self.pr_number, repo=self.repo) pr_id = self.pagure_api.check_downstream_pull_requests(title) os.chdir(str(self.downstream_dir)) # Switch to downstream dist-git repo if not self.sync_upstream_to_downstream_directory(): return False # git {add,commit,push} all files in local dist-git repo Git.git_add_all( self.upstream_message, related_msg=Git.get_msg_from_jira_ticket(self.config), ) # Prepare betka_schema used for sending mail and Pagure Pull Request # The function also checks if downstream does not already contain pull request betka_schema = self.pagure_api.file_pull_request( title=title, pr_msg=description_msg, upstream_hash=self.upstream_hash, branch=branch, pr_id=pr_id, pr=True, pr_num=self.pr_number, ) self.send_result_email(betka_schema=betka_schema) self.delete_close_merged_pull_requests() return True
def _get_bot_cfg(self, branch: str) -> bool:
    """
    Check out ``branch`` in the downstream repository and load its bot
    configuration into ``self.config``.

    :param branch: downstream branch whose configuration is loaded
    :return: True when the configuration was loaded
    :raises jsonschema.exceptions.ValidationError: re-raised after logging
            when the configuration does not pass schema validation
    """
    checkout_cmd = f"checkout {branch}"
    Git.call_git_cmd(checkout_cmd, msg="Change downstream branch")
    try:
        loaded = self.pagure_api.get_bot_cfg_yaml(branch=branch)
        self.config = loaded
        self.debug(f"Downstream 'bot-cfg.yml' file {self.config}.")
    except jsonschema.exceptions.ValidationError as jeverror:
        namespace = self.config_json['namespace_containers']
        self.error(f"Getting bot.cfg {branch} from "
                   f"{namespace}/{self.image} "
                   f"failed. {jeverror.message}")
        raise
    return True
def github_api(self):
    """
    Return the GitHubAPI helper used for working with GitHub PRs.

    The helper is created on first use from the image name, request headers,
    the repository and user names parsed out of ``self.msg_upstream_url``
    and the JSON configuration; later calls return the stored instance.

    :return: GitHubAPI instance
    """
    if self._github_api:
        return self._github_api

    self._github_api = GitHubAPI(
        self.image,
        self.headers,
        Git.get_reponame_from_git_url(self.msg_upstream_url),
        Git.get_username_from_git_url(self.msg_upstream_url),
        self.config_json,
    )
    return self._github_api
def prepare(self):
    """
    Load the betka.yaml configuration and verify everything a sync needs.

    Steps, in order: load configuration, resolve the Pagure user (from the
    API token when one is configured), write the git configuration, verify
    SSH access to Pagure, check mandatory variables and read the upstream
    repository URL from the incoming message.

    :return: bool, True - success, False - some problem occurred
    """
    self.set_config()
    self.refresh_betka_yaml()
    if not self.betka_config.get("dist_git_repos"):
        self.error(
            f"Global configuration file {self.betka_config.get('betka_yaml_url')}"
            f" was not parsed properly")
        return False

    if "pagure_api_token" in self.betka_config:
        self.betka_config[
            "pagure_user"] = self.pagure_api.get_user_from_token()

    # .get(): the key is absent when neither a token nor a user name is
    # configured; that is reported as a failure instead of a KeyError.
    pagure_user = self.betka_config.get("pagure_user")
    if not pagure_user:
        self.error(
            f"Not able to get username from Internal Pagure "
            f"instance {self.config_json['pagure_host']}. See logs for details."
        )
        return False

    Git.create_dot_gitconfig(user_name=pagure_user,
                             user_email="non@existing")
    if not Git.has_ssh_access(
            self.config_json["pagure_host"],
            PAGURE_PORT,
            username=pagure_user,
    ):
        self.error(
            f"SSH keys are not valid for {self.config_json['pagure_host']}."
        )
        return False

    if not self.mandatory_variables_set():
        return False

    try:
        self.msg_upstream_url = self.message["repository"]["html_url"]
    except KeyError:
        # The two literals used to be glued together as "...keyin [...]".
        self.error(
            "Fedmsg does not contain html_url key "
            "in ['repository'] %r",
            self.message,
        )
        return False
    return True
def delete_close_merged_pull_requests(self):
    """
    Comment on the downstream pull request when the related upstream
    pull request is no longer open.

    The upstream state is read for ``self.pr_number``. When it is anything
    other than "OPEN", the matching downstream pull request is looked up by
    its title prefix and, if found, a comment saying it can be closed is
    posted to it.
    """
    self.debug("delete_close_merged_pull_requests()")

    state = self.github_api.check_upstream_pr(self.pr_number)
    if state == "OPEN":
        # Upstream is still open; the downstream PR is left untouched.
        return

    # The downstream PR is identified by the title betka gave it.
    msg_to_check = f"{self.betka_config['downstream_pr_msg']} #{self.pr_number}"
    pr_id = self.pagure_api.check_downstream_pull_requests(
        msg_to_check, check_user=False)
    if not pr_id:
        return

    url_address = self.pagure_api.get_comment_url(self.repo, pr_id)
    upstream_base = Git.strip_dot_git(self.msg_upstream_url)
    link = f"{upstream_base}/pull/{self.pr_number}"
    comment = (f"Upstream PR {link} was {state}. "
               f"This downstream PR {pr_id} can be closed. "
               f"Feel free to close this upstream Pull Request.")
    # Post the comment to the downstream PR.
    self.pagure_api.pagure_post_action(url_address, {"comment": comment})
def _copy_cloned_upstream_dir(self):
    """
    Copy the cloned upstream repository (``self.upstream_cloned_dir``) into
    the timestamp directory and remember the copy in
    ``self.upstream_synced_dir``.

    A directory left at the destination is removed first; symlinks are
    copied as symlinks.
    """
    destination = self.timestamp_dir / Git.get_reponame_from_git_url(
        self.msg_upstream_url)
    self.upstream_synced_dir = destination

    # copytree() needs a non-existing target.
    if os.path.isdir(self.upstream_synced_dir):
        shutil.rmtree(self.upstream_synced_dir)

    source = str(self.upstream_cloned_dir)
    shutil.copytree(source, str(self.upstream_synced_dir), symlinks=True)
def prepare_upstream_git(self):
    """
    Clone the upstream git repository into the temporary directory and
    store the resulting path in ``self.upstream_cloned_dir``.

    :return: True when the clone produced a directory, False otherwise
    """
    self.upstream_cloned_dir = Git.clone_repo(self.msg_upstream_url,
                                              self.betka_tmp_dir.name)
    if self.upstream_cloned_dir is not None:
        self.info("Upstream cloned directory %r", self.upstream_cloned_dir)
        return True

    self.error("!!!! Cloning upstream repo %s FAILED.",
               self.msg_upstream_url)
    return False
def prepare_downstream_git(self) -> bool:
    """
    Clone the downstream dist-git repository given by ``self.clone_url``
    into the temporary directory, store its path in ``self.downstream_dir``
    and change the working directory into it.

    :returns True if downstream git directory was cloned
             False if downstream git directory was not cloned
    """
    tmp_root = self.betka_tmp_dir.name
    self.downstream_dir = Git.clone_repo(self.clone_url, tmp_root)
    self.info("Downstream directory %r", self.downstream_dir)

    clone_failed = self.downstream_dir is None
    if clone_failed:
        self.error("!!!! Cloning downstream repo %s FAILED.", self.image)
        return False

    # Later git commands rely on running inside the clone.
    os.chdir(str(self.downstream_dir))
    return True
def _sync_valid_branches(self, valid_branches):
    """
    Sync every branch from ``valid_branches``.

    The upstream repository is cloned once; then, per branch, the downstream
    bot configuration is loaded and either the upstream master sync or the
    upstream pull request sync is performed, depending on
    ``self.master_sync`` / ``self.pr_sync`` and on what the configuration
    allows.

    :param valid_branches: valid branches to sync
    :raises subprocess.CalledProcessError: re-raised after logging when
            cloning the upstream repository fails with it
    """
    try:
        # NOTE(review): the boolean result of prepare_upstream_git() is not
        # checked here -- confirm whether a failed clone should stop the sync.
        self.prepare_upstream_git()
    except subprocess.CalledProcessError:
        self.error(
            f"!!!! Cloning upstream repo {self.msg_upstream_url} FAILED")
        raise

    for current_branch in valid_branches:
        self.timestamp_dir = None
        self.downstream_git_branch = current_branch

        # Load the downstream bot-cfg.yml into self.config so the decisions
        # below are made on up-to-date information.
        cfg_loaded = self._get_bot_cfg(branch=self.downstream_git_branch)
        if not cfg_loaded:
            continue

        # Repository URL without the .git suffix.
        self.repo = Git.strip_dot_git(self.msg_upstream_url)

        if self.master_sync:
            self.info("SYNCING UPSTREAM TO DOWNSTREAM.")
            if self.config.get("master_checker"):
                self.create_and_copy_timestamp_dir()
                self.sync_to_downstream_branches(self.downstream_git_branch)
            else:
                self.info(
                    "Syncing upstream repo to downstream repo is not allowed."
                )
                continue
        elif self.pr_sync:
            self.info("SYNCING UPSTREAM PR TO DOWNSTREAM PR. DISABLED")
            if self.config.get("pr_checker"):
                self.create_and_copy_timestamp_dir()
                synced = self.sync_pull_requests(self.downstream_git_branch)
                if synced:
                    # Updates pull request from downstream
                    self.delete_close_merged_pull_requests()
            else:
                self.info(
                    "Syncing upstream PR to downstream repo is not allowed."
                )
                continue

        self.delete_timestamp_dir()
def test_betka_run_master_sync(
    self,
    init_betka_real_json,
    mock_get_pagure_fork,
    mock_prepare_downstream,
    mock_prepare_upstream,
    mock_git_clone,
    mock_get_branches,
    mock_check_prs,
    mock_deploy,
    mock_send_email,
    mock_rmtree,
):
    """
    Run a master sync and verify the downstream repository afterwards:
    the configured branch (fc31) carries the upstream README, the branch
    without bot configuration (fc30) still ends at its initial commit.
    """
    self.betka.betka_config["dist_git_repos"].pop("s2i-core")

    umb = flexmock(UMBSender)
    umb.should_receive("send_umb_message_in_progress").and_return().once()
    flexmock(UMBSender).should_receive("send_umb_message_error").never()

    self.betka.run_sync()

    # README in the synced branch has to match the test upstream one.
    assert self.betka.downstream_dir
    os.chdir(str(self.betka.downstream_dir))
    Git.call_git_cmd("checkout fc31")
    expected_readme = (self.upstream_repo / "README.md").read_text()
    synced_readme = (self.betka.downstream_dir / "README.md").read_text()
    assert expected_readme == synced_readme

    # Latest commit in the synced branch.
    fc31_log = Git.call_git_cmd("log -n 1 --format=medium")
    fc31_fields = [line.strip() for line in fc31_log.split("\n") if line != ""]
    assert fc31_fields
    assert fc31_fields[3] == "Add bot-cfg.yml"

    # The other branch has no bot-cfg.yml, so it must stay untouched and
    # its latest commit is still the initial one.
    Git.call_git_cmd("checkout fc30")
    fc30_log = Git.call_git_cmd("log -n 1 --format=medium")
    assert fc30_log
    fc30_fields = [
        line.strip() for line in fc30_log.split("\n") if line.strip() != ""
    ]
    assert fc30_fields
    assert fc30_fields[3] == "Init branch"
    assert fc30_fields[4] == "For betka test"
    assert fc30_fields[5] == "in fc30 branch"
def deploy_image(self, image_url):
    """
    Generate the sources in a separate OpenShift POD and copy the produced
    ``results`` directory into the downstream directory.

    :param image_url: image reference handed to the OpenShift deployer
    :return: True when the deployment succeeded and results were copied,
             False when the deployer reported a failure
    """
    self.debug("Starting OpenShift POD")
    from betka.openshift import OpenshiftDeployer

    self._copy_cloned_downstream_dir()
    upstream_name = Git.get_reponame_from_git_url(self.msg_upstream_url)
    deployer = OpenshiftDeployer(
        upstream_name,
        self.image,
        str(self.timestamp_dir),
        image_url,
        self.betka_config["project"],
    )
    succeeded = deployer.deploy_image()

    # The results directory is listed regardless of the outcome.
    results_path = self.timestamp_dir / "results"
    list_dir_content(results_path)
    if succeeded:
        copy_upstream2downstream(self.timestamp_dir / "results",
                                 self.downstream_dir)
        return True
    return False
def sync_to_downstream_branches(self, branch) -> bool:
    """
    Sync the upstream repository into the given downstream dist-git branch
    and file (or update) the downstream pull request.

    :param branch: downstream branch to check and to sync
    :return: True when the sync went through and the result e-mail was sent,
             False when master sync is not allowed by the configuration or
             copying upstream into downstream failed
    """
    master_sync_allowed = self.config.get("master_checker")
    if not master_sync_allowed:
        self.info(
            "Syncing upstream repo to downstream repo is not allowed.")
        return False

    self.info("Syncing upstream %r to downstream %r",
              self.msg_upstream_url, self.image)
    pr_description = COMMIT_MASTER_MSG.format(hash=self.upstream_hash,
                                              repo=self.repo)

    existing_pr = self.pagure_api.check_downstream_pull_requests(
        branch=branch)
    if not existing_pr:
        # No open pull request yet: refresh the fork before committing.
        downstream_url = self.pagure_api.full_downstream_url
        Git.get_changes_from_distgit(url=downstream_url)
        Git.push_changes_to_fork(branch=branch)

    copied = self.sync_upstream_to_downstream_directory()
    if not copied:
        return False

    # git {add,commit,push} all files in local dist-git repo
    jira_msg = Git.get_msg_from_jira_ticket(self.config)
    Git.git_add_all(upstream_msg=self.upstream_message,
                    related_msg=jira_msg)

    # The schema returned here is used for the result e-mail.
    schema = self.pagure_api.file_pull_request(
        pr_msg=pr_description,
        upstream_hash=self.upstream_hash,
        branch=branch,
        pr_id=existing_pr,
    )
    self.send_result_email(betka_schema=schema)
    return True
def __init__(self, betka_config: Dict, config_json: Dict):
    """
    Store both configuration dictionaries and prepare the helper state.

    :param betka_config: betka configuration dictionary
    :param config_json: JSON configuration; ``api_url`` is read from it
    """
    self.betka_config, self.config_json = betka_config, config_json
    api_url = self.config_json['api_url']
    self.pagure_api_url: str = f"{api_url}"
    self.git = Git()
    # Filled in later, once the clone URL is known.
    self.clone_url: str = ""
class PagureAPI(object):
    """
    Helper for talking to a Pagure instance on behalf of betka.

    The instance works with one image at a time: call ``set_image()`` before
    any method that reads ``self.image``.
    """

    def __init__(self, betka_config: Dict, config_json: Dict):
        """
        :param betka_config: betka configuration dictionary
        :param config_json: JSON configuration with URL templates and hosts
        """
        self.betka_config = betka_config
        self.config_json = config_json
        self.pagure_api_url: str = f"{self.config_json['api_url']}"
        self.git = Git()
        # Set by get_fork() once the fork URLs are known.
        self.clone_url: str = ""

    def set_image(self, image: str):
        """Select the image (repository name) the other methods work with."""
        # TODO use setter method
        self.image = image

    def get_user_from_token(self):
        """
        Gets the username from token provided by parameter in betka's template.

        :return: username or None
        """
        ret_json = self.pagure_post_action(self.config_json["get_user_url"])
        if "username" not in ret_json:
            return None
        return ret_json["username"]

    def pagure_post_action(self, url: str, data=None):
        """
        Send an authorized POST request to Pagure.

        :param url: URL
        :param data: form data sent in the request body, if any
        :return: response from POST request as json
        :raises requests.exceptions.HTTPError: re-raised after logging when
                the response status signals an error
        """
        logger.debug("pagure_post_action(url=%s, data=%s)", url, data)
        try:
            r = requests.post(
                url,
                data=data,
                headers={
                    "Authorization":
                    f"token {self.betka_config['pagure_api_token'].strip()}"
                },
            )
            r.raise_for_status()
            # Decode the body once and reuse it for logging and returning.
            response = r.json()
            logger.debug("response: %s", response)
            return response
        except requests.exceptions.HTTPError as he:
            logger.exception(he)
            raise

    def check_downstream_pull_requests(self,
                                       branch: str,
                                       check_user: bool = True):
        """
        Checks if downstream already contains an open pull request whose
        title starts with the configured ``downstream_master_msg`` and which
        belongs to ``branch``.

        :param branch: downstream branch the pull request has to belong to
        :param check_user: when True, only pull requests filed by the
                           configured ``pagure_user`` are accepted
        :return: id of the matching pull request or None
        """
        title = self.betka_config["downstream_master_msg"]
        url_address = self.config_json["get_all_pr"].format(
            namespace=self.config_json["namespace_containers"],
            repo=self.image)
        logger.debug(url_address)
        (status_code,
         resp) = self.get_status_and_dict_from_request(url=url_address)
        req = resp["requests"]
        user = self.betka_config["pagure_user"]
        for out in req:
            if out["status"] != "Open":
                continue
            if not out["title"].startswith(title):
                continue
            pr_id = out["id"]
            logger.debug(
                "Downstream pull request for message %r "
                "and user %r found %r",
                title,
                user,
                pr_id,
            )
            # Check if the PR is for correct branch
            if out["branch"] != branch:
                continue
            # Previously both outcomes of this test returned pr_id, which
            # made check_user a no-op; foreign pull requests are now skipped.
            if check_user and out["user"]["name"] != user:
                continue
            return pr_id
        return None

    def create_pagure_pull_request(self, title: str, desc_msg: str,
                                   branch: str):
        """
        Creates the pull request for specific image

        :param title: title of the pull request
        :param desc_msg: text used as the initial comment
        :param branch: branch used both as source and as target
        :return: id of the created pull request, or None when the response
                 does not provide one
        """
        logger.debug(f"create_pagure_pull_request(): {branch}")
        url_address = self.config_json["pr_api"]
        repo_from = self.image
        repo_from_namespace = self.config_json["namespace_containers"]
        repo_from_username = self.betka_config["pagure_user"]
        data = {
            "title": title,
            "branch_to": branch,
            "branch_from": branch,
            "initial_comment": desc_msg,
        }
        if self.betka_config["new_api_version"]:
            # The newer API takes the source repository in the payload.
            url_address = url_address.format(namespace=repo_from_namespace,
                                             repo=repo_from)
            data["repo_from"] = repo_from
            data["repo_from_namespace"] = repo_from_namespace
            data["repo_from_username"] = repo_from_username
        else:
            # The older API encodes the fork owner in the URL instead.
            url_address = url_address.format(namespace=repo_from_namespace,
                                             repo=repo_from,
                                             user=repo_from_username)

        ret_json = self.pagure_post_action(url_address, data=data)
        try:
            return ret_json.get("id")
        except AttributeError:
            return None

    def get_pagure_fork(self):
        """
        Checks if the fork already exists in the internal Pagure
        instance otherwise it will create it.

        :return: True if fork exists
                 False if fork not exists
        """
        data = {
            "namespace": self.config_json["namespace_containers"],
            "repo": self.image,
        }
        if not self.get_fork(count=1):
            self.pagure_post_action(self.config_json["pr_fork"], data=data)
            # Creating the fork may take a while; get_fork() polls for it
            # with its default number of attempts.
            if not self.get_fork():
                logger.info(f"{self.image} does not have a fork in "
                            f"{self.config_json['namespace_containers']}"
                            f" namespace yet")
                return False
        return True

    def get_comment_url(self, internal_repo: str, pr_id: str):
        """
        Build the URL used for commenting on a pull request.

        :param internal_repo: repository name put into the URL template
        :param pr_id: pull request id
        :return: formatted ``get_pr_comment`` URL
        """
        comment_url = self.config_json["get_pr_comment"].format(
            namespace=self.config_json["namespace_containers"],
            repo=internal_repo,
            id=pr_id,
        )
        return comment_url

    def full_url(self, fork: bool = True):
        """
        Returns the full URL for the relevant repo image.

        :param fork: when True the URL points to the fork owned by the
                     configured ``pagure_user``, otherwise to the main repo
        :return: Full URL for image
        """
        pagure_url = self.config_json["git_url_repo"]
        fork_user = ""
        if fork:
            fork_user = f"/fork/{self.betka_config['pagure_user']}"
        pagure_url = pagure_url.format(
            fork_user=fork_user,
            namespace=self.config_json["namespace_containers"],
            repo=self.image,
        )
        return pagure_url

    @property
    def full_downstream_url(self) -> str:
        """
        Returns the full downstream URL for the relevant repo image.

        Example: ssh://git@git.<pagure_host>:<PAGURE_PORT>/container/s2i-base.git

        When ``PAGURE_PORT`` is not set, the ``pull_request_url`` template
        from the JSON configuration is used as the base instead.

        :return: Full URL for image
        """
        url = (f"ssh://git@git.{self.config_json['pagure_host']}:{PAGURE_PORT}"
               if PAGURE_PORT else
               self.config_json["pull_request_url"].format(
                   username=self.betka_config["pagure_user"]))
        return f"{url}/{self.config_json['namespace_containers']}/{self.image}.git"

    def get_clone_url(self) -> str:
        """Return the clone URL stored by the last successful get_fork()."""
        return self.clone_url

    def get_status_and_dict_from_request(self,
                                         url: str = None,
                                         msg: str = "",
                                         fork: bool = True):
        """
        Send a GET request and return its status code with the decoded body.

        :param url: URL to query; defaults to ``full_url(fork=fork)``
        :param msg: suffix appended to the URL
        :param fork: passed to ``full_url()`` when ``url`` is not given
        :return: tuple (status code, response decoded from JSON)
        """
        if not url:
            url = self.full_url(fork=fork)
        # NOTE(review): certificate verification is disabled here --
        # presumably because of an internal CA; confirm this is intended.
        f = requests.get(url + msg, verify=False)
        return f.status_code, f.json()

    def get_fork(self, count: int = 20) -> bool:
        """
        Gets the fork for specific repo and stores its ssh clone URL in
        ``self.clone_url``.

        :param count: How many times we would like to test if fork exist.
                    Sometimes getting fork takes a bit longer.
        :return: True when the fork was found, False otherwise
        """
        logger.debug(f"get_fork(): {self.full_url()} ")
        for attempt in range(count):
            (status_code,
             req) = self.get_status_and_dict_from_request(msg="urls")
            if status_code == 400:
                logger.warning("Unauthorized access to url %s",
                               self.full_url())
                return False
            if status_code == 200 and req:
                logger.debug("response get_fork: %s", req)
                self.clone_url = req["urls"]["ssh"]
                self.clone_url = self.clone_url.format(
                    username=self.betka_config["pagure_user"])
                return True
            logger.info(
                "Fork %s is not ready yet. Wait 2 more seconds. "
                "Status code %s ",
                self.full_url(),
                status_code,
            )
            # Waiting only makes sense when another attempt follows.
            if attempt < count - 1:
                time.sleep(2)
        logger.info("Betka does not have a fork yet.")
        return False

    def check_config_in_branch(self, downstream_dir: Path,
                               branch: str) -> bool:
        """
        Checks if the downstream branch contains 'bot-cfg.yml' file

        :param downstream_dir: Path to downstream directory where betka
                               expects `bot-cfg.yml` file
        :param branch: Branch which betka checks
        :return: True if config file exists
                 False is config file does not exist
        """
        try:
            self.git.call_git_cmd(f"checkout {branch}",
                                  msg="Change downstream branch")
        except CalledProcessError:
            logger.debug(f"It looks like {branch} does not exist yet. ")
            return False
        if (downstream_dir / DOWNSTREAM_CONFIG_FILE).exists():
            logger.info("Configuration file %r exists in branch.",
                        DOWNSTREAM_CONFIG_FILE)
            return True
        else:
            logger.info(
                "Configuration file %r does not exist in branch.",
                DOWNSTREAM_CONFIG_FILE,
            )
            return False

    def get_valid_branches(self, downstream_dir: Path,
                           branch_list: List[str]) -> List[str]:
        """
        Gets the valid branches which contains `bot-cfg.yml` file.

        :param downstream_dir: path to the downstream checkout
        :param branch_list: branches to inspect
        :return: list of valid branches
        """
        valid_branches = []
        for brn in branch_list:
            logger.debug(
                "Checking 'bot-cfg.yml' in git directory in branch %r", brn)
            if self.check_config_in_branch(downstream_dir=downstream_dir,
                                           branch=brn):
                valid_branches.append(brn)
        if not valid_branches:
            logger.info("%r does not contain any branch for syncing.",
                        self.image)
            return []
        return valid_branches

    def get_branches(self) -> List[str]:
        """
        Gets all branches of the main (non-fork) repository.

        The request is retried up to 20 times until it returns status 200.

        :return: list of branch names, or an empty list when the request
                 never succeeded
        """
        attempts = 20
        for attempt in range(attempts):
            (status_code,
             req) = self.get_status_and_dict_from_request(msg="branches",
                                                          fork=False)
            if status_code == 200:
                logger.debug(req)
                return req["branches"]
            logger.info(
                "Status code for branches %s is %s",
                self.full_url(fork=False),
                status_code,
            )
            # Waiting only makes sense when another attempt follows.
            if attempt < attempts - 1:
                time.sleep(2)
        logger.info("Betka does not have a branch yet.")
        return []

    def file_pull_request(
        self,
        pr_msg: str,
        upstream_hash: str,
        branch: str,
        pr_id: int,
        pr=False,
        pr_num=None,
    ) -> Dict:
        """
        Files a Pull Request with specific messages and text.

        :param pr_msg: description message used in pull request
        :param upstream_hash: upstream commit hash stored in the schema
        :param branch: specify downstream branch for file a Pull Request
        :param pr_id: id of an already existing downstream pull request;
                      a falsy value means a new one has to be created
        :param pr: flag if we file a upstream master Pull Request
                   or upstream Pull Request itself
        :param pr_num: pull request number
        :return: schema for sending email; empty when creating the pull
                 request did not yield an id
        """
        title = self.betka_config["downstream_master_msg"]
        betka_schema: Dict = {}
        text_pr = "PR" if pr else "master"
        logger.info(
            f"Downstream {text_pr} sync pull request for image {self.image} is {pr_id}"
        )
        if not pr_id:
            # In case downstream Pull Request does not exist, file a new one
            logger.debug(f"Upstream {text_pr} to downstream PR not found.")
            pr_id = self.create_pagure_pull_request(title=title,
                                                    desc_msg=pr_msg,
                                                    branch=branch)
            if pr_id is None:
                return betka_schema
            betka_schema["status"] = "created"
        else:
            # Update pull request against the latest upstream master branch
            logger.debug(
                f"Sync from upstream to downstream PR={pr_id} found.")
            betka_schema["status"] = "updated"

        # NOTE(review): this is a substring match, so several configured
        # repositories may be concatenated here -- confirm the key format.
        betka_schema["downstream_repo"] = "".join(
            x for x in self.betka_config["dist_git_repos"]
            if self.image in x)
        betka_schema["pagure"] = self.config_json["pagure_host"]
        betka_schema["commit"] = upstream_hash
        betka_schema["pr_number"] = pr_num if pr else pr_id
        betka_schema["namespace_containers"] = self.config_json[
            "namespace_containers"]
        return betka_schema

    def get_bot_cfg_yaml(self, branch: str) -> Dict:
        """
        Fetch the bot configuration of the current image for ``branch``.

        :param branch: branch the configuration is read from
        :return: bot-cfg.yml config
        """
        source_url = cfg_url(
            repo=f"{self.config_json['namespace_containers']}/{self.image}",
            branch=branch,
        )
        return fetch_config("upstream-to-downstream", source_url)
def clone_git_repo(repo: str, temp_dir: str) -> Path:
    """
    Clone ``repo`` into ``temp_dir`` by delegating to ``Git.clone_repo``.

    :param repo: repository to clone
    :param temp_dir: directory the clone is placed in
    :return: whatever ``Git.clone_repo`` returns for the clone
    """
    cloned_path = Git.clone_repo(repo, temp_dir)
    return cloned_path
def test_is_branch_synced(all_branches, expected):
    """Branches selected for synchronization match the expected list."""
    config = betka_yaml()
    selected = Git.branches_to_synchronize(betka_config=config,
                                           all_branches=all_branches)
    assert selected == expected
def test_jira_msg(config, commit_msg):
    """The Jira-related message built from ``config`` equals ``commit_msg``."""
    produced = Git.get_msg_from_jira_ticket(config)
    assert produced == commit_msg