class Gateway(object):
    """Repo Controller.

    Groups the operations the gateway exposes around source repositories:
    listing repos for an SCM account, managing webhooks (and the ELB in
    front of them), querying branches/commits from the local workspaces
    under ``BASE_DIR``, and copying single files to / deleting them from S3.
    """

    # Root directory of the local workspaces, laid out as
    # <BASE_DIR>/<vendor>/<repo_owner>/<repo_name>.
    BASE_DIR = '/ax/data/repos'
    BRANCH_CACHE_TTL = 5 * 60  # 5 minutes TTL as we expect we won't finish upgrade within 5 minutes
    # Prefix of the redis keys used for the per-workspace branch cache.
    NAMESPACE = 'gateway'
    CLUSTER_NAME_ID = os.environ.get('AX_CLUSTER')
    CUSTOMER_ID = os.environ.get('AX_CUSTOMER_ID')
    S3_BUCKET_NAME = 'applatix-cluster-{account}-{seq}'.format(account=CUSTOMER_ID, seq=0)
    # NOTE: evaluated once, when the class body is executed.
    s3_bucket = boto3.resource('s3').Bucket(S3_BUCKET_NAME)

    def __init__(self):
        """Instantiate the service clients, the repo manager and the event trigger."""
        self.axdb_client = AxdbClient()
        self.axops_client = AxopsClient()
        self.axsys_client = AxsysClient()
        self.redis_client = RedisClient('redis', db=DB_REPORTING)
        self.event_notification_client = EventNotificationClient(FACILITY_GATEWAY)
        self.scm_clients = {
            ScmVendors.BITBUCKET: BitBucketClient(),
            ScmVendors.GITHUB: GitHubClient(),
            ScmVendors.GITLAB: GitLabClient()
        }
        self.repo_manager = RepoManager(DEFAULT_CONCURRENCY, DEFAULT_INTERVAL)
        self.event_trigger = EventTrigger()

    def get_repos(self, scm_type, url, username, password):
        """Get all repos owned by the user.

        :param scm_type: one of the ``ScmVendors`` values; selects how repos are listed.
        :param url: repo URL; only used for the generic git vendor.
        :param username: user name (AWS access key id for CodeCommit).
        :param password: password (AWS secret access key for CodeCommit).
        :return: for BitBucket/GitHub/GitLab whatever the vendor client returns;
            for git ``{url: url}``; for CodeCommit a ``{repo_url: repo_url}`` dict;
            ``{}`` for any other ``scm_type``.
        :raises AXApiInvalidParam: when the SCM server cannot be reached. A
            notification is sent to the notification center before raising.
        """
        if scm_type in {ScmVendors.BITBUCKET, ScmVendors.GITHUB, ScmVendors.GITLAB}:
            try:
                repos = self.scm_clients[scm_type].get_repos(username, password)
            except Exception as e:
                logger.warning('Unable to connect to %s: %s', scm_type, e)
                detail = {
                    'type': scm_type,
                    'username': username,
                    # Not every exception carries a ``detail`` attribute; fall back
                    # to the exception itself so the real error is not masked by
                    # an AttributeError.
                    'error': str(getattr(e, 'detail', e))
                }
                self.event_notification_client.send_message_to_notification_center(
                    CODE_CONFIGURATION_SCM_CONNECTION_ERROR, detail=detail)
                raise AXApiInvalidParam('Cannot connect to %s server' % scm_type)
            else:
                return repos

        if scm_type == ScmVendors.GIT:
            _, vendor, repo_owner, repo_name = Gateway.parse_repo(url)
            # Start from an empty scratch directory for the connectivity probe.
            path = '/tmp/{}/{}/{}'.format(vendor, repo_owner, repo_name)
            if os.path.isfile(path):
                os.remove(path)
            if os.path.isdir(path):
                shutil.rmtree(path)
            os.makedirs(path)
            client = GitClient(path=path, repo=url, username=username, password=password)
            try:
                client.list_remote()
            except Exception as e:
                logger.warning('Unable to connect to git server (%s): %s', url, e)
                detail = {
                    'type': scm_type,
                    'url': url,
                    'username': username,
                    'error': str(e)
                }
                self.event_notification_client.send_message_to_notification_center(
                    CODE_CONFIGURATION_SCM_CONNECTION_ERROR, detail=detail)
                raise AXApiInvalidParam('Cannot connect to git server')
            else:
                return {url: url}

        if scm_type == ScmVendors.CODECOMMIT:
            repos = {}
            region = 'us-east-1'
            default_url_format = 'https://git-codecommit.{}.amazonaws.com/v1/repos/{}'
            client = boto3.client('codecommit', aws_access_key_id=username,
                                  aws_secret_access_key=password, region_name=region)
            try:
                response = client.list_repositories().get('repositories', [])
                for r in response:
                    repo_url = default_url_format.format(region, r['repositoryName'])
                    repos[repo_url] = repo_url
            except Exception as exc:
                detail = {
                    'type': scm_type,
                    'region': region,
                    'url': default_url_format.format(region, ''),
                    'username': username,
                    'error': 'Cannot connect to CodeCommit' + str(exc)
                }
                self.event_notification_client.send_message_to_notification_center(
                    CODE_CONFIGURATION_SCM_CONNECTION_ERROR, detail=detail)
                raise AXApiInvalidParam('Cannot connect to CodeCommit: %s' % exc)
            else:
                return repos

        return {}

    @staticmethod
    def parse_repo(repo):
        """Parse repo url into 4-tuple (protocol, vendor, repo_owner, repo_name).

        :param repo: repo URL; its path must look like ``/<owner>/<name>``.
        :return: ``(scheme, hostname, repo_owner, repo_name)``.
        :raises AXScmException: when the URL path does not match the expected shape.
        """
        parsed_url = urlparse(repo)
        protocol, vendor = parsed_url.scheme, parsed_url.hostname
        m = re.match(r'/([a-zA-Z0-9\-]+)/([a-zA-Z0-9_.\-/]+)', parsed_url.path)
        if not m:
            raise AXScmException('Illegal repo URL', detail='Illegal repo URL ({})'.format(repo))
        repo_owner, repo_name = m.groups()
        return protocol, vendor, repo_owner, repo_name

    def has_webhook(self, repo):
        """Test if there is any repo, other than ``repo``, which uses webhook.

        :param repo: repo URL to exclude from the check.
        :return: True when some SCM tool has ``use_webhook`` set and lists at
            least one repo different from ``repo``; False otherwise.
        """
        tools = self.axops_client.get_tools(category='scm')
        return any(
            tool.get('use_webhook', False) and (set(tool.get('repos', [])) - {repo})
            for tool in tools
        )

    def get_webhook(self, vendor, repo):
        """Get webhook

        :param vendor: key into ``self.scm_clients``.
        :param repo: repo URL.
        :returns: whatever the vendor client's ``get_webhook`` returns.
        """
        logger.info('Retrieving webhook (repo: %s) ...', repo)
        return self.scm_clients[vendor].get_webhook(repo)

    def create_webhook(self, vendor, repo):
        """Create webhook

        Creates the ELB for webhook traffic, waits until its hostname resolves,
        then creates the webhook through the vendor client. Each failing step
        sends a notification before raising.

        :param vendor: key into ``self.scm_clients``.
        :param repo: repo URL.
        :returns: ``{}`` on success.
        :raises AXApiInternalError: ELB creation/verification failed, or the
            webhook could not be configured.
        :raises AXApiInvalidParam: the vendor client reported an authentication
            or permission failure.
        """

        # Retried with a fixed 5000 wait until a 20 * 60 * 1000 max delay
        # (presumably milliseconds, i.e. 5 s / 20 min -- confirm against the
        # retry library in use).
        @retry(wait_fixed=5000, stop_max_delay=20 * 60 * 1000)
        def _verify_elb(hostname):
            try:
                logger.info('Verifying ELB (%s) ...', hostname)
                ip = socket.gethostbyname(hostname)
                logger.info('Successfully resolved ELB (%s) to IP (%s)', hostname, ip)
            except Exception as e:
                logger.error('ELB not ready: %s', str(e))
                raise AXApiInternalError('ELB not ready', str(e))

        ip_range = self.scm_clients[vendor].get_webhook_whitelist()

        # Create ELB
        payload = {'ip_range': ip_range, 'external_port': 8443, 'internal_port': 8087}
        try:
            logger.info('Creating ELB for webhook ...')
            result = self.axsys_client.create_webhook(**payload)
        except Exception as e:
            logger.error('Failed to create ELB for webhook: %s', str(e))
            self.event_notification_client.send_message_to_notification_center(
                CODE_JOB_CI_ELB_CREATION_FAILURE, detail=payload)
            raise AXApiInternalError('Failed to create ELB for webhook', str(e))
        else:
            logger.info('Successfully created ELB for webhook')

        # Verify ELB
        hostname = result['hostname']
        try:
            _verify_elb(hostname)
        except Exception as e:
            logger.error('Timed out on waiting for ELB to be available: %s', str(e))
            self.event_notification_client.send_message_to_notification_center(
                CODE_JOB_CI_ELB_VERIFICATION_TIMEOUT, detail={'hostname': hostname})
            raise AXApiInternalError('Timed out on waiting for ELB to be available: %s' % str(e))

        # Create webhook
        try:
            logger.info('Creating webhook (repo: %s) ...', repo)
            self.scm_clients[vendor].create_webhook(repo)
        except AXApiAuthFailed as e:
            logger.error('Invalid credential supplied')
            detail = {
                'repo': repo,
                'error': 'Invalid credential supplied:' + str(e)
            }
            self.event_notification_client.send_message_to_notification_center(
                CODE_JOB_CI_WEBHOOK_CREATION_FAILURE, detail=detail)
            raise AXApiInvalidParam('User authentication failed', detail=str(e))
        except AXApiForbiddenReq as e:
            logger.error('Supplied credential is valid but having insufficient permission')
            detail = {
                'repo': repo,
                'error': 'Supplied credential is valid but having insufficient permission:' + str(e)
            }
            self.event_notification_client.send_message_to_notification_center(
                CODE_JOB_CI_WEBHOOK_CREATION_FAILURE, detail=detail)
            raise AXApiInvalidParam('User has insufficient permission', detail=str(e))
        except Exception as e:
            logger.error('Failed to configure webhook: %s', e)
            detail = {
                'repo': repo,
                'error': 'Failed to configure webhook:' + str(e)
            }
            self.event_notification_client.send_message_to_notification_center(
                CODE_JOB_CI_WEBHOOK_CREATION_FAILURE, detail=detail)
            raise AXApiInternalError('Failed to configure webhook', str(e))
        else:
            logger.info('Successfully created webhook (repo: %s)', repo)
            return {}

    def delete_webhook(self, vendor, repo):
        """Delete webhook

        Deletes the webhook through the vendor client, then deletes the ELB
        when no other repo still uses a webhook (see ``has_webhook``).

        :param vendor: key into ``self.scm_clients``.
        :param repo: repo URL.
        :returns: ``{}`` on success.
        :raises AXApiInvalidParam: authentication or permission failure.
        :raises AXApiInternalError: webhook or ELB deletion failed.
        """
        # Delete webhook
        try:
            logger.info('Deleting webhook (repo: %s) ...', repo)
            self.scm_clients[vendor].delete_webhook(repo)
        except AXApiAuthFailed as e:
            logger.error('Invalid credential supplied')
            detail = {
                'repo': repo,
                'error': 'Invalid credential supplied:' + str(e)
            }
            self.event_notification_client.send_message_to_notification_center(
                CODE_JOB_CI_WEBHOOK_DELETION_FAILURE, detail=detail)
            raise AXApiInvalidParam('User authentication failed', detail=str(e))
        except AXApiForbiddenReq as e:
            logger.error('Supplied credential is valid but having insufficient permission')
            detail = {
                'repo': repo,
                'error': 'Supplied credential is valid but having insufficient permission:' + str(e)
            }
            self.event_notification_client.send_message_to_notification_center(
                CODE_JOB_CI_WEBHOOK_DELETION_FAILURE, detail=detail)
            raise AXApiInvalidParam('User has insufficient permission', detail=str(e))
        except Exception as e:
            logger.error('Failed to delete webhook: %s', e)
            detail = {
                'repo': repo,
                'error': 'Failed to delete webhook:' + str(e)
            }
            self.event_notification_client.send_message_to_notification_center(
                CODE_JOB_CI_WEBHOOK_DELETION_FAILURE, detail=detail)
            raise AXApiInternalError('Failed to delete webhook', str(e))
        else:
            logger.info('Successfully deleted webhook (repo: %s)', repo)

        # Delete ELB
        try:
            if not self.has_webhook(repo):
                logger.info('Deleting ELB for webhook ...')
                self.axsys_client.delete_webhook()
        except Exception as e:
            logger.error('Failed to delete ELB for webhook: %s', str(e))
            detail = {
                'repo': repo,
                'error': 'Failed to delete ELB for webhook' + str(e)
            }
            self.event_notification_client.send_message_to_notification_center(
                CODE_JOB_CI_ELB_DELETION_FAILURE, detail=detail)
            raise AXApiInternalError('Failed to delete ELB for webhook', str(e))
        else:
            logger.info('Successfully deleted ELB for webhook')
            return {}

    def purge_branches(self, repo, branch=None):
        """Purge branch heads.

        :param repo: repo URL (required).
        :param branch: when given only this branch head is purged, otherwise
            all branch heads of the repo are.
        :return: None
        :raises AXApiInvalidParam: ``repo`` is empty.
        :raises AXApiInternalError: the axdb call failed.
        """
        if not repo:
            raise AXApiInvalidParam('Missing required parameter', 'Missing required parameter (repo)')
        logger.info('Purging branch heads (repo: %s, branch: %s) ...', repo, branch)

        try:
            if not branch:
                self.axdb_client.purge_branch_heads(repo)
            else:
                self.axdb_client.purge_branch_head(repo, branch)
        except Exception as e:
            message = 'Unable to purge branch heads'
            detail = 'Unable to purge branch heads (repo: {}, branch: {}): {}'.format(repo, branch, str(e))
            logger.error(detail)
            raise AXApiInternalError(message, detail)
        else:
            logger.info('Successfully purged branch heads')

    def get_branches(self, repo=None, branch=None, order_by=None, limit=None):
        """Get branches.

        :param repo: URL-quoted repo URL; when omitted every workspace found
            under ``BASE_DIR`` is scanned.
        :param branch: optional filter; ``*`` is translated to ``.*`` and the
            result is used as a regular expression on the branch name.
        :param order_by: ``commit_date``, ``-commit_date``, ``-native`` or
            anything else for the default (repo, name) ascending order.
        :param limit: when truthy, keep only the first ``limit`` entries.
        :return: list of dicts with keys ``repo``, ``name``, ``revision`` and
            ``commit_date``.
        """

        def _get_branches(workspace):
            """Retrieve list of remote branches in the workspace.

            Served from the redis cache when present; otherwise read through
            the git client and cached for ``BRANCH_CACHE_TTL``.

            :param workspace: workspace directory.
            :return: a list of dictionaries; empty when the scan fails.
            """
            try:
                key = '{}:{}'.format(Gateway.NAMESPACE, workspace)
                if self.redis_client.exists(key):
                    logger.info('Loading cache (workspace: %s) ...', workspace)
                    return self.redis_client.get(key, decoder=json.loads)

                logger.info('Scanning workspace (%s) ...', workspace)
                git_client = GitClient(path=workspace, read_only=True)
                remote = git_client.get_remote()
                results = [
                    {
                        'repo': remote,
                        'name': head['reference'],
                        'revision': head['commit'],
                        'commit_date': head['commit_date']
                    }
                    for head in git_client.get_remote_heads()
                ]
                logger.info('Saving cache (workspace: %s) ...', workspace)
                self.redis_client.set(key, results, expire=Gateway.BRANCH_CACHE_TTL, encoder=json.dumps)
                return results
            except Exception as e:
                # Best effort: one broken workspace must not fail the whole listing.
                logger.warning('Failed to scan workspace (%s): %s', workspace, e)
                return []

        logger.info('Retrieving branches (repo: %s, branch: %s) ...', repo, branch)
        if repo:
            repo = unquote(repo)
            _, vendor, repo_owner, repo_name = self.parse_repo(repo)
            workspaces = ['{}/{}/{}/{}'.format(Gateway.BASE_DIR, vendor, repo_owner, repo_name)]
        else:
            # A workspace is any directory that directly contains '.git';
            # strip the trailing '/.git' (5 characters) to get it.
            git_dirs = [entry[0] for entry in os.walk(Gateway.BASE_DIR) if entry[0].endswith('/.git')]
            workspaces = [git_dir[:-5] for git_dir in git_dirs]

        branches = []
        with ThreadPoolExecutor(max_workers=20) as executor:
            futures = [executor.submit(_get_branches, workspace) for workspace in workspaces]
            for future in as_completed(futures):
                try:
                    data = future.result()
                except Exception as e:
                    logger.warning('Unexpected exception occurred during processing: %s', e)
                else:
                    branches.extend(data)

        if branch:
            pattern = '.*{}.*'.format(branch.replace('*', '.*'))
            branches = [b for b in branches if re.match(pattern, b['name'])]

        if order_by == 'commit_date':
            branches = sorted(branches, key=lambda v: v['commit_date'])
        elif order_by == '-commit_date':
            branches = sorted(branches, key=lambda v: v['commit_date'], reverse=True)
        elif order_by == '-native':
            branches = sorted(branches, key=lambda v: (v['repo'], v['name']), reverse=True)
        else:
            branches = sorted(branches, key=lambda v: (v['repo'], v['name']))

        if limit:
            branches = branches[:limit]
        logger.info('Successfully retrieved %s branches', len(branches))
        return branches

    @staticmethod
    def _get_commits(workspace, branch=None, since=None, until=None, commit=None, author=None,
                     committer=None, description=None, limit=None):
        """Search for commits in a workspace.

        A leading ``~`` on ``commit``, ``author``, ``committer`` or
        ``description`` is stripped before the value is passed to the git client.

        :return: whatever ``GitClient.get_commits`` returns, or None when the
            scan fails (the failure is only logged).
        """

        def _strip_tilde(value):
            """Drop one leading '~' from a truthy string; return anything else unchanged."""
            return value[1:] if value and value.startswith('~') else value

        try:
            logger.info('Scanning workspace (%s) for commits ...', workspace)
            git_client = GitClient(path=workspace, read_only=True)
            return git_client.get_commits(branch=branch,
                                          commit=_strip_tilde(commit),
                                          since=since,
                                          until=until,
                                          author=_strip_tilde(author),
                                          committer=_strip_tilde(committer),
                                          description=_strip_tilde(description),
                                          limit=limit)
        except Exception as e:
            logger.warning('Failed to scan workspace (%s): %s', workspace, e)

    @staticmethod
    def _get_commit(workspace, commit):
        """Get a commit from a workspace.

        :return: whatever ``GitClient.get_commit`` returns, or None when the
            lookup fails (the failure is only logged).
        """
        try:
            logger.info('Scanning workspace (%s) for commit (%s) ...', workspace, commit)
            git_client = GitClient(path=workspace, read_only=True)
            return git_client.get_commit(commit)
        except Exception as e:
            logger.warning('Failed to scan workspace (%s): %s', workspace, e)

    @staticmethod
    def _parse_repo_branch(repo, branch, repo_branch):
        """Parse repo / branch / repo_branch into a workspace -> branches mapping.

        :param repo: URL-quoted repo URL; takes precedence when given.
        :param branch: single branch name; used with ``repo`` or when neither
            ``repo`` nor ``repo_branch`` is given.
        :param repo_branch: JSON object mapping URL-quoted repo URLs to an
            iterable of branch names; used only when ``repo`` is empty.
        :return: dict keyed by workspace directory. Values are a list
            (``[branch]`` or ``[]``) except in the ``repo_branch`` case, where
            they are sets of branch names.
        :raises AXApiInvalidParam: ``repo`` or ``repo_branch`` cannot be parsed.
        """
        if repo:
            try:
                repo = unquote(repo)
                _, vendor, repo_owner, repo_name = Gateway.parse_repo(repo)
            except Exception as e:
                msg = 'Unable to parse repo: %s' % str(e)
                logger.error(msg)
                raise AXApiInvalidParam('Unable to parse repo', msg)
            workspace = '{}/{}/{}/{}'.format(Gateway.BASE_DIR, vendor, repo_owner, repo_name)
            return {workspace: [branch] if branch else []}

        if repo_branch:
            try:
                repo_branch = json.loads(repo_branch)
                workspaces = {}
                # Iterate items so the branches are read with the original
                # (possibly percent-encoded) key rather than the unquoted one.
                for quoted_repo, repo_branches in repo_branch.items():
                    _, vendor, repo_owner, repo_name = Gateway.parse_repo(unquote(quoted_repo))
                    workspace = '{}/{}/{}/{}'.format(Gateway.BASE_DIR, vendor, repo_owner, repo_name)
                    workspaces.setdefault(workspace, set()).update(repo_branches)
            except Exception as e:
                msg = 'Unable to parse repo_branch: %s' % str(e)
                logger.error(msg)
                raise AXApiInvalidParam('Unable to parse repo_branch', msg)
            return workspaces

        # Neither given: every workspace on disk ('/.git' suffix stripped).
        git_dirs = [entry[0] for entry in os.walk(Gateway.BASE_DIR) if entry[0].endswith('/.git')]
        return {git_dir[:-5]: [branch] if branch else [] for git_dir in git_dirs}

    @staticmethod
    def _put_file(repo, branch, path):
        """Put a file in s3.

        Reads the file at ``path`` on ``branch`` from the repo's local
        workspace and uploads it to ``S3_BUCKET_NAME``.

        :param repo: repo URL.
        :param branch: branch to read the file from.
        :param path: path passed to the git client as ``subdir``.
        :return: dict with ``bucket``, ``key`` and ``etag``.
        :raises AXApiInvalidParam: no workspace exists for the repo, or no
            file was found.
        :raises AXApiInternalError: reading from git or uploading to S3 failed.
        """
        _, vendor, repo_owner, repo_name = Gateway.parse_repo(repo)
        workspace = '{}/{}/{}/{}'.format(Gateway.BASE_DIR, vendor, repo_owner, repo_name)
        if not os.path.isdir(workspace):
            raise AXApiInvalidParam('Invalid repository', 'Invalid repository ({})'.format(repo))

        try:
            logger.info('Extracting file content from repository (repo: %s, branch: %s, path: %s) ...',
                        repo, branch, path)
            git_client = GitClient(path=workspace, read_only=True)
            files = git_client.get_files(branch=branch, subdir=path, binary_mode=True)
        except Exception as e:
            message = 'Failed to extract file content'
            detail = '{}: {}'.format(message, str(e))
            logger.error(detail)
            raise AXApiInternalError(message, detail)
        else:
            if len(files) == 0:
                raise AXApiInvalidParam('Unable to locate file with given information')
            file_content = files[0]['content']
            logger.info('Successfully extracted file content')

        try:
            # Cluster name id always has the form <cluster_name>-<36_bytes_long_cluster_id>
            cluster_name, cluster_id = Gateway.CLUSTER_NAME_ID[:-37], Gateway.CLUSTER_NAME_ID[-36:]
            key = '{cluster_name}/{cluster_id}/{vendor}/{repo_owner}/{repo_name}/{branch}/{path}'.format(
                cluster_name=cluster_name, cluster_id=cluster_id, vendor=vendor,
                repo_owner=repo_owner, repo_name=repo_name, branch=branch, path=path)
            logger.info('Uploading file content to s3 (bucket: %s, key: %s) ...', Gateway.S3_BUCKET_NAME, key)
            response = Gateway.s3_bucket.Object(key).put(Body=file_content)
            etag = response.get('ETag')
            if etag:
                # The ETag is decoded as JSON (presumably it arrives as a
                # double-quoted string and this strips the quotes -- confirm).
                etag = json.loads(etag)
        except Exception as e:
            message = 'Failed to upload file content'
            detail = '{}: {}'.format(message, str(e))
            logger.error(detail)
            raise AXApiInternalError(message, detail)
        else:
            logger.info('Successfully uploaded file content')
            return {'bucket': Gateway.S3_BUCKET_NAME, 'key': key, 'etag': etag}

    @staticmethod
    def _delete_file(repo, branch, path):
        """Delete a file from s3.

        :param repo: repo URL.
        :param branch: branch component of the S3 key.
        :param path: path component of the S3 key.
        :return: dict with ``bucket`` and ``key``.
        :raises AXApiInternalError: building the key or the S3 delete failed.
        """
        _, vendor, repo_owner, repo_name = Gateway.parse_repo(repo)
        try:
            # Same key layout as _put_file: <cluster_name>/<cluster_id>/<vendor>/...
            cluster_name, cluster_id = Gateway.CLUSTER_NAME_ID[:-37], Gateway.CLUSTER_NAME_ID[-36:]
            key = '{cluster_name}/{cluster_id}/{vendor}/{repo_owner}/{repo_name}/{branch}/{path}'.format(
                cluster_name=cluster_name, cluster_id=cluster_id, vendor=vendor,
                repo_owner=repo_owner, repo_name=repo_name, branch=branch, path=path)
            logger.info('Deleting file from s3 (bucket: %s, key: %s) ...', Gateway.S3_BUCKET_NAME, key)
            Gateway.s3_bucket.Object(key).delete()
        except Exception as e:
            message = 'Failed to delete file'
            detail = '{}: {}'.format(message, str(e))
            logger.error(detail)
            raise AXApiInternalError(message, detail)
        else:
            logger.info('Successfully deleted file')
            return {'bucket': Gateway.S3_BUCKET_NAME, 'key': key}

    @staticmethod
    def init_jira_client(axops_client, url=None, username=None, password=None):
        """Initialize an Jira client

        :param axops_client: client used to look up the stored JIRA tool
            configuration when any of ``url``/``username``/``password`` is None.
        :return: a ``JiraClient``.
        :raises AXApiInvalidParam: credentials are incomplete and no JIRA tool
            is configured.
        """

        def get_jira_configuration():
            """Return url/username/password of the first configured JIRA tool, or {}."""
            js = axops_client.get_tools(category='issue_management', type='jira')
            if not js:
                return dict()
            first = js[0]
            return {'url': first['url'], 'username': first['username'], 'password': first['password']}

        if url is None or username is None or password is None:
            conf = get_jira_configuration()
            if not conf:
                raise AXApiInvalidParam('No JIRA configured')
            url, username, password = conf['url'], conf['username'], conf['password']
        return JiraClient(url, username, password)

    # Verify whether this function is still needed
    def check_github_whitelist(self):
        """Re-create the webhook ELB when GitHub's advertised whitelist changed.

        Compares the cached whitelist with the one the GitHub client reports;
        on a difference the ELB is (re)created with the new range and the
        cache is updated. Failures are logged and notified, not raised.
        """
        if not self.is_github_webhook_enabled():
            logger.info('No GitHub webhook configured')
            return
        configured = self.get_from_cache()
        logger.info('The configured GitHub webhook whitelist is %s', configured)
        advertised = self.scm_clients[ScmVendors.GITHUB].get_webhook_whitelist()
        logger.info('The GitHub webhook whitelist is %s', advertised)
        if set(configured) == set(advertised):
            logger.info('No update needed')
            return

        # Create ELB
        payload = {'ip_range': advertised, 'external_port': 8443, 'internal_port': 8087}
        try:
            logger.info('Creating ELB for webhook ...')
            self.axsys_client.create_webhook(**payload)
        except Exception as exc:
            logger.error('Failed to create ELB for webhook: %s', str(exc))
            self.event_notification_client.send_message_to_notification_center(
                CODE_JOB_CI_ELB_CREATION_FAILURE, detail=payload)
        else:
            # Update cache
            self.write_to_cache(advertised)
            logger.info('Successfully updated ELB for webhook')

    def is_github_webhook_enabled(self):
        """ Check whether the webhook is configured or not

        :return: True when at least one GitHub tool has a truthy ``use_webhook``.
        """
        github_data = self.axops_client.get_tools(type='github')
        # .get() keeps this consistent with has_webhook(): a tool without the
        # key counts as "webhook disabled" instead of raising KeyError.
        return any(each.get('use_webhook', False) for each in github_data)

    @staticmethod
    def write_to_cache(ip_range):
        """ Store the webhook whitelist info

        :param ip_range: JSON-serialisable whitelist written to the cache file.
        """
        cache_file = '/tmp/github_webhook_whitelist'
        with open(cache_file, 'w+') as f:
            f.write(json.dumps(ip_range))

    def get_from_cache(self):
        """ Get cached webhook whitelist info, otherwise get from axmon

        :return: the whitelist read from the cache file when it exists;
            otherwise the ``ip_ranges`` returned by ``axsys_client.get_webhook()``
            (which is then cached); an empty list when that call fails.
        """
        cache_file = '/tmp/github_webhook_whitelist'
        ip_range = list()
        if os.path.exists(cache_file):
            with open(cache_file, 'r+') as f:
                data = f.readlines()
                # write_to_cache() stores the JSON document on the first line.
                ip_range = json.loads(data[0])
        else:
            logger.debug('No cache file')
            try:
                data = self.axsys_client.get_webhook()
            except Exception as exc:
                logger.warning(exc)
            else:
                logger.info('Write whitelist info to cache file')
                ip_range = data['ip_ranges']
                self.write_to_cache(ip_range)
        return ip_range
class RepoManager(object):
    """Manage all repositories in track.

    Periodically reconciles the repos configured in axops with the
    workspaces on disk, fetches each tracked repo, records branch-head
    changes in axdb and publishes the resulting events to Kafka.
    """

    def __init__(self, concurrency, interval):
        """
        :param concurrency: number of worker threads used to update repos.
        :param interval: value passed to ``time.sleep`` between two scans.
        """
        self.axdb_client = AxdbClient()
        self.axops_client = AxopsClient()
        self.concurrency = concurrency
        self.interval = interval

    def run(self):
        """Create workspaces and perform initial/incremental fetch.

        Loops forever: connect, synchronize, update every repo in a thread
        pool, flag the change in redis, then sleep ``self.interval``. A
        failing scan is logged and the loop continues.
        """
        while True:
            logger.info('Start repository scan ...')
            try:
                self.connect()
                repos, has_change = self.synchronize()
                with ThreadPoolExecutor(max_workers=self.concurrency) as executor:
                    futures = [executor.submit(self.update_repo, **repo) for repo in repos]
                    for future in as_completed(futures):
                        # Always collect the result so that a worker failure is
                        # logged even after a change has already been detected.
                        try:
                            changed = future.result()
                        except Exception as e:
                            logger.warning('Unexpected exception occurred during processing: %s', e)
                        else:
                            if changed:
                                has_change = True
                # Notify UI backend about a change in repos
                if has_change:
                    key = '{}:repos_updated'.format(NAMESPACE)
                    redis_client.set(key, value=str(int(time.time())))
            except Exception as e:
                logger.warning('Repository scan failed: %s', str(e))
            else:
                logger.info('Repository scan completed\n')
            finally:
                time.sleep(self.interval)

    @retry(wait_fixed=5000)
    def connect(self):
        """Connect to axops.

        :raises ConnectionError: when ``axops_client.ping()`` is falsy; the
            ``retry`` decorator then calls this method again.
        """
        connected = self.axops_client.ping()
        if not connected:
            msg = 'Unable to connect to axops'
            logger.warning(msg)
            raise ConnectionError(msg)

    def synchronize(self):
        """Synchronize all repos.

        Removes every workspace on disk that is no longer tracked: its branch
        heads are purged from axdb, the directory is deleted and the matching
        redis keys are dropped. A gc event is sent when anything was removed.

        :return: ``(repos, has_change)`` -- the tracked repos (see
            ``get_all_repos``) and whether any untracked workspace was removed.
        """
        logger.info('Synchronizing repositories ...')

        # Get all repos
        repos = self.get_all_repos()
        logger.info('%s repositories currently in track', len(repos))

        # Get untracked repos currently on disk
        untracked_repos = self.get_untracked_repos(repos)
        logger.info('%s untracked repositories found on disk', len(untracked_repos))

        for repo, workspace in untracked_repos.items():
            # Purge all branch heads
            logger.info('Purging branch heads (repo: %s) ...', repo)
            self.axdb_client.purge_branch_heads(repo)
            # Delete workspace
            logger.info('Deleting workspace (path: %s) ...', workspace)
            shutil.rmtree(workspace)
            # Invalidate caches
            logger.info('Invalidating caches (workspace: %s) ...', workspace)
            # Raw string: same value as before, without the invalid '\:' escape.
            key_pattern = r'^{}\:{}.*$'.format(NAMESPACE, workspace)
            for k in redis_client.keys(key_pattern):
                logger.debug('Invalidating cache (key: %s) ...', k)
                redis_client.delete(k)

        # Send event to trigger garbage collection from axops
        if untracked_repos:
            kafka_client = ProducerClient()
            try:
                ci_event = {
                    'Op': "gc",
                    'Payload': {
                        'details': "Repo or branch get deleted."
                    }
                }
                kafka_client.send(AxSettings.TOPIC_GC_EVENT, key=AxSettings.TOPIC_GC_EVENT,
                                  value=ci_event, timeout=120)
            finally:
                # Release the producer created for this single event.
                kafka_client.close()

        return repos, len(untracked_repos) > 0

    def get_all_repos(self):
        """Retrieve all repos from axops.

        Repos are de-duplicated on (vendor, owner, name); an entry already
        registered over https is not replaced by a later duplicate.

        :return: list of dicts whose keys match the parameters of
            ``update_repo``.
        """
        tools = self.axops_client.get_tools(category='scm')
        repos = {}
        for tool in tools:
            for repo_url in tool.get('repos', []):
                parsed_url = urlparse(repo_url)
                protocol, vendor = parsed_url.scheme, parsed_url.hostname
                if not re.match(r'/([a-zA-Z0-9-]+)/([a-zA-Z0-9_.-]+)', parsed_url.path):
                    logger.warning('Illegal repo URL: %s, skip', parsed_url)
                    continue
                # Path is '/<owner>/<name>'; the first piece is the empty
                # string in front of the leading slash.
                _, repo_owner, repo_name = parsed_url.path.split('/', maxsplit=2)
                key = (vendor, repo_owner, repo_name)
                if key in repos and repos[key]['protocol'] == 'https':
                    continue
                repos[key] = {
                    'repo_type': tool.get('type'),
                    'vendor': vendor,
                    'protocol': protocol,
                    'repo_owner': repo_owner,
                    'repo_name': repo_name,
                    'username': tool.get('username'),
                    'password': tool.get('password'),
                    'use_webhook': tool.get('use_webhook', False)
                }
        return list(repos.values())

    @staticmethod
    def get_untracked_repos(repos):
        """Get all untracked repos.

        :param repos: tracked repos as returned by ``get_all_repos``.
        :return: dict mapping the remote of each workspace found on disk that
            is not expected for ``repos`` to that workspace's path.
        """
        # Construct list of expected workspaces
        expected_workspaces = {
            RepoManager.get_repo_workspace(repo['vendor'], repo['repo_owner'], repo['repo_name'])
            for repo in repos
        }

        # Construct list of all workspaces currently on disk
        # (directories containing '.git', with the '/.git' suffix stripped).
        git_dirs = [entry[0] for entry in os.walk(BASE_DIR) if entry[0].endswith('/.git')]
        workspaces = [git_dir[:-5] for git_dir in git_dirs]

        # Construct list of untracked repos
        untracked_repos = {}
        for workspace in workspaces:
            if workspace not in expected_workspaces:
                client = GitClient(path=workspace)
                untracked_repos[client.get_remote()] = workspace
        return untracked_repos

    @staticmethod
    def get_repo_workspace(repo_vendor, repo_owner, repo_name):
        """Return the workspace path ``<BASE_DIR>/<vendor>/<owner>/<name>``."""
        return '{}/{}/{}/{}'.format(BASE_DIR, repo_vendor, repo_owner, repo_name)

    @staticmethod
    def get_repo_url(protocol, repo_vendor, repo_owner, repo_name):
        """Return the repo URL ``<protocol>://<vendor>/<owner>/<name>``."""
        return '{}://{}/{}/{}'.format(protocol, repo_vendor, repo_owner, repo_name)

    @staticmethod
    def update_yaml(repo_client, kafka_client, repo, branch, head):
        """Using Kafka to send a event to axops to update the yamls in the axdb.

        :param repo_client: git client used to read the YAML files at ``head``.
        :param kafka_client: producer used to publish the update event.
        :param repo: repo URL.
        :param branch: branch name.
        :param head: commit the YAML files are read from.
        :return: number of YAML files sent, or -1 when they could not be read.
        """
        logger.info("Update yaml %s, %s, %s", repo, branch, head)
        try:
            yaml_contents = repo_client.get_files(commit=head, subdir=TEMPLATE_DIR, filter_yaml=True)
        except Exception as e:
            logger.error("Failed to obtain YAML files: %s", str(e))
            return -1

        # The event is sent even when no YAML file was found (empty Content),
        # so treat a falsy result as "no files".
        yaml_contents = yaml_contents or []

        # This is a partition key defined as RepoName$$$$BranchName.
        # The key is used by Kafka partition, which means it allows concurrency
        # if the events are for different repo/branch
        key = '{}$$$${}'.format(repo, branch)
        payload = {
            'Op': 'update',
            'Payload': {
                'Revision': head,
                'Content': [v['content'] for v in yaml_contents]
            }
        }
        kafka_client.send('devops_template', key=key, value=payload, timeout=120)
        logger.info("Updated YAML %s files (repo: %s, branch: %s)", len(yaml_contents), repo, branch)
        return len(yaml_contents)

    def update_repo(self, repo_type, vendor, protocol, repo_owner, repo_name, username, password, use_webhook):
        """Update a repo.

        Fetches the repo, diffs the remote branch heads against the heads
        stored in axdb, records new/updated/deleted heads, publishes YAML
        updates, and -- when no webhook is used -- one CI event per changed
        head.

        :return: True when at least one branch head was added, updated or
            deleted; False otherwise.
        """
        # Examples for the input variables
        # BASE_DIR: /ax/data/repos
        # Repo_type: github
        # Vendor: github.com
        # Protocol: https
        # Repo_owner: argo
        # Repo_name: prod.git

        is_first_fetch = False
        do_send_gc_event = False
        workspace = self.get_repo_workspace(vendor, repo_owner, repo_name)
        url = self.get_repo_url(protocol, vendor, repo_owner, repo_name)
        kafka_client = ProducerClient()

        # try/finally so the producer is closed on failure as well as success.
        try:
            if not os.path.isdir(workspace):
                os.makedirs(workspace)
                # If we recreate the workspace, we need to purge all branch heads of this repo
                self.axdb_client.purge_branch_heads(url)

            logger.info("Start scanning repository (%s) ...", url)
            if repo_type == ScmVendors.CODECOMMIT:
                client = CodeCommitClient(path=workspace, repo=url, username=username, password=password)
            else:
                client = GitClient(path=workspace, repo=url, username=username, password=password,
                                   use_permanent_credentials=True)

            # Even if there is no change, performing a fetch is harmless but has a benefit
            # that, in case the workspace is destroyed without purging the history, we can
            # still update the workspace to the proper state
            logger.info("Start fetching ...")
            client.fetch()

            # Retrieve all previous branch heads and construct hash table
            prev_heads = self.axdb_client.get_branch_heads(url)
            logger.info("Have %s branch heads (repo: %s) from previous scan", len(prev_heads), url)

            if len(prev_heads) == 0:
                is_first_fetch = True
                logger.debug("This is an initial scan as no previous heads were found")

            prev_heads_map = {
                (prev_head['repo'], prev_head['branch']): prev_head['head']
                for prev_head in prev_heads
            }

            # Retrieve all current branch heads
            current_heads = client.get_remote_heads()
            logger.info("Have %s branch heads (repo: %s) from current scan", len(current_heads), url)
            # Sorted by commit date: descending on the first fetch, ascending otherwise.
            current_heads = sorted(current_heads, key=lambda v: v['commit_date'], reverse=is_first_fetch)

            # Find out which branch heads need to be updated
            heads_to_update = list()
            heads_for_event = list()
            for current_head in current_heads:
                head = current_head['commit']
                branch = current_head['reference'].replace('refs/heads/', '')
                # pop(): whatever is left in prev_heads_map afterwards is a
                # branch that no longer exists on the remote.
                previous_head = prev_heads_map.pop((url, branch), None)
                if head != previous_head:
                    event = {'repo': url, 'branch': branch, 'head': head}
                    heads_to_update.append(event)

                    if previous_head is None:
                        logger.info("New branch detected (branch: %s, current head: %s)", branch, head)
                    else:
                        logger.info("Existing ranch head updated (branch: %s, previous: %s, current: %s)",
                                    branch, previous_head, head)
                    # Send CI event in case of policy
                    heads_for_event.append(event.copy())

            if prev_heads_map:
                logger.info("There are %s get deleted from repo: %s", prev_heads_map.keys(), url)
                do_send_gc_event = True
                for key in prev_heads_map:
                    self.axdb_client.purge_branch_head(repo=key[0], branch=key[1])

            # Invalidate cache if there is head update or branch deleted
            if heads_to_update or prev_heads_map:
                cache_key = '{}:{}'.format(NAMESPACE, workspace)
                logger.info('Invalidating cache (key: %s) ...', cache_key)
                if redis_client.exists(cache_key):
                    redis_client.delete(cache_key)

            # Update YAML contents
            count = 0
            for event in heads_to_update:
                res_count = RepoManager.update_yaml(repo_client=client,
                                                    kafka_client=kafka_client,
                                                    repo=url,
                                                    branch=event['branch'],
                                                    head=event['head'])
                # A negative count means the YAML files could not be read; the
                # head is then left unrecorded in axdb.
                if res_count >= 0:
                    self.axdb_client.set_branch_head(**event)
                    count += res_count

            logger.info("Updated %s YAML files (template/policy) for %s branches (repo: %s)",
                        count, len(heads_to_update), url)
            logger.info("Updated %s branch heads (repo: %s)", len(heads_to_update), url)

            # If garbage collection needed due to branch or repo deletion
            if do_send_gc_event:
                logger.info("Send gc event so that axops can garbage collect deleted branch / repo")
                ci_event = {
                    'Op': "gc",
                    'Payload': {
                        'details': "Repo or branch get deleted."
                    }
                }
                kafka_client.send(AxSettings.TOPIC_GC_EVENT, key=AxSettings.TOPIC_GC_EVENT,
                                  value=ci_event, timeout=120)

            # If webhook is disabled, we need to send CI events
            if not use_webhook:
                for event in heads_for_event:
                    commit = client.get_commit(event['head'])
                    ci_event = {
                        'Op': "ci",
                        'Payload': {
                            'author': commit['author'],
                            'branch': event['branch'],
                            'commit': commit['revision'],
                            'committer': commit['committer'],
                            'date': datetime.datetime.fromtimestamp(commit['date']).strftime('%Y-%m-%dT%H:%M:%S'),
                            'description': commit['description'],
                            'repo': commit['repo'],
                            'type': "push",
                            'vendor': repo_type
                        }
                    }
                    # NOTE(review): the event goes to the "devops_template" topic
                    # while the log line below mentions devops_ci_event -- confirm
                    # which topic is intended.
                    kafka_client.send("devops_template",
                                      key="{}$$$${}".format(event['repo'], event['branch']),
                                      value=ci_event,
                                      timeout=120)
                logger.info('Webhook not enabled, send %s devops_ci_event events', len(heads_for_event))
        finally:
            kafka_client.close()

        logger.info('Successfully scanned repository (%s)', url)
        return len(heads_to_update) > 0 or len(prev_heads_map) > 0