def _get_branches(workspace):
    """Retrieve the list of remote branches in the workspace.

    :param workspace: path of the workspace to scan.
    :return: a list of dictionaries describing the branches.
    """
    try:
        key = '{}:{}'.format(Gateway.NAMESPACE, workspace)
        if self.redis_client.exists(key):
            logger.info('Loading cache (workspace: %s) ...', workspace)
            return self.redis_client.get(key, decoder=json.loads)
        else:
            logger.info('Scanning workspace (%s) ...', workspace)
            git_client = GitClient(path=workspace, read_only=True)
            repo = git_client.get_remote()
            branches = git_client.get_remote_heads()
            results = []
            for branch in branches:
                results.append({
                    'repo': repo,
                    'name': branch['reference'],
                    'revision': branch['commit'],
                    'commit_date': branch['commit_date']
                })
            logger.info('Saving cache (workspace: %s) ...', workspace)
            self.redis_client.set(key, results, expire=Gateway.BRANCH_CACHE_TTL, encoder=json.dumps)
            return results
    except Exception as e:
        logger.warning('Failed to scan workspace (%s): %s', workspace, e)
        return []
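
# Illustrative sketch (not part of the original module): the cache-aside pattern that
# _get_branches relies on, written against the plain `redis` package API. The key
# layout, TTL, and `scan_fn` callable are hypothetical stand-ins; the real client
# wraps JSON encoding/decoding via its `encoder`/`decoder` arguments.
def _branch_cache_sketch(redis_conn, workspace, scan_fn, ttl_seconds=300):
    import json

    key = 'branch-cache:{}'.format(workspace)   # hypothetical key layout
    cached = redis_conn.get(key)
    if cached is not None:
        return json.loads(cached)               # cache hit: decode the stored JSON
    results = scan_fn(workspace)                # cache miss: run the expensive scan
    redis_conn.set(key, json.dumps(results), ex=ttl_seconds)
    return results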
def _get_commits(workspace, branch=None, since=None, until=None, commit=None,
                 author=None, committer=None, description=None, limit=None):
    """Search for commits in a workspace."""
    try:
        logger.info('Scanning workspace (%s) for commits ...', workspace)
        git_client = GitClient(path=workspace, read_only=True)
        # Strip the optional leading '~' from filter values before querying git
        if commit and commit.startswith('~'):
            commit = commit[1:]
        if author and author.startswith('~'):
            author = author[1:]
        if committer and committer.startswith('~'):
            committer = committer[1:]
        if description and description.startswith('~'):
            description = description[1:]
        return git_client.get_commits(branch=branch, commit=commit, since=since, until=until,
                                      author=author, committer=committer,
                                      description=description, limit=limit)
    except Exception as e:
        logger.warning('Failed to scan workspace (%s): %s', workspace, e)
def _get_commit(workspace, commit):
    """Get a commit from a workspace."""
    try:
        logger.info('Scanning workspace (%s) for commit (%s) ...', workspace, commit)
        git_client = GitClient(path=workspace, read_only=True)
        return git_client.get_commit(commit)
    except Exception as e:
        logger.warning('Failed to scan workspace (%s): %s', workspace, e)
def _put_file(repo, branch, path):
    """Upload a file from a repository to S3.

    :param repo: repository URL.
    :param branch: branch to read the file from.
    :param path: path of the file within the repository.
    :return: a dictionary with the S3 bucket, key, and ETag.
    """
    _, vendor, repo_owner, repo_name = parse_repo(repo)
    workspace = '{}/{}/{}/{}'.format(BASE_DIR, vendor, repo_owner, repo_name)
    if not os.path.isdir(workspace):
        raise AXApiInvalidParam('Invalid repository', 'Invalid repository ({})'.format(repo))
    try:
        logger.info('Extracting file content from repository (repo: %s, branch: %s, path: %s) ...',
                    repo, branch, path)
        git_client = GitClient(path=workspace, read_only=True)
        files = git_client.get_files(branch=branch, subdir=path, binary_mode=True)
    except Exception as e:
        message = 'Failed to extract file content'
        detail = '{}: {}'.format(message, str(e))
        logger.error(detail)
        raise AXApiInternalError(message, detail)
    else:
        if len(files) == 0:
            raise AXApiInvalidParam('Unable to locate file with given information')
        file_content = files[0]['content']
        logger.info('Successfully extracted file content')

    try:
        # The cluster name ID always has the form <cluster_name>-<36_bytes_long_cluster_id>
        cluster_name, cluster_id = CLUSTER_NAME_ID[:-37], CLUSTER_NAME_ID[-36:]
        key = '{cluster_name}/{cluster_id}/{vendor}/{repo_owner}/{repo_name}/{branch}/{path}'.format(
            cluster_name=cluster_name, cluster_id=cluster_id, vendor=vendor,
            repo_owner=repo_owner, repo_name=repo_name, branch=branch, path=path)
        logger.info('Uploading file content to s3 (bucket: %s, key: %s) ...', S3_BUCKET_NAME, key)
        response = s3_bucket.Object(key).put(Body=file_content)
        etag = response.get('ETag')
        if etag:
            # S3 returns the ETag as a quoted string; json.loads strips the quotes
            etag = json.loads(etag)
    except Exception as e:
        message = 'Failed to upload file content'
        detail = '{}: {}'.format(message, str(e))
        logger.error(detail)
        raise AXApiInternalError(message, detail)
    else:
        logger.info('Successfully uploaded file content')
        return {'bucket': S3_BUCKET_NAME, 'key': key, 'etag': etag}
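
# Illustrative sketch (not part of the original module): how the cluster name/ID split
# and the S3 object key used by _put_file fit together. All sample values are
# hypothetical. A cluster name-id is '<cluster_name>-<36-char uuid>', so slicing off
# the last 37 characters (the dash plus the uuid) yields the cluster name.
def _s3_key_sketch():
    cluster_name_id = 'mycluster-123e4567-e89b-12d3-a456-426614174000'  # hypothetical
    cluster_name, cluster_id = cluster_name_id[:-37], cluster_name_id[-36:]
    assert cluster_name == 'mycluster'
    assert cluster_id == '123e4567-e89b-12d3-a456-426614174000'
    # hypothetical vendor/owner/repo/branch/path components
    return '{}/{}/{}/{}/{}/{}/{}'.format(cluster_name, cluster_id, 'github.com',
                                         'argo', 'prod', 'master', '.argo/ci.yaml')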
def get_untracked_repos(repos):
    """Get all repos on disk that are no longer tracked."""
    # Construct the set of expected workspaces
    expected_workspaces = set()
    for repo in repos:
        expected_workspace = '{}/{}/{}/{}'.format(BASE_DIR, repo['vendor'],
                                                  repo['repo_owner'], repo['repo_name'])
        expected_workspaces.add(expected_workspace)
    # Construct the list of all workspaces currently on disk
    dirs = [d[0] for d in os.walk(BASE_DIR) if d[0].endswith('/.git')]
    workspaces = [d[:-len('/.git')] for d in dirs]
    # Construct the map of untracked repos (remote URL -> workspace path)
    untracked_repos = {}
    for workspace in workspaces:
        if workspace not in expected_workspaces:
            client = GitClient(path=workspace)
            repo = client.get_remote()
            untracked_repos[repo] = workspace
    return untracked_repos
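
# Illustrative sketch (not part of the original module): how the '.git' scan in
# get_untracked_repos maps directories on disk back to workspace paths. The base
# directory and the sample path in the comment are hypothetical.
def _workspace_scan_sketch(base_dir='/ax/data/repos'):
    import os

    git_dirs = [d for d, _, _ in os.walk(base_dir) if d.endswith('/.git')]
    # e.g. '/ax/data/repos/github.com/argo/prod/.git' -> '/ax/data/repos/github.com/argo/prod'
    return [d[:-len('/.git')] for d in git_dirs]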
def get_repos(self, scm_type, url, username, password):
    """Get all repos owned by the user."""
    if scm_type in {ScmVendors.BITBUCKET, ScmVendors.GITHUB, ScmVendors.GITLAB}:
        try:
            repos = self.scm_clients[scm_type].get_repos(username, password)
        except Exception as e:
            logger.warning('Unable to connect to %s: %s', scm_type, e)
            detail = {
                'type': scm_type,
                'username': username,
                'error': str(e.detail)
            }
            self.event_notification_client.send_message_to_notification_center(
                CODE_CONFIGURATION_SCM_CONNECTION_ERROR, detail=detail)
            raise AXApiInvalidParam('Cannot connect to %s server' % scm_type)
        else:
            return repos
    elif scm_type == ScmVendors.GIT:
        _, vendor, repo_owner, repo_name = Gateway.parse_repo(url)
        path = '/tmp/{}/{}/{}'.format(vendor, repo_owner, repo_name)
        if os.path.isfile(path):
            os.remove(path)
        if os.path.isdir(path):
            shutil.rmtree(path)
        os.makedirs(path)
        client = GitClient(path=path, repo=url, username=username, password=password)
        try:
            client.list_remote()
        except Exception as e:
            logger.warning('Unable to connect to git server (%s): %s', url, e)
            detail = {
                'type': scm_type,
                'url': url,
                'username': username,
                'error': str(e)
            }
            self.event_notification_client.send_message_to_notification_center(
                CODE_CONFIGURATION_SCM_CONNECTION_ERROR, detail=detail)
            raise AXApiInvalidParam('Cannot connect to git server')
        else:
            return {url: url}
    elif scm_type == ScmVendors.CODECOMMIT:
        repos = {}
        region = 'us-east-1'
        default_url_format = 'https://git-codecommit.{}.amazonaws.com/v1/repos/{}'
        client = boto3.client('codecommit', aws_access_key_id=username,
                              aws_secret_access_key=password, region_name=region)
        try:
            response = client.list_repositories().get('repositories', [])
            for r in response:
                repo_url = default_url_format.format(region, r['repositoryName'])
                repos[repo_url] = repo_url
        except Exception as exc:
            detail = {
                'type': scm_type,
                'region': region,
                'url': default_url_format.format(region, ''),
                'username': username,
                'error': 'Cannot connect to CodeCommit: ' + str(exc)
            }
            self.event_notification_client.send_message_to_notification_center(
                CODE_CONFIGURATION_SCM_CONNECTION_ERROR, detail=detail)
            raise AXApiInvalidParam('Cannot connect to CodeCommit: %s' % exc)
        else:
            return repos
    else:
        return {}
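
# Illustrative sketch (not part of the original module): CodeCommit's ListRepositories
# API is paginated, and the call above reads only the first page. A hedged sketch of
# draining all pages with the documented nextToken parameter, assuming the same
# credentials and region as get_repos.
def _list_all_codecommit_repos_sketch(username, password, region='us-east-1'):
    import boto3

    client = boto3.client('codecommit', aws_access_key_id=username,
                          aws_secret_access_key=password, region_name=region)
    names, token = [], None
    while True:
        kwargs = {'nextToken': token} if token else {}
        page = client.list_repositories(**kwargs)
        names.extend(r['repositoryName'] for r in page.get('repositories', []))
        token = page.get('nextToken')
        if not token:
            return names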
def update_repo(self, repo_type, vendor, protocol, repo_owner, repo_name, username, password, use_webhook):
    """Update a repo."""
    # Example values for the input variables:
    #   BASE_DIR:   /ax/data/repos
    #   repo_type:  github
    #   vendor:     github.com
    #   protocol:   https
    #   repo_owner: argo
    #   repo_name:  prod.git
    is_first_fetch = False
    do_send_gc_event = False
    workspace = '{}/{}/{}/{}'.format(BASE_DIR, vendor, repo_owner, repo_name)
    url = '{}://{}/{}/{}'.format(protocol, vendor, repo_owner, repo_name)
    kafka_client = ProducerClient()

    if not os.path.isdir(workspace):
        os.makedirs(workspace)
        # If we recreate the workspace, we need to purge all branch heads of this repo
        self.axdb_client.purge_branch_heads(url)

    logger.info("Start scanning repository (%s) ...", url)
    if repo_type == ScmVendors.CODECOMMIT:
        client = CodeCommitClient(path=workspace, repo=url, username=username, password=password)
    else:
        client = GitClient(path=workspace, repo=url, username=username, password=password,
                           use_permanent_credentials=True)

    # Even if there is no change, performing a fetch is harmless and has the benefit
    # that, in case the workspace was destroyed without purging the history, we can
    # still bring the workspace back to the proper state.
    logger.info("Start fetching ...")
    client.fetch()

    # Retrieve all previous branch heads and construct a hash table
    prev_heads = self.axdb_client.get_branch_heads(url)
    logger.info("Have %s branch heads (repo: %s) from previous scan", len(prev_heads), url)

    if len(prev_heads) == 0:
        is_first_fetch = True
        logger.debug("This is an initial scan as no previous heads were found")

    prev_heads_map = dict()
    for prev_head in prev_heads:
        key = (prev_head['repo'], prev_head['branch'])
        prev_heads_map[key] = prev_head['head']

    # Retrieve all current branch heads
    current_heads = client.get_remote_heads()
    logger.info("Have %s branch heads (repo: %s) from current scan", len(current_heads), url)
    # On an initial scan process the newest heads first; otherwise process in chronological order
    current_heads = sorted(current_heads, key=lambda v: v['commit_date'], reverse=is_first_fetch)

    # Find out which branch heads need to be updated
    heads_to_update = list()
    heads_for_event = list()
    for current_head in current_heads:
        head, branch = current_head['commit'], current_head['reference'].replace('refs/heads/', '')
        previous_head = prev_heads_map.pop((url, branch), None)
        if head != previous_head:
            event = {'repo': url, 'branch': branch, 'head': head}
            heads_to_update.append(event)
            if previous_head is None:
                logger.info("New branch detected (branch: %s, current head: %s)", branch, head)
            else:
                logger.info("Existing branch head updated (branch: %s, previous: %s, current: %s)",
                            branch, previous_head, head)
            # Send CI event in case of policy
            heads_for_event.append(event.copy())

    if prev_heads_map:
        logger.info("Branch heads %s were deleted from repo: %s", prev_heads_map.keys(), url)
        do_send_gc_event = True
        for key in prev_heads_map:
            self.axdb_client.purge_branch_head(repo=key[0], branch=key[1])

    # Invalidate the cache if a head was updated or a branch was deleted
    if heads_to_update or prev_heads_map:
        cache_key = '{}:{}'.format(NAMESPACE, workspace)
        logger.info('Invalidating cache (key: %s) ...', cache_key)
        if redis_client.exists(cache_key):
            redis_client.delete(cache_key)

    # Update YAML contents
    count = 0
    for event in heads_to_update:
        res_count = RepoManager.update_yaml(repo_client=client, kafka_client=kafka_client,
                                            repo=url, branch=event['branch'], head=event['head'])
        if res_count >= 0:
            self.axdb_client.set_branch_head(**event)
            count += res_count

    logger.info("Updated %s YAML files (template/policy) for %s branches (repo: %s)",
                count, len(heads_to_update), url)
logger.info("Updated %s branch heads (repo: %s)", len(heads_to_update), url) # If garbarge collection needed due to branch or repo deletion if do_send_gc_event: logger.info( "Send gc event so that axops can garbage collect deleted branch / repo" ) ci_event = { 'Op': "gc", 'Payload': { 'details': "Repo or branch get deleted." } } kafka_client.send(AxSettings.TOPIC_GC_EVENT, key=AxSettings.TOPIC_GC_EVENT, value=ci_event, timeout=120) # If webhook is disabled, we need to send CI events if not use_webhook: for event in heads_for_event: commit = client.get_commit(event['head']) ci_event = { 'Op': "ci", 'Payload': { 'author': commit['author'], 'branch': event['branch'], 'commit': commit['revision'], 'committer': commit['committer'], 'date': datetime.datetime.fromtimestamp( commit['date']).strftime('%Y-%m-%dT%H:%M:%S'), 'description': commit['description'], 'repo': commit['repo'], 'type': "push", 'vendor': repo_type } } kafka_client.send("devops_template", key="{}$$$${}".format( event['repo'], event['branch']), value=ci_event, timeout=120) logger.info('Webhook not enabled, send %s devops_ci_event events', len(heads_for_event)) kafka_client.close() logger.info('Successfully scanned repository (%s)', url) return len(heads_to_update) > 0 or len(prev_heads_map) > 0