# Example no. 1
# 0
class Gateway(object):
    """Repo controller.

    Bundles the service clients and the helper routines used to list
    repositories, manage webhooks, query branches and commits in the local
    workspaces, and copy repository files to S3.
    """
    # Root directory of the local workspaces; they are laid out as
    # <BASE_DIR>/<vendor>/<repo_owner>/<repo_name>.
    BASE_DIR = '/ax/data/repos'
    BRANCH_CACHE_TTL = 5 * 60  # 5 minutes TTL as we expect we won't finish upgrade within 5 minutes
    # Prefix of the Redis keys under which per-workspace branch lists are cached.
    NAMESPACE = 'gateway'

    # Both values are read from the environment once, when the class body is
    # executed; either is None if the variable is not set.
    CLUSTER_NAME_ID = os.environ.get('AX_CLUSTER')
    CUSTOMER_ID = os.environ.get('AX_CUSTOMER_ID')
    # The bucket name is derived from the customer id with a fixed sequence number of 0.
    S3_BUCKET_NAME = 'applatix-cluster-{account}-{seq}'.format(account=CUSTOMER_ID, seq=0)
    # NOTE: the boto3 bucket handle is created when the class body runs and is
    # shared by every instance and by the static S3 helpers.
    s3_bucket = boto3.resource('s3').Bucket(S3_BUCKET_NAME)

    def __init__(self):
        """Instantiate the service clients and helpers the controller relies on."""
        self.axdb_client = AxdbClient()
        self.axops_client = AxopsClient()
        self.axsys_client = AxsysClient()
        self.redis_client = RedisClient('redis', db=DB_REPORTING)
        self.event_notification_client = EventNotificationClient(FACILITY_GATEWAY)

        # One API client per hosted SCM vendor, keyed by the vendor constant.
        vendor_clients = {}
        vendor_clients[ScmVendors.BITBUCKET] = BitBucketClient()
        vendor_clients[ScmVendors.GITHUB] = GitHubClient()
        vendor_clients[ScmVendors.GITLAB] = GitLabClient()
        self.scm_clients = vendor_clients

        self.repo_manager = RepoManager(DEFAULT_CONCURRENCY, DEFAULT_INTERVAL)
        self.event_trigger = EventTrigger()

    def get_repos(self, scm_type, url, username, password):
        """Get all repos owned by the user.

        Every connection failure is reported to the notification center and
        then re-raised as an invalid-parameter error.

        :param scm_type: SCM vendor, compared against the ScmVendors constants
        :param url: repo URL; only read for plain git
        :param username: user name (passed as the AWS access key id for CodeCommit)
        :param password: password (passed as the AWS secret access key for CodeCommit)
        :return: the vendor client's result for BitBucket / GitHub / GitLab,
            ``{url: url}`` for plain git, a dict mapping every CodeCommit repo
            URL to itself, or an empty dict for any other type
        :raises AXApiInvalidParam: if the SCM server cannot be reached
        """
        if scm_type in {ScmVendors.BITBUCKET, ScmVendors.GITHUB, ScmVendors.GITLAB}:
            try:
                repos = self.scm_clients[scm_type].get_repos(username, password)
            except Exception as e:
                logger.warning('Unable to connect to %s: %s', scm_type, e)
                detail = {
                    'type': scm_type,
                    'username': username,
                    # Not every exception has a `detail` attribute; fall back to
                    # the exception itself so the handler cannot fail with an
                    # AttributeError and mask the real connection error.
                    'error': str(getattr(e, 'detail', e))
                }
                self.event_notification_client.send_message_to_notification_center(CODE_CONFIGURATION_SCM_CONNECTION_ERROR,
                                                                                   detail=detail)
                raise AXApiInvalidParam('Cannot connect to %s server' % scm_type)
            else:
                return repos
        elif scm_type == ScmVendors.GIT:
            _, vendor, repo_owner, repo_name = Gateway.parse_repo(url)
            # Start from an empty scratch directory for the connectivity check.
            path = '/tmp/{}/{}/{}'.format(vendor, repo_owner, repo_name)
            if os.path.isfile(path):
                os.remove(path)
            if os.path.isdir(path):
                shutil.rmtree(path)
            os.makedirs(path)
            client = GitClient(path=path, repo=url, username=username, password=password)
            try:
                # Listing the remote is only used to prove the URL and credentials work.
                client.list_remote()
            except Exception as e:
                logger.warning('Unable to connect to git server (%s): %s', url, e)
                detail = {
                    'type': scm_type,
                    'url': url,
                    'username': username,
                    'error': str(e)
                }
                self.event_notification_client.send_message_to_notification_center(CODE_CONFIGURATION_SCM_CONNECTION_ERROR,
                                                                                   detail=detail)
                raise AXApiInvalidParam('Cannot connect to git server')
            else:
                return {url: url}
        elif scm_type == ScmVendors.CODECOMMIT:
            repos = {}
            region = 'us-east-1'
            default_url_format = 'https://git-codecommit.{}.amazonaws.com/v1/repos/{}'
            client = boto3.client('codecommit', aws_access_key_id=username, aws_secret_access_key=password,
                                  region_name=region)
            try:
                response = client.list_repositories().get('repositories', [])
                for r in response:
                    repo_url = default_url_format.format(region, r['repositoryName'])
                    repos[repo_url] = repo_url
            except Exception as exc:
                detail = {
                    'type': scm_type,
                    'region': region,
                    'url': default_url_format.format(region, ''),
                    'username': username,
                    'error': 'Cannot connect to CodeCommit' + str(exc)
                }
                self.event_notification_client.send_message_to_notification_center(CODE_CONFIGURATION_SCM_CONNECTION_ERROR,
                                                                                   detail=detail)
                raise AXApiInvalidParam('Cannot connect to CodeCommit: %s' % exc)
            else:
                return repos
        else:
            return {}

    @staticmethod
    def parse_repo(repo):
        """Split a repo URL into the 4-tuple (protocol, vendor, repo_owner, repo_name).

        The protocol and vendor are the URL scheme and host name; the owner and
        name are taken from the first two parts of the URL path.

        :param repo: repo URL
        :return: tuple (protocol, vendor, repo_owner, repo_name)
        :raises AXScmException: if the URL path does not start with /<owner>/<name>
        """
        url_parts = urlparse(repo)
        protocol = url_parts.scheme
        vendor = url_parts.hostname
        path_match = re.match(r'/([a-zA-Z0-9\-]+)/([a-zA-Z0-9_.\-/]+)', url_parts.path)
        if path_match is None:
            raise AXScmException('Illegal repo URL', detail='Illegal repo URL ({})'.format(repo))
        return protocol, vendor, path_match.group(1), path_match.group(2)

    def has_webhook(self, repo):
        """Test if any repo other than the given one still uses a webhook.

        :param repo: repo URL to leave out of the check
        :return: True if some SCM tool has webhooks enabled and tracks at least
            one repo besides `repo`, False otherwise
        """
        for tool in self.axops_client.get_tools(category='scm'):
            other_repos = set(tool.get('repos', [])) - {repo}
            if tool.get('use_webhook', False) and other_repos:
                return True
        return False

    def get_webhook(self, vendor, repo):
        """Look up the webhook of a repo through the vendor's SCM client.

        :param vendor: key into the per-vendor SCM client table
        :param repo: repo URL
        :returns: whatever the vendor client returns for the repo
        """
        logger.info('Retrieving webhook (repo: %s) ...', repo)
        scm_client = self.scm_clients[vendor]
        return scm_client.get_webhook(repo)

    def create_webhook(self, vendor, repo):
        """Create a webhook for a repo.

        Works in three steps: create the ELB restricted to the vendor's webhook
        whitelist, wait until the ELB host name resolves, then register the
        webhook with the vendor. A failure in any step is reported to the
        notification center before it is raised.

        :param vendor: key into the per-vendor SCM client table
        :param repo: repo URL
        :returns: an empty dict on success
        :raises AXApiInternalError: ELB creation / verification or webhook setup failed
        :raises AXApiInvalidParam: the credential is invalid or lacks permission
        """

        def _notify(code, detail):
            # Single place that talks to the notification center.
            self.event_notification_client.send_message_to_notification_center(code, detail=detail)

        def _notify_webhook_failure(reason, exc):
            _notify(CODE_JOB_CI_WEBHOOK_CREATION_FAILURE, {'repo': repo, 'error': reason + str(exc)})

        # Retried by the decorator (fixed 5000 ms wait, 20 minute maximum delay)
        # for as long as the lookup keeps failing.
        @retry(wait_fixed=5000, stop_max_delay=20 * 60 * 1000)
        def _wait_until_resolvable(elb_hostname):
            try:
                logger.info('Verifying ELB (%s) ...', elb_hostname)
                address = socket.gethostbyname(elb_hostname)
                logger.info('Successfully resolved ELB (%s) to IP (%s)', elb_hostname, address)
            except Exception as e:
                logger.error('ELB not ready: %s', str(e))
                raise AXApiInternalError('ELB not ready', str(e))

        whitelist = self.scm_clients[vendor].get_webhook_whitelist()

        # Step 1: create the ELB
        elb_spec = {'ip_range': whitelist, 'external_port': 8443, 'internal_port': 8087}
        try:
            logger.info('Creating ELB for webhook ...')
            elb = self.axsys_client.create_webhook(**elb_spec)
        except Exception as e:
            logger.error('Failed to create ELB for webhook: %s', str(e))
            _notify(CODE_JOB_CI_ELB_CREATION_FAILURE, elb_spec)
            raise AXApiInternalError('Failed to create ELB for webhook', str(e))
        logger.info('Successfully created ELB for webhook')

        # Step 2: wait for the ELB to become resolvable
        elb_hostname = elb['hostname']
        try:
            _wait_until_resolvable(elb_hostname)
        except Exception as e:
            logger.error('Timed out on waiting for ELB to be available: %s', str(e))
            _notify(CODE_JOB_CI_ELB_VERIFICATION_TIMEOUT, {'hostname': elb_hostname})
            raise AXApiInternalError('Timed out on waiting for ELB to be available: %s' % str(e))

        # Step 3: register the webhook with the vendor
        try:
            logger.info('Creating webhook (repo: %s) ...', repo)
            self.scm_clients[vendor].create_webhook(repo)
        except AXApiAuthFailed as e:
            logger.error('Invalid credential supplied')
            _notify_webhook_failure('Invalid credential supplied:', e)
            raise AXApiInvalidParam('User authentication failed', detail=str(e))
        except AXApiForbiddenReq as e:
            logger.error('Supplied credential is valid but having insufficient permission')
            _notify_webhook_failure('Supplied credential is valid but having insufficient permission:', e)
            raise AXApiInvalidParam('User has insufficient permission', detail=str(e))
        except Exception as e:
            logger.error('Failed to configure webhook: %s', e)
            _notify_webhook_failure('Failed to configure webhook:', e)
            raise AXApiInternalError('Failed to configure webhook', str(e))
        logger.info('Successfully created webhook (repo: %s)', repo)
        return {}

    def delete_webhook(self, vendor, repo):
        """Delete the webhook of a repo and, if nothing else needs it, the ELB.

        The ELB is only removed when no other repo still uses a webhook. A
        failure in either step is reported to the notification center before it
        is raised.

        :param vendor: key into the per-vendor SCM client table
        :param repo: repo URL
        :returns: an empty dict on success
        :raises AXApiInvalidParam: the credential is invalid or lacks permission
        :raises AXApiInternalError: webhook or ELB deletion failed
        """

        def _notify(code, reason, exc):
            detail = {'repo': repo, 'error': reason + str(exc)}
            self.event_notification_client.send_message_to_notification_center(code, detail=detail)

        # Delete webhook
        try:
            logger.info('Deleting webhook (repo: %s) ...', repo)
            self.scm_clients[vendor].delete_webhook(repo)
        except AXApiAuthFailed as e:
            logger.error('Invalid credential supplied')
            _notify(CODE_JOB_CI_WEBHOOK_DELETION_FAILURE, 'Invalid credential supplied:', e)
            raise AXApiInvalidParam('User authentication failed', detail=str(e))
        except AXApiForbiddenReq as e:
            logger.error('Supplied credential is valid but having insufficient permission')
            _notify(CODE_JOB_CI_WEBHOOK_DELETION_FAILURE,
                    'Supplied credential is valid but having insufficient permission:', e)
            raise AXApiInvalidParam('User has insufficient permission', detail=str(e))
        except Exception as e:
            logger.error('Failed to delete webhook: %s', e)
            _notify(CODE_JOB_CI_WEBHOOK_DELETION_FAILURE, 'Failed to delete webhook:', e)
            raise AXApiInternalError('Failed to delete webhook', str(e))
        else:
            logger.info('Successfully deleted webhook (repo: %s)', repo)

        # Delete ELB
        try:
            elb_still_needed = self.has_webhook(repo)
            if not elb_still_needed:
                logger.info('Deleting ELB for webhook ...')
                self.axsys_client.delete_webhook()
        except Exception as e:
            logger.error('Failed to delete ELB for webhook: %s', str(e))
            _notify(CODE_JOB_CI_ELB_DELETION_FAILURE, 'Failed to delete ELB for webhook', e)
            raise AXApiInternalError('Failed to delete ELB for webhook', str(e))

        # Only claim the ELB was deleted when it actually was.
        if elb_still_needed:
            logger.info('Keeping ELB for webhook as other repos still use it')
        else:
            logger.info('Successfully deleted ELB for webhook')
        return {}

    def purge_branches(self, repo, branch=None):
        """Purge the stored branch heads of a repo.

        :param repo: repo URL (required)
        :param branch: purge only this branch; purge every branch of the repo when empty
        :return: None
        :raises AXApiInvalidParam: if `repo` is empty
        :raises AXApiInternalError: if the database client fails
        """
        if not repo:
            raise AXApiInvalidParam('Missing required parameter', 'Missing required parameter (repo)')
        logger.info('Purging branch heads (repo: %s, branch: %s) ...', repo, branch)

        try:
            if branch:
                self.axdb_client.purge_branch_head(repo, branch)
            else:
                self.axdb_client.purge_branch_heads(repo)
        except Exception as e:
            detail = 'Unable to purge branch heads (repo: {}, branch: {}): {}'.format(repo, branch, str(e))
            logger.error(detail)
            raise AXApiInternalError('Unable to purge branch heads', detail)
        logger.info('Successfully purged branch heads')

    def get_branches(self, repo=None, branch=None, order_by=None, limit=None):
        """Get the remote branches of one workspace or of all of them.

        :param repo: URL-quoted repo URL; when empty, every directory below
            BASE_DIR that contains a ``.git`` directory is scanned
        :param branch: keep only branches whose name contains this text; ``*``
            matches any run of characters and everything else is literal
        :param order_by: ``commit_date``, ``-commit_date`` or ``-native``; any
            other value sorts ascending by (repo, name)
        :param limit: maximum number of branches to return (no limit if falsy)
        :return: list of dicts with keys repo, name, revision and commit_date
        """

        def _get_branches(workspace):
            """Retrieve list of remote branches in the workspace.

            The result is served from Redis when cached there, otherwise it is
            read from the workspace and cached for BRANCH_CACHE_TTL seconds.

            :param workspace: workspace directory
            :return: a list of dictionaries; empty if the scan fails.
            """
            try:
                key = '{}:{}'.format(Gateway.NAMESPACE, workspace)
                if self.redis_client.exists(key):
                    logger.info('Loading cache (workspace: %s) ...', workspace)
                    return self.redis_client.get(key, decoder=json.loads)
                logger.info('Scanning workspace (%s) ...', workspace)
                git_client = GitClient(path=workspace, read_only=True)
                remote = git_client.get_remote()
                results = [
                    {
                        'repo': remote,
                        'name': head['reference'],
                        'revision': head['commit'],
                        'commit_date': head['commit_date']
                    }
                    for head in git_client.get_remote_heads()
                ]
                logger.info('Saving cache (workspace: %s) ...', workspace)
                self.redis_client.set(key, results, expire=Gateway.BRANCH_CACHE_TTL, encoder=json.dumps)
                return results
            except Exception as e:
                # Best effort: one broken workspace must not fail the whole query.
                logger.warning('Failed to scan workspace (%s): %s', workspace, e)
                return []

        logger.info('Retrieving branches (repo: %s, branch: %s) ...', repo, branch)
        if repo:
            repo = unquote(repo)
            _, vendor, repo_owner, repo_name = self.parse_repo(repo)
            workspaces = ['{}/{}/{}/{}'.format(Gateway.BASE_DIR, vendor, repo_owner, repo_name)]
        else:
            # A workspace is the parent of every '.git' directory found below BASE_DIR.
            git_suffix = '/.git'
            workspaces = [entry[0][:-len(git_suffix)] for entry in os.walk(Gateway.BASE_DIR)
                          if entry[0].endswith(git_suffix)]

        branches = []
        with ThreadPoolExecutor(max_workers=20) as executor:
            futures = [executor.submit(_get_branches, workspace) for workspace in workspaces]
            for future in as_completed(futures):
                try:
                    data = future.result()
                except Exception as e:
                    logger.warning('Unexpected exception occurred during processing: %s', e)
                else:
                    branches.extend(data)
        if branch:
            # Escape everything but '*' so that names containing regex
            # metacharacters (e.g. 'c++', 'release-1.0') neither raise re.error
            # nor match more than intended.
            pattern = re.compile('.*'.join(re.escape(part) for part in branch.split('*')))
            branches = [item for item in branches if pattern.search(item['name'])]
        if order_by == 'commit_date':
            branches = sorted(branches, key=lambda v: v['commit_date'])
        elif order_by == '-commit_date':
            branches = sorted(branches, key=lambda v: v['commit_date'], reverse=True)
        elif order_by == '-native':
            branches = sorted(branches, key=lambda v: (v['repo'], v['name']), reverse=True)
        else:
            branches = sorted(branches, key=lambda v: (v['repo'], v['name']))
        if limit:
            branches = branches[:limit]
        logger.info('Successfully retrieved %s branches', len(branches))
        return branches

    @staticmethod
    def _get_commits(workspace, branch=None, since=None, until=None, commit=None, author=None, committer=None,
                     description=None, limit=None):
        """Search a workspace for commits matching the given filters.

        A leading ``~`` on `commit`, `author`, `committer` or `description` is
        dropped before the filter is handed to the git client.

        :return: the git client's result, or None if the scan fails
        """

        def _drop_tilde(value):
            return value[1:] if value and value.startswith('~') else value

        try:
            logger.info('Scanning workspace (%s) for commits ...', workspace)
            git_client = GitClient(path=workspace, read_only=True)
            commit = _drop_tilde(commit)
            author = _drop_tilde(author)
            committer = _drop_tilde(committer)
            description = _drop_tilde(description)
            return git_client.get_commits(branch=branch, commit=commit, since=since, until=until, author=author,
                                          committer=committer, description=description, limit=limit)
        except Exception as e:
            # Best effort: a failure is logged and reported as "nothing found".
            logger.warning('Failed to scan workspace (%s): %s', workspace, e)
            return None

    @staticmethod
    def _get_commit(workspace, commit):
        """Look up a single commit in a workspace.

        :param workspace: workspace directory
        :param commit: commit identifier handed to the git client
        :return: the git client's result, or None if the lookup fails
        """
        try:
            logger.info('Scanning workspace (%s) for commit (%s) ...', workspace, commit)
            reader = GitClient(path=workspace, read_only=True)
            found = reader.get_commit(commit)
        except Exception as e:
            logger.warning('Failed to scan workspace (%s): %s', workspace, e)
            return None
        return found

    @staticmethod
    def _parse_repo_branch(repo, branch, repo_branch):
        """Map workspace directories to the branches to look at.

        Exactly one source is used, in this order of precedence:

        * `repo` (URL-quoted repo URL): a single workspace with `branch`;
        * `repo_branch` (JSON object mapping URL-quoted repo URLs to lists of
          branches): one workspace per repo;
        * otherwise every directory below BASE_DIR that contains ``.git``,
          each with `branch`.

        :param repo: URL-quoted repo URL, or empty
        :param branch: branch name, or empty for "no branch filter"
        :param repo_branch: JSON string as described above, or empty
        :return: dict mapping workspace directory to its branches (a list, or a
            set when built from `repo_branch`)
        :raises AXApiInvalidParam: if `repo` or `repo_branch` cannot be parsed
        """
        if repo:
            try:
                repo = unquote(repo)
                _, vendor, repo_owner, repo_name = Gateway.parse_repo(repo)
            except Exception as e:
                # Format the message; a bare "'...', e" would build a tuple.
                msg = 'Unable to parse repo: %s' % str(e)
                logger.error(msg)
                raise AXApiInvalidParam('Unable to parse repo', msg)
            workspace = '{}/{}/{}/{}'.format(Gateway.BASE_DIR, vendor, repo_owner, repo_name)
            return {workspace: [branch] if branch else []}

        if repo_branch:
            try:
                workspaces = {}
                # Iterate over items so the branches are read with the key as
                # it appears in the JSON, not with its unquoted form.
                for quoted_repo, repo_branches in json.loads(repo_branch).items():
                    _, vendor, repo_owner, repo_name = Gateway.parse_repo(unquote(quoted_repo))
                    workspace = '{}/{}/{}/{}'.format(Gateway.BASE_DIR, vendor, repo_owner, repo_name)
                    workspaces.setdefault(workspace, set()).update(repo_branches)
            except Exception as e:
                msg = 'Unable to parse repo_branch: %s' % str(e)
                logger.error(msg)
                raise AXApiInvalidParam('Unable to parse repo_branch', msg)
            return workspaces

        # No repo given: a workspace is the parent of every '.git' directory found.
        git_suffix = '/.git'
        return {
            entry[0][:-len(git_suffix)]: [branch] if branch else []
            for entry in os.walk(Gateway.BASE_DIR) if entry[0].endswith(git_suffix)
        }

    @staticmethod
    def _put_file(repo, branch, path):
        """Copy a file from a repository workspace to S3.

        :param repo: repo URL
        :param branch: branch to read the file from
        :param path: path of the file inside the repository
        :return: dict with the bucket name, the object key and the ETag
        :raises AXApiInvalidParam: unknown repository or no such file
        :raises AXApiInternalError: reading the file or uploading it failed
        """
        _, vendor, repo_owner, repo_name = Gateway.parse_repo(repo)
        workspace = '{}/{}/{}/{}'.format(Gateway.BASE_DIR, vendor, repo_owner, repo_name)
        if not os.path.isdir(workspace):
            raise AXApiInvalidParam('Invalid repository', 'Invalid repository ({})'.format(repo))

        # Read the file content from the workspace
        try:
            logger.info('Extracting file content from repository (repo: %s, branch: %s, path: %s) ...',
                        repo, branch, path)
            reader = GitClient(path=workspace, read_only=True)
            files = reader.get_files(branch=branch, subdir=path, binary_mode=True)
        except Exception as e:
            message = 'Failed to extract file content'
            detail = '{}: {}'.format(message, str(e))
            logger.error(detail)
            raise AXApiInternalError(message, detail)
        if len(files) == 0:
            raise AXApiInvalidParam('Unable to locate file with given information')
        file_content = files[0]['content']
        logger.info('Successfully extracted file content')

        # Upload it
        try:
            # Cluster name id always has the form <cluster_name>-<36_bytes_long_cluster_id>
            name_id = Gateway.CLUSTER_NAME_ID
            key = '{cluster_name}/{cluster_id}/{vendor}/{repo_owner}/{repo_name}/{branch}/{path}'.format(
                cluster_name=name_id[:-37], cluster_id=name_id[-36:], vendor=vendor,
                repo_owner=repo_owner, repo_name=repo_name, branch=branch, path=path)
            logger.info('Uploading file content to s3 (bucket: %s, key: %s) ...', Gateway.S3_BUCKET_NAME, key)
            response = Gateway.s3_bucket.Object(key).put(Body=file_content)
            raw_etag = response.get('ETag')
            # A non-empty ETag is decoded as JSON before it is returned.
            etag = json.loads(raw_etag) if raw_etag else raw_etag
        except Exception as e:
            message = 'Failed to upload file content'
            detail = '{}: {}'.format(message, str(e))
            logger.error(detail)
            raise AXApiInternalError(message, detail)
        logger.info('Successfully uploaded file content')
        return {'bucket': Gateway.S3_BUCKET_NAME, 'key': key, 'etag': etag}

    @staticmethod
    def _delete_file(repo, branch, path):
        """Remove a previously uploaded repository file from S3.

        :param repo: repo URL
        :param branch: branch the file was taken from
        :param path: path of the file inside the repository
        :return: dict with the bucket name and the object key
        :raises AXApiInternalError: if the object cannot be deleted
        """
        _, vendor, repo_owner, repo_name = Gateway.parse_repo(repo)
        try:
            # Cluster name id has the form <cluster_name>-<36_bytes_long_cluster_id>
            name_id = Gateway.CLUSTER_NAME_ID
            key = '{cluster_name}/{cluster_id}/{vendor}/{repo_owner}/{repo_name}/{branch}/{path}'.format(
                cluster_name=name_id[:-37], cluster_id=name_id[-36:], vendor=vendor,
                repo_owner=repo_owner, repo_name=repo_name, branch=branch, path=path)
            logger.info('Deleting file from s3 (bucket: %s, key: %s) ...', Gateway.S3_BUCKET_NAME, key)
            Gateway.s3_bucket.Object(key).delete()
        except Exception as e:
            message = 'Failed to delete file'
            detail = '{}: {}'.format(message, str(e))
            logger.error(detail)
            raise AXApiInternalError(message, detail)
        logger.info('Successfully deleted file')
        return {'bucket': Gateway.S3_BUCKET_NAME, 'key': key}

    @staticmethod
    def init_jira_client(axops_client, url=None, username=None, password=None):
        """Build a Jira client from explicit settings or the stored configuration.

        When any of `url`, `username` or `password` is None, all three are
        taken from the first Jira tool returned by axops.

        :param axops_client: client used to look up the stored Jira tool
        :return: a JiraClient instance
        :raises AXApiInvalidParam: if settings are missing and no Jira tool is configured
        """
        if any(value is None for value in (url, username, password)):
            jira_tools = axops_client.get_tools(category='issue_management', type='jira')
            if not jira_tools:
                raise AXApiInvalidParam('No JIRA configured')
            stored = jira_tools[0]
            url, username, password = stored['url'], stored['username'], stored['password']
        return JiraClient(url, username, password)

    # Verify whether this function is still needed
    def check_github_whitelist(self):
        """Re-create the webhook ELB when GitHub's advertised whitelist changed.

        Does nothing if no GitHub webhook is enabled or if the cached whitelist
        already equals the advertised one. An ELB creation failure is logged
        and reported to the notification center but not raised.
        """
        if not self.is_github_webhook_enabled():
            logger.info('No GitHub webhook configured')
            return

        configured = self.get_from_cache()
        logger.info('The configured GitHub webhook whitelist is %s', configured)
        advertised = self.scm_clients[ScmVendors.GITHUB].get_webhook_whitelist()
        logger.info('The GitHub webhook whitelist is %s', advertised)
        if set(configured) == set(advertised):
            logger.info('No update needed')
            return

        # Create ELB
        elb_spec = {'ip_range': advertised, 'external_port': 8443, 'internal_port': 8087}
        try:
            logger.info('Creating ELB for webhook ...')
            self.axsys_client.create_webhook(**elb_spec)
        except Exception as exc:
            logger.error('Failed to create ELB for webhook: %s', str(exc))
            self.event_notification_client.send_message_to_notification_center(CODE_JOB_CI_ELB_CREATION_FAILURE,
                                                                               detail=elb_spec)
            return

        # Update cache only after the ELB was created
        self.write_to_cache(advertised)
        logger.info('Successfully updated ELB for webhook')

    def is_github_webhook_enabled(self):
        """Check whether any configured GitHub tool has its webhook enabled.

        A tool without a ``use_webhook`` entry counts as not using a webhook,
        matching how has_webhook() reads the same field.

        :return: True if at least one GitHub tool uses a webhook, else False
        """
        github_tools = self.axops_client.get_tools(type='github') or []
        return any(tool.get('use_webhook', False) for tool in github_tools)

    @staticmethod
    def write_to_cache(ip_range):
        """Persist the webhook whitelist as JSON in the local cache file.

        :param ip_range: JSON-serializable whitelist to store
        """
        with open('/tmp/github_webhook_whitelist', 'w+') as cache:
            serialized = json.dumps(ip_range)
            cache.write(serialized)

    def get_from_cache(self):
        """Get the cached webhook whitelist, otherwise ask axsys for it.

        The cache file is used when it exists and holds valid JSON on its first
        line. If it is missing, empty, corrupt or unreadable, the whitelist is
        fetched from axsys and written back to the cache file.

        :return: the whitelist; an empty list if neither source is available
        """
        cache_file = '/tmp/github_webhook_whitelist'
        if os.path.exists(cache_file):
            try:
                with open(cache_file, 'r') as f:
                    # The whitelist is stored as a single line of JSON.
                    return json.loads(f.readline())
            except (IOError, OSError, ValueError) as exc:
                # An unusable cache must not break the caller; refresh it below.
                logger.warning('Ignoring unusable cache file (%s): %s', cache_file, exc)
        else:
            logger.debug('No cache file')

        ip_range = list()
        try:
            data = self.axsys_client.get_webhook()
        except Exception as exc:
            logger.warning(exc)
        else:
            logger.info('Write whitelist info to cache file')
            ip_range = data['ip_ranges']
            self.write_to_cache(ip_range)
        return ip_range
# Example no. 2
# 0
class RepoManager(object):
    """Manage all repositories in track."""
    def __init__(self, concurrency, interval):
        """Create the service clients and remember the scan settings.

        Args:
            concurrency: Used as ``max_workers`` of the thread pool in ``run``.
            interval: Passed to ``time.sleep`` after every scan in ``run``.
        """
        self.axdb_client = AxdbClient()
        self.axops_client = AxopsClient()
        self.concurrency, self.interval = concurrency, interval

    def run(self):
        """Scan all tracked repositories in an endless loop.

        Every iteration waits for axops (``connect``), reconciles the tracked
        repositories with the workspaces on disk (``synchronize``) and then
        runs ``update_repo`` for each repository on a thread pool with
        ``self.concurrency`` workers. When anything changed, the current
        timestamp is stored in Redis under ``<NAMESPACE>:repos_updated``.

        An exception raised during a scan is logged and the loop carries on
        after sleeping ``self.interval``; the method never returns normally.
        """
        while True:
            logger.info('Start repository scan ...')
            try:
                self.connect()
                repos, has_change = self.synchronize()
                with ThreadPoolExecutor(
                        max_workers=self.concurrency) as executor:
                    futures = [
                        executor.submit(self.update_repo, **repo)
                        for repo in repos
                    ]
                    for future in as_completed(futures):
                        try:
                            # Always collect the result, even when a change is
                            # already known: otherwise an exception raised by a
                            # later worker would never be retrieved or logged.
                            repo_changed = future.result()
                        except Exception as e:
                            logger.warning(
                                'Unexpected exception occurred during processing: %s',
                                e)
                        else:
                            has_change = has_change or bool(repo_changed)
                # Notify UI backend about a change in repos
                if has_change:
                    key = '{}:repos_updated'.format(NAMESPACE)
                    redis_client.set(key, value=str(int(time.time())))
            except Exception as e:
                logger.warning('Repository scan failed: %s', str(e))
            else:
                logger.info('Repository scan completed\n')
            finally:
                time.sleep(self.interval)

    @retry(wait_fixed=5000)
    def connect(self):
        """Ping axops and raise ``ConnectionError`` when it does not respond.

        The method is wrapped by ``retry`` configured with ``wait_fixed=5000``.
        """
        if self.axops_client.ping():
            return
        failure = 'Unable to connect to axops'
        logger.warning(failure)
        raise ConnectionError(failure)

    def synchronize(self):
        """Reconcile the tracked repositories with the workspaces on disk.

        For every workspace on disk that no longer belongs to a tracked
        repository, the stored branch heads are purged, the workspace
        directory is deleted and the matching Redis keys are removed. When at
        least one such workspace was found, a garbage-collection event is
        published to Kafka.

        Returns:
            tuple: ``(repos, has_change)`` where ``repos`` is the list from
            ``get_all_repos`` and ``has_change`` is True when at least one
            untracked workspace was cleaned up.
        """
        logger.info('Synchronizing repositories ...')

        # Get all repos
        repos = self.get_all_repos()
        logger.info('%s repositories currently in track', len(repos))

        # Get untracked repos currently on disk (remote URL -> workspace path)
        untracked_repos = self.get_untracked_repos(repos)
        logger.info('%s untracked repositories found on disk',
                    len(untracked_repos))

        for repo, workspace in untracked_repos.items():
            # Purge all branch heads
            logger.info('Purging branch heads (repo: %s) ...', repo)
            self.axdb_client.purge_branch_heads(repo)
            # Delete workspace
            logger.info('Deleting workspace (path: %s) ...', workspace)
            shutil.rmtree(workspace)
            # Invalidate caches
            logger.info('Invalidating caches (workspace: %s) ...', workspace)
            # Raw string: '\:' is not a valid escape sequence in a normal
            # string literal. The resulting pattern text is unchanged.
            # NOTE(review): the pattern is written like a regex; if
            # redis_client.keys() forwards it to the Redis KEYS command
            # (glob-style matching) it would not match 'NAMESPACE:workspace...'
            # keys - confirm against the RedisClient wrapper.
            key_pattern = r'^{}\:{}.*$'.format(NAMESPACE, workspace)
            for k in redis_client.keys(key_pattern):
                logger.debug('Invalidating cache (key: %s) ...', k)
                redis_client.delete(k)

        # Send event to trigger garbage collection from axops
        if untracked_repos:
            kafka_client = ProducerClient()
            try:
                ci_event = {
                    'Op': "gc",
                    'Payload': {
                        'details': "Repo or branch get deleted."
                    }
                }
                kafka_client.send(AxSettings.TOPIC_GC_EVENT,
                                  key=AxSettings.TOPIC_GC_EVENT,
                                  value=ci_event,
                                  timeout=120)
            finally:
                # Release the producer even when sending fails, the same way
                # update_repo closes the producer it creates.
                kafka_client.close()

        return repos, bool(untracked_repos)

    def get_all_repos(self):
        """Retrieve all repos from axops.

        Every SCM tool returned by ``axops_client.get_tools(category='scm')``
        contributes the URLs listed under its ``repos`` key. URLs whose path
        does not start with ``/<owner>/<name>`` are skipped with a warning.
        Repositories are de-duplicated by ``(vendor, owner, name)``; once an
        ``https`` entry is recorded for a key, later entries for the same key
        are ignored.

        Returns:
            list: one dict per repository with the keys ``repo_type``,
            ``vendor``, ``protocol``, ``repo_owner``, ``repo_name``,
            ``username``, ``password`` and ``use_webhook``.
        """
        tools = self.axops_client.get_tools(category='scm') or []
        repos = {}
        for tool in tools:
            # Tolerate a tool record whose 'repos' is missing or None.
            for repo_url in tool.get('repos') or []:
                parsed_url = urlparse(repo_url)
                protocol, vendor = parsed_url.scheme, parsed_url.hostname
                if not re.match(r'/([a-zA-Z0-9-]+)/([a-zA-Z0-9_.-]+)',
                                parsed_url.path):
                    logger.warning('Illegal repo URL: %s, skip', repo_url)
                    continue
                # Everything after the owner segment is kept as the repo name.
                _, repo_owner, repo_name = parsed_url.path.split('/',
                                                                 maxsplit=2)
                key = (vendor, repo_owner, repo_name)
                if key in repos and repos[key]['protocol'] == 'https':
                    continue
                repos[key] = {
                    'repo_type': tool.get('type'),
                    'vendor': vendor,
                    'protocol': protocol,
                    'repo_owner': repo_owner,
                    'repo_name': repo_name,
                    'username': tool.get('username'),
                    'password': tool.get('password'),
                    'use_webhook': tool.get('use_webhook', False),
                }
        return list(repos.values())

    @staticmethod
    def get_untracked_repos(repos):
        """Find workspaces on disk that belong to no tracked repository.

        Args:
            repos: repo dicts providing ``vendor``, ``repo_owner`` and
                ``repo_name``.

        Returns:
            dict: maps the value of ``GitClient.get_remote()`` of each
            untracked workspace to the workspace path.
        """
        # Workspaces that the tracked repositories are expected to occupy
        expected_workspaces = {
            '{}/{}/{}/{}'.format(BASE_DIR, entry['vendor'],
                                 entry['repo_owner'], entry['repo_name'])
            for entry in repos
        }
        # Every directory below BASE_DIR that contains a '.git' directory,
        # with the trailing '/.git' (5 characters) cut off
        on_disk = [
            walked[0][:-5] for walked in os.walk(BASE_DIR)
            if walked[0].endswith('/.git')
        ]
        # Keep only the workspaces nobody is tracking any more
        untracked_repos = {}
        for workspace in on_disk:
            if workspace in expected_workspaces:
                continue
            remote = GitClient(path=workspace).get_remote()
            untracked_repos[remote] = workspace
        return untracked_repos

    @staticmethod
    def get_repo_workspace(repo_vendor, repo_owner, repo_name):
        """Return the workspace path ``BASE_DIR/<vendor>/<owner>/<name>``."""
        segments = (BASE_DIR, repo_vendor, repo_owner, repo_name)
        return '{}/{}/{}/{}'.format(*segments)

    @staticmethod
    def get_repo_url(protocol, repo_vendor, repo_owner, repo_name):
        return '{}://{}/{}/{}'.format(protocol, repo_vendor, repo_owner,
                                      repo_name)

    @staticmethod
    def update_yaml(repo_client, kafka_client, repo, branch, head):
        """Using Kafka to send a event to axops to update the yamls in the axdb."""
        logger.info("Update yaml %s, %s, %s", repo, branch, head)
        try:
            yaml_contents = repo_client.get_files(commit=head,
                                                  subdir=TEMPLATE_DIR,
                                                  filter_yaml=True)
        except Exception as e:
            logger.error("Failed to obtain YAML files: %s", str(e))
            return -1

        if len(yaml_contents) >= 0:
            # This is a partition key defined as RepoName$$$$BranchName.
            # The key is used by Kafka partition, which means it allows concurrency
            #  if the events are for different repo/branch
            key = '{}$$$${}'.format(repo, branch)
            payload = {
                'Op': 'update',
                'Payload': {
                    'Revision':
                    head,
                    'Content':
                    [v['content']
                     for v in yaml_contents] if yaml_contents else []
                }
            }
            kafka_client.send('devops_template',
                              key=key,
                              value=payload,
                              timeout=120)
            logger.info("Updated YAML %s files (repo: %s, branch: %s)",
                        len(yaml_contents), repo, branch)
        return len(yaml_contents)

    def update_repo(self, repo_type, vendor, protocol, repo_owner, repo_name,
                    username, password, use_webhook):
        """Fetch one repository and publish everything that changed.

        The branch heads stored in axdb are compared with the heads of the
        freshly fetched remote. New or moved heads get their YAML files
        published (``update_yaml``) and are recorded in axdb; heads that
        disappeared are purged and a garbage-collection event is sent. When
        the webhook is disabled, a CI event is sent for every moved head of a
        branch that was already known.

        Args:
            repo_type: SCM type, e.g. ``github``; selects the client class and
                is sent as ``vendor`` in CI events.
            vendor: host of the repository, e.g. ``github.com``.
            protocol: URL scheme, e.g. ``https``.
            repo_owner: owner segment of the URL, e.g. ``argo``.
            repo_name: repository segment of the URL, e.g. ``prod.git``.
            username: credential handed to the SCM client.
            password: credential handed to the SCM client.
            use_webhook: when falsy, CI events are emitted by this scan.

        Returns:
            bool: True when at least one branch head was updated or at least
            one previously known branch no longer exists.
        """
        is_first_fetch = False
        do_send_gc_event = False
        # e.g. /ax/data/repos/github.com/argo/prod.git
        workspace = self.get_repo_workspace(vendor, repo_owner, repo_name)
        # e.g. https://github.com/argo/prod.git
        url = self.get_repo_url(protocol, vendor, repo_owner, repo_name)
        kafka_client = ProducerClient()

        try:
            if not os.path.isdir(workspace):
                os.makedirs(workspace)
                # If we recreate the workspace, we need to purge all branch heads of this repo
                self.axdb_client.purge_branch_heads(url)

            logger.info("Start scanning repository (%s) ...", url)
            if repo_type == ScmVendors.CODECOMMIT:
                client = CodeCommitClient(path=workspace,
                                          repo=url,
                                          username=username,
                                          password=password)
            else:
                client = GitClient(path=workspace,
                                   repo=url,
                                   username=username,
                                   password=password,
                                   use_permanent_credentials=True)

            # Even if there is no change, performing a fetch is harmless but has a benefit
            # that, in case the workspace is destroyed without purging the history, we can
            # still update the workspace to the proper state
            logger.info("Start fetching ...")
            client.fetch()

            # Retrieve all previous branch heads and construct hash table
            prev_heads = self.axdb_client.get_branch_heads(url)
            logger.info("Have %s branch heads (repo: %s) from previous scan",
                        len(prev_heads), url)

            if len(prev_heads) == 0:
                is_first_fetch = True
                logger.debug(
                    "This is an initial scan as no previous heads were found")

            prev_heads_map = {
                (prev_head['repo'], prev_head['branch']): prev_head['head']
                for prev_head in prev_heads
            }

            # Retrieve all current branch heads; on the first fetch they are
            # processed newest first, otherwise oldest first
            current_heads = client.get_remote_heads()
            logger.info("Have %s branch heads (repo: %s) from current scan",
                        len(current_heads), url)
            current_heads = sorted(current_heads,
                                   key=lambda v: v['commit_date'],
                                   reverse=is_first_fetch)

            # Find out which branch heads need to be updated
            heads_to_update = list()
            heads_for_event = list()
            for current_head in current_heads:
                head = current_head['commit']
                branch = current_head['reference'].replace('refs/heads/', '')
                # Popping leaves only the deleted branches in prev_heads_map
                previous_head = prev_heads_map.pop((url, branch), None)
                if head == previous_head:
                    continue

                event = {'repo': url, 'branch': branch, 'head': head}
                heads_to_update.append(event)

                if previous_head is None:
                    logger.info(
                        "New branch detected (branch: %s, current head: %s)",
                        branch, head)
                else:
                    logger.info(
                        "Existing branch head updated (branch: %s, previous: %s, current: %s)",
                        branch, previous_head, head)
                    # Send CI event in case of policy
                    heads_for_event.append(event.copy())

            if prev_heads_map:
                logger.info("There are %s get deleted from repo: %s",
                            prev_heads_map.keys(), url)
                do_send_gc_event = True
                for key in prev_heads_map:
                    self.axdb_client.purge_branch_head(repo=key[0],
                                                       branch=key[1])

            # Invalidate cache if there is head update or branch deleted
            if heads_to_update or prev_heads_map:
                cache_key = '{}:{}'.format(NAMESPACE, workspace)
                logger.info('Invalidating cache (key: %s) ...', cache_key)
                if redis_client.exists(cache_key):
                    redis_client.delete(cache_key)

            # Update YAML contents; a head is only recorded in axdb when its
            # YAML files could be read (update_yaml returns -1 on failure)
            count = 0
            for event in heads_to_update:
                res_count = RepoManager.update_yaml(repo_client=client,
                                                    kafka_client=kafka_client,
                                                    repo=url,
                                                    branch=event['branch'],
                                                    head=event['head'])
                if res_count >= 0:
                    self.axdb_client.set_branch_head(**event)
                    count += res_count

            logger.info(
                "Updated %s YAML files (template/policy) for %s branches (repo: %s)",
                count, len(heads_to_update), url)
            logger.info("Updated %s branch heads (repo: %s)",
                        len(heads_to_update), url)

            # If garbage collection needed due to branch or repo deletion
            if do_send_gc_event:
                logger.info(
                    "Send gc event so that axops can garbage collect deleted branch / repo"
                )
                ci_event = {
                    'Op': "gc",
                    'Payload': {
                        'details': "Repo or branch get deleted."
                    }
                }
                kafka_client.send(AxSettings.TOPIC_GC_EVENT,
                                  key=AxSettings.TOPIC_GC_EVENT,
                                  value=ci_event,
                                  timeout=120)

            # If webhook is disabled, we need to send CI events
            if not use_webhook:
                for event in heads_for_event:
                    commit = client.get_commit(event['head'])
                    commit_date = datetime.datetime.fromtimestamp(
                        commit['date']).strftime('%Y-%m-%dT%H:%M:%S')
                    ci_event = {
                        'Op': "ci",
                        'Payload': {
                            'author': commit['author'],
                            'branch': event['branch'],
                            'commit': commit['revision'],
                            'committer': commit['committer'],
                            'date': commit_date,
                            'description': commit['description'],
                            'repo': commit['repo'],
                            'type': "push",
                            'vendor': repo_type
                        }
                    }
                    kafka_client.send("devops_template",
                                      key="{}$$$${}".format(
                                          event['repo'], event['branch']),
                                      value=ci_event,
                                      timeout=120)
                logger.info(
                    'Webhook not enabled, send %s devops_ci_event events',
                    len(heads_for_event))
        finally:
            # Release the producer on every path; previously any exception
            # above (e.g. a failed fetch) skipped close() and leaked it.
            kafka_client.close()

        logger.info('Successfully scanned repository (%s)', url)

        return len(heads_to_update) > 0 or len(prev_heads_map) > 0