Example 1
        def _get_branches(workspace):
            """Retrieve list of remote branches in the workspace.

            :param workspace: Path to the repository workspace.
            :return: A list of branch dictionaries.
            """
            try:
                key = '{}:{}'.format(Gateway.NAMESPACE, workspace)
                if self.redis_client.exists(key):
                    logger.info('Loading cache (workspace: %s) ...', workspace)
                    results = self.redis_client.get(key, decoder=json.loads)
                    return results
                else:
                    logger.info('Scanning workspace (%s) ...', workspace)
                    git_client = GitClient(path=workspace, read_only=True)
                    repo = git_client.get_remote()
                    branches = git_client.get_remote_heads()
                    results = []
                    for branch in branches:
                        results.append({
                            'repo': repo,
                            'name': branch['reference'],
                            'revision': branch['commit'],
                            'commit_date': branch['commit_date']
                        })
                    logger.info('Saving cache (workspace: %s) ...', workspace)
                    self.redis_client.set(key, results, expire=Gateway.BRANCH_CACHE_TTL, encoder=json.dumps)
                    return results
            except Exception as e:
                logger.warning('Failed to scan workspace (%s): %s', workspace, e)
                return []
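
Note: `_get_branches` is a cache-aside lookup: serve from Redis when the key exists, otherwise scan the repository and populate the cache with a TTL. The `encoder`/`decoder` keywords belong to the project's Redis wrapper; with plain redis-py the same pattern looks roughly like this (minimal sketch; `scan_branches` and the TTL are hypothetical stand-ins):

    import json
    import redis  # plain redis-py, not the project's wrapper

    r = redis.Redis()

    def scan_branches(workspace):
        return []  # hypothetical stand-in for the GitClient scan above

    def cached_branches(workspace, ttl=300):
        key = 'gateway:{}'.format(workspace)
        raw = r.get(key)
        if raw is not None:
            return json.loads(raw)               # cache hit
        results = scan_branches(workspace)       # cache miss: do the slow scan
        r.set(key, json.dumps(results), ex=ttl)  # populate cache with a TTL
        return results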
Example 2
 def _get_commits(workspace,
                  branch=None,
                  since=None,
                  until=None,
                  commit=None,
                  author=None,
                  committer=None,
                  description=None,
                  limit=None):
     """Search for commits in a workspace."""
     try:
         logger.info('Scanning workspace (%s) for commits ...', workspace)
         git_client = GitClient(path=workspace, read_only=True)
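         # Strip a leading '~' marker, if present, from each search filter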
         if commit and commit.startswith('~'):
             commit = commit[1:]
         if author and author.startswith('~'):
             author = author[1:]
         if committer and committer.startswith('~'):
             committer = committer[1:]
         if description and description.startswith('~'):
             description = description[1:]
         return git_client.get_commits(branch=branch,
                                       commit=commit,
                                       since=since,
                                       until=until,
                                       author=author,
                                       committer=committer,
                                       description=description,
                                       limit=limit)
     except Exception as e:
         logger.warning('Failed to scan workspace (%s): %s', workspace, e)
         return []
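
Note: the four `startswith('~')` checks are identical; if more filters are added, a small helper keeps the stripping in one place (sketch; the helper name is ours, not the project's):

    def _strip_tilde(term):
        """Drop the leading '~' marker from a search term, if present."""
        if term and term.startswith('~'):
            return term[1:]
        return term

    # usage: author = _strip_tilde(author)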
Example 3
 def _get_commit(workspace, commit):
     """Get a commit from a workspace."""
     try:
         logger.info('Scanning workspace (%s) for commit (%s) ...', workspace, commit)
         git_client = GitClient(path=workspace, read_only=True)
         return git_client.get_commit(commit)
     except Exception as e:
         logger.warning('Failed to scan workspace (%s): %s', workspace, e)
Example 4
def _put_file(repo, branch, path):
    """Put a file in s3.

    :param repo: Repository URL.
    :param branch: Branch name.
    :param path: Path of the file within the repository.
    :return: A dict with the s3 bucket, key, and etag.
    """
    _, vendor, repo_owner, repo_name = parse_repo(repo)
    workspace = '{}/{}/{}/{}'.format(BASE_DIR, vendor, repo_owner, repo_name)
    if not os.path.isdir(workspace):
        raise AXApiInvalidParam('Invalid repository',
                                'Invalid repository ({})'.format(repo))
    try:
        logger.info(
            'Extracting file content from repository (repo: %s, branch: %s, path: %s) ...',
            repo, branch, path)
        git_client = GitClient(path=workspace, read_only=True)
        files = git_client.get_files(branch=branch,
                                     subdir=path,
                                     binary_mode=True)
    except Exception as e:
        message = 'Failed to extract file content'
        detail = '{}: {}'.format(message, str(e))
        logger.error(detail)
        raise AXApiInternalError(message, detail)
    else:
        if not files:
            raise AXApiInvalidParam(
                'Unable to locate file with given information')
        file_content = files[0]['content']
        logger.info('Successfully extracted file content')

    try:
        # The cluster name-id always has the form <cluster_name>-<36_char_cluster_id>
        cluster_name, cluster_id = CLUSTER_NAME_ID[:-37], CLUSTER_NAME_ID[-36:]
        key = '{cluster_name}/{cluster_id}/{vendor}/{repo_owner}/{repo_name}/{branch}/{path}'.format(
            cluster_name=cluster_name,
            cluster_id=cluster_id,
            vendor=vendor,
            repo_owner=repo_owner,
            repo_name=repo_name,
            branch=branch,
            path=path)
        logger.info('Uploading file content to s3 (bucket: %s, key: %s) ...',
                    S3_BUCKET_NAME, key)
        response = s3_bucket.Object(key).put(Body=file_content)
        etag = response.get('ETag')
        if etag:
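            # S3 returns the ETag as a JSON-quoted string (e.g. '"abc123"');
            # json.loads strips the surrounding quotes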
            etag = json.loads(etag)
    except Exception as e:
        message = 'Failed to upload file content'
        detail = '{}: {}'.format(message, str(e))
        logger.error(detail)
        raise AXApiInternalError(message, detail)
    else:
        logger.info('Successfully uploaded file content')
        return {'bucket': S3_BUCKET_NAME, 'key': key, 'etag': etag}
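
Note: the `[:-37]`/`[-36:]` slicing used to split CLUSTER_NAME_ID is easy to misread. A quick worked check with a made-up name-id (a UUID is 36 characters, plus one for the separating hyphen):

    name_id = 'prod-123e4567-e89b-12d3-a456-426614174000'  # made-up
    cluster_name, cluster_id = name_id[:-37], name_id[-36:]
    assert cluster_name == 'prod'
    assert cluster_id == '123e4567-e89b-12d3-a456-426614174000'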
Example 5
 def get_untracked_repos(repos):
     """Get all untracked repos."""
     # Construct list of expected workspaces
     expected_workspaces = set()
     for repo in repos:
         expected_workspaces.add('{}/{}/{}/{}'.format(
             BASE_DIR, repo['vendor'], repo['repo_owner'], repo['repo_name']))
     # Construct list of all workspaces currently on disk
     dirs = [
         dirpath for dirpath, _, _ in os.walk(BASE_DIR)
         if dirpath.endswith('/.git')
     ]
     # Strip the trailing '/.git' to recover the work-tree paths
     workspaces = [d[:-len('/.git')] for d in dirs]
     # Construct list of untracked repos
     untracked_repos = {}
     for workspace in workspaces:
         if workspace not in expected_workspaces:
             client = GitClient(path=workspace)
             repo = client.get_remote()
             untracked_repos[repo] = workspace
     return untracked_repos
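
Note: for intuition, with one tracked repo and two checkouts on disk, the function flags the second one (paths and URL below are made up):

    # Tracked:  repos = [{'vendor': 'github.com', 'repo_owner': 'argo', 'repo_name': 'prod'}]
    # On disk:  /ax/data/repos/github.com/argo/prod/.git   (expected, skipped)
    #           /ax/data/repos/github.com/argo/dev/.git    (untracked)
    # Result, assuming the 'dev' checkout's remote is https://github.com/argo/dev:
    #   {'https://github.com/argo/dev': '/ax/data/repos/github.com/argo/dev'}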
Example 6
 def get_repos(self, scm_type, url, username, password):
     """Get all repos owned by the user."""
     if scm_type in {ScmVendors.BITBUCKET, ScmVendors.GITHUB, ScmVendors.GITLAB}:
         try:
             repos = self.scm_clients[scm_type].get_repos(username, password)
         except Exception as e:
             logger.warning('Unable to connect to %s: %s', scm_type, e)
             detail = {
                 'type': scm_type,
                 'username': username,
                 'error': str(getattr(e, 'detail', e))  # not every exception has .detail
             }
             self.event_notification_client.send_message_to_notification_center(CODE_CONFIGURATION_SCM_CONNECTION_ERROR,
                                                                                detail=detail)
             raise AXApiInvalidParam('Cannot connect to %s server' % scm_type)
         else:
             return repos
     elif scm_type == ScmVendors.GIT:
         _, vendor, repo_owner, repo_name = Gateway.parse_repo(url)
         path = '/tmp/{}/{}/{}'.format(vendor, repo_owner, repo_name)
         if os.path.isfile(path):
             os.remove(path)
         if os.path.isdir(path):
             shutil.rmtree(path)
         os.makedirs(path)
         client = GitClient(path=path, repo=url, username=username, password=password)
         try:
             client.list_remote()
         except Exception as e:
             logger.warning('Unable to connect to git server (%s): %s', url, e)
             detail = {
                 'type': scm_type,
                 'url': url,
                 'username': username,
                 'error': str(e)
             }
             self.event_notification_client.send_message_to_notification_center(CODE_CONFIGURATION_SCM_CONNECTION_ERROR,
                                                                                detail=detail)
             raise AXApiInvalidParam('Cannot connect to git server')
         else:
             return {url: url}
     elif scm_type == ScmVendors.CODECOMMIT:
         repos = {}
         region = 'us-east-1'
         default_url_format = 'https://git-codecommit.{}.amazonaws.com/v1/repos/{}'
         client = boto3.client('codecommit', aws_access_key_id=username, aws_secret_access_key=password,
                               region_name=region)
         try:
             response = client.list_repositories().get('repositories', [])
             for r in response:
                 repo_url = default_url_format.format(region, r['repositoryName'])
                 repos[repo_url] = repo_url
         except Exception as exc:
             detail = {
                 'type': scm_type,
                 'region': region,
                 'url': default_url_format.format(region, ''),
                 'username': username,
                 'error': 'Cannot connect to CodeCommit: ' + str(exc)
             }
             self.event_notification_client.send_message_to_notification_center(CODE_CONFIGURATION_SCM_CONNECTION_ERROR,
                                                                                detail=detail)
             raise AXApiInvalidParam('Cannot connect to CodeCommit: %s' % exc)
         else:
             return repos
     else:
         return {}
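
Note: one caveat in the CodeCommit branch: `list_repositories()` is paginated, so a single call returns at most one page of repositories. A hedged sketch of the paginator form (credentials and region are placeholders):

    import boto3

    client = boto3.client('codecommit',
                          aws_access_key_id='<access-key>',      # placeholder
                          aws_secret_access_key='<secret-key>',  # placeholder
                          region_name='us-east-1')
    paginator = client.get_paginator('list_repositories')
    names = [r['repositoryName']
             for page in paginator.paginate()
             for r in page['repositories']]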
Example 7
    def update_repo(self, repo_type, vendor, protocol, repo_owner, repo_name,
                    username, password, use_webhook):
        """Update a repo."""

        # Examples for the input variables
        # BASE_DIR:   /ax/data/repos
        # repo_type:  github
        # vendor:     github.com
        # protocol:   https
        # repo_owner: argo
        # repo_name:  prod.git

        is_first_fetch = False
        do_send_gc_event = False
        workspace = '{}/{}/{}/{}'.format(BASE_DIR, vendor, repo_owner,
                                         repo_name)
        url = '{}://{}/{}/{}'.format(protocol, vendor, repo_owner, repo_name)
        kafka_client = ProducerClient()

        if not os.path.isdir(workspace):
            os.makedirs(workspace)
            # If we recreate the workspace, we need to purge all branch heads of this repo
            self.axdb_client.purge_branch_heads(url)

        logger.info("Start scanning repository (%s) ...", url)
        if repo_type == ScmVendors.CODECOMMIT:
            client = CodeCommitClient(path=workspace,
                                      repo=url,
                                      username=username,
                                      password=password)
        else:
            client = GitClient(path=workspace,
                               repo=url,
                               username=username,
                               password=password,
                               use_permanent_credentials=True)

        # Even when nothing has changed, a fetch is harmless and has one benefit:
        # if the workspace was destroyed without purging its history, the fetch
        # still restores the workspace to the proper state
        logger.info("Start fetching ...")
        client.fetch()

        # Retrieve all previous branch heads and construct hash table
        prev_heads = self.axdb_client.get_branch_heads(url)
        logger.info("Have %s branch heads (repo: %s) from previous scan",
                    len(prev_heads), url)

        if not prev_heads:
            is_first_fetch = True
            logger.debug(
                "This is an initial scan as no previous heads were found")

        prev_heads_map = dict()
        for prev_head in prev_heads:
            key = (prev_head['repo'], prev_head['branch'])
            prev_heads_map[key] = prev_head['head']

        # Retrieve all current branch heads
        current_heads = client.get_remote_heads()
        logger.info("Have %s branch heads (repo: %s) from current scan",
                    len(current_heads), url)
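        # On the initial scan, process heads newest-first; otherwise oldest-first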
        current_heads = sorted(current_heads,
                               key=lambda v: v['commit_date'],
                               reverse=is_first_fetch)

        # Find out which branch heads need to be updated
        heads_to_update = list()
        heads_for_event = list()
        for current_head in current_heads:
            head = current_head['commit']
            branch = current_head['reference'].replace('refs/heads/', '')
            previous_head = prev_heads_map.pop((url, branch), None)
            if head != previous_head:
                event = {'repo': url, 'branch': branch, 'head': head}
                heads_to_update.append(event)

                if previous_head is None:
                    logger.info(
                        "New branch detected (branch: %s, current head: %s)",
                        branch, head)
                else:
                    logger.info(
                        "Existing ranch head updated (branch: %s, previous: %s, current: %s)",
                        branch, previous_head, head)
                    # Send CI event in case of policy
                    heads_for_event.append(event.copy())

        if prev_heads_map:
            logger.info("Branches %s were deleted from repo %s",
                        list(prev_heads_map.keys()), url)
            do_send_gc_event = True
            for key in prev_heads_map:
                self.axdb_client.purge_branch_head(repo=key[0], branch=key[1])

        # Invalidate cache if there is head update or branch deleted
        if heads_to_update or prev_heads_map:
            cache_key = '{}:{}'.format(NAMESPACE, workspace)
            logger.info('Invalidating cache (key: %s) ...', cache_key)
            if redis_client.exists(cache_key):
                redis_client.delete(cache_key)

        # Update YAML contents
        count = 0
        for event in heads_to_update:
            res_count = RepoManager.update_yaml(repo_client=client,
                                                kafka_client=kafka_client,
                                                repo=url,
                                                branch=event['branch'],
                                                head=event['head'])
            if res_count >= 0:
                self.axdb_client.set_branch_head(**event)
                count += res_count

        logger.info(
            "Updated %s YAML files (template/policy) for %s branches (repo: %s)",
            count, len(heads_to_update), url)
        logger.info("Updated %s branch heads (repo: %s)", len(heads_to_update),
                    url)

        # Send a gc event if garbage collection is needed due to branch or repo deletion
        if do_send_gc_event:
            logger.info(
                "Send gc event so that axops can garbage collect deleted branch / repo"
            )
            ci_event = {
                'Op': "gc",
                'Payload': {
                    'details': "Repo or branch get deleted."
                }
            }
            kafka_client.send(AxSettings.TOPIC_GC_EVENT,
                              key=AxSettings.TOPIC_GC_EVENT,
                              value=ci_event,
                              timeout=120)

        # If webhook is disabled, we need to send CI events
        if not use_webhook:
            for event in heads_for_event:
                commit = client.get_commit(event['head'])
                ci_event = {
                    'Op': "ci",
                    'Payload': {
                        'author': commit['author'],
                        'branch': event['branch'],
                        'commit': commit['revision'],
                        'committer': commit['committer'],
                        'date': datetime.datetime.fromtimestamp(
                            commit['date']).strftime('%Y-%m-%dT%H:%M:%S'),
                        'description': commit['description'],
                        'repo': commit['repo'],
                        'type': "push",
                        'vendor': repo_type
                    }
                }
                kafka_client.send("devops_template",
                                  key="{}$$$${}".format(
                                      event['repo'], event['branch']),
                                  value=ci_event,
                                  timeout=120)
            logger.info('Webhook not enabled, sent %s devops_ci_event events',
                        len(heads_for_event))

        kafka_client.close()
        logger.info('Successfully scanned repository (%s)', url)

        return len(heads_to_update) > 0 or len(prev_heads_map) > 0
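
Note: the head-diffing above (pop matches from the previous map; whatever remains was deleted upstream) can be isolated into a small pure function, which also makes it unit-testable. A minimal sketch:

    def diff_heads(prev_heads, current_heads, url):
        """Return (updated, deleted) branch heads; mirrors the scan above."""
        prev = {(h['repo'], h['branch']): h['head'] for h in prev_heads}
        updated = []
        for cur in current_heads:
            branch = cur['reference'].replace('refs/heads/', '')
            if prev.pop((url, branch), None) != cur['commit']:
                updated.append({'repo': url,
                                'branch': branch,
                                'head': cur['commit']})
        return updated, prev  # leftover entries are branches deleted upstream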