def fetch_workspaces(self):
    """Fetch Github repository Zenhub workspaces.

    For each fetcher configuration, registers a raw evidence file and,
    when that evidence is stale in the locker, retrieves the Zenhub
    workspaces for the configured Github repository and stores them as
    JSON evidence content.
    """
    for config in self.configs:
        gh_host = config.get('github_host', GH_HOST_URL)
        zh_root = config.get('api_root', ZH_API_ROOT)
        repo = config['github_repo']
        repo_hash = get_sha256_hash([gh_host, repo], 10)
        fname = f'zh_repo_{repo_hash}_workspaces.json'
        self.config.add_evidences([
            RawEvidence(
                fname,
                'issues',
                DAY,
                f'Zenhub workspaces for {gh_host}/{repo} repository')
        ])
        with raw_evidence(self.locker, f'issues/{fname}') as evidence:
            if evidence:
                # Pool clients/sessions per host so configs sharing a
                # host reuse the same connection.  (`in dict` replaces
                # the non-idiomatic `in dict.keys()`.)
                if gh_host not in self.gh_pool:
                    self.gh_pool[gh_host] = Github(base_url=gh_host)
                if zh_root not in self.zh_pool:
                    self.zh_pool[zh_root] = BaseSession(zh_root)
                # A non-default API root indicates a Zenhub Enterprise
                # deployment, which uses a separate credential entry.
                service = 'zenhub'
                if zh_root != ZH_API_ROOT:
                    service = 'zenhub_enterprise'
                token = self.config.creds[service].token
                self.zh_pool[zh_root].headers.update({
                    'Content-Type': 'application/json',
                    'X-Authentication-Token': token
                })
                workspaces = self._get_workspaces(
                    repo, config.get('workspaces'), gh_host, zh_root)
                evidence.set_content(json.dumps(workspaces))
def fetch_gh_org_collaborators(self):
    """Fetch collaborators from GH organization repositories.

    For each configured organization and collaborator affiliation,
    registers a raw evidence file and, when stale, gathers the
    collaborators of every repository in the organization as JSON
    evidence content.
    """
    for config in self.config.get('org.permissions.org_integrity.orgs'):
        host, org = config['url'].rsplit('/', 1)
        # The hash depends only on the org URL, so compute it once per
        # org instead of once per affiliation (loop-invariant hoist).
        url_hash = get_sha256_hash([config['url']], 10)
        for aff in config.get('collaborator_types', GH_ALL_COLLABORATORS):
            json_file = f'gh_{aff}_collaborators_{url_hash}.json'
            path = ['permissions', json_file]
            description = (
                f'{aff.title()} collaborators of the {org} GH org')
            self.config.add_evidences(
                [RawEvidence(path[1], path[0], DAY, description)])
            with raw_evidence(self.locker, '/'.join(path)) as evidence:
                if evidence:
                    if host not in self.gh_pool:
                        self.gh_pool[host] = Github(base_url=host)
                    # Discover the org's repos once and cache the list
                    # on the config so later affiliations reuse it.
                    if not config.get('repos'):
                        repos = self.gh_pool[host].paginate_api(
                            f'orgs/{org}/repos')
                        config['repos'] = [repo['name'] for repo in repos]
                    collabs = {}
                    for repo in config['repos']:
                        collabs_url = f'repos/{org}/{repo}/collaborators'
                        collabs[repo] = self.gh_pool[host].paginate_api(
                            collabs_url, affiliation=aff)
                    evidence.set_content(json.dumps(collabs))
def fetch_gh_repo_branch_protection_details(self):
    """Fetch Github repository branch protection metadata.

    Registers branch protection evidence for every configured
    repo/branch pair (defaulting to the locker repo's master branch)
    and refreshes stale evidence from the Github API.
    """
    locker_default = {self.config.get('locker.repo_url'): ['master']}
    branches = self.config.get(
        'org.auditree.repo_integrity.branches', locker_default)
    github = None
    active_base = None
    for repo_url, branch_names in branches.items():
        parts = urlparse(repo_url)
        base_url = f'{parts.scheme}://{parts.hostname}'
        repo = parts.path.strip('/')
        for branch in branch_names:
            prefix = '_'.join([
                repo.lower().replace('/', '_').replace('-', '_'),
                branch.lower().replace('-', '_')
            ])
            folder = 'auditree'
            filename = f'gh_{prefix}_branch_protection.json'
            # Only build a new client when the base URL changes.
            if base_url != active_base:
                github = Github(self.config.creds, base_url)
                active_base = base_url
            self.config.add_evidences([
                RepoBranchProtectionEvidence(
                    filename, folder, DAY,
                    (f'Github branch protection for {repo} repo '
                     f'{branch} branch'))
            ])
            ev_path = os.path.join(folder, filename)
            with raw_evidence(self.locker, ev_path) as evidence:
                if evidence:
                    details = github.get_branch_protection_details(
                        repo, branch)
                    evidence.set_content(json.dumps(details))
def fetch_gh_repo_branch_recent_commits_details(self):
    """Fetch Github repository branch recent commits metadata.

    Registers recent-commit evidence for every configured repo/branch
    pair (defaulting to the locker repo's master branch) and refreshes
    stale evidence with commits made since the evidence's last update.
    """
    locker_default = {self.config.get('locker.repo_url'): ['master']}
    branches = self.config.get(
        'org.auditree.repo_integrity.branches', locker_default)
    github = None
    active_base = None
    for repo_url, branch_names in branches.items():
        parts = urlparse(repo_url)
        base_url = f'{parts.scheme}://{parts.hostname}'
        repo = parts.path.strip('/')
        for branch in branch_names:
            prefix = '_'.join([
                repo.lower().replace('/', '_').replace('-', '_'),
                branch.lower().replace('-', '_')
            ])
            folder = 'auditree'
            filename = f'gh_{prefix}_recent_commits.json'
            # Only build a new client when the base URL changes.
            if base_url != active_base:
                github = Github(self.config.creds, base_url)
                active_base = base_url
            is_locker_branch = (repo_url == self.locker.repo_url
                                and branch == self.locker.branch)
            # To ensure signed commits check picks up locker commits
            ttl = DAY * 2 if is_locker_branch else DAY
            self.config.add_evidences([
                RepoCommitEvidence(
                    filename, folder, ttl,
                    (f'Github recent commits for {repo} repo '
                     f'{branch} branch'))
            ])
            ev_path = os.path.join(folder, filename)
            with raw_evidence(self.locker, ev_path) as evidence:
                if evidence:
                    meta = self.locker.get_evidence_metadata(evidence.path)
                    if meta is None:
                        meta = {}
                    # Fall back to "now" when the evidence has no prior
                    # update timestamp.
                    fallback = datetime.utcnow().strftime(
                        LOCKER_DTTM_FORMAT)
                    since = datetime.strptime(
                        meta.get('last_update', fallback),
                        LOCKER_DTTM_FORMAT)
                    commits = github.get_commit_details(
                        repo, since, branch)
                    evidence.set_content(json.dumps(commits))
def fetch_gh_repo_branch_file_path_recent_commits_details(self):
    """Fetch Github repository branch file path recent commits metadata.

    Registers recent-commit evidence for every configured
    repo/branch/file-path combination and refreshes stale evidence with
    commits touching that path since the evidence's last update.
    """
    filepaths = self.config.get('org.auditree.repo_integrity.filepaths')
    github = None
    active_base = None
    # Single-pass replacement of all path symbols with underscores.
    symbol_map = str.maketrans({' ': '_', '/': '_', '-': '_', '.': '_'})
    for repo_url, repo_branches in filepaths.items():
        parts = urlparse(repo_url)
        base_url = f'{parts.scheme}://{parts.hostname}'
        repo = parts.path.strip('/')
        for branch, branch_filepaths in repo_branches.items():
            for filepath in branch_filepaths:
                prefix = f'{repo}_{branch}_{filepath}'.lower().translate(
                    symbol_map)
                folder = 'auditree'
                filename = f'gh_{prefix}_recent_commits.json'
                # Only build a new client when the base URL changes.
                if base_url != active_base:
                    github = Github(self.config.creds, base_url)
                    active_base = base_url
                self.config.add_evidences([
                    RepoCommitEvidence(
                        filename, folder, DAY,
                        (f'Github recent commits for {repo} repo '
                         f'{branch} branch, {filepath} file path'))
                ])
                ev_path = os.path.join(folder, filename)
                with raw_evidence(self.locker, ev_path) as evidence:
                    if evidence:
                        meta = self.locker.get_evidence_metadata(
                            evidence.path)
                        if meta is None:
                            meta = {}
                        # Fall back to "now" when the evidence has no
                        # prior update timestamp.
                        fallback = datetime.utcnow().strftime(
                            LOCKER_DTTM_FORMAT)
                        since = datetime.strptime(
                            meta.get('last_update', fallback),
                            LOCKER_DTTM_FORMAT)
                        commits = github.get_commit_details(
                            repo, since, branch, filepath)
                        evidence.set_content(json.dumps(commits))
def fetch_issues(self):
    """Fetch Github repository issues.

    For each fetcher configuration, registers a raw evidence file and,
    when stale, collects the de-duplicated union of all issues matched
    by the composed searches as JSON evidence content.
    """
    for config in self.configs:
        host = config.get('host', GH_HOST_URL)
        repo = config['repo']
        fname = f'gh_repo_{get_sha256_hash([host, repo], 10)}_issues.json'
        self.config.add_evidences([
            RawEvidence(fname, 'issues', DAY,
                        f'Github issues for {host}/{repo} repository')
        ])
        with raw_evidence(self.locker, f'issues/{fname}') as evidence:
            if evidence:
                if host not in self.gh_pool:
                    self.gh_pool[host] = Github(base_url=host)
                # De-duplicate with a set of seen ids maintained as
                # results arrive.  The previous list-rebuild per search
                # was O(n^2) and could not catch a duplicate id
                # returned within a single search's results.
                issues = []
                seen_ids = set()
                for search in self._compose_searches(config, host):
                    for result in self.gh_pool[host].search_issues(search):
                        if result['id'] not in seen_ids:
                            seen_ids.add(result['id'])
                            issues.append(result)
                evidence.set_content(json.dumps(issues))
def fetch_gh_repo_details(self):
    """Fetch Github repository metadata.

    Registers repository metadata evidence for every configured repo
    URL (defaulting to the locker repo) and refreshes stale evidence
    from the Github API.
    """
    repo_urls = self.config.get(
        'org.auditree.repo_integrity.repos',
        [self.config.get('locker.repo_url')])
    github = None
    active_base = None
    for repo_url in repo_urls:
        parts = urlparse(repo_url)
        base_url = f'{parts.scheme}://{parts.hostname}'
        repo = parts.path.strip('/')
        prefix = repo.lower().replace('/', '_').replace('-', '_')
        folder = 'auditree'
        filename = f'gh_{prefix}_repo_metadata.json'
        # Only build a new client when the base URL changes.
        if base_url != active_base:
            github = Github(self.config.creds, base_url)
            active_base = base_url
        self.config.add_evidences([
            RepoMetadataEvidence(
                filename, folder, DAY,
                f'Github {repo} repo metadata details')
        ])
        ev_path = os.path.join(folder, filename)
        with raw_evidence(self.locker, ev_path) as evidence:
            if evidence:
                details = github.get_repo_details(repo)
                evidence.set_content(json.dumps(details))