def github_releases(org: str, name: str, config: GithubConfig) -> ETLReference:
    """Sync the releases endpoint of ``org/name`` and return a reference to it.

    The URL comes from ``config.releases_url(org, name)``. An ``APILimiter``
    and an ``ETLDataset`` are opened for that URL and handed to
    ``_sync_continuous_data`` together with the credentials and headers taken
    from ``config``. The iterator it returns is consumed to the end; the
    yielded entries themselves are discarded here.

    Returns:
        ``ETLReference(url)`` for the releases URL.
    """
    url = config.releases_url(org, name)
    with APILimiter(url, DELAY) as api_limiter, ETLDataset(url) as etl_dataset:
        synced = _sync_continuous_data(
            api_limiter,
            etl_dataset,
            url,
            config.auth(),
            config.headers,
        )
        # Exhaust the iterator; only the act of iterating matters here.
        for _ in synced:
            pass

    return ETLReference(url)
def github_tags(org: str, name: str, config: GithubConfig) -> ETLReference:
    """Sync the tags endpoint of ``org/name`` and return a reference to it.

    The URL comes from ``config.tags_url(org, name)``. An ``APILimiter`` and an
    ``ETLDataset`` are opened for that URL and handed to
    ``_sync_continuous_data`` together with the credentials and headers taken
    from ``config``. The iterator it returns is consumed to the end; the
    yielded entries themselves are discarded here.

    Returns:
        ``ETLReference(url)`` for the tags URL.
    """
    url = config.tags_url(org, name)
    with APILimiter(url, DELAY) as api_limiter, ETLDataset(url) as etl_dataset:
        synced = _sync_continuous_data(
            api_limiter,
            etl_dataset,
            url,
            config.auth(),
            config.headers,
        )
        # Exhaust the iterator; only the act of iterating matters here.
        for _ in synced:
            pass

    # TODO: build an abstraction that will update a list of times in place.
    # /tags doesn't return enough data; we'll need to call /tags/:sha to get
    # a more complete dataset.
    return ETLReference(url)
def extract_github_data():
    """Run every GitHub extractor for the repositories on the work queue.

    Queues and a logger are obtained from ``utils.comm_binders``. For each
    work item the ``org_name`` and ``repo_name`` keys are read, each extractor
    in ``collectGithubData.extractors`` is called with them, the results are
    stored under the item's ``'etl'`` key, and the item is put on the done
    queue.

    Raises:
        KeyError: if ``GITHUB_USERNAME`` or ``GITHUB_PASSWORD`` is missing
            from the environment, or a work item lacks ``org_name`` /
            ``repo_name``.
    """
    import os

    from collectGithubData import extractors

    work_queue, done_queue, ologger = utils.comm_binders(extract_github_data)

    local_cache_path = '/tmp/github-cache'
    # exist_ok=True replaces the former exists()-then-makedirs() pair, which
    # could raise FileExistsError if another worker created the directory
    # between the check and the call.
    os.makedirs(local_cache_path, exist_ok=True)

    config = GithubConfig(
        os.environ['GITHUB_USERNAME'],
        os.environ['GITHUB_PASSWORD'],
        ologger,
    )
    for details in work_queue:
        org: str = details['org_name']
        name: str = details['repo_name']
        # NOTE(review): only the repo named 'jwst' is processed; every other
        # work item is skipped and never reaches done_queue. This looks like a
        # leftover debugging filter -- confirm before relying on it.
        if name != 'jwst':
            continue

        details['etl'] = {
            'repo': extractors.github_repo(org, name, config),
            'commits': extractors.github_commits(org, name, config),
            'issues': extractors.github_issues(org, name, config),
            'pull-requests': extractors.github_pull_requests(org, name, config),
            'releases': extractors.github_releases(org, name, config),
            'collaborators': extractors.github_collaborators(org, name, config),
            'tags': extractors.github_tags(org, name, config),
        }
        done_queue.put(details)
def github_repo(org: str, name: str, config: GithubConfig) -> ETLReference:
    """Fetch the repository document for ``org/name`` into its ETL dataset.

    Issues a single GET against ``config.repos_url(org, name)`` using the
    credentials and headers from ``config``.

    * 200 -- the JSON body is merged into the dataset via ``update``.
    * 404 -- an error is logged and nothing is stored.
    * anything else -- ``NotImplementedError`` is raised.

    Returns:
        ``ETLReference(url)`` for the repository URL (for both 200 and 404).

    Raises:
        NotImplementedError: for any status code other than 200 or 404.
    """
    url = config.repos_url(org, name)
    # The limiter is entered for the duration of the request, but the limiter
    # object itself is not referenced inside the block.
    with APILimiter(url, DELAY), ETLDataset(url) as etl_dataset:
        response = requests.get(url, auth=config.auth(), headers=config.headers)
        status = response.status_code
        if status == 200:
            etl_dataset.update(response.json())
        elif status == 404:
            logger.error(
                f'User[{config.username}] may not have access to Repo[{org}/{name}]'
            )
        else:
            raise NotImplementedError(f'{response.status_code}: {url}')

    return ETLReference(url)