Example #1
0
def github_releases(org: str, name: str, config: GithubConfig) -> ETLReference:
    """Sync the releases listing of ``org/name`` and return a reference to it.

    The releases URL comes from ``config.releases_url``. The sync runs inside
    an ``APILimiter`` (built with the module-level ``DELAY``) and an
    ``ETLDataset`` keyed by that URL; both contexts are closed before the
    reference is created.

    Args:
        org: Organisation (owner) name passed to ``config.releases_url``.
        name: Repository name passed to ``config.releases_url``.
        config: Supplies the URL, the auth value and the request headers.

    Returns:
        An ``ETLReference`` built from the releases URL.
    """
    url = config.releases_url(org, name)
    with APILimiter(url, DELAY) as limiter, ETLDataset(url) as dataset:
        synced_entries = _sync_continuous_data(limiter, dataset, url,
                                               config.auth(), config.headers)
        # Yielded entries are not used here; the loop only drives the sync
        # to completion (presumably the helper fills the dataset -- confirm).
        for _ in synced_entries:
            pass

    return ETLReference(url)
Example #2
0
def github_tags(org: str, name: str, config: GithubConfig) -> ETLReference:
    """Sync the tags listing of ``org/name`` and return a reference to it.

    The tags URL comes from ``config.tags_url``. The sync runs inside an
    ``APILimiter`` (built with the module-level ``DELAY``) and an
    ``ETLDataset`` keyed by that URL; both contexts are closed before the
    reference is created.

    Args:
        org: Organisation (owner) name passed to ``config.tags_url``.
        name: Repository name passed to ``config.tags_url``.
        config: Supplies the URL, the auth value and the request headers.

    Returns:
        An ``ETLReference`` built from the tags URL.
    """
    url = config.tags_url(org, name)
    with APILimiter(url, DELAY) as limiter, ETLDataset(url) as dataset:
        synced_entries = _sync_continuous_data(limiter, dataset, url,
                                               config.auth(), config.headers)
        # Yielded entries are not used here; the loop only drives the sync
        # to completion (presumably the helper fills the dataset -- confirm).
        for _ in synced_entries:
            pass
        # TODO: Build an abstraction that'll update a list of times in place.
        # /tags doesn't return enough data; we'll need to call /tags/:sha to
        # have a more complete dataset.

    return ETLReference(url)
Example #3
0
def extract_github_data():
    """Run every GitHub extractor for the repositories found on the work queue.

    The work queue, done queue and logger are obtained from
    ``utils.comm_binders``. Each work item is expected to be a mapping with
    ``'org_name'`` and ``'repo_name'`` keys. For each processed item an
    ``'etl'`` entry is added, mapping a dataset label to the value returned by
    the matching extractor, and the item is then put on the done queue.

    Credentials are read from the ``GITHUB_USERNAME`` and ``GITHUB_PASSWORD``
    environment variables.

    Raises:
        KeyError: If either environment variable is unset, or a work item
            lacks ``'org_name'`` / ``'repo_name'``.
    """
    import os

    from collectGithubData import extractors

    work_queue, done_queue, ologger = utils.comm_binders(extract_github_data)

    # exist_ok=True instead of "check, then create": the separate existence
    # check raced when several workers started together, and the loser
    # crashed with FileExistsError.
    local_cache_path = '/tmp/github-cache'
    os.makedirs(local_cache_path, exist_ok=True)

    config = GithubConfig(os.environ['GITHUB_USERNAME'],
                          os.environ['GITHUB_PASSWORD'], ologger)
    for details in work_queue:
        org: str = details['org_name']
        name: str = details['repo_name']
        # NOTE(review): every repository except 'jwst' is skipped and is never
        # put on the done queue. This looks like a debugging filter -- confirm
        # whether it should remain.
        if name != 'jwst':
            continue

        # Extractors run in the order listed below.
        details['etl'] = {
            'repo': extractors.github_repo(org, name, config),
            'commits': extractors.github_commits(org, name, config),
            'issues': extractors.github_issues(org, name, config),
            'pull-requests':
            extractors.github_pull_requests(org, name, config),
            'releases': extractors.github_releases(org, name, config),
            'collaborators':
            extractors.github_collaborators(org, name, config),
            'tags': extractors.github_tags(org, name, config),
        }
        done_queue.put(details)
Example #4
0
def github_repo(org: str, name: str, config: GithubConfig) -> ETLReference:
    """Fetch the repository record for ``org/name`` into its ETL dataset.

    A single GET is issued against ``config.repos_url(org, name)`` while an
    ``APILimiter`` (built with the module-level ``DELAY``) and an
    ``ETLDataset`` for that URL are open.

    * 200: the decoded JSON body is passed to the dataset's ``update``.
    * 404: an error is logged and nothing is stored by this function.
    * any other status: ``NotImplementedError`` is raised.

    Args:
        org: Organisation (owner) name passed to ``config.repos_url``.
        name: Repository name passed to ``config.repos_url``.
        config: Supplies the URL, auth value, headers and the username used
            in the log message.

    Returns:
        An ``ETLReference`` built from the repository URL.

    Raises:
        NotImplementedError: For a response status other than 200 or 404.
    """
    url = config.repos_url(org, name)
    # The limiter is only entered and exited here; its handle is not needed.
    with APILimiter(url, DELAY), ETLDataset(url) as dataset:
        response = requests.get(url, auth=config.auth(), headers=config.headers)
        status = response.status_code
        if status == 200:
            dataset.update(response.json())
        elif status == 404:
            logger.error(
                f'User[{config.username}] may not have access to Repo[{org}/{name}]'
            )
        else:
            raise NotImplementedError(f'{response.status_code}: {url}')

    return ETLReference(url)