Example #1
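Example #1 is self-contained apart from its module-level setup. A minimal sketch of the imports it needs is below; the exact source of DEFAULT_POOLSIZE is an assumption, and the project-specific names (CACHE_DIR, S3_DOC_URL_VERSIONED, S3_DOC_URL_NON_VERSIONED, THIRD_PARTY_INDEXES, get_available_providers_packages, _is_outdated, _fetch_file, partition) are defined elsewhere in the project and are not reproduced here.

import concurrent.futures
import os
from itertools import repeat
from typing import Iterator, List, Tuple

import requests
from requests.adapters import DEFAULT_POOLSIZE  # assumption: requests' default connection pool size (10)
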
def fetch_inventories():
    """Fetch all inventories for Airflow documentation packages and store in cache."""
    os.makedirs(os.path.dirname(CACHE_DIR), exist_ok=True)
    to_download: List[Tuple[str, str, str]] = []

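    # Versioned inventories: one entry per provider package.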
    for pkg_name in get_available_providers_packages():
        to_download.append((
            pkg_name,
            S3_DOC_URL_VERSIONED.format(package_name=pkg_name),
            f'{CACHE_DIR}/{pkg_name}/objects.inv',
        ))
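    # The apache-airflow and helm-chart docs are versioned as well.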
    for pkg_name in ['apache-airflow', 'helm-chart']:
        to_download.append((
            pkg_name,
            S3_DOC_URL_VERSIONED.format(package_name=pkg_name),
            f'{CACHE_DIR}/{pkg_name}/objects.inv',
        ))
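    # These two packages are published under non-versioned doc URLs.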
    for pkg_name in ['apache-airflow-providers', 'docker-stack']:
        to_download.append((
            pkg_name,
            S3_DOC_URL_NON_VERSIONED.format(package_name=pkg_name),
            f'{CACHE_DIR}/{pkg_name}/objects.inv',
        ))
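    # Third-party Sphinx inventories are fetched from the doc sites listed in THIRD_PARTY_INDEXES.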
    to_download.extend((
        pkg_name,
        f"{doc_url}/objects.inv",
        f'{CACHE_DIR}/{pkg_name}/objects.inv',
    ) for pkg_name, doc_url in THIRD_PARTY_INDEXES.items())

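    # Keep only the inventories whose cached copy is outdated.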
    to_download = [(pkg_name, url, path) for pkg_name, url, path in to_download
                   if _is_outdated(path)]
    if not to_download:
        print("Nothing to do")
        return []

    print(f"To download {len(to_download)} inventorie(s)")

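    # Download the remaining inventories in parallel over a single shared HTTP session.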
    with requests.Session() as session, concurrent.futures.ThreadPoolExecutor(
            DEFAULT_POOLSIZE) as pool:
        download_results: Iterator[Tuple[str, bool]] = pool.map(
            _fetch_file,
            repeat(session, len(to_download)),
            (pkg_name for pkg_name, _, _ in to_download),
            (url for _, url, _ in to_download),
            (path for _, _, path in to_download),
        )
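    # Split the download results into failed and successful entries by their status flag.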
    failed, success = partition(lambda d: d[1], download_results)
    failed, success = list(failed), list(success)
    print(f"Result: {len(success)} success, {len(failed)} failed")
    if failed:
        print("Failed packages:")
        for pkg_no, (pkg_name, _) in enumerate(failed, start=1):
            print(f"{pkg_no}. {pkg_name}")

    return [pkg_name for pkg_name, status in failed]
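Example #1 relies on two helpers and a partition utility that are not shown. A minimal sketch of what they might look like, given only how the example uses them, is below; the cache-expiry window, the error handling, and the partition recipe are assumptions, not the project's actual implementation (note that Example #2 below calls _fetch_file with a different argument set).

import datetime
import itertools


def _is_outdated(path: str) -> bool:
    # Assumed policy: re-download when the cached file is missing or older than an expiry window.
    if not os.path.exists(path):
        return True
    age = datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(path))
    return age > datetime.timedelta(hours=12)


def _fetch_file(session: requests.Session, pkg_name: str, url: str, path: str) -> Tuple[str, bool]:
    # Download a single objects.inv and report (package name, success flag),
    # which is the shape Example #1 unpacks from pool.map.
    response = session.get(url, allow_redirects=True)
    if not response.ok:
        print(f"Failed to fetch inventory: {url}")
        return pkg_name, False
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as output_file:
        output_file.write(response.content)
    return pkg_name, True


def partition(pred, iterable):
    # Classic itertools recipe: returns (items where pred is false, items where pred is true),
    # i.e. the (failed, success) order Example #1 expects.
    iter_false, iter_true = itertools.tee(iterable)
    return itertools.filterfalse(pred, iter_false), filter(pred, iter_true)
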
Example #2
def fetch_inventories():
    """Fetch all inventories for Airflow documentation packages and store in cache."""
    os.makedirs(os.path.dirname(CACHE_DIR), exist_ok=True)
    to_download = []

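    # Versioned inventories: one entry per provider package, plus apache-airflow below.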
    for pkg_name in get_available_providers_packages():
        to_download.append(
            (
                S3_DOC_URL_VERSIONED.format(package_name=pkg_name),
                f'{CACHE_DIR}/{pkg_name}/objects.inv',
            )
        )
    to_download.append(
        (
            S3_DOC_URL_VERSIONED.format(package_name='apache-airflow'),
            f'{CACHE_DIR}/apache-airflow/objects.inv',
        )
    )
    for pkg_name in ['apache-airflow-providers', 'docker-stack']:
        to_download.append(
            (
                S3_DOC_URL_NON_VERSIONED.format(package_name=pkg_name),
                f'{CACHE_DIR}/{pkg_name}/objects.inv',
            )
        )
    to_download.extend(
        (
            f"{doc_url}/objects.inv",
            f'{CACHE_DIR}/{pkg_name}/objects.inv',
        )
        for pkg_name, doc_url in THIRD_PARTY_INDEXES.items()
    )

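    # Skip inventories whose cached copy is still fresh.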
    to_download = [(url, path) for url, path in to_download if _is_outdated(path)]
    if not to_download:
        print("Nothing to do")
        return

    print(f"To download {len(to_download)} inventorie(s)")

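    # Fetch everything concurrently over one shared HTTP session.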
    with requests.Session() as session, concurrent.futures.ThreadPoolExecutor(DEFAULT_POOLSIZE) as pool:
        for url, path in to_download:
            pool.submit(_fetch_file, session=session, url=url, path=path)
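Note that Example #2 submits the downloads but never inspects the returned Futures, so any failure inside _fetch_file is silently dropped (Example #1 collects and reports the results via pool.map instead). If failure reporting is needed with the submit-based variant, one option, sketched here as a drop-in replacement for the final with-block rather than as the project's actual code, is to keep the Futures and check them as they complete:

    with requests.Session() as session, concurrent.futures.ThreadPoolExecutor(DEFAULT_POOLSIZE) as pool:
        futures = {
            pool.submit(_fetch_file, session=session, url=url, path=path): url
            for url, path in to_download
        }
        for future in concurrent.futures.as_completed(futures):
            # Any exception raised inside _fetch_file surfaces here via the Future.
            if future.exception() is not None:
                print(f"Failed to fetch {futures[future]}: {future.exception()}")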