Example #1
    async def _fetch_galaxy_pages(self):
        """
        Fetch the roles in a remote repository.

        Returns:
            async generator: dicts that represent pages from galaxy api

        """
        page_count = 0
        remote = self.remote

        progress_data = dict(message="Parsing Pages from Galaxy Roles API", code="parsing.roles")
        with ProgressReport(**progress_data) as progress_bar:
            api_version = get_api_version(remote.url)
            # The first page doubles as the source of the total result count.
            downloader = remote.get_downloader(url=get_page_url(remote.url, api_version))
            metadata = parse_metadata(await downloader.run())

            # Size the progress bar by the number of pages the API reports.
            page_count = math.ceil(float(metadata["count"]) / float(PAGE_SIZE))
            progress_bar.total = page_count
            progress_bar.save()

            yield metadata
            progress_bar.increment()

            # Concurrent downloads are limited by aiohttp...
            # Wrap each coroutine in a Task: passing bare coroutines to
            # asyncio.wait() is deprecated since Python 3.8 and raises
            # TypeError on 3.11+.
            not_done = set(
                asyncio.ensure_future(
                    remote.get_downloader(url=get_page_url(remote.url, api_version, page)).run()
                )
                for page in range(2, page_count + 1)
            )

            # Drain completed downloads as they finish, yielding each parsed page.
            while not_done:
                done, not_done = await asyncio.wait(not_done, return_when=asyncio.FIRST_COMPLETED)
                for item in done:
                    yield parse_metadata(item.result())
                    progress_bar.increment()
Example #2
    async def _fetch_collections(self):
        """
        Fetch the collections in a remote repository.

        Returns:
            async generator: dicts that represent collections from galaxy api

        """
        page_count = 1
        remote = self.remote
        collection_info = self.collection_info

        def _get_url(page, api_version):
            """Return the URL for *page*: a requested collection if any were named, else a listing page."""
            if collection_info:
                # collection_info entries are (name, version, source) tuples
                # where name is "namespace.name".
                name, version, source = collection_info[page - 1]
                namespace, name = name.split(".")
                root = source or remote.url
                url = f"{root.rstrip('/')}/{namespace}/{name}"
                return url

            return get_page_url(remote.url, api_version, page)

        def _build_url(path_or_url):
            """Check value and turn it into a url using remote.url if it's a relative path."""
            url_parts = urlparse(path_or_url)
            if not url_parts.netloc:
                new_url_parts = urlparse(
                    self.remote.url)._replace(path=url_parts.path)
                return urlunparse(new_url_parts)
            else:
                return path_or_url

        progress_data = dict(message="Parsing Galaxy Collections API",
                             code="parsing.collections")
        with ProgressReport(**progress_data) as progress_bar:
            api_version = get_api_version(remote.url)
            url = _get_url(page_count, api_version)
            downloader = remote.get_downloader(url=url)
            initial_data = parse_metadata(await downloader.run())

            # Galaxy v2 reports "count" at the top level; v3 nests it under
            # "meta". When specific collections were requested, size the total
            # from that list instead.
            _count = initial_data.get("count") or initial_data.get(
                "meta", {}).get("count", 1)
            count = len(self.collection_info) * PAGE_SIZE or _count
            page_count = math.ceil(float(count) / float(PAGE_SIZE))
            progress_bar.total = count
            progress_bar.save()

            # Concurrent downloads are limited by aiohttp...
            # Wrap coroutines in Tasks: bare coroutines in asyncio.wait() are
            # deprecated since Python 3.8 and rejected on 3.11+.
            not_done = set()
            for page in range(1, page_count + 1):
                downloader = remote.get_downloader(
                    url=_get_url(page, api_version))
                not_done.add(asyncio.ensure_future(downloader.run()))

            # Maps a version URL to extra metadata (docs blob location)
            # discovered while walking the API.
            additional_metadata = {}

            while not_done:
                done, not_done = await asyncio.wait(
                    not_done, return_when=asyncio.FIRST_COMPLETED)

                for item in done:
                    data = parse_metadata(item.result())

                    # v2 wraps page items in "results"; v3 uses "data". A bare
                    # detail document is treated as a single-item list.
                    if api_version < 3:
                        results = data.get("results", [data])
                    else:
                        results = data.get("data", [data])

                    for result in results:
                        download_url = result.get("download_url")

                        if result.get("versions_url"):
                            # Follow the collection's versions listing.
                            versions_url = _build_url(
                                result.get("versions_url"))
                            not_done.add(asyncio.ensure_future(
                                remote.get_downloader(url=versions_url).run()
                            ))

                        if result.get("version") and not download_url:
                            # Version entry without a download URL: fetch its
                            # detail page and remember where its docs blob lives.
                            version_url = _build_url(result["href"])
                            not_done.add(asyncio.ensure_future(
                                remote.get_downloader(url=version_url).run()
                            ))
                            additional_metadata[version_url] = {
                                "docs_blob_url": f"{version_url}docs-blob/"
                            }

                        if download_url:
                            # Fully-resolved version document: attach any docs
                            # blob URL recorded earlier, then emit it.
                            metadata = additional_metadata.get(
                                _build_url(data["href"]), {})
                            data["docs_blob_url"] = metadata.get(
                                "docs_blob_url")
                            yield data
                            progress_bar.increment()