async def _fetch_galaxy_pages(self):
    """
    Fetch the roles in a remote repository.

    Returns:
        async generator: dicts that represent pages from galaxy api

    """
    page_count = 0
    remote = self.remote

    progress_data = dict(message="Parsing Pages from Galaxy Roles API", code="parsing.roles")
    with ProgressReport(**progress_data) as progress_bar:
        api_version = get_api_version(remote.url)
        downloader = remote.get_downloader(url=get_page_url(remote.url, api_version))
        metadata = parse_metadata(await downloader.run())

        # The first page is fetched up front so the total page count is
        # known before the remaining downloads are scheduled.
        page_count = math.ceil(float(metadata["count"]) / float(PAGE_SIZE))
        progress_bar.total = page_count
        progress_bar.save()

        yield metadata
        progress_bar.increment()

        # Concurrent downloads are limited by aiohttp...
        not_done = set(
            remote.get_downloader(url=get_page_url(remote.url, api_version, page)).run()
            for page in range(2, page_count + 1)
        )

        while not_done:
            done, not_done = await asyncio.wait(not_done, return_when=asyncio.FIRST_COMPLETED)
            for item in done:
                yield parse_metadata(item.result())
                progress_bar.increment()
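# A minimal consumption sketch (an assumption, not part of the original module).
# Any async generator like _fetch_galaxy_pages() can be drained with `async for`;
# here the page dicts are flattened into one list of role entries. The helper
# name `fetch_all_roles` is hypothetical, and the "results" key holding the
# role entries is assumed from the pre-v3 Galaxy pagination envelope seen in
# _fetch_collections() below.
async def fetch_all_roles(stage):
    """Collect every role dict from all pages of the Galaxy roles API."""
    roles = []
    async for page in stage._fetch_galaxy_pages():
        # Each yielded dict is one API page; its role entries are appended
        # in whatever order the page downloads happen to complete.
        roles.extend(page.get("results", []))
    return roles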
async def _fetch_collections(self):
    """
    Fetch the collections in a remote repository.

    Returns:
        async generator: dicts that represent collections from galaxy api

    """
    page_count = 1
    remote = self.remote
    collection_info = self.collection_info

    def _get_url(page, api_version):
        if collection_info:
            # When specific collections were requested, build one URL per
            # requested "namespace.name" entry instead of paging the API.
            name, version, source = collection_info[page - 1]
            namespace, name = name.split(".")
            root = source or remote.url
            url = f"{root.rstrip('/')}/{namespace}/{name}"
            return url
        return get_page_url(remote.url, api_version, page)

    def _build_url(path_or_url):
        """Check value and turn it into a url using remote.url if it's a relative path."""
        url_parts = urlparse(path_or_url)
        if not url_parts.netloc:
            new_url_parts = urlparse(self.remote.url)._replace(path=url_parts.path)
            return urlunparse(new_url_parts)
        else:
            return path_or_url

    progress_data = dict(message="Parsing Galaxy Collections API", code="parsing.collections")
    with ProgressReport(**progress_data) as progress_bar:
        api_version = get_api_version(remote.url)
        url = _get_url(page_count, api_version)
        downloader = remote.get_downloader(url=url)
        initial_data = parse_metadata(await downloader.run())

        # v2 reports the total count at the top level; v3 nests it under "meta".
        _count = initial_data.get("count") or initial_data.get("meta", {}).get("count", 1)
        # With an explicit collection list, fetch one URL per requested
        # collection; otherwise trust the count reported by the API.
        count = len(self.collection_info) * PAGE_SIZE or _count
        page_count = math.ceil(float(count) / float(PAGE_SIZE))
        progress_bar.total = count
        progress_bar.save()

        # Concurrent downloads are limited by aiohttp...
        not_done = set()
        for page in range(1, page_count + 1):
            downloader = remote.get_downloader(url=_get_url(page, api_version))
            not_done.add(downloader.run())

        additional_metadata = {}
        while not_done:
            done, not_done = await asyncio.wait(not_done, return_when=asyncio.FIRST_COMPLETED)
            for item in done:
                data = parse_metadata(item.result())

                # v2 nests results under "results", v3 under "data"; a bare
                # detail document is wrapped so it can be handled uniformly.
                if api_version < 3:
                    results = data.get("results", [data])
                else:
                    results = data.get("data", [data])

                for result in results:
                    download_url = result.get("download_url")

                    if result.get("versions_url"):
                        # A collection document: queue its versions list page.
                        versions_url = _build_url(result.get("versions_url"))
                        not_done.update([remote.get_downloader(url=versions_url).run()])

                    if result.get("version") and not download_url:
                        # A versions-list entry: queue the version detail page,
                        # which is the document that carries the download_url.
                        version_url = _build_url(result["href"])
                        not_done.update([remote.get_downloader(url=version_url).run()])
                        additional_metadata[version_url] = {
                            "docs_blob_url": f"{version_url}docs-blob/"
                        }

                    if download_url:
                        # A version detail document: attach the docs blob URL
                        # recorded when this page was queued, then yield it.
                        metadata = additional_metadata.get(_build_url(data["href"]), {})
                        data["docs_blob_url"] = metadata.get("docs_blob_url")
                        yield data
                        progress_bar.increment()
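# A minimal consumption sketch (an assumption, not part of the original module).
# _fetch_collections() only yields a dict once it carries a "download_url", so
# a caller can schedule artifact downloads directly from the stream. The
# helper name `collect_downloads` is hypothetical; only the `async for`
# consumption pattern and the two keys used here follow from the code above.
async def collect_downloads(stage):
    """Map each collection version's download_url to its docs_blob_url."""
    downloads = {}
    async for data in stage._fetch_collections():
        # "download_url" is guaranteed by the yield condition above;
        # "docs_blob_url" may be None when no docs blob was recorded.
        downloads[data["download_url"]] = data.get("docs_blob_url")
    return downloads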