async def _download_unpaginated_metadata(self):
    root_endpoint, api_version = await self._get_root_api(self.remote.url)
    self._api_version = api_version
    if api_version > 2:
        collection_endpoint = f"{root_endpoint}/collections/all/"
        downloader = self.remote.get_downloader(
            url=collection_endpoint, silence_errors_for_response_status_codes={404}
        )
        try:
            collection_metadata_list = parse_metadata(await downloader.run())
        except FileNotFoundError:
            pass
        else:
            self._unpaginated_collection_metadata = defaultdict(dict)
            for collection in collection_metadata_list:
                namespace = collection["namespace"]
                name = collection["name"]
                self._unpaginated_collection_metadata[namespace][name] = collection

            collection_version_endpoint = f"{root_endpoint}/collection_versions/all/"
            downloader = self.remote.get_downloader(url=collection_version_endpoint)
            collection_version_metadata_list = parse_metadata(await downloader.run())

            self._unpaginated_collection_version_metadata = defaultdict(lambda: defaultdict(list))
            for collection_version_metadata in collection_version_metadata_list:
                namespace = collection_version_metadata["namespace"]["name"]
                name = collection_version_metadata["name"]
                self._unpaginated_collection_version_metadata[namespace][name].append(
                    collection_version_metadata
                )


async def _get_collection_api(root):
    """
    Returns the collection api path and api version.

    Based on https://git.io/JTMxE.
    """
    if root == "https://galaxy.ansible.com" or root == "https://galaxy.ansible.com/":
        root = "https://galaxy.ansible.com/api/"

    downloader = remote.get_downloader(url=root)

    try:
        api_data = parse_metadata(await downloader.run())
    except (json.decoder.JSONDecodeError, ClientResponseError):
        if root.endswith("/api/"):
            raise

        root = urljoin(root, "api/")
        downloader = remote.get_downloader(url=root)
        api_data = parse_metadata(await downloader.run())

    if "available_versions" not in api_data:
        raise RuntimeError(_("Could not find 'available_versions' at {}").format(root))

    if "v3" in api_data.get("available_versions", {}):
        self.api_version = 3
    elif "v2" in api_data.get("available_versions", {}):
        self.api_version = 2
    else:
        raise RuntimeError(_("Unsupported API versions at {}").format(root))

    endpoint = f"{root}v{self.api_version}/collections/"

    return endpoint, self.api_version


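# Illustrative sketch (not part of the original module): the helper above expects the
# Galaxy root endpoint to return an "available_versions" mapping and prefers v3 over
# v2. The sample root URL and payload below are assumptions for demonstration only.
sample_root = "https://galaxy.example.com/api/"
sample_api_data = {"available_versions": {"v1": "v1/", "v2": "v2/", "v3": "v3/"}}

if "v3" in sample_api_data.get("available_versions", {}):
    sample_api_version = 3
elif "v2" in sample_api_data.get("available_versions", {}):
    sample_api_version = 2
else:
    raise RuntimeError(f"Unsupported API versions at {sample_root}")

# Mirrors the endpoint construction above: "https://galaxy.example.com/api/v3/collections/"
sample_endpoint = f"{sample_root}v{sample_api_version}/collections/"

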
async def _download_unpaginated_metadata(self):
    root_endpoint, api_version = await self._get_root_api(self.remote.url)
    self._api_version = api_version
    if api_version > 2:
        loop = asyncio.get_event_loop()

        collection_endpoint = f"{root_endpoint}/collections/all/"
        excludes_endpoint = f"{root_endpoint}/excludes/"
        col_downloader = self.remote.get_downloader(
            url=collection_endpoint, silence_errors_for_response_status_codes={404}
        )
        exc_downloader = self.remote.get_downloader(
            url=excludes_endpoint, silence_errors_for_response_status_codes={404}
        )
        tasks = [loop.create_task(col_downloader.run()), loop.create_task(exc_downloader.run())]
        col_results, exc_results = await asyncio.gather(*tasks, return_exceptions=True)

        if not isinstance(exc_results, FileNotFoundError):
            excludes_response = parse_metadata(exc_results)
            if excludes_response:
                try:
                    excludes_list = parse_collections_requirements_file(excludes_response)
                except ValidationError:
                    pass
                else:
                    excludes = {r.name: parse_requirements_entry(r) for r in excludes_list}
                    self.exclude_info.update(excludes)

        if not isinstance(col_results, FileNotFoundError):
            collection_metadata_list = parse_metadata(col_results)

            self._unpaginated_collection_metadata = defaultdict(dict)
            for collection in collection_metadata_list:
                namespace = collection["namespace"]
                name = collection["name"]
                self._unpaginated_collection_metadata[namespace][name] = collection

            collection_version_endpoint = f"{root_endpoint}/collection_versions/all/"
            downloader = self.remote.get_downloader(url=collection_version_endpoint)
            collection_version_metadata_list = parse_metadata(await downloader.run())

            self._unpaginated_collection_version_metadata = defaultdict(lambda: defaultdict(list))
            for collection_version_metadata in collection_version_metadata_list:
                namespace = collection_version_metadata["namespace"]["name"]
                name = collection_version_metadata["name"]
                self._unpaginated_collection_version_metadata[namespace][name].append(
                    collection_version_metadata
                )


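# Illustrative sketch (not part of the original module): asyncio.gather() with
# return_exceptions=True hands back raised exceptions as ordinary return values,
# which is why col_results/exc_results above are checked with isinstance() rather
# than wrapped in try/except. The coroutines below are stand-ins for the downloaders.
import asyncio


async def _gather_demo():
    async def collections_all():
        return {"results": []}  # stand-in for a successful /collections/all/ payload

    async def excludes():
        raise FileNotFoundError("excludes endpoint returned 404")  # silenced 404

    col_results, exc_results = await asyncio.gather(
        collections_all(), excludes(), return_exceptions=True
    )
    assert not isinstance(col_results, FileNotFoundError)
    assert isinstance(exc_results, FileNotFoundError)


asyncio.run(_gather_demo())

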
async def _fetch_galaxy_pages(self):
    """
    Fetch the roles in a remote repository.

    Returns:
        async generator: dicts that represent pages from galaxy api

    """
    page_count = 0
    remote = self.remote

    progress_data = dict(message="Parsing Pages from Galaxy Roles API", code="parsing.roles")
    with ProgressReport(**progress_data) as progress_bar:
        api_version = get_api_version(remote.url)
        downloader = remote.get_downloader(url=get_page_url(remote.url, api_version))
        metadata = parse_metadata(await downloader.run())

        page_count = math.ceil(float(metadata["count"]) / float(PAGE_SIZE))
        progress_bar.total = page_count
        progress_bar.save()

        yield metadata
        progress_bar.increment()

        # Concurrent downloads are limited by aiohttp...
        not_done = set(
            remote.get_downloader(url=get_page_url(remote.url, api_version, page)).run()
            for page in range(2, page_count + 1)
        )

        while not_done:
            done, not_done = await asyncio.wait(not_done, return_when=FIRST_COMPLETED)
            for item in done:
                yield parse_metadata(item.result())
                progress_bar.increment()


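# Illustrative sketch (not part of the original module): the page math used above.
# PAGE_SIZE is a module-level constant in the real code; the value used here is an
# assumption for demonstration only.
import math

ASSUMED_PAGE_SIZE = 100
role_count = 437  # hypothetical "count" field from the first Galaxy roles page
page_count = math.ceil(float(role_count) / float(ASSUMED_PAGE_SIZE))
assert page_count == 5
# The first page is yielded directly; pages 2..page_count are downloaded concurrently.
remaining_pages = list(range(2, page_count + 1))
assert remaining_pages == [2, 3, 4, 5]

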
async def _fetch_collections(self):
    """
    Fetch the collections in a remote repository.

    Returns:
        async generator: dicts that represent collections from galaxy api

    """
    page_count = 1
    remote = self.remote
    collection_info = self.collection_info

    def _get_url(page):
        if collection_info:
            name, version, source = collection_info[page - 1]
            namespace, name = name.split(".")
            root = source or remote.url
            url = f"{root}/api/v2/collections/{namespace}/{name}"
            return url

        return get_page_url(remote.url, page)

    progress_data = dict(message="Parsing Galaxy Collections API", code="parsing.collections")
    with ProgressReport(**progress_data) as progress_bar:
        url = _get_url(page_count)
        downloader = remote.get_downloader(url=url)
        initial_data = parse_metadata(await downloader.run())

        count = len(self.collection_info) or initial_data.get("count", 1)
        page_count = math.ceil(float(count) / float(PAGE_SIZE))
        progress_bar.total = count
        progress_bar.save()

        # Concurrent downloads are limited by aiohttp...
        not_done = set()
        for page in range(1, page_count + 1):
            downloader = remote.get_downloader(url=_get_url(page))
            not_done.add(downloader.run())

        while not_done:
            done, not_done = await asyncio.wait(not_done, return_when=asyncio.FIRST_COMPLETED)
            for item in done:
                data = parse_metadata(item.result())
                for result in data.get("results", [data]):
                    download_url = result.get("download_url")

                    if result.get("versions_url"):
                        not_done.update(
                            [remote.get_downloader(url=result["versions_url"]).run()]
                        )

                    if result.get("version") and not download_url:
                        not_done.update([remote.get_downloader(url=result["href"]).run()])

                    if download_url:
                        yield data
                        progress_bar.increment()


async def _fetch_paginated_collection_metadata(self, name, namespace, requirement, source=None):
    root = source or self.remote.url

    collection_endpoint, api_version = await self._get_paginated_collection_api(root)
    collection_url = f"{collection_endpoint}{namespace}/{name}"
    collection_metadata_downloader = self.remote.get_downloader(url=collection_url)
    collection_metadata = parse_metadata(await collection_metadata_downloader.run())
    loop = asyncio.get_event_loop()

    tasks = []
    page_num = 1
    while True:
        versions_list_downloader = self._collection_versions_list_downloader(
            api_version, collection_endpoint, namespace, name, page_num, PAGE_SIZE
        )
        collection_versions_list = parse_metadata(await versions_list_downloader.run())
        if api_version == 2:
            collection_versions = collection_versions_list["results"]
        else:
            collection_versions = collection_versions_list["data"]
        for collection_version in collection_versions:
            if collection_version["version"] in requirement:
                version_num = collection_version["version"]
                collection_version_detail_url = f"{collection_url}/versions/{version_num}/"
                if collection_metadata["deprecated"]:
                    d_content = DeclarativeContent(
                        content=AnsibleCollectionDeprecated(namespace=namespace, name=name),
                    )
                    self.deprecation_after_sync.add(f"{namespace}.{name}")
                    await self.put(d_content)
                tasks.append(
                    loop.create_task(
                        self._fetch_collection_version_metadata(
                            api_version,
                            collection_version_detail_url,
                        )
                    )
                )
        next_value = self._get_response_next_value(api_version, collection_versions_list)
        if not next_value:
            break
        page_num = page_num + 1

    await asyncio.gather(*tasks)


async def _should_we_sync(self):
    """Check last synced metadata time."""
    msg = _("no-op: Checking if remote changed since last sync.")
    noop = ProgressReport(message=msg, code="noop")
    noop.state = TASK_STATES.COMPLETED
    noop.save()

    if not self.repository.remote:
        return True

    if self.remote != self.repository.remote.cast():
        return True

    root, api_version = await self._get_root_api(self.remote.url)
    if api_version == 3:
        downloader = self.remote.get_downloader(
            url=root, silence_errors_for_response_status_codes={404}
        )
        try:
            metadata = parse_metadata(await downloader.run())
        except FileNotFoundError:
            return True

        try:
            self.last_synced_metadata_time = parse_datetime(metadata["published"])
        except KeyError:
            return True

        sources = set()
        if self.collection_info:
            sources = {r.source for r in self.collection_info if r.source}
        sources.add(self.remote.url)
        if len(sources) > 1:
            return True

        if self.last_synced_metadata_time == self.repository.last_synced_metadata_time:
            noop.message = _(
                "no-op: {remote} did not change since last sync - {published}".format(
                    remote=self.remote.url, published=self.last_synced_metadata_time
                )
            )
            noop.save()
            return False

    return True


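# Illustrative sketch (not part of the original module): once the v3 root metadata has
# been fetched, the no-op decision above reduces to comparing the remote's "published"
# timestamp with the one stored on the repository by the previous sync. The timestamps
# below are assumptions for demonstration only.
from datetime import datetime, timezone

remote_published = datetime(2021, 5, 3, 12, 0, tzinfo=timezone.utc)        # parse_datetime(metadata["published"])
repository_last_synced = datetime(2021, 5, 3, 12, 0, tzinfo=timezone.utc)  # repository.last_synced_metadata_time

should_we_sync = remote_published != repository_last_synced
assert should_we_sync is False  # equal timestamps -> the sync is reported as a no-op

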
async def _find_all_collections(self):
    if self._unpaginated_collection_version_metadata:
        await self._find_all_collections_from_unpaginated_data()
        return

    collection_endpoint, api_version = await self._get_paginated_collection_api(self.remote.url)
    loop = asyncio.get_event_loop()

    tasks = []
    page_num = 1
    while True:
        collection_list_downloader = self._collection_list_downloader(
            api_version, collection_endpoint, page_num, PAGE_SIZE
        )
        collection_list = parse_metadata(await collection_list_downloader.run())

        if api_version == 2:
            collections = collection_list["results"]
        else:
            collections = collection_list["data"]

        for collection in collections:
            if api_version == 2:
                namespace = collection["namespace"]["name"]
            else:
                namespace = collection["namespace"]
            name = collection["name"]
            requirements_file = RequirementsFileEntry(
                name=".".join([namespace, name]),
                version="*",
                source=None,
            )
            tasks.append(loop.create_task(self._fetch_collection_metadata(requirements_file)))

        next_value = self._get_response_next_value(api_version, collection_list)
        if not next_value:
            break
        page_num = page_num + 1

    await asyncio.gather(*tasks)


async def _loop_through_pages(not_done, versions_url=None):
    """
    Loop through API pagination.
    """
    url = await _get_url(1, versions_url)
    downloader = remote.get_downloader(url=url)
    data = parse_metadata(await downloader.run())

    count = data.get("count") or data.get("meta", {}).get("count", 1)
    if collection_info and not versions_url:
        count = len(collection_info)
        page_count = count
    else:
        page_count = math.ceil(float(count) / float(PAGE_SIZE))

    for page in range(1, page_count + 1):
        url = await _get_url(page, versions_url)
        downloader = remote.get_downloader(url=url)
        not_done.add(downloader.run())

    return count


async def _fetch_collections(self):
    """
    Fetch the collections in a remote repository.

    Returns:
        async generator: dicts that represent collections from galaxy api

    """
    remote = self.remote
    collection_info = self.collection_info

    async def _get_collection_api(root):
        """
        Returns the collection api path and api version.

        Based on https://git.io/JTMxE.
        """
        if root == "https://galaxy.ansible.com" or root == "https://galaxy.ansible.com/":
            root = "https://galaxy.ansible.com/api/"

        downloader = remote.get_downloader(url=root)

        try:
            api_data = parse_metadata(await downloader.run())
        except (json.decoder.JSONDecodeError, ClientResponseError):
            if root.endswith("/api/"):
                raise

            root = urljoin(root, "api/")
            downloader = remote.get_downloader(url=root)
            api_data = parse_metadata(await downloader.run())

        if "available_versions" not in api_data:
            raise RuntimeError(_("Could not find 'available_versions' at {}").format(root))

        if "v3" in api_data.get("available_versions", {}):
            self.api_version = 3
        elif "v2" in api_data.get("available_versions", {}):
            self.api_version = 2
        else:
            raise RuntimeError(_("Unsupported API versions at {}").format(root))

        endpoint = f"{root}v{self.api_version}/collections/"

        return endpoint, self.api_version

    async def _get_url(page, versions_url=None):
        if collection_info and not versions_url:
            name, version, source = collection_info[page - 1]
            namespace, name = name.split(".")
            root = source or remote.url
            api_endpoint = (await _get_collection_api(root))[0]
            url = f"{api_endpoint}{namespace}/{name}/"
            return url

        if not versions_url:
            api_endpoint, api_version = await _get_collection_api(remote.url)
            return get_page_url(api_endpoint, api_version, page)

        if not self.api_version:
            await _get_collection_api(remote.url)

        return get_page_url(versions_url, self.api_version, page)

    async def _loop_through_pages(not_done, versions_url=None):
        """
        Loop through API pagination.
        """
        url = await _get_url(1, versions_url)
        downloader = remote.get_downloader(url=url)
        data = parse_metadata(await downloader.run())

        count = data.get("count") or data.get("meta", {}).get("count", 1)
        if collection_info and not versions_url:
            count = len(collection_info)
            page_count = count
        else:
            page_count = math.ceil(float(count) / float(PAGE_SIZE))

        for page in range(1, page_count + 1):
            url = await _get_url(page, versions_url)
            downloader = remote.get_downloader(url=url)
            not_done.add(downloader.run())

        return count

    def _build_url(path_or_url):
        """Check value and turn it into a url using remote.url if it's a relative path."""
        url_parts = urlparse(path_or_url)
        if not url_parts.netloc:
            new_url_parts = urlparse(self.remote.url)._replace(path=url_parts.path)
            return urlunparse(new_url_parts)
        else:
            return path_or_url

    def _add_collection_level_metadata(data, additional_metadata):
        """Additional metadata at collection level to be sent through stages."""
        name = data["collection"]["name"]
        namespace = data["namespace"]["name"]
        metadata = additional_metadata.get(f"{namespace}_{name}", {})
        data["deprecated"] = metadata.get("deprecated")

    def _add_collection_version_level_metadata(data, additional_metadata):
        """Additional metadata at collection version level to be sent through stages."""
        metadata = additional_metadata.get(_build_url(data["href"]), {})
        data["docs_blob_url"] = metadata.get("docs_blob_url")

    progress_data = dict(message="Parsing Galaxy Collections API", code="parsing.collections")
    with ProgressReport(**progress_data) as progress_bar:
        not_done = set()
        count = await _loop_through_pages(not_done)
        progress_bar.total = count
        progress_bar.save()

        additional_metadata = {}
        while not_done:
            done, not_done = await asyncio.wait(not_done, return_when=asyncio.FIRST_COMPLETED)
            for item in done:
                data = parse_metadata(item.result())

                if "data" in data:  # api v3
                    results = data["data"]
                elif "results" in data:  # api v2
                    results = data["results"]
                else:
                    results = [data]

                for result in results:
                    download_url = result.get("download_url")

                    if result.get("deprecated"):
                        name = result["name"]
                        try:
                            namespace = result["namespace"]["name"]  # api v3
                        except TypeError:
                            namespace = result["namespace"]  # api v2
                        additional_metadata[f"{namespace}_{name}"] = {
                            "deprecated": result["deprecated"]
                        }

                    if result.get("versions_url"):
                        versions_url = _build_url(result.get("versions_url"))
                        await _loop_through_pages(not_done, versions_url)
                        progress_bar.increment()

                    if result.get("version") and not download_url:
                        version_url = _build_url(result["href"])
                        not_done.update([remote.get_downloader(url=version_url).run()])
                        additional_metadata[version_url] = {
                            "docs_blob_url": f"{version_url}docs-blob/"
                        }

                    if download_url:
                        _add_collection_level_metadata(data, additional_metadata)
                        _add_collection_version_level_metadata(data, additional_metadata)
                        yield data


async def _fetch_collections(self):
    """
    Fetch the collections in a remote repository.

    Returns:
        async generator: dicts that represent collections from galaxy api

    """
    page_count = 1
    remote = self.remote
    collection_info = self.collection_info

    def _get_url(page):
        if collection_info:
            name, version, source = collection_info[page - 1]
            namespace, name = name.split(".")
            root = source or remote.url
            url = f"{root}/api/v2/collections/{namespace}/{name}"
            return url

        return get_page_url(remote.url, page)

    def _build_url(path_or_url):
        """Check value and turn it into a url using remote.url if it's a relative path."""
        url_parts = urlparse(path_or_url)
        if not url_parts.netloc:
            new_url_parts = urlparse(self.remote.url)._replace(path=url_parts.path)
            return urlunparse(new_url_parts)
        else:
            return path_or_url

    progress_data = dict(message="Parsing Galaxy Collections API", code="parsing.collections")
    with ProgressReport(**progress_data) as progress_bar:
        url = _get_url(page_count)
        downloader = remote.get_downloader(url=url)
        initial_data = parse_metadata(await downloader.run())

        count = len(self.collection_info) or initial_data.get("count", 1)
        page_count = math.ceil(float(count) / float(PAGE_SIZE))
        progress_bar.total = count
        progress_bar.save()

        # Concurrent downloads are limited by aiohttp...
        not_done = set()
        for page in range(1, page_count + 1):
            downloader = remote.get_downloader(url=_get_url(page))
            not_done.add(downloader.run())

        while not_done:
            done, not_done = await asyncio.wait(not_done, return_when=asyncio.FIRST_COMPLETED)
            for item in done:
                data = parse_metadata(item.result())
                # v2 uses 'results' as the key while v3 uses 'data'
                results = data.get("results") or data.get("data") or [data]
                for result in results:
                    download_url = result.get("download_url")

                    if result.get("versions_url"):
                        versions_url = _build_url(result.get("versions_url"))
                        not_done.update([remote.get_downloader(url=versions_url).run()])

                    if result.get("version") and not download_url:
                        version_url = _build_url(result["href"])
                        not_done.update([remote.get_downloader(url=version_url).run()])

                    if download_url:
                        yield data
                        progress_bar.increment()


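# Illustrative sketch (not part of the original module): how _build_url above resolves
# a relative "href"/"versions_url" against the remote's URL, and how the v2/v3
# result-key fallback behaves. All URLs and payloads below are assumptions.
from urllib.parse import urlparse, urlunparse

assumed_remote_url = "https://galaxy.example.com/api/"
relative_href = "/api/v3/collections/community/general/versions/"

url_parts = urlparse(relative_href)
if not url_parts.netloc:
    resolved = urlunparse(urlparse(assumed_remote_url)._replace(path=url_parts.path))
else:
    resolved = relative_href
assert resolved == "https://galaxy.example.com/api/v3/collections/community/general/versions/"

v2_page = {"count": 1, "results": [{"name": "general"}]}
v3_page = {"meta": {"count": 1}, "data": [{"name": "general"}]}
single_version = {"version": "1.0.0", "href": relative_href}
for payload in (v2_page, v3_page, single_version):
    results = payload.get("results") or payload.get("data") or [payload]
    assert isinstance(results, list)

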
async def _fetch_collection_version_metadata(self, api_version, collection_version_url):
    downloader = self.remote.get_downloader(url=collection_version_url)
    metadata = parse_metadata(await downloader.run())
    await self._add_collection_version(api_version, collection_version_url, metadata)