Ejemplo n.º 1
0
    async def _download_unpaginated_metadata(self):
        """Try to fetch all collection metadata via the unpaginated bulk endpoints.

        On API v3 remotes that expose ``/collections/all/``, populates
        ``self._unpaginated_collection_metadata`` (namespace -> name -> entry)
        and ``self._unpaginated_collection_version_metadata``
        (namespace -> name -> [version entries]).  Does nothing for v2
        remotes or when the bulk endpoint is missing (404).
        """
        root_endpoint, api_version = await self._get_root_api(self.remote.url)
        self._api_version = api_version
        if api_version <= 2:
            # v2 has no bulk endpoints; the paginated code path handles it.
            return

        all_collections_url = f"{root_endpoint}/collections/all/"
        bulk_downloader = self.remote.get_downloader(
            url=all_collections_url,
            silence_errors_for_response_status_codes={404},
        )
        try:
            collection_entries = parse_metadata(await bulk_downloader.run())
        except FileNotFoundError:
            # Remote does not support the bulk endpoint (silenced 404).
            return

        self._unpaginated_collection_metadata = defaultdict(dict)
        for entry in collection_entries:
            self._unpaginated_collection_metadata[entry["namespace"]][entry["name"]] = entry

        all_versions_url = f"{root_endpoint}/collection_versions/all/"
        versions_downloader = self.remote.get_downloader(url=all_versions_url)
        version_entries = parse_metadata(await versions_downloader.run())

        self._unpaginated_collection_version_metadata = defaultdict(
            lambda: defaultdict(list))
        for version_entry in version_entries:
            ns = version_entry["namespace"]["name"]
            self._unpaginated_collection_version_metadata[ns][
                version_entry["name"]].append(version_entry)
Ejemplo n.º 2
0
        async def _get_collection_api(root):
            """
            Returns the collection api path and api version.

            Based on https://git.io/JTMxE.
            """
            # Galaxy's public server root needs the "/api/" suffix added.
            if root in ("https://galaxy.ansible.com", "https://galaxy.ansible.com/"):
                root = "https://galaxy.ansible.com/api/"

            try:
                api_data = parse_metadata(
                    await remote.get_downloader(url=root).run())
            except (json.decoder.JSONDecodeError, ClientResponseError):
                # The url may be the server root rather than the API root;
                # retry once under "api/" before giving up.
                if root.endswith("/api/"):
                    raise
                root = urljoin(root, "api/")
                api_data = parse_metadata(
                    await remote.get_downloader(url=root).run())

            if "available_versions" not in api_data:
                raise RuntimeError(_("Could not find 'available_versions' at {}").format(root))

            available = api_data.get("available_versions", {})
            # Prefer v3 over v2 when the server offers both.
            if "v3" in available:
                self.api_version = 3
            elif "v2" in available:
                self.api_version = 2
            else:
                raise RuntimeError(_("Unsupported API versions at {}").format(root))

            return f"{root}v{self.api_version}/collections/", self.api_version
Ejemplo n.º 3
0
    async def _download_unpaginated_metadata(self):
        """Fetch collection and excludes metadata from the unpaginated bulk endpoints.

        On API v3 remotes, downloads ``/collections/all/`` and ``/excludes/``
        concurrently and records the results in
        ``self._unpaginated_collection_metadata``,
        ``self._unpaginated_collection_version_metadata`` and
        ``self.exclude_info``.  A 404 from either endpoint (surfaced as
        ``FileNotFoundError``) means the remote does not support it and is
        not an error.
        """
        root_endpoint, api_version = await self._get_root_api(self.remote.url)
        self._api_version = api_version
        if api_version > 2:
            loop = asyncio.get_event_loop()

            collection_endpoint = f"{root_endpoint}/collections/all/"
            excludes_endpoint = f"{root_endpoint}/excludes/"
            col_downloader = self.remote.get_downloader(
                url=collection_endpoint,
                silence_errors_for_response_status_codes={404})
            exc_downloader = self.remote.get_downloader(
                url=excludes_endpoint,
                silence_errors_for_response_status_codes={404})
            tasks = [
                loop.create_task(col_downloader.run()),
                loop.create_task(exc_downloader.run())
            ]
            col_results, exc_results = await asyncio.gather(
                *tasks, return_exceptions=True)

            # gather(return_exceptions=True) hands back exception *objects*
            # in place of results.  Only FileNotFoundError (a silenced 404)
            # is expected; re-raise anything else instead of feeding an
            # exception object to parse_metadata() below.
            for result in (col_results, exc_results):
                if isinstance(result, BaseException) and not isinstance(
                        result, FileNotFoundError):
                    raise result

            if not isinstance(exc_results, FileNotFoundError):
                excludes_response = parse_metadata(exc_results)
                if excludes_response:
                    try:
                        excludes_list = parse_collections_requirements_file(
                            excludes_response)
                    except ValidationError:
                        # Malformed excludes data is treated as "no excludes".
                        pass
                    else:
                        excludes = {
                            r.name: parse_requirements_entry(r)
                            for r in excludes_list
                        }
                        self.exclude_info.update(excludes)

            if not isinstance(col_results, FileNotFoundError):
                collection_metadata_list = parse_metadata(col_results)

                # namespace -> name -> collection entry
                self._unpaginated_collection_metadata = defaultdict(dict)
                for collection in collection_metadata_list:
                    namespace = collection["namespace"]
                    name = collection["name"]
                    self._unpaginated_collection_metadata[namespace][
                        name] = collection

                collection_version_endpoint = f"{root_endpoint}/collection_versions/all/"
                downloader = self.remote.get_downloader(
                    url=collection_version_endpoint)
                collection_version_metadata_list = parse_metadata(
                    await downloader.run())

                # namespace -> name -> [version entries]
                self._unpaginated_collection_version_metadata = defaultdict(
                    lambda: defaultdict(list))
                for collection_version_metadata in collection_version_metadata_list:
                    namespace = collection_version_metadata["namespace"][
                        "name"]
                    name = collection_version_metadata["name"]
                    self._unpaginated_collection_version_metadata[namespace][
                        name].append(collection_version_metadata)
Ejemplo n.º 4
0
    async def _fetch_galaxy_pages(self):
        """
        Fetch the roles in a remote repository.

        Returns:
            async generator: dicts that represent pages from galaxy api

        """
        page_count = 0
        remote = self.remote

        progress_data = dict(message="Parsing Pages from Galaxy Roles API", code="parsing.roles")
        with ProgressReport(**progress_data) as progress_bar:
            api_version = get_api_version(remote.url)
            downloader = remote.get_downloader(url=get_page_url(remote.url, api_version))
            metadata = parse_metadata(await downloader.run())

            page_count = math.ceil(float(metadata["count"]) / float(PAGE_SIZE))
            progress_bar.total = page_count
            progress_bar.save()

            # Page 1 was already fetched above; yield it before the rest.
            yield metadata
            progress_bar.increment()

            # Concurrent downloads are limited by aiohttp...
            # Wrap each coroutine in a Task: asyncio.wait() rejects bare
            # coroutines on Python >= 3.11 (deprecated since 3.8).
            not_done = set(
                asyncio.ensure_future(
                    remote.get_downloader(url=get_page_url(remote.url, api_version, page)).run()
                )
                for page in range(2, page_count + 1)
            )

            while not_done:
                done, not_done = await asyncio.wait(not_done, return_when=asyncio.FIRST_COMPLETED)
                for item in done:
                    yield parse_metadata(item.result())
                    progress_bar.increment()
Ejemplo n.º 5
0
    async def _fetch_collections(self):
        """
        Fetch the collections in a remote repository.

        Returns:
            async generator: dicts that represent collections from galaxy api

        """
        page_count = 1
        remote = self.remote
        collection_info = self.collection_info

        def _get_url(page):
            # With an explicit requirements list each "page" maps to one
            # requested collection; otherwise pages come from the list API.
            if collection_info:
                name, version, source = collection_info[page - 1]
                namespace, name = name.split(".")
                root = source or remote.url
                url = f"{root}/api/v2/collections/{namespace}/{name}"
                return url

            return get_page_url(remote.url, page)

        progress_data = dict(message="Parsing Galaxy Collections API", code="parsing.collections")
        with ProgressReport(**progress_data) as progress_bar:
            url = _get_url(page_count)
            downloader = remote.get_downloader(url=url)
            initial_data = parse_metadata(await downloader.run())

            count = len(self.collection_info) or initial_data.get("count", 1)
            page_count = math.ceil(float(count) / float(PAGE_SIZE))
            progress_bar.total = count
            progress_bar.save()

            # Concurrent downloads are limited by aiohttp...
            # asyncio.wait() requires Tasks (bare coroutines raise TypeError
            # on Python >= 3.11), so wrap every download in ensure_future().
            not_done = set()
            for page in range(1, page_count + 1):
                downloader = remote.get_downloader(url=_get_url(page))
                not_done.add(asyncio.ensure_future(downloader.run()))

            while not_done:
                done, not_done = await asyncio.wait(not_done, return_when=asyncio.FIRST_COMPLETED)
                for item in done:
                    data = parse_metadata(item.result())
                    for result in data.get("results", [data]):
                        download_url = result.get("download_url")

                        # Collection entry: follow its version listing.
                        if result.get("versions_url"):
                            not_done.update([
                                asyncio.ensure_future(
                                    remote.get_downloader(url=result["versions_url"]).run())
                            ])

                        # Version entry without a download url: fetch its
                        # detail page via "href".
                        if result.get("version") and not download_url:
                            not_done.update([
                                asyncio.ensure_future(
                                    remote.get_downloader(url=result["href"]).run())
                            ])

                        if download_url:
                            yield data
                            progress_bar.increment()
Ejemplo n.º 6
0
    async def _fetch_paginated_collection_metadata(self,
                                                   name,
                                                   namespace,
                                                   requirement,
                                                   source=None):
        """
        Fetch metadata for every version of one collection that matches *requirement*.

        Walks the paginated version listing of ``namespace.name`` and, for each
        version contained in *requirement*, schedules a detail-metadata fetch
        as a concurrent task.  If the collection itself is deprecated, a
        deprecation content unit is pushed through the stages pipeline.

        Args:
            name: Collection name.
            namespace: Collection namespace.
            requirement: Version specifier; membership is tested with ``in``.
            source: Optional alternate server root; defaults to the remote url.
        """
        root = source or self.remote.url
        collection_endpoint, api_version = await self._get_paginated_collection_api(
            root)
        collection_url = f"{collection_endpoint}{namespace}/{name}"
        collection_metadata_downloader = self.remote.get_downloader(
            url=collection_url)
        collection_metadata = parse_metadata(
            await collection_metadata_downloader.run())
        loop = asyncio.get_event_loop()

        tasks = []
        page_num = 1
        while True:
            versions_list_downloader = self._collection_versions_list_downloader(
                api_version, collection_endpoint, namespace, name, page_num,
                PAGE_SIZE)
            collection_versions_list = parse_metadata(
                await versions_list_downloader.run())
            # v2 nests the page under "results"; v3 under "data".
            if api_version == 2:
                collection_versions = collection_versions_list["results"]
            else:
                collection_versions = collection_versions_list["data"]
            for collection_version in collection_versions:
                if collection_version["version"] in requirement:
                    version_num = collection_version["version"]
                    collection_version_detail_url = f"{collection_url}/versions/{version_num}/"
                    # NOTE(review): this emits one deprecation unit per matching
                    # version of a deprecated collection; the set keeps the
                    # post-sync bookkeeping deduplicated. Confirm duplicates
                    # through self.put() are intended.
                    if collection_metadata["deprecated"]:
                        d_content = DeclarativeContent(
                            content=AnsibleCollectionDeprecated(
                                namespace=namespace, name=name), )
                        self.deprecation_after_sync.add(f"{namespace}.{name}")
                        await self.put(d_content)
                    tasks.append(
                        loop.create_task(
                            self._fetch_collection_version_metadata(
                                api_version,
                                collection_version_detail_url,
                            )))
            next_value = self._get_response_next_value(
                api_version, collection_versions_list)
            if not next_value:
                break
            page_num = page_num + 1

        # Wait for all scheduled per-version detail fetches to finish.
        await asyncio.gather(*tasks)
Ejemplo n.º 7
0
    async def _should_we_sync(self):
        """Check last synced metadata time."""
        msg = _("no-op: Checking if remote changed since last sync.")
        report = ProgressReport(message=msg, code="noop")
        report.state = TASK_STATES.COMPLETED
        report.save()

        # No previous remote, or a different remote: always sync.
        if not self.repository.remote:
            return True
        if self.remote != self.repository.remote.cast():
            return True

        root, api_version = await self._get_root_api(self.remote.url)
        if api_version != 3:
            # Only v3 exposes a "published" timestamp we can compare against.
            return True

        downloader = self.remote.get_downloader(
            url=root, silence_errors_for_response_status_codes={404})
        try:
            metadata = parse_metadata(await downloader.run())
        except FileNotFoundError:
            return True

        try:
            self.last_synced_metadata_time = parse_datetime(metadata["published"])
        except KeyError:
            return True

        # When requirements pull from several servers, one timestamp cannot
        # prove that nothing changed.
        sources = set()
        if self.collection_info:
            sources = {r.source for r in self.collection_info if r.source}
        sources.add(self.remote.url)
        if len(sources) > 1:
            return True

        if self.last_synced_metadata_time == self.repository.last_synced_metadata_time:
            report.message = _(
                "no-op: {remote} did not change since last sync - {published}"
                .format(remote=self.remote.url,
                        published=self.last_synced_metadata_time))
            report.save()
            return False

        return True
Ejemplo n.º 8
0
    async def _find_all_collections(self):
        """Kick off a metadata fetch for every collection the remote advertises."""
        if self._unpaginated_collection_version_metadata:
            # Bulk metadata already downloaded; use the fast path.
            await self._find_all_collections_from_unpaginated_data()
            return

        collection_endpoint, api_version = await self._get_paginated_collection_api(
            self.remote.url)
        loop = asyncio.get_event_loop()

        fetch_tasks = []
        page = 1
        while True:
            list_downloader = self._collection_list_downloader(
                api_version, collection_endpoint, page, PAGE_SIZE)
            page_data = parse_metadata(await list_downloader.run())

            # v2 nests the page under "results"; v3 under "data".
            entries = page_data["results"] if api_version == 2 else page_data["data"]

            for entry in entries:
                if api_version == 2:
                    namespace = entry["namespace"]["name"]
                else:
                    namespace = entry["namespace"]
                requirements_file = RequirementsFileEntry(
                    name=f"{namespace}.{entry['name']}",
                    version="*",
                    source=None,
                )
                fetch_tasks.append(
                    loop.create_task(
                        self._fetch_collection_metadata(requirements_file)))

            if not self._get_response_next_value(api_version, page_data):
                break
            page += 1

        await asyncio.gather(*fetch_tasks)
Ejemplo n.º 9
0
        async def _loop_through_pages(not_done, versions_url=None):
            """
            Loop through API pagination.

            Fetches page 1 to learn the total item count, queues a download
            for every page into *not_done*, and returns that count.
            """
            first_page_url = await _get_url(1, versions_url)
            first_page = parse_metadata(
                await remote.get_downloader(url=first_page_url).run())

            # v2 reports "count" at the top level; v3 nests it under "meta".
            count = first_page.get("count") or first_page.get("meta", {}).get("count", 1)
            if collection_info and not versions_url:
                # Explicit requirements list: one "page" per requested collection.
                count = len(collection_info)
                page_count = count
            else:
                page_count = math.ceil(float(count) / float(PAGE_SIZE))

            for page in range(1, page_count + 1):
                page_url = await _get_url(page, versions_url)
                not_done.add(remote.get_downloader(url=page_url).run())

            return count
Ejemplo n.º 10
0
    async def _fetch_collections(self):
        """
        Fetch the collections in a remote repository.

        Returns:
            async generator: dicts that represent collections from galaxy api

        """
        remote = self.remote
        collection_info = self.collection_info

        async def _get_collection_api(root):
            """
            Returns the collection api path and api version.

            Sets ``self.api_version`` (v3 preferred over v2) as a side effect.
            Based on https://git.io/JTMxE.
            """
            if root == "https://galaxy.ansible.com" or root == "https://galaxy.ansible.com/":
                root = "https://galaxy.ansible.com/api/"

            downloader = remote.get_downloader(url=root)

            try:
                api_data = parse_metadata(await downloader.run())
            except (json.decoder.JSONDecodeError, ClientResponseError):
                # The url may be the server root rather than the API root;
                # retry once under "api/" before giving up.
                if root.endswith("/api/"):
                    raise

                root = urljoin(root, "api/")
                downloader = remote.get_downloader(url=root)
                api_data = parse_metadata(await downloader.run())

            if "available_versions" not in api_data:
                raise RuntimeError(_("Could not find 'available_versions' at {}").format(root))

            if "v3" in api_data.get("available_versions", {}):
                self.api_version = 3
            elif "v2" in api_data.get("available_versions", {}):
                self.api_version = 2
            else:
                raise RuntimeError(_("Unsupported API versions at {}").format(root))

            endpoint = f"{root}v{self.api_version}/collections/"

            return endpoint, self.api_version

        async def _get_url(page, versions_url=None):
            """Build the url for *page* of either the collection or version listing."""
            # Explicit requirements list: one "page" per requested collection.
            if collection_info and not versions_url:
                name, version, source = collection_info[page - 1]
                namespace, name = name.split(".")
                root = source or remote.url
                api_endpoint = (await _get_collection_api(root))[0]
                url = f"{api_endpoint}{namespace}/{name}/"
                return url

            if not versions_url:
                api_endpoint, api_version = await _get_collection_api(remote.url)
                return get_page_url(api_endpoint, api_version, page)

            # versions_url given: ensure self.api_version is populated first.
            if not self.api_version:
                await _get_collection_api(remote.url)

            return get_page_url(versions_url, self.api_version, page)

        async def _loop_through_pages(not_done, versions_url=None):
            """
            Loop through API pagination.

            Fetches page 1 to learn the total count, queues a download per
            page into *not_done*, and returns the item count.
            """
            url = await _get_url(1, versions_url)
            downloader = remote.get_downloader(url=url)
            data = parse_metadata(await downloader.run())

            # v2 reports "count" at the top level; v3 nests it under "meta".
            count = data.get("count") or data.get("meta", {}).get("count", 1)
            if collection_info and not versions_url:
                count = len(collection_info)
                page_count = count
            else:
                page_count = math.ceil(float(count) / float(PAGE_SIZE))

            for page in range(1, page_count + 1):
                url = await _get_url(page, versions_url)
                downloader = remote.get_downloader(url=url)
                not_done.add(downloader.run())

            return count

        def _build_url(path_or_url):
            """Check value and turn it into a url using remote.url if it's a relative path."""
            url_parts = urlparse(path_or_url)
            if not url_parts.netloc:
                new_url_parts = urlparse(self.remote.url)._replace(path=url_parts.path)
                return urlunparse(new_url_parts)
            else:
                return path_or_url

        def _add_collection_level_metadata(data, additional_metadata):
            """Additional metadata at collection level to be sent through stages."""
            name = data["collection"]["name"]
            namespace = data["namespace"]["name"]
            metadata = additional_metadata.get(f"{namespace}_{name}", {})
            data["deprecated"] = metadata.get("deprecated")

        def _add_collection_version_level_metadata(data, additional_metadata):
            """Additional metadata at collection version level to be sent through stages."""
            metadata = additional_metadata.get(_build_url(data["href"]), {})
            data["docs_blob_url"] = metadata.get("docs_blob_url")

        progress_data = dict(message="Parsing Galaxy Collections API", code="parsing.collections")
        with ProgressReport(**progress_data) as progress_bar:
            not_done = set()
            count = await _loop_through_pages(not_done)
            progress_bar.total = count
            progress_bar.save()

            # Side-channel data (deprecation flags, docs-blob urls) keyed by
            # "namespace_name" or version url, merged into results before yield.
            additional_metadata = {}

            while not_done:
                done, not_done = await asyncio.wait(not_done, return_when=asyncio.FIRST_COMPLETED)

                for item in done:
                    data = parse_metadata(item.result())

                    if "data" in data:  # api v3
                        results = data["data"]
                    elif "results" in data:  # api v2
                        results = data["results"]
                    else:
                        results = [data]

                    for result in results:
                        download_url = result.get("download_url")

                        if result.get("deprecated"):
                            name = result["name"]
                            try:
                                namespace = result["namespace"]["name"]  # api v3
                            except TypeError:
                                namespace = result["namespace"]  # api v2
                            additional_metadata[f"{namespace}_{name}"] = {
                                "deprecated": result["deprecated"]
                            }

                        # Collection entry: schedule downloads of its version pages.
                        if result.get("versions_url"):
                            versions_url = _build_url(result.get("versions_url"))
                            await _loop_through_pages(not_done, versions_url)
                            progress_bar.increment()

                        # Version entry without a download url: fetch its detail page.
                        if result.get("version") and not download_url:
                            version_url = _build_url(result["href"])
                            not_done.update([remote.get_downloader(url=version_url).run()])
                            additional_metadata[version_url] = {
                                "docs_blob_url": f"{version_url}docs-blob/"
                            }

                        if download_url:
                            _add_collection_level_metadata(data, additional_metadata)
                            _add_collection_version_level_metadata(data, additional_metadata)
                            yield data
Ejemplo n.º 11
0
    async def _fetch_collections(self):
        """
        Fetch the collections in a remote repository.

        Returns:
            async generator: dicts that represent collections from galaxy api

        """
        page_count = 1
        remote = self.remote
        collection_info = self.collection_info

        def _get_url(page):
            # With an explicit requirements list each "page" maps to one
            # requested collection; otherwise pages come from the list API.
            if collection_info:
                name, version, source = collection_info[page - 1]
                namespace, name = name.split(".")
                root = source or remote.url
                url = f"{root}/api/v2/collections/{namespace}/{name}"
                return url

            return get_page_url(remote.url, page)

        def _build_url(path_or_url):
            """Check value and turn it into a url using remote.url if it's a relative path."""
            url_parts = urlparse(path_or_url)
            if not url_parts.netloc:
                new_url_parts = urlparse(
                    self.remote.url)._replace(path=url_parts.path)
                return urlunparse(new_url_parts)
            else:
                return path_or_url

        progress_data = dict(message="Parsing Galaxy Collections API",
                             code="parsing.collections")
        with ProgressReport(**progress_data) as progress_bar:
            url = _get_url(page_count)
            downloader = remote.get_downloader(url=url)
            initial_data = parse_metadata(await downloader.run())

            count = len(self.collection_info) or initial_data.get("count", 1)
            page_count = math.ceil(float(count) / float(PAGE_SIZE))
            progress_bar.total = count
            progress_bar.save()

            # Concurrent downloads are limited by aiohttp...
            # asyncio.wait() requires Tasks (bare coroutines raise TypeError
            # on Python >= 3.11), so wrap each download in ensure_future().
            not_done = set()
            for page in range(1, page_count + 1):
                downloader = remote.get_downloader(url=_get_url(page))
                not_done.add(asyncio.ensure_future(downloader.run()))

            while not_done:
                done, not_done = await asyncio.wait(
                    not_done, return_when=asyncio.FIRST_COMPLETED)

                for item in done:
                    data = parse_metadata(item.result())

                    # v2 uses 'results' as the key while v3 uses 'data'
                    results = data.get("results") or data.get("data") or [data]

                    for result in results:
                        download_url = result.get("download_url")

                        # Collection entry: follow its version listing.
                        if result.get("versions_url"):
                            versions_url = _build_url(
                                result.get("versions_url"))
                            not_done.update([
                                asyncio.ensure_future(
                                    remote.get_downloader(url=versions_url).run())
                            ])

                        # Version entry without a download url: fetch its
                        # detail page via "href".
                        if result.get("version") and not download_url:
                            version_url = _build_url(result["href"])
                            not_done.update([
                                asyncio.ensure_future(
                                    remote.get_downloader(url=version_url).run())
                            ])

                        if download_url:
                            yield data
                            progress_bar.increment()
Ejemplo n.º 12
0
 async def _fetch_collection_version_metadata(self, api_version,
                                              collection_version_url):
     """Download one collection version's detail metadata and register it."""
     response = await self.remote.get_downloader(
         url=collection_version_url).run()
     await self._add_collection_version(
         api_version, collection_version_url, parse_metadata(response))