def validate_mirror(self, mirror_url):
    """
    Make sure a mirror serves :attr:`distribution_codename`.

    :param mirror_url: The base URL of the mirror (a string).
    :returns: :data:`True` if the mirror hosts the relevant release,
              :data:`False` otherwise.

    This method assumes that :attr:`old_releases_url` is always valid. The
    result for every other mirror is cached in :attr:`validated_mirrors`
    (keyed on the normalized URL and the distribution codename) so each
    mirror is checked at most once per release.
    """
    if mirrors_are_equal(mirror_url, self.old_releases_url):
        return True
    mirror_url = normalize_mirror_url(mirror_url)
    key = (mirror_url, self.distribution_codename)
    value = self.validated_mirrors.get(key)
    if value is None:
        logger.info(
            "Checking whether %s is a supported release for %s ..",
            self.distribution_codename.capitalize(),
            self.distributor_id.capitalize(),
        )
        mirror = CandidateMirror(mirror_url=mirror_url, updater=self)
        try:
            response = fetch_url(mirror.release_gpg_url, retry=False)
            mirror.release_gpg_contents = response.read()
        except Exception:
            # A failed download is deliberately non-fatal (the availability
            # check below still runs), but leave a trace of what went wrong
            # instead of discarding the exception silently. The handler only
            # references the already-normalized URL so it cannot raise itself.
            logger.debug("Failed to fetch release GPG file from %s (ignoring).",
                         mirror_url, exc_info=True)
        self.validated_mirrors[key] = value = mirror.is_available
    return value
def is_mirror_url(self, base_url, stable_resource, expected_content):
    """
    Validate a given mirror URL based on a stable resource URL and its expected response.

    :param base_url: The base URL of the mirror (normalized before use).
    :param stable_resource: The value appended to the base URL to form the
                            URL of a resource that is assumed to always be
                            available.
    :param expected_content: The value that is expected to be contained in
                             the fetched response.
    :returns: :data:`True` if the resource served the expected content (or
              the request timed out), :data:`False` if the URL is not an
              HTTP(S) URL, the content didn't match or any other exception
              was raised.

    Results are cached in the lazily created ``mirror_cache`` dictionary so
    that each combination of arguments is fetched only once.
    """
    base_url = normalize_mirror_url(base_url)
    if not base_url.startswith(('http://', 'https://')):
        return False
    if not hasattr(self, 'mirror_cache'):
        self.mirror_cache = {}
    cache_key = (base_url, stable_resource, expected_content)
    if cache_key not in self.mirror_cache:
        # Bind the URL before entering the try block so that both exception
        # handlers can safely refer to it in their log messages.
        resource_url = base_url + stable_resource
        try:
            # Look for a file with a stable filename (assumed to always be available).
            response = fetch_url(resource_url)
            # Check the contents of the response.
            if expected_content in response:
                logger.info("URL %s served expected content.", resource_url)
                self.mirror_cache[cache_key] = True
            else:
                logger.warning("URL %s didn't serve expected content!", resource_url)
                self.mirror_cache[cache_key] = False
        except TimeoutException:
            # Timeouts are treated as a pass; the URL is now actually
            # interpolated into the message (the argument used to be missing).
            logger.warning(
                "URL %s reported timeout, not failing test suite on this ..",
                resource_url,
            )
            self.mirror_cache[cache_key] = True
        except Exception:
            logger.warning("URL %s triggered exception!", resource_url, exc_info=True)
            self.mirror_cache[cache_key] = False
    return self.mirror_cache[cache_key]
def discover_mirrors():
    """
    Scrape :data:`MIRRORS_URL` for the Ubuntu mirrors listed there.

    :returns: A set of :class:`.CandidateMirror` objects that have their
              :attr:`~.CandidateMirror.mirror_url` property set and may have
              the :attr:`~.CandidateMirror.last_updated` property set (it is
              :data:`None` when none of the labels in
              :data:`MIRROR_STATUSES` occurs in the mirror's table row).
    :raises: If no mirrors are discovered an exception is raised.

    An example run:

    >>> from apt_mirror_updater.backends.ubuntu import discover_mirrors
    >>> from pprint import pprint
    >>> pprint(discover_mirrors())
    set([CandidateMirror(mirror_url='http://archive.ubuntu.com/ubuntu/'),
         CandidateMirror(mirror_url='http://ftp.nluug.nl/os/Linux/distr/ubuntu/'),
         CandidateMirror(mirror_url='http://ftp.snt.utwente.nl/pub/os/linux/ubuntu/'),
         CandidateMirror(mirror_url='http://ftp.tudelft.nl/archive.ubuntu.com/'),
         CandidateMirror(mirror_url='http://mirror.1000mbps.com/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.amsiohosting.net/archive.ubuntu.com/'),
         CandidateMirror(mirror_url='http://mirror.i3d.net/pub/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.nforce.com/pub/linux/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.nl.leaseweb.net/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.transip.net/ubuntu/ubuntu/'),
         ...])
    """
    timer = Timer()
    logger.info("Discovering Ubuntu mirrors at %s ..", MIRRORS_URL)
    soup = BeautifulSoup(fetch_url(MIRRORS_URL, retry=True), 'html.parser')
    mirrors = set()
    for table in soup.findAll('table'):
        for row in table.findAll('tr'):
            for link in row.findAll('a', href=True):
                href = link['href']
                # Ignore links that don't look like a mirror URL.
                if not (href.startswith(('http://', 'https://')) and href.endswith('/ubuntu/')):
                    continue
                # The first status label found in the row's text determines
                # the mirror's reported latency (None when nothing matches).
                row_text = u''.join(row.findAll(text=True))
                last_updated = next(
                    (seconds for label, seconds in MIRROR_STATUSES if label in row_text),
                    None,
                )
                mirrors.add(CandidateMirror(mirror_url=href, last_updated=last_updated))
                # One mirror per row: continue with the next row.
                break
    if not mirrors:
        raise Exception("Failed to discover any Ubuntu mirrors! (using %s)" % MIRRORS_URL)
    logger.info("Discovered %s in %s.", pluralize(len(mirrors), "Ubuntu mirror"), timer)
    return mirrors
def discover_mirror_selection():
    """
    Discover "geographically suitable" Ubuntu mirrors.

    :returns: A set of :class:`.CandidateMirror` objects, one for every
              non-blank line in the response of :data:`MIRROR_SELECTION_URL`
              (surrounding whitespace is stripped from each line).
    """
    timer = Timer()
    logger.info("Identifying fast Ubuntu mirrors using %s ..", MIRROR_SELECTION_URL)
    raw_listing = fetch_url(MIRROR_SELECTION_URL, retry=False)
    # Let UnicodeDammit take care of decoding the response to text.
    listing = UnicodeDammit(raw_listing).unicode_markup
    mirrors = set()
    for line in listing.splitlines():
        # Skip lines that are empty or consist solely of whitespace.
        if not line or line.isspace():
            continue
        mirrors.add(CandidateMirror(mirror_url=line.strip()))
    logger.debug("Found %s in %s.", pluralize(len(mirrors), "fast Ubuntu mirror"), timer)
    return mirrors
def discover_mirror_selection():
    """
    Discover "geographically suitable" Ubuntu mirrors.

    :returns: A set of :class:`.CandidateMirror` objects built from the
              non-blank lines served by :data:`MIRROR_SELECTION_URL`.
    """
    timer = Timer()
    logger.info("Identifying fast Ubuntu mirrors using %s ..", MIRROR_SELECTION_URL)
    payload = fetch_url(MIRROR_SELECTION_URL, retry=False)
    # The response is decoded by UnicodeDammit and then handled line by line.
    lines = UnicodeDammit(payload).unicode_markup.splitlines()
    # Blank and whitespace-only lines don't describe a mirror.
    urls = [line.strip() for line in lines if line and not line.isspace()]
    mirrors = set(CandidateMirror(mirror_url=url) for url in urls)
    logger.debug("Found %s in %s.", pluralize(len(mirrors), "fast Ubuntu mirror"), timer)
    return mirrors
def discover_mirrors():
    """
    Scrape :data:`MIRRORS_URL` for the Debian mirrors listed there.

    :returns: A set of :class:`.CandidateMirror` objects that have their
              :attr:`~.CandidateMirror.mirror_url` property set.
    :raises: An exception is raised when the page contains no ``<table>``
             element or when no mirrors are discovered.

    Only the links (``<a href="...">``) inside the first table on the page
    are considered. An example run:

    >>> from apt_mirror_updater.backends.debian import discover_mirrors
    >>> from pprint import pprint
    >>> pprint(discover_mirrors())
    set([CandidateMirror(mirror_url='http://ftp.at.debian.org/debian/'),
         CandidateMirror(mirror_url='http://ftp.au.debian.org/debian/'),
         CandidateMirror(mirror_url='http://ftp.be.debian.org/debian/'),
         CandidateMirror(mirror_url='http://ftp.bg.debian.org/debian/'),
         CandidateMirror(mirror_url='http://ftp.br.debian.org/debian/'),
         CandidateMirror(mirror_url='http://ftp.by.debian.org/debian/'),
         CandidateMirror(mirror_url='http://ftp.ca.debian.org/debian/'),
         CandidateMirror(mirror_url='http://ftp.ch.debian.org/debian/'),
         CandidateMirror(mirror_url='http://ftp.cn.debian.org/debian/'),
         CandidateMirror(mirror_url='http://ftp.cz.debian.org/debian/'),
         ...])
    """
    timer = Timer()
    logger.info("Discovering Debian mirrors at %s ..", MIRRORS_URL)
    page = BeautifulSoup(fetch_url(MIRRORS_URL, retry=True), 'html.parser')
    tables = page.findAll('table')
    if not tables:
        raise Exception(
            "Failed to locate <table> element in Debian mirror page! (%s)" % MIRRORS_URL)
    # Every link with an href in the first table is taken to be a mirror.
    mirrors = set()
    for link in tables[0].findAll('a', href=True):
        mirrors.add(CandidateMirror(mirror_url=link['href']))
    if not mirrors:
        raise Exception("Failed to discover any Debian mirrors! (using %s)" % MIRRORS_URL)
    logger.info("Discovered %s in %s.", pluralize(len(mirrors), "Debian mirror"), timer)
    return mirrors
def validate_mirror(self, mirror_url):
    """
    Determine whether a mirror serves :attr:`distribution_codename`.

    :param mirror_url: The base URL of the mirror (a string).
    :returns: One of the values in the :class:`MirrorStatus` enumeration.

    This method assumes that :attr:`old_releases_url` is always valid, so
    that mirror is reported as available without any network request. For
    other mirrors the outcome is cached in :attr:`validated_mirrors`, keyed
    on the normalized URL and the distribution codename.
    """
    if mirrors_are_equal(mirror_url, self.old_releases_url):
        return MirrorStatus.AVAILABLE
    normalized_url = normalize_mirror_url(mirror_url)
    cache_key = (normalized_url, self.distribution_codename)
    status = self.validated_mirrors.get(cache_key)
    if status is not None:
        # This mirror was validated before, reuse the cached status.
        return status
    logger.info("Checking if %s is available on %s ..", self.release, normalized_url)
    # Try to download the Release.gpg file, in the assumption that this file
    # should always exist and is more or less guaranteed to be relatively
    # small.
    try:
        candidate = CandidateMirror(mirror_url=normalized_url, updater=self)
        candidate.release_gpg_contents = fetch_url(candidate.release_gpg_url, retry=False)
        if candidate.is_available:
            status = MirrorStatus.AVAILABLE
        else:
            status = MirrorStatus.UNAVAILABLE
    except NotFoundError:
        # A mirror serving 404 responses can indicate that the release has
        # gone end of life. Either way the mirror can't be used.
        status = MirrorStatus.MAYBE_EOL
    except Exception:
        # Any other (unspecified) error means the mirror is unavailable.
        status = MirrorStatus.UNAVAILABLE
    # Remember the status that was just determined.
    self.validated_mirrors[cache_key] = status
    return status
def validate_mirror(self, mirror_url):
    """
    Determine whether a mirror serves :attr:`distribution_codename`.

    :param mirror_url: The base URL of the mirror (a string).
    :returns: One of the values in the :class:`MirrorStatus` enumeration.

    The outcome is cached in :attr:`validated_mirrors`, keyed on the
    normalized mirror URL and the distribution codename, so that each
    combination is checked only once.
    """
    normalized_url = normalize_mirror_url(mirror_url)
    cache_key = (normalized_url, self.distribution_codename)
    cached_status = self.validated_mirrors.get(cache_key)
    if cached_status is not None:
        return cached_status
    logger.info("Checking if %s is available on %s ..", self.release, normalized_url)
    # The Release.gpg file is downloaded on the assumption that it should
    # always exist and is more or less guaranteed to be relatively small.
    try:
        candidate = CandidateMirror(mirror_url=normalized_url, updater=self)
        candidate.release_gpg_contents = fetch_url(candidate.release_gpg_url, retry=False)
        if candidate.is_available:
            status = MirrorStatus.AVAILABLE
        else:
            status = MirrorStatus.UNAVAILABLE
    except NotFoundError:
        # 404 responses may indicate that the release has gone end of life;
        # in any case the mirror is unavailable.
        status = MirrorStatus.MAYBE_EOL
    except Exception:
        # An unspecified error that is not a 404 response: the mirror is
        # considered unavailable.
        status = MirrorStatus.UNAVAILABLE
    # Cache the freshly determined status for subsequent calls.
    self.validated_mirrors[cache_key] = status
    return status
def discover_mirrors():
    """
    Discover available Ubuntu mirrors.

    :returns: A set of :class:`.CandidateMirror` objects that have their
              :attr:`~.CandidateMirror.mirror_url` property set and may have
              the :attr:`~.CandidateMirror.last_updated` property set.
    :raises: If no mirrors are discovered an exception is raised.

    The mirrors listed on :data:`MIRRORS_URL` are combined with the result
    of :func:`discover_mirror_selection()` (:data:`MIRROR_SELECTION_URL`):
    when the latter yields more than ten "fast" mirrors only those are
    returned. Here's an example run:

    >>> from apt_mirror_updater.backends.ubuntu import discover_mirrors
    >>> from pprint import pprint
    >>> pprint(discover_mirrors())
    set([CandidateMirror(mirror_url='http://archive.ubuntu.com/ubuntu/'),
         CandidateMirror(mirror_url='http://ftp.nluug.nl/os/Linux/distr/ubuntu/'),
         CandidateMirror(mirror_url='http://ftp.snt.utwente.nl/pub/os/linux/ubuntu/'),
         CandidateMirror(mirror_url='http://ftp.tudelft.nl/archive.ubuntu.com/'),
         CandidateMirror(mirror_url='http://mirror.1000mbps.com/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.amsiohosting.net/archive.ubuntu.com/'),
         CandidateMirror(mirror_url='http://mirror.i3d.net/pub/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.nforce.com/pub/linux/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.nl.leaseweb.net/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.transip.net/ubuntu/ubuntu/'),
         ...])
    """
    timer = Timer()
    logger.info("Discovering Ubuntu mirrors at %s ..", MIRRORS_URL)
    soup = BeautifulSoup(fetch_url(MIRRORS_URL, retry=True), 'html.parser')
    mirrors = set()
    for table in soup.findAll('table'):
        for row in table.findAll('tr'):
            for link in row.findAll('a', href=True):
                href = link['href']
                # Ignore links that don't look like a mirror URL.
                if not (href.startswith(('http://', 'https://')) and href.endswith('/ubuntu/')):
                    continue
                # The first status label found in the row's text determines
                # the mirror's reported latency (None when nothing matches).
                row_text = u''.join(row.findAll(text=True))
                last_updated = next(
                    (seconds for label, seconds in MIRROR_STATUSES if label in row_text),
                    None,
                )
                mirrors.add(CandidateMirror(mirror_url=href, last_updated=last_updated))
                # One mirror per row: continue with the next row.
                break
    if not mirrors:
        raise Exception("Failed to discover any Ubuntu mirrors! (using %s)" % MIRRORS_URL)
    # Discover fast (geographically suitable) mirrors to speed up ranking.
    # See also https://github.com/xolox/python-apt-mirror-updater/issues/6.
    selected_mirrors = discover_mirror_selection()
    # NOTE(review): applying the symmetric difference twice yields a set with
    # the same members as `selected_mirrors` (not the intersection with
    # `mirrors`) -- confirm that this is the intended selection.
    slow_mirrors = mirrors ^ selected_mirrors
    fast_mirrors = mirrors ^ slow_mirrors
    if len(fast_mirrors) <= 10:
        # Too few fast mirrors: stick with the complete list.
        logger.info("Discovered %s in %s.", pluralize(len(mirrors), "Ubuntu mirror"), timer)
        return mirrors
    # Narrow down the list of candidate mirrors to fast mirrors.
    logger.info(
        "Discovered %s in %s (narrowed down from %s).",
        pluralize(len(fast_mirrors), "Ubuntu mirror"),
        timer,
        pluralize(len(mirrors), "mirror"),
    )
    return fast_mirrors
def discover_mirrors():
    """
    Discover available Ubuntu mirrors.

    :returns: A set of :class:`.CandidateMirror` objects that have their
              :attr:`~.CandidateMirror.mirror_url` property set and may have
              the :attr:`~.CandidateMirror.last_updated` property set.
    :raises: If no mirrors are discovered an exception is raised.

    Both :data:`MIRRORS_URL` and :data:`MIRROR_SELECTION_URL` are queried;
    the complete list is only narrowed down when more than ten "fast"
    mirrors are found. Here's an example run:

    >>> from apt_mirror_updater.backends.ubuntu import discover_mirrors
    >>> from pprint import pprint
    >>> pprint(discover_mirrors())
    set([CandidateMirror(mirror_url='http://archive.ubuntu.com/ubuntu/'),
         CandidateMirror(mirror_url='http://ftp.nluug.nl/os/Linux/distr/ubuntu/'),
         CandidateMirror(mirror_url='http://ftp.snt.utwente.nl/pub/os/linux/ubuntu/'),
         CandidateMirror(mirror_url='http://ftp.tudelft.nl/archive.ubuntu.com/'),
         CandidateMirror(mirror_url='http://mirror.1000mbps.com/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.amsiohosting.net/archive.ubuntu.com/'),
         CandidateMirror(mirror_url='http://mirror.i3d.net/pub/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.nforce.com/pub/linux/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.nl.leaseweb.net/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.transip.net/ubuntu/ubuntu/'),
         ...])
    """
    timer = Timer()
    logger.info("Discovering Ubuntu mirrors at %s ..", MIRRORS_URL)
    html = fetch_url(MIRRORS_URL, retry=True)
    document = BeautifulSoup(html, 'html.parser')
    mirrors = set()
    for table in document.findAll('table'):
        for table_row in table.findAll('tr'):
            for anchor in table_row.findAll('a', href=True):
                url = anchor['href']
                looks_like_mirror = (url.startswith(('http://', 'https://'))
                                     and url.endswith('/ubuntu/'))
                if not looks_like_mirror:
                    continue
                # Derive the mirror's reported latency from the first status
                # label that occurs in the text of the table row (if any).
                text = u''.join(table_row.findAll(text=True))
                last_updated = None
                for status_label, num_seconds in MIRROR_STATUSES:
                    if status_label in text:
                        last_updated = num_seconds
                        break
                mirrors.add(CandidateMirror(mirror_url=url, last_updated=last_updated))
                # A row contributes at most one mirror.
                break
    if not mirrors:
        raise Exception("Failed to discover any Ubuntu mirrors! (using %s)" % MIRRORS_URL)
    # Discover fast (geographically suitable) mirrors to speed up ranking.
    # See also https://github.com/xolox/python-apt-mirror-updater/issues/6.
    selected_mirrors = discover_mirror_selection()
    # NOTE(review): two symmetric differences in a row produce a set whose
    # members equal those of `selected_mirrors`, which is not necessarily a
    # subset of `mirrors` -- verify that this is what was intended.
    slow_mirrors = mirrors ^ selected_mirrors
    fast_mirrors = mirrors ^ slow_mirrors
    narrowed_down = len(fast_mirrors) > 10
    if narrowed_down:
        # Narrow down the list of candidate mirrors to fast mirrors.
        logger.info("Discovered %s in %s (narrowed down from %s).",
                    pluralize(len(fast_mirrors), "Ubuntu mirror"),
                    timer, pluralize(len(mirrors), "mirror"))
    else:
        logger.info("Discovered %s in %s.", pluralize(len(mirrors), "Ubuntu mirror"), timer)
    return fast_mirrors if narrowed_down else mirrors