Python fetch_url Examples, apt_mirror_updater.http.fetch_url Python Examples

Example #1

0

Show file

    def validate_mirror(self, mirror_url):
        """
        Make sure a mirror serves :attr:`distribution_codename`.

        :param mirror_url: The base URL of the mirror (a string).
        :returns: :data:`True` if the mirror hosts the relevant release,
                  :data:`False` otherwise.

        This method assumes that :attr:`old_releases_url` is always valid.
        Results for other mirrors are cached in :attr:`validated_mirrors`
        under a ``(normalized mirror URL, distribution codename)`` key.
        """
        # The old-releases mirror is accepted without any network request.
        if mirrors_are_equal(mirror_url, self.old_releases_url):
            return True
        mirror_url = normalize_mirror_url(mirror_url)
        key = (mirror_url, self.distribution_codename)
        value = self.validated_mirrors.get(key)
        if value is None:
            logger.info(
                "Checking whether %s is a supported release for %s ..",
                self.distribution_codename.capitalize(),
                self.distributor_id.capitalize())
            mirror = CandidateMirror(mirror_url=mirror_url, updater=self)
            try:
                response = fetch_url(mirror.release_gpg_url, retry=False)
                mirror.release_gpg_contents = response.read()
            except Exception:
                # Best effort: a failed download must not abort validation,
                # but it should leave a trace instead of vanishing silently.
                # The verdict is still left to ``mirror.is_available`` below.
                logger.debug(
                    "Failed to fetch the release GPG file from %s, "
                    "continuing without its contents.",
                    mirror_url, exc_info=True)
            # Cache the verdict so the mirror is only probed once.
            self.validated_mirrors[key] = value = mirror.is_available
        return value

Example #2

0

Show file

File: tests.py Project: aspartam206/python-apt-mirror-updater

 def is_mirror_url(self, base_url, stable_resource, expected_content):
     """
     Validate a given mirror URL based on a stable resource URL and its expected response.

     :param base_url: The base URL of the mirror (normalized before use).
     :param stable_resource: The path that is appended to `base_url` to
                             form the URL that is fetched.
     :param expected_content: The content that is expected to be contained
                              in the fetched response.
     :returns: :data:`True` when the response contains `expected_content`
               or when the request timed out (timeouts deliberately don't
               fail the test suite), :data:`False` when the URL is not an
               HTTP(S) URL, the content doesn't match or any other
               exception is raised.

     Verdicts are remembered in ``self.mirror_cache`` (created on first use)
     so that each combination of arguments is fetched only once.
     """
     base_url = normalize_mirror_url(base_url)
     if not base_url.startswith(('http://', 'https://')):
         return False
     if not hasattr(self, 'mirror_cache'):
         self.mirror_cache = {}
     cache_key = (base_url, stable_resource, expected_content)
     if cache_key not in self.mirror_cache:
         # Look for a file with a stable filename (assumed to always be
         # available). The URL is built before the ``try`` block so that
         # both exception handlers can safely refer to it.
         resource_url = base_url + stable_resource
         try:
             response = fetch_url(resource_url)
             # Check the contents of the response.
             if expected_content in response:
                 logger.info("URL %s served expected content.",
                             resource_url)
                 self.mirror_cache[cache_key] = True
             else:
                 logger.warning("URL %s didn't serve expected content!",
                                resource_url)
                 self.mirror_cache[cache_key] = False
         except TimeoutException:
             # The format string has a placeholder, so the URL must be
             # passed along or the log record fails to format.
             logger.warning(
                 "URL %s reported timeout, not failing test suite on this ..",
                 resource_url)
             self.mirror_cache[cache_key] = True
         except Exception:
             logger.warning("URL %s triggered exception!",
                            resource_url,
                            exc_info=True)
             self.mirror_cache[cache_key] = False
     return self.mirror_cache[cache_key]

Example #3

0

Show file

def discover_mirrors():
    """
    Discover available Ubuntu mirrors by querying :data:`MIRRORS_URL`.

    :returns: A set of :class:`.CandidateMirror` objects that have their
              :attr:`~.CandidateMirror.mirror_url` property set and may have
              the :attr:`~.CandidateMirror.last_updated` property set.
    :raises: :exc:`~exceptions.Exception` when no mirrors are discovered.

    The page is parsed as HTML and every table row is scanned for the first
    ``http(s)://`` link that ends in ``/ubuntu/``. An example run:

    >>> from apt_mirror_updater.backends.ubuntu import discover_mirrors
    >>> from pprint import pprint
    >>> pprint(discover_mirrors())
    set([CandidateMirror(mirror_url='http://archive.ubuntu.com/ubuntu/'),
         CandidateMirror(mirror_url='http://ftp.nluug.nl/os/Linux/distr/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.nl.leaseweb.net/ubuntu/'),
         ...])
    """
    timer = Timer()
    logger.info("Discovering Ubuntu mirrors at %s ..", MIRRORS_URL)
    soup = BeautifulSoup(fetch_url(MIRRORS_URL, retry=True), 'html.parser')
    mirrors = set()
    for table in soup.findAll('table'):
        for tr in table.findAll('tr'):
            for a in tr.findAll('a', href=True):
                href = a['href']
                # Ignore links that don't look like a mirror URL.
                if not href.startswith(('http://', 'https://')):
                    continue
                if not href.endswith('/ubuntu/'):
                    continue
                # The first status label found in the row's text (if any)
                # determines the mirror's reported latency.
                row_text = u''.join(tr.findAll(text=True))
                last_updated = next(
                    (num_seconds
                     for status_label, num_seconds in MIRROR_STATUSES
                     if status_label in row_text),
                    None,
                )
                mirrors.add(CandidateMirror(mirror_url=href,
                                            last_updated=last_updated))
                # One mirror per row is enough, continue with the next row.
                break
    if not mirrors:
        raise Exception("Failed to discover any Ubuntu mirrors! (using %s)" %
                        MIRRORS_URL)
    logger.info("Discovered %s in %s.",
                pluralize(len(mirrors), "Ubuntu mirror"), timer)
    return mirrors

Example #4

0

Show file

File: ubuntu.py Project: xolox/python-apt-mirror-updater

def discover_mirror_selection():
    """
    Discover "geographically suitable" Ubuntu mirrors.

    :returns: A set of :class:`.CandidateMirror` objects, one for each
              non-blank line in the response of :data:`MIRROR_SELECTION_URL`.
    """
    timer = Timer()
    logger.info("Identifying fast Ubuntu mirrors using %s ..", MIRROR_SELECTION_URL)
    # Decode the response to text, whatever its encoding happens to be.
    markup = UnicodeDammit(fetch_url(MIRROR_SELECTION_URL, retry=False)).unicode_markup
    mirrors = set()
    for line in markup.splitlines():
        # Skip empty lines and lines that hold nothing but whitespace.
        if not line or line.isspace():
            continue
        mirrors.add(CandidateMirror(mirror_url=line.strip()))
    logger.debug("Found %s in %s.", pluralize(len(mirrors), "fast Ubuntu mirror"), timer)
    return mirrors

Example #5

0

Show file

def discover_mirror_selection():
    """
    Discover "geographically suitable" Ubuntu mirrors.

    :returns: A set of :class:`.CandidateMirror` objects built from the
              non-blank lines served by :data:`MIRROR_SELECTION_URL`.
    """
    timer = Timer()
    logger.info("Identifying fast Ubuntu mirrors using %s ..",
                MIRROR_SELECTION_URL)
    raw_listing = fetch_url(MIRROR_SELECTION_URL, retry=False)
    # Each line of the decoded response is expected to hold one mirror URL.
    listing = UnicodeDammit(raw_listing).unicode_markup.splitlines()
    mirrors = {
        CandidateMirror(mirror_url=entry.strip())
        for entry in listing
        if entry and not entry.isspace()
    }
    logger.debug("Found %s in %s.",
                 pluralize(len(mirrors), "fast Ubuntu mirror"), timer)
    return mirrors

Example #6

0

Show file

File: debian.py Project: tfogwill/python-apt-mirror-updater

def discover_mirrors():
    """
    Discover available Debian mirrors by querying :data:`MIRRORS_URL`.

    :returns: A set of :class:`.CandidateMirror` objects that have their
             :attr:`~.CandidateMirror.mirror_url` property set.
    :raises: :exc:`~exceptions.Exception` when the page contains no
             ``<table>`` element or when no mirrors are discovered.

    Every link with an ``href`` attribute inside the first table on the page
    is taken to be a mirror. An example run:

    >>> from apt_mirror_updater.backends.debian import discover_mirrors
    >>> from pprint import pprint
    >>> pprint(discover_mirrors())
    set([CandidateMirror(mirror_url='http://ftp.at.debian.org/debian/'),
         CandidateMirror(mirror_url='http://ftp.au.debian.org/debian/'),
         CandidateMirror(mirror_url='http://ftp.be.debian.org/debian/'),
         ...])
    """
    timer = Timer()
    logger.info("Discovering Debian mirrors at %s ..", MIRRORS_URL)
    soup = BeautifulSoup(fetch_url(MIRRORS_URL, retry=True), 'html.parser')
    tables = soup.findAll('table')
    if not tables:
        raise Exception(
            "Failed to locate <table> element in Debian mirror page! (%s)" %
            MIRRORS_URL)
    # Only the first table on the page is considered.
    mirrors = set()
    for link in tables[0].findAll('a', href=True):
        mirrors.add(CandidateMirror(mirror_url=link['href']))
    if not mirrors:
        raise Exception("Failed to discover any Debian mirrors! (using %s)" %
                        MIRRORS_URL)
    logger.info("Discovered %s in %s.",
                pluralize(len(mirrors), "Debian mirror"), timer)
    return mirrors

Example #7

0

Show file

File: __init__.py Project: xolox/python-apt-mirror-updater

    def validate_mirror(self, mirror_url):
        """
        Make sure a mirror serves :attr:`distribution_codename`.

        :param mirror_url: The base URL of the mirror (a string).
        :returns: One of the values in the :class:`MirrorStatus` enumeration.

        This method assumes that :attr:`old_releases_url` is always valid.
        The status of every other mirror is determined once and then cached
        in :attr:`validated_mirrors`.
        """
        # The old-releases mirror is accepted without checking anything.
        if mirrors_are_equal(mirror_url, self.old_releases_url):
            return MirrorStatus.AVAILABLE
        normalized_url = normalize_mirror_url(mirror_url)
        cache_key = (normalized_url, self.distribution_codename)
        status = self.validated_mirrors.get(cache_key)
        if status is not None:
            return status
        logger.info("Checking if %s is available on %s ..", self.release, normalized_url)
        try:
            # Download the Release.gpg file on the assumption that it should
            # always exist and is more or less guaranteed to be small.
            candidate = CandidateMirror(mirror_url=normalized_url, updater=self)
            candidate.release_gpg_contents = fetch_url(candidate.release_gpg_url, retry=False)
            if candidate.is_available:
                status = MirrorStatus.AVAILABLE
            else:
                status = MirrorStatus.UNAVAILABLE
        except NotFoundError:
            # A 404 response may indicate that the release has gone end of
            # life; either way the mirror can't be used.
            status = MirrorStatus.MAYBE_EOL
        except Exception:
            # Any other failure simply means the mirror is unavailable.
            status = MirrorStatus.UNAVAILABLE
        # Remember the outcome for subsequent calls.
        self.validated_mirrors[cache_key] = status
        return status

Example #8

0

Show file

    def validate_mirror(self, mirror_url):
        """
        Make sure a mirror serves :attr:`distribution_codename`.

        :param mirror_url: The base URL of the mirror (a string).
        :returns: One of the values in the :class:`MirrorStatus` enumeration.

        The status is looked up in :attr:`validated_mirrors` first; only when
        nothing is cached for the (normalized) mirror URL and distribution
        codename is the mirror actually contacted.
        """
        normalized_url = normalize_mirror_url(mirror_url)
        cache_key = (normalized_url, self.distribution_codename)
        cached_status = self.validated_mirrors.get(cache_key)
        if cached_status is not None:
            return cached_status
        logger.info("Checking if %s is available on %s ..", self.release,
                    normalized_url)
        try:
            # The Release.gpg file is assumed to always exist and to be
            # relatively small, which makes it a cheap availability probe.
            candidate = CandidateMirror(mirror_url=normalized_url, updater=self)
            gpg_url = candidate.release_gpg_url
            candidate.release_gpg_contents = fetch_url(gpg_url, retry=False)
            if candidate.is_available:
                status = MirrorStatus.AVAILABLE
            else:
                status = MirrorStatus.UNAVAILABLE
        except NotFoundError:
            # 404 responses can signal that the release has gone end of
            # life. In any case the mirror is unavailable.
            status = MirrorStatus.MAYBE_EOL
        except Exception:
            # An unspecified (non 404) error: the mirror is unavailable.
            status = MirrorStatus.UNAVAILABLE
        # Cache the freshly determined status.
        self.validated_mirrors[cache_key] = status
        return status

Example #9

0

Show file

File: ubuntu.py Project: xolox/python-apt-mirror-updater

def discover_mirrors():
    """
    Discover available Ubuntu mirrors.

    :returns: A set of :class:`.CandidateMirror` objects that have their
              :attr:`~.CandidateMirror.mirror_url` property set and may have
              the :attr:`~.CandidateMirror.last_updated` property set.
    :raises: :exc:`~exceptions.Exception` when no mirrors are discovered.

    This queries :data:`MIRRORS_URL` and :data:`MIRROR_SELECTION_URL` to
    discover available Ubuntu mirrors. When the selection yields more than
    ten mirrors only those are returned. Here's an example run:

    >>> from apt_mirror_updater.backends.ubuntu import discover_mirrors
    >>> from pprint import pprint
    >>> pprint(discover_mirrors())
    set([CandidateMirror(mirror_url='http://archive.ubuntu.com/ubuntu/'),
         CandidateMirror(mirror_url='http://ftp.nluug.nl/os/Linux/distr/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.nl.leaseweb.net/ubuntu/'),
         ...])
    """
    timer = Timer()
    logger.info("Discovering Ubuntu mirrors at %s ..", MIRRORS_URL)
    html = fetch_url(MIRRORS_URL, retry=True)
    soup = BeautifulSoup(html, 'html.parser')
    mirrors = set()
    # Walk all rows of all tables on the page.
    rows = (tr for table in soup.findAll('table') for tr in table.findAll('tr'))
    for tr in rows:
        for a in tr.findAll('a', href=True):
            href = a['href']
            # Ignore links that don't look like a mirror URL.
            looks_like_mirror = (href.startswith(('http://', 'https://')) and
                                 href.endswith('/ubuntu/'))
            if not looks_like_mirror:
                continue
            # Try to figure out the mirror's reported latency.
            last_updated = None
            row_text = u''.join(tr.findAll(text=True))
            for status_label, num_seconds in MIRROR_STATUSES:
                if status_label in row_text:
                    last_updated = num_seconds
                    break
            mirrors.add(CandidateMirror(mirror_url=href, last_updated=last_updated))
            # One mirror per row, skip to the next row.
            break
    if not mirrors:
        raise Exception("Failed to discover any Ubuntu mirrors! (using %s)" % MIRRORS_URL)
    # Discover fast (geographically suitable) mirrors to speed up ranking.
    # See also https://github.com/xolox/python-apt-mirror-updater/issues/6.
    selected_mirrors = discover_mirror_selection()
    # NOTE(review): both steps are symmetric differences, so (assuming
    # discover_mirror_selection() returns a set) the result can also hold
    # selected mirrors that were not scraped above -- confirm whether a
    # plain intersection was intended.
    slow_mirrors = mirrors ^ selected_mirrors
    fast_mirrors = mirrors ^ slow_mirrors
    if len(fast_mirrors) <= 10:
        # Too few fast mirrors, stick with the complete overview.
        logger.info("Discovered %s in %s.", pluralize(len(mirrors), "Ubuntu mirror"), timer)
        return mirrors
    # Narrow down the list of candidate mirrors to fast mirrors.
    logger.info("Discovered %s in %s (narrowed down from %s).",
                pluralize(len(fast_mirrors), "Ubuntu mirror"),
                timer, pluralize(len(mirrors), "mirror"))
    return fast_mirrors

Example #10

0

Show file

def discover_mirrors():
    """
    Discover available Ubuntu mirrors.

    :returns: A set of :class:`.CandidateMirror` objects that have their
              :attr:`~.CandidateMirror.mirror_url` property set and may have
              the :attr:`~.CandidateMirror.last_updated` property set.
    :raises: :exc:`~exceptions.Exception` when no mirrors are discovered.

    This queries :data:`MIRRORS_URL` and :data:`MIRROR_SELECTION_URL` to
    discover available Ubuntu mirrors; the result is narrowed down to the
    selected mirrors when there are more than ten of those. Here's an
    example run:

    >>> from apt_mirror_updater.backends.ubuntu import discover_mirrors
    >>> from pprint import pprint
    >>> pprint(discover_mirrors())
    set([CandidateMirror(mirror_url='http://archive.ubuntu.com/ubuntu/'),
         CandidateMirror(mirror_url='http://ftp.nluug.nl/os/Linux/distr/ubuntu/'),
         CandidateMirror(mirror_url='http://mirror.nl.leaseweb.net/ubuntu/'),
         ...])
    """
    timer = Timer()
    logger.info("Discovering Ubuntu mirrors at %s ..", MIRRORS_URL)
    soup = BeautifulSoup(fetch_url(MIRRORS_URL, retry=True), 'html.parser')
    mirrors = set()
    for table in soup.findAll('table'):
        for tr in table.findAll('tr'):
            # Pick the first link in this row that looks like a mirror URL.
            mirror_url = next(
                (a['href'] for a in tr.findAll('a', href=True)
                 if a['href'].startswith(('http://', 'https://'))
                 and a['href'].endswith('/ubuntu/')),
                None,
            )
            if mirror_url is None:
                continue
            # The first status label found in the row's text (if any)
            # determines the mirror's reported latency.
            row_text = u''.join(tr.findAll(text=True))
            last_updated = next(
                (num_seconds
                 for status_label, num_seconds in MIRROR_STATUSES
                 if status_label in row_text),
                None,
            )
            mirrors.add(
                CandidateMirror(
                    mirror_url=mirror_url,
                    last_updated=last_updated,
                ))
    if not mirrors:
        raise Exception("Failed to discover any Ubuntu mirrors! (using %s)" %
                        MIRRORS_URL)
    # Discover fast (geographically suitable) mirrors to speed up ranking.
    # See also https://github.com/xolox/python-apt-mirror-updater/issues/6.
    selected_mirrors = discover_mirror_selection()
    # NOTE(review): two symmetric differences in a row; assuming
    # discover_mirror_selection() returns a set, selected mirrors that
    # were not scraped above can end up in the result too -- confirm
    # whether an intersection was intended.
    slow_mirrors = mirrors ^ selected_mirrors
    fast_mirrors = mirrors ^ slow_mirrors
    narrowed_down = len(fast_mirrors) > 10
    if narrowed_down:
        # Narrow down the list of candidate mirrors to fast mirrors.
        logger.info("Discovered %s in %s (narrowed down from %s).",
                    pluralize(len(fast_mirrors), "Ubuntu mirror"), timer,
                    pluralize(len(mirrors), "mirror"))
    else:
        logger.info("Discovered %s in %s.",
                    pluralize(len(mirrors), "Ubuntu mirror"), timer)
    return fast_mirrors if narrowed_down else mirrors