Example #1
 def _download_to(self, url, filename):
     self.info("Downloading %s", url)
     # Download the file
     fp = None  # ensure fp exists for the finally clause below
     try:
         checker = HashChecker.from_url(url)
         fp = self.open_url(strip_fragment(url))
         if isinstance(fp, HTTPError):
             raise DistutilsError(
                 "Can't download %s: %s %s" % (url, fp.code,fp.msg)
             )
         headers = fp.info()
         blocknum = 0
         bs = self.dl_blocksize
         size = -1
         if "content-length" in headers:
             # Some servers return multiple Content-Length headers :(
             sizes = get_all_headers(headers, 'Content-Length')
             size = max(map(int, sizes))
             self.reporthook(url, filename, blocknum, bs, size)
         with open(filename, 'wb') as tfp:
             while True:
                 block = fp.read(bs)
                 if not block:
                     break
                 checker.feed(block)
                 tfp.write(block)
                 blocknum += 1
                 self.reporthook(url, filename, blocknum, bs, size)
             self.check_hash(checker, filename, tfp)
         return headers
     finally:
         if fp:
             fp.close()
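
The same block-wise pattern works outside setuptools. Below is a minimal stand-alone sketch using only the standard library; the function name, the SHA-256 choice, and the 8192-byte block size are illustrative assumptions, not values taken from the code above:

import hashlib
import urllib.request

def download_with_hash(url, filename, expected_sha256=None, blocksize=8192):
    # Stream the response to disk in blocks, hashing as we go, so large
    # files never have to fit in memory.
    hasher = hashlib.sha256()
    with urllib.request.urlopen(url) as fp, open(filename, 'wb') as tfp:
        while True:
            block = fp.read(blocksize)
            if not block:
                break
            hasher.update(block)
            tfp.write(block)
    if expected_sha256 and hasher.hexdigest() != expected_sha256:
        raise ValueError("Hash mismatch for %s" % filename)
    return filename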
Example #2
    def creds_by_repository(self):
        sections_with_repositories = [
            section for section in self.sections()
            if self.get(section, 'repository').strip()
        ]

        return dict(map(self._get_repo_cred, sections_with_repositories))
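
A stand-alone sketch of the same idea with `configparser`; the `.pypirc` path and the assumption that every matching section defines `username` and `password` are illustrative:

import configparser

config = configparser.RawConfigParser({'repository': ''})
config.read('.pypirc')  # hypothetical config file path

creds_by_repository = {
    config.get(section, 'repository').strip(): (
        # assumes matching sections define username and password
        config.get(section, 'username'),
        config.get(section, 'password'),
    )
    for section in config.sections()
    if config.get(section, 'repository').strip()
}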
Example #3
 def scan(link):
     # Process a URL to see if it's for a package page
     if link.startswith(self.index_url):
         parts = list(map(
             unquote, link[len(self.index_url):].split('/')
         ))
         if len(parts) == 2 and '#' not in parts[1]:
             # it's a package page, sanitize and index it
             pkg = safe_name(parts[0])
             ver = safe_version(parts[1])
             self.package_pages.setdefault(pkg.lower(), {})[link] = True
             return to_filename(pkg), to_filename(ver)
     return None, None
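
`safe_name`, `safe_version`, and `to_filename` come from `pkg_resources`; a small usage sketch, assuming setuptools is installed (the index root and the link are made-up values):

from urllib.parse import unquote
from pkg_resources import safe_name, safe_version, to_filename

index_url = 'https://pypi.python.org/simple/'  # assumed index root
link = index_url + 'My%5FProject/1.0a1'        # hypothetical package page

parts = [unquote(part) for part in link[len(index_url):].split('/')]
if len(parts) == 2 and '#' not in parts[1]:
    pkg, ver = safe_name(parts[0]), safe_version(parts[1])
    print(to_filename(pkg), to_filename(ver))  # My_Project 1.0a1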
Example #4
    def process_url(self, url, retrieve=False):
        """Evaluate a URL as a possible download, and maybe retrieve it"""
        if os.getenv("CONDA_BUILD"):
            raise RuntimeError(
                "Setuptools downloading is disabled in conda build. "
                "Be sure to add all dependencies in the meta.yaml; url=%s" % url
            )

        if url in self.scanned_urls and not retrieve:
            return
        self.scanned_urls[url] = True
        if not URL_SCHEME(url):
            self.process_filename(url)
            return
        else:
            dists = list(distros_for_url(url))
            if dists:
                if not self.url_ok(url):
                    return
                self.debug("Found link: %s", url)

        if dists or not retrieve or url in self.fetched_urls:
            list(map(self.add, dists))
            return  # don't need the actual page

        if not self.url_ok(url):
            self.fetched_urls[url] = True
            return

        self.info("Reading %s", url)
        self.fetched_urls[url] = True   # prevent multiple fetch attempts
        f = self.open_url(
            url,
            "Download error on %s: %%s -- Some packages may not be found!" % url,
        )
        if f is None:
            return
        self.fetched_urls[f.url] = True
        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()   # not html, we can't process it
            return

        base = f.url     # handle redirects
        page = f.read()
        if not isinstance(page, str): # We are in Python 3 and got bytes. We want str.
            if isinstance(f, HTTPError):
                # Errors have no charset, assume latin1:
                charset = 'latin-1'
            else:
                charset = f.headers.get_param('charset') or 'latin-1'
            page = page.decode(charset, "ignore")
        f.close()
        for match in HREF.finditer(page):
            link = urljoin(base, htmldecode(match.group(1)))
            self.process_url(link)
        if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
            page = self.process_index(url, page)
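
The decoding step in the middle is worth isolating. A minimal Python 3 sketch of the same fallback logic (the function name is hypothetical; latin-1 is the fallback because it can decode any byte sequence):

import urllib.request

def read_page_as_str(url):
    # Decode the body with the charset declared in the Content-Type
    # header, falling back to latin-1, which never raises.
    with urllib.request.urlopen(url) as f:
        charset = f.headers.get_param('charset') or 'latin-1'
        return f.read().decode(charset, 'ignore')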
Example #5
 def __init__(
         self, index_url="https://pypi.python.org/simple", hosts=('*',),
         ca_bundle=None, verify_ssl=True, *args, **kw
         ):
     Environment.__init__(self, *args, **kw)
     # append a trailing slash only when one is missing ("/"[:False] is "")
     self.index_url = index_url + "/"[:not index_url.endswith('/')]
     self.scanned_urls = {}
     self.fetched_urls = {}
     self.package_pages = {}
     # glob-style host patterns, compiled into a single alternation regex
     self.allows = re.compile('|'.join(map(translate, hosts))).match
     self.to_scan = []
     use_ssl = (
         verify_ssl
         and ssl_support.is_available
         and (ca_bundle or ssl_support.find_ca_bundle())
     )
     if use_ssl:
         self.opener = ssl_support.opener_for(ca_bundle)
     else:
         self.opener = urllib2.urlopen
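
Two idioms here deserve a closer look: the slice trick that appends a trailing slash only when it is missing, and `fnmatch.translate` turning glob-style host patterns into one regex. A quick demonstration with made-up hosts:

import re
from fnmatch import translate

index_url = 'https://pypi.python.org/simple'
# "/"[:True] == "/" and "/"[:False] == "", so a slash is added only if absent:
index_url = index_url + "/"[:not index_url.endswith('/')]
assert index_url.endswith('/')

allows = re.compile('|'.join(map(translate, ('*.python.org', 'localhost')))).match
print(bool(allows('pypi.python.org')))   # True
print(bool(allows('evil.example.com')))  # False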
Example #6
    def scan_egg_link(self, path, entry):
        with open(os.path.join(path, entry)) as raw_lines:
            # filter non-empty lines
            lines = list(filter(None, map(str.strip, raw_lines)))

        if len(lines) != 2:
            # format is not recognized; punt
            return

        egg_path, setup_path = lines

        for dist in find_distributions(os.path.join(path, egg_path)):
            dist.location = os.path.join(path, egg_path, setup_path)
            dist.precedence = SOURCE_DIST
            self.add(dist)
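
For context, an `.egg-link` file for a development install holds two non-empty lines: a project directory and a path relative to it (frequently just `.`). Because `os.path.join` discards everything before an absolute component, joining the scan directory with both lines lands inside the project. A sketch with hypothetical POSIX paths:

import os

egg_link_lines = ['/home/user/src/someproject', '.']  # hypothetical contents

# os.path.join drops components preceding an absolute path, so the scan
# directory falls away and the project directory wins:
print(os.path.join('/usr/lib/python/site-packages', *egg_link_lines))
# -> /home/user/src/someproject/.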
Example #7
    def process_filename(self, fn, nested=False):
        # process filenames or directories
        if not os.path.exists(fn):
            self.warn("Not found: %s", fn)
            return

        if os.path.isdir(fn) and not nested:
            path = os.path.realpath(fn)
            for item in os.listdir(path):
                self.process_filename(os.path.join(path, item), True)

        dists = distros_for_filename(fn)
        if dists:
            self.debug("Found: %s", fn)
            list(map(self.add, dists))
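
The `nested` flag limits the traversal to one directory level; note that the directory itself still falls through to `distros_for_filename`. The same control flow expressed as a stand-alone generator (a sketch; the name is made up):

import os

def one_level_candidates(fn, nested=False):
    # Yield fn itself plus, for a top-level directory, its direct
    # children; nested=True stops any further descent.
    if not os.path.exists(fn):
        return
    if os.path.isdir(fn) and not nested:
        path = os.path.realpath(fn)
        for item in os.listdir(path):
            yield from one_level_candidates(os.path.join(path, item), True)
    yield fn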
Example #8
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""

    for match in REL.finditer(page):
        tag, rel = match.groups()
        rels = set(map(str.strip, rel.lower().split(',')))
        if 'homepage' in rels or 'download' in rels:
            for match in HREF.finditer(tag):
                yield urljoin(url, htmldecode(match.group(1)))

    for tag in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(tag)
        if pos != -1:
            match = HREF.search(page, pos)
            if match:
                yield urljoin(url, htmldecode(match.group(1)))
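
`HREF` and `REL` are module-level regexes in setuptools; the simplified stand-ins below are assumptions for demonstration only, not the library's actual patterns:

import re
from urllib.parse import urljoin

HREF = re.compile(r'''href\s*=\s*["']?([^"'> ]+)''', re.I)
REL = re.compile(r'''(<a\b[^>]*\brel\s*=\s*["']?([^"'>]+)[^>]*>)''', re.I)

page = '<a rel="download, homepage" href="/pkg-1.0.tar.gz">pkg</a>'
for tag, rel in REL.findall(page):
    rels = set(map(str.strip, rel.lower().split(',')))
    if {'homepage', 'download'} & rels:
        for href in HREF.findall(tag):
            print(urljoin('https://example.com/project/', href))
            # -> https://example.com/pkg-1.0.tar.gz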
Example #9
 def prescan(self):
     """Scan urls scheduled for prescanning (e.g. --find-links)"""
     if self.to_scan:
         list(map(self.scan_url, self.to_scan))
     self.to_scan = None     # from now on, go ahead and process immediately
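
Setting `to_scan` to `None` acts as a sentinel: later callers can tell that prescanning already happened and process URLs immediately. A hypothetical minimal class showing the full defer-then-flush pattern (a sketch, not the setuptools API):

class DeferringScanner:
    def __init__(self):
        self.to_scan = []  # becomes None once prescan() has run

    def add(self, url):
        if self.to_scan is None:
            self.scan_url(url)        # already prescanned: process now
        else:
            self.to_scan.append(url)  # defer until prescan()

    def prescan(self):
        for url in self.to_scan or ():
            self.scan_url(url)
        self.to_scan = None

    def scan_url(self, url):
        print("scanning", url)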