def parse_index(self, disturl, html, scrape=True):
    p = HTMLPage(html, disturl.url)
    seen = set()
    for link in p.links:
        newurl = URL(link.url)
        if not newurl.is_valid_http_url():
            continue
        eggfragment = newurl.eggfragment
        if scrape and eggfragment:
            if normalize_name(eggfragment).startswith(self.projectname):
                # XXX seems we have to maintain a particular
                # order to keep pip/easy_install happy with some
                # packages (e.g. nose)
                if newurl not in self.egglinks:
                    self.egglinks.insert(0, newurl)
            else:
                log.debug("skip egg link %s (projectname: %s)",
                          newurl, self.projectname)
            continue
        if is_archive_of_project(newurl, self.projectname):
            if not newurl.is_valid_http_url():
                log.warn("unparseable/unsupported url: %r", newurl)
            else:
                seen.add(newurl.url)
                self._mergelink_ifbetter(newurl)
                continue
    if scrape:
        for link in p.rel_links():
            if link.url not in seen:
                disturl = URL(link.url)
                if disturl.is_valid_http_url():
                    self.crawllinks.add(disturl)
def gen_nginx(tw, config, writer):
    outside_url = config.args.outside_url
    if outside_url is None:  # default
        outside_url = "http://localhost:80"

    parts = URL(outside_url).netloc.split(":")
    if len(parts) < 2:
        parts.append(80)
    outside_host, outside_port = parts

    nginxconf = render(tw, "nginx-devpi.conf", format=1,
                       outside_url=outside_url,
                       outside_host=outside_host,
                       outside_port=outside_port,
                       port=config.args.port,
                       serverdir=config.serverdir)
    writer("nginx-devpi.conf", nginxconf)
def _normalize_url(self, url):
    url = URL(url, asdir=1)
    if not url.is_valid_http_url():
        url = URL(self.simpleindex, url.url).url
    return url
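# Hedged usage sketch, not from the source: two-argument construction, as in
# URL(self.simpleindex, url.url) above, resolves the second argument against
# the first (urljoin-style). The base URL below is illustrative only.
from devpi_common.url import URL  # assumed import location

def _example_normalize_url():
    base = "http://localhost:3141/root/pypi/+simple/"
    assert URL(base, "pytest/").url == base + "pytest/"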
def __init__(self, url="", *args, **kwargs):
    self.requires_python = kwargs.pop('requires_python', None)
    URL.__init__(self, url, *args, **kwargs)
def root_url(self):
    if self.login:
        return URL(self.login, ".")
def index_url(self):
    if self.index:
        return URL(self.index)
    return URL("")
def get_release_paths(self, projectname):
    r = self.get_simple(projectname)
    pkg_url = URL(r.request.url)
    paths = [pkg_url.joinpath(link["href"]).path
             for link in BeautifulSoup(r.body).findAll("a")]
    return paths
def get_release_paths(self, project):
    r = self.get_simple(project)
    pkg_url = URL(r.request.url)
    paths = [pkg_url.joinpath(link["href"]).path
             for link in BeautifulSoup(r.body, "html.parser").findAll("a")]
    return paths
def __init__(self, url):
    HTMLParser.__init__(self)
    self.projects = set()
    self.baseurl = URL(url)
    self.basehost = self.baseurl.replace(path='')
    self.project = None
def test_addpath(self):
    url = URL("http://root.com/path")
    assert url.addpath("sub").url == "http://root.com/path/sub"
    assert url.addpath("sub", asdir=1).url == "http://root.com/path/sub/"
    url = URL("http://root.com/path/")
    assert url.addpath("sub").url == "http://root.com/path/sub"
    assert url.addpath("sub", asdir=1).url == "http://root.com/path/sub/"
    url = URL("http://root.com/path?foo=bar")
    assert url.addpath("sub").url == "http://root.com/path/sub?foo=bar"
    assert url.addpath("sub", asdir=1).url == "http://root.com/path/sub/?foo=bar"
    url = URL("http://root.com/path/?foo=bar")
    assert url.addpath("sub").url == "http://root.com/path/sub?foo=bar"
    assert url.addpath("sub", asdir=1).url == "http://root.com/path/sub/?foo=bar"
def test_relpath_edge_case(self):
    with pytest.raises(ValueError):
        URL("http://qwe/path").relpath("lkjqwe")
def test_netloc(self):
    assert URL("http://qwe/").netloc == 'qwe'
    assert URL("http://foo:pass@qwe/").netloc == 'foo:pass@qwe'
    assert URL("http://qwe/?foo=bar").netloc == 'qwe'
    assert URL("http://foo:pass@qwe/?foo=bar").netloc == 'foo:pass@qwe'
def test_repr(self):
    d = URL("http://host.com/path")
    assert repr(d) == "<URL 'http://host.com/path'>"
    d = URL("http://host.com/path?foo=bar")
    assert repr(d) == "<URL 'http://host.com/path?foo=bar'>"
    assert d.query == "foo=bar"
def test_geturl_nofrag(self):
    url = URL("http://a/py.tar.gz#egg=py-dev")
    assert url.geturl_nofragment() == "http://a/py.tar.gz"
    url = URL("http://a/py.tar.gz?foo=bar#egg=py-dev")
    assert url.geturl_nofragment() == "http://a/py.tar.gz?foo=bar"
def test_empty_url(self):
    assert not URL("")
    assert not URL()
    url = URL(None)
    assert url.url == ""
def fetch(self, handler, url):
    if self.initial_fetch:
        url = URL(url)
        if url.query:
            url = url.replace(query=url.query + '&initial_fetch')
        else:
            url = url.replace(query='initial_fetch')
        url = url.url
    log = self.log
    config = self.xom.config
    log.info("fetching %s", url)
    uuid, master_uuid = make_uuid_headers(config.nodeinfo)
    assert uuid != master_uuid
    try:
        self.master_contacted_at = time.time()
        token = self.auth_serializer.dumps(uuid)
        r = self.session.get(url, auth=self.master_auth, headers={
            H_REPLICA_UUID: uuid,
            H_EXPECTED_MASTER_ID: master_uuid,
            H_REPLICA_OUTSIDE_URL: config.args.outside_url,
            str('Authorization'): 'Bearer %s' % token},
            timeout=self.REPLICA_REQUEST_TIMEOUT)
    except Exception as e:
        msg = ''.join(traceback.format_exception_only(e.__class__, e)).strip()
        log.error("error fetching %s: %s", url, msg)
        return False
    if r.status_code not in (200, 202):
        log.error("%s %s: failed fetching %s", r.status_code, r.reason, url)
        return False
    # we check that the remote instance
    # has the same UUID we saw last time
    master_uuid = config.get_master_uuid()
    remote_master_uuid = r.headers.get(H_MASTER_UUID)
    if not remote_master_uuid:
        # we don't fatally leave the process because
        # it might just be a temporary misconfiguration
        # for example of a nginx frontend
        log.error("remote provides no %r header, running "
                  "<devpi-server-2.1?"
                  " headers were: %s", H_MASTER_UUID, r.headers)
        self.thread.sleep(self.ERROR_SLEEP)
        return True
    if master_uuid and remote_master_uuid != master_uuid:
        # we got a master_uuid and it is not the one we
        # expect, we are replicating for -- it's unlikely this heals
        # itself.  It's thus better to die and signal we can't operate.
        log.error("FATAL: master UUID %r does not match "
                  "expected master UUID %r. EXITING.",
                  remote_master_uuid, master_uuid)
        # force exit of the process
        os._exit(3)
    try:
        remote_serial = int(r.headers["X-DEVPI-SERIAL"])
    except Exception as e:
        msg = ''.join(traceback.format_exception_only(e.__class__, e)).strip()
        log.error("error fetching %s: %s", url, msg)
        return False
    if r.status_code == 200:
        try:
            handler(r)
        except Exception:
            log.exception("could not process: %s", r.url)
        else:
            # we successfully received data so let's
            # record the master_uuid for future consistency checks
            if not master_uuid:
                self.xom.config.set_master_uuid(remote_master_uuid)
            # also record the current master serial for status info
            self.update_master_serial(remote_serial)
            return True
    elif r.status_code == 202:
        remote_serial = int(r.headers["X-DEVPI-SERIAL"])
        log.debug("%s: trying again %s\n", r.status_code, url)
        # also record the current master serial for status info
        self.update_master_serial(remote_serial)
        return True
    return False
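# Hedged illustration, not from the source: the initial_fetch query handling
# at the top of fetch(), applied to a made-up replica changelog URL.
def _example_initial_fetch_query():
    url = URL("http://master:3141/+changelog/0")
    if url.query:
        url = url.replace(query=url.query + '&initial_fetch')
    else:
        url = url.replace(query='initial_fetch')
    assert url.url == "http://master:3141/+changelog/0?initial_fetch"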
def test_relpathentry_size(self, filestore):
    link = URL("http://pypi.python.org/pkg/pytest-1.7.zip")
    entry = filestore.maplink(link)
    entry.set(size=123123)
    assert py.builtin._istext(entry._mapping["size"])
    assert entry.size == u"123123"
def test_replace(self):
    url = URL("http://qwe/foo?bar=ham#hash")
    assert url.replace(scheme='https').url == "https://qwe/foo?bar=ham#hash"
    assert url.replace(scheme='').url == "//qwe/foo?bar=ham#hash"
    assert url.replace(netloc='world').url == "http://world/foo?bar=ham#hash"
    assert url.replace(netloc='').url == "http:///foo?bar=ham#hash"
    assert url.replace(path='/').url == "http://qwe/?bar=ham#hash"
    assert url.replace(path='').url == "http://qwe?bar=ham#hash"
    assert url.replace(query='').url == "http://qwe/foo#hash"
    assert url.replace(query='foo=bar').url == "http://qwe/foo?foo=bar#hash"
    assert url.replace(fragment='').url == "http://qwe/foo?bar=ham"
    assert url.replace(fragment='foo').url == "http://qwe/foo?bar=ham#foo"
    # original shouldn't have changed
    assert url.url == "http://qwe/foo?bar=ham#hash"
    # trying to change something not existing does nothing
    assert url.replace(foo='https').url == "http://qwe/foo?bar=ham#hash"
def import_filedesc(self, stage, filedesc, versions):
    rel = filedesc["relpath"]
    project = filedesc["projectname"]
    p = self.import_rootdir.join(rel)
    assert p.check(), p
    data = p.read("rb")
    if self.xom.config.hard_links:
        # wrap the data for additional attribute
        data = BytesForHardlink(data)
        data.devpi_srcpath = p.strpath
    if filedesc["type"] == "releasefile":
        mapping = filedesc["entrymapping"]
        if self.dumpversion == "1":
            # previous versions would not add a version attribute
            version = BasenameMeta(p.basename).version
        else:
            version = filedesc["version"]
        if hasattr(stage, 'store_releasefile'):
            link = stage.store_releasefile(
                project, version, p.basename, data,
                last_modified=mapping["last_modified"])
            entry = link.entry
        else:
            link = None
            url = URL(mapping['url']).replace(fragment=mapping['hash_spec'])
            entry = self.xom.filestore.maplink(
                url, stage.username, stage.index, project)
            entry.file_set_content(data, mapping["last_modified"])
            (_, links_with_data, serial) = stage._load_cache_links(project)
            if links_with_data is None:
                links_with_data = []
            links = [(url.basename, entry.relpath)]
            requires_python = [versions[version].get('requires_python')]
            yanked = [versions[version].get('yanked')]
            for key, href, require_python, is_yanked in links_with_data:
                links.append((key, href))
                requires_python.append(require_python)
                yanked.append(is_yanked)
            stage._save_cache_links(
                project, links, requires_python, yanked, serial)
        # devpi-server-2.1 exported with md5 checksums
        if "md5" in mapping:
            assert "hash_spec" not in mapping
            mapping["hash_spec"] = "md5=" + mapping["md5"]
        hash_algo, hash_value = parse_hash_spec(mapping["hash_spec"])
        digest = hash_algo(entry.file_get_content()).hexdigest()
        if digest != hash_value:
            fatal("File %s has bad checksum %s, expected %s" % (
                p, digest, hash_value))
        # note that the actual hash_type used within devpi-server is not
        # determined here but in store_releasefile/store_doczip/store_toxresult etc
    elif filedesc["type"] == "doczip":
        version = filedesc["version"]
        link = stage.store_doczip(project, version, data)
    elif filedesc["type"] == "toxresult":
        linkstore = stage.get_linkstore_perstage(
            filedesc["projectname"], filedesc["version"])
        # we cannot search for the full relative path because
        # it might use a different checksum
        basename = posixpath.basename(filedesc["for_entrypath"])
        link, = linkstore.get_links(basename=basename)
        link = stage.store_toxresult(link, json.loads(data.decode("utf8")))
    else:
        fatal("unknown file type: %s" % (filedesc["type"],))
    if link is not None:
        history_log = filedesc.get('log')
        if history_log is None:
            link.add_log('upload', '<import>', dst=stage.name)
        else:
            link.add_logs(history_log)
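# Hedged sketch, not from the source: the "<algo>=<hexdigest>" hash_spec
# convention verified above. parse_hash_spec (assumed to live in
# devpi_common.types) returns a hashlib constructor plus the expected hex
# digest; the digest below is the md5 of the empty string.
from devpi_common.types import parse_hash_spec  # assumed import location

def _example_hash_spec():
    hash_algo, hash_value = parse_hash_spec(
        "md5=d41d8cd98f00b204e9800998ecf8427e")
    assert hash_algo(b"").hexdigest() == hash_value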
def test_replace_nothing(self):
    url = URL("http://qwe/foo?bar=ham#hash")
    new_url = url.replace()
    assert new_url is not url
    assert new_url.url == url.url
def test_comparison(self):
    base = URL('https://pypi.org')
    url = URL('https://pypi.org/simple/foo').replace(path='')
    assert base == url
    assert not (base != url)
def pypi_package_link(self, pkgname, md5=True):
    link = "https://pypi.python.org/package/some/%s" % pkgname
    if md5:
        self._md5.update(link.encode("utf8"))  # basically random
        link += "#md5=%s" % self._md5.hexdigest()
    return URL(link)
def test_query(self):
    assert URL("http://example.com").query == ""
    assert URL("http://example.com?foo=bar").query == "foo=bar"
def indexroot(self, user, index):
    stage = self.getstage(user, index)
    if json_preferred():
        projectlist = stage.getprojectnames_perstage()
        projectlist = sorted(projectlist)
        apireturn(200, type="list:projectconfig", result=projectlist)
    if stage.name == "root/pypi":
        return simple_html_body("%s index" % stage.name, [
            html.ul(
                html.li(html.a("simple index", href="+simple/")),
            ),
        ]).unicode()

    # XXX this should go to a template
    if hasattr(stage, "ixconfig"):
        bases = html.ul()
        for base in stage.ixconfig["bases"]:
            bases.append(html.li(
                html.a("%s" % base, href="/%s/" % base),
                " (",
                html.a("simple", href="/%s/+simple/" % base),
                " )",
            ))
        if bases:
            bases = [html.h2("inherited bases"), bases]
    else:
        bases = []

    latest_packages = html.table(
        html.tr(html.td("info"), html.td("file"), html.td("docs")))

    for projectname in stage.getprojectnames_perstage():
        metadata = stage.get_metadata_latest(projectname)
        try:
            name, ver = metadata["name"], metadata["version"]
        except KeyError:
            log.error("metadata for project %r empty: %s, skipping",
                      projectname, metadata)
            continue
        dockey = stage._doc_key(name, ver)
        if dockey.exists():
            docs = [html.a("%s-%s docs" % (name, ver),
                           href="%s/%s/+doc/index.html" % (name, ver))]
        else:
            docs = []
        files = metadata.get("+files", {})
        if not files:
            log.warn("project %r version %r has no files",
                     projectname, metadata.get("version"))
        baseurl = URL(request.path)
        for basename, relpath in files.items():
            latest_packages.append(html.tr(
                html.td(html.a("%s-%s info page" % (name, ver),
                               href="%s/%s/" % (name, ver))),
                html.td(html.a(basename,
                               href=baseurl.relpath("/" + relpath))),
                html.td(*docs),
            ))
            break  # could present more releasefiles

    latest_packages = [
        html.h2("in-stage latest packages, at least as recent as bases"),
        latest_packages]

    return simple_html_body("%s index" % stage.name, [
        html.ul(
            html.li(html.a("simple index", href="+simple/")),
        ),
        latest_packages,
        bases,
    ]).unicode()
def test_query_items(self, url, kwargs, expected):
    assert URL(url).get_query_items(**kwargs) == expected
def test_query_replace(self, url, query, expected):
    assert URL(url).replace(query=query) == expected
def get_user_url(self, user=None):
    if user is None:
        user = self.get_auth_user()
        if not user:
            raise ValueError("no current authenticated user")
    return URL(self.rooturl).addpath(user)
def test_hashing(self):
    assert hash(URL("http://a")) == hash(URL("http://a"))
    assert URL("http://a") == URL("http://a")
    assert hash(URL("http://a?foo=bar")) == hash(URL("http://a?foo=bar"))
    assert URL("http://a?foo=bar") == URL("http://a?foo=bar")
def test_eggfragment(self):
    url = URL("http://a/py.tar.gz#egg=py-dev")
    assert url.eggfragment == "py-dev"
    url = URL("http://a/py.tar.gz?foo=bar#egg=py-dev")
    assert url.eggfragment == "py-dev"
    assert url.query == "foo=bar"
def __init__(self, key_href):
    self.key, self.href = key_href
    self._url = URL(self.href)
    self.name, self.version, self.ext = splitbasename(
        self._url.basename, checkarch=False)
    self.eggfragment = self._url.eggfragment
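# Hedged example, not from the source: splitbasename (assumed to live in
# devpi_common.metadata), as used above with checkarch=False, splits a release
# filename into project name, version and extension.
from devpi_common.metadata import splitbasename  # assumed import location

def _example_splitbasename():
    assert splitbasename("pytest-2.8.7.tar.gz", checkarch=False) == \
        ("pytest", "2.8.7", ".tar.gz")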
def test_nohashtypes(self):
    link = URL("whateveer#lqk=123")
    assert link.hash_value is None and link.hash_algo is None
    link = URL("whateveer?foo=bar#lqk=123")
    assert link.hash_value is None and link.hash_algo is None
    assert link.query == "foo=bar"
def test_joinpath_asdir(self):
    url = URL("http://heise.de")
    new = url.joinpath("hello", asdir=1)
    assert new.url == "http://heise.de/hello/"
    new = url.joinpath("hello/", asdir=1)
    assert new.url == "http://heise.de/hello/"
def test_asfile(self):
    assert URL("http://heise.de").asfile().url == "http://heise.de"
    assert URL("http://heise.de/").asfile().url == "http://heise.de"
    assert URL("http://x.de/path/").asfile().url == "http://x.de/path"
    assert URL("http://x.de/path").asfile().url == "http://x.de/path"