Example #1
 def parse_index(self, disturl, html, scrape=True):
     p = HTMLPage(html, disturl.url)
     seen = set()
     for link in p.links:
         newurl = URL(link.url)
         if not newurl.is_valid_http_url():
             continue
         eggfragment = newurl.eggfragment
         if scrape and eggfragment:
             if normalize_name(eggfragment).startswith(self.projectname):
                 # XXX seems we have to maintain a particular
                 # order to keep pip/easy_install happy with some
                 # packages (e.g. nose)
                 if newurl not in self.egglinks:
                     self.egglinks.insert(0, newurl)
             else:
                 log.debug("skip egg link %s (projectname: %s)",
                           newurl, self.projectname)
             continue
         if is_archive_of_project(newurl, self.projectname):
             if not newurl.is_valid_http_url():
                 log.warn("unparseable/unsupported url: %r", newurl)
             else:
                 seen.add(newurl.url)
                 self._mergelink_ifbetter(newurl)
                 continue
     if scrape:
         for link in p.rel_links():
             if link.url not in seen:
                 disturl = URL(link.url)
                 if disturl.is_valid_http_url():
                     self.crawllinks.add(disturl)
Example #2
def gen_nginx(tw, config, writer):
    outside_url = config.args.outside_url
    if outside_url is None: # default
        outside_url = "http://localhost:80"

    parts = URL(outside_url).netloc.split(":")
    if len(parts) < 2:
        parts.append(80)
    outside_host, outside_port = parts

    nginxconf = render(tw, "nginx-devpi.conf", format=1,
                       outside_url=outside_url,
                       outside_host=outside_host,
                       outside_port=outside_port,
                       port=config.args.port,
                       serverdir=config.serverdir)
    writer("nginx-devpi.conf", nginxconf)
Example #3
 def _normalize_url(self, url):
     url = URL(url, asdir=1)
     if not url.is_valid_http_url():
         url = URL(self.simpleindex, url.url).url
     return url
Example #4
 def __init__(self, url="", *args, **kwargs):
     self.requires_python = kwargs.pop('requires_python', None)
     URL.__init__(self, url, *args, **kwargs)
Example #5
 def root_url(self):
     if self.login:
         return URL(self.login, ".")
Example #6
 def index_url(self):
     if self.index:
         return URL(self.index)
     return URL("")
Example #7
 def get_release_paths(self, projectname):
     r = self.get_simple(projectname)
     pkg_url = URL(r.request.url)
     paths = [pkg_url.joinpath(link["href"]).path
              for link in BeautifulSoup(r.body).findAll("a")]
     return paths
Example #8
 def get_release_paths(self, project):
     r = self.get_simple(project)
     pkg_url = URL(r.request.url)
     paths = [pkg_url.joinpath(link["href"]).path
              for link in BeautifulSoup(r.body, "html.parser").findAll("a")]
     return paths
Example #9
 def __init__(self, url):
     HTMLParser.__init__(self)
     self.projects = set()
     self.baseurl = URL(url)
     self.basehost = self.baseurl.replace(path='')
     self.project = None
Example #10
 def test_addpath(self):
     url = URL("http://root.com/path")
     assert url.addpath("sub").url == "http://root.com/path/sub"
     assert url.addpath("sub", asdir=1).url == "http://root.com/path/sub/"
     url = URL("http://root.com/path/")
     assert url.addpath("sub").url == "http://root.com/path/sub"
     assert url.addpath("sub", asdir=1).url == "http://root.com/path/sub/"
     url = URL("http://root.com/path?foo=bar")
     assert url.addpath("sub").url == "http://root.com/path/sub?foo=bar"
     assert url.addpath("sub",
                        asdir=1).url == "http://root.com/path/sub/?foo=bar"
     url = URL("http://root.com/path/?foo=bar")
     assert url.addpath("sub").url == "http://root.com/path/sub?foo=bar"
     assert url.addpath("sub",
                        asdir=1).url == "http://root.com/path/sub/?foo=bar"
Example #11
 def test_relpath_edge_case(self):
     with pytest.raises(ValueError):
         URL("http://qwe/path").relpath("lkjqwe")
Example #12
 def test_netloc(self):
     assert URL("http://qwe/").netloc == 'qwe'
     assert URL("http://*****:*****@qwe/").netloc == 'foo:pass@qwe'
     assert URL("http://qwe/?foo=bar").netloc == 'qwe'
     assert URL("http://*****:*****@qwe/?foo=bar").netloc == 'foo:pass@qwe'
Example #13
 def test_repr(self):
     d = URL("http://host.com/path")
     assert repr(d) == "<URL 'http://host.com/path'>"
     d = URL("http://host.com/path?foo=bar")
     assert repr(d) == "<URL 'http://host.com/path?foo=bar'>"
     assert d.query == "foo=bar"
Example #14
 def test_geturl_nofrag(self):
     url = URL("http://a/py.tar.gz#egg=py-dev")
     assert url.geturl_nofragment() == "http://a/py.tar.gz"
     url = URL("http://a/py.tar.gz?foo=bar#egg=py-dev")
     assert url.geturl_nofragment() == "http://a/py.tar.gz?foo=bar"
Example #15
 def test_empty_url(self):
     assert not URL("")
     assert not URL()
     url = URL(None)
     assert url.url == ""
Example #16
    def fetch(self, handler, url):
        if self.initial_fetch:
            url = URL(url)
            if url.query:
                url = url.replace(query=url.query + '&initial_fetch')
            else:
                url = url.replace(query='initial_fetch')
            url = url.url
        log = self.log
        config = self.xom.config
        log.info("fetching %s", url)
        uuid, master_uuid = make_uuid_headers(config.nodeinfo)
        assert uuid != master_uuid
        try:
            self.master_contacted_at = time.time()
            token = self.auth_serializer.dumps(uuid)
            r = self.session.get(url,
                                 auth=self.master_auth,
                                 headers={
                                     H_REPLICA_UUID: uuid,
                                     H_EXPECTED_MASTER_ID: master_uuid,
                                     H_REPLICA_OUTSIDE_URL:
                                     config.args.outside_url,
                                     str('Authorization'): 'Bearer %s' % token
                                 },
                                 timeout=self.REPLICA_REQUEST_TIMEOUT)
        except Exception as e:
            msg = ''.join(traceback.format_exception_only(e.__class__,
                                                          e)).strip()
            log.error("error fetching %s: %s", url, msg)
            return False

        if r.status_code not in (200, 202):
            log.error("%s %s: failed fetching %s", r.status_code, r.reason,
                      url)
            return False

        # we check that the remote instance
        # has the same UUID we saw last time
        master_uuid = config.get_master_uuid()
        remote_master_uuid = r.headers.get(H_MASTER_UUID)
        if not remote_master_uuid:
            # we don't fatally leave the process because
            # it might just be a temporary misconfiguration
            # for example of a nginx frontend
            log.error(
                "remote provides no %r header, running "
                "<devpi-server-2.1?"
                " headers were: %s", H_MASTER_UUID, r.headers)
            self.thread.sleep(self.ERROR_SLEEP)
            return True
        if master_uuid and remote_master_uuid != master_uuid:
            # we got a master_uuid and it is not the one we
            # expect, we are replicating for -- it's unlikely this heals
            # itself.  It's thus better to die and signal we can't operate.
            log.error(
                "FATAL: master UUID %r does not match "
                "expected master UUID %r. EXITTING.", remote_master_uuid,
                master_uuid)
            # force exit of the process
            os._exit(3)

        try:
            remote_serial = int(r.headers["X-DEVPI-SERIAL"])
        except Exception as e:
            msg = ''.join(traceback.format_exception_only(e.__class__,
                                                          e)).strip()
            log.error("error fetching %s: %s", url, msg)
            return False

        if r.status_code == 200:
            try:
                handler(r)
            except Exception:
                log.exception("could not process: %s", r.url)
            else:
                # we successfully received data so let's
                # record the master_uuid for future consistency checks
                if not master_uuid:
                    self.xom.config.set_master_uuid(remote_master_uuid)
                # also record the current master serial for status info
                self.update_master_serial(remote_serial)
                return True
        elif r.status_code == 202:
            remote_serial = int(r.headers["X-DEVPI-SERIAL"])
            log.debug("%s: trying again %s\n", r.status_code, url)
            # also record the current master serial for status info
            self.update_master_serial(remote_serial)
            return True
        return False
Example #17
 def test_relpathentry_size(self, filestore):
     link = URL("http://pypi.python.org/pkg/pytest-1.7.zip")
     entry = filestore.maplink(link)
     entry.set(size=123123)
     assert py.builtin._istext(entry._mapping["size"])
     assert entry.size == u"123123"
Example #18
 def test_replace(self):
     url = URL("http://qwe/foo?bar=ham#hash")
     assert url.replace(
         scheme='https').url == "https://qwe/foo?bar=ham#hash"
     assert url.replace(scheme='').url == "//qwe/foo?bar=ham#hash"
     assert url.replace(
         netloc='world').url == "http://world/foo?bar=ham#hash"
     assert url.replace(netloc='').url == "http:///foo?bar=ham#hash"
     assert url.replace(path='/').url == "http://qwe/?bar=ham#hash"
     assert url.replace(path='').url == "http://qwe?bar=ham#hash"
     assert url.replace(query='').url == "http://qwe/foo#hash"
     assert url.replace(
         query='foo=bar').url == "http://qwe/foo?foo=bar#hash"
     assert url.replace(fragment='').url == "http://qwe/foo?bar=ham"
     assert url.replace(fragment='foo').url == "http://qwe/foo?bar=ham#foo"
     # original shouldn't have changed
     assert url.url == "http://qwe/foo?bar=ham#hash"
     # trying to change something not existing does nothing
     assert url.replace(foo='https').url == "http://qwe/foo?bar=ham#hash"
Example #19
    def import_filedesc(self, stage, filedesc, versions):
        rel = filedesc["relpath"]
        project = filedesc["projectname"]
        p = self.import_rootdir.join(rel)
        assert p.check(), p
        data = p.read("rb")
        if self.xom.config.hard_links:
            # wrap the data for additional attribute
            data = BytesForHardlink(data)
            data.devpi_srcpath = p.strpath
        if filedesc["type"] == "releasefile":
            mapping = filedesc["entrymapping"]
            if self.dumpversion == "1":
                # previous versions would not add a version attribute
                version = BasenameMeta(p.basename).version
            else:
                version = filedesc["version"]

            if hasattr(stage, 'store_releasefile'):
                link = stage.store_releasefile(
                    project,
                    version,
                    p.basename,
                    data,
                    last_modified=mapping["last_modified"])
                entry = link.entry
            else:
                link = None
                url = URL(
                    mapping['url']).replace(fragment=mapping['hash_spec'])
                entry = self.xom.filestore.maplink(url, stage.username,
                                                   stage.index, project)
                entry.file_set_content(data, mapping["last_modified"])
                (_, links_with_data, serial) = stage._load_cache_links(project)
                if links_with_data is None:
                    links_with_data = []
                links = [(url.basename, entry.relpath)]
                requires_python = [versions[version].get('requires_python')]
                yanked = [versions[version].get('yanked')]
                for key, href, require_python, is_yanked in links_with_data:
                    links.append((key, href))
                    requires_python.append(require_python)
                    yanked.append(is_yanked)
                stage._save_cache_links(project, links, requires_python,
                                        yanked, serial)
            # devpi-server-2.1 exported with md5 checksums
            if "md5" in mapping:
                assert "hash_spec" not in mapping
                mapping["hash_spec"] = "md5=" + mapping["md5"]
            hash_algo, hash_value = parse_hash_spec(mapping["hash_spec"])
            digest = hash_algo(entry.file_get_content()).hexdigest()
            if digest != hash_value:
                fatal("File %s has bad checksum %s, expected %s" %
                      (p, digest, hash_value))
            # note that the actual hash_type used within devpi-server is not
            # determined here but in store_releasefile/store_doczip/store_toxresult etc
        elif filedesc["type"] == "doczip":
            version = filedesc["version"]
            link = stage.store_doczip(project, version, data)
        elif filedesc["type"] == "toxresult":
            linkstore = stage.get_linkstore_perstage(filedesc["projectname"],
                                                     filedesc["version"])
            # we can not search for the full relative path because
            # it might use a different checksum
            basename = posixpath.basename(filedesc["for_entrypath"])
            link, = linkstore.get_links(basename=basename)
            link = stage.store_toxresult(link, json.loads(data.decode("utf8")))
        else:
            fatal("unknown file type: %s" % (type, ))
        if link is not None:
            history_log = filedesc.get('log')
            if history_log is None:
                link.add_log('upload', '<import>', dst=stage.name)
            else:
                link.add_logs(history_log)
Example #20
 def test_replace_nothing(self):
     url = URL("http://qwe/foo?bar=ham#hash")
     new_url = url.replace()
     assert new_url is not url
     assert new_url.url == url.url
Example #21
 def test_comparison(self):
     base = URL('https://pypi.org')
     url = URL('https://pypi.org/simple/foo').replace(path='')
     assert base == url
     assert not (base != url)
Example #22
 def pypi_package_link(self, pkgname, md5=True):
     link = "https://pypi.python.org/package/some/%s" % pkgname
     if md5 == True:
         self._md5.update(link.encode("utf8"))  # basically random
         link += "#md5=%s" % self._md5.hexdigest()
     return URL(link)
Example #23
 def test_query(self):
     assert URL("http://example.com").query == ""
     assert URL("http://example.com?foo=bar").query == "foo=bar"
Example #24
    def indexroot(self, user, index):
        stage = self.getstage(user, index)
        if json_preferred():
            projectlist = stage.getprojectnames_perstage()
            projectlist = sorted(projectlist)
            apireturn(200, type="list:projectconfig", result=projectlist)
        if stage.name == "root/pypi":
            return simple_html_body("%s index" % stage.name, [
                html.ul(html.li(html.a("simple index", href="+simple/")), ),
            ]).unicode()

        # XXX this should go to a template
        if hasattr(stage, "ixconfig"):
            bases = html.ul()
            for base in stage.ixconfig["bases"]:
                bases.append(
                    html.li(
                        html.a("%s" % base, href="/%s/" % base),
                        " (",
                        html.a("simple", href="/%s/+simple/" % base),
                        " )",
                    ))
            if bases:
                bases = [html.h2("inherited bases"), bases]
        else:
            bases = []
        latest_packages = html.table(
            html.tr(html.td("info"), html.td("file"), html.td("docs")))

        for projectname in stage.getprojectnames_perstage():
            metadata = stage.get_metadata_latest(projectname)
            try:
                name, ver = metadata["name"], metadata["version"]
            except KeyError:
                log.error("metadata for project %r empty: %s, skipping",
                          projectname, metadata)
                continue
            dockey = stage._doc_key(name, ver)
            if dockey.exists():
                docs = [
                    html.a("%s-%s docs" % (name, ver),
                           href="%s/%s/+doc/index.html" % (name, ver))
                ]
            else:
                docs = []
            files = metadata.get("+files", {})
            if not files:
                log.warn("project %r version %r has no files", projectname,
                         metadata.get("version"))
            baseurl = URL(request.path)
            for basename, relpath in files.items():
                latest_packages.append(
                    html.tr(
                        html.td(
                            html.a("%s-%s info page" % (name, ver),
                                   href="%s/%s/" % (name, ver))),
                        html.td(
                            html.a(basename,
                                   href=baseurl.relpath("/" + relpath))),
                        html.td(*docs),
                    ))
                break  # could present more releasefiles

        latest_packages = [
            html.h2("in-stage latest packages, at least as recent as bases"),
            latest_packages
        ]

        return simple_html_body("%s index" % stage.name, [
            html.ul(html.li(html.a("simple index", href="+simple/")), ),
            latest_packages,
            bases,
        ]).unicode()
Example #25
 def test_query_items(self, url, kwargs, expected):
     assert URL(url).get_query_items(**kwargs) == expected
Example #26
 def test_query_replace(self, url, query, expected):
     assert URL(url).replace(query=query) == expected
Example #27
 def get_user_url(self, user=None):
     if user is None:
         user = self.get_auth_user()
         if not user:
             raise ValueError("no current authenticated user")
     return URL(self.rooturl).addpath(user)
Example #28
 def test_hashing(self):
     assert hash(URL("http://a")) == hash(URL("http://a"))
     assert URL("http://a") == URL("http://a")
     assert hash(URL("http://a?foo=bar")) == hash(URL("http://a?foo=bar"))
     assert URL("http://a?foo=bar") == URL("http://a?foo=bar")
Example #29
    def indexroot(self, user, index):
        stage = self.getstage(user, index)
        if json_preferred():
            projectlist = stage.getprojectnames_perstage()
            projectlist = sorted(projectlist)
            apireturn(200, type="list:projectconfig", result=projectlist)
        if stage.name == "root/pypi":
            return simple_html_body("%s index" % stage.name, [
                html.ul(
                    html.li(html.a("simple index", href="+simple/")),
                ),
            ]).unicode()


        # XXX this should go to a template
        if hasattr(stage, "ixconfig"):
            bases = html.ul()
            for base in stage.ixconfig["bases"]:
                bases.append(html.li(
                    html.a("%s" % base, href="/%s/" % base),
                    " (",
                    html.a("simple", href="/%s/+simple/" % base),
                    " )",
                ))
            if bases:
                bases = [html.h2("inherited bases"), bases]
        else:
            bases = []
        latest_packages = html.table(
            html.tr(html.td("info"), html.td("file"), html.td("docs")))

        for projectname in stage.getprojectnames_perstage():
            metadata = stage.get_metadata_latest(projectname)
            try:
                name, ver = metadata["name"], metadata["version"]
            except KeyError:
                log.error("metadata for project %r empty: %s, skipping",
                          projectname, metadata)
                continue
            dockey = stage._doc_key(name, ver)
            if dockey.exists():
                docs = [html.a("%s-%s docs" %(name, ver),
                        href="%s/%s/+doc/index.html" %(name, ver))]
            else:
                docs = []
            files = metadata.get("+files", {})
            if not files:
                log.warn("project %r version %r has no files", projectname,
                         metadata.get("version"))
            baseurl = URL(request.path)
            for basename, relpath in files.items():
                latest_packages.append(html.tr(
                    html.td(html.a("%s-%s info page" % (name, ver),
                           href="%s/%s/" % (name, ver))),
                    html.td(html.a(basename,
                                   href=baseurl.relpath("/" + relpath))),
                    html.td(*docs),
                ))
                break  # could present more releasefiles

        latest_packages = [
            html.h2("in-stage latest packages, at least as recent as bases"),
            latest_packages]

        return simple_html_body("%s index" % stage.name, [
            html.ul(
                html.li(html.a("simple index", href="+simple/")),
            ),
            latest_packages,
            bases,
        ]).unicode()
Example #30
 def test_eggfragment(self):
     url = URL("http://a/py.tar.gz#egg=py-dev")
     assert url.eggfragment == "py-dev"
     url = URL("http://a/py.tar.gz?foo=bar#egg=py-dev")
     assert url.eggfragment == "py-dev"
     assert url.query == "foo=bar"
Example #31
 def __init__(self, key_href):
     self.key, self.href = key_href
     self._url = URL(self.href)
     self.name, self.version, self.ext = splitbasename(self._url.basename,
                                                       checkarch=False)
     self.eggfragment = self._url.eggfragment
Example #32
 def test_nohashtypes(self):
     link = URL("whateveer#lqk=123")
     assert link.hash_value is None and link.hash_algo is None
     link = URL("whateveer?foo=bar#lqk=123")
     assert link.hash_value is None and link.hash_algo is None
     assert link.query == "foo=bar"
Example #33
 def test_joinpath_asdir(self):
     url = URL("http://heise.de")
     new = url.joinpath("hello", asdir=1)
     assert new.url == "http://heise.de/hello/"
     new = url.joinpath("hello/", asdir=1)
     assert new.url == "http://heise.de/hello/"
Example #34
 def test_asfile(self):
     assert URL("http://heise.de").asfile().url == "http://heise.de"
     assert URL("http://heise.de/").asfile().url == "http://heise.de"
     assert URL("http://x.de/path/").asfile().url == "http://x.de/path"
     assert URL("http://x.de/path").asfile().url == "http://x.de/path"