def test_rebase_url(self):
    """rebase_url

    Checks prefix swapping from ``src`` to ``dest``, the result for URLs
    that do not live under ``src``, and the ``must_rebase=True`` failure.
    """
    from StillWeb.sw_urllib import rebase_url

    old_base = "http://a/old/"
    new_base = "http://a/new/"
    foreign_base = "http://xyz/new/"
    moved = "http://a/new/b/c"

    # Simple rebasing (the first call also pins the keyword names).
    self.assertEqual(
        moved,
        rebase_url(url="http://a/b/c", src="http://a/", dest=new_base),
    )
    self.assertEqual(moved, rebase_url("http://a/old/b/c", old_base, new_base))

    # When the URL can't be rebased it is returned without the dest prefix:
    # an unrelated absolute URL comes back as-is ...
    self.assertEqual(
        "http://xyz/foo",
        rebase_url("http://xyz/foo", old_base, new_base),
    )
    # ... and a rooted path comes back as an absolute URL on src's host.
    self.assertEqual(
        "http://a/foo",
        rebase_url("/foo", "http://a/bar/", foreign_base),
    )

    # With must_rebase=True the same un-rebasable input is an error.
    with self.assertRaises(ValueError):
        rebase_url("/foo", "http://a/bar/", foreign_base, must_rebase=True)
def cb(url, criterion):
    """Translate one link URL from the fake site onto the real site.

    ``url`` is resolved against ``fake_current_url`` and then rebased from
    ``fake_base_url`` to ``real_base_url``.  Unless ``always_absolute`` is
    set, the result is made relative to ``real_current_url``.  All of these
    names come from the enclosing scope; ``criterion`` is accepted but not
    used here.
    """
    # Resolve against the (fake) current URL first, then swap the fake
    # base for the real one to get an absolute real URL.
    resolved = rfc3986_urljoin(fake_current_url, url)
    absolute = rebase_url(resolved, fake_base_url, real_base_url)
    if always_absolute:
        return absolute
    # Otherwise express the absolute URL relative to the real current URL.
    return relative_url(absolute, real_current_url)
def __init__(self, framework, orig_target_url):
    """Record the URL and file locations derived from a target URL.

    Args:
        framework: object exposing ``plugins['vars'].vars``, which is
            queried for the optional keys ``output_dir``, ``source_dir``
            and ``base_url``.
        orig_target_url: target URL; must start with "/".

    Raises:
        AssertionError: if ``orig_target_url`` does not start with "/".

    The ``output_*``, ``source_*`` and ``base_url``/``current_url``
    attributes are only set when the corresponding key is present.
    """
    if not orig_target_url.startswith("/"):
        raise AssertionError("orig_target_url must start with /")

    self.orig_target_url = orig_target_url
    self.target_url = strip_index_from_url(orig_target_url)
    self.pathtuple = pathtuple_from_target_url(orig_target_url)

    settings = framework.plugins['vars'].vars

    if "output_dir" in settings:
        out_root = settings['output_dir']
        self.output_dir = out_root
        self.output_filename = os.path.join(out_root, *self.pathtuple)

    if "source_dir" in settings:
        src_root = settings['source_dir']
        self.source_dir = src_root
        self.source_filename = os.path.join(src_root, *self.pathtuple)

    if "base_url" in settings:
        site_base = settings['base_url']
        self.base_url = site_base
        # Move the target URL from a freshly generated fake base onto the
        # configured base URL.
        self.current_url = rebase_url(
            self.target_url, generate_fake_url(), site_base
        )
def rewrite_links(node, match_criteria, target_url, base_url, always_absolute=False):
    """Rewrite the links matched under ``node`` so they point at ``base_url``.

    Args:
        node: handed to ``LinkRewriter.rewrite_links`` together with the
            per-link callback.
        match_criteria: passed to the ``LinkRewriter`` constructor.
        target_url: URL of the current document, joined onto a fake base.
        base_url: real base URL that the fake base is replaced with.
        always_absolute: when false (the default), each rewritten link is
            made relative to the real URL of the current document;
            when true, the absolute real URL is kept.
    """
    # Links are resolved against a made-up base first, so that something
    # like <a href="/">...</a> lands on the top of the *site* instead of
    # the top of the *server*; the made-up base is swapped out afterwards.
    placeholder_root = generate_fake_url()
    placeholder_page = rfc3986_urljoin(placeholder_root, target_url)
    real_page = rebase_url(placeholder_page, placeholder_root, base_url)

    def translate_link(url, criterion):
        # Resolve against the fake current URL, then move onto the real base.
        resolved = rfc3986_urljoin(placeholder_page, url)
        absolute = rebase_url(resolved, placeholder_root, base_url)
        if always_absolute:
            return absolute
        # Express the absolute URL relative to the real current URL.
        return relative_url(absolute, real_page)

    LinkRewriter(match_criteria).rewrite_links(node, translate_link)