예제 #1
0
    def test_get_base_url_empty_basehref(self):
        """An empty href on the <base> tag yields the page's own URL."""
        page_url = "http://example.com/products/p19.html"
        # Same markup as a single literal, split across adjacent strings.
        markup = (u'<html><head><base href="" />'
                  u'<body></body></html>')
        page = HtmlPage(page_url, body=markup)
        self.assertEqual(get_base_url(page), page_url)
예제 #2
0
    def test_get_base_url(self):
        """A <base> tag with an absolute href is reported as the base URL."""
        expected = "http://example.com/products/"
        markup = (u'<html><head><base href="http://example.com/products/" />'
                  u'<body></body></html>')
        page = HtmlPage("http://example.com/products/p19.html", body=markup)
        self.assertEqual(get_base_url(page), expected)
예제 #3
0
    def test_get_base_url_empty_basehref(self):
        """Check the result for a page whose <base> tag has href="".

        The expected base URL is the URL the page was created with.
        """
        source_url = "http://example.com/products/p19.html"
        document = (u'<html><head><base href="" />'
                    u'<body></body></html>')
        self.assertEqual(
            get_base_url(HtmlPage(source_url, body=document)),
            source_url,
        )
예제 #4
0
파일: url.py 프로젝트: zirconer/portia
 def adapt(self, text, htmlpage=None):
     """Resolve ``text`` against the base URL of ``htmlpage``.

     Returns ``text`` unchanged when no page is given, and ``None`` when
     ``text`` is ``None``. Otherwise both the link and the page's base URL
     are encoded with the page encoding ('utf-8' if the page has no
     ``encoding`` attribute), passed through ``unquote_markup`` and
     ``strip_url``, joined, and handed to ``safe_download_url``.
     """
     if htmlpage is None:
         return text
     if text is None:
         return None
     charset = getattr(htmlpage, 'encoding', 'utf-8')
     # Link side: encode, unquote, drop anything `disallowed` matches.
     raw_link = unquote_markup(text.encode(charset), encoding=charset)
     link = strip_url(disallowed.sub('', raw_link))
     # Base side: same encode/unquote/strip treatment before joining.
     raw_base = get_base_url(htmlpage).encode(charset)
     base = strip_url(unquote_markup(raw_base, encoding=charset))
     return safe_download_url(urljoin(base, link))
예제 #5
0
 def test_get_base_url_nobase(self):
     """Without a <base> tag the expected base is the page's own URL."""
     page_url = "http://example.com/products/p19.html"
     page = HtmlPage(page_url, body=u"<html><head><body></body></html>")
     self.assertEqual(get_base_url(page), page_url)
예제 #6
0
    def test_get_base_url(self):
        """get_base_url should return the href declared by the <base> tag."""
        base_href = "http://example.com/products/"
        document = (u'<html><head><base href="http://example.com/products/" />'
                    u'<body></body></html>')
        self.assertEqual(
            get_base_url(
                HtmlPage("http://example.com/products/p19.html", body=document)
            ),
            base_href,
        )
예제 #7
0
 def adapt(self, text, htmlpage):
     """Join ``text`` onto the base URL of ``htmlpage``.

     ``text`` is encoded with the page encoding, joined to the result of
     ``get_base_url`` via ``urljoin_rfc``, then passed through
     ``unquote_markup`` and ``safe_download_url``.
     """
     encoded_link = text.encode(htmlpage.encoding)
     page_base = get_base_url(htmlpage)
     absolute = urljoin_rfc(page_base, encoded_link)
     return safe_download_url(unquote_markup(absolute))
예제 #8
0
 def adapt(self, text, htmlpage):
     """Join ``text`` onto the base URL of ``htmlpage``.

     Both the link and the page's base URL are encoded with the page
     encoding before joining; the joined value goes through
     ``unquote_markup`` (same encoding) and then ``safe_download_url``.
     """
     charset = htmlpage.encoding
     link = text.encode(charset)
     base = get_base_url(htmlpage).encode(charset)
     unquoted = unquote_markup(urljoin(base, link), encoding=charset)
     return safe_download_url(unquoted)
예제 #9
0
 def test_get_base_url_nobase(self):
     """A page with no <base> tag should report its own URL as the base."""
     source_url = "http://example.com/products/p19.html"
     document = u'<html><head><body></body></html>'
     self.assertEqual(
         get_base_url(HtmlPage(source_url, body=document)), source_url)
예제 #10
0
파일: url.py 프로젝트: TimoC1982/portia
 def adapt(self, text, htmlpage=None):
     """Resolve ``text`` against the base URL of ``htmlpage``.

     Returns ``text`` unchanged when no page is given, and ``None`` when
     ``text`` is ``None``. Otherwise the link and the page's base URL are
     encoded with the page encoding, joined with ``urljoin``, passed
     through ``unquote_markup`` and handed to ``safe_download_url``.
     """
     if htmlpage is None:
         return text
     if text is None:
         # Nothing to resolve; without this guard ``None.encode`` raised
         # AttributeError whenever a page was supplied.
         return None
     encoding = htmlpage.encoding
     text = text.encode(encoding)
     base = get_base_url(htmlpage).encode(encoding)
     joined = urljoin(base, text)
     return safe_download_url(unquote_markup(joined, encoding=encoding))