def adapt(self, text, htmlpage=None):
    """Resolve *text* against the base URL of *htmlpage*.

    Returns *text* untouched when no *htmlpage* is supplied, and ``None``
    when *text* is ``None``. Otherwise both *text* and the value returned
    by ``get_base_url(htmlpage)`` are encoded with the page encoding
    (``'utf-8'`` if the page has no ``encoding`` attribute), passed through
    ``unquote_markup`` and ``strip_url``, joined with ``urljoin`` and
    handed to ``safe_download_url``, whose result is returned.
    """
    # Without a page there is nothing to resolve the link against.
    if htmlpage is None:
        return text
    if text is None:
        return None

    page_encoding = getattr(htmlpage, 'encoding', 'utf-8')

    # Link side: encode, unquote markup, drop anything matched by
    # ``disallowed``, then strip.
    raw_link = unquote_markup(text.encode(page_encoding),
                              encoding=page_encoding)
    link = strip_url(disallowed.sub('', raw_link))

    # Base side: same encode/unquote/strip pipeline, minus the
    # ``disallowed`` filtering.
    raw_base = get_base_url(htmlpage).encode(page_encoding)
    base = strip_url(unquote_markup(raw_base, encoding=page_encoding))

    return safe_download_url(urljoin(base, link))
def extract(self, text):
    """Return the image URL extracted from *text*, or ``''`` for ``None``.

    Non-``None`` text is passed through ``strip_url`` and the result is
    handed to ``extract_image_url``.
    """
    if text is None:
        return ''
    stripped = strip_url(text)
    return extract_image_url(stripped)
def extract(self, text):
    """Return ``strip_url(text)``, or ``''`` when *text* is ``None``."""
    return '' if text is None else strip_url(text)
def extract(self, text):
    """Return the image URL extracted from *text*, or ``''`` for ``None``.

    Non-``None`` text is passed through ``strip_url`` and the result is
    handed to ``extract_image_url``.
    """
    # Guard against missing text: return an empty string instead of
    # forwarding ``None`` to the URL helpers.
    if text is None:
        return ''
    return extract_image_url(strip_url(text))