def start_a(self, tag, attrs):
    """Handle an opening ``<a>`` tag.

    When the tag carries an ``href`` attribute, the value is passed
    through ``wash_url``, resolved against ``self.base_href`` with
    ``urljoin`` and handed to ``self.crawler.accept_url``.  A truthy
    result is injected into the crawler and stored as the current
    anchor target, and ``self.anchor_text`` is reset to an empty list.

    ``tag`` is not used.  ``attrs`` may be anything ``dict()`` accepts.
    Always returns ``None``.
    """
    attr_map = dict(attrs)
    if 'href' not in attr_map:
        return

    base = self.base_href
    cleaned = wash_url(attr_map['href'])
    target = self.crawler.accept_url(urljoin(base, cleaned))

    # NOTE(review): the source arrived with its indentation collapsed.
    # This layout assumes the anchor state is only updated for URLs the
    # crawler accepted -- confirm against the original nesting.
    if not target:
        return

    self.crawler.inject_url(target)
    self.anchor_href = target
    self.anchor_text = []
def start_base(self, tag, attrs):
    """Handle a ``<base>`` tag by updating ``self.base_href``.

    When the tag carries an ``href`` attribute, the value is passed
    through ``wash_url`` and joined onto the current ``self.base_href``
    with ``urljoin``; the result becomes the new ``self.base_href``.
    Without an ``href`` the base is left untouched.

    ``tag`` is not used.  ``attrs`` may be anything ``dict()`` accepts.
    Always returns ``None``.
    """
    attr_map = dict(attrs)
    if 'href' not in attr_map:
        return

    current_base = self.base_href
    cleaned = wash_url(attr_map['href'])
    self.base_href = urljoin(current_base, cleaned)