class ParsingHandle(object):
    """
    Structure of handle that is used to do one thing only: parse a soup
    object. Parsing of the soup object must take place in the ``handle``
    method, which must be defined explicitly by overriding it.
    """

    # Class-level defaults; ``__init__`` and the setters below replace
    # them on the instance.
    name = None
    url = None
    soup = None

    def __init__(self, url='', soup=None):
        """
        Build the handle, naming it after its own class.

        ``url`` and ``soup`` are only forwarded to their setters when they
        are truthy; otherwise the class-level ``None`` defaults remain.
        """
        self.name = validate_attribute_name(get_class_name(self))
        if url:
            self.set_url(url)
        if soup:
            self.set_soup(soup)

    def __repr__(self):
        # One-element tuple keeps the formatting correct even if ``name``
        # were itself a tuple.
        return '<Handle: %s>' % (self.name,)

    def set_url(self, url):
        """
        Store ``url`` on the instance.

        A value accepted by ``is_lazy_url`` is stored as-is; a value
        accepted by ``is_str`` is wrapped in ``LazyUrl``. Any other value
        leaves ``self.url`` untouched.
        """
        # NOTE(review): ``ignore=True`` presumably stops is_lazy_url from
        # raising on a non-LazyUrl value -- confirm against the helper.
        if is_lazy_url(url, ignore=True):
            self.url = url
        elif is_str(url):
            self.url = LazyUrl(url)

    def set_url_path(self, path):
        """
        Replace the path of the current url with ``path``.

        Requires ``self.url`` to have been set already; with the default
        ``None`` this raises ``AttributeError``.
        """
        self.url.clear_full_path()
        self.url.set_path(path)

    def has_url(self, raise_error=False):
        """
        Report whether the instance holds a usable url.

        Returns the result of ``self.url == True``. When that result is
        falsy and ``raise_error`` is set, ``ValueError`` is raised instead.
        """
        # NOTE(review): this relies on how the url object compares equal
        # to ``True`` (presumably LazyUrl customises ``__eq__``) -- confirm
        # before replacing it with a plain truthiness check.
        has = (self.url == True)
        if not has and raise_error:
            # One-element tuple: a tuple-valued ``url`` must not be
            # unpacked as several format arguments (that raised TypeError).
            raise ValueError('%r is a false LazyUrl' % (self.url,))
        return has

    def set_soup(self, soup):
        """
        Set the instance soup object to be used in handle
        """
        # The return value is discarded; presumably is_soup raises on an
        # invalid object -- TODO confirm.
        is_soup(soup)
        self.soup = soup

    def has_soup(self, raise_error=False):
        """
        Delegate to the module-level ``has_soup`` helper, passing this
        instance and ``raise_error`` through unchanged.
        """
        return has_soup(self, raise_error)

    def handle(self):
        """
        Override with your own HTML parsing logic
        """
        pass  # Do something with the soup object
def set_url(self, url):
    """
    Store ``url`` on ``self``.

    A value accepted by ``is_lazy_url`` is kept as-is; otherwise a value
    accepted by ``is_str`` is wrapped in ``LazyUrl``. Anything else leaves
    ``self.url`` unchanged.
    """
    # NOTE(review): this appears to duplicate ParsingHandle.set_url --
    # confirm whether both definitions are needed.
    if is_lazy_url(url, ignore=True):
        self.url = url
        return
    if is_str(url):
        self.url = LazyUrl(url)