def __init__(self, url, first_page=True):
    """Initialize the parser for one page of search results.

    url -- base URL of download links; stored as ``download_url`` and
           embedded (regex-escaped) in the download-link pattern.
    first_page -- True if the page being parsed is the first page of
                  results, False otherwise.
    """
    # NOTE(review): a second, equivalent __init__ definition sits right
    # next to this one in the file; if both belong to the same class only
    # the later one takes effect -- consider removing one of them.
    SGMLParser.__init__(self)
    self.download_url = url
    self.first_page = first_page
    self.results = []
    self.other_pages = []
    self.tr_counter = 0
    # Patterns for the hrefs of interest on a results page.
    self.cat_re = re.compile(r'tracker\.php\?f=\d+')
    self.name_re = re.compile(r'viewtopic\.php\?t=\d+')
    # The URL is escaped so that characters such as '.' or '?' in it are
    # matched literally instead of acting as regex operators; the trailing
    # fragment is a raw string so the backslashes reach the regex engine
    # untouched.
    self.link_re = re.compile(
        r'(' + re.escape(self.download_url) + r'dl\.php\?t=\d+)')
    self.pages_re = re.compile(r'tracker\.php\?.*?start=(\d+)')
    self.reset_current()
def __init__(self, url, first_page=True):
    """Set up parser state for a single page of search results.

    url -- base URL of download links; kept in ``download_url`` and
           inserted, regex-escaped, into the download-link pattern.
    first_page -- whether the parsed page is the first page of results
                  (defaults to True).
    """
    # NOTE(review): an equivalent __init__ definition appears immediately
    # before this one in the file; if both are in the same class this later
    # definition is the one that wins -- the earlier copy looks redundant.
    SGMLParser.__init__(self)
    self.download_url = url
    self.first_page = first_page
    self.results = []
    self.other_pages = []
    self.tr_counter = 0
    # Compiled once here so they are not rebuilt on every use.
    self.cat_re = re.compile(r'tracker\.php\?f=\d+')
    self.name_re = re.compile(r'viewtopic\.php\?t=\d+')
    # re.escape() keeps regex metacharacters contained in the URL (every
    # '.' of the host name, for instance) from being treated as operators;
    # the suffix is a raw string to avoid invalid string escape sequences.
    self.link_re = re.compile(
        r'(' + re.escape(self.download_url) + r'dl\.php\?t=\d+)')
    self.pages_re = re.compile(r'tracker\.php\?.*?start=(\d+)')
    self.reset_current()
def close(self):
    """Finish parsing, flushing the item still being built.

    Extends SGMLParser.close() with one extra step before delegating
    to it.
    """
    # Items are pushed to the result list when the *next*
    # <tr class="tCenter"> starts rather than on </tr> (the closing tag
    # carries no attribute to recognise it by), so the final item has to be
    # stored by hand here.
    pending_item = self.current_item
    self.results.append(pending_item)
    SGMLParser.close(self)