def parse_pagination(self, soup: BeautifulSoup, url: URL):
    """Collect pagination links from a 'pager' div.

    Each ``li.page`` entry is expected to hold a ``span`` carrying
    ``data-query-key`` / ``data-query-value`` attributes; those are appended
    as a query pair to a copy of *url* and registered via ``self.add_page``.
    Entries without such a span are skipped silently.
    """
    pager = soup.find('div', {'class': 'pager'})
    if not pager:
        return
    for item in _iter(pager.find_all('li', {'class': 'page'})):
        span = item.find('span', {
            'data-query-key': True,
            'data-query-value': True
        })
        if not span:
            continue
        # Page label is the span's text content, whitespace-trimmed.
        text = str(span.string).strip()
        # Clone the current URL and attach the page-selecting query pair.
        target = URL(url.get())
        query_pair = (span.attrs['data-query-key'], span.attrs['data-query-value'])
        target.add_query([query_pair])
        self.add_page(text, target)
def _parse_pagination(self, soup: BeautifulSoup, url: URL):
    """Collect pagination links from a 'pagination-holder' div.

    Anchors with a real ``href`` are registered directly as pages. Anchors
    whose ``href`` starts with ``#`` are AJAX-driven: an XHR URL is assembled
    from the anchor's ``data-block-id`` and the ``;``-separated ``key:value``
    pairs in ``data-parameters``, then stored on the page URL's ``any_data``
    under the ``xhr`` key.
    """
    holder = soup.find('div', {'class': 'pagination-holder'})
    if not holder:
        return
    for anchor in _iter(holder.find_all('a', {'href': True})):
        href = anchor.attrs['href']
        if not href.startswith('#'):
            # Ordinary link: resolve relative to the current page URL.
            self.add_page(anchor.string, URL(href, base_url=url))
            continue
        # Anchor-only link: the real content is fetched via XHR. Build the
        # async request's query string from the anchor's data attributes.
        pairs = [
            ('mode', 'async'),
            ('function', 'get_block'),
            ('block_id', anchor.attrs['data-block-id']),
        ]
        for chunk in anchor.attrs['data-parameters'].split(';'):
            key, _, value = chunk.partition(':')
            pairs.append((key, value))
        xhr_url = URL(url.get())
        xhr_url.add_query(pairs)
        # The visible page URL stays the same; the XHR endpoint rides along
        # in any_data for the downloader to use.
        page_url = URL(url.get())
        page_url.any_data = {'xhr': xhr_url}
        self.add_page(anchor.string, page_url)