Beispiel #1
0
 def parse_pagination(self, soup: BeautifulSoup, url: URL):
     """Register the pagination entries found inside the ``pager`` div.

     Every ``li`` of class ``page`` is inspected for a ``span`` that carries
     both a ``data-query-key`` and a ``data-query-value`` attribute. For each
     such span a new ``URL`` is built from ``url.get()``, the key/value pair
     is added to its query, and the result is handed to ``self.add_page``
     together with the span's stripped text as the label.

     Args:
         soup: Parsed document to search.
         url: Address of the page being parsed; used as the base for every
             generated page address.
     """
     pager_div = soup.find('div', {'class': 'pager'})
     if not pager_div:
         # No pager on this page: nothing to register.
         return

     for entry in _iter(pager_div.find_all('li', {'class': 'page'})):
         # ``True`` as a filter value matches any tag that has the attribute.
         span = entry.find(
             'span',
             attrs={'data-query-key': True, 'data-query-value': True},
         )
         if not span:
             continue

         caption = str(span.string).strip()
         target = URL(url.get())
         query_pair = (
             span.attrs['data-query-key'],
             span.attrs['data-query-value'],
         )
         target.add_query([query_pair])
         self.add_page(caption, target)
Beispiel #2
0
    def _parse_pagination(self, soup: BeautifulSoup, url: URL):
        """Register the pagination links found in the ``pagination-holder`` div.

        Two kinds of anchors are handled:

        * Anchors whose ``href`` does not start with ``#`` are registered with
          a ``URL`` built from that ``href`` and ``base_url=url``.
        * Anchors whose ``href`` starts with ``#`` are registered with a
          ``URL`` built from ``url.get()``. An additional request URL is
          assembled from the fixed ``mode``/``function`` pairs, the anchor's
          ``data-block-id`` and the ``key:value`` pairs listed (separated by
          ``;``) in ``data-parameters``; it is attached to the page URL as
          ``any_data['xhr']``.

        ``#`` anchors that lack ``data-block-id`` or ``data-parameters`` are
        skipped instead of raising ``KeyError``, and empty ``;``-separated
        fragments are ignored so they do not add an empty query pair.

        Args:
            soup: Parsed document to search.
            url: Address of the page being parsed.
        """
        container = soup.find('div', {'class': 'pagination-holder'})
        if not container:
            return

        for page in _iter(container.find_all('a', {'href': True})):
            href = page.attrs['href']
            if not href.startswith('#'):
                self.add_page(page.string, URL(href, base_url=url))
                continue

            block_id = page.attrs.get('data-block-id')
            parameters = page.attrs.get('data-parameters')
            if block_id is None or parameters is None:
                # A '#' anchor without the block attributes gives nothing to
                # build a request from; skip it rather than abort the parse.
                continue

            pair_list = [
                ('mode', 'async'),
                ('function', 'get_block'),
                ('block_id', block_id),
            ]
            for item in parameters.split(';'):
                if not item:
                    # Produced by a trailing or doubled ';' (or an empty
                    # attribute); it would add a meaningless ('', '') pair.
                    continue
                key, _, value = item.partition(':')
                pair_list.append((key, value))

            xhr_url = URL(url.get())
            xhr_url.add_query(pair_list)

            # The page keeps the current address; the request URL travels
            # with it in ``any_data``.
            page_url = URL(url.get())
            page_url.any_data = {'xhr': xhr_url}

            self.add_page(page.string, page_url)