def get_page(self, name: str):
    """Fetch the Reuters news-search results page for ``name``.

    Any trailing parenthetical (everything from the first ``" ("`` on)
    is dropped, then the remaining text is passed through
    ``self.special_chars_to_hex`` for spaces, apostrophes and
    ampersands before being placed in the search URL.

    Args:
        name: The search term, optionally followed by a parenthetical
            qualifier such as ``"Some Firm (US)"``.

    Returns:
        Whatever ``make_soup`` returns for the search URL.
    """
    # Strip the parenthetical BEFORE encoding. The separator starts with
    # a space, and the encoder is asked to replace spaces, so splitting
    # afterwards would presumably never match.
    # NOTE(review): assumes special_chars_to_hex rewrites " " -- confirm.
    name = name.split(' (')[0]
    name = self.special_chars_to_hex(name, [" ", "'", "&"])
    return make_soup(
        f'https://www.reuters.com/search/news?sortBy=&dateRange=&blob={name}'
    )
def click_links(self, links):
    """Visit each link in ``links`` and parse the page behind it.

    Progress is printed for every position, including the skipped ones.
    Positions listed in ``skipped_positions`` are neither fetched nor
    parsed.

    Args:
        links: Iterable of URL path fragments that are appended to
            ``http://www.chambers-associate.com``.
    """
    # Hard-coded positions that are deliberately not fetched.
    # NOTE(review): the reason these are excluded is not visible here.
    skipped_positions = (4, 22, 24, 29, 38, 49, 50, 63, 73, 88, 89, 105, 109)

    for position, href in enumerate(links):
        print("Parsing page: {}".format(position))
        if position in skipped_positions:
            continue
        firm_soup = make_soup(f'http://www.chambers-associate.com{href}')
        self.parse_individual_page(firm_soup)
def __init__(self):
    """Start with an empty firm list and load the page at ``self.main_url``."""
    self._firms = list()
    # main_url is read from the instance/class; it is not set in this method.
    landing_soup = make_soup(self.main_url)
    self._soup = landing_soup
def click_link(self):
    """Advance to the next results page, load it and parse it.

    Increments ``self._page``, stores the result of ``make_soup`` for
    that page in ``self._soup``, and passes it to ``self.parse_page``.
    """
    self._page += 1
    next_url = f"https://www.ilrg.com/nlj250?page={self._page}"
    self._soup = make_soup(next_url)
    self.parse_page(self._soup)
def get_page(self, name: str):
    """Fetch the Above the Law search results page for ``name``.

    Any trailing parenthetical (everything from the first ``" ("`` on)
    is dropped, then the remaining text is passed through the parent
    class's ``special_chars_to_hex`` for spaces, apostrophes and
    ampersands before being placed in the search URL.

    Args:
        name: The search term, optionally followed by a parenthetical
            qualifier such as ``"Some Firm (US)"``.

    Returns:
        Whatever ``make_soup`` returns for the search URL.
    """
    # Strip the parenthetical BEFORE encoding. The separator starts with
    # a space, and the encoder is asked to replace spaces, so splitting
    # afterwards would presumably never match.
    # NOTE(review): assumes special_chars_to_hex rewrites " " -- confirm.
    name = name.split(' (')[0]
    name = super().special_chars_to_hex(name, [" ", "'", "&"])
    return make_soup(f'https://abovethelaw.com/?s={name}')