def start_shop_info_parsing(my_dict, shop_urls):
    """Fetch every shop page and print the information parsed from it.

    For each url the page is downloaded with ``get_html``, parsed with
    BeautifulSoup, wrapped in ``ShopInfo`` together with ``my_dict`` and
    printed through ``ShopInfo.print_result``.

    :param my_dict: passed unchanged to ``ShopInfo``; presumably the
        store-name -> outlet-id mapping built by ``get_shop_codes``
        (TODO confirm against the caller)
    :param shop_urls: iterable of shop page urls
    :return: None
    """
    for url in shop_urls:
        html = get_html(url)
        if not html:
            # Sibling functions treat a falsy get_html() result as a failed
            # download; skip this shop instead of crashing inside the parser.
            logging.error("Failed to get %s html", url)
            continue
        shop_soup = BeautifulSoup(html, "html.parser")
        shop_info = ShopInfo(shop_soup, my_dict)
        shop_info.print_result()
def get_total_num_pages(sector_url):
    """Return the total number of pages to parse in a sector.

    The count is read from the pagination block of the page at
    ``sector_url``: the ``href`` of the link following the ``li.more`` item
    is used when that item exists, otherwise the ``href`` of the
    second-to-last pagination link. The page count is whatever follows
    ``page=`` in that ``href``.

    The result is needed to build sector page urls via
    ``create_sector_page_url`` (for example,
    https://5karmanov.ru/cat/aksessuary-muzhskie?&page=3).

    :param sector_url: a link from the ``sectors_search_url`` list in
        settings.py, for example
        https://5karmanov.ru/cat/aksessuary-muzhskie
    :return: the number of pages as ``int``, or ``None`` when the page could
        not be downloaded or the page count could not be determined (the
        failure is logged and reported on the console)
    """
    sector_page_html = get_html(sector_url)
    if not sector_page_html:
        # No exception is being handled here, so error() is used instead of
        # exception(), which would append a meaningless "NoneType: None".
        logging.error(f"Failed to get {sector_url} html")
        print("Не удалось загрузить секцию", sector_url)
        return None

    soup = BeautifulSoup(sector_page_html, "html.parser")
    pagination = soup.find("ul", class_="pagination")
    if pagination is None:
        # NOTE(review): a sector that fits on one page may have no pagination
        # block at all - confirm whether callers would prefer 1 here.
        logging.error(f"No pagination block found at {sector_url}")
        print("Не удалось загрузить секцию", sector_url)
        return None

    try:
        last_page_url = pagination.find_next(
            "li", class_="more").find_next("a")["href"]
    except AttributeError:
        # No "more" item was found (find_next returned None); fall back to
        # the second-to-last link of the pagination block.
        page_links = pagination.find_all("a")
        if len(page_links) < 2:
            logging.error(f"Not enough pagination links at {sector_url}")
            print("Не удалось загрузить секцию", sector_url)
            return None
        last_page_url = page_links[-2]["href"]

    sector_pages_qt = last_page_url.partition("page=")[-1].strip('"')
    try:
        # Convert once and return from inside the try, so a non-numeric value
        # is reported below instead of raising again after being logged.
        return int(sector_pages_qt)
    except ValueError as e:
        logging.exception(
            f"Fail to get page_num at {sector_url}.Received data is not integer {e}"
        )
        print("Не удалось загрузить секцию", sector_url)
        return None
def get_products_on_pages(url):
    """Return the product elements found on the page at ``url``.

    The page is downloaded with ``get_html`` and parsed with BeautifulSoup.

    :param url: address of the page to download
    :return: the ``find_all`` result for ``div`` elements whose ``itemprop``
        is ``itemListElement``, or ``None`` when ``get_html`` returns a
        falsy value
    """
    markup = get_html(url)
    if not markup:
        return None
    parsed_page = BeautifulSoup(markup, "html.parser")
    return parsed_page.find_all("div", itemprop="itemListElement")
def get_shop_codes(base_url):
    """Collect the outlet id of every shop listed at ``base_url``.

    Each ``li`` element matched by the class string
    ``shops-list__item j-info-shop-list-item`` contributes one entry: the key
    is the stripped text of the first ``span`` with class ``name`` inside it,
    the value is its stripped ``data-id`` attribute.

    :param base_url: url of the page that lists the shops
    :return: dict of ``"store_name": "outlet_id"`` pairs; empty when the page
        could not be downloaded or lists no shops
    """
    html = get_html(base_url)
    if not html:
        # Sibling functions treat a falsy get_html() result as a failed
        # download; report it instead of crashing inside the parser.
        logging.error("Failed to get %s html", base_url)
        return {}

    shop_soup = BeautifulSoup(html, "html.parser")
    shop_items = shop_soup.find_all(
        "li", class_="shops-list__item j-info-shop-list-item")
    return {
        item.find("span", class_="name").text.strip(): item["data-id"].strip()
        for item in shop_items
    }
def get_product_page_soup(self):
    """Download the product page and parse it with BeautifulSoup.

    The page at ``self.product_url`` is fetched with ``get_html``; the
    parsed tree is stored on ``self.product_page_soup`` and returned.

    :return: the BeautifulSoup object stored on ``self.product_page_soup``
    """
    self.product_page_soup = BeautifulSoup(
        get_html(self.product_url), "html.parser"
    )
    return self.product_page_soup