コード例 #1
0
def start_shop_info_parsing(my_dict, shop_urls):
    """Fetch every shop page and print the information parsed from it.

    For each URL the page HTML is downloaded with ``get_html``, parsed with
    BeautifulSoup and handed to ``ShopInfo`` together with ``my_dict``;
    ``ShopInfo.print_result()`` is then called on the result.

    :param my_dict: passed through unchanged to ``ShopInfo``
        (presumably the name -> outlet id mapping; confirm against caller)
    :param shop_urls: iterable of shop page URLs to process
    :return: None
    """
    # The loop must iterate the ``shop_urls`` parameter; the previous
    # spelling ``shops_urls`` was an undefined name and raised NameError.
    for url in shop_urls:
        html = get_html(url)
        shop_soup = BeautifulSoup(html, "html.parser")
        shop_info = ShopInfo(shop_soup, my_dict)
        shop_info.print_result()
コード例 #2
0
def get_total_num_pages(sector_url):
    """Return the total number of pages to parse in a sector.

    ``sector_url`` is a link from the list ``sectors_search_url`` presented
    in settings.py, for example
    https://5karmanov.ru/cat/aksessuary-muzhskie
    A sector page url then has to be created via ``create_sector_page_url``
    (for example, https://5karmanov.ru/cat/aksessuary-muzhskie?&page=3).

    The number is read from the ``page=`` part of the last-page link found
    inside the ``<ul class="pagination">`` element.

    :param sector_url: url of the first page of the sector
    :return: number of pages as ``int``, or ``None`` when the page could not
        be downloaded or the extracted page number is not an integer
    """
    sector_page_html = get_html(sector_url)
    if not sector_page_html:
        # Not inside an exception handler, so use error(): exception() here
        # would append a meaningless "NoneType: None" traceback.
        logging.error(f"Failed to get {sector_url} html")
        print("Не удалось загрузить секцию", sector_url)
        return None

    soup = BeautifulSoup(sector_page_html, "html.parser")
    try:
        # Preferred source: the link that follows the "more" list item.
        last_page_url = soup.find("ul", class_="pagination").find_next(
            "li", class_="more").find_next('a')["href"]
    except AttributeError:
        # No "more" item: fall back to the second-to-last pagination link.
        # NOTE(review): this still fails if the page has no pagination
        # block at all - confirm whether such pages can occur.
        last_page_url = soup.find("ul", class_="pagination").find_all("a")
        last_page_url = last_page_url[-2]["href"]

    sector_pages_qt = last_page_url.partition("page=")[-1].strip('"')
    try:
        return int(sector_pages_qt)
    except ValueError as e:
        logging.exception(
            f"Fail to get page_num at {sector_url}.Received data is not integer {e}"
        )
        print("Не удалось загрузить секцию", sector_url)
        # Previously the function fell through to a second int() call that
        # re-raised the error just logged; report failure with None instead.
        return None
コード例 #3
0
def get_products_on_pages(url):
    """Return all product elements found on the page at ``url``.

    The page is downloaded with ``get_html`` and parsed with BeautifulSoup;
    products are the ``<div>`` tags whose ``itemprop`` attribute equals
    ``"itemListElement"``.

    :param url: address of the page to scan
    :return: the ``find_all`` result with the product tags, or an empty
        list when no HTML was received for ``url``
    """
    page_html = get_html(url)
    if not page_html:
        # Without this guard the function reached ``return all_products``
        # with the name unassigned and raised UnboundLocalError.
        return []
    soup = BeautifulSoup(page_html, "html.parser")
    return soup.find_all("div", itemprop="itemListElement")
コード例 #4
0
def get_shop_codes(base_url):
    """Collect shop names and their outlet ids from the page at ``base_url``.

    Every ``<li class="shops-list__item j-info-shop-list-item">`` element
    contributes one entry: the key is the stripped text of its
    ``<span class="name">`` child and the value is its stripped ``data-id``
    attribute. If a name occurs more than once, the last occurrence wins.

    :param base_url: address of the page that lists the shops
    :return: dict of the form ``{"store_name": "outlet_id"}``
    """
    shop_soup = BeautifulSoup(get_html(base_url), "html.parser")
    shop_items = shop_soup.find_all(
        "li", class_="shops-list__item j-info-shop-list-item")
    # Lazily produce (outlet_id, name) pairs; within each item the id is
    # read before the name.
    code_name_pairs = (
        (item["data-id"].strip(),
         item.find("span", class_="name").text.strip())
        for item in shop_items
    )
    return {name: code for code, name in code_name_pairs}
コード例 #5
0
 def get_product_page_soup(self):
     """Download the product page and parse it into a BeautifulSoup object.

     The HTML for ``self.product_url`` is fetched with ``get_html`` and
     parsed with the ``"html.parser"`` backend. The resulting soup is
     stored on the instance as ``self.product_page_soup`` and returned.
     """
     self.product_page_soup = BeautifulSoup(
         get_html(self.product_url), "html.parser")
     return self.product_page_soup