コード例 #1
0
def main():
    """Build a category tree from M&S search results and export it.

    Reads the ["UK"]["M&S"] section of the "test" JSON config. Each key of
    its "categories" mapping becomes a Category in a Tree, and each item
    listed under that key becomes a subcategory linked to it by an edge.
    For every item, the page at base_url + item is fetched and the
    "search-result-content" div is looked up; each "li" element inside it
    is passed to get_product_details(), and truthy results are added to
    the item's subcategory. Finally the tree is displayed and handed to
    write_csv() under the name "consept_test".
    """
    settings = read_json("test")["UK"]["M&S"]
    base = settings["base_url"]
    catalogue = settings["categories"]

    tree = Tree()

    for top_name, item_names in catalogue.items():
        top_level = Category(name=top_name)
        tree.add_category(top_level)

        for item_name in item_names:
            # Every item is its own subcategory hanging off the parent category.
            sub_level = Category(name=item_name)
            tree.add_category(sub_level)
            tree.create_edge(top_level, sub_level)

            # Run the site search for the item; only the results container matters.
            page = Webpage.get_source_code(base + item_name)
            results = page.find("div", {"class": "search-result-content"})
            if not results:
                # No results container on the page: nothing to attach.
                continue

            for entry in Webpage.get_element(results, "li"):
                product = get_product_details(entry)
                if product:
                    sub_level.add_product(product)

    display(tree)
    write_csv("consept_test", tree, settings)
コード例 #2
0
def main():
    """Build the Boots category tree from its links and scrape leaf products.

    Loads the "Boots" section of the "ms_config" JSON, creates a Tree and
    passes it through create_categories() and assing_hrefs() (defined
    elsewhere; presumably they register the configured categories and
    their links -- confirm against those helpers).

    For each configured category, every link returned by
    category.get_href(single=False) has the "https://www.boots.com/"
    prefix removed and is split on "/". Segments that already exist in the
    tree become the current parent; unknown segments are created and
    linked under the current parent. When the last segment of a link is
    newly created, the link is fetched and every "estore_product_container"
    div that get_product_details() turns into a truthy product is added to
    that leaf. The tree is then written with write_to_csv("boots", ...).
    """
    config = read_json("ms_config")["Boots"]
    web_tree = Tree()
    create_categories(web_tree, config)
    assing_hrefs(web_tree, config)

    for category_name in config["categories"]:
        category = web_tree.get_category_by_name(category_name)
        print(category.get_name())
        for link in category.get_href(single=False):
            # The prefix is a literal, so plain string replacement is enough.
            subcategories = link.replace("https://www.boots.com/", "").split("/")

            # Start each link at the top-level category. Without this, an
            # unknown first segment would either hit an unbound name (first
            # link) or be attached to the leaf of the previous link.
            parent_category = category

            for i, subcategory in enumerate(subcategories):
                # Create categories and edges between them.
                existing = web_tree.get_category_by_name(subcategory)
                if existing:
                    parent_category = existing
                    print(f"Parent: {subcategory}")
                    # NOTE(review): an already-known leaf is skipped here, so
                    # its products are not fetched -- confirm this is intended.
                    continue

                print(f"Created: {subcategory}")
                new_subcategory = Category(name=subcategory)
                web_tree.add_category(new_subcategory)
                web_tree.create_edge(parent_category, new_subcategory)
                parent_category = new_subcategory

                # Only the last path segment (the leaf) carries products.
                if i != len(subcategories) - 1:
                    continue

                print(f"{link}----------------------------------------------------------")
                try:
                    soup = Webpage.get_source_code(link)
                except Exception as ex:
                    # Best effort: report the failing link and move on.
                    print(f"Error - link ************************** {link} - {ex}")
                    continue
                for product_detail in Webpage.get_element(soup, "div", "class", "estore_product_container"):
                    new_product = get_product_details(product_detail)
                    if new_product:
                        parent_category.add_product(new_product)
                        print(new_product)

    write_to_csv("boots", web_tree, config)
コード例 #3
0
def main():
    """Scrape the M&S navigation menu into a three-level tree with products.

    Loads the "M&S" section of the "ms_config" JSON, prints it, and seeds a
    Tree via create_categories(). For each configured category, the page at
    base_url + the category's href is fetched and its sub-menu container is
    located. Every "nav-submenu__link-list" list in that container becomes
    a second-level category (named after its
    "data-mns-sub-navigation-content" attribute), and every anchor inside
    it becomes a third-level category. The anchor's page is fetched and
    each "li" of its product list yields a Product built from the
    "product__title" and "product__price" texts. Any exception while
    handling an anchor is printed and that anchor is abandoned. The tree is
    finally written with write_to_csv("m&s", ...).
    """
    settings = read_json("ms_config")["M&S"]
    print(settings)

    tree = Tree()
    create_categories(tree, settings)

    for top_name in settings["categories"]:
        # Instance is not used below; construction is kept as in the original flow.
        webpage = Webpage()
        top_level = tree.get_category_by_name(top_name)
        landing_url = settings["base_url"] + top_level.get_href()
        landing_page = Webpage.get_source_code(landing_url)

        submenu = Webpage.get_element(landing_page, "div", "class", "content-replace-holder nav-primary__submenu nav-submenu__six-col-gnav")
        link_lists = Webpage.get_element(submenu, "ul", "class", "nav-submenu__link-list")
        for link_list in link_lists:
            group = Category(name=link_list["data-mns-sub-navigation-content"])
            tree.add_category(group)
            tree.create_edge(top_level, group)

            for anchor in Webpage.get_element(link_list, "a"):
                try:
                    leaf = Category(name=anchor.get_text(), href=anchor['href'])
                    tree.add_category(leaf)
                    tree.create_edge(group, leaf)

                    # Fetch the listing page behind the anchor and isolate its product list.
                    listing_url = settings["base_url"] + anchor['href']
                    listing_page = Webpage.get_source_code(listing_url)
                    listing = listing_page.find("div", {"class": "product__list col-xs-12 remove-padding"})

                    for tile in Webpage.get_element(listing, "li"):
                        title_node = tile.find("h3", {"class": "product__title"})
                        title_text = title_node.get_text().strip()
                        price_node = tile.find("div", {"class": "product__price"})
                        price_text = price_node.get_text().strip()
                        product = Product(title=title_text, price=price_text)
                        leaf.add_product(product)
                        print(product)
                except Exception as err:
                    # Best effort: a failure abandons this anchor only.
                    print(f"ERROR *****************************************{err}")
                    continue
        print("--------------------------------------------")

    write_to_csv("m&s", tree, settings)