def main():
    """Build a category tree from the M&S test configuration and export it.

    Reads the ``"UK" -> "M&S"`` section of the ``"test"`` JSON config,
    creates one category per configured entry and one linked subcategory per
    listed item, fetches ``base_url + item`` for each item and attaches every
    product parsed from the ``search-result-content`` container to that
    item's subcategory.  The finished tree is passed to ``display`` and
    written out with ``write_csv``.

    NOTE(review): this file defines ``main`` more than once; the definition
    that appears last is the one bound at import time.
    """
    settings = read_json("test")["UK"]["M&S"]
    base_search_url = settings["base_url"]
    category_map = settings["categories"]
    tree = Tree()

    for parent_name, child_names in category_map.items():
        parent = Category(name=parent_name)
        tree.add_category(parent)

        for child_name in child_names:
            # Register the item as a subcategory hanging off its parent.
            child = Category(name=child_name)
            tree.add_category(child)
            tree.create_edge(parent, child)

            # Look the item up on the site and isolate the results container.
            page = Webpage.get_source_code(base_search_url + child_name)
            results = page.find("div", {"class": "search-result-content"})
            if not results:
                # Nothing to parse for this item; leave the subcategory empty.
                continue

            for entry in Webpage.get_element(results, "li"):
                product = get_product_details(entry)
                if product:
                    child.add_product(product)

    display(tree)
    write_csv("consept_test", tree, settings)
def main():
    """Crawl the configured Boots category links into a tree and export it.

    Loads the ``"Boots"`` section of the ``"ms_config"`` JSON config, seeds
    the tree through ``create_categories`` and ``assing_hrefs``, then walks
    every link of every configured category.  The path segments of a link
    become a chain of nested categories.  When the last segment of a link is
    newly created, the link is fetched and every product parsed from an
    ``estore_product_container`` element is attached to that category.  The
    tree is finally written out with ``write_to_csv``.

    A link whose page cannot be fetched is reported on stdout and skipped.

    NOTE(review): this file defines ``main`` more than once; the definition
    that appears last is the one bound at import time.
    """
    site_prefix = "https://www.boots.com/"

    config = read_json("ms_config")["Boots"]
    web_tree = Tree()
    create_categories(web_tree, config)
    assing_hrefs(web_tree, config)

    for category_name in config["categories"]:
        category = web_tree.get_category_by_name(category_name)
        print(category.get_name())

        for link in category.get_href(single=False):
            # Plain string replacement: the prefix is a literal, and used as
            # a regex its unescaped dots would match any character.
            path = link.replace(site_prefix, "")
            # Drop empty segments (e.g. from a trailing slash) so that no
            # category with an empty name is created.
            subcategories = [segment for segment in path.split("/") if segment]

            # Start every link from the configured category so that a first
            # segment missing from the tree has a defined parent and never
            # inherits the leaf reached by the previous link.
            parent_category = category

            for i, subcategory in enumerate(subcategories):
                # Create categories and edges between them.
                existing = web_tree.get_category_by_name(subcategory)
                if existing:
                    parent_category = existing
                    print(f"Parent: {subcategory}")
                    continue

                print(f"Created: {subcategory}")
                new_subcategory = Category(name=subcategory)
                web_tree.add_category(new_subcategory)
                web_tree.create_edge(parent_category, new_subcategory)
                parent_category = new_subcategory

                # Product details are only added to the leaf of the link.
                if i != len(subcategories) - 1:
                    continue

                print(f"{link}----------------------------------------------------------")
                try:
                    soup = Webpage.get_source_code(link)
                except Exception as ex:
                    # Best-effort crawl: report the failing link and move on.
                    print(f"Error - link ************************** {link} - {ex}")
                    continue

                for product_detail in Webpage.get_element(
                        soup, "div", "class", "estore_product_container"):
                    new_product = get_product_details(product_detail)
                    if new_product:
                        parent_category.add_product(new_product)
                        print(new_product)

    write_to_csv("boots", web_tree, config)
def main():
    """Crawl the M&S navigation menu into a category tree and export it.

    Loads the ``"M&S"`` section of the ``"ms_config"`` JSON config and seeds
    the tree with ``create_categories``.  For every configured category the
    page at ``base_url`` + the category's href is fetched.  Each
    ``nav-submenu__link-list`` element found in its sub-menu becomes a
    second-level category, and each anchor inside it a third-level category.
    The anchor's target page is then fetched and every ``li`` of its product
    list is turned into a ``Product`` (title and price) attached to the
    third-level category.  The tree is finally written out with
    ``write_to_csv``.

    Errors are reported on stdout and never abort the crawl: a failure while
    handling a link skips that link, and a failure while parsing a single
    product skips only that product.
    """
    config = read_json("ms_config")["M&S"]
    print(config)
    base_url = config["base_url"]

    web_tree = Tree()
    create_categories(web_tree, config)

    for category_name in config["categories"]:
        parent_category = web_tree.get_category_by_name(category_name)
        url = base_url + parent_category.get_href()
        soup = Webpage.get_source_code(url)
        sub_menu_nav = Webpage.get_element(
            soup, "div", "class",
            "content-replace-holder nav-primary__submenu nav-submenu__six-col-gnav")

        for sub_element in Webpage.get_element(
                sub_menu_nav, "ul", "class", "nav-submenu__link-list"):
            new_subcategory2 = Category(
                name=sub_element["data-mns-sub-navigation-content"])
            web_tree.add_category(new_subcategory2)
            web_tree.create_edge(parent_category, new_subcategory2)

            for li in Webpage.get_element(sub_element, "a"):
                try:
                    new_subcategory3 = Category(name=li.get_text(), href=li['href'])
                    web_tree.add_category(new_subcategory3)
                    web_tree.create_edge(new_subcategory2, new_subcategory3)

                    product_list = Webpage.get_source_code(base_url + li['href']).find(
                        "div", {"class": "product__list col-xs-12 remove-padding"})

                    for product_details in Webpage.get_element(product_list, "li"):
                        # Recover per product so that one malformed tile does
                        # not discard the remaining products of the page.
                        try:
                            title = product_details.find(
                                "h3", {"class": "product__title"}).get_text().strip()
                            price = product_details.find(
                                "div", {"class": "product__price"}).get_text().strip()
                            new_product = Product(title=title, price=price)
                            new_subcategory3.add_product(new_product)
                        except Exception as ex:
                            print(f"ERROR *****************************************{ex}")
                            continue
                        print(new_product)
                except Exception as ex:
                    # Best-effort crawl: report the failing link and move on.
                    print(f"ERROR *****************************************{ex}")
                    continue
                # Visual separator after each link that was processed.
                print("--------------------------------------------")

    write_to_csv("m&s", web_tree, config)