def crawl_achatusag(request):
    """Crawl achatusag in two passes and e-mail the links reported as added.

    Every entry of ``CATEGORIES`` is crawled with
    ``_crawl_achatusag_category_2``; each link found that way is crawled
    again with ``_crawl_achatusag_category``. The links from the second pass
    are handed to ``add_and_compare_new_items`` under the ``"achatusag"``
    key, and whatever it returns under ``"added"`` is post-processed and
    mailed.

    Args:
        request: Incoming HTTP request; only ``request.method`` is read.

    Returns:
        str: The e-mail body when added items were reported, otherwise a
        short status message (unsupported method, no links found, or
        nothing new).
    """
    if request.method != "POST":
        return "This method is not supported"

    print("[achatusag] Started crawling website")

    # First pass: links discovered on each configured category page.
    subcategory_links = []
    for category_link in CATEGORIES:
        subcategory_links.extend(_crawl_achatusag_category_2(category_link))

    # Second pass: each first-pass link is itself crawled for item links.
    item_links = []
    for subcategory_link in subcategory_links:
        item_links.extend(_crawl_achatusag_category(subcategory_link))
    print(f"[achatusag] Got {len(item_links)} item links")

    # The emptiness check and the comparison must use the second-pass list:
    # it is the one the log line above reports as "item links". Using the
    # first-pass list here would leave the second crawl with no effect.
    if not item_links:
        send_warning_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
                           "achatusag")
        return "No links were found on achatusag website"

    db = firestore.Client()
    comparison_result = add_and_compare_new_items(db, "achatusag", item_links)
    added_items = comparison_result["added"]
    if not added_items:
        return "No new added or new deleted items found"

    _process_added_items(added_items)
    email_text = format_links_modified("Added", added_items)
    if not email_text:
        return "No new added or new deleted items found"

    send_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
               "Comparison results for achatusag", email_text)
    return email_text
def crawl_ceqinc(request):
    """Crawl ceqinc and e-mail the items reported as newly added.

    The result of ``_crawl_ceqinc()`` is handed to
    ``add_and_compare_new_items`` under the ``"ceqinc"`` key; whatever it
    returns under ``"added"`` is post-processed and mailed.

    Args:
        request: Incoming HTTP request; only ``request.method`` is read.

    Returns:
        str: The e-mail body when added items were reported, otherwise a
        short status message (unsupported method, no links found, or
        nothing new).
    """
    if request.method != "POST":
        return "This method is not supported"

    print("[ceqinc] Started crawling website")
    items = _crawl_ceqinc()
    print(f"[ceqinc] Got {len(items)} items")

    if not items:
        send_warning_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
                           "ceqinc")
        return "No links were found on ceqinc website"

    db = firestore.Client()
    comparison_result = add_and_compare_new_items(db, "ceqinc", items)
    added_items = comparison_result["added"]
    if not added_items:
        return "No new added or new deleted items found"

    # Post-process only what the comparison reported as added, matching the
    # other crawlers in this file; passing the full crawl result would
    # re-process every item whenever a single new one shows up.
    # NOTE(review): confirm _process_added_items does not rely on receiving
    # the complete crawl result.
    _process_added_items(added_items)
    email_text = format_links_modified("Added", added_items)
    if not email_text:
        return "No new added or new deleted items found"

    send_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
               "Comparison results for ceqinc", email_text)
    return email_text
def crawl_southeastforklifts(request):
    """Crawl southeastforklifts and e-mail the links reported as newly added.

    The links returned by ``crawl_southeastforklifts_pages()`` are handed to
    ``add_and_compare_new_items`` under the ``"southeastforklifts"`` key;
    whatever it returns under ``"added"`` is post-processed and mailed.

    Args:
        request: Incoming HTTP request; only ``request.method`` is read.

    Returns:
        str: The e-mail body when added items were reported, otherwise a
        short status message (unsupported method, no links found, or
        nothing new).
    """
    if request.method != "POST":
        return "This method is not supported"

    print("[southeastforklifts] Started crawling website")
    item_links = crawl_southeastforklifts_pages()

    if not item_links:
        send_warning_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
                           "southeastforklifts")
        return "No links were found on southeastforklifts website"

    # The Firestore client is created only once there is something to
    # compare, so the warning path above never depends on it.
    db = firestore.Client()
    comparison_result = add_and_compare_new_items(db, "southeastforklifts",
                                                  item_links)
    added_items = comparison_result["added"]
    if not added_items:
        return "No new added or new deleted items found"

    _process_added_items(added_items)
    email_text = format_links_modified("Added", added_items)
    if not email_text:
        return "No new added or new deleted items found"

    send_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
               "Comparison results for southeastforklifts", email_text)
    return email_text
def crawl_manuvic(request):
    """Crawl the manuvic forklift listing and e-mail the links reported as added.

    A single listing page is fetched and the ``href`` of every product-photo
    anchor is collected. The links are handed to
    ``add_and_compare_new_items`` under the ``"manuvic"`` key; whatever it
    returns under ``"added"`` is post-processed (with a Cloud Storage
    client) and mailed.

    Args:
        request: Incoming HTTP request; only ``request.method`` is read.

    Returns:
        str: The e-mail body when added items were reported, otherwise a
        short status message (unsupported method, no links found, or
        nothing new).
    """
    if request.method != "POST":
        return "This method is not supported"

    print("[manuvic] Started crawling website")
    response_text = request_(
        "GET",
        "https://www.manuvic.com/produits/chariots-elevateurs.html?cat=116&product_list_limit=100"
    ).text
    soup = BeautifulSoup(response_text, "html.parser")
    anchors = soup.find_all("a", class_="product photo product-item-photo")
    # Tag.get returns None for an anchor without an href; such entries are
    # not links and must not reach the comparison step.
    hrefs = (anchor.get("href") for anchor in anchors)
    item_links = [href for href in hrefs if href]

    if not item_links:
        send_warning_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
                           "manuvic")
        return "No links were found on manuvic website"

    # Cloud clients are created only when they are about to be used, so the
    # warning path above never depends on them.
    db = firestore.Client()
    comparison_result = add_and_compare_new_items(db, "manuvic", item_links)
    added_items = comparison_result["added"]
    if not added_items:
        return "No new added or new deleted items found"

    storage_client = storage.Client()
    _process_added_items(storage_client, added_items)
    email_text = format_links_modified("Added", added_items)
    if not email_text:
        return "No new added or new deleted items found"

    send_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
               "Comparison results for manuvic", email_text)
    return email_text
def crawl_komatsuforklift(request):
    """Crawl komatsuforklift categories and e-mail the items reported as added.

    Every entry of ``CATEGORIES`` is crawled with
    ``_crawl_komatsuforklift_category``, which yields two-element
    ``(url, item)`` entries. Only the ``item`` part is handed to
    ``add_and_compare_new_items`` under the ``"komatsuforklift"`` key;
    whatever it returns under ``"added"`` is post-processed and mailed.

    Args:
        request: Incoming HTTP request; only ``request.method`` is read.

    Returns:
        str: The e-mail body when added items were reported, otherwise a
        short status message (unsupported method, no links found, or
        nothing new).
    """
    if request.method != "POST":
        return "This method is not supported"

    print("[komatsuforklift] Started crawling website")
    item_links = []
    for category_link in CATEGORIES:
        item_links.extend(_crawl_komatsuforklift_category(category_link))
    print(f"[komatsuforklift] Got {len(item_links)} item links")

    # Keep only the second element of each (url, item) entry for comparison.
    final_links = [item for _url, item in item_links]

    if not final_links:
        send_warning_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
                           "komatsuforklift")
        return "No links were found on komatsuforklift website"

    db = firestore.Client()
    comparison_result = add_and_compare_new_items(db, "komatsuforklift",
                                                  final_links)
    added_items = comparison_result["added"]
    if not added_items:
        return "No new added or new deleted items found"

    # NOTE(review): this function used to extend a name it never binds
    # (`final_items`) with the (url, item) pairs of the added items. That
    # raises NameError unless a module-level list exists, and the pairs were
    # never consumed here, so the statement is gone. If _process_added_items
    # needs the source URL of each added item, rebuild the pairs from
    # `item_links` and pass them explicitly.
    _process_added_items(added_items)
    email_text = format_links_modified("Added", added_items)
    if not email_text:
        return "No new added or new deleted items found"

    send_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
               "Comparison results for komatsuforklift", email_text)
    return email_text
def crawl_valleesaintsauveur(request):
    """Crawl the valleesaintsauveur member directory and e-mail added entries.

    The directory is queried with POST requests: one initial request with
    the bare search form, then follow-up requests that add ``start`` and
    ``Ordre`` fields. Every ``div.section8`` of each response is parsed with
    ``_crawl_valleesaintsauveur_section``; ``None`` results are skipped.
    The collected entries are handed to ``add_and_compare_new_items`` under
    the ``"valleesaintsauveur"`` key and whatever it returns under
    ``"added"`` is mailed.

    Args:
        request: Incoming HTTP request; only ``request.method`` is read.

    Returns:
        str: The e-mail body when added items were reported, otherwise a
        short status message (unsupported method, no links found, or
        nothing new).
    """
    if request.method != "POST":
        return "This method is not supported"

    directory_url = "https://www.valleesaintsauveur.com/1-chambre-de-commerce/repertoire-des-membres.html"
    search_form = {
        "actionEntreprise": "1",
        "NomEntreprise": "Nom de l'entreprise",
        "VILLE_ID": "",
    }

    def fetch_entries(form_data):
        # Submit one search form and return the parsed, non-None sections.
        html = request_("POST", directory_url, data=form_data).text
        section_divs = BeautifulSoup(html, "html.parser").find_all(
            "div", class_="section8")
        entries = []
        for section_div in section_divs:
            parsed = _crawl_valleesaintsauveur_section(section_div)
            if parsed is not None:
                entries.append(parsed)
        return entries

    print("[valleesaintsauveur] Started crawling website")

    # The first request carries only the bare search form.
    links_data = fetch_entries(dict(search_form))

    # Follow-up requests ask for offsets 11, 21, 31, ... and stop at the
    # first response that yields no parsed entry; there is no other bound.
    page = 1
    while True:
        page_entries = fetch_entries({
            **search_form,
            "start": page * 10 + 1,
            "Ordre": "societe asc",
        })
        if not page_entries:
            break
        links_data.extend(page_entries)
        page += 1

    print(f"[valleesaintsauveur] Got {len(links_data)} item links")

    if not links_data:
        send_warning_email(SENDGRID_API_KEY, EMAIL, [EMAIL],
                           "valleesaintsauveur")
        return "No links were found on valleesaintsauveur website"

    db = firestore.Client()
    comparison_result = add_and_compare_new_items(db, "valleesaintsauveur",
                                                  links_data)
    # The "deleted" entry is looked up but not used any further.
    added_items = comparison_result["added"]
    _deleted_items = comparison_result["deleted"]

    email_text = ""
    if added_items:
        email_text += format_links_modified("Added", added_items)

    if not email_text:
        return "No new added or new deleted items found"

    send_email(SENDGRID_API_KEY, EMAIL, [EMAIL],
               "Comparison results for valleesaintsauveur", email_text)
    return email_text