# Third-party imports used below. Helper functions and constants such as
# request_, send_email, send_warning_email, format_links_modified,
# add_and_compare_new_items, _process_added_items, the _crawl_* helpers,
# CATEGORIES, SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS and EMAIL are
# defined elsewhere in the module.
from bs4 import BeautifulSoup
from google.cloud import firestore, storage


def crawl_achatusag(request):
    if request.method == "POST":
        print("[achatusag] Started crawling website")
        # Collect the sub-category links for every top-level category, then
        # crawl each sub-category page for the actual item links.
        subcategory_links = []
        for category_link in CATEGORIES:
            subcategory_links.extend(_crawl_achatusag_category_2(category_link))
        item_links = []
        for subcategory_link in subcategory_links:
            item_links.extend(_crawl_achatusag_category(subcategory_link))
        print(f"[achatusag] Got {len(item_links)} item links")

        if not item_links:
            send_warning_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
                               "achatusag")
            return "No links were found on achatusag website"

        db = firestore.Client()

        comparison_result = add_and_compare_new_items(db, "achatusag",
                                                      item_links)
        added_items, deleted_items = comparison_result[
            "added"], comparison_result["deleted"]
        email_text = ""
        if added_items:
            _process_added_items(added_items)
            email_text += format_links_modified("Added", added_items)
        if email_text != "":
            send_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
                       "Comparison results for achatusag", email_text)
            return email_text
        else:
            return "No new added or new deleted items found"
    else:
        return "This method is not supported"
def crawl_ceqinc(request):
    if request.method == "POST":
        print("[ceqinc] Started crawling website")
        items = _crawl_ceqinc()
        print(f"[ceqinc] Got {len(items)} items")

        if not items:
            send_warning_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS, "ceqinc")
            return "No links were found on ceqinc website"

        db = firestore.Client()

        comparison_result = add_and_compare_new_items(db, "ceqinc", items)
        added_items, deleted_items = comparison_result["added"], comparison_result["deleted"]
        email_text = ""
        if added_items:
            _process_added_items(added_items)
            email_text += format_links_modified("Added", added_items)
        if email_text != "":
            send_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS, "Comparison results for ceqinc", email_text)
            return email_text
        else:
            return "No new added or new deleted items found"
    else:
        return "This method is not supported"
def crawl_southeastforklifts(request):
    if request.method == "POST":
        print("[southeastforklifts] Started crawling website")
        item_links = crawl_southeastforklifts_pages()

        db = firestore.Client()

        if not item_links:
            send_warning_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
                               "southeastforklifts")
            return "No links were found on southeastforklifts website"

        comparison_result = add_and_compare_new_items(db, "southeastforklifts",
                                                      item_links)
        added_items, deleted_items = comparison_result[
            "added"], comparison_result["deleted"]
        email_text = ""
        if added_items:
            _process_added_items(added_items)
            email_text += format_links_modified("Added", added_items)
        if email_text != "":
            send_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
                       "Comparison results for southeastforklifts", email_text)
            return email_text
        else:
            return "No new added or new deleted items found"
    else:
        return "This method is not supported"
def crawl_manuvic(request):
    if request.method == "POST":
        print("[manuvic] Started crawling website")
        response_text = request_(
            "GET",
            "https://www.manuvic.com/produits/chariots-elevateurs.html?cat=116&product_list_limit=100"
        ).text

        soup = BeautifulSoup(response_text, "html.parser")
        item_links = [
            el.get("href")
            for el in soup.find_all("a", class_="product photo product-item-photo")
        ]

        db = firestore.Client()
        storage_client = storage.Client()

        if not item_links:
            send_warning_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS, "manuvic")
            return "No links were found on manuvic website"

        comparison_result = add_and_compare_new_items(db, "manuvic", item_links)
        added_items, deleted_items = comparison_result["added"], comparison_result["deleted"]
        email_text = ""
        if added_items:
            _process_added_items(storage_client, added_items)
            email_text += format_links_modified("Added", added_items)
        if email_text != "":
            send_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS, "Comparison results for manuvic", email_text)
            return email_text
        else:
            return "No new added or new deleted items found"
    else:
        return "This method is not supported"
def crawl_komatsuforklift(request):
    if request.method == "POST":
        print("[komatsuforklift] Started crawling website")
        item_links = []
        for category_link in CATEGORIES:
            item_links.extend(_crawl_komatsuforklift_category(category_link))
        print(f"[komatsuforklift] Got {len(item_links)} item links")
        # item_links holds (url, item) pairs; keep only the item part for the
        # Firestore comparison.
        final_links = [item for _url, item in item_links]

        if not final_links:
            send_warning_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
                               "komatsuforklift")
            return "No links were found on komatsuforklift website"

        db = firestore.Client()

        comparison_result = add_and_compare_new_items(db, "komatsuforklift",
                                                      final_links)
        added_items, deleted_items = comparison_result[
            "added"], comparison_result["deleted"]
        email_text = ""
        if added_items:
            # Recover the (url, item) pairs for the newly added items.
            final_items = [(url, i) for item in added_items
                           for url, i in item_links if item == i]
            _process_added_items(added_items)
            email_text += format_links_modified("Added", added_items)
        if email_text != "":
            send_email(SENDGRID_API_KEY, SENDER_EMAIL, RECEIVER_EMAILS,
                       "Comparison results for komatsuforklift", email_text)
            return email_text
        else:
            return "No new added or new deleted items found"
    else:
        return "This method is not supported"
def crawl_valleesaintsauveur(request):
    if request.method == "POST":
        print("[valleesaintsauveur] Started crawling website")
        response_text = request_(
            "POST",
            "https://www.valleesaintsauveur.com/1-chambre-de-commerce/repertoire-des-membres.html",
            data={
                "actionEntreprise": "1",
                "NomEntreprise": "Nom de l'entreprise",
                "VILLE_ID": "",
            }
        ).text

        soup = BeautifulSoup(response_text, "html.parser")
        links_data = []
        sections = soup.find_all("div", class_="section8")
        for section in sections:
            data = _crawl_valleesaintsauveur_section(section)
            if data is not None:
                links_data.append(data)
        page = 1
        while True:
            response_text = request_(
                "POST",
                "https://www.valleesaintsauveur.com/1-chambre-de-commerce/repertoire-des-membres.html",
                data={
                    "actionEntreprise": "1",
                    "NomEntreprise": "Nom de l'entreprise",
                    "VILLE_ID": "",
                    "start": page * 10 + 1,
                    "Ordre": "societe asc",
                }
            ).text
            soup = BeautifulSoup(response_text, "html.parser")
            sections = soup.find_all("div", class_="section8")
            new_links_data = []
            for section in sections:
                data = _crawl_valleesaintsauveur_section(section)
                if data is not None:
                    new_links_data.append(data)
            if new_links_data:
                links_data.extend(new_links_data)
            else:
                break

            page += 1

        print(f"[valleesaintsauveur] Got {len(links_data)} item links")

        if not links_data:
            send_warning_email(SENDGRID_API_KEY, EMAIL, [EMAIL], "valleesaintsauveur")
            return "No links were found on valleesaintsauveur website"

        db = firestore.Client()

        comparison_result = add_and_compare_new_items(db, "valleesaintsauveur", links_data)
        added_items, deleted_items = comparison_result["added"], comparison_result["deleted"]
        email_text = ""
        if added_items:
            email_text += format_links_modified("Added", added_items)
        if email_text != "":
            send_email(SENDGRID_API_KEY, EMAIL, [EMAIL], "Comparison results for valleesaintsauveur", email_text)
            return email_text
        else:
            return "No new added or new deleted items found"
    else:
        return "This method is not supported"