コード例 #1
0
    def start_requests(self):
        """Yield the initial Skelbiu search request for the configured RE query.

        Reads ``self.re_query_id``, loads the stored query with
        ``get_re_query``, turns it into URL parameters through
        ``SkelbiuReQuery(...).generate()`` and yields a single
        ``scrapy.Request`` for the search page, with ``self.parse`` as the
        callback. Also resets ``self.i`` to 0.

        Nothing is yielded (a warning is printed instead) when the query
        cannot be found or when ``generate()`` returns ``None``.

        Raises:
            ValueError: if ``self.re_query_id`` is ``None``. This is a
                generator, so the error surfaces on first iteration.
        """
        self.i = 0

        if self.re_query_id is None:
            raise ValueError("No re_query_id passed to spider")

        query_from_db = get_re_query(int(self.re_query_id))
        if query_from_db is None:  # don't scrape
            print("WARNING: Skelbiu: Not scraping (query not found):")
            # Print the id that was looked up; the lookup result itself is
            # always None on this path and carries no information.
            print(self.re_query_id)
            return

        re_query = SkelbiuReQuery(query_from_db).generate()
        if re_query is None:  # don't scrape
            print("WARNING: Skelbiu: Not scraping:")
            print(query_from_db)
            return

        search_url = "https://skelbiu.lt/skelbimai/?" + urlencode(re_query)
        # Explicit desktop-browser user agent sent with the request.
        user_agent = (
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
        )

        print("STARTED CRAWLING SKELBIU")
        yield scrapy.Request(
            url=search_url,
            callback=self.parse,
            headers={"user-agent": user_agent},
        )
コード例 #2
0
    def __init__(self, old_ads, scraped_ads, query):
        """Compare stored ads with freshly scraped ones and report changes.

        Indexes both ad collections by their ``"id"`` key, runs
        ``self.compare_re_ads()``, marks the ads returned by
        ``self.deleted_re_ads()`` as deleted, then re-reads the query with
        ``get_re_query``. If that query has a truthy ``"was_scraped"`` value
        and ``self.ad_changes`` is non-empty, a message built by
        ``self.generate_message()`` is stored with ``insert_message`` and its
        return value kept in ``self.msg_id``.

        Args:
            old_ads: iterable of ad mappings, each with an ``"id"`` key.
            scraped_ads: iterable of ad mappings, each with an ``"id"`` key.
            query: mapping describing the RE query; its ``"id"`` is used to
                re-read the query.
        """
        self.re_query: dict = query
        self.old_ads = {ad["id"]: ad for ad in old_ads}
        self.new_ads = {ad["id"]: ad for ad in scraped_ads}
        # Presumably filled in by compare_re_ads() -- verify in that method.
        self.ad_changes = {}

        self.compare_re_ads()
        ads_to_delete = self.deleted_re_ads()
        print("ads_to_ mark as deleted:")
        print(ads_to_delete)

        mark_re_ads_as_deleted(ads_to_delete)

        print("RE_AD_CHANGES:")
        print(self.ad_changes)

        # Re-read the query so "was_scraped" reflects its current stored value.
        self.re_query = get_re_query(self.re_query["id"])
        if self.re_query["was_scraped"] and len(self.ad_changes) != 0:
            msg = self.generate_message()
            self.msg_id = insert_message(self.re_query["user_id"],
                                         "Pasikeitė paieškos rezultatai", msg)
            # NOTE(review): the tail of this branch and the else below were
            # reconstructed from a garbled source line -- confirm against the
            # original file.
            print("MESSAGE TO SEND TO THE USER:")
            print(msg)
        else:
            print("NO RE AD CHANGES OR RE QUERY IS NEW")
コード例 #3
0
def del_re_query(user_id, query_id):
    """Delete an RE query on behalf of the authenticated caller.

    The caller identity comes from ``get_jwt_identity()``. The query may be
    deleted by the user whose ``user_id`` matches the query's ``"user_id"``
    or by a caller whose ``"group"`` is ``"admin"``.

    Args:
        user_id: user id passed on to ``delete_re_query`` and to
            ``scraper_interface.delete_re_query``.
        query_id: id of the query to delete.

    Returns:
        * ``Response(status=404)`` if the query does not exist.
        * A JSON error body with status 403 if the caller is neither the
          owner nor an admin.
        * ``Response(status=200)`` once the query is deleted and the scraper
          interface has been told about it.
        * ``Response(status=500)`` if ``delete_re_query`` reports failure
          (previously this path fell through and returned ``None``).
    """
    jwt = get_jwt_identity()
    query = get_re_query(query_id)
    if query is None:
        return Response(status=404)

    is_owner = query["user_id"] == jwt["user_id"]
    if not is_owner and jwt["group"] != "admin":
        return jsonify(
            {"error": "You can only access your own resources."}), 403

    if not delete_re_query(user_id, query_id):
        # Return an explicit error rather than None, which is not a valid
        # view response.
        return Response(status=500)

    scraper_interface.delete_re_query(user_id, query_id)
    return Response(status=200)
コード例 #4
0
def put_query(user_id, query_id):
    jwt = get_jwt_identity()
    query = get_re_query(query_id)
    if query is None:
        if (res := validate_resource(user_id)) != True:
            return res