def price_walk(handler, req_data):
    """Build the per-day price history of one JD item.

    Generator-style coroutine: the three Mongo lookups are yielded together
    as a list and the result is handed back with ``return``.

    Args:
        handler: Request handler; not used by this function.
        req_data: Request payload; ``req_data["itemid"]`` selects the item.

    Returns:
        ``(payload, 1)`` where ``payload`` contains:
        ``datetime_list`` (sorted dates that have a positive ``calc_price``),
        ``price_list`` / ``calc_advice_list`` (taken from the cheapest record
        of each date), ``calc_price_list`` (``[first, last, min, max]`` of
        ``calc_price`` per date), ``lowest_price`` (smallest daily minimum,
        ``0`` when there is no data) and ``good_price`` (``0`` when the item
        document or the field is missing).
    """
    ap(req_data)
    itemid = req_data["itemid"]
    raw_item, now_price, old_price = yield [
        table_jingdong_itemid.find_one({"itemid": itemid}),
        table_jingdong_price.find_one({"itemid": itemid}),
        table_jingdong_price_old.find({
            "itemid": itemid
        }).limit(1000).to_list(length=None),
    ]

    # find_one() yields None when the item has no current price document;
    # keep it out of the list instead of crashing on item["datetime"].
    all_price = list(old_price)
    if now_price is not None:
        all_price.append(now_price)

    # Group usable records by calendar date in one pass. Records are kept in
    # their original order so "first" and "last" of a day stay meaningful.
    records_by_date = {}
    for item in all_price:
        item["date"] = item["datetime"].split(" ")[0]
        if item.get("calc_price", 0) <= 0:
            continue
        records_by_date.setdefault(item["date"], []).append(item)
    date_list = sorted(records_by_date)

    lowest_price = 0
    price_list = []
    calc_price_list = []
    calc_advice_list = []
    for date in date_list:
        date_price = records_by_date[date]
        calc_prices = [x["calc_price"] for x in date_price]
        # min() returns the first record among ties, matching a strict "<" scan.
        cheapest = min(date_price, key=lambda x: x["calc_price"])
        min_p = cheapest["calc_price"]
        max_p = max(calc_prices)
        if lowest_price == 0 or min_p < lowest_price:
            lowest_price = min_p
        calc_advice_list.append(cheapest["calc_advice"])
        price_list.append(cheapest["price"])
        calc_price_list.append([
            calc_prices[0],
            calc_prices[-1],
            min_p,
            max_p,
        ])

    return {
        "price_list": price_list,
        "lowest_price": lowest_price,
        "good_price": (raw_item or {}).get("good_price", 0),
        "datetime_list": date_list,
        "calc_price_list": calc_price_list,
        "calc_advice_list": calc_advice_list,
    }, 1
def task_list(handler, req_data):
    """List kaola items together with their latest price details.

    Args:
        handler: Request handler; not used by this function.
        req_data: Request payload. Optional ``name`` is a space-separated
            list of keywords that must all occur in the item name; optional
            ``status`` filters by exact status.

    Returns (via ``tornado.gen.Return``):
        ``(items, len(items))`` with items sorted by ``datetime`` descending.
    """
    ap(req_data)
    db_query = {}
    if req_data.get("name"):
        # One lookahead per keyword: every keyword must appear, in any order.
        # NOTE(review): keywords are inserted into the regex unescaped, so
        # regex metacharacters typed by the user are interpreted as regex.
        keywords = [s for s in req_data["name"].split(" ") if s.strip()]
        db_query["name"] = {
            "$regex": "^" + "".join(["(?=.*%s)" % s for s in keywords]) + ".*$"
        }
    if req_data.get("status"):
        db_query["status"] = req_data["status"]
    print("db_query:", db_query)

    result = yield table_kaola_itemid.find(db_query).to_list(length=None)
    result_detail = yield table_kaola_price.find({
        "_id": {
            "$in": [x["_id"] for x in result]
        }
    }).sort([("datetime", -1)]).to_list(length=None)

    # Index the details once instead of scanning the list for every item.
    # setdefault keeps the first (newest) document per _id.
    detail_by_id = {}
    for detail in result_detail:
        detail_by_id.setdefault(detail["_id"], detail)

    for item in result:
        item_detail = detail_by_id.get(item["_id"], {})
        item["price"] = item_detail.get("min_price", 0)
        item["vender"] = "自营"
        item["stock"] = item_detail.get("current_store", 0)
        item["datetime"] = item_detail.get("datetime", "")
        item["quan"] = item_detail.get("quan", "")
        item["promote"] = item_detail.get("promote", "")
        item["presale"] = item_detail.get("presale", "")

    result = sorted(result, key=lambda x: x.get("datetime"), reverse=True)
    raise tornado.gen.Return((result, len(result)))
def task_add(handler, req_data):
    """Insert the yanxuan items from the request that are not stored yet.

    Args:
        handler: Request handler; not used by this function.
        req_data: Request payload. ``itemid_list`` holds strings of the form
            ``"<itemid>-<index>"`` (``index`` is stored minus one);
            ``status`` is copied onto every new document.

    Returns (via ``tornado.gen.Return``):
        ``(inserted_ids, 1)``; ``inserted_ids`` is an empty list when every
        requested item already exists.
    """
    ap(req_data)
    itemid_map = {}
    for entry in req_data["itemid_list"]:
        parts = entry.split("-")
        itemid_map[parts[0]] = parts[1]
    itemid_list = list(itemid_map)

    exists_items = yield table_yanxuan_itemid.find({
        "itemid": {
            "$in": itemid_list
        }
    }).to_list(length=None)
    exists_items = {x["itemid"] for x in exists_items}

    items = [{
        "itemid": x,
        "index": int(itemid_map[x]) - 1,
        "status": req_data["status"],
    } for x in (set(itemid_list) - exists_items)]

    # insert_many() rejects an empty document list, so skip the call when
    # there is nothing new to store.
    if not items:
        raise tornado.gen.Return(([], 1))

    result = yield table_yanxuan_itemid.insert_many(items)
    raise tornado.gen.Return((result.inserted_ids, 1))
def remove_item(handler, req_data):
    """Delete the kaola item whose ``_id`` is given in the request.

    Args:
        handler: Request handler; not used by this function.
        req_data: Request payload; ``req_data["_id"]`` is converted to an
            ``ObjectId``.

    Returns (via ``tornado.gen.Return``):
        ``(raw_result, 1)`` where ``raw_result`` comes from the delete result.
    """
    ap(req_data)
    selector = {"_id": ObjectId(req_data["_id"])}
    outcome = yield table_kaola_itemid.delete_one(selector)
    raise tornado.gen.Return((outcome.raw_result, 1))
def query_filter(handler, req_data):
    """Fetch the stored filter document for one website.

    Args:
        handler: Request handler; not used by this function.
        req_data: Request payload; ``req_data["website"]`` selects the filter.

    Returns (via ``tornado.gen.Return``):
        ``(filter_document, 1)``; the document is whatever ``find_one`` gives.
    """
    ap(req_data)
    website = req_data["website"]
    filter_doc = yield table_filter.find_one({"website": website})
    raise tornado.gen.Return((filter_doc, 1))
def task_update_status(handler, req_data):
    """Set the ``status`` field of one yanxuan item.

    Args:
        handler: Request handler; not used by this function.
        req_data: Request payload; ``_id`` identifies the document and
            ``status`` is stored after conversion with ``int``.

    Returns (via ``tornado.gen.Return``):
        ``(raw_result, 1)`` where ``raw_result`` comes from the update result.
    """
    ap(req_data)
    selector = {"_id": ObjectId(req_data["_id"])}
    change = {"$set": {"status": int(req_data["status"])}}
    outcome = yield table_yanxuan_itemid.update_one(selector, change)
    raise tornado.gen.Return((outcome.raw_result, 1))
def update_good_price(handler, req_data):
    """Set the ``good_price`` field of one yanxuan item.

    Args:
        handler: Request handler; not used by this function.
        req_data: Request payload; ``_id`` identifies the document and
            ``good_price`` is stored after conversion with ``int``.

    Returns (via ``tornado.gen.Return``):
        ``(raw_result, 1)`` where ``raw_result`` comes from the update result.
    """
    ap(req_data)
    selector = {"_id": ObjectId(req_data["_id"])}
    change = {"$set": {"good_price": int(req_data["good_price"])}}
    outcome = yield table_yanxuan_itemid.update_one(selector, change)
    raise tornado.gen.Return((outcome.raw_result, 1))
def test():
    """Manual smoke check: fetch base info for a fixed item, then stop the loop.

    Prints the fetched info with ``ap`` and stops the current Tornado IOLoop.
    """
    request = {
        "url": get_item_url_by_id("3441177"),
        "index": 2,
    }
    info = yield get_base_info(request)
    ap(info)
    tornado.ioloop.IOLoop.current().stop()
def test():
    """Manual smoke check: fetch base and promotion info for a fixed item.

    Prints both results with ``ap`` and stops the current Tornado IOLoop.
    """
    request = {"url": get_item_url_by_id("2544379")}
    info = yield get_base_info(request)
    ap(info)
    promote_info = yield get_promote_info(info)
    ap(promote_info)
    tornado.ioloop.IOLoop.current().stop()
def get_presale_info(info):
    """Fetch the presale price and time window of an item.

    Generator-style coroutine. Unless ``DEBUG`` is set, the presale API is
    requested and its decoded body is written to ``presale_api.js``; the
    payload is then always read back from that file, so a ``DEBUG`` run
    reuses the last saved response.

    Args:
        info: Item dict; ``info["itemid"]`` is put into the API URL.

    Returns:
        ``None`` when the response has no truthy ``ret`` entry, otherwise a
        dict with ``currentPrice``, ``presaleStartTime`` and
        ``presaleEndTime`` copied from ``ret``.
    """
    presale_api_url = PRESALE_URL % (
        info["itemid"],
        str(time.time()).replace(".", ""),
        JQNAME,
    )
    ap(presale_api_url)

    if not DEBUG:
        response = yield tool.http_request({
            "url": presale_api_url,
            "method": "GET",
            "headers": {
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "en-US,en;q=0.9,zh-CN;q=0.8,zh-TW;q=0.7,zh;q=0.6",
            }
        })
        # Close the file deterministically so the read below sees all data.
        with open("presale_api.js", "w") as cache_file:
            cache_file.write(tool.try_decode_html_content(response.body))

    with open("presale_api.js", "r") as cache_file:
        presale_api_content = cache_file.read()

    presale_api_content_json = get_jsonp_json(presale_api_content)
    ret = presale_api_content_json.get("ret")
    if not ret:
        return None
    return {
        "currentPrice": ret["currentPrice"],
        "presaleStartTime": ret["presaleStartTime"],
        "presaleEndTime": ret["presaleEndTime"],
    }
def task_add(handler, req_data):
    """Insert the kaola items from the request that are not stored yet.

    Args:
        handler: Request handler; not used by this function.
        req_data: Request payload; ``itemid_list`` holds the item ids and
            ``status`` is copied onto every new document.

    Returns (via ``tornado.gen.Return``):
        ``(inserted_ids, 1)``; ``inserted_ids`` is an empty list when every
        requested item already exists.
    """
    ap(req_data)
    requested_ids = req_data["itemid_list"]
    exists_items = yield table_kaola_itemid.find({
        "itemid": {
            "$in": requested_ids
        }
    }).to_list(length=None)
    exists_items = {x["itemid"] for x in exists_items}

    items = [{"itemid": x, "status": req_data["status"]}
             for x in (set(requested_ids) - exists_items)]

    # insert_many() rejects an empty document list, so skip the call when
    # there is nothing new to store.
    if not items:
        raise tornado.gen.Return(([], 1))

    result = yield table_kaola_itemid.insert_many(items)
    raise tornado.gen.Return((result.inserted_ids, 1))
def add(handler, req_data):
    """Insert new JD item ids when the request targets the ``jingdong`` site.

    Args:
        handler: Request handler; not used by this function.
        req_data: Request payload; ``site`` must equal ``"jingdong"`` for
            anything to happen, ``itemid_list`` holds the candidate ids.

    Returns (via ``tornado.gen.Return``):
        The inserted ids, or ``None`` when the site is different or no id
        was new.
    """
    ap(req_data)
    inserted = None
    if req_data.get("site") == "jingdong":
        requested = req_data["itemid_list"]
        known_docs = yield table_jingdong_itemid.find({
            "itemid": {"$in": requested}
        }).to_list(length=None)
        known_ids = {doc["itemid"] for doc in known_docs}
        new_docs = [{"itemid": itemid} for itemid in (set(requested) - known_ids)]
        if new_docs:
            outcome = yield table_jingdong_itemid.insert_many(new_docs)
            inserted = outcome.inserted_ids
    raise tornado.gen.Return(inserted)
def remove_item(handler, req_data):
    """Delete a JD item and archive its current price document.

    The item document is removed first; its current price document (if any)
    is then removed from the live price table and re-inserted, without its
    ``_id``, into the old-price table.

    Args:
        handler: Request handler; not used by this function.
        req_data: Request payload; ``req_data["_id"]`` is converted to an
            ``ObjectId``.

    Returns (via ``tornado.gen.Return``):
        ``(True, 1)`` when the item existed and was removed, ``(False, 1)``
        when no item with that ``_id`` was found.
    """
    ap(req_data)
    item_detail = yield table_jingdong_itemid.find_one_and_delete({
        "_id": ObjectId(req_data["_id"]),
    })
    # find_one_and_delete() yields None when nothing matched.
    if item_detail is None:
        raise tornado.gen.Return((False, 1))

    item_price = yield table_jingdong_price.find_one_and_delete({
        "itemid": item_detail["itemid"],
    })
    # An item that was never priced has no price document to archive.
    if item_price is not None:
        del item_price["_id"]
        yield table_jingdong_price_old.insert_one(item_price)

    raise tornado.gen.Return((True, 1))
def get_base_info(item):
    """Scrape the basic fields of an item from its product page.

    Generator-style coroutine. Unless ``DEBUG`` is set, the page at
    ``item["url"]`` is downloaded and saved to ``kaola.base_url_page.html``;
    the HTML is then always read back from that file. The fields are taken
    from the lines following the first line containing ``$addGoods``: each
    line is cut at its first comma, stripped of single quotes and whitespace.

    Args:
        item: Item dict; ``item["url"]`` is the page to fetch.

    Returns:
        A dict of scraped string fields, or ``None`` when the ``$addGoods``
        marker is missing or fewer than ten lines follow it.
    """
    if not DEBUG:
        response = yield tool.http_request({
            "url": item["url"],
            "method": "GET",
            "headers": HEADERS
        })
        # Close the file deterministically so the read below sees all data.
        with open("kaola.base_url_page.html", "w") as page_file:
            page_file.write(tool.try_decode_html_content(response.body))

    with open("kaola.base_url_page.html", "r") as page_file:
        item_content = page_file.read()

    item_content_lines = item_content.split("\n")
    icat = next(
        (i for (i, x) in enumerate(item_content_lines) if "$addGoods" in x), -1)
    if icat < 0:
        # Without the marker the slice below would parse unrelated lines.
        ap("[WARN]:", "$addGoods marker not found in page.")
        return None

    raw_lines = item_content_lines[icat + 1:icat + 12]
    if len(raw_lines) < 10:
        # Indexes 0..9 are read below; a truncated block cannot be parsed.
        ap("[WARN]:", "$addGoods block is shorter than expected.")
        return None

    info_text = []
    for line in raw_lines:
        if "," in line:
            info_text.append(line[:line.index(",")].replace("'", "").strip())
        else:
            ap("[WARN]:", "Something unexpected happened.")
            info_text.append("")

    # Positions 5 and 10 of the block are intentionally not exported.
    info = {
        "分类id": info_text[0],
        "品牌id": info_text[1],
        "商品名称": info_text[2],
        "itemid": info_text[3],
        "商品售价": info_text[4],
        "分类名": info_text[6],
        "品牌名": info_text[7],
        "商品库存": info_text[8],
        "网络价": info_text[9],
    }
    return info
def task_list(handler, req_data):
    """Return one page of douban posts that are not marked as skipped.

    Args:
        handler: Request handler; not used by this function.
        req_data: Request payload. Recognised keys: ``page_index`` (1-based,
            default 1), ``page_size`` (default 50), ``sortby`` (default
            ``"date"``), ``order`` (default ``-1``), and the optional filters
            ``title`` (space-separated keywords that must all occur),
            ``author`` and ``group``.

    Returns (via ``tornado.gen.Return``):
        ``(documents, total_count)``; ``_id`` is removed from each document
        and ``total_count`` counts all matches, not just the page.
    """
    ap(req_data)
    page_index = req_data.get("page_index", 1) - 1
    page_size = req_data.get("page_size", 50)
    sort_field = req_data.get("sortby", "date")
    sort_direction = req_data.get("order", -1)

    # Only documents whose "skip" flag is 0 are listed.
    db_query = {"skip": 0}
    title = req_data.get("title")
    if title:
        # One lookahead per keyword: every keyword must appear, in any order.
        lookaheads = "".join(
            "(?=.*%s)" % word for word in title.split(" ") if word.strip())
        db_query["title"] = {"$regex": "^" + lookaheads + ".*$"}
    author = req_data.get("author")
    if author:
        db_query["author"] = author
    group = req_data.get("group")
    if group:
        db_query["group_name"] = group
    print("db_query:", db_query)

    result_count = yield table_douban.count_documents(db_query)
    cursor = table_douban.find(db_query)
    cursor = cursor.sort([(sort_field, sort_direction)])
    cursor = cursor.skip(page_index * page_size).limit(page_size)
    result_list = yield cursor.to_list(length=None)
    for document in result_list:
        del document["_id"]
    raise tornado.gen.Return((result_list, result_count))
def change_filter(handler, req_data):
    """Upsert the filter of a website, then re-apply the douban filter.

    Args:
        handler: Request handler; not used by this function.
        req_data: Request payload. ``website`` selects the filter document;
            ``title``, ``author``, ``author_full_name`` and ``not`` are
            stored as lists (comma-separated strings are split);
            ``price_min`` and ``price_max`` are stored as given.

    Returns (via ``tornado.gen.Return``):
        ``(modified_count, 1)`` where ``modified_count`` comes from the
        result of ``content_filter.filter_fresh_data``.
    """
    ap(req_data)
    website = req_data["website"]
    upsert_data = {"website": website}
    for key in ("title", "author", "author_full_name", "not"):
        value = req_data[key]
        upsert_data[key] = value if isinstance(value, list) else value.split(",")
    for key in ("price_min", "price_max"):
        upsert_data[key] = req_data[key]

    yield table_filter.update_one(
        {"website": website}, {"$set": upsert_data}, upsert=True)
    refreshed = yield content_filter.filter_fresh_data(table_douban, "douban")
    raise tornado.gen.Return((refreshed.modified_count, 1))
def execute():
    """Refresh all active kaola items and notify about interesting changes.

    Generator-style coroutine with two passes over the items whose
    ``status`` is 1:

    1. Fetch base info for each item and store it on the item document.
    2. Fetch promotion info, compare the item with its stored price
       document, archive the previous document when something changed, and
       send a WeChat message for changes worth reporting.

    Price increases, coupons that only disappeared, and stock changes other
    than "0" -> non-"0" are not reported. Failures for one item are logged
    and do not stop the run.
    """
    # Load the items to track (status == 1), without the status field.
    kaola_itemid_list = yield table_kaola_itemid.find(
        {"status": 1}, {"status": False}
    ).to_list(length=None)
    ap(kaola_itemid_list)

    for item in kaola_itemid_list:
        try:
            if not item.get("url"):
                item["url"] = fine_kaola.get_item_url_by_id(item["itemid"])
            base_info = yield get_base_info(item)
            if not base_info:
                continue
            item.update(base_info)
            yield table_kaola_itemid.update_one(
                {"_id": item["_id"]},
                {"$set": base_info},
            )
        except Exception:
            ap(traceback.format_exc())
        finally:
            yield tornado.gen.sleep(0.2)

    last_wx = None
    for item in kaola_itemid_list:
        # The name is missing when base info was never fetched successfully;
        # reading it with .get() keeps one such item from aborting the run.
        ap("Doing:", item["itemid"], item.get("商品名称") or "")
        # Fill a default when the base info lookup did not provide a value.
        item["分类id"] = item.get("分类id") or "0"

        # Stock, price, promotions and gifts.
        try:
            promote_info = yield get_promote_info(item)
            if not promote_info:
                continue
            item.update(promote_info)
        except Exception:
            ap(traceback.format_exc())
            continue
        finally:
            yield tornado.gen.sleep(0.2)

        # Compare with the stored data; on a difference store the new record,
        # archive the old one and send a message.
        try:
            old_item = yield table_kaola_price.find_one({"_id": item["_id"]})
            if not old_item:
                # No price information stored yet.
                item["datetime"] = tool.get_datetime_string()
                yield table_kaola_price.insert_one(item)
                continue

            # datetime is not part of the comparison: align it first.
            item["datetime"] = old_item["datetime"]
            if old_item == item:
                continue

            diff_keys = [
                x for x in COMPARE_KEYS
                if item.get(x) and item[x] != old_item.get(x)
            ]
            item["datetime"] = tool.get_datetime_string()
            yield table_kaola_price.update_one(
                {"_id": item["_id"]},
                {"$set": item},
            )
            del old_item["_id"]
            yield table_kaola_price_old.insert_one(old_item)

            # Do not report a price increase or a price of -1.
            if "min_price" in diff_keys:
                if 0 < float(old_item["min_price"]) < float(item["min_price"]) \
                        or float(item["min_price"]) == -1:
                    diff_keys.remove("min_price")

            # Do not report a list that only lost entries.
            for key in ("quan", ):
                for line in item[key]:
                    if line and line not in old_item[key]:
                        break
                else:
                    if key in diff_keys:
                        diff_keys.remove(key)

            # Only a change from "0" stock to something else is reported.
            if "current_store" in diff_keys:
                if not (old_item["current_store"] == "0"
                        and item["current_store"] != "0"):
                    diff_keys.remove("current_store")

            if not diff_keys:
                continue

            # The pieces are joined directly; no second "%" pass is applied,
            # so a "%" inside the URL cannot break the formatting.
            content = "\r\n".join([
                "",
                ",".join(diff_keys),
                "",
                "[新数据](%s)" % (item.get("url")),
                "",
                "```json",
                str(get_wx_content(item)),
                "```",
                "",
                "# 旧数据",
                "",
                "```json",
                str(get_wx_content(old_item)),
                "```",
                "",
            ])
            last_wx = tool.send_to_my_wx(
                "考拉" + (item.get("商品名称") or ""), content)
        except Exception:
            ap(traceback.format_exc())
            yield tornado.gen.sleep(2)
        finally:
            yield tornado.gen.sleep(0.5)

    # Wait for the most recently started message before finishing.
    if last_wx:
        try:
            yield last_wx
        except Exception:
            ap(traceback.format_exc())
            ap("Last wx send fail!")
def filter_fresh_data(collection, website="douban"):
    """Re-apply the stored content filter of ``website`` to ``collection``.

    Every document is first marked ``skip = 1``; documents that pass the
    filter are then set back to ``skip = 0``. A document passes when its
    price is 0 or within ``[price_min, price_max]`` (defaults 0 and 99999),
    its title does not match a blocked title keyword, and its author matches
    neither a blocked author pattern nor a blocked full name.

    Args:
        collection: Collection whose documents are re-flagged.
        website: Key of the filter document in ``table_filter``.

    Returns (via ``tornado.gen.Return``):
        The result of the second ``update_many`` (the ``skip = 0`` update).
    """
    # A missing filter document is treated as an empty filter.
    content_filter = (yield table_filter.find_one({"website": website})) or {}

    yield collection.update_many({}, {"$set": {
        "skip": 1,
    }})

    db_query = {
        "$or": [{
            "price": 0
        }, {
            "price": {
                "$gte": content_filter.get("price_min", 0),
                "$lte": content_filter.get("price_max", 99999),
            }
        }],
    }
    and_clauses = []

    # Empty strings (e.g. from splitting an empty form field) would turn the
    # alternation into a pattern that matches every document; drop them.
    blocked_titles = [t for t in (content_filter.get("title") or []) if t]
    if blocked_titles:
        title_alternatives = "|".join(blocked_titles)
        negations = "".join(content_filter.get("not") or [])
        if negations:
            # A blocked keyword only counts when it is NOT directly preceded
            # by one of the single negation characters. A lookbehind was not
            # usable (it must be fixed length), and a bare [^...] cannot
            # match at the start of the string, hence the "|^" alternative.
            # References:
            #   https://alf.nu/RegexGolf
            #   https://www.regextester.com/15
            #   https://gist.github.com/Davidebyzero/9221685
            #   https://gist.github.com/jonathanmorley/8058871
            title_regex = ".*([^%s]|^)(%s).*" % (negations, title_alternatives)
        else:
            # No negation characters: "[^]" would not be a valid class.
            title_regex = ".*(%s).*" % title_alternatives
        and_clauses.append({
            "title": {
                "$not": {
                    "$regex": title_regex,
                    "$options": "i"
                }
            }
        })

    blocked_authors = [a for a in (content_filter.get("author") or []) if a]
    if blocked_authors:
        and_clauses.append({
            "author": {
                "$not": {
                    "$regex": "(%s)" % "|".join(blocked_authors)
                },
                "$nin": content_filter.get("author_full_name", [])
            }
        })

    # MongoDB rejects an empty "$and" array, so only add it when needed.
    if and_clauses:
        db_query["$and"] = and_clauses

    ap("db_query:", db_query)
    update_all_data = yield collection.update_many(db_query, {"$set": {
        "skip": 0
    }})
    raise tornado.gen.Return(update_all_data)
def execute():
    """Refresh all active yanxuan items and notify about interesting changes.

    Generator-style coroutine. For every item whose ``status`` is 1 it
    fetches the base info, stores it (without ``promote`` and ``quan``) on
    the item document, compares the item with its stored price document,
    archives the previous document when something changed, and sends a
    WeChat message when a compared key changed and, for ``quan`` and
    ``promote``, at least one entry is new. Failures for one item are logged
    and do not stop the run.
    """
    # Load the items to track (status == 1), without the status field.
    yanxuan_itemid_list = yield table_yanxuan_itemid.find(
        {"status": 1}, {"status": False}
    ).to_list(length=None)
    ap(yanxuan_itemid_list)

    last_wx = None
    for item in yanxuan_itemid_list:
        if not item.get("url"):
            item["url"] = fine_yanxuan.get_item_url_by_id(item["itemid"])

        # Stock, price, promotions and coupons.
        try:
            base_info = yield get_base_info(item)
            if not base_info:
                continue
            ap(base_info)
            item.update(base_info)
            # promote / quan stay on the in-memory item only; they are not
            # written to the item document.
            del base_info["promote"]
            del base_info["quan"]
            yield table_yanxuan_itemid.update_one(
                {"_id": item["_id"]},
                {"$set": base_info},
            )
        except Exception:
            ap(traceback.format_exc())
            continue
        finally:
            yield tornado.gen.sleep(0.2)

        # Compare with the stored data; on a difference store the new record,
        # archive the old one and send a message.
        try:
            old_item = yield table_yanxuan_price.find_one({"_id": item["_id"]})
            if not old_item:
                # No price information stored yet.
                item["datetime"] = tool.get_datetime_string()
                yield table_yanxuan_price.insert_one(item)
                continue

            # datetime is not part of the comparison: align it first.
            item["datetime"] = old_item["datetime"]
            if old_item == item:
                continue

            diff_keys = [
                x for x in COMPARE_KEYS
                if item.get(x) and item[x] != old_item.get(x)
            ]
            item["datetime"] = tool.get_datetime_string()
            yield table_yanxuan_price.update_one(
                {"_id": item["_id"]},
                {"$set": item},
            )
            del old_item["_id"]
            yield table_yanxuan_price_old.insert_one(old_item)

            # Do not report a list that only lost entries.
            for key in ("quan", "promote"):
                for line in item[key]:
                    if line and line not in old_item[key]:
                        break
                else:
                    if key in diff_keys:
                        diff_keys.remove(key)

            if not diff_keys:
                continue

            # The pieces are joined directly; no second "%" pass is applied,
            # so a "%" inside the URL cannot break the formatting.
            content = "\r\n".join([
                "",
                ",".join(diff_keys),
                "",
                "[新数据](%s)" % (item.get("url")),
                "",
                "```json",
                str(get_wx_content(item)),
                "```",
                "",
                "# 旧数据",
                "",
                "```json",
                str(get_wx_content(old_item)),
                "```",
                "",
            ])
            last_wx = tool.send_to_my_wx("严选" + item["name"], content)
        except Exception:
            ap(traceback.format_exc())
            yield tornado.gen.sleep(2)
        finally:
            yield tornado.gen.sleep(0.5)

    # Wait for the most recently started message before finishing.
    if last_wx:
        try:
            yield last_wx
        except Exception:
            ap(traceback.format_exc())
            ap("Last wx send fail!")
def execute():
    """Refresh all active JD items and notify about interesting changes.

    Generator-style coroutine with two passes over the items whose
    ``status`` is 1:

    1. Fetch base info for each item and store it on the item document.
    2. Fetch store, promotion and (when flagged) presale info, let
       ``calc_discount.JDDiscount.calc`` annotate the item, compare the item
       with its stored price document, archive the previous document when
       something changed, and send a WeChat message for changes worth
       reporting.

    Not reported: price increases, lists (promote / gift / quan / ads /
    feedback) that only lost entries, stock changes other than
    "无货" -> in stock, and items whose ``calc_price`` is below 1.0 or above
    120% of the stored ``good_price``. Failures for one item are logged and
    do not stop the run.
    """
    # Load the items to track (status == 1), without the status field.
    jingdong_itemid_list = yield table_jingdong_itemid.find(
        {"status": 1},
        {"status": False},
    ).to_list(length=None)

    for item in jingdong_itemid_list:
        try:
            if not item.get("url"):
                item["url"] = fine_jingdong.get_item_url_by_id(item["itemid"])
            base_info = yield get_base_info(item)
            if not base_info:
                continue
            item.update(base_info)
            yield table_jingdong_itemid.update_one(
                {"_id": item["_id"]}, {"$set": base_info})
        except Exception:
            ap(traceback.format_exc())
        finally:
            yield tornado.gen.sleep(0.1)

    last_wx = None
    for item in jingdong_itemid_list:
        # Fill defaults for values the base info lookup did not provide.
        item["cat"] = item.get("cat") or "1,2,3"
        item["name"] = item.get("name") or ""
        item["venderId"] = item.get("venderId") or "0"
        item["shopId"] = item.get("shopId") or "0"
        # Logged after the defaults so a missing name cannot raise here.
        ap("Doing:", item["itemid"], item["name"])

        # Stock, price and shop name.
        try:
            store_info = yield get_store_info(item)
            if not store_info:
                continue
            item.update(store_info)
        except Exception:
            ap(traceback.format_exc())
            yield tornado.gen.sleep(2)
            continue
        finally:
            yield tornado.gen.sleep(0.5)

        # Promotions, gifts, coupons and ads.
        try:
            promote_info = yield get_promote_info(item)
            if not promote_info:
                continue
            item.update(promote_info)
        except Exception:
            ap(traceback.format_exc())
            yield tornado.gen.sleep(2)
            continue
        finally:
            yield tornado.gen.sleep(0.5)

        # Presale price, only for items flagged as presale.
        if item.get("presale"):
            try:
                presale_info = yield fine_jingdong.get_presale_info(item)
                item["presale_info"] = presale_info if presale_info else None
            except Exception:
                ap(traceback.format_exc())
                yield tornado.gen.sleep(2)
            finally:
                yield tornado.gen.sleep(0.5)

        # Compare with the stored data; on a difference store the new record,
        # archive the old one and send a message.
        try:
            old_item = yield table_jingdong_price.find_one(
                {"_id": item["_id"]})

            calc_price_text = ""
            is_good_price = False
            try:
                calc_discount.JDDiscount.calc(item, old_item)
                if 0 < item["calc_price"] < item.get("good_price", 0):
                    is_good_price = True
                calc_price_text = "\r\n预估价:%s,%s\r\n" % (
                    item["calc_price"], item["calc_advice"])
            except Exception:
                ap(traceback.format_exc())

            if not old_item:
                # No price information stored yet.
                item["datetime"] = tool.get_datetime_string()
                yield table_jingdong_price.insert_one(item)
                continue

            # datetime / good_price are not part of the comparison: align
            # them first.
            item["datetime"] = old_item["datetime"]
            if "good_price" in item:
                old_item["good_price"] = item["good_price"]
            if old_item == item:
                continue

            diff_keys = [
                x for x in COMPARE_KEYS
                if item.get(x) and item[x] != old_item.get(x)
            ]

            # Coupons are handled separately: expired ones are dropped, and
            # unexpired ones from the old record are carried over even when
            # the new fetch no longer lists them.
            if "quan" in item:
                if "quan" in old_item:
                    new_quan = {}
                    for it in (old_item["quan"] + item["quan"]):
                        # Two-element entries are a legacy format.
                        if len(it) == 2:
                            continue
                        # Skip coupons whose end date is before today.
                        if it[1].split(" ~ ")[-1] < tool.get_date("today"):
                            continue
                        new_quan[it[2]] = it
                    old_quan_all = {
                        x[2] for x in old_item["quan"] if len(x) > 3
                    }
                    item["quan"] = [new_quan[x] for x in new_quan]
                    if set(new_quan.keys()) != old_quan_all:
                        # Avoid a duplicate entry; the single remove() below
                        # would otherwise leave a stale "quan" behind.
                        if "quan" not in diff_keys:
                            diff_keys.append("quan")
                    elif not diff_keys:
                        # Nothing else differs: no update needed.
                        continue
                elif "quan" not in diff_keys:
                    diff_keys.append("quan")

            item["datetime"] = tool.get_datetime_string()
            yield table_jingdong_price.update_one(
                {"_id": item["_id"]}, {"$set": item})
            del old_item["_id"]
            yield table_jingdong_price_old.insert_one(old_item)

            # Do not report a price increase or a price of -1.
            if "price" in diff_keys:
                if 0 < float(old_item["price"]) < float(item["price"]) \
                        or float(item["price"]) == -1:
                    diff_keys.remove("price")

            # Do not report a list that only lost entries. Empty entries
            # (ads is sometimes [] or [""]) and promotions matching
            # PROMOTE_FILTER do not count as new.
            for key in ("promote", "gift", "quan", "ads", "feedback"):
                for line in item[key]:
                    if line and line not in old_item[key]:
                        if not (key == "promote"
                                and re.search(PROMOTE_FILTER, line[0])):
                            break
                else:
                    if key in diff_keys:
                        diff_keys.remove(key)

            # Only a change from out-of-stock to in-stock is reported.
            if "stock" in diff_keys:
                if not (old_item["stock"] == "无货" and item["stock"] != "无货"):
                    diff_keys.remove("stock")

            if not diff_keys:
                continue

            # Skip when the estimated price is still above 120% of the good
            # price, or is below 1.0.
            if (old_item.get("good_price") or 0) * 1.20 < item["calc_price"] \
                    or item["calc_price"] < 1.0:
                continue

            # The pieces are joined directly; no second "%" pass is applied,
            # so a "%" in the URL or in the price advice cannot break the
            # formatting.
            content = "\r\n".join([
                "",
                ",".join(diff_keys),
                "",
                "[商品链接](%s) 好价:%s" % (
                    item.get("url"), old_item.get("good_price") or 0),
                calc_price_text,
                "```json",
                str(get_wx_content(item)),
                "```",
                "",
                "# 旧数据",
                "",
                "```json",
                str(get_wx_content(old_item)),
                "```",
                "",
            ])
            last_wx = tool.send_to_my_wx(
                ("__" if is_good_price else "") + "京东" + item["name"],
                content,
            )
        except Exception:
            ap(traceback.format_exc())
            yield tornado.gen.sleep(2)
        finally:
            yield tornado.gen.sleep(0.5)

    # Wait for the most recently started message before finishing.
    if last_wx:
        try:
            yield last_wx
        except Exception:
            ap(traceback.format_exc())
            ap("Last wx send fail!")