def get_store_info(info):
    """Fetch price, vendor name and stock state for one item.

    Generator-style coroutine: unless ``DEBUG`` is set it yields the object
    returned by ``tool.http_request`` and expects the response to be sent
    back in. The result is delivered as the generator's return value.
    (NOTE(review): presumably driven by a coroutine runner such as
    tornado.gen -- the decorator is not visible here.)

    The decoded response body is cached in ``store_api.js``; when ``DEBUG``
    is truthy no request is made and the cached file is parsed instead.

    Args:
        info: mapping providing ``"itemid"``, ``"venderId"`` and ``"cat"``,
            which are interpolated into ``STORE_URL``.

    Returns:
        dict with ``"price"`` (float, ``-1.0`` when the payload carries no
        ``jdPrice``), ``"vender"`` (vendor name, ``"自营"`` when none is
        given) and ``"stock"`` (the payload's ``StockStateName``).

    Raises:
        KeyError: if the payload lacks ``"stock"``, ``"StockStateName"`` or
            the base price ``"p"``.
        ValueError: if a price field cannot be converted to ``float``.
    """
    cache_path = "store_api.js"
    store_api_url = STORE_URL % (
        info["itemid"],
        MY_AREA,
        info["venderId"],
        info["cat"],
        str(time.time()).replace(".", ""),
        JQNAME,
    )

    if not DEBUG:
        response = yield tool.http_request({
            "url": store_api_url,
            "method": "GET",
            "headers": {
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "en-US,en;q=0.9,zh-CN;q=0.8,zh-TW;q=0.7,zh;q=0.6",
            }
        })
        # Context managers make sure the cache file is flushed and closed
        # before it is read back below.
        with open(cache_path, "w", encoding="utf-8") as cache_file:
            cache_file.write(tool.try_decode_html_content(response.body))

    with open(cache_path, "r", encoding="utf-8") as cache_file:
        store_api_content = cache_file.read()
    stock = get_jsonp_json(store_api_content)["stock"]

    # Vendor name: prefer "self_D", then "D"; fall back to "自营".
    vender_info = stock.get("self_D") or stock.get("D") or {}
    vender_string = vender_info.get("vender") or "自营"

    # Use the "tpp" price when present (the original comment calls it the
    # plus price, usually lower), otherwise the regular price "p"; then take
    # "sfp" if that is lower still.
    jd_price = stock.get("jdPrice")
    if jd_price:
        # Convert to float *before* comparing: the values look like strings
        # such as "299.00" (see the "-1.00" fallback in the original), and
        # min() on strings compares lexicographically ("1000.00" < "99.00").
        price = float(jd_price.get("tpp") or jd_price["p"])
        sfp = jd_price.get("sfp")
        if sfp:
            price = min(float(sfp), price)
    else:
        price = -1.0

    return {
        "price": price,
        "vender": vender_string,
        "stock": stock["StockStateName"],
    }
def get_promote_info(info):
    """Fetch lowest price, stock, promotions and coupons for one item.

    Generator-style coroutine: unless ``DEBUG`` is set it yields the object
    returned by ``tool.http_request`` and expects the response to be sent
    back in. The result is delivered as the generator's return value.

    The decoded response body is cached in ``kaola.promopt_page.html``; when
    ``DEBUG`` is truthy no request is made and the cached file is parsed
    instead.

    Args:
        info: mapping providing ``"itemid"`` and ``"分类id"``, which are
            interpolated into ``PROMOTE_URL`` together with the first three
            entries of ``MY_AREA`` and a millisecond timestamp.

    Returns:
        dict with ``"min_price"``, ``"current_store"``, ``"promote"`` (list
        of ``[content, url, "0000 ~ 0000"]``), ``"quan"`` (coupon list, may
        be empty) and ``"presale"`` (bool).

    Raises:
        KeyError / IndexError: if a required field is missing both at the
            top level of ``data`` and in the first ``skuDetailList`` entry.
    """
    cache_path = "kaola.promopt_page.html"
    promote_api_url = PROMOTE_URL % (
        MY_AREA[0],
        MY_AREA[1],
        MY_AREA[2],
        info["itemid"],
        info["分类id"],
        int(time.time() * 1000),
    )

    if not DEBUG:
        response = yield tool.http_request({
            "url": promote_api_url,
            "method": "GET",
            "headers": HEADERS
        })
        with open(cache_path, "w", encoding="utf-8") as cache_file:
            cache_file.write(tool.try_decode_html_content(response.body))

    with open(cache_path, "r", encoding="utf-8") as cache_file:
        data = tool.json_load(cache_file.read())["data"]

    # Each field is looked up on the goods-level payload first and then on
    # the first SKU entry. (Original author's note: "aren't these two
    # exactly the same?")
    sku_price = data.get("skuPrice") or data["skuDetailList"][0]["skuPrice"]
    min_price = min(
        sku_price["currentPrice"],
        sku_price["kaolaPrice"],
        sku_price["suggestPrice"],
        sku_price["marketPrice"],
    )

    # Presale info is optional, so a missing key on the SKU entry must not
    # raise; ``.get`` yields None, which is reported as presale=False.
    presale = (data.get("depositGoodsAdditionalInfo")
               or data["skuDetailList"][0].get("depositSkuAdditionalInfo"))
    if presale:
        min_price = presale.get("handPrice") or min_price

    current_store = (data.get("goodsCurrentStore")
                     or data["skuDetailList"][0]["skuStore"]["currentStore"])

    # Promotions are optional as well; fall back to an empty list.
    promotion_info = (data.get("promotionList")
                      or data["skuDetailList"][0].get("promotionList")
                      or [])
    promote = [
        [entry["promotionContent"], entry["promotionUrl"], "0000 ~ 0000"]
        for entry in promotion_info
    ]
    quan = data.get("goodsCouponList") or []

    return {
        "min_price": min_price,
        "current_store": current_store,
        "promote": promote,
        "quan": quan,
        "presale": bool(presale),
    }
def get_presale_info(info):
    """Fetch presale price and presale time window for one item.

    Generator-style coroutine: unless ``DEBUG`` is set it yields the object
    returned by ``tool.http_request`` and expects the response to be sent
    back in. The result is delivered as the generator's return value.

    The decoded response body is cached in ``presale_api.js``; when ``DEBUG``
    is truthy no request is made and the cached file is parsed instead.

    Args:
        info: mapping providing ``"itemid"``, which is interpolated into
            ``PRESALE_URL``.

    Returns:
        ``None`` when the payload has no (or an empty) ``"ret"`` entry,
        otherwise a dict with ``"currentPrice"``, ``"presaleStartTime"`` and
        ``"presaleEndTime"`` copied from ``ret``.

    Raises:
        KeyError: if ``ret`` is present but lacks one of those three keys.
    """
    cache_path = "presale_api.js"
    presale_api_url = PRESALE_URL % (
        info["itemid"],
        str(time.time()).replace(".", ""),
        JQNAME,
    )
    ap(presale_api_url)

    if not DEBUG:
        response = yield tool.http_request({
            "url": presale_api_url,
            "method": "GET",
            "headers": {
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "en-US,en;q=0.9,zh-CN;q=0.8,zh-TW;q=0.7,zh;q=0.6",
            }
        })
        # Context managers make sure the cache file is flushed and closed
        # before it is read back below.
        with open(cache_path, "w", encoding="utf-8") as cache_file:
            cache_file.write(tool.try_decode_html_content(response.body))

    with open(cache_path, "r", encoding="utf-8") as cache_file:
        presale_api_content = cache_file.read()
    presale_api_content_json = get_jsonp_json(presale_api_content)

    ret = presale_api_content_json.get("ret")
    if not ret:
        return None

    return {
        "currentPrice": ret["currentPrice"],
        "presaleStartTime": ret["presaleStartTime"],
        "presaleEndTime": ret["presaleEndTime"],
    }
def get_base_info(item):
    """Scrape the basic product fields from an item page.

    Generator-style coroutine: unless ``DEBUG`` is set it yields the object
    returned by ``tool.http_request`` and expects the response to be sent
    back in. The result is delivered as the generator's return value.

    The decoded page is cached in ``kaola.base_url_page.html``; when
    ``DEBUG`` is truthy no request is made and the cached file is parsed
    instead.

    The fields are read from the 11 lines following the first line that
    contains ``$addGoods``. Each line is cut at its first comma, stripped of
    single quotes and surrounding whitespace. Lines without a comma, lines
    that are missing, or a missing marker all yield ``""`` (with a warning
    via ``ap``) instead of raising or parsing unrelated lines.

    Args:
        item: mapping providing ``"url"``, the page to fetch.

    Returns:
        dict keyed by ``"分类id"``, ``"品牌id"``, ``"商品名称"``,
        ``"itemid"``, ``"商品售价"``, ``"分类名"``, ``"品牌名"``,
        ``"商品库存"`` and ``"网络价"``; all values are strings.
    """
    cache_path = "kaola.base_url_page.html"
    field_count = 11  # number of lines taken after the marker line

    if not DEBUG:
        response = yield tool.http_request({
            "url": item["url"],
            "method": "GET",
            "headers": HEADERS
        })
        with open(cache_path, "w", encoding="utf-8") as cache_file:
            cache_file.write(tool.try_decode_html_content(response.body))

    with open(cache_path, "r", encoding="utf-8") as cache_file:
        item_content_lines = cache_file.read().split("\n")

    marker_index = next(
        (i for i, text in enumerate(item_content_lines) if "$addGoods" in text),
        None)
    if marker_index is None:
        # Without the marker there is nothing meaningful to parse; do not
        # fall back to the first lines of the page.
        ap("[WARN]:", "$addGoods marker not found in page.")
        raw_fields = []
    else:
        start = marker_index + 1
        raw_fields = item_content_lines[start:start + field_count]

    info_text = []
    for raw in raw_fields:
        if "," in raw:
            value = raw[:raw.index(",")]
            info_text.append(value.replace("'", "").strip())
        else:
            ap("[WARN]:", "Something unexpected happened.")
            info_text.append("")

    # Pad so the positional lookups below never raise IndexError when the
    # page ends early or the marker was not found.
    info_text.extend([""] * (field_count - len(info_text)))

    # Positions 5 and 10 are intentionally skipped; the original comments
    # label them product image and favourite count.
    return {
        "分类id": info_text[0],
        "品牌id": info_text[1],
        "商品名称": info_text[2],
        "itemid": info_text[3],
        "商品售价": info_text[4],
        "分类名": info_text[6],
        "品牌名": info_text[7],
        "商品库存": info_text[8],
        "网络价": info_text[9],
    }
def get_base_info(item):
    """Scrape the basic product info and title from an item page.

    Generator-style coroutine: unless ``DEBUG`` is set it yields the object
    returned by ``tool.http_request`` and expects the response to be sent
    back in. The result is delivered as the generator's return value.

    The decoded page is cached in ``content_page.html``; when ``DEBUG`` is
    truthy no request is made and the cached file is parsed instead.

    Args:
        item: mapping providing ``"url"``, used both as request URL and as
            ``Referer`` header.

    Returns:
        Whatever ``get_item_neighbor`` returns for the line containing
        ``"cat: ["`` (index ``-1`` is passed when no such line exists), with
        ``"name"`` added when a ``<title>`` line is found among the first 20
        lines. NOTE(review): assumes that return value is a mutable mapping.
    """
    cache_path = "content_page.html"

    if not DEBUG:
        response = yield tool.http_request({
            "url": item["url"],
            "method": "GET",
            "headers": {
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "en-US,en;q=0.9,zh-CN;q=0.8,zh-TW;q=0.7,zh;q=0.6",
                "Referer": item["url"],
                "Pragma": "no-cache",
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36",
            }
        })
        # Context managers make sure the cache file is flushed and closed
        # before it is read back below.
        with open(cache_path, "w", encoding="utf-8") as cache_file:
            cache_file.write(tool.try_decode_html_content(response.body))

    with open(cache_path, "r", encoding="utf-8") as cache_file:
        item_content_lines = cache_file.read().split("\n")

    icat = next(
        (i for i, text in enumerate(item_content_lines) if "cat: [" in text),
        -1)
    info = get_item_neighbor(item_content_lines, icat)

    # Strip the opening <title> tag (with leading non-word characters) and
    # the trailing "【...】...</title>" suffix, leaving the bare product name.
    # If several of the first 20 lines contain <title>, the last one wins.
    for text in item_content_lines[:20]:
        if "<title>" in text:
            info["name"] = re.sub(
                r"""([\W]*<title>|[【][^【]*[】][^】]*</title>[\W]*$)""", "",
                text)
    return info
def get_promote_info(info):
    """Fetch promotions, gifts, coupons, rebates and ads for one item.

    Generator-style coroutine: unless ``DEBUG`` is set it yields the object
    returned by ``tool.http_request`` and expects the response to be sent
    back in. The result is delivered as the generator's return value.

    The decoded response body is cached in ``promote_api.js``; when ``DEBUG``
    is truthy no request is made and the cached file is parsed instead.

    Args:
        info: mapping providing ``"itemid"``, ``"shopId"``, ``"venderId"``,
            ``"cat"`` (comma-separated string) and ``"price"``, which are
            interpolated into ``PROMOTE_URL``.

    Returns:
        dict with
        ``"promote"``: list of ``[content, url, "start ~ end"]``;
        ``"gift"``: list of ``[name, end]`` or ``[name, end, gifts]`` where
        ``gifts`` is a list of ``[name, url]``;
        ``"quan"``: list of ``[description, "begin ~ end", key, url]``;
        ``"feedback"``: ``[url, title]`` or ``""`` when there is none;
        ``"ads"``: list of ad strings.

    Raises:
        KeyError: if the payload lacks ``"prom"``, ``"pickOneTag"`` or
            ``"tags"``, or an entry lacks one of the fields read below.
    """
    cache_path = "promote_api.js"
    promote_api_url = PROMOTE_URL % (
        info["itemid"],
        MY_AREA,
        info["shopId"],
        info["venderId"],
        info["cat"].replace(",", "%2C"),
        info["price"],
        str(time.time()).replace(".", "")[:-3],
        JQNAME,
    )

    if not DEBUG:
        # Original author's note: a cookie is now required by the endpoint.
        # NOTE(review): both the cookie and the Referer are hard-coded
        # values captured from one browser session / one item page.
        cookie = (
            "__jdv=76161171|direct|-|none|-|1614594740019; "
            "__jdu=1614594740018869872184; "
            "areaId=19; "
            "ipLoc-djd=19-1601-3633-0; "
            "PCSYCityID=CN_440000_440100_440106; "
            "shshshfpa=00883b4f-d3c1-1602-7cd6-17731ed20a6e-1614594741; "
            "shshshfpb=m8UQnw74GyqJycpcp0lvCLg%3D%3D; "
            "__jda=122270672.1614594740018869872184.1614594740.1614594740.1614594740.1; "
            "__jdc=122270672; "
            "3AB9D23F7A4B3C9B=RE4QF44JWCVUXEC7MQAZGA24NVF27LEI6CEQC4P7SABGXROC4ZDLKLWQBR6ULUZOEYHS5I7WMZBDNH5KDNWYC7VZFY; "
            "shshshfp=0263d234510f0c11eede903101b88cca; "
            "shshshsID=4122aa91f19c1d2e5fbd4fbec3deab0d_3_1614594756699; "
            "__jdb=122270672.4.1614594740018869872184|1.1614594740"
        )
        response = yield tool.http_request({
            "url": promote_api_url,
            "method": "GET",
            "headers": {
                "accept": "*/*",
                "accept-encoding": "gzip, deflate, br",
                "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh-TW;q=0.7,zh;q=0.6,ja;q=0.5",
                "Referer": "https://item.jd.com/65610440044.html",
                "Pragma": "no-cache",
                "cookie": cookie,
                "sec-fetch-dest": "script",
                "sec-fetch-mode": "no-cors",
                "sec-fetch-site": "same-site",
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.192 Safari/537.36",
            }
        })
        with open(cache_path, "w", encoding="utf-8") as cache_file:
            cache_file.write(tool.try_decode_html_content(response.body))

    with open(cache_path, "r", encoding="utf-8") as cache_file:
        promote_api_content = cache_file.read()
    payload = get_jsonp_json(promote_api_content)

    # Inline ads. The angle brackets arrive HTML-escaped; turn them back
    # into real characters (the previous replace calls mapped "<" to "<"
    # and ">" to ">", i.e. did nothing).
    ads_strings = [
        entry["ad"].replace("&lt;", "<").replace("&gt;", ">")
        for entry in payload.get("ads") or []
    ]

    # Promotions: [content, url built from the pid prefix, validity range].
    promote_strings = [
        [
            entry["content"],
            GET_PROMOTE_URL % entry["pid"].split("_")[0],
            "%s ~ %s" % (tool.get_datetime_from_stamp(entry["st"]),
                         tool.get_datetime_from_stamp(entry["d"])),
        ]
        for entry in payload["prom"]["pickOneTag"]
    ]

    # Gifts: tags carrying "gifts" also list each gift as [name, url];
    # other named tags only contribute name and end time.
    gift_strings = []
    for tag in payload["prom"]["tags"]:
        if "gifts" in tag:
            gift_list = [
                [gift["nm"], CONTENT_URL % gift["sid"]]
                for gift in tag["gifts"]
            ]
            gift_strings.append([
                tag["name"],
                tool.get_datetime_from_stamp(tag["d"]),
                gift_list,
            ])
        elif "name" in tag:
            gift_strings.append(
                [tag["name"], tool.get_datetime_from_stamp(tag["d"])])

    # Rebate coupon / points. Stays "" (not a list) when absent, which is
    # the value existing callers receive today.
    feedback_strings = ""
    rebate = payload.get("quan")
    if rebate:
        feedback_url = rebate["actUrl"] or (MFDETAIL % rebate["activityId"])
        # Protocol-relative URLs get an explicit scheme.
        if feedback_url[:2] == "//":
            feedback_url = "https:%s" % feedback_url
        feedback_strings = [feedback_url, rebate["title"]]

    # Coupons that can be claimed / used on this item.
    quan_strings = []
    for coupon in payload.get("skuCoupon") or []:
        description = coupon.get("allDesc") or "满%s减%s" % (
            coupon["quota"], coupon["discount"])
        validity = "%s ~ %s" % (coupon.get("beginTime") or "",
                                coupon.get("endTime") or "")
        quan_strings.append([
            description,
            validity,
            coupon.get("key"),
            coupon.get("url") or "",
        ])

    return {
        "promote": promote_strings,
        "gift": gift_strings,
        "quan": quan_strings,
        "feedback": feedback_strings,
        "ads": ads_strings,
    }
def get_base_info(item):
    """Scrape name, price, promotions and coupons for one SKU.

    Generator-style coroutine: unless ``DEBUG`` is set it yields the object
    returned by ``tool.http_request`` and expects the response to be sent
    back in. The result is delivered as the generator's return value.

    The decoded page is cached in ``yanxuan.base_url_page.html``; when
    ``DEBUG`` is truthy no request is made and the cached file is parsed
    instead.

    Args:
        item: mapping providing ``"url"`` and either ``"iid"`` (SKU id to
            look up in ``skuList``) or ``"index"`` (position in ``skuList``).

    Returns:
        ``None`` when the page has no ``"item":`` line or the requested SKU
        is not found; otherwise a dict with ``"name"``, ``"iid"``,
        ``"promote"`` (list of ``[name, url, "0 ~ 0"]``), ``"quan"`` (list of
        coupon names or ``None``), ``"price"`` and ``"store"`` (the SKU's
        ``sellVolume``).

    Raises:
        KeyError / IndexError: if the embedded JSON lacks ``skuList``, the
            requested index, or one of the SKU fields read below.
    """
    cache_path = "yanxuan.base_url_page.html"
    marker = "\"item\":"

    if not DEBUG:
        response = yield tool.http_request({
            "url": item["url"],
            "method": "GET",
            "headers": HEADERS
        })
        with open(cache_path, "w", encoding="utf-8") as cache_file:
            cache_file.write(tool.try_decode_html_content(response.body))

    with open(cache_path, "r", encoding="utf-8") as cache_file:
        item_content_lines = cache_file.read().split("\n")

    item_line = next(
        (text for text in item_content_lines if marker in text), None)
    if item_line is None:
        # No embedded item JSON on the page: report "not found" instead of
        # trying to parse the last line of the file (index -1).
        return None

    # Keep what follows the marker and drop the trailing comma. For a line
    # of the exact form `"item":{...},` this equals the former [7:-1] slice,
    # but it also tolerates indentation or a missing trailing comma.
    info_text = item_line.split(marker, 1)[1].strip().rstrip(",")
    info_json = tool.json_load(info_text)

    if item.get("iid"):
        item_info = next(
            (sku for sku in info_json["skuList"] if sku["id"] == item["iid"]),
            {})
    else:
        item_info = info_json["skuList"][item["index"]]
    if not item_info:
        return None

    # The promotion list may be absent or None; treat that as "none".
    promote_info = item_info.get("hdrkDetailVOList") or []

    if item_info.get("couponShortNameList"):
        quan_info = item_info.get("couponShortNameList")
    elif item_info.get("shortCouponList"):
        quan_info = [
            coupon["displayName"] for coupon in item_info["shortCouponList"]
        ]
    else:
        quan_info = None

    # Start from the lowest listed price, then apply banner prices when
    # they are positive and lower.
    price = min(item_info["retailPrice"], item_info["counterPrice"],
                item_info["calcPrice"], item_info["preSellPrice"])
    for banner_key, price_key in (("spmcBanner", "spmcPrice"),
                                  ("detailPromBanner", "activityPrice")):
        banner = item_info.get(banner_key)
        if not banner:
            continue
        banner_price = float(banner.get(price_key) or 0)
        if banner_price > 0:
            price = min(banner_price, price)

    return {
        "name": item_info["skuTitle"],
        "iid": item_info["id"],
        "promote": [[promo["name"], promo["huodongUrlPc"], "0 ~ 0"]
                    for promo in promote_info],
        "quan": quan_info,
        "price": price,
        "store": item_info["sellVolume"],
    }