def get_product_price(self):
    """Return the JD price and the PLUS price of this product as HTML text.

    Two requests go through ``HTMLInfo.getHTML``:

    1. ``https://d.jd.com/lab/get?callback=lab`` -- a ``lab([...])`` payload.
       The ``startOn`` value of the last entry whose ``url`` matches
       ``www.jd.com`` is used to build the ``pduid`` query parameter.
    2. ``http://p.3.cn/prices/mgets`` -- the price record for this SKU.

    Returns:
        ``price + "<br />" + plus_price``. Either part is an empty string
        when the price record lacks the corresponding key (``p`` / ``tpp``)
        or when no price record is returned.
    """
    import json  # local import: the payload is parsed as JSON, see below

    price = ""
    plus_price = ""
    start_on = ""  # stays empty when no entry matches, instead of NameError
    sku_id = self.get_product_skuid()

    lab_response = HTMLInfo.getHTML("https://d.jd.com/lab/get?callback=lab")
    found = re.search(r"lab\(\[(.*?)\]\)", lab_response.text)
    entries = []
    if found:
        # The text comes from the network, so it must not be passed to
        # eval(). The pattern captures what is between the brackets; putting
        # them back yields a list even when there is a single entry.
        try:
            entries = json.loads("[" + found.group(1) + "]")
        except ValueError as ex:
            print('get_product_price Ex:', ex)
    for entry in entries:
        if re.match('www.jd.com', entry['url']):
            start_on = entry["startOn"]
    pduid = str(start_on) + "1608370126"

    # this url to get the price for JD
    url = "http://p.3.cn/prices/mgets?&type=1&pduid=" + pduid + "&skuIds=J_" + sku_id
    # response.json() can return the json-encoded content of a response
    records = HTMLInfo.getHTML(url).json()
    status = records[0] if records else None
    if status:
        if 'tpp' in status:
            plus_price = "PLUS价:<br />" + status['tpp']
        if 'p' in status:
            price = "京东价:<br />" + status['p']
    return price + "<br />" + plus_price
def get_product_price(self):
    """Return the JD price and the PLUS price of this product as HTML text.

    Two requests go through ``HTMLInfo.get_html``:

    1. ``https://d.jd.com/lab/get?callback=lab`` -- a ``lab([...])`` payload.
       The ``startOn`` value of the last entry whose ``url`` matches
       ``www.jd.com`` is used to build the ``pduid`` query parameter.
    2. ``http://p.3.cn/prices/mgets`` -- the price record for this SKU.

    A payload that cannot be located or parsed is reported with ``print``
    and treated as "no entries"; the price lookup still runs.

    Returns:
        ``price + "<br />" + plus_price``. Either part is an empty string
        when the price record lacks the corresponding key (``p`` / ``tpp``)
        or when no price record is returned.
    """
    price = ""
    plus_price = ""
    start_on = ""  # empty rather than a dict, so no "{}" ends up in the URL
    sku_id = self.get_product_skuid()

    r = HTMLInfo.get_html("https://d.jd.com/lab/get?callback=lab")
    match_pattern = re.compile(r"lab\(\[(.*?)\]\)")
    entries = []
    try:
        # The pattern captures the text between the brackets; putting them
        # back lets json.loads accept one entry as well as several.
        entries = json.loads("[" + re.findall(match_pattern, r.text)[0] + "]")
    except (IndexError, ValueError) as ex:
        # Previously execution continued into an unbound name after this.
        print('get_product_price Ex:', ex)
    for entry in entries:
        if re.match('www.jd.com', entry['url']):
            start_on = entry["startOn"]
    pduid = str(start_on) + "1608370126"

    # this url to get the price for JD
    url = "http://p.3.cn/prices/mgets?&type=1&pduid=" + pduid + "&skuIds=J_" + sku_id
    # response.json() can return the json-encoded content of a response
    records = HTMLInfo.get_html(url).json()
    status = records[0] if records else None
    if status:
        if 'tpp' in status:
            plus_price = u"PLUS价:<br />" + status['tpp']
        if 'p' in status:
            price = u"京东价:<br />" + status['p']
    return price + "<br />" + plus_price
def __init__(self, url):
    """Fetch *url* and keep its decoded body on the instance.

    Sets ``self.url`` to *url* and ``self.html`` to the response content
    decoded with the encoding the response reports, or with UTF-8 when the
    response reports none.
    """
    self.url = url
    response = HTMLInfo.get_html(url)
    charset = response.encoding if response.encoding else 'utf-8'
    self.html = response.content.decode(charset)
def __init__(self, url):
    """Fetch *url*, keep its text, and build the product info.

    Sets ``self.url``, publishes *url* as ``HTMLInfo.REFERER`` before the
    request is made, stores the response text in ``self.html`` and the
    result of ``self.get_product()`` in ``self.info``.
    """
    self.url = url
    # The referer is assigned before the page is requested.
    HTMLInfo.REFERER = url
    page = HTMLInfo.get_html(url)
    self.html = page.text
    # get_product() runs last, once self.html is available.
    self.info = self.get_product()
def __init__(self, url):
    """Fetch *url*, keep its decoded body, and build the info.

    Sets ``self.url``, publishes *url* as ``HTMLInfo.REFERER`` before the
    request is made, and stores the response content in ``self.html``,
    decoded with the encoding the response reports or with UTF-8 when it
    reports none. ``self.info`` receives the result of ``self.get_info()``.
    """
    self.url = url
    HTMLInfo.REFERER = url
    response = HTMLInfo.getHTML(url)
    charset = response.encoding or 'utf-8'
    self.html = response.content.decode(charset)
    # get_info() runs last, once self.html is available.
    self.info = self.get_info()
def process(url):
    """Run the whole pipeline for *url* and print how long it took.

    Builds an ``HTMLInfo.HTMLinfo`` for *url*, calls its steps in a fixed
    order, hands ``info.infolist`` to ``genHTML.createHTML`` and finally
    prints ``Done!`` followed by the elapsed time in seconds.
    """
    started = time.time()
    info = HTMLInfo.HTMLinfo(url)

    # Steps are looked up and called one at a time, in this order.
    pipeline = (
        "shop",
        "getGoods",
        "replaceGoods",
        "createURL",
        "getItemList",
        "multiProcess",  # multi threads (4)
    )
    for step_name in pipeline:
        getattr(info, step_name)()

    genHTML.createHTML(info.infolist)
    print("Done!")
    elapsed = time.time() - started
    print("costs %0.2f seconds" % elapsed)
def get_product_promotion(self):
    """Return this product's promotions as an HTML snippet.

    Requests ``http://cd.jd.com/promotion/v2`` through ``HTMLInfo.get_html``
    and decodes the body as GBK. Unless the body contains
    ``You have triggered an abuse``, it is parsed as JSON and three things
    are read from it: the coupons under ``skuCoupon``, the first entry of
    ``prom.tags`` and the first entry of ``prom.pickOneTag``. Any exception
    raised while reading is printed and whatever was collected so far is
    still rendered.

    Returns:
        The member-price tag, the coupon lines and the gift line, in that
        order, each terminated by ``<br />``; parts without data are empty.
    """
    sku_id = self.get_product_skuid()
    cat = self.get_product_cate()
    vender_id = self.get_vendorId()
    shop_id = self.get_shopId()
    # 2_2813_51976_0 stands for Shanghai; 1_72_2799_0 means Beijing
    url = "http://cd.jd.com/promotion/v2?&skuId=" + sku_id + "&area=2_2813_51976_0&shopId=" + shop_id + "&venderId=" + vender_id + "&cat=" + cat
    raw_body = HTMLInfo.get_html(url).content.decode('gbk')

    discount = {}  # coupon discount -> coupon quota
    content = ""
    vip = ""
    try:
        if raw_body.find('You have triggered an abuse') < 0:
            payload = json.loads(raw_body)
            if "skuCoupon" in payload.keys() and payload["skuCoupon"]:
                for coupon in payload["skuCoupon"]:
                    discount[coupon["discount"]] = coupon["quota"]
            if "prom" in payload.keys():
                prom_info = payload["prom"]
                if "tags" in prom_info.keys() and prom_info["tags"]:
                    first_tag_name = prom_info["tags"][0]["name"]
                    if first_tag_name == u'会员特价':
                        vip = first_tag_name
                if "pickOneTag" in prom_info.keys() and prom_info["pickOneTag"]:
                    content = prom_info["pickOneTag"][0]["content"]
    except Exception as ex:
        print('get_product_promotion ', ex)

    # Render each part; a part with no data contributes an empty string.
    sale = "".join(
        u'满减:满' + str(quota) + u'减' + str(amount) + "<br />"
        for amount, quota in discount.items()
    )
    if vip:
        vip = str(vip) + "<br />"
    gift = u'满赠:' + str(content) + "<br />" if content else ""
    return vip + sale + gift
def process(url):
    """Run the whole pipeline for *url* and print how long it took.

    Builds an ``HTMLInfo.HTMLinfo`` for *url* and calls its preparation
    steps in a fixed order. An exception raised by ``multi_process`` is
    printed and does not stop the run: ``info.info_list`` is still handed
    to ``genHTML.create_html``. Finally prints ``Done!`` and the elapsed
    time in seconds.
    """
    started = time.time()
    info = HTMLInfo.HTMLinfo(url)

    # Preparation steps, looked up and called one at a time, in this order.
    for step_name in ("shop", "get_goods", "replace_goods", "create_url", "get_items"):
        getattr(info, step_name)()

    try:
        info.multi_process()  # multi threads (4)
    except Exception as ex:
        print(ex)

    genHTML.create_html(info.info_list)
    print("Done!")
    elapsed = time.time() - started
    print("costs %0.2f seconds" % elapsed)
def get_product_promotion(self):
    """Return this product's promotions as an HTML snippet.

    Requests ``http://cd.jd.com/promotion/v2`` through ``HTMLInfo.getHTML``,
    decodes the body as GBK and parses it as JSON. Three things are read
    from the result: the coupons under ``skuCoupon``, the first entry of
    ``prom.tags`` and the first entry of ``prom.pickOneTag``.

    Returns:
        The member-price tag, the coupon lines and the gift line, in that
        order, each terminated by ``<br />``; parts without data are empty.

    Raises:
        ValueError: if the response body is not valid JSON.
    """
    import json  # local import: the body is parsed as JSON, see below

    skuid = self.get_product_skuid()
    cat = self.get_product_cate()
    venderId = self.get_vendorId()
    shopId = self.get_shopId()
    # 2_2813_51976_0 stands for Shanghai; 1_72_2799_0 means Beijing
    url = "http://cd.jd.com/promotion/v2?&skuId=" + skuid + "&area=2_2813_51976_0&shopId=" + shopId + "&venderId=" + venderId + "&cat=" + cat
    raw_body = HTMLInfo.getHTML(url).content.decode('gbk')
    # The body comes from the network: it must not be passed to eval(),
    # which would execute arbitrary expressions and also fails on the JSON
    # literals true / false / null.
    prom = json.loads(raw_body)

    # coupon discount -> coupon quota
    discount = {}
    for coupon in prom.get("skuCoupon") or []:
        discount[coupon["discount"]] = coupon["quota"]

    vip = ""
    content = ""
    prom_info = prom.get("prom") or {}
    tags = prom_info.get("tags")
    if tags and tags[0]["name"] == "会员特价":
        vip = tags[0]["name"]
    pick_one = prom_info.get("pickOneTag")
    if pick_one:
        content = pick_one[0]["content"]

    # Render each part; a part with no data contributes an empty string.
    sale = "".join(
        "满减:满" + str(quota) + "减" + str(amount) + "<br />"
        for amount, quota in discount.items()
    )
    if vip:
        vip = str(vip) + "<br />"
    gift = "满赠:" + str(content) + "<br />" if content else ""
    return vip + sale + gift
def __init__(self, url):
    """Fetch *url* and build the info from it.

    Sets ``self.url``, stores whatever ``HTMLInfo.getHTML(url)`` returns in
    ``self.html`` without further processing, and stores the result of
    ``self.get_info()`` in ``self.info``.
    """
    self.url = url
    # NOTE: the referer header assignment is disabled here:
    # HTMLInfo.header['referer'] = url
    self.html = HTMLInfo.getHTML(url)
    # get_info() runs last, once self.html is available.
    self.info = self.get_info()
# NOTE(review): the next three statements look like the tail of a definition
# that begins above this chunk (`start` is not assigned anywhere in view);
# they are kept exactly as found.
print("Done!")
end = time.time()
print("costs %0.2f seconds"%(end - start))

# URLs collected from cfg/urlLink; filled by getInfoFromurlLink().
urlList = []


def getInfoFromurlLink():
    """Append the URLs listed in ``cfg/urlLink`` to the module-level ``urlList``.

    Newline characters are removed from each line. A line is kept when it
    contains ``http`` and does not start with ``#``.
    """
    with open("cfg/urlLink", "r") as f:
        # Iterate the file directly instead of loading every line first.
        for line in f:
            url = line.replace('\n', '')
            if 'http' in url and not url.startswith('#'):
                urlList.append(url)


getInfoFromurlLink()
genHTML.clearHTML()
# NOTE(review): the webdriver part is assumed to belong to this `if`; the
# flattened source does not show its nesting -- confirm.
if urlList:
    for i in urlList:
        try:
            process(i)
        except Exception:
            # A failing URL is reported and the remaining ones still run.
            # Exception (not a bare except) lets Ctrl-C and SystemExit
            # stop the script.
            print("Unexpected error:", sys.exc_info()[0])
    driver = HTMLInfo.getWebdriver()
    if driver:
        # Open the local show.html from the current working directory.
        url = "file://" + os.getcwd() + "/show.html"
        driver.get(url)
        driver.maximize_window()
def __init__(self, url):
    """Fetch *url* and keep the result on the instance.

    Sets ``self.url`` to *url* and ``self.html`` to whatever
    ``HTMLInfo.getHTML(url)`` returns, without further processing.
    """
    self.url = url
    self.html = HTMLInfo.getHTML(url)