async def save(self):
    """Write every record yielded by ``self._parse()`` into ``tb_master``.

    Each record is looked up by ``link_id``. When a row already exists it is
    updated, and the record's ``flag`` / ``narrative`` fields describe which
    of the tracked values (price, promotion price, sales count) changed.
    When no row exists the record is inserted with ``flag`` set to
    ``'insert'``.

    Returns:
        ``0`` as soon as ``_parse()`` yields a falsy record (a message is
        printed first); otherwise ``None`` once the generator is exhausted.
    """
    # (record key, tag appended to the flag, narrative template), compared in this order.
    tracked = (
        ('price', "price", "更新销售价格:[{}]=>[{}]"),
        ('promotionPrice', "promotion", "更新优惠售价格:[{}]=>[{}]"),
        ('sale_num', "sale", "更新销量:[{}]=>[{}]"),
    )
    async for record in self._parse():
        if not record:
            print("需要要切换淘宝账户")
            return 0
        rows = mysql.get_data(db=test_server,
                              t="tb_master",
                              c={'link_id': record['link_id']},
                              dict_result=True)
        if not rows:
            record['flag'] = 'insert'
            mysql.insert_data(db=test_server, t="tb_master", d=record)
            continue
        tags = ["update"]
        notes = []
        for key, tag, template in tracked:
            if rows[0][key] != record[key]:
                tags.append(tag)
                notes.append(template.format(rows[0][key], record[key]))
        record['flag'] = "_".join(tags)
        record['narrative'] = ";".join(notes)
        mysql.update_data(db=test_server,
                          t='tb_master',
                          set=record,
                          c={"link_id": record['link_id']})
def save_in_sql(self, item, tableName):
    """Insert ``item`` into ``tableName`` or update the row it matches.

    An item that carries a ``goodsCode`` key is matched on ``orderNo`` plus
    ``itemNo``; any other item is matched on ``orderNo`` alone.

    Note that when a ``goodsCode`` item already has a row, ``goodsCode`` is
    removed from ``item`` in place before the update, so the caller's dict is
    modified.
    """
    has_goods_code = 'goodsCode' in item
    where = {'orderNo': item['orderNo']}
    if has_goods_code:
        where['itemNo'] = item['itemNo']

    if not mysql.get_data(t=tableName, c=where):
        mysql.insert_data(t=tableName, d=item)
        return

    if has_goods_code:
        # The update must not carry goodsCode, so drop it first.
        item.pop("goodsCode")
    mysql.update_data(t=tableName, set=item, c=where)
def report_in(self, **kwargs):
    """Record a maintenance event in the ``update_reports`` table.

    When ``kwargs['flag']`` is ``'lookup'``, the ``lookup`` value read from
    the row whose ``link_id`` is ``'count'`` is incremented and written back
    together with ``last_time``. For every other flag the report fields are
    copied from ``kwargs`` into ``self.report_item``, which is then inserted
    as a new row.
    """
    if kwargs['flag'] != 'lookup':
        # (report_item key, kwargs key), copied one by one in this order.
        field_map = (
            ('stockid', 'stockid'),
            ('link_id', 'link_id'),
            ('shop_id', 'shop_id'),
            ('price_tb', 'price_tb'),
            ('last_time', 'SpiderDate'),
            ('attribute', 'attribute'),
            ('flag', 'flag'),
            ('description', 'description'),
        )
        for target_key, source_key in field_map:
            self.report_item[target_key] = kwargs[source_key]
        mysql.insert_data(t="update_reports", d=self.report_item, db=self.db_test)
        return

    counter_rows = mysql.get_data("update_reports",
                                  l=1,
                                  cn=["lookup"],
                                  c={"link_id": "count"},
                                  db=self.db_test)
    new_values = {
        'lookup': counter_rows[0][0] + 1,
        'last_time': kwargs['SpiderDate'],
    }
    target_row = {'link_id': 'count', 'shop_id': kwargs['shop_id']}
    # NOTE(review): the read above and the insert branch pass db=self.db_test,
    # but this update does not; the read also does not filter on shop_id.
    # Confirm that both differences are intended.
    mysql.update_data(t="update_reports", set=new_values, c=target_row)
async def run():
    """Log in to Taobao by QR code and store request parameters for a shop.

    A browser page is opened on the Taobao login page, the QR code image is
    saved to ``./qrcode.png`` and displayed with matplotlib, and the
    coroutine waits for the navigation that follows. It then opens the shop
    page of the first shop returned by the ``shop_info`` query, collects the
    user agent, cookies, current URL and several hidden form values, and
    inserts them into ``user_record``.
    """
    browser = await launch(**dev)
    page = await browser.newPage()
    await page.setViewport({"width": 1440, "height": 900})
    await page.goto("https://login.taobao.com")

    if await page.J(".module-static"):
        # Click just inside the login switch when the static module is present.
        switch = await page.J(".login-switch")
        switch_box = await switch.boundingBox()
        await page.mouse.click(switch_box['x'] + 10, switch_box['y'])

    # Retry until the QR code image has been captured; any error simply
    # triggers another attempt.
    captured = False
    while not captured:
        try:
            await page.waitForSelector("#J_QRCodeImg")
            qr_element = await page.J("#J_QRCodeImg")
            await qr_element.screenshot({'path': './qrcode.png'})
        except Exception:
            continue
        captured = True

    qr_image = mpimg.imread('qrcode.png')  # qrcode.png written by the screenshot above
    plt.imshow(qr_image)  # show the image
    plt.axis('off')  # hide the axes
    plt.show()
    await page.waitForNavigation()

    start_url = 'https://shop.taobao.com/'
    # Fetch every shop id except the placeholder one.
    sql = "select shop_id from shop_info where shop_id!='88888888'"
    shop_infos = mysql.get_data(sql=sql, dict_result=True)
    for shop_info in shop_infos:
        shop_id = shop_info['shop_id']
        item = {"shop_id": shop_id}
        await page.goto(start_url.replace("shop", "shop" + shop_info["shop_id"]))
        await asyncio.sleep(5)
        await page.waitForSelector(".all-cats-trigger.popup-trigger")
        await page.click(".all-cats-trigger.popup-trigger")
        await asyncio.sleep(5)

        item['user_agent'] = await browser.userAgent()
        cookie_pairs = [c['name'] + "=" + c['value'] for c in await page.cookies()]
        item['cookies'] = ";".join(cookie_pairs)
        item['refer'] = page.url

        await page.waitForSelector('input[name="_ksTS"]')
        content = await page.content()
        print(content)
        doc = PyQuery(content)
        ks_ts = doc('input[name="_ksTS"]').val()
        item['_ksTS'] = ks_ts.split("_")[-1]
        item['callback'] = 'jsonp' + str(int(item['_ksTS']) + 1)
        for field in ('mid', 'wid'):
            item[field] = doc('input[name="%s"]' % field).val()
        item['spm'] = doc('div.pagination form input[name="spm"]').val()
        mysql.insert_data(db=test_server, t='user_record', d=item)
        # Only the first shop is processed.
        break
def result():
    """Store a submitted ``curl`` command in ``tb_search_curl``.

    On POST the form is converted to a dict and its ``curl`` field is
    stripped. The field must start with ``curl``; otherwise ``index.html`` is
    rendered with an error status. When the command contains
    ``shop<digits>.`` the digits are stored as ``shop_id``. The item is then
    inserted and ``index.html`` is rendered with a success status.

    Returns:
        The rendered template for POST requests. Other methods fall through
        and return ``None``.
    """
    # NOTE(review): non-POST requests return None; presumably the route only
    # accepts POST. Confirm against the route registration.
    if request.method == 'POST':
        item = request.form.to_dict()
        # A missing ``curl`` field is handled as malformed input instead of
        # raising KeyError.
        item['curl'] = item.get('curl', '').strip()
        if not re.match(r"^curl", item['curl']):
            return render_template("index.html",
                                   result={"status": "传送的数据不正确"})
        # Raw string: "\d" and "\." are invalid escapes in a normal literal.
        shop_id_match = re.search(r"shop(\d+)\.", item['curl'])
        if shop_id_match:
            item['shop_id'] = shop_id_match.group(1)
        mysql.insert_data(db=test_server, t='tb_search_curl', d=item)
        return render_template('index.html', result={"status": "写入数据库成功"})
async def verify(self, p):
    """Complete the phone verification dialog if it appears on page ``p``.

    The coroutine waits up to 10 seconds for ``div.aq_overlay_mask``. If it
    does not appear, nothing else happens. Otherwise a code is requested
    through the second frame of the page and obtained in one of two ways:

    * ``LINUX`` truthy: a row is inserted into ``phone_verify`` and polled
      every 5 seconds (120 polls per request) until its ``verify_code`` is
      no longer ``"0"``; the row is then deleted. After an unsuccessful
      round the code is requested again 10 seconds later.
    * otherwise: the code is read from standard input.

    The code is finally typed into the dialog and submitted.
    """
    try:
        await p.waitForSelector("div.aq_overlay_mask", timeout=10000)
    except errors.TimeoutError:
        # No verification overlay: nothing to do.
        return

    logger.info("需要要手机验证码")
    if not LINUX:
        frames = p.frames
        net_check()
        await frames[1].click(".J_SendCodeBtn")
        verify_code = input(time_now() + " | 请输入6位数字验证码:")
    else:
        verify_db = ts.copy()
        verify_db['db'] = "test"
        request_id = random.randint(0, 100)
        mysql.insert_data(db=verify_db, t="phone_verify", d={"id": request_id})
        frames = p.frames
        net_check()
        verify_code = "0"
        while True:
            net_check()
            await frames[1].click(".J_SendCodeBtn")
            for _ in range(120):
                await asyncio.sleep(5)
                rows = mysql.get_data(
                    db=verify_db,
                    cn=["verify_code"],
                    t="phone_verify",
                    c={"id": request_id},
                )
                verify_code = rows[0][0]
                if verify_code != "0":
                    mysql.delete_data(db=verify_db,
                                      t="phone_verify",
                                      c={"id": request_id})
                    break
            else:
                # No code arrived in this round: pause, then request again.
                await asyncio.sleep(10)
                continue
            break

    await frames[1].type(".J_SafeCode", verify_code,
                         {'delay': self.input_time_random() - 50})
    net_check()
    await frames[1].click("#J_FooterSubmitBtn")
def maintain(self, operation, **kwargs):
    """Apply a maintenance operation to ``prices_tb`` and report it.

    Args:
        operation: ``"更新"`` updates the row matching stock id, link id and
            shop id; ``"创建"`` inserts a new row; any other value only
            marks the report as a lookup.
        **kwargs: Must provide ``goodsCode``, ``link_id``, ``fromStore``,
            ``unitPrice``, ``goodsAttribute`` and ``tbName``; ``ratio`` is
            also required for the update operation.

    The assembled item is always passed to ``self.report_in`` with its
    ``flag`` set to ``'update'``, ``'create'`` or ``'lookup'``.
    """
    goods_code = kwargs['goodsCode']
    link_id = kwargs['link_id']
    shop_id = self.shop_id(kwargs['fromStore'])
    unit_price = kwargs['unitPrice']
    spider_date = time_now()
    attribute = kwargs['goodsAttribute']
    title = kwargs['tbName']

    item = {
        'stockid': goods_code,
        'link_id': link_id,
        'shop_id': shop_id,
        'price_tb': unit_price,
        'currabrev': 'CNY',
        'operator': '爬虫维护',
        'SpiderDate': spider_date,
        'attribute': attribute,
        'flag': None,
        'description': title,
        'typeabbrev': "",
        'price_erp': 0,
        'last_time': time_now(),
        'freight': "",
        'ratio': 1,
        'promotionprice': 0,
        'sales': 0,
        'rates': 0,
        'Checker': "",
        'package_number': 1,
        'CheckDate': time_now(),
    }

    if operation == "更新":
        item['flag'] = 'update'
        new_values = {
            'SpiderDate': time_now(),
            'flag': 'update',
            'price_tb': unit_price,
            'description': title,
            'ratio': kwargs['ratio'],
            'attribute': attribute,
        }
        row_filter = {
            'stockid': goods_code,
            'link_id': link_id,
            'shop_id': self.shop_id(kwargs['fromStore']),
        }
        mysql.update_data(t="prices_tb", set=new_values, c=row_filter)
    elif operation == "创建":
        item['flag'] = 'create'
        mysql.insert_data(t="prices_tb", d=item)
    else:
        item['flag'] = 'lookup'

    self.report_in(**item)
def competitor_data():
    """Upsert the rows of a submitted competitor form into ``prices_tb``.

    The form carries shared fields plus four parallel lists (``stockid[]``,
    ``attribute[]``, ``price_tb[]``, ``package_number[]``). One item is
    built per list position from a copy of the shared fields; positions with
    an empty stock id are skipped. The package number is the per-row value
    when it exceeds 1, otherwise the shared ``package_number_t`` when that
    exceeds 1, otherwise 1. Each item is updated when a row with the same
    ``stockid`` and ``link_id`` exists and inserted otherwise.

    Returns:
        ``"添加成功!"`` for POST requests, ``None`` for any other method.
    """
    if request.method != 'POST':
        return None

    form = request.form
    shared = form.to_dict()
    columns = {}
    for field in ("stockid[]", "attribute[]", "price_tb[]", "package_number[]"):
        columns[field] = form.getlist(field)
        # The list fields must not leak into the per-row items.
        shared.pop(field)

    attributes = columns["attribute[]"]
    prices = columns["price_tb[]"]
    package_numbers = columns["package_number[]"]

    for idx, stock_id in enumerate(columns["stockid[]"]):
        item = shared.copy()
        item['stockid'] = stock_id
        if not stock_id:
            continue
        item['attribute'] = attributes[idx]
        item['price_tb'] = prices[idx]
        shared_pgn = item.pop("package_number_t")
        if int(package_numbers[idx]) > 1:
            item['package_number'] = package_numbers[idx]
        elif int(shared_pgn) > 1:
            item['package_number'] = shared_pgn
        else:
            item['package_number'] = 1

        row_key = {"stockid": item["stockid"], "link_id": item["link_id"]}
        if mysql.get_data(c=row_key, t="prices_tb"):
            mysql.update_data(set=item, c=row_key, t="prices_tb")
        else:
            mysql.insert_data(d=item, t="prices_tb")

    return "添加成功!"
async def login(self, page=None, **kwargs):
    """Log in to Taobao and return the page once the seller home has loaded.

    Args:
        page: Page to reuse. When falsy a new page is obtained from
            ``self.get_new_page()``.
        **kwargs: Must provide ``username``, ``password`` and ``fromStore``.

    The login page is opened (retrying on page and timeout errors), the
    credentials are typed, the slider is handled when ``#nocaptcha`` reports
    a style, and the form is submitted. If ``#container`` does not appear
    within 10 seconds, a phone verification code is requested; the code is
    polled from the ``phone_verify`` table every 5 seconds and typed into
    the second frame of the page.

    Returns:
        The tuple ``(self.b, page, kwargs['fromStore'])``.
    """
    if not page:
        page = await self.get_new_page()

    # Open the login page; page and timeout errors trigger another attempt.
    while True:
        try:
            net_check()
            await page.goto(login_url)
        except (errors.PageError, errors.TimeoutError):
            continue
        break

    # Switch to the password form when the switch link shows up, then type
    # the credentials. The original did the typing inside a ``finally`` that
    # contained ``break``, which silently discarded any unexpected exception
    # raised by the first step; here such exceptions propagate.
    while True:
        try:
            await page.waitForSelector(".forget-pwd.J_Quick2Static",
                                       visible=True,
                                       timeout=10000)
            await page.click(".forget-pwd.J_Quick2Static")
        except errors.TimeoutError:
            pass
        except errors.ElementHandleError:
            await page.reload()
        try:
            await page.type('#TPL_username_1', kwargs['username'],
                            {'delay': self.input_time_random() - 50})
            await page.type('#TPL_password_1', kwargs['password'],
                            {'delay': self.input_time_random()})
        except errors.ElementHandleError:
            await page.reload()
        else:
            break

    net_check()
    # Detect the slider by inspecting the page element.
    slider = await page.Jeval('#nocaptcha', 'node => node.style')
    if slider:
        print("出现滑块情况判定")
        await self.mouse_slide(p=page)
        await page.click("#J_SubmitStatic")  # simulate a click on the login button
        # Non-blocking pause: time.sleep() would stall the whole event loop.
        await asyncio.sleep(2)
        await self.get_cookie(page)
    else:
        await page.click("#J_SubmitStatic")

    try:
        await page.waitForSelector("#container", timeout=10000)
    except errors.TimeoutError:
        print("超时需要手机验证!")
        frames = page.frames
        try:
            await frames[1].waitForSelector("button#J_GetCode", timeout=10000)
        except errors.TimeoutError:
            pass
        else:
            print("需要要手机验证码")
            # NOTE(review): this mutates the shared module-level test_server
            # dict; confirm other users expect db == "test" afterwards.
            test_server['db'] = "test"
            # NOTE(review): ids in 0..100 can collide with a row left behind
            # by an earlier request; confirm how phone_verify handles this.
            request_id = random.randint(0, 100)
            mysql.insert_data(db=test_server,
                              t="phone_verify",
                              d={"id": request_id})
            verify_code = "0"
            while verify_code == "0":
                net_check()
                await frames[1].click("button#J_GetCode")
                for _ in range(120):
                    await asyncio.sleep(5)
                    rows = mysql.get_data(
                        db=test_server,
                        cn=["verify_code"],
                        t="phone_verify",
                        c={"id": request_id},
                    )
                    verify_code = rows[0][0]
                    if verify_code != "0":
                        mysql.delete_data(db=test_server,
                                          t="phone_verify",
                                          c={"id": request_id})
                        break
            await frames[1].type("input#J_Phone_Checkcode", verify_code,
                                 {"delay": self.input_time_random() - 50})
            net_check()
            await frames[1].click("input#submitBtn")

    net_check()
    await page.goto("https://myseller.taobao.com/home.htm")
    await page.waitForSelector("#container", timeout=30000)
    return self.b, page, kwargs['fromStore']
async def link_spider(self, p, f):
    """Resolve missing ``link_id`` values for the orders of store ``f``.

    Repeatedly picks the newest order detail of the store whose ``link_id``
    is ``"1"``, opens the promotion query page for that order on page ``p``
    and reads the sub-orders from the JSON embedded in the page. For every
    sub-order the matching ``tb_order_detail_spider`` rows get their
    ``link_id`` set, and ``prices_tb_fix`` is inserted or reset:

    * flag ``1`` when a ``prices_tb`` row exists whose ``SpiderDate`` is the
      zero date or more than 14 days old;
    * flag ``0`` when no ``prices_tb`` row exists.

    When no order is left the page is closed and ``self.run_link_spider()``
    is awaited.

    Args:
        p: Browser page used for navigation.
        f: Store identifier used in the order query and stored as
            ``fromStore``.
    """
    # Local import keeps this block self-contained; needed for the
    # non-blocking pause at the end of each round.
    import asyncio

    zero_date = '0000-00-00 00:00:00'
    base_url = "https://smf.taobao.com/promotionmonitor/orderPromotionQuery.htm?orderNo="
    fix_key_server = "production_server"

    test_server["db"] = "test"
    while True:
        # NOTE(security): the statements below interpolate values (store
        # name, scraped order/item ids) straight into SQL. Switch to
        # parameterized queries if the mysql helper supports them.
        sql = """
            SELECT a.id,url,goodsCode,a.orderNo
            FROM tb_order_detail_spider a
            JOIN tb_order_spider b ON a.`orderNo`=b.`orderNo`
            WHERE link_id="1" AND b.`fromStore`='%s' AND a.url IS NOT NULL
            ORDER BY b.createTime DESC
            LIMIT 1
            """ % (f)
        results = mysql.get_data(sql=sql, dict_result=True)
        if not results:
            break

        await p.goto(base_url + results[0]['orderNo'])
        content = await p.content()
        # Raw string: "\{" is an invalid escape in a normal literal.
        data = re.findall(r">(\{.*?\})<", content)
        order = json.loads(data[0])
        try:
            sub_orders = order["data"]["subOrderViewDTOs"]
        except KeyError:
            # NOTE(review): the same order is selected again right away,
            # without the pause below; confirm this cannot spin forever.
            continue

        for so in sub_orders:
            order_no = so["orderNoStr"]
            link_id = so["itemId"]

            sql = "select goodsCode from tb_order_detail_spider where url like '%%%s%%'" % (
                order_no)
            print(sql)
            goodsCode = mysql.get_data(sql=sql, return_one=True)

            sql = "update tb_order_detail_spider set link_id='%s' where url like '%%%s%%'" % (
                link_id, order_no)
            mysql.update_data(sql=sql)

            sql = """
                SELECT SpiderDate FROM prices_tb
                WHERE link_id='%s' AND stockid='%s' AND flag NOT IN ('del','XiaJia')
                """ % (link_id, goodsCode)
            res = mysql.get_data(sql=sql)
            fix_key = {"link_id": link_id, "server": fix_key_server}
            res_fix = mysql.get_data(db=test_server,
                                     dict_result=True,
                                     t='prices_tb_fix',
                                     c=fix_key)

            # Decide which flag (if any) the fix row should carry.
            fix_flag = None
            if res:
                spider_date = res[0][0]
                days = 1
                if spider_date != zero_date:
                    days = (datetime.datetime.now() - spider_date).days
                if spider_date == zero_date or days > 14:
                    fix_flag = 1
            else:
                fix_flag = 0

            if fix_flag is None:
                continue
            if not res_fix:
                mysql.insert_data(db=test_server,
                                  t="prices_tb_fix",
                                  d={
                                      "link_id": link_id,
                                      "fromStore": f,
                                      "flag": fix_flag
                                  })
            elif res_fix[0]["isComplete"] != 0:
                mysql.update_data(db=test_server,
                                  t="prices_tb_fix",
                                  set={
                                      "isComplete": 0,
                                      "flag": fix_flag
                                  },
                                  c=fix_key)

        # Pause between orders without blocking the event loop.
        await asyncio.sleep(5)

    await p.close()
    await self.run_link_spider()
async def parse(self, data):
    """Store the listed items in ``prices_tb`` on ``self.target_server``.

    Args:
        data: Either the string ``"q"`` (nothing to parse; the run is marked
            complete) or a sequence of item dicts providing ``itemId``,
            ``itemDesc`` and ``managerPrice``.

    For every item ``self.item`` is rebuilt from ``self.common``. Items whose
    stock id starts with ``MUT``, or that have a ``spe_link`` entry in
    ``prices_tb_fix``, are opened in the page with request interception
    enabled so the registered response handler can process them. All other
    items are updated or inserted directly, a report line is appended for
    new rows, and rows of the same link whose stock id differs are flagged
    ``del``.

    ``self.complete`` is set to ``1`` after a directly stored item and to
    ``2`` (stopping the loop) when an item has no promotion price.
    """
    if data == "q":
        self.complete = 1
        return

    for entry in data:
        self.item = self.common.copy()
        descriptions = entry['itemDesc']['desc']
        self.item['stockid'] = re.search(
            r"编码:(.*)", descriptions[1]['text']).group(1).upper()
        self.item['link_id'] = entry['itemId']
        self.item['attribute'] = ""
        self.item['flag'] = "update"
        self.item['typeabbrev'] = self.fromStore
        self.item['shop_id'] = self.shop_id(self.fromStore)
        self.item['SpiderDate'] = time_now()
        # Normalise full-width parentheses to ASCII. As previously written
        # both replace() calls mapped a character to itself.
        self.item['description'] = descriptions[0]['text'].replace(
            "\uff08", "(").replace("\uff09", ")")
        self.item['price_tb'] = re.findall(
            r"(\d+.?\d*)", entry["managerPrice"]['currentPrice'])[0]
        self.item['promotionprice'] = self.promo_price.get(
            self.item['link_id'])

        # NOTE(security): link_id comes from scraped data and is
        # interpolated into SQL; parameterize if the helper supports it.
        sql = "select spe_link from prices_tb_fix where link_id='%s' and server='%s'" % (
            self.item['link_id'], self.sn)
        spe_link_id = mysql.get_data(db=self.db_test, sql=sql, return_one=True)
        is_mut = re.search(r"^MUT\D*", self.item['stockid'])

        if is_mut or spe_link_id:
            # Hand the item over to the request/response interceptors.
            await self.page.setRequestInterception(True)
            self.page.on('request', self.intercept_request)
            self.page.on('response', self.intercept_response)
            await asyncio.sleep(1)
            net_check()
            await self.page.click(
                ".next-table-row td:nth-child(2) div.product-desc-hasImg span:nth-child(2) i"
            )
            await asyncio.sleep(1)
            await self.page.keyboard.press('Escape')
            continue

        if self.item['promotionprice'] is None:
            mail("price_tb_error",
                 self.fromStore + ":" + self.item['link_id'],
                 ["*****@*****.**"])
            logger.error("error:" + self.fromStore + " : " +
                         self.item['link_id'] + " and " +
                         mysql.concat(self.promo_price, "="))
            self.complete = 2
            break

        condition = {
            "stockid": self.item['stockid'],
            "link_id": self.item['link_id'],
            "shop_id": self.item['shop_id'],
        }
        # The ratio compares the new price with the stored price, so the
        # stored ``price_tb`` is read here. The previous code selected
        # ``id`` and divided the price by the row id.
        res = mysql.get_data(t="prices_tb",
                             l=1,
                             cn=["price_tb"],
                             c=condition,
                             db=self.target_server)
        if res:
            previous_price = float(res[0][0] or 0)
            if previous_price:
                self.item['ratio'] = round(
                    float(self.item['price_tb']) / previous_price, 2)
            else:
                # No usable previous price: treat the price as unchanged.
                self.item['ratio'] = 1
            print(self.item)
            mysql.update_data(t="prices_tb",
                              set=self.item,
                              c=condition,
                              db=self.target_server)
        else:
            insert_item = self.item.copy()
            insert_item["currabrev"] = "CNY"
            insert_item["price_erp"] = 0
            insert_item["operator"] = self.operator
            insert_item["last_time"] = time_now()
            if self.operator == "爬虫维护":
                insert_item["flag"] = "create"
            else:
                insert_item['flag'] = "add"
            insert_item["ratio"] = 1
            insert_item["package_number"] = 1
            insert_item["Checker"] = ""
            insert_item["CheckDate"] = "0000-00-00 00:00:00"
            print(insert_item)
            with open("reports/report_" + self.fromStore + "_insert.txt",
                      "a") as file:
                file.write("物料编码:" + insert_item['stockid'] + " 与 商品ID:" +
                           insert_item['link_id'] + " 为最新匹配,添加至ERP系统。\n" +
                           self.item_url + insert_item['link_id'] + "\n" +
                           self.item_erp_url + insert_item['link_id'] +
                           "\n\n")
            mysql.insert_data(t="prices_tb",
                              d=insert_item,
                              db=self.target_server)

        # Flag rows of the same link whose stock id no longer matches.
        result = mysql.get_data(t="prices_tb",
                                cn=["*"],
                                c={"link_id": self.item['link_id']},
                                db=self.target_server,
                                dict_result=True)
        if len(result) > 1:
            for r in result:
                if r['stockid'] != self.item['stockid'] and r['flag'] != "del":
                    with open(
                            "reports/report_" + self.fromStore +
                            "_delete.txt", "a") as file:
                        file.write("物料编码:" + r['stockid'] + " 与 商品ID:" +
                                   self.item['link_id'] +
                                   " 不匹配,已被爬虫从ERP系统中删除。\n" + self.item_url +
                                   self.item['link_id'] + "\n" +
                                   self.item_erp_url + self.item['link_id'] +
                                   "\n\n")
                    mysql.update_data(t="prices_tb",
                                      c={"id": r['id']},
                                      db=self.target_server,
                                      set={"flag": "del"})
        self.complete = 1
async def parse_2(self, data):
    """Store every SKU of the current item in ``prices_tb``.

    Args:
        data: Dict whose ``['skuOuterIdTable']['dataSource']`` is a sequence
            of SKU dicts providing ``skuOuterId`` and ``skuId``.

    ``self.item`` must already hold ``link_id`` and ``shop_id``. For each
    SKU with a stock id, the attribute, price and promotion price are looked
    up in ``self.prop``, ``self.prices`` and ``self.promo_price``; the
    matching row is updated or a new one inserted, with a report line for
    each new row. Afterwards, report lines are written for repeated stock
    ids and for items without any stock id, and rows of this link whose
    stock id was not seen are flagged ``del``. ``self.complete`` is set to
    ``1`` at the end.
    """
    verify = []  # stock ids seen for this item, in order of appearance
    repeat_list = []  # stock ids that occurred more than once

    for sku in data['skuOuterIdTable']['dataSource']:
        stock_id = sku['skuOuterId']
        self.item['stockid'] = stock_id
        logger.info(stock_id)
        if not stock_id:
            continue
        if stock_id not in verify:
            verify.append(stock_id)
        elif stock_id not in repeat_list:
            repeat_list.append(stock_id)

        sku_id = str(sku['skuId'])
        # A SKU missing from self.prop yields None; treat it like an empty
        # attribute instead of failing on None.replace(). Full-width
        # parentheses are normalised to ASCII (as previously written both
        # replace() calls mapped a character to itself).
        attribute = (self.prop.get(sku_id) or "").replace(
            "\uff08", "(").replace("\uff09", ")")
        if attribute:
            self.item['attribute'] = attribute
        else:
            # An empty attribute must not overwrite the stored one.
            self.item.pop('attribute', None)
        self.item['price_tb'] = self.prices.get(sku_id)
        if self.promo_price:
            self.item["promotionprice"] = self.promo_price.get(sku_id)
        else:
            self.item["promotionprice"] = 0

        condition = {
            "stockid": self.item['stockid'],
            "link_id": self.item['link_id'],
            "shop_id": self.item['shop_id'],
        }
        res = mysql.get_data(t="prices_tb",
                             l=1,
                             cn=["price_tb"],
                             c=condition,
                             db=self.target_server)
        if res:
            # Covers 0, "0", Decimal(0) and NULL as "no previous price".
            previous_price = float(res[0][0] or 0)
            if previous_price:
                self.item['ratio'] = round(
                    float(self.item['price_tb']) / previous_price, 2)
            else:
                self.item['ratio'] = 1
            print(self.item)
            mysql.update_data(t="prices_tb",
                              set=self.item,
                              c=condition,
                              db=self.target_server)
        else:
            insert_item = self.item.copy()
            insert_item["currabrev"] = "CNY"
            insert_item["price_erp"] = 0
            insert_item["operator"] = self.operator
            insert_item["last_time"] = time_now()
            if self.operator == "爬虫维护":
                insert_item["flag"] = "create"
            else:
                insert_item['flag'] = "add"
            insert_item["ratio"] = 1
            insert_item["package_number"] = 1
            insert_item["Checker"] = ""
            insert_item["CheckDate"] = "0000-00-00 00:00:00"
            print(insert_item)
            with open("reports/report_" + self.fromStore + "_insert.txt",
                      "a") as file:
                file.write("物料编码:" + insert_item['stockid'] + " 与商品ID:" +
                           insert_item['link_id'] + " 为最新匹配,添加至ERP系统。\n" +
                           self.item_url + insert_item['link_id'] + "\n" +
                           self.item_erp_url + insert_item['link_id'] +
                           "\n\n")
            mysql.insert_data(t="prices_tb",
                              d=insert_item,
                              db=self.target_server)

    if repeat_list:
        with open("reports/report_" + self.fromStore + "_repeat.txt",
                  "a") as file:
            file.write("店铺:" + store_trans(self.fromStore) + ",商品id:" +
                       self.item['link_id'] + " 重复编码\n" + "重复编码:" +
                       ",".join(repeat_list) + "\n" + self.item_url +
                       self.item['link_id'] + "\n\n")
    if not verify:
        with open("reports/report_" + self.fromStore + "_empty.txt",
                  "a") as file:
            file.write("店铺:" + store_trans(self.fromStore) + ",商品id:" +
                       self.item['link_id'] + " 空编码\n" + self.item_url +
                       self.item['link_id'] + "\n\n")

    # NOTE(security): link_id is interpolated into SQL; parameterize if the
    # mysql helper supports it.
    sql = """
        select id,stockid from prices_tb
        where link_id='%s' and flag not in('del','XiaJia')
        """ % (self.item['link_id'])
    res_verify = mysql.get_data(sql=sql, db=self.target_server)
    for row_id, row_stock_id in ((rv[0], rv[1]) for rv in res_verify):
        if row_stock_id in verify:
            continue
        with open("reports/report_" + self.fromStore + "_delete.txt",
                  "a") as file:
            file.write("物料编码:" + row_stock_id + " 与 商品ID:" +
                       self.item['link_id'] + " 不匹配,已被爬虫从ERP系统中删除。\n" +
                       self.item_url + self.item['link_id'] + "\n" +
                       self.item_erp_url + self.item['link_id'] + "\n\n")
        mysql.update_data(t="prices_tb",
                          c={"id": row_id},
                          db=self.target_server,
                          set={
                              "flag": "del",
                              "operator": self.operator,
                              "last_time": time_now()
                          })
    self.complete = 1