async def input_verify_code(self, frame, fromStore, type):
    """Request an SMS login code, wait for it to show up in the DB, submit it.

    A fresh ``phone_verify`` row is created for ``fromStore`` and a
    notification mail is sent to ``MAIL_RECEIVERS``.  The "get code" button
    is then clicked and the row's ``verify_code`` column is polled every
    5 seconds, at most 120 times per request.  As soon as the polled value
    differs from ``"0"`` the row is deleted and the value is typed into the
    check input, followed by a click on the submit button.

    Args:
        frame: object exposing awaitable ``click`` and ``type`` methods.
        fromStore: store identifier; keys the ``phone_verify`` row and is
            prepended to the mail subject/body.
        type: ``0`` selects index 0 of ``PHONE_GET_CODE``,
            ``PHONE_CHECK_INPUT`` and ``PHONE_SUBMIT_BTN``; any other value
            selects index 1.

    NOTE(review): this function was recovered from a whitespace-collapsed
    source.  The 10-second pause is assumed to sit between two code
    requests (i.e. inside the outer loop) -- confirm against the original.
    """
    logger.info("需要要手机验证码")

    # Both selector variants are handled identically apart from the index.
    variant = 0 if type == 0 else 1

    db = MySql(db_setting=TEST_SERVER_DB_TEST)
    db.delete(t='phone_verify', c={'fromStore': fromStore})
    db.insert(t="phone_verify", d={"fromStore": fromStore})
    mail(fromStore + "手机验证码", fromStore + "登陆需要手机验证码", MAIL_RECEIVERS)

    verify_code = "0"
    while True:
        await frame.click(PHONE_GET_CODE[variant])
        for _ in range(120):
            await asyncio.sleep(5)
            verify_code = db.get_one(t='phone_verify',
                                     cn=['verify_code'],
                                     c={"fromStore": fromStore})
            if verify_code != "0":
                # Code received: clean up the row and drop the DB handle.
                db.delete(t='phone_verify', c={'fromStore': fromStore})
                del db
                break
        else:
            # Polling window elapsed without a code: pause, then re-request.
            await asyncio.sleep(10)
            continue
        break

    await frame.type(PHONE_CHECK_INPUT[variant], verify_code,
                     {'delay': self.input_time_random() - 50})
    await frame.click(PHONE_SUBMIT_BTN[variant])
def verify():
    """Reconcile unverified orders against their detail lines.

    Reads every ``tb_order_spider`` row with ``isVerify = '0'`` and
    ``isDetaildown = '1'``.  For each order the detail total is the sum of
    ``unitPrice * sellNum - unitBenefits`` over its ``tb_order_detail_spider``
    rows, and the discrepancy is
    ``round(total, 3) + deliverFee - actualFee - couponPrice``.

    * If the absolute discrepancy exceeds ``0.0001`` and the order status is
      not ``'交易关闭'``, the order is flagged (``isVerify = 2``,
      ``isDetaildown = 0``) and a ``|``-joined summary line is collected.
    * Otherwise the order is marked verified (``isVerify = 1``).

    When at least one summary line was collected, all of them are mailed as
    a single newline-separated report.
    """
    order_columns = [
        'orderNo', 'deliverFee', 'actualFee', 'couponPrice', 'fromStore',
        'orderStatus'
    ]
    detail_columns = ['unitPrice', 'sellNum', 'unitBenefits']

    db = MySql()
    orders = db.get(t="tb_order_spider",
                    cn=order_columns,
                    c={'isVerify': '0', 'isDetaildown': '1'})

    report_lines = []
    for order in orders or ():
        order_no, deliver_fee, actual_fee, coupon_price, from_store = order[:5]

        details = db.get(t="tb_order_detail_spider",
                         cn=detail_columns,
                         c={'orderNo': order_no})
        total = 0
        for detail in details:
            unit_price, sell_num, unit_benefits = detail[:3]
            total = total + unit_price * sell_num - unit_benefits

        diff = round(total, 3) + deliver_fee - actual_fee - coupon_price

        if abs(diff) > 0.0001 and order[5] != '交易关闭':
            report_lines.append("|".join([
                str(round(total, 2)),
                str(deliver_fee),
                str(actual_fee),
                str(coupon_price),
                str(diff),
                store_trans(from_store),
                order_no,
            ]))
            # Flag the order and request a fresh detail download.
            db.update(t="tb_order_spider",
                      set={'isVerify': 2, 'isDetaildown': 0},
                      c={'orderNo': order_no})
        else:
            db.update(t="tb_order_spider",
                      set={'isVerify': 1},
                      c={'orderNo': order_no})

    if report_lines:
        mail("数据异常报告", "\n".join(report_lines), ["*****@*****.**"])
async def do_login(self, **kwargs):
    """Open the Taobao login page, submit credentials and handle verification.

    Steps, as visible in this function:

    1. ``self.set_page(**kwargs)`` is awaited.
    2. ``https://login.taobao.com`` is loaded, retrying after ``my_sleep(5)``
       for as long as ``goto`` raises.
    3. The ``S_T_P_L`` selector is awaited and clicked, then
       ``self.type_with_login_info(kwargs)`` is awaited; the loop repeats
       until that call returns a truthy value.
    4. ``self.check_captcha`` decides whether ``self.slider`` is run, and the
       login button (``LOGIN_SUBMIT[0]``, falling back to
       ``LOGIN_SUBMIT[1]``) is clicked.
    5. ``self.phone_verify`` is awaited; a truthy result terminates the
       process via ``exit``.

    Args:
        **kwargs: forwarded to ``set_page`` and ``type_with_login_info``;
            must contain the key ``'fromStore'``.

    Returns:
        The tuple ``(self.browser, self.page, kwargs['fromStore'])``.

    NOTE(review): this function was recovered from a whitespace-collapsed
    source, so the nesting of the ``else``/``finally`` clauses and of the
    ``my_sleep(5)`` after the slider branch is a best-effort reconstruction
    -- confirm against the original file before relying on it.
    """
    await self.set_page(**kwargs)
    # Keep retrying the initial navigation until it succeeds.
    while 1:
        try:
            await self.page.goto("https://login.taobao.com", timeout=30000)
        except Exception as e:
            logger.error("网络连接异常,5秒后重连,原因" + str(e))
            my_sleep(5)
        else:
            break
    while 1:
        try:
            await self.page.waitForSelector(S_T_P_L,
                                            visible=True,
                                            timeout=10000)
            await self.page.click(S_T_P_L)
        except errors.TimeoutError:
            # Selector did not become visible in time; carry on regardless.
            pass
        except errors.ElementHandleError:
            await self.page.reload()
            continue
        finally:
            # The finally clause runs on every path, including the
            # reload/continue path above; a `break` here discards that
            # pending `continue`.
            result = await self.type_with_login_info(kwargs)
            if result:
                break
            else:
                mail("登陆错误", "登陆CSS错误,请查看CSS是否正确", MAIL_RECEIVERS)
    slider = await self.check_captcha(page=self.page)  # check whether a slider captcha is present
    if slider:
        logger.info("检测页面出现滑块")
        t = await self.slider(page=self.page)
        if not t:
            try:
                await self.page.click(LOGIN_SUBMIT[0])  # simulate a click on the login button via page
            except Exception as e:
                # NOTE(review): `str(e)` is a no-op; the error is discarded
                # and the alternative selector is tried instead.
                str(e)
                await self.page.click(LOGIN_SUBMIT[1])  # simulate a click on the login button via page
            my_sleep(5)
    else:
        try:
            await self.page.click(LOGIN_SUBMIT[0])  # simulate a click on the login button via page
        except Exception as e:
            # NOTE(review): `str(e)` is a no-op; the error is discarded and
            # the alternative selector is tried instead.
            str(e)
            await self.page.click(LOGIN_SUBMIT[1])  # simulate a click on the login button via page
    t = await self.phone_verify(self.page, kwargs['fromStore'])
    if t:
        # A truthy phone_verify result is treated as a failed login.
        exit("登陆失败退出程序")
    return self.browser, self.page, kwargs['fromStore']
def restart():
    """Clean up temp files, announce the restart by mail, then reboot.

    After ``kill_temp_file()`` and the notification mail, the following
    commands are run one after another, each to completion, with all three
    standard streams attached to pipes and their output discarded:

    1. ``ping www.baidu.com``
    2. ``taskkill /F /IM chrome.exe``
    3. ``shutdown -r -t 60``
    4. ``taskkill /F /IM python.exe``
    """
    kill_temp_file()
    mail(SPIDER_ADDRESS + "爬虫自动重启", "", MAIL_RECEIVERS)

    piped = {
        "stdin": subprocess.PIPE,
        "stdout": subprocess.PIPE,
        "stderr": subprocess.PIPE,
    }
    commands = (
        "ping www.baidu.com",
        "taskkill /F /IM chrome.exe",
        "shutdown -r -t 60",
        "taskkill /F /IM python.exe",
    )
    for command in commands:
        subprocess.run(command, **piped)
def parse(self):
    """Parse every fetched shop search page and persist the items it lists.

    For each ``(html, shop_id, used_page_nums, total_page, page_num)`` tuple
    yielded by ``self._get_html()``:

    * If the HTML contains no ``item<digit>line1`` marker, the shop's
      ``tb_search_curl`` row is deleted, an error mail containing the page
      HTML is sent, and the page is skipped.
    * Otherwise ``page_num`` is appended to ``used_page_nums`` (mutated in
      place and sorted) and the progress record is written with
      ``write(flag="tspi", ...)``.  If the pagination text yields a total
      page count different from ``total_page``, the record is written again
      with ``total_page`` added.
    * Every ``dl.item`` under the matched container is turned into a
      ``TBMasterItem`` and saved through ``save(ms, ms_prod)``.

    Price mapping: when a non-zero ``s-price`` is present it becomes
    ``price_tb`` and the ``c-price`` becomes ``promotionprice``; otherwise
    ``c-price`` is ``price_tb`` and ``promotionprice`` is the (zero)
    ``s-price``.
    """
    for html, shop_id, used_page_nums, total_page, page_num in self._get_html():
        doc = PyQuery(html)
        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        match = re.search(r"item\dline1", html)
        if not match:
            MySql.cls_delete(db_setting=test_db,
                             t='tb_search_curl',
                             c={"shop_id": shop_id})
            mail("店铺搜索页爬虫出错",
                 shop_id + "错误页码:" + str(page_num) + "\n" + html,
                 MAIL_RECEIVERS)
            continue

        used_page_nums.append(page_num)
        used_page_nums.sort()
        tspi = {  # tb_search_page_info
            "used_page_nums": ",".join([str(x) for x in used_page_nums]),
            "last_date": datetime.date.today()
        }
        write(flag="tspi", value=tspi)

        num = doc(".pagination span.page-info").text()
        try:
            # Raises when the pagination text has no "<current>/<total>" part.
            total_page_num = re.search(r"\d+\/(\d+)", num).group(1)
        except Exception as e:
            logger.error(str(e))
        else:
            if int(total_page_num) != int(total_page):
                tspi['total_page'] = total_page_num
                write(flag="tspi", value=tspi)

        items = doc("." + match.group() + " dl.item").items()
        ms = MySql(db_setting=test_db)
        ms_prod = MySql()
        for i in items:
            tb_master_item = TBMasterItem()
            tb_master_item.shop_id = shop_id
            tb_master_item.link_id = i.attr('data-id')
            tb_master_item.description = i.find("dd.detail a").text()

            cprice = float(i.find("div.cprice-area span.c-price").text())

            # Each optional field is looked up once and reused.
            sprice_text = i.find("div.sprice-area span.s-price").text()
            sprice = float(sprice_text) if sprice_text else 0

            sales_text = i.find("div.sale-area span.sale-num").text()
            if sales_text:
                tb_master_item.sales = int(sales_text)

            rates_text = i.find("dd.rates a span").text()
            if rates_text:
                tb_master_item.rates = int(rates_text)

            if sprice:
                tb_master_item.price_tb = sprice
                tb_master_item.promotionprice = cprice
            else:
                tb_master_item.price_tb = cprice
                tb_master_item.promotionprice = sprice

            print(tb_master_item)
            tb_master_item.save(ms, ms_prod)
        # Drop both DB handles before moving on to the next page.
        del ms, ms_prod
def report(self, shop_ids):
    """Send one mail per report entry returned by ``self.get``.

    Args:
        shop_ids: passed through unchanged as ``self.get(shop_ids=...)``.

    Each value of the returned mapping supplies the mail arguments through
    its ``"title"``, ``"mail_content"`` and ``"receivers"`` keys (missing
    keys yield ``None``); the mapping's own keys are not used.
    """
    reports = self.get(shop_ids=shop_ids)
    for _, entry in reports.items():
        title = entry.get("title")
        content = entry.get("mail_content")
        receivers = entry.get("receivers")
        mail(title, content, receivers)