async def input_verify_code(self, frame, fromStore, type):
    """Request an SMS verification code in the page, wait for a human to
    deposit the code into the DB, then type and submit it.

    Flow: clear/insert a `phone_verify` row for this store, e-mail the
    operators, then repeatedly click the "get code" button and poll the DB
    (every 5s, up to 120 times per click) until `verify_code` is no longer
    the "0" placeholder. Finally type the code into the page and submit.

    :param frame: page/frame object exposing async click()/type()
                  (presumably pyppeteer — confirm)
    :param fromStore: store identifier used as the DB key and mail subject
    :param type: 0 or non-0; selects which of the two selector sets
                 (PHONE_GET_CODE/PHONE_CHECK_INPUT/PHONE_SUBMIT_BTN) to use
                 NOTE(review): shadows the builtin `type`; kept for callers.
    """
    logger.info("需要要手机验证码")
    ms = MySql(db_setting=TEST_SERVER_DB_TEST)
    # Reset any stale verify row for this store, then insert a fresh
    # placeholder row that an operator will fill with the real code.
    ms.delete(t='phone_verify', c={'fromStore': fromStore})
    ms.insert(t="phone_verify", d={"fromStore": fromStore})
    # Notify operators by mail that a manual SMS code is needed.
    mail(fromStore + "手机验证码", fromStore + "登陆需要手机验证码", MAIL_RECEIVERS)
    verify_code = "0"  # "0" is the "not yet provided" sentinel
    while 1:
        # Trigger sending of the SMS code; selector depends on `type`.
        if type == 0:
            await frame.click(PHONE_GET_CODE[0])
        else:
            await frame.click(PHONE_GET_CODE[1])
        # Poll the DB for up to 120 * 5s = 10 minutes per click attempt.
        for i in range(120):
            await asyncio.sleep(5)
            verify_code = ms.get_one(t='phone_verify', cn=['verify_code'], c={"fromStore": fromStore})
            if verify_code != "0":
                # Code arrived: clean up the row and drop the DB handle.
                ms.delete(t='phone_verify', c={'fromStore': fromStore})
                del ms
                break
        if verify_code != "0":
            break
        # No code after the polling window — wait, then click "get code" again.
        # NOTE(review): loops forever if no code ever arrives — confirm intended.
        await asyncio.sleep(10)
    # Enter the received code and submit, using the selector set for `type`.
    if type == 0:
        await frame.type(PHONE_CHECK_INPUT[0], verify_code, {'delay': self.input_time_random() - 50})
        await frame.click(PHONE_SUBMIT_BTN[0])
    else:
        await frame.type(PHONE_CHECK_INPUT[1], verify_code, {'delay': self.input_time_random() - 50})
        await frame.click(PHONE_SUBMIT_BTN[1])
def _get_page_num(shop_id):
    """Pick a random not-yet-scraped search page number for a shop.

    Reads (and lazily initializes / daily resets) the per-shop row in
    `tb_search_page_info`, then returns a page number that is not listed in
    the comma-separated `used_page_nums` column.

    :param shop_id: shop identifier (DB key).
    :returns: tuple ``(page, used_page_nums, total_page, spent_time)`` where
        ``page`` is a random uncollected page number; when every page has
        been collected the function returns ``(0, 0, 0, 0)``.
    """
    ms = MySql(db_setting=test_db)
    try:
        result = ms.get_dict(t="tb_search_page_info", c={"shop_id": shop_id})
        if not result:
            # No row yet for this shop: create one with defaults, then re-read
            # so `result` has the same shape as the normal path.
            d = {
                "shop_id": shop_id,
                "total_page": 20,
                "used_page_nums": "0",
                "last_date": datetime.date.today(),
                "spent_time": 0
            }
            ms.insert(t="tb_search_page_info", d=d)
            result = ms.get_dict(t="tb_search_page_info", c={"shop_id": shop_id})
        if result[0]['last_date'] < datetime.date.today():
            # Row is from a previous day: reset the daily progress counters.
            ms.update(t="tb_search_page_info",
                      set={
                          "used_page_nums": "0",
                          "spent_time": 0
                      },
                      c={"shop_id": shop_id})
            result = ms.get_dict(t="tb_search_page_info", c={"shop_id": shop_id})
        row = result[0]
        # Page numbers already collected (stored as comma-separated string).
        used_page_nums = [int(x) for x in row['used_page_nums'].split(",")]
        total_page = row['total_page']
        # All page numbers 0..total_page minus the already-collected ones.
        remaining = list(set(range(total_page + 1)) - set(used_page_nums))
        if remaining:
            # Random uncollected page, plus progress info for the caller.
            return random.choice(remaining), used_page_nums, total_page, row['spent_time']
        # Every page for this shop has been collected today.
        return 0, 0, 0, 0
    finally:
        # Release the DB handle on every exit path (the sibling functions in
        # this file do `del ms`; the original leaked it here).
        del ms
async def parse(self, html):
    """Parse a Taobao item page and upsert its price rows into `prices_tb`.

    Two cases:
    * No ``skuMap`` found in the HTML: the item has a single price — mark it
      non-multi (isMut=0) in `tb_master` and upsert one row keyed by link_id.
    * ``skuMap`` present: the item has SKU variants — mark it multi (isMut=1),
      build a human-readable attribute map from the page's ``li[data-value]``
      elements, and upsert one row per SKU keyed by skuId.

    Side effects: mutates ``self._item``, writes to the DB, and finally
    advances the crawl via ``self._goto_the_next()``.

    :param html: raw page HTML string.
    """
    ms = MySql()
    self._item['SpiderDate'] = time_now()
    # Capture the JSON-ish skuMap payload embedded in the page script.
    sku_map = re.search('skuMap.*?(\{.*)', html)
    # "此宝贝已下架" = "this item has been taken down".
    match_xia_jia = re.search("此宝贝已下架", html)
    if match_xia_jia:
        self._item['flag'] = "XiaJia"
    if not sku_map:
        # Single-price item: flag as used / not multi-SKU in the master table.
        MySql.cls_update(db_setting=TEST_SERVER_DB_TEST,
                         t="tb_master",
                         set={
                             "isUsed": 1,
                             "isMut": 0
                         },
                         c={"link_id": self._item['link_id']})
        res = ms.get_dict(t="prices_tb", c={"link_id": self._item['link_id']})
        if res:
            # Row exists: update it in place.
            ms.update(t="prices_tb", set=self._item, c={"link_id": self._item['link_id']})
        else:
            # First sighting: insert with a placeholder stockid and a
            # back-dated SpiderDate so it is re-checked soon.
            self._item['stockid'] = "no_match"
            self._item['SpiderDate'] = time_ago(minutes=60)
            self._item['need_to_update'] = 1
            ms.insert(t="prices_tb", d=self._item)
        logger.info(self._item)
    else:
        # Multi-SKU item: flag as used / multi-SKU in the master table.
        MySql.cls_update(db_setting=TEST_SERVER_DB_TEST,
                         t="tb_master",
                         set={
                             "isUsed": 1,
                             "isMut": 1
                         },
                         c={"link_id": self._item['link_id']})
        doc = PyQuery(html)
        # Map data-value codes -> display text, normalizing fullwidth parens.
        items = doc("li[data-value]").items()
        logger.debug(items)
        attr_map = {}
        # NOTE(review): `.items()` here is presumably a generator, so this
        # truthiness check may always pass — confirm against PyQuery.
        if items:
            for item in items:
                attr_map[item.attr('data-value')] = item.find(
                    'span').text().replace("(", "(").replace(")", ")")
        sku_dict = json.loads(sku_map.group(1))
        count = 1  # suffix for placeholder stockids of newly seen SKUs
        for k, v in sku_dict.items():
            sku_result = self._item.copy()
            if self._item['promotionprice'] > 0:
                # Item-level discount = base price - promo price; apply the
                # same absolute discount to this SKU's own price.
                discount = round(
                    float(self._item['price_tb']) - float(self._item['promotionprice']), 4)
                sku_result['promotionprice'] = round(
                    float(v.get('price')) - float(discount), 4)
            else:
                sku_result['promotionprice'] = 0
            sku_result['skuId'] = v.get('skuId')
            sku_result['price_tb'] = v.get('price')
            # skuMap keys look like ";code;code;" — strip edge semicolons and
            # translate each code via attr_map.
            # NOTE(review): attr_map.get(r) can be None for unknown codes,
            # which would make join() raise — confirm intended.
            sku_result['attribute'] = "-".join([
                attr_map.get(r) for r in re.sub('^;|;$', "", k).split(";")
            ])
            res = ms.get_dict(t="prices_tb", c={"skuId": sku_result['skuId']})
            if res:
                ms.update(t="prices_tb", set=sku_result, c={"skuId": sku_result['skuId']})
            else:
                # New SKU: placeholder stockid ("no_match1", "no_match2", ...)
                # and back-dated SpiderDate so it is re-checked soon.
                sku_result['stockid'] = "no_match" + str(count)
                sku_result['SpiderDate'] = time_ago(minutes=60)
                sku_result['need_to_update'] = 1
                ms.insert(t="prices_tb", d=sku_result)
            count += 1
            logger.info(sku_result)
    # Release the DB handle, then move on to the next crawl target.
    del ms
    await self._goto_the_next()