async def run():
    """Heartbeat loop for the spider monitor; it never returns.

    Each pass calls ``update()``, stamps ``latest_time`` on this spider's
    ``spider_monitor`` row and reads that row's ``restart_signal``.  The
    signal is forced on when, for any watched store, the newest
    ``updateTime`` in ``tb_order_spider`` compares (as a string) below the
    cutoff returned by ``time_ago(minutes=15)``.  If the signal is set, it is
    cleared in the table and ``restart()`` is called.  The loop then sleeps
    60 seconds.
    """
    while True:
        update()

        monitor = MySql(db_setting=TEST_SERVER_DB_TEST)
        monitor.update(
            t="spider_monitor",
            set={"latest_time": time_now()},
            c={"spider_address": SPIDER_ADDRESS},
        )
        restart_signal = monitor.get_one(
            t="spider_monitor",
            cn=["restart_signal"],
            c={"spider_address": SPIDER_ADDRESS},
        )

        # The "3_floor" address watches stores KY/TB; any other address
        # watches YJ/YK.
        sql = (
            "SELECT MAX(updateTime) as updateTime,fromStore FROM tb_order_spider WHERE fromStore IN ('KY','TB') GROUP BY fromStore"
            if SPIDER_ADDRESS == "3_floor"
            else "SELECT MAX(updateTime) as updateTime,fromStore FROM tb_order_spider WHERE fromStore IN ('YJ','YK') GROUP BY fromStore"
        )
        newest_per_store = MySql.cls_get_dict(sql=sql)

        cutoff = time_ago(minutes=15)
        # One stale store is enough to request a restart.
        if any(str(row['updateTime']) < cutoff for row in newest_per_store):
            restart_signal = 1

        if restart_signal:
            # Clear the flag before restarting so it is not acted on twice.
            monitor.update(
                t="spider_monitor",
                set={"restart_signal": 0},
                c={"spider_address": SPIDER_ADDRESS},
            )
            restart()

        del monitor
        await asyncio.sleep(60)
def _get_item():
    """Poll ``tb_master`` until a row is available, then return it.

    The query asks for one row (``l=["0", "1"]``) with ``isUsed`` 0 and
    ``isMut`` 1, filtered on the ``"flag!"`` key with value ``"XiaJia"``
    (presumably "flag != XiaJia" -- confirm against ``MySql``).  Before the
    row is returned, ``price_tb`` and ``promotionprice`` are converted to
    ``float`` and a ``typeabbrev`` entry is added from
    ``store_trans(shop_id, 'id_2_code')``.

    While no row matches, a message is logged and ``my_sleep()`` is called
    before the next attempt.
    """
    wanted_columns = [
        "shop_id",
        "link_id",
        "description",
        "price_tb",
        "promotionprice",
        "sales",
        "rates",
    ]
    while True:
        rows = MySql.cls_get_dict(
            db_setting=TEST_SERVER_DB_TEST,
            t="tb_master",
            c={
                "isUsed": 0,
                "isMut": 1,
                "flag!": "XiaJia",
            },
            cn=wanted_columns,
            l=["0", "1"],
        )
        if not rows:
            # Nothing to crawl yet: log, wait, and poll again.
            logger.info('没有数据需要爬取!')
            my_sleep()
            continue

        item = rows[0]
        item['price_tb'] = float(item['price_tb'])
        item['promotionprice'] = float(item['promotionprice'])
        item['typeabbrev'] = store_trans(item['shop_id'], 'id_2_code')
        return item
def _get_curls(shop_id):
    """Return one randomly chosen ``tb_search_curl`` row for ``shop_id``.

    Args:
        shop_id: Value matched against the ``shop_id`` column.

    Returns:
        A random row from the query result, or ``0`` when the query yields
        no rows.
    """
    candidates = list(
        MySql.cls_get_dict(
            db_setting=test_db,
            t="tb_search_curl",
            c={'shop_id': shop_id},
        )
    )
    # ``0`` is the "nothing found" sentinel callers receive.
    return random.choice(candidates) if candidates else 0
def _get_order_info(self):
    """Find the least recently updated paid order for this store.

    Selects orders of ``self.fromStore`` whose status is '买家已付款', whose
    ``updateTime`` is more than 60 minutes before now and whose ``payTime``
    is before ``yesterday("18:00:00")``, ordered by ``updateTime``.

    Returns:
        A ``(days, order_no)`` tuple taken from the first row: ``days`` is the
        whole-day difference between now and the row's ``createTime``.  When
        no row matches, ``(0, None)``.
    """
    now = datetime.datetime.now()
    # Rows updated within the last hour are left out.
    update_cutoff = (now - datetime.timedelta(minutes=60)).strftime(
        "%Y-%m-%d %H:%M:%S")
    pay_cutoff = yesterday("18:00:00")

    sql = (
        " SELECT tos.orderNo,createTime FROM tb_order_spider tos"
        " WHERE tos.updateTime<'{}' AND tos.`orderStatus` = '买家已付款'"
        " AND tos.`fromStore` = '{}' AND tos.payTime<'{}'"
        " ORDER BY updateTime; "
    ).format(update_cutoff, self.fromStore, pay_cutoff)
    rows = MySql.cls_get_dict(sql=sql)

    if not rows:
        return 0, None

    oldest = rows[0]
    order_no = oldest['orderNo']
    days = (now - oldest['createTime']).days
    return days, order_no
async def get_page(self):
    """Crawl the detail page of every pending order of this store and save it.

    Pending orders are the ``tb_order_spider`` rows with ``isDetaildown`` 0
    and ``fromStore == self.fromStore``.  For each one the detail URL is
    opened, the JSON embedded in the page is parsed, the order-level fields
    are saved through ``TBOrderItem`` and one ``TBOrderDetailItem`` is saved
    per sub-order.

    Returns:
        ``1`` when navigation raises ``errors.PageError`` or
        ``errors.TimeoutError``, or when the page URL matches
        ``login.taobao.com`` after the detail panel failed to appear.
        Otherwise falls off the end (``None``).

    NOTE(review): this block's original indentation was lost; the nesting
    below is reconstructed from the syntax and should be checked against the
    upstream file.
    """
    await self.page.bringToFront()
    # presumably ordered by createTime descending (o/om) -- confirm in MySql
    results = MySql.cls_get_dict(t="tb_order_spider",
                                 cn=["detailURL", "orderNo"],
                                 c={
                                     "isDetaildown": 0,
                                     "fromStore": self.fromStore,
                                 },
                                 o=["createTime"],
                                 om="d")
    for result in results:
        ms = MySql()
        tb_order_item = TBOrderItem(**result)
        logger.info(store_trans(self.fromStore))
        logger.info("开始订单 " + result["orderNo"] + " 详情爬取")
        # Executes at most once: success breaks out, and either navigation
        # error aborts the whole crawl with 1.
        while 1:
            try:
                await self.page.goto(tb_order_item.detailURL)
            except errors.PageError:
                return 1
            except errors.TimeoutError:
                return 1
            else:
                break
        try:
            await self.page.waitForSelector('#detail-panel', timeout=30000)
        except errors.TimeoutError:
            # The detail panel did not show up: run the slider handler, then
            # check whether the browser was redirected to the login page.
            await self.login.slider(self.page)
            is_logout = re.search(r"login.taobao.com", self.page.url)
            if is_logout:
                logger.info("登陆状态超时")
                return 1
            # Skip this order; note that `del ms` below is bypassed here.
            continue
        content = await self.page.content()
        # Pull out the string argument of the page's `JSON.parse('...')` call.
        a = re.search(r"var data = JSON.parse\('(.*)'\);", content).group(1)
        # a = a.encode("").decode("unicode_escape")
        # Un-escape: drop every backslash-backslash-backslash-quote sequence,
        # then turn each remaining backslash-quote into a plain quote.
        b = a.replace('\\\\\\"', '')
        data = b.replace('\\"', '"')
        m = json.loads(data)
        tb_order_item.actualFee = jsonpath(m, '$..actualFee.value')[0]
        # First decimal number that follows "(快递:" in the stringified data.
        tb_order_item.deliverFee = re.findall('\(快递:(\d+\.\d+)', str(m))[0]
        tb_order_item.orderStatus = status_format(
            jsonpath(m, '$..statusInfo.text')[0])
        # isDetaildown: 2 for orders still in '等待买家付款', 1 for all others.
        if tb_order_item.orderStatus == '等待买家付款':
            tb_order_item.isDetaildown = 2
        else:
            tb_order_item.isDetaildown = 1
        tb_order_item.couponPrice = await self.get_coupon(m)
        if jsonpath(m, '$..buyMessage'):
            tb_order_item.buyerComments = jsonpath(m, '$..buyMessage')[0]
        orderNo = m['mainOrder']['id']
        # Name/value entries of the second "lines" group of the order info.
        order_info = m['mainOrder']['orderInfo']['lines'][1]['content']
        for i in range(len(order_info)):
            if order_info[i]['value']['name'] == '支付宝交易号:':
                try:
                    tb_order_item.tradeNo = order_info[i]['value']['value']
                except KeyError:
                    # The entry exists but carries no 'value' key.
                    tb_order_item.tradeNo = None
            elif order_info[i]['value']['name'] == '创建时间:':
                tb_order_item.createTime = order_info[i]['value']['value']
            # Disabled branch kept from the original:
            # elif order_info[i]['value']['name'] == '发货时间:':
            #     tb_order_item = order_info[i]['value']['value']
            elif order_info[i]['value']['name'] == '付款时间:':
                tb_order_item.payTime = order_info[i]['value']['value']
        # Shipping fields are filled only when a logistics name is present.
        if jsonpath(m, '$..logisticsName'):
            tb_order_item.shippingCompany = jsonpath(
                m, '$..logisticsName')[0]
            tb_order_item.shippingMethod = jsonpath(m, '$..shipType')[0]
            tb_order_item.shippingNo = jsonpath(m, '$..logisticsNum')[0]
        # The address string is split on ",": field 0 is the receiver name
        # (spaces removed), field 1 the phone, the remaining fields are
        # joined without a separator to form the address.
        rec_info = jsonpath(m, '$..tabs..address')[0]
        tb_order_item.receiverName = rec_info.split(",")[0].replace(
            " ", "")
        tb_order_item.receiverPhone = rec_info.split(",")[1]
        tb_order_item.receiverAddress = "".join(rec_info.split(",")[2:])
        tb_order_item.save(ms)
        sub_orders = m['mainOrder']['subOrders']
        for i in range(len(sub_orders)):
            tb_order_detail_item = TBOrderDetailItem(orderNo=orderNo,
                                                     itemNo=i)
            tb_order_detail_item.unitBenefits = 0
            if sub_orders[i]['promotionInfo']:
                for j in sub_orders[i]['promotionInfo']:
                    for x in j['content']:
                        for k, v in x.items():
                            if k == 'value':
                                # Add the last decimal number found in the
                                # promotion text, unless the text starts
                                # with "Exercise".
                                f_prom = re.match("Exercise", v)
                                p_list = re.findall("-?\d+\.\d+", v)
                                if p_list and not f_prom:
                                    tb_order_detail_item.unitBenefits += float(
                                        p_list.pop())
            tb_order_detail_item.save(ms)
        del ms
        # Pause between orders (15 s with random_sleep enabled).
        await my_async_sleep(seconds=15, random_sleep=True)
    # NOTE(review): placement of verify() is ambiguous in the flattened
    # source; it is put after the loop here -- confirm it was not per order.
    verify()