Exemple #1
0
async def run():
    """Run the spider watchdog loop forever, one pass every 60 seconds.

    Each pass:
      1. calls ``update()``;
      2. writes the current time into ``spider_monitor.latest_time`` for
         this ``SPIDER_ADDRESS`` and reads back its ``restart_signal``;
      3. fetches ``MAX(updateTime)`` per store from ``tb_order_spider``
         (stores KY/TB on the "3_floor" machine, YJ/YK elsewhere) and
         forces a restart if any store's value sorts before
         ``time_ago(minutes=15)``;
      4. when a restart is due, clears ``restart_signal`` and calls
         ``restart()``.

    NOTE(review): the staleness test is a *string* comparison of
    ``str(updateTime)`` against ``time_ago(...)`` -- it assumes both use
    the same sortable timestamp format; confirm against ``time_ago``.
    """
    while True:
        update()

        monitor_db = MySql(db_setting=TEST_SERVER_DB_TEST)
        # Heartbeat: record that this spider host is still alive.
        monitor_db.update(
            t="spider_monitor",
            set={"latest_time": time_now()},
            c={"spider_address": SPIDER_ADDRESS},
        )
        restart_signal = monitor_db.get_one(
            t="spider_monitor",
            cn=["restart_signal"],
            c={"spider_address": SPIDER_ADDRESS},
        )

        # Each host is responsible for a different pair of stores.
        freshness_sql = (
            "SELECT MAX(updateTime) as updateTime,fromStore FROM tb_order_spider WHERE fromStore IN ('KY','TB') GROUP BY fromStore"
            if SPIDER_ADDRESS == "3_floor"
            else "SELECT MAX(updateTime) as updateTime,fromStore FROM tb_order_spider WHERE fromStore IN ('YJ','YK') GROUP BY fromStore"
        )
        latest_per_store = MySql.cls_get_dict(sql=freshness_sql)
        threshold = time_ago(minutes=15)

        # Any store whose newest row is older than the threshold means
        # the spider has stalled, regardless of the stored signal.
        if any(str(row['updateTime']) < threshold
               for row in latest_per_store):
            restart_signal = 1

        if restart_signal:
            # Clear the flag before restarting so it is not acted on twice.
            monitor_db.update(
                t="spider_monitor",
                set={"restart_signal": 0},
                c={"spider_address": SPIDER_ADDRESS},
            )
            restart()

        del monitor_db
        await asyncio.sleep(60)
Exemple #2
0
 def _get_item():
     """Block until one crawlable ``tb_master`` row exists, then return it.

     The query asks for a single row with ``isUsed = 0`` and ``isMut = 1``
     (the ``"flag!"`` key presumably means ``flag != 'XiaJia'`` and
     ``l=["0", "1"]`` presumably ``LIMIT 0, 1`` -- confirm against
     ``MySql.cls_get_dict``).

     Returns:
         The first result dict, with ``price_tb`` and ``promotionprice``
         converted to ``float`` and an extra ``typeabbrev`` key holding
         ``store_trans(shop_id, 'id_2_code')``.
     """
     wanted_columns = [
         "shop_id",
         "link_id",
         "description",
         "price_tb",
         "promotionprice",
         "sales",
         "rates",
     ]
     while True:
         rows = MySql.cls_get_dict(
             db_setting=TEST_SERVER_DB_TEST,
             t="tb_master",
             c={"isUsed": 0, "isMut": 1, "flag!": "XiaJia"},
             cn=wanted_columns,
             l=["0", "1"],
         )
         if not rows:
             # Nothing to crawl yet: log, wait, and poll again.
             logger.info('没有数据需要爬取!')
             my_sleep()
             continue

         item = rows[0]
         for price_key in ('price_tb', 'promotionprice'):
             item[price_key] = float(item[price_key])
         item['typeabbrev'] = store_trans(item['shop_id'], 'id_2_code')
         return item
Exemple #3
0
 def _get_curls(shop_id):
     """Return one randomly chosen ``tb_search_curl`` row for *shop_id*.

     Args:
         shop_id: value matched against the ``shop_id`` column.

     Returns:
         A random row from the query result, or ``0`` when the query
         yields no rows.
     """
     candidates = list(
         MySql.cls_get_dict(db_setting=test_db,
                            t="tb_search_curl",
                            c={'shop_id': shop_id}))
     return random.choice(candidates) if candidates else 0
 def _get_order_info(self):
     """Find the least recently updated paid order of this store.

     Selects orders of ``self.fromStore`` whose status is '买家已付款'
     (buyer has paid), whose ``updateTime`` is more than 60 minutes old
     and whose ``payTime`` is before ``yesterday("18:00:00")``
     (presumably 18:00 of the previous day -- confirm against
     ``yesterday``), ordered by ``updateTime``.

     Returns:
         ``(days, order_no)`` where ``order_no`` is the first matching
         order number and ``days`` is the whole-day age of that order's
         ``createTime``; ``(0, None)`` when nothing matches.
     """
     now = datetime.datetime.now()
     stale_before = now - datetime.timedelta(minutes=60)
     update_cutoff = stale_before.strftime("%Y-%m-%d %H:%M:%S")
     pay_cutoff = yesterday("18:00:00")
     query = """      
                            SELECT 
                            tos.orderNo,createTime
                            FROM tb_order_spider tos
                            WHERE  tos.updateTime<'{}'
                            AND tos.`orderStatus` = '买家已付款' 
                            AND tos.`fromStore` = '{}' 
                            AND tos.payTime<'{}'
                            ORDER BY updateTime;
                            """.format(update_cutoff, self.fromStore, pay_cutoff)
     rows = MySql.cls_get_dict(sql=query)
     if not rows:
         return 0, None

     oldest = rows[0]
     order_no = oldest['orderNo']
     days = (now - oldest['createTime']).days
     return days, order_no
    async def get_page(self):
        """Crawl the detail page of every pending order of ``self.fromStore``.

        Reads ``detailURL``/``orderNo`` from ``tb_order_spider`` rows with
        ``isDetaildown = 0`` for this store, opens each URL in
        ``self.page``, parses the JSON embedded in the page
        (``var data = JSON.parse('...')``) and saves one ``TBOrderItem``
        plus one ``TBOrderDetailItem`` per sub-order. After all orders
        are processed ``verify()`` is called.

        Returns:
            ``1`` if navigation raises ``errors.PageError`` or
            ``errors.TimeoutError``, or if the page ends up on
            ``login.taobao.com`` after the detail panel fails to appear;
            otherwise ``None``.

        NOTE(review): a page without the embedded JSON or without a
        '(快递:<amount>' fragment raises ``AttributeError`` /
        ``IndexError`` here, as it did before this revision.
        """
        await self.page.bringToFront()

        results = MySql.cls_get_dict(t="tb_order_spider",
                                     cn=["detailURL", "orderNo"],
                                     c={
                                         "isDetaildown": 0,
                                         "fromStore": self.fromStore,
                                     },
                                     o=["createTime"],
                                     om="d")
        for result in results:
            ms = MySql()
            tb_order_item = TBOrderItem(**result)
            logger.info(store_trans(self.fromStore))
            logger.info("开始订单 " + result["orderNo"] + " 详情爬取")

            # A failed navigation aborts the whole crawl; the caller is
            # told via the return value 1.
            try:
                await self.page.goto(tb_order_item.detailURL)
            except (errors.PageError, errors.TimeoutError):
                return 1

            try:
                await self.page.waitForSelector('#detail-panel', timeout=30000)
            except errors.TimeoutError:
                # Presumably a slider captcha is blocking the page --
                # let the login helper deal with it (TODO confirm).
                await self.login.slider(self.page)
                is_logout = re.search(r"login.taobao.com", self.page.url)
                if is_logout:
                    logger.info("登陆状态超时")
                    return 1
                # Skip this order and move on to the next one (note: the
                # per-order sleep below is skipped as well).
                continue

            content = await self.page.content()
            raw_json = re.search(r"var data = JSON.parse\('(.*)'\);",
                                 content).group(1)
            # Undo the JavaScript string escaping: drop triple-escaped
            # quotes first, then turn the remaining \" into plain quotes.
            unescaped = raw_json.replace('\\\\\\"', '').replace('\\"', '"')
            m = json.loads(unescaped)

            tb_order_item.actualFee = jsonpath(m, '$..actualFee.value')[0]
            # Raw string: '\(' and '\d' are invalid escapes in a normal
            # literal and trigger a SyntaxWarning on Python 3.12+.
            tb_order_item.deliverFee = re.findall(r'\(快递:(\d+\.\d+)',
                                                  str(m))[0]
            tb_order_item.orderStatus = status_format(
                jsonpath(m, '$..statusInfo.text')[0])
            tb_order_item.isDetaildown = (
                2 if tb_order_item.orderStatus == '等待买家付款' else 1)
            tb_order_item.couponPrice = await self.get_coupon(m)

            buy_message = jsonpath(m, '$..buyMessage')
            if buy_message:
                tb_order_item.buyerComments = buy_message[0]

            orderNo = m['mainOrder']['id']
            order_info = m['mainOrder']['orderInfo']['lines'][1]['content']
            for line in order_info:
                field_name = line['value']['name']
                if field_name == '支付宝交易号:':
                    try:
                        tb_order_item.tradeNo = line['value']['value']
                    except KeyError:
                        tb_order_item.tradeNo = None
                elif field_name == '创建时间:':
                    tb_order_item.createTime = line['value']['value']
                elif field_name == '付款时间:':
                    tb_order_item.payTime = line['value']['value']

            logistics_name = jsonpath(m, '$..logisticsName')
            if logistics_name:
                tb_order_item.shippingCompany = logistics_name[0]
                tb_order_item.shippingMethod = jsonpath(m, '$..shipType')[0]
                tb_order_item.shippingNo = jsonpath(m, '$..logisticsNum')[0]

            # The address string is "name,phone,address parts..." joined
            # by commas.
            rec_parts = jsonpath(m, '$..tabs..address')[0].split(",")
            tb_order_item.receiverName = rec_parts[0].replace(" ", "")
            tb_order_item.receiverPhone = rec_parts[1]
            tb_order_item.receiverAddress = "".join(rec_parts[2:])
            tb_order_item.save(ms)

            for item_no, sub_order in enumerate(m['mainOrder']['subOrders']):
                tb_order_detail_item = TBOrderDetailItem(orderNo=orderNo,
                                                         itemNo=item_no)
                tb_order_detail_item.unitBenefits = 0
                if sub_order['promotionInfo']:
                    for promotion in sub_order['promotionInfo']:
                        for entry in promotion['content']:
                            for k, v in entry.items():
                                if k != 'value':
                                    continue
                                f_prom = re.match("Exercise", v)
                                p_list = re.findall(r"-?\d+\.\d+", v)
                                # Only the last decimal amount in the
                                # text counts towards the benefit.
                                if p_list and not f_prom:
                                    tb_order_detail_item.unitBenefits += float(
                                        p_list[-1])
                tb_order_detail_item.save(ms)

            del ms
            await my_async_sleep(seconds=15, random_sleep=True)
        verify()