Beispiel #1
0
 def save_in_sql(self, item, tableName):
     """Insert ``item`` into ``tableName`` or update the row already matching it.

     Items that carry a ``goodsCode`` key are matched on ``orderNo`` plus
     ``itemNo``; every other item is matched on ``orderNo`` alone.

     :param item: column/value mapping to persist.  Must contain ``orderNo``
         (and ``itemNo`` when ``goodsCode`` is present).  ``goodsCode`` is
         removed from it in place when an existing row is updated.
     :param tableName: name of the table to write to.
     """
     has_goods_code = 'goodsCode' in item
     lookup = {'orderNo': item['orderNo']}
     if has_goods_code:
         lookup['itemNo'] = item['itemNo']

     if not mysql.get_data(t=tableName, c=lookup):
         # No matching row yet: store the item as it is.
         mysql.insert_data(t=tableName, d=item)
         return

     if has_goods_code:
         # goodsCode is left out of the UPDATE payload for existing rows.
         item.pop("goodsCode")
     mysql.update_data(t=tableName, set=item, c=lookup)
Beispiel #2
0
def Verify():
    """Cross-check unverified orders against their detail lines.

    For every row of ``tb_order_spider`` with ``isVerify='0'`` and
    ``isDetaildown='1'`` the detail lines in ``tb_order_detail_spider`` are
    summed (``unitPrice * sellNum - unitBenefits``) and compared with the
    order's fees.  When the integer part of the difference is non-zero and
    the order status is not '交易关闭', the order is flagged ``isVerify=2``
    and added to a report; otherwise it is flagged ``isVerify=1``.  The
    report, if any, is mailed, and ``taobao_check()`` runs at the end.
    """
    mismatches = []
    orders = mysql.get_data(
        t="tb_order_spider",
        cn=[
            'orderNo', 'deliverFee', 'actualFee', 'couponPrice', 'fromStore',
            'orderStatus'
        ],
        c={'isVerify': '0', 'isDetaildown': '1'})

    for order in orders or ():
        orderNo = order[0]
        deliverFee = order[1]
        actualFee = order[2]
        couponPrice = order[3]
        fromStore = order[4]

        detail_lines = mysql.get_data(
            t="tb_order_detail_spider",
            cn=['unitPrice', 'sellNum', 'unitBenefits'],
            c={'orderNo': orderNo})

        # Accumulate line by line (same evaluation order as before) so that
        # floating point results stay identical.
        total = 0
        for line in detail_lines:
            total = total + line[0] * line[1] - line[2]

        difference = round(total, 3) + deliverFee - actualFee - couponPrice
        is_mismatch = int(difference) != 0 and order[5] != '交易关闭'
        if is_mismatch:
            report_fields = [
                str(round(total, 2)),
                str(deliverFee),
                str(actualFee),
                str(couponPrice),
                str(round(difference, 2)),
                store_trans(fromStore),
                orderNo,
            ]
            mismatches.append("|".join(report_fields))

        mysql.update_data(t="tb_order_spider",
                          set={'isVerify': 2 if is_mismatch else 1},
                          c={'orderNo': orderNo})

    if mismatches:
        mail("数据异常报告", "\n".join(mismatches), ["*****@*****.**"])
    taobao_check()
    def report_in(self, **kwargs):
        """Record a report entry in the ``update_reports`` table.

        When ``kwargs['flag']`` is ``'lookup'`` the ``lookup`` counter read
        from the row whose ``link_id`` is ``"count"`` is incremented and
        written back together with ``last_time``.  For any other flag,
        ``self.report_item`` is filled from ``kwargs`` and inserted as a new
        row.

        :param kwargs: always ``flag``, ``shop_id`` and ``SpiderDate``; for a
            non-lookup flag also ``stockid``, ``link_id``, ``price_tb``,
            ``attribute`` and ``description``.
        """
        if kwargs['flag'] != 'lookup':
            # (report_item column, kwargs key) pairs, copied in this order.
            field_sources = (
                ('stockid', 'stockid'),
                ('link_id', 'link_id'),
                ('shop_id', 'shop_id'),
                ('price_tb', 'price_tb'),
                ('last_time', 'SpiderDate'),
                ('attribute', 'attribute'),
                ('flag', 'flag'),
                ('description', 'description'),
            )
            for column, source in field_sources:
                self.report_item[column] = kwargs[source]
            mysql.insert_data(t="update_reports",
                              d=self.report_item,
                              db=self.db_test)
            return

        counter_rows = mysql.get_data("update_reports",
                                      l=1,
                                      cn=["lookup"],
                                      c={"link_id": "count"},
                                      db=self.db_test)
        new_values = {
            'lookup': counter_rows[0][0] + 1,
            'last_time': kwargs['SpiderDate'],
        }
        where = {'link_id': 'count', 'shop_id': kwargs['shop_id']}
        # NOTE(review): the counter is read with db=self.db_test, but this
        # update passes no db argument -- confirm both hit the same database.
        mysql.update_data(t="update_reports", set=new_values, c=where)
    def data_compare(self, **kwargs):
        """Compare a scraped unit price with the one stored in ``prices_tb``.

        :param kwargs: ``goodsCode``, ``link_id``, ``fromStore`` (used to
            look the row up) and ``unitPrice`` (the new price).
        :return: ``("创建", None)`` when no row is stored;
            ``("更新", ratio)`` when the stored ``SpiderDate`` equals
            ``'0000-00-00 00:00:00'``, when the price differs by at least
            0.01, or when the stored date is more than 7 days old;
            ``(None, None)`` otherwise.  ``ratio`` is the new price divided
            by the stored price.
        """
        where = {
            "stockid": kwargs["goodsCode"],
            "link_id": kwargs["link_id"],
            "shop_id": self.shop_id(kwargs["fromStore"]),
        }
        rows = mysql.get_data(t="prices_tb",
                              cn=["price_tb", "SpiderDate"],
                              c=where)
        if not rows:
            return "创建", None

        stored = rows[0]
        new_price = float(kwargs['unitPrice'])
        old_price = float(stored[0])
        price_delta = new_price - old_price
        ratio = new_price / old_price

        last_spider = stored[1]
        if last_spider == '0000-00-00 00:00:00':
            return "更新", ratio

        age_in_days = (datetime.datetime.now() - last_spider).days
        if abs(price_delta) >= 0.01 or age_in_days > 7:
            # Keep the previous price for the report written by the caller.
            self.report_item['price_before'] = stored[0]
            return "更新", ratio
        return None, None
Beispiel #5
0
 async def save(self):
     """
     Persist the records produced by ``_parse()`` into ``tb_master``.

     A record whose ``link_id`` already exists is updated; its ``flag``
     lists which of price / promotion price / sales count changed and its
     ``narrative`` describes each change.  Unknown records are inserted
     with ``flag='insert'``.  A falsy record stops processing and makes the
     method return 0.
     """
     # (column, flag suffix, narrative template) for every tracked column.
     tracked = (
         ('price', "price", "更新销售价格:[{}]=>[{}]"),
         ('promotionPrice', "promotion", "更新优惠售价格:[{}]=>[{}]"),
         ('sale_num', "sale", "更新销量:[{}]=>[{}]"),
     )
     async for record in self._parse():
         if not record:
             print("需要要切换淘宝账户")
             return 0

         existing = mysql.get_data(db=test_server, t="tb_master",
                                   c={'link_id': record['link_id']},
                                   dict_result=True)
         if not existing:
             record['flag'] = 'insert'
             mysql.insert_data(db=test_server, t="tb_master", d=record)
             continue

         stored = existing[0]
         flag_parts = ["update"]
         changes = []
         for column, suffix, template in tracked:
             if stored[column] != record[column]:
                 flag_parts.append(suffix)
                 changes.append(template.format(stored[column], record[column]))
         record['flag'] = "_".join(flag_parts)
         record['narrative'] = ";".join(changes)
         mysql.update_data(db=test_server, t='tb_master', set=record,
                           c={"link_id": record['link_id']})
Beispiel #6
0
async def run():
    """Log in to Taobao via QR code and record request parameters of a shop.

    Opens the login page, saves a screenshot of the QR code element to
    ``./qrcode.png`` and displays it, then waits for the navigation that
    follows.  Afterwards the shop ids are read from ``shop_info`` and, for
    the first of them, the user agent, cookies, referer and several hidden
    form values of the shop page are stored in ``user_record``.
    """
    browser = await launch(**dev)
    page = await browser.newPage()

    await page.setViewport({"width": 1440, "height": 900})
    await page.goto("https://login.taobao.com")
    if await page.J(".module-static"):
        switch = await page.J(".login-switch")
        bounds = await switch.boundingBox()
        await page.mouse.click(bounds['x'] + 10, bounds['y'])

    # Keep retrying until the QR code screenshot succeeds; any failure
    # simply restarts the attempt.
    while True:
        try:
            await page.waitForSelector("#J_QRCodeImg")
            qr_element = await page.J("#J_QRCodeImg")
            await qr_element.screenshot({'path': './qrcode.png'})
            break
        except Exception:
            pass

    # Show the saved image (read from the current working directory).
    plt.imshow(mpimg.imread('qrcode.png'))
    plt.axis('off')  # hide the axes
    plt.show()

    await page.waitForNavigation()
    start_url = 'https://shop.taobao.com/'
    # All shop ids except the placeholder '88888888'.
    sql = "select shop_id from shop_info where shop_id!='88888888'"
    shop_infos = mysql.get_data(sql=sql, dict_result=True)

    for shop_info in shop_infos:
        shop_id = shop_info['shop_id']
        item = {"shop_id": shop_id}
        await page.goto(start_url.replace("shop", "shop" + shop_id))
        await asyncio.sleep(5)

        trigger = ".all-cats-trigger.popup-trigger"
        await page.waitForSelector(trigger)
        await page.click(trigger)
        await asyncio.sleep(5)

        item['user_agent'] = await browser.userAgent()
        cookie_jar = await page.cookies()
        item['cookies'] = ";".join(
            cookie['name'] + "=" + cookie['value'] for cookie in cookie_jar)
        item['refer'] = page.url

        await page.waitForSelector('input[name="_ksTS"]')
        html = await page.content()
        print(html)
        doc = PyQuery(html)

        # Only the part after the last underscore of _ksTS is kept; the
        # callback name is "jsonp" followed by that number plus one.
        raw_ks_ts = doc('input[name="_ksTS"]').val()
        item['_ksTS'] = raw_ks_ts.split("_")[-1]
        item['callback'] = 'jsonp' + str(int(item['_ksTS']) + 1)
        for field in ('mid', 'wid'):
            item[field] = doc('input[name="' + field + '"]').val()
        item['spm'] = doc('div.pagination form input[name="spm"]').val()

        mysql.insert_data(db=test_server, t='user_record', d=item)
        # Only the first shop is processed.
        break
Beispiel #7
0
 def _get_item():
     """Yield the unused ``tb_master`` row for link_id 586886697621.

     The query is re-run before every yield; the generator ends as soon as
     it returns no row.
     """
     query = "SELECT shop_id,link_id,description,price,promotionPrice,sale_num FROM tb_master WHERE isUsed=0 and link_id='{}' LIMIT 1".format(
         "586886697621")
     while True:
         rows = mysql.get_data(db=test_server, sql=query, dict_result=True)
         if not rows:
             return
         yield rows[0]
 def shop_id(self, fromStore):
     """Return the first column of the ``shop_info`` row for ``fromStore``.

     The row is selected by ``typeabbrev == fromStore`` and
     ``shopindex == 0``.

     :param fromStore: value matched against the ``typeabbrev`` column.
     :return: first field of the first matching row, or ``None`` when the
         query yields nothing.
     """
     where = {"typeabbrev": fromStore, "shopindex": 0}
     rows = mysql.get_data(t="shop_info", l=1, c=where)
     return rows[0][0] if rows else None
Beispiel #9
0
def is_in_database(data, col_name, table_name):
    """Tell whether ``data`` is already stored in the database.

    :param data: value to look for.
    :param col_name: passed to ``mysql.get_data`` as its second argument.
    :param table_name: passed to ``mysql.get_data`` as its third argument.
    :return: ``True`` when ``data`` equals the first field of any returned
        row, ``False`` otherwise.
    """
    cursor = mysql.connet_mysql()
    rows = mysql.get_data(cursor, col_name, table_name)
    return data in [row[0] for row in rows]
Beispiel #10
0
 def shop_id(self, fromStore):
     """Return the ``shop_id`` stored in ``shop_info`` for ``fromStore``.

     Sleeps one second before querying.  The row is selected by
     ``typeabbrev == fromStore`` and ``shopindex == 0``.

     :param fromStore: value matched against the ``typeabbrev`` column.
     :return: the ``shop_id`` field of the first matching row.
     """
     time.sleep(1)
     where = {"typeabbrev": fromStore, "shopindex": 0}
     rows = mysql.get_data(t="shop_info", l=1, cn=["shop_id"], c=where)
     # NOTE(review): no guard for an empty result -- an unknown store fails
     # on the indexing below; confirm that callers rely on this.
     first_row = rows[0]
     return first_row[0]
 def get_link_id(self, **kwargs):
     """Look up the ``link_id`` of a product in ``prices_tb``.

     :param kwargs: ``fromStore`` (resolved to a shop id via
         ``self.shop_id``), ``goodsCode`` (matched against ``stockid``) and
         ``goodsAttribute`` (matched against ``attribute``).
     :return: the ``link_id`` when exactly one row matches, else ``None``.
     """
     shop = self.shop_id(fromStore=kwargs['fromStore'])
     where = {
         "stockid": kwargs['goodsCode'],
         "shop_id": shop,
         "attribute": kwargs['goodsAttribute'],
     }
     matches = mysql.get_data(t="prices_tb", cn=["link_id"], c=where)
     if len(matches) != 1:
         # Zero or several candidates: the link cannot be identified.
         return None
     return matches[0][0]
Beispiel #12
0
    async def verify(self, p):
        """Complete the phone verification dialog if one appears on ``p``.

        Waits up to 10 s for ``div.aq_overlay_mask``; when it does not show
        up nothing happens.  Otherwise a code is requested through the
        second frame's ``.J_SendCodeBtn`` and obtained either from the
        ``phone_verify`` table (when ``LINUX`` is set) or from console
        input, typed into ``.J_SafeCode`` and submitted.

        :param p: page object to operate on.
        """
        try:
            await p.waitForSelector("div.aq_overlay_mask", timeout=10000)
        except errors.TimeoutError:
            # No verification overlay appeared: nothing to do.
            return

        logger.info("需要要手机验证码")
        if LINUX:
            db_conf = ts.copy()
            db_conf['db'] = "test"
            request_id = random.randint(0, 100)
            # Register a row for this request, then poll it for the code.
            mysql.insert_data(db=db_conf,
                              t="phone_verify",
                              d={"id": request_id})
            frames = p.frames
            net_check()
            while True:
                net_check()
                await frames[1].click(".J_SendCodeBtn")
                # Poll every 5 s, 120 times, for a code other than "0".
                for _ in range(120):
                    await asyncio.sleep(5)
                    rows = mysql.get_data(
                        db=db_conf,
                        cn=["verify_code"],
                        t="phone_verify",
                        c={"id": request_id},
                    )
                    verify_code = rows[0][0]
                    if verify_code != "0":
                        mysql.delete_data(db=db_conf,
                                          t="phone_verify",
                                          c={"id": request_id})
                        break
                else:
                    # Still no code: pause, then request a new one.
                    await asyncio.sleep(10)
                    continue
                break
        else:
            frames = p.frames
            net_check()
            await frames[1].click(".J_SendCodeBtn")
            verify_code = input(time_now() + " | 请输入6位数字验证码:")

        typing_delay = self.input_time_random() - 50
        await frames[1].type(".J_SafeCode", verify_code,
                             {'delay': typing_delay})
        net_check()
        await frames[1].click("#J_FooterSubmitBtn")
Beispiel #13
0
 async def run_order_detail_spider(self):
     """Run the order-detail spider for the store with the largest backlog.

     Picks the ``fromStore`` with the most ``tb_order_spider`` rows where
     ``isDetaildown=0``, logs in with that store's ``STORE_INFO`` entry and
     calls ``order_detail_spider``.  When no such rows exist,
     ``run_link_spider`` is called instead.
     """
     sql = """
             SELECT COUNT(id),fromStore 
             FROM tb_order_spider 
             WHERE isDetaildown=0 
             GROUP BY fromStore 
             ORDER BY COUNT(id) DESC 
             LIMIT 1
             """
     backlog = mysql.get_data(sql=sql)
     if not backlog:
         await self.run_link_spider()
         return

     store = backlog[0][1]
     _browser, page, from_store = await self.login(**STORE_INFO[store])
     await self.order_detail_spider(page, from_store)
Beispiel #14
0
 async def run_link_spider(self):
     """Run the link spider for the store with the most pending detail rows.

     Pending rows are ``tb_order_detail_spider`` rows with ``link_id="1"``
     and a non-NULL ``url``; they are counted per ``fromStore`` and the
     store with the highest count is processed via ``link_spider`` after
     logging in with its ``STORE_INFO`` entry.  When nothing is pending,
     orders with ``isDetaildown=2`` are reset to ``0`` and
     ``run_order_detail_spider`` is called.
     """
     sql = """
     SELECT COUNT(a.id),fromStore FROM tb_order_detail_spider a
     JOIN tb_order_spider b ON a.`orderNo`=b.`orderNo`
     WHERE link_id="1" AND a.url IS NOT NULL
     GROUP BY fromStore
     ORDER BY COUNT(a.id) DESC
     """
     # Pause without blocking the event loop (time.sleep would stall every
     # other task for the whole two seconds).
     await asyncio.sleep(2)
     res = mysql.get_data(sql=sql)
     if res:
         b, p, f = await self.login(**STORE_INFO[res[0][1]])
         await self.link_spider(p, f)
     else:
         mysql.update_data(t="tb_order_spider",
                           set={"isDetaildown": 0},
                           c={"isDetaildown": 2})
         # NOTE(review): this call and run_order_detail_spider call each
         # other when neither has work, so the call depth keeps growing.
         await self.run_order_detail_spider()
Beispiel #15
0
def load_day(date_load, online, lmysql, lbigquery):
    """
    Get one day of data from MySQL and load it into BigQuery.

    The day's data is deleted from BigQuery first.

    :param date_load: day to load
    :param online: if True, never finish: load data, sleep 60 s and load
        whatever is new; if False, load once and return
    :param lmysql: MySQL connection
    :param lbigquery: BigQuery connection
    :return: None

    The "print only" switch is still read from the module-level
    ``args.only_print``, as there is no parameter for it.
    """
    print("Load day, delete " + str(date_load) + " online " + str(online))
    big_query.delete_day(date_load, lbigquery)
    print("Load day " + str(date_load) + " online " + str(online))
    last_index = 0  # Last index read from the MySQL database
    while True:
        # Get data from MySQL, starting after the last index already read.
        data = mysql.get_data(lmysql, date_load, last_index)
        data = process_daraframe(data)
        if data is not None:
            # Column 15 holds the database index: remember it for the next
            # query and remove it from the dataframe before loading.
            print("Loading " + str(data.shape))
            last_index = data[15].max()
            data = data.drop(columns=15)
            if args.only_print:
                print_lines(data)
            else:
                # Online: keep the last five windows for data still to
                # come.  Offline: no future data, so keep nothing.
                load_df(data, window_remain=5 if online else 0)
        # Honour the ``online`` argument instead of the global args.online,
        # so the function behaves as its signature documents.
        if not online:
            break
        sleep(60)
Beispiel #16
0
def competitor_data():
    """Store the competitor price rows submitted with the current request.

    On POST, the form's ``stockid[]``, ``attribute[]``, ``price_tb[]`` and
    ``package_number[]`` lists describe one row per index; the remaining
    form fields are shared by all rows.  Rows with an empty stock id are
    skipped.  Each row is upserted into ``prices_tb`` keyed by ``stockid``
    and ``link_id``.

    :return: the confirmation text "添加成功!" (for every request method).
    """
    if request.method != 'POST':
        return "添加成功!"

    form = request.form
    shared = form.to_dict()
    per_row = {}
    # Pull the per-row lists out and drop them from the shared fields.
    for field in ("stockid[]", "attribute[]", "price_tb[]",
                  "package_number[]"):
        per_row[field] = form.getlist(field)
        shared.pop(field)

    for idx, stockid in enumerate(per_row["stockid[]"]):
        item = shared.copy()
        item['stockid'] = stockid
        if not stockid:
            continue
        item['attribute'] = per_row["attribute[]"][idx]
        item['price_tb'] = per_row["price_tb[]"][idx]

        # The row's own package number wins when above 1, then the shared
        # "package_number_t" value, otherwise 1.
        shared_packages = item.pop("package_number_t")
        row_packages = per_row["package_number[]"][idx]
        if int(row_packages) > 1:
            item['package_number'] = row_packages
        elif int(shared_packages) > 1:
            item['package_number'] = shared_packages
        else:
            item['package_number'] = 1

        where = {"stockid": item["stockid"], "link_id": item["link_id"]}
        if mysql.get_data(c=where, t="prices_tb"):
            mysql.update_data(set=item, c=where, t="prices_tb")
        else:
            mysql.insert_data(d=item, t="prices_tb")
    return "添加成功!"
Beispiel #17
0
    async def _get_html(self, speed=1):
        """
        Page through the search results of every shop and yield their HTML.

        Yields ``(html, shop_id)`` for each page whose listing grid loaded,
        and ``(0, 0)`` whenever a frame contains ``#TPL_username_1``
        (presumably the login form -- confirm).  Paging progress is
        persisted through ``Format`` and reset for all shops at the end.

        :param speed: interval between page turns, in seconds
        :return: async generator over the crawled pages' HTML content
        """
        sql = "select shop_id from shop_info where shop_id!='88888888'"  # fetch all shop ids
        shop_infos = mysql.get_data(sql=sql, dict_result=True)
        shop_ids = []
        for shop_info in shop_infos:
            page_control = Format._read(shop_id=shop_info['shop_id'], flag="total_page")  # locally stored total page count of the shop
            if not page_control:
                page_control = 1000  # no total stored yet: assume 1000 pages

            shop_ids.append(shop_info['shop_id'])  # remember the shop id so its paging data can be reset later

            url = self.start_url.replace("shop", "shop" + shop_info["shop_id"])  # build the shop's home page url
            await self.page.goto(url)
            await self._jump_to_search_page()
            page_num = Format._read(shop_info['shop_id'], "page_num")  # read the locally stored page_num

            while page_num < page_control:
                start_time = time.time()  # time at which work on this page started

                try:
                    # if page_num:
                    await self._goto_last_page_num(page_num + 1)
                    await asyncio.sleep(5)
                    frames = self.page.frames
                    for f in frames:
                        if await f.J("#TPL_username_1"):
                            # Signal the consumer with a (0, 0) sentinel.
                            yield 0, 0
                    frame = await self.login.get_nc_frame(frames=frames)
                    if frame:
                        await self.login.slider(self.page, 1)

                except Exception as e:
                    # Any failure: report it, wait and retry the same page.
                    print(e)
                    await asyncio.sleep(5)
                    continue
                try:
                    await self.page.waitForSelector(".shop-hesper-bd.grid")
                except errors.TimeoutError:
                    # The listing grid never appeared: stop paging this shop.
                    break
                except Exception as e:
                    print(e)
                    continue
                Format._write(shop_id=shop_info['shop_id'], flag="page_num", value=page_num + 1)  # persist the page number to crawl next
                page_num = Format._read(shop_info['shop_id'], "page_num")  # read back the page number to crawl next

                yield await self.page.content(), shop_info['shop_id']  # yield the page HTML and the shop id

                page_control = Format._read(shop_id=shop_info['shop_id'], flag="total_page")  # re-read the locally stored total page count

                await asyncio.sleep(speed)  # interval between page turns
                spent_time_this_page = time.time() - start_time  # time spent on this page
                spent_time = Format._read(shop_id=shop_info['shop_id'], flag="spent_time")  # time accumulated so far, stored locally
                Format._write(shop_id=shop_info['shop_id'], flag="spent_time",
                              value=spent_time + spent_time_this_page)  # add this page's time and store the sum locally
            is_mail = Format._read(shop_info['shop_id'], "mail")
            if not is_mail:
                # No "mail" marker stored for this shop yet: run the report.
                Reports().report(shop_info['shop_id'].split(" "))

        for shop_id in shop_ids:
            Format._del(shop_id=shop_id, flag="page_num")  # reset the paging position
            Format._del(shop_id=shop_id, flag="total_page")  # reset the total page count
            Format._del(shop_id=shop_id, flag="mail")  # reset the mail marker
            Format._del(shop_id=shop_id, flag="spent_time")  # reset the accumulated time
Beispiel #18
0
             report_mail()
             t1, t2, t3 = 1, 0, 0
         elif t[1] < now < t[2] and t2 == 0:
             report_mail()
             t1, t2, t3 = 0, 1, 0
         elif now > t[2] and t3 == 0:
             report_mail()
             t1, t2, t3 = 0, 0, 1
         sql = """
         SELECT fromStore
         FROM prices_tb_fix WHERE isComplete='0'
         GROUP BY fromStore ORDER BY COUNT(link_id) DESC
         """
         ts = test_server.copy()
         ts['db'] = 'test'
         res = mysql.get_data(db=ts, sql=sql)
         if res:
             b, p, f = loop.run_until_complete(
                 ss.login(**STORE_INFO[res[0][0]]))
             ptb = PriceTaoBao(ss, b, p, f)
             loop.run_until_complete(ptb.run())
             loop.run_until_complete(p.close())
             if len(res) == 1:
                 loop.run_until_complete(b.close())
                 ss.b = None
         else:
             sleep(10)
 else:
     b, p, f = loop.run_until_complete(l.login())
     ptb = PriceTaoBao(l, b, p, f)
     loop.run_until_complete(ptb.run())
Beispiel #19
0
 async def order_detail_spider(self, p, f):
     """Scrape the detail page of every pending order of store ``f``.

     Pending orders are ``tb_order_spider`` rows with ``isDetaildown=0``
     for the store; those joined to ``taobaoorders`` with ``Flag = 8`` are
     preferred, and all pending orders are used when that query is empty.
     For each order the detail page is opened, the JSON embedded in it is
     parsed, and the order row plus its ``tb_order_detail_spider`` lines
     are updated.  ``Verify()`` runs after every order, followed by a
     randomised pause.  Finally the page is closed and
     ``run_order_detail_spider`` is called again.

     :param p: page object used for navigation.
     :param f: store name, matched against the ``fromStore`` column.
     """
     sql1 = """
     SELECT datailURL,a.orderNo FROM tb_order_spider a
     JOIN taobaoorders b ON a.orderNo = b.OrderNo
     WHERE  isDetaildown=0 AND fromStore='%s' AND b.Flag = 8
     ORDER BY createTime DESC;
     """ % (f)
     sql = """
         SELECT datailURL,orderNo FROM tb_order_spider 
         WHERE  isDetaildown=0 AND fromStore='%s' 
         ORDER BY createTime DESC
     """ % (f)
     results = mysql.get_data(sql=sql1, dict_result=True)
     if not results:
         results = mysql.get_data(sql=sql, dict_result=True)
     for result in results or ():
         order = {}
         url = result['datailURL']
         try:
             net_check()
             await p.goto(url)
         except errors.TimeoutError:
             continue
         slider = await p.J('#nocaptcha')
         if slider:
             while True:
                 print("出现滑块验证码")
                 await asyncio.sleep(2)
                 await p.hover('#nc_1_n1z')
                 await p.mouse.down()
                 await p.mouse.move(
                     2000, 0, {'delay': random.randint(1000, 2000)})
                 await p.mouse.up()
                 try:
                     # Must be awaited: otherwise the wait never runs and
                     # its TimeoutError can never end this loop.
                     await p.waitForSelector(".nc-lang-cnt a",
                                             timeout=10000)
                     await asyncio.sleep(2)
                     await p.click(".nc-lang-cnt a")
                 except errors.TimeoutError:
                     break
                 except errors.PageError:
                     break
         try:
             await p.waitForSelector('#detail-panel', timeout=30000)
         except Exception:
             continue
         content = await p.content()
         # The page embeds its data as an escaped JSON string literal.
         a = re.search(r"var data = JSON.parse\('(.*)'\);",
                       content).group(1)
         b = a.replace('\\\\\\"', '')
         data = b.replace('\\"', '"')
         m = json.loads(data)
         main_order = m['mainOrder']
         pay_info = main_order['payInfo']

         order['actualFee'] = pay_info['actualFee']['value']
         order['orderStatus'] = status_format(
             main_order['statusInfo']['text'])
         if order['orderStatus'] == '等待买家付款':
             order['isDetaildown'] = 2
         else:
             order['isDetaildown'] = 1

         # Sum the amounts of promotions carrying both a prefix and a
         # suffix.  (The previous "'prefix' and 'suffix' in x" only tested
         # for 'suffix'.)
         coupon = 0
         for promotion in pay_info.get('promotions', ()):
             if 'prefix' in promotion and 'suffix' in promotion:
                 coupon_temp = re.search(r"(\d+\.\d+)", promotion['value'])
                 if coupon_temp:
                     coupon += float(coupon_temp.group(1))
         order['couponPrice'] = round(coupon, 2)

         if 'buyMessage' in m:
             order['buyerComments'] = m['buyMessage']

         orderNo = main_order['id']
         for line in main_order['orderInfo']['lines'][1]['content']:
             entry = line['value']
             if entry['name'] == '支付宝交易号:':
                 # The trade number may be absent from the entry.
                 order['tradeNo'] = entry.get('value')
             elif entry['name'] == '付款时间:':
                 order['payTime'] = entry['value']

         # Logistics keys copied 1:1 into order columns.
         logistics_columns = {
             'logisticsName': 'shippingCompany',
             'shipType': 'shippingMethod',
             'logisticsNum': 'shippingNo',
         }
         for tab in m['tabs']:
             if tab['id'] != 'logistics':
                 continue
             for k, v in tab['content'].items():
                 if k in logistics_columns:
                     order[logistics_columns[k]] = v
                 elif k == 'address':
                     # "name,phone,address..." separated by commas.
                     parts = v.split(",")
                     order['receiverName'] = parts[0].replace(" ", "")
                     order['receiverPhone'] = parts[1]
                     order['receiverAddress'] = "".join(parts[2:])

         # Cancelled sub-orders are skipped and do not consume an itemNo.
         line_no = 0
         for sub_order in main_order['subOrders']:
             if sub_order['tradeStatus'][0]['content'][0][
                     'value'] == '已取消':
                 continue
             itemNo = line_no
             line_no += 1
             benefits = 0
             for promo in sub_order['promotionInfo'] or ():
                 for entry in promo['content']:
                     if 'value' in entry:
                         # The last decimal number in the text is the
                         # benefit amount.
                         p_list = re.findall(r"-?\d+\.\d+", entry['value'])
                         if p_list:
                             benefits += float(p_list[-1])
             mysql.update_data(t="tb_order_detail_spider",
                               set={'unitBenefits': benefits},
                               c={
                                   'orderNo': orderNo,
                                   'itemNo': itemNo
                               })
         mysql.update_data(t="tb_order_spider",
                           set=order,
                           c={'orderNo': orderNo})
         Verify()
         # Randomised pause: draw until a value above 0.9 comes up, then
         # wait that fraction of n_o_time seconds, printing progress.
         while True:
             s = random.random()
             if s > 0.9:
                 for i in range(int(s * n_o_time)):
                     await asyncio.sleep(1)
                     print(">", end="", flush=True)
                 print("")
                 break
     await p.close()
     await self.run_order_detail_spider()
Beispiel #20
0
    async def fix_data(self, link_id=None):
        # page = await self.browser.newPage()
        self.complete = 0
        self.prices = {}
        self.promo_price = {}
        await asyncio.sleep(2)
        await self.page.focus("input[name='queryItemId']")
        await self.page.keyboard.down("ShiftLeft")
        await self.page.keyboard.press("Home")
        await self.page.keyboard.down("ShiftLeft")
        await self.page.keyboard.press("Delete")
        server_name = 'production_server'
        self.sn = server_name
        if not link_id:
            if MODE == 1:
                link_id = "585308692855"
            elif MODE == 2:
                while True:
                    link_id = input(time_now() + " | 输入link_id:")
                    isMatch = re.match("^\d{10,20}$", link_id)
                    if isMatch:
                        break
            elif MODE == 3:
                sql = """
                SELECT link_id,updateTime,server,operator
                FROM prices_tb_fix 
                WHERE fromStore='%s' and isComplete=0
                ORDER BY flag LIMIT 1
                """ % (self.fromStore)
                res = mysql.get_data(sql=sql, db=self.db_test)
                if res:
                    self.target_server = self.server[res[0][2]]
                    link_id = res[0][0]
                    updateTime = res[0][1]
                    server_name = res[0][2]
                    self.sn = server_name
                    self.operator = res[0][3]
                else:
                    return 1

        logger.info(link_id)
        page = await self.browser.newPage()
        await page.setViewport({'width': 1600, 'height': 900})
        net_check()
        await page.goto("https://item.taobao.com/item.htm?id=" + link_id,
                        timeout=0)
        await asyncio.sleep(3)
        error_page = await page.J(".error-notice-hd")  # 判断宝贝是否正常在售
        offline = await page.J("#J_detail_offline")  # 判断宝贝是否正常在售
        if error_page or offline:
            logger.info("商品已下架")
            mysql.update_data(t="prices_tb",
                              set={
                                  "flag": "XiaJia",
                                  "typeabbrev": self.fromStore
                              },
                              c={"link_id": link_id},
                              db=self.target_server)
            # mysql.update_data(t="tb_order_detail_spider",
            #                   set={"link_id": link_id + "xiajia"},
            #                   c={"link_id": link_id},
            #                   db=self.target_server)
            mysql.update_data(db=self.db_test,
                              t="prices_tb_fix",
                              set={
                                  "isComplete": "2",
                                  "updateTime": time_now()
                              },
                              c={
                                  "link_id": link_id,
                                  "server": server_name
                              })
            await page.close()
            return
        else:
            while True:
                content = await page.content()
                # print(content)
                doc = pq(content)
                self.common['rates'] = doc.find("#J_RateCounter").text()
                self.common['sales'] = doc.find("#J_SellCounter").text()
                self.common['freight'] = doc.find("#J_WlServiceTitle").text()
                mat1 = re.match("\d+", self.common['sales'])
                mat2 = re.match("\d+", self.common['rates'])
                if mat1 and mat2:
                    break
            res = re.findall('";(.*?);".*?e":"(\d+\.\d+).*?d":"(\d+)"',
                             content)  # 判断是否存在多属性
            if res:
                control = 1
                benefit_price = 0
                for r in res:
                    data_values = r[0].split(";")
                    prop = []
                    for data in data_values:
                        prop.append(
                            doc.find("li[data-value='" + data +
                                     "'] span").text())

                    if control:
                        for data in data_values:
                            try:
                                await page.click('li[data-value="' + data +
                                                 '"]')
                            except errors.PageError:
                                pass
                        content_p = await page.content()
                        promo_price = re.findall(
                            '<em id="J_PromoPriceNum".*?>(\d+\.?\d*)</em>',
                            content_p)  # 判断是否存在优惠
                        if len(promo_price) == 1:
                            benefit_price = float(r[1]) - float(promo_price[0])
                            control = 0

                    self.prices[r[2]] = r[1]
                    prop.reverse()
                    self.prop[r[2]] = "-".join(prop)

                for r in res:
                    if benefit_price:
                        self.promo_price[r[2]] = round(
                            float(r[1]) - benefit_price, 2)
            else:
                promo_price = re.findall(
                    '<em id="J_PromoPriceNum".*?>(\d+.*\d*)</em>',
                    content)  # 判断是否存在优惠
                if promo_price:
                    self.promo_price[link_id] = promo_price[0]
                else:
                    self.promo_price[link_id] = 0
            # print(self.prices)
            # print(self.promo_price)

            await page.close()
            await self.page.type("input[name='queryItemId']", link_id)
            await self.page.setRequestInterception(True)
            self.page.on('request', self.intercept_request)
            self.page.on('response', self.intercept_response)
            await asyncio.sleep(1)
            net_check()
            await self.page.click(".filter-footer button:first-child")
            while True:
                await asyncio.sleep(1)
                if self.complete == 1:
                    res = mysql.get_data(db=self.db_test,
                                         t="prices_tb_fix",
                                         c={
                                             "link_id": link_id,
                                             "server": server_name
                                         })
                    if res:
                        mysql.update_data(db=self.db_test,
                                          t="prices_tb_fix",
                                          set={
                                              "isComplete": "1",
                                              "updateTime": time_now()
                                          },
                                          c={
                                              "link_id": link_id,
                                              "server": server_name
                                          })
                    break
                elif self.complete == 2:
                    mysql.update_data(db=self.db_test,
                                      t="prices_tb_fix",
                                      set={"spe_link": "1"},
                                      c={
                                          "link_id": link_id,
                                          "server": server_name
                                      })
                    break
Beispiel #21
0
    async def login(self, page=None, **kwargs):
        """Log in through the password form, handling slider and SMS checks.

        Flow, as implemented below:

        1. Open ``login_url``, retrying until the navigation succeeds.
        2. Switch to the password form and type the credentials.
        3. If the ``#nocaptcha`` element is found, drag the slider via
           ``self.mouse_slide`` before submitting.
        4. If ``#container`` does not appear within 10 s, look for the SMS
           verification frame. The code is not read from the page: a row is
           inserted into the ``phone_verify`` table and polled until its
           ``verify_code`` column differs from ``"0"`` (presumably filled in
           by an operator or another process -- confirm).

        Args:
            page: Page to drive. When falsy, one is obtained from
                ``self.get_new_page()``.
            **kwargs: Must contain ``username``, ``password`` and
                ``fromStore``.

        Returns:
            The tuple ``(self.b, page, kwargs['fromStore'])``.

        Raises:
            KeyError: If a required keyword argument is missing.
            errors.TimeoutError: If ``#container`` is still missing 30 s
                after the last step (the final wait is not guarded).
        """
        if not page:
            page = await self.get_new_page()

        # Retry the initial navigation until it goes through.
        while True:
            try:
                net_check()
                await page.goto(login_url)
            except (errors.PageError, errors.TimeoutError):
                continue
            break

        # NOTE: the credentials are typed from the ``finally`` clause, so they
        # are attempted whether or not the "switch to password form" link was
        # found. The ``break`` inside ``finally`` also overrides the
        # ``continue`` above and discards any in-flight exception once typing
        # succeeded; this is kept exactly as the original behaved.
        while True:
            try:
                await page.waitForSelector(".forget-pwd.J_Quick2Static",
                                           visible=True,
                                           timeout=10000)
                await page.click(".forget-pwd.J_Quick2Static")
            except errors.TimeoutError:
                pass
            except errors.ElementHandleError:
                await page.reload()
                continue
            finally:
                try:
                    await page.type('#TPL_username_1', kwargs['username'],
                                    {'delay': self.input_time_random() - 50})
                    await page.type('#TPL_password_1', kwargs['password'],
                                    {'delay': self.input_time_random()})
                except errors.ElementHandleError:
                    await page.reload()
                else:
                    break

        net_check()
        # Detect the slider captcha by probing for its page element.
        # NOTE(review): pyppeteer's Jeval appears to raise when no element
        # matches the selector -- confirm the ``else`` branch is reachable.
        slider = await page.Jeval('#nocaptcha', 'node => node.style')
        if slider:
            print("出现滑块情况判定")
            await self.mouse_slide(p=page)
            await page.click("#J_SubmitStatic")  # click the login button
            # Non-blocking pause: time.sleep() here would stall the event
            # loop (and with it the browser connection) for the whole delay.
            await asyncio.sleep(2)
            await self.get_cookie(page)
        else:
            await page.click("#J_SubmitStatic")

        try:
            await page.waitForSelector("#container", timeout=10000)
        except errors.TimeoutError:
            print("超时需要手机验证!")
            frames = page.frames
            try:
                await frames[1].waitForSelector("button#J_GetCode",
                                                timeout=10000)
            except errors.TimeoutError:
                pass
            else:
                print("需要要手机验证码")
                test_server['db'] = "test"
                # Row id used to hand the SMS code over through the database.
                verify_id = random.randint(0, 100)
                mysql.insert_data(db=test_server,
                                  t="phone_verify",
                                  d={"id": verify_id})
                # frames = page.frames
                # await frames[1].click(".J_SendCodeBtn")
                verify_code = "0"
                # Request a code, then poll every 5 s (120 polls = 10 min)
                # before requesting a fresh one.
                while verify_code == "0":
                    net_check()
                    await frames[1].click("button#J_GetCode")
                    for _ in range(120):
                        await asyncio.sleep(5)
                        res = mysql.get_data(
                            db=test_server,
                            cn=["verify_code"],
                            t="phone_verify",
                            c={"id": verify_id},
                        )
                        verify_code = res[0][0]
                        if verify_code != "0":
                            mysql.delete_data(db=test_server,
                                              t="phone_verify",
                                              c={"id": verify_id})
                            break

                await frames[1].type("input#J_Phone_Checkcode", verify_code,
                                     {"delay": self.input_time_random() - 50})
                # await frames[1].type(".J_SafeCode", a, {'delay': self.input_time_random() - 50})
                net_check()
                await frames[1].click("input#submitBtn")
                # await frames[1].click("#J_FooterSubmitBtn")
            net_check()
            await page.goto("https://myseller.taobao.com/home.htm")
        await page.waitForSelector("#container", timeout=30000)

        return self.b, page, kwargs['fromStore']
Beispiel #22
0
    async def parse(self, data):
        """Persist the rows of an intercepted item-list response.

        For every row the method builds ``self.item`` from ``self.common``
        plus the scraped fields, then either

        * re-triggers a request by clicking the row's detail icon (stock ids
          starting with ``MUT`` or rows with a ``spe_link`` entry in
          ``prices_tb_fix``), or
        * inserts/updates the matching ``prices_tb`` row and flags rows for
          the same ``link_id`` with a different stock id as ``"del"``,
          appending a line to the per-store report files.

        ``self.complete`` is set to ``1`` after a row was stored (or when
        *data* is the sentinel ``"q"``) and to ``2`` when no promotion price
        is known for a row, in which case processing stops.

        Args:
            data: Either the string ``"q"`` or an indexable sequence of row
                dicts carrying ``itemDesc``, ``itemId`` and ``managerPrice``.

        Raises:
            AttributeError: If a row's second description line does not
                contain the ``编码:`` marker.
        """
        if data == "q":
            self.complete = 1
            return

        for row in data:
            item = self.item = self.common.copy()
            desc = row['itemDesc']['desc']
            item['stockid'] = re.search(r"编码:(.*)",
                                        desc[1]['text']).group(1).upper()
            item['link_id'] = row['itemId']
            item['attribute'] = ""
            item['flag'] = "update"
            item['typeabbrev'] = self.fromStore
            item['shop_id'] = self.shop_id(self.fromStore)
            item['SpiderDate'] = time_now()
            # NOTE(review): as written both replace() calls map a parenthesis
            # to itself; presumably they once normalised full-width
            # parentheses -- verify against the upstream file.
            item['description'] = desc[0]['text'].replace("(", "(").replace(
                ")", ")")
            item['price_tb'] = re.findall(
                r"(\d+.?\d*)", row["managerPrice"]['currentPrice'])[0]
            item['promotionprice'] = self.promo_price.get(item['link_id'])

            # NOTE(review): values are interpolated straight into the SQL
            # text; prefer the helper's condition-dict form if it supports
            # ``return_one``.
            sql = "select spe_link from prices_tb_fix where link_id='%s' and server='%s'" % (
                item['link_id'], self.sn)
            spe_link_id = mysql.get_data(db=self.db_test,
                                         sql=sql,
                                         return_one=True)
            isMut = re.search(r"^MUT\D*", item['stockid'])

            if isMut or spe_link_id:
                # Open the row's detail pop-up so the interceptors see the
                # follow-up request, then dismiss it.
                await self.page.setRequestInterception(True)
                self.page.on('request', self.intercept_request)
                self.page.on('response', self.intercept_response)
                await asyncio.sleep(1)
                net_check()
                await self.page.click(
                    ".next-table-row td:nth-child(2) div.product-desc-hasImg span:nth-child(2) i"
                )
                await asyncio.sleep(1)
                await self.page.keyboard.press('Escape')
                continue

            if item['promotionprice'] is None:
                # No price was collected for this link: report and stop.
                mail("price_tb_error",
                     self.fromStore + ":" + item['link_id'],
                     ["*****@*****.**"])
                logger.error("error:" + self.fromStore + " : " +
                             item['link_id'] + " and " +
                             mysql.concat(self.promo_price, "="))
                self.complete = 2
                break

            condition = {
                "stockid": item['stockid'],
                "link_id": item['link_id'],
                "shop_id": item['shop_id'],
            }
            res = mysql.get_data(t="prices_tb",
                                 l=1,
                                 cn=["id"],
                                 c=condition,
                                 db=self.target_server)
            if res:
                # NOTE(review): the query selects only ``id``, so the divisor
                # below is the row id -- confirm this is the intended ratio.
                item['ratio'] = round(
                    float(item['price_tb']) / float(res[0][0]), 2)
                print(item)
                mysql.update_data(t="prices_tb",
                                  set=item,
                                  c=condition,
                                  db=self.target_server)
            else:
                insert_item = item.copy()
                insert_item["currabrev"] = "CNY"
                insert_item["price_erp"] = 0
                insert_item["operator"] = self.operator
                insert_item["last_time"] = time_now()
                if self.operator == "爬虫维护":
                    insert_item["flag"] = "create"
                else:
                    insert_item['flag'] = "add"
                insert_item["ratio"] = 1
                insert_item["package_number"] = 1
                insert_item["Checker"] = ""
                insert_item["CheckDate"] = "0000-00-00 00:00:00"
                print(insert_item)

                with open(
                        "reports/report_" + self.fromStore + "_insert.txt",
                        "a") as report:
                    report.write("物料编码:" + insert_item['stockid'] +
                                 " 与 商品ID:" + insert_item['link_id'] +
                                 " 为最新匹配,添加至ERP系统。\n" + self.item_url +
                                 insert_item['link_id'] + "\n" +
                                 self.item_erp_url + insert_item['link_id'] +
                                 "\n\n")

                mysql.insert_data(t="prices_tb",
                                  d=insert_item,
                                  db=self.target_server)

            # Any other non-deleted row for the same link with a different
            # stock id is reported and flagged as deleted.
            result = mysql.get_data(t="prices_tb",
                                    cn=["*"],
                                    c={"link_id": item['link_id']},
                                    db=self.target_server,
                                    dict_result=True)
            if len(result) > 1:
                for r in result:
                    if r['stockid'] != item['stockid'] and r['flag'] != "del":
                        with open(
                                "reports/report_" + self.fromStore +
                                "_delete.txt", "a") as report:
                            report.write("物料编码:" + r['stockid'] +
                                         " 与 商品ID:" + item['link_id'] +
                                         " 不匹配,已被爬虫从ERP系统中删除。\n" +
                                         self.item_url + item['link_id'] +
                                         "\n" + self.item_erp_url +
                                         item['link_id'] + "\n\n")

                        mysql.update_data(t="prices_tb",
                                          c={"id": r['id']},
                                          db=self.target_server,
                                          set={"flag": "del"})

            self.complete = 1
Beispiel #23
0
    async def link_spider(self, p, f):
        """Resolve Taobao item ids for order lines and queue price re-checks.

        Repeatedly picks the newest order line of store *f* whose ``link_id``
        is still the placeholder ``"1"``, opens the order's promotion-query
        page in *p*, and for every sub-order found there:

        * writes the real ``link_id`` back to ``tb_order_detail_spider``;
        * queues the link in ``prices_tb_fix`` with ``flag`` 0 when no active
          ``prices_tb`` row exists for it, or with ``flag`` 1 when the row
          was never spidered or was spidered more than 14 days ago.

        When no placeholder rows remain, the page is closed and
        ``self.run_link_spider()`` is awaited.

        Args:
            p: Browser page used for navigation (``goto``/``content``/
                ``close`` are called on it).
            f: Store identifier, matched against ``fromStore``.

        Raises:
            IndexError: If the fetched page contains no ``>{...}<`` JSON
                payload.
        """
        # Local import so the cooperative sleep below is available even if
        # the module only imported the blocking ``sleep``.
        import asyncio

        never_spidered = '0000-00-00 00:00:00'
        test_server["db"] = "test"
        while True:
            # NOTE(review): *f* and the scraped ids below are interpolated
            # straight into SQL text; prefer parameterised queries if the
            # ``mysql`` helper offers them.
            sql = """
                SELECT a.id,url,goodsCode,a.orderNo FROM tb_order_detail_spider a
                JOIN tb_order_spider b ON a.`orderNo`=b.`orderNo`
                WHERE link_id="1" AND b.`fromStore`='%s' AND a.url IS NOT NULL
                ORDER BY b.createTime DESC
                LIMIT 1
                    """ % (f)
            url = "https://smf.taobao.com/promotionmonitor/orderPromotionQuery.htm?orderNo="
            results = mysql.get_data(sql=sql, dict_result=True)
            if not results:
                break
            url += results[0]['orderNo']
            await p.goto(url)
            content = await p.content()
            data = re.findall(r">(\{.*?\})<", content)
            order = json.loads(data[0])
            try:
                sub_orders = order["data"]["subOrderViewDTOs"]
            except KeyError:
                # Nothing was updated, so the next query returns the same
                # row; back off instead of retrying in a tight loop.
                await asyncio.sleep(5)
                continue
            for so in sub_orders:
                order_no = so["orderNoStr"]
                link_id = so["itemId"]
                select_sql = "select goodsCode from tb_order_detail_spider where url like '%%%s%%'" % (
                    order_no)
                print(select_sql)
                goodsCode = mysql.get_data(sql=select_sql, return_one=True)
                update_sql = "update tb_order_detail_spider set link_id='%s' where url like '%%%s%%'" % (
                    link_id, order_no)
                mysql.update_data(sql=update_sql)
                price_sql = """
                SELECT SpiderDate
                FROM prices_tb
                WHERE link_id='%s'
                AND stockid='%s'
                AND flag NOT IN ('del','XiaJia')
                """ % (link_id, goodsCode)

                res = mysql.get_data(sql=price_sql)
                res_fix = mysql.get_data(db=test_server,
                                         dict_result=True,
                                         t='prices_tb_fix',
                                         c={
                                             "link_id": link_id,
                                             "server": "production_server"
                                         })
                if res:
                    spider_date = res[0][0]
                    # Only never-spidered or stale (> 14 days) prices are
                    # queued again; the subtraction is skipped for the
                    # zero-date sentinel thanks to short-circuiting.
                    stale = (spider_date == never_spidered or
                             (datetime.datetime.now() - spider_date).days > 14)
                    if not stale:
                        continue
                    flag = 1
                else:
                    flag = 0

                if not res_fix:
                    mysql.insert_data(db=test_server,
                                      t="prices_tb_fix",
                                      d={
                                          "link_id": link_id,
                                          "fromStore": f,
                                          "flag": flag
                                      })
                elif res_fix[0]["isComplete"] != 0:
                    mysql.update_data(db=test_server,
                                      t="prices_tb_fix",
                                      set={
                                          "isComplete": 0,
                                          "flag": flag
                                      },
                                      c={
                                          "link_id": link_id,
                                          "server": "production_server"
                                      })
            # Cooperative pause between orders; a blocking sleep() would
            # freeze every other coroutine on the loop.
            await asyncio.sleep(5)
        await p.close()
        await self.run_link_spider()
Beispiel #24
0
    async def parse(self, mainOrders, pageNum):
        """Parse one page of the order list and store orders and their items.

        Every main order becomes a row in ``tb_order_spider`` and every
        non-cancelled sub-order a row in ``tb_order_detail_spider`` (both via
        ``self.save_in_sql``). Cancelled sub-orders are removed from the
        detail table and the ``itemNo`` of the following lines is shifted
        down by one.

        ``self.complete`` is set to ``2`` once an order older than the
        look-back window is reached (the crawl round is finished) and to
        ``1`` otherwise. When ``self.orderno`` is truthy, the method returns
        right after the first order without touching ``self.complete``.

        Args:
            mainOrders: Sequence of order dicts from the order-list response.
            pageNum: 1-based page number; page 1 also records the round's
                start time.

        Raises:
            AttributeError: If an order's ``postType`` text does not contain
                the delivery-fee pattern.
        """
        # Look-back window in days, chosen by the current time of day.
        boundaries = time_zone(["08:00", "18:00", "23:59"])
        now = datetime.datetime.now()
        if now < boundaries[0]:
            eoc = EARLIEST_ORDER_CREATETIME
        elif boundaries[0] < now < boundaries[1]:
            eoc = 2
        else:
            eoc = 20

        start_time = datetime.datetime.now()
        logger.info("开始第 " + str(pageNum) + " 页订单爬取")
        logger.info(store_trans(self.fromStore))
        if pageNum == 1:
            self._loop_start_time = datetime.datetime.now()
        loop_control = 0
        for main_order in mainOrders:
            order = {}  # order-level fields
            order['orderNo'] = main_order["id"]
            order['createTime'] = main_order['orderInfo']['createTime']
            order['buyerName'] = main_order['buyer']['nick']
            seller_flag = main_order['extra']['sellerFlag']
            order['actualFee'] = main_order['payInfo']['actualFee']
            order['deliverFee'] = re.search(
                r"\(含快递:¥(\d+\.\d+)\)",
                main_order['payInfo']['postType']).group(1)
            order['datailURL'] = "https:" + main_order['statusInfo'][
                'operations'][0]['url']
            order['orderStatus'] = main_order['statusInfo']['text']
            order['fromStore'] = self.fromStore
            order['updateTime'] = time_now()
            if seller_flag == 1:
                data_url = self.base_url + main_order['operations'][0][
                    'dataUrl']
                order['sellerFlag'] = await self.get_flag_text(data_url)
            try:
                order['isPhoneOrder'] = main_order['payInfo']['icons'][0][
                    'linkTitle']
            except KeyError:
                pass

            line_no = 0  # itemNo of the next non-cancelled sub-order
            for sub_order in main_order['subOrders']:
                cancelled = False
                item = {}  # item-level fields
                item['orderNo'] = main_order["id"]
                item['itemNo'] = line_no
                try:
                    item['goodsCode'] = sub_order['itemInfo']['extra'][0][
                        'value']
                except KeyError:
                    item['goodsCode'] = 'error'
                    logger.error(time_now() + " 订单:" + item['orderNo'])
                # NOTE(review): the trailing parenthesis replace() calls are
                # no-ops as written; presumably they once normalised
                # full-width parentheses -- verify.
                item['tbName'] = sub_order['itemInfo']['title'].strip() \
                    .replace("&plusmn;", "±").replace("&Phi;", "Φ").replace("&Omega;", "Ω") \
                    .replace("&mdash;", "—").replace("&deg;", "°").replace("&times;", "×") \
                    .replace("&mu;", "μ").replace("&nbsp;", "").replace("(", "(").replace(")", ")")
                item['unitPrice'] = sub_order['priceInfo']['realTotal']
                item['sellNum'] = sub_order['quantity']
                item['orderStatus'] = order['orderStatus']
                if self.orderno:
                    logger.info(item['orderStatus'])
                item['refundStatus'] = None
                item['isRefund'] = 0
                item['goodsAttribute'] = ""
                item['url'] = "https:" + sub_order['itemInfo']['itemUrl']
                try:
                    sku_text = sub_order['itemInfo']['skuText']
                except KeyError:
                    pass
                else:
                    parts = []
                    for attr in sku_text:
                        value = attr['value'].replace("&Omega", "Ω").replace(
                            "&middot", "·")
                        if 'name' in attr:
                            parts.append(value)
                        else:
                            # A nameless entry continues the previous value.
                            parts[-1] += value
                    item['goodsAttribute'] = "-".join(parts).replace(
                        "(", "(").replace(")", ")")
                try:
                    operations = sub_order['operations']
                except KeyError:
                    pass
                else:
                    for op in operations:
                        style = op['style']
                        if style in ['t12', 't16'
                                     ] and op['text'] != "退运保险":
                            item['refundStatus'] = op['text']
                            item['isRefund'] = "1"
                        elif style == 't0' and op['text'] == '已取消':
                            # Cancelled line: drop it from the detail table
                            # and close the gap in the itemNo sequence.
                            cancelled = True
                            delete_item = {
                                'orderNo': item['orderNo'],
                                'itemNo': item['itemNo'],
                                'goodsCode': item['goodsCode']
                            }
                            is_exist = mysql.get_data(
                                t="tb_order_detail_spider", l=1, c=delete_item)
                            if is_exist:
                                mysql.delete_data(t="tb_order_detail_spider",
                                                  c=delete_item)
                            sql = """
                            UPDATE tb_order_detail_spider
                            SET itemNo=itemNo-1
                            WHERE OrderNo='%s' and itemNo>'%s'
                            """ % (item['orderNo'], item['itemNo'])
                            mysql.update_data(sql=sql)
                if cancelled:
                    continue
                line_no += 1
                self.save_in_sql(item=item, tableName='tb_order_detail_spider')
            self.save_in_sql(item=order, tableName='tb_order_spider')
            if self.orderno:
                logger.info("定向爬取订单完成")
                return
            # Stop the round once an order older than the window is reached;
            # both sides are "%Y-%m-%d %H:%M:%S" strings, compared as text.
            date_limit = (datetime.date.today() -
                          datetime.timedelta(eoc)).strftime("%Y-%m-%d %H:%M:%S")
            if order['createTime'] < date_limit:
                logger.info("完成本轮爬取,共翻 " + str(pageNum) + " 页。")
                loop_control = 1
                break
        spend_time = datetime.datetime.now() - start_time
        logger.info(
            str(spend_time.seconds) + " 秒完成第 " + str(pageNum) + " 页订单爬取")
        if loop_control:
            self._loop_end_time = datetime.datetime.now()
            loop_spend_time = round(
                (self._loop_end_time - self._loop_start_time).seconds / 60, 0)
            logger.info(str(loop_spend_time) + " 分钟完成本轮订单爬取")
            self.complete = 2
        else:
            self.complete = 1
Beispiel #25
0
    async def order_page(self, browser_in=None, page_in=None):
        """爬取订单详情"""
        while 1:
            result = mysql.get_data(t="tb_order_spider",
                                    cn=["datailURL", "orderNo"],
                                    c={
                                        "isDetaildown": 0,
                                        "fromStore": self.fromStore
                                    },
                                    o=["createTime"],
                                    om="d")
            if result:
                logger.info("订单详情爬取")
                for url in result:
                    start_time = datetime.datetime.now()
                    logger.info(store_trans(self.fromStore))
                    logger.info("开始订单 " + url[1] + " 详情爬取")
                    order = {}
                    await self._page_order_detail.bringToFront()
                    # if browser_in:
                    #     page = await browser_in.newPage()
                    # else:
                    #     page = page_in
                    page = self._page_order_detail
                    while 1:
                        try:
                            await page.goto(url[0])
                        except errors.PageError:
                            sleep(5)
                        except errors.TimeoutError:
                            sleep(5)
                        else:
                            break
                    try:
                        await page.waitForSelector('#detail-panel',
                                                   timeout=30000)
                    except errors.TimeoutError:
                        continue

                    content = await page.content()
                    a = re.search("var data = JSON.parse\('(.*)'\);",
                                  content).group(1)
                    b = a.replace('\\\\\\"', '')
                    data = b.replace('\\"', '"')
                    m = json.loads(data)
                    order['actualFee'] = m['mainOrder']['payInfo'][
                        'actualFee']['value']
                    order['orderStatus'] = status_format(
                        m['mainOrder']['statusInfo']['text'])
                    if order['orderStatus'] == '等待买家付款':
                        order['isDetaildown'] = 2
                    else:
                        order['isDetaildown'] = 1
                    coupon = 0
                    for k, v in m['mainOrder']['payInfo'].items():
                        if k == 'promotions':
                            promotions = m['mainOrder']['payInfo'][
                                'promotions']
                            for i in range(len(promotions)):
                                if 'prefix' and 'suffix' in promotions[i]:
                                    coupon_temp = re.search(
                                        "(\d+\.\d+)", promotions[i]['value'])
                                    if coupon_temp:
                                        coupon += float(coupon_temp.group(1))
                    order['couponPrice'] = round(coupon, 2)
                    for k, v in m.items():
                        if k == 'buyMessage':
                            order['buyerComments'] = v
                    orderNo = m['mainOrder']['id']
                    order_info = m['mainOrder']['orderInfo']['lines'][1][
                        'content']
                    for i in range(len(order_info)):
                        if order_info[i]['value']['name'] == '支付宝交易号:':
                            try:
                                order['tradeNo'] = order_info[i]['value'][
                                    'value']
                            except KeyError:
                                order['tradeNo'] = None
                        # elif order_info[i]['value']['name'] == '创建时间:':
                        #     order['createTime'] = order_info[i]['value']['value']
                        # elif order_info[i]['value']['name'] == '发货时间:':
                        #     order['shipTime'] = order_info[i]['value']['value']
                        elif order_info[i]['value']['name'] == '付款时间:':
                            order['payTime'] = order_info[i]['value']['value']
                    ship_info = m['tabs']
                    for i in range(len(ship_info)):
                        if ship_info[i]['id'] == 'logistics':
                            temp = ship_info[i]['content']
                            for k, v in temp.items():
                                if k == 'logisticsName':
                                    order['shippingCompany'] = v
                                elif k == 'shipType':
                                    order['shippingMethod'] = v
                                elif k == 'logisticsNum':
                                    order['shippingNo'] = v
                                # elif k == 'logisticsUrl':
                                #     order['shipUrl'] = "https" + v
                                elif k == 'address':
                                    rec_info = v
                                    order['receiverName'] = rec_info.split(
                                        ",")[0].replace(" ", "")
                                    order['receiverPhone'] = rec_info.split(
                                        ",")[1]
                                    order['receiverAddress'] = "".join(
                                        rec_info.split(",")[2:])
                    sub_orders = m['mainOrder']['subOrders']
                    # print(len(sub_orders))
                    for i in range(len(sub_orders)):
                        item = {}
                        temp = 0
                        itemNo = i
                        if sub_orders[i]['promotionInfo']:
                            for j in sub_orders[i]['promotionInfo']:
                                for x in j['content']:
                                    for k, v in x.items():
                                        if k == 'value':
                                            p_list = re.findall(
                                                "-?\d+\.\d+", v)
                                            if p_list:
                                                temp += float(p_list.pop())
                        item['unitBenefits'] = temp
                        mysql.update_data(t="tb_order_detail_spider",
                                          set=item,
                                          c={
                                              'orderNo': orderNo,
                                              'itemNo': itemNo
                                          })
                        logger.info("详细订单状态更新成功")
                        # print(item)
                    # print(order)
                    mysql.update_data(t="tb_order_spider",
                                      set=order,
                                      c={'orderNo': orderNo})
                    logger.info("订单状态更新成功")

                    # if browser_in:
                    #     await page.close()
                    await self.page.bringToFront()
                    Verify()
                    end_time = datetime.datetime.now()
                    spend_time = end_time - start_time
                    logger.info(
                        str(spend_time.seconds) + " 秒完成订单 " + url[1] + " 详情爬取")
                    while True:
                        s = random.random()
                        if s > 0.3:
                            logger.info("休息 " + str(int(s * n_o_time)) +
                                        " 秒完开始下一单详情爬取")
                            for i in range(int(s * n_o_time)):
                                await asyncio.sleep(1)
                            break
            else:
                logger.info("没有可以爬取的详情")
                break
 def report_mail(self):
     """Mail the daily price-maintenance report and prune old report rows.

     Steps, in order:

     1. Aggregate ``update_reports`` rows from the 24 hours before the
        window start into per-shop / per-flag counts.
     2. Dump the raw rows of the same window to
        ``./reports/reports<date>.csv``.
     3. Build an HTML summary for the four known shops and send it with
        ``mail_reports``.
     4. Delete ``update_reports`` rows older than three days and reset the
        ``lookup`` counter on the ``link_id = 'count'`` row.

     The SQL statements are assembled from locally computed timestamps
     only; no caller-supplied text is interpolated.
     """
     # presumably time_zone returns datetimes for today's 18:05 —
     # TODO confirm against its definition; only the first one is used.
     window = time_zone(["18:05", "18:05"])
     d1 = window[0]
     since = (d1 - datetime.timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
     sql = "SELECT shop_id,flag,COUNT(flag),lookup FROM update_reports " \
           "WHERE last_time < '%s' AND last_time > '%s' " \
           "GROUP BY Flag,shop_id" % (d1, since)
     sql2 = "SELECT * FROM update_reports WHERE last_time < '%s' AND last_time > '%s' " % (
         d1, since)
     # Fall back to an empty result so the report loops below cannot fail
     # on a falsy "no rows" return value.
     res = mysql.get_data(sql=sql, db=self.db_test) or ()

     con, _, _ = mysql.connection(self.db_test)
     try:
         df = pd.read_sql(sql2, con)
     finally:
         # Release the connection even when the query fails.
         con.close()
     date = date_now_str()
     df.to_csv("./reports/reports" + date + ".csv")

     # (shop_id, section header) in the order the sections appear in the mail.
     shop_sections = (
         ('115443253', "今日爬虫维护 开源店 价格  :<br>"),
         ('197444037', "今日爬虫维护 玉佳企业店 价格:<br>"),
         ('34933991', "今日爬虫维护 赛宝电子店 价格:<br>"),
         ('68559944', "今日爬虫维护 玉佳电子店 价格:<br>"),
     )
     # flag -> (line prefix, index of the result column holding the number):
     # create/update report COUNT(flag), lookup reports the lookup column.
     flag_lines = {
         'create': ('创建了 ', 2),
         'update': ('更新了 ', 2),
         'lookup': ('查看了 ', 3),
     }

     out_list = []
     for shop_id, header in shop_sections:
         out_list.append(header)
         for r in res:
             if r[0] != shop_id:
                 continue
             line = flag_lines.get(r[1])
             if line is None:
                 # Flags other than create/update/lookup are not reported.
                 continue
             prefix, count_col = line
             out_list.append(prefix + str(r[count_col]) + ' 条数据。<br>')

     mail_reports("爬虫更新erp价格报告", "".join(out_list), date,
                  *["*****@*****.**", '*****@*****.**'])

     # Keep only the last three days of report rows.
     dt = (d1 - datetime.timedelta(days=3)).strftime("%Y-%m-%d %H:%M:%S")
     print(dt)
     sql = "delete from update_reports where last_time<'%s'" % (dt)
     mysql.delete_data(sql=sql, db=self.db_test)
     # Reset the counter column that the report reads above. The original
     # wrote to "loopup", which does not match the `lookup` column selected
     # from this same table.
     mysql.update_data(t="update_reports",
                       set={"lookup": 0},
                       c={"link_id": "count"})
Beispiel #27
0
    async def parse_2(self, data):
        """Sync one listing's SKU rows into ``prices_tb`` and report mismatches.

        For every entry of ``data['skuOuterIdTable']['dataSource']`` with a
        non-empty ``skuOuterId``, the ``prices_tb`` row matching the current
        stockid / link_id / shop_id is updated from ``self.item``; when no
        such row exists a new one is inserted and noted in the ``insert``
        report. After the loop, duplicated codes and listings without any
        code are appended to the ``repeat`` / ``empty`` reports, and stored
        rows for this link whose stockid was not seen are flagged ``del`` and
        noted in the ``delete`` report. Finally ``self.complete`` is set to 1.

        ``self.item`` is mutated in place and reused across iterations.

        Args:
            data: Parsed payload; must provide
                ``data['skuOuterIdTable']['dataSource']``, an iterable of
                mappings with ``skuOuterId`` and ``skuId`` keys.
        """

        def append_report(kind, text):
            # All reports for a store live in reports/report_<store>_<kind>.txt.
            with open("reports/report_" + self.fromStore + "_" + kind + ".txt",
                      "a") as file:
                file.write(text)

        seen_codes = set()
        repeat_list = []
        for sku in data['skuOuterIdTable']['dataSource']:
            self.item['stockid'] = sku['skuOuterId']
            logger.info(self.item['stockid'])
            if not self.item['stockid']:
                continue
            if self.item['stockid'] in seen_codes:
                if self.item['stockid'] not in repeat_list:
                    repeat_list.append(self.item['stockid'])
            else:
                seen_codes.add(self.item['stockid'])

            skuId = str(sku['skuId'])
            # A SKU missing from self.prop is treated like an empty attribute
            # instead of raising AttributeError on None.
            # NOTE(review): as written both replacements map a character to
            # itself; presumably one side was meant to be a full-width
            # parenthesis — confirm against the original file.
            attribute = (self.prop.get(skuId) or "").replace(
                "(", "(").replace(")", ")")
            if attribute:
                self.item['attribute'] = attribute
            else:
                # Also drops a stale value left over from a previous SKU.
                self.item.pop('attribute', None)
            self.item['price_tb'] = self.prices.get(skuId)
            if self.promo_price:
                self.item["promotionprice"] = self.promo_price.get(skuId)
            else:
                self.item["promotionprice"] = 0

            condition = {
                "stockid": self.item['stockid'],
                "link_id": self.item['link_id'],
                "shop_id": self.item['shop_id'],
            }
            res = mysql.get_data(t="prices_tb",
                                 l=1,
                                 cn=["price_tb"],
                                 c=condition,
                                 db=self.target_server)
            if res:
                # ratio = new price / stored price; a stored price of 0 would
                # divide by zero, so it is pinned to 1.
                if res[0][0] == 0:
                    self.item['ratio'] = 1
                else:
                    self.item['ratio'] = round(
                        float(self.item['price_tb']) / float(res[0][0]), 2)

                print(self.item)
                mysql.update_data(t="prices_tb",
                                  set=self.item,
                                  c=condition,
                                  db=self.target_server)
                continue

            # No stored row yet: insert one with the bookkeeping defaults.
            insert_item = self.item.copy()
            insert_item["currabrev"] = "CNY"
            insert_item["price_erp"] = 0
            insert_item["operator"] = self.operator
            insert_item["last_time"] = time_now()
            if self.operator == "爬虫维护":
                insert_item["flag"] = "create"
            else:
                insert_item['flag'] = "add"
            insert_item["ratio"] = 1
            insert_item["package_number"] = 1
            insert_item["Checker"] = ""
            insert_item["CheckDate"] = "0000-00-00 00:00:00"
            print(insert_item)

            append_report(
                "insert",
                "物料编码:" + insert_item['stockid'] + " 与商品ID:" +
                insert_item['link_id'] + " 为最新匹配,添加至ERP系统。\n" +
                self.item_url + insert_item['link_id'] + "\n" +
                self.item_erp_url + insert_item['link_id'] + "\n\n")

            mysql.insert_data(t="prices_tb",
                              d=insert_item,
                              db=self.target_server)

        if repeat_list:
            append_report(
                "repeat",
                "店铺:" + store_trans(self.fromStore) + ",商品id:" +
                self.item['link_id'] + " 重复编码\n" + "重复编码:" +
                ",".join(repeat_list) + "\n" + self.item_url +
                self.item['link_id'] + "\n\n")

        if not seen_codes:
            append_report(
                "empty",
                "店铺:" + store_trans(self.fromStore) + ",商品id:" +
                self.item['link_id'] + " 空编码\n" + self.item_url +
                self.item['link_id'] + "\n\n")

        # NOTE(review): link_id is interpolated directly into the SQL text;
        # switch to a parameterized query if the mysql helper supports one.
        sql = """
        select id,stockid 
        from prices_tb 
        where link_id='%s' 
        and flag not in('del','XiaJia')
        """ % (self.item['link_id'])
        # Fall back to an empty result so a falsy "no rows" return value
        # cannot break the loop.
        res_verify = mysql.get_data(sql=sql, db=self.target_server) or ()

        for rv in res_verify:
            if rv[1] in seen_codes:
                continue
            # Stored code is no longer on the listing: report it, then
            # soft-delete the row.
            append_report(
                "delete",
                "物料编码:" + rv[1] + " 与 商品ID:" +
                self.item['link_id'] + " 不匹配,已被爬虫从ERP系统中删除。\n" +
                self.item_url + self.item['link_id'] + "\n" +
                self.item_erp_url + self.item['link_id'] + "\n\n")

            mysql.update_data(t="prices_tb",
                              c={"id": rv[0]},
                              db=self.target_server,
                              set={
                                  "flag": "del",
                                  "operator": self.operator,
                                  "last_time": time_now()
                              })

        self.complete = 1