コード例 #1
0
def get_detail(content):
    """Collect the fields of one price record from a product detail page.

    ``content`` is parsed with ``pq``. The item and shop ids are read from
    the ``#J_Pine`` element; the freight, title, rating-counter and
    sales-counter texts come from their own elements. Every other field is a
    fixed default or the current ``time_now()`` value.

    A counter whose text is exactly ``"-"`` is stored as the integer ``0``;
    any other text is stored unchanged.
    """
    doc = pq(content)

    def counter(selector):
        # A bare "-" is normalised to 0; everything else is kept as text.
        text = doc.find(selector).text()
        return 0 if text == "-" else text

    return {
        'link_id': doc.find("#J_Pine").attr("data-itemid"),
        'shop_id': doc.find("#J_Pine").attr("data-shopid"),
        'typeabbrev': "",
        'price_erp': 0,
        'currabrev': "CNY",
        'operator': "",
        'last_time': time_now(),
        'flag': "add",
        'freight': doc("#J_WlServiceTitle").text(),
        'ratio': 1,
        'promotionprice': 0,
        'package_number': 1,
        'SpiderDate': time_now(),
        'Checker': "",
        'CheckDate': time_now(),
        "description": doc.find(".tb-main-title").text(),
        "rates": counter("#J_RateCounter"),
        "sales": counter("#J_SellCounter"),
    }
コード例 #2
0
 def maintain(self, operation, **kwargs):
     """Apply one maintenance operation to ``prices_tb`` and report it.

     ``kwargs`` must provide ``goodsCode``, ``link_id``, ``fromStore``,
     ``unitPrice``, ``goodsAttribute`` and ``tbName``; the "更新" operation
     additionally reads ``ratio``.

     * ``operation == "更新"``: update the row matched by stock id, link id
       and shop id; the reported flag is ``'update'``.
     * ``operation == "创建"``: insert the full record with flag ``'create'``.
     * anything else: no database write; the reported flag is ``'lookup'``.

     In every case the full record is passed to ``self.report_in``.
     """
     record = dict(
         stockid=kwargs['goodsCode'],
         link_id=kwargs['link_id'],
         shop_id=self.shop_id(kwargs['fromStore']),
         price_tb=kwargs['unitPrice'],
         currabrev='CNY',
         operator='爬虫维护',
         SpiderDate=time_now(),
         attribute=kwargs['goodsAttribute'],
         flag=None,
         description=kwargs['tbName'],
         typeabbrev="",
         price_erp=0,
         last_time=time_now(),
         freight="",
         ratio=1,
         promotionprice=0,
         sales=0,
         rates=0,
         Checker="",
         package_number=1,
         CheckDate=time_now(),
     )

     if operation == "更新":
         record['flag'] = 'update'
         # Only these columns are rewritten on an existing row.
         changes = dict(
             SpiderDate=time_now(),
             flag='update',
             price_tb=kwargs['unitPrice'],
             description=kwargs['tbName'],
             ratio=kwargs['ratio'],
             attribute=kwargs['goodsAttribute'],
         )
         row_key = dict(
             stockid=kwargs['goodsCode'],
             link_id=kwargs['link_id'],
             shop_id=self.shop_id(kwargs['fromStore']),
         )
         mysql.update_data(t="prices_tb", set=changes, c=row_key)
     elif operation == "创建":
         record['flag'] = 'create'
         mysql.insert_data(t="prices_tb", d=record)
     else:
         record['flag'] = 'lookup'

     self.report_in(**record)
コード例 #3
0
ファイル: Login_New.py プロジェクト: ghostcfl/tb_master
    async def verify(self, p):
        """Complete the phone-verification dialog on page ``p`` if it appears.

        Waits up to 10 seconds for ``div.aq_overlay_mask``. If it never shows
        up, nothing else happens. Otherwise a code is requested through the
        second frame of the page and then typed in and submitted.

        * When ``LINUX`` is truthy, a row ``{"id": <random 0..100>}`` is
          inserted into ``phone_verify`` (database ``"test"``) and the
          ``verify_code`` column of that row is polled every 5 seconds, up to
          120 times per requested code, until it differs from ``"0"``. The
          row is then deleted. If no code arrives within a polling round, a
          new code is requested after a 10 second pause.
        * Otherwise the code is read from the console with ``input``.
        """
        try:
            await p.waitForSelector("div.aq_overlay_mask", timeout=10000)
        except errors.TimeoutError:
            # No verification overlay: nothing to do.
            return

        logger.info("需要要手机验证码")
        if LINUX:
            test_server = ts.copy()
            test_server['db'] = "test"
            # NOTE(review): a random id in 0..100 can collide with a row left
            # over from another run -- confirm how phone_verify is keyed.
            request_id = random.randint(0, 100)
            mysql.insert_data(db=test_server,
                              t="phone_verify",
                              d={"id": request_id})
            frames = p.frames
            net_check()
            verify_code = "0"
            while True:
                net_check()
                await frames[1].click(".J_SendCodeBtn")
                for _ in range(120):
                    await asyncio.sleep(5)
                    res = mysql.get_data(
                        db=test_server,
                        cn=["verify_code"],
                        t="phone_verify",
                        c={"id": request_id},
                    )
                    if not res:
                        # Row not readable this time: keep polling rather
                        # than failing on res[0][0].
                        continue
                    verify_code = res[0][0]
                    if verify_code != "0":
                        mysql.delete_data(db=test_server,
                                          t="phone_verify",
                                          c={"id": request_id})
                        break
                if verify_code != "0":
                    break
                # Nothing arrived in this round; pause, then request again.
                await asyncio.sleep(10)
        else:
            frames = p.frames
            net_check()
            await frames[1].click(".J_SendCodeBtn")
            verify_code = input(time_now() + " | 请输入6位数字验证码:")

        await frames[1].type(".J_SafeCode", verify_code,
                             {'delay': self.input_time_random() - 50})
        net_check()
        await frames[1].click("#J_FooterSubmitBtn")
コード例 #4
0
 async def verify(self, p):
     """Handle the phone-verification dialog on page ``p`` if it appears.

     Waits up to 10 seconds for ``div.aq_overlay_mask``; when it does not
     appear the method returns without doing anything. Otherwise it pauses
     10 seconds, requests a code through the second frame of the page, reads
     the code from the console, types it in and submits the dialog.
     """
     try:
         await p.waitForSelector("div.aq_overlay_mask", timeout=10000)
     except errors.TimeoutError:
         return

     logger.info("需要要手机验证码")
     await asyncio.sleep(10)
     frames = p.frames
     net_check()
     dialog = frames[1]
     await dialog.click(".J_SendCodeBtn")
     code = input(time_now() + " | 请输入6位数字验证码:")
     typing_options = {'delay': self.input_time_random() - 50}
     await dialog.type(".J_SafeCode", code, typing_options)
     net_check()
     await dialog.click("#J_FooterSubmitBtn")
コード例 #5
0
ファイル: price_tb.py プロジェクト: ghostcfl/headlessspider
    async def run(self):
        """Open ``self.url``, clear the slider captcha if present, then process.

        After the query input is available, ``self.get_nc_frame`` is asked
        for the captcha frame. If one is returned, the slider ``#nc_1_n1z``
        is dragged repeatedly: when the retry link ``.nc-lang-cnt a`` shows
        up it is clicked and the drag is tried again; when it does not show
        up (or cannot be clicked) and the slider element is gone, the loop
        ends.

        With ``MODE == 2`` an operator name is read from the console and
        stored in ``self.operator`` when non-empty. ``self.fix_data`` is then
        called in a loop until it returns ``1``.
        """
        net_check()
        await self.page.goto(self.url)
        await asyncio.sleep(2)
        await self.page.waitForSelector("input[name='queryItemId']", timeout=0)
        frames = self.page.frames
        frame = await self.get_nc_frame(frames)

        if frame:
            logger.info("条形验证码")
            while True:
                await asyncio.sleep(1)
                await frame.hover("#nc_1_n1z")
                await self.page.mouse.down()
                await self.page.mouse.move(
                    2000, 0, {'delay': random.randint(1000, 2000)})
                await self.page.mouse.up()
                try:
                    # The wait must be awaited: otherwise it never blocks and
                    # its timeout can never reach the handler below.
                    await frame.waitForSelector(".nc-lang-cnt a",
                                                timeout=10000)
                    await asyncio.sleep(2)
                    await frame.click(".nc-lang-cnt a")
                except (errors.TimeoutError, errors.PageError):
                    # No retry link: if the slider itself is gone, stop.
                    await asyncio.sleep(1)
                    slider = await frame.J("#nc_1_n1z")
                    if not slider:
                        break

        operator = ""
        if MODE == 2:
            operator = input(time_now() + " | 输入操作者名字:")
        if operator:
            self.operator = operator
        logger.info("当前操作者 :" + self.operator)

        # fix_data returns 1 when there is nothing left to process.
        while True:
            status = await self.fix_data()
            if status == 1:
                break
コード例 #6
0
ファイル: spider.py プロジェクト: ghostcfl/headlessspider
    async def next_page(self, page_num=1):
        """Turn pages of the order list in an endless loop.

        Each round:

        1. Picks a pacing value ``n_p_time`` from the current time relative
           to the boundaries returned by ``time_zone`` for 08:00, 18:00 and
           23:00. Between the first two boundaries ``NEXT_PAGE_TIME`` is used
           and, the first time this window is entered, ``page_num`` is reset
           to 1. After the last boundary, on non-LINUX hosts, the machine is
           shut down and the process exits.
        2. If ``self.orderno`` is truthy, clears the order-number field,
           reads an order number from the console and searches for it.
        3. Otherwise types ``page_num`` into the pagination box and waits up
           to 10 seconds for that page to become the active pagination item,
           retrying after a timeout. It then waits for ``self.complete`` to
           become 1 (page done) or 2 (round done), sleeps a random part of
           ``n_p_time`` and moves on to the next page. A finished round
           restarts at page 1.

        ``page_num`` is the page to start from.
        """
        daytime_rounds = 0
        while 1:
            boundaries = time_zone(["08:00", "18:00", "23:00"])
            now = datetime.datetime.now()
            if now < boundaries[0]:
                n_p_time = 600
            elif boundaries[0] < now < boundaries[1]:
                daytime_rounds += 1
                if daytime_rounds == 1:
                    # First round inside the daytime window: start over.
                    page_num = 1
                n_p_time = NEXT_PAGE_TIME
            elif now > boundaries[2]:
                n_p_time = 60
                if not LINUX:
                    subprocess.call("shutdown /s")
                    exit("到点关机")
            else:
                n_p_time = 60

            await self.page.bringToFront()
            if self.orderno:
                await self.page.focus("#bizOrderId")
                await asyncio.sleep(1)
                await self.page.keyboard.down("ShiftLeft")
                await asyncio.sleep(1)
                await self.page.keyboard.press("Home")
                await asyncio.sleep(1)
                # NOTE(review): Shift is pressed a second time and never
                # released; possibly keyboard.up was intended -- confirm.
                await self.page.keyboard.down("ShiftLeft")
                await asyncio.sleep(1)
                await self.page.keyboard.press("Delete")
                await asyncio.sleep(1)

                orderno = input(time_now() + " | 输入订单号:")

                await self.page.type("#bizOrderId", orderno)
                await self.page.setRequestInterception(True)
                self.page.on('request', self.intercept_request)
                self.page.on('response', self.intercept_response)
                net_check()
                await self.page.click(".button-mod__primary___17-Uv")
                await asyncio.sleep(10)
            else:
                active_item = (".pagination-item.pagination-item-" +
                               str(page_num) + ".pagination-item-active")
                while 1:
                    try:
                        await self.page.waitForSelector(
                            ".pagination-options-go")
                        await self.page.focus(".pagination-options input")
                        # Clear whatever number is currently in the box.
                        for key in ("Delete", "Delete", "Delete",
                                    "Backspace", "Backspace", "Backspace"):
                            await self.page.keyboard.press(key)
                        await self.page.setRequestInterception(True)
                        self.page.on('request', self.intercept_request)
                        self.page.on('response', self.intercept_response)
                        net_check()
                        await self.page.type(".pagination-options input",
                                             str(page_num))
                        await self.page.keyboard.press("Enter")
                        # Must be awaited, otherwise a page turn that never
                        # completes cannot reach the retry branch below.
                        await self.page.waitForSelector(active_item,
                                                        timeout=10000)
                    except errors.TimeoutError:
                        logger.info('翻页超时,5秒后重新翻页')
                        sleep(5)
                    else:
                        break

                # self.complete is set outside this method (presumably by
                # the response handling); poll it every 3 seconds.
                while 1:
                    if self.complete == 1:
                        s = random.random()
                        if s > 0.5:
                            await self.link_spider()
                            await self.order_page()
                            logger.info(str(int(s * n_p_time)) + " 秒后开始下一页爬取")
                            # NOTE(review): blocking sleep kept on purpose;
                            # an awaited sleep would let handlers run before
                            # self.complete is reset below.
                            sleep(int(s * n_p_time))
                            break
                    elif self.complete == 2:
                        # Round finished: the increment below makes it 1.
                        page_num = 0
                        s = random.random()
                        if s > 0.9:
                            mysql.update_data(t="tb_order_spider",
                                              set={"isDetaildown": 0},
                                              c={
                                                  "isDetaildown": 2,
                                                  "fromStore": self.fromStore
                                              })
                            sleep(int(s * n_p_time))
                            break
                    else:
                        await asyncio.sleep(3)
                self.complete = 0
                page_num += 1
コード例 #7
0
ファイル: spider.py プロジェクト: ghostcfl/headlessspider
    async def parse(self, mainOrders, pageNum):
        """Parse one page of orders and store each order and its items.

        ``mainOrders`` is the list of order dicts of the page and ``pageNum``
        the page number (used for logging; page 1 also starts the round
        timer).

        For every order a header record is saved to ``tb_order_spider`` and
        one record per sub-order to ``tb_order_detail_spider``. A sub-order
        with a ``t0`` operation whose text is "已取消" is not saved: its
        stored row, if any, is deleted and the following rows are renumbered.

        The round ends when an order's ``createTime`` is older than ``eoc``
        days before today; ``eoc`` depends on the time of day. On exit
        ``self.complete`` is 2 when the round ended and 1 otherwise. When
        ``self.orderno`` is truthy the method returns right after the first
        order and leaves ``self.complete`` untouched.
        """
        boundaries = time_zone(["08:00", "18:00", "23:59"])
        now = datetime.datetime.now()
        if now < boundaries[0]:
            eoc = EARLIEST_ORDER_CREATETIME
        elif boundaries[0] < now < boundaries[1]:
            eoc = 2
        else:
            eoc = 20

        start_time = datetime.datetime.now()
        logger.info("开始第 " + str(pageNum) + " 页订单爬取")
        logger.info(store_trans(self.fromStore))
        if pageNum == 1:
            self._loop_start_time = datetime.datetime.now()
        loop_control = 0
        for main_order in mainOrders:
            order = {}  # header fields of the order
            order['orderNo'] = main_order["id"]
            order['createTime'] = main_order['orderInfo']['createTime']
            order['buyerName'] = main_order['buyer']['nick']
            flag = main_order['extra']['sellerFlag']
            order['actualFee'] = main_order['payInfo']['actualFee']
            # NOTE(review): raises AttributeError when postType does not
            # contain the express-fee text -- confirm that cannot happen.
            order['deliverFee'] = re.search(
                r"\(含快递:¥(\d+\.\d+)\)",
                main_order['payInfo']['postType']).group(1)
            order['datailURL'] = "https:" + main_order['statusInfo'][
                'operations'][0]['url']
            order['orderStatus'] = main_order['statusInfo']['text']
            order['fromStore'] = self.fromStore
            order['updateTime'] = time_now()
            if flag == 1:
                data_url = self.base_url + main_order['operations'][0][
                    'dataUrl']
                order['sellerFlag'] = await self.get_flag_text(data_url)
            try:
                order['isPhoneOrder'] = main_order['payInfo']['icons'][0][
                    'linkTitle']
            except KeyError:
                pass

            line_no = 0
            for sub_order in main_order['subOrders']:
                cancelled = False
                item = {}  # fields of one sold item
                item['orderNo'] = main_order["id"]
                item['itemNo'] = line_no
                try:
                    item['goodsCode'] = sub_order['itemInfo']['extra'][0][
                        'value']
                except KeyError:
                    item['goodsCode'] = 'error'
                    logger.error(time_now() + " 订单:" + item['orderNo'])
                # NOTE(review): the last two replacements are no-ops as
                # written; possibly full-width parentheses were intended.
                item['tbName'] = sub_order['itemInfo']['title'].strip() \
                    .replace("&plusmn;", "±").replace("&Phi;", "Φ").replace("&Omega;", "Ω") \
                    .replace("&mdash;", "—").replace("&deg;", "°").replace("&times;", "×") \
                    .replace("&mu;", "μ").replace("&nbsp;", "").replace("(", "(").replace(")", ")")
                item['unitPrice'] = sub_order['priceInfo']['realTotal']
                item['sellNum'] = sub_order['quantity']
                item['orderStatus'] = order['orderStatus']
                if self.orderno:
                    logger.info(item['orderStatus'])
                item['refundStatus'] = None
                item['isRefund'] = 0
                item['goodsAttribute'] = ""
                item['url'] = "https:" + sub_order['itemInfo']['itemUrl']

                try:
                    sku_text = sub_order['itemInfo']['skuText']
                except KeyError:
                    pass
                else:
                    parts = []
                    for attr in sku_text:
                        value = attr['value'].replace(
                            "&Omega", "Ω").replace("&middot", "·")
                        # An entry without 'name' continues the previous
                        # value. If there is no previous value yet, start a
                        # new one instead of failing on an empty list.
                        if 'name' in attr or not parts:
                            parts.append(value)
                        else:
                            parts[-1] += value
                    item['goodsAttribute'] = "-".join(parts).replace(
                        "(", "(").replace(")", ")")

                try:
                    operations = sub_order['operations']
                except KeyError:
                    pass
                else:
                    for operation in operations:
                        style = operation['style']
                        if style in ['t12', 't16'
                                     ] and operation['text'] != "退运保险":
                            item['refundStatus'] = operation['text']
                            item['isRefund'] = "1"
                        elif style == 't0' and operation['text'] == '已取消':
                            cancelled = True
                            delete_item = {
                                'orderNo': item['orderNo'],
                                'itemNo': item['itemNo'],
                                'goodsCode': item['goodsCode']
                            }
                            is_exist = mysql.get_data(
                                t="tb_order_detail_spider", l=1, c=delete_item)
                            if is_exist:
                                mysql.delete_data(t="tb_order_detail_spider",
                                                  c=delete_item)
                            # Close the gap left by the removed line.
                            sql = """
                            UPDATE tb_order_detail_spider
                            SET itemNo=itemNo-1
                            WHERE OrderNo='%s' and itemNo>'%s'
                            """ % (item['orderNo'], item['itemNo'])
                            mysql.update_data(sql=sql)
                if cancelled:
                    # Cancelled lines are not stored and take no line number.
                    continue
                line_no += 1
                self.save_in_sql(item=item, tableName='tb_order_detail_spider')

            self.save_in_sql(item=order, tableName='tb_order_spider')
            if self.orderno:
                logger.info("定向爬取订单完成")
                return
            date = datetime.date.today()
            date_limit = (
                date - datetime.timedelta(eoc)).strftime("%Y-%m-%d %H:%M:%S")
            # Both sides are strings; the comparison is lexicographic.
            if order['createTime'] < date_limit:
                logger.info("完成本轮爬取,共翻 " + str(pageNum) + " 页。")
                loop_control = 1
                break

        end_time = datetime.datetime.now()
        spend_time = end_time - start_time
        logger.info(
            str(spend_time.seconds) + " 秒完成第 " + str(pageNum) + " 页订单爬取")
        if loop_control:
            self._loop_end_time = datetime.datetime.now()
            loop_spend_time = round(
                (self._loop_end_time - self._loop_start_time).seconds / 60, 0)
            logger.info(str(loop_spend_time) + " 分钟完成本轮订单爬取")
            self.complete = 2
        else:
            self.complete = 1
コード例 #8
0
ファイル: price_tb.py プロジェクト: ghostcfl/headlessspider
    async def parse(self, data):
        """Write the price rows for the entries of ``data`` to ``prices_tb``.

        ``data`` is either the string ``"q"`` (nothing to do; only
        ``self.complete`` is set to 1) or a list of entry dicts.

        For each entry ``self.item`` is rebuilt from ``self.common`` plus the
        stock id, link id, description, price and promotion price. Then:

        * If the stock id starts with ``MUT`` or ``prices_tb_fix`` has a
          ``spe_link`` value for the link, the per-sku dialog is opened on
          the page and this method writes nothing for that entry.
        * Otherwise, if no promotion price is known for the link, a mail is
          sent, ``self.complete`` is set to 2 and processing stops.
        * Otherwise the matching row is updated, or inserted (with a line
          appended to the insert report) when none exists. Other rows that
          share the link id but carry a different stock id are flagged
          ``"del"`` (with a line in the delete report), and
          ``self.complete`` is set to 1.
        """
        if data == "q":
            self.complete = 1
            return

        for entry in data:
            self.item = self.common.copy()
            self.item['stockid'] = re.search(
                "编码:(.*)",
                entry['itemDesc']['desc'][1]['text']).group(1).upper()
            self.item['link_id'] = entry['itemId']
            self.item['attribute'] = ""
            self.item['flag'] = "update"
            self.item['typeabbrev'] = self.fromStore
            self.item['shop_id'] = self.shop_id(self.fromStore)
            self.item['SpiderDate'] = time_now()
            temp_des = entry['itemDesc']['desc'][0]['text']
            # NOTE(review): both replacements are no-ops as written;
            # possibly full-width parentheses were intended -- confirm.
            self.item['description'] = temp_des.replace("(", "(").replace(
                ")", ")")
            # NOTE(review): the "." is unescaped and matches any character;
            # the pattern is kept exactly as it was.
            self.item['price_tb'] = re.findall(
                r"(\d+.?\d*)", entry["managerPrice"]['currentPrice'])[0]
            self.item['promotionprice'] = self.promo_price.get(
                self.item['link_id'])

            sql = "select spe_link from prices_tb_fix where link_id='%s' and server='%s'" % (
                self.item['link_id'], self.sn)
            spe_link_id = mysql.get_data(db=self.db_test,
                                         sql=sql,
                                         return_one=True)
            isMut = re.search(r"^MUT\D*", self.item['stockid'])

            if isMut or spe_link_id:
                # Open the per-sku dialog; nothing more is written here for
                # this entry.
                await self.page.setRequestInterception(True)
                self.page.on('request', self.intercept_request)
                self.page.on('response', self.intercept_response)
                await asyncio.sleep(1)
                net_check()
                await self.page.click(
                    ".next-table-row td:nth-child(2) div.product-desc-hasImg span:nth-child(2) i"
                )
                await asyncio.sleep(1)
                await self.page.keyboard.press('Escape')
                continue

            if self.item['promotionprice'] is None:
                mail("price_tb_error",
                     self.fromStore + ":" + self.item['link_id'],
                     ["*****@*****.**"])
                logger.error("error:" + self.fromStore + " : " +
                             self.item['link_id'] + " and " +
                             mysql.concat(self.promo_price, "="))
                self.complete = 2
                break

            condition = {
                "stockid": self.item['stockid'],
                "link_id": self.item['link_id'],
                "shop_id": self.item['shop_id'],
            }
            res = mysql.get_data(t="prices_tb",
                                 l=1,
                                 cn=["id"],
                                 c=condition,
                                 db=self.target_server)
            if res:
                # NOTE(review): the query selects the "id" column, so the
                # ratio is price / row id -- confirm the intended column.
                self.item['ratio'] = round(
                    float(self.item['price_tb']) / float(res[0][0]), 2)
                print(self.item)
                mysql.update_data(t="prices_tb",
                                  set=self.item,
                                  c=condition,
                                  db=self.target_server)
            else:
                insert_item = self.item.copy()
                insert_item["currabrev"] = "CNY"
                insert_item["price_erp"] = 0
                insert_item["operator"] = self.operator
                insert_item["last_time"] = time_now()
                if self.operator == "爬虫维护":
                    insert_item["flag"] = "create"
                else:
                    insert_item['flag'] = "add"
                insert_item["ratio"] = 1
                insert_item["package_number"] = 1
                insert_item["Checker"] = ""
                insert_item["CheckDate"] = "0000-00-00 00:00:00"
                print(insert_item)

                with open(
                        "reports/report_" + self.fromStore +
                        "_insert.txt", "a") as file:
                    file.write("物料编码:" + insert_item['stockid'] +
                               " 与 商品ID:" +
                               insert_item['link_id'] +
                               " 为最新匹配,添加至ERP系统。\n" +
                               self.item_url +
                               insert_item['link_id'] + "\n" +
                               self.item_erp_url +
                               insert_item['link_id'] + "\n\n")

                mysql.insert_data(t="prices_tb",
                                  d=insert_item,
                                  db=self.target_server)

            result = mysql.get_data(
                t="prices_tb",
                cn=["*"],
                c={"link_id": self.item['link_id']},
                db=self.target_server,
                dict_result=True)

            if len(result) > 1:
                # Several rows share this link id: flag the ones whose stock
                # id differs from the current one.
                for r in result:
                    if r['stockid'] != self.item['stockid'] and r[
                            'flag'] != "del":
                        with open(
                                "reports/report_" + self.fromStore +
                                "_delete.txt", "a") as file:
                            file.write("物料编码:" + r['stockid'] +
                                       " 与 商品ID:" +
                                       self.item['link_id'] +
                                       " 不匹配,已被爬虫从ERP系统中删除。\n" +
                                       self.item_url +
                                       self.item['link_id'] +
                                       "\n" + self.item_erp_url +
                                       self.item['link_id'] +
                                       "\n\n")

                        mysql.update_data(t="prices_tb",
                                          c={"id": r['id']},
                                          db=self.target_server,
                                          set={"flag": "del"})

            self.complete = 1
コード例 #9
0
ファイル: price_tb.py プロジェクト: ghostcfl/headlessspider
    async def fix_data(self, link_id=None):
        """Collect the prices of one item and trigger its price lookup.

        When ``link_id`` is not given it is chosen by ``MODE``: a fixed id
        (1), console input matching 10-20 digits (2), or the next
        incomplete row of ``prices_tb_fix`` for ``self.fromStore`` (3).

        The item page is opened in a new tab. If it shows an error or
        offline marker, the item is flagged ``"XiaJia"`` in ``prices_tb``
        and the ``prices_tb_fix`` row gets ``isComplete = "2"``. Otherwise
        the sales, rating and freight texts are stored in ``self.common``,
        per-sku prices in ``self.prices`` / ``self.prop`` /
        ``self.promo_price``, and the id is searched on ``self.page``. The
        method then waits until ``self.complete`` becomes 1 or 2 and
        records the outcome in ``prices_tb_fix``.

        Returns ``1`` only when ``MODE == 3`` and no row is left to process;
        otherwise ``None``.
        """
        self.complete = 0
        self.prices = {}
        self.promo_price = {}
        await asyncio.sleep(2)
        await self.page.focus("input[name='queryItemId']")
        await self.page.keyboard.down("ShiftLeft")
        await self.page.keyboard.press("Home")
        # NOTE(review): Shift is pressed a second time and never released;
        # possibly keyboard.up was intended -- confirm.
        await self.page.keyboard.down("ShiftLeft")
        await self.page.keyboard.press("Delete")
        server_name = 'production_server'
        self.sn = server_name
        if not link_id:
            if MODE == 1:
                link_id = "585308692855"
            elif MODE == 2:
                while True:
                    link_id = input(time_now() + " | 输入link_id:")
                    isMatch = re.match(r"^\d{10,20}$", link_id)
                    if isMatch:
                        break
            elif MODE == 3:
                sql = """
                SELECT link_id,updateTime,server,operator
                FROM prices_tb_fix 
                WHERE fromStore='%s' and isComplete=0
                ORDER BY flag LIMIT 1
                """ % (self.fromStore)
                res = mysql.get_data(sql=sql, db=self.db_test)
                if res:
                    self.target_server = self.server[res[0][2]]
                    link_id = res[0][0]
                    server_name = res[0][2]
                    self.sn = server_name
                    self.operator = res[0][3]
                else:
                    return 1

        logger.info(link_id)
        page = await self.browser.newPage()
        await page.setViewport({'width': 1600, 'height': 900})
        net_check()
        await page.goto("https://item.taobao.com/item.htm?id=" + link_id,
                        timeout=0)
        await asyncio.sleep(3)
        # Either marker means the item is not on sale.
        error_page = await page.J(".error-notice-hd")
        offline = await page.J("#J_detail_offline")
        if error_page or offline:
            logger.info("商品已下架")
            mysql.update_data(t="prices_tb",
                              set={
                                  "flag": "XiaJia",
                                  "typeabbrev": self.fromStore
                              },
                              c={"link_id": link_id},
                              db=self.target_server)
            mysql.update_data(db=self.db_test,
                              t="prices_tb_fix",
                              set={
                                  "isComplete": "2",
                                  "updateTime": time_now()
                              },
                              c={
                                  "link_id": link_id,
                                  "server": server_name
                              })
            await page.close()
            return

        # Re-read the page until both counters start with a digit.
        # NOTE(review): never ends if a counter stays non-numeric.
        while True:
            content = await page.content()
            doc = pq(content)
            self.common['rates'] = doc.find("#J_RateCounter").text()
            self.common['sales'] = doc.find("#J_SellCounter").text()
            self.common['freight'] = doc.find("#J_WlServiceTitle").text()
            mat1 = re.match(r"\d+", self.common['sales'])
            mat2 = re.match(r"\d+", self.common['rates'])
            if mat1 and mat2:
                break

        # Each match is (property-value ids, price, sku id); any match means
        # the item has several skus.
        res = re.findall(r'";(.*?);".*?e":"(\d+\.\d+).*?d":"(\d+)"',
                         content)
        if res:
            need_benefit = True
            benefit_price = 0
            for r in res:
                data_values = r[0].split(";")
                prop = []
                for data in data_values:
                    prop.append(
                        doc.find("li[data-value='" + data +
                                 "'] span").text())

                if need_benefit:
                    # Select this sku on the page and read its promotion
                    # price, until one sku yields exactly one value.
                    for data in data_values:
                        try:
                            await page.click('li[data-value="' + data +
                                             '"]')
                        except errors.PageError:
                            # Best effort: a property that cannot be
                            # clicked is skipped.
                            pass
                    content_p = await page.content()
                    promo_price = re.findall(
                        r'<em id="J_PromoPriceNum".*?>(\d+\.?\d*)</em>',
                        content_p)
                    if len(promo_price) == 1:
                        benefit_price = float(r[1]) - float(promo_price[0])
                        need_benefit = False

                self.prices[r[2]] = r[1]
                prop.reverse()
                self.prop[r[2]] = "-".join(prop)

            if benefit_price:
                # Apply the discount measured on one sku to every sku.
                for r in res:
                    self.promo_price[r[2]] = round(
                        float(r[1]) - benefit_price, 2)
        else:
            promo_price = re.findall(
                r'<em id="J_PromoPriceNum".*?>(\d+.*\d*)</em>',
                content)
            if promo_price:
                self.promo_price[link_id] = promo_price[0]
            else:
                self.promo_price[link_id] = 0

        await page.close()
        await self.page.type("input[name='queryItemId']", link_id)
        await self.page.setRequestInterception(True)
        self.page.on('request', self.intercept_request)
        self.page.on('response', self.intercept_response)
        await asyncio.sleep(1)
        net_check()
        await self.page.click(".filter-footer button:first-child")

        # self.complete is set outside this method once the search result
        # has been handled; poll it once per second.
        while True:
            await asyncio.sleep(1)
            if self.complete == 1:
                res = mysql.get_data(db=self.db_test,
                                     t="prices_tb_fix",
                                     c={
                                         "link_id": link_id,
                                         "server": server_name
                                     })
                if res:
                    mysql.update_data(db=self.db_test,
                                      t="prices_tb_fix",
                                      set={
                                          "isComplete": "1",
                                          "updateTime": time_now()
                                      },
                                      c={
                                          "link_id": link_id,
                                          "server": server_name
                                      })
                break
            elif self.complete == 2:
                mysql.update_data(db=self.db_test,
                                  t="prices_tb_fix",
                                  set={"spe_link": "1"},
                                  c={
                                      "link_id": link_id,
                                      "server": server_name
                                  })
                break
コード例 #10
0
ファイル: price_tb.py プロジェクト: ghostcfl/headlessspider
    async def parse_2(self, data):
        """Sync the SKU rows of one listing into the ``prices_tb`` table.

        For every entry of ``data['skuOuterIdTable']['dataSource']`` that has
        a non-empty ``skuOuterId``, the ``prices_tb`` row matching
        stockid/link_id/shop_id is updated, or inserted when it does not
        exist yet. Afterwards report files under ``reports/`` are appended
        for duplicated codes, for a listing without any code, and for stored
        rows whose stockid is no longer present in ``data`` (those rows are
        flagged ``'del'``). Finally ``self.complete`` is set to 1.

        Args:
            data: mapping where ``data['skuOuterIdTable']['dataSource']`` is
                an iterable of dicts providing ``skuOuterId`` and ``skuId``.
        """
        verify = set()  # every non-empty stockid seen in this listing
        repeat_list = []  # stockids seen more than once, in first-repeat order
        for sku in data['skuOuterIdTable']['dataSource']:
            self.item['stockid'] = sku['skuOuterId']
            logger.info(self.item['stockid'])
            if not self.item['stockid']:
                continue
            if self.item['stockid'] not in verify:
                verify.add(self.item['stockid'])
            elif self.item['stockid'] not in repeat_list:
                repeat_list.append(self.item['stockid'])

            sku_id = str(sku['skuId'])
            temp_attr = self.prop.get(sku_id)
            if temp_attr:
                # NOTE(review): as written both replace() calls map a
                # character to itself; possibly full-width parentheses were
                # intended -- confirm against the upstream file.
                self.item['attribute'] = temp_attr.replace("(",
                                                           "(").replace(")", ")")
            else:
                # self.prop may hold no (or an empty) entry for this SKU;
                # treat both the same and leave the column out of the write
                # instead of calling .replace() on None.
                self.item.pop('attribute', None)
            self.item['price_tb'] = self.prices.get(sku_id)
            if self.promo_price:
                self.item["promotionprice"] = self.promo_price.get(sku_id)
            else:
                self.item["promotionprice"] = 0

            condition = {
                "stockid": self.item['stockid'],
                "link_id": self.item['link_id'],
                "shop_id": self.item['shop_id'],
            }
            res = mysql.get_data(t="prices_tb",
                                 l=1,
                                 cn=["price_tb"],
                                 c=condition,
                                 db=self.target_server)
            if res:
                # Row already exists: ratio is new price / stored price,
                # falling back to 1 when the stored price is 0.
                if res[0][0] == 0:
                    self.item['ratio'] = 1
                else:
                    self.item['ratio'] = round(
                        float(self.item['price_tb']) / float(res[0][0]), 2)

                print(self.item)
                mysql.update_data(t="prices_tb",
                                  set=self.item,
                                  c=condition,
                                  db=self.target_server)
            else:
                # No row yet: insert a copy of the item completed with the
                # bookkeeping columns.
                insert_item = self.item.copy()
                insert_item["currabrev"] = "CNY"
                insert_item["price_erp"] = 0
                insert_item["operator"] = self.operator
                insert_item["last_time"] = time_now()
                if self.operator == "爬虫维护":
                    insert_item["flag"] = "create"
                else:
                    insert_item['flag'] = "add"
                insert_item["ratio"] = 1
                insert_item["package_number"] = 1
                insert_item["Checker"] = ""
                insert_item["CheckDate"] = "0000-00-00 00:00:00"
                print(insert_item)

                with open("reports/report_" + self.fromStore + "_insert.txt",
                          "a") as file:
                    file.write("物料编码:" + insert_item['stockid'] + " 与商品ID:" +
                               insert_item['link_id'] + " 为最新匹配,添加至ERP系统。\n" +
                               self.item_url + insert_item['link_id'] + "\n" +
                               self.item_erp_url + insert_item['link_id'] +
                               "\n\n")

                mysql.insert_data(t="prices_tb",
                                  d=insert_item,
                                  db=self.target_server)

        if repeat_list:
            with open("reports/report_" + self.fromStore + "_repeat.txt",
                      "a") as file:
                file.write("店铺:" + store_trans(self.fromStore) + ",商品id:" +
                           self.item['link_id'] + " 重复编码\n" + "重复编码:" +
                           ",".join(repeat_list) + "\n" + self.item_url +
                           self.item['link_id'] + "\n\n")

        if not verify:
            with open("reports/report_" + self.fromStore + "_empty.txt",
                      "a") as file:
                file.write("店铺:" + store_trans(self.fromStore) + ",商品id:" +
                           self.item['link_id'] + " 空编码\n" + self.item_url +
                           self.item['link_id'] + "\n\n")

        # NOTE(review): link_id is interpolated directly into the SQL text;
        # prefer a parameterized query if the mysql helper offers one.
        sql = """
        select id,stockid 
        from prices_tb 
        where link_id='%s' 
        and flag not in('del','XiaJia')
        """ % (self.item['link_id'])
        res_verify = mysql.get_data(sql=sql, db=self.target_server)

        # Stored rows whose stockid was not seen above are reported and
        # flagged as deleted.
        for rv in res_verify:
            if rv[1] not in verify:
                with open("reports/report_" + self.fromStore + "_delete.txt",
                          "a") as file:
                    file.write("物料编码:" + rv[1] + " 与 商品ID:" +
                               self.item['link_id'] + " 不匹配,已被爬虫从ERP系统中删除。\n" +
                               self.item_url + self.item['link_id'] + "\n" +
                               self.item_erp_url + self.item['link_id'] +
                               "\n\n")

                mysql.update_data(t="prices_tb",
                                  c={"id": rv[0]},
                                  db=self.target_server,
                                  set={
                                      "flag": "del",
                                      "operator": self.operator,
                                      "last_time": time_now()
                                  })

        self.complete = 1