예제 #1
0
    def TMItem(self):
        """Scrape seller, shop, brand and sales fields from the item page.

        Fetches ``self.item_url`` through ``self.crawler`` (passing
        ``self.refers`` as the second argument) and fills in whichever of the
        following attributes can be found in the response: ``seller_id``,
        ``shop_id``, ``shop_url``, ``shop_name``, ``brand_name``,
        ``brand_id``, ``category_id``, ``seller_name`` and
        ``item_sellCount``.  An attribute whose pattern does not match is
        left untouched.  Nothing happens when ``self.item_url`` is ``''``.

        The sell count is not part of the item page itself: it is read from
        a second response, fetched from the ``initApi`` URL found inside the
        ``TShop.Setup(...)`` payload.

        Raises:
            Common.InvalidPageException: the crawler returned an empty or
                false value for the item page.
        """
        if self.item_url == '':
            return

        page = self.crawler.getData(self.item_url, self.refers)
        if not page:
            raise Common.InvalidPageException(
                "# TMItem: not find item page,itemid:%s,item_url:%s" %
                (str(self.item_id), self.item_url))

        # Seller / shop identifiers are embedded directly in the page source.
        m = re.search(r'sellerId:"(\d+)",', page, flags=re.S)
        if m:
            self.seller_id = m.group(1)
        m = re.search(r'shopId:"(\d+)",', page, flags=re.S)
        if m:
            self.shop_id = m.group(1)
        m = re.search(
            r'<div class="slogo">\s*<a class="slogo-shopname" href="(.+?)".+?><strong>(.+?)</strong></a>',
            page,
            flags=re.S)
        if m:
            self.shop_url = Common.fix_url(m.group(1))
            self.shop_name = m.group(2).strip()

        # Everything below is read from the TShop.Setup(...) argument.
        m = re.search(r'TShop\.Setup\((.+?)\);', page, flags=re.S)
        if not m:
            return
        TShop_s = m.group(1).strip()

        m = re.search(r'"brand":"(.+?)",', TShop_s, flags=re.S)
        if m:
            self.brand_name = Common.htmlDecode(m.group(1).strip())
        m = re.search(r'"brandId":"(\d+)",', TShop_s, flags=re.S)
        if m:
            self.brand_id = m.group(1)
        m = re.search(r'"categoryId":"(\d+)",', TShop_s, flags=re.S)
        if m:
            self.category_id = m.group(1)
        m = re.search(r'"sellerNickName":"(.+?)",', TShop_s, flags=re.S)
        if m:
            self.seller_name = Common.urlDecode(m.group(1).strip())

        m = re.search(r'"initApi":"(.+?)",', TShop_s, flags=re.S)
        if not m:
            return

        # Append a callback name, a millisecond timestamp and the encoded
        # referrer to the initApi URL before requesting it.
        ts = "&callback=setMdskip&timestamp=%s" % str(int(time.time() * 1000))
        initapi_url = (Common.fix_url(m.group(1).strip()) + ts +
                       "&ref=%s" % Common.urlCode(self.refers))
        init_page = self.crawler.getData(initapi_url, self.item_url)

        # The previous guard was `not init_page and init_page == ''`, which
        # is False for None and let None reach re.search (TypeError).  Any
        # empty/false response is now reported and skipped.
        if not init_page:
            # Parenthesised single argument: valid as a Python 2 print
            # statement and as a Python 3 print() call.
            print('# init page is null..')
            return

        m = re.search(r'"sellCountDO":{"sellCount":(\d+),',
                      init_page,
                      flags=re.S)
        if m:
            self.item_sellCount = m.group(1)
예제 #2
0
파일: Item.py 프로젝트: xzhoutxd/tb
    def TMItem(self):
        """Populate seller, shop, brand and sell-count attributes from the item page.

        The page at ``self.item_url`` is requested via
        ``self.crawler.getData(self.item_url, self.refers)``.  Each value is
        extracted with a regular expression and stored only when its pattern
        matches; attributes set here are ``seller_id``, ``shop_id``,
        ``shop_url``, ``shop_name``, ``brand_name``, ``brand_id``,
        ``category_id``, ``seller_name`` and ``item_sellCount``.  When
        ``self.item_url`` is ``''`` the method returns without doing anything.

        ``item_sellCount`` comes from a follow-up request to the ``initApi``
        URL listed in the page's ``TShop.Setup(...)`` call.

        Raises:
            Common.InvalidPageException: no content was returned for the
                item page.
        """
        if self.item_url == '':
            return

        page = self.crawler.getData(self.item_url, self.refers)
        if not page:
            raise Common.InvalidPageException("# TMItem: not find item page,itemid:%s,item_url:%s"%(str(self.item_id), self.item_url))

        # Identifiers that appear directly in the page source.
        m = re.search(r'sellerId:"(\d+)",', page, flags=re.S)
        if m:
            self.seller_id = m.group(1)
        m = re.search(r'shopId:"(\d+)",', page, flags=re.S)
        if m:
            self.shop_id = m.group(1)
        m = re.search(r'<div class="slogo">\s*<a class="slogo-shopname" href="(.+?)".+?><strong>(.+?)</strong></a>', page, flags=re.S)
        if m:
            self.shop_url, self.shop_name = Common.fix_url(m.group(1)), m.group(2).strip()

        # The remaining fields live inside the TShop.Setup(...) argument.
        m = re.search(r'TShop\.Setup\((.+?)\);', page, flags=re.S)
        if not m:
            return
        TShop_s = m.group(1).strip()

        m = re.search(r'"brand":"(.+?)",', TShop_s, flags=re.S)
        if m:
            self.brand_name = Common.htmlDecode(m.group(1).strip())
        m = re.search(r'"brandId":"(\d+)",', TShop_s, flags=re.S)
        if m:
            self.brand_id = m.group(1)
        m = re.search(r'"categoryId":"(\d+)",', TShop_s, flags=re.S)
        if m:
            self.category_id = m.group(1)
        m = re.search(r'"sellerNickName":"(.+?)",', TShop_s, flags=re.S)
        if m:
            self.seller_name = Common.urlDecode(m.group(1).strip())

        m = re.search(r'"initApi":"(.+?)",', TShop_s, flags=re.S)
        if not m:
            return

        # The initApi URL is extended with a callback name, a millisecond
        # timestamp and the encoded referrer.
        ts = "&callback=setMdskip&timestamp=%s" % str(int(time.time()*1000))
        initapi_url = Common.fix_url(m.group(1).strip()) + ts + "&ref=%s" % Common.urlCode(self.refers)
        init_page = self.crawler.getData(initapi_url, self.item_url)

        # Was `not init_page and init_page == ''`: that only caught '' and
        # allowed None through to re.search, which raises TypeError.
        if not init_page:
            # Single parenthesised argument prints identically under the
            # Python 2 print statement and the Python 3 print() function.
            print('# init page is null..')
            return

        m = re.search(r'"sellCountDO":{"sellCount":(\d+),', init_page, flags=re.S)
        if m:
            self.item_sellCount = m.group(1)