def TMItem(self):
    """Fetch the item page and copy seller/shop/brand details onto ``self``.

    Does nothing when ``self.item_url`` is the empty string. Otherwise the
    page at ``self.item_url`` is downloaded through ``self.crawler.getData``
    and scanned with regular expressions. Each attribute below is assigned
    only when its pattern matches:

    * ``seller_id``, ``shop_id``, ``shop_url``, ``shop_name`` -- from the
      page itself.
    * ``brand_name``, ``brand_id``, ``category_id``, ``seller_name`` -- from
      the argument text of the ``TShop.Setup(...)`` call found in the page.
    * ``item_sellCount`` -- from a second request to the ``initApi`` URL
      named inside that ``TShop.Setup(...)`` text.

    Raises:
        Common.InvalidPageException: the item page came back empty/falsy.
    """
    if self.item_url == '':
        return

    page = self.crawler.getData(self.item_url, self.refers)
    if not page:
        raise Common.InvalidPageException(
            "# TMItem: not find item page,itemid:%s,item_url:%s"
            % (str(self.item_id), self.item_url))

    m = re.search(r'sellerId:"(\d+)",', page, flags=re.S)
    if m:
        self.seller_id = m.group(1)

    m = re.search(r'shopId:"(\d+)",', page, flags=re.S)
    if m:
        self.shop_id = m.group(1)

    m = re.search(
        r'<div class="slogo">\s*<a class="slogo-shopname" href="(.+?)".+?><strong>(.+?)</strong></a>',
        page, flags=re.S)
    if m:
        self.shop_url = Common.fix_url(m.group(1))
        self.shop_name = m.group(2).strip()

    m = re.search(r'TShop\.Setup\((.+?)\);', page, flags=re.S)
    if not m:
        # Everything below is read out of the TShop.Setup(...) text, so
        # there is nothing more to extract without it.
        return
    TShop_s = m.group(1).strip()

    m = re.search(r'"brand":"(.+?)",', TShop_s, flags=re.S)
    if m:
        self.brand_name = Common.htmlDecode(m.group(1).strip())

    m = re.search(r'"brandId":"(\d+)",', TShop_s, flags=re.S)
    if m:
        self.brand_id = m.group(1)

    m = re.search(r'"categoryId":"(\d+)",', TShop_s, flags=re.S)
    if m:
        self.category_id = m.group(1)

    m = re.search(r'"sellerNickName":"(.+?)",', TShop_s, flags=re.S)
    if m:
        self.seller_name = Common.urlDecode(m.group(1).strip())

    m = re.search(r'"initApi":"(.+?)",', TShop_s, flags=re.S)
    if not m:
        return

    # The literal previously read "setMdskip×tamp": the "&times" prefix of
    # "&timestamp" had been decoded as the HTML entity for the
    # multiplication sign, which fused the two query parameters together.
    ts = "&callback=setMdskip&timestamp=%s" % str(int(time.time() * 1000))
    initapi_url = (Common.fix_url(m.group(1).strip()) + ts
                   + "&ref=%s" % Common.urlCode(self.refers))
    init_page = self.crawler.getData(initapi_url, self.item_url)

    # A None response must take this branch as well; the earlier
    # "not init_page and init_page == ''" test let None through to
    # re.search(), which raises TypeError.
    if not init_page:
        print('# init page is null..')
        return

    m = re.search(r'"sellCountDO":{"sellCount":(\d+),', init_page, flags=re.S)
    if m:
        self.item_sellCount = m.group(1)
def TMItem(self):
    """Download the item page and record the fields found in it on ``self``.

    Returns immediately when ``self.item_url`` is ``''``. Otherwise the page
    is fetched with ``self.crawler.getData(self.item_url, self.refers)`` and
    searched with regular expressions; an attribute is set only if its
    pattern matches:

    * from the page: ``seller_id``, ``shop_id``, ``shop_url``, ``shop_name``
    * from the ``TShop.Setup(...)`` argument text: ``brand_name``,
      ``brand_id``, ``category_id``, ``seller_name``
    * from a follow-up request to the ``initApi`` URL found in that text:
      ``item_sellCount``

    Raises:
        Common.InvalidPageException: when the item page is empty/falsy.
    """
    if self.item_url == '':
        return

    page = self.crawler.getData(self.item_url, self.refers)
    if not page:
        raise Common.InvalidPageException(
            "# TMItem: not find item page,itemid:%s,item_url:%s"
            % (str(self.item_id), self.item_url))

    m = re.search(r'sellerId:"(\d+)",', page, flags=re.S)
    if m:
        self.seller_id = m.group(1)

    m = re.search(r'shopId:"(\d+)",', page, flags=re.S)
    if m:
        self.shop_id = m.group(1)

    m = re.search(
        r'<div class="slogo">\s*<a class="slogo-shopname" href="(.+?)".+?><strong>(.+?)</strong></a>',
        page, flags=re.S)
    if m:
        self.shop_url = Common.fix_url(m.group(1))
        self.shop_name = m.group(2).strip()

    m = re.search(r'TShop\.Setup\((.+?)\);', page, flags=re.S)
    if m:
        # All remaining lookups read TShop_s, so they stay inside this
        # branch; TShop_s is never referenced when the Setup call is absent.
        TShop_s = m.group(1).strip()

        m = re.search(r'"brand":"(.+?)",', TShop_s, flags=re.S)
        if m:
            self.brand_name = Common.htmlDecode(m.group(1).strip())

        m = re.search(r'"brandId":"(\d+)",', TShop_s, flags=re.S)
        if m:
            self.brand_id = m.group(1)

        m = re.search(r'"categoryId":"(\d+)",', TShop_s, flags=re.S)
        if m:
            self.category_id = m.group(1)

        m = re.search(r'"sellerNickName":"(.+?)",', TShop_s, flags=re.S)
        if m:
            self.seller_name = Common.urlDecode(m.group(1).strip())

        m = re.search(r'"initApi":"(.+?)",', TShop_s, flags=re.S)
        if m:
            # "&timestamp" had been corrupted to "×tamp" ("&times" decoded
            # as an HTML entity); restore the separate query parameter.
            ts = "&callback=setMdskip&timestamp=%s" % str(
                int(time.time() * 1000))
            initapi_url = (Common.fix_url(m.group(1).strip()) + ts
                           + "&ref=%s" % Common.urlCode(self.refers))
            init_page = self.crawler.getData(initapi_url, self.item_url)

            # "not init_page" covers both None and ''. The previous
            # "and init_page == ''" condition missed None and then passed
            # it to re.search(), raising TypeError.
            if not init_page:
                print('# init page is null..')
            else:
                m = re.search(r'"sellCountDO":{"sellCount":(\d+),',
                              init_page, flags=re.S)
                if m:
                    self.item_sellCount = m.group(1)