예제 #1
0
 def outItemSql(self):
     """Flatten this item into a single tuple of its stored fields.

     The first element is ``self.crawling_time`` passed through
     ``Common.time_s``.  It is followed, in fixed order, by the item,
     seller, shop and brand attributes, the category id, and the crawl
     begin date and hour (16 elements in total).

     NOTE(review): the name suggests the order matches the columns of an
     SQL insert -- confirm against the statement that consumes it.
     """
     crawl_stamp = Common.time_s(self.crawling_time)
     item_part = (self.item_id, self.item_name, self.item_price,
                  self.item_sellCount, self.item_url)
     seller_part = (self.seller_id, self.seller_name)
     shop_part = (self.shop_id, self.shop_name, self.shop_url)
     brand_part = (self.brand_id, self.brand_name)
     crawl_part = (self.category_id, self.crawling_beginDate,
                   self.crawling_beginHour)
     return ((crawl_stamp,) + item_part + seller_part + shop_part
             + brand_part + crawl_part)
예제 #2
0
 def getPage(self, url):
     position = 1
     i = 1
    
     i_url = url
     refers = self.home_url
     max_page = 10
     size_page = 48
     while i <= max_page:
         page = self.crawler.getData(i_url, refers)
         refers = i_url
         i_url = url + '&bcoffset=1&s=%s' % str(i*size_page)
         i += 1
         if not page or page == '':
             print 'not find data url:',i_url
             time.sleep(4)
             continue
         m = re.search(r'<script>\s+g_page_config = ({.+?});.+?</script>', page, flags=re.S)
         if m:
             page_config = m.group(1)
             page_config_s = re.sub(r'\n+','',page_config)
             data = json.loads(page_config_s)
             if data.has_key("mods"):
                 if data["mods"].has_key("itemlist"):
                     itemlist = data["mods"]["itemlist"]
                     if itemlist.has_key("data"):
                         itemlist_data = itemlist["data"]
                         if itemlist_data.has_key("auctions"):
                             for item in itemlist_data["auctions"]:
                                 item_id = position
                                 m = re.search(r'id=(\d+)', item["detail_url"], flags=re.S)
                                 if m:
                                     item_id = m.group(1)
                                 item_sales = item["view_sales"]
                                 m = re.search(r'(\d+)', item["view_sales"], flags=re.S)
                                 if m:
                                     item_sales = m.group(1)
                                 print Common.time_s(Common.now()), position, item_id, item["raw_title"], item["view_price"], item_sales, item["user_id"], item["nick"], "http:" + item["detail_url"], "http:" + item["shopLink"]
                                 self.mysqlAccess.insert_item((Common.time_s(Common.now()), str(item_id), str(position), str(item["raw_title"]), str(item["view_price"]), str(item_sales), "http:" + item["detail_url"], item["user_id"], str(item["nick"]), "http:" + item["shopLink"]))
                                 position += 1
         time.sleep(4)
예제 #3
0
파일: bagItem.py 프로젝트: xzhoutxd/brand
 def outTuple(self):
     """Return this bag item's fields as one flat 13-element tuple.

     Element 0 is ``self.crawling_time`` passed through ``Common.time_s``;
     the remaining elements are the brand/series/item attributes followed
     by the crawl begin date and hour, in the order assembled below.
     """
     row = [Common.time_s(self.crawling_time)]
     # Brand and series the item belongs to.
     row.extend((self.brand_type, self.serie_title))
     # Descriptive item attributes.
     row.extend((self.item_title, self.item_name, self.item_price,
                 self.item_unit, self.item_size, self.item_url,
                 self.item_img, self.item_number))
     # Crawl run bookkeeping.
     row.extend((self.crawling_beginDate, self.crawling_beginHour))
     return tuple(row)
예제 #4
0
파일: Item.py 프로젝트: xzhoutxd/tb
 def outItemSql(self):
     """Return the item's stored fields as a flat 16-element tuple.

     The tuple starts with ``self.crawling_time`` passed through
     ``Common.time_s`` and continues with the attributes named in
     ``attr_order``, read from ``self`` in exactly that order.
     """
     attr_order = (
         'item_id', 'item_name', 'item_price', 'item_sellCount', 'item_url',
         'seller_id', 'seller_name',
         'shop_id', 'shop_name', 'shop_url',
         'brand_id', 'brand_name',
         'category_id',
         'crawling_beginDate', 'crawling_beginHour',
     )
     crawl_stamp = Common.time_s(self.crawling_time)
     values = [getattr(self, attr) for attr in attr_order]
     return (crawl_stamp,) + tuple(values)
예제 #5
0
파일: bagItem.py 프로젝트: xzhoutxd/brand
 def outTuple(self):
     """Pack this bag item into a flat 13-element tuple.

     The leading element is ``self.crawling_time`` passed through
     ``Common.time_s``; the others are the attributes listed in
     ``attr_order``, read from ``self`` in that order.
     """
     attr_order = (
         'brand_type', 'serie_title',
         'item_title', 'item_name', 'item_price', 'item_unit', 'item_size',
         'item_url', 'item_img', 'item_number',
         'crawling_beginDate', 'crawling_beginHour',
     )
     crawl_stamp = Common.time_s(self.crawling_time)
     values = [getattr(self, attr) for attr in attr_order]
     return (crawl_stamp,) + tuple(values)