def handle_spider_closed(self, spider):
    """Spider-closed hook: persist level-1 category data and report run stats.

    Flushes the collected categories to the database, then prints how long
    the crawl took (based on ``spider.started_on``) and how many items were
    gathered into ``self.product_pool``.
    """
    # Persist first so the data is safe even if the reporting below fails.
    Sql.store_cate_level1()
    elapsed = datetime.now() - spider.started_on
    print('total spent:', elapsed)
    print(len(self.product_pool), 'item fetched')
    print('done')
def start_requests(self):
    """Yield one offer-listing request per level-1 ASIN stored in the DB.

    Each request carries the row's ``asin`` and ``cid`` in ``meta`` so the
    parse callback can associate scraped offers with their source record.
    """
    for product in Sql.findall_asin_level1():
        # f_new=true limits the offer listing to new-condition offers.
        offer_url = ('https://www.amazon.com/gp/offer-listing/'
                     + product['asin'] + '/?f_new=true')
        yield scrapy.Request(
            url=offer_url,
            callback=self.parse,
            meta={'asin': product['asin'], 'cid': product['cid']},
        )
def start_requests(self):
    """Yield a page-1 request for each stored level-1 category.

    The category link is switched to its AJAX endpoint; the base AJAX link
    (without the page number) travels in ``meta`` alongside the current page
    so the callback can build follow-up pagination requests.
    """
    for cate in Sql.findall_cate_level1():
        # Mutate the row in place, matching what meta['link'] is expected
        # to carry: the AJAX link without the '&pg=N' suffix.
        cate['link'] += '?ajax=1'
        first_page_url = cate['link'] + '&pg=1'
        yield scrapy.Request(
            url=first_page_url,
            callback=self.parse,
            meta={'cid': cate['id'], 'page': 1, 'link': cate['link']},
        )
def handle_spider_closed(self, spider):
    """Spider-closed hook: persist level-1 category data and report timing.

    Stores the collected categories, then prints the total crawl duration
    computed from ``spider.started_on``.
    """
    Sql.store_cate_level1()  # flush level-1 categories to the database
    duration = datetime.now() - spider.started_on
    print('total spent:', duration)
    print('done')