def parse1(self, response):
    """Parse a comic index page and schedule chapter-detail requests.

    Extracts each chapter's link href and display name from the page's
    ``<dd><a>`` nodes, builds a ComicItem per chapter, then yields a
    scrapy.Request to ``parse2`` for a window of recent chapters, passing
    the item along in the request ``meta``.

    :param response: the scrapy Response for the index page
    :yields: scrapy.Request objects with ``meta={'item': ComicItem}``
    """
    hxs = Selector(response)
    # Chapter link addresses and chapter names come from the same
    # <dd><a> nodes, so the two extracted lists are parallel — pair
    # them with zip instead of indexing via range(len(...)).
    urls = hxs.xpath('//dd/a[1]/@href').extract()
    dir_names = hxs.xpath('//dd/a[1]/text()').extract()
    items = []
    for url, dir_name in zip(urls, dir_names):
        item = ComicItem()
        item['link_url'] = self.server_link + url
        item['dir_name'] = dir_name
        items.append(item)
    # Send a request for each chapter in a recent window, carrying the
    # item through via meta.
    # NOTE(review): the slice [-13:-1] deliberately skips the very last
    # (newest) chapter — confirm that is intended; items[-13:] would
    # include it.
    for item in items[-13:-1]:
        yield scrapy.Request(url=item['link_url'],
                             meta={'item': item},
                             callback=self.parse2)
def parse1(self, response):
    """Parse a comic index page and schedule a request for every chapter.

    Extracts each chapter's link href and display name from the page's
    ``<dd><a>`` nodes, builds a ComicItem per chapter, then yields a
    scrapy.Request to ``parse2`` for every chapter, passing the item
    along in the request ``meta``.

    :param response: the scrapy Response for the index page
    :yields: scrapy.Request objects with ``meta={'item': ComicItem}``
    """
    hxs = Selector(response)
    # Chapter link addresses and chapter names come from the same
    # <dd><a> nodes, so the extracted lists are parallel — pair them
    # with zip instead of indexing via range(len(...)).
    urls = hxs.xpath('//dd/a[1]/@href').extract()
    dir_names = hxs.xpath('//dd/a[1]/text()').extract()
    items = []
    for url, dir_name in zip(urls, dir_names):
        item = ComicItem()
        item['link_url'] = self.server_link + url
        item['dir_name'] = dir_name
        items.append(item)
    # For every chapter link, send a Request and pass the item through
    # via meta so parse2 can attach page data to it.
    for item in items:
        yield scrapy.Request(url=item['link_url'],
                             meta={'item': item},
                             callback=self.parse2)
def parse1(self, response):
    """Parse a comic index page and schedule requests for new chapters only.

    Extracts each chapter's link href and display name from the page's
    ``<dd><a>`` nodes, builds a ComicItem per chapter (spaces in names
    replaced with underscores for filesystem-friendly directory names),
    records the current chapter count to ``logg.txt``, then yields a
    scrapy.Request to ``parse2`` for each chapter published after the
    previously recorded index ``lastlen``.

    :param response: the scrapy Response for the index page
    :yields: scrapy.Request objects with ``meta={'item': ComicItem}``
    """
    hxs = Selector(response)
    # Chapter link addresses and chapter names come from the same
    # <dd><a> nodes, so the extracted lists are parallel — pair them
    # with zip instead of indexing via range(len(...)).
    urls = hxs.xpath('//dd/a[1]/@href').extract()
    dir_names = hxs.xpath('//dd/a[1]/text()').extract()
    items = []
    for url, dir_name in zip(urls, dir_names):
        item = ComicItem()
        item['link_url'] = self.server_link + url
        # Underscores instead of spaces so the name is safe as a
        # directory/file name.
        item['dir_name'] = dir_name.replace(" ", "_")
        items.append(item)
    # Compare the current chapter count against the last recorded index
    # so only newly published episodes are downloaded.
    print('Current file amount: ' + str(len(items)))
    # str() here avoids a TypeError if lastlen is ever an int rather
    # than the string read back from the log file.
    print('Index of last update: ' + str(lastlen))
    # NOTE(review): the new count is written before any request has
    # completed — if the crawl fails midway, the log still advances and
    # those chapters are skipped next run. Confirm this is acceptable.
    with open(BASE_DIR + '/logg.txt', 'w') as w:
        w.write(str(len(items)))
    # Request only the chapters past the previously recorded index,
    # passing each item through via meta so parse2 can use it.
    for item in items[int(lastlen):]:
        print(item['dir_name'])
        yield scrapy.Request(url=item['link_url'],
                             meta={'item': item},
                             callback=self.parse2)