def parse_item(self, response):
    """Yield one ``FbsproItem`` per ``<a class="news14">`` element.

    Each item's ``title`` is the first direct text node of the anchor
    (``None`` when the anchor has no direct text).
    """
    for anchor in response.xpath('//a[@class="news14"]'):
        entry = FbsproItem()
        entry['title'] = anchor.xpath('./text()').extract_first()
        yield entry
def parse_item(self, response):
    """Yield one ``FbsproItem`` per ``<li>`` of the listing on the page.

    ``title`` is the text of the link inside the third ``<span>`` of each
    list entry (``None`` when there is no such text).
    """
    # Collect the titles across the whole site.
    for entry in response.xpath('/html/body/div[2]/div[3]/ul[2]/li'):
        record = FbsproItem()
        record['title'] = entry.xpath('./span[3]/a/text()').extract_first()
        yield record
def parse_item(self, response):
    """Yield one ``FbsproItem`` per table row (``<tr>``) of the listing.

    ``new_id`` is the first text node of the first cell and ``new_title``
    is the text of the link in the third cell; either is ``None`` when the
    row has no matching node.
    """
    rows = response.xpath('/html/body/div[8]/table[2]//tr')
    for row in rows:
        record = FbsproItem()
        record['new_id'] = row.xpath('./td[1]/text()').extract_first()
        record['new_title'] = row.xpath('./td[3]/a/text()').extract_first()
        yield record
def parse_item(self, response):
    """Yield one ``FbsproItem`` per ``<li>`` of the listing on the page.

    ``id_num`` is the first text node found directly under any ``<span>``
    child of the entry; ``title`` is the first link text found under any
    ``<span>`` child. Either is ``None`` when nothing matches.
    """
    entries = response.xpath('/html/body/div[2]/div[3]/ul[2]/li')
    for entry in entries:
        record = FbsproItem()
        record['id_num'] = entry.xpath('./span/text()').extract_first()
        record['title'] = entry.xpath('./span/a/text()').extract_first()
        yield record
def parse_item(self, response):
    """Yield one ``FbsproItem`` per row of the nested table under ``#morelist``.

    ``title`` is the ``title`` attribute of the second link in the second
    cell; ``status`` is the text of the ``<span>`` in the third cell.
    Either is ``None`` when the row has no matching node.
    """
    row_path = '//*[@id="morelist"]/div/table[2]//tr/td/table//tr'
    for row in response.xpath(row_path):
        record = FbsproItem()
        record['title'] = row.xpath('./td[2]/a[2]/@title').extract_first()
        record['status'] = row.xpath('./td[3]/span/text()').extract_first()
        yield record
def parse_item(self, response):
    """Yield one ``FbsproItem`` per ``<li>`` found under the listing ``<ul>``.

    ``title`` is the link text inside ``<span class="state3">`` and
    ``new_num`` is the text of ``<span class="state1">``; either is
    ``None`` when the entry has no matching node.
    """
    for entry in response.xpath('/html/body/div[2]/div[3]/ul[2]//li'):
        number = entry.xpath('./span[@class="state1"]/text()').extract_first()
        heading = entry.xpath(
            './span[@class="state3"]/a/text()').extract_first()

        record = FbsproItem()
        record['title'] = heading
        record['new_num'] = number
        yield record
def parse_item(self, response):
    """Yield one ``FbsproItem`` per ``<li>`` of the listing, echoing each row.

    ``new_id`` is the first text node directly under any ``<span>`` child;
    ``new_title`` is the first text node anywhere inside the third
    ``<span>``. Both values are also printed to stdout before the item is
    yielded.
    """
    entries = response.xpath('/html/body/div[2]/div[3]/ul[2]/li')
    for entry in entries:
        identifier = entry.xpath('./span/text()').extract_first()
        heading = entry.xpath('./span[3]//text()').extract_first()

        record = FbsproItem()
        record['new_id'] = identifier
        record['new_title'] = heading

        # Console trace of every scraped row.
        print(identifier, heading)
        yield record
def parse_item(self, response):
    """Yield an ``FbsproItem`` with ``title`` and ``status`` per list entry.

    ``title`` is the link text inside the third ``<span>`` of each ``<li>``
    (``None`` when absent). ``status`` is the text of the second ``<span>``
    split on whitespace into a list of tokens; when that span has no text
    the list is empty rather than the lookup raising ``AttributeError``.
    """
    entries = response.xpath('/html/body/div[2]/div[3]/ul[2]/li')
    for entry in entries:
        # Note (translated from the original comment): an XPath containing a
        # tbody step comes back empty; replace the tbody step with '/'.
        #
        # extract_first() returns None when nothing matches, so supply an
        # empty-string default to keep .split() safe on rows without status.
        status = entry.xpath('./span[2]/text()').extract_first(default='').split()
        title = entry.xpath('./span[3]/a/text()').extract_first()

        item = FbsproItem()
        item['title'] = title
        item['status'] = status
        yield item
def parse_item(self, response):
    """Yield one ``FbsproItem`` per table row (``<tr>``) of the listing.

    ``num`` is the first text node of the first cell; ``title`` is the
    ``title`` attribute of the link in the third cell. Either is ``None``
    when the row has no matching node.
    """
    for row in response.xpath('/html/body/div[8]/table[2]//tr'):
        record = FbsproItem()
        record['num'] = row.xpath('./td[1]/text()').extract_first()
        record['title'] = row.xpath('./td[3]/a/@title').extract_first()
        yield record
def parse_item(self, response):
    """Yield one ``FbsproItem`` per row of the nested table under ``#morelist``.

    ``title`` is the ``title`` attribute of the second link in the second
    cell; ``new_num`` is the first text node of the first cell. Either is
    ``None`` when the row has no matching node.
    """
    row_path = '//*[@id="morelist"]/div/table[2]//tr/td/table//tr'
    for row in response.xpath(row_path):
        number = row.xpath('./td[1]/text()').extract_first()
        heading = row.xpath('./td[2]/a[2]/@title').extract_first()

        record = FbsproItem()
        record['title'] = heading
        record['new_num'] = number
        yield record
def parse_novel_name(self, response):
    """Yield one ``FbsproItem`` per ``<li>`` of the novel list on the page.

    For every list entry the item carries:

    * ``novel_category`` - text of the first ``<span>``
    * ``novel_name`` - link text inside the second ``<span>``
    * ``novel_author`` - text of the fourth ``<span>``

    Any field is ``None`` when the entry has no matching node. The response
    object is printed to stdout once per call before parsing.
    """
    print('\n', response)

    # Note: the XPath expression must not contain a tbody tag.
    rows = response.xpath(
        '/html/body/div[3]/div/div/div[2]/div[1]/div[2]/ul/li')
    for row in rows:
        novel_category = row.xpath('./span[1]/text()').extract_first()
        novel_name = row.xpath('./span[2]/a/text()').extract_first()
        novel_author = row.xpath('./span[4]/text()').extract_first()

        item = FbsproItem()
        item['novel_category'] = novel_category
        item['novel_name'] = novel_name
        item['novel_author'] = novel_author
        # Hand the populated item to the caller; without this the callback
        # builds items and silently discards them.
        yield item