Example #1
0
    def parse_item(self, response):
        """Yield one ``FbsproItem`` per ``<a class="news14">`` element.

        The item's ``title`` is whatever ``extract_first()`` returns for the
        anchor's direct text nodes.
        """
        for anchor in response.xpath('//a[@class="news14"]'):
            headline = anchor.xpath('./text()').extract_first()
            entry = FbsproItem()
            entry['title'] = headline
            yield entry
Example #2
0
 def parse_item(self, response):
     """Yield one ``FbsproItem`` per ``<li>`` row of the listing.

     ``title`` is the first text node of the link inside the row's third
     ``<span>``.
     """
     # Collect the titles listed across the site.
     for row in response.xpath('/html/body/div[2]/div[3]/ul[2]/li'):
         entry = FbsproItem()
         entry['title'] = row.xpath('./span[3]/a/text()').extract_first()
         yield entry
Example #3
0
 def parse_item(self, response):
     """Yield one ``FbsproItem`` per ``<tr>`` of the second table in div 8.

     ``new_id`` comes from the first cell's text and ``new_title`` from the
     link text in the third cell.
     """
     rows = response.xpath('/html/body/div[8]/table[2]//tr')
     for row in rows:
         entry = FbsproItem()
         entry['new_id'] = row.xpath('./td[1]/text()').extract_first()
         entry['new_title'] = row.xpath('./td[3]/a/text()').extract_first()
         yield entry
Example #4
0
 def parse_item(self, response):
     """Yield one ``FbsproItem`` per ``<li>`` row of the listing.

     ``id_num`` is the first text node found directly under any ``<span>``
     child of the row; ``title`` is the first link text found under any
     ``<span>`` child.
     """
     for row in response.xpath('/html/body/div[2]/div[3]/ul[2]/li'):
         number = row.xpath('./span/text()').extract_first()
         heading = row.xpath('./span/a/text()').extract_first()

         entry = FbsproItem()
         entry['id_num'] = number
         entry['title'] = heading
         yield entry
Example #5
0
File: fbs.py Project: wuyuz/Scrapy
 def parse_item(self, response):
     """Yield one ``FbsproItem`` per row of the nested ``#morelist`` table.

     ``title`` is read from the ``title`` attribute of the second link in
     the second cell; ``status`` from the span text in the third cell.
     """
     # Item key -> XPath (relative to the row) that supplies its value.
     field_paths = (
         ('title', './td[2]/a[2]/@title'),
         ('status', './td[3]/span/text()'),
     )
     rows = response.xpath(
         '//*[@id="morelist"]/div/table[2]//tr/td/table//tr')
     for row in rows:
         entry = FbsproItem()
         for key, path in field_paths:
             entry[key] = row.xpath(path).extract_first()
         yield entry
Example #6
0
 def parse_item(self, response):
     """Yield one ``FbsproItem`` per ``<li>`` found under the listing ``<ul>``.

     ``title`` is the link text inside ``span.state3`` and ``new_num`` is
     the text of ``span.state1``.
     """
     number_path = './span[@class="state1"]/text()'
     title_path = './span[@class="state3"]/a/text()'

     for row in response.xpath('/html/body/div[2]/div[3]/ul[2]//li'):
         number = row.xpath(number_path).extract_first()
         heading = row.xpath(title_path).extract_first()

         entry = FbsproItem()
         entry['title'] = heading
         entry['new_num'] = number
         yield entry
Example #7
0
    def parse_item(self, response):
        """Yield one ``FbsproItem`` per ``<li>`` row, echoing each to stdout.

        ``new_id`` is the first text node directly under any ``<span>``
        child; ``new_title`` is the first text node anywhere inside the
        third ``<span>``. Both values are printed before the item is yielded.
        """
        rows = response.xpath('/html/body/div[2]/div[3]/ul[2]/li')
        for row in rows:
            ident = row.xpath('./span/text()').extract_first()
            heading = row.xpath('./span[3]//text()').extract_first()

            entry = FbsproItem()
            entry['new_id'] = ident
            entry['new_title'] = heading
            print(ident, heading)
            yield entry
Example #8
0
    def parse_item(self, response):
        """Yield one ``FbsproItem`` per ``<li>`` row of the listing.

        ``title`` is the link text inside the row's third ``<span>``.
        ``status`` is the text of the second ``<span>`` split on whitespace
        into a list of tokens; it is an empty list when that span has no
        text.
        """
        li_list = response.xpath('/html/body/div[2]/div[3]/ul[2]/li')
        for li in li_list:
            # NOTE: a tbody step must not appear in the XPath, otherwise the
            # result comes back empty; replace the tbody step with '/'.
            #
            # default='' matters: extract_first() returns None when nothing
            # matches, and None.split() would raise AttributeError, aborting
            # the callback and dropping every remaining row.
            raw_status = li.xpath('./span[2]/text()').extract_first(default='')
            status = raw_status.split()
            title = li.xpath('./span[3]/a/text()').extract_first()

            item = FbsproItem()
            item['title'] = title
            item['status'] = status

            yield item
Example #9
0
    def parse_item(self, response):
        """Yield one ``FbsproItem`` per ``<tr>`` of the second table in div 8.

        ``num`` is the first cell's text; ``title`` is the ``title``
        attribute of the link in the third cell.
        """
        for row in response.xpath('/html/body/div[8]/table[2]//tr'):
            number = row.xpath('./td[1]/text()').extract_first()
            heading = row.xpath('./td[3]/a/@title').extract_first()

            entry = FbsproItem()
            entry['num'] = number
            entry['title'] = heading
            yield entry
Example #10
0
    def parse_item(self, response):
        """Yield one ``FbsproItem`` per row of the nested ``#morelist`` table.

        ``title`` is the ``title`` attribute of the second link in the second
        cell; ``new_num`` is the text of the first cell.
        """
        row_path = '//*[@id="morelist"]/div/table[2]//tr/td/table//tr'
        for row in response.xpath(row_path):
            number = row.xpath('./td[1]/text()').extract_first()
            heading = row.xpath('./td[2]/a[2]/@title').extract_first()

            entry = FbsproItem()
            entry['title'] = heading
            entry['new_num'] = number
            yield entry
Example #11
0
    def parse_novel_name(self, response):
        # item = {}
        # #item['domain_id'] = response.xpath('//input[@id="sid"]/@value').get()
        # #item['name'] = response.xpath('//div[su@id="name"]').get()
        # #item['description'] = response.xpath('//div[@id="description"]').get()
        # return item

        print('\n', response)
        # 注意:xpath表达式中不可以出现tbody标签
        li_list = response.xpath(
            '/html/body/div[3]/div/div/div[2]/div[1]/div[2]/ul/li')
        for li in li_list:
            novel_category = li.xpath('./span[1]/text()').extract_first()
            novel_name = li.xpath('./span[2]/a/text()').extract_first()
            novel_author = li.xpath('./span[4]/text()').extract_first()
            # print(novel_category, novel_name, novel_author)

            item = FbsproItem()
            item['novel_category'] = novel_category
            item['novel_name'] = novel_name
            item['novel_author'] = novel_author