Ejemplo n.º 1
0
    def parse_item(self, response):
        """Yield a ``DongguanItem`` for each matching container in *response*.

        A container is any node matched by
        ``//div[@class="pagecenter p3"]``.  For each one the item gets:

        * ``title``   -- first text node of ``strong[@class='tgray14']``.
        * ``num``     -- the part of the title's last space-separated token
          that follows its last ``:``.
        * ``content`` -- first text node of ``div[@class='c1 text14_2']``,
          or ``""`` when that node has no text.

        Containers without a title text node are skipped, so that a single
        malformed container does not raise ``IndexError`` and abort the
        extraction of the remaining ones.
        """
        # Parenthesised single-argument form prints the same thing whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print(response.url)
        for each in response.xpath('//div[@class="pagecenter p3"]'):
            titles = each.xpath(
                ".//strong[@class='tgray14']/text()").extract()
            if not titles:
                # Nothing to build an item from; move on to the next node.
                continue

            item = DongguanItem()
            item["title"] = titles[0]

            # Last space-separated token, then whatever follows its last ':'.
            item["num"] = titles[0].split(" ")[-1].split(":")[-1]

            contents = each.xpath(
                ".//div[@class='c1 text14_2']/text()").extract()
            item["content"] = contents[0] if contents else ""
            # NOTE(review): progress marker kept from the original code;
            # looks like leftover debugging output -- confirm before removing.
            print("666666")

            yield item
Ejemplo n.º 2
0
 def parse_item(self, response):
     """Build and yield a single ``DongguanItem`` from *response*.

     The first ``<strong>`` text node under
     ``//div[@class="pagecenter p3"]`` is split on the separator literal
     below into two ``label:value`` fields; the value of the first becomes
     ``title`` and the value of the second becomes ``num``.  ``content`` is
     every matched body text node concatenated and stripped, and ``url`` is
     the response URL.

     Raises ``IndexError`` when the heading node is missing or does not
     have the expected two-field ``label:value`` layout.
     """
     record = DongguanItem()
     page_url = response.url

     heading = response.xpath(
         '//div[@class="pagecenter p3"]//strong/text()').extract()[0]
     fields = heading.split('  ')
     # Keep only the value half of each "label:value" field.
     subject = fields[0].split(':')[1]
     serial = fields[1].split(':')[1]

     text_nodes = response.xpath(
         '//div[@class="content text14_2"]/div[@class="c1 text14_2"]/text() | //div[@class="contentext"]/text()'
     ).extract()

     record['url'] = page_url
     record['title'] = subject
     record['num'] = serial
     record['content'] = ''.join(text_nodes).strip()
     yield record
Ejemplo n.º 3
0
    def process_item(self, response):
        """Build and yield a single ``DongguanItem`` from *response*.

        The first ``<strong>`` text node under
        ``//div[@class="pagecenter p3"]`` is split on two consecutive
        ``\\xa0`` characters into two ``label:value`` fields; the value of
        the first is stored as ``title`` and the value of the second as
        ``number``.  ``content`` is every matched body text node joined
        together and stripped, and ``url`` is the response URL.

        Raises ``IndexError`` when the heading node is missing or does not
        have the expected two-field ``label:value`` layout.
        """
        record = DongguanItem()
        page_url = response.url

        strong_texts = response.xpath(
            '//div[@class="pagecenter p3"]//strong/text()').extract()
        fields = strong_texts[0].split('\xa0\xa0')
        # Keep only the value half of each "label:value" field.
        subject = fields[0].split(':')[1]
        serial = fields[1].split(':')[1]

        text_nodes = response.xpath(
            '//div[@class="c1 text14_2"]/text() | //div[@class="contentext"]/text()'
        ).extract()
        body = ''.join(text_nodes).strip()

        record['url'] = page_url
        record['title'] = subject
        record['number'] = serial
        record['content'] = body
        yield record