def parse_item(self, response):
    """Yield a ``DongguanItem`` for every post container in *response*.

    A post container is a ``<div class="pagecenter p3">`` element. For each
    container the yielded item carries:

    * ``title``   -- the first text node found under a descendant
      ``<strong class="tgray14">``;
    * ``num``     -- the last space-separated piece of ``title`` and, within
      that piece, the part after its last colon;
    * ``content`` -- the first text node found under a descendant
      ``<div class="c1 text14_2">``, or an empty string when there is none.

    Containers without a title text node are skipped instead of raising
    ``IndexError``, so one malformed container does not abort the remaining
    containers on the same page.
    """
    # Debug trace of the page being parsed. The call form with a single
    # argument prints the same text under Python 2 and Python 3.
    print(response.url)

    for each in response.xpath('//div[@class="pagecenter p3"]'):
        titles = each.xpath(".//strong[@class='tgray14']/text()").extract()
        if not titles:
            # Without a title there is nothing to derive ``num`` from.
            continue
        contents = each.xpath(
            ".//div[@class='c1 text14_2']/text()").extract()

        item = DongguanItem()
        item["title"] = titles[0]
        item["num"] = titles[0].split(" ")[-1].split(":")[-1]
        # A missing content node yields an empty body rather than a crash.
        item["content"] = contents[0] if contents else ""
        # Debug marker emitted once per yielded item.
        print("666666")
        yield item
def parse_item(self, response):
    """Build one ``DongguanItem`` from *response* and yield it.

    The first text node under any ``<strong>`` inside
    ``<div class="pagecenter p3">`` is split on single spaces. The first
    piece supplies ``title`` and the second piece supplies ``num``; in both
    cases the value kept is the second colon-separated part of the piece.

    ``content`` is every text node matched by the content XPath (either the
    ``c1 text14_2`` div nested in ``content text14_2``, or the
    ``contentext`` div), concatenated and stripped of surrounding
    whitespace. ``url`` is ``response.url``.

    Raises ``IndexError`` when the header text node is missing or does not
    contain the expected space/colon separators.
    """
    post = DongguanItem()
    page_url = response.url

    header = response.xpath(
        '//div[@class="pagecenter p3"]//strong/text()').extract()[0]
    pieces = header.split(' ')
    post_title = pieces[0].split(':')[1]
    post_num = pieces[1].split(':')[1]

    text_nodes = response.xpath(
        '//div[@class="content text14_2"]/div[@class="c1 text14_2"]/text()'
        ' | //div[@class="contentext"]/text()'
    ).extract()
    body = ''.join(text_nodes).strip()

    # Fields are assigned in a fixed order: url, title, num, content.
    for key, value in (
        ('url', page_url),
        ('title', post_title),
        ('num', post_num),
        ('content', body),
    ):
        post[key] = value

    yield post
def process_item(self, response):
    """Build one ``DongguanItem`` from *response* and yield it.

    The first text node under any ``<strong>`` inside
    ``<div class="pagecenter p3">`` is split on the two-character
    ``'\\xa0\\xa0'`` separator. The first segment supplies ``title`` and the
    second supplies ``number``; in both cases the value kept is the second
    colon-separated part of the segment.

    ``content`` is every text node under ``<div class="c1 text14_2">`` or
    ``<div class="contentext">``, concatenated and stripped of surrounding
    whitespace. ``url`` is ``response.url``.

    Raises ``IndexError`` when the header text node is missing or lacks the
    expected separators.
    """
    record = DongguanItem()
    source_url = response.url

    strong_texts = response.xpath(
        '//div[@class="pagecenter p3"]//strong/text()').extract()
    segments = strong_texts[0].split('\xa0\xa0')
    heading_title = segments[0].split(':')[1]
    heading_number = segments[1].split(':')[1]

    body = ''.join(
        response.xpath(
            '//div[@class="c1 text14_2"]/text() | //div[@class="contentext"]/text()'
        ).extract()
    ).strip()

    record['url'] = source_url
    record['title'] = heading_title
    record['number'] = heading_number
    record['content'] = body
    yield record