def parse_item(self, response):
    """Build a ``MycwpjtItem`` from a news page response.

    ``name`` is filled from the text of the page ``<title>`` and ``link``
    from the ``href`` of the canonical ``<link>`` element. Both fields hold
    whatever ``extract()`` returns for the corresponding XPath query.
    """
    item = MycwpjtItem()

    # Title of the news page.
    title_selector = response.xpath("/html/head/title/text()")
    item["name"] = title_selector.extract()

    # Canonical URL of the news page.
    canonical_selector = response.xpath("//link[@rel='canonical']/@href")
    item["link"] = canonical_selector.extract()

    return item
def parse_item(self, response):
    """Populate a ``MycwpjtItem`` with the page title and canonical link.

    Each item field is paired with the XPath expression that feeds it; the
    result of ``extract()`` for that expression is stored under the field.
    """
    field_queries = (
        ("name", "/html/head/title/text()"),
        ("link", "//link[@rel='canonical']/@href"),
    )

    item = MycwpjtItem()
    for field, query in field_queries:
        item[field] = response.xpath(query).extract()
    return item
def parse_item(self, response):
    """Return a ``MycwpjtItem`` whose ``name`` holds the page title text.

    Only the ``name`` field is set here; it receives the ``extract()``
    result for the ``<title>`` text node of the response.
    """
    scraped = MycwpjtItem()
    title_selector = response.xpath("/html/head/title/text()")
    scraped["name"] = title_selector.extract()
    return scraped
def parse_item(self, response):
    """Collect the title and canonical link of a news page into an item.

    Returns a ``MycwpjtItem`` with ``name`` set from the ``<title>`` text
    and ``link`` set from the canonical ``<link>`` element's ``href``.
    """
    title_xpath = "/html/head/title/text()"
    canonical_xpath = "//link[@rel='canonical']/@href"

    record = MycwpjtItem()
    # Extract the news page title using its XPath expression.
    record["name"] = response.xpath(title_xpath).extract()
    # Extract the link of the current news page using its XPath expression.
    record["link"] = response.xpath(canonical_xpath).extract()
    return record
def parse_item(self, response):
    """Create a ``MycwpjtItem`` describing the news page in ``response``.

    The ``name`` field is taken from the ``<title>`` text and the ``link``
    field from the ``href`` attribute of the canonical ``<link>`` element.
    """
    news_item = MycwpjtItem()

    # Title of the news page.
    page_title = response.xpath('/html/head/title/text()').extract()
    news_item['name'] = page_title

    # Canonical link of the news page.
    page_link = response.xpath('//link[@rel="canonical"]/@href').extract()
    news_item['link'] = page_link

    return news_item
def parse_item(self, response):
    """Build a ``MycwpjtItem`` holding the page title and canonical link.

    Args:
        response: The response object whose ``xpath()`` method is queried.

    Returns:
        A ``MycwpjtItem`` with ``name`` set to the extracted ``<title>``
        text and ``link`` set to the extracted canonical ``href``. Both
        values are the lists of text strings returned by ``extract()``.
    """
    i = MycwpjtItem()

    # NOTE: the earlier version called ``reload(sys)`` and
    # ``sys.setdefaultencoding('utf-8')`` and then chained
    # ``.decode('utf-8').encode(...)`` onto the result of ``extract()``.
    # ``extract()`` returns a list, which has neither method, so every call
    # raised AttributeError. The default-encoding hack is also Python 2-only
    # and changes global interpreter state. The extracted text is stored
    # as-is; any byte encoding belongs at the output boundary (for example
    # an item pipeline or feed exporter), not in the parsing callback.
    i["name"] = response.xpath("/html/head/title/text()").extract()
    i["link"] = response.xpath("//link[@rel='canonical']/@href").extract()
    return i
def parse_item(self, response):
    """Extract the page title and canonical link into a ``MycwpjtItem``.

    ``name`` receives the ``extract()`` result for the ``<title>`` text
    node; ``link`` receives the ``extract()`` result for the canonical
    ``<link>`` element's ``href`` attribute.
    """
    result = MycwpjtItem()

    titles = response.xpath('/html/head/title/text()').extract()
    result['name'] = titles

    canonical_hrefs = response.xpath("//link[@rel='canonical']/@href").extract()
    result['link'] = canonical_hrefs

    return result