Ejemplo n.º 1
0
    def parse_item(self, response):
        """Build a MycwpjtItem from a news page response.

        The page title is stored under ``name`` and the canonical URL under
        ``link``; each field holds whatever ``.extract()`` yields for its
        XPath query.
        """
        news_item = MycwpjtItem()
        # Title of the news page, taken from the <title> text in the head.
        title_nodes = response.xpath("/html/head/title/text()")
        news_item["name"] = title_nodes.extract()
        # Link of the current news page, taken from <link rel="canonical">.
        canonical_hrefs = response.xpath("//link[@rel='canonical']/@href")
        news_item["link"] = canonical_hrefs.extract()
        return news_item
Ejemplo n.º 2
0
 def parse_item(self, response):
     """Populate a MycwpjtItem with the page title and canonical link.

     Both fields receive the result of ``.extract()`` on their XPath query.
     """
     # (item field, XPath query) pairs, filled in this order.
     field_queries = (
         ("name", "/html/head/title/text()"),
         ("link", "//link[@rel='canonical']/@href"),
     )
     item = MycwpjtItem()
     for field, query in field_queries:
         item[field] = response.xpath(query).extract()
     return item
Ejemplo n.º 3
0
 def parse_item(self, response):
     """Return a MycwpjtItem whose ``name`` holds the page's <title> text.

     Only the ``name`` field is set; it receives the result of
     ``.extract()`` on the title XPath query.
     """
     page_item = MycwpjtItem()
     title_selector = response.xpath("/html/head/title/text()")
     page_item["name"] = title_selector.extract()
     return page_item
Ejemplo n.º 4
0
 def parse_item(self, response):
     """Collect the title and canonical link of a news page.

     Returns a MycwpjtItem with ``name`` and ``link`` set to the values
     extracted by the corresponding XPath queries.
     """
     result = MycwpjtItem()
     query = response.xpath
     # Title of the news page.
     result["name"] = query("/html/head/title/text()").extract()
     # Link of the current news page.
     result["link"] = query("//link[@rel='canonical']/@href").extract()
     return result
Ejemplo n.º 5
0
 def parse_item(self, response):
     """Extract the title and canonical link of a news page.

     Returns a MycwpjtItem whose ``name`` and ``link`` fields hold the
     ``.extract()`` results of the two XPath queries below.
     """
     title_xpath = '/html/head/title/text()'
     link_xpath = '//link[@rel="canonical"]/@href'

     entry = MycwpjtItem()
     # Title of the news page.
     entry['name'] = response.xpath(title_xpath).extract()
     # Canonical link of the news page.
     entry['link'] = response.xpath(link_xpath).extract()
     return entry
Ejemplo n.º 6
0
 def parse_item(self, response):
     """Build a MycwpjtItem with the title and canonical link as bytes.

     Every string extracted for ``name`` and ``link`` is encoded with the
     encoding reported by ``sys.getfilesystemencoding()`` (falling back to
     UTF-8 if none is reported), so each field holds a list of encoded
     byte strings.

     No process-wide default-encoding override is performed: the extracted
     text is encoded directly, so no implicit conversion is involved.
     """
     fs_encoding = sys.getfilesystemencoding() or "utf-8"

     def encoded_matches(query):
         # .extract() yields a list of strings, so encode element by
         # element; calling .decode()/.encode() on the list itself raises
         # AttributeError.
         return [text.encode(fs_encoding)
                 for text in response.xpath(query).extract()]

     i = MycwpjtItem()
     i["name"] = encoded_matches("/html/head/title/text()")
     i["link"] = encoded_matches("//link[@rel='canonical']/@href")
     return i
Ejemplo n.º 7
0
 def parse_item(self, response):
     """Return a MycwpjtItem carrying the page title and canonical link.

     ``name`` and ``link`` are each set to the ``.extract()`` result of
     their XPath query.
     """
     def values_for(query):
         # Run one XPath query against the response and extract its matches.
         return response.xpath(query).extract()

     record = MycwpjtItem()
     record['name'] = values_for('/html/head/title/text()')
     record['link'] = values_for("//link[@rel='canonical']/@href")
     return record