def parse(self, response):
    """Scrape one Douban Top-250 listing page and queue the next page.

    Yields one ``MyfirstpjtItem`` per ``<li>`` entry found under
    ``ol.grid_view``. Afterwards, if the page contains a "next" link, a
    ``scrapy.Request`` for it is yielded with this method as its callback.

    Args:
        response: The response for the listing page; only its ``xpath``
            method is used.

    Yields:
        ``MyfirstpjtItem`` objects with the keys ``serial_number``,
        ``movie_name``, ``introduce``, ``star``, ``evaluate`` and
        ``describe``, followed by at most one ``scrapy.Request``.
    """
    movie_list = response.xpath(
        "//div[@class='article']//ol[@class='grid_view']/li")
    for i_item in movie_list:
        douban_item = MyfirstpjtItem()
        douban_item['serial_number'] = i_item.xpath(
            ".//div[@class='item']//em/text()").extract_first()
        douban_item['movie_name'] = i_item.xpath(
            ".//div[@class='info']/div[@class='hd']/a/span[1]/text()"
        ).extract_first()

        fragments = i_item.xpath(
            ".//div[@class='info']//div[@class='bd']/p[1]/text()").extract()
        # Remove all whitespace inside each text node, drop nodes that were
        # whitespace only, and keep every remaining fragment. Assigning the
        # field once per node would retain only the last node and leave the
        # field unset when the paragraph has no text nodes at all.
        cleaned = ["".join(fragment.split()) for fragment in fragments]
        douban_item['introduce'] = " ".join(
            part for part in cleaned if part)

        douban_item['star'] = i_item.xpath(
            ".//span[@class='rating_num']/text()").extract_first()
        douban_item['evaluate'] = i_item.xpath(
            ".//div[@class='star']//span[4]//text()").extract_first()
        douban_item['describe'] = i_item.xpath(
            ".//p[@class='quote']/span/text()").extract_first()
        yield douban_item

    next_links = response.xpath(
        "//span[@class='next']/link/@href").extract()
    if next_links:
        # The href is appended to the fixed Top-250 base URL.
        yield scrapy.Request(
            "https://movie.douban.com/top250" + next_links[0],
            callback=self.parse)
def parse(self, response):
    """Print the text of the page's ``<title>`` element.

    The title text is extracted into the ``urlname`` field of a new
    ``MyfirstpjtItem`` and printed below a header line. Nothing is
    returned.

    Args:
        response: The response to inspect; only its ``xpath`` method is
            used.
    """
    item = MyfirstpjtItem()
    # .extract() turns the selection into plain strings, so the item holds
    # (and the print below shows) the title text rather than selector
    # objects.
    item["urlname"] = response.xpath("/html/head/title/text()").extract()
    print("标题:")
    print(item["urlname"])
    # 12.8: see myxml
def parse(self, response):
    """Extract the page's ``<title>`` text into an item and print it.

    The list produced by ``.extract()`` is stored under ``title`` in a new
    ``MyfirstpjtItem`` and then printed. Nothing is returned.

    Args:
        response: The response to inspect; only its ``xpath`` method is
            used.
    """
    title_texts = response.xpath('/html/head/title/text()').extract()
    page_item = MyfirstpjtItem()
    page_item['title'] = title_texts
    print(page_item['title'])
def parse(self, response):
    """Select the text of every ``<a target='_blank'>`` link and print it.

    The value returned by ``response.xpath`` is stored as-is (it is not
    extracted to strings) under ``urlname`` in a new ``MyfirstpjtItem``
    and then printed. Nothing is returned.

    Args:
        response: The response to inspect; only its ``xpath`` method is
            used.
    """
    link_texts = response.xpath("//a[@target='_blank']/text()")
    entry = MyfirstpjtItem()
    entry["urlname"] = link_texts
    print(entry["urlname"])
def parse(self, response):
    """Print the text of the page's ``<title>`` element.

    The title text is extracted into the ``urlname`` field of a new
    ``MyfirstpjtItem`` and printed below an announcement line. Nothing is
    returned.

    Args:
        response: The response to inspect; only its ``xpath`` method is
            used.
    """
    item = MyfirstpjtItem()
    # The announcement promises the title itself, so extract the selection
    # to plain strings instead of storing/printing selector objects.
    item['urlname'] = response.xpath('/html/head/title/text()').extract()
    print("以下将显示网址标题:")
    print(item['urlname'])
def parse(self, response):
    """Print the text of the crawled page's ``<title>`` element.

    The title text is extracted into the ``urlname`` field of a new
    ``MyfirstpjtItem`` and printed below an announcement line. Nothing is
    returned.

    Args:
        response: The response to inspect; only its ``xpath`` method is
            used.
    """
    item = MyfirstpjtItem()
    # The announcement promises the title itself, so extract the selection
    # to plain strings instead of storing/printing selector objects.
    item["urlname"] = response.xpath("/html/head/title/text()").extract()
    print("以下将显示爬取的网址的标题")
    print(item["urlname"])
def parse(self, response):
    """Extract the page's ``<title>`` text into an item and return it.

    Args:
        response: The response to inspect; only its ``xpath`` method is
            used.

    Returns:
        A ``MyfirstpjtItem`` whose ``urltitle`` field holds the list
        produced by ``.extract()`` on the ``/html/head/title/text()``
        selection.
    """
    item = MyfirstpjtItem()
    item['urltitle'] = response.xpath("/html/head/title/text()").extract()
    # Call form with a single argument: prints the same text on Python 2
    # and, unlike the print statement, is also valid syntax on Python 3.
    print(u"以下将显示爬取的网址的标题")
    return item
def parse(self, response):
    """Extract the page's ``<title>`` text, print it, and return the item.

    Args:
        response: The response to inspect; only its ``xpath`` method is
            used.

    Returns:
        A ``MyfirstpjtItem`` whose ``urlname`` field holds the list
        produced by ``.extract()`` on the ``/html/head/title/text()``
        selection.
    """
    titles = response.xpath('/html/head/title/text()').extract()
    result = MyfirstpjtItem()
    result['urlname'] = titles
    print('以下将显示爬取的网址的标题')
    print(result['urlname'])
    return result