def parse_item(self, response):
    """Build a ``Subject`` item tagged as a movie for ``response``.

    Prints a marker line on entry and, before returning, the
    ``User-Agent`` header of the request that produced ``response``.

    Args:
        response: Response being parsed; it must expose
            ``request.headers['User-Agent']``.

    Returns:
        The ``Subject`` item after it has been handed to
        ``self.get_douban_id`` and its ``'type'`` set to ``'movie'``.
    """
    print("===================parse_item")

    item = Subject()
    # The helper receives the item itself and works on it in place;
    # its return value is not used.
    self.get_douban_id(item, response)
    item['type'] = 'movie'

    user_agent = response.request.headers['User-Agent']
    print("\nChange User-Agent: ", user_agent)
    return item
def parse(self, response):
    """Yield a ``Subject`` per movie on a listing page, then follow pagination.

    For every ``<li>`` under ``ol.grid_view`` an item is produced with
    ``douban_id``, ``movie_name``, ``star`` and ``description``. The last
    three are ``None`` when the corresponding node is absent. Entries whose
    link does not contain a ``subject/<id>`` segment are skipped with a
    warning rather than aborting the whole page.

    Args:
        response: The listing page response.

    Yields:
        ``Subject`` items, followed by a ``scrapy.Request`` for the next
        page when the page advertises one.
    """
    entries = response.xpath(
        "//div[@class='article']//ol[@class='grid_view']/li")

    for entry in entries:
        detail_url = entry.xpath(
            "./div[@class='item']/div[@class='info']//a/@href").get()

        # Take the path segment that follows 'subject/'. Unlike slicing off
        # the last character, this works with or without a trailing slash
        # and ignores anything after the id. A missing href (None) or a URL
        # without 'subject/' yields an empty string.
        douban_id = (detail_url or '').partition('subject/')[2].split('/', 1)[0]
        if not douban_id:
            self.logger.warning(
                "Skipping entry without a subject id on %s: %r",
                response.url, detail_url)
            continue

        item = Subject()
        item['douban_id'] = douban_id
        item['movie_name'] = entry.xpath(
            ".//div[@class='info']/div[@class='hd']/a/span[1]/text()").get()
        item['star'] = entry.xpath(
            ".//span[@class='rating_num']/text()").get()
        item['description'] = entry.xpath(
            ".//p[@class='quote']/span/text()").get()
        yield item

    # The next-page href is appended verbatim to the list URL.
    next_link = response.xpath("//span[@class='next']/link/@href").get()
    if next_link:
        yield scrapy.Request(
            "https://movie.douban.com/top250" + next_link,
            callback=self.parse)
def parse_item(self, response):
    """Build a ``Subject`` item tagged as a book for ``response``.

    Args:
        response: Response being parsed; forwarded to
            ``self.get_douban_id`` together with the new item.

    Returns:
        The ``Subject`` item with its ``'type'`` set to ``'book'``.
    """
    item = Subject()
    # The helper receives the item itself and works on it in place;
    # its return value is not used.
    self.get_douban_id(item, response)
    item['type'] = 'book'
    return item
def parse_item(self, response):
    """Build a ``Subject`` item tagged as a movie for ``response``.

    Args:
        response: Response being parsed; forwarded to
            ``self.get_douban_id`` together with the new item.

    Returns:
        The ``Subject`` item with its ``'type'`` set to ``'movie'``.
    """
    item = Subject()
    # NOTE(review): the original signature had no first parameter (a syntax
    # error) and called get_douban_id as a bare name. Both are restored to
    # the bound-method form here; confirm get_douban_id is a method of the
    # enclosing class and not a module-level function.
    self.get_douban_id(item, response)
    item['type'] = 'movie'
    return item
def parse_item(self, response):
    """Build a ``Subject`` item tagged as a book for ``response``.

    Args:
        response: Response being parsed; forwarded to
            ``self.set_douban_id`` together with the new item.

    Returns:
        The ``Subject`` item with its ``"type"`` set to ``"book"``.
    """
    item = Subject()
    # The helper receives the item itself and works on it in place;
    # its return value is not used.
    self.set_douban_id(item, response)
    item["type"] = "book"
    return item