def parse(self, response):
    """Yield one detail-page request per course card on the listing page.

    Every ``a.course-box`` element contributes a partially filled
    ``MultipageCourseItem`` (name and image). The item travels in
    ``request.meta['item']`` so the ``parse_author`` callback can read it.
    """
    for box in response.css('a.course-box'):
        title = box.xpath(
            './/div[@class="course-name"]/text()').extract_first()
        picture = box.xpath('.//img/@src').extract_first()
        partial_item = MultipageCourseItem(name=title, image=picture)

        # The card's own href is resolved against the page URL.
        detail_url = response.urljoin(box.xpath('@href').extract_first())

        yield scrapy.Request(
            url=detail_url,
            callback=self.parse_author,
            meta={'item': partial_item},
        )
def parse(self, response):
    """Yield a detail-page request for each course card on the listing page.

    For every ``div.col-md-3`` card a ``MultipageCourseItem`` is pre-filled
    with the course name and cover image, then attached to the request as
    ``request.meta['item']`` for the ``parse_author`` callback.

    A card without a ``h6.course-name`` text node no longer aborts the whole
    page: its name is stored as ``None`` instead of calling ``.strip()`` on a
    missing value. A card without any link is skipped because there is no
    detail page to request.
    """
    for course in response.css('div.col-md-3'):
        course_url = course.css('a::attr(href)').extract_first()
        if course_url is None:
            # urljoin(None) would resolve to the page's own base URL and
            # hand that page to parse_author, so skip link-less cards.
            continue

        raw_name = course.css('h6.course-name::text').extract_first()
        item = MultipageCourseItem(
            name=raw_name.strip() if raw_name is not None else None,
            image=course.css('img.cover-image::attr(src)').extract_first(),
        )

        full_course_url = response.urljoin(course_url)
        request = scrapy.Request(full_course_url, self.parse_author)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Yield a detail-page request for each ``div.col-10`` entry.

    Each entry produces a ``MultipageCourseItem`` holding ``name`` and
    ``update_time``; the item is attached to the request as
    ``request.meta['item']`` for the ``parse_details`` callback.

    Missing name or datetime nodes yield an empty list instead of raising
    ``AttributeError``, and entries without any link are skipped.
    """
    for course in response.css('div.col-10'):
        href = course.xpath('.//a/@href').extract_first()
        if href is None:
            # urljoin(None) would resolve to the page's own base URL and
            # hand that page to parse_details, so skip link-less entries.
            continue

        raw_name = course.css('h3 a::text').extract_first()
        raw_time = course.css(
            'relative-time::attr(datetime)').extract_first()

        # NOTE(review): split() stores a list of whitespace-separated tokens
        # rather than a stripped string. Kept as-is so the item shape does
        # not change for downstream consumers -- confirm this is intended.
        item = MultipageCourseItem(
            name=(raw_name or '').split(),
            update_time=(raw_time or '').split(),
        )

        request = scrapy.Request(
            response.urljoin(href), callback=self.parse_details)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Queue a detail-page request for every course box on the page.

    The name and image found on the listing card are stored in a
    ``MultipageCourseItem`` that is passed along in ``request.meta['item']``
    to the ``parse_author`` callback.
    """
    name_xpath = './/div[@class="course-name"]/text()'
    image_xpath = './/img/@src'

    for anchor in response.css('a.course-box'):
        course_item = MultipageCourseItem()
        # Parse the course name.
        course_item['name'] = anchor.xpath(name_xpath).extract_first()
        # Parse the course image.
        course_item['image'] = anchor.xpath(image_xpath).extract_first()

        # The scraped link is relative; urljoin() builds the full URL of the
        # course detail page.
        relative_href = anchor.xpath('@href').extract_first()

        # Build the request for the detail page and name its callback. The
        # unfinished item is handed to parse_author through meta.
        detail_request = scrapy.Request(
            response.urljoin(relative_href),
            callback=self.parse_author,
            meta={'item': course_item},
        )
        yield detail_request
def parse(self, response):
    """Emit a detail-page request for each ``div.col-md-3`` course card.

    The course name and cover image are read from the listing card and
    stored in a ``MultipageCourseItem``, which is attached to the request
    as ``request.meta['item']`` for the ``parse_author`` callback.

    Robustness: a card lacking a ``h6.course-name`` text node gets
    ``name=None`` rather than raising ``AttributeError`` on ``.strip()``,
    and a card with no link at all is skipped.
    """
    for course in response.css('div.col-md-3'):
        # The scraped link is relative; it is expanded with urljoin() below.
        course_url = course.css('a::attr(href)').extract_first()
        if course_url is None:
            # Without an href, urljoin() would fall back to the page's own
            # base URL, so there is no detail page worth requesting.
            continue

        # Course name (whitespace-trimmed when present).
        name_text = course.css('h6.course-name::text').extract_first()
        if name_text is not None:
            name_text = name_text.strip()

        item = MultipageCourseItem(
            name=name_text,
            # Course image.
            image=course.css('img.cover-image::attr(src)').extract_first(),
        )

        # Build the request for the course detail page and set its callback.
        full_course_url = response.urljoin(course_url)
        request = scrapy.Request(full_course_url, self.parse_author)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Follow every ``a.course-box`` link, carrying a half-built item along.

    The listing card supplies ``name`` and ``image``. The resulting
    ``MultipageCourseItem`` is stored under ``request.meta['item']`` and the
    request is dispatched to ``parse_author``.
    """
    for card in response.css('a.course-box'):
        fields = {
            'name': card.xpath(
                './/div[@class="course-name"]/text()').extract_first(),
            'image': card.xpath('.//img/@src').extract_first(),
        }
        pending_item = MultipageCourseItem(**fields)

        # response.urljoin() builds the URL for the detail page.
        target = response.urljoin(card.xpath('@href').extract_first())

        # scrapy.Request(url, callback) creates the request; yielding it
        # leads to a response being delivered to the callback. Data stored
        # in request.meta rides along with the request, which is how the
        # item follows the crawl onto the next page.
        yield scrapy.Request(
            target,
            callback=self.parse_author,
            meta={'item': pending_item},
        )
def parse(self, response):
    """Yield detail-page requests for this page, then follow the next page.

    Each ``div.col-10`` entry produces a ``MultipageCourseItem`` holding
    ``name`` and ``update_time``; it is attached to the request as
    ``request.meta['item']`` for the ``parse_details`` callback.

    Pagination is driven by the ``BtnGroup`` pager:

    * no disabled button  -> the links are (previous, next); follow index 1
    * "Previous" disabled -> the only link is "next"; follow index 0
    * anything else       -> stop (no further page to follow)

    Fixes over the previous version: the disabled button label is compared
    as extracted text (comparing the selector object itself with a string
    never matched, so the first page was never followed), a pager with fewer
    links than expected no longer raises ``IndexError``, and missing name,
    datetime or link nodes no longer abort the page.
    """
    for course in response.css('div.col-10'):
        href = course.xpath('.//a/@href').extract_first()
        if href is None:
            # urljoin(None) would resolve to the page's own base URL and
            # hand that page to parse_details, so skip link-less entries.
            continue

        raw_name = course.css('h3 a::text').extract_first()
        raw_time = course.css(
            'relative-time::attr(datetime)').extract_first()

        # NOTE(review): split() stores a list of whitespace-separated tokens
        # rather than a stripped string. Kept as-is so the item shape does
        # not change for downstream consumers -- confirm this is intended.
        item = MultipageCourseItem(
            name=(raw_name or '').split(),
            update_time=(raw_time or '').split(),
        )

        request = scrapy.Request(
            response.urljoin(href), callback=self.parse_details)
        request.meta['item'] = item
        yield request

    disabled_labels = [
        label.strip()
        for label in response.xpath(
            '//div[@class="BtnGroup"]/button[@disabled="disabled"]/text()'
        ).extract()
    ]
    pager_links = response.xpath(
        '//div[@class="BtnGroup"]/a/@href').extract()

    if not disabled_labels:
        next_index = 1
    elif disabled_labels[0] == 'Previous':
        next_index = 0
    else:
        return

    # Guard against pagers that carry fewer links than expected.
    if next_index < len(pager_links):
        yield response.follow(pager_links[next_index], callback=self.parse)