def parse(self, response):
    """Yield one CourseItem per ``div.course-body`` element in *response*.

    Each item carries four fields:

    * ``name`` - text of ``div.course-name``.
    * ``description`` - text of ``div.course-desc``.
    * ``type`` - text of ``div.course-footer span.pull-right``; falls back
      to ``'Free'`` when that element is missing.
    * ``students`` - first run of digits found in the second text node of
      the ``pull-left`` span, or ``None`` when nothing matches.
    """
    for course in response.css('div.course-body'):
        yield CourseItem({
            'name': course.css('div.course-name::text').extract_first(),
            'description': course.css('div.course-desc::text').extract_first(),
            'type': course.css(
                'div.course-footer span.pull-right::text'
            ).extract_first(default='Free'),
            # Raw string: "\d" in a plain literal is an invalid escape sequence.
            'students': course.xpath(
                './/span[contains(@class, "pull-left")]/text()[2]'
            ).re_first(r'[^\d]*(\d+)[^\d]*'),
        })
def parse(self, response):
    """Yield a CourseItem holding only the name of each course on the page.

    One item is produced per ``div.course-body`` element; ``name`` is the
    first text node of its ``div.course-name`` child (``None`` if absent).
    """
    name_selector = 'div.course-name::text'
    for course_node in response.css('div.course-body'):
        course_name = course_node.css(name_selector).extract_first()
        # Wrap the extracted value in the project's CourseItem before yielding.
        yield CourseItem({'name': course_name})
def parse(self, response):
    """Yield one CourseItem per ``li.col-12`` element in *response*.

    Fields:

    * ``name`` - first run of word characters (after optional leading
      whitespace) in the text of the entry's ``<a>`` element.
    * ``update_time`` - value of the ``datetime`` attribute of the entry's
      ``relative-time`` element, or ``None`` when absent.
    """
    for entry in response.css('li.col-12'):
        yield CourseItem({
            # Raw string: "\s" and "\w" are invalid escapes in a plain literal.
            'name': entry.css('a::text').re_first(r'\s*(\w*)'),
            'update_time': entry.css(
                'relative-time::attr(datetime)'
            ).extract_first(),
        })
def parse(self, response):
    """Yield one CourseItem per ``div.course-body`` element in *response*.

    Fields:

    * ``name`` - text of ``div.course-name``.
    * ``description`` - text of ``div.course-desc``.
    * ``type`` - text of ``div.course-footer span.pull-right``; when the
      element is missing the default string passed to ``extract_first``
      is used instead.
    * ``students`` - digits captured from the second text node of the
      ``pull-left`` span. The capture group is ``\\d*``, so the value may
      be an empty string when that text node holds no digits.
    """
    for course in response.css('div.course-body'):
        # Wrap each result in a CourseItem; the extraction itself is plain
        # selector work.
        # Original author's note: keep the field order consistent with the
        # declaration in items.py.
        item = CourseItem({
            'name': course.css('div.course-name::text').extract_first(),
            'description': course.css('div.course-desc::text').extract_first(),
            'type': course.css(
                'div.course-footer span.pull-right::text'
            ).extract_first(default='免费'),
            # Raw string: "\d" in a plain literal is an invalid escape sequence.
            'students': course.xpath(
                './/span[contains(@class, "pull-left")]/text()[2]'
            ).re_first(r'[^\d]*(\d*)[^\d]*'),
        })
        # Every loop iteration hands exactly one CourseItem to the caller.
        yield item
def parse(self, response):
    """Yield a follow-up request for every ``li.col-12`` entry in *response*.

    For each entry a partially filled CourseItem (``name`` and
    ``update_time``) is built, then a ``scrapy.Request`` for the entry's
    first ``<a href>`` (resolved against the response URL) is yielded with
    ``self.parse_code`` as callback. The item travels with the request
    under ``meta['item']`` - presumably so ``parse_code`` can complete it.
    """
    for entry in response.css('li.col-12'):
        item = CourseItem({
            # Raw string: "\s" and "\w" are invalid escapes in a plain literal.
            'name': entry.css('a::text').re_first(r'\s*(\w*)'),
            'update_time': entry.css(
                'relative-time::attr(datetime)'
            ).extract_first(),
        })
        course_url = response.urljoin(entry.xpath('.//a/@href').extract_first())
        yield scrapy.Request(
            url=course_url,
            callback=self.parse_code,
            meta={'item': item},
        )
def parse(self, response):
    """Yield a detail-page request for every ``li.col-12`` entry.

    For each entry a CourseItem with ``name`` (first run of non-whitespace
    characters in the ``h3/a`` text) and ``update_time`` (the ``datetime``
    attribute of ``relative-time``) is built. A ``scrapy.Request`` for the
    ``h3/a`` link, resolved against the response URL, is then yielded with
    ``self.detail_parse`` as callback and the item stored in
    ``meta['item']``.
    """
    for course in response.css('li.col-12'):
        item = CourseItem({
            # Raw string: "\S" in a plain literal is an invalid escape sequence.
            'name': course.xpath('.//h3/a/text()').re_first(r'(\S+)'),
            'update_time': course.xpath(
                './/relative-time/@datetime'
            ).extract_first(),
        })
        course_url = response.urljoin(
            course.xpath('.//h3/a/@href').extract_first()
        )
        yield scrapy.Request(
            course_url,
            callback=self.detail_parse,
            meta={'item': item},
        )
def parse(self, response):
    """Yield one CourseItem per ``div.col-sm-12.col-md-3`` card in *response*.

    ``name``, ``description`` and ``type`` are stripped of surrounding
    whitespace. When one of those elements is missing the field becomes an
    empty string instead of raising ``AttributeError`` on ``None.strip()``.
    ``students`` is the raw text of ``span.students-count span`` and stays
    ``None`` when the element is absent.
    """
    for course in response.css('div.col-sm-12.col-md-3'):
        yield CourseItem({
            # default='' keeps .strip() safe when the selector matches nothing.
            'name': course.css('h6::text').extract_first(default='').strip(),
            'description': course.css(
                'div.course-description::text'
            ).extract_first(default='').strip(),
            'type': course.css(
                'span.course-type::text'
            ).extract_first(default='').strip(),
            'students': course.css(
                'span.students-count span::text'
            ).extract_first(),
        })
def parse(self, response):
    """Yield one CourseItem per ``div`` whose class is exactly ``course-body``.

    Fields:

    * ``name`` - text of the descendant ``div`` with class ``course-name``.
    * ``description`` - text of the descendant ``div`` with class
      ``course-desc``.
    * ``type`` - text of the ``pull-right`` span, or ``"Free"`` when that
      span is missing.
    * ``students`` - digits captured from the second text node of the
      ``pull-left`` span, or ``None`` when nothing matches.
    """
    for course in response.xpath('//div[@class="course-body"]'):
        item = CourseItem()
        # The predicate needs "@class": a bare "class" would test a child
        # element named <class>, so the name would never be found.
        item['name'] = course.xpath(
            './/div[@class="course-name"]/text()').extract_first()
        item['description'] = course.xpath(
            './/div[@class="course-desc"]/text()').extract_first()
        item['type'] = course.xpath(
            './/span[contains(@class,"pull-right")]/text()').extract_first(
                default="Free")
        # Raw string: "\s" and "\d" are invalid escapes in a plain literal.
        item['students'] = course.xpath(
            './/span[contains(@class,"pull-left")]/text()[2]').re_first(
                r'\s*(\d+)\s*')
        yield item
def parse(self, response):
    """Yield one CourseItem per element whose class contains ``course-body``.

    Every field is extracted with XPath relative to the current course
    element. ``name``, ``description`` and ``type`` fall back to the
    default strings passed to ``extract_first`` when their element is
    missing. ``students`` is the digit run captured from the second text
    node of the ``pull-left`` span; because the group is ``\\d*`` it may be
    an empty string.
    """
    for course in response.xpath("//div[contains(@class, 'course-body')]"):
        # Use XPath to pull the data out of each course element.
        yield CourseItem({
            # Course name.
            'name': course.xpath(
                './/div[contains(@class, "course-name")]/text()'
            ).extract_first(default="未知"),
            # Course description.
            'description': course.xpath(
                './/div[contains(@class, "course-desc")]/text()'
            ).extract_first(default="空"),
            # Course type. Per the original author: courses on the site are
            # free, member-only or bootcamp; free courses show no label, so
            # the span.pull-right element is absent and the default value
            # (Chinese for "free") is used.
            'type': course.xpath(
                './/div[contains(@class, "course-footer")]'
                '/span[contains(@class, "pull-right")]/text()'
            ).extract_first(default="免费"),
            # Mind the leading "." before "//": without it the query would
            # search the whole document; with it the search is scoped to
            # the div.course-body of the current iteration.
            # Raw string: "\d" in a plain literal is an invalid escape sequence.
            'students': course.xpath(
                './/span[contains(@class, "pull-left")]/text()[2]'
            ).re_first(r'[^\d]*(\d*)[^\d]*'),
        })
def parse(self, response):
    """Print the response URL, then yield a CourseItem per course node.

    Course nodes are located with an absolute XPath rooted at the element
    whose id is ``__layout``. Each item has ``name``, ``description``,
    ``type`` (defaulting to ``'Free'`` when the span is missing) and
    ``students``.
    """
    banner = "*******************************************************"
    print(banner)
    print(response.url)
    print(banner)

    # NOTE(review): the path contains a space after "div/" - it looks like a
    # line-wrap artifact; kept verbatim, confirm against the target page.
    course_nodes_xpath = '//*[@id="__layout"]/div/div[1]/div/div[1]/div/ div[1]/div/div[2]/div[1]/div'

    for node in response.xpath(course_nodes_xpath):
        name = node.xpath(
            './/h6[contains(@class,"course-name")]/text()').extract_first()
        description = node.xpath(
            './/div[contains(@class,"course-desc")]/text()').extract_first()
        course_type = node.xpath(
            './/span[contains(@class,"course-type")]/text()'
        ).extract_first(default='Free')
        students = node.xpath(
            './/span[contains(@class,"students-count")]/span/text()'
        ).extract_first()
        yield CourseItem({
            'name': name,
            'description': description,
            'type': course_type,
            'students': students,
        })