Ejemplo n.º 1
0
 def parse(self, response):
     """Yield one ``CourseItem`` per ``div.course-body`` element of the response.

     Each item carries the name, description, type and student count
     extracted from the matched element.
     """
     for course in response.css('div.course-body'):
         item = CourseItem({
             'name': course.css('div.course-name::text').extract_first(),
             'description': course.css('div.course-desc::text').extract_first(),
             # When the footer has no span.pull-right text, fall back to 'Free'.
             'type': course.css('div.course-footer span.pull-right::text').extract_first(default='Free'),
             # Second text node of the pull-left span; capture its first run of digits.
             'students': course.xpath('.//span[contains(@class, "pull-left")]/text()[2]').re_first(r'[^\d]*(\d+)[^\d]*'),
         })
         yield item
Ejemplo n.º 2
0
    def parse(self, response):
        """Emit a ``CourseItem`` holding the name of each ``div.course-body`` element."""
        for card in response.css('div.course-body'):
            # First text node of the card's div.course-name.
            course_name = card.css('div.course-name::text').extract_first()
            yield CourseItem({'name': course_name})
Ejemplo n.º 3
0
    def parse(self, response):
        """Yield a ``CourseItem`` with name and update time for each ``li.col-12``."""
        for entry in response.css('li.col-12'):
            item = CourseItem({
                # Skip leading whitespace in the link text and keep the first word.
                "name": entry.css('a::text').re_first(r'\s*(\w*)'),
                # Value of the datetime attribute on the <relative-time> element.
                "update_time": entry.css('relative-time::attr(datetime)').extract_first(),
            })
            yield item
Ejemplo n.º 4
0
	def parse(self, response):
		"""Yield one ``CourseItem`` per ``div.course-body`` element of the response."""
		for course in response.css('div.course-body'):
			# Wrap the extracted values in a CourseItem.
			# NOTE(review): the original comment asked for the keys to follow the
			# order used in Items.py; nothing visible here depends on that order.
			item = CourseItem({
				'name': course.css('div.course-name::text').extract_first(),
				'description': course.css('div.course-desc::text').extract_first(),
				# No span.pull-right text in the footer -> use the default label.
				'type': course.css('div.course-footer span.pull-right::text').extract_first(default='免费'),
				# Second text node of the pull-left span; capture its digits.
				'students': course.xpath('.//span[contains(@class, "pull-left")]/text()[2]').re_first(r'[^\d]*(\d*)[^\d]*'),
			})
			# Each loop iteration hands one CourseItem back to the caller.
			yield item
Ejemplo n.º 5
0
 def parse(self, response):
     """Build a partial ``CourseItem`` per ``li.col-12`` and follow its link.

     The item (name and update time) is stored in the follow-up request's
     ``meta`` under the key ``'item'``; the request's callback is
     ``self.parse_code``.
     """
     for entry in response.css('li.col-12'):
         item = CourseItem({
             # Skip leading whitespace in the link text and keep the first word.
             "name": entry.css('a::text').re_first(r'\s*(\w*)'),
             "update_time": entry.css('relative-time::attr(datetime)').extract_first(),
         })
         # Resolve the entry's first href against the URL of the current page.
         course_url = response.urljoin(entry.xpath('.//a/@href').extract_first())
         request = scrapy.Request(url=course_url, callback=self.parse_code)
         request.meta['item'] = item
         yield request
Ejemplo n.º 6
0
 def parse(self, response):
     """Build a partial ``CourseItem`` per ``li.col-12`` and request its detail page.

     The item is attached to the request's ``meta`` under ``'item'``; the
     request's callback is ``self.detail_parse``.
     """
     for course in response.css('li.col-12'):
         item = CourseItem({
             # First run of non-whitespace characters in the h3 link text.
             'name': course.xpath('.//h3/a/text()').re_first(r'(\S+)'),
             'update_time': course.xpath('.//relative-time/@datetime').extract_first(),
         })
         # Resolve the h3 link's href against the URL of the current page.
         course_url = response.urljoin(
             course.xpath('.//h3/a/@href').extract_first())
         request = scrapy.Request(course_url, callback=self.detail_parse)
         request.meta['item'] = item
         yield request
Ejemplo n.º 7
0
 def parse(self, response):
     """Yield one ``CourseItem`` per ``div.col-sm-12.col-md-3`` element.

     ``name``, ``description`` and ``type`` are whitespace-stripped; when the
     corresponding node is missing they become ``''`` instead of raising
     ``AttributeError`` on ``None.strip()``, so one incomplete card does not
     abort the rest of the page.
     """
     for course in response.css('div.col-sm-12.col-md-3'):
         item = CourseItem({
             'name':
             course.css('h6::text').extract_first(default='').strip(),
             'description':
             course.css(
                 'div.course-description::text').extract_first(default='').strip(),
             'type':
             course.css('span.course-type::text').extract_first(default='').strip(),
             # Not stripped, so a missing node stays None here.
             'students':
             course.css('span.students-count span::text').extract_first(),
         })
         yield item
Ejemplo n.º 8
0
 def parse(self, response):
     """Yield one ``CourseItem`` per ``div`` whose class is exactly ``course-body``.

     Each item is populated with name, description, type and student count,
     and is yielded inside the loop so every matched course is emitted.
     """
     for course in response.xpath('//div[@class="course-body"]'):
         item = CourseItem()
         # '@class' selects the attribute; a bare 'class' would look for a
         # child element named <class> instead.
         item['name'] = course.xpath(
             './/div[@class="course-name"]/text()').extract_first()
         item['description'] = course.xpath(
             './/div[@class="course-desc"]/text()').extract_first()
         # No pull-right span text -> fall back to "Free".
         item['type'] = course.xpath(
             './/span[contains(@class,"pull-right")]/text()').extract_first(
                 default="Free")
         # Second text node of the pull-left span; capture its digits.
         item['students'] = course.xpath(
             './/span[contains(@class,"pull-left")]/text()[2]').re_first(
                 r'\s*(\d+)\s*')
         yield item
Ejemplo n.º 9
0
    def parse(self, response):
        """Yield one ``CourseItem`` per ``div`` whose class contains ``course-body``."""
        for course in response.xpath("//div[contains(@class, 'course-body')]"):
            # Extract the data of each course with XPath expressions.
            yield CourseItem({
                # Course name.
                'name': course.xpath('.//div[contains(@class, "course-name")]/text()').extract_first(default="未知"),
                # Course description.
                'description': course.xpath('.//div[contains(@class, "course-desc")]/text()').extract_first(default="空"),
                # Course type. Per the original author's note, the site has three
                # kinds of course (free, member, bootcamp) and free courses show
                # no label, i.e. there is no span.pull-right element; in that
                # case the default value is used.
                'type': course.xpath(
                    './/div[contains(@class, "course-footer")]/span[contains(@class, "pull-right")]/text()').extract_first(
                    default="免费"),
                # Note the '.' before '//': without it the expression searches the
                # whole document; with it the search is relative to the
                # div.course-body of the current iteration.
                'students': course.xpath('.//span[contains(@class, "pull-left")]/text()[2]').re_first(
                    r'[^\d]*(\d*)[^\d]*'),
            })
Ejemplo n.º 10
0
 def parse(self, response):
     """Print the response URL between two banner lines, then yield a ``CourseItem`` per course node."""
     banner = "*******************************************************"
     print(banner)
     print(response.url)
     print(banner)

     # NOTE(review): the path literal contains a run of spaces in the middle,
     # presumably left over from a wrapped line; it is kept as-is here.
     course_nodes = response.xpath(
         '//*[@id="__layout"]/div/div[1]/div/div[1]/div/          div[1]/div/div[2]/div[1]/div'
     )
     for node in course_nodes:
         name = node.xpath(
             './/h6[contains(@class,"course-name")]/text()').extract_first()
         description = node.xpath(
             './/div[contains(@class,"course-desc")]/text()').extract_first()
         # No course-type span text -> fall back to 'Free'.
         course_type = node.xpath(
             './/span[contains(@class,"course-type")]/text()').extract_first(
                 default='Free')
         students = node.xpath(
             './/span[contains(@class,"students-count")]/span/text()'
         ).extract_first()
         yield CourseItem({
             'name': name,
             'description': description,
             'type': course_type,
             'students': students,
         })