Example #1
0
    def parse(self, response):
        """Build a single ``Course`` item from a course detail page.

        Returns ``None`` when the page has no ``h2.title`` text, otherwise the
        populated ``Course``. Only the fields assigned below are set; the
        remaining ``Course`` fields are left untouched.

        ``o_ratingN`` and ``o_stuN`` are set only when a number is actually
        found on the page.
        """
        course = Course()
        course['site'] = '开源力量'.decode('utf-8')

        title = response.css('h2.title::text').extract_first()
        if title is None:
            return
        course['title'] = title.strip()

        course['cover'] = response.urljoin(
            response.css('div.course-img img::attr(src)').extract_first())
        course['url'] = response.url

        priceTxt = response.css('span.price::text').extract_first()
        if priceTxt is not None:
            if priceTxt.find('免费'.decode('utf-8')) > -1:
                # The price label says "free".
                course['price'] = 0.0
            else:
                # Strip the currency unit and keep the remaining text.
                course['price'] = priceTxt.replace(
                    '元'.decode('utf-8'), '').strip()

        course['ctype'] = 'N'  # regular course
        course['updated'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

        # Raw price text exactly as shown on the page.
        if priceTxt is not None:
            course['o_price'] = priceTxt.strip()

        # Star icons: a full star adds 2, a half star adds 1.
        o_starts = response.css('div.score i').re(r'class=\"es-icon (.*)\"')
        o_rating = 0
        for o_start in o_starts:
            if 'es-icon-star' == o_start:
                o_rating = o_rating + 2
            elif 'es-icon-starhalf' == o_start:
                o_rating = o_rating + 1
        course['o_rating'] = o_rating

        # ``.re()`` always returns a list (possibly empty), never None, so
        # test for emptiness before indexing to avoid an IndexError.
        o_ratingN = response.css('div.score span').re(r'(\d+)')
        if o_ratingN:
            course['o_ratingN'] = int(o_ratingN[0])

        o_stuN = response.css('div.student-num::text').re(r'(\d+)')
        if o_stuN:
            course['o_stuN'] = int(o_stuN[0])

        return course
Example #2
0
    def parse(self, response):
        """Yield one ``Course`` item per ``ul.course-lists li`` entry.

        Entries without a ``p.font14`` title are skipped; the remaining
        entries on the page are still processed. ``about`` and ``o_stuN`` are
        set only when the corresponding text is present.
        """
        for section in response.css('ul.course-lists li'):
            course = Course()

            title = section.css('p.font14::text').extract_first()
            if title is None:
                # Skip this entry only; a bare ``return`` here would drop
                # every later course on the page.
                continue
            course['title'] = title.strip()

            # Guard on ``about`` itself: a missing description must not crash
            # on ``None.strip()``.
            about = section.css('p.description::text').extract_first()
            if about is not None:
                course['about'] = about.strip()

            course['price'] = 0.0

            course['cover'] = response.urljoin(
                section.css('p img::attr(src)').extract_first())
            course['url'] = response.urljoin(
                section.css('a::attr(href)').extract_first())

            course['ctype'] = 'N'  # regular course
            course['site'] = self.site

            course['updated'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

            # ``.re()`` returns a list (never None). Store the first number
            # as an int, and leave the field unset when nothing matched.
            o_stuN = section.css('p.color99::text').re(r'(\d+)')
            if o_stuN:
                course['o_stuN'] = int(o_stuN[0])

            yield course
Example #3
0
    def parse(self, response):
        """Yield one ``Course`` item scraped from a class detail page.

        Nothing is yielded when the page has no ``h2.title`` text.
        """
        course = Course()
        course['site'] = '小象学院'.decode('utf-8')

        raw_title = response.css('h2.title::text').extract_first()
        if raw_title is None:
            return
        course['title'] = raw_title.strip()

        cover_src = response.css(
            'div.class-img img::attr(src)').extract_first()
        course['cover'] = response.urljoin(cover_src)
        course['url'] = response.url

        price_text = response.css('div.price span::text').extract_first()
        has_price = price_text is not None
        if has_price:
            free_marker = '免费'.decode('utf-8')
            # A "free" label or a blank price both count as zero.
            if free_marker in price_text or price_text.strip() == '':
                course['price'] = 0.0
            else:
                yuan = '元'.decode('utf-8')
                course['price'] = price_text.replace(yuan, '').strip()

        course['ctype'] = 'C'  # class (cohort)
        course['updated'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

        # Raw price text exactly as shown on the page.
        if has_price:
            course['o_price'] = price_text.strip()

        # Star icons: a full star adds 2, a half star adds 1, others add 0.
        star_points = {'es-icon-star': 2, 'es-icon-starhalf': 1}
        icon_classes = response.css('div.score i').re(
            r'class=\"es-icon (.*)\"')
        course['o_rating'] = sum(
            star_points.get(icon, 0) for icon in icon_classes)

        yield course
Example #4
0
    def parse(self, response):
        """Yield one ``Course`` item per ``div.lesson-list ul.cf li`` entry.

        Entries without a title link are skipped; the remaining entries on
        the page are still processed. ``subtitle`` and ``o_stuN`` are set
        only when the corresponding content is present.
        """
        for section in response.css('div.lesson-list ul.cf li'):
            course = Course()

            title = section.css(
                'div.lesson-infor h2.lesson-info-h2 a::text').extract_first()
            if title is None:
                # Skip this entry only; a bare ``return`` here would drop
                # every later course on the page.
                continue
            course['title'] = title.strip()

            subtitle = section.css('div.lesson-infor p::text').extract_first()
            if subtitle is not None:
                course['subtitle'] = subtitle.strip()

            # Cover and URL are stored exactly as they appear in the markup.
            course['cover'] = section.css(
                'div.lessonimg-box a img::attr(src)').extract_first()
            course['url'] = section.css(
                'div.lessonimg-box a::attr(href)').extract_first()

            course['ctype'] = 'N'  # regular course
            course['site'] = self.site

            course['updated'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

            # ``.re()`` returns a list (never None); test for emptiness
            # before indexing to avoid an IndexError.
            o_stuN = section.css('div.lesson-infor em.learn-number').re(
                r'(\d+)')
            if o_stuN:
                course['o_stuN'] = int(o_stuN[0])

            yield course
Example #5
0
    def parse(self, response):
        """Yield one ``Course`` item per ``div.course-item`` entry.

        Entries without a title link are skipped; the remaining entries on
        the page are still processed. ``price`` and ``o_stuN`` are set only
        when the corresponding text is present.
        """
        for section in response.css('div.course-item'):
            course = Course()
            course['site'] = '小象学院'.decode('utf-8')

            title = section.css(
                'div.course-info div.title a::text').extract_first()
            if title is None:
                # Skip this entry only; a bare ``return`` here would drop
                # every later course on the page.
                continue
            course['title'] = title.strip()

            course['cover'] = response.urljoin(
                section.css('div.course-img a img::attr(src)').extract_first())
            course['url'] = response.urljoin(
                section.css('div.course-img a::attr(href)').extract_first())

            # A ``span.text-danger`` inside the price span is treated as the
            # "free" marker; otherwise the price is the second text node of
            # ``span.price``.
            freeTxt = section.css(
                'span.price span.text-danger::text').extract_first()
            if freeTxt is not None:
                course['price'] = 0.0
            else:
                priceTexts = section.css('span.price::text').extract()
                # Indexing [1] blindly raises IndexError when there are
                # fewer than two text nodes; leave the price unset instead.
                if len(priceTexts) > 1:
                    course['price'] = priceTexts[1].strip()

            course['ctype'] = 'N'  # regular course
            course['updated'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

            # ``.re()`` returns a list (never None); test for emptiness
            # before indexing to avoid an IndexError.
            o_stuN = section.css('span.num::text').re(r'(\d+)')
            if o_stuN:
                course['o_stuN'] = int(o_stuN[0])

            yield course
Example #6
0
    def parse(self, response):
        """Yield a detail-page request per ``div.event_list div.col-lg-3`` card.

        Each request targets the course URL, uses ``self.parseDetail`` as the
        callback, and carries the partially filled ``Course`` in
        ``request.meta['course']``.

        Cards without a title are skipped; the remaining cards on the page
        are still processed.
        """
        for section in response.css('div.event_list div.col-lg-3'):
            course = Course()

            title = section.css('div.infotip div.name::text').extract_first()
            if title is None:
                # Skip this card only; a bare ``return`` here would drop
                # every later course on the page.
                continue
            course['title'] = title.strip()

            # Read the price from this card, not from the whole response;
            # otherwise every course gets the first price on the page.
            priceTxt = section.css('span.price::text').extract_first()
            if priceTxt is not None:
                if priceTxt.find('免费'.decode('utf-8')) > -1:
                    # The price label says "free".
                    course['price'] = 0.0
                elif priceTxt.strip() == '':
                    # A blank price is treated as zero.
                    course['price'] = 0.0
                else:
                    # Strip the currency sign and keep the remaining text.
                    course['price'] = priceTxt.replace('¥'.decode('utf-8'),
                                                       '').strip()

            # The cover URL comes from the lazy-load ``data-original``
            # attribute.
            course['cover'] = response.urljoin(
                section.css('div.event_cover img.img_lazy::attr(data-original)'
                            ).extract_first())
            course['url'] = response.urljoin(
                section.css('a::attr(href)').extract_first())

            course['ctype'] = 'V'  # video
            course['site'] = self.site

            course['updated'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

            # Raw price text exactly as shown on the card.
            if priceTxt is not None:
                course['o_price'] = priceTxt.strip()

            # Hand the partial item to the detail-page callback.
            request = scrapy.Request(url=course['url'],
                                     callback=self.parseDetail)
            request.meta['course'] = course

            yield request