def parse(self, response):
    """Extract a single ``Course`` item from ``response``.

    Args:
        response: Scrapy response whose selectors are queried for the
            course fields.

    Returns:
        The populated ``Course`` item, or ``None`` when the page has no
        ``h2.title`` text.

    Fields listed as placeholders in the original and not populated here:
    subtitle, about, rating, ratingN, hitN, tags, adm, adm_rating.
    """
    title = response.css('h2.title::text').extract_first()
    if title is None:
        # Nothing is emitted for a page without a title.
        return None

    course = Course()
    # u'' literals instead of '...'.decode('utf-8'): the latter only
    # works on Python 2, where a str literal is a byte string.
    course['site'] = u'开源力量'
    course['title'] = title.strip()
    course['cover'] = response.urljoin(
        response.css('div.course-img img::attr(src)').extract_first())
    course['url'] = response.url

    price_text = response.css('span.price::text').extract_first()
    if price_text is not None:
        if u'免费' in price_text:
            # The price label contains the "free" marker.
            course['price'] = 0.0
        else:
            # Drop the currency suffix; the value is kept as text.
            course['price'] = price_text.replace(u'元', '').strip()

    course['ctype'] = 'N'  # regular course
    course['updated'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

    # Original (unprocessed) values as shown on the page.
    if price_text is not None:
        course['o_price'] = price_text.strip()

    # Star icons: a full star adds 2 and a half star adds 1.
    star_classes = response.css('div.score i').re(r'class=\"es-icon (.*)\"')
    rating = 0
    for star_class in star_classes:
        if star_class == 'es-icon-star':
            rating += 2
        elif star_class == 'es-icon-starhalf':
            rating += 1
    course['o_rating'] = rating

    # .re() returns a (possibly empty) list, never None, so test for
    # emptiness before indexing to avoid an IndexError.
    rating_counts = response.css('div.score span').re(r'(\d+)')
    if rating_counts:
        course['o_ratingN'] = int(rating_counts[0])

    student_counts = response.css('div.student-num::text').re(r'(\d+)')
    if student_counts:
        course['o_stuN'] = int(student_counts[0])

    return course
def parse(self, response):
    """Yield one ``Course`` item per ``ul.course-lists li`` entry.

    Args:
        response: Scrapy response whose list entries are scraped.

    Yields:
        A ``Course`` item for every entry that has a ``p.font14`` title.
        Every item gets ``price`` 0.0, ``ctype`` ``'N'`` and ``self.site``.

    Fields listed as placeholders in the original and not populated here:
    subtitle, rating, ratingN, hitN, tags, o_rating, o_price, o_ratingN,
    o_reviewN.
    """
    for section in response.css('ul.course-lists li'):
        title = section.css('p.font14::text').extract_first()
        if title is None:
            # Skip this entry only; a bare ``return`` here would drop
            # every remaining entry on the page.
            continue

        course = Course()
        course['title'] = title.strip()

        about = section.css('p.description::text').extract_first()
        # Guard on ``about`` itself (the original tested ``title``, so a
        # missing description crashed on ``None.strip()``).
        if about is not None:
            course['about'] = about.strip()

        course['price'] = 0.0
        course['cover'] = response.urljoin(
            section.css('p img::attr(src)').extract_first())
        course['url'] = response.urljoin(
            section.css('a::attr(href)').extract_first())
        course['ctype'] = 'N'  # regular course
        course['site'] = self.site
        course['updated'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

        # .re() returns a (possibly empty) list, never None. Store the
        # first match as an int rather than the raw list of strings.
        student_counts = section.css('p.color99::text').re(r'(\d+)')
        if student_counts:
            course['o_stuN'] = int(student_counts[0])

        yield course
def parse(self, response):
    """Yield a single ``Course`` item extracted from ``response``.

    Args:
        response: Scrapy response whose selectors are queried for the
            course fields.

    Yields:
        One ``Course`` item with ``ctype`` ``'C'``; nothing is yielded
        when the page has no ``h2.title`` text.

    Fields listed as placeholders in the original and not populated here:
    subtitle, about, rating, ratingN, hitN, tags.
    """
    title = response.css('h2.title::text').extract_first()
    if title is None:
        # Nothing is emitted for a page without a title.
        return

    course = Course()
    # u'' literals instead of '...'.decode('utf-8'): the latter only
    # works on Python 2, where a str literal is a byte string.
    course['site'] = u'小象学院'
    course['title'] = title.strip()
    course['cover'] = response.urljoin(
        response.css('div.class-img img::attr(src)').extract_first())
    course['url'] = response.url

    price_text = response.css('div.price span::text').extract_first()
    if price_text is not None:
        if u'免费' in price_text or price_text.strip() == '':
            # A "free" marker or a blank label both mean a zero price.
            course['price'] = 0.0
        else:
            # Drop the currency suffix; the value is kept as text.
            course['price'] = price_text.replace(u'元', '').strip()

    course['ctype'] = 'C'  # class
    course['updated'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

    # Original (unprocessed) values as shown on the page.
    if price_text is not None:
        course['o_price'] = price_text.strip()

    # Star icons: a full star adds 2 and a half star adds 1.
    star_classes = response.css('div.score i').re(r'class=\"es-icon (.*)\"')
    rating = 0
    for star_class in star_classes:
        if star_class == 'es-icon-star':
            rating += 2
        elif star_class == 'es-icon-starhalf':
            rating += 1
    course['o_rating'] = rating

    yield course
def parse(self, response):
    """Yield one ``Course`` item per ``div.lesson-list ul.cf li`` entry.

    Args:
        response: Scrapy response whose list entries are scraped.

    Yields:
        A ``Course`` item for every entry that has a title link. Every
        item gets ``ctype`` ``'N'`` and ``self.site``.

    Fields listed as placeholders in the original and not populated here:
    about, price, rating, ratingN, hitN, tags, o_rating, o_price,
    o_ratingN, o_reviewN.
    """
    for section in response.css('div.lesson-list ul.cf li'):
        title = section.css(
            'div.lesson-infor h2.lesson-info-h2 a::text').extract_first()
        if title is None:
            # Skip this entry only; a bare ``return`` here would drop
            # every remaining entry on the page.
            continue

        course = Course()
        course['title'] = title.strip()

        subtitle = section.css('div.lesson-infor p::text').extract_first()
        if subtitle is not None:
            course['subtitle'] = subtitle.strip()

        # Cover and URL are stored exactly as they appear in the markup.
        course['cover'] = section.css(
            'div.lessonimg-box a img::attr(src)').extract_first()
        course['url'] = section.css(
            'div.lessonimg-box a::attr(href)').extract_first()
        course['ctype'] = 'N'  # regular course
        course['site'] = self.site
        course['updated'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

        # .re() returns a (possibly empty) list, never None, so test for
        # emptiness before indexing to avoid an IndexError.
        student_counts = section.css(
            'div.lesson-infor em.learn-number').re(r'(\d+)')
        if student_counts:
            course['o_stuN'] = int(student_counts[0])

        yield course
def parse(self, response):
    """Yield one ``Course`` item per ``div.course-item`` entry.

    Args:
        response: Scrapy response whose list entries are scraped.

    Yields:
        A ``Course`` item with ``ctype`` ``'N'`` for every entry that has
        a title link.

    Fields listed as placeholders in the original and not populated here:
    subtitle, about, rating, ratingN, hitN, tags, o_rating.
    """
    for section in response.css('div.course-item'):
        title = section.css(
            'div.course-info div.title a::text').extract_first()
        if title is None:
            # Skip this entry only; a bare ``return`` here would drop
            # every remaining entry on the page.
            continue

        course = Course()
        # u'' literal instead of '...'.decode('utf-8'): the latter only
        # works on Python 2, where a str literal is a byte string.
        course['site'] = u'小象学院'
        course['title'] = title.strip()
        course['cover'] = response.urljoin(
            section.css('div.course-img a img::attr(src)').extract_first())
        course['url'] = response.urljoin(
            section.css('div.course-img a::attr(href)').extract_first())

        free_text = section.css(
            'span.price span.text-danger::text').extract_first()
        if free_text is not None:
            # A ``span.text-danger`` label inside the price means zero.
            course['price'] = 0.0
        else:
            # The price is taken from the second text node of
            # ``span.price``. Check the length first: indexing the
            # selector list directly raised IndexError when absent.
            price_texts = section.css('span.price::text').extract()
            if len(price_texts) > 1:
                course['price'] = price_texts[1].strip()

        course['ctype'] = 'N'  # regular course
        course['updated'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

        # .re() returns a (possibly empty) list, never None, so test for
        # emptiness before indexing to avoid an IndexError.
        student_counts = section.css('span.num::text').re(r'(\d+)')
        if student_counts:
            course['o_stuN'] = int(student_counts[0])

        yield course
def parse(self, response):
    """Yield a follow-up request per ``div.event_list div.col-lg-3`` entry.

    A partially filled ``Course`` item is built from each entry and
    attached to the request as ``meta['course']``; ``self.parseDetail``
    is the callback that receives it.

    Args:
        response: Scrapy response whose list entries are scraped.

    Yields:
        A ``scrapy.Request`` to the course URL for every entry that has a
        ``div.name`` title.

    Fields listed as placeholders in the original and not populated here:
    subtitle, about, rating, ratingN, hitN, tags, o_rating, o_ratingN,
    o_stuN, o_reviewN, o_hitN.
    """
    for section in response.css('div.event_list div.col-lg-3'):
        title = section.css('div.infotip div.name::text').extract_first()
        if title is None:
            # Skip this entry only; a bare ``return`` here would drop
            # every remaining entry on the page.
            continue

        course = Course()
        course['title'] = title.strip()

        # Query the current section, not the whole response: the
        # page-wide selector gave every course the first price on the
        # page.
        price_text = section.css('span.price::text').extract_first()
        if price_text is not None:
            if u'免费' in price_text or price_text.strip() == '':
                # A "free" marker or a blank label both mean zero.
                course['price'] = 0.0
            else:
                # Drop the currency sign; the value is kept as text.
                course['price'] = price_text.replace(u'¥', '').strip()

        # The image URL is read from the ``data-original`` attribute.
        course['cover'] = response.urljoin(
            section.css('div.event_cover img.img_lazy::attr(data-original)'
                        ).extract_first())
        course['url'] = response.urljoin(
            section.css('a::attr(href)').extract_first())
        course['ctype'] = 'V'  # video
        course['site'] = self.site
        course['updated'] = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

        # Original (unprocessed) price label as shown on the page.
        if price_text is not None:
            course['o_price'] = price_text.strip()

        request = scrapy.Request(url=course['url'],
                                 callback=self.parseDetail)
        request.meta['course'] = course
        yield request