def get_train_info(self):
    """Fetch the course list of the training programme ``self.train_id``.

    Pages through the ``courseajax`` endpoint until the reported
    ``current_page`` reaches ``count_page``.

    Returns:
        A dict with ``'name'`` (the value of ``self.get_train_name()``) and
        ``'courses'``, a list of dicts holding ``course_name``, ``train_id``,
        ``train_course_id``, ``lesson_num`` and ``number`` for each course.
    """
    # Resolve the name first: get_train_name() runs before any page request.
    name = self.get_train_name()
    courses = []
    page = 1
    while page:
        url = 'http://edu.51cto.com/center/wejob/usr/courseajax?train_id=%d&page=%d&size=1000' % (
            self.train_id, page)
        response = self.session.get(url)
        payload = json.loads(response.text)['data']
        # A page number of 0 terminates the loop after this page is consumed.
        if payload['current_page'] < payload['count_page']:
            page = payload['current_page'] + 1
        else:
            page = 0
        for item in payload['data']:
            courses.append({
                'course_name': cto.filename_reg_check(item['course_name'].encode('utf-8')),
                'train_id': item['train_id'],
                'train_course_id': item['train_course_id'],
                'lesson_num': item['lesson_num'],
                'number': item['number'],
            })
    return {'name': name, 'courses': courses}
def get_course_info(self, course_id):
    """Collect the lesson records of one course, following pagination.

    Each page of the ``course-info-ajax`` endpoint carries either a flat
    list of lessons or a list of chapters whose ``"list"`` entry holds the
    lessons. Every lesson is passed through ``self.parse_lesson``.

    Args:
        course_id: Value substituted for ``train_course_id`` in the URL.

    Returns:
        A list of the dicts produced by ``self.parse_lesson``, in the order
        the lessons were encountered.
    """
    infos = []
    current_page = 1
    while current_page:
        url = 'https://edu.51cto.com/center/wejob/user/course-info-ajax?&train_course_id=%d&page=%d&size=20' % (
            course_id, current_page)
        res = self.session.get(url).text
        data = json.loads(res)['data']
        # 0 ends the loop once the last page has been processed.
        current_page = data['current_page'] + 1 if data['current_page'] < data['count_page'] else 0

        data_list = data['data']
        if not data_list:
            # An empty page has no first element to inspect; skip it
            # instead of raising IndexError.
            continue

        # The first element decides the page shape: lessons carry
        # "lesson_id", chapters do not.
        if "lesson_id" in data_list[0]:
            infos.extend(self.parse_lesson(lesson) for lesson in data_list)
            continue

        for chapter in data_list:
            chapter_name = cto.filename_reg_check(chapter["chapter_name"])
            chapter_sort_name = chapter["chapter_sort"] + ". " + chapter_name
            # chapter["list"] is either a list of lessons or a dict whose
            # keys are pagination indexes and whose values are lessons.
            lessons = chapter["list"]
            if not isinstance(lessons, list):
                lessons = lessons.values()
            for lesson in lessons:
                infos.append(self.parse_lesson(lesson, chapter_sort_name))
    return infos
def get_train_info(self):
    """Fetch the course list of the training programme ``self.train_id``.

    Pages through the ``train-course-ajax`` endpoint until the reported
    ``current_page`` reaches ``count_page``.

    Returns:
        A dict with ``'name'`` (the value of ``self.get_train_name()``) and
        ``'courses'``, a list of dicts holding ``course_name``, ``train_id``,
        ``train_course_id``, ``lesson_num`` and ``number`` for each course.

    Raises:
        SystemExit: With status 1 when a response body is not valid JSON;
            the parse error and the raw body are printed first.
    """
    train = {'name': self.get_train_name(), 'courses': []}
    current_page = 1
    while current_page:
        url = 'https://edu.51cto.com/center/wejob/user/train-course-ajax?train_id=%d&page=%d&size=20' % \
              (self.train_id, current_page)
        res = self.session.get(url)
        try:
            payload = json.loads(res.text)
        except ValueError as e:
            # Single-argument print(...) emits the same text under both the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s" % ("接口响应异常", e))
            print(res.text)
            # Report failure with a non-zero status; the interactive
            # exit() helper would have ended the process with status 0.
            raise SystemExit(1)
        payload = payload['data']
        # 0 ends the loop once the last page has been processed.
        current_page = payload['current_page'] + 1 if payload['current_page'] < payload['count_page'] else 0
        for i in payload['data']:
            course = {
                'course_name': cto.filename_reg_check(i['course_name'].encode('utf-8')),
                'train_id': i['train_id'],
                'train_course_id': i['train_course_id'],
                'lesson_num': i['lesson_num'],
                'number': i['sort'],  # ordinal of the course
            }
            train['courses'].append(course)
    return train
def get_course_info(self, course_id):
    """Collect the lesson records of one course, following pagination.

    Every element of a page's ``data`` list is read as a chapter whose
    ``'list'`` entry holds its lessons, so lessons of all chapters on the
    page are collected (not only those of the first chapter). An empty
    page contributes nothing.

    Args:
        course_id: Value substituted for ``train_course_id`` in the URL.

    Returns:
        A list of dicts with ``lesson_name`` (passed through
        ``cto.filename_reg_check``), ``lesson_id`` and ``video_id``.
    """
    infos = []
    current_page = 1
    while current_page:
        url = 'https://edu.51cto.com/center/wejob/user/course-info-ajax?&train_course_id=%d&page=%d&size=20' % (
            course_id, current_page)
        res = self.session.get(url).text
        data = json.loads(res)['data']
        # 0 ends the loop once the last page has been processed.
        current_page = data['current_page'] + 1 if data['current_page'] < data['count_page'] else 0

        for chapter in data['data']:
            lessons = chapter['list']
            # 'list' may be a list of lessons or a dict whose values are
            # the lessons; normalise to an iterable of lessons.
            if isinstance(lessons, dict):
                lessons = lessons.values()
            for m in lessons:
                lesson_name = cto.filename_reg_check(m[u'lesson_name'])
                infos.append({
                    'lesson_name': lesson_name,
                    'lesson_id': m['lesson_id'],
                    'video_id': m['video_id'],
                })
    return infos
def get_train_name(self):
    """Return the sanitised title of the training programme ``self.train_id``.

    Downloads the programme's view page and reads the ``<h2>`` element
    whose id is ``CourseTitle``.

    Returns:
        The result of ``cto.filename_reg_check`` applied to the element's
        ``.string``.

    Raises:
        SystemExit: With the message '找不到该课程' when the page has no
            such element.
    """
    url = 'http://edu.51cto.com/center/wejob/index/view?id=%d&force=3&orig=try' % (
        self.train_id)
    res = self.session.get(url).text
    soup = BeautifulSoup(res, 'html.parser')
    title = soup.find('h2', id='CourseTitle')
    # Identity test: find() signals "no match" with None.
    if title is None:
        # Same outcome as exit(msg) (message on stderr, status 1) without
        # relying on the site-provided interactive helper.
        raise SystemExit('找不到该课程')
    return cto.filename_reg_check(title.string)
def get_train_name(self):
    """Return the sanitised title of the training programme ``self.train_id``.

    Downloads the programme's view page and reads the ``title`` attribute
    of the first ``<div>`` directly under ``<div class="basismes">``. The
    raw title is printed before it is sanitised.

    Returns:
        The result of ``cto.filename_reg_check`` applied to the title.

    Raises:
        SystemExit: With the message '找不到该课程' when the element is
            absent or carries no ``title`` attribute.
    """
    url = 'http://edu.51cto.com/center/wejob/index/view?id=%d&force=3&orig=try' % (self.train_id)
    res = self.session.get(url)
    tree = html.fromstring(res.text)
    nodes = tree.xpath("//div[@class='basismes']/div")
    if not nodes:
        # No match: report "course not found" rather than failing with
        # IndexError on nodes[0].
        raise SystemExit('找不到该课程')
    title = nodes[0].get("title")
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(title)
    if title is None:
        raise SystemExit('找不到该课程')
    return cto.filename_reg_check(title)
def parse_lesson(self, lesson, chapter_sort_name=""):
    """Reduce a server-side lesson record to the fields kept locally.

    ``lesson`` is the complete lesson information returned by the server;
    processing it here records the structure of a lesson in one place.

    Args:
        lesson: Mapping that provides ``lesson_name``, ``lesson_id``,
            ``video_id``, ``lesson_type`` and ``show_number``.
        chapter_sort_name: Stored unchanged under ``chapter_sort_name``;
            defaults to an empty string.

    Returns:
        A dict with the five lesson fields (``lesson_name`` passed through
        ``cto.filename_reg_check``) plus ``chapter_sort_name``.
    """
    safe_name = cto.filename_reg_check(lesson[u'lesson_name'])
    return {
        'lesson_name': safe_name,
        'lesson_id': lesson['lesson_id'],
        'video_id': lesson['video_id'],
        "lesson_type": lesson["lesson_type"],
        "chapter_sort_name": chapter_sort_name,
        "show_number": lesson["show_number"],
    }
def get_course_info(self, course_id):
    """Collect the lesson records of one course, following pagination.

    Pages through the ``course-infoajax`` endpoint until the reported
    ``current_page`` reaches ``count_page``; each element of a page's
    ``data`` list is read as one lesson.

    Args:
        course_id: Value substituted for ``train_course_id`` in the URL.

    Returns:
        A list of dicts with ``lesson_name`` (passed through
        ``cto.filename_reg_check``), ``lesson_id`` and ``video_id``.
    """
    lessons = []
    page = 1
    while page:
        url = 'http://edu.51cto.com/center/wejob/usr/course-infoajax?train_id=%d&train_course_id=%d&page=%d&size=20' % (
            self.train_id, course_id, page)
        body = self.session.get(url).text
        payload = json.loads(body)['data']
        # A page number of 0 terminates the loop after this page is consumed.
        if payload['current_page'] < payload['count_page']:
            page = payload['current_page'] + 1
        else:
            page = 0
        for entry in payload['data']:
            lessons.append({
                'lesson_name': cto.filename_reg_check(entry['lesson_name']),
                'lesson_id': entry['lesson_id'],
                'video_id': entry['video_id'],
            })
    return lessons