Esempio n. 1
0
    def get_train_info(self):
        """Collect the training name and the courses listed for ``self.train_id``.

        Pages of the course-list endpoint are requested one after another
        until the response reports that the current page is the last one.

        Returns:
            dict with ``'name'`` (result of ``self.get_train_name()``) and
            ``'courses'``, a list of dicts holding ``course_name``,
            ``train_id``, ``train_course_id``, ``lesson_num`` and ``number``.
        """
        # The name is resolved first, before any listing request is made.
        train_name = self.get_train_name()
        courses = []
        page_no = 1

        # A page number of 0 is the "no more pages" signal.
        while page_no:
            endpoint = 'http://edu.51cto.com/center/wejob/usr/courseajax?train_id=%d&page=%d&size=1000' % (
                self.train_id, page_no)
            response = self.session.get(endpoint)
            payload = json.loads(response.text)['data']

            if payload['current_page'] < payload['count_page']:
                page_no = payload['current_page'] + 1
            else:
                page_no = 0

            for entry in payload['data']:
                # Course names are passed through cto.filename_reg_check
                # as UTF-8 encoded text.
                safe_name = cto.filename_reg_check(
                    entry['course_name'].encode('utf-8'))
                courses.append({
                    'course_name': safe_name,
                    'train_id': entry['train_id'],
                    'train_course_id': entry['train_course_id'],
                    'lesson_num': entry['lesson_num'],
                    'number': entry['number'],
                })

        return {'name': train_name, 'courses': courses}
Esempio n. 2
0
    def get_course_info(self, course_id):
        """Return the parsed lessons of one course, following pagination.

        Each page's ``data`` list holds either lessons directly or chapters
        that in turn carry their lessons under ``"list"``. Every lesson is
        converted with ``self.parse_lesson``; lessons found inside a chapter
        are tagged with ``"<chapter_sort>. <chapter_name>"``.

        Args:
            course_id: numeric id substituted into ``train_course_id``.

        Returns:
            list of whatever ``self.parse_lesson`` returns, in response order.
        """
        infos = []
        current_page = 1
        # A page number of 0 is the "no more pages" signal.
        while current_page:
            url = 'https://edu.51cto.com/center/wejob/user/course-info-ajax?&train_course_id=%d&page=%d&size=20' % (
                course_id, current_page)
            res = self.session.get(url).text
            data = json.loads(res)['data']
            current_page = data['current_page'] + 1 if data[
                'current_page'] < data['count_page'] else 0

            # May be a list of lessons or a list of chapters.
            data_list = data['data']
            if not data_list:
                # An empty page has no first element to inspect; skip it
                # instead of failing with IndexError.
                continue

            # The first element decides how the whole page is interpreted.
            # ``in`` replaces the Python 2-only ``dict.has_key``.
            if "lesson_id" in data_list[0]:
                # data_list is a list of lessons.
                for lesson in data_list:
                    infos.append(self.parse_lesson(lesson))
            else:
                # data_list is a list of chapters.
                for chapter in data_list:
                    chapter_name = cto.filename_reg_check(
                        chapter["chapter_name"])
                    chapter_sort_name = (
                        chapter["chapter_sort"] + ". " + chapter_name)
                    # chapter["list"] may be a list of lessons, or a dict
                    # keyed by a paging index whose values are the lessons.
                    lessons = chapter["list"]
                    if not isinstance(lessons, list):
                        lessons = lessons.values()
                    for lesson in lessons:
                        infos.append(
                            self.parse_lesson(lesson, chapter_sort_name))
        return infos
Esempio n. 3
0
    def get_train_info(self):
        """Collect the training name and the courses listed for ``self.train_id``.

        Pages of the train-course endpoint are requested until the response
        reports that the current page is the last one.

        Returns:
            dict with ``'name'`` (result of ``self.get_train_name()``) and
            ``'courses'``, a list of dicts holding ``course_name``,
            ``train_id``, ``train_course_id``, ``lesson_num`` and ``number``.

        Raises:
            SystemExit: with status 1 when a response body is not valid
                JSON; the parse error and the raw body are printed first.
        """
        train = {'name': self.get_train_name(), 'courses': []}
        current_page = 1

        # A page number of 0 is the "no more pages" signal.
        while current_page:
            url = 'https://edu.51cto.com/center/wejob/user/train-course-ajax?train_id=%d&page=%d&size=20' % \
                  (self.train_id, current_page)
            res = self.session.get(url)

            try:
                payload = json.loads(res.text)
            except ValueError as e:
                # Single-argument print() behaves the same on Python 2 and 3.
                # The message reads "abnormal API response".
                print("接口响应异常 %s" % e)
                print(res.text)
                # Signal failure with a non-zero status; SystemExit does not
                # depend on the site-provided ``exit`` helper.
                raise SystemExit(1)

            page = payload['data']
            current_page = page['current_page'] + 1 if page['current_page'] < page['count_page'] else 0

            for i in page['data']:
                course = {
                    'course_name': cto.filename_reg_check(i['course_name'].encode('utf-8')),
                    'train_id': i['train_id'],
                    'train_course_id': i['train_course_id'],
                    'lesson_num': i['lesson_num'],
                    'number': i['sort'],  # ordinal of the course
                }
                train['courses'].append(course)
        return train
Esempio n. 4
0
    def get_course_info(self, course_id):
        """Return name/id/video-id records for the lessons of one course.

        Pages of the course-info endpoint are requested until the response
        reports that the current page is the last one.

        Args:
            course_id: numeric id substituted into ``train_course_id``.

        Returns:
            list of dicts with ``lesson_name``, ``lesson_id`` and
            ``video_id``, in response order.
        """
        infos = []
        current_page = 1
        # A page number of 0 is the "no more pages" signal.
        while current_page:
            url = 'https://edu.51cto.com/center/wejob/user/course-info-ajax?&train_course_id=%d&page=%d&size=20' % (
                course_id, current_page)
            res = self.session.get(url).text
            data = json.loads(res)['data']
            current_page = data['current_page'] + 1 if data[
                'current_page'] < data['count_page'] else 0

            entries = data['data']
            if not entries:
                # An empty page has no first entry; skip it instead of
                # failing with IndexError.
                continue

            # NOTE(review): only the first entry's 'list' is read, as in the
            # original code -- confirm later entries never carry lessons.
            pages = entries[0]['list']

            # 'list' is either a list of lessons or a dict whose values are
            # the lessons.
            lessons = pages.values() if isinstance(pages, dict) else pages
            for m in lessons:
                lesson_name = cto.filename_reg_check(m[u'lesson_name'])
                infos.append({
                    'lesson_name': lesson_name,
                    'lesson_id': m['lesson_id'],
                    'video_id': m['video_id'],
                })
        return infos
Esempio n. 5
0
 def get_train_name(self):
     """Fetch the training page and return its title as a file-safe name.

     The title is read from the ``<h2 id="CourseTitle">`` element of the
     page for ``self.train_id`` and passed through
     ``cto.filename_reg_check``.

     Raises:
         SystemExit: with the message '找不到该课程' (course not found)
             when the page has no such element.
     """
     url = 'http://edu.51cto.com/center/wejob/index/view?id=%d&force=3&orig=try' % (
         self.train_id)
     res = self.session.get(url).text
     soup = BeautifulSoup(res, 'html.parser')
     title = soup.find('h2', id='CourseTitle')
     # Identity test: find() returns None when nothing matches.
     if title is None:
         # Same effect as exit(msg) without relying on the site module.
         raise SystemExit('找不到该课程')
     return cto.filename_reg_check(title.string)
Esempio n. 6
0
    def get_train_name(self):
        """Fetch the training page and return its title as a file-safe name.

        The title is the ``title`` attribute of the first ``<div>`` directly
        under ``<div class="basismes">`` on the page for ``self.train_id``;
        it is printed and then passed through ``cto.filename_reg_check``.

        Raises:
            SystemExit: with the message '找不到该课程' (course not found)
                when no such element exists or it has no ``title`` attribute.
        """
        url = 'http://edu.51cto.com/center/wejob/index/view?id=%d&force=3&orig=try' % (self.train_id)
        res = self.session.get(url)
        tree = html.fromstring(res.text)
        nodes = tree.xpath("//div[@class='basismes']/div")
        # Indexing an empty match list would raise IndexError before the
        # "not found" check could run, so treat no match as a missing title.
        title = nodes[0].get("title") if nodes else None
        # Single-argument print() behaves the same on Python 2 and 3.
        print(title)

        if title is None:
            # Same effect as exit(msg) without relying on the site module.
            raise SystemExit('找不到该课程')
        return cto.filename_reg_check(title)
Esempio n. 7
0
 def parse_lesson(self, lesson, chapter_sort_name=""):
     """
     Reduce a lesson record to the fields this tool keeps.

     ``lesson`` is the complete lesson information returned by the server;
     handling it here keeps the retained lesson structure in one place.
     ``chapter_sort_name`` is stored unchanged in the result.
     """
     # The lesson name is passed through cto.filename_reg_check first.
     info = {'lesson_name': cto.filename_reg_check(lesson[u'lesson_name'])}
     # These two fields are copied over unchanged.
     for field in ('lesson_id', 'video_id'):
         info[field] = lesson[field]
     info["lesson_type"] = lesson["lesson_type"]
     info["chapter_sort_name"] = chapter_sort_name
     info["show_number"] = lesson["show_number"]
     return info
Esempio n. 8
0
    def get_course_info(self, course_id):
        """Return name/id/video-id records for the lessons of one course.

        Pages of the course-info endpoint are requested for
        ``self.train_id`` and ``course_id`` until the response reports that
        the current page is the last one.

        Returns:
            list of dicts with ``lesson_name``, ``lesson_id`` and
            ``video_id``, in response order.
        """
        lessons = []
        page_no = 1
        # A page number of 0 is the "no more pages" signal.
        while page_no:
            endpoint = 'http://edu.51cto.com/center/wejob/usr/course-infoajax?train_id=%d&train_course_id=%d&page=%d&size=20' % (
                self.train_id, course_id, page_no)
            body = self.session.get(endpoint).text
            payload = json.loads(body)['data']

            if payload['current_page'] < payload['count_page']:
                page_no = payload['current_page'] + 1
            else:
                page_no = 0

            for entry in payload['data']:
                # Lesson names are passed through cto.filename_reg_check.
                safe_name = cto.filename_reg_check(entry['lesson_name'])
                lessons.append({
                    'lesson_name': safe_name,
                    'lesson_id': entry['lesson_id'],
                    'video_id': entry['video_id'],
                })
        return lessons