예제 #1
0
    def train(self, train_id):
        """Save the playlist and key files for every lesson of a training track.

        For the track identified by ``train_id`` this method:

        * creates ``self.path/<track name>`` (``self.path`` is extended in place),
        * lists the courses and asks the user for confirmation,
        * stores the track index page and an ``info.json`` dump of the courses,
        * for every course creates a directory, stores the index page in it
          and, for every lesson whose ``lesson_type`` is ``"1"``, writes
          ``vedio.m3u8``, ``enkey.key`` and ``key.key`` into a per-lesson
          directory. Files that already exist are not fetched again.

        The actual segment download is disabled (see the commented-out code
        at the end of the lesson loop).

        Args:
            train_id: Numeric id of the training track; stored on
                ``self.train_id`` and interpolated with ``%d`` into the
                index page URL.

        Raises:
            SystemExit: If the user answers ``n`` at the confirmation prompt.
        """
        # Local import so this method does not depend on the module header.
        import sys

        self.train_id = train_id
        train_start = time.time()
        train = self.get_train_info()
        train_name = train['name']
        self.path += '/' + train_name
        cto.check_or_make_dir(self.path)

        print(u'微职位名称:' + train_name + u'\n获取课程列表')
        courses = train['courses']

        total_course = len(courses)
        print('总计%d门course' % total_course)

        # Print the numbered course names. enumerate() gives the position
        # directly; list.index() is O(n) per call and returns the first match
        # if two entries happen to compare equal.
        for course_index, course in enumerate(courses, 1):
            print(str(course_index) + '.' + course['course_name'])

        action = raw_input('是否下载y/n? 默认y:')

        if action == 'n':
            print('终止下载,程序退出')
            # sys.exit() instead of the interactive-only site helper exit().
            sys.exit()

        # Keep a copy of the track index page.
        # NOTE(review): the file is opened in append mode, so re-running adds
        # another copy of the page to the same file - confirm this is intended.
        index_url = 'http://edu.51cto.com/center/wejob/index/view?id=%d&force=3&orig=try' % (
            self.train_id)
        resp = self.session.get(index_url)
        with open(self.path + '/index.html', 'ab') as f:
            f.write(resp.content)

        # Store the course information as JSON.
        with open(self.path + '/info.json', 'w') as f:
            data = json.dumps(courses, ensure_ascii=False, indent=2)
            f.write(data)

        for course_index, course in enumerate(courses, 1):
            course_id = int(course['train_course_id'])

            file_path = os.path.join(
                self.path, course['number'] + '.' + course['course_name'])
            cto.check_or_make_dir(file_path)

            # The same index page URL is fetched again and stored inside
            # each course directory.
            train_page = self.session.get(index_url)
            with open(file_path + '/index.html', 'ab') as f:
                f.write(train_page.content)

            print('%d/%d获取%s详情' %
                  (course_index, total_course, course['course_name']))
            lessons = self.get_course_info(course_id)
            # Counts every returned lesson, including the ones skipped below.
            total_lesson = len(lessons)

            for lesson in lessons:
                lesson_name = lesson['lesson_name']
                # Only lessons of type "1" are processed; the message printed
                # for the others labels them as exercises.
                if lesson["lesson_type"] != "1":
                    print(u"跳过练习 " + lesson_name)
                    continue
                lesson_id = '_'.join(
                    [str(course_id), str(lesson['lesson_id'])])

                show_number = lesson["show_number"]
                lesson_path = os.path.join(
                    file_path, lesson["chapter_sort_name"],
                    "%s.%s" % (show_number, lesson_name))
                cto.check_or_make_dir(lesson_path)

                # filename = os.path.join(file_path, "%d.%s.ts" % (show_number, lesson['lesson_name']))
                # if os.path.exists(filename):
                #     continue

                print(datetime.datetime.now().strftime("%H:%M:%S") +
                      ' 正在下载(%s/%d)-%s' %
                      (show_number, total_lesson, lesson_name))

                # Fetch and store the m3u8 playlist unless it is already there.
                m3u8_file = os.path.join(lesson_path, "vedio.m3u8")
                if not os.path.exists(m3u8_file):
                    m3u8_content = self.get_m3u8_content(
                        lesson_id, lesson['video_id'])
                    with open(m3u8_file, 'w') as fh:
                        fh.write(m3u8_content)

                # Fetch and store the encrypted key unless it is already there.
                enkey_file = os.path.join(lesson_path, "enkey.key")
                enkey_content = None
                if not os.path.exists(enkey_file):
                    enkey_content = self.get_enkey(lesson_id,
                                                   lesson['video_id'])
                    with open(enkey_file, 'w') as fh:
                        fh.write(enkey_content)

                # Decode and store the key. This is checked independently of
                # the step above: previously key.key was only produced in the
                # same run that downloaded enkey.key, so an interruption
                # between the two writes left the lesson without a key forever.
                key_file = os.path.join(lesson_path, "key.key")
                if not os.path.exists(key_file):
                    if enkey_content is None:
                        # enkey.key came from an earlier run; reuse it.
                        with open(enkey_file, 'r') as fh:
                            enkey_content = fh.read()
                    key_content = decode_helper.decode(
                        enkey_content, lesson_id)
                    with open(key_file, 'w') as fh:
                        fh.write(key_content)

                # Segment download is currently disabled:
                # urls = self.get_download_urls(m3u8_content)
                # try:
                #     cto.download(lesson_path, urls)
                # except Exception :
                #     time.sleep(random.uniform(5,10))
                #     cto.download(lesson_path, urls)

        print('train下载用时总计' + cto.total_time(time.time() - train_start))
예제 #2
0
    def train(self):
        """Download every lesson of the training track ``self.train_id``.

        The method creates ``self.path/<track name>`` (``self.path`` is
        extended in place), lists the courses, asks the user for
        confirmation, stores the track index page and an ``info.json`` dump
        of the courses, and then downloads each lesson of each course to
        ``<course dir>/<lesson index>.<lesson name>.ts``. Lessons whose
        target file already exists are skipped.

        Raises:
            SystemExit: If the user answers ``n`` at the confirmation prompt.
        """
        # Local import so this method does not depend on the module header.
        import sys

        train_start = time.time()
        train = self.get_train_info()
        train_name = train['name']
        self.path += '/' + train_name
        cto.check_or_make_dir(self.path)

        print('微职位名称:' + train_name + '\n获取课程列表')
        courses = train['courses']

        total_course = len(courses)
        print('总计%d门course' % (total_course))

        # Print the numbered course names. enumerate() gives the position
        # directly; list.index() is O(n) per call and returns the first match
        # if two entries happen to compare equal.
        for course_index, course in enumerate(courses, 1):
            print(str(course_index) + '.' + course['course_name'])

        action = raw_input('是否下载y/n? 默认y:')
        if action == 'n':
            print('终止下载,程序退出')
            # sys.exit() instead of the interactive-only site helper exit().
            sys.exit()

        # Keep a copy of the track index page.
        # NOTE(review): the file is opened in append mode, so re-running adds
        # another copy of the page to the same file - confirm this is intended.
        index_url = 'http://edu.51cto.com/center/wejob/index/view?id=%d&force=3&orig=try' % (
            self.train_id)
        resp = self.session.get(index_url)
        with open(self.path + '/index.html', 'ab') as f:
            f.write(resp.content)

        # Store the course information as JSON.
        with open(self.path + '/info.json', 'w') as f:
            data = json.dumps(courses, ensure_ascii=False, indent=2)
            f.write(data)

        for course_index, course in enumerate(courses, 1):
            course_id = int(course['train_course_id'])

            file_path = os.path.join(
                self.path, course['number'] + '.' + course['course_name'])
            cto.check_or_make_dir(file_path)

            # The same index page URL is fetched again and stored inside
            # each course directory.
            train_page = self.session.get(index_url)
            with open(file_path + '/index.html', 'ab') as f:
                f.write(train_page.content)

            print('%d/%d获取%s详情' %
                  (course_index, total_course, course['course_name']))
            lessons = self.get_course_info(course_id)
            total_lesson = len(lessons)

            # The lesson index is part of the output filename, so it must be
            # the real position in the list; lessons.index() would give two
            # equal entries the same number and therefore the same file.
            for lesson_index, lesson in enumerate(lessons, 1):
                lesson_id = '_'.join(
                    [str(course_id), str(lesson['lesson_id'])])
                filename = os.path.join(
                    file_path,
                    "%d.%s.ts" % (lesson_index, lesson['lesson_name']))
                # An existing file means the lesson is treated as done.
                if os.path.exists(filename):
                    continue

                print(datetime.datetime.now().strftime("%H:%M:%S") +
                      ' 正在下载(%d/%d)-%s' %
                      (lesson_index, total_lesson, lesson['lesson_name']))
                urls = self.get_download_url(lesson_id, lesson['video_id'])
                try:
                    cto.download(filename, urls)
                except Exception:
                    # Best-effort: wait a random 5-10 seconds and retry once.
                    # A second failure propagates to the caller.
                    time.sleep(random.uniform(5, 10))
                    cto.download(filename, urls)

        print('train下载用时总计' + cto.total_time(time.time() - train_start))
예제 #3
0
파일: wejob.py 프로젝트: luone/luone_python
    def train(self, train_id):
        """Download and decode every lesson of a training track.

        The method creates ``self.path/<track name>`` (``self.path`` is
        extended in place), asks the user for confirmation, stores the track
        index page, and then downloads each lesson of each course to
        ``<course dir>/<lesson index>.<lesson name>.ts``, passing a decode
        callback built from the lesson's play key to ``cto.download``.
        Lessons whose target file already exists are skipped, and lessons
        whose download URLs cannot be resolved are reported and skipped.

        Args:
            train_id: Numeric id of the training track; stored on
                ``self.train_id`` and interpolated with ``%d`` into the
                index page URL.

        Raises:
            SystemExit: If the user answers ``n`` at the confirmation prompt.
        """
        # Local import so this method does not depend on the module header.
        import sys

        self.train_id = train_id
        train_start = time.time()
        train = self.get_train_info()
        train_name = train['name']
        self.path += '/' + train_name
        cto.check_or_make_dir(self.path)

        print('微职位名称:' + train_name + '\n获取课程列表')
        courses = train['courses']

        total_course = len(courses)
        print('总计%d门course' % total_course)

        # Printing the course names is disabled:
        # for course in courses:
        # print(str(course, encoding='utf-8'))
        # print(str(courses.index(course) + 1) + '.' + course['course_name'])

        action = raw_input('是否下载y/n? 默认y:')
        if action == 'n':
            print('终止下载,程序退出')
            # sys.exit() instead of the interactive-only site helper exit().
            sys.exit()

        # Keep a copy of the track index page.
        # NOTE(review): the file is opened in append mode, so re-running adds
        # another copy of the page to the same file - confirm this is intended.
        index_url = 'http://edu.51cto.com/center/wejob/index/view?id=%d&force=3&orig=try' % (
            self.train_id)
        resp = self.session.get(index_url)
        with open(self.path + '/index.html', 'ab') as f:
            f.write(resp.content)

        # Storing the course information as JSON is disabled:
        # with open(self.path + '/info.json', 'w') as f:
        #     data = json.dumps(courses, ensure_ascii=False, indent=2)
        #     f.write(data)

        # enumerate() gives the position directly; list.index() is O(n) per
        # call and returns the first match if two entries compare equal.
        for course_index, course in enumerate(courses, 1):
            course_id = int(course['train_course_id'])

            file_path = os.path.join(
                self.path,
                str(course['number']) + '.' + str(course['course_name']))
            cto.check_or_make_dir(file_path)

            # The same index page URL is fetched again and stored inside
            # each course directory.
            train_page = self.session.get(index_url)
            with open(file_path + '/index.html', 'ab') as f:
                f.write(train_page.content)

            print('%d/%d获取%s详情' %
                  (course_index, total_course, course['course_name']))
            lessons = self.get_course_info(course_id)
            total_lesson = len(lessons)

            # The lesson index is part of the output filename, so it must be
            # the real position in the list.
            for lesson_index, lesson in enumerate(lessons, 1):
                lesson_id = '_'.join(
                    [str(course_id), str(lesson['lesson_id'])])
                filename = os.path.join(
                    file_path,
                    "%d.%s.ts" % (lesson_index, lesson['lesson_name']))
                # An existing file means the lesson is treated as done.
                if os.path.exists(filename):
                    continue

                print(datetime.datetime.now().strftime("%H:%M:%S") +
                      ' 正在下载(%d/%d)-%s' %
                      (lesson_index, total_lesson, lesson['lesson_name']))
                try:
                    urls, get_key_url = self.get_download_url(
                        lesson_id, lesson['video_id'])
                except Exception:
                    # Best-effort: report the lesson and move on to the next.
                    print("解析m3u8地址保存,可能视频不存在")
                    continue
                #print "lession_id", course_id," get key url:", get_key_url
                play_key = le.Lesson(self.session).get_key_for_wejob(
                    lesson_id, course_id, get_key_url)

                def func_decode(video_data):
                    """Decode downloaded data with this lesson's play key."""
                    #print "play_key:%s, lesson_id:%s, " % (play_key, lesson['lesson_id'])
                    return decory_video.Video().decory(play_key, lesson_id,
                                                       video_data)

                try:
                    cto.download(filename, urls, func_decode=func_decode)
                except Exception:
                    # Best-effort: wait a random 5-10 seconds and retry once.
                    # The retry passes func_decode as well; previously it was
                    # dropped, so a retried lesson was saved without the
                    # decode step applied on the first attempt.
                    time.sleep(random.uniform(5, 10))
                    cto.download(filename, urls, func_decode=func_decode)

        print('train下载用时总计' + cto.total_time(time.time() - train_start))