def train(self, train_id):
    """Save the playlist and key files for every video lesson of a train.

    Fetches the train description through ``self.get_train_info()``, prints
    the course list, asks for confirmation on stdin and then writes, below
    ``self.path``: the train index page, ``info.json`` with the course
    records and, for each lesson whose ``lesson_type`` is ``"1"``, the files
    ``vedio.m3u8``, ``enkey.key`` and ``key.key``.

    Side effects: sets ``self.train_id``, appends the train name to
    ``self.path`` and calls ``exit()`` when the user answers ``n``.

    :param train_id: id of the train; it is formatted with ``%d`` into the
        index page URL, so it must be a number.
    """
    self.train_id = train_id
    train_start = time.time()

    train = self.get_train_info()
    train_name = train['name']
    # NOTE: self.path is extended in place, so a second call on the same
    # object builds its directory below the first train's directory.
    self.path += '/' + train_name
    cto.check_or_make_dir(self.path)
    print(u'微职位名称:' + train_name + u'\n获取课程列表')

    courses = train['courses']
    total_course = len(courses)
    print('总计%d门course' % total_course)
    # Print each raw course record followed by its numbered name.
    for course_index, course in enumerate(courses, 1):
        print(course)
        print(str(course_index) + '.' + course['course_name'])

    # Anything other than 'n' (including an empty answer) continues.
    action = raw_input('是否下载y/n? 默认y:')
    if action == 'n':
        print('终止下载,程序退出')
        exit()

    # Keep a copy of the train index page.
    index_url = 'http://edu.51cto.com/center/wejob/index/view?id=%d&force=3&orig=try' % (
        self.train_id)
    resp = self.session.get(index_url)
    # NOTE(review): 'ab' appends, so every run adds another copy of the page
    # to index.html - confirm that this is intended.
    with open(self.path + '/index.html', 'ab') as f:
        f.write(resp.content)

    # Save the course records as JSON.
    with open(self.path + '/info.json', 'w') as f:
        data = json.dumps(courses, ensure_ascii=False, indent=2)
        f.write(data)

    for course_index, course in enumerate(courses, 1):
        course_id = int(course['train_course_id'])
        file_path = os.path.join(
            self.path, course['number'] + '.' + course['course_name'])
        cto.check_or_make_dir(file_path)

        # Every course directory gets its own copy of the train index page.
        train_page = self.session.get(index_url)
        with open(file_path + '/index.html', 'ab') as f:
            f.write(train_page.content)

        print('%d/%d获取%s详情' % (course_index, total_course,
                                course['course_name']))
        lessons = self.get_course_info(course_id)
        total_lesson = len(lessons)

        for lesson in lessons:
            lesson_name = lesson['lesson_name']
            # Only lessons of type "1" are processed; the rest are reported
            # as skipped.
            if lesson["lesson_type"] != "1":
                print(u"跳过练习 " + lesson_name)
                continue

            lesson_id = '_'.join([str(course_id), str(lesson['lesson_id'])])
            show_number = lesson["show_number"]
            lesson_path = os.path.join(
                file_path, lesson["chapter_sort_name"],
                "%s.%s" % (show_number, lesson_name))
            cto.check_or_make_dir(lesson_path)

            print(datetime.datetime.now().strftime("%H:%M:%S") +
                  ' 正在下载(%s/%d)-%s' % (show_number, total_lesson,
                                        lesson_name))

            # Fetch and store the m3u8 playlist unless it is already on disk.
            m3u8_file = os.path.join(lesson_path, "vedio.m3u8")
            if not os.path.exists(m3u8_file):
                m3u8_content = self.get_m3u8_content(
                    lesson_id, lesson['video_id'])
                with open(m3u8_file, 'w') as fh:
                    fh.write(m3u8_content)

            # Fetch and store the encrypted key unless it is already on disk.
            # Reset per lesson so a value left over from the previous lesson
            # can never be decoded for this one.
            enkey_content = None
            enkey_file = os.path.join(lesson_path, "enkey.key")
            if not os.path.exists(enkey_file):
                enkey_content = self.get_enkey(lesson_id, lesson['video_id'])
                with open(enkey_file, 'w') as fh:
                    fh.write(enkey_content)

            # Decode and store the key unless it is already on disk.
            key_file = os.path.join(lesson_path, "key.key")
            if not os.path.exists(key_file):
                if enkey_content is None:
                    # enkey.key survived an earlier run but key.key did not:
                    # load the encrypted key from disk instead of failing.
                    with open(enkey_file, 'r') as fh:
                        enkey_content = fh.read()
                key_content = decode_helper.decode(enkey_content, lesson_id)
                with open(key_file, 'w') as fh:
                    fh.write(key_content)

            # NOTE: downloading the video segments themselves (formerly
            # self.get_download_urls + cto.download with one retry) was
            # commented out in the original; only the playlist and the keys
            # are saved here.

    print('train下载用时总计' + cto.total_time(time.time() - train_start))
def train(self):
    """Download every lesson of the train identified by ``self.train_id``.

    Fetches the train description through ``self.get_train_info()``, prints
    the course list, asks for confirmation on stdin and then writes, below
    ``self.path``: the train index page, ``info.json`` with the course
    records and one ``<index>.<lesson_name>.ts`` file per lesson inside its
    course directory. Lessons whose target file already exists are skipped.

    Side effects: appends the train name to ``self.path`` and calls
    ``exit()`` when the user answers ``n``.
    """
    train_start = time.time()

    train = self.get_train_info()
    train_name = train['name']
    # NOTE: self.path is extended in place, so a second call on the same
    # object builds its directory below the first train's directory.
    self.path += '/' + train_name
    cto.check_or_make_dir(self.path)
    print('微职位名称:' + train_name + '\n获取课程列表')

    courses = train['courses']
    total_course = len(courses)
    print('总计%d门course' % (total_course))
    # Print the numbered course names.
    for course_index, course in enumerate(courses, 1):
        print(str(course_index) + '.' + course['course_name'])

    # Anything other than 'n' (including an empty answer) continues.
    action = raw_input('是否下载y/n? 默认y:')
    if action == 'n':
        print('终止下载,程序退出')
        exit()

    # Keep a copy of the train index page.
    index_url = 'http://edu.51cto.com/center/wejob/index/view?id=%d&force=3&orig=try' % (
        self.train_id)
    resp = self.session.get(index_url)
    # NOTE(review): 'ab' appends, so every run adds another copy of the page
    # to index.html - confirm that this is intended.
    with open(self.path + '/index.html', 'ab') as f:
        f.write(resp.content)

    # Save the course records as JSON.
    with open(self.path + '/info.json', 'w') as f:
        data = json.dumps(courses, ensure_ascii=False, indent=2)
        f.write(data)

    for course_index, course in enumerate(courses, 1):
        course_id = int(course['train_course_id'])
        file_path = os.path.join(
            self.path, course['number'] + '.' + course['course_name'])
        cto.check_or_make_dir(file_path)

        # Every course directory gets its own copy of the train index page.
        train_page = self.session.get(index_url)
        with open(file_path + '/index.html', 'ab') as f:
            f.write(train_page.content)

        print('%d/%d获取%s详情' % (course_index, total_course,
                                course['course_name']))
        lessons = self.get_course_info(course_id)
        total_lesson = len(lessons)

        # enumerate() numbers lessons by position; list.index() would hand
        # the same number (and therefore filename) to equal lesson records.
        for lesson_index, lesson in enumerate(lessons, 1):
            lesson_id = '_'.join([str(course_id), str(lesson['lesson_id'])])
            filename = os.path.join(
                file_path, "%d.%s.ts" % (lesson_index, lesson['lesson_name']))
            # A file that is already present is treated as downloaded.
            if os.path.exists(filename):
                continue

            print(datetime.datetime.now().strftime("%H:%M:%S") +
                  ' 正在下载(%d/%d)-%s' % (lesson_index, total_lesson,
                                        lesson['lesson_name']))
            urls = self.get_download_url(lesson_id, lesson['video_id'])
            try:
                cto.download(filename, urls)
            except Exception:
                # Best effort: wait 5-10 seconds and try exactly once more;
                # a second failure propagates to the caller.
                time.sleep(random.uniform(5, 10))
                cto.download(filename, urls)

    print('train下载用时总计' + cto.total_time(time.time() - train_start))
def train(self, train_id):
    """Download and decode every lesson of the train ``train_id``.

    Fetches the train description through ``self.get_train_info()``, asks
    for confirmation on stdin and then writes, below ``self.path``: the
    train index page and one ``<index>.<lesson_name>.ts`` file per lesson
    inside its course directory. Each download is passed a decode callback
    built from the lesson's play key. Lessons whose target file already
    exists, or whose download URLs cannot be resolved, are skipped.

    Side effects: sets ``self.train_id``, appends the train name to
    ``self.path`` and calls ``exit()`` when the user answers ``n``.

    :param train_id: id of the train; it is formatted with ``%d`` into the
        index page URL, so it must be a number.
    """
    self.train_id = train_id
    train_start = time.time()

    train = self.get_train_info()
    train_name = train['name']
    # NOTE: self.path is extended in place, so a second call on the same
    # object builds its directory below the first train's directory.
    self.path += '/' + train_name
    cto.check_or_make_dir(self.path)
    print('微职位名称:' + train_name + '\n获取课程列表')

    courses = train['courses']
    total_course = len(courses)
    print('总计%d门course' % total_course)
    # NOTE(review): the per-course name listing and the info.json dump were
    # commented out in the original and are omitted here.

    # Anything other than 'n' (including an empty answer) continues.
    action = raw_input('是否下载y/n? 默认y:')
    if action == 'n':
        print('终止下载,程序退出')
        exit()

    # Keep a copy of the train index page.
    index_url = 'http://edu.51cto.com/center/wejob/index/view?id=%d&force=3&orig=try' % (
        self.train_id)
    resp = self.session.get(index_url)
    # NOTE(review): 'ab' appends, so every run adds another copy of the page
    # to index.html - confirm that this is intended.
    with open(self.path + '/index.html', 'ab') as f:
        f.write(resp.content)

    for course_index, course in enumerate(courses, 1):
        course_id = int(course['train_course_id'])
        file_path = os.path.join(
            self.path,
            str(course['number']) + '.' + str(course['course_name']))
        cto.check_or_make_dir(file_path)

        # Every course directory gets its own copy of the train index page.
        train_page = self.session.get(index_url)
        with open(file_path + '/index.html', 'ab') as f:
            f.write(train_page.content)

        print('%d/%d获取%s详情' % (course_index, total_course,
                                course['course_name']))
        lessons = self.get_course_info(course_id)
        total_lesson = len(lessons)

        # enumerate() numbers lessons by position; list.index() would hand
        # the same number (and therefore filename) to equal lesson records.
        for lesson_index, lesson in enumerate(lessons, 1):
            lesson_id = '_'.join([str(course_id), str(lesson['lesson_id'])])
            filename = os.path.join(
                file_path, "%d.%s.ts" % (lesson_index, lesson['lesson_name']))
            # A file that is already present is treated as downloaded.
            if os.path.exists(filename):
                continue

            print(datetime.datetime.now().strftime("%H:%M:%S") +
                  ' 正在下载(%d/%d)-%s' % (lesson_index, total_lesson,
                                        lesson['lesson_name']))
            try:
                urls, get_key_url = self.get_download_url(
                    lesson_id, lesson['video_id'])
            except Exception:
                # Any failure while resolving the URLs skips this lesson.
                print("解析m3u8地址保存,可能视频不存在")
                continue

            play_key = le.Lesson(self.session).get_key_for_wejob(
                lesson_id, course_id, get_key_url)

            def func_decode(video_data):
                # Callback handed to cto.download: runs the fetched data
                # through the decoder with this lesson's key and id.
                return decory_video.Video().decory(play_key, lesson_id,
                                                   video_data)

            try:
                cto.download(filename, urls, func_decode=func_decode)
            except Exception:
                # Best effort: wait 5-10 seconds and try exactly once more.
                # The retry must pass the same decode callback as the first
                # attempt; the original omitted it on this path.
                time.sleep(random.uniform(5, 10))
                cto.download(filename, urls, func_decode=func_decode)

    print('train下载用时总计' + cto.total_time(time.time() - train_start))