def get_source_list(tid, timeout=30):
    """Fetch the raw resource listing for a course term.

    POSTs to the icourse163 ``CourseBean.getMocTermDto.dwr`` endpoint and
    returns the response body with its backslash escapes (``\\uXXXX`` etc.)
    decoded, so that Chinese text prints readably.

    Args:
        tid: Term id (``termId``). Strings and ints are both accepted; the
            value is converted with ``str()`` before being sent.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``
            argument, so a stalled connection cannot hang forever.

    Returns:
        The decoded response text.

    Raises:
        requests.RequestException: On an HTTP error status, connection
            failure, or timeout. The error is printed, then re-raised.
    """
    # NOTE (2018-06): captured traffic no longer contains getMocTermDto.dwr,
    # so the MOOC site has probably been redesigned. The resource list now
    # seems to be served by
    #   https://www.icourse163.org/dwr/call/plaincall/CourseBean.getLastLearnedMocTermDto.dwr
    # (a POST whose JS response contains the video links), but that endpoint
    # needs cookies and a persistent session. That is inconvenient, so the
    # older endpoint below is still used for now.
    url = 'http://www.icourse163.org/dwr/call/plaincall/CourseBean.getMocTermDto.dwr'
    data = {
        'callCount': '1',
        'scriptSessionId': '${scriptSessionId}190',
        'c0-scriptName': 'CourseBean',
        'c0-methodName': 'getMocTermDto',
        'c0-id': '0',
        'c0-param0': 'number:' + str(tid),  # tid, termId
        'c0-param1': 'number:1',
        'c0-param2': 'boolean:true',
        'batchId': unixtime.now(),
    }

    try:
        r = requests.post(url, headers=headers, data=data, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException as ex:
        # HTTPError is a RequestException, so HTTP failures are reported
        # exactly as before; connection errors and timeouts now get the
        # same message instead of propagating silently.
        print('>>> 课程搜索页面访问出错...\n[-]ERROR: %s' % str(ex))
        raise

    # The response encoding was detected as 'ascii' during testing, with
    # non-ASCII text carried as backslash escapes. 'unicode_escape' decodes
    # its input bytes as Latin-1, so encode with Latin-1 and turn anything
    # outside that range into an escape first: pure-ASCII input decodes
    # exactly as before, and literal non-ASCII characters survive intact.
    return r.text.encode('latin-1', 'backslashreplace').decode('unicode_escape')
def test_main():
    """Check that unixtime timestamps and datetimes convert consistently.

    The current timestamp must survive a timestamp -> datetime -> timestamp
    round trip (to the nearest second), must agree with the timestamp of
    ``current_datetime()``, and both datetimes must render with a
    ``+00:00`` offset.
    """
    ts_now = unixtime.now()
    dt_current = unixtime.current_datetime()
    ts_from_current = unixtime.from_datetime(dt_current)
    dt_from_now = unixtime.to_datetime(ts_now)
    ts_round_trip = unixtime.from_datetime(dt_from_now)

    # Compare at whole-second resolution.
    now_seconds = round(ts_now)
    assert now_seconds == round(ts_round_trip)
    assert now_seconds == round(ts_from_current)

    for moment in (dt_from_now, dt_current):
        assert moment.isoformat().endswith("+00:00")
def get_download_info(dataList, sourceType, Quality=None, fileFormat=None,
                      timeout=30):
    """Look up the download link(s) for one lesson unit.

    POSTs to the icourse163 ``CourseBean.getLessonUnitLearnVo.dwr`` endpoint
    and extracts links from the response text with regular expressions.

    Args:
        dataList: Sequence whose first three items are the content id, the
            file id and the display name. The ids may be strings or ints.
        sourceType: Value sent as the ``c0-param1`` number.
        Quality: When truthy, the response is parsed as a video: the link is
            taken from ``<fileFormat><Quality>Url="...";``. When falsy, the
            response is parsed as courseware (PDF).
        fileFormat: Prefix used together with ``Quality`` to build the video
            link pattern.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``
            argument, so a stalled connection cannot hang forever.

    Returns:
        A ``(links, file_name)`` tuple. ``links`` is:
          * ``[video_url, srt_url]`` or ``[video_url]`` for a video (the
            second entry is the first ``sN.url`` value, presumably the
            subtitle link),
          * ``[pdf_url]`` for courseware,
          * ``[]`` when nothing matched.
        ``file_name`` is the display name with characters that are illegal
        in Windows file names removed.

    Raises:
        requests.RequestException: On an HTTP error status, connection
            failure, or timeout. The error is printed, then re-raised.
    """
    url = 'http://www.icourse163.org/dwr/call/plaincall/CourseBean.getLessonUnitLearnVo.dwr'
    # Coerce ids to str so integer ids do not break the concatenation below.
    content_id = str(dataList[0])
    file_id = str(dataList[1])
    # Strip characters that are not allowed in Windows file names.
    file_name = re.sub(r'[/\\*|<>:?"]', '', dataList[2])
    data = {
        'callCount': '1',
        'scriptSessionId': '${scriptSessionId}190',
        'c0-scriptName': 'CourseBean',
        'c0-methodName': 'getLessonUnitLearnVo',
        'c0-id': '0',
        'c0-param0': 'number:' + content_id,  # contentId
        'c0-param1': 'number:{}'.format(sourceType),
        'c0-param2': 'number:0',
        'c0-param3': 'number:' + file_id,  # file id
        'batchId': unixtime.now(),
    }

    try:
        r = requests.post(url, headers=headers, data=data, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException as ex:
        # HTTPError is a RequestException, so HTTP failures are reported
        # exactly as before; connection errors and timeouts now get the
        # same message instead of propagating silently.
        print('课程搜索页面访问出错...\n[-]ERROR: %s' % str(ex))
        raise
    page = r.text

    if not Quality:
        # Courseware: return the first PDF link, if any.
        pdf_url = re.findall(r'http://nos.netease.com/.*?\.pdf', page)
        return pdf_url[:1], file_name

    # Video: the link is mandatory, the sN.name/sN.url pair is optional.
    re_videoLink = r'{}{}Url="(.+?)";'.format(fileFormat, Quality)
    video_url = re.findall(re_videoLink, page)
    if not video_url:
        return [], file_name

    links = [video_url[0]]
    re_srtLink = r's\d+\.name="([\w\\]+?)";s\d+\.url="(.+?)";'
    srt_url = re.findall(re_srtLink, page)
    if srt_url:
        links.append(srt_url[0][1])  # second group of the first match: url
    return links, file_name
def msg(message):
    """Print ``message`` prefixed with a ``[timestamp]`` tag.

    The prefix is ``unixtime.toDateString(unixtime.now())`` in square
    brackets. The line is printed only when ``config.output_place`` equals
    ``config.output_console``; otherwise nothing is emitted.
    """
    stamp = str(unixtime.toDateString(unixtime.now()))
    line = "[{}] {}".format(stamp, str(message))
    if config.output_place == config.output_console:
        print(line)