Beispiel #1
0
def get_source_list(tid):
    """Fetch the raw resource-list response for one course term.

    Posts a DWR ``CourseBean.getMocTermDto`` call to icourse163.org and
    returns the response body with its backslash escape sequences decoded,
    so that escaped (e.g. Chinese) text becomes printable.

    Args:
        tid: The term id (termId). May be a string or any value that
            ``str.format`` can render, such as an int.

    Returns:
        The response text with backslash escapes decoded.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            The error is printed before being re-raised.
    """
    # NOTE: traffic captured in 2018.6 no longer contained getMocTermDto.dwr,
    # so the MOOC site may have been reworked. The resource list would then
    # only be reachable through
    # https://www.icourse163.org/dwr/call/plaincall/CourseBean.getLastLearnedMocTermDto.dwr
    # (a POST whose response JS carries the video links), but that requires
    # cookies and a kept-alive session, so the older endpoint is still used.
    url = 'http://www.icourse163.org/dwr/call/plaincall/CourseBean.getMocTermDto.dwr'
    data = {
        'callCount': '1',
        'scriptSessionId': '${scriptSessionId}190',
        'c0-scriptName': 'CourseBean',
        'c0-methodName': 'getMocTermDto',
        'c0-id': 0,
        # format() instead of "+" so a non-string term id does not raise.
        'c0-param0': 'number:{}'.format(tid),  # tid, termId
        'c0-param1': 'number:1',
        'c0-param2': 'boolean:true',
        'batchId': unixtime.now()
    }
    try:
        r = requests.post(url, headers=headers, data=data)
        r.raise_for_status()
    except requests.HTTPError as ex:
        print('>>> 课程搜索页面访问出错...\n[-]ERROR: %s' % str(ex))
        raise
    # The response was observed to be ASCII with escaped characters. Encoding
    # via latin-1 + backslashreplace keeps that case identical while making
    # sure any literal non-ASCII character survives the unicode_escape
    # decode instead of being turned into mojibake.
    return r.text.encode('latin-1', 'backslashreplace').decode('unicode_escape')
Beispiel #2
0
def test_main():
    """Check that unixtime timestamps and datetimes convert consistently.

    The current timestamp is converted to a datetime and back, the current
    datetime is converted to a timestamp, and both results must round to the
    same value as the original timestamp. Both datetimes must also render an
    ISO string ending in a ``+00:00`` offset.
    """
    ts_now = unixtime.now()
    dt_current = unixtime.current_datetime()
    ts_from_current = unixtime.from_datetime(dt_current)
    dt_from_now = unixtime.to_datetime(ts_now)
    ts_roundtrip = unixtime.from_datetime(dt_from_now)

    expected = round(ts_now)
    for stamp in (ts_roundtrip, ts_from_current):
        assert expected == round(stamp)

    for moment in (dt_from_now, dt_current):
        assert moment.isoformat().endswith("+00:00")
Beispiel #3
0
def get_download_info(dataList, sourceType, Quality=None, fileFormat=None):
    """Look up the download link(s) for one lesson unit.

    Posts a DWR ``CourseBean.getLessonUnitLearnVo`` call to icourse163.org and
    extracts links from the response text with regular expressions.

    Args:
        dataList: Indexable with the content id at ``[0]``, the file id at
            ``[1]`` and the file name (a string) at ``[2]``. The ids may be
            strings or any value ``str.format`` can render.
        sourceType: Value sent as the ``c0-param1`` number of the request.
        Quality: When truthy, the response is parsed for a video link whose
            field name is ``<fileFormat><Quality>Url``; otherwise it is
            parsed for a PDF link.
        fileFormat: Prefix of the video URL field name; only used when
            ``Quality`` is truthy.

    Returns:
        A ``(links, file_name)`` tuple. ``links`` is ``[video_url]``,
        ``[video_url, subtitle_url]``, ``[pdf_url]`` or ``[]`` when nothing
        matched. ``file_name`` is ``dataList[2]`` with characters that are
        illegal in Windows file names removed.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            The error is printed before being re-raised.
    """
    url = 'http://www.icourse163.org/dwr/call/plaincall/CourseBean.getLessonUnitLearnVo.dwr'
    content_id = dataList[0]
    file_id = dataList[1]
    # Strip characters that Windows does not allow in file names.
    file_name = re.sub(r'[/\\*|<>:?"]', '', dataList[2])
    data = {
        'callCount': '1',
        'scriptSessionId': '${scriptSessionId}190',
        'c0-scriptName': 'CourseBean',
        'c0-methodName': 'getLessonUnitLearnVo',
        'c0-id': '0',
        # format() instead of "+" so non-string ids do not raise.
        'c0-param0': 'number:{}'.format(content_id),  # contentId
        'c0-param1': 'number:{}'.format(sourceType),
        'c0-param2': 'number:0',
        'c0-param3': 'number:{}'.format(file_id),  # file id
        'batchId': unixtime.now()
    }
    try:
        r = requests.post(url, headers=headers, data=data)
        r.raise_for_status()
    except requests.HTTPError as ex:
        print('课程搜索页面访问出错...\n[-]ERROR: %s' % str(ex))
        raise
    page = r.text

    if Quality:  # parse video (and optional subtitle) links
        video_pattern = r'{}{}Url="(.+?)";'.format(fileFormat, Quality)
        video_urls = re.findall(video_pattern, page)
        if not video_urls:
            return [], file_name
        links = [video_urls[0]]
        subtitles = re.findall(r's\d+\.name="([\w\\]+?)";s\d+\.url="(.+?)";', page)
        if subtitles:
            # Each match is (name, url); only the first subtitle's url is used.
            links.append(subtitles[0][1])
        return links, file_name

    # Otherwise parse courseware (PDF) links. The host dots are escaped so
    # they match literal dots only.
    pdf_urls = re.findall(r'http://nos\.netease\.com/.*?\.pdf', page)
    return pdf_urls[:1], file_name
Beispiel #4
0
def msg(message):
    """Prefix *message* with a bracketed timestamp and print it to the console.

    The line is built as ``[<date string>] <message>``, where the date string
    comes from ``unixtime.toDateString(unixtime.now())``. It is printed only
    when ``config.output_place`` equals ``config.output_console``.
    """
    stamp = unixtime.toDateString(unixtime.now())
    # "!s" forces str() on both parts, matching plain string concatenation.
    line = "[{!s}] {!s}".format(stamp, message)
    if config.output_place == config.output_console:
        print(line)