コード例 #1
0
ファイル: crawler.py プロジェクト: youyouzh/python-base
def get_ts_ave_dir(m3u8_url: str):
    """
    Build the local save directory for the TS segments of an m3u8 playlist.

    The directory part of the URL path is converted with
    ``u_file.convert_windows_path`` and appended to the TS result root.

    :param m3u8_url: URL of the m3u8 playlist
    :return: the local directory path, after it was passed to ``u_file.ready_dir``
    """
    normalized_url = urljoin(m3u8_url, '')
    remote_dir = os.path.dirname(urlparse(normalized_url).path)
    local_sub_dir = u_file.convert_windows_path(remote_dir)
    target_dir = os.path.join(r'result\ts', local_sub_dir)
    # presumably makes sure the directory exists before callers write into it -- TODO confirm
    u_file.ready_dir(target_dir)
    return target_dir
コード例 #2
0
def move_test_file(predict_test_file, main_file_path, main_filename):
    """
    Move an existing test file to the test tree and rename the class inside it.

    The target directory is ``main_file_path`` with every ``'main'`` replaced
    by ``'test'``; the target file name is ``main_filename`` with ``.java``
    replaced by ``Test.java``.

    :param predict_test_file: path of the existing test file to move
    :param main_file_path: path of the folder holding the main class
    :param main_filename: file name of the main class (``*.java``)
    :return: None
    """
    move_target_test_path = os.path.join(
        main_file_path.replace('main', 'test'),
        main_filename.replace('.java', 'Test.java'))
    log.info('The test file is exist. move {} -> {}'.format(
        predict_test_file, move_target_test_path))

    # Move the file to its new location.
    u_file.ready_dir(move_target_test_path)
    os.replace(predict_test_file, move_target_test_path)

    # Rename the class inside the moved file: the old class name is the
    # original file's base name, the new one follows the main class name.
    old_class_name = os.path.split(predict_test_file)[1].split('.')[0]
    new_class_name = main_filename.replace('.java', 'Test')
    with open(move_target_test_path, 'r+', encoding='UTF-8') as handler:
        content = handler.read()
        handler.seek(0)
        handler.write(content.replace(old_class_name, new_class_name))
        # The new content may be shorter than the old one; drop the stale
        # tail that an in-place overwrite would otherwise leave behind.
        handler.truncate()
コード例 #3
0
ファイル: jiemo_crawler.py プロジェクト: youyouzh/python-base
def crawler_exam_questions():
    """
    Download the question list of every exam paper and cache each one as JSON.

    The exam list is requested first; for every paper found, the questions are
    requested and stored in a per-paper cache file. Papers whose cache file
    already exists are skipped.

    :return: None
    """
    log.info('--->begin crawler exam questions.')
    exam_list_url = 'https://share.jiemo.net/NSeries/getrealQuestionList'
    exam_question_url = 'https://share.jiemo.net/NSeries/getrealQuestionPaper'
    response = u_file.get_json(exam_list_url)
    exams = m_get(response, 'data')
    if m_get(response, 'result') != 0 or exams is None:
        log.error('request exam list error. response: {}'.format(response))
        return
    exam_infos = []
    log.info('request exam list success. exams size: {}'.format(len(exams)))
    for exam in exams:
        for sub_exam in m_get(exam, 'paperList'):
            exam_infos.append({
                'level': m_get(exam, 'level'),
                'title': m_get(sub_exam, 'title').replace('年-', '年真题-')
            })
    log.info('exam paper size: {}'.format(len(exam_infos)))
    for exam_info in exam_infos:
        log.info('--->begin download exam paper: {}-{}'.format(exam_info['level'], exam_info['title']))
        # Skip papers whose questions are already cached locally.
        exam_question_cache_file = r'result\jiemo-exam\{}-{}.json'.format(exam_info['level'], exam_info['title'])
        u_file.ready_dir(exam_question_cache_file)
        if os.path.isfile(exam_question_cache_file):
            log.info('The exam question cache file is exist: {}'.format(exam_question_cache_file))
            continue

        # NOTE(review): certificate verification is disabled for this request.
        response = requests.post(exam_question_url,
                                 data={'level': exam_info['level'], 'title': exam_info['title']},
                                 verify=False)
        if response.status_code != 200:
            log.error('request status code is not 200. code: {}'.format(response.status_code))
            continue
        response = json.loads(response.text)
        exam_questions = m_get(response, 'data')
        # Validate the payload of THIS response; the original re-checked the
        # exam list here, so a missing 'data' crashed on len() below.
        if m_get(response, 'result') != 0 or exam_questions is None:
            log.error('request exam questions error. response: {}'.format(response))
            return
        log.info('get exam questions success. size: {}'.format(len(exam_questions)))
        u_file.cache_json(exam_questions, exam_question_cache_file)
        log.info('--->end download exam paper: {}-{}'.format(exam_info['level'], exam_info['title']))
    log.info('--->end crawler exam questions.')
コード例 #4
0
ファイル: crawler.py プロジェクト: youyouzh/python-base
def merge_ts_file(m3u8_url: str, video_name: str, decrypt_function=None):
    """
    Concatenate the downloaded ``.ts`` segments of a playlist into one mp4 file.

    :param m3u8_url: URL of the m3u8 playlist; selects the segment directory
        via ``get_ts_ave_dir`` and is passed to ``decrypt_function``
    :param video_name: base name (without extension) of the merged video file
    :param decrypt_function: optional callable ``(m3u8_url, data) -> data``
        applied to each segment's bytes before they are written
    :return: None
    """
    merge_file_path = os.path.join(r'result\video', video_name + '.mp4')
    u_file.ready_dir(merge_file_path)
    ts_dir = get_ts_ave_dir(m3u8_url)

    with open(merge_file_path, 'wb') as merge_file_handle:
        # os.listdir returns entries in arbitrary order; sort so the merge
        # order is deterministic. Assumes segment names sort lexicographically
        # in playback order -- TODO confirm against the downloader's naming.
        for ts_filename in sorted(os.listdir(ts_dir)):
            if not ts_filename.rstrip().endswith('.ts'):
                continue
            ts_filepath = os.path.join(ts_dir, ts_filename)
            with open(ts_filepath, 'rb') as ts_file_handle:
                ts_file_content = ts_file_handle.read()
            if decrypt_function is not None:
                # if defined decrypt function, decrypt the data
                ts_file_content = decrypt_function(m3u8_url, ts_file_content)
            # The original also called shutil.copyfileobj here, but the handle
            # was already at EOF after read(), so it never copied anything.
            merge_file_handle.write(ts_file_content)
    log.info('merge file success: {}'.format(merge_file_path))
コード例 #5
0
ファイル: jiemo_crawler.py プロジェクト: youyouzh/python-base
def crawler_grammar():
    """
    Download the grammar explanation JSON of every level (N1-N5) from the
    Jiemo Japanese exam app and cache it locally.

    Levels whose cache file already exists are skipped; a level whose request
    returns ``None`` is logged and skipped.

    :return: None
    """
    grammar_url = 'https://ns-api.jiemo.net/v2/NSeries/getGrammarCategroy'
    levels = ['N1', 'N2', 'N3', 'N4', 'N5']
    for level in levels:
        log.info('--->begin download grammar: {}'.format(level))
        grammar_cache_file = r'result\jiemo-grammar\grammar-{}.json'.format(level)
        u_file.ready_dir(grammar_cache_file)
        if os.path.isfile(grammar_cache_file):
            log.info('The grammar is exist. file: {}'.format(grammar_cache_file))
            continue
        # The original also built a copy of COMMON_PARAMS with the level set,
        # but never used it; only the level is sent, exactly as before.
        data = post_special(grammar_url, {'level': level})
        if data is None:
            log.info('request grammar failed. level: {}'.format(level))
            continue
        u_file.cache_json(data, grammar_cache_file)
        log.info('--->end download grammar: {}'.format(level))
コード例 #6
0
def download_exam_questions():
    """
    Download the past-exam question list JSON from the Yangtuo (alpaca)
    Japanese vocabulary app and cache each exam locally.

    Only the N1-N3 question banks are currently available, and some years
    are missing.

    :return: None
    """
    n_levels = [1, 2, 3]
    for n_level in n_levels:
        log.info('--->begin download exam question. category: N{}真题'.format(n_level))
        # The category parameter sent to the API is the level minus one.
        exam_list_url = 'http://vocabulary.ytaxx.com/api/exam/getExamList?category={}'.format(n_level - 1)
        response = u_file.get_json(exam_list_url)
        if m_get(response, 'code') != 0 or m_get(response, 'data') is None:
            log.error('request exam list error. category: N{}真题'.format(n_level))
            continue
        exams = m_get(response, 'data', [])
        log.info('request category exams success. exam size: {}'.format(len(exams)))

        for exam in exams:
            # Skip exams that have already been downloaded.
            # NOTE(review): the name ends in '-json' rather than '.json'; kept
            # unchanged so previously cached files are still recognised.
            exam_cache_file = r'result\yt-exam\N{}-{}-{}-json'.format(n_level, exam['examName'], exam['id'])
            u_file.ready_dir(exam_cache_file)
            if os.path.isfile(exam_cache_file):
                log.info('The exam questions is downloaded. id: {}, name: {}'.format(exam['id'], exam['examName']))
                continue

            # Download the exam's question JSON and save it to a local file.
            log.info('begin download exam question. exam name: {}'.format(exam['examName']))
            exam_question_url = 'http://vocabulary.ytaxx.com/api/exam/questions?examId={}'.format(exam['id'])
            response = u_file.get_json(exam_question_url)
            # Reject a missing AND an empty 'data' payload: the first element
            # is indexed below, which would raise IndexError on an empty list.
            if m_get(response, 'code') != 0 or not m_get(response, 'data'):
                log.error('request exam questions error. category: N{}真题'.format(n_level))
                continue
            questions = response['data'][0]['questionList']
            # The questions are attached to the exam record so that one file
            # holds both the exam metadata and its questions.
            exam['question'] = questions
            log.info('request exam question success. question size: {}'.format(len(questions)))
            u_file.cache_json(exam, exam_cache_file)
            # Short pause between question requests.
            time.sleep(0.2)
        log.info('--->end download exam question. category: N{}真题'.format(n_level))