def get_ts_ave_dir(m3u8_url: str):
    r"""
    Return the local directory used to store the ts segments of a playlist.

    The directory part of the playlist URL's path is passed through
    ``u_file.convert_windows_path`` and appended to ``result\ts``; the
    resulting path is handed to ``u_file.ready_dir`` before being returned.

    :param m3u8_url: URL of the m3u8 playlist
    :return: path of the local save directory
    """
    normalized_url = urljoin(m3u8_url, '')
    remote_dir = os.path.dirname(urlparse(normalized_url).path)
    # presumably turns the URL path into a Windows-safe relative path — confirm in u_file
    local_subdir = u_file.convert_windows_path(remote_dir)
    target_dir = os.path.join(r'result\ts', local_subdir)
    u_file.ready_dir(target_dir)
    return target_dir
def move_test_file(predict_test_file, main_file_path, main_filename):
    """
    Move a test file into the ``test`` tree and rename the class inside it.

    The target path is built from ``main_file_path`` with ``main`` replaced by
    ``test`` and from ``main_filename`` with ``.java`` replaced by
    ``Test.java``. After the move, every occurrence of the old file's base
    name in the file content is replaced by the new test class name.

    :param predict_test_file: path of the existing test file
    :param main_file_path: path of the main folder
    :param main_filename: file name of the class under main
    :return: None
    """
    # NOTE: str.replace swaps every occurrence of 'main' in the path, not only one folder name.
    move_target_test_path = main_file_path.replace('main', 'test')
    move_target_test_path = os.path.join(
        move_target_test_path, main_filename.replace('.java', 'Test.java'))
    log.info('The test file is exist. move {} -> {}'.format(
        predict_test_file, move_target_test_path))

    # Move the file.
    u_file.ready_dir(move_target_test_path)
    os.replace(predict_test_file, move_target_test_path)

    # Rename the class inside the moved file.
    old_class_name = os.path.split(predict_test_file)[1].split('.')[0]
    new_class_name = main_filename.replace('.java', 'Test')
    with open(move_target_test_path, 'r+', encoding='UTF-8') as handler:
        content = handler.read()
        handler.seek(0)
        handler.write(content.replace(old_class_name, new_class_name))
        # The rewritten text may be shorter than the original; drop the stale tail.
        handler.truncate()
def crawler_exam_questions():
    r"""
    Download the question list of every exam paper and cache each one as JSON.

    The exam list is fetched first; each paper's questions are then requested
    and stored under ``result\jiemo-exam``. Papers that already have a cache
    file are skipped. A non-200 HTTP status skips the paper, while an error
    payload aborts the whole crawl.

    :return: None
    """
    log.info('--->begin crawler exam questions.')
    exam_list_url = 'https://share.jiemo.net/NSeries/getrealQuestionList'
    exam_question_url = 'https://share.jiemo.net/NSeries/getrealQuestionPaper'

    response = u_file.get_json(exam_list_url)
    exams = m_get(response, 'data')
    if m_get(response, 'result') != 0 or exams is None:
        log.error('request exam list error. response: {}'.format(response))
        return
    log.info('request exam list success. exams size: {}'.format(len(exams)))

    # Flatten the level -> paper hierarchy into one list of papers to download.
    exam_infos = []
    for exam in exams:
        # Tolerate an exam entry without a paper list instead of failing on iteration.
        for sub_exam in m_get(exam, 'paperList') or []:
            exam_infos.append({
                'level': m_get(exam, 'level'),
                'title': m_get(sub_exam, 'title').replace('年-', '年真题-')
            })
    log.info('exam paper size: {}'.format(len(exam_infos)))

    for exam_info in exam_infos:
        log.info('--->begin download exam paper: {}-{}'.format(exam_info['level'], exam_info['title']))

        # Skip papers whose questions are already cached locally.
        exam_question_cache_file = r'result\jiemo-exam\{}-{}.json'.format(exam_info['level'], exam_info['title'])
        u_file.ready_dir(exam_question_cache_file)
        if os.path.isfile(exam_question_cache_file):
            log.info('The exam question cache file is exist: {}'.format(exam_question_cache_file))
            continue

        # NOTE(review): verify=False disables TLS certificate verification for this request.
        response = requests.post(exam_question_url,
                                 data={'level': exam_info['level'], 'title': exam_info['title']},
                                 verify=False)
        if response.status_code != 200:
            log.error('request status code is not 200. code: {}'.format(response.status_code))
            continue

        response = json.loads(response.text)
        exam_questions = m_get(response, 'data')
        # Validate this paper's payload, not the exam list fetched earlier.
        if m_get(response, 'result') != 0 or exam_questions is None:
            log.error('request exam questions error. response: {}'.format(response))
            return
        log.info('get exam questions success. size: {}'.format(len(exam_questions)))
        u_file.cache_json(exam_questions, exam_question_cache_file)
        log.info('--->end download exam paper: {}-{}'.format(exam_info['level'], exam_info['title']))
    log.info('--->end crawler exam questions.')
def merge_ts_file(m3u8_url: str, video_name: str, decrypt_function=None):
    r"""
    Concatenate the downloaded ts segments of a playlist into one mp4 file.

    Segments are read from the directory returned by ``get_ts_ave_dir`` and
    written, in file-name order, to ``result\video\<video_name>.mp4``.

    :param m3u8_url: URL of the m3u8 playlist the segments belong to
    :param video_name: base name (without extension) of the merged file
    :param decrypt_function: optional callable ``(m3u8_url, data) -> data``
        applied to each segment's bytes before they are written
    :return: None
    """
    merge_file_path = os.path.join(r'result\video', video_name + '.mp4')
    u_file.ready_dir(merge_file_path)
    ts_dir = get_ts_ave_dir(m3u8_url)

    with open(merge_file_path, 'wb') as merge_file_handle:
        # os.listdir returns entries in arbitrary order; sort so the merge is deterministic.
        # NOTE(review): assumes segment names sort lexicographically in playback
        # order (e.g. zero-padded indexes) — confirm against the downloader.
        for ts_filename in sorted(os.listdir(ts_dir)):
            if not ts_filename.rstrip().endswith('.ts'):
                continue
            ts_filepath = os.path.join(ts_dir, ts_filename)
            with open(ts_filepath, 'rb') as ts_file_handle:
                ts_file_content = ts_file_handle.read()
            if decrypt_function is not None:
                # A decrypt function was supplied: decrypt the segment before writing.
                ts_file_content = decrypt_function(m3u8_url, ts_file_content)
            merge_file_handle.write(ts_file_content)
    log.info('merge file success: {}'.format(merge_file_path))
def crawler_grammar():
    r"""
    Download the grammar explanation JSON of every level from the Jiemo
    Japanese exam app.

    Levels N1 to N5 are requested one by one and cached as
    ``result\jiemo-grammar\grammar-<level>.json``. Levels that already have a
    cache file are skipped, as are levels whose request returns no data.

    :return: None
    """
    grammar_url = 'https://ns-api.jiemo.net/v2/NSeries/getGrammarCategroy'
    levels = ['N1', 'N2', 'N3', 'N4', 'N5']
    for level in levels:
        log.info('--->begin download grammar: {}'.format(level))
        grammar_cache_file = r'result\jiemo-grammar\grammar-{}.json'.format(level)
        u_file.ready_dir(grammar_cache_file)
        if os.path.isfile(grammar_cache_file):
            log.info('The grammar is exist. file: {}'.format(grammar_cache_file))
            continue

        # Only the level is sent here.
        # NOTE(review): presumably post_special adds any shared request parameters itself — confirm.
        data = post_special(grammar_url, {'level': level})
        if data is None:
            log.error('request grammar failed. level: {}'.format(level))
            continue
        u_file.cache_json(data, grammar_cache_file)
        log.info('--->end download grammar: {}'.format(level))
def download_exam_questions():
    r"""
    Download past-exam question lists as JSON from the Yangtuo Japanese
    vocabulary app.

    Only the N1-N3 question banks exist at present, and some years are
    missing. Each exam is stored, together with its questions, under
    ``result\yt-exam``; exams that already have a cache file are skipped.

    :return: None
    """
    n_levels = [1, 2, 3]
    for n_level in n_levels:
        log.info('--->begin download exam question. category: N{}真题'.format(n_level))
        # The API's category index is the level number minus one.
        exam_list_url = 'http://vocabulary.ytaxx.com/api/exam/getExamList?category={}'.format(n_level - 1)
        response = u_file.get_json(exam_list_url)
        if m_get(response, 'code') != 0 or m_get(response, 'data') is None:
            log.error('request exam list error. category: N{}真题'.format(n_level))
            continue

        exams = m_get(response, 'data', [])
        log.info('request category exams success. exam size: {}'.format(len(exams)))
        for exam in exams:
            # Skip exams that were already downloaded.
            # NOTE(review): the cache name ends with '-json' rather than '.json';
            # kept unchanged so existing cache files are still detected.
            exam_cache_file = r'result\yt-exam\N{}-{}-{}-json'.format(n_level, exam['examName'], exam['id'])
            u_file.ready_dir(exam_cache_file)
            if os.path.isfile(exam_cache_file):
                log.info('The exam questions is downloaded. id: {}, name: {}'.format(exam['id'], exam['examName']))
                continue

            # Download the exam's questions and save them to a local file.
            log.info('begin download exam question. exam name: {}'.format(exam['examName']))
            exam_question_url = 'http://vocabulary.ytaxx.com/api/exam/questions?examId={}'.format(exam['id'])
            response = u_file.get_json(exam_question_url)
            question_groups = m_get(response, 'data')
            # An empty payload is as unusable as a missing one: indexing [0] would raise.
            if m_get(response, 'code') != 0 or not question_groups:
                log.error('request exam questions error. category: N{}真题'.format(n_level))
                continue

            questions = question_groups[0]['questionList']
            exam['question'] = questions
            log.info('request exam question success. question size: {}'.format(len(questions)))
            u_file.cache_json(exam, exam_cache_file)
            # Short pause between downloads.
            time.sleep(0.2)
        log.info('--->end download exam question. category: N{}真题'.format(n_level))