(json.dumps(x, ensure_ascii=False) + '\n').encode('utf-8')) n_success = 0 n_error = 0 def get_data_response(url): global n_success, n_error resp = requests.get(url) if resp.status_code == 200: data = extract_content(resp.content) n_success += 1 print("Success: {}, Error: {}".format(n_success, n_error)) data['url'] = url return data n_error += 1 return None if __name__ == '__main__': for i in range(12, 36): urls = get_data_from_file(url_file.format(i)) for j in range(10): data = multithread_helper(urls[j * 10:(j + 1) * 10], get_data_response, timeout_concurrent_by_second=720, max_workers=50, debug=False) store_json_perline_to_file(data, data_file.format(i), True)
batch_sbd = 5000 max_sbd = get_min_max_by_code(provide_id) # logger.info(max_sbd) # max_sbd = 5743 lst_sbd = [] for pos in range(1, max_sbd): sbd = build_sbd(provide_id=provide_id, post_sbd=pos) lst_sbd.append(sbd) for idx, sub_lst_sbd in enumerate( get_sublists(lst_sbd, int(len(lst_sbd) / 5000) + 1)): file_diemthi_path = ConfigUniversityProject( ).file_diemthi_2019_path(provide_id=provide_id, part=idx) if os.path.exists(file_diemthi_path): logger.info(f'skip: {file_diemthi_path}') continue obj_sbd = multithread_helper( items=sub_lst_sbd, method=get_info, timeout_concurrent_by_second=36000, max_workers=50, debug=False) store_jsons_perline_in_file(jsons_obj=obj_sbd, file_output_path=file_diemthi_path) logger.info(f'write: {file_diemthi_path}') except Exception as e: logger.error(e) logger.info('done')
} return None def method_univerisy_data(university_obj): university_diemchuan_data = extract_data_diemchuan( url_diemchuan=university_obj.get('url'), university_meta=university_obj) return university_diemchuan_data if __name__ == '__main__': file_university_path = ConfigUniversityProject().file_university_path universities = load_jsonl_from_gz(file_university_path) # logger.info(universities) universities_diemchuan_data = multithread_helper( items=universities, method=method_univerisy_data, timeout_concurrent_by_second=360, debug=False, max_workers=20) file_university_diemchuan_path = ConfigUniversityProject( ).file_university_diemchuan_path store_jsons_perline_in_file( jsons_obj=universities_diemchuan_data, file_output_path=file_university_diemchuan_path) logger.info( f'stored file_university_diemchuan_path: {file_university_diemchuan_path}' )