def get_second_page_data(debug=False):
    """Fetch the second-page data for every grade and save it to disk.

    The files named by ``config.FILENAME_SECOND_PAGE_DATA`` and
    ``config.FILENAME_SECOND_PAGE_SECTIONIDS_DATA`` are removed first.  Then,
    for each grade in ``config.GRADES``, the raw response is handed to
    ``helper.save_content_to_file`` and every ``courseSectionID`` found in the
    nested ``list`` entries of the response is saved to the section-id file.

    :param debug: passed through to ``helper.my_print`` for every progress
        message.
    """
    t0 = time.time()
    helper.remove_file(config.FILENAME_SECOND_PAGE_DATA)
    helper.remove_file(config.FILENAME_SECOND_PAGE_SECTIONIDS_DATA)
    helper.create_dir(config.SAVE_FILE_DIR)
    helper.my_print("total has %d grades...\n" % len(config.GRADES), debug)
    for grade in config.GRADES:
        # Pause before every request (interval comes from config.SLEEP_TIME).
        time.sleep(config.SLEEP_TIME)
        helper.my_print("%s:" % grade, debug)
        url = helper.get_second_page_url(grade=grade)
        # Fetch the list of knowledge chapters for this grade.
        content = helper.get_url_content(url)
        if content is None:
            helper.my_print_error("the content is None.")
        else:
            helper.save_content_to_file(config.FILENAME_SECOND_PAGE_DATA, content)
            helper.my_print(content, debug)
            # Collect the section id of every chapter in the response.
            # NOTE: no ``encoding`` keyword here -- json.loads() dropped it in
            # Python 3.9 and passing it raises TypeError on newer interpreters.
            json_data = json.loads(content)
            for course_list in json_data['list']:
                for course in course_list['list']:
                    section_id = course['courseSectionID']
                    helper.save_content_to_file(
                        config.FILENAME_SECOND_PAGE_SECTIONIDS_DATA, section_id)
                    helper.my_print(section_id, debug)
        helper.my_print("", debug)
    print('Done. %f seconds cost.' % (time.time() - t0))
def main():
    """Convert every ``*.csv`` file of the input folder into the output folder.

    The folders come from ``get_args()`` (``csv_folder`` and
    ``csv_converted_folder``).  A file whose rows fail ``rows_are_valid`` is
    skipped and any output file of the same name is removed; otherwise each
    row goes through ``convert_line`` and the result is written with
    ``write_csv``.
    """
    args = get_args()
    os.makedirs(args.csv_converted_folder, exist_ok=True)

    pattern = os.path.join(args.csv_folder, '*.csv')
    for csv_path in glob(pattern):
        stem = get_filename(csv_path, with_extension=False)
        print(stem)
        target_path = os.path.join(args.csv_converted_folder, stem + '.csv')

        rows = get_csv(csv_path)
        if rows_are_valid(rows):
            write_csv(target_path, [convert_line(row) for row in rows])
            print()
        else:
            print('\t (Skipping)')
            # Drop the output file of the same name for a skipped input.
            remove_file(target_path)
def get_main_page_data(debug=False):
    """Fetch the main page data and save it to ``config.FILENAME_MAIN_PAGE_DATA``.

    :param debug: passed through to ``helper.my_print`` when echoing the
        downloaded content.
    """
    # Remove a possibly existing file first, then save again.
    helper.remove_file(config.FILENAME_MAIN_PAGE_DATA)
    helper.create_dir(config.SAVE_FILE_DIR)

    started = time.time()
    page_url = helper.get_main_page_url()
    # Per the original note: the main page holds the first-level lists
    # (grade list, banner list, study-plan list, ...).
    payload = helper.get_url_content(page_url)
    if payload is not None:
        helper.save_content_to_file(config.FILENAME_MAIN_PAGE_DATA, payload)
        helper.my_print(payload, debug)
    else:
        helper.my_print_error("main_page_data is None.")

    elapsed = time.time() - started
    print('Done. %f seconds cost.' % elapsed)
def get_third_page_data(debug=False):
    """Fetch the third-page data for every saved section id and save it to disk.

    Section ids are read, one per line, from
    ``config.FILENAME_SECOND_PAGE_SECTIONIDS_DATA``.  For each id the raw
    response is saved to ``config.FILENAME_THIRD_PAGE_DATA`` and, for every
    entry of the response's ``list``, a JSON object holding
    ``courseSectionID``, ``sectionName`` and ``parentID`` is saved to
    ``config.FILENAME_THIRD_SECTIONIDS_DATA``.  Both output files are removed
    before the run starts.

    :param debug: passed through to ``helper.my_print`` for every progress
        message.
    """
    t0 = time.time()
    helper.remove_file(config.FILENAME_THIRD_PAGE_DATA)
    helper.remove_file(config.FILENAME_THIRD_SECTIONIDS_DATA)
    helper.create_dir(config.SAVE_FILE_DIR)

    # Read everything up front so the input file is not kept open while the
    # loop below sleeps and performs network requests.
    with open(config.FILENAME_SECOND_PAGE_SECTIONIDS_DATA) as f:
        lines = f.readlines()
    helper.my_print("total has %d chapters...\n" % len(lines), debug)

    for i, line in enumerate(lines, 1):
        if not line.strip():
            # A blank line would make int() raise and abort the whole run.
            continue
        section_id = str(int(line))
        helper.my_print("line:%d sectionID:%s" % (i, section_id), debug)
        time.sleep(config.SLEEP_TIME)
        url = helper.get_third_page_url(section_id)
        # Fetch the knowledge-point list for this chapter's sectionID.
        content = helper.get_url_content(url)
        if content is None:
            helper.my_print_error("the content is None.")
        else:
            helper.save_content_to_file(config.FILENAME_THIRD_PAGE_DATA, content)
            helper.my_print(content, debug)
            # Collect the course ids of the knowledge points; per the original
            # note they are used later to fetch the question lists.
            json_data = json.loads(content)
            for course in json_data['list']:
                course_dic = {
                    'courseSectionID': course['courseSectionID'],
                    'sectionName': course['sectionName'],
                    'parentID': course['parentID'],
                }
                data = json.dumps(course_dic, ensure_ascii=False)
                helper.save_content_to_file(
                    config.FILENAME_THIRD_SECTIONIDS_DATA, data)
                helper.my_print(data, debug)
        helper.my_print("", debug)
    print('Done. %f seconds cost.' % (time.time() - t0))
def main():
    """Stop the process described by ``cfg`` and return a status message.

    Returns one of ``"<name> not run"``, ``"<name>(<pid>) stop success"`` or
    ``"<name>(<pid>) stop failed"``.  The pid lock file is removed whenever no
    pid is found, either up front or after the stop attempt.
    """
    # Look up the pid for the configured program.
    running_pid = helper.get_pid(cfg)

    # No pid: the process is already stopped.
    if not running_pid:
        helper.remove_file(cfg.pid_lock())
        return "{0} not run".format(cfg.name())

    # Ask the process to stop.
    helper.stop_proc(running_pid)

    # A pid that is still reported means the stop did not take effect.
    if helper.get_pid(cfg):
        return "{0}({1}) stop failed".format(cfg.name(), running_pid)

    helper.remove_file(cfg.pid_lock())
    return "{0}({1}) stop success".format(cfg.name(), running_pid)
def main():
    """Remove the timer command from the crontab and return a status message.

    The current crontab is dumped to a temporary file, every line matching
    ``cfg.timer_commamd()`` is removed from that file, and the file is loaded
    back into crontab.  The temporary file is always removed afterwards.

    Returns ``"restart.py not exist"``, ``"timer not install"``,
    ``"timer remove success"`` or ``"timer remove failed"``.
    """
    if not os.path.exists("restart.py"):
        return "restart.py not exist"

    crontab_tmp = "./crontab_tmp"
    helper.crontab_to_file(crontab_tmp)
    try:
        line_numbers = helper.grep_line_numbers(cfg.timer_commamd(), crontab_tmp)
        if not line_numbers:
            return "timer not install"

        # Remove from the bottom up: deleting a line shifts every line below
        # it, so removing in ascending order could delete unrelated entries.
        # key=int orders numerically whether the numbers are ints or digit
        # strings (assumed to be one of the two -- TODO confirm against helper).
        for number in sorted(line_numbers, key=int, reverse=True):
            helper.remove_file_line(number, crontab_tmp)

        # Report a failed reload instead of claiming success; the install
        # script treats the return value of crontab_load_file the same way.
        if helper.crontab_load_file(crontab_tmp):
            return "timer remove success"
        return "timer remove failed"
    finally:
        helper.remove_file(crontab_tmp)
def main():
    """Install the timer command into the crontab and return a status message.

    The current crontab is dumped to a temporary file; if the command from
    ``cfg.timer_commamd()`` is not present yet it is appended and the file is
    loaded back into crontab.  The temporary file is always removed afterwards.

    Returns ``"restart.py not exist"``, ``"timer already installed"``,
    ``"timer install success"`` or ``"timer install failed"``.
    """
    if not os.path.exists("restart.py"):
        return "restart.py not exist"

    tmp_path = "./crontab_tmp"
    helper.crontab_to_file(tmp_path)
    try:
        existing = helper.grep_line_numbers(cfg.timer_commamd(), tmp_path)
        if len(existing) > 0:
            return "timer already installed"

        # Append the timed restart command to the dumped crontab.
        helper.append_file(cfg.timer_commamd(), tmp_path)

        # Load the edited file back into crontab and report the outcome.
        loaded = helper.crontab_load_file(tmp_path)
        return "timer install success" if loaded else "timer install failed"
    finally:
        helper.remove_file(tmp_path)
print helper.coloured_output("Uploaded new file done" , 'green') #print file_result print helper.coloured_output("Getting list of children files" , 'yellow') children_files = helper.retrieve_all_files( drive_service, config['backup_folder_id'] ) # print "children_files", children_files #print children_files print "This folder have {0} files".format( str(len(children_files)) ) if len( children_files ) > config['max_file_in_folder']: #Remove old backup file number_delete_file = len(children_files) - config['max_file_in_folder'] count = 0 index_delete_file = len(children_files) -1 while count < number_delete_file: children_id = children_files[index_delete_file]['id'] print helper.coloured_output( "Removing old file with id " + children_id , 'yellow') helper.remove_file( drive_service, children_id ) count +=1 index_delete_file -=1 print helper.coloured_output("Done backup file to google drive !" , 'green') except Exception, e: print "error" print e finally: pass
print(helper.coloured_output("Getting list of children files", 'yellow')) children_files = helper.retrieve_all_files(drive_service, config['backup_folder_id']) # print("children_files", children_files) #print(children_files) print("This folder have {0} files".format(str(len(children_files)))) if len(children_files) > config['max_file_in_folder']: #Remove old backup file number_delete_file = len(children_files) - config['max_file_in_folder'] count = 0 index_delete_file = len(children_files) - 1 while count < number_delete_file: children_id = children_files[index_delete_file]['id'] print( helper.coloured_output( "Removing old file with id " + children_id, 'yellow')) helper.remove_file(drive_service, children_id) count += 1 index_delete_file -= 1 print(helper.coloured_output("Done backup file to google drive !", 'green')) except Exception as e: print("error") print(e) finally: pass
def get_question_data(debug=False, start=1, count=0):
    """Fetch the question data for saved sections and save it to disk.

    Sections are read, one JSON object per line, from
    ``config.FILENAME_THIRD_SECTIONIDS_DATA``; each response is saved to
    ``config.FILENAME_QUESTION_DATA``.  The run stops at the first failed
    request, non-JSON response or non-``'99999'`` result code and prints the
    line number to resume from.

    :param debug: passed through to ``helper.my_print`` for progress messages.
    :param start: 1-based line number to start from; must convert to an int
        within ``1..number of lines``.  A value of 1 means a fresh run, so the
        previously saved question file is removed first.
    :param count: maximum number of lines to process.  A value ``<= 0`` or one
        that runs past the end of the file means "through the last line".
    """
    t0 = time.time()

    # Read everything up front so the input file is not kept open while the
    # loop below sleeps and performs network requests.
    with open(config.FILENAME_THIRD_SECTIONIDS_DATA) as f:
        lines = f.readlines()
    line_count = len(lines)

    # Validate the "start" parameter.
    try:
        start_index = int(start)
    except ValueError as e:
        helper.my_print_error(
            'Error: %s\n the "start" param must be a Integer number!' % e)
        return
    if start_index < 1 or start_index > line_count:
        helper.my_print_error(
            'Error: the "start" param must in range(1, len(lines)+1)')
        return

    # Validate the "count" parameter.
    try:
        limit_count = int(count)
    except ValueError as e:
        helper.my_print_error(
            'Error: %s\n the "count" param must be a Integer number!' % e)
        return
    if limit_count <= 0 or (start_index - 1 + limit_count) > line_count:
        limit_count = line_count - start_index + 1

    if start_index == 1:
        # A fresh run: clear the data file left over from a previous run.
        helper.remove_file(config.FILENAME_QUESTION_DATA)
    helper.create_dir(config.SAVE_FILE_DIR)
    helper.my_print("total has %d sections...\n" % line_count, debug)

    # Process lines start_index .. start_index + limit_count - 1 (1-based).
    selected = lines[start_index - 1:start_index - 1 + limit_count]
    for i, line in enumerate(selected, start_index):
        json_data = json.loads(line)
        section_id = json_data['courseSectionID']
        helper.my_print("line:%d sectionID:%s" % (i, section_id), debug)
        time.sleep(config.SLEEP_TIME)
        url = helper.get_question_url(section_id)
        # Fetch the question list for this knowledge point's sectionID.
        content = helper.get_url_content(url)
        if content is None:
            # The request failed: stop and tell the caller where to resume.
            helper.my_print_error("the content is None.")
            helper.my_print_error(
                "An Exception has happen. get_question_data will be exit. "
                "the next start_index should be %d" % i)
            break

        try:
            result_data = json.loads(content)
        except ValueError:
            # A non-JSON body would otherwise raise and lose the resume hint.
            helper.my_print_error(
                "the content is not valid JSON. get_question_data will be exit. "
                "the next start_index should be %d" % i)
            break

        if result_data['code'] != '99999':
            # Any code other than the success code: stop and report.
            helper.my_print_error(
                "result code is not 99999. get_question_data will be exit. "
                "the next start_index should be %d" % i)
            break

        helper.save_content_to_file(config.FILENAME_QUESTION_DATA, content)
        helper.my_print(content, debug)
        helper.my_print("", debug)
    print('Done. %f seconds cost.' % (time.time() - t0))