2 selenium方式 data_type:数据采集类型 26 国产器械 27 进口器械 root_path:文件存储路径 ===================================================== # ''' #运行程序基础参数 config_filename = cf.get("default_config", "config_filename") log_name = cf.get("default_config", "log_name") get_type = cf.get("base_config", "get_type") # 该参数暂时未生效,未来可能需要实现方式 data_type = cf.get("base_config", "data_type") root_path = cf.get("base_config", "root_path") #0.当前数据采集存储路径 curr_date = file_utils.get_curr_date() curr_root_path = config.get_curr_root_path(root_path, data_type, curr_date) #1.读取配置信息 config_dict = None if not os.path.exists(curr_root_path + config_filename): print("程序运行基础配置信息:%s:未初始化,请先运行init.py!" % (config_filename)) sys.exit(0) else: config_dict = config.get_config(root_path, data_type, curr_date) #2.初始化日志 log_utils.log_config(curr_root_path + log_name) #3.开始采集 data_collection(config_dict)
# This setting has no effect yet; it may need to be implemented later.
get_type = cf.get("base_config", "get_type")
data_type = cf.get("base_config", "data_type")
root_path = cf.get("base_config", "root_path")

curr_date = file_utils.get_curr_date()
curr_root_path = config.get_curr_root_path(root_path, data_type, curr_date)

tips = (
    "===============================\n"
    "程序运行前先对base_config.ini进行配置:\n"
    "https://github.com/xiaodeme \n"
    "运行日志请查看 logs/data_collection.log \n"
    "==============================="
)
# Call form of print: same output on Python 2 for a single argument, and
# unlike the bare print statement it is also valid on Python 3.
print(tips)

# Logging setup: make sure the log folder exists before configuring the log.
log_foloder_name = curr_root_path + "/logs/"
file_utils.mkdir_path(log_foloder_name)
log_utils.log_config(log_foloder_name + LOG_NAME)

# 1. Initialise the basic run configuration, then load it.
config.init_config(root_path, data_type, get_type)
config_dict = config.get_config(root_path, data_type, curr_date)

# 2. Start collecting.
run_result = data_list_collection.data_collection(config_dict)
if run_result < 1:
    logging.error("[1]执行不成功,终止程序运行:%s", run_result)
    # Empty today's folder after an unsuccessful run.
    data_list_folder_name = config_dict["data_list_folder_name"]
    if file_utils.clear_folder(data_list_folder_name):
        logging.info("清空文件夹文件:%s", data_list_folder_name)
# NOTE(review): this loop and the log call after it are the tail of a
# definition whose header lies above this chunk; re-indent them to match the
# enclosing body when merging.
# Download the images one by one: the URL template is filled with
# img_type_id and the running index, then handed to the downloader.
for index in range(img_count):
    img_download_url = cf.get("base_config", "img_download_url")
    img_download_url = img_download_url.format(img_type_id, index)
    # (debug) log img_download_url here when tracing individual downloads
    file_download_utils.img_download(save_folder, img_download_url)
logging.info("当前[%s]图集下载完成" % (img_name))


if __name__ == '__main__':
    # Script entry point: ask for an album URL and download it to root_path.
    root_path = cf.get("base_config", "root_path")
    index_url = cf.get("base_config", "index_url")
    file_utils.mkdir_path(root_path)

    log_name = root_path + "/" + "log.log"
    log_utils.log_config(log_name)

    print("图集正确地址查看:%s" % (index_url))
    print("图集正确输入地址:%s" % ("https://www.meituri.com/a/22523/"))

    # The value entered is an album URL (see the example above), so the prompt
    # asks for an address. Surrounding whitespace is stripped so a stray space
    # does not end up inside the URL.
    href = raw_input('请输入图集下载地址:').strip()
    if not href:
        # Nothing was entered: do not call the downloader with an empty URL.
        print("未输入图集地址,程序结束")
    else:
        print("您输入的图集地址是:%s" % (href))
        print("当前图集保存文件夹:%s " % (root_path))
        print("当前图集下载日志查看:%s " % (log_name))
        img_download(root_path, href)