def img_download(root_path, img_url):
    """Download every image of one gallery into its own folder.

    The gallery name and image count are read from the mapping returned by
    ``get_img_info(img_url)`` (keys ``img_name`` and ``img_count``). Each image
    URL is built from the ``img_download_url`` template in the ``base_config``
    section, formatted with the gallery id and a 0-based image index.

    Args:
        root_path: Directory under which a folder named after the gallery
            is created.
        img_url: Gallery page URL, e.g. ``https://www.meituri.com/a/22523/``.
    """
    # Local import keeps this block self-contained regardless of the
    # file-level import list.
    import re

    # 1. Fetch the gallery metadata.
    img_info = get_img_info(img_url)
    img_name = img_info["img_name"]
    img_count = int(img_info["img_count"])

    # 2. Create the folder the images are saved into.
    save_folder = root_path + "/" + img_name
    file_utils.mkdir_path(save_folder)

    # 3. Extract the gallery id from the URL.
    # The plain slice below returns an empty id when the URL is typed without
    # its trailing slash (".../a/22523"), so the numeric id after "/a/" is
    # matched explicitly first; the original slice stays as a fallback for
    # URLs that do not follow that shape.
    id_match = re.search(r"/a/(\d+)", img_url)
    if id_match:
        img_type_id = id_match.group(1)
    else:
        img_type_id = img_url[img_url.rfind("a") + 2:img_url.rfind("/")]

    logging.info("当前[%s]图集下载地址:%s", img_name, img_url)
    logging.info("当前[%s]图集下载总数img_count=%s,img_type_id=%s",
                 img_name, img_count, img_type_id)

    # The URL template does not change between images: read it once.
    url_template = cf.get("base_config", "img_download_url")
    for index in range(img_count):
        img_download_url = url_template.format(img_type_id, index)
        file_download_utils.img_download(save_folder, img_download_url)

    logging.info("当前[%s]图集下载完成", img_name)
# Collection run script: read the run settings, set up logging, initialise the
# configuration and start the data-list collection.
# NOTE(review): this block was recovered from a single collapsed line; the
# statement nesting below is reconstructed and should be confirmed.
log_name = cf.get("default_config", "log_name")
get_type = cf.get("base_config", "get_type")  # This parameter has no effect yet; an implementation may be needed later
data_type = cf.get("base_config", "data_type")
root_path = cf.get("base_config", "root_path")
curr_date = file_utils.get_curr_date()
curr_root_path = config.get_curr_root_path(root_path, data_type, curr_date)

# Start-up banner printed to the console before anything else runs.
tips = "===============================\n" \
       "程序运行前先对base_config.ini进行配置:\n" \
       "https://github.com/xiaodeme \n" \
       "运行日志请查看 logs/data_collection.log \n" \
       "==============================="
print tips

# Logging setup: logs go into a "logs/" folder under the current root path.
log_foloder_name = curr_root_path + "/logs/"
file_utils.mkdir_path(log_foloder_name)
# NOTE(review): LOG_NAME (upper-case) is not defined in the visible code, while
# log_name read from the config above is not used here — confirm which one is
# intended.
log_utils.log_config(log_foloder_name + LOG_NAME)

# 1. Initialise the basic run configuration.
config.init_config(root_path, data_type, get_type)
config_dict = config.get_config(root_path, data_type, curr_date)

# 2. Start the collection.
run_result = data_list_collection.data_collection(config_dict)
if run_result < 1:
    # A result below 1 is treated as failure and logged as an error.
    logging.error("[1]执行不成功,终止程序运行:%s" % (run_result))
    # Clear today's folder.
    # NOTE(review): assumed to belong to the failure branch — confirm nesting.
    data_list_folder_name = config_dict["data_list_folder_name"]
    if file_utils.clear_folder(data_list_folder_name):
        logging.info("清空文件夹文件:%s" % (data_list_folder_name))
import sys

# Python 2 idiom: set the process-wide default string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

from utils import etc_utils
from utils import access_data_utils
from utils import file_utils

# Load the configuration.
# NOTE(review): the meaning of the first argument (26) is not visible here —
# presumably a data-type id; confirm against etc_utils.DataTypeConfig.
dataTypeConfig = etc_utils.DataTypeConfig(26, "../etc/example1_get_type.cfg")
save_root_path = dataTypeConfig.get_save_root_path()
total_count = int(dataTypeConfig.get_total_count())

# Save path for the data_list files.
DATA_LIST_PATH = save_root_path + "/data_list/"
file_utils.mkdir_path(DATA_LIST_PATH)

# Save path for the device detail data.
DATA_INFO_PATH = save_root_path + "/data_info/"
file_utils.mkdir_path(DATA_INFO_PATH)

# Save path for the logs.
LOG_PATH = save_root_path + "/logs/"
file_utils.mkdir_path(LOG_PATH)

# Collect the ids of the data to fetch from the files under DATA_LIST_PATH.
file_list = file_utils.get_file_list(DATA_LIST_PATH)
id_list = file_utils.get_all_data_id(file_list)
# id_list is queried with qsize(), i.e. it is handled as a queue-like object.
print("采集数据总量:%s" % (id_list.qsize()))

# NOTE(review): the triple-quoted block below is not closed within the visible
# text; it is kept exactly as found.
''' 获取data_info数据
(img_name, img_count, img_type_id))
    # NOTE(review): the lines from the top of this block down to the
    # "download finished" log are the tail of a function whose header lies
    # outside the visible text; they are kept unchanged.
    # Download the images one by one: the URL template is read from the
    # base_config section and formatted with the gallery id and the
    # 0-based image index.
    for index in range(img_count):
        img_download_url = cf.get("base_config", "img_download_url")
        img_download_url = img_download_url.format(img_type_id, index)
        # Debug aid (disabled): log the URL of the image being downloaded.
        file_download_utils.img_download(save_folder, img_download_url)
    logging.info("当前[%s]图集下载完成" % (img_name))


# Script entry point: configure logging, ask the user for a gallery URL and
# download that gallery into root_path.
if __name__ == '__main__':
    # Both settings come from the base_config section of the configuration.
    root_path = cf.get("base_config", "root_path")
    index_url = cf.get("base_config", "index_url")

    # Make sure the save root exists before the log file is created inside it.
    file_utils.mkdir_path(root_path)
    log_name = root_path + "/" + "log.log"
    log_utils.log_config(log_name)

    # Tell the user where to find valid gallery URLs and show a sample one.
    print("图集正确地址查看:%s" % (index_url))
    print("图集正确输入地址:%s" % ("https://www.meituri.com/a/22523/"))

    # Example value for running without the prompt:
    # href = 'https://www.meituri.com/a/22523/'
    # raw_input is the Python 2 console prompt. The prompt text asks for a
    # download "type", but the value is echoed back as the gallery address and
    # passed on as the URL argument of img_download.
    href = raw_input('请输入图集下载类型:')
    print("您输入的图集地址是:%s" % (href))
    print("当前图集保存文件夹:%s " % (root_path))
    print("当前图集下载日志查看:%s " % (log_name))

    img_download(root_path, href)