2 selenium方式
    data_type:数据采集类型 
            26 国产器械  
            27 进口器械
    root_path:文件存储路径
    =====================================================
    # '''
    # Basic runtime parameters, all read from the `cf` config object.
    config_filename = cf.get("default_config", "config_filename")
    log_name = cf.get("default_config", "log_name")
    get_type = cf.get("base_config", "get_type")  # Not in effect yet; an implementation may be needed in the future.
    data_type = cf.get("base_config", "data_type")
    root_path = cf.get("base_config", "root_path")

    # 0. Storage path for the current data-collection run
    #    (derived from root_path, data_type and the current date).
    curr_date = file_utils.get_curr_date()
    curr_root_path = config.get_curr_root_path(root_path, data_type, curr_date)

    # 1. Read the configuration.
    config_dict = None
    if not os.path.exists(curr_root_path + config_filename):
        # The config file is missing: tell the user to run init.py first, then exit.
        # NOTE(review): the exit status is 0 even though this is a failure path.
        print("程序运行基础配置信息:%s:未初始化,请先运行init.py!" % (config_filename))
        sys.exit(0)
    else:
        config_dict = config.get_config(root_path, data_type, curr_date)

    # 2. Initialize logging (log file path is curr_root_path + log_name).
    log_utils.log_config(curr_root_path + log_name)

    # 3. Start collecting.
    data_collection(config_dict)
Ejemplo n.º 2
0
    # Read the base parameters from the `cf` config object and derive the
    # storage path for the current run.
    get_type = cf.get("base_config", "get_type")  # Not in effect yet; an implementation may be needed in the future.
    data_type = cf.get("base_config", "data_type")
    root_path = cf.get("base_config", "root_path")
    curr_date = file_utils.get_curr_date()
    curr_root_path = config.get_curr_root_path(root_path, data_type, curr_date)
    # Startup banner: reminds the user to configure base_config.ini before
    # running and says where the run log can be found.
    tips = "===============================\n" \
           "程序运行前先对base_config.ini进行配置:\n" \
           "https://github.com/xiaodeme    \n" \
           "运行日志请查看 logs/data_collection.log    \n" \
           "==============================="
    print tips

    # Logging setup: create the logs folder under the current run path, then
    # configure the logger to write to LOG_NAME inside it.
    log_foloder_name = curr_root_path + "/logs/"
    file_utils.mkdir_path(log_foloder_name)
    log_utils.log_config(log_foloder_name + LOG_NAME)

    # 1. Initialize the base run configuration, then load it.
    config.init_config(root_path, data_type, get_type)
    config_dict = config.get_config(root_path, data_type, curr_date)

    # 2. Start collecting; a result below 1 is treated as failure.
    run_result = data_list_collection.data_collection(config_dict)
    if run_result < 1:
        # NOTE(review): the message says the program terminates, but no exit
        # call is visible in this fragment - confirm against the full file.
        logging.error("[1]执行不成功,终止程序运行:%s" % (run_result))

        # Clear today's data-list folder after the failed run.
        data_list_folder_name = config_dict["data_list_folder_name"]
        if file_utils.clear_folder(data_list_folder_name):
            logging.info("清空文件夹文件:%s" % (data_list_folder_name))
Ejemplo n.º 3
0
    # Download every image of the album: one URL per index in [0, img_count).
    for index in range(img_count):
        # The URL template comes from config and is filled with the album's
        # type id and the image index.
        img_download_url = cf.get("base_config", "img_download_url")
        img_download_url = img_download_url.format(img_type_id, index)
        # Disabled debug aid: would log the current image download URL.

        file_download_utils.img_download(save_folder, img_download_url)

    # Log that the download of this album has finished.
    logging.info("当前[%s]图集下载完成" % (img_name))


if __name__ == '__main__':
    # Script entry point: prepare the download folder and its log file, ask
    # the user for an album URL, then download that album into root_path.
    import os  # local import: only this entry point needs it

    root_path = cf.get("base_config", "root_path")
    index_url = cf.get("base_config", "index_url")
    file_utils.mkdir_path(root_path)

    # os.path.join avoids a doubled separator when root_path already ends
    # with one.
    log_name = os.path.join(root_path, "log.log")
    log_utils.log_config(log_name)

    print("图集正确地址查看:%s" % (index_url))
    print("图集正确输入地址:%s" % ("https://www.meituri.com/a/22523/"))
    # The prompt asks for the album *address*: that is what the example above
    # shows, what is echoed back below and what img_download receives.
    # Surrounding whitespace from a pasted URL is stripped.
    href = raw_input('请输入图集下载地址:').strip()
    if not href:
        # Nothing to download; stop before calling the downloader with "".
        raise SystemExit("未输入图集地址,程序退出")

    print("您输入的图集地址是:%s" % (href))
    print("当前图集保存文件夹:%s " % (root_path))
    print("当前图集下载日志查看:%s " % (log_name))

    img_download(root_path, href)