Example #1
0
def get_new_data(config_dict):
    """
    Collect the detail data for the records that were newly added today.

    The planned workload is the queue of ids read from ``add_data.json``
    inside ``config_dict["add_folder_name"]``. The work already done is
    counted from the files found in
    ``config_dict["data_info_save_folder_name"]``. When both counts are
    equal the collection is considered finished and nothing is fetched.

    :param config_dict: run configuration; must provide the keys
        "add_folder_name" and "data_info_save_folder_name".
    :return: 1 when the collection was already complete or finished without
        raising, -1 when the collection raised an exception.
    """
    curr_date = file_utils.get_curr_date()

    # Planned workload: ids scheduled for collection today.
    add_filename = config_dict["add_folder_name"] + "add_data.json"
    new_data_queue = file_utils.get_add_data_id(add_filename)
    add_data_count = new_data_queue.qsize()

    # Actual progress: records already saved by the data_info step.
    data_info_save_folder_name = config_dict["data_info_save_folder_name"]
    file_list = file_utils.get_file_list(data_info_save_folder_name)
    data_info_count = file_utils.data_info_count(file_list)

    # Skip the collection entirely when everything planned is already saved.
    if add_data_count == data_info_count:
        logging.info("采集日期=%s,新增数据采集已经完成!" % (curr_date))
        return 1

    try:
        logging.info("开始采集今日[%s]新增数据" % (curr_date))
        get_data_info(new_data_queue, config_dict)
        return 1
    except Exception:
        # Only ordinary errors are turned into the -1 status code;
        # KeyboardInterrupt / SystemExit are allowed to propagate so the
        # program can still be stopped. The traceback is logged because the
        # caller only ever sees the status code.
        logging.exception("采集日期=%s,新增数据采集失败" % (curr_date))
        return -1
Example #2
0
import check_data

# Load the base configuration file. The path is relative, so it is resolved
# against the current working directory of the process.
# NOTE(review): ConfigParser.read() silently ignores files it cannot open;
# a missing file would only surface later as a NoSectionError from cf.get().
cf = ConfigParser.ConfigParser()
cf.read("../etc/base_config.cfg")

# Name of the log file created inside the per-run "logs" folder.
LOG_NAME = "data_collection.log"
if __name__ == "__main__":
    # Basic run parameters taken from the configuration file.
    # NOTE(review): config_filename and log_name are read but not used in the
    # lines visible here; the log file name comes from LOG_NAME instead.
    config_filename = cf.get("default_config", "config_filename")
    log_name = cf.get("default_config", "log_name")
    get_type = cf.get("base_config", "get_type")  # not in effect yet; an implementation may be needed in the future
    data_type = cf.get("base_config", "data_type")
    root_path = cf.get("base_config", "root_path")
    curr_date = file_utils.get_curr_date()
    curr_root_path = config.get_curr_root_path(root_path, data_type, curr_date)
    # Start-up banner printed to stdout before logging is configured.
    # NOTE(review): the banner mentions "base_config.ini" while the file read
    # above is "base_config.cfg" - confirm which name is correct.
    tips = "===============================\n" \
           "程序运行前先对base_config.ini进行配置:\n" \
           "https://github.com/xiaodeme    \n" \
           "运行日志请查看 logs/data_collection.log    \n" \
           "==============================="
    print tips

    # Logging setup: make sure the log folder exists, then point the logger
    # at <curr_root_path>/logs/<LOG_NAME>.
    log_foloder_name = curr_root_path + "/logs/"
    file_utils.mkdir_path(log_foloder_name)
    log_utils.log_config(log_foloder_name + LOG_NAME)

    # 1. Initialise the base configuration used by the rest of the run.
    config.init_config(root_path, data_type, get_type)
def _load_data_list_ids(data_list_folder_name):
    """Return, as a list, the ids found in the data-list files of a folder."""
    file_list = file_utils.get_file_list(data_list_folder_name)
    return list(file_utils.get_data_info_id(file_list).queue)


def data_process(config_dict, max_lookback_days=365):
    """
    Compare today's collected ids with the most recent earlier collection.

    Ids present today but not in the earlier collection are written to the
    "add" file; ids present earlier but missing today are written to the
    "reduce" file. If the "add" file already exists the analysis is assumed
    to have been done and the function returns immediately.

    The earlier collection is searched one day at a time, starting with
    yesterday, because a given day's collection may have failed and left no
    data. The search is limited to ``max_lookback_days`` so that a first run
    (no history at all) terminates; in that case the baseline is empty and
    every id collected today is reported as added.

    :param config_dict: run configuration; must provide the keys
        "add_folder_name", "reduct_folder_name" and "data_list_folder_name".
    :param max_lookback_days: maximum number of past days to inspect while
        looking for a non-empty earlier collection.
    :return: None
    """
    add_filename = config_dict["add_folder_name"] + ADD_DATA_FILENAME
    reduce_filename = config_dict["reduct_folder_name"] + REDUCE_DATA_FILENAME
    data_list_folder_name = config_dict["data_list_folder_name"]

    # The add file doubles as the "analysis already done" marker.
    if os.path.exists(add_filename):
        logging.info("数据分析:本次新增减少[add/reduct]文件已经存储:%s" % (add_filename))
        return

    data_type = cf.get("base_config", "data_type")
    root_path = cf.get("base_config", "root_path")

    # Walk backwards day by day until a day with collected data is found.
    previous_date = None
    previous_ids = []
    for days_back in range(1, max_lookback_days + 1):
        previous_date = file_utils.get_last_date(days_back)
        previous_root = config.get_last_root_path(
            root_path, data_type, previous_date)
        previous_ids = _load_data_list_ids(previous_root + "/data_list/")
        if previous_ids:
            break
        logging.warning("数据分析:[%s]数据采集数量: %s" %
                        (previous_date, len(previous_ids)))
    else:
        # No history inside the look-back window: continue with an empty
        # baseline instead of searching forever.
        logging.warning("数据分析:最近%s天内未找到历史采集数据,本次数据全部视为新增" %
                        (max_lookback_days))

    logging.info("数据分析:[%s]数据采集数量: %s" % (previous_date, len(previous_ids)))

    today_ids = _load_data_list_ids(data_list_folder_name)
    logging.info("数据分析:今天[%s]数据采集数量: %s" %
                 (file_utils.get_curr_date(), len(today_ids)))

    # Build each set once; both differences below reuse them.
    previous_id_set = set(previous_ids)
    today_id_set = set(today_ids)

    # Ids that appeared today (in today's data, not in the baseline).
    add_data = list(today_id_set.difference(previous_id_set))
    file_utils.write_file(add_filename, str(add_data))
    logging.info("数据分析:本次新增数据:%s" % (len(add_data)))
    if add_data:
        logging.info("本次新增数据标识已经保存")

    # Ids that disappeared today (in the baseline, not in today's data).
    reduce_data = list(previous_id_set.difference(today_id_set))
    file_utils.write_file(reduce_filename, str(reduce_data))
    logging.info("数据分析:本次减少数据:%s" % (len(reduce_data)))
    if reduce_data:
        logging.info("本次减少数据标识已经保存")