Ejemplo n.º 1
0
def get_ds_cap_rate(parm_csv_floder, parm_csv_file_list):
    """Read DS CSV files and compute device-fingerprint capture rates.

    Args:
        parm_csv_floder: folder containing the CSV files (note: name kept
            as-is for caller compatibility, despite the "floder" typo).
        parm_csv_file_list: iterable of CSV file names inside that folder.

    Returns:
        DataFrame with one row per input file, containing the counts produced
        by ``get_cap_rate`` plus derived columns ``td_cap_rate``,
        ``zy_cap_rate`` and a ``batch_no`` timestamp.
    """
    conf = config.WeeklyStatConfig()
    logger = Logger(path=conf.LOG_PATH)
    rst = pd.DataFrame({})
    for csv_file in parm_csv_file_list:
        # 1. Build the CSV file path.
        # BUG FIX: the original code logged the computed path and then
        # overwrote it with a hard-coded debug path, so every iteration
        # re-read the same file regardless of the arguments. The debug
        # override has been removed.
        csv_file_path = os.path.join(parm_csv_floder, csv_file)
        logger.info("csv_file_path = %s " % csv_file_path)

        # 2. Read the CSV file in 5000-row chunks; all columns as str so
        # fingerprint values keep their leading zeros.
        reader = pd.read_csv(csv_file_path,
                             encoding="gbk",
                             chunksize=5000,
                             iterator=True,
                             dtype=str)

        # 3. Aggregate the per-file capture counts.
        # pd.concat instead of DataFrame.append (removed in pandas 2.0).
        df_cap_rate = get_cap_rate(reader)
        rst = pd.concat([rst, df_cap_rate], ignore_index=True)

    # 4. Derive capture rates and tag the batch with a timestamp.
    rst["td_cap_rate"] = rst["td_cap_cnt"] / rst["trx_cnt"]
    rst["zy_cap_rate"] = rst["zy_cap_cnt"] / rst["trx_cnt"]
    rst["batch_no"] = time.strftime("%Y%m%d%H%M%S")
    logger.info("rst %s \n" % rst)
    return rst
Ejemplo n.º 2
0
def common_logger():
    """Return a Logger wired to the weekly-stat log path from config."""
    return Logger(path=config.WeeklyStatConfig().LOG_PATH)
Ejemplo n.º 3
0
def get_recg_rate(parm_reader):
    """Compute device-recognition counts from a chunked CSV reader.

    Iterates *parm_reader* (presumably a pandas chunked ``read_csv``
    iterator — confirm against callers), collects all transactions, and
    counts how many devices the in-house ("zy") fingerprint groups as
    duplicates versus how many distinct Tongdun ("td") fingerprints those
    same transactions carry.

    Returns:
        One-row DataFrame with columns ``date_range``, ``prod_id``,
        ``td_recg_cnt`` and ``zy_recg_cnt``.
    """
    conf = config.WeeklyStatConfig()
    # logger is only referenced by the commented-out debug lines below.
    logger = Logger(path=conf.LOG_PATH)
    all_trx = pd.DataFrame({})
    prod_id = pd.Series()
    for chunk in parm_reader:
        # Keep only: institution id, Tongdun fingerprint, in-house
        # fingerprint, platform date (original Chinese column names).
        x = chunk[["机构号", "同盾设备指纹", "自研设备指纹", "平台日期"]]
        all_trx = all_trx.append(x)
        # Derive a product id per row and carry it alongside the
        # accumulated transactions (re-assigned every chunk so the column
        # stays aligned with the growing frame).
        prod_id_list = chunk.apply(get_prod_id, axis=1)
        prod_id = prod_id.append(prod_id_list)
        all_trx["prod_id"] = prod_id
    # 1. Data preparation: rename the Chinese columns to English keys.
    all_trx.rename(columns={
        "机构号": "inst_id",
        "同盾设备指纹": "td_device",
        "自研设备指纹": "zy_device",
        "平台日期": "plat_date"
    },
                   inplace=True)

    # 2. Extract the product id and the date range (read BEFORE dropna so
    # they reflect the raw data, including rows dropped below).
    rtn_prod_id = all_trx.iloc[0]["prod_id"]
    all_trx_sort = all_trx.sort_values(by=["plat_date"], ascending=True)
    start_date = all_trx_sort.iloc[0]["plat_date"].replace("-", "")
    end_date = all_trx_sort.iloc[-1]["plat_date"].replace("-", "")
    rtn_date_range = start_date + "-" + end_date

    # 3. Drop abnormal rows: any NaN, td sentinel "111111", zy sentinel
    # "000000".
    all_trx = all_trx.dropna(axis=0, how="any")
    all_trx = all_trx[all_trx["td_device"] != "111111"]
    all_trx = all_trx[all_trx["zy_device"] != "000000"]

    # 4. Count devices that the in-house fingerprint sees as the same
    # device more than once (zy_recg_cnt): pivot counts transactions per
    # zy_device.
    trx_cnt = pd.pivot_table(all_trx,
                             index=["zy_device"],
                             values="inst_id",
                             aggfunc=len)
    if len(trx_cnt) == 0:
        # No usable rows after filtering: return zero counts.
        rst = pd.DataFrame(
            {
                "date_range": [rtn_date_range],
                "prod_id": [rtn_prod_id],
                "td_recg_cnt": 0,
                "zy_recg_cnt": 0,
            },
            index=None)
    else:
        # Keep only fingerprints seen more than once (i.e. recognized as
        # the same device across transactions).
        trx_cnt = trx_cnt[trx_cnt["inst_id"] > 1]
        zy_recg_cnt = len(trx_cnt)
        # logger.info("zy_recg_cnt: %s" % zy_recg_cnt)

        # 5. List of fingerprints the in-house engine flags as duplicates
        # (zy_dup_list).
        trx_cnt = trx_cnt.reset_index()
        zy_dup_list = trx_cnt[["zy_device"]]

        # 6. Find the Tongdun fingerprints carried by those same
        # transactions and count the distinct ones (td_recg_cnt).
        td_di = pd.merge(all_trx, zy_dup_list, how="inner", on="zy_device")
        td_di = td_di.drop_duplicates(["td_device"])
        td_recg_cnt = len(td_di)
        # logger.info("td_recg_cnt: %s \n" % td_recg_cnt)

        # 7. Assemble the one-row result DataFrame.
        rst = pd.DataFrame(
            {
                "date_range": [rtn_date_range],
                "prod_id": [rtn_prod_id],
                "td_recg_cnt": [td_recg_cnt],
                "zy_recg_cnt": [zy_recg_cnt],
            },
            index=None)
    # logger.info("rst\n %s" % rst)
    return rst
Ejemplo n.º 4
0
    # NOTE(review): this is the body of a setup routine (presumably the
    # init() called from __main__ below) whose `def` line is missing from
    # this chunk of the file — confirm against the full source.
    # `logger` and `conf` are read as module-level globals assigned in the
    # __main__ guard.
    global TO_BE_BUSI_TYPE, TO_BE_CSV_FLODER
    # TO_BE_BUSI_TYPE: CS, CW, DS
    TO_BE_BUSI_TYPE = "DS"
    TO_BE_CSV_FLODER = "E:/myself/VBA/csv_files/pythonTest"
    logger.info("\n####LOG START####")
    logger.info("\n--------------------------------------------")
    logger.info("[1]. SYSTEM CONSTANT:")
    logger.info("LOG PATH: %s" % conf.LOG_PATH)
    logger.info("RESULT PATH: %s" % conf.RESULT_PATH)
    logger.info("[2]. BUSINESS CONSTANT:")
    logger.info("TO_BE_BUSI_TYPE: %s" % TO_BE_BUSI_TYPE)
    logger.info("TO_BE_CSV_FLODER: %s" % TO_BE_CSV_FLODER)
    logger.info("--------------------------------------------")


if __name__ == '__main__':
    # Entry point: time the whole weekly-stat run and log the totals.
    # NOTE: `conf` and `logger` must keep these names — init() reads them
    # as module-level globals.
    t_begin = datetime.datetime.now()
    conf = config.WeeklyStatConfig()
    logger = Logger(path=conf.LOG_PATH)
    init()
    main_process(TO_BE_CSV_FLODER)
    t_end = datetime.datetime.now()
    for message in ("--------------------------------------------",
                    "Total Processing Time:",
                    'START TIME = %s' % t_begin,
                    'END TIME = %s' % t_end,
                    'DIFF TIME = %s' % (t_end - t_begin),
                    'System Processing Have Done...'):
        logger.info(message)