Ejemplo n.º 1
0
def ProcessOperation(df):
    """Build one feature row per id from the operation log ``df``.

    The result starts from the ``pri_id`` values of the module-level
    ``_train`` and ``_test`` frames (concatenated in that order), and every
    feature table produced by the ``_F`` helpers is left-joined onto it on
    ``pri_id``.  Ids with no matching feature rows end up with 0 because the
    final frame is passed through ``fillna(0)``.

    Side effect: a ``'day_period'`` column is written into ``df`` in place
    (``df['time']`` mapped through ``_F.TimeInterval``).

    NOTE(review): the exact columns each ``_F`` helper returns are not
    visible here; the descriptions below follow the original comments.
    """

    def _attach(features, extra):
        # Left join so that every id of the train/test skeleton is kept.
        return features.merge(extra, on=pri_id, how='left')

    # Skeleton: the ids of train followed by the ids of test.
    features = pd.DataFrame()
    features[pri_id] = pd.concat((_train[pri_id], _test[pri_id]))

    # Day handling (original note: split into early / mid / late month).
    features = _attach(features, _F.MonthCount(df, pri_id))

    # Categorical columns expanded from rows to columns: mode, success,
    # version.
    for cat_col in ('mode', 'success', 'version'):
        features = _attach(
            features, _F.CatRowsToCols(df, pri_id, cat_col, 'os'))

    # Time of day: derive the period first, then expand it like the
    # categorical columns above.
    df['day_period'] = df['time'].apply(_F.TimeInterval)
    features = _attach(
        features, _F.CatRowsToCols(df, pri_id, 'day_period', 'os'))

    # Per-id counts (original note on device2: how many different device
    # models a user has).
    count_cols = (
        'device2', 'ip1', 'ip2', 'mac1', 'mac2',
        'device_code1', 'device_code2', 'device_code3',
    )
    for count_col in count_cols:
        features = _attach(features, _F.GetCount(df, pri_id, count_col, 'os'))

    # Geolocation occurrence counts.
    features = _attach(features, _F.CountWS(df))

    # Disabled experiments (were commented out in the original):
    #   _F.CountDangerous(df, label, pri_id, 'Tag', <col>, 'time', <n>) for
    #     device_code1 (Android, 500), device_code2 (Android, 500),
    #     device_code3 (Apple, 200), geo_code (500), mode (10000),
    #     mac1 (600), mac2 (1000), ip1 (1000), ip1_sub (1000), ip2_sub (50)
    #   _F.PositionWS(df)  -- most frequent latitude/longitude per user
    #   geolocation clustering (never implemented)

    return features.fillna(0)
Ejemplo n.º 2
0
def ProcessTrans(df):
    """Build one feature row per id from the transaction log ``df``.

    The result starts from the ``pri_id`` values of the module-level
    ``_train`` and ``_test`` frames (concatenated in that order), and every
    feature table produced by the ``_F`` helpers is left-joined onto it on
    ``pri_id``.  Ids with no matching feature rows end up with 0 because the
    final frame is passed through ``fillna(0)``.

    Side effect: a ``'day_period'`` column is written into ``df`` in place
    (``df['time']`` mapped through ``_F.TimeInterval``).

    NOTE(review): the exact columns each ``_F`` helper returns are not
    visible here; the descriptions below follow the original comments.
    """

    def _attach(features, extra):
        # Left join so that every id of the train/test skeleton is kept.
        return features.merge(extra, on=pri_id, how='left')

    # Skeleton: the ids of train followed by the ids of test.
    features = pd.DataFrame()
    features[pri_id] = pd.concat((_train[pri_id], _test[pri_id]))

    # Channel occurrence counts.
    features = _attach(
        features, _F.CatRowsToCols(df, pri_id, 'channel', 'day'))

    # Day handling.
    features = _attach(features, _F.MonthCount(df, pri_id))

    # Time of day: derive the period first, then expand it to columns.
    df['day_period'] = df['time'].apply(_F.TimeInterval)
    features = _attach(
        features, _F.CatRowsToCols(df, pri_id, 'day_period', 'day'))

    # Per-id counts of different values for each of these columns.
    count_cols = (
        'device2', 'ip1', 'mac1',
        'device_code1', 'device_code2', 'device_code3',
        'amt_src1', 'amt_src2', 'merchant',
        'trans_type1', 'trans_type2', 'acc_id1',
        'market_type', 'market_code',
    )
    for count_col in count_cols:
        features = _attach(
            features, _F.GetCount(df, pri_id, count_col, 'day'))

    # Geolocation occurrence counts.
    features = _attach(features, _F.CountWS(df))

    # Disabled experiments (were commented out in the original):
    #   _F.GetValMaxMin / _F.GetValAvg / _F.GetValSum (df, pri_id, <col>) for
    #     trans_amt (transaction amount) and bal (desensitised balance)
    #   _F.CountDangerous(df, label, pri_id, 'Tag', <col>, 'time', <n>) for
    #     device_code1 (Android, 100), device_code2 (Android, 100),
    #     device_code3 (Apple, 80), geo_code (200), merchant (1000),
    #     mac1 (600), ip1 (1000), ip1_sub (1000), acc_id1 (200),
    #     acc_id2 (outgoing account, 100), acc_id3 (incoming account, 100)
    #   _F.PositionWS(df)  -- most frequent latitude/longitude per user

    return features.fillna(0)