def ProcessOperation(df):
    """Build the per-id feature table from the operation log ``df``.

    Starts from the ``pri_id`` values of ``_train`` followed by those of
    ``_test`` and left-joins, in order, the frames returned by the ``_F``
    helpers called below. Missing values are replaced with 0 at the end.

    Side effect: a ``'day_period'`` column is written onto ``df`` itself
    (``df['time']`` mapped through ``_F.TimeInterval``).

    Returns the merged DataFrame.
    """

    def _left_join(base, extra):
        # Every feature frame is attached the same way: left join on pri_id.
        return pd.merge(base, extra, on=pri_id, how='left')

    features = pd.DataFrame()
    features[pri_id] = pd.concat((_train[pri_id], _test[pri_id]))

    # Day handling (split into early / middle / late part of the month).
    features = _left_join(features, _F.MonthCount(df, pri_id))

    # Categorical columns: mode, success, version.
    for cat_col in ('mode', 'success', 'version'):
        features = _left_join(
            features, _F.CatRowsToCols(df, pri_id, cat_col, 'os'))

    # Time of day: derive the period column first, then treat it like the
    # categorical columns above.
    df['day_period'] = df['time'].apply(_F.TimeInterval)
    features = _left_join(
        features, _F.CatRowsToCols(df, pri_id, 'day_period', 'os'))

    # Per-column counts (original note on device2: how many different
    # device models a user has).
    counted_cols = (
        'device2', 'ip1', 'ip2', 'mac1', 'mac2',
        'device_code1', 'device_code2', 'device_code3',
    )
    for counted in counted_cols:
        features = _left_join(
            features, _F.GetCount(df, pri_id, counted, 'os'))

    # Count of geographic-location occurrences.
    features = _left_join(features, _F.CountWS(df))

    # NOTE: the following experiments were present here as commented-out
    # code and remain disabled. Each was merged the same way as above.
    #   _F.CountDangerous(df, label, pri_id, 'Tag', <col>, 'time', <n>)
    #     with (<col>, <n>) =
    #       ('device_code1', 500)   dangerous device (Android)
    #       ('device_code2', 500)   dangerous device (Android)
    #       ('device_code3', 200)   dangerous device (Apple)
    #       ('geo_code', 500)       dangerous geographic location
    #       ('mode', 10000)         dangerous operation type
    #       ('mac1', 600)           dangerous mac1 address
    #       ('mac2', 1000)          dangerous mac2 address
    #       ('ip1', 1000)           dangerous ip1 address
    #       ('ip1_sub', 1000)       dangerous ip1_sub address
    #       ('ip2_sub', 50)         dangerous ip2_sub address
    #   _F.PositionWS(df)           most frequent longitude/latitude per user
    #   geographic-location clustering (placeholder comment only, no code)

    return features.fillna(0)
def ProcessTrans(df):
    """Build the per-id feature table from the transaction log ``df``.

    Starts from the ``pri_id`` values of ``_train`` followed by those of
    ``_test`` and left-joins, in order, the frames returned by the ``_F``
    helpers called below. Missing values are replaced with 0 at the end.

    Side effect: a ``'day_period'`` column is written onto ``df`` itself
    (``df['time']`` mapped through ``_F.TimeInterval``).

    Returns the merged DataFrame.
    """

    def _left_join(base, extra):
        # Every feature frame is attached the same way: left join on pri_id.
        return pd.merge(base, extra, on=pri_id, how='left')

    features = pd.DataFrame()
    features[pri_id] = pd.concat((_train[pri_id], _test[pri_id]))

    # Channel: occurrence counts.
    features = _left_join(
        features, _F.CatRowsToCols(df, pri_id, 'channel', 'day'))

    # Day.
    features = _left_join(features, _F.MonthCount(df, pri_id))

    # Time of day: derive the period column first, then expand it.
    df['day_period'] = df['time'].apply(_F.TimeInterval)
    features = _left_join(
        features, _F.CatRowsToCols(df, pri_id, 'day_period', 'day'))

    # Count of distinct values per column.
    counted_cols = (
        'device2', 'ip1', 'mac1',
        'device_code1', 'device_code2', 'device_code3',
        'amt_src1', 'amt_src2', 'merchant',
        'trans_type1', 'trans_type2', 'acc_id1',
        'market_type', 'market_code',
    )
    for counted in counted_cols:
        features = _left_join(
            features, _F.GetCount(df, pri_id, counted, 'day'))

    # Count of geographic-location occurrences.
    features = _left_join(features, _F.CountWS(df))

    # NOTE: the following experiments were present here as commented-out
    # code and remain disabled. Each was merged the same way as above.
    #   _F.GetValMaxMin / _F.GetValAvg / _F.GetValSum (df, pri_id, <col>)
    #     for 'trans_amt' (transaction amount: max, min, average)
    #     and 'bal'       (desensitized balance: max, min, average)
    #   _F.CountDangerous(df, label, pri_id, 'Tag', <col>, 'time', <n>)
    #     with (<col>, <n>) =
    #       ('device_code1', 100)   dangerous device (Android)
    #       ('device_code2', 100)   dangerous device (Android)
    #       ('device_code3', 80)    dangerous device (Apple)
    #       ('geo_code', 200)       dangerous geographic location
    #       ('merchant', 1000)      dangerous merchant
    #       ('mac1', 600)           dangerous mac1 address
    #       ('ip1', 1000)           dangerous ip1 address
    #       ('ip1_sub', 1000)       dangerous ip1_sub address
    #       ('acc_id1', 200)        dangerous transaction account
    #       ('acc_id2', 100)        dangerous transfer-out account
    #       ('acc_id3', 100)        dangerous transfer-in account
    #   _F.PositionWS(df)           most frequent longitude/latitude per user

    return features.fillna(0)