Ejemplo n.º 1
0
def _F_nunique_ratio(n=3):
    """Build (or load from cache) the per-``pri_id`` top-N distinct-ratio table.

    The result starts from the ``pri_id`` values of ``_train`` and ``_test``
    and left-merges onto it the output of ``_F.TopN_col_distinct_ratio`` for:

    * ``month`` (derived from ``day`` through ``_F.Day2Month``),
    * ``day_period`` (derived from ``time`` through ``_F.TimeInterval``),
    * a fixed list of categorical columns taken from ``trans_info``,
      ``op_info`` or both tables concatenated.

    The finished table is written to ``data/_F_nunique_ratio.feather`` under
    ``data_path``; when that file already exists it is read and returned
    without recomputation.

    Args:
        n: Forwarded unchanged as the last argument of
            ``_F.TopN_col_distinct_ratio`` (presumably the "top N" count --
            confirm against that helper).

    Returns:
        The merged feature DataFrame (one ``pri_id`` column plus whatever
        columns the helper produces).
    """
    cache_path = data_path + "data/_F_nunique_ratio.feather"

    # NOTE(review): the cache file name does not depend on ``n``, so a cached
    # result computed with a different ``n`` is returned as-is.
    if os.path.exists(cache_path):
        return feather.read_dataframe(cache_path)

    temp = pd.DataFrame()
    temp[pri_id] = pd.concat((_train[pri_id], _test[pri_id]))

    # Add day and month slices.
    df = pd.concat((op_info[[pri_id, 'day']], trans_info[[pri_id, 'day']]))
    month = _F.Day2Month(df, 'day')
    temp = pd.merge(temp,
                    _F.TopN_col_distinct_ratio(month[[pri_id, 'month', 'day']],
                                               pri_id, 'month', 'day', n),
                    on=pri_id,
                    how='left')

    df = pd.concat((op_info[[pri_id, 'time', 'day']],
                    trans_info[[pri_id, 'time', 'day']]))
    df['day_period'] = df['time'].apply(_F.TimeInterval)
    # Explicit left join on pri_id, like every other merge in this function.
    # Without on/how pandas performs an inner join on all shared columns,
    # which silently drops ids that have no matching rows.
    temp = pd.merge(temp,
                    _F.TopN_col_distinct_ratio(
                        df[[pri_id, 'day_period', 'day']], pri_id,
                        'day_period', 'day', n),
                    on=pri_id,
                    how='left')

    # Columns read only from trans_info / only from op_info; any other column
    # in the loop below is read from both tables and concatenated.
    trans_only_cols = ('merchant', 'channel', 'trans_type1', 'trans_type2',
                       'code1', 'code2', 'market_code', 'market_type')
    op_only_cols = ('ip2', 'ip2_sub', 'mode', 'mac2', 'os', 'wifi')

    # topN ratio
    for col in [
            'geo_code', 'ip1', 'ip1_sub', 'mac1', 'merchant', 'ip2', 'ip2_sub',
            'mode', 'mac2', 'os', 'channel', 'trans_type1', 'trans_type2',
            'code1', 'code2', 'market_code', 'market_type', 'device_code1',
            'device_code2', 'device_code3', 'wifi'
    ]:
        wanted = [pri_id, col, 'day']
        if col in trans_only_cols:
            df = trans_info[wanted]
        elif col in op_only_cols:
            df = op_info[wanted]
        else:
            df = pd.concat((op_info[wanted], trans_info[wanted]))
        temp = pd.merge(temp,
                        _F.TopN_col_distinct_ratio(df, pri_id, col, 'day', n),
                        on=pri_id,
                        how='left')

    feather.write_dataframe(temp, cache_path)
    return temp