Ejemplo n.º 1
0
def preprocessing(df, unix, diff):
    """Clean a raw transaction frame and derive the feature columns.

    Steps, in order:

    1. Discard rows whose ``simple_journal`` equals ``'Refused'``.
    2. Cast ``cvcresponsecode`` and ``amount`` to float; every CVC code that
       is not ``<= 2.0`` (NaN included) is collapsed to ``3.0``.
    3. Drop the leading characters of ``card_id`` (4), ``ip_id`` (2) and
       ``mail_id`` (5).
    4. Add ``country`` (``'MX'``, ``'AU'``, ``'GB'`` or ``'other'``) and
       remove ``shoppercountrycode``.
    5. Optionally add ``creationdate_unix`` (see ``unix``).
    6. Optionally pass the frame through ``time_diff`` (see ``diff``).
    7. Add ``AUD_currency`` from ``currencycode`` and ``amount``.
    8. Drop rows containing a missing value or one of the literal strings
       ``'NaN'``, ``'NaT'``, ``'NA'``.

    Args:
        df: pandas DataFrame providing the columns ``simple_journal``,
            ``cvcresponsecode``, ``amount``, ``card_id``, ``ip_id``,
            ``mail_id``, ``shoppercountrycode``, ``currencycode`` and, when
            ``unix`` is truthy, ``creationdate``.
        unix: when truthy, add ``creationdate_unix``: the log-scaled number
            of seconds elapsed since the earliest ``creationdate`` among the
            retained rows.
        diff: when truthy, replace the frame with ``time_diff(frame)``.

    Returns:
        A new DataFrame. The frame passed in by the caller is not modified.
    """
    # Work on a filtered copy. The previous in-place drop and column
    # assignments leaked into the caller's frame, and dropping by index label
    # also removed non-refused rows that shared a label with a refused one.
    df = df[df['simple_journal'] != 'Refused'].copy()

    cvc_codes = df['cvcresponsecode'].map(float)
    # Keep codes up to 2.0; anything else (larger values, NaN) becomes 3.0.
    df['cvcresponsecode'] = cvc_codes.where(cvc_codes <= 2.0, 3.0)
    df['amount'] = df['amount'].map(float)

    # Presumably these strip a fixed textual prefix from each identifier --
    # TODO confirm against the raw data.
    for column, skipped_chars in (('card_id', 4), ('ip_id', 2), ('mail_id', 5)):
        df[column] = df[column].str[skipped_chars:]

    shopper_country = df['shoppercountrycode']
    df['country'] = shopper_country.where(
        shopper_country.isin(['MX', 'AU', 'GB']), 'other')
    df = df.drop(['shoppercountrycode'], axis=1)

    if unix:
        nanos_per_second = 10 ** 9
        epoch_seconds = (
            pd.DatetimeIndex(df['creationdate']).astype(np.int64)
            / nanos_per_second
        )
        # The earlier code called ``.sub(min)`` but discarded the result, so
        # the logarithm was taken over absolute epoch seconds. Rebase for
        # real, and use log1p because the earliest row is exactly 0 seconds
        # in, where a plain log is undefined.
        # NOTE(review): this changes the values in ``creationdate_unix``;
        # re-fit anything trained on the old feature.
        elapsed_seconds = epoch_seconds - epoch_seconds.min()
        df['creationdate_unix'] = np.log1p(np.asarray(elapsed_seconds))

    if diff:
        df = time_diff(df)

    df['AUD_currency'] = df[['currencycode', 'amount']].apply(
        CurrencyConverter.convert_currency_from_AUD, axis=1)

    df = df.dropna(axis=0, how='any')
    # Missing values may also arrive as literal strings; drop those rows too.
    has_placeholder = df.isin(['NaN', 'NaT', 'NA']).any(axis=1)
    return df[~has_placeholder]