# Example no. 1
0
def Data_prep(df, flag):
    """One-hot encode the predictor columns of ``df`` and impute missing values.

    The first column of ``df`` is never used as a predictor. Unless ``flag``
    is 1, the last column is dropped as well. The original comments describe
    these as the loan id and the loan status respectively; that is an
    assumption about the caller's column layout and is not checked here.

    Args:
        df: Input ``pandas.DataFrame``.
        flag: ``1`` keeps the last column among the predictors; any other
            value drops it and prints a trace message.

    Returns:
        A ``pandas.DataFrame`` with the imputed values, labelled with the
        one-hot-encoded column names and carrying the same row index as
        ``df``.
    """
    # Column selection: skip the first column, and (when flag != 1) the last.
    predictors = df.columns[1:]
    if flag != 1:
        predictors = predictors[:-1]
        print("train module flag 0")

    # One-Hot Encoding
    # dummy_na=True adds an explicit NaN-indicator column per encoded column.
    df_dummy = pd.get_dummies(df[predictors], dummy_na=True)

    # Data Imputations
    # Imported locally, as in the original, so the dependency is only
    # required when this function is actually called.
    from fancyimpute import IterativeImputer
    imputed_values = IterativeImputer().fit_transform(df_dummy)

    # Re-attach both the column names and the original row index. Without
    # index=..., the result would get a fresh 0..n-1 index, and any later
    # index-aligned assignment from ``df`` would silently misalign.
    df_imputed = pd.DataFrame(
        imputed_values, columns=df_dummy.columns, index=df_dummy.index
    )

    return df_imputed
# One-Hot Encoding
# ########################################################
# NOTE: `train` and `predictors` are not defined in this section; they are
# expected to exist at module level before this point.
train_dummy = pd.get_dummies(train[predictors], dummy_na=True)
newcols = train_dummy.columns

# Data Imputations
# ########################################################
from fancyimpute import IterativeImputer
train_imputed = IterativeImputer().fit_transform(train_dummy)
# Keep the row index of `train`: the Loan_Status assignment below aligns on
# index, so a fresh 0..n-1 index would misalign it (or fill it with NaN)
# whenever `train` does not use the default RangeIndex.
train_imputed = pd.DataFrame(
    train_imputed, columns=newcols, index=train_dummy.index
)

# Adding Loan Status back again to Data
# ########################################################
train_imputed['Loan_Status'] = train['Loan_Status']
# Encode the label as 1/0; any value other than 'Y' or 'N' becomes NaN.
train_imputed['Loan_Status'] = train_imputed.Loan_Status.map(dict(Y=1, N=0))
#train_imputed.to_csv("train_imputed.csv",sep=',')

# Split DataFrame into Train and Test sets
# ########################################################
# Random row mask: each row lands in the training set with probability 0.8.
# No seed is set here, so the split differs from run to run.
msk = np.random.rand(len(train_imputed)) < 0.8

data_train = train_imputed[msk]
data_test = train_imputed[~msk]