Ejemplo n.º 1
0
def complete_build(x_train, x_test, y_train, y_test):
    """Collect model statistics for two encodings and stack them.

    The splits passed in are scored first with ``enc='labelencoder'``.
    A fresh split is then taken from the module-level ``df``, one-hot
    encoded via ``ohe_encode`` and scored with ``enc='oheencoder'``.

    Args:
        x_train, x_test: feature splits used for the first scoring pass.
        y_train, y_test: target splits used for the first scoring pass.

    Returns:
        The two ``create_stats`` results concatenated row-wise and limited
        to the ``c_val``, ``rmse``, ``mae`` and ``r2`` columns.
    """
    # First pass: score the splits exactly as they were handed in.
    label_stats = create_stats(
        x_train, x_test, y_train, y_test, enc='labelencoder')

    # Second pass starts over from the module-level frame, so the
    # arguments above play no part from here on.
    ohe_x_train, ohe_x_test, ohe_y_train, ohe_y_test = split_dataset(df)

    # Positions (within df) of the columns whose dtype is 'object'.
    # NOTE(review): positions are relative to df, not to the split frames;
    # presumably split_dataset keeps the feature column order -- confirm.
    object_positions = [
        position for position, column in enumerate(df.columns)
        if df[column].dtype == 'object'
    ]

    ohe_x_train, ohe_x_test = ohe_encode(
        ohe_x_train, ohe_x_test, object_positions)

    onehot_stats = create_stats(
        ohe_x_train, ohe_x_test, ohe_y_train, ohe_y_test, enc='oheencoder')

    # Stack both result sets and keep only the reported metrics.
    reported_columns = ['c_val', 'rmse', 'mae', 'r2']
    stacked = pd.concat([label_stats, onehot_stats], axis=0)
    return stacked[reported_columns]
Ejemplo n.º 2
0
def complete_build(x_train, x_test, y_train, y_test):
    """Score the data as given and one-hot encoded, then rank the results.

    ``create_stats`` is run once on the splits as passed in and once on a
    one-hot-encoded copy produced by ``ohe_encode``. Both results are
    concatenated and ordered by ``r2`` (descending), ties broken by
    ``rmse`` (ascending).

    Args:
        x_train, x_test: feature splits; ``x_train.shape[-1]`` is used as
            the number of feature columns.
        y_train, y_test: target splits passed through to ``create_stats``.

    Returns:
        The concatenated statistics, sorted as described above.
        NOTE(review): assumes ``create_stats`` returns DataFrames that
        carry ``r2`` and ``rmse`` columns -- confirm against its definition.

    Raises:
        ValueError: if there are fewer than three feature columns, because
            position 2 cannot be removed from the index list.
    """
    stats_label = create_stats(x_train, x_test, y_train, y_test)

    # Every column position except 2 is handed to the one-hot encoder.
    # NOTE(review): position 2 is hard-coded; presumably the one column that
    # must stay as-is -- verify against the dataset layout.
    all_indices = list(range(x_train.shape[-1]))
    all_indices.remove(2)

    x_train_ohe, x_test_ohe = ohe_encode(x_train, x_test, all_indices)
    stats_ohe = create_stats(pd.DataFrame(x_train_ohe),
                             pd.DataFrame(x_test_ohe),
                             y_train,
                             y_test,
                             enc="ohe")

    model_plot = pd.concat([stats_label, stats_ohe])
    # sort_values returns a new frame rather than sorting in place; the
    # result must be kept, otherwise the ranking is silently dropped.
    return model_plot.sort_values(['r2', 'rmse'], ascending=[False, True])
Ejemplo n.º 3
0
def complete_build(x_train, x_test, y_train, y_test):
    """Score the raw splits and their one-hot-encoded counterparts.

    Columns of ``x_train`` whose dtype is ``'object'`` are passed by
    position to ``ohe_encode``. The encoded output is wrapped in
    DataFrames, relabelled with the original column names and scored with
    ``create_stats`` alongside the untouched splits.

    Args:
        x_train, x_test: feature DataFrames.
        y_train, y_test: target splits passed through to ``create_stats``.

    Returns:
        The statistics for the raw splits followed by those for the
        encoded splits, concatenated row-wise.
    """
    # Positions of the object-dtype columns in x_train.
    object_positions = [
        position for position, column in enumerate(x_train.columns)
        if x_train[column].dtype == 'object'
    ]

    encoded_train, encoded_test = ohe_encode(
        x_train, x_test, object_positions)

    ohe_train = pd.DataFrame(encoded_train)
    ohe_test = pd.DataFrame(encoded_test)

    # Reuse the original labels on the encoded frames.
    # NOTE(review): this only works if ohe_encode returns as many columns
    # as it was given; pandas rejects a label list of a different length
    # -- confirm against ohe_encode.
    ohe_train.columns = x_train.columns.values
    ohe_test.columns = x_test.columns.values

    raw_stats = create_stats(x_train, x_test, y_train, y_test)
    encoded_stats = create_stats(ohe_train, ohe_test, y_train, y_test)

    return pd.concat([raw_stats, encoded_stats], axis=0)