Exemple #1
0
def complete_build(x_train, x_test, y_train, y_test):
    """Collect model statistics for two encodings and stack them.

    The split passed in is scored first and tagged ``'labelencoder'``.
    A fresh split is then taken from the module-level ``df``, one-hot
    encoded with ``ohe_encode`` and scored again, tagged ``'oheencoder'``.

    Returns the two stats frames concatenated row-wise, restricted to the
    ``c_val``, ``rmse``, ``mae`` and ``r2`` columns.
    """
    # Statistics for the split supplied by the caller.
    label_stats = create_stats(
        x_train, x_test, y_train, y_test, enc='labelencoder')

    # Re-split from the module-level frame and locate the positions of the
    # object-typed columns, which are handed to the one-hot encoder.
    x_train, x_test, y_train, y_test = split_dataset(df)
    category_index = [
        position for position, name in enumerate(df.columns)
        if df[name].dtype == 'object'
    ]
    x_train, x_test = ohe_encode(x_train, x_test, category_index)

    # Statistics for the one-hot-encoded split.
    onehot_stats = create_stats(
        x_train, x_test, y_train, y_test, enc='oheencoder')

    combined = pd.concat([label_stats, onehot_stats], axis=0)
    return combined[['c_val', 'rmse', 'mae', 'r2']]
def plot_corr(df, size=11):
    """Plot the correlation matrix of the training split of ``df``.

    ``df`` is split with ``split_dataset``; the training features and the
    training target are joined column-wise and their correlation matrix is
    drawn with ``matshow`` on a ``size`` x ``size`` figure. The figure is
    written to ``./images/data_image.png`` and the axes object is returned.
    """
    # Only the training halves of the split are used.
    x_train, _, y_train, _ = split_dataset(df)
    corr = pd.concat([x_train, y_train], axis=1).corr()

    labels = corr.columns
    positions = range(len(labels))

    fig, ax = subplots(figsize=(size, size))
    plt.set_cmap('YlOrRd')
    ax.matshow(corr)
    # Label both axes with the column names; x labels are drawn vertically.
    xticks(positions, labels, rotation=90)
    yticks(positions, labels)
    fig.savefig('./images/data_image.png')
    return ax
# -*- coding: utf-8 -*-
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode

import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
# Module-level setup, executed on import: load the CSV, split it into
# train/test parts, then label-encode the two feature frames.
data = load_data('data/student-mat.csv')
x_train, x_test, y_train, y_test = split_dataset(data)
x_train, x_test = label_encode(x_train, x_test)


def visualise_data(data, figname):
    """Display a scatter-matrix plot of ``data``.

    Args:
        data: DataFrame passed to ``pandas.plotting.scatter_matrix``.
        figname: Intended output file name. Currently unused: the plot is
            only shown on screen and is not written to disk.

    Returns:
        None. The plot is displayed with ``plt.show()``.
    """
    # scatter_matrix builds its own figure when no ``ax`` is supplied, so a
    # preceding plt.figure() call would only leave an empty extra figure
    # that plt.show() then displays next to the real plot.
    scatter_matrix(data, alpha=0.2, figsize=(15, 15), diagonal='kde')
    # NOTE: saving to ``figname`` is deliberately left disabled here.
    plt.show()
Exemple #4
0
# %load q03_ohe_encoder/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Module-level setup, executed on import.
path = 'data/student-mat.csv'
df = load_data(path)
# Positional indices of the columns whose dtype is 'object'.
category_index = [
    x for x in range(len(df.columns)) if df[df.columns[x]].dtype == 'object'
]
# Names of the same object-typed columns.
columns = [col for col in (df.columns) if df[col].dtype == 'object']
#print(df.shape)
print(category_index)
# Expand the object-typed columns into dummy/indicator columns, then split
# the expanded frame into train/test parts.
df_new = pd.get_dummies(df, columns=columns)
X_train, X_test, y_train, y_test = split_dataset(df_new)


def ohe_encode(X_train, X_test, defaults=category_index):
    """Return the train and test frames after passing them to ``label_encode``.

    ``defaults`` is accepted for interface compatibility and is not read
    by this function.
    """
    encoded_train, encoded_test = label_encode(X_train, X_test)
    return encoded_train, encoded_test


# Run the encoder once on the module-level split; the result is discarded.
ohe_encode(X_train, X_test, category_index)
Exemple #5
0
def visualise_data(data, figname):
    """Build a scatter-matrix plot of ``data`` and return its result.

    ``data`` is first passed through ``split_dataset``; the four pieces it
    yields are not used afterwards. ``figname`` is accepted but not read.
    The value returned is whatever ``scatter_matrix(data, alpha=0.2)``
    produces.
    """
    # The split is kept as in the original flow even though its parts are
    # unused below.
    _x_train, _x_test, _y_train, _y_test = split_dataset(data)
    return scatter_matrix(data, alpha=0.2)