Beispiel #1
0
# 3.4 Encode the non-numeric columns of the dataset.
# Per the original notes: categorical columns with more than two values are
# one-hot encoded (OHE) and columns with exactly two values are label
# encoded (LE).
# NOTE(review): that split is decided inside dataManager -- confirm there.
df_le = dataManager.applyEncodingToNonNumericData(dfskew)
# Bare expression kept for interactive inspection of the encoded table.
df_le
# Histogram of the encoded frame, used to check the general data distribution
# after encoding. The plot is tagged with fileName '05_LE'; the original note
# pointed at \output\05_OHE_DistPlot.png, which does not match this tag.
# TODO confirm the actual output file name produced by the helper.
analyser.histogramOrBoxPlotAnalysis(
    df_le, strCols=True, hist=True, boxSize=size, fileName='05_LE'
)
#%%
# 3.5 Scale the encoded data.
df_Scale_le = dataManager.scaleData(df_le)
# Save the scaled frame as an Excel workbook. `output` is concatenated with
# the file name as-is, so it presumably already ends with a path separator
# -- verify where `output` is defined.
df_Scale_le.to_excel(output + "DF_Scale_LE.xlsx")
# Bare expression kept for interactive inspection of the scaled table.
df_Scale_le
# Histogram of the scaled frame, used to check the general data distribution
# after scaling. Tagged with fileName '06_SCALE'; per the original note the
# plot is written to \output\06_SCALE_DistPlot.png (TODO confirm).
analyser.histogramOrBoxPlotAnalysis(
    df_Scale_le, strCols=True, hist=True, boxSize=size, fileName='06_SCALE'
)

#%%
# 3.6 Remove outlier data.
# Based on the box plots, the data frame contains outliers, so they are
# removed from the scaled frame here.
# NOTE(review): the result keeps an `_ohe` suffix although its input is
# df_Scale_le; the name is left unchanged because later cells may use it.
df_noOutlier_ohe = dataManager.removeOutlier(df_Scale_le)