Beispiel #1
0
# Skew handling: run the frame through dataManager.checkSkew and keep the
# result as `dfskew`. According to the original author's note this checks the
# numerical data for skew and processes skewed data to normalize it.
# NOTE(review): checkSkew is defined elsewhere -- confirm what it transforms.
dfskew = dataManager.checkSkew(df_drop_idUnamed)

# Save the result as an Excel workbook. `output` is used as a plain prefix of
# the file name, so it presumably already ends with a path separator.
dfskew.to_excel(output + "DF_Skew.xlsx")

# Bare expression kept for interactive inspection of the frame.
dfskew

# Histogram pass over `dfskew` to look at the general distribution of the
# numerical data after the skew step. Per the original note the plot is
# written to \output\04_SKEW_DistPlot.png (the file name is built by the
# helper from `fileName`; not visible here).
analyser.histogramOrBoxPlotAnalysis(
    dfskew,
    strCols=False,
    hist=True,
    boxSize=size,
    fileName='04_SKEW',
)

#%%
# 3.4 Encode the non-numeric columns of the dataset.
# According to the original author's note:
#   - one-hot encoding (OHE) is applied to categorical data with more than
#     2 values
#   - label encoding (LE) is applied to categorical data with exactly 2 values
# NOTE(review): the encoding rules live in applyEncodingToNonNumericData,
# which is defined elsewhere -- confirm against that helper.
df_le = dataManager.applyEncodingToNonNumericData(dfskew)

# Bare expression kept for interactive inspection of the encoded table.
df_le

# Histogram pass over `df_le` to look at the general data distribution after
# encoding. The call passes fileName='05_LE'; the original note named the
# output \output\05_OHE_DistPlot.png, which does not match that prefix.
# NOTE(review): confirm the actual output file name.
analyser.histogramOrBoxPlotAnalysis(
    df_le,
    strCols=True,
    hist=True,
    boxSize=size,
    fileName='05_LE',
)
#%%
# 3.5 Scale the encoded data.
# `df_le` (the encoded frame from step 3.4) is handed to dataManager.scaleData
# and the result is kept as `df_Scale_le`.
# NOTE(review): the scaling method is chosen inside scaleData, which is
# defined elsewhere.
df_Scale_le = dataManager.scaleData(df_le)

# Save the scaled, encoded data as an Excel workbook. `output` is used as a
# plain prefix of the file name, so it presumably already ends with a path
# separator.
df_Scale_le.to_excel(output + "DF_Scale_LE.xlsx")

# Bare expression kept for interactive inspection of the scaled frame.
df_Scale_le