def label_encode_engineer(self):
    """Populate ``self.label_df`` with an encoded copy of ``self.ord_df``.

    Must be called AFTER ``sg_ordinals``.
    NOTE(review): presumably because that step prepares ``self.ord_df`` --
    confirm against the rest of the class.

    ``self.ord_df`` itself is left untouched: the work is done on a copy
    that is stored as ``self.label_df``. Every column named in
    ``self.train().columns`` whose dtype in the copy is ``object`` is
    replaced by the result of ``LabelCountEncoder.fit_transform`` on that
    column; all other columns are carried over unchanged.

    Returns:
        None. The result is stored on ``self.label_df``.
    """
    self.label_df = self.ord_df.copy()
    for c in self.train().columns:
        if self.label_df[c].dtype == 'object':
            # One fresh encoder per column, created only when it is needed,
            # so no fitted state is shared between columns.
            self.label_df[c] = LabelCountEncoder().fit_transform(self.label_df[c])
def label_encode_engineer(self):
    """Encode every ``object``-dtype column of ``self.all`` in place.

    Each column of ``self.all`` whose dtype is ``object`` is overwritten
    with the result of ``LabelCountEncoder.fit_transform`` on that column.
    Columns of any other dtype are left as they are.

    Returns:
        None. ``self.all`` is modified in place.
    """
    for c in self.all.columns:
        if self.all[c].dtype == 'object':
            # One fresh encoder per column, created only when it is needed,
            # so no fitted state is shared between columns.
            self.all[c] = LabelCountEncoder().fit_transform(self.all[c])
dfS=df[['SalePrice','Heating','HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', \ '2ndFlrSF', \ 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', \ 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual', \ 'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu']] dfS[['GrLivArea', '1stFlrSF']].corr() dfS.GrLivArea.value_counts() dfS[['GrLivArea', '2ndFlrSF']].corr() dfS.BsmtHalfBath.value_counts() # Wonder if the Bsmt bathrooms parameters covary with BsmtCond or BsmtFinType1 # %% Heating Variables from LabelClass import LabelCountEncoder lce = LabelCountEncoder() dfS['Heating'] = lce.fit_transform(dfS['Heating']) dfS[['HeatingQC', 'Heating']].corr() df.HeatingQC.value_counts() df.Heating.value_counts() # %% df.loc[df.Functional == 'Maj2'] dfS.loc[dfS.Functional == 3] dfS['Kitchen*Quality'] = dfS.KitchenAbvGr * dfS.KitchenQual functional_dic = { 'Typ': 8, 'Min1': 7,