def fill_na_categorical_columns(self):
        """Replace missing values in the listed columns with the marker 'NP'.

        Each dataset produced by ``self.loop_datasets()`` is handed to
        ``fill_nan_with_value`` together with the complete column list, so
        all listed columns are processed in a single call per dataset.
        """
        # NOTE(review): 'GarageYrBlt' looks like a year column rather than a
        # categorical one -- confirm that a string marker is intended there.
        placeholder = 'NP'
        categorical_columns = [
            'BsmtQual', 'BsmtCond', 'FireplaceQu', 'GarageQual', 'GarageCond',
            'GarageFinish', 'GarageType', 'GarageYrBlt', 'MasVnrType',
            'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Fence',
        ]

        for frame in self.loop_datasets():
            fill_nan_with_value(frame, categorical_columns, placeholder)
    def fill_with_zero(self):
        """Replace missing values with 0 in the listed columns.

        Columns are handled one at a time: for each column, every dataset
        produced by ``self.loop_datasets()`` is passed to
        ``fill_nan_with_value`` with a single-element column list.
        """
        zero_filled_columns = [
            'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF',
            'GarageCars', 'GarageArea', 'BsmtFullBath', 'BsmtHalfBath',
            'MasVnrArea',
        ]

        # Outer loop over columns, inner loop over datasets: the helper is
        # invoked once per (column, dataset) pair.
        for name in zero_filled_columns:
            for frame in self.loop_datasets():
                fill_nan_with_value(frame, [name], 0)
Ejemplo n.º 3
0
    def test_fill_nan_with_value(self):
        """Check that ``fill_nan_with_value`` replaces NaN cells in a column.

        The same scenario runs once with a numeric replacement (0) and once
        with a string replacement ('TS'): rows 1-3 of column 'a' are set to
        NaN, the helper fills them, and the result must equal a fresh dummy
        frame in which those cells were assigned the replacement directly.
        """
        nan_rows = [1, 2, 3]

        for fill_value in (0, 'TS'):
            dataset = self.create_dummy_dataframe(10)
            # Use np.nan: the np.NaN alias was removed in NumPy 2.0 and
            # raises AttributeError there.
            dataset.iloc[nan_rows, dataset.columns.get_loc('a')] = np.nan
            fill_nan_with_value(dataset, ['a'], fill_value)

            expected_dataset = self.create_dummy_dataframe(10)
            expected_dataset.iloc[
                nan_rows, expected_dataset.columns.get_loc('a')
            ] = fill_value

            self.assertTrue(
                dataset.equals(expected_dataset),
                msg='unexpected frame after filling NaN with {!r}'.format(
                    fill_value),
            )
    def mode_fill(self, column):
        """Fill missing values of ``column`` with that column's mode.

        The mode is computed from ``self.data[0]`` only (the first value
        returned by ``mode()``), then used as the replacement in every
        dataset produced by ``self.loop_datasets()``.
        """
        # The original code names self.data[0] "train"; the statistic is
        # taken from that single frame and reused for all datasets.
        most_frequent = self.data[0][column].mode()[0]

        for frame in self.loop_datasets():
            fill_nan_with_value(frame, [column], most_frequent)
    def fill_lot_frontage(self):
        """Fill missing 'LotFrontage' values with the column mean.

        The mean is computed from ``self.data[0]`` only and then used as
        the replacement in every dataset produced by
        ``self.loop_datasets()``.
        """
        target = 'LotFrontage'
        # The original code names self.data[0] "train"; its mean is reused
        # for all datasets.
        average = self.data[0][target].mean()

        for frame in self.loop_datasets():
            fill_nan_with_value(frame, [target], average)
 def fill_mas_vnr_type(self):
     """Replace missing 'MasVnrType' values with the marker 'NP'.

     Applied to every dataset produced by ``self.loop_datasets()``.
     """
     target_columns = ['MasVnrType']
     for frame in self.loop_datasets():
         fill_nan_with_value(frame, target_columns, 'NP')