def test_with_numpy_array(self):
        """Check that a DataFrame and its ndarray copy are imputed identically.

        Every column holds only NaN, +inf or -inf, so each ``fit`` call is
        expected to emit exactly one warning about columns without finite
        values, and both ``transform`` results must be equal.
        """
        imputer = PerColumnImputer()

        X = pd.DataFrame(index=list(range(100)))

        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0; the values are identical.
        X["NaNs"] = np.full(100, np.nan)
        X["PINF"] = np.full(100, np.inf)
        X["NINF"] = np.full(100, -np.inf)

        # Copy so the array stays independent of whatever happens to X.
        X_numpy = X.values.copy()

        with warnings.catch_warnings(record=True) as w:
            imputer.fit(X)
            self.assertEqual(len(w), 1)
            self.assertEqual(
                "The columns ['NaNs' 'PINF' 'NINF'] did not have any finite values. Filling with zeros.",
                str(w[0].message))

        selected_X = imputer.transform(X)

        # re-initialize for new dicts
        imputer = PerColumnImputer()
        with warnings.catch_warnings(record=True) as w:
            imputer.fit(X_numpy)
            self.assertEqual(len(w), 1)
            self.assertEqual(
                "The columns [0 1 2] did not have any finite values. Filling with zeros.",
                str(w[0].message))

        selected_X_numpy = imputer.transform(X_numpy)

        npt.assert_array_equal(selected_X.values, selected_X_numpy.values)

        # assertTrue(shape, (1, 100)) treated the tuple as the failure message
        # and passed for any non-empty shape; compare the shapes for real.
        self.assertEqual(selected_X_numpy.shape, X_numpy.shape)
Esempio n. 2
0
    def test_different_shapes_fitted_and_transformed(self):
        """Transforming a frame with a column added after ``fit`` must raise ValueError."""
        column_imputer = PerColumnImputer()

        frame = pd.DataFrame(index=list(range(10)))
        frame["a"] = np.ones(10)
        column_imputer.fit(frame)

        # Widen the frame only after fitting so the shapes disagree.
        frame["b"] = np.ones(10)

        with self.assertRaises(ValueError):
            column_imputer.transform(frame)
    def test_different_shapes_fitted_and_transformed(self):
        """Transforming a frame with a column added after ``fit`` must raise ValueError."""
        column_imputer = PerColumnImputer()

        frame = pd.DataFrame(index=list(range(10)))
        frame["a"] = np.ones(10)
        column_imputer.fit(frame)

        # Widen the frame only after fitting so the shapes disagree.
        frame["b"] = np.ones(10)

        with self.assertRaises(ValueError):
            column_imputer.transform(frame)
    def test_standard_replacement_behavior(self):
        """Check default imputation of -inf, +inf and NaN in a single column.

        The expected values coincide with the minimum (-100), maximum (100)
        and median (1) of the finite entries of the column.
        """
        imputer = PerColumnImputer()

        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
        X = pd.DataFrame({"a": data})
        true_X = pd.DataFrame({"a": truth})

        imputer.fit(X)
        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
Esempio n. 5
0
    def test_standard_replacement_behavior(self):
        """Check default imputation of -inf, +inf and NaN in a single column.

        The expected values coincide with the minimum (-100), maximum (100)
        and median (1) of the finite entries of the column.
        """
        imputer = PerColumnImputer()

        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
        X = pd.DataFrame({"a": data})
        true_X = pd.DataFrame({"a": truth})

        imputer.fit(X)
        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
    def test_only_nans_and_infs(self):
        """Columns containing only NaN, +inf or -inf must come out as all zeros."""
        imputer = PerColumnImputer()

        X = pd.DataFrame(index=list(range(100)))

        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        X["NaNs"] = np.full(100, np.nan)
        X["PINF"] = np.full(100, np.inf)
        X["NINF"] = np.full(100, -np.inf)

        imputer.fit(X)
        selected_X = imputer.transform(X)

        self.assertTrue((selected_X.values == 0).all())
Esempio n. 7
0
    def test_only_nans_and_infs(self):
        """Columns containing only NaN, +inf or -inf must come out as all zeros."""
        imputer = PerColumnImputer()

        X = pd.DataFrame(index=list(range(100)))

        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        X["NaNs"] = np.full(100, np.nan)
        X["PINF"] = np.full(100, np.inf)
        X["NINF"] = np.full(100, -np.inf)

        imputer.fit(X)
        selected_X = imputer.transform(X)

        self.assertTrue((selected_X.values == 0).all())
    def test_partial_preset_col_to_PINF_given(self):
        """Check imputation when only the +inf replacement is preset for column "a".

        The preset value (100) is expected in place of +inf, while the -inf
        and NaN replacements (-100 and 1) are not preset.
        """
        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
        X = pd.DataFrame({"a": data})
        true_X = pd.DataFrame({"a": truth})

        col_to_max = {"a": 100}
        imputer = PerColumnImputer(col_to_PINF_repl_preset=col_to_max)

        imputer.fit(X)
        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
Esempio n. 9
0
    def test_partial_preset_col_to_PINF_given(self):
        """Check imputation when only the +inf replacement is preset for column "a".

        The preset value (100) is expected in place of +inf, while the -inf
        and NaN replacements (-100 and 1) are not preset.
        """
        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
        X = pd.DataFrame({"a": data})
        true_X = pd.DataFrame({"a": truth})

        col_to_max = {"a": 100}
        imputer = PerColumnImputer(col_to_PINF_repl_preset=col_to_max)

        imputer.fit(X)
        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
    def test_preset_has_higher_priority_than_fit(self):
        """Check that a preset NaN replacement wins over the fitted one.

        The NaN entry is expected to become the preset value 0 rather than 1,
        the median of the finite entries.
        """
        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth = [-100.0, 100.0, 0.0, 100.0, -100.0, 1.0, 1.0]

        X = pd.DataFrame({"a": data})
        true_X = pd.DataFrame({"a": truth})

        col_to_median = {"a": 0}
        imputer = PerColumnImputer(col_to_NAN_repl_preset=col_to_median)
        imputer.fit(X)

        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
Esempio n. 11
0
    def test_preset_has_higher_priority_than_fit(self):
        """Check that a preset NaN replacement wins over the fitted one.

        The NaN entry is expected to become the preset value 0 rather than 1,
        the median of the finite entries.
        """
        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth = [-100.0, 100.0, 0.0, 100.0, -100.0, 1.0, 1.0]

        X = pd.DataFrame({"a": data})
        true_X = pd.DataFrame({"a": truth})

        col_to_median = {"a": 0}
        imputer = PerColumnImputer(col_to_NAN_repl_preset=col_to_median)
        imputer.fit(X)

        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
Esempio n. 12
0
    def test_only_subset_of_columns_given(self):
        """Check a NaN preset that covers only one of two identical columns.

        Column "a" has a preset NaN replacement of 0; column "b" has none and
        its NaN is expected to become 1, the median of its finite entries.
        """
        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth_a = [-100.0, 100.0, 0.0, 100.0, -100.0, 1.0, 1.0]
        truth_b = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
        X = pd.DataFrame({"a": data, "b": data})
        true_X = pd.DataFrame({"a": truth_a, "b": truth_b})

        col_to_median = {"a": 0}
        imputer = PerColumnImputer(col_to_NAN_repl_preset=col_to_median)

        imputer.fit(X)
        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
    def test_only_subset_of_columns_given(self):
        """Check a NaN preset that covers only one of two identical columns.

        Column "a" has a preset NaN replacement of 0; column "b" has none and
        its NaN is expected to become 1, the median of its finite entries.
        """
        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth_a = [-100.0, 100.0, 0.0, 100.0, -100.0, 1.0, 1.0]
        truth_b = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
        X = pd.DataFrame({"a": data, "b": data})
        true_X = pd.DataFrame({"a": truth_a, "b": truth_b})

        col_to_median = {"a": 0}
        imputer = PerColumnImputer(col_to_NAN_repl_preset=col_to_median)

        imputer.fit(X)
        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
Esempio n. 14
0
    def test_NAN_preset_contains_more_columns_than_dataframe_to_fit(self):
        """``fit`` must raise ValueError when the NaN preset names a column the frame lacks."""
        frame = pd.DataFrame(index=list(range(10)))
        frame["a"] = np.ones(10)

        # "b" is in the preset but not in the frame being fitted.
        nan_preset = {"a": 0, "b": 0}
        column_imputer = PerColumnImputer(col_to_NAN_repl_preset=nan_preset)

        with self.assertRaises(ValueError):
            column_imputer.fit(frame)
Esempio n. 15
0
    def test_only_nans_and_infs(self):
        """Columns with no finite values must trigger one warning and be zero-filled."""
        imputer = PerColumnImputer()

        X = pd.DataFrame(index=list(range(100)))

        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        X["NaNs"] = np.full(100, np.nan)
        X["PINF"] = np.full(100, np.inf)
        X["NINF"] = np.full(100, -np.inf)

        with warnings.catch_warnings(record=True) as w:
            imputer.fit(X)
            self.assertEqual(len(w), 1)
            self.assertEqual(
                "The columns ['NaNs' 'PINF' 'NINF'] did not have any finite values. Filling with zeros.",
                str(w[0].message))

        selected_X = imputer.transform(X)

        self.assertTrue((selected_X.values == 0).all())
Esempio n. 16
0
    def test_with_numpy_array(self):
        """Check that a DataFrame and its ndarray copy are imputed identically.

        Every column holds only NaN, +inf or -inf; the imputer is fitted and
        applied once on the frame and once on the plain array, and both
        results must be equal.
        """
        imputer = PerColumnImputer()

        X = pd.DataFrame(index=list(range(100)))

        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        X["NaNs"] = np.full(100, np.nan)
        X["PINF"] = np.full(100, np.inf)
        X["NINF"] = np.full(100, -np.inf)

        # Copy: X.values may share memory with X, so an in-place transform of
        # X could otherwise overwrite the non-finite values before the
        # ndarray path is exercised.
        X_numpy = X.values.copy()

        imputer.fit(X)
        selected_X = imputer.transform(X)

        # re-initialize for new dicts
        imputer = PerColumnImputer()
        imputer.fit(X_numpy)
        selected_X_numpy = imputer.transform(X_numpy)

        npt.assert_array_equal(selected_X.values, selected_X_numpy.values)

        # assertTrue(shape, (1, 100)) treated the tuple as the failure message
        # and passed for any non-empty shape; compare the shapes for real.
        self.assertEqual(selected_X_numpy.shape, X_numpy.shape)
Esempio n. 17
0
    def test_only_parameters_of_last_fit_count(self):
        """Check that a second ``fit`` replaces the parameters of the first.

        The imputer is fitted on ``X`` and then on ``X_2``; the expected
        replacements (-10, 10, 3) match the finite entries of ``X_2`` only.
        """
        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        data_2 = [-np.inf, np.inf, np.nan, 10.0, -10.0, 3.0, 3.0]
        truth_a = [-10.0, 10.0, 3.0, 10.0, -10.0, 3.0, 3.0]
        truth_b = [-10.0, 10.0, 3.0, 10.0, -10.0, 3.0, 3.0]

        X = pd.DataFrame({"a": data, "b": data})
        X_2 = pd.DataFrame({"a": data_2, "b": data_2})
        true_X = pd.DataFrame({"a": truth_a, "b": truth_b})

        imputer = PerColumnImputer()

        imputer.fit(X)
        imputer.fit(X_2)

        selected_X = imputer.transform(X_2)

        pdt.assert_frame_equal(selected_X, true_X)
    def test_with_numpy_array(self):
        """Check that a DataFrame and its ndarray copy are imputed identically.

        Every column holds only NaN, +inf or -inf; the imputer is fitted and
        applied once on the frame and once on the plain array, and both
        results must be equal.
        """
        imputer = PerColumnImputer()

        X = pd.DataFrame(index=list(range(100)))

        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        X["NaNs"] = np.full(100, np.nan)
        X["PINF"] = np.full(100, np.inf)
        X["NINF"] = np.full(100, -np.inf)

        # Copy: X.values may share memory with X, so an in-place transform of
        # X could otherwise overwrite the non-finite values before the
        # ndarray path is exercised.
        X_numpy = X.values.copy()

        imputer.fit(X)
        selected_X = imputer.transform(X)

        # re-initialize for new dicts
        imputer = PerColumnImputer()
        imputer.fit(X_numpy)
        selected_X_numpy = imputer.transform(X_numpy)

        npt.assert_array_equal(selected_X.values, selected_X_numpy.values)

        # assertTrue(shape, (1, 100)) treated the tuple as the failure message
        # and passed for any non-empty shape; compare the shapes for real.
        self.assertEqual(selected_X_numpy.shape, X_numpy.shape)
    def test_only_parameters_of_last_fit_count(self):
        """Check that a second ``fit`` replaces the parameters of the first.

        The imputer is fitted on ``X`` and then on ``X_2``; the expected
        replacements (-10, 10, 3) match the finite entries of ``X_2`` only.
        """
        # np.inf / -np.inf replace the np.PINF / np.NINF aliases, which were
        # removed in NumPy 2.0.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        data_2 = [-np.inf, np.inf, np.nan, 10.0, -10.0, 3.0, 3.0]
        truth_a = [-10.0, 10.0, 3.0, 10.0, -10.0, 3.0, 3.0]
        truth_b = [-10.0, 10.0, 3.0, 10.0, -10.0, 3.0, 3.0]

        X = pd.DataFrame({"a": data, "b": data})
        X_2 = pd.DataFrame({"a": data_2, "b": data_2})
        true_X = pd.DataFrame({"a": truth_a, "b": truth_b})

        imputer = PerColumnImputer()

        imputer.fit(X)
        imputer.fit(X_2)

        selected_X = imputer.transform(X_2)

        pdt.assert_frame_equal(selected_X, true_X)
Esempio n. 20
0
    def test_not_fitted(self):
        """Calling ``transform`` on an imputer that was never fitted must raise NotFittedError."""
        unfitted_imputer = PerColumnImputer()
        empty_frame = pd.DataFrame()

        with self.assertRaises(NotFittedError):
            unfitted_imputer.transform(empty_frame)