def test_fit_float_outlier(self):
    # Checks ContinuousDomain outlier handling on a two-column float frame
    # with the valid range [0.0, 3.0], under two outlier treatments.
    # Comments are used instead of a docstring so that unittest keeps
    # reporting this test by its method name.

    # Scenario 1: NaN marks missing values and outliers are treated as
    # missing values; the expected output has both replaced by 1.0.
    domain = ContinuousDomain(
        outlier_treatment="as_missing_values",
        low_value=0.0,
        high_value=3.0,
        missing_values=float("NaN"),
        missing_value_replacement=1.0,
    )
    # The configuration must survive cloning.
    domain = clone(domain)
    self.assertEqual(0.0, domain.low_value)
    self.assertEqual(3.0, domain.high_value)

    X = DataFrame([[-2.0, float("NaN")], [2.0, 4.0], [float("NaN"), 0.0]])
    self.assertEqual(
        [[False, True], [False, False], [True, False]],
        domain._missing_value_mask(X).values.tolist(),
    )
    self.assertEqual(
        [[True, False], [False, True], [False, False]],
        domain._outlier_mask(X).values.tolist(),
    )
    Xt = domain.fit_transform(X)
    self.assertEqual([1.0, 2.0, 1.0], Xt[0].tolist())
    self.assertEqual([1.0, 1.0, 0.0], Xt[1].tolist())

    # Scenario 2: -1.0 marks missing values (no replacement configured) and
    # outliers are treated as extreme values; the expected output has -2.0
    # raised to 0.0 and 4.0 lowered to 3.0, with -1.0 left untouched.
    domain = ContinuousDomain(
        outlier_treatment="as_extreme_values",
        low_value=0.0,
        high_value=3.0,
        missing_values=-1.0,
    )
    X = DataFrame([[-2.0, -1.0], [2.0, 4.0], [-1.0, 0.0]])
    self.assertEqual(
        [[False, True], [False, False], [True, False]],
        domain._missing_value_mask(X).values.tolist(),
    )
    self.assertEqual(
        [[True, False], [False, True], [False, False]],
        domain._outlier_mask(X).values.tolist(),
    )
    self.assertEqual(
        [[True, False], [False, False], [False, False]],
        domain._negative_outlier_mask(X).values.tolist(),
    )
    self.assertEqual(
        [[False, False], [False, True], [False, False]],
        domain._positive_outlier_mask(X).values.tolist(),
    )
    Xt = domain.fit_transform(X)
    # Assert on the returned frame, not on the input: the check must hold
    # whether or not fit_transform mutates X in place.
    self.assertEqual([0.0, 2.0, -1.0], Xt[0].tolist())
    self.assertEqual([-1.0, 3.0, 0.0], Xt[1].tolist())
def test_fit_float(self):
    # Fits a ContinuousDomain on float data where NaN marks missing values
    # and checks the constructor parameters, the fitted attributes and the
    # replacement of missing values by -1.0.
    # Comments are used instead of a docstring so that unittest keeps
    # reporting this test by its method name.

    # With data and statistics collection both disabled, the fit is
    # reported as empty.
    bare_domain = ContinuousDomain(with_data=False, with_statistics=False)
    self.assertTrue(bare_domain._empty_fit())

    domain = ContinuousDomain(
        missing_values=float("NaN"),
        missing_value_treatment="as_value",
        missing_value_replacement=-1.0,
        invalid_value_treatment="as_is",
        invalid_value_replacement=0.0,
    )

    # Constructor arguments are stored unchanged.
    self.assertTrue(numpy.isnan(domain.missing_values))
    expected_params = (
        ("missing_value_treatment", "as_value"),
        ("missing_value_replacement", -1.0),
        ("invalid_value_treatment", "as_is"),
        ("invalid_value_replacement", 0.0),
    )
    for param_name, param_value in expected_params:
        self.assertEqual(param_value, getattr(domain, param_name))

    # Nothing has been fitted yet.
    for fitted_name in ("data_min_", "data_max_", "counts_", "numeric_info_"):
        self.assertFalse(hasattr(domain, fitted_name))
    self.assertFalse(domain._empty_fit())

    nan = float("NaN")
    frame = DataFrame(numpy.array([1.0, nan, 3.0, 2.0, nan, 2.0]))
    transformed = domain.fit_transform(frame)
    self.assertIsInstance(transformed, DataFrame)

    # Fitted summary of the four non-missing values.
    self.assertEqual(1.0, domain.data_min_)
    self.assertEqual(3.0, domain.data_max_)
    expected_counts = {"totalFreq": 6, "missingFreq": 2, "invalidFreq": 0}
    self.assertEqual(expected_counts, domain.counts_)
    expected_numeric_info = {
        "minimum": [1.0],
        "maximum": [3.0],
        "mean": [2.0],
        "standardDeviation": [0.7071067811865476],
        "median": [2.0],
        "interQuartileRange": [0.5],
    }
    self.assertEqual(expected_numeric_info, _array_to_list(domain.numeric_info_))
    self.assertEqual([1.0, -1.0, 3.0, 2.0, -1.0, 2.0], transformed[0].tolist())

    # Both NaN and None in a plain array are expected to come back as the
    # replacement value.
    raw = numpy.array([nan, None])
    raw_transformed = domain.transform(raw)
    self.assertEqual([-1.0, -1.0], raw_transformed.tolist())
def test_fit_float_missing(self):
    # Fits a cloned ContinuousDomain that uses -1.0 as the missing value
    # marker and 4.0 as its replacement, then checks the fitted attributes
    # and the transformed output.
    # Comments are used instead of a docstring so that unittest keeps
    # reporting this test by its method name.
    domain = clone(
        ContinuousDomain(missing_values=-1.0, missing_value_replacement=4.0)
    )

    # The configuration must survive cloning.
    self.assertEqual(-1.0, domain.missing_values)
    self.assertEqual(4.0, domain.missing_value_replacement)
    self.assertFalse(domain._empty_fit())

    frame = DataFrame([1.0, -1.0, 3.0, 2.0, -1.0, 2.0])
    transformed = domain.fit_transform(frame)
    self.assertIsInstance(transformed, DataFrame)

    # Fitted summary: the two -1.0 entries are counted as missing.
    self.assertEqual(1.0, domain.data_min_)
    self.assertEqual(3.0, domain.data_max_)
    expected_counts = {"totalFreq": 6, "missingFreq": 2, "invalidFreq": 0}
    self.assertEqual(expected_counts, domain.counts_)
    expected_numeric_info = {
        "minimum": [1.0],
        "maximum": [3.0],
        "mean": [2.0],
        "standardDeviation": [0.7071067811865476],
        "median": [2.0],
        "interQuartileRange": [0.5],
    }
    self.assertEqual(expected_numeric_info, _array_to_list(domain.numeric_info_))
    self.assertEqual([1.0, 4.0, 3.0, 2.0, 4.0, 2.0], transformed[0].tolist())

    # A plain array passed to the fitted domain gets the same replacement.
    raw = numpy.array([-1.0, -1.0])
    raw_transformed = domain.transform(raw)
    self.assertEqual([4.0, 4.0], raw_transformed.tolist())
def test_fit_float(self):
    # Fits a ContinuousDomain configured to replace missing values by -1.0
    # and checks the stored parameters, the fitted data range and the
    # transformed output.
    # Comments are used instead of a docstring so that unittest keeps
    # reporting this test by its method name.
    domain = ContinuousDomain(
        missing_value_treatment="as_value",
        missing_value_replacement=-1.0,
        invalid_value_treatment="as_is",
    )

    # Constructor arguments are stored unchanged.
    expected_params = (
        ("missing_value_treatment", "as_value"),
        ("missing_value_replacement", -1.0),
        ("invalid_value_treatment", "as_is"),
    )
    for param_name, param_value in expected_params:
        self.assertEqual(param_value, getattr(domain, param_name))

    # The data range is only available after fitting.
    for fitted_name in ("data_min_", "data_max_"):
        self.assertFalse(hasattr(domain, fitted_name))

    nan = float('NaN')
    frame = DataFrame(numpy.array([1.0, nan, 3.0, 2.0, nan, 2.0]))
    transformed = domain.fit_transform(frame)
    self.assertEqual(1.0, domain.data_min_)
    self.assertEqual(3.0, domain.data_max_)
    self.assertEqual([1.0, -1.0, 3.0, 2.0, -1.0, 2.0], transformed[0].tolist())

    # Both NaN and None in a plain array are expected to come back as the
    # replacement value.
    raw = numpy.array([nan, None])
    raw_transformed = domain.transform(raw)
    self.assertEqual([-1.0, -1.0], raw_transformed.tolist())