Ejemplo n.º 1
0
	def test_fit_float(self):
		# Exercises ContinuousDomain on float data that uses NaN as the
		# missing-value marker: first the constructor arguments, then the
		# attributes populated by fit_transform(), then transform() on new input.

		# A domain created with data and statistics collection both disabled
		# is expected to report an empty fit.
		bare_domain = ContinuousDomain(with_data = False, with_statistics = False)
		self.assertTrue(bare_domain._empty_fit())

		domain = ContinuousDomain(
			missing_values = float("NaN"),
			missing_value_treatment = "as_value",
			missing_value_replacement = -1.0,
			invalid_value_treatment = "as_is",
			invalid_value_replacement = 0.0,
		)

		# The constructor arguments must be readable back as attributes.
		self.assertTrue(numpy.isnan(domain.missing_values))
		self.assertEqual("as_value", domain.missing_value_treatment)
		self.assertEqual(-1.0, domain.missing_value_replacement)
		self.assertEqual("as_is", domain.invalid_value_treatment)
		self.assertEqual(0.0, domain.invalid_value_replacement)

		# None of the trailing-underscore attributes may exist before fitting.
		for fitted_attr in ("data_min_", "data_max_", "counts_", "numeric_info_"):
			self.assertFalse(hasattr(domain, fitted_attr))
		self.assertFalse(domain._empty_fit())

		# Six rows, two of which are NaN (i.e. missing).
		raw_values = numpy.array([1.0, float("NaN"), 3.0, 2.0, float("NaN"), 2.0])
		frame = DataFrame(raw_values)
		transformed = domain.fit_transform(frame)
		self.assertIsInstance(transformed, DataFrame)

		self.assertEqual(1.0, domain.data_min_)
		self.assertEqual(3.0, domain.data_max_)

		expected_counts = {
			"totalFreq" : 6,
			"missingFreq" : 2,
			"invalidFreq" : 0,
		}
		self.assertEqual(expected_counts, domain.counts_)

		expected_numeric_info = {
			"minimum" : [1.0],
			"maximum" : [3.0],
			"mean" : [2.0],
			"standardDeviation" : [0.7071067811865476],
			"median" : [2.0],
			"interQuartileRange" : [0.5],
		}
		self.assertEqual(expected_numeric_info, _array_to_list(domain.numeric_info_))

		# Both NaN rows must come back as the configured replacement (-1.0).
		first_column = transformed[0].tolist()
		self.assertEqual([1.0, -1.0, 3.0, 2.0, -1.0, 2.0], first_column)

		# On an already fitted domain, NaN and None are both expected to be
		# replaced by -1.0.
		unseen = numpy.array([float("NaN"), None])
		replaced = domain.transform(unseen)
		self.assertEqual([-1.0, -1.0], replaced.tolist())
Ejemplo n.º 2
0
 def test_fit_float_missing(self):
     # Exercises ContinuousDomain with an explicit numeric missing-value
     # marker (-1.0) that is to be replaced by 4.0. All assertions run against
     # a clone() of the configured domain, so they also check that the
     # constructor arguments survive cloning.
     template = ContinuousDomain(missing_values=-1.0,
                                 missing_value_replacement=4.0)
     domain = clone(template)

     self.assertEqual(-1.0, domain.missing_values)
     self.assertEqual(4.0, domain.missing_value_replacement)
     self.assertFalse(domain._empty_fit())

     # Six rows, two of which carry the -1.0 missing-value marker.
     frame = DataFrame([1.0, -1.0, 3.0, 2.0, -1.0, 2.0])
     transformed = domain.fit_transform(frame)
     self.assertIsInstance(transformed, DataFrame)

     # The expected minimum is 1.0, not the -1.0 marker.
     self.assertEqual(1.0, domain.data_min_)
     self.assertEqual(3.0, domain.data_max_)

     expected_counts = {"totalFreq": 6, "missingFreq": 2, "invalidFreq": 0}
     self.assertEqual(expected_counts, domain.counts_)

     expected_numeric_info = {
         "minimum": [1.0],
         "maximum": [3.0],
         "mean": [2.0],
         "standardDeviation": [0.7071067811865476],
         "median": [2.0],
         "interQuartileRange": [0.5],
     }
     self.assertEqual(expected_numeric_info,
                      _array_to_list(domain.numeric_info_))

     # Each -1.0 marker must be swapped for the replacement value 4.0.
     first_column = transformed[0].tolist()
     self.assertEqual([1.0, 4.0, 3.0, 2.0, 4.0, 2.0], first_column)

     # The same replacement is expected from transform() on a plain array.
     unseen = numpy.array([-1.0, -1.0])
     replaced = domain.transform(unseen)
     self.assertEqual([4.0, 4.0], replaced.tolist())