예제 #1
0
 def test_fit_int(self):
     # Exercises CategoricalDomain on an integer column in which None marks
     # a missing cell that is expected to be replaced with -999.

     # With both data and statistics collection switched off, the domain
     # is expected to report an empty fit.
     no_op_domain = CategoricalDomain(with_data=False, with_statistics=False)
     self.assertTrue(no_op_domain._empty_fit())

     settings = {
         "missing_value_treatment": "as_value",
         "missing_value_replacement": -999,
         "invalid_value_treatment": "as_is",
         "invalid_value_replacement": 0,
     }
     domain = clone(CategoricalDomain(**settings))

     # Before fitting: constructor parameters are readable on the clone,
     # while none of the fitted attributes exist yet.
     self.assertFalse(hasattr(domain, "missing_values_"))
     for param_name, expected_value in settings.items():
         self.assertEqual(expected_value, getattr(domain, param_name))
     for fitted_attr in ("data_", "counts_", "discr_stats_"):
         self.assertFalse(hasattr(domain, fitted_attr))
     self.assertFalse(domain._empty_fit())

     # Fit on a single-column frame holding two missing cells.
     raw_frame = DataFrame(numpy.array([1, None, 3, 2, None, 2]))
     fitted_frame = domain.fit_transform(raw_frame)
     self.assertIsInstance(fitted_frame, DataFrame)
     self.assertEqual([1, 2, 3], domain.data_.tolist())

     expected_counts = {"totalFreq": 6, "missingFreq": 2, "invalidFreq": 0}
     self.assertEqual(expected_counts, domain.counts_)
     expected_frequencies = {1: 1, 2: 2, 3: 1}
     self.assertEqual(expected_frequencies, _value_count(domain.discr_stats_))
     self.assertEqual([1, -999, 3, 2, -999, 2], fitted_frame[0].tolist())

     # Transforming a plain array made up solely of missing cells.
     all_missing = numpy.array([None, None])
     replaced = domain.transform(all_missing)
     self.assertEqual([-999, -999], replaced.tolist())
예제 #2
0
 def test_fit_string_missing(self):
     # Exercises CategoricalDomain on a string column in which the literal
     # "N/A" marks a missing cell that is expected to be replaced with "0".
     domain = CategoricalDomain(missing_values="N/A",
                                missing_value_replacement="0")
     domain = clone(domain)
     self.assertEqual("N/A", domain.missing_values)
     self.assertEqual("0", domain.missing_value_replacement)
     self.assertFalse(domain._empty_fit())
     X = DataFrame(["1", "N/A", "3", "2", "N/A", "2"])
     Xt = domain.fit_transform(X)
     self.assertIsInstance(Xt, DataFrame)
     # The "N/A" marker is expected to be left out of the valid values and
     # counted under "missingFreq" instead.
     self.assertEqual(["1", "2", "3"], domain.data_.tolist())
     self.assertEqual({
         "totalFreq": 6,
         "missingFreq": 2,
         "invalidFreq": 0
     }, domain.counts_)
     self.assertEqual({
         "1": 1,
         "2": 2,
         "3": 1
     }, _value_count(domain.discr_stats_))
     # DataFrame.ix was removed in pandas 1.0; pick the first column by
     # position with iloc instead.
     self.assertEqual(["1", "0", "3", "2", "0", "2"], Xt.iloc[:, 0].tolist())
     # Transforming a plain array made up solely of missing markers.
     X = numpy.array(["N/A", "N/A"])
     Xt = domain.transform(X)
     self.assertEqual(["0", "0"], Xt.tolist())
예제 #3
0
	def test_fit_string(self):
		# Exercises CategoricalDomain on a string column with None cells while
		# statistics collection is disabled; the test expects the None cells to
		# pass through unchanged.

		# With both data and statistics collection switched off, the domain
		# is expected to report an empty fit.
		domain = CategoricalDomain(with_data = False, with_statistics = False)
		self.assertTrue(domain._empty_fit())
		domain = CategoricalDomain(missing_values = None, with_statistics = False)
		# Expected state before fitting, as pinned by the assertions below.
		self.assertFalse(hasattr(domain, "missing_values_"))
		self.assertEqual("as_is", domain.missing_value_treatment)
		self.assertFalse(hasattr(domain, "missing_value_replacement"))
		self.assertEqual("return_invalid", domain.invalid_value_treatment)
		self.assertFalse(hasattr(domain, "invalid_value_replacement"))
		self.assertFalse(domain._empty_fit())
		X = DataFrame(numpy.array(["1", None, "3", "2", None, "2"]))
		Xt = domain.fit_transform(X)
		self.assertIsInstance(Xt, DataFrame)
		self.assertEqual(["1", "2", "3"], domain.data_.tolist())
		# with_statistics = False: no count or frequency attributes are expected.
		self.assertFalse(hasattr(domain, "counts_"))
		self.assertFalse(hasattr(domain, "discr_stats_"))
		# DataFrame.ix was removed in pandas 1.0; pick the first column by
		# position with iloc instead.
		self.assertEqual(["1", None, "3", "2", None, "2"], Xt.iloc[:, 0].tolist())
		# Transforming a plain array made up solely of missing cells.
		X = numpy.array([None, None])
		Xt = domain.transform(X)
		self.assertEqual([None, None], Xt.tolist())
예제 #4
0
	def test_fit_int_missing(self):
		# Exercises CategoricalDomain on an integer column in which the
		# sentinel -1 marks a missing cell that is expected to be replaced with 0.
		sentinel = -1
		replacement = 0
		domain = CategoricalDomain(missing_values = sentinel, missing_value_replacement = replacement)
		self.assertEqual(sentinel, domain.missing_values)
		self.assertEqual(replacement, domain.missing_value_replacement)
		self.assertFalse(domain._empty_fit())

		# Fit on a single-column frame holding two sentinel cells.
		raw_frame = DataFrame([1, sentinel, 3, 2, sentinel, 2])
		fitted_frame = domain.fit_transform(raw_frame)
		self.assertIsInstance(fitted_frame, DataFrame)
		self.assertEqual([1, 2, 3], domain.data_.tolist())

		expected_counts = {"totalFreq" : 6, "missingFreq" : 2, "invalidFreq" : 0}
		self.assertEqual(expected_counts, domain.counts_)
		expected_frequencies = {1 : 1, 2 : 2, 3 : 1}
		self.assertEqual(expected_frequencies, _value_count(domain.discr_stats_))
		self.assertEqual([1, 0, 3, 2, 0, 2], fitted_frame[0].tolist())

		# Transforming a plain array made up solely of sentinel cells.
		all_missing = numpy.array([sentinel, sentinel])
		replaced = domain.transform(all_missing)
		self.assertEqual([replacement, replacement], replaced.tolist())