def test_fit_int(self):
    """Fit a CategoricalDomain on an integer column containing ``None`` entries.

    Verifies that a domain built with both ``with_data`` and
    ``with_statistics`` disabled reports an empty fit, that a cloned domain
    keeps its constructor settings and carries no fitted attributes, and that
    fitting with ``missing_value_treatment="as_value"`` yields -999 in place
    of each ``None`` in both ``fit_transform`` and ``transform`` output.
    """
    # Nothing to collect when data and statistics are both switched off.
    domain = CategoricalDomain(with_data=False, with_statistics=False)
    self.assertTrue(domain._empty_fit())

    domain = clone(CategoricalDomain(
        missing_value_treatment="as_value",
        missing_value_replacement=-999,
        invalid_value_treatment="as_is",
        invalid_value_replacement=0,
    ))

    # The clone must expose the constructor arguments unchanged ...
    self.assertFalse(hasattr(domain, "missing_values_"))
    self.assertEqual("as_value", domain.missing_value_treatment)
    self.assertEqual(-999, domain.missing_value_replacement)
    self.assertEqual("as_is", domain.invalid_value_treatment)
    self.assertEqual(0, domain.invalid_value_replacement)
    # ... and must not carry any fitted state yet.
    for fitted_attr in ("data_", "counts_", "discr_stats_"):
        self.assertFalse(hasattr(domain, fitted_attr))
    self.assertFalse(domain._empty_fit())

    frame = DataFrame(numpy.array([1, None, 3, 2, None, 2]))
    transformed = domain.fit_transform(frame)
    self.assertIsInstance(transformed, DataFrame)
    self.assertEqual([1, 2, 3], domain.data_.tolist())
    expected_counts = {"totalFreq": 6, "missingFreq": 2, "invalidFreq": 0}
    self.assertEqual(expected_counts, domain.counts_)
    self.assertEqual({1: 1, 2: 2, 3: 1}, _value_count(domain.discr_stats_))
    self.assertEqual([1, -999, 3, 2, -999, 2], transformed[0].tolist())

    # A plain array of missing values goes through the already-fitted domain.
    all_missing = numpy.array([None, None])
    self.assertEqual([-999, -999], domain.transform(all_missing).tolist())
def test_fit_string_missing(self):
    """Fit a CategoricalDomain that treats the string ``"N/A"`` as missing.

    Verifies that a cloned domain keeps its ``missing_values`` and
    ``missing_value_replacement`` settings, that the fitted ``data_``,
    ``counts_`` and ``discr_stats_`` match the expected values for the
    sample column, and that every ``"N/A"`` is replaced with ``"0"`` in both
    ``fit_transform`` and ``transform`` output.
    """
    domain = CategoricalDomain(missing_values="N/A", missing_value_replacement="0")
    domain = clone(domain)
    self.assertEqual("N/A", domain.missing_values)
    self.assertEqual("0", domain.missing_value_replacement)
    self.assertFalse(domain._empty_fit())

    X = DataFrame(["1", "N/A", "3", "2", "N/A", "2"])
    Xt = domain.fit_transform(X)
    self.assertIsInstance(Xt, DataFrame)
    self.assertEqual(["1", "2", "3"], domain.data_.tolist())
    self.assertEqual({"totalFreq": 6, "missingFreq": 2, "invalidFreq": 0}, domain.counts_)
    self.assertEqual({"1": 1, "2": 2, "3": 1}, _value_count(domain.discr_stats_))
    # Select the first column by position. ``DataFrame.ix`` was removed in
    # pandas 1.0, so ``iloc`` is used instead.
    self.assertEqual(["1", "0", "3", "2", "0", "2"], Xt.iloc[:, 0].tolist())

    # A plain array of missing markers goes through the already-fitted domain.
    X = numpy.array(["N/A", "N/A"])
    Xt = domain.transform(X)
    self.assertEqual(["0", "0"], Xt.tolist())
def test_fit_int_missing(self):
    """Fit a CategoricalDomain that treats the integer ``-1`` as missing.

    Verifies that the constructor settings are exposed unchanged, that the
    fitted ``data_``, ``counts_`` and ``discr_stats_`` match the expected
    values for the sample column, and that every ``-1`` is replaced with
    ``0`` in both ``fit_transform`` and ``transform`` output.
    """
    domain = CategoricalDomain(missing_values=-1, missing_value_replacement=0)
    self.assertEqual(-1, domain.missing_values)
    self.assertEqual(0, domain.missing_value_replacement)
    self.assertFalse(domain._empty_fit())

    frame = DataFrame([1, -1, 3, 2, -1, 2])
    transformed = domain.fit_transform(frame)
    self.assertIsInstance(transformed, DataFrame)
    self.assertEqual([1, 2, 3], domain.data_.tolist())
    expected_counts = {"totalFreq": 6, "missingFreq": 2, "invalidFreq": 0}
    self.assertEqual(expected_counts, domain.counts_)
    expected_frequencies = {1: 1, 2: 2, 3: 1}
    self.assertEqual(expected_frequencies, _value_count(domain.discr_stats_))
    self.assertEqual([1, 0, 3, 2, 0, 2], transformed[0].tolist())

    # A plain array of missing markers goes through the already-fitted domain.
    only_missing = numpy.array([-1, -1])
    self.assertEqual([0, 0], domain.transform(only_missing).tolist())
def test_fit_int(self):
    """Fit a CategoricalDomain on an integer column containing ``None`` entries.

    Verifies that the constructor settings are exposed unchanged, that no
    fitted attributes exist before fitting, that the fitted ``data_``,
    ``counts_`` and ``discr_stats_`` match the expected values, and that
    each ``None`` becomes -999 in both ``fit_transform`` and ``transform``
    output.
    """
    domain = CategoricalDomain(
        missing_value_treatment="as_value",
        missing_value_replacement=-999,
        invalid_value_treatment="as_is",
    )
    self.assertEqual("as_value", domain.missing_value_treatment)
    self.assertEqual(-999, domain.missing_value_replacement)
    self.assertEqual("as_is", domain.invalid_value_treatment)
    # No fitted state may exist before the first fit.
    for fitted_attr in ("data_", "counts_", "discr_stats_"):
        self.assertFalse(hasattr(domain, fitted_attr))

    frame = DataFrame(numpy.array([1, None, 3, 2, None, 2]))
    transformed = domain.fit_transform(frame)

    expected_categories = numpy.array([1, 2, 3]).tolist()
    self.assertEqual(expected_categories, domain.data_.tolist())
    expected_counts = {"totalFreq": 6, "missingFreq": 2, "invalidFreq": 0}
    self.assertEqual(expected_counts, domain.counts_)
    self.assertEqual({1: 1, 2: 2, 3: 1}, _value_count(domain.discr_stats_))
    expected_column = numpy.array([1, -999, 3, 2, -999, 2]).tolist()
    self.assertEqual(expected_column, transformed[0].tolist())

    # A plain array of missing values goes through the already-fitted domain.
    all_missing = numpy.array([None, None])
    replaced = domain.transform(all_missing)
    expected_replaced = numpy.array([-999, -999]).tolist()
    self.assertEqual(expected_replaced, replaced.tolist())
def test_fit_int(self):
    """Fit a CategoricalDomain on an integer column containing ``None`` entries.

    Verifies that the constructor settings are exposed unchanged, that
    ``data_`` does not exist before fitting, that the fitted ``data_``
    matches the expected values, and that each ``None`` becomes -999 in
    both ``fit_transform`` and ``transform`` output.
    """
    domain = CategoricalDomain(
        missing_value_treatment="as_value",
        missing_value_replacement=-999,
        invalid_value_treatment="as_is",
    )
    # Constructor arguments must be readable back as plain attributes.
    expected_settings = (
        ("missing_value_treatment", "as_value"),
        ("missing_value_replacement", -999),
        ("invalid_value_treatment", "as_is"),
    )
    for attr_name, expected_value in expected_settings:
        self.assertEqual(expected_value, getattr(domain, attr_name))
    self.assertFalse(hasattr(domain, "data_"))

    frame = DataFrame(numpy.array([1, None, 3, 2, None, 2]))
    transformed = domain.fit_transform(frame)
    expected_categories = numpy.array([1, 2, 3]).tolist()
    self.assertEqual(expected_categories, domain.data_.tolist())
    expected_column = numpy.array([1, -999, 3, 2, -999, 2]).tolist()
    self.assertEqual(expected_column, transformed[0].tolist())

    # A plain array of missing values goes through the already-fitted domain.
    all_missing = numpy.array([None, None])
    replaced = domain.transform(all_missing)
    self.assertEqual(numpy.array([-999, -999]).tolist(), replaced.tolist())
def test_fit_string(self):
    """Fit a CategoricalDomain on a string column with statistics disabled.

    Verifies that a domain built with both ``with_data`` and
    ``with_statistics`` disabled reports an empty fit. For a domain built
    with ``missing_values=None`` and ``with_statistics=False`` it checks the
    treatment settings this test expects ("as_is" for missing values,
    "return_invalid" for invalid values, no replacement attributes), that
    fitting sets ``data_`` but neither ``counts_`` nor ``discr_stats_``, and
    that ``None`` entries come back unchanged from both ``fit_transform``
    and ``transform``.
    """
    # Nothing to collect when data and statistics are both switched off.
    domain = CategoricalDomain(with_data=False, with_statistics=False)
    self.assertTrue(domain._empty_fit())

    domain = CategoricalDomain(missing_values=None, with_statistics=False)
    self.assertFalse(hasattr(domain, "missing_values_"))
    self.assertEqual("as_is", domain.missing_value_treatment)
    self.assertFalse(hasattr(domain, "missing_value_replacement"))
    self.assertEqual("return_invalid", domain.invalid_value_treatment)
    self.assertFalse(hasattr(domain, "invalid_value_replacement"))
    self.assertFalse(domain._empty_fit())

    X = DataFrame(numpy.array(["1", None, "3", "2", None, "2"]))
    Xt = domain.fit_transform(X)
    self.assertIsInstance(Xt, DataFrame)
    self.assertEqual(["1", "2", "3"], domain.data_.tolist())
    # Statistics were disabled, so no frequency attributes may be set.
    self.assertFalse(hasattr(domain, "counts_"))
    self.assertFalse(hasattr(domain, "discr_stats_"))
    # Select the first column by position. ``DataFrame.ix`` was removed in
    # pandas 1.0, so ``iloc`` is used instead.
    self.assertEqual(["1", None, "3", "2", None, "2"], Xt.iloc[:, 0].tolist())

    # A plain array of missing values goes through the already-fitted domain.
    X = numpy.array([None, None])
    Xt = domain.transform(X)
    self.assertEqual([None, None], Xt.tolist())