def test_stores_subset(self):
    # A Discretiser built for attribute indices [4, 6] must expose exactly
    # two entries in `subset`, whose `index` values are 4 and 6 in that order.
    dataset_path = datasetsDir(self) + 'numerical' + SEP + 'person'
    training, attributes, klass, test, gold = self.get_instances(dataset_path, True, False)

    discretiser = discretise.Discretiser(
        training, attributes, klass, test, gold, [4, 6], [2, 2])

    self.assertEqual(2, len(discretiser.subset))
    # (position in subset, expected attribute index)
    for position, expected_index in ((0, 4), (1, 6)):
        self.assertEqual(expected_index, discretiser.subset[position].index)
def test_option_cannot_be_zero(self):
    # Constructing a Discretiser whose options contain a zero ([2, 0]) must
    # raise inv.InvalidDataError.
    path = datasetsDir(self) + 'numerical' + SEP + 'person'

    # Load the fixture outside the guarded region: an InvalidDataError raised
    # while reading the data must surface as an error, not let this test pass
    # without ever exercising the zero-option check.
    training, attributes, klass, test, gold = self.get_instances(path, True, False)

    try:
        discretise.Discretiser(training, attributes, klass, test, gold, [4, 6], [2, 0])
    except inv.InvalidDataError:
        # Expected outcome: the zero option was rejected.
        pass
    else:
        self.fail('should raise error as an option is zero')
def test_instances_attributes_and_options_are_extracted_from_strings(self):
    # After construction the Discretiser exposes the training and test
    # instances plus the attribute indices and options it was given.
    # NOTE(review): the test name mentions strings, but lists are passed to
    # the constructor here -- confirm whether the name is stale.
    dataset_path = datasetsDir(self) + 'numerical' + SEP + 'person'
    training, attributes, klass, test, gold = self.get_instances(dataset_path, True, False)

    discretiser = discretise.Discretiser(
        training, attributes, klass, test, gold,
        [0, 1, 4, 5, 6, 7], [2, 3, 2, 3, 4, 2])

    training_count = len(discretiser.training)
    self.assertEqual(6, training_count)
    test_count = len(discretiser.test)
    self.assertEqual(2, test_count)

    self.assertEqual([0, 1, 4, 5, 6, 7], discretiser.attribute_indices)
    self.assertEqual([2, 3, 2, 3, 4, 2], discretiser.options)
def test_returns_array_of_discretised_attributes(self):
    # discretised_attributes() is given one nr.Range per selected attribute
    # (indices 4 and 6, options 2 and 4) and must return two attributes whose
    # numbers of values match those options.
    dataset_path = datasetsDir(self) + 'numerical' + SEP + 'person'
    training, attributes, klass, test, gold = self.get_instances(dataset_path, True, False)
    discretiser = discretise.Discretiser(
        training, attributes, klass, test, gold, [4, 6], [2, 4])

    ranges = [nr.Range(0, 2), nr.Range(0, 120000)]
    result = discretiser.discretised_attributes(ranges)

    self.assertEqual(2, len(result))

    first_attribute = result[0]
    self.assertEqual(4, first_attribute.index)
    self.assertEqual(2, len(first_attribute.values))

    second_attribute = result[1]
    self.assertEqual(4, len(second_attribute.values))
def test_naive_supervised_discretisation(self):
    # Attribute 1 starts with a single value; after naive_supervised() it
    # must have three.
    dataset_path = datasetsDir(self) + 'numerical' + SEP + 'person'
    training, attributes, klass, test, gold = self.get_instances(dataset_path, True, False)
    discretiser = discretise.Discretiser(training, attributes, klass, test, gold, [1])

    def value_count():
        # Re-read the attribute on every call: discretisation may replace it.
        return len(discretiser.attributes[1].values)

    self.assertEqual(1, value_count())
    discretiser.naive_supervised()
    self.assertEqual(3, value_count())
def test_unsupervised_equal_frequency(self):
    # Discretises attribute 1 of the 'weather' data with option 3 and checks
    # the state before and after unsupervised_equal_frequency().
    dataset_path = datasetsDir(self) + 'numerical' + SEP + 'weather'
    training, attributes, klass, test, gold = self.get_instances(dataset_path)
    discretiser = discretise.Discretiser(training, attributes, klass, test, gold, [1], [3])

    # --- before discretisation: attribute 1 is continuous with numeric values
    continuous_attr = discretiser.attributes[1]
    self.assertTrue(continuous_attr.is_continuous())
    self.assertEqual(27.5, discretiser.training[0].value(continuous_attr))
    self.assertEqual(32, discretiser.training[2].value(continuous_attr))
    self.assertEqual(25.4, discretiser.test[0].value(continuous_attr))

    grouped = discretiser.training.values_grouped_by_attribute([continuous_attr])
    observed = grouped[0]
    observed.sort()  # in-place sort, as the comparison below expects ascending order
    expected_sorted = [
        6.0, 9.0, 9.0, 10.699999999999999, 12.0, 12.0,
        12.0, 14.1, 18.0, 27.5, 32.0, 33.100000000000001,
    ]
    self.assertEqual(expected_sorted, observed)

    discretiser.unsupervised_equal_frequency()

    # --- after discretisation: re-fetch attribute 1 and expect labelled values
    discretised_attr = discretiser.attributes[1]
    self.assertFalse(discretised_attr.is_continuous())
    self.assertEqual(4, len(discretised_attr.values))
    self.assertEqual('c', discretiser.training[0].value(discretised_attr))
    self.assertEqual('d', discretiser.training[2].value(discretised_attr))
    self.assertEqual('c', discretiser.test[0].value(discretised_attr))
def test_unsupervised_equal_width_discretisation(self):
    # Discretises attributes 1, 4, 5, 6 and 7 with unsupervised_equal_width().
    # Attribute 0 is not in the selected list and must stay continuous.
    dataset_path = datasetsDir(self) + 'numerical' + SEP + 'person'
    training, attributes, klass, test, gold = self.get_instances(dataset_path, True, False)
    discretiser = discretise.Discretiser(
        training, attributes, klass, test, gold,
        [1, 4, 5, 6, 7], [3, 2, 3, 4, 2])

    # Before discretisation every checked attribute is continuous.
    for index in (0, 1, 4, 5, 6, 7):
        self.assertTrue(discretiser.attributes[index].is_continuous())
    self.assertEqual(25, discretiser.training[0].value(discretiser.attributes[1]))
    self.assertEqual(26, discretiser.test[0].value(discretiser.attributes[1]))

    discretiser.unsupervised_equal_width()

    # Afterwards only the untouched attribute 0 is still continuous.
    self.assertTrue(discretiser.attributes[0].is_continuous())
    for index in (1, 4, 5, 6, 7):
        self.assertFalse(discretiser.attributes[index].is_continuous())
    self.assertEqual('a', discretiser.training[0].value(discretiser.attributes[1]))
    self.assertEqual('a', discretiser.test[0].value(discretiser.attributes[1]))