Esempio n. 1
0
 def test_stores_subset(self):
     # Building a Discretiser for attribute indices 4 and 6 should expose
     # exactly those two attributes, in that order, through `subset`.
     dataset = datasetsDir(self) + 'numerical' + SEP + 'person'
     training, attributes, klass, test, gold = self.get_instances(dataset, True, False)
     discretiser = discretise.Discretiser(training, attributes, klass, test, gold, [4, 6], [2, 2])

     self.assertEqual(2, len(discretiser.subset))
     for position, expected_index in enumerate((4, 6)):
         self.assertEqual(expected_index, discretiser.subset[position].index)
Esempio n. 2
0
 def test_option_cannot_be_zero(self):
     # Constructing a Discretiser whose options list contains a zero must
     # raise InvalidDataError.
     path = datasetsDir(self) + 'numerical' + SEP + 'person'
     # Load the fixture outside the try block: an InvalidDataError raised
     # while reading the dataset must surface as a test error rather than be
     # mistaken for the rejection this test is looking for.
     _training, attributes, klass, _test, _gold = self.get_instances(path, True, False)
     try:
         discretise.Discretiser(_training, attributes, klass, _test, _gold, [4,6], [2,0])
     except inv.InvalidDataError:
         pass
     else:
         # Only reached when the constructor accepted the zero option.
         self.fail('should raise error as an option is zero')
Esempio n. 3
0
 def test_instances_attributes_and_options_are_extracted_from_strings(self):
     # After construction the Discretiser should hold the training and test
     # instances (6 and 2 of them for this dataset) together with the
     # attribute indices and options it was given.
     dataset = datasetsDir(self) + 'numerical' + SEP + 'person'
     training, attributes, klass, test, gold = self.get_instances(dataset, True, False)
     indices = [0, 1, 4, 5, 6, 7]
     options = [2, 3, 2, 3, 4, 2]
     discretiser = discretise.Discretiser(training, attributes, klass, test, gold, indices, options)

     self.assertEqual(6, len(discretiser.training))
     self.assertEqual(2, len(discretiser.test))
     # Compare against fresh literals rather than the lists passed in, so the
     # check does not depend on object identity.
     self.assertEqual([0, 1, 4, 5, 6, 7], discretiser.attribute_indices)
     self.assertEqual([2, 3, 2, 3, 4, 2], discretiser.options)
Esempio n. 4
0
 def test_returns_array_of_discretised_attributes(self):
     # discretised_attributes() should return one attribute per supplied
     # range; each attribute should carry as many values as the matching
     # option (2 for index 4, 4 for index 6).
     dataset = datasetsDir(self) + 'numerical' + SEP + 'person'
     training, attributes, klass, test, gold = self.get_instances(dataset, True, False)
     discretiser = discretise.Discretiser(training, attributes, klass, test, gold, [4, 6], [2, 4])

     ranges = [nr.Range(0, 2), nr.Range(0, 120000)]
     discretised = discretiser.discretised_attributes(ranges)

     self.assertEqual(2, len(discretised))
     first = discretised[0]
     self.assertEqual(4, first.index)
     self.assertEqual(2, len(first.values))
     self.assertEqual(4, len(discretised[1].values))
Esempio n. 5
0
 def test_naive_supervised_discretisation(self):
     # Attribute 1 starts out with a single value; after naive_supervised()
     # it should have three.
     dataset = datasetsDir(self) + 'numerical' + SEP + 'person'
     training, attributes, klass, test, gold = self.get_instances(dataset, True, False)
     discretiser = discretise.Discretiser(training, attributes, klass, test, gold, [1])

     def value_count():
         # Re-read the attribute on every call: it is looked up again after
         # discretisation rather than cached.
         return len(discretiser.attributes[1].values)

     self.assertEqual(1, value_count())
     discretiser.naive_supervised()
     self.assertEqual(3, value_count())
Esempio n. 6
0
 def test_unsupervised_equal_frequency(self):
     # Attribute 1 of the weather dataset is continuous before
     # unsupervised_equal_frequency() and discrete afterwards, with the
     # instance values replaced by letter labels.
     dataset = datasetsDir(self) + 'numerical' + SEP + 'weather'
     training, attributes, klass, test, gold = self.get_instances(dataset)
     discretiser = discretise.Discretiser(training, attributes, klass, test, gold, [1], [3])

     # --- state before discretisation ---
     before = discretiser.attributes[1]
     self.assertTrue(before.is_continuous())
     self.assertEqual(27.5, discretiser.training[0].value(before))
     self.assertEqual(32, discretiser.training[2].value(before))
     self.assertEqual(25.4, discretiser.test[0].value(before))
     grouped = discretiser.training.values_grouped_by_attribute([before])
     column = grouped[0]
     column.sort()
     expected_sorted = [6.0, 9.0, 9.0, 10.699999999999999, 12.0, 12.0, 12.0, 14.1, 18.0, 27.5, 32.0, 33.100000000000001]
     self.assertEqual(expected_sorted, column)

     discretiser.unsupervised_equal_frequency()

     # --- state after discretisation ---
     # Look the attribute up again instead of reusing `before`.
     after = discretiser.attributes[1]
     self.assertFalse(after.is_continuous())
     self.assertEqual(4, len(after.values))
     self.assertEqual('c', discretiser.training[0].value(discretiser.attributes[1]))
     self.assertEqual('d', discretiser.training[2].value(discretiser.attributes[1]))
     self.assertEqual('c', discretiser.test[0].value(discretiser.attributes[1]))
Esempio n. 7
0
 def test_unsupervised_equal_width_discretisation(self):
     # unsupervised_equal_width() should turn the attributes selected by
     # index (1, 4, 5, 6, 7) from continuous into discrete ones, while
     # attribute 0, which is not in the index list, stays continuous.
     dataset = datasetsDir(self) + 'numerical' + SEP + 'person'
     training, attributes, klass, test, gold = self.get_instances(dataset, True, False)
     discretiser = discretise.Discretiser(training, attributes, klass, test, gold, [1, 4, 5, 6, 7], [3, 2, 3, 4, 2])

     # Before: every checked attribute is continuous.
     for position in (0, 1, 4, 5, 6, 7):
         self.assertTrue(discretiser.attributes[position].is_continuous())
     self.assertEqual(25, discretiser.training[0].value(discretiser.attributes[1]))
     self.assertEqual(26, discretiser.test[0].value(discretiser.attributes[1]))

     discretiser.unsupervised_equal_width()

     # After: only the unselected attribute 0 is still continuous.
     self.assertTrue(discretiser.attributes[0].is_continuous())
     for position in (1, 4, 5, 6, 7):
         self.assertFalse(discretiser.attributes[position].is_continuous())
     self.assertEqual('a', discretiser.training[0].value(discretiser.attributes[1]))
     self.assertEqual('a', discretiser.test[0].value(discretiser.attributes[1]))