Ejemplo n.º 1
0
 def test_discretise_using_discretised_attributes(self):
     """Check that discretising the 'person' training data swaps numbers for class labels.

     The first training instance is inspected at attribute indices 4 and 6
     before discretisation (raw numeric values) and again afterwards through
     the discretised attributes (single-letter class labels).
     """
     dataset_path = datasetsDir(self) + 'numerical' + SEP + 'person'
     instances = training(dataset_path)
     attrs = attributes(dataset_path)

     # Raw values held by the first instance prior to discretisation.
     for expected_raw, column in ((0.0, 4), (65000.0, 6)):
         self.assertEqual(expected_raw, instances[0].value(attrs[column]))

     dependents_disc = da.DiscretisedAttribute(
         'dependents', nr.Range(0, 2, True).split(2), 4)
     income_disc = da.DiscretisedAttribute(
         'annualincome', nr.Range(0, 120000, True).split(5), 6)
     instances.discretise([dependents_disc, income_disc])

     # The same instance now reports class labels for the discretised attributes.
     for expected_label, disc in (('a', dependents_disc), ('c', income_disc)):
         self.assertEqual(expected_label, instances[0].value(disc))
Ejemplo n.º 2
0
 def test_discretise_using_discretised_attributes(self):
     """Check that a single training instance maps its values to class labels.

     The instance is queried through the continuous attributes first, then
     discretised in place and queried through the discretised attributes.
     """
     instance = ins.TrainingInstance(
         ['3','34','self-employed','married','2','3','120000','2'], 'yes')

     # Continuous view: the values at indices 4 and 6 compare equal to numbers.
     continuous_expectations = (
         (2, attribute.Attribute('dependents',['continuous'], 4)),
         (120000, attribute.Attribute('annualsalary', ['continuous'], 6)),
     )
     for expected_number, continuous_attr in continuous_expectations:
         self.assertEqual(expected_number, instance.value(continuous_attr))

     dependents_disc = da.DiscretisedAttribute(
         'dependents', r.Range(0, 2, True).split(2), 4)
     salary_disc = da.DiscretisedAttribute(
         'annualsalary', r.Range(0, 120000, True).split(5), 6)
     instance.discretise([dependents_disc, salary_disc])

     # Discretised view: the same positions now yield class labels.
     for expected_label, disc in (('b', dependents_disc), ('e', salary_disc)):
         self.assertEqual(expected_label, instance.value(disc))
 def test_maps_continuous_value_to_correct_discretised_equivalent(self):
     """Check that mapping() returns the expected class label for in-range values.

     The attribute is built from the range (-10, 40, True) split into 5 parts.
     """
     temperature = da.DiscretisedAttribute(
         'temperature', nr.Range(-10, 40, True).split(5), 1)

     # (expected label, continuous input) pairs, checked in this order.
     expectations = (
         ('a', -10),
         ('b', 0),
         ('b', 1),
         ('c', 10),
         ('e', 40),
     )
     for expected_label, continuous_value in expectations:
         self.assertEqual(expected_label, temperature.mapping(continuous_value))
 def test_creates_class_values_for_ranges(self):
     """Check the fields exposed by a freshly constructed DiscretisedAttribute.

     Verifies name, values, ranges, index and type for an attribute built
     from five ranges.
     """
     five_ranges = nr.Range(-10, 40, True).split(5)
     temperature = da.DiscretisedAttribute('temperature', five_ranges, 1)

     # (field name, expected value) pairs; each field is read only when checked.
     field_expectations = (
         ('name', 'temperature'),
         ('values', ['a', 'b', 'c', 'd', 'e']),
         ('ranges', five_ranges),
         ('index', 1),
     )
     for field, expected in field_expectations:
         self.assertEqual(expected, getattr(temperature, field))

     self.assertEqual(attr.DISCRETE, temperature.type)
Ejemplo n.º 5
0
 def discretised_attributes(self, ranges):
     """Build one DiscretisedAttribute per entry in ``self.options``.

     For each position, the element of ``ranges`` at that position is split
     using the matching entry of ``self.options``, and the result is paired
     with the name and index of the attribute at the same position in
     ``self.subset``.

     Args:
         ranges: indexable collection whose elements provide ``split(width)``.

     Returns:
         A list of ``da.DiscretisedAttribute`` objects, in position order.
     """
     built = []
     for position, width in enumerate(self.options):
         source_range = ranges[position]
         original = self.subset[position]
         built.append(
             da.DiscretisedAttribute(
                 original.name, source_range.split(width), original.index))
     return built
Ejemplo n.º 6
0
    def test_discretise_replaces_cont_attrs_in_args_with_disc_ones(self):
        """Check that discretise() swaps only the targeted continuous attributes.

        Attributes at indices 4 and 6 are discretised; those at indices 0 and 7
        are not passed to discretise() and must remain continuous.
        """
        attrs = attributes(datasetsDir(self) + 'numerical' + SEP + 'person')

        # All four attributes start out continuous.
        for position in (0, 4, 6, 7):
            self.assertTrue(attrs[position].is_continuous())

        dependents_disc = da.DiscretisedAttribute(
            'dependents', nr.Range(0, 2, True).split(2), 4)
        income_disc = da.DiscretisedAttribute(
            'annualincome', nr.Range(0, 120000, True).split(5), 6)
        attrs.discretise([dependents_disc, income_disc])

        # The discretised positions are no longer continuous ...
        for position in (4, 6):
            self.assertFalse(attrs[position].is_continuous())

        # ... while the untouched positions still are.
        for position in (0, 7):
            self.assertTrue(attrs[position].is_continuous())

        self.assertEqual(['a', 'b'], attrs[4].values)
        self.assertEqual(['a', 'b', 'c', 'd', 'e'], attrs[6].values)
Ejemplo n.º 7
0
 def __supervised_discretisation(self, action):
     """Discretise every attribute in ``self.subset`` using supervised breakpoints.

     For each attribute, breakpoints are obtained from
     ``self.training.supervised_breakpoints``, handed to ``action`` together
     with the attribute's position in ``self.subset``, and then converted to
     ranges for a new ``da.DiscretisedAttribute``. The collected attributes
     are finally passed to ``self.__discretise``.

     Args:
         action: callable invoked as ``action(breakpoints, position)`` before
             the breakpoints are converted to ranges.
             NOTE(review): presumably it adjusts the breakpoints in place;
             confirm against the callers.
     """
     converted = []
     for position, candidate in enumerate(self.subset):
         cut_points = self.training.supervised_breakpoints(candidate)
         action(cut_points, position)
         converted.append(
             da.DiscretisedAttribute(
                 candidate.name, cut_points.as_ranges(), candidate.index))
     self.__discretise(converted)
Ejemplo n.º 8
0
 def unsupervised_equal_frequency(self):
     """Discretise ``self.subset`` using chunks derived from sorted values.

     The values of each attribute are fetched via
     ``self.training.values_grouped_by_attribute``, sorted in place, chunked
     by ``get_chunks_with_frequency`` using the matching entry of
     ``self.options``, and turned into ranges with ``ranges_from_chunks``.
     The resulting discretised attributes are passed to ``self.__discretise``.
     """
     grouped = self.training.values_grouped_by_attribute(self.subset)
     converted = []
     for position, candidate in enumerate(self.subset):
         column = grouped[position]
         # In-place sort: the list held inside `grouped` is reordered too.
         column.sort()
         chunks = get_chunks_with_frequency(column, self.options[position])
         converted.append(
             da.DiscretisedAttribute(
                 candidate.name, ranges_from_chunks(chunks), candidate.index))
     self.__discretise(converted)
 def test_finding_mapping_for_value_out_of_range_returns_nearest_match(self):
     """Check that mapping() returns the end labels for values outside the ranges.

     The attribute is built from the range (-10, 40, True) split into 5 parts;
     50 is expected to map to 'e' and -20 to 'a'.
     """
     temperature = da.DiscretisedAttribute(
         'temperature', nr.Range(-10, 40, True).split(5), 1)

     # (expected label, out-of-range input) pairs, checked in this order.
     for expected_label, outlier in (('e', 50), ('a', -20)):
         self.assertEqual(expected_label, temperature.mapping(outlier))