def test_discretise_using_discretised_attributes(self):
    # Discretising the 'person' training set must replace the raw values
    # stored at attribute indices 4 and 6 with discretised class labels.
    dataset_path = datasetsDir(self) + 'numerical' + SEP + 'person'
    instances = training(dataset_path)
    attrs = attributes(dataset_path)

    # Before discretisation the first instance still reports raw numbers.
    self.assertEqual(0.0, instances[0].value(attrs[4]))
    self.assertEqual(65000.0, instances[0].value(attrs[6]))

    dependents_ranges = nr.Range(0, 2, True).split(2)
    binned_dependents = da.DiscretisedAttribute(
        'dependents', dependents_ranges, 4)
    income_ranges = nr.Range(0, 120000, True).split(5)
    binned_income = da.DiscretisedAttribute(
        'annualincome', income_ranges, 6)

    instances.discretise([binned_dependents, binned_income])

    # Afterwards the same instance reports class labels instead.
    self.assertEqual('a', instances[0].value(binned_dependents))
    self.assertEqual('c', instances[0].value(binned_income))
def test_discretise_using_discretised_attributes(self):
    # A single training instance must report class labels, not raw numbers,
    # for the attributes at indices 4 and 6 once it has been discretised.
    raw_dependents = attribute.Attribute('dependents', ['continuous'], 4)
    raw_salary = attribute.Attribute('annualsalary', ['continuous'], 6)

    binned_dependents = da.DiscretisedAttribute(
        'dependents', r.Range(0, 2, True).split(2), 4)
    binned_salary = da.DiscretisedAttribute(
        'annualsalary', r.Range(0, 120000, True).split(5), 6)

    person = ins.TrainingInstance(
        ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2'],
        'yes')

    # Values are read back as numbers while the attributes are continuous.
    self.assertEqual(2, person.value(raw_dependents))
    self.assertEqual(120000, person.value(raw_salary))

    person.discretise([binned_dependents, binned_salary])

    self.assertEqual('b', person.value(binned_dependents))
    self.assertEqual('e', person.value(binned_salary))
def test_maps_continuous_value_to_correct_discretised_equivalent(self):
    # Each continuous temperature must map to the expected class label
    # when the -10..40 range is split into five parts.
    temperature = da.DiscretisedAttribute(
        'temperature', nr.Range(-10, 40, True).split(5), 1)

    # (continuous value, expected label), checked in this order.
    expectations = (
        (-10, 'a'),
        (0, 'b'),
        (1, 'b'),
        (10, 'c'),
        (40, 'e'),
    )
    for value, label in expectations:
        self.assertEqual(label, temperature.mapping(value))
def test_creates_class_values_for_ranges(self):
    # A DiscretisedAttribute built from five ranges must expose its name,
    # one label per range, the ranges themselves, its index and its type.
    temperature_ranges = nr.Range(-10, 40, True).split(5)
    temperature = da.DiscretisedAttribute(
        'temperature', temperature_ranges, 1)

    self.assertEqual('temperature', temperature.name)
    # One lowercase letter per range, in order.
    self.assertEqual(['a', 'b', 'c', 'd', 'e'], temperature.values)
    self.assertEqual(temperature_ranges, temperature.ranges)
    self.assertEqual(1, temperature.index)
    self.assertEqual(attr.DISCRETE, temperature.type)
def discretised_attributes(self, ranges):
    """Build one ``DiscretisedAttribute`` per entry of ``self.options``.

    Args:
        ranges: Sequence indexed in parallel with ``self.options`` and
            ``self.subset``; each element must provide ``split(width)``.

    Returns:
        A list of ``da.DiscretisedAttribute`` objects. Each one takes its
        name and index from the matching ``self.subset`` attribute, and its
        ranges from splitting the matching range with the matching option.
    """
    built = []
    for position, width in enumerate(self.options):
        # ranges and subset are indexed (not zipped) so a sequence shorter
        # than self.options still raises IndexError.
        source_range = ranges[position]
        attribute = self.subset[position]
        built.append(
            da.DiscretisedAttribute(
                attribute.name, source_range.split(width), attribute.index))
    return built
def test_discretise_replaces_cont_attrs_in_args_with_disc_ones(self):
    # Only the attributes named in the discretise() call (indices 4 and 6)
    # may stop being continuous; indices 0 and 7 must be left untouched.
    attrs = attributes(datasetsDir(self) + 'numerical' + SEP + 'person')

    for position in (0, 4, 6, 7):
        self.assertTrue(attrs[position].is_continuous())

    replacements = [
        da.DiscretisedAttribute(
            'dependents', nr.Range(0, 2, True).split(2), 4),
        da.DiscretisedAttribute(
            'annualincome', nr.Range(0, 120000, True).split(5), 6),
    ]
    attrs.discretise(replacements)

    for position in (4, 6):
        self.assertFalse(attrs[position].is_continuous())
    for position in (0, 7):
        self.assertTrue(attrs[position].is_continuous())

    # The replaced attributes expose one label per range.
    self.assertEqual(['a', 'b'], attrs[4].values)
    self.assertEqual(['a', 'b', 'c', 'd', 'e'], attrs[6].values)
def __supervised_discretisation(self, action):
    """Discretise every attribute in ``self.subset`` from supervised breakpoints.

    For each attribute the breakpoints are fetched from
    ``self.training.supervised_breakpoints``, handed to ``action`` together
    with the attribute's position in ``self.subset``, and then converted to
    ranges. The resulting discretised attributes are passed to
    ``self.__discretise``.

    Args:
        action: Callable invoked as ``action(breakpoints, index)`` before
            the breakpoints are turned into ranges. Presumably it adjusts
            the breakpoints in place; confirm against the callers.
    """
    disc_attrs = []
    for position, attribute in enumerate(self.subset):
        cut_points = self.training.supervised_breakpoints(attribute)
        # action must run before as_ranges() is called on the breakpoints.
        action(cut_points, position)
        disc_attrs.append(
            da.DiscretisedAttribute(
                attribute.name, cut_points.as_ranges(), attribute.index))
    self.__discretise(disc_attrs)
def unsupervised_equal_frequency(self):
    """Discretise ``self.subset`` using chunks built from sorted values.

    The values of each attribute, as returned by
    ``self.training.values_grouped_by_attribute``, are sorted and chunked by
    ``get_chunks_with_frequency`` using the matching entry of
    ``self.options``. ``ranges_from_chunks`` turns the chunks into ranges.
    The resulting discretised attributes are passed to ``self.__discretise``.
    """
    grouped_values = self.training.values_grouped_by_attribute(self.subset)
    disc_attrs = []
    for position, attribute in enumerate(self.subset):
        column = grouped_values[position]
        # Sorted in place, so the grouped list itself is reordered.
        column.sort()
        chunks = get_chunks_with_frequency(column, self.options[position])
        bins = ranges_from_chunks(chunks)
        disc_attrs.append(
            da.DiscretisedAttribute(attribute.name, bins, attribute.index))
    self.__discretise(disc_attrs)
def test_finding_mapping_for_value_out_of_range_returns_nearest_match(self):
    # Values outside the -10..40 span given to Range must map to the
    # nearest end label rather than fail.
    temperature = da.DiscretisedAttribute(
        'temperature', nr.Range(-10, 40, True).split(5), 1)

    last_label = 'e'
    first_label = 'a'
    self.assertEqual(last_label, temperature.mapping(50))
    self.assertEqual(first_label, temperature.mapping(-20))