def test_include_expands_range(self):
        """Check that ``include`` widens a range to cover each value it is given.

        Starting from a default-constructed range, including 4 must make 4 a
        member while 0, 3.99999 and 4.000002 stay outside. Including 0 next
        must make everything from 0 to 4 a member, and including 5 must push
        the upper side out so that 4.1 and 5 are members as well.
        """
        _range = r.Range()
        _range.include(4)
        self.assertFalse(_range.includes(0))
        self.assertFalse(_range.includes(3.99999))
        self.assertTrue(_range.includes(4))
        self.assertFalse(_range.includes(4.000002))

        _range.include(0)
        self.assertTrue(_range.includes(0))
        self.assertTrue(_range.includes(1))
        self.assertTrue(_range.includes(4))

        # assertTrue(a, b) only checks that ``a`` is truthy and uses ``b`` as
        # the failure message, so the comparisons are spelled out explicitly.
        # NOTE(review): assumes Range equality compares the lower/upper bounds;
        # confirm against Range.__eq__.
        _other = r.Range(0, 4)
        # _other has not had its maximum included yet, so it differs from _range.
        self.assertNotEqual(_range, _other)
        _same = r.Range(0, 4, True)
        self.assertEqual(_range, _same)

        _other.include(4)
        self.assertEqual(0, _other.lower)
        self.assertEqual(4.000001, _other.upper)

        _range.include(5)
        self.assertTrue(_range.includes(4.1))
        self.assertTrue(_range.includes(5))
 def test_range_equality(self):
     """Ranges with identical bounds compare equal and hash alike; a different upper bound does not."""
     first, twin = r.Range(0, 4), r.Range(0, 4)
     self.assertEqual(first, twin)
     self.assertEqual(hash(first), hash(twin))

     # Only the upper bound differs from ``first``.
     wider = r.Range(0, 4.1)
     self.assertNotEqual(first, wider)
def ranges_from_chunks(chunks):
    """Build contiguous ranges with boundaries midway between neighbouring chunks.

    The first range starts at the first element of the first chunk. Each
    boundary between two consecutive chunks is the mean of the last element
    of the earlier chunk and the first element of the later one. The final
    range ends at the last element of the last chunk and is created with
    ``True`` as its third argument.

    Parameters:
        chunks: a sequence of non-empty, indexable sequences of numbers.

    Returns:
        A list with one range per chunk, or an empty list when ``chunks`` is
        empty.
    """
    # Without this guard an empty input reached the final append with no
    # starting value bound and no last chunk to read from.
    if len(chunks) == 0:
        return []

    ranges = []
    prev = chunks[0][0]
    for index in range(len(chunks) - 1):
        # float() keeps the midpoint fractional even when both ends are ints.
        mid = float(chunks[index][-1] + chunks[index + 1][0]) / 2
        ranges.append(r.Range(prev, mid))
        prev = mid
    ranges.append(r.Range(prev, chunks[-1][-1], True))
    return ranges
Exemple #4
0
 def as_ranges(self):
     """
     Returns a list of ranges derived from the breakpoints held in self.data.

     Each entry of self.data is used as an index into self.attr_values; the
     boundary between two neighbouring ranges is the mean of the value at that
     index and the value right after it. The last range ends at the final
     attribute value and is created with True as its third argument.
     """
     # The starting bound is read before sorting, matching the original order.
     start = self.attr_values[0]
     self.sort()
     result = []
     for position in self.data:
         current = self.attr_values[position]
         following = self.attr_values[position + 1]
         boundary = (current + following) / 2.0
         result.append(r.Range(start, boundary))
         start = boundary
     closing = r.Range(start, self.attr_values[-1], True)
     result.append(closing)
     return result
Exemple #5
0
 def test_returns_array_of_discretised_attributes(self):
     """Discretising two ranges yields two discretised attributes with the expected index and value counts."""
     dataset = datasetsDir(self) + 'numerical' + SEP + 'person'
     _training, attributes, klass, _test, _gold = self.get_instances(dataset, True, False)
     discretiser = discretise.Discretiser(_training, attributes, klass, _test, _gold, [4,6], [2,4])

     value_ranges = [nr.Range(0, 2), nr.Range(0, 120000)]
     result = discretiser.discretised_attributes(value_ranges)

     self.assertEqual(2, len(result))
     first, second = result[0], result[1]
     self.assertEqual(4, first.index)
     self.assertEqual(2, len(first.values))
     self.assertEqual(4, len(second.values))
    def test_within_range(self):
        """Check ``includes`` at and around the bounds of a range.

        ``Range(0, 4)`` must include 0 and values just below 4, but neither 4
        nor anything above it. ``Range(0, 4, True)`` must include 4 while
        still rejecting 4.1.
        """
        _range = r.Range(0, 4)
        self.assertTrue(_range.includes(0))
        self.assertTrue(_range.includes(1))
        self.assertTrue(_range.includes(3))
        self.assertTrue(_range.includes(3.9999))
        self.assertFalse(_range.includes(4))
        self.assertFalse(_range.includes(4.1))

        _new_range = r.Range(0, 4, True)
        self.assertTrue(_new_range.includes(4))
        # Check the inclusive range here; the first range was already checked
        # against 4.1 above, so repeating it would leave this one untested.
        self.assertFalse(_new_range.includes(4.1))
Exemple #7
0
 def test_discretise_using_discretised_attributes(self):
     """Discretising the training data replaces the continuous values at indices 4 and 6 with class labels."""
     dataset = datasetsDir(self) + 'numerical' + SEP + 'person'
     instances = training(dataset)
     attrs = attributes(dataset)

     # Values of the first instance before discretisation.
     self.assertEqual(0.0, instances[0].value(attrs[4]))
     self.assertEqual(65000.0, instances[0].value(attrs[6]))

     dependents_ranges = nr.Range(0, 2, True).split(2)
     disc_dependents = da.DiscretisedAttribute('dependents', dependents_ranges, 4)
     income_ranges = nr.Range(0, 120000, True).split(5)
     disc_annual_income = da.DiscretisedAttribute('annualincome', income_ranges, 6)
     instances.discretise([disc_dependents, disc_annual_income])

     # The same instance now reports the labels of the matching ranges.
     self.assertEqual('a', instances[0].value(disc_dependents))
     self.assertEqual('c', instances[0].value(disc_annual_income))
 def test_discretise_using_discretised_attributes(self):
     """Discretising a single training instance swaps its continuous values for class labels."""
     dependents = attribute.Attribute('dependents',['continuous'], 4)
     annual_salary = attribute.Attribute('annualsalary', ['continuous'], 6)

     dependents_ranges = r.Range(0, 2, True).split(2)
     disc_dependents = da.DiscretisedAttribute('dependents', dependents_ranges, 4)
     salary_ranges = r.Range(0, 120000, True).split(5)
     disc_annual_salary = da.DiscretisedAttribute('annualsalary', salary_ranges, 6)

     row = ['3','34','self-employed','married','2','3','120000','2']
     instance = ins.TrainingInstance(row,'yes')

     # Before discretisation the continuous attributes read back as numbers.
     self.assertEqual(2, instance.value(dependents))
     self.assertEqual(120000, instance.value(annual_salary))

     instance.discretise([disc_dependents, disc_annual_salary])

     # Both values sit at the top of their ranges, so they map to the last label.
     self.assertEqual('b', instance.value(disc_dependents))
     self.assertEqual('e', instance.value(disc_annual_salary))
    def test_include_adds_the_max(self):
        """Including the current upper bound makes it a member and nudges ``upper`` just past it."""
        bounds = r.Range(5, 8.0)
        # The upper bound is not a member until it is explicitly included.
        self.assertFalse(bounds.includes(8))

        bounds.include(8.0)

        self.assertTrue(bounds.includes(8))
        self.assertAlmostEqual(8.000001, bounds.upper)
 def test_creates_class_values_for_ranges(self):
     """A discretised attribute built from five ranges exposes its name, five labels, ranges, index and type."""
     buckets = nr.Range(-10, 40, True).split(5)
     temperature = da.DiscretisedAttribute('temperature', buckets, 1)

     self.assertEqual('temperature', temperature.name)
     # One single-letter label per range, in order.
     self.assertEqual(list('abcde'), temperature.values)
     self.assertEqual(buckets, temperature.ranges)
     self.assertEqual(1, temperature.index)
     self.assertEqual(attr.DISCRETE, temperature.type)
 def test_split_returns_none_if_size_of_each_split_is_less_than_delta(self):
     """Splitting a range into parts smaller than the delta must raise ``se.SystemError``.

     The test fails if ``split`` returns normally, and otherwise checks the
     message carried by the raised error.
     """
     _range = r.Range(0, 0.000005)
     try:
         _range.split(7)
     except se.SystemError as e:
         self.assertEqual(
             'Splitting of range resulted in elements smaller than delta 1e-06.',
             e.message)
     else:
         # Without this branch the test passed silently when nothing was raised.
         self.fail('Expected se.SystemError when splitting into parts smaller than delta.')
 def test_maps_continuous_value_to_correct_discretised_equivalent(self):
     """Continuous values map to the label of the range they fall into."""
     buckets = nr.Range(-10, 40, True).split(5)
     temperature = da.DiscretisedAttribute('temperature', buckets, 1)

     # (continuous value, expected label), checked in this order.
     expectations = [(-10, 'a'), (0, 'b'), (1, 'b'), (10, 'c'), (40, 'e')]
     for value, label in expectations:
         self.assertEqual(label, temperature.mapping(value))
    def test_discretise_replaces_cont_attrs_in_args_with_disc_ones(self):
        """Discretising attributes 4 and 6 replaces only those two; 0 and 7 stay continuous."""
        attrs = attributes(datasetsDir(self) + 'numerical' + SEP + 'person')
        for index in (0, 4, 6, 7):
            self.assertTrue(attrs[index].is_continuous())

        dependents = da.DiscretisedAttribute('dependents', nr.Range(0, 2, True).split(2), 4)
        annual_income = da.DiscretisedAttribute('annualincome', nr.Range(0, 120000, True).split(5), 6)
        attrs.discretise([dependents, annual_income])

        # The two discretised positions are no longer continuous ...
        for index in (4, 6):
            self.assertFalse(attrs[index].is_continuous())

        # ... while the untouched ones still are.
        for index in (0, 7):
            self.assertTrue(attrs[index].is_continuous())

        self.assertEqual(['a', 'b'], attrs[4].values)
        self.assertEqual(['a', 'b', 'c', 'd', 'e'], attrs[6].values)
 def test_binary_search(self):
     """``binary_search`` returns the index of the range holding a value, or -1 when none does."""
     four_ranges = [nr.Range(2, 4), nr.Range(4, 6), nr.Range(6, 8), nr.Range(8, 10, True)]
     # (value searched for, expected index)
     for value, expected in ((2, 0), (4, 1), (10, 3), (1, -1), (11, -1)):
         self.assertEqual(expected, da.binary_search(four_ranges, value))

     three_ranges = [nr.Range(2, 4), nr.Range(4, 6), nr.Range(6, 8, True)]
     for value, expected in ((9, -1), (8, 2)):
         self.assertEqual(expected, da.binary_search(three_ranges, value))

     thirds = nr.Range(6, 32, True).split(3)
     self.assertEqual(0, da.binary_search(thirds, 12))

     halves = nr.Range(0, 2, True).split(2)
     self.assertEqual(0, da.binary_search(halves, 0))
    def test_split_includes_the_highest_and_lowest(self):
        """Splitting a range built via ``include`` keeps the lowest value and the nudged highest value as outer bounds."""
        quarters_source = r.Range()
        for value in (0, 4):
            quarters_source.include(value)
        splits = quarters_source.split(4)

        # Expected (lower, upper) of each part, checked in order.
        expected_bounds = [(0, 1), (1, 2), (2, 3), (3, 4.000001)]
        for index, (low, high) in enumerate(expected_bounds):
            self.assertEqual(low, splits[index].lower)
            self.assertEqual(high, splits[index].upper)

        halves_source = r.Range()
        for value in (2, 8):
            halves_source.include(value)
        splits = halves_source.split(2)

        first_half, second_half = splits[0], splits[1]
        self.assertEqual(2, first_half.lower)
        self.assertEqual(5, first_half.upper)
        self.assertEqual(5, second_half.lower)
        # The last upper bound is compared to 6 decimal places rather than exactly.
        self.assertAlmostEqual(8.000001, second_half.upper, 6)
Exemple #16
0
 def value_ranges(self, attributes):
     """
     Returns a list of range objects, one per entry produced by
     values_grouped_by_attribute for the given attributes. Each range runs from
     the smallest to the largest value of its group and is created with True as
     its third argument.

     Raises inv.InvalidDataError for the first attribute that is not continuous.
     """
     for attribute in attributes:
         if attribute.is_continuous():
             continue
         raise inv.InvalidDataError('Cannot discretise non continuous attribute ' + attribute.name)

     grouped = self.values_grouped_by_attribute(attributes)
     result = []
     for column in grouped:
         # Sorted in place so the extremes sit at either end of the list.
         column.sort()
         smallest, largest = column[0], column[-1]
         result.append(r.Range(smallest, largest, True))
     return result
 def test_finding_mapping_for_value_out_of_range_returns_nearest_match(self):
     """Values outside every range map to the label of the nearest end range."""
     buckets = nr.Range(-10, 40, True).split(5)
     temperature = da.DiscretisedAttribute('temperature', buckets, 1)

     # 50 lies above the top range, -20 below the bottom one.
     for value, label in ((50, 'e'), (-20, 'a')):
         self.assertEqual(label, temperature.mapping(value))
 def test_split_returns_none_when_lower_eq_upper(self):
     """Splitting a default-constructed range must return ``None``."""
     _range = r.Range()
     # assertIsNone replaces the deprecated assertEquals alias.
     self.assertIsNone(_range.split(2))
 def test_string_reprn(self):
     """``str`` of a range renders as ``[lower,upper]`` using the stored bounds."""
     bounds = r.Range()
     for value in (0, 4):
         bounds.include(value)
     self.assertEqual('[0,4.000001]', str(bounds))