コード例 #1
0
    def test_include_expands_range(self):
        # Including values into an empty range grows it to cover them; the
        # included maximum itself becomes part of the range.
        _range = r.Range()
        _range.include(4)
        self.assertFalse(_range.includes(0))
        self.assertFalse(_range.includes(3.99999))
        self.assertTrue(_range.includes(4))
        self.assertFalse(_range.includes(4.000002))

        _range.include(0)
        self.assertTrue(_range.includes(0))
        self.assertTrue(_range.includes(1))
        self.assertTrue(_range.includes(4))

        # assertTrue(a, b) treats b as the failure message and never compares
        # the two objects, so real comparison assertions are used instead.
        # NOTE(review): expected outcomes are inferred from the bounds checked
        # in this test (the range built via include() reaches just past 4,
        # Range(0, 4) stops at 4) -- confirm against Range.__eq__.
        _other = r.Range(0, 4)
        self.assertNotEqual(_range, _other)
        _same = r.Range(0, 4, True)
        self.assertEqual(_range, _same)

        # Including the current upper limit pushes the bound just past it.
        _other.include(4)
        self.assertEqual(0, _other.lower)
        self.assertEqual(4.000001, _other.upper)

        _range.include(5)
        self.assertTrue(_range.includes(4.1))
        self.assertTrue(_range.includes(5))
コード例 #2
0
 def test_range_equality(self):
     # Two ranges built from the same limits must compare equal and hash
     # alike; a different upper limit must break equality.
     first = r.Range(0, 4)
     twin = r.Range(0, 4)
     self.assertEqual(first, twin)
     self.assertEqual(hash(first), hash(twin))

     wider = r.Range(0, 4.1)
     self.assertNotEqual(first, wider)
コード例 #3
0
ファイル: discretise.py プロジェクト: sushengyang/NLP-project
def ranges_from_chunks(chunks):
    """Build one range per chunk, with boundaries midway between neighbours.

    Each chunk is an indexable sequence of values. The first range starts at
    the first value of the first chunk; every inner boundary is the mean of
    the last value of one chunk and the first value of the next; the final
    range ends at the last value of the last chunk and is created with the
    third Range argument set to True.

    Returns a list of r.Range objects, or an empty list when chunks is empty
    (previously an empty input failed on the unbound ``prev``).
    """
    if len(chunks) == 0:
        return []
    ranges = []
    prev = chunks[0][0]
    for index in range(len(chunks) - 1):
        # float() keeps the midpoint exact even when both values are ints.
        mid = float(chunks[index][-1] + chunks[index + 1][0]) / 2
        ranges.append(r.Range(prev, mid))
        prev = mid
    ranges.append(r.Range(prev, chunks[-1][-1], True))
    return ranges
コード例 #4
0
ファイル: instances.py プロジェクト: sushengyang/NLP-project
 def as_ranges(self):
     """
     Turn the stored breakpoints into a list of consecutive ranges.

     After sorting this object, each entry of self.data is used as an index
     into self.attr_values; the boundary between two ranges is the mean of the
     value at that index and the value right after it. The first range starts
     at attr_values[0] and the last one ends at attr_values[-1], created with
     the third Range argument set to True.
     """
     start = self.attr_values[0]
     self.sort()
     result = []
     for position in self.data:
         boundary = (self.attr_values[position] + self.attr_values[position + 1]) / 2.0
         result.append(r.Range(start, boundary))
         start = boundary
     closing = r.Range(start, self.attr_values[-1], True)
     result.append(closing)
     return result
コード例 #5
0
 def test_returns_array_of_discretised_attributes(self):
     # discretised_attributes() should yield one discretised attribute per
     # supplied range.
     # NOTE(review): [4, 6] and [2, 4] look like attribute indices and the
     # number of values per attribute -- confirm against Discretiser.
     person_path = datasetsDir(self) + 'numerical' + SEP + 'person'
     loaded = self.get_instances(person_path, True, False)
     _training, attributes, klass, _test, _gold = loaded
     discretiser = discretise.Discretiser(_training, attributes, klass, _test, _gold, [4, 6], [2, 4])
     value_ranges = [nr.Range(0, 2), nr.Range(0, 120000)]
     disc_attrs = discretiser.discretised_attributes(value_ranges)

     self.assertEqual(2, len(disc_attrs))
     self.assertEqual(4, disc_attrs[0].index)
     self.assertEqual(2, len(disc_attrs[0].values))
     self.assertEqual(4, len(disc_attrs[1].values))
コード例 #6
0
    def test_within_range(self):
        # A plain range includes its lower limit but excludes its upper limit.
        _range = r.Range(0, 4)
        self.assertTrue(_range.includes(0))
        self.assertTrue(_range.includes(1))
        self.assertTrue(_range.includes(3))
        self.assertTrue(_range.includes(3.9999))
        self.assertFalse(_range.includes(4))
        self.assertFalse(_range.includes(4.1))

        # With the third argument set to True the upper limit is included,
        # but values beyond it still are not. The original re-checked _range
        # here, which left the new range's upper bound untested.
        _new_range = r.Range(0, 4, True)
        self.assertTrue(_new_range.includes(4))
        self.assertFalse(_new_range.includes(4.1))
コード例 #7
0
 def test_discretise_using_discretised_attributes(self):
     # Discretising the training instances should replace the numeric values
     # of attributes 4 and 6 in the first instance with their class labels.
     person_path = datasetsDir(self) + 'numerical' + SEP + 'person'
     _training = training(person_path)
     _attributes = attributes(person_path)
     self.assertEqual(0.0, _training[0].value(_attributes[4]))
     self.assertEqual(65000.0, _training[0].value(_attributes[6]))

     dependents_ranges = nr.Range(0, 2, True).split(2)
     disc_dependents = da.DiscretisedAttribute('dependents', dependents_ranges, 4)
     income_ranges = nr.Range(0, 120000, True).split(5)
     disc_annual_income = da.DiscretisedAttribute('annualincome', income_ranges, 6)
     _training.discretise([disc_dependents, disc_annual_income])

     self.assertEqual('a', _training[0].value(disc_dependents))
     self.assertEqual('c', _training[0].value(disc_annual_income))
コード例 #8
0
 def test_discretise_using_discretised_attributes(self):
     # A single training instance should report numeric values for the
     # continuous attributes before discretisation and labels afterwards.
     dependents = attribute.Attribute('dependents', ['continuous'], 4)
     annual_salary = attribute.Attribute('annualsalary', ['continuous'], 6)
     dependents_ranges = r.Range(0, 2, True).split(2)
     disc_dependents = da.DiscretisedAttribute('dependents', dependents_ranges, 4)
     salary_ranges = r.Range(0, 120000, True).split(5)
     disc_annual_salary = da.DiscretisedAttribute('annualsalary', salary_ranges, 6)

     raw_values = ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2']
     instance = ins.TrainingInstance(raw_values, 'yes')
     self.assertEqual(2, instance.value(dependents))
     self.assertEqual(120000, instance.value(annual_salary))

     instance.discretise([disc_dependents, disc_annual_salary])

     self.assertEqual('b', instance.value(disc_dependents))
     self.assertEqual('e', instance.value(disc_annual_salary))
コード例 #9
0
    def test_include_adds_the_max(self):
        # The upper limit is excluded at first; including it explicitly must
        # move the upper bound just past that value.
        bounded = r.Range(5, 8.0)
        self.assertFalse(bounded.includes(8))

        bounded.include(8.0)

        self.assertTrue(bounded.includes(8))
        self.assertAlmostEqual(8.000001, bounded.upper)
コード例 #10
0
 def test_creates_class_values_for_ranges(self):
     # A discretised attribute built from five ranges exposes five letter
     # labels, keeps the ranges and index, and reports a discrete type.
     five_ranges = nr.Range(-10, 40, True).split(5)
     temperature = da.DiscretisedAttribute('temperature', five_ranges, 1)

     self.assertEqual('temperature', temperature.name)
     self.assertEqual(list('abcde'), temperature.values)
     self.assertEqual(five_ranges, temperature.ranges)
     self.assertEqual(1, temperature.index)
     self.assertEqual(attr.DISCRETE, temperature.type)
コード例 #11
0
 def test_split_returns_none_if_size_of_each_split_is_less_than_delta(self):
     # Despite the name, this checks that split() raises se.SystemError with
     # the expected message when the pieces would be smaller than the delta.
     _range = r.Range(0, 0.000005)
     try:
         _range.split(7)
     except se.SystemError as e:
         # Relies on the exception exposing a `message` attribute, as the
         # original test did.
         self.assertEqual(
             'Splitting of range resulted in elements smaller than delta 1e-06.',
             e.message)
     else:
         # Without this branch the test passed silently when nothing was raised.
         self.fail('split() did not raise SystemError for splits smaller than delta')
コード例 #12
0
 def test_maps_continuous_value_to_correct_discretised_equivalent(self):
     # Each continuous reading must map to the label of the range holding it.
     five_ranges = nr.Range(-10, 40, True).split(5)
     temperature = da.DiscretisedAttribute('temperature', five_ranges, 1)
     expectations = (('a', -10), ('b', 0), ('b', 1), ('c', 10), ('e', 40))
     for label, reading in expectations:
         self.assertEqual(label, temperature.mapping(reading))
コード例 #13
0
    def test_discretise_replaces_cont_attrs_in_args_with_disc_ones(self):
        # Only the attributes at the discretised indices (4 and 6) should stop
        # being continuous; the ones at 0 and 7 must stay untouched.
        attrs = attributes(datasetsDir(self) + 'numerical' + SEP + 'person')
        for position in (0, 4, 6, 7):
            self.assertTrue(attrs[position].is_continuous())

        disc_dependents = da.DiscretisedAttribute('dependents', nr.Range(0, 2, True).split(2), 4)
        disc_income = da.DiscretisedAttribute('annualincome', nr.Range(0, 120000, True).split(5), 6)
        attrs.discretise([disc_dependents, disc_income])

        for position in (4, 6):
            self.assertFalse(attrs[position].is_continuous())

        for position in (0, 7):
            self.assertTrue(attrs[position].is_continuous())

        self.assertEqual(list('ab'), attrs[4].values)
        self.assertEqual(list('abcde'), attrs[6].values)
コード例 #14
0
 def test_binary_search(self):
     # binary_search returns the index of the range holding the value, or -1
     # when no range holds it.
     four_ranges = [nr.Range(2, 4), nr.Range(4, 6), nr.Range(6, 8), nr.Range(8, 10, True)]
     for expected, value in ((0, 2), (1, 4), (3, 10), (-1, 1), (-1, 11)):
         self.assertEqual(expected, da.binary_search(four_ranges, value))

     three_ranges = [nr.Range(2, 4), nr.Range(4, 6), nr.Range(6, 8, True)]
     for expected, value in ((-1, 9), (2, 8)):
         self.assertEqual(expected, da.binary_search(three_ranges, value))

     thirds = nr.Range(6, 32, True).split(3)
     self.assertEqual(0, da.binary_search(thirds, 12))

     halves = nr.Range(0, 2, True).split(2)
     self.assertEqual(0, da.binary_search(halves, 0))
コード例 #15
0
    def test_split_includes_the_highest_and_lowest(self):
        # Splitting a range built with include() yields contiguous pieces that
        # start at the lowest value; only the last piece carries the widened
        # upper bound.
        quarters_source = r.Range()
        quarters_source.include(0)
        quarters_source.include(4)
        splits = quarters_source.split(4)
        for position, (low, high) in enumerate(((0, 1), (1, 2), (2, 3))):
            self.assertEqual(low, splits[position].lower)
            self.assertEqual(high, splits[position].upper)
        self.assertEqual(3, splits[3].lower)
        self.assertEqual(4.000001, splits[3].upper)

        halves_source = r.Range()
        halves_source.include(2)
        halves_source.include(8)
        splits = halves_source.split(2)
        first_half, second_half = splits[0], splits[1]
        self.assertEqual(2, first_half.lower)
        self.assertEqual(5, first_half.upper)
        self.assertEqual(5, second_half.lower)
        self.assertAlmostEqual(8.000001, second_half.upper, 6)
コード例 #16
0
ファイル: instances.py プロジェクト: sushengyang/NLP-project
 def value_ranges(self, attributes):
     """
     Build one range per group of values returned by values_grouped_by_attribute
     for the given attributes (expected to be one group per attribute).

     Every attribute must be continuous; otherwise inv.InvalidDataError is raised
     before any values are read. Each group is sorted in place and its smallest
     and largest entries become the limits of a range created with the third
     Range argument set to True.
     """
     for candidate in attributes:
         if candidate.is_continuous():
             continue
         raise inv.InvalidDataError('Cannot discretise non continuous attribute ' + candidate.name)

     spans = []
     for observed in self.values_grouped_by_attribute(attributes):
         observed.sort()
         smallest, largest = observed[0], observed[-1]
         spans.append(r.Range(smallest, largest, True))
     return spans
コード例 #17
0
 def test_finding_mapping_for_value_out_of_range_returns_nearest_match(self):
     # Values beyond either end of the ranges map to the label of the
     # nearest (last or first) range.
     five_ranges = nr.Range(-10, 40, True).split(5)
     temperature = da.DiscretisedAttribute('temperature', five_ranges, 1)
     for label, reading in (('e', 50), ('a', -20)):
         self.assertEqual(label, temperature.mapping(reading))
コード例 #18
0
 def test_split_returns_none_when_lower_eq_upper(self):
     # A freshly created range has identical limits, so split() yields None.
     # assertEqual replaces the deprecated assertEquals alias.
     _range = r.Range()
     self.assertEqual(None, _range.split(2))
コード例 #19
0
 def test_string_reprn(self):
     # str() shows the lower limit and the widened upper bound in brackets.
     spanned = r.Range()
     for number in (0, 4):
         spanned.include(number)
     self.assertEqual('[0,4.000001]', str(spanned))