def test_hamilton_distance(self):
        """Check ``hamiltonian_distance`` against one fixed training instance.

        The training instance ``['a', 5, 3.4, 'g']`` is compared with four
        test instances; the expected distances are 0, 1, 3 and 5.
        """
        schema = [
            attr.Attribute('A1', ['a', 'b'], 0),
            attr.Attribute('A2', ['continuous'], 1),
            attr.Attribute('A3', ['continuous'], 2),
            attr.Attribute('A4', ['g', 'h'], 3)
        ]
        reference = ins.TrainingInstance(['a', 5, 3.4, 'g'], 'y')
        # (expected distance, attribute values of the compared test instance)
        cases = [
            (0, ['a', 5, 3.4, 'g']),
            (1, ['b', 5, 3.4, 'g']),
            (3, ['b', 4, 3.4, 'h']),
            (5, ['b', 4, 1.4, 'h']),
        ]
        for expected, values in cases:
            candidate = ins.TestInstance(values)
            actual = distancemetric.hamiltonian_distance(
                reference, candidate, schema)
            self.assertEqual(expected, actual)
    def test_euclidean_distance(self):
        """Check ``euclidean_distance`` against one fixed training instance.

        The training instance ``['a', 5, 3.4, 'g']`` is compared with four
        test instances; the expected distances are 0, 1, sqrt(3) and sqrt(7).
        Results are compared with ``assertAlmostEqual`` rather than
        ``assertEqual`` so the test does not depend on the exact
        floating-point rounding of the square root computation.
        """
        attributes = [
            attr.Attribute('A1', ['a', 'b'], 0),
            attr.Attribute('A2', ['continuous'], 1),
            attr.Attribute('A3', ['continuous'], 2),
            attr.Attribute('A4', ['g', 'h'], 3)
        ]
        instance1 = ins.TrainingInstance(['a', 5, 3.4, 'g'], 'y')
        # (expected distance, attribute values of the compared test instance)
        cases = [
            (0, ['a', 5, 3.4, 'g']),
            (1, ['b', 5, 3.4, 'g']),
            (math.sqrt(3), ['b', 4, 3.4, 'h']),
            (math.sqrt(7), ['b', 4, 1.4, 'h']),
        ]
        for expected, values in cases:
            instance2 = ins.TestInstance(values)
            self.assertAlmostEqual(
                expected,
                distancemetric.euclidean_distance(instance1, instance2,
                                                  attributes))
Esempio n. 3
0
 def test_remove_attrbutes(self):
     """Removing two attributes from an 8-value instance leaves 6 values.

     After removing the attributes at indices 0 and 6, the first remaining
     value is expected to be '34'.
     """
     record = ins.TrainingInstance(
         ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2'],
         'yes')
     id_attr = attribute.Attribute('id', ['continuous'], 0)
     salary_attr = attribute.Attribute('annualsalary', ['continuous'], 6)
     record.remove_attributes([id_attr, salary_attr])
     self.assertEqual(6, len(record.attrs))
     self.assertEqual('34', record.attrs[0])
Esempio n. 4
0
 def test_attribute_creation(self):
     """Attributes loaded from the 'person' dataset match expectations.

     Eight attributes are expected; the first and the last are compared
     with explicitly constructed continuous attributes.
     """
     person_path = datasetsDir(self) + 'numerical' + SEP + 'person'
     loaded = attributes(person_path)
     self.assertEqual(8, len(loaded), '8 attributes should be present')

     expected_first = a.Attribute('id', ['continuous'], 0)
     self.assertEqual(expected_first, loaded[0])

     expected_last = a.Attribute('creditrating', ['continuous'], 7)
     self.assertEqual(expected_last, loaded[7])
Esempio n. 5
0
    def test_is_countinuous_returns_true_if_continuous(self):
        """``is_continuous`` and ``type`` reflect how an attribute was built.

        An attribute created with ``['continuous']`` must report
        ``a.CONTINUOUS``; one created with explicit values must report
        ``a.DISCRETE``.
        """
        temperature = a.Attribute('temperature', ['continuous'], 1)
        self.assertEqual(a.CONTINUOUS, temperature.type)
        self.assertTrue(temperature.is_continuous())

        foo = a.Attribute('foo', ['a', 'b', 'c'], 0)
        self.assertEqual(a.DISCRETE, foo.type)
        self.assertFalse(foo.is_continuous())
Esempio n. 6
0
 def test_equality(self):
     """Attributes compare equal only when built from the same arguments.

     One identically constructed attribute must be equal; two attributes
     that differ (name and index, or value list) must not be.
     """
     base = a.Attribute('foo', ['a', 'b', 'c'], 0)
     twin = a.Attribute('foo', ['a', 'b', 'c'], 0)
     renamed = a.Attribute('foobar', ['a', 'b', 'c'], 1)
     extra_value = a.Attribute('foo', ['a', 'b', 'c', 'd'], 0)

     self.assertEqual(base, twin, 'they should be equal')
     for different in (renamed, extra_value):
         self.assertNotEqual(base, different, 'they are not equal')
Esempio n. 7
0
 def test_empty_freq_dists(self):
     """``Attributes.empty_freq_dists`` has one entry per attribute.

     Each entry is looked up by attribute and must have as many items as
     the attribute has values (3 and 2 here).
     """
     first = a.Attribute("first", ['a', 'b', 'c'], 0)
     second = a.Attribute("second", ['d', 'e'], 1)
     dists = a.Attributes([first, second]).empty_freq_dists()

     self.assertEqual(2, len(dists))
     for attribute_, expected_size in ((first, 3), (second, 2)):
         self.assertEqual(expected_size, len(dists[attribute_]))
Esempio n. 8
0
 def test_values_of_atrributes(self):
     """``values`` returns the instance values for the given attributes.

     The attributes at indices 4 and 6 are requested and the values
     '2' and '120000' are expected, in that order.
     """
     record = ins.TrainingInstance(
         ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2'],
         'yes')
     selected = [
         attribute.Attribute('dependents', ['continuous'], 4),
         attribute.Attribute('annualsalary', ['continuous'], 6),
     ]
     self.assertEqual(['2', '120000'], record.values(selected))
Esempio n. 9
0
 def test_discretise_using_discretised_attributes(self):
     """Discretising an instance replaces continuous values with labels.

     Before ``discretise`` the instance reports 2 and 120000 for the two
     continuous attributes; afterwards it must report 'b' and 'e' for the
     corresponding discretised attributes.
     """
     dependents = attribute.Attribute('dependents', ['continuous'], 4)
     annual_salary = attribute.Attribute('annualsalary', ['continuous'], 6)

     dependents_ranges = r.Range(0, 2, True).split(2)
     disc_dependents = da.DiscretisedAttribute(
         'dependents', dependents_ranges, 4)
     salary_ranges = r.Range(0, 120000, True).split(5)
     disc_annual_salary = da.DiscretisedAttribute(
         'annualsalary', salary_ranges, 6)

     instance = ins.TrainingInstance(
         ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2'],
         'yes')
     self.assertEqual(2, instance.value(dependents))
     self.assertEqual(120000, instance.value(annual_salary))

     instance.discretise([disc_dependents, disc_annual_salary])

     self.assertEqual('b', instance.value(disc_dependents))
     self.assertEqual('e', instance.value(disc_annual_salary))
Esempio n. 10
0
 def test_attempt_to_discretise_non_continuous_attribute_raises_error(self):
     """``value_ranges`` must reject a non-continuous attribute.

     The 'weather' training data is loaded and ``value_ranges`` is called
     with the discrete 'outlook' attribute; ``inv.InvalidDataError`` is
     expected.
     """
     path = datasetsDir(self) + 'numerical' + SEP + 'weather'
     _training = training(path)
     # Built outside the guarded call so that only value_ranges itself can
     # satisfy the expected exception.
     outlook = a.Attribute('outlook', ['sunny', 'overcast', 'rainy'], 0)
     self.assertRaises(inv.InvalidDataError,
                       _training.value_ranges, [outlook])
Esempio n. 11
0
    def test_get_attribute_value_from_instance_using_attribute(self):
        """``value`` looks up an instance value by attribute.

        The same attribute (index 1) is queried on a training, a test and
        a gold instance; each must return 'two'.
        """
        second = attribute.Attribute('second', ['two', 'duo'], 1)

        training_instance = ins.TrainingInstance(['bar', 'two'], 'a')
        self.assertEqual('two', training_instance.value(second))

        test_instance = ins.TestInstance(['bar', 'two'])
        self.assertEqual('two', test_instance.value(second))

        gold_instance = ins.GoldInstance(['bar', 'two'], 'a')
        self.assertEqual('two', gold_instance.value(second))
Esempio n. 12
0
 def metadata(self, file_path):
     """Build the attribute collection and class values for ``file_path``.

     The lines are obtained via ``self.__get_lines(file_path, self.NAMES)``.
     The first line is split on commas to give the class values. Every
     line whose processed form is non-empty and which ``NameItem`` reports
     as an attribute becomes an ``a.Attribute``, indexed in order of
     appearance starting at 0.

     Returns a tuple ``(a.Attributes, klass_values)``.
     """
     lines = self.__get_lines(file_path, self.NAMES)
     klass_values = item.NameItem(lines[0]).processed().split(',')
     collected = []
     for line in lines:
         nameitem = item.NameItem(line)
         processed = nameitem.processed()
         # Skip lines that are empty after processing or are not attributes.
         if len(processed) == 0 or not nameitem.isAttribute():
             continue
         # The next index equals the number of attributes collected so far.
         position = len(collected)
         collected.append(
             a.Attribute(self.get_name(processed),
                         self.get_values(processed),
                         position))
     return (a.Attributes(collected), klass_values)
Esempio n. 13
0
 def test_attributes_are_equal(self):
     """``Attributes`` collections compare by their contained attributes.

     Two collections built from identical attributes must be equal; a
     collection whose second attribute differs must not be equal.
     """
     attrs = a.Attributes([
         a.Attribute('band', ['dual', 'tri', 'quad'], 0),
         a.Attribute('size', ['big', 'small', 'medium'], 1)
     ])
     same = a.Attributes([
         a.Attribute('band', ['dual', 'tri', 'quad'], 0),
         a.Attribute('size', ['big', 'small', 'medium'], 1)
     ])
     self.assertEqual(attrs, same, 'they should be the same')
     other = a.Attributes([
         a.Attribute('band', ['dual', 'tri', 'quad'], 0),
         a.Attribute('pda', ['y', 'n'], 1)
     ])
     # Compare against the locally built collection so the test does not
     # depend on what a fixture happens to store in self.attrs.
     self.assertNotEqual(attrs, other, 'shouldnt be the same')
Esempio n. 14
0
 def test_attribute_creation(self):
     """A new attribute exposes the name and values it was created with."""
     created = a.Attribute('foo', ['a', 'b', 'c'], 0)
     actual_name = created.name
     self.assertEqual('foo', actual_name)
     actual_values = created.values
     self.assertEqual(['a', 'b', 'c'], actual_values)
Esempio n. 15
0
 def test_returns_true_if_value_is_present(self):
     """``has_value`` is true for a listed value and false otherwise."""
     foo = a.Attribute('foo', ['a', 'b', 'c'], 0)
     listed = foo.has_value('c')
     self.assertTrue(listed)
     unlisted = foo.has_value('d')
     self.assertFalse(unlisted)
Esempio n. 16
0
 def test_empty_freq_dists(self):
     """``Attribute.empty_freq_dists`` has one empty entry per value.

     Three entries are expected, and the entry for each attribute value
     must report ``N()`` equal to 0.
     """
     foo = a.Attribute('foo', ['a', 'b', 'c'], 0)
     dists = foo.empty_freq_dists()
     self.assertEqual(3, len(dists))
     for value in foo.values:
         sample_count = dists[value].N()
         self.assertEqual(0, sample_count)
Esempio n. 17
0
 def test_attributes_contain_an_attribute(self):
     """The 'band' attribute must be a member of ``self.attrs``.

     NOTE(review): ``self.attrs`` is not created in this method; it is
     presumably set up by the test fixture -- confirm.
     """
     band = a.Attribute('band', ['dual', 'tri', 'quad'], 0)
     # Use the membership operator instead of calling __contains__ directly.
     self.assertTrue(band in self.attrs)
Esempio n. 18
0
 def test_to_string(self):
     """``str`` of an ``Attributes`` collection lists every attribute."""
     first = a.Attribute("first", ['a', 'b', 'c'], 0)
     second = a.Attribute("second", ['d', 'e'], 1)
     collection = a.Attributes([first, second])
     expected = '[first:[a,b,c] index:0, second:[d,e] index:1]'
     self.assertEqual(expected, str(collection))
Esempio n. 19
0
 def test_values_as_str(self):
     """``values_as_str`` joins the attribute values with commas."""
     foo = a.Attribute('foo', ['a', 'b', 'c'], 0)
     joined = foo.values_as_str()
     self.assertEqual('a,b,c', joined)
Esempio n. 20
0
 def test_to_string(self):
     """``str`` of an attribute shows its name, values and index."""
     foo = a.Attribute('foo', ['a', 'b', 'c'], 0)
     rendered = str(foo)
     self.assertEqual('foo:[a,b,c] index:0', rendered)