コード例 #1
0
 def test_training_intances_creation(self):
     """Load the numerical 'person' dataset and check its training instances.

     Verifies the number of loaded instances and compares the instances at
     index 0 and index 5 against hand-built ``TrainingInstance`` objects.
     """
     dataset_path = datasetsDir(self) + 'numerical' + SEP + 'person'
     loaded = training(dataset_path)
     self.assertEqual(6, len(loaded), '6 instances should be present')

     expected_first = ins.TrainingInstance(
         ['0', '25', 'salaried', 'single', '0', '0', '65000', '3'], 'yes')
     self.assertEqual(expected_first, loaded[0])

     expected_last = ins.TrainingInstance(
         ['5', '42', 'salaried', 'married', '2', '6', '65000', '6'], 'no')
     self.assertEqual(expected_last, loaded[5])
コード例 #2
0
 def test_training_as_gold(self):
     """Check that ``training_as_gold`` keeps attributes and class values.

     Three training instances are converted and each result is compared,
     position by position, with the instance at the same index.
     """
     training_instances = [
         instance.TrainingInstance(['a', 'b', 'c'], 'x'),
         instance.TrainingInstance(['d', 'b', 'c'], 'y'),
         instance.TrainingInstance(['e', 'b', 'c'], 'z'),
     ]
     gold_instances = ins.training_as_gold(training_instances)
     self.assertEqual(3, len(gold_instances))

     for position in range(3):
         source = training_instances[position]
         converted = gold_instances[position]
         self.assertEqual(source.attrs, converted.attrs)
         self.assertEqual(source.klass_value, converted.klass_value)
コード例 #3
0
    def setup_instance_distances_with_6_instances(self):
        """Build an ``InstanceDistances`` with six registered distances.

        The three instances stored on ``self`` are registered at distance
        1.0; three instances created here are registered at 2.0, 3.0 and
        2.0 respectively.

        Returns:
            The populated ``knn.InstanceDistances`` object.
        """
        extra_bar = ins.TrainingInstance(['bar', 'one'], 'a')
        extra_foo = ins.TrainingInstance(['foo', 'one'], 'a')
        extra_baz = ins.TrainingInstance(['baz', 'four'], 'b')

        distances = knn.InstanceDistances()
        # (distance, instance) pairs, registered in this exact order.
        registrations = (
            (1.0, self.ins1),
            (1.0, self.ins2),
            (1.0, self.ins3),
            (2.0, extra_bar),
            (3.0, extra_foo),
            (2.0, extra_baz),
        )
        for value, training_instance in registrations:
            distances.distance(value, training_instance)

        return distances
コード例 #4
0
 def test_get_training_as_gold(self):
     """Check that ``as_gold`` preserves the attributes and class value."""
     source = ins.TrainingInstance(
         ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2'],
         'yes')
     converted = source.as_gold()
     # Both fields must be equal on the converted and the source instance.
     for field in ('attrs', 'klass_value'):
         self.assertEqual(getattr(converted, field), getattr(source, field))
コード例 #5
0
 def test_remove_attrbutes(self):
     """Check that ``remove_attributes`` drops the given attributes' values.

     After removing the attributes declared at indices 0 and 6 from an
     eight-value instance, six values should remain and '34' should be
     the first of them.
     """
     subject = ins.TrainingInstance(
         ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2'],
         'yes')
     to_remove = [
         attribute.Attribute('id', ['continuous'], 0),
         attribute.Attribute('annualsalary', ['continuous'], 6),
     ]
     subject.remove_attributes(to_remove)
     self.assertEqual(6, len(subject.attrs))
     self.assertEqual('34', subject.attrs[0])
コード例 #6
0
    def test_hamilton_distance(self):
        """Check ``hamiltonian_distance`` on mixed nominal/continuous data.

        A fixed training instance is compared with test instances that
        differ from it in progressively more attribute values.
        """
        attributes = [
            attr.Attribute('A1', ['a', 'b'], 0),
            attr.Attribute('A2', ['continuous'], 1),
            attr.Attribute('A3', ['continuous'], 2),
            attr.Attribute('A4', ['g', 'h'], 3),
        ]
        reference = ins.TrainingInstance(['a', 5, 3.4, 'g'], 'y')

        # (expected distance, attribute values of the test instance)
        cases = [
            (0, ['a', 5, 3.4, 'g']),
            (1, ['b', 5, 3.4, 'g']),
            (3, ['b', 4, 3.4, 'h']),
            (5, ['b', 4, 1.4, 'h']),
        ]
        for expected, values in cases:
            candidate = ins.TestInstance(values)
            actual = distancemetric.hamiltonian_distance(
                reference, candidate, attributes)
            self.assertEqual(expected, actual)
コード例 #7
0
    def test_euclidean_distance(self):
        """Check ``euclidean_distance`` on mixed nominal/continuous data.

        A fixed training instance is compared with test instances that
        differ from it in progressively more attribute values.

        The expected values involve ``math.sqrt``, so results are compared
        with ``assertAlmostEqual`` instead of exact equality; an exact
        float comparison could fail on rounding differences alone.
        """
        attributes = [
            attr.Attribute('A1', ['a', 'b'], 0),
            attr.Attribute('A2', ['continuous'], 1),
            attr.Attribute('A3', ['continuous'], 2),
            attr.Attribute('A4', ['g', 'h'], 3),
        ]
        reference = ins.TrainingInstance(['a', 5, 3.4, 'g'], 'y')

        # (expected distance, attribute values of the test instance)
        cases = [
            (0, ['a', 5, 3.4, 'g']),
            (1, ['b', 5, 3.4, 'g']),
            (math.sqrt(3), ['b', 4, 3.4, 'h']),
            (math.sqrt(7), ['b', 4, 1.4, 'h']),
        ]
        for expected, values in cases:
            candidate = ins.TestInstance(values)
            actual = distancemetric.euclidean_distance(
                reference, candidate, attributes)
            self.assertAlmostEqual(expected, actual)
コード例 #8
0
 def test_cannot_set_class_in_training_instance(self):
     """Check that setting a class on a training instance is rejected.

     Looking up and calling ``set_klass`` is expected to raise
     ``AttributeError``; the class value must then still equal ``self.a``.
     """
     subject = ins.TrainingInstance(['bar', 'two'], 'a')
     try:
         setter = getattr(subject, 'set_klass')
         setter(self.b)
     except AttributeError:
         self.assertEqual(
             self.a, subject.klass_value,
             'should not have changed the original class')
     else:
         # Reaching this point means the setter exists and was callable.
         self.fail(
             'should not be able to set a class on a Training Instance')
コード例 #9
0
 def test_values_of_atrributes(self):
     """Check that ``values`` returns the values of the given attributes.

     The attributes declared at indices 4 and 6 should yield '2' and
     '120000', in that order.
     """
     subject = ins.TrainingInstance(
         ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2'],
         'yes')
     requested = [
         attribute.Attribute('dependents', ['continuous'], 4),
         attribute.Attribute('annualsalary', ['continuous'], 6),
     ]
     self.assertEqual(['2', '120000'], subject.values(requested))
コード例 #10
0
    def test_get_attribute_value_from_instance_using_attribute(self):
        """Check ``value`` lookup by attribute on all three instance kinds.

        Training, test and gold instances built from the same values must
        each return 'two' for the attribute declared at index 1.
        """
        second = attribute.Attribute('second', ['two', 'duo'], 1)

        training_instance = ins.TrainingInstance(['bar', 'two'], 'a')
        self.assertEqual('two', training_instance.value(second))

        test_instance = ins.TestInstance(['bar', 'two'])
        self.assertEqual('two', test_instance.value(second))

        gold_instance = ins.GoldInstance(['bar', 'two'], 'a')
        self.assertEqual('two', gold_instance.value(second))
コード例 #11
0
 def test_string_representation(self):
     """Check the string form of training, test and gold instances.

     Test and gold instances are checked both before and after a class
     is assigned through ``set_klass``. The string is obtained with the
     built-in ``str()`` rather than by calling ``__str__`` directly.
     """
     training_instance = ins.TrainingInstance(['bar', 'two'], 'a')
     self.assertEqual("[bar,two;a]", str(training_instance))

     test_instance = ins.TestInstance(['bar', 'two'])
     self.assertEqual("[bar,two; ]", str(test_instance))
     test_instance.set_klass('b')
     self.assertEqual("[bar,two;b]", str(test_instance))

     gold_instance = ins.GoldInstance(['bar', 'two'], 'a')
     self.assertEqual("[bar,two;a; ]", str(gold_instance))
     gold_instance.set_klass('b')
     self.assertEqual("[bar,two;a;b]", str(gold_instance))
コード例 #12
0
 def test_discretise_using_discretised_attributes(self):
     """Check that ``discretise`` replaces continuous values with labels.

     Before discretising, the continuous attributes yield the numbers 2
     and 120000; afterwards the discretised attributes are expected to
     yield 'b' and 'e'.
     """
     dependents = attribute.Attribute('dependents', ['continuous'], 4)
     annual_salary = attribute.Attribute('annualsalary', ['continuous'], 6)

     dependents_ranges = r.Range(0, 2, True).split(2)
     disc_dependents = da.DiscretisedAttribute(
         'dependents', dependents_ranges, 4)
     salary_ranges = r.Range(0, 120000, True).split(5)
     disc_annual_salary = da.DiscretisedAttribute(
         'annualsalary', salary_ranges, 6)

     subject = ins.TrainingInstance(
         ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2'],
         'yes')
     self.assertEqual(2, subject.value(dependents))
     self.assertEqual(120000, subject.value(annual_salary))

     subject.discretise([disc_dependents, disc_annual_salary])

     self.assertEqual('b', subject.value(disc_dependents))
     self.assertEqual('e', subject.value(disc_annual_salary))
コード例 #13
0
 def test_split_info_with_equal_distribution(self):
     """Check ``split_info`` after feeding two instances per outlook value.

     Six training instances (two each for 'sunny', 'overcast' and
     'rainy') are counted by ``self.outlook_stump``. The expected value
     is a logarithm-based float, so it is compared with
     ``assertAlmostEqual`` rather than exact equality to avoid failures
     caused by rounding alone.
     """
     # (attribute values, class value) for each counted instance.
     observations = [
         (['sunny', 'mild', 'normal', 'true'], 'yes'),
         (['overcast', 'mild', 'normal', 'true'], 'no'),
         (['sunny', 'hot', 'normal', 'true'], 'yes'),
         (['overcast', 'hot', 'normal', 'true'], 'yes'),
         (['rainy', 'mild', 'normal', 'true'], 'yes'),
         (['rainy', 'mild', 'normal', 'false'], 'yes'),
     ]
     for values, klass_value in observations:
         self.outlook_stump.update_count(
             instance.TrainingInstance(values, klass_value))

     # 3.0/9 and not 2.0/6 because of smoothing
     proportion = 3.0 / 9
     expected = -(proportion * math.log(proportion, 2)) * 3
     self.assertAlmostEqual(expected, self.outlook_stump.split_info())
コード例 #14
0
 def setUp(self):
     """Create the three training instances shared by the tests."""
     make_instance = ins.TrainingInstance
     self.ins1 = make_instance(['bar', 'two'], 'a')
     self.ins2 = make_instance(['foo', 'two'], 'a')
     self.ins3 = make_instance(['baz', 'three'], 'b')
コード例 #15
0
 def test_to_string(self):
     """Check the string form of a ``TrainingInstances`` collection."""
     members = [
         instance.TrainingInstance(['foo', 'bar'], 'a'),
         instance.TrainingInstance(['foo', 'foobar'], 'b'),
     ]
     collection = ins.TrainingInstances(members)
     rendered = str(collection)
     self.assertEqual('[[foo,bar;a], [foo,foobar;b]]', rendered)
コード例 #16
0
 def test_the_number_of_instances(self):
     """Check that ``len`` of a ``TrainingInstances`` counts its members."""
     members = [
         instance.TrainingInstance(['foo', 'bar'], 'a'),
         instance.TrainingInstance(['foo', 'foobar'], 'b'),
     ]
     collection = ins.TrainingInstances(members)
     self.assertEqual(2, len(collection), '2 instances should be present')
コード例 #17
0
 def training(self, file_path):
     """Build training instances from the values obtained for ``file_path``.

     Each row that is not ``None`` is split into attribute values (every
     item but the last) and the class value (the last item).

     Returns:
         An ``inss.TrainingInstances`` wrapping the created instances.
     """
     rows = self.__get_all_values(file_path, self.DATA)
     created = []
     for row in rows:
         if row is None:
             continue
         created.append(ins.TrainingInstance(row[:-1], row[-1]))
     return inss.TrainingInstances(created)
コード例 #18
0
    def test_split_info_greater_for_higher_arity_attributes(self):
        """Compare ``split_info`` of the outlook stump with a second stump.

        The same six observations are counted by ``self.outlook_stump`` and
        by a stump built on ``self.attributes[3]``; the outlook stump is
        expected to report the larger split info.
        """
        # (attribute values, class value) fed to both stumps.
        observations = (
            (['sunny', 'mild', 'normal', 'true'], 'yes'),
            (['overcast', 'mild', 'normal', 'true'], 'no'),
            (['sunny', 'hot', 'normal', 'false'], 'yes'),
            (['overcast', 'hot', 'normal', 'false'], 'yes'),
            (['rainy', 'mild', 'normal', 'true'], 'yes'),
            (['rainy', 'mild', 'normal', 'false'], 'yes'),
        )

        # Fresh instances (and fresh value lists) are built for each stump.
        for values, klass_value in observations:
            self.outlook_stump.update_count(
                instance.TrainingInstance(list(values), klass_value))

        windy_stump = ds.DecisionStump(self.attributes[3], self.klass)
        for values, klass_value in observations:
            windy_stump.update_count(
                instance.TrainingInstance(list(values), klass_value))

        outlook_split_info = self.outlook_stump.split_info()
        windy_split_info = windy_stump.split_info()
        self.assertTrue(outlook_split_info > windy_split_info)
コード例 #19
0
 def test_training_instance_has_class_and_attributes(self):
     """Check that a training instance exposes its class value and attrs."""
     subject = ins.TrainingInstance(['bar', 'two'], 'a')
     expected_attrs = ['bar', 'two']
     self.assertEqual(self.a, subject.klass_value)
     self.assertEqual(expected_attrs, subject.attrs)