def test_training_intances_creation(self):
    """Check the training instances read from the numerical 'person' data.

    Six instances are expected; the first and the last one are compared
    against hand-written TrainingInstance objects.
    """
    dataset_path = datasetsDir(self) + 'numerical' + SEP + 'person'
    instances = training(dataset_path)
    self.assertEqual(6, len(instances), '6 instances should be present')

    # (index, attribute values, class) of the instances that are spot-checked.
    expectations = (
        (0, ['0', '25', 'salaried', 'single', '0', '0', '65000', '3'], 'yes'),
        (5, ['5', '42', 'salaried', 'married', '2', '6', '65000', '6'], 'no'),
    )
    for index, values, klass in expectations:
        self.assertEqual(
            ins.TrainingInstance(values, klass), instances[index])
def test_training_as_gold(self):
    """Check that training_as_gold keeps attributes and class values.

    Three training instances are converted and every converted instance is
    compared, position by position, with the one it was built from.
    """
    training_instances = [
        instance.TrainingInstance([first, 'b', 'c'], klass)
        for first, klass in (('a', 'x'), ('d', 'y'), ('e', 'z'))
    ]

    gold_instances = ins.training_as_gold(training_instances)

    self.assertEqual(3, len(gold_instances))
    for position, trained in enumerate(training_instances):
        converted = gold_instances[position]
        self.assertEqual(trained.attrs, converted.attrs)
        self.assertEqual(trained.klass_value, converted.klass_value)
def setup_instance_distances_with_6_instances(self):
    """Return a knn.InstanceDistances populated with six entries.

    The first three entries use self.ins1, self.ins2 and self.ins3 (all at
    distance 1.0); the remaining three instances are created here and are
    registered at distances 2.0, 3.0 and 2.0 respectively.
    """
    fourth = ins.TrainingInstance(['bar', 'one'], 'a')
    fifth = ins.TrainingInstance(['foo', 'one'], 'a')
    sixth = ins.TrainingInstance(['baz', 'four'], 'b')

    distances = knn.InstanceDistances()
    entries = (
        (1.0, self.ins1),
        (1.0, self.ins2),
        (1.0, self.ins3),
        (2.0, fourth),
        (3.0, fifth),
        (2.0, sixth),
    )
    # Registration order is kept exactly as listed above.
    for how_far, member in entries:
        distances.distance(how_far, member)
    return distances
def test_get_training_as_gold(self):
    """Check that as_gold() carries over attributes and class value."""
    values = ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2']
    source = ins.TrainingInstance(values, 'yes')

    converted = source.as_gold()

    self.assertEqual(converted.attrs, source.attrs)
    self.assertEqual(converted.klass_value, source.klass_value)
def test_remove_attrbutes(self):
    """Check that remove_attributes drops the given attributes' values.

    Removing the attributes at index 0 and index 6 from an eight-value
    instance should leave six values, the first of which is '34'.
    """
    values = ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2']
    sample = ins.TrainingInstance(values, 'yes')
    id_attribute = attribute.Attribute('id', ['continuous'], 0)
    salary_attribute = attribute.Attribute('annualsalary', ['continuous'], 6)

    sample.remove_attributes([id_attribute, salary_attribute])

    self.assertEqual(6, len(sample.attrs))
    self.assertEqual('34', sample.attrs[0])
def test_hamilton_distance(self):
    """Check hamiltonian_distance between one training and several test instances.

    The attribute list mixes two nominal attributes (A1, A4) with two
    continuous ones (A2, A3). Each case pairs the expected distance with
    the values of the test instance it is measured against.
    """
    attributes = [
        attr.Attribute('A1', ['a', 'b'], 0),
        attr.Attribute('A2', ['continuous'], 1),
        attr.Attribute('A3', ['continuous'], 2),
        attr.Attribute('A4', ['g', 'h'], 3),
    ]
    reference = ins.TrainingInstance(['a', 5, 3.4, 'g'], 'y')

    cases = (
        (0, ['a', 5, 3.4, 'g']),  # identical values
        (1, ['b', 5, 3.4, 'g']),  # A1 differs
        (3, ['b', 4, 3.4, 'h']),  # A1 and A4 differ, A2 is off by 1
        (5, ['b', 4, 1.4, 'h']),  # as above, A3 is additionally off by 2
    )
    for expected, values in cases:
        candidate = ins.TestInstance(values)
        self.assertEqual(
            expected,
            distancemetric.hamiltonian_distance(
                reference, candidate, attributes))
def test_euclidean_distance(self):
    """Check euclidean_distance between one training and several test instances.

    The attribute list mixes two nominal attributes (A1, A4) with two
    continuous ones (A2, A3). Each case pairs the expected distance with
    the values of the test instance it is measured against.

    The results are floating point values (two of the expectations are
    square roots), so they are compared with assertAlmostEqual rather than
    exact equality; an exact match still passes.
    """
    attributes = [
        attr.Attribute('A1', ['a', 'b'], 0),
        attr.Attribute('A2', ['continuous'], 1),
        attr.Attribute('A3', ['continuous'], 2),
        attr.Attribute('A4', ['g', 'h'], 3),
    ]
    reference = ins.TrainingInstance(['a', 5, 3.4, 'g'], 'y')

    cases = (
        (0, ['a', 5, 3.4, 'g']),             # identical values
        (1, ['b', 5, 3.4, 'g']),             # A1 differs
        (math.sqrt(3), ['b', 4, 3.4, 'h']),  # A1, A2 and A4 differ
        (math.sqrt(7), ['b', 4, 1.4, 'h']),  # all four attributes differ
    )
    for expected, values in cases:
        candidate = ins.TestInstance(values)
        self.assertAlmostEqual(
            expected,
            distancemetric.euclidean_distance(
                reference, candidate, attributes))
def test_cannot_set_class_in_training_instance(self):
    """Check that calling set_klass on a training instance raises AttributeError.

    When the AttributeError is raised, the instance's class value must still
    equal self.a. self.a and self.b are fixture values defined outside this
    method.
    """
    trained = ins.TrainingInstance(['bar', 'two'], 'a')
    try:
        # Looked up by name so a missing method surfaces as AttributeError.
        setter = getattr(trained, 'set_klass')
        setter(self.b)
    except AttributeError:
        self.assertEqual(
            self.a, trained.klass_value,
            'should not have changed the original class')
    else:
        self.fail('should not be able to set a class on a Training Instance')
def test_values_of_atrributes(self):
    """Check that values() returns the values at the given attributes' indices.

    The attributes at index 4 and index 6 should yield '2' and '120000'.
    """
    raw_values = ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2']
    sample = ins.TrainingInstance(raw_values, 'yes')
    requested = [
        attribute.Attribute('dependents', ['continuous'], 4),
        attribute.Attribute('annualsalary', ['continuous'], 6),
    ]

    self.assertEqual(['2', '120000'], sample.values(requested))
def test_get_attribute_value_from_instance_using_attribute(self):
    """Check value(attribute) on training, test and gold instances.

    All three instance kinds hold ['bar', 'two'] and are asked for the
    attribute at index 1, which should give 'two' each time.
    """
    trained = ins.TrainingInstance(['bar', 'two'], 'a')
    second = attribute.Attribute('second', ['two', 'duo'], 1)
    self.assertEqual('two', trained.value(second))

    unlabelled = ins.TestInstance(['bar', 'two'])
    self.assertEqual('two', unlabelled.value(second))

    labelled = ins.GoldInstance(['bar', 'two'], 'a')
    self.assertEqual('two', labelled.value(second))
def test_string_representation(self):
    """Check the string form of training, test and gold instances.

    Expected formats, as asserted below:
      * training instance: "[attrs;class]"
      * test instance:     "[attrs; ]" until a class is set, then "[attrs;class]"
      * gold instance:     "[attrs;gold; ]" until a class is set, then
                           "[attrs;gold;class]"

    The representation is obtained through str() instead of calling
    __str__() directly.
    """
    trained = ins.TrainingInstance(['bar', 'two'], 'a')
    self.assertEqual("[bar,two;a]", str(trained))

    unlabelled = ins.TestInstance(['bar', 'two'])
    self.assertEqual("[bar,two; ]", str(unlabelled))
    unlabelled.set_klass('b')
    self.assertEqual("[bar,two;b]", str(unlabelled))

    gold = ins.GoldInstance(['bar', 'two'], 'a')
    self.assertEqual("[bar,two;a; ]", str(gold))
    gold.set_klass('b')
    self.assertEqual("[bar,two;a;b]", str(gold))
def test_discretise_using_discretised_attributes(self):
    """Check that discretise() replaces continuous values by range labels.

    Before discretising, the values at index 4 and index 6 read as 2 and
    120000; afterwards they are expected to read as 'b' and 'e' through
    the corresponding discretised attributes.
    """
    dependents = attribute.Attribute('dependents', ['continuous'], 4)
    annual_salary = attribute.Attribute('annualsalary', ['continuous'], 6)

    dependents_ranges = r.Range(0, 2, True).split(2)
    disc_dependents = da.DiscretisedAttribute(
        'dependents', dependents_ranges, 4)
    salary_ranges = r.Range(0, 120000, True).split(5)
    disc_annual_salary = da.DiscretisedAttribute(
        'annualsalary', salary_ranges, 6)
    discretised = [disc_dependents, disc_annual_salary]

    sample = ins.TrainingInstance(
        ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2'],
        'yes')

    # Values as seen through the continuous attributes.
    self.assertEqual(2, sample.value(dependents))
    self.assertEqual(120000, sample.value(annual_salary))

    sample.discretise(discretised)

    # Values as seen through the discretised attributes.
    self.assertEqual('b', sample.value(disc_dependents))
    self.assertEqual('e', sample.value(disc_annual_salary))
def test_split_info_with_equal_distribution(self):
    """Check split_info() when every outlook value occurs equally often.

    self.outlook_stump (a fixture defined outside this method) is fed two
    instances for each of 'sunny', 'overcast' and 'rainy'.
    """
    rows = (
        (['sunny', 'mild', 'normal', 'true'], 'yes'),
        (['overcast', 'mild', 'normal', 'true'], 'no'),
        (['sunny', 'hot', 'normal', 'true'], 'yes'),
        (['overcast', 'hot', 'normal', 'true'], 'yes'),
        (['rainy', 'mild', 'normal', 'true'], 'yes'),
        (['rainy', 'mild', 'normal', 'false'], 'yes'),
    )
    for values, klass in rows:
        self.outlook_stump.update_count(
            instance.TrainingInstance(values, klass))

    # Smoothing makes each share 3.0/9 rather than 2.0/6.
    share = 3.0 / 9
    expected = -(share * math.log(share, 2)) * 3
    self.assertEqual(expected, self.outlook_stump.split_info())
def setUp(self):
    """Create the three training instances shared by the tests.

    ins1 and ins2 have class 'a'; ins3 has class 'b'.
    """
    make = ins.TrainingInstance
    self.ins1 = make(['bar', 'two'], 'a')
    self.ins2 = make(['foo', 'two'], 'a')
    self.ins3 = make(['baz', 'three'], 'b')
def test_to_string(self):
    """Check the string form of a TrainingInstances collection of two."""
    members = [
        instance.TrainingInstance(['foo', 'bar'], 'a'),
        instance.TrainingInstance(['foo', 'foobar'], 'b'),
    ]
    collection = ins.TrainingInstances(members)

    rendered = str(collection)

    self.assertEqual('[[foo,bar;a], [foo,foobar;b]]', rendered)
def test_the_number_of_instances(self):
    """Check that len() of a TrainingInstances built from two items is 2."""
    members = [
        instance.TrainingInstance(['foo', 'bar'], 'a'),
        instance.TrainingInstance(['foo', 'foobar'], 'b'),
    ]
    collection = ins.TrainingInstances(members)

    count = len(collection)

    self.assertEqual(2, count, '2 instances should be present')
def training(self, file_path):
    """Build the training instances stored under ``file_path``.

    The rows are obtained from ``self.__get_all_values(file_path,
    self.DATA)``. For every row that is not None, the last value becomes the
    class and all preceding values become the attribute values.

    Returns an ``inss.TrainingInstances`` wrapping the resulting list.
    """
    rows = self.__get_all_values(file_path, self.DATA)

    parsed = []
    for row in rows:
        if row is None:
            # None rows carry no instance and are skipped.
            continue
        parsed.append(ins.TrainingInstance(row[:-1], row[-1]))
    return inss.TrainingInstances(parsed)
def test_split_info_greater_for_higher_arity_attributes(self):
    """Check that the outlook stump's split info exceeds the windy stump's.

    Both stumps are fed the same six rows. In that data the first column
    takes three distinct values ('sunny', 'overcast', 'rainy') while the
    fourth takes two ('true', 'false'). self.outlook_stump, self.attributes
    and self.klass are fixtures defined outside this method.
    """
    rows = (
        (['sunny', 'mild', 'normal', 'true'], 'yes'),
        (['overcast', 'mild', 'normal', 'true'], 'no'),
        (['sunny', 'hot', 'normal', 'false'], 'yes'),
        (['overcast', 'hot', 'normal', 'false'], 'yes'),
        (['rainy', 'mild', 'normal', 'true'], 'yes'),
        (['rainy', 'mild', 'normal', 'false'], 'yes'),
    )

    # A fresh instance is created for every update, for each stump.
    for values, klass in rows:
        self.outlook_stump.update_count(
            instance.TrainingInstance(values, klass))

    windy_stump = ds.DecisionStump(self.attributes[3], self.klass)
    for values, klass in rows:
        windy_stump.update_count(
            instance.TrainingInstance(values, klass))

    self.assertTrue(
        self.outlook_stump.split_info() > windy_stump.split_info())
def test_training_instance_has_class_and_attributes(self):
    """Check that a training instance exposes its class value and attributes.

    The class value is compared with the fixture value self.a, which is
    defined outside this method.
    """
    trained = ins.TrainingInstance(['bar', 'two'], 'a')

    self.assertEqual(self.a, trained.klass_value)
    self.assertEqual(['bar', 'two'], trained.attrs)