Ejemplo n.º 1
0
    def test_forward_select(self):
        """Forward selection on the minigolf weather data should keep only 'outlook'.

        After running the selection, the test recomputes the cross-validation
        accuracies on deep copies taken beforehand, to show why 'outlook' alone
        is the expected outcome.
        """
        path = datasetsDir(self) + 'minigolf' + SEP + 'weather'
        _training = training(path)
        _attributes, _klass = metadata(path)
        _test = test(path)
        _gold = gold(path)

        # Copies taken before selection runs; the originals are expected to be
        # trimmed down to a single attribute (asserted below).
        pristine_training = copy.deepcopy(_training)
        pristine_attributes = copy.deepcopy(_attributes)

        # NOTE(review): options look like classifier / fold count / delta --
        # confirm against FeatureSelection.
        options = ['1R', '4', '0.1']
        feat_sel = fs.FeatureSelection(_training, _attributes, _klass, _test,
                                       _gold, options)
        feat_sel.forward_selection()

        self.assertEqual(1, len(_attributes))
        self.assertEqual('outlook', _attributes[0].name)
        for instances in (_training, _test, _gold):
            self.verify_number_of_attributes(instances, 1)

        # Verification, level 1: accuracy of every attribute on its own.
        cv_datasets = pristine_training.cross_validation_datasets(4)
        single_accuracies = {}
        for candidate in pristine_attributes:
            accuracy = feat_sel.avg_accuracy_by_cross_validation(
                cv_datasets, 4, attr.Attributes([candidate]))
            single_accuracies[candidate.name] = accuracy

        # Observed values:
        # 'windy': 0.41666666666666663, 'outlook': 0.79166666666666663,
        # 'temperature': 0.41666666666666663, 'humidity': 0.54166666666666663
        expected_single = (
            ('windy', 0.4166666),
            ('outlook', 0.79166666),
            ('temperature', 0.4166666),
            ('humidity', 0.5416666),
        )
        for name, expected in expected_single:
            self.assertAlmostEqual(expected, single_accuracies[name], places=6)

        # Verification, level 2: 'outlook' is selected; pair it with each of
        # the remaining attributes.
        for each in pristine_attributes:
            if each.name == 'outlook':
                outlook = each
        pristine_attributes.remove(outlook)

        pair_accuracies = {}
        for partner in pristine_attributes:
            accuracy = feat_sel.avg_accuracy_by_cross_validation(
                cv_datasets, 4, attr.Attributes([outlook, partner]))
            pair_accuracies[('outlook', partner.name)] = accuracy

        # Observed values:
        # ('outlook', 'humidity'): 0.79166666666666663,
        # ('outlook', 'temperature'): 0.79166666666666663,
        # ('outlook', 'windy'): 0.54166666666666663
        expected_pairs = (
            (('outlook', 'humidity'), 0.7916666),
            (('outlook', 'temperature'), 0.7916666),
            (('outlook', 'windy'), 0.5416666),
        )
        for pair, expected in expected_pairs:
            self.assertAlmostEqual(expected, pair_accuracies[pair], places=6)
Ejemplo n.º 2
0
 def test_attributes_are_equal(self):
     """Attributes collections are equal when their members match, unequal otherwise."""
     attrs = a.Attributes([
         a.Attribute('band', ['dual', 'tri', 'quad'], 0),
         a.Attribute('size', ['big', 'small', 'medium'], 1)
     ])
     same = a.Attributes([
         a.Attribute('band', ['dual', 'tri', 'quad'], 0),
         a.Attribute('size', ['big', 'small', 'medium'], 1)
     ])
     self.assertEqual(attrs, same, 'they should be the same')

     # Differs from ``attrs`` only in its second attribute.
     other = a.Attributes([
         a.Attribute('band', ['dual', 'tri', 'quad'], 0),
         a.Attribute('pda', ['y', 'n'], 1)
     ])
     # Compare against the local ``attrs`` built above, so the check covers
     # exactly the pair of collections this test constructs.
     self.assertNotEqual(attrs, other, 'shouldnt be the same')
Ejemplo n.º 3
0
 def test_empty_freq_dists(self):
     """empty_freq_dists has one entry per attribute, sized by that attribute's values."""
     first = a.Attribute("first", ['a', 'b', 'c'], 0)
     second = a.Attribute("second", ['d', 'e'], 1)
     freq_dists = a.Attributes([first, second]).empty_freq_dists()

     # One entry for each of the two attributes.
     self.assertEqual(2, len(freq_dists))
     # Each entry is as large as the attribute's list of values.
     first_dist = freq_dists[first]
     self.assertEqual(3, len(first_dist))
     second_dist = freq_dists[second]
     self.assertEqual(2, len(second_dist))
Ejemplo n.º 4
0
 def metadata(self, file_path):
     """Read the attribute definitions and class values for ``file_path``.

     The first line returned by ``__get_lines`` is processed and split on
     commas to obtain the class values. Every line whose processed text is
     non-empty and which ``NameItem.isAttribute()`` accepts becomes an
     ``Attribute``, numbered consecutively from 0 in order of appearance.

     Returns a tuple ``(Attributes, klass_values)``.
     """
     lines = self.__get_lines(file_path, self.NAMES)
     klass_values = item.NameItem(lines[0]).processed().split(',')

     attributes = []
     for line in lines:
         name_item = item.NameItem(line)
         text = name_item.processed()
         # Skip blank lines and lines that do not describe an attribute.
         if len(text) == 0 or not name_item.isAttribute():
             continue
         # The number of attributes collected so far is the next index.
         attributes.append(
             a.Attribute(self.get_name(text), self.get_values(text),
                         len(attributes)))
     return (a.Attributes(attributes), klass_values)
Ejemplo n.º 5
0
    def __select_attributes(self, max, selected, others, delta):
        """Grow ``selected`` one attribute at a time while accuracy keeps improving.

        In each round every attribute in ``others`` is temporarily appended to
        ``selected`` and the average cross-validation accuracy is measured.
        The best candidate is moved from ``others`` to ``selected`` only if
        its accuracy beats ``max`` (the best accuracy so far) by at least
        ``delta``; otherwise the search stops.

        ``selected`` and ``others`` are modified in place. Returns ``selected``.
        """
        while not (others is None or len(others) == 0):
            fold = self.get_fold()
            best_accuracy, best_attribute = -1, None
            datasets = self.training.cross_validation_datasets(fold)

            for candidate in others:
                # Try the candidate, score it, then undo the trial.
                selected.append(candidate)
                accuracy = self.avg_accuracy_by_cross_validation(
                    datasets, fold, attr.Attributes(selected))
                if accuracy > best_accuracy:
                    best_accuracy, best_attribute = accuracy, candidate
                selected.remove(candidate)

            # Stop once the best candidate no longer improves by ``delta``.
            if best_accuracy - max < delta:
                break

            selected.append(best_attribute)
            others.remove(best_attribute)
            max = best_accuracy
        return selected
Ejemplo n.º 6
0
 def __eliminate_attributes(self, max, selected, delta):
     """Shrink ``selected`` one attribute at a time while accuracy keeps improving.

     In each round every attribute is temporarily removed from ``selected``
     and the average cross-validation accuracy of the remainder is measured.
     The best-scoring remainder replaces ``selected`` only if its accuracy
     beats ``max`` (the best accuracy so far) by at least ``delta``;
     otherwise the search stops. It also stops when ``selected`` is ``None``
     or has at most one attribute.

     Returns the final selection, which may be a copy rather than the list
     object originally passed in.
     """
     while not (selected is None or len(selected) <= 1):
         fold = self.get_fold()
         best_accuracy, best_subset = -1, None
         datasets = self.training.cross_validation_datasets(fold)

         # Iterate over a snapshot because ``selected`` is mutated below.
         for candidate in selected[:]:
             selected.remove(candidate)
             accuracy = self.avg_accuracy_by_cross_validation(
                 datasets, fold, attr.Attributes(selected))
             if accuracy > best_accuracy:
                 best_accuracy = accuracy
                 best_subset = selected[:]
             # Put the candidate back (at the end) before trying the next one.
             selected.append(candidate)

         # Stop once dropping an attribute no longer improves by ``delta``.
         if best_accuracy - max < delta:
             break

         max, selected = best_accuracy, best_subset
     return selected
Ejemplo n.º 7
0
    def test_backward_select(self):
        """Backward elimination on the minigolf weather data should keep three attributes.

        After running the elimination, the test recomputes the
        cross-validation accuracies on deep copies taken beforehand, level by
        level, to show why the search stops at three attributes.
        """
        path = datasetsDir(self) + 'minigolf' + SEP + 'weather'
        _training = training(path)
        _attributes, _klass = metadata(path)
        _test = test(path)
        _gold = gold(path)

        # Copies taken before elimination runs; the originals are expected to
        # be trimmed down to three attributes (asserted below).
        pristine_training = copy.deepcopy(_training)
        pristine_attributes = copy.deepcopy(_attributes)

        # NOTE(review): options look like classifier / fold count / delta --
        # confirm against FeatureSelection.
        options = ['1R', '4', '0.1']
        feat_sel = fs.FeatureSelection(_training, _attributes, _klass, _test,
                                       _gold, options)
        feat_sel.backward_elimination()

        self.assertEqual(3, len(_attributes))
        for instances in (_training, _test, _gold):
            self.verify_number_of_attributes(instances, 3)

        # Verification, level 0: accuracy with every attribute present.
        baseline = feat_sel.avg_accuracy_by_cross_validation(
            pristine_training.cross_validation_datasets(4), 4,
            pristine_attributes)
        self.assertAlmostEqual(0.5416666, baseline, places=6)

        # Level 1: drop each attribute in turn and score the remaining three.
        cv_datasets = pristine_training.cross_validation_datasets(4)
        triple_accuracies = {}
        for dropped in pristine_attributes:
            remaining = pristine_attributes[:]
            remaining.remove(dropped)
            accuracy = feat_sel.avg_accuracy_by_cross_validation(
                cv_datasets, 4, attr.Attributes(remaining))
            key = (remaining[0].name, remaining[1].name, remaining[2].name)
            triple_accuracies[key] = accuracy

        # Observed values:
        # ('outlook', 'humidity', 'windy'): 0.54166666666666663,
        # ('outlook', 'temperature', 'windy'): 0.54166666666666663,
        # ('temperature', 'humidity', 'windy'): 0.29166666666666663,
        # ('outlook', 'temperature', 'humidity'): 0.79166666666666663
        expected_triples = (
            (('outlook', 'humidity', 'windy'), 0.5416666),
            (('outlook', 'temperature', 'windy'), 0.5416666),
            (('temperature', 'humidity', 'windy'), 0.2916666),
            (('outlook', 'temperature', 'humidity'), 0.7916666),
        )
        for key, expected in expected_triples:
            self.assertAlmostEqual(expected, triple_accuracies[key], places=6)

        # Level 2: ('outlook', 'temperature', 'humidity') selected, i.e.
        # 'windy' is gone; drop each remaining attribute in turn.
        for each in pristine_attributes:
            if each.name == 'windy':
                windy = each
        pristine_attributes.remove(windy)

        pair_accuracies = {}
        for dropped in pristine_attributes:
            remaining = pristine_attributes[:]
            remaining.remove(dropped)
            accuracy = feat_sel.avg_accuracy_by_cross_validation(
                cv_datasets, 4, attr.Attributes(remaining))
            key = (remaining[0].name, remaining[1].name)
            pair_accuracies[key] = accuracy

        expected_pairs = (
            (('outlook', 'humidity'), 0.7916666),
            (('outlook', 'temperature'), 0.7916666),
            (('temperature', 'humidity'), 0.4166666),
        )
        for key, expected in expected_pairs:
            self.assertAlmostEqual(expected, pair_accuracies[key], places=6)
Ejemplo n.º 8
0
 def test_to_string(self):
     """str() of an Attributes collection lists each attribute with its values and index."""
     first = a.Attribute("first", ['a', 'b', 'c'], 0)
     second = a.Attribute("second", ['d', 'e'], 1)
     rendered = str(a.Attributes([first, second]))
     expected = '[first:[a,b,c] index:0, second:[d,e] index:1]'
     self.assertEqual(expected, rendered)