Example #1
    def test_linear_classifier_weights_on(self, dataset):
        # Test get_linear_svm_weights
        classifier = self.LEARNER(dataset)
        # sum=True: a single weight dict summed over all pairwise classifiers
        weights = get_linear_svm_weights(classifier, sum=True)

        # sum=False: one weight dict per one-vs-one (class pair) classifier
        weights = get_linear_svm_weights(classifier, sum=False)

        n_class = len(classifier.class_var.values)

        def class_pairs(n_class):
            for i in range(n_class - 1):
                for j in range(i + 1, n_class):
                    yield i, j

        # Mapping between Orange class indices and libsvm's internal labels.
        l_map = classifier._get_libsvm_labels_map()

        for inst in dataset[:20]:
            dec_values = classifier.get_decision_values(inst)

            for dec_v, weight, rho, pair in zip(dec_values, weights,
                                                classifier.rho,
                                                class_pairs(n_class)):
                t_inst = Orange.data.Instance(classifier.domain, inst)
                dec_v1 = example_weighted_sum(t_inst, weight) - rho
                self.assertAlmostEqual(dec_v, dec_v1, 4)
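In this test, example_weighted_sum presumably computes the dot product between the instance's attribute values and one weight dictionary, so each one-vs-one decision value is checked against that sum minus the corresponding rho. A minimal sketch of that idea, assuming all attributes are continuous with no missing values (the helper name below is made up, not the library's):

def weighted_sum_sketch(instance, weights):
    # Dot product of the instance's continuous attribute values with the
    # per-attribute weights of a linear SVM.
    return sum(float(instance[attr]) * w for attr, w in weights.items())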
Example #2
    def test_linear_regression_weights_on(self, dataset):
        predictor = self.LEARNER(dataset)
        weights = get_linear_svm_weights(predictor)

        for inst in dataset[:20]:
            t_inst = Orange.data.Instance(predictor.domain, inst)
            prediction = predictor(inst)
            w_sum = example_weighted_sum(t_inst, weights)
            # The prediction must equal the weighted sum minus the bias rho.
            self.assertAlmostEqual(float(prediction),
                                   w_sum - predictor.rho[0],
                                   places=4)
Example #3
    def features_ranking_svm(self, name="ranking", random_state=64):
        """Rank features by RFE (recursive feature elimination).

        random_state: seed passed to svm.SVMLearnerEasy (note: it has no
        effect on the result).
        """
        # Assumes: import numpy as np; from collections import defaultdict;
        # from Orange.classification import svm. df2tb presumably converts
        # the pandas DataFrame plus labels into an Orange data Table.
        ranking_list = []
        f_list = [col for col in self.features]
        logging_per_run = list()
        for _ in range(self.n_features):
            train_X = self.data_X[f_list]
            train_y = self.data_y
            if len(train_X.columns) == 0:
                break
            tuned_learner = svm.SVMLearnerEasy(folds=5,
                                               kernel_type=svm.kernels.Linear,
                                               svm_type=svm.SVMLearner.C_SVC,
                                               random_state=random_state)
            org_data_table = df2tb(train_X, train_y)
            weights = svm.get_linear_svm_weights(tuned_learner(org_data_table),
                                                 sum=False)
            # Accumulate each feature's squared, L2-normalized weight over
            # all one-vs-one classifiers.
            internal_scores = defaultdict(float)
            for w in weights:
                magnitude = np.sqrt(
                    sum([w_attr**2 for attr, w_attr in w.items()]))
                for attr, w_attr in w.items():
                    internal_scores["%s" % attr] += (w_attr / magnitude)**2
            features_score = []
            for key in internal_scores:
                attr_name = key.split("Orange.feature.Continuous 'N_")[1].split(
                    "'")[0]
                features_score.append((attr_name, internal_scores[key]))
            features_score.sort(key=lambda item: item[1])
            # Record this run and eliminate the lowest-scoring feature.
            logging_per_run.append(features_score)
            ranking_list.append(features_score[0][0])
            f_list.remove(features_score[0][0])
        self.logging.append((name, logging_per_run))
        return ranking_list
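The elimination step above is standard SVM-RFE: every one-vs-one weight vector is L2-normalized, each feature accumulates its squared normalized weight across all pairwise classifiers, and the feature with the smallest total is dropped. A minimal standalone sketch of just that scoring step (the function name and plain-dict input are assumptions, not part of the code above):

import numpy as np

def rfe_feature_scores(weight_dicts):
    """Score features from a list of {attribute: weight} dicts, one per class pair."""
    scores = {}
    for w in weight_dicts:
        magnitude = np.sqrt(sum(v ** 2 for v in w.values()))
        for attr, v in w.items():
            scores[attr] = scores.get(attr, 0.0) + (v / magnitude) ** 2
    # Ascending order: the first entry is the next candidate for elimination.
    return sorted(scores.items(), key=lambda item: item[1])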
Example #4

from Orange import data
from Orange.classification import svm

brown = data.Table("brown-selected")
classifier = svm.SVMLearner(brown,
                            kernel_type=svm.kernels.Linear,
                            normalization=False)

weights = svm.get_linear_svm_weights(classifier)
print sorted("%.10f" % w for w in weights.values())

import pylab as plt
plt.hist(weights.values())  # distribution of the learned attribute weights
plt.show()
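To see which attributes carry the largest weights, the same dictionary can be sorted by absolute magnitude; a small follow-up sketch (the cutoff of ten is arbitrary):

ranked = sorted(weights.items(), key=lambda item: abs(item[1]), reverse=True)
for attr, w in ranked[:10]:
    print("%-32s %.6f" % (attr, w))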