Beispiel #1
0
    def test(self, labels, test_set):
        """Predict on `test_set` and score the predictions against `labels`.

        Returns a tuple ``(accuracy, predictions)``: the value of
        ``helper.accuracy(labels, predictions)`` and the output of
        ``self.classifier.predict``.
        """
        # Only the second element returned by format_for_scikit is used here.
        _, ts = helper.format_for_scikit(labels, test_set)
        predictions = self.classifier.predict(ts)

        if self.plot_roc:
            # Format inside the call: with print as a function,
            # ``print(...) % x`` applies ``%`` to None and raises TypeError.
            print("ROC curve plot unavailable for %s" % (str(self),))

        return helper.accuracy(labels, predictions), predictions
Beispiel #2
0
    def train_helper(self, labels, train_set, kernel_name, deg=3):
        """Create an SVC with the requested kernel and fit it on `train_set`.

        `kernel_name` and `deg` are forwarded to ``svm.SVC`` as ``kernel`` and
        ``degree``. The estimator is stored in ``self.classifier``.
        """
        svc_options = {
            'kernel': kernel_name,
            'degree': deg,
            'probability': True,
            'cache_size': 1000.0,
        }
        self.classifier = svm.SVC(**svc_options)
        targets, samples = helper.format_for_scikit(labels, train_set)

        # NOTE: a PCA step (PCA(n_components='mle') followed by fit_transform
        # on the samples) used to sit here and is currently disabled.

        self.classifier.fit(samples, targets)
Beispiel #3
0
    def test(self, labels, test_set):
        """Predict on `test_set`, optionally plot a ROC curve, and score.

        When ``self.plot_roc`` is truthy, the classifier's ``predict_proba``
        output is handed to ``helper.roc`` together with the formatted labels
        and ``str(self)``.

        Returns a tuple ``(accuracy, predictions)``.
        """
        targets, samples = helper.format_for_scikit(labels, test_set)

        # NOTE: a PCA step (PCA(n_components='mle') followed by fit_transform
        # on the samples) used to sit here and is currently disabled.

        predicted = self.classifier.predict(samples)

        if self.plot_roc:
            class_probabilities = self.classifier.predict_proba(samples)
            helper.roc(class_probabilities, targets, str(self))

        score = helper.accuracy(labels, predicted)
        return score, predicted
    def test(self, labels, test_set):
        """Predict on `test_set` and score the predictions against `labels`.

        When ``self.plot_roc`` is truthy, no ROC curve is produced; instead
        the fitted tree is exported in Graphviz format to a fixed path, using
        the keys of the first element of `test_set` as feature names.

        Returns a tuple ``(accuracy, predictions)``.
        """
        # Only the second element returned by format_for_scikit is used here.
        _, ts = helper.format_for_scikit(labels, test_set)
        predictions = self.classifier.predict(ts)

        if self.plot_roc:
            # Materialize the keys: a dict view cannot be indexed, and the
            # exporter looks feature names up by position.
            feat_list = list(test_set[0].keys())
            # FIXME: handle output file name
            outfile = '../data/dt.dot'
            # Format inside the call: with print as a function,
            # ``print(...) % x`` applies ``%`` to None and raises TypeError.
            print("ROC curve unavailable for this classifier.\n"
                  "Creating a Decision Tree plot instead in: %s" % (outfile,))
            # Keyword arguments keep this call valid regardless of where
            # ``feature_names`` sits in the exporter's positional order.
            tree.export_graphviz(self.classifier, out_file=outfile,
                                 feature_names=feat_list)

        return helper.accuracy(labels, predictions), predictions
    def chi2(data, labels, print_results=True):
        """Compute chi-squared statistic for each class/feature combination.

        The results are printed to stdout (if the `print_results` parameter
        is True) AND returned.

        Returns the ``(chi2, pval)`` pair produced by ``fs.chi2``.
        """
        y, X = helper.format_for_scikit(labels, data)
        # Local name differs from the function name to avoid shadowing it.
        chi2_stats, pval = fs.chi2(X, y)

        if print_results:
            # NOTE(review): assumes the column order of X matches the key
            # order of data[0] -- confirm against helper.format_for_scikit.
            feature_names = list(data[0].keys())

            # Format inside each call: with print as a function,
            # ``print(...) % x`` applies ``%`` to None and raises TypeError.
            print('Chi-squared values for each feature :')
            for name, value in zip(feature_names, chi2_stats):
                print('%s => %f' % (name, value))
            print('#####################################')
            print('p-value for each feature :')
            for name, value in zip(feature_names, pval):
                print('%s => %f' % (name, value))

        return chi2_stats, pval
Beispiel #6
0
 def train(self, labels, train_set):
     """Fit a new ``lda.LDA`` model and keep it in ``self.classifier``."""
     self.classifier = lda.LDA()
     targets, samples = helper.format_for_scikit(labels, train_set)
     self.classifier.fit(samples, targets)
 def train(self, labels, train_set):
     """Fit a new decision tree and keep it in ``self.classifier``."""
     self.classifier = tree.DecisionTreeClassifier()
     formatted = helper.format_for_scikit(labels, train_set)
     targets, samples = formatted
     self.classifier.fit(samples, targets)
 def train(self, labels, train_set):
     """Fit a new ``BernoulliNB`` model and keep it in ``self.classifier``."""
     self.classifier = BernoulliNB()
     targets, samples = helper.format_for_scikit(labels, train_set)
     self.classifier.fit(samples, targets)
Beispiel #9
0
 def train(self, labels, train_set):
     """Fit a new logistic regression and keep it in ``self.classifier``."""
     self.classifier = linear_model.LogisticRegression()
     formatted = helper.format_for_scikit(labels, train_set)
     targets, samples = formatted
     self.classifier.fit(samples, targets)