def test(self, labels, test_set):
    """Predict on `test_set` and score the predictions against `labels`.

    The samples are converted with `helper.format_for_scikit` before being
    passed to `self.classifier.predict`. When `self.plot_roc` is set, a
    notice is printed because no ROC curve is plotted for this classifier.

    Returns a tuple `(accuracy, predictions)`, where `accuracy` is whatever
    `helper.accuracy(labels, predictions)` returns.
    """
    _, ts = helper.format_for_scikit(labels, test_set)
    predictions = self.classifier.predict(ts)
    if self.plot_roc:
        # Format inside the call: `print(...) % (...)` applies `%` to the
        # return value of print(), which is None when print is a function.
        print("ROC curve plot unavailable for %s" % (str(self)))
    return helper.accuracy(labels, predictions), predictions
def train_helper(self, labels, train_set, kernel_name, deg=3):
    """Build an SVC with the requested kernel and fit it on `train_set`.

    `kernel_name` is forwarded as the SVC `kernel` and `deg` as its
    `degree`. Probability estimates are enabled on the classifier. The
    fitted model is stored in `self.classifier`.
    """
    self.classifier = svm.SVC(
        kernel=kernel_name,
        degree=deg,
        probability=True,
        cache_size=1000.0,
    )
    targets, samples = helper.format_for_scikit(labels, train_set)
    # PCA preprocessing is currently disabled:
    # pca = PCA(n_components='mle')
    # samples = pca.fit_transform(samples)
    self.classifier.fit(samples, targets)
def test(self, labels, test_set):
    """Predict on `test_set`, optionally plot a ROC curve, and score.

    When `self.plot_roc` is set, the class probabilities from
    `self.classifier.predict_proba` are handed to `helper.roc` together
    with the formatted labels and `str(self)`.

    Returns a tuple `(accuracy, predictions)`, where `accuracy` is whatever
    `helper.accuracy(labels, predictions)` returns.
    """
    targets, samples = helper.format_for_scikit(labels, test_set)
    # PCA preprocessing is currently disabled:
    # pca = PCA(n_components='mle')
    # samples = pca.fit_transform(samples)
    predicted = self.classifier.predict(samples)
    if self.plot_roc:
        helper.roc(self.classifier.predict_proba(samples), targets, str(self))
    score = helper.accuracy(labels, predicted)
    return score, predicted
def test(self, labels, test_set):
    """Predict on `test_set` and score the predictions against `labels`.

    When `self.plot_roc` is set, no ROC curve is produced; instead the
    fitted tree in `self.classifier` is exported in Graphviz format to a
    hard-coded path, and a notice is printed.

    Returns a tuple `(accuracy, predictions)`, where `accuracy` is whatever
    `helper.accuracy(labels, predictions)` returns.
    """
    _, ts = helper.format_for_scikit(labels, test_set)
    predictions = self.classifier.predict(ts)
    if self.plot_roc:
        # Materialize the keys: on Python 3 `.keys()` is a view that cannot
        # be indexed, and the exporter looks feature names up by index.
        # NOTE(review): assumes the key order matches the column order
        # produced by helper.format_for_scikit -- confirm.
        feat_list = list(test_set[0].keys())
        # FIXME: handle output file name
        outfile = '../data/dt.dot'
        # Format inside the call: `print(...) % (...)` applies `%` to the
        # return value of print(), which is None when print is a function.
        print(("ROC curve unavailable for this classifier.\n" +
               "Creating a Decision Tree plot instead in: %s") % (outfile))
        # Pass by keyword so the call does not depend on the positional
        # order of export_graphviz's optional parameters.
        tree.export_graphviz(self.classifier, out_file=outfile,
                             feature_names=feat_list)
    return helper.accuracy(labels, predictions), predictions
def chi2(data, labels, print_results=True):
    """Compute chi-squared statistic for each class/feature combination.

    `data` and `labels` are converted with `helper.format_for_scikit` and
    passed to `fs.chi2`. Feature names are taken from the keys of the first
    element of `data`.

    The results are printed to stdout (if the `print_results` parameter is
    True) AND returned as a tuple `(chi2_scores, p_values)`.
    """
    y, X = helper.format_for_scikit(labels, data)
    # Distinct local names: the originals shadowed this function's own name.
    chi2_scores, p_values = fs.chi2(X, y)
    # NOTE(review): assumes the key order of data[0] matches the column
    # order produced by helper.format_for_scikit -- confirm.
    feature_names = list(data[0].keys())
    if print_results:
        print('Chi-squared values for each feature :')
        for i, score in enumerate(chi2_scores):
            # Format inside the call: `print(...) % (...)` applies `%` to
            # the None returned by print() when print is a function.
            print('%s => %f' % (feature_names[i], score))
        print('#####################################')
        print('p-value for each feature :')
        for i, p_value in enumerate(p_values):
            print('%s => %f' % (feature_names[i], p_value))
    return chi2_scores, p_values
def train(self, labels, train_set):
    """Create a fresh `lda.LDA` classifier and fit it on `train_set`.

    The fitted model is stored in `self.classifier`.
    """
    self.classifier = lda.LDA()
    targets, samples = helper.format_for_scikit(labels, train_set)
    self.classifier.fit(samples, targets)
def train(self, labels, train_set):
    """Create a fresh `tree.DecisionTreeClassifier` and fit it on `train_set`.

    The fitted model is stored in `self.classifier`.
    """
    self.classifier = tree.DecisionTreeClassifier()
    targets, samples = helper.format_for_scikit(labels, train_set)
    self.classifier.fit(samples, targets)
def train(self, labels, train_set):
    """Create a fresh `BernoulliNB` classifier and fit it on `train_set`.

    The fitted model is stored in `self.classifier`.
    """
    self.classifier = BernoulliNB()
    targets, samples = helper.format_for_scikit(labels, train_set)
    self.classifier.fit(samples, targets)
def train(self, labels, train_set):
    """Create a fresh `linear_model.LogisticRegression` and fit it on `train_set`.

    The fitted model is stored in `self.classifier`.
    """
    self.classifier = linear_model.LogisticRegression()
    targets, samples = helper.format_for_scikit(labels, train_set)
    self.classifier.fit(samples, targets)