SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    AdaBoostClassifier(),
    GaussianNB(),
    ExtraTreesClassifier()
]

# Iterate over the candidate classifiers: fit each one on the training split,
# run the project's evaluation helper on the full data, and report the
# accuracy measured on the held-out test split.
for name, clf in zip(names, classifiers):
    clf.fit(features_train, labels_train)
    # `score` yields a single accuracy value for this one train/test split,
    # so there is no spread (+/-) to report next to it.
    scores = clf.score(features_test, labels_test)
    print(" ")
    # Include the classifier's name so each report section is attributable.
    print("Classifier: %s" % name)
    evaluate.evaluate_clf(clf, features, labels, num_iters=1000, test_size=0.3)
    print("Accuracy: %0.2f" % scores)
    print("=====================================================================")

# Model tuning with grid_search.GridSearchCV.
# Set up the cross-validation splitter; the custom scoring function is
# defined right after it.
from sklearn import grid_search
from sklearn.tree import DecisionTreeClassifier

# Stratified shuffle-split built from `labels` with n_iter=10.
# NOTE(review): this accesses `sklearn.cross_validation` as an attribute of
# the `sklearn` package; no `import sklearn.cross_validation` is visible in
# this section -- confirm it is imported earlier in the file.
cv = sklearn.cross_validation.StratifiedShuffleSplit(labels, n_iter=10)


def scoring(estimator, features_test, labels_test):
    labels_pred = estimator.predict(features_test)
    p = sklearn.metrics.precision_score(labels_test,
                                        labels_pred,
Beispiel #2
0
### Candidate estimators: bring in every class first, then instantiate them.
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC

# AdaBoost, explicitly configured with the 'SAMME' algorithm.
a_clf = AdaBoostClassifier(algorithm='SAMME')

# Support vector machine with an RBF kernel and C=1000.
s_clf = SVC(kernel='rbf', C=1000)

# Random forest with the library's default settings.
rf_clf = RandomForestClassifier()

# Stochastic gradient descent with the 'log' loss (logistic regression).
g_clf = SGDClassifier(loss='log')

### Run the project's evaluation helper on the two shortlisted estimators,
### in the same order as before (l_clf first, then k_clf).
for candidate_clf in (l_clf, k_clf):
    evaluate.evaluate_clf(candidate_clf, features, labels)

### Final algorithm selection: l_clf becomes the classifier that is kept.
clf = l_clf

# Dump the classifier, dataset and feature list so anyone can run/check the
# results.
# Pickle data is a byte stream, so each file is opened in binary mode ("wb");
# text mode can corrupt the stream on platforms that translate newlines and
# fails outright on Python 3. The `with` blocks make sure every file handle
# is flushed and closed as soon as its dump finishes.
with open("../data/my_classifier.pkl", "wb") as out_file:
    pickle.dump(clf, out_file)
with open("../data/my_dataset.pkl", "wb") as out_file:
    pickle.dump(my_dataset, out_file)
with open("../data/my_feature_list.pkl", "wb") as out_file:
    pickle.dump(my_feature_list, out_file)

Beispiel #3
0
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    AdaBoostClassifier(),
    GaussianNB(),
    ExtraTreesClassifier()]


 # iterate over classifiers
for name, clf in zip(names, classifiers):
    # Fit on the training split, then measure accuracy on the held-out split.
    clf.fit(features_train, labels_train)
    # `score` yields a single accuracy value for this one train/test split,
    # so there is no spread (+/-) to report next to it.
    scores = clf.score(features_test, labels_test)
    print(" ")
    # Include the classifier's name so each report section is attributable.
    print("Classifier: %s" % name)
    evaluate.evaluate_clf(clf, features, labels, num_iters=1000, test_size=0.3)
    print("Accuracy: %0.2f" % scores)
    print("=====================================================================")


# Model tuning using grid_search.GridSearchCV.
# Define the cross-validation splitter; the scoring function follows it.
from sklearn import grid_search
from sklearn.tree import DecisionTreeClassifier

# Stratified shuffle-split built from `labels` with n_iter=10.
# NOTE(review): this accesses `sklearn.cross_validation` as an attribute of
# the `sklearn` package; no `import sklearn.cross_validation` is visible in
# this section -- confirm it is imported earlier in the file.
cv = sklearn.cross_validation.StratifiedShuffleSplit(labels, n_iter=10)
def scoring(estimator, features_test, labels_test):
     labels_pred = estimator.predict(features_test)
     p = sklearn.metrics.precision_score(labels_test, labels_pred, average='micro')
     r = sklearn.metrics.recall_score(labels_test, labels_pred, average='micro')
     if p > 0.3 and r > 0.3:
# K-means estimator with two clusters and a convergence tolerance of 0.001.
# NOTE(review): `KMeans` is not imported in this section -- presumably it is
# imported earlier in the file; confirm.
k_clf = KMeans(n_clusters=2, tol=0.001)

### Remaining candidates: bring in every class first, then instantiate them.
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC

# AdaBoost, explicitly configured with the 'SAMME' algorithm.
a_clf = AdaBoostClassifier(algorithm='SAMME')

# Support vector machine with an RBF kernel and C=1000.
s_clf = SVC(kernel='rbf', C=1000)

# Random forest with the library's default settings.
rf_clf = RandomForestClassifier()

# Stochastic gradient descent with the 'log' loss (logistic regression).
g_clf = SGDClassifier(loss='log')

### Run the project's evaluation helper on the two shortlisted estimators,
### in the same order as before (l_clf first, then k_clf).
for candidate_clf in (l_clf, k_clf):
    evaluate.evaluate_clf(candidate_clf, features, labels)

### Final algorithm selection: l_clf becomes the classifier that is kept.
clf = l_clf

# Dump the classifier, dataset and feature list so anyone can run/check the
# results.
# Pickle data is a byte stream, so each file is opened in binary mode ("wb");
# text mode can corrupt the stream on platforms that translate newlines and
# fails outright on Python 3. The `with` blocks make sure every file handle
# is flushed and closed as soon as its dump finishes.
with open("../data/my_classifier.pkl", "wb") as out_file:
    pickle.dump(clf, out_file)
with open("../data/my_dataset.pkl", "wb") as out_file:
    pickle.dump(my_dataset, out_file)
with open("../data/my_feature_list.pkl", "wb") as out_file:
    pickle.dump(my_feature_list, out_file)