# ---------------------------------------------------------------------------
# TF-IDF features + SVM with default hyper-parameters.
# NOTE(review): X (the feature matrix), `notices`, `tfidf` and `label_names`
# are expected to be defined before this section -- they are not set here.
# ---------------------------------------------------------------------------
Y = notices.target
vocab = tfidf.vocabulary_
print("vocab len=", len(vocab))

# test_size is handed to train_test_split as the held-out fraction;
# random_state=0 pins the shuffle so repeated runs use the same split.
test_size = 0.6
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, Y, test_size=test_size, random_state=0)

print("--------------------------")
print("TF-IDF - SVM")
print("--------------------------")

# SVC constructed with no arguments, i.e. the library defaults.
clf = svm.SVC()
clf.fit(X_train, y_train)

# Per-class report and confusion matrix on the held-out test split.
y_pred = clf.predict(X_test)
print(metrics.classification_report(y_test, y_pred,
                                    target_names=label_names))
print(metrics.confusion_matrix(y_test, y_pred))

# Cross-validation scores are computed on the training split only.
# The helper is reached through the `met` module, the same module that
# provides showRocAnalysis below.
met.crossValidationScores(clf, X_train, y_train)

###############################################################################
# Classification and ROC analysis
# Run classifier with cross-validation and plot ROC curves.
# Note: this call receives the full X / Y, not just the training split.
met.showRocAnalysis(X, Y)
# ---------------------------------------------------------------------------
# RBF-kernel SVC with an explicit class weight: fit on the training split,
# then print scores, per-class reports and confusion matrices for both the
# test and the training split, followed by cross-validation and ROC analysis.
# ---------------------------------------------------------------------------
_RULE = "-----------------------------------------------"


def _report_split(model, heading, features, labels):
    """Print a banner, then the classification report and confusion matrix.

    Predictions for `features` are produced by `model` after the banner has
    been printed and are returned to the caller.
    """
    print(_RULE)
    print(heading)
    print(_RULE)
    predicted = model.predict(features)
    print(metrics.classification_report(labels, predicted,
                                        target_names=label_names))
    print(metrics.confusion_matrix(labels, predicted))
    return predicted


print(_RULE)

# Weight applied to class label 0; forwarded to both the SVC and the ROC helper.
class_weight = {0: 5}
print("Class weight=", class_weight)

# Every hyper-parameter is spelled out explicitly rather than left to defaults.
_svc_params = dict(
    C=100.0,
    cache_size=200,
    class_weight=class_weight,
    coef0=0.0,
    degree=3,
    gamma=0.0001,
    kernel='rbf',
    max_iter=-1,
    probability=False,
    shrinking=True,
    tol=0.001,
    verbose=False,
)
clf = svm.SVC(**_svc_params)
clf.fit(X_train, y_train)

# Score on the test split first, then on the training split.
print("Test svm.SVC score=", clf.score(X_test, y_test))
print("Train svm.SVC score=", clf.score(X_train, y_train))

y_pred = _report_split(clf, "Metrics on TEST SET", X_test, y_test)
y_predTrain = _report_split(clf, "Metrics on TRAIN SET", X_train, y_train)

met.crossValidationScores(clf, X_train, y_train)

# NOTE(review): the ROC analysis is run on X_bns, which is not assigned in
# this section -- confirm it is the feature matrix X_train/X_test came from.
met.showRocAnalysis(X_bns, Y, class_weight=class_weight)