Ejemplo n.º 1
0
    # Excerpt from a larger routine (its header is outside this view): train a
    # default SVC on a train/test split of X, print test-set metrics, then run
    # the cross-validation and ROC helpers.
    # `notices`, `tfidf`, `X` and `label_names` are defined outside this excerpt.
    Y = notices.target

    # Only the size of the `tfidf` vocabulary is reported; `vocab` is not used
    # again in the visible code.
    vocab = tfidf.vocabulary_
    print("vocab len=", len(vocab))

    # test_size=0.6 is passed to train_test_split, i.e. the larger share of the
    # samples is held out for testing; random_state=0 fixes the split.
    # NOTE(review): `cross_validation` is presumably sklearn.cross_validation,
    # which newer scikit-learn releases replaced with sklearn.model_selection
    # -- confirm against the file's imports and the pinned scikit-learn version.
    test_size = 0.6
    X_train, X_test, y_train, y_test = \
        cross_validation.train_test_split(X, Y,
                                          test_size=test_size, random_state=0)

    print("--------------------------")
    print("TF-IDF - SVM")
    print("--------------------------")

    # SVC is constructed with no arguments, so library defaults apply.
    clf = svm.SVC()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Per-class report and confusion matrix, both computed on the held-out
    # test split.
    print(
        metrics.classification_report(y_test, y_pred,
                                      target_names=label_names))
    print(metrics.confusion_matrix(y_test, y_pred))

    # The helper receives the training split only.
    # NOTE(review): this helper is called unqualified while the ROC helper
    # below is reached through `met` -- verify that `crossValidationScores`
    # is actually in scope here.
    crossValidationScores(clf, X_train, y_train)

    ###############################################################################
    # Classification and ROC analysis

    # Run the ROC helper on the full data set (X, Y), not on the train/test
    # split used above. Presumably it cross-validates a classifier and plots
    # ROC curves -- its implementation is not visible here.
    met.showRocAnalysis(X, Y)
Ejemplo n.º 2
0
    # Excerpt from a larger routine (its header is outside this view): fit a
    # class-weighted RBF SVC, print accuracy and per-class metrics for both the
    # test and train splits, then run the cross-validation and ROC helpers.
    # `X_train`, `X_test`, `y_train`, `y_test`, `X_bns`, `Y` and `label_names`
    # are defined outside this excerpt.
    print("-----------------------------------------------")
    # Class label 0 is given weight 5; no other label is listed in the dict.
    class_weight = {0:5}
    print("Class weight=", class_weight)
    # Every SVC hyperparameter is spelled out explicitly. With kernel='rbf',
    # `degree` and `coef0` have no effect per the scikit-learn SVC docs.
    # The estimator is fitted in the same expression via the chained .fit().
    clf = svm.SVC(C=100.0, cache_size=200, class_weight=class_weight, coef0=0.0, degree=3,
                  gamma=0.0001, kernel='rbf', max_iter= -1, probability=False, shrinking=True,
                  tol=0.001, verbose=False).fit(X_train, y_train)   
    # Printing both scores side by side makes a train/test gap visible.
    print("Test svm.SVC score=", clf.score(X_test, y_test))
    print("Train svm.SVC score=", clf.score(X_train, y_train))
    
    print("-----------------------------------------------")
    print("Metrics on TEST SET")   
    print("-----------------------------------------------")    
    y_pred = clf.predict(X_test)
    
    print(metrics.classification_report(y_test, y_pred, target_names=label_names))
    print(metrics.confusion_matrix(y_test, y_pred))       
    
    print("-----------------------------------------------")
    print("Metrics on TRAIN SET")   
    print("-----------------------------------------------")    
    # Same reports as above, computed on the data the model was fitted on.
    y_predTrain = clf.predict(X_train)
    
    print(metrics.classification_report(y_train, y_predTrain, target_names=label_names))
    print(metrics.confusion_matrix(y_train, y_predTrain))       

    # The helper receives the already-fitted classifier and the training split.
    met.crossValidationScores(clf, X_train, y_train)
    
    # ROC helper is run on (X_bns, Y) with the same class weights.
    # NOTE(review): presumably X_train/X_test were split from X_bns -- the
    # split is not visible in this excerpt, so confirm against the caller.
    met.showRocAnalysis(X_bns, Y, class_weight=class_weight)