import csv
import os

# Project-local helper modules used below.
import readDatasets
import stopWords
import nbClassifier
import rfClassifier
import svmClassifier
import MyMethodClassifier


def run_all_classifiers(stop_words, use_pipeline, dynamic_datasets_path):
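    """Run the Naive Bayes, Random Forest, SVM and MyMethod classifiers on the
    train/test split read from dynamic_datasets_path, then write their
    evaluation scores (Accuracy, Precision, Recall, F-Measure) to
    EvaluationMetric_10fold.csv.
    """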
    data = readDatasets.read_dataset(dynamic_datasets_path)
    train_data = data[0]
    test_data = data[1]

    print('Running the classifiers...\n')

    nbScores = nbClassifier.nb_classifier(stop_words, train_data, test_data,
                                          use_pipeline)
    rfScores = rfClassifier.rf_classifier(stop_words, train_data, test_data,
                                          use_pipeline)
    svmScores = svmClassifier.svm_classifier(stop_words, train_data, test_data,
                                             use_pipeline)
    # knnScores = knnClassifier.knn_classifier(stop_words, train_data, test_data)
    mymethodScores = MyMethodClassifier.my_method_classifier(
        stop_words, train_data, test_data, dynamic_datasets_path)


    # Open the output CSV file and write the scores returned by the classifiers.

    print('Writing classifiers\' scores to the outputCsvFile...\n')

    location_10_fold = os.path.join(dynamic_datasets_path, 'Resources',
                                    'datasets', 'EvaluationMetric_10fold.csv')

    with open(location_10_fold, mode='w', encoding="utf8",
              newline='') as csvfile:
        csvWriter = csv.writer(csvfile,
                               delimiter='\t',
                               quotechar='|',
                               quoting=csv.QUOTE_MINIMAL)

        # Write the header row.
        csvWriter.writerow(['Statistic_Measure', 'Naive_Bayes',
                            'Random_Forest', 'SVM', 'KNN', 'My_Method'])

        # Write one row per metric. The KNN classifier is currently disabled,
        # so its column holds the placeholder string 'knn'
        # (it would become knnScores[i] once KNN is re-enabled).
        metrics = ['Accuracy', 'Precision', 'Recall', 'F-Measure']
        for i, metric in enumerate(metrics):
            csvWriter.writerow([metric,
                                '{:.3}'.format(nbScores[i]),
                                '{:.3}'.format(rfScores[i]),
                                '{:.3}'.format(svmScores[i]),
                                'knn',
                                '{:.3}'.format(mymethodScores[i])])

    print('Finished writing to the outputCsvFile!')
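

# Hedged example (not part of the original module): invoke run_all_classifiers
# the same way the __main__ blocks of the classifier excerpts below do. The
# '..' datasets path, the use_pipeline flag and the stopWords helper are
# assumptions borrowed from those blocks.
if __name__ == '__main__':
    run_all_classifiers(stopWords.get_stop_words(),
                        use_pipeline=False,
                        dynamic_datasets_path='..')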
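

# --- Excerpt: tail end of MyMethodClassifier.my_method_classifier(...) ---
# The earlier part of the function (which builds clf, vectorTrain, vectorTest,
# scores and start_time_successional) is not included in this excerpt.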
    clf.fit(vectorTrain, train_data['Category'])
    y_pred = clf.predict(vectorTest)

    # print("Train Accuracy :: ", accuracy_score(train_data['Category'], clf.predict(vectorTrain)))
    # print("Test Accuracy :: ", accuracy_score(train_data['Category'], y_pred))

    #y_pred = cross_val_predict(clf, X=vectorTrain, y=vectorTest, cv=10, n_jobs=multiprocessing.cpu_count())
    writePredictionsToCsv.write_predictions_to_csv(y_pred, test_data,
                                                   dynamic_datasets_path)

    # Best GridSearch params
    # print(clf.best_params_)

    print("Elapsed time of successional-run: ",
          time.time() - start_time_successional)

    print('MyMethodClassifier finished!\n')
    return scores


# Run myMethodClassifier directly:
if __name__ == '__main__':
    dynamic_datasets_path = '..'
    data = readDatasets.read_dataset(dynamic_datasets_path)
    trainData = data[0]
    testData = data[1]

    my_method_classifier(stopWords.get_stop_words(), trainData, testData,
                         dynamic_datasets_path)
    exit()
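

# --- Excerpt: tail end of rfClassifier.rf_classifier(...) ---
# The earlier part of the function (which builds clf, vectorTrain, vectorTest,
# scores and start_time_successional) is not included in this excerpt.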
        # GridSearch
        # parameters = {'n_estimators': [130, 110, 100, 80, 50, 30, 20, 10]}
        # svr = RandomForestClassifier()
        # clf = GridSearchCV(svr, parameters)

        # clf.fit(vectorTrain, train_y)
        # y_pred = clf.predict(vectorTest)
        #
        # print("Train Accuracy :: ", accuracy_score(train_y, clf.predict(vectorTrain)))
        # print("Test Accuracy :: ", accuracy_score(test_y, y_pred))

        # Best GridSearch params
        # print(clf.best_params_)

        print("Elapsed time of successional-run: ",
              time.time() - start_time_successional)

    print('rfClassifier finished!\n')
    return scores


# Run rfClassifier directly:
if __name__ == '__main__':
    data = readDatasets.read_dataset()
    trainData = data[0]
    testData = data[1]
    usePipeline = False

    rf_classifier(stopWords.get_stop_words(), trainData, testData, usePipeline)