Example #1
0
# Split the cleaned articles and their target names into train and test
# parts. TEST_SET_SIZE_RATIO is passed as test_size and random_state is
# pinned to 42.
train_corpus, test_corpus, train_label_names, test_label_names = \
    train_test_split(np.array(data_df['Clean Article']),
                     np.array(data_df['Target Name']),
                     test_size=TEST_SET_SIZE_RATIO,
                     random_state=42)

# Turn both corpora into bag-of-words feature sets.
cv_train_features, cv_test_features = get_simple_bag_of_words_features(
    train_corpus,
    test_corpus,
)

# Bundle features and labels for both splits into a single object.
training_data = TrainingData(
    cv_train_features,
    train_label_names,
    cv_test_features,
    test_label_names,
)

# Fetch the classifier objects together with their full and short names.
(classifier_list,
 classifier_name_list,
 classifier_name_shortcut_list) = get_chosen_classifiers()

# Train every classifier.
# NOTE(review): the original comment says the helper also saves to disk --
# confirm in train_multiple_classifiers.
results = train_multiple_classifiers(
    classifier_list,
    classifier_name_list,
    training_data,
)
# Alternative path: load previously trained classifiers and results from
# disk instead of training again.
# classifier_list = util.load_classifier_list(classifier_name_list,
#                                             CLASSIFIERS_AND_RESULTS_DIR_PATH)
# results = util.load_object(RESULTS_PATH)

# create_cv_test_time_plots(results, classifier_name_shortcut_list)

# NOTE(review): positions 1, 2 and 3 of each result entry are presumed to be
# the cross-validation mean score, the test score and the elapsed time --
# confirm against train_multiple_classifiers.
cv_mean_scores = [
    round(entry[1], SCORE_DECIMAL_PLACES) for entry in results
]
test_scores = [
    round(entry[2], SCORE_DECIMAL_PLACES) for entry in results
]
elapsed_times = [
    round(entry[3], TIME_DECIMAL_PLACES) for entry in results
]
# create_bar_plot(classifier_name_shortcut_list, 'Classifier scores', 'Accuracy',
#                 cv_mean_scores, y_range_tuple=(0, 1))
Example #2
0
    def get_chosen_classifiers_and_their_metadata(self):
        """Build classifier tuples for ``self.classifiers`` and pass them on.

        Each entry of ``self.classifiers`` is converted with
        ``get_classifier_tuple``. The resulting list is handed to
        ``get_chosen_classifiers``, whose return value is returned unchanged.
        """
        selected_tuples = [
            get_classifier_tuple(member) for member in self.classifiers
        ]
        return get_chosen_classifiers(selected_tuples)