# Partition the cleaned articles and their target names into train and test
# portions. TEST_SET_SIZE_RATIO sets the test share; random_state is pinned
# to 42.
(train_corpus,
 test_corpus,
 train_label_names,
 test_label_names) = train_test_split(
    np.array(data_df['Clean Article']),
    np.array(data_df['Target Name']),
    test_size=TEST_SET_SIZE_RATIO,
    random_state=42,
)

# Build feature matrices for both corpora in a single call.
cv_train_features, cv_test_features = get_simple_bag_of_words_features(
    train_corpus, test_corpus)

# Bundle features and labels into one object for the training routine.
training_data = TrainingData(
    cv_train_features,
    train_label_names,
    cv_test_features,
    test_label_names,
)

# Classifier instances, their display names and their short names.
(classifier_list,
 classifier_name_list,
 classifier_name_shortcut_list) = get_chosen_classifiers()

# Train every classifier (the training routine also saves to disk).
results = train_multiple_classifiers(
    classifier_list, classifier_name_list, training_data)

# Alternative to training: load previously saved classifiers and results.
# classifier_list = util.load_classifier_list(classifier_name_list,
#                                             CLASSIFIERS_AND_RESULTS_DIR_PATH)
# results = util.load_object(RESULTS_PATH)

# create_cv_test_time_plots(results, classifier_name_shortcut_list)

# Each entry of `results` is read positionally. Judging by the target names:
# [1] is the CV mean score, [2] the test score, [3] the elapsed time
# (NOTE(review): confirm against train_multiple_classifiers).
cv_mean_scores = [
    round(entry[1], SCORE_DECIMAL_PLACES) for entry in results
]
test_scores = [
    round(entry[2], SCORE_DECIMAL_PLACES) for entry in results
]
elapsed_times = [
    round(entry[3], TIME_DECIMAL_PLACES) for entry in results
]

# create_bar_plot(classifier_name_shortcut_list, 'Classifier scores', 'Accuracy',
#                 cv_mean_scores, y_range_tuple=(0, 1))
def get_chosen_classifiers_and_their_metadata(self):
    """Return the chosen classifiers for every entry in ``self.classifiers``.

    Each element of ``self.classifiers`` is converted with
    ``get_classifier_tuple``; the resulting list is handed to
    ``get_chosen_classifiers`` and its return value is passed back
    unchanged.
    """
    resolved_tuples = [
        get_classifier_tuple(classifier_enum)
        for classifier_enum in self.classifiers
    ]
    return get_chosen_classifiers(resolved_tuples)