def test_classification_report_multiclass():
    # Test performance report
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
              precision    recall  f1-score   support

      setosa       0.83      0.79      0.81        24
  versicolor       0.33      0.10      0.15        31
   virginica       0.42      0.90      0.57        20

weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(
        y_true, y_pred, labels=np.arange(len(iris.target_names)),
        target_names=iris.target_names)
    assert_equal(report, expected_report)

    # print classification report with label detection
    expected_report = """\
              precision    recall  f1-score   support

           0       0.83      0.79      0.81        24
           1       0.33      0.10      0.15        31
           2       0.42      0.90      0.57        20

weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
def test_classification_report_multiclass_with_string_label():
    y_true, y_pred, _ = make_prediction(binary=False)

    y_true = np.array(["blue", "green", "red"])[y_true]
    y_pred = np.array(["blue", "green", "red"])[y_pred]

    expected_report = """\
              precision    recall  f1-score   support

        blue       0.83      0.79      0.81        24
       green       0.33      0.10      0.15        31
         red       0.42      0.90      0.57        20

weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)

    expected_report = """\
              precision    recall  f1-score   support

           a       0.83      0.79      0.81        24
           b       0.33      0.10      0.15        31
           c       0.42      0.90      0.57        20

weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred,
                                   target_names=["a", "b", "c"])
    assert_equal(report, expected_report)
def test_classification_report_multiclass_with_digits():
    # Test performance report with added digits in floating point values
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
              precision    recall  f1-score   support

      setosa    0.82609    0.79167    0.80851        24
  versicolor    0.33333    0.09677    0.15000        31
   virginica    0.41860    0.90000    0.57143        20

weighted avg    0.51375    0.53333    0.47310        75
"""
    report = classification_report(
        y_true, y_pred, labels=np.arange(len(iris.target_names)),
        target_names=iris.target_names, digits=5)
    assert_equal(report, expected_report)

    # print classification report with label detection
    expected_report = """\
              precision    recall  f1-score   support

           0       0.83      0.79      0.81        24
           1       0.33      0.10      0.15        31
           2       0.42      0.90      0.57        20

weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
def test_multilabel_classification_report_with_samples_averaging():
    n_classes = 4
    n_samples = 50

    _, y_true = make_multilabel_classification(n_features=1,
                                               n_samples=n_samples,
                                               n_classes=n_classes,
                                               random_state=0)

    _, y_pred = make_multilabel_classification(n_features=1,
                                               n_samples=n_samples,
                                               n_classes=n_classes,
                                               random_state=1)

    expected_report = """\
             precision    recall  f1-score   support

          0       0.50      0.67      0.57        24
          1       0.51      0.74      0.61        27
          2       0.29      0.08      0.12        26
          3       0.52      0.56      0.54        27

samples avg       0.46      0.42      0.40       104
"""
    report = classification_report(y_true, y_pred, average='samples')
    assert_equal(report, expected_report)
def test_classification_report_binary_averaging():
    y_true = [0, 1, 1, 1, 0, 1, 1, 0]
    y_pred = [0, 0, 1, 1, 1, 0, 1, 0]

    # print classification report with class names
    expected_report = """\
            precision    recall  f1-score   support

         0       0.50      0.67      0.57         3
         1       0.75      0.60      0.67         5

binary avg       0.75      0.60      0.67         8
"""
    report = classification_report(y_true, y_pred, average='binary')
    assert_equal(report, expected_report)
def test_classification_report_multiclass_with_unicode_label():
    y_true, y_pred, _ = make_prediction(binary=False)

    labels = np.array([u"blue\xa2", u"green\xa2", u"red\xa2"])
    y_true = labels[y_true]
    y_pred = labels[y_pred]

    expected_report = u"""\
              precision    recall  f1-score   support

       blue\xa2       0.83      0.79      0.81        24
      green\xa2       0.33      0.10      0.15        31
        red\xa2       0.42      0.90      0.57        20

weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
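# Illustration only (not part of the test suite): a minimal sketch of how the
# `average` parameter exercised by the tests above is meant to be used. It assumes
# the classification_report variant tested here, which accepts an `average` argument.
def _example_binary_average_report():
    y_true = [0, 1, 1, 1, 0, 1, 1, 0]
    y_pred = [0, 0, 1, 1, 1, 0, 1, 0]
    # With average='binary', the summary row reports precision/recall/f1 of the
    # positive class only, instead of listing macro/weighted averages.
    return classification_report(y_true, y_pred, average='binary')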
def train_model(dataset_directory: str,
                model_name: str,
                stroke_thicknesses: List[int],
                width: int,
                height: int,
                staff_line_vertical_offsets: List[int],
                training_minibatch_size: int,
                optimizer: str,
                dynamic_learning_rate_reduction: bool,
                use_fixed_canvas: bool,
                datasets: List[str],
                class_weights_balancing_method: str):
    image_dataset_directory = os.path.join(dataset_directory, "images")
    bounding_boxes = None
    bounding_boxes_cache = os.path.join(dataset_directory, "bounding_boxes.txt")

    print("Loading configuration and data-readers...")
    start_time = time()

    # Infer the number of classes from the training folder and build the model configuration
    number_of_classes = len(os.listdir(os.path.join(image_dataset_directory, "training")))
    training_configuration = ConfigurationFactory.get_configuration_by_name(model_name, optimizer, width, height,
                                                                            training_minibatch_size,
                                                                            number_of_classes)
    if training_configuration.performs_localization() and bounding_boxes is None:
        # Try to unpickle
        with open(bounding_boxes_cache, "rb") as cache:
            bounding_boxes = pickle.load(cache)

    if not training_configuration.performs_localization():
        bounding_boxes = None

    # Data generators for training, validation and test
    train_generator = ImageDataGenerator(rotation_range=training_configuration.rotation_range,
                                         zoom_range=training_configuration.zoom_range)
    training_data_generator = DirectoryIteratorWithBoundingBoxes(
        directory=os.path.join(image_dataset_directory, "training"),
        image_data_generator=train_generator,
        target_size=(training_configuration.input_image_rows,
                     training_configuration.input_image_columns),
        batch_size=training_configuration.training_minibatch_size,
        bounding_boxes=bounding_boxes)
    training_steps_per_epoch = np.math.ceil(training_data_generator.samples / training_data_generator.batch_size)

    validation_generator = ImageDataGenerator()
    validation_data_generator = DirectoryIteratorWithBoundingBoxes(
        directory=os.path.join(image_dataset_directory, "validation"),
        image_data_generator=validation_generator,
        target_size=(training_configuration.input_image_rows,
                     training_configuration.input_image_columns),
        batch_size=training_configuration.training_minibatch_size,
        bounding_boxes=bounding_boxes)
    validation_steps_per_epoch = np.math.ceil(validation_data_generator.samples / validation_data_generator.batch_size)

    test_generator = ImageDataGenerator()
    test_data_generator = DirectoryIteratorWithBoundingBoxes(
        directory=os.path.join(image_dataset_directory, "test"),
        image_data_generator=test_generator,
        target_size=(training_configuration.input_image_rows,
                     training_configuration.input_image_columns),
        batch_size=training_configuration.training_minibatch_size,
        shuffle=False,
        bounding_boxes=bounding_boxes)
    test_steps_per_epoch = np.math.ceil(test_data_generator.samples / test_data_generator.batch_size)

    model = training_configuration.classifier()
    model.summary()

    print("Model {0} loaded.".format(training_configuration.name()))
    print(training_configuration.summary())

    start_of_training = datetime.date.today()
    best_model_path = "{0}_{1}.h5".format(start_of_training, training_configuration.name())

    monitor_variable = 'val_acc'
    if training_configuration.performs_localization():
        monitor_variable = 'val_output_class_acc'

    # Callbacks: checkpointing of the best model, early stopping, optional LR reduction, TensorBoard
    model_checkpoint = ModelCheckpoint(best_model_path, monitor=monitor_variable, save_best_only=True, verbose=1)
    early_stop = EarlyStopping(monitor=monitor_variable,
                               patience=training_configuration.number_of_epochs_before_early_stopping,
                               verbose=1)
    learning_rate_reduction = ReduceLROnPlateau(monitor=monitor_variable,
                                                patience=training_configuration.number_of_epochs_before_reducing_learning_rate,
                                                verbose=1,
                                                factor=training_configuration.learning_rate_reduction_factor,
                                                min_lr=training_configuration.minimum_learning_rate)
    tensorboard_callback = TensorBoard(
        log_dir="./logs/{0}_{1}/".format(start_of_training, training_configuration.name()),
        batch_size=training_configuration.training_minibatch_size)

    if dynamic_learning_rate_reduction:
        callbacks = [model_checkpoint, early_stop, tensorboard_callback, learning_rate_reduction]
    else:
        print("Learning-rate reduction on Plateau disabled")
        callbacks = [model_checkpoint, early_stop, tensorboard_callback]

    class_weight_calculator = ClassWeightCalculator()
    class_weights = class_weight_calculator.calculate_class_weights(image_dataset_directory,
                                                                    method=class_weights_balancing_method,
                                                                    class_indices=training_data_generator.class_indices)
    if class_weights_balancing_method is not None:
        print("Using {0} method for obtaining class weights to compensate for an unbalanced dataset.".format(
            class_weights_balancing_method))

    print("Training on dataset...")
    history = model.fit_generator(
        generator=training_data_generator,
        steps_per_epoch=training_steps_per_epoch,
        epochs=training_configuration.number_of_epochs,
        callbacks=callbacks,
        validation_data=validation_data_generator,
        validation_steps=validation_steps_per_epoch,
        class_weight=class_weights)

    print("Loading best model from check-point and testing...")
    best_model = keras.models.load_model(best_model_path)

    test_data_generator.reset()
    file_names = test_data_generator.filenames
    class_labels = os.listdir(os.path.join(image_dataset_directory, "test"))
    # Notice that some classes have so few elements, that they are not present in the test-set and do not
    # appear in the final report. To obtain the correct classes, we have to enumerate all non-empty class
    # directories inside the test-folder and use them as labels
    names_of_classes_with_test_data = [
        class_name for class_name in class_labels
        if os.listdir(os.path.join(image_dataset_directory, "test", class_name))]
    true_classes = test_data_generator.classes
    predictions = best_model.predict_generator(test_data_generator, steps=test_steps_per_epoch)

    if training_configuration.performs_localization():
        predicted_classes = numpy.argmax(predictions[0], axis=1)
    else:
        predicted_classes = numpy.argmax(predictions, axis=1)

    test_data_generator.reset()
    evaluation = best_model.evaluate_generator(test_data_generator, steps=test_steps_per_epoch)
    classification_accuracy = 0

    print("Reporting classification statistics with micro average")
    report = sklearn_reporting.classification_report(true_classes, predicted_classes, digits=3,
                                                     target_names=names_of_classes_with_test_data, average='micro')
    print(report)

    print("Reporting classification statistics with macro average")
    report = sklearn_reporting.classification_report(true_classes, predicted_classes, digits=3,
                                                     target_names=names_of_classes_with_test_data, average='macro')
    print(report)

    print("Reporting classification statistics with weighted average")
    report = sklearn_reporting.classification_report(true_classes, predicted_classes, digits=3,
                                                     target_names=names_of_classes_with_test_data, average='weighted')
    print(report)

    # List every misclassified test file along with the class it was mistaken for
    indices_of_misclassified_files = [i for i, e in enumerate(true_classes - predicted_classes) if e != 0]
    misclassified_files = [file_names[i] for i in indices_of_misclassified_files]
    misclassified_files_actual_prediction_indices = [predicted_classes[i] for i in indices_of_misclassified_files]
    misclassified_files_actual_prediction_classes = [class_labels[i] for i in
                                                     misclassified_files_actual_prediction_indices]
    print("Misclassified files:")
    for i in range(len(misclassified_files)):
        print("\t{0} is incorrectly classified as {1}".format(misclassified_files[i],
                                                              misclassified_files_actual_prediction_classes[i]))

    for i in range(len(best_model.metrics_names)):
        current_metric = best_model.metrics_names[i]
        print("{0}: {1:.5f}".format(current_metric, evaluation[i]))
        if current_metric == 'acc' or current_metric == 'output_class_acc':
            classification_accuracy = evaluation[i]

    print("Total Accuracy: {0:0.5f}%".format(classification_accuracy * 100))
    print("Total Error: {0:0.5f}%".format((1 - classification_accuracy) * 100))

    end_time = time()
    execution_time_in_seconds = round(end_time - start_time)
    print("Execution time: {0:.1f}s".format(end_time - start_time))

    training_result_image = "{1}_{0}_{2:.1f}p.png".format(training_configuration.name(), start_of_training,
                                                          classification_accuracy * 100)
    TrainingHistoryPlotter.plot_history(history, training_result_image)

    datasets_string = str.join(",", datasets)
    notification_message = "Training on {0} dataset with model {1} finished. " \
                           "Accuracy: {2:0.5f}%".format(datasets_string, model_name, classification_accuracy * 100)
    TelegramNotifier.send_message_via_telegram(notification_message, training_result_image)

    # Collect the run's metadata and results and append them to the reporting spreadsheet
    dataset_size = training_data_generator.samples + validation_data_generator.samples + test_data_generator.samples
    stroke_thicknesses_string = ",".join(map(str, stroke_thicknesses))
    staff_line_vertical_offsets_string = ",".join(map(str, staff_line_vertical_offsets))
    image_sizes = "{0}x{1}px".format(training_configuration.input_image_rows,
                                     training_configuration.input_image_columns)
    data_augmentation = "{0}% zoom, {1}° rotation".format(int(training_configuration.zoom_range * 100),
                                                          training_configuration.rotation_range)
    today = "{0:02d}.{1:02d}.{2}".format(start_of_training.day, start_of_training.month, start_of_training.year)
    balancing_method = "None" if class_weights_balancing_method is None else class_weights_balancing_method

    GoogleSpreadsheetReporter.append_result_to_spreadsheet(
        dataset_size=dataset_size,
        image_sizes=image_sizes,
        stroke_thicknesses=stroke_thicknesses_string,
        staff_lines=staff_line_vertical_offsets_string,
        model_name=model_name,
        data_augmentation=data_augmentation,
        optimizer=optimizer,
        early_stopping=training_configuration.number_of_epochs_before_early_stopping,
        reduction_patience=training_configuration.number_of_epochs_before_reducing_learning_rate,
        learning_rate_reduction_factor=training_configuration.learning_rate_reduction_factor,
        minibatch_size=training_minibatch_size,
        initialization=training_configuration.initialization,
        initial_learning_rate=training_configuration.get_initial_learning_rate(),
        accuracy=classification_accuracy,
        date=today,
        use_fixed_canvas=use_fixed_canvas,
        datasets=datasets_string,
        execution_time_in_seconds=execution_time_in_seconds,
        balancing_method=balancing_method)
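# A possible command-line entry point for train_model (a minimal sketch; the flag
# names and default values below are assumptions for illustration, not the
# project's actual CLI):
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Train a music-symbol classifier.")
    parser.add_argument("--dataset_directory", default="data", help="Directory that contains the images/ folder")
    parser.add_argument("--model_name", default="vgg", help="Name of the model configuration to train")
    parser.add_argument("--width", default=96, type=int)
    parser.add_argument("--height", default=96, type=int)
    parser.add_argument("--minibatch_size", default=64, type=int)
    parser.add_argument("--optimizer", default="Adadelta")
    flags = parser.parse_args()

    train_model(dataset_directory=flags.dataset_directory,
                model_name=flags.model_name,
                stroke_thicknesses=[3],
                width=flags.width,
                height=flags.height,
                staff_line_vertical_offsets=[],
                training_minibatch_size=flags.minibatch_size,
                optimizer=flags.optimizer,
                dynamic_learning_rate_reduction=True,
                use_fixed_canvas=True,
                datasets=["homus"],
                class_weights_balancing_method=None)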