def main():
    """Run the polarity-feature SVM sentiment pipeline end to end.

    Loads the cleaned review sets, tags each token with a polarity via the
    word dictionary, collapses repeated tokens, builds a shared-vocabulary
    sparse matrix for train/test, trains the SVM, and plots the resulting
    confusion matrix.
    """
    raw_train = import_cleaned_training_set()
    raw_test = import_cleaned_testing_set()
    train_texts, test_texts = get_text_from_reviews(raw_train, raw_test)
    train_labels = extract_overall_from_reviews(raw_train)
    test_labels = extract_overall_from_reviews(raw_test)

    print("polarity")
    word_dictionary = extract_word_dictionary()
    negative, neutral = import_neutral_negative()
    print(len(word_dictionary))
    polarity_train = [set_polarity(review, word_dictionary, negative, neutral)
                      for review in train_texts]
    polarity_test = [set_polarity(review, word_dictionary, negative, neutral)
                     for review in test_texts]

    print("removing repeats")
    deduped_train = sum_repeated(polarity_train)
    deduped_test = sum_repeated(polarity_test)

    print("creating matrix")
    # Vocabulary is built over test + train so both matrices share columns.
    unique_words = extract_unique_words(deduped_test + deduped_train)
    print(len(unique_words))
    matrix_test = create_sparse_matrix(unique_words, deduped_test)
    matrix_train = create_sparse_matrix(unique_words, deduped_train)

    print("svm")
    confusion = train_model(matrix_train, train_labels, matrix_test, test_labels)
    plot_confusion_matrix(confusion)
data['TEST'] = pd.read_csv(cfg['PATHS']['TEST_SET'])


# Custom Keras callback that logs all training and validation metrics
# after each epoch to the current Azure run
class LogRunMetrics(Callback):
    """Forward every epoch-end metric from Keras to the active Azure ML run."""

    def on_epoch_end(self, epoch, log):
        for metric_name in log:
            if 'val' in metric_name:
                # e.g. 'val_loss' -> logged as 'validation_loss'
                run.log('validation_' + metric_name.split('_')[-1], log[metric_name])
            else:
                run.log('training_' + metric_name, log[metric_name])


# Set model callbacks
callbacks = [EarlyStopping(monitor='val_loss', verbose=1, patience=cfg['TRAIN']['PATIENCE'],
                           mode='min', restore_best_weights=True),
             LogRunMetrics()]

# Train a model
start_time = datetime.datetime.now()
model, test_metrics, test_generator = train_model(cfg, data, callbacks)
print("TRAINING TIME = " + str((datetime.datetime.now() - start_time).total_seconds() / 60.0) + " min")

# Log test set performance metrics, ROC, confusion matrix in Azure run.
# FIX: Model.predict_generator is deprecated in tf.keras; Model.predict accepts
# generators directly and is what the multi-train path of this file already uses.
test_predictions = model.predict(test_generator, verbose=0)
test_labels = test_generator.labels
for metric_name in test_metrics:
    run.log('test_' + metric_name, test_metrics[metric_name])
covid_idx = test_generator.class_indices['COVID-19']
roc_plt = plot_roc("Test set", test_generator.labels, test_predictions, class_id=covid_idx)
run.log_image("ROC", plot=roc_plt)
cm_plt = plot_confusion_matrix(test_generator.labels, test_predictions, class_id=covid_idx)
run.log_image("Confusion matrix", plot=cm_plt)
# NOTE(review): the opening of this print statement was truncated in the
# extracted chunk; reconstructed from the identical timing print used by the
# single-train path earlier in this file — confirm against the full source.
print("TRAINING TIME = " + str((datetime.datetime.now() - start_time).total_seconds() / 60.0) + " min")

# Identify the logs from the best model
run.log("TensorBoard logs folder original name", best_logs_date)
shutil.copytree(cfg['PATHS']['LOGS'] + best_logs_date, cfg['PATHS']['LOGS'] + "logs")

# Log test set performance metrics, ROC, confusion matrix in Azure run
test_predictions = model.predict(data['X_test'], batch_size=cfg['TRAIN']['BATCH_SIZE'])
for metric_name in test_metrics:
    run.log('best_test_' + metric_name, test_metrics[metric_name])
roc_plt = plot_roc("Test set", data['Y_test'], test_predictions)
run.log_image("ROC", plot=roc_plt)
cm_plt = plot_confusion_matrix(data['Y_test'], test_predictions)
run.log_image("Confusion matrix", plot=cm_plt)

# Log test set performance of all models and save to CSV
for i in range(len(test_metrics_dict['loss'])):
    run.log_row("Test set metrics",
                model=(i + 1),
                f1score=test_metrics_dict['f1score'][i],
                recall=test_metrics_dict['recall'][i],
                precision=test_metrics_dict['precision'][i],
                auc=test_metrics_dict['auc'][i],
                loss=test_metrics_dict['loss'][i])
test_set_metrics_df = pd.DataFrame(test_metrics_dict)
test_set_metrics_df.to_csv(cfg['PATHS']['MULTI_TRAIN_TEST_METRICS'])

# Save the model's weights
# Evaluate predictions against the evaluation generator's ground truth and
# plot the resulting confusion matrix.
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import OneHotEncoder

predict = np.argmax(y_pred, axis=-1)
classes = eval_generator.classes[eval_generator.index_array]
# NOTE(review): classes_all is not referenced in this chunk; kept in case
# later code in the file uses it — confirm before removing.
classes_all = eval_generator_train.classes[eval_generator_train.index_array]
classes = np.reshape(classes, (len(classes), 1))

# One-hot encode the ground truth so roc_auc_score can be computed against
# the per-class probability scores in y_pred.
enc = OneHotEncoder(handle_unknown='ignore')
y_test = enc.fit_transform(classes).toarray()

Resultsdic = {}
Resultsdic['accuracy'] = accuracy_score(classes, predict)
Resultsdic['precision'] = precision_score(classes, predict, average="weighted")
Resultsdic['recall'] = recall_score(classes, predict, average="weighted")
Resultsdic['auc score'] = roc_auc_score(y_test, y_pred, average="weighted", multi_class='ovr')

names = ["Healthy", "infected", "Pneumonia"]
cm = confusion_matrix(classes, predict)
vs.plot_confusion_matrix(cm, target_names=names, normalize=False)