def main():
    training_imp = import_cleaned_training_set()
    testing_imp = import_cleaned_testing_set()
    training, testing = get_text_from_reviews(training_imp, testing_imp)
    training_overall = extract_overall_from_reviews(training_imp)
    testing_overall = extract_overall_from_reviews(testing_imp)

    print("polarity")
    word_dictionary = extract_word_dictionary()
    negative, neutral = import_neutral_negative()
    print("dictionary size:", len(word_dictionary))
    polarity_train = [set_polarity(review, word_dictionary, negative, neutral) for review in training]
    polarity_test = [set_polarity(review, word_dictionary, negative, neutral) for review in testing]

    print("removing repeats")
    without_repeat_train = sum_repeated(polarity_train)
    without_repeat_test = sum_repeated(polarity_test)

    print("creating matrix")
    all_words = without_repeat_test + without_repeat_train
    unique_words = extract_unique_words(all_words)

    print("unique words:", len(unique_words))
    matrix_test = create_sparse_matrix(unique_words, without_repeat_test)
    matrix_train = create_sparse_matrix(unique_words, without_repeat_train)

    print("svm")
    c = train_model(matrix_train, training_overall, matrix_test, testing_overall)
    plot_confusion_matrix(c)
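
# For context: a minimal sketch of what the create_sparse_matrix helper above might
# look like (the real implementation is not shown in this example). It assumes each
# review is represented as a dict mapping word -> count after sum_repeated.
from scipy.sparse import lil_matrix

def create_sparse_matrix(unique_words, reviews):
    # Fix a column index for every word in the shared vocabulary
    index = {word: i for i, word in enumerate(unique_words)}
    matrix = lil_matrix((len(reviews), len(unique_words)))
    for row, review in enumerate(reviews):
        for word, count in review.items():
            matrix[row, index[word]] = count
    return matrix.tocsr()  # CSR format is efficient for downstream SVM training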
Example #2
import datetime

import pandas as pd
from tensorflow.keras.callbacks import Callback, EarlyStopping

# Load the test set listed in the project config
data['TEST'] = pd.read_csv(cfg['PATHS']['TEST_SET'])
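
# The `run` handle used by the logging code below is assumed to be the current Azure ML
# run context; when the script is submitted as an Azure ML experiment it is obtained with:
from azureml.core import Run
run = Run.get_context()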

# Custom Keras callback that logs all training and validation metrics after each epoch to the current Azure run
class LogRunMetrics(Callback):
    def on_epoch_end(self, epoch, logs=None):
        for metric_name in logs:
            # Group metrics under validation_/training_ prefixes in the Azure run
            if metric_name.startswith('val_'):
                run.log('validation_' + metric_name.split('_')[-1], logs[metric_name])
            else:
                run.log('training_' + metric_name, logs[metric_name])

# Set model callbacks
callbacks = [EarlyStopping(monitor='val_loss', verbose=1, patience=cfg['TRAIN']['PATIENCE'], mode='min', restore_best_weights=True),
             LogRunMetrics()]

# Train a model
start_time = datetime.datetime.now()
model, test_metrics, test_generator = train_model(cfg, data, callbacks)
print("TRAINING TIME = " + str((datetime.datetime.now() - start_time).total_seconds() / 60.0) + " min")

# Log test set performance metrics, ROC, confusion matrix in Azure run
# predict_generator is deprecated in TF2; model.predict accepts generators directly
test_predictions = model.predict(test_generator, verbose=0)
test_labels = test_generator.labels
for metric_name in test_metrics:
    run.log('test_' + metric_name, test_metrics[metric_name])
covid_idx = test_generator.class_indices['COVID-19']
roc_plt = plot_roc("Test set", test_labels, test_predictions, class_id=covid_idx)
run.log_image("ROC", plot=roc_plt)
cm_plt = plot_confusion_matrix(test_labels, test_predictions, class_id=covid_idx)
run.log_image("Confusion matrix", plot=cm_plt)
Example #3
print("TRAINING TIME = " +
      str((datetime.datetime.now() - start_time).total_seconds() / 60.0) +
      " min")

# Record the best model's TensorBoard log folder and copy it to a fixed location
run.log("TensorBoard logs folder original name", best_logs_date)
shutil.copytree(cfg['PATHS']['LOGS'] + best_logs_date,
                cfg['PATHS']['LOGS'] + "logs")

# Log test set performance metrics, ROC, confusion matrix in Azure run
test_predictions = model.predict(data['X_test'],
                                 batch_size=cfg['TRAIN']['BATCH_SIZE'])
for metric_name in test_metrics:
    run.log('best_test_' + metric_name, test_metrics[metric_name])
roc_plt = plot_roc("Test set", data['Y_test'], test_predictions)
run.log_image("ROC", plot=roc_plt)
cm_plt = plot_confusion_matrix(data['Y_test'], test_predictions)
run.log_image("Confusion matrix", plot=cm_plt)

# Log test set performance of all models and save to CSV
for i in range(len(test_metrics_dict['loss'])):
    run.log_row("Test set metrics",
                model=(i + 1),
                f1score=test_metrics_dict['f1score'][i],
                recall=test_metrics_dict['recall'][i],
                precision=test_metrics_dict['precision'][i],
                auc=test_metrics_dict['auc'][i],
                loss=test_metrics_dict['loss'][i])
test_set_metrics_df = pd.DataFrame(test_metrics_dict)
test_set_metrics_df.to_csv(cfg['PATHS']['MULTI_TRAIN_TEST_METRICS'])

import numpy as np

# y_pred holds the model's predicted class probabilities (computed earlier in the
# script); convert them to hard class indices
predict = np.argmax(y_pred, axis=-1)

# Ground-truth class indices for the evaluation and training generators
classes = eval_generator.classes[eval_generator.index_array]
classes_all = eval_generator_train.classes[eval_generator_train.index_array]

# Column-vector copy for the one-hot encoder below; the 1-D version is kept
# for the sklearn metrics, which expect flat label arrays
classes_col = np.reshape(classes, (len(classes), 1))

# One-hot encode the true labels to compare against the predicted probabilities
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(handle_unknown='ignore')
y_test = enc.fit_transform(classes_col).toarray()

##
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score

# Weighted averaging accounts for class imbalance in the multiclass metrics
Resultsdic = {}
Resultsdic['accuracy'] = accuracy_score(classes, predict)
Resultsdic['precision'] = precision_score(classes, predict, average="weighted")
Resultsdic['recall'] = recall_score(classes, predict, average="weighted")
Resultsdic['auc score'] = roc_auc_score(y_test, y_pred, average="weighted", multi_class='ovr')

##
names=["covid","normal","pneumonia-bac", "pneumonia-vir"]
names=["Healthy","infected", "Pneumonia"]
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(classes, predict)
# vs refers to the project's visualization helper module (not shown in this snippet)
vs.plot_confusion_matrix(cm, target_names=names, normalize=False)
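
# A generic stand-in for vs.plot_confusion_matrix, assuming a matplotlib-based helper
# with the same keyword arguments (a sketch, not the project's implementation):
import matplotlib.pyplot as plt
import numpy as np

def plot_confusion_matrix(cm, target_names, normalize=False):
    if normalize:
        cm = cm.astype(float) / cm.sum(axis=1, keepdims=True)  # row-normalize to rates
    fig, ax = plt.subplots()
    im = ax.imshow(cm, cmap='Blues')
    fig.colorbar(im)
    ax.set_xticks(np.arange(len(target_names)))
    ax.set_xticklabels(target_names, rotation=45)
    ax.set_yticks(np.arange(len(target_names)))
    ax.set_yticklabels(target_names)
    # Annotate each cell with its count (or rate when normalized)
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], '.2f' if normalize else 'd'), ha='center')
    ax.set_xlabel("Predicted label")
    ax.set_ylabel("True label")
    fig.tight_layout()
    plt.show()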