def make_metrics(self, predictions):
    """
    Make metrics with prediction dictionary

    * Args:
        predictions: prediction dictionary consisting of
            - key: 'id' (sequence id)
            - value: dictionary consisting of
                - class_idx

    * Returns:
        metrics: metric dictionary consisting of
            - 'macro_f1': class prediction macro (unweighted mean) f1
            - 'macro_precision': class prediction macro (unweighted mean) precision
            - 'macro_recall': class prediction macro (unweighted mean) recall
            - 'accuracy': class prediction accuracy
    """
    pred_classes = []
    target_classes = []

    for data_id, pred in predictions.items():
        target = self._dataset.get_ground_truth(data_id)

        pred_classes.append(self._dataset.class_idx2text[pred["class_idx"]])
        target_classes.append(target["class_text"])

    # Confusion matrix
    try:
        pycm_obj = pycm.ConfusionMatrix(actual_vector=target_classes,
                                        predict_vector=pred_classes)
    except pycmVectorError as e:
        if str(e) == "Number of the classes is lower than 2":
            logger.warning(
                "Number of classes in the batch is 1. Sanity check is highly recommended."
            )
            return {
                "macro_f1": 1.,
                "macro_precision": 1.,
                "macro_recall": 1.,
                "accuracy": 1.,
            }
        raise

    self.write_predictions(
        {"target": target_classes, "predict": pred_classes},
        pycm_obj=pycm_obj)

    metrics = {
        "macro_f1": macro_f1(pycm_obj),
        "macro_precision": macro_precision(pycm_obj),
        "macro_recall": macro_recall(pycm_obj),
        "accuracy": pycm_obj.Overall_ACC,
    }

    return metrics

def print_metric(y_true, y_pred, weighted_error=False):
    cz = pycm.ConfusionMatrix(actual_vector=y_true.argmax(axis=1),
                              predict_vector=y_pred.argmax(axis=1))

    # Accuracy
    acc = cz.Overall_ACC
    print("Average Accuracy : " + str(acc * 100) + '%')

    # Specificity (macro average over the classes present)
    specificity = cz.TNR
    total_specificity = sum(specificity.values())
    print('Average Specificity : ' + str(total_specificity * 100 / len(specificity)) + '%')

    # Sensitivity (macro average over the classes present)
    recall = cz.TPR
    total_recall = sum(recall.values())
    print('Average Sensitivity : ' + str(total_recall * 100 / len(recall)) + '%')

    if weighted_error:
        Weighted_Error(y_true, y_pred)

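# Hypothetical usage sketch for print_metric above (not part of the original code):
# y_true and y_pred are assumed to be one-hot / per-class score arrays of shape
# (n_samples, n_classes), as implied by the argmax(axis=1) calls.
import numpy as np

rng = np.random.default_rng(0)
demo_true = np.eye(4)[rng.integers(0, 4, size=50)]   # one-hot ground truth, 4 classes
demo_pred = rng.random((50, 4))                       # raw per-class scores
print_metric(demo_true, demo_pred)                    # weighted_error defaults to False
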
def detect_windturbines_with_CNN(self, random_state=None):
    if not isinstance(random_state, type(None)):
        self.random_state = random_state

    # 1. Import the data:
    self.import_data()

    # 2. Split data into training and test data:
    print("\nSplitting data:")
    print("-----------------\n")
    X_train, X_test, y_train, y_test = train_test_split(
        self.X,
        self.y,
        test_size=self.test_size,
        random_state=self.random_state)
    self.X_train = X_train
    self.X_test = X_test
    self.y_train = y_train
    self.y_test = y_test

    # 2.1. Randomize and split the indices with the same random_state
    #      in order to keep the indices of the crops
    X_train, X_test, indices_train, indices_test = train_test_split(
        self.X,
        self.indices,
        test_size=self.test_size,
        random_state=self.random_state)
    self.indices_train = indices_train
    self.indices_test = indices_test

    # 3. Preprocess the data:
    print("\nPreprocessing data:")
    print("-----------------\n")
    training_set = self.preprocess_data(X_train, y_train)
    test_set = self.preprocess_data(X_test, y_test)

    # 4. Build the CNN:
    print("\nBuilding the CNN:")
    print("-----------------\n")
    cnn = self.build_CNN()

    # 5. Compile, train and evaluate the CNN:
    print("\nCompiling, training and evaluating the CNN:")
    print("-----------------\n")
    cnn = self.train_CNN(cnn, training_set, test_set)
    self.cnn = cnn

    # 6. Create confusion matrix and accuracy score:
    print("\nCreate confusion matrix and calculate accuracy score:")
    print("-----------------\n")
    cm, ac = self.create_confusion_matrix()
    self.confusion_matrix = cm
    self.accuracy_score = ac
    self.pycm = pycm.ConfusionMatrix(self.y_test, self.y_pred)

    print("\nDone! CNN object available through .cnn")

def get_complete_report(self, y_true, y_pred, class_indices):
    """
    Calculate every classification metric value that different classification
    problems might need. This function produces a report of all the
    classification metrics, as well as the class-wise statistics for all the
    classes, and exports it to an HTML file saved at the evaluation path.

    Reference for the library: https://www.pycm.ir/doc/index.html#Cite

        @article{Haghighi2018,
          doi = {10.21105/joss.00729},
          url = {https://doi.org/10.21105/joss.00729},
          year = {2018},
          month = {may},
          publisher = {The Open Journal},
          volume = {3},
          number = {25},
          pages = {729},
          author = {Sepand Haghighi and Masoomeh Jasemi and Shaahin Hessabi and Alireza Zolanvari},
          title = {{PyCM}: Multiclass confusion matrix library in Python},
          journal = {Journal of Open Source Software}
        }

    Arguments:
        y_true        : Ground-truth labels (as integers).
        y_pred        : Predicted labels (as integers).
        class_indices : Mapping of class labels to integers, used to translate
                        both label vectors back to class names.

    The model name (e.g. vgg16, inception_v3, resnet50) and the training stage
    (this pipeline is trained in two stages, 1 and 2) are read from
    self.input_params and self.stage_no, so the exported file gets a unique
    name per stage.
    """
    label_indices = {v: k for (k, v) in class_indices.items()}

    y_true_label = [label_indices[item] for item in y_true]
    y_pred_label = [label_indices[item] for item in y_pred]

    cm = pycm.ConfusionMatrix(y_true_label, y_pred_label)
    cm.save_html(
        self.path_dict["eval_path"] +
        "stage{}/".format(self.stage_no) +
        '{}_detailed_metrics_analysis_stage_{}'.format(
            self.input_params["model_name"], self.stage_no))

def get_confusion_matrix(y_predicted, y_actual):
    if len(y_predicted) > 0 and len(y_actual) > 0:
        if not isinstance(y_actual[0], numbers.Number):
            y_actual = y_list_to_single(y_actual)
        if not isinstance(y_predicted[0], numbers.Number):
            y_predicted = y_list_to_single(y_predicted)
        return pycm.ConfusionMatrix(actual_vector=y_actual,
                                    predict_vector=y_predicted)
    else:
        return None

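# Illustrative call to get_confusion_matrix above (the labels below are examples,
# not from the original code); pycm exposes per-class statistics as attributes.
example_actual = [0, 1, 2, 2, 1, 0]
example_predicted = [0, 2, 2, 2, 1, 0]
cm = get_confusion_matrix(example_predicted, example_actual)
if cm is not None:
    print(cm.Overall_ACC)   # overall accuracy
    print(cm.F1)            # per-class F1 scores, keyed by class label
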
def test_to_pyplot(self, mock_pyplot):
    if sys.version_info[0] >= 3:
        import pycm

        handler = torchbearer.callbacks.pycm._to_pyplot(True, 'test {epoch}')
        y_true = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
        y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
        cm = pycm.ConfusionMatrix(y_true, y_pred)
        handler(cm, {torchbearer.EPOCH: 3})
        self.assertTrue(mock_pyplot.imshow.call_args[0][0].max() == 1)
        mock_pyplot.title.assert_called_once_with('test 3')

        handler = torchbearer.callbacks.pycm._to_pyplot(False)
        y_true = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
        y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
        cm = pycm.ConfusionMatrix(y_true, y_pred)
        handler(cm, {})
        self.assertTrue(mock_pyplot.imshow.call_args[0][0].max() > 1)

def test_calculate_metrics():
    truelbl = np.random.choice([0, 1, 2, 3], size=100, replace=True)
    predlbl = np.random.choice([0, 1, 2, 3], size=100, replace=True)

    got_cnf, got_stats = dgpredict.calculate_metrics(predlbl, truelbl)
    got_stats = pd.DataFrame(got_stats)

    expected = pycm.ConfusionMatrix(truelbl, predlbl)
    np.testing.assert_equal(got_cnf, expected.to_array())

    expected_stats = {
        k: expected.class_stat[k]  # pylint: disable=no-member
        for k in ["TPR", "TNR", "PPV", "NPV", "FPR", "FNR", "FDR", "ACC", "F1"]
    }
    expected_stats["MCC"] = expected.overall_stat["Overall MCC"]  # pylint: disable=no-member
    expected_stats["TotalACC"] = expected.overall_stat["Overall ACC"]  # pylint: disable=no-member
    expected_stats = pd.DataFrame(expected_stats)
    pd.testing.assert_frame_equal(got_stats, expected_stats)

def score_model(clf, X, y, split_factor=0.2, window_size=5):
    n = len(X)
    test_len = int(n * split_factor)
    y_real = []
    y_pred = []
    # y_proba = []

    for i in range(test_len, 0, -window_size):
        X_train = X[:n - i]
        y_train = y[:n - i]
        clf.fit(X_train, y_train)

        y_pred_t = clf.predict(X[n - i:n - i + window_size])
        # y_proba_t = clf.predict_proba(X[n - i:n - i + window_size])

        for y_t in y[n - i:n - i + window_size]:
            y_real.append(y_t)
        for y_t in y_pred_t:
            y_pred.append(y_t)
        # for y_t in y_proba_t:
        #     y_proba.append(y_t)

    return pycm.ConfusionMatrix(y_real, y_pred)

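# Minimal sketch of calling score_model above with a scikit-learn style classifier
# (anything exposing fit/predict); the synthetic data is only meant to show the
# expected input shapes, it is not from the original code.
import numpy as np
from sklearn.linear_model import LogisticRegression

X_demo = np.random.rand(200, 6)
y_demo = np.random.randint(0, 3, size=200)
cm = score_model(LogisticRegression(max_iter=1000), X_demo, y_demo,
                 split_factor=0.2, window_size=5)
print(cm.Overall_ACC)   # overall accuracy of the walk-forward predictions
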
def main():
    # Get start time for this run
    timestr = time.strftime("%Y%m%d-%H%M%S")
    print(timestr)

    # for tt in range(1):
    for tt in range(30):  # include this loop to randomly sample learning rate and other hyperparameters
        # Get start time for this run
        timestr = time.strftime("%Y%m%d-%H%M%S")
        print(timestr)

        xx = 3 + random.random() * 1
        LEARNING_RATE = 10**-xx  # random search hyperparameters
        yy = 3 + random.random() * 1
        ALPHA_L2REG = 1 * 10**-yy
        zz = random.random() * 0
        DROPOUT_RATE = zz
        print('Iteration: ', tt, LEARNING_RATE, ALPHA_L2REG, DROPOUT_RATE)

        for xx in [True]:  # for xx in [True, False]:
            ISMULTISPECTRAL = xx

            #%% Data loading
            if ISMULTISPECTRAL:  # 21-channel pickled tiled images
                # set paths to multispectral images
                data_dir = PATH_PATCHES3D
                out_path = PATH_OUTPUT3D

                # Data augmentation and normalization for training
                # Just normalization for validation
                data_transforms = {
                    'train': transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize(MEAN_CHANNEL_PIXELVALS,
                                             STD_CHANNEL_PIXELVALS)
                    ]),
                    'val': transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize(MEAN_CHANNEL_PIXELVALS,
                                             STD_CHANNEL_PIXELVALS)
                    ]),
                    'test': transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize(MEAN_CHANNEL_PIXELVALS,
                                             STD_CHANNEL_PIXELVALS)
                    ]),
                }

                num_channels = 21 - N_REMOVED
            else:  # RGB 3-channel, pickled images
                # set paths to RGB images
                data_dir = PATH_PATCHES2D
                out_path = PATH_OUTPUT2D

                # Data augmentation and normalization for training
                # Just normalization for validation
                data_transforms = {
                    'train': transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize(MEAN_CHANNEL_PIXELVALS[:3],
                                             STD_CHANNEL_PIXELVALS[:3])
                    ]),
                    'val': transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize(MEAN_CHANNEL_PIXELVALS[:3],
                                             STD_CHANNEL_PIXELVALS[:3])
                    ]),
                    'test': transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize(MEAN_CHANNEL_PIXELVALS[:3],
                                             STD_CHANNEL_PIXELVALS[:3])
                    ]),
                }

                num_channels = 3

            image_datasets = {x: datasets.DatasetFolder(os.path.join(data_dir, x),
                                                        loader=pickle_loader,
                                                        extensions='.pkl',
                                                        transform=data_transforms[x])
                              for x in ['train', 'val', 'test']}
            print('Num channels', num_channels)

            if not LOADMODEL:
                dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                                              batch_size=BATCH_SIZE,
                                                              shuffle=True,
                                                              num_workers=0)
                               for x in ['train', 'val', 'test']}
            else:
                dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                                              batch_size=BATCH_SIZE,
                                                              shuffle=False,
                                                              num_workers=0)
                               for x in ['train', 'val', 'test']}

            dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
            class_names = image_datasets['train'].classes
            num_classes = len(class_names)

            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

            print('Dataset sizes', dataset_sizes)
            print('Number of classes', num_classes)
            print('GPU vs CPU:', device)

            #%% Finetuning the convnet
            # Load a pretrained model and reset the final fully connected layer.
            # model_ft = models.resnet18(pretrained=True)
            model_ft = densenet_av.densenet_40_12_bc(pretrained=ISPRETRAINED,
                                                     in_channels=num_channels,
                                                     drop_rate=DROPOUT_RATE)
            num_ftrs = model_ft.fc.in_features
            print('model_ft.fc.in_features =', num_ftrs)  # debugging
            model_ft.fc = nn.Linear(num_ftrs, num_classes)

            model_ft = model_ft.to(device)

            criterion = nn.CrossEntropyLoss()

            # Observe that all parameters are being optimized
            # optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
            optimizer_ft = optim.Adam(model_ft.parameters(),
                                      lr=LEARNING_RATE,
                                      weight_decay=ALPHA_L2REG)  # default Adam lr = 0.001

            # Decay LR by a factor of 0.1 every 7 epochs
            exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft,
                                                   step_size=LRDECAY_STEP,
                                                   gamma=LRDECAY_GAMMA)
            # exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=1000, gamma=LRDECAY_GAMMA)  # For Adam optimizer, no need for LR decay

            #%% Train and evaluate
            if LOADMODEL:  # load weights instead of training
                print('Loading model... Select loss/acc file:')
                filepath = mat.uigetfile()
                [cache_loss, cache_acc] = pickle.load(open(filepath, "rb"))

                print('Loading model... ')
                modelpath = filepath.replace('lossacc', 'modelparam').replace('.pkl', '.pt')
                model_ft.load_state_dict(torch.load(modelpath))
                model_ft.eval()

                # Get same filename for saving
                path_head, path_tail = os.path.split(filepath)
                filename_pre, path_ext = os.path.splitext(path_tail)

                # # 8-25-2019
                # # Obtain per-image classification accuracy based on patches - loop through folders without dataloader
                # for phase in ['train', 'val', 'test']:
                #     data_dir2 = os.path.join(data_dir, phase)
                #     tissues = os.listdir(data_dir2)  # should be num_classes # of folders
                #     print('Evaluating per-specimen accuracy on dataset: ', phase)
                #
                #     # Iterate over tissue classes
                #     for tt, tissue in enumerate(tissues):
                #         tissue_folder = os.path.join(data_dir2, tissue)
                #         tissue_files = os.listdir(tissue_folder)
                #         tissue_dates = [i.split('_', 1)[0] for i in tissue_files]
                #         unique_dates = list(set(tissue_dates))
                #         ## print(unique_dates)
                #         num_dates = np.size(unique_dates)
                #
                #         num_patches_tissue_date = np.zeros((num_dates, 1))
                #         num_correctpatches_tissue_date = np.zeros((num_dates, 1))
                #         iscorrect_tissue_date = np.zeros((num_dates, 1))
                #
                #         # Calculate fraction of correct patch predictions per tissue-date specimen
                #         num_patches = 0
                #         for i, session in enumerate(unique_dates):
                #             ## print(session)
                #             num_patches_tissue_date[i] = tissue_dates.count(session)
                #             tissue_patches_session_filenames = [item for item in tissue_files if item.startswith(session)]
                #
                #             # Load patches into one batch of shape [M, C, H, W]
                #             # where M is batch size (# patches), C is # channels
                #             patches_session = np.zeros((int(num_patches_tissue_date[i]), num_channels, TILE_HEIGHT, TILE_WIDTH))
                #             for j, patch_filename in enumerate(tissue_patches_session_filenames):
                #                 if ISMULTISPECTRAL:
                #                     this_image = pickle_loader(os.path.join(tissue_folder, patch_filename))  # read image, shape (H, W, 21)
                #                     mean = np.array(MEAN_CHANNEL_PIXELVALS)
                #                     std = np.array(STD_CHANNEL_PIXELVALS)
                #                     inp = (this_image - mean) / std
                #                 else:
                #                     this_image = mpimg.imread(os.path.join(tissue_folder, patch_filename))  # read image, shape (H, W, 3)
                #                     mean = np.array(MEAN_CHANNEL_PIXELVALS[:3])
                #                     std = np.array(STD_CHANNEL_PIXELVALS[:3])
                #                     inp = (this_image - mean) / std
                #
                #                 ## plt.figure(), plt.imshow(this_image[:,:,:3])
                #                 ## print(os.path.join(tissue_folder, patch_filename))
                #                 ## sys.exit()
                #                 patches_session[j] = inp.transpose((2, 0, 1))
                #
                #             # Predict on patches
                #             with torch.no_grad():
                #                 inputs = torch.tensor(patches_session, dtype=torch.float).to(device)
                #                 outputs = model_ft(inputs)
                #                 _, preds = torch.max(outputs, 1)
                #
                #             ## print(preds)
                #
                #             # Calculate number of correct patches
                #             true_label = tt
                #             num_correctpatches_tissue_date[i] = np.sum(preds.cpu().numpy() == true_label)
                #             iscorrect_tissue_date[i] = (num_correctpatches_tissue_date[i] / num_patches_tissue_date[i]) >= 0.5  # Assume 50% or more correct patch predictions gives the overall specimen prediction
                #
                #             ## num_patches = num_patches + num_patches_tissue_date[i]
                #             ## print(' correct', num_correctpatches_tissue_date[i], num_patches_tissue_date[i], iscorrect_tissue_date[i])
                #         ## print(num_patches)
                #
                #         # Output per-specimen results
                #         specimens_correct = np.sum(iscorrect_tissue_date)
                #         print(' ', tissue, ': correct specimens ', specimens_correct, ' out of ', num_dates)
            else:  # Train model from scratch
                print('Train model...')

                # Train
                # It should take around 15-25 min on CPU. On GPU though, it takes less than a minute.
                model_ft, cache_loss, cache_acc = train_model(model_ft, criterion, optimizer_ft,
                                                              exp_lr_scheduler, dataloaders, device,
                                                              dataset_sizes, num_epochs=NUM_EPOCHS)

                # Save loss and acc to disk
                filename_pre = 'nclass' + str(num_classes)
                t_size, val_acc = zip(*cache_acc['val'])

                # Calculate best val acc
                bestval_acc = max(val_acc).item()

                # filename_pre = timestr + '_nclass' + str(num_classes) + '_pretrain' + str(ISPRETRAINED) + '_batch' + str(BATCH_SIZE) + '_epoch' + str(NUM_EPOCHS) + '_lr' + str(LEARNING_RATE) + '_' + str(LRDECAY_STEP) + '_' + str(LRDECAY_GAMMA) + '_val' + "{:.4f}".format(bestval_acc)
                filename_pre = (timestr + '_multispec' + str(ISMULTISPECTRAL) +
                                '_nclass' + str(num_classes) +
                                '_pretrain' + str(ISPRETRAINED) +
                                '_batch' + str(BATCH_SIZE) +
                                '_epoch' + str(NUM_EPOCHS) +
                                '_lr' + str(LEARNING_RATE) +
                                '_L2reg' + str(ALPHA_L2REG) +
                                '_DROPOUT' + str(DROPOUT_RATE) +
                                '_val' + "{:.4f}".format(bestval_acc))
                filename = 'lossacc_' + filename_pre + '.pkl'
                pickle.dump([cache_loss, cache_acc], open(os.path.join(out_path, filename), "wb"))

                # Save trained model's parameters for inference
                filename2 = 'modelparam_' + filename_pre + '.pt'
                torch.save(model_ft.state_dict(), os.path.join(out_path, filename2))

            # Evaluate
            model_ft.eval()  # set dropout and batch normalization layers to evaluation mode before running inference

            # # Examine each figure and output to get granular prediction output info
            # ## fig0 = visualize_model(model_ft, dataloaders, device, class_names, num_images=90, columns=10)
            # ## fig0 = visualize_model(model_ft, dataloaders, device, class_names, num_images=10)  # visualize validation images
            # fig0 = visualize_model(model_ft, dataloaders, device, class_names, num_images=288, columns=12, phase='test')
            #
            # # Save visualization figure
            # fig0_filename = 'visualize_' + filename_pre + '.png'
            # fig0 = plt.gcf()
            # fig0.set_size_inches(FIG_HEIGHT, FIG_WIDTH)
            # plt.savefig(os.path.join(out_path, fig0_filename), bbox_inches='tight', dpi=FIG_DPI)

            fig1, fig2 = learning_curve(cache_loss, cache_acc, class_names, num_epochs=NUM_EPOCHS)

            # Save learning curve figures
            fig1_filename = 'losscurve_' + filename_pre + '.pdf'
            fig2_filename = 'acccurve_' + filename_pre + '.pdf'
            fig1.set_size_inches(FIG_HEIGHT, FIG_WIDTH)
            fig2.set_size_inches(FIG_HEIGHT, FIG_WIDTH)
            fig1.savefig(os.path.join(out_path, fig1_filename), bbox_inches='tight', dpi=FIG_DPI)
            fig2.savefig(os.path.join(out_path, fig2_filename), bbox_inches='tight', dpi=FIG_DPI)

            # Display confusion matrix
            for phase in ['train', 'val', 'test']:
                confusion_matrix = torch.zeros(num_classes, num_classes)
                y_actu = []
                y_pred = []
                with torch.no_grad():
                    for i, (inputs, classes) in enumerate(dataloaders[phase]):
                        inputs = inputs.to(device, dtype=torch.float)  # shape [128, 21, 36, 36]
                        classes = classes.to(device)
                        outputs = model_ft(inputs)
                        _, preds = torch.max(outputs, 1)
                        for t, p in zip(classes.view(-1), preds.view(-1)):
                            confusion_matrix[t.long(), p.long()] += 1

                        # Vector of class labels and predictions
                        y_actu = np.hstack((y_actu, classes.view(-1).cpu().numpy()))
                        y_pred = np.hstack((y_pred, preds.view(-1).cpu().numpy()))

                # print(confusion_matrix)
                print(confusion_matrix.diag() / confusion_matrix.sum(1))  # per-class accuracy

                fig3 = plot_confusion_matrix(confusion_matrix,
                                             classes=class_names,
                                             normalize=CM_NORMALIZED,
                                             title='Confusion matrix, ' + phase)

                # Save confusion matrix figure
                fig3_filename = 'cm' + phase + '_' + filename_pre + '.pdf'
                fig3.set_size_inches(FIG_HEIGHT, FIG_WIDTH)
                fig3.savefig(os.path.join(out_path, fig3_filename), bbox_inches='tight', dpi=FIG_DPI)

                # Also save as jpg, 5-1-2020
                fig3_filename_jpg = 'cm' + phase + '_' + filename_pre + '.jpg'
                fig3.savefig(os.path.join(out_path, fig3_filename_jpg), bbox_inches='tight', dpi=FIG_DPI)

                # Display confusion matrix analysis
                cm2 = pycm.ConfusionMatrix(actual_vector=y_actu, predict_vector=y_pred)  # Create CM from data
                # cm2 = pycm.ConfusionMatrix(matrix={"Class1": {"Class1": 1, "Class2": 2}, "Class2": {"Class1": 0, "Class2": 5}})  # Create CM directly
                # cm2  # line output: pycm.ConfusionMatrix(classes: ['Class1', 'Class2'])
                print(cm2)

    plt.ioff()
    plt.show()

# plot confusion matrix
cm_normalised = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
df_cm = pd.DataFrame(cm_normalised, df.char.tolist(), df.char.tolist())
fontsize = 8
hm = sns.heatmap(df_cm, cmap="jet")
hm.yaxis.set_ticklabels(hm.yaxis.get_ticklabels(), fontsize=fontsize)
hm.xaxis.set_ticklabels(hm.xaxis.get_ticklabels(), fontsize=fontsize)
plt.ylabel('True')
plt.xlabel('Predicted')

# another way of checking confusion matrix
import pycm

cm = pycm.ConfusionMatrix(y_true, predict_classes)
cm.save_csv('cm')
df_cm = pd.read_csv('cm.csv')
df_cm = df_cm.set_index('Class')
df_cm = df_cm.transpose()
print(df_cm[['PPV', 'TPR', 'F1']])  # precision, recall, f1

# check classes with f1 value < threshold
f1 = df_cm[['F1', 'PPV', 'TPR']].to_dict()
threshold = 0.80
print('c f1 prec. recall')
poor = []
for key, data in f1.items():
    if key != 'F1':
        continue
    for idx, v in f1[key].items():
        v = float(v)
        # Plausible continuation (assumption): collect and report classes whose
        # F1 falls below the threshold, matching the header printed above.
        if v < threshold:
            poor.append(idx)
            print(idx, v, float(f1['PPV'][idx]), float(f1['TPR'][idx]))

def createConfusionMatrix(predictions, targets):
    """
    https://www.pycm.ir/doc/index.html
    """
    return pycm.ConfusionMatrix(actual_vector=targets,
                                predict_vector=predictions)

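# Minimal sketch (with assumed labels) of using createConfusionMatrix above and
# exporting the pycm report; save_html is part of the documented pycm API.
cm = createConfusionMatrix(predictions=[0, 1, 1, 2], targets=[0, 1, 2, 2])
print(cm)                # prints the matrix plus overall and class statistics
cm.save_html("report")   # writes report.html in the working directory
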
def run(input_model, save_name):
    # Build the test-set Dataset object
    images3, labels3, table3 = load_test('test', mode='test')
    print('testimages:', len(images3))
    print('testlabels:', len(labels3))
    x_test, y_test = preprocess(images3, labels3)
    db_test = tf.data.Dataset.from_tensor_slices((images3, labels3))
    db_test = db_test.shuffle(500).map(db_preprocess).batch(batchsz)
    print('processed data successfully')

    # ----------- predict -----------------------
    model_name = input_model
    model = load_model(model_name)
    test_loss, test_acc = model.evaluate(db_test)
    print('test_loss:', test_loss, 'test_acc:', test_acc, '\n')

    y_test_pred = model.predict(x_test)
    # print('np.argmax(y_test, axis=1)', np.argmax(y_test, axis=1), '\n')
    # print('np.argmax(y_test_pred, axis=1)', np.argmax(y_test_pred, axis=1), '\n')
    test_accuracy = accuracy_score(np.argmax(y_test, axis=1),
                                   np.argmax(y_test_pred, axis=1))
    print('acc:', test_accuracy, '\n')

    cmcm = pycm.ConfusionMatrix(actual_vector=np.argmax(y_test, axis=1),
                                predict_vector=np.argmax(y_test_pred, axis=1))
    print(cmcm)

    cm_plot_label = ['Benign', 'Malignant', 'Normal']
    cm = confusion_matrix(np.argmax(y_test, axis=1), np.argmax(y_test_pred, axis=1))
    utils.plot_confusion_matrix(cm, cm_plot_label,
                                title='Confusion Matrix for Breast Cancer',
                                savename='matrix_' + save_name + '.png')
    utils.print_sesp(cm, cm_plot_label)
    print('\n---------------------------------------------------------------------')

    # -------------- classification report -----------------
    class_outcome = classification_report(np.argmax(y_test, axis=1),
                                          np.argmax(y_test_pred, axis=1),
                                          target_names=cm_plot_label)
    print('classification report:', '\n', class_outcome)
    print('\n---------------------------------------------------------------------')

    # ------------------- ROC and AUC -----------------------------------
    roc_log = roc_auc_score(y_test, y_test_pred, average='micro')
    print('functionally computed AUC:', roc_log)

    # false-positive-rate fpr = (1 - tnr) = 1 - specificity
    # true-positive-rate  tpr = sensitivity
    fpr, tpr, thresholds = roc_curve(np.ravel(y_test), np.ravel(y_test_pred))
    area_under_curve = auc(fpr, tpr)
    print('manually computed AUC:', area_under_curve)

    # AUC curve
    plt.figure()
    plt.plot([0, 1], [0, 1], 'r--')
    plt.plot(fpr, tpr, label='AUC = {:.3f}'.format(area_under_curve))
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve')
    plt.legend(loc='best')
    plt.savefig('ROC' + save_name + '.svg')
    plt.close()

def test_confusion_matrix():
    truelbl = np.random.choice([0, 1, 2, 3], size=100, replace=True)
    predlbl = np.random.choice([0, 1, 2, 3], size=100, replace=True)

    got = dgpredict.confusion_matrix(truelbl, predlbl)
    expected = pycm.ConfusionMatrix(truelbl, predlbl).to_array()
    np.testing.assert_equal(got, expected)

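# Side note (small illustrative sketch, values are made up): pycm lays out
# to_array() rows and columns in the order given by ConfusionMatrix.classes,
# which is what the equality check above relies on.
import pycm

mat = pycm.ConfusionMatrix([0, 0, 1, 2], [0, 1, 1, 2])
print(mat.classes)      # [0, 1, 2]
print(mat.to_array())   # 3x3 array of counts in that class order
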
def make_metrics(self, predictions):
    """
    Make metrics with prediction dictionary

    * Args:
        predictions: prediction dictionary consisting of
            - key: 'id' (sequence id)
            - value: dictionary consisting of
                - tag_idxs

    * Returns:
        metrics: metric dictionary consisting of
            - 'accuracy': sequence level accuracy
            - 'tag_accuracy': tag level accuracy
            - 'macro_f1': tag prediction macro (unweighted mean) f1
            - 'macro_precision': tag prediction macro (unweighted mean) precision
            - 'macro_recall': tag prediction macro (unweighted mean) recall
            - 'conlleval_accuracy': CoNLL-eval accuracy
            - 'conlleval_f1': CoNLL-eval f1
    """
    pred_tag_idxs_list = []
    target_tag_idxs_list = []
    accurate_sequence = []

    for data_idx, pred in predictions.items():
        target = self._dataset.get_ground_truth(data_idx)

        pred_tag_idxs_list.append(pred["tag_idxs"])
        target_tag_idxs_list.append(target["tag_idxs"])
        accurate_sequence.append(
            1 if (np.asarray(target["tag_idxs"]) == np.asarray(pred["tag_idxs"])).all() else 0)

    pred_tags = [[self._dataset.tag_idx2text[tag_idx] for tag_idx in tag_idxs]
                 for tag_idxs in pred_tag_idxs_list]
    target_tags = [[self._dataset.tag_idx2text[tag_idx] for tag_idx in tag_idxs]
                   for tag_idxs in target_tag_idxs_list]

    flat_pred_tags = list(common_utils.flatten(pred_tags))
    flat_target_tags = list(common_utils.flatten(target_tags))

    # Confusion matrix
    try:
        pycm_obj = pycm.ConfusionMatrix(actual_vector=flat_target_tags,
                                        predict_vector=flat_pred_tags)
    except pycmVectorError as e:
        if str(e) == "Number of the classes is lower than 2":
            logger.warning(
                "Number of tags in the batch is 1. Sanity check is highly recommended."
            )
            return {
                "accuracy": 1.,
                "tag_accuracy": 1.,
                "macro_f1": 1.,
                "macro_precision": 1.,
                "macro_recall": 1.,
                "conlleval_accuracy": 1.,
                "conlleval_f1": 1.,
            }
        raise

    self.write_predictions(
        {"target": flat_target_tags, "predict": flat_pred_tags},
        pycm_obj=pycm_obj)

    sequence_accuracy = sum(accurate_sequence) / len(accurate_sequence)

    metrics = {
        "accuracy": sequence_accuracy,
        "tag_accuracy": pycm_obj.Overall_ACC,
        "macro_f1": macro_f1(pycm_obj),
        "macro_precision": macro_precision(pycm_obj),
        "macro_recall": macro_recall(pycm_obj),
        "conlleval_accuracy": conlleval_accuracy(target_tags, pred_tags),
        "conlleval_f1": conlleval_f1(target_tags, pred_tags),
    }

    return metrics

def __log_confusion_matrix(self, all_preds: torch.Tensor,
                           all_labels: torch.Tensor, epoch: int):
    buf = io.BytesIO()
    dataset = self.dataloader_eval.dataset
    label_map = {value: key for key, value in dataset.label_index_map.items()}
    np.set_printoptions(precision=3)

    if self.config.multi_labels:
        fig, axes = plt.subplots(1, len(label_map.keys()), figsize=(25, 5))
        cm = metrics.multilabel_confusion_matrix(y_pred=all_preds.numpy(),
                                                 y_true=all_labels.numpy())
        for i in range(len(label_map.keys())):
            mat = np.array([[cm[i][1][1], cm[i][1][0]],
                            [cm[i][0][1], cm[i][0][0]]])
            result = mat / mat.sum(axis=1, keepdims=True)
            print(f"{label_map[i]}\n{result}\n")
            display = metrics.ConfusionMatrixDisplay(result, display_labels=["P", "N"])
            display.plot(ax=axes[i], cmap=plt.cm.Blues, values_format=".2f")
            display.ax_.set_title(label_map[i])
            display.ax_.set_ylabel("True label" if i == 0 else "")
            display.ax_.set_yticklabels(["P", "N"] if i == 0 else [])
            display.im_.colorbar.remove()

        plt.subplots_adjust(wspace=0.1, hspace=0.1)
        fig.colorbar(display.im_, ax=axes)
        plt.savefig(buf, format="png", dpi=180)
    else:
        cm = metrics.confusion_matrix(y_pred=all_preds.numpy(),
                                      y_true=all_labels.numpy(),
                                      normalize="true")
        display = metrics.ConfusionMatrixDisplay(cm, display_labels=label_map.values())
        display.plot(cmap=plt.cm.Blues)
        display.figure_.savefig(buf, format="png", dpi=180)

    cm = pycm.ConfusionMatrix(actual_vector=all_labels.numpy(),
                              predict_vector=all_preds.numpy())
    cm.relabel(mapping=label_map)
    cm.print_normalized_matrix()

    buf.seek(0)
    img_arr = np.frombuffer(buf.getvalue(), dtype=np.uint8)
    buf.close()
    img = cv2.imdecode(img_arr, 1)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    cv2.imwrite("confusion.png", img)
    mlflow.log_artifact("confusion.png")
    self.writer.add_image("confusion_matrix", img, epoch, dataformats="HWC")