import json
import os

from pycm import ConfusionMatrix


def confusion_matrix(pred: list, label: list, label_index: dict, file_name: str = None, output_dir='results'):
    # NOTE: pycm's positional arguments are (actual_vector, predict_vector).
    cm = ConfusionMatrix(pred, label)
    cm.relabel(mapping=label_index)
    cm_matrix = cm.matrix
    cm_normalized_matrix = cm.normalized_matrix
    if file_name is None:
        file_name = 'confusion_matrix.json'
    normalized_file_name = file_name.replace('.', '_normalized.')
    if output_dir is not None:
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        with open(os.path.join(output_dir, file_name), 'w') as fp:
            json.dump(cm_matrix, fp, indent=4)
        # with open(os.path.join(output_dir, normalized_file_name), 'w') as fp:
        #     json.dump(cm_normalized_matrix, fp, indent=4)
    return cm_matrix
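# A hedged usage sketch for confusion_matrix() above; the vectors, class names,
# and file name are illustrative only, not from the original project.
preds = [0, 1, 1, 0, 2, 2, 1]
labels = [0, 1, 0, 0, 2, 1, 1]
matrix = confusion_matrix(preds, labels,
                          label_index={0: 'cat', 1: 'dog', 2: 'bird'},
                          file_name='demo_confusion_matrix.json',
                          output_dir='results')
print(matrix['cat'])  # row of raw counts for the 'cat' class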
from pycm import ConfusionMatrix


def show_classification_metrics(pred, label, use_matric=None, label_index=None, matrics_list=None, display=True):
    """Calculate metrics for a classification model using PyCM.

    Args:
        pred (numpy.array): predicted label for each batch (batch_size x number of pred classes)
        label (numpy.array): label for each batch (batch_size x number of classes)
        use_matric: unused
        label_index (dict): class names (default=None)
        matrics_list (list): additional metric names (refer to the PyCM metrics list) (default=None)
        display (bool): whether to print the overall result (default=True)

    Returns:
        metrics (dict): contains the 2-level result (overall_stat, class_stat)
    """
    # pred = pred.reshape(-1)
    # label = label.reshape(-1)
    # NOTE: pycm's positional arguments are (actual_vector, predict_vector).
    cm = ConfusionMatrix(pred, label)
    if label_index is not None:
        cm.relabel(mapping=label_index)
    default_matrics_list = cm.recommended_list
    if matrics_list is not None:
        default_matrics_list.extend(matrics_list)
    if display:
        cm.stat(summary=True)
        print("[Matrix]")
        cm.print_matrix()
        print("[Normalized Matrix]")
        cm.print_normalized_matrix()
    overall_stat = cm.overall_stat
    class_stat = cm.class_stat
    filter_overall_stat = {
        k: v for k, v in overall_stat.items() if k in default_matrics_list
    }
    filter_class_stat = {
        k: v for k, v in class_stat.items() if k in default_matrics_list
    }
    output = dict()
    output["overall_stat"] = filter_overall_stat
    output["class_stat"] = filter_class_stat
    return output
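# A hedged example call for show_classification_metrics(); plain lists are used
# here for brevity, and the labels/metric names are made up for illustration.
y_pred = [0, 1, 1, 0, 1]
y_true = [0, 1, 0, 0, 1]
stats = show_classification_metrics(y_pred, y_true,
                                    label_index={0: 'negative', 1: 'positive'},
                                    matrics_list=['TPR', 'PPV'],
                                    display=False)
print(stats['overall_stat'].keys())   # recommended overall metrics that were kept
print(stats['class_stat'].get('TPR'))  # per-class recall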
def __call__(self):
    confusion_matrix = ConfusionMatrix(
        matrix=deepcopy(self._confusion_matrix))
    confusion_matrix.relabel(
        mapping={value - 1: key for key, value in self._label_map.items()})
    metrics = {
        metric_name: getattr(confusion_matrix, metric_name)
        for metric_name in self._metric_names
    }
    self.reset()
    return metrics
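# A minimal, self-contained sketch (assumed values, not the original class state)
# of what this __call__ relies on: pycm's ConfusionMatrix(matrix=...) accepts a
# dict-of-dicts of raw counts, relabel() maps the 0-based indices back to class
# names, and class-level statistics are plain attributes retrievable via getattr.
from copy import deepcopy
from pycm import ConfusionMatrix

_confusion_matrix = {0: {0: 13, 1: 2}, 1: {0: 5, 1: 10}}  # hypothetical accumulated counts
_label_map = {'cat': 1, 'dog': 2}                         # hypothetical class name -> 1-based id
_metric_names = ('ACC', 'TPR', 'PPV')                     # valid pycm attribute names

cm = ConfusionMatrix(matrix=deepcopy(_confusion_matrix))
cm.relabel(mapping={value - 1: key for key, value in _label_map.items()})
metrics = {name: getattr(cm, name) for name in _metric_names}
print(metrics['TPR'])  # per-class recall, e.g. {'cat': ..., 'dog': ...}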
history = model.fit(
    x=np.array(x_train),
    y=np.array(y_train),
    validation_split=0.2,  # 80/20 split
    verbose=1,
    epochs=50,  # Notice that r-square is pretty low for the first 5 epochs
    callbacks=[callback])  # Early stopping

tf_preds = model.predict(x=x_test)
dir(history)

# summarize training history (accuracy):
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy over Epochs')
plt.ylabel('Accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

#############################################################
#                     Model Performance                     #
#############################################################
from pycm import ConfusionMatrix

# Yes, both scikit-learn and PyCM give the same confusion matrix, thankfully.
cm = ConfusionMatrix(actual_vector=np.array(y_test), predict_vector=np.array(RF_preds))
cm.relabel(mapping={0: "Non-graduate", 1: "Graduate"})
print(cm)
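# A short, hedged add-on (not part of the original script): beyond printing the
# matrix, pycm exposes summary statistics and an HTML report for the same cm.
print("Overall accuracy:", cm.overall_stat['Overall ACC'])
print("Per-class recall:", cm.class_stat['TPR'])
cm.save_html('graduate_cm_report')  # hypothetical file name; writes graduate_cm_report.html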
import json
import os
from collections import defaultdict

import pandas as pd
from pycm import ConfusionMatrix
from sklearn.metrics import classification_report


def _analyze_stats(results_dir, sf_lf_map, correct_str, errors_str, sf_confusion, id_map, experiment=None):
    """
    :param results_dir: directory in which to write results
    :param sf_lf_map: map of SFs to candidate LFs (i.e. AA --> {amino acids, alcoholics anonymous, etc.})
    :param correct_str: string containing prettified versions of all correctly predicted examples
    :param errors_str: string containing prettified versions of all incorrectly predicted examples
    :param sf_confusion: dictionary of confusion matrices for SFs
    :param id_map: dictionary with keys 'correct' and 'error'; values are example ids, used for generating inter-model confusion matrices
    :param experiment: name of the experiment, used as a subdirectory of results_dir/ in which to write results
    :return: dictionary of aggregate precision/recall/F1 metrics
    """
    correct_fp = os.path.join(results_dir, 'correct.txt')
    reports_fp = os.path.join(results_dir, 'reports.txt')
    errors_fp = os.path.join(results_dir, 'errors.txt')
    summary_fp = os.path.join(results_dir, 'summary.csv')
    id_fp = os.path.join(results_dir, 'error_tracker.json')
    df = defaultdict(list)
    rare_lfs = get_rare_lfs()
    rare_recalls = []
    cols = [
        'sf',
        'experiment',
        'support',
        'num_targets',
        'micro_precision',
        'micro_recall',
        'micro_f1',
        'macro_precision',
        'macro_recall',
        'macro_f1',
        'weighted_precision',
        'weighted_recall',
        'weighted_f1',
    ]
    with open(id_fp, 'w') as fd:
        json.dump(id_map, fd)
    reports = []
    with open(correct_fp, 'w') as fd:
        for k in sorted(correct_str.keys()):
            fd.write(correct_str[k])
    with open(errors_fp, 'w') as fd:
        for k in sorted(errors_str.keys()):
            fd.write(errors_str[k])
    for sf in sf_confusion:
        labels = sf_lf_map[sf]
        labels_trunc = list(map(lambda x: x.split(';')[0], labels))
        y_true = sf_confusion[sf][0]
        y_pred = sf_confusion[sf][1]
        sf_results = classification_report(y_true, y_pred, labels=list(range(len(labels_trunc))),
                                           target_names=labels_trunc, output_dict=True)
        report = classification_report(y_true, y_pred, labels=list(range(len(labels_trunc))),
                                       target_names=labels_trunc)
        macro_nonzero = defaultdict(float)
        num_nonzero = 0
        for orig_lf, lf in zip(labels, labels_trunc):
            if orig_lf in rare_lfs:
                rare_recalls.append(sf_results[lf]['recall'])
            d = sf_results[lf]
            if d['support'] > 0:
                macro_nonzero['precision'] += d['precision']
                macro_nonzero['recall'] += d['recall']
                macro_nonzero['f1-score'] += d['f1-score']
                num_nonzero += 1
        for suffix in ['precision', 'recall', 'f1-score']:
            sf_results['macro avg'][suffix] = macro_nonzero[suffix] / float(num_nonzero)
        reports.append(report)
        reports.append('\n\n')
        metrics = ['micro avg', 'macro avg', 'weighted avg']
        for metric in metrics:
            if metric in sf_results:
                for k, v in sf_results[metric].items():
                    if not k == 'support':
                        metric_key = '{}_{}'.format(metric.split(' ')[0], k.split('-')[0])
                        df[metric_key].append(v)
            else:
                for suffix in ['precision', 'recall', 'f1']:
                    df['{}_{}'.format(metric.split(' ')[0], suffix)].append(None)
        df['sf'].append(sf)
        df['num_targets'].append(len(labels_trunc))
        df['support'].append(sf_results['weighted avg']['support'])
        try:
            cm = ConfusionMatrix(actual_vector=y_true, predict_vector=y_pred)
            label_idx_to_str = dict()
            for idx in cm.classes:
                label_idx_to_str[idx] = labels_trunc[int(idx)]
            cm.relabel(mapping=label_idx_to_str)
            cm_outpath = os.path.join(results_dir, 'confusion', sf)
            cm.save_html(cm_outpath)
        except Exception:
            print('Only 1 target class for test set SF={}'.format(sf))
    df['experiment'] = [experiment] * len(df['sf'])
    summary_df = pd.DataFrame(df, columns=cols)
    summary_df.to_csv(summary_fp, index=False)
    with open(reports_fp, 'w') as fd:
        for report in reports:
            fd.write(report)
    suffixes = ['precision', 'recall', 'f1']
    types = ['weighted', 'macro']
    agg_metrics = {}
    for t in types:
        for suffix in suffixes:
            key = '{}_{}'.format(t, suffix)
            avg_val = summary_df[key].mean()
            print('Global {} --> {}'.format(key, avg_val))
            agg_metrics[key] = avg_val
    num_targets = summary_df['num_targets'].unique().tolist()
    print('Num Targets, Macro F1, Weighted F1')
    for t in sorted(num_targets):
        avg_macro_f1 = summary_df[summary_df['num_targets'] == t]['macro_f1'].mean()
        avg_weighted_f1 = summary_df[summary_df['num_targets'] == t]['weighted_f1'].mean()
        print('{},{},{}'.format(t, avg_macro_f1, avg_weighted_f1))
    rare_recall = sum(rare_recalls) / float(len(rare_recalls))
    num_rare = len(rare_recalls)
    print('Recall on {} rare long forms: {}'.format(num_rare, rare_recall))
    return agg_metrics
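# A self-contained, hedged sketch of the pycm step used inside _analyze_stats:
# build the matrix from integer label vectors, relabel indices with (made-up)
# long forms, and export an HTML report. Names and paths are illustrative only.
import os
from pycm import ConfusionMatrix

labels_trunc = ['amino acids', 'alcoholics anonymous']
y_true = [0, 1, 0, 1, 1]
y_pred = [0, 1, 1, 1, 0]
cm = ConfusionMatrix(actual_vector=y_true, predict_vector=y_pred)
cm.relabel(mapping={idx: labels_trunc[int(idx)] for idx in cm.classes})
os.makedirs(os.path.join('results', 'confusion'), exist_ok=True)
cm.save_html(os.path.join('results', 'confusion', 'AA'))  # writes results/confusion/AA.html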
from pycm import ConfusionMatrix


def showStats(true_labels, predictions, labels_dir, showExtraStats, writeToFile=''):
    """
    Prints various statistics about model performance.

    Arguments:
        true_labels: true labels of the predictions
        predictions: predicted labels
        labels_dir: path to the text file mapping labels to integers
        showExtraStats: whether to print AUC-based statistics
        writeToFile: optional path of a file to also write the statistics to
    Returns:
        N/A
    """
    # Print basic info
    print("Labels:")
    label_dict = fileToLabelDict(labels_dir)
    print(label_dict)
    print("\nPredictions:")
    print(predictions)
    print("\nActual:")
    print(true_labels)
    print("\n")

    # Build confusion matrix and additional stats
    my_inverted_dict = {}
    for elem in label_dict.keys():
        my_inverted_dict[elem] = int(label_dict[elem])
    my_inverted_dict = dict(
        zip(my_inverted_dict.values(), my_inverted_dict.keys()))
    cm = ConfusionMatrix(actual_vector=true_labels, predict_vector=predictions)
    cm.relabel(mapping=my_inverted_dict)
    cm.print_matrix()
    if showExtraStats:
        # print("Micro F1 Score: ", cm.overall_stat['F1 Micro'])
        # print("Macro F1 Score: ", cm.overall_stat['F1 Macro'])
        # print("Cross Entropy: ", cm.overall_stat['Cross Entropy'])
        # print("95% CI: ", cm.overall_stat['95% CI'])
        print("AUC: ", cm.AUC)
        print("AUC quality:", cm.AUCI)

    # Optionally write the same stats to a file
    if writeToFile != '':
        with open(writeToFile, 'w') as f:
            f.write("Labels:\n\n")
            f.write(str(label_dict))
            f.write("\n\nPredictions:\n\n")
            f.write(str(predictions))
            f.write("\n\nActual:\n\n")
            f.write(str(true_labels))
            f.write("\n\n")
            f.write(dictToString(cm.matrix))
            f.write("\n\n")
            f.write("AUC: \n\n")
            f.write(str(cm.AUC))
            f.write("\n\nAUC Quality: \n\n")
            f.write(str(cm.AUCI))
            f.write("\n\n")
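# A hedged, self-contained illustration of the relabel + AUC pattern showStats
# relies on; fileToLabelDict is project-specific, so a literal dict stands in
# for its output here, and the vectors are made up.
from pycm import ConfusionMatrix

label_dict = {'cat': '0', 'dog': '1'}                  # stand-in for fileToLabelDict(labels_dir)
inverted = {int(v): k for k, v in label_dict.items()}  # {0: 'cat', 1: 'dog'}
cm = ConfusionMatrix(actual_vector=[0, 1, 1, 0], predict_vector=[0, 1, 0, 0])
cm.relabel(mapping=inverted)
cm.print_matrix()
print("AUC:", cm.AUC)           # per-class area under the ROC curve
print("AUC quality:", cm.AUCI)  # pycm's qualitative AUC interpretation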