def object_mask_and_boundary_metrics(agg_res, res, step):
    gt_objs = res['gt']
    pred_objs = res['pred']
    bde_flag = res.get('bde_flag', 0)
    ari_flag = res.get('ari_flag', 0)
    B, T = gt_objs.shape[:2]
    stride = gt_objs.shape[3] // pred_objs.shape[3]
    M = ObjectMetrics(gt_objs, pred_objs, stride=stride)

    # cluster background features
    bg_feats = res.get('background_features', np.array([0]))
    if bg_feats.sum() < 1:
        bg_feats = None
    M.cluster_unlabeled_pixels(features=bg_feats, stride=M.stride,
                               n_clusters=res.get('background_n_clusters', 5))

    metrics = M.compute_all_metrics(examples=range(B), times=range(T),
                                    thresh=res['thresh'], stride=M.stride,
                                    connectivity=2, mode='thick',
                                    compute_BDEs=bde_flag,
                                    compute_ARIs=ari_flag,
                                    normalize=True)
    metrics = {
        k: metrics[k] for k in metrics
        if k in ['mIoU', 'recall', 'boundary_f_measure',
                 'boundary_displacement_error', 'adjusted_rand_index']
    }

    # split metrics along the last axis and time average
    split_metrics = {}
    for k in metrics:
        vshape = metrics[k].shape
        vals = np.split(metrics[k], vshape[-1], axis=-1)
        for i, val in enumerate(vals):
            split_metrics[k + ('' if not i else str(i))] = val[:, :, 0]

    if res.get('agg_mean', 0):
        split_metrics = {k: np.mean(v, axis=0, keepdims=True)
                         for k, v in split_metrics.items()}

    if agg_res is None:
        agg_res = []
    agg_res.append(split_metrics)
    return agg_res
def get_metrics(self, phase=None, epoch=None, item=None):
    metrics = copy.deepcopy(self.metrics)
    if phase is not None:
        metrics = {phase: metrics[phase]}
    if epoch is not None:
        for _p in metrics.keys():
            metrics[_p] = {epoch: metrics[_p][epoch]}
    if item is not None:
        for _p in metrics.keys():
            for _e in metrics[_p].keys():
                metrics[_p][_e] = {item: metrics[_p][_e][item]}
    return metrics
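# Hedged usage sketch: `_Tracker` below is a minimal stand-in (not the real
# class) just to show how get_metrics narrows the phase -> epoch -> item
# nesting. Metric values are made up.
import copy

class _Tracker:
    def __init__(self, metrics):
        self.metrics = metrics
    get_metrics = get_metrics  # reuse the function above as a method

tracker = _Tracker({
    'train': {0: {'loss': 0.9, 'acc': 0.61}, 1: {'loss': 0.7, 'acc': 0.68}},
    'valid': {0: {'loss': 1.0, 'acc': 0.58}, 1: {'loss': 0.8, 'acc': 0.64}},
})
tracker.get_metrics()                                   # deep copy of everything
tracker.get_metrics(phase='valid', epoch=1, item='acc')  # {'valid': {1: {'acc': 0.64}}}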
def grid_search_own_metrics_class_stratified(ix, iy, stratus_list, clf,
                                              get_grid_hyperparams_kargs,
                                              regressor_name="Regressor",
                                              show_progress_percentage=0.1,
                                              kfold=5, shuffle=True,
                                              sort_report_by='roc_auc_score'):
    clfks = get_grid_hyperparams(**get_grid_hyperparams_kargs)
    report_data = []
    hpks = list(clfks[0].keys())
    cols = len(clfks)
    progress_int = max(int(round(cols * show_progress_percentage, 0)), 1)
    print("Total number of evaluations:{}".format(cols))
    for col, clfk in enumerate(clfks):
        metrics = cv_metrics_stratified_class(ix, iy.flatten(),
                                              stratus_list=stratus_list,
                                              clf=clf, clfk=clfk,
                                              kfold=kfold, shuffle=shuffle)
        metrics_report = {'name': regressor_name}
        for m in metrics.keys():
            stats = metrics_stats(metrics[m], rn=3)
            for sk in stats.keys():
                metrics_report[m + "_" + sk] = stats[sk]
        metrics_report.update(clfk)
        report_data.append(metrics_report.copy())
        if col % progress_int == 0:
            progress = round(100 * col / cols, 0)
            print("{} %".format(progress))
    print("100.0 %")
    odf = pd.DataFrame(report_data[:])
    odf = odf.sort_values(by=[sort_report_by + "_mean"], ascending=False)
    mean_cols = list(filter(lambda x: "mean" in x, list(odf.columns)))
    ocols = ['name']
    ocols.extend(hpks)
    ocols.extend(mean_cols)
    ocols.extend(list(filter(lambda x: x not in ocols, odf.columns)))
    odf = odf[ocols].reset_index(drop=True)
    return odf.copy()
def __call__(self, epoch):
    if self._batches is None:
        logger.info("Preparing evaluation data...")
        self._batches = self.reader.input_module.batch_generator(
            self._dataset, self._batch_size, is_eval=True)
    logger.info("Started evaluation %s" % self._info)
    metrics = defaultdict(lambda: list())
    bar = progressbar.ProgressBar(
        max_value=len(self._dataset) // self._batch_size + 1,
        widgets=[' [', progressbar.Timer(), '] ', progressbar.Bar(),
                 ' (', progressbar.ETA(), ') '])
    for i, batch in bar(enumerate(self._batches)):
        inputs = self._dataset[i * self._batch_size:(i + 1) * self._batch_size]
        predictions = self.reader.model_module(batch, self._ports)
        m = self.apply_metrics(inputs, predictions)
        for k in self._metrics:
            metrics[k].append(m[k])
    metrics = self.combine_metrics(metrics)
    super().add_to_history(metrics, self._iter, epoch)
    printmetrics = sorted(metrics.keys())
    res = "Epoch %d\tIter %d\ttotal %d" % (epoch, self._iter, self._total)
    for m in printmetrics:
        res += '\t%s: %.3f' % (m, metrics[m])
        self.update_summary(self._iter, self._info + '_' + m, metrics[m])
        if self._write_metrics_to is not None:
            with open(self._write_metrics_to, 'a') as f:
                f.write("{0} {1} {2:.5}\n".format(
                    datetime.now(), self._info + '_' + m,
                    np.round(metrics[m], 5)))
    res += '\t' + self._info
    logger.info(res)
    if self._side_effect is not None:
        self._side_effect_state = self._side_effect(metrics,
                                                    self._side_effect_state)
def calc_prediction_metrics(prediction, labels, metrics):
    prediction, labels = prediction.flatten(), labels.flatten()
    # keep only confidently labeled pixels (background < 10, foreground > 240)
    # that also have a valid (non-negative) prediction
    mask = ((labels > 240) | (labels < 10)) & (prediction > -0.001)
    binary_label = np.zeros_like(labels)
    binary_label[labels > 240] = 1
    binary_label, prediction = binary_label[mask], prediction[mask]
    return {k: metrics[k](binary_label, prediction) for k in metrics.keys()}
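# Hedged usage sketch: `metrics` maps names to callables with the signature
# metric(y_true, y_score), e.g. sklearn scorers. The arrays below are
# illustrative only (0-255 label mask, -1 marking invalid predictions).
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score

labels = np.array([[0, 5, 250], [245, 128, 3]], dtype=np.uint8)
prediction = np.array([[0.1, 0.2, 0.9], [0.8, -1.0, 0.05]])
scores = calc_prediction_metrics(prediction, labels,
                                 {'auroc': roc_auc_score,
                                  'ap': average_precision_score})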
def _checkpoint(self, model, metrics, checkpoint_metric, epoch):
    """Saves the model.

    Args:
        model(nn.Module): A PyTorch model.
        metrics(Dict[str, Any]): A dictionary object containing model
            performance metrics.
        checkpoint_metric(str): The checkpoint metric associated with the model.
        epoch(int): The current epoch.
    """
    if checkpoint_metric not in metrics.keys():
        raise KeyError(
            f'{checkpoint_metric} not in metrics {metrics.keys()}')

    if np.isnan(self.state['best_score']):
        self.state['best_score'] = metrics[checkpoint_metric]
        is_best = True
    else:
        if 'loss' in checkpoint_metric:
            is_best = self.state['best_score'] > metrics[checkpoint_metric]
            self.state['best_score'] = min(self.state['best_score'],
                                           metrics[checkpoint_metric])
        else:
            is_best = self.state['best_score'] < metrics[checkpoint_metric]
            self.state['best_score'] = max(self.state['best_score'],
                                           metrics[checkpoint_metric])

    data = {
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'best_score': self.state['best_score'],
        'optimizer': self.optimizer.state_dict(),
        'scheduler': self.scheduler.state_dict(),
        **metrics
    }
    if is_best:
        self._save(data, 'best_model.pt')

    if self.compute_auroc:
        save_metrics = [
            'val_auroc', 'val_subclass_rob_auroc',
            'val_true_subclass_rob_auroc', 'val_alt_subclass_rob_auroc'
        ]
    else:
        save_metrics = [
            'val_acc', 'val_acc_rw', 'val_subclass_rob_acc',
            'val_subclass_rob_acc_rw', 'val_true_subclass_rob_acc'
        ]
    for metric in save_metrics:
        if metrics[metric] > self.state['best_' + metric]:
            self.state['best_' + metric] = metrics[metric]
            self._save(data, f'best_{metric}_model.pt')

    if self.config['save_every'] > 0 and epoch % self.config['save_every'] == 0:
        self._save(data, f'checkpoint_epoch_{epoch}.pt')
def cv_metrics_df_with_indexes(X, Y, train_indexes, test_indexes, iclf,
                               iclfk={},
                               report_metrics=[
                                   'matthews_corr_coef', 'roc_auc_score',
                                   'f1_score', 'sensitivity', 'specificity'
                               ],
                               norm=False, calc_stats=True, report_name='CLF',
                               sort_metric='matthews_corr_coef_min'):
    output_objs = {}
    output_metrics = {}
    stats_df = []
    report_name_sufix = ''
    total_features = X.shape[-1]
    for train_index, test_index in zip(train_indexes, test_indexes):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        if len(y_test.shape) > 1:
            y_test = y_test.argmax(1)
        if norm == True:
            X_train, X_test = norm_z_score(X_train, X_test)
        start_feature_number = 1
        end_feature_number = total_features + 1
        for feature_number in range(start_feature_number, end_feature_number):
            tmp_scores = output_metrics.get('F' + str(feature_number), {})
            X_train_ = X_train[:, :feature_number]
            X_test_ = X_test[:, :feature_number]
            tmp_scores = fit_and_get_metrics(X_train_, X_test_, y_train,
                                             y_test, iclf, iclfk,
                                             report_metrics, tmp_scores)
            output_metrics['F' + str(feature_number)] = tmp_scores
    if calc_stats == True:
        number_of_features = 1
        for fn in range(number_of_features, total_features + 1):
            fk = 'F' + str(fn)
            metrics = output_metrics[fk]
            metrics_report = {
                'Name': report_name + report_name_sufix,
                'Number of Variables': fn
            }
            for m in metrics.keys():
                stats = metrics_stats(metrics[m], rn=3)
                for sk in stats.keys():
                    metrics_report[m + "_" + sk] = stats[sk]
            stats_df.append(metrics_report)
        stats_df = pd.DataFrame(stats_df).sort_values(
            by=[sort_metric, 'Number of Variables'],
            ascending=[False, True]).reset_index(drop=True)
    return output_metrics.copy(), stats_df.copy()
def live_evaluation_print(metrics):
    """Print the evaluation results stored in ``metrics``, one block per method."""
    for name in metrics.keys():
        print("------------------------")
        print("Results for %s" % (name))
        print("Histogram - non normalized")
        print(metrics[name]['h'])
        if metrics[name]['pm'] is not None:
            print("Matched percentage: %f" % (metrics[name]['pm']))
        if metrics[name]['pd'] is not None:
            print("Matched duration percentage: %f" % (metrics[name]['pd']))
        print("Histogram - normalized (excluding NAs)")
        print(metrics[name]['h_n'])
        print("Consistency")
        print(metrics[name]['c'])
def get_alpha_diversity_metrics():
    """List scikit-bio's alpha diversity metrics.

    The alpha diversity metrics listed here can be passed as metrics to
    ``skbio.diversity.alpha_diversity``.

    Returns
    -------
    list of str
        Alphabetically sorted list of alpha diversity metrics implemented
        in scikit-bio.

    See Also
    --------
    alpha_diversity
    get_beta_diversity_metrics

    """
    metrics = _get_alpha_diversity_metric_map()
    return sorted(metrics.keys())
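# Hedged usage sketch: any name returned by get_alpha_diversity_metrics() can
# be passed to skbio.diversity.alpha_diversity, as the docstring says. The
# counts and sample ids below are made up.
from skbio.diversity import alpha_diversity, get_alpha_diversity_metrics

counts = [[10, 2, 0, 4], [3, 3, 3, 3]]
ids = ['sample_A', 'sample_B']
print(get_alpha_diversity_metrics()[:5])            # first few metric names
shannon = alpha_diversity('shannon', counts, ids)   # pandas Series indexed by ids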
def get_best_model(self, metric_name: str, take_highest: bool = True):
    '''
    Tags and returns the best model of the experiment, based on the given metric
    Args:
        metric_name (str): The name of the metric, such as accuracy
        take_highest (bool): In case of accuracy and score, this is typically True.
            In case you want to get the model based on the lowest error, you can use False
    Returns:
        Run: the best run, which will be labeled as best run
    '''
    runs = {}
    run_metrics = {}
    for r in tqdm(self.__experiment.get_runs()):
        metrics = r.get_metrics()
        if metric_name in metrics.keys():
            runs[r.id] = r
            run_metrics[r.id] = metrics
    # pick the run with the highest (or lowest) value of the given metric
    selector = max if take_highest else min
    best_run_id = selector(run_metrics, key=lambda k: run_metrics[k][metric_name])
    best_run = runs[best_run_id]
    best_run.tag('Best run')
    return best_run
def returnLatexDf(best_fit_model, metrics, k):
    df_model = pd.DataFrame(best_fit_model.cv_results_)

    # Remove timing and parameter columns
    time_cols = [
        'mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time'
    ]
    param_cols = [x for x in df_model.columns if 'param_' in x]
    df_model.drop(time_cols + param_cols, axis=1, inplace=True)

    # Split df into separate dfs, one per metric
    split_cols = ["split" + str(x) + "_test" for x in range(k)] + ["mean_test"]
    df_latex = []
    for m in list(metrics.keys()):
        df_col_names = ["params"] + [x + '_' + m for x in split_cols]
        df_part = df_model[df_model.columns.intersection(df_col_names)]
        df_latex.append(df_part.to_latex(index=False))
    return df_latex
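# Hedged usage sketch: returnLatexDf expects a fitted GridSearchCV whose
# `scoring` was a dict, so cv_results_ contains split<i>_test_<metric> and
# mean_test_<metric> columns. The data and estimator below are illustrative.
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
metrics = {'accuracy': 'accuracy', 'f1_macro': 'f1_macro'}
search = GridSearchCV(SVC(), {'C': [0.1, 1, 10]}, scoring=metrics,
                      refit='accuracy', cv=5)
search.fit(X, y)
latex_tables = returnLatexDf(search, metrics, k=5)  # one LaTeX table per metric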
def cv_metrics_stratified_class_report_with_indexes(
        X, Y, indexes, clf, clfk={}, kfold=5, shuffle=True,
        report_metrics=[
            'matthews_corr_coef', 'roc_auc_score', 'f1_score',
            'sensitivity', 'specificity'
        ],
        regressor_name='Regressor', sort_report_by='roc_auc_score',
        norm=False):
    report_data = []
    metrics = cv_metrics_stratified_class_with_indexes(
        X, Y, indexes=indexes, clf=clf, clfk=clfk,
        report_metrics=report_metrics, norm=norm)
    metrics_report = {'name': regressor_name}
    for m in metrics.keys():
        stats = metrics_stats(metrics[m], rn=3)
        for sk in stats.keys():
            metrics_report[m + "_" + sk] = stats[sk]
    metrics_report.update(clfk)
    report_data.append(metrics_report.copy())
    odf = pd.DataFrame(report_data[:])
    odf = odf.sort_values(by=[sort_report_by + "_mean"], ascending=False)
    mean_cols = list(filter(lambda x: "mean" in x, list(odf.columns)))
    ocols = ['name']
    ocols.extend(clfk)
    ocols.extend(mean_cols)
    ocols.extend(list(filter(lambda x: x not in ocols, odf.columns)))
    odf = odf[ocols].reset_index(drop=True)
    return odf.copy()
def __init__(self, metrics, plots):
    """
    Create a new instance of MultiScorer.

    Parameters
    ----------
    metrics: dict
        The metrics to be used by the scorer.
        The dictionary must have as key a name (str) for the metric and as
        value a tuple containing the metric function itself and a dict literal
        of the additional named arguments to be passed to the function.
        The metric function should be one of the `sklearn.metrics` functions
        or any other callable with the same signature:
        `metric(y_true, y_pred, **kwargs)`.
    plots: dict
        Plots to be generated for each CV run.
    """
    self.metrics = metrics
    self.plots = plots
    self.results = {}
    self._called = False
    self.n_folds = 0

    for metric in metrics.keys():
        self.results[metric] = []
    self.results["cal_time"] = []
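# Hedged usage sketch of the metrics/plots dictionaries described in the
# docstring above, assuming this __init__ belongs to a class named MultiScorer
# (as the docstring states). The sklearn metrics chosen here are illustrative.
from sklearn.metrics import accuracy_score, f1_score

scorer = MultiScorer(
    metrics={
        'accuracy': (accuracy_score, {}),
        'f1_macro': (f1_score, {'average': 'macro'}),
    },
    plots={},
)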
def select_best_models(results, models, d_metric):
    columns = ['auc', 'f1', 'precision', 'recall', 'time', 'parameters']
    rv = pd.DataFrame(index=models, columns=columns)
    best_metric = 0
    best_models = {}
    for model, iters in results.items():
        top_intra_metric = 0
        best_models[model] = {}
        for params, metrics in iters.items():
            header = [key for key in metrics.keys()]
            if metrics[d_metric] > top_intra_metric:
                top_intra_metric = metrics[d_metric]
                best_models[model]['parameters'] = params
                best_models[model]['metrics'] = metrics
        to_append = [value for value in best_models[model]['metrics'].values()]
        to_append.append(best_models[model]['parameters'])
        rv.loc[model] = to_append
        if top_intra_metric > best_metric:
            best_metric = top_intra_metric
            best_model = model, params
    return rv, best_models, (best_model, best_metric)
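# Hedged sketch of the nested `results` structure select_best_models appears
# to expect: model -> hyperparameter setting -> metrics dict whose keys match
# the report columns (auc, f1, precision, recall, time). Numbers are made up.
results = {
    'logistic_regression': {
        'C=1.0': {'auc': 0.91, 'f1': 0.88, 'precision': 0.86, 'recall': 0.90, 'time': 1.2},
        'C=0.1': {'auc': 0.89, 'f1': 0.85, 'precision': 0.84, 'recall': 0.87, 'time': 1.1},
    },
    'random_forest': {
        'n_estimators=100': {'auc': 0.93, 'f1': 0.90, 'precision': 0.89, 'recall': 0.91, 'time': 4.5},
    },
}
rv, best_models, (best_model, best_metric) = select_best_models(
    results, models=['logistic_regression', 'random_forest'], d_metric='auc')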
def save_testing_error(save_path, trainer, evaluator, dataset_str,
                       save_extension=None):
    # The trainer is only given here to get the current iteration and epoch.
    iteration = trainer.state.iteration
    epoch = trainer.state.epoch
    metrics = evaluator.state.metrics
    # print (list(metrics.keys()))
    print("{} Results - Epoch: {} AccumulatedLoss: {}".format(
        dataset_str, epoch, metrics))
    metric_values = []
    for key in metrics.keys():
        title = "Testing metric: {}".format(key)
        metric_value = metrics[key]
        metric_values.append(metric_value)
        name_for_log = dataset_str + ' ' + key
        try:
            wandb.log({name_for_log: metric_value})
        except Exception:
            pass
    # print (metrics.keys())
    # also save as .txt for plotting
    log_name = os.path.join(save_path, save_extension)
    if epoch == 1:  # assumes you always eval at the end of the 1st epoch
        with open(log_name, 'w') as the_file:  # overwrite existing file
            the_file.write('#iteration,loss1,loss2,...\n')
    with open(log_name, 'a') as the_file:
        the_file.write('{},{}\n'.format(iteration,
                                        ",".join(map(str, metric_values))))
    plot_loss(log_name, log_name.replace('.txt', '.jpg'), 'Testing Loss')
    return metrics['AccumulatedLoss']
def _to_csv(self, tasks, baselines, max_col=10):
    now = datetime.now()
    dt_string = now.strftime("%d-%m-%Y-%H-%M-%S")
    with open(os.path.join("ft_runs", dt_string + "_report.csv"), "w") as fw:
        # fieldnames = ['task', 'baseline'] + ['metric'+ix for ix in range(10)]
        # writer = csv.DictWriter(fw, fieldnames=fieldnames)
        writer = csv.writer(fw)
        for task in tasks:
            for ix, baseline in enumerate(baselines):
                metrics = tasks[task][baseline]["test.json"]
                if ix == 0:
                    cols = [task] + list(metrics.keys())
                    padded_cols = cols + ["_"] * (max_col - len(cols))
                    writer.writerow(padded_cols)
                cols = [baseline] + [
                    round(metrics[metric], 4) for metric in metrics
                ]
                padded_cols = cols + ["_"] * (max_col - len(cols))
                writer.writerow(padded_cols)
def cv_metrics_stratified_class_with_indexes_and_transform(
        X, Y, indexes, iclf, iclfk={}, transform=None, kfold=5, shuffle=True,
        report_metrics=[
            'matthews_corr_coef', 'roc_auc_score', 'f1_score',
            'sensitivity', 'specificity'
        ],
        norm=False, calc_stats=True, report_name='CLF',
        sort_metric='roc_auc_score_min', transformations=[],
        features_top_ns=[], X_names=[], vectors=[], vector=None,
        allow_x_list_size=1):
    output_objs = {}
    output_metrics = {}
    stats_df = []
    report_name_sufix = ''
    report_name_sufix_xs = ''
    conditions = [
        type(X) == list,
        len(transformations) > allow_x_list_size,
        len(features_top_ns) == len(transformations),
        len(X_names) == len(transformations)
    ]
    multiple_x = utils.validate_multiple_conditions(conditions)
    for train_index, test_index in indexes:
        if multiple_x:
            X_train, X_test, y_train, y_test = transform_and_join(
                X, Y, train_index, test_index, transformations,
                features_top_ns, iclf, iclfk, joint_transformation=None,
                vectors=vectors)
            report_name_sufix_xs = [
                xn + "_" + tr + "_" + str(feats)
                for xn, tr, feats in zip(X_names, transformations,
                                         features_top_ns)
            ]
            report_name_sufix_xs = " & ".join(report_name_sufix_xs)
        else:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]
        total_features = X_train.shape[-1]
        number_of_features = total_features
        if type(transform) == str:
            report_name_sufix = report_name_sufix_xs + "_" + transform
            number_of_features = 1
            X_train, X_test, y_train, y_test = transform_x_train_test(
                X_train, X_test, y_train, y_test, transform=transform,
                iclf=iclf, iclfk=iclfk, vector=vector)
        if len(y_test.shape) > 1:
            y_test = y_test.argmax(1)
        if norm == True:
            X_train, X_test = norm_z_score(X_train, X_test)
        start_feature_number = number_of_features
        end_feature_number = total_features + 1
        for feature_number in range(start_feature_number, end_feature_number):
            tmp_scores = output_metrics.get('F' + str(feature_number), {})
            X_train_ = X_train[:, :feature_number]
            X_test_ = X_test[:, :feature_number]
            tmp_scores = fit_and_get_metrics(X_train_, X_test_, y_train,
                                             y_test, iclf, iclfk,
                                             report_metrics, tmp_scores)
            output_metrics['F' + str(feature_number)] = tmp_scores
    if calc_stats == True:
        for fn in range(number_of_features, total_features + 1):
            fk = 'F' + str(fn)
            metrics = output_metrics[fk]
            metrics_report = {
                'Name': report_name + report_name_sufix,
                'Number of Variables': fn
            }
            for m in metrics.keys():
                stats = metrics_stats(metrics[m], rn=3)
                for sk in stats.keys():
                    metrics_report[m + "_" + sk] = stats[sk]
            stats_df.append(metrics_report)
        stats_df = pd.DataFrame(stats_df).sort_values(
            by=[sort_metric, 'Number of Variables'],
            ascending=[False, True]).reset_index(drop=True)
    return output_metrics.copy(), stats_df.copy()
""" return np.min(np.abs(u - v)) metrics = OrderedDict(Euclidian=dict(metric='euclidean'), L3=dict(metric='minkowski', p=3), L4=dict(metric='minkowski', p=4), taxicab=dict(metric='cityblock'), Braycurtis=dict(metric='braycurtis'), Canberra=dict(metric='canberra'), Min=dict(metric=min_sep), Max=dict(metric='chebyshev'), Correlation=dict(metric='correlation'), Cosine=dict(metric='cosine')) eig_metric_names = list( metrics.keys()) + [name + " normed" for name in metrics] phys_metric_names = list( metrics.keys()) + [name + " Luclus" for name in metrics] def get_seperations(vectors, norm_values=None): """ For each pair of jet inputs in an event get the seperation. Each or the metrics in the dict will be tried, then retried with a normed space if norming norm_values are given A normed space is one where the vectors have been divided by the norm_values. Parameters ---------- vectors : list of 2d numpy arrays of floats
def plot_all_metrics(metrics, gene_names, all_learn_options, save, plots=None,
                     bottom=0.19):
    num_methods = len(metrics.keys())
    metrics_names = list(metrics[list(metrics.keys())[0]].keys())
    num_genes = len(gene_names)

    width = 0.9 / num_methods
    ind = np.arange(num_genes)

    if save == True:
        first_key = list(all_learn_options.keys())[0]
        #basefile = r"..\results\V%s_trmetric%s_%s" % (all_learn_options[first_key]["V"], all_learn_options[first_key]["training_metric"], datestamp())
        basefile = r"..\results\%s" % (first_key)
        d = os.path.dirname(basefile)
        if not os.path.exists(d):
            os.makedirs(d)
        with open(basefile + ".plot.pickle", "wb") as f:
            pickle.dump([metrics, all_learn_options, gene_names], f)

    for metric in metrics_names:
        if 'global' not in metric:
            plt.figure(metric, figsize=(20, 8))
        elif plots == None or 'gene level' in plots:
            plt.figure(metric, figsize=(12, 12))

    boxplot_labels = []
    boxplot_arrays = {}
    boxplot_median = {}

    for i, method in enumerate(metrics.keys()):
        boxplot_labels.append(method)
        for metric in metrics[method].keys():
            if 'global' in metric:
                plt.figure(metric)
                plt.bar([i], metrics[method][metric], 0.9,
                        color=plt.cm.Paired(1. * i / len(metrics.keys())),
                        label=method)
            else:
                if plots == None or 'gene level' in plots:
                    plt.figure(metric)
                    plt.bar(ind + (i * width), metrics[method][metric], width,
                            color=plt.cm.Paired(1. * i / len(metrics.keys())),
                            label=method)
                median_metric = np.median(metrics[method][metric])
                print(method, metric, median_metric)
                assert not np.isnan(median_metric), "found nan for %s, %s" % (
                    method, metric)
                if metric not in boxplot_arrays.keys():
                    boxplot_arrays[metric] = np.array(
                        metrics[method][metric])[:, None]
                    boxplot_median[metric] = [
                        np.median(np.array(metrics[method][metric]))
                    ]
                else:
                    boxplot_arrays[metric] = np.concatenate(
                        (boxplot_arrays[metric],
                         np.array(metrics[method][metric])[:, None]),
                        axis=1)
                    boxplot_median[metric].append(
                        np.median(np.array(metrics[method][metric])))

    for metric in metrics_names:
        if plots == None or 'gene level' in plots:
            ax = plt.figure(metric)
            leg = plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
            # leg.draggable(state=True, use_blit=True)
            plt.ylabel(metric)
            if 'global' in metric:
                plt.xticks(range(len(metrics.keys())), metrics.keys(),
                           rotation=70)
                plt.grid(True, which='both')
                plt.subplots_adjust(left=0.05, right=0.8)
            else:
                plt.xticks(ind + width, gene_names)
                plt.grid(True, which='both')
                plt.subplots_adjust(left=0.05, right=0.8)
            if save == True:
                plt.xticks(ind + 0.5, gene_names)
                if metric == 'AUC':
                    plt.ylim([0.5, 1.0])
                plt.savefig(basefile + "_" + metric + "_bar" + ".png")

        if (plots == None or "boxplots" in plots) and 'global' not in metric:
            plt.figure('Boxplot %s' % metric)
            sorted_boxplot = np.argsort(boxplot_median[metric])[::-1]
            plt.boxplot(boxplot_arrays[metric][:, sorted_boxplot])
            plt.ylabel(metric)
            plt.xticks(range(1, num_methods + 1),
                       np.array(boxplot_labels)[sorted_boxplot], rotation=70)
            plt.subplots_adjust(top=0.97, bottom=bottom)
            if metric == 'RMSE':
                plt.ylim((1.0, 2.0))
            if save == True:
                plt.savefig(basefile + "_" + metric + ".png")
def _compute_progress_metrics(self, sample_losses, corrects, type_to_labels,
                              type_to_num_classes, per_class_meters,
                              reweight=None):
    """Extracts metrics from each of the per_class_meters.

    Args:
        sample_losses(np.ndarray of shape (N, )): The loss computed for
            each sample.
        corrects(np.ndarray of shape (N, )): Whether or not the model produced
            a correct prediction for each sample.
        type_to_labels(Dict[str, Union[np.ndarray, torch.Tensor, Sequence]]):
            Dictionary object mapping the label_type (e.g. superclass,
            subclass, true_subclass) to the labels themselves.
        type_to_num_classes(Dict[str, int]): Dictionary object that maps the
            label_type to the number of classes for that label_type.
        per_class_meters(Dict[str, List[AverageMeter]]): A dictionary of
            per_class_meters, where a per_class_meter is a list of
            AverageMeter objects, one for each class. There is a
            per_class_meter for each label_type and for each metric_type
            (e.g. losses, accs).

    Returns:
        metrics(Dict[str, Any]): A dictionary object that describes model
            performance based on information in each of the per_class_meters.
    """
    batch_stats = {}
    for label_type, labels in type_to_labels.items():
        num_classes = type_to_num_classes[label_type]
        losses, counts = self.criterion.compute_group_avg(
            sample_losses, labels, num_groups=num_classes)
        accs, _ = self.criterion.compute_group_avg(corrects, labels,
                                                   num_groups=num_classes)
        losses_rw, counts_rw = self.criterion.compute_group_avg(
            sample_losses, labels, num_groups=num_classes, reweight=reweight)
        accs_rw, _ = self.criterion.compute_group_avg(
            corrects, labels, num_groups=num_classes, reweight=reweight)
        batch_stats[label_type] = {
            'losses': losses,
            'losses_rw': losses_rw,
            'counts': counts,
            'counts_rw': counts_rw,
            'accs': accs,
            'accs_rw': accs_rw
        }

    metrics = {}
    for label_type, stats in batch_stats.items():
        losses, counts, accs, losses_rw, counts_rw, accs_rw = \
            stats['losses'], stats['counts'], stats['accs'], \
            stats['losses_rw'], stats['counts_rw'], stats['accs_rw']
        loss_meters = per_class_meters[f'per_{label_type}_losses']
        loss_meters_rw = per_class_meters[f'per_{label_type}_losses_reweighted']
        acc_meters = per_class_meters[f'per_{label_type}_accs']
        acc_meters_rw = per_class_meters[f'per_{label_type}_accs_reweighted']

        num_classes = type_to_num_classes[label_type]
        for i in range(num_classes):
            loss_meters[i].update(losses[i], counts[i])
            acc_meters[i].update(accs[i], counts[i])
            loss_meters_rw[i].update(losses_rw[i], counts_rw[i])
            acc_meters_rw[i].update(accs_rw[i], counts_rw[i])

        active = np.array([i for i, m in enumerate(acc_meters) if m.count])
        if len(active) > 0:
            rob_loss = max([gl.avg for gl in np.array(loss_meters)[active]])
            rob_acc = min([ga.avg * 100 for ga in np.array(acc_meters)[active]])
            rob_loss_rw = max(
                [gl.avg for gl in np.array(loss_meters_rw)[active]])
            rob_acc_rw = min(
                [ga.avg * 100 for ga in np.array(acc_meters_rw)[active]])
        else:
            rob_loss = 0.
            rob_acc = 0.
            rob_loss_rw = 0.
            rob_acc_rw = 0.

        metrics[f'{label_type}_rob_loss'] = rob_loss
        metrics[f'{label_type}_rob_acc'] = rob_acc
        metrics[f'{label_type}_rob_loss_rw'] = rob_loss_rw
        metrics[f'{label_type}_rob_acc_rw'] = rob_acc_rw

    if 'true_subclass_rob_acc' not in metrics.keys():
        metrics['true_subclass_rob_acc'] = -1
    return metrics
def metrics_names(metrics):
    return sorted(metrics.keys())
def plot_phys_event(eventWise, event_num, metric_names=None, jet_names=None):
    """
    Parameters
    ----------
    eventWise :
    event_num :
    metric_names :
    jet_names :

    Returns
    -------
    """
    if jet_names is None:
        jet_names = [
            name.split('_')[0] for name in eventWise.columns
            if name.endswith("_PhysDistance")
        ]
        jet_names = jet_names[::2]
    if metric_names is None:
        metric_names = []
        name = True
        while name:
            name = InputTools.list_complete("Chose a metric (empty to stop); ",
                                            metrics.keys())
            name = name.strip()
            metric_names.append(name)
        # the last entry is the empty string that ended the loop
        del metric_names[-1]
    num_jets = len(jet_names)
    # get global data
    eventWise.selected_event = event_num
    phis = eventWise.JetInputs_Phi
    y_lims = np.min(phis), np.max(phis)
    rapidities = eventWise.JetInputs_Rapidity
    x_lims = np.min(rapidities), np.max(rapidities)
    same_mask = np.array(eventWise.JetInputs_PairLabels.tolist())
    cross_mask = np.array(eventWise.JetInputs_PairCrossings.tolist())
    colours = np.zeros((len(same_mask), len(same_mask[0]), 4), dtype=float)
    colours += 0.3
    colours[same_mask] = [0.1, 1., 0.1, 0.]
    colours[cross_mask] = [1., 0.1, 0., 0.]
    colours[:, :, -1] = (same_mask.astype(float) * 0.5
                         + cross_mask.astype(float) * 0.5 + 0.2)
    # make a grid of axes, one row per jet name and one column per metric
    num_metrics = len(metric_names)
    fig, ax_arr = plt.subplots(num_jets, num_metrics, sharex=True, sharey=True)
    ax_arr = ax_arr.reshape((num_jets, num_metrics))
    # now the axes should contain the plots
    metric_order = list(metrics.keys())
    for jet_n, jet_name in enumerate(jet_names):
        distances = getattr(eventWise, jet_name + "_PhysDistance")
        # normalise the distances
        distances = distances / np.nanmean(distances.tolist(), axis=(1, 2))
        ratios = getattr(eventWise, jet_name + "_DifferencePhysDistance")
        for metric_n, metric in enumerate(metric_names):
            metric_pos = metric_order.index(metric)
            ax = ax_arr[jet_n, metric_n]
            for i1, (p1, r1) in enumerate(zip(phis, rapidities)):
                for i2, (p2, r2) in enumerate(zip(phis, rapidities)):
                    width = distances[metric_pos, i1, i2]
                    line = matplotlib.lines.Line2D([r1, r2], [p1, p2],
                                                   c=colours[i1, i1],
                                                   lw=width)
                    ax.add_line(line)
            if jet_n == num_jets - 1:
                ax.set_xlabel(metric)
            if metric_n == 0:
                ax.set_ylabel(jet_name)
            ax.set_xlim(*x_lims)
            ax.set_ylim(*y_lims)
    fig.set_size_inches(num_metrics * 3.5, num_jets * 1.8)
    #fig.tight_layout()
    fig.subplots_adjust(hspace=0.0, wspace=0., right=1., top=1.)