Code example #1
File: eval_metrics.py Project: bugbug-lou/VVN
def object_mask_and_boundary_metrics(agg_res, res, step):
    gt_objs = res['gt']
    pred_objs = res['pred']
    bde_flag = res.get('bde_flag', 0)
    ari_flag = res.get('ari_flag', 0)
    B, T = gt_objs.shape[:2]
    stride = gt_objs.shape[3] // pred_objs.shape[3]

    M = ObjectMetrics(gt_objs, pred_objs, stride=stride)
    # cluster background features
    bg_feats = res.get('background_features', np.array([0]))
    if bg_feats.sum() < 1:
        bg_feats = None

    M.cluster_unlabeled_pixels(features=bg_feats,
                               stride=M.stride,
                               n_clusters=res.get('background_n_clusters', 5))
    metrics = M.compute_all_metrics(examples=range(B),
                                    times=range(T),
                                    thresh=res['thresh'],
                                    stride=M.stride,
                                    connectivity=2,
                                    mode='thick',
                                    compute_BDEs=bde_flag,
                                    compute_ARIs=ari_flag,
                                    normalize=True)
    metrics = {
        k: metrics[k]
        for k in metrics.keys() if k in [
            'mIoU', 'recall', 'boundary_f_measure',
            'boundary_displacement_error', 'adjusted_rand_index'
        ]
    }

    # split metrics and time average
    split_metrics = {}
    for k in metrics.keys():
        vshape = metrics[k].shape
        vals = np.split(metrics[k], vshape[-1], axis=-1)
        for i, val in enumerate(vals):
            split_metrics[k + ('' if not i else str(i))] = val[:, :, 0]

    if res.get('agg_mean', 0):
        split_metrics = {
            k: np.mean(split_metrics[k], axis=0, keepdims=True)
            for k in split_metrics.keys()
        }

    if agg_res is None:
        agg_res = []

    agg_res.append(split_metrics)
    return agg_res
Code example #2
 def get_metrics(self, phase=None, epoch=None, item=None):
     metrics = copy.deepcopy(self.metrics)
     if phase is not None:
         metrics = {phase: metrics[phase]}
     if epoch is not None:
         for _p in metrics.keys():
             metrics[_p] = {epoch: metrics[_p][epoch]}
     if item is not None:
         for _p in metrics.keys():
             for _e in metrics[_p].keys():
                 metrics[_p][_e] = {item: metrics[_p][_e][item]}
     return metrics
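The method above assumes self.metrics is nested as phase -> epoch -> item; a minimal standalone sketch of the same narrowing, using hypothetical data, looks like this:

import copy

def filter_metrics(metrics, phase=None, epoch=None, item=None):
    # Same narrowing as get_metrics above, applied to a plain nested dict.
    metrics = copy.deepcopy(metrics)
    if phase is not None:
        metrics = {phase: metrics[phase]}
    if epoch is not None:
        metrics = {p: {epoch: by_epoch[epoch]} for p, by_epoch in metrics.items()}
    if item is not None:
        metrics = {p: {e: {item: by_item[item]} for e, by_item in by_epoch.items()}
                   for p, by_epoch in metrics.items()}
    return metrics

# Hypothetical history: phase -> epoch -> item -> value
history = {'train': {0: {'loss': 0.9, 'acc': 0.61}, 1: {'loss': 0.5, 'acc': 0.80}},
           'val':   {0: {'loss': 1.0, 'acc': 0.55}, 1: {'loss': 0.6, 'acc': 0.75}}}
print(filter_metrics(history, phase='val', epoch=1, item='acc'))  # {'val': {1: {'acc': 0.75}}}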
Code example #3
def grid_search_own_metrics_class_stratified(
        ix, iy, stratus_list, clf, get_grid_hyperparams_kargs,
        regressor_name="Regressor", show_progress_percentage=0.1,
        kfold=5, shuffle=True, sort_report_by='roc_auc_score'):
    clfks = get_grid_hyperparams(**get_grid_hyperparams_kargs)
    report_data = []
    hpks = list(clfks[0].keys())
    cols = len(clfks)
    progress_int = max(int(round(cols*show_progress_percentage,0)),1)
    print("Total number of evaluations:{}".format(cols))
    for col,clfk in enumerate(clfks):
        metrics = cv_metrics_stratified_class(ix, iy.flatten(), stratus_list=stratus_list,
                                              clf=clf, clfk=clfk, kfold=kfold,shuffle=shuffle)
        metrics_report = {'name':regressor_name}
        for m in metrics.keys():
            stats = metrics_stats(metrics[m],rn=3)
            for sk in stats.keys():
                metrics_report[m+"_"+sk]=stats[sk]
        metrics_report.update(clfk)
        report_data.append(metrics_report.copy())
        if col%progress_int==0:
            progress = round(100*col/cols,0)
            print("{} %".format(progress))
    print("100.0 %")
    odf = pd.DataFrame(report_data[:])
    odf = odf.sort_values(by=[sort_report_by+"_mean"],ascending=False)
    mean_cols = list(filter(lambda x: "mean" in x,list(odf.columns)))
    ocols = ['name']
    ocols.extend(hpks)
    ocols.extend(mean_cols)
    ocols.extend(list(filter(lambda x: x not in ocols,odf.columns)))
    odf = odf[ocols].reset_index(drop=True)
    return odf.copy()
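get_grid_hyperparams is not shown here; the loop only assumes it returns a list of concrete hyperparameter dicts (clfks). A minimal sketch of such an expansion using sklearn's ParameterGrid, which is an assumption rather than the original helper:

from sklearn.model_selection import ParameterGrid

# Hypothetical grid; each element of clfks is one fully specified hyperparameter dict.
grid = {'C': [0.1, 1.0, 10.0], 'kernel': ['linear', 'rbf']}
clfks = list(ParameterGrid(grid))
print(len(clfks))  # 6
print(clfks[0])    # e.g. {'C': 0.1, 'kernel': 'linear'}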
Code example #4
    def __call__(self, epoch):
        if self._batches is None:
            logger.info("Preparing evaluation data...")
            self._batches = self.reader.input_module.batch_generator(self._dataset, self._batch_size, is_eval=True)

        logger.info("Started evaluation %s" % self._info)
        metrics = defaultdict(lambda: list())
        bar = progressbar.ProgressBar(
            max_value=len(self._dataset) // self._batch_size + 1,
            widgets=[' [', progressbar.Timer(), '] ', progressbar.Bar(), ' (', progressbar.ETA(), ') '])
        for i, batch in bar(enumerate(self._batches)):
            inputs = self._dataset[i * self._batch_size:(i + 1) * self._batch_size]
            predictions = self.reader.model_module(batch, self._ports)
            m = self.apply_metrics(inputs, predictions)
            for k in self._metrics:
                metrics[k].append(m[k])

        metrics = self.combine_metrics(metrics)
        super().add_to_history(metrics, self._iter, epoch)

        printmetrics = sorted(metrics.keys())
        res = "Epoch %d\tIter %d\ttotal %d" % (epoch, self._iter, self._total)
        for m in printmetrics:
            res += '\t%s: %.3f' % (m, metrics[m])
            self.update_summary(self._iter, self._info + '_' + m, metrics[m])
            if self._write_metrics_to is not None:
                with open(self._write_metrics_to, 'a') as f:
                    f.write("{0} {1} {2:.5}\n".format(datetime.now(), self._info + '_' + m,
                                                      np.round(metrics[m], 5)))
        res += '\t' + self._info
        logger.info(res)

        if self._side_effect is not None:
            self._side_effect_state = self._side_effect(metrics, self._side_effect_state)
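combine_metrics is not shown; a plausible minimal version just averages the per-batch values collected in the loop above:

import numpy as np

def combine_metrics(metrics):
    # Hypothetical reducer: collapse each list of per-batch values to its mean.
    return {name: float(np.mean(values)) for name, values in metrics.items()}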
Code example #5
def calc_prediction_metrics(prediction, labels, metrics):
    prediction, labels = prediction.flatten(), labels.flatten()
    mask = ((labels > 240) | (labels < 10)) & (prediction > -0.001)

    binary_label = np.zeros_like(labels)
    binary_label[labels > 240] = 1
    binary_label, prediction = binary_label[mask], prediction[mask]
    return {k: metrics[k](binary_label, prediction) for k in metrics}
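The metrics argument is a dict of callables with the (y_true, y_score) signature; a hypothetical call using sklearn scorers on random data:

import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score

labels = np.random.randint(0, 256, size=(4, 64, 64))  # hypothetical raw label map (0-255)
prediction = np.random.rand(4, 64, 64)                # hypothetical scores in [0, 1)
metrics = {'auc': roc_auc_score, 'ap': average_precision_score}
print(calc_prediction_metrics(prediction, labels, metrics))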
Code example #6
    def _checkpoint(self, model, metrics, checkpoint_metric, epoch):
        """Saves the model.
        
        Args:
            model(nn.Module): A PyTorch model.
            metrics(Dict[str, Any]): A dictionary object containing
                model performance metrics.
            checkpoint_metric(str): The checkpoint metric associated with the model.
            epoch(int): The current epoch. 
        """
        if checkpoint_metric not in metrics.keys():
            raise KeyError(
                f'{checkpoint_metric} not in metrics {metrics.keys()}')

        if np.isnan(self.state['best_score']):
            self.state['best_score'] = metrics[checkpoint_metric]
            is_best = True
        else:
            if 'loss' in checkpoint_metric:
                is_best = self.state['best_score'] > metrics[checkpoint_metric]
                self.state['best_score'] = min(self.state['best_score'],
                                               metrics[checkpoint_metric])
            else:
                is_best = self.state['best_score'] < metrics[checkpoint_metric]
                self.state['best_score'] = max(self.state['best_score'],
                                               metrics[checkpoint_metric])

        data = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'best_score': self.state['best_score'],
            'optimizer': self.optimizer.state_dict(),
            'scheduler': self.scheduler.state_dict(),
            **metrics
        }

        if is_best:
            self._save(data, f'best_model.pt')

        if self.compute_auroc:
            save_metrics = [
                'val_auroc', 'val_subclass_rob_auroc',
                'val_true_subclass_rob_auroc', 'val_alt_subclass_rob_auroc'
            ]
        else:
            save_metrics = [
                'val_acc', 'val_acc_rw', 'val_subclass_rob_acc',
                'val_subclass_rob_acc_rw', 'val_true_subclass_rob_acc'
            ]

        for metric in save_metrics:
            if metrics[metric] > self.state['best_' + metric]:
                self.state['best_' + metric] = metrics[metric]
                self._save(data, f'best_{metric}_model.pt')

        if self.config[
                'save_every'] > 0 and epoch % self.config['save_every'] == 0:
            self._save(data, f'checkpoint_epoch_{epoch}.pt')
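The is_best branch above treats any metric whose name contains 'loss' as lower-is-better and everything else as higher-is-better; the same rule as a tiny standalone sketch:

def is_improvement(metric_name, new_value, best_value):
    # Losses improve downwards; accuracies/AUROCs improve upwards.
    if 'loss' in metric_name:
        return new_value < best_value
    return new_value > best_value

assert is_improvement('val_loss', 0.4, 0.5)
assert is_improvement('val_acc', 0.9, 0.8)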
Code example #7
def cv_metrics_df_with_indexes(X,
                               Y,
                               train_indexes,
                               test_indexes,
                               iclf,
                               iclfk={},
                               report_metrics=[
                                   'matthews_corr_coef', 'roc_auc_score',
                                   'f1_score', 'sensitivity', 'specificity'
                               ],
                               norm=False,
                               calc_stats=True,
                               report_name='CLF',
                               sort_metric='matthews_corr_coef_min'):
    output_objs = {}
    output_metrics = {}
    stats_df = []
    report_name_sufix = ''
    total_features = X.shape[-1]
    for train_index, test_index in zip(train_indexes, test_indexes):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        if len(y_test.shape) > 1:
            y_test = y_test.argmax(1)
        if norm == True:
            X_train, X_test = norm_z_score(X_train, X_test)
        start_feature_number = 1
        end_feature_number = total_features + 1
        for feature_number in range(start_feature_number, end_feature_number):
            tmp_scores = output_metrics.get('F' + str(feature_number), {})
            X_train_ = X_train[:, :feature_number]
            X_test_ = X_test[:, :feature_number]
            tmp_scores = fit_and_get_metrics(X_train_, X_test_, y_train,
                                             y_test, iclf, iclfk,
                                             report_metrics, tmp_scores)
            output_metrics['F' + str(feature_number)] = tmp_scores
    if calc_stats == True:
        number_of_features = 1
        for fn in range(number_of_features, total_features + 1):
            fk = 'F' + str(fn)
            metrics = output_metrics[fk]
            metrics_report = {
                'Name': report_name + report_name_sufix,
                'Number of Variables': fn
            }
            for m in metrics.keys():
                stats = metrics_stats(metrics[m], rn=3)
                for sk in stats.keys():
                    metrics_report[m + "_" + sk] = stats[sk]
            stats_df.append(metrics_report)
        stats_df = pd.DataFrame(stats_df).sort_values(
            by=[sort_metric, 'Number of Variables'],
            ascending=[False, True]).reset_index(drop=True)
    return output_metrics.copy(), stats_df.copy()
Code example #8
def live_evaluation_print(metrics):
    """
    TODO
    """
    for name in metrics.keys():
        print("------------------------")
        print("Results for %s" % (name))
        print("Histogram - non normalized")
        print(metrics[name]['h'])
        if metrics[name]['pm'] is not None:
            print("Matched percentage: %f" % (metrics[name]['pm']))
            if metrics[name]['pd'] is not None:
                print("Matched duration percentage: %f" %
                      (metrics[name]['pd']))
        print("Histogram - normalized (excluding NAs)")
        print(metrics[name]['h_n'])
        print("Consistency")
        print(metrics[name]['c'])
Code example #9
File: _driver.py Project: RNAer/scikit-bio
def get_alpha_diversity_metrics():
    """ List scikit-bio's alpha diversity metrics

    The alpha diversity metrics listed here can be passed as metrics to
    ``skbio.diversity.alpha_diversity``.

    Returns
    -------
    list of str
        Alphabetically sorted list of alpha diversity metrics implemented in
        scikit-bio.

    See Also
    --------
    alpha_diversity
    get_beta_diversity_metrics

    """
    metrics = _get_alpha_diversity_metric_map()
    return sorted(metrics.keys())
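As the docstring says, the returned names can be passed to skbio.diversity.alpha_diversity; a small usage sketch (assuming scikit-bio is installed):

from skbio.diversity import alpha_diversity, get_alpha_diversity_metrics

counts = [[10, 2, 0, 4], [3, 3, 3, 3]]       # hypothetical per-sample observation counts
print(get_alpha_diversity_metrics()[:5])     # first few available metric names
print(alpha_diversity('shannon', counts, ids=['S1', 'S2']))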
Code example #10
 def get_best_model(self, metric_name:str, take_highest:bool = True):
     '''
     Tags and returns the best model of the experiment, based on the given metric
     Args:
         metric_name (str): The name of the metric, such as accuracy
         take_highest (bool): In case of accuracy and score, this is typically True.  In case you want to get the model based on the lowest error, you can use False
     Returns:
         Run: the best run, which will be labeled as best run
     '''
     runs = {}
     run_metrics = {}
     for r in tqdm(self.__experiment.get_runs()):
         metrics = r.get_metrics()
         if metric_name in metrics.keys():
             runs[r.id] = r
             run_metrics[r.id] = metrics
     select = max if take_highest else min
     best_run_id = select(run_metrics, key=lambda k: run_metrics[k][metric_name])
     best_run = runs[best_run_id]
     best_run.tag('Best run')
     return best_run
Code example #11
File: _driver.py Project: SantosJGND/Nlas
def get_alpha_diversity_metrics():
    """ List scikit-bio's alpha diversity metrics

    The alpha diversity metrics listed here can be passed as metrics to
    ``skbio.diversity.alpha_diversity``.

    Returns
    -------
    list of str
        Alphabetically sorted list of alpha diversity metrics implemented in
        scikit-bio.

    See Also
    --------
    alpha_diversity
    get_beta_diversity_metrics

    """
    metrics = _get_alpha_diversity_metric_map()
    return sorted(metrics.keys())
Code example #12
def returnLatexDf(best_fit_model, metrics, k):
    df_model = pd.DataFrame(best_fit_model.cv_results_)

    # Remove cols
    time_cols = [
        'mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time'
    ]
    param_cols = [x for x in df_model.columns if 'param_' in x]
    df_model.drop(time_cols + param_cols, axis=1, inplace=True)

    # Split df to separate dfs based on metric
    split_cols = ["split" + str(x) + "_test" for x in range(k)] + ["mean_test"]
    df_latex = []

    for m in list(metrics.keys()):
        df_col_names = ["params"] + [x + '_' + m for x in split_cols]
        df_part = df_model[df_model.columns.intersection(df_col_names)]
        df_latex.append(df_part.to_latex(index=False))

    return df_latex
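The metrics argument mirrors a GridSearchCV scoring dict, since cv_results_ names its columns split<i>_test_<metric> and mean_test_<metric>; a hypothetical call (assuming pandas is imported as pd, as the function above already requires):

from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
metrics = {'acc': 'accuracy', 'f1': 'f1_macro'}   # hypothetical scorer names
best_fit_model = GridSearchCV(SVC(), {'C': [0.1, 1, 10]}, scoring=metrics, cv=5, refit='acc')
best_fit_model.fit(X, y)
latex_tables = returnLatexDf(best_fit_model, metrics, k=5)
print(latex_tables[0])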
Code example #13
def cv_metrics_stratified_class_report_with_indexes(
        X,
        Y,
        indexes,
        clf,
        clfk={},
        kfold=5,
        shuffle=True,
        report_metrics=[
            'matthews_corr_coef', 'roc_auc_score', 'f1_score', 'sensitivity',
            'specificity'
        ],
        regressor_name='Regressor',
        sort_report_by='roc_auc_score',
        norm=False):
    report_data = []
    metrics = cv_metrics_stratified_class_with_indexes(
        X,
        Y,
        indexes=indexes,
        clf=clf,
        clfk=clfk,
        report_metrics=report_metrics,
        norm=norm)
    metrics_report = {'name': regressor_name}
    for m in metrics.keys():
        stats = metrics_stats(metrics[m], rn=3)
        for sk in stats.keys():
            metrics_report[m + "_" + sk] = stats[sk]
    metrics_report.update(clfk)
    report_data.append(metrics_report.copy())
    odf = pd.DataFrame(report_data[:])
    odf = odf.sort_values(by=[sort_report_by + "_mean"], ascending=False)
    mean_cols = list(filter(lambda x: "mean" in x, list(odf.columns)))
    ocols = ['name']
    ocols.extend(clfk)
    ocols.extend(mean_cols)
    ocols.extend(list(filter(lambda x: x not in ocols, odf.columns)))
    odf = odf[ocols].reset_index(drop=True)
    return odf.copy()
Code example #14
    def __init__(self, metrics, plots):
        """
        Create a new instance of MultiScorer.

        Parameters
        ----------
        metrics: dict
            The metrics to be used by the scorer.
            The dictionary must have as key a name (str) for the metric and as value a tuple containing the metric
            function itself and a dict literal of the additional named arguments to be passed to the function. The
            metric function should be one of the `sklearn.metrics` function or any other callable with the same
            signature: `metric(y_true, p_pred, **kwargs)`.
        plots: dict
            Plots to be generated for each CV run.
        """
        self.metrics = metrics
        self.plots = plots
        self.results = {}
        self._called = False
        self.n_folds = 0

        for metric in metrics.keys():
            self.results[metric] = []
        self.results["cal_time"] = []
Code example #15
def select_best_models(results, models, d_metric):
    columns = ['auc', 'f1', 'precision', 'recall', 'time', 'parameters']
    rv = pd.DataFrame(index=models, columns=columns)
    best_metric = 0
    best_models = {}

    for model, iters in results.items():
        top_intra_metric = 0
        best_models[model] = {}
        for params, metrics in iters.items():
            header = [key for key in metrics.keys()]
            if metrics[d_metric] > top_intra_metric:
                top_intra_metric = metrics[d_metric]
                best_models[model]['parameters'] = params
                best_models[model]['metrics'] = metrics

        to_append = [value for value in best_models[model]['metrics'].values()]
        to_append.append(best_models[model]['parameters'])
        rv.loc[model] = to_append
        if top_intra_metric > best_metric:
            best_metric = top_intra_metric
            best_model = model, best_models[model]['parameters']

    return rv, best_models, (best_model, best_metric)
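select_best_models expects results keyed model -> parameter setting -> metrics dict whose five values line up with the auc/f1/precision/recall/time columns; a hypothetical input (assuming pandas is imported as pd, as the function requires):

results = {
    'rf': {
        'n_estimators=100': {'auc': 0.91, 'f1': 0.82, 'precision': 0.80, 'recall': 0.84, 'time': 3.2},
        'n_estimators=500': {'auc': 0.93, 'f1': 0.84, 'precision': 0.83, 'recall': 0.85, 'time': 15.1},
    },
}
rv, best_models, (best_model, best_metric) = select_best_models(results, ['rf'], d_metric='auc')
print(rv)           # one row per model with its best configuration
print(best_model)   # ('rf', 'n_estimators=500')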
Code example #16
def save_testing_error(save_path,
                       trainer,
                       evaluator,
                       dataset_str,
                       save_extension=None):
    # The trainer is only given here to get the current iteration and epoch.
    iteration = trainer.state.iteration
    epoch = trainer.state.epoch

    metrics = evaluator.state.metrics
    # print (list(metrics.keys()))
    print("{} Results - Epoch: {}  AccumulatedLoss: {}".format(
        dataset_str, epoch, metrics))
    metric_values = []
    for key in metrics.keys():
        title = "Testing metric: {}".format(key)
        metric_value = metrics[key]
        metric_values.append(metric_value)
        name_for_log = dataset_str + ' ' + key
        try:
            wandb.log({name_for_log: metric_value})
        except Exception:
            pass

    # print (metrics.keys())

    # also save as .txt for plotting
    log_name = os.path.join(save_path, save_extension)
    if epoch == 1:  #assumes you always eval at end of 1st epoch
        with open(log_name, 'w') as the_file:  # overwrite exiting file
            the_file.write('#iteration,loss1,loss2,...\n')
    with open(log_name, 'a') as the_file:
        the_file.write('{},{}\n'.format(iteration,
                                        ",".join(map(str, metric_values))))
    plot_loss(log_name, log_name.replace('.txt', '.jpg'), 'Testing Loss')
    return metrics['AccumulatedLoss']
Code example #17
    def _to_csv(self, tasks, baselines, max_col=10):
        now = datetime.now()

        dt_string = now.strftime("%d-%m-%Y-%H-%M-%S")

        with open(os.path.join("ft_runs", dt_string + "_report.csv"),
                  "w") as fw:
            # fieldnames = ['task', 'baseline'] + ['metric'+ix for ix in range(10) ]
            # writer = csv.DictWriter(fw, fieldnames=fieldnames)
            writer = csv.writer(fw)

            for task in tasks:
                for ix, baseline in enumerate(baselines):
                    metrics = tasks[task][baseline]["test.json"]
                    if ix == 0:
                        cols = [task] + list(metrics.keys())
                        padded_cols = cols + ["_"] * (max_col - len(cols))
                        writer.writerow(padded_cols)

                    cols = [baseline] + [
                        round(metrics[metric], 4) for metric in metrics
                    ]
                    padded_cols = cols + ["_"] * (max_col - len(cols))
                    writer.writerow(padded_cols)
Code example #18
def cv_metrics_stratified_class_with_indexes_and_transform(
        X,
        Y,
        indexes,
        iclf,
        iclfk={},
        transform=None,
        kfold=5,
        shuffle=True,
        report_metrics=[
            'matthews_corr_coef', 'roc_auc_score', 'f1_score', 'sensitivity',
            'specificity'
        ],
        norm=False,
        calc_stats=True,
        report_name='CLF',
        sort_metric='roc_auc_score_min',
        transformations=[],
        features_top_ns=[],
        X_names=[],
        vectors=[],
        vector=None,
        allow_x_list_size=1):
    output_objs = {}
    output_metrics = {}
    stats_df = []
    report_name_sufix = ''
    report_name_sufix_xs = ''
    conditions = [
        type(X) == list,
        len(transformations) > allow_x_list_size,
        len(features_top_ns) == len(transformations),
        len(X_names) == len(transformations)
    ]  #
    multiple_x = utils.validate_multiple_conditions(conditions)  #
    for train_index, test_index in indexes:
        if multiple_x:
            X_train, X_test, y_train, y_test = transform_and_join(
                X,
                Y,
                train_index,
                test_index,
                transformations,
                features_top_ns,
                iclf,
                iclfk,
                joint_transformation=None,
                vectors=vectors)
            report_name_sufix_xs = [
                xn + "_" + tr + "_" + str(feats) for xn, tr, feats in zip(
                    X_names, transformations, features_top_ns)
            ]
            report_name_sufix_xs = " & ".join(report_name_sufix_xs)
        else:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]
        total_features = X_train.shape[-1]
        number_of_features = total_features
        if type(transform) == str:
            report_name_sufix = report_name_sufix_xs + "_" + transform
            number_of_features = 1
            X_train, X_test, y_train, y_test = transform_x_train_test(
                X_train,
                X_test,
                y_train,
                y_test,
                transform=transform,
                iclf=iclf,
                iclfk=iclfk,
                vector=vector)
        if len(y_test.shape) > 1:
            y_test = y_test.argmax(1)
        if norm == True:
            X_train, X_test = norm_z_score(X_train, X_test)
        start_feature_number = number_of_features
        end_feature_number = total_features + 1
        for feature_number in range(start_feature_number, end_feature_number):
            tmp_scores = output_metrics.get('F' + str(feature_number), {})
            X_train_ = X_train[:, :feature_number]
            X_test_ = X_test[:, :feature_number]
            tmp_scores = fit_and_get_metrics(X_train_, X_test_, y_train,
                                             y_test, iclf, iclfk,
                                             report_metrics, tmp_scores)
            output_metrics['F' + str(feature_number)] = tmp_scores
    if calc_stats == True:
        for fn in range(number_of_features, total_features + 1):
            fk = 'F' + str(fn)
            metrics = output_metrics[fk]
            metrics_report = {
                'Name': report_name + report_name_sufix,
                'Number of Variables': fn
            }
            for m in metrics.keys():
                stats = metrics_stats(metrics[m], rn=3)
                for sk in stats.keys():
                    metrics_report[m + "_" + sk] = stats[sk]
            stats_df.append(metrics_report)
        stats_df = pd.DataFrame(stats_df).sort_values(
            by=[sort_metric, 'Number of Variables'],
            ascending=[False, True]).reset_index(drop=True)
    return output_metrics.copy(), stats_df.copy()
Code example #19
    """
    return np.min(np.abs(u - v))


metrics = OrderedDict(Euclidian=dict(metric='euclidean'),
                      L3=dict(metric='minkowski', p=3),
                      L4=dict(metric='minkowski', p=4),
                      taxicab=dict(metric='cityblock'),
                      Braycurtis=dict(metric='braycurtis'),
                      Canberra=dict(metric='canberra'),
                      Min=dict(metric=min_sep),
                      Max=dict(metric='chebyshev'),
                      Correlation=dict(metric='correlation'),
                      Cosine=dict(metric='cosine'))
eig_metric_names = list(
    metrics.keys()) + [name + " normed" for name in metrics]
phys_metric_names = list(
    metrics.keys()) + [name + " Luclus" for name in metrics]


def get_seperations(vectors, norm_values=None):
    """
    For each pair of jet inputs in an event, get the separation.
    Each of the metrics in the dict will be tried,
    then retried in a normed space if norm_values are given.
    A normed space is one where the vectors have been divided
    by the norm_values.

    Parameters
    ----------
    vectors : list of 2d numpy arrays of floats
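The metric specifications above look like keyword sets for SciPy's pairwise-distance routines; a minimal sketch that applies them that way (an assumption about how get_seperations consumes them):

import numpy as np
from scipy.spatial.distance import pdist

points = np.random.rand(5, 3)          # hypothetical stand-in for the jet input vectors
for name, spec in metrics.items():
    condensed = pdist(points, **spec)  # e.g. metric='minkowski', p=3, or the min_sep callable
    print(name, condensed.shape)       # (n * (n - 1) / 2,) pairwise separations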
Code example #20
File: util.py Project: ElucidataInc/crispor
def plot_all_metrics(metrics,
                     gene_names,
                     all_learn_options,
                     save,
                     plots=None,
                     bottom=0.19):
    num_methods = len(metrics.keys())
    metrics_names = list(metrics[list(metrics.keys())[0]].keys())
    num_genes = len(gene_names)
    width = 0.9 / num_methods
    ind = np.arange(num_genes)

    if save == True:
        first_key = list(all_learn_options.keys())[0]
        #basefile = r"..\results\V%s_trmetric%s_%s" % (all_learn_options[first_key]["V"], all_learn_options[first_key]["training_metric"], datestamp())
        basefile = r"..\results\%s" % (first_key)

        d = os.path.dirname(basefile)
        if not os.path.exists(d):
            os.makedirs(d)
        with open(basefile + ".plot.pickle", "wb") as f:
            pickle.dump([metrics, all_learn_options, gene_names], f)

    for metric in metrics_names:
        if 'global' not in metric:
            plt.figure(metric, figsize=(20, 8))
        elif plots == None or 'gene level' in plots:
            plt.figure(metric, figsize=(12, 12))

    boxplot_labels = []
    boxplot_arrays = {}
    boxplot_median = {}

    for i, method in enumerate(metrics.keys()):
        boxplot_labels.append(method)
        for metric in metrics[method].keys():

            if 'global' in metric:
                plt.figure(metric)
                plt.bar([i],
                        metrics[method][metric],
                        0.9,
                        color=plt.cm.Paired(1. * i / len(metrics.keys())),
                        label=method)
            else:
                if plots == None or 'gene level' in plots:
                    plt.figure(metric)
                    plt.bar(ind + (i * width),
                            metrics[method][metric],
                            width,
                            color=plt.cm.Paired(1. * i / len(metrics.keys())),
                            label=method)

                median_metric = np.median(metrics[method][metric])
                print(method, metric, median_metric)
                assert not np.isnan(median_metric), "found nan for %s, %s" % (
                    method, metric)
                if metric not in boxplot_arrays.keys():
                    boxplot_arrays[metric] = np.array(
                        metrics[method][metric])[:, None]
                    boxplot_median[metric] = [
                        np.median(np.array(metrics[method][metric]))
                    ]
                else:
                    boxplot_arrays[metric] = np.concatenate(
                        (boxplot_arrays[metric],
                         np.array(metrics[method][metric])[:, None]),
                        axis=1)
                    boxplot_median[metric].append(
                        np.median(np.array(metrics[method][metric])))

    for metric in metrics_names:
        if plots == None or 'gene level' in plots:
            ax = plt.figure(metric)
            leg = plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
            # leg.draggable(state=True, use_blit=True)
            plt.ylabel(metric)

            if 'global' in metric:
                plt.xticks(range(len(metrics.keys())),
                           metrics.keys(),
                           rotation=70)
                plt.grid(True, which='both')
                plt.subplots_adjust(left=0.05, right=0.8)
            else:
                plt.xticks(ind + width, gene_names)
                plt.grid(True, which='both')
                plt.subplots_adjust(left=0.05, right=0.8)
        if save == True:
            plt.xticks(ind + 0.5, gene_names)
            if metric == 'AUC':
                plt.ylim([0.5, 1.0])
            plt.savefig(basefile + "_" + metric + "_bar" + ".png")

        if (plots == None or "boxplots" in plots) and 'global' not in metric:
            plt.figure('Boxplot %s' % metric)

            sorted_boxplot = np.argsort(boxplot_median[metric])[::-1]

            plt.boxplot(boxplot_arrays[metric][:, sorted_boxplot])
            plt.ylabel(metric)
            plt.xticks(range(1, num_methods + 1),
                       np.array(boxplot_labels)[sorted_boxplot],
                       rotation=70)
            plt.subplots_adjust(top=0.97, bottom=bottom)

            if metric == 'RMSE':
                plt.ylim((1.0, 2.0))

        if save == True:
            plt.savefig(basefile + "_" + metric + ".png")
Code example #21
    def _compute_progress_metrics(self,
                                  sample_losses,
                                  corrects,
                                  type_to_labels,
                                  type_to_num_classes,
                                  per_class_meters,
                                  reweight=None):
        """Extracts metrics from each of the per_class_meters.

        Args:
            sample_losses(np.ndarray of shape (N, )): The loss computed for
                each sample.
            corrects(np.ndarray of shape(N, )): Whether or not the model produced
                a correct prediction for each sample.
            type_to_labels(Dict[str, Union[np.ndarray, torch.Tensor, Sequence]]):
                Dictionary object mapping the label_type (e.g. superclass, subclass,
                true_subclass) to the labels themselves.
            type_to_num_classes(Dict[str, int]): Dictionary object that maps the
                label_type to the number of classes for that label_type.
            per_class_meters(Dict[str, List[AverageMeter]]):  A dictionary of
                per_class_meters, where a per_class_meter is a list of AverageMeter 
                objects, one for each class. There is a per_class_meter for each 
                label_type, and for each metric_type (e.g. losses, accs).

        Returns:
            metrics(Dict[str, Any]): A dictionary object that describes model
                performance based on information in each of the per_class_meters.
        """
        batch_stats = {}
        for label_type, labels in type_to_labels.items():
            num_classes = type_to_num_classes[label_type]
            losses, counts = self.criterion.compute_group_avg(
                sample_losses, labels, num_groups=num_classes)
            accs, _ = self.criterion.compute_group_avg(corrects,
                                                       labels,
                                                       num_groups=num_classes)
            losses_rw, counts_rw = self.criterion.compute_group_avg(
                sample_losses,
                labels,
                num_groups=num_classes,
                reweight=reweight)
            accs_rw, _ = self.criterion.compute_group_avg(
                corrects, labels, num_groups=num_classes, reweight=reweight)
            batch_stats[label_type] = {
                'losses': losses,
                'losses_rw': losses_rw,
                'counts': counts,
                'counts_rw': counts_rw,
                'accs': accs,
                'accs_rw': accs_rw
            }
        metrics = {}
        for label_type, stats in batch_stats.items():
            losses, counts, accs, losses_rw, counts_rw, accs_rw = \
                stats['losses'], stats['counts'], stats['accs'], stats['losses_rw'], stats['counts_rw'], stats['accs_rw']
            loss_meters = per_class_meters[f'per_{label_type}_losses']
            loss_meters_rw = per_class_meters[
                f'per_{label_type}_losses_reweighted']
            acc_meters = per_class_meters[f'per_{label_type}_accs']
            acc_meters_rw = per_class_meters[
                f'per_{label_type}_accs_reweighted']

            num_classes = type_to_num_classes[label_type]
            for i in range(num_classes):
                loss_meters[i].update(losses[i], counts[i])
                acc_meters[i].update(accs[i], counts[i])
                loss_meters_rw[i].update(losses_rw[i], counts_rw[i])
                acc_meters_rw[i].update(accs_rw[i], counts_rw[i])

            active = np.array([i for i, m in enumerate(acc_meters) if m.count])
            if len(active) > 0:
                rob_loss = max(
                    [gl.avg for gl in np.array(loss_meters)[active]])
                rob_acc = min(
                    [ga.avg * 100 for ga in np.array(acc_meters)[active]])
                rob_loss_rw = max(
                    [gl.avg for gl in np.array(loss_meters_rw)[active]])
                rob_acc_rw = min(
                    [ga.avg * 100 for ga in np.array(acc_meters_rw)[active]])
            else:
                rob_loss = 0.
                rob_acc = 0.
                rob_loss_rw = 0.
                rob_acc_rw = 0.
            metrics[f'{label_type}_rob_loss'] = rob_loss
            metrics[f'{label_type}_rob_acc'] = rob_acc
            metrics[f'{label_type}_rob_loss_rw'] = rob_loss_rw
            metrics[f'{label_type}_rob_acc_rw'] = rob_acc_rw

        if 'true_subclass_rob_acc' not in metrics.keys():
            metrics['true_subclass_rob_acc'] = -1
        return metrics
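The per-class meters above follow the usual running-average pattern, and the 'rob' numbers are worst-group reductions over the classes that actually appeared; a minimal sketch of both ideas (the project's real AverageMeter may differ):

class AverageMeter:
    # Minimal running average: update(value, n) folds in a batch of n samples.
    def __init__(self):
        self.sum = 0.0
        self.count = 0

    def update(self, value, n=1):
        self.sum += float(value) * n
        self.count += n

    @property
    def avg(self):
        return self.sum / self.count if self.count else 0.0

acc_meters = [AverageMeter() for _ in range(3)]
for meter, (acc, n) in zip(acc_meters, [(0.9, 10), (0.7, 5), (0.8, 20)]):
    meter.update(acc, n)
active = [m for m in acc_meters if m.count]
rob_acc = min(m.avg * 100 for m in active)  # worst-class accuracy, as in the code above
print(rob_acc)  # 70.0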
Code example #22
File: util.py Project: bmcorser/Azimuth
def plot_all_metrics(metrics, gene_names, all_learn_options, save, plots=None, bottom=0.19):
    num_methods = len(metrics.keys())
    metrics_names = list(metrics[list(metrics.keys())[0]].keys())
    num_genes = len(gene_names)
    width = 0.9/num_methods
    ind = np.arange(num_genes)

    if save==True:
        first_key = list(all_learn_options.keys())[0]
        #basefile = r"..\results\V%s_trmetric%s_%s" % (all_learn_options[first_key]["V"], all_learn_options[first_key]["training_metric"], datestamp())
        basefile = r"..\results\%s" % (first_key)

        d = os.path.dirname(basefile)
        if not os.path.exists(d):
            os.makedirs(d)
        with open(basefile + ".plot.pickle", "wb") as f:
            pickle.dump([metrics, all_learn_options, gene_names], f)

    for metric in metrics_names:
        if 'global' not in metric:
            plt.figure(metric, figsize=(20, 8))
        elif plots == None or 'gene level' in plots:
            plt.figure(metric, figsize=(12, 12))

    boxplot_labels = []
    boxplot_arrays = {}
    boxplot_median = {}

    for i, method in enumerate(metrics.keys()):
        boxplot_labels.append(method)
        for metric in metrics[method].keys():

            if 'global' in metric:
                plt.figure(metric)
                plt.bar([i], metrics[method][metric], 0.9, color=plt.cm.Paired(1.*i/len(metrics.keys())), label=method)
            else:
                if plots == None or 'gene level' in plots:
                    plt.figure(metric)
                    plt.bar(ind+(i*width), metrics[method][metric], width, color=plt.cm.Paired(1.*i/len(metrics.keys())), label=method)

                median_metric = np.median(metrics[method][metric])
                print(method, metric, median_metric)
                assert not np.isnan(median_metric), "found nan for %s, %s" % (method, metric)
                if metric not in boxplot_arrays.keys():
                    boxplot_arrays[metric] = np.array(metrics[method][metric])[:, None]
                    boxplot_median[metric] = [np.median(np.array(metrics[method][metric]))]
                else:
                    boxplot_arrays[metric] = np.concatenate((boxplot_arrays[metric], np.array(metrics[method][metric])[:, None]), axis=1)
                    boxplot_median[metric].append(np.median(np.array(metrics[method][metric])))


    for metric in metrics_names:
        if plots == None or 'gene level' in plots:
            ax = plt.figure(metric)
            leg = plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
            # leg.draggable(state=True, use_blit=True)
            plt.ylabel(metric)

            if 'global' in metric:
                plt.xticks(range(len(metrics.keys())), metrics.keys(), rotation=70)
                plt.grid(True, which='both')
                plt.subplots_adjust(left = 0.05, right = 0.8)
            else:
                plt.xticks(ind+width, gene_names)
                plt.grid(True, which='both')
                plt.subplots_adjust(left = 0.05, right = 0.8)
        if save == True:
            plt.xticks(ind+0.5, gene_names)
            if metric=='AUC':
                plt.ylim([0.5, 1.0])                
            plt.savefig(basefile + "_" + metric + "_bar" + ".png")

        if (plots == None or "boxplots" in plots) and 'global' not in metric:
            plt.figure('Boxplot %s' % metric)

            sorted_boxplot = np.argsort(boxplot_median[metric])[::-1]

            plt.boxplot(boxplot_arrays[metric][:, sorted_boxplot])
            plt.ylabel(metric)
            plt.xticks(range(1, num_methods+1), np.array(boxplot_labels)[sorted_boxplot], rotation=70)
            plt.subplots_adjust(top = 0.97, bottom = bottom)

            if metric == 'RMSE':
                plt.ylim((1.0, 2.0))

        if save == True:
            plt.savefig(basefile + "_" + metric + ".png")
Code example #23
File: Models.py Project: zjxgithub/noscope
 def metrics_names(metrics):
     return sorted(metrics.keys())
Code example #24
def plot_phys_event(eventWise, event_num, metric_names=None, jet_names=None):
    """
    

    Parameters
    ----------
    eventWise :
        
    event_num :
        
    *jet_names :
        

    Returns
    -------

    """
    if jet_names is None:
        jet_names = [
            name.split('_')[0] for name in eventWise.columns
            if name.endswith("_PhysDistance")
        ]
        jet_names = jet_names[::2]
    if metric_names is None:
        metric_names = []
        name = True
        while name:
            name = InputTools.list_complete("Choose a metric (empty to stop); ",
                                            metrics.keys())
            name = name.strip()
            metric_names.append(name)
        del metric_names[-1]
    num_jets = len(jet_names)
    # get global data
    eventWise.selected_event = event_num
    phis = eventWise.JetInputs_Phi
    y_lims = np.min(phis), np.max(phis)
    rapidities = eventWise.JetInputs_Rapidity
    x_lims = np.min(rapidities), np.max(rapidities)
    same_mask = np.array(eventWise.JetInputs_PairLabels.tolist())
    cross_mask = np.array(eventWise.JetInputs_PairCrossings.tolist())
    colours = np.zeros((len(same_mask), len(same_mask[0]), 4), dtype=float)
    colours += 0.3
    colours[same_mask] = [0.1, 1., 0.1, 0.]
    colours[cross_mask] = [1., 0.1, 0., 0.]
    colours[:, :, -1] = same_mask.astype(float) * 0.5 + cross_mask.astype(
        float) * 0.5 + 0.2
    # make a grid of axis for each jet name and each metric
    num_metrics = len(metric_names)
    fig, ax_arr = plt.subplots(num_jets, num_metrics, sharex=True, sharey=True)
    ax_arr = ax_arr.reshape((num_jets, num_metrics))
    # now the other axis should contain the plots
    metric_order = list(metrics.keys())
    for jet_n, jet_name in enumerate(jet_names):
        distances = getattr(eventWise, jet_name + "_PhysDistance")
        # normalise the distances
        distances = distances / np.nanmean(distances.tolist(), axis=(1, 2))
        ratios = getattr(eventWise, jet_name + "_DifferencePhysDistance")
        for metric_n, metric in enumerate(metric_names):
            metric_pos = metric_order.index(metric)
            ax = ax_arr[jet_n, metric_n]
            for i1, (p1, r1) in enumerate(zip(phis, rapidities)):
                for i2, (p2, r2) in enumerate(zip(phis, rapidities)):
                    width = distances[metric_pos, i1, i2]
                    line = matplotlib.lines.Line2D([r1, r2], [p1, p2],
                                                   c=colours[i1, i2],
                                                   lw=width)
                    ax.add_line(line)
            if jet_n == num_jets - 1:
                ax.set_xlabel(metric)
            if metric_n == 0:
                ax.set_ylabel(jet_name)
            ax.set_xlim(*x_lims)
            ax.set_ylim(*y_lims)
    fig.set_size_inches(num_metrics * 3.5, num_jets * 1.8)
    #fig.tight_layout()
    fig.subplots_adjust(hspace=0.0, wspace=0., right=1., top=1.)
Code example #25
    def _checkpoint(self, model, metrics, checkpoint_metric, epoch):
        """Saves the model.

        Args:
            model(nn.Module): A PyTorch model.
            metrics(Dict[str, Any]): A dictionary object containing
                model performance metrics.
            checkpoint_metric(str): The checkpoint metric associated with the model.
            epoch(int): The current epoch.
        """
        if checkpoint_metric not in metrics.keys():
            raise KeyError(
                f"{checkpoint_metric} not in metrics {metrics.keys()}")

        if np.isnan(self.state["best_score"]):
            self.state["best_score"] = metrics[checkpoint_metric]
            is_best = True
        else:
            if "loss" in checkpoint_metric:
                is_best = self.state["best_score"] > metrics[checkpoint_metric]
                self.state["best_score"] = min(self.state["best_score"],
                                               metrics[checkpoint_metric])
            else:
                is_best = self.state["best_score"] < metrics[checkpoint_metric]
                self.state["best_score"] = max(self.state["best_score"],
                                               metrics[checkpoint_metric])

        data = {
            "epoch": epoch,
            "state_dict": model.state_dict(),
            "best_score": self.state["best_score"],
            "optimizer": self.optimizer.state_dict(),
            "scheduler": self.scheduler.state_dict(),
            **metrics,
        }

        if is_best:
            self._save(data, f"best_model.pt")

        if self.compute_auroc:
            save_metrics = [
                "val_auroc",
                "val_subclass_rob_auroc",
                "val_true_subclass_rob_auroc",
                "val_alt_subclass_rob_auroc",
            ]
        else:
            save_metrics = [
                "val_acc",
                "val_acc_rw",
                "val_subclass_rob_acc",
                "val_subclass_rob_acc_rw",
                "val_true_subclass_rob_acc",
            ]

        for metric in save_metrics:
            if metrics[metric] > self.state["best_" + metric]:
                self.state["best_" + metric] = metrics[metric]
                self._save(data, f"best_{metric}_model.pt")

        if self.config[
                "save_every"] > 0 and epoch % self.config["save_every"] == 0:
            self._save(data, f"checkpoint_epoch_{epoch}.pt")