Ejemplo n.º 1
0
def train_plot(runs, s):
    """Plot the smoothed training metrics of several runs and save them as PDF.

    One subplot is created per metric (metric names are taken from the first
    run) and every run contributes one curve to each metric it has values
    for. Curves are labelled with the parameters whose values differ between
    the given runs. The figure is written to ``train_progress.pdf``.

    Args:
        runs: Run identifiers; each one is passed to ``get_run_info``.
        s: Smoothing argument forwarded unchanged to ``smooth``.
    """
    run_infos = [get_run_info(run) for run in runs]
    metrics = list(run_infos[0][1].keys())
    n_metrics = len(metrics)
    # squeeze=False makes subplots() return a 2-D array even for a single
    # metric; without it a lone Axes object is returned and cannot be zipped.
    _fig, axes_grid = plt.subplots(n_metrics,
                                   1,
                                   figsize=(5, 3 * n_metrics),
                                   squeeze=False)
    axes = axes_grid[:, 0]
    for metric, ax in zip(metrics, axes):
        ax.set_title('Evaluation {}'.format(metric))

    # Boolean mask over the parameter names: True for parameters that do not
    # take a single common value across the runs.
    all_params = pd.DataFrame([info[-1] for info in run_infos])
    non_unique_params = all_params.apply(pd.Series.nunique) != 1

    for _run_dir, metrics_vals, _label, _best_model, params in run_infos:
        relevant_params = pd.DataFrame(params,
                                       index=[0]).loc[:, non_unique_params]
        names = relevant_params.columns
        values = map(str, relevant_params.iloc[0])
        # Curve label: "<name><value>" pairs joined by underscores.
        label = '_'.join(name + value for name, value in zip(names, values))
        for metric, ax in zip(metrics, axes):
            if metric in metrics_vals:
                ax.set_title(metric)
                ax.plot(smooth(metrics_vals[metric], s), label=label)

    # plt.legend() acts on the current axes only.
    plt.legend(loc='best', prop={'size': 6})
    plt.tight_layout()
    plt.savefig('train_progress.pdf')
Ejemplo n.º 2
0
def all_fps(args):
    """Plot every evaluation metric against FPS, one panel per dataset/metric.

    Run summaries are collected from the three HDM05 segmentation run
    directories, a fixed set of FPS values is discarded, and each metric is
    averaged per (bidirectional, Dataset, fps) group. The result is drawn as
    a seaborn ``FacetGrid`` (columns: datasets, rows: metrics, hue:
    ``bidirectional``) with a logarithmic x axis.

    Args:
        args: Object whose ``output`` attribute is the path the figure is
            saved to.
    """
    runs = (find_runs('runs_segmentation_hdm05-122/') +
            find_runs('runs_segmentation_hdm05-65/') +
            find_runs('runs_segmentation_hdm05-15/'))

    summaries = [
        get_run_summary(get_run_info(r), epoch='microAP') for r in runs
    ]
    summary = pd.concat(summaries, ignore_index=True)
    # Raw strings: '\d' in a plain literal is an invalid escape sequence.
    summary['Dataset'] = summary['run_dir'].str.extract(
        r'.*(hdm05-\d+)', expand=False).str.upper()
    summary['Fold'] = summary['val_data'].str.extract(
        r'.*fold-(\d+)-of.*', expand=False).apply(lambda x: 'Fold ' + x)

    summary = summary[~summary['fps'].isin((6.0, 10.0, 12.0, 20.0))]

    sorted_datasets = natsorted(summary['Dataset'].unique())

    # Strip the 'best_' marker from the column names.
    summary.columns = [col.replace('best_', '') for col in summary.columns]

    # A list (not a tuple) is required to select several groupby columns on
    # current pandas versions.
    metric_cols = ['microAP', 'macroAP', 'F1', 'microMultiF1', 'macroMultiF1']

    # 'mean' replaces pd.np.mean: the pandas.np alias no longer exists in
    # pandas >= 2.0.
    summary = summary.groupby(['bidirectional', 'Dataset', 'fps'],
                              as_index=False)[metric_cols].aggregate('mean')
    fps_values = summary['fps'].unique()

    # Keep the frame's own column order instead of an arbitrary set order.
    id_cols = [col for col in summary.columns if col not in metric_cols]
    summary = summary.melt(id_vars=id_cols,
                           value_vars=metric_cols,
                           var_name='Metric',
                           value_name='Value')

    # NOTE(review): seaborn >= 0.9 renamed ``size`` to ``height``; switch the
    # keyword once the minimum supported seaborn version allows it.
    g = sns.FacetGrid(summary,
                      col='Dataset',
                      row='Metric',
                      hue='bidirectional',
                      col_order=sorted_datasets,
                      margin_titles=True,
                      size=2,
                      aspect=1.5)
    g = g.map(plt.semilogx, 'fps', 'Value') \
        .set(xticks=fps_values) \
        .set_xticklabels(['{:g}'.format(f) for f in fps_values]) \
        .add_legend()

    # Leave room at the top of the figure for the title.
    plt.subplots_adjust(top=0.925)
    g.fig.suptitle('Performance vs FPS')
    g.savefig(args.output)
Ejemplo n.º 3
0
def segm_summary(args):
    """Print one LaTeX table per metric summarising the segmentation runs.

    Run summaries are collected from the three HDM05 segmentation run
    directories. For every metric a pivot table is built with
    (fps, model) rows and (Dataset, Fold) columns and printed via
    ``DataFrame.to_latex``, preceded by a ``\\multicolumn`` header line.

    Args:
        args: Not used by this function.
    """
    runs = (find_runs('runs_segmentation_hdm05-122/') +
            find_runs('runs_segmentation_hdm05-65/') +
            find_runs('runs_segmentation_hdm05-15/'))

    summaries = [
        get_run_summary(get_run_info(r), epoch='microAP') for r in runs
    ]
    summary = pd.concat(summaries, ignore_index=True)
    # Remove the 'best_' marker; a concrete list is assigned rather than a
    # lazy map object.
    summary.columns = [col.replace('best_', '') for col in summary.columns]

    # Raw strings: '\d' in a plain literal is an invalid escape sequence.
    summary['Dataset'] = summary['run_dir'].str.extract(
        r'.*(hdm05-\d+)', expand=False).str.upper()
    summary['Fold'] = summary['val_data'].str.extract(
        r'.*fold-(\d+)-of.*', expand=False).apply(lambda x: 'Fold ' + x)
    sorted_datasets = natsorted(summary['Dataset'].unique())

    # Replace the bidirectional flag by a readable model name
    # (index 0 -> 'Uni-LSTM', index 1 -> 'Bi-LSTM'). A plain list lookup
    # avoids the pandas.np alias, which no longer exists in pandas >= 2.0.
    model_labels = ['Uni-LSTM', 'Bi-LSTM']
    summary['bidirectional'] = [
        model_labels[flag] for flag in summary['bidirectional'].astype(int)
    ]

    metric_cols = ('microAP', 'macroAP', 'F1', 'microMultiF1', 'macroMultiF1')
    metric_names = ('micro-averaged AP', 'macro-averaged AP',
                    '$F_1$ (optimal threshold)',
                    'micro-averaged $F_1$ (multiple optimal thresholds)',
                    'macro-averaged $F_1$ (multiple optimal thresholds)')

    for metric, metric_name in zip(metric_cols, metric_names):
        # pivot_table's default aggregation is used for cells that receive
        # more than one value.
        pivot = pd.pivot_table(summary,
                               index=['fps', 'bidirectional'],
                               values=metric,
                               columns=['Dataset', 'Fold'])
        # Fix the row order of the models and the column order of the
        # datasets.
        pivot = pivot.reindex(model_labels, axis=0, level='bidirectional')
        pivot = pivot.reindex(sorted_datasets, axis=1, level='Dataset')
        print('\\multicolumn{7}{c}{\\textit{%s}} \\\\' % metric_name)
        print(
            pivot.to_latex(column_format='lcccccc',
                           multicolumn_format='c',
                           na_rep='-',
                           escape=False))
        print()
Ejemplo n.º 4
0
def ablation(runs):
    """Print, for each ablated parameter, the mean ``best_acc`` per value.

    For every parameter in turn a pivot table of ``best_acc`` is built with
    that parameter's values as columns and the remaining parameters as index;
    the column means of that table are printed.

    Args:
        runs: Run identifiers; each one is passed to ``get_run_info`` and the
            result to ``get_run_summary``.
    """
    frames = []
    for run in runs:
        frames.append(get_run_summary(get_run_info(run)))
    summary = pd.concat(frames, ignore_index=True)

    params = ['bidirectional', 'embed', 'hd', 'layers']
    for varied in params:
        # Every parameter except the one currently under study.
        held = [name for name in params if name != varied]
        per_config = summary.pivot_table(values='best_acc',
                                         columns=varied,
                                         index=held)
        print(per_config.mean())
Ejemplo n.º 5
0
def single_model(args, bidir):
    """Print a LaTeX table of test metrics per dataset for one model type.

    Run summaries are collected from the three HDM05 segmentation run
    directories and restricted to runs at 120 FPS that match ``bidir``,
    ``args.fair`` and ``args.stream``. Each cell of the resulting table is a
    "mean $\\pm$ std" string (percent format) of one metric on one dataset.

    Args:
        args: Object providing ``fair`` and ``stream`` (values the ``Fair``
            and ``Stream`` columns must equal) and ``output`` (optional CSV
            path; nothing is written when it is falsy).
        bidir: Value the ``bidirectional`` column must equal.
    """
    # Local import: the pandas.np alias used previously no longer exists in
    # pandas >= 2.0.
    import numpy as np

    runs = (find_runs('runs_segmentation_hdm05-122/') +
            find_runs('runs_segmentation_hdm05-65/') +
            find_runs('runs_segmentation_hdm05-15/'))

    summaries = [get_run_summary(get_run_info(r), epoch='test') for r in runs]
    summary = pd.concat(summaries, ignore_index=True)

    # Raw strings: '\d' in a plain literal is an invalid escape sequence.
    summary['Dataset'] = summary['run_dir'].str.extract(
        r'.*(hdm05-\d+)', expand=False).str.upper()
    summary['Fold'] = summary['val_data'].str.extract(
        r'.*fold-(\d+)-of.*', expand=False).apply(lambda x: 'Fold ' + x)

    # Dataset order is taken before filtering, as columns of the final table.
    sorted_datasets = natsorted(summary['Dataset'].unique())

    keep = ((summary['bidirectional'] == bidir) &
            (summary['fps'] == 120.0) &
            (summary['Fair'] == args.fair) &
            (summary['Stream'] == args.stream))
    summary = summary[keep]

    metric_cols = ['microAP', 'macroAP', 'microF1', 'macroF1', 'catMicroF1',
                   'catMacroF1']
    metric_names = ['micro-$AP$', 'macro-$AP$', 'micro-$F_1$', 'macro-$F_1$',
                    'cmicro-$F_1$', 'cmacro-$F_1$']

    def aggfunc(x):
        # Render a group of values as "mean $\pm$ std" in percent.
        return r'{:4.2%} $\pm$ {:3.2%}'.format(np.mean(x), np.std(x))

    pivot = pd.pivot_table(summary,
                           values=metric_cols,
                           columns='Dataset',
                           aggfunc=aggfunc)
    # Fix row (metric) and column (dataset) order, then show pretty names.
    pivot = pivot.reindex(metric_cols, axis=0)
    pivot = pivot.reindex(sorted_datasets, axis=1)
    pivot.index = metric_names

    print(
        pivot.to_latex(column_format='rXXX',
                       multicolumn_format='r',
                       na_rep='-',
                       escape=False))
    if args.output:
        pivot.to_csv(args.output)
Ejemplo n.º 6
0
def display_status(runs):
    """Write the run summaries to CSV, or print them split by variability.

    When ``args.output`` is truthy the concatenated summary is saved there as
    CSV. Otherwise the columns that differ between runs are printed as a
    table, followed by the columns that hold a single common value (shown
    for the first row).

    NOTE: ``args`` is not a parameter of this function; it is looked up in
    the enclosing scope and must exist there when the function is called.

    Args:
        runs: Run identifiers; each one is passed to ``get_run_info``.
    """
    infos = [get_run_info(r) for r in runs]
    summary = pd.concat([get_run_summary(info) for info in infos],
                        ignore_index=True)

    if args.output:
        summary.to_csv(args.output, index=False)
        return

    # Number of distinct values per column, computed once for both masks.
    distinct_counts = summary.apply(pd.Series.nunique)
    varying_cols = distinct_counts != 1
    common_cols = distinct_counts == 1

    # Fully qualified option names: the abbreviated 'max_columns' matches
    # more than one option on recent pandas versions and is rejected.
    with pd.option_context('display.width', None,
                           'display.max_columns', None):
        print(summary.loc[:, varying_cols])
        print("Common params:")
        print(summary.loc[0, common_cols])
Ejemplo n.º 7
0
def time(args):
    """Tabulate annotation times per model/FPS and dataset/fold.

    Run summaries are collected from the three HDM05 segmentation run
    directories and restricted to rows where ``Fair`` is set and ``Stream``
    is not. A pivot table of ``AnnotTime`` with (bidirectional, fps) rows and
    (Dataset, Fold) columns is written to ``annotation_times.csv`` and
    printed.

    NOTE(review): the function name shadows the standard-library ``time``
    module if that module is imported in the same namespace.

    Args:
        args: Not used by this function.
    """
    runs = (find_runs('runs_segmentation_hdm05-122/') +
            find_runs('runs_segmentation_hdm05-65/') +
            find_runs('runs_segmentation_hdm05-15/'))

    summaries = [get_run_summary(get_run_info(r), epoch='test') for r in runs]
    summary = pd.concat(summaries, ignore_index=True)

    # .copy() so the column assignments below operate on an independent
    # frame instead of a slice (avoids SettingWithCopyWarning).
    summary = summary[summary['Fair'] & ~summary['Stream']].copy()

    # Raw strings: '\d' in a plain literal is an invalid escape sequence.
    summary['Dataset'] = summary['run_dir'].str.extract(
        r'.*(hdm05-\d+)', expand=False).str.upper()
    summary['Fold'] = summary['val_data'].str.extract(
        r'.*fold-(\d+)-of.*', expand=False).apply(lambda x: 'Fold ' + x)

    sorted_datasets = natsorted(summary['Dataset'].unique())
    pivot = pd.pivot_table(summary,
                           values='AnnotTime',
                           index=['bidirectional', 'fps'],
                           columns=['Dataset', 'Fold'])
    # Fix the column order of the datasets.
    pivot = pivot.reindex(sorted_datasets, axis=1, level='Dataset')
    pivot.to_csv('annotation_times.csv')
    print(pivot)
Ejemplo n.º 8
0
def sota_hdm05(args):
    """Print a LaTeX table of test metrics for both models on HDM05-15 (20-80).

    Run summaries are read from ``runs_segmentation_hdm05-15_20-80/`` and
    restricted to runs at 120 FPS that match ``args.fair`` and
    ``args.stream``. The table has one row per metric and one column per
    model; values are formatted as percentages.

    Args:
        args: Object providing ``fair`` and ``stream`` (values the ``Fair``
            and ``Stream`` columns must equal) and ``output`` (optional CSV
            path; nothing is written when it is falsy).
    """
    runs = find_runs('runs_segmentation_hdm05-15_20-80/')
    summaries = [get_run_summary(get_run_info(r), epoch='test') for r in runs]
    summary = pd.concat(summaries, ignore_index=True)

    keep = ((summary['fps'] == 120.0) &
            (summary['Fair'] == args.fair) &
            (summary['Stream'] == args.stream))
    # .copy() so the column assignments below operate on an independent
    # frame instead of a slice (avoids SettingWithCopyWarning).
    summary = summary[keep].copy()

    summary['Dataset'] = 'HDM05-15 (20-80)'
    # Replace the bidirectional flag by the LaTeX macro naming the model
    # (index 0 -> \unimodel{}, index 1 -> \bimodel{}).
    model_names = np.array(['\\unimodel{}', '\\bimodel{}'])
    summary['bidirectional'] = model_names[
        summary['bidirectional'].astype(int).values]

    metric_cols = ['microAP', 'macroAP', 'microF1', 'macroF1', 'catMicroF1',
                   'catMacroF1']
    metric_names = ['micro-$AP$', 'macro-$AP$', 'micro-$F_1$', 'macro-$F_1$',
                    'cmicro-$F_1$', 'cmacro-$F_1$']

    pivot = pd.pivot_table(summary,
                           values=metric_cols,
                           columns='bidirectional')
    # Fix row (metric) and column (model) order, then show pretty names.
    pivot = pivot.reindex(metric_cols, axis=0)
    pivot = pivot.reindex(model_names, axis=1)
    pivot.index = metric_names

    # One formatter per model column.
    print(
        pivot.to_latex(column_format='rXX',
                       multicolumn_format='r',
                       na_rep='-',
                       escape=False,
                       formatters=['{:4.2%}'.format, '{:4.2%}'.format]))
    if args.output:
        pivot.to_csv(args.output)
Ejemplo n.º 9
0
def fps(args):
    """Plot micro-F1 against FPS for the online and offline models.

    Run summaries are collected from the three HDM05 segmentation run
    directories, restricted to runs matching ``args.fair`` and
    ``args.stream``, stripped of a fixed set of FPS values and averaged per
    (bidirectional, Dataset, fps) group. A 1x3 figure is drawn with one
    panel per dataset, each holding one curve for the non-bidirectional
    ("Online") and one for the bidirectional ("Offline") model on a
    logarithmic x axis. Text is rendered through LaTeX.

    Args:
        args: Object providing ``fair`` and ``stream`` (values the ``Fair``
            and ``Stream`` columns must equal) and ``output`` (path the
            figure is saved to).
    """
    plt.rc('text', usetex=True)
    plt.rc('font', family='serif')

    sns.set_style('whitegrid')
    sns.set_context('notebook', font_scale=1.2)

    runs = (find_runs('runs_segmentation_hdm05-122/') +
            find_runs('runs_segmentation_hdm05-65/') +
            find_runs('runs_segmentation_hdm05-15/'))

    summaries = [get_run_summary(get_run_info(r), epoch='test') for r in runs]
    summary = pd.concat(summaries, ignore_index=True)
    # Raw strings: '\d' in a plain literal is an invalid escape sequence.
    summary['Dataset'] = summary['run_dir'].str.extract(
        r'.*(hdm05-\d+)', expand=False).str.upper()
    summary['Fold'] = summary['val_data'].str.extract(
        r'.*fold-(\d+)-of.*', expand=False).apply(lambda x: 'Fold ' + x)

    summary = summary[summary['Fair'] == args.fair]
    summary = summary[summary['Stream'] == args.stream]
    summary = summary[~summary['fps'].isin(
        (0.5, 6.0, 10.0, 12.0, 20.0, 24.0, 40.0))]

    metric = 'microF1'
    # 'mean' replaces pd.np.mean: the pandas.np alias no longer exists in
    # pandas >= 2.0.
    summary = summary.groupby(['bidirectional', 'Dataset', 'fps'],
                              as_index=False)[metric].aggregate('mean')

    fps_values = summary['fps'].unique()
    sorted_datasets = natsorted(summary['Dataset'].unique())

    # Explicit boolean mask: '~' on an integer or object column would not
    # produce a logical negation.
    is_bidir = summary['bidirectional'].astype(bool)
    curves = (
        (~is_bidir, 'b', r'\textrm{Online-LSTM}'),
        (is_bidir, 'r', r'\textrm{Offline-LSTM}'),
    )

    h = 2.5
    fig, ax = plt.subplots(1, 3, figsize=(4 * h, h))
    for i, dset in enumerate(sorted_datasets):
        in_dataset = summary['Dataset'] == dset
        for model_mask, color, label in curves:
            xy = summary[in_dataset & model_mask].sort_values('fps')
            ax[i].semilogx(xy['fps'],
                           xy[metric],
                           color=color,
                           marker='.',
                           label=label)

        ax[i].set_title('\\textrm{{{}}}'.format(dset))
        ax[i].set_xticks(fps_values)
        ax[i].set_xticklabels(
            ['\\textrm{{{:g}}}'.format(f) for f in fps_values])

    # Per-panel y range, optional major ticks and minor ticks.
    y_layout = (
        ([0.75, 0.825], None, [0.77, 0.79, 0.81]),
        ([0.50, 0.8], None, [0.55, 0.65, 0.75]),
        ([0.25, 0.7], [0.3, 0.4, 0.5, 0.6, 0.7], [0.35, 0.45, 0.55, 0.65]),
    )
    for panel, (ylim, major_ticks, minor_ticks) in zip(ax, y_layout):
        panel.set_ylim(ylim)
        if major_ticks is not None:
            panel.set_yticks(major_ticks)
        panel.set_yticks(minor_ticks, minor=True)
        # The on/off flag is passed positionally because its keyword name
        # differs between matplotlib versions.
        panel.grid(True, axis='y', which='minor', linestyle='--')

    ax[0].set_ylabel(r'\textrm{micro-$F_1$}')
    ax[1].set_xlabel(r'\textrm{FPS (logarithmic scale)}')
    ax[1].legend(loc='best', frameon=True)
    plt.tight_layout()
    plt.savefig(args.output)