예제 #1
0
def format_scores_report(table):
    """Format every column of ``table`` and pass the result to ``table_html``.

    Each column is run through ``format_scores_column`` (defined elsewhere)
    and materialised into a list. The row index is copied from ``table``,
    and the column labels -- which have to be tuples for
    ``MultiIndex.from_tuples`` to accept them -- are turned into a
    MultiIndex.

    Returns whatever ``table_html`` returns for the formatted frame.
    """
    report = pd.DataFrame()
    for key in table.columns:
        cells = format_scores_column(table[key])
        report[key] = list(cells)

    # Lists carry no index of their own, so restore the source row labels.
    report.index = table.index
    header = pd.MultiIndex.from_tuples(report.columns)
    report.columns = header
    return table_html(report)
예제 #2
0
파일: report.py 프로젝트: buriy/naeval
def format_report(table, labels, github=False):
    """Pivot a long-format results table into a model x (dataset, metric) report.

    Args:
        table: frame with ``model``, ``dataset``, ``prec``, ``recall`` and
            ``time`` columns. NOTE: an ``errors`` column (``prec + recall``)
            is written onto this frame in place, as the original code did.
        labels: mapping with a ``get`` method; row labels found in it are
            replaced by the mapped value, others are kept unchanged.
        github: when true only the ``errors`` and ``time`` metrics are kept,
            and the flag is forwarded to ``format_column``.

    Returns:
        Whatever ``table_html`` (defined elsewhere) returns for the
        formatted frame.
    """
    # Remember first-seen order; it is re-applied by the reindex below.
    models = table.model.unique()
    datasets = table.dataset.unique()
    metrics = ['errors', 'prec', 'recall', 'time']

    table['errors'] = table.prec + table.recall
    # Keyword arguments: pandas >= 2.0 no longer accepts positional ones here.
    table = table.pivot(index='model', columns='dataset', values=metrics)
    # pivot yields (metric, dataset) columns; flip them to (dataset, metric).
    table = table.swaplevel(axis=1)
    if github:
        metrics = ['errors', 'time']

    table = table.reindex(
        index=models,
        columns=[
            (dataset, metric)
            for dataset in datasets
            for metric in metrics
        ]
    )

    for dataset in datasets:
        for metric in metrics:
            column = table[dataset, metric]
            table[dataset, metric] = list(format_column(column, metric, github))

    # Drop the axis names left over from the pivot.
    table.index.name = None
    table.columns.names = None, None
    table.index = [labels.get(_, _) for _ in table.index]
    return table_html(table)
예제 #3
0
def format_natasha_report(scores, bench, models):
    """Build a per-model summary of f1 scores and benchmark figures.

    ``scores`` and ``bench`` are both restricted to the rows in ``models``.
    The ``(GAREEV, LOC)`` score column is removed, every remaining score is
    mapped through ``scores_f1`` and the values are averaged after stacking
    (the original comment describes the result as model x type). Benchmark
    columns are formatted with their respective helpers.

    Returns whatever ``table_html`` returns for the assembled frame.
    """
    scores = scores.loc[models]
    bench = bench.loc[models]

    scores.pop((GAREEV, LOC))
    for key in scores.columns:
        scores[key] = scores[key].map(scores_f1)
    stacked = scores.stack()
    averaged = stacked.mean(axis=1)
    scores = averaged.unstack()  # model x type

    report = pd.DataFrame()
    triples = scores[[PER, LOC, ORG]].values
    report['PER/LOC/ORG f1'] = [format_f1_scores(triple) for triple in triples]

    # (bench selector, formatter, report column title)
    specs = [
        ('init', format_sec, 'init, s'),
        ('disk', format_mb, 'disk, mb'),
        ('ram', format_mb, 'ram, mb'),
        (['speed', 'device'], format_speed, 'speed, articles/s'),
    ]
    for selector, formatter, title in specs:
        records = bench[selector].values.tolist()
        report[title] = [formatter(record) for record in records]

    report.index = models
    return table_html(report)
예제 #4
0
파일: report.py 프로젝트: buriy/naeval
def format_natasha_report(table, labels, models):
    """Sum ``errors`` and ``time`` per model and pass the result to ``table_html``.

    Args:
        table: frame with ``model``, ``errors`` and ``time`` columns.
        labels: mapping with a ``get`` method; row labels found in it are
            replaced by the mapped value, others are kept unchanged.
        models: model names selecting (and ordering) the rows of the report.

    Returns:
        Whatever ``table_html`` (defined elsewhere) returns for the
        aggregated frame.
    """
    # Select the columns with a list: indexing a groupby with a bare tuple
    # of names was deprecated in pandas 1.0 and fails in pandas 2.0.
    table = table.groupby('model')[['errors', 'time']].sum()

    table = table.loc[models]
    table.index.name = None

    table.index = [labels.get(_, _) for _ in table.index]
    return table_html(table)
예제 #5
0
파일: report.py 프로젝트: natasha/naeval
def format_scores_report(table):
    """Map ``format_scores`` over every cell and pass the frame to ``table_html``.

    Columns are copied one at a time as mapped Series, so the row index of
    ``table`` carries over. Column labels must be tuples; they become a
    two-level MultiIndex whose second level is named ``prec/recall/f1,%``.

    Returns whatever ``table_html`` returns for the formatted frame.
    """
    report = pd.DataFrame()
    for key in table.columns:
        formatted = table[key].map(format_scores)
        report[key] = formatted

    header = pd.MultiIndex.from_tuples(report.columns)
    report.columns = header
    report.columns.names = [None, 'prec/recall/f1,%']

    return table_html(report)
예제 #6
0
파일: report.py 프로젝트: natasha/naeval
def format_github_report2(table, datasets):
    """Build a per-dataset report with the selected cells highlighted.

    The ``type`` column is copied as is. For each name in ``datasets`` the
    column values are passed to ``select_max`` (keyed by ``first``), and the
    selection is handed to ``highlight`` together with ``format_github_cell``.
    A ``simlex965`` column, if present, is renamed to ``simlex``.

    Returns whatever ``table_html`` returns for the assembled frame.
    """
    report = pd.DataFrame()
    report['type'] = table['type']

    for dataset in datasets:
        cells = table[dataset].values
        best = select_max(cells, key=first)
        rendered = highlight(cells, best, format_github_cell)
        report[dataset] = list(rendered)

    renamed = report.rename(columns={'simlex965': 'simlex'})
    return table_html(renamed)
예제 #7
0
파일: report.py 프로젝트: natasha/naeval
def format_github_scores_report(table):
    """Format score columns and pass the frame to ``table_html``.

    Every column label is unpacked as a ``(dataset, type)`` pair. The
    ``(GAREEV, LOC)`` column is left out; the others are run through
    ``format_github_scores_column``. The row index is copied from ``table``
    and the columns become a MultiIndex whose second level is named ``f1``.

    Returns whatever ``table_html`` returns for the formatted frame.
    """
    report = pd.DataFrame()
    for key in table.columns:
        dataset, kind = key
        excluded = dataset == GAREEV and kind == LOC
        if excluded:
            continue
        cells = format_github_scores_column(table[key])
        report[key] = list(cells)

    report.index = table.index
    header = pd.MultiIndex.from_tuples(report.columns)
    report.columns = header
    report.columns.names = [None, 'f1']

    return table_html(report)
예제 #8
0
def format_github_report1(table):
    """Build a benchmark report with one highlighted selection per column.

    The ``type`` column is copied as is. Each benchmark column is paired
    with a formatter and a selector (``select_min`` or ``select_max``); the
    selector's result is handed to ``highlight`` along with the formatter.

    Returns whatever ``table_html`` returns for the assembled frame.
    """
    report = pd.DataFrame()
    report['type'] = table['type']

    # (source column, formatter, selector, report column title)
    specs = [
        ('init', format_sec, select_min, 'init, s'),
        ('get', format_mks, select_min, 'get, µs'),
        ('disk', format_mb, select_min, 'disk, mb'),
        ('ram', format_mb, select_min, 'ram, mb'),
        ('vocab', format_vocab, select_max, 'vocab'),
    ]
    for source, formatter, selector, title in specs:
        cells = table[source].values
        chosen = selector(cells)
        rendered = highlight(cells, chosen, formatter)
        report[title] = list(rendered)

    return table_html(report)
예제 #9
0
파일: report.py 프로젝트: natasha/naeval
def format_report(table, datasets):
    """Build a report of benchmark figures followed by per-dataset cells.

    The ``type`` column is copied as is, the benchmark columns are mapped
    through their formatter, and every column named in ``datasets`` is
    mapped through ``format_cell``.

    Returns whatever ``table_html`` returns for the assembled frame.
    """
    report = pd.DataFrame()
    report['type'] = table['type']

    # (source column, formatter, report column title)
    specs = [
        ('init', format_sec, 'init, s'),
        ('get', format_mks, 'get, µs'),
        ('disk', format_mb, 'disk, mb'),
        ('ram', format_mb, 'ram, mb'),
    ]
    for source, formatter, title in specs:
        report[title] = table[source].map(formatter)

    for dataset in datasets:
        cells = table[dataset]
        report[dataset] = cells.map(format_cell)

    return table_html(report)
예제 #10
0
파일: report.py 프로젝트: buriy/naeval
def format_natasha_report(scores, bench, dataset, models):
    """Build a per-model summary of accuracy and benchmark figures.

    ``scores`` and ``bench`` are both restricted to the rows in ``models``.
    The ``dataset`` column of ``scores`` is rendered with three decimals,
    and the benchmark columns are formatted with their respective helpers.

    Returns whatever ``table_html`` returns for the assembled frame.
    """
    scores = scores.loc[models]
    bench = bench.loc[models]

    report = pd.DataFrame()
    report['accuracy'] = ['%.3f' % value for value in scores[dataset]]

    # (bench selector, formatter, report column title)
    specs = [
        ('init', format_sec, 'init, s'),
        ('disk', format_mb, 'disk, mb'),
        ('ram', format_mb, 'ram, mb'),
        (['speed', 'device'], format_speed, 'speed, sents/s'),
    ]
    for selector, formatter, title in specs:
        records = bench[selector].values.tolist()
        report[title] = [formatter(record) for record in records]

    report.index = models
    return table_html(report)
예제 #11
0
def format_natasha_report(table, datasets):
    """Build a summary of mean score and benchmark figures.

    Each cell of the ``datasets`` columns is unpacked as a two-item pair
    whose first item is the score; the scores are averaged across datasets
    per row and rendered with three decimals as ``precision``. Benchmark
    columns are formatted with their respective helpers, and the row index
    of ``table`` is applied at the end.

    Returns whatever ``table_html`` returns for the assembled frame.
    """
    report = pd.DataFrame()
    report['type'] = table['type']

    per_dataset = pd.DataFrame()
    for dataset in datasets:
        # Only the first item of each pair is used.
        per_dataset[dataset] = [score for score, _cover in table[dataset]]

    averaged = per_dataset.mean(axis=1)
    report['precision'] = ['%.3f' % value for value in averaged]

    # (source column, formatter, report column title)
    specs = [
        ('init', format_sec, 'init, s'),
        ('disk', format_mb, 'disk, mb'),
        ('ram', format_mb, 'ram, mb'),
        ('vocab', format_vocab, 'vocab'),
    ]
    for source, formatter, title in specs:
        cells = table[source].values
        report[title] = [formatter(cell) for cell in cells]

    report.index = table.index
    return table_html(report)
예제 #12
0
파일: report.py 프로젝트: buriy/naeval
def format_scores_report(table):
    """Format every column of ``table`` and pass the result to ``table_html``.

    Each column is run through ``format_scores_column`` (defined elsewhere)
    and materialised into a list; the row index is then copied from
    ``table``.

    Returns whatever ``table_html`` returns for the formatted frame.
    """
    report = pd.DataFrame()
    for key in table.columns:
        cells = format_scores_column(table[key])
        report[key] = list(cells)

    # Lists carry no index of their own, so restore the source row labels.
    report.index = table.index
    return table_html(report)