def format_scores_report(table):
    """Format every column of ``table`` and hand the result to ``table_html``.

    Each column is passed through ``format_scores_column``; the formatted
    values are materialised into a new frame that reuses the index of
    ``table``.  The column keys are then turned into a ``pd.MultiIndex`` with
    ``from_tuples``, so they are expected to be tuples.

    Returns whatever ``table_html`` returns for the formatted frame.
    """
    formatted = pd.DataFrame()
    for key in table.columns:
        cells = format_scores_column(table[key])
        formatted[key] = list(cells)

    formatted.index = table.index
    formatted.columns = pd.MultiIndex.from_tuples(formatted.columns)
    return table_html(formatted)
def format_report(table, labels, github=False):
    """Pivot long-form measurements into a model x (dataset, metric) report.

    Args:
        table: frame with ``model``, ``dataset``, ``prec``, ``recall`` and
            ``time`` columns, one row per (model, dataset) pair.
            NOTE: an ``errors`` column (``prec + recall``) is written into
            this frame in place, exactly as the original code did, because
            other code may read it afterwards.
        labels: mapping from model name to display label; models without an
            entry keep their own name.
        github: when true only the ``errors`` and ``time`` metrics are kept,
            and the flag is forwarded to ``format_column``.

    Returns:
        Whatever ``table_html`` returns for the formatted frame.
    """
    # Remember first-seen order so the report keeps the input ordering.
    models = table.model.unique()
    datasets = table.dataset.unique()
    metrics = ['errors', 'prec', 'recall', 'time']
    table['errors'] = table.prec + table.recall

    # Keyword arguments are required here: pandas 2.0 no longer accepts
    # positional index/columns/values in DataFrame.pivot.
    table = table.pivot(index='model', columns='dataset', values=metrics)
    # pivot yields (metric, dataset) columns; flip them to (dataset, metric).
    table = table.swaplevel(axis=1)

    if github:
        metrics = ['errors', 'time']
    columns = [
        (dataset, metric)
        for dataset in datasets
        for metric in metrics
    ]
    table = table.reindex(index=models, columns=columns)

    for dataset, metric in columns:
        column = table[dataset, metric]
        table[dataset, metric] = list(format_column(column, metric, github))

    table.index.name = None
    table.columns.names = None, None
    table.index = [labels.get(_, _) for _ in table.index]
    return table_html(table)
def format_natasha_report(scores, bench, models):
    """Build a per-model summary of f1 scores and benchmark figures.

    ``scores`` and ``bench`` are both restricted to the rows listed in
    ``models``.  The ``(GAREEV, LOC)`` score column is removed, every
    remaining cell is mapped through ``scores_f1``, and the result is
    averaged so that one value per model and type remains.  The benchmark
    columns ``init``, ``disk``, ``ram`` and the ``speed``/``device`` pair are
    formatted with their respective helpers.

    Returns whatever ``table_html`` returns for the assembled frame, which is
    indexed by ``models``.
    """
    scores = scores.loc[models]
    bench = bench.loc[models]

    scores.pop((GAREEV, LOC))
    for key in scores.columns:
        scores[key] = scores[key].map(scores_f1)
    # model x type
    # (presumably columns are (dataset, type) pairs, so the mean runs over
    # datasets -- confirm against the caller)
    scores = scores.stack().mean(axis=1).unstack()

    report = pd.DataFrame()
    report['PER/LOC/ORG f1'] = [
        format_f1_scores(row)
        for row in scores[[PER, LOC, ORG]].values
    ]

    # (bench selector, formatter, report column title)
    specs = [
        ['init', format_sec, 'init, s'],
        ['disk', format_mb, 'disk, mb'],
        ['ram', format_mb, 'ram, mb'],
        [['speed', 'device'], format_speed, 'speed, articles/s'],
    ]
    for selector, formatter, title in specs:
        rows = bench[selector].values.tolist()
        report[title] = [formatter(row) for row in rows]

    report.index = models
    return table_html(report)
def format_natasha_report(table, labels, models):
    """Sum ``errors`` and ``time`` per model and hand the result to ``table_html``.

    Args:
        table: frame with ``model``, ``errors`` and ``time`` columns.
        labels: mapping from model name to display label; models without an
            entry keep their own name.
        models: model names to keep, in the order they should appear.

    Returns:
        Whatever ``table_html`` returns for the aggregated frame.
    """
    # Select the columns with a list: indexing a groupby with a bare tuple
    # of names was deprecated in pandas 1.0 and is rejected by pandas 2.0.
    table = table.groupby('model')[['errors', 'time']].sum()
    table = table.loc[models]
    table.index.name = None
    table.index = [labels.get(_, _) for _ in table.index]
    return table_html(table)
def format_scores_report(table):
    """Map every cell of ``table`` through ``format_scores`` and render it.

    Columns are copied one by one into a new frame with each value replaced
    by ``format_scores(value)``.  The column keys are converted into a
    two-level ``pd.MultiIndex`` (so they are expected to be tuples) whose
    second level is named ``'prec/recall/f1,%'``.

    Returns whatever ``table_html`` returns for the formatted frame.
    """
    formatted = pd.DataFrame()
    for key in table.columns:
        source = table[key]
        formatted[key] = source.map(format_scores)

    header = pd.MultiIndex.from_tuples(formatted.columns)
    formatted.columns = header
    formatted.columns.names = [None, 'prec/recall/f1,%']
    return table_html(formatted)
def format_github_report2(table, datasets):
    """Build the per-dataset report with selected cells highlighted.

    The ``type`` column is copied as is.  For each name in ``datasets`` the
    column values are passed to ``select_max`` (keyed by ``first``), and the
    resulting selection is given to ``highlight`` together with
    ``format_github_cell`` to produce the displayed cells.  The
    ``simlex965`` column is shown as ``simlex``.

    Returns whatever ``table_html`` returns for the assembled frame.
    """
    report = pd.DataFrame()
    report['type'] = table['type']

    for name in datasets:
        raw = table[name].values
        chosen = select_max(raw, key=first)
        cells = highlight(raw, chosen, format_github_cell)
        report[name] = list(cells)

    report = report.rename(columns={'simlex965': 'simlex'})
    return table_html(report)
def format_github_scores_report(table):
    """Format score columns for the GitHub report, skipping ``(GAREEV, LOC)``.

    Every column key is unpacked as a ``(dataset, type)`` pair.  The
    ``(GAREEV, LOC)`` column is left out; all others are formatted with
    ``format_github_scores_column``.  The result reuses the index of
    ``table`` and gets two-level columns whose second level is named ``'f1'``.

    Returns whatever ``table_html`` returns for the formatted frame.
    """
    report = pd.DataFrame()
    for key in table.columns:
        dataset, kind = key
        skipped = dataset == GAREEV and kind == LOC
        if not skipped:
            cells = format_github_scores_column(table[key])
            report[key] = list(cells)

    report.index = table.index
    report.columns = pd.MultiIndex.from_tuples(report.columns)
    report.columns.names = [None, 'f1']
    return table_html(report)
def format_github_report1(table):
    """Build the resource-usage report with selected cells highlighted.

    The ``type`` column is copied as is.  Each of ``init``, ``get``,
    ``disk``, ``ram`` and ``vocab`` is processed the same way: the column
    values go to a selector (``select_min`` for all but ``vocab``, which
    uses ``select_max``), and the selection is passed to ``highlight``
    together with the column's formatter.

    Returns whatever ``table_html`` returns for the assembled frame.
    """
    report = pd.DataFrame()
    report['type'] = table['type']

    # (source column, cell formatter, selector, report column title)
    specs = [
        ['init', format_sec, select_min, 'init, s'],
        ['get', format_mks, select_min, 'get, µs'],
        ['disk', format_mb, select_min, 'disk, mb'],
        ['ram', format_mb, select_min, 'ram, mb'],
        ['vocab', format_vocab, select_max, 'vocab'],
    ]
    for source, formatter, selector, title in specs:
        raw = table[source].values
        chosen = selector(raw)
        cells = highlight(raw, chosen, formatter)
        report[title] = list(cells)

    return table_html(report)
def format_report(table, datasets):
    """Build the full report: resource columns followed by dataset columns.

    The ``type`` column is copied as is.  ``init``, ``get``, ``disk`` and
    ``ram`` are mapped through their dedicated formatters and stored under
    display titles; every column named in ``datasets`` is mapped through
    ``format_cell`` and keeps its name.

    Returns whatever ``table_html`` returns for the assembled frame.
    """
    report = pd.DataFrame()
    report['type'] = table['type']

    # (source column, cell formatter, report column title)
    specs = [
        ['init', format_sec, 'init, s'],
        ['get', format_mks, 'get, µs'],
        ['disk', format_mb, 'disk, mb'],
        ['ram', format_mb, 'ram, mb'],
    ]
    for source, formatter, title in specs:
        report[title] = table[source].map(formatter)

    for name in datasets:
        report[name] = table[name].map(format_cell)

    return table_html(report)
def format_natasha_report(scores, bench, dataset, models):
    """Build a per-model summary of accuracy and benchmark figures.

    ``scores`` and ``bench`` are both restricted to the rows listed in
    ``models``.  The ``dataset`` column of ``scores`` is rendered with three
    decimals as ``accuracy``; the benchmark columns ``init``, ``disk``,
    ``ram`` and the ``speed``/``device`` pair are formatted with their
    respective helpers.

    Returns whatever ``table_html`` returns for the assembled frame, which is
    indexed by ``models``.
    """
    scores = scores.loc[models]
    bench = bench.loc[models]

    report = pd.DataFrame()
    report['accuracy'] = ['%.3f' % value for value in scores[dataset]]

    # (bench selector, formatter, report column title)
    specs = [
        ['init', format_sec, 'init, s'],
        ['disk', format_mb, 'disk, mb'],
        ['ram', format_mb, 'ram, mb'],
        [['speed', 'device'], format_speed, 'speed, sents/s'],
    ]
    for selector, formatter, title in specs:
        rows = bench[selector].values.tolist()
        report[title] = [formatter(row) for row in rows]

    report.index = models
    return table_html(report)
def format_natasha_report(table, datasets):
    """Build a summary with mean precision and resource figures per row.

    Each cell of the ``datasets`` columns is unpacked as a two-item pair;
    only the first item is used.  Those values are averaged across
    ``datasets`` for every row and rendered with three decimals as
    ``precision``.  ``init``, ``disk``, ``ram`` and ``vocab`` are formatted
    with their respective helpers.  The result reuses the index of ``table``.

    Returns whatever ``table_html`` returns for the assembled frame.
    """
    # First item of every (score, cover) pair, one column per dataset.
    per_dataset = pd.DataFrame()
    for name in datasets:
        per_dataset[name] = [score for score, _cover in table[name]]
    mean_scores = per_dataset.mean(axis=1)

    report = pd.DataFrame()
    report['type'] = table['type']
    report['precision'] = ['%.3f' % value for value in mean_scores]

    # (source column, cell formatter, report column title)
    specs = [
        ['init', format_sec, 'init, s'],
        ['disk', format_mb, 'disk, mb'],
        ['ram', format_mb, 'ram, mb'],
        ['vocab', format_vocab, 'vocab'],
    ]
    for source, formatter, title in specs:
        raw = table[source].values
        report[title] = [formatter(value) for value in raw]

    report.index = table.index
    return table_html(report)
def format_scores_report(table):
    """Format every column of ``table`` and hand the result to ``table_html``.

    Each column is passed through ``format_scores_column`` and the formatted
    values are stored under the same column key in a new frame, which reuses
    the index of ``table``.

    Returns whatever ``table_html`` returns for the formatted frame.
    """
    formatted = pd.DataFrame()
    for key in table.columns:
        cells = format_scores_column(table[key])
        formatted[key] = list(cells)

    formatted.index = table.index
    return table_html(formatted)