Python unitsize 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: faiss.contrib.factory_tools

메소드/함수: unitsize

hotexamples.com에서의 예제들: 2

Python unitsize - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python faiss.contrib.factory_tools.unitsize의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제에 평점을 매기면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: parse_bench_all_ivf.py 프로젝트: zjw0304/faiss

def plot_tradeoffs(db, allres, allstats, code_size, recall_rank):
    """Plot accuracy vs. speed for all indexes whose code size is in range.

    Indexes that own at least one Pareto-optimal operating point (as decided
    by ``extract_pareto_optimal``) are drawn by ``plot_subset``; the others
    are drawn as thin gray lines and listed in the x-axis label.

    Args:
        db: database name, only used in the plot title.
        allres: dict mapping an index key to its result array. Entries whose
            array is not 2-dimensional are ignored. Column ``recall_idx``
            holds the accuracy measure and column ``times_idx`` the timing
            (presumably ms per query, since it is plotted as ``1000 / t``
            under a "QPS" label -- TODO confirm).
        allstats: dict mapping an index key to a stats dict. The keys ``d``,
            ``n_threads`` and ``ranks`` are read from an arbitrary (first)
            entry here.
        code_size: either an int (``0`` means "any code size", any other
            value requires that exact size) or a ``(min, max)`` tuple giving
            an inclusive range, compared against ``unitsize(d, key)``.
        recall_rank: rank at which accuracy is reported; must be one of
            ``stats["ranks"]``.

    Returns:
        ``(selected_methods, not_selected)``: the keys that have an optimal
        operating point, and the in-range keys that do not. ``not_selected``
        is ordered from lowest to highest topline accuracy.

    Raises:
        TypeError: if ``code_size`` is neither an int nor a tuple.
        ValueError: if ``recall_rank`` is not in ``stats["ranks"]``.
    """
    stat0 = next(iter(allstats.values()))
    d = stat0["d"]
    n_threads = stat0["n_threads"]
    recall_idx = stat0["ranks"].index(recall_rank)
    # times come after the perf measure
    times_idx = len(stat0["ranks"])

    if isinstance(code_size, int):
        if code_size == 0:
            code_size = [0, 1e50]
            code_size_name = "any code size"
        else:
            code_size_name = "code_size=%d" % code_size
            code_size = [code_size, code_size]
    elif isinstance(code_size, tuple):
        code_size_name = "code_size in [%d, %d]" % code_size
    else:
        # an explicit raise survives `python -O`, unlike `assert False`
        raise TypeError(
            "code_size must be an int or a (min, max) tuple, got %r"
            % (code_size,))

    names_maxperf = []

    for k in sorted(allres):
        v = allres[k]
        if v.ndim != 2:
            continue
        us = unitsize(d, k)
        if not code_size[0] <= us <= code_size[1]:
            continue
        names_maxperf.append((v[-1, recall_idx], k))

    # sort from lowest to highest topline accuracy
    names_maxperf.sort()
    names = [name for _, name in names_maxperf]

    selected_methods, _optimal_points = \
        extract_pareto_optimal(allres, names, recall_idx, times_idx)

    # Keep the ordering of `names` so that the printed list, the axis label
    # and the return value are reproducible from run to run (a plain set
    # difference has arbitrary order).
    selected = set(selected_methods)
    not_selected = [name for name in names if name not in selected]

    print("methods without an optimal OP: ", not_selected)

    pyplot.title('database ' + db + ' ' + code_size_name)

    # grayed out lines
    # Every key here comes from `names`, so it already passed the ndim and
    # code-size filters above; no need to re-check them.
    for k in not_selected:
        v = allres[k]

        linestyle = (':' if 'PQ' in k else
                     '-.' if 'SQ4' in k else '--' if 'SQ8' in k else '-')

        pyplot.semilogy(v[:, recall_idx],
                        1000 / v[:, times_idx],
                        label=None,
                        linestyle=linestyle,
                        marker='o' if 'HNSW' in k else '+',
                        color='#cccccc',
                        linewidth=0.2)

    plot_subset(allres, allstats, selected_methods, recall_idx, times_idx)

    # Build the "omitted" text for the x label, starting a new line whenever
    # the current one grows beyond 80 characters.
    om = ''
    if not_selected:
        om = '\nomitted:'
        nc = len(om)
        for m in not_selected:
            if nc > 80:
                om += '\n'
                nc = 0
            om += ' ' + m
            nc += len(m) + 1

    pyplot.xlabel('1-recall at %d %s' % (recall_rank, om))
    pyplot.ylabel('QPS (%d threads)' % n_threads)
    pyplot.legend()
    pyplot.grid()
    return selected_methods, not_selected

예제 #2

파일 보기

파일: parse_bench_all_ivf.py 프로젝트: zjw0304/faiss

def plot_subset(allres,
                allstats,
                selected_methods,
                recall_idx,
                times_idx=3,
                report=("overhead", "build time")):
    """Draw one labelled accuracy / speed curve per selected index.

    Args:
        allres: dict mapping an index key to a 2-D result array; column
            ``recall_idx`` is plotted on x and ``1000 / column times_idx``
            on a logarithmic y axis.
        allstats: dict mapping an index key to its stats dict. Reads ``d``
            and ``nb``, optionally ``index_size``, ``tables_size`` and
            ``add_time``, and (from the last plotted index) ``ranks``,
            ``measure`` and ``n_threads``.
        selected_methods: iterable of index keys to plot.
        recall_idx: column of the accuracy measure in the result arrays.
        times_idx: column of the timing in the result arrays.
        report: which extra information goes into the legend label; any of
            ``"code_size"``, ``"overhead"`` and ``"build time"``.

    Returns:
        None. If ``selected_methods`` is empty nothing is drawn and the axis
        labels are left untouched.

    Raises:
        ValueError: if ``stats["measure"]`` is neither ``"recall"`` nor
            ``"inter"``.
    """
    id_size = 8  # 64 bit
    stats = None

    # important methods
    for k in selected_methods:
        v = allres[k]

        stats = allstats[k]
        d = stats["d"]
        dbsize = stats["nb"]
        if "index_size" in stats and "tables_size" in stats:
            tot_size = stats['index_size'] + stats['tables_size']
        else:
            tot_size = None  # don't know what the index size is

        addt = ''
        if 'add_time' in stats:
            add_time = stats['add_time']
            add_sec = int(add_time)
            if add_time > 7200:
                # long builds are reported as hours + minutes
                addt = ', %dh%02d' % divmod(add_sec // 60, 60)
            else:
                addt = ', %dm%02d' % divmod(add_sec, 60)

        code_size = unitsize(d, k)

        label = k

        if "code_size" in report:
            label += " %d bytes" % code_size

        # size of the index if it stored nothing but codes and 64-bit ids
        tight_size = (code_size + id_size) * dbsize

        if tot_size is not None and tot_size >= 0 and "overhead" in report:
            if tot_size > 10 * tight_size:
                # huge overheads read better as a factor than as a percentage
                label += " overhead x%.1f" % (tot_size / tight_size)
            else:
                label += " overhead+%.1f%%" % (
                    tot_size / tight_size * 100 - 100)

        if "build time" in report:
            label += " " + addt

        linestyle = (':' if 'Refine' in k or 'RFlat' in k else
                     '-.' if 'SQ' in k else '-')
        print(k, linestyle)
        pyplot.semilogy(v[:, recall_idx],
                        1000 / v[:, times_idx],
                        label=label,
                        linestyle=linestyle,
                        marker='o' if '4fs' in k else '+')

    if stats is None:
        # nothing was selected: there are no stats to derive axis labels from
        return

    # axis labels are taken from the stats of the last plotted index
    recall_rank = stats["ranks"][recall_idx]
    measure = stats["measure"]
    if measure == "recall":
        pyplot.xlabel('1-recall at %d' % recall_rank)
    elif measure == "inter":
        pyplot.xlabel('inter @ %d' % recall_rank)
    else:
        # an explicit raise survives `python -O`, unlike `assert False`
        raise ValueError("unknown measure %r" % (measure,))
    pyplot.ylabel('QPS (%d threads)' % stats["n_threads"])