Example #1
import json

from trectools import TrecRun, TrecEval


def report_run_per_query(qrels,
                         run_file_name,
                         remove_docs_with_zero_score=False):
    """Yield one JSON line per query with bpref, NDCG@10 and NDCG@1000 for a run."""
    run = TrecRun(run_file_name)
    system = run.run_data['system'][0]
    if remove_docs_with_zero_score:
        # Drop documents whose retrieval score is not strictly positive.
        run.run_data = run.run_data[run.run_data['score'] > 0]

    trec_eval = TrecEval(run, qrels)

    # per_query=True returns a DataFrame indexed by query id.
    bpref = trec_eval.getBpref(per_query=True)
    ndcg_10 = trec_eval.getNDCG(depth=10, per_query=True)
    ndcg = trec_eval.getNDCG(per_query=True)

    ret = bpref.join(ndcg_10, on='query')
    ret = ret.join(ndcg, on='query')

    for query, r in ret.iterrows():
        yield json.dumps({
            'corpus': extract_corpus(run_file_name),  # project-specific helper
            'topic': query,
            'tag': system,
            "bpref": r['Bpref@1000'],
            "pseudoNDCG@10": r['NDCG@10'],
            "pseudoNDCG": r['NDCG@1000']
        })
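
A minimal usage sketch, assuming the qrels and run file paths below are placeholders and that the project-specific helper extract_corpus is defined elsewhere:

from trectools import TrecQrel

qrels = TrecQrel("qrels.txt")                             # hypothetical qrels file in TREC format
for line in report_run_per_query(qrels, "my-run.txt"):    # hypothetical run file
    print(line)                                           # one JSON object per query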
Example #2
import json

from trectools import TrecRun, TrecEval


def report_run(qrels, run_file_name, remove_docs_with_zero_score=False):
    """Return a JSON string with run-level bpref, NDCG@10 and NDCG@1000."""
    run = TrecRun(run_file_name)
    system = run.run_data['system'][0]
    if remove_docs_with_zero_score:
        # Drop documents whose retrieval score is not strictly positive.
        run.run_data = run.run_data[run.run_data['score'] > 0]

    trec_eval = TrecEval(run, qrels)

    ret = {
        'corpus': extract_corpus(run_file_name),    # project-specific helpers,
        'topics': extract_topics(run_file_name),    # assumed defined elsewhere
        'tag': system,
        "bpref": trec_eval.getBpref(),
        "pseudoNDCG@10": trec_eval.getNDCG(depth=10),
        "pseudoNDCG": trec_eval.getNDCG()
    }

    return json.dumps(ret)
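
A minimal usage sketch, again with placeholder file names and assuming extract_corpus and extract_topics are available in the project:

from trectools import TrecQrel

qrels = TrecQrel("qrels.txt")              # hypothetical qrels file
print(report_run(qrels, "my-run.txt"))     # single JSON object for the whole run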
Example #3
import sys

import pandas as pd
from trectools import TrecRun


def reciprocal_rank_fusion(trec_runs, k=60, max_docs=1000, output=sys.stdout):
    """
        Implements reciprocal rank fusion as defined in
        ``Reciprocal Rank Fusion Outperforms Condorcet and Individual Rank Learning Methods`` by Cormack, Clarke and Buettcher.

        Parameters:
            k: constant that dampens the contribution of lower-ranked documents. Default value is 60 (the value used in their paper).
            max_docs: maximum number of fused documents to keep per topic.
            output: a file pointer to write the results to (writing is currently disabled; the fused run is returned instead). sys.stdout is the default.
    """

    outputRun = TrecRun()
    rows = []
    topics = trec_runs[0].topics()

    for topic in sorted(topics):
        doc_scores = {}
        for r in trec_runs:
            docs_for_run = r.get_top_documents(topic, n=1000)

            # RRF score: sum over runs of 1 / (k + rank of the document in that run).
            for pos, docid in enumerate(docs_for_run, start=1):
                doc_scores[docid] = doc_scores.get(docid,
                                                   0.0) + 1.0 / (k + pos)

        # Keep the top max_docs documents for this topic, ties broken by docid.
        for rank, (docid, score) in enumerate(sorted(iter(doc_scores.items()),
                                                     key=lambda x:
                                                     (-x[1], x[0]))[:max_docs],
                                              start=1):
            # output.write("%s Q0 %s %d %f reciprocal_rank_fusion_k=%d\n" % (str(topic), docid, rank, score, k))
            rows.append((topic, "Q0", docid, rank, score,
                         "reciprocal_rank_fusion_k=%d" % k))

    df = pd.DataFrame(rows)
    df.columns = ["query", "q0", "docid", "rank", "score", "system"]
    df["q0"] = df["q0"].astype(str)  # np.str was removed in NumPy 1.24; plain str is equivalent
    outputRun.run_data = df.copy()

    return outputRun
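
A minimal usage sketch, assuming two hypothetical run files and a hypothetical qrels file; the fused result is a TrecRun whose run_data DataFrame can be evaluated like any other run:

from trectools import TrecRun, TrecQrel, TrecEval

runs = [TrecRun("run_bm25.txt"), TrecRun("run_dense.txt")]   # hypothetical run files
fused = reciprocal_rank_fusion(runs, k=60, max_docs=1000)
print(fused.run_data.head())                                 # fused ranking as a DataFrame
print(TrecEval(fused, TrecQrel("qrels.txt")).getNDCG(depth=10))  # evaluate the fused run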