def prepare_final_submissions(qrels):
    """Filter the three round-5 "expanded" runs against qrels and checksum them.

    For each run, ``tools/scripts/filter_run_with_qrels.py`` is invoked through
    the shell with ``--discard`` and the supplied qrels, writing a ``final``
    run file under ``runs/``. The MD5 of every output is computed with
    ``compute_md5`` and, unless the module-level ``generate_md5`` flag is
    truthy, asserted equal to the expected value in ``final_runs``.

    When ``generate_md5`` is truthy, the checksums of every run listed in
    ``final_runs`` are recomputed and printed instead of being verified.

    The exit status of the shell commands is not inspected.

    Args:
        qrels: Value interpolated verbatim after ``--qrels`` on the command
            line (presumably a path to a qrels file -- confirm with callers).

    Raises:
        AssertionError: If ``generate_md5`` is falsy and a produced run's
            checksum differs from the one recorded in ``final_runs``.
    """
    for banner_line in ('', '## Preparing final submission files by removing qrels...', ''):
        print(banner_line)

    # (output run, input run, runtag) -- processed strictly in this order.
    jobs = (
        ('expanded.anserini.final-r5.fusion1.txt',
         'expanded.anserini.covid-r5.fusion1.txt',
         'r5.fusion1'),
        ('expanded.anserini.final-r5.fusion2.txt',
         'expanded.anserini.covid-r5.fusion2.txt',
         'r5.fusion2'),
        ('expanded.anserini.final-r5.rf.txt',
         'expanded.anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt',
         'r5.rf'),
    )

    for target, source, tag in jobs:
        print(f'Generating {target}')
        command = (
            f'python tools/scripts/filter_run_with_qrels.py --discard --qrels {qrels} '
            f'--input runs/{source} --output runs/{target} --runtag {tag}'
        )
        os.system(command)
        # The checksum is always computed, even when it is not verified.
        digest = compute_md5(f'runs/{target}')
        assert generate_md5 or digest == final_runs[target], f'Error in producing {target}!'

    if generate_md5:
        final_md5 = {}
        for run in final_runs:
            final_md5[run] = compute_md5(f'runs/{run}')
        print(f'Checksums for final runs: {final_md5}')
def perform_fusion():
    """Fuse the round-5 "expanded" BM25 runs into two fusion runs.

    Each fusion run is produced by shelling out to ``python -m pyserini.fusion``
    (with ``PYTHONPATH=../pyserini``) using ``--method rrf`` over three
    component runs under ``runs/``. Unless the module-level ``generate_md5``
    flag is truthy, the MD5 of each fused run is asserted equal to the value
    in ``cumulative_runs``.

    When ``generate_md5`` is truthy, checksums for every run listed in
    ``cumulative_runs`` are computed and printed instead.

    The exit status of the shell commands is not inspected.

    Raises:
        AssertionError: If ``generate_md5`` is falsy and a fused run's
            checksum differs from the one recorded in ``cumulative_runs``.
    """
    for banner_line in ('', '## Performing fusion...', ''):
        print(banner_line)

    # (fused output run, component runs) -- processed strictly in this order.
    fusion_jobs = (
        ('expanded.anserini.covid-r5.fusion1.txt',
         ('expanded.anserini.covid-r5.abstract.qq.bm25.txt',
          'expanded.anserini.covid-r5.full-text.qq.bm25.txt',
          'expanded.anserini.covid-r5.paragraph.qq.bm25.txt')),
        ('expanded.anserini.covid-r5.fusion2.txt',
         ('expanded.anserini.covid-r5.abstract.qdel.bm25.txt',
          'expanded.anserini.covid-r5.full-text.qdel.bm25.txt',
          'expanded.anserini.covid-r5.paragraph.qdel.bm25.txt')),
    )

    for fused, components in fusion_jobs:
        print(f'Performing fusion to create {fused}')
        component_paths = ' '.join(f'runs/{name}' for name in components)
        os.system(
            'PYTHONPATH=../pyserini '
            'python -m pyserini.fusion --method rrf --runtag reciprocal_rank_fusion_k=60 --k 10000 '
            f'--out runs/{fused} --runs {component_paths}'
        )
        # Short-circuit keeps compute_md5 from running when only generating checksums.
        assert generate_md5 or compute_md5(f'runs/{fused}') == cumulative_runs[fused], \
            f'Error in producing {fused}!'

    if generate_md5:
        cumulative_md5 = {}
        for run in cumulative_runs:
            cumulative_md5[run] = compute_md5(f'runs/{run}')
        print(f'Checksums for cumulative runs: {cumulative_md5}')
Ejemplo n.º 3
0
def prepare_final_submissions(cumulative_qrels, check_md5=False):
    """Filter the three round-4 runs against qrels and print their checksums.

    For each run, ``tools/scripts/filter_run_with_qrels.py`` is invoked through
    the shell with ``--discard`` and the supplied qrels, writing a ``final``
    run file under ``runs/``. The MD5 of every output is computed with
    ``compute_md5``; after all runs are produced, a table of run name
    (left-justified to 35 columns) and checksum is printed.

    The exit status of the shell commands is not inspected.

    Args:
        cumulative_qrels: Value interpolated verbatim after ``--qrels`` on the
            command line (presumably a path to a qrels file -- confirm).
        check_md5: When truthy, assert that each checksum equals the expected
            value stored in ``final_runs``.

    Raises:
        AssertionError: If ``check_md5`` is truthy and a checksum differs from
            the one recorded in ``final_runs``.
    """
    for banner_line in ('', '## Preparing final submission files by removing qrels...', ''):
        print(banner_line)

    # (output run, input run, runtag) -- processed strictly in this order.
    jobs = (
        ('anserini.final-r4.fusion1.txt', 'anserini.covid-r4.fusion1.txt', 'r4.fusion1'),
        ('anserini.final-r4.fusion2.txt', 'anserini.covid-r4.fusion2.txt', 'r4.fusion2'),
        ('anserini.final-r4.rf.txt', 'anserini.covid-r4.abstract.qdel.bm25+rm3Rf.txt', 'r4.rf'),
    )

    summary = []
    for target, source, tag in jobs:
        print(f'Generating {target}')
        command = (
            f'python tools/scripts/filter_run_with_qrels.py --discard --qrels {cumulative_qrels} '
            f'--input runs/{source} --output runs/{target} --runtag {tag}'
        )
        os.system(command)
        digest = compute_md5(f'runs/{target}')
        if check_md5:
            assert digest == final_runs[target], f'Error in producing {target}!'
        summary.append((target, digest))

    print('')
    for target, digest in summary:
        # ljust pads to 35 columns and never truncates longer names.
        print(target.ljust(35) + digest)
Ejemplo n.º 4
0
def perform_fusion(check_md5=True):
    """Fuse the round-4 BM25 runs into two fusion runs.

    Each fusion run is produced by shelling out to ``python -m pyserini.fusion``
    (with ``PYTHONPATH=../pyserini``) using ``--method rrf`` over three
    component runs under ``runs/``.

    The exit status of the shell commands is not inspected.

    Args:
        check_md5: When truthy, compute the MD5 of each fused run and assert
            that it equals the expected value stored in ``cumulative_runs``.

    Raises:
        AssertionError: If ``check_md5`` is truthy and a fused run's checksum
            differs from the one recorded in ``cumulative_runs``.
    """
    for banner_line in ('', '## Performing fusion...', ''):
        print(banner_line)

    # (fused output run, component runs) -- processed strictly in this order.
    fusion_jobs = (
        ('anserini.covid-r4.fusion1.txt',
         ('anserini.covid-r4.abstract.qq.bm25.txt',
          'anserini.covid-r4.full-text.qq.bm25.txt',
          'anserini.covid-r4.paragraph.qq.bm25.txt')),
        ('anserini.covid-r4.fusion2.txt',
         ('anserini.covid-r4.abstract.qdel.bm25.txt',
          'anserini.covid-r4.full-text.qdel.bm25.txt',
          'anserini.covid-r4.paragraph.qdel.bm25.txt')),
    )

    for fused, components in fusion_jobs:
        print(f'Performing fusion to create {fused}')
        component_paths = ' '.join(f'runs/{name}' for name in components)
        os.system(
            'PYTHONPATH=../pyserini '
            'python -m pyserini.fusion --method rrf --runtag reciprocal_rank_fusion_k=60 --k 10000 '
            f'--out runs/{fused} --runs {component_paths}'
        )

        if check_md5:
            expected = cumulative_runs
            assert compute_md5(f'runs/{fused}') == expected[fused], f'Error in producing {fused}!'
Ejemplo n.º 5
0
def prepare_final_submissions(qrels):
    """Filter the three round-3 runs against qrels, verify and print checksums.

    For each run, ``tools/scripts/filter_run_with_qrels.py`` is invoked through
    the shell with ``--discard`` and the supplied qrels, writing a ``final``
    run file under ``runs/``. The MD5 of every output is computed with
    ``compute_md5`` and asserted equal to the expected value in ``final_runs``.
    After all runs pass, a table of run name (left-justified to 35 columns)
    and checksum is printed.

    The exit status of the shell commands is not inspected.

    Args:
        qrels: Value interpolated verbatim after ``--qrels`` on the command
            line (presumably a path to a qrels file -- confirm with callers).

    Raises:
        AssertionError: If a produced run's checksum differs from the one
            recorded in ``final_runs``.
    """
    for banner_line in ('', '## Preparing final submission files by removing qrels...', ''):
        print(banner_line)

    # (output run, input run, runtag) -- processed strictly in this order.
    jobs = (
        ('anserini.final-r3.fusion1.txt', 'anserini.covid-r3.fusion1.txt', 'r3.fusion1'),
        ('anserini.final-r3.fusion2.txt', 'anserini.covid-r3.fusion2.txt', 'r3.fusion2'),
        ('anserini.final-r3.rf.txt', 'anserini.covid-r3.abstract.qdel.bm25+rm3Rf.txt', 'r3.rf'),
    )

    summary = []
    for target, source, tag in jobs:
        print(f'Generating {target}')
        command = (
            f'python tools/scripts/filter_run_with_qrels.py --discard --qrels {qrels} '
            f'--input runs/{source} --output runs/{target} --runtag {tag}'
        )
        os.system(command)
        digest = compute_md5(f'runs/{target}')
        assert digest == final_runs[target], f'Error in producing {target}!'
        summary.append((target, digest))

    print('')
    for target, digest in summary:
        print(f'{target:<35}{digest}')
Ejemplo n.º 6
0
def prepare_final_submissions(cumulative_qrels, check_md5=False):
    """Filter all round-5 runs against qrels and print one checksum per run.

    For each run, ``tools/scripts/filter_run_with_qrels.py`` is invoked through
    the shell with ``--discard`` and the supplied qrels, writing a ``final``
    run file under ``runs/``. The MD5 of every output is computed with
    ``compute_md5``; after all runs are produced, a table of run name
    (left-justified to 35 columns) and checksum is printed.

    Each run's checksum is kept alongside its own name. The previous
    implementation reused ``runs2_md5`` for later runs, so the summary line
    for ``final.ruirs2.txt`` showed the checksum of a different file, and the
    last three runs were checksummed but never reported. All generated runs
    are now listed with their correct checksum.

    The exit status of the shell commands is not inspected.

    Args:
        cumulative_qrels: Value interpolated verbatim after ``--qrels`` on the
            command line (presumably a path to a qrels file -- confirm).
        check_md5: When truthy, assert that the checksums of the three
            ``anserini.final-r5.*`` runs equal the values in ``final_runs``.
            The remaining runs are never verified.

    Raises:
        AssertionError: If ``check_md5`` is truthy and one of the verified
            checksums differs from the one recorded in ``final_runs``.
    """
    print('')
    print('## Preparing final submission files by removing qrels...')
    print('')

    # (output run, input run, runtag, verify against final_runs when check_md5)
    # NOTE(review): several runs below share the runtag 'r5.rf'; this looks
    # like copy-paste but is preserved as-is -- confirm the intended tags.
    jobs = [
        ('anserini.final-r5.fusion1.txt', 'anserini.covid-r5.fusion1.txt', 'r5.fusion1', True),
        ('anserini.final-r5.fusion2.txt', 'anserini.covid-r5.fusion2.txt', 'r5.fusion2', True),
        ('anserini.final-r5.rf.txt', 'anserini.covid-r5.abstract.qdel.bm25+rm3Rf.txt', 'r5.rf', True),
        ('final.ruir1.txt', 'ruir.fusion1.txt', 'r5.rf', False),
        ('final.ruir2.txt', 'ruir.fusion2.txt', 'r5.rf', False),
        ('final.ruir3.txt', 'ruir.fusion3.txt', 'r5.rf', False),
        ('final.qruir33.txt', 'ruir33f.txt', 'final.qruir33.txt', False),
        ('final.ruir52.txt', 'ruir52f.txt', 'r5.rf', False),
        ('final.ruirm2.txt', 'ruirm2f.txt', 'r5.rf', False),
        ('final.ruirs2.txt', 'ruirs2f.txt', 'r5.rf', False),
        ('final.qruir.txt', 'anserini.covid-r5.fusionq.txt', 'final.qruir.txt', False),
        ('final.qruir.filtered.txt', 'ruir33f.txt-filtered', 'final.qruir.filtered.txt', False),
        ('final.qonly.txt', 'anserini.covid-r5.full-text.qonly.bm25.txt', 'final.qonly.txt', False),
    ]

    checksums = []
    for run, source, runtag, verify in jobs:
        print(f'Generating {run}')
        os.system(
            f'python tools/scripts/filter_run_with_qrels.py --discard --qrels {cumulative_qrels} '
            f'--input runs/{source} --output runs/{run} --runtag {runtag}'
        )
        run_md5 = compute_md5(f'runs/{run}')
        if check_md5 and verify:
            assert run_md5 == final_runs[run], f'Error in producing {run}!'
        # Pair the checksum with its run so the summary cannot mix them up.
        checksums.append((run, run_md5))

    print('')
    for run, run_md5 in checksums:
        print(f'{run:<35}{run_md5}')