def test_nyt_qrels(self):
    # One row per dataset ID: expected qrel count, then the records expected
    # at indices 0, 9 and count - 1.
    cases = [
        ('nyt/wksup', 1864661, {
            0: GenericQrel('8454', '8454', 1),
            9: GenericQrel('8579', '8579', 1),
            1864660: GenericQrel('1854817', '1854817', 1),
        }),
        ('nyt/wksup/train', 1863657, {
            0: GenericQrel('8454', '8454', 1),
            9: GenericQrel('8579', '8579', 1),
            1863656: GenericQrel('1854817', '1854817', 1),
        }),
        ('nyt/wksup/valid', 1004, {
            0: GenericQrel('6461', '6461', 1),
            9: GenericQrel('13148', '13148', 1),
            1003: GenericQrel('1854529', '1854529', 1),
        }),
        ('nyt/trec-core-2017', 30030, {
            0: TrecQrel(query_id='307', doc_id='1001536', relevance=1, iteration='0'),
            9: TrecQrel(query_id='307', doc_id='1029429', relevance=1, iteration='0'),
            30029: TrecQrel(query_id='690', doc_id='996059', relevance=0, iteration='0'),
        }),
    ]
    for dataset_id, expected_count, expected_items in cases:
        self._test_qrels(dataset_id, count=expected_count, items=expected_items)
def test_antique_test(self):
    # Queries: expected count and the entries at indices 0, 9 and 199.
    expected_queries = {
        0: GenericQuery(query_id='3990512', text='how can we get concentration onsomething?'),
        9: GenericQuery(query_id='1783010', text='What is Blaphsemy?'),
        199: GenericQuery(query_id='1971899', text='what is masturbat***?'),
    }
    self._test_queries("antique/test", count=200, items=expected_queries)

    # Qrels: expected count and the entries at indices 0, 9 and 6588.
    expected_qrels = {
        0: TrecQrel('1964316', '1964316_5', 4, 'U0'),
        9: TrecQrel('1964316', '1964316_2', 4, 'Q0'),
        6588: TrecQrel('1262692', '3699008_1', 2, 'Q0'),
    }
    self._test_qrels('antique/test', count=6589, items=expected_qrels)
def test_cord19_qrels(self):
    # Records expected at indices 0, 9 and count - 1 of the qrels.
    expected = {
        0: TrecQrel(query_id='1', doc_id='005b2j4b', relevance=2, iteration='4.5'),
        9: TrecQrel(query_id='1', doc_id='05vx82oo', relevance=0, iteration='3'),
        69317: TrecQrel(query_id='50', doc_id='zz8wvos9', relevance=1, iteration='5'),
    }
    self._test_qrels('cord19/trec-covid', count=69318, items=expected)
def test_qrels(self):
    # Records expected at indices 0, 9 and count - 1 of the qrels.
    expected = {
        0: TrecQrel(query_id="1", doc_id="572", relevance=1, iteration="0"),
        9: TrecQrel(query_id="1", doc_id="4514", relevance=1, iteration="0"),
        2185445: TrecQrel(query_id="57", doc_id="67253426", relevance=1, iteration="0"),
    }
    self._test_qrels('trec-fair-2021/train', count=2185446, items=expected)
def test_qrels(self):
    # Records expected at indices 0, 9 and count - 1 of the qrels.
    expected = {
        0: TrecQrel(query_id='1', doc_id='184', relevance=2, iteration='0'),
        9: TrecQrel(query_id='1', doc_id='57', relevance=2, iteration='0'),
        1836: TrecQrel(query_id='225', doc_id='1188', relevance=-1, iteration='0'),
    }
    self._test_qrels('cranfield', count=1837, items=expected)
def test_dummy_queries(self):
    # NOTE: despite the method name, this checks the *qrels* of a dataset
    # built on the fly from the dummy fixture files.
    fixture_files = {
        'docs_tsv': 'test/dummy/docs.tsv',
        'queries_tsv': 'test/dummy/queries.tsv',
        'qrels_trec': 'test/dummy/qrels',
    }
    dataset = ir_datasets.create_dataset(**fixture_files)

    # Records expected at indices 0, 9 and count - 1.
    expected = {
        0: TrecQrel(query_id='1', doc_id='T1', relevance=0, iteration='0'),
        9: TrecQrel(query_id='1', doc_id='T11', relevance=0, iteration='0'),
        54: TrecQrel(query_id='4', doc_id='T15', relevance=0, iteration='0'),
    }
    self._test_qrels(dataset, count=55, items=expected)
def test_qrels(self):
    # Records expected at indices 0, 9 and count - 1 of the qrels.
    expected = {
        0: TrecQrel(query_id='50aa67fe786ca7', doc_id='430d8aa747a3', relevance=1, iteration='142'),
        9: TrecQrel(query_id='f6eff9e0848e2d', doc_id='ecd6d884243b', relevance=1, iteration='217'),
        19442628: TrecQrel(query_id='14c1b5b54212ad', doc_id='a114f6d94af0', relevance=1, iteration='24967361'),
    }
    self._test_qrels('aol-ia', count=19442629, items=expected)
def qrels_iter(self):
    """Yield one ``TrecQrel`` per (query, document) pair in the qrels stream.

    The text stream is consumed in groups of lines as produced by
    ``sentinel_splitter`` (sentinel: space, slash, newline). Within a group
    the first line, minus its trailing newline, is the query id; every
    whitespace-separated token on the remaining lines is a document id.
    Each yielded record has relevance ``1`` and iteration ``'0'``.
    """
    with self.qrels_dlc.stream() as raw:
        text = io.TextIOWrapper(raw)
        for group in sentinel_splitter(text, sentinel=' /\n'):
            query_id = group[0].rstrip('\n')
            doc_ids = (token for row in group[1:] for token in row.split())
            for doc_id in doc_ids:
                yield TrecQrel(query_id, doc_id, 1, '0')
def qrels_iter(self):
    """Yield a ``TrecQrel`` for every record of the UTF-8 qrels stream.

    Each non-blank line must hold exactly three whitespace-separated
    columns: query id, document id and an integer score. The iteration field
    of every yielded record is ``'0'``. Blank and whitespace-only lines
    (for example a trailing empty line) are skipped.

    Raises:
        RuntimeError: if a non-blank line does not have exactly 3 columns.
        ValueError: if the score column cannot be parsed by ``int``.
    """
    with self._qrels_dlc.stream() as f:
        f = codecs.getreader('utf8')(f)
        for line in f:
            # split() with no argument already ignores leading/trailing
            # whitespace, so no separate rstrip() is needed.
            cols = line.split()
            if not cols:
                continue  # blank line: nothing to parse
            if len(cols) != 3:
                raise RuntimeError(f'expected 3 columns, got {len(cols)}')
            qid, did, score = cols
            yield TrecQrel(qid, did, int(score), '0')
def test_pmc_qrels(self):
    # One row per dataset ID: expected qrel count, then the records expected
    # at indices 0, 9 and count - 1.
    cases = [
        ('pmc/v1/trec-cds-2014', 37949, {
            0: TrecQrel('1', '1033658', 0, '0'),
            9: TrecQrel('1', '1037001', 0, '0'),
            37948: TrecQrel('30', '80153', 0, '0'),
        }),
        ('pmc/v1/trec-cds-2015', 37807, {
            0: TrecQrel('1', '1065003', 1, '0'),
            9: TrecQrel('1', '117132', 0, '0'),
            37806: TrecQrel('30', '64646', 2, '0'),
        }),
        ('pmc/v2/trec-cds-2016', 37707, {
            0: TrecQrel('1', '1036067', 0, '0'),
            9: TrecQrel('1', '1160569', 0, '0'),
            37706: TrecQrel('30', '65042', 0, '0'),
        }),
    ]
    for dataset_id, expected_count, expected_items in cases:
        self._test_qrels(dataset_id, count=expected_count, items=expected_items)
def qrels_iter(self):
    """Yield a ``TrecQrel`` for each record of the UTF-8 decoded source.

    Lines equal to a bare newline are skipped. Every other line must have
    exactly four whitespace-separated columns, in the order query id,
    iteration, document id, score. The score is parsed as a float and then
    truncated to ``int``.

    Raises:
        RuntimeError: if a line does not have exactly 4 columns.
    """
    with self._source.stream() as raw:
        reader = getreader("utf8")(raw)
        for line in reader:
            if line == "\n":
                # Ignore blank lines.
                continue
            fields = line.rstrip().split()
            if len(fields) != 4:
                raise RuntimeError(
                    f"Expected 4 columns but got {len(fields)}.")
            query_id, iteration, doc_id, raw_score = fields
            relevance = int(float(raw_score))
            yield TrecQrel(query_id, doc_id, relevance, iteration)
# Parses an inline qrels literal through TrecQrels and checks the result.
#
# The literal holds five records whose columns are query id, iteration,
# doc id and relevance (one separator is a tab rather than a space); the
# expected TrecQrel tuples list them as (query id, doc id, relevance,
# iteration). The test also asserts that qrels_path() returns 'MOCK' and
# that qrels_defs() returns the (empty) mapping passed to the constructor.
#
# NOTE(review): in this view the triple-quoted literal appears on a single
# line; presumably the original has one record per line -- confirm against
# the real file before reformatting this method.
def test_qrels(self): mock_file = StringFile(''' Q0 0 D1 3 Q0 1 D2 2 Q0 0\tD3 3 Q0 1 D2 1 Q1 0 D2 1 '''.lstrip()) QREL_DEFS = {} expected_results = [ TrecQrel('Q0', 'D1', 3, '0'), TrecQrel('Q0', 'D2', 2, '1'), TrecQrel('Q0', 'D3', 3, '0'), TrecQrel('Q0', 'D2', 1, '1'), TrecQrel('Q1', 'D2', 1, '0'), ] qrels = TrecQrels(mock_file, QREL_DEFS) self.assertEqual(qrels.qrels_path(), 'MOCK') self.assertEqual(qrels.qrels_defs(), QREL_DEFS) self.assertEqual(list(qrels.qrels_iter()), expected_results)
def qrels_iter(self):
    """Yield one ``TrecQrel`` per JSON line found in the split's ``.gz`` files.

    For every entry of ``self.qrels_dlcs`` the directory returned by its
    ``path()`` is searched recursively for ``<split>/*.gz`` files. Each line
    of each file is parsed as a JSON object, and its ``'url'`` value is used
    as both query id and document id, with relevance ``1`` and iteration
    ``'0'``.

    Raises:
        json.JSONDecodeError: if a line is not valid JSON.
        KeyError: if a record has no ``'url'`` key.
    """
    for dlc in self.qrels_dlcs:
        base_path = Path(dlc.path())
        # sorted() fixes the file order; glob() itself gives no ordering guarantee.
        for file in sorted(base_path.glob(f'**/{self.split}/*.gz')):
            # JSON text is UTF-8; decode explicitly rather than with the
            # platform's locale-dependent default encoding.
            with gzip.open(file, 'rt', encoding='utf8') as f:
                for line in f:
                    data = json.loads(line)
                    yield TrecQrel(
                        query_id=data['url'],
                        doc_id=data['url'],
                        relevance=1,
                        iteration='0',
                    )
def test_antique_train_split200train(self):
    # Queries: expected count and the entries at indices 0, 9 and 2225.
    expected_queries = {
        0: GenericQuery(query_id='3097310', text='What causes severe swelling and pain in the knees?'),
        9: GenericQuery(query_id='3486120', text='Why does PAMELA ANDERSON ........NOT CARE about Children?'),
        2225: GenericQuery(query_id='4086230', text='See I have lost my voice what do I do?'),
    }
    self._test_queries('antique/train/split200-train', count=2226, items=expected_queries)

    # Qrels: expected count and the entries at indices 0, 9 and 25228.
    expected_qrels = {
        0: TrecQrel('2531329', '2531329_0', 4, 'U0'),
        9: TrecQrel('3825668', '3825668_4', 4, 'Q0'),
        25228: TrecQrel('884731', '884731_1', 3, 'Q0'),
    }
    self._test_qrels('antique/train/split200-train', count=25229, items=expected_qrels)
def qrels_iter(self):
    """Yield a ``TrecQrel`` for every data row of the UTF-8 qrels stream.

    The first line must be the header row ``query-id<TAB>corpus-id<TAB>score``
    (surrounding whitespace ignored). Each following non-blank line must
    hold exactly three whitespace-separated columns: query id, document id
    and an integer score. The iteration field of every yielded record is
    ``'0'``. Blank and whitespace-only lines are skipped.

    Raises:
        RuntimeError: if the stream is empty, the header row is not the
            expected one, or a data line does not have exactly 3 columns.
        ValueError: if the score column cannot be parsed by ``int``.
    """
    with self._qrels_dlc.stream() as f:
        f = codecs.getreader('utf8')(f)
        it = iter(f)
        # Consume and validate the header unconditionally. Doing this inside
        # an ``assert`` would skip both the check and the consumption of the
        # header line when Python runs with -O.
        header = next(it, None)
        if header is None:
            raise RuntimeError('expected a header row, got an empty file')
        if header.strip() != 'query-id\tcorpus-id\tscore':
            raise RuntimeError(f'unexpected header row: {header!r}')
        for line in it:
            cols = line.split()
            if not cols:
                continue  # ignore blank lines
            if len(cols) != 3:
                raise RuntimeError(f'expected 3 columns, got {len(cols)}')
            qid, did, score = cols
            yield TrecQrel(qid, did, int(score), '0')
def test_antique_train_split200valid(self):
    # Queries: expected count and the entries at indices 0, 9 and 199.
    expected_queries = {
        0: GenericQuery(query_id='1907320', text='How do I get college money?'),
        9: GenericQuery(query_id='3083719', text='How do you safely wean a person off Risperidal?'),
        199: GenericQuery(query_id='2573745', text='How did African American women get the right to Vote?'),
    }
    self._test_queries('antique/train/split200-valid', count=200, items=expected_queries)

    # Qrels: expected count and the entries at indices 0, 9 and 2192.
    expected_qrels = {
        0: TrecQrel('2550445', '2550445_0', 4, 'U0'),
        9: TrecQrel('196651', '196651_1', 4, 'Q0'),
        2192: TrecQrel('344029', '344029_4', 4, 'Q0'),
    }
    self._test_qrels('antique/train/split200-valid', count=2193, items=expected_qrels)
def test_antique_train(self):
    # Queries: expected count and the entries at indices 0, 9 and 2425.
    expected_queries = {
        0: GenericQuery(query_id='3097310', text='What causes severe swelling and pain in the knees?'),
        9: GenericQuery(query_id='992730', text='How do you transfer voicemail messages onto tape?'),
        2425: GenericQuery(query_id='4086230', text='See I have lost my voice what do I do?'),
    }
    self._test_queries('antique/train', count=2426, items=expected_queries)

    # Qrels: expected count and the entries at indices 0, 9 and 27421.
    expected_qrels = {
        0: TrecQrel('2531329', '2531329_0', 4, 'U0'),
        9: TrecQrel('3825668', '3825668_4', 4, 'Q0'),
        27421: TrecQrel('884731', '884731_1', 3, 'Q0'),
    }
    self._test_qrels('antique/train', count=27422, items=expected_qrels)
def test_antique_test_nonoffensive(self):
    # Queries: expected count and the entries at indices 0, 9 and 175.
    expected_queries = {
        0: GenericQuery(query_id='3990512', text='how can we get concentration onsomething?'),
        9: GenericQuery(query_id='1783010', text='What is Blaphsemy?'),
        175: GenericQuery(query_id='1340574', text='Why do some people only go to church on Easter Sunday and never go again until Christmas ?'),
    }
    self._test_queries('antique/test/non-offensive', count=176, items=expected_queries)

    # Qrels: expected count and the entries at indices 0, 9 and 5751.
    expected_qrels = {
        0: TrecQrel('1964316', '1964316_5', 4, 'U0'),
        9: TrecQrel('1964316', '1964316_2', 4, 'Q0'),
        5751: TrecQrel('1262692', '3699008_1', 2, 'Q0'),
    }
    self._test_qrels('antique/test/non-offensive', count=5752, items=expected_qrels)
def test_trec_mandarin_qrels(self):
    # One row per dataset ID: expected qrel count, then the records expected
    # at indices 0, 9 and count - 1.
    cases = [
        ('trec-mandarin/trec5', 15588, {
            0: TrecQrel('1', 'CB001007-BFJ-588-408', 0, '0'),
            9: TrecQrel('1', 'CB006019-BFJ-2117-506', 0, '0'),
            15587: TrecQrel('28', 'pd9312-91', 0, '0'),
        }),
        ('trec-mandarin/trec6', 9236, {
            0: TrecQrel('29', 'CB001004-BFW-1143-212', 1, '0'),
            9: TrecQrel('29', 'CB002028-BFW-1086-1035', 0, '0'),
            9235: TrecQrel('54', 'pd9312-1824', 0, '0'),
        }),
    ]
    for dataset_id, expected_count, expected_items in cases:
        self._test_qrels(dataset_id, count=expected_count, items=expected_items)
def test_medline_qrels(self):
    # One row per dataset ID: expected qrel count, then the records expected
    # at indices 0, 9 and count - 1.
    cases = [
        ('medline/trec-genomics-2004', 8268, {
            0: TrecQrel('1', '10077651', 2, '0'),
            9: TrecQrel('1', '10449402', 2, '0'),
            8267: TrecQrel('50', '9951698', 1, '0'),
        }),
        ('medline/trec-genomics-2005', 39958, {
            0: TrecQrel('100', '10023709', 0, '0'),
            9: TrecQrel('100', '10138840', 0, '0'),
            39957: TrecQrel('149', '9989364', 0, '0'),
        }),
    ]
    for dataset_id, expected_count, expected_items in cases:
        self._test_qrels(dataset_id, count=expected_count, items=expected_items)
def test_qrels(self):
    # One row per dataset ID: expected qrel count, then the records expected
    # at indices 0, 9 and count - 1.
    cases = [
        ('msmarco-qna/train', 8069749, {
            0: TrecQrel('1185869', '0-0', 1, '0'),
            9: TrecQrel('1185869', '9-0', 0, '0'),
            8069748: TrecQrel('461916', '7066857-0', 0, '0'),
        }),
        ('msmarco-qna/dev', 1008985, {
            0: TrecQrel('1102432', '7066858-0', 0, '0'),
            9: TrecQrel('1102432', '7066861-0', 0, '0'),
            1008984: TrecQrel('371455', '8009483-0', 0, '0'),
        }),
    ]
    for dataset_id, expected_count, expected_items in cases:
        self._test_qrels(dataset_id, count=expected_count, items=expected_items)
def test_trec_spanish_qrels(self):
    # One row per dataset ID: expected qrel count, then the records expected
    # at indices 0, 9 and count - 1.
    cases = [
        ('trec-spanish/trec3', 19005, {
            0: TrecQrel('1', 'SP94-0000082', 1, '0'),
            9: TrecQrel('1', 'SP94-0001385', 0, '0'),
            19004: TrecQrel('25', 'SP94-0202950', 1, '0'),
        }),
        ('trec-spanish/trec4', 13109, {
            0: TrecQrel('26', 'SP94-0000054', 1, '0'),
            9: TrecQrel('26', 'SP94-0000700', 0, '0'),
            13108: TrecQrel('50', 'SP94-0202879', 0, '0'),
        }),
    ]
    for dataset_id, expected_count, expected_items in cases:
        self._test_qrels(dataset_id, count=expected_count, items=expected_items)
def test_qrels(self):
    # One row per dataset ID: expected qrel count, then the records expected
    # at indices 0, 9 and count - 1.
    cases = [
        ('tweets2013-ia/trec-mb-2013', 71279, {
            0: TrecQrel('111', '297136541426397184', 0, 'Q0'),
            9: TrecQrel('111', '299374475248537602', 0, 'Q0'),
            71278: TrecQrel('170', '317942407385726976', 0, 'Q0'),
        }),
        ('tweets2013-ia/trec-mb-2014', 57985, {
            0: TrecQrel('171', '305851659194609664', 0, 'Q0'),
            9: TrecQrel('171', '304392188215836672', 0, 'Q0'),
            57984: TrecQrel('225', '299257357664387072', 0, 'Q0'),
        }),
    ]
    for dataset_id, expected_count, expected_items in cases:
        self._test_qrels(dataset_id, count=expected_count, items=expected_items)
def test_car_qrels(self):
    # One row per dataset ID: expected qrel count, then the records expected
    # at indices 0, 9 and count - 1.
    cases = [
        ('car/v1.5/trec-y1/auto', 5820, {
            0: TrecQrel('Aftertaste/Aftertaste%20processing%20in%20the%20cerebral%20cortex', '38c1bd25ddca2705164677a3f598c46df85afba7', 1, '0'),
            9: TrecQrel('Aftertaste/Temporal%20taste%20perception', '8a41a87100d139bb9c108c8cab2ac3baaabea3ce', 1, '0'),
            5819: TrecQrel('Yellowstone%20National%20Park/Recreation', 'e80b5185da1493edde41bea19a389a3f62167369', 1, '0'),
        }),
        ('car/v1.5/trec-y1/manual', 29571, {
            0: TrecQrel('Hadley%20cell/Hadley%20cell%20expansion', '389c8a699f4db2f0278700d1c32e63ac369906cd', -1, '0'),
            9: TrecQrel('Water%20cycle/Effects%20on%20biogeochemical%20cycling', '844a0a0d5860ff1da8a9fcfb16cc4ce04ffb963f', 1, '0'),
            29570: TrecQrel('Rancidification/Reducing%20rancidification', '20a4e9af2853803a08854a1cc8973534e2235658', -1, '0'),
        }),
        ('car/v1.5/test200', 4706, {
            0: TrecQrel('ASME/ASME%20codes%20and%20standards', '16d8f62407d2cdd283a71735e5c83f7d7947b93a', 1, '0'),
            9: TrecQrel('Activity%20theory/An%20explanation', 'c0ee784b8f0eb3b80aaf85f42d5148655192cc1d', 1, '0'),
            4705: TrecQrel('Zang-fu/Yin/yang%20and%20the%20Five%20Elements', 'fe6f4dd186037e09bf00f0f08bf172babac7930b', 1, '0'),
        }),
        ('car/v1.5/train/fold0', 1054369, {
            0: TrecQrel("$pread/''$pread''%20Book", '2f545ffad1581dea4a2e4720aa9feb7389e1956a', 1, '0'),
            9: TrecQrel('%22Wild%20Bill%22%20Hickok/Death/Burial', '528b68a3355672c9b8bd5003428b72f54074b3fb', 1, '0'),
            1054368: TrecQrel('Zygmunt%20Szcz%C4%99sny%20Feli%C5%84ski/Views%20on%20Poland', 'fd77154f625ca721e554cbd0e4f33b51d4d92af6', 1, '0'),
        }),
    ]
    for dataset_id, expected_count, expected_items in cases:
        self._test_qrels(dataset_id, count=expected_count, items=expected_items)
def test_qrels(self):
    # One row per dataset ID: expected qrel count, then the records expected
    # at indices 0, 9 and count - 1.
    cases = [
        ('tripclick/train', 2705212, {
            0: TrecQrel('8', '1398048', 1, '0'),
            9: TrecQrel('8', '1431742', 1, '0'),
            2705211: TrecQrel('1647720', '11698361', 1, '0'),
        }),
        ('tripclick/train/head', 116821, {
            0: TrecQrel('8', '1398048', 1, '0'),
            9: TrecQrel('8', '1431742', 1, '0'),
            116820: TrecQrel('1630245', '10818871', 1, '0'),
        }),
        ('tripclick/train/head/dctr', 128420, {
            0: TrecQrel('8', '1398048', 3, '0'),
            9: TrecQrel('8', '5651514', 1, '0'),
            128419: TrecQrel('1630245', '9448244', 0, '0'),
        }),
        ('tripclick/train/torso', 966898, {
            0: TrecQrel('5', '1099235', 1, '0'),
            9: TrecQrel('15', '9028026', 0, '0'),
            966897: TrecQrel('1647511', '11429892', 1, '0'),
        }),
        ('tripclick/train/tail', 1621493, {
            0: TrecQrel('1', '981744', 1, '0'),
            9: TrecQrel('27', '1194092', 1, '0'),
            1621492: TrecQrel('1647720', '11698361', 1, '0'),
        }),
        ('tripclick/val', 82409, {
            0: TrecQrel('38', '1390633', 1, '0'),
            9: TrecQrel('38', '9137657', 0, '0'),
            82408: TrecQrel('1645595', '9982749', 1, '0'),
        }),
        ('tripclick/val/head', 64364, {
            0: TrecQrel('38', '1390633', 1, '0'),
            9: TrecQrel('38', '9137657', 0, '0'),
            64363: TrecQrel('1630209', '11086242', 1, '0'),
        }),
        ('tripclick/val/head/dctr', 66812, {
            0: TrecQrel('38', '1390633', 2, '0'),
            9: TrecQrel('38', '7858667', 0, '0'),
            66811: TrecQrel('1630209', '9358372', 0, '0'),
        }),
        ('tripclick/val/torso', 14133, {
            0: TrecQrel('534', '1397165', 1, '0'),
            9: TrecQrel('534', '5671894', 1, '0'),
            14132: TrecQrel('1626635', '10258672', 1, '0'),
        }),
        ('tripclick/val/tail', 3912, {
            0: TrecQrel('1052', '951102', 1, '0'),
            9: TrecQrel('9347', '296234', 1, '0'),
            3911: TrecQrel('1645595', '9982749', 1, '0'),
        }),
    ]
    for dataset_id, expected_count, expected_items in cases:
        self._test_qrels(dataset_id, count=expected_count, items=expected_items)
def test_msmarco_passage_qrels(self):
    # One row per dataset ID: expected qrel count, then the records expected
    # at indices 0, 9 and count - 1. TrecQrel arguments are positional:
    # (query_id, doc_id, relevance, iteration).
    cases = [
        ('msmarco-passage/train', 532761, {
            0: TrecQrel('1185869', '0', 1, '0'),
            9: TrecQrel('186154', '1160', 1, '0'),
            532760: TrecQrel('405466', '8841735', 1, '0'),
        }),
        ('msmarco-passage/train/judged', 532761, {
            0: TrecQrel('1185869', '0', 1, '0'),
            9: TrecQrel('186154', '1160', 1, '0'),
            532760: TrecQrel('405466', '8841735', 1, '0'),
        }),
        ('msmarco-passage/train/medical', 54627, {
            0: TrecQrel('403613', '60', 1, '0'),
            9: TrecQrel('685235', '12191', 1, '0'),
            54626: TrecQrel('496447', '8839368', 1, '0'),
        }),
        ('msmarco-passage/dev', 59273, {
            0: TrecQrel('1102432', '2026790', 1, '0'),
            9: TrecQrel('300674', '7067032', 1, '0'),
            59272: TrecQrel('371455', '8009476', 1, '0'),
        }),
        ('msmarco-passage/dev/small', 7437, {
            0: TrecQrel('300674', '7067032', 1, '0'),
            9: TrecQrel('54544', '7068203', 1, '0'),
            7436: TrecQrel('195199', '8009377', 1, '0'),
        }),
        ('msmarco-passage/dev/judged', 59273, {
            0: TrecQrel('1102432', '2026790', 1, '0'),
            9: TrecQrel('300674', '7067032', 1, '0'),
            59272: TrecQrel('371455', '8009476', 1, '0'),
        }),
        ('msmarco-passage/trec-dl-2019', 9260, {
            0: TrecQrel('19335', '1017759', 0, 'Q0'),
            9: TrecQrel('19335', '1274615', 0, 'Q0'),
            9259: TrecQrel('1133167', '977421', 0, 'Q0'),
        }),
        ('msmarco-passage/trec-dl-2019/judged', 9260, {
            0: TrecQrel('19335', '1017759', 0, 'Q0'),
            9: TrecQrel('19335', '1274615', 0, 'Q0'),
            9259: TrecQrel('1133167', '977421', 0, 'Q0'),
        }),
        ('msmarco-passage/train/split200-train', 532630, {
            0: TrecQrel('1185869', '0', 1, '0'),
            9: TrecQrel('186154', '1160', 1, '0'),
            532629: TrecQrel('405466', '8841735', 1, '0'),
        }),
        ('msmarco-passage/train/split200-valid', 131, {
            0: TrecQrel('318166', '179254', 1, '0'),
            9: TrecQrel('1158250', '791721', 1, '0'),
            130: TrecQrel('302427', '512871', 1, '0'),
        }),
        # NOTE(review): the next two IDs are msmarco-document datasets inside
        # a passage test -- kept as in the original; confirm it is intended.
        ('msmarco-document/trec-dl-2020', 9098, {
            0: TrecQrel('42255', 'D1006124', 0, '0'),
            9: TrecQrel('42255', 'D1168483', 0, '0'),
            9097: TrecQrel('1136962', 'D96742', 0, '0'),
        }),
        ('msmarco-document/trec-dl-2020/judged', 9098, {
            0: TrecQrel('42255', 'D1006124', 0, '0'),
            9: TrecQrel('42255', 'D1168483', 0, '0'),
            9097: TrecQrel('1136962', 'D96742', 0, '0'),
        }),
        ('msmarco-passage/trec-dl-hard', 4256, {
            0: TrecQrel('915593', '1396701', 0, 'Q0'),
            9: TrecQrel('915593', '1772932', 0, 'Q0'),
            4255: TrecQrel('1056416', '8739207', 0, 'Q0'),
        }),
        ('msmarco-passage/trec-dl-hard/fold1', 1072, {
            0: TrecQrel('915593', '1396701', 0, 'Q0'),
            9: TrecQrel('915593', '1772932', 0, 'Q0'),
            1071: TrecQrel('174463', '8770954', 1, '0'),
        }),
        ('msmarco-passage/trec-dl-hard/fold2', 898, {
            0: TrecQrel('794429', '8663241', 3, 'Q0'),
            9: TrecQrel('588587', '8548223', 1, 'Q0'),
            897: TrecQrel('19335', '901329', 0, 'Q0'),
        }),
        ('msmarco-passage/trec-dl-hard/fold3', 444, {
            0: TrecQrel('177604', '8451987', 0, 'Q0'),
            9: TrecQrel('177604', '8451996', 2, 'Q0'),
            443: TrecQrel('1105792', '996676', 0, '0'),
        }),
        ('msmarco-passage/trec-dl-hard/fold4', 716, {
            0: TrecQrel('801118', '8708701', 3, 'Q0'),
            9: TrecQrel('507445', '8407104', 1, 'Q0'),
            715: TrecQrel('1056416', '8739207', 0, 'Q0'),
        }),
        ('msmarco-passage/trec-dl-hard/fold5', 1126, {
            0: TrecQrel('190044', '1353072', 3, 'Q0'),
            9: TrecQrel('190044', '886798', 1, 'Q0'),
            1125: TrecQrel('1103153', '8226445', 0, 'Q0'),
        }),
    ]
    for dataset_id, expected_count, expected_items in cases:
        self._test_qrels(dataset_id, count=expected_count, items=expected_items)
def qrels_iter(self):
    """Yield a ``TrecQrel`` for each related document of each JSON record.

    Every line of the qrels stream is parsed as a JSON object. For each
    entry of its ``"rel_docs"`` value, a record is yielded with the
    stringified ``"id"`` as query id, the stringified entry as document id,
    relevance ``1`` and iteration ``"0"``.
    """
    with self._qrels_dlc.stream() as stream:
        for raw_record in stream:
            record = json.loads(raw_record)
            yield from (
                TrecQrel(str(record["id"]), str(rel_doc), 1, "0")
                for rel_doc in record["rel_docs"]
            )
def test_qrels(self):
    # One row per dataset ID: expected qrel count, then the records expected
    # at indices 0, 9 and count - 1.
    cases = [
        ('car/v1.5/trec-y1/auto', 5820, {
            0: TrecQrel('Aftertaste/Aftertaste%20processing%20in%20the%20cerebral%20cortex', '38c1bd25ddca2705164677a3f598c46df85afba7', 1, '0'),
            9: TrecQrel('Aftertaste/Temporal%20taste%20perception', '8a41a87100d139bb9c108c8cab2ac3baaabea3ce', 1, '0'),
            5819: TrecQrel('Yellowstone%20National%20Park/Recreation', 'e80b5185da1493edde41bea19a389a3f62167369', 1, '0'),
        }),
        ('car/v1.5/trec-y1/manual', 29571, {
            0: TrecQrel('Hadley%20cell/Hadley%20cell%20expansion', '389c8a699f4db2f0278700d1c32e63ac369906cd', -1, '0'),
            9: TrecQrel('Water%20cycle/Effects%20on%20biogeochemical%20cycling', '844a0a0d5860ff1da8a9fcfb16cc4ce04ffb963f', 1, '0'),
            29570: TrecQrel('Rancidification/Reducing%20rancidification', '20a4e9af2853803a08854a1cc8973534e2235658', -1, '0'),
        }),
        ('car/v1.5/test200', 4706, {
            0: TrecQrel('ASME/ASME%20codes%20and%20standards', '16d8f62407d2cdd283a71735e5c83f7d7947b93a', 1, '0'),
            9: TrecQrel('Activity%20theory/An%20explanation', 'c0ee784b8f0eb3b80aaf85f42d5148655192cc1d', 1, '0'),
            4705: TrecQrel('Zang-fu/Yin/yang%20and%20the%20Five%20Elements', 'fe6f4dd186037e09bf00f0f08bf172babac7930b', 1, '0'),
        }),
        ('car/v1.5/train/fold0', 1054369, {
            0: TrecQrel("$pread/''$pread''%20Book", '2f545ffad1581dea4a2e4720aa9feb7389e1956a', 1, '0'),
            9: TrecQrel('%22Wild%20Bill%22%20Hickok/Death/Burial', '528b68a3355672c9b8bd5003428b72f54074b3fb', 1, '0'),
            1054368: TrecQrel('Zygmunt%20Szcz%C4%99sny%20Feli%C5%84ski/Views%20on%20Poland', 'fd77154f625ca721e554cbd0e4f33b51d4d92af6', 1, '0'),
        }),
        ('car/v1.5/train/fold1', 1052398, {
            0: TrecQrel('$100,000%20infield/Eddie%20Collins', 'c7aa3c7821a112a149d85f650cbca4ec23c63617', 1, '0'),
            9: TrecQrel("%60Abdu'l-Bah%C3%A1/Acre/Marriage%20and%20family%20life", '4da4ea634ccae1173e553129b368e95962969ec8', 1, '0'),
            1052397: TrecQrel('Zygosity/Types/Nullizygous', '36186e2655db62fd9c31701302f86636b03d2511', 1, '0'),
        }),
        ('car/v1.5/train/fold2', 1061162, {
            0: TrecQrel("$h*!%20My%20Dad%20Says/''Surviving%20Jack''", 'dc4866e5b230ffb48b6f808f41ccf8063fbdc9fa', 1, '0'),
            9: TrecQrel('%22Left-Wing%22%20Communism:%20An%20Infantile%20Disorder/%22Left-wing%22%20communism%20in%20Germany', '22ec581e3e1c5397e64bc6f0066dc8aea12fc71f', 1, '0'),
            1061161: TrecQrel('ZynAddSubFX/Windows%20version', 'b9d1be10b54e5efcbf3e6f1e5f2fbaf7c8af303c', 1, '0'),
        }),
        ('car/v1.5/train/fold3', 1046784, {
            0: TrecQrel('$2%20billion%20arms%20deal/Confessional%20statements', '0e512b5962fa5ea838a578cbf414ae09b863a33f', 1, '0'),
            9: TrecQrel('$2%20billion%20arms%20deal/Investigative%20committee', '812cb64a35f482bd60f82c1d67204c73612cb6a7', 1, '0'),
            1046783: TrecQrel('Zyuden%20Sentai%20Kyoryuger/Video%20game', '844b90cf6f7c62e5bf51625a4d216baec2825bf9', 1, '0'),
        }),
        ('car/v1.5/train/fold4', 1061911, {
            0: TrecQrel('$1,000%20genome/Additional%20Resources', '67ea5eae967657a8f0282066e3086573e41726d5', 1, '0'),
            9: TrecQrel('$1,000%20genome/Commercial%20efforts', 'a7ac9041cd833d6b09cc5270b495e9f94704027f', 1, '0'),
            1061910: TrecQrel('Zyron/Products', 'f355f98b4e3d5b08f60abe61022e9393202b9718', 1, '0'),
        }),
    ]
    for dataset_id, expected_count, expected_items in cases:
        self._test_qrels(dataset_id, count=expected_count, items=expected_items)
def test_qrels(self):
    # One row per dataset ID: expected qrel count, then the records expected
    # at indices 0, 9 and count - 1.
    cases = [
        ('msmarco-passage-v2/train', 284212, {
            0: TrecQrel('1185869', 'msmarco_passage_08_840101254', 1, '0'),
            9: TrecQrel('186154', 'msmarco_passage_02_556351008', 1, '0'),
            284211: TrecQrel('697642', 'msmarco_passage_05_512118117', 1, '0'),
        }),
        ('msmarco-passage-v2/dev1', 4009, {
            0: TrecQrel('763878', 'msmarco_passage_33_459057644', 1, '0'),
            9: TrecQrel('290779', 'msmarco_passage_10_301562908', 1, '0'),
            4008: TrecQrel('1091692', 'msmarco_passage_23_330102695', 1, '0'),
        }),
        ('msmarco-passage-v2/dev2', 4411, {
            0: TrecQrel('419507', 'msmarco_passage_04_254301507', 1, '0'),
            9: TrecQrel('1087630', 'msmarco_passage_18_685926585', 1, '0'),
            4410: TrecQrel('961297', 'msmarco_passage_18_858458289', 1, '0'),
        }),
        ('msmarco-passage-v2/trec-dl-2021', 10828, {
            0: TrecQrel('2082', 'msmarco_passage_01_552803451', 0, '0'),
            9: TrecQrel('2082', 'msmarco_passage_02_437070914', 3, '0'),
            10827: TrecQrel('1129560', 'msmarco_passage_68_639912287', 0, '0'),
        }),
        ('msmarco-passage-v2/trec-dl-2021/judged', 10828, {
            0: TrecQrel('2082', 'msmarco_passage_01_552803451', 0, '0'),
            9: TrecQrel('2082', 'msmarco_passage_02_437070914', 3, '0'),
            10827: TrecQrel('1129560', 'msmarco_passage_68_639912287', 0, '0'),
        }),
    ]
    for dataset_id, expected_count, expected_items in cases:
        self._test_qrels(dataset_id, count=expected_count, items=expected_items)
def test_qrels(self):
    # Records expected at indices 0, 9 and count - 1 of the qrels.
    expected = {
        0: TrecQrel(query_id='303', doc_id='APW19980609.1531', relevance=2, iteration='0'),
        9: TrecQrel(query_id='303', doc_id='APW19981117.0914', relevance=0, iteration='0'),
        37797: TrecQrel(query_id='689', doc_id='XIE20000925.0055', relevance=0, iteration='0'),
    }
    self._test_qrels('aquaint/trec-robust-2005', count=37798, items=expected)