예제 #1
0
파일: nyt.py 프로젝트: allenai/ir_datasets
 def test_nyt_qrels(self):
     # Each row is (dataset id, expected qrel count, sample items); rows are
     # forwarded to self._test_qrels one at a time, in the order listed.
     cases = (
         ('nyt/wksup', 1864661, {
             0: GenericQrel('8454', '8454', 1),
             9: GenericQrel('8579', '8579', 1),
             1864660: GenericQrel('1854817', '1854817', 1),
         }),
         ('nyt/wksup/train', 1863657, {
             0: GenericQrel('8454', '8454', 1),
             9: GenericQrel('8579', '8579', 1),
             1863656: GenericQrel('1854817', '1854817', 1),
         }),
         ('nyt/wksup/valid', 1004, {
             0: GenericQrel('6461', '6461', 1),
             9: GenericQrel('13148', '13148', 1),
             1003: GenericQrel('1854529', '1854529', 1),
         }),
         ('nyt/trec-core-2017', 30030, {
             0: TrecQrel('307', '1001536', 1, '0'),
             9: TrecQrel('307', '1029429', 1, '0'),
             30029: TrecQrel('690', '996059', 0, '0'),
         }),
     )
     for dataset_id, total, samples in cases:
         self._test_qrels(dataset_id, count=total, items=samples)
예제 #2
0
 def test_antique_test(self):
     # Queries first: expected count plus three sample GenericQuery records.
     query_samples = {
         0: GenericQuery(query_id='3990512',
                         text='how can we get concentration onsomething?'),
         9: GenericQuery(query_id='1783010', text='What is Blaphsemy?'),
         199: GenericQuery(query_id='1971899', text='what is masturbat***?'),
     }
     self._test_queries("antique/test", count=200, items=query_samples)

     # Then the qrels of the same dataset id.
     qrel_samples = {
         0: TrecQrel(query_id='1964316', doc_id='1964316_5',
                     relevance=4, iteration='U0'),
         9: TrecQrel(query_id='1964316', doc_id='1964316_2',
                     relevance=4, iteration='Q0'),
         6588: TrecQrel(query_id='1262692', doc_id='3699008_1',
                        relevance=2, iteration='Q0'),
     }
     self._test_qrels('antique/test', count=6589, items=qrel_samples)
예제 #3
0
 def test_cord19_qrels(self):
     # Sample qrels handed to self._test_qrels together with the expected count.
     samples = {
         0: TrecQrel('1', '005b2j4b', 2, '4.5'),
         9: TrecQrel('1', '05vx82oo', 0, '3'),
         69317: TrecQrel('50', 'zz8wvos9', 1, '5'),
     }
     self._test_qrels('cord19/trec-covid', count=69318, items=samples)
예제 #4
0
 def test_qrels(self):
     # Sample qrels handed to self._test_qrels together with the expected count.
     samples = {
         0: TrecQrel("1", "572", 1, "0"),
         9: TrecQrel("1", "4514", 1, "0"),
         2185445: TrecQrel("57", "67253426", 1, "0"),
     }
     self._test_qrels('trec-fair-2021/train', count=2185446, items=samples)
예제 #5
0
 def test_qrels(self):
     # Sample qrels handed to self._test_qrels together with the expected count.
     # Note the last sample carries a negative relevance value.
     samples = {
         0: TrecQrel('1', '184', 2, '0'),
         9: TrecQrel('1', '57', 2, '0'),
         1836: TrecQrel('225', '1188', -1, '0'),
     }
     self._test_qrels('cranfield', count=1837, items=samples)
예제 #6
0
 def test_dummy_queries(self):
     # Build a dataset from the dummy fixture files, then check its qrels.
     fixture_paths = {
         'docs_tsv': 'test/dummy/docs.tsv',
         'queries_tsv': 'test/dummy/queries.tsv',
         'qrels_trec': 'test/dummy/qrels',
     }
     dummy = ir_datasets.create_dataset(**fixture_paths)
     samples = {
         0: TrecQrel('1', 'T1', 0, '0'),
         9: TrecQrel('1', 'T11', 0, '0'),
         54: TrecQrel('4', 'T15', 0, '0'),
     }
     self._test_qrels(dummy, count=55, items=samples)
예제 #7
0
 def test_qrels(self):
     # Sample qrels handed to self._test_qrels together with the expected count.
     samples = {
         0: TrecQrel('50aa67fe786ca7', '430d8aa747a3', 1, '142'),
         9: TrecQrel('f6eff9e0848e2d', 'ecd6d884243b', 1, '217'),
         19442628: TrecQrel('14c1b5b54212ad', 'a114f6d94af0', 1, '24967361'),
     }
     self._test_qrels('aol-ia', count=19442629, items=samples)
예제 #8
0
 def qrels_iter(self):
     """Yield a ``TrecQrel`` for every document id listed in each group.

     The text stream is cut into groups by ``sentinel_splitter``. The first
     line of a group (minus its trailing newline) is used as the query id;
     every whitespace-separated token on the remaining lines is used as a
     document id. Each qrel is emitted with relevance 1 and iteration '0'.
     """
     with self.qrels_dlc.stream() as raw:
         text = io.TextIOWrapper(raw)
         for group in sentinel_splitter(text, sentinel='   /\n'):
             query_id = group[0].rstrip('\n')
             doc_ids = (token for row in group[1:] for token in row.split())
             for doc_id in doc_ids:
                 yield TrecQrel(query_id, doc_id, 1, '0')
예제 #9
0
 def qrels_iter(self):
     """Yield a ``TrecQrel`` for each line of the UTF-8 decoded qrels stream.

     Every line must split on whitespace into exactly three columns
     (query id, document id, integer score); iteration is always '0'.

     Raises:
         RuntimeError: if a line does not have exactly three columns.
     """
     with self._qrels_dlc.stream() as raw:
         reader = codecs.getreader('utf8')(raw)
         for row in reader:
             fields = row.rstrip().split()
             n_fields = len(fields)
             if n_fields != 3:
                 raise RuntimeError(f'expected 3 columns, got {n_fields}')
             yield TrecQrel(fields[0], fields[1], int(fields[2]), '0')
예제 #10
0
 def test_pmc_qrels(self):
     # Each row is (dataset id, expected qrel count, sample items); rows are
     # forwarded to self._test_qrels one at a time, in the order listed.
     cases = (
         ('pmc/v1/trec-cds-2014', 37949, {
             0: TrecQrel('1', '1033658', 0, '0'),
             9: TrecQrel('1', '1037001', 0, '0'),
             37948: TrecQrel('30', '80153', 0, '0'),
         }),
         ('pmc/v1/trec-cds-2015', 37807, {
             0: TrecQrel('1', '1065003', 1, '0'),
             9: TrecQrel('1', '117132', 0, '0'),
             37806: TrecQrel('30', '64646', 2, '0'),
         }),
         ('pmc/v2/trec-cds-2016', 37707, {
             0: TrecQrel('1', '1036067', 0, '0'),
             9: TrecQrel('1', '1160569', 0, '0'),
             37706: TrecQrel('30', '65042', 0, '0'),
         }),
     )
     for dataset_id, total, samples in cases:
         self._test_qrels(dataset_id, count=total, items=samples)
예제 #11
0
 def qrels_iter(self):
     """Parse the UTF-8 decoded source stream into ``TrecQrel`` records.

     Every non-blank line must split on whitespace into exactly four
     columns, in the order: query id, iteration, document id, score.
     The score is parsed as a float and then truncated to an int.

     Yields:
         ``TrecQrel(query_id, doc_id, relevance, iteration)`` per line.

     Raises:
         RuntimeError: if a non-blank line does not have four columns.
     """
     with self._source.stream() as file:
         reader = getreader("utf8")(file)
         for line in reader:
             # ``split()`` with no separator already discards leading and
             # trailing whitespace, so no explicit ``rstrip()`` is needed.
             cols = line.split()
             if not cols:
                 # Blank line. Testing the split result rather than
                 # ``line == "\n"`` also skips "\r\n" and whitespace-only
                 # lines instead of reporting them as "got 0" columns.
                 continue
             if len(cols) != 4:
                 raise RuntimeError(
                     f"Expected 4 columns but got {len(cols)}.")
             qid, it, did, score = cols
             yield TrecQrel(qid, did, int(float(score)), it)
예제 #12
0
    def test_qrels(self):
        mock_file = StringFile('''
Q0 0 D1 3
Q0 1 D2   2

Q0 0\tD3 3
Q0 1 D2 1
Q1 0 D2 1
'''.lstrip())
        QREL_DEFS = {}
        expected_results = [
            TrecQrel('Q0', 'D1', 3, '0'),
            TrecQrel('Q0', 'D2', 2, '1'),
            TrecQrel('Q0', 'D3', 3, '0'),
            TrecQrel('Q0', 'D2', 1, '1'),
            TrecQrel('Q1', 'D2', 1, '0'),
        ]

        qrels = TrecQrels(mock_file, QREL_DEFS)
        self.assertEqual(qrels.qrels_path(), 'MOCK')
        self.assertEqual(qrels.qrels_defs(), QREL_DEFS)
        self.assertEqual(list(qrels.qrels_iter()), expected_results)
예제 #13
0
 def qrels_iter(self):
     """Yield one ``TrecQrel`` per JSON line found in this split's ``.gz`` files.

     For every entry of ``self.qrels_dlcs``, the files matching
     ``**/<split>/*.gz`` under its path are visited in sorted order. Each
     line is parsed as JSON and its ``url`` value is used as both the query
     id and the document id, with relevance 1 and iteration '0'.
     """
     for dlc in self.qrels_dlcs:
         base_path = Path(dlc.path())
         for gz_path in sorted(base_path.glob(f'**/{self.split}/*.gz')):
             # Decode explicitly as UTF-8: text mode without an encoding
             # falls back to the platform default, which can fail or
             # mis-decode non-ASCII JSON content on some systems.
             with gzip.open(gz_path, 'rt', encoding='utf8') as f:
                 for line in f:
                     data = json.loads(line)
                     yield TrecQrel(
                         query_id=data['url'],
                         doc_id=data['url'],
                         relevance=1,
                         iteration='0',
                     )
예제 #14
0
 def test_antique_train_split200train(self):
     dataset_id = 'antique/train/split200-train'

     # Queries first: expected count plus three sample GenericQuery records.
     query_samples = {
         0: GenericQuery(
             query_id='3097310',
             text='What causes severe swelling and pain in the knees?'),
         9: GenericQuery(
             query_id='3486120',
             text='Why does PAMELA   ANDERSON ........NOT CARE about  Children?'),
         2225: GenericQuery(
             query_id='4086230',
             text='See I have lost my voice what do I do?'),
     }
     self._test_queries(dataset_id, count=2226, items=query_samples)

     # Then the qrels of the same dataset id.
     qrel_samples = {
         0: TrecQrel(query_id='2531329', doc_id='2531329_0',
                     relevance=4, iteration='U0'),
         9: TrecQrel(query_id='3825668', doc_id='3825668_4',
                     relevance=4, iteration='Q0'),
         25228: TrecQrel(query_id='884731', doc_id='884731_1',
                         relevance=3, iteration='Q0'),
     }
     self._test_qrels(dataset_id, count=25229, items=qrel_samples)
예제 #15
0
파일: beir.py 프로젝트: allenai/ir_datasets
 def qrels_iter(self):
     """Yield a ``TrecQrel`` for each data row of the UTF-8 decoded qrels stream.

     The first line must be the tab-separated header
     ``query-id``, ``corpus-id``, ``score``. Every following non-blank line
     must split on whitespace into exactly three columns (query id,
     document id, integer score). Iteration is always '0'.

     Raises:
         RuntimeError: if the header row is missing or unexpected, or if a
             non-blank data line does not have exactly three columns.
     """
     expected_header = 'query-id\tcorpus-id\tscore'
     with self._qrels_dlc.stream() as f:
         f = codecs.getreader('utf8')(f)
         it = iter(f)
         # Read the header with a plain statement instead of inside an
         # ``assert``: asserts are removed under ``python -O``, which would
         # leave the header unconsumed and feed it to ``int(score)`` below.
         header = next(it, '')
         if header.strip() != expected_header:
             raise RuntimeError(
                 f'expected header {expected_header!r}, got {header.strip()!r}')
         for line in it:
             # ``split()`` with no separator ignores surrounding whitespace.
             cols = line.split()
             if not cols:
                 # Blank line; also covers "\r\n" and whitespace-only lines.
                 continue
             if len(cols) != 3:
                 raise RuntimeError(f'expected 3 columns, got {len(cols)}')
             qid, did, score = cols
             yield TrecQrel(qid, did, int(score), '0')
예제 #16
0
 def test_antique_train_split200valid(self):
     dataset_id = 'antique/train/split200-valid'

     # Queries first: expected count plus three sample GenericQuery records.
     query_samples = {
         0: GenericQuery(query_id='1907320',
                         text='How do I get college money?'),
         9: GenericQuery(
             query_id='3083719',
             text='How do you safely wean a person off Risperidal?'),
         199: GenericQuery(
             query_id='2573745',
             text='How did African American women get the right to Vote?'),
     }
     self._test_queries(dataset_id, count=200, items=query_samples)

     # Then the qrels of the same dataset id.
     qrel_samples = {
         0: TrecQrel(query_id='2550445', doc_id='2550445_0',
                     relevance=4, iteration='U0'),
         9: TrecQrel(query_id='196651', doc_id='196651_1',
                     relevance=4, iteration='Q0'),
         2192: TrecQrel(query_id='344029', doc_id='344029_4',
                        relevance=4, iteration='Q0'),
     }
     self._test_qrels(dataset_id, count=2193, items=qrel_samples)
예제 #17
0
 def test_antique_train(self):
     dataset_id = 'antique/train'

     # Queries first: expected count plus three sample GenericQuery records.
     query_samples = {
         0: GenericQuery(
             query_id='3097310',
             text='What causes severe swelling and pain in the knees?'),
         9: GenericQuery(
             query_id='992730',
             text='How do you transfer voicemail messages onto tape?'),
         2425: GenericQuery(
             query_id='4086230',
             text='See I have lost my voice what do I do?'),
     }
     self._test_queries(dataset_id, count=2426, items=query_samples)

     # Then the qrels of the same dataset id.
     qrel_samples = {
         0: TrecQrel(query_id='2531329', doc_id='2531329_0',
                     relevance=4, iteration='U0'),
         9: TrecQrel(query_id='3825668', doc_id='3825668_4',
                     relevance=4, iteration='Q0'),
         27421: TrecQrel(query_id='884731', doc_id='884731_1',
                         relevance=3, iteration='Q0'),
     }
     self._test_qrels(dataset_id, count=27422, items=qrel_samples)
예제 #18
0
 def test_antique_test_nonoffensive(self):
     dataset_id = 'antique/test/non-offensive'

     # Queries first: expected count plus three sample GenericQuery records.
     query_samples = {
         0: GenericQuery(query_id='3990512',
                         text='how can we get concentration onsomething?'),
         9: GenericQuery(query_id='1783010', text='What is Blaphsemy?'),
         175: GenericQuery(
             query_id='1340574',
             text='Why do some people only go to church on Easter Sunday and never go again until Christmas ?'),
     }
     self._test_queries(dataset_id, count=176, items=query_samples)

     # Then the qrels of the same dataset id.
     qrel_samples = {
         0: TrecQrel(query_id='1964316', doc_id='1964316_5',
                     relevance=4, iteration='U0'),
         9: TrecQrel(query_id='1964316', doc_id='1964316_2',
                     relevance=4, iteration='Q0'),
         5751: TrecQrel(query_id='1262692', doc_id='3699008_1',
                        relevance=2, iteration='Q0'),
     }
     self._test_qrels(dataset_id, count=5752, items=qrel_samples)
예제 #19
0
 def test_trec_mandarin_qrels(self):
     # Each row is (dataset id, expected qrel count, sample items); rows are
     # forwarded to self._test_qrels one at a time, in the order listed.
     cases = (
         ('trec-mandarin/trec5', 15588, {
             0: TrecQrel(query_id='1', doc_id='CB001007-BFJ-588-408', relevance=0, iteration='0'),
             9: TrecQrel(query_id='1', doc_id='CB006019-BFJ-2117-506', relevance=0, iteration='0'),
             15587: TrecQrel(query_id='28', doc_id='pd9312-91', relevance=0, iteration='0'),
         }),
         ('trec-mandarin/trec6', 9236, {
             0: TrecQrel(query_id='29', doc_id='CB001004-BFW-1143-212', relevance=1, iteration='0'),
             9: TrecQrel(query_id='29', doc_id='CB002028-BFW-1086-1035', relevance=0, iteration='0'),
             9235: TrecQrel(query_id='54', doc_id='pd9312-1824', relevance=0, iteration='0'),
         }),
     )
     for dataset_id, total, samples in cases:
         self._test_qrels(dataset_id, count=total, items=samples)
예제 #20
0
 def test_medline_qrels(self):
     # Each row is (dataset id, expected qrel count, sample items); rows are
     # forwarded to self._test_qrels one at a time, in the order listed.
     cases = (
         ('medline/trec-genomics-2004', 8268, {
             0: TrecQrel('1', '10077651', 2, '0'),
             9: TrecQrel('1', '10449402', 2, '0'),
             8267: TrecQrel('50', '9951698', 1, '0'),
         }),
         ('medline/trec-genomics-2005', 39958, {
             0: TrecQrel('100', '10023709', 0, '0'),
             9: TrecQrel('100', '10138840', 0, '0'),
             39957: TrecQrel('149', '9989364', 0, '0'),
         }),
     )
     for dataset_id, total, samples in cases:
         self._test_qrels(dataset_id, count=total, items=samples)
예제 #21
0
 def test_qrels(self):
     # Each row is (dataset id, expected qrel count, sample items); rows are
     # forwarded to self._test_qrels one at a time, in the order listed.
     cases = (
         ('msmarco-qna/train', 8069749, {
             0: TrecQrel('1185869', '0-0', 1, '0'),
             9: TrecQrel('1185869', '9-0', 0, '0'),
             8069748: TrecQrel('461916', '7066857-0', 0, '0'),
         }),
         ('msmarco-qna/dev', 1008985, {
             0: TrecQrel('1102432', '7066858-0', 0, '0'),
             9: TrecQrel('1102432', '7066861-0', 0, '0'),
             1008984: TrecQrel('371455', '8009483-0', 0, '0'),
         }),
     )
     for dataset_id, total, samples in cases:
         self._test_qrels(dataset_id, count=total, items=samples)
예제 #22
0
 def test_trec_spanish_qrels(self):
     # Each row is (dataset id, expected qrel count, sample items); rows are
     # forwarded to self._test_qrels one at a time, in the order listed.
     cases = (
         ('trec-spanish/trec3', 19005, {
             0: TrecQrel(query_id='1', doc_id='SP94-0000082', relevance=1, iteration='0'),
             9: TrecQrel(query_id='1', doc_id='SP94-0001385', relevance=0, iteration='0'),
             19004: TrecQrel(query_id='25', doc_id='SP94-0202950', relevance=1, iteration='0'),
         }),
         ('trec-spanish/trec4', 13109, {
             0: TrecQrel(query_id='26', doc_id='SP94-0000054', relevance=1, iteration='0'),
             9: TrecQrel(query_id='26', doc_id='SP94-0000700', relevance=0, iteration='0'),
             13108: TrecQrel(query_id='50', doc_id='SP94-0202879', relevance=0, iteration='0'),
         }),
     )
     for dataset_id, total, samples in cases:
         self._test_qrels(dataset_id, count=total, items=samples)
예제 #23
0
 def test_qrels(self):
     # Each row is (dataset id, expected qrel count, sample items); rows are
     # forwarded to self._test_qrels one at a time, in the order listed.
     cases = (
         ('tweets2013-ia/trec-mb-2013', 71279, {
             0: TrecQrel('111', '297136541426397184', 0, 'Q0'),
             9: TrecQrel('111', '299374475248537602', 0, 'Q0'),
             71278: TrecQrel('170', '317942407385726976', 0, 'Q0'),
         }),
         ('tweets2013-ia/trec-mb-2014', 57985, {
             0: TrecQrel('171', '305851659194609664', 0, 'Q0'),
             9: TrecQrel('171', '304392188215836672', 0, 'Q0'),
             57984: TrecQrel('225', '299257357664387072', 0, 'Q0'),
         }),
     )
     for dataset_id, total, samples in cases:
         self._test_qrels(dataset_id, count=total, items=samples)
예제 #24
0
 def test_car_qrels(self):
     # Each row is (dataset id, expected qrel count, sample items); rows are
     # forwarded to self._test_qrels one at a time, in the order listed.
     cases = (
         ('car/v1.5/trec-y1/auto', 5820, {
             0: TrecQrel('Aftertaste/Aftertaste%20processing%20in%20the%20cerebral%20cortex', '38c1bd25ddca2705164677a3f598c46df85afba7', 1, '0'),
             9: TrecQrel('Aftertaste/Temporal%20taste%20perception', '8a41a87100d139bb9c108c8cab2ac3baaabea3ce', 1, '0'),
             5819: TrecQrel('Yellowstone%20National%20Park/Recreation', 'e80b5185da1493edde41bea19a389a3f62167369', 1, '0'),
         }),
         ('car/v1.5/trec-y1/manual', 29571, {
             0: TrecQrel('Hadley%20cell/Hadley%20cell%20expansion', '389c8a699f4db2f0278700d1c32e63ac369906cd', -1, '0'),
             9: TrecQrel('Water%20cycle/Effects%20on%20biogeochemical%20cycling', '844a0a0d5860ff1da8a9fcfb16cc4ce04ffb963f', 1, '0'),
             29570: TrecQrel('Rancidification/Reducing%20rancidification', '20a4e9af2853803a08854a1cc8973534e2235658', -1, '0'),
         }),
         ('car/v1.5/test200', 4706, {
             0: TrecQrel('ASME/ASME%20codes%20and%20standards', '16d8f62407d2cdd283a71735e5c83f7d7947b93a', 1, '0'),
             9: TrecQrel('Activity%20theory/An%20explanation', 'c0ee784b8f0eb3b80aaf85f42d5148655192cc1d', 1, '0'),
             4705: TrecQrel('Zang-fu/Yin/yang%20and%20the%20Five%20Elements', 'fe6f4dd186037e09bf00f0f08bf172babac7930b', 1, '0'),
         }),
         ('car/v1.5/train/fold0', 1054369, {
             0: TrecQrel("$pread/''$pread''%20Book", '2f545ffad1581dea4a2e4720aa9feb7389e1956a', 1, '0'),
             9: TrecQrel('%22Wild%20Bill%22%20Hickok/Death/Burial', '528b68a3355672c9b8bd5003428b72f54074b3fb', 1, '0'),
             1054368: TrecQrel('Zygmunt%20Szcz%C4%99sny%20Feli%C5%84ski/Views%20on%20Poland', 'fd77154f625ca721e554cbd0e4f33b51d4d92af6', 1, '0'),
         }),
     )
     for dataset_id, total, samples in cases:
         self._test_qrels(dataset_id, count=total, items=samples)
예제 #25
0
 def test_qrels(self):
     # Each row is (dataset id, expected qrel count, sample items); rows are
     # forwarded to self._test_qrels one at a time, in the order listed.
     cases = (
         ('tripclick/train', 2705212, {
             0: TrecQrel('8', '1398048', 1, '0'),
             9: TrecQrel('8', '1431742', 1, '0'),
             2705211: TrecQrel('1647720', '11698361', 1, '0'),
         }),
         ('tripclick/train/head', 116821, {
             0: TrecQrel('8', '1398048', 1, '0'),
             9: TrecQrel('8', '1431742', 1, '0'),
             116820: TrecQrel('1630245', '10818871', 1, '0'),
         }),
         ('tripclick/train/head/dctr', 128420, {
             0: TrecQrel('8', '1398048', 3, '0'),
             9: TrecQrel('8', '5651514', 1, '0'),
             128419: TrecQrel('1630245', '9448244', 0, '0'),
         }),
         ('tripclick/train/torso', 966898, {
             0: TrecQrel('5', '1099235', 1, '0'),
             9: TrecQrel('15', '9028026', 0, '0'),
             966897: TrecQrel('1647511', '11429892', 1, '0'),
         }),
         ('tripclick/train/tail', 1621493, {
             0: TrecQrel('1', '981744', 1, '0'),
             9: TrecQrel('27', '1194092', 1, '0'),
             1621492: TrecQrel('1647720', '11698361', 1, '0'),
         }),
         ('tripclick/val', 82409, {
             0: TrecQrel('38', '1390633', 1, '0'),
             9: TrecQrel('38', '9137657', 0, '0'),
             82408: TrecQrel('1645595', '9982749', 1, '0'),
         }),
         ('tripclick/val/head', 64364, {
             0: TrecQrel('38', '1390633', 1, '0'),
             9: TrecQrel('38', '9137657', 0, '0'),
             64363: TrecQrel('1630209', '11086242', 1, '0'),
         }),
         ('tripclick/val/head/dctr', 66812, {
             0: TrecQrel('38', '1390633', 2, '0'),
             9: TrecQrel('38', '7858667', 0, '0'),
             66811: TrecQrel('1630209', '9358372', 0, '0'),
         }),
         ('tripclick/val/torso', 14133, {
             0: TrecQrel('534', '1397165', 1, '0'),
             9: TrecQrel('534', '5671894', 1, '0'),
             14132: TrecQrel('1626635', '10258672', 1, '0'),
         }),
         ('tripclick/val/tail', 3912, {
             0: TrecQrel('1052', '951102', 1, '0'),
             9: TrecQrel('9347', '296234', 1, '0'),
             3911: TrecQrel('1645595', '9982749', 1, '0'),
         }),
     )
     for dataset_id, total, samples in cases:
         self._test_qrels(dataset_id, count=total, items=samples)
예제 #26
0
 def test_msmarco_passage_qrels(self):
     # Each row is (dataset id, expected qrel count, sample items); rows are
     # forwarded to self._test_qrels one at a time, in the order listed.
     # The table also includes two 'msmarco-document/...' dataset ids.
     cases = (
         ('msmarco-passage/train', 532761, {
             0: TrecQrel(query_id='1185869', doc_id='0', relevance=1, iteration='0'),
             9: TrecQrel(query_id='186154', doc_id='1160', relevance=1, iteration='0'),
             532760: TrecQrel(query_id='405466', doc_id='8841735', relevance=1, iteration='0'),
         }),
         ('msmarco-passage/train/judged', 532761, {
             0: TrecQrel(query_id='1185869', doc_id='0', relevance=1, iteration='0'),
             9: TrecQrel(query_id='186154', doc_id='1160', relevance=1, iteration='0'),
             532760: TrecQrel(query_id='405466', doc_id='8841735', relevance=1, iteration='0'),
         }),
         ('msmarco-passage/train/medical', 54627, {
             0: TrecQrel(query_id='403613', doc_id='60', relevance=1, iteration='0'),
             9: TrecQrel(query_id='685235', doc_id='12191', relevance=1, iteration='0'),
             54626: TrecQrel(query_id='496447', doc_id='8839368', relevance=1, iteration='0'),
         }),
         ('msmarco-passage/dev', 59273, {
             0: TrecQrel(query_id='1102432', doc_id='2026790', relevance=1, iteration='0'),
             9: TrecQrel(query_id='300674', doc_id='7067032', relevance=1, iteration='0'),
             59272: TrecQrel(query_id='371455', doc_id='8009476', relevance=1, iteration='0'),
         }),
         ('msmarco-passage/dev/small', 7437, {
             0: TrecQrel('300674', '7067032', 1, '0'),
             9: TrecQrel('54544', '7068203', 1, '0'),
             7436: TrecQrel('195199', '8009377', 1, '0'),
         }),
         ('msmarco-passage/dev/judged', 59273, {
             0: TrecQrel(query_id='1102432', doc_id='2026790', relevance=1, iteration='0'),
             9: TrecQrel(query_id='300674', doc_id='7067032', relevance=1, iteration='0'),
             59272: TrecQrel(query_id='371455', doc_id='8009476', relevance=1, iteration='0'),
         }),
         ('msmarco-passage/trec-dl-2019', 9260, {
             0: TrecQrel(query_id='19335', doc_id='1017759', relevance=0, iteration='Q0'),
             9: TrecQrel(query_id='19335', doc_id='1274615', relevance=0, iteration='Q0'),
             9259: TrecQrel(query_id='1133167', doc_id='977421', relevance=0, iteration='Q0'),
         }),
         ('msmarco-passage/trec-dl-2019/judged', 9260, {
             0: TrecQrel(query_id='19335', doc_id='1017759', relevance=0, iteration='Q0'),
             9: TrecQrel(query_id='19335', doc_id='1274615', relevance=0, iteration='Q0'),
             9259: TrecQrel(query_id='1133167', doc_id='977421', relevance=0, iteration='Q0'),
         }),
         ('msmarco-passage/train/split200-train', 532630, {
             0: TrecQrel(query_id='1185869', doc_id='0', relevance=1, iteration='0'),
             9: TrecQrel(query_id='186154', doc_id='1160', relevance=1, iteration='0'),
             532629: TrecQrel(query_id='405466', doc_id='8841735', relevance=1, iteration='0'),
         }),
         ('msmarco-passage/train/split200-valid', 131, {
             0: TrecQrel(query_id='318166', doc_id='179254', relevance=1, iteration='0'),
             9: TrecQrel(query_id='1158250', doc_id='791721', relevance=1, iteration='0'),
             130: TrecQrel(query_id='302427', doc_id='512871', relevance=1, iteration='0'),
         }),
         ('msmarco-document/trec-dl-2020', 9098, {
             0: TrecQrel('42255', 'D1006124', 0, '0'),
             9: TrecQrel('42255', 'D1168483', 0, '0'),
             9097: TrecQrel('1136962', 'D96742', 0, '0'),
         }),
         ('msmarco-document/trec-dl-2020/judged', 9098, {
             0: TrecQrel('42255', 'D1006124', 0, '0'),
             9: TrecQrel('42255', 'D1168483', 0, '0'),
             9097: TrecQrel('1136962', 'D96742', 0, '0'),
         }),
         ('msmarco-passage/trec-dl-hard', 4256, {
             0: TrecQrel('915593', '1396701', 0, 'Q0'),
             9: TrecQrel('915593', '1772932', 0, 'Q0'),
             4255: TrecQrel('1056416', '8739207', 0, 'Q0'),
         }),
         ('msmarco-passage/trec-dl-hard/fold1', 1072, {
             0: TrecQrel('915593', '1396701', 0, 'Q0'),
             9: TrecQrel('915593', '1772932', 0, 'Q0'),
             1071: TrecQrel('174463', '8770954', 1, '0'),
         }),
         ('msmarco-passage/trec-dl-hard/fold2', 898, {
             0: TrecQrel('794429', '8663241', 3, 'Q0'),
             9: TrecQrel('588587', '8548223', 1, 'Q0'),
             897: TrecQrel('19335', '901329', 0, 'Q0'),
         }),
         ('msmarco-passage/trec-dl-hard/fold3', 444, {
             0: TrecQrel('177604', '8451987', 0, 'Q0'),
             9: TrecQrel('177604', '8451996', 2, 'Q0'),
             443: TrecQrel('1105792', '996676', 0, '0'),
         }),
         ('msmarco-passage/trec-dl-hard/fold4', 716, {
             0: TrecQrel('801118', '8708701', 3, 'Q0'),
             9: TrecQrel('507445', '8407104', 1, 'Q0'),
             715: TrecQrel('1056416', '8739207', 0, 'Q0'),
         }),
         ('msmarco-passage/trec-dl-hard/fold5', 1126, {
             0: TrecQrel('190044', '1353072', 3, 'Q0'),
             9: TrecQrel('190044', '886798', 1, 'Q0'),
             1125: TrecQrel('1103153', '8226445', 0, 'Q0'),
         }),
     )
     for dataset_id, total, samples in cases:
         self._test_qrels(dataset_id, count=total, items=samples)
예제 #27
0
 def qrels_iter(self):
     """Yield a ``TrecQrel`` for every entry of each record's ``rel_docs``.

     Each line of the qrels stream is parsed as JSON. The record's ``id``
     and each ``rel_docs`` entry are converted to ``str``; every qrel is
     emitted with relevance 1 and iteration "0".
     """
     with self._qrels_dlc.stream() as stream:
         for raw_line in stream:
             record = json.loads(raw_line)
             # ``record["id"]`` is looked up per entry, as in a plain loop,
             # so a record with no related docs never touches that key.
             yield from (
                 TrecQrel(str(record["id"]), str(rel_doc), 1, "0")
                 for rel_doc in record["rel_docs"]
             )
예제 #28
0
파일: car.py 프로젝트: allenai/ir_datasets
 def test_qrels(self):
     # Each row is (dataset id, expected qrel count, sample items); rows are
     # forwarded to self._test_qrels one at a time, in the order listed.
     cases = (
         ('car/v1.5/trec-y1/auto', 5820, {
             0: TrecQrel(
                 'Aftertaste/Aftertaste%20processing%20in%20the%20cerebral%20cortex',
                 '38c1bd25ddca2705164677a3f598c46df85afba7', 1, '0'),
             9: TrecQrel(
                 'Aftertaste/Temporal%20taste%20perception',
                 '8a41a87100d139bb9c108c8cab2ac3baaabea3ce', 1, '0'),
             5819: TrecQrel(
                 'Yellowstone%20National%20Park/Recreation',
                 'e80b5185da1493edde41bea19a389a3f62167369', 1, '0'),
         }),
         ('car/v1.5/trec-y1/manual', 29571, {
             0: TrecQrel(
                 'Hadley%20cell/Hadley%20cell%20expansion',
                 '389c8a699f4db2f0278700d1c32e63ac369906cd', -1, '0'),
             9: TrecQrel(
                 'Water%20cycle/Effects%20on%20biogeochemical%20cycling',
                 '844a0a0d5860ff1da8a9fcfb16cc4ce04ffb963f', 1, '0'),
             29570: TrecQrel(
                 'Rancidification/Reducing%20rancidification',
                 '20a4e9af2853803a08854a1cc8973534e2235658', -1, '0'),
         }),
         ('car/v1.5/test200', 4706, {
             0: TrecQrel(
                 'ASME/ASME%20codes%20and%20standards',
                 '16d8f62407d2cdd283a71735e5c83f7d7947b93a', 1, '0'),
             9: TrecQrel(
                 'Activity%20theory/An%20explanation',
                 'c0ee784b8f0eb3b80aaf85f42d5148655192cc1d', 1, '0'),
             4705: TrecQrel(
                 'Zang-fu/Yin/yang%20and%20the%20Five%20Elements',
                 'fe6f4dd186037e09bf00f0f08bf172babac7930b', 1, '0'),
         }),
         ('car/v1.5/train/fold0', 1054369, {
             0: TrecQrel(
                 "$pread/''$pread''%20Book",
                 '2f545ffad1581dea4a2e4720aa9feb7389e1956a', 1, '0'),
             9: TrecQrel(
                 '%22Wild%20Bill%22%20Hickok/Death/Burial',
                 '528b68a3355672c9b8bd5003428b72f54074b3fb', 1, '0'),
             1054368: TrecQrel(
                 'Zygmunt%20Szcz%C4%99sny%20Feli%C5%84ski/Views%20on%20Poland',
                 'fd77154f625ca721e554cbd0e4f33b51d4d92af6', 1, '0'),
         }),
         ('car/v1.5/train/fold1', 1052398, {
             0: TrecQrel(
                 '$100,000%20infield/Eddie%20Collins',
                 'c7aa3c7821a112a149d85f650cbca4ec23c63617', 1, '0'),
             9: TrecQrel(
                 "%60Abdu'l-Bah%C3%A1/Acre/Marriage%20and%20family%20life",
                 '4da4ea634ccae1173e553129b368e95962969ec8', 1, '0'),
             1052397: TrecQrel(
                 'Zygosity/Types/Nullizygous',
                 '36186e2655db62fd9c31701302f86636b03d2511', 1, '0'),
         }),
         ('car/v1.5/train/fold2', 1061162, {
             0: TrecQrel(
                 "$h*!%20My%20Dad%20Says/''Surviving%20Jack''",
                 'dc4866e5b230ffb48b6f808f41ccf8063fbdc9fa', 1, '0'),
             9: TrecQrel(
                 '%22Left-Wing%22%20Communism:%20An%20Infantile%20Disorder/%22Left-wing%22%20communism%20in%20Germany',
                 '22ec581e3e1c5397e64bc6f0066dc8aea12fc71f', 1, '0'),
             1061161: TrecQrel(
                 'ZynAddSubFX/Windows%20version',
                 'b9d1be10b54e5efcbf3e6f1e5f2fbaf7c8af303c', 1, '0'),
         }),
         ('car/v1.5/train/fold3', 1046784, {
             0: TrecQrel(
                 '$2%20billion%20arms%20deal/Confessional%20statements',
                 '0e512b5962fa5ea838a578cbf414ae09b863a33f', 1, '0'),
             9: TrecQrel(
                 '$2%20billion%20arms%20deal/Investigative%20committee',
                 '812cb64a35f482bd60f82c1d67204c73612cb6a7', 1, '0'),
             1046783: TrecQrel(
                 'Zyuden%20Sentai%20Kyoryuger/Video%20game',
                 '844b90cf6f7c62e5bf51625a4d216baec2825bf9', 1, '0'),
         }),
         ('car/v1.5/train/fold4', 1061911, {
             0: TrecQrel(
                 '$1,000%20genome/Additional%20Resources',
                 '67ea5eae967657a8f0282066e3086573e41726d5', 1, '0'),
             9: TrecQrel(
                 '$1,000%20genome/Commercial%20efforts',
                 'a7ac9041cd833d6b09cc5270b495e9f94704027f', 1, '0'),
             1061910: TrecQrel(
                 'Zyron/Products',
                 'f355f98b4e3d5b08f60abe61022e9393202b9718', 1, '0'),
         }),
     )
     for dataset_id, total, samples in cases:
         self._test_qrels(dataset_id, count=total, items=samples)
예제 #29
0
 def test_qrels(self):
     # Each row is (dataset id, expected qrel count, sample items); rows are
     # forwarded to self._test_qrels one at a time, in the order listed.
     cases = (
         ('msmarco-passage-v2/train', 284212, {
             0: TrecQrel('1185869', 'msmarco_passage_08_840101254', 1, '0'),
             9: TrecQrel('186154', 'msmarco_passage_02_556351008', 1, '0'),
             284211: TrecQrel('697642', 'msmarco_passage_05_512118117', 1, '0'),
         }),
         ('msmarco-passage-v2/dev1', 4009, {
             0: TrecQrel('763878', 'msmarco_passage_33_459057644', 1, '0'),
             9: TrecQrel('290779', 'msmarco_passage_10_301562908', 1, '0'),
             4008: TrecQrel('1091692', 'msmarco_passage_23_330102695', 1, '0'),
         }),
         ('msmarco-passage-v2/dev2', 4411, {
             0: TrecQrel('419507', 'msmarco_passage_04_254301507', 1, '0'),
             9: TrecQrel('1087630', 'msmarco_passage_18_685926585', 1, '0'),
             4410: TrecQrel('961297', 'msmarco_passage_18_858458289', 1, '0'),
         }),
         ('msmarco-passage-v2/trec-dl-2021', 10828, {
             0: TrecQrel('2082', 'msmarco_passage_01_552803451', 0, '0'),
             9: TrecQrel('2082', 'msmarco_passage_02_437070914', 3, '0'),
             10827: TrecQrel('1129560', 'msmarco_passage_68_639912287', 0, '0'),
         }),
         ('msmarco-passage-v2/trec-dl-2021/judged', 10828, {
             0: TrecQrel('2082', 'msmarco_passage_01_552803451', 0, '0'),
             9: TrecQrel('2082', 'msmarco_passage_02_437070914', 3, '0'),
             10827: TrecQrel('1129560', 'msmarco_passage_68_639912287', 0, '0'),
         }),
     )
     for dataset_id, total, samples in cases:
         self._test_qrels(dataset_id, count=total, items=samples)
예제 #30
0
 def test_qrels(self):
     # Sample qrels handed to self._test_qrels together with the expected count.
     samples = {
         0: TrecQrel('303', 'APW19980609.1531', 2, '0'),
         9: TrecQrel('303', 'APW19981117.0914', 0, '0'),
         37797: TrecQrel('689', 'XIE20000925.0055', 0, '0'),
     }
     self._test_qrels('aquaint/trec-robust-2005', count=37798, items=samples)