예제 #1
0
 def test_antique_test(self):
     """Check sampled queries and qrels of the ``antique/test`` dataset.

     The expected record count and a few sampled records are handed to
     ``self._test_queries`` and ``self._test_qrels``; the keys of each
     mapping are presumably record positions (first, tenth, last).
     """
     dataset_id = 'antique/test'

     sampled_queries = {
         0: GenericQuery(
             query_id='3990512',
             text='how can we get concentration onsomething?',
         ),
         9: GenericQuery(query_id='1783010', text='What is Blaphsemy?'),
         199: GenericQuery(query_id='1971899', text='what is masturbat***?'),
     }
     self._test_queries(dataset_id, count=200, items=sampled_queries)

     sampled_qrels = {
         0: TrecQrel(
             query_id='1964316',
             doc_id='1964316_5',
             relevance=4,
             iteration='U0',
         ),
         9: TrecQrel(
             query_id='1964316',
             doc_id='1964316_2',
             relevance=4,
             iteration='Q0',
         ),
         6588: TrecQrel(
             query_id='1262692',
             doc_id='3699008_1',
             relevance=2,
             iteration='Q0',
         ),
     }
     self._test_qrels(dataset_id, count=6589, items=sampled_qrels)
예제 #2
0
 def test_dummy_qrels(self):
     """Build a dataset from the dummy TSV/qrels files and check its queries.

     NOTE(review): despite the method name, only the queries are verified
     here; the qrels file is merely supplied to ``create_dataset``.
     """
     dummy_dataset = ir_datasets.create_dataset(
         docs_tsv='test/dummy/docs.tsv',
         queries_tsv='test/dummy/queries.tsv',
         qrels_trec='test/dummy/qrels',
     )
     sampled_queries = {
         0: GenericQuery('1', 'republican party'),
         3: GenericQuery('4', 'media'),
     }
     self._test_queries(dummy_dataset, count=4, items=sampled_queries)
예제 #3
0
 def test_medline_queries(self):
     """Check sampled queries of the two MEDLINE TREC Genomics query sets.

     The 2004 set is compared against ``TrecGenomicsQuery`` records and the
     2005 set against ``GenericQuery`` records; each set is expected to
     hold 50 queries.
     """
     genomics_2004 = {
         0: TrecGenomicsQuery(
             '1',
             'Ferroportin-1 in humans',
             'Find articles about Ferroportin-1, an iron transporter, in humans.',
             'Ferroportin1 (also known as SLC40A1; Ferroportin 1; FPN1; HFE4; IREG1; Iron regulated gene 1; Iron-regulated transporter 1; MTP1; SLC11A3; and Solute carrier family 11 (proton-coupled divalent metal ion transporters), member 3) may play a role in iron transport.',
         ),
         9: TrecGenomicsQuery(
             '10',
             'NEIL1',
             'Find articles about the role of NEIL1 in repair of DNA.',
             'Interested in role that NEIL1 plays in DNA repair.',
         ),
         49: TrecGenomicsQuery(
             '50',
             'Low temperature protein expression in E. coli',
             'Find research on improving protein expressions at low temperature in Escherichia coli bacteria.',
             'The researcher is not satisfied with the yield of expressing a protein in E. coli when grown at low temperature and is searching for a better solution. The researcher is willing to try a different organism and/or method.',
         ),
     }
     self._test_queries('medline/trec-genomics-2004', count=50, items=genomics_2004)

     genomics_2005 = {
         0: GenericQuery(
             '100',
             'Describe the procedure or methods for how to "open up" a cell through a process called "electroporation."',
         ),
         9: GenericQuery(
             '109',
             "Describe the procedure or methods for fluorogenic 5'-nuclease assay.",
         ),
         49: GenericQuery(
             '149',
             'Provide information about Mutations of the alpha 4-GABAA receptor and its/their impact on behavior.',
         ),
     }
     self._test_queries('medline/trec-genomics-2005', count=50, items=genomics_2005)
예제 #4
0
 def test_queries(self):
     """Check the total count and three sampled queries of ``aol-ia``."""
     sampled_queries = {
         0: GenericQuery('8c418e7c9e5993', 'rentdirect com'),
         9: GenericQuery('c8476c36af8761', 'www elaorg'),
         9966938: GenericQuery('bba88dc56436eb', 'c21curabba'),
     }
     self._test_queries('aol-ia', count=9966939, items=sampled_queries)
예제 #5
0
 def queries_iter(self):
     """Yield a ``GenericQuery`` for every line after the header line.

     The query id is the line's position counted from 1 (the header is
     position 0 and is dropped), rendered as a string; the text is the
     line with trailing whitespace removed.
     """
     with self.queries_dlc.stream() as raw:
         # NOTE(review): no encoding is given, so the platform default
         # applies -- confirm this matches the source file.
         text_lines = io.TextIOWrapper(raw)
         next(text_lines, None)  # discard the header line, if there is one
         for position, row in enumerate(text_lines, start=1):
             yield GenericQuery(str(position), row.rstrip())
예제 #6
0
 def queries_iter(self):
     """Yield a ``GenericQuery`` for each non-blank line of the queries file.

     The stream is decoded as cp1252. Characters 1-3 of a line form the
     query id and everything from character 5 onward (trailing whitespace
     removed) forms the text. Bracketed upper-case category placeholders
     in the text are replaced by their lower-case, bracket-free form.
     """
     # (placeholder, replacement) pairs, applied in this order.
     placeholder_table = (
         ('[ANTIBODIES]', 'antibodies'),
         ('[BIOLOGICAL SUBSTANCES]', 'biological substances'),
         ('[CELL OR TISSUE TYPES]', 'cell or tissue types'),
         ('[DISEASES]', 'diseases'),
         ('[DRUGS]', 'drugs'),
         ('[GENES]', 'genes'),
         ('[MOLECULAR FUNCTIONS]', 'molecular functions'),
         ('[MUTATIONS]', 'mutations'),
         ('[PATHWAYS]', 'pathways'),
         ('[PROTEINS]', 'proteins'),
         ('[SIGNS OR SYMPTOMS]', 'signs or symptoms'),
         ('[STRAINS]', 'strains'),
         ('[TOXICITIES]', 'toxicities'),
         ('[TUMOR TYPES]', 'tumor types'),
     )
     with self._queries_dlc.stream() as raw:
         for row in codecs.getreader('cp1252')(raw):
             if row.strip() == '':
                 continue
             query_id = row[1:4]
             query_text = row[5:].rstrip()
             for placeholder, replacement in placeholder_table:
                 query_text = query_text.replace(placeholder, replacement)
             yield GenericQuery(query_id, query_text)
예제 #7
0
 def queries_iter(self):
     """Yield a ``GenericQuery`` per group returned by ``sentinel_splitter``.

     Groups are delimited by the ``'/\\n'`` sentinel. The first line of a
     group (without its trailing newline) is the query id; the remaining
     lines, concatenated unchanged, are the query text.
     """
     with self.queries_dlc.stream() as raw:
         # NOTE(review): no encoding is given, so the platform default
         # applies -- confirm this matches the source file.
         text_stream = io.TextIOWrapper(raw)
         for group in sentinel_splitter(text_stream, sentinel='/\n'):
             identifier = group[0].rstrip('\n')
             body = ''.join(group[1:])
             yield GenericQuery(identifier, body)
예제 #8
0
 def queries_iter(self):
     """Yield a ``GenericQuery`` for every JSON line of this split's archives.

     For each entry of ``self.queries_dlcs``, every ``*.gz`` file found
     under a ``<self.split>`` directory (searched recursively, visited in
     sorted path order) is read line by line as JSON. The record's
     ``url`` becomes the query id and its ``docstring`` the query text.

     Raises whatever ``json.loads`` raises on a malformed line, and
     ``KeyError`` if a record lacks ``url`` or ``docstring``.
     """
     for dlc in self.queries_dlcs:
         base_path = Path(dlc.path())
         for gz_path in sorted(base_path.glob(f'**/{self.split}/*.gz')):
             # JSON text is UTF-8; decode explicitly instead of relying on
             # the platform's locale encoding, which would garble or reject
             # non-ASCII docstrings on non-UTF-8 systems.
             with gzip.open(gz_path, 'rt', encoding='utf-8') as f:
                 for line in f:
                     data = json.loads(line)
                     yield GenericQuery(
                         data['url'],  # query_id = url
                         data['docstring'],  # text = docstring
                     )
예제 #9
0
 def test_queries(self):
     """Check the total count and three sampled queries of ``cranfield``."""
     sampled_queries = {
         0: GenericQuery(
             '1',
             'what similarity laws must be obeyed when constructing aeroelastic models\nof heated high speed aircraft .',
         ),
         9: GenericQuery(
             '18',
             'are real-gas transport properties for air available over a wide range of\nenthalpies and densities .',
         ),
         224: GenericQuery(
             '365',
             'what design factors can be used to control lift-drag ratios at mach\nnumbers above 5 .',
         ),
     }
     self._test_queries('cranfield', count=225, items=sampled_queries)
예제 #10
0
 def test_nyt_queries(self):
     """Check sampled queries of the ``nyt/train`` and ``nyt/valid`` sets."""
     train_samples = {
         0: GenericQuery(
             '8454',
             'MARSH & MCLENNAN INC reports earnings for Qtr to Dec 31',
         ),
         9: GenericQuery('8579', 'SALINGER BIOGRAPHY IS BLOCKED'),
         1863656: GenericQuery(
             '1854817',
             'STRATEGY ON IRAN STIRS NEW DEBATE AT WHITE HOUSE',
         ),
     }
     self._test_queries('nyt/train', count=1863657, items=train_samples)

     valid_samples = {
         0: GenericQuery('6461', "Why We're Forced To Be Slumlords"),
         9: GenericQuery(
             '13148',
             'NOVAMETRIX MEDICAL SYSTEMS INC reports earnings for Qtr to Dec 31',
         ),
         1003: GenericQuery('1854529', 'The Newest Antique: Atari'),
     }
     self._test_queries('nyt/valid', count=1004, items=valid_samples)
예제 #11
0
 def test_antique_train_split200train(self):
     """Check sampled queries and qrels of ``antique/train/split200-train``."""
     dataset_id = 'antique/train/split200-train'

     sampled_queries = {
         0: GenericQuery(
             query_id='3097310',
             text='What causes severe swelling and pain in the knees?',
         ),
         9: GenericQuery(
             query_id='3486120',
             text='Why does PAMELA   ANDERSON ........NOT CARE about  Children?',
         ),
         2225: GenericQuery(
             query_id='4086230',
             text='See I have lost my voice what do I do?',
         ),
     }
     self._test_queries(dataset_id, count=2226, items=sampled_queries)

     sampled_qrels = {
         0: TrecQrel(
             query_id='2531329',
             doc_id='2531329_0',
             relevance=4,
             iteration='U0',
         ),
         9: TrecQrel(
             query_id='3825668',
             doc_id='3825668_4',
             relevance=4,
             iteration='Q0',
         ),
         25228: TrecQrel(
             query_id='884731',
             doc_id='884731_1',
             relevance=3,
             iteration='Q0',
         ),
     }
     self._test_qrels(dataset_id, count=25229, items=sampled_qrels)
예제 #12
0
 def test_antique_train_split200valid(self):
     """Check sampled queries and qrels of ``antique/train/split200-valid``."""
     dataset_id = 'antique/train/split200-valid'

     sampled_queries = {
         0: GenericQuery(
             query_id='1907320',
             text='How do I get college money?',
         ),
         9: GenericQuery(
             query_id='3083719',
             text='How do you safely wean a person off Risperidal?',
         ),
         199: GenericQuery(
             query_id='2573745',
             text='How did African American women get the right to Vote?',
         ),
     }
     self._test_queries(dataset_id, count=200, items=sampled_queries)

     sampled_qrels = {
         0: TrecQrel(
             query_id='2550445',
             doc_id='2550445_0',
             relevance=4,
             iteration='U0',
         ),
         9: TrecQrel(
             query_id='196651',
             doc_id='196651_1',
             relevance=4,
             iteration='Q0',
         ),
         2192: TrecQrel(
             query_id='344029',
             doc_id='344029_4',
             relevance=4,
             iteration='Q0',
         ),
     }
     self._test_qrels(dataset_id, count=2193, items=sampled_qrels)
예제 #13
0
 def queries_iter(self):
     """Yield a ``GenericQuery`` per group returned by ``prefix_sentinel_splitter``.

     Groups are split on the ``'.I '`` prefix. Within a group, lines seen
     before the first line starting with ``.W`` are gathered into the
     query id and lines after it into the text; the ``.W`` lines
     themselves are dropped. Both parts are whitespace-stripped, and
     leading zeros are removed from the id.
     """
     with self.queries_dlc.stream() as raw:
         # NOTE(review): no encoding is given, so the platform default
         # applies -- confirm this matches the source file.
         text_stream = io.TextIOWrapper(raw)
         for group in prefix_sentinel_splitter(text_stream, sentinel='.I '):
             id_parts = []
             text_parts = []
             target = id_parts
             for row in group:
                 if row.startswith('.W'):
                     # Everything from here on belongs to the query text.
                     target = text_parts
                     continue
                 target.append(row)
             # remove leading 0s to match qrels
             query_id = ''.join(id_parts).strip().lstrip('0')
             query_text = ''.join(text_parts).strip()
             yield GenericQuery(query_id=query_id, text=query_text)
예제 #14
0
 def test_antique_train(self):
     """Check sampled queries and qrels of the ``antique/train`` dataset."""
     dataset_id = 'antique/train'

     sampled_queries = {
         0: GenericQuery(
             query_id='3097310',
             text='What causes severe swelling and pain in the knees?',
         ),
         9: GenericQuery(
             query_id='992730',
             text='How do you transfer voicemail messages onto tape?',
         ),
         2425: GenericQuery(
             query_id='4086230',
             text='See I have lost my voice what do I do?',
         ),
     }
     self._test_queries(dataset_id, count=2426, items=sampled_queries)

     sampled_qrels = {
         0: TrecQrel(
             query_id='2531329',
             doc_id='2531329_0',
             relevance=4,
             iteration='U0',
         ),
         9: TrecQrel(
             query_id='3825668',
             doc_id='3825668_4',
             relevance=4,
             iteration='Q0',
         ),
         27421: TrecQrel(
             query_id='884731',
             doc_id='884731_1',
             relevance=3,
             iteration='Q0',
         ),
     }
     self._test_qrels(dataset_id, count=27422, items=sampled_qrels)
예제 #15
0
 def test_antique_test_nonoffensive(self):
     """Check sampled queries and qrels of ``antique/test/non-offensive``."""
     dataset_id = 'antique/test/non-offensive'

     sampled_queries = {
         0: GenericQuery(
             query_id='3990512',
             text='how can we get concentration onsomething?',
         ),
         9: GenericQuery(query_id='1783010', text='What is Blaphsemy?'),
         175: GenericQuery(
             query_id='1340574',
             text='Why do some people only go to church on Easter Sunday and never go again until Christmas ?',
         ),
     }
     self._test_queries(dataset_id, count=176, items=sampled_queries)

     sampled_qrels = {
         0: TrecQrel(
             query_id='1964316',
             doc_id='1964316_5',
             relevance=4,
             iteration='U0',
         ),
         9: TrecQrel(
             query_id='1964316',
             doc_id='1964316_2',
             relevance=4,
             iteration='Q0',
         ),
         5751: TrecQrel(
             query_id='1262692',
             doc_id='3699008_1',
             relevance=2,
             iteration='Q0',
         ),
     }
     self._test_qrels(dataset_id, count=5752, items=sampled_qrels)
예제 #16
0
 def test_queries(self):
     """Check sampled queries of ``natural-questions`` train and dev sets."""
     train_samples = {
         0: GenericQuery(
             '4549465242785278785',
             'when is the last episode of season 8 of the walking dead',
         ),
         9: GenericQuery(
             '3542596469291219966',
             'when was the first robot used in surgery',
         ),
         307372: GenericQuery(
             '-9055447625982456209',
             'why is the dark age called the dark age',
         ),
     }
     self._test_queries('natural-questions/train', count=307373, items=train_samples)

     dev_samples = {
         0: GenericQuery(
             '5225754983651766092',
             'what purpose did seasonal monsoon winds have on trade',
         ),
         9: GenericQuery(
             '8467542931261548456',
             'global trade during the ming dynasty of china',
         ),
         7829: GenericQuery(
             '6752717162503553157',
             'how many goals have arsenal scored in the premier league',
         ),
     }
     self._test_queries('natural-questions/dev', count=7830, items=dev_samples)
예제 #17
0
 def test_queries(self):
     """Check sampled queries of every ``msmarco-document-v2`` query set.

     Each table row holds a dataset id, the expected number of queries,
     and a mapping of sampled records; rows are checked in order with
     ``self._test_queries``.
     """
     expectations = [
         ('msmarco-document-v2/train', 322196, {
             0: GenericQuery('121352', 'define extreme'),
             9: GenericQuery('918533', 'what was introduced to the human diet in what year'),
             322195: GenericQuery('50393', 'benefits of boiling lemons and drinking juice.'),
         }),
         ('msmarco-document-v2/dev1', 4552, {
             0: GenericQuery('2', ' Androgen receptor define'),
             9: GenericQuery('873886', 'what level does zubat evolves to'),
             4551: GenericQuery('1048565', 'who plays sebastian michaelis'),
         }),
         ('msmarco-document-v2/dev2', 5000, {
             0: GenericQuery('1048579', 'what is pcnt'),
             9: GenericQuery('1048779', 'what is ott media'),
             4999: GenericQuery('1092262', ';liter chemistry definition'),
         }),
         ('msmarco-document-v2/trec-dl-2019', 200, {
             0: GenericQuery('1108939', 'what slows down the flow of blood'),
             9: GenericQuery('885490', 'what party is paul ryan in'),
             199: GenericQuery('532603', 'university of dubuque enrollment'),
         }),
         ('msmarco-document-v2/trec-dl-2019/judged', 43, {
             0: GenericQuery('156493', 'do goldfish grow'),
             9: GenericQuery('915593', 'what types of food can you cook sous vide'),
             42: GenericQuery('146187', 'difference between a mcdouble and a double cheeseburger'),
         }),
         ('msmarco-document-v2/trec-dl-2020', 200, {
             0: GenericQuery('1030303', 'who is aziz hashim'),
             9: GenericQuery('1071750', 'why is pete rose banned from hall of fame'),
             199: GenericQuery('132622', 'definition of attempted arson'),
         }),
         ('msmarco-document-v2/trec-dl-2020/judged', 45, {
             0: GenericQuery('1030303', 'who is aziz hashim'),
             9: GenericQuery('1105792', 'define: geon'),
             44: GenericQuery('997622', 'where is the show shameless filmed'),
         }),
         ('msmarco-document-v2/trec-dl-2021', 477, {
             0: GenericQuery('787021', 'what is produced by muscle'),
             9: GenericQuery('1052368', 'who stabbed dr. martin luther king'),
             476: GenericQuery('855410', 'what is theraderm used for'),
         }),
         ('msmarco-document-v2/trec-dl-2021/judged', 57, {
             0: GenericQuery('2082', 'At about what age do adults normally begin to lose bone mass?'),
             9: GenericQuery('1104447', 'which kind of continental boundary is formed where two plates move horizontally past one another?'),
             56: GenericQuery('1040198', 'who is the final arbiter of florida law in instances where there is no federal authority?'),
         }),
     ]
     for dataset_id, total, samples in expectations:
         self._test_queries(dataset_id, count=total, items=samples)
예제 #18
0
 def test_queries(self):
     """Check sampled queries of every ``lotte`` domain/split/query-type set.

     Each table row holds a dataset id, the expected number of queries,
     and a mapping of sampled records; rows are checked in order with
     ``self._test_queries``.
     """
     expectations = [
         ('lotte/lifestyle/dev/forum', 2076, {
             0: GenericQuery('0', 'Why does my cat keep patting my face?'),
             9: GenericQuery('9', 'Is this normal first day home behavior for my kitten, or should I be concerned?'),
             2075: GenericQuery('2075', 'Direct Pull (V-Brake) vs. Center Pull Cantilevers (pros and cons)'),
         }),
         ('lotte/lifestyle/dev/search', 417, {
             0: GenericQuery('0', 'how much should i feed my 1 year old english mastiff?'),
             9: GenericQuery('9', 'is my corn snake male or female?'),
             416: GenericQuery('416', 'is there a difference between red and clear power steering fluid?'),
         }),
         ('lotte/lifestyle/test/forum', 2002, {
             0: GenericQuery('0', 'OK were all adults here, so really, how on earth should I use a squat toilet?'),
             9: GenericQuery('9', 'I dont know my nationality. How can I visit Denmark?'),
             2001: GenericQuery('2001', 'What is each side of a 4-sided grater for?'),
         }),
         ('lotte/lifestyle/test/search', 661, {
             0: GenericQuery('0', 'are clear pomegranate seeds good to eat?'),
             9: GenericQuery('9', 'is lumpy coconut milk ok?'),
             660: GenericQuery('660', 'is zone allowed in the nba?'),
         }),
         ('lotte/recreation/dev/forum', 2002, {
             0: GenericQuery('0', 'Would the One Ring even work for anyone but Sauron?'),
             9: GenericQuery('9', 'Which 2015 technologies were correctly predicted by Back to the Future II?'),
             2001: GenericQuery('2001', 'Does priority matter in Magic?'),
         }),
         ('lotte/recreation/dev/search', 563, {
             0: GenericQuery('0', 'do bards have to sing?'),
             9: GenericQuery('9', 'do attacks of opportunity stop movement?'),
             562: GenericQuery('562', 'are nikon and minolta lenses interchangeable?'),
         }),
         ('lotte/recreation/test/forum', 2002, {
             0: GenericQuery('0', 'How did they make cars fall apart in old movies?'),
             9: GenericQuery('9', 'Is the title The Last Jedi singular or plural?'),
             2001: GenericQuery('2001', 'Is there any specific reason why female voice actors act for male roles in anime?'),
         }),
         ('lotte/recreation/test/search', 924, {
             0: GenericQuery('0', 'how can you tell if someone blocked you on xbox one?'),
             9: GenericQuery('9', 'are xbox games compatible with ps4?'),
             923: GenericQuery('923', 'are laurel and hardy jewish?'),
         }),
         ('lotte/science/dev/forum', 2013, {
             0: GenericQuery('0', 'Making sense of principal component analysis, eigenvectors & eigenvalues'),
             9: GenericQuery('9', 'Bayesian and frequentist reasoning in plain English'),
             2012: GenericQuery('2012', 'How can I tell if I have simplified my talk too much?'),
         }),
         ('lotte/science/dev/search', 538, {
             0: GenericQuery('0', 'is sudan iv hydrophobic or hydrophilic?'),
             9: GenericQuery('9', 'how many atoms are present in one molecule of urea?'),
             537: GenericQuery('537', 'is both objective and subjective?'),
         }),
         ('lotte/science/test/forum', 2017, {
             0: GenericQuery('0', 'Cooling a cup of coffee with help of a spoon'),
             9: GenericQuery('9', 'Why dont metals bond when touched together?'),
             2016: GenericQuery('2016', 'Why does cracking a joint make noise?'),
         }),
         ('lotte/science/test/search', 617, {
             0: GenericQuery('0', 'mutually exclusive events are independent?'),
             9: GenericQuery('9', 'is tan x a function?'),
             616: GenericQuery('616', 'what is the relationship between polarity and hydrophobicity?'),
         }),
         ('lotte/technology/dev/forum', 2003, {
             0: GenericQuery('0', 'Strikethrough with GitHub Markdown'),
             9: GenericQuery('9', 'GitHub - Whats this Pro tag on my profile?'),
             2002: GenericQuery('2002', 'I have a hardware detection problem, what logs do I need to look into?'),
         }),
         ('lotte/technology/dev/search', 916, {
             0: GenericQuery('0', 'how many devices can you connect to bluetooth?'),
             9: GenericQuery('9', 'do docking stations have mac addresses?'),
             915: GenericQuery('915', 'what does it mean when someone is active but no green dot?'),
         }),
         ('lotte/technology/test/forum', 2004, {
             0: GenericQuery('0', 'Why does man print gimme gimme gimme at 00:30?'),
             9: GenericQuery('9', 'How do I grep for multiple patterns with pattern having a pipe character?'),
             2003: GenericQuery('2003', 'Can I automatically log in to open WiFi that requires web login/password?'),
         }),
         ('lotte/technology/test/search', 596, {
             0: GenericQuery('0', 'which ipods are no longer supported?'),
             9: GenericQuery('9', 'how to change the name of my apple pencil?'),
             595: GenericQuery('595', 'is ping tcp or udp?'),
         }),
         ('lotte/writing/dev/forum', 2003, {
             0: GenericQuery('0', 'The Rules of Writing'),
             9: GenericQuery('9', 'How do I translate into a gendered language where the gender would be a spoiler?'),
             2002: GenericQuery('2002', 'Can I say I Java, or does it have to be I do Java?'),
         }),
         ('lotte/writing/dev/search', 497, {
             0: GenericQuery('0', 'how are you doing lately meaning?'),
             9: GenericQuery('9', 'what is the difference between sign in and sign up?'),
             496: GenericQuery('496', 'can a tv screen be used as a camera?'),
         }),
         ('lotte/writing/test/forum', 2000, {
             0: GenericQuery('0', 'How do you quote a passage that has used [sic] mistakenly?'),
             9: GenericQuery('9', 'Is there a word or phrase for the feeling you get after looking at a word for too long?'),
             1999: GenericQuery('1999', 'Opposite of a diet'),
         }),
         ('lotte/writing/test/search', 1071, {
             0: GenericQuery('0', 'what is the difference between a college and an academy?'),
             9: GenericQuery('9', 'what is the difference between present continuous tense and past continuous tense?'),
             1070: GenericQuery('1070', 'what is the difference between pricey and pricey?'),
         }),
         ('lotte/pooled/dev/forum', 10097, {
             0: GenericQuery('0', 'The Rules of Writing'),
             9: GenericQuery('9', 'How do I translate into a gendered language where the gender would be a spoiler?'),
             10096: GenericQuery('10096', 'Direct Pull (V-Brake) vs. Center Pull Cantilevers (pros and cons)'),
         }),
         ('lotte/pooled/dev/search', 2931, {
             0: GenericQuery('0', 'how are you doing lately meaning?'),
             9: GenericQuery('9', 'what is the difference between sign in and sign up?'),
             2930: GenericQuery('2930', 'is there a difference between red and clear power steering fluid?'),
         }),
         ('lotte/pooled/test/forum', 10025, {
             0: GenericQuery('0', 'How do you quote a passage that has used [sic] mistakenly?'),
             9: GenericQuery('9', 'Is there a word or phrase for the feeling you get after looking at a word for too long?'),
             10024: GenericQuery('10024', 'What is each side of a 4-sided grater for?'),
         }),
         ('lotte/pooled/test/search', 3869, {
             0: GenericQuery('0', 'what is the difference between a college and an academy?'),
             9: GenericQuery('9', 'what is the difference between present continuous tense and past continuous tense?'),
             3868: GenericQuery('3868', 'is zone allowed in the nba?'),
         }),
     ]
     for dataset_id, total, samples in expectations:
         self._test_queries(dataset_id, count=total, items=samples)
예제 #19
0
 def test_gov2_queries(self):
     """Verify sampled queries for each gov2 query collection.

     For every dataset ID, ``self._test_queries`` receives a ``count`` and an
     ``items`` mapping holding the expected query record at indices 0, 9 and
     ``count - 1``.
     """

     def expect(dataset_id, total, first, tenth, last):
         # Every check samples the same three indices: 0, 9 and the last.
         self._test_queries(dataset_id,
                            count=total,
                            items={
                                0: first,
                                9: tenth,
                                total - 1: last
                            })

     # --- TrecQuery collections -----------------------------------------
     expect(
         'gov2/trec-tb-2004', 50,
         TrecQuery(
             '701',
             'U.S. oil industry history',
             'Describe the history of the U.S. oil industry',
             'Relevant documents will include those on historical exploration and\ndrilling as well as history of regulatory bodies. Relevant are history\nof the oil industry in various states, even if drilling began in 1950\nor later.',
         ),
         TrecQuery(
             '710',
             'Prostate cancer treatments',
             'What are the various treatments for prostate cancer?',
             'Relevant cancer treatments include radiation therapy, radioactive\npellets, hormonal therapy and surgery. "Watchful waiting" is also\nconsidered relevant.',
         ),
         TrecQuery(
             '750',
             'John Edwards womens issues',
             "What are Senator John Edwards' positions on women's issues such as pay\nequity, abortion, Title IX and violence against women.",
             "Relevant documents will indicate Senator John Edwards' stand on issues\nconcerning women, such as pay parity, abortion rights, Title IX, and\nviolence against women.  Lists of press releases are relevant when the\nheadlines show he is voting for or against bills on women's\nissues. Not relevant are Edwards' positions on issues not exclusively\nconcerning women.",
         ))
     expect(
         'gov2/trec-tb-2005', 50,
         TrecQuery(
             '751',
             'Scrabble Players',
             'Give information on Scrabble players, when and where Scrabble is\nplayed, and how popular it has been.',
             "Give information on the social aspects of the game Scrabble. Scrabble\nplayers may be named or described as a group.  Both real and fictional\nplayers are relevant. Mention of a scheduled Scrabble game is\nrelevant. Scrabble's popularity is relevant.  An account of a\nparticular game is relevant.  Descriptions of variants on the Scrabble\ngame are not relevant. Use of Scrabble tiles for other purposes are\nnot relevant. Scrabble software is not relevant unless there is\nmention of its users.  Titles of Scrabble-related books (dictionaries,\nglossaries, rulebooks) are not relevant.",
         ),
         TrecQuery(
             '760',
             'american muslim mosques schools',
             'Statistics regarding American Muslims, mosques, and schools.',
             'Relevant documents should provide some count or proportion of mosques,\nMuslim-affiliated schools, or population. With regard to population,\nspecific age groupings, sexes, or other categorizations are\nacceptable. The statistics can be pertinent to a specific geographic\narea, such as Fulton County, the state of California, or the\nNortheast.  There is no restriction as to time period (for example\n2005 versus 1987).',
         ),
         TrecQuery(
             '800',
             'Ovarian Cancer Treatment',
             'The remedies and treatments given to lesson or stop effects of ovarian\ncancer.',
             'Relevant documents must include names of chemicals or medicines used\nto fight ovarian cancer. Studies of new treatments that are being\ntried are valid, even if they have not reached a conclusion as to\neffectiveness.',
         ))
     expect(
         'gov2/trec-tb-2006', 50,
         TrecQuery(
             '801',
             'Kudzu Pueraria lobata',
             'Describe the origin, nature, extent of spread and means of controlling\nkudzu.',
             'Identification of kudzu as an invasive species with description of how\nit spreads and grows is relevant.  A document which is simply a list\nheaded "invasive species" or "noxious weeds" including kudzu is not\nrelevant.  A statement that kudzu is present in a specific location is\nnot relevant unless it relates to its spread.  Features of kudzu such\nas its use as a treatment for alcoholism or its function as a haven\nfor plant pathogens describe its nature and are relevant.',
         ),
         TrecQuery(
             '810',
             'timeshare resales',
             'Provide information regarding timeshare resales.',
             'Relevant documents will include those describing the prospects of\nreselling a timeshare and the pitfalls one should be aware of when\nselling a timeshare.  Real estate legislature regarding the resale of\ntimeshares is not relevant.',
         ),
         TrecQuery(
             '850',
             'Mississippi River flood',
             'How frequently does the Mississippi River flood its banks?',
             'Flooding is a relative term which implies water overflowing its\ncontainer and causing damage to the surrounding ares.  Documents are\nrelevant if they describe Mississippi River events which are commonly\nconsidered to be floods.  Relevant documents may also show how such\nevents have led to the introduction of controls to lessen the\nfrequency of damaging floods of this river.  Relevant documents\ninclude different levels of flooding, not only the major ones.\nDocuments are not relevant if they are essentially forecasts or\nroutine reports of water levels.  They are also not relevant if they\nare purely bibliographies or lists of sources for relevant documents.\nPhotos and videos of floods alone are not relevant.',
         ))

     # --- GenericQuery collections --------------------------------------
     expect(
         'gov2/trec-tb-2005/named-page', 252,
         GenericQuery('601', 'metallurgy division world war history'),
         GenericQuery(
             '610',
             'united states vs david j. kaiser transcript court appeal'),
         GenericQuery(
             '872',
             'medical advisory committee memorandum a rule to exclude idet'))
     expect(
         'gov2/trec-tb-2005/efficiency', 50000,
         GenericQuery('1', 'pierson s twin lakes marina'),
         GenericQuery('10', 'hotel meistertrunk'),
         GenericQuery('50000', 'senator durbin'))
     expect(
         'gov2/trec-tb-2006/named-page', 181,
         GenericQuery('901', 'CCAP advance case search'),
         GenericQuery('910', 'HS project "It\'s not easy being green"'),
         GenericQuery('1081', 'Colleges in PA'))
     expect(
         'gov2/trec-tb-2006/efficiency', 100000,
         GenericQuery('1', 'commissioner of revenue orange county virginia'),
         GenericQuery('10', 'terrorism policies in history'),
         GenericQuery('100000', 'cervical flexion extension injury'))
     expect(
         'gov2/trec-tb-2006/efficiency/10k', 10000,
         GenericQuery('1', 'commissioner of revenue orange county virginia'),
         GenericQuery('10', 'terrorism policies in history'),
         GenericQuery('10000', 'gdp of international business in bermuda'))
     expect(
         'gov2/trec-tb-2006/efficiency/stream1', 25000,
         GenericQuery('1', 'commissioner of revenue orange county virginia'),
         GenericQuery('10', 'terrorism policies in history'),
         GenericQuery('25000', 'organized crime in columbus ohio'))
     expect(
         'gov2/trec-tb-2006/efficiency/stream2', 25000,
         GenericQuery('25001', 'pea ridge national park'),
         GenericQuery('25010', 'nylon concrete'),
         GenericQuery('50000', 'mark wallace bush and u.n.'))
     expect(
         'gov2/trec-tb-2006/efficiency/stream3', 25000,
         GenericQuery('50001', "dept veteran's affairs connecticut"),
         GenericQuery('50010', 'nuclear & missile cold war'),
         GenericQuery('75000',
                      'the role and responsibilities of the u.s. senate'))
     expect(
         'gov2/trec-tb-2006/efficiency/stream4', 25000,
         GenericQuery('75001', 'united states office of personel management'),
         GenericQuery('75010', 'percentage of youth tobacco smokers'),
         GenericQuery('100000', 'cervical flexion extension injury'))
     expect(
         'gov2/trec-mq-2007', 10000,
         GenericQuery('1', 'after school program evaluation'),
         GenericQuery('10', 'qualifications for a senator'),
         GenericQuery('10000', 'californa mission'))
     expect(
         'gov2/trec-mq-2008', 10000,
         GenericQuery('10001', 'comparability of pay analyses'),
         GenericQuery(
             '10010',
             'in in 2015 will the u.s military be fighting iran and north korea'
         ),
         GenericQuery('20000', 'manchester city hall'))
예제 #20
0
 def test_queries(self):
     """Verify sampled queries for the tripclick train/val/test subsets.

     The expected query text is given as a compiled regular expression that
     pins only the first and last character. Each dataset ID is checked at
     indices 0, 9 and ``count - 1`` through ``self._test_queries``.
     """
     # Same value as the literal 48: re.DOTALL (16) | re.UNICODE (32).
     pattern_flags = re.DOTALL | re.UNICODE

     def q(query_id, pattern):
         return GenericQuery(query_id, re.compile(pattern, flags=pattern_flags))

     def expect(dataset_id, total, first, tenth, last):
         self._test_queries(dataset_id, count=total, items={
             0: first,
             9: tenth,
             total - 1: last,
         })

     # train and its head/torso/tail partitions
     expect('tripclick/train', 685649,
            q('8', r'^a.*s$'), q('136', r'^c.*g$'), q('1647720', r'^c.*e$'))
     expect('tripclick/train/head', 3529,
            q('8', r'^a.*s$'), q('136', r'^c.*g$'), q('1630245', r'^I.*d$'))
     expect('tripclick/train/head/dctr', 3529,
            q('8', r'^a.*s$'), q('136', r'^c.*g$'), q('1630245', r'^I.*d$'))
     expect('tripclick/train/torso', 105964,
            q('5', r'^p.*e$'), q('43', r'^p.*e$'), q('1647511', r'^h.*s$'))
     expect('tripclick/train/tail', 576156,
            q('1', r'^c.*d$'), q('65', r'^s.*t$'), q('1647720', r'^c.*e$'))

     # val and its partitions
     expect('tripclick/val', 3525,
            q('38', r'^a.*e$'), q('226', r'^h.*t$'), q('1645595', r'^h.*n$'))
     expect('tripclick/val/head', 1175,
            q('38', r'^a.*e$'), q('226', r'^h.*t$'), q('1630209', r'^A.*e$'))
     expect('tripclick/val/head/dctr', 1175,
            q('38', r'^a.*e$'), q('226', r'^h.*t$'), q('1630209', r'^A.*e$'))
     expect('tripclick/val/torso', 1175,
            q('534', r'^l.*n$'), q('4773', r'^h.*l$'), q('1626635', r'^p.*f$'))
     expect('tripclick/val/tail', 1175,
            q('1052', r'^r.*y$'), q('22440', r'^g.*w$'), q('1645595', r'^h.*n$'))

     # test and its partitions
     expect('tripclick/test', 3525,
            q('24', r'^p.*g$'), q('354', r'^a.*e$'), q('1646719', r'^p.*e$'))
     expect('tripclick/test/head', 1175,
            q('24', r'^p.*g$'), q('354', r'^a.*e$'), q('1610957', r'^S.*l$'))
     expect('tripclick/test/torso', 1175,
            q('152', r'^v.*s$'), q('2700', r'^p.*g$'), q('1641005', r'^h.*s$'))
     expect('tripclick/test/tail', 1175,
            q('4752', r'^h.*e$'), q('15118', r'^i.*n$'), q('1646719', r'^p.*e$'))
예제 #21
0
 def test_vaswani_queries(self):
     """Verify the vaswani queries at indices 0, 9 and 92 (93 in total).

     Each expected query text ends with a newline character.
     """
     sampled = {
         0: GenericQuery('1', 'MEASUREMENT OF DIELECTRIC CONSTANT OF LIQUIDS BY THE USE OF MICROWAVE TECHNIQUES\n'),
         9: GenericQuery('10', 'METHODS OF CALCULATING INSTANTANEOUS POWER DISSIPATION IN REACTIVE CIRCUITS\n'),
         92: GenericQuery('93', 'HIGH FREQUENCY OSCILLATORS USING TRANSISTORS THEORETICAL TREATMENT AND PRACTICAL CIRCUIT DETAILS\n'),
     }
     self._test_queries('vaswani', count=93, items=sampled)
예제 #22
0
 def test_queries(self):
     """Verify sampled queries for the msmarco-passage-v2 query sets.

     Each dataset ID is passed to ``self._test_queries`` with a ``count`` and
     the expected ``GenericQuery`` at indices 0, 9 and ``count - 1``.
     """

     def expect(dataset_id, total, first, tenth, last):
         self._test_queries(dataset_id, count=total, items={
             0: GenericQuery(*first),
             9: GenericQuery(*tenth),
             total - 1: GenericQuery(*last),
         })

     expect('msmarco-passage-v2/train', 277144,
            ('121352', 'define extreme'),
            ('80926', 'can you use wallapop on your computer'),
            ('50393', 'benefits of boiling lemons and drinking juice.'))
     # The leading space in the first dev1 text is part of the expected value.
     expect('msmarco-passage-v2/dev1', 3903,
            ('2', ' Androgen receptor define'),
            ('1049200', 'who recorded loving you'),
            ('1048565', 'who plays sebastian michaelis'))
     expect('msmarco-passage-v2/dev2', 4281,
            ('1048579', 'what is pcnt'),
            ('1048779', 'what is ott media'),
            ('1092262', ';liter chemistry definition'))
     expect('msmarco-passage-v2/trec-dl-2021', 477,
            ('787021', 'what is produced by muscle'),
            ('1052368', 'who stabbed dr. martin luther king'),
            ('855410', 'what is theraderm used for'))
     expect('msmarco-passage-v2/trec-dl-2021/judged', 53,
            ('2082', 'At about what age do adults normally begin to lose bone mass?'),
            ('1107704', 'what was the main benefit of a single european currency?'),
            ('1040198', 'who is the final arbiter of florida law in instances where there is no federal authority?'))
예제 #23
0
 def test_clueweb12_queries(self):
     """Verify sampled queries for the clueweb12 query collections.

     Each dataset ID is passed to ``self._test_queries`` with a ``count`` and
     the expected query record at indices 0, 9 and ``count - 1``. The record
     type varies by collection: ``TrecWebTrackQuery`` (with a tuple of
     ``TrecSubtopic``), ``GenericQuery``, ``NtcirQuery`` or ``MisinfoQuery``.
     """

     def expect(dataset_id, total, first, tenth, last):
         # The sampled indices are always 0, 9 and the final one.
         self._test_queries(dataset_id,
                            count=total,
                            items={
                                0: first,
                                9: tenth,
                                total - 1: last
                            })

     def subtopic(number, text, kind):
         return TrecSubtopic(number=number, text=text, type=kind)

     # The surrounding whitespace inside the description and subtopic
     # strings is part of the expected values and must be kept as is.
     expect(
         'clueweb12/trec-web-2013', 50,
         TrecWebTrackQuery(
             '201', 'raspberry pi', '\n    What is a raspberry pi?\n  ',
             'faceted',
             (
                 subtopic('1', '\n    What is a raspberry pi?\n  ', 'inf'),
                 subtopic('2',
                          '\n    What software does a raspberry pi use?\n  ',
                          'inf'),
                 subtopic(
                     '3',
                     '\n    What are hardware options for a raspberry pi?\n  ',
                     'inf'),
                 subtopic('4',
                          '\n    How much does a basic raspberry pi cost?\n  ',
                          'nav'),
                 subtopic(
                     '5',
                     '\n    Find info about the raspberry pi foundation.\n  ',
                     'inf'),
                 subtopic('6', '\n    Find a picture of a raspberry pi.\n  ',
                          'nav'),
             )),
         TrecWebTrackQuery(
             '210', 'golf gps', '\n    What is the best golf gps device?\n  ',
             'faceted',
             (
                 subtopic('1', '\n    What is the best golf gps device?\n  ',
                          'inf'),
                 subtopic(
                     '2',
                     '\n    Compare Bushnell, Callaway and Garmin golf gps systems.\n  ',
                     'inf'),
                 subtopic('3',
                          '\n    Is there a golf gps app for the Iphone?\n  ',
                          'nav'),
                 subtopic(
                     '4',
                     '\n    Find information on handheld golf gps devices.\n  ',
                     'inf'),
                 subtopic(
                     '5',
                     '\n    Is there a golf gps system that can be used world wide?\n  ',
                     'nav'),
                 subtopic('6',
                          '\n    Where can I get a used golf gps device?\n  ',
                          'inf'),
             )),
         TrecWebTrackQuery(
             '250', 'ford edge problems',
             '\n    What problems have afflicted the Ford Edge car model?\n  ',
             'single', ()))
     expect(
         'clueweb12/trec-web-2014', 50,
         TrecWebTrackQuery(
             '251', 'identifying spider bites',
             '\n  \tFind data on how to identify spider bites.\n  ', 'single',
             ()),
         TrecWebTrackQuery(
             '260', 'the american revolutionary',
             '\n  \tFind a list of the major battles of the American Revolution.\n  ',
             'faceted',
             (
                 subtopic(
                     '1',
                     '\n  \tFind a list of the major battles of the American Revolution.\n  ',
                     'nav'),
                 subtopic(
                     '2',
                     '\n  \tFind a time line of the American Revolution.\n  ',
                     'nav'),
                 subtopic('3',
                          '\n  \tFind images of the American Revolution.\n  ',
                          'inf'),
                 subtopic(
                     '4',
                     '\n  \tWhat were the causes of the American revolutionary war?\n  ',
                     'inf'),
                 subtopic(
                     '5',
                     '\n  \tWhat is the history of the American revolutionary war?\n  ',
                     'inf'),
             )),
         TrecWebTrackQuery(
             '300', 'how to find the mean',
             '\n  \tFind a page that explains how to compute the mean of a set of numbers.\n  ',
             'single', ()))
     expect(
         'clueweb12/b13/ntcir-www-1', 100,
         GenericQuery('0001', 'ascii code'),
         GenericQuery('0010', 'Jurassic World'),
         GenericQuery('0100', 'weight loss'))
     expect(
         'clueweb12/b13/ntcir-www-2', 80,
         NtcirQuery(
             '0001', 'Halloween picture',
             'Halloween is coming. You want to find some pictures about Halloween to introduce it to your children.'
         ),
         NtcirQuery(
             '0010', 'career plan',
             'You are an undergraduate student who is about to graduate. You want to search some information about how to plan your career.'
         ),
         NtcirQuery('0080', 'www.gardenburger.com',
                    'You want to find the website "www.gardenburger.com"'))
     expect(
         'clueweb12/b13/ntcir-www-3', 160,
         NtcirQuery(
             '0001', 'Halloween picture',
             'Halloween is coming. You want to find some pictures about Halloween to introduce it to your children.'
         ),
         NtcirQuery(
             '0010', 'career plan',
             'You are an undergraduate student who is about to graduate. You want to search some information about how to plan your career.'
         ),
         NtcirQuery(
             '0180', 'quincy jones productions',
             'You want a list of famous records produced by Quincy Jones.'))
     expect(
         'clueweb12/b13/trec-misinfo-2019', 51,
         MisinfoQuery(
             '1', 'cranberries urinary tract infections',
             '10.1002/14651858.CD001321.pub5',
             'Can cranberries prevent urinary tract infections?',
             'Symptoms of a urinary tract infection (UTI) include burning while urinating and a persistent urge to urinate. Relevant documents should discuss the effectiveness of consuming cranberries or cranberry juice for prevention of UTIs.  This topic is specifically about prevention rather than treatment of an existing infection.'
         ),
         MisinfoQuery(
             '10', 'gene therapy sickle cell',
             '10.1002/14651858.CD007652.pub6',
             'Can gene therapy prevent complications caused by sickle cell disease?',
             'Sickle cell disease (SCD) is an inherited blood disorder that affects the development of healthy red blood cells and causes red blood cells to change their form from a normal round shape to a crescent and rigid shape. People with sickle cell disease have fewer healthy blood cells, which can affect their oxygen carrying capacity and lead to serious or life-threatening complications. Gene therapy, as a newly advanced field, is claimed to be helpful for this disease. A relevant document discusses using gene therapy for preventing the symptoms and complications of SCD.'
         ),
         MisinfoQuery(
             '51', 'dehumidifiers asthma', '10.1002/14651858.CD003563.pub2',
             'Can dehumidifiers be used to control asthma?',
             'Dehumidification homes might improve lives of people with asthma. Dehumidifiers are electronic devices to control the level of humidity of environment which is suggested to contribute to factors that might affect asthma. A relevant document should discuss whether or not dehumidifiers can be used to control asthma symptoms or can improve lives of people with asthma.'
         ))
예제 #24
0
파일: nyt.py 프로젝트: allenai/ir_datasets
 def test_nyt_queries(self):
     """Verify sampled queries for the nyt query collections.

     Each dataset ID is passed to ``self._test_queries`` with a ``count`` and
     the expected query record at indices 0, 9 and ``count - 1``. The wksup
     sets use ``GenericQuery``; trec-core-2017 uses ``TrecQuery``.
     """

     def expect(dataset_id, total, first, tenth, last):
         self._test_queries(dataset_id,
                            count=total,
                            items={
                                0: first,
                                9: tenth,
                                total - 1: last
                            })

     expect(
         'nyt/wksup', 1864661,
         GenericQuery(
             '8454',
             'MARSH & MCLENNAN INC reports earnings for Qtr to Dec 31'),
         GenericQuery('8579', 'SALINGER BIOGRAPHY IS BLOCKED'),
         GenericQuery('1854817',
                      'STRATEGY ON IRAN STIRS NEW DEBATE AT WHITE HOUSE'))
     expect(
         'nyt/wksup/train', 1863657,
         GenericQuery(
             '8454',
             'MARSH & MCLENNAN INC reports earnings for Qtr to Dec 31'),
         GenericQuery('8579', 'SALINGER BIOGRAPHY IS BLOCKED'),
         GenericQuery('1854817',
                      'STRATEGY ON IRAN STIRS NEW DEBATE AT WHITE HOUSE'))
     expect(
         'nyt/wksup/valid', 1004,
         GenericQuery('6461', "Why We're Forced To Be Slumlords"),
         GenericQuery(
             '13148',
             'NOVAMETRIX MEDICAL SYSTEMS INC reports earnings for Qtr to Dec 31'
         ),
         GenericQuery('1854529', 'The Newest Antique: Atari'))
     expect(
         'nyt/trec-core-2017', 50,
         TrecQuery(
             '307',
             'New Hydroelectric Projects',
             'Identify hydroelectric projects proposed or under construction by country and location. Detailed description of nature, extent, purpose, problems, and consequences is desirable.',
             'Relevant documents would contain as a minimum a clear statement that a hydroelectric project is planned or construction is under way and the location of the project. Renovation of existing facilities would be judged not relevant unless plans call for a significant increase in acre-feet or reservoir or a marked change in the environmental impact of the project. Arguments for and against proposed projects are relevant as long as they are supported by specifics, including as a minimum the name or location of the project. A statement that an individual or organization is for or against such projects in general would not be relevant. Proposals or projects underway to dismantle existing facilities or drain existing reservoirs are not relevant, nor are articles reporting a decision to drop a proposed plan.',
         ),
         TrecQuery(
             '347',
             'Wildlife Extinction',
             'The spotted owl episode in America highlighted U.S. efforts to prevent the extinction of wildlife species. What is not well known is the effort of other countries to prevent the demise of species native to their countries. What other countries have begun efforts to prevent such declines?',
             'A relevant item will specify the country, the involved species, and steps taken to save the species.',
         ),
         TrecQuery(
             '690',
             'college education advantage',
             'Find documents which describe an advantage in hiring potential or increased income for graduates of U.S. colleges.',
             'Relevant documents cite some advantage of a college education for job opportunities. Documents citing better opportunities for non-college vocational-training is not relevant.',
         ))
예제 #25
0
 def queries_iter(self):
     """Lazily yield one ``GenericQuery`` per document in ``self._collection``.

     Each query is built from the document's ``doc_id`` and ``headline``, in
     the order produced by ``self._collection.docs_iter()``.
     """
     yield from (GenericQuery(record.doc_id, record.headline)
                 for record in self._collection.docs_iter())
 def test_msmarco_document_queries(self):
     """Verify sampled queries for the msmarco-document query collections.

     Each dataset ID is passed to ``self._test_queries`` with a ``count`` and
     the expected ``GenericQuery`` at indices 0, 9 and ``count - 1``.
     """
     self._test_queries('msmarco-document/dev', count=5193, items={
         0: GenericQuery(query_id='174249', text='does xpress bet charge to deposit money in your account'),
         9: GenericQuery(query_id='68095', text='can hives be a sign of pregnancy'),
         5192: GenericQuery(query_id='195199', text='glioma meaning')
     })
     self._test_queries('msmarco-document/eval', count=5793, items={
         0: GenericQuery(query_id='355339', text='how to display how.close you are to.cell.tower'),
         9: GenericQuery(query_id='920435', text='what was the first mammal cloned?'),
         5792: GenericQuery(query_id='132622', text='definition of attempted arson')
     })
     self._test_queries('msmarco-document/train', count=367013, items={
         # The leading ')' is part of the expected query text.
         0: GenericQuery(query_id='1185869', text=')what was the immediate impact of the success of the manhattan project?'),
         9: GenericQuery(query_id='666321', text='what happens in a wrist sprain'),
         367012: GenericQuery(query_id='405466', text='is carbonic acid soluble')
     })
     self._test_queries('msmarco-document/orcas', count=10405342, items={
         0: GenericQuery(query_id='9265503', text='github'),
         9: GenericQuery(query_id='3262423', text='! in c'),
         # The last query is the single code point U+1F9E1 (orange heart).
         # The previous literal was its UTF-8 bytes (F0 9F A7 A1) mis-decoded
         # as Latin-1; the escape keeps the value safe from re-encoding.
         10405341: GenericQuery(query_id='10460090', text='\U0001f9e1')
     })
     self._test_queries('msmarco-document/trec-dl-2019', count=200, items={
         0: GenericQuery(query_id='1108939', text='what slows down the flow of blood'),
         9: GenericQuery(query_id='885490', text='what party is paul ryan in'),
         199: GenericQuery(query_id='532603', text='university of dubuque enrollment')
     })
     self._test_queries('msmarco-document/trec-dl-2019/judged', count=43, items={
         0: GenericQuery(query_id='156493', text='do goldfish grow'),
         9: GenericQuery(query_id='915593', text='what types of food can you cook sous vide'),
         42: GenericQuery(query_id='146187', text='difference between a mcdouble and a double cheeseburger')
     })
     self._test_queries('msmarco-document/trec-dl-2020', count=200, items={
         0: GenericQuery(query_id='1030303', text='who is aziz hashim'),
         9: GenericQuery(query_id='1071750', text='why is pete rose banned from hall of fame'),
         199: GenericQuery(query_id='132622', text='definition of attempted arson')
     })
예제 #27
0
 def test_msmarco_passage_queries(self):
     """Spot-check the query collections of the MS MARCO passage datasets.

     Every row of the table below is ``(dataset_id, total_queries,
     {position: expected_query})`` and is handed, in order, to
     ``self._test_queries``.
     """
     # NOTE(review): the two 'msmarco-document/trec-dl-2020' rows exercise
     # the document dataset from inside the passage test; they are kept
     # as-is -- confirm whether passage IDs were intended.
     query_checks = [
         ('msmarco-passage/train', 808731, {
             0: GenericQuery('121352', 'define extreme'),
             9: GenericQuery('492875', 'sanitizer temperature'),
             808730: GenericQuery('50393', 'benefits of boiling lemons and drinking juice.'),
         }),
         ('msmarco-passage/train/judged', 502939, {
             0: GenericQuery('121352', 'define extreme'),
             9: GenericQuery('54528', 'blood clots in urine after menopause'),
             502938: GenericQuery('50393', 'benefits of boiling lemons and drinking juice.'),
         }),
         ('msmarco-passage/train/split200-train', 808531, {
             0: GenericQuery('121352', 'define extreme'),
             9: GenericQuery('492875', 'sanitizer temperature'),
             808530: GenericQuery('50393', 'benefits of boiling lemons and drinking juice.'),
         }),
         ('msmarco-passage/train/split200-valid', 200, {
             0: GenericQuery('93927', 'coastal processes are located on what vertebrae'),
             9: GenericQuery('503706', 'steroid prednisone possible risks'),
             199: GenericQuery('44209', 'average spousal ss benefit'),
         }),
         ('msmarco-passage/train/medical', 78895, {
             0: GenericQuery('54528', 'blood clots in urine after menopause'),
             9: GenericQuery('445408', 'marijuana for weight loss'),
             78894: GenericQuery('945443', 'when do you start going to the doctor every other week during pregnancy'),
         }),
         ('msmarco-passage/dev', 101093, {
             0: GenericQuery('1048578', 'cost of endless pools/swim spa'),
             9: GenericQuery('1048587', 'what is patron'),
             101092: GenericQuery('524285', 'treadmill incline meaning'),
         }),
         ('msmarco-passage/dev/small', 6980, {
             0: GenericQuery('1048585', "what is paula deen's brother"),
             9: GenericQuery('524699', 'tricare service number'),
             6979: GenericQuery('1048565', 'who plays sebastian michaelis'),
         }),
         ('msmarco-passage/dev/judged', 55578, {
             0: GenericQuery('1048578', 'cost of endless pools/swim spa'),
             9: GenericQuery('1048601', 'what is pastoral medicine'),
             55577: GenericQuery('1048570', 'what is pearls before swine?'),
         }),
         ('msmarco-passage/eval', 101092, {
             0: GenericQuery('786436', 'what is prescribed to treat thyroid storm'),
             9: GenericQuery('1048619', 'who plays stitch'),
             101091: GenericQuery('786430', 'what is prescribed for pelvic inflammatory disease?'),
         }),
         ('msmarco-passage/eval/small', 6837, {
             0: GenericQuery('57', ' term service agreement definition'),
             9: GenericQuery('262636', 'how long is a moment'),
             6836: GenericQuery('567976', 'what are the causes of unemployment'),
         }),
         ('msmarco-passage/trec-dl-2019', 200, {
             0: GenericQuery('1108939', 'what slows down the flow of blood'),
             9: GenericQuery('885490', 'what party is paul ryan in'),
             199: GenericQuery('532603', 'university of dubuque enrollment'),
         }),
         ('msmarco-passage/trec-dl-2019/judged', 43, {
             0: GenericQuery('156493', 'do goldfish grow'),
             9: GenericQuery('1037798', 'who is robert gray'),
             42: GenericQuery('146187', 'difference between a mcdouble and a double cheeseburger'),
         }),
         ('msmarco-passage/trec-dl-2020', 200, {
             0: GenericQuery('1030303', 'who is aziz hashim'),
             9: GenericQuery('1071750', 'why is pete rose banned from hall of fame'),
             199: GenericQuery('132622', 'definition of attempted arson'),
         }),
         ('msmarco-document/trec-dl-2020', 200, {
             0: GenericQuery('1030303', 'who is aziz hashim'),
             9: GenericQuery('1071750', 'why is pete rose banned from hall of fame'),
             199: GenericQuery('132622', 'definition of attempted arson'),
         }),
         ('msmarco-document/trec-dl-2020/judged', 45, {
             0: GenericQuery('1030303', 'who is aziz hashim'),
             9: GenericQuery('1105792', 'define: geon'),
             44: GenericQuery('997622', 'where is the show shameless filmed'),
         }),
         ('msmarco-passage/trec-dl-hard', 50, {
             0: GenericQuery('1108939', 'what slows down the flow of blood'),
             9: GenericQuery('451602', "medicare's definition of mechanical ventilation"),
             49: GenericQuery('88495', 'causes of stroke?'),
         }),
         ('msmarco-passage/trec-dl-hard/fold1', 10, {
             0: GenericQuery('966413', 'where are the benefits of cinnamon as a supplement?'),
             9: GenericQuery('883915', 'what other brain proteins can cause dementia'),
         }),
         ('msmarco-passage/trec-dl-hard/fold2', 10, {
             0: GenericQuery('588587', 'what causes heavy metal toxins in your body'),
             9: GenericQuery('794429', 'what is sculpture shape space'),
         }),
         ('msmarco-passage/trec-dl-hard/fold3', 10, {
             0: GenericQuery('1108939', 'what slows down the flow of blood'),
             9: GenericQuery('86606', 'causes of gas in large intestine'),
         }),
         ('msmarco-passage/trec-dl-hard/fold4', 10, {
             0: GenericQuery('1108100', 'what type of movement do bacteria exhibit?'),
             9: GenericQuery('88495', 'causes of stroke?'),
         }),
         ('msmarco-passage/trec-dl-hard/fold5', 10, {
             0: GenericQuery('190044', 'foods to detox liver naturally'),
             9: GenericQuery('877809', 'what metal are hip replacements made of'),
         }),
     ]
     for dataset_id, total_queries, sampled in query_checks:
         self._test_queries(dataset_id, count=total_queries, items=sampled)
예제 #28
0
 def test_codesearchnet_queries(self):
     """Spot-check query counts and sample queries of the CodeSearchNet splits.

     For each of ``train``, ``valid``, ``test`` and ``challenge`` the
     expected number of queries and a few ``{position: GenericQuery}``
     samples are passed to ``self._test_queries``.  In the first three
     splits the query id is a GitHub permalink and the text is a
     docstring; the challenge split uses short numeric ids.
     """
     # Each expected docstring is written as ONE physical line with explicit
     # "\n" escapes: a single-quoted literal may not contain a raw line break.
     self._test_queries('codesearchnet/train', count=1880853, items={
         0: GenericQuery(
             'https://github.com/ageitgey/face_recognition/blob/c96b010c02f15e8eeb0f71308c641179ac1f19bb/examples/face_recognition_knn.py#L46-L108',
             'Trains a k-nearest neighbors classifier for face recognition.\n\n    :param train_dir: directory that contains a sub-directory for each known person, with its name.\n\n     (View in source code to see train_dir example tree structure)\n\n     Structure:\n        <train_dir>/\n        ├── <person1>/\n        │   ├── <somename1>.jpeg\n        │   ├── <somename2>.jpeg\n        │   ├── ...\n        ├── <person2>/\n        │   ├── <somename1>.jpeg\n        │   └── <somename2>.jpeg\n        └── ...\n\n    :param model_save_path: (optional) path to save model on disk\n    :param n_neighbors: (optional) number of neighbors to weigh in classification. Chosen automatically if not specified\n    :param knn_algo: (optional) underlying data structure to support knn.default is ball_tree\n    :param verbose: verbosity of training\n    :return: returns knn classifier that was trained on the given data.'
         ),
         9: GenericQuery(
             'https://github.com/ageitgey/face_recognition/blob/c96b010c02f15e8eeb0f71308c641179ac1f19bb/face_recognition/api.py#L135-L151',
             "Returns an 2d array of bounding boxes of human faces in a image using the cnn face detector\n    If you are using a GPU, this can give you much faster results since the GPU\n    can process batches of images at once. If you aren't using a GPU, you don't need this function.\n\n    :param img: A list of images (each as a numpy array)\n    :param number_of_times_to_upsample: How many times to upsample the image looking for faces. Higher numbers find smaller faces.\n    :param batch_size: How many images to include in each GPU processing batch.\n    :return: A list of tuples of found face locations in css (top, right, bottom, left) order"
         ),
         1880852: GenericQuery(
             'https://github.com/nutella-framework/nutella_lib.js/blob/b3a3406a407e2a1ada6edcc503b70991f9cb249b/src/run_net_bin.js#L87-L102',
             'Helper function This function uploads a file with a certain file name. If the upload is successful the first callback is executed, otherwise the second one is.'
         ),
     })
     self._test_queries('codesearchnet/valid', count=89154, items={
         0: GenericQuery(
             'https://github.com/openai/baselines/blob/3301089b48c42b87b396e246ea3f56fa4bfc9678/baselines/deepq/deepq.py#L95-L333',
             'Train a deepq model.\n\n    Parameters\n    -------\n    env: gym.Env\n        environment to train on\n    network: string or a function\n        neural network to use as a q function approximator. If string, has to be one of the names of registered models in baselines.common.models\n        (mlp, cnn, conv_only). If a function, should take an observation tensor and return a latent variable tensor, which\n        will be mapped to the Q function heads (see build_q_func in baselines.deepq.models for details on that)\n    seed: int or None\n        prng seed. The runs with the same seed "should" give the same results. If None, no seeding is used.\n    lr: float\n        learning rate for adam optimizer\n    total_timesteps: int\n        number of env steps to optimizer for\n    buffer_size: int\n        size of the replay buffer\n    exploration_fraction: float\n        fraction of entire training period over which the exploration rate is annealed\n    exploration_final_eps: float\n        final value of random action probability\n    train_freq: int\n        update the model every `train_freq` steps.\n        set to None to disable printing\n    batch_size: int\n        size of a batched sampled from replay buffer for training\n    print_freq: int\n        how often to print out training progress\n        set to None to disable printing\n    checkpoint_freq: int\n        how often to save the model. This is so that the best version is restored\n        at the end of the training. If you do not wish to restore the best version at\n        the end of the training set this variable to None.\n    learning_starts: int\n        how many steps of the model to collect transitions for before learning starts\n    gamma: float\n        discount factor\n    target_network_update_freq: int\n        update the target network every `target_network_update_freq` steps.\n    prioritized_replay: True\n        if True prioritized replay buffer will be used.\n    prioritized_replay_alpha: float\n        alpha parameter for prioritized replay buffer\n    prioritized_replay_beta0: float\n        initial value of beta for prioritized replay buffer\n    prioritized_replay_beta_iters: int\n        number of iterations over which beta will be annealed from initial value\n        to 1.0. If set to None equals to total_timesteps.\n    prioritized_replay_eps: float\n        epsilon to add to the TD errors when updating priorities.\n    param_noise: bool\n        whether or not to use parameter space noise (https://arxiv.org/abs/1706.01905)\n    callback: (locals, globals) -> None\n        function called at every steps with state of the algorithm.\n        If callback returns true training stops.\n    load_path: str\n        path to load the model from. (default: None)\n    **network_kwargs\n        additional keyword arguments to pass to the network builder.\n\n    Returns\n    -------\n    act: ActWrapper\n        Wrapper over act function. Adds ability to save it and load it.\n        See header of baselines/deepq/categorical.py for details on the act function.'
         ),
         9: GenericQuery(
             'https://github.com/openai/baselines/blob/3301089b48c42b87b396e246ea3f56fa4bfc9678/baselines/common/cmd_util.py#L21-L52',
             'Create a wrapped, monitored SubprocVecEnv for Atari and MuJoCo.'
         ),
         89153: GenericQuery(
             'https://github.com/christophehurpeau/SpringbokJS/blob/bc1069baafc0785d361a33ff5a2fa604b8b3b454/src/browser/base/S.History.js#L72-L78',
             'Attempt to load the current URL fragment.'
         ),
     })
     self._test_queries('codesearchnet/test', count=100529, items={
         0: GenericQuery(
             'https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/youtube.py#L135-L143',
             'Extracts video ID from URL.'
         ),
         9: GenericQuery(
             'https://github.com/soimort/you-get/blob/b746ac01c9f39de94cac2d56f665285b0523b974/src/you_get/extractors/sina.py#L54-L64',
             'Downloads a Sina video by its unique vkey.\n    http://video.sina.com/'
         ),
         100528: GenericQuery(
             'https://github.com/dherges/grunt-bower-event/blob/ce7dc2303ef186ccf5eaa8d5b691102e13523076/tasks/lib/BowerTask.js#L24-L30',
             "Creates a new task.\n\n@param context Task function context (='this' inside a grunt task function)\n@param grunt Grunt object"
         ),
     })
     self._test_queries('codesearchnet/challenge', count=99, items={
         0: GenericQuery('1', 'convert int to string'),
         9: GenericQuery('10', 'binomial distribution'),
         98: GenericQuery('99', 'how to read .csv file in an efficient way?'),
     })
예제 #29
0
 def test_msmarco_document_queries(self):
     """Spot-check the query collections of the MS MARCO document datasets.

     Every row of the table below is ``(dataset_id, total_queries,
     {position: expected_query})`` and is handed, in order, to
     ``self._test_queries``.  Each dataset id appears exactly once.
     """
     query_checks = [
         ('msmarco-document/dev', 5193, {
             0: GenericQuery('174249', 'does xpress bet charge to deposit money in your account'),
             9: GenericQuery('68095', 'can hives be a sign of pregnancy'),
             5192: GenericQuery('195199', 'glioma meaning'),
         }),
         ('msmarco-document/eval', 5793, {
             0: GenericQuery('355339', 'how to display how.close you are to.cell.tower'),
             9: GenericQuery('920435', 'what was the first mammal cloned?'),
             5792: GenericQuery('132622', 'definition of attempted arson'),
         }),
         ('msmarco-document/train', 367013, {
             0: GenericQuery('1185869', ')what was the immediate impact of the success of the manhattan project?'),
             9: GenericQuery('666321', 'what happens in a wrist sprain'),
             367012: GenericQuery('405466', 'is carbonic acid soluble'),
         }),
         # NOTE(review): the last ORCAS text looks like mis-decoded UTF-8; it
         # is kept verbatim -- presumably it mirrors the raw dataset, verify.
         ('msmarco-document/orcas', 10405342, {
             0: GenericQuery('9265503', 'github'),
             9: GenericQuery('3262423', '! in c'),
             10405341: GenericQuery('10460090', 'ð§¡'),
         }),
         ('msmarco-document/trec-dl-2019', 200, {
             0: GenericQuery('1108939', 'what slows down the flow of blood'),
             9: GenericQuery('885490', 'what party is paul ryan in'),
             199: GenericQuery('532603', 'university of dubuque enrollment'),
         }),
         ('msmarco-document/trec-dl-2019/judged', 43, {
             0: GenericQuery('156493', 'do goldfish grow'),
             9: GenericQuery('915593', 'what types of food can you cook sous vide'),
             42: GenericQuery('146187', 'difference between a mcdouble and a double cheeseburger'),
         }),
         ('msmarco-document/trec-dl-2020', 200, {
             0: GenericQuery('1030303', 'who is aziz hashim'),
             9: GenericQuery('1071750', 'why is pete rose banned from hall of fame'),
             199: GenericQuery('132622', 'definition of attempted arson'),
         }),
         ('msmarco-document/trec-dl-2020/judged', 45, {
             0: GenericQuery('1030303', 'who is aziz hashim'),
             9: GenericQuery('1105792', 'define: geon'),
             44: GenericQuery('997622', 'where is the show shameless filmed'),
         }),
         ('msmarco-document/trec-dl-hard', 50, {
             0: GenericQuery('1108939', 'what slows down the flow of blood'),
             9: GenericQuery('451602', "medicare's definition of mechanical ventilation"),
             49: GenericQuery('88495', 'causes of stroke?'),
         }),
         ('msmarco-document/trec-dl-hard/fold1', 10, {
             0: GenericQuery('966413', 'where are the benefits of cinnamon as a supplement?'),
             9: GenericQuery('883915', 'what other brain proteins can cause dementia'),
         }),
         ('msmarco-document/trec-dl-hard/fold2', 10, {
             0: GenericQuery('588587', 'what causes heavy metal toxins in your body'),
             9: GenericQuery('794429', 'what is sculpture shape space'),
         }),
         ('msmarco-document/trec-dl-hard/fold3', 10, {
             0: GenericQuery('1108939', 'what slows down the flow of blood'),
             9: GenericQuery('86606', 'causes of gas in large intestine'),
         }),
         ('msmarco-document/trec-dl-hard/fold4', 10, {
             0: GenericQuery('1108100', 'what type of movement do bacteria exhibit?'),
             9: GenericQuery('88495', 'causes of stroke?'),
         }),
         ('msmarco-document/trec-dl-hard/fold5', 10, {
             0: GenericQuery('190044', 'foods to detox liver naturally'),
             9: GenericQuery('877809', 'what metal are hip replacements made of'),
         }),
     ]
     for dataset_id, total_queries, sampled in query_checks:
         self._test_queries(dataset_id, count=total_queries, items=sampled)
예제 #30
0
 def test_clueweb12_queries(self):
     """Spot-check the query collections of the ClueWeb12-based datasets.

     Every row of the table below is ``(dataset_id, total_queries,
     {position: expected_query})`` and is handed, in order, to
     ``self._test_queries``.  The record type differs per dataset
     (``TrecWebTrackQuery``, ``GenericQuery``, ``NtcirQuery``,
     ``MisinfoQuery``).
     """
     # Positions 0 and 9 are expected to be identical in NTCIR WWW-2 and
     # WWW-3, so those two records are built once and reused in both rows.
     halloween_topic = NtcirQuery(
         '0001', 'Halloween picture',
         'Halloween is coming. You want to find some pictures about Halloween to introduce it to your children.')
     career_topic = NtcirQuery(
         '0010', 'career plan',
         'You are an undergraduate student who is about to graduate. You want to search some information about how to plan your career.')

     query_checks = [
         ('clueweb12/trec-web-2013', 50, {
             0: TrecWebTrackQuery(
                 '201', 'raspberry pi', '\n    What is a raspberry pi?\n  ', 'faceted',
                 (
                     TrecSubtopic(number='1', text='\n    What is a raspberry pi?\n  ', type='inf'),
                     TrecSubtopic(number='2', text='\n    What software does a raspberry pi use?\n  ', type='inf'),
                     TrecSubtopic(number='3', text='\n    What are hardware options for a raspberry pi?\n  ', type='inf'),
                     TrecSubtopic(number='4', text='\n    How much does a basic raspberry pi cost?\n  ', type='nav'),
                     TrecSubtopic(number='5', text='\n    Find info about the raspberry pi foundation.\n  ', type='inf'),
                     TrecSubtopic(number='6', text='\n    Find a picture of a raspberry pi.\n  ', type='nav'),
                 )),
             9: TrecWebTrackQuery(
                 '210', 'golf gps', '\n    What is the best golf gps device?\n  ', 'faceted',
                 (
                     TrecSubtopic(number='1', text='\n    What is the best golf gps device?\n  ', type='inf'),
                     TrecSubtopic(number='2', text='\n    Compare Bushnell, Callaway and Garmin golf gps systems.\n  ', type='inf'),
                     TrecSubtopic(number='3', text='\n    Is there a golf gps app for the Iphone?\n  ', type='nav'),
                     TrecSubtopic(number='4', text='\n    Find information on handheld golf gps devices.\n  ', type='inf'),
                     TrecSubtopic(number='5', text='\n    Is there a golf gps system that can be used world wide?\n  ', type='nav'),
                     TrecSubtopic(number='6', text='\n    Where can I get a used golf gps device?\n  ', type='inf'),
                 )),
             49: TrecWebTrackQuery(
                 '250', 'ford edge problems',
                 '\n    What problems have afflicted the Ford Edge car model?\n  ',
                 'single', ()),
         }),
         ('clueweb12/trec-web-2014', 50, {
             0: TrecWebTrackQuery(
                 '251', 'identifying spider bites',
                 '\n  \tFind data on how to identify spider bites.\n  ',
                 'single', ()),
             9: TrecWebTrackQuery(
                 '260', 'the american revolutionary',
                 '\n  \tFind a list of the major battles of the American Revolution.\n  ',
                 'faceted',
                 (
                     TrecSubtopic(number='1', text='\n  \tFind a list of the major battles of the American Revolution.\n  ', type='nav'),
                     TrecSubtopic(number='2', text='\n  \tFind a time line of the American Revolution.\n  ', type='nav'),
                     TrecSubtopic(number='3', text='\n  \tFind images of the American Revolution.\n  ', type='inf'),
                     TrecSubtopic(number='4', text='\n  \tWhat were the causes of the American revolutionary war?\n  ', type='inf'),
                     TrecSubtopic(number='5', text='\n  \tWhat is the history of the American revolutionary war?\n  ', type='inf'),
                 )),
             49: TrecWebTrackQuery(
                 '300', 'how to find the mean',
                 '\n  \tFind a page that explains how to compute the mean of a set of numbers.\n  ',
                 'single', ()),
         }),
         ('clueweb12/b13/ntcir-www-1', 100, {
             0: GenericQuery('0001', 'ascii code'),
             9: GenericQuery('0010', 'Jurassic World'),
             99: GenericQuery('0100', 'weight loss'),
         }),
         ('clueweb12/b13/ntcir-www-2', 80, {
             0: halloween_topic,
             9: career_topic,
             79: NtcirQuery(
                 '0080', 'www.gardenburger.com',
                 'You want to find the website "www.gardenburger.com"'),
         }),
         ('clueweb12/b13/ntcir-www-3', 160, {
             0: halloween_topic,
             9: career_topic,
             159: NtcirQuery(
                 '0180', 'quincy jones productions',
                 'You want a list of famous records produced by Quincy Jones.'),
         }),
         ('clueweb12/b13/trec-misinfo-2019', 51, {
             0: MisinfoQuery(
                 '1', 'cranberries urinary tract infections',
                 '10.1002/14651858.CD001321.pub5',
                 'Can cranberries prevent urinary tract infections?',
                 'Symptoms of a urinary tract infection (UTI) include burning while urinating and a persistent urge to urinate. Relevant documents should discuss the effectiveness of consuming cranberries or cranberry juice for prevention of UTIs.  This topic is specifically about prevention rather than treatment of an existing infection.'),
             9: MisinfoQuery(
                 '10', 'gene therapy sickle cell',
                 '10.1002/14651858.CD007652.pub6',
                 'Can gene therapy prevent complications caused by sickle cell disease?',
                 'Sickle cell disease (SCD) is an inherited blood disorder that affects the development of healthy red blood cells and causes red blood cells to change their form from a normal round shape to a crescent and rigid shape. People with sickle cell disease have fewer healthy blood cells, which can affect their oxygen carrying capacity and lead to serious or life-threatening complications. Gene therapy, as a newly advanced field, is claimed to be helpful for this disease. A relevant document discusses using gene therapy for preventing the symptoms and complications of SCD.'),
             50: MisinfoQuery(
                 '51', 'dehumidifiers asthma',
                 '10.1002/14651858.CD003563.pub2',
                 'Can dehumidifiers be used to control asthma?',
                 'Dehumidification homes might improve lives of people with asthma. Dehumidifiers are electronic devices to control the level of humidity of environment which is suggested to contribute to factors that might affect asthma. A relevant document should discuss whether or not dehumidifiers can be used to control asthma symptoms or can improve lives of people with asthma.'),
         }),
         ('clueweb12/b13/clef-ehealth', 300, {
             0: GenericQuery('101001', 'inguinal hernia repair laproscopic mesh benefits risks'),
             9: GenericQuery('102004', '"anal" skin tags removal or treatments "recovery"'),
             299: GenericQuery('150006', 'what causes painful erections after have a foley catheter'),
         }),
         ('clueweb12/b13/clef-ehealth/cs', 300, {
             0: GenericQuery('101001-cs', 'korekce inguinální hernie laparoskopická síťka přínosy rizika'),
             9: GenericQuery('102004-cs', 'odstranění kožních výrůstků v oblasti konečníku nebo zotavení se z léčby'),
             299: GenericQuery('150006-cs', 'co způsobuje bolestivou erekci po zavedení Foleyova katétru'),
         }),
         ('clueweb12/b13/clef-ehealth/de', 300, {
             0: GenericQuery('101001-de', 'Leistenbruch Reparatur laparoskopisch Netz Vorteile Risiken'),
             9: GenericQuery('102004-de', 'anal "Hautauswuchs Entfernung oder Behandlungen" Heilung'),
             299: GenericQuery('150006-de', 'was verursacht schmerzhafte Erektion nach einem Foley-Katheter'),
         }),
         ('clueweb12/b13/clef-ehealth/fr', 300, {
             0: GenericQuery('101001-fr', 'avantages et risques du traitement des hernies inguinales par laparoscopie à maillage '),
             9: GenericQuery('102004-fr', 'l\'élimination des balises anales de peau ou "la rémission" après les traitements'),
             299: GenericQuery('150006-fr', 'quelle est la cause des érections douloureuses après avoir eu la sonde de Foley sur place'),
         }),
         ('clueweb12/b13/clef-ehealth/hu', 300, {
             0: GenericQuery('101001-hu', 'lágyéksérv helyreállítás laparoszkópiás háló előnyök kockázatok'),
             9: GenericQuery('102004-hu', 'anális" bőrfüggelékek eltávolítás or kezelések "gyógyulás'),
             299: GenericQuery('150006-hu', 'mi okozza a fájdalmas erekciót foley katéterezést követően'),
         }),
         ('clueweb12/b13/clef-ehealth/pl', 300, {
             0: GenericQuery('101001-pl', 'operacja laparoskopowa przepukliny pachwinowej z użyciem siatki korzyści ryzyko'),
             9: GenericQuery('102004-pl', 'odbytowy "usunięcie brodawek miękkich skóry lub leczenie" powrót do zdrowia'),
             299: GenericQuery('150006-pl', "co powoduje bolesne erekcje po cewnikowaniu cewnikiem Foley'a"),
         }),
         ('clueweb12/b13/clef-ehealth/sv', 300, {
             0: GenericQuery('101001-sv', 'ljumskbråck reparation laparoskopisk nät fördelar risker'),
             9: GenericQuery('102004-sv', 'anal" hudflikar borttagning eller behandlingar "återhämtning'),
             299: GenericQuery('150006-sv', 'vad som orsakar smärtsamma erektioner efter att ha haft en Foley-kateter'),
         }),
     ]
     for dataset_id, total_queries, sampled in query_checks:
         self._test_queries(dataset_id, count=total_queries, items=sampled)