def load_obqa():
    """Return the questions of every OpenBookQA split as one flat list.

    The train, dev and test splits are each loaded through
    ``dm.ALLEN_AI_OBQA``, concatenated with a fresh index, and the
    ``"question"`` column of the combined frame is returned as a list.
    """
    splits = (OBQAType.TRAIN, OBQAType.DEV, OBQAType.TEST)
    frames = [dm.ALLEN_AI_OBQA(split) for split in splits]
    # ignore_index renumbers the rows; sort=False keeps the column order
    # as-is instead of sorting it.
    combined = pd.concat(frames, ignore_index=True, sort=False)
    questions = combined["question"]
    return questions.tolist()
Esempio n. 2
0
def main():
    """Print a walkthrough of the OpenBookQA data.

    Shows, in order: the train, dev and test splits, ten shuffled core
    facts, and the train split loaded with retrieved facts. Each section
    is followed by blank lines.
    """
    spacer = "\n\n"
    sections = (
        ("Train examples:", OBQAType.TRAIN),
        ("Dev examples:", OBQAType.DEV),
        ("Test examples:", OBQAType.TEST),
    )
    for heading, split in sections:
        print(heading)
        print(dm.ALLEN_AI_OBQA(split))  # Displays a Pandas DataFrame.
        print(spacer)

    core_facts = list(OBQAFacts())
    # There are 1326 core facts in total.
    assert len(core_facts) == 1326
    print("Some random facts from OpenBookQA:")
    random.shuffle(core_facts)
    sample = core_facts[:10]
    for entry in sample:
        print("    * {}".format(entry))
    print(spacer)

    print("Train examples with retrieved facts:")
    train_with_facts = dm.ALLEN_AI_OBQA(OBQAType.TRAIN, True)
    print(train_with_facts)
    print(spacer)
Esempio n. 3
0
 def test_parsing_logic(self, mock_download_dataset):
     # Writes three well-formed questions to the TRAIN split, loads that
     # split, and checks the resulting frame against the expected records
     # (id, question, answers, correct).
     questions = [GOOD_QUESTION1, GOOD_QUESTION2, GOOD_QUESTION3]
     self.write_questions(OBQAType.TRAIN, questions)
     actual = dm.ALLEN_AI_OBQA(OBQAType.TRAIN)
     expected_records = json.loads("""[
         {
             "id": "7-980",
             "question": "The sun is responsible for",
             "answers": [
                 "puppies learning new tricks",
                 "children growing up and getting old",
                 "flowers wilting in a vase",
                 "plants sprouting, blooming and wilting"
             ],
             "correct": "D"
         },
         {
             "id": "1158",
             "question": "Which product cannot convert energy into light?",
             "answers": [
                 "chandelier",
                 "charger",
                 "floor lamp",
                 "Christmas tree lights"
             ],
             "correct": "B"
         },
         {
             "id": "609",
             "question": "What do cows eat?",
             "answers": [
                 "Chickpeas",
                 "Chocolate",
                 "Steak",
                 "Poultry"
             ],
             "correct": "A"
         }
     ]""")
     expected = pd.DataFrame(expected_records)
     pd.testing.assert_frame_equal(actual, expected)
     # The download helper must have been invoked exactly once.
     mock_download_dataset.assert_called_once_with(
         Collection.ALLEN_AI_OBQA, ANY)
Esempio n. 4
0
 def test_retrieved_facts(self, mock_download_dataset):
     # Stores RETRIEVED_FACTS as extracted_facts.json in the ALLEN_AI_OBQA
     # cache directory, writes two questions to the TRAIN split, and
     # checks that loading with with_retrieved_facts=True yields the
     # expected "retrieved_facts" column alongside the question fields.
     cache_path = os.path.join(
         datamine_cache_dir(), "ALLEN_AI_OBQA", "extracted_facts.json")
     with open(cache_path, "wt") as handle:
         json.dump(RETRIEVED_FACTS, handle)
     questions = [GOOD_QUESTION1, GOOD_QUESTION2]
     self.write_questions(OBQAType.TRAIN, questions)
     actual = dm.ALLEN_AI_OBQA(OBQAType.TRAIN, with_retrieved_facts=True)
     expected_records = json.loads("""[
         {
             "id": "7-980",
             "question": "The sun is responsible for",
             "answers": [
                 "puppies learning new tricks",
                 "children growing up and getting old",
                 "flowers wilting in a vase",
                 "plants sprouting, blooming and wilting"
             ],
             "correct": "D",
             "retrieved_facts": [
                 {
                     "context": "Context 1",
                     "token_based": [
                         "tb11",
                         "tb12"
                     ],
                     "vector_based": [
                         "vb1"
                     ]
                 },
                 {
                     "context": "Context 2",
                     "token_based": [
                         "tb21",
                         "tb22"
                     ],
                     "vector_based": [
                         "vb2"
                     ]
                 },
                 {
                     "context": "Context 3",
                     "token_based": [
                         "tb31",
                         "tb32"
                     ],
                     "vector_based": [
                         "vb3"
                     ]
                 },
                 {
                     "context": "Context 4",
                     "token_based": [
                         "tb41",
                         "tb42"
                     ],
                     "vector_based": [
                         "vb4"
                     ]
                 }
             ]
         },
         {
             "id": "1158",
             "question": "Which product cannot convert energy into light?",
             "answers": [
                 "chandelier",
                 "charger",
                 "floor lamp",
                 "Christmas tree lights"
             ],
             "correct": "B",
             "retrieved_facts": [
                 {
                     "context": "Context 8",
                     "token_based": [
                         "tb81",
                         "tb82"
                     ],
                     "vector_based": [
                         "vb8"
                     ]
                 },
                 {
                     "context": "Context 6",
                     "token_based": [
                         "tb61",
                         "tb62"
                     ],
                     "vector_based": [
                         "vb6"
                     ]
                 },
                 {
                     "context": "Context 5",
                     "token_based": [
                         "tb51",
                         "tb52"
                     ],
                     "vector_based": [
                         "vb5"
                     ]
                 },
                 {
                     "context": "Context 7",
                     "token_based": [
                         "tb71",
                         "tb72"
                     ],
                     "vector_based": [
                         "vb7"
                     ]
                 }
             ]
         }
         ]""")
     expected = pd.DataFrame(expected_records)
     pd.testing.assert_frame_equal(actual, expected)
     # The download helper must have been invoked exactly once.
     mock_download_dataset.assert_called_once_with(
         Collection.ALLEN_AI_OBQA, ANY)
Esempio n. 5
0
 def test_duplicate_ids(self, mock_download_dataset):
     # Writing the same question twice to the DEV split must make the
     # loader raise AssertionError.
     repeated = [GOOD_QUESTION1, GOOD_QUESTION1]
     self.write_questions(OBQAType.DEV, repeated)
     self.assertRaises(AssertionError, dm.ALLEN_AI_OBQA, OBQAType.DEV)
     # The download helper must have been invoked exactly once.
     mock_download_dataset.assert_called_once_with(
         Collection.ALLEN_AI_OBQA, ANY)
Esempio n. 6
0
 def test_invalid_correct_answer(self, mock_download_dataset):
     # Loading a TEST split that holds only the INVALID_CORECT_ANSWER
     # fixture must raise AssertionError.
     bad_questions = [INVALID_CORECT_ANSWER]
     self.write_questions(OBQAType.TEST, bad_questions)
     self.assertRaises(AssertionError, dm.ALLEN_AI_OBQA, OBQAType.TEST)
     # The download helper must have been invoked exactly once.
     mock_download_dataset.assert_called_once_with(
         Collection.ALLEN_AI_OBQA, ANY)
Esempio n. 7
0
 def test_empty_dataset(self, mock_download_dataset):
     # A DEV split written with no questions must load as a zero-length
     # result rather than raising.
     no_questions = []
     self.write_questions(OBQAType.DEV, no_questions)
     loaded = dm.ALLEN_AI_OBQA(OBQAType.DEV)
     # The download helper must have been invoked exactly once.
     mock_download_dataset.assert_called_once_with(
         Collection.ALLEN_AI_OBQA, ANY)
     row_count = len(loaded)
     self.assertEqual(row_count, 0)