コード例 #1
0
class TestQuora(unittest.TestCase):
    """Checks ``QuoraQuestionsSubmodule`` on a single-subject input."""

    def setUp(self) -> None:
        """Create the submodule under test and a blank ``Inputs``."""
        self.quora = QuoraQuestionsSubmodule(None)
        self.empty_input = Inputs()

    def test_elephant(self):
        """Processing "elephant" yields exactly one, negative, fact."""
        seeded = self.empty_input.add_subjects({Subject("elephant")})
        facts = self.quora.process(seeded).get_generated_facts()
        self.assertEqual(1, len(facts))
        only_fact = facts[0]
        self.assertIn("elephant", only_fact.get_subject().get())
        self.assertTrue(only_fact.is_negative())
コード例 #2
0
class TestBingAutocomplete(unittest.TestCase):
    """Tests for ``BingAutocompleteSubmodule``, with and without caching."""

    def setUp(self) -> None:
        """Build an uncached and a cached submodule plus a blank ``Inputs``."""
        self.autocomplete = BingAutocompleteSubmodule(None,
                                                      use_cache=False,
                                                      look_new=True)
        # NOTE(review): the cache name says "google" although this is the
        # Bing submodule -- confirm it cannot collide with another cache.
        self.autocomplete_cache = BingAutocompleteSubmodule(
            None,
            use_cache=True,
            cache_name="google-cache-test",
            look_new=True)
        self.empty_input = Inputs()

    def test_elephant(self):
        """An uncached query is not served from cache and has 8 results."""
        suggestions, from_cache = self.autocomplete.get_suggestion(
            "why are elephants")
        self.assertFalse(from_cache)
        self.assertEqual(len(suggestions), 8)

    def test_cache(self):
        """Repeating a query on the cached submodule hits the cache."""
        try:
            # First call only warms the cache; its result is irrelevant.
            self.autocomplete_cache.get_suggestion("why are elephants")
            # NOTE(review): presumably waits for the cache entry to become
            # visible -- confirm whether the full 10 seconds are required.
            time.sleep(10)
            suggestions, from_cache = self.autocomplete_cache.get_suggestion(
                "why are elephants")
            self.assertTrue(from_cache)
            self.assertEqual(len(suggestions), 8)
        finally:
            # Always drop the test cache, even when an assertion fails, so
            # that a failing run cannot leak state into the next one.
            self.autocomplete_cache.cache.delete_cache()

    def _test_process(self):
        """End-to-end check of ``process`` on the "why are <SUBJS>" pattern.

        The leading underscore keeps unittest from collecting this method.
        """
        inputs = self.empty_input.add_subjects([
            Subject("elephant")
        ]).add_patterns([PatternGoogle("why are <SUBJS>")])
        inputs = self.autocomplete.process(inputs)
        self.assertGreater(len(inputs.get_generated_facts()), 16)
        trunk_facts = [
            x for x in inputs.get_generated_facts()
            if "trunk" in x.get_object().get()
        ]
        self.assertGreater(len(trunk_facts), 0)
コード例 #3
0
 def generate_input(self):
     """Return a new ``Inputs`` to which the subject "elephant" was added."""
     # The seed module needs nothing beyond this single subject.
     return Inputs().add_subjects({Subject("elephant")})
コード例 #4
0
class TestGoogleAutocomplete(unittest.TestCase):
    """Tests for ``GoogleAutocompleteSubmodule``, with and without caching."""

    def setUp(self) -> None:
        """Build an uncached and a cached submodule plus a blank ``Inputs``."""
        self.autocomplete = GoogleAutocompleteSubmodule(None, use_cache=False)
        self.autocomplete_cache = GoogleAutocompleteSubmodule(
            None, use_cache=True, cache_name="google-cache-test")
        self.empty_input = Inputs()

    def test_elephant(self):
        """An uncached query is not served from cache and has 10 results."""
        suggestions, from_cache = self.autocomplete.get_suggestion(
            "why are elephants")
        self.assertFalse(from_cache)
        self.assertEqual(len(suggestions), 10)

    def test_cache(self):
        """Repeating a query on the cached submodule hits the cache."""
        try:
            # First call only warms the cache; its result is irrelevant.
            self.autocomplete_cache.get_suggestion("why are elephants")
            # Remove information of the previous query
            self.autocomplete_cache.local_cache["query_regex"] = ""
            suggestions, from_cache = self.autocomplete_cache.get_suggestion(
                "why are elephants")
            self.assertTrue(from_cache)
            self.assertEqual(len(suggestions), 10)
        finally:
            # Always drop the test cache, even when an assertion fails, so
            # that a failing run cannot leak state into the next one.
            self.autocomplete_cache.cache.delete_cache()

    def test_process(self):
        """``process`` yields more than 20 facts, some mentioning "trunk"."""
        inputs = self.empty_input.add_subjects([
            Subject("elephant")
        ]).add_patterns([PatternGoogle("why are <SUBJS>")])
        inputs = self.autocomplete.process(inputs)
        self.assertGreater(len(inputs.get_generated_facts()), 20)
        trunk_facts = [
            x for x in inputs.get_generated_facts()
            if "trunk" in x.get_object().get()
        ]
        self.assertGreater(len(trunk_facts), 0)

    def test_vegetarian_negative_pattern(self):
        """A negative pattern must not produce a positive "meat" fact."""
        inputs = self.empty_input.add_subjects([
            Subject("vegetarian")
        ]).add_patterns([PatternGoogle("why don't <SUBJS>", negative=True)])
        inputs = self.autocomplete.process(inputs)
        self.assertGreater(len(inputs.get_generated_facts()), 0)
        meat_facts = [
            x for x in inputs.get_generated_facts()
            if "meat" == x.get_object().get() and not x.is_negative()
        ]
        # Comparing against [] makes the failure message list the offending
        # facts, so no separate debug print is needed.
        self.assertEqual([], meat_facts)

    def test_vegetarian_positive_pattern(self):
        """A positive pattern, once cleaned, yields a positive "meat" fact."""
        inputs = self.empty_input.add_subjects([
            Subject("vegetarian")
        ]).add_patterns([PatternGoogle("why do <SUBJS>")])
        inputs = self.autocomplete.process(inputs)
        predicate_cleaning = CleaningPredicateSubmodule(None)
        inputs = predicate_cleaning.process(inputs)
        self.assertGreater(len(inputs.get_generated_facts()), 0)
        meat_facts = [
            x for x in inputs.get_generated_facts()
            if "meat" == x.get_object().get() and not x.is_negative()
        ]
        self.assertGreater(len(meat_facts), 0)
コード例 #5
0
ファイル: run_for_subject.py プロジェクト: wayne9qiu/CSK
def run_for_subject(subject):
    """Run the whole pipeline for one subject, reporting progress in job meta.

    The lower-cased subject is wrapped in an ``Inputs`` and passed through
    the "manual-patterns-google" submodule. Four stages then run in order:
    assertion generation, per-submodule normalization, global normalization
    and validation. A report (a list with one dict per stage, each holding a
    "step name" and a list of "steps") is stored in ``job.meta`` and saved
    as the stages progress.

    Args:
        subject: Subject name; ``.lower()`` is called on it.

    Returns:
        None. The report is only exposed through the job returned by
        ``get_current_job()``.
    """
    job = get_current_job()
    factory = DefaultSubmoduleFactory()

    generation_names = [
        "google-autocomplete",
        "bing-autocomplete",
        "yahoo-questions",
        "answerscom-questions",
        "quora-questions",
        "reddit-questions",
        "fact-combinor",
    ]
    normalization_names = [
        "lower-case",
        "tbc-cleaner",
        "only-subject",
        "filter-object",
        "no-personal",
        "singular-subject",
        "cleaning-predicate",
        "basic-modality",
        "present-continuous",
        "are-transformation",
        "can-transformation",
        "be-normalization",
        "identical-subj-obj",
        "present-conjugate",
    ]
    global_normalization_names = [
        "similar-object-remover",
        "fact-combinor",
    ]
    validation_names = [
        "google-book",
        "flickr-clusters",
        "imagetag",
        "wikipedia-cooccurrence",
        "simple-wikipedia-cooccurrence",
        "conceptual-captions",
        "what-questions",
    ]

    base_input = Inputs()
    base_input = base_input.add_subjects({Subject(subject.lower())})

    module_reference = ModuleReferenceInterface("")

    # Patterns are attached once, before any stage runs.
    pattern_submodule = factory.get_submodule("manual-patterns-google",
                                              module_reference)
    base_input = pattern_submodule.process(base_input)

    report = []

    def open_stage(title):
        # Append a new stage entry to the report and return its step list.
        stage_steps = []
        report.append({"step name": title, "steps": stage_steps})
        return stage_steps

    def publish():
        # Store the report built so far in the job meta and save it.
        job.meta = report
        job.save_meta()

    # Stage 1: every generator runs on the same base input; the facts are
    # pooled afterwards.
    steps = open_stage("Assertion Generation")
    publish()
    pooled_facts = []
    for name in generation_names:
        submodule = factory.get_submodule(name, module_reference)
        started = time.time()
        produced = submodule.process(base_input)
        pooled_facts.extend(produced.get_generated_facts())
        steps.append({
            "name": submodule.get_name(),
            "facts": [
                fact.to_dict() for fact in produced.get_generated_facts()
            ],
            "time": time.time() - started,
        })
        publish()
    current = base_input.add_generated_facts(pooled_facts)

    # Stage 2: each normalizer is first applied fact by fact, only to record
    # which facts it changes, and then applied to the whole input.
    steps = open_stage("Assertion Normalization")
    for name in normalization_names:
        submodule = factory.get_submodule(name, module_reference)
        started = time.time()
        step_name = submodule.get_name()
        changes = []
        for fact in current.get_generated_facts():
            single = base_input.add_generated_facts([fact])
            single = submodule.process(single)
            if (len(single.get_generated_facts()) != 1
                    or single.get_generated_facts()[0] != fact):
                changes.append({
                    "from": fact.to_dict(),
                    "to": [
                        out.to_dict()
                        for out in single.get_generated_facts()
                    ],
                })
        steps.append({
            "name": step_name,
            "modifications": changes,
            "time": time.time() - started,
        })
        publish()
        current = submodule.process(current)

    # Stage 3: normalizers that run on the whole set of facts at once.
    steps = open_stage("Assertion Normalization Global")
    for name in global_normalization_names:
        submodule = factory.get_submodule(name, module_reference)
        started = time.time()
        current = submodule.process(current)
        steps.append({
            "name": submodule.get_name(),
            "facts": [
                fact.to_dict() for fact in current.get_generated_facts()
            ],
            "time": time.time() - started,
        })
        publish()

    # Stage 4: all validators are chained and reported as one single step.
    steps = open_stage("Assertion Validation")
    started = time.time()
    for name in validation_names:
        submodule = factory.get_submodule(name, module_reference)
        current = submodule.process(current)
    steps.append({
        "name": "All validations",
        "facts": [fact.to_dict() for fact in current.get_generated_facts()],
        "time": time.time() - started,
    })
    publish()
コード例 #6
0
class TestSentenceComparator(unittest.TestCase):
    """Tests for ``ConceptualCaptionsComparatorSubmodule``."""

    def test_get_content(self):
        """``get_contents`` returns the expected item count per subject."""
        comparator = ConceptualCaptionsComparatorSubmodule(None)
        self.empty_input = Inputs()
        animals = {
            Subject(name)
            for name in ("elephant", "penguin", "lion", "raccoon")
        }
        comparator.setup_processing(self.empty_input.add_subjects(animals))
        expected_sizes = (
            ("elephant", 3748),
            ("penguin", 1273),
            ("lion", 2616),
            ("raccoon", 365),
        )
        for name, size in expected_sizes:
            self.assertEqual(size, len(comparator.get_contents(name)))

    def test_conceptual_caption(self):
        """``process`` keeps every fact and adds a score to the scored ones.

        Eight labelled facts are built, alternating the Bing (odd position)
        and Google (even position) submodules as the score source, plus one
        "elephant be big" fact carrying a raw source occurrence.
        """
        comparator = ConceptualCaptionsComparatorSubmodule(None)
        self.empty_input = Inputs()
        self.dummy_reference = ReferencableInterface("DUMMY")

        # (subject, predicate, object, initial score value)
        labelled_triples = [
            ("elephant", "download", "baby", 0),
            ("elephant", "have", "tusks", 1),
            ("lion", "eat", "gazella", 0),
            ("penguin", "eat", "fish", 0),
            ("gorilla", "eat", "banana", 0),
            ("sky", "hasProperty", "blue", 0),
            ("computer", "is", "working", 1),
            ("raccoon", "hasProperty", "blue", 0),
        ]
        subjects = {
            Subject("elephant"),
            Subject("penguin"),
            Subject("lion"),
            Subject("gorilla"),
            Subject("sky"),
            Subject("computer"),
            Subject("raccoon")
        }

        facts = []
        numbered = enumerate(labelled_triples, start=1)
        for position, (subject, predicate, obj, truth) in numbered:
            score = MultipleScore()
            source_cls = (GoogleAutocompleteSubmodule
                          if position % 2 == 0
                          else BingAutocompleteSubmodule)
            score.add_score(truth, self.dummy_reference,
                            source_cls(self.dummy_reference))
            facts.append(
                GeneratedFact(subject, predicate, obj, "", False, score,
                              MultipleSourceOccurrence()))

        big_score = MultipleScore()
        big_score.add_score(
            1, self.dummy_reference,
            GoogleAutocompleteSubmodule(self.dummy_reference))
        occurrence = MultipleSourceOccurrence.from_raw(
            "elephants are big", None, 1)
        facts.append(
            GeneratedFact("elephant", "be", "big", "", False, big_score,
                          occurrence))

        seeded = self.empty_input.add_generated_facts(facts)
        seeded = seeded.add_subjects(subjects)
        scored_facts = comparator.process(seeded).get_generated_facts()

        self.assertEqual(len(labelled_triples) + 1, len(scored_facts))
        self.assertEqual(len(scored_facts[0].get_score().scores), 2)
        self.assertNotAlmostEqual(
            scored_facts[1].get_score().scores[1][0], 0, delta=1e-5)