예제 #1
0
 def test_combination_modalities_long(self):
     """Merge two spellings of one statement into a single generated fact.

     The first fact uses predicate "go" with modality "TBC[many]"; the
     second uses predicate "go to" with an empty modality. After
     ``FactCombinor`` runs, exactly one fact is expected; it must keep the
     modality, mention both source sentences and have predicate "go to".
     """
     full_score = MultipleScore()
     full_score.add_score(1, None, None)
     half_score = MultipleScore()
     half_score.add_score(0.5, None, None)
     with_modality = GeneratedFact(
         "parent", "go", "to Paris", "TBC[many]", False, full_score,
         MultipleSourceOccurrence.from_raw(
             "parents have many children", None, 1))
     without_modality = GeneratedFact(
         "parent", "go to", "Paris", "", False, half_score,
         MultipleSourceOccurrence.from_raw(
             "parents have children", None, 1))
     inputs = self.empty_input.add_generated_facts(
         [with_modality, without_modality])
     combinor = FactCombinor(None)
     inputs = combinor.process(inputs)

     combined = inputs.get_generated_facts()
     self.assertEqual(1, len(combined))
     merged = combined[0]
     self.assertIn("TBC[many]", merged.get_modality().get())
     # Both original sentences must survive, each with an occurrence of 1.
     sources = str(merged.get_sentence_source())
     self.assertIn("parents have many children x#x1", sources)
     self.assertIn("parents have children x#x1", sources)
     self.assertEqual("go to", merged.get_predicate())
예제 #2
0
    def test_conceptual_caption(self):
        """Run the Conceptual Captions comparator on a small set of facts.

        Every input fact starts with a single score. After processing, no
        fact may be lost, the first fact must hold two scores, and the
        second score of the second fact must not be (almost) zero.
        """
        comparator = ConceptualCaptionsComparatorSubmodule(None)
        self.empty_input = Inputs()
        self.dummy_reference = ReferencableInterface("DUMMY")

        dataset = [
            ("elephant", "download", "baby", 0),
            ("elephant", "have", "tusks", 1),
            ("lion", "eat", "gazella", 0),
            ("penguin", "eat", "fish", 0),
            ("gorilla", "eat", "banana", 0),
            ("sky", "hasProperty", "blue", 0),
            ("computer", "is", "working", 1),
            ("raccoon", "hasProperty", "blue", 0),
        ]
        subjects = {
            Subject(name)
            for name in ("elephant", "penguin", "lion", "gorilla", "sky",
                         "computer", "raccoon")
        }

        gfs = []
        for position, (subject, predicate, obj, truth) in enumerate(
                dataset, start=1):
            # Alternate the scoring submodule: even positions use Google,
            # odd positions use Bing.
            if position % 2 == 0:
                submodule_class = GoogleAutocompleteSubmodule
            else:
                submodule_class = BingAutocompleteSubmodule
            score = MultipleScore()
            score.add_score(truth, self.dummy_reference,
                            submodule_class(self.dummy_reference))
            gfs.append(
                GeneratedFact(subject, predicate, obj, "", False, score,
                              MultipleSourceOccurrence()))

        # One extra fact that also carries a source sentence.
        extra_score = MultipleScore()
        extra_score.add_score(
            1, self.dummy_reference,
            GoogleAutocompleteSubmodule(self.dummy_reference))
        gfs.append(
            GeneratedFact(
                "elephant", "be", "big", "", False, extra_score,
                MultipleSourceOccurrence.from_raw(
                    "elephants are big", None, 1)))

        inputs = self.empty_input.add_generated_facts(gfs).add_subjects(
            subjects)
        inputs = comparator.process(inputs)

        processed = inputs.get_generated_facts()
        self.assertEqual(len(dataset) + 1, len(processed))
        self.assertEqual(len(processed[0].get_score().scores), 2)
        self.assertNotAlmostEqual(
            processed[1].get_score().scores[1][0], 0, delta=1e-5)
예제 #3
0
 def test_save(self):
     """Round-trip a populated ``Inputs`` through ``save`` and ``load``.

     The inputs are filled with seeds, patterns, subjects, generated facts
     and objects, written to "temp.json" and read back. Each collection of
     the reloaded object must have the same length as the matching
     collection of the original.
     """
     inputs = Inputs()
     subjects = [Subject("baba"), Subject("coko")]
     patterns = [
         PatternGoogle("why are"),
         PatternGoogle("Why are", "hasProperty", True)
     ]
     mmr = MultipleModuleReference(ModuleReferenceInterface("Module0"))
     mmr.add_reference(ModuleReferenceInterface("Module1"))
     msr = MultipleSubmoduleReference(
         SubmoduleReferenceInterface("Submodule0"))
     msr.add_reference(SubmoduleReferenceInterface("Submodule0"))
     ms0 = MultipleScore()
     ms0.add_score(1.0, ModuleReferenceInterface("Module0"),
                   SubmoduleReferenceInterface("Submodule0"))
     ms1 = MultipleScore()
     ms1.add_score(1.0, mmr, msr)
     ms1.add_score(0.5, ModuleReferenceInterface("Module1"),
                   SubmoduleReferenceInterface("Submodule2"))
     mp0 = MultiplePattern()
     mp0.add_pattern(patterns[0])
     mp1 = MultiplePattern()
     mp1.add_pattern(patterns[0])
     mp1.add_pattern(patterns[1])
     gfs = [
         GeneratedFact(
             "baba", "is", "you", "sometimes", False, ms0,
             MultipleSourceOccurrence.from_raw("baba is you", msr, 1), mp0),
         GeneratedFact(
             "coko", "is", "dead", "always", True, ms1,
             MultipleSourceOccurrence.from_raw("toto is always dead", msr,
                                               1), mp1)
     ]
     seeds = [
         Fact("baba", "is", "us", None, False),
         Fact("coko", "are", "missing", "coucou", True)
     ]
     objects = [Object("missing"), Object("you")]
     inputs = inputs.replace_seeds(seeds)
     inputs = inputs.replace_patterns(patterns)
     inputs = inputs.replace_subjects(subjects)
     inputs = inputs.replace_generated_facts(gfs)
     inputs = inputs.replace_objects(objects)
     inputs.save("temp.json")
     inputs_read = inputs.load("temp.json")
     self.assertEqual(len(inputs.get_generated_facts()),
                      len(inputs_read.get_generated_facts()))
     # Subjects must be compared with the reloaded subjects; the previous
     # version compared them with the reloaded generated facts and only
     # passed because both collections happen to contain two items.
     self.assertEqual(len(inputs.get_subjects()),
                      len(inputs_read.get_subjects()))
     self.assertEqual(len(inputs.get_patterns()),
                      len(inputs_read.get_patterns()))
     self.assertEqual(len(inputs.get_seeds()), len(inputs_read.get_seeds()))
     self.assertEqual(len(inputs.get_objects()),
                      len(inputs_read.get_objects()))
예제 #4
0
 def test_not_remove(self):
     """A fact with modality "TBC[big bananas]" must survive ``TBCCleaner``."""
     occurrences = MultipleSourceOccurrence()
     occurrences.add_raw("elephants eat big bananas", None, 2)
     fact = GeneratedFact(
         "elephant", "eat", "bananas", "TBC[big bananas]", 0,
         MultipleScore(), occurrences)
     inputs = Inputs().add_generated_facts([fact])
     cleaner = TBCCleaner(None)
     inputs = cleaner.process(inputs)
     remaining = inputs.get_generated_facts()
     self.assertEqual(len(remaining), 1)
예제 #5
0
 def test_combination(self):
     """Combine three identical facts that differ in score and sources.

     After ``FactCombinor`` runs, one fact is expected that carries all
     three scores and whose source string mentions both sentences together
     with the occurrence markers "x#x3" and "x#x2".
     """
     scores = []
     for value in (1, 0.5, 0.7):
         single = MultipleScore()
         single.add_score(value, None, None)
         scores.append(single)

     first = GeneratedFact(
         "lion", "eat", "zebra", "", False, scores[0],
         MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1))
     both_sentences = MultipleSourceOccurrence()
     both_sentences.add_raw("lions eat zebras", None, 2)
     both_sentences.add_raw("lions eat small zebras", None, 1)
     second = GeneratedFact(
         "lion", "eat", "zebra", "", False, scores[1], both_sentences)
     third = GeneratedFact(
         "lion", "eat", "zebra", "", False, scores[2],
         MultipleSourceOccurrence.from_raw(
             "lions eat small zebras", None, 1))

     inputs = self.empty_input.add_generated_facts([first, second, third])
     combinor = FactCombinor(None)
     inputs = combinor.process(inputs)

     combined = inputs.get_generated_facts()
     self.assertEqual(1, len(combined))
     self.assertEqual(3, len(combined[0].get_score().scores))
     sentence = str(combined[0].get_sentence_source())
     for expected in ("lions eat zebras", "lions eat small zebras",
                      "x#x3", "x#x2"):
         self.assertIn(expected, sentence)
예제 #6
0
 def process(self, input_interface):
     """Add scores read from ``FILENAME`` for facts already in the input.

     The file is read as tab-separated text and its first line is skipped.
     Columns 0-2 are taken as subject, predicate and object; the score is
     the column at ``self._index``. A row yields a new generated fact only
     when its triple matches an existing generated fact and its score is
     non-zero.

     Returns the result of ``input_interface.add_generated_facts`` called
     with the newly created facts.
     """
     logging.info("Start the " + self._name + " archit submodule")
     known_triples = {
         (gf.get_subject().get(), gf.get_predicate().get(),
          gf.get_object().get())
         for gf in input_interface.get_generated_facts()
     }
     scored_facts = []
     with open(FILENAME) as handle:
         for row_number, raw_line in enumerate(handle):
             if row_number == 0:
                 # The first line is skipped (presumably a header row).
                 continue
             columns = raw_line.strip().split("\t")
             triple = (columns[0], columns[1], columns[2])
             if triple not in known_triples:
                 continue
             value = float(columns[self._index])
             if value == 0:
                 continue
             multi_score = MultipleScore()
             multi_score.add_score(value, self._module_reference, self)
             scored_facts.append(
                 GeneratedFact(triple[0], triple[1], triple[2], "", 0,
                               multi_score, MultipleSourceOccurrence()))
     return input_interface.add_generated_facts(scored_facts)
예제 #7
0
 def test_serialize_multiple_source_occurrence(self):
     """``MultipleSourceOccurrence.to_dict`` must be JSON-serializable."""
     references = MultipleSubmoduleReference(
         SubmoduleReferenceInterface("Submodule0"))
     references.add_reference(SubmoduleReferenceInterface("Submodule0"))
     occurrence = MultipleSourceOccurrence.from_raw(
         "baba is you", references, 1)
     print(occurrence.to_dict())
     dumped = json.dumps(occurrence.to_dict())
     self.assertIsNotNone(dumped)
예제 #8
0
 def test_xbox(self):
     """A fact with predicate "xbox" is removed by ``self.cleaning_predicate``."""
     fact = GeneratedFact(
         "test", "xbox", "nothing", "", False, 0.0,
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = self.cleaning_predicate.process(inputs)
     remaining = inputs.get_generated_facts()
     self.assertEqual(0, len(remaining))
예제 #9
0
 def test_conjugated_verb3(self):
     """A fact with predicate "goes" is kept by ``self.cleaning_predicate``."""
     fact = GeneratedFact(
         "elephant", "goes", "nowhere", "", False, 0.0,
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = self.cleaning_predicate.process(inputs)
     remaining = inputs.get_generated_facts()
     self.assertEqual(1, len(remaining))
예제 #10
0
 def test_empty_predicate(self):
     """A fact with an empty predicate is removed by ``self.cleaning_predicate``."""
     fact = GeneratedFact(
         "elephant", "", "fruits", "", False, 0.0,
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = self.cleaning_predicate.process(inputs)
     remaining = inputs.get_generated_facts()
     self.assertEqual(0, len(remaining))
예제 #11
0
 def test_ing(self):
     """A fact with predicate "adapting" is removed by ``self.present_continuous``."""
     fact = GeneratedFact(
         "test", "adapting", "nothing", "", False, 0.0,
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = self.present_continuous.process(inputs)
     remaining = inputs.get_generated_facts()
     self.assertEqual(0, len(remaining))
예제 #12
0
 def test_has_beach(self):
     """The fact "beach has sand" is kept by ``self.cleaning_predicate``."""
     fact = GeneratedFact(
         "beach", "has", "sand", "", False, 0.0,
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = self.cleaning_predicate.process(inputs)
     remaining = inputs.get_generated_facts()
     self.assertEqual(1, len(remaining))
예제 #13
0
 def test_fact_transformation(self):
     """``GeneratedFact.get_fact`` keeps the triple and the negation flag."""
     sentence = "elephants do not eat zebras"

     # Non-negated generated fact: check every component of the fact.
     positive = GeneratedFact(
         "elephant", "eat", "zebra", "", False, 1.0,
         MultipleSourceOccurrence.from_raw(sentence, None, 1)).get_fact()
     self.assertEqual(positive.get_subject(), "elephant")
     self.assertEqual(positive.get_predicate(), "eat")
     self.assertEqual(positive.get_object(), "zebra")
     self.assertEqual(positive.is_negative(), False)

     # Negated generated fact: only the flag is checked.
     negative = GeneratedFact(
         "elephant", "eat", "zebra", "", True, 1.0,
         MultipleSourceOccurrence.from_raw(sentence, None, 1)).get_fact()
     self.assertEqual(negative.is_negative(), True)
예제 #14
0
 def test_no_change(self):
     """The fact "test is time" passes ``self.cleaning_predicate`` with its object intact."""
     fact = GeneratedFact(
         "test", "is", "time", "", False, 0.0,
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = self.cleaning_predicate.process(inputs)
     remaining = inputs.get_generated_facts()
     self.assertEqual(1, len(remaining))
     kept = remaining[0]
     self.assertEqual("time", kept.get_object().get())
예제 #15
0
 def test_removal(self):
     """A fact whose subject and object are both "lion" is removed by ``self.identical``."""
     fact = GeneratedFact(
         "lion", "eat", "lion", "some", False, None,
         MultipleSourceOccurrence())
     inputs = self.empty_input.replace_generated_facts([fact])
     inputs = self.identical.process(inputs)
     remaining = inputs.get_generated_facts()
     self.assertEqual(0, len(remaining))
예제 #16
0
 def test_article(self):
     """Objects "hive" and "a hive" end up as one object value.

     Both facts must remain after ``SimilarObjectRemover`` runs, but they
     are expected to share a single distinct object string.
     """
     gfs = [
         GeneratedFact("bee", "make", obj, "", False, 0.1,
                       MultipleSourceOccurrence())
         for obj in ("hive", "a hive")
     ]
     inputs = Inputs().add_generated_facts(gfs)
     remover = SimilarObjectRemover(None)
     inputs = remover.process(inputs)
     remaining = inputs.get_generated_facts()
     self.assertEqual(len(remaining), 2)
     distinct_objects = {gf.get_object().get() for gf in remaining}
     self.assertEqual(len(distinct_objects), 1)
예제 #17
0
 def test_no_change(self):
     """Predicate "adapted" is left untouched by ``self.be_normalization``."""
     fact = GeneratedFact(
         "test", "adapted", "nothing", "", False, 0.0,
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = self.be_normalization.process(inputs)
     remaining = inputs.get_generated_facts()
     self.assertEqual(1, len(remaining))
     kept = remaining[0]
     self.assertEqual("adapted", kept.get_predicate().get())
예제 #18
0
 def test_false_s(self):
     """Predicate "pass" is left untouched by ``self.present_conjugate``."""
     fact = GeneratedFact(
         "test", "pass", "nothing", "", False, 0.0,
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = self.present_conjugate.process(inputs)
     remaining = inputs.get_generated_facts()
     self.assertEqual(1, len(remaining))
     kept = remaining[0]
     self.assertEqual("pass", kept.get_predicate().get())
예제 #19
0
 def test_no_verb2(self):
     """A fact with predicate "clock" is removed by ``self.cleaning_predicate``."""
     fact = GeneratedFact(
         "wall", "clock", "yellow", "", False, 0.0,
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = self.cleaning_predicate.process(inputs)
     remaining = inputs.get_generated_facts()
     # Print the leftover facts so a failing run shows what was kept.
     print(remaining)
     self.assertEqual(0, len(remaining))
예제 #20
0
 def test_be_ing(self):
     """Predicate "is adapting" is rewritten to "adapt" by ``self.present_continuous``."""
     # Arguments follow the usual order: subject, predicate, object,
     # modality, negative, score, sentence source. The previous version
     # passed the sentence source as the modality and "" as the source.
     generated_fact = GeneratedFact("test", "is adapting", "nothing", "",
                                    False, 0.0, MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([generated_fact])
     inputs = self.present_continuous.process(inputs)
     generated_facts = inputs.get_generated_facts()
     self.assertEqual(1, len(generated_facts))
     self.assertEqual("adapt", generated_facts[0].get_predicate().get())
예제 #21
0
 def test_not_digest(self):
     """Predicate "not digests" becomes "digests" on a negative fact.

     The input fact is created as non-negative; after
     ``self.cleaning_predicate`` runs it must be flagged negative.
     """
     fact = GeneratedFact(
         "elephant", "not digests", "fruits", "", False, 0.0,
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = self.cleaning_predicate.process(inputs)
     remaining = inputs.get_generated_facts()
     self.assertEqual(1, len(remaining))
     cleaned = remaining[0]
     self.assertEqual(cleaned.get_predicate().get(), "digests")
     self.assertTrue(cleaned.is_negative())
예제 #22
0
 def test_be_can_duplicate(self):
     """Object "can nothing" with predicate "be" loses its leading "can".

     After ``self.can_transformation`` runs, one fact is expected with
     predicate "be" and object "nothing".
     """
     fact = GeneratedFact(
         "test", "be", "can nothing", "", False, 0.0,
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = self.can_transformation.process(inputs)
     remaining = inputs.get_generated_facts()
     self.assertEqual(1, len(remaining))
     transformed = remaining[0]
     self.assertEqual("be", transformed.get_predicate().get())
     self.assertEqual("nothing", transformed.get_object().get())
예제 #23
0
 def initialize_for_generated_fact(self, generated_fact):
     """Create empty bookkeeping entries for a fact not seen before.

     The key is the value returned by ``get_fact_without_modality``. When
     that key is already in ``self.found`` nothing is changed; otherwise
     each per-fact mapping gets a fresh, empty container for it.
     """
     key = get_fact_without_modality(generated_fact)
     if key in self.found:
         return
     self.found[key] = None
     self.sentences[key] = MultipleSourceOccurrence()
     self.modalities[key] = {}
     self.patterns[key] = MultiplePattern()
     self.modules[key] = MultipleModuleReference()
     self.submodules[key] = MultipleSubmoduleReference()
예제 #24
0
 def test_texas(self):
     """Subject "texas" is left unchanged by ``self.to_singular``."""
     fact = GeneratedFact(
         "texas", "is a", "cat", "", False, MultipleScore(),
         MultipleSourceOccurrence())
     with_fact = self.empty_input.add_generated_facts([fact])
     inputs = with_fact.add_subjects({Subject("lion")})
     inputs = self.to_singular.process(inputs)
     remaining = inputs.get_generated_facts()
     self.assertEqual(1, len(remaining))
     kept = remaining[0]
     self.assertEqual("texas", kept.get_subject().get())
예제 #25
0
def read_generated_fact(generated_fact):
    """Rebuild a ``GeneratedFact`` from its serialized dictionary form.

    The dictionary must have a "type" key equal to "GeneratedFact" and
    the keys "subject", "predicate", "object", "modality", "negative",
    "score", "sentence_source" and "pattern". Each component is decoded
    by its own reader before the fact is constructed.

    Raises:
        UnknownSerializedObject: if "type" is anything other than
            "GeneratedFact".
        KeyError: if a required key is missing.
    """
    if generated_fact["type"] != "GeneratedFact":
        # Separate the message from the JSON payload; previously the two
        # were concatenated with nothing between them.
        raise UnknownSerializedObject(
            "Unknown generated fact type: " + json.dumps(generated_fact))
    return GeneratedFact(
        read_subject(generated_fact["subject"]),
        read_predicate(generated_fact["predicate"]),
        read_object(generated_fact["object"]),
        read_modality(generated_fact["modality"]),
        generated_fact["negative"],
        read_score(generated_fact["score"]),
        MultipleSourceOccurrence.from_dict(generated_fact["sentence_source"]),
        read_pattern(generated_fact["pattern"])
    )
예제 #26
0
 def test_cache(self):
     """Process the same fact twice with a named-cache Google Book submodule.

     The first run's result is discarded (presumably it fills the cache);
     the second run must return one fact carrying exactly one non-zero
     score from "Google Book Submodule". The temporary cache is always
     deleted afterwards.
     """
     google_book_cache = GoogleBookSubmodule(
         None, True, cache_name="google-book-cache-temp")

     def fresh_inputs():
         # A brand-new fact and score container for each run.
         generated_fact = GeneratedFact("lion", "eat", "zebra", "", False,
                                        MultipleScore(),
                                        MultipleSourceOccurrence())
         return self.empty_input.add_generated_facts([generated_fact])

     try:
         google_book_cache.process(fresh_inputs())
         inputs = google_book_cache.process(fresh_inputs())
         self.assertEqual(1, len(inputs.get_generated_facts()))
         scores = inputs.get_generated_facts()[0].get_score()
         scores_google_book = [
             x for x in scores.scores
             if x[2].get_name() == "Google Book Submodule"
         ]
         self.assertEqual(1, len(scores_google_book))
         self.assertNotEqual(scores_google_book[0][0], 0)
     finally:
         # Delete the cache even when an assertion fails, so a failed run
         # does not leave a stale cache behind for the next one.
         google_book_cache.cache.delete_cache()
예제 #27
0
 def test_cache(self):
     """Process the same fact twice with a named-cache Wikipedia submodule.

     The first run's result is discarded (presumably it fills the cache);
     the second run must return one fact carrying exactly one non-zero
     score from "Wikipedia Cooccurrence". The temporary cache is always
     deleted afterwards.
     """
     wikipedia_cache = WikipediaCooccurrenceSubmodule(
         None, True, "wikipedia-cache-test")

     def fresh_inputs():
         # A brand-new fact and score container for each run.
         generated_fact = GeneratedFact("lion", "is a", "cat", "", False,
                                        MultipleScore(),
                                        MultipleSourceOccurrence())
         return self.empty_input.add_generated_facts([generated_fact])

     try:
         wikipedia_cache.process(fresh_inputs())
         inputs = wikipedia_cache.process(fresh_inputs())
         self.assertEqual(1, len(inputs.get_generated_facts()))
         scores = inputs.get_generated_facts()[0].get_score()
         scores_wikipedia = [
             x for x in scores.scores
             if x[2].get_name() == "Wikipedia Cooccurrence"
         ]
         self.assertEqual(1, len(scores_wikipedia))
         self.assertNotEqual(scores_wikipedia[0][0], 0)
     finally:
         # Delete the cache even when an assertion fails, so a failed run
         # does not leave a stale cache behind for the next one.
         wikipedia_cache.cache.delete_cache()
 def test_combination(self):
     """``self.linear_combination`` must keep every fact it is given.

     Ten elephant facts are scored alternately by the Bing and Google
     autocomplete submodules, and one extra fact with a source sentence
     is added; all eleven must still be present after processing.
     """
     dataset = [
         ("elephant", "download", "baby", 0),
         ("elephant", "climb", "trunk", 0),
         ("elephant", "bear", "baby", 1),
         ("elephant", "download this cute illustration with", "baby", 0),
         ("elephant", "be", "ear", 0),
         ("elephant", "fry", "ear", 0),
         ("elephant", "trek", "travel", 0),
         ("elephant", "forbid love in", "water", 0),
         ("elephant", "eat", "bark", 1),
         ("elephant", "have", "tusks", 1),
     ]
     gfs = []
     for position, (subject, predicate, obj, truth) in enumerate(
             dataset, start=1):
         # Even positions are scored by Google, odd positions by Bing.
         if position % 2 == 0:
             submodule_class = GoogleAutocompleteSubmodule
         else:
             submodule_class = BingAutocompleteSubmodule
         score = MultipleScore()
         score.add_score(truth, self.dummy_reference,
                         submodule_class(self.dummy_reference))
         gfs.append(
             GeneratedFact(subject, predicate, obj, "", False, score,
                           MultipleSourceOccurrence()))

     extra_score = MultipleScore()
     extra_score.add_score(
         1, self.dummy_reference,
         GoogleAutocompleteSubmodule(self.dummy_reference))
     gfs.append(
         GeneratedFact(
             "elephant", "be", "big", "", False, extra_score,
             MultipleSourceOccurrence.from_raw(
                 "elephants are big", None, 1)))

     inputs = self.empty_input.add_generated_facts(gfs)
     inputs = self.linear_combination.process(inputs)
     processed = inputs.get_generated_facts()
     self.assertEqual(len(dataset) + 1, len(processed))
예제 #29
0
 def test_lion_eat_code(self):
     """The fact "lion eat code" gets one Google Book score equal to zero."""
     fact = GeneratedFact(
         "lion", "eat", "code", "", False, MultipleScore(),
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = self.google_book_no_cache.process(inputs)
     processed = inputs.get_generated_facts()
     self.assertEqual(1, len(processed))
     # Keep only the score entries whose third element is this submodule.
     google_book_entries = [
         entry for entry in processed[0].get_score().scores
         if entry[2].get_name() == "Google Book Submodule"
     ]
     self.assertEqual(1, len(google_book_entries))
     self.assertTrue(google_book_entries[0][0] == 0)
예제 #30
0
 def test_panda_flickr_cluster(self):
     """The fact "panda live china" receives exactly one "Flickr" score."""
     fact = GeneratedFact(
         "panda", "live", "china", "", False, MultipleScore(),
         MultipleSourceOccurrence())
     with_fact = self.empty_input.add_generated_facts([fact])
     inputs = with_fact.add_subjects({Subject("panda")})
     inputs = self.associations_flick_cluster.process(inputs)
     processed = inputs.get_generated_facts()
     self.assertEqual(1, len(processed))
     # Keep only the score entries whose third element is named "Flickr".
     flickr_entries = [
         entry for entry in processed[0].get_score().scores
         if entry[2].get_name() == "Flickr"
     ]
     self.assertEqual(1, len(flickr_entries))