Example #1
0
 def process(self, input_interface):
     """Score the generated facts that are listed in FILENAME.

     FILENAME is read as tab-separated text and its first line is skipped.
     Columns 0, 1 and 2 are the subject, predicate and object; the score is
     taken from column ``self._index``. A row is ignored when its triple is
     not among the generated facts of ``input_interface`` or when its score
     is zero.

     Returns the result of ``input_interface.add_generated_facts`` called
     with one new ``GeneratedFact`` per retained row.
     """
     logging.info("Start the " + self._name + " archit submodule")
     known_triples = {
         (gf.get_subject().get(), gf.get_predicate().get(),
          gf.get_object().get())
         for gf in input_interface.get_generated_facts()
     }
     scored_facts = []
     with open(FILENAME) as f:
         # Consume the first line so the loop only sees data rows.
         next(f, None)
         for raw_line in f:
             columns = raw_line.strip().split("\t")
             triple = (columns[0], columns[1], columns[2])
             if triple not in known_triples:
                 continue
             score = float(columns[self._index])
             if score == 0:
                 continue
             multi_score = MultipleScore()
             multi_score.add_score(score, self._module_reference, self)
             scored_facts.append(
                 GeneratedFact(triple[0], triple[1], triple[2], "", 0,
                               multi_score, MultipleSourceOccurrence()))
     return input_interface.add_generated_facts(scored_facts)
    def test_conceptual_caption(self):
        """Run the Conceptual Captions comparator on a handful of facts.

        Checks that every input fact is still present after processing,
        that the first fact carries two scores, and that the second score
        of the second fact is not (almost) zero.
        """
        sc = ConceptualCaptionsComparatorSubmodule(None)
        self.empty_input = Inputs()
        self.dummy_reference = ReferencableInterface("DUMMY")

        dataset = [
            ("elephant", "download", "baby", 0),
            ("elephant", "have", "tusks", 1),
            ("lion", "eat", "gazella", 0),
            ("penguin", "eat", "fish", 0),
            ("gorilla", "eat", "banana", 0),
            ("sky", "hasProperty", "blue", 0),
            ("computer", "is", "working", 1),
            ("raccoon", "hasProperty", "blue", 0),
        ]
        subjects = {
            Subject("elephant"),
            Subject("penguin"),
            Subject("lion"),
            Subject("gorilla"),
            Subject("sky"),
            Subject("computer"),
            Subject("raccoon"),
        }

        gfs = []
        for pos, row in enumerate(dataset, start=1):
            subject, predicate, obj, truth = row
            score = MultipleScore()
            # Even positions are attributed to Google, odd ones to Bing.
            if pos % 2 == 0:
                submodule_class = GoogleAutocompleteSubmodule
            else:
                submodule_class = BingAutocompleteSubmodule
            score.add_score(truth, self.dummy_reference,
                            submodule_class(self.dummy_reference))
            gfs.append(
                GeneratedFact(subject, predicate, obj, "", False, score,
                              MultipleSourceOccurrence()))

        # One extra fact that is not part of the dataset above.
        score2 = MultipleScore()
        score2.add_score(1, self.dummy_reference,
                         GoogleAutocompleteSubmodule(self.dummy_reference))
        extra_source = MultipleSourceOccurrence.from_raw(
            "elephants are big", None, 1)
        gfs.append(
            GeneratedFact("elephant", "be", "big", "", False, score2,
                          extra_source))

        inputs = self.empty_input.add_generated_facts(gfs)
        inputs = inputs.add_subjects(subjects)
        inputs = sc.process(inputs)

        processed = inputs.get_generated_facts()
        self.assertEqual(len(dataset) + 1, len(processed))
        self.assertEqual(len(processed[0].get_score().scores), 2)
        self.assertNotAlmostEqual(
            processed[1].get_score().scores[1][0], 0, delta=1e-5)
Example #3
0
 def get_generated_fact_with_score_from_classifier(self, fact, clf):
     """Return a ``GeneratedFact`` for ``fact`` scored by ``clf``.

     The score is ``clf.predict(fact, row)`` where ``row`` comes from
     ``self.get_fact_row(fact)``. Module, submodule, modalities, sentences
     and patterns are looked up in the per-fact mappings held on ``self``.
     """
     multiple_score = MultipleScore()
     prediction = clf.predict(fact, self.get_fact_row(fact))
     multiple_score.add_score(prediction, self.modules[fact],
                              self.submodules[fact])

     subject = fact.get_subject()
     predicate = fact.get_predicate()
     obj = fact.get_object()
     modality = Modality.from_modalities_and_scores(
         self.modalities[fact].items())
     negative = fact.is_negative()
     return GeneratedFact(subject, predicate, obj, modality, negative,
                          multiple_score, self.sentences[fact],
                          self.patterns[fact])
Example #4
0
 def test_save(self):
     """Save a populated ``Inputs`` to JSON and check it reloads intact.

     Seeds, patterns, subjects, generated facts and objects are set on the
     inputs, which are written to ``temp.json`` and read back. The reloaded
     inputs must hold the same number of items in each collection.
     """
     inputs = Inputs()
     subjects = [Subject("baba"), Subject("coko")]
     patterns = [
         PatternGoogle("why are"),
         PatternGoogle("Why are", "hasProperty", True)
     ]
     mmr = MultipleModuleReference(ModuleReferenceInterface("Module0"))
     mmr.add_reference(ModuleReferenceInterface("Module1"))
     msr = MultipleSubmoduleReference(
         SubmoduleReferenceInterface("Submodule0"))
     # NOTE(review): "Submodule0" is added a second time here, whereas the
     # module reference above adds "Module1" — confirm this is intended.
     msr.add_reference(SubmoduleReferenceInterface("Submodule0"))
     ms0 = MultipleScore()
     ms0.add_score(1.0, ModuleReferenceInterface("Module0"),
                   SubmoduleReferenceInterface("Submodule0"))
     ms1 = MultipleScore()
     ms1.add_score(1.0, mmr, msr)
     ms1.add_score(0.5, ModuleReferenceInterface("Module1"),
                   SubmoduleReferenceInterface("Submodule2"))
     mp0 = MultiplePattern()
     mp0.add_pattern(patterns[0])
     mp1 = MultiplePattern()
     mp1.add_pattern(patterns[0])
     mp1.add_pattern(patterns[1])
     gfs = [
         GeneratedFact(
             "baba", "is", "you", "sometimes", False, ms0,
             MultipleSourceOccurrence.from_raw("baba is you", msr, 1), mp0),
         GeneratedFact(
             "coko", "is", "dead", "always", True, ms1,
             MultipleSourceOccurrence.from_raw("toto is always dead", msr,
                                               1), mp1)
     ]
     seeds = [
         Fact("baba", "is", "us", None, False),
         Fact("coko", "are", "missing", "coucou", True)
     ]
     objects = [Object("missing"), Object("you")]
     inputs = inputs.replace_seeds(seeds)
     inputs = inputs.replace_patterns(patterns)
     inputs = inputs.replace_subjects(subjects)
     inputs = inputs.replace_generated_facts(gfs)
     inputs = inputs.replace_objects(objects)
     inputs.save("temp.json")
     inputs_read = inputs.load("temp.json")
     self.assertEqual(len(inputs.get_generated_facts()),
                      len(inputs_read.get_generated_facts()))
     # Compare subjects with subjects: the previous check compared them to
     # the generated facts and only passed because both counts were 2.
     self.assertEqual(len(inputs.get_subjects()),
                      len(inputs_read.get_subjects()))
     self.assertEqual(len(inputs.get_patterns()),
                      len(inputs_read.get_patterns()))
     self.assertEqual(len(inputs.get_seeds()), len(inputs_read.get_seeds()))
     self.assertEqual(len(inputs.get_objects()),
                      len(inputs_read.get_objects()))
def read_score(score):
    """Rebuild a ``MultipleScore`` from its serialized dictionary form.

    ``score`` must have ``"type" == "MultipleScore"`` and a ``"scores"``
    list whose entries carry ``"score"``, ``"module_from"`` and
    ``"submodule_from"``.

    Raises ``UnknownSerializedObject`` for any other ``"type"`` value.
    """
    if score["type"] != "MultipleScore":
        raise UnknownSerializedObject(
            "Unknown score type:" + json.dumps(score))

    rebuilt = MultipleScore()
    for entry in score["scores"]:
        value = entry["score"]
        module = read_module_reference(entry["module_from"])
        submodule = read_submodule_reference(entry["submodule_from"])
        rebuilt.add_score(value, module, submodule)
    return rebuilt
Example #6
0
 def test_cache(self):
     """Process the same fact twice with a Simple Wikipedia submodule.

     The submodule is built with the cache name
     "simple-wikipedia-cache-test". Only the second pass is asserted on:
     it must yield one fact with exactly one non-zero
     "Simple Wikipedia Cooccurrence" score. The cache is deleted at the end.
     """
     submodule = SimpleWikipediaCooccurrenceSubmodule(
         None, True, "simple-wikipedia-cache-test")

     # First pass: the return value is not used.
     first_fact = GeneratedFact("lion", "is a", "cat", "", False,
                                MultipleScore(), MultipleSourceOccurrence())
     submodule.process(self.empty_input.add_generated_facts([first_fact]))

     # Second pass on a fresh but identical fact.
     second_fact = GeneratedFact("lion", "is a", "cat", "", False,
                                 MultipleScore(), MultipleSourceOccurrence())
     inputs = submodule.process(
         self.empty_input.add_generated_facts([second_fact]))

     facts = inputs.get_generated_facts()
     self.assertEqual(1, len(facts))
     wikipedia_scores = []
     for entry in facts[0].get_score().scores:
         if entry[2].get_name() == "Simple Wikipedia Cooccurrence":
             wikipedia_scores.append(entry)
     self.assertEqual(1, len(wikipedia_scores))
     self.assertTrue(wikipedia_scores[0][0] != 0)
     submodule.cache.delete_cache()
Example #7
0
 def test_combination_modalities_long(self):
     """Combine two facts that split "go to Paris" at different places.

     After ``FactCombinor`` runs, a single fact must remain that keeps the
     "TBC[many]" modality, both source sentences and the "go to" predicate.
     """
     score0 = MultipleScore()
     score0.add_score(1, None, None)
     score1 = MultipleScore()
     score1.add_score(0.5, None, None)

     source_with_many = MultipleSourceOccurrence.from_raw(
         "parents have many children", None, 1)
     generated_fact0 = GeneratedFact("parent", "go", "to Paris", "TBC[many]",
                                     False, score0, source_with_many)
     source_plain = MultipleSourceOccurrence.from_raw(
         "parents have children", None, 1)
     generated_fact1 = GeneratedFact("parent", "go to", "Paris", "", False,
                                     score1, source_plain)

     inputs = self.empty_input.add_generated_facts(
         [generated_fact0, generated_fact1])
     inputs = FactCombinor(None).process(inputs)

     facts = inputs.get_generated_facts()
     self.assertEqual(1, len(facts))
     merged = facts[0]
     self.assertIn("TBC[many]", merged.get_modality().get())
     sources = str(merged.get_sentence_source())
     self.assertIn("parents have many children x#x1", sources)
     self.assertIn("parents have children x#x1", sources)
     self.assertEqual("go to", merged.get_predicate())
Example #8
0
 def test_do_nothing(self):
     """Check that an already lower-case subject is left as "crisis"."""
     fact = GeneratedFact("crisis", "is a", "cat", "", False,
                          MultipleScore(), MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = inputs.add_subjects({Subject("lion")})
     processed = self.to_lower_case.process(inputs).get_generated_facts()
     self.assertEqual(1, len(processed))
     self.assertEqual("crisis", processed[0].get_subject().get())
Example #9
0
 def test_lion(self):
     """Expect one non-zero Simple Wikipedia score for "lion is a cat"."""
     fact = GeneratedFact("lion", "is a", "cat", "", False,
                          MultipleScore(), MultipleSourceOccurrence())
     inputs = self.simple_wikipedia_no_cache.process(
         self.empty_input.add_generated_facts([fact]))
     facts = inputs.get_generated_facts()
     self.assertEqual(1, len(facts))
     wikipedia_scores = []
     for entry in facts[0].get_score().scores:
         if entry[2].get_name() == "Simple Wikipedia Cooccurrence":
             wikipedia_scores.append(entry)
     self.assertEqual(1, len(wikipedia_scores))
     self.assertTrue(wikipedia_scores[0][0] != 0)
Example #10
0
 def test_turn_singular_duplicate(self):
     """Singularize the same "lions" fact given twice; both must remain."""
     plural_fact = GeneratedFact("lions", "is a", "cat", "", False,
                                 MultipleScore(),
                                 MultipleSourceOccurrence())
     # The very same object is listed twice on purpose of the test input.
     inputs = self.empty_input.add_generated_facts([plural_fact] * 2)
     inputs = inputs.add_subjects({Subject("lion")})
     processed = self.to_singular.process(inputs).get_generated_facts()
     self.assertEqual(2, len(processed))
     self.assertEqual("lion", processed[0].get_subject().get())
Example #11
0
 def test_combination_modalities(self):
     """Merge two identical triples and keep both of their modalities."""
     score0 = MultipleScore()
     score0.add_score(1, None, None)
     score1 = MultipleScore()
     score1.add_score(0.5, None, None)

     source0 = MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1)
     fact_some = GeneratedFact("lion", "eat", "zebra", "some", False,
                               score0, source0)
     source1 = MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1)
     fact_often = GeneratedFact("lion", "eat", "zebra", "often", False,
                                score1, source1)

     inputs = self.empty_input.add_generated_facts([fact_some, fact_often])
     inputs = FactCombinor(None).process(inputs)

     facts = inputs.get_generated_facts()
     self.assertEqual(1, len(facts))
     self.assertIn("some", facts[0].get_modality().get())
     self.assertIn("often", facts[0].get_modality().get())
Example #12
0
 def test_beach(self):
     """Run ``FactCombinor`` on one "beach have sand" fact with many sources.

     The fact carries five source sentences and a pre-filled modality
     string; a single fact must come out of the combination.
     """
     score0 = MultipleScore()
     score0.add_score(1, None, None)

     occurrences = [
         ("beaches have sand", 4),
         ("some beaches have sand", 2),
         ("some beaches have sand and some rocks", 1),
         ("all beaches have sand", 4),
         ("beach have sand", 1),
     ]
     mso = MultipleSourceOccurrence()
     for sentence, count in occurrences:
         mso.add_raw(sentence, "Google Autocomplete", count)

     modality = "some[subj/some] x#x3 // some[subj/all] x#x4"
     beach_fact = GeneratedFact("beach", "have", "sand", modality, False,
                                score0, mso)
     inputs = self.empty_input.add_generated_facts([beach_fact])
     inputs = FactCombinor(None).process(inputs)
     self.assertEqual(1, len(inputs.get_generated_facts()))
Example #13
0
 def test_not_remove(self):
     """Check that ``TBCCleaner`` keeps the "TBC[big bananas]" fact."""
     mso = MultipleSourceOccurrence()
     mso.add_raw("elephants eat big bananas", None, 2)
     banana_fact = GeneratedFact("elephant", "eat", "bananas",
                                 "TBC[big bananas]", 0, MultipleScore(), mso)
     inputs = Inputs().add_generated_facts([banana_fact])
     inputs = TBCCleaner(None).process(inputs)
     self.assertEqual(len(inputs.get_generated_facts()), 1)
Example #14
0
 def test_cache(self):
     """Process the same fact twice with a Google Book submodule.

     The submodule is built with ``cache_name="google-book-cache-temp"``.
     Only the second pass is asserted on: it must yield one fact with
     exactly one non-zero "Google Book Submodule" score. The cache is
     deleted at the end.
     """
     submodule = GoogleBookSubmodule(
         None, True, cache_name="google-book-cache-temp")

     # First pass: the return value is not used.
     first_fact = GeneratedFact("lion", "eat", "zebra", "", False,
                                MultipleScore(), MultipleSourceOccurrence())
     submodule.process(self.empty_input.add_generated_facts([first_fact]))

     # Second pass on a fresh but identical fact.
     second_fact = GeneratedFact("lion", "eat", "zebra", "", False,
                                 MultipleScore(), MultipleSourceOccurrence())
     inputs = submodule.process(
         self.empty_input.add_generated_facts([second_fact]))

     facts = inputs.get_generated_facts()
     self.assertEqual(1, len(facts))
     book_scores = []
     for entry in facts[0].get_score().scores:
         if entry[2].get_name() == "Google Book Submodule":
             book_scores.append(entry)
     self.assertEqual(1, len(book_scores))
     self.assertTrue(book_scores[0][0] != 0)
     submodule.cache.delete_cache()
 def test_combination(self):
     """Check that the linear combination keeps every input fact.

     Ten "elephant" facts with 0/1 scores plus one extra fact go through
     ``self.linear_combination``; the output must hold all eleven facts.
     """
     dataset = [
         ("elephant", "download", "baby", 0),
         ("elephant", "climb", "trunk", 0),
         ("elephant", "bear", "baby", 1),
         ("elephant", "download this cute illustration with", "baby", 0),
         ("elephant", "be", "ear", 0),
         ("elephant", "fry", "ear", 0),
         ("elephant", "trek", "travel", 0),
         ("elephant", "forbid love in", "water", 0),
         ("elephant", "eat", "bark", 1),
         ("elephant", "have", "tusks", 1),
     ]
     gfs = []
     for pos, row in enumerate(dataset, start=1):
         subject, predicate, obj, truth = row
         score = MultipleScore()
         # Even positions are attributed to Google, odd ones to Bing.
         if pos % 2 == 0:
             submodule_class = GoogleAutocompleteSubmodule
         else:
             submodule_class = BingAutocompleteSubmodule
         score.add_score(truth, self.dummy_reference,
                         submodule_class(self.dummy_reference))
         gfs.append(
             GeneratedFact(subject, predicate, obj, "", False, score,
                           MultipleSourceOccurrence()))

     # One extra fact that is not part of the dataset above.
     score2 = MultipleScore()
     score2.add_score(1, self.dummy_reference,
                      GoogleAutocompleteSubmodule(self.dummy_reference))
     extra_source = MultipleSourceOccurrence.from_raw(
         "elephants are big", None, 1)
     gfs.append(
         GeneratedFact("elephant", "be", "big", "", False, score2,
                       extra_source))

     inputs = self.linear_combination.process(
         self.empty_input.add_generated_facts(gfs))
     self.assertEqual(len(dataset) + 1, len(inputs.get_generated_facts()))
Example #16
0
 def test_panda_flickr_cluster(self):
     """Expect exactly one "Flickr" score for "panda live china"."""
     panda_fact = GeneratedFact("panda", "live", "china", "", False,
                                MultipleScore(), MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([panda_fact])
     inputs = inputs.add_subjects({Subject("panda")})
     inputs = self.associations_flick_cluster.process(inputs)

     facts = inputs.get_generated_facts()
     self.assertEqual(1, len(facts))
     flickr_scores = []
     for entry in facts[0].get_score().scores:
         if entry[2].get_name() == "Flickr":
             flickr_scores.append(entry)
     self.assertEqual(1, len(flickr_scores))
Example #17
0
 def test_lion_eat_code(self):
     """Expect a single Google Book score equal to zero for "lion eat code"."""
     fact = GeneratedFact("lion", "eat", "code", "", False,
                          MultipleScore(), MultipleSourceOccurrence())
     inputs = self.google_book_no_cache.process(
         self.empty_input.add_generated_facts([fact]))

     facts = inputs.get_generated_facts()
     self.assertEqual(1, len(facts))
     book_scores = []
     for entry in facts[0].get_score().scores:
         if entry[2].get_name() == "Google Book Submodule":
             book_scores.append(entry)
     self.assertEqual(1, len(book_scores))
     self.assertTrue(book_scores[0][0] == 0)
Example #18
0
 def test_panda_imagetag(self):
     """Expect exactly one "Image Tag submodule" score for "panda climb tree"."""
     panda_fact = GeneratedFact("panda", "climb", "tree", "", False,
                                MultipleScore(), MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([panda_fact])
     # NOTE(review): the subject is passed as a plain string here, not
     # wrapped in a Subject object — confirm this is what is intended.
     inputs = inputs.add_subjects({"panda"})
     inputs = self.associations.process(inputs)

     facts = inputs.get_generated_facts()
     self.assertEqual(1, len(facts))
     imagetag_scores = []
     for entry in facts[0].get_score().scores:
         if entry[2].get_name() == "Image Tag submodule":
             imagetag_scores.append(entry)
     self.assertEqual(1, len(imagetag_scores))
 def test_lion(self):
     """Score ten copies of "lion is a cat" with the Wikipedia submodule.

     All copies must be returned, and the first one must carry exactly one
     non-zero "Wikipedia Cooccurrence" score.
     """
     n_copies = 10
     gfs = [
         GeneratedFact("lion", "is a", "cat", "", False, MultipleScore(),
                       MultipleSourceOccurrence())
         for _ in range(n_copies)
     ]
     inputs = self.wikipedia_no_cache.process(
         self.empty_input.add_generated_facts(gfs))

     facts = inputs.get_generated_facts()
     self.assertEqual(n_copies, len(facts))
     wikipedia_scores = []
     for entry in facts[0].get_score().scores:
         if entry[2].get_name() == "Wikipedia Cooccurrence":
             wikipedia_scores.append(entry)
     self.assertEqual(1, len(wikipedia_scores))
     self.assertTrue(wikipedia_scores[0][0] != 0)
 def add_facts_to_generated_facts(self, generated_facts, subject, predicate,
                                  obj, modality, negative,
                                  score_based_on_ranking, suggestion):
     """Append a new fact to ``generated_facts`` for the given triple.

     Nothing is appended when ``suggestion[SUBJECT]`` is not contained in
     ``subject``. Otherwise the fact gets two scores: 1.0 attributed to
     ``reference_corenlp`` and ``score_based_on_ranking`` attributed to
     this object. ``suggestion[0]`` is used as the source sentence and
     ``suggestion[2]`` is passed as the last ``GeneratedFact`` argument.
     """
     if suggestion[SUBJECT] not in subject:
         return

     scores = MultipleScore()
     scores.add_score(1.0, self._module_reference, reference_corenlp)
     scores.add_score(score_based_on_ranking, self._module_reference, self)

     source = MultipleSourceOccurrence.from_raw(suggestion[0], self, 1)
     generated_facts.append(
         GeneratedFact(subject, predicate, obj, modality, negative, scores,
                       source, suggestion[2]))
 def get_fact_from_simple_extraction(self, extraction, score, suggestion):
     """Turn a simple extraction into a ``GeneratedFact``.

     ``extraction[0:3]`` give the subject, predicate and object. The fact
     is negative when ``get_negativity(suggestion)`` is truthy, otherwise
     ``extraction[3]`` decides. Two scores are attached: ``score``
     attributed to this object and 1.0 attributed to ``reference_manual``.
     The modality is ``None``.
     """
     negative = get_negativity(suggestion) or extraction[3]

     scores = MultipleScore()
     scores.add_score(score, self._module_reference, self)
     scores.add_score(1.0, self._module_reference, reference_manual)

     subject = extraction[0]
     predicate = extraction[1]
     obj = extraction[2]
     source = MultipleSourceOccurrence.from_raw(suggestion[0], self, 1)
     return GeneratedFact(subject, predicate, obj, None, negative, scores,
                          source, suggestion[2])
 def _openie_from_file(self, suggestions):
     """Build generated facts from the OpenIE extractions of ``suggestions``.

     Each suggestion is first expanded through
     ``transforms_suggestion_into_batch_component``. For every resulting
     suggestion, the extractions of its statement are read from an
     ``OpenIEReader``, filtered, reduced by ``_take_earliest_predicate``
     and turned into ``GeneratedFact`` objects carrying two scores: the
     extraction score (attributed to ``reference_openie5``) and the
     ranking-based score of the suggestion (attributed to this object).

     Extractions are skipped when the subject of the suggestion is not
     contained in the extracted subject, or when the extraction score
     cannot be read as a float (this is logged).

     Returns the list of generated facts.
     """
     openie_reader = OpenIEReader()
     generated_facts = []
     new_suggestions = []
     for suggestion in suggestions:
         self.transforms_suggestion_into_batch_component(
             suggestion, new_suggestions)
     for suggestion in new_suggestions:
         sentence = suggestion[STATEMENT]
         facts = openie_reader.get_from_sentence(sentence)
         negative = get_negativity(suggestion)
         # Require at least three fields before indexing them, and drop
         # extractions whose subject, predicate or object is one character
         # or less.
         facts = [
             fact for fact in facts if len(fact) > 2 and len(fact[0]) > 1
             and len(fact[1]) > 1 and len(fact[2]) > 1
         ]
         score_based_on_ranking = self.get_score_based_on_ranking(
             suggestion)
         facts = self._take_earliest_predicate(sentence, facts)
         for fact in facts:
             if suggestion[SUBJECT] not in fact[0]:
                 continue
             # Read the score field defensively so the error path below
             # can never raise on its own.
             raw_score = fact[3] if len(fact) > 3 else None
             try:
                 # The score may use a decimal comma.
                 score = float(raw_score.replace(",", "."))
             except (AttributeError, TypeError, ValueError):
                 logging.info(
                     "Problem in score reading in openie5 reader with %s",
                     raw_score)
                 continue
             multiple_score = MultipleScore()
             multiple_score.add_score(score, self._module_reference,
                                      reference_openie5)
             multiple_score.add_score(score_based_on_ranking,
                                      self._module_reference, self)
             generated_facts.append(
                 GeneratedFact(
                     fact[0], fact[1], fact[2], "", negative,
                     multiple_score,
                     MultipleSourceOccurrence.from_raw(sentence, self,
                                                       1), suggestion[2]))
     del openie_reader
     return generated_facts
Example #23
0
 def test_combination(self):
     """Merge three identical "lion eat zebra" facts into one.

     The merged fact must keep the three scores and both source sentences,
     whose rendered form must contain the "x#x3" and "x#x2" markers.
     """
     score0 = MultipleScore()
     score0.add_score(1, None, None)
     score1 = MultipleScore()
     score1.add_score(0.5, None, None)
     score2 = MultipleScore()
     score2.add_score(0.7, None, None)

     source0 = MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1)
     generated_fact0 = GeneratedFact("lion", "eat", "zebra", "", False,
                                     score0, source0)

     mso = MultipleSourceOccurrence()
     mso.add_raw("lions eat zebras", None, 2)
     mso.add_raw("lions eat small zebras", None, 1)
     generated_fact1 = GeneratedFact("lion", "eat", "zebra", "", False,
                                     score1, mso)

     source2 = MultipleSourceOccurrence.from_raw(
         "lions eat small zebras", None, 1)
     generated_fact2 = GeneratedFact("lion", "eat", "zebra", "", False,
                                     score2, source2)

     inputs = self.empty_input.add_generated_facts(
         [generated_fact0, generated_fact1, generated_fact2])
     inputs = FactCombinor(None).process(inputs)

     facts = inputs.get_generated_facts()
     self.assertEqual(1, len(facts))
     self.assertEqual(3, len(facts[0].get_score().scores))
     sentence = str(facts[0].get_sentence_source())
     for expected in ("lions eat zebras", "lions eat small zebras",
                      "x#x3", "x#x2"):
         self.assertIn(expected, sentence)