コード例 #1
0
 def process(self, input_interface):
     """Score the facts already in ``input_interface`` using FILENAME.

     FILENAME is read as tab-separated text and its first line is skipped.
     For every remaining row, columns 0, 1 and 2 are taken as subject,
     predicate and object, and column ``self._index`` is parsed as a float
     score. A row produces a new generated fact only when its triple
     matches a fact already present in ``input_interface`` and its score
     is not zero.

     Returns the result of ``input_interface.add_generated_facts`` applied
     to the newly built facts.
     """
     logging.info("Start the " + self._name + " archit submodule")
     known_triples = {
         (fact.get_subject().get(),
          fact.get_predicate().get(),
          fact.get_object().get())
         for fact in input_interface.get_generated_facts()
     }
     scored_facts = []
     with open(FILENAME) as handle:
         # Discard the first line; an empty file simply yields no rows.
         next(handle, None)
         for raw_row in handle:
             columns = raw_row.strip().split("\t")
             triple = (columns[0], columns[1], columns[2])
             if triple not in known_triples:
                 continue
             value = float(columns[self._index])
             if value == 0:
                 # Rows with a zero score contribute nothing.
                 continue
             fact_score = MultipleScore()
             fact_score.add_score(value, self._module_reference, self)
             scored_facts.append(
                 GeneratedFact(triple[0], triple[1], triple[2], "", 0,
                               fact_score, MultipleSourceOccurrence()))
     return input_interface.add_generated_facts(scored_facts)
コード例 #2
0
    def test_conceptual_caption(self):
        """Run the conceptual-captions comparator over a small fact set.

        Checks that processing keeps every fact, that the first fact then
        carries two scores, and that the second score of the second fact
        differs from 0 by more than 1e-5.
        """
        sc = ConceptualCaptionsComparatorSubmodule(None)
        self.empty_input = Inputs()
        self.dummy_reference = ReferencableInterface("DUMMY")

        dataset = [
            ("elephant", "download", "baby", 0),
            ("elephant", "have", "tusks", 1),
            ("lion", "eat", "gazella", 0),
            ("penguin", "eat", "fish", 0),
            ("gorilla", "eat", "banana", 0),
            ("sky", "hasProperty", "blue", 0),
            ("computer", "is", "working", 1),
            ("raccoon", "hasProperty", "blue", 0),
        ]
        subject_names = ("elephant", "penguin", "lion", "gorilla", "sky",
                         "computer", "raccoon")
        subjects = {Subject(name) for name in subject_names}

        gfs = []
        for position, row in enumerate(dataset, start=1):
            subject, predicate, obj, truth = row
            # Even positions are attributed to Google, odd ones to Bing.
            if position % 2 == 0:
                submodule_cls = GoogleAutocompleteSubmodule
            else:
                submodule_cls = BingAutocompleteSubmodule
            score = MultipleScore()
            score.add_score(truth, self.dummy_reference,
                            submodule_cls(self.dummy_reference))
            gfs.append(
                GeneratedFact(subject, predicate, obj, "", False, score,
                              MultipleSourceOccurrence()))

        # One extra fact that is not part of the dataset above.
        extra_score = MultipleScore()
        extra_score.add_score(
            1, self.dummy_reference,
            GoogleAutocompleteSubmodule(self.dummy_reference))
        extra_source = MultipleSourceOccurrence.from_raw(
            "elephants are big", None, 1)
        gfs.append(
            GeneratedFact("elephant", "be", "big", "", False, extra_score,
                          extra_source))

        inputs = self.empty_input.add_generated_facts(gfs)
        inputs = inputs.add_subjects(subjects)
        inputs = sc.process(inputs)

        processed = inputs.get_generated_facts()
        self.assertEqual(len(dataset) + 1, len(processed))
        self.assertEqual(len(processed[0].get_score().scores), 2)
        self.assertNotAlmostEqual(
            processed[1].get_score().scores[1][0], 0, delta=1e-5)
コード例 #3
0
 def get_generated_fact_with_score_from_classifier(self, fact, clf):
     """Build a GeneratedFact for ``fact`` scored by the classifier ``clf``.

     The score is ``clf.predict(fact, row)``, where ``row`` comes from
     ``self.get_fact_row(fact)``. It is recorded against
     ``self.modules[fact]`` and ``self.submodules[fact]``. The modality,
     sentences and patterns are looked up in the per-fact mappings held
     on ``self``.
     """
     fact_score = MultipleScore()
     features = self.get_fact_row(fact)
     prediction = clf.predict(fact, features)
     fact_score.add_score(
         prediction, self.modules[fact], self.submodules[fact])
     return GeneratedFact(
         fact.get_subject(),
         fact.get_predicate(),
         fact.get_object(),
         Modality.from_modalities_and_scores(self.modalities[fact].items()),
         fact.is_negative(),
         fact_score,
         self.sentences[fact],
         self.patterns[fact])
コード例 #4
0
 def test_save(self):
     """Round-trip an ``Inputs`` object through ``save`` and ``load``.

     Fills an ``Inputs`` with seeds, patterns, subjects, generated facts
     and objects, writes it to ``temp.json``, reads it back, and checks
     that each collection has the same length before and after.
     """
     inputs = Inputs()
     subjects = [Subject("baba"), Subject("coko")]
     patterns = [
         PatternGoogle("why are"),
         PatternGoogle("Why are", "hasProperty", True)
     ]
     # References holding more than one module / submodule.
     mmr = MultipleModuleReference(ModuleReferenceInterface("Module0"))
     mmr.add_reference(ModuleReferenceInterface("Module1"))
     msr = MultipleSubmoduleReference(
         SubmoduleReferenceInterface("Submodule0"))
     msr.add_reference(SubmoduleReferenceInterface("Submodule0"))
     # ms0 holds a single score, ms1 holds two.
     ms0 = MultipleScore()
     ms0.add_score(1.0, ModuleReferenceInterface("Module0"),
                   SubmoduleReferenceInterface("Submodule0"))
     ms1 = MultipleScore()
     ms1.add_score(1.0, mmr, msr)
     ms1.add_score(0.5, ModuleReferenceInterface("Module1"),
                   SubmoduleReferenceInterface("Submodule2"))
     # mp0 holds one pattern, mp1 holds both.
     mp0 = MultiplePattern()
     mp0.add_pattern(patterns[0])
     mp1 = MultiplePattern()
     mp1.add_pattern(patterns[0])
     mp1.add_pattern(patterns[1])
     gfs = [
         GeneratedFact(
             "baba", "is", "you", "sometimes", False, ms0,
             MultipleSourceOccurrence.from_raw("baba is you", msr, 1), mp0),
         GeneratedFact(
             "coko", "is", "dead", "always", True, ms1,
             MultipleSourceOccurrence.from_raw("toto is always dead", msr,
                                               1), mp1)
     ]
     seeds = [
         Fact("baba", "is", "us", None, False),
         Fact("coko", "are", "missing", "coucou", True)
     ]
     objects = [Object("missing"), Object("you")]
     inputs = inputs.replace_seeds(seeds)
     inputs = inputs.replace_patterns(patterns)
     inputs = inputs.replace_subjects(subjects)
     inputs = inputs.replace_generated_facts(gfs)
     inputs = inputs.replace_objects(objects)
     inputs.save("temp.json")
     inputs_read = inputs.load("temp.json")
     self.assertEqual(len(inputs.get_generated_facts()),
                      len(inputs_read.get_generated_facts()))
     # Compare subjects with subjects. The previous version compared them
     # with the reloaded generated facts, which only passed because both
     # collections happen to contain two items.
     self.assertEqual(len(inputs.get_subjects()),
                      len(inputs_read.get_subjects()))
     self.assertEqual(len(inputs.get_patterns()),
                      len(inputs_read.get_patterns()))
     self.assertEqual(len(inputs.get_seeds()), len(inputs_read.get_seeds()))
     self.assertEqual(len(inputs.get_objects()),
                      len(inputs_read.get_objects()))
コード例 #5
0
def read_score(score):
    """Rebuild a ``MultipleScore`` from its serialized dictionary form.

    ``score`` must have ``"type"`` equal to ``"MultipleScore"`` and a
    ``"scores"`` list whose entries provide ``"score"``, ``"module_from"``
    and ``"submodule_from"``.

    Raises:
        UnknownSerializedObject: if ``score["type"]`` is anything else.
    """
    if score["type"] != "MultipleScore":
        raise UnknownSerializedObject(
            "Unknown score type:" + json.dumps(score))
    restored = MultipleScore()
    for entry in score["scores"]:
        module = read_module_reference(entry["module_from"])
        submodule = read_submodule_reference(entry["submodule_from"])
        restored.add_score(entry["score"], module, submodule)
    return restored
コード例 #6
0
 def test_cache(self):
     """Process the same fact twice through a cache-enabled submodule.

     The first ``process`` call is made before the checked one, so the
     second call runs against whatever the first one stored. The test
     then checks that the fact carries exactly one non-zero
     "Simple Wikipedia Cooccurrence" score.
     """
     wikipedia_cache = SimpleWikipediaCooccurrenceSubmodule(
         None, True, "simple-wikipedia-cache-test")
     try:
         generated_fact = GeneratedFact(
             "lion", "is a", "cat", "", False, MultipleScore(),
             MultipleSourceOccurrence())
         inputs = self.empty_input.add_generated_facts([generated_fact])
         wikipedia_cache.process(inputs)
         # A fresh but identical fact for the second pass.
         generated_fact = GeneratedFact(
             "lion", "is a", "cat", "", False, MultipleScore(),
             MultipleSourceOccurrence())
         inputs = self.empty_input.add_generated_facts([generated_fact])
         inputs = wikipedia_cache.process(inputs)
         self.assertEqual(1, len(inputs.get_generated_facts()))
         scores = inputs.get_generated_facts()[0].get_score()
         scores_wikipedia = [
             x for x in scores.scores
             if x[2].get_name() == "Simple Wikipedia Cooccurrence"
         ]
         self.assertEqual(1, len(scores_wikipedia))
         self.assertNotEqual(0, scores_wikipedia[0][0])
     finally:
         # Delete the test cache even when an assertion or process() call
         # fails, so a failed run does not leave it behind.
         wikipedia_cache.cache.delete_cache()
コード例 #7
0
 def test_combination_modalities_long(self):
     """Combine two facts whose predicate/object boundary differs.

     "go" + "to Paris" and "go to" + "Paris" are expected to end up as a
     single fact that keeps the "TBC[many]" modality, both source
     sentences, and the predicate "go to".
     """
     full_score = MultipleScore()
     full_score.add_score(1, None, None)
     half_score = MultipleScore()
     half_score.add_score(0.5, None, None)
     short_predicate_fact = GeneratedFact(
         "parent", "go", "to Paris", "TBC[many]", False, full_score,
         MultipleSourceOccurrence.from_raw(
             "parents have many children", None, 1))
     long_predicate_fact = GeneratedFact(
         "parent", "go to", "Paris", "", False, half_score,
         MultipleSourceOccurrence.from_raw(
             "parents have children", None, 1))
     inputs = self.empty_input.add_generated_facts(
         [short_predicate_fact, long_predicate_fact])
     fact_combinor = FactCombinor(None)
     inputs = fact_combinor.process(inputs)

     combined = inputs.get_generated_facts()
     self.assertEqual(1, len(combined))
     merged = combined[0]
     self.assertIn("TBC[many]", merged.get_modality().get())
     sources = str(merged.get_sentence_source())
     self.assertIn("parents have many children x#x1", sources)
     self.assertIn("parents have children x#x1", sources)
     self.assertEqual("go to", merged.get_predicate())
コード例 #8
0
ファイル: test_to_lower_case.py プロジェクト: wayne9qiu/CSK
 def test_do_nothing(self):
     """An already lower-case subject is left unchanged by processing."""
     fact = GeneratedFact(
         "crisis", "is a", "cat", "", False, MultipleScore(),
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = inputs.add_subjects({Subject("lion")})
     processed = self.to_lower_case.process(inputs).get_generated_facts()
     self.assertEqual(1, len(processed))
     self.assertEqual("crisis", processed[0].get_subject().get())
コード例 #9
0
 def test_lion(self):
     """A single fact gets one non-zero Simple Wikipedia score."""
     fact = GeneratedFact(
         "lion", "is a", "cat", "", False, MultipleScore(),
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = self.simple_wikipedia_no_cache.process(inputs)
     self.assertEqual(1, len(inputs.get_generated_facts()))
     fact_score = inputs.get_generated_facts()[0].get_score()
     # Keep only the entries contributed by the submodule under test.
     wikipedia_entries = []
     for entry in fact_score.scores:
         if entry[2].get_name() == "Simple Wikipedia Cooccurrence":
             wikipedia_entries.append(entry)
     self.assertEqual(1, len(wikipedia_entries))
     self.assertTrue(wikipedia_entries[0][0] != 0)
コード例 #10
0
 def test_turn_singular_duplicate(self):
     """The same plural fact added twice yields two singularized facts."""
     plural_fact = GeneratedFact(
         "lions", "is a", "cat", "", False, MultipleScore(),
         MultipleSourceOccurrence())
     # The very same object is listed twice on purpose.
     duplicated = [plural_fact] * 2
     inputs = self.empty_input.add_generated_facts(duplicated)
     inputs = inputs.add_subjects({Subject("lion")})
     processed = self.to_singular.process(inputs).get_generated_facts()
     self.assertEqual(2, len(processed))
     self.assertEqual("lion", processed[0].get_subject().get())
コード例 #11
0
 def test_combination_modalities(self):
     """Two identical triples with different modalities merge into one.

     The combined fact must mention both "some" and "often" in its
     modality.
     """
     full_score = MultipleScore()
     full_score.add_score(1, None, None)
     half_score = MultipleScore()
     half_score.add_score(0.5, None, None)
     fact_some = GeneratedFact(
         "lion", "eat", "zebra", "some", False, full_score,
         MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1))
     fact_often = GeneratedFact(
         "lion", "eat", "zebra", "often", False, half_score,
         MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1))
     inputs = self.empty_input.add_generated_facts([fact_some, fact_often])
     fact_combinor = FactCombinor(None)
     inputs = fact_combinor.process(inputs)

     combined = inputs.get_generated_facts()
     self.assertEqual(1, len(combined))
     for expected in ("some", "often"):
         self.assertIn(expected, combined[0].get_modality().get())
コード例 #12
0
 def test_beach(self):
     """A single fact with several source sentences stays a single fact."""
     score0 = MultipleScore()
     score0.add_score(1, None, None)
     occurrences = MultipleSourceOccurrence()
     raw_sentences = (
         ("beaches have sand", 4),
         ("some beaches have sand", 2),
         ("some beaches have sand and some rocks", 1),
         ("all beaches have sand", 4),
         ("beach have sand", 1),
     )
     for sentence, count in raw_sentences:
         occurrences.add_raw(sentence, "Google Autocomplete", count)
     modality = "some[subj/some] x#x3 // some[subj/all] x#x4"
     beach_fact = GeneratedFact(
         "beach", "have", "sand", modality, False, score0, occurrences)
     inputs = self.empty_input.add_generated_facts([beach_fact])
     fact_combinor = FactCombinor(None)
     inputs = fact_combinor.process(inputs)
     self.assertEqual(1, len(inputs.get_generated_facts()))
コード例 #13
0
 def test_not_remove(self):
     """The TBC cleaner keeps the "TBC[big bananas]" fact built here."""
     inputs = Inputs()
     occurrences = MultipleSourceOccurrence()
     occurrences.add_raw("elephants eat big bananas", None, 2)
     banana_fact = GeneratedFact(
         "elephant", "eat", "bananas", "TBC[big bananas]", 0,
         MultipleScore(), occurrences)
     inputs = inputs.add_generated_facts([banana_fact])
     tbc_cleaner = TBCCleaner(None)
     cleaned = tbc_cleaner.process(inputs)
     self.assertEqual(len(cleaned.get_generated_facts()), 1)
コード例 #14
0
 def test_cache(self):
     """Process the same fact twice through a cache-enabled submodule.

     The first ``process`` call is made before the checked one, so the
     second call runs against whatever the first one stored. The test
     then checks that the fact carries exactly one non-zero
     "Google Book Submodule" score.
     """
     google_book_cache = GoogleBookSubmodule(
         None, True, cache_name="google-book-cache-temp")
     try:
         generated_fact = GeneratedFact("lion", "eat", "zebra", "", False,
                                        MultipleScore(),
                                        MultipleSourceOccurrence())
         inputs = self.empty_input.add_generated_facts([generated_fact])
         google_book_cache.process(inputs)
         # A fresh but identical fact for the second pass.
         generated_fact = GeneratedFact("lion", "eat", "zebra", "", False,
                                        MultipleScore(),
                                        MultipleSourceOccurrence())
         inputs = self.empty_input.add_generated_facts([generated_fact])
         inputs = google_book_cache.process(inputs)
         self.assertEqual(1, len(inputs.get_generated_facts()))
         scores = inputs.get_generated_facts()[0].get_score()
         scores_google_book = [
             x for x in scores.scores
             if x[2].get_name() == "Google Book Submodule"
         ]
         self.assertEqual(1, len(scores_google_book))
         self.assertNotEqual(0, scores_google_book[0][0])
     finally:
         # Delete the temporary cache even when an assertion or process()
         # call fails, so a failed run does not leave it behind.
         google_book_cache.cache.delete_cache()
コード例 #15
0
 def test_combination(self):
     """The linear combination keeps every fact it is given."""
     dataset = [
         ("elephant", "download", "baby", 0),
         ("elephant", "climb", "trunk", 0),
         ("elephant", "bear", "baby", 1),
         ("elephant", "download this cute illustration with", "baby", 0),
         ("elephant", "be", "ear", 0),
         ("elephant", "fry", "ear", 0),
         ("elephant", "trek", "travel", 0),
         ("elephant", "forbid love in", "water", 0),
         ("elephant", "eat", "bark", 1),
         ("elephant", "have", "tusks", 1),
     ]
     gfs = []
     for position, row in enumerate(dataset, start=1):
         subject, predicate, obj, truth = row
         # Even positions are attributed to Google, odd ones to Bing.
         if position % 2 == 0:
             submodule_cls = GoogleAutocompleteSubmodule
         else:
             submodule_cls = BingAutocompleteSubmodule
         score = MultipleScore()
         score.add_score(truth, self.dummy_reference,
                         submodule_cls(self.dummy_reference))
         gfs.append(
             GeneratedFact(subject, predicate, obj, "", False, score,
                           MultipleSourceOccurrence()))

     # One extra fact that is not part of the dataset above.
     extra_score = MultipleScore()
     extra_score.add_score(
         1, self.dummy_reference,
         GoogleAutocompleteSubmodule(self.dummy_reference))
     extra_source = MultipleSourceOccurrence.from_raw(
         "elephants are big", None, 1)
     gfs.append(
         GeneratedFact("elephant", "be", "big", "", False, extra_score,
                       extra_source))

     inputs = self.empty_input.add_generated_facts(gfs)
     inputs = self.linear_combination.process(inputs)
     self.assertEqual(len(dataset) + 1, len(inputs.get_generated_facts()))
コード例 #16
0
 def test_panda_flickr_cluster(self):
     """Processing adds exactly one "Flickr" score to the panda fact."""
     panda_fact = GeneratedFact(
         "panda", "live", "china", "", False, MultipleScore(),
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([panda_fact])
     inputs = inputs.add_subjects({Subject("panda")})
     inputs = self.associations_flick_cluster.process(inputs)
     self.assertEqual(1, len(inputs.get_generated_facts()))
     fact_score = inputs.get_generated_facts()[0].get_score()
     # Keep only the entries contributed by the submodule under test.
     flickr_entries = []
     for entry in fact_score.scores:
         if entry[2].get_name() == "Flickr":
             flickr_entries.append(entry)
     self.assertEqual(1, len(flickr_entries))
コード例 #17
0
 def test_lion_eat_code(self):
     """The "lion eat code" fact gets a single Google Book score of 0."""
     fact = GeneratedFact(
         "lion", "eat", "code", "", False, MultipleScore(),
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([fact])
     inputs = self.google_book_no_cache.process(inputs)
     self.assertEqual(1, len(inputs.get_generated_facts()))
     fact_score = inputs.get_generated_facts()[0].get_score()
     # Keep only the entries contributed by the submodule under test.
     google_book_entries = []
     for entry in fact_score.scores:
         if entry[2].get_name() == "Google Book Submodule":
             google_book_entries.append(entry)
     self.assertEqual(1, len(google_book_entries))
     self.assertTrue(google_book_entries[0][0] == 0)
コード例 #18
0
 def test_panda_imagetag(self):
     """Processing adds exactly one "Image Tag submodule" score."""
     panda_fact = GeneratedFact(
         "panda", "climb", "tree", "", False, MultipleScore(),
         MultipleSourceOccurrence())
     inputs = self.empty_input.add_generated_facts([panda_fact])
     # NOTE(review): a bare string is passed as the subject here, whereas
     # sibling tests pass Subject(...) instances - confirm this is intended.
     inputs = inputs.add_subjects({"panda"})
     inputs = self.associations.process(inputs)
     self.assertEqual(1, len(inputs.get_generated_facts()))
     fact_score = inputs.get_generated_facts()[0].get_score()
     # Keep only the entries contributed by the submodule under test.
     imagetag_entries = []
     for entry in fact_score.scores:
         if entry[2].get_name() == "Image Tag submodule":
             imagetag_entries.append(entry)
     self.assertEqual(1, len(imagetag_entries))
コード例 #19
0
 def test_lion(self):
     """Ten identical facts all survive; the first gets one non-zero score."""
     n_copies = 10
     # Each copy is a distinct GeneratedFact object with equal content.
     gfs = [
         GeneratedFact("lion", "is a", "cat", "", False, MultipleScore(),
                       MultipleSourceOccurrence())
         for _ in range(n_copies)
     ]
     inputs = self.empty_input.add_generated_facts(gfs)
     inputs = self.wikipedia_no_cache.process(inputs)
     self.assertEqual(n_copies, len(inputs.get_generated_facts()))
     fact_score = inputs.get_generated_facts()[0].get_score()
     # Keep only the entries contributed by the submodule under test.
     wikipedia_entries = []
     for entry in fact_score.scores:
         if entry[2].get_name() == "Wikipedia Cooccurrence":
             wikipedia_entries.append(entry)
     self.assertEqual(1, len(wikipedia_entries))
     self.assertTrue(wikipedia_entries[0][0] != 0)
コード例 #20
0
 def add_facts_to_generated_facts(self, generated_facts, subject, predicate,
                                  obj, modality, negative,
                                  score_based_on_ranking, suggestion):
     """Append a new fact to ``generated_facts`` when the subject matches.

     Nothing is appended unless ``suggestion[SUBJECT]`` is contained in
     ``subject``. The new fact carries two scores: 1.0 attributed to
     ``reference_corenlp`` and ``score_based_on_ranking`` attributed to
     this object. ``suggestion[0]`` is used as its source sentence and
     ``suggestion[2]`` is passed as the last constructor argument.

     The list is modified in place; the method returns ``None``.
     """
     if suggestion[SUBJECT] in subject:
         fact_score = MultipleScore()
         fact_score.add_score(1.0, self._module_reference,
                              reference_corenlp)
         fact_score.add_score(score_based_on_ranking,
                              self._module_reference, self)
         source = MultipleSourceOccurrence.from_raw(suggestion[0], self, 1)
         generated_facts.append(
             GeneratedFact(subject, predicate, obj, modality, negative,
                           fact_score, source, suggestion[2]))
コード例 #21
0
 def get_fact_from_simple_extraction(self, extraction, score, suggestion):
     """Turn a simple extraction into a GeneratedFact.

     ``extraction[0:3]`` supply subject, predicate and object. The fact is
     negative when either ``get_negativity(suggestion)`` or
     ``extraction[3]`` is truthy. Two scores are attached: ``score``
     attributed to this object and 1.0 attributed to ``reference_manual``.
     """
     is_negative = get_negativity(suggestion) or extraction[3]
     fact_score = MultipleScore()
     fact_score.add_score(score, self._module_reference, self)
     fact_score.add_score(1.0, self._module_reference, reference_manual)
     return GeneratedFact(
         extraction[0],
         extraction[1],
         extraction[2],
         None,  # no modality
         is_negative,
         fact_score,
         MultipleSourceOccurrence.from_raw(suggestion[0], self, 1),
         suggestion[2])
コード例 #22
0
 def _openie_from_file(self, suggestions):
     """Build generated facts from the OpenIE extractions of suggestions.

     Each suggestion is first passed through
     ``transforms_suggestion_into_batch_component``. For every resulting
     suggestion, the extractions returned by ``OpenIEReader`` for its
     statement are filtered, reduced by ``_take_earliest_predicate``, and
     turned into ``GeneratedFact`` objects. Each fact carries the
     extraction's own score (attributed to ``reference_openie5``) and the
     ranking-based score (attributed to this object).

     Extractions whose first component does not contain the suggestion's
     subject, or whose score cannot be parsed, are skipped.

     Returns:
         The list of generated facts, possibly empty.
     """
     openie_reader = OpenIEReader()
     generated_facts = []
     new_suggestions = []
     for suggestion in suggestions:
         self.transforms_suggestion_into_batch_component(
             suggestion, new_suggestions)
     for suggestion in new_suggestions:
         sentence = suggestion[STATEMENT]
         facts = openie_reader.get_from_sentence(sentence)
         negative = get_negativity(suggestion)
         # Keep only non-empty extractions whose first three components
         # are each longer than one character.
         facts = [
             fact for fact in facts if len(fact) > 0 and len(fact[0]) > 1
             and len(fact[1]) > 1 and len(fact[2]) > 1
         ]
         score_based_on_ranking = self.get_score_based_on_ranking(
             suggestion)
         facts = self._take_earliest_predicate(sentence, facts)
         for fact in facts:
             if suggestion[SUBJECT] not in fact[0]:
                 continue
             try:
                 # The score may use a comma as decimal separator.
                 score = float(fact[3].replace(",", "."))
             except (AttributeError, ValueError):
                 # Catch only parsing failures: a bare ``except`` would
                 # also swallow KeyboardInterrupt and SystemExit. Lazy
                 # %-formatting keeps the log call itself from raising
                 # when the raw score is not a string.
                 logging.info(
                     "Problem in score reading in openie5 reader with %s",
                     fact[3])
                 continue
             multiple_score = MultipleScore()
             multiple_score.add_score(score, self._module_reference,
                                      reference_openie5)
             multiple_score.add_score(score_based_on_ranking,
                                      self._module_reference, self)
             generated_facts.append(
                 GeneratedFact(
                     fact[0], fact[1], fact[2], "", negative,
                     multiple_score,
                     MultipleSourceOccurrence.from_raw(sentence, self,
                                                       1), suggestion[2]))
     del openie_reader
     return generated_facts
コード例 #23
0
 def test_combination(self):
     """Three identical triples merge into one fact with three scores.

     The merged sentence source must mention both source sentences and
     the markers "x#x3" and "x#x2".
     """
     score0 = MultipleScore()
     score0.add_score(1, None, None)
     score1 = MultipleScore()
     score1.add_score(0.5, None, None)
     score2 = MultipleScore()
     score2.add_score(0.7, None, None)
     single_source_fact = GeneratedFact(
         "lion", "eat", "zebra", "", False, score0,
         MultipleSourceOccurrence.from_raw("lions eat zebras", None, 1))
     two_sources = MultipleSourceOccurrence()
     two_sources.add_raw("lions eat zebras", None, 2)
     two_sources.add_raw("lions eat small zebras", None, 1)
     two_sources_fact = GeneratedFact(
         "lion", "eat", "zebra", "", False, score1, two_sources)
     small_zebra_fact = GeneratedFact(
         "lion", "eat", "zebra", "", False, score2,
         MultipleSourceOccurrence.from_raw(
             "lions eat small zebras", None, 1))
     inputs = self.empty_input.add_generated_facts(
         [single_source_fact, two_sources_fact, small_zebra_fact])
     fact_combinor = FactCombinor(None)
     inputs = fact_combinor.process(inputs)

     self.assertEqual(1, len(inputs.get_generated_facts()))
     self.assertEqual(
         3, len(inputs.get_generated_facts()[0].get_score().scores))
     sentence = str(inputs.get_generated_facts()[0].get_sentence_source())
     for expected in ("lions eat zebras", "lions eat small zebras",
                      "x#x3", "x#x2"):
         self.assertIn(expected, sentence)