def test_run_no_answer_sets(self):
    # CHECK: providing a literal together with its negation yields an empty list of answer sets
    patrick = literal.Literal("person", ["patrick"])
    not_patrick = literal.Literal("person", ["patrick"], positive=False)

    answer_sets = self.solver.run(self.ontology, [patrick, not_patrick])

    self.assertEqual([], answer_sets)
def test_run_multiple_answer_sets(self):
    # CHECK: two answer sets are returned, one inferring hero(patrick) and one inferring its negation
    facts = [literal.Literal("person", ["patrick"])]
    with_hero = answer_set.AnswerSet(
            facts,
            [literal.Literal("hero", ["patrick"])]
    )
    without_hero = answer_set.AnswerSet(
            facts,
            [literal.Literal("hero", ["patrick"], positive=False)]
    )

    result = self.solver.run(self.ontology, facts)

    self.assertEqual(2, len(result))
    # the order in which the two answer sets are returned is not fixed -> accept both
    first, second = result[0], result[1]
    self.assertTrue(
            (with_hero == first and without_hero == second)
            or (with_hero == second and without_hero == first)
    )
def test_eq(self):
    # CHECK: equality checks work as expected

    # NOTE: every literal is created from scratch, so that equality cannot succeed by identity
    new_literal = literal.Literal

    # the order in which the facts are provided does not matter
    left = answer_set.AnswerSet([new_literal("fact-1"), new_literal("fact-2")], [])
    right = answer_set.AnswerSet([new_literal("fact-2"), new_literal("fact-1")], [])
    self.assertTrue(left == right)

    # facts and inferences are not interchangeable
    left = answer_set.AnswerSet([new_literal("fact-1")], [new_literal("fact-2")])
    right = answer_set.AnswerSet([new_literal("fact-2")], [new_literal("fact-1")])
    self.assertFalse(left == right)
def run(
        self,
        path: str,
        facts: typing.Iterable[literal.Literal]
) -> typing.List[answer_set.AnswerSet]:
    """Runs DLV on the ontology at ``path`` together with the provided ``facts``.

    The facts are rendered as a program fragment and piped into DLV via stdin, and every line that DLV prints is
    parsed as one answer set. Literals in DLV's output that are among the provided facts are not reported as
    inferences.

    Args:
        path (str): The path of the file that contains the ontology.
        facts (iterable[literal.Literal]): The facts to provide to DLV in addition to the ontology.

    Returns:
        list[answer_set.AnswerSet]: One answer set for each line of DLV's output, or the empty list if DLV did not
            print anything.

    Raises:
        ValueError: If ``path`` does not refer to an existing file, or if a literal in DLV's output does not match
            ``LITERAL_PATTERN``. ``_sanitize_literals`` is expected to reject illegal symbols the same way.
        subprocess.CalledProcessError: If the DLV command exits with a non-zero status.
    """
    # imported locally, as these names are not needed anywhere else
    # (``collections.Iterable`` has been removed in Python 3.10 -> use the ABC from ``collections.abc``)
    from collections import abc as collections_abc
    import shlex

    # sanitize args
    path = str(path)
    if not os.path.isfile(path):
        raise ValueError(
                "The provided <path> does not refer to an existing file: '{}'!".format(path)
        )
    insanity.sanitize_type("facts", facts, collections_abc.Iterable)
    facts = set(facts)
    insanity.sanitize_iterable("facts", facts, elements_type=literal.Literal)
    self._sanitize_literals(facts)

    # prepare facts as single string to provide to DLV
    str_facts = ". ".join(str(f) for f in facts)
    if str_facts:
        str_facts += "."

    # run DLV
    # the path is quoted, since it is interpolated into a shell command and may contain spaces or metacharacters
    cmd = "echo \"{}\" | {} -silent -- {}".format(str_facts, self._dlv_path, shlex.quote(path))
    result = subprocess.check_output(cmd, shell=True, universal_newlines=True).strip()

    # check if any answer set has been provided at all
    if not result:
        return []

    # create answer sets
    result_sets = []
    for line in result.split("\n"):  # -> every line represents a single answer set

        # remove the first and the last character, which enclose the answer set
        body = line.strip()[1:-1]

        # collect inferences
        inferences = set()
        if body:
            for atom in body.split(", "):
                m = re.match(self.LITERAL_PATTERN, atom)
                if m is None:
                    raise ValueError(
                            "DLV produced a literal that could not be parsed: '{}'!".format(atom)
                    )

                # the terms group may be missing or empty for literals without any terms
                raw_terms = m.group("terms")
                lit = literal.Literal(
                        m.group("predicate"),
                        raw_terms.split(",") if raw_terms else [],
                        positive=m.group("sign") == ""
                )
                if lit not in facts:
                    inferences.add(lit)

        # create answer set
        result_sets.append(answer_set.AnswerSet(facts, inferences))

    return result_sets
def test_eq(self):
    # CHECK: equality checks work as expected

    # literals with the same predicate and terms are equal
    left = literal.Literal("person", ["patrick"])
    right = literal.Literal("person", ["patrick"])
    self.assertTrue(left == right)

    # negative literals with the same predicate are equal
    left = literal.Literal("person", positive=False)
    right = literal.Literal("person", positive=False)
    self.assertTrue(left == right)

    # differing terms break equality
    left = literal.Literal("person", ["patrick"])
    right = literal.Literal("person")
    self.assertFalse(left == right)

    # differing signs break equality
    left = literal.Literal("person", positive=False)
    right = literal.Literal("person", positive=True)
    self.assertFalse(left == right)
def test_run(self):
    # CHECK: providing a non-existing ontology path causes a ValueError
    for bad_path in ("/not/a/valid/path", None):
        with self.assertRaises(ValueError):
            self.solver.run(bad_path, [])

    # CHECK: providing a non-iterable as facts causes a TypeError
    for bad_facts in ("no facts", None, 123):
        with self.assertRaises(TypeError):
            self.solver.run(self.ontology, bad_facts)

    # CHECK: if facts contains instances of a type other than Literal, then a TypeError is raised
    mixed_facts = (
            [literal.Literal("person", ["patrick"]), 0],
            [None, literal.Literal("person", ["patrick"])]
    )
    for bad_facts in mixed_facts:
        with self.assertRaises(TypeError):
            self.solver.run(self.ontology, bad_facts)

    # CHECK: correct invocations of run do not cause any issues
    self.solver.run(self.ontology, [])
    legal_facts = (
            literal.Literal("person", ["patrick"]),
            literal.Literal("hero", ["patrick"])
    )
    self.solver.run(self.ontology, legal_facts)
def test_iter(self):
    # CHECK: iterating over an answer set covers all facts and inferences
    facts = [literal.Literal("fact-1"), literal.Literal("fact-2")]
    inferences = [literal.Literal("fact-3")]
    ans = answer_set.AnswerSet(facts, inferences)

    expected = {literal.Literal(name) for name in ("fact-1", "fact-2", "fact-3")}

    self.assertEqual(expected, set(ans))
def test_init(self):
    # CHECK: the predicate symbol must not be the empty string
    with self.assertRaises(ValueError):
        literal.Literal("")

    # CHECK: none of the terms can be the empty string
    for bad_terms in ([""], ["abc", "def", "", "jkl"]):
        with self.assertRaises(ValueError):
            literal.Literal("pred", bad_terms)

    # CHECK: legal args do not cause any issues
    literal.Literal("person")
    literal.Literal("person", [], positive=False)
    literal.Literal("person", ["patrick"])
    for predicate in (1, None):  # -> translated to "1" and "None", respectively
        literal.Literal(predicate)

    # CHECK: attributes are created correctly
    negated = literal.Literal("pred", ["a", "b", "c"], positive=False)
    self.assertEqual("pred", negated.predicate)
    self.assertEqual(("a", "b", "c"), negated.terms)
    self.assertFalse(negated.positive)
def _generate_sample(
        self,
        spec_countries: typing.List[str],
        inf_countries: typing.Optional[typing.List[str]] = None,
        minimal: bool = False
) -> kg.KnowledgeGraph:
    """Generates a single training sample based on the provided data.

    The arg ``spec_countries`` contains the names of all countries that are supposed to be specified, i.e., fully
    known, in the dataset to create, and ``inf_countries`` indicates the names of those whose regions are to be
    inferred. If ``inf_countries`` is not provided, however, then the prediction targets are chosen randomly from
    ``spec_countries``.

    Which located-in facts are withheld from the solver depends on the configured problem setting:

    * ``PROBLEM_S1``: subregions are provided for all countries, regions are withheld for target countries.
    * ``PROBLEM_S2``: neither regions nor subregions are provided for target countries.
    * otherwise: regions are withheld for target countries and their neighbors, subregions for target countries.

    Args:
        spec_countries (list[str]): All countries whose regions are specified.
        inf_countries (list[str], optional): Those countries, whose regions are to be inferred.
        minimal (bool, optional): Specifies whether to generate a minimal sample, i.e., one that contains
            inferences and predictions for target countries only. This is ``False``, by default.

    Returns:
        kg.KnowledgeGraph: The created training sample.

    Raises:
        IndexError: If the solver does not return any answer set for the generated facts.
    """
    # randomly shuffle countries
    countries = spec_countries[:]
    if inf_countries:
        countries += inf_countries
    random.shuffle(countries)
    country_set = set(countries)  # used for fast membership tests when collecting neighbor-of facts

    # (randomly) choose prediction targets if not provided
    if inf_countries:
        inf_countries = set(inf_countries)
    else:
        inf_countries = set(countries[-self.NUM_EVAL_COUNTRIES:])

    # determine all countries that are neighbors of a prediction target (but not targets by themselves)
    inf_neighbors = set()
    for c in inf_countries:
        inf_neighbors.update(self._data[c].neighbors)
    inf_neighbors -= inf_countries

    def belongs_to_sample(lit) -> bool:
        """Decides whether an inferred/predicted literal is added to the sample.

        Non-minimal samples keep everything. Minimal samples keep literals with predicate "region" or
        "subregion" as well as unary and binary literals that mention a target country.
        """
        if not minimal:
            return True
        if lit.predicate == "region" or lit.predicate == "subregion":
            return True
        if len(lit.terms) == 1:
            return lit.terms[0] in inf_countries
        if len(lit.terms) == 2:
            return lit.terms[0] in inf_countries or lit.terms[1] in inf_countries
        return False

    # create new knowledge graph and add vocabulary
    sample = kg.KnowledgeGraph()
    sample.classes.add_all(self._classes.values())
    sample.relations.add_all(self._relations.values())

    # a dict that maps names to individual objects
    individuals = {}

    # create variables for storing facts
    class_facts = set()     # all (positive) class memberships (negative ones are inferred from these)
    neighbor_facts = set()  # all facts about (positive) neighbor-of relations (negative ones are inferred)
    location_facts = set()  # the part of the (positive) located-in facts to infer the remaining ones from
    all_locations = set()   # all (positive) located-in relations (negative ones are inferred from these)

    # create individuals for all regions/subregions
    for region in itertools.chain.from_iterable((r, *s) for r, s in self._regions.items()):
        individuals[region] = ind_fac.IndividualFactory.create_individual(region)
        sample.individuals.add(individuals[region])

    # create literals that describe the existing regions and subregions as well as the relations among them
    for r, subregions in self._regions.items():
        class_facts.add(literal.Literal(voc.CLASS_REGION, [r]))
        for s in subregions:
            class_facts.add(literal.Literal(voc.CLASS_SUBREGION, [s]))
            loc_lit = literal.Literal(voc.RELATION_LOCATED_IN, [s, r])
            location_facts.add(loc_lit)
            all_locations.add(loc_lit)

    # create individuals for all countries
    for c in countries:
        individuals[c] = ind_fac.IndividualFactory.create_individual(c)
        sample.individuals.add(individuals[c])

    # create literals for (countries') located-in and neighbor-of relationships
    for cou_name in countries:

        # fetch the current country's region and subregion
        r = self._data[cou_name].region
        s = self._data[cou_name].subregion

        # create literals that describe the country as well as the relation to its region/subregion
        cou_lit = literal.Literal(voc.CLASS_COUNTRY, [cou_name])
        reg_lit = literal.Literal(voc.RELATION_LOCATED_IN, [cou_name, r])
        sub_lit = None if s is None else literal.Literal(voc.RELATION_LOCATED_IN, [cou_name, s])
        class_facts.add(cou_lit)
        all_locations.add(reg_lit)
        if sub_lit is not None:
            all_locations.add(sub_lit)

        # determine whether the located-in predicates should be added to the list of provided facts
        is_target = cou_name in inf_countries
        if self._problem_setting == self.PROBLEM_S1:
            if sub_lit is not None:  # subregion is provided for all countries
                location_facts.add(sub_lit)
            if not is_target:  # region is not provided for target countries
                location_facts.add(reg_lit)
        elif self._problem_setting == self.PROBLEM_S2:
            if not is_target:  # neither region nor subregion are provided for target countries
                location_facts.add(reg_lit)
                if sub_lit is not None:
                    location_facts.add(sub_lit)
        else:
            # region is provided neither for targets nor for their neighbors
            if not is_target and cou_name not in inf_neighbors:
                location_facts.add(reg_lit)
            # subregion is not provided for target countries
            if not is_target and sub_lit is not None:
                location_facts.add(sub_lit)

        # iterate over all neighbors of the current country, and add according neighbor-of literals
        for n in self._data[cou_name].neighbors:
            if n in country_set:  # -> important, because not all of the countries in self._data might be used
                neighbor_facts.add(literal.Literal(self.NEIGHBOR_OF_PREDICATE, [cou_name, n]))
                neighbor_facts.add(literal.Literal(self.NEIGHBOR_OF_PREDICATE, [n, cou_name]))

    # compute all inferences that are possible based on the restricted data
    input_facts = list(itertools.chain(neighbor_facts, location_facts))
    if self._class_facts:
        input_facts.extend(class_facts)
    restricted_knowledge = self._solver.run(self._ontology_path, input_facts)[0]

    # add all facts to the sample (sorted by their string representation, to fix the order of insertion)
    for f in sorted(restricted_knowledge.facts, key=str):
        self._add_literal_to_kg(sample, individuals, f)

    # add all inferences to the sample
    for i in sorted(restricted_knowledge.inferences, key=str):
        if belongs_to_sample(i):
            self._add_literal_to_kg(sample, individuals, i, inferred=True)

    # compute perfect knowledge
    perfect_knowledge = set(
            self._solver.run(
                    self._ontology_path,
                    itertools.chain(class_facts, neighbor_facts, all_locations)
            )[0]
    )

    # determine all information that was neither provided nor inferred
    missing_knowledge = sorted(perfect_knowledge - set(restricted_knowledge), key=str)

    # add missing knowledge as prediction targets to the sample
    for p in missing_knowledge:
        if belongs_to_sample(p):
            self._add_literal_to_kg(sample, individuals, p, prediction=True)

    # provide the created sample
    return sample
def test_sanitize_literals(self):
    # CHECK: illegal predicate or term symbols cause a ValueError
    illegal_literals = (
            ("Person", ["patrick"]),
            ("1person", ["patrick"]),
            ("per-son", ["patrick"]),
            ("_person", ["patrick"]),
            ("person", ["Patrick"]),
            ("person", ["1patrick"]),
            ("person", ["pat-rick"]),
            ("person", ["_patrick"]),
            ("person", ["patrick", "Patrick"])
    )
    for predicate, terms in illegal_literals:
        with self.assertRaises(ValueError):
            self.solver._sanitize_literals([literal.Literal(predicate, terms)])

    # CHECK: legal literals do not cause any issues
    legal_literals = (
            ("person", ["patrick"]),
            ("person1", ["patrick1"]),
            ("per_son", ["pat_rick"]),
            ("personPerson", ["patrickPatrick"])
    )
    for predicate, terms in legal_literals:
        self.solver._sanitize_literals([literal.Literal(predicate, terms)])
def test_run_single_answer_sets(self):
    # CHECK: providing hero(patrick) yields exactly one answer set, which infers person(patrick)
    provided = [literal.Literal("hero", ["patrick"])]
    expected = answer_set.AnswerSet(
            provided,
            [literal.Literal("person", ["patrick"])]
    )

    answer_sets = self.solver.run(self.ontology, provided)

    self.assertEqual(1, len(answer_sets))
    self.assertEqual(expected, answer_sets[0])
def test_init(self):
    # NOTE: every illegal invocation is wrapped in its own assertRaises block, because any statement that follows
    #       the first raising one inside of such a block is never executed (and thus never checked)

    # CHECK: facts has to be an iterable of literals -> otherwise a TypeError is raised
    with self.assertRaises(TypeError):
        answer_set.AnswerSet("facts", [])
    with self.assertRaises(TypeError):
        answer_set.AnswerSet(literal.Literal("fact"), [])
    with self.assertRaises(TypeError):
        answer_set.AnswerSet(["fact"], [])
    with self.assertRaises(TypeError):
        answer_set.AnswerSet((literal.Literal("test"), "fact"), [])

    # CHECK: inferences has to be an iterable of literals -> otherwise a TypeError is raised
    with self.assertRaises(TypeError):
        answer_set.AnswerSet([], "facts")
    with self.assertRaises(TypeError):
        answer_set.AnswerSet([], literal.Literal("fact"))
    with self.assertRaises(TypeError):
        answer_set.AnswerSet([], ["fact"])
    with self.assertRaises(TypeError):
        answer_set.AnswerSet([], (literal.Literal("test"), "fact"))

    # CHECK: providing legal values causes no issues
    answer_set.AnswerSet([], [])
    answer_set.AnswerSet([literal.Literal("fact-1")], [])
    answer_set.AnswerSet({literal.Literal("fact-1"), literal.Literal("fact-2")}, [])
    answer_set.AnswerSet([], [literal.Literal("fact-1")])
    answer_set.AnswerSet([], (literal.Literal("fact-1"), literal.Literal("fact-2")))
    answer_set.AnswerSet([literal.Literal("fact-1")], [literal.Literal("fact-2")])

    # CHECK: attributes are defined correctly
    ans = answer_set.AnswerSet(
            [literal.Literal("fact-1"), literal.Literal("fact-2")],
            (literal.Literal("fact-3"), literal.Literal("fact-4"), literal.Literal("fact-5"))
    )
    self.assertEqual(
            {literal.Literal("fact-1"), literal.Literal("fact-2")},
            ans.facts
    )
    self.assertEqual(
            {literal.Literal("fact-3"), literal.Literal("fact-4"), literal.Literal("fact-5")},
            ans.inferences
    )