def _sample_context(knowledge_graph, individuals, context, strict_context):
    """ Collect the facts that make up the context of each individual.

    :param knowledge_graph: the KnowledgeGraph instance to take facts from
    :param individuals: an iterable of subjects to gather context for
    :param context: an iterable of predicates; an entry that is a tuple is
                    resolved through _recursive_path_walk, any other entry is
                    matched directly as (subject, predicate, None)
    :param strict_context: if true, a subject for which any context entry
                           yields no facts contributes nothing to the sample
    :returns: the gathered facts as a new KnowledgeGraph instance
    """
    sampled = KnowledgeGraph(rdflib.Graph())

    for subject in individuals:
        gathered = []
        for predicate in context:
            if type(predicate) is tuple:
                # predicate path: delegate to the recursive walker
                matches = _recursive_path_walk(knowledge_graph,
                                               subject,
                                               predicate,
                                               strict_context)
            else:
                # single predicate: plain triple pattern lookup
                matches = list(knowledge_graph.graph.triples(
                    (subject, predicate, None)))

            if len(matches) == 0 and strict_context:
                # one missing entry invalidates this subject's whole context
                gathered = []
                break

            gathered.extend(matches)

        for fact in gathered:
            sampled.graph.add(fact)

    logger.info("Sample contains {} facts".format(len(sampled.graph)))

    return sampled
def query(query_string="", endpoint=""):
    """ Constructs a Knowledge Graph from a SPARQL endpoint and a CONSTRUCT query.

    The retrieval step is currently disabled (its code is commented out), so
    no graph is ever produced and every call with valid arguments ends in a
    RuntimeError.

    :param query_string: the SPARQL CONSTRUCT query to execute
    :param endpoint: the SPARQL endpoint to send the query to
    :returns: the retrieved graph wrapped in a KnowledgeGraph instance
    :raises ValueError: if endpoint or query_string is an empty string
    :raises RuntimeError: if the query fails or no graph was retrieved
    """
    if endpoint == "":
        raise ValueError("Endpoint cannot be left undefined")
    if query_string == "":
        raise ValueError("Query cannot be left undefined")

    logger.info("Importing RDF Graph via SPARQL query")
    logger.info("Endpoint set to '{}'".format(endpoint))
    logger.info("Query set to '{}'".format(query_string))

    graph = None
    try:
        # NOTE: retrieval is disabled; graph stays None until these lines are
        # restored.
        #sparql = SPARQLWrapper(endpoint)
        #sparql.setQuery(query_string)
        #graph = sparql.queryAndConvert()
        pass
    except Exception:
        # narrowed from a bare except so that KeyboardInterrupt and SystemExit
        # are no longer reported as a failed query
        raise RuntimeError("Query Failed")

    if graph is None:
        # report the missing result as a failed query instead of letting
        # len(None) below raise an unrelated TypeError
        raise RuntimeError("Query Failed")

    logger.info("Query results ({} facts) succesfully retrieved".format(len(graph)))

    return KnowledgeGraph(graph)
def load_dataset(self, abox, tbox):
    """ Read two graphs and strip literal-valued facts from the first.

    :param abox: local path of the first graph; only its facts whose object
                 is not exactly of type rdflib.Literal are kept
    :param tbox: local path of the second graph; returned as read
    :returns: a tuple (filtered first graph, second graph)
    """
    # read graphs
    source_kg = rdf.read(local_path=abox)
    second_kg = rdf.read(local_path=tbox)

    filtered_kg = KnowledgeGraph()
    for subj, pred, obj in source_kg.triples():
        if type(obj) is not rdflib.Literal:
            filtered_kg.graph.add((subj, pred, obj))

    return (filtered_kg, second_kg)
def load_dataset(self, abox, tbox):
    """ Read two graphs and keep a filtered subset of the first.

    Facts whose object is not a resource (e.g. literals) are always kept.
    Facts whose object is a resource (URI or blank node) are kept only if
    that resource is typed, in the graph read from abox, as crm:E55_Type or
    skos:Concept.

    NOTE(review): the previous test `type(o) is rdflib.Resource` could never
    match a graph term; "resource" is interpreted here as URIRef or BNode -
    confirm this matches the intended filter.

    :param abox: local path of the graph to filter
    :param tbox: local path of the second graph; returned as read
    :returns: a tuple (filtered first graph, second graph)
    """
    # read graphs
    kg_i = rdf.read(local_path=abox)
    kg_s = rdf.read(local_path=tbox)

    # classes that mark an object resource as worth keeping
    accepted_types = (
        rdflib.URIRef("http://www.cidoc-crm.org/cidoc-crm/E55_Type"),
        rdflib.URIRef("http://www.w3.org/2004/02/skos/core#Concept"),
    )

    kg_i_sampled = KnowledgeGraph()
    for s, p, o in kg_i.triples():
        if isinstance(o, (rdflib.URIRef, rdflib.BNode)):
            # Types are looked up in the source graph: the sample being built
            # starts empty and never receives the type facts of o, so a lookup
            # there could not accept anything.
            if any(ctype in accepted_types
                   for ctype in kg_i.graph.objects(o, rdflib.RDF.type)):
                kg_i_sampled.graph.add((s, p, o))
            continue

        kg_i_sampled.graph.add((s, p, o))

    return (kg_i_sampled, kg_s)
def read(local_path=None, remote_path=None, format=None):
    """ Imports a RDF graph from a local or remote file.

    :param local_path: location of a local file; takes precedence when given
    :param remote_path: location of a remote file; used if local_path is None
    :param format: serialization format handed to the parser; guessed from
                   the path when left empty
    :returns: the parsed graph wrapped in a KnowledgeGraph instance
    :raises ValueError: if both local_path and remote_path are None
    """
    if local_path is None and remote_path is None:
        raise ValueError("Path cannot be left undefined")

    logger.info("Importing RDF Graph from file")

    # a local path wins over a remote one
    source = remote_path if local_path is None else local_path
    logger.info("Path set to '{}'".format(source))

    if not format:
        format = guess_format(source)
        logger.info("Format guessed to be '{}'".format(format))

    rdf_graph = Graph()
    rdf_graph.parse(source, format=format)
    logger.info("RDF Graph ({} facts) succesfully imported".format(len(rdf_graph)))

    return KnowledgeGraph(rdf_graph)
def sample(knowledge_graph=None, patterns=[(None, None, None)], size=1, strict_size=False):
    """ Return spiral context up to a given size of one or more instances.

    Every subject of a fact matching one of the patterns is handed to the
    breadth-first sampler. The returned facts are added to the sample, and
    the subject's own type facts are added as well if the sampler did not
    return any of them.

    :param knowledge_graph: a KnowledgeGraph instance to sample; if None the
                            returned sample is empty
    :param patterns: a list of triple patterns (None, p, o) to filter sample with
    :param size: the size of a context (number of facts)
    :param strict_size: true if size is a strong constraint
    :returns: the sample as a KnowledgeGraph instance
    """
    sampled = KnowledgeGraph(rdflib.Graph())

    if knowledge_graph is not None:
        logger.info("Sampling spiral neighbourhood up to size {}".format(size))
        pattern_lines = ["{}".format(pattern) for pattern in patterns]
        logger.info("Pattern:\n\t" + "\n\t".join(pattern_lines))

        source_graph = knowledge_graph.graph
        for pattern in patterns:
            for subject, _, _ in source_graph.triples(pattern):
                neighbourhood = breadth_first_sampler(knowledge_graph,
                                                      subject,
                                                      size=size,
                                                      strict_size=strict_size)

                has_own_type = False
                for subj, pred, obj in neighbourhood:
                    sampled.graph.add((subj, pred, obj))
                    if pred == rdflib.RDF.type and subj == subject:
                        has_own_type = True

                if has_own_type:
                    continue

                # type is required
                for ctype in source_graph.objects(subject, rdflib.RDF.type):
                    sampled.graph.add((subject, rdflib.RDF.type, ctype))

    logger.info("Sample contains {} facts".format(len(sampled.graph)))

    return sampled
def sample(knowledge_graph=None, patterns=[(None, None, None)], depth=1):
    """ Return neighbourhood context of one or more instances.

    Every subject of a fact matching one of the patterns is handed to the
    depth-first sampler. The returned facts are added to the sample, and the
    subject's own type facts are added as well if the sampler did not return
    any of them.

    :param knowledge_graph: a KnowledgeGraph instance to sample; if None the
                            returned sample is empty
    :param patterns: a list of triple patterns (None, p, o) to filter sample with
    :param depth: the maximum distance from an atom to sample
    :returns: the sample as a KnowledgeGraph instance
    """
    sampled = KnowledgeGraph(rdflib.Graph())

    if knowledge_graph is not None:
        logger.info("Sampling neighbourhood up to depth {}".format(depth))
        pattern_lines = ["{}".format(pattern) for pattern in patterns]
        logger.info("Pattern:\n\t" + "\n\t".join(pattern_lines))

        source_graph = knowledge_graph.graph
        for pattern in patterns:
            for subject, _, _ in source_graph.triples(pattern):
                neighbourhood = depth_first_sampler(knowledge_graph,
                                                    subject,
                                                    depth=depth)

                has_own_type = False
                for subj, pred, obj in neighbourhood:
                    sampled.graph.add((subj, pred, obj))
                    if pred == rdflib.RDF.type and subj == subject:
                        has_own_type = True

                if has_own_type:
                    continue

                # type is required
                for ctype in source_graph.objects(subject, rdflib.RDF.type):
                    sampled.graph.add((subject, rdflib.RDF.type, ctype))

    logger.info("Sample contains {} facts".format(len(sampled.graph)))

    return sampled