def _parse_rating(self, line, state):
    '''Parse a rating given the current line and any required state.

    Adds name/rating/votes triples for the movie matched by
    ``self._RATING_RE`` (and an episode triple when group 6 is present).
    Returns ``state`` unchanged; lines that do not match are skipped.
    '''
    info = self._RATING_RE.match(line)
    if not info:
        return state
    movie_string = info.group(3).strip()
    movie_name = term.Literal(movie_string)
    movie = self._uriref(self._MOVIE, movie_string)
    # BUG FIX: the datatypes were swapped.  A rating is a decimal average
    # (e.g. "7.4") so it is typed xsd:float, while a vote count is a whole
    # number so it is typed xsd:integer.
    rating_string = info.group(2).strip()
    rating_name = term.Literal(rating_string, datatype=namespace.XSD.float)
    votes_string = info.group(1).strip()
    votes_name = term.Literal(votes_string, datatype=namespace.XSD.integer)
    self._graph.add((movie, self._NAME, movie_name))
    self._graph.add((movie, self._RATING_OF, rating_name))
    self._graph.add((movie, self._VOTES_OF, votes_name))
    if info.group(6):
        episode_string = info.group(6).strip()
        episode_name = term.Literal(episode_string)
        self._graph.add((movie, self._EPISODE_OF, episode_name))
    return state
def _parse_person(self, line, state):
    '''Parse a person given the current line and any required state.

    A blank line ends the current person's filmography; otherwise the
    person carried in ``state`` (or newly introduced on this line) is
    linked to the movie on this line.
    '''
    if line == '\n':
        # Blank line: forget the person we were accumulating credits for.
        state['person_name'] = None
        state['person'] = None
        return state
    match = self._PERSON_RE.match(line)
    if match is None:
        return state
    person_field = match.group(2)
    if person_field:
        # A new person starts here; remember them for continuation lines.
        name_text = person_field.strip()
        state['person_name'] = term.Literal(name_text)
        state['person'] = self._uriref(state['person_type'], name_text)
    title_text = match.group(3).strip()
    title_literal = term.Literal(title_text)
    title_node = self._uriref(self._MOVIE, title_text)
    self._graph.add((title_node, self._NAME, title_literal))
    self._graph.add((state['person'], self._NAME, state['person_name']))
    self._graph.add((state['person'], state['predicate'], title_node))
    episode_field = match.group(6)
    if episode_field:
        episode_literal = term.Literal(episode_field.strip())
        self._graph.add((title_node, self._EPISODE_OF, episode_literal))
    return state
def test_blank_node_labels(self):
    """
    http://www.w3.org/TR/rdf-sparql-query/#BlankNodesInResults
    """
    graph = create_graph("""
        @prefix foaf: <http://xmlns.com/foaf/0.1/> .
        _:a foaf:name "Alice" .
        _:b foaf:name "Bob" .
    """)
    rows = list(graph.query("""
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        SELECT ?x ?name
        WHERE { ?x foaf:name ?name }
    """))
    nodes, names = zip(*rows)
    nodes = sorted(nodes)
    names = sorted(names)
    # The names must round-trip as literals.
    self.assertEqual(
        names,
        sorted([term.Literal("Alice"), term.Literal("Bob")]))
    # The two subjects must be distinct blank nodes.
    self.assertNotEqual(nodes[0], nodes[1])
    self.assertEqual(nodes[0].__class__, term.BNode)
def test_from_graph(self, building, organization, building_jsonld, model_context, store_metadata, metadata_context):
    # Exercises from_graph() in three modes: untyped extraction, extraction
    # filtered by @type, and extraction driven by a JSON-LD frame.
    # NOTE(review): the `organization` fixture is accepted but never used,
    # and the injected `store_metadata` fixture is immediately overridden.
    store_metadata = False
    id = "http://test/1234"
    id_uri = term.URIRef(id)
    # Build a small graph describing one schema:Building with a blank-node geo.
    graph = Graph()
    graph.add((id_uri, RDF.type, term.URIRef("http://schema.org/Building")))
    graph.add((id_uri, term.URIRef("http://schema.org/name"), term.Literal("The Empire State Building")))
    graph.add((id_uri, term.URIRef("http://schema.org/description"), term.Literal("The Empire State Building is a 102-story landmark in New York City.")))
    graph.add((id_uri, term.URIRef("http://schema.org/image"), term.URIRef("http://www.civil.usherbrooke.ca/cours/gci215a/empire-state-building.jpg")))
    bNode = term.BNode()
    graph.add((id_uri, term.URIRef("http://schema.org/geo"), bNode))
    graph.add((bNode, term.URIRef("http://schema.org/latitude"), term.Literal("40.75")))
    # Mode 1: a single typed root yields a single Resource whose expanded
    # JSON-LD matches the building fixture.
    results = from_graph(graph)
    assert isinstance(results, Resource)
    building.id = id
    building.context = model_context.document["@context"]
    expected = building_jsonld(building, "expanded", store_metadata, None)
    result_jsonld = as_jsonld(results, form="expanded", store_metadata=store_metadata, model_context=model_context, metadata_context=metadata_context, context_resolver=None, na=None)
    assert result_jsonld == expected
    # Without the rdf:type triple the graph no longer has a single root:
    # presumably every subject becomes a separate resource — TODO confirm
    # against from_graph's untyped behavior.
    graph.remove((id_uri, RDF.type, term.URIRef("http://schema.org/Building")))
    results = from_graph(graph)
    assert len(results) == 3
    # Mode 2: restore the type, add a second typed node, and extract both
    # by listing their types explicitly.
    graph.add((id_uri, RDF.type, term.URIRef("http://schema.org/Building")))
    graph.add((term.URIRef("http://www.civil.usherbrooke.ca/cours/gci215a/empire-state-building.jpg"), RDF.type, term.URIRef("http://schema.org/Image")))
    results = from_graph(graph, type=["http://schema.org/Building", "http://schema.org/Image"])
    assert len(results) == 2
    assert results[0].type is not None
    assert results[1].type is not None
    result_0 = as_jsonld(results[0], form="expanded", store_metadata=store_metadata, model_context=model_context, metadata_context=metadata_context, context_resolver=None, na=None)
    result_1 = as_jsonld(results[1], form="expanded", store_metadata=store_metadata, model_context=model_context, metadata_context=metadata_context, context_resolver=None, na=None)
    results = [result_0, result_1]
    # Order of the two resources is not asserted, only the set of types.
    assert set(["http://schema.org/Building", "http://schema.org/Image"]) == {result["@type"] for result in results}
    # Mode 3: a JSON-LD frame selects only the Image node.
    frame = {
        "@type": ['http://schema.org/Image'],
        "@embed": True
    }
    results = from_graph(graph, frame=frame)
    assert isinstance(results, Resource)
    expected = {'@type': 'http://schema.org/Image', '@id': 'http://www.civil.usherbrooke.ca/cours/gci215a/empire-state-building.jpg'}
    result_jsonld = as_jsonld(results, form="expanded", store_metadata=store_metadata, model_context=model_context, metadata_context=metadata_context, context_resolver=None, na=None)
    assert result_jsonld == expected
def test_numeric_values(self):
    """FILTER on a numeric literal restricts the solutions."""
    graph = create_graph(self.data)
    query = """
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX ns: <http://example.org/ns#>
        SELECT ?title ?price
        WHERE {
            ?x ns:price ?price .
            FILTER (?price < 30.5)
            ?x dc:title ?title .
        }
    """
    actual = sorted(graph.query(query))
    expected = [(term.Literal("The Semantic Web"), term.Literal(23))]
    self.assertEqual(actual, expected)
def test_multiple_matches(self):
    '''
    http://www.w3.org/TR/rdf-sparql-query/#MultipleMatches
    '''
    graph = create_graph("""
        @prefix foaf: <http://xmlns.com/foaf/0.1/> .
        _:a foaf:name "Johnny Lee Outlaw" .
        _:a foaf:mbox <mailto:[email protected]> .
        _:b foaf:name "Peter Goodguy" .
        _:b foaf:mbox <mailto:[email protected]> .
        _:c foaf:mbox <mailto:[email protected]> .
    """)
    actual = list(graph.query("""
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        SELECT ?name ?mbox
        WHERE { ?x foaf:name ?name .
                ?x foaf:mbox ?mbox }
    """))
    # _:c has a mailbox but no name, so only two solutions are expected.
    pairs = [("Johnny Lee Outlaw", "mailto:[email protected]"),
             ("Peter Goodguy", "mailto:[email protected]")]
    expected = [(term.Literal(name), term.URIRef(mbox))
                for name, mbox in pairs]
    self.assertEqual(sorted(actual), sorted(expected))
def _parse_language(self, line, state):
    '''Parse a language given the current line and any required state.

    Adds name and language triples for the movie matched by
    ``self._LANGUAGE_RE``; non-matching lines are ignored.
    '''
    match = self._LANGUAGE_RE.match(line)
    if match is None:
        return state
    title_text = match.group(1).strip()
    title_node = self._uriref(self._MOVIE, title_text)
    language_literal = term.Literal(match.group(4).strip())
    self._graph.add((title_node, self._NAME, term.Literal(title_text)))
    self._graph.add((title_node, self._LANGUAGE_OF, language_literal))
    return state
def get_class_names_by_prov_type(self, classes=None, prefix=None, but=None):
    """Group class names by their PROV type.

    :param classes: Iterable of class names to classify; defaults to all
        ``owl:Class`` subjects in the graph.
    :param prefix: If given, only classes whose name starts with it are kept.
    :param but: Iterable of class names to exclude (may be None).
    :returns: Dict mapping PROV type (Entity/Activity/Agent/None) to a list
        of class names.
    :raises Exception: If a listed class does not exist in the graph.
    """
    class_names = dict()
    # We at least want to have an output for Entity, Activity and Agent
    class_names[PROV['Entity']] = list()
    class_names[PROV['Activity']] = list()
    class_names[PROV['Agent']] = list()
    class_names[None] = list()
    if not classes:
        classes = self.graph.subjects(RDF['type'], OWL['Class'])
        deprecated = self.graph.subjects(OWL['deprecated'],
                                         term.Literal(True))
        # BUG FIX: ``set(but)`` raised TypeError when ``but`` was left at
        # its default of None; treat None as "exclude nothing".
        but = set(but or []).union(set(deprecated))
    # FIXME: Is there a more efficient way?
    if prefix:
        classes = [class_name for class_name in classes
                   if class_name.startswith(prefix)]
    if but:
        classes = list(set(classes) - set(but))
    classes = sorted(classes)
    for class_name in classes:
        if not self.is_class(class_name):
            raise Exception('Class ' + str(class_name) + ' does not exist.')
        # Blank-node classes are skipped entirely.
        if not isinstance(class_name, term.BNode):
            prov_type = self.get_prov_class(class_name)
            if prov_type:
                class_names.setdefault(prov_type, list())\
                    .append(class_name)
            else:
                # Fall back to the direct parents, searching up to three
                # levels for a PROV class.
                prov_type_found = False
                parent_classes = self.get_direct_parents(class_name)
                for parent_class in parent_classes:
                    prov_type = self.get_prov_class(parent_class,
                                                    recursive=3)
                    if prov_type:
                        class_names.setdefault(prov_type, list())\
                            .append(class_name)
                        prov_type_found = True
                if not prov_type_found:
                    warnings.warn('No PROV type for class: ' +
                                  self.graph.qname(class_name))
                    class_names.setdefault(None, list()).append(class_name)
    return class_names
def _parse_distributor(self, line, state):
    '''Parse a distributor given the current line and any required state.

    Adds name triples for both the movie and the distributor, and links
    the movie to the distributor resource.
    '''
    info = self._DISTRIBUTOR_RE.match(line)
    if not info:
        return state
    movie_string = info.group(1).strip()
    movie_name = term.Literal(movie_string)
    movie = self._uriref(self._MOVIE, movie_string)
    distributor_string = info.group(2).strip()
    distributor_name = term.Literal(distributor_string)
    # Pass the plain string (not the Literal) for consistency with how the
    # movie URI is minted above.
    distributor = self._uriref(self._DISTRIBUTOR, distributor_string)
    self._graph.add((movie, self._NAME, movie_name))
    self._graph.add((distributor, self._NAME, distributor_name))
    # BUG FIX: the movie was linked to the distributor's *name literal*,
    # leaving the distributor resource orphaned; link the resource instead,
    # matching the resource-to-resource links in the sibling parsers.
    self._graph.add((movie, self._DISTRIBUTOR_OF, distributor))
    return state
def _parse_country(self, line, state):
    '''Parse a country given the current line and any required state.

    Adds name triples for the movie and the country, and links the movie
    to the country resource.
    '''
    match = self._COUNTRY_RE.match(line)
    if match is None:
        return state
    movie_text = match.group(1).strip()
    movie_literal = term.Literal(movie_text)
    movie_node = self._uriref(self._MOVIE, movie_text)
    country_literal = term.Literal(match.group(3).strip())
    country_node = self._uriref(self._COUNTRY, country_literal)
    self._graph.add((movie_node, self._NAME, movie_literal))
    self._graph.add((country_node, self._NAME, country_literal))
    self._graph.add((movie_node, self._COUNTRY_OF, country_node))
    return state
def _parse_genre(self, line, state):
    '''Parse a genre given the current line and any required state.

    Adds name triples for the movie and the genre, and links the genre
    to the movie.
    '''
    match = self._GENRE_RE.match(line)
    if match is None:
        return state
    movie_text = match.group(1).strip()
    movie_literal = term.Literal(movie_text)
    movie_node = self._uriref(self._MOVIE, movie_text)
    genre_literal = term.Literal(match.group(4).strip())
    genre_node = self._uriref(self._GENRE, genre_literal)
    self._graph.add((movie_node, self._NAME, movie_literal))
    self._graph.add((genre_node, self._NAME, genre_literal))
    # Note: the link runs genre -> movie here, the reverse of the other
    # parsers in this file.
    self._graph.add((genre_node, self._GENRE_OF, movie_node))
    return state
def _parse_movie(self, line, state):
    '''Parse a movie given the current line and any required state.

    Adds name and year triples for the matched title (plus an episode
    triple when group 4 is present).
    '''
    match = self._MOVIE_RE.match(line)
    if match is None:
        return state
    title_text = match.group(1).strip()
    title_node = self._uriref(self._MOVIE, title_text)
    self._graph.add((title_node, self._NAME, term.Literal(title_text)))
    self._graph.add((title_node, self._YEAR_OF,
                     term.Literal(match.group(2).strip())))
    episode_field = match.group(4)
    if episode_field:
        episode_literal = term.Literal(episode_field.strip())
        self._graph.add((title_node, self._EPISODE_OF, episode_literal))
    return state
def test_string_values(self):
    """FILTER regex with an anchored pattern selects by title prefix."""
    graph = create_graph(self.data)
    query = """
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        SELECT ?title
        WHERE {
            ?x dc:title ?title
            FILTER regex(?title, "^SPARQL")
        }
    """
    actual = sorted(graph.query(query))
    self.assertEqual(actual, [(term.Literal("SPARQL Tutorial"), )])
def test_case_insentitive(self):
    """FILTER regex with the "i" flag matches case-insensitively."""
    graph = create_graph(self.data)
    query = """
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        SELECT ?title
        WHERE {
            ?x dc:title ?title
            FILTER regex(?title, "web", "i" )
        }
    """
    actual = sorted(graph.query(query))
    self.assertEqual(actual, [(term.Literal("The Semantic Web"), )])
def _replace(self, t, text, n):
    """
    Replace the substring given in text with $VAR###

    :param t: Subject or Object of triple to have the replacement done.
    :param text: The text to replace
    :param n: Integer value to put in the ### part of $VAR###
    :returns The given subject or object with the replacement done.
    """
    placeholder = "$VAR%03d" % n
    if isinstance(t, term.URIRef):
        return term.URIRef(t.replace(text, placeholder))
    if isinstance(t, term.Literal):
        # Preserve the literal's datatype across the substitution.
        return term.Literal(t.replace(text, placeholder),
                            datatype=t.datatype)
    # Anything else (e.g. a BNode) passes through untouched.
    return t
def _fill_in(self, item):
    """
    Replace $VAR### in the given item with values from self._row[###]

    :param item: Subject or Object of a triple. Can be either term.URIRef
        or term.Literal.
    :returns The Subject or Object with row data in place of and $VAR###'s
        found.
    """
    # No placeholder at all: mint/resolve a URI for the item as-is.
    if "$VAR" not in item:
        return self._uri(item)
    if isinstance(item, term.URIRef):
        s = item.split("$VAR")
        uri = ""
        # rand stays True only if no placeholder was actually substituted;
        # it is then passed to self._uri (presumably to request a random
        # identifier — TODO confirm against self._uri).
        rand = True
        for w in s:
            if uri:
                # Segments after a "$VAR": first three chars are the ###
                # column index, the rest is literal URI text.
                v = int(w[:3])
                uri += self._row[v]
                uri += w[3:]
                rand = False
            else:
                # NOTE(review): this branch also runs when an earlier
                # segment left uri empty (item starting with "$VAR"), in
                # which case that segment's index digits are kept verbatim
                # instead of substituted — confirm this is intentional.
                uri += w
        u = self._uri(uri, rand)
        return u
    elif isinstance(item, term.Literal):
        val = ''
        s = item.split('$VAR')
        # The first segment is plain text; each later segment starts with
        # the three-digit column index.
        val = s[0]
        for w in s[1:]:
            v = int(w[:3])
            val += self._row[v]
            val += w[3:]
        val = self._update_namespace(val)
        # Keep the original literal's datatype.
        return term.Literal(val, datatype=item.datatype)
    # Neither URIRef nor Literal: return unchanged.
    return item
def all_of_rdf_type(self, rdf_type, prefix=None, but=frozenset(),
                    but_type=OWL['AnnotationProperty']):
    """Return all subjects of ``rdf:type rdf_type``, sorted.

    Deprecated terms and subjects of ``but_type`` are always excluded.

    :param rdf_type: The rdf:type to select subjects of.
    :param prefix: If given, keep only names starting with this prefix.
    :param but: Extra names to exclude (any iterable).
    :param but_type: An rdf:type whose subjects are also excluded.
    :returns: Sorted list of matching class names.
    """
    classes = self.graph.subjects(RDF['type'], rdf_type)
    deprecated = self.graph.subjects(
        OWL['deprecated'], term.Literal(True))
    # ``but`` default changed from the mutable ``set()`` to an immutable
    # frozenset (it is only ever read here, never mutated in place).
    but = set(but).union(set(deprecated))
    annotations = self.graph.subjects(RDF['type'], but_type)
    but = but.union(set(annotations))
    # FIXME: Is there a more efficient way?
    if prefix:
        classes = [class_name for class_name in classes
                   if class_name.startswith(prefix)]
    if but:
        classes = list(set(classes) - set(but))
    classes = sorted(classes)
    return classes
def check_rule_with_data(rule_path, data_path):
    """Validate a rule file against the schema, then run its SPARQL
    implementation over the test data and return the query results.

    :raises Exception: if the rule has no SPARQL implementation.
    """
    r = load_yamldown(rule_path)
    schema = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "../../../metadata/rules.schema.yaml")
    validate(r, schema)
    if not rule.sparql_from(r):
        raise Exception("No SPARQL impl for rule {}".format(rule_path))
    g = rdflib.graph.ConjunctiveGraph()
    test_data_graph = g.parse(
        data_path, format="ttl",
        publicID="http://geneontology.org/rules/test")
    # Tag the test model so rules that filter on graph type still fire.
    test_data_graph.add((
        term.URIRef("http://model.geneontology.org/rules/test"),
        term.URIRef("http://model.geneontology.org/graphType"),
        term.URIRef("http://model.geneontology.org/gafCam")))
    # NOTE(review): the predicate below is the bare rdf-schema namespace;
    # presumably rdfs:label was intended — confirm before changing.
    test_data_graph.add((
        term.URIRef("http://model.geneontology.org/rules/test"),
        term.URIRef("http://www.w3.org/2000/01/rdf-schema#"),
        term.Literal("test_graph.ttl")))
    return g.query(rule.sparql_from(r))
def graph(self):
    """Build and return an RDF graph describing this log entry.

    Every entry gets a timestamp and subject; the remaining triples
    depend on the two-letter entry type.
    """
    g = Graph()
    node = term.BNode()
    g.add((node, RDF.type, self._type_uri()))
    g.add((node, ns_slog.timestamp, term.Literal(self.date)))
    g.add((node, ns_slog.subject, ns_person[self.subject]))
    kind = self.entry_type
    if kind == 'UR':
        g.add((node, ns_slog.url, term.Literal(self.data2)))
    elif kind == 'BA':
        g.add((node, ns_slog.command,
               ns_command[self._escape(self.data1)]))
        g.add((node, ns_slog.command_line, term.Literal(self.data2)))
    elif kind == 'PO':
        # PO entries carry no extra data.
        pass
    elif kind[0] in ('T', 'I'):
        g.add((node, ns_slog.command_line, term.Literal(self.data2)))
    else:
        g.add((node, ns_slog.num_lines, term.Literal(int(self.data1))))
        g.add((node, ns_slog.command_line, term.Literal(self.data2)))
    return g
'''
Created on 14 Jun 2012

@author: AYODELE-M.AKINGBULU
'''
from rdflib import Graph, term, namespace

# Demo script (Python 2 print syntax): write two labelled resources into a
# persistent Sleepycat-backed store, then reopen the store and dump its
# contents.
graph = Graph(store='Sleepycat', identifier='test')
# create=True makes the on-disk store if "somefolder" does not exist yet.
graph.open("somefolder", create=True)
graph.add((term.URIRef('http://www.google.com/'),
           namespace.RDFS.label,
           term.Literal('Google home page')))
graph.add((term.URIRef('http://wikipedia.org/'),
           namespace.RDFS.label,
           term.Literal('Wikipedia home page')))
graph.close()

# Reopen the same store to show the triples persisted across sessions.
graph = Graph(store='Sleepycat', identifier='test')
graph.open("somefolder")
# NOTE(review): the result of len(graph) is discarded; presumably this was
# meant to be printed — confirm intent.
len(graph)
print "things in a_graph"
for s, p, o in graph:
    print s, p, o
def Literal(value: Any, datatype: Optional[Identifier] = None) -> Identifier:
    """Thin wrapper over rdflib's ``term.Literal`` constructor."""
    literal = term.Literal(value, datatype=datatype)  # type: ignore
    return literal