def _collect_people(model, sources):
    """Build {source_node: {prop_name: [values]}} for the given sources,
    using the property table returned by get_props()."""
    people = {}
    props = get_props()
    for source in sources:
        people[source] = {}
        for prop in props:
            people[source][prop] = list(
                model.objects(source, URIRef(props[prop][1])))
    return people


def get_people(name_or_nick):
    """Find people whose foaf:name matches, falling back to foaf:nick.

    Returns a dict mapping each matching source node to a dict of
    property-name -> list of values (properties from get_props()).
    """
    model = get_model()
    FOAF = Namespace("http://xmlns.com/foaf/0.1/")
    # BUG FIX: model.subjects() returns a generator, which is always
    # truthy, so the old "if not sources:" check could never trigger the
    # nick fallback; the fallback block was also duplicated verbatim.
    # We now test the *collected* result and share one helper.
    people = _collect_people(
        model, model.subjects(FOAF['name'], Literal(name_or_nick)))
    if not people:
        people = _collect_people(
            model, model.subjects(FOAF['nick'], Literal(name_or_nick)))
    return people
def rss2rdf(rsscontent):
    """Convert a Google search-history RSS feed into N3 triples.

    Clears and repopulates the module-level ``store``, then returns the
    serialized N3 text.  The bare ``print`` statements are progress
    markers left in for debugging.
    """
    print "1"
    store.clear()
    # Pre-extracted query GUIDs, consumed in order by 'web result' items.
    query_guids = getAllQuery_guid(rsscontent)
    d = feedparser.parse(rsscontent)
    _idx = 0  # for accessing query_guid
    for item in d['entries']:
        print "*"
        title = _str(item.title)
        print "**"
        t = datetime.strptime(
            item.updated,
            "%a, %d %b %Y %H:%M:%S GMT")  # get a datetime object
        unixtime = int(mktime(t.timetuple()) + 1e-6 * t.microsecond)
        # this is the value of category in google's rss feed, specify if
        # the item is a query or a result
        category = item.tags[0].term
        description = item.summary_detail.value
        print "***"
        id = str(item.id)
        link = item.link
        sourceURI = URIRef("http://foolme.csail.mit.edu/lod/google-search#" + id)
        # add common terms
        store.add_triple(sourceURI, ns.RDF['about'], ns.HISTORY['history'])
        store.add_triple(sourceURI, ns.HISTORY['link'], Literal(link))
        store.add_triple(sourceURI, ns.HISTORY['date'], Literal(unixtime))
        print "****"
        # add term specific to query
        if (category == 'web query'):
            store.add_triple(sourceURI, ns.RDF['about'], ns.HISTORY['query'])
            store.add_triple(sourceURI, ns.HISTORY['queryTerm'], Literal(title))
            store.add_triple(sourceURI, ns.HISTORY['resultClicked'],
                             Literal(description[0]))
            print "*****"
        if (category == 'web result'):
            store.add_triple(sourceURI, ns.RDF['about'], ns.HISTORY['result'])
            store.add_triple(sourceURI, ns.HISTORY['resultTitle'], Literal(title))
            # Results are linked back to the query they came from via the
            # GUID list extracted above.
            smh_guid = query_guids[_idx]
            store.add_triple(sourceURI, ns.HISTORY['from'], ns.GHISTORY[smh_guid])
            _idx += 1
            print "*****--"
    print "2"
    triples = store.writer.graph.serialize(format="n3")
    print triples
    return triples
def _pythonToLiteral(self, obj, obj_types):
    """
    obj - a python literal datatype
    obj_types - iterator yielding rdflib.URIRef.URIRef instances

    returns rdflib.Literal.Literal instance, built with the converter
    registered for the first recognised obj_type, or with the default
    converter when no obj_type is known.
    """
    for candidate in obj_types:
        try:
            converter = SchemaToPython[candidate][1]
        except KeyError:
            continue
        return Literal(converter(obj))
    return Literal(SchemaToPythonDefault[1](obj))
def createHistoryTriples(): store.clear() for history in histories: id = str(history.id) store.add_triple(ns.BHISTORY_DATA[id], ns.RDF['about'], ns.BHISTORY['history']) store.add_triple(ns.BHISTORY_DATA[id], ns.BHISTORY['from'], ns.BHISTORY_DATA[str(history.from_visit)]) #if the history.form_visit is 0, then it means it's not from any other link, not a follow through link store.add_triple(ns.BHISTORY_DATA[id], ns.BHISTORY['place_id'], ns.BPLACE_DATA[str(history.place_id)]) store.add_triple(ns.BHISTORY_DATA[id], ns.BHISTORY['visit_date'], Literal(history.visit_date)) store.add_triple(ns.BHISTORY_DATA[id], getVisitRelation(history.visit_type), ns.BPLACE_DATA[str(history.place_id)]) triples = store.writer.graph.serialize(format="n3") print "done with serialization" triples = "@prefix xsd: <http://www.w3.org/2001/XMLSchema#>." + triples triples = triples.replace("<http://www.w3.org/2001/XMLSchema#long>", "xsd:long") triples = triples.replace("<http://www.w3.org/2001/XMLSchema#integer", "xsd:integer") writeTriples(triples, "browser_history_data.n3")
def f(bindings):
    """Evaluate sFunc and coerce its result to a Literal carrying
    ``target`` as datatype; a Literal that already has the target
    datatype is passed through untouched."""
    result = sFunc(bindings)
    already_typed = isinstance(result, Literal) and result.datatype == target
    return result if already_typed else Literal(result, datatype=target)
def property_element_char(self, data):
    """SAX character-data hook: capture the first chunk of text as the
    current property element's Literal object, using the language and
    datatype from the current parse frame.

    NOTE(review): unlike the newer variant of this method elsewhere in
    the codebase, subsequent character chunks are ignored here — confirm
    whether accumulation is needed.
    """
    current = self.current
    if current.object is None:
        try:
            current.object = Literal(data, current.language, current.datatype)
        # Consistency fix: use modern `except ... as e` syntax, matching
        # the other property_element_char implementation in this code.
        except Error as e:
            self.error(e.msg)
def add_warning(self, txt):
    """Add a warning.

    A comment triple is added to the separate "warning" graph.

    @param txt: the warning text. It is preceded by the string
    "=== pyRdfa warning === " (the docstring previously showed a
    different prefix than the code actually used).
    """
    # Idiom fix: compare against None with `is not`, not `!=`.
    if self.options.warning_graph is not None:
        comment = Literal("=== pyRdfa warning === " + txt)
        self.options.warning_graph.add(
            (self.warning_URI_ref, rdfs_comment, comment))
def node_element_start(self, name, qname, attrs):
    """SAX start-element hook for RDF/XML node elements.

    Determines the triple subject from (at most one of) rdf:ID,
    rdf:nodeID or rdf:about, emits an rdf:type triple for typed node
    elements, then turns remaining attributes into property triples.

    NOTE(review): this chunk is truncated mid attribute-loop in the
    visible source; the loop continues beyond this point.
    """
    name, atts = self.convert(name, qname, attrs)
    current = self.current
    absolutize = self.absolutize
    next = self.next
    next.start = self.property_element_start
    next.end = self.property_element_end
    if name in NODE_ELEMENT_EXCEPTIONS:
        self.error("Invalid node element URI: %s" % name)
    if ID in atts:
        if ABOUT in atts or NODE_ID in atts:
            self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
        id = atts[ID]
        if not is_ncname(id):
            self.error("rdf:ID value is not a valid NCName: %s" % id)
        subject = absolutize("#%s" % id)
        if subject in self.ids:
            self.error("two elements cannot use the same ID: '%s'" % subject)
        self.ids[subject] = 1  # IDs can only appear once within a document
    elif NODE_ID in atts:
        if ID in atts or ABOUT in atts:
            self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
        nodeID = atts[NODE_ID]
        if not is_ncname(nodeID):
            self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
        # Blank nodes are shared per nodeID across the document.
        if nodeID in self.bnode:
            subject = self.bnode[nodeID]
        else:
            subject = BNode()
            self.bnode[nodeID] = subject
    elif ABOUT in atts:
        if ID in atts or NODE_ID in atts:
            self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
        subject = absolutize(atts[ABOUT])
    else:
        subject = BNode()
    if name != DESCRIPTION:  # S1
        self.store.add((subject, TYPE, absolutize(name)))
    if TYPE in atts:  # S2
        self.store.add((subject, TYPE, absolutize(atts[TYPE])))
    language = current.language
    for att in atts:
        if not att.startswith(RDFNS):
            predicate = absolutize(att)
            try:
                object = Literal(atts[att], language)
            except Error, e:
                self.error(e.msg)
        elif att == TYPE:  #S2
            predicate = TYPE
            object = absolutize(atts[TYPE])
def object2node(object, objectType):
    """Turn (object, objectType) into an rdflib node.

    objectType is OBJECT_TYPE_RESOURCE for URI nodes; otherwise it is
    either a datatype URI (contains ':') or a language id (length > 1).
    """
    if objectType == OBJECT_TYPE_RESOURCE:
        return URI2node(object)
    literal_kwargs = {}
    if ':' in objectType:
        literal_kwargs['datatype'] = objectType
    elif len(objectType) > 1:
        # must be a language id
        literal_kwargs['lang'] = objectType
    return Literal(object, **literal_kwargs)
def createTerm(termString, termType, store, objLanguage=None, objDatatype=None):
    """Construct (or fetch from the store's per-type cache) the rdflib
    term for a database row.

    termType codes: 'L' literal, 'F' formula (QuotedGraph), 'B' bnode,
    'U' URIRef; anything else is instantiated via TERM_INSTANCIATION_DICT.
    """
    def _cached(cache, key, build):
        # One shared lookup-or-build path; the original repeated this
        # cache logic verbatim in all five branches.
        term = cache.get(key)
        if term is None:
            term = build()
            cache[key] = term
        return term

    if termType == 'L':
        return _cached(store.literalCache,
                       (termString, objLanguage, objDatatype),
                       lambda: Literal(termString, objLanguage, objDatatype))
    elif termType == 'F':
        return _cached(store.otherCache, (termType, termString),
                       lambda: QuotedGraph(store, URIRef(termString)))
    elif termType == 'B':
        return _cached(store.bnodeCache, termString,
                       lambda: TERM_INSTANCIATION_DICT[termType](termString))
    elif termType == 'U':
        return _cached(store.uriCache, termString,
                       lambda: URIRef(termString))
    else:
        return _cached(store.otherCache, (termType, termString),
                       lambda: TERM_INSTANCIATION_DICT[termType](termString))
def statement2rdflib(statement):
    """Convert a Statement into an (s, p, o) triple of rdflib nodes."""
    subject = RDFLibModel.URI2node(statement.subject)
    predicate = RDFLibModel.URI2node(statement.predicate)
    if statement.objectType == OBJECT_TYPE_RESOURCE:
        obj = RDFLibModel.URI2node(statement.object)
    else:
        extra = {}
        if ':' in statement.objectType:
            extra['datatype'] = statement.objectType
        elif len(statement.objectType) > 1:
            # must be a language id
            extra['lang'] = statement.objectType
        obj = Literal(statement.object, **extra)
    return (subject, predicate, obj)
def createPlaceTriples(): store.clear() #store.add_triple(ns.HISTORY.s1,ns.RDF.about,ns.BROW_HISTORY) #print store.writer.graph.serialize(format="n3") # place = places[0] for place in places: id = str(place.id) store.add_triple(ns.BPLACE_DATA[id], ns.RDF['about'], ns.BPLACE['place']) store.add_triple(ns.BPLACE_DATA[id], ns.BPLACE['url'], Literal(place.url)) store.add_triple(ns.BPLACE_DATA[id], ns.BPLACE['title'], Literal(place.title)) store.add_triple(ns.BPLACE_DATA[id], ns.BPLACE['visitCount'], Literal(place.visit_count)) store.add_triple(ns.BPLACE_DATA[id], ns.BPLACE['hidden'], Literal(place.hidden)) store.add_triple(ns.BPLACE_DATA[id], ns.BPLACE['typed'], Literal(place.typed)) store.add_triple(ns.BPLACE_DATA[id], ns.BPLACE['lastVisitDate'], Literal(place.last_visit_date)) triples = store.writer.graph.serialize(format="n3") print "done with serialization" triples = "@prefix xsd: <http://www.w3.org/2001/XMLSchema#> ." + triples triples = triples.replace("<http://www.w3.org/2001/XMLSchema#long>", "xsd:long") triples = triples.replace("<http://www.w3.org/2001/XMLSchema#integer", "xsd:integer") #I don't like the long namespace, and don't know why rdflib only substitue pred. with shorter ns naming in the triple writeTriples(triples, "browser_place_data.n3")
def term(str, default=None):
    """See also from_n3.

    Parse a minimal N-Triples-style token: <uri> -> URIRef,
    "text" -> Literal, _... -> BNode.  Falsy input yields ``default``;
    any other form raises Exception.
    """
    if not str:
        return default
    first, last = str[0], str[-1]
    if first == "<" and last == ">":
        return URIRef(str[1:-1])
    if first == '"' and last == '"':
        return Literal(str[1:-1])
    if first == "_":
        return BNode(str)
    raise Exception("Unknown Term Syntax: '%s'" % str)
def property_element_char(self, data):
    """SAX character-data hook: start or extend the Literal object of
    the property element currently being parsed."""
    current = self.current
    try:
        if current.object is None:
            # First chunk of text: create the literal with the current
            # language/datatype context.
            current.object = Literal(data, current.language, current.datatype)
        elif isinstance(current.object, Literal):
            # Follow-up chunk: append to the existing literal.
            current.object += data
    except Error as e:
        self.error(e.msg)
def add_person_to_model(person, model, type, url):
    """Add one person dict to the RDF model.

    Creates a blank node for the person (stable when a 'sha' value is
    present), links it from ``url`` via ``type``, and adds one triple
    per known property value.  Property kinds come from get_props():
    props[name] == (kind, predicate_uri) with kind 'uri' or 'literal'.
    """
    # Idiom fix: `in` instead of Python-2-only dict.has_key().
    if 'sha' in person:
        p = BNode(value="_:p%s" % person['sha'][0])
    else:
        p = BNode()
    props = get_props()
    model.add((URIRef(url), type, p))
    for prop in props.keys():
        if prop in person:
            for i in person[prop]:
                kind = props[prop][0]
                if kind == "uri":
                    node = URIRef(i)
                elif kind == "literal":
                    node = Literal(i)
                else:
                    # BUG FIX: an unknown kind previously reused ``node``
                    # from the prior iteration (or raised NameError on the
                    # first one); skip such values instead.
                    continue
                model.add((p, URIRef(props[prop][1]), node))
    model.add((p,
               URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
               URIRef("http://xmlns.com/foaf/0.1/Person")))
def search(self, terms, predicate=None):
    """ Returns a set of all the statements the term occurs in. """
    if predicate and not isinstance(predicate, URIRef):
        _logger.warning("predicate is not a URIRef")
        predicate = URIRef(predicate)
    results = set()
    # Normalise the query: split into words, drop stopwords, then look
    # each word up as a term literal in the index graph.
    terms = [Literal(term) for term in stopper(splitter(terms))]
    for term in terms:
        for t in self.triples((None, self.term, term)):
            # t[0] is the index node minted for this term.
            for o in self.objects(t[0], self.termin):
                # A None predicate acts as a wildcard here.
                for p in self.triples((predicate, t[0], o)):
                    if self.linked_data is None:
                        # Index-only result: object value unavailable.
                        results.add((o, p[0], None))
                    else:
                        # Resolve the object value from the linked graph.
                        results.add(
                            (o, p[0], self.linked_data.value(o, p[0])))
    return results
def _createResource(v):
    """Create an RDFLib Literal instance with the corresponding XML Schema
    datatype set. If the variable is already an RDFLib resource, it simply
    returns the resource; otherwise the corresponding Literal.  A
    SPARQLError Exception is raised if the type is not implemented.

    The Literal contains the string representation of the variable (as
    Python does it by default) with the corresponding XML Schema URI set.

    @param v: Python variable
    @return: either an RDFLib Literal (if 'v' is not an RDFLib Resource), or
    the same variable if it is already an RDFLib resource (ie, Literal,
    BNode, or URIRef)
    @raise SPARQLError: if the type of 'v' is not implemented
    """
    # Idiom fix: one isinstance call with a tuple instead of a chain.
    if isinstance(v, (Literal, BNode, URIRef)):
        # Already an RDFLib resource; pass through untouched.
        return v
    return Literal(v)  # Literal now does the datatype bits
def _fromkey(key):
    """Decode a stored key string back into an rdflib node.

    Key syntax: "<uri>" for URIRefs (a "<_..." form wraps a bnode),
    "_..." for BNodes, and a quoted form (see the ``_literal`` regex)
    for Literals with optional language and datatype.
    Python 2 only: relies on str.decode("UTF-8").
    """
    if key.startswith("<") and key.endswith(">"):
        key = key[1:-1].decode("UTF-8")
        if key.startswith("_"):
            # Angle-bracketed bnode: reassemble via splituri.
            key = ''.join(splituri(key))
            return BNode(key)
        return URIRef(key)
    elif key.startswith("_"):
        return BNode(key)
    else:
        m = _literal.match(key)
        if m:
            d = m.groupdict()
            value = d["value"]
            value = unquote(value)
            value = value.decode("UTF-8")
            lang = d["lang"] or ''
            datatype = d["datatype"]
            return Literal(value, lang, datatype)
        else:
            msg = "Unknown Key Syntax: '%s'" % key
            raise Exception(msg)
def from_n3(s, default=None, backend=None):
    """
    Creates the Identifier corresponding to the given n3 string. WARNING:
    untested, may contain bugs. TODO: add test cases.
    """
    if not s:
        return default
    if s.startswith('<'):
        return URIRef(s[1:-1])
    elif s.startswith('"'):
        # TODO: would a regex be faster?
        value, rest = rsplit(s, '"', 1)
        value = value[1:]  # strip leading quote
        if rest.startswith("@"):
            if "^^" in rest:
                # "..."@lang^^<datatype>
                # NOTE(review): after this rsplit, ``rest`` no longer
                # carries the '^^' prefix, so the datatype branch below
                # can never fire for lang+datatype strings — confirm.
                language, rest = rsplit(rest, '^^', 1)
                language = language[1:]  # strip leading at sign
            else:
                language = rest[1:]  # strip leading at sign
                rest = ''
        else:
            language = None
        if rest.startswith("^^"):
            # '^^<uri>': drop the marker and the angle brackets.
            datatype = rest[3:-1]
        else:
            datatype = None
        value = value.replace('\\"', '"').replace('\\\\', '\\').decode("unicode-escape")
        return Literal(value, language, datatype)
    elif s.startswith('{'):
        identifier = from_n3(s[1:-1])
        return QuotedGraph(backend, identifier)
    elif s.startswith('['):
        identifier = from_n3(s[1:-1])
        return Graph(backend, identifier)
    else:
        if s.startswith("_:"):
            return BNode(s[2:])
        else:
            return BNode(s)
import base64, sets
import rdflib  # http://rdflib.net/
from rdflib.Identifier import Identifier as ID
from rdflib.URIRef import URIRef as URI
from rdflib.BNode import BNode
from rdflib.Literal import Literal
from rdflib import RDF, RDFS

# Template for RDF container membership properties (rdf:_1, rdf:_2, ...).
RDF_SEQi = "http://www.w3.org/1999/02/22-rdf-syntax-ns#_%s"
# OWL vocabulary terms used for cardinality/restriction handling.
MAX_CARD = URI("http://www.w3.org/2002/07/owl#maxCardinality")
CARD = URI("http://www.w3.org/2002/07/owl#cardinality")
RESTRICTION = URI("http://www.w3.org/2002/07/owl#Restriction")
FUNC_PROP = URI("http://www.w3.org/2002/07/owl#FunctionalProperty")
ON_PROP = URI("http://www.w3.org/2002/07/owl#onProperty")
ONE = Literal("1")


class ThingFactory:
    """ Fed a store, return a factory that can be used to instantiate
    Things into that world. """

    def __init__(self, store, schema_store=None, alias_map=None):
        """
        store - rdflib.Graph.Graph instance
        schema_store - rdflib.Graph.Graph instance; defaults to store
        """
        self.store = store
        self.schema_store = schema_store or self.store
        self.alias_map = alias_map or {}
class TextIndex(ConjunctiveGraph):
    """
    An rdflib graph event handler that indexes text literals that are
    added to another graph.

    This class lets you 'search' the text literals in an RDF graph.
    Typically in RDF to search for a substring in an RDF graph you
    would have to 'brute force' search every literal string looking
    for your substring.  Instead, this index stores the words in
    literals into another graph whose structure makes searching for
    terms much less expensive.  It does this by chopping up the
    literals into words, removing very common words (currently only in
    English) and then adding each of those words into an RDF graph
    that describes the statements in the original graph that the word
    came from.

    First, let's create a graph that will transmit events and a text
    index that will receive those events, and then subscribe the text
    index to the event graph:

    >>> e = ConjunctiveGraph()
    >>> t = TextIndex()
    >>> t.subscribe_to(e)

    When triples are added to the event graph (e) events will be fired
    that trigger event handlers in subscribers.  In this case our only
    subscriber is a text index and its action is to index triples that
    contain literal RDF objects.  Here are 3 such triples:

    >>> e.add((URIRef('a'), URIRef('title'), Literal('one two three')))
    >>> e.add((URIRef('b'), URIRef('title'), Literal('two three four')))
    >>> e.add((URIRef('c'), URIRef('title'), Literal('three four five')))

    Of the three literal objects that were added, they all contain
    five unique terms.  These terms can be queried directly from the
    text index:

    >>> t.term_strings() == set(['four', 'five', 'three', 'two', 'one'])
    True

    Now we can search for statements that contain certain terms.
    Let's search for 'one' which occurs in only one of the literals
    provided, 'a'.  This can be queried for:

    >>> t.search('one')
    set([(rdflib.URIRef('a'), rdflib.URIRef('title'), None)])

    'one' and 'five' only occur in one statement each, 'two' and
    'four' occur in two, and 'three' occurs in three statements:

    >>> len(list(t.search('one')))
    1
    >>> len(list(t.search('two')))
    2
    >>> len(list(t.search('three')))
    3
    >>> len(list(t.search('four')))
    2
    >>> len(list(t.search('five')))
    1

    Lets add some more statements with different predicates.

    >>> e.add((URIRef('a'), URIRef('creator'), Literal('michel')))
    >>> e.add((URIRef('b'), URIRef('creator'), Literal('Atilla the one Hun')))
    >>> e.add((URIRef('c'), URIRef('creator'), Literal('michel')))
    >>> e.add((URIRef('d'), URIRef('creator'), Literal('Hun Mung two')))

    Now 'one' occurs in two statements:

    >>> assert len(list(t.search('one'))) == 2

    And 'two' occurs in three statements, here they are:

    >>> t.search('two')
    set([(rdflib.URIRef('d'), rdflib.URIRef('creator'), None), (rdflib.URIRef('a'), rdflib.URIRef('title'), None), (rdflib.URIRef('b'), rdflib.URIRef('title'), None)])

    The predicates that are searched can be restricted by providing an
    argument to 'search()':

    >>> t.search('two', URIRef('creator'))
    set([(rdflib.URIRef('d'), rdflib.URIRef('creator'), None)])
    >>> t.search('two', URIRef(u'title'))
    set([(rdflib.URIRef('a'), rdflib.URIRef('title'), None), (rdflib.URIRef('b'), rdflib.URIRef('title'), None)])

    You can search for more than one term by simply including it in
    the query:

    >>> t.search('two three', URIRef(u'title'))
    set([(rdflib.URIRef('c'), rdflib.URIRef('title'), None), (rdflib.URIRef('a'), rdflib.URIRef('title'), None), (rdflib.URIRef('b'), rdflib.URIRef('title'), None)])

    The above query returns all the statements that contain 'two' OR
    'three'.  For the documents that contain 'two' AND 'three', do an
    intersection of two queries:

    >>> t.search('two', URIRef(u'title')).intersection(t.search(u'three', URIRef(u'title')))
    set([(rdflib.URIRef('a'), rdflib.URIRef('title'), None), (rdflib.URIRef('b'), rdflib.URIRef('title'), None)])

    Intersecting two queries like this is probably not the most
    efficient way to do it, but for reasonable data sets this isn't a
    problem.  Larger data sets will want to query the graph with
    sparql or something else more efficient.

    In all the above queries, the object of each statement was always
    'None'.  This is because the index graph does not store the object
    data, that would make it very large, and besides the data is
    available in the original data graph.  For convenience, a method
    is provided to 'link' an index graph to a data graph.  This allows
    the index to also provide object data in query results.

    >>> t.link_to(e)
    >>> set([str(i[2]) for i in t.search('two', URIRef(u'title')).intersection(t.search(u'three', URIRef(u'title')))]) == set(['two three four', 'one two three'])
    True

    You can remove the link by assigning None:

    >>> t.link_to(None)

    Unindexing means to remove statements from the index graph that
    correspond to a statement in the data graph.  Note that while it
    is possible to remove the index information of the occurrences of
    terms in statements, it is not possible to remove the terms
    themselves; terms are 'absolute' and are never removed from the
    index graph.  This is not a problem since languages have finite
    terms:

    >>> e.remove((URIRef('a'), URIRef('creator'), Literal('michel')))
    >>> e.remove((URIRef('b'), URIRef('creator'), Literal('Atilla the one Hun')))
    >>> e.remove((URIRef('c'), URIRef('creator'), Literal('michel')))
    >>> e.remove((URIRef('d'), URIRef('creator'), Literal('Hun Mung two')))

    Now 'one' only occurs in one statement:

    >>> assert len(list(t.search('one'))) == 1

    And 'two' only occurs in two statements, here they are:

    >>> t.search('two')
    set([(rdflib.URIRef('a'), rdflib.URIRef('title'), None), (rdflib.URIRef('b'), rdflib.URIRef('title'), None)])

    The predicates that are searched can be restricted by providing an
    argument to 'search()':

    >>> t.search('two', URIRef(u'creator'))
    set([])
    >>> t.search('two', URIRef(u'title'))
    set([(rdflib.URIRef('a'), rdflib.URIRef('title'), None), (rdflib.URIRef('b'), rdflib.URIRef('title'), None)])
    """

    # Optional data graph used to resolve object values in search results.
    linked_data = None

    # Vocabulary for the index graph itself.
    text_index = Namespace('http://rdflib.net/text_index#')
    term = Namespace('http://rdflib.net/text_index#')["term"]
    termin = Namespace('http://rdflib.net/text_index#')["termin"]

    def __init__(self, store='default'):
        super(TextIndex, self).__init__(store)

    def add_handler(self, event):
        # Only statements with literal objects are worth indexing.
        if type(event.triple[2]) is Literal:
            self.index(event.triple)

    def remove_handler(self, event):
        if type(event.triple[2]) is Literal:
            self.unindex(event.triple)

    def index(self, (s, p, o)):
        # this code is tricky so it's annotated. unindex is the reverse of this method.
        if type(o) is Literal:  # first, only index statements that have a literal object
            for word in stopper(splitter(o)):  # split the literal and remove any stopwords
                word = Literal(word)  # create a new literal for each word in the object
                # if that word already exists in the statement
                # loop over each context the term occurs in
                if self.value(predicate=self.term, object=word, any=True):
                    for t in set(self.triples((None, self.term, word))):
                        t = t[0]
                        # if the graph does not contain an occurance of the term in the statement's subject
                        # then add it
                        if not (t, self.termin, s) in self:
                            self.add((t, self.termin, s))
                        # ditto for the predicate
                        if not (p, t, s) in self:
                            self.add((p, t, s))
                else:
                    # if the term does not exist in the graph, add it, and the references to the statement.
                    # t gets used as a predicate, create identifier accordingly (AKA can't be a BNode)
                    h = md5(word.encode('utf-8'))
                    h.update(s.encode('utf-8'))
                    h.update(p.encode('utf-8'))
                    t = self.text_index["term_%s" % h.hexdigest()]
                    self.add((t, self.term, word))
                    self.add((t, self.termin, s))
                    self.add((p, t, s))
def property_element_start(self, name, qname, attrs):
    """SAX start-element hook for RDF/XML property elements.

    Resolves the predicate (handling rdf:li), then determines the
    object from rdf:resource / rdf:nodeID / rdf:parseType or from
    property attributes, wiring the next SAX handlers accordingly.
    """
    name, atts = self.convert(name, qname, attrs)
    current = self.current
    absolutize = self.absolutize
    next = self.next
    object = None
    current.list = None

    # Determine the predicate; rdf:li becomes the next rdf:_n property.
    if not name.startswith(RDFNS):
        current.predicate = absolutize(name)
    elif name == LI:
        current.predicate = current.next_li()
    elif name in PROPERTY_ELEMENT_EXCEPTIONS:
        self.error("Invalid property element URI: %s" % name)
    else:
        current.predicate = absolutize(name)

    id = atts.get(ID, None)
    if id is not None:
        if not is_ncname(id):
            self.error("rdf:ID value is not a value NCName: %s" % id)
        current.id = absolutize("#%s" % id)
    else:
        current.id = None

    resource = atts.get(RESOURCE, None)
    nodeID = atts.get(NODE_ID, None)
    parse_type = atts.get(PARSE_TYPE, None)
    if resource is not None and nodeID is not None:
        self.error("Property element cannot have both rdf:nodeID and rdf:resource")

    if resource is not None:
        object = absolutize(resource)
        next.start = self.node_element_start
        next.end = self.node_element_end
    elif nodeID is not None:
        if not is_ncname(nodeID):
            self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
        # Blank nodes are shared per nodeID across the document.
        if nodeID in self.bnode:
            object = self.bnode[nodeID]
        else:
            subject = BNode()
            self.bnode[nodeID] = subject
            object = subject
        next.start = self.node_element_start
        next.end = self.node_element_end
    else:
        if parse_type is not None:
            for att in atts:
                if att != PARSE_TYPE and att != ID:
                    self.error("Property attr '%s' now allowed here" % att)
            if parse_type == "Resource":
                current.subject = object = BNode()
                current.char = self.property_element_char
                next.start = self.property_element_start
                next.end = self.property_element_end
            elif parse_type == "Collection":
                current.char = None
                next.start = self.node_element_start
                next.end = self.list_node_element_end
            else:  #if parse_type=="Literal":
                # All other values are treated as Literal
                # See: http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt
                #object = Literal("", current.language, XMLLiteral)
                object = Literal("", "", XMLLiteral)
                current.char = self.literal_element_char
                current.declared = {}
                next.start = self.literal_element_start
                next.char = self.literal_element_char
                next.end = self.literal_element_end
            current.object = object
            return
        else:
            object = None
            current.char = self.property_element_char
            next.start = self.node_element_start
            next.end = self.node_element_end

    datatype = current.datatype = atts.get(DATATYPE, None)
    language = current.language
    if datatype is not None:
        # TODO: check that there are no atts other than datatype and id
        pass
    else:
        # Property attributes: each becomes a triple off a fresh bnode.
        for att in atts:
            if not att.startswith(RDFNS):
                predicate = absolutize(att)
            elif att in PROPERTY_ELEMENT_ATTRIBUTES:
                continue
            elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS:
                self.error("""Invalid property attribute URI: %s""" % att)
            else:
                predicate = absolutize(att)
            if att == TYPE:
                o = URIRef(atts[att])
            else:
                o = Literal(atts[att], language, datatype)
            if object is None:
                object = BNode()
            self.store.add((object, predicate, o))
    if object is None:
        # No object determined yet: default to an empty literal.
        object = Literal("", language, datatype)
    current.object = object
def len_graph(request):
    """Django view: build a small demo RDF graph, parse the municipality
    RDF file, and render the parsed graph's triple count.

    This Works...
    """
    # Declare namespaces to use.
    ns_sn = Namespace("http://www.snee.com/ns/misc#")
    ns_sd = Namespace("http://www.snee.com/docs/")
    ns_dc = Namespace("http://purl.org/dc/elements/1.1/")
    ns_pr = Namespace("http://prismstandard.org/1.0#")
    myfile = '/var/rdf/municipality.rdf'
    # Create storage object for triples.
    # BUG FIX: this was assigned to ``store`` while every following call
    # used ``graph``, which raised NameError at runtime.
    graph = Graph()
    # Add triples to the graph.
    graph.add(
        (ns_sd["d1001"], ns_dc["title"], Literal("Sample Acrobat document")))
    graph.add((ns_sd["d1001"], ns_dc["format"], Literal("PDF")))
    graph.add((ns_sd["d1001"], ns_dc["creator"], Literal("Billy Shears")))
    graph.add(
        (ns_sd["d1001"], ns_pr["publicationTime"], Literal("2002-12-19")))
    graph.add((ns_sd["d1002"], ns_dc["title"], Literal("Sample RTF document")))
    graph.add((ns_sd["d1002"], ns_dc["format"], Literal("RTF")))
    graph.add((ns_sd["d1002"], ns_dc["creator"], Literal("Nanker Phelge")))
    graph.add(
        (ns_sd["d1002"], ns_pr["publicationTime"], Literal("2002-12-15")))
    graph.add(
        (ns_sd["d1003"], ns_dc["title"], Literal("Sample LaTeX document")))
    graph.add((ns_sd["d1003"], ns_dc["format"], Literal("LaTeX")))
    graph.add((ns_sd["d1003"], ns_dc["creator"], Literal("Richard Mutt")))
    graph.add(
        (ns_sd["d1003"], ns_pr["publicationTime"], Literal("2002-12-16")))
    graph.add((ns_sd["d1003"], ns_sn["quality"], Literal("pretty good")))
    # Only the predicate selection is actually rendered below; the other
    # exploratory queries from the original were unused and removed.
    select_predicate_by_subject = graph.predicates(subject=ns_sd["d1001"])
    g = Graph()
    g.parse(myfile, format="xml")
    # BUG FIX: replaced a needless ``exec`` of a code string with a plain
    # assignment and fixed the "lenght" typo in the message.
    html = 'the length of the graph is: %s' % len(g)
    context = {'html': html, 'g': select_predicate_by_subject}
    return render_to_response('len_graph.html', context)
            # (Continuation of node_element_start's attribute loop; this
            # chunk begins mid-method in the visible source.)
            try:
                object = Literal(atts[att], language)
            except Error, e:
                self.error(e.msg)
        elif att == TYPE:  #S2
            predicate = TYPE
            object = absolutize(atts[TYPE])
        elif att in NODE_ELEMENT_ATTRIBUTES:
            continue
        elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS:  #S3
            self.error("Invalid property attribute URI: %s" % att)
            continue  # for when error does not throw an exception
        else:
            predicate = absolutize(att)
            try:
                object = Literal(atts[att], language)
            except Error, e:
                self.error(e.msg)
        self.store.add((subject, predicate, object))
    current.subject = subject

def node_element_end(self, name, qname):
    # Pop back to the parent frame, handing it the completed subject.
    self.parent.object = self.current.subject

def property_element_start(self, name, qname, attrs):
    # NOTE(review): this chunk is truncated here; the method body
    # continues beyond the visible source.
    name, atts = self.convert(name, qname, attrs)
    current = self.current
    absolutize = self.absolutize
    next = self.next
    xmlns:dc ="http://purl.org/dc/elements/1.1/"
    xmlns:foaf ="http://xmlns.com/foaf/0.1/"
    xmlns:ns ="http://example.org/ns#"
    xmlns:dt ="http://example.org/datatype#" >
  <rdf:Description>
    <ns:p rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">42</ns:p>
    <ns:p rdf:datatype="http://example.org/datatype#specialDatatype">abc</ns:p>
    <ns:p>2005-02-27</ns:p>
    <ns:p xml:lang="en">cat</ns:p>
  </rdf:Description>
</rdf:RDF>
"""
from testSPARQL import ns_rdf
from testSPARQL import ns_rdfs
from testSPARQL import ns_dc
from testSPARQL import ns_foaf
from testSPARQL import ns_ns
from rdflib.Literal import Literal
import datetime
from rdflib.sparql.graphPattern import GraphPattern

# SPARQL test fixture: select every node whose property value is the
# language-tagged literal "cat"@en.
select = ["?v"]
pattern = GraphPattern([("?v", "?p", Literal("cat", lang="en"))])
optional = []
tripleStore = None
expected = '''
  ?v : (some Bnode id)
'''
def prepare_row(row):
    '''
    Convert a single row from the results of the big SPARQL solution
    query to a map from query variables to lexical values.

    :Parameters:
    - `row`: The return value of `fetchone()` on an MySQLdb cursor
      object after executing the SPARQL solving SQL query.

    Returns a dictionary from SPARQL variable names to one set of
    correct values for the original list of SPARQL triple patterns.

    Note: closes over ``cursor``, ``preparation_cursor``,
    ``variable_columns`` and ``self`` from the enclosing scope.
    '''
    # First, turn the list into a map from column names to values.
    row_map = dict(
        zip([description[0] for description in cursor.description], row))
    # As the values are all integers, we must execute another SQL
    # query to map the integers to their lexical values.  This query
    # is straightforward to build, so we can do it here instead of in
    # using helper objects.
    prefix = self._internedId
    columns = []
    from_fragments = []
    where_fragments = []
    substitutions = []
    for varname, column_name, is_object, cluster in variable_columns:
        component_name = "component_" + str(len(from_fragments))
        columns.append(component_name + ".lexical as " + column_name)
        where_fragments.append(component_name + '.id = %s')
        substitutions.append(row_map[column_name])
        term = row_map[column_name + '_term']
        if 'L' == term:
            # Literal values live in the literals table; a datatype, if
            # present, needs one extra join on the identifiers table.
            from_fragments.append('%s_literals as %s' % (prefix, component_name))
            datatype = row_map[column_name + '_datatype']
            if datatype:
                from_fragments.append('%s_identifiers as %s_datatype' %
                                      (prefix, component_name))
                columns.append('%s_datatype.lexical as %s_datatype' %
                               (component_name, column_name))
                where_fragments.append(component_name + '_datatype.id = %s')
                substitutions.append(datatype)
        else:
            from_fragments.append('%s_identifiers as %s' % (prefix, component_name))
    query = ('select\n%s\nfrom\n%s\nwhere\n%s\n' %
             (', '.join(columns), ',\n'.join(from_fragments),
              ' and '.join(where_fragments)))
    if self.debug:
        print >> sys.stderr, query, substitutions
    preparation_cursor.execute(query, substitutions)
    prepared_map = dict(
        zip([
            description[0]
            for description in preparation_cursor.description
        ],
            preparation_cursor.fetchone()))
    # Unwrap the elements of `variable_columns`, which provide the
    # original SPARQL variable names and the corresponding SQL column
    # names and management information.  Then map these SPARQL
    # variable names to the correct RDFLib node objects, using the
    # lexical information obtained using the query above.
    new_row = {}
    for varname, column_name, is_object, cluster in variable_columns:
        aVariable = Variable(varname)
        lexical = prepared_map[column_name]
        term = row_map[column_name + '_term']
        if 'L' == term:
            datatype = prepared_map.get(column_name + '_datatype', None)
            if datatype:
                datatype = URIRef(datatype)
            language = row_map[column_name + '_language']
            node = Literal(lexical, datatype=datatype, lang=language)
        elif 'B' == term:
            node = BNode(lexical)
        elif 'U' == term:
            node = URIRef(lexical)
        else:
            raise ValueError('Unknown term type ' + term)
        new_row[aVariable] = node
    return new_row
                        self.add((p, t, s))
                else:
                    # (Continuation of TextIndex.index; this chunk begins
                    # mid-method in the visible source.)
                    # if the term does not exist in the graph, add it, and the references to the statement.
                    # t gets used as a predicate, create identifier accordingly (AKA can't be a BNode)
                    h = md5(word.encode('utf-8'))
                    h.update(s.encode('utf-8'))
                    h.update(p.encode('utf-8'))
                    t = self.text_index["term_%s" % h.hexdigest()]
                    self.add((t, self.term, word))
                    self.add((t, self.termin, s))
                    self.add((p, t, s))

    def unindex(self, (s, p, o)):
        # Reverse of index(): drop the occurrence triples for this
        # statement.  The terms themselves are never removed.
        if type(o) is Literal:
            for word in stopper(splitter(o)):
                word = Literal(word)
                if self.value(predicate=self.term, object=word, any=True):
                    for t in self.triples((None, self.term, word)):
                        t = t[0]
                        if (t, self.termin, s) in self:
                            self.remove((t, self.termin, s))
                        if (p, t, s) in self:
                            self.remove((p, t, s))

    def terms(self):
        """ Returns a generator that yields all of the term literals in
        the graph.
        NOTE(review): despite the wording, this actually returns a set.
        """
        return set(self.objects(None, self.term))

    def term_strings(self):
        """ Return a list of term strings.
        NOTE(review): despite the wording, this actually returns a set.
        """
        return set([str(i) for i in self.terms()])
def runTests(store, resultStore):
    """
    Walk the test manifest graph in `store` and run every APPROVED test
    of each type listed in `testTypes`, delegating execution to run()
    and recording result triples in `resultStore`.

    Free variables resolved at module level: system, testTypes, TYPE,
    OTEST, RTEST, DC, RDF, failed, maxFailed, skip, only, run.
    """
    # Describe this reasoner in the result graph.
    resultStore.add((system, RDFS["label"], Literal("Surnia")))
    resultStore.add((
        system, RDFS["comment"],
        Literal(
            """Surnia is an OWL Full reasoner using Python (including librdf) for language translation, OTTER for inference, and custom axioms. """
        )))
    for testType in (testTypes):
        print
        print "Trying each", testType, "..."
        print
        # Collect tests of this type from both the OWL (OTEST) and RDF
        # (RTEST) vocabularies, then run them in sorted order.
        tests = []
        for s in store.subjects(TYPE, OTEST[testType]):
            tests.append(s)
        for s in store.subjects(TYPE, RTEST[testType]):
            tests.append(s)
        tests.sort()
        for s in tests:
            # NOTE(review): `failed` is a module-level counter;
            # presumably incremented inside run() -- confirm.
            if failed > maxFailed:
                print
                print "maxFailed reached; testing aborted."
                return
            # Shorten well-known URI prefixes for readable names.
            name = str(s)
            if name.startswith("http://www.w3.org/2002/03owlt/"):
                name = name[len("http://www.w3.org/2002/03owlt/"):]
            if name.startswith("http://www.w3.org/2000/10/rdf-tests/rdfcore/"):
                name = "rdfcore-" + name[
                    len("http://www.w3.org/2000/10/rdf-tests/rdfcore/"):]
            creator = only(store.objects(s, DC["creator"]))
            status = only(store.objects(s, RTEST["status"]))
            # Only APPROVED tests are run; everything else is skipped.
            if str(status) == "OBSOLETED":
                continue
            if str(status) != "APPROVED":
                continue
            #print "%-40s %-20s %s" % (name, creator, status)
            print s,
            if str(s) in skip:
                print "skipping because '%s'" % skip[str(s)]
                continue
            if testType == "PositiveEntailmentTest":
                pdoc = only(store.objects(s, RTEST["premiseDocument"]))
                cdoc = only(store.objects(s, RTEST["conclusionDocument"]))
                if (cdoc, RDF["type"], RTEST["False-Document"]) in store:
                    # concluding a False-Document is the same as just
                    # being inconsistent
                    cdoc = None
                run(store, s, name, pdoc, cdoc, "Inconsistent", resultStore)
            elif testType == "NegativeEntailmentTest":
                pdoc = only(store.objects(s, RTEST["premiseDocument"]))
                cdoc = only(store.objects(s, RTEST["conclusionDocument"]))
                if (cdoc, RDF["type"], RTEST["False-Document"]) in store:
                    # concluding a False-Document is the same as just
                    # being inconsistent
                    cdoc = None
                run(store, s, name, pdoc, cdoc, "Consistent", resultStore)
            elif testType == "InconsistencyTest":
                idoc = only(store.objects(s, RTEST["inputDocument"]))
                run(store, s, name, idoc, None, "Inconsistent", resultStore)
            elif testType == "ConsistencyTest":
                idoc = only(store.objects(s, RTEST["inputDocument"]))
                run(store, s, name, idoc, None, "Consistent", resultStore)
            else:
                print "skipped, unsupported test type"
def generate_literal(node, graph, subject, state):
    """Generate the literal the C{@property}, taking into account datatype, etc.
    Note: this method is called only if the C{@property} is indeed present, no need to check.

    The C{@content} property is also treated on the caller side.

    This method is an encoding of the algorithm documented
    U{task force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}.

    @param node: DOM element node
    @param graph: the (RDF) graph to add the properies to
    @param subject: the RDFLib URIRef serving as a subject for the generated triples
    @param state: the current state to be used for the CURIE-s
    @type state: L{State.ExecutionContext}
    @return: whether a triple has been added to the graph or not
    @rtype: Boolean
    """

    def _get_literal(Pnode):
        """
        Get (recursively) the full text from a DOM Node.

        @param Pnode: DOM Node
        @return: string
        """
        rc = ""
        for node in Pnode.childNodes:
            if node.nodeType == node.TEXT_NODE:
                rc = rc + node.data
            elif node.nodeType == node.ELEMENT_NODE:
                rc = rc + _get_literal(node)
        # The decision of the group in February 2008 is not to normalize the result by default.
        # This is reflected in the default value of the option
        if state.options.space_preserve:
            return rc
        else:
            return re.sub(r'(\r| |\n|\t)+', " ", rc).strip()
    # end _get_literal

    def _get_XML_literal(Pnode):
        """
        Get (recursively) the XML Literal content of a DOM Node. (Most of
        the processing is done via a C{node.toxml} call of the xml minidom
        implementation.)

        @param Pnode: DOM Node
        @return: string
        """

        def collectPrefixes(prefixes, node):
            # Collect every namespace prefix used by this node and its
            # element descendants into `prefixes`.
            def addPf(prefx, string):
                pf = string.split(':')[0]
                if pf != string and pf not in prefx:
                    prefx.append(pf)
            # end addPf

            # first the local name of the node
            addPf(prefixes, node.tagName)
            # get all the attributes and children
            for child in node.childNodes:
                if child.nodeType == node.ELEMENT_NODE:
                    collectPrefixes(prefixes, child)
                elif child.nodeType == node.ATTRIBUTE_NODE:
                    # BUG FIX: was `node.child.name`, which would raise
                    # AttributeError if ever reached (DOM nodes have no
                    # `.child` attribute).  NOTE(review): minidom does not
                    # yield attribute nodes from childNodes, so this branch
                    # is likely dead -- confirm before porting to another
                    # DOM implementation.
                    addPf(prefixes, child.name)
        # end collectPrefixes

        rc = ""
        prefixes = []
        for node in Pnode.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                collectPrefixes(prefixes, node)
        for node in Pnode.childNodes:
            if node.nodeType == node.TEXT_NODE:
                rc = rc + node.data
            elif node.nodeType == node.ELEMENT_NODE:
                # Decorate the element with namespaces and lang values
                for prefix in prefixes:
                    if prefix in state.ns and not node.hasAttribute(
                            "xmlns:%s" % prefix):
                        node.setAttribute("xmlns:%s" % prefix,
                                          "%s" % state.ns[prefix])
                # Set the default namespace, if not done (and is available)
                if not node.getAttribute("xmlns") and state.defaultNS != None:
                    node.setAttribute("xmlns", state.defaultNS)
                # Get the lang, if necessary
                if not node.getAttribute("xml:lang") and state.lang != None:
                    node.setAttribute("xml:lang", state.lang)
                rc = rc + node.toxml()
        return rc
        # If XML Literals must be canonicalized for space, then this is the return line:
        #return re.sub(r'(\r| |\n|\t)+'," ",rc).strip()
    # end _get_XML_literal

    retval = False
    # Get the Property URI-s
    props = state.get_resources(node.getAttribute("property"), prop=True)
    # Get, if exists, the value of @datatype
    datatype = ''
    dtset = False
    if node.hasAttribute("datatype"):
        dtset = True
        dt = node.getAttribute("datatype")
        if dt != "":
            datatype = state.get_resource(dt)
    if state.lang != None:
        lang = state.lang
    else:
        lang = ''
    # The simple case: separate @content attribute.
    if node.hasAttribute("content"):
        # The value of datatype has been set above; the keyword parameters
        # take care of the rest.  (The unused local `val` from the original
        # code has been dropped.)
        literal = Literal(node.getAttribute("content"),
                          datatype=datatype,
                          lang=lang)
    else:
        # see if there *is* a datatype (even if it is empty!)
        if dtset:
            # yep. The Literal content is the pure text part of the current element:
            # We have to check whether the specified datatype is, in fact, an
            # explicit XML Literal
            if datatype == XMLLiteral:
                literal = Literal(_get_XML_literal(node), datatype=XMLLiteral)
            else:
                literal = Literal(_get_literal(node),
                                  datatype=datatype,
                                  lang=lang)
        else:
            # no controlling @datatype. We have to see if there is markup in the contained
            # element
            if True in [
                    n.nodeType == node.ELEMENT_NODE for n in node.childNodes
            ]:
                # yep, an XML Literal should be generated
                literal = Literal(_get_XML_literal(node), datatype=XMLLiteral)
            else:
                literal = Literal(_get_literal(node), lang=lang)
    # The literal may be empty, for example in an ill-defined <meta> element...
    if literal != "":
        for prop in props:
            retval = True
            graph.add((subject, prop, literal))
    # BUG FIX: the original returned True unconditionally, contradicting the
    # documented contract; return whether a triple was actually added.
    return retval
# Interactive image-annotation script (Python 2): prompts on stdin and
# builds FOAF/Dublin Core triples about a photo in `mem_model`.
print "Image Annotation Tool"
print "Enter the URI of a photo to annotate:"
uri = raw_input("> ")
# Type the resource as a foaf:Image.
mem_model.add((URIRef(uri),
               URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
               URIRef("http://xmlns.com/foaf/0.1/Image")))
print "Should I add a thumbnail for this image, by adding '.sized' before the extension?"
thumbnail = raw_input("> ")
if thumbnail:
    # e.g. "photo.jpg" -> "photo.sized.jpg"; assumes a 3-character
    # file extension.
    thumb = "%ssized.%s" % (uri[:-3], uri[-3:])
    mem_model.add((URIRef(uri), FOAF['thumbnail'], URIRef(thumb)))
print "Enter a title for the photo:"
title = raw_input("> ")
mem_model.add(
    (URIRef(uri), URIRef("http://purl.org/dc/elements/1.1/title"),
     Literal(title)))
print "Enter a description for the photo:"
description = raw_input("> ")
mem_model.add(
    (URIRef(uri), URIRef("http://purl.org/dc/elements/1.1/description"),
     Literal(description)))
# Repeatedly prompt for creators until an empty answer is entered.
while 1:
    print "Photo Creator?"
    person = raw_input("> ")
    people = get_people(person)
    count = 0
    people_array = {}
    if not person:
        break
    if len(people) > 1:
        # Multiple matches: enumerate the candidates.
        # NOTE(review): this loop continues beyond the visible chunk.
        for i in people.keys():
            count += 1