def build(self, person):
    """Describe *person* as RDF and save it as ``<person.id>.rdf``.

    The person is modelled as a FOAF ``Person`` blank node.  ``id``,
    ``name``, ``sname`` and ``friends`` are always read from *person*;
    every other property is optional and only emitted when *person* has a
    non-callable attribute of that name.  The file is written as RDF/XML
    into ``self.store_path``.
    """
    FOAF = Namespace("http://xmlns.com/foaf/0.1/")
    MYTERMS = Namespace("http://peoples.org/terms/")
    graph = ConjunctiveGraph()
    user = BNode('user')

    graph.add((user, MYTERMS['vkontakteID'], Literal(person.id)))
    graph.add((user, RDF.type, FOAF['Person']))
    graph.add((user, FOAF['firstName'], Literal(person.name)))
    graph.add((user, FOAF['surname'], Literal(person.sname)))

    # Names of the data attributes of `person`.  The callable() test has to
    # look at the attribute *value*; testing the name string (as the code
    # used to) never filters anything out.
    attr = set(item for item in dir(person)
               if not callable(getattr(person, item, None)))

    # Optional single-valued properties: (attribute name, predicate).
    optional = (
        ('icq', FOAF['icqChatID']),
        ('nickname', FOAF['nick']),
        ('town', MYTERMS['town']),
        ('gender', FOAF['gender']),
        ('status', MYTERMS['marital']),
        ('religion', MYTERMS['religion']),
        ('politic', MYTERMS['politic']),
    )
    for name, predicate in optional:
        if name in attr:
            graph.add((user, predicate, Literal(getattr(person, name))))

    # Only create the birthday node when at least one part is present.
    # (`'bday' or 'bmonth' or 'byear' in attr` was always true.)
    birthday_parts = [name for name in ('bday', 'bmonth', 'byear')
                      if name in attr]
    if birthday_parts:
        birthday = BNode('birthday')
        graph.add((user, FOAF['birthday'], birthday))
        for name in birthday_parts:
            graph.add((birthday, MYTERMS[name],
                       Literal(getattr(person, name))))

    # Faculty, chair and form of education hang off the university node, so
    # they are only emitted when a university is known; this also avoids
    # touching an unbound `university` variable.
    if 'university' in attr:
        university = BNode('university')
        graph.add((user, MYTERMS['university'], university))
        graph.add((university, MYTERMS['university_name'],
                   Literal(person.university)))
        details = (
            ('faculty', 'faculty_name'),
            ('chair', 'chair_name'),
            ('edu_form', 'edu_form'),
        )
        for name, term in details:
            if name in attr:
                graph.add((university, MYTERMS[term],
                           Literal(getattr(person, name))))

    friends = BNode('friends')
    graph.add((user, MYTERMS['friends'], friends))
    graph.add((friends, RDF.type, RDF.Bag))
    for friend_id in person.friends.keys():
        friend = URIRef('http://vkontakte.ru/id%s' % friend_id)
        graph.add((friends, RDF.li, friend))

    # save graph to rdf-file
    fname = "%s.rdf" % person.id
    graph.serialize(os.path.join(self.store_path, fname),
                    format='xml', encoding='UTF-8')
def createFeatureGraphs(rows):
    """Build one graph describing the feature databases in *rows* and save it.

    For every row a database, feature, segmentation and window node is
    minted with getNewNode(); the database node is also appended to the
    module-level ``databases[catalogueID]`` list (which is reset first).
    The graph is written as RDF/XML to
    ``output/<catalogueID lower-cased>/features.rdf``.

    Each row is indexed by: feature, segtype, windowtype, dim, hopsize,
    winlen, nfft, segn, channel, loedge, hiedge, octaveres, software,
    version, platform.
    """
    # Row value -> audiodb class name.  Unknown values simply get no
    # rdf:type, exactly as with the former if/elif chains.
    feature_types = {
        "cqt": 'CQTFeature',
        "chr": 'ChromagramFeature',
        "mfcc": 'MFCCFeature',
    }
    segmentation_types = {
        "frames": 'FrameSegmentation',
        "beats": 'BeatSegmentation',
        "segs": 'StructuralSegmentation',
    }
    window_types = {
        "hamming": 'HammingWindow',
    }
    # (audiodb predicate, row column) for the literal feature parameters.
    feature_literals = (
        ("dimension", 'dim'),
        ("hop-size", 'hopsize'),
        ("window-length", 'winlen'),
        ("nfft", 'nfft'),
        ("segn", 'segn'),
        ("channel", 'channel'),
        ("loedge", 'loedge'),
        ("hiedge", 'hiedge'),
        ("octaveres", 'octaveres'),
    )

    graph = Graph(identifier=URIRef(graph_uri))
    databases[catalogueID] = []

    for row in rows:
        # Create all the relevant nodes (with the correct IDs)
        database = getNewNode('database')
        databases[catalogueID].append(database)
        feature = getNewNode('feature')
        segmentation = getNewNode('segmentation')
        window = getNewNode('window')

        typed_nodes = (
            (feature, feature_types, 'feature'),
            (segmentation, segmentation_types, 'segtype'),
            (window, window_types, 'windowtype'),
        )
        for node, type_table, column in typed_nodes:
            type_name = type_table.get(row[column])
            if type_name is not None:
                graph.add((node, RDF.type, audiodb[type_name]))

        graph.add((feature, audiodb["window"], window))
        graph.add((feature, audiodb["segmentation"], segmentation))
        for predicate, column in feature_literals:
            graph.add((feature, audiodb[predicate], Literal(row[column])))

        version = buildNewExtractor(graph, row['software'], row['version'],
                                    row['platform'])
        project = buildNewProject(graph, row['software'])
        graph.add((project, doap['release'], version))

        graph.add((database, RDF.type, audiodb["Database"]))
        graph.add((database, audiodb["feature"], feature))
        graph.add((database, audiodb["extractor"], version))

    graph.serialize(format='xml',
                    destination="output/" + catalogueID.lower() + "/" + "features.rdf")
def to_rdf(self, format="settings"):
    """Merge every subgraph into one graph and return its serialization.

    All prefixes in ``self.NAMESPACES`` are bound on the merged graph.
    The special *format* value ``"settings"`` (the default) selects
    ``settings.RDF_SERIALIZATION``; any other value is passed to
    ``serialize`` unchanged.
    """
    if format == "settings":
        format = settings.RDF_SERIALIZATION

    merged = Graph()
    for prefix, namespace in self.NAMESPACES.iteritems():
        merged.bind(prefix, namespace)
    for subgraph in self.subgraphs:
        merged += subgraph

    return merged.serialize(format=format)
def del_vocab_from_creator(userid, vocab):
    """Drop the dcterms:mediator statements about *vocab* from a creator file.

    The creator's RDF file is ``<ag.creatorsdir>/<userid>.rdf``.  Every
    triple whose subject is ``http://vocab.ox.ac.uk/<vocab>`` and whose
    predicate is dcterms:mediator is removed, and the file is rewritten.

    Returns False when the creator file does not exist, True otherwise.
    """
    creatorfile = os.path.join(ag.creatorsdir, '%s.rdf' % userid)
    if not os.path.isfile(creatorfile):
        return False

    graph = Graph()
    graph.parse(creatorfile)

    # The vocabulary prefix comes from the `vocab` argument; the previous
    # code read an undefined name `vocabprefix` here.
    vocab_uri = URIRef("http://vocab.ox.ac.uk/%s" % vocab)

    # Collect the matches first: removing triples while the triples()
    # generator is still being consumed mutates the store under it.
    doomed = list(graph.triples(
        (vocab_uri, namespaces['dcterms']['mediator'], None)))
    for triple in doomed:
        graph.remove(triple)

    rdf_str = graph.serialize()
    # Write back to the file that was parsed (previously an undefined
    # `creatorfile`), closing the handle even if the write fails.
    with codecs.open(creatorfile, 'w', 'utf-8') as f:
        f.write(rdf_str)
    return True
def del_vocab_from_creator(userid, vocab):
    """Drop the dcterms:mediator statements about *vocab* from a creator file.

    The creator's RDF file is ``<ag.creatorsdir>/<userid>.rdf``.  Every
    triple whose subject is ``http://vocab.ox.ac.uk/<vocab>`` and whose
    predicate is dcterms:mediator is removed, and the file is rewritten.

    Returns False when the creator file does not exist, True otherwise.
    """
    creatorfile = os.path.join(ag.creatorsdir, '%s.rdf' % userid)
    if not os.path.isfile(creatorfile):
        return False

    graph = Graph()
    graph.parse(creatorfile)

    # The vocabulary prefix comes from the `vocab` argument; the previous
    # code read an undefined name `vocabprefix` here.
    vocab_uri = URIRef("http://vocab.ox.ac.uk/%s" % vocab)

    # Collect the matches first: removing triples while the triples()
    # generator is still being consumed mutates the store under it.
    doomed = list(graph.triples(
        (vocab_uri, namespaces['dcterms']['mediator'], None)))
    for triple in doomed:
        graph.remove(triple)

    rdf_str = graph.serialize()
    # Write back to the file that was parsed (previously an undefined
    # `creatorfile`), closing the handle even if the write fails.
    with codecs.open(creatorfile, 'w', 'utf-8') as f:
        f.write(rdf_str)
    return True
def getRdfXml(rdf):
    """Render the triples in *rdf* as N3, re-parse them and return RDF/XML.

    *rdf* is indexed by 'namespaces' (prefix -> URI mapping) and 'triples'.
    The 'xh1' and 'rdf' prefixes are added to ``rdf['namespaces']`` in
    place.  Non-bnode subjects are written first, then bnode subjects that
    have not been marked as processed in the meantime.
    """
    namespaces = rdf['namespaces']
    # Append the RDF namespace and emit the prefix namespace mappings
    namespaces['xh1'] = "http://www.w3.org/1999/xhtml/vocab#"
    namespaces['rdf'] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    chunks = ["@prefix %s: <%s> .\n" % (prefix, uri)
              for prefix, uri in namespaces.items()]

    triples = rdf['triples']
    processed = []
    plainSubjects = getNonBnodeSubjects(triples)
    blankSubjects = getBnodeSubjects(triples)

    # Subject-based output for the non-bnode subjects
    for subject in plainSubjects:
        subjectTriples = getTriplesBySubject(subject, triples)
        if subject in processed:
            continue
        chunks.append(tripleToN3(subjectTriples, processed, triples))
        processed.append(subject)

    # Subject-based output for the bnode subjects
    for subject in blankSubjects:
        subjectTriples = getTriplesBySubject(subject, triples)
        if subject in processed:
            continue
        chunks.append(bnodeToN3(subjectTriples, processed, triples))
        chunks.append(" .\n")

    g = ConjunctiveGraph()
    g.parse(StringIO("".join(chunks)), format="n3")
    return g.serialize()
# Connection to Sesame
con = connection('http://freerisk.org:8280/openrdf-sesame/')
con.use_repository('joblistings')
con.addnamespace('company', str(COMPANY))

cg = ConjunctiveGraph()
cg.bind('dc', DC)
cg.bind('jobboard', JB)

# Find companies with ticker symbols
res = con.query('select ?id ?ticker where {?id company:symbol ?ticker .}')

# Loop over the results
for row in res:
    company = URIRef(row['id']['value'])
    ticker = row['ticker']['value']
    url = 'http://ichart.finance.yahoo.com/table.csv?s=%s&a=00&b=28&c=2008&d=00&e=28&f=2009&g=m&ignore=.csv' % ticker

    # Read the whole CSV and always release the HTTP connection.
    response = urllib.urlopen(url)
    try:
        # list() instead of a comprehension: under Python 2 a comprehension
        # variable named `row` would overwrite the loop variable above.
        rows = list(reader(response))
    finally:
        response.close()

    # rows[1] is read as the most recent price and rows[-1] as the price a
    # year ago (rows[0] is presumably the CSV header -- confirm).  Skip
    # tickers for which no data row came back.
    if len(rows) < 2:
        continue
    current = float(rows[1][6])
    yearago = float(rows[-1][6])
    if current == 0:
        # The change below is undefined for a zero current price.
        continue

    # Calculate percent change
    # NOTE(review): the change is expressed relative to `current`, not
    # `yearago` -- confirm this is the intended definition.
    change = ((current - yearago) / current) * 100
    cg.add((company, JB['stockpricechange'], Literal(change)))

print(cg.serialize(format='xml'))
def resolveURI(self, uri):
    """Join *uri* onto the current base URI ('' when no base is set)."""
    return _urljoin(self.baseuri or '', uri)


def _popStacks(self, event, node):
    """Unwind the per-element state when *node* goes out of scope.

    Pops the innermost ``abouts`` entry if it belongs to *node*, pops the
    element stack, and pops one level of the xml:base and xml:lang stacks,
    restoring ``self.baseuri`` / ``self.lang`` from the new top of each
    stack when that entry is truthy.  *event* is not used.
    """
    # check abouts
    if len(self.abouts) != 0:
        about, aboutnode = self.abouts[-1]
        if aboutnode == node:
            self.abouts.pop()

    # keep track of nodes going out of scope
    self.elementStack.pop()

    # track xml:base and xml:lang going out of scope
    if self.xmlbases:
        self.xmlbases.pop()
        if self.xmlbases and self.xmlbases[-1]:
            self.baseuri = self.xmlbases[-1]

    if self.langs:
        self.langs.pop()
        if self.langs and self.langs[-1]:
            self.lang = self.langs[-1]


if __name__ == "__main__":
    store = ConjunctiveGraph()
    store.load(sys.argv[1], format="rdfa")
    print(store.serialize(format="pretty-xml"))
def resolveURI(self, uri):
    """Join *uri* onto the current base URI ("" when no base is set)."""
    return _urljoin(self.baseuri or "", uri)


def _popStacks(self, event, node):
    """Unwind the per-element state when *node* goes out of scope.

    Pops the innermost ``abouts`` entry if it belongs to *node*, pops the
    element stack, and pops one level of the xml:base and xml:lang stacks,
    restoring ``self.baseuri`` / ``self.lang`` from the new top of each
    stack when that entry is truthy.  *event* is not used.
    """
    # check abouts
    if len(self.abouts) != 0:
        about, aboutnode = self.abouts[-1]
        if aboutnode == node:
            self.abouts.pop()

    # keep track of nodes going out of scope
    self.elementStack.pop()

    # track xml:base and xml:lang going out of scope
    if self.xmlbases:
        self.xmlbases.pop()
        if self.xmlbases and self.xmlbases[-1]:
            self.baseuri = self.xmlbases[-1]

    if self.langs:
        self.langs.pop()
        if self.langs and self.langs[-1]:
            self.lang = self.langs[-1]


if __name__ == "__main__":
    store = ConjunctiveGraph()
    store.load(sys.argv[1], format="rdfa")
    print(store.serialize(format="pretty-xml"))
from rdflib.Graph import ConjunctiveGraph
from rdflib import Namespace, BNode, Literal, RDF, URIRef
import csv
import pysesame

JB = Namespace("http://semprog.com/schemas/jobboard#")
GEO = Namespace('http://www.w3.org/2003/01/geo/wgs84_pos#')

lg = ConjunctiveGraph()
lg.bind('geo', GEO)

# Each CSV row is unpacked as (city, latitude, longitude).  The file is
# opened with open() and closed as soon as it has been read; the loop
# variable is `lon` so the Python 2 builtin `long` is not shadowed.
with open('city_locations.csv', 'U') as locations:
    for city, lat, lon in csv.reader(locations):
        lg.add((JB[city], GEO['lat'], Literal(float(lat))))
        lg.add((JB[city], GEO['long'], Literal(float(lon))))

data = lg.serialize(format='xml')
print(data)

# Post the serialized graph to the 'joblistings' repository and show the
# server's reply.
c = pysesame.connection('http://semprog.com:8280/openrdf-sesame/')
c.use_repository('joblistings')
print(c.postdata(data))
class TriplesDatabase(object):
    """A database from the defined triples"""

    def __init__(self):
        # Set by open(); query(), addTriple() and dump() assert on it.
        self._open = False

    def open(self, filename, graphClass=None):
        """
        Load existing database at 'filename'.

        When filename is None no file is touched: a fresh graphClass() is
        created instead (Graph() when graphClass is None as well).
        Otherwise filename must exist and is opened through
        sqliteBackedGraph(directory, basename).
        """
        if filename is None:
            if graphClass is None:
                self.graph = Graph()
            else:
                self.graph = graphClass()
        else:
            assert os.path.exists(filename), (
                    "%s must be an existing database" % (filename,))

            path, filename = os.path.split(filename)
            self.graph = sqliteBackedGraph(path, filename)

        self._open = True

    def query(self, rest, initNs=None, initBindings=None):
        """
        Execute a SPARQL query and get the results as a SPARQLResult

        {rest} is a string that should begin with "SELECT ", usually

        initNs defaults to the namespaces bound on the graph and
        initBindings to an empty dict.
        """
        assert self._open

        if initNs is None:
            initNs = dict(self.graph.namespaces())
        if initBindings is None:
            initBindings = {}

        sel = select(self.getBase(), rest)
        ret = self.graph.query(sel, initNs=initNs, initBindings=initBindings,
                               DEBUG=False)
        return ret

    def getBase(self):
        """Return the namespace bound to the empty prefix, else RDFSNS."""
        d = dict(self.graph.namespaces())
        return d.get('', RDFSNS)

    def addTriple(self, s, v, *objects):
        """
        Make a statement/arc/triple in the database.

        Strings, ints and floats as s or o will automatically be coerced to
        RDFLiteral().  It is an error to give a RDFLiteral as v, so no
        coercion will be done in that position.

        2-tuples will be coerced to bnodes.

        If more than one object is given, i.e.
            addTriple(a, b, c1, c2, c3)
        this is equivalent to:
            addTriple(a,b,c1); addTriple(a,b,c2); addTriple(a,b,c3)
        """
        assert self._open
        assert len(objects) >= 1, "You must provide at least one object"
        if canBeLiteral(s):
            s = RDFLiteral(s)

        # One bnode is shared by every 2-tuple object of this call: each
        # tuple becomes a (verb, object) statement about that bnode.
        bnode = None
        for o in objects:
            if canBeLiteral(o):
                o = RDFLiteral(o)
            elif isinstance(o, tuple) and len(o) == 2:
                if bnode is None:
                    bnode = BNode()
                self.addTriple(bnode, *o)
                o = bnode

            assert None not in [s, v, o]
            self.graph.add((s, v, o))

    def dump(self):
        """
        Return the whole graph serialized as an N3 string.

        Serialization errors propagate to the caller; they are no longer
        intercepted to start an interactive pdb session.
        """
        assert self._open
        buf = StringIO()
        self.graph.serialize(destination=buf, format='n3')
        return buf.getvalue()
def update_rdf_for_conversion(prefix, vocab_properties, rdf_vocab_properties):
    """Annotate a vocabulary's RDF file and write it out as ``*_modified.rdf``.

    The RDF at ``rdf_vocab_properties['path']`` is parsed, every entry of
    the module-level ``namespaces`` whose URI is not yet bound is bound,
    identifier / version / format statements are added to the
    owl:Ontology subject, and every term returned by get_terms() gets an
    rdfs:isDefinedBy pointing at
    ``vocab_properties['preferredNamespaceUri']``.  The result is written
    as pretty RDF/XML next to the source file.

    Reads 'name', 'path' and 'uri' from *rdf_vocab_properties* and
    'preferredNamespaceUri' and 'preferredNamespacePrefix' from
    *vocab_properties*.  *prefix* is accepted for interface compatibility
    and not used.

    Returns ``(newrdf_vocab_properties, html_vocab_properties)``: dicts
    with 'format', 'name', 'path' and 'uri' for the modified RDF file and
    for the HTML rendition.
    """
    rdf_name = rdf_vocab_properties['name']
    rdf_path = rdf_vocab_properties['path']
    rdf_uri = rdf_vocab_properties['uri']
    stem = os.path.splitext(rdf_name)[0]

    html_vocab_properties = {}
    html_vocab_properties['format'] = 'text/html'
    html_vocab_properties['name'] = "%s.html" % stem
    html_vocab_properties['path'] = rdf_path.replace(rdf_name, html_vocab_properties['name'])
    html_vocab_properties['uri'] = rdf_uri.replace(rdf_name, html_vocab_properties['name'])

    newrdf_vocab_properties = {}
    newrdf_vocab_properties['format'] = 'application/rdf+xml'
    newrdf_vocab_properties['name'] = "%s_modified.rdf" % stem
    newrdf_vocab_properties['path'] = rdf_path.replace(rdf_name, newrdf_vocab_properties['name'])
    newrdf_vocab_properties['uri'] = rdf_uri.replace(rdf_name, newrdf_vocab_properties['name'])

    graph = Graph()
    graph.parse(rdf_path)

    # The subject all vocabulary-level statements are attached to: the
    # last owl:Ontology found.
    # NOTE(review): `subject` stays None when the file declares no
    # owl:Ontology, and the statements below are then added with a None
    # subject -- confirm how that case should be handled.
    subject = None
    for s in graph.subjects(namespaces['rdf']['type'], URIRef(namespaces['owl']['Ontology'])):
        subject = s

    # Bind every known namespace the file does not bind itself.  The loop
    # variable is deliberately not called `prefix`, so the parameter of
    # that name is no longer overwritten.
    graph_ns = set(str(nsurl) for nsprefix, nsurl in graph.namespaces())
    for ns_prefix, url in namespaces.iteritems():
        if str(url) not in graph_ns:
            graph.bind(ns_prefix, URIRef(url))

    rdf_ref = URIRef(rdf_uri)
    html_ref = URIRef(html_vocab_properties['uri'])
    preferred_ns = URIRef(vocab_properties['preferredNamespaceUri'])

    # Add vocabulary properties identifier and format
    graph.add((subject, namespaces['dc']['identifier'], rdf_ref))
    graph.add((subject, namespaces['dcterms']['isVersionOf'], preferred_ns))
    graph.add((subject, namespaces['dcterms']['hasFormat'], rdf_ref))
    graph.add((subject, namespaces['dcterms']['hasFormat'], html_ref))
    graph.add((subject, namespaces['vann']['preferredNamespaceUri'], preferred_ns))
    # NOTE(review): the prefix is stored as a URIRef rather than a Literal;
    # kept as-is so the generated RDF does not change -- confirm.
    graph.add((subject, namespaces['vann']['preferredNamespacePrefix'], URIRef(vocab_properties['preferredNamespacePrefix'])))

    # Describe both renditions: each is a dctype:Text whose dc:format is a
    # blank node carrying the media type, a label and the dcterms:IMT type.
    renditions = (
        (html_ref, 'text/html', 'HTML'),
        (rdf_ref, 'application/rdf+xml', 'RDF'),
    )
    for rendition, media_type, label in renditions:
        formatNode = BNode()
        graph.add((rendition, namespaces['rdf']['type'], URIRef(namespaces['dctype']['Text'])))
        graph.add((rendition, namespaces['dc']['format'], formatNode))
        graph.add((formatNode, namespaces['rdf']['value'], Literal(media_type)))
        graph.add((formatNode, namespaces['rdfs']['label'], Literal(label)))
        graph.add((formatNode, namespaces['rdf']['type'], URIRef(namespaces['dcterms']['IMT'])))

    # Add rdfs:isDefinedBy for each class / property / term of the vocabulary
    list_of_terms = get_terms(rdf_path)
    for s in list_of_terms:
        graph.add((URIRef(s), namespaces['rdfs']['isDefinedBy'], preferred_ns))

    rdf_str = graph.serialize(format="pretty-xml")
    # Opened without an encoding, as before; the handle is now closed even
    # when the write fails.
    with codecs.open(newrdf_vocab_properties['path'], 'w') as f:
        f.write(rdf_str)
    return (newrdf_vocab_properties, html_vocab_properties)
jg.add((vacancy, JB['location'], JB[location_id])) jg.add((JB[location_id], DC['title'], Literal(location))) # Salary range salaryrange = BNode() jg.add((vacancy, JOBS['salaryrange'], salaryrange)) jg.add((salaryrange, JOBS['minimumsalary'], Literal(float(salary)))) # Create the company cnode = JB[company.lower().replace(' ', '_')] jg.add((vacancy, JOBS['vacancywith'], cnode)) jg.add((cnode, RDF.type, JOBS['Organization'])) # Ticker symbol if ticker != "": jg.add((cnode, COMPANY['symbol'], Literal(ticker))) jg.add((cnode, COMPANY['name'], Literal(company))) # Crunchbase (see also) if crunchbase != "": jg.add((cnode, RDFS['seeAlso'], Literal('http://api.crunchbase.com/v/1/company/%s.js' % crunchbase))) # Print the serialized graph data = jg.serialize(format='xml') print data #c=pysesame.connection('http://freerisk.org:8280/openrdf-sesame/') #c.use_repository('joblistings') #print c.postdata(data)
def __toRDF(self):
    """
    Dump mailing list into a RDF file

    Builds a sioc:Forum description of the list (title, description,
    host, address, date, generator, language), links every subscriber and
    every indexed message to it and writes the result as pretty RDF/XML to
    '<dir>forum.rdf'.  An IOError while writing is reported on stdout and
    otherwise ignored.
    """

    #rdf graph
    store = ConjunctiveGraph()

    #namespaces
    bindings = (
        ('rdfs', RDFS),
        ('swaml', SWAML),
        ('sioc', SIOC),
        ('sioct', SIOCT),
        ('foaf', FOAF),
        ('dc', DC),
        ('mvcb', MVCB),
    )
    for ns_prefix, namespace in bindings:
        store.bind(ns_prefix, namespace)

    #first the host graph
    # Truthiness instead of len(): an unset option (None) is treated like
    # an empty one instead of raising TypeError.
    host = self.config.get('host')
    if host:
        self.__addSite(store, host)

    #and then the mailing list
    forum = URIRef(self.__getUri())
    store.add((forum, RDF.type, SIOC['Forum']))

    #list information
    title = self.config.get('title')
    if title:
        store.add((forum, DC['title'], Literal(title)))

    description = self.config.get('description')
    if description:
        store.add((forum, DC['description'], Literal(description)))

    if host:
        store.add((forum, SIOC['has_host'], URIRef(host)))

    store.add((forum, SWAML['address'], Literal(self.config.get('to'))))
    store.add((forum, DC['date'], Literal(FileDate(self.config.get('mbox')).getStringFormat())))
    store.add((forum, MVCB['generatorAgent'], URIRef(self.config.getAgent())))
    store.add((forum, MVCB['errorReportsTo'], URIRef('http://swaml.berlios.de/bugs')))
    if self.lang is not None:
        store.add((forum, DC['language'], Literal(self.lang)))

    #subscribers
    for uri in self.subscribers.getSubscribersUris():
        subscriber = URIRef(uri)
        store.add((forum, SIOC['has_subscriber'], subscriber))
        store.add((subscriber, RDF.type, SIOC['UserAccount']))

    #and all messages
    for msg in self.index.items:
        post = URIRef(msg.getUri())
        store.add((forum, SIOC['container_of'], post))
        store.add((post, RDF.type, SIOC['Post']))
        parent = msg.getParent()
        if parent is not None:
            store.add((post, SIOC['reply_of'], URIRef(parent)))

    #and dump to disk
    # Serialize before opening the file, so a serialization failure cannot
    # leave a truncated forum.rdf behind.
    rdf_xml = store.serialize(format="pretty-xml")
    try:
        rdf_file = open(self.config.get('dir') + 'forum.rdf', 'w+')
        try:
            rdf_file.write(rdf_xml)
            rdf_file.flush()
        finally:
            # Close the handle even when the write itself fails.
            rdf_file.close()
    except IOError as detail:
        print('Error exporting mailing list to RDF: ' + str(detail))
def testParse(self):
    """Parse the remote troop42 policy as N3 and print its serialization."""
    policy_url = "http://groups.csail.mit.edu/dig/2005/09/rein/examples/troop42-policy.n3"
    graph = ConjunctiveGraph()
    graph.parse(policy_url, format="n3")
    serialized = graph.serialize()
    print(serialized)
def _getAgentNode(graph, artists, name):
    """Return the node for the agent *name*, minting a foaf:Agent if unknown.

    Known agents come from *artists*; a new agent is described in *graph*
    and remembered in *artists* for later rows.
    """
    try:
        return artists[name]
    except KeyError:
        agent = getNewNode('artist')
        graph.add((agent, RDF.type, foaf['Agent']))
        graph.add((agent, foaf['name'], Literal(name.strip())))
        artists[name] = agent
        return agent


def createMediaGraphs(rows):
    """Write one RDF/XML file per row describing a track and its context.

    Every row gets its own graph containing the musical work, composition,
    track, record, performance and signal plus (when given) artist,
    composer and album.  The signal is attached to every database in the
    module-level ``databases[catalogueID]``.  Files are written to
    ``output/<catalogueID lower-cased>/media_<n>.rdf`` with n counting
    from 1.

    Each row is indexed by: uid, artist, composer, tracknum, album, track,
    isrc.  Artists and albums are shared between rows, so each is only
    described in the graph of the first row that mentions it.
    """
    albums = {}
    artists = {
        'Madonna': mb_artist['79239441-bfd5-4981-a70c-55c3f15c1287'],
        'John Coltrane': mb_artist['b625448e-bf4a-41c3-a421-72ad46cdb831'],
        'Miles Davis': mb_artist['561d854a-6a28-4aa7-8c99-323e6ce46c2a']}

    for counter, row in enumerate(rows, 1):
        graph = Graph(identifier=URIRef(graph_uri))

        # Create all the relevant nodes (with the correct IDs)
        work = getNewNode('work')
        composition = getNewNode('composition')
        track = getNewNode('track')
        record = getNewNode('record')
        performance = getNewNode('performance')
        signal = Namespace(graph_uri + "/" + row['uid'])

        # Reset per row: without this a row lacking an artist either
        # raised NameError (first row) or silently inherited the previous
        # row's artist as its composer.
        artist = None

        # If we don't have an artist url, make a foaf Agent instead.
        if row['artist']:
            artist = _getAgentNode(graph, artists, row['artist'])

        if row['composer']:
            composer = _getAgentNode(graph, artists, row['composer'])
        else:
            # No composer given: fall back to the artist (possibly None).
            composer = artist

        # Work
        graph.add((work, RDF.type, mo['MusicalWork']))

        # Composition
        graph.add((composition, RDF.type, mo['Composition']))
        if composer:
            graph.add((composition, mo['composer'], composer))
        graph.add((composition, mo['produced_work'], work))

        # Track
        graph.add((track, RDF.type, mo['Track']))
        if row['artist']:
            graph.add((track, foaf['maker'], artist))
        if row['tracknum']:
            graph.add((track, mo['track_number'], Literal(row['tracknum'])))

        if row['album']:
            # Album
            try:
                album = albums[row['album']]
            except KeyError:
                album = getNewNode('album')
                graph.add((album, RDF.type, mo['Record']))
                graph.add((album, dc['title'], Literal(row['album'].strip())))
                graph.add((album, mo['release_type'], mo['album']))
                albums[row['album']] = album
            graph.add((album, mo['track'], track))

        # Signal
        graph.add((signal, RDF.type, mo['Signal']))
        graph.add((signal, mo['published_as'], record))

        if row['track']:
            graph.add((signal, dc['title'], Literal(row['track'].strip())))
        if row['isrc']:
            graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))

        # Add to the various databases
        dbs = databases[catalogueID]
        for db in dbs:
            graph.add((db, audiodb["has-signal"], signal))

        # Record
        graph.add((record, RDF.type, mo['Record']))
        graph.add((record, mo['publication_of'], signal))
        graph.add((record, mo['track'], track))

        # Performance
        graph.add((performance, RDF.type, mo['Performance']))
        graph.add((performance, mo['performance_of'], work))
        if row['artist']:
            graph.add((performance, mo['performer'], artist))
        graph.add((performance, mo['recorded_as'], signal))

        # Serialize first, then release the graph (it used to be closed
        # before it was read for serialization).
        graph.serialize(format='xml',
                        destination="output/" + catalogueID.lower() + "/media_" + str(counter) + ".rdf")
        graph.close()
class rdf_transform:
    '''
    Collects RDF statements about letters, correspondents, publications,
    places and the author in one in-memory graph (self.g).

    Every create_* method adds to that shared graph and returns the
    pretty-xml serialization of everything added so far, not only of the
    item it was called for.
    '''

    def __init__(self):
        self.g = Graph('IOMemory')
        self.g.bind('dc', dublin_core)
        self.g.bind('foaf', FOAF)
        self.g.bind('time-entry', owl_time)
        self.g.bind('letter', letter_ns)
        self.g.bind('owl', owl)
        self.g.bind('ex', exam)
        self.g.bind('geo', geo)
        self.g.bind('base', base_uri)

    def create_rdf_letter(self, letters):
        '''
          creates an rdf representation of letter used to load into the triple store

          Each letter is read through its type, correspondent, id,
          letter_date, letter_place, letter_text and salutation attributes.
          Returns the serialized graph.
        '''
        magazines = ("ALL THE YEAR ROUND", "HOUSEHOLD WORDS", "Household Words")
        for l in letters:
            correspondence = base_uri + "letters/resource/" + l.type + '/' + urllib.quote(l.correspondent) + '/' + str(l.id) + '/rdf'
            self.add_author(correspondence, "Charles Dickens")
            self.add_subject(correspondence, "letter")
            self.add_time(correspondence, str(l.letter_date)+'T00:00:00')
            self.add_correspondent(correspondence, l.correspondent)

            place = ''
            try:
                place = str(l.letter_place)
            #unicode errors are text related
            except UnicodeError:
                pass
            # Value comparison; `is not ''` tested object identity.
            if place != '':
                self.add_place(correspondence, place)

            self.add_letter_text(correspondence, l.letter_text)
            self.add_salutation(correspondence, l.correspondent, l.salutation)

            #this section will parse for proper names in due course
            letter_quotes = parse_text.parse_balanced_quotes(l.letter_text)
            for quote in letter_quotes:
                # isupper() is now actually called; the bare attribute
                # `.isupper` was always truthy, so every quote passed.
                if quote[0:1].isupper() and "!" not in quote:
                    if quote in magazines:
                        self.add_magazine(correspondence, parse_text.stripPunc(quote))
                    else:
                        self.add_text(correspondence, parse_text.stripPunc(quote))

        letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
        return letter_rdf

    def create_rdf_end(self):
        '''
          function to create an endpoint in rdf/xml

          Describes every letter returned by dbase.get_endpoint_rdf() and
          links it to the periodicals and known works quoted in its text.
          Returns the serialized graph.
        '''
        letters = dbase.get_endpoint_rdf()
        letter_items = sorted(letters.items())
        works = dbase.get_books()

        periodicals = set(['All The Year Round', 'Household Words', 'The Daily News'])
        #TODO: Normalise the text to reduce code repetition
        # Short title as quoted -> full title used for the resource.
        full_titles = {
            "Copperfield": "David Copperfield",
            "Nickleby": "Nicholas Nickleby",
            "Edwin Drood": "The Mystery of Edwin Drood",
            "Dombey": "Dombey and Son",
            "Tale of Two Cities": "A Tale of Two Cities",
            "Christmas Carol": "A Christmas Carol",
        }

        for url, text in letter_items:
            # Reset per letter so that a UnicodeError below cannot make this
            # letter reuse values left over from the previous one.
            correspondence = None
            place = None
            letter_text = None
            try:
                correspondence = base_uri + "letters/resource/dickens/" + urllib.quote(str(text[1])) + '/' + str(url) + '/rdf'
                self.add_author(correspondence, "Charles Dickens")
                self.add_subject(correspondence, "letter")
                self.add_subject(correspondence, "Charles Dickens")
                self.add_subject(correspondence, parse_text.camel_case(str(text[1])))
                self.add_time(correspondence, str(text[3])+'T00:00:00')
                self.add_correspondent(correspondence, str(text[1]))
                self.add_salutation(correspondence, urllib.quote(str(text[1])), str(text[4]))
                place = str(text[5])
                letter_text = str(text[2])
            #unicode errors are text related
            except UnicodeError:
                pass

            if correspondence is None:
                # Not even the letter's URI could be built; nothing to
                # attach further statements to.
                continue
            if place is not None:
                self.add_place(correspondence, place)
            if letter_text is not None:
                self.add_letter_text(correspondence, letter_text)

            #this section will parse for proper names in due course
            letter_quotes = parse_text.parse_balanced_quotes(text[2])
            for quote in letter_quotes:
                work = parse_text.stripPunc(quote)
                if quote in periodicals:
                    self.add_magazine(correspondence, quote)
                if work in works:
                    self.add_text(correspondence, full_titles.get(work, work))

        letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
        return letter_rdf

    def create_correspondent(self, corr, letter_items):
        '''
          function to describe a correspondent; the first element of every
          non-empty letter_items pair is added as a foaf:nick.
          Returns the serialized graph.
        '''
        u_corr = unicode(corr)

        correspondence = base_uri + "correspondent/resource/" + urllib.quote(corr)
        self.add_subject(correspondence, "correspondent")

        for url, text in letter_items:
            # `and`, not `or`: the old test was true for every value,
            # including None and ''.
            if url is not None and url != '':
                self.add_salutation(correspondence, corr, str(url))
        #need rules to define relationships - family, authors
        if u_corr == "Miss Hogarth":
            self.add_subject(correspondence, "daughter")
            self.add_daughter(correspondence, "Charles Dickens")
            self.add_sameas(correspondence, "http://dbpedia.org/page/Georgina_Hogarth")

        letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
        return letter_rdf

    def create_publication(self, title, type):
        '''
          function to describe a publication of the given type looked up by
          title through dbase.get_book_rdf().  When several records come
          back, the one sorting last is described.
          Returns the serialized graph.
        '''
        start = ''
        end = ''
        abstract = ''
        uri_str = ''
        source = ''

        books = dbase.get_book_rdf(title)
        for u, book in sorted(books.items()):
            title = u
            start = book[0]
            end = book[1]
            abstract = book[2]
            uri_str = book[3]
            source = book[4]

        correspondence = base_uri + type + "/resource/" + title.strip().replace(" ", "_")
        self.add_subject(correspondence, type)
        self.add_subject(correspondence, "Charles Dickens")
        self.add_author(correspondence, "Charles Dickens")
        self.add_time(correspondence, start)
        self.add_time(correspondence, end)
        self.add_title(correspondence, title)

        self.add_abstract(correspondence, abstract)

        uri = u"http://dbpedia.org/page/" + uri_str
        self.add_sameas(correspondence, uri)

        if type == "book":
            source_uri = "http://gutenberg.org/ebooks/" + source
            self.add_sameas(correspondence, source_uri)

        letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
        return letter_rdf

    def create_place(self, placeobj):
        '''
          function to describe a place; placeobj is iterated and the values
          of its last element (placeid, latitude, longitude, source) are
          used.  Returns the serialized graph.
        '''
        (lat, lon, place_name, source) = ('', '', '', '')
        for location in placeobj:
            place_name = location.placeid
            lat = location.latitude
            lon = location.longitude
            source = location.source

        correspondence = base_uri + "place/resource/" + urllib.quote(place_name) + "/rdf"
        self.add_latitude(correspondence, lat)
        self.add_longitude(correspondence, lon)
        self.add_place_name(correspondence, place_name)
        self.add_sameas(correspondence, source)

        letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
        return letter_rdf

    def create_author(self, data):
        '''
           function to return an author graph in rdf

           data is not used; the author description is fixed.
        '''
        author = u"Charles Dickens"
        born = u"1812-02-07"
        died = u"1870-06-09"
        abstract = (
            u"Charles John Huffam Dickens, pen-name 'Boz', was the most popular English novelist of the Victorian era, "
            u"and one of the most popular of all time, responsible for some of English literature's most iconic characters. "
            u"Many of his novels, with their recurrent theme of social reform, first appeared in periodicals and magazines in serialised form, "
            u"a popular format for fiction at the time. "
            u"Unlike other authors who completed entire novels before serial production began, "
            u"Dickens often wrote them while they were being serialized, creating them in the order in which they were meant to appear. "
            u"The practice lent his stories a particular rhythm, punctuated by one 'cliffhanger' after another to keep the public looking forward to the next installment. "
            u"The continuing popularity of his novels and short stories is such that they have never gone out of print. "
            u"His work has been praised for its mastery of prose and unique personalities by writers such as George Gissing and G. K. Chesterton, "
            u"though the same characteristics prompted others, such as Henry James and Virginia Woolf, to criticize him for sentimentality and implausibility."
        )
        author_url = u"http://en.wikipedia.org/wiki/Charles_Dickens"

        correspondence = base_uri + "author/resource/" + author
        self.add_subject(correspondence, "Charles Dickens")
        self.add_subject(correspondence, "author")
        self.add_nick(correspondence, "Boz")
        self.add_time(correspondence, born)
        self.add_time(correspondence, died)
        self.add_abstract(correspondence, abstract)
        self.add_sameas(correspondence, author_url)

        letter_rdf = self.g.serialize(format="pretty-xml", max_depth=3)
        return letter_rdf

    def add_author(self, correspondence, name):
        ''' function to add author to graph; returns the author resource '''
        dc_author = urllib.quote(name)
        lauthor = URIRef(base_uri + 'author/resource/%s' % dc_author) + "/rdf"
        self.g.add((correspondence, dublin_core['creator'], lauthor))
        return lauthor

    def add_salutation(self, correspondence, author, name):
        ''' function to add salutation to graph

            name is stored as a foaf:nick of correspondence; the returned
            correspondent resource for author is not added to the graph.
        '''
        nameid = urllib.quote(author)
        person = URIRef(base_uri + 'correspondent/resource/%s' % nameid) + "/rdf"
        self.g.add((correspondence, FOAF['nick'], Literal(name)))
        return person

    def add_correspondent(self, correspondence, name):
        ''' function to add correspondent to graph; returns its resource '''
        nameid = urllib.quote(name)
        person = URIRef(base_uri + 'correspondent/resource/%s' % nameid) + "/rdf"
        self.g.add((correspondence, letter_ns["correspondent"], person))
        return person

    def add_magazine(self, correspondence, name):
        ''' function to add magazine to graph; returns its resource '''
        nameid = urllib.quote(name)
        magazine = URIRef(base_uri + 'magazine/resource/%s' % nameid) + "/rdf"
        self.g.add((correspondence, letter_ns['textReferred'], magazine))
        return magazine

    def add_text(self, correspondence, textname):
        ''' function to add referred text to the graph'''
        textid = base_uri + "book/resource/" + textname.replace("\n", "_").replace(" ", "_") + "/rdf"
        return self.g.add((correspondence, letter_ns['textReferred'], URIRef(textid)))

    def add_author_text(self, correspondence, textname):
        ''' function to add author referred text to the graph

            Returns the literal that was added (the method used to end in
            `return book`, an undefined name).
        '''
        author_text = Literal(textname)
        self.g.add((correspondence, letter_ns['textAuthorReferred'], author_text))
        return author_text

    def add_subject(self, correspondence, subject):
        return self.g.add((correspondence, dublin_core['subject'], Literal(subject)))

    def add_sameas(self, correspondence, link):
        return self.g.add((correspondence, owl['sameAs'], URIRef(link)))

    def add_time(self, correspondence, time):
        ''' function to add time '''
        return self.g.add((correspondence, dublin_core['date'], Literal(str(time))))

    def add_title(self, correspondence, title):
        return self.g.add((correspondence, dublin_core['title'], Literal(title)))

    def add_nick(self, correspondence, nick):
        return self.g.add((correspondence, FOAF['nick'], Literal(nick)))

    def add_place(self, correspondence, place):
        ''' function to link correspondence to a place resource

            This is the only add_place left: an earlier definition in the
            class was always overridden by this one and referenced an
            undefined name.
        '''
        # NOTE(review): the place is attached with dc:title -- confirm that
        # this is the intended predicate.
        return self.g.add((correspondence, dublin_core['title'], URIRef(base_uri + "place/resource/" + urllib.quote(place) + "/rdf")))

    def add_daughter(self, correspondence, author):
        return self.g.add((correspondence, exam['daughter'], URIRef(base_uri + "author/resource/" + author + "/rdf")))

    def add_letter_text(self, correspondence, letter_text):
        return self.g.add((correspondence, letter_ns['Text'], Literal(letter_text)))

    def add_longitude(self, correspondence, long):
        return self.g.add((correspondence, geo['long'], Literal(long)))

    def add_latitude(self, correspondence, lat):
        return self.g.add((correspondence, geo['lat'], Literal(lat)))

    def add_description(self, correspondence, abstract):
        return self.g.add((correspondence, geo['desc'], Literal(abstract)))

    def add_place_name(self, correspondence, name):
        return self.g.add((correspondence, geo['name'], Literal(name)))

    def add_abstract(self, correspondence, letters):
        return self.g.add((correspondence, letter_ns['text'], Literal(letters)))

    def add_open(self, correspondence, letters):
        return self.g.add((correspondence, letter_ns['open'], Literal(letters)))

    def add_close(self, correspondence, letters):
        return self.g.add((correspondence, letter_ns['close'], Literal(letters)))
# Get the sqlite plugin. You may have to install the python sqlite libraries store = plugin.get('SQLite', Store)('rdfstore.db') # Open previously created store, or create it if it doesn't exist yet try: rt = store.open(configString,create=False) except OperationalError, e: try: # There is no underlying sqlite infrastructure, create it rt = store.open(configString,create=True) assert rt == VALID_STORE except OperationalError, e: raise import sys, pdb; pdb.post_mortem(sys.exc_info()[2]) # There is a store, use it graph = Graph(store, identifier = URIRef(default_graph_uri)) print "Triples in graph before add: ", len(graph) # Now we'll add some triples to the graph & commit the changes rdflibNS = Namespace('http://rdflib.net/test/') graph.add((rdflibNS['pic:1'], rdflibNS['name'], Literal('Jane & Bob'))) graph.add((rdflibNS['pic:2'], rdflibNS['name'], Literal('Squirrel in Tree'))) graph.commit() print "Triples in graph after add: ", len(graph) # display the graph in RDF/XML print graph.serialize()
Further information about Construct can be obtained from http://www.construct-infrastructure.org """ from construct.proxy import proxy from construct.constructservice import ServiceError from rdflib.Graph import ConjunctiveGraph # Create a new proxy object. proxy = proxy() print "Executing Script" try: # Generate a piece of FOAF RDF store = ConjunctiveGraph() store.load("joebloggs_foaf.rdf") data = store.serialize(format="nt") # Send the FOAF RDF to the data store if(proxy.insert(data)): # Now query for joebloggs web address query = """SELECT ?nickname WHERE{ ?subject <http://xmlns.com/foaf/0.1/name> "Joe Bloggs". ?subject <http://xmlns.com/foaf/0.1/nick> ?nickname.} """ results = proxy.query(query) print "Here is the N3 form of the QueryResults Object:" print results except ServiceError, e: print e # Close the proxy. proxy.close()
class rdf_transform:
    ''' Builds RDF/XML for letters, correspondents, publications, places and
    the author on one shared rdflib graph (self.g).

    Every create_* method adds triples to self.g and returns the pretty-xml
    serialisation of the whole graph, so triples accumulate across calls made
    on the same instance.

    NOTE(review): subjects are built by string concatenation with base_uri
    and passed to the graph as-is; confirm the rdflib version in use accepts
    them without wrapping in URIRef. '''

    # Quoted titles that create_rdf_letter records as magazines.
    _LETTER_MAGAZINES = ("ALL THE YEAR ROUND", "HOUSEHOLD WORDS",
                         "Household Words")

    # Quoted titles that create_rdf_end records as magazines.
    _PERIODICALS = frozenset(
        ['All The Year Round', 'Household Words', 'The Daily News'])

    # Short titles found in letters mapped to the full title of the work.
    _WORK_ALIASES = {
        "Copperfield": "David Copperfield",
        "Nickleby": "Nicholas Nickleby",
        "Edwin Drood": "The Mystery of Edwin Drood",
        "Dombey": "Dombey and Son",
        "Tale of Two Cities": "A Tale of Two Cities",
        "Christmas Carol": "A Christmas Carol",
    }

    def __init__(self):
        ''' creates the in-memory graph and binds the namespace prefixes '''
        self.g = Graph('IOMemory')
        self.g.bind('dc', dublin_core)
        self.g.bind('foaf', FOAF)
        self.g.bind('time-entry', owl_time)
        self.g.bind('letter', letter_ns)
        self.g.bind('owl', owl)
        self.g.bind('ex', exam)
        self.g.bind('geo', geo)
        self.g.bind('base', base_uri)

    def create_rdf_letter(self, letters):
        ''' creates an rdf representation of letter used to load into the triple store

        letters: iterable of objects exposing type, correspondent, id,
        letter_date, letter_place, letter_text and salutation.
        Returns the pretty-xml serialisation of the graph. '''
        for l in letters:
            correspondence = (base_uri + "letters/resource/" + l.type + '/' +
                              urllib.quote(l.correspondent) + '/' +
                              str(l.id) + '/rdf')
            self.add_author(correspondence, "Charles Dickens")
            self.add_subject(correspondence, "letter")
            self.add_time(correspondence, str(l.letter_date) + 'T00:00:00')
            self.add_correspondent(correspondence, l.correspondent)

            place = ''
            try:
                place = str(l.letter_place)
            except UnicodeError:
                # unicode errors are text related; the place is left out
                pass
            # Truthiness instead of the identity test `is not ''`, which only
            # worked because of string interning.
            if place:
                self.add_place(correspondence, place)

            self.add_letter_text(correspondence, l.letter_text)
            self.add_salutation(correspondence, l.correspondent, l.salutation)

            # TODO: parse salutation lines and proper names once the
            # parse_text helpers have been ported.
            for quote in parse_text.parse_balanced_quotes(l.letter_text):
                # isupper must be *called*; the bare method object is always
                # truthy, which let every quote without "!" through.
                if not quote[0:1].isupper() or "!" in quote:
                    continue
                title = parse_text.stripPunc(quote)
                if quote in self._LETTER_MAGAZINES:
                    self.add_magazine(correspondence, title)
                else:
                    self.add_text(correspondence, title)

        return self.g.serialize(format="pretty-xml", max_depth=3)

    def create_rdf_end(self):
        ''' function to create an endpoint in rdf/xml

        Reads the letters from dbase.get_endpoint_rdf() and the known works
        from dbase.get_books(); returns the pretty-xml serialisation of the
        graph. '''
        letter_items = sorted(dbase.get_endpoint_rdf().items())
        works = dbase.get_books()

        for url, text in letter_items:
            try:
                correspondent = str(text[1])
                correspondence = (base_uri + "letters/resource/dickens/" +
                                  urllib.quote(correspondent) + '/' +
                                  str(url) + '/rdf')
            except UnicodeError:
                # Without a subject URI nothing can be attached to this
                # letter; carrying on would add its triples to the previous
                # letter's subject.
                continue

            # Reset per letter so a failure below never reuses values left
            # over from an earlier iteration.
            place = None
            letter_text = None
            try:
                self.add_author(correspondence, "Charles Dickens")
                self.add_subject(correspondence, "letter")
                self.add_subject(correspondence, "Charles Dickens")
                self.add_subject(correspondence,
                                 parse_text.camel_case(correspondent))
                self.add_time(correspondence, str(text[3]) + 'T00:00:00')
                self.add_correspondent(correspondence, correspondent)
                self.add_salutation(correspondence,
                                    urllib.quote(correspondent), str(text[4]))
                # str(None) would yield the bogus place name "None".
                if text[5] is not None:
                    place = str(text[5])
                letter_text = str(text[2])
            except UnicodeError:
                # unicode errors are text related; keep what was added so far
                pass

            if place:
                self.add_place(correspondence, place)
            if letter_text is not None:
                self.add_letter_text(correspondence, letter_text)

            # TODO: parse proper names once parse_text has been ported.
            for quote in parse_text.parse_balanced_quotes(text[2]):
                work = parse_text.stripPunc(quote)
                if quote in self._PERIODICALS:
                    self.add_magazine(correspondence, quote)
                if work in works:
                    # Normalise abbreviated titles to the full title.
                    self.add_text(correspondence,
                                  self._WORK_ALIASES.get(work, work))

        return self.g.serialize(format="pretty-xml", max_depth=3)

    def create_correspondent(self, corr, letter_items):
        ''' function to return a correspondent graph in rdf

        corr: name of the correspondent.
        letter_items: iterable of pairs; the first element of each pair is
        recorded as a nick (salutation) of the correspondent. '''
        u_corr = unicode(corr)
        correspondence = (base_uri + "correspondent/resource/" +
                          urllib.quote(corr))
        self.add_subject(correspondence, "correspondent")

        for url, text in letter_items:
            # Was `is not None or != ''`, which is true for every value and
            # recorded "None" and empty salutations.
            if url is not None and url != '':
                self.add_salutation(correspondence, corr, str(url))

        #need rules to define relationships - family, authors
        if u_corr == "Miss Hogarth":
            self.add_subject(correspondence, "daughter")
            self.add_daughter(correspondence, "Charles Dickens")
            self.add_sameas(correspondence,
                            "http://dbpedia.org/page/Georgina_Hogarth")

        return self.g.serialize(format="pretty-xml", max_depth=3)

    def create_publication(self, title, type):
        ''' function to return a publication graph in rdf

        title: passed to dbase.get_book_rdf() to look the publication up.
        type: used as a path segment of the subject URI and as a subject
        literal; "book" additionally adds a gutenberg.org sameAs link.

        The values of the last record (in sorted key order) returned by
        dbase.get_book_rdf() are the ones written to the graph. '''
        start = ''
        end = ''
        abstract = ''
        uri_str = ''
        source = ''

        for u, book in sorted(dbase.get_book_rdf(title).items()):
            title = u
            start, end, abstract, uri_str, source = (
                book[0], book[1], book[2], book[3], book[4])

        correspondence = (base_uri + type + "/resource/" +
                          title.strip().replace(" ", "_"))
        self.add_subject(correspondence, type)
        self.add_subject(correspondence, "Charles Dickens")
        self.add_author(correspondence, "Charles Dickens")
        self.add_time(correspondence, start)
        self.add_time(correspondence, end)
        self.add_title(correspondence, title)
        self.add_abstract(correspondence, abstract)
        self.add_sameas(correspondence, u"http://dbpedia.org/page/" + uri_str)

        if type == "book":
            self.add_sameas(correspondence,
                            "http://gutenberg.org/ebooks/" + source)

        return self.g.serialize(format="pretty-xml", max_depth=3)

    def create_place(self, placeobj):
        ''' function to return a place graph in rdf

        placeobj: iterable of objects exposing placeid, latitude, longitude
        and source; the values of the last one are written to the graph. '''
        (lat, long, place_name, source) = ('', '', '', '')
        for location in placeobj:
            place_name = location.placeid
            lat = location.latitude
            long = location.longitude
            source = location.source

        correspondence = (base_uri + "place/resource/" +
                          urllib.quote(place_name) + "/rdf")
        self.add_latitude(correspondence, lat)
        self.add_longitude(correspondence, long)
        self.add_place_name(correspondence, place_name)
        self.add_sameas(correspondence, source)

        return self.g.serialize(format="pretty-xml", max_depth=3)

    def create_author(self, data):
        ''' function to return an author graph in rdf

        data is accepted for interface compatibility but is not read. '''
        author = u"Charles Dickens"
        born = u"1812-02-07"
        died = u"1870-06-09"
        abstract = u"Charles John Huffam Dickens, pen-name 'Boz', was the most popular English novelist of the Victorian era, and one of the most popular of all time, responsible for some of English literature's most iconic characters. Many of his novels, with their recurrent theme of social reform, first appeared in periodicals and magazines in serialised form, a popular format for fiction at the time. Unlike other authors who completed entire novels before serial production began, Dickens often wrote them while they were being serialized, creating them in the order in which they were meant to appear. The practice lent his stories a particular rhythm, punctuated by one 'cliffhanger' after another to keep the public looking forward to the next installment. The continuing popularity of his novels and short stories is such that they have never gone out of print. His work has been praised for its mastery of prose and unique personalities by writers such as George Gissing and G. K. Chesterton, though the same characteristics prompted others, such as Henry James and Virginia Woolf, to criticize him for sentimentality and implausibility."
        author_url = u"http://en.wikipedia.org/wiki/Charles_Dickens"

        correspondence = base_uri + "author/resource/" + author
        self.add_subject(correspondence, "Charles Dickens")
        self.add_subject(correspondence, "author")
        self.add_nick(correspondence, "Boz")
        self.add_time(correspondence, born)
        self.add_time(correspondence, died)
        self.add_abstract(correspondence, abstract)
        self.add_sameas(correspondence, author_url)

        return self.g.serialize(format="pretty-xml", max_depth=3)

    def add_author(self, correspondence, name):
        ''' function to add author to graph; returns the author URIRef '''
        # The whole path, "/rdf" included, goes inside URIRef so the result
        # is a URIRef whether or not URIRef supports "+".
        lauthor = URIRef(base_uri +
                         'author/resource/%s/rdf' % urllib.quote(name))
        self.g.add((correspondence, dublin_core['creator'], lauthor))
        return lauthor

    def add_salutation(self, correspondence, author, name):
        ''' function to add salutation to graph

        Adds name as a FOAF nick of correspondence and returns the
        correspondent URIRef built from author (not itself added). '''
        person = URIRef(base_uri +
                        'correspondent/resource/%s/rdf' % urllib.quote(author))
        self.g.add((correspondence, FOAF['nick'], Literal(name)))
        return person

    def add_correspondent(self, correspondence, name):
        ''' function to add correspondent to graph; returns its URIRef '''
        person = URIRef(base_uri +
                        'correspondent/resource/%s/rdf' % urllib.quote(name))
        self.g.add((correspondence, letter_ns["correspondent"], person))
        return person

    def add_magazine(self, correspondence, name):
        ''' function to add magazine to graph; returns its URIRef '''
        magazine = URIRef(base_uri +
                          'magazine/resource/%s/rdf' % urllib.quote(name))
        self.g.add((correspondence, letter_ns['textReferred'], magazine))
        return magazine

    def add_text(self, correspondence, textname):
        ''' function to add referred text to the graph'''
        textid = (base_uri + "book/resource/" +
                  textname.replace("\n", "_").replace(" ", "_") + "/rdf")
        return self.g.add(
            (correspondence, letter_ns['textReferred'], URIRef(textid)))

    def add_author_text(self, correspondence, textname):
        ''' function to add author referred text to the graph'''
        # Used to end with `return book`, an undefined name, so every call
        # raised NameError after adding the triple.
        return self.g.add((correspondence, letter_ns['textAuthorReferred'],
                           Literal(textname)))

    def add_subject(self, correspondence, subject):
        ''' function to add a subject literal '''
        return self.g.add(
            (correspondence, dublin_core['subject'], Literal(subject)))

    def add_sameas(self, correspondence, link):
        ''' function to add an owl sameAs link '''
        return self.g.add((correspondence, owl['sameAs'], URIRef(link)))

    def add_time(self, correspondence, time):
        ''' function to add time '''
        return self.g.add(
            (correspondence, dublin_core['date'], Literal(str(time))))

    def add_title(self, correspondence, title):
        ''' function to add a title literal '''
        return self.g.add(
            (correspondence, dublin_core['title'], Literal(title)))

    def add_nick(self, correspondence, nick):
        ''' function to add a FOAF nick literal '''
        return self.g.add((correspondence, FOAF['nick'], Literal(nick)))

    def add_place(self, correspondence, place):
        ''' function to add a link to the place resource '''
        # An earlier duplicate definition of add_place (shadowed by this one
        # and referring to an undefined date value) has been dropped.
        # NOTE(review): the place is stored under dublin_core['title'];
        # confirm this predicate is intended before changing it.
        return self.g.add((correspondence, dublin_core['title'],
                           URIRef(base_uri + "place/resource/" +
                                  urllib.quote(place) + "/rdf")))

    def add_daughter(self, correspondence, author):
        ''' function to add a daughter relation to the author resource '''
        return self.g.add(
            (correspondence, exam['daughter'],
             URIRef(base_uri + "author/resource/" + author + "/rdf")))

    def add_letter_text(self, correspondence, letter_text):
        ''' function to add the body of a letter '''
        return self.g.add(
            (correspondence, letter_ns['Text'], Literal(letter_text)))

    def add_longitude(self, correspondence, long):
        ''' function to add a longitude literal '''
        return self.g.add((correspondence, geo['long'], Literal(long)))

    def add_latitude(self, correspondence, lat):
        ''' function to add a latitude literal '''
        return self.g.add((correspondence, geo['lat'], Literal(lat)))

    def add_description(self, correspondence, abstract):
        ''' function to add a place description literal '''
        return self.g.add((correspondence, geo['desc'], Literal(abstract)))

    def add_place_name(self, correspondence, name):
        ''' function to add a place name literal '''
        return self.g.add((correspondence, geo['name'], Literal(name)))

    def add_abstract(self, correspondence, letters):
        ''' function to add an abstract literal '''
        return self.g.add(
            (correspondence, letter_ns['text'], Literal(letters)))

    def add_open(self, correspondence, letters):
        ''' function to add the opening of a letter '''
        return self.g.add(
            (correspondence, letter_ns['open'], Literal(letters)))

    def add_close(self, correspondence, letters):
        ''' function to add the close of a letter '''
        return self.g.add(
            (correspondence, letter_ns['close'], Literal(letters)))
def update_rdf_for_conversion(prefix, vocab_properties, rdf_vocab_properties):
    """Annotate a vocabulary's RDF file and write it out as "<name>_modified.rdf".

    Parses the file at rdf_vocab_properties['path'], binds any prefix from the
    module-level `namespaces` whose URL the graph does not already declare,
    adds identifier / version / format triples to the owl:Ontology subject,
    adds rdfs:isDefinedBy to every term returned by get_terms(), and writes
    the pretty-xml serialisation next to the original file.

    prefix: accepted for interface compatibility; not read.
    vocab_properties: dict with 'preferredNamespaceUri' and
        'preferredNamespacePrefix'.
    rdf_vocab_properties: dict with 'name', 'path' and 'uri' of the source RDF.

    Returns (newrdf_vocab_properties, html_vocab_properties): dicts with
    'format', 'name', 'path' and 'uri' for the modified RDF file and for the
    HTML rendition (which this function does not create).
    """
    rdf_name = rdf_vocab_properties['name']
    stem = os.path.splitext(rdf_name)[0]

    def derive_properties(fmt, new_name):
        # Same location as the source RDF, with the file name swapped.
        return {
            'format': fmt,
            'name': new_name,
            'path': rdf_vocab_properties['path'].replace(rdf_name, new_name),
            'uri': rdf_vocab_properties['uri'].replace(rdf_name, new_name),
        }

    html_vocab_properties = derive_properties('text/html', "%s.html" % stem)
    newrdf_vocab_properties = derive_properties(
        'application/rdf+xml', "%s_modified.rdf" % stem)

    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])

    # The last subject typed owl:Ontology wins.
    # NOTE(review): subject stays None when the file declares no
    # owl:Ontology, and the triples below are then added with a None subject.
    subject = None
    for s in graph.subjects(namespaces['rdf']['type'],
                            URIRef(namespaces['owl']['Ontology'])):
        subject = s

    # Bind the well-known prefixes the graph does not already declare.  The
    # loop variables are named so they do not overwrite the `prefix` argument.
    graph_ns = set(str(nsurl) for _, nsurl in graph.namespaces())
    for ns_prefix, ns_url in namespaces.items():
        if str(ns_url) not in graph_ns:
            graph.bind(ns_prefix, URIRef(ns_url))

    preferred_ns = URIRef(vocab_properties['preferredNamespaceUri'])
    rdf_uri = URIRef(rdf_vocab_properties['uri'])
    html_uri = URIRef(html_vocab_properties['uri'])
    formatNode1 = BNode()
    formatNode2 = BNode()

    #Add vocabulary properties identifier and format
    graph.add((subject, namespaces['dc']['identifier'], rdf_uri))
    graph.add((subject, namespaces['dcterms']['isVersionOf'], preferred_ns))
    graph.add((subject, namespaces['dcterms']['hasFormat'], rdf_uri))
    graph.add((subject, namespaces['dcterms']['hasFormat'], html_uri))
    graph.add((subject, namespaces['vann']['preferredNamespaceUri'],
               preferred_ns))
    # NOTE(review): the prefix is stored as a URIRef, not a Literal; confirm
    # against the VANN vocabulary before changing.
    graph.add((subject, namespaces['vann']['preferredNamespacePrefix'],
               URIRef(vocab_properties['preferredNamespacePrefix'])))

    # Describe the HTML rendition.
    graph.add((html_uri, namespaces['rdf']['type'],
               URIRef(namespaces['dctype']['Text'])))
    graph.add((html_uri, namespaces['dc']['format'], formatNode1))
    graph.add((formatNode1, namespaces['rdf']['value'], Literal('text/html')))
    graph.add((formatNode1, namespaces['rdfs']['label'], Literal('HTML')))
    graph.add((formatNode1, namespaces['rdf']['type'],
               URIRef(namespaces['dcterms']['IMT'])))

    # Describe the RDF rendition.
    graph.add((rdf_uri, namespaces['rdf']['type'],
               URIRef(namespaces['dctype']['Text'])))
    graph.add((rdf_uri, namespaces['dc']['format'], formatNode2))
    graph.add((formatNode2, namespaces['rdf']['value'],
               Literal('application/rdf+xml')))
    graph.add((formatNode2, namespaces['rdfs']['label'], Literal('RDF')))
    graph.add((formatNode2, namespaces['rdf']['type'],
               URIRef(namespaces['dcterms']['IMT'])))

    #Add rdfs:isDefinedBy for each class / property / term of the vocabulary
    for term in get_terms(rdf_vocab_properties['path']):
        graph.add((URIRef(term), namespaces['rdfs']['isDefinedBy'],
                   preferred_ns))

    rdf_str = graph.serialize(format="pretty-xml")
    # `with` closes the file even if the write fails.
    with codecs.open(newrdf_vocab_properties['path'], 'w') as f:
        f.write(rdf_str)
    return (newrdf_vocab_properties, html_vocab_properties)
# Named graph: http://example.org/foaf/bobFoaf @prefix foaf: <http://xmlns.com/foaf/0.1/> . @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . _:z foaf:mbox <mailto:[email protected]> . _:z rdfs:seeAlso <http://example.org/foaf/bobFoaf> . _:z foaf:nick "Robert" . <http://example.org/foaf/bobFoaf> rdf:type foaf:PersonalProfileDocument . """ graph = ConjunctiveGraph(plugin.get('IOMemory', Store)()) graph.parse(StringIO(text), format="n3") print graph.serialize(format='xml') test_query = """ PREFIX data: <http://example.org/foaf/> PREFIX foaf: <http://xmlns.com/foaf/0.1/> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT ?mbox ?nick ?ppd WHERE { GRAPH data:aliceFoaf { ?alice foaf:mbox <mailto:[email protected]> ; foaf:knows ?whom . ?whom foaf:mbox ?mbox ; rdfs:seeAlso ?ppd .
# Connecion to Sesame con = connection('http://freerisk.org:8280/openrdf-sesame/') con.use_repository('joblistings') con.addnamespace('company', str(COMPANY)) cg = ConjunctiveGraph() cg.bind('dc', DC) cg.bind('jobboard', JB) # Find companies with ticker symbols res = con.query('select ?id ?ticker where {?id company:symbol ?ticker .}') # Loop over the results for row in res: company = URIRef(row['id']['value']) ticker = row['ticker']['value'] url = 'http://ichart.finance.yahoo.com/table.csv?s=%s&a=00&b=28&c=2008&d=00&e=28&f=2009&g=m&ignore=.csv' % ticker rows = [row for row in reader(urllib.urlopen(url))] current = float(rows[1][6]) yearago = float(rows[-1][6]) # Calculate percent change change = ((current - yearago) / current) * 100 cg.add((company, JB['stockpricechange'], Literal(change))) print cg.serialize(format='xml')