def get_all_measurement_types(ontology_file):
    graph = ConjunctiveGraph()
    graph.load(ontology_file, format="n3")
    query_str = '''SELECT DISTINCT ?mt ?label ?comment ?defn
        WHERE {
            ?mt rdfs:label ?label .
            ?mt rdfs:subClassOf <%s> .
            ?mt rdfs:subClassOf ?r1 .
            ?r1 owl:onProperty oboe:measuresEntity ;
                owl:someValuesFrom ?ent .
            ?mt rdfs:subClassOf ?r2 .
            ?r2 owl:onProperty oboe:measuresCharacteristic ;
                owl:someValuesFrom ?char .
            OPTIONAL { ?mt rdfs:comment ?comment }
            OPTIONAL { ?mt skos:definition ?defn }
        }''' % (MeasurementType)
    qres = list(graph.query(
        query_str,
        initNs=dict(oboe=URIRef("http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#"),
                    owl=OWL, rdfs=RDFS, skos=SKOS)))
    if len(qres) > 0:
        qres.sort(key=lambda x: x[0], reverse=True)
        result = dict()
        for i, row in enumerate(qres):
            result[i] = {'uri': row[0], 'label': row[1],
                         'comment': row[2], 'defn': row[3]}
        print "Sparql query finished!"
        return result
    return None
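# A hedged usage sketch for get_all_measurement_types above. The ontology
# path, and the MeasurementType/OWL/RDFS/SKOS globals the function relies
# on, are assumptions here, not part of the original snippet.
from rdflib import ConjunctiveGraph, URIRef
from rdflib.namespace import OWL, RDFS, SKOS

MeasurementType = URIRef(
    "http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#MeasurementType")

mts = get_all_measurement_types("oboe-core.n3")   # hypothetical file
if mts is not None:
    for idx in sorted(mts):
        print("%s %s" % (mts[idx]['uri'], mts[idx]['label']))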
def __init__(self, path=None):
    self.__dict__ = self.__shared_state
    if (self.data == None):
        if (path == None):
            raise ValueError("djubby's configuration MUST be initialized a first time, "
                             "read http://code.google.com/p/djubby/wiki/GettingStarted")
        else:
            self.path = os.path.abspath(path)
            logging.debug("Reading djubby's configuration from %s..." % self.path)
            if (not os.path.exists(self.path)):
                raise ValueError("No configuration file for djubby found at '%s'. "
                                 "Please provide a valid path" % self.path)

            data = ConjunctiveGraph()
            data.bind("conf", ns.config)
            try:
                data.load(path, format='n3')
            except Exception, e:
                raise ValueError("No valid N3 file with a configuration for djubby "
                                 "found at '%s'. Please provide a valid N3 file" % self.path)
            self.data = data

            try:
                self.graph = self.get_value("sparqlDefaultGraph")
                self.endpoint = self.get_value("sparqlEndpoint")
            except Exception, e:
                raise ValueError("Could not find the graph or the endpoint that djubby "
                                 "is supposed to query. Please provide a valid configuration")

            logging.info("Using <%s> as default graph to query the endpoint <%s>"
                         % (self.graph, self.endpoint))
            self.__class__.__dict__['_Configuration__shared_state']["data"] = data  # FIXME
def update():
    """
    Update the library with new articles.
    """
    graph = ConjunctiveGraph()

    # load the existing graph
    library = 'data/articles.rdf'
    graph.load(library)

    feeds = {
        "http://www3.interscience.wiley.com/rss/journal/118485807": "wiley.xsl",
        "http://phg.sagepub.com/rss/current.xml": "sage.xsl",
        "http://www.informaworld.com/ampp/rss~content=t713446924": "infoworld.xsl",
        "http://www.informaworld.com/ampp/rss~content=t788352614": "infoworld.xsl",
        "http://www.envplan.com/rss.cgi?journal=D": "envplan.xsl",
        "http://www.envplan.com/rss.cgi?journal=A": "envplan.xsl",
        "http://cgj.sagepub.com/rss/current.xml": "sage.xsl"
    }

    for feed, stylesheet in feeds.iteritems():
        # grab the feed and transform it
        print "grabbing ", feed
        new = StringIO.StringIO(feed_transform(feed, stylesheet))
        # merge the new triples into the graph
        graph.parse(new)
        new.close()

    graph.serialize(library, format='pretty-xml')
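# A minimal hedged sketch of the load/merge/serialize round trip that
# update() performs, with the feed fetching stubbed out; the inline RDF/XML
# payload is an assumption for illustration.
import StringIO
from rdflib import ConjunctiveGraph

payload = """<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:dc="http://purl.org/dc/elements/1.1/">
  <rdf:Description rdf:about="http://example.org/article/1">
    <dc:title>An example article</dc:title>
  </rdf:Description>
</rdf:RDF>"""

g = ConjunctiveGraph()
g.parse(StringIO.StringIO(payload))   # parse() merges into the same graph
print g.serialize(format='pretty-xml')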
def catalyst_graph_for(file):
    if file.startswith('/'):
        file = 'file://' + file
    logging.info("InferenceStore catalyst_graph_for started")

    # quads = jsonld.to_rdf(file, {'format': 'application/nquads'})
    logging.info("InferenceStore JSON-LD loaded")

    g = ConjunctiveGraph()
    g.namespace_manager = namespace_manager
    # g.parse(data=quads, format='nquads')
    g.load(file, format="json-ld")
    logging.info("InferenceStore base graph loaded")

    f = FuXiInferenceStore.get_instance()
    # get the inference engine
    cl = f.get_inference(g)
    logging.info("InferenceStore inference graph loaded")

    union_g = rdflib.ConjunctiveGraph()
    for s, p, o in g.triples((None, None, None)):
        union_g.add((s, p, o))
    for s, p, o in cl.triples((None, None, None)):
        union_g.add((s, p, o))
    logging.info("InferenceStore union graph prepared")

    return union_g
class RecursionTests(unittest.TestCase):
    # debug = True
    def setUp(self):
        self.graph = ConjunctiveGraph()
        self.graph.load(StringIO(testContent), format='n3')

    def test_simple_recursion(self):
        graph = ConjunctiveGraph()
        graph.load(StringIO(BASIC_KNOWS_DATA), format='n3')
        results = graph.query(KNOWS_QUERY, processor="sparql", DEBUG=False)
        results = set(results)
        person1 = URIRef('ex:person.1')
        person2 = URIRef('ex:person.2')
        nose.tools.assert_equal(
            results,
            set([(person1, None),
                 (person1, Literal('person 3')),
                 (person2, Literal('person 3'))]))

    def test_secondary_recursion(self):
        graph = ConjunctiveGraph()
        graph.load(StringIO(SUBCLASS_DATA), format='n3')
        results = graph.query(SUBCLASS_QUERY, processor="sparql", DEBUG=False)
        results = set(results)
        ob = URIRef('ex:ob')
        class1 = URIRef('ex:class.1')
        class2 = URIRef('ex:class.2')
        class3 = URIRef('ex:class.3')
        nose.tools.assert_equal(
            results,
            set([(ob, class1), (ob, class2), (ob, class3)]))
def runTest(self):
    testfile = self.testbase + ".htm"
    resultsf = self.testbase + ".ttl"
    self.failIf(not os.path.isfile(resultsf),
                "missing expected results file.")

    store1 = RGraph()
    store1.load(resultsf, publicID=self.pubId, format="n3")
    pcontents = store1.serialize(format='nt')
    pg = Graph()
    for a, b, c in store1:
        pg.triples.add(tuple(map(self.nodeToString, (a, b, c))))
        #print tuple(map(self.nodeToString, (a,b,c)))

    store2 = RGraph()
    store2.load(testfile, publicID=self.pubId, format="rdfa")
    qcontents = store2.serialize(format='nt')
    qg = Graph()
    for a, b, c in store2:
        qg.triples.add(tuple(map(self.nodeToString, (a, b, c))))

    self.failIf(not hash(pg) == hash(qg),
                "In %s: results do not match.\n%s\n\n%s"
                % (self.shortDescription(), pcontents, qcontents))
def to_rdf_etree(sources):
    graph = ConjunctiveGraph()
    for source in sources:
        graph.load(source, format=guess_format(source))
    io = StringIO()
    graph.serialize(io, format="pretty-xml")
    io.seek(0)
    return etree.parse(io)
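# Hedged usage sketch for to_rdf_etree above; the file names are
# assumptions. guess_format is presumably rdflib.util.guess_format, which
# maps extensions such as .ttl and .rdf to parser names.
from rdflib.util import guess_format

tree = to_rdf_etree(["vocab.ttl", "data.rdf"])   # hypothetical files
print(tree.getroot().tag)   # {http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF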
def build_network(rules):
    if isinstance(rules, basestring):
        rules = StringIO(rules)
    graph = ConjunctiveGraph()
    graph.load(rules, publicID='test', format='n3')
    network = NetworkFromN3(
        graph,
        additionalBuiltins={STRING_NS.startsWith: StringStartsWith})
    network.feedFactsToAdd(generateTokenSet(extractBaseFacts(graph)))
    return network
class Store:
    def __init__(self):
        self.graph = ConjunctiveGraph()
        if os.path.exists(storefn):
            self.graph.load(storeuri, format='n3')
        self.graph.bind('dc', 'http://purl.org/dc/elements/1.1/')
        self.graph.bind('foaf', 'http://xmlns.com/foaf/0.1/')
        self.graph.bind('imdb', 'http://www.csd.abdn.ac.uk/~ggrimnes/dev/imdb/IMDB#')
        self.graph.bind('rev', 'http://purl.org/stuff/rev#')

    def save(self):
        self.graph.serialize(storeuri, format='n3')

    def who(self, who=None):
        if who is not None:
            name, email = (r_who.match(who).group(1), r_who.match(who).group(2))
            self.graph.add((URIRef(storeuri), DC['title'], Literal(title % name)))
            self.graph.add((URIRef(storeuri + '#author'), RDF.type, FOAF['Person']))
            self.graph.add((URIRef(storeuri + '#author'), FOAF['name'], Literal(name)))
            self.graph.add((URIRef(storeuri + '#author'), FOAF['mbox'], Literal(email)))
            self.save()
        else:
            return self.graph.objects(URIRef(storeuri + '#author'), FOAF['name'])

    def new_movie(self, movie):
        movieuri = URIRef('http://www.imdb.com/title/tt%s/' % movie.movieID)
        self.graph.add((movieuri, RDF.type, IMDB['Movie']))
        self.graph.add((movieuri, DC['title'], Literal(movie['title'])))
        self.graph.add((movieuri, IMDB['year'], Literal(int(movie['year']))))
        self.save()

    def new_review(self, movie, date, rating, comment=None):
        review = BNode()  # @@ humanize the identifier (something like #rev-$date)
        movieuri = URIRef('http://www.imdb.com/title/tt%s/' % movie.movieID)
        self.graph.add((movieuri, REV['hasReview'],
                        URIRef('%s#%s' % (storeuri, review))))
        self.graph.add((review, RDF.type, REV['Review']))
        self.graph.add((review, DC['date'], Literal(date)))
        self.graph.add((review, REV['maxRating'], Literal(5)))
        self.graph.add((review, REV['minRating'], Literal(0)))
        self.graph.add((review, REV['reviewer'], URIRef(storeuri + '#author')))
        self.graph.add((review, REV['rating'], Literal(rating)))
        if comment is not None:
            self.graph.add((review, REV['text'], Literal(comment)))
        self.save()

    def movie_is_in(self, uri):
        return (URIRef(uri), RDF.type, IMDB['Movie']) in self.graph
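# Hedged setup sketch for the IMDb Store above. storefn, storeuri, title
# and r_who are module-level globals in the original script; the values
# below are assumptions for illustration.
import os
import re
from rdflib import Namespace

storefn = os.path.abspath('movies.n3')   # hypothetical store file
storeuri = 'file://' + storefn
title = 'Movies seen by %s'
r_who = re.compile('^(.*?) <(.*?)>$')    # simplified "Name <email>" pattern

s = Store()
s.who('John Doe <john@example.org>')
# s.new_movie(movie) expects an IMDbPY-style object exposing movieID,
# movie['title'] and movie['year'].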
def build_network2(rules):
    graph = ConjunctiveGraph()
    graph.load(StringIO(rules), publicID='test', format='n3')
    rule_store, rule_graph = SetupRuleStore(
        StringIO(rules),
        additionalBuiltins={STRING_NS.startsWith: StringStartsWith})
    from FuXi.Rete.Network import ReteNetwork
    network = ReteNetwork(rule_store)
    network.feedFactsToAdd(generateTokenSet(extractBaseFacts(graph)))
    return network
def labchords2RDF(infilename, outfilename, format="xml",
                  audiofilename=None, withdescriptions=False):
    if withdescriptions:
        commonchords = ConjunctiveGraph()
        commonchords.load("CommonChords.rdf")
        extrachords = ConjunctiveGraph()

    infile = open(infilename, 'r')
    lines = infile.readlines()

    mi = mopy.MusicInfo()

    homepage = mopy.foaf.Document("http://sourceforge.net/projects/motools")
    mi.add(homepage)
    program = mopy.foaf.Agent()
    program.name = "labchords2RDF.py"
    program.homepage = homepage
    mi.add(program)

    tl = RelativeTimeLine("#tl")
    tl.label = "Timeline derived from " + infilename
    tl.maker = program
    mi.add(tl)

    intervalNum = 0
    for line in lines:
        i = Interval("#i" + str(intervalNum))
        try:
            [start_s, end_s, label] = parseLabLine(line)
            i.beginsAtDuration = secondsToXSDDuration(start_s)
            i.endsAtDuration = secondsToXSDDuration(end_s)
            #i.label = "Interval containing "+label+" chord."
            i.onTimeLine = tl

            # Produce chord object for the label:
            chordURI = "http://purl.org/ontology/chord/symbol/" \
                       + label.replace("#", "s").replace(",", "%2C")
            if withdescriptions and \
               len(list(commonchords.predicate_objects(URIRef(chordURI)))) == 0 and \
               len(list(extrachords.predicate_objects(URIRef(chordURI)))) == 0:
                # Deref to grab chord info
                print "loading " + chordURI + "..."
                extrachords.load(chordURI)

            c = mopy.chord.Chord(chordURI)
            c_event = mopy.chord.ChordEvent("#ce" + str(intervalNum))
            c_event.chord = c
            c_event.label = label
            c_event.time = i
        except Exception, e:
            raise Exception("Problem parsing input file at line "
                            + str(intervalNum + 1) + " !\n" + str(e))
        mi.add(i)
        mi.add(c)
        mi.add(c_event)
        intervalNum += 1
class ConstraintParser(object):
    def __init__(self, input_graphs):
        self.root = Node(OWL.Thing, [])
        self.g = ConjunctiveGraph()
        self.journal = {OWL.Thing: self.root}
        for path in input_graphs:
            print('Loading ', path)
            self.g.load(path, format='turtle')

    def get_node(self, uri):
        if uri in self.journal:
            return self.journal[uri]
        else:
            logging.debug('Creating node for : ' + unicode(uri))
            new_node = Node(uri, [])
            self.journal[uri] = new_node
            return new_node

    def get_all_children(self, uri):
        children = self.get_node(uri).children
        nodes_to_visit = children.copy()
        result = children.copy()
        childs_visited = 0
        while (len(nodes_to_visit) > 0):
            current_node = nodes_to_visit.pop()
            nodes_to_visit = nodes_to_visit.union(current_node.children)
            result.add(current_node)
            childs_visited += 1
            logging.debug("Child : " + str(childs_visited) + ' '
                          + unicode(current_node))
        return result

    def parse_hierarchy(self):
        types = self.g.objects(None, RDF.type)
        all_super_classes = set()
        # first find upper classes under root
        for t in types:
            super_classes = list(self.g.objects(t, RDFS.subClassOf))
            all_super_classes = all_super_classes.union(set(super_classes))
            logging.debug(unicode(t) + ' has super classes: '
                          + ''.join(super_classes))
            if len(super_classes) == 0 or (len(super_classes) == 1
                                           and super_classes[0] == OWL.Thing):
                # super_class is root
                tmp_node = self.get_node(t)  # Node(t, [self.root])
                tmp_node.parents.add(self.root)
                self.root.children.add(tmp_node)
            else:
                tmp_node = self.get_node(t)
                # set.union() returns a new set; update the parents in place
                tmp_node.parents.update(
                    set([self.get_node(p) for p in super_classes]))
                for p in super_classes:
                    self.get_node(p).children.add(tmp_node)
        logging.debug(all_super_classes)
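# ConstraintParser assumes a Node type with a uri and mutable parents and
# children collections. A minimal sketch of what it could look like (the
# original Node implementation is not shown, so this is an assumption):
class Node(object):
    def __init__(self, uri, parents):
        self.uri = uri
        self.parents = set(parents)   # direct superclass nodes
        self.children = set()         # direct subclass nodes

    def __repr__(self):
        return 'Node(%s)' % self.uri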
class TestSparqlOPT_FILTER2(unittest.TestCase):
    def setUp(self):
        self.graph = ConjunctiveGraph()
        self.graph.load(StringIO(testContent), format='n3')

    def test_OPT_FILTER(self):
        results = self.graph.query(QUERY, DEBUG=False)
        results = list(results)
        self.failUnless(
            results == [(doc1,)],
            "expecting : %s . Got: %s" % ([(doc1,)], repr(results)))
class Install(RDFFile):
    """
    TODO: Add documentation
    """
    def __init__(self, fileName):
        self.graph = ConjunctiveGraph()
        self.subject = URIRef("urn:mozilla:install-manifest")
        try:
            self.graph.load(fileName)
        except IOError, e:
            pass
def main(filename, name):
    all_sections = getCharSheetSections()
    charactersheet = NS('http://trinket.thorne.id.au/2007/%s.n3#' % name)
    character = URIRef(charactersheet + name)
    graph = ConjunctiveGraph()
    for f in glob.glob(os.path.join(sibpath(__file__, 'data'), '*.n3')):
        if f.endswith('monster.n3'):
            continue
        try:
            graph.load(f, format='n3')
        except Exception, e:
            print 'Could not load', f, 'because', e
def _convertRDF(self):
    """
    Convert an RDF/XML result into an RDFLib triple store. This method can
    be overwritten in a subclass for a different conversion method.
    @return: converted result
    @rtype: RDFLib Graph
    """
    from rdflib import ConjunctiveGraph
    retval = ConjunctiveGraph()
    # This is a strange hack. If the publicID is not set, rdflib (or the
    # underlying xml parser) makes a funny (and, as far as I could see,
    # meaningless) error message...
    retval.load(self.response, publicID=' ')
    return retval
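# Hedged sketch of the publicID workaround above, applied directly: loading
# an RDF/XML payload from a file-like object. The inline payload is an
# assumption for illustration.
from StringIO import StringIO
from rdflib import ConjunctiveGraph

response = StringIO('<?xml version="1.0"?>'
                    '<rdf:RDF xmlns:rdf='
                    '"http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>')
g = ConjunctiveGraph()
g.load(response, publicID=' ')   # blank publicID avoids the parser complaint
print(len(g))                    # 0 triples in the empty payload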
def test_citation_prov_ttl(self):
    g1 = ConjunctiveGraph()
    g1.load(self.citation_prov_ttl_path, format="nquads")
    g2 = ConjunctiveGraph()
    for c in [self.citation_1, self.citation_2, self.citation_3,
              self.citation_4, self.citation_5, self.citation_6]:
        for s, p, o, g in c.get_citation_prov_rdf(self.base_url).quads(
                (None, None, None, None)):
            g2.add((s, p, o, g))
    self.assertTrue(isomorphic(g1, g2))
def test_citation_data_ttl(self):
    g1 = ConjunctiveGraph()
    g1.load(self.citation_data_ttl_path, format="nt11")
    g2 = ConjunctiveGraph()
    for c in [self.citation_1, self.citation_2, self.citation_3,
              self.citation_4, self.citation_5, self.citation_6]:
        for s, p, o in c.get_citation_rdf(self.base_url, False, False, False):
            g2.add((s, p, o))
    self.assertTrue(isomorphic(g1, g2))
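# Both tests above rely on rdflib.compare.isomorphic, which matches blank
# nodes structurally rather than by identifier. A minimal self-contained
# sketch:
from rdflib import ConjunctiveGraph, BNode, URIRef, Literal
from rdflib.compare import isomorphic

name = URIRef('http://xmlns.com/foaf/0.1/name')
a, b = ConjunctiveGraph(), ConjunctiveGraph()
a.add((BNode('x'), name, Literal('Alice')))
b.add((BNode('y'), name, Literal('Alice')))
assert isomorphic(a, b)   # different bnode ids, same shape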
def render_graph(result, cfg, **kwargs):
    """
    Render for output a result that can be parsed as an RDF graph
    """
    # Mapping from MIME types to formats accepted by RDFlib
    rdflib_formats = {
        'text/rdf+n3': 'n3',
        'text/turtle': 'turtle',
        'application/x-turtle': 'turtle',
        'application/rdf+xml': 'xml',
        'text/rdf': 'xml',
    }

    try:
        got = kwargs.get('format', 'text/rdf+n3')
        fmt = rdflib_formats[got]
    except KeyError:
        raise KrnlException('Unsupported format for graph processing: {!s}', got)

    g = ConjunctiveGraph()
    g.load(StringInputSource(result), format=fmt)

    display = cfg.dis[0] if is_collection(cfg.dis) else cfg.dis
    if display in ('png', 'svg'):
        try:
            literal = len(cfg.dis) > 1 and cfg.dis[1].startswith('withlit')
            opt = {'lang': cfg.lan, 'literal': literal, 'graphviz': []}
            data, metadata = draw_graph(g, fmt=display, options=opt)
            return {'data': data, 'metadata': metadata}
        except Exception as e:
            raise KrnlException('Exception while drawing graph: {!r}', e)
    elif display == 'table':
        it = rdf_iterator(g, add_vtype=cfg.typ, lang=cfg.lan)
        n, data = html_table(it, limit=cfg.lmt, withtype=cfg.typ)
        data += div('Shown: {}, Total rows: {}',
                    n if cfg.lmt else 'all', len(g), css="tinfo")
        data = {'text/html': div(data)}
    elif len(g) == 0:
        data = {'text/html': div(div('empty graph', css='krn-warn'))}
    else:
        data = {'text/plain': g.serialize(format='nt').decode('utf-8')}

    return {'data': data, 'metadata': {}}
def process_tools():
    """
    Go through all bio.tools entries in bioschemas JSON-LD and produce a
    single RDF file.
    """
    tool_files = get_bioschemas_files_in_repo()
    print(len(tool_files))
    rdf_graph = ConjunctiveGraph()
    for tool_file in tool_files:
        print(tool_file)
        rdf_graph.load(tool_file, format="json-ld")

    rdf_graph.serialize(
        format="turtle",
        destination="bioschemas-dump.ttl"
        #destination=os.path.join(directory, tpe_id + "bioschemas.jsonld")
    )
class DateFilterTest(unittest.TestCase):
    # debug = True
    def setUp(self):
        self.graph = ConjunctiveGraph()
        self.graph.load(StringIO(testContent), format='n3')

    def test_DATE_FILTER1(self):
        for query in [QUERY1, QUERY2, QUERY3]:
            # print query
            #pQuery = Parse(query)
            #print RenderSPARQLAlgebra(pQuery)

            # Skip until issue is resolved
            if query == QUERY1 and rdflib.py3compat.PY3:
                raise SkipTest('Known issue with Python 3')

            results = self.graph.query(query, processor="sparql", DEBUG=False)
            results = list(results)
            self.failUnless(
                len(results) and results == [(ANSWER1,)],
                "expecting : %s . Got: %s" % ([(ANSWER1,)], repr(results)))
def catalyst_graph_for(file):
    if file.startswith('/'):
        file = 'file://' + file
    logging.info("InferenceStore catalyst_graph_for started")

    # quads = jsonld.to_rdf(file, {'format': 'application/nquads'})
    logging.info("InferenceStore JSON-LD loaded")

    g = ConjunctiveGraph()
    g.namespace_manager = namespace_manager
    # g.parse(data=quads, format='nquads')
    g.load(file, format="json-ld")
    logging.info("InferenceStore base graph loaded")

    # get the inference engine
    get_inference_store().get_inference(g)
    logging.info("InferenceStore inference graph loaded")

    return g
class DeepGraphStore():
    store_name = 'SQLite'

    def __init__(self, create=False, parse=None):
        self.parse = parse
        self.create = create
        self.graph = None

    def setUp(self):
        self.path = "" + random_file_generating()
        self.graph = Graph(store=self.store_name)
        self.graph.open(self.path, create=self.create)
        if self.create:
            if not self.parse:
                self.graph.parse("http://njh.me/foaf.rdf", format='xml')
            else:
                self.graph.parse(self.parse)
            self.graph.commit()

    def open(self, path):
        self.graph = ConjunctiveGraph(self.store_name)
        self.path = path
        self.graph.open(self.path, create=False)

    def query(self, sparql_query):
        return self.graph.query(sparql_query)

    def parse(self, path_to_file_):
        self.graph.parse(path_to_file_)

    def load(self, triples):
        self.graph.load(triples)

    def close(self):
        self.graph.close()

    def size(self):
        size = len(self.graph)
        # self.close()
        return size
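# Hedged usage sketch for DeepGraphStore above. random_file_generating and
# the SQLite store plugin come from the original project, and the FOAF URL
# is fetched over the network, so this assumes both are available.
store = DeepGraphStore(create=True)
store.setUp()                   # opens a fresh store and parses the seed data
print(store.size())             # triple count after the initial parse
for row in store.query('SELECT ?s WHERE { ?s ?p ?o } LIMIT 5'):
    print(row)
store.close()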
def populate_ontology():
    ont_path = path_kg + 'traffic_ontology.xml'
    metadata = pd.read_csv(path_src + 'trafficMetaData.csv', sep=',')

    g = ConjunctiveGraph()
    g.load(ont_path)
    g.add((URIRef(base_uri), RDF.type, OWL.Ontology))
    g.bind("owl", OWL)
    g.bind("rdf", RDF)
    g.bind("rdfs", RDFS)
    # g.bind("city", base_uri)

    # populate from metadata: [Path, from[name], to[name], from[has[street]], to[has[street]]]
    populate_from_metadata(metadata, g)

    poi = parse_log()
    for entry in poi:
        point = entry[0][0].split('_')[0] + "_" + entry[0][0].split('_')[1]
        metadata_entry = metadata[metadata['REPORT_ID'] == int(entry[0][0].split('_')[2])]
        address_id = metadata_entry[point + '_NAME'].values[0]
        poi_list = entry[0][1]
        for tmp_poi in poi_list:
            # generate an id for the poi
            tmp_poi_id = str(abs(hash(point + '_' + str(address_id) + '_' + tmp_poi)))
            g.add((base_uri[tmp_poi_id], RDF.type, base_uri['Point_of_interest']))
            g.add((base_uri[tmp_poi_id], RDF.type,
                   base_uri[tmp_poi[0].upper() + tmp_poi[1:]]))
            g.add((base_uri[tmp_poi_id], base_uri['locatedAt'],
                   base_uri[str(address_id)]))

    simple_sequence = []
    events = pd.read_csv(path_processed + 'events.csv')
    mapping = pd.read_csv(path_processed + 'mapping.csv').T.to_dict()
    for k, v in mapping.iteritems():
        g.add((base_uri[v['Unnamed: 0']], base_uri['occursAt'],
               base_uri[str(v['occursAt'])]))
        g.add((base_uri[v['Unnamed: 0']], RDF.type, base_uri[v['type']]))
    for e in events['Id']:
        simple_sequence.append(e)

    with open(path_processed + 'sequence.txt', "wb") as seq_file:
        seq_file.write(','.join(simple_sequence))

    g.serialize(path_kg + 'traffic_individuals.xml', format='xml')
def main():
    # f = open("CommonChords.rdf",'w');
    g = ConjunctiveGraph()
    prefix = "http://purl.org/ontology/chord/symbol/"
    notes = ['C', 'D', 'E', 'F', 'G', 'A', 'B']
    mods = ['b', '', 's']
    bases = ['', ':maj', ':min', ':dim', ':aug', ':maj7', ':min7', ':7',
             ':dim7', ':hdim7', ':minmaj7', ':maj6', ':min6', ':9', ':maj9',
             ':min9', ':sus4', ':sus2']

    chordURI = prefix + 'N'
    print "loading " + chordURI
    g.load(chordURI)
    for note in notes:
        for mod in mods:
            for base in bases:
                chordURI = prefix + note + mod + base
                #f.write(prefix+note+mod+base+"\n")
                print "loading " + chordURI
                g.load(chordURI)

    print "Writing graph out..."
    g.serialize('CommonChords.rdf', 'xml')
def GScsv2RDF(infilename, outfilename, format="xml", withdescriptions=False):
    if withdescriptions:
        commonchords = ConjunctiveGraph()
        commonchords.load("CommonChords.rdf")
        extrachords = ConjunctiveGraph()

    lines = open(infilename).readlines()

    #
    # Initial model bits
    #
    mi = mopy.MusicInfo()

    homepage = mopy.foaf.Document("http://sourceforge.net/projects/motools")
    mi.add(homepage)
    program = mopy.foaf.Agent()
    program.name = "GScsv2RDF.py"
    program.homepage = homepage
    mi.add(program)

    tl = TimeLine("#tl")
    tl.label = "Timeline derived from " + infilename
    tl.maker = program
    mi.add(tl)

    [artistStr, titleStr] = [f.strip() for f in lines[0].split("\t")]

    # Add artist & title metadata
    signal = Signal()
    signal.time = sig_int = Interval()
    sig_int.label = "Whole signal interval"
    sig_int.beginsAtDuration = secondsToXSDDuration(0)
    sig_int.onTimeLine = tl
    signal.published_as = track = Track()
    artist = MusicArtist()
    artist.made = track
    artist.name = artistStr
    track.title = titleStr
    mi.add(sig_int)
    mi.add(signal)
    mi.add(track)
    mi.add(artist)

    lineNum = 1
    segmentNum = 0
    thisSegment_i = None
    chordSymbol = ''
    t_secs = 0.0
    for line in lines[1:]:
        # print "parsing line "+str(lineNum)
        try:
            lastChordSymbol = chordSymbol
            t_secs = getTimestamp(line)
            chordSymbol = getChordSymbol(line)
            if chordSymbol != lastChordSymbol:
                # print " handling new chord symbol"
                segmentNum += 1
                lastSegment_i = thisSegment_i
                thisSegment_i = Interval("#i_" + str(segmentNum))
                thisSegment_i.beginsAtDuration = secondsToXSDDuration(t_secs)
                if lastSegment_i != None:
                    # print " terminating last interval"
                    lastSegment_i.endsAtDuration = secondsToXSDDuration(t_secs)
                    thisSegment_i.intervalAfter = lastSegment_i
                    lastSegment_i.intervalBefore = thisSegment_i
                mi.add(thisSegment_i)

                chordURI = "http://purl.org/ontology/chord/symbol/" \
                           + chordSymbol.replace("#", "s").replace(",", "%2C")
                if withdescriptions and \
                   len(list(commonchords.predicate_objects(URIRef(chordURI)))) == 0 and \
                   len(list(extrachords.predicate_objects(URIRef(chordURI)))) == 0:
                    # Deref to grab chord info
                    print "loading <" + chordURI + ">..."
                    extrachords.load(chordURI)
                c = Chord(chordURI)
                c_event = ChordEvent("#ce_" + str(segmentNum))
                c_event.chord = c
                c_event.time = thisSegment_i
                c_event.label = chordSymbol
                mi.add(c)
                mi.add(c_event)
                # print " added new chord event for "+chordURI
        except Exception, e:
            print("ERROR : Problem parsing input file at line " + str(lineNum) + " !\n")
            raise
        lineNum += 1
def importRDFFile(filename, format="xml", strict=True):
    g = ConjunctiveGraph()
    g.load(filename, format=format)
    return importRDFGraph(g, strict)
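# Hedged usage sketch for importRDFFile above; the file name is an
# assumption and importRDFGraph comes from the original module.
# rdflib.util.guess_format can pick the parser from the extension instead
# of the hard-coded "xml" default:
from rdflib.util import guess_format

fn = 'ontology.ttl'   # hypothetical file
result = importRDFFile(fn, format=guess_format(fn), strict=False)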
def query_test(t):
    uri, name, comment, data, graphdata, query, resfile, syntax = t

    # the query-eval tests refer to graphs to load by resolvable filenames
    rdflib_sparql_module.SPARQL_LOAD_GRAPHS = True

    if uri in skiptests:
        raise SkipTest()

    def skip(reason='(none)'):
        print "Skipping %s from now on." % uri
        f = open("skiptests.list", "a")
        f.write("%s\t%s\n" % (uri, reason))
        f.close()

    try:
        g = ConjunctiveGraph()
        if data:
            g.default_context.load(data, format=_fmt(data))

        if graphdata:
            for x in graphdata:
                g.load(x, format=_fmt(x))

        if not resfile:
            # no result - syntax test
            if syntax:
                translateQuery(parseQuery(open(query[7:]).read()),
                               base=urljoin(query, '.'))
            else:
                # negative syntax test
                try:
                    translateQuery(parseQuery(open(query[7:]).read()),
                                   base=urljoin(query, '.'))
                    assert False, 'Query should not have parsed!'
                except:
                    pass  # it's fine - the query should not parse
            return

        # eval test - carry out query
        res2 = g.query(open(query[7:]).read(), base=urljoin(query, '.'))

        if resfile.endswith('ttl'):
            resg = Graph()
            resg.load(resfile, format='turtle', publicID=resfile)
            res = RDFResultParser().parse(resg)
        elif resfile.endswith('rdf'):
            resg = Graph()
            resg.load(resfile, publicID=resfile)
            res = RDFResultParser().parse(resg)
        elif resfile.endswith('srj'):
            res = Result.parse(open(resfile[7:]), format='json')
        elif resfile.endswith('tsv'):
            res = Result.parse(open(resfile[7:]), format='tsv')
        elif resfile.endswith('csv'):
            res = Result.parse(open(resfile[7:]), format='csv')

            # CSV is lossy, round-trip our own resultset to
            # lose the same info :)

            # write bytes, read strings...
            s = BytesIO()
            res2.serialize(s, format='csv')
            print s.getvalue()
            s = StringIO(s.getvalue().decode('utf-8'))  # hmm ?
            res2 = Result.parse(s, format='csv')
        else:
            res = Result.parse(open(resfile[7:]), format='xml')

        if not DETAILEDASSERT:
            eq(res.type, res2.type, 'Types do not match')
            if res.type == 'SELECT':
                eq(set(res.vars), set(res2.vars), 'Vars do not match')
                comp = bindingsCompatible(
                    set(frozenset(x.iteritems()) for x in res.bindings),
                    set(frozenset(x.iteritems()) for x in res2.bindings))
                assert comp, 'Bindings do not match'
            elif res.type == 'ASK':
                eq(res.askAnswer, res2.askAnswer, 'Ask answer does not match')
            elif res.type in ('DESCRIBE', 'CONSTRUCT'):
                assert isomorphic(res.graph, res2.graph), \
                    'graphs are not isomorphic!'
            else:
                raise Exception('Unknown result type: %s' % res.type)
        else:
            eq(res.type, res2.type,
               'Types do not match: %r != %r' % (res.type, res2.type))
            if res.type == 'SELECT':
                eq(set(res.vars), set(res2.vars),
                   'Vars do not match: %r != %r'
                   % (set(res.vars), set(res2.vars)))
                assert bindingsCompatible(
                    set(frozenset(x.iteritems()) for x in res.bindings),
                    set(frozenset(x.iteritems()) for x in res2.bindings)
                ), 'Bindings do not match: \n%s\n!=\n%s' % (
                    _bindingsTable(res.bindings), _bindingsTable(res2.bindings))
            elif res.type == 'ASK':
                eq(res.askAnswer, res2.askAnswer,
                   "Ask answer does not match: %r != %r"
                   % (res.askAnswer, res2.askAnswer))
            elif res.type in ('DESCRIBE', 'CONSTRUCT'):
                assert isomorphic(res.graph, res2.graph), \
                    'graphs are not isomorphic!'
            else:
                raise Exception('Unknown result type: %s' % res.type)

    except Exception, e:
        if isinstance(e, AssertionError):
            failed_tests.append(uri)
            fails[str(e)] += 1
        else:
            error_tests.append(uri)
            errors[str(e)] += 1

        if DEBUG_ERROR and not isinstance(e, AssertionError) or DEBUG_FAIL:
            print "======================================"
            print uri
            print name
            print comment

            if not resfile:
                if syntax:
                    print "Positive syntax test"
                else:
                    print "Negative syntax test"

            if data:
                print "----------------- DATA --------------------"
                print ">>>", data
                print open(data[7:]).read()
            if graphdata:
                print "----------------- GRAPHDATA --------------------"
                for x in graphdata:
                    print ">>>", x
                    print open(x[7:]).read()

            print "----------------- Query -------------------"
            print ">>>", query
            print open(query[7:]).read()
            if resfile:
                print "----------------- Res -------------------"
                print ">>>", resfile
                print open(resfile[7:]).read()

            try:
                pq = parseQuery(open(query[7:]).read())
                print "----------------- Parsed ------------------"
                pprintAlgebra(translateQuery(pq, base=urljoin(query, '.')))
            except:
                print "(parser error)"

            print decodeStringEscape(unicode(e))

            import pdb
            pdb.post_mortem(sys.exc_info()[2])
            # pdb.set_trace()
            # nose.tools.set_trace()
        raise
def update_test(t):
    # the update-eval tests refer to graphs on http://example.org
    rdflib_sparql_module.SPARQL_LOAD_GRAPHS = False

    uri, name, comment, data, graphdata, query, res, syntax = t

    if uri in skiptests:
        raise SkipTest()

    try:
        g = ConjunctiveGraph()

        if not res:
            if syntax:
                translateUpdate(parseUpdate(open(query[7:])))
            else:
                try:
                    translateUpdate(parseUpdate(open(query[7:])))
                    raise AssertionError("Query shouldn't have parsed!")
                except:
                    pass  # negative syntax test
            return

        resdata, resgraphdata = res

        # read input graphs
        if data:
            g.default_context.load(data, format=_fmt(data))

        if graphdata:
            for x, l in graphdata:
                g.load(x, publicID=URIRef(l), format=_fmt(x))

        req = translateUpdate(parseUpdate(open(query[7:])))
        evalUpdate(g, req)

        # read expected results
        resg = ConjunctiveGraph()
        if resdata:
            resg.default_context.load(resdata, format=_fmt(resdata))

        if resgraphdata:
            for x, l in resgraphdata:
                resg.load(x, publicID=URIRef(l), format=_fmt(x))

        eq(set(x.identifier for x in g.contexts() if x != g.default_context),
           set(x.identifier for x in resg.contexts()
               if x != resg.default_context))

        assert isomorphic(g.default_context, resg.default_context), \
            'Default graphs are not isomorphic'

        for x in g.contexts():
            if x == g.default_context:
                continue
            assert isomorphic(x, resg.get_context(x.identifier)), \
                "Graphs with ID %s are not isomorphic" % x.identifier

    except Exception, e:
        if isinstance(e, AssertionError):
            failed_tests.append(uri)
            fails[str(e)] += 1
        else:
            error_tests.append(uri)
            errors[str(e)] += 1

        if DEBUG_ERROR and not isinstance(e, AssertionError) or DEBUG_FAIL:
            print "======================================"
            print uri
            print name
            print comment

            if not res:
                if syntax:
                    print "Positive syntax test"
                else:
                    print "Negative syntax test"

            if data:
                print "----------------- DATA --------------------"
                print ">>>", data
                print open(data[7:]).read()
            if graphdata:
                print "----------------- GRAPHDATA --------------------"
                for x, l in graphdata:
                    print ">>>", x, l
                    print open(x[7:]).read()

            print "----------------- Request -------------------"
            print ">>>", query
            print open(query[7:]).read()

            if res:
                if resdata:
                    print "----------------- RES DATA --------------------"
                    print ">>>", resdata
                    print open(resdata[7:]).read()
                if resgraphdata:
                    print "----------------- RES GRAPHDATA -------------------"
                    for x, l in resgraphdata:
                        print ">>>", x, l
                        print open(x[7:]).read()

            print "------------- MY RESULT ----------"
            print g.serialize(format='trig')

            try:
                pq = translateUpdate(parseUpdate(open(query[7:]).read()))
                print "----------------- Parsed ------------------"
                pprintAlgebra(pq)
                # print pq
            except:
                print "(parser error)"

            print decodeStringEscape(unicode(e))

            import pdb
            pdb.post_mortem(sys.exc_info()[2])
        raise
def handle(self, **options):
    LOGGER.debug("linking places")
    for place in models.Place.objects.filter(dbpedia__isnull=True):
        if not place.city or not place.state:
            continue
        # formulate a dbpedia place uri
        path = urllib2.quote('%s,_%s' % (_clean(place.city),
                                         _clean(place.state)))
        url = URIRef('http://dbpedia.org/resource/%s' % path)

        # attempt to get a graph from it
        graph = ConjunctiveGraph()
        try:
            LOGGER.debug("looking up %s" % url)
            graph.load(url)
        except urllib2.HTTPError as e:
            LOGGER.error(e)

        # if we've got more than 3 assertions extract some stuff from
        # the graph and save back some info to the db, would be nice
        # to have a triple store underneath where we could persist
        # all the facts eh?
        if len(graph) >= 3:
            place.dbpedia = url
            place.latitude = graph.value(url, geo['lat'])
            place.longitude = graph.value(url, geo['long'])
            for object in graph.objects(URIRef(url), owl['sameAs']):
                if object.startswith('http://sws.geonames.org'):
                    place.geonames = object
            place.save()
            LOGGER.info("found dbpedia resource %s" % url)
        else:
            LOGGER.warn("couldn't find dbpedia resource for %s" % url)
        reset_queries()

    LOGGER.info("finished looking up places in dbpedia")
    LOGGER.info("dumping place_links.json fixture")

    # so it would be nice to use django.core.serializer here
    # but it serializes everything about the model, including
    # titles that are linked to ... and this could theoretically
    # change over time, so we only preserve the facts that have
    # been harvested from dbpedia, so they can overlay over
    # the places that have been extracted during title load
    json_src = []
    places_qs = models.Place.objects.filter(dbpedia__isnull=False)
    for p in places_qs.order_by('name'):
        json_src.append({
            'name': p.name,
            'dbpedia': p.dbpedia,
            'geonames': p.geonames,
            'longitude': p.longitude,
            'latitude': p.latitude
        })
        reset_queries()
    json.dump(json_src, file('core/fixtures/place_links.json', 'w'), indent=2)
    LOGGER.info("finished dumping place_links.json fixture")
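# The handler above assumes module-level geo and owl namespace objects; a
# minimal sketch of those definitions (an assumption, matching the
# geo:lat/geo:long and owl:sameAs terms it reads):
from rdflib import Namespace

geo = Namespace('http://www.w3.org/2003/01/geo/wgs84_pos#')
owl = Namespace('http://www.w3.org/2002/07/owl#')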
def convert(teifile, namespace):
    #graph_uri = "http://contextus.net/resource/blue_velvet/"

    ns = Namespace(namespace)

    graph = ConjunctiveGraph()
    graph.load(teifile, format="rdfa")

    graph.bind("default", ns)

    to_update = ""
    for prefix, nsuri in graph.namespaces():
        #print("prefix: " + str(prefix) + " - " + str(nsuri))
        if nsuri in ns:
            to_update = nsuri

    for s, p, o in graph:
        # print s, p, o
        if to_update != "" and to_update in s:
            graph.remove((s, p, o))
            s = URIRef(s.replace(to_update, ns))
            graph.add((s, p, o))

    act = ""
    scene = ""
    line = ""
    char = 0
    loc = 0

    #timeline = ns['timeline/narrative']
    #graph.add((timeline, RDF.type, ome['Timeline']))

    tree = ET.parse(teifile)
    cast = dict()

    titleNode = tree.find('//title')

    castItems = tree.findall('/text/body/div1/castList//castItem')
    for castItem in castItems:
        actorNode = castItem.find('actor')
        roleNode = castItem.find('role')

        if roleNode != None:
            id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id")

        #print("Found castItem!")

        actor = None
        role = None

        # Check to see if we already have an entry
        if(roleNode != None and roleNode.get("about")):
            charname = roleNode.get("about")

            if(charname.find(":") > -1):
                nmsp, nom = charname.split(":", 1)
                charcode = "character/" + str(char)
                charref = nmsp + ":" + charcode + "]"
                role = extractCURIEorURI(graph, charref, nom[0:-1])
                char += 1
                #print("1:" + charname + ": adding id " + id + " to " + role)
            else:
                role = extractCURIEorURI(graph, charname)
                #print("2:" + charname + ": adding id " + id + " to " + role)

            cast[id] = role
            graph.add((role, RDF.type, omb['Character']))
            #print(charname + ": adding id " + id + " to " + role)

        if(actorNode != None and actorNode.get("about")):
            actor = extractCURIEorURI(graph, actorNode.get("about"))
            graph.add((actor, RDF.type, omb['Being']))

        if actor != None and role != None:
            graph.add((actor, omb['portrays'], role))
            graph.add((role, omb['portrayed-by'], actor))

    eventCount = 1
    groupCount = 1
    prior_event = None

    actItems = tree.findall('/text/body/div1')
    ref = ""

    for actItem in actItems:
        if actItem.get("type") == "act":
            act = actItem.get("n")

        sceneItems = actItem.findall('div2')
        for sceneItem in sceneItems:
            #print("Found sceneItems!")
            if sceneItem.get("type") == "scene":
                scene = sceneItem.get("n")

            # Work out the location of this scene
            location = None
            stageItems = sceneItem.findall("stage")

            #internalnum = 1
            stagenum = 0
            speechnum = 1

            for stageItem in stageItems:
                if stageItem.get("type") == "location":
                    # The RDFa parser doesn't handle the type - so we can
                    # grab that here.
                    if stageItem.get("about") != None:
                        locname = stageItem.get("about")

                        # Adding location type/oml:space for location
                        if stageItem.get("typeof") and stageItem.get("about"):
                            type = extractCURIEorURI(graph, stageItem.get("typeof"))
                            #print "1. Location: " + str(location) + " Type: " + str(type)
                        elif stageItem.get("about"):
                            #print "2. Location: " + str(locname)
                            type = extractCURIEorURI(graph, oml['Space'])

                        # Get location value and add rdfs:label if location
                        # is not using the TEI value
                        if(locname.find(":") > -1):
                            nmsp, nom = locname.split(":", 1)
                            loccode = "location/" + str(loc)
                            locref = nmsp + ":" + loccode + "]"
                            location = extractCURIEorURI(graph, locref, nom[0:-1])
                            loc += 1
                            graph.add((location, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(nom[0:-1])))
                        else:
                            location = extractCURIEorURI(graph, stageItem.get("about"))

                        # Add location to graph
                        graph.add((location, RDF.type, type))
                    else:
                        location = ""

                    #print("Adding location type: " + type + " (" + location + ")")

            if cast:
                # Work out a list of all cast in a given section
                currentCast = list()
                speakers = list()

                # Iterate through elements within stageItem
                # Find speaker events and add to list of current cast for inclusion in social event
                # Find reference events and add to ongoing social event ?
                # Find stage events
                #   If event is an entrance then
                #     create social event for people talking before entrance
                #     create travel event i.e. entrance
                #     add new arrival to current cast list
                #   If event is exit event then
                #     create social event for people talking before exit
                #     create travel event i.e. exit
                #     if leavers are not named directly then calculate who is leaving
                #     remove leavers from current cast list
                # If reach end of scene then create social event with current cast list

                # Also need to check if social event before exit has same composition
                # as social event after exit since then they should be merged

                event = ns['event/' + str(eventCount)]
                group = ns['group/' + str(groupCount)]

                refersTo = list()
                #parent = None
                speakerNodes = list()
                speakerRef = list()

                xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)

                stagecount = 0
                stage_array = list()

                for node in sceneItem.getiterator():
                    #print("Node: " + node.tag)

                    """
                    if node.tag == "lb":
                        if node.get("ed") == "F1":
                            line = node.get("n")
                            if titleNode != None:
                                ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line)
                            else:
                                ref = str(act) + "." + str(scene) + "." + str(line)

                            #xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "'])"
                            xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene)
                            #print("Ref: " + xpointer)
                    """

                    if node.tag == "sp":
                        id = node.get("who")
                        if id and cast:
                            speakers.append(cast[id[1:]])
                            speakerNodes.append(node)
                            if perseusid == None:
                                speakerRef.append(ref)
                            else:
                                #speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)"
                                speechRef = xpointer + "#xpointer(//div2/sp[" + str(speechnum) + "])"
                                speakerRef.append(speechRef)
                            #print("Line ref: " + ref)

                            if cast[id[1:]] not in currentCast:
                                currentCast.append(cast[id[1:]])

                        #internalnum = 1
                        speechnum += 1
                        stagecount = 0

                        previousl = 0
                        for subnode in node.getiterator():
                            if subnode.tag == "l":
                                previousl += 1
                            if subnode.tag == "stage":
                                #print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n")
                                stage_array.append(previousl)
                                stagecount += 1

                    elif node.tag == "stage":
                        if stagecount > 0:
                            s_max = len(stage_array)
                            diff = s_max - stagecount
                            #if diff == 0:
                            #    stagenum += 1
                            entRef = xpointer + "#xpointer(//div2/sp[" + str(speechnum - 1) + "]/l[" + str(stage_array[diff]) + "]/stage)"
                            #internalnum += 1
                            stagecount -= 1
                        else:
                            stagenum += 1
                            entRef = xpointer + "#xpointer(//div2/stage[" + str(stagenum) + "])"

                        if node.get("type") == "entrance":
                            # Add Social Events for all the people who spoke
                            # since the last break (if there were any)
                            update = list()
                            update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
                            eventCount = update[0]
                            prior_event = update[1]

                            event = ns['event/' + str(eventCount)]

                            speakers = list()
                            speakerNodes = list()
                            speakerRef = list()

                            # Add Travel Event
                            graph.add((event, RDF.type, omj['Travel']))

                            if perseusid == None:
                                graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref)))
                            else:
                                #entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)"
                                graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef)))

                            #print("Entrance event. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                            #print("Found entrance event!")

                            if location:
                                graph.add((event, ome['to'], location))

                            involved = node.get("about")
                            if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
                                involved = involved[1:-1]

                            chunks = involved.split()
                            chunk_count = len(chunks)

                            if chunk_count > 1:
                                #type = extractCURIEorURI(graph, "[omb:Group]")
                                #graph.add((group, RDF.type, type))
                                graph.add((group, RDF.type, omb['Group']))

                            event_label = ""
                            en = 1
                            for chunk in chunks:
                                striped = chunk.strip()
                                if(len(striped) > 0 and striped[0] == "[" and striped[-1] == "]"):
                                    striped = striped[1:-1]

                                currentCast.append(cast[striped])

                                if chunk_count > 1:
                                    graph.add((group, ome['contains'], cast[striped]))
                                    if en == chunk_count:
                                        event_label = event_label[0:-2] + " and " + striped
                                        graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " arrive")))
                                    elif en < chunk_count:
                                        event_label += striped + ", "
                                else:
                                    #print("Adding person as subject-entity to entry event " + str(eventCount))
                                    graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(striped + " arrives")))
                                    graph.add((event, ome['has-subject-entity'], cast[striped]))
                                en += 1

                            if chunk_count > 1:
                                graph.add((event, ome['has-subject-entity'], group))
                                #print("Adding group as subject-entity to entry event " + str(eventCount))
                                groupCount = groupCount + 1
                                group = ns['group/' + str(groupCount)]

                            if(prior_event):
                                graph.add((event, ome['follows'], prior_event))
                                graph.add((prior_event, ome['precedes'], event))

                            prior_event = event
                            eventCount = eventCount + 1
                            event = ns['event/' + str(eventCount)]

                        if node.get("type") == "exit":
                            # Add Social Events for all the people who spoke
                            # since the last break (if there were any)
                            update = list()
                            update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
                            eventCount = update[0]
                            prior_event = update[1]

                            event = ns['event/' + str(eventCount)]

                            speakers = list()
                            speakerNodes = list()
                            speakerRef = list()

                            # Add Travel Event
                            graph.add((event, RDF.type, omj['Travel']))

                            if perseusid == None:
                                graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), Literal(ref)))
                            else:
                                #exitRef = xpointer
                                #graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef)))
                                graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(entRef)))

                            #print("Found exit event!")

                            if location != None:
                                graph.add((event, ome['from'], location))

                            involved = node.get("about")

                            if involved.strip() == "" or "-all" in involved:
                                # Remove everyone
                                #print("Exit all. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                                #for peep in currentCast:
                                #    print(peep)
                                if len(currentCast) > 1:
                                    #type = extractCURIEorURI(graph, "[omb:Group]")
                                    #graph.add((group, RDF.type, type))
                                    graph.add((group, RDF.type, omb['Group']))

                                event_label = ""
                                en = 1
                                for peep in currentCast:
                                    short_ref = ""
                                    for key, value in cast.iteritems():
                                        if peep == value:
                                            short_ref = key

                                    if len(currentCast) > 1:
                                        graph.add((group, ome['contains'], peep))
                                        if en == len(currentCast):
                                            event_label = event_label[0:-2] + " and " + short_ref
                                            graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))
                                        elif en < len(currentCast):
                                            event_label += short_ref + ", "
                                    else:
                                        #print("Adding person as subject-entity to exeunt event " + str(eventCount))
                                        graph.add((event, ome['has-subject-entity'], peep))
                                        graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves")))
                                    en += 1

                                if len(currentCast) > 1:
                                    graph.add((event, ome['has-subject-entity'], group))
                                    #print("Adding group as subject-entity to exeunt event " + str(eventCount))
                                    groupCount = groupCount + 1
                                    group = ns['group/' + str(groupCount)]

                                currentCast = list()

                            elif "!" in involved:
                                #print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                                #print("Event: " + involved)
                                if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
                                    involved = involved[1:-1]

                                involved = involved.strip()

                                if(len(involved) > 0 and involved[0] == "!" and involved[1] == "(" and involved[-1] == ")"):
                                    involved = involved[2:-1]

                                #print("involved: " + involved)

                                striped = involved.strip()
                                c_ids = striped.split()

                                chunks = list()
                                for stay in c_ids:
                                    #print("Staying: " + cast[stay])
                                    chunks.append(cast[stay])

                                staying = list()
                                going = list()
                                for player in currentCast:
                                    #print("Player: " + player)
                                    if player in chunks:
                                        staying.append(player)
                                    else:
                                        going.append(player)

                                going_count = len(going)
                                if going_count > 1:
                                    #type = extractCURIEorURI(graph, "[omb:Group]")
                                    #graph.add((group, RDF.type, type))
                                    graph.add((group, RDF.type, omb['Group']))

                                event_label = ""
                                en = 1
                                for ghost in going:
                                    #print("ghost: " + ghost)
                                    short_ref = ""
                                    for key, value in cast.iteritems():
                                        if ghost == value:
                                            short_ref = key

                                    if ghost in currentCast:
                                        currentCast.remove(ghost)
                                        #print("Current cast count: " + str(len(currentCast)))

                                    if going_count > 1:
                                        graph.add((group, ome['contains'], ghost))
                                        if en == len(going):
                                            event_label = event_label[0:-2] + " and " + short_ref
                                            graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))
                                        elif en < len(going):
                                            event_label += short_ref + ", "
                                    else:
                                        #print("Adding person as subject-entity to exit event " + str(eventCount))
                                        graph.add((event, ome['has-subject-entity'], ghost))
                                        graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(short_ref + " leaves")))
                                    en += 1

                                if going_count > 1:
                                    graph.add((event, ome['has-subject-entity'], group))
                                    #print("Adding group as subject-entity to exit event " + str(eventCount))
                                    groupCount = groupCount + 1
                                    group = ns['group/' + str(groupCount)]

                            else:
                                #print("Exit some. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast)))
                                if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
                                    involved = involved[1:-1]

                                striped = involved.strip()
                                chunks = striped.split()
                                #print("striped: " + striped)

                                chunk_count = len(chunks)
                                if chunk_count > 1:
                                    #type = extractCURIEorURI(graph, "[omb:Group]")
                                    #graph.add((group, RDF.type, type))
                                    graph.add((group, RDF.type, omb['Group']))

                                event_label = ""
                                en = 1
                                for chunk in chunks:
                                    #print("chunk: " + chunk)
                                    ghost = cast[chunk]
                                    #print("ghost: " + ghost)

                                    if ghost in currentCast:
                                        currentCast.remove(ghost)
                                        #print("Current cast count: " + str(len(currentCast)))

                                    if chunk_count > 1:
                                        graph.add((group, ome['contains'], ghost))
                                        if en == len(currentCast):
                                            event_label = event_label[0:-2] + " and " + chunk
                                            graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(event_label + " leave")))
                                        elif en < len(currentCast):
                                            event_label += chunk + ", "
                                    else:
                                        #print("Adding person as subject-entity to exit event " + str(eventCount))
                                        graph.add((event, ome['has-subject-entity'], ghost))
                                        graph.add((event, rdflib.URIRef('http://www.w3.org/2000/01/rdf-schema#label'), Literal(chunk + " leaves")))
                                    en += 1

                                if chunk_count > 1:
                                    graph.add((event, ome['has-subject-entity'], group))
                                    #print("Adding group as subject-entity to exit event " + str(eventCount))
                                    groupCount = groupCount + 1
                                    group = ns['group/' + str(groupCount)]

                            if(prior_event):
                                graph.add((event, ome['follows'], prior_event))
                                graph.add((prior_event, ome['precedes'], event))

                            prior_event = event
                            eventCount = eventCount + 1
                            event = ns['event/' + str(eventCount)]

                    #elif node.tag == "rs":
                    #    #print("Found rs node")
                    #    if parent:
                    #        #print("Parent type is " + parent.tag)
                    #        if parent.tag == "p" or parent.tag == "l":
                    #            refersTo.append(node.get("about"))

                    #parent = node

                # Add Social Events for all the people who spoke since the
                # last break (if there were any)
                #print("Final section of scene, currentCast:" + str(len(currentCast)) + " speakers: " + str(len(speakers)))
                update = list()
                update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location)
                eventCount = update[0]
                prior_event = update[1]

                event = ns['event/' + str(eventCount)]
                group = ns['group/' + str(groupCount)]

                speakers = list()
                speakerNodes = list()
                currentCast = list()
                speakerRef = list()

    print graph.serialize(format='xml')
#import rdfextras
from rdflib import ConjunctiveGraph, Namespace
from rdflib.namespace import DC, FOAF
#rdfextras.registerplugins() # so we can Graph.query()

owlNS = Namespace("http://www.w3.org/2002/07/owl#")
owlClass = owlNS["Class"]
owlObjectProperty = owlNS["ObjectProperty"]
owlDatatypeProperty = owlNS["DatatypeProperty"]
rdfNS = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfProperty = rdfNS["Property"]
rdfType = rdfNS["type"]
rdfsNS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
rdfsSubClassOf = rdfsNS["subClassOf"]
rdfsDomain = rdfsNS["domain"]
rdfsRange = rdfsNS["range"]

graph = ConjunctiveGraph()
graph.load("./data/Film_Tbox.owl")
s = graph.serialize(format='n3')
#print(s)
#print("graph has %s statements." % len(graph))

def isSubClassOf(subClass, superClass, graph):
    # A class is trivially a subclass of itself.
    if subClass == superClass:
        return True
    # Walk every rdfs:subClassOf edge; only give up once all parents have
    # been tried, otherwise the first non-matching parent ends the search.
    for parentClass in graph.objects(subClass, rdfsSubClassOf):
        if isSubClassOf(parentClass, superClass, graph):
            return True
    return False
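# Example use of isSubClassOf() against the loaded T-box. The two URIs are
# placeholders; substitute classes that Film_Tbox.owl actually defines. Note
# that the recursion has no cycle guard, so a (malformed) cyclic
# rdfs:subClassOf chain would recurse forever; tracking visited classes in a
# set would protect against that.
documentary = URIRef("http://example.org/film#Documentary")  # hypothetical
work = URIRef("http://example.org/film#Work")                # hypothetical
print(isSubClassOf(documentary, work, graph))  # True iff a subClassOf path exists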
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . @prefix : <tag://example.org,2007/literals-test#> . <http://example.org/thing> :plain "plain"; :integer 1; :float 1.1e0; :decimal 1.1 ; :string "string"^^xsd:string; :date "2007-04-28"^^xsd:date; :escape "a \\"test\\""; rdfs:label "Thing"@en, "Sak"@sv . """ graph = ConjunctiveGraph() graph.load(StringIO(testRdf), format='n3') PROLOGUE = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX t: <tag://example.org,2007/literals-test#> """ thing = URIRef("http://example.org/thing") SPARQL = PROLOGUE+" SELECT ?uri WHERE { ?uri %s . } " TEST_DATA = [ ('plain', SPARQL % 't:plain "plain"', [(thing,)]), ('integer', SPARQL % 't:integer 1', [(thing,)]), ('decimal', SPARQL % 't:decimal 1.1', [(thing,)]), ('float', SPARQL % 't:float 1.1e0', [(thing,)]),
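# A sketch of how TEST_DATA could be driven: each entry pairs a label and a
# SPARQL query with the bindings expected from the literals-test graph. The
# original runner is not shown in this snippet, so the loop below is an
# assumption about its shape.
for name, sparql, expected in TEST_DATA:
    rows = [tuple(row) for row in graph.query(sparql)]
    assert rows == expected, "%s: got %r, expected %r" % (name, rows, expected)
    print("%s: ok" % name)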
class Store: def __init__(self, tripleFile): self.graph = ConjunctiveGraph() self.storefn = abspath(tripleFile) self.storeuri = 'file://' + self.storefn if exists(self.storefn): self.graph.load(self.storeuri, format='n3') self.graph.bind('mo', MusicOntology) self.graph.bind('ourvocab', OurVocab) self.graph.bind('dc', DC) self.graph.bind('foaf', foaf) self.graph.bind('geo', geo) self.graph.bind('dbpediaowl', dbpediaowl) self.graph.bind('rev', 'http://purl.org/stuff/rev#') def save(self): self.graph.serialize(self.storeuri, format='n3') def addTrack(self, mbid, track): trackuri = URIRef('http://musicbrainz.org/recording/%s#_' % mbid) self.graph.add((trackuri, RDF.type, MusicOntology.Track)) self.graph.add((trackuri, DC.title, Literal(track['name']))) self.graph.add( (trackuri, OurVocab.has_playcount, Literal(track['playcount']))) self.graph.add((trackuri, OurVocab.has_listener_count, Literal(track['listeners']))) if track['artist']['mbid'] != '': artisturi = URIRef('http://musicbrainz.org/artist/%s#_' % track['artist']['mbid']) self.graph.add((artisturi, RDF.type, MusicOntology.MusicArtist)) self.graph.add((trackuri, MusicOntology.performer, artisturi)) self.graph.add( (artisturi, foaf.name, Literal(track['artist']['name']))) if isinstance(track['toptags'], dict) and 'tag' in track['toptags'].keys(): for tag in track['toptags']['tag']: if isinstance(tag, dict): self.graph.add( (trackuri, OurVocab.has_tag, Literal(tag['name']))) def addArtist(self, trackMBID, artistData, trackData): trackuri = URIRef('http://musicbrainz.org/recording/%s#_' % trackMBID) #If there is no mbid, it means there is no earlier artist entry in triplestore if trackData['artist']['mbid'] == '': artisturi = URIRef(artistData['artist']['value'].encode('utf-8')) if artistData['artist']['type'] == 'artist': self.graph.add( (artisturi, RDF.type, MusicOntology.MusicArtist)) else: self.graph.add((artisturi, RDF.type, MusicOntology.MusicGroup)) self.graph.add((trackuri, MusicOntology.performer, artisturi)) self.graph.add( (artisturi, foaf.name, Literal(trackData['artist']['name'].encode('utf-8')))) #if there is an artist entry, make sure the artist/band association is appropriate else: artisturi = URIRef('http://musicbrainz.org/artist/%s#_' % trackData['artist']['mbid']) if artistData['artist']['type'] == "band" and\ (artisturi, RDF.type, MusicOntology.MusicArtist) in self.graph: self.graph.remove( (artisturi, RDF.type, MusicOntology.MusicArtist)) self.graph.add((artisturi, RDF.type, MusicOntology.MusicGroup)) #now the location data! if 'hometown' not in artistData.keys(): return if "http" in artistData['hometown']['value']: townuri = URIRef(artistData['hometown']['value'].encode('utf-8')) if (townuri, RDF.type, dbpediaowl.Place) not in self.graph: self.graph.add((townuri, RDF.type, dbpediaowl.Place)) if "hometownName" in artistData.keys(): self.graph.add((townuri, foaf.name, Literal(artistData['hometownName'] ['value'].encode('utf-8')))) if "coordinates" in artistData.keys(): self.graph.add((townuri, geo.geometry, Literal(artistData['coordinates'] ['value'].encode('utf-8')))) self.graph.add((artisturi, dbpediaowl.hometown, townuri)) else: self.graph.add((artisturi, dbpediaowl.hometown, Literal(artistData['hometown']['value']))) def _matchAlbum(self, trackInfo, albumFiles): """ A function to return the correct match of an album given a track. Deprecated for most cases where the match is done using mbids. Use only for cases where there is no mbid link betweeb album and track. 
""" try: albumName = trackInfo['album']['name'] artistName = trackInfo['artist']['name'] except: return None for af in albumFiles: albumInfo = json.load(file(af)) albumInfo = albumInfo['album'] if albumName == albumInfo['name'] and artistName == albumInfo[ 'artist']: return af def addAlbum(self, trackMBID, albumInfo): """ A function to add album data into triple store. At the moment, only the releasedate is taken from the album data. More to be added soon. """ try: albumInfo = albumInfo['album'] except: return if 'releasedate' not in albumInfo.keys(): return trackuri = URIRef('http://musicbrainz.org/recording/%s#_' % trackMBID) self.graph.add( (trackuri, OurVocab.has_releasedate, Literal(albumInfo['releasedate'].strip().encode('utf-8'))))
class MirbaseDB(object):
    def __init__(self, db_path):
        self.g = ConjunctiveGraph()
        self.path = db_path
        self.choices = set()
        self.labels = {}

    def create_graph(self):
        self.g.open(self.path + "data.rdf", create=True)
        data = self.parse_mirbase(self.path)
        #g = ConjunctiveGraph(store="SPARQLUpdateStore")
        # g.bind()
        mirna_class = URIRef("http://purl.obolibrary.org/obo/SO_0000276")
        for mid in data:
            mirna_instance = URIRef(MIRBASE + data[mid]["acc"])
            self.g.add((mirna_instance, RDF.type, mirna_class))
            label = Literal(data[mid]["name"])
            self.g.add((mirna_instance, RDFS.label, label))
            description = Literal(data[mid]["description"])
            self.g.add((mirna_instance, RDFS.comment, description))
            for p in data[mid]["previous_names"]:
                if p.strip():
                    previous_name = Literal(p)
                    self.g.add((mirna_instance, MIRBASE["previous_acc"], previous_name))
            # every entry is expected to have gained a "mature" dict while
            # parsing mirna_pre_mature.txt
            for mature in data[mid]["mature"]:
                mature_instance = URIRef(MIRBASE + data[mid]["mature"][mature]["acc"])
                self.g.add((mature_instance, RDF.type, mirna_class))
                mature_label = Literal(data[mid]["mature"][mature]["name"])
                self.g.add((mature_instance, RDFS.label, mature_label))
                for mature_p in data[mid]["mature"][mature]["previous_names"]:
                    if mature_p.strip():
                        mature_previous_name = Literal(mature_p)
                        self.g.add((mature_instance, MIRBASE["previous_acc"], mature_previous_name))
                self.g.add((mirna_instance, MIRBASE["stemloopOf"], mature_instance))

    def parse_mirbase(self, mirbase_root):
        mirna_dic = {}
        with open(mirbase_root + "mirna.txt") as mirnas:
            for m in mirnas:
                props = m.strip().split("\t")
                mname = props[2]
                mid = props[0]
                macc = props[1]
                mdesc = props[4]
                mprev = props[3].split(";")
                if int(props[-1]) != 22:  # not Homo sapiens
                    continue
                mirna_dic[mid] = {}
                mirna_dic[mid]["name"] = mname
                mirna_dic[mid]["acc"] = macc
                mirna_dic[mid]["previous_names"] = mprev
                mirna_dic[mid]["description"] = mdesc
        mature_dic = {}
        with open(mirbase_root + "mirna_mature.txt") as mirnas:
            for m in mirnas:
                props = m.strip().split("\t")
                mname = props[1]
                mid = props[0]
                macc = props[3]
                # mdesc = props[4]
                mprev = props[2].split(";")
                if not mname.startswith("hsa-"):  # not Homo sapiens
                    continue
                mature_dic[mid] = {}
                mature_dic[mid]["name"] = mname
                mature_dic[mid]["previous_names"] = mprev
                mature_dic[mid]["acc"] = macc
        with open(mirbase_root + "mirna_pre_mature.txt") as mirnas:
            for m in mirnas:
                props = m.strip().split("\t")
                mid, matureid = props[:2]
                if mid in mirna_dic:
                    if "mature" not in mirna_dic[mid]:
                        mirna_dic[mid]["mature"] = {}
                    mirna_dic[mid]["mature"][matureid] = mature_dic[matureid]
        # pp.pprint(mirna_dic)
        return mirna_dic

    def map_label(self, label):
        label = label.lower()
        label = label.replace("microrna", "mir")
        label = label.replace("mirna", "mir")
        if not label.startswith("hsa-"):
            label = "hsa-" + label
        result = process.extractOne(label, self.choices)
        # result = process.extract(label, choices, limit=3)
        """if result[1] != 100:
            print
            print "original:", label.encode("utf-8"), result
            # if label[-1].isdigit():
            #     label += "a"
            # else:
            new_label = label + "-1"
            revised_result = process.extractOne(new_label, self.choices)
            if revised_result[1] != 100:
                new_label = label + "a"
                revised_result = process.extractOne(new_label, self.choices)
            if revised_result[1] > result[1]:
                result = revised_result
            print "revised:", label.encode("utf-8"), result"""
        return result

    def load_graph(self):
        self.g.load(self.path + "data.rdf")
        # print "Opened graph with {} triples".format(len(self.g))
        self.get_label_to_acc()
        self.choices = self.labels.keys()

    def get_label_to_acc(self):
        for subj, pred, obj in self.g.triples((None, RDFS.label, None)):
            self.labels[str(obj)] = str(subj)

    def save_graph(self):
        self.g.serialize(self.path + "data.rdf", format='pretty-xml')
        print('Triples in graph after add: ', len(self.g))
        self.g.close()
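# A usage sketch for MirbaseDB. db_path is assumed to be a directory that
# contains the miRBase dump files (mirna.txt, mirna_mature.txt,
# mirna_pre_mature.txt) and to end in a path separator, because the class
# concatenates file names directly onto it.
db = MirbaseDB("./mirbase/")
db.create_graph()   # parse the dumps and populate the RDF graph
db.save_graph()     # write data.rdf into the same directory
db.load_graph()     # reload and index labels for fuzzy matching
print(db.map_label("miR-21"))  # e.g. a (label, score) pair from fuzzywuzzy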
def convert(teifile, namespace): #graph_uri = "http://contextus.net/resource/blue_velvet/" ns = Namespace(namespace) graph = ConjunctiveGraph() graph.load(teifile, format="rdfa") graph.bind("default", ns) to_update = "" for prefix, nsuri in graph.namespaces(): #print("prefix: " + str(prefix) + " - " + str(nsuri)) if nsuri in ns: to_update = nsuri for s, p, o in graph: # print s, p, o if to_update != "" and to_update in s: graph.remove((s, p, o)) s = URIRef(s.replace(to_update, ns)) graph.add((s, p, o)) act = "" scene = "" line = "" char = 0 loc = 0 #timeline = ns['timeline/narrative'] #graph.add((timeline, RDF.type, ome['Timeline'])) tree = ET.parse(teifile) cast = dict() titleNode = tree.find('//title') castItems = tree.findall('/text/body/div1/castList//castItem') for castItem in castItems: actorNode = castItem.find('actor') roleNode = castItem.find('role') if roleNode != None: id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id") #print("Found castItem!") actor = None role = None # Check to see if we already have an entry if (roleNode != None and roleNode.get("about")): charname = roleNode.get("about") if (charname.find(":") > -1): nmsp, nom = charname.split(":", 1) charcode = "character/" + str(char) charref = nmsp + ":" + charcode + "]" role = extractCURIEorURI(graph, charref, nom[0:-1]) char += 1 #print("1:" + charname + ": adding id " + id + " to " + role) else: role = extractCURIEorURI(graph, charname) #print("2:" + charname + ": adding id " + id + " to " + role) cast[id] = role graph.add((role, RDF.type, omb['Character'])) #print(charname + ": adding id " + id + " to " + role) if (actorNode != None and actorNode.get("about")): actor = extractCURIEorURI(graph, actorNode.get("about")) graph.add((actor, RDF.type, omb['Being'])) if actor != None and role != None: graph.add((actor, omb['portrays'], role)) graph.add((role, omb['portrayed-by'], actor)) eventCount = 1 groupCount = 1 prior_event = None actItems = tree.findall('/text/body/div1') ref = "" for actItem in actItems: if actItem.get("type") == "act": act = actItem.get("n") sceneItems = actItem.findall('div2') for sceneItem in sceneItems: #print("Found sceneItems!") if sceneItem.get("type") == "scene": scene = sceneItem.get("n") # Work out the location of this scene location = None stageItems = sceneItem.findall("stage") #internalnum = 1 stagenum = 0 speechnum = 1 for stageItem in stageItems: if stageItem.get("type") == "location": # The RDFa parser doesn't handle the type - so we can grab that here. if stageItem.get("about") != None: locname = stageItem.get("about") # Adding location type/oml:space for location if stageItem.get("typeof") and stageItem.get("about"): type = extractCURIEorURI(graph, stageItem.get("typeof")) #print "1. Location: " + str(location) + " Type: " + str(type) elif stageItem.get("about"): #print "2. 
Location: " + str(locname) type = extractCURIEorURI(graph, oml['Space']) # Get location value and add rdfs:label is location is not using the TEI value if (locname.find(":") > -1): nmsp, nom = locname.split(":", 1) loccode = "location/" + str(loc) locref = nmsp + ":" + loccode + "]" location = extractCURIEorURI( graph, locref, nom[0:-1]) loc += 1 graph.add(( location, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(nom[0:-1]))) else: location = extractCURIEorURI( graph, stageItem.get("about")) # Add location to graph graph.add((location, RDF.type, type)) else: location = "" #print("Adding location type: " + type + " (" + location + ")") if cast: # Work out a list of all cast in a given section currentCast = list() speakers = list() # Iterate through elements within stageItem # Find speaker events and add to list of current cast for inclusion in social event # Find reference events and add to ongoing social event ? # Find stage events # If event is an entrance then # create social event for people talking before entrance # create travel event i.e. entrance # add new arrival to current cast list # If event is exit event then # create social event for people talking before exit # create travel event i.e. exit # if leavers are not named directly the calculate who is leaving # remove leavers from current cast list # If reach end of scene then create social event with current cast list #Also need to check if social event before exit has same composition as social event after exit since then they should be merged event = ns['event/' + str(eventCount)] group = ns['group/' + str(groupCount)] refersTo = list() #parent = None speakerNodes = list() speakerRef = list() xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str( perseusid) + ":act=" + str(act) + ":scene=" + str(scene) stagecount = 0 stage_array = list() for node in sceneItem.getiterator(): #print("Node: " + node.tag) """ if node.tag == "lb": if node.get("ed") == "F1": line = node.get("n") if titleNode != None: ref = titleNode.text + " " + str(act) + "." + str(scene) + "." + str(line) else: ref = str(act) + "." + str(scene) + "." 
+ str(line) #xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "'])" xpointer = "http://www.perseus.tufts.edu/hopper/xmlchunk?doc=Perseus:text:" + str(perseusid) + ":act=" + str(act) + ":scene=" + str(scene) #print("Ref: " + xpointer) """ if node.tag == "sp": id = node.get("who") if id and cast: speakers.append(cast[id[1:]]) speakerNodes.append(node) if perseusid == None: speakerRef.append(ref) else: #speechRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(int(line) + 1) + "']/ancestor::sp)" speechRef = xpointer + "#xpointer(//div2/sp[" + str( speechnum) + "])" speakerRef.append(speechRef) #print("Line ref: " + ref) if cast[id[1:]] not in currentCast: currentCast.append(cast[id[1:]]) #internalnum = 1 speechnum += 1 stagecount = 0 previousl = 0 for subnode in node.getiterator(): if subnode.tag == "l": previousl += 1 if subnode.tag == "stage": #print ("Stagecount: " + str(stagecount) + " Previousl: " + str(previousl) + "\n") stage_array.append(previousl) stagecount += 1 elif node.tag == "stage": if stagecount > 0: s_max = len(stage_array) diff = s_max - stagecount #if diff == 0: # stagenum += 1 entRef = xpointer + "#xpointer(//div2/sp[" + str( speechnum - 1) + "]/l[" + str( stage_array[diff]) + "]/stage)" #internalnum += 1 stagecount -= 1 else: stagenum += 1 entRef = xpointer + "#xpointer(//div2/stage[" + str( stagenum) + "])" if node.get("type") == "entrance": # Add Social Events for all the people who spoke since the last break (if there were any) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/' + str(eventCount)] speakers = list() speakerNodes = list() speakerRef = list() # Add Travel Event graph.add((event, RDF.type, omj['Travel'])) if perseusid == None: graph.add(( event, rdflib.URIRef( "http://www.w3.org/2000/01/rdf-schema#seeAlso" ), Literal(ref))) else: #entRef = xpointer + "#xpointer(//lb[@ed='F1' and @n='" + str(line) + "']/following-sibling::*[1]/self::stage)" graph.add(( event, rdflib.URIRef( "http://www.w3.org/2000/01/rdf-schema#seeAlso" ), URIRef(entRef))) #print("Entrance event. 
GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #print("Found entrence event!") if location: graph.add((event, ome['to'], location)) involved = node.get("about") if (len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] chunks = involved.split() chunk_count = len(chunks) if chunk_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for chunk in chunks: striped = chunk.strip() if (len(striped) > 0 and striped[0] == "[" and striped[-1] == "]"): striped = striped[1:-1] currentCast.append(cast[striped]) if chunk_count > 1: graph.add( (group, ome['contains'], cast[striped])) if en == chunk_count: event_label = event_label[ 0:-2] + " and " + striped graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(event_label + " arrive"))) elif en < chunk_count: event_label += striped + ", " else: #print("Adding person as subject-entity to entry event " + str(eventCount)) graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(striped + " arrives"))) graph.add((event, ome['has-subject-entity'], cast[striped])) en += 1 if chunk_count > 1: graph.add( (event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to entry event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/' + str(groupCount)] if (prior_event): graph.add((event, ome['follows'], prior_event)) graph.add((prior_event, ome['precedes'], event)) prior_event = event eventCount = eventCount + 1 event = ns['event/' + str(eventCount)] if node.get("type") == "exit": # Add Social Events for all the people who spoke since the last break (if there were any) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/' + str(eventCount)] speakers = list() speakerNodes = list() speakerRef = list() # Add Travel Event graph.add((event, RDF.type, omj['Travel'])) if perseusid == None: graph.add(( event, rdflib.URIRef( "http://www.w3.org/2000/01/rdf-schema#seeAlso" ), Literal(ref))) else: #exitRef = xpointer #graph.add((event, rdflib.URIRef("http://www.w3.org/2000/01/rdf-schema#seeAlso"), URIRef(exitRef))) graph.add(( event, rdflib.URIRef( "http://www.w3.org/2000/01/rdf-schema#seeAlso" ), URIRef(entRef))) #print("Found entrence event!") if location != None: graph.add((event, ome['from'], location)) involved = node.get("about") if involved.strip() == "" or "-all" in involved: # Remove everyone #print("Exit all. 
GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #for peep in currentCast: # print(peep) if len(currentCast) > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for peep in currentCast: short_ref = "" for key, value in cast.iteritems(): if peep == value: short_ref = key if len(currentCast) > 1: graph.add((group, ome['contains'], peep)) if en == len(currentCast): event_label = event_label[ 0:-2] + " and " + short_ref graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(event_label + " leave"))) elif en < len(currentCast): event_label += short_ref + ", " else: #print("Adding person as subject-entity to exuant event " + str(eventCount)) graph.add( (event, ome['has-subject-entity'], peep)) graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(short_ref + " leaves"))) en += 1 if len(currentCast) > 1: graph.add( (event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exuant event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/' + str(groupCount)] currentCast = list() elif "!" in involved: #print("Exit except some. GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) #print("Event: " + involved); if (len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] involved = involved.strip() if (len(involved) > 0 and involved[0] == "!" and involved[1] == "(" and involved[-1] == ")"): involved = involved[2:-1] #print("involved: " + involved) striped = involved.strip() c_ids = striped.split() chunks = list() for stay in c_ids: #print("Staying: " + cast[stay]) chunks.append(cast[stay]) staying = list() going = list() for player in currentCast: #print("Player: " + player) if player in chunks: staying.append(player) else: going.append(player) going_count = len(going) if going_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for ghost in going: #print("ghost: " + ghost) short_ref = "" for key, value in cast.iteritems(): if ghost == value: short_ref = key if ghost in currentCast: currentCast.remove(ghost) #print("Current cast count: " + str(len(currentCast))) if going_count > 1: graph.add((group, ome['contains'], ghost)) if en == len(going): event_label = event_label[ 0:-2] + " and " + short_ref graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(event_label + " leave"))) elif en < len(going): event_label += short_ref + ", " else: #print("Adding person as subject-entity to exit event " + str(eventCount)) graph.add( (event, ome['has-subject-entity'], ghost)) graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(short_ref + " leaves"))) en += 1 if going_count > 1: graph.add( (event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exit event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/' + str(groupCount)] else: #print("Exit some. 
GroupCount: " + str(groupCount) + ", EventCount: " + str(eventCount) + ", current cast count: " + str(len(currentCast))) if (len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"): involved = involved[1:-1] striped = involved.strip() chunks = striped.split() #print("striped: " + striped) chunk_count = len(chunks) if chunk_count > 1: #type = extractCURIEorURI(graph, "[omb:Group]") #graph.add((group, RDF.type, type)) graph.add((group, RDF.type, omb['Group'])) event_label = "" en = 1 for chunk in chunks: #print("chunk: " + chunk) ghost = cast[chunk] #print("ghost: " + ghost) if ghost in currentCast: currentCast.remove(ghost) #print("Current cast count: " + str(len(currentCast))) if chunk_count > 1: graph.add((group, ome['contains'], ghost)) if en == len(currentCast): event_label = event_label[ 0:-2] + " and " + chunk graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(event_label + " leave"))) elif en < len(currentCast): event_label += chunk + ", " else: #print("Adding person as subject-entity to exit event " + str(eventCount)) graph.add( (event, ome['has-subject-entity'], ghost)) graph.add(( event, rdflib.URIRef( 'http://www.w3.org/2000/01/rdf-schema#label' ), Literal(chunk + " leaves"))) en += 1 if chunk_count > 1: graph.add( (event, ome['has-subject-entity'], group)) #print("Adding group as subject-entity to exit event " + str(eventCount)) groupCount = groupCount + 1 group = ns['group/' + str(groupCount)] if (prior_event): graph.add((event, ome['follows'], prior_event)) graph.add((prior_event, ome['precedes'], event)) prior_event = event eventCount = eventCount + 1 event = ns['event/' + str(eventCount)] #elif node.tag == "rs": # #print("Found rs node") # if parent: # #print("Parent type is " + parent.tag) # if parent.tag == "p" or parent.tag == "l": # refersTo.append(node.get("about")) #parent = node # Add Social Events for all the people who spoke since the last break (if there were any) #print("Final section of scene, currentCast:" + str(len(currentCast)) + " sperkers: " + str(len(speakers))) update = list() update = getSocial(graph, ns, speakers, speakerNodes, speakerRef, cast, currentCast, eventCount, event, prior_event, location) eventCount = update[0] prior_event = update[1] event = ns['event/' + str(eventCount)] group = ns['group/' + str(groupCount)] speakers = list() speakerNodes = list() currentCast = list() speakerRef = list() print graph.serialize(format='xml')
def convert(teifile, namespace): #graph_uri = "http://contextus.net/resource/blue_velvet/" ns = Namespace(namespace) graph = ConjunctiveGraph() graph.load(teifile, format="rdfa") tree = ET.parse(teifile) cast = dict() castItems = tree.findall('/text/body/div1/castList//castItem') for castItem in castItems: actorNode = castItem.find('actor') roleNode = castItem.find('role') id = roleNode.get("{http://www.w3.org/XML/1998/namespace}id") #print("Found castItem!") actor = None role = None # Check to see if we already have an entry if(roleNode != None and roleNode.get("about")): role = extractCURIEorURI(graph, roleNode.get("about")) cast[id] = role graph.add((role, RDF.type, omb['Character'])) #print("Adding id " + id + " to " + role) if(actorNode != None and actorNode.get("about")): actor = extractCURIEorURI(graph, actorNode.get("about")) graph.add((actor, RDF.type, omb['Being'])) if actor != None and role != None: graph.add((actor, omb['portrays'], role)) graph.add((role, omb['portrayed-by'], actor)) eventCount = 1 prior_event = None sceneItems = tree.findall('/text/body/div1/div2') for sceneItem in sceneItems: #print("Found sceneItems!") # Work out the location of this scene location = None stageItems = sceneItem.findall("stage") for stageItem in stageItems: if stageItem.get("type") == "location": # The RDFa parser doesn't handle the type - so we can grab that here. if stageItem.get("typeof") and stageItem.get("about"): type = extractCURIEorURI(graph, stageItem.get("typeof")) location = extractCURIEorURI(graph, stageItem.get("about")) graph.add((location, RDF.type, type)) elif stageItem.get("about"): type = extractCURIEorURI(graph, "[loc:Space]") location = extractCURIEorURI(graph, stageItem.get("about")) graph.add((location, RDF.type, type)) #print("Adding location type: " + type + " (" + location + ")") if cast: # Work out a list of all cast in a given section currentCast = list() previousCast = list() # Iterate through elements within stageItem # Find speaker events and add to list of current cast for inclusion in social event # Find reference events and add to ongoing social event ? # Find stage events # If event is an entrance then # create social event for people talking before entrance # create travel event i.e. entrance # add new arrival to current cast list # If event is exit event then # create social event for people talking before exit # create travel event i.e. 
exit
# if leavers are not named directly then calculate who is leaving
# remove leavers from current cast list
# If reach end of scene then create social event with current cast list
#Also need to check if social event before exit has same composition as social event after exit since then they should be merged
event = ns['event/'+str(eventCount)]
group = ns['group/'+str(eventCount)]

for node in sceneItem.getiterator():
    #print("Node: " + node.tag)
    if node.tag == "sp":
        id = node.get("who")  # the speaker reference on the current <sp> element
        if id and cast:
            currentCast.append(cast[id[1:]])
    elif node.tag == "stage":
        if node.get("type") == "entrance":
            # Add Social Event if there are people in the CurrentCast list
            # Add Travel Event
            graph.add((event, RDF.type, omj['Travel']))
            #print("Found entrance event!")
            if location:
                graph.add((event, ome['to'], location))
            involved = node.get("about")  # the stage direction currently being visited
            if(len(involved) > 0 and involved[0] == "[" and involved[-1] == "]"):
                involved = involved[1:-1]
            chunks = involved.split()
            chunk_count = len(chunks)
            if chunk_count > 1:
                type = extractCURIEorURI(graph, "[omb:Group]")
                graph.add((group, RDF.type, type))
            for chunk in chunks:
                striped = chunk.strip()
                peep = extractCURIEorURI(graph, striped)
                if chunk_count > 1:
                    graph.add((group, ome['contains'], peep))
                else:
                    graph.add((event, ome['has-subject-entity'], peep))
            if chunk_count > 1:
                graph.add((event, ome['has-subject-entity'], group))
            if(prior_event):
                graph.add((event, ome['follows'], prior_event))
                graph.add((prior_event, ome['precedes'], event))
            prior_event = event
            eventCount = eventCount + 1
            event = ns['event/'+str(eventCount)]
            group = ns['group/'+str(eventCount)]

print graph.serialize(format='xml')
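# This earlier, simpler variant of convert() only handles entrance events and
# never removes anyone from currentCast, so exits are effectively ignored;
# the longer version above adds exit handling, group membership and
# human-readable event labels. Illustrative call only (the commented-out
# graph_uri in the function suggests the blue_velvet namespace; the file
# name is hypothetical):
convert("data/blue_velvet_tei.xml", "http://contextus.net/resource/blue_velvet/")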
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . @prefix : <tag://example.org,2007/literals-test#> . <http://example.org/thing> :plain "plain"; :integer 1; :float 1.1e0; :decimal 1.1 ; :string "string"^^xsd:string; :date "2007-04-28"^^xsd:date; :escape "a \\"test\\""; rdfs:label "Thing"@en, "Sak"@sv . """ graph = ConjunctiveGraph() graph.load(StringIO(testRdf), format='n3') PROLOGUE = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX t: <tag://example.org,2007/literals-test#> """ thing = URIRef("http://example.org/thing") SPARQL = PROLOGUE + " SELECT ?uri WHERE { ?uri %s . } " TEST_DATA = [('plain', SPARQL % 't:plain "plain"', [(thing, )]), ('integer', SPARQL % 't:integer 1', [(thing, )]), ('decimal', SPARQL % 't:decimal 1.1', [(thing, )]), ('float', SPARQL % 't:float 1.1e0', [(thing, )]), ('langlabel_en', SPARQL % 'rdfs:label "Thing"@en', [(thing, )]),
def mma2RDF(infilename, outfilename, format="xml", audiofilename=None, withdescriptions=False): if withdescriptions: commonchords = ConjunctiveGraph() commonchords.load("CommonChords.rdf") extrachords = ConjunctiveGraph() # Compile mma file and grab output lines = os.popen(mmabin + ' "' + infilename + '" -nrw').readlines() print "\n".join(lines) # # Initial model bits # mi = mopy.MusicInfo() homepage = mopy.foaf.Document("http://sourceforge.net/projects/motools") mi.add(homepage) program = mopy.foaf.Agent() program.name = "mma2RDF.py" program.homepage = homepage mi.add(program) tl = TimeLine("#tl") tl.label = "Timeline derived from " + infilename tl.maker = program mi.add(tl) # extract tempo from mma file tempo = 60 mmafile = open(infilename, "r") for line in mmafile: if line.startswith("Tempo "): tempo = int(line[len("Tempo ") :].strip().split()[0]) print "Found tempo = " + str(tempo) break lineNum = 1 thisBar_i = None i = None t_secs = 0.0 for line in lines: print "parsing line " + str(lineNum) try: # i = None barNum = getBarNum(line) lastBar_i = thisBar_i thisBar_i = Interval("#i_" + str(barNum)) thisBar_i.beginsAtDuration = secondsToXSDDuration(t_secs) if lastBar_i != None: lastBar_i.endsAtDuration = secondsToXSDDuration(t_secs) thisBar_i.intervalAfter = lastBar_i lastBar_i.intervalBefore = thisBar_i mi.add(thisBar_i) chordMMASymbols = getChordSymbols(line) beatNum = 1 for chordMMASymbol in chordMMASymbols: if chordMMASymbol != "/": print " handling new chord symbol" if i != None: print " terminating last interval" i.endsAtDuration = secondsToXSDDuration(t_secs) mi.add(i) i = Interval("#i_" + str(barNum) + "_" + str(beatNum)) i.onTimeLine = tl i.beginsAtDuration = secondsToXSDDuration(t_secs) chordURI = "http://purl.org/ontology/chord/symbol/" + mmaSymbolToChordSymbol( chordMMASymbol ).replace("#", "s").replace(",", "%2C") if ( withdescriptions and len(list(commonchords.predicate_objects(URIRef(chordURI)))) == 0 and len(list(extrachords.predicate_objects(URIRef(chordURI)))) == 0 ): # Deref to grab chord info print "loading <" + chordURI + ">..." extrachords.load(chordURI) c = Chord(chordURI) c_event = ChordEvent("#ce_" + str(barNum) + "_" + str(beatNum)) c_event.chord = c c_event.time = i c_event.label = mmaSymbolToChordSymbol(chordMMASymbol) mi.add(c) mi.add(c_event) mi.add(i) print " added new chord event for " + chordURI else: if beatNum == 1: # Need to continue the last seen chord print " continuing last bar's chord" # i = Interval("i_"+str(barNum)+"_"+str(beatNum)) # i.onTimeLine = tl # i.beginsAtDuration = secondsToXSDDuration(t_secs) # c_event = ChordEvent("ce_"+str(barNum)+"_"+str(beatNum)) # c_event.chord = c # c_event.time = i # mi.add(c_event); mi.add(i) beatNum += 1 t_secs += 60.0 / tempo except Exception, e: print ("ERROR : Problem parsing input file at line " + str(lineNum) + " !\n") raise lineNum += 1
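# Invoking mma2RDF on an MMA source file. mmabin, mopy, TimeLine, Interval,
# Chord, ChordEvent and the helper functions (getBarNum, getChordSymbols,
# mmaSymbolToChordSymbol, secondsToXSDDuration) are module globals defined
# elsewhere, so this call is illustrative only and the file names are made up.
mma2RDF("song.mma", "song.rdf", format="xml", withdescriptions=False)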
class PreProcessor(object): def __init__(self, kg_path): self.kg_path = kg_path self.ent_dict = dict() self.rel_dict = dict() self.g = ConjunctiveGraph() self.unique_msgs = self.ent_dict.copy() def load_knowledge_graph(self, format='xml', exclude_rels=[], clean_schema=True, amberg_params=None, excluded_entities=None): self.g.load(self.kg_path, format=format) # remove triples with excluded relation remove_rel_triples(self.g, exclude_rels) # remove triples with relations between class-level constructs if clean_schema: remove_rel_triples(self.g, schema_relations) if excluded_entities is not None: remove_ent_triples(self.g, excluded_entities) if amberg_params: path_to_events = amberg_params[0] max_events = amberg_params[1] self.merged = get_merged_dataframe(path_to_events, max_events) self.unique_msgs, unique_vars, unique_mods, unique_fes = get_unique_entities( self.merged) update_amberg_ontology(self.g, self.ent_dict, self.unique_msgs, unique_mods, unique_fes, unique_vars, self.merged) self.update_entity_relation_dictionaries() def update_entity_relation_dictionaries(self): """ Given an existing entity dictionary, update it to *ontology* :param ontology: :param ent_dict: the existing entity dictionary :return: """ ent_counter = 0 fixed_ids = set([id for id in self.ent_dict.values()]) # sorting ensures equal random splits on equal seeds for h in sorted( set(self.g.subjects(None, None)).union( set(self.g.objects(None, None)))): uni_h = unicode(h) if uni_h not in self.ent_dict: while ent_counter in fixed_ids: ent_counter += 1 self.ent_dict.setdefault(uni_h, ent_counter) ent_counter += 1 # add new relations to dict for r in sorted(set(self.g.predicates(None, None))): uni_r = unicode(r) if uni_r not in self.rel_dict: self.rel_dict.setdefault(uni_r, len(self.rel_dict)) def load_unique_msgs_from_txt(self, path, max_events=None): """ Assuming csv text files with two columns :param path: :return: """ with open(path, "rb") as f: for line in f: split = line.split(',') try: emb_id = int(split[1].strip()) except: print("Error reading id of {0} in given dictionary".format( line)) # skip this event entitiy, treat it as common entitiy later on continue self.ent_dict[split[0]] = emb_id # sort ascending w.r.t. embedding id, in case of later stripping # self.ent_dict = sorted(self.ent_dict.items(), key=operator.itemgetter(1), reverse=False) self.unique_msgs = self.ent_dict.copy() if max_events is not None: all_msgs = sorted(self.unique_msgs.items(), key=operator.itemgetter(1), reverse=False) self.unique_msgs = dict(all_msgs[:max_events]) excluded_events = dict(all_msgs[max_events:]).keys() return excluded_events def prepare_sequences(self, path_to_input, use_dict=True): """ Dumps pickle for sequences and dictionary :param data_frame: :param file_name: :param index: :param classification_event: :return: """ print("Preparing sequential data...") with open(path_to_input, "rb") as f: result = [] for line in f: entities = line.split(',') if use_dict: result.append([ int(e.strip()) for e in entities if int(e.strip()) in self.unique_msgs.values() ]) else: result.append([int(e.strip()) for e in entities]) print("Processed {0} sequences".format(len(result))) return result def get_vocab_size(self): return len(self.unique_msgs) def get_ent_dict(self): return self.ent_dict def get_rel_dict(self): return self.rel_dict def get_kg(self): return self.g def get_unique_msgs(self): return self.unique_msgs def get_merged(self): return self.merged
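# A usage sketch for PreProcessor: load a knowledge graph, drop schema-level
# triples, then build the entity and relation id dictionaries used by the
# embedding code downstream. The file name and the excluded relation URI are
# placeholders.
pre = PreProcessor("./data/knowledge_graph.rdf")
pre.load_knowledge_graph(format='xml',
                         exclude_rels=['http://example.org/ignoredRelation'],
                         clean_schema=True)
print("entities: %d, relations: %d" % (len(pre.get_ent_dict()),
                                       len(pre.get_rel_dict())))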
def query_test(t): uri, name, comment, data, graphdata, query, resfile, syntax = t # the query-eval tests refer to graphs to load by resolvable filenames rdflib_sparql_module.SPARQL_LOAD_GRAPHS = True if uri in skiptests: raise SkipTest() def skip(reason="(none)"): print "Skipping %s from now on." % uri f = open("skiptests.list", "a") f.write("%s\t%s\n" % (uri, reason)) f.close() try: g = ConjunctiveGraph() if data: g.default_context.load(data, format=_fmt(data)) if graphdata: for x in graphdata: g.load(x, format=_fmt(x)) if not resfile: # no result - syntax test if syntax: translateQuery(parseQuery(open(query[7:]).read()), base=urljoin(query, ".")) else: # negative syntax test try: translateQuery(parseQuery(open(query[7:]).read()), base=urljoin(query, ".")) assert False, "Query should not have parsed!" except: pass # it's fine - the query should not parse return # eval test - carry out query res2 = g.query(open(query[7:]).read(), base=urljoin(query, ".")) if resfile.endswith("ttl"): resg = Graph() resg.load(resfile, format="turtle", publicID=resfile) res = RDFResultParser().parse(resg) elif resfile.endswith("rdf"): resg = Graph() resg.load(resfile, publicID=resfile) res = RDFResultParser().parse(resg) elif resfile.endswith("srj"): res = Result.parse(open(resfile[7:]), format="json") elif resfile.endswith("tsv"): res = Result.parse(open(resfile[7:]), format="tsv") elif resfile.endswith("csv"): res = Result.parse(open(resfile[7:]), format="csv") # CSV is lossy, round-trip our own resultset to # lose the same info :) # write bytes, read strings... s = BytesIO() res2.serialize(s, format="csv") print s.getvalue() s = StringIO(s.getvalue().decode("utf-8")) # hmm ? res2 = Result.parse(s, format="csv") else: res = Result.parse(open(resfile[7:]), format="xml") if not DETAILEDASSERT: eq(res.type, res2.type, "Types do not match") if res.type == "SELECT": eq(set(res.vars), set(res2.vars), "Vars do not match") comp = bindingsCompatible(set(res), set(res2)) assert comp, "Bindings do not match" elif res.type == "ASK": eq(res.askAnswer, res2.askAnswer, "Ask answer does not match") elif res.type in ("DESCRIBE", "CONSTRUCT"): assert isomorphic(res.graph, res2.graph), "graphs are not isomorphic!" else: raise Exception("Unknown result type: %s" % res.type) else: eq(res.type, res2.type, "Types do not match: %r != %r" % (res.type, res2.type)) if res.type == "SELECT": eq(set(res.vars), set(res2.vars), "Vars do not match: %r != %r" % (set(res.vars), set(res2.vars))) assert bindingsCompatible(set(res), set(res2)), "Bindings do not match: \n%s\n!=\n%s" % ( res.serialize(format="txt", namespace_manager=g.namespace_manager), res2.serialize(format="txt", namespace_manager=g.namespace_manager), ) elif res.type == "ASK": eq( res.askAnswer, res2.askAnswer, "Ask answer does not match: %r != %r" % (res.askAnswer, res2.askAnswer), ) elif res.type in ("DESCRIBE", "CONSTRUCT"): assert isomorphic(res.graph, res2.graph), "graphs are not isomorphic!" 
else: raise Exception("Unknown result type: %s" % res.type) except Exception, e: if isinstance(e, AssertionError): failed_tests.append(uri) fails[str(e)] += 1 else: error_tests.append(uri) errors[str(e)] += 1 if DEBUG_ERROR and not isinstance(e, AssertionError) or DEBUG_FAIL: print "======================================" print uri print name print comment if not resfile: if syntax: print "Positive syntax test" else: print "Negative syntax test" if data: print "----------------- DATA --------------------" print ">>>", data print open(data[7:]).read() if graphdata: print "----------------- GRAPHDATA --------------------" for x in graphdata: print ">>>", x print open(x[7:]).read() print "----------------- Query -------------------" print ">>>", query print open(query[7:]).read() if resfile: print "----------------- Res -------------------" print ">>>", resfile print open(resfile[7:]).read() try: pq = parseQuery(open(query[7:]).read()) print "----------------- Parsed ------------------" pprintAlgebra(translateQuery(pq, base=urljoin(query, "."))) except: print "(parser error)" print decodeStringEscape(unicode(e)) import pdb pdb.post_mortem(sys.exc_info()[2]) # pdb.set_trace() # nose.tools.set_trace() raise
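# query_test() is meant to be driven by a generator that yields one tuple per
# entry of the W3C SPARQL test-suite manifest; the tuple layout matches the
# unpacking at the top of query_test(). The manifest reader is not shown, so
# the driver below is only a sketch.
from nose import SkipTest  # matches the SkipTest raised for skiptests entries

def run_suite(manifest_entries):
    for t in manifest_entries:
        try:
            query_test(t)
        except SkipTest:
            continue  # test was on the skip list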