def test_html_decoded_entity_xhtml(self):
    """A pre-decoded HTML entity round-trips through the RDFa parser
    as the expected unicode literal."""
    # html5lib under Jython is known to mishandle HTML entities.
    if platform.system() == "Java":
        raise SkipTest('problem with HTML entities for html5lib in Jython')
    graph = ConjunctiveGraph()
    graph.parse(data=htmlentitydecode(html), format='rdfa')
    self.assertEqual(len(graph), 1)
    title = graph.value(URIRef("http://example.com"),
                        URIRef("http://purl.org/dc/terms/title"))
    self.assertEqual(title, u"Exampl\xe9")
def getRecentComments(self, n=10, notOlderThan=None, withSpam=False):
    """Yield a dict of locals (via ``vars()``) for each of the ``n``
    most recent comments, newest first.

    :param n: maximum number of comments to yield.
    :param notOlderThan: if given, only comments created within this
        many days are returned.
    :param withSpam: when False (default), the ``self.notSpam`` mongo
        filter is applied to exclude spam.

    Each yielded dict contains (among other locals) ``parent``, ``uri``,
    ``created``, ``content``, ``creator``, ``docId`` and ``isSpam``.
    NOTE(review): ``isSpam`` actually holds the raw ``type`` field of
    the mongo doc, not a boolean — confirm consumers expect that.
    """
    self.mongo['comment'].ensure_index('created')
    spec = {}
    if not withSpam:
        # BUGFIX: copy the shared filter dict. The original assigned
        # self.notSpam directly, so adding the 'created' clause below
        # mutated self.notSpam and leaked the date filter into every
        # subsequent call.
        spec = dict(self.notSpam)
    if notOlderThan is not None:
        now = datetime.datetime.now(tzlocal())
        spec['created'] = {
            '$gt': now - datetime.timedelta(days=notOlderThan)}
    for doc in self.mongo['comment'].find(spec, limit=n,
                                          sort=[('created', -1)]):
        # Each comment row stores an N3 payload; parse it to pull out
        # the interesting values.
        g = ConjunctiveGraph()
        g.parse(StringInputSource(doc['n3'].encode('utf8')), format='n3')
        parent, _, uri = g.triples((None, SIOC.has_reply, None)).next()
        created = g.value(uri, DCTERMS.created)
        content = g.value(uri, CONTENT.encoded)
        creator = g.value(uri, SIOC.has_creator)
        docId = str(doc['_id'])
        isSpam = doc.get('type', '')
        # vars() exposes *all* locals (including self, spec, doc, g) to
        # the caller — kept for backward compatibility.
        yield vars()
def test_html_entity_xhtml(self):
    """With the raw (undecoded) entity, the minidom-based path yields a
    literal with the entity stripped."""
    # minidom strips HTML entities on Python 3.
    if sys.version_info[0] == 3:
        raise SkipTest('minidom parser strips HTML entities in Python 3.2')
    # html5lib under Jython is known to mishandle HTML entities.
    if platform.system() == "Java":
        raise SkipTest('problem with HTML entities for html5lib in Jython')
    graph = ConjunctiveGraph()
    warnings.simplefilter('ignore', UserWarning)
    graph.parse(data=html, format='rdfa')
    self.assertEqual(len(graph), 1)
    title = graph.value(URIRef("http://example.com"),
                        URIRef("http://purl.org/dc/terms/title"))
    self.assertTrue(title.eq(u"Exampl"))
def test_html_entity_xhtml(self):
    """With the raw (undecoded) entity, the minidom-based path yields a
    literal with the entity stripped (skipped outside Python 2.5-2.x)."""
    # minidom strips entities on Python 3; pre-2.5 lacks the needed parser.
    if sys.version_info[0] == 3 or sys.version_info[:2] < (2,5):
        raise SkipTest('minidom parser strips HTML entities in Python 3.2')
    # html5lib under Jython is known to mishandle HTML entities.
    if platform.system() == "Java":
        raise SkipTest('problem with HTML entities for html5lib in Jython')
    graph = ConjunctiveGraph()
    warnings.simplefilter('ignore', UserWarning)
    graph.parse(data=html, format='rdfa')
    self.assertEqual(len(graph), 1)
    title = graph.value(URIRef("http://example.com"),
                        URIRef("http://purl.org/dc/terms/title"))
    self.assertTrue(title.eq(u"Exampl"))
def generictest(testFile):
    """Run one DAWG-style SPARQL test case described by *testFile*.

    Loads the sibling manifest (manifest.n3 or manifest.ttl), finds the
    entry for this query file, parses any expected-result file into a
    set of bindings, then (if EVALUATE) runs the query against the
    source graph and asserts the results match.

    NOTE(review): this function is nearly identical to another
    generictest copy later in this file — consider de-duplicating.
    """
    # Synthesized test identity used by the test runner (nose).
    # NOTE: `id` shadows the builtin here.
    func_name = __name__ = __doc__ = id = 'test_sparql.' + \
        os.path.splitext(testFile)[0][8:].translate(
            maketrans('-/','__'))
    # Bootstrap a store, then reopen a fresh one for the manifest.
    store = plugin.get(STORE,Store)()
    bootStrapStore(store)
    store.commit()
    prefix = testFile.split('.rq')[-1]
    manifestPath = '/'.join(testFile.split('/')[:-1]+['manifest.n3'])
    manifestPath2 = '/'.join(testFile.split('/')[:-1]+['manifest.ttl'])
    queryFileName = testFile.split('/')[-1]
    store = plugin.get(STORE,Store)()
    store.open(configString,create=False)
    assert len(store) == 0
    manifestG=ConjunctiveGraph(store)
    # Fall back to the Turtle manifest when no N3 manifest exists.
    if not os.path.exists(manifestPath):
        assert os.path.exists(manifestPath2)
        manifestPath = manifestPath2
    manifestG.default_context.parse(open(manifestPath),
                                    publicID=URIRef(TEST_BASE),
                                    format='n3')
    # Look up this query file's entry in the manifest.
    manifestData = manifestG.query(
        MANIFEST_QUERY,
        processor='sparql',
        initBindings={'query' : TEST_BASE[queryFileName]},
        initNs=manifestNS,
        DEBUG = False)
    store.rollback()
    store.close()
    for source,testCaseName,testCaseComment,expectedRT in manifestData:
        # Resolve manifest-relative URIs into local file paths.
        if expectedRT:
            expectedRT = '/'.join(testFile.split('/')[:-1] + \
                [expectedRT.replace(TEST_BASE,'')])
        if source:
            source = '/'.join(testFile.split('/')[:-1] + \
                [source.replace(TEST_BASE,'')])
        # Prefer the comment as the display name when present.
        testCaseName = testCaseComment and testCaseComment or testCaseName
        # log.debug("## Source: %s ##"%source)
        # log.debug("## Test: %s ##"%testCaseName)
        # log.debug("## Result: %s ##"%expectedRT)
        #Expected results
        if expectedRT:
            store = plugin.get(STORE,Store)()
            store.open(configString,create=False)
            resultG=ConjunctiveGraph(store).default_context
            log.debug("###"*10)
            log.debug("parsing: %s" % open(expectedRT).read())
            log.debug("###"*10)
            assert len(store) == 0
            # log.debug("## Parsing (%s) ##"%(expectedRT))
            # Try several formats until one parses; skip on failure.
            if not trialAndErrorRTParse(resultG,expectedRT,DEBUG):
                log.debug(
                    "Unexpected result format (for %s), skipping" % \
                    (expectedRT))
                store.rollback()
                store.close()
                continue
            log.debug("## Done .. ##")
            # Collect the expected variable bindings from the result
            # graph into `bindings` (frozensets for multi-var rows).
            rtVars = [rtVar for rtVar in
                      resultG.objects(None,RESULT_NS.resultVariable)]
            bindings = []
            resultSetNode = resultG.value(predicate=RESULT_NS.value,
                                          object=RESULT_NS.ResultSet)
            for solutionNode in resultG.objects(resultSetNode,
                                                RESULT_NS.solution):
                bindingDict = dict([(key,None) for key in rtVars])
                for bindingNode in resultG.objects(solutionNode,
                                                   RESULT_NS.binding):
                    value = resultG.value(subject=bindingNode,
                                          predicate=RESULT_NS.value)
                    name = resultG.value(subject=bindingNode,
                                         predicate=RESULT_NS.variable)
                    bindingDict[name] = value
                rbinds = [bindingDict[vName] for vName in rtVars]
                # print("Rbinds", rbinds)
                if len(rbinds) > 1 and (
                        isinstance(rbinds, list) or isinstance(rbinds, tuple)):
                    bindings.append(frozenset(rbinds))
                elif len(rbinds) == 1 and (
                        isinstance(rbinds, list) or isinstance(rbinds, tuple)):
                    bindings.append(rbinds[0])
                else:
                    bindings.append(rbinds)
                # bindings.append(tuple([bindingDict[vName] for vName in rtVars]))
            log.debug(open(expectedRT).read())
            store.rollback()
            store.close()
        # Honour the explicit skip list.
        if testFile in tests2Skip.keys():
            log.debug("Skipping test (%s) %s\n" % \
                (testFile, tests2Skip[testFile]))
            raise SkipTest("Skipping test (%s) %s\n" % \
                (testFile, tests2Skip[testFile]))
        query = open(testFile).read()
        log.debug("### %s (%s) ###" % (testCaseName,testFile))
        log.debug(query)
        p = parse(query)#,DEBUG_PARSE)
        log.debug(p)
        if EVALUATE and source:
            log.debug("### Source Graph: ###")
            log.debug(open(source).read())
            store = plugin.get(STORE,Store)()
            store.open(configString,create=False)
            g = ConjunctiveGraph(store)
            try:
                g.parse(open(source),format='n3')
            # NOTE(review): bare except silently skips any parse error.
            except:
                log.debug("Unexpected data format (for %s), skipping" % \
                    (source))
                store.rollback()
                store.close()
                continue
            rt = g.query(query, processor='sparql', DEBUG = False)
            if expectedRT:
                # SPARQLResult may wrap the payload in .result.
                try:
                    result = rt.result
                except AttributeError:
                    result = rt
                if isinstance(result, Graph):
                    # CONSTRUCT/DESCRIBE: compare against the expected
                    # graph from the graphtests map.
                    resgraph = open(graphtests[testFile]).read()
                    store = plugin.get(STORE,Store)()
                    store.open(configString,create=False)
                    g = ConjunctiveGraph(store)
                    g.parse(data=resgraph,format="n3")
                    assert result == g, \
                        "### Test Failed: ###\n\nB:\n%s\n\nR:\n%s\n\n" % \
                        (g.serialize(format="n3"),
                         result.serialize(format="n3"))
                else:
                    # SELECT: normalize rows to hashable values and
                    # compare the binding sets.
                    # result = [r[0] for r in result if isinstance(r, (tuple, list))]
                    def stab(r):
                        if isinstance(r, (tuple, list)):
                            return frozenset(r)
                        else:
                            return r
                    results = set(
                        [stab(r) for r in result])
                    assert set(bindings).difference(results) == set([]) or set(bindings) == results, \
                        "### Test Failed: ###\n\nB:\n%s\n\nR:\n%s\n\n" % \
                        (set(bindings), results)
                log.debug("### Test Passed: ###")
                store.rollback()
def convert(muldicat_csv):
    """Convert the MulDiCat spreadsheet (UTF-8 CSV) into a SKOS graph.

    Each English row mints a new Concept URI (via make_id); rows in
    other languages attach labels/definitions to the most recently
    minted concept. Returns the populated ConjunctiveGraph.
    """
    g = ConjunctiveGraph()
    g.bind('skos', SKOS)
    g.bind('dct', DCT)
    # add concept scheme
    g.add((muldicat, RDF.type, SKOS.ConceptScheme))
    g.add((muldicat, DCT.title, Literal("Multilingual Dictionary of Cataloging Terms and Concepts", lang="en")))
    g.add((muldicat, DCT.description, Literal(description, datatype=XHTML)))
    g.add((muldicat, DCT.modified, Literal(datetime.date.today())))
    # work through each row of the spreadsheet, adding concepts as we go
    subject = None
    for row in unicode_csv_reader(codecs.open(muldicat_csv, encoding='utf-8')):
        # strip whitespace from row
        row = [cell.strip() for cell in row]
        # older version of the table had an unused ID column
        if len(row) == 8:
            print "popping"
            row.pop(0)
        if row[0] == 'Language':
            # header row
            continue
        elif row == [u'', u'', u'', u'', u'', u'', u'', u'']:
            # blank row
            # NOTE(review): this only matches 8-column blanks; a
            # 7-column blank row falls through but is then skipped by
            # the `not lang` guard below.
            continue
        else:
            # expects exactly 7 data columns after the optional ID pop
            lang, label, definition, see, see_also, source, modified = row
            lang = languages.get(lang, None)
            label = label.strip()
            if not lang or not label:
                continue
            # use the english label to form part of the URI for the concept
            # hopefully not too controversial?
            if lang == 'en':
                subject = make_id(label)
                g.add((subject, RDF.type, SKOS.Concept))
            # non-English rows attach to the last English subject
            g.add((subject, SKOS.prefLabel, Literal(label, lang=lang)))
            g.add((subject, SKOS.inScheme, muldicat))
            if definition:
                g.add((subject, SKOS.definition, Literal(definition, lang=lang)))
            if source:
                g.add((subject, DCT.source, Literal(source, lang=lang)))
            if modified:
                date = datetime.datetime.strptime(modified, '%Y%m%d').date()
                # only record the latest last modification date for the concept
                existing_date = g.value(subject, DCT.modified)
                if not existing_date and date:
                    g.add((subject, DCT.modified, Literal(date)))
                elif date and existing_date and date > existing_date.toPython():
                    g.remove((subject, DCT.modified, existing_date))
                    g.add((subject, DCT.modified, Literal(date)))
            # "see" column becomes altLabels in the row's language
            for alt_label in see.split(','):
                if not alt_label:
                    continue
                alt_label = alt_label.strip()
                g.add((subject, SKOS.altLabel, Literal(alt_label, lang=lang)))
            # link up relations if we have the english label
            if lang == 'en' and see_also:
                for s in see_also.split(','):
                    s = s.strip()
                    # entries look like "Label [BT]" / "[NT]" / "[RT]"
                    match = re.match(r'(.*) \[(.*?)\]', s)
                    if not match:
                        continue
                    label, reltype = match.groups()
                    reltype = reltype.strip('[]') # some are formatted wrong
                    # NOTE: `object` shadows the builtin here.
                    object = make_id(label)
                    if reltype == 'BT':
                        g.add((subject, SKOS.broader, object))
                        g.add((object, SKOS.narrower, subject))
                    elif reltype == 'NT':
                        g.add((subject, SKOS.narrower, object))
                        g.add((object, SKOS.broader, subject))
                    elif reltype == 'RT':
                        g.add((subject, SKOS.related, object))
                        g.add((object, SKOS.related, subject))
                    else:
                        # unknown relation type is a hard error
                        raise RuntimeError(reltype)
    return g
class InMemoryStorage(object):
    """A thin wrapper around an rdflib in-memory ConjunctiveGraph.

    Context arguments throughout may be either a str key into the
    module-level ``graph_dict`` or a context identifier (e.g. URIRef)
    passed straight to ``get_context``; a falsy context means "the
    whole store".
    """

    def __init__(self):
        store = IOMemory()
        self.g = ConjunctiveGraph(store=store)
        # Register the namespace prefixes used by serializations/queries.
        self.g.bind("lada",ns_lada)
        self.g.bind('data', ns_data)
        self.g.bind('cube', ns_cube)
        self.g.bind('qb', ns_cube)
        self.g.bind('lcd', ns_lcd)
        self.g.bind('xsd', ns_xsd)
        self.g.bind('qb4cc', ns_qb4cc)
        self.g.bind('skos', ns_skos)
        # Namespace map handed to every SPARQL query.
        self.initNs = {
            'lada': ns_lada,
            'data': ns_data,
            'qb': ns_cube,
            'lcd': ns_lcd,
            'xsd': ns_xsd,
            'qb4cc': ns_qb4cc,
            'skos': ns_skos
        }

    def _concatenate_graphs(self, graphs):
        """Merge the named contexts into one throwaway read-only Graph."""
        source = Graph()
        for g in graphs:
            if g in graph_dict:
                source += self.g.get_context(graph_dict[g])
            elif type(g) is URIRef:
                source += self.g.get_context(g)
        return source

    def _resolve_context(self, context):
        """Map a str key through graph_dict, pass anything else through."""
        if type(context) is str:
            return graph_dict[context]
        return context

    def add_triple(self, triple, context):
        """Add one triple, to a named context or the default graph."""
        if context:
            self.g.get_context(self._resolve_context(context)).add(triple)
        else:
            self.g.add(triple)

    def add_graph(self, graph, context):
        """Merge all triples of *graph* into a context (or the store)."""
        if context:
            self.g.get_context(self._resolve_context(context)) \
                .__iadd__(graph)
        else:
            self.g += graph

    def add_file(self, file, format, context):
        """Parse *file* in *format* into a context (or the store)."""
        if context:
            self.g.get_context(self._resolve_context(context)) \
                .parse(file, format=format)
        else:
            self.g.parse(file, format=format)

    def query(self, queryString, contexts):
        """Run a SPARQL query over a list of contexts, one context,
        or the whole store."""
        if contexts:
            if type(contexts) is list:
                return self._concatenate_graphs(contexts) \
                    .query(queryString, initNs=self.initNs)
            return self.g.get_context(self._resolve_context(contexts)) \
                .query(queryString, initNs=self.initNs)
        return self.g.query(queryString, initNs=self.initNs)

    def value(self, subject, predicate, context):
        """Return one object value for (subject, predicate)."""
        if context:
            return self.g.get_context(self._resolve_context(context)) \
                .value(subject, predicate)
        return self.g.value(subject, predicate)

    def remove(self, triple_pattern, contexts):
        """Remove every triple matching *triple_pattern*.

        BUGFIX: the original list branch removed from the temporary
        graph returned by _concatenate_graphs, which never touched the
        underlying store — remove from each real context instead. The
        non-list branch now also accepts URIRef contexts, matching
        query()/value()/clear().
        """
        if contexts:
            if type(contexts) is list:
                for c in contexts:
                    self.g.get_context(self._resolve_context(c)) \
                        .remove(triple_pattern)
            else:
                self.g.get_context(self._resolve_context(contexts)) \
                    .remove(triple_pattern)
        else:
            self.g.remove(triple_pattern)

    def clear(self, context):
        """Drop an entire context, or wipe the whole store."""
        if context:
            self.g.remove_context(
                self.g.get_context(self._resolve_context(context)))
        else:
            self.g.remove((None, None, None))

    def count_triples(self):
        """Count all triples across every context."""
        return sum(1 for _ in self.g)

    def export(self, context):
        """Serialize a named (str-keyed) context to '<context>.ttl'."""
        if type(context) is str:
            self.g.get_context(graph_dict[context]) \
                .serialize(context + ".ttl", format="turtle")
def generictest(testFile):
    """Run one DAWG-style SPARQL test case described by *testFile*.

    Loads the sibling manifest (manifest.n3 or manifest.ttl), finds the
    entry for this query file, parses any expected-result file into a
    set of bindings, then (if EVALUATE) runs the query against the
    source graph and asserts the results match.

    NOTE(review): this is a re-spaced duplicate of an earlier
    generictest in this file — consider de-duplicating.
    """
    # Synthesized test identity used by the test runner (nose).
    # NOTE: `id` shadows the builtin here.
    func_name = __name__ = __doc__ = id = 'test_sparql.' + \
        os.path.splitext(testFile)[0][8:].translate(
            maketrans('-/','__'))
    # Bootstrap a store, then reopen a fresh one for the manifest.
    store = plugin.get(STORE, Store)()
    bootStrapStore(store)
    store.commit()
    prefix = testFile.split('.rq')[-1]
    manifestPath = '/'.join(testFile.split('/')[:-1] + ['manifest.n3'])
    manifestPath2 = '/'.join(testFile.split('/')[:-1] + ['manifest.ttl'])
    queryFileName = testFile.split('/')[-1]
    store = plugin.get(STORE, Store)()
    store.open(configString, create=False)
    assert len(store) == 0
    manifestG = ConjunctiveGraph(store)
    # Fall back to the Turtle manifest when no N3 manifest exists.
    if not os.path.exists(manifestPath):
        assert os.path.exists(manifestPath2)
        manifestPath = manifestPath2
    manifestG.default_context.parse(open(manifestPath),
                                    publicID=TEST_BASE,
                                    format='n3')
    # Look up this query file's entry in the manifest.
    manifestData = manifestG.query(
        MANIFEST_QUERY,
        processor='sparql',
        initBindings={'query': TEST_BASE[queryFileName]},
        initNs=manifestNS,
        DEBUG=False)
    store.rollback()
    store.close()
    for source, testCaseName, testCaseComment, expectedRT in manifestData:
        # Resolve manifest-relative URIs into local file paths.
        if expectedRT:
            expectedRT = '/'.join(testFile.split('/')[:-1] + \
                [expectedRT.replace(TEST_BASE,'')])
        if source:
            source = '/'.join(testFile.split('/')[:-1] + \
                [source.replace(TEST_BASE,'')])
        # Prefer the comment as the display name when present.
        testCaseName = testCaseComment and testCaseComment or testCaseName
        # log.debug("## Source: %s ##"%source)
        # log.debug("## Test: %s ##"%testCaseName)
        # log.debug("## Result: %s ##"%expectedRT)
        #Expected results
        if expectedRT:
            store = plugin.get(STORE, Store)()
            store.open(configString, create=False)
            resultG = ConjunctiveGraph(store).default_context
            log.debug("###" * 10)
            log.debug("parsing: %s" % open(expectedRT).read())
            log.debug("###" * 10)
            assert len(store) == 0
            # log.debug("## Parsing (%s) ##"%(expectedRT))
            # Try several formats until one parses; skip on failure.
            if not trialAndErrorRTParse(resultG, expectedRT, DEBUG):
                log.debug(
                    "Unexpected result format (for %s), skipping" % \
                    (expectedRT))
                store.rollback()
                store.close()
                continue
            log.debug("## Done .. ##")
            # Collect the expected variable bindings from the result
            # graph into `bindings` (frozensets for multi-var rows).
            rtVars = [
                rtVar for rtVar in
                resultG.objects(None, RESULT_NS.resultVariable)
            ]
            bindings = []
            resultSetNode = resultG.value(predicate=RESULT_NS.value,
                                          object=RESULT_NS.ResultSet)
            for solutionNode in resultG.objects(resultSetNode,
                                                RESULT_NS.solution):
                bindingDict = dict([(key, None) for key in rtVars])
                for bindingNode in resultG.objects(solutionNode,
                                                   RESULT_NS.binding):
                    value = resultG.value(subject=bindingNode,
                                          predicate=RESULT_NS.value)
                    name = resultG.value(subject=bindingNode,
                                         predicate=RESULT_NS.variable)
                    bindingDict[name] = value
                rbinds = [bindingDict[vName] for vName in rtVars]
                # print("Rbinds", rbinds)
                if len(rbinds) > 1 and (isinstance(rbinds, list)
                                        or isinstance(rbinds, tuple)):
                    bindings.append(frozenset(rbinds))
                elif len(rbinds) == 1 and (isinstance(rbinds, list)
                                           or isinstance(rbinds, tuple)):
                    bindings.append(rbinds[0])
                else:
                    bindings.append(rbinds)
                # bindings.append(tuple([bindingDict[vName] for vName in rtVars]))
            log.debug(open(expectedRT).read())
            store.rollback()
            store.close()
        # Honour the explicit skip list.
        if testFile in tests2Skip.keys():
            log.debug("Skipping test (%s) %s\n" % \
                (testFile, tests2Skip[testFile]))
            raise SkipTest("Skipping test (%s) %s\n" % \
                (testFile, tests2Skip[testFile]))
        query = open(testFile).read()
        log.debug("### %s (%s) ###" % (testCaseName, testFile))
        log.debug(query)
        p = parse(query) #,DEBUG_PARSE
        log.debug(p)
        if EVALUATE and source:
            log.debug("### Source Graph: ###")
            log.debug(open(source).read())
            store = plugin.get(STORE, Store)()
            store.open(configString, create=False)
            g = ConjunctiveGraph(store)
            try:
                g.parse(open(source), format='n3')
            # NOTE(review): bare except silently skips any parse error.
            except:
                log.debug("Unexpected data format (for %s), skipping" % \
                    (source))
                store.rollback()
                store.close()
                continue
            rt = g.query(query, processor='sparql', DEBUG=False)
            if expectedRT:
                # SPARQLResult may wrap the payload in .result.
                try:
                    result = rt.result
                except AttributeError:
                    result = rt
                if isinstance(result, Graph):
                    # CONSTRUCT/DESCRIBE: compare against the expected
                    # graph from the graphtests map.
                    resgraph = open(graphtests[testFile]).read()
                    store = plugin.get(STORE, Store)()
                    store.open(configString, create=False)
                    g = ConjunctiveGraph(store)
                    g.parse(data=resgraph, format="n3")
                    assert result == g, \
                        "### Test Failed: ###\n\nB:\n%s\n\nR:\n%s\n\n" % \
                        (g.serialize(format="n3"),
                         result.serialize(format="n3"))
                else:
                    # SELECT: normalize rows to hashable values and
                    # compare the binding sets.
                    # result = [r[0] for r in result if isinstance(r, (tuple, list))]
                    def stab(r):
                        if isinstance(r, (tuple, list)):
                            return frozenset(r)
                        else:
                            return r
                    results = set([stab(r) for r in result])
                    assert set(bindings).difference(results) == set([]) or set(bindings) == results, \
                        "### Test Failed: ###\n\nB:\n%s\n\nR:\n%s\n\n" % \
                        (set(bindings), results)
                log.debug("### Test Passed: ###")
                store.rollback()