Beispiel #1
0
 def test_html_decoded_entity_xhtml(self):
     """Parse the entity-decoded ``html`` fixture as RDFa and check the title.

     The fixture is run through ``htmlentitydecode`` first; the resulting
     graph must hold exactly one triple whose dc:terms title for
     http://example.com equals u"Exampl\\xe9".
     """
     running_on_jython = platform.system() == "Java"
     if running_on_jython:
         raise SkipTest('problem with HTML entities for html5lib in Jython')
     graph = ConjunctiveGraph()
     decoded_markup = htmlentitydecode(html)
     graph.parse(data=decoded_markup, format='rdfa')
     self.assertEqual(len(graph), 1)
     page = URIRef("http://example.com")
     title_predicate = URIRef("http://purl.org/dc/terms/title")
     title = graph.value(page, title_predicate)
     self.assertEqual(title, u"Exampl\xe9")
Beispiel #2
0
 def getRecentComments(self, n=10, notOlderThan=None, withSpam=False):
     """Yield the most recently created comments, newest first.

     n: maximum number of comment documents fetched from mongo.
     notOlderThan: when not None, only comments created within this many
         days before now are fetched.
     withSpam: when false, the query starts from the ``self.notSpam``
         filter; when true, no filter is applied.

     Each yielded value is the mapping returned by ``vars()``, i.e. the
     local names of this generator (``uri``, ``parent``, ``created``,
     ``content``, ``creator``, ``docId``, ``isSpam``, ...). Consumers read
     those keys, so the local variable names here must not be renamed.
     """
     self.mongo['comment'].ensure_index('created')
     spec = {}
     if not withSpam:
         # Work on a copy: the 'created' key added below must not leak
         # into the shared self.notSpam filter used by other queries.
         spec = dict(self.notSpam)
     if notOlderThan is not None:
         now = datetime.datetime.now(tzlocal())
         spec['created'] = {
             '$gt' : now - datetime.timedelta(days=notOlderThan)}
     for doc in self.mongo['comment'].find(spec, limit=n,
                                           sort=[('created', -1)]):
         g = ConjunctiveGraph()
         g.parse(StringInputSource(doc['n3'].encode('utf8')), format='n3')
         # The first has_reply triple gives the parent and the comment uri;
         # next() works on both Python 2.6+ and Python 3 iterators.
         parent, _, uri = next(g.triples((None, SIOC.has_reply, None)))
         created = g.value(uri, DCTERMS.created)
         content = g.value(uri, CONTENT.encoded)
         creator = g.value(uri, SIOC.has_creator)
         docId = str(doc['_id'])
         isSpam = doc.get('type', '')
         yield vars()
Beispiel #3
0
 def test_html_entity_xhtml(self):
     """Parse the raw ``html`` fixture as RDFa and check the extracted title.

     The graph must hold exactly one triple, and the dc:terms title of
     http://example.com must compare equal (``.eq``) to u"Exampl".
     Skipped on Python 3 and on Jython (see the skip messages).
     """
     if sys.version_info[0] == 3:
         raise SkipTest('minidom parser strips HTML entities in Python 3.2')
     if platform.system() == "Java":
         raise SkipTest('problem with HTML entities for html5lib in Jython')
     g = ConjunctiveGraph()
     # Scope the filter to this test: a bare simplefilter() call would keep
     # UserWarning silenced for every test that runs afterwards.
     with warnings.catch_warnings():
         warnings.simplefilter('ignore', UserWarning)
         g.parse(data=html, format='rdfa')
         self.assertEqual(len(g), 1)
         self.assertTrue(
             g.value(URIRef("http://example.com"),
                     URIRef("http://purl.org/dc/terms/title")).eq(u"Exampl"))
Beispiel #4
0
 def test_html_entity_xhtml(self):
     """Parse the raw ``html`` fixture as RDFa and check the extracted title.

     Expects a single triple whose dc:terms title for http://example.com
     compares equal (``.eq``) to u"Exampl". Skipped on Python 3, on
     Python older than 2.5, and on Jython.
     """
     on_python3 = sys.version_info[0] == 3
     if on_python3 or sys.version_info[:2] < (2,5):
         raise SkipTest('minidom parser strips HTML entities in Python 3.2')
     on_jython = platform.system() == "Java"
     if on_jython:
         raise SkipTest('problem with HTML entities for html5lib in Jython')
     graph = ConjunctiveGraph()
     warnings.simplefilter('ignore', UserWarning)
     graph.parse(data=html, format='rdfa')
     self.assertEqual(len(graph), 1)
     page = URIRef("http://example.com")
     title_predicate = URIRef("http://purl.org/dc/terms/title")
     title = graph.value(page, title_predicate)
     self.assertTrue(title.eq( u"Exampl"))
Beispiel #5
0
def generictest(testFile):
    """Run the SPARQL test case(s) that the manifest lists for *testFile*.

    The manifest next to *testFile* (``manifest.n3``, falling back to
    ``manifest.ttl``) is parsed and queried with ``MANIFEST_QUERY``, with
    ``query`` bound to ``TEST_BASE[<file name of testFile>]``. For every
    row ``(source, testCaseName, testCaseComment, expectedRT)``:

    1. If an expected-result file is named, it is parsed into a fresh
       store via ``trialAndErrorRTParse`` and converted to a list of
       expected bindings. The row is skipped if that parse fails.
    2. ``SkipTest`` is raised if *testFile* is a key of ``tests2Skip``.
    3. The query text is read from *testFile* and passed to ``parse``.
    4. If ``EVALUATE`` is set and a source file is named, the source is
       loaded as n3, the query is run against it, and the outcome is
       compared with the expected data.

    Raises:
        SkipTest: when *testFile* is listed in ``tests2Skip``.
        AssertionError: when a freshly opened store is not empty, when no
            manifest file exists, or when results do not match.
    """
    # NOTE(review): these are plain local bindings (including the local
    # names __name__, __doc__ and id); none of them is read again in this
    # function. The [8:] slice drops the first 8 characters of the path --
    # presumably a fixed directory prefix, TODO confirm.
    func_name = __name__ = __doc__ = id = 'test_sparql.' + \
                os.path.splitext(testFile)[0][8:].translate(
                                                    maketrans('-/','__'))
    # Bootstrap the backing store once and commit it.
    store = plugin.get(STORE,Store)()
    bootStrapStore(store)
    store.commit()
    # NOTE(review): prefix is not used anywhere below.
    prefix = testFile.split('.rq')[-1]
    # Candidate manifest locations in the same directory as the query file.
    manifestPath = '/'.join(testFile.split('/')[:-1]+['manifest.n3'])
    manifestPath2 = '/'.join(testFile.split('/')[:-1]+['manifest.ttl'])
    queryFileName = testFile.split('/')[-1]
    # Re-open the store (must already exist and be empty) for the manifest.
    store = plugin.get(STORE,Store)()
    store.open(configString,create=False)
    assert len(store) == 0
    manifestG=ConjunctiveGraph(store)
    if not os.path.exists(manifestPath):
        assert os.path.exists(manifestPath2)
        manifestPath = manifestPath2
    manifestG.default_context.parse(open(manifestPath),
                                    publicID=URIRef(TEST_BASE),
                                    format='n3')
    manifestData = manifestG.query(
                      MANIFEST_QUERY,
                      processor='sparql',
                      initBindings={'query' : TEST_BASE[queryFileName]},
                      initNs=manifestNS,
                      DEBUG = False)
    # Discard the manifest triples so the next store user starts empty.
    store.rollback()
    store.close()
    for source,testCaseName,testCaseComment,expectedRT in manifestData:
        # Turn the manifest references into paths beside the query file.
        if expectedRT:
            expectedRT = '/'.join(testFile.split('/')[:-1] + \
                                    [expectedRT.replace(TEST_BASE,'')])
        if source:
            source = '/'.join(testFile.split('/')[:-1] + \
                                    [source.replace(TEST_BASE,'')])
        # Prefer the comment as the display name when there is one.
        testCaseName = testCaseComment and testCaseComment or testCaseName
        # log.debug("## Source: %s ##"%source)
        # log.debug("## Test: %s ##"%testCaseName)
        # log.debug("## Result: %s ##"%expectedRT)
        #Expected results
        if expectedRT:
            store = plugin.get(STORE,Store)()
            store.open(configString,create=False)
            resultG=ConjunctiveGraph(store).default_context
            log.debug("###"*10)
            log.debug("parsing: %s" % open(expectedRT).read())
            log.debug("###"*10)
            assert len(store) == 0
            # log.debug("## Parsing (%s) ##"%(expectedRT))
            if not trialAndErrorRTParse(resultG,expectedRT,DEBUG):
                log.debug(
                    "Unexpected result format (for %s), skipping" % \
                                                    (expectedRT))
                store.rollback()
                store.close()
                continue
            log.debug("## Done .. ##")
            # Result variable names, in the order the result graph yields them.
            rtVars = [rtVar for rtVar in 
                        resultG.objects(None,RESULT_NS.resultVariable)]
            bindings = []
            resultSetNode = resultG.value(predicate=RESULT_NS.value,
                                          object=RESULT_NS.ResultSet)
            for solutionNode in resultG.objects(resultSetNode,
                                                RESULT_NS.solution):
                # Variables without a binding in this solution stay None.
                bindingDict = dict([(key,None) for key in rtVars])
                for bindingNode in resultG.objects(solutionNode,
                                                   RESULT_NS.binding):
                    value = resultG.value(subject=bindingNode,
                                          predicate=RESULT_NS.value)
                    name  = resultG.value(subject=bindingNode,
                                          predicate=RESULT_NS.variable)
                    bindingDict[name] = value
                rbinds = [bindingDict[vName] for vName in rtVars]
                # print("Rbinds", rbinds)
                # Multi-variable rows become frozensets (order and duplicates
                # within a row are ignored); single-variable rows collapse to
                # the bare value. rbinds is always a list here, so the final
                # else is only reached when it is empty.
                if len(rbinds) > 1 and (
                    isinstance(rbinds, list) or isinstance(rbinds, tuple)):
                    bindings.append(frozenset(rbinds))
                elif len(rbinds) == 1 and (
                    isinstance(rbinds, list) or isinstance(rbinds, tuple)):
                    bindings.append(rbinds[0])
                else:
                    bindings.append(rbinds)
                # bindings.append(tuple([bindingDict[vName] for vName in rtVars]))
            log.debug(open(expectedRT).read())
            store.rollback()
            store.close()
        # NOTE(review): the skip check only happens here, after the manifest
        # and the expected results have already been parsed.
        if testFile in tests2Skip.keys():
            log.debug("Skipping test (%s) %s\n" % \
                        (testFile, tests2Skip[testFile]))
            raise SkipTest("Skipping test (%s) %s\n" % \
                        (testFile, tests2Skip[testFile]))
        query = open(testFile).read()
        log.debug("### %s (%s) ###" % (testCaseName,testFile))
        log.debug(query)
        # Parsing alone is part of the test: a syntax error propagates.
        p = parse(query)#,DEBUG_PARSE)
        log.debug(p)
        if EVALUATE and source:
            log.debug("### Source Graph: ###")
            log.debug(open(source).read())
            store = plugin.get(STORE,Store)()
            store.open(configString,create=False)
            g = ConjunctiveGraph(store)
            try:
                g.parse(open(source),format='n3')
            # NOTE(review): bare except -- any failure while loading the
            # source (not only a format error) makes this row a silent skip.
            except:
                log.debug("Unexpected data format (for %s), skipping" % \
                                                                (source))
                store.rollback()
                store.close()
                continue
            rt = g.query(query,
                         processor='sparql',
                         DEBUG = False)
            if expectedRT:
                # Some result objects expose the payload as .result; others
                # are used directly.
                try:
                    result = rt.result
                except AttributeError:
                    result = rt
                if isinstance(result, Graph):
                    # Graph-valued results are compared against the graph
                    # file registered for this test in graphtests.
                    resgraph = open(graphtests[testFile]).read()
                    # NOTE(review): store is rebound here without closing
                    # the store opened for the source graph above.
                    store = plugin.get(STORE,Store)()
                    store.open(configString,create=False)
                    g = ConjunctiveGraph(store)
                    g.parse(data=resgraph,format="n3")
                    assert result == g, \
                            "### Test Failed: ###\n\nB:\n%s\n\nR:\n%s\n\n" % \
                                    (g.serialize(format="n3"), 
                                     result.serialize(format="n3"))
                else:
                    # result = [r[0] for r in result if isinstance(r, (tuple, list))]
                    # Normalise rows the same way as the expected bindings.
                    def stab(r):
                        if isinstance(r, (tuple, list)):
                            return frozenset(r)
                        else:
                            return r
                    results = set(
                        [stab(r) for r in result])
                    # Passes when every expected binding occurs in the actual
                    # results; extra actual rows do not fail the test.
                    assert set(bindings).difference(results) == set([]) or set(bindings) == results, \
                            "### Test Failed: ###\n\nB:\n%s\n\nR:\n%s\n\n" % \
                                    (set(bindings), results)
                log.debug("### Test Passed: ###")
            # NOTE(review): rolled back but not closed on this path.
            store.rollback()
Beispiel #6
0
def _record_modified(g, subject, modified):
    """Keep only the latest dct:modified date (given as YYYYMMDD) for *subject*."""
    date = datetime.datetime.strptime(modified, '%Y%m%d').date()
    existing_date = g.value(subject, DCT.modified)
    if not existing_date:
        g.add((subject, DCT.modified, Literal(date)))
    elif date > existing_date.toPython():
        g.remove((subject, DCT.modified, existing_date))
        g.add((subject, DCT.modified, Literal(date)))


def _link_relations(g, subject, see_also):
    """Add reciprocal SKOS links for each ``label [BT|NT|RT]`` entry in *see_also*."""
    # relation code -> (predicate subject->target, predicate target->subject)
    relations = {
        'BT': (SKOS.broader, SKOS.narrower),
        'NT': (SKOS.narrower, SKOS.broader),
        'RT': (SKOS.related, SKOS.related),
    }
    for entry in see_also.split(','):
        match = re.match(r'(.*) \[(.*?)\]', entry.strip())
        if not match:
            continue
        label, reltype = match.groups()
        reltype = reltype.strip('[]') # some are formatted wrong
        target = make_id(label)
        if reltype not in relations:
            raise RuntimeError(reltype)
        forward, inverse = relations[reltype]
        g.add((subject, forward, target))
        g.add((target, inverse, subject))


def convert(muldicat_csv):
    """Convert the MulDiCat CSV file at *muldicat_csv* into a SKOS graph.

    Each data row is expected to hold seven cells: language, label,
    definition, see, see-also, source and modified date (YYYYMMDD). Rows
    with eight cells are treated as the older layout and lose their first
    (ID) cell. The header row (first cell 'Language'), blank rows, rows in
    a language missing from ``languages`` and rows without a label are
    skipped.

    The English row starts a new concept (its label forms the concept id);
    following rows in other languages are attached to that same concept.

    Returns the populated ConjunctiveGraph.

    Raises:
        RuntimeError: for a see-also relation code other than BT, NT or RT.
        ValueError: if a non-blank row does not have seven cells, or a
            modified date does not match YYYYMMDD.
    """
    g = ConjunctiveGraph()
    g.bind('skos', SKOS)
    g.bind('dct', DCT)

    # add concept scheme
    g.add((muldicat, RDF.type, SKOS.ConceptScheme))
    g.add((muldicat, DCT.title, Literal("Multilingual Dictionary of Cataloging Terms and Concepts", lang="en")))
    g.add((muldicat, DCT.description, Literal(description, datatype=XHTML)))
    g.add((muldicat, DCT.modified, Literal(datetime.date.today())))

    # work through each row of the spreadsheet, adding concepts as we go
    # NOTE(review): subject stays None until the first English row is seen;
    # this assumes the file starts each concept with its English row.
    subject = None
    # The context manager closes the input file even if a row fails.
    with codecs.open(muldicat_csv, encoding='utf-8') as csv_file:
        for row in unicode_csv_reader(csv_file):
            # strip whitespace from row
            row = [cell.strip() for cell in row]

            # older version of the table had an unused ID column
            if len(row) == 8:
                print("popping")
                row.pop(0)

            # Blank rows (including zero-cell rows) and the header carry
            # no data. Checking emptiness first also protects row[0].
            if not any(row) or row[0] == 'Language':
                continue

            lang, label, definition, see, see_also, source, modified = row
            lang = languages.get(lang, None)
            if not lang or not label:
                continue

            # use the english label to form part of the URI for the concept
            # hopefully not too controversial?
            if lang == 'en':
                subject = make_id(label)

            g.add((subject, RDF.type, SKOS.Concept))
            g.add((subject, SKOS.prefLabel, Literal(label, lang=lang)))
            g.add((subject, SKOS.inScheme, muldicat))

            if definition:
                g.add((subject, SKOS.definition, Literal(definition, lang=lang)))

            if source:
                g.add((subject, DCT.source, Literal(source, lang=lang)))

            if modified:
                # only record the latest last modification date for the concept
                _record_modified(g, subject, modified)

            for alt_label in see.split(','):
                # Strip before testing so whitespace-only entries are
                # dropped instead of becoming empty altLabels.
                alt_label = alt_label.strip()
                if alt_label:
                    g.add((subject, SKOS.altLabel, Literal(alt_label, lang=lang)))

            # link up relations if we have the english label
            if lang == 'en' and see_also:
                _link_relations(g, subject, see_also)
    return g
Beispiel #7
0
class InMemoryStorage(object):
    """RDF storage kept in memory: a ConjunctiveGraph over an IOMemory store.

    Most methods take a ``context`` argument selecting where to operate:

    * a falsy value selects the whole conjunctive graph;
    * a plain ``str`` is a key into the module-level ``graph_dict``, whose
      value is the identifier of the named graph;
    * anything else is handed to ``get_context`` as the identifier itself.

    The plain-``str`` test deliberately uses ``type(x) is str`` rather than
    ``isinstance``: rdflib's URIRef is a string subclass and must take the
    identifier path.
    """

    def __init__(self):
        """Create the empty store and register the namespace prefixes."""
        store = IOMemory()

        self.g = ConjunctiveGraph(store=store)

        self.g.bind("lada", ns_lada)
        self.g.bind('data', ns_data)
        self.g.bind('cube', ns_cube)
        self.g.bind('qb', ns_cube)
        self.g.bind('lcd', ns_lcd)
        self.g.bind('xsd', ns_xsd)
        self.g.bind('qb4cc', ns_qb4cc)
        self.g.bind('skos', ns_skos)

        # Prefixes made available to every SPARQL query run through query().
        self.initNs = {
            'lada': ns_lada,
            'data': ns_data,
            'qb': ns_cube,
            'lcd': ns_lcd,
            'xsd': ns_xsd,
            'qb4cc': ns_qb4cc,
            'skos': ns_skos
        }

    def _context_graph(self, context):
        """Return the named graph for a single (truthy) *context*."""
        if type(context) is str:
            return self.g.get_context(graph_dict[context])
        return self.g.get_context(context)

    def _identifiers(self, graphs):
        """Yield the graph identifier for each usable entry of *graphs*.

        Entries that are ``graph_dict`` keys are translated, URIRef entries
        are used as they are, and anything else is silently ignored.
        """
        for entry in graphs:
            if entry in graph_dict:
                yield graph_dict[entry]
            elif type(entry) is URIRef:
                yield entry

    def _concatenate_graphs(self, graphs):
        """Return a new Graph holding a copy of the triples of all *graphs*.

        The result is detached from the store: changing it does not change
        the stored named graphs.
        """
        source = Graph()
        for identifier in self._identifiers(graphs):
            source += self.g.get_context(identifier)
        return source

    def add_triple(self, triple, context):
        """Add *triple* to the graph selected by *context*."""
        if context:
            self._context_graph(context).add(triple)
        else:
            self.g.add(triple)

    def add_graph(self, graph, context):
        """Add every triple of *graph* to the graph selected by *context*."""
        if context:
            target = self._context_graph(context)
            target += graph
        else:
            self.g += graph

    def add_file(self, file, format, context):
        """Parse *file* (in the given *format*) into the graph selected by *context*."""
        if context:
            self._context_graph(context).parse(file, format=format)
        else:
            self.g.parse(file, format=format)

    def query(self, queryString, contexts):
        """Run *queryString* with the registered prefixes and return the result.

        *contexts* may additionally be a list, in which case the query runs
        over a merged copy of those graphs.
        """
        if not contexts:
            target = self.g
        elif type(contexts) is list:
            target = self._concatenate_graphs(contexts)
        else:
            target = self._context_graph(contexts)
        return target.query(queryString, initNs=self.initNs)

    def value(self, subject, predicate, context):
        """Return the graph's ``value(subject, predicate)`` within *context*."""
        if context:
            return self._context_graph(context).value(subject, predicate)
        return self.g.value(subject, predicate)

    def remove(self, triple_pattern, contexts):
        """Remove triples matching *triple_pattern* from the selected graph(s).

        *contexts* may be falsy (whole store), a list of graph names or
        URIRefs, a single ``graph_dict`` key, or a single URIRef.
        """
        if not contexts:
            self.g.remove(triple_pattern)
        elif type(contexts) is list:
            # Remove from each stored named graph. Removing from the merged
            # copy built by _concatenate_graphs would leave the store
            # untouched.
            for identifier in self._identifiers(contexts):
                self.g.get_context(identifier).remove(triple_pattern)
        elif contexts in graph_dict or not isinstance(contexts, URIRef):
            # Names go through graph_dict; an unknown name raises KeyError.
            self.g.get_context(graph_dict[contexts]).remove(triple_pattern)
        else:
            # A URIRef that is not a graph_dict key identifies the graph itself.
            self.g.get_context(contexts).remove(triple_pattern)

    def clear(self, context):
        """Drop the named graph for *context*, or every triple when it is falsy."""
        if context:
            self.g.remove_context(self._context_graph(context))
        else:
            self.g.remove( (None, None, None) )

    def count_triples(self):
        """Return how many triples iterating over the conjunctive graph yields."""
        return sum(1 for _ in self.g)

    def export(self, context):
        """Serialize the graph named *context* as Turtle to ``<context>.ttl``.

        Only plain ``str`` names are exported; any other value is ignored.
        """
        if type(context) is str:
            self.g.get_context(graph_dict[context]).serialize(context + ".ttl", format="turtle")
Beispiel #8
0
def generictest(testFile):
    """Run the SPARQL test case(s) that the manifest lists for *testFile*.

    The manifest next to *testFile* (``manifest.n3``, falling back to
    ``manifest.ttl``) is parsed and queried with ``MANIFEST_QUERY``, with
    ``query`` bound to ``TEST_BASE[<file name of testFile>]``. For every
    row ``(source, testCaseName, testCaseComment, expectedRT)``:

    1. If an expected-result file is named, it is parsed into a fresh
       store via ``trialAndErrorRTParse`` and converted to a list of
       expected bindings. The row is skipped if that parse fails.
    2. ``SkipTest`` is raised if *testFile* is a key of ``tests2Skip``.
    3. The query text is read from *testFile* and passed to ``parse``.
    4. If ``EVALUATE`` is set and a source file is named, the source is
       loaded as n3, the query is run against it, and the outcome is
       compared with the expected data.

    Raises:
        SkipTest: when *testFile* is listed in ``tests2Skip``.
        AssertionError: when a freshly opened store is not empty, when no
            manifest file exists, or when results do not match.
    """
    # NOTE(review): these are plain local bindings (including the local
    # names __name__, __doc__ and id); none of them is read again in this
    # function. The [8:] slice drops the first 8 characters of the path --
    # presumably a fixed directory prefix, TODO confirm.
    func_name = __name__ = __doc__ = id = 'test_sparql.' + \
                os.path.splitext(testFile)[0][8:].translate(
                                                    maketrans('-/','__'))
    # Bootstrap the backing store once and commit it.
    store = plugin.get(STORE, Store)()
    bootStrapStore(store)
    store.commit()
    # NOTE(review): prefix is not used anywhere below.
    prefix = testFile.split('.rq')[-1]
    # Candidate manifest locations in the same directory as the query file.
    manifestPath = '/'.join(testFile.split('/')[:-1] + ['manifest.n3'])
    manifestPath2 = '/'.join(testFile.split('/')[:-1] + ['manifest.ttl'])
    queryFileName = testFile.split('/')[-1]
    # Re-open the store (must already exist and be empty) for the manifest.
    store = plugin.get(STORE, Store)()
    store.open(configString, create=False)
    assert len(store) == 0
    manifestG = ConjunctiveGraph(store)
    if not os.path.exists(manifestPath):
        assert os.path.exists(manifestPath2)
        manifestPath = manifestPath2
    manifestG.default_context.parse(open(manifestPath),
                                    publicID=TEST_BASE,
                                    format='n3')
    manifestData = manifestG.query(
        MANIFEST_QUERY,
        processor='sparql',
        initBindings={'query': TEST_BASE[queryFileName]},
        initNs=manifestNS,
        DEBUG=False)
    # Discard the manifest triples so the next store user starts empty.
    store.rollback()
    store.close()
    for source, testCaseName, testCaseComment, expectedRT in manifestData:
        # Turn the manifest references into paths beside the query file.
        if expectedRT:
            expectedRT = '/'.join(testFile.split('/')[:-1] + \
                                    [expectedRT.replace(TEST_BASE,'')])
        if source:
            source = '/'.join(testFile.split('/')[:-1] + \
                                    [source.replace(TEST_BASE,'')])
        # Prefer the comment as the display name when there is one.
        testCaseName = testCaseComment and testCaseComment or testCaseName
        # log.debug("## Source: %s ##"%source)
        # log.debug("## Test: %s ##"%testCaseName)
        # log.debug("## Result: %s ##"%expectedRT)
        #Expected results
        if expectedRT:
            store = plugin.get(STORE, Store)()
            store.open(configString, create=False)
            resultG = ConjunctiveGraph(store).default_context
            log.debug("###" * 10)
            log.debug("parsing: %s" % open(expectedRT).read())
            log.debug("###" * 10)
            assert len(store) == 0
            # log.debug("## Parsing (%s) ##"%(expectedRT))
            if not trialAndErrorRTParse(resultG, expectedRT, DEBUG):
                log.debug(
                    "Unexpected result format (for %s), skipping" % \
                                                    (expectedRT))
                store.rollback()
                store.close()
                continue
            log.debug("## Done .. ##")
            # Result variable names, in the order the result graph yields them.
            rtVars = [
                rtVar
                for rtVar in resultG.objects(None, RESULT_NS.resultVariable)
            ]
            bindings = []
            resultSetNode = resultG.value(predicate=RESULT_NS.value,
                                          object=RESULT_NS.ResultSet)
            for solutionNode in resultG.objects(resultSetNode,
                                                RESULT_NS.solution):
                # Variables without a binding in this solution stay None.
                bindingDict = dict([(key, None) for key in rtVars])
                for bindingNode in resultG.objects(solutionNode,
                                                   RESULT_NS.binding):
                    value = resultG.value(subject=bindingNode,
                                          predicate=RESULT_NS.value)
                    name = resultG.value(subject=bindingNode,
                                         predicate=RESULT_NS.variable)
                    bindingDict[name] = value
                rbinds = [bindingDict[vName] for vName in rtVars]
                # print("Rbinds", rbinds)
                # Multi-variable rows become frozensets (order and duplicates
                # within a row are ignored); single-variable rows collapse to
                # the bare value. rbinds is always a list here, so the final
                # else is only reached when it is empty.
                if len(rbinds) > 1 and (isinstance(rbinds, list)
                                        or isinstance(rbinds, tuple)):
                    bindings.append(frozenset(rbinds))
                elif len(rbinds) == 1 and (isinstance(rbinds, list)
                                           or isinstance(rbinds, tuple)):
                    bindings.append(rbinds[0])
                else:
                    bindings.append(rbinds)
                # bindings.append(tuple([bindingDict[vName] for vName in rtVars]))
            log.debug(open(expectedRT).read())
            store.rollback()
            store.close()
        # NOTE(review): the skip check only happens here, after the manifest
        # and the expected results have already been parsed.
        if testFile in tests2Skip.keys():
            log.debug("Skipping test (%s) %s\n" % \
                        (testFile, tests2Skip[testFile]))
            raise SkipTest("Skipping test (%s) %s\n" % \
                        (testFile, tests2Skip[testFile]))
        query = open(testFile).read()
        log.debug("### %s (%s) ###" % (testCaseName, testFile))
        log.debug(query)
        # Parsing alone is part of the test: a syntax error propagates.
        p = parse(query)  #,DEBUG_PARSE)
        log.debug(p)
        if EVALUATE and source:
            log.debug("### Source Graph: ###")
            log.debug(open(source).read())
            store = plugin.get(STORE, Store)()
            store.open(configString, create=False)
            g = ConjunctiveGraph(store)
            try:
                g.parse(open(source), format='n3')
            # NOTE(review): bare except -- any failure while loading the
            # source (not only a format error) makes this row a silent skip.
            except:
                log.debug("Unexpected data format (for %s), skipping" % \
                                                                (source))
                store.rollback()
                store.close()
                continue
            rt = g.query(query, processor='sparql', DEBUG=False)
            if expectedRT:
                # Some result objects expose the payload as .result; others
                # are used directly.
                try:
                    result = rt.result
                except AttributeError:
                    result = rt
                if isinstance(result, Graph):
                    # Graph-valued results are compared against the graph
                    # file registered for this test in graphtests.
                    resgraph = open(graphtests[testFile]).read()
                    # NOTE(review): store is rebound here without closing
                    # the store opened for the source graph above.
                    store = plugin.get(STORE, Store)()
                    store.open(configString, create=False)
                    g = ConjunctiveGraph(store)
                    g.parse(data=resgraph, format="n3")
                    assert result == g, \
                            "### Test Failed: ###\n\nB:\n%s\n\nR:\n%s\n\n" % \
                                    (g.serialize(format="n3"),
                                     result.serialize(format="n3"))
                else:
                    # result = [r[0] for r in result if isinstance(r, (tuple, list))]
                    # Normalise rows the same way as the expected bindings.
                    def stab(r):
                        if isinstance(r, (tuple, list)):
                            return frozenset(r)
                        else:
                            return r

                    results = set([stab(r) for r in result])
                    # Passes when every expected binding occurs in the actual
                    # results; extra actual rows do not fail the test.
                    assert set(bindings).difference(results) == set([]) or set(bindings) == results, \
                            "### Test Failed: ###\n\nB:\n%s\n\nR:\n%s\n\n" % \
                                    (set(bindings), results)
                log.debug("### Test Passed: ###")
            # NOTE(review): rolled back but not closed on this path.
            store.rollback()