Beispiel #1
0
def getGraph():
    """
    Build and return the main graph, populated from trains.n3 on disk.
    """
    g = ConjunctiveGraph()
    g.parse("trains.n3", format="n3", publicID=TT['disk#context'])
    return g
Beispiel #2
0
def readGraphs():
    """
    Fetch a fixed set of status/graph URIs concurrently and merge them,
    plus the on-disk config, into one ConjunctiveGraph.

    Returns a twisted Deferred that fires with the populated graph once
    every fetch has completed (or failed).
    """
    g = ConjunctiveGraph()
    # this file should only be reread when it changes
    g.parse("../config.n3", format="n3")
    dl = []
    startTime = time.time()
    for uri in [
        "http://bang:9055/graph",
        "http://bang:9069/graph",
        "http://bang:9070/graph",
        "http://bang:9072/bang-9002/processStatus",
        "http://bang:9072/bang/processStatus",
        "http://bang:9072/dash/processStatus",
        "http://bang:9072/slash-11000/processStatus",
        "http://bang:9072/slash/processStatus",
        "http://bang:9072/star/processStatus",
        "http://bang:9075/graph",
        ]:
        # this ought to not reparse the ones that are 304 not modified
        d = getPage(uri)
        # `uri` is passed explicitly to addCallback, so each callback sees
        # its own URI rather than the loop's final value.
        def done(trig, uri):
            g.addN(parseTrig(trig))
            print "%s done in %.02fms" % (uri, 1000 * (time.time() - startTime))
        d.addCallback(done, uri)
        dl.append(d)
    # Fire with the merged graph only after all downloads settle.
    return defer.DeferredList(dl).addCallback(lambda result: g)
Beispiel #3
0
def check_propeties(vocabfile):
    """
    Check a vocabulary file for the required identifier/format metadata.

    (Function name typo "propeties" is kept for existing callers.)

    :param vocabfile: path or URL of the vocabulary to parse
    :return: (conforms, properties) tuple, or {} if the file cannot be
             parsed (asymmetric return kept for backward compatibility)
    """
    graph = Graph()
    try:
        graph.parse(vocabfile)
    except:
        return {}

    # Locate the vocabulary's subject: prefer dc:title, then
    # dcterms:title, then dc:creator, then dcterms:creator.
    subject = None  # bug fix: original said `none`, an undefined name
    for s in graph.subjects(namespaces['dc']['title'], None):
        subject = s
    if not subject:
        for s in graph.subjects(namespaces['dcterms']['title'], None):
            subject = s
    if not subject:
        for s in graph.subjects(namespaces['dc']['creator'], None):
            subject = s
    if not subject:
        for s in graph.subjects(namespaces['dcterms']['creator'], None):
            subject = s

    properties = {}

    # Identifier: collect every dc:identifier, falling back to
    # dcterms:identifier when none are present.
    identifier = []
    for ident in graph.objects(subject, namespaces['dc']['identifier']):
        # bug fix: original did `identifier = id`, clobbering the list
        identifier.append(ident)
    if not identifier:
        for ident in graph.objects(subject, namespaces['dcterms']['identifier']):
            identifier.append(ident)
    # bug fix: original stored `[id]`, which raised NameError when no
    # identifiers existed and otherwise kept only the last one
    properties['identifier'] = identifier

    # hasFormat: each format node should describe rdf:value, rdfs:label
    # and rdfs:type through its dc:format bnodes.
    prop1 = False  # bug fix: original initialised True, disabling the rdf:value check
    prop2 = False
    prop3 = False
    properties['format'] = []
    for fm in graph.objects(subject, namespaces['dcterms']['hasFormat']):
        properties['format'].append(fm)
        bnodes = graph.objects(fm, namespaces['dc']['format'])
        for b in bnodes:
            for value in graph.objects(b, namespaces['rdf']['value']):
                prop1 = True
            for value in graph.objects(b, namespaces['rdfs']['label']):
                prop2 = True
            for value in graph.objects(b, namespaces['rdfs']['type']):
                prop3 = True
        # NOTE(review): the flags are never reset between format nodes, so
        # one fully-described node satisfies the check for all — confirm
        # this accumulation is intended.
        if not 'all' in properties:
            properties['all'] = prop1 and prop2 and prop3
        else:
            properties['all'] = properties['all'] and prop1 and prop2 and prop3

    conforms = False
    if identifier and properties['format'] and properties['all']:
        conforms = True

    return (conforms, properties)
def main(specloc="file:index.rdf"):
    """The meat and potatoes: Everything starts here."""
    m = Graph()
    m.parse(specloc)

    classlist, proplist = specInformation(m)

    # Build HTML list of terms.
    azlist = buildazlist(classlist, proplist)

    # Generate Term HTML
    termlist = "<h3>Classes and Properties (full detail)</h3>"
    termlist += "<div class='termdetails'>"
    termlist += docTerms('Class', classlist, m)
    termlist += docTerms('Property', proplist, m)
    termlist += "</div>"

    # Generate RDF from original namespace.
    u = urllib.urlopen(specloc)
    rdfdata = u.read()
    rdfdata.replace("""<?xml version="1.0"?>""", "")

    # wip.template is a template file for the spec, python-style % escapes
    # for replaced sections.
    f = open("../0.1/template.html", "r")
    template = f.read()
    print template % (azlist.encode("utf-8"), termlist.encode("utf-8"),
                      rdfdata)
Beispiel #5
0
def check_type_definitions(vocabfile):
    """Return True when every declared term carries rdfs:isDefinedBy,
    False on parse failure or any missing definition link."""
    graph = Graph()
    try:
        graph.parse(vocabfile)
    except:
        return False

    # Decide which vocabulary flavour this is: if any subject is typed
    # with the rdfs marker class, check the RDFS term list, else OWL.
    rdfs_marker = vocab_type_definitions_test['rdfs']
    marker_subjects = [s for s in
                       graph.subjects(namespaces['rdf']['type'],
                                      URIRef(rdfs_marker))]
    if marker_subjects:
        term_types = vocab_type_definitions_rdfs
    else:
        term_types = vocab_type_definitions_owl

    all_definitions = True
    for term_type in term_types:
        seen = []
        for subj in graph.subjects(namespaces['rdf']['type'], term_type):
            if subj in seen:
                continue
            seen.append(subj)
            # A term counts as defined when at least one
            # rdfs:isDefinedBy triple is present.
            has_definition = False
            for _ in graph.objects(subj, namespaces['rdfs']['isDefinedBy']):
                has_definition = True
            all_definitions = all_definitions and has_definition
    return all_definitions
Beispiel #6
0
def main():
	"""Main Function
	Simple command-line procedure for web2rdf.

	Usage: script.py uriSrc — fetches RDF for the given URI and stores
	it into a local sqlite-backed RDF store."""

	# Require exactly one positional argument: the source URI.
	if len(sys.argv) != 2:
		print "Must call with a URI parameter."
		print "Usage: %s uriSrc" % sys.argv[0]
		return

	uri = sys.argv[1]

	# Get the RDF
	wrdf = Web2Rdf(uri)
	rdf = wrdf.getRdf()

	if not rdf:
		print "No RDF returned!"
		return False

	print "Got RDF..."
	rdf = rdfString(rdf)

	# Open Storage
	print "Opening store..."
	db = "./testdb.sqlite"
	rstore = RdfStore('sqlite', db)
	rstore.open()

	# Parse the fetched RDF into a fixed named graph and persist it.
	print "Storing..."
	graph = Graph(rstore.get(), identifier = URIRef("http://slashdot/Test2"))
	#graph.parse("example.rdf")
	graph.parse(rdf, publicID=uri)

	graph.commit()
def check_type_definitions(vocabfile):
    """Verify that each term declared in the vocabulary is annotated with
    rdfs:isDefinedBy; returns False on parse failure or a missing link."""
    g = Graph()
    try:
        g.parse(vocabfile)
    except:
        return False

    rdf_type = namespaces['rdf']['type']
    defined_by = namespaces['rdfs']['isDefinedBy']

    # Pick the RDFS term list when the rdfs marker type occurs at all,
    # otherwise fall back to the OWL term list.
    marker = URIRef(vocab_type_definitions_test['rdfs'])
    marker_found = False
    for _ in g.subjects(rdf_type, marker):
        marker_found = True
    if marker_found:
        candidates = vocab_type_definitions_rdfs
    else:
        candidates = vocab_type_definitions_owl

    ok = True
    for cls in candidates:
        visited = set()
        for term in g.subjects(rdf_type, cls):
            if term in visited:
                continue
            visited.add(term)
            # defined == at least one isDefinedBy object exists
            defined = any(True for _ in g.objects(term, defined_by))
            ok = ok and defined
    return ok
def check_propeties(vocabfile):
    """
    Validate that a vocabulary carries identifier and format metadata.

    Returns {} when the file cannot be parsed, otherwise a
    (conforms, properties) tuple.  (The "propeties" typo in the name is
    preserved because external callers use it.)
    """
    graph = Graph()
    try:
        graph.parse(vocabfile)
    except:
        return {}

    # Find the vocabulary's subject, trying title then creator in both
    # the dc and dcterms namespaces.
    subject = None  # fix: the original wrote `none` (NameError)
    for s in graph.subjects(namespaces['dc']['title'], None):
        subject = s
    if not subject:
        for s in graph.subjects(namespaces['dcterms']['title'], None):
            subject = s
    if not subject:
        for s in graph.subjects(namespaces['dc']['creator'], None):
            subject = s
    if not subject:
        for s in graph.subjects(namespaces['dcterms']['creator'], None):
            subject = s

    properties = {}

    # Identifier: gather all dc:identifier values, falling back to
    # dcterms:identifier when none exist.
    identifier = []
    for ident in graph.objects(subject, namespaces['dc']['identifier']):
        # fix: the original rebound the list to a single id here
        identifier.append(ident)
    if not identifier:
        for ident in graph.objects(subject, namespaces['dcterms']['identifier']):
            identifier.append(ident)
    # fix: the original stored `[id]`, raising NameError when no ids existed
    properties['identifier'] = identifier

    # hasFormat: format nodes should describe rdf:value, rdfs:label and
    # rdfs:type through dc:format bnodes.
    prop1 = False  # fix: was initialised True, which disabled the rdf:value check
    prop2 = False
    prop3 = False
    properties['format'] = []
    for fm in graph.objects(subject, namespaces['dcterms']['hasFormat']):
        properties['format'].append(fm)
        for b in graph.objects(fm, namespaces['dc']['format']):
            for value in graph.objects(b, namespaces['rdf']['value']):
                prop1 = True
            for value in graph.objects(b, namespaces['rdfs']['label']):
                prop2 = True
            for value in graph.objects(b, namespaces['rdfs']['type']):
                prop3 = True
        # NOTE(review): flags are never reset between format nodes, so one
        # complete node satisfies the check for all — confirm intended.
        if not 'all' in properties:
            properties['all'] = prop1 and prop2 and prop3
        else:
            properties['all'] = properties['all'] and prop1 and prop2 and prop3

    conforms = False
    if identifier and properties['format'] and properties['all']:
        conforms = True

    return (conforms, properties)
Beispiel #9
0
 def parse(self, result):
     """
     Convert a textual query result into an RDF graph.

     @param result: text result
     @return: rdf graph
     """
     g = ConjunctiveGraph()
     g.parse(StringInputSource(result))
     return g
 def parse(self, result):
     """
     Parse a textual query result into a fresh RDF graph.

     @param result: text result (fed through StringInputSource)
     @return: rdf graph containing the parsed triples
     """
     graph = ConjunctiveGraph()
     graph.parse(StringInputSource(result))
     return graph
Beispiel #11
0
    def testModel(self):
        """The n3 fixture must yield 3 quoted-graph subjects and 13 contexts."""
        g = ConjunctiveGraph()
        g.parse(StringInputSource(input), format="n3")
        # Quoted (formula) statements surface with a Graph as the subject.
        quoted = sum(1 for subj, _, _ in g if isinstance(subj, Graph))
        self.assertEquals(quoted, 3)
        self.assertEquals(len(list(g.contexts())), 13)
        g.close()
Beispiel #12
0
    def testModel(self):
        """N3 fixture should expose 3 quoted-graph subjects and 13 contexts."""
        g = ConjunctiveGraph()
        # `input` is a module-level n3 fixture defined elsewhere in this file.
        g.parse(StringInputSource(input), format="n3")
        i = 0
        for s, p, o in g:
            # Quoted (formula) statements appear with a Graph as subject.
            if isinstance(s, Graph):
                i += 1
        self.assertEquals(i, 3)
        self.assertEquals(len(list(g.contexts())), 13)

        g.close()
Beispiel #13
0
 def testModel(self):
     """Parse the n3 fixture and count quoted-graph subjects.

     NOTE(review): the assertions are commented out, so this only prints
     the running count — confirm whether that is intended."""
     print 'Probando la función testModel\n_____________________________'
     g = ConjunctiveGraph()
     # `input` is a module-level n3 fixture defined elsewhere in the file.
     g.parse(StringInputSource(input), format="n3")
     i = 0
     for s, p, o in g:
         if isinstance(s, Graph):
             i += 1
             print i
     #self.assertEquals(i, 3)
     #self.assertEquals(len(list(g.contexts())), 13)
     #print g.serialize()
     g.close()
def del_vocab_from_creator(userid, vocab):
    """
    Remove the dcterms:mediator triples for *vocab* from a creator's RDF
    file and rewrite that file in place.

    :param userid: creator id; the file is <creatorsdir>/<userid>.rdf
    :param vocab: vocabulary prefix used to build the vocab URI
    :return: False when the creator file is missing, True otherwise
    """
    creatorfile = os.path.join(ag.creatorsdir, '%s.rdf'%userid)
    if not os.path.isfile(creatorfile):
        return False
    graph = Graph()
    graph.parse(creatorfile)
    # Bug fix: the original referenced the undefined name `vocabprefix`
    # (NameError); the intended value is the `vocab` parameter.
    vocab_uri = URIRef("http://vocab.ox.ac.uk/%s"%vocab)
    for s, p, o in graph.triples((URIRef(vocab_uri), namespaces['dcterms']['mediator'], None)):
        graph.remove((s, p, o))
    rdf_str = graph.serialize()
    # Bug fix: `creatorfile` was never defined in the original before this
    # write, raising NameError; it is now the path computed above.
    f = codecs.open(creatorfile, 'w', 'utf-8')
    f.write(rdf_str)
    f.close()
    return True
Beispiel #15
0
def del_vocab_from_creator(userid, vocab):
    """
    Delete the mediator triples for a vocabulary from the creator file
    belonging to *userid*, rewriting the file in place.

    :param userid: creator id; file is <creatorsdir>/<userid>.rdf
    :param vocab: vocabulary prefix used in the vocab URI
    :return: False if the creator file does not exist, else True
    """
    creatorfile = os.path.join(ag.creatorsdir, '%s.rdf' % userid)
    if not os.path.isfile(creatorfile):
        return False
    graph = Graph()
    graph.parse(creatorfile)
    # Fix: the original used the undefined name `vocabprefix`; the
    # parameter `vocab` is what callers supply.
    vocab_uri = URIRef("http://vocab.ox.ac.uk/%s" % vocab)
    for s, p, o in graph.triples(
        (URIRef(vocab_uri), namespaces['dcterms']['mediator'], None)):
        graph.remove((s, p, o))
    rdf_str = graph.serialize()
    # Fix: `creatorfile` was undefined at this point in the original
    # (NameError on every successful run).
    f = codecs.open(creatorfile, 'w', 'utf-8')
    f.write(rdf_str)
    f.close()
    return True
Beispiel #16
0
 def _discover_meta(self, homepage, candidate):
     """
     Record *candidate* as metadata for *homepage* and, when its URL ends
     in .rdf, attempt to parse it as RDF.

     Raises RDFDiscoveringMalformedError when the document fails to
     parse, and RDFDiscoveringBrokenLinkError when it is unreachable.
     """
     self.triples.push_meta(homepage, candidate)
     self.stats.count_rdf()
     logging.debug("Analyzing '%s'" % candidate)
     # FIXME: Not a good idea, think about it.
     if re.match(r".*\.rdf$", candidate) is not None:
         # Parsing fetches the document; network errors surface as URLError.
         graph = ConjunctiveGraph()
         try:
             graph.parse(candidate)
         except (SAXParseException, RdflibParserError), e:
             self.stats.count_invalidrdf()
             raise RDFDiscoveringMalformedError(str(e), candidate)
         except urllib2.URLError:
             self.stats.count_invalidrdf()
             raise RDFDiscoveringBrokenLinkError(candidate)
Beispiel #17
0
 def testQueryingMore(self):
     """Each result URI should point at a foaf:Person via foaf:primaryTopic."""
     for result in self.results:
         uri = result[0]
         g = ConjunctiveGraph()
         # Fetches the document over the network and parses it.
         g.parse(uri)
         query = Parse("""
                             SELECT ?person
                             WHERE {
                                      <%s> foaf:primaryTopic ?person .
                                      ?person rdf:type foaf:Person . 
                                   }
                       """ % uri )
         queryResults = g.query(query, initNs=NSbindings).serialize('python')
         # Documents with no match are silently skipped.
         if (len(queryResults)>0):
             self.assertEquals(str(queryResults[0]), "http://www.wikier.org/foaf#wikier")
Beispiel #18
0
class TestSPARQLToldBNodes(unittest.TestCase):
    """Told-BNode behaviour: querying by a BNode's n3 name should match
    only that exact node, not every blank node."""
    def setUp(self):
        # Build a graph with two anonymous subjects: one carrying
        # :prop :val, the other typed rdfs:Class.
        NS = u"http://example.org/"
        self.graph = ConjunctiveGraph()
        self.graph.parse(StringInputSource("""
           @prefix    : <http://example.org/> .
           @prefix rdf: <%s> .
           @prefix rdfs: <%s> .
           [ :prop :val ].
           [ a rdfs:Class ]."""%(RDF.RDFNS,RDFS.RDFSNS)), format="n3")
    def testToldBNode(self):
        # NOTE(review): `s` deliberately leaks out of this loop — it ends
        # up bound to the subject of the last (None, rdf:type, None)
        # triple and is interpolated into the query below.
        for s,p,o in self.graph.triples((None,RDF.type,None)):
            pass
        query = """SELECT ?obj WHERE { %s ?prop ?obj }"""%s.n3()
        print query
        rt = self.graph.query(query)
        self.failUnless(len(rt) == 1,"BGP should only match the 'told' BNode by name (result set size: %s)"%len(rt))
Beispiel #19
0
 def testQueryingMore(self):
     """For every result URI, the document's foaf:primaryTopic must be a
     foaf:Person, and the first match must be the expected identity."""
     for result in self.results:
         uri = result[0]
         g = ConjunctiveGraph()
         # Network fetch: parses the remote document into the graph.
         g.parse(uri)
         query = Parse("""
                             SELECT ?person
                             WHERE {
                                      <%s> foaf:primaryTopic ?person .
                                      ?person rdf:type foaf:Person . 
                                   }
                       """ % uri)
         queryResults = g.query(query,
                                initNs=NSbindings).serialize('python')
         # Empty result sets are tolerated (document may lack the triple).
         if (len(queryResults) > 0):
             self.assertEquals(str(queryResults[0]),
                               "http://www.wikier.org/foaf#wikier")
Beispiel #20
0
def main(specloc, template, mode="spec"):
    """The meat and potatoes: Everything starts here."""

    m = Graph()
    m.parse(specloc)

#    m = RDF.Model()
#    p = RDF.Parser()
#    p.parse_into_model(m, specloc)
    
    classlist, proplist = specInformation(m)
    
    if mode == "spec":
        # Build HTML list of terms.
        azlist = buildazlist(classlist, proplist)
    elif mode == "list":
        # Build simple <ul> list of terms.
        azlist = build_simple_list(classlist, proplist)

    # Generate Term HTML
#    termlist = "<h3>Classes and Properties (full detail)</h3>"
    termlist = docTerms('Class',classlist,m)
    termlist += docTerms('Property',proplist,m)
    
    # Generate RDF from original namespace.
    u = urllib.urlopen(specloc)
    rdfdata = u.read()
    rdfdata = re.sub(r"(<\?xml version.*\?>)", "", rdfdata)
    rdfdata = re.sub(r"(<!DOCTYPE[^]]*]>)", "", rdfdata)
    rdfdata.replace("""<?xml version="1.0"?>""", "")
    
    # print template % (azlist.encode("utf-8"), termlist.encode("utf-8"), rdfdata.encode("ISO-8859-1"))
    #template = re.sub(r"^#format \w*\n", "", template)
    #template = re.sub(r"\$VersionInfo\$", owlVersionInfo(m).encode("utf-8"), template) 
    
    # NOTE: This works with the assumtpion that all "%" in the template are escaped to "%%" and it
    #       contains the same number of "%s" as the number of parameters in % ( ...parameters here... )

    print "AZlist",azlist
    print "Termlist",termlist
 
#xxx    template = template % (azlist.encode("utf-8"), termlist.encode("utf-8"));    
#    template += "<!-- specification regenerated at " + time.strftime('%X %x %Z') + " -->"
    
    return template
Beispiel #21
0
 def load_store(files):
     """
    Takes a directory of RDf files and loads them into the store.
    """
     try:
         store = plugin.get("MySQL", Store)("rdflib_db")
         store.open(config["rdflib.config"])
         graph = ConjunctiveGraph(store)
         # iterate through files and load them into the graph
         for fpath in fl:
             graph.parse(fpath, format=get_format(fpath), publicID=context_uri(fpath))
             print fpath + " loaded."
         # save triples to store
         graph.commit()
         graph.close()
     except:
         print "=== error opening RDF store ==="
         exit
Beispiel #22
0
def testSPARQLNotEquals():
    """FILTER (?val != 1) should leave only :bar in the result set."""
    NS = u"http://example.org/"
    g = ConjunctiveGraph()
    g.parse(StringInputSource("""
       @prefix    : <http://example.org/> .
       @prefix rdf: <%s> .
       :foo rdf:value 1.
       :bar rdf:value 2."""%RDF.RDFNS), format="n3")
    results = g.query("""SELECT ?node 
                        WHERE {
                                ?node rdf:value ?val.
                                FILTER (?val != 1)
                               }""",
                      initNs={'rdf':RDF.RDFNS},
                      DEBUG=False)
    # Only :bar (value 2) survives the filter.
    for binding in results:
        assert binding[0] == URIRef("http://example.org/bar")
Beispiel #23
0
def testSPARQLNotEquals():
    """Two triples with values 1 and 2; FILTER (?val != 1) must keep
    only :bar."""
    NS = u"http://example.org/"
    graph = ConjunctiveGraph()
    graph.parse(StringInputSource("""
       @prefix    : <http://example.org/> .
       @prefix rdf: <%s> .
       :foo rdf:value 1.
       :bar rdf:value 2.""" % RDF.RDFNS),
                format="n3")
    rt = graph.query("""SELECT ?node 
                        WHERE {
                                ?node rdf:value ?val.
                                FILTER (?val != 1)
                               }""",
                     initNs={'rdf': RDF.RDFNS},
                     DEBUG=False)
    # Every surviving binding must be :bar.
    for row in rt:
        item = row[0]
        assert item == URIRef("http://example.org/bar")
Beispiel #24
0
def getRdfXml(rdf):
    """
    Serialize an extracted-triples structure to RDF/XML.

    :param rdf: dict with 'namespaces' (prefix -> uri) and 'triples'
                (the structure consumed by the get*Subjects helpers)
    :return: RDF/XML string, produced by round-tripping through n3
    """
    n3 = ""

    # Append the RDF namespace and print the prefix namespace mappings
    rdf['namespaces']['xh1'] = "http://www.w3.org/1999/xhtml/vocab#"
    rdf['namespaces']['rdf'] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    for prefix, uri in rdf['namespaces'].items():
        n3 += "@prefix %s: <%s> .\n" % (prefix, uri)

    # Print each subject-based triple to the screen
    triples = rdf['triples']
    processed = []

    # Get all of the non-bnode subjects
    nonBnodeSubjects = getNonBnodeSubjects(triples)

    # Get all of the bnode subjects
    bnodeSubjects = getBnodeSubjects(triples)

    # Named subjects first; `processed` prevents emitting a subject twice.
    for subject in nonBnodeSubjects:
        subjectTriples = getTriplesBySubject(subject, triples)
        #print "PROCESSING NB SUBJECT:", subjectTriples

        if(subject not in processed):
            n3 += tripleToN3(subjectTriples, processed, triples)
        processed.append(subject)

    # Then any bnode subjects not already emitted as nested structures.
    for subject in bnodeSubjects:
        subjectTriples = getTriplesBySubject(subject, triples)
        #print "PROCESSING BN SUBJECT:", subject
        if(subject not in processed):
            n3 += bnodeToN3(subjectTriples, processed, triples)
            n3 += " .\n"

    #print n3

    # Round-trip: parse the assembled n3 and serialize as RDF/XML.
    g = ConjunctiveGraph()
    g.parse(StringIO(n3), format="n3")
    rdfxml = g.serialize()

    return rdfxml
Beispiel #25
0
def store_ontology( default_graph_uri ):
        """
        Parse the ontology at *default_graph_uri* straight into a
        MySQL-backed graph named after that URI, commit, and return the
        number of triples stored.

        NOTE(review): credentials are hard-coded in the connect string —
        consider moving them to configuration.
        """
      #  graph_local = Graph( identifier = URIRef( default_graph_uri ) )
        print "Preparing to read graph: " + default_graph_uri + "\n"
     #   graph_local.parse( default_graph_uri )
      #  print "Read Graph saving to database"
        configString = "host=dbclass,user=brandon,password=b2341x,db=portal"
        store = plugin.get( 'MySQL', Store)('rdflib')
        # Open the store, creating the schema on first use.
        rt = store.open(configString, create=False)
        if rt == NO_STORE:
                store.open(configString, create=True)
        else:
                assert rt == VALID_STORE, "The underlying store is not valid"
        print "Opened Store"
        graph = Graph(store, identifier = URIRef( default_graph_uri ))
#        graph = Graph( identifier = URIRef( default_graph_uri ))       
        # for stmt in graph_local:
        #        graph.add( stmt ) 
        # Parsing fetches the ontology directly into the backed graph.
        graph.parse( default_graph_uri )
        print "Committing Graph to database\n"
        graph.commit()
        print "Saved Graph to database\n"
        store.close()
        return len( graph )
Beispiel #26
0
def testBasic(DEBUG = False):
    """
    Run the SPARQL data/examples test suite against the configured store.

    For each .rq file: read the sibling manifest to find the source
    graph and expected results, parse the expected bindings, run the
    query, and compare (ordered, then unordered).  NegativeSyntax tests
    must fail to parse.  Raises Exception on any test failure.
    """
    from glob import glob
    from sre import sub
    for testFile in glob('data/examples/*.rq'):#glob('data/*/*.rq'):
        store = plugin.get(STORE,Store)()
        bootStrapStore(store)
        store.commit()

        prefix = testFile.split('.rq')[-1]
        # Manifest lives next to the query file; .n3 preferred, .ttl fallback.
        manifestPath = '/'.join(testFile.split('/')[:-1]+['manifest.n3'])
        manifestPath2 = '/'.join(testFile.split('/')[:-1]+['manifest.ttl'])
        queryFileName = testFile.split('/')[-1]
        store = plugin.get(STORE,Store)()
        store.open(configString,create=False)
        assert len(store) == 0
        manifestG=ConjunctiveGraph(store)
        if not os.path.exists(manifestPath):
            assert os.path.exists(manifestPath2)
            manifestPath = manifestPath2
        manifestG.default_context.parse(open(manifestPath),publicID=TEST_BASE,format='n3')
        manifestData = \
           manifestG.query(
                                  PARSED_MANIFEST_QUERY,
                                  initBindings={'?query' : TEST_BASE[queryFileName]},
                                  initNs=manifestNS,
                                  DEBUG = False)
        store.rollback()
        store.close()
        for source,testCaseName,testCaseComment,expectedRT in manifestData:

            # Resolve manifest-relative URIs to local paths.
            if expectedRT:
                expectedRT = '/'.join(testFile.split('/')[:-1]+[expectedRT.replace(TEST_BASE,'')])
            if source:
                source = '/'.join(testFile.split('/')[:-1]+[source.replace(TEST_BASE,'')])

            testCaseName = testCaseComment and testCaseComment or testCaseName
            print "## Source: %s ##"%source
            print "## Test: %s ##"%testCaseName
            print "## Result: %s ##"%expectedRT

            #Expected results
            if expectedRT:
                store = plugin.get(STORE,Store)()
                store.open(configString,create=False)
                resultG=ConjunctiveGraph(store).default_context
#                if DEBUG:
#                    print "###"*10
#                    print "parsing: ", open(expectedRT).read()
#                    print "###"*10
                assert len(store) == 0
                print "## Parsing (%s) ##"%(expectedRT)
                if not trialAndErrorRTParse(resultG,expectedRT,DEBUG):
                    if DEBUG:
                        print "Unexpected result format (for %s), skipping"%(expectedRT)
                    store.rollback()
                    store.close()
                    continue
                if DEBUG:
                    print "## Done .. ##"

                # Reconstruct the expected bindings from the DAWG result
                # vocabulary: one tuple per solution, ordered by rtVars.
                rtVars = [rtVar for rtVar in resultG.objects(None,RESULT_NS.resultVariable)]
                bindings = []
                resultSetNode = resultG.value(predicate=RESULT_NS.value,object=RESULT_NS.ResultSet)
                for solutionNode in resultG.objects(resultSetNode,RESULT_NS.solution):
                    bindingDict = dict([(key,None) for key in rtVars])
                    for bindingNode in resultG.objects(solutionNode,RESULT_NS.binding):
                        value = resultG.value(subject=bindingNode,predicate=RESULT_NS.value)
                        name  = resultG.value(subject=bindingNode,predicate=RESULT_NS.variable)
                        bindingDict[name] = value
                    bindings.append(tuple([bindingDict[vName] for vName in rtVars]))
                if DEBUG:
                    print "Expected bindings: ", bindings
                    print open(expectedRT).read()
                store.rollback()
                store.close()

            # NegativeSyntax tests pass only when parsing raises.
            if testFile.startswith('data/NegativeSyntax'):
                try:
                    query = open(testFile).read()
                    p = Parse(query,DEBUG)
                except:
                    continue
                else:
                    raise Exception("Test %s should have failed!"%testFile)
            if testFile in tests2Skip:
                print "Skipping test (%s)"%testCaseName
                continue
            query = open(testFile).read()
            print "### %s (%s) ###"%(testCaseName,testFile)
            print query
            p = Parse(query,DEBUG_PARSE)
            if DEBUG:
                print p
            if EVALUATE and source:
                if DEBUG:
                    print "### Source Graph: ###"
                    print open(source).read()
                store = plugin.get(STORE,Store)()
                store.open(configString,create=False)
                g=ConjunctiveGraph(store)
                try:
                    g.parse(open(source),format='n3')
                except:
                    print "Unexpected data format (for %s), skipping"%(source)
                    store.rollback()
                    store.close()
                    continue
                #print store
                rt = g.query(p,DEBUG = DEBUG)
                # Compare ordered first; fall back to an unordered
                # set-of-sets comparison before declaring failure.
                if expectedRT:
                    if rt != bindings and Set([Set(i) for i in rt]) != Set([Set(i) for i in bindings]):#unorderedComparison(rt,bindings):
                        print "### Expected Result (%s) ###"%expectedRT
                        pprint(bindings)
                        print "### Actual Results ###"
                        pprint(rt)
                        raise Exception("### TEST FAILED!: %s ###"%testCaseName)
                    else:
                        print "### TEST PASSED!: %s ###"%testCaseName
                store.rollback()
Beispiel #27
0
class sparql_funcs():
    def __init__(self):
        self.g = Graph('IOMemory')
        #self.endpoint = "http://www.opencorrespondence.org/data/endpoint/rdf"
        #self.g.bind('geo', geo)

    def find_places(self):
        '''
            Function to get the distinct locations mentioned in the headers of the letters. 
            These are the locations from which Dickens wrote. 
            TODO: Parsing the letters to get the places mentioned in them
        '''
        row = set()
        o = OFS()

        for b in o.list_buckets():
            endpoint = o.get_stream(b, "endpoint")

        self.g.parse(endpoint)

        for s, _, n in self.g.triples((None, dublin_core['title'], None)):
            loc_key = urllib.unquote(
                n.replace("http://www.opencorrespondence.org/place/resource/",
                          "").replace("/rdf", ""))
            row.add(self.tidy_location(loc_key))

        return row

    def tidy_location(self, location):
        '''
           Function to tidy up some of the places where they refer to the same place
           TODO: prob need some language processing to make this scalable
        '''
        ret_location = ''
        if location == 'Office Of "household Words,':
            ret_location = "Household Words"
        elif location == '"household Words" Office':
            ret_location = "Household Words"
        elif location == '"household Words"':
            ret_location = "Household Words"
        elif location == 'H. W. Office':
            ret_location = "Household Words"
        elif location == '"household Words,':
            ret_location = "Household Words"
        elif location == '"all The Year Round" Office':
            ret_location = "All The Year Round"
        elif location == 'Office Of "all The Year Round,':
            ret_location = "All The Year Round"
        elif location == "Gad's Hill Place":
            ret_location = "Gads Hill"
        elif location == "Gad's Hill":
            ret_location = "Gads Hill"
        elif location == "Gad's Hill Place, Higham":
            ret_location = "Gads Hill"
        elif location == "Tavistock House, Tavistock Square":
            ret_location = "Tavistock House"
        elif location == "London, Tavistock House":
            ret_location = "Tavistock House"
        elif location == "Tavistock House, London":
            ret_location = "Tavistock House"
        else:
            if "U.s." in location:
                location = str(location).replace("U.s", "")
            ret_location = str(location).replace(".", "")

        return ret_location

    def find_correspondents(self):
        '''
            Function to get the distinct locations mentioned in the headers of the letters. 
            These are the locations from which Dickens wrote. 
            TODO: Parsing the letters to get the places mentioned in them
        '''
        row = set()
        self.g.parse(self.endpoint)

        for s, _, n in self.g.triples((None, letter['correspondent'], None)):
            loc_key = urllib.unquote(
                n.replace(
                    "http://www.opencorrespondence.org/correspondent/resource/",
                    "").replace("/rdf", ""))
            row.add(loc_key)

        return row

    def get_abstract(self, resource_id):

        self.g.parse('http://dbpedia.org/resource/'.resource_id)
        q = '''
          SELECT *
                WHERE 
                {
                ?x dbpedia:abstract ?abstract .
                FILTER (lang(?abstract) = 'en')
                }
        '''
        for row in self.g.query(
                q,
                initNs=dict(dbpedia=Namespace("http://dbpedia.org/ontology/")),
                initBindings={}):
            return row[1]

    def query_dates(self, author):
        '''query to identify individual dates to a correspondent'''
        q = '''
        SELECT ?date
        FROM <http://localhost:5000/data/endpoint/rdf>
        WHERE {
            ?r dc:subject  \'''' + author + '''\' .  
            ?r dc:date  ?date.  
        }
        '''
        dates = []
        for row in self.g.query(
                q,
                initNs=dict(letter=Namespace(
                    "http://www.opencorrespondence.org/schema#"),
                            dc=Namespace("http://purl.org/dc/elements/1.1/")),
                initBindings={}):

            date = str(row[0]).split('-')

            if date[0][1:].isdigit():
                dates.append(date[0])
        print dates
        dic = {}

        for dt in dates:
            dic[dt] = dates.count(dt)

        return dic
Beispiel #28
0
 def testParse(self):
     """Smoke test: fetch and parse a remote n3 policy document.

     NOTE(review): requires network access; fails when offline."""
     g = ConjunctiveGraph()
     g.parse("http://groups.csail.mit.edu/dig/2005/09/rein/examples/troop42-policy.n3", format="n3")
Beispiel #29
0
def schemadoc(uris): 
   G = Graph()
   for uri in uris: 
      G.parse(uri)

   print """
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Schema Documentation</title>
<style type="text/css">
body { margin: 1em; font-family: Georgia, sans-serif; }
h1 { font-family: Tahoma, sans-serif; }
h2, h3, h4, h5, h6 { font-family: Arial, sans-serif; }
a { font-weight: bold; color: #036; }
dt.class { margin-top: 0.75em; }
dt.property { margin-top: 0.75em; }
address { padding-top: 0.35em; border-top: 2px solid #369; }
</style>
</head>
<body>
<h1>Schema Documentation</h1>
"""
   classes = []
   for metaclass in [RDFS.Class, OWL.Class]: 
      for uri in G.subjects(RDF.type, metaclass): 
         if not isinstance(uri, URIRef): continue

         c = Class(uri)
         c.classes = [Class(u) for u in G.objects(uri, RDFS.subClassOf)
                      if isinstance(u, URIRef)]
         for prop in G.subjects(RDFS.domain, uri): 
            p = Property(prop)
            ranges = [Class(u) for u in G.objects(prop, RDFS.range)]
            c.properties.append((p, ranges))
         # c.properties = [Property(u) for u in G.subjects(RDFS.domain, uri)]
         c.comments = [str(s) for s in G.objects(uri, RDFS.comment)]
         classes.append(c)

   print '<h2>Classes</h2>'
   print '<ul>'
   for c in sorted(classes): 
      print '<li>'
      print '<dl>'
      print '<dt class="class">'
      sys.stdout.write(c.name())

      if c.classes: 
         o = ', '.join(cls.name(format='text') for cls in sorted(c.classes))
         print '(' + o + ')'
      else: print
      print '</dt>'

      for comment in c.comments: 
         print '<dd>'
         print comment
         print '</dd>'

      for prop, ranges in sorted(c.properties): 
         print '<dd>'
         print '   ' + prop.name()
         if ranges: 
            print ' => ' + ', '.join(range.name() for range in ranges)
         print '</dd>'
      print '</dt>'
      print '</li>'
   print '</ul>'

   print '<h2>Properties</h2>'
   properties = []
   print '<dl>'
   for propclass in [RDF.Property, OWL.FunctionalProperty,
                     OWL.InverseFunctionalProperty]: 
      for uri in G.subjects(RDF.type, propclass): 
         if not isinstance(uri, URIRef): continue

         p = Property(uri)
         properties.append(p)
         p.kind = Class(propclass)
         p.domains = [Class(u) for u in G.objects(uri, RDFS.domain)
                      if isinstance(u, URIRef)]
         p.ranges = [Class(u) for u in G.objects(uri, RDFS.range) 
                     if isinstance(u, URIRef)]
         p.comments = [str(s) for s in G.objects(uri, RDFS.comment)]

   for p in sorted(properties): 
      print '<dt class="property">'
      print p.name() + ' (' + p.kind.name(format='text') + ')'
      print '</dt>'

      for comment in p.comments: 
         print '<dd>'
         print comment
         print '</dd>'

      if p.domains: 
         print '<dd>domain: '
         print ', '.join(domain.name() for domain in p.domains)
         print '</dd>'

      if p.ranges: 
         print '<dd>range: '
         print ', '.join(range.name() for range in p.ranges)
         print '</dd>'
   print '</dl>'

   print '<address>'
   print 'Generated by <a href="http://inamidst.com/proj/sdoc/"'
   print '>Schemadoc</a>'
   print '</address>'
   print '</body>'
   print '</html>'
Beispiel #30
0
   if parse.bozo:
       try:
           raise parse.bozo_exception
       except (SAXParseException, feedparser.CharacterEncodingUnknown, 
               feedparser.NonXMLContentType):
           self.stats.count_invalidfeed()
           raise RSSParsingFeedMalformedError(feed)
       except feedparser.CharacterEncodingOverride:
           pass
 
   if parse.version:
       graph = ConjunctiveGraph()
       if parse.version in ("rss10"):
           logging.debug("Looks like RDF (%s)" % parse.version)
           try:
               graph.parse(feed, format="xml")
           except (SAXParseException, RdflibParserError), e:
               self.stats.count_invalidfeed()
               raise RSSParsingFeedMalformedError(feed)
           self.stats.count_rss1feed()
       elif parse.version in ("rss20", "rss094"):
           logging.debug("Looks like XML (%s)" % parse.version)
           try:
               transformed_feed = self._transform_feed(feed, parse.encoding)
           except Exception:  # Ignoring all errors
               self.stats.count_invalidfeed()
               raise RSSParsingXSLTError(feed)
           try:
               graph.parse(StringIO(transformed_feed), format="xml")
           except (SAXParseException, RdflibParserError), e:
               self.stats.count_invalidfeed()
 def testFileUploadBulk(self):
     """Bulk stress test: 1500 times, create a dataset, upload images.zip,
     and verify the manifest RDF and dataset state returned by the server.

     Per-iteration timings are appended to test_times.log.
     """
     for i in range(0, 1500):
         # Upload file to dataset - POST file to /silo_name/datasets/dataset_name
         # Create a new dataset, check response
         start = datetime.now()
         dataset_id= uuid4().hex
         #dataset_id='TestSubmission%d'%i
         # FIX: open the log via `with` so the handle is closed even when a
         # write fails (original open/write/close leaked on error).
         with open('test_times.log', 'a') as f:
             f.write('%s: Creating and uploading file to dataset %s \n'%(start.isoformat(), dataset_id))
         self.createSubmissionDataset(dataset_id=dataset_id)
         #Access state information
         (resp, respdata) = self.doHTTP_GET(
             resource="states/%s"%dataset_id, 
             expect_status=200, expect_reason="OK", expect_type="application/json")
         # Upload zip file, check response
         zipdata = self.uploadSubmissionZipfile(dataset_id=dataset_id, file_to_upload='images.zip', filename='images.zip')
         end = datetime.now()
         delta = end - start
         # Whole seconds only; microseconds are deliberately ignored.
         time_used = delta.days * 86400 + delta.seconds
         with open('test_times.log', 'a') as f:
             f.write('    Time taken: %s \n\n'%str(time_used))
         # Access and check list of contents
         (resp, rdfdata) = self.doHTTP_GET(
             resource="datasets/%s"%dataset_id, 
             expect_status=200, expect_reason="OK", expect_type="application/rdf+xml")
         rdfgraph = Graph()
         rdfstream = StringIO(rdfdata)
         rdfgraph.parse(rdfstream) 
         subj  = URIRef(self.getManifestUri("datasets/%s"%dataset_id))
         base = self.getManifestUri("datasets/%s/"%dataset_id)
         dcterms = "http://purl.org/dc/terms/"
         ore  = "http://www.openarchives.org/ore/terms/"
         oxds = "http://vocab.ox.ac.uk/dataset/schema#"
         stype = URIRef(oxds+"DataSet")
         # The manifest of a one-file dataset is expected to hold 12 triples.
         self.assertEqual(len(rdfgraph),12,'Graph length %i' %len(rdfgraph))
         self.failUnless((subj,RDF.type,stype) in rdfgraph, 'Testing submission type: '+subj+", "+stype)
         self.failUnless((subj,URIRef(dcterms+"created"),None) in rdfgraph, 'dcterms:created')
         self.failUnless((subj,URIRef(ore+"aggregates"),URIRef(base+"images.zip")) in rdfgraph)
         self.failUnless((subj,URIRef(dcterms+"identifier"),None) in rdfgraph, 'dcterms:identifier')
         self.failUnless((subj,URIRef(dcterms+"mediator"),None) in rdfgraph, 'dcterms:mediator')
         self.failUnless((subj,URIRef(dcterms+"rights"),None) in rdfgraph, 'dcterms:rights')
         self.failUnless((subj,URIRef(dcterms+"license"),None) in rdfgraph, 'dcterms:license')
         self.failUnless((subj,URIRef(dcterms+"publisher"),None) in rdfgraph, 'dcterms:publisher')
         self.failUnless((subj,URIRef(oxds+"isEmbargoed"),None) in rdfgraph, 'oxds:isEmbargoed')
         self.failUnless((subj,URIRef(oxds+"currentVersion"),'1') in rdfgraph, 'oxds:currentVersion')
         self.failUnless((subj,URIRef(dcterms+"modified"),None) in rdfgraph, 'dcterms:modified')
         # Access and check zip file content
         (resp, zipfile) = self.doHTTP_GET(
             resource="datasets/%s/images.zip"%dataset_id,
             expect_status=200, expect_reason="OK", expect_type="application/zip")
         self.assertEqual(zipdata, zipfile, "Difference between local and remote zipfile!")
         #Access state information and check
         (resp, data) = self.doHTTP_GET(
             resource="states/%s"%dataset_id, 
             expect_status=200, expect_reason="OK", expect_type="application/json")
         state = data['state']
         parts = data['parts']
         self.assertEqual(len(state.keys()), 12, "States")
         self.assertEqual(state['item_id'], dataset_id, "Submission item identifier")
         self.assertEqual(len(state['versions']), 2, "Two versions")
         self.assertEqual(state['versions'][0], '0', "Version 0")
         self.assertEqual(state['versions'][1], '1', "Version 1")
         self.assertEqual(state['currentversion'], '1', "Current version == 1")
         self.assertEqual(state['rdffileformat'], 'xml', "RDF file type")
         self.assertEqual(state['rdffilename'], 'manifest.rdf', "RDF file name")
         self.assertEqual(state['files']['0'], ['manifest.rdf'], "List should contain just manifest.rdf")
         self.assertEqual(len(state['files']['1']), 2, "List should contain manifest.rdf and images.zip")
         self.assertEqual(len(state['metadata_files']['0']), 0, "metadata_files of version 0")
         self.assertEqual(len(state['metadata_files']['1']), 0, "metadata_files of version 1")
         self.assertEqual(len(state['subdir']['0']), 0,   "Subdirectory count for version 0")
         self.assertEqual(len(state['subdir']['1']), 0,   "Subdirectory count for version 1")
         self.assertEqual(state['metadata']['createdby'], RDFDatabankConfig.endpointuser, "Created by")
         # Embargo defaults to 70 years from now; compare the date part only
         # to avoid flakiness from sub-second timestamp differences.
         d = (datetime.now() + relativedelta(years=+70)).isoformat()
         d = d.split('T')[0]
         self.assertEqual(state['metadata']['embargoed'], True, "Embargoed?")
         self.assertTrue(d in state['metadata']['embargoed_until'], "embargoed_until %s?"%d)
         self.failUnless('Created new data package' in state['versionlog']['0'], "Version 0 log")
         self.failUnless('Added or updated file images.zip' in state['versionlog']['1'], "Version 1 log")
         self.assertEqual(len(parts.keys()), 4, "Parts")
         self.assertEqual(len(parts['4=%s'%dataset_id].keys()), 13, "File stats for 4=%s"%dataset_id)
         self.assertEqual(len(parts['manifest.rdf'].keys()), 13, "File stats for manifest.rdf")
         self.assertEqual(len(parts['images.zip'].keys()), 13, "File stats for images.zip")
Beispiel #32
0
def testN3Store(store="default", configString=None):
    """Exercise a formula-aware rdflib store: quoted graphs, variables,
    contexts, and removal semantics.

    ``store`` names the rdflib store plugin to test; ``configString``,
    when given, is its open/destroy configuration.  Relies on the
    module-level ``testN3`` N3 source and the ``implies`` predicate
    (presumably log:implies — TODO confirm against the module header).
    """
    g = ConjunctiveGraph(store=store)
    if configString:
        # Start from a clean slate: drop any persisted data, then reopen.
        g.destroy(configString)
        g.open(configString)
    g.parse(StringInputSource(testN3), format="n3")
    print g.store
    try:
        # Capture the antecedent/consequent of the (single expected)
        # implication; both must have parsed as QuotedGraph formulae.
        for s, p, o in g.triples((None, implies, None)):
            formulaA = s
            formulaB = o

        assert type(formulaA) == QuotedGraph and type(formulaB) == QuotedGraph
        a = URIRef('http://test/a')
        b = URIRef('http://test/b')
        c = URIRef('http://test/c')
        d = URIRef('http://test/d')
        v = Variable('y')

        # A fresh ConjunctiveGraph over the same store: sees asserted
        # statements across all contexts, but not quoted-formula contents.
        universe = ConjunctiveGraph(g.store)

        #test formula as terms
        assert len(list(universe.triples((formulaA, implies, formulaB)))) == 1

        #test variable as term and variable roundtrip
        assert len(list(formulaB.triples((None, None, v)))) == 1
        for s, p, o in formulaB.triples((None, d, None)):
            if o != c:
                assert isinstance(o, Variable)
                assert o == v
        s = list(universe.subjects(RDF.type, RDFS.Class))[0]
        assert isinstance(s, BNode)
        assert len(list(universe.triples((None, implies, None)))) == 1
        assert len(list(universe.triples((None, RDF.type, None)))) == 1
        assert len(list(formulaA.triples((None, RDF.type, None)))) == 1
        assert len(list(formulaA.triples((None, None, None)))) == 2
        assert len(list(formulaB.triples((None, None, None)))) == 2
        assert len(list(universe.triples((None, None, None)))) == 3
        # d appears twice inside formulaB but only once as an asserted triple.
        assert len(
            list(formulaB.triples((None, URIRef('http://test/d'), None)))) == 2
        assert len(
            list(universe.triples((None, URIRef('http://test/d'), None)))) == 1

        #context tests
        #test contexts with triple argument
        assert len(list(universe.contexts((a, d, c)))) == 1

        #Remove test cases
        universe.remove((None, implies, None))
        assert len(list(universe.triples((None, implies, None)))) == 0
        # Removing the implication must not touch the formulae themselves.
        assert len(list(formulaA.triples((None, None, None)))) == 2
        assert len(list(formulaB.triples((None, None, None)))) == 2

        formulaA.remove((None, b, None))
        assert len(list(formulaA.triples((None, None, None)))) == 1
        formulaA.remove((None, RDF.type, None))
        assert len(list(formulaA.triples((None, None, None)))) == 0

        universe.remove((None, RDF.type, RDFS.Class))

        #remove_context tests
        universe.remove_context(formulaB)
        assert len(list(universe.triples((None, RDF.type, None)))) == 0
        assert len(universe) == 1
        assert len(formulaB) == 0

        universe.remove((None, None, None))
        assert len(universe) == 0

        g.store.destroy(configString)
    except:
        # Best-effort cleanup of the store before re-raising the failure.
        g.store.destroy(configString)
        raise
def update_rdf_for_conversion(vocabprefix, vocab_properties,
                              rdf_vocab_properties):
    """Rewrite a vocabulary RDF/XML file so it carries full ontology
    metadata, then write it alongside the original as *_modified.rdf.

    Ensures an owl:Ontology element exists (creating one if absent) with
    dc:identifier, dcterms:isVersionOf, vann namespace prefix/URI, and
    dcterms:hasFormat entries for both the HTML and RDF renditions; stamps
    rdfs:isDefinedBy on every class/property; moves the Ontology element
    to the front of the document.

    Returns (newrdf_vocab_properties, html_vocab_properties) describing
    the generated RDF file and its HTML counterpart.
    """
    # Derive the HTML rendition's name/path/uri from the RDF file's.
    html_vocab_properties = {}
    html_vocab_properties['format'] = 'text/html'
    html_vocab_properties['name'] = "%s.html" % os.path.splitext(
        rdf_vocab_properties['name'])[0]
    html_vocab_properties['path'] = rdf_vocab_properties['path'].replace(
        rdf_vocab_properties['name'], html_vocab_properties['name'])
    html_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(
        rdf_vocab_properties['name'], html_vocab_properties['name'])

    # The modified copy is written next to the original as *_modified.rdf.
    newrdf_vocab_properties = {}
    newrdf_vocab_properties['format'] = 'application/rdf+xml'
    newrdf_vocab_properties['name'] = "%s_modified.rdf" % os.path.splitext(
        rdf_vocab_properties['name'])[0]
    newrdf_vocab_properties['path'] = rdf_vocab_properties['path'].replace(
        rdf_vocab_properties['name'], newrdf_vocab_properties['name'])
    newrdf_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(
        rdf_vocab_properties['name'], newrdf_vocab_properties['name'])

    # Register the file's namespace prefixes (plus the module-level
    # defaults) with ElementTree so serialization keeps readable prefixes.
    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])
    graph_ns = []
    for nsprefix, nsurl in graph.namespaces():
        graph_ns.append(str(nsurl))
        ET._namespace_map[str(nsurl)] = str(nsprefix)
    for prefix, url in namespaces.iteritems():
        if not str(url) in graph_ns:
            ET._namespace_map[str(url)] = str(prefix)

    # Element tags (lower-cased) that should receive rdfs:isDefinedBy.
    def_tags = [
        "{http://www.w3.org/2000/01/rdf-schema#}Class".lower(),
        "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Property".lower(),
        "{http://www.w3.org/2002/07/owl#}ObjectProperty".lower(),
        "{http://www.w3.org/2002/07/owl#}DatatypeProperty".lower(),
        "{http://www.w3.org/2002/07/owl#}Class".lower(),
    ]

    tree = ET.ElementTree(file=rdf_vocab_properties['path'])
    ns_uri = vocab_properties['preferredNamespaceUri']
    html_uri = html_vocab_properties['uri']
    rdf_uri = rdf_vocab_properties['uri']

    tree_root = tree.getroot()
    #vocab= tree_root.findall("{http://www.w3.org/2002/07/owl#}Ontology")
    vocab = tree_root.find("{http://www.w3.org/2002/07/owl#}Ontology")
    # BUG FIX: `if vocab:` is False for a *found* Element with no children
    # (Elements are falsy when childless); test against None instead.
    if vocab is not None:
        #for vocab in vocabs:
        if not vocab.findall("{http://purl.org/dc/elements/1.1/}identifier"):
            se0 = ET.SubElement(
                vocab, "{http://purl.org/dc/elements/1.1/}identifier")
            se0.text = rdf_uri
        if not vocab.findall("{http://purl.org/dc/terms/}isVersionOf"):
            se1 = ET.SubElement(
                vocab,
                "{http://purl.org/dc/terms/}isVersionOf",
                attrib={
                    "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource":
                    ns_uri
                })
        if not vocab.findall(
                "{http://purl.org/vocab/vann/}preferredNamespacePrefix"):
            se2a = ET.SubElement(
                vocab, "{http://purl.org/vocab/vann/}preferredNamespacePrefix")
            se2a.text = vocab_properties['preferredNamespacePrefix']
        if not vocab.findall(
                "{http://purl.org/vocab/vann/}preferredNamespaceUri"):
            se2b = ET.SubElement(
                vocab, "{http://purl.org/vocab/vann/}preferredNamespaceUri")
            se2b.text = vocab_properties['preferredNamespaceUri']
        if not vocab.findall("{http://purl.org/dc/terms/}hasFormat"):
            #Add html uri - html_vocab_properties['uri']
            se3a = ET.SubElement(vocab, "{http://purl.org/dc/terms/}hasFormat")
            se3b = ET.SubElement(
                se3a,
                "{http://purl.org/dc/dcmitype/}Text",
                attrib={
                    "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about":
                    html_uri
                })
            se3c = ET.SubElement(se3b,
                                 "{http://purl.org/dc/elements/1.1/}format")
            se3d = ET.SubElement(se3c, "{http://purl.org/dc/terms/}IMT")
            se3e = ET.SubElement(
                se3d, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
            se3e.text = 'text/html'
            #ET.TreeBuilder.data('text/html')
            se3f = ET.SubElement(
                se3d,
                "{http://www.w3.org/2000/01/rdf-schema#}label",
                attrib={"{http://www.w3.org/XML/1998/namespace}lang": "en"})
            se3f.text = 'HTML'
            #ET.TreeBuilder.data('HTML')
            #Add rdf uri - rdf_vocab_properties['uri']
            se3a = ET.SubElement(vocab, "{http://purl.org/dc/terms/}hasFormat")
            se3b = ET.SubElement(
                se3a,
                "{http://purl.org/dc/dcmitype/}Text",
                attrib={
                    "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about":
                    rdf_uri
                })
            se3c = ET.SubElement(se3b,
                                 "{http://purl.org/dc/elements/1.1/}format")
            se3d = ET.SubElement(se3c, "{http://purl.org/dc/terms/}IMT")
            se3e = ET.SubElement(
                se3d, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
            se3e.text = 'application/rdf+xml'
            #ET.TreeBuilder.data('application/rdf+xml')
            se3f = ET.SubElement(
                se3d,
                "{http://www.w3.org/2000/01/rdf-schema#}label",
                attrib={"{http://www.w3.org/XML/1998/namespace}lang": "en"})
            se3f.text = 'RDF'
            #ET.TreeBuilder.data('RDF')
        else:
            #Check the formats available and add if necessary
            formats = vocab.findall("{http://purl.org/dc/terms/}hasFormat")
            available_types = []
            for f in formats:
                type_tags = f.findall(
                    ".//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
                for type_tag in type_tags:
                    # The media type may be given as an rdf:resource
                    # attribute or as element text.
                    if type_tag.attrib.get(
                            "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource"
                    ):
                        ftype = type_tag.attrib.get(
                            "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource"
                        )
                    elif type_tag.text:
                        ftype = type_tag.text
                    if ftype and 'html' in ftype.lower():
                        available_types.append('html')
                    elif ftype and 'rdf' in ftype.lower():
                        available_types.append('rdf')
            if not 'html' in available_types:
                #Add html file - vocabfile_html
                se3a = ET.SubElement(vocab,
                                     "{http://purl.org/dc/terms/}hasFormat")
                se3b = ET.SubElement(
                    se3a,
                    "{http://purl.org/dc/dcmitype/}Text",
                    attrib={
                        "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about":
                        html_uri
                    })
                se3c = ET.SubElement(
                    se3b, "{http://purl.org/dc/elements/1.1/}format")
                se3d = ET.SubElement(se3c, "{http://purl.org/dc/terms/}IMT")
                se3e = ET.SubElement(
                    se3d, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
                se3e.text = 'text/html'
                #ET.TreeBuilder.data('text/html')
                se3f = ET.SubElement(
                    se3d,
                    "{http://www.w3.org/2000/01/rdf-schema#}label",
                    attrib={
                        "{http://www.w3.org/XML/1998/namespace}lang": "en"
                    })
                se3f.text = 'HTML'
                #ET.TreeBuilder.data('HTML')
            if not 'rdf' in available_types:
                #Add rdf file - vocabfile
                se3a = ET.SubElement(vocab,
                                     "{http://purl.org/dc/terms/}hasFormat")
                se3b = ET.SubElement(
                    se3a,
                    "{http://purl.org/dc/dcmitype/}Text",
                    attrib={
                        "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about":
                        rdf_uri
                    })
                se3c = ET.SubElement(
                    se3b, "{http://purl.org/dc/elements/1.1/}format")
                se3d = ET.SubElement(se3c, "{http://purl.org/dc/terms/}IMT")
                se3e = ET.SubElement(
                    se3d, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
                se3e.text = 'application/rdf+xml'
                #ET.TreeBuilder.data('application/rdf+xml')
                se3f = ET.SubElement(
                    se3d,
                    "{http://www.w3.org/2000/01/rdf-schema#}label",
                    attrib={
                        "{http://www.w3.org/XML/1998/namespace}lang": "en"
                    })
                se3f.text = 'RDF'
                #ET.TreeBuilder.data('RDF')
    else:
        # No owl:Ontology element: create one with the full metadata set.
        vocab = ET.SubElement(
            tree_root,
            "{http://www.w3.org/2002/07/owl#}Ontology",
            attrib={
                "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about": ns_uri
            })
        se0 = ET.SubElement(vocab,
                            "{http://purl.org/dc/elements/1.1/}identifier")
        se0.text = rdf_uri
        se1 = ET.SubElement(
            vocab,
            "{http://purl.org/dc/terms/}isVersionOf",
            attrib={
                "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource": ns_uri
            })
        se2a = ET.SubElement(
            vocab, "{http://purl.org/vocab/vann/}preferredNamespacePrefix")
        se2a.text = vocab_properties['preferredNamespacePrefix']
        se2b = ET.SubElement(
            vocab, "{http://purl.org/vocab/vann/}preferredNamespaceUri")
        se2b.text = vocab_properties['preferredNamespaceUri']
        #Add html uri - html_vocab_properties['uri']
        se3a = ET.SubElement(vocab, "{http://purl.org/dc/terms/}hasFormat")
        se3b = ET.SubElement(
            se3a,
            "{http://purl.org/dc/dcmitype/}Text",
            attrib={
                "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about": html_uri
            })
        se3c = ET.SubElement(se3b, "{http://purl.org/dc/elements/1.1/}format")
        se3d = ET.SubElement(se3c, "{http://purl.org/dc/terms/}IMT")
        se3e = ET.SubElement(
            se3d, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
        se3e.text = 'text/html'
        se3f = ET.SubElement(
            se3d,
            "{http://www.w3.org/2000/01/rdf-schema#}label",
            attrib={"{http://www.w3.org/XML/1998/namespace}lang": "en"})
        se3f.text = 'HTML'
        #Add rdf uri - rdf_vocab_properties['uri']
        se3a = ET.SubElement(vocab, "{http://purl.org/dc/terms/}hasFormat")
        se3b = ET.SubElement(
            se3a,
            "{http://purl.org/dc/dcmitype/}Text",
            attrib={
                "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about": rdf_uri
            })
        se3c = ET.SubElement(se3b, "{http://purl.org/dc/elements/1.1/}format")
        se3d = ET.SubElement(se3c, "{http://purl.org/dc/terms/}IMT")
        se3e = ET.SubElement(
            se3d, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
        se3e.text = 'application/rdf+xml'
        se3f = ET.SubElement(
            se3d,
            "{http://www.w3.org/2000/01/rdf-schema#}label",
            attrib={"{http://www.w3.org/XML/1998/namespace}lang": "en"})
        se3f.text = 'RDF'
    # Stamp rdfs:isDefinedBy on every class/property element that lacks it.
    terms = tree_root.getiterator()
    #terms = vocab.getiterator()
    for term in terms:
        if term.tag.lower().strip() in def_tags:
            defby = None
            defby = term.find(
                "{http://www.w3.org/2000/01/rdf-schema#}isDefinedBy")
            # BUG FIX: `if not defby:` also fired for a found-but-childless
            # element; only add when the element is genuinely absent.
            if defby is None:
                se4 = ET.SubElement(
                    term,
                    "{http://www.w3.org/2000/01/rdf-schema#}isDefinedBy",
                    attrib={
                        "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource":
                        ns_uri
                    })

    #Move ontology to the first element
    tree_root.remove(vocab)
    tree_root.insert(0, vocab)

    tree.write(newrdf_vocab_properties['path'])
    #tree_root.write(newrdf_vocab_properties['path'])
    return (newrdf_vocab_properties, html_vocab_properties)
Beispiel #34
0
def schemadoc(uris):
    G = Graph()
    for uri in uris:
        G.parse(uri)

    print """
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Schema Documentation</title>
<style type="text/css">
body { margin: 1em; font-family: Georgia, sans-serif; }
h1 { font-family: Tahoma, sans-serif; }
h2, h3, h4, h5, h6 { font-family: Arial, sans-serif; }
a { font-weight: bold; color: #036; }
dt.class { margin-top: 0.75em; }
dt.property { margin-top: 0.75em; }
address { padding-top: 0.35em; border-top: 2px solid #369; }
</style>
</head>
<body>
<h1>Schema Documentation</h1>
"""
    classes = []
    for metaclass in [RDFS.Class, OWL.Class]:
        for uri in G.subjects(RDF.type, metaclass):
            if not isinstance(uri, URIRef): continue

            c = Class(uri)
            c.classes = [
                Class(u) for u in G.objects(uri, RDFS.subClassOf)
                if isinstance(u, URIRef)
            ]
            for prop in G.subjects(RDFS.domain, uri):
                p = Property(prop)
                ranges = [Class(u) for u in G.objects(prop, RDFS.range)]
                c.properties.append((p, ranges))
            # c.properties = [Property(u) for u in G.subjects(RDFS.domain, uri)]
            c.comments = [str(s) for s in G.objects(uri, RDFS.comment)]
            classes.append(c)

    print '<h2>Classes</h2>'
    print '<ul>'
    for c in sorted(classes):
        print '<li>'
        print '<dl>'
        print '<dt class="class">'
        sys.stdout.write(c.name())

        if c.classes:
            o = ', '.join(cls.name(format='text') for cls in sorted(c.classes))
            print '(' + o + ')'
        else:
            print
        print '</dt>'

        for comment in c.comments:
            print '<dd>'
            print comment
            print '</dd>'

        for prop, ranges in sorted(c.properties):
            print '<dd>'
            print '   ' + prop.name()
            if ranges:
                print ' => ' + ', '.join(range.name() for range in ranges)
            print '</dd>'
        print '</dt>'
        print '</li>'
    print '</ul>'

    print '<h2>Properties</h2>'
    properties = []
    print '<dl>'
    for propclass in [
            RDF.Property, OWL.FunctionalProperty, OWL.InverseFunctionalProperty
    ]:
        for uri in G.subjects(RDF.type, propclass):
            if not isinstance(uri, URIRef): continue

            p = Property(uri)
            properties.append(p)
            p.kind = Class(propclass)
            p.domains = [
                Class(u) for u in G.objects(uri, RDFS.domain)
                if isinstance(u, URIRef)
            ]
            p.ranges = [
                Class(u) for u in G.objects(uri, RDFS.range)
                if isinstance(u, URIRef)
            ]
            p.comments = [str(s) for s in G.objects(uri, RDFS.comment)]

    for p in sorted(properties):
        print '<dt class="property">'
        print p.name() + ' (' + p.kind.name(format='text') + ')'
        print '</dt>'

        for comment in p.comments:
            print '<dd>'
            print comment
            print '</dd>'

        if p.domains:
            print '<dd>domain: '
            print ', '.join(domain.name() for domain in p.domains)
            print '</dd>'

        if p.ranges:
            print '<dd>range: '
            print ', '.join(range.name() for range in p.ranges)
            print '</dd>'
    print '</dl>'

    print '<address>'
    print 'Generated by <a href="http://inamidst.com/proj/sdoc/"'
    print '>Schemadoc</a>'
    print '</address>'
    print '</body>'
    print '</html>'
Beispiel #35
0
def main():
	"""Uploads many RDF graphs into storage."""

	uris = [
		'http://tech.slashdot.org/story/09/11/02/0734253/Transpacific-Unity-Fiber-Optic-Cable-Leaves-Japan',
		'http://games.slashdot.org/story/09/10/29/2026217/Nintendo-Announces-DSi-XL',
		'http://games.slashdot.org/story/09/10/28/1634218/Leaked-Modern-Warfare-2-Footage-Causes-Outrage',
		'http://games.slashdot.org/story/09/11/01/1421253/Scams-and-Social-Gaming',
		'http://apple.slashdot.org/story/09/10/26/2256212/Psystars-Rebel-EFI-Hackintosh-Tool-Reviewed-Found-Wanting',
		'http://yro.slashdot.org/story/09/11/03/0331227/Feds-Bust-Cable-Modem-Hacker',
		'http://hardware.slashdot.org/story/09/11/02/2048234/Europe-Launches-Flood-Predicting-Satellite-and-Test-Probe',
		'http://yro.slashdot.org/story/09/11/02/1411252/An-Inbox-Is-Not-a-Glove-Compartment',
		'http://science.slashdot.org/story/09/11/03/1554207/2-Companies-Win-NASAs-Moon-Landing-Prize-Money',
		'http://news.slashdot.org/story/09/11/03/1751232/Rise-of-the-Robot-Squadrons',
		'http://it.slashdot.org/story/09/10/21/2120251/Some-Users-Say-Win7-Wants-To-Remove-iTunes-Google-Toolbar',
		'http://apple.slashdot.org/story/09/10/23/1456221/Apple-Seeks-Patent-On-Operating-System-Advertising',
		'http://games.slashdot.org/story/09/10/29/0225250/Physics-Rebel-Aims-To-Shake-Up-the-Video-Game-World',
		'http://games.slashdot.org/story/09/10/28/030237/2D-Boy-Posts-Pay-What-You-Want-Final-Wrap-up',
		'http://it.slashdot.org/story/09/11/02/1622218/IT-Snake-Oil-mdash-Six-Tech-Cure-Alls-That-Went-Bunk',
		'http://apple.slashdot.org/story/09/10/20/1833228/Apple-Blurs-the-Server-Line-With-Mac-Mini-Server',
		'http://games.slashdot.org/story/09/11/02/1530221/Free-3G-Wireless-For-Nintendos-Next-Handheld',
		'http://hardware.slashdot.org/story/09/11/03/1530258/Dell-Rugged-Laptops-Not-Quite-Tough-Enough',
		'http://linux.slashdot.org/story/09/11/03/2211231/Some-Early-Adopters-Stung-By-Ubuntus-Karmic-Koala',
		'http://hardware.slashdot.org/story/09/10/31/0120223/Contest-To-Hack-Brazilian-Voting-Machines',
		'http://ask.slashdot.org/story/09/10/25/1615203/Low-Power-Home-Linux-Server',
		'http://games.slashdot.org/story/09/10/30/0149253/FCC-Mulling-More-Control-For-Electronic-Media',
		'http://mobile.slashdot.org/story/09/11/03/1649246/Unfinished-Windows-7-Hotspot-Feature-Exploited',
		'http://games.slashdot.org/story/09/10/30/2040230/Nokias-N-Gage-Service-To-End-After-2010',
		'http://linux.slashdot.org/story/09/10/29/128205/Ubuntu-910-Officially-Released',
		'http://ask.slashdot.org/story/09/10/30/2126252/Installing-Linux-On-Old-Hardware',
		'http://games.slashdot.org/story/09/10/31/1428225/Controlling-Games-and-Apps-Through-Muscle-Sensors',
		'http://tech.slashdot.org/story/09/11/01/2131249/uTorrent-To-Build-In-Transfer-Throttling-Ability',
		'http://news.slashdot.org/story/09/11/02/2342258/Microsoft-Links-Malware-Rates-To-Pirated-Windows',
		'http://apple.slashdot.org/story/09/10/29/0311214/Speech-to-Speech-Translator-Developed-For-iPhone',
		'http://games.slashdot.org/story/09/10/30/022242/DampD-On-Google-Wave',
		'http://science.slashdot.org/story/09/11/02/1435227/Bacteria-Could-Survive-In-Martian-Soil',
		'http://apple.slashdot.org/story/09/11/02/0853219/For-September-Book-Related-Apps-Overtook-Games-On-iPhone',
		'http://hardware.slashdot.org/story/09/11/03/1427210/Negroponte-Hints-At-Paper-Like-Design-For-XO-3',
		'http://science.slashdot.org/story/09/11/03/0313242/Giant-Rift-In-Africa-Will-Create-a-New-Ocean',
		'http://yro.slashdot.org/story/09/11/02/132211/Attorney-General-Says-Wiretap-Lawsuit-Must-Be-Thrown-Out',
		'http://linux.slashdot.org/story/09/10/25/0450232/Ryan-Gordon-Wants-To-Bring-Universal-Binaries-To-Linux',
		'http://science.slashdot.org/story/09/11/01/2145208/Computer-Activities-for-Those-With-Speech-and-Language-Difficulties',
		'http://science.slashdot.org/story/09/11/03/1842247/The-Tech-Aboard-the-International-Space-Station',
		'http://science.slashdot.org/story/09/11/03/1450211/Scientists-Build-a-Smarter-Rat',
		'http://yro.slashdot.org/story/09/11/03/2023209/Spring-Design-Sues-Barnes-amp-Noble-Over-Nook-IP',
		'http://apple.slashdot.org/story/09/11/01/195232/Apple-Says-Booting-OS-X-Makes-an-Unauthorized-Copy',
		'http://yro.slashdot.org/story/09/10/22/1541220/Nokia-Sues-Apple-For-Patent-Infringement-In-iPhone',
		'http://linux.slashdot.org/story/09/10/23/1639234/Ubuntu-Karmic-Koala-RC-Hits-the-Streets-With-Windows-7',
		'http://linux.slashdot.org/story/09/10/27/1335227/Comparing-the-Freedoms-Offered-By-Maemo-and-Android'
	]

	i = 0
	for uri in uris:

		# Get the RDF
		wrdf = Web2Rdf(uri)
		rdf = wrdf.getRdf()

		if not rdf:
			print "No RDF returned!"
			return False

		print "Got RDF..."
		rdf = rdfString(rdf)

		# Open Storage
		print "Opening store..."
		#params = "./newdatabase.sqlite" 					# SQLITE
		params = ('localhost', 'tuser', 'tuser', 'rdf') 	# MYSQL
		rstore = RdfStore(params)
		rstore.open()

		print "Storing..."
		graph = Graph(rstore.get(), identifier = URIRef("http://slashdot/"))
		#graph.parse("example.rdf")
		graph.parse(rdf, publicID=uri)

		graph.commit()
		i+=1
		print "%d of %d uris complete." % (i, len(uris))
Beispiel #36
0
def testN3Store(store="default", configString=None):
    """Exercise formula (quoted-graph) support of an rdflib store plugin.

    Parses the module-level ``testN3`` document into a ConjunctiveGraph
    backed by *store*, then asserts that formulas, variables, contexts
    and removal all behave as expected.  When *configString* is given it
    is used to destroy and (re)open the store before the test; the store
    is destroyed again on the way out, success or failure.
    """
    g = ConjunctiveGraph(store=store)
    if configString:
        # Start from a clean, freshly-opened store.
        g.destroy(configString)
        g.open(configString)
    g.parse(StringInputSource(testN3), format="n3")
    print g.store
    try:
        # testN3 contains one implication; capture its antecedent (s) and
        # consequent (o) formulas — the loop leaves the last match bound.
        for s,p,o in g.triples((None,implies,None)):
            formulaA = s
            formulaB = o

        assert type(formulaA)==QuotedGraph and type(formulaB)==QuotedGraph
        a = URIRef('http://test/a')
        b = URIRef('http://test/b')
        c = URIRef('http://test/c')
        d = URIRef('http://test/d')
        v = Variable('y')

        universe = ConjunctiveGraph(g.store)

        # Formulas must be usable as subject/object terms.
        assert len(list(universe.triples((formulaA,implies,formulaB))))==1

        # Variables round-trip through the store unchanged.
        assert len(list(formulaB.triples((None,None,v))))==1
        for s,p,o in formulaB.triples((None,d,None)):
            if o != c:
                assert isinstance(o,Variable)
                assert o == v
        s = list(universe.subjects(RDF.type, RDFS.Class))[0]
        assert isinstance(s,BNode)
        # Triples inside formulas are not part of the default universe.
        assert len(list(universe.triples((None,implies,None)))) == 1
        assert len(list(universe.triples((None,RDF.type,None)))) ==1
        assert len(list(formulaA.triples((None,RDF.type,None))))==1
        assert len(list(formulaA.triples((None,None,None))))==2
        assert len(list(formulaB.triples((None,None,None))))==2
        assert len(list(universe.triples((None,None,None))))==3
        assert len(list(formulaB.triples((None,URIRef('http://test/d'),None))))==2
        assert len(list(universe.triples((None,URIRef('http://test/d'),None))))==1

        # Context tests: contexts() with a concrete triple argument.
        assert len(list(universe.contexts((a,d,c))))==1

        # Removal from the universe must not touch formula contents.
        universe.remove((None,implies,None))
        assert len(list(universe.triples((None,implies,None))))==0
        assert len(list(formulaA.triples((None,None,None))))==2
        assert len(list(formulaB.triples((None,None,None))))==2

        formulaA.remove((None,b,None))
        assert len(list(formulaA.triples((None,None,None))))==1
        formulaA.remove((None,RDF.type,None))
        assert len(list(formulaA.triples((None,None,None))))==0

        universe.remove((None,RDF.type,RDFS.Class))


        # remove_context: dropping formulaB's context empties it.
        universe.remove_context(formulaB)
        assert len(list(universe.triples((None,RDF.type,None))))==0
        assert len(universe)==1
        assert len(formulaB)==0

        universe.remove((None,None,None))
        assert len(universe)==0

        g.store.destroy(configString)
    except:
        # Tear the store down on any failure, then re-raise the error.
        g.store.destroy(configString)
        raise
Beispiel #37
0
 def testParse(self):
     """Smoke test: download a remote N3 policy document and parse it."""
     source_url = "http://groups.csail.mit.edu/dig/2005/09/rein/examples/troop42-policy.n3"
     graph = ConjunctiveGraph()
     # rdflib fetches the URL itself and parses the body as Notation3.
     graph.parse(source_url, format="n3")
Beispiel #38
0
def update_rdf_for_conversion(prefix, vocab_properties, rdf_vocab_properties):
    """Augment a vocabulary RDF/XML file with format/identifier metadata.

    Parses the RDF/XML file described by *rdf_vocab_properties*, attaches
    dc:identifier, dcterms:isVersionOf, dcterms:hasFormat and the vann
    preferred-namespace properties to the owl:Ontology subject, describes
    the HTML and RDF renditions, adds rdfs:isDefinedBy to every term and
    writes the result to a new "<name>_modified.rdf" file.

    Parameters:
        prefix: vocabulary prefix (currently unused here; kept for the
            callers' signature).
        vocab_properties: dict with 'preferredNamespaceUri' and
            'preferredNamespacePrefix'.
        rdf_vocab_properties: dict describing the source RDF file
            ('name', 'path', 'uri', 'format').

    Returns:
        Tuple (newrdf_vocab_properties, html_vocab_properties).
    """
    # Describe the HTML rendition that accompanies the vocabulary.
    html_vocab_properties = {}
    html_vocab_properties['format'] = 'text/html'
    html_vocab_properties['name'] = "%s.html" % os.path.splitext(
        rdf_vocab_properties['name'])[0]
    html_vocab_properties['path'] = rdf_vocab_properties['path'].replace(
        rdf_vocab_properties['name'], html_vocab_properties['name'])
    html_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(
        rdf_vocab_properties['name'], html_vocab_properties['name'])

    # Describe the modified RDF file this function writes.
    newrdf_vocab_properties = {}
    newrdf_vocab_properties['format'] = 'application/rdf+xml'
    newrdf_vocab_properties['name'] = "%s_modified.rdf" % os.path.splitext(
        rdf_vocab_properties['name'])[0]
    newrdf_vocab_properties['path'] = rdf_vocab_properties['path'].replace(
        rdf_vocab_properties['name'], newrdf_vocab_properties['name'])
    newrdf_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(
        rdf_vocab_properties['name'], newrdf_vocab_properties['name'])

    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])

    # The owl:Ontology resource is the subject the metadata hangs off.
    # NOTE(review): if the file declares no owl:Ontology, subject stays
    # None and the graph.add() calls below will fail — confirm callers
    # guarantee an ontology header.
    subject = None
    for s in graph.subjects(namespaces['rdf']['type'],
                            URIRef(namespaces['owl']['Ontology'])):
        subject = s

    # Bind any well-known namespace the document does not already use.
    # BUGFIX: the second loop previously iterated as
    # `for prefix, url in ...`, silently clobbering the `prefix` parameter.
    graph_ns = []
    for nsprefix, nsurl in graph.namespaces():
        graph_ns.append(str(nsurl))
    for known_prefix, known_url in namespaces.iteritems():
        if not str(known_url) in graph_ns:
            graph.bind(known_prefix, URIRef(known_url))

    formatNode1 = BNode()
    formatNode2 = BNode()

    # Vocabulary-level identifier, versioning and available formats.
    graph.add((subject, namespaces['dc']['identifier'],
               URIRef(rdf_vocab_properties['uri'])))
    graph.add((subject, namespaces['dcterms']['isVersionOf'],
               URIRef(vocab_properties['preferredNamespaceUri'])))
    graph.add((subject, namespaces['dcterms']['hasFormat'],
               URIRef(rdf_vocab_properties['uri'])))
    graph.add((subject, namespaces['dcterms']['hasFormat'],
               URIRef(html_vocab_properties['uri'])))
    graph.add((subject, namespaces['vann']['preferredNamespaceUri'],
               URIRef(vocab_properties['preferredNamespaceUri'])))
    graph.add((subject, namespaces['vann']['preferredNamespacePrefix'],
               URIRef(vocab_properties['preferredNamespacePrefix'])))

    # Describe the HTML rendition (dctype:Text with an IMT format node).
    graph.add((URIRef(html_vocab_properties['uri']), namespaces['rdf']['type'],
               URIRef(namespaces['dctype']['Text'])))
    graph.add((URIRef(html_vocab_properties['uri']),
               namespaces['dc']['format'], formatNode1))
    graph.add((formatNode1, namespaces['rdf']['value'], Literal('text/html')))
    graph.add((formatNode1, namespaces['rdfs']['label'], Literal('HTML')))
    graph.add((formatNode1, namespaces['rdf']['type'],
               URIRef(namespaces['dcterms']['IMT'])))

    # Describe the RDF rendition likewise.
    graph.add((URIRef(rdf_vocab_properties['uri']), namespaces['rdf']['type'],
               URIRef(namespaces['dctype']['Text'])))
    graph.add((URIRef(rdf_vocab_properties['uri']), namespaces['dc']['format'],
               formatNode2))
    graph.add((formatNode2, namespaces['rdf']['value'],
               Literal('application/rdf+xml')))
    graph.add((formatNode2, namespaces['rdfs']['label'], Literal('RDF')))
    graph.add((formatNode2, namespaces['rdf']['type'],
               URIRef(namespaces['dcterms']['IMT'])))

    # Every term of the vocabulary is rdfs:isDefinedBy the preferred
    # namespace URI.
    list_of_terms = get_terms(rdf_vocab_properties['path'])
    for s in list_of_terms:
        graph.add((URIRef(s), namespaces['rdfs']['isDefinedBy'],
                   URIRef(vocab_properties['preferredNamespaceUri'])))

    rdf_str = graph.serialize(format="pretty-xml")
    # NOTE(review): codecs.open() without an encoding behaves like plain
    # open(); serialize() presumably yields a UTF-8 byte string here —
    # confirm, or pass 'utf-8' explicitly.
    f = codecs.open(newrdf_vocab_properties['path'], 'w')
    f.write(rdf_str)
    f.close()
    return (newrdf_vocab_properties, html_vocab_properties)
def update_rdf_for_conversion(prefix, vocab_properties, rdf_vocab_properties):
    """Add identifier / format / isDefinedBy metadata to a vocabulary file.

    NOTE(review): this re-definition shadows an earlier function of the
    same name in this file; keep the two in sync or delete one.

    Parses the RDF/XML file named by rdf_vocab_properties['path'], adds
    vocabulary-level metadata (dc:identifier, dcterms:isVersionOf/hasFormat,
    vann preferred-namespace info), describes the HTML and RDF renditions,
    adds rdfs:isDefinedBy for every term, and serializes the result to
    "<name>_modified.rdf".  Returns the (new RDF, HTML) property dicts.
    """
    # Properties of the HTML rendition of this vocabulary.
    html_vocab_properties = {}
    html_vocab_properties['format'] = 'text/html'
    html_vocab_properties['name'] = "%s.html"%os.path.splitext(rdf_vocab_properties['name'])[0]
    html_vocab_properties['path'] = rdf_vocab_properties['path'].replace(rdf_vocab_properties['name'], html_vocab_properties['name'])
    html_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(rdf_vocab_properties['name'], html_vocab_properties['name'])

    # Properties of the modified RDF file written below.
    newrdf_vocab_properties = {}
    newrdf_vocab_properties['format'] = 'application/rdf+xml'
    newrdf_vocab_properties['name'] = "%s_modified.rdf"%os.path.splitext(rdf_vocab_properties['name'])[0]
    newrdf_vocab_properties['path'] = rdf_vocab_properties['path'].replace(rdf_vocab_properties['name'], newrdf_vocab_properties['name'])
    newrdf_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(rdf_vocab_properties['name'], newrdf_vocab_properties['name'])

    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])

    # NOTE(review): if the file declares no owl:Ontology, subject stays
    # None and the graph.add() calls below will fail.
    subject = None
    for s in graph.subjects(namespaces['rdf']['type'], URIRef(namespaces['owl']['Ontology'])):
        subject = s

    # Bind any well-known namespace the document does not already use.
    # BUGFIX: the second loop previously iterated as `for prefix, url in
    # ...`, silently clobbering the `prefix` parameter.
    graph_ns = []
    for nsprefix, nsurl in graph.namespaces():
        graph_ns.append(str(nsurl))
    for known_prefix, known_url in namespaces.iteritems():
        if not str(known_url) in graph_ns:
            graph.bind(known_prefix, URIRef(known_url))

    formatNode1 = BNode()
    formatNode2 = BNode()

    # Vocabulary-level identifier, versioning and available formats.
    graph.add((subject, namespaces['dc']['identifier'], URIRef(rdf_vocab_properties['uri'])))
    graph.add((subject, namespaces['dcterms']['isVersionOf'], URIRef(vocab_properties['preferredNamespaceUri'])))
    graph.add((subject, namespaces['dcterms']['hasFormat'], URIRef(rdf_vocab_properties['uri'])))
    graph.add((subject, namespaces['dcterms']['hasFormat'], URIRef(html_vocab_properties['uri'])))
    graph.add((subject, namespaces['vann']['preferredNamespaceUri'], URIRef(vocab_properties['preferredNamespaceUri'])))
    graph.add((subject, namespaces['vann']['preferredNamespacePrefix'], URIRef(vocab_properties['preferredNamespacePrefix'])))

    # HTML rendition: dctype:Text with an IMT format blank node.
    graph.add((URIRef(html_vocab_properties['uri']), namespaces['rdf']['type'], URIRef(namespaces['dctype']['Text'])))
    graph.add((URIRef(html_vocab_properties['uri']), namespaces['dc']['format'], formatNode1))
    graph.add((formatNode1, namespaces['rdf']['value'], Literal('text/html')))
    graph.add((formatNode1, namespaces['rdfs']['label'], Literal('HTML')))
    graph.add((formatNode1, namespaces['rdf']['type'], URIRef(namespaces['dcterms']['IMT'])))

    # RDF rendition likewise.
    graph.add((URIRef(rdf_vocab_properties['uri']), namespaces['rdf']['type'], URIRef(namespaces['dctype']['Text'])))
    graph.add((URIRef(rdf_vocab_properties['uri']), namespaces['dc']['format'], formatNode2))
    graph.add((formatNode2, namespaces['rdf']['value'], Literal('application/rdf+xml')))
    graph.add((formatNode2, namespaces['rdfs']['label'], Literal('RDF')))
    graph.add((formatNode2, namespaces['rdf']['type'], URIRef(namespaces['dcterms']['IMT'])))

    # Every vocabulary term is rdfs:isDefinedBy the preferred namespace.
    list_of_terms = get_terms(rdf_vocab_properties['path'])
    for s in list_of_terms:
        graph.add((URIRef(s), namespaces['rdfs']['isDefinedBy'], URIRef(vocab_properties['preferredNamespaceUri'])))

    rdf_str = graph.serialize(format="pretty-xml")
    # NOTE(review): codecs.open() without an encoding behaves like plain
    # open(); confirm serialize() yields UTF-8 bytes or pass 'utf-8'.
    f = codecs.open(newrdf_vocab_properties['path'], 'w')
    f.write(rdf_str)
    f.close()
    return (newrdf_vocab_properties, html_vocab_properties)
Beispiel #40
0
def readSubgraphXML(filename):
    """Load a serialized subgraph from *filename*.

    When the module-level SUBGRAPH_FORMAT is 'xml' the raw file text is
    returned untouched; otherwise the file is parsed (in that format)
    into a ConjunctiveGraph, which is returned.
    """
    if SUBGRAPH_FORMAT == 'xml':
        # BUGFIX: the original used open(filename).read(), leaking the
        # file handle; close it deterministically with a context manager.
        with open(filename) as f:
            return f.read()
    g = ConjunctiveGraph()
    g.parse(filename, format=SUBGRAPH_FORMAT)
    return g
def update_rdf_for_conversion(vocabprefix, vocab_properties, rdf_vocab_properties):
    """ElementTree-based rewrite of a vocabulary RDF/XML file.

    Ensures the document carries an owl:Ontology header with identifier,
    dcterms:isVersionOf, vann preferred-namespace info and
    dcterms:hasFormat entries for the HTML and RDF renditions, adds
    rdfs:isDefinedBy to every class/property element, moves the ontology
    header to the front of the document, and writes the result to
    "<name>_modified.rdf".

    Returns (newrdf_vocab_properties, html_vocab_properties).
    """
    # Properties of the HTML rendition.
    html_vocab_properties = {}
    html_vocab_properties['format'] = 'text/html'
    html_vocab_properties['name'] = "%s.html"%os.path.splitext(rdf_vocab_properties['name'])[0]
    html_vocab_properties['path'] = rdf_vocab_properties['path'].replace(rdf_vocab_properties['name'], html_vocab_properties['name'])
    html_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(rdf_vocab_properties['name'], html_vocab_properties['name'])

    # Properties of the modified RDF file written below.
    newrdf_vocab_properties = {}
    newrdf_vocab_properties['format'] = 'application/rdf+xml'
    newrdf_vocab_properties['name'] = "%s_modified.rdf"%os.path.splitext(rdf_vocab_properties['name'])[0]
    newrdf_vocab_properties['path'] = rdf_vocab_properties['path'].replace(rdf_vocab_properties['name'], newrdf_vocab_properties['name'])
    newrdf_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(rdf_vocab_properties['name'], newrdf_vocab_properties['name'])

    # Register every namespace prefix with ElementTree so the rewritten
    # file keeps the original prefixes plus our well-known ones.
    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])
    graph_ns = []
    for nsprefix, nsurl in graph.namespaces():
        graph_ns.append(str(nsurl))
        ET._namespace_map[str(nsurl)] = str(nsprefix)
    for prefix, url in namespaces.iteritems():
        if not str(url) in graph_ns:
            ET._namespace_map[str(url)] = str(prefix)

    # Element tags that define vocabulary terms (compared lower-cased);
    # each matching element gets an rdfs:isDefinedBy child.
    def_tags = [
        "{http://www.w3.org/2000/01/rdf-schema#}Class".lower(),
        "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Property".lower(),
        "{http://www.w3.org/2002/07/owl#}ObjectProperty".lower(),
        "{http://www.w3.org/2002/07/owl#}DatatypeProperty".lower(),
        "{http://www.w3.org/2002/07/owl#}Class".lower(),
    ]

    tree = ET.ElementTree(file=rdf_vocab_properties['path'])
    ns_uri = vocab_properties['preferredNamespaceUri']
    html_uri = html_vocab_properties['uri']
    rdf_uri = rdf_vocab_properties['uri']

    tree_root = tree.getroot()
    vocab = tree_root.find("{http://www.w3.org/2002/07/owl#}Ontology")
    # BUGFIX: was `if vocab:` — an Element with no children is falsy, so
    # an existing-but-empty owl:Ontology header would wrongly be
    # duplicated by the else branch.  Test against None explicitly.
    if vocab is not None:
        if not vocab.findall("{http://purl.org/dc/elements/1.1/}identifier"):
            se0 = ET.SubElement(vocab, "{http://purl.org/dc/elements/1.1/}identifier")
            se0.text = rdf_uri
        if not vocab.findall("{http://purl.org/dc/terms/}isVersionOf"):
            se1 = ET.SubElement(vocab, "{http://purl.org/dc/terms/}isVersionOf", attrib={"{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource":ns_uri})
        if not vocab.findall("{http://purl.org/vocab/vann/}preferredNamespacePrefix"):
            se2a = ET.SubElement(vocab, "{http://purl.org/vocab/vann/}preferredNamespacePrefix")
            se2a.text = vocab_properties['preferredNamespacePrefix']
        if not vocab.findall("{http://purl.org/vocab/vann/}preferredNamespaceUri"):
            se2b = ET.SubElement(vocab, "{http://purl.org/vocab/vann/}preferredNamespaceUri")
            se2b.text = vocab_properties['preferredNamespaceUri']
        if not vocab.findall("{http://purl.org/dc/terms/}hasFormat"):
            # No hasFormat at all: describe both renditions from scratch.
            # HTML rendition - html_vocab_properties['uri']
            se3a = ET.SubElement(vocab, "{http://purl.org/dc/terms/}hasFormat")
            se3b = ET.SubElement(se3a, "{http://purl.org/dc/dcmitype/}Text", attrib={"{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about":html_uri})
            se3c = ET.SubElement(se3b, "{http://purl.org/dc/elements/1.1/}format")
            se3d = ET.SubElement(se3c, "{http://purl.org/dc/terms/}IMT")
            se3e = ET.SubElement(se3d, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
            se3e.text = 'text/html'
            se3f = ET.SubElement(se3d, "{http://www.w3.org/2000/01/rdf-schema#}label", attrib={"{http://www.w3.org/XML/1998/namespace}lang":"en"})
            se3f.text = 'HTML'
            # RDF rendition - rdf_vocab_properties['uri']
            se3a = ET.SubElement(vocab, "{http://purl.org/dc/terms/}hasFormat")
            se3b = ET.SubElement(se3a, "{http://purl.org/dc/dcmitype/}Text", attrib={"{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about":rdf_uri})
            se3c = ET.SubElement(se3b, "{http://purl.org/dc/elements/1.1/}format")
            se3d = ET.SubElement(se3c, "{http://purl.org/dc/terms/}IMT")
            se3e = ET.SubElement(se3d, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
            se3e.text = 'application/rdf+xml'
            se3f = ET.SubElement(se3d, "{http://www.w3.org/2000/01/rdf-schema#}label", attrib={"{http://www.w3.org/XML/1998/namespace}lang":"en"})
            se3f.text = 'RDF'
        else:
            # Inspect the declared formats and add whichever is missing.
            formats = vocab.findall("{http://purl.org/dc/terms/}hasFormat")
            available_types = []
            for f in formats:
                type_tags = f.findall(".//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
                for type_tag in type_tags:
                    # BUGFIX: ftype could be unbound (or stale from a
                    # previous iteration) when a value element has neither
                    # an rdf:resource attribute nor text; reset it here.
                    ftype = None
                    if type_tag.attrib.get("{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource"):
                        ftype = type_tag.attrib.get("{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource")
                    elif type_tag.text:
                        ftype = type_tag.text
                    if ftype and 'html' in ftype.lower():
                        available_types.append('html')
                    elif ftype and 'rdf' in ftype.lower():
                        available_types.append('rdf')
            if not 'html' in available_types:
                # Add the HTML rendition.
                se3a = ET.SubElement(vocab, "{http://purl.org/dc/terms/}hasFormat")
                se3b = ET.SubElement(se3a, "{http://purl.org/dc/dcmitype/}Text", attrib={"{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about":html_uri})
                se3c = ET.SubElement(se3b, "{http://purl.org/dc/elements/1.1/}format")
                se3d = ET.SubElement(se3c, "{http://purl.org/dc/terms/}IMT")
                se3e = ET.SubElement(se3d, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
                se3e.text = 'text/html'
                se3f = ET.SubElement(se3d, "{http://www.w3.org/2000/01/rdf-schema#}label", attrib={"{http://www.w3.org/XML/1998/namespace}lang":"en"})
                se3f.text = 'HTML'
            if not 'rdf' in available_types:
                # Add the RDF rendition.
                se3a = ET.SubElement(vocab, "{http://purl.org/dc/terms/}hasFormat")
                se3b = ET.SubElement(se3a, "{http://purl.org/dc/dcmitype/}Text", attrib={"{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about":rdf_uri})
                se3c = ET.SubElement(se3b, "{http://purl.org/dc/elements/1.1/}format")
                se3d = ET.SubElement(se3c, "{http://purl.org/dc/terms/}IMT")
                se3e = ET.SubElement(se3d, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
                se3e.text = 'application/rdf+xml'
                se3f = ET.SubElement(se3d, "{http://www.w3.org/2000/01/rdf-schema#}label", attrib={"{http://www.w3.org/XML/1998/namespace}lang":"en"})
                se3f.text = 'RDF'
    else:
        # No ontology header at all: create one with full metadata.
        vocab = ET.SubElement(tree_root, "{http://www.w3.org/2002/07/owl#}Ontology", attrib={"{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about":ns_uri})
        se0 = ET.SubElement(vocab, "{http://purl.org/dc/elements/1.1/}identifier")
        se0.text = rdf_uri
        se1 = ET.SubElement(vocab, "{http://purl.org/dc/terms/}isVersionOf", attrib={"{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource":ns_uri})
        se2a = ET.SubElement(vocab, "{http://purl.org/vocab/vann/}preferredNamespacePrefix")
        se2a.text = vocab_properties['preferredNamespacePrefix']
        se2b = ET.SubElement(vocab, "{http://purl.org/vocab/vann/}preferredNamespaceUri")
        se2b.text = vocab_properties['preferredNamespaceUri']
        # HTML rendition - html_vocab_properties['uri']
        se3a = ET.SubElement(vocab, "{http://purl.org/dc/terms/}hasFormat")
        se3b = ET.SubElement(se3a, "{http://purl.org/dc/dcmitype/}Text", attrib={"{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about":html_uri})
        se3c = ET.SubElement(se3b, "{http://purl.org/dc/elements/1.1/}format")
        se3d = ET.SubElement(se3c, "{http://purl.org/dc/terms/}IMT")
        se3e = ET.SubElement(se3d, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
        se3e.text = 'text/html'
        se3f = ET.SubElement(se3d, "{http://www.w3.org/2000/01/rdf-schema#}label", attrib={"{http://www.w3.org/XML/1998/namespace}lang":"en"})
        se3f.text = 'HTML'
        # RDF rendition - rdf_vocab_properties['uri']
        se3a = ET.SubElement(vocab, "{http://purl.org/dc/terms/}hasFormat")
        se3b = ET.SubElement(se3a, "{http://purl.org/dc/dcmitype/}Text", attrib={"{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about":rdf_uri})
        se3c = ET.SubElement(se3b, "{http://purl.org/dc/elements/1.1/}format")
        se3d = ET.SubElement(se3c, "{http://purl.org/dc/terms/}IMT")
        se3e = ET.SubElement(se3d, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
        se3e.text = 'application/rdf+xml'
        se3f = ET.SubElement(se3d, "{http://www.w3.org/2000/01/rdf-schema#}label", attrib={"{http://www.w3.org/XML/1998/namespace}lang":"en"})
        se3f.text = 'RDF'

    # Add rdfs:isDefinedBy to every term-defining element.
    terms = tree_root.getiterator()
    for term in terms:
        if term.tag.lower().strip() in def_tags:
            defby = term.find("{http://www.w3.org/2000/01/rdf-schema#}isDefinedBy")
            # BUGFIX: was `if not defby:` — also true for a present but
            # childless element; only add when the child is truly absent.
            if defby is None:
                ET.SubElement(term, "{http://www.w3.org/2000/01/rdf-schema#}isDefinedBy", attrib={"{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource":ns_uri})

    # Move the ontology header to be the first child of the root.
    tree_root.remove(vocab)
    tree_root.insert(0, vocab)

    tree.write(newrdf_vocab_properties['path'])
    return (newrdf_vocab_properties, html_vocab_properties)
Beispiel #42
0
# Named graph: http://example.org/foaf/bobFoaf
@prefix  foaf:  <http://xmlns.com/foaf/0.1/> .
@prefix  rdf:    <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix  rdfs:   <http://www.w3.org/2000/01/rdf-schema#> .

_:z  foaf:mbox     <mailto:[email protected]> .
_:z  rdfs:seeAlso  <http://example.org/foaf/bobFoaf> .
_:z  foaf:nick     "Robert" .
<http://example.org/foaf/bobFoaf>
     rdf:type      foaf:PersonalProfileDocument .

"""

# Load the N3 fixture (the `text` literal defined above) into an
# in-memory conjunctive graph and dump it back out as RDF/XML.
graph = ConjunctiveGraph(plugin.get('IOMemory', Store)())
graph.parse(StringIO(text), format="n3")
print graph.serialize(format='xml')

test_query = """
PREFIX  data:  <http://example.org/foaf/>
PREFIX  foaf:  <http://xmlns.com/foaf/0.1/>
PREFIX  rdfs:  <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?mbox ?nick ?ppd
WHERE
{
  GRAPH data:aliceFoaf
  {
    ?alice foaf:mbox <mailto:[email protected]> ;
           foaf:knows ?whom .
    ?whom  foaf:mbox ?mbox ;
Beispiel #43
0
class sparql_funcs():
    
    def __init__(self):
        """Initialize an in-memory RDF graph and the Open Correspondence endpoint URL."""
        self.g = Graph('IOMemory')
        # BUG FIX: this assignment was commented out, but find_correspondents()
        # reads self.endpoint and would raise AttributeError without it.
        self.endpoint = "http://www.opencorrespondence.org/data/endpoint/rdf"
        #self.g.bind('geo', geo)


    def find_places(self):
        '''
            Return the distinct locations mentioned in the headers of the
            letters (the places from which Dickens wrote), each normalized
            via tidy_location().
            TODO: Parsing the letters to get the places mentioned in them
        '''
        row = set()
        o = OFS()

        # NOTE(review): if several buckets exist, only the LAST bucket's
        # "endpoint" stream is kept — presumably there is exactly one bucket.
        endpoint = None
        for b in o.list_buckets():
            endpoint = o.get_stream(b, "endpoint")

        # ROBUSTNESS: the original raised UnboundLocalError when no buckets
        # existed; return the empty result set instead.
        if endpoint is None:
            return row

        self.g.parse(endpoint)

        for s, _, n in self.g.triples((None, dublin_core['title'], None)):
            # Strip the resource-URI scaffolding to recover the raw place key.
            loc_key = urllib.unquote(n.replace("http://www.opencorrespondence.org/place/resource/", "").replace("/rdf", ""))
            row.add(self.tidy_location(loc_key))

        return row
    
    def tidy_location (self, location):
        '''
           Normalize place names that refer to the same location.
           Known aliases are mapped to a canonical name; anything else has
           a stray "U.s" marker and all dots stripped.
           TODO: prob need some language processing to make this scalable
        '''
        # Canonical-name table for the known aliases.
        canonical = {
            'Office Of "household Words,': "Household Words",
            '"household Words" Office': "Household Words",
            '"household Words"': "Household Words",
            'H. W. Office': "Household Words",
            '"household Words,': "Household Words",
            '"all The Year Round" Office': "All The Year Round",
            'Office Of "all The Year Round,': "All The Year Round",
            "Gad's Hill Place": "Gads Hill",
            "Gad's Hill": "Gads Hill",
            "Gad's Hill Place, Higham": "Gads Hill",
            "Tavistock House, Tavistock Square": "Tavistock House",
            "London, Tavistock House": "Tavistock House",
            "Tavistock House, London": "Tavistock House",
        }
        if location in canonical:
            return canonical[location]
        # Fallback: drop the "U.s" country marker (if present) and all periods.
        if "U.s." in location:
            location = str(location).replace("U.s", "")
        return str(location).replace(".", "")
    
    def find_correspondents(self):
        '''
            Return the distinct correspondents of the letters, as keys
            extracted from their Open Correspondence resource URIs.
            (The original docstring here was copy-pasted from find_places.)
        '''
        row = set()
        # NOTE(review): self.endpoint is commented out in __init__ — this
        # raises AttributeError unless that assignment is restored.
        self.g.parse(self.endpoint)

        for s,_,n in self.g.triples((None, letter['correspondent'], None)):
            # Strip the resource-URI scaffolding to recover the correspondent key.
            loc_key = urllib.unquote(n.replace("http://www.opencorrespondence.org/correspondent/resource/", "").replace("/rdf", ""))
            row.add(loc_key)

        return row
    
    def get_abstract(self, resource_id):
        '''
            Return the English-language dbpedia:abstract for the DBpedia
            resource named by resource_id, or None if no match is found.
        '''
        # BUG FIX: the original used PHP-style concatenation
        # ('http://dbpedia.org/resource/'.resource_id), which raises
        # AttributeError in Python; use '+' instead.
        self.g.parse('http://dbpedia.org/resource/' + resource_id)
        q = '''
          SELECT *
                WHERE 
                {
                ?x dbpedia:abstract ?abstract .
                FILTER (lang(?abstract) = 'en')
                }
        '''
        for row in self.g.query(q,
                   initNs=dict(dbpedia=Namespace("http://dbpedia.org/ontology/")),
                   initBindings={}):
            # row[1] is the ?abstract binding; return the first English match.
            return row[1]

    
    def query_dates(self, author):
        '''
            Return a dict mapping year-string -> number of letters whose
            dc:subject is *author*, based on the letters' dc:date values.
        '''
        # SECURITY NOTE(review): *author* is spliced directly into the SPARQL
        # string; a value containing quotes can break or alter the query.
        # Prefer passing it via initBindings.
        q = '''
        SELECT ?date
        FROM <http://localhost:5000/data/endpoint/rdf>
        WHERE {
            ?r dc:subject  \'''' + author + '''\' .  
            ?r dc:date  ?date.  
        }
        '''
        dates = []
        for row in self.g.query(q,
                       initNs=dict(letter=Namespace("http://www.opencorrespondence.org/schema#"), dc=Namespace("http://purl.org/dc/elements/1.1/")),
                       initBindings={}):
            date = str(row[0]).split('-')
            # [1:] skips the first character of the year field — presumably the
            # values come back with a leading quote; TODO confirm against data.
            if date[0][1:].isdigit():
                dates.append(date[0])
        # Parenthesized print works identically under Python 2 and 3.
        print(dates)

        # PERF: tally in one pass instead of the original O(n^2)
        # dates.count(dt)-per-element loop; result is the same plain dict.
        from collections import Counter
        return dict(Counter(dates))