Esempio n. 1
0
def add_mediator(params):
    """Write a new mediator's FOAF record and register it in the mediators list.

    A fresh ``http://vocab.ox.ac.uk/owner/uuid:<uuid4>`` URI is minted for the
    mediator. The details are written to ``<ag.mediatorsdir>/<username>.rdf``
    and a ``foaf:account`` triple for the same URI is added to the graph
    stored in ``ag.mediatorslist``, which is then rewritten.

    params must provide 'firstname', 'lastname', 'email' and 'username'
    (a missing key raises KeyError). 'title' and 'department' are optional;
    'department' may hold several names separated by ';'.

    Returns True once both files have been written.
    """
    graph = Graph()
    for prefix, url in namespaces.iteritems():
        graph.bind(prefix, URIRef(url))
    uri = URIRef("http://vocab.ox.ac.uk/owner/uuid:%s"%uuid.uuid4())
    foaf = namespaces['foaf']
    username = params['username']
    graph.add((uri, foaf['firstName'], Literal(params['firstname'])))
    graph.add((uri, foaf['lastName'], Literal(params['lastname'])))
    graph.add((uri, foaf['mbox'], Literal(params['email'])))
    graph.add((uri, foaf['account'], Literal(username)))
    if 'title' in params and params['title']:
        graph.add((uri, foaf['title'], Literal(params['title'])))
    if 'department' in params and params['department']:
        for d in params['department'].split(';'):
            d = d.strip()
            # Skip blank fragments (e.g. from a trailing ';') so that no
            # empty isPartOf literal is stored.
            if d:
                graph.add((uri, namespaces['dcterms']['isPartOf'], Literal(d)))

    # NOTE(review): username is interpolated into a file name unchecked -
    # confirm that callers validate it.
    mediatorfile = os.path.join(ag.mediatorsdir, '%s.rdf'%username)
    # Serialize before opening so a serialization error cannot truncate the file.
    rdf_str = graph.serialize()
    with codecs.open(mediatorfile, 'w', 'utf-8') as f:
        f.write(rdf_str)

    # Register the new account in the shared mediators list.
    graph2 = Graph()
    graph2.parse(ag.mediatorslist)
    for prefix, url in namespaces.iteritems():
        graph2.bind(prefix, URIRef(url))
    graph2.add((uri, foaf['account'], Literal(username)))
    rdf_str = graph2.serialize()
    with codecs.open(ag.mediatorslist, 'w', 'utf-8') as f:
        f.write(rdf_str)
    return True
Esempio n. 2
0
def update_mediator(params):
    """Update the stored FOAF record of an existing mediator.

    params['username'] selects the record ``<ag.mediatorsdir>/<username>.rdf``.
    For each of 'firstname', 'lastname', 'email' and 'title' that is present
    and non-empty, the existing value is replaced. A non-empty 'department'
    (names separated by ';') replaces all existing dcterms:isPartOf values.

    Returns False when no username is supplied, True after the file has been
    rewritten.
    """
    if not ('username' in params and params['username']):
        return False
    username = params['username']
    det = get_mediator_details(username)
    mediatorfile = os.path.join(ag.mediatorsdir, '%s.rdf'%username)
    graph = Graph()
    graph.parse(mediatorfile)
    for prefix, url in namespaces.iteritems():
        graph.bind(prefix, URIRef(url))
    uri = URIRef(det['uri'])

    # (params key, foaf term) pairs for the single-valued properties.
    foaf_fields = (
        ('firstname', 'firstName'),
        ('lastname', 'lastName'),
        ('email', 'mbox'),
        ('title', 'title'),
    )
    for key, term in foaf_fields:
        if key in params and params[key]:
            predicate = namespaces['foaf'][term]
            graph.remove((uri, predicate, None))
            graph.add((uri, predicate, Literal(params[key])))

    if 'department' in params and params['department']:
        part_of = namespaces['dcterms']['isPartOf']
        graph.remove((uri, part_of, None))
        for d in params['department'].split(';'):
            d = d.strip()
            # Skip blank fragments (e.g. from a trailing ';') so that no
            # empty isPartOf literal is stored.
            if d:
                graph.add((uri, part_of, Literal(d)))

    # Serialize before opening so a serialization error cannot truncate the file.
    rdf_str = graph.serialize()
    with codecs.open(mediatorfile, 'w', 'utf-8') as f:
        f.write(rdf_str)
    return True
Esempio n. 3
0
def change_status(vocabprefix, uri, predicate, message, action):
    """Add or remove one triple in a vocabulary's status.rdf file.

    vocabprefix -- directory name under ag.vocabulariesdir holding status.rdf
    uri         -- subject of the triple
    predicate   -- 'prefix:term', where prefix is a key of ``namespaces``
                   (an unknown prefix raises KeyError)
    message     -- object of the triple; values starting with 'http://' or
                   'file://' become URIRefs, other non-empty values become
                   Literals, empty values are passed through unchanged
    action      -- 'add' or 'remove'

    Returns False if the action or predicate is malformed or the status file
    does not exist; True after the file has been rewritten.
    """
    if action not in ('add', 'remove'):
        return False
    # Reject a predicate without a 'prefix:term' shape up front instead of
    # failing with IndexError after the graph has been loaded.
    if not predicate or ':' not in predicate:
        return False
    parts = predicate.split(':')
    ns = parts[0]
    term = parts[1]

    vocab_uri = URIRef(uri)
    vocabstatusfile = os.path.join(ag.vocabulariesdir, vocabprefix, "status.rdf")
    if not os.path.isfile(vocabstatusfile):
        return False
    graph = Graph()
    graph.parse(vocabstatusfile)

    if message and message.startswith(('http://', 'file://')):
        message = URIRef(message)
    elif message:
        message = Literal(message)
    # NOTE(review): an empty message reaches graph.add()/graph.remove()
    # unconverted - confirm callers never pass one.

    if action == 'add':
        for prefix, url in namespaces.iteritems():
            graph.bind(prefix, URIRef(url))
        graph.add((vocab_uri, namespaces[ns][term], message))
    else:
        graph.remove((vocab_uri, namespaces[ns][term], message))

    # Serialize before opening so a serialization error cannot truncate the file.
    rdf_str = graph.serialize()
    with codecs.open(vocabstatusfile, 'w', 'utf-8') as f:
        f.write(rdf_str)
    return True
Esempio n. 4
0
def add_ref_vocab(vocabprefix, source_uri):
    """Record that a hosted vocabulary is a version of an external vocabulary.

    Adds ``<http://vocab.ox.ac.uk/<vocabprefix>> dcterms:isVersionOf
    <source_uri>`` to the graph stored in ``ag.vocabulariesref`` (the file is
    created if it does not exist yet) and rewrites that file.

    Always returns True.
    """
    vocab_uri = URIRef("http://vocab.ox.ac.uk/%s"%vocabprefix)
    graph = Graph()
    if os.path.isfile(ag.vocabulariesref):
        graph.parse(ag.vocabulariesref)
    for prefix, url in namespaces.iteritems():
        graph.bind(prefix, URIRef(url))
    graph.add((vocab_uri, namespaces['dcterms']['isVersionOf'], URIRef(source_uri)))
    # Serialize before opening so a serialization error cannot truncate the file.
    rdf_str = graph.serialize()
    with codecs.open(ag.vocabulariesref, 'w', 'utf-8') as f:
        f.write(rdf_str)
    return True
Esempio n. 5
0
def add_file_to_vocab_status(vocabprefix, properties, addHasFormat=True):
    """Describe one file of a vocabulary in that vocabulary's status.rdf.

    vocabprefix  -- directory name under ag.vocabulariesdir holding status.rdf
    properties   -- dict with required non-empty 'name' and 'path' and
                    optional 'uri' and 'format'. A missing 'uri' is filled in
                    as http://vocab.ox.ac.uk/<vocabprefix>/<name>; a missing
                    'format' is detected from the file at 'path' when it
                    exists. Both defaults are stored back into the dict.
    addHasFormat -- when true, also link the vocabulary to the file with
                    dcterms:hasFormat.

    Returns False if 'name' or 'path' is missing or status.rdf does not
    exist; True after status.rdf has been rewritten.
    """
    if not properties.get('name') or not properties.get('path'):
        return False

    vocabstatusfile = os.path.join(ag.vocabulariesdir, vocabprefix, "status.rdf")
    vocaburi = "http://vocab.ox.ac.uk/%s"%vocabprefix

    if not properties.get('uri'):
        properties['uri'] = URIRef("http://vocab.ox.ac.uk/%s/%s"%(vocabprefix, properties['name']))
    if not properties.get('format'):
        # Work out the mimetype from the file itself. The original referred
        # to an undefined name `vocabfile` here; the file being described is
        # properties['path'].
        vocabfile = properties['path']
        if os.path.isfile(vocabfile):
            if check_rdf(vocabfile):
                properties['format'] = 'application/rdf+xml'
            else:
                guessed = mimetypes.guess_type(vocabfile)[0]
                properties['format'] = guessed or get_file_mimetype(vocabfile)

    if not os.path.isfile(vocabstatusfile):
        return False
    graph = Graph()
    graph.parse(vocabstatusfile)
    for prefix, url in namespaces.iteritems():
        graph.bind(prefix, URIRef(url))

    file_uri = URIRef(properties['uri'])
    if addHasFormat:
        graph.add((URIRef(vocaburi), namespaces['dcterms']['hasFormat'], file_uri))
    # 'format' may still be absent when the file does not exist on disk.
    if properties.get('format'):
        graph.add((file_uri, namespaces['dcterms']['format'], Literal(properties['format'])))
    if os.path.isfile(properties['path']):
        graph.add((file_uri, namespaces['nfo']['fileUrl'], Literal('file://%s'%properties['path'])))
        graph.add((file_uri, namespaces['nfo']['fileName'], Literal(properties['name'])))

    # Serialize before opening so a serialization error cannot truncate the file.
    rdf_str = graph.serialize()
    with codecs.open(vocabstatusfile, 'w', 'utf-8') as f:
        f.write(rdf_str)
    return True
def _append_has_format(vocab, about_uri, mimetype, label):
    """Append a dcterms:hasFormat description of one file format to *vocab*."""
    has_format = ET.SubElement(vocab, "{http://purl.org/dc/terms/}hasFormat")
    text = ET.SubElement(
        has_format,
        "{http://purl.org/dc/dcmitype/}Text",
        attrib={
            "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about": about_uri
        })
    fmt = ET.SubElement(text, "{http://purl.org/dc/elements/1.1/}format")
    imt = ET.SubElement(fmt, "{http://purl.org/dc/terms/}IMT")
    value = ET.SubElement(
        imt, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
    value.text = mimetype
    name = ET.SubElement(
        imt,
        "{http://www.w3.org/2000/01/rdf-schema#}label",
        attrib={"{http://www.w3.org/XML/1998/namespace}lang": "en"})
    name.text = label


def _declared_format_kinds(vocab):
    """Return the 'html' / 'rdf' kinds named by *vocab*'s hasFormat children."""
    resource_attr = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource"
    kinds = []
    for has_format in vocab.findall("{http://purl.org/dc/terms/}hasFormat"):
        for type_tag in has_format.findall(
                ".//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value"):
            # Evaluate every rdf:value on its own; a tag with neither an
            # rdf:resource attribute nor text contributes nothing.
            ftype = type_tag.attrib.get(resource_attr) or type_tag.text
            if not ftype:
                continue
            ftype = ftype.lower()
            if 'html' in ftype:
                kinds.append('html')
            elif 'rdf' in ftype:
                kinds.append('rdf')
    return kinds


def update_rdf_for_conversion(vocabprefix, vocab_properties,
                              rdf_vocab_properties):
    """Write a '<name>_modified.rdf' copy of a vocabulary with ontology metadata.

    The RDF/XML file at rdf_vocab_properties['path'] is loaded and an
    owl:Ontology element is created if the root has none. Any of
    dc:identifier, dcterms:isVersionOf, vann:preferredNamespacePrefix,
    vann:preferredNamespaceUri and the HTML / RDF dcterms:hasFormat entries
    that are missing from it are added. Every class / property element
    without rdfs:isDefinedBy gets one pointing at the preferred namespace
    URI, and the Ontology element is moved to be the first child of the root.

    vocabprefix          -- not used by this function
    vocab_properties     -- dict with 'preferredNamespaceUri' and
                            'preferredNamespacePrefix'
    rdf_vocab_properties -- dict with 'name', 'path' and 'uri' of the source
                            RDF file

    Returns (newrdf_vocab_properties, html_vocab_properties): dicts with
    'format', 'name', 'path' and 'uri' for the written RDF file and for the
    HTML file expected next to it (the HTML file is not written here).

    NOTE(review): ``ET`` is assumed to be an ElementTree-compatible module
    imported elsewhere in this file; ``ET._namespace_map`` is a private
    attribute of that module.
    """
    source_name = rdf_vocab_properties['name']
    source_path = rdf_vocab_properties['path']
    rdf_uri = rdf_vocab_properties['uri']
    stem = os.path.splitext(source_name)[0]

    html_name = "%s.html" % stem
    html_vocab_properties = {
        'format': 'text/html',
        'name': html_name,
        'path': source_path.replace(source_name, html_name),
        'uri': rdf_uri.replace(source_name, html_name),
    }
    newrdf_name = "%s_modified.rdf" % stem
    newrdf_vocab_properties = {
        'format': 'application/rdf+xml',
        'name': newrdf_name,
        'path': source_path.replace(source_name, newrdf_name),
        'uri': rdf_uri.replace(source_name, newrdf_name),
    }

    # Register the file's own prefixes with ElementTree, then the known
    # project prefixes for namespaces the file does not declare.
    graph = Graph()
    graph.parse(source_path)
    graph_ns = []
    for nsprefix, nsurl in graph.namespaces():
        graph_ns.append(str(nsurl))
        ET._namespace_map[str(nsurl)] = str(nsprefix)
    for prefix, url in namespaces.iteritems():
        if not str(url) in graph_ns:
            ET._namespace_map[str(url)] = str(prefix)

    # Tags (compared lower-cased) of elements that should carry
    # rdfs:isDefinedBy.
    def_tags = (
        "{http://www.w3.org/2000/01/rdf-schema#}Class".lower(),
        "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Property".lower(),
        "{http://www.w3.org/2002/07/owl#}ObjectProperty".lower(),
        "{http://www.w3.org/2002/07/owl#}DatatypeProperty".lower(),
        "{http://www.w3.org/2002/07/owl#}Class".lower(),
    )

    tree = ET.ElementTree(file=source_path)
    ns_uri = vocab_properties['preferredNamespaceUri']
    html_uri = html_vocab_properties['uri']

    tree_root = tree.getroot()
    vocab = tree_root.find("{http://www.w3.org/2002/07/owl#}Ontology")
    # Compare with None: an Element without children is falsy, so a bare
    # truth test would treat an existing but empty owl:Ontology as missing.
    if vocab is None:
        vocab = ET.SubElement(
            tree_root,
            "{http://www.w3.org/2002/07/owl#}Ontology",
            attrib={
                "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about": ns_uri
            })

    # Fill in whatever metadata is missing; a freshly created Ontology
    # element simply misses everything.
    if vocab.find("{http://purl.org/dc/elements/1.1/}identifier") is None:
        identifier = ET.SubElement(
            vocab, "{http://purl.org/dc/elements/1.1/}identifier")
        identifier.text = rdf_uri
    if vocab.find("{http://purl.org/dc/terms/}isVersionOf") is None:
        ET.SubElement(
            vocab,
            "{http://purl.org/dc/terms/}isVersionOf",
            attrib={
                "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource":
                ns_uri
            })
    if vocab.find(
            "{http://purl.org/vocab/vann/}preferredNamespacePrefix") is None:
        pref_prefix = ET.SubElement(
            vocab, "{http://purl.org/vocab/vann/}preferredNamespacePrefix")
        pref_prefix.text = vocab_properties['preferredNamespacePrefix']
    if vocab.find(
            "{http://purl.org/vocab/vann/}preferredNamespaceUri") is None:
        pref_uri = ET.SubElement(
            vocab, "{http://purl.org/vocab/vann/}preferredNamespaceUri")
        pref_uri.text = vocab_properties['preferredNamespaceUri']

    available_types = _declared_format_kinds(vocab)
    if 'html' not in available_types:
        _append_has_format(vocab, html_uri, 'text/html', 'HTML')
    if 'rdf' not in available_types:
        _append_has_format(vocab, rdf_uri, 'application/rdf+xml', 'RDF')

    # Element.getiterator() no longer exists on newer Pythons; prefer iter()
    # and snapshot the elements because children are added while looping.
    walk = getattr(tree_root, 'iter', None) or tree_root.getiterator
    for term in list(walk()):
        if term.tag.lower().strip() not in def_tags:
            continue
        defby = term.find(
            "{http://www.w3.org/2000/01/rdf-schema#}isDefinedBy")
        # `is None`, not truthiness: an existing isDefinedBy normally has no
        # children and would otherwise be duplicated.
        if defby is None:
            ET.SubElement(
                term,
                "{http://www.w3.org/2000/01/rdf-schema#}isDefinedBy",
                attrib={
                    "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource":
                    ns_uri
                })

    # Move ontology to the first element
    tree_root.remove(vocab)
    tree_root.insert(0, vocab)

    tree.write(newrdf_vocab_properties['path'])
    return (newrdf_vocab_properties, html_vocab_properties)
Esempio n. 7
0
def create_vocab_statusfile(userid, vocabprefix, vocabfile, baseuri, update=False, using_uuid=False, refvocab=False):
    """Register a vocabulary against its mediator and write its status.rdf.

    First the mediator's own file ``<ag.mediatorsdir>/<userid>.rdf`` gains a
    dcterms:mediator triple linking the vocabulary to the mediator. Then
    ``<ag.vocabulariesdir>/<vocabprefix>/status.rdf`` is written with the
    vocabulary's mediator, preferred namespace, editorial note and, when
    vocabfile exists on disk, the file's location, name and format.

    userid      -- account name of the mediator
    vocabprefix -- prefix / directory name of the vocabulary
    vocabfile   -- path of the uploaded vocabulary file
    baseuri     -- preferred namespace URI of the vocabulary
    update      -- when true, start from the existing status.rdf if there is
                   one instead of an empty graph
    using_uuid  -- not used by this function
    refvocab    -- if given, URI of the vocabulary this one is a version of;
                   it is also recorded through add_ref_vocab()

    Raises IndexError if the mediator file has no foaf:account triple for
    userid. Returns True once both files have been written.
    """
    vocab_uri = URIRef("http://vocab.ox.ac.uk/%s"%vocabprefix)
    vocabdir = os.path.join(ag.vocabulariesdir, str(vocabprefix))
    vocabstatusfile = os.path.join(vocabdir, "status.rdf")
    vocab_file_name = os.path.basename(vocabfile)
    vocabfile_uri = URIRef("http://vocab.ox.ac.uk/%s/%s"%(vocabprefix, vocab_file_name))
    dcterms = namespaces['dcterms']
    nfo = namespaces['nfo']
    skos = namespaces['skos']

    #Add vocab in mediator file
    graph = Graph()
    mediatorfile = os.path.join(ag.mediatorsdir, '%s.rdf'%userid)
    graph.parse(mediatorfile)
    account_subjects = list(graph.subjects(namespaces['foaf']['account'], Literal(userid)))
    if not account_subjects:
        # Same exception type as the former bare list index, but say why.
        raise IndexError("no foaf:account entry for %r in %s" % (userid, mediatorfile))
    user_uri = URIRef(account_subjects[0])
    graph.add((vocab_uri, dcterms['mediator'], user_uri))
    # Serialize before opening so a serialization error cannot truncate the file.
    rdf_str = graph.serialize()
    with codecs.open(mediatorfile, 'w', 'utf-8') as f:
        f.write(rdf_str)

    #Add vocab in vocab status file
    graph = Graph()
    if update and os.path.isfile(vocabstatusfile):
        graph.parse(vocabstatusfile)
    for prefix, url in namespaces.iteritems():
        graph.bind(prefix, URIRef(url))
    graph.add((vocab_uri, dcterms['mediator'], user_uri))
    graph.add((user_uri, namespaces['foaf']['account'], Literal(userid)))
    graph.add((vocab_uri, dcterms['hasFormat'], vocabfile_uri))
    graph.add((vocab_uri, namespaces['vann']['preferredNamespaceUri'], URIRef(baseuri)))
    graph.add((vocab_uri, namespaces['vann']['preferredNamespacePrefix'], Literal(vocabprefix)))
    graph.add((vocab_uri, skos['editorialNote'], Literal(vocab_editorial_descriptions[0])))
    if refvocab:
        add_ref_vocab(vocabprefix, refvocab)
        graph.add((vocab_uri, dcterms['isVersionOf'], URIRef(refvocab)))

    # get mimetype of file
    if os.path.isfile(vocabfile):
        graph.add((vocabfile_uri, nfo['fileUrl'], Literal('file://%s'%vocabfile)))
        graph.add((vocabfile_uri, nfo['fileName'], Literal(vocab_file_name)))
        mt = None
        if check_rdf(vocabfile):
            mt = 'application/rdf+xml'
            graph.add((vocabfile_uri, dcterms['conformsTo'], Literal(mt)))
            graph.add((vocabfile_uri, skos['editorialNote'], Literal(vocab_editorial_descriptions[3])))
        elif check_n3(vocabfile):
            mt = 'text/rdf+nt'
            # Convert to RDF/XML next to the original, avoiding a name clash
            # when the N3 file itself carries an .rdf extension.
            root, ext = os.path.splitext(vocabfile)
            if ext == '.rdf':
                rdffile = "%s_2.rdf"%root
            else:
                rdffile = "%s.rdf"%root
            converttordf = convert_n3_rdf(vocabfile, rdffile)
            if converttordf and os.path.isfile(rdffile):
                rdf_file_name = os.path.basename(rdffile)
                rdffile_uri = URIRef("http://vocab.ox.ac.uk/%s/%s"%(vocabprefix, rdf_file_name))
                graph.add((vocab_uri, dcterms['hasFormat'], rdffile_uri))
                graph.add((rdffile_uri, nfo['fileUrl'], Literal('file://%s'%rdffile)))
                graph.add((rdffile_uri, nfo['fileName'], Literal(rdf_file_name)))
                graph.add((rdffile_uri, dcterms['conformsTo'], Literal('application/rdf+xml')))
                graph.add((rdffile_uri, skos['editorialNote'], Literal(vocab_editorial_descriptions[3])))
                graph.add((rdffile_uri, dcterms['format'], Literal('application/rdf+xml')))
        else:
            # Only fall back to content sniffing when the file name gives no
            # answer.
            mt = mimetypes.guess_type(vocabfile)[0] or get_file_mimetype(vocabfile)
            if str(mt) == 'application/rdf+xml':
                graph.add((vocabfile_uri, skos['editorialNote'], Literal(vocab_editorial_descriptions[2])))
            else:
                graph.add((vocab_uri, skos['editorialNote'], Literal(vocab_editorial_descriptions[1])))
        if mt:
            graph.add((vocabfile_uri, dcterms['format'], Literal(mt)))

    rdf_str = graph.serialize()
    with codecs.open(vocabstatusfile, 'w', 'utf-8') as f:
        f.write(rdf_str)
    return True
def update_rdf_for_conversion(vocabprefix, vocab_properties, rdf_vocab_properties):
    """Write a '<name>_modified.rdf' copy of a vocabulary with ontology metadata.

    Loads the RDF/XML file at rdf_vocab_properties['path'], makes sure the
    root has an owl:Ontology element carrying dc:identifier,
    dcterms:isVersionOf, vann:preferredNamespacePrefix,
    vann:preferredNamespaceUri and dcterms:hasFormat entries for the HTML and
    RDF formats, adds rdfs:isDefinedBy to every class / property element
    lacking one, moves the Ontology element to the front of the root and
    writes the result to the '_modified.rdf' path.

    vocabprefix          -- not used by this function
    vocab_properties     -- dict with 'preferredNamespaceUri' and
                            'preferredNamespacePrefix'
    rdf_vocab_properties -- dict with 'name', 'path' and 'uri' of the source
                            RDF file

    Returns (newrdf_vocab_properties, html_vocab_properties), each a dict
    with 'format', 'name', 'path' and 'uri'. The HTML file itself is not
    written here.

    NOTE(review): ``ET`` is assumed to be an ElementTree-compatible module
    imported elsewhere in this file; ``ET._namespace_map`` is a private
    attribute of that module.
    """
    RDF = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}"
    RDFS = "{http://www.w3.org/2000/01/rdf-schema#}"
    OWL = "{http://www.w3.org/2002/07/owl#}"
    DC = "{http://purl.org/dc/elements/1.1/}"
    DCTERMS = "{http://purl.org/dc/terms/}"
    VANN = "{http://purl.org/vocab/vann/}"

    def sibling_properties(new_name, mimetype):
        # Describe a file that sits next to the source RDF file.
        old_name = rdf_vocab_properties['name']
        return {
            'format': mimetype,
            'name': new_name,
            'path': rdf_vocab_properties['path'].replace(old_name, new_name),
            'uri': rdf_vocab_properties['uri'].replace(old_name, new_name),
        }

    def add_format(parent, about_uri, mimetype, label):
        # Append one dcterms:hasFormat description to the Ontology element.
        se3a = ET.SubElement(parent, DCTERMS + "hasFormat")
        se3b = ET.SubElement(se3a, "{http://purl.org/dc/dcmitype/}Text", attrib={RDF + "about": about_uri})
        se3c = ET.SubElement(se3b, DC + "format")
        se3d = ET.SubElement(se3c, DCTERMS + "IMT")
        se3e = ET.SubElement(se3d, RDF + "value")
        se3e.text = mimetype
        se3f = ET.SubElement(se3d, RDFS + "label", attrib={"{http://www.w3.org/XML/1998/namespace}lang": "en"})
        se3f.text = label

    stem = os.path.splitext(rdf_vocab_properties['name'])[0]
    html_vocab_properties = sibling_properties("%s.html"%stem, 'text/html')
    newrdf_vocab_properties = sibling_properties("%s_modified.rdf"%stem, 'application/rdf+xml')

    # Register the file's own prefixes with ElementTree, then the known
    # project prefixes for namespaces the file does not declare.
    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])
    graph_ns = []
    for nsprefix, nsurl in graph.namespaces():
        graph_ns.append(str(nsurl))
        ET._namespace_map[str(nsurl)] = str(nsprefix)
    for prefix, url in namespaces.iteritems():
        if not str(url) in graph_ns:
            ET._namespace_map[str(url)] = str(prefix)

    # Tags (compared lower-cased) of elements that should carry rdfs:isDefinedBy.
    def_tags = [
        (RDFS + "Class").lower(),
        (RDF + "Property").lower(),
        (OWL + "ObjectProperty").lower(),
        (OWL + "DatatypeProperty").lower(),
        (OWL + "Class").lower(),
    ]

    tree = ET.ElementTree(file=rdf_vocab_properties['path'])
    ns_uri = vocab_properties['preferredNamespaceUri']
    html_uri = html_vocab_properties['uri']
    rdf_uri = rdf_vocab_properties['uri']

    tree_root = tree.getroot()
    vocab = tree_root.find(OWL + "Ontology")
    # An Element without children is falsy, so test against None; otherwise
    # an existing but empty owl:Ontology would get a duplicate.
    if vocab is None:
        vocab = ET.SubElement(tree_root, OWL + "Ontology", attrib={RDF + "about": ns_uri})

    # Add whichever metadata children are missing (all of them for a newly
    # created Ontology element).
    if not vocab.findall(DC + "identifier"):
        se0 = ET.SubElement(vocab, DC + "identifier")
        se0.text = rdf_uri
    if not vocab.findall(DCTERMS + "isVersionOf"):
        ET.SubElement(vocab, DCTERMS + "isVersionOf", attrib={RDF + "resource": ns_uri})
    if not vocab.findall(VANN + "preferredNamespacePrefix"):
        se2a = ET.SubElement(vocab, VANN + "preferredNamespacePrefix")
        se2a.text = vocab_properties['preferredNamespacePrefix']
    if not vocab.findall(VANN + "preferredNamespaceUri"):
        se2b = ET.SubElement(vocab, VANN + "preferredNamespaceUri")
        se2b.text = vocab_properties['preferredNamespaceUri']

    #Check the formats available and add if necessary
    available_types = []
    for f in vocab.findall(DCTERMS + "hasFormat"):
        for type_tag in f.findall(".//" + RDF + "value"):
            # Resolve each rdf:value independently so that a tag with
            # neither rdf:resource nor text cannot reuse a stale value.
            ftype = type_tag.attrib.get(RDF + "resource") or type_tag.text
            if not ftype:
                continue
            if 'html' in ftype.lower():
                available_types.append('html')
            elif 'rdf' in ftype.lower():
                available_types.append('rdf')
    if not 'html' in available_types:
        add_format(vocab, html_uri, 'text/html', 'HTML')
    if not 'rdf' in available_types:
        add_format(vocab, rdf_uri, 'application/rdf+xml', 'RDF')

    # Element.getiterator() no longer exists on newer Pythons; prefer iter()
    # and snapshot the elements because children are added while looping.
    walk = getattr(tree_root, 'iter', None) or tree_root.getiterator
    for term in list(walk()):
        if term.tag.lower().strip() in def_tags:
            # `is None`, not truthiness: an existing isDefinedBy normally
            # has no children and would otherwise be duplicated.
            if term.find(RDFS + "isDefinedBy") is None:
                ET.SubElement(term, RDFS + "isDefinedBy", attrib={RDF + "resource": ns_uri})

    #Move ontology to the first element
    tree_root.remove(vocab)
    tree_root.insert(0, vocab)

    tree.write(newrdf_vocab_properties['path'])
    return (newrdf_vocab_properties, html_vocab_properties)
Esempio n. 9
0
def update_rdf_for_conversion(prefix, vocab_properties, rdf_vocab_properties):
    """Write an annotated copy of a vocabulary's RDF file.

    The RDF document at ``rdf_vocab_properties['path']`` is parsed, extra
    metadata triples are added, and the result is serialised as pretty-xml
    to a sibling file named ``<basename>_modified.rdf``.

    Parameters:
        prefix: not read by this function; kept for interface compatibility.
        vocab_properties: mapping providing ``'preferredNamespaceUri'`` and
            ``'preferredNamespacePrefix'``.
        rdf_vocab_properties: mapping providing ``'name'``, ``'path'`` and
            ``'uri'`` of the source RDF file.

    Returns:
        A ``(newrdf_vocab_properties, html_vocab_properties)`` tuple of
        dicts, each with the keys ``'format'``, ``'name'``, ``'path'`` and
        ``'uri'``. The HTML entry only describes where an HTML rendering is
        expected; this function does not create that file.
    """
    rdf_name = rdf_vocab_properties['name']
    rdf_path = rdf_vocab_properties['path']
    rdf_uri = rdf_vocab_properties['uri']
    base_name = os.path.splitext(rdf_name)[0]

    # Derived locations are obtained by swapping the file name inside the
    # source path / uri (str.replace substitutes every occurrence).
    html_name = "%s.html" % base_name
    html_vocab_properties = {
        'format': 'text/html',
        'name': html_name,
        'path': rdf_path.replace(rdf_name, html_name),
        'uri': rdf_uri.replace(rdf_name, html_name),
    }

    newrdf_name = "%s_modified.rdf" % base_name
    newrdf_vocab_properties = {
        'format': 'application/rdf+xml',
        'name': newrdf_name,
        'path': rdf_path.replace(rdf_name, newrdf_name),
        'uri': rdf_uri.replace(rdf_name, newrdf_name),
    }

    graph = Graph()
    graph.parse(rdf_path)

    # The resource typed owl:Ontology carries the vocabulary-level metadata.
    # If several exist, the last one yielded by the graph is used.
    # NOTE(review): when the graph has no owl:Ontology resource, `subject`
    # stays None and the triples below are added with a None subject --
    # confirm that callers guarantee an ontology node exists.
    subject = None
    for s in graph.subjects(namespaces['rdf']['type'],
                            URIRef(namespaces['owl']['Ontology'])):
        subject = s

    # Bind the project's namespaces, skipping any namespace URI the parsed
    # document already has a prefix for. The loop variable is deliberately
    # not called `prefix` so the function argument is not overwritten.
    bound_urls = set(str(nsurl) for _, nsurl in graph.namespaces())
    for ns_prefix, ns_url in namespaces.iteritems():
        if str(ns_url) not in bound_urls:
            graph.bind(ns_prefix, URIRef(ns_url))

    rdf_ref = URIRef(rdf_uri)
    html_ref = URIRef(html_vocab_properties['uri'])
    ns_ref = URIRef(vocab_properties['preferredNamespaceUri'])
    text_type = URIRef(namespaces['dctype']['Text'])
    imt_type = URIRef(namespaces['dcterms']['IMT'])
    formatNode1 = BNode()
    formatNode2 = BNode()

    # Add vocabulary properties identifier and format
    graph.add((subject, namespaces['dc']['identifier'], rdf_ref))
    graph.add((subject, namespaces['dcterms']['isVersionOf'], ns_ref))
    graph.add((subject, namespaces['dcterms']['hasFormat'], rdf_ref))
    graph.add((subject, namespaces['dcterms']['hasFormat'], html_ref))
    graph.add((subject, namespaces['vann']['preferredNamespaceUri'], ns_ref))
    # The preferred prefix is a plain string (e.g. "foaf"), not a resource,
    # so it is stored as a Literal rather than a URIRef.
    graph.add((subject, namespaces['vann']['preferredNamespacePrefix'],
               Literal(vocab_properties['preferredNamespacePrefix'])))

    # Describe the HTML rendering
    graph.add((html_ref, namespaces['rdf']['type'], text_type))
    graph.add((html_ref, namespaces['dc']['format'], formatNode1))
    graph.add((formatNode1, namespaces['rdf']['value'], Literal('text/html')))
    graph.add((formatNode1, namespaces['rdfs']['label'], Literal('HTML')))
    graph.add((formatNode1, namespaces['rdf']['type'], imt_type))

    # Describe the RDF/XML rendering
    graph.add((rdf_ref, namespaces['rdf']['type'], text_type))
    graph.add((rdf_ref, namespaces['dc']['format'], formatNode2))
    graph.add((formatNode2, namespaces['rdf']['value'],
               Literal('application/rdf+xml')))
    graph.add((formatNode2, namespaces['rdfs']['label'], Literal('RDF')))
    graph.add((formatNode2, namespaces['rdf']['type'], imt_type))

    # Add rdfs:isDefinedBy for each term reported by get_terms()
    for term in get_terms(rdf_path):
        graph.add((URIRef(term), namespaces['rdfs']['isDefinedBy'], ns_ref))

    rdf_str = graph.serialize(format="pretty-xml")
    # No encoding is given, so codecs.open returns a plain file object and
    # the serialised string is written unchanged.
    f = codecs.open(newrdf_vocab_properties['path'], 'w')
    try:
        f.write(rdf_str)
    finally:
        # Release the handle even when the write fails.
        f.close()
    return (newrdf_vocab_properties, html_vocab_properties)
def update_rdf_for_conversion(prefix, vocab_properties, rdf_vocab_properties):
    """Produce ``<basename>_modified.rdf`` with added vocabulary metadata.

    Parses the RDF file at ``rdf_vocab_properties['path']``, attaches
    identifier / version / format / namespace triples to the resource typed
    owl:Ontology, describes the HTML and RDF/XML renderings, marks every
    term returned by ``get_terms`` with rdfs:isDefinedBy, and writes the
    graph as pretty-xml next to the source file.

    Parameters:
        prefix: unused here; retained so existing callers keep working.
        vocab_properties: mapping with ``'preferredNamespaceUri'`` and
            ``'preferredNamespacePrefix'``.
        rdf_vocab_properties: mapping with ``'name'``, ``'path'`` and
            ``'uri'`` for the source RDF file.

    Returns:
        ``(newrdf_vocab_properties, html_vocab_properties)`` -- two dicts
        with ``'format'``, ``'name'``, ``'path'`` and ``'uri'``. Only the
        modified RDF file is written; the HTML dict is descriptive.
    """
    source_name = rdf_vocab_properties['name']
    stem = os.path.splitext(source_name)[0]

    def derived_properties(mime_type, new_name):
        # Same directory / base uri as the source, with the file name swapped
        # (str.replace substitutes every occurrence of the name).
        return {
            'format': mime_type,
            'name': new_name,
            'path': rdf_vocab_properties['path'].replace(source_name, new_name),
            'uri': rdf_vocab_properties['uri'].replace(source_name, new_name),
        }

    html_vocab_properties = derived_properties('text/html', "%s.html" % stem)
    newrdf_vocab_properties = derived_properties('application/rdf+xml',
                                                 "%s_modified.rdf" % stem)

    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])

    # Vocabulary-level metadata hangs off the owl:Ontology resource; with
    # more than one match the last one iterated wins.
    # NOTE(review): `subject` remains None when no owl:Ontology is present,
    # and the ontology triples are then added with a None subject -- verify
    # that upstream code always supplies an ontology node.
    subject = None
    for candidate in graph.subjects(namespaces['rdf']['type'],
                                    URIRef(namespaces['owl']['Ontology'])):
        subject = candidate

    # Only bind namespaces whose URI the document has not already bound.
    # A distinct loop name keeps the `prefix` argument intact.
    known_urls = set(str(nsurl) for _, nsurl in graph.namespaces())
    for ns_prefix, ns_url in namespaces.iteritems():
        if str(ns_url) not in known_urls:
            graph.bind(ns_prefix, URIRef(ns_url))

    rdf = namespaces['rdf']
    rdfs = namespaces['rdfs']
    dc = namespaces['dc']
    dcterms = namespaces['dcterms']
    vann = namespaces['vann']

    source_ref = URIRef(rdf_vocab_properties['uri'])
    html_ref = URIRef(html_vocab_properties['uri'])
    namespace_ref = URIRef(vocab_properties['preferredNamespaceUri'])
    text_class = URIRef(namespaces['dctype']['Text'])
    imt_class = URIRef(dcterms['IMT'])
    formatNode1 = BNode()
    formatNode2 = BNode()

    triples = [
        # Vocabulary identifier, version and available formats
        (subject, dc['identifier'], source_ref),
        (subject, dcterms['isVersionOf'], namespace_ref),
        (subject, dcterms['hasFormat'], source_ref),
        (subject, dcterms['hasFormat'], html_ref),
        (subject, vann['preferredNamespaceUri'], namespace_ref),
        # The prefix is a short string such as "foaf", so it must be a
        # Literal; wrapping it in URIRef would emit a bogus relative URI.
        (subject, vann['preferredNamespacePrefix'],
         Literal(vocab_properties['preferredNamespacePrefix'])),
        # HTML rendering
        (html_ref, rdf['type'], text_class),
        (html_ref, dc['format'], formatNode1),
        (formatNode1, rdf['value'], Literal('text/html')),
        (formatNode1, rdfs['label'], Literal('HTML')),
        (formatNode1, rdf['type'], imt_class),
        # RDF/XML rendering
        (source_ref, rdf['type'], text_class),
        (source_ref, dc['format'], formatNode2),
        (formatNode2, rdf['value'], Literal('application/rdf+xml')),
        (formatNode2, rdfs['label'], Literal('RDF')),
        (formatNode2, rdf['type'], imt_class),
    ]
    for triple in triples:
        graph.add(triple)

    # rdfs:isDefinedBy for each term get_terms() finds in the source file
    for term in get_terms(rdf_vocab_properties['path']):
        graph.add((URIRef(term), rdfs['isDefinedBy'], namespace_ref))

    rdf_str = graph.serialize(format="pretty-xml")
    # Opened without an encoding, so the serialised string is written as-is.
    f = codecs.open(newrdf_vocab_properties['path'], 'w')
    try:
        f.write(rdf_str)
    finally:
        # Always close the handle, including when the write raises.
        f.close()
    return (newrdf_vocab_properties, html_vocab_properties)