def _sibling_file_properties(rdf_vocab_properties, new_name, mime_type):
    """Describe a file derived from the source RDF file under ``new_name``.

    The path and URI are obtained by substituting the source file name with
    ``new_name`` in the source path and URI.
    """
    old_name = rdf_vocab_properties['name']
    return {
        'format': mime_type,
        'name': new_name,
        'path': rdf_vocab_properties['path'].replace(old_name, new_name),
        'uri': rdf_vocab_properties['uri'].replace(old_name, new_name),
    }


def _append_has_format(vocab, resource_uri, mime_type, label):
    """Append one dcterms:hasFormat description (MIME type + label) to vocab."""
    has_format = ET.SubElement(vocab, "{http://purl.org/dc/terms/}hasFormat")
    text = ET.SubElement(
        has_format,
        "{http://purl.org/dc/dcmitype/}Text",
        attrib={
            "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about": resource_uri
        })
    fmt = ET.SubElement(text, "{http://purl.org/dc/elements/1.1/}format")
    imt = ET.SubElement(fmt, "{http://purl.org/dc/terms/}IMT")
    value = ET.SubElement(
        imt, "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value")
    value.text = mime_type
    label_el = ET.SubElement(
        imt,
        "{http://www.w3.org/2000/01/rdf-schema#}label",
        attrib={"{http://www.w3.org/XML/1998/namespace}lang": "en"})
    label_el.text = label


def _declared_format_kinds(vocab):
    """Return which of 'html' / 'rdf' are already declared via hasFormat."""
    kinds = set()
    for has_format in vocab.findall("{http://purl.org/dc/terms/}hasFormat"):
        for value in has_format.findall(
                ".//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value"):
            # Evaluated afresh for every rdf:value so that an empty element
            # can never inherit the type of the previous one.
            declared = value.attrib.get(
                "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource"
            ) or value.text
            if not declared:
                continue
            declared = declared.lower()
            if 'html' in declared:
                kinds.add('html')
            elif 'rdf' in declared:
                kinds.add('rdf')
    return kinds


def update_rdf_for_conversion(vocabprefix, vocab_properties,
                              rdf_vocab_properties):
    """Write an annotated copy of a vocabulary's RDF/XML file.

    The source document is read from ``rdf_vocab_properties['path']`` and a
    modified copy is written to ``<stem>_modified.rdf`` in the derived path.
    The copy is guaranteed to contain an ``owl:Ontology`` element (moved to
    be the first child of the root) carrying ``dc:identifier``,
    ``dcterms:isVersionOf``, ``vann:preferredNamespacePrefix``,
    ``vann:preferredNamespaceUri`` and ``dcterms:hasFormat`` entries for the
    HTML and RDF renderings; existing entries are left untouched.  Every
    class / property element that lacks ``rdfs:isDefinedBy`` gets one that
    points at the preferred namespace URI.

    Args:
        vocabprefix: Not used by this function.
        vocab_properties: Mapping providing ``'preferredNamespaceUri'`` and
            ``'preferredNamespacePrefix'``.
        rdf_vocab_properties: Mapping providing ``'name'``, ``'path'`` and
            ``'uri'`` of the source RDF file.

    Returns:
        A ``(newrdf_vocab_properties, html_vocab_properties)`` tuple of
        dicts, each with the keys ``'format'``, ``'name'``, ``'path'`` and
        ``'uri'``.
    """
    stem = os.path.splitext(rdf_vocab_properties['name'])[0]
    html_vocab_properties = _sibling_file_properties(
        rdf_vocab_properties, "%s.html" % stem, 'text/html')
    newrdf_vocab_properties = _sibling_file_properties(
        rdf_vocab_properties, "%s_modified.rdf" % stem,
        'application/rdf+xml')

    # The graph is parsed only to learn the prefixes declared by the source
    # document; they are registered with ElementTree so that the serialiser
    # reuses them.  Prefixes found in the document win over the defaults.
    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])
    graph_ns = set()
    for nsprefix, nsurl in graph.namespaces():
        graph_ns.add(str(nsurl))
        ET._namespace_map[str(nsurl)] = str(nsprefix)
    for prefix, url in namespaces.items():
        if str(url) not in graph_ns:
            ET._namespace_map[str(url)] = str(prefix)

    # Element tags are compared case-insensitively against these.
    def_tags = frozenset(tag.lower() for tag in (
        "{http://www.w3.org/2000/01/rdf-schema#}Class",
        "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Property",
        "{http://www.w3.org/2002/07/owl#}ObjectProperty",
        "{http://www.w3.org/2002/07/owl#}DatatypeProperty",
        "{http://www.w3.org/2002/07/owl#}Class",
    ))

    tree = ET.ElementTree(file=rdf_vocab_properties['path'])
    ns_uri = vocab_properties['preferredNamespaceUri']
    html_uri = html_vocab_properties['uri']
    rdf_uri = rdf_vocab_properties['uri']

    tree_root = tree.getroot()
    vocab = tree_root.find("{http://www.w3.org/2002/07/owl#}Ontology")
    # Compare with None: an Element without children is falsy, so a plain
    # truth test would mistake an empty owl:Ontology for a missing one.
    if vocab is None:
        vocab = ET.SubElement(
            tree_root,
            "{http://www.w3.org/2002/07/owl#}Ontology",
            attrib={
                "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about": ns_uri
            })

    if vocab.find("{http://purl.org/dc/elements/1.1/}identifier") is None:
        identifier = ET.SubElement(
            vocab, "{http://purl.org/dc/elements/1.1/}identifier")
        identifier.text = rdf_uri
    if vocab.find("{http://purl.org/dc/terms/}isVersionOf") is None:
        ET.SubElement(
            vocab,
            "{http://purl.org/dc/terms/}isVersionOf",
            attrib={
                "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource":
                ns_uri
            })
    if vocab.find(
            "{http://purl.org/vocab/vann/}preferredNamespacePrefix") is None:
        pref_prefix = ET.SubElement(
            vocab, "{http://purl.org/vocab/vann/}preferredNamespacePrefix")
        pref_prefix.text = vocab_properties['preferredNamespacePrefix']
    if vocab.find(
            "{http://purl.org/vocab/vann/}preferredNamespaceUri") is None:
        pref_uri = ET.SubElement(
            vocab, "{http://purl.org/vocab/vann/}preferredNamespaceUri")
        pref_uri.text = vocab_properties['preferredNamespaceUri']

    # Add whichever of the two renderings is not declared yet (HTML first).
    declared = _declared_format_kinds(vocab)
    if 'html' not in declared:
        _append_has_format(vocab, html_uri, 'text/html', 'HTML')
    if 'rdf' not in declared:
        _append_has_format(vocab, rdf_uri, 'application/rdf+xml', 'RDF')

    # Element.iter() superseded getiterator(); fall back for old ElementTree.
    walk = getattr(tree_root, "iter", None) or tree_root.getiterator
    # Snapshot the elements first because the loop adds children to the tree.
    for term in list(walk()):
        if term.tag.lower().strip() not in def_tags:
            continue
        defined_by = term.find(
            "{http://www.w3.org/2000/01/rdf-schema#}isDefinedBy")
        # rdfs:isDefinedBy normally has no children and would therefore be
        # falsy; only a None result means that it is really absent.
        if defined_by is None:
            ET.SubElement(
                term,
                "{http://www.w3.org/2000/01/rdf-schema#}isDefinedBy",
                attrib={
                    "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource":
                    ns_uri
                })

    # Move ontology to the first element
    tree_root.remove(vocab)
    tree_root.insert(0, vocab)

    tree.write(newrdf_vocab_properties['path'])
    return (newrdf_vocab_properties, html_vocab_properties)
Beispiel #2
0
class TriplesDatabase(object):
    """A database from the defined triples.

    Wraps a graph object (``self.graph``) that is created by :meth:`open`.
    Every other method asserts that :meth:`open` has been called first.
    """
    def __init__(self):
        self._open = False

    def open(self, filename, graphClass=None):
        """
        Load existing database at 'filename'.

        When {filename} is None a fresh graph is created instead: an instance
        of {graphClass} if one is given, otherwise a Graph().  When
        {filename} is given it must exist; it is opened with
        sqliteBackedGraph and {graphClass} is not used.
        """
        if filename is None:
            if graphClass is None:
                self.graph = Graph()
            else:
                self.graph = graphClass()
        else:
            assert os.path.exists(filename), (
                    "%s must be an existing database" % (filename,))

            path, filename = os.path.split(filename)
            self.graph = sqliteBackedGraph(path, filename)

        self._open = True

    def query(self, rest, initNs=None, initBindings=None):
        """
        Execute a SPARQL query and get the results as a SPARQLResult

        {rest} is a string that should begin with "SELECT ", usually

        {initNs} defaults to the namespaces bound in the graph and
        {initBindings} to an empty dict.  The query text handed to the graph
        is built by select() from the base namespace and {rest}.
        """
        assert self._open

        if initNs is None:
            initNs = dict(self.graph.namespaces())
        if initBindings is None:
            initBindings = {}

        sel = select(self.getBase(), rest)
        ret = self.graph.query(sel, initNs=initNs, initBindings=initBindings,
                DEBUG=False)
        return ret

    def getBase(self):
        """
        Return the namespace bound to the empty prefix, or RDFSNS when the
        graph has no such binding.
        """
        d = dict(self.graph.namespaces())
        return d.get('', RDFSNS)

    def addTriple(self, s, v, *objects):
        """
        Make a statement/arc/triple in the database.

        Strings, ints and floats as s or o will automatically be coerced to
        RDFLiteral().  It is an error to give a RDFLiteral as v, so no
        coercion will be done in that position.

        2-tuple objects will be coerced to bnodes: the tuple is added as a
        (verb, object) statement about the bnode, and the bnode becomes the
        object of this statement.  All 2-tuples passed in one call share a
        single bnode.

        If more than one object is given, i.e.
            addTriple(a, b, c1, c2, c3)
        this is equivalent to:
            addTriple(a,b,c1); addTriple(a,b,c2); addTriple(a,b,c3)
        """
        assert self._open
        assert len(objects) >= 1, "You must provide at least one object"
        if canBeLiteral(s):
            s = RDFLiteral(s)

        bnode = None
        for o in objects:
            if canBeLiteral(o):
                o = RDFLiteral(o)
            elif isinstance(o, tuple) and len(o) == 2:
                # Created lazily and reused for later tuples in this call.
                if bnode is None:
                    bnode = BNode()
                self.addTriple(bnode, *o)
                o = bnode

            assert None not in [s,v,o]
            self.graph.add((s, v, o))

    def dump(self):
        """
        Return the whole graph serialized as N3 text.

        Serialization errors propagate to the caller.  Previously they were
        intercepted by an interactive pdb post-mortem session, which blocks
        on stdin and afterwards returned whatever partial output existed.
        """
        assert self._open
        buf = StringIO()
        self.graph.serialize(destination=buf, format='n3')
        return buf.getvalue()
Beispiel #3
0
def update_rdf_for_conversion(prefix, vocab_properties, rdf_vocab_properties):
    """Write an annotated copy of a vocabulary's RDF file using rdflib.

    The source graph is parsed from ``rdf_vocab_properties['path']``,
    enriched and serialised as ``pretty-xml`` to ``<stem>_modified.rdf`` in
    the derived path.  The following statements are added:

    * on the ontology resource: ``dc:identifier``, ``dcterms:isVersionOf``,
      two ``dcterms:hasFormat`` links (RDF and HTML renderings),
      ``vann:preferredNamespaceUri`` and ``vann:preferredNamespacePrefix``;
    * a ``dctype:Text`` description with a ``dcterms:IMT`` format node for
      each of the two renderings;
    * ``rdfs:isDefinedBy`` for every term returned by ``get_terms``.

    The ontology resource is the subject typed ``owl:Ontology`` (the last
    one reported by the graph if there are several).  When the graph has
    none, the preferred namespace URI is used and typed ``owl:Ontology``.

    Args:
        prefix: Not used by this function.
        vocab_properties: Mapping providing ``'preferredNamespaceUri'`` and
            ``'preferredNamespacePrefix'``.
        rdf_vocab_properties: Mapping providing ``'name'``, ``'path'`` and
            ``'uri'`` of the source RDF file.

    Returns:
        A ``(newrdf_vocab_properties, html_vocab_properties)`` tuple of
        dicts, each with the keys ``'format'``, ``'name'``, ``'path'`` and
        ``'uri'``.
    """
    source_name = rdf_vocab_properties['name']
    stem = os.path.splitext(source_name)[0]

    html_vocab_properties = {
        'format': 'text/html',
        'name': "%s.html" % stem,
    }
    newrdf_vocab_properties = {
        'format': 'application/rdf+xml',
        'name': "%s_modified.rdf" % stem,
    }
    # Path and URI of each derived file: swap the file name in the source's.
    for derived in (html_vocab_properties, newrdf_vocab_properties):
        for key in ('path', 'uri'):
            derived[key] = rdf_vocab_properties[key].replace(
                source_name, derived['name'])

    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])

    rdf_type = namespaces['rdf']['type']
    owl_ontology = URIRef(namespaces['owl']['Ontology'])
    ns_uri = URIRef(vocab_properties['preferredNamespaceUri'])
    rdf_uri = URIRef(rdf_vocab_properties['uri'])
    html_uri = URIRef(html_vocab_properties['uri'])

    subject = None
    for s in graph.subjects(rdf_type, owl_ontology):
        subject = s
    if subject is None:
        # Without this fallback every statement below would be added with
        # None as its subject.  The ElementTree variant of this function
        # likewise creates an owl:Ontology about the namespace URI.
        subject = ns_uri
        graph.add((subject, rdf_type, owl_ontology))

    # Bind the default prefixes for namespaces the document did not declare.
    # The loop variable is deliberately not called ``prefix`` so that the
    # parameter of that name is left alone.
    bound_uris = set(str(ns_url) for _, ns_url in graph.namespaces())
    for ns_prefix, ns_url in namespaces.items():
        if str(ns_url) not in bound_uris:
            graph.bind(ns_prefix, URIRef(ns_url))

    # Add vocabulary properties identifier and format
    graph.add((subject, namespaces['dc']['identifier'], rdf_uri))
    graph.add((subject, namespaces['dcterms']['isVersionOf'], ns_uri))
    graph.add((subject, namespaces['dcterms']['hasFormat'], rdf_uri))
    graph.add((subject, namespaces['dcterms']['hasFormat'], html_uri))
    graph.add((subject, namespaces['vann']['preferredNamespaceUri'], ns_uri))
    # The prefix is a plain string such as "foaf", not a resource, so it is
    # stored as a literal rather than a URI reference.
    graph.add((subject, namespaces['vann']['preferredNamespacePrefix'],
               Literal(vocab_properties['preferredNamespacePrefix'])))

    # Describe each rendering as dctype:Text with a dcterms:IMT format node.
    renderings = (
        (html_uri, 'text/html', 'HTML'),
        (rdf_uri, 'application/rdf+xml', 'RDF'),
    )
    for resource, mime_type, label in renderings:
        format_node = BNode()
        graph.add((resource, rdf_type, URIRef(namespaces['dctype']['Text'])))
        graph.add((resource, namespaces['dc']['format'], format_node))
        graph.add((format_node, namespaces['rdf']['value'],
                   Literal(mime_type)))
        graph.add((format_node, namespaces['rdfs']['label'], Literal(label)))
        graph.add((format_node, rdf_type,
                   URIRef(namespaces['dcterms']['IMT'])))

    # Add rdfs:isDefinedBy for each class / property / term of the vocabulary
    for term in get_terms(rdf_vocab_properties['path']):
        graph.add((URIRef(term), namespaces['rdfs']['isDefinedBy'], ns_uri))

    rdf_str = graph.serialize(format="pretty-xml")
    # The context manager closes the file even when the write fails.
    with codecs.open(newrdf_vocab_properties['path'], 'w') as f:
        f.write(rdf_str)
    return (newrdf_vocab_properties, html_vocab_properties)
def update_rdf_for_conversion(vocabprefix, vocab_properties, rdf_vocab_properties):
    """Produce ``<stem>_modified.rdf`` from a vocabulary's RDF/XML file.

    The source document at ``rdf_vocab_properties['path']`` is loaded with
    ElementTree, completed and written to the derived path:

    * an ``owl:Ontology`` element is created (about the preferred namespace
      URI) when the root has none, and ends up as the root's first child;
    * ``dc:identifier``, ``dcterms:isVersionOf``,
      ``vann:preferredNamespacePrefix`` and ``vann:preferredNamespaceUri``
      are added to it when absent;
    * ``dcterms:hasFormat`` descriptions of the HTML and RDF renderings are
      added unless an existing ``rdf:value`` already mentions 'html' / 'rdf';
    * every class / property element without ``rdfs:isDefinedBy`` receives
      one pointing at the preferred namespace URI.

    Args:
        vocabprefix: Not used by this function.
        vocab_properties: Mapping providing ``'preferredNamespaceUri'`` and
            ``'preferredNamespacePrefix'``.
        rdf_vocab_properties: Mapping providing ``'name'``, ``'path'`` and
            ``'uri'`` of the source RDF file.

    Returns:
        A ``(newrdf_vocab_properties, html_vocab_properties)`` tuple of
        dicts, each with the keys ``'format'``, ``'name'``, ``'path'`` and
        ``'uri'``.
    """
    RDF = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}"
    RDFS = "{http://www.w3.org/2000/01/rdf-schema#}"
    OWL = "{http://www.w3.org/2002/07/owl#}"
    DC = "{http://purl.org/dc/elements/1.1/}"
    DCTERMS = "{http://purl.org/dc/terms/}"
    DCMITYPE = "{http://purl.org/dc/dcmitype/}"
    VANN = "{http://purl.org/vocab/vann/}"
    XML = "{http://www.w3.org/XML/1998/namespace}"

    def derived_properties(new_name, mime_type):
        # Same location as the source file, with the file name swapped.
        old_name = rdf_vocab_properties['name']
        return {
            'format': mime_type,
            'name': new_name,
            'path': rdf_vocab_properties['path'].replace(old_name, new_name),
            'uri': rdf_vocab_properties['uri'].replace(old_name, new_name),
        }

    def add_text_child(parent, tag, text):
        # Append <tag>text</tag> to parent.
        child = ET.SubElement(parent, tag)
        child.text = text

    def add_format(ontology, resource_uri, mime_type, label):
        # hasFormat > Text(about) > format > IMT > (value, label@en)
        has_format = ET.SubElement(ontology, DCTERMS + "hasFormat")
        text = ET.SubElement(has_format, DCMITYPE + "Text",
                             attrib={RDF + "about": resource_uri})
        imt = ET.SubElement(ET.SubElement(text, DC + "format"),
                            DCTERMS + "IMT")
        add_text_child(imt, RDF + "value", mime_type)
        label_el = ET.SubElement(imt, RDFS + "label",
                                 attrib={XML + "lang": "en"})
        label_el.text = label

    def declared_formats(ontology):
        # Collect 'html' / 'rdf' for each rdf:value below a hasFormat.
        found = set()
        for has_format in ontology.findall(DCTERMS + "hasFormat"):
            for value in has_format.findall(".//" + RDF + "value"):
                # Computed per element; an empty rdf:value is skipped instead
                # of reusing the type seen on the previous element.
                ftype = value.attrib.get(RDF + "resource") or value.text
                if not ftype:
                    continue
                ftype = ftype.lower()
                if 'html' in ftype:
                    found.add('html')
                elif 'rdf' in ftype:
                    found.add('rdf')
        return found

    stem = os.path.splitext(rdf_vocab_properties['name'])[0]
    html_vocab_properties = derived_properties("%s.html" % stem, 'text/html')
    newrdf_vocab_properties = derived_properties(
        "%s_modified.rdf" % stem, 'application/rdf+xml')

    # Parse with rdflib only to discover the document's own prefixes and
    # register them (then the defaults for the rest) with ElementTree.
    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])
    graph_ns = set()
    for nsprefix, nsurl in graph.namespaces():
        graph_ns.add(str(nsurl))
        ET._namespace_map[str(nsurl)] = str(nsprefix)
    for prefix, url in namespaces.items():
        if str(url) not in graph_ns:
            ET._namespace_map[str(url)] = str(prefix)

    # Tags are matched case-insensitively, hence the lower-casing.
    def_tags = frozenset(tag.lower() for tag in (
        RDFS + "Class",
        RDF + "Property",
        OWL + "ObjectProperty",
        OWL + "DatatypeProperty",
        OWL + "Class",
    ))

    tree = ET.ElementTree(file=rdf_vocab_properties['path'])
    ns_uri = vocab_properties['preferredNamespaceUri']
    html_uri = html_vocab_properties['uri']
    rdf_uri = rdf_vocab_properties['uri']

    tree_root = tree.getroot()
    vocab = tree_root.find(OWL + "Ontology")
    # ``is None`` rather than a truth test: a childless Element is falsy, so
    # an existing but empty owl:Ontology would otherwise be duplicated.
    if vocab is None:
        vocab = ET.SubElement(tree_root, OWL + "Ontology",
                              attrib={RDF + "about": ns_uri})

    if vocab.find(DC + "identifier") is None:
        add_text_child(vocab, DC + "identifier", rdf_uri)
    if vocab.find(DCTERMS + "isVersionOf") is None:
        ET.SubElement(vocab, DCTERMS + "isVersionOf",
                      attrib={RDF + "resource": ns_uri})
    if vocab.find(VANN + "preferredNamespacePrefix") is None:
        add_text_child(vocab, VANN + "preferredNamespacePrefix",
                       vocab_properties['preferredNamespacePrefix'])
    if vocab.find(VANN + "preferredNamespaceUri") is None:
        add_text_child(vocab, VANN + "preferredNamespaceUri",
                       vocab_properties['preferredNamespaceUri'])

    available_types = declared_formats(vocab)
    if 'html' not in available_types:
        add_format(vocab, html_uri, 'text/html', 'HTML')
    if 'rdf' not in available_types:
        add_format(vocab, rdf_uri, 'application/rdf+xml', 'RDF')

    # Element.iter() superseded getiterator(); fall back for old ElementTree.
    walk = getattr(tree_root, "iter", None) or tree_root.getiterator
    # Materialise the walk up front: children are appended inside the loop.
    for term in list(walk()):
        if term.tag.lower().strip() not in def_tags:
            continue
        # find() returns None only when the child is missing; the element
        # itself is falsy whenever it has no children of its own.
        if term.find(RDFS + "isDefinedBy") is None:
            ET.SubElement(term, RDFS + "isDefinedBy",
                          attrib={RDF + "resource": ns_uri})

    # Move ontology to the first element
    tree_root.remove(vocab)
    tree_root.insert(0, vocab)

    tree.write(newrdf_vocab_properties['path'])
    return (newrdf_vocab_properties, html_vocab_properties)
def update_rdf_for_conversion(prefix, vocab_properties, rdf_vocab_properties):
    """Produce ``<stem>_modified.rdf`` from a vocabulary's RDF file (rdflib).

    The graph parsed from ``rdf_vocab_properties['path']`` is extended with
    metadata about the ontology resource (``dc:identifier``,
    ``dcterms:isVersionOf``, ``dcterms:hasFormat`` for the RDF and HTML
    renderings, ``vann:preferredNamespaceUri`` and
    ``vann:preferredNamespacePrefix``), with a ``dctype:Text`` /
    ``dcterms:IMT`` description of both renderings, and with
    ``rdfs:isDefinedBy`` for every term returned by ``get_terms``.  The
    result is serialised as ``pretty-xml`` to the derived path.

    The ontology resource is the subject typed ``owl:Ontology`` (the last
    one reported by the graph if there are several).  If there is none, the
    preferred namespace URI is used and typed ``owl:Ontology``.

    Args:
        prefix: Not used by this function.
        vocab_properties: Mapping providing ``'preferredNamespaceUri'`` and
            ``'preferredNamespacePrefix'``.
        rdf_vocab_properties: Mapping providing ``'name'``, ``'path'`` and
            ``'uri'`` of the source RDF file.

    Returns:
        A ``(newrdf_vocab_properties, html_vocab_properties)`` tuple of
        dicts, each with the keys ``'format'``, ``'name'``, ``'path'`` and
        ``'uri'``.
    """
    def derived_properties(new_name, mime_type):
        # Same location as the source file, with the file name swapped.
        old_name = rdf_vocab_properties['name']
        return {
            'format': mime_type,
            'name': new_name,
            'path': rdf_vocab_properties['path'].replace(old_name, new_name),
            'uri': rdf_vocab_properties['uri'].replace(old_name, new_name),
        }

    def describe_rendering(resource, mime_type, label):
        # resource a dctype:Text ; dc:format [ a dcterms:IMT ; value ; label ]
        format_node = BNode()
        graph.add((resource, namespaces['rdf']['type'],
                   URIRef(namespaces['dctype']['Text'])))
        graph.add((resource, namespaces['dc']['format'], format_node))
        graph.add((format_node, namespaces['rdf']['value'],
                   Literal(mime_type)))
        graph.add((format_node, namespaces['rdfs']['label'], Literal(label)))
        graph.add((format_node, namespaces['rdf']['type'],
                   URIRef(namespaces['dcterms']['IMT'])))

    stem = os.path.splitext(rdf_vocab_properties['name'])[0]
    html_vocab_properties = derived_properties("%s.html" % stem, 'text/html')
    newrdf_vocab_properties = derived_properties(
        "%s_modified.rdf" % stem, 'application/rdf+xml')

    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])

    preferred_uri = URIRef(vocab_properties['preferredNamespaceUri'])
    source_uri = URIRef(rdf_vocab_properties['uri'])
    html_uri = URIRef(html_vocab_properties['uri'])
    ontology_class = URIRef(namespaces['owl']['Ontology'])

    subject = None
    for s in graph.subjects(namespaces['rdf']['type'], ontology_class):
        subject = s
    if subject is None:
        # No owl:Ontology in the source: describe the namespace URI itself
        # instead of adding statements whose subject is None.
        subject = preferred_uri
        graph.add((subject, namespaces['rdf']['type'], ontology_class))

    # Bind default prefixes only for namespaces the document lacks.  The
    # loop uses its own variable names so the ``prefix`` parameter survives.
    graph_ns = set(str(nsurl) for _, nsurl in graph.namespaces())
    for default_prefix, url in namespaces.items():
        if str(url) not in graph_ns:
            graph.bind(default_prefix, URIRef(url))

    # Add vocabulary properties identifier and format
    graph.add((subject, namespaces['dc']['identifier'], source_uri))
    graph.add((subject, namespaces['dcterms']['isVersionOf'], preferred_uri))
    graph.add((subject, namespaces['dcterms']['hasFormat'], source_uri))
    graph.add((subject, namespaces['dcterms']['hasFormat'], html_uri))
    graph.add((subject, namespaces['vann']['preferredNamespaceUri'],
               preferred_uri))
    # A namespace prefix is a plain string, not a resource: use a Literal.
    graph.add((subject, namespaces['vann']['preferredNamespacePrefix'],
               Literal(vocab_properties['preferredNamespacePrefix'])))

    describe_rendering(html_uri, 'text/html', 'HTML')
    describe_rendering(source_uri, 'application/rdf+xml', 'RDF')

    # Add rdfs:isDefinedBy for each class / property / term of the vocabulary
    list_of_terms = get_terms(rdf_vocab_properties['path'])
    for term in list_of_terms:
        graph.add((URIRef(term), namespaces['rdfs']['isDefinedBy'],
                   preferred_uri))

    rdf_str = graph.serialize(format="pretty-xml")
    # ``with`` guarantees the file is closed even if the write raises.
    with codecs.open(newrdf_vocab_properties['path'], 'w') as f:
        f.write(rdf_str)
    return (newrdf_vocab_properties, html_vocab_properties)