def __init__(self, path, **kwargs):
    """Set up the dataset rooted at *path* and load its manifest, if any.

    Args:
        path: Directory of the dataset.  It is passed to the parent
            initialiser and is where ``manifest.rdf`` is looked for.
        **kwargs: Accepted for call compatibility; not used here.
    """
    super(OXDSDataset, self).__init__(path)
    self._manifest_filename = os.path.join(path, 'manifest.rdf')
    self._manifest = bind_namespaces(rdflib.ConjunctiveGraph())
    try:
        with open(self._manifest_filename, 'rb') as f:
            self._manifest.parse(f, base=self._manifest_filename)
    except IOError:
        # Best effort: a manifest that cannot be opened or read is not an
        # error.  The graph created above is kept as it stands (it is
        # still empty when open() itself failed).
        pass
def update_manifest(self):
    """Rebuild the in-memory manifest graph from the dataset directory.

    Starts from a fresh namespace-bound graph, types the package itself as
    an ``OXDS.Grouping``, then walks ``self._path`` and copies each entry's
    ``user.dublincore.*`` extended attributes into the matching DCTERMS
    fields.  Finally the package-level title, description and license are
    written from the corresponding attributes of ``self``.
    """
    self._manifest = bind_namespaces(rdflib.ConjunctiveGraph())
    package = rdflib.URIRef(self._path)

    self._manifest += ((package, RDF.type, OXDS.Grouping),)

    seen_uris = set()
    for base, dirs, files in os.walk(self._path):
        for name in files + dirs:
            # os.walk() yields directory paths that already start with the
            # walk root, so the top level is recognised by comparing with
            # the root rather than by testing for an empty string.
            if base == self._path and name == 'manifest.rdf':
                continue

            # `base` already contains self._path; joining self._path onto
            # it again would repeat the root whenever it is relative.
            filename = os.path.join(base, name)

            # URIs use '/' as separators, and we don't want to trust that
            # the OS uses the same separator.  Every separator in the path
            # is replaced, not only the last one.
            uri = rdflib.URIRef(filename.replace(os.sep, '/'))
            seen_uris.add(uri)

            # NOTE(review): only directories are typed as foaf:Document
            # here -- confirm that plain files should not be typed too.
            if os.path.isdir(filename):
                self._manifest.add((uri, RDF.type, FOAF.Document))

            xattr_data = dict(xattr.xattr(filename))
            # A missing attribute is passed on as None.
            for term in ('identifier', 'title', 'description', 'license'):
                self._update_field(uri, DCTERMS[term],
                                   xattr_data.get('user.dublincore.' + term))

    # Hand every typed document that was not met during the walk to
    # _remove_cbd().
    for uri in self._manifest.subjects(RDF.type, FOAF.Document):
        if uri not in seen_uris:
            self._remove_cbd(uri)

    # Parenthesised so the statement is valid under Python 2 and 3 alike.
    print("Updating")

    self._update_field(package, DCTERMS['title'], self.title)
    self._update_field(package, DCTERMS['description'], self.description)

    if self.license is None or self.license == "":
        self._remove_field(package, DCTERMS['license'], self.license)
    else:
        self._add_field(package, DCTERMS['license'], self.license)
def update_manifest(self):
    """Rebuild the in-memory manifest graph from the dataset directory.

    Starts from a fresh namespace-bound graph, types the package itself as
    an ``OXDS.Grouping``, then walks ``self._path`` and copies each entry's
    ``user.dublincore.*`` extended attributes into the matching DCTERMS
    fields.  Finally the package-level title, description and license are
    written from the corresponding attributes of ``self``.
    """
    self._manifest = bind_namespaces(rdflib.ConjunctiveGraph())
    package = rdflib.URIRef(self._path)

    self._manifest += ((package, RDF.type, OXDS.Grouping),)

    seen_uris = set()
    for base, dirs, files in os.walk(self._path):
        for name in files + dirs:
            # os.walk() yields directory paths that already start with the
            # walk root, so the top level is recognised by comparing with
            # the root rather than by testing for an empty string.
            if base == self._path and name == 'manifest.rdf':
                continue

            # `base` already contains self._path; joining self._path onto
            # it again would repeat the root whenever it is relative.
            filename = os.path.join(base, name)

            # URIs use '/' as separators, and we don't want to trust that
            # the OS uses the same separator.  Every separator in the path
            # is replaced, not only the last one.
            uri = rdflib.URIRef(filename.replace(os.sep, '/'))
            seen_uris.add(uri)

            # NOTE(review): only directories are typed as foaf:Document
            # here -- confirm that plain files should not be typed too.
            if os.path.isdir(filename):
                self._manifest.add((uri, RDF.type, FOAF.Document))

            xattr_data = dict(xattr.xattr(filename))
            # A missing attribute is passed on as None.
            for term in ('identifier', 'title', 'description', 'license'):
                self._update_field(uri, DCTERMS[term],
                                   xattr_data.get('user.dublincore.' + term))

    # Hand every typed document that was not met during the walk to
    # _remove_cbd().
    for uri in self._manifest.subjects(RDF.type, FOAF.Document):
        if uri not in seen_uris:
            self._remove_cbd(uri)

    # Parenthesised so the statement is valid under Python 2 and 3 alike.
    print("Updating")

    self._update_field(package, DCTERMS['title'], self.title)
    self._update_field(package, DCTERMS['description'], self.description)

    if self.license is None or self.license == "":
        self._remove_field(package, DCTERMS['license'], self.license)
    else:
        self._add_field(package, DCTERMS['license'], self.license)