class OFS(object):
    """Simple object-storage facade over a pairtree store.

    Each stored item is identified by a uuid and carries a JSON metadata
    payload (a ``PersistentState``) alongside a single file bytestream.
    Reserved metadata keys start with an underscore (``_uri``,
    ``_filename``, ``_numberofbytes``, ``_checksum``, ``_datecreated``,
    ``_lastmodified``).
    """

    def __init__(self, storage_dir="data", uri_base="urn:uuid:", hashing_type="md5"):
        self.storage_dir = storage_dir
        self.uri_base = uri_base
        self.hashing_type = hashing_type
        self._open_store()

    def _open_store(self):
        # Only pass hashing_type through when one was requested, so the
        # pairtree client's own default applies otherwise.
        if self.hashing_type:
            self._store = PairtreeStorageClient(self.uri_base, self.storage_dir,
                                                shorty_length=2,
                                                hashing_type=self.hashing_type)
        else:
            self._store = PairtreeStorageClient(self.uri_base, self.storage_dir,
                                                shorty_length=2)

    def exists(self, uuid):
        """Return True if an object with this uuid exists in the store."""
        return self._store.exists(uuid)

    def _get_object(self, uuid):
        # Return a (pairtree_object, json_payload) pair for the uuid.
        po = self._store.get_object(uuid)
        json_payload = PersistentState(po.id_to_dirpath())
        return (po, json_payload)

    def _setup_item(self, uuid):
        # Record the item's full URI in its metadata payload.
        _, json_payload = self._get_object(uuid)
        json_payload['_uri'] = self.uri_base + uuid
        json_payload.sync()

    def claim_an_id(self):
        """Reserve and return a fresh uuid that is not already in the store."""
        uuid = uuid4().hex
        while self.exists(uuid):
            uuid = uuid4().hex
        self._setup_item(uuid)
        return uuid

    def list_ids(self):
        return self._store.list_ids()

    def put_stream(self, uuid, stream_object, filename, params=None):
        """Store ``stream_object`` under uuid/filename and update its metadata.

        ``params`` supplies userland metadata; keys starting with "_" are
        reserved and silently dropped.  Returns the resulting metadata dict.
        """
        ## QUESTION: do we enforce that the uuid's have to be 'claimed' first?
        ## NB this method doesn't care if it has been
        # Avoid the shared mutable-default-argument pitfall (was params={}).
        if params is None:
            params = {}
        po, json_payload = self._get_object(uuid)
        hash_vals = po.add_bytestream_by_path(filename, stream_object)
        stat_vals = po.stat(filename)
        if '_filename' in json_payload.keys():
            # Remove the old file only when it really has a different name;
            # deleting unconditionally would remove the bytestream we just
            # wrote when the new filename matches the old one.
            if json_payload['_filename'] != filename:
                po.del_file(json_payload['_filename'])
            creation_date = None
        else:
            # New upload - record creation date
            creation_date = datetime.now().isoformat().split(".")[0]  ## '2010-07-08T19:56:47'
        # Userland parameters for the file
        cleaned_params = dict([(k, params[k]) for k in params if not k.startswith("_")])
        json_payload.update(cleaned_params)
        # File details: _filename, _numberofbytes (in bytes)
        json_payload['_filename'] = filename
        try:
            json_payload['_numberofbytes'] = int(stat_vals.st_size)
        except TypeError:
            # Parenthesized print works both as a py2 statement and a py3 call.
            print("Error getting filesize from os.stat().st_size into an integer...")
        if creation_date:
            json_payload['_datecreated'] = creation_date
            json_payload['_lastmodified'] = creation_date
        else:
            # Modification date
            json_payload['_lastmodified'] = datetime.now().isoformat().split(".")[0]
        # Hash details:
        if hash_vals:
            json_payload['_checksum'] = "%s:%s" % (hash_vals['type'], hash_vals['checksum'])
        json_payload.sync()
        return json_payload.state

    def get_stream(self, uuid, as_stream=True):
        """Return the stored bytestream (file-like if ``as_stream``).

        Raises FileNotFoundException when the object or its file is missing.
        """
        if self.exists(uuid):
            po, json_payload = self._get_object(uuid)
            if '_filename' in json_payload.keys():
                return po.get_bytestream(json_payload['_filename'],
                                         streamable=as_stream,
                                         path=None, appendable=False)
        raise FileNotFoundException

    def get_stream_metadata(self, uuid):
        """Return the metadata dict for uuid, or raise FileNotFoundException."""
        if self.exists(uuid):
            _, json_payload = self._get_object(uuid)
            return json_payload.state
        else:
            raise FileNotFoundException

    def update_stream_metadata(self, uuid, params):
        """Merge userland ``params`` into the metadata; returns the new state.

        Reserved keys (leading underscore) are ignored.
        """
        if self.exists(uuid) and isinstance(params, dict):
            _, json_payload = self._get_object(uuid)
            # Userland parameters for the file
            cleaned_params = dict([(k, params[k]) for k in params if not k.startswith("_")])
            json_payload.update(cleaned_params)
            json_payload.sync()
            return json_payload.state
        else:
            raise FileNotFoundException

    def remove_metadata_keys(self, uuid, keys):
        """Delete the given userland keys from the metadata; returns new state."""
        if self.exists(uuid) and isinstance(keys, list):
            _, json_payload = self._get_object(uuid)
            for key in [x for x in keys if not x.startswith("_")]:
                if key in json_payload.keys():
                    del json_payload[key]
            json_payload.sync()
            return json_payload.state
        else:
            raise FileNotFoundException

    def del_stream(self, uuid):
        """Delete the whole object for uuid (file and metadata)."""
        if self.exists(uuid):
            # deletes the whole object for uuid
            self._store.delete_object(uuid)
        else:
            raise FileNotFoundException
class FileStorageClient(object):
    """Versioned, part-based object storage built on a pairtree store.

    Each object (identified by ``id``) holds named parts; each version of
    a part is stored as "<part_id><prefix><version>" inside the part's
    directory.  When a ``queue`` is supplied, every mutation is pushed to
    it as a JSON audit message via :meth:`_log`.
    """

    def __init__(self, uri_base, store_dir, prefix, shorty_length,
                 queue=None, hashing_type=None, **context):
        self.store_dir = store_dir
        self.uri_base = None
        if uri_base:
            self.uri_base = Namespace(uri_base)
        self.ids = {}
        self.id_parts = {}
        self.prefix = prefix
        self.shorty_length = shorty_length
        self.queue = queue
        self.context = context
        # Always record the hashing type so the attribute exists even when
        # none was requested (original only assigned it when truthy, which
        # could leave self.hashing_type undefined).
        self.hashing_type = hashing_type
        if hashing_type:
            self.storeclient = PairtreeStorageClient(uri_base, store_dir,
                                                     shorty_length,
                                                     hashing_type=self.hashing_type)
        else:
            self.storeclient = PairtreeStorageClient(uri_base, store_dir,
                                                     shorty_length)
        # Prefer the store's own notion of the uri base when it has one.
        if self.storeclient.uri_base:
            self.uri_base = Namespace(self.storeclient.uri_base)

    def list_ids(self):
        return self.storeclient.list_ids()

    def _get_latest_part(self, id, part_id):
        """Return the highest stored version number for a part (0 if none)."""
        try:
            versions = self._list_part_versions(id, part_id)
            if versions:
                return max(versions)
            return 0
        except PartNotFoundException:
            return 0

    def _list_parts(self, id):
        return self.storeclient.list_parts(id)

    def _list_part_versions(self, id, part_id):
        """Return the list of integer version numbers stored for a part.

        Raises PartNotFoundException when the part does not exist.
        """
        if part_id in self.storeclient.list_parts(id):
            versions = self.storeclient.list_parts(id, part_id)
            # Version filenames end with "<prefix><number>".
            numbered_versions = [int(x.split(self.prefix)[-1]) for x in versions]
            if numbered_versions:
                return numbered_versions
            else:
                return []
        else:
            raise PartNotFoundException

    def _del_part_version(self, id, part_id, version):
        """Delete one specific version of a part.

        Raises ObjectNotFoundException / PartNotFoundException /
        VersionNotFoundException as appropriate.
        """
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if part_id in self.storeclient.list_parts(id):
            if version in self._list_part_versions(id, part_id):
                # delete version
                resp = self.storeclient.del_stream(
                    id, "%s%s%s" % (part_id, self.prefix, version), path=part_id)
                if self.queue is not None:
                    self._log(id, 'd', 'Deleting a version of a part',
                              part_id=part_id, version=version)
                return resp
            else:
                raise VersionNotFoundException(part_id=part_id, version=version)
        else:
            raise PartNotFoundException

    def _put_part(self, id, part_id, bytestream, version=False,
                  buffer_size=1024 * 8, mimetype=None):
        """Store a new version of a part; creates the object if needed.

        Returns {'version': int, 'checksum': hexhash}.
        """
        if not self.storeclient.exists(id):
            self.storeclient.create_object(id)
        if not version:
            version = self._get_latest_part(id, part_id) + 1
        part_name = "%s%s%s" % (part_id, self.prefix, version)
        hexhash = self.storeclient.put_stream(id, part_id, part_name,
                                              bytestream, buffer_size)
        if self.queue is not None:
            if version == 1:
                self._log(id, 'c', 'Creating a part', part_id=part_id,
                          version=version, checksum=hexhash, mimetype=mimetype)
            else:
                self._log(id, 'w', 'Updating a part', part_id=part_id,
                          version=version, checksum=hexhash, mimetype=mimetype)
        return {'version': version, 'checksum': hexhash}

    def _get_part(self, id, part_id, stream, version=False):
        """Fetch a part's bytestream (latest version unless one is given)."""
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id)
        if version == 0:
            # No stored version at all.
            raise PartNotFoundException
        part_name = "%s%s%s" % (part_id, self.prefix, version)
        if not self.storeclient.exists(id, os.path.join(part_id, part_name)):
            raise VersionNotFoundException(part_id=part_id, version=version)
        else:
            return self.storeclient.get_stream(id, part_id, part_name, stream)

    def _del_part(self, id, part_id):
        """Delete a part and all of its versions."""
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not self.storeclient.exists(id, part_id):
            raise PartNotFoundException(part_id=part_id)
        self.storeclient.del_path(id, part_id, recursive=True)
        if self.queue is not None:
            self._log(id, 'd', 'Deleting a part', part_id=part_id)

    def _store_manifest(self, id, part_id, manifest, version=False):
        """Serialize an RDF Manifest and store it as a new part version."""
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id) + 1
        part_name = "%s%s%s" % (part_id, self.prefix, version)
        bytestream = manifest.to_string()
        # NOTE: `unicode` is Python 2 only -- this module is py2 code.
        if isinstance(bytestream, unicode):
            bytestream = bytestream.encode('utf-8')
        hexhash = self.storeclient.put_stream(id, part_id, part_name, bytestream)
        if self.queue is not None:
            if version == 1:
                self._log(id, 'c', 'Creating an RDF Manifest', part_id=part_id,
                          version=version, checksum=hexhash)
            else:
                self._log(id, 'w', 'Updating an RDF Manifest', part_id=part_id,
                          version=version, checksum=hexhash)
        return {'version': version, 'checksum': hexhash}

    def _store_rdfobject(self, id, part_id, rdfobject, version=False):
        """Serialize an RDFobject and store it as a new part version."""
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id) + 1
        part_name = "%s%s%s" % (part_id, self.prefix, version)
        bytestream = rdfobject.to_string()
        # NOTE: `unicode` is Python 2 only -- this module is py2 code.
        if isinstance(bytestream, unicode):
            bytestream = bytestream.encode('utf-8')
        hexhash = self.storeclient.put_stream(id, part_id, part_name, bytestream)
        if self.queue is not None:
            if version == 1:
                self._log(id, 'c', 'Creating an RDF Root', part_id=part_id,
                          version=version, checksum=hexhash)
            else:
                self._log(id, 'w', 'Updating an RDF Root', part_id=part_id,
                          version=version, checksum=hexhash)
        return {'version': version, 'checksum': hexhash}

    def _get_rdfobject(self, id, part_id, version=False):
        """Load an RDFobject from a stored part (empty object if no versions)."""
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id)
        part_name = "%s%s%s" % (part_id, self.prefix, version)
        r = RDFobject()
        r.set_uri(self.uri_base[id])
        if version >= 1:
            f = self.storeclient.get_stream(id, part_id, part_name,
                                            streamable=False)
            r.from_string(self.uri_base[id], f.decode('utf-8'))
        return r

    def _get_manifest(self, id, part_id, file_uri, version=False):
        """Load a Manifest from a stored part (empty manifest if no versions)."""
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id)
        part_name = "%s%s%s" % (part_id, self.prefix, version)
        m = Manifest(file_uri)
        if version >= 1:
            f = self.storeclient.get_stream(id, part_id, part_name,
                                            streamable=False)
            m.from_string(f.decode('utf-8'))
        return m

    def exists(self, id):
        return self.storeclient.exists(id)

    def getObject(self, id=None, create_if_doesnt_exist=True):
        """Return a FileStorageObject for id, optionally creating it."""
        exists = self.storeclient.exists(id)
        self.storeclient.get_object(id, create_if_doesnt_exist)
        # Only audit a creation when the object did not already exist.
        if create_if_doesnt_exist and not exists and self.queue is not None:
            self._log(id, 'c', 'Creating an object')
        return FileStorageObject(id, self)

    def createObject(self, id):
        """Create a new object and return its FileStorageObject wrapper."""
        self.storeclient.create_object(id)
        if self.queue is not None:
            self._log(id, 'c', 'Creating an object')
        return FileStorageObject(id, self)

    def deleteObject(self, id):
        """Delete an object if it exists; no-op (returns None) otherwise."""
        if self.storeclient.exists(id):
            if self.queue is not None:
                self._log(id, 'd', 'Deleting an object')
            return self.storeclient.delete_object(id)

    def log_audit(self, id, logcontext, context):
        """Record a metadata-change audit message on the queue."""
        if self.queue is not None:
            self._log(id, 'metadatadelta', 'Metadata changes',
                      _logcontext=logcontext, **context)

    def _log(self, id, action, label, **kw):
        """Push a JSON audit message describing an action onto the queue.

        Merges per-call keywords with the client's static ``context``.
        """
        msg = {}
        msg.update(kw)
        msg.update(self.context)
        msg['id'] = id
        msg['action'] = action
        msg['label'] = label
        msg['uri_base'] = self.uri_base
        # Get an ISO datetime for this
        msg['timestamp'] = datetime.now().isoformat()
        # The original wrapped this in try/except only to re-raise the same
        # exception (py2-only `except Exception, e: raise e`); let any
        # queue/serialization error propagate directly instead.
        self.queue.put(simplejson.dumps(msg))