Esempio n. 1
0
class OFS(object):
    """File store keyed by uuid, built on a ``PairtreeStorageClient``.

    Each item has a JSON metadata record (a ``PersistentState``) that tracks
    a single current file name.  Keys starting with an underscore
    (``_uri``, ``_filename``, ``_numberofbytes``, ``_datecreated``,
    ``_lastmodified``, ``_checksum``) are written by this class; keys with a
    leading underscore supplied by callers are ignored.
    """

    def __init__(self, storage_dir="data", uri_base="urn:uuid:", hashing_type="md5"):
        """Remember the store settings and open the pairtree store.

        :param storage_dir: directory handed to the pairtree client.
        :param uri_base: prefix for item URIs, also handed to the client.
        :param hashing_type: hash name passed to the client; a falsy value
            means the client is created without a ``hashing_type`` argument.
        """
        self.storage_dir = storage_dir
        self.uri_base = uri_base
        self.hashing_type = hashing_type
        self._open_store()

    def _open_store(self):
        """Create the underlying ``PairtreeStorageClient`` as ``self._store``."""
        options = {'shorty_length': 2}
        if self.hashing_type:
            options['hashing_type'] = self.hashing_type
        self._store = PairtreeStorageClient(self.uri_base, self.storage_dir, **options)

    @staticmethod
    def _userland_params(params):
        """Return a dict copy of *params* without the underscore-prefixed keys."""
        return dict((k, params[k]) for k in params if not k.startswith("_"))

    def exists(self, uuid):
        """Return whatever the store reports for the existence of *uuid*."""
        return self._store.exists(uuid)

    def _get_object(self, uuid):
        """Return ``(pairtree_object, metadata)`` for *uuid*.

        The metadata is a ``PersistentState`` opened on the object's directory.
        """
        po = self._store.get_object(uuid)
        json_payload = PersistentState(po.id_to_dirpath())
        return (po, json_payload)

    def _setup_item(self, uuid):
        """Write the item's ``_uri`` (``uri_base + uuid``) into its metadata."""
        _, json_payload = self._get_object(uuid)
        json_payload['_uri'] = self.uri_base + uuid
        json_payload.sync()

    def claim_an_id(self):
        """Generate an unused uuid4 hex id, initialise its metadata, return it."""
        uuid = uuid4().hex
        while self.exists(uuid):
            uuid = uuid4().hex
        self._setup_item(uuid)
        return uuid

    def list_ids(self):
        """Return the ids reported by the underlying store."""
        return self._store.list_ids()

    def put_stream(self, uuid, stream_object, filename, params=None):
        """Store *stream_object* as *filename* under *uuid* and update metadata.

        :param uuid: item id; it does not have to be claimed beforehand.
        :param stream_object: content passed to ``add_bytestream_by_path``.
        :param filename: name to store the content under.
        :param params: optional mapping of user metadata; keys starting with
            an underscore are dropped.
        :returns: the metadata state after the update.
        """
        ## QUESTION: do we enforce that the uuid's have to be 'claimed' first?
        ## NB this method doesn't care if it has been
        po, json_payload = self._get_object(uuid)
        hash_vals = po.add_bytestream_by_path(filename, stream_object)
        stat_vals = po.stat(filename)
        timestamp = datetime.now().isoformat().split(".")[0]  ## '2010-07-08T19:56:47'

        is_new_upload = '_filename' not in json_payload.keys()
        if not is_new_upload:
            previous_name = json_payload['_filename']
            # Only remove the old file when it has a different name; deleting
            # it when the name is unchanged would remove the bytestream that
            # was just written above.
            if previous_name != filename:
                po.del_file(previous_name)

        # Userland parameters for the file
        json_payload.update(self._userland_params(params or {}))

        # Filedetails: _filename, _numberofbytes (in bytes)
        json_payload['_filename'] = filename
        try:
            json_payload['_numberofbytes'] = int(stat_vals.st_size)
        except TypeError:
            # Best effort: the size is simply left out of the metadata.
            print("Error getting filesize from os.stat().st_size into an integer...")

        if is_new_upload:
            json_payload['_datecreated'] = timestamp
        json_payload['_lastmodified'] = timestamp

        # Hash details:
        if hash_vals:
            json_payload['_checksum'] = "%s:%s" % (hash_vals['type'], hash_vals['checksum'])
        json_payload.sync()
        return json_payload.state

    def get_stream(self, uuid, as_stream=True):
        """Return the stored file for *uuid*.

        :param as_stream: forwarded as ``streamable`` to ``get_bytestream``.
        :raises FileNotFoundException: if the item does not exist or has no
            recorded ``_filename``.
        """
        if self.exists(uuid):
            po, json_payload = self._get_object(uuid)
            if '_filename' in json_payload.keys():
                return po.get_bytestream(json_payload['_filename'], streamable=as_stream,
                                         path=None, appendable=False)
        raise FileNotFoundException

    def get_stream_metadata(self, uuid):
        """Return the metadata state for *uuid*.

        :raises FileNotFoundException: if the item does not exist.
        """
        if not self.exists(uuid):
            raise FileNotFoundException
        _, json_payload = self._get_object(uuid)
        return json_payload.state

    def update_stream_metadata(self, uuid, params):
        """Merge the non-underscore keys of *params* into the item's metadata.

        :returns: the metadata state after the update.
        :raises FileNotFoundException: if the item does not exist, and also
            when *params* is not a dict.
        """
        if not (self.exists(uuid) and isinstance(params, dict)):
            raise FileNotFoundException
        _, json_payload = self._get_object(uuid)
        # Userland parameters for the file
        json_payload.update(self._userland_params(params))
        json_payload.sync()
        return json_payload.state

    def remove_metadata_keys(self, uuid, keys):
        """Delete the given non-underscore *keys* from the item's metadata.

        Keys that are not present are skipped.

        :returns: the metadata state after the removal.
        :raises FileNotFoundException: if the item does not exist, and also
            when *keys* is not a list.
        """
        if not (self.exists(uuid) and isinstance(keys, list)):
            raise FileNotFoundException
        _, json_payload = self._get_object(uuid)
        for key in keys:
            if key.startswith("_"):
                continue
            if key in json_payload.keys():
                del json_payload[key]
        json_payload.sync()
        return json_payload.state

    def del_stream(self, uuid):
        """Delete the whole object stored for *uuid*.

        :raises FileNotFoundException: if the item does not exist.
        """
        if not self.exists(uuid):
            raise FileNotFoundException
        # deletes the whole object for uuid
        self._store.delete_object(uuid)
Esempio n. 2
0
class FileStorageClient(object):
    """Versioned, part-based object storage over a ``PairtreeStorageClient``.

    A part's versions are stored under the path ``part_id`` as streams named
    ``<part_id><prefix><version>``.  When a ``queue`` is supplied, a
    JSON-encoded message is put on it for each create/update/delete.
    """

    def __init__(self, uri_base, store_dir, prefix, shorty_length, queue=None, hashing_type=None, **context):
        """Open the pairtree store and remember the versioning settings.

        :param uri_base: base URI; wrapped in ``Namespace`` when truthy and
            replaced by the store's own ``uri_base`` if that one is set.
        :param store_dir: directory handed to the pairtree client.
        :param prefix: separator between a part id and its version number.
        :param shorty_length: forwarded to the pairtree client.
        :param queue: optional object with ``put()`` receiving log messages.
        :param hashing_type: optional hash name forwarded to the client.
        :param context: extra key/values merged into every log message.
        """
        self.store_dir = store_dir
        self.uri_base = None
        if uri_base:
            self.uri_base = Namespace(uri_base)
        self.ids = {}
        self.id_parts = {}
        self.prefix = prefix
        self.shorty_length = shorty_length
        self.queue = queue
        self.context = context
        # Always define the attribute so later reads cannot hit AttributeError.
        self.hashing_type = hashing_type
        if hashing_type:
            self.storeclient = PairtreeStorageClient(uri_base, store_dir, shorty_length,
                                                     hashing_type=hashing_type)
        else:
            self.storeclient = PairtreeStorageClient(uri_base, store_dir, shorty_length)
        if self.storeclient.uri_base:
            self.uri_base = Namespace(self.storeclient.uri_base)

    # -- private helpers -------------------------------------------------

    def _part_name(self, part_id, version):
        """Return the stream name used for *version* of *part_id*."""
        return "%s%s%s" % (part_id, self.prefix, version)

    def _require_object(self, id):
        """Raise ``ObjectNotFoundException`` unless the store has *id*."""
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException

    def _log_write(self, id, noun, version, **kw):
        """Queue a create ('c', when version is 1) or update ('w') message."""
        if self.queue is None:
            return
        if version == 1:
            self._log(id, 'c', 'Creating %s' % noun, version=version, **kw)
        else:
            self._log(id, 'w', 'Updating %s' % noun, version=version, **kw)

    def _store_serialised(self, id, part_id, serialisable, version, noun):
        """Store ``serialisable.to_string()`` as a new version of a part.

        Shared by ``_store_manifest`` and ``_store_rdfobject``; *noun* only
        affects the label of the log message.
        """
        self._require_object(id)
        if not version:
            version = self._get_latest_part(id, part_id) + 1
        bytestream = serialisable.to_string()
        # type(u"") is ``unicode`` on Python 2; text is stored as UTF-8 bytes.
        if isinstance(bytestream, type(u"")):
            bytestream = bytestream.encode('utf-8')
        hexhash = self.storeclient.put_stream(id, part_id, self._part_name(part_id, version), bytestream)
        self._log_write(id, noun, version, part_id=part_id, checksum=hexhash)
        return {'version': version, 'checksum': hexhash}

    # -- listing -----------------------------------------------------------

    def list_ids(self):
        """Return the ids reported by the underlying store."""
        return self.storeclient.list_ids()

    def _get_latest_part(self, id, part_id):
        """Return the highest version number of a part, or 0 if it has none."""
        try:
            versions = self._list_part_versions(id, part_id)
        except PartNotFoundException:
            return 0
        if versions:
            return max(versions)
        return 0

    def _list_parts(self, id):
        """Return the parts the store lists for *id*."""
        return self.storeclient.list_parts(id)

    def _list_part_versions(self, id, part_id):
        """Return the version numbers (ints) stored for *part_id*.

        :raises PartNotFoundException: if the object has no such part.
        """
        if part_id not in self.storeclient.list_parts(id):
            raise PartNotFoundException
        names = self.storeclient.list_parts(id, part_id)
        # The version is whatever follows the last occurrence of the prefix.
        return [int(name.split(self.prefix)[-1]) for name in names]

    # -- parts -------------------------------------------------------------

    def _del_part_version(self, id, part_id, version):
        """Delete one version of a part and return the store's response.

        :raises ObjectNotFoundException: if the object does not exist.
        :raises PartNotFoundException: if the part does not exist.
        :raises VersionNotFoundException: if the version does not exist.
        """
        self._require_object(id)
        if part_id not in self.storeclient.list_parts(id):
            raise PartNotFoundException
        if version not in self._list_part_versions(id, part_id):
            raise VersionNotFoundException(part_id=part_id, version=version)
        # delete version
        resp = self.storeclient.del_stream(id, self._part_name(part_id, version), path=part_id)
        if self.queue is not None:
            self._log(id, 'd', 'Deleting a version of a part', part_id=part_id, version=version)
        return resp

    def _put_part(self, id, part_id, bytestream, version=False, buffer_size = 1024 * 8, mimetype=None):
        """Store *bytestream* as a version of a part, creating the object if needed.

        :param version: explicit version; a falsy value means "latest + 1".
        :param buffer_size: forwarded to the store's ``put_stream``.
        :param mimetype: only recorded in the log message.
        :returns: ``{'version': ..., 'checksum': ...}``.
        """
        if not self.storeclient.exists(id):
            self.storeclient.create_object(id)
        if not version:
            version = self._get_latest_part(id, part_id) + 1
        hexhash = self.storeclient.put_stream(id, part_id, self._part_name(part_id, version),
                                              bytestream, buffer_size)
        self._log_write(id, 'a part', version, part_id=part_id, checksum=hexhash, mimetype=mimetype)
        return {'version': version, 'checksum': hexhash}

    def _get_part(self, id, part_id, stream, version = False):
        """Return a version of a part (the latest when *version* is falsy).

        :param stream: forwarded positionally to the store's ``get_stream``.
        :raises ObjectNotFoundException: if the object does not exist.
        :raises PartNotFoundException: if the part has no versions.
        :raises VersionNotFoundException: if that version is not stored.
        """
        self._require_object(id)
        if not version:
            version = self._get_latest_part(id, part_id)
        if version == 0:
            raise PartNotFoundException
        part_name = self._part_name(part_id, version)
        if not self.storeclient.exists(id, os.path.join(part_id, part_name)):
            raise VersionNotFoundException(part_id=part_id, version=version)
        return self.storeclient.get_stream(id, part_id, part_name, stream)

    def _del_part(self, id, part_id):
        """Recursively delete a part with all of its versions.

        :raises ObjectNotFoundException: if the object does not exist.
        :raises PartNotFoundException: if the part does not exist.
        """
        self._require_object(id)
        if not self.storeclient.exists(id, part_id):
            raise PartNotFoundException(part_id=part_id)
        self.storeclient.del_path(id, part_id, recursive=True)
        if self.queue is not None:
            self._log(id, 'd', 'Deleting a part', part_id=part_id)

    # -- RDF manifests and root objects --------------------------------------

    def _store_manifest(self, id, part_id, manifest, version = False):
        """Store ``manifest.to_string()`` as a version of a part.

        :returns: ``{'version': ..., 'checksum': ...}``.
        :raises ObjectNotFoundException: if the object does not exist.
        """
        return self._store_serialised(id, part_id, manifest, version, 'an RDF Manifest')

    def _store_rdfobject(self, id, part_id, rdfobject, version=False):
        """Store ``rdfobject.to_string()`` as a version of a part.

        :returns: ``{'version': ..., 'checksum': ...}``.
        :raises ObjectNotFoundException: if the object does not exist.
        """
        return self._store_serialised(id, part_id, rdfobject, version, 'an RDF Root')

    def _get_rdfobject(self, id, part_id, version = False):
        """Return an ``RDFobject`` for *id*, loaded from the part if stored.

        With no stored version (version below 1) the object only has its URI
        set.

        :raises ObjectNotFoundException: if the object does not exist.
        """
        self._require_object(id)
        if not version:
            version = self._get_latest_part(id, part_id)
        rdf = RDFobject()
        rdf.set_uri(self.uri_base[id])
        if version >= 1:
            raw = self.storeclient.get_stream(id, part_id, self._part_name(part_id, version),
                                              streamable=False)
            rdf.from_string(self.uri_base[id], raw.decode('utf-8'))
        return rdf

    def _get_manifest(self, id, part_id, file_uri, version = False):
        """Return a ``Manifest`` for *file_uri*, loaded from the part if stored.

        :raises ObjectNotFoundException: if the object does not exist.
        """
        self._require_object(id)
        if not version:
            version = self._get_latest_part(id, part_id)
        manifest = Manifest(file_uri)
        if version >= 1:
            raw = self.storeclient.get_stream(id, part_id, self._part_name(part_id, version),
                                              streamable=False)
            manifest.from_string(raw.decode('utf-8'))
        return manifest

    # -- objects -----------------------------------------------------------

    def exists(self, id):
        """Return whatever the store reports for the existence of *id*."""
        return self.storeclient.exists(id)

    def getObject(self, id=None, create_if_doesnt_exist=True):
        """Return a ``FileStorageObject`` for *id*.

        ``create_if_doesnt_exist`` is forwarded to the store's ``get_object``;
        a creation message is logged when the object did not exist before.
        """
        existed = self.storeclient.exists(id)
        self.storeclient.get_object(id, create_if_doesnt_exist)
        if create_if_doesnt_exist and not existed and self.queue is not None:
            self._log(id, 'c', 'Creating an object')
        return FileStorageObject(id, self)

    def createObject(self, id):
        """Create the object in the store and return a ``FileStorageObject``."""
        self.storeclient.create_object(id)
        if self.queue is not None:
            self._log(id, 'c', 'Creating an object')
        return FileStorageObject(id, self)

    def deleteObject(self, id):
        """Delete the object if it exists; return the store's response.

        Returns ``None`` without doing anything when the object is absent.
        """
        if not self.storeclient.exists(id):
            return None
        if self.queue is not None:
            self._log(id, 'd', 'Deleting an object')
        return self.storeclient.delete_object(id)

    # -- logging -----------------------------------------------------------

    def log_audit(self, id, logcontext, context):
        """Queue a 'metadatadelta' message carrying *context* (if a queue is set)."""
        if self.queue is not None:
            self._log(id, 'metadatadelta', 'Metadata changes', _logcontext=logcontext, **context)

    def _log(self, id, action, label, **kw):
        """Put a JSON message describing an action on ``self.queue``.

        Keys from *kw* are overridden by the client-wide context, which in
        turn is overridden by the fixed fields set here.  Callers are expected
        to have checked that a queue is configured.  Errors from serialising
        or queueing propagate unchanged.
        """
        msg = dict(kw)
        msg.update(self.context)
        msg['id'] = id
        msg['action'] = action
        msg['label'] = label
        msg['uri_base'] = self.uri_base
        # Get an ISO datetime for this
        msg['timestamp'] = datetime.now().isoformat()
        self.queue.put(simplejson.dumps(msg))