def _open_store(self):
    """Instantiate the backing pairtree storage client.

    Passes ``hashing_type`` through only when one is configured so the
    client default applies otherwise.
    """
    if self.hashing_type:
        self._store = PairtreeStorageClient(
            self.uri_base, self.storage_dir,
            shorty_length=self.shorty_length,
            hashing_type=self.hashing_type)
    else:
        # BUG FIX: the original passed the bare name ``shorty_length``
        # (NameError at runtime); the value lives on the instance.
        self._store = PairtreeStorageClient(
            self.uri_base, self.storage_dir,
            shorty_length=self.shorty_length)
def __init__(self, uri_base, store_dir, prefix, shorty_length,
             queue=None, hashing_type=None, **context):
    """Set up a versioned file-storage client over a pairtree store.

    :param uri_base: base URI; wrapped in a Namespace when truthy.
    :param store_dir: filesystem root of the pairtree store.
    :param prefix: separator used between a part id and its version number.
    :param shorty_length: pairtree "shorty" segment length.
    :param queue: optional audit-log queue; None disables logging.
    :param hashing_type: optional checksum algorithm name.
    :param context: extra key/values merged into every audit message.
    """
    self.store_dir = store_dir
    self.uri_base = None
    if uri_base:
        self.uri_base = Namespace(uri_base)
    self.ids = {}
    self.id_parts = {}
    self.prefix = prefix
    self.shorty_length = shorty_length
    self.queue = queue
    self.context = context
    # BUG FIX: always record hashing_type; the original set the attribute
    # only when truthy, so later reads of self.hashing_type could raise
    # AttributeError.
    self.hashing_type = hashing_type
    if hashing_type:
        self.storeclient = PairtreeStorageClient(
            uri_base, store_dir, shorty_length,
            hashing_type=self.hashing_type)
    else:
        self.storeclient = PairtreeStorageClient(
            uri_base, store_dir, shorty_length)
    # Prefer the uri_base the store itself reports (e.g. read back from disk).
    if self.storeclient.uri_base:
        self.uri_base = Namespace(self.storeclient.uri_base)
def _open_store(self):
    """Instantiate the backing pairtree storage client.

    Passes ``hashing_type`` through only when one is configured so the
    client default applies otherwise.
    """
    if self.hashing_type:
        self._store = PairtreeStorageClient(
            self.uri_base, self.storage_dir,
            shorty_length=self.shorty_length,
            hashing_type=self.hashing_type)
    else:
        # BUG FIX: the original passed the bare name ``shorty_length``
        # (NameError at runtime); the value lives on the instance.
        self._store = PairtreeStorageClient(
            self.uri_base, self.storage_dir,
            shorty_length=self.shorty_length)
class PTOFS(OFSInterface):
    '''OFS backend backed onto the filesystem and using PairTree_.

    .. _PairTree: http://pypi.python.org/pypi/Pairtree
    '''

    def __init__(self, storage_dir="data", uri_base="urn:uuid:",
                 hashing_type="md5", shorty_length=2):
        self.storage_dir = storage_dir
        self.uri_base = uri_base
        self.hashing_type = hashing_type
        self.shorty_length = shorty_length
        self._open_store()

    def _open_store(self):
        # Pass hashing_type through only when one is configured.
        if self.hashing_type:
            self._store = PairtreeStorageClient(
                self.uri_base, self.storage_dir,
                shorty_length=self.shorty_length,
                hashing_type=self.hashing_type)
        else:
            # BUG FIX: original referenced bare ``shorty_length`` (NameError);
            # the value lives on the instance.
            self._store = PairtreeStorageClient(
                self.uri_base, self.storage_dir,
                shorty_length=self.shorty_length)

    def exists(self, bucket, label=None):
        """True if ``bucket`` exists (and, when given, ``label`` within it)."""
        if self._store.exists(bucket):
            if label:
                return self._store.isfile(bucket, label)
            else:
                return True

    def _get_object(self, bucket):
        # Pair the pairtree object with its persisted JSON metadata file.
        po = self._store.get_object(bucket)
        json_payload = PersistentState(po.id_to_dirpath())
        return (po, json_payload)

    def _setup_item(self, bucket):
        # Touch the metadata file so the bucket exists on disk.
        _, json_payload = self._get_object(bucket)
        json_payload.sync()

    def claim_bucket(self, bucket=None):
        """Reserve ``bucket`` (or a fresh uuid4 hex name) and return it.

        :raises BucketExists: when an explicitly requested bucket is taken.
        """
        if bucket:
            if self.exists(bucket):
                raise BucketExists
        else:
            bucket = uuid4().hex
            while self.exists(bucket):
                bucket = uuid4().hex
        self._setup_item(bucket)
        return bucket

    def list_labels(self, bucket, prefix=None):
        """List labels in ``bucket``, optionally filtered by ``prefix``."""
        if self.exists(bucket):
            _, json_payload = self._get_object(bucket)
            if prefix is None:
                return json_payload.keys()
            # BUG FIX: strip a leading '/' *before* taking the length.  The
            # original measured first and stripped after, so the slice was
            # one character too long and nothing ever matched such prefixes.
            if prefix.startswith('/'):
                prefix = prefix[1:]
            l = len(prefix)
            return [k for k in json_payload.keys() if k[:l] == prefix]

    def list_buckets(self):
        return self._store.list_ids()

    def put_stream(self, bucket, label, stream_object, params=None):
        """Store ``stream_object`` under ``bucket``/``label`` and return its metadata.

        ## QUESTION: do we enforce that the bucket's have to be 'claimed' first?
        ## NB this method doesn't care if it has been
        """
        # BUG FIX: mutable default argument {} replaced with None sentinel.
        if params is None:
            params = {}
        po, json_payload = self._get_object(bucket)
        if label in json_payload.keys():
            creation_date = None
        else:
            # New upload - record creation date
            creation_date = datetime.now().isoformat().split(".")[0]  ## '2010-07-08T19:56:47'
            if '_label' in params:  # was params.has_key (removed in Py3)
                json_payload[label] = {"_label": params['_label']}
            else:
                json_payload[label] = {"_label": label}
        hash_vals = po.add_bytestream_by_path(label, stream_object)
        stat_vals = po.stat(label)
        # Userland parameters for the file
        cleaned_params = dict(
            [(k, params[k]) for k in params if not k.startswith("_")])
        json_payload[label].update(cleaned_params)
        try:
            json_payload[label]['_content_length'] = int(stat_vals.st_size)
        except TypeError:
            print("Error getting filesize from os.stat().st_size into an integer...")
        if creation_date:
            json_payload[label]['_creation_date'] = creation_date
            json_payload[label]['_last_modified'] = creation_date
        else:
            # Modification date
            json_payload[label]['_last_modified'] = datetime.now().isoformat().split(".")[0]
        # Hash details:
        if hash_vals:
            json_payload[label]['_checksum'] = "%s:%s" % (hash_vals['type'], hash_vals['checksum'])
        json_payload.sync()
        return json_payload.state[label]

    def get_stream(self, bucket, label, as_stream=True):
        """Return the stored bytestream (or file handle) for a label."""
        if self.exists(bucket):
            po, json_payload = self._get_object(bucket)
            if self.exists(bucket, label):
                return po.get_bytestream(label, streamable=as_stream,
                                         path=None, appendable=False)
        raise FileNotFoundException

    def get_url(self, bucket, label):
        if self.exists(bucket) and self.exists(bucket, label):
            return self._store.get_url(bucket, label)
        else:
            raise FileNotFoundException

    def get_metadata(self, bucket, label):
        if self.exists(bucket):
            _, json_payload = self._get_object(bucket)
            if label in json_payload.keys():  # was has_key (removed in Py3)
                return json_payload.state[label]
        raise FileNotFoundException

    def update_metadata(self, bucket, label, params):
        """Merge user-supplied (non-underscore) params into a label's metadata."""
        if self.exists(bucket, label) and isinstance(params, dict):
            _, json_payload = self._get_object(bucket)
            # Userland parameters for the file
            cleaned_params = dict(
                [(k, params[k]) for k in params if not k.startswith("_")])
            json_payload[label].update(cleaned_params)
            json_payload.sync()
            return json_payload.state[label]
        else:
            raise FileNotFoundException

    def del_metadata_keys(self, bucket, label, keys):
        """Remove the given user (non-underscore) keys from a label's metadata."""
        if self.exists(bucket, label) and isinstance(keys, list):
            _, json_payload = self._get_object(bucket)
            for key in [x for x in keys if not x.startswith("_")]:
                if key in json_payload[label].keys():
                    del json_payload[label][key]
            json_payload.sync()
            return json_payload.state[label]
        else:
            raise FileNotFoundException

    def del_stream(self, bucket, label):
        """Delete the stored file and its metadata entry."""
        if self.exists(bucket, label):
            # deletes the whole object for uuid
            self._store.del_stream(bucket, label)
            _, json_payload = self._get_object(bucket)
            if label in json_payload.keys():  # was has_key (removed in Py3)
                del json_payload[label]
            json_payload.sync()
        else:
            raise FileNotFoundException
class TestPairtree(unittest.TestCase):
    """Pairtree id <-> path encoding tests.

    BUG FIX: the original defined ``testabc``/``testabc_roundtrip`` three
    times; the later definitions shadowed the earlier ones, so only the last
    pair ever ran.  The 4-char cases are renamed ``testabcd*`` and the third
    (byte-identical duplicate of the second) pair is dropped.  Deprecated
    ``assertEquals`` is replaced by ``assertEqual``.
    """

    def i2p2i(self, id, target, label):
        # Encode id to its pairtree directory list (dropping the root
        # element) and compare with the expected path components.
        ppath = self.pairtree._id_to_dir_list(id)[1:]
        self.assertEqual(ppath, target)
        #self.assertEqual( reverse it)

    def roundtrip(self, id, label):
        # encode -> decode must give back the original id.
        ppath = self.pairtree.id_encode(id)
        new_id = self.pairtree.id_decode(ppath)
        self.assertEqual(id, new_id)
        self.ppath_roundtrip(id, label)
        #self.assertEqual( reverse it)

    def ppath_roundtrip(self, id, label):
        # Same roundtrip via the module-level ppath helpers.
        pp = ppath.get_id_from_dirpath(ppath.id_to_dirpath(id))
        self.assertEqual(pp, id)

    def setUp(self):
        self.pairtree = PairtreeStorageClient('http://example.org',
                                              PAIRTREE_STORAGE_DIR, 2)

    def test_empty(self):
        pass
        #try:
        #    ppath = PairPath("")
        #    self.assertFalse(True, 'Empty id should raise exception')
        #except BadPairPath:
        #    pass

    def testabc(self):
        self.i2p2i('abc', ['ab', 'c', 'obj'], 'basic 3-char case')

    def testabc_roundtrip(self):
        self.roundtrip('abc', 'basic 3-char case - roundtrip')

    def testabcd(self):
        self.i2p2i('abcd', ['ab', 'cd', 'obj'], 'basic 4-char case')

    def testabcd_roundtrip(self):
        self.roundtrip('abcd', 'basic 4-char case - roundtrip')

    def testxy(self):
        self.i2p2i('xy', ['xy', 'obj'], '2-char edge case')

    def testxy_roundtrip(self):
        self.roundtrip('xy', '2-char edge case - roundtrip')

    def testz(self):
        self.i2p2i('z', ['z', 'obj'], '1-char edge case')

    def testz_roundtrip(self):
        self.roundtrip('z', '1-char edge case - roundtrip')

    def test12_986xy4(self):
        self.i2p2i('12-986xy4', ['12', '-9', '86', 'xy', '4', 'obj'], 'hyphen')

    def test12_986xy4_roundtrip(self):
        self.roundtrip('12-986xy4', 'hyphen - roundtrip')

    def test_13030_45xqv_793842495(self):
        self.i2p2i('13030_45xqv_793842495',
                   ['13', '03', '0_', '45', 'xq', 'v_', '79', '38', '42',
                    '49', '5', 'obj'],
                   'long id with undescores')

    def test_13030_45xqv_793842495_roundtrip(self):
        self.roundtrip('13030_45xqv_793842495',
                       'long id with undescores - roundtrip')

    def test_ark_13030_xt12t3(self):
        self.i2p2i('ark:/13030/xt12t3',
                   ['ar', 'k+', '=1', '30', '30', '=x', 't1', '2t', '3', 'obj'],
                   'colons and slashes')

    def test_ark_13030_xt12t3_roundtrip(self):
        self.roundtrip('ark:/13030/xt12t3', 'colons and slashes - roundtrip')

    def test_space(self):
        self.i2p2i('hello world',
                   ['he', 'll', 'o^', '20', 'wo', 'rl', 'd', 'obj'], 'space')

    def test_space_roundtrip(self):
        self.roundtrip('hello world', 'space - roundtrip')

    def test_slash(self):
        self.i2p2i('/', ['=', 'obj'], '1-separator-char edge case')

    def test_slash_roundtrip(self):
        self.roundtrip('/', '1-separator-char edge case - roundtrip')

    def test_urn(self):
        self.i2p2i('http://n2t.info/urn:nbn:se:kb:repos-1',
                   ['ht', 'tp', '+=', '=n', '2t', ',i', 'nf', 'o=', 'ur',
                    'n+', 'nb', 'n+', 'se', '+k', 'b+', 're', 'po', 's-',
                    '1', 'obj'],
                   'a URL with colons, slashes, and periods')

    def test_urn_roundtrip(self):
        self.roundtrip('http://n2t.info/urn:nbn:se:kb:repos-1',
                       'a URL with colons, slashes, and periods - roundtrip')

    def test_wtf(self):
        self.i2p2i('what-the-*@?#!^!?',
                   ['wh', 'at', '-t', 'he', '-^', '2a', '@^', '3f', '#!',
                    '^5', 'e!', '^3', 'f', 'obj'],
                   'weird chars from spec example')

    def test_wtf_roundtrip(self):
        self.roundtrip('what-the-*@?#!^!?',
                       'weird chars from spec example - roundtrip')

    def test_weird(self):
        self.i2p2i('\\"*+,<=>?^|',
                   ['^5', 'c^', '22', '^2', 'a^', '2b', '^2', 'c^', '3c',
                    '^3', 'd^', '3e', '^3', 'f^', '5e', '^7', 'c', 'obj'],
                   'all weird visible chars')

    def test_weird_roundtrip(self):
        self.roundtrip('\\"*+,<=>?^|', 'all weird visible chars - roundtrip')

    def test_basic_roundtrip(self):
        self.roundtrip('asdfghjklpoiuytrewqxcvbnm1234567890:;/',
                       'Basic Roundtrip')

    def test_french_roundtrip(self):
        self.roundtrip(u'Années de Pèlerinage', 'French Unicode roundtrip')

    def test_japanese_rountrip(self):
        self.roundtrip(u'ウインカリッスの日本語', 'Japanese Unicode roundtrip')

    def test_hardcore_unicode_rountrip(self):
        # If this works...
        self.roundtrip(u"""
1. Euro Symbol: €.
2. Greek: Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα.
3. Íslenska / Icelandic: Ég get etið gler án þess að meiða mig.
4. Polish: Mogę jeść szkło, i mi nie szkodzi.
5. Romanian: Pot să mănânc sticlă și ea nu mă rănește.
6. Ukrainian: Я можу їсти шкло, й воно мені не пошкодить.
7. Armenian: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։
8. Georgian: მინას ვჭამ და არა მტკივა.
9. Hindi: मैं काँच खा सकता हूँ, मुझे उस से कोई पीडा नहीं होती.
10. Hebrew(2): אני יכול לאכול זכוכית וזה לא מזיק לי.
11. Yiddish(2): איך קען עסן גלאָז און עס טוט מיר נישט װײ.
12. Arabic(2): أنا قادر على أكل الزجاج و هذا لا يؤلمني.
13. Japanese: 私はガラスを食べられます。それは私を傷つけません。
14. Thai: ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ
""", "hardcore unicode test - roundtrip")

    def test_french(self):
        self.i2p2i('Années de Pèlerinage',
                   ['An', 'n^', 'c3', '^a', '9e', 's^', '20', 'de', '^2',
                    '0P', '^c', '3^', 'a8', 'le', 'ri', 'na', 'ge', 'obj'],
                   'UTF-8 chars')
        self.i2p2i("Années de Pèlerinage (Years of Pilgrimage) (S.160, S.161,\n\
S.163) is a set of three suites by Franz Liszt for solo piano. Liszt's\n\
complete musical style is evident in this masterwork, which ranges from\n\
virtuosic fireworks to sincerely moving emotional statements. His musical\n\
maturity can be seen evolving through his experience and travel. The\n\
third volume is especially notable as an example of his later style: it\n\
was composed well after the first two volumes and often displays less\n\
showy virtuosity and more harmonic experimentation.",
                   ['An', 'n^', 'c3', '^a', '9e', 's^', '20', 'de', '^2', '0P',
                    '^c', '3^', 'a8', 'le', 'ri', 'na', 'ge', '^2', '0(', 'Ye',
                    'ar', 's^', '20', 'of', '^2', '0P', 'il', 'gr', 'im', 'ag',
                    'e)', '^2', '0(', 'S,', '16', '0^', '2c', '^2', '0S', ',1',
                    '61', '^2', 'c^', '0a', '^2', '0S', ',1', '63', ')^', '20',
                    'is', '^2', '0a', '^2', '0s', 'et', '^2', '0o', 'f^', '20',
                    'th', 're', 'e^', '20', 'su', 'it', 'es', '^2', '0b', 'y^',
                    '20', 'Fr', 'an', 'z^', '20', 'Li', 'sz', 't^', '20', 'fo',
                    'r^', '20', 'so', 'lo', '^2', '0p', 'ia', 'no', ',^', '20',
                    'Li', 'sz', 't\'', 's^', '0a', '^2', '0c', 'om', 'pl', 'et',
                    'e^', '20', 'mu', 'si', 'ca', 'l^', '20', 'st', 'yl', 'e^',
                    '20', 'is', '^2', '0e', 'vi', 'de', 'nt', '^2', '0i', 'n^',
                    '20', 'th', 'is', '^2', '0m', 'as', 'te', 'rw', 'or', 'k^',
                    '2c', '^2', '0w', 'hi', 'ch', '^2', '0r', 'an', 'ge', 's^',
                    '20', 'fr', 'om', '^0', 'a^', '20', 'vi', 'rt', 'uo', 'si',
                    'c^', '20', 'fi', 're', 'wo', 'rk', 's^', '20', 'to', '^2',
                    '0s', 'in', 'ce', 're', 'ly', '^2', '0m', 'ov', 'in', 'g^',
                    '20', 'em', 'ot', 'io', 'na', 'l^', '20', 'st', 'at', 'em',
                    'en', 'ts', ',^', '20', 'Hi', 's^', '20', 'mu', 'si', 'ca',
                    'l^', '0a', '^2', '0m', 'at', 'ur', 'it', 'y^', '20', 'ca',
                    'n^', '20', 'be', '^2', '0s', 'ee', 'n^', '20', 'ev', 'ol',
                    'vi', 'ng', '^2', '0t', 'hr', 'ou', 'gh', '^2', '0h', 'is',
                    '^2', '0e', 'xp', 'er', 'ie', 'nc', 'e^', '20', 'an', 'd^',
                    '20', 'tr', 'av', 'el', ',^', '20', 'Th', 'e^', '0a', '^2',
                    '0t', 'hi', 'rd', '^2', '0v', 'ol', 'um', 'e^', '20', 'is',
                    '^2', '0e', 'sp', 'ec', 'ia', 'll', 'y^', '20', 'no', 'ta',
                    'bl', 'e^', '20', 'as', '^2', '0a', 'n^', '20', 'ex', 'am',
                    'pl', 'e^', '20', 'of', '^2', '0h', 'is', '^2', '0l', 'at',
                    'er', '^2', '0s', 'ty', 'le', '+^', '20', 'it', '^0', 'a^',
                    '20', 'wa', 's^', '20', 'co', 'mp', 'os', 'ed', '^2', '0w',
                    'el', 'l^', '20', 'af', 'te', 'r^', '20', 'th', 'e^', '20',
                    'fi', 'rs', 't^', '20', 'tw', 'o^', '20', 'vo', 'lu', 'me',
                    's^', '20', 'an', 'd^', '20', 'of', 'te', 'n^', '20', 'di',
                    'sp', 'la', 'ys', '^2', '0l', 'es', 's^', '0a', '^2', '0s',
                    'ho', 'wy', '^2', '0v', 'ir', 'tu', 'os', 'it', 'y^', '20',
                    'an', 'd^', '20', 'mo', 're', '^2', '0h', 'ar', 'mo', 'ni',
                    'c^', '20', 'ex', 'pe', 'ri', 'me', 'nt', 'at', 'io', 'n,',
                    'obj'],
                   'very long id with apostrophes and UTF-8 chars')

    def test_id_to_url_simple(self):
        desired_url = "file://%s/pairtree_root/fo/o/obj/bar.txt" % PAIRTREE_STORAGE_DIR
        test_url = self.pairtree.get_url("foo", "bar.txt")
        self.assertEqual(desired_url, test_url)

    def test_id_to_url_withpath(self):
        desired_url = "file://%s/pairtree_root/fo/o/obj/data/subdir/bar.txt" % PAIRTREE_STORAGE_DIR
        test_url = self.pairtree.get_url("foo", "bar.txt", path="data/subdir")
        self.assertEqual(desired_url, test_url)
def setUp(self):
    """Build a fresh pairtree client rooted at the shared test directory."""
    client = PairtreeStorageClient('http://example.org',
                                   PAIRTREE_STORAGE_DIR, 2)
    self.pairtree = client
class PTOFS(OFSInterface):
    '''OFS backend backed onto the filesystem and using PairTree_.

    .. _PairTree: http://pypi.python.org/pypi/Pairtree
    '''

    def __init__(self, storage_dir="data", uri_base="urn:uuid:",
                 hashing_type="md5", shorty_length=2):
        self.storage_dir = storage_dir
        self.uri_base = uri_base
        self.hashing_type = hashing_type
        self.shorty_length = shorty_length
        self._open_store()

    def _open_store(self):
        # Pass hashing_type through only when one is configured.
        if self.hashing_type:
            self._store = PairtreeStorageClient(
                self.uri_base, self.storage_dir,
                shorty_length=self.shorty_length,
                hashing_type=self.hashing_type)
        else:
            # BUG FIX: original referenced bare ``shorty_length`` (NameError);
            # the value lives on the instance.
            self._store = PairtreeStorageClient(
                self.uri_base, self.storage_dir,
                shorty_length=self.shorty_length)

    def exists(self, bucket, label=None):
        """True if ``bucket`` exists (and, when given, ``label`` within it)."""
        if self._store.exists(bucket):
            if label:
                return self._store.isfile(bucket, label)
            else:
                return True

    def _get_object(self, bucket):
        # Pair the pairtree object with its persisted JSON metadata file.
        po = self._store.get_object(bucket)
        json_payload = PersistentState(po.id_to_dirpath())
        return (po, json_payload)

    def _setup_item(self, bucket):
        # Touch the metadata file so the bucket exists on disk.
        _, json_payload = self._get_object(bucket)
        json_payload.sync()

    def claim_bucket(self, bucket=None):
        """Reserve ``bucket`` (or a fresh uuid4 hex name) and return it.

        :raises BucketExists: when an explicitly requested bucket is taken.
        """
        if bucket:
            if self.exists(bucket):
                raise BucketExists
        else:
            bucket = uuid4().hex
            while self.exists(bucket):
                bucket = uuid4().hex
        self._setup_item(bucket)
        return bucket

    def list_labels(self, bucket):
        """Return all labels stored in ``bucket`` (None if it doesn't exist)."""
        if self.exists(bucket):
            _, json_payload = self._get_object(bucket)
            return json_payload.keys()

    def list_buckets(self):
        return self._store.list_ids()

    def put_stream(self, bucket, label, stream_object, params=None):
        """Store ``stream_object`` under ``bucket``/``label`` and return its metadata.

        ## QUESTION: do we enforce that the bucket's have to be 'claimed' first?
        ## NB this method doesn't care if it has been
        """
        # BUG FIX: mutable default argument {} replaced with None sentinel.
        if params is None:
            params = {}
        po, json_payload = self._get_object(bucket)
        if label in json_payload.keys():
            creation_date = None
        else:
            # New upload - record creation date
            creation_date = datetime.now().isoformat().split(".")[0]  ## '2010-07-08T19:56:47'
            if '_label' in params:
                json_payload[label] = {"_label": params['_label']}
            else:
                json_payload[label] = {"_label": label}
        hash_vals = po.add_bytestream_by_path(label, stream_object)
        stat_vals = po.stat(label)
        # Userland parameters for the file
        cleaned_params = {k: params[k] for k in params
                          if not k.startswith("_")}
        json_payload[label].update(cleaned_params)
        try:
            json_payload[label]['_content_length'] = int(stat_vals.st_size)
        except TypeError:
            print(
                "Error getting filesize from os.stat().st_size into an integer..."
            )
        if creation_date:
            json_payload[label]['_creation_date'] = creation_date
            json_payload[label]['_last_modified'] = creation_date
        else:
            # Modification date
            json_payload[label]['_last_modified'] = datetime.now().isoformat(
            ).split(".")[0]
        # Hash details:
        if hash_vals:
            json_payload[label]['_checksum'] = "%s:%s" % (
                hash_vals['type'], hash_vals['checksum'])
        json_payload.sync()
        return json_payload.state[label]

    def get_stream(self, bucket, label, as_stream=True):
        """Return the stored bytestream (or file handle) for a label."""
        if self.exists(bucket):
            po, json_payload = self._get_object(bucket)
            if self.exists(bucket, label):
                return po.get_bytestream(label,
                                         streamable=as_stream,
                                         path=None,
                                         appendable=False)
        raise FileNotFoundException

    def get_url(self, bucket, label):
        if self.exists(bucket) and self.exists(bucket, label):
            return self._store.get_url(bucket, label)
        else:
            raise FileNotFoundException

    def get_metadata(self, bucket, label):
        if self.exists(bucket):
            _, json_payload = self._get_object(bucket)
            # BUG FIX: has_key was removed in Python 3.
            if label in json_payload.keys():
                return json_payload.state[label]
        raise FileNotFoundException

    def update_metadata(self, bucket, label, params):
        """Merge user-supplied (non-underscore) params into a label's metadata."""
        if self.exists(bucket, label) and isinstance(params, dict):
            _, json_payload = self._get_object(bucket)
            # Userland parameters for the file
            cleaned_params = {k: params[k] for k in params
                              if not k.startswith("_")}
            json_payload[label].update(cleaned_params)
            json_payload.sync()
            return json_payload.state[label]
        else:
            raise FileNotFoundException

    def del_metadata_keys(self, bucket, label, keys):
        """Remove the given user (non-underscore) keys from a label's metadata."""
        if self.exists(bucket, label) and isinstance(keys, list):
            _, json_payload = self._get_object(bucket)
            for key in [x for x in keys if not x.startswith("_")]:
                if key in json_payload[label].keys():
                    del json_payload[label][key]
            json_payload.sync()
            return json_payload.state[label]
        else:
            raise FileNotFoundException

    def del_stream(self, bucket, label):
        """Delete the stored file and its metadata entry."""
        if self.exists(bucket, label):
            # deletes the whole object for uuid
            self._store.del_stream(bucket, label)
            _, json_payload = self._get_object(bucket)
            # BUG FIX: has_key was removed in Python 3.
            if label in json_payload.keys():
                del json_payload[label]
            json_payload.sync()
        else:
            raise FileNotFoundException
def setUp(self):
    """Point each test at a throwaway pairtree store under /tmp."""
    self.pairtree = PairtreeStorageClient(
        'http://example.org', '/tmp/pairtree', 2)
class OFS(object):
    """Single-file-per-object store on top of a pairtree filesystem layout."""

    def __init__(self, storage_dir="data", uri_base="urn:uuid:",
                 hashing_type="md5"):
        self.storage_dir = storage_dir
        self.uri_base = uri_base
        self.hashing_type = hashing_type
        self._open_store()

    def _open_store(self):
        # Pass hashing_type through only when one is configured.
        if self.hashing_type:
            self._store = PairtreeStorageClient(
                self.uri_base, self.storage_dir, shorty_length=2,
                hashing_type=self.hashing_type)
        else:
            self._store = PairtreeStorageClient(
                self.uri_base, self.storage_dir, shorty_length=2)

    def exists(self, uuid):
        return self._store.exists(uuid)

    def _get_object(self, uuid):
        # Pair the pairtree object with its persisted JSON metadata file.
        po = self._store.get_object(uuid)
        json_payload = PersistentState(po.id_to_dirpath())
        return (po, json_payload)

    def _setup_item(self, uuid):
        _, json_payload = self._get_object(uuid)
        json_payload['_uri'] = self.uri_base + uuid
        json_payload.sync()

    def claim_an_id(self):
        """Reserve and return a fresh, unused uuid4 hex id."""
        uuid = uuid4().hex
        while self.exists(uuid):
            uuid = uuid4().hex
        self._setup_item(uuid)
        return uuid

    def list_ids(self):
        return self._store.list_ids()

    def put_stream(self, uuid, stream_object, filename, params=None):
        """Store ``stream_object`` as ``filename`` under ``uuid``; return metadata.

        ## QUESTION: do we enforce that the uuid's have to be 'claimed' first?
        ## NB this method doesn't care if it has been
        """
        # BUG FIX: mutable default argument {} replaced with None sentinel.
        if params is None:
            params = {}
        po, json_payload = self._get_object(uuid)
        hash_vals = po.add_bytestream_by_path(filename, stream_object)
        stat_vals = po.stat(filename)
        if '_filename' in json_payload.keys():
            # remove old file which has a different name
            # BUG FIX: only delete when the name actually differs -- the
            # original deleted unconditionally, which destroyed the bytes
            # just written when re-uploading under the same filename.
            if json_payload['_filename'] != filename:
                po.del_file(json_payload['_filename'])
            creation_date = None
        else:
            # New upload - record creation date
            creation_date = datetime.now().isoformat().split(".")[0]  ## '2010-07-08T19:56:47'
        # Userland parameters for the file
        cleaned_params = dict(
            [(k, params[k]) for k in params if not k.startswith("_")])
        json_payload.update(cleaned_params)
        # Filedetails: _filename, _numberofbytes (in bytes)
        json_payload['_filename'] = filename
        try:
            json_payload['_numberofbytes'] = int(stat_vals.st_size)
        except TypeError:
            print("Error getting filesize from os.stat().st_size into an integer...")
        if creation_date:
            json_payload['_datecreated'] = creation_date
            json_payload['_lastmodified'] = creation_date
        else:
            # Modification date
            json_payload['_lastmodified'] = datetime.now().isoformat().split(".")[0]
        # Hash details:
        if hash_vals:
            json_payload['_checksum'] = "%s:%s" % (hash_vals['type'], hash_vals['checksum'])
        json_payload.sync()
        return json_payload.state

    def get_stream(self, uuid, as_stream=True):
        """Return the object's stored file as a stream (or bytes)."""
        if self.exists(uuid):
            po, json_payload = self._get_object(uuid)
            if '_filename' in json_payload.keys():
                return po.get_bytestream(json_payload['_filename'],
                                         streamable=as_stream,
                                         path=None, appendable=False)
        raise FileNotFoundException

    def get_stream_metadata(self, uuid):
        if self.exists(uuid):
            _, json_payload = self._get_object(uuid)
            return json_payload.state
        else:
            raise FileNotFoundException

    def update_stream_metadata(self, uuid, params):
        """Merge user-supplied (non-underscore) params into the metadata."""
        if self.exists(uuid) and isinstance(params, dict):
            _, json_payload = self._get_object(uuid)
            # Userland parameters for the file
            cleaned_params = dict(
                [(k, params[k]) for k in params if not k.startswith("_")])
            json_payload.update(cleaned_params)
            json_payload.sync()
            return json_payload.state
        else:
            raise FileNotFoundException

    def remove_metadata_keys(self, uuid, keys):
        """Delete the given user (non-underscore) keys from the metadata."""
        if self.exists(uuid) and isinstance(keys, list):
            _, json_payload = self._get_object(uuid)
            for key in [x for x in keys if not x.startswith("_")]:
                if key in json_payload.keys():
                    del json_payload[key]
            json_payload.sync()
            return json_payload.state
        else:
            raise FileNotFoundException

    def del_stream(self, uuid):
        if self.exists(uuid):
            # deletes the whole object for uuid
            self._store.delete_object(uuid)
        else:
            raise FileNotFoundException
class FileStorageClient(object):
    """Versioned, optionally audit-logged file storage over a pairtree store.

    Each object part is stored as ``<part_id><prefix><version>``; every write
    creates a new version.  When ``queue`` is supplied, create/write/delete
    events are serialised to JSON and pushed onto it.
    """

    def __init__(self, uri_base, store_dir, prefix, shorty_length,
                 queue=None, hashing_type=None, **context):
        self.store_dir = store_dir
        self.uri_base = None
        if uri_base:
            self.uri_base = Namespace(uri_base)
        self.ids = {}
        self.id_parts = {}
        self.prefix = prefix
        self.shorty_length = shorty_length
        self.queue = queue
        self.context = context
        # BUG FIX: always set hashing_type; the original assigned it only
        # when truthy, so later reads could raise AttributeError.
        self.hashing_type = hashing_type
        if hashing_type:
            self.storeclient = PairtreeStorageClient(
                uri_base, store_dir, shorty_length,
                hashing_type=self.hashing_type)
        else:
            self.storeclient = PairtreeStorageClient(
                uri_base, store_dir, shorty_length)
        # Prefer the uri_base the store itself reports.
        if self.storeclient.uri_base:
            self.uri_base = Namespace(self.storeclient.uri_base)

    def list_ids(self):
        return self.storeclient.list_ids()

    def _get_latest_part(self, id, part_id):
        # Highest stored version number for a part; 0 when none exist.
        try:
            versions = self._list_part_versions(id, part_id)
            if versions:
                return max(versions)
            return 0
        except PartNotFoundException:
            return 0

    def _list_parts(self, id):
        return self.storeclient.list_parts(id)

    def _list_part_versions(self, id, part_id):
        """Return the integer version numbers stored for ``part_id``."""
        if part_id in self.storeclient.list_parts(id):
            versions = self.storeclient.list_parts(id, part_id)
            numbered_versions = [int(x.split(self.prefix)[-1])
                                 for x in versions]
            if numbered_versions:
                return numbered_versions
            else:
                return []
        else:
            raise PartNotFoundException

    def _del_part_version(self, id, part_id, version):
        """Delete one stored version of a part, logging the deletion."""
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if part_id in self.storeclient.list_parts(id):
            if version in self._list_part_versions(id, part_id):
                # delete version
                resp = self.storeclient.del_stream(
                    id, "%s%s%s" % (part_id, self.prefix, version),
                    path=part_id)
                if self.queue is not None:
                    self._log(id, 'd', 'Deleting a version of a part',
                              part_id=part_id, version=version)
                return resp
            else:
                raise VersionNotFoundException(part_id=part_id,
                                               version=version)
        else:
            raise PartNotFoundException

    def _put_part(self, id, part_id, bytestream, version=False,
                  buffer_size=1024 * 8, mimetype=None):
        """Write ``bytestream`` as a new (or explicit) version of a part."""
        if not self.storeclient.exists(id):
            self.storeclient.create_object(id)
        if not version:
            version = self._get_latest_part(id, part_id) + 1
        part_name = "%s%s%s" % (part_id, self.prefix, version)
        hexhash = self.storeclient.put_stream(id, part_id, part_name,
                                              bytestream, buffer_size)
        if self.queue is not None:
            if version == 1:
                self._log(id, 'c', 'Creating a part', part_id=part_id,
                          version=version, checksum=hexhash,
                          mimetype=mimetype)
            else:
                self._log(id, 'w', 'Updating a part', part_id=part_id,
                          version=version, checksum=hexhash,
                          mimetype=mimetype)
        return {'version': version, 'checksum': hexhash}

    def _get_part(self, id, part_id, stream, version=False):
        """Read a part (latest version unless one is given)."""
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id)
            if version == 0:
                raise PartNotFoundException
        part_name = "%s%s%s" % (part_id, self.prefix, version)
        if not self.storeclient.exists(id, os.path.join(part_id, part_name)):
            raise VersionNotFoundException(part_id=part_id, version=version)
        else:
            return self.storeclient.get_stream(id, part_id, part_name, stream)

    def _del_part(self, id, part_id):
        """Delete a part and all of its versions."""
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not self.storeclient.exists(id, part_id):
            raise PartNotFoundException(part_id=part_id)
        self.storeclient.del_path(id, part_id, recursive=True)
        if self.queue is not None:
            self._log(id, 'd', 'Deleting a part', part_id=part_id)

    def _store_manifest(self, id, part_id, manifest, version=False):
        """Serialise and store an RDF Manifest as a new part version."""
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id) + 1
        part_name = "%s%s%s" % (part_id, self.prefix, version)
        bytestream = manifest.to_string()
        if isinstance(bytestream, unicode):
            bytestream = bytestream.encode('utf-8')
        hexhash = self.storeclient.put_stream(id, part_id, part_name,
                                              bytestream)
        if self.queue is not None:
            if version == 1:
                self._log(id, 'c', 'Creating an RDF Manifest',
                          part_id=part_id, version=version, checksum=hexhash)
            else:
                self._log(id, 'w', 'Updating an RDF Manifest',
                          part_id=part_id, version=version, checksum=hexhash)
        return {'version': version, 'checksum': hexhash}

    def _store_rdfobject(self, id, part_id, rdfobject, version=False):
        """Serialise and store an RDF root object as a new part version."""
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id) + 1
        part_name = "%s%s%s" % (part_id, self.prefix, version)
        bytestream = rdfobject.to_string()
        if isinstance(bytestream, unicode):
            bytestream = bytestream.encode('utf-8')
        hexhash = self.storeclient.put_stream(id, part_id, part_name,
                                              bytestream)
        if self.queue is not None:
            if version == 1:
                self._log(id, 'c', 'Creating an RDF Root', part_id=part_id,
                          version=version, checksum=hexhash)
            else:
                self._log(id, 'w', 'Updating an RDF Root', part_id=part_id,
                          version=version, checksum=hexhash)
        return {'version': version, 'checksum': hexhash}

    def _get_rdfobject(self, id, part_id, version=False):
        """Load an RDFobject from storage (empty object when no versions)."""
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id)
        part_name = "%s%s%s" % (part_id, self.prefix, version)
        r = RDFobject()
        r.set_uri(self.uri_base[id])
        if version >= 1:
            f = self.storeclient.get_stream(id, part_id, part_name,
                                            streamable=False)
            r.from_string(self.uri_base[id], f.decode('utf-8'))
        return r

    def _get_manifest(self, id, part_id, file_uri, version=False):
        """Load a Manifest from storage (empty manifest when no versions)."""
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id)
        part_name = "%s%s%s" % (part_id, self.prefix, version)
        m = Manifest(file_uri)
        if version >= 1:
            f = self.storeclient.get_stream(id, part_id, part_name,
                                            streamable=False)
            m.from_string(f.decode('utf-8'))
        return m

    def exists(self, id):
        return self.storeclient.exists(id)

    def getObject(self, id=None, create_if_doesnt_exist=True):
        """Fetch (and by default lazily create) the object wrapper for ``id``."""
        exists = self.storeclient.exists(id)
        self.storeclient.get_object(id, create_if_doesnt_exist)
        # Only log a creation when the object did not previously exist.
        if create_if_doesnt_exist and not exists and self.queue is not None:
            self._log(id, 'c', 'Creating an object')
        return FileStorageObject(id, self)

    def createObject(self, id):
        self.storeclient.create_object(id)
        if self.queue is not None:
            self._log(id, 'c', 'Creating an object')
        return FileStorageObject(id, self)

    def deleteObject(self, id):
        if self.storeclient.exists(id):
            if self.queue is not None:
                self._log(id, 'd', 'Deleting an object')
            return self.storeclient.delete_object(id)

    def log_audit(self, id, logcontext, context):
        if self.queue is not None:
            self._log(id, 'metadatadelta', 'Metadata changes',
                      _logcontext=logcontext, **context)

    def _log(self, id, action, label, **kw):
        """Push one JSON audit message onto the queue."""
        msg = {}
        msg.update(kw)
        msg.update(self.context)
        msg['id'] = id
        msg['action'] = action
        msg['label'] = label
        msg['uri_base'] = self.uri_base
        # Get an ISO datetime for this
        msg['timestamp'] = datetime.now().isoformat()
        # BUG FIX: removed the ``except Exception, e: raise e`` wrapper -- it
        # re-raised with a fresh traceback, hiding the real failure point.
        self.queue.put(simplejson.dumps(msg))