Beispiel #1
0
 def _open_store(self):
     """Create the backing PairtreeStorageClient for this store.

     Forwards ``hashing_type`` only when one is configured, so the
     client's own default hashing behaviour applies otherwise.
     """
     if self.hashing_type:
         self._store = PairtreeStorageClient(
             self.uri_base,
             self.storage_dir,
             shorty_length=self.shorty_length,
             hashing_type=self.hashing_type)
     else:
         # BUG FIX: bare 'shorty_length' was an unbound name here
         # (NameError at runtime); the configured value lives on the
         # instance, as in the branch above.
         self._store = PairtreeStorageClient(self.uri_base,
                                             self.storage_dir,
                                             shorty_length=self.shorty_length)
Beispiel #2
0
 def __init__(self, uri_base, store_dir, prefix, shorty_length,queue=None, hashing_type=None, **context):
     """Set up id bookkeeping and the underlying pairtree store client."""
     self.store_dir = store_dir
     self.uri_base = Namespace(uri_base) if uri_base else None
     self.ids = {}
     self.id_parts = {}
     self.prefix = prefix
     self.shorty_length = shorty_length
     self.queue = queue
     self.context = context
     if hashing_type:
         # Only record a hashing_type attribute when one was requested.
         self.hashing_type = hashing_type
         self.storeclient = PairtreeStorageClient(
             uri_base, store_dir, shorty_length, hashing_type=hashing_type)
     else:
         self.storeclient = PairtreeStorageClient(uri_base, store_dir, shorty_length)
     # Prefer whatever namespace the client itself reports, if any.
     if self.storeclient.uri_base:
         self.uri_base = Namespace(self.storeclient.uri_base)
Beispiel #3
0
 def _open_store(self):
     """Create the backing PairtreeStorageClient for this store."""
     if self.hashing_type:
         self._store = PairtreeStorageClient(self.uri_base, self.storage_dir, shorty_length=self.shorty_length, hashing_type=self.hashing_type)
     else:
         # BUG FIX: bare 'shorty_length' was an unbound name (NameError);
         # the configured value lives on the instance.
         self._store = PairtreeStorageClient(self.uri_base, self.storage_dir, shorty_length=self.shorty_length)
Beispiel #4
0
class PTOFS(OFSInterface):
    '''OFS backend backed onto the filesystem and using PairTree_.

    Each "bucket" is a pairtree object; each "label" is a bytestream stored
    inside it, with per-label metadata kept in a JSON payload alongside.

    .. _PairTree: http://pypi.python.org/pypi/Pairtree
    '''
    def __init__(self, storage_dir="data", uri_base="urn:uuid:", hashing_type="md5", shorty_length=2):
        self.storage_dir = storage_dir
        self.uri_base = uri_base
        self.hashing_type = hashing_type
        self.shorty_length = shorty_length
        self._open_store()

    def _open_store(self):
        '''Create the backing PairtreeStorageClient for this store.'''
        if self.hashing_type:
            self._store = PairtreeStorageClient(self.uri_base, self.storage_dir, shorty_length=self.shorty_length, hashing_type=self.hashing_type)
        else:
            # BUG FIX: bare 'shorty_length' was an unbound name here
            # (NameError); the configured value lives on the instance.
            self._store = PairtreeStorageClient(self.uri_base, self.storage_dir, shorty_length=self.shorty_length)

    def exists(self, bucket, label=None):
        '''True if *bucket* exists (and, if given, *label* within it).'''
        if self._store.exists(bucket):
            if label:
                return self._store.isfile(bucket, label)
            else:
                return True
        # Explicit False rather than an implicit None for a missing bucket.
        return False

    def _get_object(self, bucket):
        '''Return (pairtree object, its JSON metadata payload) for *bucket*.'''
        po = self._store.get_object(bucket)
        json_payload = PersistentState(po.id_to_dirpath())
        return (po, json_payload)

    def _setup_item(self, bucket):
        '''Make sure the metadata payload for *bucket* exists on disk.'''
        _, json_payload = self._get_object(bucket)
        json_payload.sync()

    def claim_bucket(self, bucket=None):
        '''Reserve *bucket* (or a fresh uuid4 hex id) and return its name.

        Raises BucketExists if an explicitly requested bucket already exists.
        '''
        if bucket:
            if self.exists(bucket):
                raise BucketExists
        else:
            bucket = uuid4().hex
            while self.exists(bucket):
                bucket = uuid4().hex
        self._setup_item(bucket)
        return bucket

    def list_labels(self, bucket, prefix=None):
        '''Return labels in *bucket*, optionally restricted to *prefix*.'''
        if self.exists(bucket):
            _, json_payload = self._get_object(bucket)
            if prefix is None:
                return json_payload.keys()
            # Tolerate a leading '/' on the prefix.
            if prefix.startswith('/'):
                prefix = prefix[1:]
            l = len(prefix)
            return [k for k in json_payload.keys() if k[:l] == prefix]

    def list_buckets(self):
        '''Return the ids of all buckets in the store.'''
        return self._store.list_ids()

    def put_stream(self, bucket, label, stream_object, params={}):
        '''Store *stream_object* under *label* in *bucket*; return its metadata.'''
        ## QUESTION: do we enforce that the bucket's have to be 'claimed' first?
        ## NB this method doesn't care if it has been
        po, json_payload = self._get_object(bucket)

        if label in json_payload.keys():
            creation_date = None
        else:
            # New upload - record creation date
            creation_date = datetime.now().isoformat().split(".")[0]  ## '2010-07-08T19:56:47'
            # BUG FIX: dict.has_key() is removed in Python 3; 'in' works in
            # both and matches the membership test used above.
            if '_label' in params:
                json_payload[label] = {"_label":params['_label']}
            else:
                json_payload[label] = {"_label":label}

        hash_vals = po.add_bytestream_by_path(label, stream_object)
        stat_vals = po.stat(label)

        # Userland parameters for the file ('_'-prefixed keys are reserved)
        cleaned_params = dict((k, params[k]) for k in params if not k.startswith("_"))
        json_payload[label].update(cleaned_params)
        try:
            json_payload[label]['_content_length'] = int(stat_vals.st_size)
        except TypeError:
            # Parenthesised so this line is valid in both Python 2 and 3.
            print("Error getting filesize from os.stat().st_size into an integer...")
        if creation_date:
            json_payload[label]['_creation_date'] = creation_date
            json_payload[label]['_last_modified'] = creation_date
        else:
            # Modification date (indentation fixed: was off by one column)
            json_payload[label]['_last_modified'] = datetime.now().isoformat().split(".")[0]
        # Hash details:
        if hash_vals:
            json_payload[label]['_checksum'] = "%s:%s" % (hash_vals['type'], hash_vals['checksum'])
        json_payload.sync()
        return json_payload.state[label]

    def get_stream(self, bucket, label, as_stream=True):
        '''Return the stored bytestream for *label*; raises FileNotFoundException.'''
        if self.exists(bucket):
            po, json_payload = self._get_object(bucket)
            if self.exists(bucket, label):
                return po.get_bytestream(label, streamable=as_stream, path=None, appendable=False)
        raise FileNotFoundException

    def get_url(self, bucket, label):
        '''Return the store URL for *label*; raises FileNotFoundException.'''
        if self.exists(bucket) and self.exists(bucket, label):
            return self._store.get_url(bucket, label)
        else:
            raise FileNotFoundException

    def get_metadata(self, bucket, label):
        '''Return the metadata dict stored for *label* in *bucket*.'''
        if self.exists(bucket):
            _, json_payload = self._get_object(bucket)
            # BUG FIX: has_key() is removed in Python 3; use the same
            # membership idiom as put_stream().
            if label in json_payload.keys():
                return json_payload.state[label]
        raise FileNotFoundException

    def update_metadata(self, bucket, label, params):
        '''Merge non-reserved *params* into the metadata for *label*.'''
        if self.exists(bucket, label) and isinstance(params, dict):
            _, json_payload = self._get_object(bucket)
            # Userland parameters for the file
            cleaned_params = dict((k, params[k]) for k in params if not k.startswith("_"))
            json_payload[label].update(cleaned_params)
            json_payload.sync()
            return json_payload.state[label]
        else:
            raise FileNotFoundException

    def del_metadata_keys(self, bucket, label, keys):
        '''Remove the given non-reserved metadata *keys* from *label*.'''
        if self.exists(bucket, label) and isinstance(keys, list):
            _, json_payload = self._get_object(bucket)
            for key in [x for x in keys if not x.startswith("_")]:
                if key in json_payload[label].keys():
                    del json_payload[label][key]
            json_payload.sync()
            return json_payload.state[label]
        else:
            raise FileNotFoundException

    def del_stream(self, bucket, label):
        '''Delete the bytestream and metadata stored for *label*.'''
        if self.exists(bucket, label):
            # deletes the whole object for uuid
            self._store.del_stream(bucket, label)
            _, json_payload = self._get_object(bucket)
            # BUG FIX: has_key() is removed in Python 3.
            if label in json_payload.keys():
                del json_payload[label]
            json_payload.sync()
        else:
            raise FileNotFoundException
Beispiel #5
0
class TestPairtree(unittest.TestCase):
    """Round-trip and id-to-path mapping tests for the pairtree client."""

    def i2p2i(self, ident, target, label):
        """Assert that *ident* maps onto the pairtree directory list *target*."""
        dirs = self.pairtree._id_to_dir_list(ident)[1:]
        self.assertEqual(dirs, target)

    def roundtrip(self, ident, label):
        """Assert that encoding then decoding *ident* is lossless."""
        encoded = self.pairtree.id_encode(ident)
        new_id = self.pairtree.id_decode(encoded)
        self.assertEqual(ident, new_id)
        self.ppath_roundtrip(ident, label)

    def ppath_roundtrip(self, ident, label):
        # NOTE(review): 'ppath' here is assumed to be a module-level helper
        # (pairtree path utilities) imported elsewhere in this file — it is
        # not defined in this class. Verify against the file's imports.
        pp = ppath.get_id_from_dirpath(ppath.id_to_dirpath(ident))
        self.assertEqual(pp, ident)

    def setUp(self):
        self.pairtree = PairtreeStorageClient('http://example.org', PAIRTREE_STORAGE_DIR, 2)

    def test_empty(self):
        pass
        #try:
        #    ppath = PairPath("")
        #    self.assertFalse(True, 'Empty id should raise exception')
        #except BadPairPath:
        #    pass

    def testabc(self):
        self.i2p2i('abc', ['ab','c','obj'], 'basic 3-char case')

    def testabc_roundtrip(self):
        self.roundtrip('abc', 'basic 3-char case - roundtrip')

    # BUG FIX: the two tests below were previously also named 'testabc' /
    # 'testabc_roundtrip' (each name was defined three times), so only the
    # last definition of each ever ran and the 3-char cases above were
    # silently skipped.  A third, byte-identical duplicate pair was dropped.
    def test_abcd(self):
        self.i2p2i('abcd', ['ab','cd', 'obj'], 'basic 4-char case')

    def test_abcd_roundtrip(self):
        self.roundtrip('abcd', 'basic 4-char case - roundtrip')

    def testxy(self):
        self.i2p2i('xy', ['xy', 'obj'], '2-char edge case')

    def testxy_roundtrip(self):
        self.roundtrip('xy', '2-char edge case - roundtrip')

    def testz(self):
        self.i2p2i('z', ['z', 'obj'], '1-char edge case')

    def testz_roundtrip(self):
        self.roundtrip('z', '1-char edge case - roundtrip')

    def test12_986xy4(self):
        self.i2p2i('12-986xy4', ['12', '-9', '86', 'xy', '4', 'obj'], 'hyphen')

    def test12_986xy4_roundtrip(self):
        self.roundtrip('12-986xy4', 'hyphen - roundtrip')

    def test_13030_45xqv_793842495(self):
        self.i2p2i('13030_45xqv_793842495',
                   ['13', '03', '0_', '45', 'xq', 'v_', '79', '38', '42', '49',
                    '5', 'obj'],
                   'long id with undescores')

    def test_13030_45xqv_793842495_roundtrip(self):
        self.roundtrip('13030_45xqv_793842495',
                   'long id with undescores - roundtrip')

    def test_ark_13030_xt12t3(self):
        self.i2p2i('ark:/13030/xt12t3',
                   ['ar', 'k+', '=1', '30', '30', '=x', 't1', '2t', '3', 'obj'],
                   'colons and slashes')

    def test_ark_13030_xt12t3_roundtrip(self):
        self.roundtrip('ark:/13030/xt12t3',
                   'colons and slashes - roundtrip')

    def test_space(self):
        self.i2p2i('hello world', ['he', 'll', 'o^', '20', 'wo', 'rl', 'd', 'obj'], 'space')

    def test_space_roundtrip(self):
        self.roundtrip('hello world', 'space - roundtrip')

    def test_slash(self):
        self.i2p2i('/', ['=', 'obj'], '1-separator-char edge case')

    def test_slash_roundtrip(self):
        self.roundtrip('/', '1-separator-char edge case - roundtrip')

    def test_urn(self):
        self.i2p2i('http://n2t.info/urn:nbn:se:kb:repos-1',
                   ['ht', 'tp', '+=', '=n', '2t', ',i', 'nf', 'o=', 'ur', 'n+',
                    'nb', 'n+', 'se', '+k', 'b+', 're', 'po', 's-', '1', 'obj'],
                   'a URL with colons, slashes, and periods')

    def test_urn_roundtrip(self):
        self.roundtrip('http://n2t.info/urn:nbn:se:kb:repos-1',
                   'a URL with colons, slashes, and periods - roundtrip')

    def test_wtf(self):
        self.i2p2i('what-the-*@?#!^!?',
                   ['wh', 'at', '-t', 'he', '-^', '2a', '@^', '3f', '#!', '^5',
                    'e!', '^3', 'f', 'obj'],
                   'weird chars from spec example')

    def test_wtf_roundtrip(self):
        self.roundtrip('what-the-*@?#!^!?',
                   'weird chars from spec example - roundtrip')

    def test_weird(self):
        self.i2p2i('\\"*+,<=>?^|',
                   ['^5', 'c^', '22', '^2', 'a^', '2b', '^2', 'c^', '3c', '^3',
                    'd^', '3e', '^3', 'f^', '5e', '^7', 'c', 'obj'],
                   'all weird visible chars')

    def test_weird_roundtrip(self):
        self.roundtrip('\\"*+,<=>?^|',
                   'all weird visible chars - roundtrip')

    def test_basic_roundtrip(self):
        self.roundtrip('asdfghjklpoiuytrewqxcvbnm1234567890:;/', 'Basic Roundtrip')

    def test_french_roundtrip(self):
        self.roundtrip(u'Années de Pèlerinage', 'French Unicode roundtrip')

    def test_japanese_rountrip(self):
        self.roundtrip(u'ウインカリッスの日本語', 'Japanese Unicode roundtrip')

    def test_hardcore_unicode_rountrip(self):
        # If this works...
        self.roundtrip(u"""   1. Euro Symbol: €.
   2. Greek: Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα.
   3. Íslenska / Icelandic: Ég get etið gler án þess að meiða mig.
   4. Polish: Mogę jeść szkło, i mi nie szkodzi.
   5. Romanian: Pot să mănânc sticlă și ea nu mă rănește.
   6. Ukrainian: Я можу їсти шкло, й воно мені не пошкодить.
   7. Armenian: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։
   8. Georgian: მინას ვჭამ და არა მტკივა.
   9. Hindi: मैं काँच खा सकता हूँ, मुझे उस से कोई पीडा नहीं होती.
  10. Hebrew(2): אני יכול לאכול זכוכית וזה לא מזיק לי.
  11. Yiddish(2): איך קען עסן גלאָז און עס טוט מיר נישט װײ.
  12. Arabic(2): أنا قادر على أكل الزجاج و هذا لا يؤلمني.
  13. Japanese: 私はガラスを食べられます。それは私を傷つけません。
  14. Thai: ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ """,
                        "hardcore unicode test - roundtrip")

    def test_french(self):
        self.i2p2i('Années de Pèlerinage',
                   ['An', 'n^', 'c3', '^a', '9e', 's^', '20', 'de', '^2', '0P',
                    '^c', '3^', 'a8', 'le', 'ri', 'na', 'ge', 'obj'],
                   'UTF-8 chars')

        self.i2p2i("Années de Pèlerinage (Years of Pilgrimage) (S.160, S.161,\n\
 S.163) is a set of three suites by Franz Liszt for solo piano. Liszt's\n\
 complete musical style is evident in this masterwork, which ranges from\n\
 virtuosic fireworks to sincerely moving emotional statements. His musical\n\
 maturity can be seen evolving through his experience and travel. The\n\
 third volume is especially notable as an example of his later style: it\n\
 was composed well after the first two volumes and often displays less\n\
 showy virtuosity and more harmonic experimentation.",
                   ['An', 'n^', 'c3', '^a', '9e', 's^', '20', 'de', '^2', '0P',
                    '^c', '3^', 'a8', 'le', 'ri', 'na', 'ge', '^2', '0(', 'Ye',
                    'ar', 's^', '20', 'of', '^2', '0P', 'il', 'gr', 'im', 'ag',
                    'e)', '^2', '0(', 'S,', '16', '0^', '2c', '^2', '0S', ',1',
                    '61', '^2', 'c^', '0a', '^2', '0S', ',1', '63', ')^', '20',
                    'is', '^2', '0a', '^2', '0s', 'et', '^2', '0o', 'f^', '20',
                    'th', 're', 'e^', '20', 'su', 'it', 'es', '^2', '0b', 'y^',
                    '20', 'Fr', 'an', 'z^', '20', 'Li', 'sz', 't^', '20', 'fo',
                    'r^', '20', 'so', 'lo', '^2', '0p', 'ia', 'no', ',^', '20',
                    'Li', 'sz', 't\'', 's^', '0a', '^2', '0c', 'om', 'pl', 'et',
                    'e^', '20', 'mu', 'si', 'ca', 'l^', '20', 'st', 'yl', 'e^',
                    '20', 'is', '^2', '0e', 'vi', 'de', 'nt', '^2', '0i', 'n^',
                    '20', 'th', 'is', '^2', '0m', 'as', 'te', 'rw', 'or', 'k^',
                    '2c', '^2', '0w', 'hi', 'ch', '^2', '0r', 'an', 'ge', 's^',
                    '20', 'fr', 'om', '^0', 'a^', '20', 'vi', 'rt', 'uo', 'si',
                    'c^', '20', 'fi', 're', 'wo', 'rk', 's^', '20', 'to', '^2',
                    '0s', 'in', 'ce', 're', 'ly', '^2', '0m', 'ov', 'in', 'g^',
                    '20', 'em', 'ot', 'io', 'na', 'l^', '20', 'st', 'at', 'em',
                    'en', 'ts', ',^', '20', 'Hi', 's^', '20', 'mu', 'si', 'ca',
                    'l^', '0a', '^2', '0m', 'at', 'ur', 'it', 'y^', '20', 'ca',
                    'n^', '20', 'be', '^2', '0s', 'ee', 'n^', '20', 'ev', 'ol',
                    'vi', 'ng', '^2', '0t', 'hr', 'ou', 'gh', '^2', '0h', 'is',
                    '^2', '0e', 'xp', 'er', 'ie', 'nc', 'e^', '20', 'an', 'd^',
                    '20', 'tr', 'av', 'el', ',^', '20', 'Th', 'e^', '0a', '^2',
                    '0t', 'hi', 'rd', '^2', '0v', 'ol', 'um', 'e^', '20', 'is',
                    '^2', '0e', 'sp', 'ec', 'ia', 'll', 'y^', '20', 'no', 'ta',
                    'bl', 'e^', '20', 'as', '^2', '0a', 'n^', '20', 'ex', 'am',
                    'pl', 'e^', '20', 'of', '^2', '0h', 'is', '^2', '0l', 'at',
                    'er', '^2', '0s', 'ty', 'le', '+^', '20', 'it', '^0', 'a^',
                    '20', 'wa', 's^', '20', 'co', 'mp', 'os', 'ed', '^2', '0w',
                    'el', 'l^', '20', 'af', 'te', 'r^', '20', 'th', 'e^', '20',
                    'fi', 'rs', 't^', '20', 'tw', 'o^', '20', 'vo', 'lu', 'me',
                    's^', '20', 'an', 'd^', '20', 'of', 'te', 'n^', '20', 'di',
                    'sp', 'la', 'ys', '^2', '0l', 'es', 's^', '0a', '^2', '0s',
                    'ho', 'wy', '^2', '0v', 'ir', 'tu', 'os', 'it', 'y^', '20',
                    'an', 'd^', '20', 'mo', 're', '^2', '0h', 'ar', 'mo', 'ni',
                    'c^', '20', 'ex', 'pe', 'ri', 'me', 'nt', 'at', 'io', 'n,', 'obj'],
                   'very long id with apostrophes and UTF-8 chars')

    def test_id_to_url_simple(self):
        desired_url = "file://%s/pairtree_root/fo/o/obj/bar.txt" % PAIRTREE_STORAGE_DIR
        test_url = self.pairtree.get_url("foo", "bar.txt")
        # assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(desired_url, test_url)

    def test_id_to_url_withpath(self):
        desired_url = "file://%s/pairtree_root/fo/o/obj/data/subdir/bar.txt" % PAIRTREE_STORAGE_DIR
        test_url = self.pairtree.get_url("foo", "bar.txt", path="data/subdir")
        self.assertEqual(desired_url, test_url)
Beispiel #6
0
 def setUp(self):
     """Create a fresh pairtree client rooted at the shared test directory."""
     client = PairtreeStorageClient('http://example.org', PAIRTREE_STORAGE_DIR, 2)
     self.pairtree = client
Beispiel #7
0
class PTOFS(OFSInterface):
    '''OFS backend backed onto the filesystem and using PairTree_.

    Each "bucket" is a pairtree object; each "label" is a bytestream stored
    inside it, with per-label metadata kept in a JSON payload alongside.

    .. _PairTree: http://pypi.python.org/pypi/Pairtree
    '''
    def __init__(self,
                 storage_dir="data",
                 uri_base="urn:uuid:",
                 hashing_type="md5",
                 shorty_length=2):
        self.storage_dir = storage_dir
        self.uri_base = uri_base
        self.hashing_type = hashing_type
        self.shorty_length = shorty_length
        self._open_store()

    def _open_store(self):
        '''Create the backing PairtreeStorageClient for this store.'''
        if self.hashing_type:
            self._store = PairtreeStorageClient(
                self.uri_base,
                self.storage_dir,
                shorty_length=self.shorty_length,
                hashing_type=self.hashing_type)
        else:
            # BUG FIX: bare 'shorty_length' was an unbound name here
            # (NameError); the configured value lives on the instance.
            self._store = PairtreeStorageClient(self.uri_base,
                                                self.storage_dir,
                                                shorty_length=self.shorty_length)

    def exists(self, bucket, label=None):
        '''True if *bucket* exists (and, if given, *label* within it).'''
        if self._store.exists(bucket):
            if label:
                return self._store.isfile(bucket, label)
            else:
                return True
        # Explicit False rather than an implicit None for a missing bucket.
        return False

    def _get_object(self, bucket):
        '''Return (pairtree object, its JSON metadata payload) for *bucket*.'''
        po = self._store.get_object(bucket)
        json_payload = PersistentState(po.id_to_dirpath())
        return (po, json_payload)

    def _setup_item(self, bucket):
        '''Make sure the metadata payload for *bucket* exists on disk.'''
        _, json_payload = self._get_object(bucket)
        json_payload.sync()

    def claim_bucket(self, bucket=None):
        '''Reserve *bucket* (or a fresh uuid4 hex id) and return its name.

        Raises BucketExists if an explicitly requested bucket already exists.
        '''
        if bucket:
            if self.exists(bucket):
                raise BucketExists
        else:
            bucket = uuid4().hex
            while self.exists(bucket):
                bucket = uuid4().hex
        self._setup_item(bucket)
        return bucket

    def list_labels(self, bucket, prefix=None):
        '''Return labels in *bucket*, optionally restricted to *prefix*.

        *prefix* is new and defaults to None (all labels), so existing
        callers are unaffected; a leading '/' on the prefix is tolerated.
        '''
        if self.exists(bucket):
            _, json_payload = self._get_object(bucket)
            if prefix is None:
                return json_payload.keys()
            if prefix.startswith('/'):
                prefix = prefix[1:]
            return [k for k in json_payload.keys() if k.startswith(prefix)]

    def list_buckets(self):
        '''Return the ids of all buckets in the store.'''
        return self._store.list_ids()

    def put_stream(self, bucket, label, stream_object, params={}):
        '''Store *stream_object* under *label* in *bucket*; return its metadata.'''
        ## QUESTION: do we enforce that the bucket's have to be 'claimed' first?
        ## NB this method doesn't care if it has been
        po, json_payload = self._get_object(bucket)

        if label in json_payload.keys():
            creation_date = None
        else:
            # New upload - record creation date
            creation_date = datetime.now().isoformat().split(".")[
                0]  ## '2010-07-08T19:56:47'
            if '_label' in params:
                json_payload[label] = {"_label": params['_label']}
            else:
                json_payload[label] = {"_label": label}

        hash_vals = po.add_bytestream_by_path(label, stream_object)
        stat_vals = po.stat(label)

        # Userland parameters for the file ('_'-prefixed keys are reserved)
        cleaned_params = dict([(k, params[k]) for k in params
                               if not k.startswith("_")])
        json_payload[label].update(cleaned_params)
        try:
            json_payload[label]['_content_length'] = int(stat_vals.st_size)
        except TypeError:
            print(
                "Error getting filesize from os.stat().st_size into an integer..."
            )
        if creation_date:
            json_payload[label]['_creation_date'] = creation_date
            json_payload[label]['_last_modified'] = creation_date
        else:
            # Modification date
            json_payload[label]['_last_modified'] = datetime.now().isoformat(
            ).split(".")[0]
        # Hash details:
        if hash_vals:
            json_payload[label]['_checksum'] = "%s:%s" % (
                hash_vals['type'], hash_vals['checksum'])
        json_payload.sync()
        return json_payload.state[label]

    def get_stream(self, bucket, label, as_stream=True):
        '''Return the stored bytestream for *label*; raises FileNotFoundException.'''
        if self.exists(bucket):
            po, json_payload = self._get_object(bucket)
            if self.exists(bucket, label):
                return po.get_bytestream(label,
                                         streamable=as_stream,
                                         path=None,
                                         appendable=False)
        raise FileNotFoundException

    def get_url(self, bucket, label):
        '''Return the store URL for *label*; raises FileNotFoundException.'''
        if self.exists(bucket) and self.exists(bucket, label):
            return self._store.get_url(bucket, label)
        else:
            raise FileNotFoundException

    def get_metadata(self, bucket, label):
        '''Return the metadata dict stored for *label* in *bucket*.'''
        if self.exists(bucket):
            _, json_payload = self._get_object(bucket)
            # BUG FIX: dict.has_key() does not exist in Python 3 (this class
            # already uses the 'in' idiom in put_stream).
            if label in json_payload.keys():
                return json_payload.state[label]
        raise FileNotFoundException

    def update_metadata(self, bucket, label, params):
        '''Merge non-reserved *params* into the metadata for *label*.'''
        if self.exists(bucket, label) and isinstance(params, dict):
            _, json_payload = self._get_object(bucket)
            # Userland parameters for the file
            cleaned_params = dict([(k, params[k]) for k in params
                                   if not k.startswith("_")])
            json_payload[label].update(cleaned_params)
            json_payload.sync()
            return json_payload.state[label]
        else:
            raise FileNotFoundException

    def del_metadata_keys(self, bucket, label, keys):
        '''Remove the given non-reserved metadata *keys* from *label*.'''
        if self.exists(bucket, label) and isinstance(keys, list):
            _, json_payload = self._get_object(bucket)
            for key in [x for x in keys if not x.startswith("_")]:
                if key in json_payload[label].keys():
                    del json_payload[label][key]
            json_payload.sync()
            return json_payload.state[label]
        else:
            raise FileNotFoundException

    def del_stream(self, bucket, label):
        '''Delete the bytestream and metadata stored for *label*.'''
        if self.exists(bucket, label):
            # deletes the whole object for uuid
            self._store.del_stream(bucket, label)
            _, json_payload = self._get_object(bucket)
            # BUG FIX: dict.has_key() does not exist in Python 3.
            if label in json_payload.keys():
                del json_payload[label]
            json_payload.sync()
        else:
            raise FileNotFoundException
Beispiel #8
0
 def setUp(self):
     """Create a fresh pairtree client rooted under /tmp for each test."""
     client = PairtreeStorageClient('http://example.org', '/tmp/pairtree', 2)
     self.pairtree = client
Beispiel #9
0
class OFS(object):
    '''Single-file-per-object store over a pairtree filesystem layout.

    Each uuid names one pairtree object holding exactly one bytestream
    (tracked via the reserved '_filename' metadata key).
    '''
    def __init__(self, storage_dir="data", uri_base="urn:uuid:", hashing_type="md5"):
        self.storage_dir = storage_dir
        self.uri_base = uri_base
        self.hashing_type = hashing_type
        self._open_store()

    def _open_store(self):
        '''Create the backing PairtreeStorageClient (shorty length fixed at 2).'''
        if self.hashing_type:
            self._store = PairtreeStorageClient(self.uri_base, self.storage_dir, shorty_length=2, hashing_type=self.hashing_type)
        else:
            self._store = PairtreeStorageClient(self.uri_base, self.storage_dir, shorty_length=2)

    def exists(self, uuid):
        '''True if an object for *uuid* exists in the store.'''
        return self._store.exists(uuid)

    def _get_object(self, uuid):
        '''Return (pairtree object, its JSON metadata payload) for *uuid*.'''
        po = self._store.get_object(uuid)
        json_payload = PersistentState(po.id_to_dirpath())
        return (po, json_payload)

    def _setup_item(self, uuid):
        '''Record the object's URI and persist its metadata payload.'''
        _, json_payload = self._get_object(uuid)
        json_payload['_uri'] = self.uri_base + uuid
        json_payload.sync()

    def claim_an_id(self):
        '''Reserve and return a fresh, unused uuid4 hex id.'''
        uuid = uuid4().hex
        while self.exists(uuid):
            uuid = uuid4().hex
        self._setup_item(uuid)
        return uuid

    def list_ids(self):
        '''Return the ids of all objects in the store.'''
        return self._store.list_ids()

    def put_stream(self, uuid, stream_object, filename, params={}):
        '''Store *stream_object* as *filename* under *uuid*; return the metadata.'''
        ## QUESTION: do we enforce that the uuid's have to be 'claimed' first?
        ## NB this method doesn't care if it has been
        po, json_payload = self._get_object(uuid)
        hash_vals = po.add_bytestream_by_path(filename, stream_object)
        stat_vals = po.stat(filename)
        if '_filename' in json_payload.keys():
            # remove old file which has a different name
            po.del_file(json_payload['_filename'])
            creation_date = None
        else:
            # New upload - record creation date
            creation_date = datetime.now().isoformat().split(".")[0]  ## '2010-07-08T19:56:47'
        # Userland parameters for the file ('_'-prefixed keys are reserved)
        cleaned_params = dict((k, params[k]) for k in params if not k.startswith("_"))
        json_payload.update(cleaned_params)
        # Filedetails: _filename, _numberofbytes (in bytes)
        json_payload['_filename'] = filename
        try:
            json_payload['_numberofbytes'] = int(stat_vals.st_size)
        except TypeError:
            # Parenthesised so this line is valid in both Python 2 and 3.
            print("Error getting filesize from os.stat().st_size into an integer...")
        if creation_date:
            json_payload['_datecreated'] = creation_date
            json_payload['_lastmodified'] = creation_date
        else:
            # Modification date (indentation fixed: was off by one column)
            json_payload['_lastmodified'] = datetime.now().isoformat().split(".")[0]
        # Hash details:
        if hash_vals:
            json_payload['_checksum'] = "%s:%s" % (hash_vals['type'], hash_vals['checksum'])
        json_payload.sync()
        return json_payload.state

    def get_stream(self, uuid, as_stream=True):
        '''Return the stored bytestream for *uuid*; raises FileNotFoundException.'''
        if self.exists(uuid):
            po, json_payload = self._get_object(uuid)
            if '_filename' in json_payload.keys():
                return po.get_bytestream(json_payload['_filename'], streamable=as_stream, path=None, appendable=False)
        raise FileNotFoundException

    def get_stream_metadata(self, uuid):
        '''Return the full metadata dict stored for *uuid*.'''
        if self.exists(uuid):
            _, json_payload = self._get_object(uuid)
            return json_payload.state
        else:
            raise FileNotFoundException

    def update_stream_metadata(self, uuid, params):
        '''Merge non-reserved *params* into the metadata for *uuid*.'''
        if self.exists(uuid) and isinstance(params, dict):
            _, json_payload = self._get_object(uuid)
            # Userland parameters for the file
            cleaned_params = dict((k, params[k]) for k in params if not k.startswith("_"))
            json_payload.update(cleaned_params)
            json_payload.sync()
            return json_payload.state
        else:
            raise FileNotFoundException

    def remove_metadata_keys(self, uuid, keys):
        '''Remove the given non-reserved metadata *keys* from *uuid*.'''
        if self.exists(uuid) and isinstance(keys, list):
            _, json_payload = self._get_object(uuid)
            for key in [x for x in keys if not x.startswith("_")]:
                if key in json_payload.keys():
                    del json_payload[key]
            json_payload.sync()
            return json_payload.state
        else:
            raise FileNotFoundException

    def del_stream(self, uuid):
        '''Delete the whole object stored for *uuid*.'''
        if self.exists(uuid):
            # deletes the whole object for uuid
            self._store.delete_object(uuid)
        else:
            raise FileNotFoundException
Beispiel #10
0
class FileStorageClient(object):
    def __init__(self, uri_base, store_dir, prefix, shorty_length, queue=None, hashing_type=None, **context):
        """Wrap a PairtreeStorageClient with versioned-part conveniences.

        :param uri_base: namespace URI prefix for stored ids (may be None)
        :param store_dir: filesystem directory backing the pairtree store
        :param prefix: separator between a part name and its version number
        :param shorty_length: pairtree shorty length for the store layout
        :param queue: optional audit queue; when set, mutations are logged
        :param hashing_type: optional checksum algorithm for the store
        :param context: extra key/values merged into every audit message
        """
        self.store_dir = store_dir
        self.uri_base = None
        if uri_base:
            self.uri_base = Namespace(uri_base)
        self.ids = {}
        self.id_parts = {}
        self.prefix = prefix
        self.shorty_length = shorty_length
        self.queue = queue
        self.context = context
        # BUG FIX: always record hashing_type.  Previously it was assigned
        # only when truthy, so later reads of self.hashing_type could raise
        # AttributeError when no hashing was configured.
        self.hashing_type = hashing_type
        client_kwargs = {}
        if hashing_type:
            client_kwargs['hashing_type'] = hashing_type
        self.storeclient = PairtreeStorageClient(uri_base, store_dir, shorty_length,
                                                 **client_kwargs)
        # An existing store on disk may already carry a uri_base; prefer it.
        if self.storeclient.uri_base:
            self.uri_base = Namespace(self.storeclient.uri_base)


    def list_ids(self):
        """Return every object identifier known to the backing store."""
        known_ids = self.storeclient.list_ids()
        return known_ids

    def _get_latest_part(self, id, part_id):
        """Return the highest version number of *part_id*, or 0 if none."""
        try:
            known_versions = self._list_part_versions(id, part_id)
        except PartNotFoundException:
            # No such part yet -- version numbering starts from scratch.
            return 0
        return max(known_versions) if known_versions else 0

    def _list_parts(self, id):
        """Return the parts stored under object *id*."""
        parts = self.storeclient.list_parts(id)
        return parts

    def _list_part_versions(self, id, part_id):
        """Return the integer version numbers stored for *part_id*.

        Version files are named "<part_id><prefix><n>"; the trailing <n> is
        extracted from each.  Raises PartNotFoundException when *part_id*
        does not exist under object *id*.
        """
        if part_id not in self.storeclient.list_parts(id):
            raise PartNotFoundException
        versions = self.storeclient.list_parts(id, part_id)
        # The original had a redundant "else: return []" branch -- the
        # comprehension already yields [] for an empty listing.
        return [int(name.split(self.prefix)[-1]) for name in versions]

    def _del_part_version(self, id, part_id, version):
        """Delete one specific *version* of *part_id* under object *id*.

        Raises ObjectNotFoundException / PartNotFoundException /
        VersionNotFoundException as appropriate.
        """
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if part_id not in self.storeclient.list_parts(id):
            raise PartNotFoundException
        if version not in self._list_part_versions(id, part_id):
            raise VersionNotFoundException(part_id=part_id, version=version)
        versioned_name = "%s%s%s" % (part_id, self.prefix, version)
        resp = self.storeclient.del_stream(id, versioned_name, path=part_id)
        if self.queue != None:
            self._log(id, 'd', 'Deleting a version of a part',
                      part_id=part_id, version=version)
        return resp

    def _put_part(self, id, part_id, bytestream, version=False, buffer_size=1024 * 8, mimetype=None):
        """Store *bytestream* as a (new by default) version of *part_id*.

        Creates the object if needed.  Returns a dict with the version
        number written and the stream's checksum.
        """
        if not self.storeclient.exists(id):
            self.storeclient.create_object(id)
        if not version:
            # Next version after whatever is currently stored (0 if none).
            version = self._get_latest_part(id, part_id) + 1
        versioned_name = "%s%s%s" % (part_id, self.prefix, version)
        hexhash = self.storeclient.put_stream(id, part_id, versioned_name,
                                              bytestream, buffer_size)
        if self.queue != None:
            if version == 1:
                action, label = 'c', 'Creating a part'
            else:
                action, label = 'w', 'Updating a part'
            self._log(id, action, label, part_id=part_id, version=version,
                      checksum=hexhash, mimetype=mimetype)
        return {'version': version, 'checksum': hexhash}

    def _get_part(self, id, part_id, stream, version=False):
        """Fetch a stored part, defaulting to its most recent version.

        Raises ObjectNotFoundException / PartNotFoundException /
        VersionNotFoundException as appropriate.
        """
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id)
        # _get_latest_part returns 0 when the part has never been written.
        if version == 0:
            raise PartNotFoundException
        versioned_name = "%s%s%s" % (part_id, self.prefix, version)
        if not self.storeclient.exists(id, os.path.join(part_id, versioned_name)):
            raise VersionNotFoundException(part_id=part_id, version=version)
        return self.storeclient.get_stream(id, part_id, versioned_name, stream)

    def _del_part(self, id, part_id):
        """Recursively delete every version of *part_id* under *id*.

        Raises ObjectNotFoundException / PartNotFoundException when the
        object or the part is missing.
        """
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not self.storeclient.exists(id, part_id):
            raise PartNotFoundException(part_id=part_id)
        # Recursive: removes the part directory and all version files in it.
        self.storeclient.del_path(id, part_id, recursive=True)
        if self.queue != None:
            self._log(id, 'd', 'Deleting a part', part_id=part_id)

    def _store_manifest(self, id, part_id, manifest, version=False):
        """Serialize *manifest* and store it as a version of *part_id*.

        Returns a dict with the version written and the stream checksum;
        raises ObjectNotFoundException when the object is missing.
        """
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id) + 1
        versioned_name = "%s%s%s" % (part_id, self.prefix, version)
        serialized = manifest.to_string()
        # The store wants bytes; encode unicode (Python 2) payloads.
        if isinstance(serialized, unicode):
            serialized = serialized.encode('utf-8')
        hexhash = self.storeclient.put_stream(id, part_id, versioned_name, serialized)
        if self.queue != None:
            if version == 1:
                self._log(id, 'c', 'Creating an RDF Manifest', part_id=part_id,
                          version=version, checksum=hexhash)
            else:
                self._log(id, 'w', 'Updating an RDF Manifest', part_id=part_id,
                          version=version, checksum=hexhash)
        return {'version': version, 'checksum': hexhash}

    def _store_rdfobject(self, id, part_id, rdfobject, version=False):
        """Serialize *rdfobject* and store it as a version of *part_id*.

        Returns a dict with the version written and the stream checksum;
        raises ObjectNotFoundException when the object is missing.
        """
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id) + 1
        versioned_name = "%s%s%s" % (part_id, self.prefix, version)
        serialized = rdfobject.to_string()
        # The store wants bytes; encode unicode (Python 2) payloads.
        if isinstance(serialized, unicode):
            serialized = serialized.encode('utf-8')
        hexhash = self.storeclient.put_stream(id, part_id, versioned_name, serialized)
        if self.queue != None:
            if version == 1:
                self._log(id, 'c', 'Creating an RDF Root', part_id=part_id,
                          version=version, checksum=hexhash)
            else:
                self._log(id, 'w', 'Updating an RDF Root', part_id=part_id,
                          version=version, checksum=hexhash)
        return {'version': version, 'checksum': hexhash}


    def _get_rdfobject(self, id, part_id, version=False):
        """Load an RDFobject from the store; empty object when no version exists.

        Raises ObjectNotFoundException when the object is missing.
        """
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id)
        versioned_name = "%s%s%s" % (part_id, self.prefix, version)
        rdfobj = RDFobject()
        rdfobj.set_uri(self.uri_base[id])
        # version 0 means "never written" -- return the bare object then.
        if version >= 1:
            raw = self.storeclient.get_stream(id, part_id, versioned_name,
                                              streamable=False)
            rdfobj.from_string(self.uri_base[id], raw.decode('utf-8'))
        return rdfobj

    def _get_manifest(self, id, part_id, file_uri, version=False):
        """Load a Manifest from the store; empty manifest when no version exists.

        Raises ObjectNotFoundException when the object is missing.
        """
        if not self.storeclient.exists(id):
            raise ObjectNotFoundException
        if not version:
            version = self._get_latest_part(id, part_id)
        versioned_name = "%s%s%s" % (part_id, self.prefix, version)
        manifest = Manifest(file_uri)
        # version 0 means "never written" -- return the empty manifest then.
        if version >= 1:
            raw = self.storeclient.get_stream(id, part_id, versioned_name,
                                              streamable=False)
            manifest.from_string(raw.decode('utf-8'))
        return manifest

    def exists(self, id):
        """True when an object with identifier *id* exists in the store."""
        found = self.storeclient.exists(id)
        return found

    def getObject(self, id=None, create_if_doesnt_exist=True):
        """Return a FileStorageObject for *id*, optionally creating it.

        Logs a creation audit record only when this call actually created
        the object (it did not exist beforehand).
        """
        already_there = self.storeclient.exists(id)
        self.storeclient.get_object(id, create_if_doesnt_exist)
        if create_if_doesnt_exist and not already_there and self.queue != None:
            self._log(id, 'c', 'Creating an object')
        return FileStorageObject(id, self)

    def createObject(self, id):
        """Explicitly create *id* in the store and return a wrapper for it."""
        self.storeclient.create_object(id)
        if self.queue != None:
            self._log(id, 'c', 'Creating an object')
        return FileStorageObject(id, self)

    def deleteObject(self, id):
        """Delete object *id* when it exists; no-op (returns None) otherwise.

        The audit record is emitted before deletion so the id is still valid.
        """
        if not self.storeclient.exists(id):
            return None
        if self.queue != None:
            self._log(id, 'd', 'Deleting an object')
        return self.storeclient.delete_object(id)

    def log_audit(self, id, logcontext, context):
        """Emit a metadata-change audit record when a queue is configured."""
        if self.queue != None:
            self._log(id, 'metadatadelta', 'Metadata changes',
                      _logcontext=logcontext, **context)

    def _log(self, id, action, label, **kw):
        msg = {}
        msg.update(kw)
        msg.update(self.context)
        msg['id'] = id
        msg['action'] = action
        msg['label'] = label
        msg['uri_base'] = self.uri_base
        # Get an ISO datetime for this
        msg['timestamp'] = datetime.now().isoformat()

        try:
            self.queue.put(simplejson.dumps(msg))
        except Exception, e:
            raise e