コード例 #1
0
ファイル: test.py プロジェクト: JoseLuisIc/ckan-capsus
class TestPairtree(unittest.TestCase):
    def i2p2i(self, id, target, label):
        ppath = self.pairtree._id_to_dir_list(id)[1:]
        self.assertEqual(ppath, target)
        #self.assertEqual( reverse it)

    def roundtrip(self, id, label):
        ppath = self.pairtree.id_encode(id)
        new_id = self.pairtree.id_decode(ppath)
        self.assertEqual(id, new_id)
        self.ppath_roundtrip(id, label)
        #self.assertEqual( reverse it)

    def ppath_roundtrip(self, id, label):
        pp = ppath.get_id_from_dirpath(ppath.id_to_dirpath(id))
        self.assertEqual(pp, id)

    def setUp(self):
        self.pairtree = PairtreeStorageClient('http://example.org', PAIRTREE_STORAGE_DIR, 2)

    def test_empty(self):
        pass
        #try:
        #    ppath = PairPath("")
        #    self.assertFalse(True, 'Empty id should raise exception')
        #except BadPairPath:
        #    pass

    def testabc(self):
        self.i2p2i('abc', ['ab','c','obj'], 'basic 3-char case')
    
    def testabc_roundtrip(self):
        self.roundtrip('abc', 'basic 3-char case - roundtrip')

    def testabc(self):
        self.i2p2i('abcd', ['ab','cd', 'obj'], 'basic 4-char case')

    def testabc_roundtrip(self):
        self.roundtrip('abcd', 'basic 4-char case - roundtrip')
    
    def testabc(self):
        self.i2p2i('abcd', ['ab','cd', 'obj'], 'basic 4-char case')
        
    def testabc_roundtrip(self):
        self.roundtrip('abcd', 'basic 4-char case - roundtrip')

    def testxy(self):
        self.i2p2i('xy', ['xy', 'obj'], '2-char edge case')
        
    def testxy_roundtrip(self):
        self.roundtrip('xy', '2-char edge case - roundtrip')

    def testz(self):
        self.i2p2i('z', ['z', 'obj'], '1-char edge case')
        
    def testz_roundtrip(self):
        self.roundtrip('z', '1-char edge case - roundtrip')
        
    def test12_986xy4(self):
        self.i2p2i('12-986xy4', ['12', '-9', '86', 'xy', '4', 'obj'], 'hyphen')
        
    def test12_986xy4_roundtrip(self):
        self.roundtrip('12-986xy4', 'hyphen - roundtrip')

    def test_13030_45xqv_793842495(self):
        self.i2p2i('13030_45xqv_793842495',
                   ['13', '03', '0_', '45', 'xq', 'v_', '79', '38', '42', '49', 
                    '5', 'obj'],
                   'long id with undescores')

    def test_13030_45xqv_793842495_roundtrip(self):
        self.roundtrip('13030_45xqv_793842495',
                   'long id with undescores - roundtrip')

    def test_ark_13030_xt12t3(self):
        self.i2p2i('ark:/13030/xt12t3',
                   ['ar', 'k+', '=1', '30', '30', '=x', 't1', '2t', '3', 'obj'],
                   'colons and slashes')

    def test_ark_13030_xt12t3_roundtrip(self):
        self.roundtrip('ark:/13030/xt12t3',
                   'colons and slashes - roundtrip')

    def test_space(self):
        self.i2p2i('hello world', ['he', 'll', 'o^', '20', 'wo', 'rl', 'd', 'obj'], 'space')
        
    def test_space_roundtrip(self):
        self.roundtrip('hello world', 'space - roundtrip')
        
    def test_slash(self):
        self.i2p2i('/', ['=', 'obj'], '1-separator-char edge case')

    def test_slash_roundtrip(self):
        self.roundtrip('/', '1-separator-char edge case - roundtrip')

    def test_urn(self):
        self.i2p2i('http://n2t.info/urn:nbn:se:kb:repos-1',
                   ['ht', 'tp', '+=', '=n', '2t', ',i', 'nf', 'o=', 'ur', 'n+', 
                    'nb', 'n+', 'se', '+k', 'b+', 're', 'po', 's-', '1', 'obj'],
                   'a URL with colons, slashes, and periods')

    def test_urn_roundtrip(self):
        self.roundtrip('http://n2t.info/urn:nbn:se:kb:repos-1',
                   'a URL with colons, slashes, and periods - roundtrip')

    def test_wtf(self):
        self.i2p2i('what-the-*@?#!^!?',
                   ['wh', 'at', '-t', 'he', '-^', '2a', '@^', '3f', '#!', '^5', 
                    'e!', '^3', 'f', 'obj'],
                   'weird chars from spec example');

    def test_wtf_roundtrip(self):
        self.roundtrip('what-the-*@?#!^!?',
                   'weird chars from spec example - roundtrip');

    def test_weird(self):
        self.i2p2i('\\"*+,<=>?^|',
                   ['^5', 'c^', '22', '^2', 'a^', '2b', '^2', 'c^', '3c', '^3',
                    'd^', '3e', '^3', 'f^', '5e', '^7', 'c', 'obj'],
                   'all weird visible chars');

    def test_weird_roundtrip(self):
        self.roundtrip('\\"*+,<=>?^|',
                   'all weird visible chars - roundtrip');

    def test_basic_roundtrip(self):
        self.roundtrip('asdfghjklpoiuytrewqxcvbnm1234567890:;/', 'Basic Roundtrip')

    def test_french_roundtrip(self):
        self.roundtrip(u'Années de Pèlerinage', 'French Unicode roundtrip')

    def test_japanese_rountrip(self):
        self.roundtrip(u'ウインカリッスの日本語', 'Japanese Unicode roundtrip')
        
    def test_hardcore_unicode_rountrip(self):
        # If this works...
        self.roundtrip(u"""   1. Euro Symbol: €.
   2. Greek: Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα.
   3. Íslenska / Icelandic: Ég get etið gler án þess að meiða mig.
   4. Polish: Mogę jeść szkło, i mi nie szkodzi.
   5. Romanian: Pot să mănânc sticlă și ea nu mă rănește.
   6. Ukrainian: Я можу їсти шкло, й воно мені не пошкодить.
   7. Armenian: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։
   8. Georgian: მინას ვჭამ და არა მტკივა.
   9. Hindi: मैं काँच खा सकता हूँ, मुझे उस से कोई पीडा नहीं होती.
  10. Hebrew(2): אני יכול לאכול זכוכית וזה לא מזיק לי.
  11. Yiddish(2): איך קען עסן גלאָז און עס טוט מיר נישט װײ.
  12. Arabic(2): أنا قادر على أكل الزجاج و هذا لا يؤلمني.
  13. Japanese: 私はガラスを食べられます。それは私を傷つけません。
  14. Thai: ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ """,
                        "hardcore unicode test - roundtrip")

    def test_french(self):
        self.i2p2i('Années de Pèlerinage',
                   ['An', 'n^', 'c3', '^a', '9e', 's^', '20', 'de', '^2', '0P',
                    '^c', '3^', 'a8', 'le', 'ri', 'na', 'ge', 'obj'],
                   'UTF-8 chars')

        self.i2p2i("Années de Pèlerinage (Years of Pilgrimage) (S.160, S.161,\n\
 S.163) is a set of three suites by Franz Liszt for solo piano. Liszt's\n\
 complete musical style is evident in this masterwork, which ranges from\n\
 virtuosic fireworks to sincerely moving emotional statements. His musical\n\
 maturity can be seen evolving through his experience and travel. The\n\
 third volume is especially notable as an example of his later style: it\n\
 was composed well after the first two volumes and often displays less\n\
 showy virtuosity and more harmonic experimentation.",
                   ['An', 'n^', 'c3', '^a', '9e', 's^', '20', 'de', '^2', '0P',
                    '^c', '3^', 'a8', 'le', 'ri', 'na', 'ge', '^2', '0(', 'Ye',
                    'ar', 's^', '20', 'of', '^2', '0P', 'il', 'gr', 'im', 'ag', 
                    'e)', '^2', '0(', 'S,', '16', '0^', '2c', '^2', '0S', ',1',
                    '61', '^2', 'c^', '0a', '^2', '0S', ',1', '63', ')^', '20',
                    'is', '^2', '0a', '^2', '0s', 'et', '^2', '0o', 'f^', '20', 
                    'th', 're', 'e^', '20', 'su', 'it', 'es', '^2', '0b', 'y^', 
                    '20', 'Fr', 'an', 'z^', '20', 'Li', 'sz', 't^', '20', 'fo', 
                    'r^', '20', 'so', 'lo', '^2', '0p', 'ia', 'no', ',^', '20',
                    'Li', 'sz', 't\'', 's^', '0a', '^2', '0c', 'om', 'pl', 'et',
                    'e^', '20', 'mu', 'si', 'ca', 'l^', '20', 'st', 'yl', 'e^',
                    '20', 'is', '^2', '0e', 'vi', 'de', 'nt', '^2', '0i', 'n^',
                    '20', 'th', 'is', '^2', '0m', 'as', 'te', 'rw', 'or', 'k^',
                    '2c', '^2', '0w', 'hi', 'ch', '^2', '0r', 'an', 'ge', 's^',
                    '20', 'fr', 'om', '^0', 'a^', '20', 'vi', 'rt', 'uo', 'si',
                    'c^', '20', 'fi', 're', 'wo', 'rk', 's^', '20', 'to', '^2',
                    '0s', 'in', 'ce', 're', 'ly', '^2', '0m', 'ov', 'in', 'g^',
                    '20', 'em', 'ot', 'io', 'na', 'l^', '20', 'st', 'at', 'em',
                    'en', 'ts', ',^', '20', 'Hi', 's^', '20', 'mu', 'si', 'ca',
                    'l^', '0a', '^2', '0m', 'at', 'ur', 'it', 'y^', '20', 'ca',
                    'n^', '20', 'be', '^2', '0s', 'ee', 'n^', '20', 'ev', 'ol',
                    'vi', 'ng', '^2', '0t', 'hr', 'ou', 'gh', '^2', '0h', 'is',
                    '^2', '0e', 'xp', 'er', 'ie', 'nc', 'e^', '20', 'an', 'd^',
                    '20', 'tr', 'av', 'el', ',^', '20', 'Th', 'e^', '0a', '^2',
                    '0t', 'hi', 'rd', '^2', '0v', 'ol', 'um', 'e^', '20', 'is',
                    '^2', '0e', 'sp', 'ec', 'ia', 'll', 'y^', '20', 'no', 'ta',
                    'bl', 'e^', '20', 'as', '^2', '0a', 'n^', '20', 'ex', 'am',
                    'pl', 'e^', '20', 'of', '^2', '0h', 'is', '^2', '0l', 'at',
                    'er', '^2', '0s', 'ty', 'le', '+^', '20', 'it', '^0', 'a^',
                    '20', 'wa', 's^', '20', 'co', 'mp', 'os', 'ed', '^2', '0w',
                    'el', 'l^', '20', 'af', 'te', 'r^', '20', 'th', 'e^', '20',
                    'fi', 'rs', 't^', '20', 'tw', 'o^', '20', 'vo', 'lu', 'me',
                    's^', '20', 'an', 'd^', '20', 'of', 'te', 'n^', '20', 'di',
                    'sp', 'la', 'ys', '^2', '0l', 'es', 's^', '0a', '^2', '0s',
                    'ho', 'wy', '^2', '0v', 'ir', 'tu', 'os', 'it', 'y^', '20',
                    'an', 'd^', '20', 'mo', 're', '^2', '0h', 'ar', 'mo', 'ni',
                    'c^', '20', 'ex', 'pe', 'ri', 'me', 'nt', 'at', 'io', 'n,', 'obj'],                   
                   'very long id with apostrophes and UTF-8 chars')

    def test_id_to_url_simple(self):
        desired_url = "file://%s/pairtree_root/fo/o/obj/bar.txt" % PAIRTREE_STORAGE_DIR
        test_url = self.pairtree.get_url("foo", "bar.txt")
        self.assertEquals(desired_url, test_url)


    def test_id_to_url_withpath(self):
        desired_url = "file://%s/pairtree_root/fo/o/obj/data/subdir/bar.txt" % PAIRTREE_STORAGE_DIR
        test_url = self.pairtree.get_url("foo", "bar.txt", path="data/subdir")
        self.assertEquals(desired_url, test_url)
コード例 #2
0
ファイル: pairtreestore.py プロジェクト: glavaux/ofs
class PTOFS(OFSInterface):
    '''OFS backend backed onto the filesystem and using PairTree_.

    .. _PairTree: http://pypi.python.org/pypi/Pairtree
    '''
    def __init__(self, storage_dir="data", uri_base="urn:uuid:", hashing_type="md5", shorty_length=2):
        self.storage_dir = storage_dir
        self.uri_base = uri_base
        self.hashing_type = hashing_type
        self.shorty_length = shorty_length
        self._open_store()
    
    def _open_store(self):
        if self.hashing_type:
            self._store = PairtreeStorageClient(self.uri_base, self.storage_dir, shorty_length=self.shorty_length, hashing_type=self.hashing_type)
        else:
            self._store = PairtreeStorageClient(self.uri_base, self.storage_dir, shorty_length=shorty_length)

    def exists(self, bucket, label=None):
        if self._store.exists(bucket):
            if label:
                return self._store.isfile(bucket, label)
            else:
                return True
    
    def _get_object(self, bucket):
        po = self._store.get_object(bucket)
        json_payload = PersistentState(po.id_to_dirpath())
        return (po, json_payload)

    def _setup_item(self, bucket):
        _, json_payload = self._get_object(bucket)
        json_payload.sync()
    
    def claim_bucket(self, bucket=None):
        if bucket:
            if self.exists(bucket):
                raise BucketExists
        else:
            bucket = uuid4().hex
            while(self.exists(bucket)):
                bucket = uuid4().hex
        self._setup_item(bucket)
        return bucket
        
    def list_labels(self, bucket, prefix=None):
        if self.exists(bucket):
            _, json_payload = self._get_object(bucket)
            if prefix is None:
                return json_payload.keys()
            else:
                l = len(prefix)
                if l > 0 and prefix[0]=='/':
                    prefix = prefix[1:]
                return filter(lambda k: k[:l] == prefix, json_payload.keys())
                
    def list_buckets(self):
        return self._store.list_ids()
        
    def put_stream(self, bucket, label, stream_object, params={}):
        ## QUESTION: do we enforce that the bucket's have to be 'claimed' first?
        ## NB this method doesn't care if it has been
        po, json_payload = self._get_object(bucket)

        if label in json_payload.keys():
            creation_date = None
        else:
            # New upload - record creation date
            creation_date = datetime.now().isoformat().split(".")[0]  ## '2010-07-08T19:56:47'
            if params.has_key('_label'):
                json_payload[label] = {"_label":params['_label']}
            else:
                json_payload[label] = {"_label":label}

        hash_vals = po.add_bytestream_by_path(label, stream_object)
        stat_vals = po.stat(label)
        
        # Userland parameters for the file
        cleaned_params = dict( [ (k, params[k]) for k in params if not k.startswith("_")])
        json_payload[label].update(cleaned_params)
        try:
            json_payload[label]['_content_length'] = int(stat_vals.st_size)
        except TypeError:
            print "Error getting filesize from os.stat().st_size into an integer..."
        if creation_date:
            json_payload[label]['_creation_date'] = creation_date
            json_payload[label]['_last_modified'] = creation_date
        else:
            # Modification date
           json_payload[label]['_last_modified'] = datetime.now().isoformat().split(".")[0]
        # Hash details:
        if hash_vals:
            json_payload[label]['_checksum'] = "%s:%s" % (hash_vals['type'], hash_vals['checksum'])
        json_payload.sync()
        return json_payload.state[label]

    def get_stream(self, bucket, label, as_stream=True):
        if self.exists(bucket):
            po, json_payload = self._get_object(bucket)
            if self.exists(bucket, label):
                return po.get_bytestream(label, streamable=as_stream, path=None, appendable=False)
        raise FileNotFoundException

    def get_url(self, bucket, label):
        if self.exists(bucket) and self.exists(bucket, label):
            return self._store.get_url(bucket, label)
        else:
            raise FileNotFoundException
    
    def get_metadata(self, bucket, label):
        if self.exists(bucket):
            _, json_payload = self._get_object(bucket)
            if json_payload.has_key(label):
                return json_payload.state[label]
        raise FileNotFoundException
    
    def update_metadata(self, bucket, label, params):
        if self.exists(bucket, label) and isinstance(params, dict):
            _, json_payload = self._get_object(bucket)
            # Userland parameters for the file
            cleaned_params = dict([(k, params[k]) for k in params if not k.startswith("_")])
            json_payload[label].update(cleaned_params)
            json_payload.sync()
            return json_payload.state[label]
        else:
            raise FileNotFoundException
    
    def del_metadata_keys(self, bucket, label, keys):
        if self.exists(bucket, label) and isinstance(keys, list):
            _, json_payload = self._get_object(bucket)
            for key in [x for x in keys if not x.startswith("_")]:
                if key in json_payload[label].keys():
                    del json_payload[label][key]
            json_payload.sync()
            return json_payload.state[label]
        else:
            raise FileNotFoundException

    def del_stream(self, bucket, label):
        if self.exists(bucket, label):
            # deletes the whole object for uuid
            self._store.del_stream(bucket, label)
            _, json_payload = self._get_object(bucket)
            if json_payload.has_key(label):
                del json_payload[label]
            json_payload.sync()
        else:
            raise FileNotFoundException
コード例 #3
0
class PTOFS(OFSInterface):
    '''OFS backend backed onto the filesystem and using PairTree_.

    .. _PairTree: http://pypi.python.org/pypi/Pairtree
    '''
    def __init__(self,
                 storage_dir="data",
                 uri_base="urn:uuid:",
                 hashing_type="md5",
                 shorty_length=2):
        self.storage_dir = storage_dir
        self.uri_base = uri_base
        self.hashing_type = hashing_type
        self.shorty_length = shorty_length
        self._open_store()

    def _open_store(self):
        if self.hashing_type:
            self._store = PairtreeStorageClient(
                self.uri_base,
                self.storage_dir,
                shorty_length=self.shorty_length,
                hashing_type=self.hashing_type)
        else:
            self._store = PairtreeStorageClient(self.uri_base,
                                                self.storage_dir,
                                                shorty_length=shorty_length)

    def exists(self, bucket, label=None):
        if self._store.exists(bucket):
            if label:
                return self._store.isfile(bucket, label)
            else:
                return True

    def _get_object(self, bucket):
        po = self._store.get_object(bucket)
        json_payload = PersistentState(po.id_to_dirpath())
        return (po, json_payload)

    def _setup_item(self, bucket):
        _, json_payload = self._get_object(bucket)
        json_payload.sync()

    def claim_bucket(self, bucket=None):
        if bucket:
            if self.exists(bucket):
                raise BucketExists
        else:
            bucket = uuid4().hex
            while (self.exists(bucket)):
                bucket = uuid4().hex
        self._setup_item(bucket)
        return bucket

    def list_labels(self, bucket):
        if self.exists(bucket):
            _, json_payload = self._get_object(bucket)
            return json_payload.keys()

    def list_buckets(self):
        return self._store.list_ids()

    def put_stream(self, bucket, label, stream_object, params={}):
        ## QUESTION: do we enforce that the bucket's have to be 'claimed' first?
        ## NB this method doesn't care if it has been
        po, json_payload = self._get_object(bucket)

        if label in json_payload.keys():
            creation_date = None
        else:
            # New upload - record creation date
            creation_date = datetime.now().isoformat().split(".")[
                0]  ## '2010-07-08T19:56:47'
            if '_label' in params:
                json_payload[label] = {"_label": params['_label']}
            else:
                json_payload[label] = {"_label": label}

        hash_vals = po.add_bytestream_by_path(label, stream_object)
        stat_vals = po.stat(label)

        # Userland parameters for the file
        cleaned_params = dict([(k, params[k]) for k in params
                               if not k.startswith("_")])
        json_payload[label].update(cleaned_params)
        try:
            json_payload[label]['_content_length'] = int(stat_vals.st_size)
        except TypeError:
            print(
                "Error getting filesize from os.stat().st_size into an integer..."
            )
        if creation_date:
            json_payload[label]['_creation_date'] = creation_date
            json_payload[label]['_last_modified'] = creation_date
        else:
            # Modification date
            json_payload[label]['_last_modified'] = datetime.now().isoformat(
            ).split(".")[0]
        # Hash details:
        if hash_vals:
            json_payload[label]['_checksum'] = "%s:%s" % (
                hash_vals['type'], hash_vals['checksum'])
        json_payload.sync()
        return json_payload.state[label]

    def get_stream(self, bucket, label, as_stream=True):
        if self.exists(bucket):
            po, json_payload = self._get_object(bucket)
            if self.exists(bucket, label):
                return po.get_bytestream(label,
                                         streamable=as_stream,
                                         path=None,
                                         appendable=False)
        raise FileNotFoundException

    def get_url(self, bucket, label):
        if self.exists(bucket) and self.exists(bucket, label):
            return self._store.get_url(bucket, label)
        else:
            raise FileNotFoundException

    def get_metadata(self, bucket, label):
        if self.exists(bucket):
            _, json_payload = self._get_object(bucket)
            if json_payload.has_key(label):
                return json_payload.state[label]
        raise FileNotFoundException

    def update_metadata(self, bucket, label, params):
        if self.exists(bucket, label) and isinstance(params, dict):
            _, json_payload = self._get_object(bucket)
            # Userland parameters for the file
            cleaned_params = dict([(k, params[k]) for k in params
                                   if not k.startswith("_")])
            json_payload[label].update(cleaned_params)
            json_payload.sync()
            return json_payload.state[label]
        else:
            raise FileNotFoundException

    def del_metadata_keys(self, bucket, label, keys):
        if self.exists(bucket, label) and isinstance(keys, list):
            _, json_payload = self._get_object(bucket)
            for key in [x for x in keys if not x.startswith("_")]:
                if key in json_payload[label].keys():
                    del json_payload[label][key]
            json_payload.sync()
            return json_payload.state[label]
        else:
            raise FileNotFoundException

    def del_stream(self, bucket, label):
        if self.exists(bucket, label):
            # deletes the whole object for uuid
            self._store.del_stream(bucket, label)
            _, json_payload = self._get_object(bucket)
            if json_payload.has_key(label):
                del json_payload[label]
            json_payload.sync()
        else:
            raise FileNotFoundException