def test_update_object_and_get_through_store_should_get_identical_bytestream(self):
        # Adding a bytestream under a name that already exists must replace the stored content.

        # create the object with an initial bytestream taken from the test file
        factory = PairtreeStorageFactory()
        store = factory.get_store(store_dir=self.data_dir, uri_base="http://dummy")
        object_id = u'owërdœ.file'
        stored_object = store.create_object(object_id)
        with open(self.test_file_path, 'rb') as initial_content:
            stored_object.add_bytestream('dummy.txt', initial_content)

        # overwrite the bytestream with new content
        replacement = BytesIO()
        replacement.write(b'baz')
        replacement.seek(0)
        stored_object.add_bytestream('dummy.txt', replacement)
        replacement.close()

        # reference buffer holding the content expected after the update
        expected = BytesIO()
        expected.write(b'baz')

        stored_bytes = stored_object.get_bytestream('dummy.txt')
        expected_digest = hashlib.md5(expected.getvalue()).hexdigest()
        stored_digest = hashlib.md5(stored_bytes).hexdigest()

        self.assertEqual(expected_digest, stored_digest)
    def test_create_object_and_file_should_exist_according_to_store(self):
        # A bytestream added to a new object must be reported as a file by the store.
        store = PairtreeStorageFactory().get_store(store_dir=self.data_dir, uri_base="http://dummy")
        object_id = u'owërdœ.file'
        stored_object = store.create_object(object_id)
        with open(self.test_file_path, 'rb') as content:
            stored_object.add_bytestream('dummy', content)

        self.assertTrue(store.isfile(object_id, 'dummy'))
    def test_delete_object_should_remove_file_from_system(self):
        # Deleting an object through the store must remove its directory from the file system.
        store = PairtreeStorageFactory().get_store(store_dir=self.data_dir, uri_base="http://dummy")
        stored_object = store.create_object('test')
        stored_object.add_file(self.test_file_path)
        stored_file = os.path.join(stored_object.location, 'dummy_image.jpg')
        self.assertTrue(os.path.isfile(stored_file))

        store.delete_object('test')

        self.assertFalse(os.path.exists(stored_object.location))
    def test_store_file_should_create_identical_file(self):
        # A bytestream written through the object must be byte-identical to the source file on disk.
        storage_factory = PairtreeStorageFactory()
        store = storage_factory.get_store(store_dir=self.data_dir, uri_base="http://dummy")
        stored_object = store.create_object('test')
        with open(self.test_file_path, 'rb') as test_file:
            stored_object.add_bytestream('dummy.jpg', test_file)

        # read both files inside context managers so the handles are closed deterministically
        with open(self.test_file_path, 'rb') as original_file:
            orig_hash = hashlib.md5(original_file.read()).hexdigest()
        with open(os.path.join(stored_object.location, 'dummy.jpg'), 'rb') as created_file:
            created_hash = hashlib.md5(created_file.read()).hexdigest()

        self.assertEqual(orig_hash, created_hash)
    def test_create_object_and_file_store_should_be_able_to_retreive_identical_file(self):
        # A bytestream read back through the object must be byte-identical to the source file.
        storage_factory = PairtreeStorageFactory()
        store = storage_factory.get_store(store_dir=self.data_dir, uri_base="http://dummy")
        object_id = u'owërdœ.file'
        stored_object = store.create_object(object_id)
        with open(self.test_file_path, 'rb') as test_file:
            stored_object.add_bytestream('dummy', test_file)

        retrieved_bytestream = stored_object.get_bytestream('dummy')
        # read the reference file inside a context manager so the handle is closed deterministically
        with open(self.test_file_path, 'rb') as original_file:
            orig_hash = hashlib.md5(original_file.read()).hexdigest()
        created_hash = hashlib.md5(retrieved_bytestream).hexdigest()

        self.assertEqual(orig_hash, created_hash)
class DirectoryPairtreeStorage(PairtreeStorage):
    """
    Pairtree storage class allowing to build a filesystem hierarchy to store objects that are located by mapping
    identifier strings to object directory (or folder) paths.

    A working directory is stored either as a single package or as one tar package per representation;
    representation packages are looked up in the sub-directory named by ``representations_directory``.
    """
    storage_factory = None
    repository_storage_dir = None

    def __init__(self, repository_storage_dir, representations_directory="representations"):
        """
        Constructor initialises pairtree repository
        :param repository_storage_dir: repository storage directory
        :param representations_directory: representations directory
        """
        super().__init__(repository_storage_dir)
        self.storage_factory = PairtreeStorageFactory()
        self.repository_storage_dir = repository_storage_dir
        self.representations_directory = representations_directory
        self.repo_storage_client = self.storage_factory.get_store(store_dir=repository_storage_dir, uri_base="http://")

    # noinspection PyProtectedMember
    def get_dir_path_from_id(self, identifier):
        """
        Get directory path from id
        :param identifier: identifier
        :return: directory path
        """
        return self.repo_storage_client._id_to_dirpath(identifier)

    def get_tar_file_path(self, identifier, representation_label=None):
        """
        Get the path of a stored tar package
        :param identifier: package identifier
        :param representation_label: label of the representation; if empty, the single package named after the
        identifier (directly in the object path) is looked up instead of a representation package
        :return: path of the existing tar file
        :raise ObjectNotFoundException: if the tar file does not exist
        """
        object_path = self.get_object_path(identifier)
        if representation_label:
            tar_file_path = os.path.join(object_path, self.representations_directory, "%s.tar" % representation_label)
        else:
            tar_file_path = os.path.join(object_path, "%s.tar" % to_safe_filename(identifier))
        if os.path.exists(tar_file_path):
            logger.debug("Package file found at: %s" % tar_file_path)
            return tar_file_path
        raise ObjectNotFoundException("Package file not found")

    def get_object_item_stream(self, identifier, representation_label, entry, tar_file=None):
        """
        Get stream of a representation tar file entry
        :param identifier: package identifier
        :param representation_label: label of the representation (used in directory and file names), can be empty,
        tar assumed to be single package in that case
        :param entry: entry of the tar file
        :param tar_file: already opened tar file to read from; if omitted, the package is located with
        get_tar_file_path and opened here
        :return: chunks iterator of the tar file
        :raise ObjectNotFoundException: if the package or the entry cannot be found
        """
        if tar_file:
            # The archive is supplied by the caller; the object lookup is kept so that whatever
            # get_object_path raises for an unknown identifier is still raised.
            self.get_object_path(identifier)
            t = tar_file
        else:
            # Resolve the package with get_tar_file_path so both methods agree on the package location
            # (including the ".tar" suffix of a single package) and a missing package is reported as
            # ObjectNotFoundException instead of failing inside tarfile.open.
            t = tarfile.open(self.get_tar_file_path(identifier, representation_label), 'r')
        logger.debug("Accessing access entry %s" % entry)
        try:
            inst = ChunkedTarEntryReader(t)
            return inst.chunks(entry)
        except KeyError:
            # close only an archive opened here; a handle passed in stays under the caller's control
            if t is not tar_file:
                t.close()
            logger.error('ERROR: Did not find %s in tar archive' % entry)
            raise ObjectNotFoundException("Entry not found in repository object")

    def trigger_new_version(self, uuid, identifier, config_path_work, storage_directory):
        """
        Trigger new version depending on changed files in working directory compared to the data set in storage.
        :param uuid: UUID of working directory
        :param identifier: Data asset identifier
        :param config_path_work: base directory containing the working directory named by the UUID
        :param storage_directory: storage directory used to build the path of the stored data
        :return: True, if new version is triggered, False otherwise
        """
        working_dir = os.path.join(config_path_work, uuid)
        if self.identifier_object_exists(identifier):
            version = self.curr_version(identifier)
            data_asset_last_version_path = os.path.join(
                make_storage_data_directory_path(identifier, storage_directory),
                version, to_safe_filename(identifier))
            working_distributions_dir = os.path.join(working_dir, self.representations_directory)
            if not os.path.exists(working_distributions_dir):
                logger.debug("New version is not triggered because working catalogue directory does not exist.")
                return False
            stored_distributions_dir = os.path.join(data_asset_last_version_path, self.representations_directory)
            distribution_files = list_files_in_dir(working_distributions_dir)
            # compare each working package "<name>.tar" with the package of the same name in storage
            for dataset_dir in distribution_files:
                dataset_package_file = os.path.join(working_distributions_dir, "%s.tar" % dataset_dir)
                dataset_package_stored_file = os.path.join(stored_distributions_dir, "%s.tar" % dataset_dir)
                files_ident = files_identical(dataset_package_file, dataset_package_stored_file)
                if not files_ident:
                    logger.debug("New version triggered because hash of dataset packages is not identical")
                    return True
        logger.debug("New version not triggered.")
        return False

    def store(self, identifier, source_directory, progress_reporter=default_reporter, single_package=True):
        """
        Store a working directory given by its path
        :param identifier: Object identifier
        :param source_directory: path of the working directory; its last component is used as UUID of the
        working directory, the remaining part as base working directory
        :param progress_reporter: progress reporter (not used by this method)
        :param single_package: store as single package or as representation packages
        :return: version of the stored object
        """
        sdir = source_directory[:-1] if source_directory.endswith('/') else source_directory
        # os.path.split also copes with a path that has no directory part; slicing at rfind('/') == -1
        # used to cut off the last character of the name in that case
        working_dir, uuid = os.path.split(sdir)
        return self.store_working_directory(uuid, identifier, working_dir, single_package=single_package)

    def store_working_directory(self, uuid, identifier, working_directory, single_package=True):
        """
        Store working directory either as single package or as representation packages
        :param uuid: UUID of working directory
        :param identifier: Object identifier
        :param working_directory: working directory
        :param single_package: store as single package or as representation packages
        :return: version of the stored object
        """
        if single_package:
            version = super().store(identifier, working_directory, copy_dir=True)
        else:
            version = self.store_working_directory_as_representation_packages(uuid, identifier, working_directory)
        return version

    def store_working_directory_as_representation_packages(self, uuid, identifier, working_directory):
        """
        Store working directory as representation packages: files inside the representations directory are
        copied only if they are tar packages, all other files are copied as they are.
        :param uuid: UUID of working directory
        :param identifier: Object identifier
        :param working_directory: base directory containing the working directory named by the UUID
        :return: version
        """
        # Use the absolute path throughout: os.walk yields paths based on the directory it is given, so the
        # prefix check and the relative sub path below must be computed against that same directory.
        working_dir = os.path.abspath(os.path.join(working_directory, uuid))
        version = self._next_version(identifier) \
            if self.trigger_new_version(uuid, identifier, working_directory, self.repository_storage_dir) \
            else self.curr_version(identifier)
        target_dir = os.path.join(make_storage_data_directory_path(identifier, self.repository_storage_dir), version,
                                  to_safe_filename(identifier))
        representations_path = os.path.join(working_dir, self.representations_directory)
        changed = False
        for path, _, files in os.walk(working_dir):
            # path relative to the working directory; empty for the working directory itself
            rel_path = os.path.relpath(path, working_dir)
            sub_path = "" if rel_path == os.curdir else rel_path
            for file in files:
                # copy only packaged datasets, not the directories
                if not path.startswith(representations_path) or fnmatch.fnmatch(file, "*.tar"):
                    source = os.path.join(working_dir, sub_path, file)
                    target = os.path.join(target_dir, sub_path, file)
                    # copy files only if they are not identical
                    if not files_identical(source, target):
                        copy_file_with_base_directory(working_dir, target_dir, sub_path, file)
                        changed = True
        # update state in storage and working directory if any files have been changed
        if changed:
            storage_state_file = os.path.join(target_dir, "state.xml")
            working_state_file = os.path.join(working_dir, "state.xml")
            update_state(working_state_file, identifier, version)
            shutil.copy2(working_state_file, storage_state_file)
        return version
Exemple #7
0
class PairtreeStorage(object):
    """
    Pairtree storage class allowing to build a filesystem hierarchy for holding objects that are located by mapping
    identifier strings to object directory (or folder) paths with two characters at a time.

    The versions of an object are kept below the "data" part of the object, one numbered part per version.
    """

    storage_factory = None
    repository_storage_dir = None

    def __init__(self, repository_storage_dir):
        """
        Constructor initialises pairtree repository

        @type       repository_storage_dir: string
        @param      repository_storage_dir: repository storage directory
        """
        self.storage_factory = PairtreeStorageFactory()
        self.repository_storage_dir = repository_storage_dir
        self.repo_storage_client = self.storage_factory.get_store(
            store_dir=self.repository_storage_dir, uri_base="http://")

    def store(self,
              identifier,
              source_file,
              progress_reporter=default_reporter):
        """
        Storing an object in the pairtree path according to the given identifier. If a version of the object exists,
        a new version is created.

        @type       identifier: string
        @param      identifier: Identifier
        @type       source_file: string
        @param      source_file: Source file path to be stored in the repository
        @type       progress_reporter: function
        @param      progress_reporter: progress reporter function, called with 50 after the copy and with 100
                    after the checksum verification
        @raise      IOError: If the checksum of the copied file is incorrect
        """
        repo_object = self.repo_storage_client.get_object(identifier, True)
        basename = ntpath.basename(source_file)
        next_version = self._next_version(identifier)
        with open(source_file, 'rb') as stream:
            repo_object.add_bytestream(basename,
                                       stream,
                                       path="data/%s" % next_version)
        progress_reporter(50)
        # verify the copy by comparing the SHA256 checksum of the source with the one of the stored file
        checksum_source_file = ChecksumFile(source_file).get(
            ChecksumAlgorithm.SHA256)
        checksum_target_file = ChecksumFile(
            self.get_object_path(identifier)).get(ChecksumAlgorithm.SHA256)
        if checksum_source_file != checksum_target_file:
            raise IOError(
                "Storage of repository object for identifier '%s' failed!" %
                identifier)
        progress_reporter(100)

    def identifier_object_exists(self, identifier):
        """
        Verify if an object of the given identifier exists in the repository

        @type       identifier: string
        @param      identifier: Identifier
        @rtype:     boolean
        @return:    True if the object exists, false otherwise
        """
        logger.debug("Looking for object at path: %s/data" %
                     self.repo_storage_client._id_to_dirpath(identifier))
        return self.repo_storage_client.exists(identifier, "data")

    def identifier_version_object_exists(self, identifier, version_num):
        """
        Verify if the given version of the object exists in the repository

        @type       identifier: string
        @param      identifier: Identifier
        @type       version_num: int
        @param      version_num: version number
        @rtype:     boolean
        @return:    True if the object exists, false otherwise
        """
        version = '%05d' % version_num
        return self.repo_storage_client.exists(identifier, "data/%s" % version)

    def _get_version_parts(self, identifier):
        """
        Get version directories

        @type       identifier: string
        @param      identifier: Identifier
        @rtype:     list
        @return:    List of directories of the versions
        """
        return self.repo_storage_client.list_parts(identifier, "data")

    def _next_version(self, identifier):
        """
        Get next formatted version directory name

        @type       identifier: string
        @param      identifier: Identifier
        @rtype:     string
        @return:    Formatted version string (constant VersionDirFormat)
        """
        if not self.identifier_object_exists(identifier):
            return VersionDirFormat % 1
        # the next version is the first version number for which no part exists yet
        version_num = 1
        while self.identifier_version_object_exists(identifier, version_num):
            version_num += 1
        return VersionDirFormat % version_num

    def curr_version(self, identifier):
        """
        Get current formatted version directory name

        @type       identifier: string
        @param      identifier: Identifier
        @rtype:     string
        @return:    Formatted version string (constant VersionDirFormat)
        """
        return VersionDirFormat % self.curr_version_num(identifier)

    def curr_version_num(self, identifier):
        """
        Get current version number

        @type       identifier: string
        @param      identifier: Identifier
        @rtype:     int
        @return:    Current version number
        @raise      ValueError if no object exists for the identifier
        """
        if not self.identifier_object_exists(identifier):
            raise ValueError(
                "No repository object for id '%s'. Unable to get current version number."
                % identifier)
        # count up to the first missing version; the one before it is the current version
        version_num = 1
        while self.identifier_version_object_exists(identifier, version_num):
            version_num += 1
        version_num -= 1
        return version_num

    def get_object_path(self, identifier, version_num=0):
        """
        Get absolute file path of the stored object. If the version number is omitted, the path of the highest version
        number is returned.

        @type       identifier: string
        @param      identifier: Identifier
        @type       version_num: int
        @param      version_num: version number
        @rtype:     string
        @return:    Absolute file path of the stored object
        @raise      ValueError if the object or the requested version does not exist
        @raise      ObjectNotFoundException if the file is not available
        """
        if not self.identifier_object_exists(identifier):
            raise ValueError(
                "No repository object for id '%s'. Unable to get requested version object path."
                % identifier)
        if version_num == 0:
            version_num = self.curr_version_num(identifier)
        if not self.identifier_version_object_exists(identifier, version_num):
            raise ValueError("Repository object '%s' has no version %d." %
                             (identifier, version_num))
        version = '%05d' % version_num
        repo_obj = self.repo_storage_client.get_object(identifier, False)
        repo_obj_path = uri_to_safe_filename(
            os.path.join(repo_obj.id_to_dirpath(), "data/%s" % version))
        # the stored object is the first regular file found in the version directory
        try:
            return next(
                os.path.join(repo_obj_path, f)
                for f in os.listdir(repo_obj_path)
                if os.path.isfile(os.path.join(repo_obj_path, f)))
        except StopIteration:
            raise ObjectNotFoundException(
                "The file object does not exist in the repository")

    def get_object_item_stream(self, identifier, entry):
        """
        Get stream of tar file entry.

        @type       identifier: string
        @param      identifier: Identifier
        @type       entry: string
        @param      entry: tar file entry (path within tar file)
        @return:    Chunks of the entry as returned by ChunkedTarEntryReader.chunks
        @raise      ObjectNotFoundException if the tar entry does not exist in the stored package
        """
        object_path = self.get_object_path(identifier)
        t = tarfile.open(object_path, 'r')
        logger.debug("Trying to access entry %s" % entry)
        try:
            # getmember raises KeyError right away if the entry is not part of the archive
            t.getmember(entry)
            inst = ChunkedTarEntryReader(t)
            return inst.chunks(entry)
        except KeyError:
            # nothing will read from the archive any more, so release the handle before reporting
            t.close()
            logger.error('ERROR: Did not find %s in tar archive' % entry)
            raise ObjectNotFoundException(
                "Entry not found in repository object")

    def latest_version_ip_list(self):
        """
        Get a list of latest version packages from repository storage.
        @return:    List of dictionary items (keys "id", "version", "path") of IPs available in repository storage.
        """
        # List the storage directory of this instance: the object directories below are resolved against
        # self.repository_storage_dir, so the listing has to come from the same directory.
        files = Dir(self.repository_storage_dir, exclude_file='').files()
        packages = []
        for repofile in files:
            if repofile.endswith(".tar"):
                directory, _ = os.path.split(repofile)
                if directory.startswith("pairtree_root"):
                    # the last five characters of the directory are taken as the version
                    version = directory[-5:] if directory[-5:] != '' else '00001'
                    packages.append((repofile, version))
        # highest version first; the sort is stable, so packages of one version keep their listing order
        packages.sort(key=lambda package: package[1], reverse=True)
        lastversionfiles = []
        seen_ids = set()
        for repofile, version in packages:
            directory, _ = os.path.split(repofile)
            object_dir = os.path.join(self.repository_storage_dir,
                                      directory[:directory.find("/data/")])
            obj_id = self.repo_storage_client._get_id_from_dirpath(object_dir)
            # only the first (= highest version) package of every object is reported
            if obj_id not in seen_ids:
                seen_ids.add(obj_id)
                lastversionfiles.append({
                    "id": obj_id,
                    "version": version,
                    "path": repofile
                })
        return lastversionfiles
Exemple #8
0
 def __init__(self, store_dir, uri_base='urn:x-vioe:'):
     """
     Open the pairtree store located at store_dir and keep it in self.store.

     :param store_dir: directory of the pairtree store, passed on to the storage factory
     :param uri_base: URI base passed on to the storage factory
     """
     self.store = PairtreeStorageFactory().get_store(
         store_dir=store_dir,
         uri_base=uri_base)
Exemple #9
0
class PairtreeStorage(object):
    """
    Pairtree storage class allowing to build a filesystem hierarchy for holding objects that are located by mapping
    identifier strings to object directory (or folder) paths with two characters at a time.

    The versions of an object are kept below the "data" directory of the object, one directory per version.
    """
    storage_factory = None
    repository_storage_dir = None

    def __init__(self, repository_storage_dir):
        """
        Constructor initialises pairtree repository

        @type       repository_storage_dir: string
        @param      repository_storage_dir: repository storage directory
        """
        self.storage_factory = PairtreeStorageFactory()
        self.repository_storage_dir = repository_storage_dir
        self.repo_storage_client = self.storage_factory.get_store(
            store_dir=self.repository_storage_dir, uri_base="http://")

    # noinspection PyProtectedMember
    def store(self,
              identifier,
              source_directory,
              progress_reporter=default_reporter):
        """
        Storing a directory in the pairtree path according to the given identifier. If a version of the object exists,
        a new version is created.
        :param identifier: identifier
        :param source_directory: source directory
        :param progress_reporter: progress reporter, called with 100 once the directory has been copied
        :return: formatted name of the version directory that was created
        """
        dirpath = self.repo_storage_client._id_to_dirpath(identifier)
        next_version = self._next_version(identifier)
        target_data_directory = os.path.join(dirpath, "data")
        pathlib.Path(target_data_directory).mkdir(parents=True, exist_ok=True)
        target_data_version_directory = os.path.join(target_data_directory,
                                                     next_version)
        # the directory is copied to <object dir>/data/<version>/<safe identifier>
        target_data_version_asset_directory = os.path.join(
            target_data_version_directory, uri_to_safe_filename(identifier))
        shutil.copytree(source_directory, target_data_version_asset_directory)
        progress_reporter(100)
        return next_version

    def identifier_object_exists(self, identifier):
        """
        Verify if an object of the given identifier exists in the repository

        @type       identifier: string
        @param      identifier: Identifier
        @rtype:     boolean
        @return:    True if the object exists, false otherwise
        """
        logger.debug("Looking for object at path: %s/data" %
                     self.repo_storage_client._id_to_dirpath(identifier))
        return self.repo_storage_client.exists(identifier, "data")

    def identifier_version_object_exists(self, identifier, version_num):
        """
        Verify if the given version of the object exists in the repository
        :param identifier: identifier
        :param version_num: version number
        :return: result of the store's exists check for the version part
        """
        version = '%05d' % version_num
        return self.repo_storage_client.exists(identifier, "data/%s" % version)

    def _get_version_parts(self, identifier):
        """
        Get version directories
        :param identifier: identifier
        :return: version parts
        """
        return self.repo_storage_client.list_parts(identifier, "data")

    def _next_version(self, identifier):
        """
        Get next formatted version directory name
        :param identifier: identifier
        :return: next formatted version directory name
        """
        if not self.identifier_object_exists(identifier):
            return VersionDirFormat % 1
        # the next version is the first version number for which no directory exists yet
        version_num = 1
        while self.identifier_version_object_exists(identifier, version_num):
            version_num += 1
        return VersionDirFormat % version_num

    def curr_version(self, identifier):
        """
        Get current formatted version directory name
        :param identifier: identifier
        :return: current formatted version directory name
        """
        return VersionDirFormat % self.curr_version_num(identifier)

    def curr_version_num(self, identifier):
        """
        Get current version number
        :param identifier: identifier
        :return: current version number; 1 if no object exists for the identifier
        """
        if not self.identifier_object_exists(identifier):
            return 1
        # count up to the first missing version; the one before it is the current version
        version_num = 1
        while self.identifier_version_object_exists(identifier, version_num):
            version_num += 1
        version_num -= 1
        return version_num

    def get_object_path(self, identifier, version_num=0):
        """
        Get path of the stored object directory. If the version number is omitted, the path of the highest version
        number is returned.
        :param identifier: identifier
        :param version_num: version number
        :return: path of the first directory found inside the version directory
        :raise ValueError: if the object or the requested version does not exist
        :raise ObjectNotFoundException: if the version directory contains no directory
        """
        if not self.identifier_object_exists(identifier):
            raise ValueError("No repository object for id '%s'. "
                             "Unable to get requested version object path." %
                             identifier)
        if version_num == 0:
            version_num = self.curr_version_num(identifier)
        if not self.identifier_version_object_exists(identifier, version_num):
            raise ValueError("Repository object '%s' has no version %d." %
                             (identifier, version_num))
        version = '%05d' % version_num
        repo_obj = self.repo_storage_client.get_object(identifier, False)
        repo_obj_path = uri_to_safe_filename(
            os.path.join(repo_obj.id_to_dirpath(), "data/%s" % version))
        try:
            return next(
                os.path.join(repo_obj_path, f)
                for f in os.listdir(repo_obj_path)
                if os.path.isdir(os.path.join(repo_obj_path, f)))
        except StopIteration:
            raise ObjectNotFoundException(
                "The file object does not exist in the repository")

    def get_chunked_tar_entry_reader(self,
                                     identifier: str) -> ChunkedTarEntryReader:
        """
        Open the tar package of an object and wrap it in a chunked entry reader.
        The package is expected at <object path>/<safe identifier>.tar; the tar file is opened here and handed
        over to the returned reader.
        :param identifier: identifier
        :return: reader for the entries of the tar package
        """
        tar_file_path = os.path.join(
            self.get_object_path(identifier),
            "%s.tar" % uri_to_safe_filename(identifier))
        tar_file = tarfile.open(tar_file_path, 'r')
        return ChunkedTarEntryReader(tar_file)

    # noinspection PyProtectedMember
    def latest_version_ip_list(self) -> list:
        """
        Get a list of latest version packages from repository storage.
        :return: list of dictionaries (keys "id", "version", "path"), one per object, for the highest version found
        """
        files = rec_find_files(self.repository_storage_dir)
        packages = []
        for repofile in files:
            if repofile.endswith(".tar"):
                directory, _ = os.path.split(repofile)
                if directory.startswith("pairtree_root"):
                    # the last five characters of the directory are taken as the version
                    version = directory[-5:] if directory[-5:] != '' else '00001'
                    packages.append((repofile, version))
        # highest version first; the sort is stable, so packages of one version keep their listing order
        packages.sort(key=lambda package: package[1], reverse=True)
        lastversionfiles = []
        seen_ids = set()
        for repofile, version in packages:
            directory, _ = os.path.split(repofile)
            object_dir = os.path.join(self.repository_storage_dir,
                                      directory[:directory.find("/data/")])
            obj_id = self.repo_storage_client._get_id_from_dirpath(object_dir)
            # only the first (= highest version) package of every object is reported
            if obj_id not in seen_ids:
                seen_ids.add(obj_id)
                lastversionfiles.append({
                    "id": obj_id,
                    "version": version,
                    "path": repofile
                })
        return lastversionfiles
Exemple #10
0
class PairtreeStorage(object):
    """
    Read access to a pairtree repository in which the versions of an object are kept as numbered,
    zero-padded parts ("data/00001", "data/00002", ...) below the object's "data" part.
    """

    storage_factory = None
    repository_storage_dir = None

    def __init__(self, repository_storage_dir):
        """
        Create the storage factory and open the pairtree store of the repository.

        @type       repository_storage_dir: string
        @param      repository_storage_dir: repository storage directory
        """
        factory = PairtreeStorageFactory()
        self.storage_factory = factory
        self.repository_storage_dir = repository_storage_dir
        self.repo_storage_client = factory.get_store(
            store_dir=repository_storage_dir, uri_base="http://")

    def identifier_object_exists(self, identifier):
        """
        Check whether the repository holds an object for the identifier, i.e. whether its "data" part exists.

        @type       identifier: string
        @param      identifier: Identifier
        @return:    Result of the store's exists check for the "data" part
        """
        client = self.repo_storage_client
        return client.exists(identifier, "data")

    def identifier_version_object_exists(self, identifier, version_num):
        """
        Check whether the given version of the object exists in the repository.

        @type       identifier: string
        @param      identifier: Identifier
        @type       version_num: int
        @param      version_num: version number
        @return:    Result of the store's exists check for the version part
        """
        version_part = "data/%s" % ('%05d' % version_num)
        return self.repo_storage_client.exists(identifier, version_part)

    def curr_version_num(self, identifier):
        """
        Determine the current version number by probing versions 1, 2, ... until one is missing.

        @type       identifier: string
        @param      identifier: Identifier
        @rtype:     int
        @return:    Number of the last version found before the first missing one
        @raise      ValueError if no object exists for the identifier
        """
        if not self.identifier_object_exists(identifier):
            raise ValueError(
                "No repository object for id '%s'. Unable to get current version number."
                % identifier)
        latest = 0
        while self.identifier_version_object_exists(identifier, latest + 1):
            latest += 1
        return latest

    def get_object_path(self, identifier, version_num=0):
        """
        Get the path of the stored object file. With version number 0 (the default) the current version is used.

        @type       identifier: string
        @param      identifier: Identifier
        @type       version_num: int
        @param      version_num: version number
        @rtype:     string
        @return:    Path of the first regular file found in the version directory
        @raise      ValueError if the object or the requested version does not exist
        @raise      ObjectNotFoundException if the version directory contains no file
        """
        if not self.identifier_object_exists(identifier):
            raise ValueError(
                "No repository object for id '%s'. Unable to get requested version object path."
                % identifier)
        requested = version_num if version_num != 0 else self.curr_version_num(identifier)
        if not self.identifier_version_object_exists(identifier, requested):
            raise ValueError("Repository object '%s' has no version %d." %
                             (identifier, requested))
        version = '%05d' % requested
        repo_obj = self.repo_storage_client.get_object(identifier, False)
        version_dir = uri_to_safe_filename(
            os.path.join(repo_obj.id_to_dirpath(), "data/%s" % version))
        # hand back the first directory entry that is a regular file
        for name in os.listdir(version_dir):
            candidate = os.path.join(version_dir, name)
            if os.path.isfile(candidate):
                return candidate
        raise ObjectNotFoundException(
            "The file object does not exist in the repository")