def store_working_directory_as_representation_packages(self, uuid, identifier, working_directory):
     """
     Store working directory
     :param storage_directory:
     :param working_directory: working directory
     :param uuid: UUID of working directory
     :param identifier: Object identifier
     :return: version
     """
     # resolve to an absolute path so that the prefix stripping below works reliably
     working_dir = os.path.abspath(os.path.join(working_directory, uuid))
     version = self._next_version(identifier) \
         if self.trigger_new_version(uuid, identifier, working_directory, self.repository_storage_dir) \
         else self.curr_version(identifier)
     target_dir = os.path.join(make_storage_data_directory_path(identifier, self.repository_storage_dir), version,
                               to_safe_filename(identifier))
     changed = False
     for path, _, files in os.walk(working_dir):
         sub_path = path[len(working_dir):].lstrip(os.sep)
         for file in files:
             # outside the representations directory copy everything;
             # inside it, copy only the packaged (.tar) datasets
             if not path.startswith(os.path.join(working_dir, self.representations_directory)) \
                     or fnmatch.fnmatch(file, "*.tar"):
                 source = os.path.join(working_dir, sub_path, file)
                 target = os.path.join(target_dir, sub_path, file)
                 # copy files only if they are not identical
                 if not files_identical(source, target):
                     copy_file_with_base_directory(working_dir, target_dir, sub_path, file)
                     changed = True
     # update state in storage and working directory if any files have been changed
     if changed:
         storage_state_file = os.path.join(target_dir, "state.xml")
         working_state_file = os.path.join(working_dir, "state.xml")
         update_state(working_state_file, identifier, version)
         shutil.copy2(working_state_file, storage_state_file)
     return version
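# Example usage (illustrative; the storage instance, UUID, identifier, and path
# below are hypothetical and only show the calling convention):
#
#   version = storage.store_working_directory_as_representation_packages(
#       "91f1a1e8-5d6b-4c11-9afd-000000000000", "urn:example:asset", "/var/work")
#   # -> version string under which the directory content is now stored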
def trigger_new_version(self, uuid, identifier, config_path_work, storage_directory):
     """
     Determine whether a new version must be created, based on whether packaged files
     in the working directory differ from the current data set version in storage.
     :param uuid: UUID of the working directory
     :param identifier: data asset identifier
     :param config_path_work: base working directory path
     :param storage_directory: base storage directory path
     :return: True if a new version is triggered, False otherwise
     """
     working_dir = os.path.join(config_path_work, uuid)
     if self.identifier_object_exists(identifier):
         version = self.curr_version(identifier)
         data_asset_last_version_path = os.path.join(
             make_storage_data_directory_path(identifier, storage_directory),
             version, to_safe_filename(identifier))
         working_distributions_dir = os.path.join(working_dir, self.representations_directory)
         if not os.path.exists(working_distributions_dir):
             logger.debug("New version is not triggered because working catalogue directory does not exist.")
             return False
         stored_distributions_dir = os.path.join(data_asset_last_version_path, self.representations_directory)
         distribution_files = list_files_in_dir(working_distributions_dir)
         for dataset_dir in distribution_files:
             dataset_package_file = os.path.join(working_distributions_dir, "%s.tar" % dataset_dir)
             dataset_package_stored_file = os.path.join(stored_distributions_dir, "%s.tar" % dataset_dir)
             if not files_identical(dataset_package_file, dataset_package_stored_file):
                 logger.debug("New version triggered: dataset package '%s' differs from the stored version." % dataset_dir)
                 return True
     logger.debug("New version not triggered.")
     return False
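# Illustrative sketch (an assumption, not this project's actual helper): the
# `files_identical` calls above are expected to compare two files by content
# hash, roughly along these lines:
#
#   import hashlib
#
#   def files_identical(path_a, path_b, block_size=65536):
#       """Return True if both files exist and have identical SHA-256 digests."""
#       if not (os.path.isfile(path_a) and os.path.isfile(path_b)):
#           return False
#       digests = []
#       for p in (path_a, path_b):
#           h = hashlib.sha256()
#           with open(p, "rb") as f:
#               for block in iter(lambda: f.read(block_size), b""):
#                   h.update(block)
#           digests.append(h.hexdigest())
#       return digests[0] == digests[1]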
def get_tar_file_path(self, identifier, representation_label=None):
     """
     Get the path of a representation package or, if no representation label is
     given, of the single object package named after the identifier.
     :param identifier: object identifier
     :param representation_label: optional representation label
     :return: path of the tar file
     """
     object_path = self.get_object_path(identifier)
     if representation_label:
         tar_file_path = os.path.join(object_path, self.representations_directory, "%s.tar" % representation_label)
     else:
         tar_file_path = os.path.join(object_path, "%s.tar" % to_safe_filename(identifier))
     if os.path.exists(tar_file_path):
         logger.debug("Package file found at: %s" % tar_file_path)
         return tar_file_path
     raise ObjectNotFoundException("Package file not found at: %s" % tar_file_path)
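# Example (illustrative; identifier and label values are hypothetical):
#
#   storage.get_tar_file_path("urn:example:asset", representation_label="rep1")
#   # -> "<object_path>/<representations_directory>/rep1.tar" if it exists,
#   #    otherwise ObjectNotFoundException is raised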
def get_object_item_stream(self, identifier, representation_label, entry, tar_file=None):
     """
     Get a stream for an entry of a packaged representation tar file.
     :param identifier: package identifier
     :param representation_label: label of the representation (used in directory and
     file names); if empty, the object is assumed to be stored as a single package
     :param entry: entry of the tar file
     :param tar_file: optional already opened tar file to read from
     :return: chunk iterator for the tar file entry
     """
     object_path = self.get_object_path(identifier)
     tar_file_name = "%s.tar" % representation_label if representation_label else to_safe_filename(identifier)
     tar_file_path = os.path.join(object_path, self.representations_directory, tar_file_name)
     if os.path.exists(tar_file_path):
         logger.debug("Packaged representation file found at: %s" % entry)
     t = tar_file if tar_file else tarfile.open(tar_file_path, 'r')
     logger.debug("Accessing access entry %s" % entry)
     try:
         inst = ChunkedTarEntryReader(t)
         return inst.chunks(entry)
     except KeyError:
         logger.error("Entry '%s' not found in tar archive" % entry)
         raise ObjectNotFoundException("Entry not found in repository object: %s" % entry)
def make_storage_directory_path(identifier, version, config_path_storage):
    """Used for remote (no access to storage backend)"""
    pts = DirectoryPairtreeStorage(config_path_storage)
    return os.path.join(pts.get_dir_path_from_id(identifier), "data", version, to_safe_filename(identifier))
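# Example (illustrative; the identifier, version, and base path are hypothetical):
#
#   make_storage_directory_path("urn:example:asset", "00001", "/var/storage")
#   # -> "<pairtree path for identifier>/data/00001/<safe identifier>"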