def store_working_directory_as_representation_packages(self, uuid, identifier, working_directory):
    """Copy the contents of a working directory into versioned archival storage.

    A new version number is allocated only when `trigger_new_version` reports
    changed dataset packages; otherwise files go into the current version.
    Inside the representations directory only packaged datasets (*.tar) are
    copied, never the unpacked dataset directories. If any file changed,
    ``state.xml`` is refreshed in both the working directory and the storage
    target.

    :param uuid: UUID of the working directory (sub-directory of `working_directory`)
    :param identifier: object identifier
    :param working_directory: parent directory that contains the uuid directory
    :return: version the files were written to
    """
    working_dir = os.path.join(working_directory, uuid)
    # Allocate the next version only if the packaged content actually changed;
    # otherwise keep writing into the current version directory.
    version = self._next_version(identifier) \
        if self.trigger_new_version(uuid, identifier, working_directory, self.repository_storage_dir) \
        else self.curr_version(identifier)
    target_dir = os.path.join(
        make_storage_data_directory_path(identifier, self.repository_storage_dir),
        version, to_safe_filename(identifier))
    changed = False
    for path, _, files in os.walk(os.path.abspath(working_dir)):
        # Directory path relative to the working directory root.
        sub_path = path.replace(working_dir, "").lstrip("/")
        for file in files:
            # copy only packaged datasets, not the directories
            if not path.startswith(os.path.join(working_dir, self.representations_directory)) \
                    or fnmatch.fnmatch(file, "*.tar"):
                source = os.path.join(working_dir, sub_path, file)
                target = os.path.join(target_dir, sub_path, file)
                # copy files only if they are not identical
                if not files_identical(source, target):
                    copy_file_with_base_directory(working_dir, target_dir, sub_path, file)
                    changed = True
    # update state in storage and working directory if any files have been changed
    if changed:
        storage_state_file = os.path.join(target_dir, "state.xml")
        working_state_file = os.path.join(working_dir, "state.xml")
        update_state(working_state_file, identifier, version)
        shutil.copy2(working_state_file, storage_state_file)
    return version
def trigger_new_version(self, uuid, identifier, config_path_work, storage_directory):
    """Decide whether a new version must be created for a data asset.

    Compares each packaged dataset (``<name>.tar``) in the working
    representations directory against the copy stored under the current
    version; any difference triggers a new version.

    :param uuid: UUID of working directory
    :param identifier: data asset identifier
    :param config_path_work: parent working directory containing the uuid directory
    :param storage_directory: root of the repository storage area
    :return: True, if new version is triggered, False otherwise
    """
    working_dir = os.path.join(config_path_work, uuid)
    # A first-time store (object not in storage yet) never triggers a new version.
    if self.identifier_object_exists(identifier):
        version = self.curr_version(identifier)
        data_asset_last_version_path = os.path.join(
            make_storage_data_directory_path(identifier, storage_directory),
            version, to_safe_filename(identifier))
        working_distributions_dir = os.path.join(working_dir, self.representations_directory)
        if not os.path.exists(working_distributions_dir):
            logger.debug("New version is not triggered because working catalogue directory does not exist.")
            return False
        stored_distributions_dir = os.path.join(data_asset_last_version_path, self.representations_directory)
        distribution_files = list_files_in_dir(working_distributions_dir)
        # Compare each working dataset package with its stored counterpart.
        for dataset_dir in distribution_files:
            dataset_package_file = os.path.join(working_distributions_dir, "%s.tar" % dataset_dir)
            dataset_package_stored_file = os.path.join(stored_distributions_dir, "%s.tar" % dataset_dir)
            files_ident = files_identical(dataset_package_file, dataset_package_stored_file)
            if not files_ident:
                logger.debug("New version triggered because hash of dataset packages is not identical")
                return True
    logger.debug("New version not triggered.")
    return False
def get_tar_file_path(self, identifier, representation_label=None):
    """Resolve the on-disk path of a package tar file.

    :param identifier: object identifier
    :param representation_label: optional representation label; when given,
        the tar is looked up inside the representations directory, otherwise
        a single object-level package is assumed
    :return: absolute path of the existing tar file
    :raises ObjectNotFoundException: if no package file exists at the
        resolved location
    """
    object_path = self.get_object_path(identifier)
    if representation_label:
        candidate = os.path.join(object_path, self.representations_directory,
                                 "%s.tar" % representation_label)
    else:
        candidate = os.path.join(object_path, "%s.tar" % to_safe_filename(identifier))
    # Guard clause: fail fast when the resolved package is absent.
    if not os.path.exists(candidate):
        raise ObjectNotFoundException("Package file not found")
    logger.debug("Package file found at: %s" % candidate)
    return candidate
def get_object_item_stream(self, identifier, representation_label, entry, tar_file=None):
    """ Get stream of a representation tar file entry

    :param identifier: package identifier
    :param representation_label: label of the representation (used in directory and file names),
        can be empty, tar assumed to be single package in that case
    :param entry: entry of the tar file
    :param tar_file: optional already-open tarfile to read from; when omitted,
        the tar is opened from the resolved path (and closed again if the
        entry lookup fails)
    :return: chunks iterator of the tar file
    :raises ObjectNotFoundException: if the package file or the entry is missing
    """
    object_path = self.get_object_path(identifier)
    tar_file_name = "%s.tar" % representation_label if representation_label else to_safe_filename(identifier)
    tar_file_path = os.path.join(object_path, self.representations_directory, tar_file_name)
    if not os.path.exists(tar_file_path):
        # Fix: previously a missing package file silently fell through and
        # returned None; raise the exception type callers already handle
        # (consistent with get_tar_file_path).
        raise ObjectNotFoundException("Package file not found")
    # Fix: the debug message claimed to show where the file was found but
    # formatted the entry name instead of the path.
    logger.debug("Packaged representation file found at: %s" % tar_file_path)
    t = tar_file if tar_file else tarfile.open(tar_file_path, 'r')
    logger.debug("Accessing access entry %s" % entry)
    try:
        inst = ChunkedTarEntryReader(t)
        return inst.chunks(entry)
    except KeyError:
        if tar_file is None:
            # Fix: close the handle we opened ourselves to avoid leaking it
            # on the error path (caller-supplied handles stay open).
            t.close()
        logger.error('ERROR: Did not find %s in tar archive' % entry)
        raise ObjectNotFoundException("Entry not found in repository object")
def make_storage_directory_path(identifier, version, config_path_storage):
    """Build the storage path of an object version.

    Used for remote (no access to storage backend).

    :param identifier: object identifier
    :param version: version directory name
    :param config_path_storage: root of the pairtree storage area
    :return: path ``<pairtree dir>/data/<version>/<safe identifier>``
    """
    pairtree = DirectoryPairtreeStorage(config_path_storage)
    base_dir = pairtree.get_dir_path_from_id(identifier)
    return os.path.join(base_dir, "data", version, to_safe_filename(identifier))