def one(self):
    """Return the single result of the query.

    Raises NoResultFound if the query matched nothing and
    MultipleResultsFound if it matched more than one row.
    """
    results = self.execute()
    if not len(results):
        raise NoResultFound()
    if len(results) > 1:
        raise MultipleResultsFound()
    return results[0]
def size(self, iterator, recurse=False, verbose=False):
    """
    Yields (path, size-in-bytes) tuples for the selected data objects
    and collections.

    Examples:

    >>> session.bulk.size('~/data/out*.txt')
    >>> session.bulk.size('./data', recurse=True)

    Arguments:

    iterator: iterator or str
        Defines which items are subject to the bulk operation.
        Can be an iterator (e.g. using search_manager.find()) or a
        string (which will be used to construct a
        search_manager.iglob() iterator). Data sizes will be returned
        for matching data objects and, if used recursively,
        collections.

    recurse: bool (default: False)
        Whether to use recursion, meaning that the data size of a
        matching collection will be calculated as the sum of the sizes
        of its data objects and subcollections.

    verbose: bool (default: False)
        Whether to print more output.
    """
    if isinstance(iterator, str):
        iterator = self.session.search.iglob(iterator)

    for item in iterator:
        path = self.session.path.get_absolute_irods_path(item)

        if self.session.collections.exists(path):
            if recurse:
                new_iterator = self.size(item + '/*', recurse=True,
                                         verbose=verbose)
                size = sum(result[1] for result in new_iterator)
            else:
                self.log('Skipping collection %s (no recursion)' % item,
                         verbose)
                continue
        else:
            dirname = os.path.dirname(path)
            basename = os.path.basename(path)
            criteria = [
                Criterion('=', Collection.name, dirname),
                Criterion('=', DataObject.name, basename),
            ]
            fields = [DataObject.size]
            q = self.session.query(*fields).filter(*criteria)
            results = [result for result in q.get_results()]

            if len(results) > 1:
                raise MultipleResultsFound(
                    'Different replicas of data object %s '
                    'have different sizes' % path)

            size = results[0][DataObject.size]

        yield (item, size)
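# Usage sketch (not taken from the source): sum the sizes yielded by size()
# for everything under a path. `session` is assumed to be an object whose
# `bulk` attribute exposes the size() generator above; the pattern is a
# placeholder.
def print_total_size(session, pattern='./data'):
    total = 0
    for path, nbytes in session.bulk.size(pattern, recurse=True):
        print('%s: %d bytes' % (path, nbytes))
        total += nbytes
    print('total: %d bytes' % total)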
def one(self):
    """Return the single result of the query, closing any pending
    server-side result set first.

    Raises NoResultFound if the query matched nothing and
    MultipleResultsFound if it matched more than one row.
    """
    results = self.execute()
    # If more result pages are pending on the server, close the query so
    # the server-side result set is not left open.
    if results.continue_index > 0:
        self.continue_index(results.continue_index).close()
    if not len(results):
        raise NoResultFound()
    if len(results) > 1:
        raise MultipleResultsFound()
    return results[0]
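# Usage sketch for one(): a hedged example assuming a python-irodsclient-style
# session whose query() supports column filters, and that NoResultFound lives
# in irods.exception; the collection path and object name are placeholders.
from irods.exception import NoResultFound
from irods.models import Collection, DataObject

def lookup_object_id(session, collection_path, object_name):
    # Expect exactly one matching row; one() raises NoResultFound for zero
    # rows and lets MultipleResultsFound propagate for ambiguous matches.
    query = (session.query(DataObject.id)
             .filter(Collection.name == collection_path)
             .filter(DataObject.name == object_name))
    try:
        return query.one()[DataObject.id]
    except NoResultFound:
        return None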
def federated_exists(self, sds_file, root_collection):
    """Check whether a data object is present in a federated iRODS zone
    with the same checksum.

    Parameters
    ----------
    sds_file : `SDSFile`
        File to search for.
    root_collection : `str`
        The archive's root collection.

    Returns
    -------
    `bool`
        True if the file exists remotely with the same checksum,
        False otherwise.

    Raises
    ------
    MultipleResultsFound
        Raised if more than one version of the file exists in the
        remote location.
    """
    # Query iRODS
    q = (irods_session.session.query(
            Collection.name, DataObject.name, DataObject.checksum)
         .filter(Collection.name == sds_file.custom_directory(root_collection))
         .filter(DataObject.name == sds_file.filename))
    results = q.all()

    # No file found
    if len(results) == 0:
        self.logger.debug("File %s does not exist in root collection %s."
                          % (sds_file.filename, root_collection))
        return False

    # Read checksum(s) into a set to eliminate repeats
    checksum_set = {r[DataObject.checksum] for r in results}
    if len(checksum_set) > 1:
        raise MultipleResultsFound(
            "File %s has more than one different version."
            % sds_file.custom_path(root_collection))
    remote_checksum = checksum_set.pop()

    # Compare checksums
    if sds_file.checksum == remote_checksum:
        self.logger.debug(
            "File %s does exist in iRODS, with the same checksum (%s)."
            % (sds_file.filename, sds_file.checksum))
        return True

    self.logger.debug(
        "File %s does exist in iRODS, but with a different checksum (%s vs %s)."
        % (sds_file.filename, remote_checksum, sds_file.checksum))
    return False
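# Usage sketch (hypothetical names): skip a transfer when an identical copy is
# already present in the federated zone. `archiver` stands for whatever object
# the method above is bound to; `upload` is an assumed helper, not part of the
# source.
def replicate_if_missing(archiver, sds_file, root_collection, upload):
    if archiver.federated_exists(sds_file, root_collection):
        # Same checksum already on the remote side, nothing to do.
        return False
    upload(sds_file, root_collection)
    return True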
def get_federated_pid(self, sds_file, root_collection):
    """Get the PID of a data object in a federated iRODS zone.

    Parameters
    ----------
    sds_file : `SDSFile`
        File to search for.
    root_collection : `str`
        The archive's root collection.

    Returns
    -------
    pid : `str`
        The PID if the file has one, or None if the file does not exist
        or does not have a PID.

    Raises
    ------
    MultipleResultsFound
        Raised if the file has more than one PID assigned to it.
    """
    # Query iRODS for the "PID" metadata attribute of the data object
    q = (irods_session.session.query(
            Collection.name, DataObject.name, DataObjectMeta.value)
         .filter(Collection.name == sds_file.custom_directory(root_collection))
         .filter(DataObject.name == sds_file.filename)
         .filter(DataObjectMeta.name == "PID"))
    results = q.all()

    # No file or PID found
    if len(results) == 0:
        self.logger.debug(
            "File %s does not exist or does not have a PID registered."
            % sds_file.filename)
        return None

    # Read PID(s) into a set to eliminate repeats
    pid_set = {r[DataObjectMeta.value] for r in results}
    if len(pid_set) > 1:
        raise MultipleResultsFound("File %s has more than one PID."
                                   % sds_file.custom_path(root_collection))

    # Return the PID
    pid = pid_set.pop()
    self.logger.debug("File %s has PID %s." % (sds_file.filename, pid))
    return pid
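# Usage sketch (hypothetical names): reuse an existing PID on the remote copy
# and only mint a new one when none is registered. `manager` stands for the
# object the method above is bound to; `mint_pid` is an assumed helper, not
# part of the source.
def ensure_pid(manager, sds_file, root_collection, mint_pid):
    pid = manager.get_federated_pid(sds_file, root_collection)
    if pid is not None:
        return pid
    return mint_pid(sds_file)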