def put(self, source, target, source_dir=None, transfer_timeout=None, progressbar=False):
    """
    Allows to store files inside the referred RSE.

    The PUT is first issued without checking that the destination directory
    exists. If the server answers with anything other than 200, 201 or 409,
    the three directory levels above the target are created with
    ``self.mkdir`` and the PUT is issued a second time.

    :param source: Physical file name
    :param target: Name of the file on the storage system e.g. with prefixed scope
    :param source_dir: Path where the to be transferred files are stored in the local file system
    :param transfer_timeout: Transfer timeout (in seconds) - dummy, the request uses ``self.timeout``
    :param progressbar: Forwarded as the third argument of ``UploadInChunks``

    :raises SourceNotFound: if the local file does not exist or another IOError occurs
    :raises FileReplicaAlreadyExists: if the server answers 409
    :raises RSEAccessDenied: if the second attempt is answered with 401
    :raises ServiceUnavailable: on a connection error or a read timeout
    :raises RucioException: for any other status code of the second attempt
    :raises DestinationNotAccessible: not raised here directly; presumably by ``self.mkdir`` - TODO confirm
    """
    path = self.path2pfn(target)
    full_name = source_dir + '/' + source if source_dir else source
    directories = path.split('/')

    def send_file():
        """Stream the local file to ``path`` and return the server response."""
        if not os.path.exists(full_name):
            raise exception.SourceNotFound()
        chunks = UploadInChunks(full_name, 10000000, progressbar)
        return self.session.put(path, data=IterableToFileAdapter(chunks), verify=False,
                                allow_redirects=True, timeout=self.timeout, cert=self.cert)

    try:
        # Try the upload without testing the existence of the destination directory
        result = send_file()
        if result.status_code in (200, 201):
            return
        if result.status_code == 409:
            raise exception.FileReplicaAlreadyExists()

        # Create the directories (outermost first) before issuing the PUT again
        for directory_level in range(3, 0, -1):
            self.mkdir("/".join(directories[:-directory_level]))

        result = send_file()
        if result.status_code in (200, 201):
            return
        if result.status_code == 409:
            raise exception.FileReplicaAlreadyExists()
        if result.status_code == 401:
            raise exception.RSEAccessDenied()
        # catchall exception
        raise exception.RucioException(result.status_code, result.text)
    except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as error:
        # Both are IOError subclasses, so they must be matched before the
        # generic IOError handler or they would be reported as SourceNotFound.
        raise exception.ServiceUnavailable(error)
    except IOError as error:
        raise exception.SourceNotFound(error)
def put(self, source, target, source_dir=None, transfer_timeout=None):
    """
    Allows to store files inside the referred RSE.

    The file is PUT to a signed URL obtained from ``self._get_signed_url``.
    An answer other than 200, 201 or 409 is retried once with the same
    request before the status code is turned into an exception.

    :param source: path to the source file on the client file system
    :param target: path to the destination file on the storage
    :param source_dir: Path where the to be transferred files are stored in the local file system
    :param transfer_timeout: Transfer timeout (in seconds) - dummy, the request uses ``self.timeout``

    :raises SourceNotFound: if the local source file does not exist or another IOError occurs.
    :raises FileReplicaAlreadyExists: if the server answers 409.
    :raises RSEAccessDenied: if the second attempt is answered with 401.
    :raises ServiceUnavailable: on a connection error or a read timeout.
    :raises RucioException: for any other status code of the second attempt.
    """
    full_name = source_dir + '/' + source if source_dir else source
    path = self._get_signed_url(target, operation='write')

    try:
        for _ in range(2):
            if not os.path.exists(full_name):
                raise exception.SourceNotFound()
            chunks = UploadInChunks(full_name, 10000000, progressbar=False)
            result = self.session.put(path, data=IterableToFileAdapter(chunks), verify=False,
                                      allow_redirects=True, timeout=self.timeout, cert=self.cert)
            if result.status_code in (200, 201):
                return
            if result.status_code == 409:
                raise exception.FileReplicaAlreadyExists()

        # Both attempts were rejected; classify the last answer
        if result.status_code == 401:
            raise exception.RSEAccessDenied()
        # catchall exception
        raise exception.RucioException(result.status_code, result.text)
    except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as error:
        # Both are IOError subclasses, so they must be matched before the
        # generic IOError handler or they would be reported as SourceNotFound.
        raise exception.ServiceUnavailable(error)
    except IOError as error:
        raise exception.SourceNotFound(error)
def rename(rse_settings, files):
    """
    Rename files stored on the connected storage.
    Providing a list indicates the bulk mode.

    :param rse_settings: RSE settings handed to ``create_protocol``
    :param files: a single dict or a list with dicts containing 'scope', 'name', 'new_scope' and 'new_name'
                  if LFNs are used or only 'name' and 'new_name' if PFNs are used.
                  If 'new_scope' or 'new_name' are not provided, the current one is used.
                  The dicts are only read, never modified.
                  Examples:
                  [
                  {'name': '3_rse_remote_rename.raw', 'scope': 'user.jdoe', 'new_name': '3_rse_new.raw', 'new_scope': 'user.jdoe'},
                  {'name': 'user/jdoe/d9/cb/9_rse_remote_rename.raw', 'new_name': 'user/jdoe/c6/4a/9_rse_new.raw'}
                  ]

    :returns: True when exactly one result was collected (the single-file case);
              otherwise the list ``[gs, ret]`` where ``gs`` is the global status and ``ret`` maps
              'scope:name' (or the PFN) to True or to the exception of that file

    :raises RSENotConnected: no connection to a specific storage has been established
    :raises SourceNotFound: remote source file can not be found on storage
    :raises FileReplicaAlreadyExists: the new name is already present on storage
    :raises DestinationNotAccessible: remote destination directory is not accessible
    :raises ServiceUnavailable: for any other reason
    """
    ret = {}
    gs = True  # gs represents the global status which indicates if every operation worked in bulk mode

    protocol = create_protocol(rse_settings, 'write')
    protocol.connect()

    files = files if isinstance(files, list) else [files]
    try:
        for f in files:
            if 'scope' in f:  # LFN is provided
                key = '%s:%s' % (f['scope'], f['name'])
                # Fall back to the current name/scope when no new one is provided
                new_name = f.get('new_name', f['name'])
                new_scope = f.get('new_scope', f['scope'])
                pfn = list(protocol.lfns2pfns({'name': f['name'], 'scope': f['scope']}).values())[0]
                new_pfn = list(protocol.lfns2pfns({'name': new_name, 'scope': new_scope}).values())[0]
            else:  # PFNs are provided
                pfn = f['name']
                new_pfn = f['new_name']
                key = pfn

            if protocol.exists(new_pfn):
                # The target must not be on storage yet
                ret[key] = exception.FileReplicaAlreadyExists('File %s already exists on storage' % (new_pfn))
                gs = False
            elif not protocol.exists(pfn):
                # The source has to be on storage
                ret[key] = exception.SourceNotFound('File %s not found on storage' % (pfn))
                gs = False
            else:
                try:
                    protocol.rename(pfn, new_pfn)
                    ret[key] = True
                except Exception as e:
                    # Recorded per file so the remaining files are still processed
                    ret[key] = e
                    gs = False
    finally:
        # Release the connection even if lfns2pfns()/exists() raised
        protocol.close()

    if len(ret) == 1:
        outcome = next(iter(ret.values()))
        if isinstance(outcome, Exception):
            raise outcome
        return outcome
    return [gs, ret]
def upload(rse_settings, lfns, source_dir=None, force_pfn=None, force_scheme=None, transfer_timeout=None):
    """
    Uploads a file to the connected storage.
    Providing a list indicates the bulk mode.

    If the write protocol supports renaming, the file is uploaded as
    '<pfn>.rucio.upload', verified, and then renamed to its final PFN;
    otherwise it is uploaded directly to the final PFN and verified there.

    :param rse_settings: RSE settings handed to ``create_protocol``; 'verify_checksum' is read
                         when the protocol reports RSEChecksumUnavailable
    :param lfns: a single dict or a list with dicts containing 'scope' and 'name'.
                 Examples:
                 [
                 {'name': '1_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 42, 'adler32': '87HS3J968JSNWID'},
                 {'name': '2_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 4711, 'adler32': 'RSSMICETHMISBA837464F'}
                 ]
                 If the 'filename' key is present, it will be used by Rucio as the actual name of the file on disk (separate from the Rucio 'name').
    :param source_dir: path to the local directory including the source files
    :param force_pfn: use the given PFN -- can lead to dark data, use sparingly
    :param force_scheme: use the given protocol scheme, overriding the protocol priority in the RSE description
    :param transfer_timeout: set this timeout (in seconds) for the transfers, for protocols that support it

    :returns: ``{'success': True, 'pfn': pfn}`` when exactly one result was collected, where ``pfn``
              is the PFN of the last processed file; otherwise the list ``[gs, ret]`` where ``gs`` is
              the global status and ``ret`` maps 'scope:name' to True or to the exception of that file

    :raises RSENotConnected: no connection to a specific storage has been established
    :raises SourceNotFound: local source file can not be found
    :raises DestinationNotAccessible: remote destination directory is not accessible
    :raises ServiceUnavailable: for any other reason
    """
    ret = {}
    gs = True  # gs represents the global status which indicates if every operation worked in bulk mode

    protocol = create_protocol(rse_settings, 'write', scheme=force_scheme)
    protocol.connect()
    protocol_delete = create_protocol(rse_settings, 'delete')
    protocol_delete.connect()

    def stored_copy_is_valid(remote_pfn, expected):
        """Compare the metadata of ``remote_pfn`` with ``expected``; None means nothing was comparable."""
        try:
            stats = protocol.stat(remote_pfn)
            valid = None
            if ('adler32' in stats) and ('adler32' in expected):
                valid = stats['adler32'] == expected['adler32']
            # The file size is only consulted when no checksum could be compared
            if (valid is None) and ('filesize' in stats) and ('filesize' in expected):
                valid = stats['filesize'] == expected['filesize']
            return valid
        except exception.RSEChecksumUnavailable:
            if rse_settings['verify_checksum'] is False:
                return True
            raise exception.RucioException('Checksum not validated')

    lfns = lfns if isinstance(lfns, list) else [lfns]
    try:
        for lfn in lfns:
            name = lfn['name']
            base_name = lfn.get('filename', name)
            scope = lfn['scope']
            key = '%s:%s' % (scope, name)

            if 'adler32' not in lfn:
                gs = False
                ret[key] = exception.RucioException('Missing checksum for file %s:%s' % (scope, name))
                continue
            if 'filesize' not in lfn:
                gs = False
                ret[key] = exception.RucioException('Missing filesize for file %s:%s' % (scope, name))
                continue

            if force_pfn:
                pfn = force_pfn
            else:
                pfn = list(protocol.lfns2pfns(make_valid_did(lfn)).values())[0]
                if isinstance(pfn, exception.RucioException):
                    raise pfn

            # First check if renaming operation is supported
            if protocol.renaming:
                # Check if file replica is already on the storage system
                if protocol.overwrite is False and protocol.exists(pfn):
                    ret[key] = exception.FileReplicaAlreadyExists('File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn))
                    gs = False
                    continue

                temp_pfn = '%s.rucio.upload' % pfn
                if protocol.exists(temp_pfn):  # Check for left over of previous unsuccessful attempts
                    try:
                        protocol_delete.delete('%s.rucio.upload' % list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0])
                    except Exception as e:
                        ret[key] = exception.RSEOperationNotSupported('Unable to remove temporary file %s.rucio.upload: %s' % (pfn, str(e)))
                        gs = False
                        continue

                try:  # Try uploading file
                    protocol.put(base_name, temp_pfn, source_dir, transfer_timeout=transfer_timeout)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                try:  # Get metadata of file to verify if upload was successful
                    valid = stored_copy_is_valid(temp_pfn, lfn)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                if valid:  # The upload finished successful and the file can be renamed
                    try:
                        protocol.rename(temp_pfn, pfn)
                        ret[key] = True
                    except Exception as e:
                        gs = False
                        ret[key] = e
                else:
                    gs = False
                    ret[key] = exception.RucioException('Replica %s is corrupted.' % pfn)
            else:
                # Check if file replica is already on the storage system
                if protocol.overwrite is False and protocol.exists(pfn):
                    ret[key] = exception.FileReplicaAlreadyExists('File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn))
                    gs = False
                    continue

                try:  # Try uploading file
                    protocol.put(base_name, pfn, source_dir, transfer_timeout=transfer_timeout)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                try:  # Get metadata of file to verify if upload was successful
                    valid = stored_copy_is_valid(pfn, lfn)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                # NOTE: a successful upload records nothing in ret on this path
                if not valid:
                    gs = False
                    ret[key] = exception.RucioException('Replica %s is corrupted.' % pfn)
    finally:
        # Release both connections even if PFN resolution or an exists() call raised
        protocol.close()
        protocol_delete.close()

    if len(ret) == 1:
        outcome = next(iter(ret.values()))
        if isinstance(outcome, Exception):
            raise outcome
        return {'success': outcome, 'pfn': pfn}
    return [gs, ret]
def upload(rse_settings, lfns, source_dir=None, force_pfn=None, force_scheme=None, transfer_timeout=None, delete_existing=False, sign_service=None, auth_token=None):
    """
    Uploads a file to the connected storage.
    Providing a list indicates the bulk mode.

    If the write protocol supports renaming, the file is uploaded as
    '<pfn>.rucio.upload', verified, and then renamed to its final PFN;
    otherwise it is uploaded directly to the final PFN and verified there.

    :param rse_settings: RSE attributes; 'rse' is read when signing URLs and 'verify_checksum'
                         when the stored file's checksum can not be obtained
    :param lfns: a single dict or a list with dicts containing 'scope' and 'name'.
                 Examples:
                 [
                 {'name': '1_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 42, 'adler32': '87HS3J968JSNWID'},
                 {'name': '2_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 4711, 'adler32': 'RSSMICETHMISBA837464F'}
                 ]
                 If the 'filename' key is present, it will be used by Rucio as the actual name of the file on disk (separate from the Rucio 'name').
    :param source_dir: path to the local directory including the source files
    :param force_pfn: use the given PFN -- can lead to dark data, use sparingly
    :param force_scheme: use the given protocol scheme, overriding the protocol priority in the RSE description
    :param transfer_timeout: set this timeout (in seconds) for the transfers, for protocols that support it
    :param delete_existing: skip the "already exists" check; with a renaming protocol an existing
                            file at the final PFN is deleted before the upload
    :param sign_service: use the given service (e.g. gcs, s3, swift) to sign the URL
    :param auth_token: Optionally passing JSON Web Token (OIDC) string for authentication

    :returns: ``{'success': True, 'pfn': pfn}`` when exactly one result was collected; otherwise
              ``{0: gs, 1: ret, 'success': gs, 'pfn': pfn}`` where ``gs`` is the global status and
              ``ret`` maps 'scope:name' to True or to the exception of that file. ``pfn`` is the PFN
              of the last processed file, or None if no file got as far as PFN resolution.

    :raises RSENotConnected: no connection to a specific storage has been established
    :raises SourceNotFound: local source file can not be found
    :raises DestinationNotAccessible: remote destination directory is not accessible
    :raises ServiceUnavailable: for any other reason
    """
    ret = {}
    gs = True  # gs represents the global status which indicates if every operation worked in bulk mode
    # Bound up front: the final return reads it even when the list is empty
    # or every file failed before its PFN was resolved.
    pfn = None

    protocol = create_protocol(rse_settings, 'write', scheme=force_scheme, auth_token=auth_token)
    protocol.connect()
    protocol_delete = create_protocol(rse_settings, 'delete', auth_token=auth_token)
    protocol_delete.connect()

    def stored_copy_is_valid(remote_pfn, expected):
        """Compare the metadata of ``remote_pfn`` with the checksums/size in ``expected``."""
        try:
            stats = _retry_protocol_stat(protocol, remote_pfn)
            # Verify all supported checksums and keep track of the verified ones
            verified_checksums = [stats[checksum_name] == expected[checksum_name]
                                  for checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS
                                  if (checksum_name in stats) and (checksum_name in expected)]
            # Upload is successful if at least one checksum matched
            valid = any(verified_checksums)
            # NOTE(review): this fallback also runs when a checksum was compared and
            # did NOT match, so an equal file size can override the mismatch - confirm
            # whether that is intended.
            if not valid and ('filesize' in stats) and ('filesize' in expected):
                valid = stats['filesize'] == expected['filesize']
            return valid
        except (NotImplementedError, exception.RSEChecksumUnavailable):
            if rse_settings['verify_checksum'] is False:
                return True
            raise exception.RucioException('Checksum not validated')

    lfns = lfns if isinstance(lfns, list) else [lfns]
    try:
        for lfn in lfns:
            name = lfn['name']
            base_name = lfn.get('filename', name)
            scope = lfn['scope']
            key = '%s:%s' % (scope, name)

            if 'adler32' not in lfn:
                gs = False
                ret[key] = exception.RucioException('Missing checksum for file %s:%s' % (scope, name))
                continue
            if 'filesize' not in lfn:
                gs = False
                ret[key] = exception.RucioException('Missing filesize for file %s:%s' % (scope, name))
                continue

            if force_pfn:
                pfn = force_pfn
                readpfn = force_pfn
            else:
                pfn = list(protocol.lfns2pfns(make_valid_did(lfn)).values())[0]
                if isinstance(pfn, exception.RucioException):
                    raise pfn
                readpfn = pfn
                if sign_service is not None:
                    # need a separate signed URL for read operations (exists and stat)
                    readpfn = __get_signed_url(rse_settings['rse'], sign_service, 'read', pfn)  # NOQA pylint: disable=undefined-variable
                    pfn = __get_signed_url(rse_settings['rse'], sign_service, 'write', pfn)  # NOQA pylint: disable=undefined-variable

            # First check if renaming operation is supported
            if protocol.renaming:
                # Check if file replica is already on the storage system
                if protocol.overwrite is False and delete_existing is False and protocol.exists(pfn):
                    ret[key] = exception.FileReplicaAlreadyExists('File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn))
                    gs = False
                    continue

                temp_pfn = '%s.rucio.upload' % pfn
                if protocol.exists(temp_pfn):  # Check for left over of previous unsuccessful attempts
                    try:
                        protocol_delete.delete('%s.rucio.upload' % list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0])
                    except Exception as e:
                        ret[key] = exception.RSEOperationNotSupported('Unable to remove temporary file %s.rucio.upload: %s' % (pfn, str(e)))
                        gs = False
                        continue

                if delete_existing and protocol.exists('%s' % pfn):
                    # Previous completed uploads have to be removed before upload
                    try:
                        protocol_delete.delete('%s' % list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0])
                    except Exception as e:
                        ret[key] = exception.RSEOperationNotSupported('Unable to remove file %s: %s' % (pfn, str(e)))
                        gs = False
                        continue

                try:  # Try uploading file
                    protocol.put(base_name, temp_pfn, source_dir, transfer_timeout=transfer_timeout)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                try:  # Get metadata of file to verify if upload was successful
                    valid = stored_copy_is_valid(temp_pfn, lfn)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                if valid:  # The upload finished successful and the file can be renamed
                    try:
                        protocol.rename(temp_pfn, pfn)
                        ret[key] = True
                    except Exception as e:
                        gs = False
                        ret[key] = e
                else:
                    gs = False
                    ret[key] = exception.RucioException('Replica %s is corrupted.' % pfn)
            else:
                # Check if file replica is already on the storage system
                if protocol.overwrite is False and delete_existing is False and protocol.exists(readpfn):
                    ret[key] = exception.FileReplicaAlreadyExists('File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn))
                    gs = False
                    continue

                try:  # Try uploading file
                    protocol.put(base_name, pfn, source_dir, transfer_timeout=transfer_timeout)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                try:  # Get metadata of file to verify if upload was successful
                    valid = stored_copy_is_valid(pfn, lfn)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                # NOTE: a successful upload records nothing in ret on this path
                if not valid:
                    gs = False
                    ret[key] = exception.RucioException('Replica %s is corrupted.' % pfn)
    finally:
        # Release both connections even if PFN resolution or an exists() call raised
        protocol.close()
        protocol_delete.close()

    if len(ret) == 1:
        outcome = next(iter(ret.values()))
        if isinstance(outcome, Exception):
            raise outcome
        return {'success': outcome, 'pfn': pfn}
    return {0: gs, 1: ret, 'success': gs, 'pfn': pfn}
def upload(rse_settings, lfns, source_dir=None, force_pfn=None):
    """
    Uploads a file to the connected storage.
    Providing a list indicates the bulk mode.

    If the write protocol supports renaming, the file is uploaded as
    '<pfn>.rucio.upload', verified, and then renamed to its final PFN;
    otherwise it is uploaded directly to the final PFN and verified there.

    :param rse_settings: RSE settings handed to ``create_protocol``
    :param lfns: a single dict or a list with dicts containing 'scope' and 'name'. E.g.
                 [{'name': '1_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 42, 'adler32': '87HS3J968JSNWID'},
                  {'name': '2_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 4711, 'adler32': 'RSSMICETHMISBA837464F'}]
    :param source_dir: path to the local directory including the source files
    :param force_pfn: use the given PFN -- can lead to dark data, use sparingly

    :returns: ``{'success': True, 'pfn': pfn}`` when exactly one result was collected, where ``pfn``
              is the PFN of the last processed file; otherwise the list ``[gs, ret]`` where ``gs`` is
              the global status and ``ret`` maps 'scope:name' to True or to the exception of that file

    :raises RSENotConnected: no connection to a specific storage has been established
    :raises SourceNotFound: local source file can not be found
    :raises DestinationNotAccessible: remote destination directory is not accessible
    :raises ServiceUnavailable: for any other reason
    """
    ret = {}
    gs = True  # gs represents the global status which indicates if every operation worked in bulk mode

    protocol = create_protocol(rse_settings, 'write')
    protocol.connect()
    protocol_delete = create_protocol(rse_settings, 'delete')
    protocol_delete.connect()

    def stored_copy_is_valid(remote_pfn, expected, when_stat_unsupported):
        """Compare the metadata of ``remote_pfn`` with ``expected``; None means nothing was comparable."""
        try:
            stats = protocol.stat(remote_pfn)
            valid = None
            if ('adler32' in stats) and ('adler32' in expected):
                valid = stats['adler32'] == expected['adler32']
            # The file size is only consulted when no checksum could be compared
            if (valid is None) and ('filesize' in stats) and ('filesize' in expected):
                valid = stats['filesize'] == expected['filesize']
            return valid
        except NotImplementedError:
            return when_stat_unsupported

    lfns = lfns if isinstance(lfns, list) else [lfns]
    try:
        for lfn in lfns:
            name = lfn['name']
            scope = lfn['scope']
            key = '%s:%s' % (scope, name)

            if 'adler32' not in lfn:
                gs = False
                ret[key] = exception.RucioException('Missing checksum for file %s:%s' % (scope, name))
                continue
            if 'filesize' not in lfn:
                gs = False
                ret[key] = exception.RucioException('Missing filesize for file %s:%s' % (scope, name))
                continue

            if force_pfn:
                pfn = force_pfn
            else:
                # list() keeps this working where dict.values() returns a view
                pfn = list(protocol.lfns2pfns(lfn).values())[0]

            # First check if renaming operation is supported
            if protocol.renaming:
                # Check if file replica is already on the storage system
                if protocol.overwrite is False and protocol.exists(pfn):
                    ret[key] = exception.FileReplicaAlreadyExists('File %s in scope %s already exists on storage' % (name, scope))
                    gs = False
                    continue

                temp_pfn = '%s.rucio.upload' % pfn
                if protocol.exists(temp_pfn):  # Check for left over of previous unsuccessful attempts
                    try:
                        protocol_delete.delete('%s.rucio.upload' % list(protocol_delete.lfns2pfns(lfn).values())[0])
                    except Exception as e:
                        # Best effort: the failure is recorded but the upload is still
                        # attempted, and a later success overwrites this entry.
                        ret[key] = exception.RSEOperationNotSupported('Unable to remove temporary file %s.rucio.upload: %s' % (pfn, str(e)))

                try:  # Try uploading file
                    protocol.put(name, temp_pfn, source_dir)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                try:  # Get metadata of file to verify if upload was successful
                    valid = stored_copy_is_valid(temp_pfn, lfn, False)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                if valid:  # The upload finished successful and the file can be renamed
                    try:
                        protocol.rename(temp_pfn, pfn)
                        ret[key] = True
                    except Exception as e:
                        gs = False
                        ret[key] = e
                else:
                    gs = False
                    ret[key] = exception.RucioException('Replica %s is corrupted.' % pfn)
            else:
                # Check if file replica is already on the storage system
                if protocol.overwrite is False and protocol.exists(pfn):
                    ret[key] = exception.FileReplicaAlreadyExists('File %s in scope %s already exists on storage' % (name, scope))
                    gs = False
                    continue

                try:  # Try uploading file
                    protocol.put(name, pfn, source_dir)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                try:  # Get metadata of file to verify if upload was successful
                    # If the protocol doesn't support stat of a file, we agreed on assuming that the file was uploaded without error
                    valid = stored_copy_is_valid(pfn, lfn, True)
                except Exception as e:
                    gs = False
                    ret[key] = e
                    continue

                # NOTE: a successful upload records nothing in ret on this path
                if not valid:
                    gs = False
                    ret[key] = exception.RucioException('Replica %s is corrupted.' % pfn)
    finally:
        # Release both connections even if PFN resolution or an exists() call raised
        protocol.close()
        protocol_delete.close()

    if len(ret) == 1:
        outcome = next(iter(ret.values()))
        if isinstance(outcome, Exception):
            raise outcome
        return {'success': outcome, 'pfn': pfn}
    return [gs, ret]