def upload(rse_settings, lfns, source_dir=None, force_pfn=None, force_scheme=None, transfer_timeout=None,
           delete_existing=False, sign_service=None, auth_token=None):
    """
    Uploads a file to the connected storage.
    Providing a list indicates the bulk mode.

    If the write protocol supports renaming, each file is first written to
    '<pfn>.rucio.upload', verified through a stat call and then renamed to its
    final PFN. Otherwise the file is written to the final PFN directly and
    verified there.

    :param rse_settings:     RSE attributes
    :param lfns:             a single dict or a list with dicts containing 'scope' and 'name'.
                             Entries without 'adler32' or 'filesize' are rejected.
                             Examples:
                             [
                             {'name': '1_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 42, 'adler32': '87HS3J968JSNWID'},
                             {'name': '2_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 4711, 'adler32': 'RSSMICETHMISBA837464F'}
                             ]
                             If the 'filename' key is present, it will be used by Rucio as the actual name of the file on disk (separate from the Rucio 'name').
    :param source_dir:       path to the local directory including the source files
    :param force_pfn:        use the given PFN -- can lead to dark data, use sparingly
    :param force_scheme:     use the given protocol scheme, overriding the protocol priority in the RSE description
    :param transfer_timeout: set this timeout (in seconds) for the transfers, for protocols that support it
    :param delete_existing:  if set, an already existing replica does not reject the upload; on protocols that
                             support renaming the existing file is deleted before the upload
    :param sign_service:     use the given service (e.g. gcs, s3, swift) to sign the URL
                             (only applied when force_pfn is not given)
    :param auth_token:       Optionally passing JSON Web Token (OIDC) string for authentication

    :returns: If exactly one result was recorded: {'success': <result>, 'pfn': <last resolved PFN>}.
              Otherwise a dict {0: gs, 1: ret, 'success': gs, 'pfn': <last resolved PFN or None>} where gs is
              the global status and ret maps 'scope:name' to True or to the exception of that file.

    :raises RSENotConnected: no connection to a specific storage has been established
    :raises SourceNotFound: local source file can not be found
    :raises DestinationNotAccessible: remote destination directory is not accessible
    :raises ServiceUnavailable: for any other reason
    :raises Exception: if exactly one result was recorded and it is an exception, that exception is raised
    """
    ret = {}
    gs = True  # gs represents the global status which indicates if every operation worked in bulk mode
    # Bound up front so the summary below never references an unbound name when no
    # entry reached PFN resolution (empty list, or every entry rejected early).
    pfn = None

    protocol = create_protocol(rse_settings, 'write', scheme=force_scheme, auth_token=auth_token)
    protocol.connect()
    protocol_delete = create_protocol(rse_settings, 'delete', auth_token=auth_token)
    protocol_delete.connect()

    try:
        if not isinstance(lfns, list):
            lfns = [lfns]

        for lfn in lfns:
            base_name = lfn.get('filename', lfn['name'])
            name = lfn.get('name', base_name)
            scope = lfn['scope']
            key = '%s:%s' % (scope, name)

            if 'adler32' not in lfn:
                gs = False
                ret[key] = exception.RucioException('Missing checksum for file %s:%s' % (scope, name))
                continue
            if 'filesize' not in lfn:
                gs = False
                ret[key] = exception.RucioException('Missing filesize for file %s:%s' % (scope, name))
                continue

            if force_pfn:
                pfn = force_pfn
                readpfn = force_pfn
            else:
                pfn = list(protocol.lfns2pfns(make_valid_did(lfn)).values())[0]
                if isinstance(pfn, exception.RucioException):
                    raise pfn
                readpfn = pfn
                if sign_service is not None:
                    # need a separate signed URL for read operations (exists and stat)
                    readpfn = __get_signed_url(rse_settings['rse'], sign_service, 'read', pfn)  # NOQA pylint: disable=undefined-variable
                    pfn = __get_signed_url(rse_settings['rse'], sign_service, 'write', pfn)  # NOQA pylint: disable=undefined-variable

            # With renaming support the file is uploaded under a temporary name first.
            # The existence probe uses the write PFN on the renaming path and the read
            # PFN on the direct path, exactly as the two original branches did.
            renaming = protocol.renaming
            if renaming:
                upload_pfn = '%s.rucio.upload' % pfn
                exists_pfn = pfn
            else:
                upload_pfn = pfn
                exists_pfn = readpfn

            # Check if file replica is already on the storage system
            if protocol.overwrite is False and delete_existing is False and protocol.exists(exists_pfn):
                ret[key] = exception.FileReplicaAlreadyExists(
                    'File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn))
                gs = False
                continue

            if renaming:
                # NOTE(review): the PFNs deleted below are derived from protocol_delete.lfns2pfns,
                # not from `pfn`, so they ignore force_pfn and URL signing -- confirm this is intended.
                if protocol.exists(upload_pfn):  # Check for left over of previous unsuccessful attempts
                    try:
                        stale_pfn = list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0]
                        protocol_delete.delete('%s.rucio.upload' % stale_pfn)
                    except Exception as error:
                        ret[key] = exception.RSEOperationNotSupported(
                            'Unable to remove temporary file %s.rucio.upload: %s' % (pfn, str(error)))
                        gs = False
                        continue

                # Check for previous completed uploads that have to be removed before upload
                if delete_existing and protocol.exists('%s' % pfn):
                    try:
                        existing_pfn = list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0]
                        protocol_delete.delete('%s' % existing_pfn)
                    except Exception as error:
                        ret[key] = exception.RSEOperationNotSupported(
                            'Unable to remove file %s: %s' % (pfn, str(error)))
                        gs = False
                        continue

            try:  # Try uploading file
                protocol.put(base_name, upload_pfn, source_dir, transfer_timeout=transfer_timeout)
            except Exception as error:
                gs = False
                ret[key] = error
                continue

            try:  # Get metadata of file to verify if upload was successful
                try:
                    stats = _retry_protocol_stat(protocol, upload_pfn)

                    # Verify all supported checksums and keep track of the verified ones
                    verified_checksums = [stats[checksum_name] == lfn[checksum_name]
                                          for checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS
                                          if (checksum_name in stats) and (checksum_name in lfn)]

                    # Upload is successful if at least one checksum matched
                    valid = any(verified_checksums)
                    # NOTE(review): the filesize fallback also applies when checksums were compared
                    # and none matched, so a size match can override a checksum mismatch. Kept as is
                    # because tightening it would change which uploads are accepted.
                    if not valid and ('filesize' in stats) and ('filesize' in lfn):
                        valid = stats['filesize'] == lfn['filesize']
                except (NotImplementedError, exception.RSEChecksumUnavailable):
                    if rse_settings['verify_checksum'] is False:
                        valid = True
                    else:
                        raise exception.RucioException('Checksum not validated')
            except Exception as error:
                gs = False
                ret[key] = error
                continue

            if not valid:
                gs = False
                ret[key] = exception.RucioException('Replica %s is corrupted.' % pfn)
                continue

            if renaming:  # The upload finished successful and the file can be renamed
                try:
                    protocol.rename(upload_pfn, pfn)
                    ret[key] = True
                except Exception as error:
                    gs = False
                    ret[key] = error
            # NOTE(review): without renaming support a successful upload records nothing in `ret`
            # (original behaviour, kept so the returned structure is unchanged for callers).

        if len(ret) == 1:
            outcome = next(iter(ret.values()))
            if isinstance(outcome, Exception):
                raise outcome
            return {'success': outcome, 'pfn': pfn}
        return {0: gs, 1: ret, 'success': gs, 'pfn': pfn}
    finally:
        # Always release both connections, also when an exception escapes the loop.
        try:
            protocol.close()
        finally:
            protocol_delete.close()
def upload(rse_settings, lfns, source_dir=None, force_pfn=None, force_scheme=None, transfer_timeout=None):
    """
    Uploads a file to the connected storage.
    Providing a list indicates the bulk mode.

    If the write protocol supports renaming, each file is first written to
    '<pfn>.rucio.upload', verified through a stat call and then renamed to its
    final PFN. Otherwise the file is written to the final PFN directly and
    verified there.

    :param rse_settings:     RSE settings passed to create_protocol; 'verify_checksum' is read when the
                             storage cannot provide a checksum
    :param lfns:             a single dict or a list with dicts containing 'scope' and 'name'.
                             Entries without 'adler32' or 'filesize' are rejected.
                             Examples:
                             [
                             {'name': '1_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 42, 'adler32': '87HS3J968JSNWID'},
                             {'name': '2_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 4711, 'adler32': 'RSSMICETHMISBA837464F'}
                             ]
                             If the 'filename' key is present, it will be used by Rucio as the actual name of the file on disk (separate from the Rucio 'name').
    :param source_dir:       path to the local directory including the source files
    :param force_pfn:        use the given PFN -- can lead to dark data, use sparingly
    :param force_scheme:     use the given protocol scheme, overriding the protocol priority in the RSE description
    :param transfer_timeout: set this timeout (in seconds) for the transfers, for protocols that support it

    :returns: If exactly one result was recorded: {'success': <result>, 'pfn': <PFN>}.
              Otherwise the list [gs, ret] where gs is the global status and ret maps 'scope:name'
              to True or to the exception of that file.

    :raises RSENotConnected: no connection to a specific storage has been established
    :raises SourceNotFound: local source file can not be found
    :raises DestinationNotAccessible: remote destination directory is not accessible
    :raises ServiceUnavailable: for any other reason
    :raises Exception: if exactly one result was recorded and it is an exception, that exception is raised
    """
    ret = {}
    gs = True  # gs represents the global status which indicates if every operation worked in bulk mode

    protocol = create_protocol(rse_settings, 'write', scheme=force_scheme)
    protocol.connect()
    protocol_delete = create_protocol(rse_settings, 'delete')
    protocol_delete.connect()

    try:
        if not isinstance(lfns, list):
            lfns = [lfns]

        for lfn in lfns:
            base_name = lfn.get('filename', lfn['name'])
            name = lfn.get('name', base_name)
            scope = lfn['scope']
            key = '%s:%s' % (scope, name)

            if 'adler32' not in lfn:
                gs = False
                ret[key] = exception.RucioException('Missing checksum for file %s:%s' % (scope, name))
                continue
            if 'filesize' not in lfn:
                gs = False
                ret[key] = exception.RucioException('Missing filesize for file %s:%s' % (scope, name))
                continue

            if force_pfn:
                pfn = force_pfn
            else:
                pfn = list(protocol.lfns2pfns(make_valid_did(lfn)).values())[0]
                if isinstance(pfn, exception.RucioException):
                    raise pfn

            # With renaming support the file is uploaded under a temporary name first.
            renaming = protocol.renaming
            upload_pfn = '%s.rucio.upload' % pfn if renaming else pfn

            # Check if file replica is already on the storage system
            if protocol.overwrite is False and protocol.exists(pfn):
                ret[key] = exception.FileReplicaAlreadyExists(
                    'File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn))
                gs = False
                continue

            # Check for left over of previous unsuccessful attempts
            if renaming and protocol.exists(upload_pfn):
                # NOTE(review): the PFN deleted here comes from protocol_delete.lfns2pfns, not
                # from `pfn`, so it ignores force_pfn -- confirm this is intended.
                try:
                    stale_pfn = list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0]
                    protocol_delete.delete('%s.rucio.upload' % stale_pfn)
                except Exception as error:
                    ret[key] = exception.RSEOperationNotSupported(
                        'Unable to remove temporary file %s.rucio.upload: %s' % (pfn, str(error)))
                    gs = False
                    continue

            try:  # Try uploading file
                protocol.put(base_name, upload_pfn, source_dir, transfer_timeout=transfer_timeout)
            except Exception as error:
                gs = False
                ret[key] = error
                continue

            # Stays None when the stat result offers neither a checksum nor a filesize
            # to compare; that case is treated as a corrupted replica below.
            valid = None
            try:  # Get metadata of file to verify if upload was successful
                try:
                    stats = protocol.stat(upload_pfn)
                    if ('adler32' in stats) and ('adler32' in lfn):
                        valid = stats['adler32'] == lfn['adler32']
                    # The filesize is only consulted when no checksum could be compared.
                    if (valid is None) and ('filesize' in stats) and ('filesize' in lfn):
                        valid = stats['filesize'] == lfn['filesize']
                except exception.RSEChecksumUnavailable:
                    if rse_settings['verify_checksum'] is False:
                        valid = True
                    else:
                        raise exception.RucioException('Checksum not validated')
            except Exception as error:
                gs = False
                ret[key] = error
                continue

            if not valid:
                gs = False
                ret[key] = exception.RucioException('Replica %s is corrupted.' % pfn)
                continue

            if renaming:  # The upload finished successful and the file can be renamed
                try:
                    protocol.rename(upload_pfn, pfn)
                    ret[key] = True
                except Exception as error:
                    gs = False
                    ret[key] = error
            # NOTE(review): without renaming support a successful upload records nothing in `ret`
            # (original behaviour, kept so the returned structure is unchanged for callers).

        if len(ret) == 1:
            outcome = next(iter(ret.values()))
            if isinstance(outcome, Exception):
                raise outcome
            return {'success': outcome, 'pfn': pfn}
        return [gs, ret]
    finally:
        # Always release both connections, also when an exception escapes the loop.
        try:
            protocol.close()
        finally:
            protocol_delete.close()
def _upload_item(self, rse_settings, rse_attributes, lfn, source_dir=None, domain='wan', force_pfn=None,
                 force_scheme=None, transfer_timeout=None, delete_existing=False, sign_service=None):
    """
    Uploads a file to the connected storage.

    If the write protocol supports renaming, the file is first written to
    '<pfn>.rucio.upload', optionally verified through a stat call and then
    renamed to its final PFN. Otherwise it is written to the final PFN directly.

    :param rse_settings:     dictionary containing the RSE settings
    :param rse_attributes:   dictionary containing the RSE attribute key value pairs;
                             'skip_upload_stat' disables the stat based verification
    :param lfn:              a single dict containing 'scope' and 'name'.
                             Example:
                             {'name': '1_rse_local_put.raw', 'scope': 'user.jdoe', 'filesize': 42, 'adler32': '87HS3J968JSNWID'}
                             If the 'filename' key is present, it will be used by Rucio as the actual name of the file on disk (separate from the Rucio 'name').
    :param source_dir:       path to the local directory including the source files
    :param domain:           domain passed on to the protocol construction (default 'wan')
    :param force_pfn:        use the given PFN -- can lead to dark data, use sparingly
    :param force_scheme:     use the given protocol scheme, overriding the protocol priority in the RSE description
    :param transfer_timeout: set this timeout (in seconds) for the transfers, for protocols that support it
    :param delete_existing:  if set, an already existing replica does not reject the upload and the file
                             at the PFN is deleted before uploading
    :param sign_service:     use the given service (e.g. gcs, s3, swift) to sign the URL

    :returns: the PFN the file was uploaded to (the signed write URL when sign_service is given)

    :raises FileReplicaAlreadyExists: the replica already exists and may neither be overwritten nor deleted
    :raises RSEOperationNotSupported: leftovers of earlier attempts could not be removed or the upload failed
    :raises RucioException(msg): general exception with msg for more details.
    """
    logger = self.logger

    # Construct protocol for write and read operation.
    protocol_write = self._create_protocol(rse_settings, 'write', force_scheme=force_scheme, domain=domain)
    protocol_read = self._create_protocol(rse_settings, 'read', domain=domain)

    try:
        base_name = lfn.get('filename', lfn['name'])
        name = lfn.get('name', base_name)
        scope = lfn['scope']

        # Conditional lfn properties
        if 'adler32' not in lfn:
            logger(logging.WARNING, 'Missing checksum for file %s:%s' % (lfn['scope'], name))

        # Getting pfn
        # NOTE(review): if this lookup fails and no force_pfn is given, pfn stays None and the
        # code below carries on with it -- confirm whether that case should raise instead.
        pfn = None
        readpfn = None
        try:
            pfn = list(protocol_write.lfns2pfns(make_valid_did(lfn)).values())[0]
            readpfn = list(protocol_read.lfns2pfns(make_valid_did(lfn)).values())[0]
            logger(logging.DEBUG, 'The PFN created from the LFN: {}'.format(pfn))
        except Exception as error:
            logger(logging.WARNING, 'Failed to create PFN for LFN: %s' % lfn)
            logger(logging.DEBUG, str(error), exc_info=True)
        if force_pfn:
            pfn = force_pfn
            readpfn = pfn
            logger(logging.DEBUG, 'The given PFN is used: {}'.format(pfn))

        # Auth. mostly for object stores
        if sign_service:
            # Both signed URLs are derived from the unsigned PFN. Signing the read URL from
            # the already signed write URL (as before) would sign a signed URL.
            unsigned_pfn = pfn
            pfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'write', unsigned_pfn)
            readpfn = self.client.get_signed_url(rse_settings['rse'], sign_service, 'read', unsigned_pfn)

        # Create a name of tmp file if renaming operation is supported
        pfn_tmp = '%s.rucio.upload' % pfn if protocol_write.renaming else pfn
        readpfn_tmp = '%s.rucio.upload' % readpfn if protocol_write.renaming else readpfn

        # Either DID exists or not register_after_upload
        if protocol_write.overwrite is False and delete_existing is False and protocol_read.exists(readpfn):
            raise FileReplicaAlreadyExists(
                'File %s in scope %s already exists on storage as PFN %s' % (name, scope, pfn))  # wrong exception ?

        # Removing tmp from earlier attempts
        if protocol_read.exists(readpfn_tmp):
            logger(logging.DEBUG, 'Removing remains of previous upload attemtps.')
            try:
                # Construct protocol for delete operation.
                protocol_delete = self._create_protocol(rse_settings, 'delete', domain=domain)
                try:
                    protocol_delete.delete(
                        '%s.rucio.upload' % list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0])
                finally:
                    # Release the delete connection even when the removal fails.
                    protocol_delete.close()
            except Exception as error:
                raise RSEOperationNotSupported(
                    'Unable to remove temporary file %s.rucio.upload: %s' % (pfn, str(error)))

        # Removing not registered files from earlier attempts
        if delete_existing:
            logger(logging.DEBUG, 'Removing not-registered remains of previous upload attemtps.')
            try:
                # Construct protocol for delete operation.
                protocol_delete = self._create_protocol(rse_settings, 'delete', domain=domain)
                try:
                    protocol_delete.delete(
                        '%s' % list(protocol_delete.lfns2pfns(make_valid_did(lfn)).values())[0])
                finally:
                    # Release the delete connection even when the removal fails.
                    protocol_delete.close()
            except Exception as error:
                raise RSEOperationNotSupported('Unable to remove file %s: %s' % (pfn, str(error)))

        # Process the upload of the tmp file
        try:
            retry(protocol_write.put, base_name, pfn_tmp, source_dir,
                  transfer_timeout=transfer_timeout)(mtries=2, logger=logger)
            logger(logging.INFO, 'Successful upload of temporary file. {}'.format(pfn_tmp))
        except Exception as error:
            raise RSEOperationNotSupported(str(error))

        # Is stat after that upload allowed?
        skip_upload_stat = rse_attributes.get('skip_upload_stat', False)
        logger(logging.DEBUG, 'skip_upload_stat=%s', skip_upload_stat)

        # Checksum verification, obsolete, see Gabriele changes.
        if not skip_upload_stat:
            stats = self._retry_protocol_stat(protocol_write, pfn_tmp)
            if not isinstance(stats, dict):
                raise RucioException('Could not get protocol.stats for given PFN: %s' % pfn)

            # The checksum and filesize check
            if ('filesize' in stats) and ('filesize' in lfn):
                logger(logging.DEBUG,
                       'Filesize: Expected=%s Found=%s' % (lfn['filesize'], stats['filesize']))
                if int(stats['filesize']) != int(lfn['filesize']):
                    raise RucioException(
                        'Filesize mismatch. Source: %s Destination: %s' % (lfn['filesize'], stats['filesize']))
            if rse_settings['verify_checksum'] is not False:
                if ('adler32' in stats) and ('adler32' in lfn):
                    logger(logging.DEBUG,
                           'Checksum: Expected=%s Found=%s' % (lfn['adler32'], stats['adler32']))
                    # Leading zeros are stripped so differently padded checksums compare equal.
                    if str(stats['adler32']).lstrip('0') != str(lfn['adler32']).lstrip('0'):
                        raise RucioException(
                            'Checksum mismatch. Source: %s Destination: %s' % (lfn['adler32'], stats['adler32']))

        # The upload finished successful and the file can be renamed
        try:
            if protocol_write.renaming:
                logger(logging.DEBUG, 'Renaming file %s to %s' % (pfn_tmp, pfn))
                protocol_write.rename(pfn_tmp, pfn)
        except Exception:
            raise RucioException('Unable to rename the tmp file %s.' % pfn_tmp)

        return pfn
    finally:
        # Always release both connections, also when one of the steps above raised.
        try:
            protocol_write.close()
        finally:
            protocol_read.close()