def test_exists_mgr_ok_single_pfn(self):
    """(RSE/PROTOCOLS): Check a single file on storage using PFN (Success)"""
    # list() is required: dict.values() returns a non-subscriptable view on Python 3
    pfn = list(mgr.lfns2pfns(self.rse_settings,
                             {'name': '1_rse_remote_get.raw',
                              'scope': 'user.%s' % self.user}).values())[0]
    # assert the result -- a bare call would let the test pass even if the file is missing
    assert mgr.exists(self.rse_settings, {'name': pfn})
def test_exists_mgr_ok_single_lfn(self):
    """(RSE/PROTOCOLS): Check a single file on storage using LFN (Success)"""
    # assert the result -- a bare call would let the test pass even if the file is missing
    assert mgr.exists(self.rse_settings,
                      {'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user},
                      impl=self.impl, vo=self.vo)
def test_exists_mgr_false_single_pfn(self):
    """(RSE/PROTOCOLS): Check a single file on storage using PFN (Fail)"""
    # list() is required: dict.values() returns a non-subscriptable view on Python 3
    pfn = list(mgr.lfns2pfns(self.rse_settings,
                             {'name': '1_rse_not_existing.raw',
                              'scope': 'user.%s' % self.user}).values())[0]
    # the original bare `not mgr.exists(...)` expression was a no-op; assert it
    assert not mgr.exists(self.rse_settings, {'name': pfn})
def test_exists_mgr_ok_multi(self):
    """(RSE/PROTOCOLS): Check multiple files on storage (Success)"""
    scope = 'user.%s' % self.user
    # resolve two LFNs to PFNs so the same files can also be probed by PFN
    pfn_a = list(mgr.lfns2pfns(self.rse_settings,
                               {'name': '3_rse_remote_get.raw', 'scope': scope}).values())[0]
    pfn_b = list(mgr.lfns2pfns(self.rse_settings,
                               {'name': '4_rse_remote_get.raw', 'scope': scope}).values())[0]
    probes = [{'name': '1_rse_remote_get.raw', 'scope': scope},
              {'name': '2_rse_remote_get.raw', 'scope': scope},
              {'name': pfn_a},
              {'name': pfn_b}]
    status, details = mgr.exists(self.rse_settings, probes)
    # every probe must be reported as existing, and the overall status must be True
    checks = [status,
              details['%s:1_rse_remote_get.raw' % scope],
              details['%s:2_rse_remote_get.raw' % scope],
              details[pfn_a],
              details[pfn_b]]
    if not all(checks):
        raise Exception('Return not as expected: %s, %s' % (status, details))
def test_exists_mgr_false_multi(self):
    """(RSE/PROTOCOLS): Check multiple files on storage (Fail)"""
    scope = 'user.%s' % self.user
    # one PFN resolves to an existing file, the other to a missing one
    pfn_ok = list(mgr.lfns2pfns(self.rse_settings,
                                {'name': '2_rse_remote_get.raw', 'scope': scope}).values())[0]
    pfn_missing = list(mgr.lfns2pfns(self.rse_settings,
                                     {'name': '1_rse_not_existing.raw', 'scope': scope}).values())[0]
    status, details = mgr.exists(self.rse_settings,
                                 [{'name': '1_rse_remote_get.raw', 'scope': scope},
                                  {'name': 'not_existing_data.raw', 'scope': scope},
                                  {'name': pfn_ok},
                                  {'name': pfn_missing}])
    # overall status must be False; per-file results must match each probe
    as_expected = (not status
                   and details['%s:1_rse_remote_get.raw' % scope]
                   and not details['%s:not_existing_data.raw' % scope]
                   and details[pfn_ok]
                   and not details[pfn_missing])
    if not as_expected:
        raise Exception('Return not as expected: %s, %s' % (status, details))
def test_exists_mgr_false_single_lfn(self):
    """(RSE/PROTOCOLS): Check a single file on storage using LFN (Fail)"""
    # the original bare `not mgr.exists(...)` expression was a no-op; assert it
    assert not mgr.exists(self.rse_settings,
                          {'name': 'not_existing_data.raw', 'scope': 'user.%s' % self.user},
                          impl=self.impl, vo=self.vo)
def test_exists_mgr_false_multi(self):
    """(RSE/PROTOCOLS): Check multiple files on storage (Fail)"""
    # list() is required: dict.values() returns a non-subscriptable view on Python 3
    pfn_a = list(mgr.lfns2pfns(self.rse_settings,
                               {'name': '2_rse_remote_get.raw',
                                'scope': 'user.%s' % self.user}).values())[0]
    pfn_b = list(mgr.lfns2pfns(self.rse_settings,
                               {'name': '1_rse_not_existing.raw',
                                'scope': 'user.%s' % self.user}).values())[0]
    status, details = mgr.exists(self.rse_settings,
                                 [{'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user},
                                  {'name': 'not_existing_data.raw', 'scope': 'user.%s' % self.user},
                                  {'name': pfn_a},
                                  {'name': pfn_b}])
    # overall status must be False; existing entries True, missing entries False
    if status or not details['user.%s:1_rse_remote_get.raw' % self.user] or details['user.%s:not_existing_data.raw' % self.user] or not details[pfn_a] or details[pfn_b]:
        raise Exception('Return not as expected: %s, %s' % (status, details))
def verify_stage_out(fspec):
    """
    Checks that the uploaded file is physically at the destination.
    :param fspec: file specifications
    """
    from rucio.rse import rsemanager as rsemgr
    settings = rsemgr.get_rse_info(fspec.ddmendpoint)
    logger.info('Checking file: %s' % str(fspec.lfn))
    # probe the storage directly for the single uploaded replica
    return rsemgr.exists(settings, [{'name': fspec.lfn, 'scope': fspec.scope}])
def test_exists_mgr_ok_multi(self):
    """(RSE/PROTOCOLS): Check multiple files on storage (Success)"""
    # list() is required: dict.values() returns a non-subscriptable view on Python 3
    pfn_a = list(mgr.lfns2pfns(self.rse_settings,
                               {'name': '3_rse_remote_get.raw',
                                'scope': 'user.%s' % self.user}).values())[0]
    pfn_b = list(mgr.lfns2pfns(self.rse_settings,
                               {'name': '4_rse_remote_get.raw',
                                'scope': 'user.%s' % self.user}).values())[0]
    status, details = mgr.exists(self.rse_settings,
                                 [{'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user},
                                  {'name': '2_rse_remote_get.raw', 'scope': 'user.%s' % self.user},
                                  {'name': pfn_a},
                                  {'name': pfn_b}])
    # overall status and all per-file results must be True
    if not (status and details['user.%s:1_rse_remote_get.raw' % self.user] and details['user.%s:2_rse_remote_get.raw' % self.user] and details[pfn_a] and details[pfn_b]):
        raise Exception('Return not as expected: %s, %s' % (status, details))
def VerifyStageOut(self, dst, fspec):
    """
    Checks that the uploaded file is physically at the destination.
    :param dst: destination rse
    :param fspec: file specifications
    """
    from rucio.rse import rsemanager as rsemgr
    settings = rsemgr.get_rse_info(dst)
    tolog('Checking file: %s' % str(fspec.lfn))
    # probe the storage directly for the single uploaded replica
    return rsemgr.exists(settings, [{'name': fspec.lfn, 'scope': fspec.scope}])
def VerifyStageOut(self, dst, fspec):
    """
    Checks that the uploaded file is physically at the destination.

    :param dst: destination rse
    :param fspec: file specifications
    :returns: the result of rsemanager.exists for the single uploaded replica
    """
    # Local import keeps the rucio dependency out of module import time.
    from rucio.rse import rsemanager as rsemgr
    rse_settings = rsemgr.get_rse_info(dst)
    uploaded_file = {'name': fspec.lfn, 'scope': fspec.scope}
    tolog('Checking file: %s' % str(fspec.lfn))
    return rsemgr.exists(rse_settings, [uploaded_file])
def test_exists_mgr_ok_single_pfn(self):
    """(RSE/PROTOCOLS): Check a single file on storage using PFN (Success)"""
    # list() is required: dict.values() returns a non-subscriptable view on Python 3
    pfn = list(mgr.lfns2pfns(self.rse_settings,
                             {'name': '1_rse_remote_get.raw',
                              'scope': 'user.%s' % self.user}).values())[0]
    # assert the result -- a bare call would let the test pass even if the file is missing
    assert mgr.exists(self.rse_settings, {'name': pfn})
def test_exists_mgr_false_single_lfn(self):
    """(RSE/PROTOCOLS): Check a single file on storage using LFN (Fail)"""
    # the original bare `not mgr.exists(...)` expression was a no-op; assert it
    assert not mgr.exists(self.rse_settings,
                          {'name': 'not_existing_data.raw', 'scope': 'user.%s' % self.user})
def test_exists_mgr_false_single_pfn(self):
    """(RSE/PROTOCOLS): Check a single file on storage using PFN (Fail)"""
    # list() is required: dict.values() returns a non-subscriptable view on Python 3
    pfn = list(mgr.lfns2pfns(self.rse_settings,
                             {'name': '1_rse_not_existing.raw',
                              'scope': 'user.%s' % self.user}).values())[0]
    # the original bare `not mgr.exists(...)` expression was a no-op; assert it
    assert not mgr.exists(self.rse_settings, {'name': pfn})
def upload(self, items, summary_file_path=None, traces_copy_out=None):
    """
    Upload a list of local files to RSEs and (optionally) register them.

    :param items: List of dictionaries. Each dictionary describing a file to upload. Keys:
        path                  - path of the file that will be uploaded
        rse                   - rse expression/name (e.g. 'CERN-PROD_DATADISK') where to upload the file
        did_scope             - Optional: custom did scope (Default: user.<account>)
        did_name              - Optional: custom did name (Default: name of the file)
        dataset_scope         - Optional: custom dataset scope
        dataset_name          - Optional: custom dataset name
        force_scheme          - Optional: force a specific scheme (if PFN upload this will be overwritten) (Default: None)
        pfn                   - Optional: use a given PFN (this sets no_register to True, and no_register becomes mandatory)
        no_register           - Optional: if True, the file will not be registered in the rucio catalogue
        register_after_upload - Optional: if True, the file will be registered after successful upload
        lifetime              - Optional: the lifetime of the file after it was uploaded
        transfer_timeout      - Optional: time after the upload will be aborted
        guid                  - Optional: guid of the file
        recursive             - Optional: if set, parses the folder structure recursively into collections
    :param summary_file_path: Optional: a path where a summary in form of a json file will be stored
    :param traces_copy_out: reference to an external list, where the traces should be uploaded

    :returns: 0 on success

    :raises InputValidationError: if any input arguments are in a wrong format
    :raises RSEWriteBlocked: if a given RSE is not available for writing
    :raises NoFilesUploaded: if no files were successfully uploaded
    :raises NotAllFilesUploaded: if not all files were successfully uploaded
    """
    # helper to get rse from rse_expression:
    def _pick_random_rse(rse_expression):
        rses = [r['rse'] for r in self.client.list_rses(rse_expression)]  # can raise InvalidRSEExpression
        random.shuffle(rses)
        return rses[0]

    logger = self.logger
    self.trace['uuid'] = generate_uuid()

    # check given sources, resolve dirs into files, and collect meta infos
    files = self._collect_and_validate_file_info(items)
    logger(logging.DEBUG, 'Num. of files that upload client is processing: {}'.format(len(files)))

    # check if RSE of every file is available for writing
    # and cache rse settings
    registered_dataset_dids = set()
    registered_file_dids = set()
    rse_expression = None
    for file in files:
        rse_expression = file['rse']
        rse = self.rse_expressions.setdefault(rse_expression, _pick_random_rse(rse_expression))
        if not self.rses.get(rse):
            rse_settings = self.rses.setdefault(rse, rsemgr.get_rse_info(rse, vo=self.client.vo))
            if rse_settings['availability_write'] != 1:
                raise RSEWriteBlocked('%s is not available for writing. No actions have been taken' % rse)
        dataset_scope = file.get('dataset_scope')
        dataset_name = file.get('dataset_name')
        file['rse'] = rse
        if dataset_scope and dataset_name:
            dataset_did_str = ('%s:%s' % (dataset_scope, dataset_name))
            file['dataset_did_str'] = dataset_did_str
            registered_dataset_dids.add(dataset_did_str)
        registered_file_dids.add('%s:%s' % (file['did_scope'], file['did_name']))

    wrong_dids = registered_file_dids.intersection(registered_dataset_dids)
    if len(wrong_dids):
        raise InputValidationError('DIDs used to address both files and datasets: %s' % str(wrong_dids))
    logger(logging.DEBUG, 'Input validation done.')

    # clear this set again to ensure that we only try to register datasets once
    registered_dataset_dids = set()
    num_succeeded = 0
    summary = []
    for file in files:
        basename = file['basename']
        logger(logging.INFO, 'Preparing upload for file %s' % basename)

        no_register = file.get('no_register')
        register_after_upload = file.get('register_after_upload') and not no_register
        pfn = file.get('pfn')
        force_scheme = file.get('force_scheme')
        delete_existing = False

        trace = copy.deepcopy(self.trace)
        # appending trace to list reference, if the reference exists
        if traces_copy_out is not None:
            traces_copy_out.append(trace)

        rse = file['rse']
        trace['scope'] = file['did_scope']
        trace['datasetScope'] = file.get('dataset_scope', '')
        trace['dataset'] = file.get('dataset_name', '')
        trace['remoteSite'] = rse
        trace['filesize'] = file['bytes']

        file_did = {'scope': file['did_scope'], 'name': file['did_name']}
        dataset_did_str = file.get('dataset_did_str')
        rse_settings = self.rses[rse]
        rse_sign_service = rse_settings.get('sign_url', None)
        is_deterministic = rse_settings.get('deterministic', True)
        if not is_deterministic and not pfn:
            logger(logging.ERROR, 'PFN has to be defined for NON-DETERMINISTIC RSE.')
            continue
        if pfn and is_deterministic:
            logger(logging.WARNING, 'Upload with given pfn implies that no_register is True, except non-deterministic RSEs')
            no_register = True

        # resolving local area networks
        domain = 'wan'
        rse_attributes = {}
        try:
            rse_attributes = self.client.list_rse_attributes(rse)
        except Exception:
            # fixed: was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt
            logger(logging.WARNING, 'Attributes of the RSE: %s not available.' % rse)
        if (self.client_location and 'lan' in rse_settings['domain'] and 'site' in rse_attributes):
            if self.client_location['site'] == rse_attributes['site']:
                domain = 'lan'
        logger(logging.DEBUG, '{} domain is used for the upload'.format(domain))

        if not no_register and not register_after_upload:
            self._register_file(file, registered_dataset_dids)

        # if register_after_upload, file should be overwritten if it is not registered
        # otherwise if file already exists on RSE we're done
        if register_after_upload:
            if rsemgr.exists(rse_settings, pfn if pfn else file_did, domain=domain, auth_token=self.auth_token, logger=logger):
                try:
                    self.client.get_did(file['did_scope'], file['did_name'])
                    logger(logging.INFO, 'File already registered. Skipping upload.')
                    trace['stateReason'] = 'File already exists'
                    continue
                except DataIdentifierNotFound:
                    logger(logging.INFO, 'File already exists on RSE. Previous left overs will be overwritten.')
                    delete_existing = True
        elif not is_deterministic and not no_register:
            if rsemgr.exists(rse_settings, pfn, domain=domain, auth_token=self.auth_token, logger=logger):
                logger(logging.INFO, 'File already exists on RSE with given pfn. Skipping upload. Existing replica has to be removed first.')
                trace['stateReason'] = 'File already exists'
                continue
            elif rsemgr.exists(rse_settings, file_did, domain=domain, auth_token=self.auth_token, logger=logger):
                logger(logging.INFO, 'File already exists on RSE with different pfn. Skipping upload.')
                trace['stateReason'] = 'File already exists'
                continue
        else:
            if rsemgr.exists(rse_settings, pfn if pfn else file_did, domain=domain, auth_token=self.auth_token, logger=logger):
                logger(logging.INFO, 'File already exists on RSE. Skipping upload')
                trace['stateReason'] = 'File already exists'
                continue

        # protocol handling and upload
        protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=force_scheme, domain=domain)
        protocols.reverse()
        success = False
        state_reason = ''
        logger(logging.DEBUG, str(protocols))
        while not success and len(protocols):
            protocol = protocols.pop()
            cur_scheme = protocol['scheme']
            logger(logging.INFO, 'Trying upload with %s to %s' % (cur_scheme, rse))
            lfn = {}
            lfn['filename'] = basename
            lfn['scope'] = file['did_scope']
            lfn['name'] = file['did_name']
            for checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS:
                if checksum_name in file:
                    lfn[checksum_name] = file[checksum_name]
            lfn['filesize'] = file['bytes']

            sign_service = None
            if cur_scheme == 'https':
                sign_service = rse_sign_service

            trace['protocol'] = cur_scheme
            trace['transferStart'] = time.time()
            logger(logging.DEBUG, 'Processing upload with the domain: {}'.format(domain))
            try:
                pfn = self._upload_item(rse_settings=rse_settings,
                                        rse_attributes=rse_attributes,
                                        lfn=lfn,
                                        source_dir=file['dirname'],
                                        domain=domain,
                                        force_scheme=cur_scheme,
                                        force_pfn=pfn,
                                        transfer_timeout=file.get('transfer_timeout'),
                                        delete_existing=delete_existing,
                                        sign_service=sign_service)
                logger(logging.DEBUG, 'Upload done.')
                success = True
                file['upload_result'] = {0: True, 1: None, 'success': True, 'pfn': pfn}  # needs to be removed
            except (ServiceUnavailable, ResourceTemporaryUnavailable, RSEOperationNotSupported, RucioException) as error:
                logger(logging.WARNING, 'Upload attempt failed')
                logger(logging.INFO, 'Exception: %s' % str(error), exc_info=True)
                state_reason = str(error)

        if success:
            num_succeeded += 1
            trace['transferEnd'] = time.time()
            trace['clientState'] = 'DONE'
            file['state'] = 'A'
            logger(logging.INFO, 'Successfully uploaded file %s' % basename)
            self._send_trace(trace)

            if summary_file_path:
                summary.append(copy.deepcopy(file))

            if not no_register:
                if register_after_upload:
                    self._register_file(file, registered_dataset_dids)
                replica_for_api = self._convert_file_for_api(file)
                if not self.client.update_replicas_states(rse, files=[replica_for_api]):
                    logger(logging.WARNING, 'Failed to update replica state')

            # add file to dataset if needed
            if dataset_did_str and not no_register:
                try:
                    self.client.attach_dids(file['dataset_scope'], file['dataset_name'], [file_did])
                except Exception as error:
                    logger(logging.WARNING, 'Failed to attach file to the dataset')
                    logger(logging.DEBUG, 'Attaching to dataset {}'.format(str(error)))
        else:
            trace['clientState'] = 'FAILED'
            trace['stateReason'] = state_reason
            self._send_trace(trace)
            logger(logging.ERROR, 'Failed to upload file %s' % basename)

    if summary_file_path:
        logger(logging.DEBUG, 'Summary will be available at {}'.format(summary_file_path))
        final_summary = {}
        for file in summary:
            file_scope = file['did_scope']
            file_name = file['did_name']
            file_did_str = '%s:%s' % (file_scope, file_name)
            final_summary[file_did_str] = {'scope': file_scope,
                                           'name': file_name,
                                           'bytes': file['bytes'],
                                           'rse': file['rse'],
                                           'pfn': file['upload_result'].get('pfn', ''),
                                           'guid': file['meta']['guid']}
            for checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS:
                if checksum_name in file:
                    final_summary[file_did_str][checksum_name] = file[checksum_name]
        with open(summary_file_path, 'w') as summary_file:
            json.dump(final_summary, summary_file, sort_keys=True, indent=1)

    if num_succeeded == 0:
        raise NoFilesUploaded()
    elif num_succeeded != len(files):
        raise NotAllFilesUploaded()
    return 0
def upload(self, sources_with_settings, summary_file_path=None):
    """
    Upload a list of local files to RSEs and register them.

    :param sources_with_settings: List of dictionaries of file descriptions. None means optional
        [{'path': 'file1', 'rse': 'rse_name1', 'did_scope': None, 'did_name': None,
          'dataset_name': None, 'dataset_scope': None, 'scheme': None, 'pfn': None,
          'no_register': None, 'lifetime': None},
         {'path': 'file2', 'rse': 'rse_name2', 'did_scope': None, 'did_name': None,
          'dataset_name': None, 'dataset_scope': None, 'scheme': None, 'pfn': None,
          'no_register': None, 'lifetime': None}]
    :param summary_file_path: Optional: a path where a summary in form of a json file will be stored

    :raises InputValidationError: if any input arguments are in a wrong format
    :raises RSEBlacklisted: if a given RSE is not available for writing
    """
    logger = self.logger
    self.trace['uuid'] = generate_uuid()

    # check given sources, resolve dirs into files, and collect meta infos
    files = self.collect_and_validate_file_info(sources_with_settings)

    # check if RSE of every file is available for writing
    # and cache rse settings
    registered_dataset_dids = set()
    registered_file_dids = set()
    for file in files:
        rse = file['rse']
        if not self.rses.get(rse):
            rse_settings = self.rses.setdefault(rse, rsemgr.get_rse_info(rse))
            if rse_settings['availability_write'] != 1:
                raise RSEBlacklisted('%s is blacklisted for writing. No actions have been taken' % rse)
        dataset_scope = file.get('dataset_scope')
        dataset_name = file.get('dataset_name')
        if dataset_scope and dataset_name:
            dataset_did_str = ('%s:%s' % (dataset_scope, dataset_name))
            file['dataset_did_str'] = dataset_did_str
            registered_dataset_dids.add(dataset_did_str)
        registered_file_dids.add('%s:%s' % (file['did_scope'], file['did_name']))
    wrong_dids = registered_file_dids.intersection(registered_dataset_dids)
    if len(wrong_dids):
        raise InputValidationError('DIDs used to address both files and datasets: %s' % str(wrong_dids))

    # clear this set again to ensure that we only try to register datasets once
    registered_dataset_dids = set()
    # bug fix: summary was (re-)initialised inside the per-file loop, so it lost
    # all but the last file and was undefined when `files` was empty
    summary = []
    for file in files:
        basename = file['basename']
        logger.info('Preparing upload for file %s' % basename)
        no_register = file.get('no_register')
        pfn = file.get('pfn')
        scheme = file.get('scheme')

        # bug fix: rse must be taken from the current file BEFORE filling the trace;
        # previously the stale value from the validation loop above was recorded
        rse = file['rse']
        self.trace['scope'] = file['did_scope']
        self.trace['datasetScope'] = file.get('dataset_scope', '')
        self.trace['dataset'] = file.get('dataset_name', '')
        self.trace['remoteSite'] = rse
        self.trace['filesize'] = file['bytes']

        file_scope = file['did_scope']
        file_name = file['did_name']
        file_did = {'scope': file_scope, 'name': file_name}
        file_did_str = '%s:%s' % (file_scope, file_name)
        dataset_did_str = file.get('dataset_did_str')
        rse_settings = self.rses[rse]

        # register a dataset if we need to
        if dataset_did_str and dataset_did_str not in registered_dataset_dids and not no_register:
            registered_dataset_dids.add(dataset_did_str)
            try:
                self.client.add_dataset(scope=file['dataset_scope'],
                                        name=file['dataset_name'],
                                        rules=[{'account': self.account,
                                                'copies': 1,
                                                'rse_expression': rse,
                                                'grouping': 'DATASET',
                                                'lifetime': file['lifetime']}])
                logger.info('Dataset %s successfully created' % dataset_did_str)
            except DataIdentifierAlreadyExists:
                # TODO: Need to check the rules thing!!
                logger.info("Dataset %s already exists" % dataset_did_str)

        replica_for_api = self.convert_file_for_api(file)
        try:
            # if the remote checksum is different this did must not be used
            meta = self.client.get_metadata(file_scope, file_name)
            logger.info('Comparing checksums of %s and %s' % (basename, file_did_str))
            if meta['adler32'] != file['adler32']:
                logger.error('Local checksum %s does not match remote checksum %s' % (file['adler32'], meta['adler32']))
                raise DataIdentifierAlreadyExists

            # add file to rse if it is not registered yet
            replicastate = list(self.client.list_replicas([file_did], all_states=True))
            if rse not in replicastate[0]['rses'] and not no_register:
                logger.info('Adding replica at %s in Rucio catalog' % rse)
                self.client.add_replicas(rse=file['rse'], files=[replica_for_api])
        except DataIdentifierNotFound:
            if not no_register:
                logger.info('Adding replica at %s in Rucio catalog' % rse)
                self.client.add_replicas(rse=file['rse'], files=[replica_for_api])
                if not dataset_did_str:
                    # only need to add rules for files if no dataset is given
                    logger.info('Adding replication rule at %s' % rse)
                    self.client.add_replication_rule([file_did], copies=1, rse_expression=rse, lifetime=file['lifetime'])

        # if file already exists on RSE we're done
        if not rsemgr.exists(rse_settings, file_did):
            protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=scheme)
            protocols.reverse()
            success = False
            while not success and len(protocols):
                protocol = protocols.pop()
                logger.info('Trying upload to %s with protocol %s' % (rse, protocol['scheme']))
                lfn = {}
                lfn['filename'] = file['basename']
                lfn['scope'] = file['did_scope']
                lfn['name'] = file['did_name']
                lfn['adler32'] = file['adler32']
                lfn['filesize'] = file['bytes']
                self.trace['protocol'] = protocol['scheme']
                self.trace['transferStart'] = time.time()
                try:
                    state = rsemgr.upload(rse_settings=rse_settings,
                                          lfns=lfn,
                                          source_dir=file['dirname'],
                                          force_scheme=protocol['scheme'],
                                          force_pfn=pfn)
                    success = True
                    file['upload_result'] = state
                except (ServiceUnavailable, ResourceTemporaryUnavailable) as error:
                    logger.warning('Upload attempt failed')
                    logger.debug('Exception: %s' % str(error))

            if success:
                self.trace['transferEnd'] = time.time()
                self.trace['clientState'] = 'DONE'
                file['state'] = 'A'
                logger.info('File %s successfully uploaded' % basename)
                send_trace(self.trace, self.client.host, self.user_agent, logger=logger)
                if summary_file_path:
                    summary.append(copy.deepcopy(file))
            else:
                logger.error('Failed to upload file %s' % basename)
                # TODO trace?
                continue  # skip attach_did and update_states for this file
        else:
            logger.info('File already exists on RSE. Skipped upload')

        if not no_register:
            # add file to dataset if needed
            if dataset_did_str:
                try:
                    logger.info('Attaching file to dataset %s' % dataset_did_str)
                    self.client.attach_dids(file['dataset_scope'], file['dataset_name'], [file_did])
                except Exception as error:
                    logger.warning('Failed to attach file to the dataset')
                    logger.warning(error)
            logger.info('Setting replica state to available')
            replica_for_api = self.convert_file_for_api(file)
            self.client.update_replicas_states(rse, files=[replica_for_api])

    if summary_file_path:
        final_summary = {}
        for file in summary:
            file_scope = file['did_scope']
            file_name = file['did_name']
            file_did_str = '%s:%s' % (file_scope, file_name)
            # NOTE(review): keys 'scope'/'name' differ from 'did_scope'/'did_name' used
            # elsewhere in this method -- confirm collect_and_validate_file_info sets them
            final_summary[file_did_str] = {'scope': file['scope'],
                                           'name': file['name'],
                                           'bytes': file['bytes'],
                                           'rse': file['rse'],
                                           'pfn': file['upload_result']['pfn'],
                                           'guid': file['meta']['guid'],
                                           'adler32': file['adler32'],
                                           'md5': file['md5']}
        # bug fix: json.dump writes str, so the file must be opened in text mode ('w'),
        # not binary ('wb'), on Python 3
        with open(summary_file_path, 'w') as summary_file:
            json.dump(final_summary, summary_file, sort_keys=True, indent=1)
def upload(self, items, summary_file_path=None):
    """
    Upload a list of local files to RSEs and (optionally) register them.

    :param items: List of dictionaries. Each dictionary describing a file to upload. Keys:
        path             - path of the file that will be uploaded
        rse              - rse name (e.g. 'CERN-PROD_DATADISK') where to upload the file
        did_scope        - Optional: custom did scope (Default: user.<account>)
        did_name         - Optional: custom did name (Default: name of the file)
        dataset_scope    - Optional: custom dataset scope
        dataset_name     - Optional: custom dataset name
        force_scheme     - Optional: force a specific scheme (if PFN upload this will be overwritten) (Default: None)
        pfn              - Optional: use a given PFN (this sets no_register to True)
        no_register      - Optional: if True, the file will not be registered in the rucio catalogue
        lifetime         - Optional: the lifetime of the file after it was uploaded
        transfer_timeout - Optional: time after the upload will be aborted
        guid             - Optional: guid of the file
    :param summary_file_path: Optional: a path where a summary in form of a json file will be stored

    :returns: 0 on success

    :raises InputValidationError: if any input arguments are in a wrong format
    :raises RSEBlacklisted: if a given RSE is not available for writing
    :raises NoFilesUploaded: if no files were successfully uploaded
    :raises NotAllFilesUploaded: if not all files were successfully uploaded
    """
    logger = self.logger
    self.trace['uuid'] = generate_uuid()

    # check given sources, resolve dirs into files, and collect meta infos
    files = self._collect_and_validate_file_info(items)

    # check if RSE of every file is available for writing
    # and cache rse settings
    registered_dataset_dids = set()
    registered_file_dids = set()
    for file in files:
        rse = file['rse']
        if not self.rses.get(rse):
            rse_settings = self.rses.setdefault(rse, rsemgr.get_rse_info(rse))
            if rse_settings['availability_write'] != 1:
                raise RSEBlacklisted('%s is blacklisted for writing. No actions have been taken' % rse)
        dataset_scope = file.get('dataset_scope')
        dataset_name = file.get('dataset_name')
        if dataset_scope and dataset_name:
            dataset_did_str = ('%s:%s' % (dataset_scope, dataset_name))
            file['dataset_did_str'] = dataset_did_str
            registered_dataset_dids.add(dataset_did_str)
        registered_file_dids.add('%s:%s' % (file['did_scope'], file['did_name']))
    wrong_dids = registered_file_dids.intersection(registered_dataset_dids)
    if len(wrong_dids):
        raise InputValidationError('DIDs used to address both files and datasets: %s' % str(wrong_dids))

    # clear this set again to ensure that we only try to register datasets once
    registered_dataset_dids = set()
    num_succeeded = 0
    # bug fix: summary was (re-)initialised inside the per-file loop, losing
    # all but the last file's entries in the written summary json
    summary = []
    for file in files:
        basename = file['basename']
        logger.info('Preparing upload for file %s' % basename)
        no_register = file.get('no_register')
        pfn = file.get('pfn')
        force_scheme = file.get('force_scheme')

        # bug fix: rse must be taken from the current file BEFORE filling the trace;
        # previously the stale value from the validation loop above was recorded
        rse = file['rse']
        self.trace['scope'] = file['did_scope']
        self.trace['datasetScope'] = file.get('dataset_scope', '')
        self.trace['dataset'] = file.get('dataset_name', '')
        self.trace['remoteSite'] = rse
        self.trace['filesize'] = file['bytes']

        file_did = {'scope': file['did_scope'], 'name': file['did_name']}
        dataset_did_str = file.get('dataset_did_str')

        if not no_register:
            self._register_file(file, registered_dataset_dids)

        rse_settings = self.rses[rse]

        # if file already exists on RSE we're done
        if rsemgr.exists(rse_settings, file_did):
            logger.info('File already exists on RSE. Skipping upload')
            continue

        protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=force_scheme)
        protocols.reverse()
        success = False
        while not success and len(protocols):
            protocol = protocols.pop()
            cur_scheme = protocol['scheme']
            logger.info('Trying upload with %s to %s' % (cur_scheme, rse))
            lfn = {}
            lfn['filename'] = basename
            lfn['scope'] = file['did_scope']
            lfn['name'] = file['did_name']
            lfn['adler32'] = file['adler32']
            lfn['filesize'] = file['bytes']
            self.trace['protocol'] = cur_scheme
            self.trace['transferStart'] = time.time()
            try:
                state = rsemgr.upload(rse_settings=rse_settings,
                                      lfns=lfn,
                                      source_dir=file['dirname'],
                                      force_scheme=cur_scheme,
                                      force_pfn=pfn,
                                      transfer_timeout=file.get('transfer_timeout'))
                success = True
                file['upload_result'] = state
            except (ServiceUnavailable, ResourceTemporaryUnavailable) as error:
                logger.warning('Upload attempt failed')
                logger.debug('Exception: %s' % str(error))

        if success:
            num_succeeded += 1
            self.trace['transferEnd'] = time.time()
            self.trace['clientState'] = 'DONE'
            file['state'] = 'A'
            logger.info('Successfully uploaded file %s' % basename)
            send_trace(self.trace, self.client.host, self.client.user_agent)
            if summary_file_path:
                summary.append(copy.deepcopy(file))

            # add file to dataset if needed
            if dataset_did_str and not no_register:
                try:
                    self.client.attach_dids(file['dataset_scope'], file['dataset_name'], [file_did])
                except Exception as error:
                    logger.warning('Failed to attach file to the dataset')
                    logger.debug(error)
            if not no_register:
                replica_for_api = self._convert_file_for_api(file)
                if not self.client.update_replicas_states(rse, files=[replica_for_api]):
                    logger.warning('Failed to update replica state')
        else:
            logger.error('Failed to upload file %s' % basename)

    if summary_file_path:
        final_summary = {}
        for file in summary:
            file_scope = file['did_scope']
            file_name = file['did_name']
            file_did_str = '%s:%s' % (file_scope, file_name)
            # NOTE(review): keys 'scope'/'name' differ from 'did_scope'/'did_name' used
            # elsewhere in this method -- confirm _collect_and_validate_file_info sets them
            final_summary[file_did_str] = {'scope': file['scope'],
                                           'name': file['name'],
                                           'bytes': file['bytes'],
                                           'rse': file['rse'],
                                           'pfn': file['upload_result']['pfn'],
                                           'guid': file['meta']['guid'],
                                           'adler32': file['adler32'],
                                           'md5': file['md5']}
        # bug fix: json.dump writes str, so the file must be opened in text mode ('w'),
        # not binary ('wb'), on Python 3
        with open(summary_file_path, 'w') as summary_file:
            json.dump(final_summary, summary_file, sort_keys=True, indent=1)

    if num_succeeded == 0:
        raise NoFilesUploaded()
    elif num_succeeded != len(files):
        raise NotAllFilesUploaded()
    return 0
def test_exists_mgr_ok_single_lfn(self):
    """(RSE/PROTOCOLS): Check a single file on storage using LFN (Success)"""
    # assert the result -- a bare call would let the test pass even if the file is missing
    assert mgr.exists(self.rse_settings,
                      {'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user})