def test_put_mgr_ok_multi(self):
    """(RSE/PROTOCOLS): Put multiple files to storage (Success)

    Uploads two local files and checks the per-file status in the result.
    The ssh1 storage variant only supports MD5 checksums; every other
    storage uses adler32.
    """
    scope = 'user.%s' % self.user
    names = ['1_rse_local_put.raw', '2_rse_local_put.raw']
    if self.rse_settings['protocols'][0]['hostname'] == 'ssh1':
        # os.stat(...).st_size replaces the fragile
        # os.stat(...)[os.path.stat.ST_SIZE], which relied on os.path's
        # internal import of the stat module.
        lfns = [{'name': name,
                 'scope': scope,
                 'md5': md5('%s/%s' % (self.tmpdir, name)),
                 'filesize': os.stat('%s/%s' % (self.tmpdir, name)).st_size}
                for name in names]
        result = mgr.upload(self.rse_settings, lfns, source_dir=self.tmpdir,
                            vo=self.vo, impl=self.impl)
    else:
        lfns = [{'name': name,
                 'scope': scope,
                 'adler32': adler32('%s/%s' % (self.tmpdir, name)),
                 'filesize': os.stat('%s/%s' % (self.tmpdir, name)).st_size}
                for name in names]
        result = mgr.upload(self.rse_settings, lfns, source_dir=self.tmpdir,
                            vo=self.vo)
    status = result[0]
    details = result[1]
    if not (status and details['%s:1_rse_local_put.raw' % scope]
            and details['%s:2_rse_local_put.raw' % scope]):
        raise Exception('Return not as expected: %s, %s' % (status, details))
def test_put_mgr_ok_single(self):
    """(RSE/PROTOCOLS): Put a single file to storage (Success)

    The ssh1 storage variant only supports MD5 checksums; every other
    storage uses adler32.
    """
    path = '%s/3_rse_local_put.raw' % self.tmpdir
    if self.rse_settings['protocols'][0]['hostname'] == 'ssh1':
        mgr.upload(self.rse_settings,
                   {'name': '3_rse_local_put.raw',
                    'scope': 'user.%s' % self.user,
                    'md5': md5(path),
                    # .st_size replaces os.stat(...)[os.path.stat.ST_SIZE],
                    # which relied on os.path's internal stat import.
                    'filesize': os.stat(path).st_size},
                   source_dir=self.tmpdir, vo=self.vo, impl=self.impl)
    else:
        mgr.upload(self.rse_settings,
                   {'name': '3_rse_local_put.raw',
                    'scope': 'user.%s' % self.user,
                    'adler32': adler32(path),
                    'filesize': os.stat(path).st_size},
                   source_dir=self.tmpdir, vo=self.vo)
def test_put_mgr_SourceNotFound_single(self):
    """(RSE/PROTOCOLS): Put a single file to storage (SourceNotFound)"""
    # The source file deliberately does not exist on disk, so the upload
    # is expected to fail with SourceNotFound.
    lfn = {'name': 'not_existing_data2.raw',
           'scope': 'user.%s' % self.user,
           'adler32': 'random_stuff',
           'filesize': 0}
    mgr.upload(self.rse_settings, lfn, source_dir=self.tmpdir)
def test_put_mgr_FileReplicaAlreadyExists_single(self):
    """(RSE/PROTOCOLS): Put a single file to storage (FileReplicaAlreadyExists)"""
    # The target replica is already present on the storage, so the upload
    # is expected to fail with FileReplicaAlreadyExists.
    lfn = {'name': '1_rse_remote_get.raw',
           'scope': 'user.%s' % self.user,
           'adler32': 'bla-bla',
           'filesize': 4711}
    mgr.upload(self.rse_settings, lfn, source_dir=self.tmpdir)
def test_delete_mgr_ok_multi(self):
    """MOCK (RSE/PROTOCOLS): Delete multiple files from storage (Success)"""
    # Pre-seed the storage with every remote test file, then delegate the
    # actual deletion checks to the shared MgrTestCases helper.
    scope = 'user.%s' % self.user
    for name in MgrTestCases.files_remote:
        mgr.upload(mgr.get_rse_info(self.rse_id),
                   [{'name': name, 'scope': scope}])
    self.mtc.test_delete_mgr_ok_multi()
def test_get_mgr_SourceNotFound_single_pfn(self):
    """MOCK (RSE/PROTOCOLS): Get a single file from storage providing PFN (SourceNotFound)"""
    # Pre-seed the storage with every remote test file, then delegate to
    # the shared MgrTestCases helper for the actual SourceNotFound check.
    scope = 'user.%s' % self.user
    for name in MgrTestCases.files_remote:
        mgr.upload(mgr.get_rse_info(self.rse_id),
                   [{'name': name, 'scope': scope}])
    self.mtc.test_get_mgr_SourceNotFound_single_pfn()
def test_get_mgr_SourceNotFound_multi(self):
    """MOCK (RSE/PROTOCOLS): Get multiple files from storage providing LFNs and PFNs (SourceNotFound)"""
    # Pre-seed the storage with every remote test file; the delegated
    # helper is then expected to raise SourceNotFound.
    scope = 'user.%s' % self.user
    for name in MgrTestCases.files_remote:
        mgr.upload(mgr.get_rse_info(self.rse_id),
                   [{'name': name, 'scope': scope}])
    with pytest.raises(exception.SourceNotFound):
        self.mtc.test_get_mgr_SourceNotFound_multi()
def test_put_mgr_ok_single(self):
    """(RSE/PROTOCOLS): Put a single file to storage (Success)"""
    path = '%s/3_rse_local_put.raw' % self.tmpdir
    mgr.upload(self.rse_settings,
               {'name': '3_rse_local_put.raw',
                'scope': 'user.%s' % self.user,
                'adler32': adler32(path),
                # .st_size replaces os.stat(...)[os.path.stat.ST_SIZE],
                # which relied on os.path's internal stat import.
                'filesize': os.stat(path).st_size},
               source_dir=self.tmpdir)
def test_put_mgr_ok_multi(self):
    """(RSE/PROTOCOLS): Put multiple files to storage (Success)

    Uploads two local files and checks the per-file status in the result.
    """
    scope = 'user.%s' % self.user
    lfns = []
    for name in ('1_rse_local_put.raw', '2_rse_local_put.raw'):
        path = '%s/%s' % (self.tmpdir, name)
        lfns.append({'name': name,
                     'scope': scope,
                     'adler32': adler32(path),
                     # .st_size replaces os.stat(...)[os.path.stat.ST_SIZE],
                     # which relied on os.path's internal stat import.
                     'filesize': os.stat(path).st_size})
    result = mgr.upload(self.rse_settings, lfns, source_dir=self.tmpdir)
    status = result[0]
    details = result[1]
    if not (status and details['%s:1_rse_local_put.raw' % scope]
            and details['%s:2_rse_local_put.raw' % scope]):
        raise Exception('Return not as expected: %s, %s' % (status, details))
def test_put_mgr_SourceNotFound_multi(self):
    """(RSE/PROTOCOLS): Put multiple files to storage (SourceNotFound)

    One file exists locally, the other does not. The existing file must
    upload successfully and the error recorded for the missing one is
    re-raised (expected: SourceNotFound).
    """
    scope = 'user.%s' % self.user
    existing = '%s/4_rse_local_put.raw' % self.tmpdir
    result = mgr.upload(self.rse_settings,
                        [{'name': 'not_existing_data.raw',
                          'scope': scope,
                          'adler32': 'some_random_stuff',
                          'filesize': 4711},
                         {'name': '4_rse_local_put.raw',
                          'scope': scope,
                          'adler32': adler32(existing),
                          # .st_size replaces the fragile
                          # os.stat(...)[os.path.stat.ST_SIZE] indexing.
                          'filesize': os.stat(existing).st_size}],
                        source_dir=self.tmpdir)
    status = result[0]
    details = result[1]
    if details['%s:4_rse_local_put.raw' % scope]:
        raise details['%s:not_existing_data.raw' % scope]
    else:
        raise Exception('Return not as expected: %s, %s' % (status, details))
def test_put_mgr_FileReplicaAlreadyExists_multi(self):
    """(RSE/PROTOCOLS): Put multiple files to storage (FileReplicaAlreadyExists)"""
    scope = 'user.%s' % self.user
    lfns = [{'name': name, 'scope': scope, 'adler32': "bla-bla", 'filesize': 4711}
            for name in ('1_rse_remote_get.raw', '2_rse_remote_get.raw')]
    status, details = mgr.upload(self.rse_settings, lfns, self.tmpdir)
    # Both replicas already exist on the storage; re-raise the error
    # recorded for the second file (expected: FileReplicaAlreadyExists).
    if details['%s:1_rse_remote_get.raw' % scope]:
        raise details['%s:2_rse_remote_get.raw' % scope]
    else:
        raise Exception('Return not as expected: %s, %s' % (status, details))
def test_put_mgr_ok_multi(self):
    """(RSE/PROTOCOLS): Put multiple files to storage (Success)"""
    scope = 'user.%s' % self.user
    lfns = []
    for name in ('1_rse_local_put.raw', '2_rse_local_put.raw'):
        path = '%s/%s' % (self.tmpdir, name)
        lfns.append({'name': name,
                     'scope': scope,
                     'adler32': adler32(path),
                     # .st_size replaces os.stat(...)[os.path.stat.ST_SIZE],
                     # which relied on os.path's internal stat import.
                     'filesize': os.stat(path).st_size})
    status, details = mgr.upload(self.rse_settings, lfns, self.tmpdir)
    if not (status and details['%s:1_rse_local_put.raw' % scope]
            and details['%s:2_rse_local_put.raw' % scope]):
        raise Exception('Return not as expected: %s, %s' % (status, details))
def test_put_mgr_SourceNotFound_multi(self):
    """(RSE/PROTOCOLS): Put multiple files to storage (SourceNotFound)

    One file exists locally, the other does not. The existing file must
    upload successfully and the error recorded for the missing one is
    re-raised (expected: SourceNotFound).
    """
    scope = 'user.%s' % self.user
    existing = '%s/4_rse_local_put.raw' % self.tmpdir
    status, details = mgr.upload(self.rse_settings,
                                 [{'name': 'not_existing_data.raw',
                                   'scope': scope,
                                   'adler32': 'some_random_stuff',
                                   'filesize': 4711},
                                  {'name': '4_rse_local_put.raw',
                                   'scope': scope,
                                   'adler32': adler32(existing),
                                   # .st_size replaces the fragile
                                   # os.stat(...)[os.path.stat.ST_SIZE].
                                   'filesize': os.stat(existing).st_size}],
                                 self.tmpdir)
    if details['%s:4_rse_local_put.raw' % scope]:
        raise details['%s:not_existing_data.raw' % scope]
    else:
        raise Exception('Return not as expected: %s, %s' % (status, details))
def upload(files, scope, metadata, rse, account, source_dir, worker_number, total_workers, dataset_lifetime, did=None, set_metadata=False):
    """
    Physically upload a batch of local files to the given RSE, retrying on
    transient failures.

    :param files: iterable of file names located in source_dir.
    :param scope: scope to use for the file DIDs.
    :param metadata: dataset metadata (collected here, consumed later).
    :param rse: name of the RSE to upload to.
    :param account: account owning the dataset rules (collected for later use).
    :param source_dir: directory containing the files.
    :param worker_number: index of this worker thread (for log prefixes).
    :param total_workers: total number of worker threads (for log prefixes).
    :param dataset_lifetime: lifetime for the dataset (collected for later use).
    :param did: optional 'scope:name' string of the dataset to attach to.
    :param set_metadata: whether metadata should be applied (collected for later use).
    :returns: False on failure.
    """
    logging.debug('In upload')
    dsn = None
    if did:
        dsn = {'scope': did.split(':')[0], 'name': did.split(':')[1]}
    client = Client()
    list_files = []
    lfns = []
    prepend_str = 'Thread [%i/%i] : ' % (worker_number, total_workers)
    logging.debug(prepend_str + 'Looping over the files')
    for filename in files:
        fullpath = '%s/%s' % (source_dir, filename)
        size = stat(fullpath).st_size
        checksum = adler32(fullpath)
        logging.info(prepend_str + 'File %s : Size %s , adler32 %s' % (fullpath, str(size), checksum))
        list_files.append({'scope': scope, 'name': filename, 'bytes': size, 'adler32': checksum, 'meta': {'guid': generate_uuid()}})
        lfns.append({'name': filename, 'scope': scope, 'filesize': size, 'adler32': checksum, 'filename': filename})

    # Physical upload
    logging.info(prepend_str + 'Uploading physically the files %s on %s' % (str(lfns), rse))
    rse_info = rsemgr.get_rse_info(rse)
    try:
        success_upload = True
        # range() instead of the Python-2-only xrange(); up to 3 attempts.
        for cnt in range(0, 3):
            global_status, ret = rsemgr.upload(rse_info, lfns=lfns, source_dir=source_dir)
            logging.info(prepend_str + 'Returned global status : %s, Returned : %s' % (str(global_status), str(ret)))
            if not global_status:
                for item in ret:
                    # FileReplicaAlreadyExists is not fatal; anything else
                    # triggers an exponential backoff and a full retry.
                    if (not isinstance(ret[item], FileReplicaAlreadyExists)) and ret[item] is not True:
                        sleep(exp(cnt))
                        success_upload = False
                        logging.error(prepend_str + 'Problem to upload file %s with error %s' % (item, str(ret[item])))
                        break
            else:
                break
        if not success_upload:
            logging.error(prepend_str + 'Upload operation to %s failed, removing leftovers' % (rse))
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    except Exception as error:  # 'except Exception, error' is Python-2-only syntax
        logging.error(prepend_str + '%s' % (str(error)))
        return False
def upload(files, scope, metadata, rse, account, source_dir, worker_number, total_workers, dataset_lifetime, did=None):
    """
    Physically upload a batch of local files to the given RSE, retrying on
    transient failures.

    :param files: iterable of file names located in source_dir.
    :param scope: scope to use for the file DIDs.
    :param metadata: dataset metadata (collected here, consumed later).
    :param rse: name of the RSE to upload to.
    :param account: account owning the dataset rules (collected for later use).
    :param source_dir: directory containing the files.
    :param worker_number: index of this worker thread (for log messages).
    :param total_workers: total number of worker threads (for log messages).
    :param dataset_lifetime: lifetime for the dataset (collected for later use).
    :param did: optional 'scope:name' string of the dataset to attach to.
    :returns: False on failure.
    """
    logging.debug('In upload')
    dsn = None
    if did:
        dsn = {'scope': did.split(':')[0], 'name': did.split(':')[1]}
    client = Client()
    list_files = []
    lfns = []
    logging.debug('Thread [%i/%i] : Looping over the files' % (worker_number, total_workers))
    for filename in files:
        fullpath = '%s/%s' % (source_dir, filename)
        size = stat(fullpath).st_size
        checksum = adler32(fullpath)
        logging.info('Thread [%i/%i] : File %s : Size %s , adler32 %s' % (worker_number, total_workers, fullpath, str(size), checksum))
        list_files.append({'scope': scope, 'name': filename, 'bytes': size, 'adler32': checksum, 'meta': {'guid': generate_uuid()}})
        lfns.append({'name': filename, 'scope': scope, 'filesize': size, 'adler32': checksum})

    # Physical upload
    logging.info('Thread [%i/%i] : Uploading physically the files %s on %s' % (worker_number, total_workers, str(lfns), rse))
    rse_info = rsemgr.get_rse_info(rse)
    try:
        success_upload = True
        # range() instead of the Python-2-only xrange(); up to 3 attempts.
        for i in range(0, 3):
            gs, ret = rsemgr.upload(rse_info, lfns=lfns, source_dir=source_dir)
            logging.info('Returned global status : %s, Returned : %s' % (str(gs), str(ret)))
            if not gs:
                for x in ret:
                    # FileReplicaAlreadyExists is not fatal; anything else
                    # triggers an exponential backoff and a full retry.
                    if (not isinstance(ret[x], FileReplicaAlreadyExists)) and ret[x] is not True:
                        sleep(exp(i))
                        success_upload = False
                        logging.error('Problem to upload file %s with error %s' % (x, str(ret[x])))
                        break
            else:
                break
        if not success_upload:
            logging.error('Thread [%i/%i] : Upload operation to %s failed, removing leftovers' % (worker_number, total_workers, rse))
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    except Exception as error:  # 'except Exception, e' is Python-2-only syntax
        # BUGFIX: the exception used to be swallowed silently; log it so
        # failures are diagnosable before bailing out.
        logging.error('Thread [%i/%i] : %s' % (worker_number, total_workers, str(error)))
        return False
def test_get_mgr_SourceNotFound_single_pfn(self):
    """MOCK (RSE/PROTOCOLS): Get a single file from storage providing PFN (SourceNotFound)"""
    # Pre-seed the storage with every remote test file, then delegate to
    # the shared MgrTestCases helper for the actual SourceNotFound check.
    scope = 'user.%s' % self.user
    for name in MgrTestCases.files_remote:
        mgr.upload(mgr.get_rse_info(self.rse_id),
                   [{'name': name, 'scope': scope}])
    self.mtc.test_get_mgr_SourceNotFound_single_pfn()
def test_put_mgr_ok_single(self):
    """(RSE/PROTOCOLS): Put a single file to storage (Success)"""
    path = '%s/3_rse_local_put.raw' % self.tmpdir
    mgr.upload(self.rse_settings,
               {'name': '3_rse_local_put.raw',
                'scope': 'user.%s' % self.user,
                'adler32': adler32(path),
                # .st_size replaces os.stat(...)[os.path.stat.ST_SIZE],
                # which relied on os.path's internal stat import.
                'filesize': os.stat(path).st_size},
               self.tmpdir)
def test_put_mgr_FileReplicaAlreadyExists_single(self):
    """(RSE/PROTOCOLS): Put a single file to storage (FileReplicaAlreadyExists)"""
    # The replica is already present on the storage, so the upload is
    # expected to fail with FileReplicaAlreadyExists.
    lfn = {'name': '1_rse_remote_get.raw',
           'scope': 'user.%s' % self.user,
           'adler32': 'bla-bla',
           'filesize': 4711}
    mgr.upload(self.rse_settings, lfn, self.tmpdir)
def upload(self, items, summary_file_path=None):
    """
    :param items: List of dictionaries. Each dictionary describing a file to upload. Keys:
        path                  - path of the file that will be uploaded
        rse                   - rse name (e.g. 'CERN-PROD_DATADISK') where to upload the file
        did_scope             - Optional: custom did scope (Default: user.<account>)
        did_name              - Optional: custom did name (Default: name of the file)
        dataset_scope         - Optional: custom dataset scope
        dataset_name          - Optional: custom dataset name
        force_scheme          - Optional: force a specific scheme (if PFN upload this will be overwritten) (Default: None)
        pfn                   - Optional: use a given PFN (this sets no_register to True)
        no_register           - Optional: if True, the file will not be registered in the rucio catalogue
        lifetime              - Optional: the lifetime of the file after it was uploaded
        transfer_timeout      - Optional: time after the upload will be aborted
        guid                  - Optional: guid of the file
    :param summary_file_path: Optional: a path where a summary in form of a json file will be stored

    :returns: 0 on success

    :raises InputValidationError: if any input arguments are in a wrong format
    :raises RSEBlacklisted: if a given RSE is not available for writing
    :raises NoFilesUploaded: if no files were successfully uploaded
    :raises NotAllFilesUploaded: if not all files were successfully uploaded
    """
    logger = self.logger
    self.trace['uuid'] = generate_uuid()

    # check given sources, resolve dirs into files, and collect meta infos
    files = self._collect_and_validate_file_info(items)

    # check if RSE of every file is available for writing
    # and cache rse settings
    registered_dataset_dids = set()
    registered_file_dids = set()
    for file in files:
        rse = file['rse']
        if not self.rses.get(rse):
            rse_settings = self.rses.setdefault(rse, rsemgr.get_rse_info(rse))
            if rse_settings['availability_write'] != 1:
                raise RSEBlacklisted('%s is blacklisted for writing. No actions have been taken' % rse)

        dataset_scope = file.get('dataset_scope')
        dataset_name = file.get('dataset_name')
        if dataset_scope and dataset_name:
            dataset_did_str = ('%s:%s' % (dataset_scope, dataset_name))
            file['dataset_did_str'] = dataset_did_str
            registered_dataset_dids.add(dataset_did_str)

        registered_file_dids.add('%s:%s' % (file['did_scope'], file['did_name']))

    wrong_dids = registered_file_dids.intersection(registered_dataset_dids)
    if len(wrong_dids):
        raise InputValidationError('DIDs used to address both files and datasets: %s' % str(wrong_dids))

    # clear this set again to ensure that we only try to register datasets once
    registered_dataset_dids = set()
    num_succeeded = 0
    # BUGFIX: summary used to be re-created inside the per-file loop, so the
    # JSON summary only ever contained the last uploaded file (and raised
    # NameError for an empty file list). Create it once here.
    summary = []
    for file in files:
        basename = file['basename']
        logger.info('Preparing upload for file %s' % basename)
        no_register = file.get('no_register')
        pfn = file.get('pfn')
        force_scheme = file.get('force_scheme')

        # BUGFIX: resolve this file's RSE before filling the trace; the trace
        # previously recorded whatever rse the validation loop left behind.
        rse = file['rse']
        rse_settings = self.rses[rse]

        self.trace['scope'] = file['did_scope']
        self.trace['datasetScope'] = file.get('dataset_scope', '')
        self.trace['dataset'] = file.get('dataset_name', '')
        self.trace['remoteSite'] = rse
        self.trace['filesize'] = file['bytes']

        file_did = {'scope': file['did_scope'], 'name': file['did_name']}
        dataset_did_str = file.get('dataset_did_str')

        if not no_register:
            self._register_file(file, registered_dataset_dids)

        # if file already exists on RSE we're done
        if rsemgr.exists(rse_settings, file_did):
            logger.info('File already exists on RSE. Skipping upload')
            continue

        protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=force_scheme)
        protocols.reverse()
        success = False
        while not success and len(protocols):
            protocol = protocols.pop()
            cur_scheme = protocol['scheme']
            logger.info('Trying upload with %s to %s' % (cur_scheme, rse))
            lfn = {'filename': basename,
                   'scope': file['did_scope'],
                   'name': file['did_name'],
                   'adler32': file['adler32'],
                   'filesize': file['bytes']}

            self.trace['protocol'] = cur_scheme
            self.trace['transferStart'] = time.time()
            try:
                state = rsemgr.upload(rse_settings=rse_settings,
                                      lfns=lfn,
                                      source_dir=file['dirname'],
                                      force_scheme=cur_scheme,
                                      force_pfn=pfn,
                                      transfer_timeout=file.get('transfer_timeout'))
                success = True
                file['upload_result'] = state
            except (ServiceUnavailable, ResourceTemporaryUnavailable) as error:
                logger.warning('Upload attempt failed')
                logger.debug('Exception: %s' % str(error))

        if success:
            num_succeeded += 1
            self.trace['transferEnd'] = time.time()
            self.trace['clientState'] = 'DONE'
            file['state'] = 'A'
            logger.info('Successfully uploaded file %s' % basename)
            send_trace(self.trace, self.client.host, self.client.user_agent)
            if summary_file_path:
                summary.append(copy.deepcopy(file))

            # add file to dataset if needed
            if dataset_did_str and not no_register:
                try:
                    self.client.attach_dids(file['dataset_scope'], file['dataset_name'], [file_did])
                except Exception as error:
                    logger.warning('Failed to attach file to the dataset')
                    logger.debug(error)
            if not no_register:
                replica_for_api = self._convert_file_for_api(file)
                if not self.client.update_replicas_states(rse, files=[replica_for_api]):
                    logger.warning('Failed to update replica state')
        else:
            logger.error('Failed to upload file %s' % basename)

    if summary_file_path:
        final_summary = {}
        for file in summary:
            file_scope = file['did_scope']
            file_name = file['did_name']
            file_did_str = '%s:%s' % (file_scope, file_name)
            final_summary[file_did_str] = {'scope': file['scope'],
                                           'name': file['name'],
                                           'bytes': file['bytes'],
                                           'rse': file['rse'],
                                           'pfn': file['upload_result']['pfn'],
                                           'guid': file['meta']['guid'],
                                           'adler32': file['adler32'],
                                           'md5': file['md5']}
        # BUGFIX: 'w' instead of 'wb' - json.dump writes str, which a binary
        # file object rejects on Python 3.
        with open(summary_file_path, 'w') as summary_file:
            json.dump(final_summary, summary_file, sort_keys=True, indent=1)

    if num_succeeded == 0:
        raise NoFilesUploaded()
    elif num_succeeded != len(files):
        raise NotAllFilesUploaded()
    return 0
def upload(files, scope, metadata, rse, account, source_dir, dataset_lifetime, did=None, set_metadata=False, logger=logging.log):
    """
    Upload a list of local files to an RSE and register them in Rucio.

    :param files: iterable of file names located in source_dir.
    :param scope: scope to use for the file DIDs.
    :param metadata: dataset metadata, applied only if set_metadata is True.
    :param rse: name of the RSE to upload to.
    :param account: account owning the dataset rule.
    :param source_dir: directory containing the files.
    :param dataset_lifetime: lifetime for the dataset, if one is created.
    :param did: optional 'scope:name' string of the dataset to attach to.
    :param set_metadata: whether to pass metadata when creating the dataset.
    :param logger: logging callable with logging.log's signature.
    :returns: True on success, False on any failure (leftovers are deleted).
    """
    logger(logging.DEBUG, 'In upload')
    dsn = None
    if did:
        dsn = {'scope': did.split(':')[0], 'name': did.split(':')[1]}
    client = Client()
    list_files = []
    lfns = []
    for filename in files:
        # Names containing '/' are flattened (slashes removed) to build the
        # physical file name on disk, while the DID keeps the original name.
        physical_fname = filename
        if physical_fname.find('/') > -1:
            physical_fname = "".join(filename.split('/'))
        fullpath = '%s/%s' % (source_dir, physical_fname)
        size = stat(fullpath).st_size
        checksum = adler32(fullpath)
        logger(logging.INFO, 'File %s : Size %s , adler32 %s', fullpath, str(size), checksum)
        list_files.append({'scope': scope, 'name': filename, 'bytes': size, 'adler32': checksum, 'meta': {'guid': generate_uuid()}})
        lfns.append({'name': filename, 'scope': scope, 'filesize': size, 'adler32': checksum, 'filename': physical_fname})

    # Physical upload
    logger(logging.INFO, 'Uploading physically the files %s on %s', str(lfns), rse)
    rse_info = rsemgr.get_rse_info(rse, vo=client.vo)
    try:
        success_upload = True
        # Up to 3 attempts with exponential backoff between retries.
        for cnt in range(0, 3):
            rows = rsemgr.upload(rse_info, lfns=lfns, source_dir=source_dir, logger=logger)
            # temporary hack
            # NOTE(review): assumes rows exposes both the key 'success' and
            # the key/index 1 - confirm against rsemgr.upload's return type.
            global_status, ret = rows['success'], rows[1]
            logger(logging.INFO, 'Returned global status : %s, Returned : %s', str(global_status), str(ret))
            if not global_status:
                for item in ret:
                    # FileReplicaAlreadyExists is tolerated; anything else
                    # marks the attempt as failed and triggers a retry.
                    if (not isinstance(ret[item], FileReplicaAlreadyExists)) and ret[item] is not True:
                        sleep(exp(cnt))
                        success_upload = False
                        logger(logging.ERROR, 'Problem to upload file %s with error %s', item, str(ret[item]))
                        break
            else:
                break
        if not success_upload:
            logger(logging.ERROR, 'Upload operation to %s failed, removing leftovers', rse)
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    except Exception as error:
        logger(logging.DEBUG, "Exception", exc_info=True)
        logger(logging.ERROR, '%s', str(error))
        return False
    logger(logging.INFO, 'Files successfully copied on %s', rse)

    # Registering DIDs and replicas in Rucio
    logger(logging.INFO, 'Registering DIDs and replicas in Rucio')
    meta = metadata
    if not set_metadata:
        meta = None
    if dsn:
        try:
            # Create the dataset with a replication rule, then attach the
            # uploaded files to it (this also registers the replicas).
            client.add_dataset(scope=dsn['scope'], name=dsn['name'], rules=[{'account': account, 'copies': 1, 'rse_expression': rse, 'grouping': 'DATASET', 'activity': 'Functional Test'}], meta=meta, lifetime=dataset_lifetime)
            client.add_files_to_dataset(scope=dsn['scope'], name=dsn['name'], files=list_files, rse=rse)
            logger(logging.INFO, 'Upload operation for %s:%s done', dsn['scope'], dsn['name'])
        except Exception as error:
            logger(logging.DEBUG, "Exception", exc_info=True)
            logger(logging.ERROR, 'Failed to upload %s', str(list_files))
            logger(logging.ERROR, '%s', str(error))
            logger(logging.ERROR, 'removing files from the Storage')
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    else:
        logger(logging.WARNING, 'No dsn is specified')
        try:
            # No dataset: register the replicas directly and protect them
            # with a per-file replication rule.
            client.add_replicas(files=list_files, rse=rse)
            client.add_replication_rule(list_files, copies=1, rse_expression=rse, activity='Functional Test')
            logger(logging.INFO, 'Upload operation for %s done', str(list_files))
        except Exception as error:
            logger(logging.DEBUG, "Exception", exc_info=True)
            logger(logging.ERROR, 'Failed to upload %s', str(list_files))
            logger(logging.ERROR, '%s', str(error))
            logger(logging.ERROR, 'Removing files from the Storage')
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    return True
def test_delete_mgr_ok_single(self):
    """MOCK (RSE/PROTOCOLS): Delete a single file from storage (Success)"""
    # Pre-seed the storage with every remote test file, then delegate the
    # actual deletion check to the shared MgrTestCases helper.
    scope = 'user.%s' % self.user
    for name in MgrTestCases.files_remote:
        mgr.upload(mgr.get_rse_info(self.rse_id),
                   [{'name': name, 'scope': scope}])
    self.mtc.test_delete_mgr_ok_single()
def upload(files, scope, metadata, rse, account, source_dir, worker_number, total_workers, dataset_lifetime, did=None, set_metadata=False):
    """
    Upload a list of local files to an RSE and register them in Rucio.

    :param files: iterable of file names located in source_dir.
    :param scope: scope to use for the file DIDs.
    :param metadata: dataset metadata, applied only if set_metadata is True.
    :param rse: name of the RSE to upload to.
    :param account: account owning the dataset rule.
    :param source_dir: directory containing the files.
    :param worker_number: index of this worker thread (for log prefixes).
    :param total_workers: total number of worker threads (for log prefixes).
    :param dataset_lifetime: lifetime for the dataset, if one is created.
    :param did: optional 'scope:name' string of the dataset to attach to.
    :param set_metadata: whether to pass metadata when creating the dataset.
    :returns: True on success, False on any failure (leftovers are deleted).
    """
    logging.debug('In upload')
    dsn = None
    if did:
        dsn = {'scope': did.split(':')[0], 'name': did.split(':')[1]}
    client = Client()
    list_files = []
    lfns = []
    prepend_str = 'Thread [%i/%i] : ' % (worker_number, total_workers)
    logging.debug(prepend_str + 'Looping over the files')
    for filename in files:
        fullpath = '%s/%s' % (source_dir, filename)
        size = stat(fullpath).st_size
        checksum = adler32(fullpath)
        logging.info(prepend_str + 'File %s : Size %s , adler32 %s' % (fullpath, str(size), checksum))
        list_files.append({'scope': scope, 'name': filename, 'bytes': size, 'adler32': checksum, 'meta': {'guid': generate_uuid()}})
        lfns.append({'name': filename, 'scope': scope, 'filesize': size, 'adler32': checksum, 'filename': filename})

    # Physical upload
    logging.info(prepend_str + 'Uploading physically the files %s on %s' % (str(lfns), rse))
    rse_info = rsemgr.get_rse_info(rse)
    try:
        success_upload = True
        # Up to 3 attempts with exponential backoff between retries.
        for cnt in range(0, 3):
            rows = rsemgr.upload(rse_info, lfns=lfns, source_dir=source_dir)
            # temporary hack
            # NOTE(review): assumes rows exposes both the key 'success' and
            # the key/index 1 - confirm against rsemgr.upload's return type.
            global_status, ret = rows['success'], rows[1]
            logging.info(prepend_str + 'Returned global status : %s, Returned : %s' % (str(global_status), str(ret)))
            if not global_status:
                for item in ret:
                    # FileReplicaAlreadyExists is tolerated; anything else
                    # marks the attempt as failed and triggers a retry.
                    if (not isinstance(ret[item], FileReplicaAlreadyExists)) and ret[item] is not True:
                        sleep(exp(cnt))
                        success_upload = False
                        logging.error(prepend_str + 'Problem to upload file %s with error %s' % (item, str(ret[item])))
                        break
            else:
                break
        if not success_upload:
            logging.error(prepend_str + 'Upload operation to %s failed, removing leftovers' % (rse))
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    except Exception as error:
        logging.debug(traceback.format_exc())
        logging.error(prepend_str + '%s' % (str(error)))
        return False
    logging.info(prepend_str + 'Files successfully copied on %s' % (rse))

    # Registering DIDs and replicas in Rucio
    logging.info(prepend_str + 'Registering DIDs and replicas in Rucio')
    meta = metadata
    if not set_metadata:
        meta = None
    if dsn:
        try:
            # Create the dataset with a replication rule, then attach the
            # uploaded files to it (this also registers the replicas).
            client.add_dataset(scope=dsn['scope'], name=dsn['name'], rules=[{'account': account, 'copies': 1, 'rse_expression': rse, 'grouping': 'DATASET', 'activity': 'Functional Test'}], meta=meta, lifetime=dataset_lifetime)
            client.add_files_to_dataset(scope=dsn['scope'], name=dsn['name'], files=list_files, rse=rse)
            logging.info(prepend_str + 'Upload operation for %s:%s done' % (dsn['scope'], dsn['name']))
        except Exception as error:
            logging.debug(traceback.format_exc())
            logging.error(prepend_str + 'Failed to upload %(files)s' % locals())
            logging.error(prepend_str + '%s' % (str(error)))
            logging.error(prepend_str + 'Removing files from the Storage')
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    else:
        logging.warning(prepend_str + 'No dsn is specified')
        try:
            # No dataset: register the replicas directly and protect them
            # with a per-file replication rule.
            client.add_replicas(files=list_files, rse=rse)
            client.add_replication_rule(list_files, copies=1, rse_expression=rse, activity='Functional Test')
            logging.info(prepend_str + 'Upload operation for %s done' % (str(list_files)))
        except Exception as error:
            logging.debug(traceback.format_exc())
            logging.error(prepend_str + 'Failed to upload %(files)s' % locals())
            logging.error(prepend_str + '%s' % (str(error)))
            logging.error(prepend_str + 'Removing files from the Storage')
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    return True
def upload(self, sources_with_settings, summary_file_path=None):
    """
    Upload a list of files, register them in the catalogue and optionally
    write a JSON summary.

    :param sources_with_settings: list of dictionaries of file descriptions.
        None means optional:
        [{'path': 'file1', 'rse': 'rse_name1', 'did_scope': None, 'did_name': None,
          'dataset_name': None, 'dataset_scope': None, 'scheme': None, 'pfn': None,
          'no_register': None, 'lifetime': None},
         {'path': 'file2', 'rse': 'rse_name2', 'did_scope': None, 'did_name': None,
          'dataset_name': None, 'dataset_scope': None, 'scheme': None, 'pfn': None,
          'no_register': None, 'lifetime': None}]
    :param summary_file_path: optional path where a JSON summary is written.

    :raises InputValidationError: if a DID addresses both a file and a dataset.
    :raises RSEBlacklisted: if a target RSE is not available for writing.
    """
    logger = self.logger
    self.trace['uuid'] = generate_uuid()

    # check given sources, resolve dirs into files, and collect meta infos
    files = self.collect_and_validate_file_info(sources_with_settings)

    # check if RSE of every file is available for writing
    # and cache rse settings
    registered_dataset_dids = set()
    registered_file_dids = set()
    for file in files:
        rse = file['rse']
        if not self.rses.get(rse):
            rse_settings = self.rses.setdefault(rse, rsemgr.get_rse_info(rse))
            if rse_settings['availability_write'] != 1:
                raise RSEBlacklisted('%s is blacklisted for writing. No actions have been taken' % rse)

        dataset_scope = file.get('dataset_scope')
        dataset_name = file.get('dataset_name')
        if dataset_scope and dataset_name:
            dataset_did_str = ('%s:%s' % (dataset_scope, dataset_name))
            file['dataset_did_str'] = dataset_did_str
            registered_dataset_dids.add(dataset_did_str)

        registered_file_dids.add('%s:%s' % (file['did_scope'], file['did_name']))

    wrong_dids = registered_file_dids.intersection(registered_dataset_dids)
    if len(wrong_dids):
        raise InputValidationError('DIDs used to address both files and datasets: %s' % str(wrong_dids))

    # clear this set again to ensure that we only try to register datasets once
    registered_dataset_dids = set()
    for file in files:
        basename = file['basename']
        logger.info('Preparing upload for file %s' % basename)
        no_register = file.get('no_register')
        pfn = file.get('pfn')
        scheme = file.get('scheme')

        # NOTE(review): rse here still holds the value left by the validation
        # loop above; it is only re-assigned to this file's RSE a few lines
        # below - confirm the trace is meant to use the stale value.
        self.trace['scope'] = file['did_scope']
        self.trace['datasetScope'] = file.get('dataset_scope', '')
        self.trace['dataset'] = file.get('dataset_name', '')
        self.trace['remoteSite'] = rse
        self.trace['filesize'] = file['bytes']

        file_scope = file['did_scope']
        file_name = file['did_name']
        file_did = {'scope': file_scope, 'name': file_name}
        file_did_str = '%s:%s' % (file_scope, file_name)
        dataset_did_str = file.get('dataset_did_str')

        rse = file['rse']
        rse_settings = self.rses[rse]

        # register a dataset if we need to
        if dataset_did_str and dataset_did_str not in registered_dataset_dids and not no_register:
            registered_dataset_dids.add(dataset_did_str)
            try:
                self.client.add_dataset(scope=file['dataset_scope'], name=file['dataset_name'], rules=[{'account': self.account, 'copies': 1, 'rse_expression': rse, 'grouping': 'DATASET', 'lifetime': file['lifetime']}])
                logger.info('Dataset %s successfully created' % dataset_did_str)
            except DataIdentifierAlreadyExists:
                # TODO: Need to check the rules thing!!
                logger.info("Dataset %s already exists" % dataset_did_str)

        replica_for_api = self.convert_file_for_api(file)
        try:
            # if the remote checksum is different this did must not be used
            meta = self.client.get_metadata(file_scope, file_name)
            logger.info('Comparing checksums of %s and %s' % (basename, file_did_str))
            if meta['adler32'] != file['adler32']:
                logger.error('Local checksum %s does not match remote checksum %s' % (file['adler32'], meta['adler32']))
                raise DataIdentifierAlreadyExists

            # add file to rse if it is not registered yet
            replicastate = list(self.client.list_replicas([file_did], all_states=True))
            if rse not in replicastate[0]['rses'] and not no_register:
                logger.info('Adding replica at %s in Rucio catalog' % rse)
                self.client.add_replicas(rse=file['rse'], files=[replica_for_api])
        except DataIdentifierNotFound:
            if not no_register:
                logger.info('Adding replica at %s in Rucio catalog' % rse)
                self.client.add_replicas(rse=file['rse'], files=[replica_for_api])
                if not dataset_did_str:
                    # only need to add rules for files if no dataset is given
                    logger.info('Adding replication rule at %s' % rse)
                    self.client.add_replication_rule([file_did], copies=1, rse_expression=rse, lifetime=file['lifetime'])

        # if file already exists on RSE we're done
        if not rsemgr.exists(rse_settings, file_did):
            protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=scheme)
            protocols.reverse()
            success = False
            # NOTE(review): summary is re-created per file, so the final JSON
            # summary below only ever sees the last file - confirm intent.
            summary = []
            while not success and len(protocols):
                # try each write protocol in turn until one succeeds
                protocol = protocols.pop()
                logger.info('Trying upload to %s with protocol %s' % (rse, protocol['scheme']))
                lfn = {}
                lfn['filename'] = file['basename']
                lfn['scope'] = file['did_scope']
                lfn['name'] = file['did_name']
                lfn['adler32'] = file['adler32']
                lfn['filesize'] = file['bytes']

                self.trace['protocol'] = protocol['scheme']
                self.trace['transferStart'] = time.time()
                try:
                    state = rsemgr.upload(rse_settings=rse_settings, lfns=lfn, source_dir=file['dirname'], force_scheme=protocol['scheme'], force_pfn=pfn)
                    success = True
                    file['upload_result'] = state
                except (ServiceUnavailable, ResourceTemporaryUnavailable) as error:
                    logger.warning('Upload attempt failed')
                    logger.debug('Exception: %s' % str(error))

            if success:
                self.trace['transferEnd'] = time.time()
                self.trace['clientState'] = 'DONE'
                file['state'] = 'A'
                logger.info('File %s successfully uploaded' % basename)
                send_trace(self.trace, self.client.host, self.user_agent, logger=logger)
                if summary_file_path:
                    summary.append(copy.deepcopy(file))
            else:
                logger.error('Failed to upload file %s' % basename)
                # TODO trace?
                continue  # skip attach_did and update_states for this file
        else:
            logger.info('File already exists on RSE. Skipped upload')

        if not no_register:
            # add file to dataset if needed
            if dataset_did_str:
                try:
                    logger.info('Attaching file to dataset %s' % dataset_did_str)
                    self.client.attach_dids(file['dataset_scope'], file['dataset_name'], [file_did])
                except Exception as error:
                    logger.warning('Failed to attach file to the dataset')
                    logger.warning(error)
            logger.info('Setting replica state to available')
            replica_for_api = self.convert_file_for_api(file)
            self.client.update_replicas_states(rse, files=[replica_for_api])

    if summary_file_path:
        final_summary = {}
        for file in summary:
            file_scope = file['did_scope']
            file_name = file['did_name']
            file_did_str = '%s:%s' % (file_scope, file_name)
            final_summary[file_did_str] = {'scope': file['scope'], 'name': file['name'], 'bytes': file['bytes'], 'rse': file['rse'], 'pfn': file['upload_result']['pfn'], 'guid': file['meta']['guid'], 'adler32': file['adler32'], 'md5': file['md5']}
        # NOTE(review): 'wb' with json.dump would fail on Python 3 (json.dump
        # writes str, not bytes) - confirm which interpreter this targets.
        with open(summary_file_path, 'wb') as summary_file:
            json.dump(final_summary, summary_file, sort_keys=True, indent=1)
def upload(self, items, summary_file_path=None, traces_copy_out=None):
    """
    Upload a list of local files to their target RSEs and (optionally)
    register them in the Rucio catalogue.

    :param items: List of dictionaries. Each dictionary describing a file to upload. Keys:
        path                  - path of the file that will be uploaded
        rse                   - rse name (e.g. 'CERN-PROD_DATADISK') where to upload the file
        did_scope             - Optional: custom did scope (Default: user.<account>)
        did_name              - Optional: custom did name (Default: name of the file)
        dataset_scope         - Optional: custom dataset scope
        dataset_name          - Optional: custom dataset name
        force_scheme          - Optional: force a specific scheme (if PFN upload this will be overwritten) (Default: None)
        pfn                   - Optional: use a given PFN (this sets no_register to True, and no_register becomes mandatory)
        no_register           - Optional: if True, the file will not be registered in the rucio catalogue
        register_after_upload - Optional: if True, the file will be registered after successful upload
        lifetime              - Optional: the lifetime of the file after it was uploaded
        transfer_timeout      - Optional: time after the upload will be aborted
        guid                  - Optional: guid of the file
    :param summary_file_path: Optional: a path where a summary in form of a json file will be stored
    :param traces_copy_out: reference to an external list, where the traces should be uploaded

    :returns: 0 on success

    :raises InputValidationError: if any input arguments are in a wrong format
    :raises RSEBlacklisted: if a given RSE is not available for writing
    :raises NoFilesUploaded: if no files were successfully uploaded
    :raises NotAllFilesUploaded: if not all files were successfully uploaded
    """
    logger = self.logger
    self.trace['uuid'] = generate_uuid()

    # check given sources, resolve dirs into files, and collect meta infos
    files = self._collect_and_validate_file_info(items)

    # check if RSE of every file is available for writing
    # and cache rse settings
    registered_dataset_dids = set()
    registered_file_dids = set()
    for file in files:
        rse = file['rse']
        if not self.rses.get(rse):
            rse_settings = self.rses.setdefault(rse, rsemgr.get_rse_info(rse))
            if rse_settings['availability_write'] != 1:
                raise RSEBlacklisted('%s is blacklisted for writing. No actions have been taken' % rse)
        dataset_scope = file.get('dataset_scope')
        dataset_name = file.get('dataset_name')
        if dataset_scope and dataset_name:
            dataset_did_str = ('%s:%s' % (dataset_scope, dataset_name))
            file['dataset_did_str'] = dataset_did_str
            registered_dataset_dids.add(dataset_did_str)
        registered_file_dids.add('%s:%s' % (file['did_scope'], file['did_name']))

    # a DID must not address both a file and a dataset
    wrong_dids = registered_file_dids.intersection(registered_dataset_dids)
    if len(wrong_dids):
        raise InputValidationError('DIDs used to address both files and datasets: %s' % str(wrong_dids))

    # clear this set again to ensure that we only try to register datasets once
    registered_dataset_dids = set()
    num_succeeded = 0
    summary = []
    for file in files:
        basename = file['basename']
        logger.info('Preparing upload for file %s' % basename)

        no_register = file.get('no_register')
        register_after_upload = file.get('register_after_upload') and not no_register
        pfn = file.get('pfn')
        force_scheme = file.get('force_scheme')
        delete_existing = False

        trace = copy.deepcopy(self.trace)
        # appending trace to list reference, if the reference exists
        if traces_copy_out is not None:
            traces_copy_out.append(trace)

        # BUGFIX: bind the current file's RSE before filling the trace;
        # previously 'rse' still held the value left over from the
        # validation loop above, so remoteSite was wrong for the first
        # file(s) whenever files target different RSEs.
        rse = file['rse']
        trace['scope'] = file['did_scope']
        trace['datasetScope'] = file.get('dataset_scope', '')
        trace['dataset'] = file.get('dataset_name', '')
        trace['remoteSite'] = rse
        trace['filesize'] = file['bytes']

        file_did = {'scope': file['did_scope'], 'name': file['did_name']}
        dataset_did_str = file.get('dataset_did_str')
        rse_settings = self.rses[rse]
        rse_sign_service = rse_settings.get('sign_url', None)
        is_deterministic = rse_settings.get('deterministic', True)
        if not is_deterministic and not pfn:
            logger.error('PFN has to be defined for NON-DETERMINISTIC RSE.')
            continue
        if pfn and is_deterministic:
            logger.warning('Upload with given pfn implies that no_register is True, except non-deterministic RSEs')
            no_register = True

        # resolving local area networks
        domain = 'wan'
        rse_attributes = {}
        try:
            rse_attributes = self.client.list_rse_attributes(rse)
        except Exception:
            # best-effort: missing attributes only prevent the lan shortcut
            logger.warning('Attributes of the RSE: %s not available.' % rse)
        if (self.client_location and 'lan' in rse_settings['domain'] and 'site' in rse_attributes):
            if self.client_location['site'] == rse_attributes['site']:
                domain = 'lan'

        if not no_register and not register_after_upload:
            self._register_file(file, registered_dataset_dids)

        # if register_after_upload, file should be overwritten if it is not registered
        # otherwise if file already exists on RSE we're done
        if register_after_upload:
            if rsemgr.exists(rse_settings, pfn if pfn else file_did, domain=domain, auth_token=self.auth_token, logger=logger):
                try:
                    self.client.get_did(file['did_scope'], file['did_name'])
                    logger.info('File already registered. Skipping upload.')
                    trace['stateReason'] = 'File already exists'
                    continue
                except DataIdentifierNotFound:
                    logger.info('File already exists on RSE. Previous left overs will be overwritten.')
                    delete_existing = True
        elif not is_deterministic and not no_register:
            if rsemgr.exists(rse_settings, pfn, domain=domain, auth_token=self.auth_token):
                logger.info('File already exists on RSE with given pfn. Skipping upload. Existing replica has to be removed first.')
                trace['stateReason'] = 'File already exists'
                continue
            elif rsemgr.exists(rse_settings, file_did, domain=domain, auth_token=self.auth_token):
                logger.info('File already exists on RSE with different pfn. Skipping upload.')
                trace['stateReason'] = 'File already exists'
                continue
        else:
            if rsemgr.exists(rse_settings, pfn if pfn else file_did, domain=domain, auth_token=self.auth_token):
                logger.info('File already exists on RSE. Skipping upload')
                trace['stateReason'] = 'File already exists'
                continue

        # protocol handling and upload: try each write protocol in order
        # of preference until one succeeds
        protocols = rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=force_scheme, domain=domain)
        protocols.reverse()
        success = False
        state_reason = ''
        while not success and len(protocols):
            protocol = protocols.pop()
            cur_scheme = protocol['scheme']
            logger.info('Trying upload with %s to %s' % (cur_scheme, rse))
            lfn = {}
            lfn['filename'] = basename
            lfn['scope'] = file['did_scope']
            lfn['name'] = file['did_name']
            for checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS:
                if checksum_name in file:
                    lfn[checksum_name] = file[checksum_name]
            lfn['filesize'] = file['bytes']

            sign_service = None
            if cur_scheme == 'https':
                sign_service = rse_sign_service

            trace['protocol'] = cur_scheme
            trace['transferStart'] = time.time()
            try:
                state = rsemgr.upload(rse_settings=rse_settings,
                                      lfns=lfn,
                                      domain=domain,
                                      source_dir=file['dirname'],
                                      force_scheme=cur_scheme,
                                      force_pfn=pfn,
                                      transfer_timeout=file.get('transfer_timeout'),
                                      delete_existing=delete_existing,
                                      sign_service=sign_service,
                                      auth_token=self.auth_token,
                                      logger=logger)
                success = state['success']
                file['upload_result'] = state
            except (ServiceUnavailable, ResourceTemporaryUnavailable) as error:
                logger.warning('Upload attempt failed')
                logger.debug('Exception: %s' % str(error))
                state_reason = str(error)

        if success:
            num_succeeded += 1
            trace['transferEnd'] = time.time()
            trace['clientState'] = 'DONE'
            file['state'] = 'A'
            logger.info('Successfully uploaded file %s' % basename)
            self._send_trace(trace)

            if summary_file_path:
                summary.append(copy.deepcopy(file))

            if not no_register:
                if register_after_upload:
                    self._register_file(file, registered_dataset_dids)
                replica_for_api = self._convert_file_for_api(file)
                if not self.client.update_replicas_states(rse, files=[replica_for_api]):
                    logger.warning('Failed to update replica state')

            # add file to dataset if needed
            if dataset_did_str and not no_register:
                try:
                    self.client.attach_dids(file['dataset_scope'], file['dataset_name'], [file_did])
                except Exception as error:
                    logger.warning('Failed to attach file to the dataset')
                    logger.debug(error)
        else:
            trace['clientState'] = 'FAILED'
            trace['stateReason'] = state_reason
            self._send_trace(trace)
            logger.error('Failed to upload file %s' % basename)

    if summary_file_path:
        final_summary = {}
        for file in summary:
            file_scope = file['did_scope']
            file_name = file['did_name']
            file_did_str = '%s:%s' % (file_scope, file_name)
            final_summary[file_did_str] = {'scope': file_scope,
                                           'name': file_name,
                                           'bytes': file['bytes'],
                                           'rse': file['rse'],
                                           'pfn': file['upload_result'].get('pfn', ''),
                                           'guid': file['meta']['guid']}
            for checksum_name in GLOBALLY_SUPPORTED_CHECKSUMS:
                if checksum_name in file:
                    final_summary[file_did_str][checksum_name] = file[checksum_name]
        # BUGFIX: json.dump writes str, so the file must be opened in
        # text mode ('w'); 'wb' raises TypeError on Python 3.
        with open(summary_file_path, 'w') as summary_file:
            json.dump(final_summary, summary_file, sort_keys=True, indent=1)

    if num_succeeded == 0:
        raise NoFilesUploaded()
    elif num_succeeded != len(files):
        raise NotAllFilesUploaded()
    return 0
def test_put_mgr_SourceNotFound_single(self):
    """(RSE/PROTOCOLS): Put a single file to storage (SourceNotFound)"""
    # Attempt to upload a file that does not exist in the source directory.
    lfn = {'name': 'not_existing_data2.raw',
           'scope': 'user.%s' % self.user,
           'adler32': 'random_stuff',
           'filesize': 0}
    mgr.upload(self.rse_settings, lfn, source_dir=self.tmpdir)