def _stage_in_bulk(dst, files, trace_report_out=None, trace_common_fields=None):
    """
    Stage-in files in bulk using the Rucio API.

    One download description is built per fspec and the whole list is handed to
    DownloadClient.download_pfns() in a single call, with num_threads set to the
    number of descriptions. Every fspec gets its status_code reset to 0.

    :param dst: fallback destination directory, used when fspec.workdir is not set (string).
    :param files: list of fspec objects.
    :param trace_report_out: forwarded to the download client as traces_copy_out.
    :param trace_common_fields: forwarded to the download client as trace_custom_fields
                                (an empty dictionary is used when not set).
    :return: None (the client result is only written to the debug log).
    """

    # init. download client
    from rucio.client.downloadclient import DownloadClient
    client = DownloadClient(logger=logger)

    # hand the module-level tracing setting to the client, if it exposes one
    if hasattr(client, 'tracing'):
        client.tracing = tracing_rucio

    # one description dictionary per file, collected before the download call
    descriptions = []
    for fspec in files:
        fspec.status_code = 0

        entry = {
            'did_scope': fspec.scope,
            'did_name': fspec.lfn,
            'did': '%s:%s' % (fspec.scope, fspec.lfn),
            'rse': fspec.ddmendpoint,
            'base_dir': fspec.workdir or dst,
            'no_subdir': True,
        }

        if not fspec.turl:
            # the entry is still queued without a 'pfn' key (original note: "fail somehow")
            logger.warning('cannot perform bulk download since fspec.turl is not set (required by download_pfns()')
        else:
            entry['pfn'] = fspec.turl

        if fspec.filesize:
            entry['transfer_timeout'] = get_timeout(fspec.filesize)

        descriptions.append(entry)

    # proceed with the download
    # (per the original comment, the download client raises an exception if any file failed)
    outcome = client.download_pfns(descriptions,
                                   len(descriptions),
                                   trace_custom_fields=trace_common_fields or {},
                                   traces_copy_out=trace_report_out)
    logger.debug('Rucio download client returned %s' % outcome)
def _stageInApi(self, dst, fspec):
    """
    Stage-in a single file with the Rucio download client.

    download_pfns() is used when fspec.turl is set, download_dids() otherwise.
    No transfer timeout is passed to the client (a filesize based timeout was
    disabled in the original code with the remark "too harsh, max 3 hours").

    :param dst: destination path; its directory part becomes the 'base_dir' of the download.
    :param fspec: file specification object (scope, lfn, ddmendpoint and turl are read).
    :return: 'clientState' of the first result entry, or 'FAILED' when the result
             is empty or the entry has no such key.
    """

    from rucio.client.downloadclient import DownloadClient

    # the client logs through the dedicated 'rucio_mover' logger
    client = DownloadClient(logger=logging.getLogger('rucio_mover'))

    # forward this object's tracing setting if the client exposes the attribute
    if hasattr(client, 'tracing'):
        client.tracing = self.tracing

    # file specification handed to the download client
    did_spec = {
        'did_scope': fspec.scope,
        'did_name': fspec.lfn,
        'did': '%s:%s' % (fspec.scope, fspec.lfn),
        'rse': fspec.ddmendpoint,
        'base_dir': dirname(dst),
    }
    if fspec.turl:
        did_spec['pfn'] = fspec.turl

    # proceed with the download
    tolog('_stageInApi file: %s' % str(did_spec))
    custom_fields = self.trace_report or {}

    if fspec.turl:
        outcome = client.download_pfns([did_spec], 1, trace_custom_fields=custom_fields)
    else:
        outcome = client.download_dids([did_spec], trace_custom_fields=custom_fields)

    if not outcome:
        return 'FAILED'
    return outcome[0].get('clientState', 'FAILED')
def _stage_in_api(dst, fspec, trace_report):
    """
    Stage-in a single file with the Rucio download client.

    download_pfns() is used when fspec.turl is set, download_dids() otherwise.

    :param dst: destination directory, used as the 'base_dir' of the download (string).
    :param fspec: file specification object (scope, lfn, ddmendpoint, turl and filesize are read).
    :param trace_report: forwarded to the client as trace_custom_fields
                         (an empty dictionary is used when not set).
    :return: 'clientState' of the first result entry, or 'FAILED' when the result
             is empty or the entry has no such key (string).
    """

    # init. download client
    from rucio.client.downloadclient import DownloadClient
    client = DownloadClient()

    # hand the module-level tracing setting to the client, if it exposes one
    if hasattr(client, 'tracing'):
        client.tracing = tracing_rucio

    # file specification handed to the download client
    did_spec = {
        'did_scope': fspec.scope,
        'did_name': fspec.lfn,
        'did': '%s:%s' % (fspec.scope, fspec.lfn),
        'rse': fspec.ddmendpoint,
        'base_dir': dst,
        'no_subdir': True,
    }
    if fspec.turl:
        did_spec['pfn'] = fspec.turl
    if fspec.filesize:
        did_spec['transfer_timeout'] = get_timeout(fspec.filesize)

    # proceed with the download
    logger.info('_stage_in_api file: %s' % str(did_spec))
    custom_fields = trace_report or {}

    if fspec.turl:
        outcome = client.download_pfns([did_spec], 1, trace_custom_fields=custom_fields)
    else:
        outcome = client.download_dids([did_spec], trace_custom_fields=custom_fields)

    if not outcome:
        return 'FAILED'
    return outcome[0].get('clientState', 'FAILED')
def _stageInApi(self, dst, fspec):
    """
    Stage-in a single file with the Rucio download client.

    download_pfns() is used when fspec.turl is set, download_dids() otherwise.
    No transfer timeout is passed to the client (a filesize based timeout was
    disabled in the original code with the remark "too harsh, max 3 hours").

    :param dst: destination path; its directory part becomes the 'base_dir' of the download.
    :param fspec: file specification object (scope, lfn, ddmendpoint and turl are read).
    :return: 'clientState' of the first result entry, or 'FAILED' when the result
             is empty or the entry has no such key.
    """

    from rucio.client.downloadclient import DownloadClient

    # the client logs through the dedicated 'rucio_mover' logger
    client = DownloadClient(logger=logging.getLogger('rucio_mover'))

    # forward this object's tracing setting if the client exposes the attribute
    if hasattr(client, 'tracing'):
        client.tracing = self.tracing

    # file specification handed to the download client
    did_spec = {
        'did_scope': fspec.scope,
        'did_name': fspec.lfn,
        'did': '%s:%s' % (fspec.scope, fspec.lfn),
        'rse': fspec.ddmendpoint,
        'base_dir': dirname(dst),
    }
    if fspec.turl:
        did_spec['pfn'] = fspec.turl

    # proceed with the download
    tolog('_stageInApi file: %s' % str(did_spec))
    custom_fields = self.trace_report or {}

    if fspec.turl:
        outcome = client.download_pfns([did_spec], 1, trace_custom_fields=custom_fields)
    else:
        outcome = client.download_dids([did_spec], trace_custom_fields=custom_fields)

    if not outcome:
        return 'FAILED'
    return outcome[0].get('clientState', 'FAILED')
def _stage_in_api(dst, fspec, trace_report, trace_report_out, transfer_timeout):
    """
    Stage-in a single file with the Rucio download client.

    download_pfns() is used when fspec.turl is set, download_dids() otherwise.
    Exceptions from the download client are not caught here.

    :param dst: destination directory, used as the 'base_dir' of the download (string).
    :param fspec: file specification object (scope, lfn, ddmendpoint and turl are read).
    :param trace_report: forwarded to the client as trace_custom_fields
                         (an empty dictionary is used when not set).
    :param trace_report_out: forwarded to the client as traces_copy_out and returned to the caller.
    :param transfer_timeout: added to the file specification as 'transfer_timeout' when set.
    :return: trace_report_out (the same object that was passed in).
    """

    # init. download client
    from rucio.client.downloadclient import DownloadClient
    client = DownloadClient(logger=logger)

    # hand the module-level tracing setting to the client, if it exposes one
    if hasattr(client, 'tracing'):
        client.tracing = tracing_rucio

    # file specification handed to the download client
    did_spec = {
        'did_scope': fspec.scope,
        'did_name': fspec.lfn,
        'did': '%s:%s' % (fspec.scope, fspec.lfn),
        'rse': fspec.ddmendpoint,
        'base_dir': dst,
        'no_subdir': True,
    }
    if fspec.turl:
        did_spec['pfn'] = fspec.turl
    if transfer_timeout:
        did_spec['transfer_timeout'] = transfer_timeout

    # proceed with the download
    logger.info('_stage_in_api file: %s' % str(did_spec))
    custom_fields = trace_report or {}

    # (per the original comment, the download client raises an exception if any file failed)
    if fspec.turl:
        outcome = client.download_pfns([did_spec], 1,
                                       trace_custom_fields=custom_fields,
                                       traces_copy_out=trace_report_out)
    else:
        outcome = client.download_dids([did_spec],
                                       trace_custom_fields=custom_fields,
                                       traces_copy_out=trace_report_out)

    logger.debug('Rucio download client returned %s' % outcome)

    return trace_report_out
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Authors:
# - Paul Nilsson, [email protected], 2018-9

# Demonstration script: download one file by PFN with the Rucio download client
# and print whatever the client returns.
# Note: Rucio needs to be setup with 'lsetup rucio'.

try:
    from rucio.client.downloadclient import DownloadClient
except Exception:
    # any failure to import the client is reported as a missing Rucio setup
    print("Rucio client has not been setup, please run 'lsetup rucio' first")
else:
    # description of the single file to download (hard-coded example values)
    f_ific = dict(
        did_scope='mc16_13TeV',
        did='mc16_13TeV:EVNT.16337107._000147.pool.root.1',
        # Python 2 - is unicode necessary for the 'rse' value? was u'IFIC-LCG2_DATADISK'
        rse='IFIC-LCG2_DATADISK',
        pfn='root://t2fax.ific.uv.es:1094//lustre/ific.uv.es/grid/atlas/atlasdatadisk/rucio/mc16_13TeV/59/29/EVNT.16337107._000147.pool.root.1',
        did_name='EVNT.16337107._000147.pool.root.1',
        transfer_timeout=3981,
        base_dir='.',
    )

    download_client = DownloadClient()
    # second positional argument (1) is passed as in the other download_pfns() callers
    print(download_client.download_pfns([f_ific], 1))
class TestDownloadClient(unittest.TestCase):
    """
    Tests for the Rucio DownloadClient.

    Every test uploads its own file(s) through UploadClient and then downloads
    them again with download_dids() / download_pfns(), checking the reported
    'clientState' (and, where given, further result fields or traces).
    The clients are created for real in setUp(); presumably a configured Rucio
    instance with the RSEs named in the tests (MOCK4, XRD1) is required - confirm.
    """

    def setUp(self):
        # self.vo carries the configured VO only when multi-VO mode is enabled
        if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
            self.vo = {
                'vo': config_get('client', 'vo', raise_exception=False, default='tst')
            }
        else:
            self.vo = {}

        # upload and download clients share one DEBUG-level logger writing to a stream handler
        # NOTE(review): a new StreamHandler is added on every setUp() call - confirm this is intended
        logger = logging.getLogger('dlul_client')
        logger.addHandler(logging.StreamHandler())
        logger.setLevel(logging.DEBUG)
        self.client = Client()
        self.did_client = DIDClient()
        self.upload_client = UploadClient(_client=self.client, logger=logger)
        self.download_client = DownloadClient(client=self.client, logger=logger)

    def _upoad_test_file(self, rse, scope, name, path=None):
        """
        Upload one file and return the item dictionary that was passed to the upload client.

        :param rse: RSE name to upload to.
        :param scope: DID scope of the uploaded file.
        :param name: DID name of the uploaded file.
        :param path: local file to upload; when not set, file_generator() supplies one.
        :return: the upload item (keys: path, rse, did_scope, did_name, guid).
        """
        item = {
            'path': path if path else file_generator(),
            'rse': rse,
            'did_scope': scope,
            'did_name': name,
            'guid': generate_uuid(),
        }
        # upload() is expected to return 0
        assert self.upload_client.upload([item]) == 0
        return item

    @staticmethod
    def _check_download_result(actual_result, expected_result):
        """
        Compare a download result with the expected entries.

        Both lists must have the same length. They are sorted by 'did' and compared
        pairwise; only the keys present in the expected entry are checked, so the
        actual entry may contain additional fields.

        :param actual_result: list of dictionaries returned by the download client.
        :param expected_result: list of dictionaries with the values to check (each must contain 'did').
        """
        assert len(expected_result) == len(actual_result)
        expected_result = sorted(expected_result, key=lambda x: x['did'])
        actual_result = sorted(actual_result, key=lambda x: x['did'])
        for i, expected in enumerate(expected_result):
            for param_name, expected_value in expected.items():
                # param_name is part of the assertion expression so that it is shown on failure
                # (presumably the intent - confirm)
                assert param_name and actual_result[i][param_name] == expected[
                    param_name]

    # Download without 'base_dir': first download is DONE, the repeated one ALREADY_DONE.
    def test_download_without_base_dir(self):
        rse = 'MOCK4'
        scope = 'mock'
        item = self._upoad_test_file(rse, scope,
                                     'testDownloadNoBasedir' + generate_uuid())
        did = '%s:%s' % (scope, item['did_name'])
        try:
            # download to the default location, i.e. to ./
            result = self.download_client.download_dids([{'did': did}])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did': did,
                    'clientState': 'DONE',
                }],
            )

            # re-downloading the same file again should not overwrite it
            result = self.download_client.download_dids([{'did': did}])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did': did,
                    'clientState': 'ALREADY_DONE',
                }],
            )
        finally:
            # clean up the directory named after the scope, relative to the current working directory
            shutil.rmtree(scope)

    # Downloads by exact DID, wildcard, filter and with 'no_subdir', all into one temporary directory.
    def test_download_multiple(self):
        rse = 'MOCK4'
        scope = 'mock'
        base_name = 'testDownloadItem' + generate_uuid()
        # three files sharing a base name; the suffixes drive the wildcard cases below
        item000 = self._upoad_test_file(rse, scope, base_name + '.000')
        item001 = self._upoad_test_file(rse, scope, base_name + '.001')
        item100 = self._upoad_test_file(rse, scope, base_name + '.100')

        with TemporaryDirectory() as tmp_dir:
            # Download specific DID
            result = self.download_client.download_dids([{
                'did': '%s:%s' % (scope, item000['did_name']),
                'base_dir': tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did': '%s:%s' % (scope, item000['did_name']),
                    'clientState': 'DONE',
                }],
            )

            # Download multiple files with wildcard. One file already exists on the file system. Will not be re-downloaded.
            # ('.0*' matches the .000 and .001 files only)
            result = self.download_client.download_dids([{
                'did': '%s:%s.0*' % (scope, base_name),
                'base_dir': tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did': '%s:%s' % (scope, item000['did_name']),
                        'clientState': 'ALREADY_DONE',
                    },
                    {
                        'did': '%s:%s' % (scope, item001['did_name']),
                        'clientState': 'DONE',
                    },
                ],
            )

            # Download with filter (only the resolved DID is checked, not the client state)
            result = self.download_client.download_dids([{
                'filters': {
                    'guid': item000['guid'],
                    'scope': scope
                },
                'base_dir': tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did': '%s:%s' % (scope, item000['did_name']),
                }],
            )

            # Download with wildcard and name
            result = self.download_client.download_dids([{
                'did': '%s:*' % scope,
                'filters': {
                    'guid': item100['guid']
                },
                'base_dir': tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did': '%s:%s' % (scope, item100['did_name']),
                    'clientState': 'DONE',
                }],
            )

            # Don't create subdirectories by scope
            # (all three files are expected as DONE, located directly under tmp_dir)
            result = self.download_client.download_dids([{
                'did': '%s:%s.*' % (scope, base_name),
                'base_dir': tmp_dir,
                'no_subdir': True
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did': '%s:%s' % (scope, item000['did_name']),
                        'clientState': 'DONE',
                        'dest_file_paths': ['%s/%s' % (tmp_dir, item000['did_name'])],
                    },
                    {
                        'did': '%s:%s' % (scope, item001['did_name']),
                        'clientState': 'DONE',
                        'dest_file_paths': ['%s/%s' % (tmp_dir, item001['did_name'])],
                    },
                    {
                        'did': '%s:%s' % (scope, item100['did_name']),
                        'clientState': 'DONE',
                        'dest_file_paths': ['%s/%s' % (tmp_dir, item100['did_name'])],
                    },
                ],
            )

            # Re-download file existing on the file system with no-subdir set. It must be overwritten.
            # NOTE(review): the assertion below expects 'ALREADY_DONE', which does not match the
            # "must be overwritten" wording above - confirm which one reflects the intended behaviour
            result = self.download_client.download_dids([{
                'did': '%s:%s' % (scope, item100['did_name']),
                'base_dir': tmp_dir,
                'no_subdir': True
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did': '%s:%s' % (scope, item100['did_name']),
                    'clientState': 'ALREADY_DONE',
                    'dest_file_paths': ['%s/%s' % (tmp_dir, item100['did_name'])],
                }],
            )

    # Download archive constituents (by DID, by wildcard and by pfn) from a zip file uploaded to XRD1.
    # Marked xfail: see the reason string of the decorator.
    @pytest.mark.xfail(
        reason=
        'XRD1 must be initialized https://github.com/rucio/rucio/pull/4165/')
    def test_download_from_archive_on_xrd(self):
        scope = 'test'
        rse = 'XRD1'
        base_name = 'testDownloadArchive' + generate_uuid()
        with TemporaryDirectory() as tmp_dir:
            # Create a zip archive with two files and upload it
            # (adler000/adler001 are the adler32 checksums of the data strings, as hex)
            name000 = base_name + '.000'
            data000 = '000'
            adler000 = '01230091'
            name001 = base_name + '.001'
            data001 = '001'
            adler001 = '01240092'
            zip_name = base_name + '.zip'
            zip_path = '%s/%s' % (tmp_dir, zip_name)
            with ZipFile(zip_path, 'w') as myzip:
                myzip.writestr(name000, data=data000)
                myzip.writestr(name001, data=data001)
            self._upoad_test_file(rse, scope, zip_name, path=zip_path)
            # register both members as constituents of the uploaded archive
            self.did_client.add_files_to_archive(
                scope,
                zip_name,
                [
                    {
                        'scope': scope,
                        'name': name000,
                        'bytes': len(data000),
                        'type': 'FILE',
                        'adler32': adler000,
                        'meta': {
                            'guid': str(generate_uuid())
                        }
                    },
                    {
                        'scope': scope,
                        'name': name001,
                        'bytes': len(data001),
                        'type': 'FILE',
                        'adler32': adler001,
                        'meta': {
                            'guid': str(generate_uuid())
                        }
                    },
                ],
            )

            # Download one file from the archive
            result = self.download_client.download_dids([{
                'did': '%s:%s' % (scope, name000),
                'base_dir': tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did': '%s:%s' % (scope, name000),
                        'clientState': 'DONE',
                    },
                ],
            )
            # the extracted file must hold exactly the data written into the archive
            with open('%s/%s/%s' % (tmp_dir, scope, name000), 'r') as file:
                assert file.read() == data000

            # Download both files from the archive
            result = self.download_client.download_dids([{
                'did': '%s:%s.00*' % (scope, base_name),
                'base_dir': tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did': '%s:%s' % (scope, name000),
                        'clientState': 'ALREADY_DONE',
                    },
                    {
                        'did': '%s:%s' % (scope, name001),
                        'clientState': 'DONE',
                    },
                ],
            )
            with open('%s/%s/%s' % (tmp_dir, scope, name001), 'r') as file:
                assert file.read() == data001

            # take the pfn of the first source reported for the second file
            pfn = next(filter(lambda r: name001 in r['did'],
                              result))['sources'][0]['pfn']
            # Download by pfn from the archive
            result = self.download_client.download_pfns([{
                'did': '%s:%s' % (scope, name001),
                'pfn': pfn,
                'rse': rse,
                'base_dir': tmp_dir,
                'no_subdir': True
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did': '%s:%s' % (scope, name001),
                        'clientState': 'DONE',
                    },
                ],
            )

    # Check the traces copied out via 'traces_copy_out' and the checksum validation of
    # existing local files, for both did and pfn based downloads.
    def test_trace_copy_out_and_checksum_validation(self):
        rse = 'MOCK4'
        scope = 'mock'
        name = 'testDownloadTraces' + generate_uuid()
        self._upoad_test_file(rse, scope, name)

        with TemporaryDirectory() as tmp_dir:
            # Try downloading non-existing did
            traces = []
            with pytest.raises(NoFilesDownloaded):
                self.download_client.download_dids([{
                    'did': 'some:randomNonExistingDid',
                    'base_dir': tmp_dir
                }],
                                                   traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'FILE_NOT_FOUND'

            # Download specific DID
            traces = []
            self.download_client.download_dids([{
                'did': '%s:%s' % (scope, name),
                'base_dir': tmp_dir
            }],
                                               traces_copy_out=traces)
            assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'

            # Download same DID again
            traces = []
            result = self.download_client.download_dids(
                [{
                    'did': '%s:%s' % (scope, name),
                    'base_dir': tmp_dir
                }],
                traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'ALREADY_DONE'

            # Change the local file and download the same file again.
            # Checksum validation should fail and it must be re-downloaded
            with open(result[0]['dest_file_paths'][0], 'a') as f:
                f.write("more data")
            traces = []
            result = self.download_client.download_dids(
                [{
                    'did': '%s:%s' % (scope, name),
                    'base_dir': tmp_dir
                }],
                traces_copy_out=traces)
            assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'

            # remember the pfn of the first reported source for the pfn based downloads below
            pfn = result[0]['sources'][0]['pfn']

        # Switch to a new empty directory
        with TemporaryDirectory() as tmp_dir:
            # Wildcards in did name are not allowed on pfn downloads
            traces = []
            with pytest.raises(InputValidationError):
                self.download_client.download_pfns([{
                    'did': '%s:*' % scope,
                    'pfn': pfn,
                    'rse': rse,
                    'base_dir': tmp_dir
                }],
                                                   traces_copy_out=traces)
            # no trace is expected for input that fails validation
            assert not traces

            # Same pfn, but without wildcard in the did should work
            traces = []
            self.download_client.download_pfns([{
                'did': '%s:%s' % (scope, name),
                'pfn': pfn,
                'rse': rse,
                'base_dir': tmp_dir
            }],
                                               traces_copy_out=traces)
            assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'

            # Same pfn. Local file already present. Shouldn't be overwritten.
            traces = []
            self.download_client.download_pfns([{
                'did': '%s:%s' % (scope, name),
                'pfn': pfn,
                'rse': rse,
                'base_dir': tmp_dir
            }],
                                               traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'ALREADY_DONE'

            # Provide wrong checksum for validation, the file will be re-downloaded but checksum validation fails
            traces = []
            with pytest.raises(NoFilesDownloaded):
                self.download_client.download_pfns(
                    [{
                        'did': '%s:%s' % (scope, name),
                        'pfn': pfn,
                        'rse': rse,
                        'adler32': 'wrong',
                        'base_dir': tmp_dir
                    }],
                    traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'FAIL_VALIDATE'

        # Switch to a new empty directory
        with TemporaryDirectory() as tmp_dir:
            # Simulate checksum corruption by changing the source file. We rely on the particularity
            # that the MOCK4 rse uses the posix protocol: files are stored on the local file system
            protocol = rsemgr.create_protocol(rsemgr.get_rse_info(
                rse, vo=self.client.vo),
                                              operation='read')
            assert isinstance(protocol, PosixProtocol)
            mock_rse_local_path = protocol.pfn2path(pfn)
            # overwrite the stored replica so that it no longer matches the registered checksum
            with open(mock_rse_local_path, 'w') as f:
                f.write('some completely other data')

            # Download fails checksum validation
            traces = []
            with pytest.raises(NoFilesDownloaded):
                self.download_client.download_dids(
                    [{
                        'did': '%s:%s' % (scope, name),
                        'base_dir': tmp_dir
                    }],
                    traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'FAIL_VALIDATE'

            # Ignore_checksum set. Download works.
            traces = []
            self.download_client.download_dids([{
                'did': '%s:%s' % (scope, name),
                'base_dir': tmp_dir,
                'ignore_checksum': True
            }],
                                               traces_copy_out=traces)
            assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'
def _stage_in_api(dst, fspec, trace_report, trace_report_out, transfer_timeout, use_pcache):
    """
    Stage-in a single file with the Rucio download client.

    download_pfns() is used when fspec.turl is set, download_dids() otherwise.
    A failing download is converted into an error code as long as the first
    entry of trace_report_out carries a 'stateReason'; otherwise the exception
    from the download client is re-raised unchanged.

    :param dst: destination directory, used as the 'base_dir' of the download (string).
    :param fspec: file specification object (scope, lfn, ddmendpoint and turl are read).
    :param trace_report: forwarded to the client as trace_custom_fields
                         (an empty dictionary is used when not set).
    :param trace_report_out: forwarded to the client as traces_copy_out and returned to the caller.
    :param transfer_timeout: added to the file specification as 'transfer_timeout' when set.
    :param use_pcache: when set, check_pcache is switched on for the download client (Boolean).
    :return: error code (0 on success, -1 when the download failed but error info is
             available in trace_report_out), trace_report_out.
    :raises Exception: whatever the download client raised, if no error info could be
                       extracted from trace_report_out.
    """

    # init. download client
    from rucio.client.downloadclient import DownloadClient
    download_client = DownloadClient(logger=logger)
    if use_pcache:
        download_client.check_pcache = True

    # hand the module-level tracing setting to the client, if it exposes one
    if hasattr(download_client, 'tracing'):
        download_client.tracing = tracing_rucio

    # file specifications before the actual download
    f = {
        'did_scope': fspec.scope,
        'did_name': fspec.lfn,
        'did': '%s:%s' % (fspec.scope, fspec.lfn),
        'rse': fspec.ddmendpoint,
        'base_dir': dst,
        'no_subdir': True,
    }
    if fspec.turl:
        f['pfn'] = fspec.turl
    if transfer_timeout:
        f['transfer_timeout'] = transfer_timeout
    # NOTE(review): set unconditionally here (one hour); the original layout did not make it
    # clear whether this belonged to the transfer_timeout branch - confirm
    f['connection_timeout'] = 60 * 60

    # proceed with the download
    logger.info('rucio API stage-in dictionary: %s', f)
    trace_pattern = trace_report or {}

    # download client raises an exception if any file failed
    try:
        logger.info('*** rucio API downloading file (taking over logging) ***')
        if fspec.turl:
            result = download_client.download_pfns([f], 1,
                                                   trace_custom_fields=trace_pattern,
                                                   traces_copy_out=trace_report_out)
        else:
            result = download_client.download_dids([f],
                                                   trace_custom_fields=trace_pattern,
                                                   traces_copy_out=trace_report_out)
    except Exception as e:
        logger.warning('*** rucio API download client failed ***')
        logger.warning('caught exception: %s', e)
        logger.debug('trace_report_out=%s', trace_report_out)

        # only raise an exception if the error info cannot be extracted;
        # a bare raise keeps the original traceback of the client exception
        if not trace_report_out or not trace_report_out[0].get('stateReason'):
            raise

        return -1, trace_report_out

    logger.info('*** rucio API download client finished ***')
    logger.debug('client returned %s', result)
    logger.debug('trace_report_out=%s', trace_report_out)

    return 0, trace_report_out