class TestDownloadClient(unittest.TestCase):
    """Exercise ``DownloadClient.download_dids`` with one file uploaded in ``setUp``."""

    def setUp(self):
        """Build the clients and upload a freshly generated file to RSE ``MOCK4``."""
        if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
            self.vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')}
        else:
            self.vo = {}

        logger = logging.getLogger('dlul_client')
        # getLogger() hands back the same logger object for every test method;
        # attach the stream handler only once so log lines are not duplicated.
        if not logger.handlers:
            logger.addHandler(logging.StreamHandler())
        logger.setLevel(logging.DEBUG)

        self.client = Client()
        self.upload_client = UploadClient(_client=self.client, logger=logger)
        self.download_client = DownloadClient(client=self.client, logger=logger)

        self.file_path = file_generator()
        self.scope = 'mock'
        self.name = os.path.basename(self.file_path)
        self.rse = 'MOCK4'
        self.guid = generate_uuid()

        item = {
            'path': self.file_path,
            'rse': self.rse,
            'did_scope': self.scope,
            'did_name': self.name,
            'guid': self.guid,
        }
        assert self.upload_client.upload([item]) == 0

    def tearDown(self):
        """Remove the local ``mock`` directory left behind by the downloads."""
        # The directory does not exist when a test failed before downloading
        # anything; do not pile a FileNotFoundError on top of the real failure.
        shutil.rmtree('mock', ignore_errors=True)

    def test_download_item(self):
        """ DOWNLOAD (CLIENT): Download DIDs """
        # Download specific DID
        result = self.download_client.download_dids([{'did': '%s:%s' % (self.scope, self.name)}])
        assert result

        # Download with wildcard
        result = self.download_client.download_dids([{'did': '%s:%s' % (self.scope, self.name[:-2] + '*')}])
        assert result

        # Download with filter
        result = self.download_client.download_dids([{'filters': {'guid': self.guid, 'scope': self.scope}}])
        assert result

        # Download with wildcard and name
        result = self.download_client.download_dids([{'did': '%s:%s' % (self.scope, '*'), 'filters': {'guid': self.guid}}])
        assert result
def pre_cache(self, scope, name):
    """
    Pre cache the dataset to this edge.
    The edge should define a path or a storage for pre caching.

    Downloads the DID ``scope:name`` into ``self.cache_path`` and reports one
    entry for every file that ``Client.list_files`` returns for the DID.

    :param scope: scope part of the DID to cache.
    :param name: name part of the DID to cache.
    :returns: list of dicts with the keys ``scope``, ``name``, ``min_id``,
              ``max_id``, ``status``, ``size``, ``md5``, ``adler32`` and ``pfn``.
              ``md5``, ``adler32`` and ``pfn`` are None for files that have no
              matching entry in the download result.
    """
    item = {
        'did': '%s:%s' % (scope, name),
        'base_dir': self.cache_path,
        'no_subdir': self.no_subdir,
        'transfer_timeout': self.transfer_timeout,
    }
    if self.rse:
        item['rse'] = self.rse

    client = Client()
    all_files = client.list_files(scope, name)
    download_client = DownloadClient(client=client)
    downloaded_files = download_client.download_dids([item], num_threads=self.num_threads)
    self.logger.info('Downloaded files: %s' % downloaded_files)

    # Index the download results once instead of rescanning the whole list for
    # every listed file. setdefault keeps the first entry per (scope, name),
    # which is the entry the previous linear scan would have picked.
    downloaded_by_did = {}
    for d_file in downloaded_files:
        downloaded_by_did.setdefault((d_file['scope'], d_file['name']), d_file)

    ret_files = []
    for file_info in all_files:
        downloaded_file = downloaded_by_did.get((file_info['scope'], file_info['name']))

        # NOTE(review): only 'ALREADY_DONE' is reported as AVAILABLE; a file that
        # was downloaded by this very call ('DONE') stays NEW. Kept as is --
        # confirm that this is intended.
        if downloaded_file and downloaded_file['clientState'] == 'ALREADY_DONE':
            status = ContentStatus.AVAILABLE
        else:
            status = ContentStatus.NEW

        ret_files.append({
            'scope': file_info['scope'],
            'name': file_info['name'],
            'min_id': 1,
            'max_id': file_info['events'],
            'status': status,
            'size': file_info['bytes'],
            'md5': downloaded_file['md5'] if downloaded_file else None,
            'adler32': downloaded_file['adler32'] if downloaded_file else None,
            'pfn': downloaded_file['dest_file_path'] if downloaded_file else None,
        })
    return ret_files
def download(dest_path, did):
    """Download ``did`` below ``dest_path`` with the Rucio download client.

    :param dest_path: passed to the client as the item's ``base_dir``.
    :param did: passed to the client as the item's ``did``.
    :returns: whatever ``DownloadClient.download_dids`` returns for the single item.
    """
    # Imported lazily so rucio is only required when a download is requested.
    from rucio.client import Client
    from rucio.client.downloadclient import DownloadClient

    request = {'did': did, 'base_dir': dest_path}
    downloader = DownloadClient(client=Client(), logger=download_logger)
    return downloader.download_dids([request])
def test_download_item(self):
    """ DOWNLOAD (CLIENT): download DIDs. """
    import os

    download_client = DownloadClient()
    tmp_file1 = file_generator()
    scope = 'mock'
    # Use the basename rather than a fixed-width slice so the DID name stays
    # correct wherever file_generator() places the file.
    name = os.path.basename(tmp_file1)
    uuid = generate_uuid()

    cmd = 'rucio upload --rse {0} --scope {1} --guid {2} {3}'.format('MOCK4', scope, uuid, tmp_file1)
    exitcode, out, err = execute(cmd)
    # Fail right here when the upload did not work; otherwise the download
    # assertions below fail with a misleading message.
    assert exitcode == 0, 'rucio upload failed: %s' % err

    # Download specific DID
    result = download_client.download_dids([{'did': '%s:%s' % (scope, name)}])
    assert_true(result)

    # Download with wildcard
    result = download_client.download_dids([{'did': '%s:%s' % (scope, name[:10] + '*')}])
    assert_true(result)

    # Download with filter
    result = download_client.download_dids([{'filters': {'guid': uuid, 'scope': scope}}])
    assert_true(result)

    # Download with wildcard and name
    result = download_client.download_dids([{'did': '%s:%s' % (scope, '*'), 'filters': {'guid': uuid}}])
    assert_true(result)
def stage_in():
    """Download the configured DIDs into the working directory and render the exec line.

    Reads the JSON configuration named by ``sys.argv[2]``. Its ``dids`` entry
    lists the DIDs to download (flat, without scope sub-directories) and its
    ``exec_template`` entry is rendered with ``IN`` set to the comma-separated
    DID names. The rendered text plus a trailing newline is written to the file
    named by ``sys.argv[3]``.
    """
    # Parse the configuration once and close the file handle deterministically.
    with open(sys.argv[2]) as config_file:
        config = json.load(config_file)

    download_client = DownloadClient()
    dids = config['dids']
    download_client.download_dids([{
        'did': did,
        'base_dir': os.getcwd(),
        'no_subdir': True,
    } for did in dids])

    # Only the name part (after the first ':') of each DID goes into the template.
    inval = ','.join([did.split(':', 1)[1] for did in dids])

    # NOTE(review): 'delimiter' is passed as a substitution keyword here, exactly
    # as before; confirm that PandaTemplate expects it in this form.
    rendered = PandaTemplate(config['exec_template']).safe_substitute(IN=inval, delimiter='%')
    with open(sys.argv[3], 'w') as f:
        f.write(rendered + '\n')
def _stageInApi(self, dst, fspec):
    """Stage in one file through the Rucio download client.

    :param dst: destination path; its directory name is used as ``base_dir``.
    :param fspec: file specification; ``scope``, ``lfn``, ``ddmendpoint`` and
                  ``turl`` are read from it.
    :returns: the ``clientState`` of the first result entry, or ``'FAILED'``
              when the client returned nothing or the entry has no such key.
    """
    from rucio.client.downloadclient import DownloadClient

    # The Rucio client logs through its own named logger.
    download_client = DownloadClient(logger=logging.getLogger('rucio_mover'))

    # Apply this mover's tracing setting when the client exposes the attribute.
    if hasattr(download_client, 'tracing'):
        download_client.tracing = self.tracing

    # File specification handed to the client.
    file_spec = {
        'did_scope': fspec.scope,
        'did_name': fspec.lfn,
        'did': '%s:%s' % (fspec.scope, fspec.lfn),
        'rse': fspec.ddmendpoint,
        'base_dir': dirname(dst),
    }
    if fspec.turl:
        file_spec['pfn'] = fspec.turl
    # No transfer_timeout is set: a size-based timeout was judged too harsh (max 3 hours).

    tolog('_stageInApi file: %s' % str(file_spec))

    trace_pattern = self.trace_report if self.trace_report else {}

    # A known TURL is downloaded by PFN, anything else is resolved by DID.
    if fspec.turl:
        result = download_client.download_pfns([file_spec], 1, trace_custom_fields=trace_pattern)
    else:
        result = download_client.download_dids([file_spec], trace_custom_fields=trace_pattern)

    if not result:
        return 'FAILED'
    return result[0].get('clientState', 'FAILED')
def _stage_in_api(dst, fspec, trace_report):
    """Stage in one file through the Rucio download client.

    :param dst: directory used as ``base_dir`` (no scope sub-directory is created).
    :param fspec: file specification; ``scope``, ``lfn``, ``ddmendpoint``,
                  ``turl`` and ``filesize`` are read from it.
    :param trace_report: custom trace fields forwarded to the client; an empty
                         dict is used when it is falsy.
    :returns: the ``clientState`` of the first result entry, or ``'FAILED'``
              when the client returned nothing or the entry has no such key.
    """
    from rucio.client.downloadclient import DownloadClient

    download_client = DownloadClient()

    # Apply the module-wide tracing setting when the client exposes the attribute.
    if hasattr(download_client, 'tracing'):
        download_client.tracing = tracing_rucio

    # File specification handed to the client.
    file_spec = {
        'did_scope': fspec.scope,
        'did_name': fspec.lfn,
        'did': '%s:%s' % (fspec.scope, fspec.lfn),
        'rse': fspec.ddmendpoint,
        'base_dir': dst,
        'no_subdir': True,
    }
    if fspec.turl:
        file_spec['pfn'] = fspec.turl
    if fspec.filesize:
        # The timeout is derived from the file size.
        file_spec['transfer_timeout'] = get_timeout(fspec.filesize)

    logger.info('_stage_in_api file: %s' % str(file_spec))

    trace_pattern = trace_report if trace_report else {}

    # A known TURL is downloaded by PFN, anything else is resolved by DID.
    if fspec.turl:
        result = download_client.download_pfns([file_spec], 1, trace_custom_fields=trace_pattern)
    else:
        result = download_client.download_dids([file_spec], trace_custom_fields=trace_pattern)

    if not result:
        return 'FAILED'
    return result[0].get('clientState', 'FAILED')
def _stageInApi(self, dst, fspec):
    """Download a single file with the Rucio API and return its client state.

    :param dst: destination path; the download goes into ``dirname(dst)``.
    :param fspec: file specification providing ``scope``, ``lfn``,
                  ``ddmendpoint`` and ``turl``.
    :returns: ``clientState`` of the first returned entry; ``'FAILED'`` if the
              result is empty or the key is missing.
    """
    from rucio.client.downloadclient import DownloadClient

    rucio_logger = logging.getLogger('rucio_mover')
    client = DownloadClient(logger=rucio_logger)

    # Hand over the tracing flag only if this client version has the attribute.
    if hasattr(client, 'tracing'):
        client.tracing = self.tracing

    did = '%s:%s' % (fspec.scope, fspec.lfn)
    request = dict(
        did_scope=fspec.scope,
        did_name=fspec.lfn,
        did=did,
        rse=fspec.ddmendpoint,
        base_dir=dirname(dst),
    )
    if fspec.turl:
        request['pfn'] = fspec.turl
    # transfer_timeout is deliberately left unset (size-based value was too harsh, max 3 hours).

    tolog('_stageInApi file: %s' % str(request))

    trace_pattern = self.trace_report or {}

    # Pick the client entry point: PFN download when a TURL is known, DID download otherwise.
    if fspec.turl:
        outcome = client.download_pfns([request], 1, trace_custom_fields=trace_pattern)
    else:
        outcome = client.download_dids([request], trace_custom_fields=trace_pattern)

    return outcome[0].get('clientState', 'FAILED') if outcome else 'FAILED'
def _stage_in_api(dst, fspec, trace_report, trace_report_out, transfer_timeout):
    """Stage in one file through the Rucio download client.

    :param dst: directory used as ``base_dir`` (no scope sub-directory is created).
    :param fspec: file specification; ``scope``, ``lfn``, ``ddmendpoint`` and
                  ``turl`` are read from it.
    :param trace_report: custom trace fields forwarded to the client; an empty
                         dict is used when it is falsy.
    :param trace_report_out: object handed to the client as ``traces_copy_out``.
    :param transfer_timeout: set on the request as ``transfer_timeout`` when truthy.
    :returns: ``trace_report_out`` (the same object that was passed in).
    """
    from rucio.client.downloadclient import DownloadClient

    download_client = DownloadClient(logger=logger)

    # Apply the module-wide tracing setting when the client exposes the attribute.
    if hasattr(download_client, 'tracing'):
        download_client.tracing = tracing_rucio

    # File specification handed to the client.
    file_spec = {
        'did_scope': fspec.scope,
        'did_name': fspec.lfn,
        'did': '%s:%s' % (fspec.scope, fspec.lfn),
        'rse': fspec.ddmendpoint,
        'base_dir': dst,
        'no_subdir': True,
    }
    if fspec.turl:
        file_spec['pfn'] = fspec.turl
    if transfer_timeout:
        file_spec['transfer_timeout'] = transfer_timeout

    logger.info('_stage_in_api file: %s' % str(file_spec))

    trace_options = {
        'trace_custom_fields': trace_report if trace_report else {},
        'traces_copy_out': trace_report_out,
    }

    # download client raises an exception if any file failed
    if fspec.turl:
        result = download_client.download_pfns([file_spec], 1, **trace_options)
    else:
        result = download_client.download_dids([file_spec], **trace_options)

    logger.debug('Rucio download client returned %s' % result)
    return trace_report_out
class RucioAPI():
    """Class RucioAPI()

    This class presents an approach to collect all necessary Rucio calls in one
    class. That allows easy handling of Rucio calls based on the Rucio API.
    If there are ever changes in the Rucio API, here is the wrapper to fix it.
    """

    def __init__(self, enable_print=False):
        """Function: __init__()

        Constructor of RucioAPI class. Stores the settings and immediately
        configures the Rucio clients via ConfigHost().

        :param enable_print: If True then enable print to terminal
        """
        self._print_to_screen = enable_print
        self._rucio_ping = None
        self._rucio_account = os.environ.get("RUCIO_ACCOUNT")
        self.ConfigHost()

    def __del__(self):
        """Function: __del__()

        Destructor - nothing to release.
        """
        pass

    # Here comes the backend configuration part:
    def SetRucioAccount(self, rucio_account=None):
        """Function: SetRucioAccount

        :param rucio_account: The Rucio account you would like to work with
        """
        self._rucio_account = rucio_account

    def SetConfigPath(self, config_path=None):
        """Function: SetConfigPath

        This option is only important for legacy command line support and is
        ignored in RucioAPI setup.

        :param config_path: Path to CLI configuration file
        """
        pass

    def SetProxyTicket(self, proxy_path=None):
        """Function: SetProxyTicket

        This option is only important for legacy command line support and is
        ignored in RucioAPI setup.

        :param proxy_path: Path to the proxy ticket
        """
        pass

    def SetHost(self, hostname=None):
        """Function: SetHost

        This option is only important for legacy command line support and is
        ignored in RucioAPI setup.

        :param hostname: Host name of the Rucio server
        """
        pass

    def ConfigHost(self):
        """Function: ConfigHost

        This member function sets up the Rucio API backend. To ensure full
        functionality, it needs:
            * Client()
            * UploadClient()
            * DownloadClient()

        If any of the clients cannot be created, a message is printed and the
        process is terminated with exit status 1 (SystemExit).
        """
        try:
            self._rucio_client = Client()
            self._rucio_client_upload = UploadClient(logger=logger)
            self._rucio_client_download = DownloadClient()
            self._rucio_ping = self._rucio_client.ping
        except Exception:
            print("Can not init the Rucio API")
            print("-> Check for your Rucio installation")
            # Same effect as exit(1), without depending on the helper that the
            # site module injects into builtins.
            raise SystemExit(1)

    # finished the backend configuration for the Rucio API

    def Whoami(self):
        """RucioAPI:Whoami

        Returns a dictionary to identify the current Rucio user and credentials.
        """
        return self._rucio_client.whoami()

    def GetRucioPing(self):
        """Function: GetRucioPing

        :return: The ``ping`` attribute of the Rucio client. It is returned
                 as is and is NOT called here.
        """
        return self._rucio_client.ping

    # The scope section:
    def CreateScope(self, account, scope, verbose=False):
        """Function: CreateScope()

        Create a new Rucio scope which does not exist yet. Be aware that you
        need Rucio permissions to do it. Check your Rucio account and settings.

        :param account: The Rucio account you are working with (needs to be allowed to create scopes)
        :param scope: The scope name you like to create
        :param verbose: If True, print the Duplicate exception when the scope already exists
        :return result: 0 if the scope was created, 1 otherwise
        """
        result = 1
        try:
            self._rucio_client.add_scope(account, scope)
            result = 0
        except AccessDenied as e:
            print(e)
        except Duplicate as e:
            if verbose:
                print(e)
        return result

    # Several list commands
    def GetRSE(self, rse):
        """Function: GetRSE(...)

        Return further information about the RSE setup of a specific RSE

        :param rse: A (string) valid Rucio Storage Element (RSE) name
        :return result: A dictionary which holds information according to the
                        selected RSE, {} if the request failed
        """
        result = {}
        try:
            result = self._rucio_client.get_rse(rse)
        except Exception:
            print("No RSE attributes received for {0}".format(rse))
        return result

    def ListRSEAttributes(self, rse):
        """Function: ListRSEAttributes(...)

        Return some attributes of a Rucio Storage Element.
        Received keys are fts, fts_testing, RSE-NAME, istape

        :param rse: A valid (string) Rucio Storage Element (RSE) name
        :return result: A dictionary with RSE attributes, {} if the request failed
        """
        result = {}
        try:
            result = self._rucio_client.list_rse_attributes(rse)
        except Exception:
            print("No RSE attributes received for {0}".format(rse))
        return result

    def ListRSEs(self):
        """Function: ListRSEs

        Returns an overview about all registered Rucio Storage Elements in the
        current setup

        :return result: A list of dictionaries. Each dictionary holds RSE
                        information. If not successful []
        """
        result = []
        try:
            result = list(self._rucio_client.list_rses())
        except Exception:
            print("No RSE received from Rucio.")
        return result

    def ListContent(self, scope, name):
        """Function: ListContent()

        :param scope: A string which refers to a Rucio scope
        :param name: A string which refers to a Rucio name
        :return result: A list of dictionaries with the attached files to the
                        DID, [] if a TypeError occurred
        """
        try:
            return list(self._rucio_client.list_content(scope, name))
        except TypeError as e:
            print(e)
        return []

    def ListScopes(self):
        """Function: ListScopes()

        List all created scopes in the Rucio catalogue

        :return result: A list of scopes, otherwise []
        """
        result = []
        try:
            result = self._rucio_client.list_scopes()
        except Exception:
            print("No scopes? - Check that!")
        return result

    def ListFileReplicas(self, scope, lfn):
        """Function: ListFileReplicas(...)

        List the replicas of a file.

        :param scope: A string which follows the rules of a Rucio scope
        :param lfn: the lfn.
        :return result: Whatever the client's list_file_replicas returns;
                        exceptions are not caught here
        """
        # todo FIX ME: no error handling yet
        return self._rucio_client.list_file_replicas(scope, lfn)

    def ListFiles(self, scope, name, long=True):
        """Function: ListFiles(...)

        List all your files which are attached to a dataset or container

        :param scope: A string which follows the rules of a Rucio scope
        :param name: A string which follows the rules of a Rucio name
        :param long: Define another output (Check the Rucio tutorials for it)
        :return result: The files as returned by the client, otherwise []
        """
        result = []
        try:
            # Forward the caller's choice; it used to be replaced by None.
            result = self._rucio_client.list_files(scope, name, long=long)
        except Exception:
            print("No files are listed for {0}:{1}".format(scope, name))
        return result

    def ListDids(self, scope, filters, type='collection', long=False, recursive=False):
        """
        List all data identifiers in a scope which match a given pattern.
        Check Rucio github page for details

        :param scope: The valid string which follows the Rucio scope name.
        :param filters: A dictionary of key/value pairs like {'name': 'file_name','rse-expression': 'tier0'}.
        :param type: The type of the did: 'all'(container, dataset or file)|'collection'(dataset or container)|'dataset'|'container'|'file'
        :param long: Long format option to display more information for each DID.
        :param recursive: Recursively list DIDs content.
        :return result: A list of matching DIDs, [] if a TypeError occurred
        """
        try:
            return list(self._rucio_client.list_dids(scope, filters, type, long, recursive))
        except TypeError as e:
            print(e)
        return []

    def ListDidRules(self, scope, name):
        """List the replication rules of a DID.

        :param scope: A string which refers to the Rucio scope
        :param name: A string which refers to the Rucio name (a container, dataset or file name)
        :return: A list of Rucio transfer rules with additional rule
                 information. The list is empty if there are no rules or a
                 TypeError occurred.
        """
        try:
            return list(self._rucio_client.list_did_rules(scope, name))
        except TypeError as e:
            print(e)
        return []

    # Attach and detach:
    def AttachDids(self, scope, name, attachment, rse=None):
        """Function: AttachDids(...)

        This function allows to attach datasets or containers to a top-level
        dataset or container. The parameters scope and name define the
        top-level structure (container or dataset) and the dictionary or the
        list of dictionaries contains the information about what is attached
        to the top-level structure.

        More information under https://github.com/rucio/rucio

        :param scope: A string which follows the rules of a Rucio scope
        :param name: A string which follows the rules of a Rucio name
        :param attachment: A dictionary or a list of dictionaries which consist of two keys: scope and name
                           example {'scope': 'example_scope1', 'name':'example_name1'}
        :param rse: The RSE name when registering replicas. (optional)
        :return result: 0 if successful, 1 if the content was already attached
        """
        result = 1
        # A single dictionary is wrapped into a list of dictionaries.
        if isinstance(attachment, dict):
            attachment = [attachment]

        try:
            self._rucio_client.attach_dids(scope, name, attachment, rse=rse)
            result = 0
        except DuplicateContent as e:
            print(e)
        return result

    def DetachDids(self, scope, name, dids):
        """Detach ``dids`` from the DID ``scope:name`` (best effort).

        :param scope: A string which follows the rules of a Rucio scope
        :param name: A string which follows the rules of a Rucio name
        :param dids: The DIDs to detach, forwarded to the client unchanged
        :return: None, both on success and when the client raised an exception
        """
        try:
            self._rucio_client.detach_dids(scope, name, dids)
        except Exception:
            return None

    # Container and Dataset management:
    def CreateContainer(self, scope, name, statuses=None, meta=None, rules=None, lifetime=None):
        """Function CreateContainer(...)

        Follows the Rucio API to create a Rucio container based on scope and
        container name. It accepts also further Rucio features.

        More information under https://github.com/rucio/rucio

        :param scope: A string which follows the rules of a Rucio scope
        :param name: A string which follows the rules of a Rucio container name
        :param statuses: Status (optional)
        :param meta: Put in further meta data which are going to be connected to the container. (optional)
        :param rules: Define transfer rules which apply to the container immediately. (optional)
        :param lifetime: Set a Rucio lifetime to the container if you wish (optional)
        :return result: 0 if successful, 1 if the container already exists
        """
        result = 1
        try:
            # Forward the optional arguments; they used to be replaced by None.
            self._rucio_client.add_container(scope, name, statuses=statuses, meta=meta,
                                             rules=rules, lifetime=lifetime)
            result = 0
        except DataIdentifierAlreadyExists as e:
            print(e)
        return result

    def CreateDataset(self, scope, name, statuses=None, meta=None, rules=None, lifetime=None,
                      files=None, rse=None, verbose=False):
        """Function CreateDataset(...)

        Follows the Rucio API to create a Rucio dataset based on scope and
        dataset name. It accepts also further Rucio features.

        More information under https://github.com/rucio/rucio

        :param scope: A string which follows the rules of a Rucio scope
        :param name: A string which follows the rules of a Rucio dataset name
        :param statuses: Status (optional)
        :param meta: Put in further meta data which are going to be connected to the dataset. (optional)
        :param rules: Define transfer rules which apply to the dataset immediately. (optional)
        :param lifetime: Set a Rucio lifetime to the dataset if you wish (optional)
        :param files: Forwarded to the client's add_dataset (optional)
        :param rse: Forwarded to the client's add_dataset (optional)
        :param verbose: Flag to print DataIdentifierAlreadyExists exceptions
        :return result: 0 if successful, 1 if the dataset already exists
        """
        result = 1
        try:
            # Forward the optional arguments; they used to be replaced by None.
            self._rucio_client.add_dataset(scope, name, statuses=statuses, meta=meta, rules=rules,
                                           lifetime=lifetime, files=files, rse=rse)
            result = 0
        except DataIdentifierAlreadyExists as e:
            if verbose:
                print(e)
        return result

    # Rules:
    def AddRule(self, dids, copies, rse_expression, weight=None, lifetime=None, grouping='DATASET',
                account=None, locked=False, source_replica_expression=None, activity=None,
                notify='N', purge_replicas=False, ignore_availability=False, comment=None,
                ask_approval=False, asynchronous=False, priority=3, meta=None):
        """Function: AddRule(...)

        A function to add a Rucio transfer rule to the given Rucio data
        identifiers (DIDs)

        More information under https://github.com/rucio/rucio

        :param dids: The data identifier set.
        :param copies: The number of replicas.
        :param rse_expression: Boolean string expression to give the list of RSEs.
        :param weight: If the weighting option of the replication rule is used, the choice of RSEs takes their weight into account.
        :param lifetime: The lifetime of the replication rules (in seconds).
        :param grouping: ALL - All files will be replicated to the same RSE.
                         DATASET - All files in the same dataset will be replicated to the same RSE.
                         NONE - Files will be completely spread over all allowed RSEs without any grouping considerations at all.
        :param account: The account owning the rule.
        :param locked: If the rule is locked, it cannot be deleted.
        :param source_replica_expression: RSE Expression for RSEs to be considered for source replicas.
        :param activity: Transfer Activity to be passed to FTS.
        :param notify: Notification setting for the rule (Y, N, C).
        :param purge_replicas: When the rule gets deleted purge the associated replicas immediately.
        :param ignore_availability: Option to ignore the availability of RSEs.
        :param ask_approval: Ask for approval of this replication rule.
        :param asynchronous: Create rule asynchronously by judge-injector.
        :param priority: Priority of the transfers.
        :param comment: Comment about the rule.
        :param meta: Metadata, as dictionary. Only forwarded when it is not None.
        :return result: 0 if successful, 1 if the rule already exists
        """
        # Every option is forwarded. The defaults in the signature equal the
        # values that used to be hard-coded, so callers relying on the defaults
        # get the same rule as before.
        rule_options = {
            'weight': weight,
            'lifetime': lifetime,
            'grouping': grouping,
            'account': account,
            'locked': locked,
            'source_replica_expression': source_replica_expression,
            'activity': activity,
            'notify': notify,
            'purge_replicas': purge_replicas,
            'ignore_availability': ignore_availability,
            'comment': comment,
            'ask_approval': ask_approval,
            'asynchronous': asynchronous,
            'priority': priority,
        }
        # meta was never sent before; only send it on request so that a client
        # without this keyword keeps working for all existing callers.
        if meta is not None:
            rule_options['meta'] = meta

        result = 1
        try:
            self._rucio_client.add_replication_rule(dids, copies, rse_expression, **rule_options)
            result = 0
        except DuplicateRule as e:
            print(e)
        return result

    def UpdateRule(self, rule_id, options=None):
        """Function UpdateRule()

        Aims to update a particular rule according to its rule_id and further
        options such as lifetime

        :param rule_id: A Rucio rule id string
        :param options: A dictionary with certain options (e.g. lifetime, weight, ...)
        :return result: 0 on success, 1 at failure
        """
        result = 1
        try:
            self._rucio_client.update_replication_rule(rule_id, options)
            result = 0
        except Exception:
            print("Raised exception in UpdateRule")
        return result

    def GetReplicationRule(self, rule_id, estimate_ttc=False):
        """Function: GetReplicationRule(...)

        Get information on the replication rule based on the rule ID

        :param rule_id: A valid Rucio rule ID
        :param estimate_ttc: Forwarded to the client only when truthy
        :return result: Information on the replication rule, otherwise 1
        """
        result = 1
        try:
            # The bound client method must not receive this wrapper's `self`:
            # that extra argument made every call fail and return 1.
            if estimate_ttc:
                result = self._rucio_client.get_replication_rule(rule_id, estimate_ttc=estimate_ttc)
            else:
                result = self._rucio_client.get_replication_rule(rule_id)
        except Exception:
            print("No replication rule to get")
        return result

    def DeleteRule(self, rule_id):
        """Function: DeleteRule(...)

        Deletes a replication rule; the client is asked to purge the replicas.

        :param rule_id: A rucio rule id string
        """
        self._rucio_client.delete_replication_rule(rule_id, purge_replicas=True)

    # Metadata:
    def GetMetadata(self, scope, name):
        """Return the metadata of the DID ``scope:name``.

        :param scope: A string which refers to a Rucio scope
        :param name: A string which refers to a Rucio name
        :return: Whatever the client's get_metadata returns, None if it raised
        """
        try:
            return self._rucio_client.get_metadata(scope, name)
        except Exception:
            return None

    def SetMetadata(self, scope, name, key, value, recursive=False):
        """Set one metadata key of the DID ``scope:name``.

        :param scope: A string which refers to a Rucio scope
        :param name: A string which refers to a Rucio name
        :param key: The metadata key
        :param value: The metadata value
        :param recursive: Forwarded to the client's set_metadata
        :return: Whatever the client's set_metadata returns, None if it raised
        """
        try:
            return self._rucio_client.set_metadata(scope, name, key, value, recursive=recursive)
        except Exception:
            return None

    # Data upload / download / register
    def Upload(self, upload_dict=None):
        """Function: Upload()

        The list of dictionaries need to follow this convention:
        Rucio/Github: https://github.com/rucio/rucio/blob/master/lib/rucio/client/uploadclient.py#L71

        :param upload_dict: A list object with dictionaries
        :return result: The return value of the upload client's upload()
        """
        return self._rucio_client_upload.upload(upload_dict)

    def DownloadDids(self, items, num_threads=2, trace_custom_fields=None):
        """Function: DownloadDids(...)

        Download from the Rucio catalogue by Rucio DIDs (or a list of them)

        :param items: A list or a dictionary of information what to download
        :param num_threads: Number of threads handed to the download client, standard 2 (optional)
        :param trace_custom_fields: Customize downloads (Look at Rucio tutorials);
                                    an empty dictionary is used when None (optional)
        :return result: A list of dictionaries of Rucio download result messages. If it fails: 1
        """
        # A fresh dictionary per call instead of one shared mutable default.
        if trace_custom_fields is None:
            trace_custom_fields = {}

        # If a single dictionary is handed over, wrap it into a list.
        if isinstance(items, dict):
            items = [items]

        try:
            result = self._rucio_client_download.download_dids(
                items=items,
                num_threads=num_threads,
                trace_custom_fields=trace_custom_fields)
        except Exception:
            result = 1
        return result

    def Register(self, rse, files, ignore_availability=True):
        """Register existing files as replicas on an RSE.

        :param rse: The RSE name
        :param files: The replicas to register, forwarded to the client's
                      add_replicas. Presumably a list of dictionaries with the
                      keys scope, name, adler32 and bytes - TODO confirm.
        :param ignore_availability: Forwarded to the client's add_replicas
        """
        # --> Think about metadata
        try:
            self._rucio_client.add_replicas(rse, files, ignore_availability)
        except Exception:
            print("Problem with file name does not match pattern")

        # NOTE(review): `replicas`, `self.didc`, `self.scope` and
        # `self.run_Number` are not defined anywhere in this class, so this
        # loop raises as soon as it is reached. The intended source of these
        # values is not visible here; the logic is kept unchanged until the
        # author confirms what should be attached to what.
        for filemd in replicas:
            try:
                self.didc.attach_dids(scope=self.scope,
                                      name=self.run_Number,
                                      dids=[{
                                          'scope': self.scope,
                                          'name': replicas[filemd]['name']
                                      }])
            except FileAlreadyExists:
                print("File already attached")
class TestDownloadClient(unittest.TestCase): def setUp(self): if config_get_bool('common', 'multi_vo', raise_exception=False, default=False): self.vo = { 'vo': config_get('client', 'vo', raise_exception=False, default='tst') } else: self.vo = {} logger = logging.getLogger('dlul_client') logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.DEBUG) self.client = Client() self.did_client = DIDClient() self.upload_client = UploadClient(_client=self.client, logger=logger) self.download_client = DownloadClient(client=self.client, logger=logger) def _upoad_test_file(self, rse, scope, name, path=None): item = { 'path': path if path else file_generator(), 'rse': rse, 'did_scope': scope, 'did_name': name, 'guid': generate_uuid(), } assert self.upload_client.upload([item]) == 0 return item @staticmethod def _check_download_result(actual_result, expected_result): assert len(expected_result) == len(actual_result) expected_result = sorted(expected_result, key=lambda x: x['did']) actual_result = sorted(actual_result, key=lambda x: x['did']) for i, expected in enumerate(expected_result): for param_name, expected_value in expected.items(): assert param_name and actual_result[i][param_name] == expected[ param_name] def test_download_without_base_dir(self): rse = 'MOCK4' scope = 'mock' item = self._upoad_test_file(rse, scope, 'testDownloadNoBasedir' + generate_uuid()) did = '%s:%s' % (scope, item['did_name']) try: # download to the default location, i.e. 
to ./ result = self.download_client.download_dids([{'did': did}]) self._check_download_result( actual_result=result, expected_result=[{ 'did': did, 'clientState': 'DONE', }], ) # re-downloading the same file again should not overwrite it result = self.download_client.download_dids([{'did': did}]) self._check_download_result( actual_result=result, expected_result=[{ 'did': did, 'clientState': 'ALREADY_DONE', }], ) finally: shutil.rmtree(scope) def test_download_multiple(self): rse = 'MOCK4' scope = 'mock' base_name = 'testDownloadItem' + generate_uuid() item000 = self._upoad_test_file(rse, scope, base_name + '.000') item001 = self._upoad_test_file(rse, scope, base_name + '.001') item100 = self._upoad_test_file(rse, scope, base_name + '.100') with TemporaryDirectory() as tmp_dir: # Download specific DID result = self.download_client.download_dids([{ 'did': '%s:%s' % (scope, item000['did_name']), 'base_dir': tmp_dir }]) self._check_download_result( actual_result=result, expected_result=[{ 'did': '%s:%s' % (scope, item000['did_name']), 'clientState': 'DONE', }], ) # Download multiple files with wildcard. One file already exists on the file system. Will not be re-downloaded. 
# NOTE(review): the statements down to the `@pytest.mark.xfail` decorator are the
# tail of a test method whose header lies above this chunk. They are laid out on
# the assumption that they sit inside that method's `with ... as tmp_dir:` block
# -- confirm against the full file.
            result = self.download_client.download_dids([{
                'did': '%s:%s.0*' % (scope, base_name),
                'base_dir': tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did': '%s:%s' % (scope, item000['did_name']),
                        'clientState': 'ALREADY_DONE',
                    },
                    {
                        'did': '%s:%s' % (scope, item001['did_name']),
                        'clientState': 'DONE',
                    },
                ],
            )

            # Download with filter (no 'did' given: selection by guid and scope only)
            result = self.download_client.download_dids([{
                'filters': {
                    'guid': item000['guid'],
                    'scope': scope
                },
                'base_dir': tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did': '%s:%s' % (scope, item000['did_name']),
                }],
            )

            # Download with wildcard and name
            result = self.download_client.download_dids([{
                'did': '%s:*' % scope,
                'filters': {
                    'guid': item100['guid']
                },
                'base_dir': tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did': '%s:%s' % (scope, item100['did_name']),
                    'clientState': 'DONE',
                }],
            )

            # Don't create subdirectories by scope: the expected destination
            # paths are directly below tmp_dir
            result = self.download_client.download_dids([{
                'did': '%s:%s.*' % (scope, base_name),
                'base_dir': tmp_dir,
                'no_subdir': True
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did': '%s:%s' % (scope, item000['did_name']),
                        'clientState': 'DONE',
                        'dest_file_paths': ['%s/%s' % (tmp_dir, item000['did_name'])],
                    },
                    {
                        'did': '%s:%s' % (scope, item001['did_name']),
                        'clientState': 'DONE',
                        'dest_file_paths': ['%s/%s' % (tmp_dir, item001['did_name'])],
                    },
                    {
                        'did': '%s:%s' % (scope, item100['did_name']),
                        'clientState': 'DONE',
                        'dest_file_paths': ['%s/%s' % (tmp_dir, item100['did_name'])],
                    },
                ],
            )

            # Re-download file existing on the file system with no-subdir set. It must be overwritten.
            # NOTE(review): the assertion below expects 'ALREADY_DONE', which reads
            # as "not downloaded again" rather than "overwritten" -- confirm which
            # of the comment and the expectation is current.
            result = self.download_client.download_dids([{
                'did': '%s:%s' % (scope, item100['did_name']),
                'base_dir': tmp_dir,
                'no_subdir': True
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[{
                    'did': '%s:%s' % (scope, item100['did_name']),
                    'clientState': 'ALREADY_DONE',
                    'dest_file_paths': ['%s/%s' % (tmp_dir, item100['did_name'])],
                }],
            )

    @pytest.mark.xfail(
        reason=
        'XRD1 must be initialized https://github.com/rucio/rucio/pull/4165/')
    def test_download_from_archive_on_xrd(self):
        """
        Download archive constituents from the 'XRD1' RSE.

        A zip archive holding two small files is built locally, uploaded, and
        its members are registered with ``add_files_to_archive``. The members
        are then downloaded by exact DID, by wildcard and by PFN. The test is
        marked as an expected failure (see the xfail reason).
        """
        scope = 'test'
        rse = 'XRD1'
        base_name = 'testDownloadArchive' + generate_uuid()
        with TemporaryDirectory() as tmp_dir:
            # Create a zip archive with two files and upload it
            name000 = base_name + '.000'
            data000 = '000'
            adler000 = '01230091'
            name001 = base_name + '.001'
            data001 = '001'
            adler001 = '01240092'
            zip_name = base_name + '.zip'
            zip_path = '%s/%s' % (tmp_dir, zip_name)
            with ZipFile(zip_path, 'w') as myzip:
                myzip.writestr(name000, data=data000)
                myzip.writestr(name001, data=data001)
            self._upoad_test_file(rse, scope, zip_name, path=zip_path)
            # Register both members as files contained in the uploaded archive
            self.did_client.add_files_to_archive(
                scope,
                zip_name,
                [
                    {
                        'scope': scope,
                        'name': name000,
                        'bytes': len(data000),
                        'type': 'FILE',
                        'adler32': adler000,
                        'meta': {
                            'guid': str(generate_uuid())
                        }
                    },
                    {
                        'scope': scope,
                        'name': name001,
                        'bytes': len(data001),
                        'type': 'FILE',
                        'adler32': adler001,
                        'meta': {
                            'guid': str(generate_uuid())
                        }
                    },
                ],
            )

            # Download one file from the archive
            result = self.download_client.download_dids([{
                'did': '%s:%s' % (scope, name000),
                'base_dir': tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did': '%s:%s' % (scope, name000),
                        'clientState': 'DONE',
                    },
                ],
            )
            # The extracted member must carry the content written into the zip
            with open('%s/%s/%s' % (tmp_dir, scope, name000), 'r') as file:
                assert file.read() == data000

            # Download both files from the archive
            result = self.download_client.download_dids([{
                'did': '%s:%s.00*' % (scope, base_name),
                'base_dir': tmp_dir
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did': '%s:%s' % (scope, name000),
                        'clientState': 'ALREADY_DONE',
                    },
                    {
                        'did': '%s:%s' % (scope, name001),
                        'clientState': 'DONE',
                    },
                ],
            )
            with open('%s/%s/%s' % (tmp_dir, scope, name001), 'r') as file:
                assert file.read() == data001

            # Take the pfn of the first source reported for the second member
            pfn = next(filter(lambda r: name001 in r['did'],
                              result))['sources'][0]['pfn']
            # Download by pfn from the archive
            result = self.download_client.download_pfns([{
                'did': '%s:%s' % (scope, name001),
                'pfn': pfn,
                'rse': rse,
                'base_dir': tmp_dir,
                'no_subdir': True
            }])
            self._check_download_result(
                actual_result=result,
                expected_result=[
                    {
                        'did': '%s:%s' % (scope, name001),
                        'clientState': 'DONE',
                    },
                ],
            )

    def test_trace_copy_out_and_checksum_validation(self):
        """
        Check the traces copied out by DID and PFN downloads.

        Each step passes a fresh list as ``traces_copy_out`` and asserts that
        exactly one trace was recorded with the expected ``clientState``
        (FILE_NOT_FOUND, DONE, ALREADY_DONE or FAIL_VALIDATE), covering local
        file modification, a wrong checksum supplied by the caller and a
        modified source file.
        """
        rse = 'MOCK4'
        scope = 'mock'
        name = 'testDownloadTraces' + generate_uuid()
        self._upoad_test_file(rse, scope, name)

        with TemporaryDirectory() as tmp_dir:
            # Try downloading non-existing did
            traces = []
            with pytest.raises(NoFilesDownloaded):
                self.download_client.download_dids([{
                    'did': 'some:randomNonExistingDid',
                    'base_dir': tmp_dir
                }],
                                                   traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'FILE_NOT_FOUND'

            # Download specific DID
            traces = []
            self.download_client.download_dids([{
                'did': '%s:%s' % (scope, name),
                'base_dir': tmp_dir
            }],
                                               traces_copy_out=traces)
            assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'

            # Download same DID again
            traces = []
            result = self.download_client.download_dids(
                [{
                    'did': '%s:%s' % (scope, name),
                    'base_dir': tmp_dir
                }],
                traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'ALREADY_DONE'

            # Change the local file and download the same file again.
            # Checksum validation should fail and it must be re-downloaded
            with open(result[0]['dest_file_paths'][0], 'a') as f:
                f.write("more data")
            traces = []
            result = self.download_client.download_dids(
                [{
                    'did': '%s:%s' % (scope, name),
                    'base_dir': tmp_dir
                }],
                traces_copy_out=traces)
            assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'

            # Keep the source pfn for the pfn-based downloads below
            pfn = result[0]['sources'][0]['pfn']

        # Switch to a new empty directory
        with TemporaryDirectory() as tmp_dir:
            # Wildcards in did name are not allowed on pfn downloads
            traces = []
            with pytest.raises(InputValidationError):
                self.download_client.download_pfns([{
                    'did': '%s:*' % scope,
                    'pfn': pfn,
                    'rse': rse,
                    'base_dir': tmp_dir
                }],
                                                   traces_copy_out=traces)
            # The request is rejected before any trace is recorded
            assert not traces

            # Same pfn, but without wildcard in the did should work
            traces = []
            self.download_client.download_pfns([{
                'did': '%s:%s' % (scope, name),
                'pfn': pfn,
                'rse': rse,
                'base_dir': tmp_dir
            }],
                                               traces_copy_out=traces)
            assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'

            # Same pfn. Local file already present. Shouldn't be overwritten.
            traces = []
            self.download_client.download_pfns([{
                'did': '%s:%s' % (scope, name),
                'pfn': pfn,
                'rse': rse,
                'base_dir': tmp_dir
            }],
                                               traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'ALREADY_DONE'

            # Provide wrong checksum for validation, the file will be
            # re-downloaded but checksum validation fails
            traces = []
            with pytest.raises(NoFilesDownloaded):
                self.download_client.download_pfns(
                    [{
                        'did': '%s:%s' % (scope, name),
                        'pfn': pfn,
                        'rse': rse,
                        'adler32': 'wrong',
                        'base_dir': tmp_dir
                    }],
                    traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'FAIL_VALIDATE'

        # Switch to a new empty directory
        with TemporaryDirectory() as tmp_dir:
            # Simulate checksum corruption by changing the source file. We rely on the particularity
            # that the MOCK4 rse uses the posix protocol: files are stored on the local file system
            protocol = rsemgr.create_protocol(rsemgr.get_rse_info(
                rse, vo=self.client.vo),
                                              operation='read')
            assert isinstance(protocol, PosixProtocol)
            mock_rse_local_path = protocol.pfn2path(pfn)
            with open(mock_rse_local_path, 'w') as f:
                f.write('some completely other data')

            # Download fails checksum validation
            traces = []
            with pytest.raises(NoFilesDownloaded):
                self.download_client.download_dids(
                    [{
                        'did': '%s:%s' % (scope, name),
                        'base_dir': tmp_dir
                    }],
                    traces_copy_out=traces)
            assert len(
                traces) == 1 and traces[0]['clientState'] == 'FAIL_VALIDATE'

            # Ignore_checksum set. Download works.
            traces = []
            self.download_client.download_dids([{
                'did': '%s:%s' % (scope, name),
                'base_dir': tmp_dir,
                'ignore_checksum': True
            }],
                                               traces_copy_out=traces)
            assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'
class TestDownloadClient(object):
    """
    Download test built around one generated file that is uploaded to the
    'MOCK4' RSE under scope 'mock' before each test.
    """

    def setup(self):
        """
        Create the clients, generate a local file and upload it.

        The scope, name, RSE and guid of the uploaded file are stored on the
        instance so the test can build its download requests from them.
        """
        logger = logging.getLogger('dlul_client')
        # getLogger() returns the same logger object for a given name, so an
        # unconditional addHandler() would stack one more StreamHandler on
        # every setup() call and emit each record several times.
        if not any(isinstance(handler, logging.StreamHandler)
                   for handler in logger.handlers):
            logger.addHandler(logging.StreamHandler())
        logger.setLevel(logging.DEBUG)

        self.client = Client()
        self.upload_client = UploadClient(_client=self.client, logger=logger)
        self.download_client = DownloadClient(client=self.client,
                                              logger=logger)

        self.file_path = file_generator()
        self.scope = 'mock'
        self.name = os.path.basename(self.file_path)
        self.rse = 'MOCK4'
        self.guid = generate_uuid()

        item = {
            'path': self.file_path,
            'rse': self.rse,
            'did_scope': self.scope,
            'did_name': self.name,
            'guid': self.guid
        }
        # upload() is expected to return 0 on success
        nose.tools.assert_equal(self.upload_client.upload([item]), 0)

    def teardown(self):
        """
        Remove the local 'mock' directory.

        NOTE(review): presumably this is where the downloads land (the
        directory name matches the scope) -- confirm.
        """
        shutil.rmtree('mock')

    def test_download_item(self):
        """ DOWNLOAD (CLIENT): Download DIDs """
        did = '%s:%s' % (self.scope, self.name)

        # Download specific DID
        result = self.download_client.download_dids([{'did': did}])
        nose.tools.assert_true(result)

        # Download with wildcard (last two characters of the name replaced)
        wildcard_did = '%s:%s' % (self.scope, self.name[:-2] + '*')
        result = self.download_client.download_dids([{'did': wildcard_did}])
        nose.tools.assert_true(result)

        # Download with filter
        result = self.download_client.download_dids([{
            'filters': {
                'guid': self.guid,
                'scope': self.scope
            }
        }])
        nose.tools.assert_true(result)

        # Download with wildcard and name
        result = self.download_client.download_dids([{
            'did': '%s:%s' % (self.scope, '*'),
            'filters': {
                'guid': self.guid
            }
        }])
        nose.tools.assert_true(result)
def _stage_in_api(dst, fspec, trace_report, trace_report_out, transfer_timeout, use_pcache):
    """
    Stage in one file through the rucio download client API.

    :param dst: directory passed to rucio as ``base_dir``.
    :param fspec: file specification; its ``scope``, ``lfn``, ``ddmendpoint`` and ``turl`` attributes are read.
    :param trace_report: custom trace fields handed to rucio; an empty dict is used when this is falsy.
    :param trace_report_out: list handed to rucio as ``traces_copy_out`` and returned to the caller.
    :param transfer_timeout: forwarded to rucio as ``transfer_timeout`` when truthy.
    :param use_pcache: when truthy, ``check_pcache`` is switched on for the download client.
    :return: tuple (exit code, trace_report_out); the exit code is 0 on success and -1 when the
             client failed but the first trace entry carries a 'stateReason'.
    :raises Exception: the client's exception, when ``trace_report_out`` is empty or its first
                       entry has no 'stateReason'.
    """
    from rucio.client.downloadclient import DownloadClient

    # set up the download client
    rucio_downloader = DownloadClient(logger=logger)
    if use_pcache:
        rucio_downloader.check_pcache = True

    # tracing follows the module-level tracing_rucio setting, but only on
    # client objects that expose a 'tracing' attribute
    if hasattr(rucio_downloader, 'tracing'):
        rucio_downloader.tracing = tracing_rucio

    # describe the file to download; optional keys are added afterwards so
    # the key order matches what is logged below
    download_item = {
        'did_scope': fspec.scope,
        'did_name': fspec.lfn,
        'did': '%s:%s' % (fspec.scope, fspec.lfn),
        'rse': fspec.ddmendpoint,
        'base_dir': dst,
        'no_subdir': True,
    }
    if fspec.turl:
        download_item['pfn'] = fspec.turl
    if transfer_timeout:
        download_item['transfer_timeout'] = transfer_timeout
    download_item['connection_timeout'] = 60 * 60

    logger.info('rucio API stage-in dictionary: %s' % download_item)

    custom_trace_fields = trace_report or {}

    exit_code = 0
    # the download client raises an exception if any file failed
    try:
        logger.info('*** rucio API downloading file (taking over logging) ***')
        if fspec.turl:
            # a turl is known: download by pfn, using a single thread
            outcome = rucio_downloader.download_pfns(
                [download_item],
                1,
                trace_custom_fields=custom_trace_fields,
                traces_copy_out=trace_report_out)
        else:
            outcome = rucio_downloader.download_dids(
                [download_item],
                trace_custom_fields=custom_trace_fields,
                traces_copy_out=trace_report_out)
    except Exception as e:
        logger.warning('*** rucio API download client failed ***')
        logger.warning('caught exception: %s' % e)
        logger.debug('trace_report_out=%s' % trace_report_out)
        # re-raise only when no error information can be taken from the traces
        if not trace_report_out or not trace_report_out[0].get('stateReason'):
            raise e
        exit_code = -1
    else:
        logger.info('*** rucio API download client finished ***')
        logger.debug('client returned %s' % outcome)

    # NOTE(review): layout reconstructed from a whitespace-collapsed source;
    # this line is placed after the try statement (it runs on both paths) --
    # confirm it was not meant to sit inside the else branch.
    logger.debug('trace_report_out=%s' % trace_report_out)

    return exit_code, trace_report_out
class TestDownloadClient(object):
    """
    Download test whose input file is generated and uploaded by the test
    itself through ``create_and_upload_tmp_file``.
    """

    def setup(self):
        """
        Create the rucio client plus the upload and download clients sharing it.
        """
        logger = logging.getLogger('dlul_client')
        # getLogger() returns the same logger object for a given name, so an
        # unconditional addHandler() would stack one more StreamHandler on
        # every setup() call and emit each record several times.
        if not any(isinstance(handler, logging.StreamHandler)
                   for handler in logger.handlers):
            logger.addHandler(logging.StreamHandler())
        logger.setLevel(logging.DEBUG)

        self.client = Client()
        self.upload_client = UploadClient(_client=self.client, logger=logger)
        self.download_client = DownloadClient(client=self.client,
                                              logger=logger)

    def create_and_upload_tmp_file(self, rse, scope='mock'):
        """
        Generate a local file and upload it to the given RSE.

        :param rse: name of the RSE to upload to.
        :param scope: scope of the new DID (default 'mock').
        :return: the upload item dict with the keys 'path', 'rse',
                 'did_scope', 'did_name' and 'guid'.
        """
        file_path = file_generator()
        item = {
            'path': file_path,
            'rse': rse,
            'did_scope': scope,
            'did_name': os.path.basename(file_path),
            'guid': generate_uuid()
        }
        # upload() is expected to return 0 on success
        nose.tools.assert_equal(self.upload_client.upload([item]), 0)
        return item

    def test_download_item(self):
        """ DOWNLOAD (CLIENT): download DIDs. """
        item = self.create_and_upload_tmp_file('MOCK4')
        scope = item['did_scope']
        name = item['did_name']
        uuid = item['guid']

        # Download specific DID
        result = self.download_client.download_dids([{
            'did': '%s:%s' % (scope, name)
        }])
        nose.tools.assert_true(result)

        # Download with wildcard (last two characters of the name replaced)
        result = self.download_client.download_dids([{
            'did': '%s:%s' % (scope, name[:-2] + '*')
        }])
        nose.tools.assert_true(result)

        # Download with filter
        result = self.download_client.download_dids([{
            'filters': {
                'guid': uuid,
                'scope': scope
            }
        }])
        nose.tools.assert_true(result)

        # Download with wildcard and name
        result = self.download_client.download_dids([{
            'did': '%s:%s' % (scope, '*'),
            'filters': {
                'guid': uuid
            }
        }])
        nose.tools.assert_true(result)