def __init__(self, _client=None, logger=None, tracing=True):
    """
    Initialises the basic settings for an UploadClient object

    :param _client: Optional: rucio.client.client.Client object. If None, a new object will be created.
    :param logger:  Optional: logging.Logger object. If None, the default logger will be used.
    :param tracing: Optional: if False, no trace messages are sent. (Default: True)
    """
    if not logger:
        self.logger = logging.log
    else:
        self.logger = logger.log

    self.client = _client if _client else Client()
    self.client_location = detect_client_location()
    # if a token should be used, use only JWT tokens
    self.auth_token = self.client.auth_token if len(self.client.auth_token.split(".")) == 3 else None
    self.tracing = tracing
    if not self.tracing:
        self.logger(logging.DEBUG, 'Tracing is turned off.')
    self.default_file_scope = 'user.' + self.client.account
    self.rses = {}
    self.rse_expressions = {}

    self.trace = {}
    self.trace['hostname'] = socket.getfqdn()
    self.trace['account'] = self.client.account
    if self.client.vo != 'def':
        self.trace['vo'] = self.client.vo
    self.trace['eventType'] = 'upload'
    self.trace['eventVersion'] = version.RUCIO_VERSION[0]
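# Illustrative construction of the UploadClient variant above (a sketch, not the
# canonical setup): it assumes a configured Rucio client environment and the usual
# rucio.client.uploadclient import path. The constructor stores logger.log, so
# messages are later emitted via self.logger(level, msg).
import logging

from rucio.client.uploadclient import UploadClient

logging.basicConfig(level=logging.INFO)
upload_client = UploadClient(logger=logging.getLogger('user.upload'), tracing=False)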
def __init__(self, _client=None, logger=None, tracing=True):
    """
    Initialises the basic settings for an UploadClient object

    :param _client: Optional: rucio.client.client.Client object. If None, a new object will be created.
    :param logger:  logging.Logger object to use for uploads. If None, nothing will be logged.
    """
    if not logger:
        logger = logging.getLogger('%s.null' % __name__)
        logger.disabled = True
    self.logger = logger
    self.client = _client if _client else Client()
    self.client_location = detect_client_location()
    # if token should be used, use only JWT tokens
    self.auth_token = self.client.auth_token if len(self.client.auth_token.split(".")) == 3 else None
    self.tracing = tracing
    if not self.tracing:
        logger.debug('Tracing is turned off.')
    self.default_file_scope = 'user.' + self.client.account
    self.rses = {}

    self.trace = {}
    self.trace['hostname'] = socket.getfqdn()
    self.trace['account'] = self.client.account
    self.trace['eventType'] = 'upload'
    self.trace['eventVersion'] = version.RUCIO_VERSION[0]
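# Standalone sketch of the "disabled null logger" fallback used above, so the rest of
# the code can always call logger.debug(...) without None checks (standard library only;
# the helper name is illustrative).
import logging

def _get_logger(logger=None):
    if not logger:
        # a named but disabled logger swallows all messages
        logger = logging.getLogger('%s.null' % __name__)
        logger.disabled = True
    return logger

log = _get_logger()
log.debug('this message is silently dropped by the disabled null logger')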
def __init__(self, client=None, logger=None):
    """
    Initialises the basic settings for a DownloadClient object

    :param client: Optional: rucio.client.client.Client object. If None, a new object will be created.
    :param logger: Optional: logging.Logger object to use for downloads. If None, nothing will be logged.
    """
    if not logger:
        logger = logging.getLogger('%s.null' % __name__)
        logger.disabled = True
    self.logger = logger
    self.is_human_readable = True
    self.client = client if client else Client()
    self.client_location = detect_client_location()

    account_attributes = [acc for acc in self.client.list_account_attributes(self.client.account)]
    self.is_admin = False
    for attr in account_attributes[0]:
        if attr['key'] == 'admin':
            self.is_admin = attr['value'] is True
            break
    if self.is_admin:
        logger.debug('Admin mode enabled')

    self.trace_tpl = {}
    self.trace_tpl['hostname'] = self.client_location['fqdn']
    self.trace_tpl['localSite'] = self.client_location['site']
    self.trace_tpl['account'] = self.client.account
    self.trace_tpl['eventType'] = 'download'
    self.trace_tpl['eventVersion'] = 'api_' + version.RUCIO_VERSION[0]
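# Illustrative construction of a DownloadClient with an explicit logger (a sketch;
# assumes a configured Rucio client environment so that Client() and
# list_account_attributes() can reach a server).
import logging

from rucio.client.downloadclient import DownloadClient

logging.basicConfig(level=logging.DEBUG)
download_client = DownloadClient(logger=logging.getLogger('user.download'))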
def download_file_from_archive(self, items, trace_custom_fields={}):
    """
    Download files extracted from archives. This function can only download files, no datasets.

    :param items: List of dictionaries. Each dictionary describing a file to download. Keys:
        did                 - DID string of the file to be extracted (e.g. 'scope:file.name'). Wildcards are not allowed
        rse                 - RSE name (e.g. 'CERN-PROD_DATADISK'). RSE expressions are not allowed
        archive             - name of the archive from which the file should be extracted
        base_dir            - Optional: base directory where the downloaded files will be stored. (Default: '.')
        no_subdir           - Optional: If true, files are written directly into base_dir and existing files are overwritten. (Default: False)
        ignore_checksum     - Optional: If true, the checksum validation is skipped (for PFN downloads the checksum must be given explicitly). (Default: True)
        transfer_timeout    - Optional: timeout for the download protocols. (Default: None)
    :param trace_custom_fields: Custom key value pairs to send with the traces

    :returns: a list of dictionaries with an entry for each file, containing the input options, the did, and the clientState
              clientState can be one of the following: ALREADY_DONE, DONE, FILE_NOT_FOUND, FAIL_VALIDATE, FAILED

    :raises InputValidationError: if one of the input items is in the wrong format
    :raises NoFilesDownloaded: if no files could be downloaded
    :raises NotAllFilesDownloaded: if not all files could be downloaded
    :raises RucioException: if something unexpected went wrong during the download
    """
    logger = self.logger
    trace = copy.deepcopy(self.trace_tpl)
    log_prefix = 'Extracting files: '

    logger.info('Processing %d item(s) for input' % len(items))
    for item in items:
        archive = item.get('archive')
        file_extract = item.get('did')
        rse_name = item.get('rse')
        if not archive or not file_extract:
            raise InputValidationError('File DID and archive DID are mandatory')
        if '*' in archive:
            logger.debug(archive)
            raise InputValidationError('Cannot use PFN download with wildcard in DID')

        file_extract_scope, file_extract_name = self._split_did_str(file_extract)
        archive_scope, archive_name = self._split_did_str(archive)

        # listing all available replicas of the given archive file
        rse_expression = 'istape=False' if not rse_name else '(%s)&istape=False' % rse_name
        archive_replicas = self.client.list_replicas([{'scope': archive_scope, 'name': archive_name}],
                                                     schemes=['root'],
                                                     rse_expression=rse_expression,
                                                     unavailable=False,
                                                     client_location=detect_client_location())

        # preparing trace
        trace['uuid'] = generate_uuid()
        trace['scope'] = archive_scope
        trace['dataset'] = archive_name
        trace['filename'] = file_extract

        # preparing output directories
        dest_dir_path = self._prepare_dest_dir(item.get('base_dir', '.'),
                                               os.path.join(archive_scope, archive_name + '.extracted'),
                                               file_extract,
                                               item.get('no_subdir'))
        logger.debug('%sPreparing output destination %s' % (log_prefix, dest_dir_path))

        # validation and customisation of list of replicas
        archive_pfns = []
        replicas = next(archive_replicas)
        for rse in replicas['rses']:
            archive_pfns.extend(replicas['rses'][rse])
        if len(archive_pfns) == 0:
            raise InputValidationError('No PFNs for replicas of archive %s' % archive)
        archive_pfns.reverse()

        # checking whether the file already exists locally
        success = False
        dest_file_path = os.path.join(dest_dir_path, file_extract)
        if os.path.isfile(dest_file_path):
            logger.info('%s%s File exists already locally: %s' % (log_prefix, file_extract_name, dest_dir_path))
            trace['clientState'] = 'ALREADY_DONE'
            trace['transferStart'] = time.time()
            trace['transferEnd'] = time.time()
            send_trace(trace, self.client.host, self.user_agent)
            success = True

        # download, iterating over different RSEs until success
        retry_counter = 0
        while not success and len(archive_pfns):
            retry_counter += 1
            pfn = archive_pfns.pop()
            trace['rse'] = replicas['pfns'][pfn]['rse']
            try:
                start_time = time.time()
                cmd = 'xrdcp -vf %s -z %s file://%s' % (pfn, file_extract_name, dest_dir_path)
                logger.debug('%sExecuting: %s' % (log_prefix, cmd))
                status, out, err = execute(cmd)
                end_time = time.time()
                trace['transferStart'] = start_time
                trace['transferEnd'] = end_time
                if status == 54:
                    trace['clientState'] = 'FAILED'
                    raise SourceNotFound(err)
                elif status != 0:
                    trace['clientState'] = 'FAILED'
                    raise RucioException(err)
                else:
                    success = True
                    trace['clientState'] = 'DONE'
            except Exception as e:
                trace['clientState'] = 'FAILED'
                raise ServiceUnavailable(e)
            send_trace(trace, self.client.host, self.user_agent)
        if not success:
            raise RucioException('Failed to download file %s after %d retries' % (file_extract_name, retry_counter))
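# Hedged usage sketch for download_file_from_archive: the DIDs, archive name and RSE
# below are placeholders, and it assumes the download_client from above plus an
# archive replica reachable via xrdcp. Per the docstring, per-file results carry a
# clientState such as ALREADY_DONE, DONE or FAILED.
items = [{
    'did': 'user.jdoe:file_inside_archive.root',   # file to extract, wildcards not allowed
    'archive': 'user.jdoe:archive.tar',            # archive DID containing the file
    'rse': 'MOCK-RSE',                             # plain RSE name, no RSE expression
    'base_dir': './downloads',
}]
download_client.download_file_from_archive(items)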
def download_dids(self, items, num_threads=2, trace_custom_fields={}):
    """
    Download items with given DIDs. This function can also download datasets and wildcarded DIDs.

    :param items: List of dictionaries. Each dictionary describing an item to download. Keys:
        did                 - DID string of this file (e.g. 'scope:file.name'). Wildcards are allowed
        rse                 - Optional: RSE name (e.g. 'CERN-PROD_DATADISK') or RSE expression from where to download
        force_scheme        - Optional: force a specific scheme to download this item. (Default: None)
        base_dir            - Optional: base directory where the downloaded files will be stored. (Default: '.')
        no_subdir           - Optional: If true, files are written directly into base_dir and existing files are overwritten. (Default: False)
        nrandom             - Optional: if the DID addresses a dataset, nrandom files will be randomly chosen for download from the dataset
        ignore_checksum     - Optional: If true, skips the checksum validation between the downloaded file and the rucio catalogue. (Default: False)
        transfer_timeout    - Optional: timeout for the download protocols. (Default: None)
    :param num_threads: Suggestion of number of threads to use for the download. It will be lowered if it's too high.
    :param trace_custom_fields: Custom key value pairs to send with the traces

    :returns: a list of dictionaries with an entry for each file, containing the input options, the did, and the clientState

    :raises InputValidationError: if one of the input items is in the wrong format
    :raises NoFilesDownloaded: if no files could be downloaded
    :raises NotAllFilesDownloaded: if not all files could be downloaded
    :raises RucioException: if something unexpected went wrong during the download
    """
    logger = self.logger
    trace_custom_fields['uuid'] = generate_uuid()

    logger.info('Processing %d item(s) for input' % len(items))
    resolved_items = []
    for item in items:
        did_str = item.get('did')
        if not did_str:
            raise InputValidationError('The key did is mandatory')

        logger.debug('Processing item %s' % did_str)
        new_item = copy.deepcopy(item)

        # extend RSE expression to exclude tape RSEs for non-admin accounts
        if not self.is_admin:
            rse = new_item.get('rse')
            new_item['rse'] = 'istape=False' if not rse else '(%s)&istape=False' % rse
            logger.debug('RSE-Expression: %s' % new_item['rse'])

        # resolve any wildcards in the input DIDs
        did_scope, did_name = self._split_did_str(did_str)
        logger.debug('Split DID: %s:%s' % (did_scope, did_name))
        new_item['scope'] = did_scope
        if '*' in did_name:
            logger.debug('Resolving wildcarded DID %s' % did_str)
            for dsn in self.client.list_dids(did_scope, filters={'name': did_name}, type='all'):
                logger.debug('%s:%s' % (did_scope, dsn))
                # append a copy per match so each resolved DID keeps its own name
                resolved_item = copy.deepcopy(new_item)
                resolved_item['name'] = dsn
                resolved_item['did'] = '%s:%s' % (did_scope, dsn)
                resolved_items.append(resolved_item)
        else:
            new_item['name'] = did_name
            resolved_items.append(new_item)

    input_items = []

    # get replicas for every file of the given DIDs
    logger.debug('%d DIDs after processing input' % len(resolved_items))
    for item in resolved_items:
        did_scope = item['scope']
        did_name = item['name']
        did_str = item['did']
        logger.debug('Processing: %s' % item)

        # get the type of the given DID
        did_type = self.client.get_did(did_scope, did_name)['type'].upper()
        logger.debug('Type: %s' % did_type)

        # get replicas (RSEs) with PFNs for each file (especially if it's a dataset)
        files_with_replicas = self.client.list_replicas([{'scope': did_scope, 'name': did_name}],
                                                        schemes=item.get('force_scheme'),
                                                        rse_expression=item.get('rse'),
                                                        client_location=detect_client_location())

        nrandom = item.get('nrandom')
        if nrandom:
            logger.info('Selecting %d random replicas from dataset %s' % (nrandom, did_str))
            files_with_replicas = list(files_with_replicas)
            random.shuffle(files_with_replicas)
            files_with_replicas = files_with_replicas[0:nrandom]

        for file_item in files_with_replicas:
            file_did_scope = file_item['scope']
            file_did_name = file_item['name']
            file_did_str = '%s:%s' % (file_did_scope, file_did_name)
            logger.debug('Queueing file: %s' % file_did_str)

            # put the input options from item into the file item
            file_item.update(item)

            dest_dir_name = file_did_scope
            if did_type == 'DATASET':
                # if the DID is a dataset, the update above overwrote scope and name; restore them
                file_item['scope'] = file_did_scope
                file_item['name'] = file_did_name
                file_item['did'] = file_did_str
                file_item['dataset_scope'] = did_scope
                file_item['dataset_name'] = did_name
                dest_dir_name = did_name

            dest_dir_path = self._prepare_dest_dir(item.get('base_dir', '.'),
                                                   dest_dir_name, file_did_name,
                                                   item.get('no_subdir'))
            file_item['dest_dir_path'] = dest_dir_path

            input_items.append(file_item)

    num_files_in = len(input_items)
    output_items = self._download_multithreaded(input_items, num_threads, trace_custom_fields)
    num_files_out = len(output_items)

    if num_files_in != num_files_out:
        raise RucioException('%d items were in the input queue but only %d are in the output queue' % (num_files_in, num_files_out))

    return self._check_output(output_items)
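# Hedged usage sketch for download_dids: scope, dataset pattern and RSE expression are
# placeholders, and it assumes the download_client from above with a valid session.
items = [{
    'did': 'user.jdoe:dataset.2024.*',   # wildcards are resolved against the catalogue
    'rse': 'tier=1',                     # optional RSE expression
    'nrandom': 5,                        # pick 5 random files from each matching dataset
    'base_dir': './downloads',
}]
results = download_client.download_dids(items, num_threads=4)
for entry in results:
    print(entry['did'], entry['clientState'])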