def __arc_copy(self, src, dest, space_token=None):
    # TODO set proxy path

    # Convert the arguments to DataPoint objects
    source = DataPoint(str(src), self.cfg)
    if source.h is None:
        raise ServiceUnavailable("Can't handle source %s" % src)

    destination = DataPoint(str(dest), self.cfg)
    if destination.h is None:
        raise ServiceUnavailable("Can't handle destination %s" % dest)
    if space_token:
        destination.h.GetURL().AddOption('spacetoken', space_token)

    # DataMover does the transfer
    mover = arc.DataMover()
    # Don't attempt to retry on error
    mover.retry(False)
    # Passive and insecure gridftp
    mover.passive(True)
    mover.secure(False)

    # Do the transfer
    status = mover.Transfer(source.h, destination.h, arc.FileCache(), arc.URLMap())

    if not status:
        if status.GetErrno() == errno.ENOENT:
            raise SourceNotFound()
        if status.GetErrno() == errno.EEXIST:
            raise FileAlreadyExists()
        raise ServiceUnavailable(str(status))
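# A minimal usage sketch for __arc_copy (the URLs and token below are
# hypothetical, not taken from the original code). Being name-mangled, the
# method is meant to be called from other methods of this protocol class:
#
#     self.__arc_copy('gsiftp://source.example.org:2811/path/file',
#                     'gsiftp://dest.example.org:2811/path/file',
#                     space_token='SOMETOKEN')
#
# It returns None on success and maps ARC errnos to SourceNotFound,
# FileAlreadyExists, or ServiceUnavailable on failure.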
def construct_surl_dune(dsn, name):
    global sam_base
    if sam_base is None:
        sam_base = config.config_get('policy', 'sam_base_url')
    if sam_base is None:
        sam_base = 'https://samweb.fnal.gov:8483/sam/dune-test/api'
    url = '%s/files/name/%s/destination?format=json' % (sam_base, urllib.quote(name, ''))
    try:
        response = requests.get(url)
        response.raise_for_status()
        result = response.json()
    except Exception as ex:
        raise ServiceUnavailable("Error querying SAM: %s" % str(ex))
    destination = result["destination"]
    # Normalise the destination path and append the file name exactly once
    if not destination.startswith('/'):
        destination = '/' + destination
    if destination.endswith('/'):
        destination += name
    else:
        destination += '/' + name
    return destination
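# A self-contained sketch of the path normalisation that construct_surl_dune
# applies to the SAM response (assumed response shape: {"destination": "dune/raw"}).
# The helper and example values below are illustrative only and are not part
# of the original module:
def _join_destination_sketch(destination, name):
    """Ensure a leading slash, then append the file name exactly once."""
    if not destination.startswith('/'):
        destination = '/' + destination
    if destination.endswith('/'):
        destination += name
    else:
        destination += '/' + name
    return destination

assert _join_destination_sketch('dune/raw', 'file.root') == '/dune/raw/file.root'
assert _join_destination_sketch('/dune/raw/', 'file.root') == '/dune/raw/file.root'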
def delete(self, pfn):
    """
    Deletes a file from the connected RSE.

    :param pfn: Physical file name
    :raises ServiceUnavailable, SourceNotFound
    """
    dp = DataPoint(str(pfn), self.cfg)
    if dp.h is None:
        raise ServiceUnavailable("Can't handle pfn %s" % pfn)

    status = dp.h.Remove()
    if not status:
        if status.GetErrno() == errno.ENOENT:
            raise SourceNotFound()
        raise ServiceUnavailable(str(status))
def rename(self, pfn, new_pfn):
    """
    Renames a file stored on the connected RSE.

    :param pfn: Current physical file name
    :param new_pfn: New physical file name
    :raises DestinationNotAccessible, ServiceUnavailable, SourceNotFound
    """
    dp = DataPoint(str(pfn), self.cfg)
    if dp.h is None:
        raise ServiceUnavailable("Can't handle pfn %s" % pfn)

    url = arc.URL(str(new_pfn))
    if not url:
        raise ServiceUnavailable("Can't handle new pfn %s" % new_pfn)

    status = dp.h.Rename(url)
    if not status:
        if status.GetErrno() == errno.ENOENT:
            raise SourceNotFound()
        raise ServiceUnavailable(str(status))
def exists(self, pfn):
    """
    Checks if the requested file is known by the referred RSE.

    :param pfn: Physical file name
    :returns: True if the file exists, False if it doesn't
    :raises ServiceUnavailable
    """
    dp = DataPoint(str(pfn), self.cfg)
    # Guard against unhandleable PFNs, as the other methods do
    if dp.h is None:
        raise ServiceUnavailable("Can't handle pfn %s" % pfn)

    fileinfo = arc.FileInfo()
    status = dp.h.Stat(fileinfo)
    if not status:
        if status.GetErrno() == errno.ENOENT:
            return False
        raise ServiceUnavailable(str(status))
    return True
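# delete(), rename(), and exists() above all translate a failed arc.DataStatus
# into Rucio exceptions the same way. A possible helper factoring out that
# mapping is sketched below; it is hypothetical, not part of the original
# class, and assumes the module's existing errno and exception imports:
def _raise_for_arc_status_sketch(status):
    """Map a failed arc.DataStatus to the matching Rucio exception."""
    if status.GetErrno() == errno.ENOENT:
        raise SourceNotFound()
    if status.GetErrno() == errno.EEXIST:
        raise FileAlreadyExists()
    raise ServiceUnavailable(str(status))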
def download_file_from_archive(self, items, trace_custom_fields={}):
    """
    Download items with a given PFN. This function can only download files, no datasets.

    :param items: List of dictionaries. Each dictionary describing a file to download. Keys:
        did       - DID string of the archive file (e.g. 'scope:file.name'). Wildcards are not allowed
        archive   - DID string of the archive from which the file should be extracted
        rse       - Optional: rse name (e.g. 'CERN-PROD_DATADISK'). RSE Expressions are allowed
        base_dir  - Optional: Base directory where the downloaded files will be stored. (Default: '.')
        no_subdir - Optional: If true, files are written directly into base_dir and existing files are overwritten. (Default: False)
    :param trace_custom_fields: Custom key value pairs to send with the traces

    :returns: a list of dictionaries with an entry for each file, containing the input options, the did, and the clientState
              clientState can be one of the following: ALREADY_DONE, DONE, FILE_NOT_FOUND, FAIL_VALIDATE, FAILED

    :raises InputValidationError: if one of the input items is in the wrong format
    :raises NoFilesDownloaded: if no files could be downloaded
    :raises NotAllFilesDownloaded: if not all files could be downloaded
    :raises SourceNotFound: if xrdcp was unable to find the PFN
    :raises ServiceUnavailable: if xrdcp failed
    :raises RucioException: if something unexpected went wrong during the download
    """
    logger = self.logger
    trace = copy.deepcopy(self.trace_tpl)
    trace['uuid'] = generate_uuid()
    log_prefix = 'Extracting files: '

    logger.info('Processing %d item(s) for input' % len(items))
    for item in items:
        archive = item.get('archive')
        file_extract = item.get('did')
        rse_name = item.get('rse')
        if not archive or not file_extract:
            raise InputValidationError('File DID and archive DID are mandatory')
        if '*' in archive:
            logger.debug(archive)
            raise InputValidationError('Cannot use PFN download with wildcard in DID')

        file_extract_scope, file_extract_name = self._split_did_str(file_extract)
        archive_scope, archive_name = self._split_did_str(archive)

        # listing all available replicas of given archive file
        rse_expression = 'istape=False' if not rse_name else '(%s)&istape=False' % rse_name
        archive_replicas = self.client.list_replicas([{'scope': archive_scope, 'name': archive_name}],
                                                     schemes=['root'],
                                                     rse_expression=rse_expression,
                                                     unavailable=False,
                                                     client_location=self.client_location)

        # preparing trace
        trace['scope'] = archive_scope
        trace['dataset'] = archive_name
        trace['filename'] = file_extract

        # preparing output directories
        dest_dir_path = self._prepare_dest_dir(item.get('base_dir', '.'),
                                               os.path.join(archive_scope, archive_name + '.extracted'),
                                               file_extract,
                                               item.get('no_subdir'))
        logger.debug('%sPreparing output destination %s' % (log_prefix, dest_dir_path))

        # validation and customisation of list of replicas
        archive_replicas = list(archive_replicas)
        if len(archive_replicas) != 1:
            raise RucioException('No replicas for DID found or dataset was given.')
        archive_pfns = archive_replicas[0]['pfns'].keys()
        if len(archive_pfns) == 0:
            raise InputValidationError('No PFNs for replicas of archive %s' % archive)

        # checking whether file already exists
        success = False
        dest_file_path = os.path.join(dest_dir_path, file_extract)
        if os.path.isfile(dest_file_path):
            logger.info('%s%s File exists already locally: %s' % (log_prefix, file_extract_name, dest_dir_path))
            trace['clientState'] = 'ALREADY_DONE'
            trace['transferStart'] = time.time()
            trace['transferEnd'] = time.time()
            send_trace(trace, self.client.host, self.client.user_agent)
            success = True

        # DOWNLOAD, iteration over different rses until success
        retry_counter = 0
        while not success and len(archive_pfns):
            retry_counter += 1
            pfn = archive_pfns.pop()
            trace['rse'] = archive_replicas[0]['pfns'][pfn]['rse']
            try:
                start_time = time.time()
                cmd = 'xrdcp -vf %s -z %s file://%s' % (pfn, file_extract_name, dest_dir_path)
                logger.debug('%sExecuting: %s' % (log_prefix, cmd))
                status, out, err = execute(cmd)
                end_time = time.time()
                trace['transferStart'] = start_time
                trace['transferEnd'] = end_time
                if status == 54:
                    trace['clientState'] = 'FAILED'
                    raise SourceNotFound(err)
                elif status != 0:
                    trace['clientState'] = 'FAILED'
                    raise RucioException(err)
                else:
                    success = True
                    item['clientState'] = 'DONE'
                    trace['clientState'] = 'DONE'
            except Exception as e:
                trace['clientState'] = 'FAILED'
                raise ServiceUnavailable(e)
            send_trace(trace, self.client.host, self.client.user_agent)
        if not success:
            raise RucioException('Failed to download file %s after %d retries' % (file_extract_name, retry_counter))
    return self._check_output(items)
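# A minimal usage sketch for download_file_from_archive, following the item
# keys documented in its docstring (the DIDs and RSE name below are
# hypothetical):
#
#     items = [{
#         'did': 'user.jdoe:data.file.root',      # file inside the archive
#         'archive': 'user.jdoe:archive.zip',     # archive containing the file
#         'rse': 'SOME-RSE_DATADISK',             # optional RSE (expression)
#         'base_dir': '.',                        # optional output directory
#     }]
#     results = download_client.download_file_from_archive(items)
#     for result in results:
#         print(result['did'], result['clientState'])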