def __arc_copy(self, src, dest, space_token=None):

    # TODO set proxy path

    # Convert the arguments to DataPoint objects
    source = DataPoint(str(src), self.cfg)
    if source.h is None:
        raise ServiceUnavailable("Can't handle source %s" % src)

    destination = DataPoint(str(dest), self.cfg)
    if destination.h is None:
        raise ServiceUnavailable("Can't handle destination %s" % dest)
    if space_token:
        destination.h.GetURL().AddOption('spacetoken', space_token)

    # DataMover does the transfer
    mover = arc.DataMover()
    # Don't attempt to retry on error
    mover.retry(False)
    # Passive and insecure gridftp
    mover.passive(True)
    mover.secure(False)

    # Do the transfer
    status = mover.Transfer(source.h, destination.h, arc.FileCache(), arc.URLMap())

    if not status:
        if status.GetErrno() == errno.ENOENT:
            raise SourceNotFound()
        if status.GetErrno() == errno.EEXIST:
            raise FileAlreadyExists()
        raise ServiceUnavailable(str(status))
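
# Usage sketch (illustrative, not part of the original module): __arc_copy is
# name-mangled, so a caller outside the class would normally go through the public
# copy method. Assuming the enclosing protocol class is named `Default` (an
# assumption for this example) and is already constructed with a valid arc.UserConfig,
# a third-party transfer might look like this. The endpoint URLs and space token are
# placeholders, not values from this module.
def _example_arc_copy(protocol):
    src = 'gsiftp://source.example.org:2811/grid/file.root'    # placeholder source PFN
    dest = 'gsiftp://dest.example.org:2811/grid/file.root'     # placeholder destination PFN
    try:
        # name-mangled access, assuming the class is called `Default`
        protocol._Default__arc_copy(src, dest, space_token='RUCIOTEST')
    except SourceNotFound:
        print('source does not exist')
    except FileAlreadyExists:
        print('destination already present')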
def upload_files(account, sources):
    """
    This operation is used to upload files into the system.

    First, file size, file checksum and access permissions on the local file
    system are derived. Second, the rucio-server is called to register all
    necessary data in the database, i.e. add the file to the file catalogue,
    add file metadata, and add replication rules. Note that file replicas
    without a replication rule are deleted automatically, therefore at least
    one (default) replication rule must be created. After the rucio server has
    responded without error, the physical copy of the file can be started.
    After the physical copy has finished successfully, the replica's state is
    changed from 'queued' to 'active' to make the replica available to users.

    :param account: Account identifier
    :param sources: Dictionary with the following structure:
        { path_to_file: { 'scope': scope,
                          'replication_spec': {replication_rules},
                          'dataset': datasetname,
                          'checksum': checksum,
                          'filesize': filesize}}
    :returns: Report represented by a dictionary with information on a per file basis, e.g.
        { 'file.a': True,
          'File.b': SourceNotFound,
          'File.c': RSEAccessDenied,
          'File.d': DatasetAccessDenied,
          ... }
    """
    report = {}
    for src in sources:
        try:
            if not os.access(src, os.R_OK):
                report[src] = SourceAccessDenied()
                continue
        except Exception:
            report[src] = SourceNotFound()
            continue
        if not sources[src]['filesize']:
            sources[src]['filesize'] = os.path.getsize(src)
        if not sources[src]['checksum']:
            pass  # sources[src]['checksum'] = TODO: Derive checksum

    # Remove inaccessible/nonexistent files from sources to avoid unnecessary checks on the server
    for src in report:
        del sources[src]

    # ToDo the REST call for: report = rucio_server.declare_for_upload(sources, atomic=False)
    # ToDo: Merge the response from above into the report dictionary (per file).
    # Possible file states are: DatasetAccessDenied, ScopeAccessDenied, RSEOverQuota,
    # InvalidMetadata, FileReplicaAlreadyExists, FileConsistencyConflict,
    # InvalidReplicationRule, FullStorage.
    # If the transfer is considered to be fine, the value for the file will be True.

    RSEMgr, recommendation = None, None  # In waiting
    for src in sources:
        if report.get(src, True):  # defaults to True until declare_for_upload fills the report
            # ToDo the REST call for: recommendation = rucio_server.recommend_storage(account, sources[src])
            # ToDo the REST call for: rucio_server.prepare_upload(account, sources[src], recommendation)
            try:
                RSEMgr.upload(src, recommendation)
            except (RSEAccessDenied, FullStorage) as error:
                report[src] = error
                continue
            # ToDo the REST call for: rucio_server.confirm_upload(src, recommendation)
            report[src] = True
    return report
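
# Usage sketch (illustrative, not part of the original module): building the
# `sources` dictionary described in the docstring and inspecting the per-file
# report. The path, scope, dataset name and account are placeholders.
def _example_upload_files():
    sources = {'/data/file.a': {'scope': 'user.jdoe',            # placeholder scope
                                'replication_spec': {},          # default replication rule assumed
                                'dataset': 'user.jdoe.testset',  # placeholder dataset
                                'checksum': None,                # derived if not given
                                'filesize': None}}               # derived if not given
    report = upload_files('jdoe', sources)
    for path, state in report.items():
        # state is True on success, otherwise the exception describing the failure
        print(path, state)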
def delete(self, pfn):
    """
    Deletes a file from the connected RSE.

    :param pfn: Physical file name
    :raises ServiceUnavailable, SourceNotFound
    """
    dp = DataPoint(str(pfn), self.cfg)
    if dp.h is None:
        raise ServiceUnavailable("Can't handle pfn %s" % pfn)

    status = dp.h.Remove()
    if not status:
        if status.GetErrno() == errno.ENOENT:
            raise SourceNotFound()
        raise ServiceUnavailable(str(status))
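
# Usage sketch (illustrative, not part of the original module): removing a replica
# and handling the two documented failure modes. The PFN is a placeholder and
# `protocol` is assumed to be an instance of the class defining delete().
def _example_delete(protocol):
    try:
        protocol.delete('gsiftp://storage.example.org:2811/grid/file.root')
    except SourceNotFound:
        print('file was already gone')
    except ServiceUnavailable as error:
        print('storage error: %s' % error)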
def rename(self, pfn, new_pfn):
    """
    Renames a file stored on the connected RSE.

    :param pfn: Current physical file name
    :param new_pfn: New physical file name
    :raises DestinationNotAccessible, ServiceUnavailable, SourceNotFound
    """
    dp = DataPoint(str(pfn), self.cfg)
    if dp.h is None:
        raise ServiceUnavailable("Can't handle pfn %s" % pfn)

    url = arc.URL(str(new_pfn))
    if not url:
        raise ServiceUnavailable("Can't handle new pfn %s" % new_pfn)

    status = dp.h.Rename(url)
    if not status:
        if status.GetErrno() == errno.ENOENT:
            raise SourceNotFound()
        raise ServiceUnavailable(str(status))
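
# Usage sketch (illustrative, not part of the original module): renaming a replica
# in place on the connected RSE. Both PFNs are placeholders and `protocol` is
# assumed to be an instance of the class defining rename().
def _example_rename(protocol):
    try:
        protocol.rename('gsiftp://storage.example.org:2811/grid/file.root',
                        'gsiftp://storage.example.org:2811/grid/file_renamed.root')
    except SourceNotFound:
        print('original file does not exist')
    except ServiceUnavailable as error:
        print('storage error: %s' % error)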
def download_file_from_archive(self, items, trace_custom_fields={}):
    """
    Download items with a given PFN. This function can only download files, no datasets.

    :param items: List of dictionaries. Each dictionary describing a file to download. Keys:
        did       - DID string of the archive file (e.g. 'scope:file.name'). Wildcards are not allowed
        archive   - DID string of the archive from which the file should be extracted
        rse       - Optional: rse name (e.g. 'CERN-PROD_DATADISK'). RSE Expressions are allowed
        base_dir  - Optional: Base directory where the downloaded files will be stored. (Default: '.')
        no_subdir - Optional: If true, files are written directly into base_dir and existing files are overwritten. (Default: False)
    :param trace_custom_fields: Custom key value pairs to send with the traces

    :returns: a list of dictionaries with an entry for each file, containing the input options, the did, and the clientState
              clientState can be one of the following: ALREADY_DONE, DONE, FILE_NOT_FOUND, FAIL_VALIDATE, FAILED

    :raises InputValidationError: if one of the input items is in the wrong format
    :raises NoFilesDownloaded: if no files could be downloaded
    :raises NotAllFilesDownloaded: if not all files could be downloaded
    :raises SourceNotFound: if xrdcp was unable to find the PFN
    :raises ServiceUnavailable: if xrdcp failed
    :raises RucioException: if something unexpected went wrong during the download
    """
    logger = self.logger
    trace = copy.deepcopy(self.trace_tpl)
    trace['uuid'] = generate_uuid()
    log_prefix = 'Extracting files: '

    logger.info('Processing %d item(s) for input' % len(items))
    for item in items:
        archive = item.get('archive')
        file_extract = item.get('did')
        rse_name = item.get('rse')
        if not archive or not file_extract:
            raise InputValidationError('File DID and archive DID are mandatory')
        if '*' in archive:
            logger.debug(archive)
            raise InputValidationError('Cannot use PFN download with wildcard in DID')

        file_extract_scope, file_extract_name = self._split_did_str(file_extract)
        archive_scope, archive_name = self._split_did_str(archive)

        # listing all available replicas of given archive file
        rse_expression = 'istape=False' if not rse_name else '(%s)&istape=False' % rse_name
        archive_replicas = self.client.list_replicas([{'scope': archive_scope, 'name': archive_name}],
                                                     schemes=['root'],
                                                     rse_expression=rse_expression,
                                                     unavailable=False,
                                                     client_location=self.client_location)

        # preparing trace
        trace['scope'] = archive_scope
        trace['dataset'] = archive_name
        trace['filename'] = file_extract

        # preparing output directories
        dest_dir_path = self._prepare_dest_dir(item.get('base_dir', '.'),
                                               os.path.join(archive_scope, archive_name + '.extracted'),
                                               file_extract,
                                               item.get('no_subdir'))
        logger.debug('%sPreparing output destination %s' % (log_prefix, dest_dir_path))

        # validation and customisation of list of replicas
        archive_replicas = list(archive_replicas)
        if len(archive_replicas) != 1:
            raise RucioException('No replicas for DID found or dataset was given.')
        archive_pfns = list(archive_replicas[0]['pfns'].keys())
        if len(archive_pfns) == 0:
            raise InputValidationError('No PFNs for replicas of archive %s' % archive)

        # checking whether file already exists
        success = False
        dest_file_path = os.path.join(dest_dir_path, file_extract)
        if os.path.isfile(dest_file_path):
            logger.info('%s%s File exists already locally: %s' % (log_prefix, file_extract_name, dest_dir_path))
            trace['clientState'] = 'ALREADY_DONE'
            trace['transferStart'] = time.time()
            trace['transferEnd'] = time.time()
            send_trace(trace, self.client.host, self.client.user_agent)
            success = True

        # DOWNLOAD, iteration over different rses until success
        retry_counter = 0
        while not success and len(archive_pfns):
            retry_counter += 1
            pfn = archive_pfns.pop()
            trace['rse'] = archive_replicas[0]['pfns'][pfn]['rse']
            try:
                start_time = time.time()
                cmd = 'xrdcp -vf %s -z %s file://%s' % (pfn, file_extract_name, dest_dir_path)
                logger.debug('%sExecuting: %s' % (log_prefix, cmd))
                status, out, err = execute(cmd)
                end_time = time.time()

                trace['transferStart'] = start_time
                trace['transferEnd'] = end_time
                if status == 54:
                    trace['clientState'] = 'FAILED'
                    raise SourceNotFound(err)
                elif status != 0:
                    trace['clientState'] = 'FAILED'
                    raise RucioException(err)
                else:
                    success = True
                    item['clientState'] = 'DONE'
                    trace['clientState'] = 'DONE'
            except Exception as e:
                trace['clientState'] = 'FAILED'
                raise ServiceUnavailable(e)
            send_trace(trace, self.client.host, self.client.user_agent)
        if not success:
            raise RucioException('Failed to download file %s after %d retries' % (file_extract_name, retry_counter))
    return self._check_output(items)
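
# Usage sketch (illustrative, not part of the original module): extracting one file
# from an archive replica. The DIDs, RSE name and base directory are placeholders,
# and `client` is assumed to be an already configured download-client instance
# exposing download_file_from_archive().
def _example_download_from_archive(client):
    items = [{'did': 'user.jdoe:file.in.archive.root',   # file inside the archive
              'archive': 'user.jdoe:archive.zip',        # the archive DID itself
              'rse': 'CERN-PROD_DATADISK',               # optional RSE restriction
              'base_dir': '.',
              'no_subdir': False}]
    results = client.download_file_from_archive(items, trace_custom_fields={})
    for res in results:
        print(res['did'], res['clientState'])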