Beispiel #1
0
    def __arc_copy(self, src, dest, space_token=None):
        """ Transfers a file from src to dest using an arc.DataMover.

            :param src          Source location, converted with str() before use
            :param dest         Destination location, converted with str() before use
            :param space_token  Optional value set as 'spacetoken' URL option on the destination

            :raises ServiceUnavailable, SourceNotFound, FileAlreadyExists
        """

        # TODO set proxy path

        # Both endpoints are wrapped in DataPoint objects; a DataPoint without
        # a handle (h is None) cannot be used for the transfer.
        src_point = DataPoint(str(src), self.cfg)
        if src_point.h is None:
            raise ServiceUnavailable("Can't handle source %s" % src)

        dest_point = DataPoint(str(dest), self.cfg)
        if dest_point.h is None:
            raise ServiceUnavailable("Can't handle destination %s" % dest)
        if space_token:
            dest_point.h.GetURL().AddOption('spacetoken', space_token)

        # Configure the mover: no retry on error, passive and insecure gridftp
        transfer = arc.DataMover()
        transfer.retry(False)
        transfer.passive(True)
        transfer.secure(False)

        outcome = transfer.Transfer(src_point.h,
                                    dest_point.h,
                                    arc.FileCache(),
                                    arc.URLMap())
        if outcome:
            # Transfer succeeded, nothing to report
            return

        # Translate the failure into the matching exception
        if outcome.GetErrno() == errno.ENOENT:
            raise SourceNotFound()
        if outcome.GetErrno() == errno.EEXIST:
            raise FileAlreadyExists()
        raise ServiceUnavailable(str(outcome))
Beispiel #2
0
    def upload_files(account, sources):
        """
        This operation is used to upload files into the system. First, file size,
        file checksum and access permissions on the local file system are derived.
        Second, the rucio-server is called to register all necessary data in the
        database i.e. add the file to file catalogue, add file metadata, add
        replication rules. Note that file replicas without replication rule will be
        deleted automatically and therefore at least one replication rule (default)
        must be created. After the rucio server responded without error, the physical
        copy of the file can be started. After the physical copy finished successfully
        the replica's state is changed from 'queued' to 'active' to enable the
        replica for users.

        Note: the server calls are still marked as ToDo below; only the local
        checks and the upload loop are implemented. Files rejected by the local
        checks are removed from ``sources`` in place.

        :param account: Account identifier
        :param sources: Dictionary with the following structure: { path_to_file: { 'scope': scope, 'replication_spec': {replication_rules}, 'dataset': datasetname, 'checksum': checksum, 'filesize': filesize}}
        :return: report represented by a dictionary with information on a per file basis, e.g. { 'file.a': True, 'File.b': SourceNotFound, 'File.c': RSEAccessDenied, 'File.d': DatasetAcessDenied, ... }
        """
        report = {}

        for src in sources:
            try:
                readable = os.access(src, os.R_OK)
            except Exception:
                # The path could not even be checked; skip the size lookup below
                report[src] = SourceNotFound()
                continue
            if not readable:
                report[src] = SourceAccessDenied()
                continue
            # The per-file description is a dictionary (see docstring), so the
            # metadata is read and written by key.
            if not sources[src].get('filesize'):
                sources[src]['filesize'] = os.path.getsize(src)
            if not sources[src].get('checksum'):
                pass  # TODO: Derive checksum

        # Remove inaccessible/non-existing files from sources to avoid unnecessary checks on the server
        for src in report:
            del sources[src]

        # ToDo the REST call for:  report = rucio_server.declare_for_upload(sources, atomic=False)
        # ToDo: Merge the response from above into the report array (per file)
        #       possible file status are: DatasetAcessDenied, ScopeAccessDenied, RSEOverQuota, InvalidMetadata, FileReplicaAlreadyExsists, FileConsitencyConfilct, InvalidRepliactionRule, FullStorage
        #       if the transfer is considered to be fine, the value of the file will be True
        # NOTE(review): RSEMgr is still a placeholder (None), so the upload call
        # below fails with AttributeError until the RSE manager is wired in.
        RSEMgr, recommendation = None, None  # In waiting
        for src in sources:
            # Until the server response is merged in, files that passed the local
            # checks have no report entry and are treated as cleared for upload.
            # Error statuses are exception instances (truthy), hence the identity test.
            if report.get(src, True) is not True:
                continue
            # ToDo the REST call for: recommendation = rucio_server.recommend_storage(account, sources[src])
            # ToDo the REST call for: rucio_server.prepare_upload(account, sources[src], recommendation)
            try:
                RSEMgr.upload(src, recommendation)
            except (RSEAccessDenied, FullStorage) as error:
                report[src] = error
                continue
            # ToDo the REST call for: rucio_server.confirm_upload(src, recommendation)
            report[src] = True

        return report
Beispiel #3
0
    def delete(self, pfn):
        """ Removes the file behind the given physical file name from the connected RSE.

            :param pfn Physical file name

            :raises ServiceUnavailable, SourceNotFound
        """
        target = DataPoint(str(pfn), self.cfg)
        if target.h is None:
            # Without a handle the pfn cannot be processed at all
            raise ServiceUnavailable("Can't handle pfn %s" % pfn)

        outcome = target.h.Remove()
        if outcome:
            # Removal succeeded
            return

        # A missing file gets its own exception, everything else is generic
        if outcome.GetErrno() == errno.ENOENT:
            raise SourceNotFound()
        raise ServiceUnavailable(str(outcome))
Beispiel #4
0
    def rename(self, pfn, new_pfn):
        """ Renames a file stored inside the connected RSE.

            :param pfn      Current physical file name
            :param new_pfn  New physical file name

            :raises ServiceUnavailable, SourceNotFound
        """
        current = DataPoint(str(pfn), self.cfg)
        if current.h is None:
            raise ServiceUnavailable("Can't handle pfn %s" % pfn)

        # The new name is passed to the handle as an arc.URL; a falsy URL is rejected
        target_url = arc.URL(str(new_pfn))
        if not target_url:
            raise ServiceUnavailable("Can't handle new pfn %s" % new_pfn)

        outcome = current.h.Rename(target_url)
        if outcome:
            # Rename succeeded
            return

        # A missing source gets its own exception, everything else is generic
        if outcome.GetErrno() == errno.ENOENT:
            raise SourceNotFound()
        raise ServiceUnavailable(str(outcome))
Beispiel #5
0
    def download_file_from_archive(self, items, trace_custom_fields=None):
        """
        Extract single files from archives with xrdcp. This function can only download files, no datasets.

        :param items: List of dictionaries. Each dictionary describing a file to download. Keys:
            did                 - DID string of the file to extract from the archive (e.g. 'scope:file.name'). Wildcards are not allowed
            archive             - DID string of the archive from which the file should be extracted
            rse                 - Optional: rse name (e.g. 'CERN-PROD_DATADISK'). RSE Expressions are allowed
            base_dir            - Optional: Base directory where the downloaded files will be stored. (Default: '.')
            no_subdir           - Optional: If true, files are written directly into base_dir and existing files are overwritten. (Default: False)
        :param trace_custom_fields: Optional dictionary of custom key value pairs to send with the traces.
                                    Keys set by this method (e.g. 'uuid', 'scope', 'clientState') take precedence.

        :returns: the result of self._check_output(items); according to the original documentation a list of
                  dictionaries with an entry for each file, containing the input options, the did, and the clientState
                  clientState can be one of the following: ALREADY_DONE, DONE, FILE_NOT_FOUND, FAIL_VALIDATE, FAILED

        :raises InputValidationError: if one of the input items is in the wrong format or the archive replica has no PFNs
        :raises NoFilesDownloaded: if no files could be downloaded (presumably raised by self._check_output)
        :raises NotAllFilesDownloaded: if not all files could be downloaded (presumably raised by self._check_output)
        :raises ServiceUnavailable: if the xrdcp call failed; this includes exit status 54 (source not found),
                                    because every error of the transfer attempt is wrapped into ServiceUnavailable
        :raises RucioException: if the replica listing does not contain exactly one entry or no PFN led to a success
        """
        logger = self.logger
        trace = copy.deepcopy(self.trace_tpl)
        # Custom fields go in first so that the values computed below win on key clashes
        if trace_custom_fields:
            trace.update(trace_custom_fields)
        trace['uuid'] = generate_uuid()
        log_prefix = 'Extracting files: '

        logger.info('Processing %d item(s) for input' % len(items))
        for item in items:
            archive = item.get('archive')
            file_extract = item.get('did')
            rse_name = item.get('rse')
            if not archive or not file_extract:
                raise InputValidationError('File DID and archive DID are mandatory')
            if '*' in archive:
                logger.debug(archive)
                raise InputValidationError('Cannot use PFN download with wildcard in DID')

            file_extract_scope, file_extract_name = self._split_did_str(file_extract)
            archive_scope, archive_name = self._split_did_str(archive)

            # listing all available replicas of given archive file
            rse_expression = 'istape=False' if not rse_name else '(%s)&istape=False' % rse_name
            archive_replicas = self.client.list_replicas([{'scope': archive_scope, 'name': archive_name}],
                                                         schemes=['root'],
                                                         rse_expression=rse_expression,
                                                         unavailable=False,
                                                         client_location=self.client_location)

            # preparing trace
            trace['scope'] = archive_scope
            trace['dataset'] = archive_name
            trace['filename'] = file_extract

            # preparing output directories
            dest_dir_path = self._prepare_dest_dir(item.get('base_dir', '.'),
                                                   os.path.join(archive_scope, archive_name + '.extracted'), file_extract,
                                                   item.get('no_subdir'))
            logger.debug('%sPreparing output destination %s' % (log_prefix, dest_dir_path))

            # validation and customisation of list of replicas
            archive_replicas = list(archive_replicas)
            if len(archive_replicas) != 1:
                raise RucioException('No replicas for DID found or dataset was given.')
            # Materialise the keys: the PFNs are consumed with pop() below, which a
            # Python 3 key view does not support.
            archive_pfns = list(archive_replicas[0]['pfns'].keys())
            if len(archive_pfns) == 0:
                raise InputValidationError('No PFNs for replicas of archive %s' % archive)

            # checking whether file already exists
            success = False
            dest_file_path = os.path.join(dest_dir_path, file_extract)
            if os.path.isfile(dest_file_path):
                logger.info('%s%s File exists already locally: %s' % (log_prefix, file_extract_name, dest_dir_path))
                # Record the state on the item as well, not only in the trace
                item['clientState'] = 'ALREADY_DONE'
                trace['clientState'] = 'ALREADY_DONE'
                trace['transferStart'] = time.time()
                trace['transferEnd'] = time.time()
                send_trace(trace, self.client.host, self.client.user_agent)
                success = True

            # DOWNLOAD, iteration over different rses until success
            # NOTE(review): any error inside the try block is re-raised as
            # ServiceUnavailable, so in practice the first failing PFN aborts the
            # whole call and no further PFN is tried. Kept as is because callers
            # may depend on the exception type.
            retry_counter = 0
            while not success and archive_pfns:
                retry_counter += 1
                pfn = archive_pfns.pop()
                trace['rse'] = archive_replicas[0]['pfns'][pfn]['rse']
                try:
                    start_time = time.time()
                    cmd = 'xrdcp -vf %s -z %s file://%s' % (pfn, file_extract_name, dest_dir_path)
                    logger.debug('%sExecuting: %s' % (log_prefix, cmd))
                    status, out, err = execute(cmd)
                    end_time = time.time()
                    trace['transferStart'] = start_time
                    trace['transferEnd'] = end_time
                    if status == 54:
                        # xrdcp exit status 54 is treated as "source not found"
                        trace['clientState'] = 'FAILED'
                        raise SourceNotFound(err)
                    elif status != 0:
                        trace['clientState'] = 'FAILED'
                        raise RucioException(err)
                    else:
                        success = True
                        item['clientState'] = 'DONE'
                        trace['clientState'] = 'DONE'
                except Exception as e:
                    trace['clientState'] = 'FAILED'
                    raise ServiceUnavailable(e)
                send_trace(trace, self.client.host, self.client.user_agent)
            if not success:
                raise RucioException('Failed to download file %s after %d retries' % (file_extract_name, retry_counter))
        return self._check_output(items)