Exemple #1
0
    def __arc_copy(self, src, dest, space_token=None):
        """ Copies src to dest using an ARC DataMover.

            :param src          Source location; converted with str() before use
            :param dest         Destination location; converted with str() before use
            :param space_token  Optional value set as the 'spacetoken' URL option of the destination

            :raises FileAlreadyExists, ServiceUnavailable, SourceNotFound
        """
        # TODO set proxy path

        # Wrap both endpoints in DataPoint objects, source first
        src_point = DataPoint(str(src), self.cfg)
        if src_point.h is None:
            raise ServiceUnavailable("Can't handle source %s" % src)

        dest_point = DataPoint(str(dest), self.cfg)
        if dest_point.h is None:
            raise ServiceUnavailable("Can't handle destination %s" % dest)
        if space_token:
            dest_point.h.GetURL().AddOption('spacetoken', space_token)

        # Configure the mover: no retry on error, passive and insecure gridftp
        transfer = arc.DataMover()
        transfer.retry(False)
        transfer.passive(True)
        transfer.secure(False)

        # Run the transfer
        result = transfer.Transfer(src_point.h, dest_point.h,
                                   arc.FileCache(), arc.URLMap())
        if result:
            return

        # Map the failure onto the matching exception
        if result.GetErrno() == errno.ENOENT:
            raise SourceNotFound()
        if result.GetErrno() == errno.EEXIST:
            raise FileAlreadyExists()
        raise ServiceUnavailable(str(result))
Exemple #2
0
def construct_surl_dune(dsn, name):
    """Build the path of a file by asking the SAM web service for its destination.

    The SAM base URL is resolved once and cached in the module-level
    ``sam_base`` variable: it is read from the ``sam_base_url`` option of the
    ``policy`` config section, with a hard-coded dune-test endpoint as fallback
    when that lookup yields ``None``.

    :param dsn: Unused by this function.
    :param name: File name; it is URL-quoted for the query and appended to the
                 destination returned by SAM.

    :returns: The destination reported by SAM, forced to start with '/', followed
              by '/' and ``name``.

    :raises ServiceUnavailable: if the HTTP request fails, returns an error
                                status, or the reply is not JSON containing a
                                "destination" entry.
    """
    global sam_base

    # quote() lives in urllib.parse on Python 3 and directly in urllib on Python 2.
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    if sam_base is None:
        sam_base = config.config_get('policy', 'sam_base_url')
        if sam_base is None:
            sam_base = 'https://samweb.fnal.gov:8483/sam/dune-test/api'

    # safe='' so that '/' inside the file name is percent-encoded as well.
    url = '%s/files/name/%s/destination?format=json' % (sam_base, quote(name, ''))

    try:
        response = requests.get(url)
        response.raise_for_status()
        # Looked up inside the try so that a malformed reply is reported the
        # same way as a transport failure.
        destination = response.json()["destination"]
    except Exception as ex:
        # Keep the underlying cause in the message instead of discarding it.
        raise ServiceUnavailable("Error querying SAM: %s" % ex)

    if not destination.startswith('/'):
        destination = '/' + destination
    if not destination.endswith('/'):
        destination += '/'
    return destination + name
Exemple #3
0
    def delete(self, pfn):
        """ Removes a file from the connected RSE.

            :param pfn Physical file name of the file to remove

            :raises ServiceUnavailable, SourceNotFound
        """
        target = DataPoint(str(pfn), self.cfg)
        if target.h is None:
            raise ServiceUnavailable("Can't handle pfn %s" % pfn)

        outcome = target.h.Remove()
        if outcome:
            return

        # A missing file gets its own exception; anything else is a service error
        if outcome.GetErrno() == errno.ENOENT:
            raise SourceNotFound()
        raise ServiceUnavailable(str(outcome))
Exemple #4
0
    def rename(self, pfn, new_pfn):
        """ Renames a file stored inside the connected RSE.

            :param pfn      Current physical file name
            :param new_pfn  New physical file name

            :raises ServiceUnavailable, SourceNotFound
        """
        current = DataPoint(str(pfn), self.cfg)
        if current.h is None:
            raise ServiceUnavailable("Can't handle pfn %s" % pfn)

        target_url = arc.URL(str(new_pfn))
        if not target_url:
            raise ServiceUnavailable("Can't handle new pfn %s" % new_pfn)

        outcome = current.h.Rename(target_url)
        if outcome:
            return

        # A missing source gets its own exception; anything else is a service error
        if outcome.GetErrno() == errno.ENOENT:
            raise SourceNotFound()
        raise ServiceUnavailable(str(outcome))
Exemple #5
0
    def exists(self, pfn):
        """ Checks if the requested file is known by the referred RSE.

            :param pfn Physical file name

            :returns: True if the file exists, False if it doesn't

            :raises ServiceUnavailable
        """
        dp = DataPoint(str(pfn), self.cfg)
        # Same guard as delete() and rename(): without a handle the PFN cannot
        # be processed, and calling Stat() on None would raise AttributeError
        # instead of the documented ServiceUnavailable.
        if dp.h is None:
            raise ServiceUnavailable("Can't handle pfn %s" % pfn)

        fileinfo = arc.FileInfo()
        status = dp.h.Stat(fileinfo)
        if status:
            return True

        # A failed stat with ENOENT simply means the file is not there
        if status.GetErrno() == errno.ENOENT:
            return False
        raise ServiceUnavailable(str(status))
Exemple #6
0
    def download_file_from_archive(self, items, trace_custom_fields=None):
        """
        Extract single files from archive files with xrdcp. This function can only download files, no datasets.

        :param items: List of dictionaries. Each dictionary describing a file to download. Keys:
            did                 - DID string of the file to extract (e.g. 'scope:file.name')
            archive             - DID string of the archive from which the file should be extracted. Wildcards are not allowed
            rse                 - Optional: rse name (e.g. 'CERN-PROD_DATADISK'). RSE Expressions are allowed
            base_dir            - Optional: Base directory where the downloaded files will be stored. (Default: '.')
            no_subdir           - Optional: passed on to the destination directory preparation. (Default: None)
        :param trace_custom_fields: Optional dict of custom key value pairs to send with the traces.
                                    They are merged over the trace template; fields set by this
                                    method (uuid, scope, dataset, ...) take precedence.

        :returns: the result of self._check_output(items); on success each item carries a clientState
                  of ALREADY_DONE or DONE

        :raises InputValidationError: if an item lacks 'did' or 'archive', the archive DID contains a wildcard,
                                      or the archive replica has no PFNs
        :raises ServiceUnavailable: if running xrdcp fails for any reason. A missing source (exit status 54)
                                    and other non-zero exit statuses are raised internally as SourceNotFound /
                                    RucioException but are re-wrapped by the generic handler, so callers
                                    always see ServiceUnavailable
        :raises RucioException: if the replica listing does not yield exactly one entry, or no attempt succeeded
        :raises NoFilesDownloaded, NotAllFilesDownloaded: presumably raised by self._check_output - verify there
        """
        logger = self.logger
        trace = copy.deepcopy(self.trace_tpl)
        # Merge caller-supplied fields before the fields set below so that the
        # generated uuid and the per-item values always win.
        if trace_custom_fields:
            trace.update(trace_custom_fields)
        trace['uuid'] = generate_uuid()
        log_prefix = 'Extracting files: '

        logger.info('Processing %d item(s) for input' % len(items))
        for item in items:
            archive = item.get('archive')
            file_extract = item.get('did')
            rse_name = item.get('rse')
            if not archive or not file_extract:
                raise InputValidationError('File DID and archive DID are mandatory')
            if '*' in archive:
                logger.debug(archive)
                raise InputValidationError('Cannot use PFN download with wildcard in DID')

            file_extract_scope, file_extract_name = self._split_did_str(file_extract)
            archive_scope, archive_name = self._split_did_str(archive)

            # listing all available replicas of the given archive file
            rse_expression = 'istape=False' if not rse_name else '(%s)&istape=False' % rse_name
            archive_replicas = self.client.list_replicas([{'scope': archive_scope, 'name': archive_name}],
                                                         schemes=['root'],
                                                         rse_expression=rse_expression,
                                                         unavailable=False,
                                                         client_location=self.client_location)

            # preparing trace
            trace['scope'] = archive_scope
            trace['dataset'] = archive_name
            trace['filename'] = file_extract

            # preparing output directories
            dest_dir_path = self._prepare_dest_dir(item.get('base_dir', '.'),
                                                   os.path.join(archive_scope, archive_name + '.extracted'), file_extract,
                                                   item.get('no_subdir'))
            logger.debug('%sPreparing output destination %s' % (log_prefix, dest_dir_path))

            # validation and customisation of list of replicas
            archive_replicas = list(archive_replicas)
            if len(archive_replicas) != 1:
                raise RucioException('No replicas for DID found or dataset was given.')
            # Materialise the keys: on Python 3 keys() is a view without pop().
            archive_pfns = list(archive_replicas[0]['pfns'])
            if not archive_pfns:
                raise InputValidationError('No PFNs for replicas of archive %s' % archive)

            # checking whether file already exists
            # NOTE(review): this joins the full DID string (file_extract), while the
            # xrdcp call below is given file_extract_name - confirm which name the
            # extracted file actually gets on disk.
            success = False
            dest_file_path = os.path.join(dest_dir_path, file_extract)
            if os.path.isfile(dest_file_path):
                logger.info('%s%s File exists already locally: %s' % (log_prefix, file_extract_name, dest_dir_path))
                # Record the state on the item as well, as promised by the return contract.
                item['clientState'] = 'ALREADY_DONE'
                trace['clientState'] = 'ALREADY_DONE'
                trace['transferStart'] = time.time()
                trace['transferEnd'] = time.time()
                send_trace(trace, self.client.host, self.client.user_agent)
                success = True

            # DOWNLOAD, iteration over the available PFNs until success.
            # Any failure raises out of the loop, so in practice only the first
            # popped PFN is ever attempted.
            retry_counter = 0
            while not success and archive_pfns:
                retry_counter += 1
                pfn = archive_pfns.pop()
                trace['rse'] = archive_replicas[0]['pfns'][pfn]['rse']
                try:
                    start_time = time.time()
                    cmd = 'xrdcp -vf %s -z %s file://%s' % (pfn, file_extract_name, dest_dir_path)
                    logger.debug('%sExecuting: %s' % (log_prefix, cmd))
                    status, out, err = execute(cmd)
                    end_time = time.time()
                    trace['transferStart'] = start_time
                    trace['transferEnd'] = end_time
                    if status == 54:
                        # exit status 54 is treated as "source not found"
                        trace['clientState'] = 'FAILED'
                        raise SourceNotFound(err)
                    elif status != 0:
                        trace['clientState'] = 'FAILED'
                        raise RucioException(err)
                    else:
                        success = True
                        item['clientState'] = 'DONE'
                        trace['clientState'] = 'DONE'
                except Exception as e:
                    # Every failure, including the two raised just above, leaves
                    # this method as ServiceUnavailable.
                    trace['clientState'] = 'FAILED'
                    raise ServiceUnavailable(e)
                send_trace(trace, self.client.host, self.client.user_agent)
            if not success:
                raise RucioException('Failed to download file %s after %d retries' % (file_extract_name, retry_counter))
        return self._check_output(items)