Code example #1
File: uploadclient.py  Project: davidgcameron/rucio
    def __init__(self, _client=None, logger=None, tracing=True):
        """
        Initialises the basic settings for an UploadClient object

        :param _client:     - Optional: rucio.client.client.Client object. If None, a new object will be created.
        :param logger:      - Optional: logging.Logger object. If None, default logger will be used.
        :param tracing:     - Optional: enable or disable the sending of traces. (Default: True)
        """
        if not logger:
            self.logger = logging.log
        else:
            self.logger = logger.log

        self.client = _client if _client else Client()
        self.client_location = detect_client_location()
        # if token should be used, use only JWT tokens
        self.auth_token = self.client.auth_token if len(
            self.client.auth_token.split(".")) == 3 else None
        self.tracing = tracing
        if not self.tracing:
            self.logger(logging.DEBUG, 'Tracing is turned off.')
        self.default_file_scope = 'user.' + self.client.account
        self.rses = {}
        self.rse_expressions = {}

        self.trace = {}
        self.trace['hostname'] = socket.getfqdn()
        self.trace['account'] = self.client.account
        if self.client.vo != 'def':
            self.trace['vo'] = self.client.vo
        self.trace['eventType'] = 'upload'
        self.trace['eventVersion'] = version.RUCIO_VERSION[0]
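
A minimal usage sketch for the constructor above, assuming a configured rucio client environment. The import paths, the Client() construction and the upload() call with 'path'/'rse' item keys are assumptions based on the usual rucio client layout; they are not part of this excerpt.

import logging

from rucio.client.client import Client
from rucio.client.uploadclient import UploadClient

# Any logging.Logger works here: the constructor stores its .log method and
# later calls it as self.logger(level, message).
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger('my_upload')

upload_client = UploadClient(_client=Client(), logger=logger, tracing=False)

# Assumed upload call, not shown in this excerpt; the item keys are illustrative.
upload_client.upload([{'path': '/tmp/data.root', 'rse': 'SOME_RSE_DATADISK'}])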
Code example #2
File: uploadclient.py  Project: yiiyama/rucio
    def __init__(self, _client=None, logger=None, tracing=True):
        """
        Initialises the basic settings for an UploadClient object

        :param _client:     - Optional: rucio.client.client.Client object. If None, a new object will be created.
        :param logger:      - logging.Logger object to use for uploads. If None nothing will be logged.
        :param tracing:     - Optional: enable or disable the sending of traces. (Default: True)
        """
        if not logger:
            logger = logging.getLogger('%s.null' % __name__)
            logger.disabled = True

        self.logger = logger
        self.client = _client if _client else Client()
        self.client_location = detect_client_location()
        # if token should be used, use only JWT tokens
        self.auth_token = self.client.auth_token if len(
            self.client.auth_token.split(".")) == 3 else None
        self.tracing = tracing
        if not self.tracing:
            logger.debug('Tracing is turned off.')
        self.default_file_scope = 'user.' + self.client.account
        self.rses = {}

        self.trace = {}
        self.trace['hostname'] = socket.getfqdn()
        self.trace['account'] = self.client.account
        self.trace['eventType'] = 'upload'
        self.trace['eventVersion'] = version.RUCIO_VERSION[0]
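
The check len(token.split(".")) == 3 used in both constructors above relies on the shape of a JSON Web Token: three base64url segments (header, payload, signature) joined by dots. A standalone illustration of the same heuristic:

# Standalone illustration of the JWT shape check used by the constructors above.
def looks_like_jwt(token):
    # A JWT is header.payload.signature, i.e. exactly three dot-separated segments.
    return token is not None and len(token.split('.')) == 3

print(looks_like_jwt('eyJhbGciOi.eyJzdWIiOi.c2lnbmF0dXJl'))  # True: JWT-shaped, kept as auth_token
print(looks_like_jwt('legacy-token-without-dots'))           # False: auth_token is set to None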
Code example #3
File: downloadclient.py  Project: ahandresf/rucio
    def __init__(self, client=None, logger=None):
        """
        Initialises the basic settings for a DownloadClient object

        :param client: Optional: rucio.client.client.Client object. If None, a new object will be created.
        :param logger: Optional: logging.Logger object to use for downloads. If None nothing will be logged.
        """
        if not logger:
            logger = logging.getLogger('%s.null' % __name__)
            logger.disabled = True

        self.logger = logger
        self.is_human_readable = True
        self.client = client if client else Client()

        self.client_location = detect_client_location()

        account_attributes = [acc for acc in self.client.list_account_attributes(self.client.account)]
        self.is_admin = False
        for attr in account_attributes[0]:
            if attr['key'] == 'admin':
                self.is_admin = attr['value'] is True
                break
        if self.is_admin:
            logger.debug('Admin mode enabled')

        self.trace_tpl = {}
        self.trace_tpl['hostname'] = self.client_location['fqdn']
        self.trace_tpl['localSite'] = self.client_location['site']
        self.trace_tpl['account'] = self.client.account
        self.trace_tpl['eventType'] = 'download'
        self.trace_tpl['eventVersion'] = 'api_' + version.RUCIO_VERSION[0]
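
A minimal construction sketch for the DownloadClient above; the import paths are assumed from the standard rucio client layout and a configured rucio environment is presumed.

import logging

from rucio.client.client import Client
from rucio.client.downloadclient import DownloadClient

logging.basicConfig(level=logging.INFO)
download_client = DownloadClient(client=Client(),
                                 logger=logging.getLogger('my_download'))

# is_admin is derived from the account's 'admin' attribute in the constructor above.
print('admin mode:', download_client.is_admin)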
Code example #4
File: downloadclient.py  Project: TiO2/rucio
    def download_file_from_archive(self, items, trace_custom_fields={}):
        """
        Download files that are stored inside archives, using a PFN of the archive replica. This function can only download files, not datasets.

        :param items: List of dictionaries. Each dictionary describing a file to download. Keys:
            did                 - DID string of the archive file (e.g. 'scope:file.name'). Wildcards are not allowed
            rse                 - rse name (e.g. 'CERN-PROD_DATADISK'). RSE Expressions are not allowed
            archive             - name of the archive from which the file should be extracted
            base_dir            - Optional: Base directory where the downloaded files will be stored. (Default: '.')
            no_subdir           - Optional: If true, files are written directly into base_dir and existing files are overwritten. (Default: False)
            ignore_checksum     - Optional: If true, the checksum validation is skipped (for pfn downloads the checksum must be given explicitly). (Default: True)
            transfer_timeout    - Optional: Timeout time for the download protocols. (Default: None)
        :param trace_custom_fields: Custom key value pairs to send with the traces

        :returns: a list of dictionaries with an entry for each file, containing the input options, the did, and the clientState
                  clientState can be one of the following: ALREADY_DONE, DONE, FILE_NOT_FOUND, FAIL_VALIDATE, FAILED

        :raises InputValidationError: if one of the input items is in the wrong format
        :raises NoFilesDownloaded: if no files could be downloaded
        :raises NotAllFilesDownloaded: if not all files could be downloaded
        :raises RucioException: if something unexpected went wrong during the download
        """
        logger = self.logger
        trace = copy.deepcopy(self.trace_tpl)
        log_prefix = 'Extracting files: '

        logger.info('Processing %d item(s) for input' % len(items))
        for item in items:
            archive = item.get('archive')
            file_extract = item.get('did')
            rse_name = item.get('rse')
            if not archive or not file_extract:
                raise InputValidationError(
                    'File DID and archive DID are mandatory')
            if '*' in archive:
                logger.debug(archive)
                raise InputValidationError(
                    'Cannot use PFN download with wildcard in DID')

            file_extract_scope, file_extract_name = self._split_did_str(
                file_extract)
            archive_scope, archive_name = self._split_did_str(archive)

            # listing all available replicas of the given archive file
            rse_expression = 'istape=False' if not rse_name else '(%s)&istape=False' % rse_name
            archive_replicas = self.client.list_replicas(
                [{
                    'scope': archive_scope,
                    'name': archive_name
                }],
                schemes=['root'],
                rse_expression=rse_expression,
                unavailable=False,
                client_location=detect_client_location())

            # preparing trace
            trace['uuid'] = generate_uuid()
            trace['scope'] = archive_scope
            trace['dataset'] = archive_name
            trace['filename'] = file_extract

            # preparing output directories
            dest_dir_path = self._prepare_dest_dir(
                item.get('base_dir', '.'),
                os.path.join(archive_scope, archive_name + '.extracted'),
                file_extract, item.get('no_subdir'))
            logger.debug('%sPreparing output destination %s' %
                         (log_prefix, dest_dir_path))

            # validation and customisation of list of replicas
            archive_pfns = []
            replicas = next(archive_replicas)
            for rse in replicas['rses']:
                archive_pfns.extend(replicas['rses'][rse])
            # check after the loop so an archive with no replicas at all also raises
            if len(archive_pfns) == 0:
                raise InputValidationError(
                    'No PFNs for replicas of archive %s' % archive)
            archive_pfns.reverse()

            # checking whether file already exists
            success = False
            dest_file_path = os.path.join(dest_dir_path, file_extract)
            if os.path.isfile(dest_file_path):
                logger.info('%s%s File exists already locally: %s' %
                            (log_prefix, file_extract_name, dest_dir_path))
                trace['clientState'] = 'ALREADY_DONE'
                trace['transferStart'] = time.time()
                trace['transferEnd'] = time.time()
                send_trace(trace, self.client.host, self.user_agent)
                success = True

            # DOWNLOAD, iteration over the different RSEs until success
            retry_counter = 0
            while not success and len(archive_pfns):
                retry_counter += 1
                pfn = archive_pfns.pop()
                trace['rse'] = replicas['pfns'][pfn]['rse']
                try:
                    start_time = time.time()
                    cmd = 'xrdcp -vf %s -z %s file://%s' % (
                        pfn, file_extract_name, dest_dir_path)
                    logger.debug('%sExecuting: %s' % (log_prefix, cmd))
                    status, out, err = execute(cmd)
                    end_time = time.time()
                    trace['transferStart'] = start_time
                    trace['transferEnd'] = end_time
                    if status == 54:
                        trace['clientState'] = 'FAILED'
                        raise SourceNotFound(err)
                    elif status != 0:
                        trace['clientState'] = 'FAILED'
                        raise RucioException(err)
                    else:
                        success = True
                        trace['clientState'] = 'DONE'
                except Exception as e:
                    trace['clientState'] = 'FAILED'
                    raise ServiceUnavailable(e)
                send_trace(trace, self.client.host, self.user_agent)
            if not success:
                raise RucioException(
                    'Failed to download file %s after %d retries' %
                    (file_extract_name, retry_counter))
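
A hedged call sketch for download_file_from_archive; download_client is assumed to be an initialised DownloadClient, and the DIDs and RSE name are placeholders.

# Hypothetical call; scope, names and RSE below are placeholders.
items = [{
    'did': 'user.jdoe:hits.root',          # file to extract, no wildcards
    'archive': 'user.jdoe:archive.tar',    # archive DID containing that file
    'rse': 'SOME_RSE_DATADISK',            # plain RSE name, not an expression
    'base_dir': './extracted',
}]
results = download_client.download_file_from_archive(items)
for result in results:
    # per the docstring: ALREADY_DONE, DONE, FILE_NOT_FOUND, FAIL_VALIDATE or FAILED
    print(result['did'], result.get('clientState'))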
Code example #5
File: downloadclient.py  Project: TiO2/rucio
    def download_dids(self, items, num_threads=2, trace_custom_fields={}):
        """
        Download items with given DIDs. This function can also download datasets and wildcarded DIDs.

        :param items: List of dictionaries. Each dictionary describing an item to download. Keys:
            did                 - DID string of this file (e.g. 'scope:file.name'). Wildcards are not allowed
            rse                 - Optional: rse name (e.g. 'CERN-PROD_DATADISK') or rse expression from where to download
            force_scheme        - Optional: force a specific scheme to download this item. (Default: None)
            base_dir            - Optional: base directory where the downloaded files will be stored. (Default: '.')
            no_subdir           - Optional: If true, files are written directly into base_dir and existing files are overwritten. (Default: False)
            nrandom             - Optional: if the DID addresses a dataset, nrandom files will be randomly chosen for download from the dataset
            ignore_checksum     - Optional: If true, skips the checksum validation between the downloaded file and the rucio catalogue. (Default: False)
            transfer_timeout    - Optional: Timeout time for the download protocols. (Default: None)
        :param num_threads: Suggestion of number of threads to use for the download. It will be lowered if it's too high.
        :param trace_custom_fields: Custom key value pairs to send with the traces

        :returns: a list of dictionaries with an entry for each file, containing the input options, the did, and the clientState

        :raises InputValidationError: if one of the input items is in the wrong format
        :raises NoFilesDownloaded: if no files could be downloaded
        :raises NotAllFilesDownloaded: if not all files could be downloaded
        :raises RucioException: if something unexpected went wrong during the download
        """
        logger = self.logger
        trace_custom_fields['uuid'] = generate_uuid()

        logger.info('Processing %d item(s) for input' % len(items))
        resolved_items = []
        for item in items:
            did_str = item.get('did')
            if not did_str:
                raise InputValidationError('The key did is mandatory')

            logger.debug('Processing item %s' % did_str)

            new_item = copy.deepcopy(item)

            # extend RSE expression to exclude tape RSEs for non-admin accounts
            if not self.is_admin:
                rse = new_item.get('rse')
                new_item['rse'] = 'istape=False' if not rse else '(%s)&istape=False' % rse
                logger.debug('RSE-Expression: %s' % new_item['rse'])

            # resolve any wildcards in the input dids
            did_scope, did_name = self._split_did_str(did_str)
            logger.debug('Splitted DID: %s:%s' % (did_scope, did_name))
            new_item['scope'] = did_scope
            if '*' in did_name:
                logger.debug('Resolving wildcarded DID %s' % did_str)
                for dsn in self.client.list_dids(did_scope,
                                                 filters={'name': did_name},
                                                 type='all'):
                    logger.debug('%s:%s' % (did_scope, dsn))
                    # copy per match: appending new_item itself would make every
                    # resolved entry reference the same dict, describing only the last DID
                    resolved_item = copy.deepcopy(new_item)
                    resolved_item['name'] = dsn
                    resolved_item['did'] = '%s:%s' % (did_scope, dsn)
                    resolved_items.append(resolved_item)
            else:
                new_item['name'] = did_name
                resolved_items.append(new_item)

        input_items = []

        # get replicas for every file of the given dids
        logger.debug('%d DIDs after processing input' % len(resolved_items))
        for item in resolved_items:
            did_scope = item['scope']
            did_name = item['name']
            did_str = item['did']

            logger.debug('Processing: %s' % item)

            # get type of given did
            did_type = self.client.get_did(did_scope, did_name)['type'].upper()
            logger.debug('Type: %s' % did_type)

            # get replicas (RSEs) with PFNs for each file (especially if its a dataset)
            files_with_replicas = self.client.list_replicas(
                [{
                    'scope': did_scope,
                    'name': did_name
                }],
                schemes=item.get('force_scheme'),
                rse_expression=item.get('rse'),
                client_location=detect_client_location())

            nrandom = item.get('nrandom')
            if nrandom:
                logger.info('Selecting %d random replicas from dataset %s' %
                            (nrandom, did_str))
                files_with_replicas = list(files_with_replicas)
                random.shuffle(files_with_replicas)
                files_with_replicas = files_with_replicas[0:nrandom]

            for file_item in files_with_replicas:
                file_did_scope = file_item['scope']
                file_did_name = file_item['name']
                file_did_str = '%s:%s' % (file_did_scope, file_did_name)

                logger.debug('Queueing file: %s' % file_did_str)

                # put the input options from item into the file item
                file_item.update(item)

                dest_dir_name = file_did_scope
                if did_type == 'DATASET':
                    # if the DID is a dataset, update(item) above overwrote the file's scope and name with the dataset's; restore them
                    file_item['scope'] = file_did_scope
                    file_item['name'] = file_did_name
                    file_item['did'] = file_did_str
                    file_item['dataset_scope'] = did_scope
                    file_item['dataset_name'] = did_name
                    dest_dir_name = did_name

                dest_dir_path = self._prepare_dest_dir(
                    item.get('base_dir', '.'), dest_dir_name, file_did_name,
                    item.get('no_subdir'))
                file_item['dest_dir_path'] = dest_dir_path

                input_items.append(file_item)

        num_files_in = len(input_items)
        output_items = self._download_multithreaded(input_items, num_threads,
                                                    trace_custom_fields)
        num_files_out = len(output_items)

        if num_files_in != num_files_out:
            raise RucioException(
                '%d items were in the input queue but only %d are in the output queue'
                % (num_files_in, num_files_out))

        return self._check_output(output_items)
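
A matching call sketch for download_dids; the DIDs, the RSE name and the custom trace field are placeholders, and download_client is again assumed to be an initialised DownloadClient.

# Hypothetical call; DIDs, RSE and appid below are placeholders.
items = [
    {'did': 'user.jdoe:dataset.2020*',      # wildcarded DID, resolved via list_dids
     'nrandom': 5,                          # pick 5 random files from the matching dataset
     'base_dir': '/tmp/rucio'},
    {'did': 'user.jdoe:single_file.root',
     'rse': 'SOME_RSE_DATADISK'},           # restrict the second item to one RSE
]
results = download_client.download_dids(items, num_threads=4,
                                        trace_custom_fields={'appid': 'my_analysis'})
for result in results:
    # each entry carries the input options, the did and the clientState (per the docstring)
    print(result['did'], result['clientState'])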