Example #1
0
def copy_in(files, **kwargs):
    """
        Download given files using rucio copytool.

        Each file is fetched through `_stage_in_api`, wrapped in an overall timeout. A failure,
        signalled either by an exception or by a non-zero exit code combined with a `stateReason`
        in the returned trace, is passed to `handle_rucio_error`. Afterwards the local copy is
        checked with `verify_catalog_checksum` and the trace report is sent.

        :param files: list of `FileSpec` objects
        :param ignore_errors: boolean, if specified then transfer failures will be ignored
                              (no exception is raised and the loop continues with the next file)
        :param trace_report: trace report object, updated and sent for every file
        :param workdir: destination directory used when a file spec has no `workdir` (falls back to '.')
        :return: the `files` list that was passed in
        :raise: PilotException in case of controlled error
    """

    ignore_errors = kwargs.get('ignore_errors')
    trace_report = kwargs.get('trace_report')

    def _report_failure(reason, fspec, trace_report_out):
        """Hand a failed download to handle_rucio_error; raise unless errors are ignored."""
        error_details = handle_rucio_error(reason,
                                           trace_report,
                                           trace_report_out,
                                           fspec,
                                           stagein=True)
        if ignore_errors:
            return

        trace_report.send()
        msg = ' %s:%s from %s, %s' % (fspec.scope, fspec.lfn,
                                      fspec.ddmendpoint,
                                      error_details.get('error'))
        raise PilotException(msg,
                             code=error_details.get('rcode'),
                             state=error_details.get('state'))

    # don't spoil the output, we depend on stderr parsing
    os.environ[
        'RUCIO_LOGGING_FORMAT'] = '%(asctime)s %(levelname)s [%(message)s]'

    localsite = os.environ.get('RUCIO_LOCAL_SITE_ID',
                               os.environ.get('DQ2_LOCAL_SITE_ID', None))
    for fspec in files:
        logger.info('rucio copytool, downloading file with scope:%s lfn:%s',
                    fspec.scope, fspec.lfn)

        # update the trace report
        # NOTE(review): once the ddmendpoint fallback is taken it is kept for all remaining
        # files in the list - confirm this is intended
        localsite = localsite or fspec.ddmendpoint
        trace_report.update(localSite=localsite,
                            remoteSite=fspec.ddmendpoint,
                            filesize=fspec.filesize)
        trace_report.update(filename=fspec.lfn,
                            guid=fspec.guid.replace('-', ''))
        trace_report.update(scope=fspec.scope, dataset=fspec.dataset)
        trace_report.update(
            catStart=time())  ## is this metric still needed? LFC catalog

        fspec.status_code = 0
        dst = fspec.workdir or kwargs.get('workdir') or '.'
        logger.info('the file will be stored in %s', dst)

        trace_report_out = []
        transfer_timeout = get_timeout(fspec.filesize)
        ctimeout = transfer_timeout + 10  # give the API a chance to do the time-out first
        logger.info('overall transfer timeout=%s', ctimeout)

        error_msg = ""
        ec = 0
        try:
            ec, trace_report_out = timeout(ctimeout)(_stage_in_api)(
                dst, fspec, trace_report, trace_report_out, transfer_timeout)
        except Exception as error:
            error_msg = str(error)
            _report_failure(error_msg, fspec, trace_report_out)

        # make sure there was no missed failure (only way to deal with this until rucio API has been fixed)
        # (using the timeout decorator prevents the trace_report_out from being updated - rucio API should return
        # the proper error immediately instead of encoding it into a dictionary)
        state_reason = trace_report_out[0].get(
            'stateReason') if trace_report_out else None
        if ec and state_reason and not error_msg:
            _report_failure(state_reason, fspec, trace_report_out)

        # verify checksum; compare local checksum with catalog value (fspec.checksum), use same checksum type
        destination = os.path.join(dst, fspec.lfn)
        if os.path.exists(destination):
            state, diagnostics = verify_catalog_checksum(fspec, destination)
            if diagnostics != "" and not ignore_errors:
                trace_report.update(clientState=state
                                    or 'STAGEIN_ATTEMPT_FAILED',
                                    stateReason=diagnostics,
                                    timeEnd=time())
                trace_report.send()
                raise PilotException(diagnostics,
                                     code=fspec.status_code,
                                     state=state)
        else:
            diagnostics = 'file does not exist: %s (cannot verify catalog checksum)' % destination
            logger.warning(diagnostics)
            state = 'STAGEIN_ATTEMPT_FAILED'
            fspec.status_code = ErrorCodes.STAGEINFAILED
            trace_report.update(clientState=state,
                                stateReason=diagnostics,
                                timeEnd=time())
            # a failed download leaves no file behind, so this branch must honour ignore_errors as
            # well; otherwise the flag could never take effect. When errors are ignored the trace
            # is sent once, at the end of the iteration.
            if not ignore_errors:
                trace_report.send()
                raise PilotException(diagnostics,
                                     code=fspec.status_code,
                                     state=state)

        # a status code that is still zero means that nothing above reported a failure
        if not fspec.status_code:
            fspec.status_code = 0
            fspec.status = 'transferred'
            trace_report.update(clientState='DONE',
                                stateReason='OK',
                                timeEnd=time())

        trace_report.send()

    return files
Example #2
0
def copy_out(files, **kwargs):  # noqa: C901
    """
        Upload given files using rucio copytool.

        Each file is uploaded through `_stage_out_api`, wrapped in an overall timeout. A failure,
        signalled either by an exception or by a non-zero exit code combined with a `stateReason`
        in the returned trace, is passed to `handle_rucio_error`. When `summary` is set, the
        summary JSON is read afterwards to resolve the final pfn and to compare the adler32
        checksums.

        :param files: list of `FileSpec` objects
        :param ignore_errors: boolean, if specified then transfer failures will be ignored
                              (no exception is raised and the loop continues with the next file)
        :param summary: boolean (default True), if set then the path of `rucio_upload.json` in the
                        working directory is passed to `_stage_out_api` and the file is read afterwards
        :param trace_report: trace report object, updated and sent for every file
        :param workdir: working directory used when a file spec has no `workdir` (falls back to '.')
        :return: the `files` list that was passed in
        :raise: PilotException in case of controlled error
    """

    # don't spoil the output, we depend on stderr parsing
    os.environ[
        'RUCIO_LOGGING_FORMAT'] = '%(asctime)s %(levelname)s [%(message)s]'

    summary = kwargs.pop('summary', True)
    ignore_errors = kwargs.pop('ignore_errors', False)
    trace_report = kwargs.get('trace_report')

    def _report_failure(reason, fspec, trace_report_out):
        """Hand a failed upload to handle_rucio_error; raise unless errors are ignored."""
        error_details = handle_rucio_error(reason,
                                           trace_report,
                                           trace_report_out,
                                           fspec,
                                           stagein=False)
        if ignore_errors:
            return

        trace_report.send()
        msg = ' %s:%s to %s, %s' % (fspec.scope, fspec.lfn,
                                    fspec.ddmendpoint,
                                    error_details.get('error'))
        raise PilotException(msg,
                             code=error_details.get('rcode'),
                             state=error_details.get('state'))

    localsite = os.environ.get('RUCIO_LOCAL_SITE_ID',
                               os.environ.get('DQ2_LOCAL_SITE_ID', None))
    for fspec in files:
        logger.info(
            'rucio copytool, uploading file with scope: %s and lfn: %s',
            fspec.scope, fspec.lfn)

        # update the trace report
        # NOTE(review): once the ddmendpoint fallback is taken it is kept for all remaining
        # files in the list - confirm this is intended
        localsite = localsite or fspec.ddmendpoint
        trace_report.update(localSite=localsite, remoteSite=fspec.ddmendpoint)
        trace_report.update(scope=fspec.scope,
                            dataset=fspec.dataset,
                            url=fspec.surl,
                            filesize=fspec.filesize)
        trace_report.update(catStart=time(),
                            filename=fspec.lfn,
                            guid=fspec.guid.replace('-', ''))
        fspec.status_code = 0

        cwd = fspec.workdir or kwargs.get('workdir') or '.'
        summary_file_path = os.path.join(
            cwd, 'rucio_upload.json') if summary else None

        logger.info('the file will be uploaded to %s', fspec.ddmendpoint)
        trace_report_out = []
        transfer_timeout = get_timeout(fspec.filesize)
        ctimeout = transfer_timeout + 10  # give the API a chance to do the time-out first
        logger.info('overall transfer timeout=%s', ctimeout)

        error_msg = ""
        ec = 0
        try:
            ec, trace_report_out = timeout(ctimeout)(_stage_out_api)(
                fspec, summary_file_path, trace_report, trace_report_out,
                transfer_timeout)
        except Exception as error:
            # a single handler is enough: PilotException used to be caught separately, but
            # with a byte-identical handler body
            error_msg = str(error)
            _report_failure(error_msg, fspec, trace_report_out)

        # make sure there was no missed failure (only way to deal with this until rucio API has been fixed)
        # (using the timeout decorator prevents the trace_report_out from being updated - rucio API should return
        # the proper error immediately instead of encoding it into a dictionary)
        state_reason = trace_report_out[0].get(
            'stateReason') if trace_report_out else None
        if ec and state_reason and not error_msg:
            _report_failure(state_reason, fspec, trace_report_out)

        if summary:  # resolve final pfn (turl) from the summary JSON
            if not os.path.exists(summary_file_path):
                logger.error(
                    'Failed to resolve Rucio summary JSON, wrong path? file=%s',
                    summary_file_path)
            else:
                with open(summary_file_path, 'rb') as f:
                    summary_json = json.load(f)

                dat = summary_json.get("%s:%s" %
                                       (fspec.scope, fspec.lfn)) or {}
                fspec.turl = dat.get('pfn')

                # quick transfer verification:
                # the logic should be unified and moved to base layer shared for all the movers
                adler32 = dat.get('adler32')
                local_checksum = fspec.checksum.get('adler32')
                if local_checksum and adler32 and local_checksum != adler32:
                    msg = 'checksum verification failed: local %s != remote %s' % \
                          (local_checksum, adler32)
                    logger.warning(msg)
                    fspec.status = 'failed'
                    fspec.status_code = ErrorCodes.PUTADMISMATCH
                    trace_report.update(clientState='AD_MISMATCH',
                                        stateReason=msg,
                                        timeEnd=time())
                    # send here only when raising; otherwise the send at the end of the
                    # iteration delivers this trace (sending in both places duplicated it)
                    if not ignore_errors:
                        trace_report.send()
                        raise PilotException(
                            "Failed to stageout: CRC mismatched",
                            code=ErrorCodes.PUTADMISMATCH,
                            state='AD_MISMATCH')
                elif local_checksum and adler32:
                    logger.info(
                        'local checksum (%s) = remote checksum (%s)',
                        local_checksum, adler32)
                else:
                    logger.warning(
                        'checksum could not be verified: local checksum (%s), remote checksum (%s)',
                        local_checksum, adler32)

        # a status code that is still zero means that nothing above reported a failure
        if not fspec.status_code:
            fspec.status_code = 0
            fspec.status = 'transferred'
            trace_report.update(clientState='DONE',
                                stateReason='OK',
                                timeEnd=time())

        trace_report.send()

    return files