def copy_in(files, **kwargs):
    """
    Download given files using rucio copytool.

    For every file the trace report is updated, the file is staged in through `_stage_in_api` (wrapped in a
    time-out), the local copy is checked with `verify_catalog_checksum` and the trace report is sent.

    :param files: list of `FileSpec` objects
    :param ignore_errors: boolean, if specified then transfer failures will be ignored
                          (a missing destination file is still reported with a PilotException)
    :param trace_report: trace report object; updated and sent for each file
    :param workdir: fallback destination directory, used when a file has no `workdir` of its own
    :return: the given list of files
    :raise: PilotException in case of controlled error
    """

    ignore_errors = kwargs.get('ignore_errors')
    trace_report = kwargs.get('trace_report')

    # don't spoil the output, we depend on stderr parsing
    os.environ['RUCIO_LOGGING_FORMAT'] = '%(asctime)s %(levelname)s [%(message)s]'

    localsite = os.environ.get('RUCIO_LOCAL_SITE_ID', os.environ.get('DQ2_LOCAL_SITE_ID', None))
    for fspec in files:
        logger.info('rucio copytool, downloading file with scope:%s lfn:%s', str(fspec.scope), str(fspec.lfn))

        # update the trace report
        # NOTE(review): localsite is carried over between iterations, i.e. once it has fallen back to the
        # ddmendpoint of one file, the same value is used for all following files - confirm this is intended
        localsite = localsite if localsite else fspec.ddmendpoint
        trace_report.update(localSite=localsite, remoteSite=fspec.ddmendpoint, filesize=fspec.filesize)
        trace_report.update(filename=fspec.lfn, guid=fspec.guid.replace('-', ''))
        trace_report.update(scope=fspec.scope, dataset=fspec.dataset)
        trace_report.update(catStart=time())  # is this metric still needed? LFC catalog
        fspec.status_code = 0

        dst = fspec.workdir or kwargs.get('workdir') or '.'
        logger.info('the file will be stored in %s', str(dst))

        trace_report_out = []
        transfer_timeout = get_timeout(fspec.filesize)
        ctimeout = transfer_timeout + 10  # give the API a chance to do the time-out first
        logger.info('overall transfer timeout=%s', ctimeout)

        error_msg = ""
        ec = 0
        try:
            ec, trace_report_out = timeout(ctimeout)(_stage_in_api)(dst, fspec, trace_report, trace_report_out,
                                                                    transfer_timeout)
        except Exception as error:
            error_msg = str(error)
            error_details = handle_rucio_error(error_msg, trace_report, trace_report_out, fspec, stagein=True)
            if not ignore_errors:
                trace_report.send()
                msg = ' %s:%s from %s, %s' % (fspec.scope, fspec.lfn, fspec.ddmendpoint, error_details.get('error'))
                raise PilotException(msg, code=error_details.get('rcode'), state=error_details.get('state'))

        # make sure there was no missed failure (only way to deal with this until rucio API has been fixed)
        # (using the timeout decorator prevents the trace_report_out from being updated - rucio API should return
        # the proper error immediately instead of encoding it into a dictionary)
        state_reason = None if not trace_report_out else trace_report_out[0].get('stateReason')
        if ec and state_reason and not error_msg:
            error_details = handle_rucio_error(state_reason, trace_report, trace_report_out, fspec, stagein=True)
            if not ignore_errors:
                trace_report.send()
                msg = ' %s:%s from %s, %s' % (fspec.scope, fspec.lfn, fspec.ddmendpoint, error_details.get('error'))
                raise PilotException(msg, code=error_details.get('rcode'), state=error_details.get('state'))

        # verify checksum; compare local checksum with catalog value (fspec.checksum), use same checksum type
        destination = os.path.join(dst, fspec.lfn)
        if os.path.exists(destination):
            state, diagnostics = verify_catalog_checksum(fspec, destination)
            if diagnostics != "" and not ignore_errors:
                trace_report.update(clientState=state or 'STAGEIN_ATTEMPT_FAILED', stateReason=diagnostics,
                                    timeEnd=time())
                trace_report.send()
                raise PilotException(diagnostics, code=fspec.status_code, state=state)
        else:
            # NOTE(review): this branch raises even when ignore_errors is set - confirm this is intended
            diagnostics = 'file does not exist: %s (cannot verify catalog checksum)' % destination
            logger.warning(diagnostics)
            state = 'STAGEIN_ATTEMPT_FAILED'
            fspec.status_code = ErrorCodes.STAGEINFAILED
            trace_report.update(clientState=state, stateReason=diagnostics, timeEnd=time())
            trace_report.send()
            raise PilotException(diagnostics, code=fspec.status_code, state=state)

        # only flag the file as transferred if no error code was set for it above
        if not fspec.status_code:
            fspec.status_code = 0
            fspec.status = 'transferred'
            trace_report.update(clientState='DONE', stateReason='OK', timeEnd=time())

        trace_report.send()

    return files
def copy_out(files, **kwargs):  # noqa: C901
    """
    Upload given files using rucio copytool.

    For every file the trace report is updated, the file is staged out through `_stage_out_api` (wrapped in a
    time-out) and, if a summary is requested, the final pfn (turl) and the remote adler32 checksum are read
    from the `rucio_upload.json` summary file and compared with the local adler32 checksum.

    :param files: list of `FileSpec` objects
    :param summary: boolean (default: True), if set then the summary JSON is used to resolve the turl and to
                    verify the checksum
    :param ignore_errors: boolean, if specified then transfer failures will be ignored
    :param trace_report: trace report object; updated and sent for each file
    :param workdir: fallback working directory, used when a file has no `workdir` of its own
    :return: the given list of files
    :raise: PilotException in case of controlled error
    """

    # don't spoil the output, we depend on stderr parsing
    os.environ['RUCIO_LOGGING_FORMAT'] = '%(asctime)s %(levelname)s [%(message)s]'

    summary = kwargs.pop('summary', True)
    ignore_errors = kwargs.pop('ignore_errors', False)
    trace_report = kwargs.get('trace_report')

    localsite = os.environ.get('RUCIO_LOCAL_SITE_ID', os.environ.get('DQ2_LOCAL_SITE_ID', None))
    for fspec in files:
        logger.info('rucio copytool, uploading file with scope: %s and lfn: %s', str(fspec.scope), str(fspec.lfn))

        # update the trace report
        localsite = localsite if localsite else fspec.ddmendpoint
        trace_report.update(localSite=localsite, remoteSite=fspec.ddmendpoint)
        trace_report.update(scope=fspec.scope, dataset=fspec.dataset, url=fspec.surl, filesize=fspec.filesize)
        trace_report.update(catStart=time(), filename=fspec.lfn, guid=fspec.guid.replace('-', ''))
        fspec.status_code = 0

        summary_file_path = None
        cwd = fspec.workdir or kwargs.get('workdir') or '.'
        if summary:
            summary_file_path = os.path.join(cwd, 'rucio_upload.json')

        logger.info('the file will be uploaded to %s', str(fspec.ddmendpoint))
        trace_report_out = []
        transfer_timeout = get_timeout(fspec.filesize)
        ctimeout = transfer_timeout + 10  # give the API a chance to do the time-out first
        logger.info('overall transfer timeout=%s', ctimeout)

        error_msg = ""
        ec = 0
        try:
            ec, trace_report_out = timeout(ctimeout)(_stage_out_api)(fspec, summary_file_path, trace_report,
                                                                     trace_report_out, transfer_timeout)
        except Exception as error:
            # a PilotException is handled exactly like any other exception
            error_msg = str(error)
            error_details = handle_rucio_error(error_msg, trace_report, trace_report_out, fspec, stagein=False)
            if not ignore_errors:
                trace_report.send()
                msg = ' %s:%s to %s, %s' % (fspec.scope, fspec.lfn, fspec.ddmendpoint, error_details.get('error'))
                raise PilotException(msg, code=error_details.get('rcode'), state=error_details.get('state'))

        # make sure there was no missed failure (only way to deal with this until rucio API has been fixed)
        # (using the timeout decorator prevents the trace_report_out from being updated - rucio API should return
        # the proper error immediately instead of encoding it into a dictionary)
        state_reason = None if not trace_report_out else trace_report_out[0].get('stateReason')
        if ec and state_reason and not error_msg:
            error_details = handle_rucio_error(state_reason, trace_report, trace_report_out, fspec, stagein=False)
            if not ignore_errors:
                trace_report.send()
                # stage-out: the file goes *to* the endpoint (same wording as the other stage-out messages)
                msg = ' %s:%s to %s, %s' % (fspec.scope, fspec.lfn, fspec.ddmendpoint, error_details.get('error'))
                raise PilotException(msg, code=error_details.get('rcode'), state=error_details.get('state'))

        if summary:  # resolve final pfn (turl) from the summary JSON
            if not os.path.exists(summary_file_path):
                logger.error('Failed to resolve Rucio summary JSON, wrong path? file=%s', summary_file_path)
            else:
                with open(summary_file_path, 'rb') as f:
                    summary_json = json.load(f)

                # the summary is keyed by "scope:lfn"; fall back to an empty entry if the file is not listed
                dat = summary_json.get("%s:%s" % (fspec.scope, fspec.lfn)) or {}
                fspec.turl = dat.get('pfn')

                # quick transfer verification:
                # the logic should be unified and moved to base layer shared for all the movers
                adler32 = dat.get('adler32')
                local_checksum = fspec.checksum.get('adler32')
                if local_checksum and adler32 and local_checksum != adler32:
                    msg = 'checksum verification failed: local %s != remote %s' % (local_checksum, adler32)
                    logger.warning(msg)
                    fspec.status = 'failed'
                    fspec.status_code = ErrorCodes.PUTADMISMATCH
                    trace_report.update(clientState='AD_MISMATCH', stateReason=msg, timeEnd=time())
                    # NOTE(review): with ignore_errors set, the trace is sent here and once more at the end of
                    # this iteration - confirm that the double send is intended
                    trace_report.send()
                    if not ignore_errors:
                        raise PilotException("Failed to stageout: CRC mismatched",
                                             code=ErrorCodes.PUTADMISMATCH, state='AD_MISMATCH')
                elif local_checksum and adler32 and local_checksum == adler32:
                    logger.info('local checksum (%s) = remote checksum (%s)', local_checksum, adler32)
                else:
                    # at least one of the two checksums is missing
                    logger.warning('checksum could not be verified: local checksum (%s), remote checksum (%s)',
                                   str(local_checksum), str(adler32))

        # only flag the file as transferred if no error code was set for it above
        if not fspec.status_code:
            fspec.status_code = 0
            fspec.status = 'transferred'
            trace_report.update(clientState='DONE', stateReason='OK', timeEnd=time())

        trace_report.send()

    return files