Example #1
def _do_stageout(job, xdata, activity, title):
    """
    Use the `StageOutClient` in the Data API to perform stage-out.

    :param job: job object.
    :param xdata: list of FileSpec objects.
    :param activity: copytool activity or preferred list of activities used to resolve copytools (string or list).
    :param title: type of stage-out (output, log) (string).
    :return: True in case of successful transfers (boolean).
    """

    log = get_logger(job.jobid)
    log.info('prepare to stage-out %d %s file(s)' % (len(xdata), title))

    event_type = "put_sm"
    #if log_transfer:
    #    eventType += '_logs'
    #if special_log_transfer:
    #    eventType += '_logs_os'
    if job.is_analysis():
        event_type += "_a"
    rse = get_rse(xdata)
    localsite = remotesite = rse
    trace_report = TraceReport(pq=os.environ.get('PILOT_SITENAME', ''), localSite=localsite, remoteSite=remotesite, dataset="", eventType=event_type)
    trace_report.init(job)

    try:
        client = StageOutClient(job.infosys, logger=log, trace_report=trace_report)
        kwargs = dict(workdir=job.workdir, cwd=job.workdir, usecontainer=False, job=job)  #, mode='stage-out')
        # prod analy unification: use destination preferences from PanDA server for unified queues
        if job.infosys.queuedata.type != 'unified':
            client.prepare_destinations(xdata, activity)  ## FIX ME LATER: split activities: for astorages and for copytools (to unify with ES workflow)
        client.transfer(xdata, activity, **kwargs)
    except PilotException as error:
        import traceback
        error_msg = traceback.format_exc()
        log.error(error_msg)
        msg = errors.format_diagnostics(error.get_error_code(), error_msg)
        job.piloterrorcodes, job.piloterrordiags = errors.add_error_code(error.get_error_code(), msg=msg)
    except Exception:
        import traceback
        log.error(traceback.format_exc())
        # do not raise the exception since that would also prevent the log from being staged out
        # error = PilotException("stageOut failed with error=%s" % e, code=ErrorCodes.STAGEOUTFAILED)
    else:
        log.debug('stage-out client completed')

    log.info('summary of transferred files:')
    for e in xdata:
        if not e.status:
            status = "(not transferred)"
        else:
            status = e.status
        log.info(" -- lfn=%s, status_code=%s, status=%s" % (e.lfn, e.status_code, status))

    remain_files = [e for e in xdata if e.status not in ['transferred']]
    log.debug('remain_files=%s' % str(remain_files))
    log.debug('xdata=%s' % str(xdata))

    return not remain_files
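
The function reports success only when every file in xdata reached the 'transferred' status. A minimal standalone sketch of that final check, using a hypothetical FileStub in place of the pilot FileSpec object:

from collections import namedtuple

# FileStub is a stand-in for the pilot FileSpec object used above (assumption)
FileStub = namedtuple('FileStub', ['lfn', 'status'])

xdata = [FileStub('file1.root', 'transferred'),
         FileStub('file2.root', None)]

remain_files = [e for e in xdata if e.status not in ['transferred']]
print(not remain_files)  # False: one file was not transferred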
Example #2
    def __init__(self,
                 infosys_instance=None,
                 acopytools=None,
                 logger=None,
                 default_copytools='rucio',
                 trace_report=None):
        """
            If `acopytools` is not specified then it will be automatically resolved via infosys. In this case `infosys` requires initialization.
            :param acopytools: dict of copytool names per activity to be used for transfers. Accepts also list of names or string value without activity passed.
            :param logger: logging.Logger object to use for logging (None means no logging)
            :param default_copytools: copytool name(s) to be used in case of unknown activity passed. Accepts either list of names or single string value.
        """

        super(StagingClient, self).__init__()

        if not logger:
            logger = logging.getLogger('%s.%s' % (__name__, 'null'))
            logger.disabled = True

        self.logger = logger
        self.infosys = infosys_instance or infosys

        if isinstance(acopytools, basestring):
            acopytools = {'default': [acopytools]} if acopytools else {}
        if isinstance(acopytools, (list, tuple)):
            acopytools = {'default': acopytools} if acopytools else {}

        self.acopytools = acopytools or {}

        # get an initialized trace report (has to be updated for get/put if not defined before)
        self.trace_report = trace_report if trace_report else TraceReport(pq=os.environ.get('PILOT_SITENAME', ''))

        if self.infosys.queuedata:
            if not self.acopytools:  ## resolve from queuedata.acopytools using infosys
                self.acopytools = (self.infosys.queuedata.acopytools or {}).copy()
            if not self.acopytools:  ## resolve from queuedata.copytools using infosys
                self.acopytools = dict(default=list((self.infosys.queuedata.copytools or {}).keys()))

        if not self.acopytools.get('default'):
            if isinstance(default_copytools, basestring):
                default_copytools = [default_copytools] if default_copytools else []
            self.acopytools['default'] = default_copytools

        if not self.acopytools:
            msg = 'failed to initialize StagingClient: no acopytools options found, acopytools=%s' % self.acopytools
            logger.error(msg)
            self.trace_report.update(clientState='BAD_COPYTOOL', stateReason=msg)
            self.trace_report.send()
            raise PilotException("failed to resolve acopytools settings")

        logger.info('configured copytools per activity: acopytools=%s' % self.acopytools)
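
The constructor accepts acopytools as a dict, a list/tuple, or a plain string. A minimal standalone sketch of that normalization, written against plain str rather than the basestring compatibility alias and not part of the pilot API:

def normalize_acopytools(acopytools):
    # a single string becomes the copytool for the 'default' activity
    if isinstance(acopytools, str):
        return {'default': [acopytools]} if acopytools else {}
    # a list/tuple becomes the copytool list for the 'default' activity
    if isinstance(acopytools, (list, tuple)):
        return {'default': list(acopytools)} if acopytools else {}
    # a dict (or None) is used as-is
    return acopytools or {}

print(normalize_acopytools('rucio'))             # {'default': ['rucio']}
print(normalize_acopytools(['rucio', 'xrdcp']))  # {'default': ['rucio', 'xrdcp']}
print(normalize_acopytools(None))                # {}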
Example #3
def create_trace_report(job, label='stage-in'):
    """
    Create the trace report object.

    :param job: job object.
    :param label: 'stage-[in|out]' (string).
    :return: trace report object.
    """

    event_type, localsite, remotesite = get_trace_report_variables(job, label=label)
    trace_report = TraceReport(pq=os.environ.get('PILOT_SITENAME', ''), localSite=localsite, remoteSite=remotesite,
                               dataset="", eventType=event_type)
    trace_report.init(job)

    return trace_report
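
get_trace_report_variables is not shown here, but Examples #1 and #5 indicate how the event type is built: 'get_sm' for stage-in, 'put_sm' for stage-out, with '_a' appended for analysis jobs. An illustrative sketch of that rule only, not the pilot implementation:

def build_event_type(label, is_analysis):
    # 'stage-in' -> 'get_sm', everything else -> 'put_sm' (see Examples #1 and #5)
    event_type = 'get_sm' if label == 'stage-in' else 'put_sm'
    if is_analysis:
        event_type += '_a'  # analysis jobs get an '_a' suffix
    return event_type

print(build_event_type('stage-in', True))    # get_sm_a
print(build_event_type('stage-out', False))  # put_sm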
Example #4
#    scopes = file_list_dictionary.get('scopes')
#    filesizes = file_list_dictionary.get('filesizes')
#    checksums = file_list_dictionary.get('checksums')
#    allowlans = file_list_dictionary.get('allowlans')
#    allowwans = file_list_dictionary.get('allowwans')
#    directaccesslans = file_list_dictionary.get('directaccesslans')
#    directaccesswans = file_list_dictionary.get('directaccesswans')
#    istars = file_list_dictionary.get('istars')
#    accessmodes = file_list_dictionary.get('accessmodes')
#    storagetokens = file_list_dictionary.get('storagetokens')
#    guids = file_list_dictionary.get('guids')

    # generate the trace report
    trace_report = TraceReport(pq=os.environ.get('PILOT_SITENAME', ''),
                               localSite=args.localsite,
                               remoteSite=args.remotesite,
                               dataset="",
                               eventType=args.eventtype)
    job = Job(produserid=args.produserid,
              jobid=args.jobid,
              taskid=args.taskid,
              jobdefinitionid=args.jobdefinitionid)
    trace_report.init(job)

    try:
        infoservice = InfoService()
        infoservice.init(args.queuename, infosys.confinfo, infosys.extinfo)
        infosys.init(args.queuename)  # is this correct? otherwise infosys.queuedata doesn't get set
    except Exception as e:
Example #5
def _stage_in(args, job):
    """
        :return: True in case of success
    """

    log = get_logger(job.jobid)

    # tested ok:
    #log.info('testing sending SIGUSR1')
    #import signal
    #os.kill(os.getpid(), signal.SIGUSR1)

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_PRE_STAGEIN, time.time(), args)

    # any DBRelease files should not be staged in
    for fspec in job.indata:
        if 'DBRelease' in fspec.lfn:
            fspec.status = 'no_transfer'

    event_type = "get_sm"
    #if log_transfer:
    #    eventType += '_logs'
    #if special_log_transfer:
    #    eventType += '_logs_os'
    if job.is_analysis():
        event_type += "_a"
    rse = get_rse(job.indata)
    localsite = remotesite = rse
    trace_report = TraceReport(pq='', localSite=localsite, remoteSite=remotesite, dataset="", eventType=event_type)
    trace_report.init(job)

    # now that the trace report has been created, remove any files that are not to be transferred (DBRelease files) from the indata list
    toberemoved = []
    for fspec in job.indata:
        if fspec.status == 'no_transfer':
            toberemoved.append(fspec)
    for fspec in toberemoved:
        logger.info('removing fspec object (lfn=%s) from list of input files' % fspec.lfn)
        job.indata.remove(fspec)

    try:
        if job.is_eventservicemerge:
            client = StageInESClient(job.infosys, logger=log, trace_report=trace_report)
            activity = 'es_events_read'
        else:
            client = StageInClient(job.infosys, logger=log, trace_report=trace_report)
            activity = 'pr'
        kwargs = dict(workdir=job.workdir, cwd=job.workdir, usecontainer=False, job=job)  #, mode='stage-in')

        client.transfer(job.indata, activity=activity, **kwargs)
    except PilotException as error:
        log.error('PilotException caught: %s' % error)
        job.piloterrorcodes, job.piloterrordiags = errors.add_error_code(error.get_error_code())
    except Exception as error:
        log.error('failed to stage-in: error=%s' % error)

    log.info('summary of transferred files:')
    for e in job.indata:
        if not e.status:
            status = "(not transferred)"
        else:
            status = e.status
        log.info(" -- lfn=%s, status_code=%s, status=%s" % (e.lfn, e.status_code, status))

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_POST_STAGEIN, time.time(), args)

    remain_files = [e for e in job.indata if e.status not in ['remote_io', 'transferred', 'no_transfer']]
    if not remain_files:
        log.info("stage-in finished")
    else:
        log.info("stage-in failed")

    return not remain_files
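
A standalone sketch of the DBRelease handling above: matching files are marked 'no_transfer' before the trace report is created and dropped from the input list afterwards (done here with a list comprehension instead of the two-pass removal). SimpleNamespace stands in for the pilot FileSpec object:

from types import SimpleNamespace

indata = [SimpleNamespace(lfn='input.pool.root', status=None),
          SimpleNamespace(lfn='DBRelease-31.8.1.tar.gz', status=None)]

# mark DBRelease files so they are skipped by the transfer
for fspec in indata:
    if 'DBRelease' in fspec.lfn:
        fspec.status = 'no_transfer'

# remove the marked files from the list of input files
indata = [fspec for fspec in indata if fspec.status != 'no_transfer']
print([fspec.lfn for fspec in indata])  # ['input.pool.root']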
Example #6
def _stage_in(args, job):
    """
        :return: True in case of success
    """

    log = get_logger(job.jobid)

    # tested ok:
    #log.info('testing sending SIGUSR1')
    #import signal
    #os.kill(os.getpid(), signal.SIGUSR1)

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_PRE_STAGEIN, time.time(), args)

    # any DBRelease files should not be staged in
    for fspec in job.indata:
        if 'DBRelease' in fspec.lfn:
            fspec.status = 'no_transfer'

    event_type = "get_sm"
    if job.is_analysis():
        event_type += "_a"
    rse = get_rse(job.indata)
    localsite = remotesite = rse
    trace_report = TraceReport(pq=os.environ.get('PILOT_SITENAME', ''),
                               localSite=localsite,
                               remoteSite=remotesite,
                               dataset="",
                               eventType=event_type)
    trace_report.init(job)

    # now that the trace report has been created, remove any files that are not to be transferred (DBRelease files) from the indata list
    toberemoved = []
    for fspec in job.indata:
        if fspec.status == 'no_transfer':
            toberemoved.append(fspec)
    for fspec in toberemoved:
        logger.info('removing fspec object (lfn=%s) from list of input files' %
                    fspec.lfn)
        job.indata.remove(fspec)

    ########### bulk transfer test
    # THE FOLLOWING WORKS BUT THERE IS AN ISSUE WITH TRACES, CHECK STAGEIN SCRIPT IF STORED CORRECTLY
    #filename = 'initial_trace_report.json'
    #tpath = os.path.join(job.workdir, filename)
    #write_json(tpath, trace_report)
    #lfns, scopes = get_filedata_strings(job.indata)
    #script = 'stagein.py'
    #srcdir = os.environ.get('PILOT_SOURCE_DIR')
    #scriptpath = os.path.join(os.path.join(srcdir, 'pilot/scripts'), script)
    #copy(scriptpath, srcdir)
    #cmd = 'python %s --lfns=%s --scopes=%s --tracereportname=%s -w %s -d -q %s' %\
    #      (os.path.join(srcdir, script), lfns, scopes, tpath, job.workdir, args.queue)
    #logger.debug('could have executed: %s' % script)
    #exit_code, stdout, stderr = execute(cmd, mode='python')
    #logger.debug('exit_code=%d' % exit_code)
    #logger.debug('stdout=%s' % stdout)
    #logger.debug('stderr=%s' % stderr)
    ########### bulk transfer test

    try:
        if job.is_eventservicemerge:
            client = StageInESClient(job.infosys,
                                     logger=log,
                                     trace_report=trace_report)
            activity = 'es_events_read'
        else:
            client = StageInClient(job.infosys,
                                   logger=log,
                                   trace_report=trace_report)
            activity = 'pr'
        kwargs = dict(workdir=job.workdir,
                      cwd=job.workdir,
                      usecontainer=False,
                      job=job,
                      use_bulk=False)
        client.prepare_sources(job.indata)
        client.transfer(job.indata, activity=activity, **kwargs)
    except PilotException as error:
        import traceback
        error_msg = traceback.format_exc()
        log.error(error_msg)
        msg = errors.format_diagnostics(error.get_error_code(), error_msg)
        job.piloterrorcodes, job.piloterrordiags = errors.add_error_code(
            error.get_error_code(), msg=msg)
    except Exception as error:
        log.error('failed to stage-in: error=%s' % error)

    log.info('summary of transferred files:')
    for e in job.indata:
        status = e.status if e.status else "(not transferred)"
        log.info(" -- lfn=%s, status_code=%s, status=%s" %
                 (e.lfn, e.status_code, status))

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_POST_STAGEIN, time.time(), args)

    remain_files = [
        e for e in job.indata
        if e.status not in ['remote_io', 'transferred', 'no_transfer']
    ]
    if not remain_files:
        log.info("stage-in finished")
    else:
        log.info("stage-in failed")

    return not remain_files
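
The PilotException branch logs the full traceback and converts it into job error diagnostics instead of re-raising, so the log can still be staged out afterwards. A minimal sketch of that pattern using only the standard library (errors.format_diagnostics and the job error fields are pilot-specific and omitted here):

import logging
import traceback

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger(__name__)

try:
    raise RuntimeError('simulated transfer failure')
except RuntimeError:
    # log the full traceback, then carry on instead of re-raising
    log.error(traceback.format_exc())

log.info('execution continues so the log can still be staged out')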
Example #7
    def test_copy_out_rucio(self):
        from pilot.copytool.rucio import copy_out
        trace_report = TraceReport()
        trace_report.update(eventType='unit test')
        copy_out(self.outdata, trace_report=trace_report)
        os.remove(self.outdata[0].pfn)