Example #1
def _stage_in(args, job):
    """
        :return: True in case of success
    """

    # tested ok:
    #logger.info('testing sending SIGUSR1')
    #import signal
    #os.kill(os.getpid(), signal.SIGUSR1)

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_PRE_STAGEIN, time.time(), args)

    # any DBRelease files should not be staged in
    skip_special_files(job)

    # now that the trace report has been created, remove any files that are not to be transferred (DBRelease files) from the indata list
    update_indata(job)

    label = 'stage-in'

    # should stage-in be done by a script (for containerisation) or by invoking the API (ie classic mode)?
    use_container = pilot.util.middleware.use_middleware_script(
        job.infosys.queuedata.container_type.get("middleware"))
    if use_container:
        logger.info('stage-in will be done by a script')
        try:
            eventtype, localsite, remotesite = get_trace_report_variables(
                job, label=label)
            pilot.util.middleware.containerise_middleware(
                job,
                job.indata,
                args.queue,
                eventtype,
                localsite,
                remotesite,
                job.infosys.queuedata.container_options,
                args.input_dir,
                label=label,
                container_type=job.infosys.queuedata.container_type.get(
                    "middleware"))
        except PilotException as error:
            logger.warning(
                'stage-in containerisation threw a pilot exception: %s', error)
        except Exception as error:
            import traceback
            logger.warning('stage-in containerisation threw an exception: %s',
                           error)
            logger.error(traceback.format_exc())
    else:
        try:
            logger.info('stage-in will not be done in a container')

            # create the trace report
            trace_report = create_trace_report(job, label=label)

            if job.is_eventservicemerge:
                client = StageInESClient(job.infosys,
                                         logger=logger,
                                         trace_report=trace_report)
                activity = 'es_events_read'
            else:
                client = StageInClient(job.infosys,
                                       logger=logger,
                                       trace_report=trace_report)
                activity = 'pr'
            use_pcache = job.infosys.queuedata.use_pcache
            kwargs = dict(workdir=job.workdir,
                          cwd=job.workdir,
                          usecontainer=False,
                          use_pcache=use_pcache,
                          use_bulk=False,
                          input_dir=args.input_dir,
                          use_vp=job.use_vp,
                          catchall=job.infosys.queuedata.catchall)
            client.prepare_sources(job.indata)
            client.transfer(job.indata, activity=activity, **kwargs)
        except PilotException as error:
            import traceback
            error_msg = traceback.format_exc()
            logger.error(error_msg)
            msg = errors.format_diagnostics(error.get_error_code(), error_msg)
            job.piloterrorcodes, job.piloterrordiags = errors.add_error_code(
                error.get_error_code(), msg=msg)
        except Exception as error:
            logger.error('failed to stage-in: error=%s', error)

    logger.info('summary of transferred files:')
    for infile in job.indata:
        status = infile.status if infile.status else "(not transferred)"
        logger.info(" -- lfn=%s, status_code=%s, status=%s", infile.lfn,
                    infile.status_code, status)

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_POST_STAGEIN, time.time(), args)

    remain_files = [
        infile for infile in job.indata
        if infile.status not in ['remote_io', 'transferred', 'no_transfer']
    ]
    logger.info("stage-in finished") if not remain_files else logger.info(
        "stage-in failed")

    return not remain_files
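
Example #1 declares success only when every input file ends up in one of the accepted final states ('remote_io', 'transferred', 'no_transfer'). Below is a minimal, self-contained sketch of that check using only the standard library; FileSpecStub and stage_in_succeeded are hypothetical names introduced for illustration and are not part of the pilot2 API, which operates on pilot.info.FileSpec objects.

# Minimal sketch of the final-state check used at the end of _stage_in (Example #1).
# FileSpecStub is a hypothetical stand-in for pilot.info.FileSpec.
from dataclasses import dataclass

FINAL_OK_STATES = {'remote_io', 'transferred', 'no_transfer'}


@dataclass
class FileSpecStub:
    lfn: str
    status: str = ''  # empty string means "not transferred"


def stage_in_succeeded(indata):
    """Return True only if every input file reached an accepted final state."""
    remain_files = [f for f in indata if f.status not in FINAL_OK_STATES]
    return not remain_files


if __name__ == '__main__':
    files = [FileSpecStub('EVNT.pool.root.1', 'transferred'),
             FileSpecStub('DBRelease-31.8.1.tar.gz', 'no_transfer')]
    print(stage_in_succeeded(files))  # True
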
Example #2
File: stagein.py  Project: brinick/pilot2
    try:
        infoservice = InfoService()
        infoservice.init(args.queuename, infosys.confinfo, infosys.extinfo)
        infosys.init(
            args.queuename
        )  # is this correct? otherwise infosys.queuedata doesn't get set
    except Exception as e:
        message(e)

    # perform stage-in (single transfers)
    err = ""
    errcode = 0
    if args.eventservicemerge:
        client = StageInESClient(infoservice,
                                 logger=logger,
                                 trace_report=trace_report)
        activity = 'es_events_read'
    else:
        client = StageInClient(infoservice,
                               logger=logger,
                               trace_report=trace_report)
        activity = 'pr'
    kwargs = dict(workdir=args.workdir,
                  cwd=args.workdir,
                  usecontainer=False,
                  use_pcache=args.usepcache,
                  use_bulk=False,
                  use_vp=args.usevp,
                  input_dir=args.inputdir,
                  catchall=args.catchall)
Example #3
File: data.py  Project: ptrlv/pilot2
def _stage_in(args, job):
    """
        :return: True in case of success
    """

    log = get_logger(job.jobid)

    # tested ok:
    #log.info('testing sending SIGUSR1')
    #import signal
    #os.kill(os.getpid(), signal.SIGUSR1)

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_PRE_STAGEIN, time.time(), args)

    # any DBRelease files should not be staged in
    for fspec in job.indata:
        if 'DBRelease' in fspec.lfn:
            fspec.status = 'no_transfer'

    event_type = "get_sm"
    #if log_transfer:
    #    eventType += '_logs'
    #if special_log_transfer:
    #    eventType += '_logs_os'
    if job.is_analysis():
        event_type += "_a"
    rse = get_rse(job.indata)
    localsite = remotesite = rse
    trace_report = TraceReport(pq='', localSite=localsite, remoteSite=remotesite, dataset="", eventType=event_type)
    trace_report.init(job)

    # now that the trace report has been created, remove any files that are not to be transferred (DBRelease files) from the indata list
    toberemoved = []
    for fspec in job.indata:
        if fspec.status == 'no_transfer':
            toberemoved.append(fspec)
    for fspec in toberemoved:
        logger.info('removing fspec object (lfn=%s) from list of input files' % fspec.lfn)
        job.indata.remove(fspec)

    try:
        if job.is_eventservicemerge:
            client = StageInESClient(job.infosys, logger=log, trace_report=trace_report)
            activity = 'es_events_read'
        else:
            client = StageInClient(job.infosys, logger=log, trace_report=trace_report)
            activity = 'pr'
        kwargs = dict(workdir=job.workdir, cwd=job.workdir, usecontainer=False, job=job)  #, mode='stage-in')

        client.transfer(job.indata, activity=activity, **kwargs)
    except PilotException as error:
        log.error('PilotException caught: %s' % error)
        job.piloterrorcodes, job.piloterrordiags = errors.add_error_code(error.get_error_code())
    except Exception as error:
        log.error('failed to stage-in: error=%s' % error)

    log.info('summary of transferred files:')
    for e in job.indata:
        if not e.status:
            status = "(not transferred)"
        else:
            status = e.status
        log.info(" -- lfn=%s, status_code=%s, status=%s" % (e.lfn, e.status_code, status))

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_POST_STAGEIN, time.time(), args)

    remain_files = [e for e in job.indata if e.status not in ['remote_io', 'transferred', 'no_transfer']]
    if not remain_files:
        log.info("stage-in finished")
    else:
        log.info("stage-in failed")

    return not remain_files
Example #4
    def test_stageout_noexist_activity_stagein(self):
        """
        Make sure that staging out a file raises no exceptions.
        """
        error = None
        try:
            from pilot.info import infosys, InfoService
            infoservice = InfoService()
            infoservice.init('BNL_CLOUD_MCORE', infosys.confinfo,
                             infosys.extinfo)

            output_file = os.path.join('/tmp', str(uuid.uuid4()))
            shutil.copy('/bin/hostname', output_file)
            file_data = {
                'scope': 'transient',
                'lfn': os.path.basename(output_file),
                #'ddmendpoint': None,
                #'type': 'es_events',
                #'surl': output_file
                #'turl': None,
                #'filesize': None,
                #'checksum': None
            }
            file_spec = FileSpec(filetype='output', **file_data)
            xdata = [file_spec]
            workdir = os.path.dirname(output_file)
            client = StageOutESClient(infoservice)
            kwargs = dict(workdir=workdir, cwd=workdir, usecontainer=False)
            client.prepare_destinations(
                xdata, activity=[
                    'es_events_no_exist', 'pw'
                ])  # allow writing to the `es_events_no_exist` and `pw` storages
            client.transfer(xdata,
                            activity=['es_events_no_exist', 'pw'],
                            **kwargs)
        except exception.PilotException as error:  # Python 2/3
            logger.error("Pilot Exeception: %s, %s" %
                         (error.get_detail(), traceback.format_exc()))
        except Exception as e:  # Python 2/3
            logger.error(traceback.format_exc())
            error = exception.StageOutFailure("stageOut failed with error=%s" %
                                              e)

        logger.info('Summary of transferred files:')
        for e in xdata:
            logger.info(" -- lfn=%s, status_code=%s, status=%s" %
                        (e.lfn, e.status_code, e.status))

        if error:
            logger.error(
                'Failed to stage-out eventservice file(%s): error=%s' %
                (output_file, error.get_detail()))
            raise error

        storage_id = infosys.get_storage_id(file_spec.ddmendpoint)
        logger.info('File %s staged out to %s (id: %s)' %
                    (file_spec.lfn, file_spec.ddmendpoint, storage_id))

        new_file_data = {
            'scope': 'test',
            'lfn': file_spec.lfn,
            'storage_token': '%s/1000' % storage_id
        }
        try:
            new_file_spec = FileSpec(filetype='input', **new_file_data)

            xdata = [new_file_spec]
            workdir = os.path.dirname(output_file)
            client = StageInESClient(infoservice)
            kwargs = dict(workdir=workdir, cwd=workdir, usecontainer=False)
            client.prepare_sources(xdata)
            client.transfer(xdata, activity=['es_events_read'], **kwargs)
        except exception.PilotException as error:  # Python 2/3
            logger.error("Pilot Exeception: %s, %s" %
                         (error.get_detail(), traceback.format_exc()))
        except Exception as e:  # Python 2/3
            logger.error(traceback.format_exc())
            error = exception.StageInFailure("stagein failed with error=%s" %
                                             e)

        logger.info('Summary of transferred files:')
        for e in xdata:
            logger.info(" -- lfn=%s, status_code=%s, status=%s" %
                        (e.lfn, e.status_code, e.status))

        if error:
            logger.error('Failed to stage-in eventservice file(%s): error=%s' %
                         (output_file, error.get_detail()))
            raise error
Example #5
def _stage_in(args, job):
    """
        :return: True in case of success
    """

    log = get_logger(job.jobid)

    # tested ok:
    #log.info('testing sending SIGUSR1')
    #import signal
    #os.kill(os.getpid(), signal.SIGUSR1)

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_PRE_STAGEIN, time.time(), args)

    # any DBRelease files should not be staged in
    for fspec in job.indata:
        if 'DBRelease' in fspec.lfn:
            fspec.status = 'no_transfer'

    event_type = "get_sm"
    if job.is_analysis():
        event_type += "_a"
    rse = get_rse(job.indata)
    localsite = remotesite = rse
    trace_report = TraceReport(pq=os.environ.get('PILOT_SITENAME', ''),
                               localSite=localsite,
                               remoteSite=remotesite,
                               dataset="",
                               eventType=event_type)
    trace_report.init(job)

    # now that the trace report has been created, remove any files that are not to be transferred (DBRelease files) from the indata list
    toberemoved = []
    for fspec in job.indata:
        if fspec.status == 'no_transfer':
            toberemoved.append(fspec)
    for fspec in toberemoved:
        logger.info('removing fspec object (lfn=%s) from list of input files' %
                    fspec.lfn)
        job.indata.remove(fspec)

    ########### bulk transfer test
    # THE FOLLOWING WORKS BUT THERE IS AN ISSUE WITH TRACES, CHECK STAGEIN SCRIPT IF STORED CORRECTLY
    #filename = 'initial_trace_report.json'
    #tpath = os.path.join(job.workdir, filename)
    #write_json(tpath, trace_report)
    #lfns, scopes = get_filedata_strings(job.indata)
    #script = 'stagein.py'
    #srcdir = os.environ.get('PILOT_SOURCE_DIR')
    #scriptpath = os.path.join(os.path.join(srcdir, 'pilot/scripts'), script)
    #copy(scriptpath, srcdir)
    #cmd = 'python %s --lfns=%s --scopes=%s --tracereportname=%s -w %s -d -q %s' %\
    #      (os.path.join(srcdir, script), lfns, scopes, tpath, job.workdir, args.queue)
    #logger.debug('could have executed: %s' % script)
    #exit_code, stdout, stderr = execute(cmd, mode='python')
    #logger.debug('exit_code=%d' % exit_code)
    #logger.debug('stdout=%s' % stdout)
    #logger.debug('stderr=%s' % stderr)
    ########### bulk transfer test

    try:
        if job.is_eventservicemerge:
            client = StageInESClient(job.infosys,
                                     logger=log,
                                     trace_report=trace_report)
            activity = 'es_events_read'
        else:
            client = StageInClient(job.infosys,
                                   logger=log,
                                   trace_report=trace_report)
            activity = 'pr'
        kwargs = dict(workdir=job.workdir,
                      cwd=job.workdir,
                      usecontainer=False,
                      job=job,
                      use_bulk=False)
        client.prepare_sources(job.indata)
        client.transfer(job.indata, activity=activity, **kwargs)
    except PilotException as error:
        import traceback
        error_msg = traceback.format_exc()
        log.error(error_msg)
        msg = errors.format_diagnostics(error.get_error_code(), error_msg)
        job.piloterrorcodes, job.piloterrordiags = errors.add_error_code(
            error.get_error_code(), msg=msg)
    except Exception as error:
        log.error('failed to stage-in: error=%s' % error)

    log.info('summary of transferred files:')
    for e in job.indata:
        status = e.status if e.status else "(not transferred)"
        log.info(" -- lfn=%s, status_code=%s, status=%s" %
                 (e.lfn, e.status_code, status))

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_POST_STAGEIN, time.time(), args)

    remain_files = [
        e for e in job.indata
        if e.status not in ['remote_io', 'transferred', 'no_transfer']
    ]
    if not remain_files:
        log.info("stage-in finished")
    else:
        log.info("stage-in failed")

    return not remain_files
Example #6
    def setUpClass(cls):
        try:
            args = {
                'workflow': 'eventservice_hpc',
                'queue': 'BNL_CLOUD_MCORE',
                'site': 'BNL_CLOUD_MCORE',
                'port': 25443,
                'url': 'https://aipanda007.cern.ch',
                'job_label': 'ptest',
                'pilot_user': '******',
                'node': socket.getfqdn(),
                'mem': 16000,
                'disk_space': 160000,
                'working_group': '',
                'cpu': 2601.0,
                'info': None
            }

            communicator_manager = CommunicationManager()
            cls._communicator_manager = communicator_manager
            communicator_manager.start()

            jobs = communicator_manager.get_jobs(njobs=1, args=args)
            job = create_job(jobs[0], 'BNL_CLOUD_MCORE')
            job.workdir = '/tmp/test_esworkexecutor'
            job.corecount = 1
            if not os.path.exists(job.workdir):
                os.makedirs(job.workdir)

            job_data = {}
            job_data['jobId'] = job['PandaID']
            job_data['siteName'] = 'BNL_CLOUD_MCORE'
            job_data['state'] = 'starting'
            job_data['attemptNr'] = job['attemptNr'] + 1
            job_data['node'] = 'pilot2_test'
            job_data['schedulerID'] = 'pilot2_test'
            job_data['coreCount'] = 1
            status = communicator_manager.update_jobs(jobs=[job_data])
            job_data['state'] = 'running'
            status = communicator_manager.update_jobs(jobs=[job_data])
            communicator_manager.stop()

            # download input files
            client = StageInESClient(job.infosys, logger=logger)
            kwargs = dict(workdir=job.workdir,
                          cwd=job.workdir,
                          usecontainer=False,
                          job=job)
            client.prepare_sources(job.indata)
            client.transfer(job.indata, activity='pr', **kwargs)

            # get the payload command from the user specific code
            pilot_user = os.environ.get('PILOT_USER', 'atlas').lower()
            user = __import__('pilot.user.%s.common' % pilot_user, globals(),
                              locals(), [pilot_user], 0)  # Python 2/3
            cmd = user.get_payload_command(job)
            logger.info("payload execution command: %s" % cmd)

            payload = {
                'executable': cmd,
                'workdir': job.workdir,
                'output_file': 'pilot_test_%s_stdout.txt' % job['PandaID'],
                'error_file': 'pilot_test_%s_stderr.txt' % job['PandaID'],
                'job': job
            }
            cls._payload = payload
        except Exception as ex:
            if cls._communicator_manager:
                cls._communicator_manager.stop()
            raise ex
Example #7
        storage_id = infosys.get_storage_id(file_spec.ddmendpoint)
        logger.info('File %s staged out to %s (id: %s)' %
                    (file_spec.lfn, file_spec.ddmendpoint, storage_id))

        new_file_data = {
            'scope': 'test',
            'lfn': file_spec.lfn,
            'storage_token': '%s/1000' % storage_id
        }
        try:
            new_file_spec = FileSpec(filetype='input', **new_file_data)

            xdata = [new_file_spec]
            workdir = os.path.dirname(output_file)
            client = StageInESClient(infoservice)
            kwargs = dict(workdir=workdir, cwd=workdir, usecontainer=False)
            client.transfer(xdata, activity=['es_events_read'], **kwargs)
        except exception.PilotException as error:
            logger.error("Pilot Exception: %s, %s" %
                         (error.get_detail(), traceback.format_exc()))
        except Exception as e:
            logger.error(traceback.format_exc())
            error = exception.StageInFailure("stagein failed with error=%s" %
                                             e)

        logger.info('Summary of transferred files:')
        for e in xdata:
            logger.info(" -- lfn=%s, status_code=%s, status=%s" %
                        (e.lfn, e.status_code, e.status))