def _stage_in(args, job):
    """
    Stage in all input files for the given job.

    Depending on the queuedata container settings, the transfer is either
    delegated to an external middleware script (containerised mode) or done
    directly through the StageIn(ES)Client API (classic mode).

    :param args: pilot arguments object (queue, input_dir, timing file info).
    :param job: job object whose job.indata list is transferred and updated in place.
    :return: True in case of success (all files reached a terminal OK state).
    """
    import traceback  # local import, only needed for failure diagnostics

    # tested ok:
    #logger.info('testing sending SIGUSR1')
    #import signal
    #os.kill(os.getpid(), signal.SIGUSR1)

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_PRE_STAGEIN, time.time(), args)

    # any DBRelease files should not be staged in
    skip_special_files(job)

    # now that the trace report has been created, remove any files that are not to be
    # transferred (DBRelease files) from the indata list
    update_indata(job)

    label = 'stage-in'

    # should stage-in be done by a script (for containerisation) or by invoking the API (ie classic mode)?
    use_container = pilot.util.middleware.use_middleware_script(
        job.infosys.queuedata.container_type.get("middleware"))

    if use_container:
        logger.info('stage-in will be done by a script')
        try:
            eventtype, localsite, remotesite = get_trace_report_variables(job, label=label)
            pilot.util.middleware.containerise_middleware(
                job, job.indata, args.queue, eventtype, localsite, remotesite,
                job.infosys.queuedata.container_options, args.input_dir,
                label=label,
                container_type=job.infosys.queuedata.container_type.get("middleware"))
        except PilotException as error:
            logger.warning('stage-in containerisation threw a pilot exception: %s', error)
        except Exception as error:
            logger.warning('stage-in containerisation threw an exception: %s', error)
            logger.error(traceback.format_exc())
    else:
        try:
            logger.info('stage-in will not be done in a container')

            # create the trace report
            trace_report = create_trace_report(job, label=label)

            if job.is_eventservicemerge:
                client = StageInESClient(job.infosys, logger=logger, trace_report=trace_report)
                activity = 'es_events_read'
            else:
                client = StageInClient(job.infosys, logger=logger, trace_report=trace_report)
                activity = 'pr'
            use_pcache = job.infosys.queuedata.use_pcache
            kwargs = dict(workdir=job.workdir,
                          cwd=job.workdir,
                          usecontainer=False,
                          use_pcache=use_pcache,
                          use_bulk=False,
                          input_dir=args.input_dir,
                          use_vp=job.use_vp,
                          catchall=job.infosys.queuedata.catchall)
            client.prepare_sources(job.indata)
            client.transfer(job.indata, activity=activity, **kwargs)
        except PilotException as error:
            error_msg = traceback.format_exc()
            logger.error(error_msg)
            msg = errors.format_diagnostics(error.get_error_code(), error_msg)
            job.piloterrorcodes, job.piloterrordiags = errors.add_error_code(error.get_error_code(), msg=msg)
        except Exception as error:
            logger.error('failed to stage-in: error=%s', error)

    logger.info('summary of transferred files:')
    for infile in job.indata:
        status = infile.status if infile.status else "(not transferred)"
        logger.info(" -- lfn=%s, status_code=%s, status=%s", infile.lfn, infile.status_code, status)

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_POST_STAGEIN, time.time(), args)

    # any file not in a terminal OK state counts as a stage-in failure
    remain_files = [infile for infile in job.indata
                    if infile.status not in ['remote_io', 'transferred', 'no_transfer']]
    # note: plain if/else instead of a side-effecting conditional expression
    if not remain_files:
        logger.info("stage-in finished")
    else:
        logger.info("stage-in failed")

    return not remain_files
try: infoservice = InfoService() infoservice.init(args.queuename, infosys.confinfo, infosys.extinfo) infosys.init( args.queuename ) # is this correct? otherwise infosys.queuedata doesn't get set except Exception as e: message(e) # perform stage-in (single transfers) err = "" errcode = 0 if args.eventservicemerge: client = StageInESClient(infoservice, logger=logger, trace_report=trace_report) activity = 'es_events_read' else: client = StageInClient(infoservice, logger=logger, trace_report=trace_report) activity = 'pr' kwargs = dict(workdir=args.workdir, cwd=args.workdir, usecontainer=False, use_pcache=args.usepcache, use_bulk=False, use_vp=args.usevp, input_dir=args.inputdir, catchall=args.catchall)
def _stage_in(args, job):
    """
    Stage in the input files listed in job.indata.

    :return: True in case of success
    """
    log = get_logger(job.jobid)

    # tested ok:
    #log.info('testing sending SIGUSR1')
    #import signal
    #os.kill(os.getpid(), signal.SIGUSR1)

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_PRE_STAGEIN, time.time(), args)

    # any DBRelease files should not be staged in
    for fspec in job.indata:
        if 'DBRelease' in fspec.lfn:
            fspec.status = 'no_transfer'

    # build the rucio trace event type, marking analysis jobs separately
    event_type = "get_sm"
    #if log_transfer:
    #    eventType += '_logs'
    #if special_log_transfer:
    #    eventType += '_logs_os'
    if job.is_analysis():
        event_type += "_a"

    rse = get_rse(job.indata)
    localsite = remotesite = rse
    trace_report = TraceReport(pq='', localSite=localsite, remoteSite=remotesite,
                               dataset="", eventType=event_type)
    trace_report.init(job)

    # now that the trace report has been created, remove any files that are not to be
    # transferred (DBRelease files) from the indata list
    skipped = [fspec for fspec in job.indata if fspec.status == 'no_transfer']
    for fspec in skipped:
        logger.info('removing fspec object (lfn=%s) from list of input files' % fspec.lfn)
        job.indata.remove(fspec)

    try:
        # event-service merge jobs use the ES client and read activity
        if job.is_eventservicemerge:
            client = StageInESClient(job.infosys, logger=log, trace_report=trace_report)
            activity = 'es_events_read'
        else:
            client = StageInClient(job.infosys, logger=log, trace_report=trace_report)
            activity = 'pr'
        kwargs = dict(workdir=job.workdir, cwd=job.workdir, usecontainer=False, job=job)  #, mode='stage-in')
        client.transfer(job.indata, activity=activity, **kwargs)
    except PilotException as error:
        log.error('PilotException caught: %s' % error)
        job.piloterrorcodes, job.piloterrordiags = errors.add_error_code(error.get_error_code())
    except Exception as error:
        log.error('failed to stage-in: error=%s' % error)

    log.info('summary of transferred files:')
    for fspec in job.indata:
        status = fspec.status if fspec.status else "(not transferred)"
        log.info(" -- lfn=%s, status_code=%s, status=%s" % (fspec.lfn, fspec.status_code, status))

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_POST_STAGEIN, time.time(), args)

    # success means every file reached a terminal OK state
    remain_files = [fspec for fspec in job.indata
                    if fspec.status not in ['remote_io', 'transferred', 'no_transfer']]
    if remain_files:
        log.info("stage-in failed")
    else:
        log.info("stage-in finished")

    return not remain_files
def test_stageout_noexist_activity_stagein(self):
    """
    Make sure that no exceptions to stage out file.

    Stages a copy of /bin/hostname out via an activity list whose first entry
    does not exist (falls back to 'pw'), then stages the same file back in.

    Fix: the original bound caught exceptions directly with
    'except ... as error'. In Python 3 that name is unbound when the handler
    exits (PEP 3110), so the later 'if error:' raised NameError whenever an
    exception had actually been caught. The handlers now assign the caught
    exception to the pre-initialized 'error' variable instead.
    """
    error = None
    try:
        from pilot.info import infosys, InfoService
        infoservice = InfoService()
        infoservice.init('BNL_CLOUD_MCORE', infosys.confinfo, infosys.extinfo)

        output_file = os.path.join('/tmp', str(uuid.uuid4()))
        shutil.copy('/bin/hostname', output_file)
        file_data = {'scope': 'transient',
                     'lfn': os.path.basename(output_file),
                     #'ddmendpoint': None,
                     #'type': 'es_events',
                     #'surl': output_file
                     #'turl': None,
                     #'filesize': None,
                     #'checksum': None
                     }
        file_spec = FileSpec(filetype='output', **file_data)
        xdata = [file_spec]
        workdir = os.path.dirname(output_file)
        client = StageOutESClient(infoservice)
        kwargs = dict(workdir=workdir, cwd=workdir, usecontainer=False)

        client.prepare_destinations(xdata, activity=['es_events_no_exist', 'pw'])  # allow to write to `es_events_no_exist` and `pw` astorages
        client.transfer(xdata, activity=['es_events_no_exist', 'pw'], **kwargs)
    except exception.PilotException as exc:  # Python 2/3
        # keep the exception alive past the handler (Py3 unbinds 'as' names)
        error = exc
        logger.error("Pilot Exeception: %s, %s" % (error.get_detail(), traceback.format_exc()))
    except Exception as exc:  # Python 2/3
        logger.error(traceback.format_exc())
        error = exception.StageOutFailure("stageOut failed with error=%s" % exc)

    logger.info('Summary of transferred files:')
    for fspec in xdata:
        logger.info(" -- lfn=%s, status_code=%s, status=%s" % (fspec.lfn, fspec.status_code, fspec.status))

    if error:
        logger.error('Failed to stage-out eventservice file(%s): error=%s' % (output_file, error.get_detail()))
        raise error

    storage_id = infosys.get_storage_id(file_spec.ddmendpoint)
    logger.info('File %s staged out to %s(id: %s)' % (file_spec.lfn, file_spec.ddmendpoint, storage_id))

    # stage the file back in, addressing the storage by its numeric id
    new_file_data = {'scope': 'test',
                     'lfn': file_spec.lfn,
                     'storage_token': '%s/1000' % storage_id}
    try:
        new_file_spec = FileSpec(filetype='input', **new_file_data)
        xdata = [new_file_spec]
        workdir = os.path.dirname(output_file)
        client = StageInESClient(infoservice)
        kwargs = dict(workdir=workdir, cwd=workdir, usecontainer=False)
        client.prepare_sources(xdata)
        client.transfer(xdata, activity=['es_events_read'], **kwargs)
    except exception.PilotException as exc:  # Python 2/3
        error = exc
        logger.error("Pilot Exeception: %s, %s" % (error.get_detail(), traceback.format_exc()))
    except Exception as exc:  # Python 2/3
        logger.error(traceback.format_exc())
        error = exception.StageInFailure("stagein failed with error=%s" % exc)

    logger.info('Summary of transferred files:')
    for fspec in xdata:
        logger.info(" -- lfn=%s, status_code=%s, status=%s" % (fspec.lfn, fspec.status_code, fspec.status))

    if error:
        logger.error('Failed to stage-in eventservice file(%s): error=%s' % (output_file, error.get_detail()))
        raise error
def _stage_in(args, job):
    """
    Stage in the input files listed in job.indata.

    :return: True in case of success
    """
    log = get_logger(job.jobid)

    # tested ok:
    #log.info('testing sending SIGUSR1')
    #import signal
    #os.kill(os.getpid(), signal.SIGUSR1)

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_PRE_STAGEIN, time.time(), args)

    # any DBRelease files should not be staged in
    for fspec in job.indata:
        if 'DBRelease' in fspec.lfn:
            fspec.status = 'no_transfer'

    # rucio trace event type; analysis jobs are tagged separately
    event_type = "get_sm"
    if job.is_analysis():
        event_type += "_a"

    rse = get_rse(job.indata)
    localsite = remotesite = rse
    trace_report = TraceReport(pq=os.environ.get('PILOT_SITENAME', ''),
                               localSite=localsite, remoteSite=remotesite,
                               dataset="", eventType=event_type)
    trace_report.init(job)

    # now that the trace report has been created, remove any files that are not to be
    # transferred (DBRelease files) from the indata list
    skipped = [fspec for fspec in job.indata if fspec.status == 'no_transfer']
    for fspec in skipped:
        logger.info('removing fspec object (lfn=%s) from list of input files' % fspec.lfn)
        job.indata.remove(fspec)

    ########### bulk transfer test
    # THE FOLLOWING WORKS BUT THERE IS AN ISSUE WITH TRACES, CHECK STAGEIN SCRIPT IF STORED CORRECTLY
    # (disabled experiment: serialize the trace report to json, then launch
    # pilot/scripts/stagein.py via execute() with --lfns/--scopes/-q options
    # to do the transfer out-of-process)
    #filename = 'initial_trace_report.json'
    #tpath = os.path.join(job.workdir, filename)
    #write_json(tpath, trace_report)
    #lfns, scopes = get_filedata_strings(job.indata)
    #script = 'stagein.py'
    #srcdir = os.environ.get('PILOT_SOURCE_DIR')
    #scriptpath = os.path.join(os.path.join(srcdir, 'pilot/scripts'), script)
    #copy(scriptpath, srcdir)
    #cmd = 'python %s --lfns=%s --scopes=%s --tracereportname=%s -w %s -d -q %s' %\
    #    (os.path.join(srcdir, script), lfns, scopes, tpath, job.workdir, args.queue)
    #logger.debug('could have executed: %s' % script)
    #exit_code, stdout, stderr = execute(cmd, mode='python')
    #logger.debug('exit_code=%d' % exit_code)
    #logger.debug('stdout=%s' % stdout)
    #logger.debug('stderr=%s' % stderr)
    ########### bulk transfer test

    try:
        if job.is_eventservicemerge:
            client = StageInESClient(job.infosys, logger=log, trace_report=trace_report)
            activity = 'es_events_read'
        else:
            client = StageInClient(job.infosys, logger=log, trace_report=trace_report)
            activity = 'pr'
        kwargs = dict(workdir=job.workdir, cwd=job.workdir, usecontainer=False,
                      job=job, use_bulk=False)
        client.prepare_sources(job.indata)
        client.transfer(job.indata, activity=activity, **kwargs)
    except PilotException as error:
        import traceback
        error_msg = traceback.format_exc()
        log.error(error_msg)
        msg = errors.format_diagnostics(error.get_error_code(), error_msg)
        job.piloterrorcodes, job.piloterrordiags = errors.add_error_code(error.get_error_code(), msg=msg)
    except Exception as error:
        log.error('failed to stage-in: error=%s' % error)

    log.info('summary of transferred files:')
    for fspec in job.indata:
        status = fspec.status if fspec.status else "(not transferred)"
        log.info(" -- lfn=%s, status_code=%s, status=%s" % (fspec.lfn, fspec.status_code, status))

    # write time stamps to pilot timing file
    add_to_pilot_timing(job.jobid, PILOT_POST_STAGEIN, time.time(), args)

    # success means every file reached a terminal OK state
    remain_files = [fspec for fspec in job.indata
                    if fspec.status not in ['remote_io', 'transferred', 'no_transfer']]
    if remain_files:
        log.info("stage-in failed")
    else:
        log.info("stage-in finished")

    return not remain_files
def setUpClass(cls): try: args = { 'workflow': 'eventservice_hpc', 'queue': 'BNL_CLOUD_MCORE', 'site': 'BNL_CLOUD_MCORE', 'port': 25443, 'url': 'https://aipanda007.cern.ch', 'job_label': 'ptest', 'pilot_user': '******', 'node': socket.getfqdn(), 'mem': 16000, 'disk_space': 160000, 'working_group': '', 'cpu': 2601.0, 'info': None } communicator_manager = CommunicationManager() cls._communicator_manager = communicator_manager communicator_manager.start() jobs = communicator_manager.get_jobs(njobs=1, args=args) job = create_job(jobs[0], 'BNL_CLOUD_MCORE') job.workdir = '/tmp/test_esworkexecutor' job.corecount = 1 if not os.path.exists(job.workdir): os.makedirs(job.workdir) job_data = {} job_data['jobId'] = job['PandaID'] job_data['siteName'] = 'BNL_CLOUD_MCORE' job_data['state'] = 'starting' job_data['attemptNr'] = job['attemptNr'] + 1 job_data['node'] = 'pilot2_test' job_data['schedulerID'] = 'pilot2_test' job_data['coreCount'] = 1 status = communicator_manager.update_jobs(jobs=[job_data]) job_data['state'] = 'running' status = communicator_manager.update_jobs(jobs=[job_data]) communicator_manager.stop() # download input files client = StageInESClient(job.infosys, logger=logger) kwargs = dict(workdir=job.workdir, cwd=job.workdir, usecontainer=False, job=job) client.prepare_sources(job.indata) client.transfer(job.indata, activity='pr', **kwargs) # get the payload command from the user specific code pilot_user = os.environ.get('PILOT_USER', 'atlas').lower() user = __import__('pilot.user.%s.common' % pilot_user, globals(), locals(), [pilot_user], 0) # Python 2/3 cmd = user.get_payload_command(job) logger.info("payload execution command: %s" % cmd) payload = { 'executable': cmd, 'workdir': job.workdir, 'output_file': 'pilot_test_%s_stdout.txt' % job['PandaID'], 'error_file': 'pilot_test_%s_stderr.txt' % job['PandaID'], 'job': job } cls._payload = payload except Exception as ex: if cls._communicator_manager: cls._communicator_manager.stop() raise ex
storage_id = infosys.get_storage_id(file_spec.ddmendpoint) logger.info('File %s staged out to %s(id: %s)' % (file_spec.lfn, file_spec.ddmendpoint, storage_id)) new_file_data = { 'scope': 'test', 'lfn': file_spec.lfn, 'storage_token': '%s/1000' % storage_id } try: new_file_spec = FileSpec(filetype='input', **new_file_data) xdata = [new_file_spec] workdir = os.path.dirname(output_file) client = StageInESClient(infoservice) kwargs = dict(workdir=workdir, cwd=workdir, usecontainer=False) client.transfer(xdata, activity=['es_events_read'], **kwargs) except exception.PilotException, error: logger.error("Pilot Exeception: %s, %s" % (error.get_detail(), traceback.format_exc())) except Exception, e: logger.error(traceback.format_exc()) error = exception.StageInFailure("stagein failed with error=%s" % e) logger.info('Summary of transferred files:') for e in xdata: logger.info(" -- lfn=%s, status_code=%s, status=%s" % (e.lfn, e.status_code, e.status))