def test_stageout_es_events_non_exist_pw(self):
    """
    Make sure that no exceptions to stage out file.

    Copies ``/bin/hostname`` to a uniquely named file under ``/tmp`` and
    transfers it with the ``es_events_non_exist`` and ``pw`` activities.
    Any failure is logged, a per-file summary is logged, and the error is
    re-raised at the end so that the test fails.

    :raises: the caught ``exception.PilotException``, or an
             ``exception.StageOutFailure`` wrapping any other exception.
    """
    error = None
    # Bind the names used by the summary below before entering the try
    # block, so that an early failure is reported as the real error rather
    # than as a NameError.
    xdata = []
    output_file = None

    try:
        from pilot.info import infosys, InfoService
        infoservice = InfoService()
        infoservice.init('BNL_CLOUD_MCORE', infosys.confinfo, infosys.extinfo)

        output_file = os.path.join('/tmp', str(uuid.uuid4()))
        shutil.copy('/bin/hostname', output_file)
        file_data = {
            'scope': 'transient',
            'lfn': os.path.basename(output_file),
            #'ddmendpoint': None,
            #'type': 'es_events',
            #'surl': output_file
            #'turl': None,
            #'filesize': None,
            #'checksum': None
        }
        file_spec = FileSpec(filetype='output', **file_data)
        xdata = [file_spec]
        workdir = os.path.dirname(output_file)
        client = StageOutESClient(infoservice)
        kwargs = dict(workdir=workdir, cwd=workdir, usecontainer=False)
        # allow to write to `es_events_non_exist` and `pw` astorages
        client.prepare_destinations(xdata, activity=['es_events_non_exist', 'pw'])
        client.transfer(xdata, activity=['es_events_non_exist', 'pw'], **kwargs)
    except exception.PilotException as exc:  # Python 2/3
        # Python 3 unbinds the ``as`` target when the handler exits, so the
        # exception is copied to ``error`` to keep it for the final check.
        error = exc
        logger.error("Pilot Exeception: %s, %s" % (error.get_detail(), traceback.format_exc()))
    except Exception as exc:  # Python 2/3
        logger.error(traceback.format_exc())
        error = exception.StageOutFailure("stageOut failed with error=%s" % exc)

    logger.info('Summary of transferred files:')
    for fspec in xdata:
        logger.info(" -- lfn=%s, status_code=%s, status=%s" % (fspec.lfn, fspec.status_code, fspec.status))

    if error:
        logger.error('Failed to stage-out eventservice file(%s): error=%s' % (output_file, error.get_detail()))
        raise error
class TestStager(unittest.TestCase):
    """
    Unit tests for event service Grid work executor
    """

    @unittest.skipIf(not check_env(), "No CVMFS")
    def test_stageout_es_events(self):
        """
        Make sure that no exceptions to stage out file.

        Copies ``/bin/hostname`` to a uniquely named file under ``/tmp`` and
        transfers it with the ``es_events`` activity. Any failure is logged
        and recorded in ``error``.
        """
        error = None
        try:
            from pilot.info import infosys, InfoService
            infoservice = InfoService()
            infoservice.init('BNL_CLOUD_MCORE', infosys.confinfo, infosys.extinfo)

            output_file = os.path.join('/tmp', str(uuid.uuid4()))
            shutil.copy('/bin/hostname', output_file)
            file_data = {
                'scope': 'transient',
                'lfn': os.path.basename(output_file),
                #'ddmendpoint': None,
                #'type': 'es_events',
                #'surl': output_file
                #'turl': None,
                #'filesize': None,
                #'checksum': None
            }
            file_spec = FileSpec(filetype='output', **file_data)
            xdata = [file_spec]
            workdir = os.path.dirname(output_file)
            client = StageOutESClient(infoservice)
            kwargs = dict(workdir=workdir, cwd=workdir, usecontainer=False)
            client.transfer(xdata, activity=['es_events'], **kwargs)
        except exception.PilotException as exc:  # Python 2/3
            # ``except X, name`` is Python 2 only. With the ``as`` form
            # Python 3 unbinds the target when the handler exits, so the
            # exception is copied to ``error`` explicitly.
            error = exc
            logger.error("Pilot Exeception: %s, %s" % (error.get_detail(), traceback.format_exc()))
        except Exception as exc:  # Python 2/3
            logger.error(traceback.format_exc())
            error = exception.StageOutFailure("stageOut failed with error=%s" % exc)
class GenericExecutor(BaseExecutor):
    """
    Executor that collects event service output messages, reports finished
    and failed event ranges, tars the outputs and stages them out.
    """

    def __init__(self, **kwargs):
        super(GenericExecutor, self).__init__(**kwargs)
        self.setName("GenericExecutor")

        # Messages waiting to be tarred, and every message ever received.
        self.__queued_out_messages = []
        self.__last_stageout_time = None
        self.__all_out_messages = []

        self.proc = None
        self.exit_code = None

    def is_payload_started(self):
        """Return whether the payload process reports itself started (False if there is no process)."""
        return self.proc.is_payload_started() if self.proc else False

    def get_pid(self):
        """Return the pid of the payload process, or None if there is no process."""
        return self.proc.pid if self.proc else None

    def get_exit_code(self):
        """Return the stored exit code."""
        return self.exit_code

    def update_finished_event_ranges(self, out_messagess, output_file, fsize, checksum, storage_id):
        """
        Update finished event ranges

        :param out_messagess: messages from AthenaMP; each one must provide an 'id' key.
        :param output_file: output file name.
        :param fsize: file size.
        :param checksum: mapping of checksum name to value; every entry is copied into the 'zipFile' record.
        :param storage_id: the id of the storage.
        """
        if not out_messagess:
            return

        event_ranges = [{"eventRangeID": out_msg['id'], "eventStatus": 'finished'}
                        for out_msg in out_messagess]

        zip_file = {"numEvents": len(event_ranges),
                    "objstoreID": storage_id,
                    "lfn": os.path.basename(output_file),
                    "fsize": fsize,
                    "pathConvention": 1000}
        for checksum_key in checksum:
            zip_file[checksum_key] = checksum[checksum_key]

        event_range_status = {"zipFile": zip_file, "eventRanges": event_ranges}
        event_range_message = {'version': 1, 'eventRanges': json.dumps([event_range_status])}
        self.update_events(event_range_message)

        job = self.get_job()
        job.nevents += len(event_ranges)

    def update_failed_event_ranges(self, out_messagess):
        """
        Update failed event ranges

        :param out_messagess: messages from AthenaMP; each one must provide 'id' and 'status' keys.
        """
        if not out_messagess:
            return

        event_ranges = []
        for message in out_messagess:
            # Any status other than 'failed'/'fatal' is reported as 'failed'.
            status = message['status'] if message['status'] in ['failed', 'fatal'] else 'failed'
            # ToBeFixed errorCode
            event_ranges.append({"errorCode": 1220, "eventRangeID": message['id'], "eventStatus": status})

        event_range_message = {'version': 0, 'eventRanges': json.dumps(event_ranges)}
        self.update_events(event_range_message)

    def handle_out_message(self, message):
        """
        Handle ES output or error messages hook function for tests.

        Failed/fatal messages are reported immediately; all other messages
        are queued until the next call to tarzip_output_es().

        :param message: a dict of parsed message.
                        For 'finished' event ranges, it's {'id': <id>, 'status': 'finished', 'output': <output>, 'cpu': <cpu>,
                                                           'wall': <wall>, 'message': <full message>}.
                        For 'failed' event ranges, it's {'id': <id>, 'status': 'failed', 'message': <full message>}.
        """
        job = self.get_job()
        log = get_logger(job.jobid, logger)
        log.info("Handling out message: %s" % message)

        self.__all_out_messages.append(message)

        if message['status'] in ['failed', 'fatal']:
            self.update_failed_event_ranges([message])
        else:
            self.__queued_out_messages.append(message)

    def tarzip_output_es(self):
        """
        Tar/zip eventservice outputs.

        Drains the queue of pending output messages and appends each output
        file to one tar archive named after the first drained message. A
        message whose tar command fails is re-queued, until it has been
        retried 3 times, after which it is discarded.

        :return: (messages added to the archive, archive file name), or
                 (None, None) if nothing was queued or an exception was raised.
        """
        job = self.get_job()
        log = get_logger(job.jobid, logger)

        out_messages = []
        while self.__queued_out_messages:
            out_messages.append(self.__queued_out_messages.pop())

        # Nothing queued: the archive name below needs at least one message,
        # so return the same "nothing to stage out" value as the error path.
        if not out_messages:
            return None, None

        output_file = "EventService_premerge_%s.tar" % out_messages[0]['id']

        ret_messages = []
        try:
            for out_msg in out_messages:
                command = "tar -rf " + output_file + " --directory=%s %s" % (os.path.dirname(out_msg['output']),
                                                                               os.path.basename(out_msg['output']))
                exit_code, stdout, stderr = execute(command)
                if exit_code == 0:
                    ret_messages.append(out_msg)
                    continue

                log.error("Failed to add event output to tar/zip file: out_message: %s, exit_code: %s, stdout: %s, stderr: %s" %
                          (out_msg, exit_code, stdout, stderr))
                retries = out_msg.get('retries', 0)
                if retries >= 3:
                    log.error("Discard out messages because it has been retried more than 3 times: %s" % out_msg)
                else:
                    out_msg['retries'] = retries + 1
                    self.__queued_out_messages.append(out_msg)
        except Exception as e:
            log.error("Failed to tar/zip event ranges: %s" % str(e))
            # NOTE(review): this re-queues every drained message, including
            # any that were already re-queued or discarded in the loop above.
            self.__queued_out_messages += out_messages
            return None, None

        return ret_messages, output_file

    def stageout_es_real(self, output_file):
        """
        Stage out event service output file.

        :param output_file: output file name.
        """
        job = self.get_job()
        log = get_logger(job.jobid, logger)
        log.info('prepare to stage-out eventservice files')

        error = None
        try:
            file_data = {'scope': 'transient',
                         'lfn': os.path.basename(output_file),
                         }
            file_spec = FileSpec(filetype='output', **file_data)
            xdata = [file_spec]
            client = StageOutESClient(job.infosys, logger=log)
            kwargs = dict(workdir=job.workdir, cwd=job.workdir, usecontainer=False, job=job)
            client.transfer(xdata, activity=['es_events', 'pw'], **kwargs)
        except exception.PilotException as exc:  # Python 2/3
            # ``except X, name`` is Python 2 only. With the ``as`` form
            # Python 3 unbinds the target when the handler exits, so the
            # exception is copied to ``error`` explicitly.
            error = exc
            log.error(error.get_detail())
        except Exception as exc:  # Python 2/3
            import traceback
            log.error(traceback.format_exc())
            error = exception.StageOutFailure("stageOut failed with error=%s" % exc)
#'turl': None, #'filesize': None, #'checksum': None } file_spec = FileSpec(filetype='output', **file_data) xdata = [file_spec] workdir = os.path.dirname(output_file) client = StageOutESClient(infoservice) kwargs = dict(workdir=workdir, cwd=workdir, usecontainer=False) client.transfer(xdata, activity=['es_events', 'pw'], **kwargs) except exception.PilotException, error: logger.error("Pilot Exeception: %s, %s" % (error.get_detail(), traceback.format_exc())) except Exception, e: logger.error(traceback.format_exc()) error = exception.StageOutFailure("stageOut failed with error=%s" % e) logger.info('Summary of transferred files:') for e in xdata: logger.info(" -- lfn=%s, status_code=%s, status=%s" % (e.lfn, e.status_code, e.status)) if error: logger.error( 'Failed to stage-out eventservice file(%s): error=%s' % (output_file, error.get_detail())) raise error @unittest.skipIf(not check_env(), "No CVMFS") def test_stageout_es_events_non_exist_pw(self): """