def retrieve_jobs(self, gc_id_jobnum_list):
    """Process the output sandboxes reported by ``self._get_jobs_output``.

    Generator yielding one 4-tuple per handled job. Jobs that could not be
    retrieved or parsed are reported as ``(jobnum, -1, {}, None)``; otherwise
    the tuple returned by ``self._parse_job_info_file`` is passed through.
    """
    jobnum_list_retrieved = []
    for (jobnum_input, output_dn) in self._get_jobs_output(gc_id_jobnum_list):
        if output_dn is None:
            # jobnum_input != None, output_dn == None => Job could not be retrieved
            if jobnum_input not in jobnum_list_retrieved:
                yield (jobnum_input, -1, {}, None)
        elif jobnum_input is not None:
            # jobnum_input != None, output_dn != None => Job retrieval from WMS was ok
            # (jobnum_input == None, output_dn != None => leftovers of job retrieval are skipped)
            job_fn = os.path.join(output_dn, 'job.info')
            retrieve_result = self._parse_job_info_file(
                jobnum_input, job_fn, output_dn, jobnum_list_retrieved)
            if retrieve_result is not None:
                yield retrieve_result
            else:
                # Clean empty output_dns (bottom-up; non-empty directories are left alone)
                for (sub_dn, _, _) in os.walk(output_dn, topdown=False):
                    ignore_exception(Exception, None, os.rmdir, sub_dn)
                if os.path.exists(output_dn):
                    # Preserve failed job
                    ensure_dir_exists(self._path_fail, 'failed output directory')
                    fail_dn = os.path.join(self._path_fail, os.path.basename(output_dn))
                    _force_move(self._log, output_dn, fail_dn)
                yield (jobnum_input, -1, {}, None)
def _get_cms_cert(config):
    """Return the path of a usable CMS grid proxy, creating a new proxy if needed.

    Falls back to the ``X509_USER_PROXY`` environment variable if no
    ``VomsAccessToken`` can be instantiated. If the current proxy cannot be used
    for submission, the user is prompted for the proxy password and
    ``voms-proxy-init`` is run.

    Raises CMSAuthenticationException if no grid environment is found, the new
    proxy cannot be created, or the newly created proxy is still invalid.
    """
    config = config.change_view(set_sections=['cms', 'access', 'proxy'])
    try:
        access = AccessToken.create_instance('VomsAccessToken', config, 'cms-proxy')
    except Exception:
        if os.environ.get('X509_USER_PROXY'):
            return os.environ['X509_USER_PROXY']
        raise CMSAuthenticationException('Unable to find grid environment')
    can_submit = ignore_exception(Exception, False, access.can_submit, 5 * 60, True)
    if not can_submit:
        logging.getLogger('access.cms').warning('The grid proxy has expired or is invalid!')
        role = config.get_list('new proxy roles', '', on_change=None)
        timeout = config.get_time('new proxy timeout', 10, on_change=None)
        lifetime = config.get_time('new proxy lifetime', 192 * 60, on_change=None)
        # password in variable name removes it from debug log
        password = getpass.getpass('Please enter proxy password: ')
        try:
            # NOTE(review): this statement was garbled in the source text; the lookup
            # helper is assumed to be resolve_install_path - confirm against the file
            proxy_init_exec = resolve_install_path('voms-proxy-init')
            proc = LocalProcess(proxy_init_exec, '--voms', str.join(':', ['cms'] + role),
                '--valid', '%d:%d' % (lifetime / 60, lifetime % 60), logging=False)
            if password:
                proc.stdin.write(password + '\n')
                proc.stdin.close()
            proc.get_output(timeout=timeout)
        except Exception:
            raise CMSAuthenticationException('Unable to create new grid proxy')
        access = AccessToken.create_instance('VomsAccessToken', config, 'cms-proxy')  # new instance
        can_submit = ignore_exception(Exception, False, access.can_submit, 5 * 60, True)
        if not can_submit:
            raise CMSAuthenticationException('Newly created grid proxy is also invalid')
    return access.get_auth_fn_list()[0]
def _pop_std_stream(handler_cls):
    """Remove the top stream from ``handler_cls.stream``.

    The old top stream is disabled before the pop and the new top stream is
    enabled afterwards; streams without these methods are skipped.
    """
    def _disable(stream):
        return stream.disable()

    def _enable(stream):
        return stream.enable()

    stream_stack = handler_cls.stream
    ignore_exception(AttributeError, None, _disable, stream_stack[-1])
    stream_stack.pop()
    ignore_exception(AttributeError, None, _enable, stream_stack[-1])
def _push_std_stream(handler_cls, user_stream):
    """Push ``user_stream`` on top of ``handler_cls.stream``.

    The previous top stream is disabled before the push and the new top stream
    is enabled afterwards; streams without these methods are skipped.
    """
    def _disable(stream):
        return stream.disable()

    def _enable(stream):
        return stream.enable()

    stream_stack = handler_cls.stream
    ignore_exception(AttributeError, None, _disable, stream_stack[-1])
    stream_stack.append(user_stream)
    ignore_exception(AttributeError, None, _enable, stream_stack[-1])
def _debug_watchdog():
    """Endless loop: every 60 seconds, dump the stacks of all threads into
    'gc_debug_stack.log' if that file already exists. Errors are ignored."""
    def _write_stack(fp):
        return DebugInterface(stream=fp).show_stack(thread_id='all')

    def _check_write_stack_log():
        if not os.path.exists('gc_debug_stack.log'):
            return  # only write the dump if the file was created as a trigger
        with_file(SafeFile('gc_debug_stack.log', 'w'), _write_stack)

    while True:
        ignore_exception(Exception, None, _check_write_stack_log)
        time.sleep(60)
def __init__(self, config):
    """Translate the legacy single-option handler list into the separate
    local / remote event handler options."""
    # This class allows to specify events handlers as done in the past with a single option
    EventHandlerManager.__init__(self, config)
    # (handler base class, option to extend, config section of that option)
    handler_target_list = [
        (LocalEventHandler, 'local event handler', 'jobs'),
        (RemoteEventHandler, 'remote event handler', 'backend'),
    ]
    for old_monitor in config.get_list(['monitor', 'event handler'], ['scripts']):
        for (handler_cls, option, section) in handler_target_list:
            # only register the name where a matching class can be resolved
            if ignore_exception(Exception, None, handler_cls.get_class, old_monitor):
                config.set(option, old_monitor, '+=', section=section)
def ping_host(host, timeout=1):
    """Ping ``host`` once and return a time in seconds, or None on any failure.

    The value is taken from the second-to-last '/'-separated field of the last
    line of the ping output (expected to end with 'ms') and divided by 1000.
    """
    proc = ignore_exception(Exception, None, LocalProcess,
        'ping', '-Uqnc', 1, '-W', timeout, host)
    if proc is None:
        # process could not be started - previously this raised AttributeError below
        return None
    ping_output = ignore_exception(Exception, '', proc.get_output, timeout)
    ping_str_list = ping_output.strip().split('\n')
    if ping_str_list[-1].endswith('ms'):
        return ignore_exception(Exception, None,
            lambda: float(ping_str_list[-1].split('/')[-2]) / 1000.)
    return None
def _debug_watchdog():
    """Poll once per minute, forever: if 'gc_debug_stack.log' exists, overwrite
    it with the stack traces of all threads. Any exception is ignored."""
    def _show_all_stacks(fp):
        return DebugInterface(stream=fp).show_stack(thread_id='all')

    def _check_write_stack_log():
        trigger_exists = os.path.exists('gc_debug_stack.log')
        if trigger_exists:
            with_file(SafeFile('gc_debug_stack.log', 'w'), _show_all_stacks)

    while True:
        ignore_exception(Exception, None, _check_write_stack_log)
        time.sleep(60)
def logging_defaults():
    """Install the default logging setup.

    Resets the root logger and the 'abort' logger, attaches stdout / stderr
    handlers with a verbose formatter, tries to register the debug log and
    adds the methods ``log_process`` and ``log_time`` to ``logging.Logger``.
    """
    formatter_verbose = GCFormatter(
        ex_context=2, ex_vars=200, ex_fstack=1, ex_tree=2, ex_threads=1)

    root_logger = _clean_logger()
    root_logger.manager.loggerDict.clear()
    root_logger.setLevel(logging.DEFAULT)
    root_handler = _register_handler(root_logger, StdoutStreamHandler(), formatter_verbose)

    # Setup logger used for abort messages
    abort_logger = _clean_logger('abort')
    abort_logger.propagate = False
    abort_handler = _register_handler(abort_logger, StderrStreamHandler(), formatter_verbose)

    # Output verbose exception information into dedicated GC log (in gc / tmp / user directory)
    ignore_exception(Exception, None, _register_debug_log,
        abort_logger, abort_handler, formatter_verbose, root_handler)

    # External libraries
    logging.getLogger('requests').setLevel(logging.WARNING)

    # Adding log_process_result to Logging class
    def _log_process(self, proc, level=logging.WARNING, files=None, msg=None):
        msg_template = msg or 'Process %(call)s finished with exit code %(proc_status)s'
        proc_status = proc.status(timeout=0)
        record = self.makeRecord(self.name, level, '<process>', 0, msg_template, tuple(), None)
        record.proc = proc
        record.call = proc.get_call()
        record.proc_status = proc_status
        record.files = files or {}
        # the message template is filled from the attributes of the record itself
        record.msg = record.msg % record.__dict__
        self.handle(record)
    logging.Logger.log_process = _log_process

    # Adding log with time prefix to Logging class
    def _log_time(self, level, msg, *args, **kwargs):
        if not self.isEnabledFor(level):
            return
        caller_info = self.findCaller()
        exc_info = kwargs.pop('exc_info', None)
        record = self.makeRecord(self.name, level, caller_info[0], caller_info[1],
            msg, args, exc_info)
        record.print_time = True
        self.handle(record)
    logging.Logger.log_time = _log_time
def getmaxyx(cls):
    """Return the first two fields of the terminal window size reported by the
    TIOCGWINSZ ioctl on stdout, or (24, 80) if the query fails."""
    def _getmaxyx(fd_term=None):
        fd_query = fd_term or sys.stdout.fileno()
        winsize_request = struct.pack("HHHH", 0, 0, 0, 0)
        winsize_ptr = fcntl.ioctl(fd_query, termios.TIOCGWINSZ, winsize_request)
        (size_first, size_second) = struct.unpack('HHHH', winsize_ptr)[:2]
        return (size_first, size_second)

    return ignore_exception(Exception, (24, 80), _getmaxyx)  # 24x80 is vt100 default
def _interact_with_child(self, pid, fd_parent_stdin, fd_parent_stdout, fd_parent_stderr):
    """Run the stdin / stdout / stderr watchers until the child ``pid`` exits.

    Stores the wait status in ``self._status``, records the finish time, shuts
    the watchers down, closes the parent file descriptors and finally sets
    ``self._event_finished``.
    """
    watcher_list = [
        self._start_watcher('stdin', False, pid, self._handle_input,
            fd_parent_stdin, self._buffer_stdin, self._event_shutdown),
        self._start_watcher('stdout', False, pid, self._handle_output,
            fd_parent_stdout, self._buffer_stdout, self._event_shutdown),
        self._start_watcher('stderr', False, pid, self._handle_output,
            fd_parent_stderr, self._buffer_stderr, self._event_shutdown),
    ]
    while self._status is None:
        # blocking (with spurious wakeups!) - OSError=unable to wait for child
        # - status=False => OS_ABORT
        wait_result = ignore_exception(OSError, (pid, False), os.waitpid, pid, 0)
        (result_pid, status) = wait_result
        if result_pid == pid:
            self._status = status
    self._time_finished = time.time()
    # start shutdown of handlers and wait for it to finish
    self._event_shutdown.set()
    self._buffer_stdin.finish()  # wakeup process input handler
    for watcher in watcher_list:
        watcher.join()
    # fd_parent_stdin == fd_parent_stdout for pty, so close each descriptor only once
    for fd_open in set([fd_parent_stdin, fd_parent_stdout, fd_parent_stderr]):
        os.close(fd_open)
    self._buffer_stdout.finish()  # wakeup pending output buffer waits
    self._buffer_stderr.finish()
    self._event_finished.set()
def __init__(self, cert=None, url=None, cert_error_msg='', cert_error_cls=Exception):
    """Set up the REST client; without an explicit certificate, try to get one
    via ``self._get_grid_cert`` and warn if none is available."""
    self._cert_error_msg = cert_error_msg
    self._cert_error_cls = cert_error_cls
    JSONRestClient.__init__(self, cert, url)
    if self._cert:
        return
    # lookup failures are ignored here and only lead to the warning below
    self._cert = ignore_exception(Exception, None, self._get_grid_cert)
    if not self._cert:
        warn_msg = self._fmt_cert_error('Using this webservice requires a valid grid proxy!')
        self._log.warning(warn_msg)
def parse_type(value):
    """Convert ``value`` to float (if it contains '.') or int; return ``value``
    unchanged if the conversion raises ValueError."""
    def _parse_number(value):
        if '.' not in value:
            return int(value)
        return float(value)

    return ignore_exception(ValueError, value, _parse_number, value)
def hash_verify(opts, status_mon, local_se_path, jobnum, fi_idx, fi):
    """Compare the hash of the downloaded file with ``fi[FileInfo.Hash]`` and
    register the outcome via ``status_mon.register_file_result``.

    Without ``opts.verify_md5`` the file is registered as OK right away.
    Returns the result of the ``register_file_result`` call.
    """
    if not opts.verify_md5:
        return status_mon.register_file_result(jobnum, fi_idx,
            'Download successful', FileDownloadStatus.FILE_OK)

    # Verify => compute md5hash
    remote_hash = fi[FileInfo.Hash]
    activity = Activity('Verifying checksum')
    try:
        local_fn = local_se_path.replace('file://', '')
        local_hash = ignore_exception(Exception, None, hash_calc, local_fn)
        if local_hash is None:
            return status_mon.register_file_result(jobnum, fi_idx,
                'Unable to calculate checksum', FileDownloadStatus.FILE_HASH_FAILED)
    finally:
        activity.finish()

    hash_match = remote_hash == local_hash
    if ANSI is not None:  # colorize the verdict if ANSI sequences are available
        match_map = {
            True: ANSI.reset + ANSI.color_green + 'MATCH' + ANSI.reset,
            False: ANSI.reset + ANSI.color_red + 'FAIL' + ANSI.reset,
        }
    else:
        match_map = {True: 'MATCH', False: 'FAIL'}
    msg = str.join('', [
        '\tLocal hash: %s\n' % local_hash,
        log_intro(jobnum, fi_idx), '\tRemote hash: %s\n' % remote_hash,
        log_intro(jobnum, fi_idx), 'Checksum comparison: ', match_map[hash_match],
    ])
    if hash_match:
        file_status = FileDownloadStatus.FILE_OK
    else:
        file_status = FileDownloadStatus.FILE_HASH_FAILED
    return status_mon.register_file_result(jobnum, fi_idx, msg, file_status)
def hash_verify(opts, status_mon, local_se_path, jobnum, fi_idx, fi):
    """Verify the checksum of a downloaded file and report the result.

    If ``opts.verify_md5`` is not set, the download is reported as successful
    without any check. Otherwise the local hash is computed with ``hash_calc``
    and compared with ``fi[FileInfo.Hash]``. The return value is whatever
    ``status_mon.register_file_result`` returns.
    """
    def _register(msg, file_status):
        return status_mon.register_file_result(jobnum, fi_idx, msg, file_status)

    if not opts.verify_md5:
        return _register('Download successful', FileDownloadStatus.FILE_OK)

    # Verify => compute md5hash
    remote_hash = fi[FileInfo.Hash]
    activity = Activity('Verifying checksum')
    try:
        local_hash = ignore_exception(Exception, None,
            hash_calc, local_se_path.replace('file://', ''))
        if local_hash is None:
            return _register('Unable to calculate checksum',
                FileDownloadStatus.FILE_HASH_FAILED)
    finally:
        activity.finish()

    hash_match = remote_hash == local_hash
    match_map = {True: 'MATCH', False: 'FAIL'}
    if ANSI is not None:
        # wrap the plain labels into color sequences
        match_map = {
            True: ANSI.reset + ANSI.color_green + 'MATCH' + ANSI.reset,
            False: ANSI.reset + ANSI.color_red + 'FAIL' + ANSI.reset,
        }
    msg = '\tLocal hash: %s\n' % local_hash
    msg += log_intro(jobnum, fi_idx) + '\tRemote hash: %s\n' % remote_hash
    msg += log_intro(jobnum, fi_idx) + 'Checksum comparison: ' + match_map[hash_match]
    if not hash_match:
        return _register(msg, FileDownloadStatus.FILE_HASH_FAILED)
    return _register(msg, FileDownloadStatus.FILE_OK)
def _gc_run(args):
    """Create config and workflow from ``args`` (default: ``sys.argv[1:]``) and
    run the workflow inside the GUI, unless ``abort()`` is already set."""
    config = gc_create_config(args or sys.argv[1:], use_default_files=True)
    (workflow, gui) = _gc_create_workflow(config)
    if abort():
        return
    # registered while the workflow is running; removed again in any case
    debug_callback = (gui.end_interface, gui.start_interface)
    DebugInterface.callback_list.append(debug_callback)
    try:
        try:
            gui.start_interface()
        except Exception:
            # create the exception first, then try to shut down the interface
            ex_value = GUIException('GUI init exception')
            ignore_exception(Exception, None, gui.end_interface)
            raise ex_value
        try:
            workflow.run()
        finally:
            gui.end_interface()
    finally:
        DebugInterface.callback_list.remove(debug_callback)
def _gc_run(args):
    """Run the workflow described by ``args`` (or ``sys.argv[1:]`` if empty).

    Does nothing after the setup if ``abort()`` is set. The GUI is always shut
    down after the workflow, and the debug callback is always unregistered.
    """
    cmd_line_args = args or sys.argv[1:]
    config = gc_create_config(cmd_line_args, use_default_files=True)
    (workflow, gui) = _gc_create_workflow(config)
    if abort():
        return

    gui_callback = (gui.end_interface, gui.start_interface)
    DebugInterface.callback_list.append(gui_callback)
    try:
        try:
            gui.start_interface()
        except Exception:
            # exception object is created before the cleanup attempt
            ex_value = GUIException('GUI init exception')
            ignore_exception(Exception, None, gui.end_interface)
            raise ex_value
        try:
            workflow.run()
        finally:
            gui.end_interface()
    finally:
        DebugInterface.callback_list.remove(gui_callback)
def gc_excepthook(*exc_info):
    """Exception hook: log the exception as CRITICAL on the 'abort' logger.

    If the function attribute ``restore_old_hook`` is set and true,
    ``sys.excepthook`` is reset to INITIAL_EXCEPTHOOK first.
    """
    # Exception handler for interactive mode:
    if getattr(gc_excepthook, 'restore_old_hook', False):
        sys.excepthook = INITIAL_EXCEPTHOOK

    def _get_version():
        return sys.modules['grid_control'].__version__
    version = ignore_exception(Exception, 'unknown version', _get_version)

    log = logging.getLogger('abort')
    # make sure the message ends up somewhere if no handler would receive it
    has_handler = log.handlers or (log.propagate and logging.getLogger().handlers)
    if not has_handler:
        log.addHandler(logging.StreamHandler(sys.stderr))
    record = log.makeRecord('exception', logging.CRITICAL, __file__, None,
        'Exception occured in grid-control [%s]\n\n' % version, tuple(), exc_info)
    log.handle(record)
def logging_defaults():
    """Configure the default loggers and extend ``logging.Logger``.

    The root logger writes to stdout, the non-propagating 'abort' logger writes
    to stderr; both use a verbose formatter. Registering the debug log is
    attempted but failures are ignored. ``logging.Logger`` gains the methods
    ``log_process`` and ``log_time``.
    """
    formatter_verbose = GCFormatter(
        ex_context=2, ex_vars=200, ex_fstack=1, ex_tree=2, ex_threads=1)

    root_logger = clean_logger()
    root_logger.manager.loggerDict.clear()
    root_logger.setLevel(logging.DEFAULT)
    root_handler = register_handler(root_logger, StdoutStreamHandler(), formatter_verbose)

    # Setup logger used for abort messages
    abort_logger = clean_logger('abort')
    abort_logger.propagate = False
    abort_handler = register_handler(abort_logger, StderrStreamHandler(), formatter_verbose)

    # Output verbose exception information into dedicated GC log (in gc / tmp / user directory)
    ignore_exception(Exception, None, _register_debug_log,
        abort_logger, abort_handler, formatter_verbose, root_handler)

    # External libraries
    logging.getLogger('requests').setLevel(logging.WARNING)

    # Adding log_process_result to Logging class
    def _log_process(self, proc, level=logging.WARNING, files=None, msg=None):
        if not msg:
            msg = 'Process %(call)s finished with exit code %(proc_status)s'
        status = proc.status(timeout=0)
        record = self.makeRecord(self.name, level, '<process>', 0, msg, tuple(), None)
        record.proc = proc
        record.call = proc.get_call()
        record.proc_status = status
        record.files = files or {}
        # placeholders in the message refer to attributes of the record
        record.msg = record.msg % record.__dict__
        self.handle(record)
    logging.Logger.log_process = _log_process

    # Adding log with time prefix to Logging class
    def _log_time(self, level, msg, *args, **kwargs):
        if not self.isEnabledFor(level):
            return
        caller = self.findCaller()
        exc_info = kwargs.pop('exc_info', None)
        record = self.makeRecord(self.name, level, caller[0], caller[1], msg, args, exc_info)
        record.print_time = True
        self.handle(record)
    logging.Logger.log_time = _log_time
def _handle_input(cls, fd_write, buffer, event_shutdown):
    """Forward data from ``buffer`` to ``fd_write`` until ``event_shutdown`` is set.

    Data left over after a partial write is kept and sent together with newly
    queued data. Previously the leftover was overwritten by the next
    ``buffer.get`` call and lost. If the write raises OSError, the pending data
    is discarded, as before.
    """
    local_buffer = ''
    while not event_shutdown.is_set():
        if local_buffer:
            # local buffer has leftover bytes from last write - just poll for more
            local_buffer += buffer.get(timeout=0, default='')
        else:
            # empty local buffer - wait for data to process
            local_buffer = buffer.get(timeout=1, default='')
        if local_buffer:
            _wait_fd(fd_write_list=[fd_write])
            if not event_shutdown.is_set():
                written = ignore_exception(OSError, None,
                    os.write, fd_write, str2bytes(local_buffer))
                if written is None:
                    # write failed - drop the data instead of retrying in a tight loop
                    local_buffer = ''
                else:
                    local_buffer = local_buffer[written:]
def _submit_job(self, jobnum, task):
    """Submit job ``jobnum`` of ``task`` and return ``(jobnum, gc_id, data)``.

    A sandbox directory is created, the input files and the job config are
    placed there and the submit process is run. ``gc_id`` is None if no job id
    could be obtained; ``data`` is ``{'sandbox': <sandbox path>}``.

    Raises BackendError if the sandbox directory cannot be created.
    """
    # Submit job and yield (jobnum, WMS ID, other data)
    activity = Activity('submitting job %d' % jobnum)

    sandbox_base = None
    try:
        sandbox_base = self._sandbox_helper.get_path()
        sandbox = tempfile.mkdtemp('',
            '%s.%04d.' % (task.get_description().task_id, jobnum), sandbox_base)
    except Exception:
        # 'sandbox' is unbound if mkdtemp failed - report the base directory instead
        raise BackendError('Unable to create sandbox directory in "%s"!' % sandbox_base)
    sb_prefix = sandbox.replace(sandbox_base, '').lstrip('/')

    def _translate_target(desc, src, target):
        return (desc, src, os.path.join(sb_prefix, target))
    self._sm_sb_in.do_transfer(
        ismap(_translate_target, self._get_in_transfer_info_list(task)))

    self._write_job_config(os.path.join(sandbox, '_jobconfig.sh'), jobnum, task, {
        'GC_SANDBOX': sandbox,
        'GC_SCRATCH_SEARCH': str.join(' ', self._scratch_path),
    })
    reqs = self._broker_site.broker(task.get_requirement_list(jobnum), WMS.SITES)
    reqs = dict(self._broker_queue.broker(reqs, WMS.QUEUES))
    if (self._memory > 0) and (reqs.get(WMS.MEMORY, 0) < self._memory):
        # local jobs need higher (more realistic) memory requirements
        reqs[WMS.MEMORY] = self._memory

    job_name = task.get_description(jobnum).job_name
    proc = self._get_submit_proc(jobnum, sandbox, job_name, reqs)
    exit_code = proc.status(timeout=20, terminate=True)
    wms_id_str = proc.stdout.read(timeout=0).strip().strip('\n')
    wms_id = ignore_exception(Exception, None, self.parse_submit_output, wms_id_str)
    activity.finish()

    if exit_code != 0:
        self._log.warning('%s failed:', self._submit_exec)
    elif wms_id is None:
        self._log.warning('%s did not yield job id:\n%s', self._submit_exec, wms_id_str)
    gc_id = self._create_gc_id(wms_id)
    if gc_id is not None:
        # create an empty marker file named after the job id; close the handle right away
        open(os.path.join(sandbox, gc_id), 'w').close()
    else:
        self._log.log_process(proc)
    return (jobnum, gc_id, {'sandbox': sandbox})
def gc_excepthook(*exc_info):
    """Log an uncaught exception with CRITICAL level via the 'abort' logger.

    Restores INITIAL_EXCEPTHOOK beforehand if ``gc_excepthook.restore_old_hook``
    exists and is true. A stderr handler is added if the logger has no handler
    of its own and no root handler would receive the record.
    """
    # Exception handler for interactive mode:
    restore_old_hook = getattr(gc_excepthook, 'restore_old_hook', False)
    if restore_old_hook:
        sys.excepthook = INITIAL_EXCEPTHOOK

    def _read_version():
        return sys.modules['grid_control'].__version__
    version = ignore_exception(Exception, 'unknown version', _read_version)

    log = logging.getLogger('abort')
    if not log.handlers:
        reaches_root_handler = log.propagate and logging.getLogger().handlers
        if not reaches_root_handler:
            log.addHandler(logging.StreamHandler(sys.stderr))
    msg = 'Exception occured in grid-control [%s]\n\n' % version
    log.handle(log.makeRecord('exception', logging.CRITICAL, __file__, None,
        msg, tuple(), exc_info))
def on_task_finish(self, task, job_len):
    """Send a 'Task ... finished!' message to ``self._target_jid`` via jabber.

    Connection and authentication failures are logged as warnings (the result
    of the logging call is returned); any other error is logged with traceback.
    """
    try:
        jid = self._xmpp.protocol.JID(self._source_jid)
        xmpp_client = self._xmpp.Client(jid.getDomain(), debug=[])
        if not ignore_exception(Exception, None, xmpp_client.connect):
            return self._log.warning('Could not connect to jabber server!')
        if not xmpp_client.auth(jid.getNode(), self._source_password,
                resource=jid.getResource()):
            return self._log.warning('Could not authenticate to jabber server!')
        text = 'Task %s finished!' % task.get_description().task_name
        message = self._xmpp.protocol.Message(self._target_jid, text)
        xmpp_client.send(message)
        time.sleep(1)  # Stay connected until delivered
    except Exception:
        self._log.exception('Error while sending message to jabber server')
class Urllib2Session(RestSession):
    """REST session implemented with the urllib2 / urllib.request opener API."""
    alias_list = ['urllib2']

    # runs once when the class body is executed; failures are ignored
    ignore_exception(Exception, None, disable_ca_cert_check)

    # resolve the needed functions / classes by name (two candidate locations each)
    build_opener = resolve_sfun('urllib.request:build_opener', urllib2_path('urllib2:build_opener'))
    HTTPSConnection = resolve_sfun('http.client:HTTPSConnection', 'httplib:HTTPSConnection')
    HTTPSHandler = resolve_sfun('urllib.request:HTTPSHandler', urllib2_path('urllib2:HTTPSHandler'))
    Request = resolve_sfun('urllib.request:Request', urllib2_path('urllib2:Request'))
    urlencode = resolve_sfun('urllib.parse:urlencode', 'urllib:urlencode')

    def request(self, mode, url, headers, params=None, data=None, cert=None):
        """Perform the request and return the response body converted by ``bytes2str``.

        ``mode`` must be one of the RestSession GET / PUT / POST / DELETE
        constants (KeyError otherwise). ``params`` is appended to the url as
        query string; ``data`` is sent as request body.
        """
        method_name = {
            RestSession.GET: 'GET',
            RestSession.PUT: 'PUT',
            RestSession.POST: 'POST',
            RestSession.DELETE: 'DELETE',
        }[mode]
        if params:
            url += '?%s' % Urllib2Session.urlencode(params)
        if data:
            data = str2bytes(data)
        req = Urllib2Session.Request(url=url, data=data, headers=headers)
        # override the method getter on the request object to force the HTTP method
        req.get_method = lambda: method_name
        opener = self._get_opener(cert)
        return bytes2str(opener.open(req).read())

    def _get_opener(self, cert):
        """Return an opener; with ``cert`` it uses that file as client key and certificate."""
        class HTTPSClientAuthHandler(Urllib2Session.HTTPSHandler):
            def https_open(self, req):
                return self.do_open(self._get_connection, req)

            def _get_connection(self, host, timeout=None):
                return Urllib2Session.HTTPSConnection(host, key_file=cert, cert_file=cert)

        # expose the connection factory under the name 'getConnection' as well
        HTTPSClientAuthHandler.getConnection = HTTPSClientAuthHandler._get_connection
        if not cert:
            return Urllib2Session.build_opener()
        return Urllib2Session.build_opener(HTTPSClientAuthHandler())
def on_task_finish(self, task, job_len):
    """Notify ``self._target_jid`` by jabber message that the task finished.

    Failed connection / authentication attempts only produce a warning; all
    other exceptions are caught and logged.
    """
    def _warn(msg):
        return self._log.warning(msg)

    try:
        jid = self._xmpp.protocol.JID(self._source_jid)
        xmpp_client = self._xmpp.Client(jid.getDomain(), debug=[])
        con = ignore_exception(Exception, None, xmpp_client.connect)
        if not con:
            return _warn('Could not connect to jabber server!')
        auth = xmpp_client.auth(
            jid.getNode(), self._source_password, resource=jid.getResource())
        if not auth:
            return _warn('Could not authenticate to jabber server!')
        task_name = task.get_description().task_name
        xmpp_client.send(
            self._xmpp.protocol.Message(self._target_jid, 'Task %s finished!' % task_name))
        time.sleep(1)  # Stay connected until delivered
    except Exception:
        self._log.exception('Error while sending message to jabber server')
def _parse_job_info_file(self, jobnum_input, job_fn, output_dn, jobnum_list_retrieved):
    """Parse the job info in ``output_dn`` and move the directory to the output path.

    Returns ``(jobnum, exit code, raw job info, final output directory)`` on
    success, ``(jobnum, -1, {}, None)`` if the directory could not be moved and
    None if the job information is missing or unusable (a warning is logged).
    ``jobnum_input`` is appended to ``jobnum_list_retrieved`` on success.
    The passed ``job_fn`` is ignored and recomputed from ``output_dn``.

    Raises BackendError if the job number in the file differs from ``jobnum_input``.
    """
    # jobnum_input != None, output_dn != None => Job retrieval from WMS was ok
    job_fn = os.path.join(output_dn, 'job.info')
    job_info_dict = ignore_exception(Exception, None, self._job_parser.process, output_dn)
    if not os.path.exists(job_fn):
        self._log.warning('Job information file is missing')
    elif job_info_dict is None:
        self._log.warning('Unable to parse job information file')
    elif job_info_dict.get(JobResult.JOBNUM, '') == '':
        self._log.warning('Job was unable to read job config file')
    elif job_info_dict:
        jobnum = job_info_dict[JobResult.JOBNUM]
        if jobnum != jobnum_input:  # consistency check
            raise BackendError('Invalid job id in job file %s' % job_fn)
        target_dn = os.path.join(self._path_output, 'job_%d' % jobnum)
        if _force_move(self._log, output_dn, target_dn):
            jobnum_list_retrieved.append(jobnum_input)
            # report the new location - output_dn no longer exists after the move
            return (jobnum, job_info_dict[JobResult.EXITCODE],
                job_info_dict[JobResult.RAW], target_dn)
        else:  # error while moving job output directory
            return (jobnum, -1, {}, None)
    return None
def __init__(self, config, name):
    """Read the jabber settings from ``config`` and load the xmpp library.

    The password file is made owner-read-only before it is read. Raises
    Exception if neither 'xmpp' nor 'grid_control_gui.xmpp' can be imported.
    """
    LocalEventHandler.__init__(self, config, name)
    self._source_jid = config.get('source jid', on_change=None)
    self._target_jid = config.get('target jid', on_change=None)

    password_fn = config.get_fn('source password file')
    os.chmod(password_fn, stat.S_IRUSR)
    # password in variable name removes it from debug log!
    password_file = SafeFile(password_fn)
    self._source_password = password_file.read_close().strip()

    try:  # xmpp contains many deprecated constructs
        import warnings
        warnings.simplefilter('ignore', DeprecationWarning)
    except Exception:
        clear_current_exception()

    # prefer the installed xmpp module and fall back to the bundled one
    self._xmpp = ignore_exception(Exception, None, __import__, 'xmpp')
    if self._xmpp is not None:
        return
    try:
        import grid_control_gui.xmpp
        self._xmpp = grid_control_gui.xmpp
    except Exception:
        raise Exception('Unable to load jabber library!')
def _parse_job_info_file(self, jobnum_input, job_fn, output_dn, jobnum_list_retrieved):
    """Evaluate the job info found in ``output_dn`` and move the directory.

    Returns ``(jobnum, exit code, raw job info, target directory)`` after a
    successful move, ``(jobnum, -1, {}, None)`` if the move failed, and None
    (after logging a warning) if the job information is missing or unusable.
    The passed ``job_fn`` is ignored and recomputed from ``output_dn``.

    Raises BackendError if the parsed job number differs from ``jobnum_input``.
    """
    # jobnum_input != None, output_dn != None => Job retrieval from WMS was ok
    job_fn = os.path.join(output_dn, 'job.info')
    job_info_dict = ignore_exception(Exception, None, self._job_parser.process, output_dn)

    if not os.path.exists(job_fn):
        self._log.warning('Job information file is missing')
        return None
    if job_info_dict is None:
        self._log.warning('Unable to parse job information file')
        return None
    if job_info_dict.get(JobResult.JOBNUM, '') == '':
        self._log.warning('Job was unable to read job config file')
        return None
    if not job_info_dict:
        return None

    jobnum = job_info_dict[JobResult.JOBNUM]
    if jobnum != jobnum_input:  # consistency check
        raise BackendError('Invalid job id in job file %s' % job_fn)
    target_dn = os.path.join(self._path_output, 'job_%d' % jobnum)
    if not _force_move(self._log, output_dn, target_dn):
        return (jobnum, -1, {}, None)  # error while moving job output directory
    jobnum_list_retrieved.append(jobnum_input)
    exit_code = job_info_dict[JobResult.EXITCODE]
    return (jobnum, exit_code, job_info_dict[JobResult.RAW], target_dn)
def _submit_job(self, jobnum, task):
    """Create a sandbox for job ``jobnum``, submit it and return the result.

    Returns ``(jobnum, gc_id, {'sandbox': sandbox})``; ``gc_id`` is None if the
    submission did not yield a job id (the process output is logged then).

    Raises BackendError if the sandbox directory cannot be created.
    """
    # Submit job and yield (jobnum, WMS ID, other data)
    activity = Activity('submitting job %d' % jobnum)

    sandbox_base = None
    try:
        sandbox_base = self._sandbox_helper.get_path()
        sandbox_prefix = '%s.%04d.' % (task.get_description().task_id, jobnum)
        sandbox = tempfile.mkdtemp('', sandbox_prefix, sandbox_base)
    except Exception:
        # the old message formatted 'sandbox', which is unbound when mkdtemp fails
        raise BackendError('Unable to create sandbox directory in "%s"!' % sandbox_base)
    sb_prefix = sandbox.replace(sandbox_base, '').lstrip('/')

    def _translate_target(desc, src, target):
        return (desc, src, os.path.join(sb_prefix, target))
    self._sm_sb_in.do_transfer(ismap(_translate_target, self._get_in_transfer_info_list(task)))

    self._write_job_config(os.path.join(sandbox, '_jobconfig.sh'), jobnum, task, {
        'GC_SANDBOX': sandbox, 'GC_SCRATCH_SEARCH': str.join(' ', self._scratch_path)})
    reqs = self._broker_site.broker(task.get_requirement_list(jobnum), WMS.SITES)
    reqs = dict(self._broker_queue.broker(reqs, WMS.QUEUES))
    if (self._memory > 0) and (reqs.get(WMS.MEMORY, 0) < self._memory):
        reqs[WMS.MEMORY] = self._memory  # local jobs need higher (more realistic) memory requirements

    job_name = task.get_description(jobnum).job_name
    proc = self._get_submit_proc(jobnum, sandbox, job_name, reqs)
    exit_code = proc.status(timeout=20, terminate=True)
    wms_id_str = proc.stdout.read(timeout=0).strip().strip('\n')
    wms_id = ignore_exception(Exception, None, self.parse_submit_output, wms_id_str)
    activity.finish()

    if exit_code != 0:
        self._log.warning('%s failed:', self._submit_exec)
    elif wms_id is None:
        self._log.warning('%s did not yield job id:\n%s', self._submit_exec, wms_id_str)
    gc_id = self._create_gc_id(wms_id)
    if gc_id is not None:
        # touch an empty marker file named after the job id without leaking the handle
        marker_file = open(os.path.join(sandbox, gc_id), 'w')
        marker_file.close()
    else:
        self._log.log_process(proc)
    return (jobnum, gc_id, {'sandbox': sandbox})
def _interact_with_child(self, pid, fd_parent_stdin, fd_parent_stdout, fd_parent_stderr):
    """Supervise the child process ``pid`` until it has finished.

    Starts one watcher per standard stream, waits for the child, stores its
    status in ``self._status`` and then shuts down watchers, file descriptors
    and buffers before setting ``self._event_finished``.
    """
    def _start(stream_name, handler, fd_parent, stream_buffer):
        return self._start_watcher(stream_name, False, pid, handler,
            fd_parent, stream_buffer, self._event_shutdown)

    thread_in = _start('stdin', self._handle_input, fd_parent_stdin, self._buffer_stdin)
    thread_out = _start('stdout', self._handle_output, fd_parent_stdout, self._buffer_stdout)
    thread_err = _start('stderr', self._handle_output, fd_parent_stderr, self._buffer_stderr)

    while self._status is None:
        # blocking (with spurious wakeups!) - OSError=unable to wait for child
        # - status=False => OS_ABORT
        (result_pid, status) = ignore_exception(OSError, (pid, False), os.waitpid, pid, 0)
        if result_pid != pid:
            continue
        self._status = status

    self._time_finished = time.time()
    self._event_shutdown.set()  # start shutdown of handlers and wait for it to finish
    self._buffer_stdin.finish()  # wakeup process input handler
    for thread in (thread_in, thread_out, thread_err):
        thread.join()
    fd_open_set = set([fd_parent_stdin, fd_parent_stdout, fd_parent_stderr])
    for fd_open in fd_open_set:
        os.close(fd_open)  # fd_parent_stdin == fd_parent_stdout for pty
    self._buffer_stdout.finish()  # wakeup pending output buffer waits
    self._buffer_stderr.finish()
    self._event_finished.set()
def parse_str(value, cls, default=None):
    """Return ``cls(value)``, or ``default`` if that call raises an exception."""
    parsed_value = ignore_exception(Exception, default, cls, value)
    return parsed_value
def _intstr2enum(cls, value, default=unspecified):
    """Convert ``value`` with ``int`` and check it against ``cls.enum_value_list``.

    ``default`` is used if the conversion fails. Raises Exception, listing the
    allowed ``name=value`` pairs, if the result is not a known enum value.
    """
    enum = ignore_exception(Exception, default, int, value)
    if enum not in cls.enum_value_list:
        # the format string has to be applied to each (name, value) pair -
        # previously the unformatted template itself ended up in the message
        allowed_str = str.join(', ', imap(lambda nv: '%s=%s' % nv, _map_name2value.items()))
        raise Exception('Invalid enum value %s (allowed are %r)' % (repr(value), allowed_str))
    return enum
def __new__(cls, path):
    """Create the partition reader matching the version stored in the tar file.

    The version is read from the member 'Version' of the uncompressed tar file
    ``path``; version 1 is assumed if it cannot be read.
    """
    def _read_version():
        tar = tarfile.open(path, 'r:')
        try:
            return int(tar.extractfile('Version').read())
        finally:
            tar.close()  # do not leak the file handle used for the version lookup

    version = ignore_exception(Exception, 1, _read_version)
    return FilePartitionReader.create_instance('version_%s' % version, path)
def fill_parameter_content(self, pnum, result):
    """Evaluate ``self._fmt`` and store the outcome in ``result[self._output_vn]``.

    The entries of ``result`` (keys as str, values converted by ``parse_type``)
    are available as names in the expression. ``self._default`` is stored if
    the evaluation raises an exception.
    """
    eval_namespace = {}
    for (key, value) in result.items():
        eval_namespace[str(key)] = parse_type(str(value))
    # NOTE(review): eval executes arbitrary code - self._fmt must come from a trusted source
    result[self._output_vn] = ignore_exception(
        Exception, self._default, eval, self._fmt, eval_namespace)
def get_cms_cert(config=None, ignore_errors=False):
    """Return the result of ``_get_cms_cert`` for ``config`` (or a new config).

    With ``ignore_errors``, exceptions raised by ``_get_cms_cert`` are
    suppressed and None is returned instead.
    """
    logging.getLogger('access.cms-proxy').setLevel(logging.ERROR)
    if ignore_errors:
        return ignore_exception(Exception, None, _get_cms_cert, config or create_config())
    return _get_cms_cert(config or create_config())
def retrieve_jobs(self, gc_id_jobnum_list):
    """Process the output sandboxes reported by ``self._get_jobs_output``.

    Generator yielding ``(jobnum, exit code, raw job info, output directory)``
    for successfully retrieved jobs and ``(jobnum, -1, {}, None)`` for failed
    ones. Successful outputs are moved below ``self._path_output``; unparsable
    non-empty outputs are preserved below ``self._path_fail``.

    Raises BackendError if the job number in the job file does not match.
    """
    # Function to force moving a directory
    def _force_move(source, target):
        try:
            if os.path.exists(target):
                shutil.rmtree(target)
        except (IOError, OSError):  # OSError is a separate class on python 2
            self._log.exception('%r cannot be removed', target)
            clear_current_exception()
            return False
        try:
            shutil.move(source, target)
        except (IOError, OSError):
            self._log.exception('Error moving job output directory from %r to %r', source, target)
            clear_current_exception()
            return False
        return True

    jobnum_list_retrieved = []
    for jobnum_input, output_dn in self._get_jobs_output(gc_id_jobnum_list):
        # jobnum_input != None, output_dn == None => Job could not be retrieved
        if output_dn is None:
            if jobnum_input not in jobnum_list_retrieved:
                yield (jobnum_input, -1, {}, None)
            continue

        # jobnum_input == None, output_dn != None => Found leftovers of job retrieval
        if jobnum_input is None:
            continue

        # jobnum_input != None, output_dn != None => Job retrieval from WMS was ok
        job_fn = os.path.join(output_dn, 'job.info')
        job_info = ignore_exception(Exception, None, self._job_parser.process, output_dn)
        if job_info is None:
            # no exception is active here, so log without (empty) traceback
            self._log.error('Unable to parse job.info')
        if job_info:
            jobnum = job_info[JobResult.JOBNUM]
            if jobnum != jobnum_input:
                raise BackendError('Invalid job id in job file %s' % job_fn)
            target_dn = os.path.join(self._path_output, 'job_%d' % jobnum)
            if _force_move(output_dn, target_dn):
                jobnum_list_retrieved.append(jobnum_input)
                # report the new location - output_dn no longer exists after the move
                yield (jobnum, job_info[JobResult.EXITCODE], job_info[JobResult.RAW], target_dn)
            else:
                yield (jobnum, -1, {}, None)
            continue

        # Clean empty output_dns
        for sub_dn in imap(lambda x: x[0], os.walk(output_dn, topdown=False)):
            ignore_exception(Exception, None, os.rmdir, sub_dn)

        if os.path.exists(output_dn):
            # Preserve failed job
            ensure_dir_exists(self._path_fail, 'failed output directory')
            _force_move(output_dn, os.path.join(self._path_fail, os.path.basename(output_dn)))

        yield (jobnum_input, -1, {}, None)
def _resolve_hostname():
    """Return the primary host name reported by ``socket.gethostbyaddr`` for the
    local host name, or the plain ``socket.gethostname()`` value on failure."""
    import socket
    host = socket.gethostname()

    def _lookup_primary_name():
        return socket.gethostbyaddr(host)[0]

    return ignore_exception(Exception, host, _lookup_primary_name)
def fill_parameter_content(self, pnum, result):
    """Set ``result[self._output_vn]`` to the value of the expression ``self._fmt``.

    The expression is evaluated with the entries of ``result`` as names
    (keys converted to str, values parsed by ``parse_type``); on any exception
    ``self._default`` is stored instead.
    """
    def _convert_item(key_value):
        (key, value) = key_value
        return (str(key), parse_type(str(value)))

    tmp = dict(imap(_convert_item, result.items()))
    # NOTE(review): eval runs arbitrary code - only use with trusted format expressions
    output_value = ignore_exception(Exception, self._default, eval, self._fmt, tmp)
    result[self._output_vn] = output_value
def _hang_protection_wrapper():
    """Call ``fun`` and store its return value (None on exception) in ``result[None]``.

    ``fun`` and ``result`` are taken from the enclosing scope.
    """
    fun_result = ignore_exception(Exception, None, fun)
    result[None] = fun_result
def ping_host(host, timeout=1):
    """Send a single ping to ``host``; return a time in seconds or None.

    None is returned if the ping process cannot be started, yields no usable
    output or the value cannot be parsed. The value is the second-to-last
    '/'-separated field of the last output line, divided by 1000.
    """
    proc = ignore_exception(Exception, None, LocalProcess, 'ping', '-Uqnc', 1, '-W', timeout, host)
    if proc is None:
        # without this guard the attribute access below raised AttributeError
        return None
    ping_str_list = ignore_exception(Exception, '', proc.get_output, timeout).strip().split('\n')
    if not ping_str_list[-1].endswith('ms'):
        return None
    return ignore_exception(Exception, None, lambda: float(ping_str_list[-1].split('/')[-2]) / 1000.)