def get_compmake_db():
    """Return the active compmake DB, creating a fallback if unset.

    If no DB backend has been configured yet, logs a warning and
    installs a plain in-memory dict as the backend before returning.
    """
    db = CompmakeGlobalState.compmake_db
    if db is None:
        logger.warning('Warning, no DB was specified. Will use in-memory dict.')
        set_compmake_db({})
        db = CompmakeGlobalState.compmake_db
    return db
def clean_cache_relations(job_id, db):
    """Remove dynamic-children bookkeeping for ``job_id`` from its parents.

    If ``job_id``'s cache state is DONE, every direct parent that recorded
    dynamically-defined children under this job has that record removed,
    and those children are dropped from the parent's ``children`` set.

    :param job_id: ID of the job whose cache relations are being cleaned.
    :param db: the compmake DB to read/write jobs from.
    """
    # print('cleaning cache relations for %r ' % job_id)
    if not job_exists(job_id, db):
        # Consistency fix: use the logger (as the warning below does)
        # instead of a bare print().
        logger.warning('Cleaning cache for job %r which does not exist anymore; ignoring' % job_id)
        return

    # Only DONE jobs can have recorded dynamically-defined children.
    cache = get_job_cache(job_id, db)
    if cache.state != Cache.DONE:
        return

    for parent in direct_parents(job_id, db):
        if not job_exists(parent, db):
            msg = ('Could not find job %r (parent of %s) - ok if the job was deleted'
                   ' otherwise it is a bug' % (parent, job_id))
            logger.warning(msg)
            continue
        parent_job = get_job(parent, db)
        # Idiom fix: `job_id not in` + guard clause replaces the
        # original `if not job_id in ...: continue else:` shape.
        if job_id not in parent_job.dynamic_children:
            continue
        dynamic_children = parent_job.dynamic_children[job_id]
        del parent_job.dynamic_children[job_id]
        parent_job.children = parent_job.children - dynamic_children
        set_job(parent, parent_job, db)
def clean_cache_relations(job_id, db):
    """Detach ``job_id``'s dynamically-defined children from its parents.

    For a job whose cache is DONE, walks its direct parents; any parent
    that recorded dynamic children under this job loses that entry, and
    those children are subtracted from the parent's child set.
    """
    if not job_exists(job_id, db):
        print(
            'Cleaning cache for job %r which does not exist anymore; ignoring'
            % job_id)
        return

    # Nothing to do unless the job completed (only DONE jobs define
    # dynamic children worth cleaning).
    if get_job_cache(job_id, db).state != Cache.DONE:
        return

    for parent in direct_parents(job_id, db):
        if not job_exists(parent, db):
            logger.warning(
                'Could not find job %r (parent of %s) - ok if the job was deleted'
                ' otherwise it is a bug' % (parent, job_id))
            continue
        parent_job = get_job(parent, db)
        if job_id in parent_job.dynamic_children:
            removed = parent_job.dynamic_children[job_id]
            del parent_job.dynamic_children[job_id]
            parent_job.children = parent_job.children - removed
            set_job(parent, parent_job, db)
def tearDown(self):
    """Tear down the test fixture.

    Deliberately keeps the temporary directory (for post-mortem
    inspection) and warns about any multiprocessing children still
    alive, rather than failing the test.

    Fix: the original defined ``msg`` only inside the warning branch,
    so the (currently unreachable) ``raise Exception(msg)`` branch
    would have raised NameError instead. ``msg`` is now computed
    before branching, and the hard-coded ``if True:`` toggles are
    named.
    """
    keep_tmp_dir = True  # flip to False to actually remove the fixture dir
    if keep_tmp_dir:
        print('not deleting %s' % self.root0)
    else:
        rmtree(self.root0)

    from multiprocessing import active_children
    c = active_children()
    print('active children: %s' % c)
    if c:
        msg = 'Still active children: %s' % c
        warn_only = True  # set False to make lingering children fatal
        if warn_only:
            logger.warning(msg)
        else:
            raise Exception(msg)
def get_readline():
    """Return the readline (or pyreadline) module, or None.

    The module is returned only when the "readline" config option is
    enabled and one of the two packages can actually be imported;
    otherwise a warning is logged and None is returned.

    :return: Reference to readline module or None
    """
    if not get_compmake_config('readline'):
        return None
    try:
        import readline
    except BaseException as e:
        try:
            import pyreadline as readline  # @UnresolvedImport
        except Exception as e2:
            # Neither backend importable: report both failures.
            parts = ['Neither readline or pyreadline available.',
                     '- readline error: %s' % e,
                     '- pyreadline error: %s' % e2]
            logger.warning('\n'.join(parts))
            return None
    return readline
import time

__all__ = [
    'AvgSystemStats',
]

# psutil is optional: a missing package only produces a warning here;
# availability is re-checked in the constructor below.
try:
    import psutil  # @UnusedImport
except ImportError:
    from compmake import logger

    logger.warning('Package "psutil" not found; load balancing '
                   'and system stats (CPU, MEM) not available.')


class AvgSystemStats(object):
    """
    Collects average statistics about the system using psutil.
    """

    def __init__(self, interval, history_len):
        """
        :param interval: Collect statistics according to this interval.
        :param history_len: Use this many to compute avg/max statistics.
        """
        self.interval = interval
        self.history_len = history_len
        try:
            import psutil  # @UnresolvedImport @Reimport
        except:
            # psutil could not be imported: mark stats as unavailable.
            self._available = False
        # NOTE(review): the class body appears truncated in this chunk
        # (no success path after the import) -- confirm against the
        # full file.
def make(job_id, context, echo=False):  # @UnusedVariable
    """
    Makes a single job.

    Returns a dictionary with fields:
        "user_object"       the computed result
        "user_object_deps"  set of Promises
        "new_jobs"          new jobs defined during execution
        "deleted_jobs"      jobs that were defined but not anymore

    Raises JobFailed or JobInterrupted. Also SystemExit,
    KeyboardInterrupt, MemoryError are captured.
    """
    db = context.get_compmake_db()

    int_make = IntervalTimer()

    host = 'hostname'  # XXX

    if get_compmake_config('set_proc_title'):
        setproctitle('cm-%s' % job_id)

    # TODO: should we make sure we are up to date???
    # up, reason = up_to_date(job_id, db=db)  # @UnusedVariable
    # if up:
    #     msg = 'Job %r appears already done.' % job_id
    #     msg += 'This can only happen if another compmake process uses the ' \
    #            'same DB.'
    #     logger.error(msg)
    #     user_object = get_job_userobject(job_id, db=db)
    #     # XXX: this is not right anyway
    #     return dict(user_object=user_object,
    #                 user_object_deps=collect_dependencies(user_object),
    #                 deleted_jobs=[],
    #                 new_jobs=[])

    job = get_job(job_id, db=db)
    cache = get_job_cache(job_id, db=db)

    if cache.state == Cache.DONE:
        # Remember which jobs this job defined last time, so we can
        # delete the ones not re-defined in this run.
        prev_defined_jobs = set(cache.jobs_defined)
        # print('%s had previously defined %s' % (job_id, prev_defined_jobs))
    else:
        # print('%s was not DONE' % job_id)
        prev_defined_jobs = None

    # Note that at this point we save important information in the Cache
    # so if we set this then it's going to destroy it
    # cache.state = Cache.IN _ PROGRESS
    # set_job_cache(job_id, cache, db=db)

    # TODO: delete previous user object

    def progress_callback(stack):
        # Forward progress updates as compmake events.
        publish(context, 'job-progress-plus', job_id=job_id, host=host,
                stack=stack)

    init_progress_tracking(progress_callback)

    disable_capture = False
    if disable_capture:
        capture = None
    else:
        echo = False
        capture = OutputCapture(context=context, prefix=job_id,
                                # This is instantaneous echo and should be False
                                # They will generate events anyway.
                                echo_stdout=echo,
                                echo_stderr=echo)

    # TODO: add whether we should just capture and not echo
    # Monkey-patch the global logging emit so records are colorized and
    # go through stdout (and hence the capture); restored in `finally`.
    old_emit = logging.StreamHandler.emit

    from compmake.ui.coloredlog import colorize_loglevel

    FORMAT = "%(name)10s|%(filename)15s:%(lineno)-4s - %(funcName)-15s| %(message)s"
    formatter = Formatter(FORMAT)

    class Store(object):
        # Counts log records that could not be formatted/printed.
        nhidden = 0

    def my_emit(_, log_record):
        # note that log_record.msg might be an exception
        try:
            try:
                s = str(log_record.msg)
            except UnicodeEncodeError:
                s = unicode(log_record.msg)
            except:
                s = 'Could not print log_record %s' % id(log_record)

            log_record.msg = colorize_loglevel(log_record.levelno, s)
            res = formatter.format(log_record)
            print(res)  # this will be captured by OutputCapture anyway
        except:
            Store.nhidden += 1

    logging.StreamHandler.emit = my_emit

    # Snapshot of session-defined jobs, to tell apart jobs generated by
    # this execution on failure.
    already = set(context.get_jobs_defined_in_this_session())

    def get_deleted_jobs():
        # Roll back job definitions on failure: delete both the jobs
        # generated during this execution and the previously defined ones.
        generated = set(context.get_jobs_defined_in_this_session()) - already
        # print('failure: rolling back %s' % generated)

        from compmake.ui.ui import delete_jobs_recurse_definition

        todelete_ = set()
        # delete the jobs that were previously defined
        if prev_defined_jobs:
            todelete_.update(prev_defined_jobs)
        # and also the ones that were generated
        todelete_.update(generated)
        deleted_jobs_ = delete_jobs_recurse_definition(jobs=todelete_, db=db)
        # now we failed, so we need to roll back other changes
        # to the db
        return deleted_jobs_

    try:
        result = job_compute(job=job, context=context)

        assert isinstance(result, dict) and len(result) == 5
        user_object = result['user_object']
        new_jobs = result['new_jobs']
        int_load_results = result['int_load_results']
        int_compute = result['int_compute']
        int_gc = result['int_gc']
        int_gc.stop()

    except KeyboardInterrupt as e:
        bt = traceback.format_exc()
        deleted_jobs = get_deleted_jobs()
        mark_as_failed(job_id, 'KeyboardInterrupt: ' + str(e),
                       backtrace=bt, db=db)

        # Persist whatever output the job produced before the interrupt.
        cache = get_job_cache(job_id, db=db)
        if capture is not None:
            cache.captured_stderr = capture.get_logged_stderr()
            cache.captured_stdout = capture.get_logged_stdout()
        else:
            msg = '(Capture turned off.)'
            cache.captured_stderr = msg
            cache.captured_stdout = msg
        set_job_cache(job_id, cache, db=db)
        raise JobInterrupted(job_id=job_id, deleted_jobs=deleted_jobs)

    except (BaseException, ArithmeticError, BufferError, LookupError,
            Exception, SystemExit, MemoryError) as e:
        bt = traceback.format_exc()
        if six.PY2:
            s = '%s: %s' % (type(e).__name__, e)
            #
            # s = type(e).__name__ + ': ' + e.__str__().strip()
            # try:
            #     s = s.decode('utf-8', 'replace').encode('utf-8', 'replace')
            # except (UnicodeDecodeError, UnicodeEncodeError) as ue:
            #     print(ue)  # XXX
            #     s = 'Could not represent string.'
        else:
            s = '%s: %s' % (type(e).__name__, e)
        mark_as_failed(job_id, s, backtrace=bt, db=db)
        deleted_jobs = get_deleted_jobs()

        # Persist the captured output along with the failure.
        cache = get_job_cache(job_id, db=db)
        if capture is not None:
            cache.captured_stderr = capture.get_logged_stderr()
            cache.captured_stdout = capture.get_logged_stdout()
        else:
            msg = '(Capture turned off.)'
            cache.captured_stderr = msg
            cache.captured_stdout = msg
        set_job_cache(job_id, cache, db=db)

        raise JobFailed(job_id=job_id, reason=s, bt=bt,
                        deleted_jobs=deleted_jobs)

    finally:
        int_finally = IntervalTimer()
        if capture is not None:
            capture.deactivate()
        # even if we send an error, let's save the output of the process
        logging.StreamHandler.emit = old_emit
        if Store.nhidden > 0:
            msg = 'compmake: There were %d messages hidden due to bugs in logging.' % Store.nhidden
            print(msg)
        int_finally.stop()
        # print('finally: %s' % int_finally)

    int_save_results = IntervalTimer()

    # print('Now %s has defined %s' % (job_id, new_jobs))

    if prev_defined_jobs is not None:
        # did we defined fewer jobs this time around?
        # then we need to delete them
        todelete = set()
        for x in prev_defined_jobs:
            if x not in new_jobs:
                todelete.add(x)
        from compmake.ui.ui import delete_jobs_recurse_definition
        deleted_jobs = delete_jobs_recurse_definition(jobs=todelete, db=db)
    else:
        deleted_jobs = set()

    # print('Now %s has deleted %s' % (job_id, deleted_jobs))

    set_job_userobject(job_id, user_object, db=db)
    int_save_results.stop()

    # logger.debug('Save time for %s: %s s' % (job_id, walltime_save_result))

    int_make.stop()
    end_time = time()

    # Record the DONE state together with all timing information.
    cache = Cache(Cache.DONE)
    # print('int_make: %s' % int_make)
    # print('int_load_results: %s' % int_load_results)
    # print('int_compute: %s' % int_compute)
    if int_gc.get_walltime_used() > 1.0:
        logger.warning('Expensive garbage collection detected at the end of %s: %s'
                       % (job_id, int_gc))
    # print('int_save_results: %s' % int_save_results)
    cache.int_make = int_make
    cache.int_load_results = int_load_results
    cache.int_compute = int_compute
    cache.int_gc = int_gc
    cache.int_save_results = int_save_results

    cache.timestamp = end_time
    cache.walltime_used = int_make.get_walltime_used()
    cache.cputime_used = int_make.get_cputime_used()
    cache.host = host
    cache.jobs_defined = new_jobs
    set_job_cache(job_id, cache, db=db)

    return dict(user_object=user_object,
                user_object_deps=collect_dependencies(user_object),
                new_jobs=new_jobs,
                deleted_jobs=deleted_jobs)
def make(job_id, context, echo=False):  # @UnusedVariable
    """
    Makes a single job.

    Returns a dictionary with fields:
        "user_object"       the computed result
        "user_object_deps"  set of Promises
        "new_jobs"          new jobs defined during execution
        "deleted_jobs"      jobs that were defined but not anymore

    Raises JobFailed or JobInterrupted. Also SystemExit,
    KeyboardInterrupt, MemoryError are captured.
    """
    db = context.get_compmake_db()

    int_make = IntervalTimer()

    host = 'hostname'  # XXX

    if get_compmake_config('set_proc_title'):
        setproctitle('cm-%s' % job_id)

    # TODO: should we make sure we are up to date???
    # up, reason = up_to_date(job_id, db=db)  # @UnusedVariable
    # if up:
    #     msg = 'Job %r appears already done.' % job_id
    #     msg += 'This can only happen if another compmake process uses the ' \
    #            'same DB.'
    #     logger.error(msg)
    #     user_object = get_job_userobject(job_id, db=db)
    #     # XXX: this is not right anyway
    #     return dict(user_object=user_object,
    #                 user_object_deps=collect_dependencies(user_object),
    #                 deleted_jobs=[],
    #                 new_jobs=[])

    job = get_job(job_id, db=db)
    cache = get_job_cache(job_id, db=db)

    if cache.state == Cache.DONE:
        # Track the jobs this job defined on its previous run so that
        # stale ones can be deleted afterwards.
        prev_defined_jobs = set(cache.jobs_defined)
        # print('%s had previously defined %s' % (job_id, prev_defined_jobs))
    else:
        # print('%s was not DONE' % job_id)
        prev_defined_jobs = None

    # Note that at this point we save important information in the Cache
    # so if we set this then it's going to destroy it
    # cache.state = Cache.IN _ PROGRESS
    # set_job_cache(job_id, cache, db=db)

    # TODO: delete previous user object

    def progress_callback(stack):
        # Publish per-job progress as a compmake event.
        publish(context, 'job-progress-plus', job_id=job_id, host=host,
                stack=stack)

    init_progress_tracking(progress_callback)

    disable_capture = False
    if disable_capture:
        capture = None
    else:
        echo = False
        capture = OutputCapture(context=context, prefix=job_id,
                                # This is instantaneous echo and should be False
                                # They will generate events anyway.
                                echo_stdout=echo,
                                echo_stderr=echo)

    # TODO: add whether we should just capture and not echo
    # Temporarily replace the global logging emit (restored in `finally`)
    # so log records are colorized and routed through the capture.
    old_emit = logging.StreamHandler.emit

    from compmake.ui.coloredlog import colorize_loglevel

    FORMAT = "%(name)10s|%(filename)15s:%(lineno)-4s - %(funcName)-15s| %(message)s"
    formatter = Formatter(FORMAT)

    class Store(object):
        # Number of log records swallowed due to formatting errors.
        nhidden = 0

    def my_emit(_, log_record):
        # note that log_record.msg might be an exception
        try:
            try:
                s = str(log_record.msg)
            except UnicodeEncodeError:
                s = unicode(log_record.msg)
            except:
                s = 'Could not print log_record %s' % id(log_record)

            log_record.msg = colorize_loglevel(log_record.levelno, s)
            res = formatter.format(log_record)
            print(res)  # this will be captured by OutputCapture anyway
        except:
            Store.nhidden += 1

    logging.StreamHandler.emit = my_emit

    # Snapshot so get_deleted_jobs() can tell which jobs this execution
    # generated.
    already = set(context.get_jobs_defined_in_this_session())

    def get_deleted_jobs():
        # On failure, delete both previously defined jobs and those
        # generated during this execution (rollback).
        generated = set(context.get_jobs_defined_in_this_session()) - already
        # print('failure: rolling back %s' % generated)

        from compmake.ui.ui import delete_jobs_recurse_definition

        todelete_ = set()
        # delete the jobs that were previously defined
        if prev_defined_jobs:
            todelete_.update(prev_defined_jobs)
        # and also the ones that were generated
        todelete_.update(generated)
        deleted_jobs_ = delete_jobs_recurse_definition(jobs=todelete_, db=db)
        # now we failed, so we need to roll back other changes
        # to the db
        return deleted_jobs_

    try:
        result = job_compute(job=job, context=context)

        assert isinstance(result, dict) and len(result) == 5
        user_object = result['user_object']
        new_jobs = result['new_jobs']
        int_load_results = result['int_load_results']
        int_compute = result['int_compute']
        int_gc = result['int_gc']
        int_gc.stop()

    except KeyboardInterrupt as e:
        bt = traceback.format_exc()
        deleted_jobs = get_deleted_jobs()
        mark_as_failed(job_id, 'KeyboardInterrupt: ' + str(e),
                       backtrace=bt, db=db)

        # Save any output produced before the interrupt.
        cache = get_job_cache(job_id, db=db)
        if capture is not None:
            cache.captured_stderr = capture.get_logged_stderr()
            cache.captured_stdout = capture.get_logged_stdout()
        else:
            msg = '(Capture turned off.)'
            cache.captured_stderr = msg
            cache.captured_stdout = msg
        set_job_cache(job_id, cache, db=db)
        raise JobInterrupted(job_id=job_id, deleted_jobs=deleted_jobs)

    except (BaseException, ArithmeticError, BufferError, LookupError,
            Exception, SystemExit, MemoryError) as e:
        bt = traceback.format_exc()
        if six.PY2:
            s = '%s: %s' % (type(e).__name__, e)
            #
            # s = type(e).__name__ + ': ' + e.__str__().strip()
            # try:
            #     s = s.decode('utf-8', 'replace').encode('utf-8', 'replace')
            # except (UnicodeDecodeError, UnicodeEncodeError) as ue:
            #     print(ue)  # XXX
            #     s = 'Could not represent string.'
        else:
            s = '%s: %s' % (type(e).__name__, e)
        mark_as_failed(job_id, s, backtrace=bt, db=db)
        deleted_jobs = get_deleted_jobs()

        # Save the captured output alongside the failure record.
        cache = get_job_cache(job_id, db=db)
        if capture is not None:
            cache.captured_stderr = capture.get_logged_stderr()
            cache.captured_stdout = capture.get_logged_stdout()
        else:
            msg = '(Capture turned off.)'
            cache.captured_stderr = msg
            cache.captured_stdout = msg
        set_job_cache(job_id, cache, db=db)

        raise JobFailed(job_id=job_id, reason=s, bt=bt,
                        deleted_jobs=deleted_jobs)

    finally:
        int_finally = IntervalTimer()
        if capture is not None:
            capture.deactivate()
        # even if we send an error, let's save the output of the process
        logging.StreamHandler.emit = old_emit
        if Store.nhidden > 0:
            msg = 'compmake: There were %d messages hidden due to bugs in logging.' % Store.nhidden
            print(msg)
        int_finally.stop()
        # print('finally: %s' % int_finally)

    int_save_results = IntervalTimer()

    # print('Now %s has defined %s' % (job_id, new_jobs))

    if prev_defined_jobs is not None:
        # did we defined fewer jobs this time around?
        # then we need to delete them
        todelete = set()
        for x in prev_defined_jobs:
            if x not in new_jobs:
                todelete.add(x)
        from compmake.ui.ui import delete_jobs_recurse_definition
        deleted_jobs = delete_jobs_recurse_definition(jobs=todelete, db=db)
    else:
        deleted_jobs = set()

    # print('Now %s has deleted %s' % (job_id, deleted_jobs))

    set_job_userobject(job_id, user_object, db=db)
    int_save_results.stop()

    # logger.debug('Save time for %s: %s s' % (job_id, walltime_save_result))

    int_make.stop()
    end_time = time()

    # Persist the DONE state with all timing breakdowns.
    cache = Cache(Cache.DONE)
    # print('int_make: %s' % int_make)
    # print('int_load_results: %s' % int_load_results)
    # print('int_compute: %s' % int_compute)
    if int_gc.get_walltime_used() > 1.0:
        logger.warning(
            'Expensive garbage collection detected at the end of %s: %s'
            % (job_id, int_gc))
    # print('int_save_results: %s' % int_save_results)
    cache.int_make = int_make
    cache.int_load_results = int_load_results
    cache.int_compute = int_compute
    cache.int_gc = int_gc
    cache.int_save_results = int_save_results

    cache.timestamp = end_time
    cache.walltime_used = int_make.get_walltime_used()
    cache.cputime_used = int_make.get_cputime_used()
    cache.host = host
    cache.jobs_defined = new_jobs
    set_job_cache(job_id, cache, db=db)

    return dict(user_object=user_object,
                user_object_deps=collect_dependencies(user_object),
                new_jobs=new_jobs,
                deleted_jobs=deleted_jobs)
import traceback

from contracts import contract

from compmake import logger

# Try to set up readline (pyreadline as the Windows fallback) for the
# interactive console; degrade gracefully if neither can be imported.
use_readline = True
if use_readline:
    try:
        import readline
    except ImportError as e:
        try:
            import pyreadline as readline
        except Exception as e2:
            # TODO: write message
            use_readline = False
            logger.warning('Neither readline or pyreadline available:\n- %s\n- %s'
                           % (e, e2))


def interpret_commands_wrap(commands):
    """
        Returns:

        0            everything ok
        int not 0    error
        string       an error, explained

        False?       we want to exit (not found in source though)
    """
    publish('command-line-starting', command=commands)
    # NOTE(review): the rest of this function is not visible in this
    # chunk; it appears to continue past this point -- confirm against
    # the full file.
# -*- coding: utf-8 -*- import time __all__ = [ 'AvgSystemStats', ] try: import psutil # @UnusedImport except ImportError: from compmake import logger logger.warning('Package "psutil" not found; load balancing ' 'and system stats (CPU, MEM) not available.') class AvgSystemStats(object): """ Collects average statistics about the system using psutil. """ def __init__(self, interval, history_len): """ :param interval: Collect statistics according to this interval. :param history_len: Use this many to compute avg/max statistics. """ self.interval = interval self.history_len = history_len try: import psutil # @UnresolvedImport @Reimport except: self._available = False