def setUp(self):
    """Reset the job storage and define a fixed set of jobs in known states.

    After the reset calls, every ``(job_id, state)`` pair in ``self.jobs``
    is defined with ``comp(dummy, ...)`` and its cache state is overwritten
    with the requested one.  The sets ``self.all``, ``self.failed``,
    ``self.done``, ``self.in_progress`` and ``self.not_started`` hold the
    job ids grouped by that state.
    """
    use_filesystem('test1_db')
    set_namespace('Test1')
    remove_all_jobs()
    reset_jobs_definition_set()

    self.jobs = [
        ('a', Cache.DONE),
        ('b', Cache.FAILED),
        ('c', Cache.NOT_STARTED),
        ('d', Cache.DONE),
        ('e', Cache.DONE),
        ('f', Cache.IN_PROGRESS),
        ('g', Cache.DONE),
        ('h', Cache.FAILED),
        ('i', Cache.DONE),
        ('ii', Cache.DONE),
        ('v_rangefinder_nonunif-plot_tensors_tex-0', Cache.DONE),
    ]

    # Define each job, then force its cache into the wanted state.
    for name, wanted_state in self.jobs:
        comp(dummy, job_id=name)
        entry = get_job_cache(name)
        entry.state = wanted_state
        set_job_cache(name, entry)

    def jobs_in(wanted_state):
        # Ids of the jobs that were put in the given state.
        return set(name for name, st in self.jobs if st == wanted_state)

    self.all = set(name for name, _ in self.jobs)
    self.failed = jobs_in(Cache.FAILED)
    self.done = jobs_in(Cache.DONE)
    self.in_progress = jobs_in(Cache.IN_PROGRESS)
    self.not_started = jobs_in(Cache.NOT_STARTED)
def erase_job_if_files_updated(compmake_context, promise, filenames):
    """
        Marks the promised job to be remade if any of the given files
        was modified after the job was completed.

        compmake_context: object providing get_compmake_db()
        promise: a Promise; its job_id identifies the job
        filenames: list or tuple of paths, all of which must exist

        Raises ValueError if one of the files does not exist.
        Nothing happens unless the job's cache state is DONE and its
        timestamp is older than the newest file modification time.
    """
    check_isinstance(promise, Promise)
    check_isinstance(filenames, (list, tuple))

    filenames = list(filenames)

    for filename in filenames:
        if not os.path.exists(filename):
            raise ValueError('File does not exist: %s' % filename)

    last_update = max(os.path.getmtime(filename) for filename in filenames)

    db = compmake_context.get_compmake_db()
    job_id = promise.job_id
    cache = get_job_cache(job_id, db)

    if cache.state != cache.DONE:
        return

    done_at = cache.timestamp
    if not (done_at < last_update):
        return

    def describe_age(timestamp):
        # Seconds elapsed between the timestamp and now, for the log.
        return '%.3fs ago' % (time.time() - timestamp)

    if len(filenames) < 3:
        show_filenames = filenames
    else:
        show_filenames = '(too long to show)'

    logger.info('Cleaning job %r because files updated %s'
                % (job_id, show_filenames))
    logger.info(' files last updated: %s' % describe_age(last_update))
    logger.info(' job last done: %s' % describe_age(done_at))
    mark_to_remake(job_id, db)
def list_jobs(job_list):
    """Print one status line for each job in ``job_list``.

    Each line holds the job id padded to a fixed width, the colored
    description of the cache state and, depending on the state, the
    iteration counters.  Up-to-date jobs also show how long ago they were
    completed; DONE / MORE_REQUESTED jobs that are not up to date show the
    reason given by ``up_to_date``.
    """
    for job_id in job_list:
        up, reason = up_to_date(job_id)

        s = job_id
        s += " " + (" " * (60 - len(s)))

        cache = get_job_cache(job_id)
        tag = Cache.state2desc[cache.state]

        # The color depends on both the state and the up-to-date flag.
        k = (cache.state, up)
        assert k in state2color, "I found strange state %s" % str(k)
        color_args = state2color[k]
        s += colored(tag, **color_args)

        if cache.state == Cache.DONE and cache.done_iterations > 1:
            s += ' %s iterations completed ' % cache.done_iterations
        if cache.state == Cache.IN_PROGRESS:
            s += ' (%s/%s iterations in progress) ' % \
                (cache.iterations_in_progress, cache.iterations_goal)

        if up:
            when = duration_human(time() - cache.timestamp)
            s += " (%s ago)" % when
        elif cache.state in [Cache.DONE, Cache.MORE_REQUESTED]:
            s += " (needs update: %s)" % reason

        # Parenthesized form works as a statement on Python 2 and as a
        # function call on Python 3.
        print(s)
def details_why_one(job_id, context, cq):
    """Print a one-line summary of the exception stored in a job's cache.

    Only the first line of ``str(cache.exception)`` is shown; when there
    are more lines, the number of omitted lines is appended.  Nothing is
    printed if the job has no cache.  ``cq`` is accepted but not used.
    """
    db = context.get_compmake_db()
    if not job_cache_exists(job_id, db):
        return

    cache = get_job_cache(job_id, db)
    reason_lines = str(cache.exception).split('\n')
    summary = reason_lines[0]
    hidden = len(reason_lines) - 1
    if hidden > 0:
        summary += ' ... +%d lines' % hidden
    print('%20s: %s' % (job_id, summary))
def display_stats(job_list):
    """Print job counts grouped by cache state and by function.

    For every job in ``job_list`` the cache state is counted both globally
    and per ``command_desc`` of the job.  The summary by state only lists
    states with at least one job; the summary by function shows the number
    of done, failed and remaining jobs.
    """
    states_order = [Cache.NOT_STARTED, Cache.IN_PROGRESS,
                    Cache.MORE_REQUESTED, Cache.FAILED, Cache.DONE]

    # Counters start at 0 for every known state.
    states2count = dict.fromkeys(states_order, 0)
    function2state2count = {}
    total = 0

    for job_id in job_list:
        cache = get_job_cache(job_id)
        states2count[cache.state] += 1
        total += 1

        function_id = get_job(job_id).command_desc
        if function_id not in function2state2count:
            # Built without "map(...) + [...]", which fails on Python 3
            # where map() returns an iterator rather than a list.
            record = dict.fromkeys(states_order, 0)
            record['all'] = 0
            function2state2count[function_id] = record

        function2state2count[function_id][cache.state] += 1
        function2state2count[function_id]['all'] += 1

        if total == 100:  # XXX: use standard method
            info("Loading a large number of jobs...")

    print("Found %s jobs in total. Summary by state:" % total)

    for state in states_order:
        desc = "%30s" % Cache.state2desc[state]
        # colorize output
        desc = colored(desc, **state2color[state])
        num = states2count[state]
        if num > 0:
            print("%s: %5d" % (desc, num))

    print("Summary by function:")

    for function_id, function_stats in function2state2count.items():
        ndone = function_stats[Cache.DONE]
        nfailed = function_stats[Cache.FAILED]
        nrest = function_stats['all'] - ndone - nfailed
        failed_s = "%5d failed" % nfailed
        if nfailed > 0:
            failed_s = colored(failed_s, color='red')
        s = "%5d done, %s, %5d to do." % (ndone, failed_s, nrest)
        print(" %30s(): %s" % (function_id, s))
def jobs_defined(job_id, db):
    """
        Returns the set of jobs defined by the given job,
        as recorded in its cache.

        The job must be DONE, otherwise CompmakeBug is raised.
    """
    check_isinstance(job_id, six.string_types)
    with trace_bugs('jobs_defined(%r)' % job_id):
        cache = get_job_cache(job_id, db=db)
        if cache.state == Cache.DONE:
            return set(cache.jobs_defined)

        status = Cache.state2desc[cache.state]
        msg = ('Cannot get jobs_defined for job not done ' +
               '(status: %s)' % status)
        raise CompmakeBug(msg)
def jobs_defined(job_id, db):
    """
        Gets the jobs that were defined by the given job, as a new set
        built from the job's cache.

        Raises CompmakeBug if the job is not in the DONE state.
    """
    check_isinstance(job_id, six.string_types)
    with trace_bugs('jobs_defined(%r)' % job_id):
        job_cache = get_job_cache(job_id, db=db)
        not_done = job_cache.state != Cache.DONE
        if not_done:
            state_desc = Cache.state2desc[job_cache.state]
            raise CompmakeBug('Cannot get jobs_defined for job not done '
                              + '(status: %s)' % state_desc)
        return set(job_cache.jobs_defined)
def list_job_detail(job_id):
    """Print a detailed report about one job.

    The report lists id, state, up-to-date status, children and parents,
    followed by state-dependent details (iterations and timing, progress,
    or exception and backtrace) and the captured stdout / stderr, each
    line prefixed with '|'.

    An AttributeError raised while reading the cache ends the report
    silently (best-effort behaviour kept from the original code).
    """
    cache = get_job_cache(job_id)
    parents = direct_parents(job_id)
    children = direct_children(job_id)
    up, reason = up_to_date(job_id)

    red = lambda x: colored(x, 'red')
    bold = lambda x: colored(rjust(x + ' ', 15), attrs=['bold'])
    dark = lambda x: colored(x, attrs=['dark'])

    def display_with_prefix(text, prefix, transform=lambda x: x,
                            out=sys.stdout):
        # Write each line of text to out, transformed and prefixed.
        for line in text.split('\n'):
            out.write('%s%s\n' % (prefix, transform(line)))

    try:
        print(bold('Job ID:') + '%s' % job_id)
        print(bold('Status:') + '%s' % Cache.state2desc[cache.state])
        print(bold('Uptodate:') + '%s (%s)' % (up, reason))
        print(bold('Children:') + '%s' % ', '.join(children))
        print(bold('Parents:') + '%s' % ', '.join(parents))

        if cache.state == Cache.DONE and cache.done_iterations > 1:
            print(bold('Iterations:') + '%s' % cache.done_iterations)
            print(bold('Wall Time:') + '%.4f s' % cache.walltime_used)
            print(bold('CPU Time:') + '%.4f s' % cache.cputime_used)
            print(bold('Host:') + '%s' % cache.host)

        if cache.state == Cache.IN_PROGRESS:
            print(bold('Progress:') + '%s/%s' %
                  (cache.iterations_in_progress, cache.iterations_goal))

        if cache.state == Cache.FAILED:
            print(red(cache.exception))
            print(red(cache.backtrace))

        if cache.captured_stdout:
            print("-----> captured stdout <-----")
            display_with_prefix(cache.captured_stdout, prefix='|',
                                transform=dark)

        if cache.captured_stderr:
            print("-----> captured stderr <-----")
            # Show stderr here; the previous code printed stdout twice.
            display_with_prefix(cache.captured_stderr, prefix='|',
                                transform=dark)
    except AttributeError:
        # Deliberately ignored: a missing cache attribute just truncates
        # the report instead of failing the command.
        pass
def direct_uptodate_deps_inverse(job_id, db):
    """
        Returns the jobs that directly "depend" on this one: its direct
        parents plus, if the job is DONE, the jobs that it defined.

        The set returned by direct_parents() is extended in place.
    """
    from compmake.jobs.queries import direct_parents
    from compmake.jobs.storage import get_job_cache

    result = direct_parents(job_id, db)

    # Not sure if need to be here --- added when doing graph-animation
    # for jobs in progress
    job_is_done = get_job_cache(job_id, db).state == Cache.DONE
    if job_is_done:
        result.update(jobs_defined(job_id, db))
    return result
def direct_uptodate_deps_inverse_closure(job_id, db):
    """
        Closure of direct_uptodate_deps_inverse: all jobs that depend on
        this one.

        The result is made of all the parents, their definition closure
        and, if the job is DONE, the jobs that it defined.  The set
        returned by parents() is extended in place.
    """
    from compmake.jobs.queries import parents
    from compmake.jobs.queries import definition_closure
    from compmake.jobs.storage import get_job_cache

    # all parents
    result = parents(job_id, db)

    # plus their definition closure
    # (it may overlap with the parents: they are not disjoint in general)
    result.update(definition_closure(result, db))

    # plus the ones that were defined by it
    job_is_done = get_job_cache(job_id, db).state == Cache.DONE
    if job_is_done:
        result.update(jobs_defined(job_id, db))
    return result
def go(path):
    """Report the failed jobs of the compmake DB at ``path`` to Slack.

    The job list is obtained by parsing the expression 'failed'.  At most
    the first two jobs are described (with their exception text) in a
    message that is printed and posted to ``channel``.  An empty DB is
    logged as an error; an empty job list is only logged.
    """
    db = StorageFilesystem(path, compress=True)
    args = ['failed']
    cq = CacheQueryDB(db)
    context = Context(db)

    if not list(db.keys()):
        logger.error('Compmake DB is empty')
        return

    job_list = parse_job_list(args, context=context, cq=cq)
    if not job_list:
        # Nothing is posted in this case.
        logger.info('No jobs found')
        return

    job_list = job_list[:2]

    parts = ['Running on host: %s' % hostname,
             "\nJob failed in path %s" % path]

    for job_id in job_list:
        if not job_cache_exists(job_id, db):
            logger.warning('no cache for %s' % job_id)
            continue

        cache = get_job_cache(job_id, db)
        # Not part of the message; the lookup is kept as in the original.
        status = Cache.state2desc[cache.state]
        parts.append("\nFailure of job %s" % job_id)
        if cache.state in [Cache.FAILED, Cache.BLOCKED]:
            why = str(cache.exception).strip()
        else:
            why = 'No why for job done.'
        parts.append('\n' + "```\n" + why + "\n```")
        parts.append('\n\n')

    # People to notify.
    for mention in ('@censi', '@jacopo', '@paull', '@walter', '@daniele'):
        parts.append('\n' + mention)

    s = ''.join(parts)
    print(s)
    slack.chat.post_message(channel, s, link_names=1)
def details_why_one(job_id, context, cq):  # @UnusedVariable
    """Summarize why a job is FAILED or BLOCKED.

    Returns a tuple ``(job_id, status, one)`` where ``status`` is the
    description of the cache state and ``one`` is the first line of the
    stripped exception text, followed by the count of the remaining lines
    if there are any.  Returns None if the job has no cache or is in any
    other state.
    """
    db = context.get_compmake_db()
    if not job_cache_exists(job_id, db):
        return None

    cache = get_job_cache(job_id, db)
    status = Cache.state2desc[cache.state]
    if cache.state not in [Cache.FAILED, Cache.BLOCKED]:
        return None

    reason_lines = cache.exception.strip().split('\n')
    summary = reason_lines[0]
    extra = len(reason_lines) - 1
    if extra > 0:
        summary += ' [+%d lines] ' % extra
    return (job_id, status, summary)
def junit_test_case_from_compmake(db, job_id):
    """Build a junit TestCase from the cache of a compmake job.

    The elapsed time is the CPU time used by the job if it is DONE,
    otherwise None.  Captured stdout / stderr go through flatten_ascii().
    For a FAILED job the exception is attached as the failure message and
    the exception plus the backtrace as the failure output.
    """
    cache = get_job_cache(job_id, db=db)

    # The CPU time is reported (not cache.walltime_used).
    elapsed_sec = cache.cputime_used if cache.state == Cache.DONE else None

    stderr = flatten_ascii(cache.captured_stderr)
    stdout = flatten_ascii(cache.captured_stdout)

    tc = TestCase(name=job_id, classname=None, elapsed_sec=elapsed_sec,
                  stdout=stdout, stderr=stderr)

    if cache.state == Cache.FAILED:
        failure_output = cache.exception + "\n" + cache.backtrace
        tc.add_failure_info(flatten_ascii(cache.exception),
                            flatten_ascii(failure_output))
    return tc
def junit_test_case_from_compmake(db, job_id):
    """Build a junit_xml TestCase from the cache of a compmake job.

    The elapsed time is the CPU time used by the job if it is DONE,
    otherwise None.  The captured streams and the exception are checked to
    be None or text; the streams then go through remove_escapes().  For a
    FAILED job the exception is attached as the failure message and the
    exception plus the backtrace as the failure output.
    """
    from junit_xml import TestCase

    cache = get_job_cache(job_id, db=db)

    # The CPU time is reported (not cache.walltime_used).
    elapsed_sec = cache.cputime_used if cache.state == Cache.DONE else None

    text_or_none = (type(None), six.text_type)
    check_isinstance(cache.captured_stderr, text_or_none)
    check_isinstance(cache.captured_stdout, text_or_none)
    check_isinstance(cache.exception, text_or_none)

    stderr = remove_escapes(cache.captured_stderr)
    stdout = remove_escapes(cache.captured_stdout)

    tc = TestCase(name=job_id, classname=None, elapsed_sec=elapsed_sec,
                  stdout=stdout, stderr=stderr)

    if cache.state == Cache.FAILED:
        failure_output = cache.exception + "\n" + cache.backtrace
        tc.add_failure_info(cache.exception, failure_output)
    return tc
def get_job_cache(self, job_id):
    """Return the cache of ``job_id``, read using this object's ``self.db``."""
    # Imported here under an alias, since the method has the same name.
    from .storage import get_job_cache as load_job_cache
    return load_job_cache(job_id, db=self.db)
def mvac_job(args):
    """
        Runs one job through multyvac and stores the outcome in the DB.

        args = tuple (job_id, context, event_queue_name, show_output,
                      volumes, cwd)

        Returns a dictionary with fields "user_object",
        "user_object_deps", "new_jobs" and "deleted_jobs"; the last two
        are always empty lists here.

        Raises CompmakeException if the job needs the context (dynamic
        job), and JobFailed, after saving a FAILED cache, if the multyvac
        job ends in the error or killed status.
    """
    job_id, context, event_queue_name, show_output, volumes, cwd = args  # @UnusedVariable
    check_isinstance(job_id, str)
    check_isinstance(event_queue_name, str)

    # Disable multyvac logging
    disable_logging_if_config(context)

    db = context.get_compmake_db()
    job = get_job(job_id=job_id, db=db)

    if job.needs_context:
        raise CompmakeException('Cannot use multyvac for dynamic job.')

    time_start = time.time()

    multyvac_job = mvac_instance(db, job_id, volumes, cwd)
    multyvac_job.wait()

    failure_statuses = [multyvac_job.status_error, multyvac_job.status_killed]
    if multyvac_job.status in failure_statuses:
        e = 'Multyvac error (status: %r)' % multyvac_job.status
        bt = str(multyvac_job.stderr)

        failed_cache = Cache(Cache.FAILED)
        failed_cache.exception = e
        failed_cache.backtrace = bt
        failed_cache.timestamp = time.time()
        failed_cache.captured_stderr = str(multyvac_job.stderr)
        failed_cache.captured_stdout = str(multyvac_job.stdout)
        set_job_cache(job_id, failed_cache, db=db)

        raise JobFailed(job_id=job_id, reason=str(e), bt=bt)

    user_object = multyvac_job.result
    user_object_deps = collect_dependencies(user_object)
    set_job_userobject(job_id, user_object, db=db)

    # Update the existing cache with the result of the remote run.
    cache = get_job_cache(job_id, db=db)
    cache.captured_stderr = str(multyvac_job.stderr)
    cache.captured_stdout = str(multyvac_job.stdout)
    cache.state = Cache.DONE
    cache.timestamp = time.time()
    cache.walltime_used = cache.timestamp - time_start
    cache.cputime_used = multyvac_job.cputime_system
    cache.host = 'multyvac'
    cache.jobs_defined = set()
    set_job_cache(job_id, cache, db=db)

    result_dict = dict(user_object=user_object,
                       user_object_deps=user_object_deps,
                       new_jobs=[], deleted_jobs=[])
    result_dict_check(result_dict)
    return result_dict
def report_results_pairs_jobs(context, func, objspec1_name, objspec2_name, jobs):
    """
        This version gets the jobs ID.

        jobs maps each pair (id_object1, id_object2) to a job id.
        Returns a Report with a 'summary' table (one row per first id,
        one column per second id, each cell describing the job outcome)
        and a 'notes' text that explains the numbers attached to the
        skipped results.  If jobs is empty, the report only contains a
        warning.
    """
    reason2symbol = {}

    def get_string_result(res):
        # Short label for the user object of a job that is DONE.
        if res is None:
            return 'ok'
        if isinstance(res, Skipped):
            reason = res.get_reason()
            if reason not in reason2symbol:
                # Reasons are numbered in order of first appearance.
                reason2symbol[reason] = len(reason2symbol) + 1
            return 'skipped' + '(%s)' % reason2symbol[reason]
        if isinstance(res, PartiallySkipped):
            return 'no ' + ','.join(res.get_skipped_parts())
        print('how to interpret %s? ' % describe_value(res))
        return '?'

    r = Report()
    if not jobs:
        r.text('warning', 'no test objects defined')
        return r

    rows = sorted(set(first for first, _ in jobs))
    cols = sorted(set(second for _, second in jobs))

    # One independent list per row (no aliasing between rows).
    data = [[None] * len(cols) for _ in rows]

    db = context.get_compmake_db()

    for i, id_object1 in enumerate(rows):
        for j, id_object2 in enumerate(cols):
            job_id = jobs[(id_object1, id_object2)]
            state = get_job_cache(job_id, db).state

            if state == Cache.DONE:
                label = get_string_result(get_job_userobject(job_id, db))
            elif state == Cache.FAILED:
                label = 'FAIL'
            elif state == Cache.BLOCKED:
                label = 'blocked'
            elif state == Cache.NOT_STARTED:
                label = ' '
            else:
                # Any other state, e.g. Cache.IN_PROGRESS.
                label = '?'

            data[i][j] = label

    r.table('summary', rows=rows, data=data, cols=cols)

    expl = ''.join('(%s): %s\n' % (symbol, reason)
                   for reason, symbol in reason2symbol.items())
    r.text('notes', expl)
    return r
def report_results_pairs_jobs(context, func, objspec1_name, objspec2_name, jobs):
    """
        This version gets the jobs ID.

        jobs maps each pair (id_object1, id_object2) to a job id.
        Returns a Report with a 'summary' table (one row per first id,
        one column per second id, each cell describing the job outcome)
        and a 'notes' text that explains the numbers attached to the
        skipped results.  If jobs is empty, the report only contains a
        warning.
    """
    reason2symbol = {}

    def get_string_result(res):
        # Short label for the user object of a job that is DONE.
        if res is None:
            s = 'ok'
        elif isinstance(res, Skipped):
            s = 'skipped'
            reason = res.get_reason()
            if reason not in reason2symbol:
                # Reasons are numbered in order of first appearance.
                reason2symbol[reason] = len(reason2symbol) + 1
            s += '(%s)' % reason2symbol[reason]
        elif isinstance(res, PartiallySkipped):
            parts = res.get_skipped_parts()
            s = 'no ' + ','.join(parts)
        else:
            print('how to interpret %s? ' % describe_value(res))
            s = '?'
        return s

    r = Report()
    if not jobs:
        r.text('warning', 'no test objects defined')
        return r

    rows = sorted(set([a for a, _ in jobs]))
    cols = sorted(set([b for _, b in jobs]))
    data = [[None for a in range(len(cols))] for b in range(len(rows))]
    # a nice bug: data = [[None * len(cols)] * len(rows)

    db = context.get_compmake_db()

    comb = itertools.product(enumerate(rows), enumerate(cols))
    for ((i, id_object1), (j, id_object2)) in comb:
        job_id = jobs[(id_object1, id_object2)]
        cache = get_job_cache(job_id, db)

        if cache.state == Cache.DONE:
            res = get_job_userobject(job_id, db)
            s = get_string_result(res)
        elif cache.state == Cache.FAILED:
            s = 'FAIL'
        elif cache.state == Cache.BLOCKED:
            s = 'blocked'
        elif cache.state == Cache.NOT_STARTED:
            s = ' '
        else:
            # Any other state (e.g. Cache.IN_PROGRESS).  Without this
            # branch s was unbound for the first cell, or silently reused
            # the label of the previous cell.
            s = '?'

        data[i][j] = s

    r.table('summary', rows=rows, data=data, cols=cols)

    expl = ""
    for reason, symbol in reason2symbol.items():
        expl += '(%s): %s\n' % (symbol, reason)
    r.text('notes', expl)

    return r
def get_job_cache(self, job_id):
    """Return the cache of ``job_id`` via compmake.jobs.storage.get_job_cache."""
    # Imported here under an alias, since the method has the same name.
    from compmake.jobs.storage import get_job_cache as load_job_cache
    return load_job_cache(job_id)
def list_jobs_with_state(state):
    ''' Yields the ids of all the jobs whose cache is in the given state. '''
    for job_id in all_jobs():
        current = get_job_cache(job_id).state
        if current != state:
            continue
        yield job_id
def graph(job_list, filename='compmake', compact=0, filter='dot', format='png'):
    '''Creates a graph of the given targets and dependencies

        graph filename=filename compact=0,1 format=png,...

        Params:
            filename:  name of generated filename in the dot format
            compact=0: whether to include the job names in the nodes
            filter=[dot,circo,twopi,...]  which algorithm to use to arrange
                       the nodes. This depends on the topology of your
                       computation. The default is 'dot'
                       (hierarchy top-bottom).
            format=[png,...]  The output file format.
    '''
    # With no explicit targets, start from the top targets.
    if not job_list:
        job_list = top_targets()

    job_list = tree(job_list)

    try:
        import gvgen  # @UnresolvedImport
    except ImportError:
        gvgen_url = 'http://software.inl.fr/trac/wiki/GvGen'
        raise UserError('To use the "graph" command' +
                        ' you have to install the "gvgen" package from %s' %
                        gvgen_url)

    gv = gvgen.GvGen()

    state2color = {
        Cache.NOT_STARTED: 'grey',
        Cache.IN_PROGRESS: 'yellow',
        Cache.MORE_REQUESTED: 'blue',
        Cache.FAILED: 'red',
        Cache.DONE: 'green'
    }

    # One node per job, filled with the color of its cache state.
    job2node = {}
    for job_id in job_list:
        if int(compact):
            job2node[job_id] = gv.newItem("")
        else:
            job2node[job_id] = gv.newItem(job_id)
        cache = get_job_cache(job_id)
        gv.styleAppend(job_id, "style", "filled")
        gv.styleAppend(job_id, "fillcolor", state2color[cache.state])
        gv.styleApply(job_id, job2node[job_id])

    # One link from each job to each of its direct children.
    for job_id in job_list:
        for child in direct_children(job_id):
            gv.newLink(job2node[job_id], job2node[child])

    # TODO: add check?
    with open(filename, 'w') as f:
        gv.dot(f)

    output = filename + '.' + format
    cmd_line = '%s %s -T%s -o%s' % (filter, filename, format, output)

    # os.system() reports a failing command through its return value, not
    # by raising, so the exit status must be checked explicitly.
    try:
        exit_status = os.system(cmd_line)
    except OSError:
        exit_status = -1
    if exit_status != 0:
        # XXX maybe not UserError
        raise UserError("Could not run dot (cmdline='%s'). "
                        "Make sure graphviz is installed." % cmd_line)

    info("Written output on files %s, %s." % (filename, output))
def list_todo_jobs():
    ''' Yields the ids of all the jobs whose cache state is not DONE. '''
    for job_id in all_jobs():
        current = get_job_cache(job_id).state
        if current == Cache.DONE:
            continue
        yield job_id