def execute_with_context(db, context, job_id, command, args, kwargs):
    """ Executes ``command(*args, **kwargs)`` on behalf of job ``job_id``,
        recording which new jobs get defined while it runs.

        Returns a dictionary with fields "user_object" and "new_jobs". """
    from compmake.context import Context
    assert isinstance(context, Context)

    from compmake.jobs.storage import get_job
    cur_job = get_job(job_id=job_id, db=db)
    # Record the chain of definers so jobs defined during execution
    # know which job created them.
    context.currently_executing = cur_job.defined_by + [job_id]

    # Snapshot the session bookkeeping and start from a clean slate,
    # so we can tell exactly which jobs this command defines.
    already = set(context.get_jobs_defined_in_this_session())
    context.reset_jobs_defined_in_this_session([])

    if args:
        first = args[0]
        if isinstance(first, Context) and first != context:
            raise ValueError('%s(%s, %s)' % (command, args, kwargs))
        # context is one of the arguments
        assert context in args

    user_object = command(*args, **kwargs)

    generated = set(context.get_jobs_defined_in_this_session())
    # Restore the previous session state.
    context.reset_jobs_defined_in_this_session(already)

    return dict(user_object=user_object, new_jobs=generated)
def display_stats(job_list):
    """ Prints a summary of the states of the given jobs: an overall
        count per state, followed by a per-function breakdown of
        done / failed / remaining jobs.

        :param job_list: iterable of job ids to summarize.
    """
    states_order = [Cache.NOT_STARTED, Cache.IN_PROGRESS,
                    Cache.MORE_REQUESTED, Cache.FAILED, Cache.DONE]

    # initialize counters to 0
    states2count = dict((x, 0) for x in states_order)

    function2state2count = {}
    total = 0

    for job_id in job_list:
        cache = get_job_cache(job_id)
        states2count[cache.state] += 1
        total += 1

        function_id = get_job(job_id).command_desc
        # initialize record if not present
        if function_id not in function2state2count:
            # BUG FIX: the original built this as
            #   dict(map(lambda x: (x, 0), states_order) + [('all', 0)])
            # which raises TypeError on Python 3, where map() returns an
            # iterator that cannot be concatenated with a list.
            counters = dict((x, 0) for x in states_order)
            counters['all'] = 0
            function2state2count[function_id] = counters
        # update
        function2state2count[function_id][cache.state] += 1
        function2state2count[function_id]['all'] += 1

        if total == 100:  # XXX: use standard method
            info("Loading a large number of jobs...")

    print("Found %s jobs in total. Summary by state:" % total)

    for state in states_order:
        desc = "%30s" % Cache.state2desc[state]
        # colorize output
        desc = colored(desc, **state2color[state])
        num = states2count[state]
        if num > 0:
            print("%s: %5d" % (desc, num))

    print("Summary by function:")
    for function_id, function_stats in function2state2count.items():
        ndone = function_stats[Cache.DONE]
        nfailed = function_stats[Cache.FAILED]
        nrest = function_stats['all'] - ndone - nfailed
        failed_s = "%5d failed" % nfailed
        if nfailed > 0:
            failed_s = colored(failed_s, color='red')
        s = "%5d done, %s, %5d to do." % (ndone, failed_s, nrest)
        print(" %30s(): %s" % (function_id, s))
def direct_uptodate_deps(job_id, db):
    """ Returns all direct 'dependencies' of this job: the jobs that
        are children (arguments) plus the job that defined it
        (if not root). """
    from compmake.jobs.queries import direct_children
    deps = direct_children(job_id, db)

    # plus the job that defined it
    from compmake.jobs.storage import get_job
    definers = get_job(job_id, db).defined_by
    immediate_definer = definers[-1]
    if immediate_definer != 'root':
        deps.add(immediate_definer)

    return deps
def list_matching_functions(token):
    """ Yields the ids of all jobs whose command function name matches
        ``token`` (of the form ``"<name>()"``; case-insensitive).

        Raises UserError if the token is malformed or nothing matches. """
    assert token.endswith('()')
    if len(token) < 3:
        raise UserError('Malformed token "%s".' % token)

    function_id = token[:-2]
    target = function_id.lower()

    num_matches = 0
    for job_id in all_jobs():
        if get_job(job_id).command.__name__.lower() == target:
            yield job_id
            num_matches += 1

    # Note: being a generator, this only fires once fully consumed.
    if num_matches == 0:
        raise UserError('Could not find matches for function "%s()".' %
                        function_id)
def instance_job(self, job_id):
    """ Dispatches job `job_id` to one of the available worker
        subprocesses, choosing the function to run based on whether
        we are using the remote db (rdb) and whether the job needs a
        Context. Returns the async result handle from apply_async(). """
    publish(self.context, 'worker-status', job_id=job_id,
            status='apply_async')
    # There must be at least one free worker; the caller is responsible
    # for only instancing jobs when capacity is available.
    assert len(self.sub_available) > 0
    # Pick deterministically: the alphabetically-first free worker.
    name = sorted(self.sub_available)[0]
    self.sub_available.remove(name)
    assert not name in self.sub_processing
    self.sub_processing.add(name)
    sub = self.subs[name]

    # Maintain the two-way job <-> worker mapping.
    self.job2subname[job_id] = name
    self.subname2job[name] = job_id

    job = get_job(job_id, self.db)

    if self.rdb:
        # Remote-db path: ship the job to multyvac with volume info.
        f = mvac_job_rdb
        args = (job_id, self.context,
                self.event_queue_name, self.show_output,
                self.volumes, self.rdb_vol.name, self.rdb_db,
                os.getcwd())
    else:
        if job.needs_context:
            # Dynamic jobs (which define other jobs) must run through
            # parmake_job2, which provides a Context.
            # if self.new_process:
            #     f = parmake_job2_new_process
            #     args = (job_id, self.context)
            #
            # else:
            f = parmake_job2
            args = (job_id, self.context,
                    self.event_queue_name, self.show_output)
        else:
            f = mvac_job
            args = (job_id, self.context,
                    self.event_queue_name, self.show_output,
                    self.volumes, os.getcwd())

    # The else branch is a debugging aid: flip the condition to run
    # the job synchronously in this process.
    if True:
        async_result = sub.apply_async(f, args)
    else:
        warnings.warn('Debugging synchronously')
        async_result = f(args)

    return async_result
def mvac_job(args):
    """
    Runs one (non-dynamic) job on multyvac and records the outcome in
    the job cache.

    args = tuple job_id, context, queue_name, show_events, volumes, cwd

    Returns a dictionary with fields "user_object", "user_object_deps",
    "new_jobs", "deleted_jobs". (Despite an older note, "user_object"
    here is the actual result loaded from multyvac, because it may
    contain a Promise whose dependencies we must collect.)

    Raises JobFailed if the multyvac job errored or was killed;
    raises CompmakeException if the job needs a Context (dynamic jobs
    cannot run on multyvac).
    """
    job_id, context, event_queue_name, show_output, volumes, cwd = args  # @UnusedVariable
    check_isinstance(job_id, str)
    check_isinstance(event_queue_name, str)

    # Disable multyvac logging
    disable_logging_if_config(context)

    db = context.get_compmake_db()
    job = get_job(job_id=job_id, db=db)

    if job.needs_context:
        msg = 'Cannot use multyvac for dynamic job.'
        raise CompmakeException(msg)

    time_start = time.time()

    # Submit the job remotely and block until it finishes.
    multyvac_job = mvac_instance(db, job_id, volumes, cwd)
    multyvac_job.wait()

    errors = [multyvac_job.status_error, multyvac_job.status_killed]
    if multyvac_job.status in errors:
        e = 'Multyvac error (status: %r)' % multyvac_job.status
        bt = str(multyvac_job.stderr)

        # Record the failure in the cache before raising, so the state
        # is persisted even if the exception is swallowed upstream.
        cache = Cache(Cache.FAILED)
        cache.exception = e
        cache.backtrace = bt
        cache.timestamp = time.time()
        cache.captured_stderr = str(multyvac_job.stderr)
        cache.captured_stdout = str(multyvac_job.stdout)
        set_job_cache(job_id, cache, db=db)

        raise JobFailed(job_id=job_id, reason=str(e), bt=bt)

    user_object = multyvac_job.result
    user_object_deps = collect_dependencies(user_object)
    set_job_userobject(job_id, user_object, db=db)

    # Update the existing cache record to DONE with timing/host info.
    cache = get_job_cache(job_id, db=db)
    cache.captured_stderr = str(multyvac_job.stderr)
    cache.captured_stdout = str(multyvac_job.stdout)
    cache.state = Cache.DONE
    cache.timestamp = time.time()
    walltime = cache.timestamp - time_start
    cache.walltime_used = walltime
    # NOTE(review): this records only system CPU time — confirm whether
    # cputime_system or total cputime was intended.
    cache.cputime_used = multyvac_job.cputime_system
    cache.host = 'multyvac'
    # Multyvac jobs are never dynamic (checked above), so no jobs defined.
    cache.jobs_defined = set()
    set_job_cache(job_id, cache, db=db)

    result_dict = dict(user_object=user_object,
                       user_object_deps=user_object_deps,
                       new_jobs=[], deleted_jobs=[])
    result_dict_check(result_dict)
    return result_dict
def direct_children(job_id, db=None):
    ''' Returns the direct children (dependences) of the specified job.

        :param job_id: the job id (must be a string).
        :param db: optional database handle; when given it is forwarded
                   to get_job(). Added for consistency with callers
                   elsewhere that invoke ``direct_children(job_id, db=db)``;
                   omitting it preserves the original behavior.
    '''
    assert isinstance(job_id, str)
    if db is None:
        computation = get_job(job_id)
    else:
        computation = get_job(job_id, db=db)
    return computation.children
def get_job(self, job_id):
    """ Convenience wrapper: looks up `job_id` in this object's db. """
    from .storage import get_job as storage_get_job
    return storage_get_job(job_id, db=self.db)
def execute_with_context(db, context, job_id, command, args, kwargs):
    """ Executes command(*args, **kwargs) for the job `job_id`, tracking
        any new jobs defined during execution.

        Returns a dictionary with fields "user_object" and "new_jobs" """
    from compmake.context import Context
    assert isinstance(context, Context)

    from compmake.jobs.storage import get_job
    cur_job = get_job(job_id=job_id, db=db)
    # Record the chain of definers so nested job definitions know
    # which job created them.
    context.currently_executing = cur_job.defined_by + [job_id]

    # Snapshot the session bookkeeping and start from a clean slate so
    # we can tell exactly which jobs this command defines.
    already = set(context.get_jobs_defined_in_this_session())
    context.reset_jobs_defined_in_this_session([])

    if args:
        if isinstance(args[0], Context) and args[0] != context:
            msg = ('%s(%s, %s)' % (command, args, kwargs))
            raise ValueError(msg)
        # context is one of the arguments
        assert context in args

    res = command(*args, **kwargs)

    generated = set(context.get_jobs_defined_in_this_session())
    # Restore the previous session state.
    context.reset_jobs_defined_in_this_session(already)

    # Reporting hooks, currently disabled (kept for reference).
    if generated:
        if len(generated) < 4:
            # info('Job %r generated %s.' % (job_id, generated))
            pass
        else:
            # info('Job %r generated %d jobs such as %s.' %
            # (job_id, len(generated), sorted(generated)[:M]))
            pass
    # # now remove the extra jobs that are not needed anymore
    # extra = []

    # FIXME this is a RACE CONDITION -- needs to be done in the main thread
    # from compmake.ui.visualization import info
    # info('now cleaning up; generated = %s' % generated)
    #
    # if False:
    #     for g in all_jobs(db=db):
    #         try:
    #             job = get_job(g, db=db)
    #         except:
    #             continue
    #         if job.defined_by[-1] == job_id:
    #             if not g in generated:
    #                 extra.append(g)
    #
    #     for g in extra:
    #         #info('Previously generated job %r (%s) removed.' % (g,
    #         # job.defined_by))
    #         delete_all_job_data(g, db=db)
    #
    #     # from compmake.jobs.manager import
    #     # clean_other_jobs_distributed
    #     # clean_other_jobs_distributed(db=db, job_id=job_id,
    #     # new_jobs=generated)

    return dict(user_object=res, new_jobs=generated)
def check_job(job_id, context):
    """ Checks the consistency of the stored parent/child relations for
        `job_id`: every definer, parent, and child must exist, and the
        inverse relation must agree.

        Returns (True, []) if consistent, otherwise (False, errors)
        where errors is a list of human-readable messages (also logged
        via error()). """
    db = context.get_compmake_db()

    job = get_job(job_id, db)
    defined_by = job.defined_by
    assert 'root' in defined_by

    dparents = direct_parents(job_id, db=db)
    all_parents = parents(job_id, db=db)
    dchildren = direct_children(job_id, db=db)
    all_children = children(job_id, db=db)

    #print(job_id)
    #print('d children: %s' % dchildren)
    #print('all children: %s' % all_children)

    errors = []

    # Small helper to accumulate error messages.
    def e(msg):
        errors.append(msg)

    # Each job that defined this one (except 'root') must exist.
    for defb in defined_by:
        if defb == 'root':
            continue
        if not job_exists(defb, db=db):
            s = ('%r defined by %r but %r not existing.' %
                 (job_id, defined_by, defb))
            e(s)

    # Direct parents must exist and list us among their direct children.
    for dp in dparents:
        if not job_exists(dp, db=db):
            s = 'Direct parent %r of %r does not exist.' % (dp, job_id)
            e(s)
        else:
            if not job_id in direct_children(dp, db=db):
                s = '%s thinks %s is its direct parent;' % (job_id, dp)
                s += 'but %s does not think %s is its direct child' % (dp, job_id)
                e(s)

    # Transitive parents must exist and have us among their children.
    for ap in all_parents:
        if not job_exists(ap, db=db):
            s = 'Parent %r of %r does not exist.' % (ap, job_id)
            e(s)
        else:
            if not job_id in children(ap, db=db):
                e('%s is parent but no child relation' % ap)

    # Direct children must exist and list us among their direct parents.
    for dc in dchildren:
        if not job_exists(dc, db=db):
            s = 'Direct child %r of %r does not exist.' % (dc, job_id)
            e(s)
        else:
            if not job_id in direct_parents(dc, db=db):
                e('%s is direct child but no direct_parent relation' % dc)

    # Transitive children must exist and have us among their parents.
    for ac in all_children:
        if not job_exists(ac, db=db):
            s = 'A child %r of %r does not exist.' % (ac, job_id)
            e(s)
        else:
            if not job_id in parents(ac, db=db):
                e('%s is direct child but no parent relation' % ac)

    if errors:
        s = ('Inconsistencies for %s:\n' % job_id)
        s += '\n'.join('- %s' % msg for msg in errors)
        error(s)
        return False, errors
    else:
        return True, []
def comp(command, *args, **kwargs):
    '''
        Main method to define a computation.

        Extra arguments:

        :arg:job_id: sets the job id (respects job_prefix)
        :arg:extra_dep: extra dependencies (not passed as arguments)
    '''
    # In slave mode, job definitions are disabled (see the recursion
    # note below for why this matters).
    if compmake.compmake_status == compmake_status_slave:
        return None

    # Check that this is a pickable function
    try:
        pickle.dumps(command)
    except:
        msg = ('Cannot pickle %r. Make sure it is not a lambda function or a '
               'nested function. (This is a limitation of Python)' % command)
        raise SerializationError(msg)

    args = list(args)  # args is a non iterable tuple

    # Get job id from arguments
    job_id_key = 'job_id'
    if job_id_key in kwargs:
        # make sure that command does not have itself a job_id key
        #available = command.func_code.co_varnames
        argspec = inspect.getargspec(command)
        if job_id_key in argspec.args:
            msg = ("You cannot define the job id in this way because 'job_id' "
                   "is already a parameter of this function.")
            raise UserError(msg)

        job_id = kwargs[job_id_key]
        if job_prefix:
            job_id = '%s-%s' % (job_prefix, job_id)
        del kwargs[job_id_key]

        if job_id in jobs_defined_in_this_session:
            raise UserError('Job %r already defined.' % job_id)
    else:
        # No explicit id given: derive one from the command.
        job_id = generate_job_id(command)

    jobs_defined_in_this_session.add(job_id)

    # 'extra_dep' adds dependencies that are not passed as arguments.
    if 'extra_dep' in kwargs:
        extra_dep = collect_dependencies(kwargs['extra_dep'])
        del kwargs['extra_dep']
    else:
        extra_dep = set()

    # Children = every Promise found inside the arguments, plus extras.
    children = collect_dependencies([args, kwargs])
    children.update(extra_dep)

    all_args = (command, args, kwargs)

    command_desc = command.__name__

    c = Job(job_id=job_id, children=list(children),
            command_desc=command_desc)

    # TODO: check for loops

    # Register this job as a parent of each of its children.
    for child in children:
        child_comp = get_job(child)
        if not job_id in child_comp.parents:
            child_comp.parents.append(job_id)
            set_job(child, child_comp)

    if job_exists(job_id):
        # OK, this is going to be black magic.
        # We want to load the previous job definition,
        # however, by unpickling(), it will start
        # __import__()ing the modules, perhaps
        # even the one that is calling us.
        # What happens, then is that it will try to
        # add another time this computation recursively.
        # What we do, is that we temporarely switch to
        # slave mode, so that recursive calls to comp()
        # are disabled.
        if compmake_config.check_params:  #@UndefinedVariable
            old_status = compmake_status
            set_compmake_status(compmake_status_slave)
            old_computation = get_job(job_id)
            set_compmake_status(old_status)

            # NOTE(review): this tripwire makes the check_params path
            # always fail; the code below it is currently unreachable.
            assert False, 'update for job_args'
            same, reason = old_computation.same_computation(c)

            if not same:
                set_job(job_id, c)
                set_job_args(job_id, all_args)
                publish('job-redefined', job_id=job_id , reason=reason)
                # XXX TODO clean the cache
            else:
                publish('job-already-defined', job_id=job_id)
        else:
            # We assume everything's ok
            set_job(job_id, c)
            set_job_args(job_id, all_args)
            publish('job-defined', job_id=job_id)
    else:
        set_job(job_id, c)
        set_job_args(job_id, all_args)
        publish('job-defined', job_id=job_id)

    assert job_exists(job_id)
    assert job_args_exists(job_id)

    return Promise(job_id)
def check_job(job_id, context):
    """ Verifies that the parent/child relations stored for `job_id`
        are mutually consistent: all definers, parents, and children
        exist, and each inverse relation agrees.

        Returns (True, []) if consistent, otherwise (False, errors);
        inconsistencies are also reported through error(). """
    db = context.get_compmake_db()

    job = get_job(job_id, db)
    defined_by = job.defined_by
    assert 'root' in defined_by

    dparents = direct_parents(job_id, db=db)
    all_parents = parents(job_id, db=db)
    dchildren = direct_children(job_id, db=db)
    all_children = children(job_id, db=db)

    #print(job_id)
    #print('d children: %s' % dchildren)
    #print('all children: %s' % all_children)

    errors = []

    # Accumulates one human-readable message per inconsistency.
    def e(msg):
        errors.append(msg)

    # Every definer (except 'root') must exist.
    for defb in defined_by:
        if defb == 'root':
            continue
        if not job_exists(defb, db=db):
            s = ('%r defined by %r but %r not existing.'
                 %(job_id, defined_by, defb))
            e(s)

    # Direct parents must exist and list us as a direct child.
    for dp in dparents:
        if not job_exists(dp, db=db):
            s = 'Direct parent %r of %r does not exist.' % (dp, job_id)
            e(s)
        else:
            if not job_id in direct_children(dp, db=db):
                s = '%s thinks %s is its direct parent;' % (job_id, dp)
                s += 'but %s does not think %s is its direct child' % (dp, job_id)
                e(s)

    # Transitive parents must exist and have us among their children.
    for ap in all_parents:
        if not job_exists(ap, db=db):
            s = 'Parent %r of %r does not exist.' % (ap, job_id)
            e(s)
        else:
            if not job_id in children(ap, db=db):
                e('%s is parent but no child relation' % ap)

    # Direct children must exist and list us as a direct parent.
    for dc in dchildren:
        if not job_exists(dc, db=db):
            s = 'Direct child %r of %r does not exist.' % (dc, job_id)
            e(s)
        else:
            if not job_id in direct_parents(dc, db=db):
                e('%s is direct child but no direct_parent relation' % dc)

    # Transitive children must exist and have us among their parents.
    for ac in all_children:
        if not job_exists(ac, db=db):
            s = 'A child %r of %r does not exist.' % (ac, job_id)
            e(s)
        else:
            if not job_id in parents(ac, db=db):
                e('%s is direct child but no parent relation' % ac)

    if errors:
        s = ('Inconsistencies for %s:\n' % job_id)
        s += '\n'.join('- %s' % msg for msg in errors)
        error(s)
        return False, errors
    else:
        return True, []
def direct_parents(job_id, db=None):
    ''' Returns the direct parents of the specified job.
        (Jobs that depend directly on this one)

        :param job_id: the job id (must be a string).
        :param db: optional database handle; when given it is forwarded
                   to get_job(). Added for consistency with callers
                   elsewhere that invoke ``direct_parents(job_id, db=db)``;
                   omitting it preserves the original behavior.
    '''
    assert isinstance(job_id, str)
    if db is None:
        computation = get_job(job_id)
    else:
        computation = get_job(job_id, db=db)
    return computation.parents