def instance_job(self, job_id):
    """Submit *job_id* to the worker pool and return an AsyncResultWrap.

    Emits 'worker-status' events before and after the submission.
    """
    publish(self.context, 'worker-status', job_id=job_id, status='apply_async')
    # Reserve a unique temporary filename for the worker; the file itself
    # is not kept around -- only its name is passed along.
    fd, scratch_name = tempfile.mkstemp(prefix='compmake', text=True)
    os.close(fd)
    os.remove(scratch_name)
    task_args = [(job_id, self.context, scratch_name, False)]
    pending = self.pool.apply_async(parmake_job2, task_args)
    publish(self.context, 'worker-status', job_id=job_id,
            status='apply_async_done')
    return AsyncResultWrap(job_id, pending, scratch_name)
def instance_job(self, job_id):
    # Submit one job to the worker pool; returns an AsyncResultWrap that
    # bundles the job id, the pool's AsyncResult, and the temp filename.
    publish(self.context, 'worker-status', job_id=job_id, status='apply_async')
    # Create and immediately delete a temp file: only the unique *name* is
    # needed by the worker. NOTE(review): deleting it reopens a race on the
    # name -- confirm the worker tolerates a pre-existing file.
    handle, tmp_filename = tempfile.mkstemp(prefix='compmake', text=True)
    os.close(handle)
    os.remove(tmp_filename)
    async_result = self.pool.apply_async(
        parmake_job2, [(job_id, self.context, tmp_filename, False)])
    publish(self.context, 'worker-status', job_id=job_id, status='apply_async_done')
    return AsyncResultWrap(job_id, async_result, tmp_filename)
def process(self): ''' Start processing jobs. ''' # precompute job priorities #print "Computing priorities..." self.priorities = compute_priorities(self.all_targets) #print "... done" if not self.todo: info('Nothing to do.') return True self.process_init() try: while self.todo: assert self.ready_todo or self.processing assert not self.failed.intersection(self.todo) self.publish_progress() self.instance_some_jobs() self.publish_progress() if self.ready_todo and not self.processing: publish('manager-failed', reason='No resources.', targets=self.targets, done=self.done, todo=self.todo, failed=self.failed, ready=self.ready_todo, processing=self.processing, all_targets=self.all_targets) raise CompmakeException('Cannot find computing resources, giving up.') self.publish_progress() self.loop_until_something_finishes() self.process_finished() publish('manager-succeeded', targets=self.targets, done=self.done, all_targets=self.all_targets, todo=self.todo, failed=self.failed, ready=self.ready_todo, processing=self.processing) return True except JobInterrupted: # XXX I'm getting confused raise KeyboardInterrupt
def publish_progress(self):
    """Broadcast a 'manager-progress' event with the manager's current sets."""
    snapshot = dict(targets=self.targets,
                    done=self.done,
                    all_targets=self.all_targets,
                    todo=self.todo,
                    failed=self.failed,
                    ready=self.ready_todo,
                    processing=self.processing)
    publish('manager-progress', **snapshot)
def interactive_console():
    # Run the interactive compmake console loop until the user exits
    # (via the 'exit' command -> ShellExitRequested, or CTRL-D).
    # Publishes console/command lifecycle events around each command.
    publish('console-starting')
    exit_requested = False
    while not exit_requested:
        try:
            for line in compmake_console():
                commands = line.strip().split()
                if commands:
                    try:
                        publish('command-starting', command=commands)
                        interpret_commands(commands)
                        publish('command-succeeded', command=commands)
                    except UserError as e:
                        publish('command-failed', command=commands, reason=e)
                        user_error(e)
                    except CompmakeException as e:
                        publish('command-failed', command=commands, reason=e)
                        # Added this for KeyboardInterrupt
                        error(e)
                    except KeyboardInterrupt:
                        # CTRL-C during a command interrupts only that
                        # command, not the console itself.
                        publish('command-interrupted', command=commands,
                                reason='keyboard')
                        user_error('Execution of "%s" interrupted' % line)
                    except ShellExitRequested:
                        exit_requested = True
                        break
                    except Exception as e:
                        # Anything else reaching here is a compmake bug:
                        # lower layers should have translated it already.
                        traceback.print_exc()
                        error('Warning, I got this exception, while it should have'
                              ' been filtered out already. This is a compmake BUG '
                              ' that should be reported: %s' % e)
        except KeyboardInterrupt:  # CTRL-C
            print "\nPlease use 'exit' to quit."
        except EOFError:  # CTRL-D
            # TODO maybe make loop different? we don't want to catch
            # EOFerror in interpret_commands
            print "(end of input detected)"
            exit_requested = True
    publish('console-ending')
    return
def comp(command, *args, **kwargs):
    ''' Main method to define a computation.

        Extra arguments:

        :arg:job_id: sets the job id (respects job_prefix)
        :arg:extra_dep: extra dependencies (not passed as arguments)

        Returns a Promise for the new job. Raises SerializationError if
        *command* cannot be pickled, and UserError on job-id conflicts.
    '''
    # In slave mode, job definitions are disabled (see the recursion note
    # further down): do nothing.
    if compmake.compmake_status == compmake_status_slave:
        return None
    # Check that this is a pickable function
    try:
        pickle.dumps(command)
    except:
        msg = ('Cannot pickle %r. Make sure it is not a lambda function or a '
               'nested function. (This is a limitation of Python)' % command)
        raise SerializationError(msg)
    args = list(args)  # args is a non iterable tuple
    # Get job id from arguments
    job_id_key = 'job_id'
    if job_id_key in kwargs:
        # make sure that command does not have itself a job_id key
        #available = command.func_code.co_varnames
        argspec = inspect.getargspec(command)
        if job_id_key in argspec.args:
            msg = ("You cannot define the job id in this way because 'job_id' "
                   "is already a parameter of this function.")
            raise UserError(msg)
        job_id = kwargs[job_id_key]
        if job_prefix:
            job_id = '%s-%s' % (job_prefix, job_id)
        del kwargs[job_id_key]
        if job_id in jobs_defined_in_this_session:
            raise UserError('Job %r already defined.' % job_id)
    else:
        # No explicit id: derive one from the command.
        job_id = generate_job_id(command)
    jobs_defined_in_this_session.add(job_id)
    # 'extra_dep' values are dependencies only; they are not passed as
    # arguments to the command.
    if 'extra_dep' in kwargs:
        extra_dep = collect_dependencies(kwargs['extra_dep'])
        del kwargs['extra_dep']
    else:
        extra_dep = set()
    children = collect_dependencies([args, kwargs])
    children.update(extra_dep)
    all_args = (command, args, kwargs)
    command_desc = command.__name__
    c = Job(job_id=job_id, children=list(children), command_desc=command_desc)
    # TODO: check for loops
    # Register this job as a parent of each of its children.
    for child in children:
        child_comp = get_job(child)
        if not job_id in child_comp.parents:
            child_comp.parents.append(job_id)
            set_job(child, child_comp)
    if job_exists(job_id):
        # OK, this is going to be black magic.
        # We want to load the previous job definition,
        # however, by unpickling(), it will start
        # __import__()ing the modules, perhaps
        # even the one that is calling us.
        # What happens, then is that it will try to
        # add another time this computation recursively.
        # What we do, is that we temporarely switch to
        # slave mode, so that recursive calls to comp()
        # are disabled.
        if compmake_config.check_params:  #@UndefinedVariable
            old_status = compmake_status
            set_compmake_status(compmake_status_slave)
            old_computation = get_job(job_id)
            set_compmake_status(old_status)
            # NOTE(review): this assert makes the check_params branch
            # unconditionally fail; the comparison code below it is
            # currently unreachable and pending an update.
            assert False, 'update for job_args'
            same, reason = old_computation.same_computation(c)
            if not same:
                set_job(job_id, c)
                set_job_args(job_id, all_args)
                publish('job-redefined', job_id=job_id, reason=reason)
                # XXX TODO clean the cache
            else:
                publish('job-already-defined', job_id=job_id)
        else:
            # We assume everything's ok
            set_job(job_id, c)
            set_job_args(job_id, all_args)
            publish('job-defined', job_id=job_id)
    else:
        set_job(job_id, c)
        set_job_args(job_id, all_args)
        publish('job-defined', job_id=job_id)
    assert job_exists(job_id)
    assert job_args_exists(job_id)
    return Promise(job_id)
def make(job_id, more=False):
    """ Makes a single job.

        Returns the user-object, or raises JobFailed when the job's own
        code raises, and JobInterrupted on a keyboard interrupt.

        :param job_id: id of the job to run.
        :param more: if True, request "more" computation from a job whose
                     cache is in the MORE_REQUESTED state, even if it is
                     otherwise up to date.
    """
    host = compmake_config.hostname  #@UndefinedVariable
    setproctitle(job_id)
    # TODO: should we make sure we are up to date???
    up, reason = up_to_date(job_id)  #@UnusedVariable
    cache = get_job_cache(job_id)
    want_more = cache.state == Cache.MORE_REQUESTED
    if up and not (more and want_more):
        # Already computed and children unchanged: return the cached result.
        assert is_job_userobject_available(job_id)
        return get_job_userobject(job_id)

    computation = get_job(job_id)
    assert cache.state in [Cache.NOT_STARTED, Cache.IN_PROGRESS,
                           Cache.MORE_REQUESTED, Cache.DONE, Cache.FAILED]

    # Decide which previous result (if any) to resume from, dispatching on
    # the state *before* we mark the job as in progress.
    # BUGFIX: the original used a plain `if` for NOT_STARTED followed by an
    # `if FAILED / elif IN_PROGRESS / ...` chain; after the first branch set
    # cache.state = IN_PROGRESS, the NOT_STARTED case fell through into the
    # IN_PROGRESS branch and could resume from a stale tmpobject.
    state = cache.state
    if state in (Cache.NOT_STARTED, Cache.FAILED):
        # Fresh start (also after a failure): no previous result.
        previous_user_object = None
        cache.state = Cache.IN_PROGRESS
    elif state == Cache.IN_PROGRESS:
        # Resume from the partial result, if one was saved.
        if is_job_tmpobject_available(job_id):
            previous_user_object = get_job_tmpobject(job_id)
        else:
            previous_user_object = None
    elif state == Cache.MORE_REQUESTED:
        assert is_job_userobject_available(job_id)
        if is_job_tmpobject_available(job_id):
            # resuming more computation
            previous_user_object = get_job_tmpobject(job_id)
        else:
            # starting more computation
            previous_user_object = get_job_userobject(job_id)
    elif state == Cache.DONE:
        # If we are done, it means children have been updated
        assert not up
        previous_user_object = None
    else:
        assert False

    # update state
    cache.time_start = time()
    cpu_start = clock()
    set_job_cache(job_id, cache)

    def progress_callback(stack):
        # Forward progress-stack updates to listeners.
        publish('job-progress-plus', job_id=job_id, host=host, stack=stack)
    init_progress_tracking(progress_callback)

    num, total = 0, None
    user_object = None
    capture = OutputCapture(prefix=job_id,
                            echo_stdout=compmake_config.echo_stdout,  #@UndefinedVariable
                            echo_stderr=compmake_config.echo_stderr)  #@UndefinedVariable
    try:
        result = computation.compute(previous_user_object)
        if type(result) == GeneratorType:
            # Generator job: each yielded (user_object, num, total) tuple is
            # a progress snapshot; optionally persist it for resuming.
            try:
                while True:
                    item = result.next()
                    if isinstance(item, tuple):
                        if len(item) != 3:
                            raise CompmakeException('If computation yields a tuple, ' +
                                                   'should be a tuple with 3 elemnts.' +
                                                   'Got: %s' % str(item))
                        user_object, num, total = item
                        publish('job-progress', job_id=job_id, host=host,
                                done=None, progress=num, goal=total)
                        if compmake_config.save_progress:  #@UndefinedVariable
                            set_job_tmpobject(job_id, user_object)
            except StopIteration:
                pass
        else:
            publish('job-progress', job_id=job_id, host='XXX',
                    done=1, progress=1, goal=1)
            user_object = result
    except KeyboardInterrupt:
        # TODO: clear progress cache
        # Save the current progress:
        cache.iterations_in_progress = num
        cache.iterations_goal = total
        if user_object:
            set_job_tmpobject(job_id, user_object)
        set_job_cache(job_id, cache)
        # clear progress cache
        publish('job-interrupted', job_id=job_id, host=host)
        raise JobInterrupted('Keyboard interrupt')
    except Exception as e:
        sio = StringIO()
        print_exc(file=sio)
        bt = sio.getvalue()
        error("Job %s failed: %s" % (job_id, e))
        error(bt)
        mark_as_failed(job_id, e, bt)
        # clear progress cache
        publish('job-failed', job_id=job_id, host=host, reason=e)
        raise JobFailed('Job %s failed: %s' % (job_id, e))
    finally:
        capture.deactivate()
        # even if we send an error, let's save the output of the process
        cache = get_job_cache(job_id)
        cache.captured_stderr = capture.stderr_replacement.buffer.getvalue()
        cache.captured_stdout = capture.stdout_replacement.buffer.getvalue()
        set_job_cache(job_id, cache)

    # Success path: persist the result and mark the cache DONE.
    set_job_userobject(job_id, user_object)
    if is_job_tmpobject_available(job_id):
        # We only have one with yield
        delete_job_tmpobject(job_id)
    cache.state = Cache.DONE
    cache.timestamp = time()
    walltime = cache.timestamp - cache.time_start
    cputime = clock() - cpu_start
    # FIXME walltime/cputime not precise (especially for "more" computation)
    cache.walltime_used = walltime
    cache.cputime_used = cputime
    cache.done_iterations = num  # XXX not true
    cache.host = compmake_config.hostname  #@UndefinedVariable
    set_job_cache(job_id, cache)
    publish('job-succeeded', job_id=job_id, host=host)
    # TODO: clear these records in other place
    return user_object
def progress_callback(stack):
    # Forward a progress-stack update as a 'job-progress-plus' event.
    # NOTE(review): `job_id` and `host` are not parameters -- this helper
    # relies on them being available from an enclosing scope (it mirrors
    # the nested callback defined inside make()); confirm before reuse.
    publish('job-progress-plus', job_id=job_id, host=host, stack=stack)