def instance_job(self, job_id):
    publish(self.context, 'worker-status', job_id=job_id,
            status='apply_async')
    # Obtain a unique temporary filename; the file itself is closed
    # and removed, leaving only the name for the worker process to use.
    handle, tmp_filename = tempfile.mkstemp(prefix='compmake', text=True)
    os.close(handle)
    os.remove(tmp_filename)
    async_result = self.pool.apply_async(parmake_job2,
                                         [(job_id, self.context,
                                           tmp_filename, False)])
    publish(self.context, 'worker-status', job_id=job_id,
            status='apply_async_done')
    return AsyncResultWrap(job_id, async_result, tmp_filename)
Example #2
def instance_job(self, job_id):
    publish(self.context,
            'worker-status',
            job_id=job_id,
            status='apply_async')
    handle, tmp_filename = tempfile.mkstemp(prefix='compmake', text=True)
    os.close(handle)
    os.remove(tmp_filename)
    async_result = self.pool.apply_async(
        parmake_job2, [(job_id, self.context, tmp_filename, False)])
    publish(self.context,
            'worker-status',
            job_id=job_id,
            status='apply_async_done')
    return AsyncResultWrap(job_id, async_result, tmp_filename)
Example #3
    def process(self):
        ''' Start processing jobs. '''
        
        # Precompute job priorities.
        self.priorities = compute_priorities(self.all_targets)
        
        if not self.todo:
            info('Nothing to do.')
            return True
        
        self.process_init()
        
        try:
            while self.todo:
                assert self.ready_todo or self.processing 
                assert not self.failed.intersection(self.todo)
        
                self.publish_progress()
                self.instance_some_jobs()
                self.publish_progress()
                if self.ready_todo and not self.processing:
                    publish('manager-failed', reason='No resources.',
                            targets=self.targets, done=self.done,
                            todo=self.todo, failed=self.failed,
                            ready=self.ready_todo,
                            processing=self.processing,
                            all_targets=self.all_targets)

                    raise CompmakeException(
                        'Cannot find computing resources, giving up.')
                
                self.publish_progress()
                self.loop_until_something_finishes()
               
            self.process_finished()
        
            publish('manager-succeeded',
                    targets=self.targets, done=self.done,
                    all_targets=self.all_targets,
                    todo=self.todo, failed=self.failed,
                    ready=self.ready_todo,
                    processing=self.processing)

            return True

        except JobInterrupted:
            # XXX I'm getting confused
            raise KeyboardInterrupt
Example #4
def publish_progress(self):
    publish('manager-progress', targets=self.targets, done=self.done,
            all_targets=self.all_targets, todo=self.todo,
            failed=self.failed, ready=self.ready_todo,
            processing=self.processing)
Example #5
def interactive_console():
    publish('console-starting') 
    exit_requested = False
    while not exit_requested:
        try:
            for line in compmake_console():
                commands = line.strip().split()
                if commands:
                    try:
                        publish('command-starting', command=commands)
                        interpret_commands(commands)
                        publish('command-succeeded', command=commands)
                    except UserError as e:
                        publish('command-failed', command=commands, reason=e)
                        user_error(e)
                    except CompmakeException as e:
                        publish('command-failed', command=commands, reason=e)
                        # Added this for KeyboardInterrupt
                        error(e)
                    except KeyboardInterrupt:
                        publish('command-interrupted',
                                command=commands, reason='keyboard')
                        user_error('Execution of "%s" interrupted' % line)
                    except ShellExitRequested:
                        exit_requested = True
                        break
                    except Exception as e:
                        traceback.print_exc()
                        error('Warning: I got this exception, but it should '
                              'have been filtered out already. This is a '
                              'compmake bug that should be reported: %s' % e)
                        
        except KeyboardInterrupt:  # CTRL-C
            print("\nPlease use 'exit' to quit.")
        except EOFError:  # CTRL-D
            # TODO: maybe restructure the loop? We don't want to catch
            # EOFError inside interpret_commands.
            print("(end of input detected)")
            exit_requested = True
    
    publish('console-ending')
    return
Example #6
def comp(command, *args, **kwargs):
    ''' Main method to define a computation.

        Extra arguments:

        :arg:job_id:    sets the job id (respects job_prefix)
        :arg:extra_dep: extra dependencies (not passed as arguments)
    '''
    if compmake.compmake_status == compmake_status_slave:
        return None
    
    # Check that the command is picklable
    try:
        pickle.dumps(command)
    except Exception:
        msg = ('Cannot pickle %r. Make sure it is not a lambda function or a '
               'nested function. (This is a limitation of Python.)' % command)
        raise SerializationError(msg)
    
    args = list(args)  # convert the args tuple to a list so it can be modified
    # Get job id from arguments
    job_id_key = 'job_id'
    if job_id_key in kwargs:
        # Make sure the function does not itself take a 'job_id' parameter
        argspec = inspect.getargspec(command)
        
        if job_id_key in argspec.args:
            msg = ("You cannot define the job id in this way because 'job_id' " 
                   "is already a parameter of this function.")
            raise UserError(msg)    
        
        job_id = kwargs[job_id_key]
        if job_prefix:
            job_id = '%s-%s' % (job_prefix, job_id)
        del kwargs[job_id_key]
        
        if job_id in jobs_defined_in_this_session:
            raise UserError('Job %r already defined.' % job_id)
    else:
        job_id = generate_job_id(command)
    
    jobs_defined_in_this_session.add(job_id)
     
    if 'extra_dep' in kwargs:
        extra_dep = collect_dependencies(kwargs['extra_dep'])
        del kwargs['extra_dep']
    else:
        extra_dep = set()
        
    children = collect_dependencies([args, kwargs])
    children.update(extra_dep)
    
    all_args = (command, args, kwargs) 
    
    command_desc = command.__name__
    
    c = Job(job_id=job_id, children=list(children), command_desc=command_desc)
    # TODO: check for loops     
            
    for child in children:
        child_comp = get_job(child)
        if job_id not in child_comp.parents:
            child_comp.parents.append(job_id)
            set_job(child, child_comp)
    
    if job_exists(job_id):
        # OK, this is going to be black magic.
        # We want to load the previous job definition;
        # however, unpickling it will start __import__()ing
        # the modules, perhaps even the one that is calling us.
        # What happens then is that it tries to add this
        # computation again, recursively.
        # So we temporarily switch to slave mode, so that
        # recursive calls to comp() are disabled.
        
        if compmake_config.check_params: #@UndefinedVariable
            old_status = compmake_status
            set_compmake_status(compmake_status_slave) 
            old_computation = get_job(job_id)
            set_compmake_status(old_status)
            
            assert False, 'update for job_args'
            same, reason = old_computation.same_computation(c)
            
            if not same:
                set_job(job_id, c)
                set_job_args(job_id, all_args)
                publish('job-redefined', job_id=job_id, reason=reason)
                # XXX TODO clean the cache
            else:
                publish('job-already-defined', job_id=job_id)
        else:
            # We assume everything's ok
            set_job(job_id, c)
            set_job_args(job_id, all_args)
            publish('job-defined', job_id=job_id)
    
    else:    
        set_job(job_id, c)
        set_job_args(job_id, all_args)
        publish('job-defined', job_id=job_id)
        
    assert job_exists(job_id)
    assert job_args_exists(job_id)

    return Promise(job_id)
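
A minimal usage sketch for comp(), based only on the docstring and the body above; prepare_environment, load and summarize are hypothetical placeholder functions, and 'data.txt' is an invented filename. Dependencies are collected automatically from Promise objects passed as arguments, while extra_dep declares a dependency that is not passed as an argument.

def prepare_environment():
    pass

def load(filename):
    return open(filename).read()

def summarize(text):
    return len(text)

setup = comp(prepare_environment, job_id='prepare')
data = comp(load, 'data.txt', job_id='load-data')
# 'summarize' depends on 'load-data' through its argument, and on
# 'prepare' through extra_dep, even though setup is not an argument.
result = comp(summarize, data, job_id='summarize', extra_dep=[setup])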
Example #7
def make(job_id, more=False):
    """ Makes a single job. Returns the user-object or raises JobFailed """
    host = compmake_config.hostname #@UndefinedVariable
    
    setproctitle(job_id)
     
    # TODO: should we make sure we are up to date???
    up, reason = up_to_date(job_id) #@UnusedVariable
    cache = get_job_cache(job_id)
    want_more = cache.state == Cache.MORE_REQUESTED
    if up and not (more and want_more):
        # print "%s is up to date" % job_id
        assert is_job_userobject_available(job_id)
        return get_job_userobject(job_id)
    else:
        # if up and (more and want_more): # XXX review the logic 
        #    reason = 'want more'
        # print "Making %s (%s)" % (job_id, reason)
        computation = get_job(job_id)
        
        assert(cache.state in [Cache.NOT_STARTED, Cache.IN_PROGRESS,
                               Cache.MORE_REQUESTED, Cache.DONE, Cache.FAILED])
        
        if cache.state == Cache.NOT_STARTED:
            previous_user_object = None
            cache.state = Cache.IN_PROGRESS
        elif cache.state == Cache.FAILED:
            previous_user_object = None
            cache.state = Cache.IN_PROGRESS
        elif cache.state == Cache.IN_PROGRESS:
            if is_job_tmpobject_available(job_id):
                previous_user_object = get_job_tmpobject(job_id)
            else:
                previous_user_object = None
        elif cache.state == Cache.MORE_REQUESTED:
            assert(is_job_userobject_available(job_id))
            if is_job_tmpobject_available(job_id):
                # resuming more computation
                previous_user_object = get_job_tmpobject(job_id)
            else:
                # starting more computation
                previous_user_object = get_job_userobject(job_id)
        elif cache.state == Cache.DONE:
            # If we are done, it means children have been updated
            assert(not up)
            previous_user_object = None
        else:
            assert(False)
        
        # update state
        cache.time_start = time()
        cpu_start = clock()
        set_job_cache(job_id, cache)
        
        def progress_callback(stack):
            publish('job-progress-plus', job_id=job_id, host=host, stack=stack)
        
        init_progress_tracking(progress_callback)
        
        num, total = 0, None
        user_object = None

        capture = OutputCapture(prefix=job_id,
            echo_stdout=compmake_config.echo_stdout, #@UndefinedVariable
            echo_stderr=compmake_config.echo_stderr) #@UndefinedVariable
        try: 
            result = computation.compute(previous_user_object)
            
            if type(result) == GeneratorType:
                try:
                    while True:
                        next_value = next(result)
                        if isinstance(next_value, tuple):
                            if len(next_value) != 3:
                                raise CompmakeException(
                                    'If a computation yields a tuple, it '
                                    'should be a tuple with 3 elements. '
                                    'Got: %s' % str(next_value))
                            user_object, num, total = next_value

                            publish('job-progress', job_id=job_id, host=host,
                                    done=None, progress=num, goal=total)
                            if compmake_config.save_progress: #@UndefinedVariable
                                set_job_tmpobject(job_id, user_object)
                            
                except StopIteration:
                    pass
            else:
                publish('job-progress', job_id=job_id, host=host,
                        done=1, progress=1, goal=1)

                user_object = result

        
        except KeyboardInterrupt: 
            # TODO: clear progress cache
            # Save the current progress:
            cache.iterations_in_progress = num
            cache.iterations_goal = total
            if user_object is not None:
                set_job_tmpobject(job_id, user_object)
            
            set_job_cache(job_id, cache)

            # clear progress cache
            publish('job-interrupted', job_id=job_id, host=host)
            raise JobInterrupted('Keyboard interrupt')
        
        except Exception as e:
            sio = StringIO()
            print_exc(file=sio)
            bt = sio.getvalue()
            
            error("Job %s failed: %s" % (job_id, e))
            error(bt)
            
            mark_as_failed(job_id, e, bt)
            
            # clear progress cache
            publish('job-failed', job_id=job_id, host=host, reason=e)
            raise JobFailed('Job %s failed: %s' % (job_id, e))
    
        finally:
            capture.deactivate()
            # Even if the job raised an error, save the captured output.
            cache = get_job_cache(job_id)
            cache.captured_stderr = capture.stderr_replacement.buffer.getvalue()
            cache.captured_stdout = capture.stdout_replacement.buffer.getvalue()
            set_job_cache(job_id, cache)
            
        set_job_userobject(job_id, user_object)
                
        if is_job_tmpobject_available(job_id):
            # We only have one with yield
            delete_job_tmpobject(job_id)
        
        cache.state = Cache.DONE
        cache.timestamp = time()
        walltime = cache.timestamp - cache.time_start 
        cputime = clock() - cpu_start
        # FIXME walltime/cputime not precise (especially for "more" computation)
        cache.walltime_used = walltime
        cache.cputime_used = cputime
        cache.done_iterations = num # XXX not true
        cache.host = compmake_config.hostname #@UndefinedVariable
        
        set_job_cache(job_id, cache)
        
        publish('job-succeeded', job_id=job_id, host=host)

        # TODO: clear these records in other place
        return user_object
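
A minimal sketch of calling make() directly, based only on its docstring ("Returns the user-object or raises JobFailed"); the job id 'summarize' is a hypothetical example, and in practice make() is normally driven by the manager or the console rather than called by hand.

try:
    user_object = make('summarize')
except JobFailed as e:
    error('Job did not complete: %s' % e)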
Example #8
def progress_callback(stack):
    publish('job-progress-plus', job_id=job_id, host=host, stack=stack)