Esempio n. 1
0
def initialize_backend():
    """ Activates the storage backend named by ``compmake_config.db``.

        * 'redis': ``compmake_config.redis_host`` is used as the host; if it
          contains a colon it is split into host and port (the port is
          passed on as the string found after the colon), otherwise the
          port is None. The pair is handed to use_redis().
        * 'filesystem': ``compmake_config.path`` is handed to
          use_filesystem().

        Exits the process with code -1 if the backend name is not one of
        the two above, and with code -54 if ``compmake.storage.db`` is
        falsy after the backend has been set up. """
    backend = compmake_config.db #@UndefinedVariable
    known_backends = ['filesystem', 'redis']

    if backend not in known_backends:
        message = ('Backend name "%s" not valid. I was expecting one in %s.' %
                   (backend, known_backends))
        user_error(message)
        sys.exit(-1)

    if backend == 'filesystem':
        use_filesystem(compmake_config.path) #@UndefinedVariable
    elif backend == 'redis':
        address = compmake_config.redis_host #@UndefinedVariable
        # XXX this should be done elsewhere
        if ':' in address:
            host, port = address.split(':')
        else:
            host, port = address, None
        use_redis(host, port)
    else:
        # Not reachable: the name was validated against known_backends.
        assert False

    # Imported only now, after the backend has been selected above.
    from compmake.storage import db
    if not db:
        error('There was some error in initializing db.')
        sys.exit(-54)
Esempio n. 2
0
    def job_failed(self, job_id):
        ''' Records the failure of a job that was being processed.

            The job is logged, added to ``failed`` and dropped from
            ``todo``, ``processing`` and ``processing2result``. Every
            direct parent is then marked as failed; parents that were
            still in ``todo`` are moved to ``failed`` and taken out of
            ``ready_todo`` if they were in it. '''
        error('Job %s failed ' % job_id)

        # Move the job itself out of the active structures.
        self.failed.add(job_id)
        self.todo.remove(job_id)
        self.processing.remove(job_id)
        del self.processing2result[job_id]

        # A parent cannot be made once one of its dependencies has failed.
        for parent in set(parents(job_id)):
            mark_as_failed(parent, 'Failure of dependency %s' % job_id)
            if parent not in self.todo:
                continue
            self.todo.remove(parent)
            self.failed.add(parent)
            if parent in self.ready_todo:
                self.ready_todo.remove(parent)
Esempio n. 3
0
 def host_failed(self, job_id):
     ''' Handles the failure of the host that was running a job.

         The job is logged and removed from ``processing`` and
         ``processing2result``; it is then asserted to still be in
         ``todo`` and put back into ``ready_todo``. '''
     message = 'Job %s: host failed' % job_id
     error(message)

     # Forget the in-flight execution of this job.
     self.processing.remove(job_id)
     del self.processing2result[job_id]

     # The job is still pending, so make it schedulable again.
     assert job_id in self.todo
     self.ready_todo.add(job_id)
Esempio n. 4
0
def make(job_id, more=False):
    """ Makes a single job. Returns the user-object or raises JobFailed.

        If the job is up to date, the stored user object is returned
        without computing anything -- unless ``more`` is true and the
        cache state is MORE_REQUESTED, in which case it is computed again.

        Otherwise the job's computation is run with the previous (partial)
        result chosen from the cache state. The computation may return its
        result directly, or be a generator yielding 3-tuples
        ``(user_object, num, total)`` to report progress; yielded values
        that are not tuples are ignored. Progress, success, failure and
        interruption are announced through publish(). Captured
        stdout/stderr are stored in the job cache whether or not the
        computation succeeds.

        job_id: identifier of the job to make.
        more:   allow further computation on a job that is up to date
                when the cache says more was requested.

        Raises JobInterrupted on KeyboardInterrupt (the partial result, if
        any, is saved as the temporary object first) and JobFailed if the
        computation raises any other Exception (this includes a yielded
        tuple whose length is not 3).
    """
    host = compmake_config.hostname #@UndefinedVariable

    setproctitle(job_id)

    # TODO: should we make sure we are up to date???
    up, _reason = up_to_date(job_id)
    cache = get_job_cache(job_id)
    want_more = cache.state == Cache.MORE_REQUESTED
    if up and not (more and want_more):
        # Nothing to compute: hand back the stored result.
        assert is_job_userobject_available(job_id)
        return get_job_userobject(job_id)

    # XXX review the logic for the case (up and more and want_more)
    computation = get_job(job_id)

    assert(cache.state in [Cache.NOT_STARTED, Cache.IN_PROGRESS,
                           Cache.MORE_REQUESTED, Cache.DONE, Cache.FAILED])

    # Decide what, if anything, the computation resumes from. This is a
    # single if/elif chain on purpose: a job that was NOT_STARTED (or
    # FAILED) starts from scratch and must not fall through to the
    # IN_PROGRESS branch after its state has been updated.
    if cache.state in (Cache.NOT_STARTED, Cache.FAILED):
        previous_user_object = None
        cache.state = Cache.IN_PROGRESS
    elif cache.state == Cache.IN_PROGRESS:
        if is_job_tmpobject_available(job_id):
            previous_user_object = get_job_tmpobject(job_id)
        else:
            previous_user_object = None
    elif cache.state == Cache.MORE_REQUESTED:
        assert(is_job_userobject_available(job_id))
        if is_job_tmpobject_available(job_id):
            # resuming more computation
            previous_user_object = get_job_tmpobject(job_id)
        else:
            # starting more computation
            previous_user_object = get_job_userobject(job_id)
    elif cache.state == Cache.DONE:
        # If we are done, it means children have been updated
        assert(not up)
        previous_user_object = None
    else:
        assert(False)

    # update state
    cache.time_start = time()
    cpu_start = clock()
    set_job_cache(job_id, cache)

    def progress_callback(stack):
        publish('job-progress-plus', job_id=job_id, host=host, stack=stack)

    init_progress_tracking(progress_callback)

    num, total = 0, None
    user_object = None

    capture = OutputCapture(prefix=job_id,
        echo_stdout=compmake_config.echo_stdout, #@UndefinedVariable
        echo_stderr=compmake_config.echo_stderr) #@UndefinedVariable
    try:
        result = computation.compute(previous_user_object)

        if isinstance(result, GeneratorType):
            # Iterating with a for loop works on both Python 2 and 3 and
            # only treats the generator's own exhaustion as the end.
            for item in result:
                if not isinstance(item, tuple):
                    continue
                if len(item) != 3:
                    raise CompmakeException('If computation yields a tuple, ' +
                                          'should be a tuple with 3 elemnts.' +
                                          'Got: %s' % str(item))
                user_object, num, total = item

                publish('job-progress', job_id=job_id, host=host,
                        done=None, progress=num, goal=total)
                if compmake_config.save_progress: #@UndefinedVariable
                    set_job_tmpobject(job_id, user_object)
        else:
            publish('job-progress', job_id=job_id, host=host,
                    done=1, progress=1, goal=1)

            user_object = result

    except KeyboardInterrupt:
        # TODO: clear progress cache
        # Save the current progress. Compare against None so that a falsy
        # partial result (0, empty container, ...) is not thrown away.
        cache.iterations_in_progress = num
        cache.iterations_goal = total
        if user_object is not None:
            set_job_tmpobject(job_id, user_object)

        set_job_cache(job_id, cache)

        # clear progress cache
        publish('job-interrupted', job_id=job_id, host=host)
        raise JobInterrupted('Keyboard interrupt')

    except Exception as e:
        sio = StringIO()
        print_exc(file=sio)
        bt = sio.getvalue()

        error("Job %s failed: %s" % (job_id, e))
        error(bt)

        mark_as_failed(job_id, e, bt)

        # clear progress cache
        publish('job-failed', job_id=job_id, host=host, reason=e)
        raise JobFailed('Job %s failed: %s' % (job_id, e))

    finally:
        capture.deactivate()
        # even if we send an error, let's save the output of the process
        # (the cache is read again because the handlers above may have
        # stored a newer version of it)
        cache = get_job_cache(job_id)
        cache.captured_stderr = capture.stderr_replacement.buffer.getvalue()
        cache.captured_stdout = capture.stdout_replacement.buffer.getvalue()
        set_job_cache(job_id, cache)

    set_job_userobject(job_id, user_object)

    if is_job_tmpobject_available(job_id):
        # We only have one with yield
        delete_job_tmpobject(job_id)

    cache.state = Cache.DONE
    cache.timestamp = time()
    walltime = cache.timestamp - cache.time_start
    cputime = clock() - cpu_start
    # FIXME walltime/cputime not precise (especially for "more" computation)
    cache.walltime_used = walltime
    cache.cputime_used = cputime
    cache.done_iterations = num # XXX not true
    cache.host = compmake_config.hostname #@UndefinedVariable

    set_job_cache(job_id, cache)

    publish('job-succeeded', job_id=job_id, host=host)

    # TODO: clear these records in other place
    return user_object
Esempio n. 5
0
def main():
    """ Command-line entry point of compmake.

        Parses the command line, initializes the storage backend, loads
        the plugins and then either runs the commands given after the
        module/namespace name or starts the interactive console.

        In normal mode the first argument is the name of the module that
        defines the jobs; it is imported here. In slave mode (``--slave``)
        the first argument is only used as the namespace name.

        The function always ends through sys.exit(). Exit codes set here:
        -2 for a missing argument or ``--redis_events`` without the redis
        backend, -5 if the module cannot be imported, -6 if the commands
        raise UserError; otherwise the return code of
        interpret_commands() or interactive_console() is used. """
    setproctitle('compmake')

    parser = OptionParser(version=version)

    parser.add_option("--slave", action="store_true",
                      default=False, dest="slave",
                      help="[internal] Runs compmake in slave mode.")

    parser.add_option("--redis_events", action="store_true",
                      default=False, dest="redis_events",
                      help="[internal] Relays events using Redis.")

    config_populate_optparser(parser)

    (options, args) = parser.parse_args()

    initialize_backend()

    # We load plugins after we parsed the configuration
    from compmake import plugins #@UnusedImport

    if options.redis_events:
        if compmake_config.db != 'redis': #@UndefinedVariable
            error('Cannot use redis_events without redis.')
            sys.exit(-2)

        from compmake.storage.redisdb import RedisInterface

        # register an handler that will capture all events
        def handler(event):
            RedisInterface.events_push(event)

        remove_all_handlers()
        register_handler("*", handler)

    if not options.slave:
        # XXX make sure this is the default
        set_compmake_status(compmake_status_interactive)

        # TODO: add command namespace
        # TODO: add command "load"
        if not args:
            user_error('I expect at least one parameter (module name)')
            sys.exit(-2)

        module_name = args[0]
        args = args[1:]

        if module_name.endswith('.py') or (module_name.find('/') > 0):
            warning('You passed a string "%s" which looks like a filename.' %
                    module_name)
            # Drop only a trailing '.py', and do it before turning the
            # path separators into dots: removing every '.py' substring
            # afterwards would mangle names such as 'pkg/pyfoo.py'
            # ('pkg.pyfoo.py' -> 'pkgfoo').
            if module_name.endswith('.py'):
                module_name = module_name[:-len('.py')]
            module_name = module_name.replace('/', '.')
            warning('However, I need a module name. I will try with "%s".' %
                    module_name)

        set_namespace(module_name)
        compmake.is_it_time = True
        try:
            __import__(module_name)
        except Exception as e:
            # Top-level boundary: report whatever the user module raised
            # while being imported, then give up.
            error('Error while trying to import module "%s": %s' %
                  (module_name, e))
            traceback.print_exc(file=sys.stderr)
            sys.exit(-5)

        # TODO: BUG: XXX: remove old jobs those in defined_this_section
    else:
        set_compmake_status(compmake_status_slave)

        if not args:
            user_error('I expect at least one parameter (namespace name)')
            sys.exit(-2)

        module_name = args.pop(0)
        set_namespace(module_name)

    if args:
        # Remaining arguments are commands to run non-interactively.
        try:
            # XXX is this redundant?
            # compmake_config.interactive = False
            retcode = interpret_commands(args)
            sys.exit(retcode)
        except UserError as e:
            user_error(e)
            sys.exit(-6)
    else:
        retcode = interactive_console()
        sys.exit(retcode)