コード例 #1
0
def cluster_job(config, job_id, more=False):
    """ Makes a single job on a remote host, through ssh.

        A ``compmake --slave ... make_single`` command line is built and
        executed on the remote host with ssh. The ssh connection is opened
        with a ``-R`` forward from port ``13000 + config.instance`` to the
        address given by RedisInterface.host / RedisInterface.port, and
        the remote compmake is told to use ``localhost:<that port>`` as
        its redis host.

        Arguments:
          config: host description; the attributes read here are
                  ``name``, ``instance``, ``username`` and ``host``.
          job_id: id of the job to make.
          more:   forwarded as ``more=...`` to the remote ``make_single``.

        Returns the exit status of ssh (which is 0 whenever this
        function returns normally).

        Raises JobFailed if ssh exits with RET_CODE_JOB_FAILED, and
        HostFailed for any other nonzero exit status.
    """
    setproctitle('%s %s' % (job_id, config.name))

    proxy_port = 13000 + config.instance

    # config.name may carry an instance suffix after ':'; the remote
    # compmake is only given the part before it.
    hostname_no_instance = config.name.split(':')[0]
    nice = compmake_config.cluster_nice #@UndefinedVariable

    compmake_cmd = (
        'nice -n %d compmake --hostname=%s --db=redis --redis_events '
        '--redis_host=localhost:%s --slave  %s --save_progress=False'
        '     make_single more=%s %s'
        % (nice, hostname_no_instance, proxy_port, get_namespace(),
           more, job_id))

    redis_host = RedisInterface.host
    redis_port = RedisInterface.port
    if config.username:
        connection_string = '%s@%s' % (config.username, config.host)
    else:
        connection_string = config.host

    # TODO: make additional switches configurable
    args = ['ssh', connection_string, '-X', '-R',
            '%s:%s:%s' % (proxy_port, redis_host, redis_port),
            '%s' % compmake_cmd]

    if compmake_config.cluster_show_cmd: #@UndefinedVariable
        # Parenthesized single argument: prints the same text whether
        # print is a statement or a function.
        print(" ".join(args))

    PIPE = subprocess.PIPE
    p = subprocess.Popen(args, stdout=PIPE, stdin=PIPE, stderr=PIPE)
    # communicate() drains stdout/stderr while waiting for the child.
    # A bare wait() would deadlock as soon as the child writes more than
    # an OS pipe buffer can hold, because nobody is reading the pipes.
    p.communicate()
    ret = p.returncode

    if ret == RET_CODE_JOB_FAILED:
        raise JobFailed('Job %s failed' % job_id)

    if ret != 0:
        raise HostFailed('Job %s: host failed: (line: "%s", ret=%s)' %
                             (job_id, " ".join(args), ret))

    return ret
コード例 #2
0
ファイル: actions.py プロジェクト: welinder/compmake
def make(job_id, more=False):
    """ Makes a single job. Returns the user-object or raises JobFailed.

        If the job is up to date (and no additional computation is both
        requested through ``more`` and pending in the cache), the stored
        user object is returned without computing anything.

        Otherwise the job's computation is run. If it returns a
        generator, every yielded 3-tuple ``(user_object, num, total)`` is
        published as a 'job-progress' event and, if
        compmake_config.save_progress is set, the partial user object is
        stored as the job's temporary object. Yielded values that are not
        tuples are ignored. Any other return value is taken as the final
        user object.

        Whatever the outcome, the output captured during the computation
        is stored in the job cache.

        Arguments:
          job_id: id of the job to make.
          more:   if True, a job whose cache state is MORE_REQUESTED is
                  recomputed even though it is up to date.

        Raises:
          JobInterrupted: on KeyboardInterrupt; the progress counters
                          and the last partial user object are saved first.
          JobFailed:      if the computation raises any other Exception
                          (including a yielded tuple of the wrong length);
                          the job is marked as failed first.
    """
    host = compmake_config.hostname #@UndefinedVariable

    setproctitle(job_id)

    # TODO: should we make sure we are up to date???
    up, reason = up_to_date(job_id) #@UnusedVariable
    cache = get_job_cache(job_id)
    want_more = cache.state == Cache.MORE_REQUESTED
    if up and not (more and want_more):
        assert is_job_userobject_available(job_id)
        return get_job_userobject(job_id)

    # XXX review the logic for the case: up and (more and want_more)
    computation = get_job(job_id)

    assert(cache.state in [Cache.NOT_STARTED, Cache.IN_PROGRESS,
                           Cache.MORE_REQUESTED, Cache.DONE, Cache.FAILED])

    # Choose the object the computation starts from. This must be a single
    # if/elif chain: a job that has just been moved from NOT_STARTED to
    # IN_PROGRESS must not be treated as a resumed job and handed a stale
    # temporary object.
    if cache.state in (Cache.NOT_STARTED, Cache.FAILED):
        previous_user_object = None
        cache.state = Cache.IN_PROGRESS
    elif cache.state == Cache.IN_PROGRESS:
        # resuming an interrupted computation, if progress was saved
        if is_job_tmpobject_available(job_id):
            previous_user_object = get_job_tmpobject(job_id)
        else:
            previous_user_object = None
    elif cache.state == Cache.MORE_REQUESTED:
        assert(is_job_userobject_available(job_id))
        if is_job_tmpobject_available(job_id):
            # resuming more computation
            previous_user_object = get_job_tmpobject(job_id)
        else:
            # starting more computation
            previous_user_object = get_job_userobject(job_id)
    elif cache.state == Cache.DONE:
        # If we are done, it means children have been updated
        assert(not up)
        previous_user_object = None
    else:
        assert(False)

    # update state
    cache.time_start = time()
    cpu_start = clock()
    set_job_cache(job_id, cache)

    def progress_callback(stack):
        publish('job-progress-plus', job_id=job_id, host=host, stack=stack)

    init_progress_tracking(progress_callback)

    num, total = 0, None
    user_object = None

    capture = OutputCapture(prefix=job_id,
        echo_stdout=compmake_config.echo_stdout, #@UndefinedVariable
        echo_stderr=compmake_config.echo_stderr) #@UndefinedVariable
    try:
        result = computation.compute(previous_user_object)

        if isinstance(result, GeneratorType):
            # Incremental computation: each 3-tuple is a progress report
            # carrying the partial result.
            for item in result:
                if isinstance(item, tuple):
                    if len(item) != 3:
                        raise CompmakeException('If computation yields a tuple, ' +
                                              'should be a tuple with 3 elemnts.' +
                                              'Got: %s' % str(item))
                    user_object, num, total = item

                    publish('job-progress', job_id=job_id, host=host,
                            done=None, progress=num, goal=total)
                    if compmake_config.save_progress: #@UndefinedVariable
                        set_job_tmpobject(job_id, user_object)
        else:
            publish('job-progress', job_id=job_id, host=host,
                    done=1, progress=1, goal=1)

            user_object = result

    except KeyboardInterrupt:
        # TODO: clear progress cache
        # Save the current progress:
        cache.iterations_in_progress = num
        cache.iterations_goal = total
        # Compare against None explicitly: a partial result may be falsy
        # (0, empty container) or may not support truth testing at all.
        if user_object is not None:
            set_job_tmpobject(job_id, user_object)

        set_job_cache(job_id, cache)

        # clear progress cache
        publish('job-interrupted', job_id=job_id, host=host)
        raise JobInterrupted('Keyboard interrupt')

    except Exception as e:
        sio = StringIO()
        print_exc(file=sio)
        bt = sio.getvalue()

        error("Job %s failed: %s" % (job_id, e))
        error(bt)

        mark_as_failed(job_id, e, bt)

        # clear progress cache
        publish('job-failed', job_id=job_id, host=host, reason=e)
        raise JobFailed('Job %s failed: %s' % (job_id, e))

    finally:
        capture.deactivate()
        # even if we send an error, let's save the output of the process.
        # The cache is read back from storage because the handlers above
        # may have stored a newer version of it.
        cache = get_job_cache(job_id)
        cache.captured_stderr = capture.stderr_replacement.buffer.getvalue()
        cache.captured_stdout = capture.stdout_replacement.buffer.getvalue()
        set_job_cache(job_id, cache)

    set_job_userobject(job_id, user_object)

    if is_job_tmpobject_available(job_id):
        # We only have one with yield
        delete_job_tmpobject(job_id)

    cache.state = Cache.DONE
    cache.timestamp = time()
    walltime = cache.timestamp - cache.time_start
    cputime = clock() - cpu_start
    # FIXME walltime/cputime not precise (especially for "more" computation)
    cache.walltime_used = walltime
    cache.cputime_used = cputime
    cache.done_iterations = num # XXX not true
    cache.host = compmake_config.hostname #@UndefinedVariable

    set_job_cache(job_id, cache)

    publish('job-succeeded', job_id=job_id, host=host)

    # TODO: clear these records in other place
    return user_object
コード例 #3
0
ファイル: master.py プロジェクト: welinder/compmake
def main():
    """ Command-line entry point of compmake.

        Parses the command line, initializes the storage backend and
        loads the plugins. Then:

        - in normal mode, the first positional argument is the name of
          the module that defines the jobs; it is used as the namespace
          and imported;
        - in slave mode (--slave), the first positional argument is the
          namespace name and nothing is imported.

        Any remaining arguments are interpreted as compmake commands;
        if there are none, the interactive console is started.

        With --redis_events all event handlers are replaced by a single
        one that pushes every event through RedisInterface.events_push.

        This function does not return: it always ends in sys.exit().
        Exit statuses used here: -2 for usage errors, -5 if the module
        cannot be imported, -6 if a command raises UserError; otherwise
        the value returned by the command interpreter or by the console.
    """
    setproctitle('compmake')

    parser = OptionParser(version=version)

    parser.add_option("--slave", action="store_true",
                      default=False, dest="slave",
                      help="[internal] Runs compmake in slave mode.")

    parser.add_option("--redis_events", action="store_true",
                      default=False, dest="redis_events",
                      help="[internal] Relays events using Redis.")

    config_populate_optparser(parser)

    (options, args) = parser.parse_args()

    initialize_backend()

    # We load plugins after we parsed the configuration
    from compmake import plugins #@UnusedImport

    if options.redis_events:
        if compmake_config.db != 'redis': #@UndefinedVariable
            error('Cannot use redis_events without redis.')
            sys.exit(-2)

        from compmake.storage.redisdb import RedisInterface

        # register an handler that will capture all events
        def handler(event):
            RedisInterface.events_push(event)

        remove_all_handlers()
        register_handler("*", handler)

    if not options.slave:
        # XXX make sure this is the default
        set_compmake_status(compmake_status_interactive)

        # TODO: add command namespace
        # TODO: add command "load"
        if not args:
            user_error('I expect at least one parameter (module name)')
            sys.exit(-2)

        module_name = args[0]
        args = args[1:]

        if module_name.endswith('.py') or (module_name.find('/') > 0):
            warning('You passed a string "%s" which looks like a filename.' %
                    module_name)
            # Drop only a trailing '.py'. Removing every '.py' substring
            # would also mangle components that merely start with "py"
            # once the slashes have become dots
            # (e.g. "proj/pyutils/job.py" -> "projutils.job").
            if module_name.endswith('.py'):
                module_name = module_name[:-len('.py')]
            module_name = module_name.replace('/', '.')
            warning('However, I need a module name. I will try with "%s".' %
                    module_name)

        set_namespace(module_name)
        compmake.is_it_time = True
        try:
            __import__(module_name)
        except Exception as e:
            error('Error while trying to import module "%s": %s' %
                  (module_name, e))
            traceback.print_exc(file=sys.stderr)
            sys.exit(-5)

        # TODO: BUG: XXX: remove old jobs those in defined_this_section
    else:
        set_compmake_status(compmake_status_slave)

        if not args:
            user_error('I expect at least one parameter (namespace name)')
            sys.exit(-2)

        module_name = args.pop(0)
        set_namespace(module_name)

    if args:
        try:
            # XXX is this redundant?
            # compmake_config.interactive = False
            retcode = interpret_commands(args)
            sys.exit(retcode)
        except UserError as e:
            user_error(e)
            sys.exit(-6)
    else:
        retcode = interactive_console()
        sys.exit(retcode)