def call_method(self, method, options={}, datasets={}, jobs={}, record_in=None, record_as=None, why_build=False, caption=None, workdir=None, concurrency=None, **kw):
    if method not in self._method_info:
        raise Exception('Unknown method %s' % (method,))
    info = self._method_info[method]
    params = dict(options=dict(options), datasets=dict(datasets), jobs=dict(jobs))
    # Map each parameter name to the section(s) (options/datasets/jobs) it
    # appears in, so plain keyword arguments can be routed to the right section.
    argmap = defaultdict(list)
    for thing in ('options', 'datasets', 'jobs'):
        for n in info[thing]:
            argmap[n].append(thing)
    for k, v in kw.items():
        if k not in argmap:
            raise Exception('Keyword %s not in options/datasets/jobs for method %s' % (k, method,))
        if len(argmap[k]) != 1:
            raise Exception('Keyword %s has several targets on method %s: %r' % (k, method, argmap[k],))
        params[argmap[k][0]][k] = v
    jid, res = self._submit(method, caption=caption, why_build=why_build, workdir=workdir, concurrency=concurrency, **params)
    if why_build: # specified by caller
        return res.why_build
    if 'why_build' in res: # done by server anyway (because --flags why_build)
        print("Would have built from:")
        print("======================")
        print(setupfile.encode_setup(self.history[-1][0], as_str=True))
        print("Could have avoided build if:")
        print("============================")
        print(json_encode(res.why_build, as_str=True))
        print()
        from inspect import stack
        stk = stack()[2]
        print("Called from %s line %d" % (stk[1], stk[2],))
        exit()
    jid = Job(jid, record_as or method)
    self.record[record_in].append(jid)
    return jid
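# Usage sketch (hypothetical caller, not part of this module): keyword
# arguments are routed into options/datasets/jobs automatically, so assuming a
# method 'csvimport' that declares an option 'filename' and a jobs entry
# 'previous', the following two calls would be equivalent:
#
#     urd.call_method('csvimport', options={'filename': 'data.csv'}, jobs={'previous': prev})
#     urd.call_method('csvimport', filename='data.csv', previous=prev)
#
# A name declared in more than one section must be passed in that section's
# dict explicitly; the keyword form raises since the target is ambiguous.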
def _handle_req(self, path, args):
    if path[0] == 'status':
        data = job_tracking.get(args.get('subjob_cookie') or None)
        if not data:
            self.do_response(400, 'text/plain', 'bad subjob_cookie!\n')
            return
        # Long-poll the job lock for up to 'timeout' seconds (capped at 128).
        timeout = min(float(args.get('timeout', 0)), 128)
        status = DotDict(idle=data.lock.acquire(False))
        deadline = time.time() + timeout
        while not status.idle and time.time() < deadline:
            time.sleep(0.1)
            status.idle = data.lock.acquire(False)
        if status.idle:
            if data.last_error:
                status.last_error_time = data.last_error[0]
            status.last_time = data.last_time
            data.lock.release()
        elif path == ['status', 'full']:
            status.status_stacks, status.current = status_stacks_export()
        status.report_t = time.time()
        self.do_response(200, "text/json", status)
        return
    elif path == ['last_error']:
        data = job_tracking.get(args.get('subjob_cookie') or None)
        if not data:
            self.do_response(400, 'text/plain', 'bad subjob_cookie!\n')
            return
        status = DotDict()
        if data.last_error:
            status.time = data.last_error[0]
            status.last_error = data.last_error[1]
        self.do_response(200, "text/json", status)
        return
    elif path == ['list_workdirs']:
        ws = {k: v.path for k, v in self.ctrl.list_workdirs().items()}
        self.do_response(200, "text/json", ws)
    elif path[0] == 'workdir':
        self.do_response(200, "text/json", self.ctrl.DataBase.db_by_workdir[path[1]])
    elif path == ['config']:
        self.do_response(200, "text/json", self.ctrl.config)
    elif path == ['update_methods']:
        self.do_response(200, "text/json", self.ctrl.update_methods())
    elif path == ['methods']:
        # Return a json with everything the Method object knows about the methods.
        self.do_response(200, "text/json", self.ctrl.get_methods())
    elif path[0] == 'method_info':
        method = path[1]
        self.do_response(200, "text/json", self.ctrl.method_info(method))
    elif path[0] == 'workspace_info':
        self.do_response(200, 'text/json', self.ctrl.get_workspace_details())
    elif path[0] == 'abort':
        tokill = list(children)
        print('Force abort', tokill)
        for child in tokill:
            os.killpg(child, signal.SIGKILL)
        self.do_response(200, 'text/json', {'killed': len(tokill)})
    elif path[0] == 'method2job':
        method, num = path[1:]
        jobs = self.ctrl.DataBase.db_by_method.get(method, ())
        start_ix = 0
        start_from = args.get('start_from')
        if start_from:
            for start_ix, job in enumerate(jobs):
                if job.id == start_from:
                    break
            else:
                start_ix = None
        if start_ix is None:
            res = {'error': '%s is not a current %s job' % (start_from, method,)}
        else:
            num = int(num)
            if not jobs:
                res = {'error': 'no current jobs with method %s available' % (method,)}
            elif num + start_ix >= len(jobs):
                res = {'error': 'tried to go %d jobs back from %s, but only %d earlier (current) jobs available' % (num, jobs[start_ix].id, len(jobs) - start_ix - 1,)}
            else:
                res = {'id': jobs[num + start_ix].id}
        self.do_response(200, 'text/json', res)
    elif path[0] == 'job_is_current':
        job = Job(path[1])
        job = self.ctrl.DataBase.db_by_workdir[job.workdir].get(job)
        self.do_response(200, 'text/json', bool(job and job['current']))
    elif path == ['submit']:
        if self.ctrl.broken:
            self.do_response(500, "text/json", {
                'broken': self.ctrl.broken,
                'error': 'Broken methods: ' + ', '.join(sorted(m.split('.')[-1][2:] for m in self.ctrl.broken)),
            })
        elif 'json' in args:
            if DEBUG_WRITE_JSON:
                with open('DEBUG_WRITE.json', 'wb') as fh:
                    fh.write(args['json'])
            setup = json_decode(args['json'])
            data = job_tracking.get(setup.get('subjob_cookie') or None)
            if not data:
                self.do_response(403, 'text/plain', 'bad subjob_cookie!\n')
                return
            if len(job_tracking) - 1 > 5: # max five levels
                print('Too deep subjob nesting!')
                self.do_response(403, 'text/plain', 'Too deep subjob nesting')
                return
            if data.lock.acquire(False):
                still_locked = True
                respond_after = True
                try:
                    if self.DEBUG:
                        print('@server.py: Got the lock!', file=sys.stderr)
                    workdir = setup.get('workdir', data.workdir)
                    jobidv, job_res = self.ctrl.initialise_jobs(setup, workdir)
                    job_res['done'] = False
                    if jobidv:
                        error = []
                        tlock = TLock()
                        link2job = {j['link']: j for j in job_res['jobs'].values()}
                        def run(jobidv, tlock):
                            for jobid in jobidv:
                                passed_cookie = None
                                # This is not a race - all higher locks are locked too.
                                while passed_cookie in job_tracking:
                                    passed_cookie = gen_cookie()
                                job_tracking[passed_cookie] = DotDict(
                                    lock=JLock(),
                                    last_error=None,
                                    last_time=0,
                                    workdir=workdir,
                                )
                                try:
                                    self.ctrl.run_job(jobid, subjob_cookie=passed_cookie, parent_pid=setup.get('parent_pid', 0))
                                    # update database since a new jobid was just created
                                    job = self.ctrl.add_single_jobid(jobid)
                                    with tlock:
                                        link2job[jobid]['make'] = 'DONE'
                                        link2job[jobid]['total_time'] = job.total
                                except JobError as e:
                                    error.append([e.jobid, e.method, e.status])
                                    with tlock:
                                        link2job[jobid]['make'] = 'FAIL'
                                    return
                                finally:
                                    del job_tracking[passed_cookie]
                            # everything was built ok, update symlink
                            try:
                                dn = self.ctrl.workspaces[workdir].path
                                ln = os.path.join(dn, workdir + "-LATEST_")
                                try:
                                    os.unlink(ln)
                                except OSError:
                                    pass
                                os.symlink(jobid, ln)
                                os.rename(ln, os.path.join(dn, workdir + "-LATEST"))
                            except OSError:
                                traceback.print_exc()
                        t = Thread(target=run, name="job runner", args=(jobidv, tlock,))
                        t.daemon = True
                        t.start()
                        t.join(2) # give job two seconds to complete
                        with tlock:
                            for j in link2job.values():
                                if j['make'] in (True, 'FAIL',):
                                    respond_after = False
                                    job_res_json = json_encode(job_res)
                                    break
                        if not respond_after: # not all jobs are done yet, give partial response
                            self.do_response(200, "text/json", job_res_json)
                        t.join() # wait until actually complete
                        del tlock
                        del t
                        # verify that all jobs got built.
                        total_time = 0
                        for j in link2job.values():
                            jobid = j['link']
                            if j['make'] == True:
                                # Well, crap.
                                error.append([jobid, "unknown", {"INTERNAL": "Not built"}])
                                print("INTERNAL ERROR IN JOB BUILDING!", file=sys.stderr)
                            total_time += j.get('total_time', 0)
                        if error:
                            data.last_error = (time.time(), error)
                        data.last_time = total_time
                except Exception as e:
                    if respond_after:
                        data.lock.release()
                        still_locked = False
                        self.do_response(500, "text/json", {'error': str(e)})
                    raise
                finally:
                    if still_locked:
                        data.lock.release()
                if respond_after:
                    job_res['done'] = True
                    self.do_response(200, "text/json", job_res)
                if self.DEBUG:
                    print("@server.py: Process releases lock!", file=sys.stderr) # note: has already done http response
            else:
                self.do_response(503, 'text/plain', 'Busy doing work for you...\n')
        else:
            self.do_response(400, 'text/plain', 'Missing json input!\n')
    else:
        self.do_response(404, 'text/plain', 'Unknown path\n')
    return
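# Client-side sketch (hypothetical, for illustration only): the 'status'
# endpoint above long-polls the job lock, so a caller can wait for the server
# to go idle by repeating requests with a timeout. The endpoint name comes
# from _handle_req; the base URL and urllib usage here are assumptions.
#
#     import json
#     from urllib.request import urlopen
#
#     def wait_until_idle(base_url, timeout=10):
#         while True:
#             with urlopen('%s/status?timeout=%d' % (base_url, timeout)) as fh:
#                 status = json.loads(fh.read())
#             if status.get('idle'):
#                 return status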
def get_job(jobid):
    if jobid.endswith('-LATEST'):
        base = jobid.rsplit('-', 1)[0]
        jobid = os.readlink(Job(base + '-0').path[:-2] + '-LATEST')
    return Job(jobid)
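# Usage sketch (workdir name 'dev' is assumed, for illustration): the
# '<workdir>-LATEST' symlink lives next to the numbered jobdirs, so both of
# these resolve to the same Job once 'dev-42' is the most recent job:
#
#     get_job('dev-42')      # explicit jobid
#     get_job('dev-LATEST')  # dereferences the dev-LATEST symlink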
def _name2job(cfg, n):
    if n.startswith(':'):
        # resolve through urd
        assert cfg.urd, 'No urd configured'
        a = n[1:].rsplit(':', 1)
        if len(a) == 1:
            raise JobNotFound('looks like a partial :urdlist:[entry] spec')
        entry = a[1] or '-1'
        try:
            entry = int(entry, 10)
        except ValueError:
            pass
        path, tildes = split_tildes(a[0])
        path = path.split('/')
        if len(path) < 3:
            path.insert(0, environ.get('USER', 'NO-USER'))
        if len(path) < 3:
            path.append('latest')
        path = '/'.join(map(url_quote, path))
        try:
            urdres = urd_call_w_tildes(cfg, path, tildes)
        except UrdError as e:
            print(e, file=sys.stderr)
            urdres = None
        if not urdres:
            raise JobNotFound('urd list %r not found' % (a[0],))
        from accelerator.build import JobList
        joblist = JobList(Job(e[1], e[0]) for e in urdres.joblist)
        res = joblist.get(entry)
        if not res:
            raise JobNotFound('%r not found in %s' % (entry, path,))
        return res
    if re.match(r'[^/]+-\d+$', n):
        # Looks like a jobid
        return Job(n)
    m = re.match(r'([^/]+)-LATEST$', n)
    if m:
        # Looks like workdir-LATEST
        wd = m.group(1)
        if wd not in WORKDIRS:
            raise NoSuchWorkdirError('Not a valid workdir: "%s"' % (wd,))
        path = join(WORKDIRS[wd], n)
        try:
            n = readlink(path)
        except OSError as e:
            raise JobNotFound('Failed to read %s: %s' % (path, e,))
        return Job(n)
    if '/' not in n:
        # Must be a method then
        return method2job(cfg, n)
    if exists(join(n, 'setup.json')):
        # Looks like the path to a jobdir
        path, jid = split(realpath(n))
        job = Job(jid)
        if WORKDIRS.get(job.workdir, path) != path:
            print("### Overriding workdir %s to %s" % (job.workdir, path,))
        WORKDIRS[job.workdir] = path
        return job
    raise JobNotFound("Don't know what to do with %r." % (n,))
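# Spec forms accepted above, for illustration (workdir/method/list names are
# hypothetical):
#
#     _name2job(cfg, 'dev-42')               # a jobid
#     _name2job(cfg, 'dev-LATEST')           # workdir-LATEST symlink
#     _name2job(cfg, 'csvimport')            # latest current job for a method
#     _name2job(cfg, 'workdirs/dev/dev-42')  # path to a jobdir (has setup.json)
#     _name2job(cfg, ':import:')             # urd list entry; empty entry means -1 (newest)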