def test(name, input, want_obj, want_bytes, **kw):
    json_save(input, name, **kw)
    with open(name, "rb") as fh:
        got_bytes_raw = fh.read()
    assert got_bytes_raw[-1:] == b"\n", name + " didn't even end with a newline"
    got_bytes_raw = got_bytes_raw[:-1]
    as_str = json_encode(input, as_str=True, **kw)
    as_bytes = json_encode(input, as_str=False, **kw)
    assert isinstance(as_str, str) and isinstance(as_bytes, bytes), \
        "json_encode returns the wrong types: %s %s" % (type(as_str), type(as_bytes),)
    assert as_bytes == got_bytes_raw, "json_save doesn't save the same thing json_encode returns for " + name
    if PY3:
        as_str = as_str.encode("utf-8")
    assert as_bytes == as_str, "json_encode doesn't return the same data for as_str=True and False"
    got_obj = json_load(name)
    assert want_obj == got_obj, "%s roundtrips wrong (wanted %r, got %r)" % (name, want_obj, got_obj)
    with open(name, "rb") as fh:
        # compare with per-line whitespace stripped, so want_bytes doesn't have
        # to match the pretty-printed indentation exactly
        got_bytes_fuzzy = b"".join(line.strip() for line in fh)
    assert want_bytes == got_bytes_fuzzy, "%s wrong on disk (but decoded right)" % (name,)
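# For context, a self-contained sketch of the invariant test() exercises, using
# only the stdlib (json_save/json_encode/json_load above are this project's own
# wrappers; the trailing-newline behaviour is taken from the assert above):
import json

def _roundtrip_demo(path, obj):
    with open(path, "wb") as fh:
        fh.write(json.dumps(obj).encode("utf-8") + b"\n")  # newline-terminated, like json_save
    with open(path, "rb") as fh:
        raw = fh.read()
    assert raw.endswith(b"\n")
    assert json.loads(raw.decode("utf-8")) == obj  # decodes back to an equal object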
def _call(self, url, data=None, fmt=_urd_typeify):
    assert self._url, "No urd configured for this daemon"
    url = url.replace(' ', '%20')
    if data is not None:
        req = Request(url, json_encode(data), self._headers)
    else:
        req = Request(url)
    tries_left = 3
    while True:
        try:
            r = urlopen(req)
            try:
                return fmt(r.read())
            finally:
                try:
                    r.close()
                except Exception:
                    pass
        except HTTPError as e:
            # 401 (unauthorized) and 409 (conflict) won't be fixed by retrying
            if e.code in (401, 409,):
                raise
            tries_left -= 1
            if not tries_left:
                raise
            print('Error %d from urd, %d tries left' % (e.code, tries_left,), file=sys.stderr)
        except ValueError:
            tries_left -= 1
            if not tries_left:
                raise
            print('Bad data from urd, %d tries left' % (tries_left,), file=sys.stderr)
        except URLError:
            # connection problems don't count against tries_left
            print('Error contacting urd', file=sys.stderr)
        time.sleep(4)
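# The retry shape above in isolation (an illustrative sketch; fetch stands in
# for urlopen, ValueError for a bad payload, and IOError plays the
# connection-error role that URLError has above):
import sys
import time

def call_with_retries(fetch, tries_left=3):
    while True:
        try:
            return fetch()
        except ValueError:  # bad payload: bounded retries
            tries_left -= 1
            if not tries_left:
                raise
            print('Bad data, %d tries left' % (tries_left,), file=sys.stderr)
        except IOError:     # connection trouble: retry forever
            print('Error contacting server', file=sys.stderr)
        time.sleep(4)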
def __init__(self, methods, setup):
    tree = methods.new_deptree(setup.method)
    self.methods = methods
    self.top_method = setup.method
    self.tree = tree
    self.add_flags({'make': False, 'link': False,})
    seen = set()
    for method, data in iteritems(self.tree):
        seen.add(method)
        data['params'] = {method: setup.params[method]}
    unmatched = {
        method: params
        for method, params in iteritems(setup.params)
        if method not in seen
    }
    if unmatched:
        from extras import json_encode
        print("DepTree Warning: Unmatched options remain:", json_encode(unmatched, as_str=True))
    def collect(method):
        # All methods that method depends on, transitively
        for child in tree[method]['dep']:
            yield child
            for method in collect(child):
                yield method
    # This probably updates some with the same data several times,
    # but that is cheap (just dictref updates, nothing more).
    for method, data in iteritems(self.tree):
        for submethod in set(collect(method)):
            data['params'].update(tree[submethod]['params'])
    self._fix_options(False)
    self._fix_jobids('jobids')
    self._fix_jobids('datasets')
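# Toy illustration of the transitive walk collect() performs (this tree shape
# is made up; real trees come from methods.new_deptree):
_toy_tree = {'a': {'dep': ['b', 'c']}, 'b': {'dep': ['c']}, 'c': {'dep': []}}

def _collect_demo(method, tree=_toy_tree):
    for child in tree[method]['dep']:
        yield child
        for m in _collect_demo(child, tree):
            yield m

assert set(_collect_demo('a')) == {'b', 'c'}  # 'c' is reached both directly and via 'b'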
def _handle_req(self, path, args):
    if path[0] == 'status':
        data = job_tracking.get(args.get('subjob_cookie') or None)
        if not data:
            self.do_response(500, 'text/plain', 'bad subjob_cookie!\n')
            return
        timeout = min(float(args.get('timeout', 0)), 128)
        status = DotDict(idle=data.lock.acquire(False))
        deadline = time.time() + timeout
        while not status.idle and time.time() < deadline:
            time.sleep(0.1)
            status.idle = data.lock.acquire(False)
        if status.idle:
            if data.last_error:
                status.last_error = data.last_error
                data.last_error = None
            else:
                status.last_time = data.last_time
            data.lock.release()
        elif path == ['status', 'full']:
            status.status_stacks, status.current = status_stacks_export()
        self.do_response(200, "text/json", status)
        return
    elif path == ['list_workspaces']:
        ws = {k: v.path for k, v in self.ctrl.list_workspaces().items()}
        self.do_response(200, "text/json", ws)
    elif path == ['config']:
        self.do_response(200, "text/json", self.ctrl.config)
    elif path == ['update_methods']:
        self.do_response(200, "text/json", self.ctrl.update_methods())
    elif path == ['methods']:
        # return a json with everything the Method object knows about the methods
        self.do_response(200, "text/json", self.ctrl.get_methods())
    elif path[0] == 'method_info':
        method = path[1]
        self.do_response(200, "text/json", self.ctrl.method_info(method))
    elif path[0] == 'workspace_info':
        self.do_response(200, 'text/json', self.ctrl.get_workspace_details())
    elif path[0] == 'abort':
        tokill = list(children)
        print('Force abort', tokill)
        for child in tokill:
            os.killpg(child, signal.SIGKILL)
        self.do_response(200, 'text/json', {'killed': len(tokill)})
    elif path == ['submit']:
        if self.ctrl.broken:
            self.do_response(500, "text/json", {
                'broken': self.ctrl.broken,
                'error': 'Broken methods: ' + ', '.join(sorted(m.split('.')[-1][2:] for m in self.ctrl.broken)),
            })
        elif 'xml' in args:
            self.do_response(500, 'text/plain', 'JSON > XML!\n')
        elif 'json' in args:
            if DEBUG_WRITE_JSON:
                with open('DEBUG_WRITE.json', 'wb') as fh:
                    fh.write(args['json'])
            setup = json_decode(args['json'])
            data = job_tracking.get(setup.get('subjob_cookie') or None)
            if not data:
                self.do_response(500, 'text/plain', 'bad subjob_cookie!\n')
                return
            if len(job_tracking) - 1 > 5:  # max five levels
                print('Too deep subjob nesting!')
                self.do_response(500, 'text/plain', 'Too deep subjob nesting')
                return
            if data.lock.acquire(False):
                respond_after = True
                try:
                    if self.DEBUG:
                        print('@daemon.py: Got the lock!', file=sys.stderr)
                    jobidv, job_res = self.ctrl.initialise_jobs(setup)
                    job_res['done'] = False
                    if jobidv:
                        error = []
                        tlock = TLock()
                        link2job = {j['link']: j for j in job_res['jobs'].values()}
                        def run(jobidv, tlock):
                            for jobid in jobidv:
                                passed_cookie = None
                                # This is not a race - all higher locks are locked too.
                                while passed_cookie in job_tracking:
                                    passed_cookie = gen_cookie()
                                job_tracking[passed_cookie] = DotDict(lock=JLock(), last_error=None, last_time=0)
                                try:
                                    self.ctrl.run_job(jobid, subjob_cookie=passed_cookie, parent_pid=setup.get('parent_pid', 0))
                                    # update database since a new jobid was just created
                                    job = self.ctrl.add_single_jobid(jobid)
                                    with tlock:
                                        link2job[jobid]['make'] = 'DONE'
                                        link2job[jobid]['total_time'] = job.total
                                except JobError as e:
                                    error.append([e.jobid, e.method, e.status])
                                    with tlock:
                                        link2job[jobid]['make'] = 'FAIL'
                                    return
                                finally:
                                    del job_tracking[passed_cookie]
                            # everything was built ok, update symlink
                            try:
                                wn = self.ctrl.target_workdir
                                dn = self.ctrl.workspaces[wn].path
                                ln = os.path.join(dn, wn + "-LATEST_")
                                try:
                                    os.unlink(ln)
                                except OSError:
                                    pass
                                os.symlink(jobid, ln)
                                os.rename(ln, os.path.join(dn, wn + "-LATEST"))
                            except OSError:
                                traceback.print_exc()
                        t = Thread(target=run, name="job runner", args=(jobidv, tlock,))
                        t.daemon = True
                        t.start()
                        t.join(2)  # give job two seconds to complete
                        with tlock:
                            for j in link2job.values():
                                if j['make'] in (True, 'FAIL',):
                                    respond_after = False
                                    job_res_json = json_encode(job_res)
                                    break
                        if not respond_after:
                            # not all jobs are done yet, give partial response
                            self.do_response(200, "text/json", job_res_json)
                        t.join()  # wait until actually complete
                        del tlock
                        del t
                        # verify that all jobs got built.
                        total_time = 0
                        for j in link2job.values():
                            jobid = j['link']
                            if j['make'] == True:
                                # Well, crap.
                                error.append([jobid, "unknown", {"INTERNAL": "Not built"}])
                                print("INTERNAL ERROR IN JOB BUILDING!", file=sys.stderr)
                            total_time += j.get('total_time', 0)
                        data.last_error = error
                        data.last_time = total_time
                except Exception as e:
                    if respond_after:
                        self.do_response(500, "text/json", {'error': str(e)})
                    raise
                finally:
                    data.lock.release()
                if respond_after:
                    job_res['done'] = True
                    self.do_response(200, "text/json", job_res)
                if self.DEBUG:
                    # note: has already done http response
                    print("@daemon.py: Process releases lock!", file=sys.stderr)
            else:
                self.do_response(200, 'text/plain', 'Busy doing work for you...\n')
        else:
            self.do_response(500, 'text/plain', 'Missing json input!\n')
    else:
        self.do_response(500, 'text/plain', 'Unknown path\n')
    return
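# The poll-with-deadline step from the 'status' branch above, as a standalone
# sketch (threading.Lock stands in for data.lock; the 0.1s poll interval and
# 128s timeout cap mirror the handler):
import threading
import time

def wait_idle(lock, timeout):
    deadline = time.time() + min(float(timeout), 128)
    idle = lock.acquire(False)
    while not idle and time.time() < deadline:
        time.sleep(0.1)
        idle = lock.acquire(False)
    if idle:
        lock.release()  # only report idleness; don't keep holding the lock
    return idle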
def encode_body(self, body):
    if isinstance(body, bytes):
        return body
    if isinstance(body, unicode):
        return body.encode('utf-8')
    return json_encode(body)
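# The same bytes/text/object dispatch as a standalone function, using stdlib
# json in place of this project's json_encode (on Python 3, unicode is str):
import json

def encode_body_demo(body):
    if isinstance(body, bytes):
        return body
    if isinstance(body, str):
        return body.encode('utf-8')
    return json.dumps(body).encode('utf-8')

assert encode_body_demo(b'raw') == b'raw'
assert encode_body_demo(u'text') == b'text'
assert encode_body_demo({'a': 1}) == b'{"a": 1}'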
def call_method(self, method, defopt={}, defdata={}, defjob={}, options=(), datasets=(), jobids=(), record_in=None, record_as=None, why_build=False, caption=None):
    todo = {method}
    org_method = method
    opted = set()
    self.new(method, caption)
    # options and datasets can be for just method, or {method: options, ...}.
    def dictofdicts(d):
        if method not in d:
            return {method: dict(d)}
        else:
            return dict(d)
    options = dictofdicts(options)
    datasets = dictofdicts(datasets)
    jobids = dictofdicts(jobids)
    def resolve_something(res_in, d):
        def resolve(name, inner=False):
            if name is None and not inner:
                return None
            if isinstance(name, JobTuple):
                names = [str(name)]
            elif isinstance(name, (list, tuple)):
                names = name
            else:
                assert isinstance(name, str_types), "%s: %s" % (key, name)
                names = [name]
            fixed_names = []
            for name in names:
                res_name = res_in.get(name, name)
                if isinstance(res_name, (list, tuple)):
                    res_name = resolve(res_name, True)
                # if name was a job-name this gets a dict and dies
                assert isinstance(res_name, str_types), "%s: %s" % (key, name)
                fixed_names.append(res_name)
            return ','.join(fixed_names)
        for key, name in d.items():
            yield key, resolve(name)
    resolve_datasets = partial(resolve_something, defdata)
    resolve_jobids = partial(resolve_something, defjob)
    to_record = []
    while todo:
        method = todo.pop()
        m_opts = dict(defopt.get(method, ()))
        m_opts.update(options.get(method, ()))
        self.options(method, m_opts)
        m_datas = dict(defdata.get(method, ()))
        m_datas.update(resolve_datasets(datasets.get(method, {})))
        self.datasets(method, m_datas)
        m_jobs = dict(defjob.get(method, ()))
        m_jobs.update(resolve_jobids(jobids.get(method, {})))
        self.jobids(method, m_jobs)
        opted.add(method)
        to_record.append(method)
        todo.update(self.dep_methods[method])
        todo.difference_update(opted)
    self.submit(why_build=why_build)
    if why_build:  # specified by caller
        return self.job_retur.why_build
    if self.job_retur.why_build:  # done by server anyway (because --flags why_build)
        print("Would have built from:")
        print("======================")
        print(setupfile.encode_setup(self.history[-1][0], as_str=True))
        print("Could have avoided build if:")
        print("============================")
        print(json_encode(self.job_retur.why_build, as_str=True))
        print()
        from inspect import stack
        stk = stack()[1]
        print("Called from %s line %d" % (stk[1], stk[2],))
        exit()
    if isinstance(record_as, str):
        record_as = {org_method: record_as}
    elif not record_as:
        record_as = {}
    for m in to_record:
        self.record[record_in].insert(record_as.get(m, m), self.jobid(m))
    return self.jobid(org_method)
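# dictofdicts accepts both calling conventions mentioned in the comment above;
# a quick standalone illustration (the method and option names are made up):
method = 'csvimport'

def _dictofdicts_demo(d):
    return dict(d) if method in d else {method: dict(d)}

# plain options for just the top method get wrapped...
assert _dictofdicts_demo({'filename': 'x.csv'}) == {'csvimport': {'filename': 'x.csv'}}
# ...while an already per-method mapping passes through unchanged.
assert _dictofdicts_demo({'csvimport': {'filename': 'x.csv'}}) == {'csvimport': {'filename': 'x.csv'}}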
def launch(workdir, setup, config, Methods, active_workspaces, slices, debug, daemon_url, subjob_cookie, parent_pid):
    starttime = time.time()
    jobid = setup.jobid
    method = setup.method
    if subjob_cookie:
        print_prefix = ''
    else:
        print_prefix = '    '
    print('%s| %s [%s] |' % (print_prefix, jobid, method,))
    statmsg('| %s [%s] |' % (jobid, method,))
    args = dict(
        workdir=workdir,
        slices=slices,
        jobid=jobid,
        result_directory=config.get('result_directory', ''),
        common_directory=config.get('common_directory', ''),
        source_directory=config.get('source_directory', ''),
        workspaces=active_workspaces,
        daemon_url=daemon_url,
        subjob_cookie=subjob_cookie,
        parent_pid=parent_pid,
    )
    from runner import runners
    runner = runners[Methods.db[method].version]
    child, prof_r = runner.launch_start(args)
    # There's a race where if we get interrupted right after fork this is not recorded
    # (the launched job could continue running)
    try:
        children.add(child)
        status, data = runner.launch_finish(child, prof_r, workdir, jobid, method)
        if status:
            os.killpg(child, SIGTERM)  # give it a chance to exit gracefully
            # The dying process won't have sent an end message, so it has
            # the endwait time until we SIGKILL it.
            msg = json_encode(status, as_str=True)
            print('%s| %s [%s]  failed!    (%5.1fs) |' % (print_prefix, jobid, method, time.time() - starttime))
            statmsg('| %s [%s]  failed! |' % (jobid, method))
            statmsg(msg)
        # There is a race where stuff on the status socket has not arrived when
        # the sending process exits. This is basically benign, but let's give
        # it a chance to arrive to cut down on confusing warnings.
        statmsg_endwait(child, 0.1)
    finally:
        try:
            os.killpg(child, SIGKILL)  # this should normally be a no-op, but in case it left anything.
        except Exception:
            pass
        try:
            children.remove(child)
        except Exception:
            pass
        try:
            os.waitpid(child, 0)  # won't block (we just killed it, plus it had probably already exited)
        except Exception:
            pass
    if status:
        raise JobError(jobid, method, status)
    print('%s| %s [%s]  completed. (%5.1fs) |' % (print_prefix, jobid, method, time.time() - starttime))
    statmsg('| %s [%s]  completed. |' % (jobid, method))
    return data
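# The SIGTERM-then-SIGKILL teardown in miniature (a POSIX-only sketch; the
# child command and timings are made up, and subprocess stands in for the
# project's runner, which forks its own process group):
import os
import signal
import subprocess
import time

p = subprocess.Popen(['sleep', '60'], preexec_fn=os.setpgrp)  # own process group
os.killpg(p.pid, signal.SIGTERM)  # polite request first
time.sleep(0.1)  # brief grace period
try:
    os.killpg(p.pid, signal.SIGKILL)  # normally a no-op if it already exited
except OSError:
    pass
p.wait()  # reap the child; won't block, the group was just killed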