def reader_process(slices, filename, write_fds, labels_fd, success_fd, status_fd, comment_char, lf_char):
	# Terrible hack - try to close FDs we didn't want in this process.
	# (This is important, if the main process dies this won't be
	# detected if we still have these open.)
	keep_fds = set(write_fds)
	keep_fds.add(labels_fd)
	keep_fds.add(success_fd)
	keep_fds.add(status_fd)
	# a few extra to be safe.
	for fd in range(3, max(keep_fds) + 32):
		if fd not in keep_fds:
			try:
				os.close(fd)
			except OSError:
				pass
	setproctitle("reader")
	os.dup2(success_fd, 2)  # reader writes errors to stderr
	os.close(success_fd)
	success_fd = 2
	res = cstuff.backend.reader(filename.encode("utf-8"), slices, options.skip_lines, options.skip_empty_lines, write_fds, labels_fd, status_fd, comment_char, lf_char)
	if not res:
		os.write(success_fd, b"\0")
	os.close(success_fd)

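# --- Illustrative sketch (not part of the original source) ---
# How the success_fd handshake above can be consumed on the parent side:
# the child writes a single NUL byte on success and just closes the fd
# (EOF) on failure, so the parent only needs to read one byte. The
# parent/child split and the helper name shown here are assumptions for
# illustration only.
import os

def spawn_and_wait_for_success(child_work):
	r, w = os.pipe()
	pid = os.fork()
	if pid == 0:  # child
		os.close(r)
		if child_work():
			os.write(w, b"\0")  # signal success
		os.close(w)             # EOF signals failure (or just "done")
		os._exit(0)
	os.close(w)
	success = os.read(r, 1) == b"\0"  # b'' (EOF) means no success byte arrived
	os.close(r)
	os.waitpid(pid, 0)
	return success

# usage:
# spawn_and_wait_for_success(lambda: True)   -> True
# spawn_and_wait_for_success(lambda: False)  -> False
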
def run(cfg, from_shell=False):
	project = os.path.split(cfg.project_directory)[1]
	setproctitle('ax board for %s on %s' % (project, cfg.board_listen,))

	def call_s(*path):
		return call(os.path.join(cfg.url, *map(url_quote, path)))

	def call_u(*path):
		return call(os.path.join(cfg.urd, *map(url_quote, path)), server_name='urd')

	@bottle.get('/')
	@view('main')
	def main_page():
		return dict(
			project=project,
			workdirs=cfg.workdirs,
		)

	@bottle.get('/results')
	def results():
		res = {}
		for fn in os.listdir(cfg.result_directory):
			if fn.endswith('_'):
				continue
			ffn = os.path.join(cfg.result_directory, fn)
			try:
				jobid, name = os.readlink(ffn).split('/')[-2:]
				res[fn] = dict(
					jobid=jobid,
					name=name,
					ts=os.lstat(ffn).st_mtime,
					size=os.stat(ffn).st_size,
				)
			except OSError:
				continue
		bottle.response.content_type = 'application/json; charset=UTF-8'
		bottle.response.set_header('Cache-Control', 'no-cache')
		return json.dumps(res)

	@bottle.get('/results/<name>')
	def file(name):
		return bottle.static_file(name, root=cfg.result_directory)

	@bottle.get('/status')
	@view('status')
	def status():
		status = call_s('status/full')
		if 'short' in bottle.request.query:
			if status.idle:
				return 'idle'
			else:
				t, msg, _ = status.current
				return '%s (%s)' % (msg, fmttime(status.report_t - t, short=True),)
		else:
			status.tree = list(fix_stacks(status.pop('status_stacks', ()), status.report_t))
			return status

	@bottle.get('/last_error')
	@view('last_error')
	def last_error():
		return call_s('last_error')

	@bottle.get('/job/<jobid>/method.tar.gz/')
	@bottle.get('/job/<jobid>/method.tar.gz/<name:path>')
	def job_method(jobid, name=None):
		job = get_job(jobid)
		with tarfile.open(job.filename('method.tar.gz'), 'r:gz') as tar:
			if name:
				info = tar.getmember(name)
			else:
				members = [info for info in tar.getmembers() if info.isfile()]
				if len(members) == 1 and not name:
					info = members[0]
				else:
					return template('job_method_list', members=members, job=job)
			bottle.response.content_type = 'text/plain; charset=UTF-8'
			return tar.extractfile(info).read()

	@bottle.get('/job/<jobid>/<name:path>')
	def job_file(jobid, name):
		job = get_job(jobid)
		res = bottle.static_file(name, root=job.path)
		if not res.content_type and res.status_code < 400:
			# bottle default is text/html, which is probably wrong.
			res.content_type = 'text/plain'
		return res

	@bottle.get('/job/<jobid>')
	@bottle.get('/job/<jobid>/')
	@view('job')
	def job(jobid):
		job = get_job(jobid)
		try:
			post = job.post
		except IOError:
			post = None
		if post:
			aborted = False
			files = [fn for fn in job.files() if fn[0] != '/']
			subjobs = [Job(jobid) for jobid in post.subjobs]
			current = call_s('job_is_current', job)
		else:
			aborted = True
			current = False
			files = None
			subjobs = None
		return dict(
			job=job,
			aborted=aborted,
			current=current,
			output=os.path.exists(job.filename('OUTPUT')),
			datasets=job.datasets,
			params=job.params,
			subjobs=subjobs,
			files=files,
		)

	@bottle.get('/dataset/<dsid:path>')
	@view('dataset', ds_json)
	def dataset(dsid):
		ds = Dataset(dsid.rstrip('/'))
		q = bottle.request.query
		if q.column:
			lines = int(q.lines or 10)
			it = ds.iterate(None, q.column)
			it = itertools.islice(it, lines)
			t = ds.columns[q.column].type
			if t in ('datetime', 'date', 'time',):
				it = map(str, it)
			elif t in ('bytes', 'pickle',):
				it = map(repr, it)
			res = list(it)
			bottle.response.content_type = 'application/json; charset=UTF-8'
			return json.dumps(res)
		else:
			return dict(ds=ds)

	def load_workdir(jobs, name):
		known = call_s('workdir', name)
		jobs[name + '-LATEST'] = None  # Sorts first
		try:
			latest = os.readlink(os.path.join(cfg.workdirs[name], name + '-LATEST'))
		except OSError:
			latest = None
		for jid in workdir_jids(cfg, name):
			jobs[jid] = job_data(known, jid)
		if latest in jobs:
			jobs[name + '-LATEST'] = jobs[latest]
		else:
			del jobs[name + '-LATEST']
		return jobs

	@bottle.get('/workdir/<name>')
	@view('workdir', 'jobs')
	def workdir(name):
		return dict(name=name, jobs=load_workdir(collections.OrderedDict(), name))

	@bottle.get('/workdir')
	@bottle.get('/workdir/')
	@view('workdir', 'jobs')
	def all_workdirs():
		jobs = collections.OrderedDict()
		for name in sorted(cfg.workdirs):
			load_workdir(jobs, name)
		return dict(name='ALL', jobs=jobs)

	@bottle.get('/methods')
	@view('methods')
	def methods():
		methods = call_s('methods')
		by_package = collections.defaultdict(list)
		for name, data in sorted(methods.items()):
			by_package[data.package].append(name)
		by_package.pop('accelerator.test_methods', None)
		return dict(methods=methods, by_package=by_package)

	@bottle.get('/method/<name>')
	@view('method', 'data')
	def method(name):
		methods = call_s('methods')
		if name not in methods:
			return bottle.HTTPError(404, 'Method %s not found' % (name,))
		return dict(name=name, data=methods[name], cfg=cfg)

	@bottle.get('/urd')
	@bottle.get('/urd/')
	@view('urd', 'lists')
	def urd():
		return dict(
			lists=call_u('list'),
			project=project,
		)

	@bottle.get('/urd/<user>/<build>')
	@bottle.get('/urd/<user>/<build>/')
	@view('urdlist', 'timestamps')
	def urdlist(user, build):
		key = user + '/' + build
		return dict(
			key=key,
			timestamps=call_u(key, 'since/0'),
		)

	@bottle.get('/urd/<user>/<build>/<ts>')
	@view('urditem', 'entry')
	def urditem(user, build, ts):
		key = user + '/' + build + '/' + ts
		d = call_u(key)
		return dict(key=key, entry=d)

	bottle.TEMPLATE_PATH = [os.path.join(os.path.dirname(__file__), 'board')]
	if from_shell:
		kw = {'reloader': True}
	else:
		kw = {'quiet': True}
	kw['server'] = WaitressServer
	listen = cfg.board_listen
	if isinstance(listen, tuple):
		kw['host'], kw['port'] = listen
	else:
		from accelerator.server import check_socket
		check_socket(listen)
		kw['host'] = listen
		kw['port'] = 0
	bottle.run(**kw)

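# --- Illustrative sketch (not part of the original source) ---
# Minimal shape of the bottle wiring used by run() above: handlers are
# registered with @bottle.get inside a closure over the configuration,
# and bottle.run() is finally called with host/port plus reloader/quiet
# flags. Route, host and port values here are placeholders.
import bottle

def run_minimal(host='localhost', port=8520, from_shell=False):
	@bottle.get('/ping')
	def ping():
		bottle.response.content_type = 'application/json; charset=UTF-8'
		return '{"pong": true}'

	# reloader for interactive use, quiet when run as a managed service
	kw = {'reloader': True} if from_shell else {'quiet': True}
	bottle.run(host=host, port=port, **kw)
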
def reader(fd2pid, names, masters, slaves, process_name, basedir, is_main, syncpipe_r, syncpipe_w, q):
	# don't get killed when we kill the job (will exit on EOF, so no output is lost)
	os.setpgrp()
	# we are safe now, the main process can continue
	os.close(syncpipe_w)
	os.close(syncpipe_r)
	signal.signal(signal.SIGTERM, signal.SIG_IGN)
	signal.signal(signal.SIGINT, signal.SIG_IGN)
	setproctitle(process_name)
	out_fd = int(os.environ['BD_TERM_FD'])
	for fd in slaves:
		os.close(fd)
	if q:
		q.make_writer()
	fd2fd = {}
	if not is_main:
		os.chdir(basedir)
		fd2name = dict(zip(masters, names))
	outputs = dict.fromkeys(masters, b'')
	if len(fd2pid) == 2:
		status_blacklist = set(fd2pid.values())
		assert len(status_blacklist) == 1, "fd2pid should only map to 1 value initially: %r" % (fd2pid,)
	else:
		status_blacklist = ()
		assert len(fd2pid) == 1, "fd2pid should have 1 or 2 elements initially"
	missed = [False]
	output_happened = False
	def try_print(data=b'\n\x1b[31m*** Some output not printed ***\x1b[m\n'):
		try:
			os.write(out_fd, data)
		except OSError:
			missed[0] = True
	# set output nonblocking, so we can't be blocked by terminal io.
	# errors generated here go to stderr, which is the real stderr
	# in the main iowrapper (so it can block) and goes to the main
	# iowrapper in the method iowrappers (so it can still block, but
	# is unlikely to do so for long).
	with nonblocking(out_fd):
		while masters:
			if missed[0]:
				# Some output failed to print last time around.
				# Wait up to one second for new data and then try
				# to write a message about that (before the new data).
				ready, _, _ = select(masters, [], [], 1.0)
				missed[0] = False
				try_print()
			else:
				ready, _, _ = select(masters, [], [])
			for fd in ready:
				try:
					data = os.read(fd, 65536)
				except OSError as e:
					# On Linux a pty will return
					# OSError: [Errno 5] Input/output error
					# instead of b'' for EOF. Don't know why.
					# Let's try to be a little restrictive in what we catch.
					if e.errno != errno.EIO:
						raise
					data = b''
				if data:
					if not is_main:
						if fd not in fd2pid:
							fd2pid[fd] = int(data[:16], 16)
							data = data[16:]
							if not data:
								continue
						if fd not in fd2fd:
							fd2fd[fd] = os.open(fd2name[fd], os.O_CREAT | os.O_WRONLY, 0o666)
						os.write(fd2fd[fd], data)
					try_print(data)
					output_happened = True
					if not is_main:
						outputs[fd] = (outputs[fd] + data[-MAX_OUTPUT:])[-MAX_OUTPUT:]
						statmsg._output(fd2pid[fd], outputs[fd].decode('utf-8', 'replace'))
				else:
					if q:
						# let fork_analysis know it's time to wake up
						# (in case the process died badly and didn't put anything in q)
						q.try_notify()
					if fd in fd2fd:
						os.close(fd2fd[fd])
						del fd2fd[fd]
					masters.remove(fd)
					os.close(fd)
					if not is_main:
						try:
							pid = fd2pid.pop(fd)
							if pid in status_blacklist:
								# don't do it for prepare as synthesis has the same PID.
								status_blacklist.remove(pid)
								# but clear the output if needed.
								if outputs[fd]:
									statmsg._clear_output(pid)
							else:
								statmsg._end(pid=pid)
						except Exception:
							# Failure can happen here if the method exits
							# before analysis (fd2pid not fully populated).
							pass
			if missed[0]:
				missed[0] = False
				try_print()
		if missed[0]:
			# Give it a little time, then give up.
			sleep(0.03)
			missed[0] = False
			try_print()
	if not output_happened and not is_main:
		os.chdir('..')
		os.rmdir(basedir)

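# --- Illustrative sketch (not part of the original source) ---
# The "never block on the terminal" pattern used by reader() above, in
# isolation: the output fd is switched to non-blocking mode for the
# duration of the loop, a failed write only sets a flag, and a warning
# is written on a later round instead. nonblocking() here is a
# hypothetical stand-in for the context manager the original imports.
import fcntl
import os
from contextlib import contextmanager

@contextmanager
def nonblocking(fd):
	flags = fcntl.fcntl(fd, fcntl.F_GETFL)
	fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)
	try:
		yield
	finally:
		fcntl.fcntl(fd, fcntl.F_SETFL, flags)  # restore blocking mode

missed = [False]

def try_print(out_fd, data=b'\n*** Some output not printed ***\n'):
	try:
		os.write(out_fd, data)
	except OSError:
		missed[0] = True  # remember the failure, retry with a notice later

# usage: wrap the whole select loop so a suspended terminal never stalls us
# with nonblocking(out_fd):
#     try_print(out_fd, b'some output\n')
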
def call_analysis(analysis_func, sliceno_, delayed_start, q, preserve_result, parent_pid, output_fds, **kw):
	try:
		q.make_writer()
		# tell iowrapper our PID, so our output goes to the right status stack.
		# (the pty is not quite a transparent transport ('\n' transforms into
		# '\r\n'), so we use a fairly human readable encoding.)
		writeall(output_fds[sliceno_], b'%16x' % (os.getpid(),))
		# use our iowrapper fd instead of stdout/stderr
		os.dup2(output_fds[sliceno_], 1)
		os.dup2(output_fds[sliceno_], 2)
		for fd in output_fds:
			os.close(fd)
		os.close(_prof_fd)
		slicename = 'analysis(%d)' % (sliceno_,)
		setproctitle(slicename)
		if delayed_start:
			os.close(delayed_start[1])
			update = statmsg._start('waiting for concurrency limit (%d)' % (sliceno_,), parent_pid, True)
			if os.read(delayed_start[0], 1) != b'a':
				raise AcceleratorError('bad delayed_start, giving up')
			update(slicename)
			os.close(delayed_start[0])
		else:
			statmsg._start(slicename, parent_pid, True)
		kw['sliceno'] = g.sliceno = sliceno_
		for dw in dataset._datasetwriters.values():
			if dw._for_single_slice is None:
				dw._set_slice(sliceno_)
		res = analysis_func(**kw)
		if preserve_result:
			# Remove defaultdicts until we find one with a picklable default_factory.
			# (This is what you end up doing manually anyway.)
			def picklable(v):
				try:
					pickle.dumps(v, pickle.HIGHEST_PROTOCOL)
					return True
				except Exception:
					return False
			def fixup(d):
				if isinstance(d, defaultdict) and not picklable(d.default_factory):
					if not d:
						return {}
					v = next(iteritems(d))
					if isinstance(v, defaultdict) and not picklable(v.default_factory):
						return {k: fixup(v) for k, v in iteritems(d)}
					else:
						return dict(d)
				else:
					return d
			def save(item, name):
				blob.save(fixup(item), name, sliceno=sliceno_, temp=True)
			if isinstance(res, tuple):
				if sliceno_ == 0:
					blob.save(len(res), "Analysis.tuple", temp=True)
				for ix, item in enumerate(res):
					save(item, "Analysis.%d." % (ix,))
			else:
				if sliceno_ == 0:
					blob.save(False, "Analysis.tuple", temp=True)
				save(res, "Analysis.")
		from accelerator.extras import saved_files
		dw_lens = {}
		dw_minmax = {}
		dw_compressions = {}
		for name, dw in dataset._datasetwriters.items():
			if dw._for_single_slice or sliceno_ == 0:
				dw_compressions[name] = dw._compressions
			if dw._for_single_slice in (None, sliceno_,):
				dw.close()
				dw_lens[name] = dw._lens
				dw_minmax[name] = dw._minmax
		c_fflush()
		q.put((sliceno_, monotonic(), saved_files, dw_lens, dw_minmax, dw_compressions, None,))
		q.close()
	except:
		c_fflush()
		msg = fmt_tb(1)
		print(msg)
		q.put((sliceno_, monotonic(), {}, {}, {}, {}, msg,))
		q.close()
		sleep(5)  # give launcher time to report error (and kill us)
		exitfunction()

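# --- Illustrative sketch (not part of the original source) ---
# The PID handshake between call_analysis() and the iowrapper reader():
# the analysis child writes its PID as 16 hex digits before any real
# output (a pty is not a transparent transport, so a printable encoding
# is used), and the reader peels those 16 bytes off the first read.
# This sketch uses a plain pipe instead of a pty, and writeall() below
# is an assumed helper that loops over os.write.
import os

def writeall(fd, data):
	while data:
		data = data[os.write(fd, data):]

r, w = os.pipe()
writeall(w, b'%16x' % (os.getpid(),))   # handshake: 16 hex digits
writeall(w, b'hello from analysis\n')   # then normal output
os.close(w)

data = os.read(r, 65536)
pid = int(data[:16], 16)                # who is talking
output = data[16:]                      # the rest is ordinary output
assert pid == os.getpid()
os.close(r)
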
def execute_process(workdir, jobid, slices, concurrency, result_directory, common_directory, input_directory, index=None, workdirs=None, server_url=None, subjob_cookie=None, parent_pid=0):
	WORKDIRS.update(workdirs)
	g.job = jobid
	setproctitle('launch')
	path = os.path.join(workdir, jobid)
	try:
		os.chdir(path)
	except Exception:
		print("Cannot cd to workdir", path)
		exit(1)
	g.params = params = job_params()
	method_ref = import_module(params.package + '.a_' + params.method)
	g.sliceno = -1
	g.job = CurrentJob(jobid, params, result_directory, input_directory)
	g.slices = slices
	g.options = params.options
	g.datasets = params.datasets
	g.jobs = params.jobs
	method_ref.options = params.options
	method_ref.datasets = params.datasets
	method_ref.jobs = params.jobs
	g.server_url = server_url
	g.running = 'launch'
	statmsg._start('%s %s' % (jobid, params.method,), parent_pid)
	def dummy():
		pass
	prepare_func = getattr(method_ref, 'prepare', dummy)
	analysis_func = getattr(method_ref, 'analysis', dummy)
	synthesis_func = getattr(method_ref, 'synthesis', dummy)
	synthesis_needs_analysis = 'analysis_res' in getarglist(synthesis_func)
	fd2pid, names, masters, slaves = iowrapper.setup(slices, prepare_func is not dummy, analysis_func is not dummy)
	def switch_output():
		fd = slaves.pop()
		os.dup2(fd, 1)
		os.dup2(fd, 2)
		os.close(fd)
	if analysis_func is dummy:
		q = None
	else:
		q = LockFreeQueue()
	iowrapper.run_reader(fd2pid, names, masters, slaves, q=q)
	for fd in masters:
		os.close(fd)
	# A chain must be finished from the back, so sort on that.
	sortnum_cache = {}
	def dw_sortnum(name):
		if name not in sortnum_cache:
			dw = dataset._datasetwriters.get(name)
			if not dw:
				# manually .finish()ed
				num = -1
			elif dw.previous and dw.previous.startswith(jobid + '/'):
				pname = dw.previous.split('/')[1]
				num = dw_sortnum(pname) + 1
			else:
				num = 0
			sortnum_cache[name] = num
		return sortnum_cache[name]
	prof = {}
	if prepare_func is dummy:
		prof['prepare'] = 0  # truthish!
	else:
		t = monotonic()
		switch_output()
		g.running = 'prepare'
		g.subjob_cookie = subjob_cookie
		setproctitle(g.running)
		with statmsg.status(g.running):
			g.prepare_res = method_ref.prepare(**args_for(method_ref.prepare))
			to_finish = [dw.name for dw in dataset._datasetwriters.values() if dw._started]
			if to_finish:
				with statmsg.status("Finishing datasets"):
					for name in sorted(to_finish, key=dw_sortnum):
						dataset._datasetwriters[name].finish()
		c_fflush()
		prof['prepare'] = monotonic() - t
	switch_output()
	setproctitle('launch')
	from accelerator.extras import saved_files
	if analysis_func is dummy:
		prof['per_slice'] = []
		prof['analysis'] = 0
	else:
		t = monotonic()
		g.running = 'analysis'
		g.subjob_cookie = None  # subjobs are not allowed from analysis
		with statmsg.status('Waiting for all slices to finish analysis') as update:
			g.update_top_status = update
			prof['per_slice'], files, g.analysis_res = fork_analysis(slices, concurrency, analysis_func, args_for(analysis_func), synthesis_needs_analysis, slaves, q)
			del g.update_top_status
		prof['analysis'] = monotonic() - t
		saved_files.update(files)
	t = monotonic()
	g.running = 'synthesis'
	g.subjob_cookie = subjob_cookie
	setproctitle(g.running)
	with statmsg.status(g.running):
		synthesis_res = synthesis_func(**args_for(synthesis_func))
		if synthesis_res is not None:
			blob.save(synthesis_res, temp=False)
		if dataset._datasetwriters:
			with statmsg.status("Finishing datasets"):
				for name in sorted(dataset._datasetwriters, key=dw_sortnum):
					dataset._datasetwriters[name].finish()
	if dataset._datasets_written:
		blob.save(dataset._datasets_written, 'DS/LIST', temp=False, _hidden=True)
	c_fflush()
	t = monotonic() - t
	prof['synthesis'] = t
	from accelerator.subjobs import _record
	return None, (prof, saved_files, _record)

def reader(fd2pid, names, masters, slaves, process_name, basedir, is_main):
	signal.signal(signal.SIGTERM, signal.SIG_IGN)
	signal.signal(signal.SIGINT, signal.SIG_IGN)
	setproctitle(process_name)
	out_fd = int(os.environ['BD_TERM_FD'])
	os.chdir(basedir)
	for fd in slaves:
		os.close(fd)
	if is_main:
		fd = os.open(names, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o666)
		fd2fd = dict.fromkeys(masters, fd)
	else:
		fd2name = dict(zip(masters, names))
		fd2fd = {}
	outputs = dict.fromkeys(masters, b'')
	if len(fd2pid) == 2:
		status_blacklist = set(fd2pid.values())
		assert len(status_blacklist) == 1, "fd2pid should only map to 1 value initially: %r" % (fd2pid,)
	else:
		status_blacklist = ()
		assert len(fd2pid) == 1, "fd2pid should have 1 or 2 elements initially"
	missed = [False]
	output_happened = False
	def try_print(data=b'\n\x1b[31m*** Some output not printed ***\x1b[m\n'):
		try:
			os.write(out_fd, data)
		except OSError:
			missed[0] = True
	# set output nonblocking, so we can't be blocked by terminal io.
	# errors generated here go to stderr, which is the real stderr
	# in the main iowrapper (so it can block) and goes to the main
	# iowrapper in the method iowrappers (so it can still block, but
	# is unlikely to do so for long, and will end up in the log).
	with nonblocking(out_fd):
		while masters:
			if missed[0]:
				# Some output failed to print last time around.
				# Wait up to one second for new data and then try
				# to write a message about that (before the new data).
				ready, _, _ = select(masters, [], [], 1.0)
				missed[0] = False
				try_print()
			else:
				ready, _, _ = select(masters, [], [])
			for fd in ready:
				data = os.read(fd, 65536)
				if data:
					if not is_main:
						if fd not in fd2pid:
							fd2pid[fd] = unpack("=Q", data[:8])[0]
							data = data[8:]
							if not data:
								continue
						if fd not in fd2fd:
							fd2fd[fd] = os.open(fd2name[fd], os.O_CREAT | os.O_WRONLY, 0o666)
					os.write(fd2fd[fd], data)
					try_print(data)
					output_happened = True
					if not is_main:
						outputs[fd] = (outputs[fd] + data[-MAX_OUTPUT:])[-MAX_OUTPUT:]
						status._output(fd2pid[fd], outputs[fd].decode('utf-8', 'replace'))
				else:
					if fd in fd2fd:
						os.close(fd2fd[fd])
						del fd2fd[fd]
					masters.remove(fd)
					os.close(fd)
					if not is_main:
						try:
							pid = fd2pid.pop(fd)
							if pid in status_blacklist:
								# don't do it for prepare as synthesis has the same PID.
								status_blacklist.remove(pid)
							else:
								status._end(pid=pid)
						except Exception:
							# Failure can happen here if the method exits
							# before analysis (fd2pid not fully populated).
							pass
			if missed[0]:
				missed[0] = False
				try_print()
		if missed[0]:
			# Give it a little time, then give up.
			sleep(0.03)
			try_print()
	if not output_happened and not is_main:
		os.chdir('..')
		os.rmdir(basedir)

def call_analysis(analysis_func, sliceno_, q, preserve_result, parent_pid, output_fds, **kw):
	try:
		# tell iowrapper our PID, so our output goes to the right status stack.
		os.write(output_fds[sliceno_], pack("=Q", os.getpid()))
		# use our iowrapper fd instead of stdout/stderr
		os.dup2(output_fds[sliceno_], 1)
		os.dup2(output_fds[sliceno_], 2)
		for fd in output_fds:
			os.close(fd)
		slicename = 'analysis(%d)' % (sliceno_,)
		status._start(slicename, parent_pid, 't')
		setproctitle(slicename)
		os.close(_prof_fd)
		kw['sliceno'] = g.sliceno = sliceno_
		for dw in dataset._datasetwriters.values():
			if dw._for_single_slice is None:
				dw._set_slice(sliceno_)
		res = analysis_func(**kw)
		if preserve_result:
			# Remove defaultdicts until we find one with a picklable default_factory.
			# (This is what you end up doing manually anyway.)
			def picklable(v):
				try:
					pickle.dumps(v, pickle.HIGHEST_PROTOCOL)
					return True
				except Exception:
					return False
			def fixup(d):
				if isinstance(d, defaultdict) and not picklable(d.default_factory):
					if not d:
						return {}
					v = next(iteritems(d))
					if isinstance(v, defaultdict) and not picklable(v.default_factory):
						return {k: fixup(v) for k, v in iteritems(d)}
					else:
						return dict(d)
				else:
					return d
			def save(item, name):
				blob.save(fixup(item), name, sliceno=sliceno_, temp=True)
			if isinstance(res, tuple):
				if sliceno_ == 0:
					blob.save(len(res), "Analysis.tuple", temp=True)
				for ix, item in enumerate(res):
					save(item, "Analysis.%d." % (ix,))
			else:
				if sliceno_ == 0:
					blob.save(False, "Analysis.tuple", temp=True)
				save(res, "Analysis.")
		from accelerator.extras import saved_files
		dw_lens = {}
		dw_minmax = {}
		for name, dw in dataset._datasetwriters.items():
			if dw._for_single_slice in (None, sliceno_,):
				dw.close()
				dw_lens[name] = dw._lens
				dw_minmax[name] = dw._minmax
		c_fflush()
		q.put((sliceno_, time(), saved_files, dw_lens, dw_minmax, None,))
	except:
		c_fflush()
		q.put((sliceno_, time(), {}, {}, {}, fmt_tb(1),))
		print_exc()
		sleep(5)  # give launcher time to report error (and kill us)
		exitfunction()