def _mkjob(setup):
	# Build a Job from a loaded setup: start from the method's default
	# options, then overlay the values the setup actually specifies.
	def strip_specials(defaults):
		# OptionEnum placeholders have no concrete value; OptionDefault
		# wrappers carry theirs in .default.
		cleaned = {}
		for name, value in iteritems(defaults):
			if isinstance(value, OptionEnum):
				cleaned[name] = None
			elif isinstance(value, OptionDefault):
				cleaned[name] = value.default
			else:
				cleaned[name] = value
		return cleaned
	method_params = _control.Methods.params
	if setup.method in method_params:
		merged = {
			group: strip_specials(defaults)
			for group, defaults in iteritems(method_params[setup.method].defaults)
		}
	else:
		merged = {}
	for group, values in iteritems(merged):
		values.update(setup[group])
	return Job(
		id=setup.jobid,
		method=setup.method,
		optset=_control.Methods.params2optset({setup.method: merged}),
		hash=setup.hash,
		time=setup.starttime,
		total=setup.exectime.total,
	)
def options2typing(method, options):
	"""Derive typing specs for a method's options.

	Returns a sorted list of [path, spec] pairs for every option value
	(including values nested inside dicts and lists) whose declared type
	needs re-typing on load: JobWithFile, set, and the datetime types.
	Paths are '/'-joined keys with '*' standing for any dict value; a
	value declared inside a list gets its spec wrapped in '[...]'.
	Asserts if the same path is declared with two different specs.
	"""
	from accelerator.job import JobWithFile
	res = {}
	def value2spec(value):
		# Spec string for a single value, or None when no typing needed.
		if isinstance(value, list):
			if not value:
				return
			fmt = '[%s]'
			value = value[0]
		else:
			fmt = '%s'
		typ = None
		# Both the bare class and instances of it count.
		if value is JobWithFile or isinstance(value, JobWithFile):
			typ = 'JobWithFile'
		elif isinstance(value, set):
			typ = 'set'
		elif value in (datetime.datetime, datetime.date, datetime.time, datetime.timedelta,):
			typ = value.__name__
		elif isinstance(value, (datetime.datetime, datetime.date, datetime.time, datetime.timedelta,)):
			typ = type(value).__name__
		if typ:
			return fmt % (typ,)
	def collect(key, value, path=''):
		path = "%s/%s" % (path, key,)
		if isinstance(value, dict):
			# Every value in a dict shares the '*' path component.
			for v in itervalues(value):
				collect('*', v, path)
			return
		spec = value2spec(value)
		assert res.get(path, spec) == spec, 'Method %s has incompatible types in options%s' % (method, path,)
		res[path] = spec
	for k, v in iteritems(options):
		collect(k, v)
	# reverse by key len, so something inside a dict always comes before
	# the dict itself. (We don't currently have any dict-like types, but we
	# might later.)
	return sorted(([k[1:], v] for k, v in iteritems(res) if v), key=lambda i: -len(i[0]))
def params2optset(self, params):
	# Flatten {method: {group: {name: value}}} into a set of
	# '<method> <group>-<name> <repr>' strings, filling any unspecified
	# options from the method's defaults first.
	result = set()
	for method_name, groups in iteritems(params):
		for group_name, given in iteritems(groups):
			merged = dict(self.params[method_name].defaults[group_name])
			merged.update(given)
			result.update(
				'%s %s-%s %s' % (method_name, group_name, opt_name, _reprify(opt_value),)
				for opt_name, opt_value in iteritems(merged)
			)
	return result
def get_reqlist(self):
	# Merge this job's params on top of the method's defaults and wrap
	# the resulting optset in the single-entry request list format.
	merged = {}
	for group, defaults in iteritems(self.methods.params[self.method].defaults):
		merged[group] = dict(defaults)
	for group, given in iteritems(self.params):
		merged[group].update(given)
	optset = self.methods.params2optset({self.method: merged})
	return [(self.method, 0, optset)]
def __init__(self, package_list, configfilename, server_config):
	"""Load all methods through per-interpreter runner processes.

	Groups methods by configured interpreter version, starts/reuses a
	runner per version, loads the methods there and merges the results
	(hashes, params, descriptions) into self. Raises
	MethodLoadException if any method fails to import.
	"""
	super(SubMethods, self).__init__(package_list, configfilename)
	t0 = monotonic()
	# version -> [(package, method key), ...]
	per_runner = defaultdict(list)
	for key, val in iteritems(self.db):
		package = val['package']
		per_runner[val['version']].append((package, key))
	self.runners = new_runners(server_config, set(per_runner))
	warnings = []
	failed = []
	self.hash = {}
	self.params = {}
	self.descriptions = {}
	self.typing = {}
	for version, data in iteritems(per_runner):
		runner = self.runners.get(version)
		if not runner:
			# No interpreter configured for this version: mark all its
			# methods as failed. (The %%s placeholders survive the first
			# % and are filled per method below.)
			msg = '%%s.%%s (unconfigured interpreter %s)' % (version)
			failed.extend(msg % t for t in sorted(data))
			continue
		v = runner.get_ax_version()
		if v != ax_version:
			if runner.python == sys.executable:
				# Same interpreter but a different accelerator version:
				# the installation changed under a running server.
				raise Exception("Server is using accelerator %s but %s is currently installed, please restart server." % (ax_version, v,))
			else:
				print("WARNING: Server is using accelerator %s but runner %r is using accelerator %s." % (ax_version, version, v,))
		w, f, h, p, d = runner.load_methods(package_list, data)
		warnings.extend(w)
		failed.extend(f)
		self.hash.update(h)
		self.params.update(p)
		self.descriptions.update(d)
	# Derive typing, defaults and required options from the loaded specs.
	for key, params in iteritems(self.params):
		self.typing[key] = options2typing(key, params.options)
		params.defaults = params2defaults(params)
		params.required = options2required(params.options)
	def prt(a, prefix):
		# Print the messages sorted, framed by '=' lines.
		maxlen = (max(len(e) for e in a) + len(prefix))
		line = '=' * maxlen
		print()
		print(line)
		for e in sorted(a):
			msg = prefix + e
			print(msg + ' ' * (maxlen - len(msg)))
		print(line)
		print()
	if warnings:
		prt(warnings, 'WARNING: ')
	if failed:
		# Highlight failures (ANSI red on white) before raising.
		print('\033[47;31;1m')
		prt(failed, 'FAILED to import ')
		print('\033[m')
		raise MethodLoadException(failed)
	print("Updated %d methods on %d runners in %.1f seconds" % (
		len(self.hash),
		len(per_runner),
		monotonic() - t0,
	))
def _is_ghost(self, data):
	# An entry is a ghost when any of its dependencies no longer exists
	# in the db, or exists but with different recorded fields.
	for dep_key, dep in iteritems(data.deps):
		entries = self.db[dep_key]
		ts = dep['timestamp']
		if ts not in entries:
			return True
		current = entries[ts]
		for field, value in iteritems(dep):
			if current.get(field) != value:
				return True
	return False
def fixup(d):
	"""Convert defaultdicts with unpicklable default factories to plain dicts.

	Anything picklable (or not a defaultdict) is returned unchanged.
	One level of nesting is inspected: if the values are themselves such
	defaultdicts, all values are converted recursively.
	"""
	if isinstance(d, defaultdict) and not picklable(d.default_factory):
		if not d:
			return {}
		# BUG FIX: next(iteritems(d)) yields a (key, value) pair, so the
		# old isinstance check on it never matched a defaultdict and
		# nested unpicklable defaultdicts were never converted. Unpack
		# to inspect a sample *value*.
		_, v = next(iteritems(d))
		if isinstance(v, defaultdict) and not picklable(v.default_factory):
			return {k: fixup(v) for k, v in iteritems(d)}
		else:
			return dict(d)
	else:
		return d
def __init__(self, package_list, configfilename, daemon_config):
	"""Load all methods through per-interpreter runner processes.

	Groups methods by configured interpreter version, starts a runner
	per version, loads the methods there and merges the results (hashes
	and params) into self. Raises MethodLoadException if any method
	fails to import.
	"""
	super(SubMethods, self).__init__(package_list, configfilename)
	t0 = time()
	# version -> [(package, method key), ...]
	per_runner = defaultdict(list)
	for key, val in iteritems(self.db):
		package = val['package']
		per_runner[val['version']].append((package, key))
	self.runners = new_runners(daemon_config, set(per_runner))
	warnings = []
	failed = []
	self.hash = {}
	self.params = {}
	self.typing = {}
	for version, data in iteritems(per_runner):
		runner = self.runners.get(version)
		if not runner:
			# No interpreter configured for this version: mark all its
			# methods as failed. (The %%s placeholders survive the first
			# % and are filled per method below.)
			msg = '%%s.%%s (unconfigured interpreter %s)' % (version)
			failed.extend(msg % t for t in sorted(data))
			continue
		w, f, h, p = runner.load_methods(package_list, data)
		warnings.extend(w)
		failed.extend(f)
		self.hash.update(h)
		self.params.update(p)
	# Derive typing, defaults and required options from the loaded specs.
	for key, params in iteritems(self.params):
		self.typing[key] = options2typing(key, params.options)
		params.defaults = params2defaults(params)
		params.required = options2required(params.options)
	def prt(a, prefix):
		# Print the messages sorted, framed by '=' lines.
		maxlen = (max(len(e) for e in a) + len(prefix))
		line = '=' * maxlen
		print()
		print(line)
		for e in sorted(a):
			msg = prefix + e
			print(msg + ' ' * (maxlen - len(msg)))
		print(line)
		print()
	if warnings:
		prt(warnings, 'WARNING: ')
	if failed:
		# Highlight failures (ANSI red on white) before raising.
		print('\033[47;31;1m')
		prt(failed, 'FAILED to import ')
		print('\033[m')
		raise MethodLoadException(failed)
	print("Updated %d methods on %d runners in %.1f seconds" % (
		len(self.hash),
		len(per_runner),
		time() - t0,
	))
def get_reqlist(self):
	# For every method in the dep tree: merge the given params of each
	# involved submethod on top of that submethod's defaults, then yield
	# the method together with its uid and the combined optset.
	for method, data in self.tree.items():
		full_params = {}
		for submethod, given in iteritems(data['params']):
			defaults = self.methods.params[submethod].defaults
			merged = {group: dict(values) for group, values in iteritems(defaults)}
			for group, values in iteritems(given):
				merged[group].update(values)
			full_params[submethod] = merged
		yield method, data['uid'], self.methods.params2optset(full_params)
def params2defaults(params):
	"""Produce the defaults structure for a method's params.

	datasets/jobs entries default to None (or [] for list-form [name]
	entries). Option values are normalised: RequiredOption/OptionDefault
	wrappers are unwrapped, bare types and enums become None, and
	containers are walked recursively.
	"""
	d = DotDict()
	for key in ('datasets', 'jobs',):
		r = {}
		for v in params[key]:
			if isinstance(v, list):
				r[v[0]] = []
			else:
				r[v] = None
		d[key] = r
	def fixup(item):
		if isinstance(item, dict):
			d = {k: fixup(v) for k, v in iteritems(item)}
			# A one-element dict whose only value collapsed to None is
			# presumably a type-spec template rather than a real default,
			# so it defaults to empty.
			if len(d) == 1 and first_value(d) is None and first_value(item) is not None:
				return {}
			return d
		if isinstance(item, (list, tuple, set,)):
			l = [fixup(v) for v in item]
			# Same rule for single-element containers collapsing to [None].
			if l == [None] and list(item) != [None]:
				l = []
			return type(item)(l)
		if isinstance(item, (type, OptionEnum)):
			return None
		# Anything else must be a plain value of an accepted type.
		assert isinstance(item, (bytes, unicode, int, float, long, bool, OptionEnum, NoneType, datetime.datetime, datetime.date, datetime.time, datetime.timedelta)), type(item)
		return item
	def fixup0(item):
		# Unwrap option wrappers before the general fixup.
		if isinstance(item, RequiredOption):
			item = item.value
		if isinstance(item, OptionDefault):
			item = item.default
		return fixup(item)
	d.options = {k: fixup0(v) for k, v in iteritems(params.options)}
	return d
def _job_params(jobid):
	# Load the job's setup file, re-apply any recorded option typing,
	# and hoist the job's own method params to the top level.
	from accelerator.setupfile import load_setup
	setup = load_setup(jobid)
	for method, typelist in iteritems(setup.get('_typing', {})):
		_apply_typing(setup.params[method].options, typelist)
	setup.update(setup.params[setup.method])
	return setup
def _reprify(o):
	# Deterministic textual form of an option value for optset strings.
	# Unwrap defaults first so the repr reflects the effective value.
	if isinstance(o, OptionDefault):
		o = o.default
	if isinstance(o, (bytes, str, int, float, long, bool, NoneType)):
		return repr(o)
	if isinstance(o, unicode):
		# not reachable in PY3, the above "str" matches
		return repr(o.encode('utf-8'))
	if isinstance(o, set):
		elements = ', '.join(map(_reprify, _sorted_set(o)))
		return '[%s]' % (elements,)
	if isinstance(o, (list, tuple)):
		elements = ', '.join(map(_reprify, o))
		return '[%s]' % (elements,)
	if isinstance(o, dict):
		pairs = ', '.join(
			'%s: %s' % (_reprify(k), _reprify(v),)
			for k, v in sorted(iteritems(o))
		)
		return '{%s}' % (pairs,)
	if isinstance(o, (datetime.datetime, datetime.date, datetime.time, datetime.timedelta,)):
		return str(o)
	raise AcceleratorError('Unhandled %s in dependency resolution' % (type(o),))
def __init__(self, package_list, configfilename):
	"""Build the method database from each package's method config file.

	Imports every package in package_list, reads its method
	configuration, rejects method names defined by more than one
	package, and finally builds the per-method dependency tree and the
	method -> link mapping.
	"""
	self.package_list = package_list
	self.db = {}
	for package in self.package_list:
		try:
			package_mod = import_module(package)
			if not hasattr(package_mod, "__file__"):
				raise ImportError("no __file__")
		except ImportError:
			raise Exception(
				"Failed to import %s, maybe missing __init__.py?" %
				(package, ))
		if not package_mod.__file__:
			# A package without __file__/a real directory cannot hold a
			# method config file.
			raise Exception(
				"%s has no __file__, maybe missing __init__.py?" %
				(package, ))
		confname = os.path.join(os.path.dirname(package_mod.__file__), configfilename)
		tmp = read_method_conf(confname)
		# A method name may only be defined by one package.
		for x in tmp:
			if x in self.db:
				print(
					"METHOD: ERROR, method \"%s\" defined both in \"%s\" and \"%s\"!"
					% (x, package, self.db[x]['package']))
				exit(1)
		for x in tmp.values():
			x['package'] = os.path.basename(package)
		self.db.update(tmp)
	# build dependency tree for all methods
	self.deptree = {}
	for method in self.db:
		self.deptree[method] = self._build_dep_tree(method, tree={})
	self.link = {k: v.get('link') for k, v in iteritems(self.db)}
def new_runners(config, used_versions):
	"""(Re)create runner subprocesses for the interpreter versions in use.

	Kills all existing runners first, then starts one subprocess per
	distinct interpreter executable, sharing it between all version
	keys that resolve to that executable. Updates and returns the
	module-global `runners` dict. Only versions present in
	used_versions are started.
	"""
	from accelerator.dispatch import run
	from accelerator.compat import itervalues, iteritems
	killed = set()
	# Several version keys can share one Runner object; kill each once.
	for runner in itervalues(runners):
		if id(runner) not in killed:
			runner.kill()
			killed.add(id(runner))
	runners.clear()
	# The current interpreter answers to 'DEFAULT' plus its own version
	# spelled with 1-3 components (e.g. "3", "3.9", "3.9.2"); explicitly
	# configured interpreters take precedence over these.
	candidates = {'DEFAULT': sys.executable}
	for cnt in (1, 2, 3):
		candidates['.'.join(map(str, sys.version_info[:cnt]))] = sys.executable
	candidates.update(config.interpreters)
	todo = {k: v for k, v in candidates.items() if k in used_versions}
	exe2r = {}
	for k, py_exe in iteritems(todo):
		if py_exe in exe2r:
			# Same executable as an already started runner: reuse it.
			runners[k] = exe2r[py_exe]
		else:
			# The child gets sock_c's fd number on its command line; the
			# parent keeps sock_p and closes its copy of sock_c. (The fd
			# lists passed to run() presumably control which fds the
			# child keeps/closes — confirm against dispatch.run.)
			sock_p, sock_c = socket.socketpair(socket.AF_UNIX, socket.SOCK_STREAM)
			cmd = [py_exe, __file__, str(sock_c.fileno()), sys.path[0]]
			pid = run(cmd, [sock_p.fileno()], [sock_c.fileno()], False)
			sock_c.close()
			runners[k] = Runner(pid=pid, sock=sock_p, python=py_exe)
			exe2r[py_exe] = runners[k]
	return runners
def _job_candidates_options(candidates):
	# For each candidate job, map its remaining 'section-name' entries
	# back to the option values that job was actually built with.
	for jobid, remset in iteritems(candidates):
		setup = job_params(jobid)
		optdiff = defaultdict(dict)
		for entry in remset:
			section, name = entry.split('-', 1)
			optdiff[section][name] = setup[section][name]
		yield jobid, optdiff
def optfilter(d):
	# Replace option placeholders with concrete values: enums carry no
	# value (None), OptionDefault wrappers carry theirs in .default.
	res = {}
	for key, value in iteritems(d):
		if isinstance(value, OptionEnum):
			res[key] = None
		elif isinstance(value, OptionDefault):
			res[key] = value.default
		else:
			res[key] = value
	return res
def upd(aggregate, part, level):
	# Recursively merge `part` into `aggregate`, descending until the
	# (closed-over) target `depth`, where a plain dict.update suffices.
	if level == depth:
		aggregate.update(part)
		return
	for key, value in iteritems(part):
		if key in aggregate:
			upd(aggregate[key], value, level + 1)
		else:
			aggregate[key] = value
def typefix(e):
	# Normalise nested containers: dicts become dict_type, every
	# sequence type becomes a list, and (py2 only) bytes become unicode.
	if isinstance(e, dict):
		fixed_pairs = ((typefix(k), typefix(v)) for k, v in iteritems(e))
		return dict_type(fixed_pairs)
	if isinstance(e, (list, tuple, set,)):
		return [typefix(member) for member in e]
	if PY2 and isinstance(e, bytes):
		return uni(e)
	return e
def _unicode_as_utf8bytes(obj):
	# Recursively encode unicode strings as utf-8 bytes; dicts come back
	# as DotDicts with both keys and values converted.
	if isinstance(obj, unicode):
		return obj.encode('utf-8')
	if isinstance(obj, dict):
		converted = (
			(_unicode_as_utf8bytes(k), _unicode_as_utf8bytes(v))
			for k, v in iteritems(obj)
		)
		return DotDict(converted)
	if isinstance(obj, list):
		return [_unicode_as_utf8bytes(v) for v in obj]
	return obj
def inner():
	# Sweep the whole db, moving every ghost entry over to ghost_db.
	# sorted() materialises the items, so deleting while walking is safe.
	moved = 0
	for key, db in iteritems(self.db):
		for ts, data in sorted(db.items()):
			if not self._is_ghost(data):
				continue
			del db[ts]
			self.ghost_db[key][ts].append(data)
			moved += 1
	return moved
def fmt(tree, start_indent=0):
	# Emit (pid, indent, msg, t) rows depth-first into the closed-over
	# `res`, children indented one step past their parent's deepest
	# stack entry; output rows use indent -1. Processes are ordered by
	# the timestamp of their first stack entry (0 when stackless).
	def first_stack_time(item):
		stack = item[1].stack
		return stack[0][0] if stack else 0
	for pid, d in sorted(iteritems(tree), key=first_stack_time):
		last[0] = d
		indent = start_indent
		for msg, t, _ in d.stack:
			res.append((pid, indent, msg, t))
			indent += 1
		fmt(d.children, indent)
		if d.output:
			res.append((pid, -1,) + d.output)
def truncate(self, key, timestamp):
	# Drop every entry at or after `timestamp`: keep older entries live,
	# move the rest into ghost_db, then re-evaluate dependent ghosts.
	kept = {}
	ghosted = {}
	for ts, data in iteritems(self.db[key]):
		target = kept if ts < timestamp else ghosted
		target[ts] = data
	self.log('truncate', DotDict(key=key, timestamp=timestamp))
	self.db[key] = kept
	ghost_db = self.ghost_db[key]
	for ts, data in iteritems(ghosted):
		ghost_db[ts].append(data)
	deps = self._update_ghosts() if ghosted else 0
	return {'count': len(ghosted), 'deps': deps}
def _playback_parsed(self):
	"""Replay the parsed log lines in write-timestamp order.

	Each parsed line is [action, *args] where action is 'add' or
	'truncate'; anything else means a corrupt log.
	"""
	for _writets, line in sorted(iteritems(self._parsed)):
		action = line.pop(0)
		assert action in ('add', 'truncate',)
		if action == 'add':
			self._parse_add(line)
		elif action == 'truncate':
			self._parse_truncate(line)
		else:
			# Unreachable given the assert above. The previous
			# `assert "can't happen"` was a no-op (a non-empty string
			# is always truthy); raise for real if it ever triggers.
			raise AssertionError("can't happen")
def _round_floats(d, ndigits):
	# Return an OrderedDict copy of `d` with float values rounded to
	# `ndigits`. Nested dicts are handled recursively; lists only get
	# their top-level float elements rounded.
	res = OrderedDict()
	for key, value in iteritems(d):
		if isinstance(value, float):
			value = round(value, ndigits)
		elif isinstance(value, dict):
			value = _round_floats(value, ndigits)
		elif isinstance(value, list):
			value = [
				round(element, ndigits) if isinstance(element, float) else element
				for element in value
			]
		res[key] = value
	return res
def _fix_jobids(self, key):
	"""Normalise the 'jobids' or 'datasets' params for every tree method.

	`key` selects which param group to fix. For each method, every
	input the method declares is looked up in the supplied params:
	- a plain name must map to a single string (or be unset/None);
	- a [name] (single-element list) declaration takes a list, or a
	  comma-separated string which is split into one (unset becomes []).
	Supplied names the method does not declare raise OptionException.
	"""
	for method, data in iteritems(self.tree):
		method_params = data['params'][method]
		data = method_params[key]  # the values actually supplied
		method_wants = self.methods.params[method][key]  # declared inputs
		res = {}
		for jobid_name in method_wants:
			if isinstance(jobid_name, str_types):
				# Scalar input: one jobid/dataset string (or unset).
				value = data.get(jobid_name)
				assert value is None or isinstance(
					value, str), 'Input %s on %s not a string as required' % (
						jobid_name,
						method,
					)
			elif isinstance(jobid_name, list):
				# List input, declared as [name]: exactly one string name.
				if len(jobid_name) != 1 or not isinstance(
						jobid_name[0], str_types):
					raise OptionException('Bad %s item on %s: %s' % (
						key,
						method,
						repr(jobid_name),
					))
				jobid_name = jobid_name[0]
				value = data.get(jobid_name)
				if value:
					if isinstance(value, str_types):
						# Accept 'a,b,c' as shorthand for a list.
						value = [e.strip() for e in value.split(',')]
				else:
					value = []
				assert isinstance(
					value, list
				), 'Input %s on %s not a list or string as required' % (
					jobid_name,
					method,
				)
			else:
				raise OptionException(
					'%s item of unknown type %s on %s: %s' % (
						key,
						type(jobid_name),
						method,
						repr(jobid_name),
					))
			res[jobid_name] = value
		method_params[key] = res
		# Anything supplied that the method did not declare is an error.
		spill = set(data) - set(res)
		if spill:
			raise OptionException('Unknown %s on %s: %s' % (
				key,
				method,
				', '.join(sorted(spill)),
			))
def __init__(self, methods, setup):
	"""Build the dependency tree for setup.method and distribute params.

	Every method in the tree gets its own params from setup, plus (by
	dict reference) the params of every method it depends on. Params
	given for methods not present in the tree only produce a warning.
	Finally options/jobids/datasets are validated and normalised.
	"""
	tree = methods.new_deptree(setup.method)
	self.methods = methods
	self.top_method = setup.method
	self.tree = tree
	self.add_flags({
		'make': False,
		'link': False,
	})
	seen = set()
	for method, data in iteritems(self.tree):
		seen.add(method)
		data['params'] = {method: setup.params[method]}
	unmatched = {
		method: params
		for method, params in iteritems(setup.params)
		if method not in seen
	}
	if unmatched:
		from accelerator.extras import json_encode
		print("DepTree Warning: Unmatched options remain:", json_encode(unmatched, as_str=True))
	def collect(method):
		# All methods that method depend on
		for child in tree[method]['dep']:
			yield child
			for method in collect(child):
				yield method
	# This probably updates some with the same data several times,
	# but this is cheap (key: dictref updates, nothing more.)
	for method, data in iteritems(self.tree):
		for submethod in set(collect(method)):
			data['params'].update(tree[submethod]['params'])
	self._fix_options(False)
	self._fix_jobids('jobids')
	self._fix_jobids('datasets')
def fixup(item):
	# Normalise one option default value. Dicts and sequences are walked
	# recursively; bare types and enum specs collapse to None; a
	# single-entry container whose only content collapsed to None (but
	# was not None originally) becomes empty.
	if isinstance(item, dict):
		walked = {k: fixup(v) for k, v in iteritems(item)}
		if len(walked) == 1 and first_value(walked) is None and first_value(item) is not None:
			return {}
		return walked
	if isinstance(item, (list, tuple, set,)):
		walked = [fixup(v) for v in item]
		if walked == [None] and list(item) != [None]:
			walked = []
		return type(item)(walked)
	if isinstance(item, (type, OptionEnum)):
		return None
	assert isinstance(item, (bytes, unicode, int, float, long, bool, OptionEnum, NoneType, datetime.datetime, datetime.date, datetime.time, datetime.timedelta)), type(item)
	return item
def chain(self, length=-1, reverse=False, stop_job=None):
	"""Like Dataset.chain but for jobs.

	Walks backwards through each job's 'previous' jobs entry, for at
	most `length` steps (-1 = unlimited), stopping before `stop_job`.
	stop_job may also be {job: name}, meaning the job that `job` refers
	to as `name` in its jobs. Oldest first unless reverse is set.
	"""
	if isinstance(stop_job, dict):
		assert len(
			stop_job) == 1, "Only pass a single stop_job={job: name}"
		stop_job, stop_name = next(iteritems(stop_job))
		if stop_job:
			stop_job = Job(stop_job).params.jobs.get(stop_name)
	collected = []
	current = self
	while len(collected) != length and current and current != stop_job:
		collected.append(current)
		current = current.params.jobs.get('previous')
	if not reverse:
		collected.reverse()
	return collected
def options2required(options):
	# Collect the names of options that must be supplied: explicit
	# _OptionString/RequiredOption markers, and enums that do not accept
	# None. Containers are searched recursively, always recording the
	# top-level key.
	required = set()
	def walk(key, value):
		if isinstance(value, (_OptionString, RequiredOption)):
			required.add(key)
		elif isinstance(value, OptionEnum):
			if None not in value._valid:
				required.add(key)
		elif isinstance(value, dict):
			for inner in itervalues(value):
				walk(key, inner)
		elif isinstance(value, (list, tuple, set,)):
			for inner in value:
				walk(key, inner)
	for key, value in iteritems(options):
		walk(key, value)
	return required
def load_methods(all_packages, data):
	"""Load the given methods ((package, key) pairs) in this interpreter.

	For each method: imports its module, hashes its source together
	with all depend_extra files, packs source + deps into a gzipped tar
	(stored in the global `archives`), extracts options/datasets/jobs
	(with per-item descriptions scraped from source comments) and
	validates any equivalent_hashes declaration.

	Returns (warnings, failed, hashes, params, descriptions); a method
	that raises during loading ends up in `failed` only.
	"""
	from accelerator.compat import str_types, iteritems
	from accelerator.extras import DotDict, OptionEnum, OptionEnumValue
	from accelerator.extras import RequiredOption, OptionDefault
	from accelerator import __version__ as ax_version
	res_warnings = []
	res_failed = []
	res_hashes = {}
	res_params = {}
	res_descriptions = {}
	def tar_add(name, data):
		# Add one file to the current method's archive, with the common
		# dep prefix stripped from its stored name.
		assert name.startswith(dep_prefix)
		info = tarfile.TarInfo()
		info.name = name[len(dep_prefix):]
		info.size = len(data)
		tar_o.addfile(info, io.BytesIO(data))
	all_prefixes = set()
	# This is only used for informational warnings, so failure is not a
	# disaster. And failure is somewhat likely when using several runners
	# using the same-ish python version. (Maybe only on python 2.)
	# There appears to be a race where one python reads an incomplete
	# .pyc file written by the other.
	for package in all_packages:
		try:
			all_prefixes.add(get_mod(package)[2])
		except Exception:
			pass
	for package, key in data:
		modname = '%s.a_%s' % (package, key)
		try:
			mod, mod_filename, prefix = get_mod(modname)
			# Resolve depend_extra entries (modules or relative paths)
			# to absolute filenames.
			depend_extra = []
			for dep in getattr(mod, 'depend_extra', ()):
				dep = mod2filename(dep)
				if isinstance(dep, str_types):
					dep = str(dep)  # might be unicode on py2
					if not dep.startswith('/'):
						dep = prefix + dep
					depend_extra.append(dep)
				else:
					raise MsgException('Bad depend_extra: %r' % (dep, ))
			dep_prefix = os.path.commonprefix(depend_extra + [mod_filename])
			# commonprefix works per character (and commonpath is v3.5+)
			dep_prefix = dep_prefix.rsplit('/', 1)[0] + '/'
			with open(mod_filename, 'rb') as fh:
				src = fh.read()
			tar_fh = io.BytesIO()
			tar_o = tarfile.open(mode='w:gz', fileobj=tar_fh, compresslevel=1)
			tar_add(mod_filename, src)
			h = hashlib.sha1(src)
			hash = int(h.hexdigest(), 16)
			# Find imported modules that live under one of our packages:
			# these probably ought to be listed in depend_extra.
			likely_deps = set()
			dep_names = {}
			for k in dir(mod):
				v = getattr(mod, k)
				if isinstance(v, ModuleType):
					filename = mod2filename(v)
					if filename:
						for cand_prefix in all_prefixes:
							if filename.startswith(cand_prefix):
								likely_deps.add(filename)
								dep_names[filename] = v.__name__
								break
			# XOR each dep's hash into the method hash, and archive it.
			hash_extra = 0
			for dep in depend_extra:
				with open(dep, 'rb') as fh:
					data = fh.read()
				hash_extra ^= int(hashlib.sha1(data).hexdigest(), 16)
				tar_add(dep, data)
			for dep in (likely_deps - set(depend_extra)):
				res_warnings.append(
					'%s.a_%s should probably depend_extra on %s' % (
						package,
						key,
						dep_names[dep],
					))
			res_hashes[key] = ("%040x" % (hash ^ hash_extra, ), )
			res_params[key] = params = DotDict()
			# It would have been nice to be able to use ast.get_source_segment
			def find_source(name):
				# Locate the source span of the top-level assignment to
				# `name`, as a slice into src; slice(0, 0) if not found.
				bname = name.encode('ascii')
				try:
					start = src.find(b'\n' + bname + b' ')
					if start == -1:
						start = src.index(b'\n' + bname + b'=')
					def find_end(startchar, endchar):
						# Scan for the bracket matching the first
						# startchar, ignoring brackets in #-comments.
						pos = src.index(startchar, start)
						nest = 0
						in_comment = False
						for pos, c in enumerate(src[pos:], pos):
							if in_comment:
								if c == b'\n'[0]:
									in_comment = False
							elif c == b'#'[0]:
								in_comment = True
							elif c == startchar:
								nest += 1
							elif c == endchar:
								nest -= 1
								if nest == 0:
									return pos
					end = None
					for c in src[start:]:
						if c == b'{'[0]:
							end = find_end(b'{'[0], b'}'[0])
							break
						elif c == b'('[0]:
							end = find_end(b'('[0], b')'[0])
							break
						elif c == b'['[0]:
							end = find_end(b'['[0], b']'[0])
							break
					if not end:
						print('Failed to figure out where %s is in %s' % (
							name,
							key,
						))
						end = start
					return slice(start, end)
				except Exception:
					return slice(0, 0)
			res_descriptions[key] = {
				'text': getattr(mod, 'description', '').strip(),
				'interpreter': sys.executable,
				'interpreter_version': sys.version,
				'accelerator_version': ax_version,
			}
			def fmtopt(v):
				# Human-readable rendering of an option declaration.
				if isinstance(v, type):
					return v.__name__
				elif isinstance(v, dict):
					return '{%s}' % (', '.join('%s: %s' % (fmtopt(k), fmtopt(v)) for k, v in v.items()), )
				elif isinstance(v, list):
					return '[%s]' % (', '.join(fmtopt(v) for v in v), )
				elif isinstance(v, OptionEnum):
					return '{%s}' % (', '.join(sorted(map(str, v._valid))), )
				elif isinstance(v, OptionEnumValue):
					return '%r {%s}' % (
						v,
						', '.join(sorted(map(str, v._valid))),
					)
				elif isinstance(v, RequiredOption):
					return 'RequiredOption(%s%s)' % (
						fmtopt(v.value),
						', none_ok=True' if v.none_ok else '',
					)
				elif isinstance(v, OptionDefault):
					if v.default is None:
						return 'OptionDefault(%s)' % (fmtopt(v.value), )
					return 'OptionDefault(%s, default=%s)' % (
						fmtopt(v.value),
						fmtopt(v.default),
					)
				else:
					return repr(v)
			for name, default in (
				(
					'options',
					{},
				),
				(
					'datasets',
					(),
				),
				(
					'jobs',
					(),
				),
			):
				params[name] = d = getattr(mod, name, default)
				if d:
					items = {
						v[0] if isinstance(v, list) else v
						for v in params[name]
					}
					if isinstance(d, dict):
						res_descriptions[key][name] = items = {
							v: [fmtopt(d[v])]
							for v in items
						}
					else:
						res_descriptions[key][name] = items = {
							v: []
							for v in items
						}
					# Scrape #-comments from the source of the declaration
					# and attach them to the item they belong to.
					src_part = src[find_source(name)].decode(
						'utf-8', 'backslashreplace')
					item = None
					spill = []  # comments not (yet) attached to an item
					prev_item = None
					item_indent = 0
					for orig_line in src_part.split('\n'):
						line = orig_line.strip()
						if not line:
							continue
						itempart = line
						if line.startswith('['):
							itempart = line.split(']')[0][1:]
						if itempart.startswith("'"):
							item = itempart[1:itempart.index("'", 1)]
						elif itempart.startswith('"'):
							item = itempart[1:itempart.index('"', 1)]
						elif not line.startswith('#'):
							item = line.split()[0].split('=')[0]
							if item.startswith('[') and item.endswith(']'):
								item = item[1:-1]
						if item != prev_item:
							if spill and item in items:
								items[item].extend(spill)
							prev_item = item
							item_indent = len(
								re.match(r'^\s*', orig_line).group(0).expandtabs())
							spill = []
						if '#' in line:
							indent, value = orig_line.split('#', 1)
							value = value.strip()
							if value:
								indent = len(indent.expandtabs())
								if item in items and indent > item_indent:
									items[item].append(value)
								else:
									spill.append(value)
					if spill and item in items:
						items[item].extend(spill)
			# Rename list-form entries to their '[name]' display form.
			for name in (
				'datasets',
				'jobs',
			):
				d = res_descriptions[key].get(name)
				for item in getattr(mod, name, ()):
					if isinstance(item, list):
						d['[%s]' % (item[0], )] = d.pop(item[0])
			equivalent_hashes = getattr(mod, 'equivalent_hashes', ())
			if equivalent_hashes:
				# Must be {verifier: hash or tuple of hashes}, one entry,
				# each hash a 40-char (sha1 hex) string.
				try:
					assert isinstance(equivalent_hashes, dict)
					assert len(equivalent_hashes) == 1
					k, v = next(iteritems(equivalent_hashes))
					assert isinstance(k, str_types)
					if isinstance(v, str_types):
						v = (v, )
					assert isinstance(v, tuple)
					for vv in v:
						assert isinstance(vv, str_types)
						assert len(vv) == 40
				except AssertionError:
					raise MsgException('Read the docs about equivalent_hashes')
				# The verifier is the hash of the source with the
				# equivalent_hashes declaration itself cut out.
				m = re.search(br'^equivalent_hashes\s*=\s*\{[^}]*\}', src, re.MULTILINE)
				assert m, 'Failed to find equivalent_hashes in ' + mod_filename
				start, end = m.span()
				end -= 1  # to get the same hash as the old way of parsing
				h = hashlib.sha1(src[:start])
				h.update(src[end:])
				verifier = "%040x" % (int(h.hexdigest(), 16) ^ hash_extra, )
				if verifier == k:
					res_hashes[key] += v
				else:
					res_warnings.append(
						'%s.a_%s has equivalent_hashes, but missing verifier %s' % (
							package,
							key,
							verifier,
						))
			tar_o.close()
			tar_fh.seek(0)
			archives[key] = tar_fh.read()
			check_picklable('options/datasets/jobs', res_params[key])
			check_picklable('description', res_descriptions[key])
		except Exception as e:
			# A broken method only fails itself; report and continue
			# with the remaining methods.
			if isinstance(e, MsgException):
				print('%s: %s' % (
					modname,
					str(e),
				))
			else:
				print_exc()
			res_failed.append(modname)
			for d in res_hashes, res_params, res_descriptions:
				d.pop(key, None)
			continue
	return res_warnings, res_failed, res_hashes, res_params, res_descriptions