def _update_finish(self, dict_of_hashes, verbose=False):
    """Filter the in-use database down to jobs with valid hashes.

    Always call after (a sequence of) update_workspace calls.
    Rebuilds self.db_by_workdir (all known jobs, flagged 'current' or not)
    and self.db_by_method (only currently valid jobs, newest first).
    """
    # discard cached setup.json from any gone jobs
    # (so we reload it if they reappear, and also so we don't see them here)
    for j in set(_paramsdict) - self._fsjid:
        del _paramsdict[j]
    discarded_due_to_hash_list = []
    # includes all known jobs, not just current ones.
    self.db_by_workdir = defaultdict(dict)
    # Keep only jobs with valid hashes.
    job_candidates = {}
    for setup, subjobs in itervalues(_paramsdict):
        if setup.hash in dict_of_hashes.get(setup.method, ()):
            job_candidates[setup.jobid] = (setup, subjobs)
        else:
            discarded_due_to_hash_list.append(setup.jobid)
        # Index every job under its workdir (the jobid prefix before the
        # last '-'), valid or not, so the full listing stays complete.
        self.db_by_workdir[setup.jobid.rsplit('-', 1)[0]][setup.jobid] = _mklistinfo(setup)
    # Keep only jobs where all subjobs are kept.
    # Iterate to a fixed point, since discarding one job can invalidate
    # other jobs that list it among their subjobs.
    discarded_due_to_subjobs = []
    done = False
    while not done:
        done = True
        for setup, subjobs in list(itervalues(job_candidates)):
            for j in subjobs:
                if j not in job_candidates:
                    done = False
                    discarded_due_to_subjobs.append(setup.jobid)
                    del job_candidates[setup.jobid]
                    break
    # Mark everything that did not survive as not current in the full listing.
    for d in self.db_by_workdir.values():
        for jid, li in d.items():
            if jid not in job_candidates:
                li['current'] = False
    # Keep lists of jobs per method, only with valid hashes and subjobs.
    self.db_by_method = defaultdict(list)
    for setup, _ in itervalues(job_candidates):
        job = _mkjob(setup)
        self.db_by_method[job.method].append(job)
    # Newest first
    for l in itervalues(self.db_by_method):
        l.sort(key=attrgetter('time'), reverse=True)
    if verbose:
        if discarded_due_to_hash_list:
            print("DATABASE: discarding due to unknown hash: %s" % ', '.join(discarded_due_to_hash_list))
        # Fix: discarded_due_to_subjobs was collected but never reported,
        # unlike the parallel hash-discard list above.
        if discarded_due_to_subjobs:
            print("DATABASE: discarding due to discarded subjobs: %s" % ', '.join(discarded_due_to_subjobs))
        print("DATABASE: Full database contains %d items" % (sum(len(v) for v in itervalues(self.db_by_method)),))
def new_runners(config, used_versions):
    """Kill all current runners and start fresh ones for the interpreter
    versions named in used_versions.

    Mutates and returns the module-level runners dict (key -> Runner).
    """
    from accelerator.dispatch import run
    from accelerator.compat import itervalues, iteritems
    # Several keys can map to the same Runner object (shared interpreter),
    # so track object identity to kill each underlying process only once.
    killed = set()
    for runner in itervalues(runners):
        if id(runner) not in killed:
            runner.kill()
            killed.add(id(runner))
    runners.clear()
    # Candidate interpreters: 'DEFAULT' plus version-prefix keys for the
    # running interpreter ('3', '3.9', '3.9.1'-style), then anything
    # configured explicitly (config.interpreters overrides on key clash).
    candidates = {'DEFAULT': sys.executable}
    for cnt in (1, 2, 3):
        candidates['.'.join(map(str, sys.version_info[:cnt]))] = sys.executable
    candidates.update(config.interpreters)
    # Only start runners that are actually used.
    todo = {k: v for k, v in candidates.items() if k in used_versions}
    exe2r = {}  # executable path -> Runner, so one child serves all its keys
    for k, py_exe in iteritems(todo):
        if py_exe in exe2r:
            runners[k] = exe2r[py_exe]
        else:
            sock_p, sock_c = socket.socketpair(socket.AF_UNIX, socket.SOCK_STREAM)
            # The child re-executes this module, receiving its socket fd
            # and our sys.path[0] as arguments.
            cmd = [py_exe, __file__, str(sock_c.fileno()), sys.path[0]]
            pid = run(cmd, [sock_p.fileno()], [sock_c.fileno()], False)
            sock_c.close()  # child end now owned by the child process
            runners[k] = Runner(pid=pid, sock=sock_p, python=py_exe)
            exe2r[py_exe] = runners[k]
    return runners
def _validate_data(self, data, with_deps=True):
    """Assert that a loaded entry has exactly the expected shape.

    Top-level entries (with_deps=True) carry user/build/deps/flags and
    have each dep validated recursively as a dep-free sub-entry.
    Converts data.timestamp to a TimeStamp in place.
    """
    if with_deps:
        expected_keys = {
            'timestamp', 'joblist', 'caption',
            'user', 'build', 'deps', 'flags',
        }
        assert set(data) == expected_keys
        assert isinstance(data.user, unicode)
        assert isinstance(data.build, unicode)
        assert isinstance(data.deps, dict)
        for dep in itervalues(data.deps):
            assert isinstance(dep, dict)
            self._validate_data(DotDict(dep), False)
    else:
        assert set(data) == {'timestamp', 'joblist', 'caption'}
    # Checks shared by both entry shapes.
    assert joblistlike(data.joblist), data.joblist
    assert data.joblist
    assert isinstance(data.caption, unicode)
    data.timestamp = TimeStamp(data.timestamp)
def analysis(sliceno, slices, prepare_res):
    """Per-slice typing pass.

    Returns (bad_count, final_bad_count, default_count, minmax, hash_lines)
    for this slice.
    """
    if options.numeric_comma:
        # Find a locale that makes the C backend accept decimal commas.
        try_locales = [
            'da_DK', 'nb_NO', 'nn_NO', 'sv_SE', 'fi_FI',
            'en_ZA', 'es_ES', 'es_MX', 'fr_FR', 'ru_RU',
            'de_DE', 'nl_NL', 'it_IT',
        ]
        for localename in try_locales:
            localename = localename.encode('ascii')
            # numeric_comma() returns falsy on success (the for/else only
            # raises when no locale was accepted); try both the bare name
            # and the explicit .UTF-8 variant.
            if not cstuff.backend.numeric_comma(localename):
                break
            if not cstuff.backend.numeric_comma(localename + b'.UTF-8'):
                break
        else:
            raise Exception("Failed to enable numeric_comma, please install at least one of the following locales: " + " ".join(try_locales))
    dw, dws, lines, chain, column2type = prepare_res
    # A list of per-slice writers means output is being rehashed.
    if dws:
        dw = dws[sliceno]
        rehashing = True
    else:
        rehashing = False
    # Mutable state shared with analysis_lap and the map helpers.
    vars = DotDict(
        sliceno=sliceno,
        slices=slices,
        known_line_count=0,
        badmap_size=0,
        badmap_fd=-1,
        slicemap_size=0,
        slicemap_fd=-1,
        map_fhs=[],
        res_bad_count={},
        res_default_count={},
        res_minmax={},
        first_lap=True,
        rehashing=rehashing,
        hash_lines=None,
        dw=dw,
        chain=chain,
        lines=lines,
        column2type=column2type,
        # Reverse of options.rename, restricted to renames whose source
        # column exists and whose target is actually being typed.
        rev_rename={v: k for k, v in options.rename.items() if k in datasets.source.columns and v in column2type},
    )
    if options.filter_bad:
        vars.badmap_fd = map_init(vars, 'badmap%d' % (sliceno,))
        bad_count, default_count, minmax = analysis_lap(vars)
        # If the first lap hit any bad lines, run a second lap (badmap now
        # populated) and take, per slice, the max count over all columns.
        if sum(sum(c) for c in itervalues(bad_count)):
            vars.first_lap = False
            vars.res_bad_count = {}
            final_bad_count, default_count, minmax = analysis_lap(vars)
            final_bad_count = [max(c) for c in zip(*final_bad_count.values())]
        else:
            final_bad_count = [0] * slices
    else:
        bad_count, default_count, minmax = analysis_lap(vars)
        final_bad_count = [0] * slices
    for fh in vars.map_fhs:
        fh.close()
    if rehashing:
        unlink('slicemap%d' % (sliceno,))
    return bad_count, final_bad_count, default_count, minmax, vars.hash_lines
def collect(key, value, path=''):
    """Record the spec for value under path in res, recursing into dicts.

    Dict values are walked with the wildcard key '*'.  Conflicting specs
    for the same path are a method definition error.
    """
    path = "%s/%s" % (path, key)
    if not isinstance(value, dict):
        spec = value2spec(value)
        assert res.get(path, spec) == spec, 'Method %s has incompatible types in options%s' % (method, path,)
        res[path] = spec
        return
    for child in itervalues(value):
        collect('*', child, path)
def chk(key, value):
    """Add key to res if value (or anything nested inside it) marks the
    option as requiring a real value."""
    if isinstance(value, (_OptionString, RequiredOption)):
        res.add(key)
        return
    if isinstance(value, OptionEnum):
        # An enum that does not allow None is effectively required.
        if None not in value._valid:
            res.add(key)
        return
    # Containers: recurse with the same (top-level) key.
    if isinstance(value, dict):
        children = itervalues(value)
    elif isinstance(value, (list, tuple, set)):
        children = value
    else:
        return
    for child in children:
        chk(key, child)
def _receiver(self):
    """Background receive loop: read pickled (cookie, data) replies from
    self.sock and deliver each to the queue registered under that cookie
    in self._waiters.  On EOF or any error, wake all remaining waiters."""
    from accelerator.compat import QueueFull, pickle, itervalues
    while True:
        try:
            # Frame header: 1 op byte + 4-byte little-endian payload length.
            hdr = recvall(self.sock, 5)
            if not hdr:
                break  # clean EOF
            op, length = struct.unpack('<cI', hdr)  # op is not used here
            data = recvall(self.sock, length)
            cookie, data = pickle.loads(data)
            # Hand the reply to whoever registered this cookie.
            q = self._waiters.pop(cookie)
            q.put(data)
        except Exception:
            # Socket/protocol failure: log it, then fall through to
            # unblock everyone still waiting.
            print_exc()
            break
    # All is lost, unblock anyone waiting
    for q in itervalues(self._waiters):
        try:
            # Non-blocking best effort; a full queue means that waiter
            # already has something to consume.
            q.put(None, block=False)
        except QueueFull:
            pass
def convert(default_v, v):
    """Coerce option value v to match the declared default default_v.

    Tries the branches below in order; raises OptionException (naming
    option k on method, both from the enclosing scope) when v cannot be
    made to fit.  Recurses for containers and JobWithFile members.
    """
    # RequiredOption: unwrap, rejecting None unless none_ok was set.
    if isinstance(default_v, RequiredOption):
        if v is None and not default_v.none_ok:
            raise OptionException('Option %s on method %s requires a non-None value (%r)' % (k, method, default_v.value,))
        default_v = default_v.value
    # None on either side short-circuits (after validity checks).
    if default_v is None or v is None:
        if isinstance(default_v, _OptionString):
            raise OptionException('Option %s on method %s requires a non-empty string value' % (k, method,))
        if hasattr(default_v, '_valid') and v not in default_v._valid:
            raise OptionException('Option %s on method %s requires a value in %s' % (k, method, default_v._valid,))
        if isinstance(default_v, OptionDefault):
            v = default_v.default
        return v
    if isinstance(default_v, OptionDefault):
        default_v = default_v.value
    # dict: all declared values must share one type; convert each item to it.
    if isinstance(default_v, dict) and isinstance(v, dict):
        if default_v:
            sample_v = first_value(default_v)
            for chk_v in itervalues(default_v):
                assert isinstance(chk_v, type(sample_v))
            return {k: convert(sample_v, v) for k, v in iteritems(v)}
        else:
            return v
    # list/set/tuple: accept a comma-separated string too; convert each
    # element to the declared element type, then rebuild the container type.
    if isinstance(default_v, (list, set, tuple,)) and isinstance(v, str_types + (list, set, tuple,)):
        if isinstance(v, str_types):
            v = (e.strip() for e in v.split(','))
        if default_v:
            sample_v = first_value(default_v)
            for chk_v in default_v:
                assert isinstance(chk_v, type(sample_v))
            v = (convert(sample_v, e) for e in v)
        return type(default_v)(v)
    # enums: empty string normalizes to None; otherwise the value must be
    # listed in _valid or start with a declared prefix.
    if isinstance(default_v, (OptionEnum, OptionEnumValue,)):
        if not (v or None) in default_v._valid:
            ok = False
            for cand_prefix in default_v._prefixes:
                if v.startswith(cand_prefix):
                    ok = True
                    break
            if not ok:
                raise OptionException('%r not a permitted value for option %s on method %s (%s)' % (v, k, method, default_v._valid))
        return v or None
    # scalar str/number: cast to the declared type.
    if isinstance(default_v, str_types + num_types) and isinstance(v, str_types + num_types):
        if isinstance(default_v, _OptionString):
            v = str(v)
            if not v:
                raise OptionException('Option %s on method %s requires a non-empty string value' % (k, method,))
            return v
        if isinstance(default_v, unicode) and isinstance(v, bytes):
            return v.decode('utf-8')
        return type(default_v)(v)
    # Already the right (fuzzed) type, whether the default was a type
    # object or an instance.
    if (isinstance(default_v, type) and isinstance(v, typefuzz(default_v))) or isinstance(v, typefuzz(type(default_v))):
        return v
    # bool: accept the usual textual/numeric spellings.
    if isinstance(default_v, bool) and isinstance(v, (str, int)):
        lv = str(v).lower()
        if lv in ('true', '1', 't', 'yes', 'on',):
            return True
        if lv in ('false', '0', 'f', 'no', 'off', '',):
            return False
    # dates: normalize an instance default to its type, then go through
    # the typing converters.
    if isinstance(default_v, _date_types):
        default_v = type(default_v)
    if default_v in _date_types:
        try:
            return typing_conv[default_v.__name__](v)
        except Exception:
            raise OptionException('Failed to convert option %s %r to %s on method %s' % (k, v, default_v, method,))
    # Empty string for anything else: the type's empty value.
    if isinstance(v, str_types) and not v:
        return type(default_v)()
    # JobWithFile: accept a 2-4 element sequence, pad with the field
    # defaults and convert each member against its default.
    if isinstance(default_v, JobWithFile) or default_v is JobWithFile:
        defaults = ('', '', False, None,)
        if default_v is JobWithFile:
            default_v = defaults
        if not isinstance(v, (list, tuple,)) or not (2 <= len(v) <= 4):
            raise OptionException('Option %s (%r) on method %s is not %s compatible' % (k, v, method, type(default_v)))
        v = tuple(v) + defaults[len(v):]  # so all of default_v gets convert()ed.
        v = [convert(dv, vv) for dv, vv in zip(default_v, v)]
        return JobWithFile(*v)
    raise OptionException('Failed to convert option %s of %s to %s on method %s' % (k, type(v), type(default_v), method,))
def get_item_by_uid(self, uid):
    """Return the first item in self.tree whose 'uid' matches, else None."""
    matches = (item for item in itervalues(self.tree) if item['uid'] == uid)
    return next(matches, None)