def query(table, config, fidelity, seed=None):
    """Look up the stored entry for a config/fidelity pair in a nested table.

    The lookup path consists of the config values followed by the fidelity
    values, in the order given by each object's ``get_dictionary()``; the
    seed is the final path element.

    Args:
        table: Nested structure to search.
        config: Object exposing ``get_dictionary()``.
        fidelity: Object exposing ``get_dictionary()``.
        seed: Seed key to select. When ``None``, one of the seeds stored
            under the config/fidelity path is drawn with ``np.random.choice``.

    Returns:
        The value found at the full path, or ``None`` when nothing is stored
        for the requested seed.

    Raises:
        ValueError: If nothing is stored under the config/fidelity path.
    """
    config_values = OrderedDict(config.get_dictionary()).values()
    fidelity_values = OrderedDict(fidelity.get_dictionary()).values()
    lookup_keys = [*config_values, *fidelity_values]

    per_seed = glom.glom(table, glom.Path(*lookup_keys), default=None)
    if per_seed is None:
        print(lookup_keys)
        raise ValueError("Table contains no entry for given config-fidelity combination!")

    if seed is None:
        # No seed requested: pick one of the recorded seeds at random.
        seed = np.random.choice(list(per_seed.keys()))

    lookup_keys.append(seed)
    return glom.glom(table, glom.Path(*lookup_keys), default=None)
def docker_login(self):
    """Log into each applicable docker registry listed in the manifest.

    A repo from the manifest's ``docker_repos`` section is skipped when its
    ``only_if`` environment variable is unset/empty, or when it appears in
    the ``exclude_repos`` list of the current distro. Every registry that
    was logged into successfully is appended to ``self.repos``.

    Exits the process with status 1 if ``self.repos`` is still empty after
    all repos have been processed.
    """
    manifest = self.parent.manifest["docker_repos"]

    # The exclusion list depends only on the distro, not on the repo being
    # visited, so it is resolved once instead of on every loop iteration.
    try:
        repo_excludes = glom.glom(
            self.parent.manifest,
            glom.Path(f"{self.distro}{self.distro_version}", "exclude_repos"),
        )
    except glom.PathAccessError:
        # No exclusion list for this distro: nothing is excluded.
        repo_excludes = []

    for repo, settings in manifest.items():
        only_if = settings.get("only_if", False)
        if only_if and not os.getenv(only_if):
            log.debug("repo: '%s' only_if requirement not satisfied", repo)
            continue
        if repo in repo_excludes:
            log.debug("Repo %s has been excluded in the manifest!", repo)
            continue

        # The manifest values are first tried as environment variable names;
        # if the variable is unset or empty the value itself is used.
        user = os.getenv(settings["user"]) or settings["user"]
        passwd = os.getenv(settings["pass"]) or settings["pass"]
        registry = settings["registry"]

        if self.client.login(username=user, password=passwd, registry=registry):
            self.repos.append(registry)
            log.info("Logged into %s", registry)

    if not self.repos:
        log.fatal(
            "Docker login failed! Did not log into any repositories. Environment not set?"
        )
        sys.exit(1)
def get_data(self, obj, *path, can_skip=False):
    """Return the value found at ``path`` inside ``obj``.

    Args:
        obj: Nested structure to search.
        *path: Successive keys forming the lookup path.
        can_skip: When true, a missing path yields ``None`` instead of an
            error.

    Returns:
        The value at ``path``, or ``None`` if the path is missing and
        ``can_skip`` is true.

    Raises:
        glom.PathAccessError: If the path is missing and ``can_skip`` is
            false.
    """
    try:
        return glom.glom(obj, glom.Path(*path))
    except glom.PathAccessError:
        if can_skip:
            return None
        # Re-raise the active exception so the failing path is preserved.
        # Raising the bare class would discard that detail, and the
        # exception's constructor takes required arguments, so it cannot be
        # instantiated without them.
        raise
def update_table_with_new_entry(main_data: dict, new_entry: dict, config: dict, fidelity: dict) -> dict:
    """Insert one function-evaluation record into the nested benchmark table.

    The table is a dict hierarchy keyed first by the configuration values,
    then by the fidelity values (each in the iteration order of the dict
    passed in) and finally by the seed read from ``new_entry['info']['seed']``.
    The innermost value is ``new_entry`` itself, i.e. the dict produced by
    the function evaluation.

    Because the depth of the hierarchy depends on the parameter space,
    ``glom`` is used to read and assign along a key path of arbitrary length.
    Missing levels are created as empty dicts on the way down.

    Non-string configuration values and all fidelity values are converted to
    ``np.float32`` before being used as keys.

    Returns:
        ``main_data``, which is modified in place.
    """
    seed = new_entry['info']['seed']

    def _path_keys():
        # Keys are produced lazily so that each level is created in the table
        # before the next key is converted.
        for _name, value in config.items():
            yield str(value) if isinstance(value, str) else np.float32(value)
        for _name, value in fidelity.items():
            yield np.float32(value)
        yield seed

    nested_keys = []
    for key in _path_keys():
        nested_keys.append(key)
        level = glom.Path(*nested_keys)
        if glom.glom(main_data, level, default=None) is None:
            glom.assign(main_data, level, dict())

    glom.assign(main_data, glom.Path(*nested_keys), new_entry)
    return main_data
def supported_arch_list(self):
    """Return the recognised architectures listed for this distro.

    Iterates the manifest entry at ``self.key`` / ``<distro><distro_version>``
    and keeps, in order, only the items that are one of ``x86_64``,
    ``ppc64le`` or ``arm64``.
    """
    known_arches = ["x86_64", "ppc64le", "arm64"]
    distro_entry = glom.glom(
        self.parent.manifest,
        glom.Path(self.key, f"{self.distro}{self.distro_version}"),
    )
    return [name for name in distro_entry if name in known_arches]
def joblib_fn(count, entry, param_names):
    """Build a one-row DataFrame for a single table entry.

    ``entry`` is used as the key path into the module-level ``table['data']``.
    The resulting row holds the entry's keys converted to ``np.float32``, the
    stored record, and one loss value per (metric, split) pair taken from
    the module-level ``metrics`` and ``splits``.

    Returns:
        A ``pd.DataFrame`` indexed by ``count`` with columns ``param_names``,
        or ``count`` itself when the table has no record for ``entry``.
    """
    print(count, end="\r")
    record = glom.glom(table['data'], glom.Path(*entry), default=None)
    if record is None:
        return count

    row = [np.float32(item) for item in entry]
    row.append(record)
    # loss = 1 - metric
    row.extend(
        1 - record['info'][f"{split}_scores"][metric]
        for metric in metrics.keys()
        for split in splits
    )
    return pd.DataFrame([row], index=[count], columns=param_names)
def prepare_context(self):
    """Populate ``self.cuda``, the context used to fill the templates.

    Reads the build version for the current distro and CUDA version from the
    manifest, stores the version/OS/tag information in ``self.cuda``, and
    then passes three levels of the manifest to ``self.extract_keys``.
    """
    conf = self.parent.manifest
    version_parts = self.cuda_version.split(".")
    major = version_parts[0]
    minor = version_parts[1]
    distro_key = f"{self.distro}{self.distro_version}"

    build_version = self.get_data(
        conf, distro_key, "cuda", f"v{self.cuda_version}", "build_version"
    )

    # The templating context. This data structure is used to fill the templates.
    version_info = {
        "full": f"{self.cuda_version}.{build_version}",
        "major": major,
        "minor": minor,
        "build": build_version,
    }
    os_info = {"distro": self.distro, "version": self.distro_version}
    self.cuda = {
        "version": version_info,
        "os": os_info,
        "tag_suffix": self.tag_suffix,
    }

    # Users of manifest.yaml are allowed to set arbitrary keys for inclusion
    # in the templates; the discovered keys are injected into the template
    # context. Only three levels of the manifest are checked: the distro, its
    # "cuda" section, and the section of the current major.minor version.
    manifest_levels = ((), ("cuda",), ("cuda", f"v{major}.{minor}"))
    for sub_path in manifest_levels:
        self.extract_keys(self.get_data(conf, distro_key, *sub_path))

    log.info("cuda version %s", glom.glom(self.cuda, glom.Path("version")))
    log.debug("template context %s", self.cuda)
param_list.append(hp.sequence) for name in np.sort(z_discrete.get_hyperparameter_names()): hp = z_discrete.get_hyperparameter(str(name)) param_list.append(hp.sequence) param_list.append(seeds) count = 0 incumbents = dict() for m in metrics.keys(): incumbents[m] = dict(train_scores=np.inf, val_scores=np.inf, test_scores=np.inf) missing = [] for count, entry in enumerate(itertools.product(*param_list), start=1): key_path = entry # key_path = [np.float32(_key) for _key in key_path] val = glom.glom(table['data'], glom.Path(*key_path), default=None) if val is None: missing.append(count) continue print(count, val, '\n') for m in metrics.keys(): for k, v in incumbents[m].items(): if 1 - val['info'][k][m] < v: # loss = 1 - accuracy incumbents[m][k] = 1 - val['info'][k][m] print(incumbents) table['global_min'] = dict() for m in metrics.keys(): table['global_min'][m] = dict(train=incumbents[m]["train_scores"], val=incumbents[m]["val_scores"], test=incumbents[m]["test_scores"]) assert len(missing) == 0, "Incomplete collection: {} missing evaluations!\n" \