def __init__(
    self,
    search,
    evaluator,
    stop,
    trial_timeout=None,
    caught_exceptions=["TimeoutError"],
    name=None,
    context={},
):
    super().__init__(context=context)

    self.search = from_config(search)
    self.evaluator_config = to_config(evaluator)
    self.stop = _from_config(stop, classes=stoppers)
    self.trial_timeout = trial_timeout
    self.caught_exceptions = get_exceptions(caught_exceptions)

    self.id = compute_hash(time.time(), np.random.random(10))  # unique id of this run

    if name is None:
        self.name = humanize(self.id, words=2)
    else:
        self.name = name

    self.ready = False  # run is not ready until prepared
    self.readonly = False  # only True if instance is obtained through Run.checkout()
def schedule(self, suggestion):
    """Schedule evaluation of a suggestion.

    This also checks the cache in the background, and creates a faux future
    to return the cached result. This is slightly inefficient, but it
    substantially reduces the complexity of the interface: we can now always
    expect a future as a result, and the re-submission can be handled in a
    unified way by the `Run`. (You can't simply keep requesting suggestions
    until you hit something that is not in the cache; this leads to deadlocks
    when the search space has been exhausted.)
    """
    eid = compute_hash(suggestion)

    if eid in self.evals:
        result = self.evals.get_result(eid)
        future = self.pool.schedule(passthrough, args=(result,))
    else:
        future = self.pool.schedule(
            evaluate, args=(eid, suggestion), timeout=self.trial_timeout
        )

    future.eid = eid  # annotate with hash key in evals
    future.suggestion = suggestion

    return future
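# Illustrative usage sketch (not from the source): both a cache hit and a fresh
# evaluation come back as futures, so a driver loop can treat them uniformly.
# `run` and `suggestion` are hypothetical; only `schedule`, `future.eid`, and
# `future.suggestion` are taken from the code above.
#
# future = run.schedule(suggestion)
# result = future.result()  # works for cached (passthrough) and fresh evaluations alike
# print(future.eid, future.suggestion)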
def replay_submit(self, payload):
    """Replay a recorded submission, also re-filling the evals cache."""
    tid = payload["tid"]
    result = payload["result"]

    self.submit(tid, result)

    # refilling the evals...! (since the pool can't do it)
    state, outcome = parse_config(result)
    eid = compute_hash(outcome["suggestion"])
    self.evals.submit_result(eid, result)
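# Illustrative sketch (not from the source) of the payload shape replay_submit
# expects: a dict with the trial id under "tid" and the recorded result under
# "result", where the result's outcome carries the original suggestion so the
# evals cache key can be recomputed. The concrete values are hypothetical.
#
# payload = {
#     "tid": "some-trial-id",
#     "result": recorded_result,  # parse_config(recorded_result) -> (state, {"suggestion": ..., ...})
# }
# run.replay_submit(payload)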
def prepare_task(data, quippy_config):
    """Create a scratch folder containing the data and the quippy config for one task."""
    tid = compute_hash(time.time(), np.random.rand(), data.geom_hash, quippy_config)

    folder = get_scratch() / f"soap_{tid}"
    folder.mkdir(parents=True)

    write_data(data, folder)

    with open(folder / "quippy_config.txt", "w+") as f:
        f.write(quippy_config)

    return folder
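# Illustrative usage sketch (not from the source): prepare_task lays out a
# fresh scratch folder per task, so the caller only needs to keep the returned
# path. `data` and `my_quippy_config` are hypothetical.
#
# folder = prepare_task(data, my_quippy_config)
# # folder now contains the written dataset plus "quippy_config.txt",
# # and its name embeds a hash unique to this (time, data, config) combination.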
def get_geom_hash(self):
    """Hash of only the geometries, ignoring properties etc."""
    return compute_hash(self.z, self.r, self.b)
def get_hash(self):
    """Hash of dataset, ignoring name and description."""
    return compute_hash(self.z, self.r, self.b, self.p)
def __init__(
    self,
    z,
    r,
    b=None,
    p={},
    name=None,
    desc="",
    splits=[],
    _info=None,
    _hash=None,
    _geom_hash=None,
):
    super().__init__()

    # Sanity checks
    assert len(z) == len(
        r
    ), "Attempted to create dataset, but z and r are not of the same size ({} vs {})!".format(
        len(z), len(r)
    )
    assert b is None or len(b) == len(
        z
    ), "Attempted to create dataset, but z and b are not of the same size ({} vs {})!".format(
        len(z), len(b)
    )
    assert len(r) > 0, "Attempted to create dataset, r has 0 length!"

    if p != {}:
        for pname, values in p.items():
            assert len(values) == len(
                z
            ), f"Attempted to create dataset, but z and property {pname} are not of the same size ({len(z)} vs {len(values)})!"

    self.desc = desc
    self.z = z
    self.r = r
    self.b = b
    self.p = p
    self.splits = splits

    if name is None:
        name = compute_hash(self.z, self.r, self.b, self.p)
    self.name = name

    self.n = len(self.z)

    # perform some consistency checks;
    # if these ever fail there Is Trouble
    # (these are supposed to only be written once and never change,
    # so if they mismatch most likely the hashing method is not as stable
    # as I thought...)
    if _hash is not None:
        this_hash = self.get_hash()
        assert _hash == this_hash, "Hashes of dataset are not matching!"
        self.hash = _hash
    else:
        self.hash = self.get_hash()

    if _geom_hash is not None:
        this_hash = self.get_geom_hash()
        assert _geom_hash == this_hash, "Geometry hashes of dataset are not matching!"
        self.geom_hash = _geom_hash
    else:
        self.geom_hash = self.get_geom_hash()

    if _info is not None:
        self.info = _info
    else:
        self.info = self.get_info()

    # compute auxiliary info that we need to convert properties
    self.aux = {}
    n_atoms = np.array([len(zz) for zz in self.z])  # count atoms in unit cell
    n_non_O = np.array(
        [len(zz[zz != 8]) for zz in self.z]
    )  # count atoms that are not Oxygen
    n_non_H = np.array(
        [len(zz[zz != 1]) for zz in self.z]
    )  # count atoms that are not Hydrogen
    self.aux["n_atoms"] = n_atoms
    self.aux["n_non_O"] = n_non_O
    self.aux["n_non_H"] = n_non_H

    # compatibility with Data history tracking
    # to tide us over until this gets rewritten as
    # a proper Data subclass
    self.history = [f"dataset@{self.geom_hash}"]
    self.id = self.geom_hash
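# Illustrative sketch (not from the source) of constructing a dataset: z holds
# the atomic numbers per structure, r the corresponding positions, b optional
# basis/cell vectors, and p per-structure properties. The class name `Dataset`
# and the toy arrays below are assumptions; only the argument names come from
# the __init__ above.
#
# import numpy as np
#
# z = [np.array([8, 1, 1])]  # one water molecule
# r = [np.array([[0.0, 0.0, 0.0],
#                [0.76, 0.59, 0.0],
#                [-0.76, 0.59, 0.0]])]
# dataset = Dataset(z, r, p={"energy": np.array([-76.4])}, name="water_example")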