Example #1
0
    def __init__(
        self,
        search,
        evaluator,
        stop,
        trial_timeout=None,
        caught_exceptions=None,
        name=None,
        context=None,
    ):
        """Set up a run.

        Args:
            search: Search config/instance; resolved via ``from_config``.
            evaluator: Evaluator; stored as a config via ``to_config``.
            stop: Stopper config/instance; resolved via ``_from_config``
                against the known ``stoppers`` classes.
            trial_timeout: Per-trial timeout passed to the pool, or None for
                no limit. (Presumably seconds — TODO confirm with the pool.)
            caught_exceptions: Names of exceptions to catch during
                evaluation; defaults to ``["TimeoutError"]``. Resolved to
                actual exception classes via ``get_exceptions``.
            name: Human-readable run name; generated from the run id if None.
            context: Extra context forwarded to the parent class; defaults
                to an empty dict.
        """
        # Fix for mutable default arguments: literal list/dict defaults are
        # shared across all calls, so use None sentinels instead.
        if caught_exceptions is None:
            caught_exceptions = ["TimeoutError"]
        if context is None:
            context = {}

        super().__init__(context=context)

        self.search = from_config(search)
        self.evaluator_config = to_config(evaluator)
        self.stop = _from_config(stop, classes=stoppers)
        self.trial_timeout = trial_timeout
        self.caught_exceptions = get_exceptions(caught_exceptions)

        # Unique id of this run: hash of wall-clock time plus random numbers.
        self.id = compute_hash(time.time(), np.random.random(10))

        self.name = humanize(self.id, words=2) if name is None else name

        self.ready = False  # run is not ready until prepared
        self.readonly = False  # only True if instance is obtained through Run.checkout()
Example #2
0
    def schedule(self, suggestion):
        """Schedule evaluation of a suggestion, transparently using the cache.

        The cache lookup happens here, in the background: on a hit we wrap
        the stored result in a faux future via `passthrough`. That is a bit
        wasteful, but it keeps the interface simple — callers always get a
        future back, and the `Run` can handle re-submission uniformly.
        (Simply re-requesting suggestions until one misses the cache is not
        an option: it deadlocks once the search space is exhausted.)
        """
        eid = compute_hash(suggestion)

        if eid not in self.evals:
            # Fresh suggestion: submit a real evaluation to the pool.
            future = self.pool.schedule(
                evaluate, args=(eid, suggestion), timeout=self.trial_timeout
            )
        else:
            # Cache hit: hand the stored result back through a faux future.
            cached = self.evals.get_result(eid)
            future = self.pool.schedule(passthrough, args=(cached, ))

        # Annotate so the Run can map this future back to its evaluation.
        future.eid = eid
        future.suggestion = suggestion

        return future
Example #3
0
    def replay_submit(self, payload):
        """Re-apply a recorded submission from a replay payload."""
        result = payload["result"]
        self.submit(payload["tid"], result)

        # Refill the evals cache by hand — during a replay the pool never
        # actually ran, so it can't do this for us.
        state, outcome = parse_config(result)
        self.evals.submit_result(compute_hash(outcome["suggestion"]), result)
Example #4
0
def prepare_task(data, quippy_config):
    """Stage a SOAP task: make a unique scratch folder and write its inputs."""
    # Task id mixes wall-clock time and randomness with the data/config
    # identity, so every call gets a fresh folder.
    tid = compute_hash(
        time.time(), np.random.rand(), data.geom_hash, quippy_config
    )

    folder = get_scratch() / f"soap_{tid}"
    folder.mkdir(parents=True)

    write_data(data, folder)

    with open(folder / "quippy_config.txt", "w+") as config_file:
        config_file.write(quippy_config)

    return folder
Example #5
0
 def get_geom_hash(self):
     """Return a hash over the geometries alone (z, r, b), ignoring properties."""
     geometry = (self.z, self.r, self.b)
     return compute_hash(*geometry)
Example #6
0
 def get_hash(self):
     """Return a hash over the full dataset content (z, r, b, p), ignoring name and description."""
     content = (self.z, self.r, self.b, self.p)
     return compute_hash(*content)
Example #7
0
    def __init__(
        self,
        z,
        r,
        b=None,
        p=None,
        name=None,
        desc="",
        splits=None,
        _info=None,
        _hash=None,
        _geom_hash=None,
    ):
        """Create a dataset.

        Args:
            z: Per-structure atomic numbers; iterated and indexed with
                boolean masks below, so presumably a sequence of numpy
                arrays — TODO confirm.
            r: Per-structure positions; must match z in length, non-empty.
            b: Optional per-structure data; must match z in length if given.
            p: Dict mapping property name -> per-structure values; every
                entry must match z in length. Defaults to an empty dict.
            name: Dataset name; defaults to the content hash.
            desc: Free-form description (not part of any hash).
            splits: Split definitions; defaults to an empty list.
            _info: Precomputed info; recomputed via get_info() if None.
            _hash: Expected content hash; verified against a fresh hash.
            _geom_hash: Expected geometry hash; verified likewise.

        Raises:
            AssertionError: If lengths are inconsistent, r is empty, or a
                supplied _hash/_geom_hash does not match the recomputed one.
        """
        super().__init__()

        # Fix for mutable default arguments: the previous `p={}` / `splits=[]`
        # defaults were single objects shared across all instances, and both
        # are stored on self, so mutations would leak between datasets.
        if p is None:
            p = {}
        if splits is None:
            splits = []

        # Sanity checks
        assert len(z) == len(
            r
        ), "Attempted to create dataset, but z and r are not of the same size ({} vs {})!".format(
            len(z), len(r)
        )
        assert b is None or len(b) == len(
            z
        ), "Attempted to create dataset, but z and b are not of the same size ({} vs {})!".format(
            len(z), len(b)
        )
        assert len(r) > 0, "Attempted to create dataset, r has 0 length!"

        # (Iterating an empty dict is a no-op, so no emptiness guard needed.)
        for pname, values in p.items():
            assert len(values) == len(
                z
            ), f"Attempted to create dataset, but z and property {pname} are not of the same size ({len(z)} vs {len(values)})!"

        self.desc = desc
        self.z = z
        self.r = r
        self.b = b
        self.p = p
        self.splits = splits

        if name is None:
            name = compute_hash(self.z, self.r, self.b, self.p)
        self.name = name

        self.n = len(self.z)  # number of structures

        # perform some consistency checks;
        # if these ever fail there Is Trouble
        # (these are supposed to only be written once and never change,
        # so if they mismatch most likely the hashing method is not as stable
        # as I thought...)
        if _hash is not None:
            this_hash = self.get_hash()
            assert _hash == this_hash, "Hashes of dataset are not matching!"
            self.hash = _hash
        else:
            self.hash = self.get_hash()

        if _geom_hash is not None:
            this_hash = self.get_geom_hash()
            assert _geom_hash == this_hash, "Geometry Hashes of dataset are not matching!"
            self.geom_hash = _geom_hash
        else:
            self.geom_hash = self.get_geom_hash()

        if _info is not None:
            self.info = _info
        else:
            self.info = self.get_info()

        # compute auxiliary info that we need to convert properties
        self.aux = {}
        n_atoms = np.array([len(zz) for zz in self.z])  # count atoms in unit cell
        n_non_O = np.array(
            [len(zz[zz != 8]) for zz in self.z]
        )  # count atoms that are not Oxygen
        n_non_H = np.array(
            [len(zz[zz != 1]) for zz in self.z]
        )  # count atoms that are not Hydrogen

        self.aux["n_atoms"] = n_atoms
        self.aux["n_non_O"] = n_non_O
        self.aux["n_non_H"] = n_non_H

        # compatibility with Data history tracking
        # to tide us over until this gets rewritten as
        # a proper Data subclass
        self.history = [f"dataset@{self.geom_hash}"]
        self.id = self.geom_hash