Beispiel #1
0
 def _dispatch(
     self, request: tuple[str, ...]
 ) -> tuple[object, str | list[str]] | Exception | None:
     """Handle a single worker request and build the reply for it.

     Parameters
     ----------
     request : tuple
         The first item is the request type (``"start"`` or ``"stop"``),
         the remaining items are its arguments:
         ``("start", job_id, log_fname, job_name)`` or ``("stop", fname)``.

     Returns
     -------
     reply : tuple, Exception, or None
         * ``(learner, fname)`` for a successful ``"start"`` request.
         * ``None`` for a successful ``"stop"`` request.
         * The exception instance if handling the request raised one; this
           includes an unknown request type. Exceptions are returned, not
           raised.
     """
     request_type, *request_arg = request
     log.debug("got a request", request=request)
     try:
         if request_type == "start":
             # workers send us their slurm ID for us to fill in
             job_id, log_fname, job_name = request_arg
             kwargs = dict(job_id=job_id, log_fname=log_fname, job_name=job_name)
             # give the worker a job and send back the fname to the worker
             fname = self._start_request(**kwargs)
             if fname is None:
                 raise RuntimeError("No more learners to run in the database.")
             # look up the learner that belongs to the chosen fname
             learner = next(
                 learner
                 for learner, f in zip(self.learners, self.fnames)
                 if maybe_lst(f) == fname
             )
             log.debug("choose a fname", fname=fname, **kwargs)
             return learner, fname
         if request_type == "stop":
             fname = request_arg[0]  # workers send us the fname they were given
             log.debug("got a stop request", fname=fname)
             self._stop_request(fname)  # reset the job_id to None
             return None
         # Without this, an unrecognised request would fall through and
         # implicitly reply `None`, which is indistinguishable from a
         # successful "stop".
         raise ValueError(f"Unknown request type: {request_type!r}")
     except Exception as e:
         return e
Beispiel #2
0
 def _stop_requests(self, fnames: list[str | list[str]]) -> None:
     """Mark every database entry whose fname is in `fnames` as done.

     Same effect as calling `_stop_request` for each fname, but the
     database is opened once and updated with a single call. Entries are
     matched on the string form of their (normalised) fname.
     """
     wanted = {str(maybe_lst(name)) for name in fnames}
     cleared = {"job_id": None, "is_done": True, "job_name": None}
     with TinyDB(self.db_fname) as db:
         matching_ids = [
             entry.doc_id
             for entry in db.all()
             if str(entry["fname"]) in wanted
         ]
         db.update(cleared, doc_ids=matching_ids)
Beispiel #3
0
 def _stop_request(self, fname: str | list[str]) -> None:
     """Mark the database entry for `fname` as done and clear its job info.

     Sets ``job_id`` and ``job_name`` to ``None`` and ``is_done`` to ``True``
     on the entry whose ``fname`` field equals the normalised `fname`.
     """
     fname = maybe_lst(fname)  # if a BalancingLearner
     matches_fname = Query().fname == fname
     cleared = {"job_id": None, "is_done": True, "job_name": None}
     with TinyDB(self.db_fname) as db:
         # make sure the entry exists
         assert db.get(matches_fname) is not None
         db.update(cleared, matches_fname)
def get_learner(
    url: str, log_fname: str, job_id: str, job_name: str
) -> tuple[BaseLearner, str | list[str]]:
    """Request a learner from the database manager running at `url`.

    Sends a ``("start", job_id, log_fname, job_name)`` request and waits
    for the reply. This process's log is written to `log_fname`.

    Parameters
    ----------
    url : str
        The url of the database manager running via
        (`adaptive_scheduler.server_support.manage_database`).
    log_fname : str
        The filename of the log-file. Should be passed in the job-script.
    job_id : str
        The job_id of the process running the job. Should be passed in the
        job-script.
    job_name : str
        The name of the job. Should be passed in the job-script.

    Returns
    -------
    learner : `adaptive.BaseLearner`
        Learner that is chosen.
    fname : str or list of str
        The filename(s) of the learner that was chosen, passed through
        `maybe_lst`.

    Raises
    ------
    RuntimeError
        If the reply is ``None`` (no learners to be run).
    Exception
        If the reply is itself an exception instance, it is re-raised here.
    """
    # One value drives both socket timeouts and the log message below, so
    # the message cannot drift from the actual timeout.
    timeout_ms = 300_000
    _add_log_file_handler(log_fname)
    log.info(
        "trying to get learner", job_id=job_id, log_fname=log_fname, job_name=job_name
    )
    with ctx.socket(zmq.REQ) as socket:
        socket.setsockopt(zmq.LINGER, 0)
        socket.setsockopt(zmq.SNDTIMEO, timeout_ms)  # send timeout
        socket.connect(url)
        socket.send_serialized(("start", job_id, log_fname, job_name), _serialize)
        log.info(
            f"sent start signal, going to wait {timeout_ms // 1000}s for a reply."
        )
        socket.setsockopt(zmq.RCVTIMEO, timeout_ms)  # receive timeout
        reply = socket.recv_serialized(_deserialize)
        log.info("got reply", reply=str(reply))
        if reply is None:
            msg = "No learners to be run."
            exception = RuntimeError(msg)
            log_exception(log, msg, exception)
            raise exception
        elif isinstance(reply, Exception):
            log_exception(log, "got an exception", exception=reply)
            raise reply
        else:
            learner, fname = reply
            log.info("got fname and learner")

    log.info("picked a learner")
    return learner, maybe_lst(fname)