Beispiel #1
0
    def request_termination(self) -> None:
        """Signal the evaluator to cancel and wait for the drainer thread to exit.

        The evaluation may already be finished, or may not have started yet,
        when this is called; in both situations no monitor is running. To
        avoid a long wait the evaluator is probed first (``timeout=5``); if
        that probe raises ``ClientError`` a warning is logged and the method
        returns without signalling anything.

        See issue: https://github.com/equinor/ert/issues/1250
        """
        tracker_log = logging.getLogger("ert_shared.ensemble_evaluator.tracker")

        try:
            loop = get_event_loop()
            con_info = self._ee_con_info
            probe = wait_for_evaluator(
                base_url=con_info.url,
                token=con_info.token,
                cert=con_info.cert,
                timeout=5,
            )
            loop.run_until_complete(probe)
        except ClientError as err:
            tracker_log.warning(f"{__name__} - exception {err}")
            return

        # The probe succeeded: request cancellation through a short-lived
        # monitor that is closed again as soon as the signal has been sent.
        with create_ee_monitor(self._ee_con_info) as monitor:
            monitor.signal_cancel()

        # Call _clear_work_queue() once per second for as long as the
        # drainer thread is still alive.
        while self._drainer_thread.is_alive():
            self._clear_work_queue()
            time.sleep(1)
Beispiel #2
0
    def request_termination(self):
        """Signal the evaluator to cancel and wait for the drainer thread to exit.

        The evaluation may already be finished, or may not have started yet,
        when this is called; in both situations no monitor is running. To
        avoid a long wait, ``wait_for_ws`` is called with a limit of 2 on the
        monitor URL first; if it raises ``ConnectionRefusedError`` a warning
        is logged and the method returns without signalling anything.

        See issue: https://github.com/equinor/ert/issues/1250
        """
        tracker_log = logging.getLogger("ert_shared.ensemble_evaluator.tracker")

        try:
            wait_for_ws(self._monitor_url, 2)
        except ConnectionRefusedError as err:
            tracker_log.warning(f"{__name__} - exception {err}")
            return

        # The websocket answered: request cancellation through a short-lived
        # monitor that is closed again as soon as the signal has been sent.
        host = self._monitor_host
        port = self._monitor_port
        with create_ee_monitor(host, port) as monitor:
            monitor.signal_cancel()

        # Call _clear_work_queue() once per second for as long as the
        # drainer thread is still alive.
        while self._drainer_thread.is_alive():
            self._clear_work_queue()
            time.sleep(1)
Beispiel #3
0
 def _drain_monitor(self) -> None:
     """Forward evaluator events to the work queue until the model finishes.

     A fresh asyncio event loop is installed for the calling thread, then a
     monitor is (re)opened for as long as ``self._model.isFinished()`` is
     false. Snapshot and snapshot-update events are put on
     ``self._work_queue``. A stopped or failed status triggers
     ``monitor.signal_done()``; a cancelled status enqueues
     ``EvaluatorTracker.DONE`` and returns immediately.

     When the loop ends normally, ``EvaluatorTracker.DONE`` is enqueued, the
     queue is joined and ``self._model.teardown_context()`` is called. Any
     unexpected exception is logged, ``DONE`` is enqueued, the queue is
     joined and the method returns without tearing down the context.
     """
     asyncio.set_event_loop(asyncio.new_event_loop())
     log = logging.getLogger("ert_shared.ensemble_evaluator.drainer")

     while not self._model.isFinished():
         try:
             log.debug("connecting to new monitor...")
             with create_ee_monitor(self._ee_con_info) as monitor:
                 log.debug("connected")
                 for event in monitor.track():
                     event_type = event["type"]
                     if event_type not in (
                         EVTYPE_EE_SNAPSHOT,
                         EVTYPE_EE_SNAPSHOT_UPDATE,
                     ):
                         # Only the terminator is logged; every other
                         # non-snapshot event is ignored.
                         if event_type == EVTYPE_EE_TERMINATED:
                             log.debug("got terminator event")
                         continue

                     self._work_queue.put(event)
                     status = event.data.get(STATUS)
                     if status in (
                         ENSEMBLE_STATE_STOPPED,
                         ENSEMBLE_STATE_FAILED,
                     ):
                         log.debug(
                             "observed evaluation stopped event, signal done"
                         )
                         monitor.signal_done()
                     if status == ENSEMBLE_STATE_CANCELLED:
                         log.debug(
                             "observed evaluation cancelled event, exit drainer"
                         )
                         # Allow track() to emit an EndEvent.
                         self._work_queue.put(EvaluatorTracker.DONE)
                         return
             # This sleep needs to be there. Refer to issue #1250: `Authority
             # on information about evaluations/experiments`
             time.sleep(self._next_ensemble_evaluator_wait_time)
         except (ConnectionRefusedError, ClientError) as err:
             # Only worth reporting while the model is still running.
             if not self._model.isFinished():
                 log.debug(f"connection refused: {err}")
         except ConnectionClosedError as err:
             # The monitor connection closed unexpectedly
             log.debug(f"connection closed error: {err}")
         except BaseException:  # pylint: disable=broad-except
             log.exception("unexpected error: ")
             # We really don't know what happened...  shut down
             # the thread and get out of here. The monitor has
             # been stopped by the ctx-mgr
             self._work_queue.put(EvaluatorTracker.DONE)
             self._work_queue.join()
             return

     log.debug(
         "observed that model was finished, waiting tasks completion...")
     # The model has finished, we indicate this by sending a DONE
     self._work_queue.put(EvaluatorTracker.DONE)
     self._work_queue.join()
     log.debug("tasks complete")
     self._model.teardown_context()
Beispiel #4
0
    def _drain_monitor(self):
        """Forward evaluator events to the work queue until the model finishes.

        A fresh asyncio event loop is installed for the calling thread, then
        a monitor is (re)opened for as long as ``self._model.isFinished()``
        is false. Snapshot and snapshot-update events are put on
        ``self._work_queue``. A stopped or failed status triggers
        ``monitor.signal_done()``; a cancelled status enqueues
        ``EvaluatorTracker.DONE`` and returns immediately.

        When the loop ends, ``EvaluatorTracker.DONE`` is enqueued, the queue
        is joined and ``self._model.teardown_context()`` is called.
        Connection-refused, client and connection-closed errors are logged at
        debug level and the loop retries.
        """
        asyncio.set_event_loop(asyncio.new_event_loop())
        log = logging.getLogger("ert_shared.ensemble_evaluator.drainer")

        while not self._model.isFinished():
            try:
                log.debug("connecting to new monitor...")
                with create_ee_monitor(
                    self._monitor_host,
                    self._monitor_port,
                    protocol=self._protocol,
                    cert=self._cert,
                    token=self._token,
                ) as monitor:
                    log.debug("connected")
                    for event in monitor.track():
                        event_type = event["type"]
                        if event_type not in (
                            ids.EVTYPE_EE_SNAPSHOT,
                            ids.EVTYPE_EE_SNAPSHOT_UPDATE,
                        ):
                            # Only the terminator is logged; every other
                            # non-snapshot event is ignored.
                            if event_type == ids.EVTYPE_EE_TERMINATED:
                                log.debug("got terminator event")
                            continue

                        self._work_queue.put(event)
                        status = event.data.get(ids.STATUS)
                        if status in (
                            ENSEMBLE_STATE_STOPPED,
                            ENSEMBLE_STATE_FAILED,
                        ):
                            log.debug(
                                "observed evaluation stopped event, signal done"
                            )
                            monitor.signal_done()
                        if status == ENSEMBLE_STATE_CANCELLED:
                            log.debug(
                                "observed evaluation cancelled event, exit drainer"
                            )
                            # Allow track() to emit an EndEvent.
                            self._work_queue.put(EvaluatorTracker.DONE)
                            return

                # This sleep needs to be there. Refer to issue #1250: `Authority
                # on information about evaluations/experiments`
                time.sleep(self._next_ensemble_evaluator_wait_time)

            except (ConnectionRefusedError, ClientError) as err:
                # Only worth reporting while the model is still running.
                if not self._model.isFinished():
                    log.debug(f"connection refused: {err}")
            except ConnectionClosedError as err:
                # The monitor connection closed unexpectedly
                log.debug(f"connection closed error: {err}")

        log.debug(
            "observed that model was finished, waiting tasks completion..."
        )
        # The model has finished, we indicate this by sending a DONE
        self._work_queue.put(EvaluatorTracker.DONE)
        self._work_queue.join()
        log.debug("tasks complete")
        self._model.teardown_context()
Beispiel #5
0
    def _drain_monitor(self):
        """Forward evaluator events to the work queue until the model finishes.

        A monitor is (re)opened for as long as ``self._model.isFinished()``
        is false. Snapshot and snapshot-update events are put on
        ``self._work_queue``. A stopped or failed status triggers
        ``monitor.signal_done()``; a cancelled status makes the method return
        immediately without enqueueing anything further.

        When the loop ends, ``None`` is enqueued to mark completion and the
        queue is joined.

        Raises:
            ConnectionRefusedError: after 10 consecutive refused connections
                while the model is still unfinished. The counter is reset by
                every pass that completes without an exception.
        """
        drainer_logger = logging.getLogger(
            "ert_shared.ensemble_evaluator.drainer")
        failures = 0
        while not self._model.isFinished():
            try:
                drainer_logger.debug("connecting to new monitor...")
                with create_ee_monitor(self._monitor_host,
                                       self._monitor_port) as monitor:
                    drainer_logger.debug("connected")
                    for event in monitor.track():
                        if event["type"] in (
                                ids.EVTYPE_EE_SNAPSHOT,
                                ids.EVTYPE_EE_SNAPSHOT_UPDATE,
                        ):
                            self._work_queue.put(event)
                            status = event.data.get(ids.STATUS)
                            if status in (
                                    ENSEMBLE_STATE_STOPPED,
                                    ENSEMBLE_STATE_FAILED,
                            ):
                                drainer_logger.debug(
                                    "observed evaluation stopped event, signal done"
                                )
                                monitor.signal_done()
                            if status == ENSEMBLE_STATE_CANCELLED:
                                drainer_logger.debug(
                                    "observed evaluation cancelled event, exit drainer"
                                )
                                return
                        elif event["type"] == ids.EVTYPE_EE_TERMINATED:
                            drainer_logger.debug("got terminator event")

                # This sleep needs to be there. Refer to issue #1250: `Authority
                # on information about evaluations/experiments`
                time.sleep(5)

            except ConnectionRefusedError as e:
                # A refusal after the model finished is expected; the loop
                # condition ends the method on the next check.
                if not self._model.isFinished():
                    drainer_logger.debug("connection refused: %s", e)
                    failures += 1
                    if failures == 10:
                        drainer_logger.debug("giving up.")
                        # Bare raise re-raises the active exception as-is.
                        raise
            else:
                # A full pass without an exception resets the failure streak.
                failures = 0

        drainer_logger.debug(
            "observed that model was finished, waiting tasks completion...")
        # The model has finished, we indicate this by sending a None
        self._work_queue.put(None)
        self._work_queue.join()
        drainer_logger.debug("tasks complete")
Beispiel #6
0
    def request_termination(self):
        """Signal the evaluator to cancel the running evaluation.

        The evaluation may already be finished, or may not have started yet,
        when this is called; in both situations no monitor is running. To
        avoid a long wait, ``wait_for_ws`` is called with a limit of 2 on the
        configured URL first; if it raises ``ConnectionRefusedError`` a
        warning is logged and the method returns without signalling anything.

        See issue: https://github.com/equinor/ert/issues/1250
        """
        tracker_log = logging.getLogger("ert_shared.ensemble_evaluator.tracker")
        config = load_config()

        try:
            evaluator_url = config.get("url")
            wait_for_ws(evaluator_url, 2)
        except ConnectionRefusedError as err:
            tracker_log.warning(f"{__name__} - exception {err}")
            return

        # The websocket answered: create a monitor and send the cancel signal.
        create_ee_monitor(self._monitor_host, self._monitor_port).signal_cancel()
Beispiel #7
0
    def _drain_monitor(self) -> None:
        """Consume monitor events and record them on the tracker's state.

        Snapshot events replace the per-iteration entry in
        ``self._realization_progress``; snapshot-update events are appended
        to ``self._updates``. Both are done while holding
        ``self._state_mutex`` and are followed by putting ``None`` on
        ``self._work_queue``. A terminated event starts a reconnect loop that
        either returns once ``self._model.isFinished()`` is true (after
        joining the work queue) or creates a new monitor.

        Raises:
            ConnectionRefusedError: if tracking the monitor is refused while
                the model is not yet finished.
        """
        drainer_logger = logging.getLogger(
            "ert_shared.ensemble_evaluator.drainer")
        monitor = create_ee_monitor(self._monitor_host, self._monitor_port)
        while monitor:
            try:
                for event in monitor.track():
                    if event["type"] == ids.EVTYPE_EE_SNAPSHOT:
                        # Full snapshot: keyed by the iteration number found
                        # in the event's metadata.
                        iter_ = event.data["metadata"]["iter"]
                        with self._state_mutex:
                            self._realization_progress[
                                iter_] = self._snapshot_to_realization_progress(
                                    event.data)
                            # One None item is enqueued per processed event
                            # (presumably a wake-up marker for the consumer
                            # -- TODO confirm against the queue reader).
                            self._work_queue.put(None)
                            if event.data.get(
                                    "status") == _EVTYPE_SNAPSHOT_STOPPED:
                                drainer_logger.debug(
                                    "observed evaluation stopped event, signal done"
                                )
                                # NOTE(review): signal_done() is called while
                                # _state_mutex is still held.
                                monitor.signal_done()
                    elif event["type"] == ids.EVTYPE_EE_SNAPSHOT_UPDATE:
                        # Partial update: stored as-is for later consumption.
                        with self._state_mutex:
                            self._updates.append(event.data)
                            self._work_queue.put(None)
                            # Cancellation ends the drainer immediately; the
                            # work queue is not joined on this path.
                            if event.data.get(
                                    "status") == _EVTYPE_SNAPSHOT_CANCELLED:
                                drainer_logger.debug(
                                    "observed evaluation cancelled event, return"
                                )
                                return
                            if event.data.get(
                                    "status") == _EVTYPE_SNAPSHOT_STOPPED:
                                drainer_logger.debug(
                                    "observed evaluation stopped event, signal done"
                                )
                                monitor.signal_done()
                    elif event["type"] == ids.EVTYPE_EE_TERMINATED:
                        drainer_logger.debug("got terminator event")
                        # Reconnect loop: either the model is finished and
                        # the drainer exits, or a new monitor is created.
                        while True:
                            if self._model.isFinished():
                                drainer_logger.debug(
                                    "observed that model was finished, waiting tasks completion..."
                                )
                                # Block until every queued item has been
                                # marked done by the consumer.
                                self._work_queue.join()
                                drainer_logger.debug("tasks complete")
                                return
                            try:
                                # Wait 5 seconds before each connection
                                # attempt.
                                time.sleep(5)
                                drainer_logger.debug(
                                    "connecting to new monitor...")
                                monitor = create_ee_monitor(
                                    self._monitor_host, self._monitor_port)
                                wait_for_ws(monitor.get_base_uri(),
                                            max_retries=2)
                                drainer_logger.debug("connected")
                                # Leaves only the reconnect loop. The
                                # enclosing `for` keeps iterating the
                                # iterator obtained from the previous
                                # monitor; the new monitor is tracked on the
                                # next pass of the outer `while`.
                                break
                            except ConnectionRefusedError as e:
                                # Refused: log and retry after the next
                                # sleep.
                                drainer_logger.debug(
                                    f"connection refused: {e}")
                                pass

            except ConnectionRefusedError as e:
                # A refusal is only tolerated once the model has finished;
                # otherwise it is propagated to the caller.
                if self._model.isFinished():
                    return
                else:
                    raise e