def test_dispatchers_can_connect_and_monitor_can_shut_down_evaluator(evaluator):
    """Check dispatcher events reach monitors and that a monitor can cancel.

    Steps performed:
      1. Start the evaluator and read the initial snapshot from its monitor.
      2. Connect two dispatch clients and send job events, checking the job
         status in the snapshot received after each event.
      3. Connect a second monitor and check the snapshot it receives first.
      4. Signal cancellation from the first monitor and check that both
         monitors receive a terminated event and nothing afterwards.

    Args:
        evaluator: the evaluator under test (presumably a pytest fixture
            defined elsewhere; not visible from this block).
    """
    # NOTE(review): neither monitor is explicitly closed in this test --
    # confirm whether the monitor objects need cleanup.
    monitor = evaluator.run()
    events = monitor.track()

    host = evaluator._config.host
    port = evaluator._config.port

    # first snapshot before any event occurs
    snapshot_event = next(events)
    snapshot = Snapshot(snapshot_event.data)
    assert snapshot.get_status() == "Unknown"

    # two dispatchers connect
    with Client(host, port, "/dispatch") as dispatch1, Client(
        host, port, "/dispatch"
    ) as dispatch2:
        # first dispatcher informs that job 0 is running
        send_dispatch_event(
            dispatch1,
            identifiers.EVTYPE_FM_JOB_RUNNING,
            "/ert/ee/0/real/0/stage/0/step/0/job/0",
            "event1",
            {"current_memory_usage": 1000},
        )
        snapshot = Snapshot(next(events).data)
        assert snapshot.get_job("0", "0", "0", "0")["status"] == "Running"

        # second dispatcher informs that job 0 is running
        send_dispatch_event(
            dispatch2,
            identifiers.EVTYPE_FM_JOB_RUNNING,
            "/ert/ee/0/real/1/stage/0/step/0/job/0",
            "event1",
            {"current_memory_usage": 1000},
        )
        snapshot = Snapshot(next(events).data)
        assert snapshot.get_job("1", "0", "0", "0")["status"] == "Running"

        # second dispatcher informs that job 0 is done
        send_dispatch_event(
            dispatch2,
            identifiers.EVTYPE_FM_JOB_SUCCESS,
            "/ert/ee/0/real/1/stage/0/step/0/job/0",
            "event1",
            {"current_memory_usage": 1000},
        )
        snapshot = Snapshot(next(events).data)
        assert snapshot.get_job("1", "0", "0", "0")["status"] == "Finished"

    # a second monitor connects
    monitor2 = ee_monitor.create(host, port)
    events2 = monitor2.track()
    snapshot = Snapshot(next(events2).data)
    assert snapshot.get_status() == "Unknown"
    assert snapshot.get_job("0", "0", "0", "0")["status"] == "Running"
    assert snapshot.get_job("1", "0", "0", "0")["status"] == "Finished"

    # one monitor requests that server exit
    monitor.signal_cancel()

    # both monitors should get a terminated event
    terminated = next(events)
    terminated2 = next(events2)
    assert terminated["type"] == identifiers.EVTYPE_EE_TERMINATED
    assert terminated2["type"] == identifiers.EVTYPE_EE_TERMINATED

    # Any further event is a failure. Raise explicitly rather than using
    # ``assert False`` so the check is not stripped under ``python -O``.
    for event_stream in (events, events2):
        for _ in event_stream:
            raise AssertionError("got unexpected event from monitor")
def test_dispatchers_can_connect_and_monitor_can_shut_down_evaluator(evaluator):
    """Check dispatcher events reach monitors and that a monitor can cancel.

    Steps performed:
      1. Start the evaluator and read the initial snapshot from its monitor.
      2. Connect two dispatch clients and send running, success and failure
         job events, checking the job status in the snapshot received after
         each event.
      3. Connect a second monitor and check the snapshot it receives first.
      4. Signal cancellation from the first monitor and check that both
         monitors receive a terminated event and nothing afterwards.

    Args:
        evaluator: the evaluator under test (presumably a pytest fixture
            defined elsewhere; not visible from this block).
    """
    with evaluator.run() as monitor:
        events = monitor.track()

        host = evaluator._config.host
        port = evaluator._config.port

        # first snapshot before any event occurs
        snapshot_event = next(events)
        snapshot = Snapshot(snapshot_event.data)
        assert snapshot.get_status() == ENSEMBLE_STATE_STARTED

        # two dispatchers connect
        with Client(host, port, "/dispatch") as dispatch1, Client(
            host, port, "/dispatch"
        ) as dispatch2:
            # first dispatcher informs that job 0 is running
            send_dispatch_event(
                dispatch1,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                "/ert/ee/0/real/0/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            snapshot = Snapshot(next(events).data)
            assert snapshot.get_job("0", "0", "0").status == JOB_STATE_RUNNING

            # second dispatcher informs that job 0 is running
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                "/ert/ee/0/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            snapshot = Snapshot(next(events).data)
            assert snapshot.get_job("1", "0", "0").status == JOB_STATE_RUNNING

            # second dispatcher informs that job 0 is done
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_SUCCESS,
                "/ert/ee/0/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            snapshot = Snapshot(next(events).data)
            assert snapshot.get_job("1", "0", "0").status == JOB_STATE_FINISHED

            # second dispatcher informs that job 1 is failed
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_FAILURE,
                "/ert/ee/0/real/1/step/0/job/1",
                "event_job_1_fail",
                {identifiers.ERROR_MSG: "error"},
            )
            snapshot = Snapshot(next(events).data)
            assert snapshot.get_job("1", "0", "1").status == JOB_STATE_FAILURE

        # a second monitor connects
        with ee_monitor.create(host, port) as monitor2:
            events2 = monitor2.track()
            snapshot = Snapshot(next(events2).data)
            assert snapshot.get_status() == ENSEMBLE_STATE_STARTED
            assert snapshot.get_job("0", "0", "0").status == JOB_STATE_RUNNING
            assert snapshot.get_job("1", "0", "0").status == JOB_STATE_FINISHED

            # one monitor requests that server exit
            monitor.signal_cancel()

            # both monitors should get a terminated event
            terminated = next(events)
            terminated2 = next(events2)
            assert terminated["type"] == identifiers.EVTYPE_EE_TERMINATED
            assert terminated2["type"] == identifiers.EVTYPE_EE_TERMINATED

            # Any further event is a failure. Raise explicitly rather than
            # using ``assert False`` so the check is not stripped under
            # ``python -O``.
            for event_stream in (events, events2):
                for _ in event_stream:
                    raise AssertionError("got unexpected event from monitor")
def test_dispatchers_can_connect_and_monitor_can_shut_down_evaluator(evaluator):
    """Check dispatcher events reach monitors and that a monitor can cancel.

    Steps performed:
      1. Start the evaluator and read the initial snapshot from its monitor.
      2. Connect two dispatch clients, send four job events back to back,
         then check the job statuses in the next snapshot received.
      3. Connect a second monitor and check that its first event is a
         snapshot event carrying the expected statuses.
      4. Signal cancellation from the first monitor and check that both
         monitors receive a terminated event and nothing afterwards.

    Args:
        evaluator: the evaluator under test (presumably a pytest fixture
            defined elsewhere; not visible from this block).
    """
    with evaluator.run() as monitor:
        events = monitor.track()

        host = evaluator._config.host
        port = evaluator._config.port
        token = evaluator._config.token
        cert = evaluator._config.cert
        url = evaluator._config.url

        # first snapshot before any event occurs
        snapshot_event = next(events)
        snapshot = Snapshot(snapshot_event.data)
        assert snapshot.get_status() == ENSEMBLE_STATE_UNKNOWN

        # Both dispatchers are created with the same arguments.
        dispatch_url = url + "/dispatch"
        dispatch_options = {
            "cert": cert,
            "token": token,
            "max_retries": 1,
            "timeout_multiplier": 1,
        }
        # Prefix shared by every event source sent below.
        source_prefix = f"/ert/ee/{evaluator._ee_id}"

        # two dispatchers connect
        with Client(dispatch_url, **dispatch_options) as dispatch1, Client(
            dispatch_url, **dispatch_options
        ) as dispatch2:
            # first dispatcher informs that job 0 is running
            send_dispatch_event(
                dispatch1,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                f"{source_prefix}/real/0/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )

            # second dispatcher informs that job 0 is running
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                f"{source_prefix}/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )

            # second dispatcher informs that job 0 is done
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_SUCCESS,
                f"{source_prefix}/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )

            # second dispatcher informs that job 1 is failed
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_FAILURE,
                f"{source_prefix}/real/1/step/0/job/1",
                "event_job_1_fail",
                {identifiers.ERROR_MSG: "error"},
            )

            # The next snapshot is expected to reflect all four events.
            snapshot = Snapshot(next(events).data)
            assert snapshot.get_job("1", "0", "0").status == JOB_STATE_FINISHED
            assert snapshot.get_job("0", "0", "0").status == JOB_STATE_RUNNING
            assert snapshot.get_job("1", "0", "1").status == JOB_STATE_FAILURE

        # a second monitor connects
        with ee_monitor.create(host, port, "wss", cert, token) as monitor2:
            events2 = monitor2.track()
            full_snapshot_event = next(events2)
            assert full_snapshot_event["type"] == identifiers.EVTYPE_EE_SNAPSHOT
            snapshot = Snapshot(full_snapshot_event.data)
            assert snapshot.get_status() == ENSEMBLE_STATE_UNKNOWN
            assert snapshot.get_job("0", "0", "0").status == JOB_STATE_RUNNING
            assert snapshot.get_job("1", "0", "0").status == JOB_STATE_FINISHED

            # one monitor requests that server exit
            monitor.signal_cancel()

            # both monitors should get a terminated event
            terminated = next(events)
            terminated2 = next(events2)
            assert terminated["type"] == identifiers.EVTYPE_EE_TERMINATED
            assert terminated2["type"] == identifiers.EVTYPE_EE_TERMINATED

            # Any further event is a failure. Raise explicitly rather than
            # using ``assert False`` so the check is not stripped under
            # ``python -O``.
            for event_stream in (events, events2):
                for unexpected_event in event_stream:
                    raise AssertionError(
                        f"got unexpected event {unexpected_event} from monitor"
                    )