def snapshot_to_tree(snapshot: Snapshot, iter_: int) -> Node:
    """Convert a snapshot into a Node tree rooted at an ITER node.

    The root node carries the snapshot's status. Beneath it the tree has
    one REAL node per entry in the snapshot's ``reals``, one STEP node per
    step of each real, and one JOB node per job of each step. Reals and
    jobs are visited in ascending ``int(id)`` order; steps are visited in
    the order their mapping yields them.

    Args:
        snapshot: Source snapshot, read through ``get_status()`` and
            ``to_dict()``.
        iter_: Identifier assigned to the root node.

    Returns:
        The root ITER node with all REAL/STEP/JOB descendants attached.
    """
    root = Node(iter_, {ids.STATUS: snapshot.get_status()}, NodeType.ITER)
    reals = SnapshotDict(**snapshot.to_dict()).reals

    # key=int orders the ids by numeric value rather than by plain comparison.
    for real_id in sorted(reals, key=int):
        real = reals[real_id]
        real_data = {ids.STATUS: real.status, ids.ACTIVE: real.active}
        real_node = Node(real_id, real_data, NodeType.REAL)
        root.add_child(real_node)

        for step_id, step in real.steps.items():
            step_node = Node(step_id, {ids.STATUS: step.status}, NodeType.STEP)
            real_node.add_child(step_node)

            jobs = step.jobs
            for job_id in sorted(jobs, key=int):
                job = jobs[job_id]
                # Copy the job's items, then set the DATA entry explicitly
                # from the job's ``data`` attribute.
                job_data = {**dict(job), ids.DATA: job.data}
                step_node.add_child(Node(job_id, job_data, NodeType.JOB))

    return root
def test_monitor_stop(evaluator):
    """Check that the first tracked event holds a STARTED ensemble snapshot.

    The evaluator is run as a context manager; tracking is abandoned right
    after the first event has been wrapped in a ``Snapshot``.
    """
    with evaluator.run() as monitor:
        for first_event in monitor.track():
            initial_snapshot = Snapshot(first_event.data)
            # Only the very first event is of interest; stop tracking.
            break

    reported_status = initial_snapshot.get_status()
    assert reported_status == ENSEMBLE_STATE_STARTED
def test_dispatchers_can_connect_and_monitor_can_shut_down_evaluator(evaluator):
    """Exercise two dispatchers and two monitors against a running evaluator.

    Sequence checked:
      1. The first monitor's initial snapshot reports ENSEMBLE_STATE_STARTED.
      2. Two dispatch clients send job events; after each one, the next
         monitor event is expected to show the corresponding job status.
      3. A second monitor connects and its first snapshot shows the state
         accumulated so far.
      4. The first monitor signals cancel; both monitors receive a
         TERMINATED event and nothing after it.
    """
    with evaluator.run() as monitor:
        events = monitor.track()

        config = evaluator._config
        host = config.host
        port = config.port

        # The stream opens with a snapshot taken before any job event.
        initial = Snapshot(next(events).data)
        assert initial.get_status() == ENSEMBLE_STATE_STARTED

        dispatch_args = (host, port, "/dispatch")
        with Client(*dispatch_args) as dispatch1, Client(*dispatch_args) as dispatch2:
            # dispatcher 1: real 0 / step 0 / job 0 is running
            send_dispatch_event(
                dispatch1,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                "/ert/ee/0/real/0/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            update = Snapshot(next(events).data)
            assert update.get_job("0", "0", "0").status == JOB_STATE_RUNNING

            # dispatcher 2: real 1 / step 0 / job 0 is running
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                "/ert/ee/0/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            update = Snapshot(next(events).data)
            assert update.get_job("1", "0", "0").status == JOB_STATE_RUNNING

            # dispatcher 2: real 1 / step 0 / job 0 succeeded
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_SUCCESS,
                "/ert/ee/0/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            update = Snapshot(next(events).data)
            assert update.get_job("1", "0", "0").status == JOB_STATE_FINISHED

            # dispatcher 2: real 1 / step 0 / job 1 failed
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_FAILURE,
                "/ert/ee/0/real/1/step/0/job/1",
                "event_job_1_fail",
                {identifiers.ERROR_MSG: "error"},
            )
            update = Snapshot(next(events).data)
            assert update.get_job("1", "0", "1").status == JOB_STATE_FAILURE

            # A late-joining monitor must see the state built up so far.
            with ee_monitor.create(host, port) as monitor2:
                events2 = monitor2.track()
                late_view = Snapshot(next(events2).data)
                assert late_view.get_status() == ENSEMBLE_STATE_STARTED
                assert late_view.get_job("0", "0", "0").status == JOB_STATE_RUNNING
                assert late_view.get_job("1", "0", "0").status == JOB_STATE_FINISHED

                # Cancelling through one monitor should terminate both streams.
                monitor.signal_cancel()

                terminated = next(events)
                terminated2 = next(events2)
                assert terminated["type"] == identifiers.EVTYPE_EE_TERMINATED
                assert terminated2["type"] == identifiers.EVTYPE_EE_TERMINATED

                # Nothing may follow the terminated event on either stream.
                for stream in (events, events2):
                    for _ in stream:
                        assert False, "got unexpected event from monitor"
def test_dispatchers_can_connect_and_monitor_can_shut_down_evaluator(evaluator):
    """Exercise two dispatchers and two monitors against a running evaluator.

    Sequence checked:
      1. The first monitor's initial snapshot reports status "Unknown".
      2. Two dispatch clients send job events; after each one, the next
         monitor event is expected to show the corresponding job status.
      3. A second monitor connects and its first snapshot shows the state
         accumulated so far.
      4. The first monitor signals cancel; both monitors receive a
         TERMINATED event and nothing after it.
    """
    monitor = evaluator.run()
    events = monitor.track()

    config = evaluator._config
    host = config.host
    port = config.port

    # The stream opens with a snapshot taken before any job event.
    initial = Snapshot(next(events).data)
    assert initial.get_status() == "Unknown"

    dispatch_args = (host, port, "/dispatch")
    with Client(*dispatch_args) as dispatch1, Client(*dispatch_args) as dispatch2:
        # dispatcher 1: real 0 / stage 0 / step 0 / job 0 is running
        send_dispatch_event(
            dispatch1,
            identifiers.EVTYPE_FM_JOB_RUNNING,
            "/ert/ee/0/real/0/stage/0/step/0/job/0",
            "event1",
            {"current_memory_usage": 1000},
        )
        update = Snapshot(next(events).data)
        assert update.get_job("0", "0", "0", "0")["status"] == "Running"

        # dispatcher 2: real 1 / stage 0 / step 0 / job 0 is running
        send_dispatch_event(
            dispatch2,
            identifiers.EVTYPE_FM_JOB_RUNNING,
            "/ert/ee/0/real/1/stage/0/step/0/job/0",
            "event1",
            {"current_memory_usage": 1000},
        )
        update = Snapshot(next(events).data)
        assert update.get_job("1", "0", "0", "0")["status"] == "Running"

        # dispatcher 2: real 1 / stage 0 / step 0 / job 0 succeeded
        send_dispatch_event(
            dispatch2,
            identifiers.EVTYPE_FM_JOB_SUCCESS,
            "/ert/ee/0/real/1/stage/0/step/0/job/0",
            "event1",
            {"current_memory_usage": 1000},
        )
        update = Snapshot(next(events).data)
        assert update.get_job("1", "0", "0", "0")["status"] == "Finished"

        # A late-joining monitor must see the state built up so far.
        monitor2 = ee_monitor.create(host, port)
        events2 = monitor2.track()
        late_view = Snapshot(next(events2).data)
        assert late_view.get_status() == "Unknown"
        assert late_view.get_job("0", "0", "0", "0")["status"] == "Running"
        assert late_view.get_job("1", "0", "0", "0")["status"] == "Finished"

        # Cancelling through one monitor should terminate both streams.
        monitor.signal_cancel()

        terminated = next(events)
        terminated2 = next(events2)
        assert terminated["type"] == identifiers.EVTYPE_EE_TERMINATED
        assert terminated2["type"] == identifiers.EVTYPE_EE_TERMINATED

        # Nothing may follow the terminated event on either stream.
        for stream in (events, events2):
            for _ in stream:
                assert False, "got unexpected event from monitor"
def test_monitor_stop(evaluator):
    """Check that the first tracked event holds a snapshot with status "Unknown"."""
    monitor = evaluator.run()
    event_stream = monitor.track()

    # Only the first event is consumed; the rest of the stream is left alone.
    first_event = next(event_stream)
    initial_snapshot = Snapshot(first_event.data)
    assert initial_snapshot.get_status() == "Unknown"
def test_dispatchers_can_connect_and_monitor_can_shut_down_evaluator(evaluator):
    """Exercise two dispatchers and two monitors against a running evaluator.

    Sequence checked:
      1. The first monitor's initial snapshot reports ENSEMBLE_STATE_UNKNOWN.
      2. Two dispatch clients send four job events back to back; the next
         monitor event is then expected to reflect all of them.
      3. A second monitor connects; its first event must be a snapshot
         event showing the state accumulated so far.
      4. The first monitor signals cancel; both monitors receive a
         TERMINATED event and nothing after it.
    """
    with evaluator.run() as monitor:
        events = monitor.track()

        config = evaluator._config
        host = config.host
        port = config.port
        token = config.token
        cert = config.cert
        url = config.url

        # The stream opens with a snapshot taken before any job event.
        initial = Snapshot(next(events).data)
        assert initial.get_status() == ENSEMBLE_STATE_UNKNOWN

        # Both dispatchers connect to the same endpoint with the same options.
        dispatch_url = url + "/dispatch"
        client_options = dict(
            cert=cert,
            token=token,
            max_retries=1,
            timeout_multiplier=1,
        )
        with Client(dispatch_url, **client_options) as dispatch1, Client(
            dispatch_url, **client_options
        ) as dispatch2:
            # dispatcher 1: real 0 / step 0 / job 0 is running
            send_dispatch_event(
                dispatch1,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                f"/ert/ee/{evaluator._ee_id}/real/0/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )

            # dispatcher 2: real 1 / step 0 / job 0 is running
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                f"/ert/ee/{evaluator._ee_id}/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )

            # dispatcher 2: real 1 / step 0 / job 0 succeeded
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_SUCCESS,
                f"/ert/ee/{evaluator._ee_id}/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )

            # dispatcher 2: real 1 / step 0 / job 1 failed
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_FAILURE,
                f"/ert/ee/{evaluator._ee_id}/real/1/step/0/job/1",
                "event_job_1_fail",
                {identifiers.ERROR_MSG: "error"},
            )

            # One monitor event is read and checked against all four updates.
            update = Snapshot(next(events).data)
            assert update.get_job("1", "0", "0").status == JOB_STATE_FINISHED
            assert update.get_job("0", "0", "0").status == JOB_STATE_RUNNING
            assert update.get_job("1", "0", "1").status == JOB_STATE_FAILURE

            # A late-joining monitor must start with a snapshot event that
            # shows the state built up so far.
            with ee_monitor.create(host, port, "wss", cert, token) as monitor2:
                events2 = monitor2.track()
                full_snapshot_event = next(events2)
                assert full_snapshot_event["type"] == identifiers.EVTYPE_EE_SNAPSHOT

                late_view = Snapshot(full_snapshot_event.data)
                assert late_view.get_status() == ENSEMBLE_STATE_UNKNOWN
                assert late_view.get_job("0", "0", "0").status == JOB_STATE_RUNNING
                assert late_view.get_job("1", "0", "0").status == JOB_STATE_FINISHED

                # Cancelling through one monitor should terminate both streams.
                monitor.signal_cancel()

                terminated = next(events)
                terminated2 = next(events2)
                assert terminated["type"] == identifiers.EVTYPE_EE_TERMINATED
                assert terminated2["type"] == identifiers.EVTYPE_EE_TERMINATED

                # Nothing may follow the terminated event on either stream.
                for stream in (events, events2):
                    for unexpected_event in stream:
                        assert (
                            False
                        ), f"got unexpected event {unexpected_event} from monitor"