def _evaluate(self, dispatch_url, ee_id):
    asyncio.set_event_loop(asyncio.get_event_loop())
    try:
        with Client(dispatch_url) as c:
            event = CloudEvent(
                {
                    "type": ids.EVTYPE_ENSEMBLE_STARTED,
                    "source": f"/ert/ee/{self._ee_id}",
                },
            )
            c.send(to_json(event).decode())
        self.run_flow(ee_id)

        with Client(dispatch_url) as c:
            event = CloudEvent(
                {
                    "type": ids.EVTYPE_ENSEMBLE_STOPPED,
                    "source": f"/ert/ee/{self._ee_id}",
                    "datacontenttype": "application/octet-stream",
                },
                cloudpickle.dumps(self.config["outputs"]),
            )
            c.send(to_json(event).decode())
    except Exception:
        logger.exception(
            "An exception occurred while starting the ensemble evaluation",
            exc_info=True,
        )
        with Client(dispatch_url) as c:
            event = CloudEvent(
                {
                    "type": ids.EVTYPE_ENSEMBLE_FAILED,
                    "source": f"/ert/ee/{self._ee_id}",
                },
            )
            c.send(to_json(event).decode())
def run_job(self, client: Client, job: Any, run_path: Path):
    shell_cmd = [
        job.get_executable().as_posix(),
        *[os.path.expandvars(arg) for arg in job.get_args()],
    ]
    env = os.environ.copy()
    env.update(
        {"PATH": (run_path / _BIN_FOLDER).as_posix() + ":" + os.environ["PATH"]}
    )
    cmd_exec = subprocess.run(
        shell_cmd,
        universal_newlines=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=run_path.as_posix(),
        env=env,
    )
    self.logger.info(cmd_exec.stderr)
    self.logger.info(cmd_exec.stdout)

    if cmd_exec.returncode != 0:
        self.logger.error(cmd_exec.stderr)
        client.send_event(
            ev_type=ids.EVTYPE_FM_JOB_FAILURE,
            ev_source=job.get_source(self._ee_id),
            ev_data={ids.ERROR_MSG: cmd_exec.stderr},
        )
        raise OSError(
            f"Script {job.get_name()} failed with exception {cmd_exec.stderr}"
        )
def run_jobs(self, client: Client, run_path: Path):
    for job in self._step.get_jobs():
        self.logger.info(f"Running command {job.get_name()}")
        client.send_event(
            ev_type=ids.EVTYPE_FM_JOB_START,
            ev_source=job.get_source(self._ee_id),
        )
        self.run_job(client, job, run_path)
        client.send_event(
            ev_type=ids.EVTYPE_FM_JOB_SUCCESS,
            ev_source=job.get_source(self._ee_id),
        )
def _evaluate(self):
    get_event_loop()
    try:
        with Client(
            self._ee_config.dispatch_uri,
            self._ee_config.token,
            self._ee_config.cert,
        ) as c:
            event = CloudEvent(
                {
                    "type": ids.EVTYPE_ENSEMBLE_STARTED,
                    "source": f"/ert/ee/{self._ee_id}",
                },
            )
            c.send(to_json(event).decode())
        with prefect.context(
            url=self._ee_config.dispatch_uri,
            token=self._ee_config.token,
            cert=self._ee_config.cert,
        ):
            self.run_flow(self._ee_id)

        with Client(
            self._ee_config.dispatch_uri,
            self._ee_config.token,
            self._ee_config.cert,
        ) as c:
            event = CloudEvent(
                {
                    "type": ids.EVTYPE_ENSEMBLE_STOPPED,
                    "source": f"/ert/ee/{self._ee_id}",
                    "datacontenttype": "application/octet-stream",
                },
                cloudpickle.dumps(self._outputs),
            )
            c.send(to_json(event).decode())
    except Exception:
        logger.exception(
            "An exception occurred while starting the ensemble evaluation",
            exc_info=True,
        )
        with Client(
            self._ee_config.dispatch_uri,
            self._ee_config.token,
            self._ee_config.cert,
        ) as c:
            event = CloudEvent(
                {
                    "type": ids.EVTYPE_ENSEMBLE_FAILED,
                    "source": f"/ert/ee/{self._ee_id}",
                },
            )
            c.send(to_json(event).decode())
def run(self, inputs=None):
    with tempfile.TemporaryDirectory() as run_path:
        run_path = Path(run_path)
        self._load_and_dump_input(transmitters=inputs, runpath=run_path)
        with Client(
            prefect.context.url, prefect.context.token, prefect.context.cert
        ) as ee_client:
            ee_client.send_event(
                ev_type=ids.EVTYPE_FM_STEP_RUNNING,
                ev_source=self._step.get_source(self._ee_id),
            )
            outputs = {}
            self.run_jobs(ee_client, run_path)

            futures = []
            for output in self._step.get_outputs():
                if not (run_path / output.get_path()).exists():
                    raise FileNotFoundError(
                        f"Output file {output.get_path()} was not generated!"
                    )
                outputs[output.get_name()] = self._output_transmitters[
                    output.get_name()
                ]
                futures.append(
                    outputs[output.get_name()].transmit_file(
                        run_path / output.get_path(), output.get_mime()
                    )
                )
            asyncio.get_event_loop().run_until_complete(asyncio.gather(*futures))
            ee_client.send_event(
                ev_type=ids.EVTYPE_FM_STEP_SUCCESS,
                ev_source=self._step.get_source(self._ee_id),
            )
    return outputs
def test_invalid_server():
    port = 7777
    host = "localhost"
    url = f"ws://{host}:{port}"

    with Client(url, max_retries=2, timeout_multiplier=2) as c1:
        with pytest.raises((ConnectionRefusedError, OSError)):
            c1.send("hei")
def test_on_task_failure_fail_step(unused_tcp_port, tmpdir):
    host = "localhost"
    url = f"ws://{host}:{unused_tcp_port}"
    messages = []
    mock_ws_thread = threading.Thread(
        target=partial(_mock_ws, messages=messages), args=(host, unused_tcp_port)
    )
    mock_ensemble = _MockedPrefectEnsemble()

    mock_ws_thread.start()
    script_location = (
        Path(SOURCE_DIR)
        / "test-data/local/prefect_test_case/unix_test_retry_script.py"
    )
    input_ = script_transmitter("script", script_location, storage_path=tmpdir)
    with tmp() as runpath:
        step = get_step(
            step_name="test_step",
            inputs=[
                ("script", Path("unix_test_retry_script.py"), "application/x-python")
            ],
            outputs=[],
            jobs=[("script", Path("unix_test_retry_script.py"), [runpath])],
            type_="unix",
        )

    with prefect.context(url=url, token=None, cert=None):
        output_trans = step_output_transmitters(step, storage_path=tmpdir)
        with Flow("testing") as flow:
            task = step.get_task(
                output_transmitters=output_trans,
                ee_id="test_ee_id",
                max_retries=1,
                retry_delay=timedelta(seconds=1),
                on_failure=mock_ensemble._on_task_failure,
            )
            result = task(inputs=input_)
        flow_run = flow.run()

    # Stop the mock evaluator WS server
    with Client(url) as c:
        c.send("stop")
    mock_ws_thread.join()

    task_result = flow_run.result[result]
    assert not task_result.is_successful()
    assert not flow_run.is_successful()

    fail_job_messages = [msg for msg in messages if ids.EVTYPE_FM_JOB_FAILURE in msg]
    fail_step_messages = [msg for msg in messages if ids.EVTYPE_FM_STEP_FAILURE in msg]

    expected_job_failed_messages = 2
    expected_step_failed_messages = 1
    assert expected_job_failed_messages == len(fail_job_messages)
    assert expected_step_failed_messages == len(fail_step_messages)
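# The tests in this section rely on a `_mock_ws` helper that is not shown
# here. Below is a minimal sketch of such a collector server, assuming the
# `websockets` package and the call signature used above (`host`, `port`,
# keyword `messages`, optional `delay_startup`); the real helper may differ.
import asyncio

import websockets


def _mock_ws(host, port, messages, delay_startup=0):
    loop = asyncio.new_event_loop()
    done = loop.create_future()

    async def _handler(websocket, path):
        # Record every message; a literal "stop" shuts the server down.
        while True:
            msg = await websocket.recv()
            messages.append(msg)
            if msg == "stop":
                done.set_result(None)
                break

    async def _run_server():
        # The delayed start lets a test like test_retry exercise the
        # client's reconnect logic before the server is listening.
        await asyncio.sleep(delay_startup)
        async with websockets.serve(_handler, host, port):
            await done

    loop.run_until_complete(_run_server())
    loop.close()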
def _send_event(type_: str, source: str, data: Optional[Dict[str, Any]] = None) -> None:
    with Client(
        prefect.context.url, prefect.context.token, prefect.context.cert
    ) as client:
        client.send_event(
            ev_type=type_,
            ev_source=source,
            ev_data=data,
        )
async def send_cloudevent(self, url, event, token=None, cert=None, retries=1):
    client = Client(url, token, cert)
    await client._send(
        to_json(event, data_marshaller=serialization.evaluator_marshaller)
    )
    await client.websocket.close()
def _evaluate(self, client_url, dispatch_url, ee_id):
    super()._evaluate(dispatch_url, ee_id)
    with Client(client_url) as client:
        client.send(
            to_json(
                CloudEvent(
                    {
                        "type": identifiers.EVTYPE_EE_USER_DONE,
                        "source": f"/ert/ee/{ee_id}",
                        "id": "event-user-done",
                    }
                )
            )
        )
def _on_task_failure(task, state, url):
    if prefect_context.task_run_count > task.max_retries:
        with Client(url) as c:
            event = CloudEvent(
                {
                    "type": ids.EVTYPE_FM_STEP_FAILURE,
                    "source": task.get_step()._source,
                    "datacontenttype": "application/json",
                },
                {"error_msg": state.message},
            )
            c.send(to_json(event).decode())
def test_function_step(unused_tcp_port, tmpdir):
    host = "localhost"
    url = f"ws://{host}:{unused_tcp_port}"
    messages = []
    mock_ws_thread = threading.Thread(
        target=partial(_mock_ws, messages=messages), args=(host, unused_tcp_port)
    )
    mock_ws_thread.start()

    test_values = {"values": [42, 24, 6]}
    inputs = input_transmitter("values", test_values["values"], storage_path=tmpdir)

    def sum_function(values):
        return [sum(values)]

    step = get_step(
        step_name="test_step",
        inputs=[("values", "NA", "text/whatever")],
        outputs=[("output", Path("output.out"), "application/json")],
        jobs=[("test_function", cloudpickle.dumps(sum_function), None)],
        type_="function",
    )

    with prefect.context(url=url, token=None, cert=None):
        output_trans = step_output_transmitters(step, storage_path=tmpdir)
        with Flow("testing") as flow:
            task = step.get_task(output_transmitters=output_trans, ee_id="test_ee_id")
            result = task(inputs=inputs)
        with tmp():
            flow_run = flow.run()

    # Stop the mock evaluator WS server
    with Client(url) as c:
        c.send("stop")
    mock_ws_thread.join()

    task_result = flow_run.result[result]
    assert task_result.is_successful()
    assert flow_run.is_successful()

    assert len(task_result.result) == 1
    expected_uri = output_trans["output"]._uri
    output_uri = task_result.result["output"]._uri
    assert expected_uri == output_uri

    transmitted_record = asyncio.get_event_loop().run_until_complete(
        task_result.result["output"].load()
    )
    transmitted_result = transmitted_record.data
    expected_result = sum_function(**test_values)
    assert expected_result == transmitted_result
def run_job(
    self,
    job: _FunctionJob,
    transmitters: _stage_transmitter_mapping,
    client: Client,
) -> _stage_transmitter_mapping:
    self.logger.info(f"Running function {job.name}")
    client.send_event(
        ev_type=ids.EVTYPE_FM_JOB_START,
        ev_source=job.source(self._ee_id),
    )
    try:
        function: Callable[..., Any] = pickle.loads(job.command)
        output = self._attempt_execute(func=function, transmitters=transmitters)
    except Exception as e:
        self.logger.error(str(e))
        client.send_event(
            ev_type=ids.EVTYPE_FM_JOB_FAILURE,
            ev_source=job.source(self._ee_id),
            ev_data={ids.ERROR_MSG: str(e)},
        )
        raise e
    else:
        client.send_event(
            ev_type=ids.EVTYPE_FM_JOB_SUCCESS,
            ev_source=job.source(self._ee_id),
        )
    return output
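# `_attempt_execute` is called above but not defined in this section. A
# hedged sketch of what it plausibly does, inferred only from the call site:
# load each input transmitter, call the unpickled function on the loaded
# data, and hand the result to the output transmitters. Every name below
# other than `func`, `transmitters`, and `self._output_transmitters` is an
# assumption, including the hypothetical `transmit_data` method.
def _attempt_execute(self, *, func, transmitters):
    loop = asyncio.get_event_loop()
    # Load each input record and unwrap its payload (assumed `.data` field,
    # matching the `transmitted_record.data` accesses in the tests above).
    kwargs = {
        name: loop.run_until_complete(transmitter.load()).data
        for name, transmitter in transmitters.items()
    }
    results = func(**kwargs)
    # Transmit the result through each output transmitter (hypothetical API).
    outputs = {}
    for name, transmitter in self._output_transmitters.items():
        loop.run_until_complete(transmitter.transmit_data(results))
        outputs[name] = transmitter
    return outputs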
def test_ensure_multi_level_events_in_order(evaluator):
    with evaluator.run() as monitor:
        events = monitor.track()

        token = evaluator._config.token
        cert = evaluator._config.cert
        url = evaluator._config.url

        snapshot_event = next(events)
        assert snapshot_event["type"] == identifiers.EVTYPE_EE_SNAPSHOT

        with Client(url + "/dispatch", cert=cert, token=token) as dispatch1:
            send_dispatch_event(
                dispatch1,
                identifiers.EVTYPE_ENSEMBLE_STARTED,
                f"/ert/ee/{evaluator._ee_id}/ensemble",
                "event0",
                {},
            )
            send_dispatch_event(
                dispatch1,
                identifiers.EVTYPE_FM_STEP_SUCCESS,
                f"/ert/ee/{evaluator._ee_id}/real/0/step/0",
                "event1",
                {},
            )
            send_dispatch_event(
                dispatch1,
                identifiers.EVTYPE_FM_STEP_SUCCESS,
                f"/ert/ee/{evaluator._ee_id}/real/1/step/0",
                "event2",
                {},
            )
            send_dispatch_event(
                dispatch1,
                identifiers.EVTYPE_ENSEMBLE_STOPPED,
                f"/ert/ee/{evaluator._ee_id}/ensemble",
                "event3",
                {},
            )
        monitor.signal_done()

        events = list(events)

        # Without making too many assumptions about what events to expect, it
        # should be reasonable to expect that if an event contains information
        # about realizations, the state of the ensemble up until that point
        # should not be final (i.e. not cancelled, stopped, failed).
        ensemble_state = snapshot_event.data.get("status")
        for event in events:
            if event.data:
                if "reals" in event.data:
                    assert ensemble_state == ENSEMBLE_STATE_STARTED
                ensemble_state = event.data.get("status", ensemble_state)
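# `send_dispatch_event` is used here and in several snippets below without
# its definition. A minimal sketch consistent with the call sites (client,
# event type, source, event id, data, plus optional extra CloudEvent
# attributes such as `datacontenttype`); the real helper may differ.
def send_dispatch_event(client, event_type, source, event_id, data, **extra_attrs):
    event = CloudEvent(
        {"type": event_type, "source": source, "id": event_id, **extra_attrs},
        data,
    )
    client.send(to_json(event).decode())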
async def send_cloudevent(  # pylint: disable=too-many-arguments
    self,
    url: str,
    event: CloudEvent,
    token: Optional[str] = None,
    cert: Optional[Union[str, bytes]] = None,
    retries: int = 1,
) -> None:
    client = Client(url, token, cert)
    await client._send(to_json(event, data_marshaller=evaluator_marshaller))
    assert client.websocket  # mypy
    await client.websocket.close()
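# A hedged usage sketch for the coroutine above: build a CloudEvent and
# drive the send with the current event loop, as other snippets in this
# section do. The object name `connection`, the event type, and the URL are
# illustrative assumptions only.
event = CloudEvent(
    {
        "type": ids.EVTYPE_FM_STEP_RUNNING,
        "source": "/ert/ee/test_ee_id/real/0/step/0",
    },
)
asyncio.get_event_loop().run_until_complete(
    connection.send_cloudevent("ws://localhost:8765/dispatch", event)
)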
def _send_event(type_: str, source: str, data: Optional[Dict[str, Any]] = None) -> None:
    with Client(
        prefect.context.url,  # type: ignore  # pylint: disable=no-member
        prefect.context.token,  # type: ignore  # pylint: disable=no-member
        prefect.context.cert,  # type: ignore  # pylint: disable=no-member
    ) as client:
        client.send_event(
            ev_type=type_,
            ev_source=source,
            ev_data=data,
        )
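# For context, a hedged example of how `_send_event` would be invoked from
# task code; the source string is made up here but follows the path scheme
# used by the dispatch tests in this section.
_send_event(
    type_=ids.EVTYPE_FM_STEP_RUNNING,
    source="/ert/ee/test_ee_id/real/0/step/0",
)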
def _on_task_failure(self, task, state):
    if prefect_context.task_run_count > task.max_retries:
        url = prefect_context.url
        token = prefect_context.token
        cert = prefect_context.cert
        with Client(url, token, cert) as c:
            event = CloudEvent(
                {
                    "type": ids.EVTYPE_FM_STEP_FAILURE,
                    "source": task.get_step().get_source(self._ee_id),
                    "datacontenttype": "application/json",
                },
                {"error_msg": state.message},
            )
            c.send(to_json(event).decode())
def run(self, inputs: Dict[str, "ert3.data.RecordTransmitter"]):
    with Client(self._step.get_ee_url()) as ee_client:
        ee_client.send_event(
            ev_type=ids.EVTYPE_FM_STEP_RUNNING,
            ev_source=self._step.get_source(self._ee_id),
        )
        output = self.run_job(
            job=self._step.get_jobs()[0], transmitters=inputs, client=ee_client
        )
        ee_client.send_event(
            ev_type=ids.EVTYPE_FM_STEP_SUCCESS,
            ev_source=self._step.get_source(self._ee_id),
        )
    return output
def _on_task_failure(
    task: Union[UnixTask, FunctionTask], state: State, ee_id: str
) -> None:
    if prefect_context.task_run_count > task.max_retries:
        url = prefect_context.url
        token = prefect_context.token
        cert = prefect_context.cert
        with Client(url, token, cert) as c:
            event = CloudEvent(
                {
                    "type": EVTYPE_FM_STEP_FAILURE,
                    "source": task.step.source(ee_id),
                    "datacontenttype": "application/json",
                },
                {"error_msg": state.message},
            )
            c.send(to_json(event).decode())
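# Prefect calls `on_failure` hooks with only `(task, state)`, so the extra
# `ee_id` argument of the handler above is presumably bound when the task is
# created. A hedged sketch of that wiring with functools.partial, reusing
# names from the tests in this section:
from functools import partial

task = step.get_task(
    output_transmitters=output_trans,
    ee_id=ee_id,
    on_failure=partial(_on_task_failure, ee_id=ee_id),
)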
def test_unix_step_error(unused_tcp_port, tmpdir):
    host = "localhost"
    url = f"ws://{host}:{unused_tcp_port}"
    messages = []
    mock_ws_thread = threading.Thread(
        target=partial(_mock_ws, messages=messages), args=(host, unused_tcp_port)
    )
    mock_ws_thread.start()

    script_location = (
        Path(SOURCE_DIR) / "test-data/local/prefect_test_case/unix_test_script.py"
    )
    input_ = script_transmitter("test_script", script_location, storage_path=tmpdir)
    step = get_step(
        step_name="test_step",
        inputs=[("test_script", Path("unix_test_script.py"), "application/x-python")],
        outputs=[("output", Path("output.out"), "application/json")],
        jobs=[("test_script", Path("unix_test_script.py"), ["foo", "bar"])],
        type_="unix",
    )

    with prefect.context(url=url, token=None, cert=None):
        output_trans = step_output_transmitters(step, storage_path=tmpdir)
        with Flow("testing") as flow:
            task = step.get_task(output_transmitters=output_trans, ee_id="test_ee_id")
            result = task(inputs=input_)
        with tmp():
            flow_run = flow.run()

    # Stop the mock evaluator WS server
    with Client(url) as c:
        c.send("stop")
    mock_ws_thread.join()

    task_result = flow_run.result[result]
    assert not task_result.is_successful()
    assert not flow_run.is_successful()

    assert isinstance(task_result.result, Exception)
    assert (
        "unix_test_script.py: error: unrecognized arguments: bar"
        in task_result.message
    )
def test_unix_task(unused_tcp_port, tmpdir):
    host = "localhost"
    url = f"ws://{host}:{unused_tcp_port}"
    messages = []
    mock_ws_thread = threading.Thread(
        target=partial(_mock_ws, messages=messages), args=(host, unused_tcp_port)
    )
    mock_ws_thread.start()

    script_location = (
        Path(SOURCE_DIR) / "test-data/local/prefect_test_case/unix_test_script.py"
    )
    input_ = script_transmitter("script", script_location, storage_path=tmpdir)
    step = get_step(
        step_name="test_step",
        inputs=[("script", Path("unix_test_script.py"), "application/x-python")],
        outputs=[("output", Path("output.out"), "application/json")],
        jobs=[("script", Path("unix_test_script.py"), ["vas"])],
        url=url,
        type_="unix",
    )
    output_trans = step_output_transmitters(step, storage_path=tmpdir)
    with Flow("testing") as flow:
        task = step.get_task(output_transmitters=output_trans, ee_id="test_ee_id")
        result = task(inputs=input_)
    with tmp():
        flow_run = flow.run()

    # Stop the mock evaluator WS server
    with Client(url) as c:
        c.send("stop")
    mock_ws_thread.join()

    task_result = flow_run.result[result]
    assert task_result.is_successful()
    assert flow_run.is_successful()

    assert len(task_result.result) == 1
    expected_uri = output_trans["output"]._uri
    output_uri = task_result.result["output"]._uri
    assert expected_uri == output_uri
def test_successful_sending(unused_tcp_port):
    host = "localhost"
    url = f"ws://{host}:{unused_tcp_port}"
    messages = []
    mock_ws_thread = threading.Thread(
        target=partial(_mock_ws, messages=messages), args=(host, unused_tcp_port)
    )
    mock_ws_thread.start()
    messages_c1 = ["test_1", "test_2", "test_3", "stop"]
    with Client(url) as c1:
        for msg in messages_c1:
            c1.send(msg)

    mock_ws_thread.join()

    for msg in messages_c1:
        assert msg in messages
def run(self, inputs: Dict[str, "RecordTransmitter"]):  # type: ignore
    with Client(
        prefect.context.url, prefect.context.token, prefect.context.cert
    ) as ee_client:
        ee_client.send_event(
            ev_type=ids.EVTYPE_FM_STEP_RUNNING,
            ev_source=self._step.get_source(self._ee_id),
        )
        output = self.run_job(
            job=self._step.get_jobs()[0], transmitters=inputs, client=ee_client
        )
        ee_client.send_event(
            ev_type=ids.EVTYPE_FM_STEP_SUCCESS,
            ev_source=self._step.get_source(self._ee_id),
        )
    return output
def test_retry(unused_tcp_port):
    host = "localhost"
    url = f"ws://{host}:{unused_tcp_port}"
    messages = []
    mock_ws_thread = threading.Thread(
        target=partial(_mock_ws, messages=messages, delay_startup=2),
        args=(host, unused_tcp_port),
    )
    mock_ws_thread.start()
    messages_c1 = ["test_1", "test_2", "test_3", "stop"]
    with Client(url, max_retries=2, timeout_multiplier=2) as c1:
        for msg in messages_c1:
            c1.send(msg)

    mock_ws_thread.join()

    for msg in messages_c1:
        assert msg in messages
def run(self, inputs: _stage_transmitter_mapping):  # type: ignore  # pylint: disable=arguments-differ  # noqa
    with Client(
        prefect.context.url,  # type: ignore  # pylint: disable=no-member
        prefect.context.token,  # type: ignore  # pylint: disable=no-member
        prefect.context.cert,  # type: ignore  # pylint: disable=no-member
    ) as ee_client:
        ee_client.send_event(
            ev_type=ids.EVTYPE_FM_STEP_RUNNING,
            ev_source=self.step.source(self._ee_id),
        )
        job = self.step.jobs[0]
        if not isinstance(job, _FunctionJob):
            raise TypeError(f"unexpected job {type(job)} in function task")
        output = self.run_job(job=job, transmitters=inputs, client=ee_client)
        ee_client.send_event(
            ev_type=ids.EVTYPE_FM_STEP_SUCCESS,
            ev_source=self.step.source(self._ee_id),
        )
    return output
def _evaluate(self, url, ee_id):
    event_id = 0
    with Client(url + "/dispatch") as dispatch:
        send_dispatch_event(
            dispatch,
            identifiers.EVTYPE_ENSEMBLE_STARTED,
            f"/ert/ee/{ee_id}",
            f"event-{event_id}",
            None,
        )
        if self.fails:
            event_id = event_id + 1
            send_dispatch_event(
                dispatch,
                identifiers.EVTYPE_ENSEMBLE_FAILED,
                f"/ert/ee/{ee_id}",
                f"event-{event_id}",
                None,
            )
            return

        event_id = event_id + 1
        for real in range(0, self.reals):
            for step in range(0, self.steps):
                job_failed = False
                send_dispatch_event(
                    dispatch,
                    identifiers.EVTYPE_FM_STEP_UNKNOWN,
                    f"/ert/ee/{ee_id}/real/{real}/step/{step}",
                    f"event-{event_id}",
                    None,
                )
                event_id = event_id + 1
                for job in range(0, self.jobs):
                    send_dispatch_event(
                        dispatch,
                        identifiers.EVTYPE_FM_JOB_RUNNING,
                        f"/ert/ee/{ee_id}/real/{real}/step/{step}/job/{job}",
                        f"event-{event_id}",
                        {"current_memory_usage": 1000},
                    )
                    event_id = event_id + 1
                    if self._shouldFailJob(real, step, job):
                        send_dispatch_event(
                            dispatch,
                            identifiers.EVTYPE_FM_JOB_FAILURE,
                            f"/ert/ee/{ee_id}/real/{real}/step/{step}/job/{job}",
                            f"event-{event_id}",
                            {},
                        )
                        event_id = event_id + 1
                        job_failed = True
                        break
                    else:
                        send_dispatch_event(
                            dispatch,
                            identifiers.EVTYPE_FM_JOB_SUCCESS,
                            f"/ert/ee/{ee_id}/real/{real}/step/{step}/job/{job}",
                            f"event-{event_id}",
                            {"current_memory_usage": 1000},
                        )
                        event_id = event_id + 1
                if job_failed:
                    send_dispatch_event(
                        dispatch,
                        identifiers.EVTYPE_FM_STEP_FAILURE,
                        f"/ert/ee/{ee_id}/real/{real}/step/{step}/job/{job}",
                        f"event-{event_id}",
                        {},
                    )
                    event_id = event_id + 1
                else:
                    send_dispatch_event(
                        dispatch,
                        identifiers.EVTYPE_FM_STEP_SUCCESS,
                        f"/ert/ee/{ee_id}/real/{real}/step/{step}/job/{job}",
                        f"event-{event_id}",
                        {},
                    )
                    event_id = event_id + 1

        data = self.result if self.result else None
        extra_attrs = {}
        if self.result_datacontenttype:
            extra_attrs["datacontenttype"] = self.result_datacontenttype
        send_dispatch_event(
            dispatch,
            identifiers.EVTYPE_ENSEMBLE_STOPPED,
            f"/ert/ee/{ee_id}",
            f"event-{event_id}",
            data,
            **extra_attrs,
        )
def test_function_step_for_function_defined_outside_py_environment(
    unused_tcp_port, tmpdir
):
    # Create a temporary module that defines a function `bar`.
    # `bar` returns a call to a different function, `internal_call`,
    # defined in the same Python file.
    with tmpdir.as_cwd():
        module_path = Path(tmpdir) / "foo"
        module_path.mkdir()
        init_file = module_path / "__init__.py"
        init_file.touch()
        file_path = module_path / "bar.py"
        file_path.write_text(
            "def bar(values):\n    return internal_call(values)\n"
            "def internal_call(values):\n    return [sum(values)]\n"
        )
        spec = importlib.util.spec_from_file_location("foo", str(file_path))
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        func = getattr(module, "bar")

        # Check that the module is not in the Python environment
        with pytest.raises(ModuleNotFoundError):
            import foo.bar

        host = "localhost"
        url = f"ws://{host}:{unused_tcp_port}"
        messages = []
        mock_ws_thread = threading.Thread(
            target=partial(_mock_ws, messages=messages), args=(host, unused_tcp_port)
        )
        mock_ws_thread.start()
        test_values = {"values": [42, 24, 6]}
        inputs = input_transmitter(
            "values", test_values["values"], storage_path=tmpdir
        )
        step = get_step(
            step_name="test_step",
            inputs=[("values", "NA", "text/whatever")],
            outputs=[("output", Path("output.out"), "application/json")],
            jobs=[("test_function", cloudpickle.dumps(func), None)],
            type_="function",
        )
        expected_result = func(**test_values)

        # Make sure the function is no longer available before we start
        # creating the flow and task
        del func

        with prefect.context(url=url, token=None, cert=None):
            output_trans = step_output_transmitters(step, storage_path=tmpdir)
            with Flow("testing") as flow:
                task = step.get_task(
                    output_transmitters=output_trans, ee_id="test_ee_id"
                )
                result = task(inputs=inputs)
            with tmp():
                flow_run = flow.run()

        # Stop the mock evaluator WS server
        with Client(url) as c:
            c.send("stop")
        mock_ws_thread.join()

        task_result = flow_run.result[result]
        assert task_result.is_successful()
        assert flow_run.is_successful()

        assert len(task_result.result) == 1
        expected_uri = output_trans["output"]._uri
        output_uri = task_result.result["output"]._uri
        assert expected_uri == output_uri

        transmitted_record = asyncio.get_event_loop().run_until_complete(
            task_result.result["output"].load()
        )
        transmitted_result = transmitted_record.data
        assert expected_result == transmitted_result
def test_dispatchers_can_connect_and_monitor_can_shut_down_evaluator(evaluator):
    with evaluator.run() as monitor:
        events = monitor.track()

        token = evaluator._config.token
        cert = evaluator._config.cert
        url = evaluator._config.url

        # first snapshot before any event occurs
        snapshot_event = next(events)
        print(snapshot_event)
        snapshot = Snapshot(snapshot_event.data)
        assert snapshot.status == ENSEMBLE_STATE_UNKNOWN

        # two dispatchers connect
        with Client(
            url + "/dispatch",
            cert=cert,
            token=token,
            max_retries=1,
            timeout_multiplier=1,
        ) as dispatch1, Client(
            url + "/dispatch",
            cert=cert,
            token=token,
            max_retries=1,
            timeout_multiplier=1,
        ) as dispatch2:
            # first dispatcher informs that job 0 is running
            send_dispatch_event(
                dispatch1,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                f"/ert/ee/{evaluator._ee_id}/real/0/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            # second dispatcher informs that job 0 is running
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                f"/ert/ee/{evaluator._ee_id}/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            # second dispatcher informs that job 0 is done
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_SUCCESS,
                f"/ert/ee/{evaluator._ee_id}/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            # second dispatcher informs that job 1 has failed
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_FAILURE,
                f"/ert/ee/{evaluator._ee_id}/real/1/step/0/job/1",
                "event_job_1_fail",
                {identifiers.ERROR_MSG: "error"},
            )
            evt = next(events)
            print(evt)
            snapshot = Snapshot(evt.data)
            assert snapshot.get_job("1", "0", "0").status == JOB_STATE_FINISHED
            assert snapshot.get_job("0", "0", "0").status == JOB_STATE_RUNNING
            assert snapshot.get_job("1", "0", "1").status == JOB_STATE_FAILURE

            # a second monitor connects
            with ee_monitor.create(
                evaluator._config.get_connection_info()
            ) as monitor2:
                events2 = monitor2.track()
                full_snapshot_event = next(events2)
                assert full_snapshot_event["type"] == identifiers.EVTYPE_EE_SNAPSHOT
                snapshot = Snapshot(full_snapshot_event.data)
                assert snapshot.status == ENSEMBLE_STATE_UNKNOWN
                assert snapshot.get_job("0", "0", "0").status == JOB_STATE_RUNNING
                assert snapshot.get_job("1", "0", "0").status == JOB_STATE_FINISHED

                # one monitor requests that the server exit
                monitor.signal_cancel()

                # both monitors should get a terminated event
                terminated = next(events)
                terminated2 = next(events2)
                assert terminated["type"] == identifiers.EVTYPE_EE_TERMINATED
                assert terminated2["type"] == identifiers.EVTYPE_EE_TERMINATED

                for e in [events, events2]:
                    for unexpected_event in e:
                        assert (
                            False
                        ), f"got unexpected event {unexpected_event} from monitor"
def _evaluate(self) -> None:
    get_event_loop()
    assert self._ee_con_info  # mypy
    assert self._ee_id  # mypy
    try:
        with Client(
            self._ee_con_info.dispatch_uri,
            self._ee_con_info.token,
            self._ee_con_info.cert,
        ) as c:
            event = CloudEvent(
                {
                    "type": EVTYPE_ENSEMBLE_STARTED,
                    "source": f"/ert/ee/{self._ee_id}",
                },
            )
            c.send(to_json(event).decode())
        with prefect.context(  # type: ignore
            url=self._ee_con_info.dispatch_uri,
            token=self._ee_con_info.token,
            cert=self._ee_con_info.cert,
        ):
            self.run_flow(self._ee_id)

        with Client(
            self._ee_con_info.dispatch_uri,
            self._ee_con_info.token,
            self._ee_con_info.cert,
        ) as c:
            event = CloudEvent(
                {
                    "type": EVTYPE_ENSEMBLE_STOPPED,
                    "source": f"/ert/ee/{self._ee_id}",
                    "datacontenttype": "application/octet-stream",
                },
                cloudpickle.dumps(self._outputs),
            )
            c.send(to_json(event).decode())
    except Exception as e:  # pylint: disable=broad-except
        logger.exception(
            "An exception occurred while starting the ensemble evaluation",
            exc_info=True,
        )
        # Signal 2 is SIGINT, so it is assumed this exception came from
        # cancellation. This means the ensemble failed event should not be sent.
        if isinstance(e, OSError) and "Signal 2" in str(e):
            logger.debug("interpreting %s as a result of cancellation", e)
            return
        with Client(
            self._ee_con_info.dispatch_uri,
            self._ee_con_info.token,
            self._ee_con_info.cert,
        ) as c:
            event = CloudEvent(
                {
                    "type": EVTYPE_ENSEMBLE_FAILED,
                    "source": f"/ert/ee/{self._ee_id}",
                },
            )
            c.send(to_json(event).decode())
def join(self):
    with Client(self.url) as c:
        c.send("stop")
    self.mock_ws_thread.join()
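# This `join` appears to be a method on a small wrapper around the mock WS
# server thread. A hedged sketch of such a wrapper follows; every name
# except `join`, `url`, and `mock_ws_thread` is an assumption.
class MockWSMonitor:
    def __init__(self, host, port):
        self.messages = []
        self.url = f"ws://{host}:{port}"
        self.mock_ws_thread = threading.Thread(
            target=partial(_mock_ws, messages=self.messages), args=(host, port)
        )

    def start(self):
        self.mock_ws_thread.start()

    def join(self):
        # Ask the server to stop, then wait for the thread, as above.
        with Client(self.url) as c:
            c.send("stop")
        self.mock_ws_thread.join()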