def test_python_stand_alone_argument_building(self):
    """A stand-alone component must receive both its own pipeline arguments
    and the arguments derived from the system config (e.g. the model sink
    path surfacing as "output-model")."""
    sys_conf = {
        "statsDBHost": "localhost",
        "statsDBPort": 8899,
        "statsMeasurementID": "tf-job-0001",
        "mlObjectSocketHost": "localhost",
        "mlObjectSocketSourcePort": 9900,
        "mlObjectSocketSinkPort": 9901,
        "modelFileSinkPath": "output-model-1234",
        "modelFileSourcePath": "input-model-1234",
        "healthStatFilePath": "/tmp/health",
        "workflowInstanceId": "/tmp/run/filesink1",
        "socketSourcePort": 0,
        "socketSinkPort": 0,
        "enableHealth": True,
        "canaryThreshold": 0.0,
    }
    pipeline_json = {
        "name": "stand_alone_test",
        "engineType": "Generic",
        "pipe": [
            {
                "name": "Test Train",
                "id": 1,
                "type": "test-python-train",
                "parents": [],
                "arguments": {"arg1": "arg1-value"},
            }
        ],
    }

    engine = PythonEngine("test-pipe")
    descriptions = ComponentsDesc(engine, pipeline=pipeline_json).load()
    dag = Dag(pipeline_json, descriptions, engine)

    node = dag.get_dag_node(0)
    args = node.input_arguments(sys_conf, comp_only_args=True)

    # The component's own argument is passed through untouched ...
    assert args["arg1"] == "arg1-value"
    # ... and the model sink path from the system config is mapped in.
    assert args["output-model"] == "output-model-1234"
def go(self):
    """Actual execution phase: build the component DAG from the pipeline
    definition and run it (stand-alone or connected mode).

    Raises:
        ExecutorException: when a component called exit() with a non-zero
            exit code.
    """
    self._logger.debug("Executor.go()")
    try:
        self._init_ml_engine(self.pipeline)
        comps_desc_list = components_desc.ComponentsDesc(
            self._ml_engine, self.pipeline, self._comp_root_path).load()
        self._logger.debug("comp_desc: {}".format(comps_desc_list))
        dag = Dag(self.pipeline, comps_desc_list,
                  self._ml_engine).use_color(self._use_color)

        # Flush stdout so the logs look a bit in order
        sys.stdout.flush()

        system_conf = self.pipeline[json_fields.PIPELINE_SYSTEM_CONFIG_FIELD]
        mlops._set_test_mode(
            system_conf.get(
                json_fields.PIPELINE_SYSTEM_CONFIG_TEST_MODE_PARAM, False))
        ee_conf = self.pipeline.get(json_fields.PIPELINE_EE_CONF_FIELD, dict())

        if dag.is_stand_alone:
            dag.run_single_component_pipeline(system_conf, ee_conf,
                                              self._ml_engine)
        else:
            dag.run_connected_pipeline(system_conf, ee_conf, self._ml_engine)
    # This except is intended to catch exit() calls from components.
    # Do not use exit() in mlpiper code.
    except SystemExit as e:
        code = self._parse_exit_code(e.code)
        error_message = "Pipeline called exit(), with code: {}".format(e.code)
        traceback_message = traceback.format_exc()
        if code != 0:
            self._logger.error("{}\n{}".format(error_message,
                                               traceback_message))
            # For Py2 put traceback into the exception message.
            # Fix: reuse the already-captured traceback instead of calling
            # traceback.format_exc() a second time.
            if sys.version_info[0] == 2:
                error_message = "{}\n{}".format(error_message,
                                               traceback_message)
            raise ExecutorException(error_message)
        else:
            self._logger.warning(error_message)
    except KeyboardInterrupt:
        # When running from mlpiper tool (standalone)
        pass
    finally:
        sys.stdout.flush()
        self._logger.info("Done running pipeline (in finally block)")
        self._cleanup_on_exist()
    # Fix: removed leftover debug statement 'print("End of go")' — logging
    # above already marks the end of the run.
def test_dag_detect_is_stand_alone(self):
    """A pipeline containing exactly one component must be detected as a
    stand-alone pipeline."""
    pipeline_json = {
        "name": "stand_alone_test",
        "engineType": "Generic",
        "pipe": [
            {
                "name": "Hello",
                "id": 1,
                "type": "hello-world",
                "parents": [],
                "arguments": {"arg1": "arg1-value"},
            }
        ],
    }

    engine = PythonEngine("test-pipe")
    descriptions = ComponentsDesc(engine, pipeline=pipeline_json).load()
    dag = Dag(pipeline_json, descriptions, engine)

    assert dag.is_stand_alone is True
def test_correct_python_component_io(self):
    """Parent-to-child data objects must be routed by the declared
    output/input indices, regardless of the order in which the parent
    entries appear in the pipeline json."""

    def component(name, comp_id, parents):
        # All four test components share everything except name/id/parents.
        return {
            "name": name,
            "id": comp_id,
            "type": "test-python-train",
            "parents": parents,
            "arguments": {"arg1": "arg1-value"},
        }

    pipeline_json = {
        "name": "stand_alone_test",
        "engineType": "Generic",
        "pipe": [
            component("Test Train 1", 1, []),
            component("Test Train 2", 2, [
                {"parent": 1, "output": 1, "input": 1},
                {"parent": 1, "output": 0, "input": 0},
            ]),
            component("Test Train 3", 3, [
                {"parent": 2, "output": 0, "input": 0},
                {"parent": 2, "output": 2, "input": 2},
                {"parent": 2, "output": 1, "input": 1},
            ]),
            component("Test Train 4", 4, [
                {"parent": 3, "output": 0, "input": 1},
                {"parent": 3, "output": 1, "input": 0},
            ]),
        ],
    }

    engine = PythonEngine("test-pipe")
    descriptions = ComponentsDesc(engine, pipeline=pipeline_json).load()
    dag = Dag(pipeline_json, descriptions, engine)

    node_1 = dag.get_dag_node(0)
    node_2 = dag.get_dag_node(1)
    node_3 = dag.get_dag_node(2)
    node_4 = dag.get_dag_node(3)

    # Naming scheme for the dummy data objects: "A100" means type A,
    # produced by node 1, output index 0, routed to input index 0.
    dag.update_parent_data_objs(node_1, ["A100", "B111"])
    dag.update_parent_data_objs(node_2, ["A200", "B211", "C222"])
    dag.update_parent_data_objs(node_3, ["A301", "B310"])

    # Node 1 has no parents, so its input object list is empty.
    assert dag.parent_data_objs(node_1) == []
    # Node 2's parent entries are shuffled in the json, yet the data
    # objects must still come back ordered by input index.
    assert dag.parent_data_objs(node_2) == ["A100", "B111"]
    # Same story for node 3, with three shuffled connections.
    assert dag.parent_data_objs(node_3) == ["A200", "B211", "C222"]
    # Node 4 crosses the wires: parent output 0 feeds input 1 and parent
    # output 1 feeds input 0, so the two objects arrive swapped.
    assert dag.parent_data_objs(node_4) == ["B310", "A301"]
def test_component_argument_building_with_sagemaker(self):
    """When comp_only_args is False, the execution-environment (engine)
    configuration must be merged into the component's input arguments and
    remain readable there alongside the component's own arguments."""
    systemConfig = {
        "statsDBHost": "localhost",
        "statsDBPort": 8899,
        "statsMeasurementID": "tf-job-0001",
        "mlObjectSocketHost": "localhost",
        "mlObjectSocketSourcePort": 9900,
        "mlObjectSocketSinkPort": 9901,
        "modelFileSinkPath": "output-model-1234",
        "modelFileSourcePath": "input-model-1234",
        "healthStatFilePath": "/tmp/health",
        "workflowInstanceId": "/tmp/run/filesink1",
        "socketSourcePort": 0,
        "socketSinkPort": 0,
        "enableHealth": True,
        "canaryThreshold": 0.0
    }

    region = "us-west-2"
    iam_role_value = "arn:aws:iam::ACCOUNT-ID-WITHOUT-HYPHENS:role/Get-pics"
    ee_config = {
        "configs": {
            "engConfig": {
                "type": "sagemaker",
                "arguments": {
                    "region": {
                        # Fix: use the 'region' local — it was defined but
                        # unused, with the same value hard-coded here.
                        "value": region,
                        "type": "string",
                        "optional": "false",
                        "label": "Region",
                        "description": "The AWS Region to send the request to",
                        "editable": "true"
                    },
                    "aws_access_key_id": {
                        "value": "2134",
                        "type": "string",
                        "optional": "false",
                        "label": "Access Key ID",
                        "description": "A long term credential access key ID",
                        "editable": "true"
                    },
                    "aws_secret_access_key": {
                        "value": "123qwe",
                        "type": "string",
                        "optional": "false",
                        "label": "Secret Access Key",
                        "description": "A long term credential secret access key",
                        "editable": "true"
                    },
                    "iam_role": {
                        "value": iam_role_value,
                        "type": "string",
                        "optional": "false",
                        "label": "Region",
                        "description": "The AWS Region to send the request to",
                        "editable": "true"
                    }
                }
            }
        }
    }

    pipeline = {
        "name": "SageMaker pipeline",
        "engineType": "SageMaker",
        "systemConfig": systemConfig,
        "executionEnvironment": ee_config,
        "pipe": [
            {
                "name": "String Source",
                "id": 1,
                "type": "string-source",
                "parents": [],
                "arguments": {
                    "arg1": "arg1-value"
                }
            }
        ]
    }

    python_engine = SageMakerEngine(pipeline)
    comps_desc_list = ComponentsDesc(python_engine, pipeline=pipeline).load()
    dag = Dag(pipeline, comps_desc_list, python_engine)
    dag_node = dag.get_dag_node(0)

    input_args = dag_node.input_arguments(systemConfig, ee_config,
                                          comp_only_args=False)

    # The component's own argument survives the merge ...
    assert input_args["arg1"] == "arg1-value"
    # ... and the engine configuration is reachable through the merged args.
    assert input_args["configs"]["engConfig"]["arguments"]["iam_role"]["value"] == iam_role_value