Ejemplo n.º 1
0
    def _include_files(self, comp_root, comp_desc):
        """
        Build the list of files (relative to *comp_root*) to ship with a component.

        Files are selected via the include/exclude glob patterns from the
        component description. Any component JSON descriptor found while
        walking is skipped — the current component's descriptor is appended
        separately by the caller.

        :param comp_root: root directory of the component
        :param comp_desc: parsed component description (dict)
        :return: list of file paths relative to *comp_root*
        :raises Exception: for Python components when '__init__.py' is not
            found in the component root (or is excluded by the patterns)
        """
        include_patterns = self._parse_patterns(comp_desc.get(json_fields.COMPONENT_DESC_INCLUDE_GLOB_PATTERNS))
        exclude_patterns = self._parse_patterns(comp_desc.get(json_fields.COMPONENT_DESC_EXCLUDE_GLOB_PATTERNS))

        # If explicit include patterns are defined, force "requirements.txt"
        # (when it exists) into the patterns so it is always copied.
        if include_patterns and os.path.exists(os.path.join(comp_root, MLCompConstants.REQUIREMENTS_FILENAME)):
            include_patterns.append(MLCompConstants.REQUIREMENTS_FILENAME)

        included_files = []
        init_py_found = False
        for root, _, files in os.walk(comp_root):
            # Loop-invariant per directory — hoisted out of the per-file loop.
            rltv_path = os.path.relpath(root, comp_root)
            for f in files:
                filepath = os.path.join(rltv_path, f) if rltv_path != "." else f
                if not self._path_included(filepath, include_patterns, exclude_patterns):
                    continue

                if filepath == "__init__.py":
                    init_py_found = True

                # There can be several comp JSONs in one folder.
                # Don't include any of them, even related to current component,
                # it will be included automatically
                if ComponentsDesc._load_comp_desc(comp_root, f):
                    continue

                included_files.append(filepath)

        if comp_desc[json_fields.COMPONENT_DESC_LANGUAGE_FIELD] == ComponentLanguage.PYTHON and not init_py_found:
            comp_name = comp_desc[json_fields.COMPONENT_DESC_NAME_FIELD]
            raise Exception("Missing '__init__.py' in component's root folder or it is not included"
                            " by 'glob' pattern! Please make sure to add it! name: {}, path: {}"
                            .format(comp_name, comp_root))

        return included_files
Ejemplo n.º 2
0
    def scan_dir(self, root_dir):
        """
        Scan *root_dir* recursively for component descriptors.

        Returns a two-level map, keyed first by engine type, then by
        component name:
        {
            "<engine_type>": {
                "<comp_name>": {
                    "comp_desc": <parsed component json>,
                    "root": <component root directory>,
                    "files": [<relative paths, incl. the comp json itself>],
                    "comp_filename": <component json filename>
                }
            }
        }
        :raises Exception: when the same component name appears twice for
            the same engine type.
        :return: the map described above
        """
        comps = {}
        logging.debug("Scanning {}".format(root_dir))
        for root, comp_desc, comp_filename in ComponentsDesc.next_comp_desc(root_dir):
            engine_type = comp_desc[json_fields.COMPONENT_DESC_ENGINE_TYPE_FIELD]
            engine_comps = comps.setdefault(engine_type, {})

            comp_name = comp_desc[json_fields.COMPONENT_DESC_NAME_FIELD]
            if comp_name in engine_comps:
                prev = engine_comps[comp_name]
                raise Exception("Component already defined!\n\tPrev comp file: {}\n\tCurr comp file: {}"
                                .format(
                                    os.path.join(prev["root"], prev["comp_filename"]),
                                    os.path.join(root, comp_filename))
                                )

            files = self._include_files(root, comp_desc)
            # Always include current component json file regardless of its name.
            files.append(comp_filename)
            engine_comps[comp_name] = {
                "comp_desc": comp_desc,
                "root": root,
                "files": files,
                "comp_filename": comp_filename,
            }

            # BUG FIX: the original format string lacked the third '{}', so
            # comp_name was passed to .format() but never shown in the log.
            logging.debug("Found component, root: {}, engine: {}, name: {}"
                          .format(root, engine_type, comp_name))
        return comps
Ejemplo n.º 3
0
    def test_python_stand_alone_argument_building(self):
        """A stand-alone component's input arguments merge its own pipeline
        arguments with the relevant system-config values."""
        system_config = {
            "statsDBHost": "localhost",
            "statsDBPort": 8899,
            "statsMeasurementID": "tf-job-0001",
            "mlObjectSocketHost": "localhost",
            "mlObjectSocketSourcePort": 9900,
            "mlObjectSocketSinkPort": 9901,
            "modelFileSinkPath": "output-model-1234",
            "modelFileSourcePath": "input-model-1234",
            "healthStatFilePath": "/tmp/health",
            "workflowInstanceId": "/tmp/run/filesink1",
            "socketSourcePort": 0,
            "socketSinkPort": 0,
            "enableHealth": True,
            "canaryThreshold": 0.0,
        }
        pipeline = {
            "name": "stand_alone_test",
            "engineType": "Generic",
            "pipe": [
                {
                    "name": "Test Train",
                    "id": 1,
                    "type": "test-python-train",
                    "parents": [],
                    "arguments": {"arg1": "arg1-value"},
                },
            ],
        }

        engine = PythonEngine("test-pipe")
        descriptors = ComponentsDesc(engine, pipeline=pipeline).load()
        dag = Dag(pipeline, descriptors, engine)

        node = dag.get_dag_node(0)
        args = node.input_arguments(system_config, comp_only_args=True)
        assert args["arg1"] == "arg1-value"
        assert args["output-model"] == "output-model-1234"
Ejemplo n.º 4
0
    def test_dag_detect_is_stand_alone(self):
        """A single-component pipeline must be detected as stand-alone."""
        pipeline = {
            "name": "stand_alone_test",
            "engineType": "Generic",
            "pipe": [
                {
                    "name": "Hello",
                    "id": 1,
                    "type": "hello-world",
                    "parents": [],
                    "arguments": {"arg1": "arg1-value"},
                },
            ],
        }

        engine = PythonEngine("test-pipe")
        descriptors = ComponentsDesc(engine, pipeline=pipeline).load()
        dag = Dag(pipeline, descriptors, engine)
        assert dag.is_stand_alone is True
Ejemplo n.º 5
0
    def test_correct_python_component_io(self):
        """Parent outputs must be routed to the child's input slots by the
        declared output/input indices, regardless of the order in which the
        parent links appear in the pipeline JSON."""

        def link(parent, output, inp):
            # One parent->child wiring entry.
            return {"parent": parent, "output": output, "input": inp}

        def train_node(name, node_id, parents):
            # A 'test-python-train' pipe entry; all nodes share the same arguments.
            return {
                "name": name,
                "id": node_id,
                "type": "test-python-train",
                "parents": parents,
                "arguments": {"arg1": "arg1-value"},
            }

        pipeline = {
            "name": "stand_alone_test",
            "engineType": "Generic",
            "pipe": [
                train_node("Test Train 1", 1, []),
                train_node("Test Train 2", 2, [link(1, 1, 1), link(1, 0, 0)]),
                train_node("Test Train 3", 3, [link(2, 0, 0), link(2, 2, 2), link(2, 1, 1)]),
                train_node("Test Train 4", 4, [link(3, 0, 1), link(3, 1, 0)]),
            ],
        }

        engine = PythonEngine("test-pipe")
        descriptors = ComponentsDesc(engine, pipeline=pipeline).load()
        dag = Dag(pipeline, descriptors, engine)

        node_1 = dag.get_dag_node(0)
        node_2 = dag.get_dag_node(1)
        node_3 = dag.get_dag_node(2)
        node_4 = dag.get_dag_node(3)

        # Data-object naming: "A100" means Type A, Node Id 1, Output 0, Goes To 0.
        # Chain: 1 -> 2 (2 links) -> 3 (3 links) -> 4 (2 crossed links: 3's
        # output 0 feeds 4's input 1 and 3's output 1 feeds 4's input 0).
        dag.update_parent_data_objs(node_1, ["A100", "B111"])
        dag.update_parent_data_objs(node_2, ["A200", "B211", "C222"])
        dag.update_parent_data_objs(node_3, ["A301", "B310"])

        # Node 1 has no parents, so its input objects are empty.
        assert dag.parent_data_objs(node_1) == []
        # Node 2's links are listed out of order in the JSON, yet the data
        # objects must still come back ordered by input index.
        assert dag.parent_data_objs(node_2) == ["A100", "B111"]
        # Same story for node 3, with three shuffled links.
        assert dag.parent_data_objs(node_3) == ["A200", "B211", "C222"]
        # Node 4's links are crossed: parent output 0 -> input 1 and
        # parent output 1 -> input 0, so the objects arrive swapped.
        assert dag.parent_data_objs(node_4) == ["B310", "A301"]
Ejemplo n.º 6
0
    def test_component_argument_building_with_sagemaker(self):
        """With comp_only_args=False, the execution-environment config is
        merged into the node's input arguments alongside its own arguments."""
        system_config = {
            "statsDBHost": "localhost",
            "statsDBPort": 8899,
            "statsMeasurementID": "tf-job-0001",
            "mlObjectSocketHost": "localhost",
            "mlObjectSocketSourcePort": 9900,
            "mlObjectSocketSinkPort": 9901,
            "modelFileSinkPath": "output-model-1234",
            "modelFileSourcePath": "input-model-1234",
            "healthStatFilePath": "/tmp/health",
            "workflowInstanceId": "/tmp/run/filesink1",
            "socketSourcePort": 0,
            "socketSinkPort": 0,
            "enableHealth": True,
            "canaryThreshold": 0.0,
        }

        def str_arg(value, label, description):
            # All engine-config arguments share the same string-typed shape.
            return {
                "value": value,
                "type": "string",
                "optional": "false",
                "label": label,
                "description": description,
                "editable": "true",
            }

        region = "us-west-2"
        iam_role_value = "arn:aws:iam::ACCOUNT-ID-WITHOUT-HYPHENS:role/Get-pics"
        ee_config = {
            "configs": {
                "engConfig": {
                    "type": "sagemaker",
                    "arguments": {
                        "region": str_arg(
                            region, "Region",
                            "The AWS Region to send the request to"),
                        "aws_access_key_id": str_arg(
                            "2134", "Access Key ID",
                            "A long term credential access key ID"),
                        "aws_secret_access_key": str_arg(
                            "123qwe", "Secret Access Key",
                            "A long term credential secret access key"),
                        # NOTE: label/description mirror 'region' on purpose —
                        # kept as-is to match the original fixture data.
                        "iam_role": str_arg(
                            iam_role_value, "Region",
                            "The AWS Region to send the request to"),
                    },
                },
            },
        }
        pipeline = {
            "name": "SageMaker pipeline",
            "engineType": "SageMaker",
            "systemConfig": system_config,
            "executionEnvironment": ee_config,
            "pipe": [
                {
                    "name": "String Source",
                    "id": 1,
                    "type": "string-source",
                    "parents": [],
                    "arguments": {"arg1": "arg1-value"},
                },
            ],
        }

        engine = SageMakerEngine(pipeline)
        descriptors = ComponentsDesc(engine, pipeline=pipeline).load()
        dag = Dag(pipeline, descriptors, engine)

        node = dag.get_dag_node(0)
        args = node.input_arguments(system_config, ee_config, comp_only_args=False)
        assert args["arg1"] == "arg1-value"
        assert args["configs"]["engConfig"]["arguments"]["iam_role"]["value"] == iam_role_value