def test_package(self):
        """Validate the result contract for the 'train' packaging step.

        First confirms that an empty result object fails
        verify_result_contract with a descriptive ValueError, then runs
        the sample package-execution script and checks that a fully
        populated object passes the contract.
        """
        step_name = "train"
        expected_results_schema_type = "package_results"  # MUST BE A LOADED SCHEMA
        expected_results_schema_version = "9999.0.1"  # MUST BE A SEMVER

        step_execution_object = StepExecution(self.input_parameters,
                                              self.execution_parameters)

        results_ml_object = MLObject()
        results_ml_object.set_type(
            schema_type=expected_results_schema_type,
            schema_version=expected_results_schema_version,
        )

        # Should error due to missing fields
        with self.assertRaises(ValueError) as context:
            verify_result_contract(
                results_ml_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            )

        self.assertTrue(
            f"Error verifying result object for '{step_name}.output'" in str(
                context.exception))

        # These names are read (via locals()) by the exec()'d sample script.
        result_ml_object_schema_type = expected_results_schema_type
        result_ml_object_schema_version = expected_results_schema_version

        exec(
            (Path("tests") / "sample_package_execution.py").read_text(),
            globals(),
            locals(),
        )

        results_ml_object.run_date = datetime.datetime.now()
        # NOTE(review): sibling tests stringify uuid4(); confirm the schema
        # also accepts raw UUID objects here.
        results_ml_object.step_id = uuid.uuid4()
        results_ml_object.run_id = uuid.uuid4()

        # Populate the execution profile with plausible synthetic metrics.
        results_ml_object.execution_profile.system_memory_utilization = random(
        )
        # Fix: randint() requires integer arguments; float literals (7e9,
        # 9e10) were deprecated in Python 3.10 and rejected from 3.12 on.
        results_ml_object.execution_profile.network_traffic_in_bytes = randint(
            7_000_000_000, 90_000_000_000)
        results_ml_object.execution_profile.gpu_temperature = randint(70, 130)
        results_ml_object.execution_profile.disk_io_utilization = random()
        results_ml_object.execution_profile.gpu_percent_of_time_accessing_memory = (
            random())
        results_ml_object.execution_profile.cpu_utilization = random()
        results_ml_object.execution_profile.gpu_utilization = random()
        results_ml_object.execution_profile.gpu_memory_allocation = random()

        self.assertTrue(
            verify_result_contract(
                results_ml_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            ))
    def test_e2e(self):
        """End-to-end check of the process_data step result contract.

        An empty result object must fail verification; the object
        produced by StepExecution.execute(), once stamped with run
        metadata, must pass it.
        """
        MLSchema.populate_registry()
        MLSchema.append_schema_to_registry(
            Path.cwd() / ".parameters" / "schemas")

        # Execute step
        sample_input_params = {
            # Put sample required input parameters here
        }
        sample_execution_params = {
            # Put sample required execution parameters here
        }

        # THESE SHOULD BE THE ONLY SETTINGS FOR THIS FILE
        step_name = "process_data"
        expected_results_schema_type = "data_result"  # MUST BE A LOADED SCHEMA
        expected_results_schema_version = "0.0.1"  # MUST BE A SEMVER

        step_exec = StepExecution(sample_input_params, sample_execution_params)

        empty_result = MLObject()
        empty_result.set_type(
            schema_type=expected_results_schema_type,
            schema_version=expected_results_schema_version,
        )

        # An object with no fields filled in must fail the contract.
        with self.assertRaises(ValueError) as context:
            verify_result_contract(
                empty_result,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            )

        expected_message = (
            f"Error verifying result object for '{step_name}.output'")
        self.assertIn(expected_message, str(context.exception))

        # Real execution produces a populated result object.
        executed_result = step_exec.execute(
            result_object_schema_type=expected_results_schema_type,
            result_object_schema_version=expected_results_schema_version,
        )

        executed_result.run_date = datetime.datetime.now()
        executed_result.step_id = uuid.uuid4()
        executed_result.run_id = uuid.uuid4()

        self.assertTrue(
            verify_result_contract(executed_result,
                                   expected_results_schema_type,
                                   expected_results_schema_version,
                                   step_name))
    def test_process_data(self):
        """
        Full E2E of Process Data: an empty result object must fail the
        contract; after running the sample process-data execution script
        and filling run metadata plus a synthetic execution profile, the
        object must pass verify_result_contract.
        """
        # THESE SHOULD BE THE ONLY SETTINGS FOR THIS FILE
        step_name = "process_data"
        expected_results_schema_type = "data_result"  # MUST BE A LOADED SCHEMA
        expected_results_schema_version = "9999.0.1"  # MUST BE A SEMVER

        results_ml_object = MLObject()
        results_ml_object.set_type(
            schema_type=expected_results_schema_type,
            schema_version=expected_results_schema_version,
        )

        # Should error due to missing fields
        with self.assertRaises(ValueError) as context:
            verify_result_contract(
                results_ml_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            )

        self.assertTrue(
            f"Error verifying result object for '{step_name}.output'" in str(
                context.exception))

        results_ml_object = MLObject()

        # These names are read (via locals()) by the exec()'d sample script.
        result_ml_object_schema_type = expected_results_schema_type
        result_ml_object_schema_version = expected_results_schema_version

        exec(
            (Path("tests") / "sample_process_data_execution.py").read_text(),
            globals(),
            locals(),
        )

        results_ml_object.run_date = datetime.datetime.now()
        results_ml_object.step_id = str(uuid.uuid4())
        results_ml_object.run_id = str(uuid.uuid4())

        # Populate the execution profile with plausible synthetic metrics.
        results_ml_object.execution_profile.system_memory_utilization = random(
        )
        # Fix: randint() requires integer arguments; float literals (7e9,
        # 9e10) were deprecated in Python 3.10 and rejected from 3.12 on.
        results_ml_object.execution_profile.network_traffic_in_bytes = randint(
            7_000_000_000, 90_000_000_000)
        results_ml_object.execution_profile.gpu_temperature = randint(70, 130)
        results_ml_object.execution_profile.disk_io_utilization = random()
        results_ml_object.execution_profile.gpu_percent_of_time_accessing_memory = (
            random())
        results_ml_object.execution_profile.cpu_utilization = random()
        results_ml_object.execution_profile.gpu_utilization = random()
        results_ml_object.execution_profile.gpu_memory_allocation = random()

        self.assertTrue(
            verify_result_contract(
                results_ml_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            ))
# Beispiel #4 (scrape artifact: example separator; "0" was the vote count)
def execute_step(
    execution_file: str,
    workflow_object: MLObject,
    input_object: MLObject,
    execution_object: MLObject,
    step_name,
    run_id,
):
    """Run a single workflow step and return its validated result MLObject.

    When *execution_file* is None, the built-in StepExecution path is used;
    otherwise the named file is exec()'d with the expected result schema
    type/version available in locals. The result is stamped with run
    metadata and validated against the step's output contract.

    Raises:
        KnownException: if the execution file does not exist, or if the
            step produced no usable ``results_ml_object`` MLObject.
    """

    rootLogger = setupLogger().get_root_logger()

    results_ml_object = MLObject()

    if execution_file is None:
        msg = "Did not find any value for INPUT_EXECUTION_FILE, using /src/step_execution.py"

        print_left_message(msg)
        rootLogger.debug("::debug::" + msg)

        print("{:>15}".format("ok"))  # Finished loading from environment

        step_execution_object = StepExecution(input_object, execution_object)
        results_ml_object = step_execution_object.execute(
            result_object_schema_type=workflow_object.steps[
                step_name
            ].output.schema_type,
            result_object_schema_version=workflow_object.steps[
                step_name
            ].output.schema_version,
        )

    else:
        # TODO: Critical error if variable set but file not found
        # Fix: dropped the stray '$' -- the f-string already interpolates
        # execution_file, so the message previously rendered "'$/path/...'".
        msg = f"Executing '{execution_file}' (found in INPUT_EXECUTION_FILE env var)"

        print_left_message(msg)
        rootLogger.debug("::debug::" + msg)

        execution_file_path = Path(execution_file)

        if not execution_file_path.exists():
            raise KnownException(
                f"'{execution_file}' was provided as the file, but it does not appear to exist at {str(execution_file_path.resolve())} -- exiting."
            )

        # The below are used in the execution file
        result_ml_object_schema_type = workflow_object.steps[  # noqa
            step_name
        ].output.schema_type
        result_ml_object_schema_version = workflow_object.steps[  # noqa
            step_name
        ].output.schema_version
        # NOTE(review): exec() into locals() cannot rebind this function's
        # local names in CPython; the executed script is expected to mutate
        # the existing results_ml_object rather than reassign it -- confirm.
        exec(execution_file_path.read_text(), globals(), locals())

        print("{:>15}".format("ok"))  # Finished executing step

    if (results_ml_object is None) or (len(results_ml_object) == 0):
        raise KnownException(
            "No value was assigned to the variable 'results_ml_object' -- exiting."
        )
    elif not isinstance(results_ml_object, MLObject):
        raise KnownException(
            "The variable 'results_ml_object' was not of type MLObject -- exiting."
        )

    # Stamp run metadata before contract validation.
    results_ml_object.run_id = run_id
    results_ml_object.step_id = str(uuid.uuid4())
    results_ml_object.run_date = datetime.datetime.now().isoformat()

    # Using the below to validate the object, even though we already have it created.
    load_contract_object(
        parameters=results_ml_object.dict_without_internal_variables(),
        workflow_object=workflow_object,
        step_name=step_name,
        contract_type="output",
    )

    return results_ml_object
# Beispiel #5 (scrape artifact: example separator; "0" was the vote count)
    def main(self):
        """Run the three sample workflow containers end to end.

        Executes process_data, train, and package containers in order,
        capturing each container's base64-encoded output from the class
        buffer and feeding it to the next step as input parameters.
        """
        # Scratch notes kept by the original author; unused at runtime.
        c = """
- Dashboard for runs
-- Size
-- Likelihood of bias
-- Time for run
-- Accuracy
- Filter by version
- Look up at top version and show metadata going in and out

- Show bad input (e.g. it's null) and what happens when you run it
- Show when you add a new step - how you can compare those with other versions

""" # noqa
        credentials = Credentials.metastore_credentials_prod
        MLSchema.append_schema_to_registry(Path(".parameters") / "schemas")
        repo_name = "mlspec"
        # Captures the base64 payload emitted via the GitHub Actions
        # "::set-output" command in the captured container output.
        output_regex = "::set-output name=output_base64_encoded::(.*?)\\\\"

        # Synthetic run date: 2020-01-01 plus up to ~60 days of seconds.
        run_date_start = datetime.datetime(2020, 1, 1) + datetime.timedelta(
            seconds=random.randrange(0, 5184000))
        run_id = str(uuid.uuid4())

        # --- Step 1: process_data -------------------------------------
        step_name = "process_data"
        data_source = MLObject()
        # NOTE(review): positional args here are (version, type), while the
        # tests call set_type(schema_type=..., schema_version=...) -- confirm
        # set_type's positional parameter order.
        data_source.set_type("500.0.1", "data_source")
        data_source.run_id = run_id
        data_source.step_id = str(uuid.uuid4())
        data_source.run_date = str(run_date_start.isoformat())
        data_source.source_id = str(uuid.uuid4())
        data_source.source_uri = f"https://internal.contoso.com/datasets/raw_nlp_data-{run_date_start.strftime('%Y-%m-%d')}-{get_random_md5()}"  # noqa
        data_source.extended_properties = {}

        # Randomized execution parameters for the process_data container.
        data_process_run = MLObject()
        data_process_run.set_type("500.0.1", "data_process_run")
        data_process_run.nodes = random.randrange(1, 4) * 2
        data_process_run.cpu_per_node = f"{random.randrange(2,8) * 2}"
        data_process_run.ram_per_node = f"{random.randrange(1,16) * 8}Gi"
        data_process_run.gpu_required = (random.randrange(1, 2) % 2) == 0
        data_process_run.output_root_path = (
            "https://internal.contoso.com/datasets/processed_data/")
        data_process_run.base_image = random_base_image()
        data_process_run.machine_type = random_machine_type()
        data_process_run.run_id = run_id
        data_process_run.step_id = str(uuid.uuid4())
        data_process_run.run_date = str(run_date_start.isoformat())
        data_process_run.extended_properties = {}

        # Environment variables passed to the container, built as YAML.
        environment_dict = YAML.safe_load(f"""
INPUT_schemas_directory: '.parameters/schemas'
INPUT_schemas_git_url: 'https://github.com/mlspec/mlspeclib-action-samples-schemas.git'
INPUT_workflow_node_id: 'workflow|500.0.1|31ca83ed-8263-4c8c-8672-7a2163a34725'
INPUT_step_name: {step_name}
INPUT_input_parameters_raw: {data_source.dict_without_internal_variables()}
INPUT_execution_parameters_raw: {data_process_run.dict_without_internal_variables()}
INPUT_METASTORE_CREDENTIALS: {credentials}
GITHUB_RUN_ID: {str(run_id)}
GITHUB_WORKSPACE: '/src'
        """)

        self.run_container(repo_name, "mlspeclib-action-samples-process-data",
                           environment_dict)

        # Pull the step's base64-encoded output from the captured buffer.
        buff_val = self.buffer.getvalue()
        m = re.search(output_regex, buff_val)
        process_data_encoded_val = m.group(1)

        # Below is for debugging, we're ok leaving it in base64 encoded
        # process_data_output_value = base64.urlsafe_b64decode(process_data_encoded_val)
        self.buffer.truncate(0)
        self.buffer.seek(0)

        # --- Step 2: train --------------------------------------------
        step_name = "train"
        training_run = MLObject()
        training_run.set_type("500.0.1", "training_run")
        training_run.nodes = random.randrange(1, 4) * 2
        training_run.cpu_per_node = random.randrange(2, 8) * 2
        training_run.ram_per_node = f"{random.randrange(1,16) * 8}Gi"
        training_run.gpu_required = (random.randrange(1, 2) % 2) == 0
        training_run.output_path = "test/models/output"
        # Learning rate drawn from {1, 0.1, 0.01, 0.001, 0.0001}.
        training_run.training_params.learning_rate = 1 / (pow(
            10, random.randint(0, 4)))
        training_run.training_params.loss = random.random()
        training_run.training_params.batch_size = random.randrange(1, 5) * 500
        training_run.training_params.epoch = random.randrange(1, 8) * 25
        training_run.training_params.optimizer = ["SGD"]
        training_run.training_params.other_tags = {
            "pii": False,
            "data_sha": "8b03f70"
        }
        training_run.extended_properties = {}

        # process_data output is chained in as base64-encoded input.
        environment_dict_train = YAML.safe_load(f"""
INPUT_schemas_directory: '.parameters/schemas'
INPUT_schemas_git_url: 'https://github.com/mlspec/mlspeclib-action-samples-schemas.git'
INPUT_workflow_node_id: 'workflow|500.0.1|31ca83ed-8263-4c8c-8672-7a2163a34725'
INPUT_step_name: {step_name}
INPUT_input_parameters_base64: {process_data_encoded_val}
INPUT_execution_parameters_raw: {training_run.dict_without_internal_variables()}
INPUT_METASTORE_CREDENTIALS: {credentials}
GITHUB_RUN_ID: {str(run_id)}
GITHUB_WORKSPACE: '/src'
        """)

        self.run_container(repo_name, "mlspeclib-action-samples-train",
                           environment_dict_train)

        buff_val = self.buffer.getvalue()
        m = re.search(output_regex, buff_val)
        train_encoded_val = m.group(1)
        # train_output_value = base64.urlsafe_b64decode(train_encoded_val)
        self.buffer.truncate(0)
        self.buffer.seek(0)

        # --- Step 3: package ------------------------------------------
        step_name = "package"
        package_run = MLObject()
        package_run.set_type("500.0.1", "package_run")
        package_run.run_id = run_id
        package_run.step_id = str(uuid.uuid4())
        package_run.run_date = run_date_start.isoformat()
        package_run.model_source = "/nfs/trained_models/nlp"
        package_run.container_registry = f"https://registry.hub.docker.com/v1/repositories/contoso/nlp/{get_random_md5()}"  # noqa
        package_run.agent_pool = "nlp-build-pool"
        package_run.build_args = ["arg1", "arg2", "arg3"]
        package_run.extended_properties = {}
        package_run.secrets = {
            "credentials": "AZURE_CREDENTIALS",
            "docker_username": "******",
            "docker_password": "******",
        }

        # train output is chained in as base64-encoded input.
        environment_dict_package = YAML.safe_load(f"""
INPUT_schemas_directory: '.parameters/schemas'
INPUT_schemas_git_url: 'https://github.com/mlspec/mlspeclib-action-samples-schemas.git'
INPUT_workflow_node_id: 'workflow|500.0.1|31ca83ed-8263-4c8c-8672-7a2163a34725'
INPUT_step_name: {step_name}
INPUT_input_parameters_base64: {train_encoded_val}
INPUT_execution_parameters_raw: {package_run.dict_without_internal_variables()}
INPUT_METASTORE_CREDENTIALS: {credentials}
GITHUB_RUN_ID: {str(run_id)}
GITHUB_WORKSPACE: '/src'
        """)

        self.run_container(repo_name, "mlspeclib-action-samples-package",
                           environment_dict_package)

        # Decode and print the final step's output for inspection.
        buff_val = self.buffer.getvalue()
        m = re.search(output_regex, buff_val)
        encoded_val = m.group(1)
        print(base64.urlsafe_b64decode(encoded_val))
        self.buffer.flush()
    def test_e2e(self):
        """End-to-end test of the process_data step.

        An empty result object must fail verify_result_contract; the
        object produced by StepExecution.execute(), once stamped with run
        metadata and a synthetic execution profile, must pass it.
        """
        MLSchema.populate_registry()
        MLSchema.append_schema_to_registry(Path.cwd() / ".parameters" /
                                           "schemas")

        # Execute step
        input_parameters = {
            # Put sample required input parameters here
        }

        execution_parameters = {
            # Put sample required execution parameters here
        }

        # THESE SHOULD BE THE ONLY SETTINGS FOR THIS FILE
        step_name = "process_data"
        expected_results_schema_type = "data_result"  # MUST BE A LOADED SCHEMA
        expected_results_schema_version = "500.0.1"  # MUST BE A SEMVER

        step_execution_object = StepExecution(input_parameters,
                                              execution_parameters)

        results_object = MLObject()
        results_object.set_type(
            schema_type=expected_results_schema_type,
            schema_version=expected_results_schema_version,
        )

        # Should error due to missing fields
        with self.assertRaises(ValueError) as context:
            verify_result_contract(
                results_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            )

        self.assertTrue(
            f"Error verifying result object for '{step_name}.output'" in str(
                context.exception))

        results_object = step_execution_object.execute(
            result_object_schema_type=expected_results_schema_type,
            result_object_schema_version=expected_results_schema_version,
        )

        results_object.run_date = datetime.datetime.now()
        results_object.step_id = str(uuid.uuid4())
        results_object.run_id = str(uuid.uuid4())

        # Populate the execution profile with plausible synthetic metrics.
        results_object.execution_profile.system_memory_utilization = random()
        # Fix: randint() requires integer arguments; float literals (7e9,
        # 9e10) were deprecated in Python 3.10 and rejected from 3.12 on.
        results_object.execution_profile.network_traffic_in_bytes = randint(
            7_000_000_000, 90_000_000_000)
        results_object.execution_profile.gpu_temperature = randint(70, 130)
        results_object.execution_profile.disk_io_utilization = random()
        results_object.execution_profile.gpu_percent_of_time_accessing_memory = random(
        )
        results_object.execution_profile.cpu_utilization = random()
        results_object.execution_profile.gpu_utilization = random()
        results_object.execution_profile.gpu_memory_allocation = random()

        self.assertTrue(
            verify_result_contract(results_object,
                                   expected_results_schema_type,
                                   expected_results_schema_version, step_name))