Esempio n. 1
0
    def test_package(self):
        """Verify the result contract around the sample package execution.

        An ``MLObject`` that only has its schema type/version set must make
        ``verify_result_contract`` raise ``ValueError``. After the sample
        script has run and the run metadata and execution profile have been
        filled in, the same call must return a truthy value.
        """
        # NOTE(review): the step name is "train" although this test runs the
        # package sample, and the schema type here is "package_results" --
        # confirm both are intended.
        step_name = "train"
        expected_results_schema_type = "package_results"  # MUST BE A LOADED SCHEMA
        expected_results_schema_version = "9999.0.1"  # MUST BE A SEMVER

        # Not referenced again in this method. Kept under this exact name
        # because the exec() call below exposes locals() to the sample script,
        # which may rely on it (script not visible here -- confirm).
        step_execution_object = StepExecution(self.input_parameters,
                                              self.execution_parameters)

        results_ml_object = MLObject()
        results_ml_object.set_type(
            schema_type=expected_results_schema_type,
            schema_version=expected_results_schema_version,
        )

        # Should error due to missing fields
        with self.assertRaises(ValueError) as context:
            verify_result_contract(
                results_ml_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            )

        # assertIn reports the actual exception text when the check fails.
        self.assertIn(
            f"Error verifying result object for '{step_name}.output'",
            str(context.exception))

        # Same caveat as step_execution_object: only reachable by the sample
        # script through locals().
        result_ml_object_schema_type = expected_results_schema_type
        result_ml_object_schema_version = expected_results_schema_version

        # Runs the sample script with this method's globals and a view of its
        # locals. Names rebound by the script do not propagate back here; only
        # in-place mutation of objects such as results_ml_object is visible
        # afterwards. The script is a repository file (trusted input).
        exec(
            (Path("tests") / "sample_package_execution.py").read_text(),
            globals(),
            locals(),
        )

        results_ml_object.run_date = datetime.datetime.now()
        results_ml_object.step_id = uuid.uuid4()
        results_ml_object.run_id = uuid.uuid4()

        results_ml_object.execution_profile.system_memory_utilization = random(
        )
        # Integer bounds: random.randint rejects float arguments such as 7e9
        # on Python 3.12+ (deprecated since 3.10).
        results_ml_object.execution_profile.network_traffic_in_bytes = randint(
            int(7e9), int(9e10))
        results_ml_object.execution_profile.gpu_temperature = randint(70, 130)
        results_ml_object.execution_profile.disk_io_utilization = random()
        results_ml_object.execution_profile.gpu_percent_of_time_accessing_memory = (
            random())
        results_ml_object.execution_profile.cpu_utilization = random()
        results_ml_object.execution_profile.gpu_utilization = random()
        results_ml_object.execution_profile.gpu_memory_allocation = random()

        self.assertTrue(
            verify_result_contract(
                results_ml_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            ))
    def test_e2e(self):
        """Run the package step end to end and verify its result contract.

        Loads the schema registry, checks that an ``MLObject`` with only its
        schema type/version set makes ``verify_result_contract`` raise
        ``ValueError``, then executes the step and checks that the returned
        object verifies once run metadata is attached.
        """
        MLSchema.populate_registry()
        MLSchema.append_schema_to_registry(Path.cwd() / ".parameters" /
                                           "schemas")

        # Execute step
        input_parameters = {
            # Put sample required input parameters here
        }

        execution_parameters = {
            # Put sample required execution parameters here
        }

        # THESE SHOULD BE THE ONLY SETTINGS FOR THIS FILE
        step_name = "package"
        expected_results_schema_type = "package_result"  # MUST BE A LOADED SCHEMA
        expected_results_schema_version = "500.0.1"  # MUST BE A SEMVER

        step_execution_object = StepExecution(input_parameters,
                                              execution_parameters)

        results_object = MLObject()
        results_object.set_type(
            schema_type=expected_results_schema_type,
            schema_version=expected_results_schema_version,
        )

        # Should error due to missing fields
        with self.assertRaises(ValueError) as context:
            verify_result_contract(
                results_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            )

        # assertIn reports the actual exception text when the check fails.
        self.assertIn(
            f"Error verifying result object for '{step_name}.output'",
            str(context.exception))

        # Replace the empty placeholder with the object produced by the step.
        results_object = step_execution_object.execute(
            result_object_schema_type=expected_results_schema_type,
            result_object_schema_version=expected_results_schema_version,
        )

        results_object.run_date = datetime.datetime.now()
        results_object.step_id = uuid.uuid4()
        results_object.run_id = uuid.uuid4()

        self.assertTrue(
            verify_result_contract(results_object,
                                   expected_results_schema_type,
                                   expected_results_schema_version, step_name))
Esempio n. 3
0
    def test_process_data(self):
        """
        Full E2E of Process Data

        An ``MLObject`` that only has its schema type/version set must make
        ``verify_result_contract`` raise ``ValueError``. A fresh ``MLObject``
        is then created, the sample script is run, run metadata and an
        execution profile are filled in, and the contract check must return a
        truthy value.
        """
        # THESE SHOULD BE THE ONLY SETTINGS FOR THIS FILE
        step_name = "process_data"
        expected_results_schema_type = "data_result"  # MUST BE A LOADED SCHEMA
        expected_results_schema_version = "9999.0.1"  # MUST BE A SEMVER

        results_ml_object = MLObject()
        results_ml_object.set_type(
            schema_type=expected_results_schema_type,
            schema_version=expected_results_schema_version,
        )

        # Should error due to missing fields
        with self.assertRaises(ValueError) as context:
            verify_result_contract(
                results_ml_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            )

        # assertIn reports the actual exception text when the check fails.
        self.assertIn(
            f"Error verifying result object for '{step_name}.output'",
            str(context.exception))

        # Start over with an untyped object; set_type() is not called again in
        # this method, so any typing presumably happens inside the sample
        # script -- confirm against tests/sample_process_data_execution.py.
        results_ml_object = MLObject()

        # Not referenced again in this method. Kept under these exact names
        # because the exec() call below exposes locals() to the sample script,
        # which may rely on them (script not visible here -- confirm).
        result_ml_object_schema_type = expected_results_schema_type
        result_ml_object_schema_version = expected_results_schema_version

        # Runs the sample script with this method's globals and a view of its
        # locals. Names rebound by the script do not propagate back here; only
        # in-place mutation of objects such as results_ml_object is visible
        # afterwards. The script is a repository file (trusted input).
        exec(
            (Path("tests") / "sample_process_data_execution.py").read_text(),
            globals(),
            locals(),
        )

        results_ml_object.run_date = datetime.datetime.now()
        results_ml_object.step_id = str(uuid.uuid4())
        results_ml_object.run_id = str(uuid.uuid4())

        results_ml_object.execution_profile.system_memory_utilization = random(
        )
        # Integer bounds: random.randint rejects float arguments such as 7e9
        # on Python 3.12+ (deprecated since 3.10).
        results_ml_object.execution_profile.network_traffic_in_bytes = randint(
            int(7e9), int(9e10))
        results_ml_object.execution_profile.gpu_temperature = randint(70, 130)
        results_ml_object.execution_profile.disk_io_utilization = random()
        results_ml_object.execution_profile.gpu_percent_of_time_accessing_memory = (
            random())
        results_ml_object.execution_profile.cpu_utilization = random()
        results_ml_object.execution_profile.gpu_utilization = random()
        results_ml_object.execution_profile.gpu_memory_allocation = random()

        self.assertTrue(
            verify_result_contract(
                results_ml_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            ))
    def test_e2e(self):
        """Run the process_data step end to end and verify its result contract.

        Loads the schema registry, checks that an ``MLObject`` with only its
        schema type/version set makes ``verify_result_contract`` raise
        ``ValueError``, then executes the step and checks that the returned
        object verifies once run metadata and an execution profile are
        attached.
        """
        MLSchema.populate_registry()
        MLSchema.append_schema_to_registry(Path.cwd() / ".parameters" /
                                           "schemas")

        # Execute step
        input_parameters = {
            # Put sample required input parameters here
        }

        execution_parameters = {
            # Put sample required execution parameters here
        }

        # THESE SHOULD BE THE ONLY SETTINGS FOR THIS FILE
        step_name = "process_data"
        expected_results_schema_type = "data_result"  # MUST BE A LOADED SCHEMA
        expected_results_schema_version = "500.0.1"  # MUST BE A SEMVER

        step_execution_object = StepExecution(input_parameters,
                                              execution_parameters)

        results_object = MLObject()
        results_object.set_type(
            schema_type=expected_results_schema_type,
            schema_version=expected_results_schema_version,
        )

        # Should error due to missing fields
        with self.assertRaises(ValueError) as context:
            verify_result_contract(
                results_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            )

        # assertIn reports the actual exception text when the check fails.
        self.assertIn(
            f"Error verifying result object for '{step_name}.output'",
            str(context.exception))

        # Replace the empty placeholder with the object produced by the step.
        results_object = step_execution_object.execute(
            result_object_schema_type=expected_results_schema_type,
            result_object_schema_version=expected_results_schema_version,
        )

        results_object.run_date = datetime.datetime.now()
        results_object.step_id = str(uuid.uuid4())
        results_object.run_id = str(uuid.uuid4())

        results_object.execution_profile.system_memory_utilization = random()
        # Integer bounds: random.randint rejects float arguments such as 7e9
        # on Python 3.12+ (deprecated since 3.10).
        results_object.execution_profile.network_traffic_in_bytes = randint(
            int(7e9), int(9e10))
        results_object.execution_profile.gpu_temperature = randint(70, 130)
        results_object.execution_profile.disk_io_utilization = random()
        results_object.execution_profile.gpu_percent_of_time_accessing_memory = random(
        )
        results_object.execution_profile.cpu_utilization = random()
        results_object.execution_profile.gpu_utilization = random()
        results_object.execution_profile.gpu_memory_allocation = random()

        self.assertTrue(
            verify_result_contract(results_object,
                                   expected_results_schema_type,
                                   expected_results_schema_version, step_name))