def verify_result_contract(result_object: MLObject, expected_schema_type,
                           expected_schema_version, step_name: str):
    """Check a step's result object against the expected schema type and version.

    The result object is re-created through ``MLObject.create_object_from_string``
    (fed with ``result_object.dict_without_internal_variables()``); the
    ``schema_type`` and ``schema_version`` of the re-created object are then
    compared with the expected values.

    Args:
        result_object: The result produced by the step.
        expected_schema_type: Schema type the result must declare.
        expected_schema_version: Schema version the result must declare.
        step_name: Name of the step; only used in log and error messages.

    Returns:
        True when the object was created without errors and both the schema
        type and the schema version match.

    Raises:
        ValueError: If object creation reports any errors, or if the schema
            type or schema version differs from the expected one.
    """
    rootLogger = logging.getLogger()

    (contract_object, errors) = MLObject.create_object_from_string(
        result_object.dict_without_internal_variables())

    # Any reported error (non-empty collection) means the result is not usable.
    if errors is not None and len(errors) > 0:
        error_string = f"Error verifying result object for '{step_name}.output': {errors}"
        rootLogger.debug(error_string)
        raise ValueError(error_string)

    type_matches = contract_object.schema_type == expected_schema_type
    version_matches = contract_object.schema_version == expected_schema_version
    if not (type_matches and version_matches):
        # The message previously ended with a stray '")' left over from editing.
        error_string = f"""Actual data does not match the expected schema and version:
    Expected Type: {expected_schema_type}
    Actual Type: {contract_object.schema_type}

    Expected Version: {expected_schema_version}
    Actual Version: {contract_object.schema_version}"""
        rootLogger.debug(error_string)
        raise ValueError(error_string)

    rootLogger.debug(
        f"Successfully loaded and validated contract object: {contract_object.schema_type} on step {step_name}.output"
    )

    return True


# Example no. 2
# 0
def execute_step(
    execution_file: str,
    workflow_object: MLObject,
    input_object: MLObject,
    execution_object: MLObject,
    step_name,
    run_id,
):
    """Run one workflow step and return its stamped, validated result object.

    When ``execution_file`` is None, the step is run through
    ``StepExecution(input_object, execution_object).execute(...)``. Otherwise
    the source of ``execution_file`` is executed with ``exec``; that script
    sees this function's local variables (for example ``input_object``,
    ``execution_object``, ``result_ml_object_schema_type``,
    ``result_ml_object_schema_version``) and is expected to leave its output
    in ``results_ml_object``, either by mutating the pre-created object or by
    assigning a new one.

    Args:
        execution_file: Path of the script to execute, or None to use the
            ``StepExecution`` class.
        workflow_object: Workflow definition; ``steps[step_name].output``
            supplies the expected schema type and version of the result.
        input_object: Input object handed to the step.
        execution_object: Execution parameters handed to the step.
        step_name: Key of the step inside ``workflow_object.steps``.
        run_id: Identifier stored on the result as ``run_id``.

    Returns:
        The result MLObject with ``run_id``, ``step_id`` (a fresh UUID4 string)
        and ``run_date`` (local ISO-8601 timestamp) set.

    Raises:
        KnownException: If ``execution_file`` does not exist, or if the result
            is None, empty, or not an MLObject.
        Exception: Whatever ``load_contract_object`` raises when the result
            fails validation (defined elsewhere; not visible here).
    """
    rootLogger = setupLogger().get_root_logger()

    results_ml_object = MLObject()

    if execution_file is None:
        msg = "Did not find any value for INPUT_EXECUTION_FILE, using /src/step_execution.py"

        print_left_message(msg)
        rootLogger.debug("::debug::" + msg)

        print("{:>15}".format("ok"))  # Finished loading from environment

        step_execution_object = StepExecution(input_object, execution_object)
        results_ml_object = step_execution_object.execute(
            result_object_schema_type=workflow_object.steps[
                step_name
            ].output.schema_type,
            result_object_schema_version=workflow_object.steps[
                step_name
            ].output.schema_version,
        )

    else:
        # Plain f-string placeholder; the previous "${...}" printed a literal "$".
        msg = f"Executing '{execution_file}' (found in INPUT_EXECUTION_FILE env var)"

        print_left_message(msg)
        rootLogger.debug("::debug::" + msg)

        execution_file_path = Path(execution_file)

        if not execution_file_path.exists():
            raise KnownException(
                f"'{execution_file}' was provided as the file, but it does not appear to exist at {str(execution_file_path.resolve())} -- exiting."
            )

        # The below are used in the execution file
        result_ml_object_schema_type = workflow_object.steps[  # noqa
            step_name
        ].output.schema_type
        result_ml_object_schema_version = workflow_object.steps[  # noqa
            step_name
        ].output.schema_version

        # SECURITY NOTE(review): this runs arbitrary code from execution_file
        # with full access to this module's globals. Only point it at trusted
        # files.
        #
        # Inside a function, names assigned by exec'd code land in the mapping
        # passed as locals and never reach the real local variables. Use an
        # explicit namespace so a script that rebinds results_ml_object (rather
        # than mutating it in place) is honoured.
        exec_namespace = dict(locals())
        exec(execution_file_path.read_text(), globals(), exec_namespace)
        results_ml_object = exec_namespace.get("results_ml_object")

        print("{:>15}".format("ok"))  # Finished executing step

    if results_ml_object is None:
        raise KnownException(
            "No value was assigned to the variable 'results_ml_object' -- exiting."
        )
    # Type check comes before len() so a non-sized value (e.g. an int) yields
    # the intended KnownException rather than a TypeError.
    if not isinstance(results_ml_object, MLObject):
        raise KnownException(
            "The variable 'results_ml_object' was not of type MLObject -- exiting."
        )
    if len(results_ml_object) == 0:
        raise KnownException(
            "No value was assigned to the variable 'results_ml_object' -- exiting."
        )

    results_ml_object.run_id = run_id
    results_ml_object.step_id = str(uuid.uuid4())
    results_ml_object.run_date = datetime.datetime.now().isoformat()

    # Using the below to validate the object, even though we already have it created.
    load_contract_object(
        parameters=results_ml_object.dict_without_internal_variables(),
        workflow_object=workflow_object,
        step_name=step_name,
        contract_type="output",
    )

    return results_ml_object